From aab27a655a6450a0685050291258fccbea177a9c Mon Sep 17 00:00:00 2001 From: Peter Boyle Date: Mon, 14 May 2018 19:41:17 -0400 Subject: [PATCH] Start of GPU kernels --- lib/qcd/action/fermion/WilsonKernelsGpu.cc | 176 +++++++++++++++++++++ 1 file changed, 176 insertions(+) create mode 100644 lib/qcd/action/fermion/WilsonKernelsGpu.cc diff --git a/lib/qcd/action/fermion/WilsonKernelsGpu.cc b/lib/qcd/action/fermion/WilsonKernelsGpu.cc new file mode 100644 index 00000000..6bc01386 --- /dev/null +++ b/lib/qcd/action/fermion/WilsonKernelsGpu.cc @@ -0,0 +1,176 @@ +/************************************************************************************* + +Grid physics library, www.github.com/paboyle/Grid + +Source file: ./lib/qcd/action/fermion/WilsonKernelsGpu.cc + +Copyright (C) 2018 + +Author: Peter Boyle + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along +with this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + +See the full license in the file "LICENSE" in the top level distribution +directory +*************************************************************************************/ +/* END LEGAL */ +#include + +NAMESPACE_BEGIN(Grid); + +////////////////////////////////////////////////////////////// +// Gpu implementation; view code at a premium and less unroll +////////////////////////////////////////////////////////////// + +#define GPU_STENCIL_LEG_PROJ(Dir,spProj) \ + if (SE->_is_local) { \ + spProj(chi, in[SE->_offset]); \ + if (SE->_permute) { \ + permute(tmp, chi, ptype); \ + chi = tmp; \ + } \ + } else { \ + chi = buf[SE->_offset]; \ + } + +#define GPU_STENCIL_LEG_RECON(Recon) Recon(result, Uchi); +// Xp is mu= 0 +template +accelerator void WilsonKernels::GpuDhopSiteDag(StencilView &st, DoubledGaugeFieldView &U, + SiteHalfSpinor *buf, int sF, + int sU, const FermionFieldView &in, FermionFieldView &out) +{ + SiteHalfSpinor tmp; + SiteHalfSpinor chi; + SiteHalfSpinor Uchi; + SiteSpinor result; + StencilEntry *SE; + int ptype; + + for(int mu=0;mu<2*Nd;mu++) { + + SE = st.GetEntry(ptype, mu, sF); + + switch(mu){ + case Xp: + GPU_STENCIL_LEG_PROJ(Xp,spProjXp); break; + case Yp: + GPU_STENCIL_LEG_PROJ(Yp,spProjYp); break; + case Zp: + GPU_STENCIL_LEG_PROJ(Zp,spProjZp); break; + case Tp: + GPU_STENCIL_LEG_PROJ(Tp,spProjTp); break; + case Xm: + GPU_STENCIL_LEG_PROJ(Xm,spProjXm); break; + case Ym: + GPU_STENCIL_LEG_PROJ(Ym,spProjYm); break; + case Zm: + GPU_STENCIL_LEG_PROJ(Zm,spProjZm); break; + case Tm: + default: + GPU_STENCIL_LEG_PROJ(Tm,spProjTm); break; + } + + Impl::multLink(Uchi, U[sU], chi, mu, SE, st); + + switch(mu){ + case Xp: + spReconXp(result, Uchi); break; + case Yp: + accumReconYp(result, Uchi); break; + case Zp: + accumReconZp(result, Uchi); break; + case Tp: + accumReconTp(result, Uchi); break; + case Xm: + accumReconXm(result, Uchi); break; + case Ym: + accumReconYm(result, Uchi); break; + case Zm: + accumReconZm(result, Uchi); break; + case Tm: + default: + accumReconTm(result, Uchi); break; + } + } + vstream(out[sF], result); +}; + +template +accelerator void WilsonKernels::GpuDhopSite(StencilView &st, DoubledGaugeFieldView &U, + SiteHalfSpinor *buf, int sF, + int sU, const FermionFieldView &in, FermionFieldView &out) +{ + SiteHalfSpinor tmp; + SiteHalfSpinor chi; + SiteHalfSpinor Uchi; + SiteSpinor result; + StencilEntry *SE; + int ptype; + + for(int mu=0;mu<2*Nd;mu++) { + + SE = st.GetEntry(ptype, mu, sF); + + switch(mu){ + case Xp: + GPU_STENCIL_LEG_PROJ(Xp,spProjXm); break; + case Yp: + GPU_STENCIL_LEG_PROJ(Yp,spProjYm); break; + case Zp: + GPU_STENCIL_LEG_PROJ(Zp,spProjZm); break; + case Tp: + GPU_STENCIL_LEG_PROJ(Tp,spProjTm); break; + case Xm: + GPU_STENCIL_LEG_PROJ(Xm,spProjXp); break; + case Ym: + GPU_STENCIL_LEG_PROJ(Ym,spProjYp); break; + case Zm: + GPU_STENCIL_LEG_PROJ(Zm,spProjZp); break; + case Tm: + default: + GPU_STENCIL_LEG_PROJ(Tm,spProjTp); break; + } + + Impl::multLink(Uchi, U[sU], chi, mu, SE, st); + + switch(mu){ + case Xp: + spReconXm(result, Uchi); break; + case Yp: + accumReconYm(result, Uchi); break; + case Zp: + accumReconZm(result, Uchi); break; + case Tp: + accumReconTm(result, Uchi); break; + case Xm: + accumReconXp(result, Uchi); break; + case Ym: + accumReconYp(result, Uchi); break; + case Zm: + accumReconZp(result, Uchi); break; + case Tm: + default: + accumReconTp(result, Uchi); break; + } + } + vstream(out[sF], result); + +}; + +FermOpTemplateInstantiate(WilsonKernels); + +NAMESPACE_END(Grid); +