1
0
mirror of https://github.com/paboyle/Grid.git synced 2025-04-09 21:50:45 +01:00

GPU happy. Still need to prevent hand kernels being callable under NVCC

This commit is contained in:
Peter Boyle 2019-01-01 15:00:33 +00:00
parent 0c2498fe2f
commit 07ee87ff5a
14 changed files with 81 additions and 51 deletions

View File

@ -163,7 +163,7 @@ class FourierAcceleratedPV {
for (int sidx=0;sidx<group_in_s;sidx++) { for (int sidx=0;sidx<group_in_s;sidx++) {
int s = sgroup*group_in_s + sidx; int s = sgroup*group_in_s + sidx;
int sprime = Ls-s-1; // int sprime = Ls-s-1;
RealD phase = M_PI / (RealD)Ls * (2.0 * s + 1.0); RealD phase = M_PI / (RealD)Ls * (2.0 * s + 1.0);
RealD cosp = ::cos(phase); RealD cosp = ::cos(phase);

View File

@ -259,7 +259,7 @@ void ImprovedStaggeredFermion<Impl>::DerivInternal(StencilImpl &st, DoubledGauge
auto B_v = B.View(); auto B_v = B.View();
auto Btilde_v = Btilde.View(); auto Btilde_v = Btilde.View();
thread_loop( (int sss = 0; sss < B.Grid()->oSites(); sss++), { thread_loop( (int sss = 0; sss < B.Grid()->oSites(); sss++), {
Kernels::DhopDir(st, U_v, UUU_v, st.CommBuf(), sss, sss, B_v, Btilde_v, mu,1); Kernels::DhopDirKernel(st, U_v, UUU_v, st.CommBuf(), sss, sss, B_v, Btilde_v, mu,1);
}); });
// Force in three link terms // Force in three link terms
@ -380,7 +380,7 @@ void ImprovedStaggeredFermion<Impl>::DhopDir(const FermionField &in, FermionFiel
auto in_v = in.View(); auto in_v = in.View();
auto out_v = out.View(); auto out_v = out.View();
thread_loop( (int sss = 0; sss < in.Grid()->oSites(); sss++) , { thread_loop( (int sss = 0; sss < in.Grid()->oSites(); sss++) , {
Kernels::DhopDir(Stencil, Umu_v, UUUmu_v, Stencil.CommBuf(), sss, sss, in_v, out_v, dir, disp); Kernels::DhopDirKernel(Stencil, Umu_v, UUUmu_v, Stencil.CommBuf(), sss, sss, in_v, out_v, dir, disp);
}); });
}; };

View File

@ -233,7 +233,7 @@ void ImprovedStaggeredFermion5D<Impl>::DhopDir(const FermionField &in, FermionFi
for(int s=0;s<Ls;s++){ for(int s=0;s<Ls;s++){
int sU=ss; int sU=ss;
int sF = s+Ls*sU; int sF = s+Ls*sU;
Kernels::DhopDir(Stencil, Umu_v, UUUmu_v, Stencil.CommBuf(), sF, sU, in_v, out_v, dir, disp); Kernels::DhopDirKernel(Stencil, Umu_v, UUUmu_v, Stencil.CommBuf(), sF, sU, in_v, out_v, dir, disp);
} }
}); });
}; };
@ -427,15 +427,15 @@ void ImprovedStaggeredFermion5D<Impl>::DhopInternalSerialComms(StencilImpl & st,
auto in_v = in.View(); auto in_v = in.View();
auto out_v = out.View(); auto out_v = out.View();
if (dag == DaggerYes) { if (dag == DaggerYes) {
parallel_for (int ss = 0; ss < U.Grid()->oSites(); ss++) { thread_loop( (int ss = 0; ss < U.Grid()->oSites(); ss++), {
int sU=ss; int sU=ss;
Kernels::DhopSiteDag(st, lo, U_v, UUU_v, st.CommBuf(), LLs, sU,in_v, out_v); Kernels::DhopSiteDag(st, lo, U_v, UUU_v, st.CommBuf(), LLs, sU,in_v, out_v);
} });
} else { } else {
parallel_for (int ss = 0; ss < U.Grid()->oSites(); ss++) { thread_loop( (int ss = 0; ss < U.Grid()->oSites(); ss++) ,{
int sU=ss; int sU=ss;
Kernels::DhopSite(st,lo,U_v,UUU_v,st.CommBuf(),LLs,sU,in_v,out_v); Kernels::DhopSite(st,lo,U_v,UUU_v,st.CommBuf(),LLs,sU,in_v,out_v);
} });
} }
DhopComputeTime += usecond(); DhopComputeTime += usecond();
DhopTotalTime += usecond(); DhopTotalTime += usecond();

View File

@ -170,7 +170,7 @@ void StaggeredKernels<Impl>::DhopSiteGenericExt(StencilImpl &st, LebesgueOrder &
SiteSpinor *buf, int LLs, int sU, SiteSpinor *buf, int LLs, int sU,
const FermionFieldView &in, FermionFieldView &out,int dag) { const FermionFieldView &in, FermionFieldView &out,int dag) {
const SiteSpinor *chi_p; const SiteSpinor *chi_p;
SiteSpinor chi; // SiteSpinor chi;
SiteSpinor Uchi; SiteSpinor Uchi;
StencilEntry *SE; StencilEntry *SE;
int ptype; int ptype;
@ -276,8 +276,8 @@ void StaggeredKernels<Impl>::DhopSite(StencilImpl &st, LebesgueOrder &lo, Double
}; };
template <class Impl> template <class Impl>
void StaggeredKernels<Impl>::DhopDir( StencilImpl &st, DoubledGaugeFieldView &U, DoubledGaugeFieldView &UUU, SiteSpinor *buf, int sF, void StaggeredKernels<Impl>::DhopDirKernel( StencilImpl &st, DoubledGaugeFieldView &U, DoubledGaugeFieldView &UUU, SiteSpinor *buf, int sF,
int sU, const FermionFieldView &in, FermionFieldView &out, int dir, int disp) int sU, const FermionFieldView &in, FermionFieldView &out, int dir, int disp)
{ {
// Disp should be either +1,-1,+3,-3 // Disp should be either +1,-1,+3,-3
// What about "dag" ? // What about "dag" ?

View File

@ -49,8 +49,8 @@ template<class Impl> class StaggeredKernels : public FermionOperator<Impl> , pub
public: public:
void DhopDir(StencilImpl &st, DoubledGaugeFieldView &U, DoubledGaugeFieldView &UUU, SiteSpinor * buf, void DhopDirKernel(StencilImpl &st, DoubledGaugeFieldView &U, DoubledGaugeFieldView &UUU, SiteSpinor * buf,
int sF, int sU, const FermionFieldView &in, FermionFieldView &out, int dir,int disp); int sF, int sU, const FermionFieldView &in, FermionFieldView &out, int dir,int disp);
/////////////////////////////////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////////////////////////////
// Generic Nc kernels // Generic Nc kernels

View File

@ -963,10 +963,10 @@ template <> void StaggeredKernels<StaggeredImplD>::DhopSiteAsm(StencilImpl &st,
SiteSpinor *buf, int LLs, \ SiteSpinor *buf, int LLs, \
int sU, const FermionFieldView &in, FermionFieldView &out,int dag); int sU, const FermionFieldView &in, FermionFieldView &out,int dag);
KERNEL_INSTANTIATE(StaggeredKernels,DhopSiteAsm,StaggeredImplD); //KERNEL_INSTANTIATE(StaggeredKernels,DhopSiteAsm,StaggeredImplD);
KERNEL_INSTANTIATE(StaggeredKernels,DhopSiteAsm,StaggeredImplF); //KERNEL_INSTANTIATE(StaggeredKernels,DhopSiteAsm,StaggeredImplF);
KERNEL_INSTANTIATE(StaggeredKernels,DhopSiteAsm,StaggeredVec5dImplD); //KERNEL_INSTANTIATE(StaggeredKernels,DhopSiteAsm,StaggeredVec5dImplD);
KERNEL_INSTANTIATE(StaggeredKernels,DhopSiteAsm,StaggeredVec5dImplF); //KERNEL_INSTANTIATE(StaggeredKernels,DhopSiteAsm,StaggeredVec5dImplF);
NAMESPACE_END(Grid); NAMESPACE_END(Grid);

View File

@ -247,7 +247,7 @@ void StaggeredKernels<Impl>::DhopSiteHandInt(StencilImpl &st, LebesgueOrder &lo,
Simd U_22; Simd U_22;
SiteSpinor result; SiteSpinor result;
int offset,local,perm, ptype; int offset, ptype, local, perm;
StencilEntry *SE; StencilEntry *SE;
int skew; int skew;
@ -323,7 +323,7 @@ void StaggeredKernels<Impl>::DhopSiteHandExt(StencilImpl &st, LebesgueOrder &lo,
Simd U_22; Simd U_22;
SiteSpinor result; SiteSpinor result;
int offset,local,perm, ptype; int offset, ptype, local, perm;
StencilEntry *SE; StencilEntry *SE;
int skew; int skew;

View File

@ -109,8 +109,10 @@ void WilsonCloverFermion<Impl>::ImportGauge(const GaugeField &_Umu)
for (int j = 0; j < Ns; j++) for (int j = 0; j < Ns; j++)
for (int k = 0; k < Ns; k++) for (int k = 0; k < Ns; k++)
for (int a = 0; a < DimRep; a++) for (int a = 0; a < DimRep; a++)
for (int b = 0; b < DimRep; b++) for (int b = 0; b < DimRep; b++){
EigenCloverOp(a + j * DimRep, b + k * DimRep) = Qx()(j, k)(a, b); auto zz = Qx()(j, k)(a, b);
EigenCloverOp(a + j * DimRep, b + k * DimRep) = std::complex<double>(zz);
}
// if (site==0) std::cout << "site =" << site << "\n" << EigenCloverOp << std::endl; // if (site==0) std::cout << "site =" << site << "\n" << EigenCloverOp << std::endl;
EigenInvCloverOp = EigenCloverOp.inverse(); EigenInvCloverOp = EigenCloverOp.inverse();

View File

@ -104,8 +104,8 @@ WilsonFermion5D<Impl>::WilsonFermion5D(GaugeField &_Umu,
assert(FiveDimRedBlackGrid._simd_layout[0]==nsimd); assert(FiveDimRedBlackGrid._simd_layout[0]==nsimd);
for(int d=0;d<4;d++){ for(int d=0;d<4;d++){
assert(FourDimGrid._simd_layout[d]=1); assert(FourDimGrid._simd_layout[d]==1);
assert(FourDimRedBlackGrid._simd_layout[d]=1); assert(FourDimRedBlackGrid._simd_layout[d]==1);
assert(FiveDimRedBlackGrid._simd_layout[d+1]==1); assert(FiveDimRedBlackGrid._simd_layout[d+1]==1);
} }
@ -528,14 +528,14 @@ void WilsonFermion5D<Impl>::DhopInternalSerialComms(StencilImpl & st, LebesgueOr
auto U_v = U.View(); auto U_v = U.View();
int Opt = WilsonKernelsStatic::Opt; int Opt = WilsonKernelsStatic::Opt;
if (dag == DaggerYes) { if (dag == DaggerYes) {
Kernels::DhopDag(Opt,st,U,st.CommBuf(),LLs,U_v.size(),in,out); Kernels::DhopDagKernel(Opt,st,U,st.CommBuf(),LLs,U_v.size(),in,out);
// parallel_for (int ss = 0; ss < U.Grid()->oSites(); ss++) { // parallel_for (int ss = 0; ss < U.Grid()->oSites(); ss++) {
// int sU = ss; // int sU = ss;
// int sF = LLs * sU; // int sF = LLs * sU;
// Kernels::DhopSiteDag(st,lo,U,st.CommBuf(),sF,sU,LLs,1,in,out); // Kernels::DhopSiteDag(st,lo,U,st.CommBuf(),sF,sU,LLs,1,in,out);
// } // }
} else { } else {
Kernels::Dhop(Opt,st,U,st.CommBuf(),LLs,U_v.size(),in,out); Kernels::DhopKernel(Opt,st,U,st.CommBuf(),LLs,U_v.size(),in,out);
// parallel_for (int ss = 0; ss < U.Grid()->oSites(); ss++) { // parallel_for (int ss = 0; ss < U.Grid()->oSites(); ss++) {
// int sU = ss; // int sU = ss;
// int sF = LLs * sU; // int sF = LLs * sU;
@ -672,7 +672,7 @@ void WilsonFermion5D<Impl>::MomentumSpacePropagatorHt_5d(FermionField &out,const
for(int idx=0;idx<_grid->lSites();idx++){ for(int idx=0;idx<_grid->lSites();idx++){
Coordinate lcoor(Nd); Coordinate lcoor(Nd);
Tcomplex cc; Tcomplex cc;
RealD sgn; // RealD sgn;
_grid->LocalIndexToLocalCoor(idx,lcoor); _grid->LocalIndexToLocalCoor(idx,lcoor);
peekLocalSite(cc,cosha,lcoor); peekLocalSite(cc,cosha,lcoor);
assert((double)real(cc)>=1.0); assert((double)real(cc)>=1.0);
@ -867,7 +867,7 @@ void WilsonFermion5D<Impl>::MomentumSpacePropagatorHt(FermionField &out,const Fe
for(int idx=0;idx<_grid->lSites();idx++){ for(int idx=0;idx<_grid->lSites();idx++){
Coordinate lcoor(Nd); Coordinate lcoor(Nd);
Tcomplex cc; Tcomplex cc;
RealD sgn; // RealD sgn;
_grid->LocalIndexToLocalCoor(idx,lcoor); _grid->LocalIndexToLocalCoor(idx,lcoor);
peekLocalSite(cc,cosha,lcoor); peekLocalSite(cc,cosha,lcoor);
assert((double)real(cc)>=1.0); assert((double)real(cc)>=1.0);

View File

@ -52,12 +52,12 @@ public:
public: public:
static void Dhop(int Opt,StencilImpl &st, DoubledGaugeField &U, SiteHalfSpinor * buf, static void DhopKernel(int Opt,StencilImpl &st, DoubledGaugeField &U, SiteHalfSpinor * buf,
int Ls, int Nsite, const FermionField &in, FermionField &out, int Ls, int Nsite, const FermionField &in, FermionField &out,
int interior=1,int exterior=1) ; int interior=1,int exterior=1) ;
static void DhopDag(int Opt,StencilImpl &st, DoubledGaugeField &U, SiteHalfSpinor * buf, static void DhopDagKernel(int Opt,StencilImpl &st, DoubledGaugeField &U, SiteHalfSpinor * buf,
int Ls, int Nsite, const FermionField &in, FermionField &out, int Ls, int Nsite, const FermionField &in, FermionField &out,
int interior=1,int exterior=1) ; int interior=1,int exterior=1) ;
template <bool EnableBool = true> static accelerator template <bool EnableBool = true> static accelerator
typename std::enable_if<Impl::Dimension == 3 && Nc == 3 &&EnableBool, void>::type typename std::enable_if<Impl::Dimension == 3 && Nc == 3 &&EnableBool, void>::type

View File

@ -137,9 +137,11 @@
int nmax=U.oSites(); int nmax=U.oSites();
for(int site=0;site<Ns;site++) { for(int site=0;site<Ns;site++) {
#ifndef EXTERIOR #ifndef EXTERIOR
int sU =lo.Reorder(ssU); // int sU =lo.Reorder(ssU);
int sU =ssU;
int ssn=ssU+1; if(ssn>=nmax) ssn=0; int ssn=ssU+1; if(ssn>=nmax) ssn=0;
int sUn=lo.Reorder(ssn); // int sUn=lo.Reorder(ssn);
int sUn=ssn;
LOCK_GAUGE(0); LOCK_GAUGE(0);
#else #else
int sU =ssU; int sU =ssU;

View File

@ -303,9 +303,9 @@ GPU_EMPTY(GparityWilsonImplD);
GPU_EMPTY(GparityWilsonImplDF); GPU_EMPTY(GparityWilsonImplDF);
template <class Impl> template <class Impl>
void WilsonKernels<Impl>::Dhop(int Opt,StencilImpl &st, DoubledGaugeField &U, SiteHalfSpinor * buf, void WilsonKernels<Impl>::DhopKernel(int Opt,StencilImpl &st, DoubledGaugeField &U, SiteHalfSpinor * buf,
int Ls, int Nsite, const FermionField &in, FermionField &out, int Ls, int Nsite, const FermionField &in, FermionField &out,
int interior,int exterior) int interior,int exterior)
{ {
auto U_v = U.View(); auto U_v = U.View();
auto in_v = in.View(); auto in_v = in.View();
@ -319,7 +319,8 @@ void WilsonKernels<Impl>::Dhop(int Opt,StencilImpl &st, DoubledGaugeField &U, S
// uint64_t lane = cur % nsimd; // uint64_t lane = cur % nsimd;
cur = cur / nsimd; cur = cur / nsimd;
uint64_t s = cur%Ls; uint64_t s = cur%Ls;
uint64_t sF = cur; cur = cur / Ls; // uint64_t sF = cur;
cur = cur / Ls;
uint64_t sU = cur; uint64_t sU = cur;
WilsonKernels<Impl>::GpuDhopSite(st_v,U_v[sU],buf,Ls,s,sU,in_v,out_v); WilsonKernels<Impl>::GpuDhopSite(st_v,U_v[sU],buf,Ls,s,sU,in_v,out_v);
}); });
@ -332,9 +333,9 @@ void WilsonKernels<Impl>::Dhop(int Opt,StencilImpl &st, DoubledGaugeField &U, S
} }
} }
template <class Impl> template <class Impl>
void WilsonKernels<Impl>::DhopDag(int Opt,StencilImpl &st, DoubledGaugeField &U, SiteHalfSpinor * buf, void WilsonKernels<Impl>::DhopDagKernel(int Opt,StencilImpl &st, DoubledGaugeField &U, SiteHalfSpinor * buf,
int Ls, int Nsite, const FermionField &in, FermionField &out, int Ls, int Nsite, const FermionField &in, FermionField &out,
int interior,int exterior) int interior,int exterior)
{ {
auto U_v = U.View(); auto U_v = U.View();
auto in_v = in.View(); auto in_v = in.View();
@ -349,7 +350,8 @@ void WilsonKernels<Impl>::Dhop(int Opt,StencilImpl &st, DoubledGaugeField &U, S
// uint64_t lane = cur % nsimd; // uint64_t lane = cur % nsimd;
cur = cur / nsimd; cur = cur / nsimd;
uint64_t s = cur%Ls; uint64_t s = cur%Ls;
uint64_t sF = cur; cur = cur / Ls; //uint64_t sF = cur;
cur = cur / Ls;
uint64_t sU = cur; uint64_t sU = cur;
WilsonKernels<Impl>::GpuDhopSiteDag(st_v,U_v,buf,Ls,s,sU,in_v,out_v); WilsonKernels<Impl>::GpuDhopSiteDag(st_v,U_v,buf,Ls,s,sU,in_v,out_v);
}); });

View File

@ -453,6 +453,7 @@ template<class Impl> void
WilsonKernels<Impl>::HandDhopSite(StencilView &st, DoubledGaugeFieldView &U,SiteHalfSpinor *buf, WilsonKernels<Impl>::HandDhopSite(StencilView &st, DoubledGaugeFieldView &U,SiteHalfSpinor *buf,
int ss,int sU,const FermionFieldView &in, FermionFieldView &out) int ss,int sU,const FermionFieldView &in, FermionFieldView &out)
{ {
#ifndef GRID_NVCC
// T==0, Z==1, Y==2, X==3 expect 1,2,2,2 simd layout etc... // T==0, Z==1, Y==2, X==3 expect 1,2,2,2 simd layout etc...
typedef typename Simd::scalar_type S; typedef typename Simd::scalar_type S;
typedef typename Simd::vector_type V; typedef typename Simd::vector_type V;
@ -471,12 +472,16 @@ WilsonKernels<Impl>::HandDhopSite(StencilView &st, DoubledGaugeFieldView &U,Site
HAND_STENCIL_LEG(ZP_PROJ,1,Zm,ZP_RECON_ACCUM); HAND_STENCIL_LEG(ZP_PROJ,1,Zm,ZP_RECON_ACCUM);
HAND_STENCIL_LEG(TP_PROJ,0,Tm,TP_RECON_ACCUM); HAND_STENCIL_LEG(TP_PROJ,0,Tm,TP_RECON_ACCUM);
HAND_RESULT(ss); HAND_RESULT(ss);
#else
assert(0);
#endif
} }
template<class Impl> template<class Impl>
void WilsonKernels<Impl>::HandDhopSiteDag(StencilView &st,DoubledGaugeFieldView &U,SiteHalfSpinor *buf, void WilsonKernels<Impl>::HandDhopSiteDag(StencilView &st,DoubledGaugeFieldView &U,SiteHalfSpinor *buf,
int ss,int sU,const FermionFieldView &in, FermionFieldView &out) int ss,int sU,const FermionFieldView &in, FermionFieldView &out)
{ {
#ifndef GRID_NVCC
typedef typename Simd::scalar_type S; typedef typename Simd::scalar_type S;
typedef typename Simd::vector_type V; typedef typename Simd::vector_type V;
@ -494,12 +499,16 @@ void WilsonKernels<Impl>::HandDhopSiteDag(StencilView &st,DoubledGaugeFieldView
HAND_STENCIL_LEG(ZM_PROJ,1,Zm,ZM_RECON_ACCUM); HAND_STENCIL_LEG(ZM_PROJ,1,Zm,ZM_RECON_ACCUM);
HAND_STENCIL_LEG(TM_PROJ,0,Tm,TM_RECON_ACCUM); HAND_STENCIL_LEG(TM_PROJ,0,Tm,TM_RECON_ACCUM);
HAND_RESULT(ss); HAND_RESULT(ss);
#else
assert(0);
#endif
} }
template<class Impl> void template<class Impl> void
WilsonKernels<Impl>::HandDhopSiteInt(StencilView &st,DoubledGaugeFieldView &U,SiteHalfSpinor *buf, WilsonKernels<Impl>::HandDhopSiteInt(StencilView &st,DoubledGaugeFieldView &U,SiteHalfSpinor *buf,
int ss,int sU,const FermionFieldView &in, FermionFieldView &out) int ss,int sU,const FermionFieldView &in, FermionFieldView &out)
{ {
#ifndef GRID_NVCC
// T==0, Z==1, Y==2, X==3 expect 1,2,2,2 simd layout etc... // T==0, Z==1, Y==2, X==3 expect 1,2,2,2 simd layout etc...
typedef typename Simd::scalar_type S; typedef typename Simd::scalar_type S;
typedef typename Simd::vector_type V; typedef typename Simd::vector_type V;
@ -518,12 +527,16 @@ WilsonKernels<Impl>::HandDhopSiteInt(StencilView &st,DoubledGaugeFieldView &U,Si
HAND_STENCIL_LEG_INT(ZP_PROJ,1,Zm,ZP_RECON_ACCUM); HAND_STENCIL_LEG_INT(ZP_PROJ,1,Zm,ZP_RECON_ACCUM);
HAND_STENCIL_LEG_INT(TP_PROJ,0,Tm,TP_RECON_ACCUM); HAND_STENCIL_LEG_INT(TP_PROJ,0,Tm,TP_RECON_ACCUM);
HAND_RESULT(ss); HAND_RESULT(ss);
#else
assert(0);
#endif
} }
template<class Impl> template<class Impl>
void WilsonKernels<Impl>::HandDhopSiteDagInt(StencilView &st,DoubledGaugeFieldView &U,SiteHalfSpinor *buf, void WilsonKernels<Impl>::HandDhopSiteDagInt(StencilView &st,DoubledGaugeFieldView &U,SiteHalfSpinor *buf,
int ss,int sU,const FermionFieldView &in, FermionFieldView &out) int ss,int sU,const FermionFieldView &in, FermionFieldView &out)
{ {
#ifndef GRID_NVCC
typedef typename Simd::scalar_type S; typedef typename Simd::scalar_type S;
typedef typename Simd::vector_type V; typedef typename Simd::vector_type V;
@ -541,12 +554,16 @@ void WilsonKernels<Impl>::HandDhopSiteDagInt(StencilView &st,DoubledGaugeFieldVi
HAND_STENCIL_LEG_INT(ZM_PROJ,1,Zm,ZM_RECON_ACCUM); HAND_STENCIL_LEG_INT(ZM_PROJ,1,Zm,ZM_RECON_ACCUM);
HAND_STENCIL_LEG_INT(TM_PROJ,0,Tm,TM_RECON_ACCUM); HAND_STENCIL_LEG_INT(TM_PROJ,0,Tm,TM_RECON_ACCUM);
HAND_RESULT(ss); HAND_RESULT(ss);
#else
assert(0);
#endif
} }
template<class Impl> void template<class Impl> void
WilsonKernels<Impl>::HandDhopSiteExt(StencilView &st,DoubledGaugeFieldView &U,SiteHalfSpinor *buf, WilsonKernels<Impl>::HandDhopSiteExt(StencilView &st,DoubledGaugeFieldView &U,SiteHalfSpinor *buf,
int ss,int sU,const FermionFieldView &in, FermionFieldView &out) int ss,int sU,const FermionFieldView &in, FermionFieldView &out)
{ {
#ifndef GRID_NVCC
// T==0, Z==1, Y==2, X==3 expect 1,2,2,2 simd layout etc... // T==0, Z==1, Y==2, X==3 expect 1,2,2,2 simd layout etc...
typedef typename Simd::scalar_type S; typedef typename Simd::scalar_type S;
typedef typename Simd::vector_type V; typedef typename Simd::vector_type V;
@ -566,12 +583,16 @@ WilsonKernels<Impl>::HandDhopSiteExt(StencilView &st,DoubledGaugeFieldView &U,Si
HAND_STENCIL_LEG_EXT(ZP_PROJ,1,Zm,ZP_RECON_ACCUM); HAND_STENCIL_LEG_EXT(ZP_PROJ,1,Zm,ZP_RECON_ACCUM);
HAND_STENCIL_LEG_EXT(TP_PROJ,0,Tm,TP_RECON_ACCUM); HAND_STENCIL_LEG_EXT(TP_PROJ,0,Tm,TP_RECON_ACCUM);
HAND_RESULT_EXT(ss); HAND_RESULT_EXT(ss);
#else
assert(0);
#endif
} }
template<class Impl> template<class Impl>
void WilsonKernels<Impl>::HandDhopSiteDagExt(StencilView &st,DoubledGaugeFieldView &U,SiteHalfSpinor *buf, void WilsonKernels<Impl>::HandDhopSiteDagExt(StencilView &st,DoubledGaugeFieldView &U,SiteHalfSpinor *buf,
int ss,int sU,const FermionFieldView &in, FermionFieldView &out) int ss,int sU,const FermionFieldView &in, FermionFieldView &out)
{ {
#ifndef GRID_NVCC
typedef typename Simd::scalar_type S; typedef typename Simd::scalar_type S;
typedef typename Simd::vector_type V; typedef typename Simd::vector_type V;
@ -590,6 +611,9 @@ void WilsonKernels<Impl>::HandDhopSiteDagExt(StencilView &st,DoubledGaugeFieldVi
HAND_STENCIL_LEG_EXT(ZM_PROJ,1,Zm,ZM_RECON_ACCUM); HAND_STENCIL_LEG_EXT(ZM_PROJ,1,Zm,ZM_RECON_ACCUM);
HAND_STENCIL_LEG_EXT(TM_PROJ,0,Tm,TM_RECON_ACCUM); HAND_STENCIL_LEG_EXT(TM_PROJ,0,Tm,TM_RECON_ACCUM);
HAND_RESULT_EXT(ss); HAND_RESULT_EXT(ss);
#else
assert(0);
#endif
} }
////////////// Wilson ; uses this implementation ///////////////////// ////////////// Wilson ; uses this implementation /////////////////////

View File

@ -931,13 +931,13 @@ template void WilsonKernels<A>::HandDhopSiteExt(StencilView &st, DoubledGaugeFie
template void WilsonKernels<A>::HandDhopSiteDagExt(StencilView &st, DoubledGaugeFieldView &U,SiteHalfSpinor *buf, \ template void WilsonKernels<A>::HandDhopSiteDagExt(StencilView &st, DoubledGaugeFieldView &U,SiteHalfSpinor *buf, \
int ss,int sU,const FermionFieldView &in, FermionFieldView &out); int ss,int sU,const FermionFieldView &in, FermionFieldView &out);
INSTANTIATE_THEM(GparityWilsonImplF); //INSTANTIATE_THEM(GparityWilsonImplF);
INSTANTIATE_THEM(GparityWilsonImplD); //INSTANTIATE_THEM(GparityWilsonImplD);
INSTANTIATE_THEM(GparityWilsonImplFH); //INSTANTIATE_THEM(GparityWilsonImplFH);
INSTANTIATE_THEM(GparityWilsonImplDF); //INSTANTIATE_THEM(GparityWilsonImplDF);
INSTANTIATE_THEM(DomainWallVec5dImplFH); //INSTANTIATE_THEM(DomainWallVec5dImplFH);
INSTANTIATE_THEM(DomainWallVec5dImplDF); //INSTANTIATE_THEM(DomainWallVec5dImplDF);
INSTANTIATE_THEM(ZDomainWallVec5dImplFH); //INSTANTIATE_THEM(ZDomainWallVec5dImplFH);
INSTANTIATE_THEM(ZDomainWallVec5dImplDF); //INSTANTIATE_THEM(ZDomainWallVec5dImplDF);
NAMESPACE_END(Grid); NAMESPACE_END(Grid);