1
0
mirror of https://github.com/paboyle/Grid.git synced 2025-04-05 19:55:56 +01:00

GPU happy. Still need to prevent hand kernels being callable under NVCC

This commit is contained in:
Peter Boyle 2019-01-01 15:00:33 +00:00
parent 0c2498fe2f
commit 07ee87ff5a
14 changed files with 81 additions and 51 deletions

View File

@ -163,7 +163,7 @@ class FourierAcceleratedPV {
for (int sidx=0;sidx<group_in_s;sidx++) {
int s = sgroup*group_in_s + sidx;
int sprime = Ls-s-1;
// int sprime = Ls-s-1;
RealD phase = M_PI / (RealD)Ls * (2.0 * s + 1.0);
RealD cosp = ::cos(phase);

View File

@ -259,7 +259,7 @@ void ImprovedStaggeredFermion<Impl>::DerivInternal(StencilImpl &st, DoubledGauge
auto B_v = B.View();
auto Btilde_v = Btilde.View();
thread_loop( (int sss = 0; sss < B.Grid()->oSites(); sss++), {
Kernels::DhopDir(st, U_v, UUU_v, st.CommBuf(), sss, sss, B_v, Btilde_v, mu,1);
Kernels::DhopDirKernel(st, U_v, UUU_v, st.CommBuf(), sss, sss, B_v, Btilde_v, mu,1);
});
// Force in three link terms
@ -380,7 +380,7 @@ void ImprovedStaggeredFermion<Impl>::DhopDir(const FermionField &in, FermionFiel
auto in_v = in.View();
auto out_v = out.View();
thread_loop( (int sss = 0; sss < in.Grid()->oSites(); sss++) , {
Kernels::DhopDir(Stencil, Umu_v, UUUmu_v, Stencil.CommBuf(), sss, sss, in_v, out_v, dir, disp);
Kernels::DhopDirKernel(Stencil, Umu_v, UUUmu_v, Stencil.CommBuf(), sss, sss, in_v, out_v, dir, disp);
});
};

View File

@ -233,7 +233,7 @@ void ImprovedStaggeredFermion5D<Impl>::DhopDir(const FermionField &in, FermionFi
for(int s=0;s<Ls;s++){
int sU=ss;
int sF = s+Ls*sU;
Kernels::DhopDir(Stencil, Umu_v, UUUmu_v, Stencil.CommBuf(), sF, sU, in_v, out_v, dir, disp);
Kernels::DhopDirKernel(Stencil, Umu_v, UUUmu_v, Stencil.CommBuf(), sF, sU, in_v, out_v, dir, disp);
}
});
};
@ -427,15 +427,15 @@ void ImprovedStaggeredFermion5D<Impl>::DhopInternalSerialComms(StencilImpl & st,
auto in_v = in.View();
auto out_v = out.View();
if (dag == DaggerYes) {
parallel_for (int ss = 0; ss < U.Grid()->oSites(); ss++) {
thread_loop( (int ss = 0; ss < U.Grid()->oSites(); ss++), {
int sU=ss;
Kernels::DhopSiteDag(st, lo, U_v, UUU_v, st.CommBuf(), LLs, sU,in_v, out_v);
}
});
} else {
parallel_for (int ss = 0; ss < U.Grid()->oSites(); ss++) {
thread_loop( (int ss = 0; ss < U.Grid()->oSites(); ss++) ,{
int sU=ss;
Kernels::DhopSite(st,lo,U_v,UUU_v,st.CommBuf(),LLs,sU,in_v,out_v);
}
});
}
DhopComputeTime += usecond();
DhopTotalTime += usecond();

View File

@ -170,7 +170,7 @@ void StaggeredKernels<Impl>::DhopSiteGenericExt(StencilImpl &st, LebesgueOrder &
SiteSpinor *buf, int LLs, int sU,
const FermionFieldView &in, FermionFieldView &out,int dag) {
const SiteSpinor *chi_p;
SiteSpinor chi;
// SiteSpinor chi;
SiteSpinor Uchi;
StencilEntry *SE;
int ptype;
@ -276,8 +276,8 @@ void StaggeredKernels<Impl>::DhopSite(StencilImpl &st, LebesgueOrder &lo, Double
};
template <class Impl>
void StaggeredKernels<Impl>::DhopDir( StencilImpl &st, DoubledGaugeFieldView &U, DoubledGaugeFieldView &UUU, SiteSpinor *buf, int sF,
int sU, const FermionFieldView &in, FermionFieldView &out, int dir, int disp)
void StaggeredKernels<Impl>::DhopDirKernel( StencilImpl &st, DoubledGaugeFieldView &U, DoubledGaugeFieldView &UUU, SiteSpinor *buf, int sF,
int sU, const FermionFieldView &in, FermionFieldView &out, int dir, int disp)
{
// Disp should be either +1,-1,+3,-3
// What about "dag" ?

View File

@ -49,8 +49,8 @@ template<class Impl> class StaggeredKernels : public FermionOperator<Impl> , pub
public:
void DhopDir(StencilImpl &st, DoubledGaugeFieldView &U, DoubledGaugeFieldView &UUU, SiteSpinor * buf,
int sF, int sU, const FermionFieldView &in, FermionFieldView &out, int dir,int disp);
void DhopDirKernel(StencilImpl &st, DoubledGaugeFieldView &U, DoubledGaugeFieldView &UUU, SiteSpinor * buf,
int sF, int sU, const FermionFieldView &in, FermionFieldView &out, int dir,int disp);
///////////////////////////////////////////////////////////////////////////////////////
// Generic Nc kernels

View File

@ -963,10 +963,10 @@ template <> void StaggeredKernels<StaggeredImplD>::DhopSiteAsm(StencilImpl &st,
SiteSpinor *buf, int LLs, \
int sU, const FermionFieldView &in, FermionFieldView &out,int dag);
KERNEL_INSTANTIATE(StaggeredKernels,DhopSiteAsm,StaggeredImplD);
KERNEL_INSTANTIATE(StaggeredKernels,DhopSiteAsm,StaggeredImplF);
KERNEL_INSTANTIATE(StaggeredKernels,DhopSiteAsm,StaggeredVec5dImplD);
KERNEL_INSTANTIATE(StaggeredKernels,DhopSiteAsm,StaggeredVec5dImplF);
//KERNEL_INSTANTIATE(StaggeredKernels,DhopSiteAsm,StaggeredImplD);
//KERNEL_INSTANTIATE(StaggeredKernels,DhopSiteAsm,StaggeredImplF);
//KERNEL_INSTANTIATE(StaggeredKernels,DhopSiteAsm,StaggeredVec5dImplD);
//KERNEL_INSTANTIATE(StaggeredKernels,DhopSiteAsm,StaggeredVec5dImplF);
NAMESPACE_END(Grid);

View File

@ -247,7 +247,7 @@ void StaggeredKernels<Impl>::DhopSiteHandInt(StencilImpl &st, LebesgueOrder &lo,
Simd U_22;
SiteSpinor result;
int offset,local,perm, ptype;
int offset, ptype, local, perm;
StencilEntry *SE;
int skew;
@ -323,7 +323,7 @@ void StaggeredKernels<Impl>::DhopSiteHandExt(StencilImpl &st, LebesgueOrder &lo,
Simd U_22;
SiteSpinor result;
int offset,local,perm, ptype;
int offset, ptype, local, perm;
StencilEntry *SE;
int skew;

View File

@ -109,8 +109,10 @@ void WilsonCloverFermion<Impl>::ImportGauge(const GaugeField &_Umu)
for (int j = 0; j < Ns; j++)
for (int k = 0; k < Ns; k++)
for (int a = 0; a < DimRep; a++)
for (int b = 0; b < DimRep; b++)
EigenCloverOp(a + j * DimRep, b + k * DimRep) = Qx()(j, k)(a, b);
for (int b = 0; b < DimRep; b++){
auto zz = Qx()(j, k)(a, b);
EigenCloverOp(a + j * DimRep, b + k * DimRep) = std::complex<double>(zz);
}
// if (site==0) std::cout << "site =" << site << "\n" << EigenCloverOp << std::endl;
EigenInvCloverOp = EigenCloverOp.inverse();

View File

@ -104,8 +104,8 @@ WilsonFermion5D<Impl>::WilsonFermion5D(GaugeField &_Umu,
assert(FiveDimRedBlackGrid._simd_layout[0]==nsimd);
for(int d=0;d<4;d++){
assert(FourDimGrid._simd_layout[d]=1);
assert(FourDimRedBlackGrid._simd_layout[d]=1);
assert(FourDimGrid._simd_layout[d]==1);
assert(FourDimRedBlackGrid._simd_layout[d]==1);
assert(FiveDimRedBlackGrid._simd_layout[d+1]==1);
}
@ -528,14 +528,14 @@ void WilsonFermion5D<Impl>::DhopInternalSerialComms(StencilImpl & st, LebesgueOr
auto U_v = U.View();
int Opt = WilsonKernelsStatic::Opt;
if (dag == DaggerYes) {
Kernels::DhopDag(Opt,st,U,st.CommBuf(),LLs,U_v.size(),in,out);
Kernels::DhopDagKernel(Opt,st,U,st.CommBuf(),LLs,U_v.size(),in,out);
// parallel_for (int ss = 0; ss < U.Grid()->oSites(); ss++) {
// int sU = ss;
// int sF = LLs * sU;
// Kernels::DhopSiteDag(st,lo,U,st.CommBuf(),sF,sU,LLs,1,in,out);
// }
} else {
Kernels::Dhop(Opt,st,U,st.CommBuf(),LLs,U_v.size(),in,out);
Kernels::DhopKernel(Opt,st,U,st.CommBuf(),LLs,U_v.size(),in,out);
// parallel_for (int ss = 0; ss < U.Grid()->oSites(); ss++) {
// int sU = ss;
// int sF = LLs * sU;
@ -672,7 +672,7 @@ void WilsonFermion5D<Impl>::MomentumSpacePropagatorHt_5d(FermionField &out,const
for(int idx=0;idx<_grid->lSites();idx++){
Coordinate lcoor(Nd);
Tcomplex cc;
RealD sgn;
// RealD sgn;
_grid->LocalIndexToLocalCoor(idx,lcoor);
peekLocalSite(cc,cosha,lcoor);
assert((double)real(cc)>=1.0);
@ -867,7 +867,7 @@ void WilsonFermion5D<Impl>::MomentumSpacePropagatorHt(FermionField &out,const Fe
for(int idx=0;idx<_grid->lSites();idx++){
Coordinate lcoor(Nd);
Tcomplex cc;
RealD sgn;
// RealD sgn;
_grid->LocalIndexToLocalCoor(idx,lcoor);
peekLocalSite(cc,cosha,lcoor);
assert((double)real(cc)>=1.0);

View File

@ -52,12 +52,12 @@ public:
public:
static void Dhop(int Opt,StencilImpl &st, DoubledGaugeField &U, SiteHalfSpinor * buf,
int Ls, int Nsite, const FermionField &in, FermionField &out,
int interior=1,int exterior=1) ;
static void DhopDag(int Opt,StencilImpl &st, DoubledGaugeField &U, SiteHalfSpinor * buf,
int Ls, int Nsite, const FermionField &in, FermionField &out,
int interior=1,int exterior=1) ;
static void DhopKernel(int Opt,StencilImpl &st, DoubledGaugeField &U, SiteHalfSpinor * buf,
int Ls, int Nsite, const FermionField &in, FermionField &out,
int interior=1,int exterior=1) ;
static void DhopDagKernel(int Opt,StencilImpl &st, DoubledGaugeField &U, SiteHalfSpinor * buf,
int Ls, int Nsite, const FermionField &in, FermionField &out,
int interior=1,int exterior=1) ;
template <bool EnableBool = true> static accelerator
typename std::enable_if<Impl::Dimension == 3 && Nc == 3 &&EnableBool, void>::type

View File

@ -137,9 +137,11 @@
int nmax=U.oSites();
for(int site=0;site<Ns;site++) {
#ifndef EXTERIOR
int sU =lo.Reorder(ssU);
// int sU =lo.Reorder(ssU);
int sU =ssU;
int ssn=ssU+1; if(ssn>=nmax) ssn=0;
int sUn=lo.Reorder(ssn);
// int sUn=lo.Reorder(ssn);
int sUn=ssn;
LOCK_GAUGE(0);
#else
int sU =ssU;

View File

@ -303,9 +303,9 @@ GPU_EMPTY(GparityWilsonImplD);
GPU_EMPTY(GparityWilsonImplDF);
template <class Impl>
void WilsonKernels<Impl>::Dhop(int Opt,StencilImpl &st, DoubledGaugeField &U, SiteHalfSpinor * buf,
int Ls, int Nsite, const FermionField &in, FermionField &out,
int interior,int exterior)
void WilsonKernels<Impl>::DhopKernel(int Opt,StencilImpl &st, DoubledGaugeField &U, SiteHalfSpinor * buf,
int Ls, int Nsite, const FermionField &in, FermionField &out,
int interior,int exterior)
{
auto U_v = U.View();
auto in_v = in.View();
@ -319,7 +319,8 @@ void WilsonKernels<Impl>::Dhop(int Opt,StencilImpl &st, DoubledGaugeField &U, S
// uint64_t lane = cur % nsimd;
cur = cur / nsimd;
uint64_t s = cur%Ls;
uint64_t sF = cur; cur = cur / Ls;
// uint64_t sF = cur;
cur = cur / Ls;
uint64_t sU = cur;
WilsonKernels<Impl>::GpuDhopSite(st_v,U_v[sU],buf,Ls,s,sU,in_v,out_v);
});
@ -332,9 +333,9 @@ void WilsonKernels<Impl>::Dhop(int Opt,StencilImpl &st, DoubledGaugeField &U, S
}
}
template <class Impl>
void WilsonKernels<Impl>::DhopDag(int Opt,StencilImpl &st, DoubledGaugeField &U, SiteHalfSpinor * buf,
int Ls, int Nsite, const FermionField &in, FermionField &out,
int interior,int exterior)
void WilsonKernels<Impl>::DhopDagKernel(int Opt,StencilImpl &st, DoubledGaugeField &U, SiteHalfSpinor * buf,
int Ls, int Nsite, const FermionField &in, FermionField &out,
int interior,int exterior)
{
auto U_v = U.View();
auto in_v = in.View();
@ -349,7 +350,8 @@ void WilsonKernels<Impl>::Dhop(int Opt,StencilImpl &st, DoubledGaugeField &U, S
// uint64_t lane = cur % nsimd;
cur = cur / nsimd;
uint64_t s = cur%Ls;
uint64_t sF = cur; cur = cur / Ls;
//uint64_t sF = cur;
cur = cur / Ls;
uint64_t sU = cur;
WilsonKernels<Impl>::GpuDhopSiteDag(st_v,U_v,buf,Ls,s,sU,in_v,out_v);
});

View File

@ -453,6 +453,7 @@ template<class Impl> void
WilsonKernels<Impl>::HandDhopSite(StencilView &st, DoubledGaugeFieldView &U,SiteHalfSpinor *buf,
int ss,int sU,const FermionFieldView &in, FermionFieldView &out)
{
#ifndef GRID_NVCC
// T==0, Z==1, Y==2, Z==3 expect 1,2,2,2 simd layout etc...
typedef typename Simd::scalar_type S;
typedef typename Simd::vector_type V;
@ -471,12 +472,16 @@ WilsonKernels<Impl>::HandDhopSite(StencilView &st, DoubledGaugeFieldView &U,Site
HAND_STENCIL_LEG(ZP_PROJ,1,Zm,ZP_RECON_ACCUM);
HAND_STENCIL_LEG(TP_PROJ,0,Tm,TP_RECON_ACCUM);
HAND_RESULT(ss);
#else
assert(0);
#endif
}
template<class Impl>
void WilsonKernels<Impl>::HandDhopSiteDag(StencilView &st,DoubledGaugeFieldView &U,SiteHalfSpinor *buf,
int ss,int sU,const FermionFieldView &in, FermionFieldView &out)
{
#ifndef GRID_NVCC
typedef typename Simd::scalar_type S;
typedef typename Simd::vector_type V;
@ -494,12 +499,16 @@ void WilsonKernels<Impl>::HandDhopSiteDag(StencilView &st,DoubledGaugeFieldView
HAND_STENCIL_LEG(ZM_PROJ,1,Zm,ZM_RECON_ACCUM);
HAND_STENCIL_LEG(TM_PROJ,0,Tm,TM_RECON_ACCUM);
HAND_RESULT(ss);
#else
assert(0);
#endif
}
template<class Impl> void
WilsonKernels<Impl>::HandDhopSiteInt(StencilView &st,DoubledGaugeFieldView &U,SiteHalfSpinor *buf,
int ss,int sU,const FermionFieldView &in, FermionFieldView &out)
{
#ifndef GRID_NVCC
// T==0, Z==1, Y==2, Z==3 expect 1,2,2,2 simd layout etc...
typedef typename Simd::scalar_type S;
typedef typename Simd::vector_type V;
@ -518,12 +527,16 @@ WilsonKernels<Impl>::HandDhopSiteInt(StencilView &st,DoubledGaugeFieldView &U,Si
HAND_STENCIL_LEG_INT(ZP_PROJ,1,Zm,ZP_RECON_ACCUM);
HAND_STENCIL_LEG_INT(TP_PROJ,0,Tm,TP_RECON_ACCUM);
HAND_RESULT(ss);
#else
assert(0);
#endif
}
template<class Impl>
void WilsonKernels<Impl>::HandDhopSiteDagInt(StencilView &st,DoubledGaugeFieldView &U,SiteHalfSpinor *buf,
int ss,int sU,const FermionFieldView &in, FermionFieldView &out)
{
#ifndef GRID_NVCC
typedef typename Simd::scalar_type S;
typedef typename Simd::vector_type V;
@ -541,12 +554,16 @@ void WilsonKernels<Impl>::HandDhopSiteDagInt(StencilView &st,DoubledGaugeFieldVi
HAND_STENCIL_LEG_INT(ZM_PROJ,1,Zm,ZM_RECON_ACCUM);
HAND_STENCIL_LEG_INT(TM_PROJ,0,Tm,TM_RECON_ACCUM);
HAND_RESULT(ss);
#else
assert(0);
#endif
}
template<class Impl> void
WilsonKernels<Impl>::HandDhopSiteExt(StencilView &st,DoubledGaugeFieldView &U,SiteHalfSpinor *buf,
int ss,int sU,const FermionFieldView &in, FermionFieldView &out)
{
#ifndef GRID_NVCC
// T==0, Z==1, Y==2, Z==3 expect 1,2,2,2 simd layout etc...
typedef typename Simd::scalar_type S;
typedef typename Simd::vector_type V;
@ -566,12 +583,16 @@ WilsonKernels<Impl>::HandDhopSiteExt(StencilView &st,DoubledGaugeFieldView &U,Si
HAND_STENCIL_LEG_EXT(ZP_PROJ,1,Zm,ZP_RECON_ACCUM);
HAND_STENCIL_LEG_EXT(TP_PROJ,0,Tm,TP_RECON_ACCUM);
HAND_RESULT_EXT(ss);
#else
assert(0);
#endif
}
template<class Impl>
void WilsonKernels<Impl>::HandDhopSiteDagExt(StencilView &st,DoubledGaugeFieldView &U,SiteHalfSpinor *buf,
int ss,int sU,const FermionFieldView &in, FermionFieldView &out)
{
#ifndef GRID_NVCC
typedef typename Simd::scalar_type S;
typedef typename Simd::vector_type V;
@ -590,6 +611,9 @@ void WilsonKernels<Impl>::HandDhopSiteDagExt(StencilView &st,DoubledGaugeFieldVi
HAND_STENCIL_LEG_EXT(ZM_PROJ,1,Zm,ZM_RECON_ACCUM);
HAND_STENCIL_LEG_EXT(TM_PROJ,0,Tm,TM_RECON_ACCUM);
HAND_RESULT_EXT(ss);
#else
assert(0);
#endif
}
////////////// Wilson ; uses this implementation /////////////////////

View File

@ -931,13 +931,13 @@ template void WilsonKernels<A>::HandDhopSiteExt(StencilView &st, DoubledGaugeFie
template void WilsonKernels<A>::HandDhopSiteDagExt(StencilView &st, DoubledGaugeFieldView &U,SiteHalfSpinor *buf, \
int ss,int sU,const FermionFieldView &in, FermionFieldView &out);
INSTANTIATE_THEM(GparityWilsonImplF);
INSTANTIATE_THEM(GparityWilsonImplD);
INSTANTIATE_THEM(GparityWilsonImplFH);
INSTANTIATE_THEM(GparityWilsonImplDF);
INSTANTIATE_THEM(DomainWallVec5dImplFH);
INSTANTIATE_THEM(DomainWallVec5dImplDF);
INSTANTIATE_THEM(ZDomainWallVec5dImplFH);
INSTANTIATE_THEM(ZDomainWallVec5dImplDF);
//INSTANTIATE_THEM(GparityWilsonImplF);
//INSTANTIATE_THEM(GparityWilsonImplD);
//INSTANTIATE_THEM(GparityWilsonImplFH);
//INSTANTIATE_THEM(GparityWilsonImplDF);
//INSTANTIATE_THEM(DomainWallVec5dImplFH);
//INSTANTIATE_THEM(DomainWallVec5dImplDF);
//INSTANTIATE_THEM(ZDomainWallVec5dImplFH);
//INSTANTIATE_THEM(ZDomainWallVec5dImplDF);
NAMESPACE_END(Grid);