1
0
mirror of https://github.com/paboyle/Grid.git synced 2024-11-14 01:35:36 +00:00

Move the loop into a central kernel call.

This commit is contained in:
Peter Boyle 2019-06-05 00:08:13 +01:00
parent 8113845f9c
commit 8a5489d9e6

View File

@ -55,150 +55,11 @@ public:
// Declarations only (bodies are not in this view) of the two static kernel entry points,
// DhopKernel and DhopDagKernel. Both take the kernel selector Opt, the stencil, the doubled
// gauge field, a half-spinor buffer, Ls and Nsite, and the input/output fermion fields.
// interior and exterior both default to 1.
// NOTE(review): per the commit message these presumably now own the site loop that the
// removed DhopSite/DhopSiteDag dispatchers used to run -- confirm against the definitions.
static void DhopKernel(int Opt,StencilImpl &st, DoubledGaugeField &U, SiteHalfSpinor * buf, static void DhopKernel(int Opt,StencilImpl &st, DoubledGaugeField &U, SiteHalfSpinor * buf,
int Ls, int Nsite, const FermionField &in, FermionField &out, int Ls, int Nsite, const FermionField &in, FermionField &out,
int interior=1,int exterior=1) ; int interior=1,int exterior=1) ;
static void DhopDagKernel(int Opt,StencilImpl &st, DoubledGaugeField &U, SiteHalfSpinor * buf, static void DhopDagKernel(int Opt,StencilImpl &st, DoubledGaugeField &U, SiteHalfSpinor * buf,
int Ls, int Nsite, const FermionField &in, FermionField &out, int Ls, int Nsite, const FermionField &in, FermionField &out,
int interior=1,int exterior=1) ; int interior=1,int exterior=1) ;
// Site-kernel dispatcher, enabled only when Impl::isFundamental is true and Nc == 3.
//
// Opt selects the kernel family (WilsonKernelsStatic::Opt*):
//   - OptInlineAsm (only compiled when AVX512 or QPX is defined): one call to the Asm
//     kernel, which is handed sF, sU, Ls and Nsite directly.
//   - OptHandUnroll: hand-unrolled kernel, called once per site; when GRID_NVCC is defined
//     this case label shares the generic path instead.
//   - OptGpu / OptGeneric: generic kernel, called once per site.
// Any other Opt value trips assert(0).
//
// On the looped paths the kernel is invoked Nsite*Ls times: sF advances after every call
// and sU advances after each group of Ls calls.
//
// interior / exterior pick the variant: both set -> full kernel, interior only -> *Int,
// exterior only -> *Ext, neither -> assert(0).
template<bool EnableBool=true>
static accelerator_inline void
DhopSite(typename std::enable_if<(Impl::isFundamental==true && Nc == 3 &&EnableBool), int>::type Opt,
         StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor * buf,
         int sF, int sU, int Ls, int Nsite,
         const FermionFieldView &in, FermionFieldView &out,int interior=1,int exterior=1)
{
  //bgq_l1p_optimisation(1);
  switch(Opt) {
#if defined(AVX512) || defined (QPX)
  case WilsonKernelsStatic::OptInlineAsm:
    // The Asm kernels receive Ls and Nsite themselves, so no loop is written here.
    if(interior&&exterior) WilsonKernels<Impl>::AsmDhopSite   (st,U,buf,sF,sU,Ls,Nsite,in,out);
    else if (interior)     WilsonKernels<Impl>::AsmDhopSiteInt(st,U,buf,sF,sU,Ls,Nsite,in,out);
    else if (exterior)     WilsonKernels<Impl>::AsmDhopSiteExt(st,U,buf,sF,sU,Ls,Nsite,in,out);
    else assert(0);
    break;
#endif
#if !defined(GRID_NVCC)
  case WilsonKernelsStatic::OptHandUnroll:
    for (int site = 0; site < Nsite; site++) {
      for (int s = 0; s < Ls; s++) {
        if(interior&&exterior) WilsonKernels<Impl>::HandDhopSite   (st,U,buf,sF,sU,in,out);
        else if (interior)     WilsonKernels<Impl>::HandDhopSiteInt(st,U,buf,sF,sU,in,out);
        else if (exterior)     WilsonKernels<Impl>::HandDhopSiteExt(st,U,buf,sF,sU,in,out);
        // Neither part requested: reject it, exactly as every other path here and the
        // hand-unrolled path of DhopSiteDag do, instead of silently doing nothing.
        else assert(0);
        sF++;
      }
      sU++;
    }
    break;
#else
  // Under GRID_NVCC the hand-unrolled option falls through to the generic kernels.
  case WilsonKernelsStatic::OptHandUnroll:
#endif
  case WilsonKernelsStatic::OptGpu:
  case WilsonKernelsStatic::OptGeneric:
    for (int site = 0; site < Nsite; site++) {
      for (int s = 0; s < Ls; s++) {
        if(interior&&exterior) WilsonKernels<Impl>::GenericDhopSite   (st,U,buf,sF,sU,in,out);
        else if (interior)     WilsonKernels<Impl>::GenericDhopSiteInt(st,U,buf,sF,sU,in,out);
        else if (exterior)     WilsonKernels<Impl>::GenericDhopSiteExt(st,U,buf,sF,sU,in,out);
        else assert(0);
        sF++;
      }
      sU++;
    }
    break;
  default:
    assert(0);
  }
  //bgq_l1p_optimisation(0);
}
// Fallback overload, selected when Impl::isFundamental is false or Nc != 3.
// Opt is accepted for signature compatibility but never consulted: the generic site kernel
// is always used. It is called Nsite*Ls times; sF steps after every call, sU after every
// Ls calls. interior/exterior choose between the full, *Int and *Ext kernels; requesting
// neither trips assert(0).
template<bool EnableBool=true>
static accelerator_inline void
DhopSite(typename std::enable_if<((Impl::isFundamental==false)||(Nc != 3))&& EnableBool, int>::type Opt,
         StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor * buf,
         int sF, int sU, int Ls, int Nsite, const FermionFieldView &in, FermionFieldView &out,int interior=1,int exterior=1 )
{
  // no kernel choice
  for (int site = 0; site < Nsite; ++site, ++sU) {
    for (int s = 0; s < Ls; ++s, ++sF) {
      if (interior) {
        if (exterior) {
          WilsonKernels<Impl>::GenericDhopSite(st,U,buf,sF,sU,in,out);
        } else {
          WilsonKernels<Impl>::GenericDhopSiteInt(st,U,buf,sF,sU,in,out);
        }
      } else if (exterior) {
        WilsonKernels<Impl>::GenericDhopSiteExt(st,U,buf,sF,sU,in,out);
      } else {
        assert(0);
      }
    }
  }
}
// "Dag" site-kernel dispatcher, enabled only when Impl::isFundamental is true and Nc == 3.
//
// Opt chooses among the WilsonKernelsStatic::Opt* families:
//   - OptInlineAsm (compiled only with AVX512 or QPX): one call to the Asm Dag kernel,
//     passing sF, sU, Ls and Nsite through.
//   - OptHandUnroll: hand-unrolled Dag kernel per site, unless GRID_NVCC is defined, in
//     which case the label falls through to the generic path.
//   - OptGpu / OptGeneric: generic Dag kernel per site.
// Unknown Opt values hit assert(0).
//
// Looped paths make Nsite*Ls kernel calls, bumping sF after each call and sU after each
// run of Ls calls. interior/exterior select the full, *Int or *Ext variant; neither set
// hits assert(0).
template<bool EnableBool=true>
static accelerator_inline void
DhopSiteDag(typename std::enable_if<(Impl::isFundamental==true && Nc == 3 &&EnableBool), int>::type Opt,
            StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor * buf,
            int sF, int sU, int Ls, int Nsite, const FermionFieldView &in, FermionFieldView &out,int interior=1,int exterior=1)
{
  //bgq_l1p_optimisation(1);
  switch(Opt) {
#if defined(AVX512) || defined (QPX)
  case WilsonKernelsStatic::OptInlineAsm:
    if (interior) {
      if (exterior) {
        WilsonKernels<Impl>::AsmDhopSiteDag (st,U,buf,sF,sU,Ls,Nsite,in,out);
      } else {
        WilsonKernels<Impl>::AsmDhopSiteDagInt(st,U,buf,sF,sU,Ls,Nsite,in,out);
      }
    } else if (exterior) {
      WilsonKernels<Impl>::AsmDhopSiteDagExt(st,U,buf,sF,sU,Ls,Nsite,in,out);
    } else {
      assert(0);
    }
    break;
#endif
#if !defined(GRID_NVCC)
  case WilsonKernelsStatic::OptHandUnroll:
    for (int site = 0; site < Nsite; ++site, ++sU) {
      for (int s = 0; s < Ls; ++s, ++sF) {
        if (interior) {
          if (exterior) {
            WilsonKernels<Impl>::HandDhopSiteDag(st,U,buf,sF,sU,in,out);
          } else {
            WilsonKernels<Impl>::HandDhopSiteDagInt(st,U,buf,sF,sU,in,out);
          }
        } else if (exterior) {
          WilsonKernels<Impl>::HandDhopSiteDagExt(st,U,buf,sF,sU,in,out);
        } else {
          assert(0);
        }
      }
    }
    break;
#else
  // With GRID_NVCC the hand-unrolled option shares the generic implementation below.
  case WilsonKernelsStatic::OptHandUnroll:
#endif
  case WilsonKernelsStatic::OptGpu:
  case WilsonKernelsStatic::OptGeneric:
    for (int site = 0; site < Nsite; ++site, ++sU) {
      for (int s = 0; s < Ls; ++s, ++sF) {
        if (interior) {
          if (exterior) {
            WilsonKernels<Impl>::GenericDhopSiteDag(st,U,buf,sF,sU,in,out);
          } else {
            WilsonKernels<Impl>::GenericDhopSiteDagInt(st,U,buf,sF,sU,in,out);
          }
        } else if (exterior) {
          WilsonKernels<Impl>::GenericDhopSiteDagExt(st,U,buf,sF,sU,in,out);
        } else {
          assert(0);
        }
      }
    }
    break;
  default:
    assert(0);
  }
  //bgq_l1p_optimisation(0);
}
// Fallback "Dag" overload, selected when Impl::isFundamental is false or Nc != 3.
// Opt is part of the signature but is not inspected; the generic Dag site kernel is used
// unconditionally. The kernel runs Nsite*Ls times, with sF stepping after every call and
// sU after every Ls calls. interior/exterior pick the full, *Int or *Ext kernel; neither
// set trips assert(0).
template<bool EnableBool=true>
static accelerator_inline void
DhopSiteDag(typename std::enable_if<((Impl::isFundamental==false)||(Nc != 3))&& EnableBool, int>::type Opt,
            StencilView &st, DoubledGaugeFieldView &U,SiteHalfSpinor * buf,
            int sF, int sU, int Ls, int Nsite, const FermionFieldView &in, FermionFieldView &out,int interior=1,int exterior=1)
{
  for (int site = 0; site < Nsite; ++site, ++sU) {
    for (int s = 0; s < Ls; ++s, ++sF) {
      if (interior) {
        if (exterior) {
          WilsonKernels<Impl>::GenericDhopSiteDag(st,U,buf,sF,sU,in,out);
        } else {
          WilsonKernels<Impl>::GenericDhopSiteDagInt(st,U,buf,sF,sU,in,out);
        }
      } else if (exterior) {
        WilsonKernels<Impl>::GenericDhopSiteDagExt(st,U,buf,sF,sU,in,out);
      } else {
        assert(0);
      }
    }
  }
}
// Declaration only: kernel taking a single fermion-site index sF and gauge-site index sU,
// plus the integer selectors dirdisp and gamma. Unchanged by this diff (both columns match).
// NOTE(review): dirdisp presumably encodes a direction/displacement pair -- confirm at the definition.
static accelerator void DhopDirK(StencilView &st, DoubledGaugeFieldView &U,SiteHalfSpinor * buf, static accelerator void DhopDirK(StencilView &st, DoubledGaugeFieldView &U,SiteHalfSpinor * buf,
int sF, int sU, const FermionFieldView &in, FermionFieldView &out, int dirdisp, int gamma); int sF, int sU, const FermionFieldView &in, FermionFieldView &out, int dirdisp, int gamma);
@ -239,28 +100,28 @@ public:
private: private:
// Specialised variants // Specialised variants
// Declarations of the per-site kernels: GpuDhopSite / GpuDhopSiteDag (which take a
// SiteDoubledGaugeField and an extra Ls argument) and the Generic family in plain, Dag,
// Int, DagInt, Ext and DagExt flavours (which take a DoubledGaugeFieldView).
// In this diff the left (old) column qualifies each with accelerator_inline and the
// right (new) column with accelerator; the parameter lists are identical on both sides.
static accelerator_inline void GpuDhopSite(StencilView &st, SiteDoubledGaugeField &U, SiteHalfSpinor * buf, static accelerator void GpuDhopSite(StencilView &st, SiteDoubledGaugeField &U, SiteHalfSpinor * buf,
int Ls, int sF, int sU, const FermionFieldView &in, FermionFieldView &out); int Ls, int sF, int sU, const FermionFieldView &in, FermionFieldView &out);
static accelerator_inline void GpuDhopSiteDag(StencilView &st, SiteDoubledGaugeField &U, SiteHalfSpinor * buf, static accelerator void GpuDhopSiteDag(StencilView &st, SiteDoubledGaugeField &U, SiteHalfSpinor * buf,
int Ls,int sF, int sU, const FermionFieldView &in, FermionFieldView &out); int Ls,int sF, int sU, const FermionFieldView &in, FermionFieldView &out);
static accelerator_inline void GenericDhopSite(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor * buf, static accelerator void GenericDhopSite(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor * buf,
int sF, int sU, const FermionFieldView &in, FermionFieldView &out); int sF, int sU, const FermionFieldView &in, FermionFieldView &out);
static accelerator_inline void GenericDhopSiteDag(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor * buf, static accelerator void GenericDhopSiteDag(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor * buf,
int sF, int sU, const FermionFieldView &in, FermionFieldView &out); int sF, int sU, const FermionFieldView &in, FermionFieldView &out);
static accelerator_inline void GenericDhopSiteInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor * buf, static accelerator void GenericDhopSiteInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor * buf,
int sF, int sU, const FermionFieldView &in, FermionFieldView &out); int sF, int sU, const FermionFieldView &in, FermionFieldView &out);
static accelerator_inline void GenericDhopSiteDagInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor * buf, static accelerator void GenericDhopSiteDagInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor * buf,
int sF, int sU, const FermionFieldView &in, FermionFieldView &out); int sF, int sU, const FermionFieldView &in, FermionFieldView &out);
static accelerator_inline void GenericDhopSiteExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor * buf, static accelerator void GenericDhopSiteExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor * buf,
int sF, int sU, const FermionFieldView &in, FermionFieldView &out); int sF, int sU, const FermionFieldView &in, FermionFieldView &out);
static accelerator_inline void GenericDhopSiteDagExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor * buf, static accelerator void GenericDhopSiteDagExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor * buf,
int sF, int sU, const FermionFieldView &in, FermionFieldView &out); int sF, int sU, const FermionFieldView &in, FermionFieldView &out);
static void AsmDhopSite(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor * buf, static void AsmDhopSite(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor * buf,
@ -281,25 +142,26 @@ private:
static void AsmDhopSiteDagExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor * buf, static void AsmDhopSiteDagExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor * buf,
int sF, int sU, int Ls, int Nsite, const FermionFieldView &in, FermionFieldView &out); int sF, int sU, int Ls, int Nsite, const FermionFieldView &in, FermionFieldView &out);
// Keep Hand unrolled temporarily
// Declarations of the hand-unrolled per-site kernels in plain, Dag, Int, DagInt, Ext and
// DagExt flavours, all sharing the (st, U, buf, sF, sU, in, out) parameter list.
// In this diff the new (right) column wraps the group in "#if 1 ... #endif" and replaces
// the old column's accelerator_inline qualifier with accelerator.
static accelerator_inline void HandDhopSite(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor * buf, #if 1
static accelerator void HandDhopSite(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor * buf,
int sF, int sU, const FermionFieldView &in, FermionFieldView &out); int sF, int sU, const FermionFieldView &in, FermionFieldView &out);
static accelerator_inline void HandDhopSiteDag(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor * buf, static accelerator void HandDhopSiteDag(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor * buf,
int sF, int sU, const FermionFieldView &in, FermionFieldView &out); int sF, int sU, const FermionFieldView &in, FermionFieldView &out);
static accelerator_inline void HandDhopSiteInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor * buf, static accelerator void HandDhopSiteInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor * buf,
int sF, int sU, const FermionFieldView &in, FermionFieldView &out); int sF, int sU, const FermionFieldView &in, FermionFieldView &out);
static accelerator_inline void HandDhopSiteDagInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor * buf, static accelerator void HandDhopSiteDagInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor * buf,
int sF, int sU, const FermionFieldView &in, FermionFieldView &out); int sF, int sU, const FermionFieldView &in, FermionFieldView &out);
static accelerator_inline void HandDhopSiteExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor * buf, static accelerator void HandDhopSiteExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor * buf,
int sF, int sU, const FermionFieldView &in, FermionFieldView &out); int sF, int sU, const FermionFieldView &in, FermionFieldView &out);
static accelerator_inline void HandDhopSiteDagExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor * buf, static accelerator void HandDhopSiteDagExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor * buf,
int sF, int sU, const FermionFieldView &in, FermionFieldView &out); int sF, int sU, const FermionFieldView &in, FermionFieldView &out);
#endif
public: public:
// Constructor: forwards the implementation parameters p (default-constructed ImplParams
// when omitted) to Base and does nothing else. Unchanged by this diff.
WilsonKernels(const ImplParams &p = ImplParams()) : Base(p){}; WilsonKernels(const ImplParams &p = ImplParams()) : Base(p){};
}; };