mirror of
https://github.com/paboyle/Grid.git
synced 2024-11-14 01:35:36 +00:00
Move the loop into a central kernel call.
This commit is contained in:
parent
8113845f9c
commit
8a5489d9e6
@ -55,150 +55,11 @@ public:
|
|||||||
static void DhopKernel(int Opt,StencilImpl &st, DoubledGaugeField &U, SiteHalfSpinor * buf,
|
static void DhopKernel(int Opt,StencilImpl &st, DoubledGaugeField &U, SiteHalfSpinor * buf,
|
||||||
int Ls, int Nsite, const FermionField &in, FermionField &out,
|
int Ls, int Nsite, const FermionField &in, FermionField &out,
|
||||||
int interior=1,int exterior=1) ;
|
int interior=1,int exterior=1) ;
|
||||||
|
|
||||||
static void DhopDagKernel(int Opt,StencilImpl &st, DoubledGaugeField &U, SiteHalfSpinor * buf,
|
static void DhopDagKernel(int Opt,StencilImpl &st, DoubledGaugeField &U, SiteHalfSpinor * buf,
|
||||||
int Ls, int Nsite, const FermionField &in, FermionField &out,
|
int Ls, int Nsite, const FermionField &in, FermionField &out,
|
||||||
int interior=1,int exterior=1) ;
|
int interior=1,int exterior=1) ;
|
||||||
|
|
||||||
template<bool EnableBool=true>
|
|
||||||
static accelerator_inline void
|
|
||||||
DhopSite(typename std::enable_if<(Impl::isFundamental==true && Nc == 3 &&EnableBool), int>::type Opt,
|
|
||||||
StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor * buf,
|
|
||||||
int sF, int sU, int Ls, int Nsite,
|
|
||||||
const FermionFieldView &in, FermionFieldView &out,int interior=1,int exterior=1)
|
|
||||||
{
|
|
||||||
//bgq_l1p_optimisation(1);
|
|
||||||
switch(Opt) {
|
|
||||||
|
|
||||||
#if defined(AVX512) || defined (QPX)
|
|
||||||
case WilsonKernelsStatic::OptInlineAsm:
|
|
||||||
if(interior&&exterior) WilsonKernels<Impl>::AsmDhopSite (st,U,buf,sF,sU,Ls,Nsite,in,out);
|
|
||||||
else if (interior) WilsonKernels<Impl>::AsmDhopSiteInt(st,U,buf,sF,sU,Ls,Nsite,in,out);
|
|
||||||
else if (exterior) WilsonKernels<Impl>::AsmDhopSiteExt(st,U,buf,sF,sU,Ls,Nsite,in,out);
|
|
||||||
else assert(0);
|
|
||||||
break;
|
|
||||||
#endif
|
|
||||||
#if !defined(GRID_NVCC)
|
|
||||||
case WilsonKernelsStatic::OptHandUnroll:
|
|
||||||
for (int site = 0; site < Nsite; site++) {
|
|
||||||
for (int s = 0; s < Ls; s++) {
|
|
||||||
if(interior&&exterior) WilsonKernels<Impl>::HandDhopSite(st,U,buf,sF,sU,in,out);
|
|
||||||
else if (interior) WilsonKernels<Impl>::HandDhopSiteInt(st,U,buf,sF,sU,in,out);
|
|
||||||
else if (exterior) WilsonKernels<Impl>::HandDhopSiteExt(st,U,buf,sF,sU,in,out);
|
|
||||||
sF++;
|
|
||||||
}
|
|
||||||
sU++;
|
|
||||||
}
|
|
||||||
break;
|
|
||||||
#else
|
|
||||||
case WilsonKernelsStatic::OptHandUnroll:
|
|
||||||
#endif
|
|
||||||
case WilsonKernelsStatic::OptGpu:
|
|
||||||
case WilsonKernelsStatic::OptGeneric:
|
|
||||||
for (int site = 0; site < Nsite; site++) {
|
|
||||||
for (int s = 0; s < Ls; s++) {
|
|
||||||
if(interior&&exterior) WilsonKernels<Impl>::GenericDhopSite(st,U,buf,sF,sU,in,out);
|
|
||||||
else if (interior) WilsonKernels<Impl>::GenericDhopSiteInt(st,U,buf,sF,sU,in,out);
|
|
||||||
else if (exterior) WilsonKernels<Impl>::GenericDhopSiteExt(st,U,buf,sF,sU,in,out);
|
|
||||||
else assert(0);
|
|
||||||
sF++;
|
|
||||||
}
|
|
||||||
sU++;
|
|
||||||
}
|
|
||||||
break;
|
|
||||||
default:
|
|
||||||
assert(0);
|
|
||||||
}
|
|
||||||
//bgq_l1p_optimisation(0);
|
|
||||||
}
|
|
||||||
|
|
||||||
template<bool EnableBool=true>
|
|
||||||
static accelerator_inline void
|
|
||||||
DhopSite(typename std::enable_if<((Impl::isFundamental==false)||(Nc != 3))&& EnableBool, int>::type Opt,
|
|
||||||
StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor * buf,
|
|
||||||
int sF, int sU, int Ls, int Nsite, const FermionFieldView &in, FermionFieldView &out,int interior=1,int exterior=1 )
|
|
||||||
{
|
|
||||||
// no kernel choice
|
|
||||||
for (int site = 0; site < Nsite; site++) {
|
|
||||||
for (int s = 0; s < Ls; s++) {
|
|
||||||
if(interior&&exterior) WilsonKernels<Impl>::GenericDhopSite(st,U,buf,sF,sU,in,out);
|
|
||||||
else if (interior) WilsonKernels<Impl>::GenericDhopSiteInt(st,U,buf,sF,sU,in,out);
|
|
||||||
else if (exterior) WilsonKernels<Impl>::GenericDhopSiteExt(st,U,buf,sF,sU,in,out);
|
|
||||||
else assert(0);
|
|
||||||
sF++;
|
|
||||||
}
|
|
||||||
sU++;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
template<bool EnableBool=true>
|
|
||||||
static accelerator_inline void
|
|
||||||
DhopSiteDag(typename std::enable_if<(Impl::isFundamental==true && Nc == 3 &&EnableBool), int>::type Opt,
|
|
||||||
StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor * buf,
|
|
||||||
int sF, int sU, int Ls, int Nsite, const FermionFieldView &in, FermionFieldView &out,int interior=1,int exterior=1)
|
|
||||||
{
|
|
||||||
//bgq_l1p_optimisation(1);
|
|
||||||
switch(Opt) {
|
|
||||||
#if defined(AVX512) || defined (QPX)
|
|
||||||
case WilsonKernelsStatic::OptInlineAsm:
|
|
||||||
if(interior&&exterior) WilsonKernels<Impl>::AsmDhopSiteDag (st,U,buf,sF,sU,Ls,Nsite,in,out);
|
|
||||||
else if (interior) WilsonKernels<Impl>::AsmDhopSiteDagInt(st,U,buf,sF,sU,Ls,Nsite,in,out);
|
|
||||||
else if (exterior) WilsonKernels<Impl>::AsmDhopSiteDagExt(st,U,buf,sF,sU,Ls,Nsite,in,out);
|
|
||||||
else assert(0);
|
|
||||||
break;
|
|
||||||
#endif
|
|
||||||
#if !defined(GRID_NVCC)
|
|
||||||
case WilsonKernelsStatic::OptHandUnroll:
|
|
||||||
for (int site = 0; site < Nsite; site++) {
|
|
||||||
for (int s = 0; s < Ls; s++) {
|
|
||||||
if(interior&&exterior) WilsonKernels<Impl>::HandDhopSiteDag(st,U,buf,sF,sU,in,out);
|
|
||||||
else if (interior) WilsonKernels<Impl>::HandDhopSiteDagInt(st,U,buf,sF,sU,in,out);
|
|
||||||
else if (exterior) WilsonKernels<Impl>::HandDhopSiteDagExt(st,U,buf,sF,sU,in,out);
|
|
||||||
else assert(0);
|
|
||||||
sF++;
|
|
||||||
}
|
|
||||||
sU++;
|
|
||||||
}
|
|
||||||
break;
|
|
||||||
#else
|
|
||||||
case WilsonKernelsStatic::OptHandUnroll:
|
|
||||||
#endif
|
|
||||||
case WilsonKernelsStatic::OptGpu:
|
|
||||||
case WilsonKernelsStatic::OptGeneric:
|
|
||||||
for (int site = 0; site < Nsite; site++) {
|
|
||||||
for (int s = 0; s < Ls; s++) {
|
|
||||||
if(interior&&exterior) WilsonKernels<Impl>::GenericDhopSiteDag(st,U,buf,sF,sU,in,out);
|
|
||||||
else if (interior) WilsonKernels<Impl>::GenericDhopSiteDagInt(st,U,buf,sF,sU,in,out);
|
|
||||||
else if (exterior) WilsonKernels<Impl>::GenericDhopSiteDagExt(st,U,buf,sF,sU,in,out);
|
|
||||||
else assert(0);
|
|
||||||
sF++;
|
|
||||||
}
|
|
||||||
sU++;
|
|
||||||
}
|
|
||||||
break;
|
|
||||||
default:
|
|
||||||
assert(0);
|
|
||||||
}
|
|
||||||
//bgq_l1p_optimisation(0);
|
|
||||||
}
|
|
||||||
|
|
||||||
template<bool EnableBool=true>
|
|
||||||
static accelerator_inline void
|
|
||||||
DhopSiteDag(typename std::enable_if<((Impl::isFundamental==false)||(Nc != 3))&& EnableBool, int>::type Opt,
|
|
||||||
StencilView &st, DoubledGaugeFieldView &U,SiteHalfSpinor * buf,
|
|
||||||
int sF, int sU, int Ls, int Nsite, const FermionFieldView &in, FermionFieldView &out,int interior=1,int exterior=1)
|
|
||||||
{
|
|
||||||
for (int site = 0; site < Nsite; site++) {
|
|
||||||
for (int s = 0; s < Ls; s++) {
|
|
||||||
if(interior&&exterior) WilsonKernels<Impl>::GenericDhopSiteDag(st,U,buf,sF,sU,in,out);
|
|
||||||
else if (interior) WilsonKernels<Impl>::GenericDhopSiteDagInt(st,U,buf,sF,sU,in,out);
|
|
||||||
else if (exterior) WilsonKernels<Impl>::GenericDhopSiteDagExt(st,U,buf,sF,sU,in,out);
|
|
||||||
else assert(0);
|
|
||||||
sF++;
|
|
||||||
}
|
|
||||||
sU++;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
static accelerator void DhopDirK(StencilView &st, DoubledGaugeFieldView &U,SiteHalfSpinor * buf,
|
static accelerator void DhopDirK(StencilView &st, DoubledGaugeFieldView &U,SiteHalfSpinor * buf,
|
||||||
int sF, int sU, const FermionFieldView &in, FermionFieldView &out, int dirdisp, int gamma);
|
int sF, int sU, const FermionFieldView &in, FermionFieldView &out, int dirdisp, int gamma);
|
||||||
|
|
||||||
@ -239,28 +100,28 @@ public:
|
|||||||
|
|
||||||
private:
|
private:
|
||||||
// Specialised variants
|
// Specialised variants
|
||||||
static accelerator_inline void GpuDhopSite(StencilView &st, SiteDoubledGaugeField &U, SiteHalfSpinor * buf,
|
static accelerator void GpuDhopSite(StencilView &st, SiteDoubledGaugeField &U, SiteHalfSpinor * buf,
|
||||||
int Ls, int sF, int sU, const FermionFieldView &in, FermionFieldView &out);
|
int Ls, int sF, int sU, const FermionFieldView &in, FermionFieldView &out);
|
||||||
|
|
||||||
static accelerator_inline void GpuDhopSiteDag(StencilView &st, SiteDoubledGaugeField &U, SiteHalfSpinor * buf,
|
static accelerator void GpuDhopSiteDag(StencilView &st, SiteDoubledGaugeField &U, SiteHalfSpinor * buf,
|
||||||
int Ls,int sF, int sU, const FermionFieldView &in, FermionFieldView &out);
|
int Ls,int sF, int sU, const FermionFieldView &in, FermionFieldView &out);
|
||||||
|
|
||||||
static accelerator_inline void GenericDhopSite(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor * buf,
|
static accelerator void GenericDhopSite(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor * buf,
|
||||||
int sF, int sU, const FermionFieldView &in, FermionFieldView &out);
|
int sF, int sU, const FermionFieldView &in, FermionFieldView &out);
|
||||||
|
|
||||||
static accelerator_inline void GenericDhopSiteDag(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor * buf,
|
static accelerator void GenericDhopSiteDag(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor * buf,
|
||||||
int sF, int sU, const FermionFieldView &in, FermionFieldView &out);
|
int sF, int sU, const FermionFieldView &in, FermionFieldView &out);
|
||||||
|
|
||||||
static accelerator_inline void GenericDhopSiteInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor * buf,
|
static accelerator void GenericDhopSiteInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor * buf,
|
||||||
int sF, int sU, const FermionFieldView &in, FermionFieldView &out);
|
int sF, int sU, const FermionFieldView &in, FermionFieldView &out);
|
||||||
|
|
||||||
static accelerator_inline void GenericDhopSiteDagInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor * buf,
|
static accelerator void GenericDhopSiteDagInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor * buf,
|
||||||
int sF, int sU, const FermionFieldView &in, FermionFieldView &out);
|
int sF, int sU, const FermionFieldView &in, FermionFieldView &out);
|
||||||
|
|
||||||
static accelerator_inline void GenericDhopSiteExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor * buf,
|
static accelerator void GenericDhopSiteExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor * buf,
|
||||||
int sF, int sU, const FermionFieldView &in, FermionFieldView &out);
|
int sF, int sU, const FermionFieldView &in, FermionFieldView &out);
|
||||||
|
|
||||||
static accelerator_inline void GenericDhopSiteDagExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor * buf,
|
static accelerator void GenericDhopSiteDagExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor * buf,
|
||||||
int sF, int sU, const FermionFieldView &in, FermionFieldView &out);
|
int sF, int sU, const FermionFieldView &in, FermionFieldView &out);
|
||||||
|
|
||||||
static void AsmDhopSite(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor * buf,
|
static void AsmDhopSite(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor * buf,
|
||||||
@ -281,25 +142,26 @@ private:
|
|||||||
static void AsmDhopSiteDagExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor * buf,
|
static void AsmDhopSiteDagExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor * buf,
|
||||||
int sF, int sU, int Ls, int Nsite, const FermionFieldView &in, FermionFieldView &out);
|
int sF, int sU, int Ls, int Nsite, const FermionFieldView &in, FermionFieldView &out);
|
||||||
|
|
||||||
|
// Keep Hand unrolled temporarily
|
||||||
static accelerator_inline void HandDhopSite(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor * buf,
|
#if 1
|
||||||
|
static accelerator void HandDhopSite(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor * buf,
|
||||||
int sF, int sU, const FermionFieldView &in, FermionFieldView &out);
|
int sF, int sU, const FermionFieldView &in, FermionFieldView &out);
|
||||||
|
|
||||||
static accelerator_inline void HandDhopSiteDag(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor * buf,
|
static accelerator void HandDhopSiteDag(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor * buf,
|
||||||
int sF, int sU, const FermionFieldView &in, FermionFieldView &out);
|
int sF, int sU, const FermionFieldView &in, FermionFieldView &out);
|
||||||
|
|
||||||
static accelerator_inline void HandDhopSiteInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor * buf,
|
static accelerator void HandDhopSiteInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor * buf,
|
||||||
int sF, int sU, const FermionFieldView &in, FermionFieldView &out);
|
int sF, int sU, const FermionFieldView &in, FermionFieldView &out);
|
||||||
|
|
||||||
static accelerator_inline void HandDhopSiteDagInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor * buf,
|
static accelerator void HandDhopSiteDagInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor * buf,
|
||||||
int sF, int sU, const FermionFieldView &in, FermionFieldView &out);
|
int sF, int sU, const FermionFieldView &in, FermionFieldView &out);
|
||||||
|
|
||||||
static accelerator_inline void HandDhopSiteExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor * buf,
|
static accelerator void HandDhopSiteExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor * buf,
|
||||||
int sF, int sU, const FermionFieldView &in, FermionFieldView &out);
|
int sF, int sU, const FermionFieldView &in, FermionFieldView &out);
|
||||||
|
|
||||||
static accelerator_inline void HandDhopSiteDagExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor * buf,
|
static accelerator void HandDhopSiteDagExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor * buf,
|
||||||
int sF, int sU, const FermionFieldView &in, FermionFieldView &out);
|
int sF, int sU, const FermionFieldView &in, FermionFieldView &out);
|
||||||
|
#endif
|
||||||
public:
|
public:
|
||||||
WilsonKernels(const ImplParams &p = ImplParams()) : Base(p){};
|
WilsonKernels(const ImplParams &p = ImplParams()) : Base(p){};
|
||||||
};
|
};
|
||||||
|
Loading…
Reference in New Issue
Block a user