1
0
mirror of https://github.com/paboyle/Grid.git synced 2025-04-04 19:25:56 +01:00

Kernels need to be static to work on GPU. No reference to host resident data

This commit is contained in:
Peter Boyle 2018-03-22 18:44:53 -04:00
parent 607dc2d3c6
commit 4e1272fabf

View File

@ -56,27 +56,27 @@ public:
public: public:
template <bool EnableBool = true> accelerator template <bool EnableBool = true> static accelerator
typename std::enable_if<Impl::Dimension == 3 && Nc == 3 &&EnableBool, void>::type typename std::enable_if<Impl::Dimension == 3 && Nc == 3 &&EnableBool, void>::type
DhopSite(int Opt,typename StencilImpl::View_type &st, LebesgueOrder &lo, DoubledGaugeFieldView &U, SiteHalfSpinor * buf, DhopSite(int Opt,typename StencilImpl::View_type &st, DoubledGaugeFieldView &U, SiteHalfSpinor * buf,
int sF, int sU, int Ls, int Nsite, const FermionFieldView &in, FermionFieldView &out,int interior=1,int exterior=1) int sF, int sU, int Ls, int Nsite, const FermionFieldView &in, FermionFieldView &out,int interior=1,int exterior=1)
{ {
bgq_l1p_optimisation(1); bgq_l1p_optimisation(1);
switch(Opt) { switch(Opt) {
#if defined(AVX512) || defined (QPX) #if defined(AVX512) || defined (QPX)
case WilsonKernelsStatic::OptInlineAsm: case WilsonKernelsStatic::OptInlineAsm:
if(interior&&exterior) WilsonKernels<Impl>::AsmDhopSite (st,lo,U,buf,sF,sU,Ls,Nsite,in,out); if(interior&&exterior) WilsonKernels<Impl>::AsmDhopSite (st,U,buf,sF,sU,Ls,Nsite,in,out);
else if (interior) WilsonKernels<Impl>::AsmDhopSiteInt(st,lo,U,buf,sF,sU,Ls,Nsite,in,out); else if (interior) WilsonKernels<Impl>::AsmDhopSiteInt(st,U,buf,sF,sU,Ls,Nsite,in,out);
else if (exterior) WilsonKernels<Impl>::AsmDhopSiteExt(st,lo,U,buf,sF,sU,Ls,Nsite,in,out); else if (exterior) WilsonKernels<Impl>::AsmDhopSiteExt(st,U,buf,sF,sU,Ls,Nsite,in,out);
else assert(0); else assert(0);
break; break;
#endif #endif
case WilsonKernelsStatic::OptHandUnroll: case WilsonKernelsStatic::OptHandUnroll:
for (int site = 0; site < Nsite; site++) { for (int site = 0; site < Nsite; site++) {
for (int s = 0; s < Ls; s++) { for (int s = 0; s < Ls; s++) {
if(interior&&exterior) WilsonKernels<Impl>::HandDhopSite(st,lo,U,buf,sF,sU,in,out); if(interior&&exterior) WilsonKernels<Impl>::HandDhopSite(st,U,buf,sF,sU,in,out);
else if (interior) WilsonKernels<Impl>::HandDhopSiteInt(st,lo,U,buf,sF,sU,in,out); else if (interior) WilsonKernels<Impl>::HandDhopSiteInt(st,U,buf,sF,sU,in,out);
else if (exterior) WilsonKernels<Impl>::HandDhopSiteExt(st,lo,U,buf,sF,sU,in,out); else if (exterior) WilsonKernels<Impl>::HandDhopSiteExt(st,U,buf,sF,sU,in,out);
sF++; sF++;
} }
sU++; sU++;
@ -85,9 +85,9 @@ public:
case WilsonKernelsStatic::OptGeneric: case WilsonKernelsStatic::OptGeneric:
for (int site = 0; site < Nsite; site++) { for (int site = 0; site < Nsite; site++) {
for (int s = 0; s < Ls; s++) { for (int s = 0; s < Ls; s++) {
if(interior&&exterior) WilsonKernels<Impl>::GenericDhopSite(st,lo,U,buf,sF,sU,in,out); if(interior&&exterior) WilsonKernels<Impl>::GenericDhopSite(st,U,buf,sF,sU,in,out);
else if (interior) WilsonKernels<Impl>::GenericDhopSiteInt(st,lo,U,buf,sF,sU,in,out); else if (interior) WilsonKernels<Impl>::GenericDhopSiteInt(st,U,buf,sF,sU,in,out);
else if (exterior) WilsonKernels<Impl>::GenericDhopSiteExt(st,lo,U,buf,sF,sU,in,out); else if (exterior) WilsonKernels<Impl>::GenericDhopSiteExt(st,U,buf,sF,sU,in,out);
else assert(0); else assert(0);
sF++; sF++;
} }
@ -100,16 +100,16 @@ public:
bgq_l1p_optimisation(0); bgq_l1p_optimisation(0);
} }
template <bool EnableBool = true> accelerator template <bool EnableBool = true> static accelerator
typename std::enable_if<(Impl::Dimension != 3 || (Impl::Dimension == 3 && Nc != 3)) && EnableBool, void>::type typename std::enable_if<(Impl::Dimension != 3 || (Impl::Dimension == 3 && Nc != 3)) && EnableBool, void>::type
DhopSite(int Opt, typename StencilImpl::View_type &st, LebesgueOrder &lo, DoubledGaugeFieldView &U, SiteHalfSpinor * buf, DhopSite(int Opt, typename StencilImpl::View_type &st, DoubledGaugeFieldView &U, SiteHalfSpinor * buf,
int sF, int sU, int Ls, int Nsite, const FermionFieldView &in, FermionFieldView &out,int interior=1,int exterior=1 ) { int sF, int sU, int Ls, int Nsite, const FermionFieldView &in, FermionFieldView &out,int interior=1,int exterior=1 ) {
// no kernel choice // no kernel choice
for (int site = 0; site < Nsite; site++) { for (int site = 0; site < Nsite; site++) {
for (int s = 0; s < Ls; s++) { for (int s = 0; s < Ls; s++) {
if(interior&&exterior) WilsonKernels<Impl>::GenericDhopSite(st,lo,U,buf,sF,sU,in,out); if(interior&&exterior) WilsonKernels<Impl>::GenericDhopSite(st,U,buf,sF,sU,in,out);
else if (interior) WilsonKernels<Impl>::GenericDhopSiteInt(st,lo,U,buf,sF,sU,in,out); else if (interior) WilsonKernels<Impl>::GenericDhopSiteInt(st,U,buf,sF,sU,in,out);
else if (exterior) WilsonKernels<Impl>::GenericDhopSiteExt(st,lo,U,buf,sF,sU,in,out); else if (exterior) WilsonKernels<Impl>::GenericDhopSiteExt(st,U,buf,sF,sU,in,out);
else assert(0); else assert(0);
sF++; sF++;
} }
@ -117,27 +117,27 @@ public:
} }
} }
template <bool EnableBool = true> accelerator template <bool EnableBool = true> static accelerator
typename std::enable_if<Impl::Dimension == 3 && Nc == 3 && EnableBool,void>::type typename std::enable_if<Impl::Dimension == 3 && Nc == 3 && EnableBool,void>::type
DhopSiteDag(int Opt, typename StencilImpl::View_type &st, LebesgueOrder &lo, DoubledGaugeFieldView &U, SiteHalfSpinor * buf, DhopSiteDag(int Opt, typename StencilImpl::View_type &st, DoubledGaugeFieldView &U, SiteHalfSpinor * buf,
int sF, int sU, int Ls, int Nsite, const FermionFieldView &in, FermionFieldView &out,int interior=1,int exterior=1) int sF, int sU, int Ls, int Nsite, const FermionFieldView &in, FermionFieldView &out,int interior=1,int exterior=1)
{ {
bgq_l1p_optimisation(1); bgq_l1p_optimisation(1);
switch(Opt) { switch(Opt) {
#if defined(AVX512) || defined (QPX) #if defined(AVX512) || defined (QPX)
case WilsonKernelsStatic::OptInlineAsm: case WilsonKernelsStatic::OptInlineAsm:
if(interior&&exterior) WilsonKernels<Impl>::AsmDhopSiteDag (st,lo,U,buf,sF,sU,Ls,Nsite,in,out); if(interior&&exterior) WilsonKernels<Impl>::AsmDhopSiteDag (st,U,buf,sF,sU,Ls,Nsite,in,out);
else if (interior) WilsonKernels<Impl>::AsmDhopSiteDagInt(st,lo,U,buf,sF,sU,Ls,Nsite,in,out); else if (interior) WilsonKernels<Impl>::AsmDhopSiteDagInt(st,U,buf,sF,sU,Ls,Nsite,in,out);
else if (exterior) WilsonKernels<Impl>::AsmDhopSiteDagExt(st,lo,U,buf,sF,sU,Ls,Nsite,in,out); else if (exterior) WilsonKernels<Impl>::AsmDhopSiteDagExt(st,U,buf,sF,sU,Ls,Nsite,in,out);
else assert(0); else assert(0);
break; break;
#endif #endif
case WilsonKernelsStatic::OptHandUnroll: case WilsonKernelsStatic::OptHandUnroll:
for (int site = 0; site < Nsite; site++) { for (int site = 0; site < Nsite; site++) {
for (int s = 0; s < Ls; s++) { for (int s = 0; s < Ls; s++) {
if(interior&&exterior) WilsonKernels<Impl>::HandDhopSiteDag(st,lo,U,buf,sF,sU,in,out); if(interior&&exterior) WilsonKernels<Impl>::HandDhopSiteDag(st,U,buf,sF,sU,in,out);
else if (interior) WilsonKernels<Impl>::HandDhopSiteDagInt(st,lo,U,buf,sF,sU,in,out); else if (interior) WilsonKernels<Impl>::HandDhopSiteDagInt(st,U,buf,sF,sU,in,out);
else if (exterior) WilsonKernels<Impl>::HandDhopSiteDagExt(st,lo,U,buf,sF,sU,in,out); else if (exterior) WilsonKernels<Impl>::HandDhopSiteDagExt(st,U,buf,sF,sU,in,out);
else assert(0); else assert(0);
sF++; sF++;
} }
@ -147,9 +147,9 @@ public:
case WilsonKernelsStatic::OptGeneric: case WilsonKernelsStatic::OptGeneric:
for (int site = 0; site < Nsite; site++) { for (int site = 0; site < Nsite; site++) {
for (int s = 0; s < Ls; s++) { for (int s = 0; s < Ls; s++) {
if(interior&&exterior) WilsonKernels<Impl>::GenericDhopSiteDag(st,lo,U,buf,sF,sU,in,out); if(interior&&exterior) WilsonKernels<Impl>::GenericDhopSiteDag(st,U,buf,sF,sU,in,out);
else if (interior) WilsonKernels<Impl>::GenericDhopSiteDagInt(st,lo,U,buf,sF,sU,in,out); else if (interior) WilsonKernels<Impl>::GenericDhopSiteDagInt(st,U,buf,sF,sU,in,out);
else if (exterior) WilsonKernels<Impl>::GenericDhopSiteDagExt(st,lo,U,buf,sF,sU,in,out); else if (exterior) WilsonKernels<Impl>::GenericDhopSiteDagExt(st,U,buf,sF,sU,in,out);
else assert(0); else assert(0);
sF++; sF++;
} }
@ -162,16 +162,16 @@ public:
bgq_l1p_optimisation(0); bgq_l1p_optimisation(0);
} }
template <bool EnableBool = true> accelerator template <bool EnableBool = true> static accelerator
typename std::enable_if<(Impl::Dimension != 3 || (Impl::Dimension == 3 && Nc != 3)) && EnableBool,void>::type typename std::enable_if<(Impl::Dimension != 3 || (Impl::Dimension == 3 && Nc != 3)) && EnableBool,void>::type
DhopSiteDag(int Opt,typename StencilImpl::View_type &st, LebesgueOrder &lo, DoubledGaugeFieldView &U,SiteHalfSpinor * buf, DhopSiteDag(int Opt,typename StencilImpl::View_type &st, DoubledGaugeFieldView &U,SiteHalfSpinor * buf,
int sF, int sU, int Ls, int Nsite, const FermionFieldView &in, FermionFieldView &out,int interior=1,int exterior=1) { int sF, int sU, int Ls, int Nsite, const FermionFieldView &in, FermionFieldView &out,int interior=1,int exterior=1) {
for (int site = 0; site < Nsite; site++) { for (int site = 0; site < Nsite; site++) {
for (int s = 0; s < Ls; s++) { for (int s = 0; s < Ls; s++) {
if(interior&&exterior) WilsonKernels<Impl>::GenericDhopSiteDag(st,lo,U,buf,sF,sU,in,out); if(interior&&exterior) WilsonKernels<Impl>::GenericDhopSiteDag(st,U,buf,sF,sU,in,out);
else if (interior) WilsonKernels<Impl>::GenericDhopSiteDagInt(st,lo,U,buf,sF,sU,in,out); else if (interior) WilsonKernels<Impl>::GenericDhopSiteDagInt(st,U,buf,sF,sU,in,out);
else if (exterior) WilsonKernels<Impl>::GenericDhopSiteDagExt(st,lo,U,buf,sF,sU,in,out); else if (exterior) WilsonKernels<Impl>::GenericDhopSiteDagExt(st,U,buf,sF,sU,in,out);
else assert(0); else assert(0);
sF++; sF++;
} }
@ -179,34 +179,34 @@ public:
} }
} }
accelerator void DhopDirK(typename StencilImpl::View_type &st, DoubledGaugeFieldView &U,SiteHalfSpinor * buf, static accelerator void DhopDirK(typename StencilImpl::View_type &st, DoubledGaugeFieldView &U,SiteHalfSpinor * buf,
int sF, int sU, const FermionFieldView &in, FermionFieldView &out, int dirdisp, int gamma); int sF, int sU, const FermionFieldView &in, FermionFieldView &out, int dirdisp, int gamma);
////////////////////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////////////////////
// Utilities for inserting Wilson conserved current. // Utilities for inserting Wilson conserved current.
////////////////////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////////////////////
void ContractConservedCurrentSiteFwd(const SitePropagator &q_in_1, static void ContractConservedCurrentSiteFwd(const SitePropagator &q_in_1,
const SitePropagator &q_in_2, const SitePropagator &q_in_2,
SitePropagator &q_out, SitePropagator &q_out,
DoubledGaugeFieldView &U, DoubledGaugeFieldView &U,
unsigned int sU, unsigned int sU,
unsigned int mu, unsigned int mu,
bool switch_sign = false); bool switch_sign = false);
void ContractConservedCurrentSiteBwd(const SitePropagator &q_in_1, static void ContractConservedCurrentSiteBwd(const SitePropagator &q_in_1,
const SitePropagator &q_in_2, const SitePropagator &q_in_2,
SitePropagator &q_out, SitePropagator &q_out,
DoubledGaugeFieldView &U, DoubledGaugeFieldView &U,
unsigned int sU, unsigned int sU,
unsigned int mu, unsigned int mu,
bool switch_sign = false); bool switch_sign = false);
void SeqConservedCurrentSiteFwd(const SitePropagator &q_in, static void SeqConservedCurrentSiteFwd(const SitePropagator &q_in,
SitePropagator &q_out, SitePropagator &q_out,
DoubledGaugeFieldView &U, DoubledGaugeFieldView &U,
unsigned int sU, unsigned int sU,
unsigned int mu, unsigned int mu,
vInteger t_mask, vInteger t_mask,
bool switch_sign = false); bool switch_sign = false);
void SeqConservedCurrentSiteBwd(const SitePropagator &q_in, static void SeqConservedCurrentSiteBwd(const SitePropagator &q_in,
SitePropagator &q_out, SitePropagator &q_out,
DoubledGaugeFieldView &U, DoubledGaugeFieldView &U,
unsigned int sU, unsigned int sU,
@ -216,59 +216,59 @@ public:
private: private:
// Specialised variants // Specialised variants
accelerator void GenericDhopSite(typename StencilImpl::View_type &st, LebesgueOrder &lo, DoubledGaugeFieldView &U, SiteHalfSpinor * buf, static accelerator void GenericDhopSite(typename StencilImpl::View_type &st, DoubledGaugeFieldView &U, SiteHalfSpinor * buf,
int sF, int sU, const FermionFieldView &in, FermionFieldView &out); int sF, int sU, const FermionFieldView &in, FermionFieldView &out);
accelerator void GenericDhopSiteDag(typename StencilImpl::View_type &st, LebesgueOrder &lo, DoubledGaugeFieldView &U, SiteHalfSpinor * buf, static accelerator void GenericDhopSiteDag(typename StencilImpl::View_type &st, DoubledGaugeFieldView &U, SiteHalfSpinor * buf,
int sF, int sU, const FermionFieldView &in, FermionFieldView &out); int sF, int sU, const FermionFieldView &in, FermionFieldView &out);
accelerator void GenericDhopSiteInt(typename StencilImpl::View_type &st, LebesgueOrder &lo, DoubledGaugeFieldView &U, SiteHalfSpinor * buf, static accelerator void GenericDhopSiteInt(typename StencilImpl::View_type &st, DoubledGaugeFieldView &U, SiteHalfSpinor * buf,
int sF, int sU, const FermionFieldView &in, FermionFieldView &out); int sF, int sU, const FermionFieldView &in, FermionFieldView &out);
accelerator void GenericDhopSiteDagInt(typename StencilImpl::View_type &st, LebesgueOrder &lo, DoubledGaugeFieldView &U, SiteHalfSpinor * buf, static accelerator void GenericDhopSiteDagInt(typename StencilImpl::View_type &st, DoubledGaugeFieldView &U, SiteHalfSpinor * buf,
int sF, int sU, const FermionFieldView &in, FermionFieldView &out); int sF, int sU, const FermionFieldView &in, FermionFieldView &out);
accelerator void GenericDhopSiteExt(typename StencilImpl::View_type &st, LebesgueOrder &lo, DoubledGaugeFieldView &U, SiteHalfSpinor * buf, static accelerator void GenericDhopSiteExt(typename StencilImpl::View_type &st, DoubledGaugeFieldView &U, SiteHalfSpinor * buf,
int sF, int sU, const FermionFieldView &in, FermionFieldView &out); int sF, int sU, const FermionFieldView &in, FermionFieldView &out);
accelerator void GenericDhopSiteDagExt(typename StencilImpl::View_type &st, LebesgueOrder &lo, DoubledGaugeFieldView &U, SiteHalfSpinor * buf, static accelerator void GenericDhopSiteDagExt(typename StencilImpl::View_type &st, DoubledGaugeFieldView &U, SiteHalfSpinor * buf,
int sF, int sU, const FermionFieldView &in, FermionFieldView &out); int sF, int sU, const FermionFieldView &in, FermionFieldView &out);
accelerator void AsmDhopSite(typename StencilImpl::View_type &st, LebesgueOrder &lo, DoubledGaugeFieldView &U, SiteHalfSpinor * buf, static accelerator void AsmDhopSite(typename StencilImpl::View_type &st, DoubledGaugeFieldView &U, SiteHalfSpinor * buf,
int sF, int sU, int Ls, int Nsite, const FermionFieldView &in,FermionFieldView &out); int sF, int sU, int Ls, int Nsite, const FermionFieldView &in,FermionFieldView &out);
accelerator void AsmDhopSiteDag(typename StencilImpl::View_type &st, LebesgueOrder &lo, DoubledGaugeFieldView &U, SiteHalfSpinor * buf, static accelerator void AsmDhopSiteDag(typename StencilImpl::View_type &st, DoubledGaugeFieldView &U, SiteHalfSpinor * buf,
int sF, int sU, int Ls, int Nsite, const FermionFieldView &in, FermionFieldView &out); int sF, int sU, int Ls, int Nsite, const FermionFieldView &in, FermionFieldView &out);
accelerator void AsmDhopSiteInt(typename StencilImpl::View_type &st, LebesgueOrder &lo, DoubledGaugeFieldView &U, SiteHalfSpinor * buf, static accelerator void AsmDhopSiteInt(typename StencilImpl::View_type &st, DoubledGaugeFieldView &U, SiteHalfSpinor * buf,
int sF, int sU, int Ls, int Nsite, const FermionFieldView &in,FermionFieldView &out); int sF, int sU, int Ls, int Nsite, const FermionFieldView &in,FermionFieldView &out);
accelerator void AsmDhopSiteDagInt(typename StencilImpl::View_type &st, LebesgueOrder &lo, DoubledGaugeFieldView &U, SiteHalfSpinor * buf, static accelerator void AsmDhopSiteDagInt(typename StencilImpl::View_type &st, DoubledGaugeFieldView &U, SiteHalfSpinor * buf,
int sF, int sU, int Ls, int Nsite, const FermionFieldView &in, FermionFieldView &out); int sF, int sU, int Ls, int Nsite, const FermionFieldView &in, FermionFieldView &out);
accelerator void AsmDhopSiteExt(typename StencilImpl::View_type &st, LebesgueOrder &lo, DoubledGaugeFieldView &U, SiteHalfSpinor * buf, static accelerator void AsmDhopSiteExt(typename StencilImpl::View_type &st, DoubledGaugeFieldView &U, SiteHalfSpinor * buf,
int sF, int sU, int Ls, int Nsite, const FermionFieldView &in,FermionFieldView &out); int sF, int sU, int Ls, int Nsite, const FermionFieldView &in,FermionFieldView &out);
accelerator void AsmDhopSiteDagExt(typename StencilImpl::View_type &st, LebesgueOrder &lo, DoubledGaugeFieldView &U, SiteHalfSpinor * buf, static accelerator void AsmDhopSiteDagExt(typename StencilImpl::View_type &st, DoubledGaugeFieldView &U, SiteHalfSpinor * buf,
int sF, int sU, int Ls, int Nsite, const FermionFieldView &in, FermionFieldView &out); int sF, int sU, int Ls, int Nsite, const FermionFieldView &in, FermionFieldView &out);
accelerator void HandDhopSite(typename StencilImpl::View_type &st, LebesgueOrder &lo, DoubledGaugeFieldView &U, SiteHalfSpinor * buf, static accelerator void HandDhopSite(typename StencilImpl::View_type &st, DoubledGaugeFieldView &U, SiteHalfSpinor * buf,
int sF, int sU, const FermionFieldView &in, FermionFieldView &out); int sF, int sU, const FermionFieldView &in, FermionFieldView &out);
accelerator void HandDhopSiteDag(typename StencilImpl::View_type &st, LebesgueOrder &lo, DoubledGaugeFieldView &U, SiteHalfSpinor * buf, static accelerator void HandDhopSiteDag(typename StencilImpl::View_type &st, DoubledGaugeFieldView &U, SiteHalfSpinor * buf,
int sF, int sU, const FermionFieldView &in, FermionFieldView &out); int sF, int sU, const FermionFieldView &in, FermionFieldView &out);
accelerator void HandDhopSiteInt(typename StencilImpl::View_type &st, LebesgueOrder &lo, DoubledGaugeFieldView &U, SiteHalfSpinor * buf, static accelerator void HandDhopSiteInt(typename StencilImpl::View_type &st, DoubledGaugeFieldView &U, SiteHalfSpinor * buf,
int sF, int sU, const FermionFieldView &in, FermionFieldView &out); int sF, int sU, const FermionFieldView &in, FermionFieldView &out);
accelerator void HandDhopSiteDagInt(typename StencilImpl::View_type &st, LebesgueOrder &lo, DoubledGaugeFieldView &U, SiteHalfSpinor * buf, static accelerator void HandDhopSiteDagInt(typename StencilImpl::View_type &st, DoubledGaugeFieldView &U, SiteHalfSpinor * buf,
int sF, int sU, const FermionFieldView &in, FermionFieldView &out); int sF, int sU, const FermionFieldView &in, FermionFieldView &out);
accelerator void HandDhopSiteExt(typename StencilImpl::View_type &st, LebesgueOrder &lo, DoubledGaugeFieldView &U, SiteHalfSpinor * buf, static accelerator void HandDhopSiteExt(typename StencilImpl::View_type &st, DoubledGaugeFieldView &U, SiteHalfSpinor * buf,
int sF, int sU, const FermionFieldView &in, FermionFieldView &out); int sF, int sU, const FermionFieldView &in, FermionFieldView &out);
accelerator void HandDhopSiteDagExt(typename StencilImpl::View_type &st, LebesgueOrder &lo, DoubledGaugeFieldView &U, SiteHalfSpinor * buf, static accelerator void HandDhopSiteDagExt(typename StencilImpl::View_type &st, DoubledGaugeFieldView &U, SiteHalfSpinor * buf,
int sF, int sU, const FermionFieldView &in, FermionFieldView &out); int sF, int sU, const FermionFieldView &in, FermionFieldView &out);
public: public: