1
0
mirror of https://github.com/paboyle/Grid.git synced 2025-04-10 06:00:45 +01:00

Accelerator offload: pass Opt into the kernel as an explicit argument, so that GPU code does not read a host-side variable

This commit is contained in:
paboyle 2018-02-02 11:35:35 +00:00
parent a308dff410
commit dcf6517a93

View File

@ -45,7 +45,7 @@ public:
static int Comms; static int Comms;
}; };
template<class Impl> class WilsonKernels : public FermionOperator<Impl> , public WilsonKernelsStatic { template<class Impl> class WilsonKernels : public FermionOperator<Impl> {
public: public:
INHERIT_IMPL_TYPES(Impl); INHERIT_IMPL_TYPES(Impl);
@ -55,20 +55,20 @@ public:
template <bool EnableBool = true> accelerator template <bool EnableBool = true> accelerator
typename std::enable_if<Impl::Dimension == 3 && Nc == 3 &&EnableBool, void>::type typename std::enable_if<Impl::Dimension == 3 && Nc == 3 &&EnableBool, void>::type
DhopSite(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, SiteHalfSpinor * buf, DhopSite(int Opt,StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, SiteHalfSpinor * buf,
int sF, int sU, int Ls, int Nsite, const FermionField &in, FermionField &out,int interior=1,int exterior=1) int sF, int sU, int Ls, int Nsite, const FermionField &in, FermionField &out,int interior=1,int exterior=1)
{ {
bgq_l1p_optimisation(1); bgq_l1p_optimisation(1);
switch(Opt) { switch(Opt) {
#if defined(AVX512) || defined (QPX) #if defined(AVX512) || defined (QPX)
case OptInlineAsm: case WilsonKernelsStatic::OptInlineAsm:
if(interior&&exterior) WilsonKernels<Impl>::AsmDhopSite (st,lo,U,buf,sF,sU,Ls,Nsite,in,out); if(interior&&exterior) WilsonKernels<Impl>::AsmDhopSite (st,lo,U,buf,sF,sU,Ls,Nsite,in,out);
else if (interior) WilsonKernels<Impl>::AsmDhopSiteInt(st,lo,U,buf,sF,sU,Ls,Nsite,in,out); else if (interior) WilsonKernels<Impl>::AsmDhopSiteInt(st,lo,U,buf,sF,sU,Ls,Nsite,in,out);
else if (exterior) WilsonKernels<Impl>::AsmDhopSiteExt(st,lo,U,buf,sF,sU,Ls,Nsite,in,out); else if (exterior) WilsonKernels<Impl>::AsmDhopSiteExt(st,lo,U,buf,sF,sU,Ls,Nsite,in,out);
else assert(0); else assert(0);
break; break;
#endif #endif
case OptHandUnroll: case WilsonKernelsStatic::OptHandUnroll:
for (int site = 0; site < Nsite; site++) { for (int site = 0; site < Nsite; site++) {
for (int s = 0; s < Ls; s++) { for (int s = 0; s < Ls; s++) {
if(interior&&exterior) WilsonKernels<Impl>::HandDhopSite(st,lo,U,buf,sF,sU,in,out); if(interior&&exterior) WilsonKernels<Impl>::HandDhopSite(st,lo,U,buf,sF,sU,in,out);
@ -79,7 +79,7 @@ public:
sU++; sU++;
} }
break; break;
case OptGeneric: case WilsonKernelsStatic::OptGeneric:
for (int site = 0; site < Nsite; site++) { for (int site = 0; site < Nsite; site++) {
for (int s = 0; s < Ls; s++) { for (int s = 0; s < Ls; s++) {
if(interior&&exterior) WilsonKernels<Impl>::GenericDhopSite(st,lo,U,buf,sF,sU,in,out); if(interior&&exterior) WilsonKernels<Impl>::GenericDhopSite(st,lo,U,buf,sF,sU,in,out);
@ -99,7 +99,7 @@ public:
template <bool EnableBool = true> accelerator template <bool EnableBool = true> accelerator
typename std::enable_if<(Impl::Dimension != 3 || (Impl::Dimension == 3 && Nc != 3)) && EnableBool, void>::type typename std::enable_if<(Impl::Dimension != 3 || (Impl::Dimension == 3 && Nc != 3)) && EnableBool, void>::type
DhopSite(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, SiteHalfSpinor * buf, DhopSite(int Opt, StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, SiteHalfSpinor * buf,
int sF, int sU, int Ls, int Nsite, const FermionField &in, FermionField &out,int interior=1,int exterior=1 ) { int sF, int sU, int Ls, int Nsite, const FermionField &in, FermionField &out,int interior=1,int exterior=1 ) {
// no kernel choice // no kernel choice
for (int site = 0; site < Nsite; site++) { for (int site = 0; site < Nsite; site++) {
@ -116,20 +116,20 @@ public:
template <bool EnableBool = true> accelerator template <bool EnableBool = true> accelerator
typename std::enable_if<Impl::Dimension == 3 && Nc == 3 && EnableBool,void>::type typename std::enable_if<Impl::Dimension == 3 && Nc == 3 && EnableBool,void>::type
DhopSiteDag(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, SiteHalfSpinor * buf, DhopSiteDag(int Opt, StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, SiteHalfSpinor * buf,
int sF, int sU, int Ls, int Nsite, const FermionField &in, FermionField &out,int interior=1,int exterior=1) int sF, int sU, int Ls, int Nsite, const FermionField &in, FermionField &out,int interior=1,int exterior=1)
{ {
bgq_l1p_optimisation(1); bgq_l1p_optimisation(1);
switch(Opt) { switch(Opt) {
#if defined(AVX512) || defined (QPX) #if defined(AVX512) || defined (QPX)
case OptInlineAsm: case WilsonKernelsStatic::OptInlineAsm:
if(interior&&exterior) WilsonKernels<Impl>::AsmDhopSiteDag (st,lo,U,buf,sF,sU,Ls,Nsite,in,out); if(interior&&exterior) WilsonKernels<Impl>::AsmDhopSiteDag (st,lo,U,buf,sF,sU,Ls,Nsite,in,out);
else if (interior) WilsonKernels<Impl>::AsmDhopSiteDagInt(st,lo,U,buf,sF,sU,Ls,Nsite,in,out); else if (interior) WilsonKernels<Impl>::AsmDhopSiteDagInt(st,lo,U,buf,sF,sU,Ls,Nsite,in,out);
else if (exterior) WilsonKernels<Impl>::AsmDhopSiteDagExt(st,lo,U,buf,sF,sU,Ls,Nsite,in,out); else if (exterior) WilsonKernels<Impl>::AsmDhopSiteDagExt(st,lo,U,buf,sF,sU,Ls,Nsite,in,out);
else assert(0); else assert(0);
break; break;
#endif #endif
case OptHandUnroll: case WilsonKernelsStatic::OptHandUnroll:
for (int site = 0; site < Nsite; site++) { for (int site = 0; site < Nsite; site++) {
for (int s = 0; s < Ls; s++) { for (int s = 0; s < Ls; s++) {
if(interior&&exterior) WilsonKernels<Impl>::HandDhopSiteDag(st,lo,U,buf,sF,sU,in,out); if(interior&&exterior) WilsonKernels<Impl>::HandDhopSiteDag(st,lo,U,buf,sF,sU,in,out);
@ -141,7 +141,7 @@ public:
sU++; sU++;
} }
break; break;
case OptGeneric: case WilsonKernelsStatic::OptGeneric:
for (int site = 0; site < Nsite; site++) { for (int site = 0; site < Nsite; site++) {
for (int s = 0; s < Ls; s++) { for (int s = 0; s < Ls; s++) {
if(interior&&exterior) WilsonKernels<Impl>::GenericDhopSiteDag(st,lo,U,buf,sF,sU,in,out); if(interior&&exterior) WilsonKernels<Impl>::GenericDhopSiteDag(st,lo,U,buf,sF,sU,in,out);
@ -161,7 +161,7 @@ public:
template <bool EnableBool = true> accelerator template <bool EnableBool = true> accelerator
typename std::enable_if<(Impl::Dimension != 3 || (Impl::Dimension == 3 && Nc != 3)) && EnableBool,void>::type typename std::enable_if<(Impl::Dimension != 3 || (Impl::Dimension == 3 && Nc != 3)) && EnableBool,void>::type
DhopSiteDag(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U,SiteHalfSpinor * buf, DhopSiteDag(int Opt,StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U,SiteHalfSpinor * buf,
int sF, int sU, int Ls, int Nsite, const FermionField &in, FermionField &out,int interior=1,int exterior=1) { int sF, int sU, int Ls, int Nsite, const FermionField &in, FermionField &out,int interior=1,int exterior=1) {
for (int site = 0; site < Nsite; site++) { for (int site = 0; site < Nsite; site++) {