mirror of
				https://github.com/paboyle/Grid.git
				synced 2025-11-04 05:54:32 +00:00 
			
		
		
		
	Accelerator offload: pass Opt into the kernel by value so GPU code never reads a host variable
This commit is contained in:
		@@ -45,9 +45,9 @@ public:
 | 
			
		||||
  static int Comms;
 | 
			
		||||
};
 | 
			
		||||
 
 | 
			
		||||
template<class Impl> class WilsonKernels : public FermionOperator<Impl> , public WilsonKernelsStatic { 
 | 
			
		||||
template<class Impl> class WilsonKernels : public FermionOperator<Impl> { 
 | 
			
		||||
public:
 | 
			
		||||
   
 | 
			
		||||
 | 
			
		||||
  INHERIT_IMPL_TYPES(Impl);
 | 
			
		||||
  typedef FermionOperator<Impl> Base;
 | 
			
		||||
   
 | 
			
		||||
@@ -55,20 +55,20 @@ public:
 | 
			
		||||
   
 | 
			
		||||
  template <bool EnableBool = true> accelerator
 | 
			
		||||
  typename std::enable_if<Impl::Dimension == 3 && Nc == 3 &&EnableBool, void>::type
 | 
			
		||||
  DhopSite(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, SiteHalfSpinor * buf,
 | 
			
		||||
  DhopSite(int Opt,StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, SiteHalfSpinor * buf,
 | 
			
		||||
	   int sF, int sU, int Ls, int Nsite, const FermionField &in, FermionField &out,int interior=1,int exterior=1) 
 | 
			
		||||
  {
 | 
			
		||||
    bgq_l1p_optimisation(1);
 | 
			
		||||
    switch(Opt) {
 | 
			
		||||
#if defined(AVX512) || defined (QPX)
 | 
			
		||||
    case OptInlineAsm:
 | 
			
		||||
    case WilsonKernelsStatic::OptInlineAsm:
 | 
			
		||||
      if(interior&&exterior) WilsonKernels<Impl>::AsmDhopSite   (st,lo,U,buf,sF,sU,Ls,Nsite,in,out);
 | 
			
		||||
      else if (interior)     WilsonKernels<Impl>::AsmDhopSiteInt(st,lo,U,buf,sF,sU,Ls,Nsite,in,out);
 | 
			
		||||
      else if (exterior)     WilsonKernels<Impl>::AsmDhopSiteExt(st,lo,U,buf,sF,sU,Ls,Nsite,in,out);
 | 
			
		||||
      else assert(0);
 | 
			
		||||
      break;
 | 
			
		||||
#endif
 | 
			
		||||
    case OptHandUnroll:
 | 
			
		||||
    case WilsonKernelsStatic::OptHandUnroll:
 | 
			
		||||
      for (int site = 0; site < Nsite; site++) {
 | 
			
		||||
	for (int s = 0; s < Ls; s++) {
 | 
			
		||||
	  if(interior&&exterior) WilsonKernels<Impl>::HandDhopSite(st,lo,U,buf,sF,sU,in,out);
 | 
			
		||||
@@ -79,7 +79,7 @@ public:
 | 
			
		||||
	sU++;
 | 
			
		||||
      }
 | 
			
		||||
      break;
 | 
			
		||||
    case OptGeneric:
 | 
			
		||||
    case WilsonKernelsStatic::OptGeneric:
 | 
			
		||||
      for (int site = 0; site < Nsite; site++) {
 | 
			
		||||
	for (int s = 0; s < Ls; s++) {
 | 
			
		||||
	  if(interior&&exterior) WilsonKernels<Impl>::GenericDhopSite(st,lo,U,buf,sF,sU,in,out);
 | 
			
		||||
@@ -99,7 +99,7 @@ public:
 | 
			
		||||
     
 | 
			
		||||
  template <bool EnableBool = true> accelerator
 | 
			
		||||
  typename std::enable_if<(Impl::Dimension != 3 || (Impl::Dimension == 3 && Nc != 3)) && EnableBool, void>::type
 | 
			
		||||
  DhopSite(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, SiteHalfSpinor * buf,
 | 
			
		||||
  DhopSite(int Opt, StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, SiteHalfSpinor * buf,
 | 
			
		||||
	   int sF, int sU, int Ls, int Nsite, const FermionField &in, FermionField &out,int interior=1,int exterior=1 ) {
 | 
			
		||||
    // no kernel choice  
 | 
			
		||||
    for (int site = 0; site < Nsite; site++) {
 | 
			
		||||
@@ -116,20 +116,20 @@ public:
 | 
			
		||||
     
 | 
			
		||||
  template <bool EnableBool = true> accelerator
 | 
			
		||||
  typename std::enable_if<Impl::Dimension == 3 && Nc == 3 && EnableBool,void>::type
 | 
			
		||||
  DhopSiteDag(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, SiteHalfSpinor * buf,
 | 
			
		||||
  DhopSiteDag(int Opt, StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, SiteHalfSpinor * buf,
 | 
			
		||||
	      int sF, int sU, int Ls, int Nsite, const FermionField &in, FermionField &out,int interior=1,int exterior=1) 
 | 
			
		||||
  {
 | 
			
		||||
    bgq_l1p_optimisation(1);
 | 
			
		||||
    switch(Opt) {
 | 
			
		||||
#if defined(AVX512) || defined (QPX)
 | 
			
		||||
    case OptInlineAsm:
 | 
			
		||||
    case WilsonKernelsStatic::OptInlineAsm:
 | 
			
		||||
      if(interior&&exterior) WilsonKernels<Impl>::AsmDhopSiteDag   (st,lo,U,buf,sF,sU,Ls,Nsite,in,out);
 | 
			
		||||
      else if (interior)     WilsonKernels<Impl>::AsmDhopSiteDagInt(st,lo,U,buf,sF,sU,Ls,Nsite,in,out);
 | 
			
		||||
      else if (exterior)     WilsonKernels<Impl>::AsmDhopSiteDagExt(st,lo,U,buf,sF,sU,Ls,Nsite,in,out);
 | 
			
		||||
      else assert(0);
 | 
			
		||||
      break;
 | 
			
		||||
#endif
 | 
			
		||||
    case OptHandUnroll:
 | 
			
		||||
    case WilsonKernelsStatic::OptHandUnroll:
 | 
			
		||||
      for (int site = 0; site < Nsite; site++) {
 | 
			
		||||
	for (int s = 0; s < Ls; s++) {
 | 
			
		||||
	  if(interior&&exterior) WilsonKernels<Impl>::HandDhopSiteDag(st,lo,U,buf,sF,sU,in,out);
 | 
			
		||||
@@ -141,7 +141,7 @@ public:
 | 
			
		||||
	sU++;
 | 
			
		||||
      }
 | 
			
		||||
      break;
 | 
			
		||||
    case OptGeneric:
 | 
			
		||||
    case WilsonKernelsStatic::OptGeneric:
 | 
			
		||||
      for (int site = 0; site < Nsite; site++) {
 | 
			
		||||
	for (int s = 0; s < Ls; s++) {
 | 
			
		||||
	  if(interior&&exterior) WilsonKernels<Impl>::GenericDhopSiteDag(st,lo,U,buf,sF,sU,in,out);
 | 
			
		||||
@@ -161,7 +161,7 @@ public:
 | 
			
		||||
 | 
			
		||||
  template <bool EnableBool = true> accelerator
 | 
			
		||||
  typename std::enable_if<(Impl::Dimension != 3 || (Impl::Dimension == 3 && Nc != 3)) && EnableBool,void>::type
 | 
			
		||||
  DhopSiteDag(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U,SiteHalfSpinor * buf,
 | 
			
		||||
  DhopSiteDag(int Opt,StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U,SiteHalfSpinor * buf,
 | 
			
		||||
	      int sF, int sU, int Ls, int Nsite, const FermionField &in, FermionField &out,int interior=1,int exterior=1) {
 | 
			
		||||
 | 
			
		||||
    for (int site = 0; site < Nsite; site++) {
 | 
			
		||||
@@ -176,7 +176,7 @@ public:
 | 
			
		||||
    }
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  acceleratorvoid DhopDirK(StencilImpl &st, DoubledGaugeField &U,SiteHalfSpinor * buf,
 | 
			
		||||
  accelerator void DhopDirK(StencilImpl &st, DoubledGaugeField &U,SiteHalfSpinor * buf,
 | 
			
		||||
			   int sF, int sU, const FermionField &in, FermionField &out, int dirdisp, int gamma);
 | 
			
		||||
      
 | 
			
		||||
  //////////////////////////////////////////////////////////////////////////////
 | 
			
		||||
 
 | 
			
		||||
		Reference in New Issue
	
	Block a user