Reduce the loop over exterior for GPU to indirection table

2025-07-26 01:17:06 +01:00 · 2022-06-01 14:29:25 -07:00
parent 34faa39f4f
commit e762c940c2
3 changed files with 19 additions and 4 deletions
--- a/Grid/qcd/action/fermion/WilsonCompressor.h
+++ b/Grid/qcd/action/fermion/WilsonCompressor.h
@@ -297,7 +297,7 @@ public:
  void ZeroCountersi(void)  {  }
  void Reporti(int calls)  {  }

-  std::vector<int> surface_list;
+  //  Vector<int> surface_list;

  WilsonStencil(GridBase *grid,
 		int npoints,
@@ -307,10 +307,11 @@ public:
    : CartesianStencil<vobj,cobj,Parameters> (grid,npoints,checkerboard,directions,distances,p) 
  { 
    ZeroCountersi();
-    surface_list.resize(0);
+    //    surface_list.resize(0);
    this->same_node.resize(npoints);
  };

+  /*
  void BuildSurfaceList(int Ls,int vol4){

    // find same node for SHM
@@ -331,7 +332,8 @@ public:
      }
    }
  }
-
+  */
+  
  template < class compressor>
  void HaloExchangeOpt(const Lattice<vobj> &source,compressor &compress) 
  {
--- a/Grid/qcd/action/fermion/implementation/WilsonKernelsImplementation.h
+++ b/Grid/qcd/action/fermion/implementation/WilsonKernelsImplementation.h
@@ -440,6 +440,17 @@ void WilsonKernels<Impl>::DhopDirKernel( StencilImpl &st, DoubledGaugeField &U,S

 #define KERNEL_CALL(A) KERNEL_CALLNB(A); accelerator_barrier();

+#define KERNEL_CALL_EXT(A)	/* run site kernel A over st.surface_list entries only */ \
+  const uint64_t    NN = Nsite*Ls;					\
+  const uint64_t    sz = st.surface_list.size();			\
+  auto ptr = &st.surface_list[0];					\
+  accelerator_forNB( ss, sz, Simd::Nsimd(), {				\
+      int sF = ptr[ss];		/* table entry, stored as site*Ls+s */	\
+      int sU = sF/Ls;		/* recover site from the entry; ss is only the table position */ \
+      WilsonKernels<Impl>::A(st_v,U_v,buf,sF,sU,in_v,out_v);		\
+    });									\
+  accelerator_barrier();
+
 #define ASM_CALL(A)							\
  thread_for( ss, Nsite, {						\
    int sU = ss;							\
--- a/Grid/stencil/Stencil.h
+++ b/Grid/stencil/Stencil.h
@@ -640,7 +640,9 @@ public:
 	}
      }
      if(local == 0) {
-	surface_list.push_back(site);
+	for(int s=0;s<Ls;s++){
+	  surface_list.push_back(site*Ls+s);
+	}
      }
    }
  }