Merge pull request #388 from fjosw/feature/sumd-npr

Feature/sumd npr
2025-07-18 21:57:07 +01:00 · 2022-03-15 09:05:57 -04:00
parent 9aac1e6d64 d1decee4cc
commit b615fa0f35
2 changed files with 24 additions and 9 deletions
--- a/Grid/lattice/Lattice_reduction.h
+++ b/Grid/lattice/Lattice_reduction.h
@@ -168,6 +168,22 @@ inline typename vobj::scalar_object sum(const Lattice<vobj> &arg)
  return ssum;
 }

+template<class vobj>
+inline typename vobj::scalar_object sum_large(const Lattice<vobj> &arg)
+{
+#if defined(GRID_CUDA)||defined(GRID_HIP)
+  autoView( arg_v, arg, AcceleratorRead);
+  Integer osites = arg.Grid()->oSites();
+  auto ssum= sum_gpu_large(&arg_v[0],osites);
+#else
+  autoView(arg_v, arg, CpuRead);
+  Integer osites = arg.Grid()->oSites();
+  auto ssum= sum_cpu(&arg_v[0],osites);
+#endif
+  arg.Grid()->GlobalSum(ssum);
+  return ssum;
+}
+
 ////////////////////////////////////////////////////////////////////////////////////////////////////
 // Deterministic Reduction operations
 ////////////////////////////////////////////////////////////////////////////////////////////////////
--- a/Grid/lattice/Lattice_reduction_gpu.h
+++ b/Grid/lattice/Lattice_reduction_gpu.h
@@ -232,10 +232,6 @@ inline typename vobj::scalar_objectD sumD_gpu_large(const vobj *lat, Integer osi
  
  const int words = sizeof(vobj)/sizeof(vector);

-  Integer nsimd= vobj::Nsimd();
-  Integer size = osites*nsimd;
-  Integer numThreads, numBlocks;
-
  Vector<vector> buffer(osites);
  vector *dat = (vector *)lat;
  vector *buf = &buffer[0];
@@ -258,10 +254,7 @@ inline typename vobj::scalar_objectD sumD_gpu(const vobj *lat, Integer osites)
  typedef typename vobj::scalar_typeD scalarD;
  typedef typename vobj::scalar_objectD sobj;
  sobj ret;
-  scalarD *ret_p = (scalarD *)&ret;
  
-  const int words = sizeof(vobj)/sizeof(vector);
-
  Integer nsimd= vobj::Nsimd();
  Integer size = osites*nsimd;
  Integer numThreads, numBlocks;
@@ -275,7 +268,6 @@ inline typename vobj::scalar_objectD sumD_gpu(const vobj *lat, Integer osites)
  return ret;
 }

-
 /////////////////////////////////////////////////////////////////////////////////////////////////////////
 // Return as same precision as input performing reduction in double precision though
 /////////////////////////////////////////////////////////////////////////////////////////////////////////
@@ -288,6 +280,13 @@ inline typename vobj::scalar_object sum_gpu(const vobj *lat, Integer osites)
  return result;
 }

-
+template <class vobj>
+inline typename vobj::scalar_object sum_gpu_large(const vobj *lat, Integer osites)
+{
+  typedef typename vobj::scalar_object sobj;
+  sobj result;
+  result = sumD_gpu_large(lat,osites);
+  return result;
+}

 NAMESPACE_END(Grid);