diff --git a/Grid/lattice/Lattice_reduction.h b/Grid/lattice/Lattice_reduction.h index c3478ab4..0ddac437 100644 --- a/Grid/lattice/Lattice_reduction.h +++ b/Grid/lattice/Lattice_reduction.h @@ -168,6 +168,22 @@ inline typename vobj::scalar_object sum(const Lattice &arg) return ssum; } +template +inline typename vobj::scalar_object sum_large(const Lattice &arg) +{ +#if defined(GRID_CUDA)||defined(GRID_HIP) + autoView( arg_v, arg, AcceleratorRead); + Integer osites = arg.Grid()->oSites(); + auto ssum= sum_gpu_large(&arg_v[0],osites); +#else + autoView(arg_v, arg, CpuRead); + Integer osites = arg.Grid()->oSites(); + auto ssum= sum_cpu(&arg_v[0],osites); +#endif + arg.Grid()->GlobalSum(ssum); + return ssum; +} + //////////////////////////////////////////////////////////////////////////////////////////////////// // Deterministic Reduction operations //////////////////////////////////////////////////////////////////////////////////////////////////// diff --git a/Grid/lattice/Lattice_reduction_gpu.h b/Grid/lattice/Lattice_reduction_gpu.h index c685a2c0..c3422af3 100644 --- a/Grid/lattice/Lattice_reduction_gpu.h +++ b/Grid/lattice/Lattice_reduction_gpu.h @@ -288,6 +288,14 @@ inline typename vobj::scalar_object sum_gpu(const vobj *lat, Integer osites) return result; } +template +inline typename vobj::scalar_object sum_gpu_large(const vobj *lat, Integer osites) +{ + typedef typename vobj::scalar_object sobj; + sobj result; + result = sumD_gpu_large(lat,osites); + return result; +} NAMESPACE_END(Grid);