From 1304172a9364ebff64f39604ef00fa0c5b5cbdfb Mon Sep 17 00:00:00 2001 From: Peter Boyle Date: Tue, 19 May 2026 08:53:13 -0400 Subject: [PATCH] Modified repack --- Grid/lattice/Lattice_reduction_gpu.h | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/Grid/lattice/Lattice_reduction_gpu.h b/Grid/lattice/Lattice_reduction_gpu.h index 5cc8636f..7fa80bcd 100644 --- a/Grid/lattice/Lattice_reduction_gpu.h +++ b/Grid/lattice/Lattice_reduction_gpu.h @@ -261,11 +261,9 @@ inline void sumD_gpu_reduce_words(const vobj *lat, Integer osites, #ifdef GRID_REDUCTION_TIMING RealD t_pack = -usecond(); #endif - accelerator_for(ss, osites, 1, { - Bundle b; - for (int k = 0; k < R; k++) - b._internal[k] = idat[ss * words + base + k]; - buf_p[ss] = b; + constexpr int Nsimd = vobj::Nsimd(); + accelerator_for2d(k, R, ss, osites, Nsimd, { + coalescedWrite(buf_p[ss]._internal[k], coalescedRead(idat[ss * words + base + k])); }); #ifdef GRID_REDUCTION_TIMING t_pack += usecond();