From 59b9d0e0300c3f083a8970efe36858c2f28fe606 Mon Sep 17 00:00:00 2001 From: Peter Boyle Date: Thu, 5 Oct 2023 16:54:48 -0400 Subject: [PATCH] coalesceRead the blockSum --- Grid/lattice/Lattice_transfer.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/Grid/lattice/Lattice_transfer.h b/Grid/lattice/Lattice_transfer.h index 668ef4b4..f22b7001 100644 --- a/Grid/lattice/Lattice_transfer.h +++ b/Grid/lattice/Lattice_transfer.h @@ -471,13 +471,13 @@ inline void blockSum(Lattice &coarseData,const Lattice &fineData) vobj zz = Zero(); - accelerator_for(sc,coarse->oSites(),1,{ + accelerator_for(sc,coarse->oSites(),vobj::Nsimd(),{ // One thread per sub block Coordinate coor_c(_ndimension); Lexicographic::CoorFromIndex(coor_c,sc,coarse_rdimensions); // Block coordinate - vobj cd = zz; + auto cd = coalescedRead(zz); for(int sb=0;sb &coarseData,const Lattice &fineData) for(int d=0;d<_ndimension;d++) coor_f[d]=coor_c[d]*block_r[d] + coor_b[d]; Lexicographic::IndexFromCoor(coor_f,sf,fine_rdimensions); - cd=cd+fineData_p[sf]; + cd=cd+coalescedRead(fineData_p[sf]); } - coarseData_p[sc] = cd; + coalescedWrite(coarseData_p[sc],cd); }); return;