diff --git a/Grid/lattice/Lattice_transfer.h b/Grid/lattice/Lattice_transfer.h index 668ef4b4..f22b7001 100644 --- a/Grid/lattice/Lattice_transfer.h +++ b/Grid/lattice/Lattice_transfer.h @@ -471,13 +471,13 @@ inline void blockSum(Lattice &coarseData,const Lattice &fineData) vobj zz = Zero(); - accelerator_for(sc,coarse->oSites(),1,{ + accelerator_for(sc,coarse->oSites(),vobj::Nsimd(),{ // One thread per sub block Coordinate coor_c(_ndimension); Lexicographic::CoorFromIndex(coor_c,sc,coarse_rdimensions); // Block coordinate - vobj cd = zz; + auto cd = coalescedRead(zz); for(int sb=0;sb &coarseData,const Lattice &fineData) for(int d=0;d<_ndimension;d++) coor_f[d]=coor_c[d]*block_r[d] + coor_b[d]; Lexicographic::IndexFromCoor(coor_f,sf,fine_rdimensions); - cd=cd+fineData_p[sf]; + cd=cd+coalescedRead(fineData_p[sf]); } - coarseData_p[sc] = cd; + coalescedWrite(coarseData_p[sc],cd); }); return;