diff --git a/Grid/algorithms/CoarsenedMatrix.h b/Grid/algorithms/CoarsenedMatrix.h index b9594678..2fd187ff 100644 --- a/Grid/algorithms/CoarsenedMatrix.h +++ b/Grid/algorithms/CoarsenedMatrix.h @@ -442,6 +442,8 @@ public: for(int p=0; poSites()*nbasis, Nsimd, { @@ -453,7 +455,7 @@ public: StencilEntry *SE; for(int p=0;p AcceleratorViewContainer; for(int p=0;p_is_local) { @@ -754,7 +758,7 @@ public: StencilEntry *SE; for(int p=0;p_is_local) { diff --git a/Grid/lattice/Lattice_transfer.h b/Grid/lattice/Lattice_transfer.h index 91de721f..2da78398 100644 --- a/Grid/lattice/Lattice_transfer.h +++ b/Grid/lattice/Lattice_transfer.h @@ -360,16 +360,22 @@ inline void blockSum(Lattice &coarseData,const Lattice &fineData) autoView( coarseData_ , coarseData, AcceleratorWrite); autoView( fineData_ , fineData, AcceleratorRead); + auto coarseData_p = &coarseData_[0]; + auto fineData_p = &fineData_[0]; + Coordinate fine_rdimensions = fine->_rdimensions; Coordinate coarse_rdimensions = coarse->_rdimensions; + + vobj zz = Zero(); accelerator_for(sc,coarse->oSites(),1,{ // One thread per sub block Coordinate coor_c(_ndimension); Lexicographic::CoorFromIndex(coor_c,sc,coarse_rdimensions); // Block coordinate - coarseData_[sc]=Zero(); + vobj cd = zz; + for(int sb=0;sb &coarseData,const Lattice &fineData) for(int d=0;d<_ndimension;d++) coor_f[d]=coor_c[d]*block_r[d] + coor_b[d]; Lexicographic::IndexFromCoor(coor_f,sf,fine_rdimensions); - coarseData_[sc]=coarseData_[sc]+fineData_[sf]; + cd=cd+fineData_p[sf]; } + coarseData_p[sc] = cd; + }); return; }