diff --git a/Grid/lattice/Lattice_transfer.h b/Grid/lattice/Lattice_transfer.h index c1c3b542..9e4003b0 100644 --- a/Grid/lattice/Lattice_transfer.h +++ b/Grid/lattice/Lattice_transfer.h @@ -106,6 +106,7 @@ inline void blockProject(Lattice > &coarseData, block_r[d] = fine->_rdimensions[d] / coarse->_rdimensions[d]; assert(block_r[d]*coarse->_rdimensions[d] == fine->_rdimensions[d]); } + int blockVol = fine->oSites()/coarse->oSites(); coarseData=Zero(); @@ -113,20 +114,26 @@ inline void blockProject(Lattice > &coarseData, auto coarseData_ = coarseData.View(); //////////////////////////////////////////////////////////////////////////////////////////////////////// // To make this lock free, loop over coars parallel, and then loop over fine associated with coarse. - // Otherwise do finee inner product per site, and make the update atomic + // Otherwise do fine inner product per site, and make the update atomic //////////////////////////////////////////////////////////////////////////////////////////////////////// - thread_for( sf, fine->oSites(), { - int sc; - Coordinate coor_c(_ndimension); - Coordinate coor_f(_ndimension); - Lexicographic::CoorFromIndex(coor_f,sf,fine->_rdimensions); - for(int d=0;d<_ndimension;d++) coor_c[d]=coor_f[d]/block_r[d]; - Lexicographic::IndexFromCoor(coor_c,sc,coarse->_rdimensions); + accelerator_for( sc, coarse->oSites(), { - for(int i=0;i_rdimensions); // Block coordinate + coarseData_[sc]=Zero(); + + for(int sb=0;sb_rdimensions); + + for(int i=0;i &coarseData,const Lattice &fineData) for(int d=0 ; d<_ndimension;d++){ block_r[d] = fine->_rdimensions[d] / coarse->_rdimensions[d]; } + int blockVol = fine->oSites()/coarse->oSites(); // Turn this around to loop threaded over sc and interior loop // over sf would thread better - coarseData=Zero(); auto coarseData_ = coarseData.View(); auto fineData_ = fineData.View(); - thread_for(sf,fine->oSites(),{ - int sc; + accelerator_for(sc,coarse->oSites(),1,{ + + // One thread per sub block Coordinate coor_c(_ndimension); - Coordinate coor_f(_ndimension); - - Lexicographic::CoorFromIndex(coor_f,sf,fine->_rdimensions); - for(int d=0;d<_ndimension;d++) coor_c[d]=coor_f[d]/block_r[d]; - Lexicographic::IndexFromCoor(coor_c,sc,coarse->_rdimensions); - - thread_critical { + Lexicographic::CoorFromIndex(coor_c,sc,coarse->_rdimensions); // Block coordinate + coarseData_[sc]=Zero(); + + for(int sb=0;sb_rdimensions); + coarseData_[sc]=coarseData_[sc]+fineData_[sf]; }