From 6b692aa726b4feb35810c6bbea824a21c992d839 Mon Sep 17 00:00:00 2001 From: Peter Boyle Date: Sat, 15 Jun 2019 08:02:26 +0100 Subject: [PATCH] Thread loops --- Grid/lattice/Lattice_transfer.h | 46 ++++++++++++++++----------------- 1 file changed, 22 insertions(+), 24 deletions(-) diff --git a/Grid/lattice/Lattice_transfer.h b/Grid/lattice/Lattice_transfer.h index 789fafbf..865a4b14 100644 --- a/Grid/lattice/Lattice_transfer.h +++ b/Grid/lattice/Lattice_transfer.h @@ -52,7 +52,7 @@ template inline void pickCheckerboard(int cb,Lattice &half,con auto half_v = half.View(); auto full_v = full.View(); - thread_loop( (int ss=0;ssoSites();ss++),{ + thread_for(ss, full.Grid()->oSites(),{ int cbos; Coordinate coor; full.Grid()->oCoorFromOindex(coor,ss); @@ -68,7 +68,8 @@ template inline void setCheckerboard(Lattice &full,const Latti int cb = half.Checkerboard(); auto half_v = half.View(); auto full_v = full.View(); - thread_loop( (int ss=0;ssoSites();ss++), { + thread_for(ss,full.Grid()->oSites(),{ + Coordinate coor; int cbos; @@ -111,8 +112,7 @@ inline void blockProject(Lattice > &coarseData, auto fineData_ = fineData.View(); auto coarseData_ = coarseData.View(); // Loop over coars parallel, and then loop over fine associated with coarse. - thread_loop( (int sf=0;sfoSites();sf++),{ - + thread_for( sf, fine->oSites(), { int sc; Coordinate coor_c(_ndimension); Coordinate coor_f(_ndimension); @@ -160,7 +160,7 @@ inline void blockZAXPY(Lattice &fineZ, auto fineY_ = fineY.View(); auto coarseA_= coarseA.View(); - thread_loop( (int sf=0;sfoSites();sf++),{ + thread_for(sf, fine->oSites(), { int sc; Coordinate coor_c(_ndimension); @@ -196,7 +196,7 @@ inline void blockInnerProduct(Lattice &CoarseInner, fine_inner = localInnerProduct(fineX,fineY); blockSum(coarse_inner,fine_inner); - thread_loop( (int ss=0;ssoSites();ss++),{ + thread_for(ss, coarse->oSites(),{ CoarseInner_[ss] = coarse_inner_[ss]; }); } @@ -233,7 +233,7 @@ inline void blockSum(Lattice &coarseData,const Lattice &fineData) auto coarseData_ = coarseData.View(); auto fineData_ = fineData.View(); - thread_loop( (int sf=0;sfoSites();sf++),{ + thread_for(sf,fine->oSites(),{ int sc; Coordinate coor_c(_ndimension); Coordinate coor_f(_ndimension); @@ -321,7 +321,7 @@ inline void blockPromote(const Lattice > &coarseData, auto coarseData_ = coarseData.View(); // Loop with a cache friendly loop ordering - thread_loop( (int sf=0;sfoSites();sf++),{ + thread_for(sf,fine->oSites(),{ int sc; Coordinate coor_c(_ndimension); Coordinate coor_f(_ndimension); @@ -362,7 +362,7 @@ void localConvert(const Lattice &in,Lattice &out) assert(ig->lSites() == og->lSites()); } - thread_loop( (int idx=0;idxlSites();idx++),{ + thread_for(idx, ig->lSites(),{ sobj s; ssobj ss; @@ -400,7 +400,7 @@ void InsertSlice(const Lattice &lowDim,Lattice & higherDim,int slice } // the above should guarantee that the operations are local - thread_loop( (int idx=0;idxlSites();idx++),{ + thread_for(idx,lg->lSites(),{ sobj s; Coordinate lcoor(nl); Coordinate hcoor(nh); @@ -441,7 +441,7 @@ void ExtractSlice(Lattice &lowDim,const Lattice & higherDim,int slic } } // the above should guarantee that the operations are local - thread_loop((int idx=0;idxlSites();idx++),{ + thread_for(idx,lg->lSites(),{ sobj s; Coordinate lcoor(nl); Coordinate hcoor(nh); @@ -482,7 +482,7 @@ void InsertSliceLocal(const Lattice &lowDim, Lattice & higherDim,int } // the above should guarantee that the operations are local - thread_loop( (int idx=0;idxlSites();idx++),{ + thread_for(idx,lg->lSites(),{ sobj s; Coordinate lcoor(nl); Coordinate hcoor(nh); @@ -519,7 +519,7 @@ void ExtractSliceLocal(Lattice &lowDim,const Lattice & higherDim,int } // the above should guarantee that the operations are local - thread_loop( (int idx=0;idxlSites();idx++),{ + thread_for(idx,lg->lSites(),{ sobj s; Coordinate lcoor(nl); Coordinate hcoor(nh); @@ -593,7 +593,7 @@ unvectorizeToLexOrdArray(std::vector &out, const Lattice &in) //loop over outer index auto in_v = in.View(); - thread_loop( (int in_oidx = 0; in_oidx < in_grid->oSites(); in_oidx++),{ + thread_for(in_oidx,in_grid->oSites(),{ //Assemble vector of pointers to output elements ExtractPointerArray out_ptrs(in_nsimd); @@ -640,7 +640,7 @@ unvectorizeToRevLexOrdArray(std::vector &out, const Lattice &in) in_grid->iCoorFromIindex(in_icoor[lane], lane); } - thread_loop( (int in_oidx = 0; in_oidx < in_grid->oSites(); in_oidx++),{ //loop over outer index + thread_for(in_oidx, in_grid->oSites(),{ //Assemble vector of pointers to output elements std::vector out_ptrs(in_nsimd); @@ -686,7 +686,7 @@ vectorizeFromLexOrdArray( std::vector &in, Lattice &out) grid->iCoorFromIindex(icoor[lane],lane); } auto out_v = out.View(); - thread_loop( (uint64_t oidx = 0; oidx < grid->oSites(); oidx++),{ + thread_for(oidx, grid->oSites(),{ //Assemble vector of pointers to output elements ExtractPointerArray ptrs(nsimd); @@ -733,7 +733,7 @@ vectorizeFromRevLexOrdArray( std::vector &in, Lattice &out) grid->iCoorFromIindex(icoor[lane],lane); } - thread_loop( (uint64_t oidx = 0; oidx < grid->oSites(); oidx++),{ //loop over outer index + thread_for(oidx, grid->oSites(), { //Assemble vector of pointers to output elements std::vector ptrs(nsimd); @@ -789,7 +789,7 @@ void precisionChange(Lattice &out, const Lattice &in) unvectorizeToLexOrdArray(in_slex_conv, in); auto out_v = out.View(); - thread_loop( (uint64_t out_oidx=0;out_oidxoSites();out_oidx++),{ + thread_for(out_oidx,out_grid->oSites(),{ Coordinate out_ocoor(ndim); out_grid->oCoorFromOindex(out_ocoor, out_oidx); @@ -906,7 +906,7 @@ void Grid_split(std::vector > & full,Lattice & split) for(int v=0;v > & full,Lattice & split) int chunk = (nvec*fvol)/sP; assert(chunk*sP == nvec*fvol); // Loop over reordered data post A2A - thread_loop( (int c=0;c > & full,Lattice & split) { // Loop over reordered data post A2A - thread_loop( (int c=0;c > & full,Lattice & split) lsites = full_grid->lSites(); for(int v=0;v