mirror of
https://github.com/paboyle/Grid.git
synced 2025-04-09 21:50:45 +01:00
Thread loops
This commit is contained in:
parent
7f99e1cd3b
commit
6b692aa726
@ -52,7 +52,7 @@ template<class vobj> inline void pickCheckerboard(int cb,Lattice<vobj> &half,con
|
|||||||
|
|
||||||
auto half_v = half.View();
|
auto half_v = half.View();
|
||||||
auto full_v = full.View();
|
auto full_v = full.View();
|
||||||
thread_loop( (int ss=0;ss<full.Grid()->oSites();ss++),{
|
thread_for(ss, full.Grid()->oSites(),{
|
||||||
int cbos;
|
int cbos;
|
||||||
Coordinate coor;
|
Coordinate coor;
|
||||||
full.Grid()->oCoorFromOindex(coor,ss);
|
full.Grid()->oCoorFromOindex(coor,ss);
|
||||||
@ -68,7 +68,8 @@ template<class vobj> inline void setCheckerboard(Lattice<vobj> &full,const Latti
|
|||||||
int cb = half.Checkerboard();
|
int cb = half.Checkerboard();
|
||||||
auto half_v = half.View();
|
auto half_v = half.View();
|
||||||
auto full_v = full.View();
|
auto full_v = full.View();
|
||||||
thread_loop( (int ss=0;ss<full.Grid()->oSites();ss++), {
|
thread_for(ss,full.Grid()->oSites(),{
|
||||||
|
|
||||||
Coordinate coor;
|
Coordinate coor;
|
||||||
int cbos;
|
int cbos;
|
||||||
|
|
||||||
@ -111,8 +112,7 @@ inline void blockProject(Lattice<iVector<CComplex,nbasis > > &coarseData,
|
|||||||
auto fineData_ = fineData.View();
|
auto fineData_ = fineData.View();
|
||||||
auto coarseData_ = coarseData.View();
|
auto coarseData_ = coarseData.View();
|
||||||
// Loop over coarse parallel, and then loop over fine associated with coarse.
|
// Loop over coarse parallel, and then loop over fine associated with coarse.
|
||||||
thread_loop( (int sf=0;sf<fine->oSites();sf++),{
|
thread_for( sf, fine->oSites(), {
|
||||||
|
|
||||||
int sc;
|
int sc;
|
||||||
Coordinate coor_c(_ndimension);
|
Coordinate coor_c(_ndimension);
|
||||||
Coordinate coor_f(_ndimension);
|
Coordinate coor_f(_ndimension);
|
||||||
@ -160,7 +160,7 @@ inline void blockZAXPY(Lattice<vobj> &fineZ,
|
|||||||
auto fineY_ = fineY.View();
|
auto fineY_ = fineY.View();
|
||||||
auto coarseA_= coarseA.View();
|
auto coarseA_= coarseA.View();
|
||||||
|
|
||||||
thread_loop( (int sf=0;sf<fine->oSites();sf++),{
|
thread_for(sf, fine->oSites(), {
|
||||||
|
|
||||||
int sc;
|
int sc;
|
||||||
Coordinate coor_c(_ndimension);
|
Coordinate coor_c(_ndimension);
|
||||||
@ -196,7 +196,7 @@ inline void blockInnerProduct(Lattice<CComplex> &CoarseInner,
|
|||||||
|
|
||||||
fine_inner = localInnerProduct(fineX,fineY);
|
fine_inner = localInnerProduct(fineX,fineY);
|
||||||
blockSum(coarse_inner,fine_inner);
|
blockSum(coarse_inner,fine_inner);
|
||||||
thread_loop( (int ss=0;ss<coarse->oSites();ss++),{
|
thread_for(ss, coarse->oSites(),{
|
||||||
CoarseInner_[ss] = coarse_inner_[ss];
|
CoarseInner_[ss] = coarse_inner_[ss];
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
@ -233,7 +233,7 @@ inline void blockSum(Lattice<vobj> &coarseData,const Lattice<vobj> &fineData)
|
|||||||
auto coarseData_ = coarseData.View();
|
auto coarseData_ = coarseData.View();
|
||||||
auto fineData_ = fineData.View();
|
auto fineData_ = fineData.View();
|
||||||
|
|
||||||
thread_loop( (int sf=0;sf<fine->oSites();sf++),{
|
thread_for(sf,fine->oSites(),{
|
||||||
int sc;
|
int sc;
|
||||||
Coordinate coor_c(_ndimension);
|
Coordinate coor_c(_ndimension);
|
||||||
Coordinate coor_f(_ndimension);
|
Coordinate coor_f(_ndimension);
|
||||||
@ -321,7 +321,7 @@ inline void blockPromote(const Lattice<iVector<CComplex,nbasis > > &coarseData,
|
|||||||
auto coarseData_ = coarseData.View();
|
auto coarseData_ = coarseData.View();
|
||||||
|
|
||||||
// Loop with a cache friendly loop ordering
|
// Loop with a cache friendly loop ordering
|
||||||
thread_loop( (int sf=0;sf<fine->oSites();sf++),{
|
thread_for(sf,fine->oSites(),{
|
||||||
int sc;
|
int sc;
|
||||||
Coordinate coor_c(_ndimension);
|
Coordinate coor_c(_ndimension);
|
||||||
Coordinate coor_f(_ndimension);
|
Coordinate coor_f(_ndimension);
|
||||||
@ -362,7 +362,7 @@ void localConvert(const Lattice<vobj> &in,Lattice<vvobj> &out)
|
|||||||
assert(ig->lSites() == og->lSites());
|
assert(ig->lSites() == og->lSites());
|
||||||
}
|
}
|
||||||
|
|
||||||
thread_loop( (int idx=0;idx<ig->lSites();idx++),{
|
thread_for(idx, ig->lSites(),{
|
||||||
sobj s;
|
sobj s;
|
||||||
ssobj ss;
|
ssobj ss;
|
||||||
|
|
||||||
@ -400,7 +400,7 @@ void InsertSlice(const Lattice<vobj> &lowDim,Lattice<vobj> & higherDim,int slice
|
|||||||
}
|
}
|
||||||
|
|
||||||
// the above should guarantee that the operations are local
|
// the above should guarantee that the operations are local
|
||||||
thread_loop( (int idx=0;idx<lg->lSites();idx++),{
|
thread_for(idx,lg->lSites(),{
|
||||||
sobj s;
|
sobj s;
|
||||||
Coordinate lcoor(nl);
|
Coordinate lcoor(nl);
|
||||||
Coordinate hcoor(nh);
|
Coordinate hcoor(nh);
|
||||||
@ -441,7 +441,7 @@ void ExtractSlice(Lattice<vobj> &lowDim,const Lattice<vobj> & higherDim,int slic
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
// the above should guarantee that the operations are local
|
// the above should guarantee that the operations are local
|
||||||
thread_loop((int idx=0;idx<lg->lSites();idx++),{
|
thread_for(idx,lg->lSites(),{
|
||||||
sobj s;
|
sobj s;
|
||||||
Coordinate lcoor(nl);
|
Coordinate lcoor(nl);
|
||||||
Coordinate hcoor(nh);
|
Coordinate hcoor(nh);
|
||||||
@ -482,7 +482,7 @@ void InsertSliceLocal(const Lattice<vobj> &lowDim, Lattice<vobj> & higherDim,int
|
|||||||
}
|
}
|
||||||
|
|
||||||
// the above should guarantee that the operations are local
|
// the above should guarantee that the operations are local
|
||||||
thread_loop( (int idx=0;idx<lg->lSites();idx++),{
|
thread_for(idx,lg->lSites(),{
|
||||||
sobj s;
|
sobj s;
|
||||||
Coordinate lcoor(nl);
|
Coordinate lcoor(nl);
|
||||||
Coordinate hcoor(nh);
|
Coordinate hcoor(nh);
|
||||||
@ -519,7 +519,7 @@ void ExtractSliceLocal(Lattice<vobj> &lowDim,const Lattice<vobj> & higherDim,int
|
|||||||
}
|
}
|
||||||
|
|
||||||
// the above should guarantee that the operations are local
|
// the above should guarantee that the operations are local
|
||||||
thread_loop( (int idx=0;idx<lg->lSites();idx++),{
|
thread_for(idx,lg->lSites(),{
|
||||||
sobj s;
|
sobj s;
|
||||||
Coordinate lcoor(nl);
|
Coordinate lcoor(nl);
|
||||||
Coordinate hcoor(nh);
|
Coordinate hcoor(nh);
|
||||||
@ -593,7 +593,7 @@ unvectorizeToLexOrdArray(std::vector<sobj> &out, const Lattice<vobj> &in)
|
|||||||
|
|
||||||
//loop over outer index
|
//loop over outer index
|
||||||
auto in_v = in.View();
|
auto in_v = in.View();
|
||||||
thread_loop( (int in_oidx = 0; in_oidx < in_grid->oSites(); in_oidx++),{
|
thread_for(in_oidx,in_grid->oSites(),{
|
||||||
//Assemble vector of pointers to output elements
|
//Assemble vector of pointers to output elements
|
||||||
ExtractPointerArray<sobj> out_ptrs(in_nsimd);
|
ExtractPointerArray<sobj> out_ptrs(in_nsimd);
|
||||||
|
|
||||||
@ -640,7 +640,7 @@ unvectorizeToRevLexOrdArray(std::vector<sobj> &out, const Lattice<vobj> &in)
|
|||||||
in_grid->iCoorFromIindex(in_icoor[lane], lane);
|
in_grid->iCoorFromIindex(in_icoor[lane], lane);
|
||||||
}
|
}
|
||||||
|
|
||||||
thread_loop( (int in_oidx = 0; in_oidx < in_grid->oSites(); in_oidx++),{ //loop over outer index
|
thread_for(in_oidx, in_grid->oSites(),{
|
||||||
//Assemble vector of pointers to output elements
|
//Assemble vector of pointers to output elements
|
||||||
std::vector<sobj*> out_ptrs(in_nsimd);
|
std::vector<sobj*> out_ptrs(in_nsimd);
|
||||||
|
|
||||||
@ -686,7 +686,7 @@ vectorizeFromLexOrdArray( std::vector<sobj> &in, Lattice<vobj> &out)
|
|||||||
grid->iCoorFromIindex(icoor[lane],lane);
|
grid->iCoorFromIindex(icoor[lane],lane);
|
||||||
}
|
}
|
||||||
auto out_v = out.View();
|
auto out_v = out.View();
|
||||||
thread_loop( (uint64_t oidx = 0; oidx < grid->oSites(); oidx++),{
|
thread_for(oidx, grid->oSites(),{
|
||||||
//Assemble vector of pointers to output elements
|
//Assemble vector of pointers to output elements
|
||||||
ExtractPointerArray<sobj> ptrs(nsimd);
|
ExtractPointerArray<sobj> ptrs(nsimd);
|
||||||
|
|
||||||
@ -733,7 +733,7 @@ vectorizeFromRevLexOrdArray( std::vector<sobj> &in, Lattice<vobj> &out)
|
|||||||
grid->iCoorFromIindex(icoor[lane],lane);
|
grid->iCoorFromIindex(icoor[lane],lane);
|
||||||
}
|
}
|
||||||
|
|
||||||
thread_loop( (uint64_t oidx = 0; oidx < grid->oSites(); oidx++),{ //loop over outer index
|
thread_for(oidx, grid->oSites(), {
|
||||||
//Assemble vector of pointers to output elements
|
//Assemble vector of pointers to output elements
|
||||||
std::vector<sobj*> ptrs(nsimd);
|
std::vector<sobj*> ptrs(nsimd);
|
||||||
|
|
||||||
@ -789,7 +789,7 @@ void precisionChange(Lattice<VobjOut> &out, const Lattice<VobjIn> &in)
|
|||||||
unvectorizeToLexOrdArray(in_slex_conv, in);
|
unvectorizeToLexOrdArray(in_slex_conv, in);
|
||||||
|
|
||||||
auto out_v = out.View();
|
auto out_v = out.View();
|
||||||
thread_loop( (uint64_t out_oidx=0;out_oidx<out_grid->oSites();out_oidx++),{
|
thread_for(out_oidx,out_grid->oSites(),{
|
||||||
Coordinate out_ocoor(ndim);
|
Coordinate out_ocoor(ndim);
|
||||||
out_grid->oCoorFromOindex(out_ocoor, out_oidx);
|
out_grid->oCoorFromOindex(out_ocoor, out_oidx);
|
||||||
|
|
||||||
@ -906,7 +906,7 @@ void Grid_split(std::vector<Lattice<Vobj> > & full,Lattice<Vobj> & split)
|
|||||||
|
|
||||||
for(int v=0;v<nvector;v++){
|
for(int v=0;v<nvector;v++){
|
||||||
unvectorizeToLexOrdArray(scalardata,full[v]);
|
unvectorizeToLexOrdArray(scalardata,full[v]);
|
||||||
thread_loop( (int site=0;site<lsites;site++),{
|
thread_for(site,lsites,{
|
||||||
alldata[v*lsites+site] = scalardata[site];
|
alldata[v*lsites+site] = scalardata[site];
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
@ -938,7 +938,7 @@ void Grid_split(std::vector<Lattice<Vobj> > & full,Lattice<Vobj> & split)
|
|||||||
int chunk = (nvec*fvol)/sP; assert(chunk*sP == nvec*fvol);
|
int chunk = (nvec*fvol)/sP; assert(chunk*sP == nvec*fvol);
|
||||||
|
|
||||||
// Loop over reordered data post A2A
|
// Loop over reordered data post A2A
|
||||||
thread_loop( (int c=0;c<chunk;c++),{
|
thread_for(c, chunk, {
|
||||||
Coordinate coor(ndim);
|
Coordinate coor(ndim);
|
||||||
for(int m=0;m<M;m++){
|
for(int m=0;m<M;m++){
|
||||||
for(int s=0;s<sP;s++){
|
for(int s=0;s<sP;s++){
|
||||||
@ -1057,7 +1057,7 @@ void Grid_unsplit(std::vector<Lattice<Vobj> > & full,Lattice<Vobj> & split)
|
|||||||
|
|
||||||
{
|
{
|
||||||
// Loop over reordered data post A2A
|
// Loop over reordered data post A2A
|
||||||
thread_loop( (int c=0;c<chunk;c++),{
|
thread_for(c, chunk,{
|
||||||
Coordinate coor(ndim);
|
Coordinate coor(ndim);
|
||||||
for(int m=0;m<M;m++){
|
for(int m=0;m<M;m++){
|
||||||
for(int s=0;s<sP;s++){
|
for(int s=0;s<sP;s++){
|
||||||
@ -1095,9 +1095,7 @@ void Grid_unsplit(std::vector<Lattice<Vobj> > & full,Lattice<Vobj> & split)
|
|||||||
|
|
||||||
lsites = full_grid->lSites();
|
lsites = full_grid->lSites();
|
||||||
for(int v=0;v<nvector;v++){
|
for(int v=0;v<nvector;v++){
|
||||||
// assert(v<full.size());
|
thread_for(site, lsites,{
|
||||||
thread_loop( (int site=0;site<lsites;site++),{
|
|
||||||
// assert(v*lsites+site < alldata.size());
|
|
||||||
scalardata[site] = alldata[v*lsites+site];
|
scalardata[site] = alldata[v*lsites+site];
|
||||||
});
|
});
|
||||||
vectorizeFromLexOrdArray(scalardata,full[v]);
|
vectorizeFromLexOrdArray(scalardata,full[v]);
|
||||||
|
Loading…
x
Reference in New Issue
Block a user