1
0
mirror of https://github.com/paboyle/Grid.git synced 2025-07-07 08:57:06 +01:00

parallel_for elimination -> thread_loop

This commit is contained in:
paboyle
2018-01-28 01:01:14 +00:00
parent 9472b02771
commit 70e276e1ab
21 changed files with 269 additions and 300 deletions

View File

@ -48,7 +48,7 @@ inline ComplexD innerProduct(const Lattice<vobj> &left,const Lattice<vobj> &righ
std::vector<vector_type,alignedAllocator<vector_type> > sumarray(grid->SumArraySize());
parallel_for(int thr=0;thr<grid->SumArraySize();thr++){
thread_loop( (int thr=0;thr<grid->SumArraySize();thr++),{
int mywork, myoff;
GridThread::GetWork(left.Grid()->oSites(),thr,mywork,myoff);
@ -57,7 +57,7 @@ inline ComplexD innerProduct(const Lattice<vobj> &left,const Lattice<vobj> &righ
vnrm = vnrm + innerProductD(left[ss],right[ss]);
}
sumarray[thr]=TensorRemove(vnrm) ;
}
});
vector_type vvnrm; vvnrm=Zero(); // sum across threads
for(int i=0;i<grid->SumArraySize();i++){
@ -104,7 +104,7 @@ inline typename vobj::scalar_object sum(const Lattice<vobj> &arg)
sumarray[i]=Zero();
}
parallel_for(int thr=0;thr<grid->SumArraySize();thr++){
thread_loop( (int thr=0;thr<grid->SumArraySize();thr++),{
int mywork, myoff;
GridThread::GetWork(grid->oSites(),thr,mywork,myoff);
@ -113,7 +113,7 @@ inline typename vobj::scalar_object sum(const Lattice<vobj> &arg)
vvsum = vvsum + arg[ss];
}
sumarray[thr]=vvsum;
}
});
vobj vsum=Zero(); // sum across threads
for(int i=0;i<grid->SumArraySize();i++){
@ -172,8 +172,7 @@ template<class vobj> inline void sliceSum(const Lattice<vobj> &Data,std::vector<
int stride=grid->_slice_stride[orthogdim];
// sum over reduced dimension planes, breaking out orthog dir
// Parallel over orthog direction
parallel_for(int r=0;r<rd;r++){
thread_loop( (int r=0;r<rd;r++),{
int so=r*grid->_ostride[orthogdim]; // base offset for start of plane
@ -183,7 +182,7 @@ template<class vobj> inline void sliceSum(const Lattice<vobj> &Data,std::vector<
lvSum[r]=lvSum[r]+Data[ss];
}
}
}
});
// Sum across simd lanes in the plane, breaking out orthog dir.
std::vector<int> icoor(Nd);
@ -252,7 +251,7 @@ static void sliceInnerProductVector( std::vector<ComplexD> & result, const Latti
int e2= grid->_slice_block [orthogdim];
int stride=grid->_slice_stride[orthogdim];
parallel_for(int r=0;r<rd;r++){
thread_loop( (int r=0;r<rd;r++),{
int so=r*grid->_ostride[orthogdim]; // base offset for start of plane
@ -263,7 +262,7 @@ static void sliceInnerProductVector( std::vector<ComplexD> & result, const Latti
lvSum[r]=lvSum[r]+vv;
}
}
}
});
// Sum across simd lanes in the plane, breaking out orthog dir.
std::vector<int> icoor(Nd);
@ -359,12 +358,12 @@ static void sliceMaddVector(Lattice<vobj> &R,std::vector<RealD> &a,const Lattice
tensor_reduced at; at=av;
parallel_for_nest2(int n=0;n<e1;n++){
thread_loop_collapse(2, (int n=0;n<e1;n++),{
for(int b=0;b<e2;b++){
int ss= so+n*stride+b;
R[ss] = at*X[ss]+Y[ss];
}
}
});
}
};