1
0
mirror of https://github.com/paboyle/Grid.git synced 2025-04-04 19:25:56 +01:00

Red black friendly coarsening

This commit is contained in:
paboyle 2017-10-25 23:51:18 +01:00
parent b395a312af
commit 08583afaff

View File

@ -109,8 +109,8 @@ inline void blockProject(Lattice<iVector<CComplex,nbasis > > &coarseData,
coarseData=zero;
// Loop with a cache friendly loop ordering
for(int sf=0;sf<fine->oSites();sf++){
// Loop over coars parallel, and then loop over fine associated with coarse.
parallel_for(int sf=0;sf<fine->oSites();sf++){
int sc;
std::vector<int> coor_c(_ndimension);
@ -119,8 +119,9 @@ inline void blockProject(Lattice<iVector<CComplex,nbasis > > &coarseData,
for(int d=0;d<_ndimension;d++) coor_c[d]=coor_f[d]/block_r[d];
Lexicographic::IndexFromCoor(coor_c,sc,coarse->_rdimensions);
PARALLEL_CRITICAL
for(int i=0;i<nbasis;i++) {
coarseData._odata[sc](i)=coarseData._odata[sc](i)
+ innerProduct(Basis[i]._odata[sf],fineData._odata[sf]);
@ -139,6 +140,7 @@ inline void blockZAXPY(Lattice<vobj> &fineZ,
GridBase * coarse= coarseA._grid;
fineZ.checkerboard=fineX.checkerboard;
assert(fineX.checkerboard==fineY.checkerboard);
subdivides(coarse,fine); // require they map
conformable(fineX,fineY);
conformable(fineX,fineZ);
@ -180,9 +182,10 @@ template<class vobj,class CComplex>
GridBase *coarse(CoarseInner._grid);
GridBase *fine (fineX._grid);
Lattice<dotp> fine_inner(fine);
Lattice<dotp> fine_inner(fine); fine_inner.checkerboard = fineX.checkerboard;
Lattice<dotp> coarse_inner(coarse);
// Precision promotion?
fine_inner = localInnerProduct(fineX,fineY);
blockSum(coarse_inner,fine_inner);
parallel_for(int ss=0;ss<coarse->oSites();ss++){
@ -193,7 +196,7 @@ template<class vobj,class CComplex>
inline void blockNormalise(Lattice<CComplex> &ip,Lattice<vobj> &fineX)
{
GridBase *coarse = ip._grid;
Lattice<vobj> zz(fineX._grid); zz=zero;
Lattice<vobj> zz(fineX._grid); zz=zero; zz.checkerboard=fineX.checkerboard;
blockInnerProduct(ip,fineX,fineX);
ip = pow(ip,-0.5);
blockZAXPY(fineX,ip,fineX,zz);
@ -216,19 +219,25 @@ inline void blockSum(Lattice<vobj> &coarseData,const Lattice<vobj> &fineData)
block_r[d] = fine->_rdimensions[d] / coarse->_rdimensions[d];
}
// Turn this around to loop threaded over sc and interior loop
// over sf would thread better
coarseData=zero;
for(int sf=0;sf<fine->oSites();sf++){
parallel_region {
int sc;
std::vector<int> coor_c(_ndimension);
std::vector<int> coor_f(_ndimension);
Lexicographic::CoorFromIndex(coor_f,sf,fine->_rdimensions);
for(int d=0;d<_ndimension;d++) coor_c[d]=coor_f[d]/block_r[d];
Lexicographic::IndexFromCoor(coor_c,sc,coarse->_rdimensions);
coarseData._odata[sc]=coarseData._odata[sc]+fineData._odata[sf];
parallel_for_internal(int sf=0;sf<fine->oSites();sf++){
Lexicographic::CoorFromIndex(coor_f,sf,fine->_rdimensions);
for(int d=0;d<_ndimension;d++) coor_c[d]=coor_f[d]/block_r[d];
Lexicographic::IndexFromCoor(coor_c,sc,coarse->_rdimensions);
PARALLEL_CRITICAL
coarseData._odata[sc]=coarseData._odata[sc]+fineData._odata[sf];
}
}
return;
}
@ -238,7 +247,7 @@ inline void blockPick(GridBase *coarse,const Lattice<vobj> &unpicked,Lattice<vob
{
GridBase * fine = unpicked._grid;
Lattice<vobj> zz(fine);
Lattice<vobj> zz(fine); zz.checkerboard = unpicked.checkerboard;
Lattice<iScalar<vInteger> > fcoor(fine);
zz = zero;
@ -303,20 +312,21 @@ inline void blockPromote(const Lattice<iVector<CComplex,nbasis > > &coarseData,
}
// Loop with a cache friendly loop ordering
for(int sf=0;sf<fine->oSites();sf++){
parallel_region {
int sc;
std::vector<int> coor_c(_ndimension);
std::vector<int> coor_f(_ndimension);
Lexicographic::CoorFromIndex(coor_f,sf,fine->_rdimensions);
for(int d=0;d<_ndimension;d++) coor_c[d]=coor_f[d]/block_r[d];
Lexicographic::IndexFromCoor(coor_c,sc,coarse->_rdimensions);
for(int i=0;i<nbasis;i++) {
if(i==0) fineData._odata[sf]=coarseData._odata[sc](i) * Basis[i]._odata[sf];
else fineData._odata[sf]=fineData._odata[sf]+coarseData._odata[sc](i)*Basis[i]._odata[sf];
parallel_for_internal(int sf=0;sf<fine->oSites();sf++){
Lexicographic::CoorFromIndex(coor_f,sf,fine->_rdimensions);
for(int d=0;d<_ndimension;d++) coor_c[d]=coor_f[d]/block_r[d];
Lexicographic::IndexFromCoor(coor_c,sc,coarse->_rdimensions);
for(int i=0;i<nbasis;i++) {
if(i==0) fineData._odata[sf]=coarseData._odata[sc](i) * Basis[i]._odata[sf];
else fineData._odata[sf]=fineData._odata[sf]+coarseData._odata[sc](i)*Basis[i]._odata[sf];
}
}
}
return;