mirror of
https://github.com/paboyle/Grid.git
synced 2025-04-09 21:50:45 +01:00
Red black friendly coarsening
This commit is contained in:
parent
b395a312af
commit
08583afaff
@ -109,8 +109,8 @@ inline void blockProject(Lattice<iVector<CComplex,nbasis > > &coarseData,
|
|||||||
|
|
||||||
coarseData=zero;
|
coarseData=zero;
|
||||||
|
|
||||||
// Loop with a cache friendly loop ordering
|
// Loop over coars parallel, and then loop over fine associated with coarse.
|
||||||
for(int sf=0;sf<fine->oSites();sf++){
|
parallel_for(int sf=0;sf<fine->oSites();sf++){
|
||||||
|
|
||||||
int sc;
|
int sc;
|
||||||
std::vector<int> coor_c(_ndimension);
|
std::vector<int> coor_c(_ndimension);
|
||||||
@ -119,8 +119,9 @@ inline void blockProject(Lattice<iVector<CComplex,nbasis > > &coarseData,
|
|||||||
for(int d=0;d<_ndimension;d++) coor_c[d]=coor_f[d]/block_r[d];
|
for(int d=0;d<_ndimension;d++) coor_c[d]=coor_f[d]/block_r[d];
|
||||||
Lexicographic::IndexFromCoor(coor_c,sc,coarse->_rdimensions);
|
Lexicographic::IndexFromCoor(coor_c,sc,coarse->_rdimensions);
|
||||||
|
|
||||||
|
PARALLEL_CRITICAL
|
||||||
for(int i=0;i<nbasis;i++) {
|
for(int i=0;i<nbasis;i++) {
|
||||||
|
|
||||||
coarseData._odata[sc](i)=coarseData._odata[sc](i)
|
coarseData._odata[sc](i)=coarseData._odata[sc](i)
|
||||||
+ innerProduct(Basis[i]._odata[sf],fineData._odata[sf]);
|
+ innerProduct(Basis[i]._odata[sf],fineData._odata[sf]);
|
||||||
|
|
||||||
@ -139,6 +140,7 @@ inline void blockZAXPY(Lattice<vobj> &fineZ,
|
|||||||
GridBase * coarse= coarseA._grid;
|
GridBase * coarse= coarseA._grid;
|
||||||
|
|
||||||
fineZ.checkerboard=fineX.checkerboard;
|
fineZ.checkerboard=fineX.checkerboard;
|
||||||
|
assert(fineX.checkerboard==fineY.checkerboard);
|
||||||
subdivides(coarse,fine); // require they map
|
subdivides(coarse,fine); // require they map
|
||||||
conformable(fineX,fineY);
|
conformable(fineX,fineY);
|
||||||
conformable(fineX,fineZ);
|
conformable(fineX,fineZ);
|
||||||
@ -180,9 +182,10 @@ template<class vobj,class CComplex>
|
|||||||
GridBase *coarse(CoarseInner._grid);
|
GridBase *coarse(CoarseInner._grid);
|
||||||
GridBase *fine (fineX._grid);
|
GridBase *fine (fineX._grid);
|
||||||
|
|
||||||
Lattice<dotp> fine_inner(fine);
|
Lattice<dotp> fine_inner(fine); fine_inner.checkerboard = fineX.checkerboard;
|
||||||
Lattice<dotp> coarse_inner(coarse);
|
Lattice<dotp> coarse_inner(coarse);
|
||||||
|
|
||||||
|
// Precision promotion?
|
||||||
fine_inner = localInnerProduct(fineX,fineY);
|
fine_inner = localInnerProduct(fineX,fineY);
|
||||||
blockSum(coarse_inner,fine_inner);
|
blockSum(coarse_inner,fine_inner);
|
||||||
parallel_for(int ss=0;ss<coarse->oSites();ss++){
|
parallel_for(int ss=0;ss<coarse->oSites();ss++){
|
||||||
@ -193,7 +196,7 @@ template<class vobj,class CComplex>
|
|||||||
inline void blockNormalise(Lattice<CComplex> &ip,Lattice<vobj> &fineX)
|
inline void blockNormalise(Lattice<CComplex> &ip,Lattice<vobj> &fineX)
|
||||||
{
|
{
|
||||||
GridBase *coarse = ip._grid;
|
GridBase *coarse = ip._grid;
|
||||||
Lattice<vobj> zz(fineX._grid); zz=zero;
|
Lattice<vobj> zz(fineX._grid); zz=zero; zz.checkerboard=fineX.checkerboard;
|
||||||
blockInnerProduct(ip,fineX,fineX);
|
blockInnerProduct(ip,fineX,fineX);
|
||||||
ip = pow(ip,-0.5);
|
ip = pow(ip,-0.5);
|
||||||
blockZAXPY(fineX,ip,fineX,zz);
|
blockZAXPY(fineX,ip,fineX,zz);
|
||||||
@ -216,19 +219,25 @@ inline void blockSum(Lattice<vobj> &coarseData,const Lattice<vobj> &fineData)
|
|||||||
block_r[d] = fine->_rdimensions[d] / coarse->_rdimensions[d];
|
block_r[d] = fine->_rdimensions[d] / coarse->_rdimensions[d];
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Turn this around to loop threaded over sc and interior loop
|
||||||
|
// over sf would thread better
|
||||||
coarseData=zero;
|
coarseData=zero;
|
||||||
for(int sf=0;sf<fine->oSites();sf++){
|
parallel_region {
|
||||||
|
|
||||||
int sc;
|
int sc;
|
||||||
std::vector<int> coor_c(_ndimension);
|
std::vector<int> coor_c(_ndimension);
|
||||||
std::vector<int> coor_f(_ndimension);
|
std::vector<int> coor_f(_ndimension);
|
||||||
|
|
||||||
Lexicographic::CoorFromIndex(coor_f,sf,fine->_rdimensions);
|
parallel_for_internal(int sf=0;sf<fine->oSites();sf++){
|
||||||
for(int d=0;d<_ndimension;d++) coor_c[d]=coor_f[d]/block_r[d];
|
|
||||||
Lexicographic::IndexFromCoor(coor_c,sc,coarse->_rdimensions);
|
Lexicographic::CoorFromIndex(coor_f,sf,fine->_rdimensions);
|
||||||
|
for(int d=0;d<_ndimension;d++) coor_c[d]=coor_f[d]/block_r[d];
|
||||||
coarseData._odata[sc]=coarseData._odata[sc]+fineData._odata[sf];
|
Lexicographic::IndexFromCoor(coor_c,sc,coarse->_rdimensions);
|
||||||
|
|
||||||
|
PARALLEL_CRITICAL
|
||||||
|
coarseData._odata[sc]=coarseData._odata[sc]+fineData._odata[sf];
|
||||||
|
|
||||||
|
}
|
||||||
}
|
}
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
@ -238,7 +247,7 @@ inline void blockPick(GridBase *coarse,const Lattice<vobj> &unpicked,Lattice<vob
|
|||||||
{
|
{
|
||||||
GridBase * fine = unpicked._grid;
|
GridBase * fine = unpicked._grid;
|
||||||
|
|
||||||
Lattice<vobj> zz(fine);
|
Lattice<vobj> zz(fine); zz.checkerboard = unpicked.checkerboard;
|
||||||
Lattice<iScalar<vInteger> > fcoor(fine);
|
Lattice<iScalar<vInteger> > fcoor(fine);
|
||||||
|
|
||||||
zz = zero;
|
zz = zero;
|
||||||
@ -303,20 +312,21 @@ inline void blockPromote(const Lattice<iVector<CComplex,nbasis > > &coarseData,
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Loop with a cache friendly loop ordering
|
// Loop with a cache friendly loop ordering
|
||||||
for(int sf=0;sf<fine->oSites();sf++){
|
parallel_region {
|
||||||
|
|
||||||
int sc;
|
int sc;
|
||||||
std::vector<int> coor_c(_ndimension);
|
std::vector<int> coor_c(_ndimension);
|
||||||
std::vector<int> coor_f(_ndimension);
|
std::vector<int> coor_f(_ndimension);
|
||||||
|
|
||||||
Lexicographic::CoorFromIndex(coor_f,sf,fine->_rdimensions);
|
parallel_for_internal(int sf=0;sf<fine->oSites();sf++){
|
||||||
for(int d=0;d<_ndimension;d++) coor_c[d]=coor_f[d]/block_r[d];
|
|
||||||
Lexicographic::IndexFromCoor(coor_c,sc,coarse->_rdimensions);
|
|
||||||
|
|
||||||
for(int i=0;i<nbasis;i++) {
|
|
||||||
if(i==0) fineData._odata[sf]=coarseData._odata[sc](i) * Basis[i]._odata[sf];
|
|
||||||
else fineData._odata[sf]=fineData._odata[sf]+coarseData._odata[sc](i)*Basis[i]._odata[sf];
|
|
||||||
|
|
||||||
|
Lexicographic::CoorFromIndex(coor_f,sf,fine->_rdimensions);
|
||||||
|
for(int d=0;d<_ndimension;d++) coor_c[d]=coor_f[d]/block_r[d];
|
||||||
|
Lexicographic::IndexFromCoor(coor_c,sc,coarse->_rdimensions);
|
||||||
|
|
||||||
|
for(int i=0;i<nbasis;i++) {
|
||||||
|
if(i==0) fineData._odata[sf]=coarseData._odata[sc](i) * Basis[i]._odata[sf];
|
||||||
|
else fineData._odata[sf]=fineData._odata[sf]+coarseData._odata[sc](i)*Basis[i]._odata[sf];
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return;
|
return;
|
||||||
|
Loading…
x
Reference in New Issue
Block a user