mirror of
https://github.com/paboyle/Grid.git
synced 2025-04-04 19:25:56 +01:00
Red black friendly coarsening
This commit is contained in:
parent
b395a312af
commit
08583afaff
@ -109,8 +109,8 @@ inline void blockProject(Lattice<iVector<CComplex,nbasis > > &coarseData,
|
||||
|
||||
coarseData=zero;
|
||||
|
||||
// Loop with a cache friendly loop ordering
|
||||
for(int sf=0;sf<fine->oSites();sf++){
|
||||
// Loop over coars parallel, and then loop over fine associated with coarse.
|
||||
parallel_for(int sf=0;sf<fine->oSites();sf++){
|
||||
|
||||
int sc;
|
||||
std::vector<int> coor_c(_ndimension);
|
||||
@ -119,8 +119,9 @@ inline void blockProject(Lattice<iVector<CComplex,nbasis > > &coarseData,
|
||||
for(int d=0;d<_ndimension;d++) coor_c[d]=coor_f[d]/block_r[d];
|
||||
Lexicographic::IndexFromCoor(coor_c,sc,coarse->_rdimensions);
|
||||
|
||||
PARALLEL_CRITICAL
|
||||
for(int i=0;i<nbasis;i++) {
|
||||
|
||||
|
||||
coarseData._odata[sc](i)=coarseData._odata[sc](i)
|
||||
+ innerProduct(Basis[i]._odata[sf],fineData._odata[sf]);
|
||||
|
||||
@ -139,6 +140,7 @@ inline void blockZAXPY(Lattice<vobj> &fineZ,
|
||||
GridBase * coarse= coarseA._grid;
|
||||
|
||||
fineZ.checkerboard=fineX.checkerboard;
|
||||
assert(fineX.checkerboard==fineY.checkerboard);
|
||||
subdivides(coarse,fine); // require they map
|
||||
conformable(fineX,fineY);
|
||||
conformable(fineX,fineZ);
|
||||
@ -180,9 +182,10 @@ template<class vobj,class CComplex>
|
||||
GridBase *coarse(CoarseInner._grid);
|
||||
GridBase *fine (fineX._grid);
|
||||
|
||||
Lattice<dotp> fine_inner(fine);
|
||||
Lattice<dotp> fine_inner(fine); fine_inner.checkerboard = fineX.checkerboard;
|
||||
Lattice<dotp> coarse_inner(coarse);
|
||||
|
||||
// Precision promotion?
|
||||
fine_inner = localInnerProduct(fineX,fineY);
|
||||
blockSum(coarse_inner,fine_inner);
|
||||
parallel_for(int ss=0;ss<coarse->oSites();ss++){
|
||||
@ -193,7 +196,7 @@ template<class vobj,class CComplex>
|
||||
inline void blockNormalise(Lattice<CComplex> &ip,Lattice<vobj> &fineX)
|
||||
{
|
||||
GridBase *coarse = ip._grid;
|
||||
Lattice<vobj> zz(fineX._grid); zz=zero;
|
||||
Lattice<vobj> zz(fineX._grid); zz=zero; zz.checkerboard=fineX.checkerboard;
|
||||
blockInnerProduct(ip,fineX,fineX);
|
||||
ip = pow(ip,-0.5);
|
||||
blockZAXPY(fineX,ip,fineX,zz);
|
||||
@ -216,19 +219,25 @@ inline void blockSum(Lattice<vobj> &coarseData,const Lattice<vobj> &fineData)
|
||||
block_r[d] = fine->_rdimensions[d] / coarse->_rdimensions[d];
|
||||
}
|
||||
|
||||
// Turn this around to loop threaded over sc and interior loop
|
||||
// over sf would thread better
|
||||
coarseData=zero;
|
||||
for(int sf=0;sf<fine->oSites();sf++){
|
||||
|
||||
parallel_region {
|
||||
|
||||
int sc;
|
||||
std::vector<int> coor_c(_ndimension);
|
||||
std::vector<int> coor_f(_ndimension);
|
||||
|
||||
Lexicographic::CoorFromIndex(coor_f,sf,fine->_rdimensions);
|
||||
for(int d=0;d<_ndimension;d++) coor_c[d]=coor_f[d]/block_r[d];
|
||||
Lexicographic::IndexFromCoor(coor_c,sc,coarse->_rdimensions);
|
||||
|
||||
coarseData._odata[sc]=coarseData._odata[sc]+fineData._odata[sf];
|
||||
parallel_for_internal(int sf=0;sf<fine->oSites();sf++){
|
||||
|
||||
Lexicographic::CoorFromIndex(coor_f,sf,fine->_rdimensions);
|
||||
for(int d=0;d<_ndimension;d++) coor_c[d]=coor_f[d]/block_r[d];
|
||||
Lexicographic::IndexFromCoor(coor_c,sc,coarse->_rdimensions);
|
||||
|
||||
PARALLEL_CRITICAL
|
||||
coarseData._odata[sc]=coarseData._odata[sc]+fineData._odata[sf];
|
||||
|
||||
}
|
||||
}
|
||||
return;
|
||||
}
|
||||
@ -238,7 +247,7 @@ inline void blockPick(GridBase *coarse,const Lattice<vobj> &unpicked,Lattice<vob
|
||||
{
|
||||
GridBase * fine = unpicked._grid;
|
||||
|
||||
Lattice<vobj> zz(fine);
|
||||
Lattice<vobj> zz(fine); zz.checkerboard = unpicked.checkerboard;
|
||||
Lattice<iScalar<vInteger> > fcoor(fine);
|
||||
|
||||
zz = zero;
|
||||
@ -303,20 +312,21 @@ inline void blockPromote(const Lattice<iVector<CComplex,nbasis > > &coarseData,
|
||||
}
|
||||
|
||||
// Loop with a cache friendly loop ordering
|
||||
for(int sf=0;sf<fine->oSites();sf++){
|
||||
|
||||
parallel_region {
|
||||
int sc;
|
||||
std::vector<int> coor_c(_ndimension);
|
||||
std::vector<int> coor_f(_ndimension);
|
||||
|
||||
Lexicographic::CoorFromIndex(coor_f,sf,fine->_rdimensions);
|
||||
for(int d=0;d<_ndimension;d++) coor_c[d]=coor_f[d]/block_r[d];
|
||||
Lexicographic::IndexFromCoor(coor_c,sc,coarse->_rdimensions);
|
||||
|
||||
for(int i=0;i<nbasis;i++) {
|
||||
if(i==0) fineData._odata[sf]=coarseData._odata[sc](i) * Basis[i]._odata[sf];
|
||||
else fineData._odata[sf]=fineData._odata[sf]+coarseData._odata[sc](i)*Basis[i]._odata[sf];
|
||||
parallel_for_internal(int sf=0;sf<fine->oSites();sf++){
|
||||
|
||||
Lexicographic::CoorFromIndex(coor_f,sf,fine->_rdimensions);
|
||||
for(int d=0;d<_ndimension;d++) coor_c[d]=coor_f[d]/block_r[d];
|
||||
Lexicographic::IndexFromCoor(coor_c,sc,coarse->_rdimensions);
|
||||
|
||||
for(int i=0;i<nbasis;i++) {
|
||||
if(i==0) fineData._odata[sf]=coarseData._odata[sc](i) * Basis[i]._odata[sf];
|
||||
else fineData._odata[sf]=fineData._odata[sf]+coarseData._odata[sc](i)*Basis[i]._odata[sf];
|
||||
}
|
||||
}
|
||||
}
|
||||
return;
|
||||
|
Loading…
x
Reference in New Issue
Block a user