mirror of
https://github.com/paboyle/Grid.git
synced 2025-04-09 21:50:45 +01:00
Accelerator loop attempt at speed up
This commit is contained in:
parent
152b525a4d
commit
9e15474999
@ -106,6 +106,7 @@ inline void blockProject(Lattice<iVector<CComplex,nbasis > > &coarseData,
|
|||||||
block_r[d] = fine->_rdimensions[d] / coarse->_rdimensions[d];
|
block_r[d] = fine->_rdimensions[d] / coarse->_rdimensions[d];
|
||||||
assert(block_r[d]*coarse->_rdimensions[d] == fine->_rdimensions[d]);
|
assert(block_r[d]*coarse->_rdimensions[d] == fine->_rdimensions[d]);
|
||||||
}
|
}
|
||||||
|
int blockVol = fine->oSites()/coarse->oSites();
|
||||||
|
|
||||||
coarseData=Zero();
|
coarseData=Zero();
|
||||||
|
|
||||||
@ -113,20 +114,26 @@ inline void blockProject(Lattice<iVector<CComplex,nbasis > > &coarseData,
|
|||||||
auto coarseData_ = coarseData.View();
|
auto coarseData_ = coarseData.View();
|
||||||
////////////////////////////////////////////////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||||
// To make this lock free, loop over coarse in parallel, and then loop over fine associated with coarse.
|
// To make this lock free, loop over coarse in parallel, and then loop over fine associated with coarse.
|
||||||
// Otherwise do finee inner product per site, and make the update atomic
|
// Otherwise do fine inner product per site, and make the update atomic
|
||||||
////////////////////////////////////////////////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||||
thread_for( sf, fine->oSites(), {
|
accelerator_for( sc, coarse->oSites(), {
|
||||||
int sc;
|
|
||||||
Coordinate coor_c(_ndimension);
|
|
||||||
Coordinate coor_f(_ndimension);
|
|
||||||
Lexicographic::CoorFromIndex(coor_f,sf,fine->_rdimensions);
|
|
||||||
for(int d=0;d<_ndimension;d++) coor_c[d]=coor_f[d]/block_r[d];
|
|
||||||
Lexicographic::IndexFromCoor(coor_c,sc,coarse->_rdimensions);
|
|
||||||
|
|
||||||
for(int i=0;i<nbasis;i++) {
|
Coordinate coor_c(_ndimension);
|
||||||
auto Basis_ = Basis[i].View();
|
Lexicographic::CoorFromIndex(coor_c,sc,coarse->_rdimensions); // Block coordinate
|
||||||
auto ip = innerProduct(Basis_[sf],fineData_[sf]);
|
coarseData_[sc]=Zero();
|
||||||
thread_critical {
|
|
||||||
|
for(int sb=0;sb<blockVol;sb++){
|
||||||
|
|
||||||
|
Coordinate coor_b(_ndimension);
|
||||||
|
Coordinate coor_f(_ndimension);
|
||||||
|
|
||||||
|
Lexicographic::CoorFromIndex(coor_b,sb,block_r);
|
||||||
|
for(int d=0;d<_ndimension;d++) coor_f[d]=coor_c[d]*block_r[d]+coor_b[d];
|
||||||
|
Lexicographic::IndexFromCoor(coor_f,sf,fine->_rdimensions);
|
||||||
|
|
||||||
|
for(int i=0;i<nbasis;i++) {
|
||||||
|
auto Basis_ = Basis[i].View();
|
||||||
|
auto ip = innerProduct(Basis_[sf],fineData_[sf]);
|
||||||
coarseData_[sc](i)=coarseData_[sc](i) + ip;
|
coarseData_[sc](i)=coarseData_[sc](i) + ip;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -230,23 +237,29 @@ inline void blockSum(Lattice<vobj> &coarseData,const Lattice<vobj> &fineData)
|
|||||||
for(int d=0 ; d<_ndimension;d++){
|
for(int d=0 ; d<_ndimension;d++){
|
||||||
block_r[d] = fine->_rdimensions[d] / coarse->_rdimensions[d];
|
block_r[d] = fine->_rdimensions[d] / coarse->_rdimensions[d];
|
||||||
}
|
}
|
||||||
|
int blockVol = fine->oSites()/coarse->oSites();
|
||||||
|
|
||||||
// Turn this around to loop threaded over sc and interior loop
|
// Turn this around to loop threaded over sc and interior loop
|
||||||
// over sf would thread better
|
// over sf would thread better
|
||||||
coarseData=Zero();
|
|
||||||
auto coarseData_ = coarseData.View();
|
auto coarseData_ = coarseData.View();
|
||||||
auto fineData_ = fineData.View();
|
auto fineData_ = fineData.View();
|
||||||
|
|
||||||
thread_for(sf,fine->oSites(),{
|
accelerator_for(sc,coarse->oSites(),1,{
|
||||||
int sc;
|
|
||||||
|
// One thread per sub block
|
||||||
Coordinate coor_c(_ndimension);
|
Coordinate coor_c(_ndimension);
|
||||||
Coordinate coor_f(_ndimension);
|
Lexicographic::CoorFromIndex(coor_c,sc,coarse->_rdimensions); // Block coordinate
|
||||||
|
coarseData_[sc]=Zero();
|
||||||
Lexicographic::CoorFromIndex(coor_f,sf,fine->_rdimensions);
|
|
||||||
for(int d=0;d<_ndimension;d++) coor_c[d]=coor_f[d]/block_r[d];
|
for(int sb=0;sb<blockVol;sb++){
|
||||||
Lexicographic::IndexFromCoor(coor_c,sc,coarse->_rdimensions);
|
|
||||||
|
int sf;
|
||||||
thread_critical {
|
Coordinate coor_b(_ndimension);
|
||||||
|
Coordinate coor_f(_ndimension);
|
||||||
|
Lexicographic::CoorFromIndex(coor_b,sb,block_r); // Block sub coordinate
|
||||||
|
for(int d=0;d<_ndimension;d++) coor_f[d]=coor_c[d]*block_r[d] + coor_b[d];
|
||||||
|
Lexicographic::IndexFromCoor(coor_f,sf,fine->_rdimensions);
|
||||||
|
|
||||||
coarseData_[sc]=coarseData_[sc]+fineData_[sf];
|
coarseData_[sc]=coarseData_[sc]+fineData_[sf];
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user