1
0
mirror of https://github.com/paboyle/Grid.git synced 2025-04-09 21:50:45 +01:00

Read coalescing on Nvidia

This commit is contained in:
Peter Boyle 2020-01-27 12:29:56 -05:00
parent fa856c9669
commit 1bd87c35d7

View File

@ -196,7 +196,7 @@ inline void blockZAXPY(Lattice<vobj> &fineZ,
auto fineY_ = fineY.View(); auto fineY_ = fineY.View();
auto coarseA_= coarseA.View(); auto coarseA_= coarseA.View();
accelerator_for(sf, fine->oSites(), 1, { accelerator_for(sf, fine->oSites(), CComplex::Nsimd(), {
int sc; int sc;
Coordinate coor_c(_ndimension); Coordinate coor_c(_ndimension);
@ -207,7 +207,7 @@ inline void blockZAXPY(Lattice<vobj> &fineZ,
Lexicographic::IndexFromCoor(coor_c,sc,coarse->_rdimensions); Lexicographic::IndexFromCoor(coor_c,sc,coarse->_rdimensions);
// z = A x + y // z = A x + y
fineZ_[sf]=coarseA_[sc]*fineX_[sf]+fineY_[sf]; coalescedWrite(fineZ_[sf],coarseA_(sc)*fineX_(sf)+fineY_(sf));
}); });
@ -397,8 +397,8 @@ inline void blockPromote(const Lattice<iVector<CComplex,nbasis > > &coarseData,
Lattice<CComplex> cip(coarse); Lattice<CComplex> cip(coarse);
auto cip_ = cip.View(); auto cip_ = cip.View();
auto ip_ = ip.View(); auto ip_ = ip.View();
accelerator_for(sc,coarse->oSites(),1,{ accelerator_forNB(sc,coarse->oSites(),CComplex::Nsimd(),{
cip_[sc] = ip_[sc](); coalescedWrite(cip_[sc], ip_(sc)());
}); });
blockZAXPY<vobj,CComplex >(fineData,cip,Basis[i],fineData); blockZAXPY<vobj,CComplex >(fineData,cip,Basis[i],fineData);
} }