1
0
mirror of https://github.com/paboyle/Grid.git synced 2024-11-10 07:55:35 +00:00

Read coalescing on Nvidia

This commit is contained in:
Peter Boyle 2020-01-27 12:29:56 -05:00
parent fa856c9669
commit 1bd87c35d7

View File

@@ -196,7 +196,7 @@ inline void blockZAXPY(Lattice<vobj> &fineZ,
auto fineY_ = fineY.View();
auto coarseA_= coarseA.View();
accelerator_for(sf, fine->oSites(), 1, {
accelerator_for(sf, fine->oSites(), CComplex::Nsimd(), {
int sc;
Coordinate coor_c(_ndimension);
@@ -207,7 +207,7 @@ inline void blockZAXPY(Lattice<vobj> &fineZ,
Lexicographic::IndexFromCoor(coor_c,sc,coarse->_rdimensions);
// z = A x + y
fineZ_[sf]=coarseA_[sc]*fineX_[sf]+fineY_[sf];
coalescedWrite(fineZ_[sf],coarseA_(sc)*fineX_(sf)+fineY_(sf));
});
@@ -397,8 +397,8 @@ inline void blockPromote(const Lattice<iVector<CComplex,nbasis > > &coarseData,
Lattice<CComplex> cip(coarse);
auto cip_ = cip.View();
auto ip_ = ip.View();
accelerator_for(sc,coarse->oSites(),1,{
cip_[sc] = ip_[sc]();
accelerator_forNB(sc,coarse->oSites(),CComplex::Nsimd(),{
coalescedWrite(cip_[sc], ip_(sc)());
});
blockZAXPY<vobj,CComplex >(fineData,cip,Basis[i],fineData);
}