mirror of
https://github.com/paboyle/Grid.git
synced 2025-04-09 21:50:45 +01:00
Read coalescing on Nvidia
This commit is contained in:
parent
fa856c9669
commit
1bd87c35d7
@@ -196,7 +196,7 @@ inline void blockZAXPY(Lattice<vobj> &fineZ,
|
|||||||
auto fineY_ = fineY.View();
|
auto fineY_ = fineY.View();
|
||||||
auto coarseA_= coarseA.View();
|
auto coarseA_= coarseA.View();
|
||||||
|
|
||||||
accelerator_for(sf, fine->oSites(), 1, {
|
accelerator_for(sf, fine->oSites(), CComplex::Nsimd(), {
|
||||||
|
|
||||||
int sc;
|
int sc;
|
||||||
Coordinate coor_c(_ndimension);
|
Coordinate coor_c(_ndimension);
|
||||||
@@ -207,7 +207,7 @@ inline void blockZAXPY(Lattice<vobj> &fineZ,
|
|||||||
Lexicographic::IndexFromCoor(coor_c,sc,coarse->_rdimensions);
|
Lexicographic::IndexFromCoor(coor_c,sc,coarse->_rdimensions);
|
||||||
|
|
||||||
// z = A x + y
|
// z = A x + y
|
||||||
fineZ_[sf]=coarseA_[sc]*fineX_[sf]+fineY_[sf];
|
coalescedWrite(fineZ_[sf],coarseA_(sc)*fineX_(sf)+fineY_(sf));
|
||||||
|
|
||||||
});
|
});
|
||||||
|
|
||||||
@@ -397,8 +397,8 @@ inline void blockPromote(const Lattice<iVector<CComplex,nbasis > > &coarseData,
|
|||||||
Lattice<CComplex> cip(coarse);
|
Lattice<CComplex> cip(coarse);
|
||||||
auto cip_ = cip.View();
|
auto cip_ = cip.View();
|
||||||
auto ip_ = ip.View();
|
auto ip_ = ip.View();
|
||||||
accelerator_for(sc,coarse->oSites(),1,{
|
accelerator_forNB(sc,coarse->oSites(),CComplex::Nsimd(),{
|
||||||
cip_[sc] = ip_[sc]();
|
coalescedWrite(cip_[sc], ip_(sc)());
|
||||||
});
|
});
|
||||||
blockZAXPY<vobj,CComplex >(fineData,cip,Basis[i],fineData);
|
blockZAXPY<vobj,CComplex >(fineData,cip,Basis[i],fineData);
|
||||||
}
|
}
|
||||||
|
Loading…
x
Reference in New Issue
Block a user