mirror of
https://github.com/paboyle/Grid.git
synced 2025-04-27 14:15:55 +01:00
Simple block project promote runs faster on GPU
This commit is contained in:
parent
9e15474999
commit
9aafd20468
@ -1,5 +1,4 @@
|
|||||||
/*************************************************************************************
|
/*************************************************************************************
|
||||||
|
|
||||||
Grid physics library, www.github.com/paboyle/Grid
|
Grid physics library, www.github.com/paboyle/Grid
|
||||||
|
|
||||||
Source file: ./lib/lattice/Lattice_transfer.h
|
Source file: ./lib/lattice/Lattice_transfer.h
|
||||||
@ -83,12 +82,35 @@ template<class vobj> inline void setCheckerboard(Lattice<vobj> &full,const Latti
|
|||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
// blockProject: fills coarseData one basis component at a time.
// For each basis index v in [0,nbasis) it calls blockInnerProduct(ip,Basis[v],fineData)
// and then copies the resulting coarse-grid lattice ip into component v of coarseData.
//   coarseData : output, one iVector<CComplex,nbasis> per coarse site
//   fineData   : input field on the fine grid
//   Basis      : basis fields; indexed 0..nbasis-1 here
// NOTE(review): blockInnerProduct is defined outside this view; presumably it computes the
// per-block inner product of Basis[v] with fineData -- confirm against its definition.
template<class vobj,class CComplex,int nbasis>
|
template<class vobj,class CComplex,int nbasis>
|
||||||
inline void blockProject(Lattice<iVector<CComplex,nbasis > > &coarseData,
|
inline void blockProject(Lattice<iVector<CComplex,nbasis > > &coarseData,
|
||||||

const Lattice<vobj> &fineData,
|
||||||

const std::vector<Lattice<vobj> > &Basis)
|
||||||

{
|
||||||

// 'fine' is not referenced again in this body.
GridBase * fine = fineData.Grid();
|
||||||

GridBase * coarse= coarseData.Grid();
|
||||||


||||||

// Scratch lattice on the coarse grid; overwritten by blockInnerProduct for every v.
Lattice<CComplex> ip(coarse);
|
||||||


||||||

// auto fineData_ = fineData.View();
|
||||||

auto coarseData_ = coarseData.View();
|
||||||

// NOTE(review): this view is taken once, before ip is written inside the loop; this
// assumes the view keeps referring to ip's current data -- confirm.
auto ip_ = ip.View();
|
||||||

for(int v=0;v<nbasis;v++) {
|
||||||

blockInnerProduct(ip,Basis[v],fineData);
|
||||||

// Copy ip into component v of every coarse site (loop runs over coarse->oSites()).
accelerator_for( sc, coarse->oSites(), vobj::Nsimd(), {
|
||||||

coalescedWrite(coarseData_[sc](v),ip_(sc));
|
||||||

});
|
||||||

}
|
||||||

}
|
||||||
|
|
||||||
|
template<class vobj,class CComplex,int nbasis>
|
||||||
|
inline void blockProject1(Lattice<iVector<CComplex,nbasis > > &coarseData,
|
||||||
const Lattice<vobj> &fineData,
|
const Lattice<vobj> &fineData,
|
||||||
const std::vector<Lattice<vobj> > &Basis)
|
const std::vector<Lattice<vobj> > &Basis)
|
||||||
{
|
{
|
||||||
|
typedef iVector<CComplex,nbasis > coarseSiteData;
|
||||||
|
coarseSiteData elide;
|
||||||
|
typedef decltype(coalescedRead(elide)) ScalarComplex;
|
||||||
GridBase * fine = fineData.Grid();
|
GridBase * fine = fineData.Grid();
|
||||||
GridBase * coarse= coarseData.Grid();
|
GridBase * coarse= coarseData.Grid();
|
||||||
int _ndimension = coarse->_ndimension;
|
int _ndimension = coarse->_ndimension;
|
||||||
@ -116,11 +138,17 @@ inline void blockProject(Lattice<iVector<CComplex,nbasis > > &coarseData,
|
|||||||
// To make this lock free, loop over coarse sites in parallel, and then loop over the fine sites associated with each coarse site.
|
// To make this lock free, loop over coarse sites in parallel, and then loop over the fine sites associated with each coarse site.
|
||||||
// Otherwise do fine inner product per site, and make the update atomic
|
// Otherwise do fine inner product per site, and make the update atomic
|
||||||
////////////////////////////////////////////////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||||
accelerator_for( sc, coarse->oSites(), {
|
accelerator_for( sci, nbasis*coarse->oSites(), vobj::Nsimd(), {
|
||||||
|
|
||||||
|
auto sc=sci/nbasis;
|
||||||
|
auto i=sci%nbasis;
|
||||||
|
auto Basis_ = Basis[i].View();
|
||||||
|
|
||||||
Coordinate coor_c(_ndimension);
|
Coordinate coor_c(_ndimension);
|
||||||
Lexicographic::CoorFromIndex(coor_c,sc,coarse->_rdimensions); // Block coordinate
|
Lexicographic::CoorFromIndex(coor_c,sc,coarse->_rdimensions); // Block coordinate
|
||||||
coarseData_[sc]=Zero();
|
|
||||||
|
int sf;
|
||||||
|
decltype(innerProduct(Basis_(sf),fineData_(sf))) reduce=Zero();
|
||||||
|
|
||||||
for(int sb=0;sb<blockVol;sb++){
|
for(int sb=0;sb<blockVol;sb++){
|
||||||
|
|
||||||
@ -131,12 +159,9 @@ inline void blockProject(Lattice<iVector<CComplex,nbasis > > &coarseData,
|
|||||||
for(int d=0;d<_ndimension;d++) coor_f[d]=coor_c[d]*block_r[d]+coor_b[d];
|
for(int d=0;d<_ndimension;d++) coor_f[d]=coor_c[d]*block_r[d]+coor_b[d];
|
||||||
Lexicographic::IndexFromCoor(coor_f,sf,fine->_rdimensions);
|
Lexicographic::IndexFromCoor(coor_f,sf,fine->_rdimensions);
|
||||||
|
|
||||||
for(int i=0;i<nbasis;i++) {
|
reduce=reduce+innerProduct(Basis_(sf),fineData_(sf));
|
||||||
auto Basis_ = Basis[i].View();
|
|
||||||
auto ip = innerProduct(Basis_[sf],fineData_[sf]);
|
|
||||||
coarseData_[sc](i)=coarseData_[sc](i) + ip;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
coalescedWrite(coarseData_[sc](i),reduce);
|
||||||
});
|
});
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
@ -313,6 +338,7 @@ inline void blockOrthogonalise(Lattice<CComplex> &ip,std::vector<Lattice<vobj> >
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#if 0
|
||||||
template<class vobj,class CComplex,int nbasis>
|
template<class vobj,class CComplex,int nbasis>
|
||||||
inline void blockPromote(const Lattice<iVector<CComplex,nbasis > > &coarseData,
|
inline void blockPromote(const Lattice<iVector<CComplex,nbasis > > &coarseData,
|
||||||
Lattice<vobj> &fineData,
|
Lattice<vobj> &fineData,
|
||||||
@ -349,13 +375,35 @@ inline void blockPromote(const Lattice<iVector<CComplex,nbasis > > &coarseData,
|
|||||||
|
|
||||||
for(int i=0;i<nbasis;i++) {
|
for(int i=0;i<nbasis;i++) {
|
||||||
auto basis_ = Basis[i].View();
|
auto basis_ = Basis[i].View();
|
||||||
if(i==0) fineData_[sf]=coarseData_[sc](i) *basis_[sf];
|
if(i==0) fineData_[sf]=coarseData_[sc](i) *basis_[sf]);
|
||||||
else fineData_[sf]=fineData_[sf]+coarseData_[sc](i)*basis_[sf];
|
else fineData_[sf]=fineData_[sf]+coarseData_[sc](i)*basis_[sf]);
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
return;
|
return;
|
||||||
|
|
||||||
}
|
}
|
||||||
|
#else
|
||||||
|
// blockPromote (active #else variant): rebuilds fineData from coarseData and Basis.
// fineData is first set to Zero(); then, for each basis index i in [0,nbasis), component i
// of coarseData is extracted and passed with Basis[i] to blockZAXPY, which writes fineData.
//   coarseData : input, one iVector<CComplex,nbasis> per coarse site
//   fineData   : output field on the fine grid (overwritten)
//   Basis      : basis fields; indexed 0..nbasis-1 here
// NOTE(review): blockZAXPY is defined outside this view; presumably it computes
// fineData = cip*Basis[i] + fineData block by block -- confirm against its definition.
template<class vobj,class CComplex,int nbasis>
|
||||||

inline void blockPromote(const Lattice<iVector<CComplex,nbasis > > &coarseData,
|
||||||

Lattice<vobj> &fineData,
|
||||||

const std::vector<Lattice<vobj> > &Basis)
|
||||||

{
|
||||||

// 'fine' is not referenced again in this body.
GridBase * fine = fineData.Grid();
|
||||||

GridBase * coarse= coarseData.Grid();
|
||||||


||||||

// Start from zero; each loop iteration passes fineData to blockZAXPY as both input and output.
fineData=Zero();
|
||||||

for(int i=0;i<nbasis;i++) {
|
||||||

// Component i of the coarse vector, as a lattice of iScalar<CComplex>.
Lattice<iScalar<CComplex> > ip = PeekIndex<0>(coarseData,i);
|
||||||

// Same data as a plain Lattice<CComplex>, the type handed to blockZAXPY below.
Lattice<CComplex> cip(coarse);
|
||||||

auto cip_ = cip.View();
|
||||||

auto ip_ = ip.View();
|
||||||

// Site-by-site copy that unwraps the iScalar via operator().
accelerator_for(sc,coarse->oSites(),1,{
|
||||||

cip_[sc] = ip_[sc]();
|
||||||

});
|
||||||

blockZAXPY<vobj,CComplex >(fineData,cip,Basis[i],fineData);
|
||||||

}
|
||||||

}
|
||||||
|
#endif
|
||||||
|
|
||||||
// Useful for precision conversion, or indeed anything where an operator= does a conversion on scalars.
|
// Useful for precision conversion, or indeed anything where an operator= does a conversion on scalars.
|
||||||
// Simd layouts need not match since we use peek/poke Local
|
// Simd layouts need not match since we use peek/poke Local
|
||||||
|
Loading…
x
Reference in New Issue
Block a user