mirror of
https://github.com/paboyle/Grid.git
synced 2024-11-09 23:45:36 +00:00
multRHS initial support -- needs optimisation for multi project/promote.
Bug fix in freeing intermediate grids to stop double free
This commit is contained in:
parent
639cc6f73a
commit
031f85247c
@ -265,8 +265,8 @@ inline auto localInnerProductD(const Lattice<vobj> &lhs,const Lattice<vobj> &rhs
|
||||
////////////////////////////////////////////////////////////////////////////////////////////
|
||||
template<class vobj,class CComplex,int nbasis,class VLattice>
|
||||
inline void blockProject(Lattice<iVector<CComplex,nbasis > > &coarseData,
|
||||
const Lattice<vobj> &fineData,
|
||||
const VLattice &Basis)
|
||||
const Lattice<vobj> &fineData,
|
||||
const VLattice &Basis)
|
||||
{
|
||||
GridBase * fine = fineData.Grid();
|
||||
GridBase * coarse= coarseData.Grid();
|
||||
@ -300,6 +300,7 @@ inline void blockProject(Lattice<iVector<CComplex,nbasis > > &coarseData,
|
||||
// std::cout << GridLogPerformance << " blockProject : conv : "<<t_co<<" us"<<std::endl;
|
||||
// std::cout << GridLogPerformance << " blockProject : blockZaxpy : "<<t_za<<" us"<<std::endl;
|
||||
}
|
||||
|
||||
// This only minimises data motion from CPU to GPU
|
||||
// there is a chance of a better implementation that does a vxk loop of inner products to share data
|
||||
// at the GPU thread level
|
||||
@ -1802,5 +1803,32 @@ void Grid_unsplit(std::vector<Lattice<Vobj> > & full,Lattice<Vobj> & split)
|
||||
}
|
||||
}
|
||||
|
||||
//////////////////////////////////////////////////////
|
||||
// MultiRHS interface support for coarse space
|
||||
// -- Simplest possible implementation to begin with
|
||||
//////////////////////////////////////////////////////
|
||||
template<class vobj,class CComplex,int nbasis,class VLattice>
|
||||
inline void blockProjectMany(Lattice<iVector<CComplex,nbasis > > &coarseIP,
|
||||
Lattice<iVector<CComplex,nbasis > > &coarseTMP,
|
||||
const VLattice &fineData, // Basis and fineData necessarily same type
|
||||
const VLattice &Basis)
|
||||
{
|
||||
for(int r=0;r<fineData.size();r++){
|
||||
blockProject(coarseTMP,fineData[r],Basis);
|
||||
InsertSliceLocal(coarseTMP, coarseIP,r,r,0);
|
||||
}
|
||||
}
|
||||
template<class vobj,class CComplex,int nbasis,class VLattice>
|
||||
inline void blockPromoteMany(Lattice<iVector<CComplex,nbasis > > &coarseIP,
|
||||
Lattice<iVector<CComplex,nbasis > > &coarseTMP,
|
||||
const VLattice &fineData, // Basis and fineData necessarily same type
|
||||
const VLattice &Basis)
|
||||
{
|
||||
for(int r=0;r<fineData.size();r++){
|
||||
ExtractSliceLocal(coarseTMP, coarseIP,r,r,0);
|
||||
blockPromote(coarseTMP,fineData[r],Basis);
|
||||
}
|
||||
}
|
||||
|
||||
NAMESPACE_END(Grid);
|
||||
|
||||
|
@ -234,8 +234,11 @@ public:
|
||||
}
|
||||
void DeleteGrids(void)
|
||||
{
|
||||
Coordinate processors=unpadded_grid->_processors;
|
||||
for(int d=0;d<grids.size();d++){
|
||||
delete grids[d];
|
||||
if ( processors[d] > 1 ) {
|
||||
delete grids[d];
|
||||
}
|
||||
}
|
||||
grids.resize(0);
|
||||
};
|
||||
|
Loading…
Reference in New Issue
Block a user