mirror of
https://github.com/paboyle/Grid.git
synced 2025-06-21 17:22:03 +01:00
Compare commits
7 Commits
09946cf1ba
...
3e448435d3
Author | SHA1 | Date | |
---|---|---|---|
3e448435d3 | |||
a294bc3c5b | |||
b302ad3d49 | |||
82fc4b1e94 | |||
b4f1740380 | |||
031f85247c | |||
639cc6f73a |
@ -68,7 +68,7 @@ public:
|
||||
///////////////////////
|
||||
// Interface
|
||||
///////////////////////
|
||||
GridBase * Grid(void) { return _FineGrid; }; // this is all the linalg routines need to know
|
||||
GridBase * Grid(void) { return _CoarseGrid; }; // this is all the linalg routines need to know
|
||||
GridBase * FineGrid(void) { return _FineGrid; }; // this is all the linalg routines need to know
|
||||
GridCartesian * CoarseGrid(void) { return _CoarseGrid; }; // this is all the linalg routines need to know
|
||||
|
||||
@ -238,7 +238,7 @@ public:
|
||||
std::cout << GridLogPerformance<<"Coarse total bytes "<< bytes/1e6<<" MB"<<std::endl;
|
||||
|
||||
};
|
||||
|
||||
|
||||
void PopulateAdag(void)
|
||||
{
|
||||
for(int64_t bidx=0;bidx<CoarseGrid()->gSites() ;bidx++){
|
||||
@ -451,4 +451,5 @@ public:
|
||||
};
|
||||
|
||||
|
||||
|
||||
NAMESPACE_END(Grid);
|
||||
|
185
Grid/algorithms/multigrid/GeneralCoarsenedMatrixMultiRHS.h
Normal file
185
Grid/algorithms/multigrid/GeneralCoarsenedMatrixMultiRHS.h
Normal file
@ -0,0 +1,185 @@
|
||||
/*************************************************************************************
|
||||
|
||||
Grid physics library, www.github.com/paboyle/Grid
|
||||
|
||||
Source file: ./Grid/algorithms/multigrid/GeneralCoarsenedMatrixMultiRHS.h
|
||||
|
||||
Copyright (C) 2015
|
||||
|
||||
Author: Peter Boyle <pboyle@bnl.gov>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along
|
||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
See the full license in the file "LICENSE" in the top level distribution directory
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
#pragma once
|
||||
|
||||
NAMESPACE_BEGIN(Grid);
|
||||
|
||||
// Fine Object == (per site) type of fine field
|
||||
// nbasis == number of deflation vectors
|
||||
template<class Fobj,class CComplex,int nbasis>
|
||||
class MultiGeneralCoarsenedMatrix : public SparseMatrixBase<Lattice<iVector<CComplex,nbasis > > > {
|
||||
public:
|
||||
typedef typename CComplex::scalar_object SComplex;
|
||||
typedef GeneralCoarsenedMatrix<Fobj,CComplex,nbasis> GeneralCoarseOp;
|
||||
typedef MultiGeneralCoarsenedMatrix<Fobj,CComplex,nbasis> MultiGeneralCoarseOp;
|
||||
|
||||
typedef iVector<CComplex,nbasis > siteVector;
|
||||
typedef iMatrix<CComplex,nbasis > siteMatrix;
|
||||
typedef iMatrix<SComplex,nbasis > calcMatrix;
|
||||
typedef Lattice<iScalar<CComplex> > CoarseComplexField;
|
||||
typedef Lattice<siteVector> CoarseVector;
|
||||
typedef Lattice<iMatrix<CComplex,nbasis > > CoarseMatrix;
|
||||
typedef iMatrix<CComplex,nbasis > Cobj;
|
||||
typedef iVector<CComplex,nbasis > Cvec;
|
||||
typedef Lattice< CComplex > CoarseScalar; // used for inner products on fine field
|
||||
typedef Lattice<Fobj > FineField;
|
||||
typedef CoarseVector Field;
|
||||
|
||||
////////////////////
|
||||
// Data members
|
||||
////////////////////
|
||||
GridCartesian * _CoarseGridMulti;
|
||||
GridCartesian * _CoarseGrid;
|
||||
GeneralCoarseOp & _Op;
|
||||
NonLocalStencilGeometry geom;
|
||||
PaddedCell Cell;
|
||||
GeneralLocalStencil Stencil;
|
||||
|
||||
std::vector<deviceVector<calcMatrix> > _A;
|
||||
std::vector<CoarseVector> MultTemporaries;
|
||||
|
||||
///////////////////////
|
||||
// Interface
|
||||
///////////////////////
|
||||
GridBase * Grid(void) { return _CoarseGridMulti; }; // this is all the linalg routines need to know
|
||||
GridCartesian * CoarseGrid(void) { return _CoarseGridMulti; }; // this is all the linalg routines need to know
|
||||
|
||||
MultiGeneralCoarsenedMatrix(GeneralCoarseOp & Op,GridCartesian *CoarseGridMulti) :
|
||||
_Op(Op),
|
||||
_CoarseGrid(Op.CoarseGrid()),
|
||||
_CoarseGridMulti(CoarseGridMulti),
|
||||
geom(_CoarseGridMulti,Op.geom.hops,Op.geom.skip+1),
|
||||
Cell(Op.geom.Depth(),_CoarseGridMulti),
|
||||
Stencil(Cell.grids.back(),geom.shifts)
|
||||
{
|
||||
_A.resize(geom.npoint);
|
||||
for(int p=0;p<geom.npoint;p++){
|
||||
_A[p].resize(_CoarseGrid->lSites());
|
||||
}
|
||||
CopyMatrix();
|
||||
}
|
||||
void CopyMatrix (void)
|
||||
{
|
||||
// Clone "A" to be lexicographic in the physics coords
|
||||
// Use unvectorisetolexordarray
|
||||
// Copy to device
|
||||
std::vector<calcMatrix> tmp;
|
||||
for(int p=0;p<geom.npoint;p++){
|
||||
unvectorizeToLexOrdArray(tmp,_Op._A[p]);
|
||||
acceleratorCopyToDevice(&tmp[0],&_A[p][0],sizeof(calcMatrix)*tmp.size());
|
||||
}
|
||||
}
|
||||
void Mdag(const CoarseVector &in, CoarseVector &out)
|
||||
{
|
||||
this->M(in,out);
|
||||
}
|
||||
void M (const CoarseVector &in, CoarseVector &out)
|
||||
{
|
||||
conformable(CoarseGrid(),in.Grid());
|
||||
conformable(in.Grid(),out.Grid());
|
||||
out.Checkerboard() = in.Checkerboard();
|
||||
CoarseVector tin=in;
|
||||
|
||||
CoarseVector pin = Cell.ExchangePeriodic(tin);
|
||||
CoarseVector pout(pin.Grid());
|
||||
|
||||
int npoint = geom.npoint;
|
||||
typedef calcMatrix* Aview;
|
||||
typedef LatticeView<Cvec> Vview;
|
||||
|
||||
const int Nsimd = CComplex::Nsimd();
|
||||
|
||||
int64_t osites=pin.Grid()->oSites();
|
||||
int64_t nrhs =pin.Grid()->GlobalDimensions()[0]/Nsimd;
|
||||
|
||||
{
|
||||
autoView( in_v , pin, AcceleratorRead);
|
||||
autoView( out_v , pout, AcceleratorWriteDiscard);
|
||||
autoView( Stencil_v , Stencil, AcceleratorRead);
|
||||
|
||||
// Static and prereserve to keep UVM region live and not resized across multiple calls
|
||||
MultTemporaries.resize(npoint,pin.Grid());
|
||||
|
||||
std::vector<Aview> AcceleratorViewContainer_h;
|
||||
std::vector<Vview> AcceleratorVecViewContainer_h;
|
||||
|
||||
for(int p=0;p<npoint;p++) {
|
||||
AcceleratorViewContainer_h.push_back( &_A[p][0]);
|
||||
AcceleratorVecViewContainer_h.push_back(MultTemporaries[p].View(AcceleratorWrite));
|
||||
}
|
||||
|
||||
static deviceVector<Aview> AcceleratorViewContainer; AcceleratorViewContainer.resize(npoint);
|
||||
static deviceVector<Vview> AcceleratorVecViewContainer; AcceleratorVecViewContainer.resize(npoint);
|
||||
|
||||
auto Aview_p = &AcceleratorViewContainer[0];
|
||||
auto Vview_p = &AcceleratorVecViewContainer[0];
|
||||
|
||||
acceleratorCopyToDevice(&AcceleratorViewContainer_h[0],&AcceleratorViewContainer[0],npoint *sizeof(Aview));
|
||||
acceleratorCopyToDevice(&AcceleratorVecViewContainer_h[0],&AcceleratorVecViewContainer[0],npoint *sizeof(Vview));
|
||||
|
||||
accelerator_for(rspb, osites*nbasis*npoint, Nsimd, {
|
||||
typedef decltype(coalescedRead(in_v[0](0))) calcComplex;
|
||||
int32_t ss = rspb/(nbasis*npoint);
|
||||
int32_t bp = rspb%(nbasis*npoint);
|
||||
int32_t point= bp/nbasis;
|
||||
int32_t b = bp%nbasis;
|
||||
auto SE = Stencil_v.GetEntry(point,ss);
|
||||
if ( SE->_permute == 0 ) {
|
||||
int32_t snbr= SE->_offset;
|
||||
auto nbr = coalescedReadGeneralPermute(in_v[snbr],SE->_permute,Nd);
|
||||
auto res = Aview_p[point][ss](0,b)*nbr(0);
|
||||
for(int bb=1;bb<nbasis;bb++) {
|
||||
res = res + Aview_p[point][ss](bb,b)*nbr(bb);
|
||||
}
|
||||
coalescedWrite(Vview_p[point][ss](b),res);
|
||||
}
|
||||
});
|
||||
accelerator_for(sb, osites*nbasis, Nsimd, {
|
||||
int ss = sb/nbasis;
|
||||
int b = sb%nbasis;
|
||||
auto res = coalescedRead(Vview_p[0][ss](b));
|
||||
for(int point=1;point<npoint;point++){
|
||||
res = res + coalescedRead(Vview_p[point][ss](b));
|
||||
}
|
||||
coalescedWrite(out_v[ss](b),res);
|
||||
});
|
||||
for(int p=0;p<npoint;p++) {
|
||||
AcceleratorVecViewContainer_h[p].ViewClose();
|
||||
}
|
||||
}
|
||||
|
||||
out = Cell.Extract(pout);
|
||||
|
||||
};
|
||||
virtual void Mdiag (const Field &in, Field &out){ assert(0);};
|
||||
virtual void Mdir (const Field &in, Field &out,int dir, int disp){assert(0);};
|
||||
virtual void MdirAll (const Field &in, std::vector<Field> &out){assert(0);};
|
||||
|
||||
};
|
||||
|
||||
NAMESPACE_END(Grid);
|
@ -104,7 +104,8 @@ public:
|
||||
/////////////////////////////////////////////////////////////////
|
||||
class NonLocalStencilGeometry {
|
||||
public:
|
||||
int depth;
|
||||
// int depth;
|
||||
int skip;
|
||||
int hops;
|
||||
int npoint;
|
||||
std::vector<Coordinate> shifts;
|
||||
@ -115,8 +116,7 @@ public:
|
||||
GridCartesian *Grid() {return grid;};
|
||||
int Depth(void){return 1;}; // Ghost zone depth
|
||||
int Hops(void){return hops;}; // # of hops=> level of corner fill in in stencil
|
||||
|
||||
virtual int DimSkip(void) =0;
|
||||
int DimSkip(void){return skip;};
|
||||
|
||||
virtual ~NonLocalStencilGeometry() {};
|
||||
|
||||
@ -156,7 +156,7 @@ public:
|
||||
std::cout << GridLogMessage << "NonLocalStencilGeometry has "<< this->npoint << " terms in stencil "<<std::endl;
|
||||
}
|
||||
|
||||
NonLocalStencilGeometry(GridCartesian *_coarse_grid,int _hops) : grid(_coarse_grid), hops(_hops)
|
||||
NonLocalStencilGeometry(GridCartesian *_coarse_grid,int _hops,int _skip) : grid(_coarse_grid), hops(_hops), skip(_skip)
|
||||
{
|
||||
Coordinate latt = grid->GlobalDimensions();
|
||||
stencil_size.resize(grid->Nd());
|
||||
@ -177,6 +177,7 @@ public:
|
||||
stencil_size[d]= 3;
|
||||
}
|
||||
}
|
||||
this->BuildShifts();
|
||||
};
|
||||
|
||||
};
|
||||
@ -184,14 +185,14 @@ public:
|
||||
// Need to worry about red-black now
|
||||
class NonLocalStencilGeometry4D : public NonLocalStencilGeometry {
|
||||
public:
|
||||
virtual int DimSkip(void) { return 0;};
|
||||
NonLocalStencilGeometry4D(GridCartesian *Coarse,int _hops) : NonLocalStencilGeometry(Coarse,_hops) { };
|
||||
virtual int DerivedDimSkip(void) { return 0;};
|
||||
NonLocalStencilGeometry4D(GridCartesian *Coarse,int _hops) : NonLocalStencilGeometry(Coarse,_hops,0) { };
|
||||
virtual ~NonLocalStencilGeometry4D() {};
|
||||
};
|
||||
class NonLocalStencilGeometry5D : public NonLocalStencilGeometry {
|
||||
public:
|
||||
virtual int DimSkip(void) { return 1; };
|
||||
NonLocalStencilGeometry5D(GridCartesian *Coarse,int _hops) : NonLocalStencilGeometry(Coarse,_hops) { };
|
||||
virtual int DerivedDimSkip(void) { return 1; };
|
||||
NonLocalStencilGeometry5D(GridCartesian *Coarse,int _hops) : NonLocalStencilGeometry(Coarse,_hops,1) { };
|
||||
virtual ~NonLocalStencilGeometry5D() {};
|
||||
};
|
||||
/*
|
||||
@ -201,42 +202,36 @@ class NextToNextToNextToNearestStencilGeometry4D : public NonLocalStencilGeometr
|
||||
public:
|
||||
NextToNextToNextToNearestStencilGeometry4D(GridCartesian *Coarse) : NonLocalStencilGeometry4D(Coarse,4)
|
||||
{
|
||||
this->BuildShifts();
|
||||
};
|
||||
};
|
||||
class NextToNextToNextToNearestStencilGeometry5D : public NonLocalStencilGeometry5D {
|
||||
public:
|
||||
NextToNextToNextToNearestStencilGeometry5D(GridCartesian *Coarse) : NonLocalStencilGeometry5D(Coarse,4)
|
||||
{
|
||||
this->BuildShifts();
|
||||
};
|
||||
};
|
||||
class NextToNearestStencilGeometry4D : public NonLocalStencilGeometry4D {
|
||||
public:
|
||||
NextToNearestStencilGeometry4D(GridCartesian *Coarse) : NonLocalStencilGeometry4D(Coarse,2)
|
||||
{
|
||||
this->BuildShifts();
|
||||
};
|
||||
};
|
||||
class NextToNearestStencilGeometry5D : public NonLocalStencilGeometry5D {
|
||||
public:
|
||||
NextToNearestStencilGeometry5D(GridCartesian *Coarse) : NonLocalStencilGeometry5D(Coarse,2)
|
||||
{
|
||||
this->BuildShifts();
|
||||
};
|
||||
};
|
||||
class NearestStencilGeometry4D : public NonLocalStencilGeometry4D {
|
||||
public:
|
||||
NearestStencilGeometry4D(GridCartesian *Coarse) : NonLocalStencilGeometry4D(Coarse,1)
|
||||
{
|
||||
this->BuildShifts();
|
||||
};
|
||||
};
|
||||
class NearestStencilGeometry5D : public NonLocalStencilGeometry5D {
|
||||
public:
|
||||
NearestStencilGeometry5D(GridCartesian *Coarse) : NonLocalStencilGeometry5D(Coarse,1)
|
||||
{
|
||||
this->BuildShifts();
|
||||
};
|
||||
};
|
||||
|
||||
|
@ -31,3 +31,4 @@ Author: Peter Boyle <pboyle@bnl.gov>
|
||||
#include <Grid/algorithms/multigrid/Geometry.h>
|
||||
#include <Grid/algorithms/multigrid/CoarsenedMatrix.h>
|
||||
#include <Grid/algorithms/multigrid/GeneralCoarsenedMatrix.h>
|
||||
#include <Grid/algorithms/multigrid/GeneralCoarsenedMatrixMultiRHS.h>
|
||||
|
@ -265,8 +265,8 @@ inline auto localInnerProductD(const Lattice<vobj> &lhs,const Lattice<vobj> &rhs
|
||||
////////////////////////////////////////////////////////////////////////////////////////////
|
||||
template<class vobj,class CComplex,int nbasis,class VLattice>
|
||||
inline void blockProject(Lattice<iVector<CComplex,nbasis > > &coarseData,
|
||||
const Lattice<vobj> &fineData,
|
||||
const VLattice &Basis)
|
||||
const Lattice<vobj> &fineData,
|
||||
const VLattice &Basis)
|
||||
{
|
||||
GridBase * fine = fineData.Grid();
|
||||
GridBase * coarse= coarseData.Grid();
|
||||
@ -300,6 +300,7 @@ inline void blockProject(Lattice<iVector<CComplex,nbasis > > &coarseData,
|
||||
// std::cout << GridLogPerformance << " blockProject : conv : "<<t_co<<" us"<<std::endl;
|
||||
// std::cout << GridLogPerformance << " blockProject : blockZaxpy : "<<t_za<<" us"<<std::endl;
|
||||
}
|
||||
|
||||
// This only minimises data motion from CPU to GPU;
|
||||
// there is a chance of a better implementation that does a vxk loop of inner products to share data
|
||||
// at the GPU thread level
|
||||
@ -1802,5 +1803,32 @@ void Grid_unsplit(std::vector<Lattice<Vobj> > & full,Lattice<Vobj> & split)
|
||||
}
|
||||
}
|
||||
|
||||
//////////////////////////////////////////////////////
|
||||
// MultiRHS interface support for coarse space
|
||||
// -- Simplest possible implementation to begin with
|
||||
//////////////////////////////////////////////////////
|
||||
template<class vobj,class CComplex,int nbasis,class VLattice>
|
||||
inline void blockProjectMany(Lattice<iVector<CComplex,nbasis > > &coarseIP,
|
||||
Lattice<iVector<CComplex,nbasis > > &coarseTMP,
|
||||
const VLattice &fineData, // Basis and fineData necessarily same type
|
||||
const VLattice &Basis)
|
||||
{
|
||||
for(int r=0;r<fineData.size();r++){
|
||||
blockProject(coarseTMP,fineData[r],Basis);
|
||||
InsertSliceLocal(coarseTMP, coarseIP,r,r,0);
|
||||
}
|
||||
}
|
||||
template<class vobj,class CComplex,int nbasis,class VLattice>
|
||||
inline void blockPromoteMany(Lattice<iVector<CComplex,nbasis > > &coarseIP,
|
||||
Lattice<iVector<CComplex,nbasis > > &coarseTMP,
|
||||
const VLattice &fineData, // Basis and fineData necessarily same type
|
||||
const VLattice &Basis)
|
||||
{
|
||||
for(int r=0;r<fineData.size();r++){
|
||||
ExtractSliceLocal(coarseTMP, coarseIP,r,r,0);
|
||||
blockPromote(coarseTMP,fineData[r],Basis);
|
||||
}
|
||||
}
|
||||
|
||||
NAMESPACE_END(Grid);
|
||||
|
||||
|
@ -234,8 +234,11 @@ public:
|
||||
}
|
||||
void DeleteGrids(void)
|
||||
{
|
||||
Coordinate processors=unpadded_grid->_processors;
|
||||
for(int d=0;d<grids.size();d++){
|
||||
delete grids[d];
|
||||
if ( processors[d] > 1 ) {
|
||||
delete grids[d];
|
||||
}
|
||||
}
|
||||
grids.resize(0);
|
||||
};
|
||||
|
@ -107,7 +107,7 @@ public:
|
||||
int rd = grid->_rdimensions[d];
|
||||
int ly = grid->_simd_layout[d];
|
||||
|
||||
assert((ly==1)||(ly==2));
|
||||
assert((ly==1)||(ly==2)||(ly==grid->Nsimd()));
|
||||
|
||||
int shift = (shifts[ii][d]+fd)%fd; // make it strictly positive 0.. L-1
|
||||
int x = Coor[d]; // x in [0... rd-1] as an oSite
|
||||
|
@ -538,6 +538,10 @@ void Grid_init(int *argc,char ***argv)
|
||||
|
||||
void Grid_finalize(void)
|
||||
{
|
||||
std::cout<<GridLogMessage<<"*******************************************"<<std::endl;
|
||||
std::cout<<GridLogMessage<<"******* Grid Finalize ******"<<std::endl;
|
||||
std::cout<<GridLogMessage<<"*******************************************"<<std::endl;
|
||||
|
||||
#if defined (GRID_COMMS_MPI) || defined (GRID_COMMS_MPI3) || defined (GRID_COMMS_MPIT)
|
||||
MPI_Barrier(MPI_COMM_WORLD);
|
||||
MPI_Finalize();
|
||||
|
@ -229,7 +229,36 @@ int main (int argc, char ** argv)
|
||||
std::cout<<GridLogMessage<<"*******************************************"<<std::endl;
|
||||
std::cout<<GridLogMessage<<"*******************************************"<<std::endl;
|
||||
std::cout<<GridLogMessage<<"*******************************************"<<std::endl;
|
||||
|
||||
|
||||
// Create a higher dim coarse grid
|
||||
const int nrhs=vComplex::Nsimd();
|
||||
|
||||
Coordinate mpi=GridDefaultMpi();
|
||||
Coordinate rhMpi ({1,1,mpi[0],mpi[1],mpi[2],mpi[3]});
|
||||
Coordinate rhLatt({nrhs,1,clatt[0],clatt[2],clatt[2],clatt[3]});
|
||||
Coordinate rhSimd({nrhs,1, 1,1,1,1});
|
||||
|
||||
GridCartesian *CoarseMrhs = new GridCartesian(rhLatt,rhSimd,rhMpi);
|
||||
|
||||
MultiGeneralCoarsenedMatrix mrhs(LittleDiracOp,CoarseMrhs);
|
||||
|
||||
{
|
||||
GridParallelRNG rh_CRNG(CoarseMrhs);rh_CRNG.SeedFixedIntegers(cseeds);
|
||||
CoarseVector rh_phi(CoarseMrhs);
|
||||
CoarseVector rh_res(CoarseMrhs);
|
||||
random(rh_CRNG,rh_phi);
|
||||
mrhs.M(rh_phi,rh_res);
|
||||
|
||||
for(int r=0;r<nrhs;r++){
|
||||
ExtractSlice(phi,rh_phi,r,0);
|
||||
ExtractSlice(chi,rh_res,r,0);
|
||||
LittleDiracOp.M(phi,Aphi);
|
||||
std::cout << r << " mrhs " << norm2(chi)<<std::endl;
|
||||
std::cout << r << " srhs " << norm2(Aphi)<<std::endl;
|
||||
chi=chi-Aphi;
|
||||
std::cout << r << " diff " << norm2(chi)<<std::endl;
|
||||
}
|
||||
}
|
||||
Grid_finalize();
|
||||
return 0;
|
||||
}
|
||||
|
Reference in New Issue
Block a user