diff --git a/Grid/algorithms/multigrid/Aggregates.h b/Grid/algorithms/multigrid/Aggregates.h new file mode 100644 index 00000000..9112b868 --- /dev/null +++ b/Grid/algorithms/multigrid/Aggregates.h @@ -0,0 +1,286 @@ +/************************************************************************************* + + Grid physics library, www.github.com/paboyle/Grid + + Source file: ./lib/algorithms/Aggregates.h + + Copyright (C) 2015 + +Author: Azusa Yamaguchi +Author: Peter Boyle +Author: Peter Boyle +Author: paboyle + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along + with this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + + See the full license in the file "LICENSE" in the top level distribution directory +*************************************************************************************/ +/* END LEGAL */ +#pragma once + +NAMESPACE_BEGIN(Grid); + +template +class Aggregation { +public: + typedef iVector siteVector; + typedef Lattice CoarseVector; + typedef Lattice > CoarseMatrix; + + typedef Lattice< CComplex > CoarseScalar; // used for inner products on fine field + typedef Lattice FineField; + + GridBase *CoarseGrid; + GridBase *FineGrid; + std::vector > subspace; + int checkerboard; + int Checkerboard(void){return checkerboard;} + Aggregation(GridBase *_CoarseGrid,GridBase *_FineGrid,int _checkerboard) : + CoarseGrid(_CoarseGrid), + FineGrid(_FineGrid), + subspace(nbasis,_FineGrid), + checkerboard(_checkerboard) + { + }; + + void SaveState(std::string file) + { +#ifdef HAVE_LIME + emptyUserRecord record; + ScidacWriter WR(Grid()->IsBoss()); + WR.open(file); + for(int b=0;b &hermop,int nn=nbasis) + { + + RealD scale; + + ConjugateGradient CG(1.0e-2,100,false); + FineField noise(FineGrid); + FineField Mn(FineGrid); + + for(int b=0;b "< "< &hermop, + int nn, + double hi, + double lo, + int orderfilter, + int ordermin, + int orderstep, + double filterlo + ) { + + RealD scale; + + FineField noise(FineGrid); + FineField Mn(FineGrid); + FineField tmp(FineGrid); + + // New normalised noise + gaussian(RNG,noise); + scale = std::pow(norm2(noise),-0.5); + noise=noise*scale; + + std::cout << GridLogMessage<<" Chebyshev subspace pass-1 : ord "< "< Cheb(lo,hi,orderfilter); + Cheb(hermop,noise,Mn); + // normalise + scale = std::pow(norm2(Mn),-0.5); Mn=Mn*scale; + subspace[b] = Mn; + hermop.Op(Mn,tmp); + std::cout< "<oSites(), Nsimd, { + coalescedWrite(y_v[ss],xscale*y_v(ss)+mscale*Tn_v(ss)); + coalescedWrite(Tnp_v[ss],2.0*y_v(ss)-Tnm_v(ss)); + }); + + // Possible more fine grained control is needed than a linear sweep, + // but huge productivity gain if this is simple algorithm and not a tunable + int m =1; + if ( n>=ordermin ) m=n-ordermin; + if ( (m%orderstep)==0 ) { + Mn=*Tnp; + scale = std::pow(norm2(Mn),-0.5); Mn=Mn*scale; + subspace[b] = Mn; + hermop.Op(Mn,tmp); + std::cout< "< &hermop, + int nn, + double hi, + double lo, + int orderfilter + ) { + + RealD scale; + + FineField noise(FineGrid); + FineField Mn(FineGrid); + FineField tmp(FineGrid); + + // New normalised noise + std::cout << GridLogMessage<<" Chebyshev subspace pure noise : ord "< "< Cheb(lo,hi,orderfilter); + Cheb(hermop,noise,Mn); + // normalise + scale = std::pow(norm2(Mn),-0.5); Mn=Mn*scale; + subspace[b] = Mn; + hermop.Op(Mn,tmp); + std::cout< "< +Author: Peter Boyle +Author: Peter Boyle +Author: paboyle + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along + with this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + + See the full license in the file "LICENSE" in the top level distribution directory +*************************************************************************************/ +/* END LEGAL */ +#ifndef GRID_ALGORITHM_COARSENED_MATRIX_H +#define GRID_ALGORITHM_COARSENED_MATRIX_H + +#include // needed for Dagger(Yes|No), Inverse(Yes|No) + +NAMESPACE_BEGIN(Grid); + +template +inline void blockMaskedInnerProduct(Lattice &CoarseInner, + const Lattice &FineMask, + const Lattice &fineX, + const Lattice &fineY) +{ + typedef decltype(innerProduct(vobj(),vobj())) dotp; + + GridBase *coarse(CoarseInner.Grid()); + GridBase *fine (fineX.Grid()); + + Lattice fine_inner(fine); fine_inner.Checkerboard() = fineX.Checkerboard(); + Lattice fine_inner_msk(fine); + + // Multiply could be fused with innerProduct + // Single block sum kernel could do both masks. + fine_inner = localInnerProduct(fineX,fineY); + mult(fine_inner_msk, fine_inner,FineMask); + blockSum(CoarseInner,fine_inner_msk); +} + +// Fine Object == (per site) type of fine field +// nbasis == number of deflation vectors +template +class CoarsenedMatrix : public CheckerBoardedSparseMatrixBase > > { +public: + + typedef iVector siteVector; + typedef Lattice CoarseComplexField; + typedef Lattice CoarseVector; + typedef Lattice > CoarseMatrix; + typedef iMatrix Cobj; + typedef Lattice< CComplex > CoarseScalar; // used for inner products on fine field + typedef Lattice FineField; + typedef CoarseVector FermionField; + + // enrich interface, use default implementation as in FermionOperator /////// + void Dminus(CoarseVector const& in, CoarseVector& out) { out = in; } + void DminusDag(CoarseVector const& in, CoarseVector& out) { out = in; } + void ImportPhysicalFermionSource(CoarseVector const& input, CoarseVector& imported) { imported = input; } + void ImportUnphysicalFermion(CoarseVector const& input, CoarseVector& imported) { imported = input; } + void ExportPhysicalFermionSolution(CoarseVector const& solution, CoarseVector& exported) { exported = solution; }; + void ExportPhysicalFermionSource(CoarseVector const& solution, CoarseVector& exported) { exported = solution; }; + + //////////////////// + // Data members + //////////////////// + Geometry geom; + GridBase * _grid; + GridBase* _cbgrid; + int hermitian; + + CartesianStencil Stencil; + CartesianStencil StencilEven; + CartesianStencil StencilOdd; + + std::vector A; + std::vector Aeven; + std::vector Aodd; + + CoarseMatrix AselfInv; + CoarseMatrix AselfInvEven; + CoarseMatrix AselfInvOdd; + + Vector dag_factor; + + /////////////////////// + // Interface + /////////////////////// + GridBase * Grid(void) { return _grid; }; // this is all the linalg routines need to know + GridBase * RedBlackGrid() { return _cbgrid; }; + + int ConstEE() { return 0; } + + void M (const CoarseVector &in, CoarseVector &out) + { + conformable(_grid,in.Grid()); + conformable(in.Grid(),out.Grid()); + out.Checkerboard() = in.Checkerboard(); + + SimpleCompressor compressor; + + Stencil.HaloExchange(in,compressor); + autoView( in_v , in, AcceleratorRead); + autoView( out_v , out, AcceleratorWrite); + autoView( Stencil_v , Stencil, AcceleratorRead); + int npoint = geom.npoint; + typedef LatticeView Aview; + + Vector AcceleratorViewContainer; + + for(int p=0;poSites(); + + accelerator_for(sss, Grid()->oSites()*nbasis, Nsimd, { + int ss = sss/nbasis; + int b = sss%nbasis; + calcComplex res = Zero(); + calcVector nbr; + int ptype; + StencilEntry *SE; + + for(int point=0;point_is_local) { + nbr = coalescedReadPermute(in_v[SE->_offset],ptype,SE->_permute); + } else { + nbr = coalescedRead(Stencil_v.CommBuf()[SE->_offset]); + } + acceleratorSynchronise(); + + for(int bb=0;bb compressor; + + Stencil.HaloExchange(in,compressor); + autoView( in_v , in, AcceleratorRead); + autoView( out_v , out, AcceleratorWrite); + autoView( Stencil_v , Stencil, AcceleratorRead); + int npoint = geom.npoint; + typedef LatticeView Aview; + + Vector AcceleratorViewContainer; + + for(int p=0;poSites(); + + Vector points(geom.npoint, 0); + for(int p=0; poSites()*nbasis, Nsimd, { + int ss = sss/nbasis; + int b = sss%nbasis; + calcComplex res = Zero(); + calcVector nbr; + int ptype; + StencilEntry *SE; + + for(int p=0;p_is_local) { + nbr = coalescedReadPermute(in_v[SE->_offset],ptype,SE->_permute); + } else { + nbr = coalescedRead(Stencil_v.CommBuf()[SE->_offset]); + } + acceleratorSynchronise(); + + for(int bb=0;bb compressor; + Stencil.HaloExchange(in,compressor); + } + void MdirCalc(const CoarseVector &in, CoarseVector &out, int point) + { + conformable(_grid,in.Grid()); + conformable(_grid,out.Grid()); + out.Checkerboard() = in.Checkerboard(); + + typedef LatticeView Aview; + Vector AcceleratorViewContainer; + for(int p=0;poSites()*nbasis, Nsimd, { + int ss = sss/nbasis; + int b = sss%nbasis; + calcComplex res = Zero(); + calcVector nbr; + int ptype; + StencilEntry *SE; + + SE=Stencil_v.GetEntry(ptype,point,ss); + + if(SE->_is_local) { + nbr = coalescedReadPermute(in_v[SE->_offset],ptype,SE->_permute); + } else { + nbr = coalescedRead(Stencil_v.CommBuf()[SE->_offset]); + } + acceleratorSynchronise(); + + for(int bb=0;bb &out) + { + this->MdirComms(in); + int ndir=geom.npoint-1; + if ((out.size()!=ndir)&&(out.size()!=ndir+1)) { + std::cout <<"MdirAll out size "<< out.size()<MdirComms(in); + + MdirCalc(in,out,geom.point(dir,disp)); + }; + + void Mdiag(const CoarseVector &in, CoarseVector &out) + { + int point=geom.npoint-1; + MdirCalc(in, out, point); // No comms + }; + + void Mooee(const CoarseVector &in, CoarseVector &out) { + MooeeInternal(in, out, DaggerNo, InverseNo); + } + + void MooeeInv(const CoarseVector &in, CoarseVector &out) { + MooeeInternal(in, out, DaggerNo, InverseYes); + } + + void MooeeDag(const CoarseVector &in, CoarseVector &out) { + MooeeInternal(in, out, DaggerYes, InverseNo); + } + + void MooeeInvDag(const CoarseVector &in, CoarseVector &out) { + MooeeInternal(in, out, DaggerYes, InverseYes); + } + + void Meooe(const CoarseVector &in, CoarseVector &out) { + if(in.Checkerboard() == Odd) { + DhopEO(in, out, DaggerNo); + } else { + DhopOE(in, out, DaggerNo); + } + } + + void MeooeDag(const CoarseVector &in, CoarseVector &out) { + if(in.Checkerboard() == Odd) { + DhopEO(in, out, DaggerYes); + } else { + DhopOE(in, out, DaggerYes); + } + } + + void Dhop(const CoarseVector &in, CoarseVector &out, int dag) { + conformable(in.Grid(), _grid); // verifies full grid + conformable(in.Grid(), out.Grid()); + + out.Checkerboard() = in.Checkerboard(); + + DhopInternal(Stencil, A, in, out, dag); + } + + void DhopOE(const CoarseVector &in, CoarseVector &out, int dag) { + conformable(in.Grid(), _cbgrid); // verifies half grid + conformable(in.Grid(), out.Grid()); // drops the cb check + + assert(in.Checkerboard() == Even); + out.Checkerboard() = Odd; + + DhopInternal(StencilEven, Aodd, in, out, dag); + } + + void DhopEO(const CoarseVector &in, CoarseVector &out, int dag) { + conformable(in.Grid(), _cbgrid); // verifies half grid + conformable(in.Grid(), out.Grid()); // drops the cb check + + assert(in.Checkerboard() == Odd); + out.Checkerboard() = Even; + + DhopInternal(StencilOdd, Aeven, in, out, dag); + } + + void MooeeInternal(const CoarseVector &in, CoarseVector &out, int dag, int inv) { + out.Checkerboard() = in.Checkerboard(); + assert(in.Checkerboard() == Odd || in.Checkerboard() == Even); + + CoarseMatrix *Aself = nullptr; + if(in.Grid()->_isCheckerBoarded) { + if(in.Checkerboard() == Odd) { + Aself = (inv) ? &AselfInvOdd : &Aodd[geom.npoint-1]; + DselfInternal(StencilOdd, *Aself, in, out, dag); + } else { + Aself = (inv) ? &AselfInvEven : &Aeven[geom.npoint-1]; + DselfInternal(StencilEven, *Aself, in, out, dag); + } + } else { + Aself = (inv) ? &AselfInv : &A[geom.npoint-1]; + DselfInternal(Stencil, *Aself, in, out, dag); + } + assert(Aself != nullptr); + } + + void DselfInternal(CartesianStencil &st, CoarseMatrix &a, + const CoarseVector &in, CoarseVector &out, int dag) { + int point = geom.npoint-1; + autoView( out_v, out, AcceleratorWrite); + autoView( in_v, in, AcceleratorRead); + autoView( st_v, st, AcceleratorRead); + autoView( a_v, a, AcceleratorRead); + + const int Nsimd = CComplex::Nsimd(); + typedef decltype(coalescedRead(in_v[0])) calcVector; + typedef decltype(coalescedRead(in_v[0](0))) calcComplex; + + RealD* dag_factor_p = &dag_factor[0]; + + if(dag) { + accelerator_for(sss, in.Grid()->oSites()*nbasis, Nsimd, { + int ss = sss/nbasis; + int b = sss%nbasis; + calcComplex res = Zero(); + calcVector nbr; + int ptype; + StencilEntry *SE; + + SE=st_v.GetEntry(ptype,point,ss); + + if(SE->_is_local) { + nbr = coalescedReadPermute(in_v[SE->_offset],ptype,SE->_permute); + } else { + nbr = coalescedRead(st_v.CommBuf()[SE->_offset]); + } + acceleratorSynchronise(); + + for(int bb=0;bboSites()*nbasis, Nsimd, { + int ss = sss/nbasis; + int b = sss%nbasis; + calcComplex res = Zero(); + calcVector nbr; + int ptype; + StencilEntry *SE; + + SE=st_v.GetEntry(ptype,point,ss); + + if(SE->_is_local) { + nbr = coalescedReadPermute(in_v[SE->_offset],ptype,SE->_permute); + } else { + nbr = coalescedRead(st_v.CommBuf()[SE->_offset]); + } + acceleratorSynchronise(); + + for(int bb=0;bb &st, std::vector &a, + const CoarseVector &in, CoarseVector &out, int dag) { + SimpleCompressor compressor; + + st.HaloExchange(in,compressor); + autoView( in_v, in, AcceleratorRead); + autoView( out_v, out, AcceleratorWrite); + autoView( st_v , st, AcceleratorRead); + typedef LatticeView Aview; + + // determine in what order we need the points + int npoint = geom.npoint-1; + Vector points(npoint, 0); + for(int p=0; p AcceleratorViewContainer; + for(int p=0;poSites()*nbasis, Nsimd, { + int ss = sss/nbasis; + int b = sss%nbasis; + calcComplex res = Zero(); + calcVector nbr; + int ptype; + StencilEntry *SE; + + for(int p=0;p_is_local) { + nbr = coalescedReadPermute(in_v[SE->_offset],ptype,SE->_permute); + } else { + nbr = coalescedRead(st_v.CommBuf()[SE->_offset]); + } + acceleratorSynchronise(); + + for(int bb=0;bboSites()*nbasis, Nsimd, { + int ss = sss/nbasis; + int b = sss%nbasis; + calcComplex res = Zero(); + calcVector nbr; + int ptype; + StencilEntry *SE; + + for(int p=0;p_is_local) { + nbr = coalescedReadPermute(in_v[SE->_offset],ptype,SE->_permute); + } else { + nbr = coalescedRead(st_v.CommBuf()[SE->_offset]); + } + acceleratorSynchronise(); + + for(int bb=0;bb > &linop, + Aggregation & Subspace) + { + typedef Lattice FineComplexField; + typedef typename Fobj::scalar_type scalar_type; + + std::cout << GridLogMessage<< "CoarsenMatrix "<< std::endl; + + FineComplexField one(FineGrid); one=scalar_type(1.0,0.0); + FineComplexField zero(FineGrid); zero=scalar_type(0.0,0.0); + + std::vector masks(geom.npoint,FineGrid); + FineComplexField imask(FineGrid); // contributions from within this block + FineComplexField omask(FineGrid); // contributions from outwith this block + + FineComplexField evenmask(FineGrid); + FineComplexField oddmask(FineGrid); + + FineField phi(FineGrid); + FineField tmp(FineGrid); + FineField zz(FineGrid); zz=Zero(); + FineField Mphi(FineGrid); + FineField Mphie(FineGrid); + FineField Mphio(FineGrid); + std::vector Mphi_p(geom.npoint,FineGrid); + + Lattice > coor (FineGrid); + Lattice > bcoor(FineGrid); + Lattice > bcb (FineGrid); bcb = Zero(); + + CoarseVector iProj(Grid()); + CoarseVector oProj(Grid()); + CoarseVector SelfProj(Grid()); + CoarseComplexField iZProj(Grid()); + CoarseComplexField oZProj(Grid()); + + CoarseScalar InnerProd(Grid()); + + std::cout << GridLogMessage<< "CoarsenMatrix Orthog "<< std::endl; + // Orthogonalise the subblocks over the basis + blockOrthogonalise(InnerProd,Subspace.subspace); + + // Compute the matrix elements of linop between this orthonormal + // set of vectors. + std::cout << GridLogMessage<< "CoarsenMatrix masks "<< std::endl; + int self_stencil=-1; + for(int p=0;p_rdimensions[dir])/(Grid()->_rdimensions[dir]); + + LatticeCoordinate(coor,dir); + + /////////////////////////////////////////////////////// + // Work out even and odd block checkerboarding for fast diagonal term + /////////////////////////////////////////////////////// + if ( disp==1 ) { + bcb = bcb + div(coor,block); + } + + if ( disp==0 ) { + masks[p]= Zero(); + } else if ( disp==1 ) { + masks[p] = where(mod(coor,block)==(block-1),one,zero); + } else if ( disp==-1 ) { + masks[p] = where(mod(coor,block)==(Integer)0,one,zero); + } + } + evenmask = where(mod(bcb,2)==(Integer)0,one,zero); + oddmask = one-evenmask; + + assert(self_stencil!=-1); + + for(int i=0;ioSites(), Fobj::Nsimd(),{ coalescedWrite(A_p[ss](j,i),oZProj_v(ss)); }); + if ( hermitian && (disp==-1) ) { + for(int pp=0;pp = * + int dirp = geom.directions[pp]; + int dispp = geom.displacements[pp]; + if ( (dirp==dir) && (dispp==1) ){ + auto sft = conjugate(Cshift(oZProj,dir,1)); + autoView( sft_v , sft , AcceleratorWrite); + autoView( A_pp , A[pp], AcceleratorWrite); + accelerator_for(ss, Grid()->oSites(), Fobj::Nsimd(),{ coalescedWrite(A_pp[ss](i,j),sft_v(ss)); }); + } + } + } + + } + } + } + + /////////////////////////////////////////// + // Faster alternate self coupling.. use hermiticity to save 2x + /////////////////////////////////////////// + { + mult(tmp,phi,evenmask); linop.Op(tmp,Mphie); + mult(tmp,phi,oddmask ); linop.Op(tmp,Mphio); + + { + autoView( tmp_ , tmp, AcceleratorWrite); + autoView( evenmask_ , evenmask, AcceleratorRead); + autoView( oddmask_ , oddmask, AcceleratorRead); + autoView( Mphie_ , Mphie, AcceleratorRead); + autoView( Mphio_ , Mphio, AcceleratorRead); + accelerator_for(ss, FineGrid->oSites(), Fobj::Nsimd(),{ + coalescedWrite(tmp_[ss],evenmask_(ss)*Mphie_(ss) + oddmask_(ss)*Mphio_(ss)); + }); + } + + blockProject(SelfProj,tmp,Subspace.subspace); + + autoView( SelfProj_ , SelfProj, AcceleratorRead); + autoView( A_self , A[self_stencil], AcceleratorWrite); + + accelerator_for(ss, Grid()->oSites(), Fobj::Nsimd(),{ + for(int j=0;jlSites(); + + typedef typename Cobj::scalar_object scalar_object; + + autoView(Aself_v, A[geom.npoint-1], CpuRead); + autoView(AselfInv_v, AselfInv, CpuWrite); + thread_for(site, localVolume, { // NOTE: Not able to bring this to GPU because of Eigen + peek/poke + Eigen::MatrixXcd selfLinkEigen = Eigen::MatrixXcd::Zero(nbasis, nbasis); + Eigen::MatrixXcd selfLinkInvEigen = Eigen::MatrixXcd::Zero(nbasis, nbasis); + + scalar_object selfLink = Zero(); + scalar_object selfLinkInv = Zero(); + + Coordinate lcoor; + + Grid()->LocalIndexToLocalCoor(site, lcoor); + peekLocalSite(selfLink, Aself_v, lcoor); + + for (int i = 0; i < nbasis; ++i) + for (int j = 0; j < nbasis; ++j) + selfLinkEigen(i, j) = static_cast(TensorRemove(selfLink(i, j))); + + selfLinkInvEigen = selfLinkEigen.inverse(); + + for(int i = 0; i < nbasis; ++i) + for(int j = 0; j < nbasis; ++j) + selfLinkInv(i, j) = selfLinkInvEigen(i, j); + + pokeLocalSite(selfLinkInv, AselfInv_v, lcoor); + }); + } + + void FillHalfCbs() { + std::cout << GridLogDebug << "CoarsenedMatrix::FillHalfCbs" << std::endl; + for(int p = 0; p < geom.npoint; ++p) { + pickCheckerboard(Even, Aeven[p], A[p]); + pickCheckerboard(Odd, Aodd[p], A[p]); + } + pickCheckerboard(Even, AselfInvEven, AselfInv); + pickCheckerboard(Odd, AselfInvOdd, AselfInv); + } +}; + +NAMESPACE_END(Grid); +#endif diff --git a/Grid/algorithms/multigrid/GeneralCoarsenedMatrix.h b/Grid/algorithms/multigrid/GeneralCoarsenedMatrix.h new file mode 100644 index 00000000..c15a2a63 --- /dev/null +++ b/Grid/algorithms/multigrid/GeneralCoarsenedMatrix.h @@ -0,0 +1,418 @@ +/************************************************************************************* + + Grid physics library, www.github.com/paboyle/Grid + + Source file: ./lib/algorithms/GeneralCoarsenedMatrix.h + + Copyright (C) 2015 + +Author: Peter Boyle + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along + with this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + + See the full license in the file "LICENSE" in the top level distribution directory +*************************************************************************************/ +/* END LEGAL */ +#pragma once + +#include // needed for Dagger(Yes|No), Inverse(Yes|No) + +#include +#include + +NAMESPACE_BEGIN(Grid); + +// Fine Object == (per site) type of fine field +// nbasis == number of deflation vectors +template +class GeneralCoarsenedMatrix : public SparseMatrixBase > > { +public: + + typedef GeneralCoarsenedMatrix GeneralCoarseOp; + typedef iVector siteVector; + typedef iMatrix siteMatrix; + typedef Lattice > CoarseComplexField; + typedef Lattice CoarseVector; + typedef Lattice > CoarseMatrix; + typedef iMatrix Cobj; + typedef Lattice< CComplex > CoarseScalar; // used for inner products on fine field + typedef Lattice FineField; + typedef CoarseVector Field; + //////////////////// + // Data members + //////////////////// + int hermitian; + GridBase * _FineGrid; + GridCartesian * _CoarseGrid; + NonLocalStencilGeometry &geom; + PaddedCell Cell; + GeneralLocalStencil Stencil; + + std::vector _A; + std::vector _Adag; + + /////////////////////// + // Interface + /////////////////////// + GridBase * Grid(void) { return _FineGrid; }; // this is all the linalg routines need to know + GridBase * FineGrid(void) { return _FineGrid; }; // this is all the linalg routines need to know + GridCartesian * CoarseGrid(void) { return _CoarseGrid; }; // this is all the linalg routines need to know + + void ProjectNearestNeighbour(RealD shift, GeneralCoarseOp &CopyMe) + { + int nfound=0; + std::cout << GridLogMessage <<"GeneralCoarsenedMatrix::ProjectNearestNeighbour "<< CopyMe._A[0].Grid()<oSites(); + for(int ss=0;ss_offset; + assert( o< osites); + } + } + } + + _A.resize(geom.npoint,CoarseGrid); + _Adag.resize(geom.npoint,CoarseGrid); + } + void M (const CoarseVector &in, CoarseVector &out) + { + Mult(_A,in,out); + } + void Mdag (const CoarseVector &in, CoarseVector &out) + { + if ( hermitian ) M(in,out); + else Mult(_Adag,in,out); + } + void Mult (std::vector &A,const CoarseVector &in, CoarseVector &out) + { + RealD tviews=0; + RealD ttot=0; + RealD tmult=0; + RealD texch=0; + RealD text=0; + ttot=-usecond(); + conformable(CoarseGrid(),in.Grid()); + conformable(in.Grid(),out.Grid()); + out.Checkerboard() = in.Checkerboard(); + CoarseVector tin=in; + + texch-=usecond(); + CoarseVector pin = Cell.Exchange(tin); + texch+=usecond(); + + CoarseVector pout(pin.Grid()); pout=Zero(); + + int npoint = geom.npoint; + typedef LatticeView Aview; + + const int Nsimd = CComplex::Nsimd(); + + int osites=pin.Grid()->oSites(); + // int gsites=pin.Grid()->gSites(); + + RealD flops = 1.0* npoint * nbasis * nbasis * 8 * osites; + RealD bytes = (1.0*osites*sizeof(siteMatrix)*npoint+2.0*osites*sizeof(siteVector))*npoint; + + // for(int point=0;point_offset],SE->_permute,Nd); + auto res = out_v(ss)(b); + for(int bb=0;bbgSites() ;bidx++){ + Coordinate bcoor; + CoarseGrid()->GlobalIndexToGlobalCoor(bidx,bcoor); + + for(int p=0;pGlobalDimensions()[mu]; + scoor[mu] = (bcoor[mu] - geom.shifts[p][mu] + L) % L; // Modulo arithmetic + } + // Flip to poke/peekLocalSite and not too bad + auto link = peekSite(_A[p],scoor); + int pp = geom.Reverse(p); + pokeSite(adj(link),_Adag[pp],bcoor); + } + } + } + ///////////////////////////////////////////////////////////// + // + // A) Only reduced flops option is to use a padded cell of depth 4 + // and apply MpcDagMpc in the padded cell. + // + // Makes for ONE application of MpcDagMpc per vector instead of 30 or 80. + // With the effective cell size around (B+8)^4 perhaps 12^4/4^4 ratio + // Cost is 81x more, same as stencil size. + // + // But: can eliminate comms and do as local dirichlet. + // + // Local exchange gauge field once. + // Apply to all vectors, local only computation. + // Must exchange ghost subcells in reverse process of PaddedCell to take inner products + // + // B) Can reduce cost: pad by 1, apply Deo (4^4+6^4+8^4+8^4 )/ (4x 4^4) + // pad by 2, apply Doe + // pad by 3, apply Deo + // then break out 8x directions; cost is ~10x MpcDagMpc per vector + // + // => almost factor of 10 in setup cost, excluding data rearrangement + // + // Intermediates -- ignore the corner terms, leave approximate and force Hermitian + // Intermediates -- pad by 2 and apply 1+8+24 = 33 times. + ///////////////////////////////////////////////////////////// + + ////////////////////////////////////////////////////////// + // BFM HDCG style approach: Solve a system of equations to get Aij + ////////////////////////////////////////////////////////// + /* + * Here, k,l index which possible shift within the 3^Nd "ball" connected by MdagM. + * + * conj(phases[block]) proj[k][ block*Nvec+j ] = \sum_ball e^{i q_k . delta} < phi_{block,j} | MdagM | phi_{(block+delta),i} > + * = \sum_ball e^{iqk.delta} A_ji + * + * Must invert matrix M_k,l = e^[i q_k . delta_l] + * + * Where q_k = delta_k . (2*M_PI/global_nb[mu]) + */ + void CoarsenOperator(LinearOperatorBase > &linop, + Aggregation & Subspace) + { + std::cout << GridLogMessage<< "GeneralCoarsenMatrix "<< std::endl; + GridBase *grid = FineGrid(); + + RealD tproj=0.0; + RealD teigen=0.0; + RealD tmat=0.0; + RealD tphase=0.0; + RealD tinv=0.0; + + ///////////////////////////////////////////////////////////// + // Orthogonalise the subblocks over the basis + ///////////////////////////////////////////////////////////// + CoarseScalar InnerProd(CoarseGrid()); + blockOrthogonalise(InnerProd,Subspace.subspace); + + const int npoint = geom.npoint; + + Coordinate clatt = CoarseGrid()->GlobalDimensions(); + int Nd = CoarseGrid()->Nd(); + + /* + * Here, k,l index which possible momentum/shift within the N-points connected by MdagM. + * Matrix index i is mapped to this shift via + * geom.shifts[i] + * + * conj(pha[block]) proj[k (which mom)][j (basis vec cpt)][block] + * = \sum_{l in ball} e^{i q_k . delta_l} < phi_{block,j} | MdagM | phi_{(block+delta_l),i} > + * = \sum_{l in ball} e^{iqk.delta_l} A_ji^{b.b+l} + * = M_{kl} A_ji^{b.b+l} + * + * Must assemble and invert matrix M_k,l = e^[i q_k . delta_l] + * + * Where q_k = delta_k . (2*M_PI/global_nb[mu]) + * + * Then A{ji}^{b,b+l} = M^{-1}_{lm} ComputeProj_{m,b,i,j} + */ + teigen-=usecond(); + Eigen::MatrixXcd Mkl = Eigen::MatrixXcd::Zero(npoint,npoint); + Eigen::MatrixXcd invMkl = Eigen::MatrixXcd::Zero(npoint,npoint); + ComplexD ci(0.0,1.0); + for(int k=0;k ComputeProj(npoint,CoarseGrid()); + std::vector FT(npoint,CoarseGrid()); + for(int i=0;ioSites(); + autoView( A_v , _A[k], AcceleratorWrite); + autoView( FT_v , FT[k], AcceleratorRead); + accelerator_for(sss, osites, 1, { + for(int j=0;j &out){assert(0);}; +}; + + +NAMESPACE_END(Grid); diff --git a/Grid/algorithms/multigrid/Geometry.h b/Grid/algorithms/multigrid/Geometry.h new file mode 100644 index 00000000..74db4417 --- /dev/null +++ b/Grid/algorithms/multigrid/Geometry.h @@ -0,0 +1,243 @@ +/************************************************************************************* + + Grid physics library, www.github.com/paboyle/Grid + + Source file: ./lib/algorithms/GeneralCoarsenedMatrix.h + + Copyright (C) 2015 + +Author: Peter Boyle + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along + with this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + + See the full license in the file "LICENSE" in the top level distribution directory +*************************************************************************************/ +/* END LEGAL */ +#pragma once + +NAMESPACE_BEGIN(Grid); + + +///////////////////////////////////////////////////////////////// +// Geometry class in cartesian case +///////////////////////////////////////////////////////////////// + +class Geometry { +public: + int npoint; + int base; + std::vector directions ; + std::vector displacements; + std::vector points_dagger; + + Geometry(int _d) { + + base = (_d==5) ? 1:0; + + // make coarse grid stencil for 4d , not 5d + if ( _d==5 ) _d=4; + + npoint = 2*_d+1; + directions.resize(npoint); + displacements.resize(npoint); + points_dagger.resize(npoint); + for(int d=0;d<_d;d++){ + directions[d ] = d+base; + directions[d+_d] = d+base; + displacements[d ] = +1; + displacements[d+_d]= -1; + points_dagger[d ] = d+_d; + points_dagger[d+_d] = d; + } + directions [2*_d]=0; + displacements[2*_d]=0; + points_dagger[2*_d]=2*_d; + } + + int point(int dir, int disp) { + assert(disp == -1 || disp == 0 || disp == 1); + assert(base+0 <= dir && dir < base+4); + + // directions faster index = new indexing + // 4d (base = 0): + // point 0 1 2 3 4 5 6 7 8 + // dir 0 1 2 3 0 1 2 3 0 + // disp +1 +1 +1 +1 -1 -1 -1 -1 0 + // 5d (base = 1): + // point 0 1 2 3 4 5 6 7 8 + // dir 1 2 3 4 1 2 3 4 0 + // disp +1 +1 +1 +1 -1 -1 -1 -1 0 + + // displacements faster index = old indexing + // 4d (base = 0): + // point 0 1 2 3 4 5 6 7 8 + // dir 0 0 1 1 2 2 3 3 0 + // disp +1 -1 +1 -1 +1 -1 +1 -1 0 + // 5d (base = 1): + // point 0 1 2 3 4 5 6 7 8 + // dir 1 1 2 2 3 3 4 4 0 + // disp +1 -1 +1 -1 +1 -1 +1 -1 0 + + if(dir == 0 and disp == 0) + return 8; + else // New indexing + return (1 - disp) / 2 * 4 + dir - base; + // else // Old indexing + // return (4 * (dir - base) + 1 - disp) / 2; + } +}; + +///////////////////////////////////////////////////////////////// +// Less local equivalent of Geometry class in cartesian case +///////////////////////////////////////////////////////////////// +class NonLocalStencilGeometry { +public: + int depth; + int hops; + int npoint; + std::vector shifts; + Coordinate stencil_size; + Coordinate stencil_lo; + Coordinate stencil_hi; + GridCartesian *grid; + GridCartesian *Grid() {return grid;}; + int Depth(void){return 1;}; // Ghost zone depth + int Hops(void){return hops;}; // # of hops=> level of corner fill in in stencil + + virtual int DimSkip(void) =0; + + virtual ~NonLocalStencilGeometry() {}; + + int Reverse(int point) + { + int Nd = Grid()->Nd(); + Coordinate shft = shifts[point]; + Coordinate rev(Nd); + for(int mu=0;mushifts.resize(0); + int Nd = this->grid->Nd(); + + int dd = this->DimSkip(); + for(int s0=this->stencil_lo[dd+0];s0<=this->stencil_hi[dd+0];s0++){ + for(int s1=this->stencil_lo[dd+1];s1<=this->stencil_hi[dd+1];s1++){ + for(int s2=this->stencil_lo[dd+2];s2<=this->stencil_hi[dd+2];s2++){ + for(int s3=this->stencil_lo[dd+3];s3<=this->stencil_hi[dd+3];s3++){ + Coordinate sft(Nd,0); + sft[dd+0] = s0; + sft[dd+1] = s1; + sft[dd+2] = s2; + sft[dd+3] = s3; + int nhops = abs(s0)+abs(s1)+abs(s2)+abs(s3); + if(nhops<=this->hops) this->shifts.push_back(sft); + }}}} + this->npoint = this->shifts.size(); + std::cout << GridLogMessage << "NonLocalStencilGeometry has "<< this->npoint << " terms in stencil "<GlobalDimensions(); + stencil_size.resize(grid->Nd()); + stencil_lo.resize(grid->Nd()); + stencil_hi.resize(grid->Nd()); + for(int d=0;dNd();d++){ + if ( latt[d] == 1 ) { + stencil_lo[d] = 0; + stencil_hi[d] = 0; + stencil_size[d]= 1; + } else if ( latt[d] == 2 ) { + stencil_lo[d] = -1; + stencil_hi[d] = 0; + stencil_size[d]= 2; + } else if ( latt[d] > 2 ) { + stencil_lo[d] = -1; + stencil_hi[d] = 1; + stencil_size[d]= 3; + } + } + }; + +}; + +// Need to worry about red-black now +class NonLocalStencilGeometry4D : public NonLocalStencilGeometry { +public: + virtual int DimSkip(void) { return 0;}; + NonLocalStencilGeometry4D(GridCartesian *Coarse,int _hops) : NonLocalStencilGeometry(Coarse,_hops) { }; + virtual ~NonLocalStencilGeometry4D() {}; +}; +class NonLocalStencilGeometry5D : public NonLocalStencilGeometry { +public: + virtual int DimSkip(void) { return 1; }; + NonLocalStencilGeometry5D(GridCartesian *Coarse,int _hops) : NonLocalStencilGeometry(Coarse,_hops) { }; + virtual ~NonLocalStencilGeometry5D() {}; +}; +/* + * Bunch of different options classes + */ +class NextToNextToNextToNearestStencilGeometry4D : public NonLocalStencilGeometry4D { +public: + NextToNextToNextToNearestStencilGeometry4D(GridCartesian *Coarse) : NonLocalStencilGeometry4D(Coarse,4) + { + this->BuildShifts(); + }; +}; +class NextToNextToNextToNearestStencilGeometry5D : public NonLocalStencilGeometry5D { +public: + NextToNextToNextToNearestStencilGeometry5D(GridCartesian *Coarse) : NonLocalStencilGeometry5D(Coarse,4) + { + this->BuildShifts(); + }; +}; +class NextToNearestStencilGeometry4D : public NonLocalStencilGeometry4D { +public: + NextToNearestStencilGeometry4D(GridCartesian *Coarse) : NonLocalStencilGeometry4D(Coarse,2) + { + this->BuildShifts(); + }; +}; +class NextToNearestStencilGeometry5D : public NonLocalStencilGeometry5D { +public: + NextToNearestStencilGeometry5D(GridCartesian *Coarse) : NonLocalStencilGeometry5D(Coarse,2) + { + this->BuildShifts(); + }; +}; +class NearestStencilGeometry4D : public NonLocalStencilGeometry4D { +public: + NearestStencilGeometry4D(GridCartesian *Coarse) : NonLocalStencilGeometry4D(Coarse,1) + { + this->BuildShifts(); + }; +}; +class NearestStencilGeometry5D : public NonLocalStencilGeometry5D { +public: + NearestStencilGeometry5D(GridCartesian *Coarse) : NonLocalStencilGeometry5D(Coarse,1) + { + this->BuildShifts(); + }; +}; + +NAMESPACE_END(Grid); diff --git a/Grid/algorithms/multigrid/Multigrid.h b/Grid/algorithms/multigrid/Multigrid.h new file mode 100644 index 00000000..a1bf400c --- /dev/null +++ b/Grid/algorithms/multigrid/Multigrid.h @@ -0,0 +1,33 @@ + /************************************************************************************* + + Grid physics library, www.github.com/paboyle/Grid + + Source file: Grid/algorithms/multigrid/MultiGrid.h + + Copyright (C) 2023 + +Author: Peter Boyle + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along + with this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + + See the full license in the file "LICENSE" in the top level distribution directory + *************************************************************************************/ + /* END LEGAL */ +#pragma once + +#include +#include +#include +#include