/************************************************************************************* Grid physics library, www.github.com/paboyle/Grid Source file: ./lib/algorithms/CoarsenedMatrix.h Copyright (C) 2015 Author: Azusa Yamaguchi Author: Peter Boyle Author: Peter Boyle Author: paboyle This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. See the full license in the file "LICENSE" in the top level distribution directory *************************************************************************************/ /* END LEGAL */ #ifndef GRID_ALGORITHM_COARSENED_MATRIX_H #define GRID_ALGORITHM_COARSENED_MATRIX_H NAMESPACE_BEGIN(Grid); template inline void blockMaskedInnerProduct(Lattice &CoarseInner, const Lattice &FineMask, const Lattice &fineX, const Lattice &fineY) { typedef decltype(innerProduct(vobj(),vobj())) dotp; GridBase *coarse(CoarseInner.Grid()); GridBase *fine (fineX.Grid()); Lattice fine_inner(fine); fine_inner.Checkerboard() = fineX.Checkerboard(); Lattice fine_inner_msk(fine); // Multiply could be fused with innerProduct // Single block sum kernel could do both masks. fine_inner = localInnerProduct(fineX,fineY); mult(fine_inner_msk, fine_inner,FineMask); blockSum(CoarseInner,fine_inner_msk); } class Geometry { public: int npoint; std::vector directions ; std::vector displacements; Geometry(int _d) { int base = (_d==5) ? 1:0; // make coarse grid stencil for 4d , not 5d if ( _d==5 ) _d=4; npoint = 2*_d+1; directions.resize(npoint); displacements.resize(npoint); for(int d=0;d<_d;d++){ directions[d ] = d+base; directions[d+_d] = d+base; displacements[d ] = +1; displacements[d+_d]= -1; } directions [2*_d]=0; displacements[2*_d]=0; } }; template class Aggregation { public: typedef iVector siteVector; typedef Lattice CoarseVector; typedef Lattice > CoarseMatrix; typedef Lattice< CComplex > CoarseScalar; // used for inner products on fine field typedef Lattice FineField; GridBase *CoarseGrid; GridBase *FineGrid; std::vector > subspace; int checkerboard; int Checkerboard(void){return checkerboard;} Aggregation(GridBase *_CoarseGrid,GridBase *_FineGrid,int _checkerboard) : CoarseGrid(_CoarseGrid), FineGrid(_FineGrid), subspace(nbasis,_FineGrid), checkerboard(_checkerboard) { }; void Orthogonalise(void){ CoarseScalar InnerProd(CoarseGrid); std::cout << GridLogMessage <<" Block Gramm-Schmidt pass 1"< &hermop,int nn=nbasis) { RealD scale; ConjugateGradient CG(1.0e-2,100,false); FineField noise(FineGrid); FineField Mn(FineGrid); for(int b=0;b "< "< &hermop, int nn, double hi, double lo, int orderfilter, int ordermin, int orderstep, double filterlo ) { RealD scale; FineField noise(FineGrid); FineField Mn(FineGrid); FineField tmp(FineGrid); // New normalised noise gaussian(RNG,noise); scale = std::pow(norm2(noise),-0.5); noise=noise*scale; // Initial matrix element hermop.Op(noise,Mn); std::cout< "< Cheb(lo,hi,orderfilter); Cheb(hermop,noise,Mn); // normalise scale = std::pow(norm2(Mn),-0.5); Mn=Mn*scale; subspace[b] = Mn; hermop.Op(Mn,tmp); std::cout< "<oSites(), Nsimd, { coalescedWrite(y_v[ss],xscale*y_v(ss)+mscale*Tn_v(ss)); coalescedWrite(Tnp_v[ss],2.0*y_v(ss)-Tnm_v(ss)); }); // Possible more fine grained control is needed than a linear sweep, // but huge productivity gain if this is simple algorithm and not a tunable int m =1; if ( n>=ordermin ) m=n-ordermin; if ( (m%orderstep)==0 ) { Mn=*Tnp; scale = std::pow(norm2(Mn),-0.5); Mn=Mn*scale; subspace[b] = Mn; hermop.Op(Mn,tmp); std::cout< "< class CoarsenedMatrix : public SparseMatrixBase > > { public: typedef iVector siteVector; typedef Lattice CoarseComplexField; typedef Lattice CoarseVector; typedef Lattice > CoarseMatrix; typedef iMatrix Cobj; typedef Lattice< CComplex > CoarseScalar; // used for inner products on fine field typedef Lattice FineField; //////////////////// // Data members //////////////////// Geometry geom; GridBase * _grid; int hermitian; CartesianStencil Stencil; std::vector A; /////////////////////// // Interface /////////////////////// GridBase * Grid(void) { return _grid; }; // this is all the linalg routines need to know void M (const CoarseVector &in, CoarseVector &out) { conformable(_grid,in.Grid()); conformable(in.Grid(),out.Grid()); SimpleCompressor compressor; Stencil.HaloExchange(in,compressor); autoView( in_v , in, AcceleratorRead); autoView( out_v , out, AcceleratorWrite); typedef LatticeView Aview; Vector AcceleratorViewContainer; for(int p=0;poSites(); accelerator_for(sss, Grid()->oSites()*nbasis, Nsimd, { int ss = sss/nbasis; int b = sss%nbasis; calcComplex res = Zero(); calcVector nbr; int ptype; StencilEntry *SE; for(int point=0;point_is_local) { nbr = coalescedReadPermute(in_v[SE->_offset],ptype,SE->_permute); } else { nbr = coalescedRead(Stencil.CommBuf()[SE->_offset]); } acceleratorSynchronise(); for(int bb=0;bb compressor; Stencil.HaloExchange(in,compressor); } void MdirCalc(const CoarseVector &in, CoarseVector &out, int point) { conformable(_grid,in.Grid()); conformable(_grid,out.Grid()); typedef LatticeView Aview; Vector AcceleratorViewContainer; for(int p=0;poSites()*nbasis, Nsimd, { int ss = sss/nbasis; int b = sss%nbasis; calcComplex res = Zero(); calcVector nbr; int ptype; StencilEntry *SE; SE=Stencil.GetEntry(ptype,point,ss); if(SE->_is_local) { nbr = coalescedReadPermute(in_v[SE->_offset],ptype,SE->_permute); } else { nbr = coalescedRead(Stencil.CommBuf()[SE->_offset]); } acceleratorSynchronise(); for(int bb=0;bb &out) { this->MdirComms(in); int ndir=geom.npoint-1; if ((out.size()!=ndir)&&(out.size()!=ndir+1)) { std::cout <<"MdirAll out size "<< out.size()<MdirComms(in); int ndim = in.Grid()->Nd(); ////////////// // 4D action like wilson // 0+ => 0 // 0- => 1 // 1+ => 2 // 1- => 3 // etc.. ////////////// // 5D action like DWF // 1+ => 0 // 1- => 1 // 2+ => 2 // 2- => 3 // etc.. auto point = [dir, disp, ndim](){ if(dir == 0 and disp == 0) return 8; else if ( ndim==4 ) { return (4 * dir + 1 - disp) / 2; } else { return (4 * (dir-1) + 1 - disp) / 2; } }(); MdirCalc(in,out,point); }; void Mdiag(const CoarseVector &in, CoarseVector &out) { int point=geom.npoint-1; MdirCalc(in, out, point); // No comms }; CoarsenedMatrix(GridCartesian &CoarseGrid, int hermitian_=0) : _grid(&CoarseGrid), geom(CoarseGrid._ndimension), hermitian(hermitian_), Stencil(&CoarseGrid,geom.npoint,Even,geom.directions,geom.displacements,0), A(geom.npoint,&CoarseGrid) { }; void CoarsenOperator(GridBase *FineGrid,LinearOperatorBase > &linop, Aggregation & Subspace) { typedef Lattice FineComplexField; typedef typename Fobj::scalar_type scalar_type; FineComplexField one(FineGrid); one=scalar_type(1.0,0.0); FineComplexField zero(FineGrid); zero=scalar_type(0.0,0.0); std::vector masks(geom.npoint,FineGrid); FineComplexField imask(FineGrid); // contributions from within this block FineComplexField omask(FineGrid); // contributions from outwith this block FineComplexField evenmask(FineGrid); FineComplexField oddmask(FineGrid); FineField phi(FineGrid); FineField tmp(FineGrid); FineField zz(FineGrid); zz=Zero(); FineField Mphi(FineGrid); FineField Mphie(FineGrid); FineField Mphio(FineGrid); std::vector Mphi_p(geom.npoint,FineGrid); Lattice > coor (FineGrid); Lattice > bcoor(FineGrid); Lattice > bcb (FineGrid); bcb = Zero(); CoarseVector iProj(Grid()); CoarseVector oProj(Grid()); CoarseVector SelfProj(Grid()); CoarseComplexField iZProj(Grid()); CoarseComplexField oZProj(Grid()); CoarseScalar InnerProd(Grid()); // Orthogonalise the subblocks over the basis blockOrthogonalise(InnerProd,Subspace.subspace); // Compute the matrix elements of linop between this orthonormal // set of vectors. int self_stencil=-1; for(int p=0;p_rdimensions[dir])/(Grid()->_rdimensions[dir]); LatticeCoordinate(coor,dir); /////////////////////////////////////////////////////// // Work out even and odd block checkerboarding for fast diagonal term /////////////////////////////////////////////////////// if ( disp==1 ) { bcb = bcb + div(coor,block); } if ( disp==0 ) { masks[p]= Zero(); } else if ( disp==1 ) { masks[p] = where(mod(coor,block)==(block-1),one,zero); } else if ( disp==-1 ) { masks[p] = where(mod(coor,block)==(Integer)0,one,zero); } } evenmask = where(mod(bcb,2)==(Integer)0,one,zero); oddmask = one-evenmask; assert(self_stencil!=-1); for(int i=0;ioSites(), Fobj::Nsimd(),{ coalescedWrite(A_p[ss](j,i),oZProj_v(ss)); }); } } } /////////////////////////////////////////// // Faster alternate self coupling.. use hermiticity to save 2x /////////////////////////////////////////// { mult(tmp,phi,evenmask); linop.Op(tmp,Mphie); mult(tmp,phi,oddmask ); linop.Op(tmp,Mphio); { autoView( tmp_ , tmp, AcceleratorWrite); autoView( evenmask_ , evenmask, AcceleratorRead); autoView( oddmask_ , oddmask, AcceleratorRead); autoView( Mphie_ , Mphie, AcceleratorRead); autoView( Mphio_ , Mphio, AcceleratorRead); accelerator_for(ss, FineGrid->oSites(), Fobj::Nsimd(),{ coalescedWrite(tmp_[ss],evenmask_(ss)*Mphie_(ss) + oddmask_(ss)*Mphio_(ss)); }); } blockProject(SelfProj,tmp,Subspace.subspace); autoView( SelfProj_ , SelfProj, AcceleratorRead); autoView( A_self , A[self_stencil], AcceleratorWrite); accelerator_for(ss, Grid()->oSites(), Fobj::Nsimd(),{ for(int j=0;j