From d29abfdcafd8e0da9803b43cae9951f94dbf833f Mon Sep 17 00:00:00 2001 From: Peter Boyle Date: Fri, 6 Oct 2023 21:03:34 -0400 Subject: [PATCH] Transfer code to Frontier now --- Grid/algorithms/CoarsenedMatrix.h | 1107 ----------------- Grid/algorithms/GeneralCoarsenedMatrix.h | 573 --------- .../multigrid/{Multigrid.h => MultiGrid.h} | 0 3 files changed, 1680 deletions(-) delete mode 100644 Grid/algorithms/CoarsenedMatrix.h delete mode 100644 Grid/algorithms/GeneralCoarsenedMatrix.h rename Grid/algorithms/multigrid/{Multigrid.h => MultiGrid.h} (100%) diff --git a/Grid/algorithms/CoarsenedMatrix.h b/Grid/algorithms/CoarsenedMatrix.h deleted file mode 100644 index 177305d9..00000000 --- a/Grid/algorithms/CoarsenedMatrix.h +++ /dev/null @@ -1,1107 +0,0 @@ -/************************************************************************************* - - Grid physics library, www.github.com/paboyle/Grid - - Source file: ./lib/algorithms/CoarsenedMatrix.h - - Copyright (C) 2015 - -Author: Azusa Yamaguchi -Author: Peter Boyle -Author: Peter Boyle -Author: paboyle - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License along - with this program; if not, write to the Free Software Foundation, Inc., - 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. - - See the full license in the file "LICENSE" in the top level distribution directory -*************************************************************************************/ -/* END LEGAL */ -#ifndef GRID_ALGORITHM_COARSENED_MATRIX_H -#define GRID_ALGORITHM_COARSENED_MATRIX_H - -#include // needed for Dagger(Yes|No), Inverse(Yes|No) - -NAMESPACE_BEGIN(Grid); - -template -inline void blockMaskedInnerProduct(Lattice &CoarseInner, - const Lattice &FineMask, - const Lattice &fineX, - const Lattice &fineY) -{ - typedef decltype(innerProduct(vobj(),vobj())) dotp; - - GridBase *coarse(CoarseInner.Grid()); - GridBase *fine (fineX.Grid()); - - Lattice fine_inner(fine); fine_inner.Checkerboard() = fineX.Checkerboard(); - Lattice fine_inner_msk(fine); - - // Multiply could be fused with innerProduct - // Single block sum kernel could do both masks. - fine_inner = localInnerProduct(fineX,fineY); - mult(fine_inner_msk, fine_inner,FineMask); - blockSum(CoarseInner,fine_inner_msk); -} - - -class Geometry { -public: - int npoint; - int base; - std::vector directions ; - std::vector displacements; - std::vector points_dagger; - - Geometry(int _d) { - - base = (_d==5) ? 1:0; - - // make coarse grid stencil for 4d , not 5d - if ( _d==5 ) _d=4; - - npoint = 2*_d+1; - directions.resize(npoint); - displacements.resize(npoint); - points_dagger.resize(npoint); - for(int d=0;d<_d;d++){ - directions[d ] = d+base; - directions[d+_d] = d+base; - displacements[d ] = +1; - displacements[d+_d]= -1; - points_dagger[d ] = d+_d; - points_dagger[d+_d] = d; - } - directions [2*_d]=0; - displacements[2*_d]=0; - points_dagger[2*_d]=2*_d; - } - - int point(int dir, int disp) { - assert(disp == -1 || disp == 0 || disp == 1); - assert(base+0 <= dir && dir < base+4); - - // directions faster index = new indexing - // 4d (base = 0): - // point 0 1 2 3 4 5 6 7 8 - // dir 0 1 2 3 0 1 2 3 0 - // disp +1 +1 +1 +1 -1 -1 -1 -1 0 - // 5d (base = 1): - // point 0 1 2 3 4 5 6 7 8 - // dir 1 2 3 4 1 2 3 4 0 - // disp +1 +1 +1 +1 -1 -1 -1 -1 0 - - // displacements faster index = old indexing - // 4d (base = 0): - // point 0 1 2 3 4 5 6 7 8 - // dir 0 0 1 1 2 2 3 3 0 - // disp +1 -1 +1 -1 +1 -1 +1 -1 0 - // 5d (base = 1): - // point 0 1 2 3 4 5 6 7 8 - // dir 1 1 2 2 3 3 4 4 0 - // disp +1 -1 +1 -1 +1 -1 +1 -1 0 - - if(dir == 0 and disp == 0) - return 8; - else // New indexing - return (1 - disp) / 2 * 4 + dir - base; - // else // Old indexing - // return (4 * (dir - base) + 1 - disp) / 2; - } -}; - -template -class Aggregation { -public: - typedef iVector siteVector; - typedef Lattice CoarseVector; - typedef Lattice > CoarseMatrix; - - typedef Lattice< CComplex > CoarseScalar; // used for inner products on fine field - typedef Lattice FineField; - - GridBase *CoarseGrid; - GridBase *FineGrid; - std::vector > subspace; - int checkerboard; - int Checkerboard(void){return checkerboard;} - Aggregation(GridBase *_CoarseGrid,GridBase *_FineGrid,int _checkerboard) : - CoarseGrid(_CoarseGrid), - FineGrid(_FineGrid), - subspace(nbasis,_FineGrid), - checkerboard(_checkerboard) - { - }; - - void Orthogonalise(void){ - CoarseScalar InnerProd(CoarseGrid); - std::cout << GridLogMessage <<" Block Gramm-Schmidt pass 1"< &hermop,int nn=nbasis) - { - - RealD scale; - - ConjugateGradient CG(1.0e-2,100,false); - FineField noise(FineGrid); - FineField Mn(FineGrid); - - for(int b=0;b "< "< &hermop, - int nn, - double hi, - double lo, - int orderfilter, - int ordermin, - int orderstep, - double filterlo - ) { - - RealD scale; - - FineField noise(FineGrid); - FineField Mn(FineGrid); - FineField tmp(FineGrid); - - // New normalised noise - gaussian(RNG,noise); - scale = std::pow(norm2(noise),-0.5); - noise=noise*scale; - - std::cout << GridLogMessage<<" Chebyshev subspace pass-1 : ord "< "< Cheb(lo,hi,orderfilter); - Cheb(hermop,noise,Mn); - // normalise - scale = std::pow(norm2(Mn),-0.5); Mn=Mn*scale; - subspace[b] = Mn; - hermop.Op(Mn,tmp); - std::cout< "<oSites(), Nsimd, { - coalescedWrite(y_v[ss],xscale*y_v(ss)+mscale*Tn_v(ss)); - coalescedWrite(Tnp_v[ss],2.0*y_v(ss)-Tnm_v(ss)); - }); - - // Possible more fine grained control is needed than a linear sweep, - // but huge productivity gain if this is simple algorithm and not a tunable - int m =1; - if ( n>=ordermin ) m=n-ordermin; - if ( (m%orderstep)==0 ) { - Mn=*Tnp; - scale = std::pow(norm2(Mn),-0.5); Mn=Mn*scale; - subspace[b] = Mn; - hermop.Op(Mn,tmp); - std::cout< "< &hermop, - int nn, - double hi, - double lo, - int orderfilter - ) { - - RealD scale; - - FineField noise(FineGrid); - FineField Mn(FineGrid); - FineField tmp(FineGrid); - - // New normalised noise - std::cout << GridLogMessage<<" Chebyshev subspace pure noise : ord "< "< Cheb(lo,hi,orderfilter); - Cheb(hermop,noise,Mn); - // normalise - scale = std::pow(norm2(Mn),-0.5); Mn=Mn*scale; - subspace[b] = Mn; - hermop.Op(Mn,tmp); - std::cout< "< -class CoarsenedMatrix : public CheckerBoardedSparseMatrixBase > > { -public: - - typedef iVector siteVector; - typedef Lattice CoarseComplexField; - typedef Lattice CoarseVector; - typedef Lattice > CoarseMatrix; - typedef iMatrix Cobj; - typedef Lattice< CComplex > CoarseScalar; // used for inner products on fine field - typedef Lattice FineField; - typedef CoarseVector FermionField; - - // enrich interface, use default implementation as in FermionOperator /////// - void Dminus(CoarseVector const& in, CoarseVector& out) { out = in; } - void DminusDag(CoarseVector const& in, CoarseVector& out) { out = in; } - void ImportPhysicalFermionSource(CoarseVector const& input, CoarseVector& imported) { imported = input; } - void ImportUnphysicalFermion(CoarseVector const& input, CoarseVector& imported) { imported = input; } - void ExportPhysicalFermionSolution(CoarseVector const& solution, CoarseVector& exported) { exported = solution; }; - void ExportPhysicalFermionSource(CoarseVector const& solution, CoarseVector& exported) { exported = solution; }; - - //////////////////// - // Data members - //////////////////// - Geometry geom; - GridBase * _grid; - GridBase* _cbgrid; - int hermitian; - - CartesianStencil Stencil; - CartesianStencil StencilEven; - CartesianStencil StencilOdd; - - std::vector A; - std::vector Aeven; - std::vector Aodd; - - CoarseMatrix AselfInv; - CoarseMatrix AselfInvEven; - CoarseMatrix AselfInvOdd; - - Vector dag_factor; - - /////////////////////// - // Interface - /////////////////////// - GridBase * Grid(void) { return _grid; }; // this is all the linalg routines need to know - GridBase * RedBlackGrid() { return _cbgrid; }; - - int ConstEE() { return 0; } - - void M (const CoarseVector &in, CoarseVector &out) - { - conformable(_grid,in.Grid()); - conformable(in.Grid(),out.Grid()); - out.Checkerboard() = in.Checkerboard(); - - SimpleCompressor compressor; - - Stencil.HaloExchange(in,compressor); - autoView( in_v , in, AcceleratorRead); - autoView( out_v , out, AcceleratorWrite); - autoView( Stencil_v , Stencil, AcceleratorRead); - int npoint = geom.npoint; - typedef LatticeView Aview; - - Vector AcceleratorViewContainer; - - for(int p=0;poSites(); - - accelerator_for(sss, Grid()->oSites()*nbasis, Nsimd, { - int ss = sss/nbasis; - int b = sss%nbasis; - calcComplex res = Zero(); - calcVector nbr; - int ptype; - StencilEntry *SE; - - for(int point=0;point_is_local) { - nbr = coalescedReadPermute(in_v[SE->_offset],ptype,SE->_permute); - } else { - nbr = coalescedRead(Stencil_v.CommBuf()[SE->_offset]); - } - acceleratorSynchronise(); - - for(int bb=0;bb compressor; - - Stencil.HaloExchange(in,compressor); - autoView( in_v , in, AcceleratorRead); - autoView( out_v , out, AcceleratorWrite); - autoView( Stencil_v , Stencil, AcceleratorRead); - int npoint = geom.npoint; - typedef LatticeView Aview; - - Vector AcceleratorViewContainer; - - for(int p=0;poSites(); - - Vector points(geom.npoint, 0); - for(int p=0; poSites()*nbasis, Nsimd, { - int ss = sss/nbasis; - int b = sss%nbasis; - calcComplex res = Zero(); - calcVector nbr; - int ptype; - StencilEntry *SE; - - for(int p=0;p_is_local) { - nbr = coalescedReadPermute(in_v[SE->_offset],ptype,SE->_permute); - } else { - nbr = coalescedRead(Stencil_v.CommBuf()[SE->_offset]); - } - acceleratorSynchronise(); - - for(int bb=0;bb compressor; - Stencil.HaloExchange(in,compressor); - } - void MdirCalc(const CoarseVector &in, CoarseVector &out, int point) - { - conformable(_grid,in.Grid()); - conformable(_grid,out.Grid()); - out.Checkerboard() = in.Checkerboard(); - - typedef LatticeView Aview; - Vector AcceleratorViewContainer; - for(int p=0;poSites()*nbasis, Nsimd, { - int ss = sss/nbasis; - int b = sss%nbasis; - calcComplex res = Zero(); - calcVector nbr; - int ptype; - StencilEntry *SE; - - SE=Stencil_v.GetEntry(ptype,point,ss); - - if(SE->_is_local) { - nbr = coalescedReadPermute(in_v[SE->_offset],ptype,SE->_permute); - } else { - nbr = coalescedRead(Stencil_v.CommBuf()[SE->_offset]); - } - acceleratorSynchronise(); - - for(int bb=0;bb &out) - { - this->MdirComms(in); - int ndir=geom.npoint-1; - if ((out.size()!=ndir)&&(out.size()!=ndir+1)) { - std::cout <<"MdirAll out size "<< out.size()<MdirComms(in); - - MdirCalc(in,out,geom.point(dir,disp)); - }; - - void Mdiag(const CoarseVector &in, CoarseVector &out) - { - int point=geom.npoint-1; - MdirCalc(in, out, point); // No comms - }; - - void Mooee(const CoarseVector &in, CoarseVector &out) { - MooeeInternal(in, out, DaggerNo, InverseNo); - } - - void MooeeInv(const CoarseVector &in, CoarseVector &out) { - MooeeInternal(in, out, DaggerNo, InverseYes); - } - - void MooeeDag(const CoarseVector &in, CoarseVector &out) { - MooeeInternal(in, out, DaggerYes, InverseNo); - } - - void MooeeInvDag(const CoarseVector &in, CoarseVector &out) { - MooeeInternal(in, out, DaggerYes, InverseYes); - } - - void Meooe(const CoarseVector &in, CoarseVector &out) { - if(in.Checkerboard() == Odd) { - DhopEO(in, out, DaggerNo); - } else { - DhopOE(in, out, DaggerNo); - } - } - - void MeooeDag(const CoarseVector &in, CoarseVector &out) { - if(in.Checkerboard() == Odd) { - DhopEO(in, out, DaggerYes); - } else { - DhopOE(in, out, DaggerYes); - } - } - - void Dhop(const CoarseVector &in, CoarseVector &out, int dag) { - conformable(in.Grid(), _grid); // verifies full grid - conformable(in.Grid(), out.Grid()); - - out.Checkerboard() = in.Checkerboard(); - - DhopInternal(Stencil, A, in, out, dag); - } - - void DhopOE(const CoarseVector &in, CoarseVector &out, int dag) { - conformable(in.Grid(), _cbgrid); // verifies half grid - conformable(in.Grid(), out.Grid()); // drops the cb check - - assert(in.Checkerboard() == Even); - out.Checkerboard() = Odd; - - DhopInternal(StencilEven, Aodd, in, out, dag); - } - - void DhopEO(const CoarseVector &in, CoarseVector &out, int dag) { - conformable(in.Grid(), _cbgrid); // verifies half grid - conformable(in.Grid(), out.Grid()); // drops the cb check - - assert(in.Checkerboard() == Odd); - out.Checkerboard() = Even; - - DhopInternal(StencilOdd, Aeven, in, out, dag); - } - - void MooeeInternal(const CoarseVector &in, CoarseVector &out, int dag, int inv) { - out.Checkerboard() = in.Checkerboard(); - assert(in.Checkerboard() == Odd || in.Checkerboard() == Even); - - CoarseMatrix *Aself = nullptr; - if(in.Grid()->_isCheckerBoarded) { - if(in.Checkerboard() == Odd) { - Aself = (inv) ? &AselfInvOdd : &Aodd[geom.npoint-1]; - DselfInternal(StencilOdd, *Aself, in, out, dag); - } else { - Aself = (inv) ? &AselfInvEven : &Aeven[geom.npoint-1]; - DselfInternal(StencilEven, *Aself, in, out, dag); - } - } else { - Aself = (inv) ? &AselfInv : &A[geom.npoint-1]; - DselfInternal(Stencil, *Aself, in, out, dag); - } - assert(Aself != nullptr); - } - - void DselfInternal(CartesianStencil &st, CoarseMatrix &a, - const CoarseVector &in, CoarseVector &out, int dag) { - int point = geom.npoint-1; - autoView( out_v, out, AcceleratorWrite); - autoView( in_v, in, AcceleratorRead); - autoView( st_v, st, AcceleratorRead); - autoView( a_v, a, AcceleratorRead); - - const int Nsimd = CComplex::Nsimd(); - typedef decltype(coalescedRead(in_v[0])) calcVector; - typedef decltype(coalescedRead(in_v[0](0))) calcComplex; - - RealD* dag_factor_p = &dag_factor[0]; - - if(dag) { - accelerator_for(sss, in.Grid()->oSites()*nbasis, Nsimd, { - int ss = sss/nbasis; - int b = sss%nbasis; - calcComplex res = Zero(); - calcVector nbr; - int ptype; - StencilEntry *SE; - - SE=st_v.GetEntry(ptype,point,ss); - - if(SE->_is_local) { - nbr = coalescedReadPermute(in_v[SE->_offset],ptype,SE->_permute); - } else { - nbr = coalescedRead(st_v.CommBuf()[SE->_offset]); - } - acceleratorSynchronise(); - - for(int bb=0;bboSites()*nbasis, Nsimd, { - int ss = sss/nbasis; - int b = sss%nbasis; - calcComplex res = Zero(); - calcVector nbr; - int ptype; - StencilEntry *SE; - - SE=st_v.GetEntry(ptype,point,ss); - - if(SE->_is_local) { - nbr = coalescedReadPermute(in_v[SE->_offset],ptype,SE->_permute); - } else { - nbr = coalescedRead(st_v.CommBuf()[SE->_offset]); - } - acceleratorSynchronise(); - - for(int bb=0;bb &st, std::vector &a, - const CoarseVector &in, CoarseVector &out, int dag) { - SimpleCompressor compressor; - - st.HaloExchange(in,compressor); - autoView( in_v, in, AcceleratorRead); - autoView( out_v, out, AcceleratorWrite); - autoView( st_v , st, AcceleratorRead); - typedef LatticeView Aview; - - // determine in what order we need the points - int npoint = geom.npoint-1; - Vector points(npoint, 0); - for(int p=0; p AcceleratorViewContainer; - for(int p=0;poSites()*nbasis, Nsimd, { - int ss = sss/nbasis; - int b = sss%nbasis; - calcComplex res = Zero(); - calcVector nbr; - int ptype; - StencilEntry *SE; - - for(int p=0;p_is_local) { - nbr = coalescedReadPermute(in_v[SE->_offset],ptype,SE->_permute); - } else { - nbr = coalescedRead(st_v.CommBuf()[SE->_offset]); - } - acceleratorSynchronise(); - - for(int bb=0;bboSites()*nbasis, Nsimd, { - int ss = sss/nbasis; - int b = sss%nbasis; - calcComplex res = Zero(); - calcVector nbr; - int ptype; - StencilEntry *SE; - - for(int p=0;p_is_local) { - nbr = coalescedReadPermute(in_v[SE->_offset],ptype,SE->_permute); - } else { - nbr = coalescedRead(st_v.CommBuf()[SE->_offset]); - } - acceleratorSynchronise(); - - for(int bb=0;bb > &linop, - Aggregation & Subspace) - { - typedef Lattice FineComplexField; - typedef typename Fobj::scalar_type scalar_type; - - std::cout << GridLogMessage<< "CoarsenMatrix "<< std::endl; - - FineComplexField one(FineGrid); one=scalar_type(1.0,0.0); - FineComplexField zero(FineGrid); zero=scalar_type(0.0,0.0); - - std::vector masks(geom.npoint,FineGrid); - FineComplexField imask(FineGrid); // contributions from within this block - FineComplexField omask(FineGrid); // contributions from outwith this block - - FineComplexField evenmask(FineGrid); - FineComplexField oddmask(FineGrid); - - FineField phi(FineGrid); - FineField tmp(FineGrid); - FineField zz(FineGrid); zz=Zero(); - FineField Mphi(FineGrid); - FineField Mphie(FineGrid); - FineField Mphio(FineGrid); - std::vector Mphi_p(geom.npoint,FineGrid); - - Lattice > coor (FineGrid); - Lattice > bcoor(FineGrid); - Lattice > bcb (FineGrid); bcb = Zero(); - - CoarseVector iProj(Grid()); - CoarseVector oProj(Grid()); - CoarseVector SelfProj(Grid()); - CoarseComplexField iZProj(Grid()); - CoarseComplexField oZProj(Grid()); - - CoarseScalar InnerProd(Grid()); - - std::cout << GridLogMessage<< "CoarsenMatrix Orthog "<< std::endl; - // Orthogonalise the subblocks over the basis - blockOrthogonalise(InnerProd,Subspace.subspace); - - // Compute the matrix elements of linop between this orthonormal - // set of vectors. - std::cout << GridLogMessage<< "CoarsenMatrix masks "<< std::endl; - int self_stencil=-1; - for(int p=0;p_rdimensions[dir])/(Grid()->_rdimensions[dir]); - - LatticeCoordinate(coor,dir); - - /////////////////////////////////////////////////////// - // Work out even and odd block checkerboarding for fast diagonal term - /////////////////////////////////////////////////////// - if ( disp==1 ) { - bcb = bcb + div(coor,block); - } - - if ( disp==0 ) { - masks[p]= Zero(); - } else if ( disp==1 ) { - masks[p] = where(mod(coor,block)==(block-1),one,zero); - } else if ( disp==-1 ) { - masks[p] = where(mod(coor,block)==(Integer)0,one,zero); - } - } - evenmask = where(mod(bcb,2)==(Integer)0,one,zero); - oddmask = one-evenmask; - - assert(self_stencil!=-1); - - for(int i=0;ioSites(), Fobj::Nsimd(),{ coalescedWrite(A_p[ss](j,i),oZProj_v(ss)); }); - if ( hermitian && (disp==-1) ) { - for(int pp=0;pp = * - int dirp = geom.directions[pp]; - int dispp = geom.displacements[pp]; - if ( (dirp==dir) && (dispp==1) ){ - auto sft = conjugate(Cshift(oZProj,dir,1)); - autoView( sft_v , sft , AcceleratorWrite); - autoView( A_pp , A[pp], AcceleratorWrite); - accelerator_for(ss, Grid()->oSites(), Fobj::Nsimd(),{ coalescedWrite(A_pp[ss](i,j),sft_v(ss)); }); - } - } - } - - } - } - } - - /////////////////////////////////////////// - // Faster alternate self coupling.. use hermiticity to save 2x - /////////////////////////////////////////// - { - mult(tmp,phi,evenmask); linop.Op(tmp,Mphie); - mult(tmp,phi,oddmask ); linop.Op(tmp,Mphio); - - { - autoView( tmp_ , tmp, AcceleratorWrite); - autoView( evenmask_ , evenmask, AcceleratorRead); - autoView( oddmask_ , oddmask, AcceleratorRead); - autoView( Mphie_ , Mphie, AcceleratorRead); - autoView( Mphio_ , Mphio, AcceleratorRead); - accelerator_for(ss, FineGrid->oSites(), Fobj::Nsimd(),{ - coalescedWrite(tmp_[ss],evenmask_(ss)*Mphie_(ss) + oddmask_(ss)*Mphio_(ss)); - }); - } - - blockProject(SelfProj,tmp,Subspace.subspace); - - autoView( SelfProj_ , SelfProj, AcceleratorRead); - autoView( A_self , A[self_stencil], AcceleratorWrite); - - accelerator_for(ss, Grid()->oSites(), Fobj::Nsimd(),{ - for(int j=0;jlSites(); - - typedef typename Cobj::scalar_object scalar_object; - - autoView(Aself_v, A[geom.npoint-1], CpuRead); - autoView(AselfInv_v, AselfInv, CpuWrite); - thread_for(site, localVolume, { // NOTE: Not able to bring this to GPU because of Eigen + peek/poke - Eigen::MatrixXcd selfLinkEigen = Eigen::MatrixXcd::Zero(nbasis, nbasis); - Eigen::MatrixXcd selfLinkInvEigen = Eigen::MatrixXcd::Zero(nbasis, nbasis); - - scalar_object selfLink = Zero(); - scalar_object selfLinkInv = Zero(); - - Coordinate lcoor; - - Grid()->LocalIndexToLocalCoor(site, lcoor); - peekLocalSite(selfLink, Aself_v, lcoor); - - for (int i = 0; i < nbasis; ++i) - for (int j = 0; j < nbasis; ++j) - selfLinkEigen(i, j) = static_cast(TensorRemove(selfLink(i, j))); - - selfLinkInvEigen = selfLinkEigen.inverse(); - - for(int i = 0; i < nbasis; ++i) - for(int j = 0; j < nbasis; ++j) - selfLinkInv(i, j) = selfLinkInvEigen(i, j); - - pokeLocalSite(selfLinkInv, AselfInv_v, lcoor); - }); - } - - void FillHalfCbs() { - std::cout << GridLogDebug << "CoarsenedMatrix::FillHalfCbs" << std::endl; - for(int p = 0; p < geom.npoint; ++p) { - pickCheckerboard(Even, Aeven[p], A[p]); - pickCheckerboard(Odd, Aodd[p], A[p]); - } - pickCheckerboard(Even, AselfInvEven, AselfInv); - pickCheckerboard(Odd, AselfInvOdd, AselfInv); - } -}; - -NAMESPACE_END(Grid); -#endif diff --git a/Grid/algorithms/GeneralCoarsenedMatrix.h b/Grid/algorithms/GeneralCoarsenedMatrix.h deleted file mode 100644 index 86971fdf..00000000 --- a/Grid/algorithms/GeneralCoarsenedMatrix.h +++ /dev/null @@ -1,573 +0,0 @@ -/************************************************************************************* - - Grid physics library, www.github.com/paboyle/Grid - - Source file: ./lib/algorithms/GeneralCoarsenedMatrix.h - - Copyright (C) 2015 - -Author: Peter Boyle - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License along - with this program; if not, write to the Free Software Foundation, Inc., - 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. - - See the full license in the file "LICENSE" in the top level distribution directory -*************************************************************************************/ -/* END LEGAL */ -#pragma once - -#include // needed for Dagger(Yes|No), Inverse(Yes|No) - -#include -#include - -NAMESPACE_BEGIN(Grid); - -// Fixme need coalesced read gpermute -template void gpermute(vobj & inout,int perm){ - vobj tmp=inout; - if (perm & 0x1 ) { permute(inout,tmp,0); tmp=inout;} - if (perm & 0x2 ) { permute(inout,tmp,1); tmp=inout;} - if (perm & 0x4 ) { permute(inout,tmp,2); tmp=inout;} - if (perm & 0x8 ) { permute(inout,tmp,3); tmp=inout;} -} - -///////////////////////////////////////////////////////////////// -// Reuse Aggregation class from CoarsenedMatrix for now -// Might think about *smoothed* Aggregation -// Equivalent of Geometry class in cartesian case -///////////////////////////////////////////////////////////////// -class NonLocalStencilGeometry { -public: - int depth; - int hops; - int npoint; - std::vector shifts; - Coordinate stencil_size; - Coordinate stencil_lo; - Coordinate stencil_hi; - GridCartesian *grid; - GridCartesian *Grid() {return grid;}; - int Depth(void){return 1;}; // Ghost zone depth - int Hops(void){return hops;}; // # of hops=> level of corner fill in in stencil - - virtual int DimSkip(void) =0; - - virtual ~NonLocalStencilGeometry() {}; - - int Reverse(int point) - { - int Nd = Grid()->Nd(); - Coordinate shft = shifts[point]; - Coordinate rev(Nd); - for(int mu=0;mushifts.resize(0); - int Nd = this->grid->Nd(); - - int dd = this->DimSkip(); - for(int s0=this->stencil_lo[dd+0];s0<=this->stencil_hi[dd+0];s0++){ - for(int s1=this->stencil_lo[dd+1];s1<=this->stencil_hi[dd+1];s1++){ - for(int s2=this->stencil_lo[dd+2];s2<=this->stencil_hi[dd+2];s2++){ - for(int s3=this->stencil_lo[dd+3];s3<=this->stencil_hi[dd+3];s3++){ - Coordinate sft(Nd,0); - sft[dd+0] = s0; - sft[dd+1] = s1; - sft[dd+2] = s2; - sft[dd+3] = s3; - int nhops = abs(s0)+abs(s1)+abs(s2)+abs(s3); - if(nhops<=this->hops) this->shifts.push_back(sft); - }}}} - this->npoint = this->shifts.size(); - std::cout << GridLogMessage << "NonLocalStencilGeometry has "<< this->npoint << " terms in stencil "<GlobalDimensions(); - stencil_size.resize(grid->Nd()); - stencil_lo.resize(grid->Nd()); - stencil_hi.resize(grid->Nd()); - for(int d=0;dNd();d++){ - if ( latt[d] == 1 ) { - stencil_lo[d] = 0; - stencil_hi[d] = 0; - stencil_size[d]= 1; - } else if ( latt[d] == 2 ) { - stencil_lo[d] = -1; - stencil_hi[d] = 0; - stencil_size[d]= 2; - } else if ( latt[d] > 2 ) { - stencil_lo[d] = -1; - stencil_hi[d] = 1; - stencil_size[d]= 3; - } - } - }; - -}; - -// Need to worry about red-black now -class NonLocalStencilGeometry4D : public NonLocalStencilGeometry { -public: - virtual int DimSkip(void) { return 0;}; - NonLocalStencilGeometry4D(GridCartesian *Coarse,int _hops) : NonLocalStencilGeometry(Coarse,_hops) { }; - virtual ~NonLocalStencilGeometry4D() {}; -}; -class NonLocalStencilGeometry5D : public NonLocalStencilGeometry { -public: - virtual int DimSkip(void) { return 1; }; - NonLocalStencilGeometry5D(GridCartesian *Coarse,int _hops) : NonLocalStencilGeometry(Coarse,_hops) { }; - virtual ~NonLocalStencilGeometry5D() {}; -}; -/* - * Bunch of different options classes - */ -class NextToNextToNextToNearestStencilGeometry4D : public NonLocalStencilGeometry4D { -public: - NextToNextToNextToNearestStencilGeometry4D(GridCartesian *Coarse) : NonLocalStencilGeometry4D(Coarse,4) - { - this->BuildShifts(); - }; -}; -class NextToNextToNextToNearestStencilGeometry5D : public NonLocalStencilGeometry5D { -public: - NextToNextToNextToNearestStencilGeometry5D(GridCartesian *Coarse) : NonLocalStencilGeometry5D(Coarse,4) - { - this->BuildShifts(); - }; -}; -class NextToNearestStencilGeometry4D : public NonLocalStencilGeometry4D { -public: - NextToNearestStencilGeometry4D(GridCartesian *Coarse) : NonLocalStencilGeometry4D(Coarse,2) - { - this->BuildShifts(); - }; -}; -class NextToNearestStencilGeometry5D : public NonLocalStencilGeometry5D { -public: - NextToNearestStencilGeometry5D(GridCartesian *Coarse) : NonLocalStencilGeometry5D(Coarse,2) - { - this->BuildShifts(); - }; -}; -class NearestStencilGeometry4D : public NonLocalStencilGeometry4D { -public: - NearestStencilGeometry4D(GridCartesian *Coarse) : NonLocalStencilGeometry4D(Coarse,1) - { - this->BuildShifts(); - }; -}; -class NearestStencilGeometry5D : public NonLocalStencilGeometry5D { -public: - NearestStencilGeometry5D(GridCartesian *Coarse) : NonLocalStencilGeometry5D(Coarse,1) - { - this->BuildShifts(); - }; -}; - -// Fine Object == (per site) type of fine field -// nbasis == number of deflation vectors -template -class GeneralCoarsenedMatrix : public SparseMatrixBase > > { -public: - - typedef GeneralCoarsenedMatrix GeneralCoarseOp; - typedef iVector siteVector; - typedef iMatrix siteMatrix; - typedef Lattice > CoarseComplexField; - typedef Lattice CoarseVector; - typedef Lattice > CoarseMatrix; - typedef iMatrix Cobj; - typedef Lattice< CComplex > CoarseScalar; // used for inner products on fine field - typedef Lattice FineField; - typedef CoarseVector Field; - //////////////////// - // Data members - //////////////////// - int hermitian; - GridBase * _FineGrid; - GridCartesian * _CoarseGrid; - NonLocalStencilGeometry &geom; - PaddedCell Cell; - GeneralLocalStencil Stencil; - - std::vector _A; - std::vector _Adag; - - /////////////////////// - // Interface - /////////////////////// - GridBase * Grid(void) { return _FineGrid; }; // this is all the linalg routines need to know - GridBase * FineGrid(void) { return _FineGrid; }; // this is all the linalg routines need to know - GridCartesian * CoarseGrid(void) { return _CoarseGrid; }; // this is all the linalg routines need to know - - - void ProjectNearestNeighbour(RealD shift, GeneralCoarseOp &CopyMe) - { - int nfound=0; - std::cout << " ProjectNearestNeighbour "<< CopyMe._A[0].Grid()<oSites(); - for(int ss=0;ss_offset; - assert( o< osites); - } - } - } - - _A.resize(geom.npoint,CoarseGrid); - _Adag.resize(geom.npoint,CoarseGrid); - } - void M (const CoarseVector &in, CoarseVector &out) - { - Mult(_A,in,out); - } - void Mdag (const CoarseVector &in, CoarseVector &out) - { - if ( hermitian ) M(in,out); - else Mult(_Adag,in,out); - } - void Mult (std::vector &A,const CoarseVector &in, CoarseVector &out) - { - RealD tviews=0; - RealD ttot=0; - RealD tmult=0; - RealD texch=0; - RealD text=0; - ttot=-usecond(); - conformable(CoarseGrid(),in.Grid()); - conformable(in.Grid(),out.Grid()); - out.Checkerboard() = in.Checkerboard(); - CoarseVector tin=in; - - texch-=usecond(); - CoarseVector pin = Cell.Exchange(tin); - texch+=usecond(); - - CoarseVector pout(pin.Grid()); pout=Zero(); - - int npoint = geom.npoint; - typedef LatticeView Aview; - - const int Nsimd = CComplex::Nsimd(); - - int osites=pin.Grid()->oSites(); - // int gsites=pin.Grid()->gSites(); - - RealD flops = 1.0* npoint * nbasis * nbasis * 8 * osites; - RealD bytes = (1.0*osites*sizeof(siteMatrix)*npoint+2.0*osites*sizeof(siteVector))*npoint; - - // for(int point=0;point_offset],SE->_permute,Nd); - auto res = out_v(ss)(b); - for(int bb=0;bbgSites() ;bidx++){ - Coordinate bcoor; - CoarseGrid()->GlobalIndexToGlobalCoor(bidx,bcoor); - - for(int p=0;pGlobalDimensions()[mu]; - scoor[mu] = (bcoor[mu] - geom.shifts[p][mu] + L) % L; // Modulo arithmetic - } - // Flip to poke/peekLocalSite and not too bad - auto link = peekSite(_A[p],scoor); - int pp = geom.Reverse(p); - pokeSite(adj(link),_Adag[pp],bcoor); - } - } - } - ///////////////////////////////////////////////////////////// - // - // A) Only reduced flops option is to use a padded cell of depth 4 - // and apply MpcDagMpc in the padded cell. - // - // Makes for ONE application of MpcDagMpc per vector instead of 30 or 80. - // With the effective cell size around (B+8)^4 perhaps 12^4/4^4 ratio - // Cost is 81x more, same as stencil size. - // - // But: can eliminate comms and do as local dirichlet. - // - // Local exchange gauge field once. - // Apply to all vectors, local only computation. - // Must exchange ghost subcells in reverse process of PaddedCell to take inner products - // - // B) Can reduce cost: pad by 1, apply Deo (4^4+6^4+8^4+8^4 )/ (4x 4^4) - // pad by 2, apply Doe - // pad by 3, apply Deo - // then break out 8x directions; cost is ~10x MpcDagMpc per vector - // - // => almost factor of 10 in setup cost, excluding data rearrangement - // - // Intermediates -- ignore the corner terms, leave approximate and force Hermitian - // Intermediates -- pad by 2 and apply 1+8+24 = 33 times. - ///////////////////////////////////////////////////////////// - - ////////////////////////////////////////////////////////// - // BFM HDCG style approach: Solve a system of equations to get Aij - ////////////////////////////////////////////////////////// - /* - * Here, k,l index which possible shift within the 3^Nd "ball" connected by MdagM. - * - * conj(phases[block]) proj[k][ block*Nvec+j ] = \sum_ball e^{i q_k . delta} < phi_{block,j} | MdagM | phi_{(block+delta),i} > - * = \sum_ball e^{iqk.delta} A_ji - * - * Must invert matrix M_k,l = e^[i q_k . delta_l] - * - * Where q_k = delta_k . (2*M_PI/global_nb[mu]) - */ - void CoarsenOperator(LinearOperatorBase > &linop, - Aggregation & Subspace) - { - std::cout << GridLogMessage<< "GeneralCoarsenMatrix "<< std::endl; - GridBase *grid = FineGrid(); - - RealD tproj=0.0; - RealD teigen=0.0; - RealD tmat=0.0; - RealD tphase=0.0; - RealD tinv=0.0; - - ///////////////////////////////////////////////////////////// - // Orthogonalise the subblocks over the basis - ///////////////////////////////////////////////////////////// - CoarseScalar InnerProd(CoarseGrid()); - blockOrthogonalise(InnerProd,Subspace.subspace); - - const int npoint = geom.npoint; - - Coordinate clatt = CoarseGrid()->GlobalDimensions(); - int Nd = CoarseGrid()->Nd(); - - /* - * Here, k,l index which possible momentum/shift within the N-points connected by MdagM. - * Matrix index i is mapped to this shift via - * geom.shifts[i] - * - * conj(pha[block]) proj[k (which mom)][j (basis vec cpt)][block] - * = \sum_{l in ball} e^{i q_k . delta_l} < phi_{block,j} | MdagM | phi_{(block+delta_l),i} > - * = \sum_{l in ball} e^{iqk.delta_l} A_ji^{b.b+l} - * = M_{kl} A_ji^{b.b+l} - * - * Must assemble and invert matrix M_k,l = e^[i q_k . delta_l] - * - * Where q_k = delta_k . (2*M_PI/global_nb[mu]) - * - * Then A{ji}^{b,b+l} = M^{-1}_{lm} ComputeProj_{m,b,i,j} - */ - teigen-=usecond(); - Eigen::MatrixXcd Mkl = Eigen::MatrixXcd::Zero(npoint,npoint); - Eigen::MatrixXcd invMkl = Eigen::MatrixXcd::Zero(npoint,npoint); - ComplexD ci(0.0,1.0); - for(int k=0;k ComputeProj(npoint,CoarseGrid()); - std::vector FT(npoint,CoarseGrid()); - for(int i=0;ioSites(); - autoView( A_v , _A[k], AcceleratorWrite); - autoView( FT_v , FT[k], AcceleratorRead); - accelerator_for(sss, osites, 1, { - for(int j=0;j &out){assert(0);}; -}; - - -NAMESPACE_END(Grid); diff --git a/Grid/algorithms/multigrid/Multigrid.h b/Grid/algorithms/multigrid/MultiGrid.h similarity index 100% rename from Grid/algorithms/multigrid/Multigrid.h rename to Grid/algorithms/multigrid/MultiGrid.h