From 6c3ade5d892a48ed50b1b7f431da708adf8db1e4 Mon Sep 17 00:00:00 2001 From: Peter Boyle Date: Mon, 25 Sep 2023 17:14:40 -0400 Subject: [PATCH] Improved the coarsening --- Grid/algorithms/GeneralCoarsenedMatrix.h | 524 +++++++++++++---------- 1 file changed, 307 insertions(+), 217 deletions(-) diff --git a/Grid/algorithms/GeneralCoarsenedMatrix.h b/Grid/algorithms/GeneralCoarsenedMatrix.h index 4762bf73..0d666eac 100644 --- a/Grid/algorithms/GeneralCoarsenedMatrix.h +++ b/Grid/algorithms/GeneralCoarsenedMatrix.h @@ -34,6 +34,7 @@ Author: Peter Boyle NAMESPACE_BEGIN(Grid); +// Fixme need coalesced read gpermute template void gpermute(vobj & inout,int perm){ vobj tmp=inout; if (perm & 0x1 ) { permute(inout,tmp,0); tmp=inout;} @@ -50,175 +51,139 @@ template void gpermute(vobj & inout,int perm){ class NonLocalStencilGeometry { public: int depth; + int hops; int npoint; std::vector shifts; - virtual void BuildShifts(void) { assert(0); } ; - int Depth(void){return depth;}; - NonLocalStencilGeometry(int _depth) : depth(_depth) - { - }; + Coordinate stencil_size; + Coordinate stencil_lo; + Coordinate stencil_hi; + GridCartesian *grid; + GridCartesian *Grid() {return grid;}; + int Depth(void){return 1;}; // Ghost zone depth + int Hops(void){return hops;}; // # of hops=> level of corner fill in in stencil + + virtual int DimSkip(void) =0; + virtual ~NonLocalStencilGeometry() {}; -}; -// Need to worry about red-black now -class NextToNearestStencilGeometry4D : public NonLocalStencilGeometry { -public: - NextToNearestStencilGeometry4D(void) : NonLocalStencilGeometry(1) + + int Reverse(int point) { - this->BuildShifts(); - }; - virtual ~NextToNearestStencilGeometry4D() {}; - virtual void BuildShifts(void) + int Nd = Grid()->Nd(); + Coordinate shft = shifts[point]; + Coordinate rev(Nd); + for(int mu=0;mushifts.resize(0); - // Like HDCG: 81 point stencil including self connection - this->shifts.push_back(Coordinate({0,0,0,0})); - // +-x, +-y, +-z, +-t : 8 - for(int s=-1;s<=1;s+=2){ - this->shifts.push_back(Coordinate({s,0,0,0})); - this->shifts.push_back(Coordinate({0,s,0,0})); - this->shifts.push_back(Coordinate({0,0,s,0})); - this->shifts.push_back(Coordinate({0,0,0,s})); - } - // +-x+-y, +-x+-z, +-x+-t, +-y+-z, +-y+-t, +-z+-t : 24 - for(int s1=-1;s1<=1;s1+=2){ - for(int s2=-1;s2<=1;s2+=2){ - this->shifts.push_back(Coordinate({s1,s2,0,0})); - this->shifts.push_back(Coordinate({s1,0,s2,0})); - this->shifts.push_back(Coordinate({s1,0,0,s2})); - this->shifts.push_back(Coordinate({0,s1,s2,0})); - this->shifts.push_back(Coordinate({0,s1,0,s2})); - this->shifts.push_back(Coordinate({0,0,s1,s2})); - }} + int Nd = this->grid->Nd(); + + int dd = this->DimSkip(); + for(int s0=this->stencil_lo[dd+0];s0<=this->stencil_hi[dd+0];s0++){ + for(int s1=this->stencil_lo[dd+1];s1<=this->stencil_hi[dd+1];s1++){ + for(int s2=this->stencil_lo[dd+2];s2<=this->stencil_hi[dd+2];s2++){ + for(int s3=this->stencil_lo[dd+3];s3<=this->stencil_hi[dd+3];s3++){ + Coordinate sft(Nd,0); + sft[dd+0] = s0; + sft[dd+1] = s1; + sft[dd+2] = s2; + sft[dd+3] = s3; + int nhops = abs(s0)+abs(s1)+abs(s2)+abs(s3); + if(nhops<=this->hops) this->shifts.push_back(sft); + }}}} this->npoint = this->shifts.size(); + std::cout << GridLogMessage << "NonLocalStencilGeometry has "<< this->npoint << " terms in stencil "<GlobalDimensions(); + stencil_size.resize(grid->Nd()); + stencil_lo.resize(grid->Nd()); + stencil_hi.resize(grid->Nd()); + for(int d=0;dNd();d++){ + if ( latt[d] == 1 ) { + stencil_lo[d] = 0; + stencil_hi[d] = 0; + stencil_size[d]= 1; + } else if ( latt[d] == 2 ) { + stencil_lo[d] = -1; + stencil_hi[d] = 0; + stencil_size[d]= 2; + } else if ( latt[d] > 2 ) { + stencil_lo[d] = -1; + stencil_hi[d] = 1; + stencil_size[d]= 3; + } + } + }; + }; + // Need to worry about red-black now -class NextToNextToNextToNearestStencilGeometry4D : public NonLocalStencilGeometry { +class NonLocalStencilGeometry4D : public NonLocalStencilGeometry { public: - NextToNextToNextToNearestStencilGeometry4D(void) : NonLocalStencilGeometry(1) + virtual int DimSkip(void) { return 0;}; + NonLocalStencilGeometry4D(GridCartesian *Coarse,int _hops) : NonLocalStencilGeometry(Coarse,_hops) { }; + virtual ~NonLocalStencilGeometry4D() {}; +}; +class NonLocalStencilGeometry5D : public NonLocalStencilGeometry { +public: + virtual int DimSkip(void) { return 1; }; + NonLocalStencilGeometry5D(GridCartesian *Coarse,int _hops) : NonLocalStencilGeometry(Coarse,_hops) { }; + virtual ~NonLocalStencilGeometry5D() {}; +}; +/* + * Bunch of different options classes + */ +class NextToNextToNextToNearestStencilGeometry4D : public NonLocalStencilGeometry4D { +public: + NextToNextToNextToNearestStencilGeometry4D(GridCartesian *Coarse) : NonLocalStencilGeometry4D(Coarse,4) { this->BuildShifts(); }; - virtual ~NextToNextToNextToNearestStencilGeometry4D() {} - virtual void BuildShifts(void) - { - this->shifts.resize(0); - // Like HDCG: 81 point stencil including self connection - this->shifts.push_back(Coordinate({0,0,0,0})); - // +-x, +-y, +-z, +-t : 8 - for(int s=-1;s<=1;s+=2){ - this->shifts.push_back(Coordinate({s,0,0,0})); - this->shifts.push_back(Coordinate({0,s,0,0})); - this->shifts.push_back(Coordinate({0,0,s,0})); - this->shifts.push_back(Coordinate({0,0,0,s})); - } - // +-x+-y, +-x+-z, +-x+-t, +-y+-z, +-y+-t, +-z+-t : 24 - for(int s1=-1;s1<=1;s1+=2){ - for(int s2=-1;s2<=1;s2+=2){ - this->shifts.push_back(Coordinate({s1,s2,0,0})); - this->shifts.push_back(Coordinate({s1,0,s2,0})); - this->shifts.push_back(Coordinate({s1,0,0,s2})); - this->shifts.push_back(Coordinate({0,s1,s2,0})); - this->shifts.push_back(Coordinate({0,s1,0,s2})); - this->shifts.push_back(Coordinate({0,0,s1,s2})); - }} - // +-x+-y+-z, +-x+-y+-z, +-x+-y+-z, - for(int s1=-1;s1<=1;s1+=2){ - for(int s2=-1;s2<=1;s2+=2){ - for(int s3=-1;s3<=1;s3+=2){ - this->shifts.push_back(Coordinate({s1,s2,s3,0})); // 8x4 = 32 - this->shifts.push_back(Coordinate({s1,s2,0,s3})); - this->shifts.push_back(Coordinate({s1,0,s2,s3})); - this->shifts.push_back(Coordinate({0,s1,s2,s3})); - }}} - for(int s1=-1;s1<=1;s1+=2){ - for(int s2=-1;s2<=1;s2+=2){ - for(int s3=-1;s3<=1;s3+=2){ - for(int s4=-1;s4<=1;s4+=2){ - this->shifts.push_back(Coordinate({s1,s2,s3,s4})); // 16 - }}}} - this->npoint = this->shifts.size(); - } }; -class NextToNearestStencilGeometry5D : public NonLocalStencilGeometry { +class NextToNextToNextToNearestStencilGeometry5D : public NonLocalStencilGeometry5D { public: - NextToNearestStencilGeometry5D(void) : NonLocalStencilGeometry(1) - { - this->BuildShifts(); - }; - virtual ~NextToNearestStencilGeometry5D() {}; - virtual void BuildShifts(void) - { - this->shifts.resize(0); - // Like HDCG: 81 point stencil including self connection - this->shifts.push_back(Coordinate({0,0,0,0,0})); - // +-x, +-y, +-z, +-t : 8 - for(int s=-1;s<=1;s+=2){ - this->shifts.push_back(Coordinate({0,s,0,0,0})); - this->shifts.push_back(Coordinate({0,0,s,0,0})); - this->shifts.push_back(Coordinate({0,0,0,s,0})); - this->shifts.push_back(Coordinate({0,0,0,0,s})); - } - // +-x+-y, +-x+-z, +-x+-t, +-y+-z, +-y+-t, +-z+-t : 24 - for(int s1=-1;s1<=1;s1+=2){ - for(int s2=-1;s2<=1;s2+=2){ - this->shifts.push_back(Coordinate({0,s1,s2,0,0})); - this->shifts.push_back(Coordinate({0,s1,0,s2,0})); - this->shifts.push_back(Coordinate({0,s1,0,0,s2})); - this->shifts.push_back(Coordinate({0,0,s1,s2,0})); - this->shifts.push_back(Coordinate({0,0,s1,0,s2})); - this->shifts.push_back(Coordinate({0,0,0,s1,s2})); - }} - this->npoint = this->shifts.size(); - } -}; -// Need to worry about red-black now -class NextToNextToNextToNearestStencilGeometry5D : public NonLocalStencilGeometry { -public: - NextToNextToNextToNearestStencilGeometry5D(void) : NonLocalStencilGeometry(1) + NextToNextToNextToNearestStencilGeometry5D(GridCartesian *Coarse) : NonLocalStencilGeometry5D(Coarse,4) { this->BuildShifts(); }; - virtual ~NextToNextToNextToNearestStencilGeometry5D() {} - virtual void BuildShifts(void) +}; +class NextToNearestStencilGeometry4D : public NonLocalStencilGeometry4D { +public: + NextToNearestStencilGeometry4D(GridCartesian *Coarse) : NonLocalStencilGeometry4D(Coarse,2) { - this->shifts.resize(0); - // Like HDCG: 81 point stencil including self connection - this->shifts.push_back(Coordinate({0,0,0,0,0})); - // +-x, +-y, +-z, +-t : 8 - for(int s=-1;s<=1;s+=2){ - this->shifts.push_back(Coordinate({0,s,0,0,0})); - this->shifts.push_back(Coordinate({0,0,s,0,0})); - this->shifts.push_back(Coordinate({0,0,0,s,0})); - this->shifts.push_back(Coordinate({0,0,0,0,s})); - } - // +-x+-y, +-x+-z, +-x+-t, +-y+-z, +-y+-t, +-z+-t : 24 - for(int s1=-1;s1<=1;s1+=2){ - for(int s2=-1;s2<=1;s2+=2){ - this->shifts.push_back(Coordinate({0,s1,s2,0,0})); - this->shifts.push_back(Coordinate({0,s1,0,s2,0})); - this->shifts.push_back(Coordinate({0,s1,0,0,s2})); - this->shifts.push_back(Coordinate({0,0,s1,s2,0})); - this->shifts.push_back(Coordinate({0,0,s1,0,s2})); - this->shifts.push_back(Coordinate({0,0,0,s1,s2})); - }} - // +-x+-y+-z, +-x+-y+-z, +-x+-y+-z, - for(int s1=-1;s1<=1;s1+=2){ - for(int s2=-1;s2<=1;s2+=2){ - for(int s3=-1;s3<=1;s3+=2){ - this->shifts.push_back(Coordinate({0,s1,s2,s3,0})); // 8x4 = 32 - this->shifts.push_back(Coordinate({0,s1,s2,0,s3})); - this->shifts.push_back(Coordinate({0,s1,0,s2,s3})); - this->shifts.push_back(Coordinate({0,0,s1,s2,s3})); - }}} - for(int s1=-1;s1<=1;s1+=2){ - for(int s2=-1;s2<=1;s2+=2){ - for(int s3=-1;s3<=1;s3+=2){ - for(int s4=-1;s4<=1;s4+=2){ - this->shifts.push_back(Coordinate({0,s1,s2,s3,s4})); // 16 - }}}} - this->npoint = this->shifts.size(); - } + this->BuildShifts(); + }; +}; +class NextToNearestStencilGeometry5D : public NonLocalStencilGeometry5D { +public: + NextToNearestStencilGeometry5D(GridCartesian *Coarse) : NonLocalStencilGeometry5D(Coarse,2) + { + this->BuildShifts(); + }; +}; +class NearestStencilGeometry4D : public NonLocalStencilGeometry4D { +public: + NearestStencilGeometry4D(GridCartesian *Coarse) : NonLocalStencilGeometry4D(Coarse,1) + { + this->BuildShifts(); + }; +}; +class NearestStencilGeometry5D : public NonLocalStencilGeometry5D { +public: + NearestStencilGeometry5D(GridCartesian *Coarse) : NonLocalStencilGeometry5D(Coarse,1) + { + this->BuildShifts(); + }; }; // Fine Object == (per site) type of fine field @@ -228,7 +193,7 @@ class GeneralCoarsenedMatrix : public SparseMatrixBase siteVector; - typedef Lattice CoarseComplexField; + typedef Lattice > CoarseComplexField; typedef Lattice CoarseVector; typedef Lattice > CoarseMatrix; typedef iMatrix Cobj; @@ -318,10 +283,9 @@ public: int osites=pin.Grid()->oSites(); for(int point=0;point > &linop, - Aggregation & Subspace) + void PopulateAdag(void) { - // Create a random - GridBase *grid = FineGrid(); - FineField MbV(grid); - FineField tmp(grid); - FineField f_src(grid); - FineField f_res(grid); - FineField f_ref(grid); - CoarseVector c_src(CoarseGrid()); - CoarseVector c_res(CoarseGrid()); - CoarseVector coarseInner(CoarseGrid()); - GridParallelRNG RNG(CoarseGrid()); RNG.SeedUniqueString(std::string("Coarse RNG")); - random(RNG,c_src); - blockPromote(c_src,f_src,Subspace.subspace); - linop.op(f_src,f_ref); - this->Mult (_A,c_src,c_res); - blockPromote(c_res,f_res,Subspace.subspace); - std::cout << " GeneralCoarsenedMatrix comparison res "<gSites() ;bidx++){ + Coordinate bcoor; + CoarseGrid()->GlobalIndexToGlobalCoor(bidx,bcoor); + + for(int p=0;pGlobalDimensions()[mu]; + scoor[mu] = (bcoor[mu] - geom.shifts[p][mu] + L) % L; // Modulo arithmetic + } + // Flip to poke/peekLocalSite and not too bad + auto link = peekSite(_A[p],scoor); + int pp = geom.Reverse(p); + pokeSite(adj(link),_Adag[pp],bcoor); + } + } } void CoarsenOperator(LinearOperatorBase > &linop, Aggregation & Subspace) @@ -388,15 +347,17 @@ public: RealD tpeek=0.0; std::cout << GridLogMessage<< "CoarsenMatrix "<< std::endl; GridBase *grid = FineGrid(); + + //////////////////////////////////////////////// // Orthogonalise the subblocks over the basis + //////////////////////////////////////////////// CoarseScalar InnerProd(CoarseGrid()); - for(int b=0;b almost factor of 10 in setup cost, excluding data rearrangement + // + // Intermediates -- ignore the corner terms, leave approximate and force Hermitian + // Intermediates -- pad by 2 and apply 1+8+24 = 33 times. + ///////////////////////////////////////////////////////////// + + ////////////////////////////////////////////////////////// + // BFM HDCG style approach: Solve a system of equations to get Aij + ////////////////////////////////////////////////////////// + /* + * Here, k,l index which possible shift within the 3^Nd "ball" connected by MdagM. + * + * conj(phases[block]) proj[k][ block*Nvec+j ] = \sum_ball e^{i q_k . delta} < phi_{block,j} | MdagM | phi_{(block+delta),i} > + * = \sum_ball e^{iqk.delta} A_ji + * + * Must invert matrix M_k,l = e^[i q_k . delta_l] + * + * Where q_k = delta_k . (2*M_PI/global_nb[mu]) + */ void CoarsenOperatorColoured(LinearOperatorBase > &linop, - Aggregation & Subspace) + Aggregation & Subspace) { std::cout << GridLogMessage<< "CoarsenMatrixColoured "<< std::endl; GridBase *grid = FineGrid(); + RealD tproj=0.0; + RealD teigen=0.0; + RealD tmat=0.0; + RealD tphase=0.0; + RealD tinv=0.0; + ///////////////////////////////////////////////////////////// // Orthogonalise the subblocks over the basis ///////////////////////////////////////////////////////////// CoarseScalar InnerProd(CoarseGrid()); blockOrthogonalise(InnerProd,Subspace.subspace); - const int npoint = geom.npoint + const int npoint = geom.npoint; - // Now compute the matrix elements of linop between this orthonormal - // set of vectors. - FineField bV(grid); - FineField MbV(grid); - FineField tmp(grid); - CoarseVector coarseInner(CoarseGrid()); Coordinate clatt = CoarseGrid()->GlobalDimensions(); - Coordinate steclatt = CoarseGrid()->GlobalDimensions(); - for(int v=0;v dmom = - for(int mu=0;mu<5;mu++){ - dmom[mu] = imom[mu]*2*M_PI/global_nb[mu]; + int Nd = CoarseGrid()->Nd(); + + /* + * Here, k,l index which possible momentum/shift within the N-points connected by MdagM. + * Matrix index i is mapped to this shift via + * geom.shifts[i] + * + * conj(pha[block]) proj[k (which mom)][j (basis vec cpt)][block] + * = \sum_{l in ball} e^{i q_k . delta_l} < phi_{block,j} | MdagM | phi_{(block+delta_l),i} > + * = \sum_{l in ball} e^{iqk.delta_l} A_ji^{b.b+l} + * = M_{kl} A_ji^{b.b+l} + * + * Must assemble and invert matrix M_k,l = e^[i q_k . delta_l] + * + * Where q_k = delta_k . (2*M_PI/global_nb[mu]) + * + * Then A{ji}^{b,b+l} = M^{-1}_{lm} ComputeProj_{m,b,i,j} + */ + teigen-=usecond(); + ComplexD ci(0.0,1.0); + Eigen::MatrixXcd Mkl = Eigen::MatrixXcd::Zero(npoint,npoint); + Eigen::MatrixXcd invMkl = Eigen::MatrixXcd::Zero(npoint,npoint); + for(int k=0;k phase(0.0,0.0); + for(int mu=0;mu - * = \sum_{l in ball} e^{iqk.delta_l} A_ji^{b.b+l} - * = M_{kl} A_ji^{b.b+l} - * - * Must assemble and invert matrix M_k,l = e^[i q_k . delta_l] - * - * Where q_k = delta_k . (2*M_PI/global_nb[mu]) - * - * Then A{ji}^{b,b+l} = M^{-1}_{lm} ComputeProj_{m,b,i,j} - */ + phase=exp(phase*ci); + Mkl(k,l) = phase; } } - + invMkl = Mkl.inverse(); + teigen+=usecond(); + + /////////////////////////////////////////////////////////////////////// + // Now compute the matrix elements of linop between the orthonormal + // set of vectors. + /////////////////////////////////////////////////////////////////////// + FineField phaV(grid); // Phased block basis vector + FineField MphaV(grid);// Matrix applied + CoarseVector coarseInner(CoarseGrid()); + + std::vector ComputeProj(npoint,CoarseGrid()); + std::vector FT(npoint,CoarseGrid()); + for(int i=0;ioSites(); + autoView( A_v , _A[k], AcceleratorWrite); + autoView( FT_v , FT[k], AcceleratorRead); + accelerator_for(sss, osites, 1, { + for(int j=0;j &out){assert(0);}; }; + NAMESPACE_END(Grid);