mirror of
				https://github.com/paboyle/Grid.git
				synced 2025-10-31 20:14:32 +00:00 
			
		
		
		
	Compare commits
	
		
			8 Commits
		
	
	
		
			b58fd80379
			...
			2111e7ab5f
		
	
	| Author | SHA1 | Date | |
|---|---|---|---|
|  | 2111e7ab5f | ||
|  | d29abfdcaf | ||
|  | a751c42cc5 | ||
|  | 6a3bc9865e | ||
|  | 4d5f7e4377 | ||
|  | 78b117fb78 | ||
|  | ded63a1319 | ||
|  | df3e4d1e9c | 
										
											
												File diff suppressed because it is too large
												Load Diff
											
										
									
								
							| @@ -1,573 +0,0 @@ | |||||||
| /************************************************************************************* |  | ||||||
|  |  | ||||||
|     Grid physics library, www.github.com/paboyle/Grid  |  | ||||||
|  |  | ||||||
|     Source file: ./lib/algorithms/GeneralCoarsenedMatrix.h |  | ||||||
|  |  | ||||||
|     Copyright (C) 2015 |  | ||||||
|  |  | ||||||
| Author: Peter Boyle <pboyle@bnl.gov> |  | ||||||
|  |  | ||||||
|     This program is free software; you can redistribute it and/or modify |  | ||||||
|     it under the terms of the GNU General Public License as published by |  | ||||||
|     the Free Software Foundation; either version 2 of the License, or |  | ||||||
|     (at your option) any later version. |  | ||||||
|  |  | ||||||
|     This program is distributed in the hope that it will be useful, |  | ||||||
|     but WITHOUT ANY WARRANTY; without even the implied warranty of |  | ||||||
|     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the |  | ||||||
|     GNU General Public License for more details. |  | ||||||
|  |  | ||||||
|     You should have received a copy of the GNU General Public License along |  | ||||||
|     with this program; if not, write to the Free Software Foundation, Inc., |  | ||||||
|     51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. |  | ||||||
|  |  | ||||||
|     See the full license in the file "LICENSE" in the top level distribution directory |  | ||||||
| *************************************************************************************/ |  | ||||||
| /*  END LEGAL */ |  | ||||||
| #pragma once |  | ||||||
|  |  | ||||||
| #include <Grid/qcd/QCD.h> // needed for Dagger(Yes|No), Inverse(Yes|No) |  | ||||||
|  |  | ||||||
| #include <Grid/lattice/PaddedCell.h> |  | ||||||
| #include <Grid/stencil/GeneralLocalStencil.h> |  | ||||||
|  |  | ||||||
| NAMESPACE_BEGIN(Grid); |  | ||||||
|  |  | ||||||
| // Fixme need coalesced read gpermute |  | ||||||
| template<class vobj> void gpermute(vobj & inout,int perm){ |  | ||||||
|   vobj tmp=inout; |  | ||||||
|   if (perm & 0x1 ) { permute(inout,tmp,0); tmp=inout;} |  | ||||||
|   if (perm & 0x2 ) { permute(inout,tmp,1); tmp=inout;} |  | ||||||
|   if (perm & 0x4 ) { permute(inout,tmp,2); tmp=inout;} |  | ||||||
|   if (perm & 0x8 ) { permute(inout,tmp,3); tmp=inout;} |  | ||||||
| } |  | ||||||
|  |  | ||||||
| ///////////////////////////////////////////////////////////////// |  | ||||||
| // Reuse Aggregation class from CoarsenedMatrix for now |  | ||||||
| // Might think about *smoothed* Aggregation |  | ||||||
| // Equivalent of Geometry class in cartesian case |  | ||||||
| ///////////////////////////////////////////////////////////////// |  | ||||||
| class NonLocalStencilGeometry { |  | ||||||
| public: |  | ||||||
|   int depth; |  | ||||||
|   int hops; |  | ||||||
|   int npoint; |  | ||||||
|   std::vector<Coordinate> shifts; |  | ||||||
|   Coordinate stencil_size; |  | ||||||
|   Coordinate stencil_lo; |  | ||||||
|   Coordinate stencil_hi; |  | ||||||
|   GridCartesian *grid; |  | ||||||
|   GridCartesian *Grid() {return grid;}; |  | ||||||
|   int Depth(void){return 1;};   // Ghost zone depth |  | ||||||
|   int Hops(void){return hops;}; // # of hops=> level of corner fill in in stencil |  | ||||||
|  |  | ||||||
|   virtual int DimSkip(void) =0; |  | ||||||
|  |  | ||||||
|   virtual ~NonLocalStencilGeometry() {}; |  | ||||||
|  |  | ||||||
|   int  Reverse(int point) |  | ||||||
|   { |  | ||||||
|     int Nd = Grid()->Nd(); |  | ||||||
|     Coordinate shft = shifts[point]; |  | ||||||
|     Coordinate rev(Nd); |  | ||||||
|     for(int mu=0;mu<Nd;mu++) rev[mu]= -shft[mu]; |  | ||||||
|     for(int p=0;p<npoint;p++){ |  | ||||||
|       if(rev==shifts[p]){ |  | ||||||
| 	return p; |  | ||||||
|       } |  | ||||||
|     } |  | ||||||
|     assert(0); |  | ||||||
|     return -1; |  | ||||||
|   } |  | ||||||
|   void BuildShifts(void) |  | ||||||
|   { |  | ||||||
|     this->shifts.resize(0); |  | ||||||
|     int Nd = this->grid->Nd(); |  | ||||||
|  |  | ||||||
|     int dd = this->DimSkip(); |  | ||||||
|     for(int s0=this->stencil_lo[dd+0];s0<=this->stencil_hi[dd+0];s0++){ |  | ||||||
|     for(int s1=this->stencil_lo[dd+1];s1<=this->stencil_hi[dd+1];s1++){ |  | ||||||
|     for(int s2=this->stencil_lo[dd+2];s2<=this->stencil_hi[dd+2];s2++){ |  | ||||||
|     for(int s3=this->stencil_lo[dd+3];s3<=this->stencil_hi[dd+3];s3++){ |  | ||||||
|       Coordinate sft(Nd,0); |  | ||||||
|       sft[dd+0] = s0; |  | ||||||
|       sft[dd+1] = s1; |  | ||||||
|       sft[dd+2] = s2; |  | ||||||
|       sft[dd+3] = s3; |  | ||||||
|       int nhops = abs(s0)+abs(s1)+abs(s2)+abs(s3); |  | ||||||
|       if(nhops<=this->hops) this->shifts.push_back(sft); |  | ||||||
|     }}}} |  | ||||||
|     this->npoint = this->shifts.size(); |  | ||||||
|     std::cout << GridLogMessage << "NonLocalStencilGeometry has "<< this->npoint << " terms in stencil "<<std::endl; |  | ||||||
|   } |  | ||||||
|    |  | ||||||
|   NonLocalStencilGeometry(GridCartesian *_coarse_grid,int _hops) : grid(_coarse_grid), hops(_hops) |  | ||||||
|   { |  | ||||||
|     Coordinate latt = grid->GlobalDimensions(); |  | ||||||
|     stencil_size.resize(grid->Nd()); |  | ||||||
|     stencil_lo.resize(grid->Nd()); |  | ||||||
|     stencil_hi.resize(grid->Nd()); |  | ||||||
|     for(int d=0;d<grid->Nd();d++){ |  | ||||||
|      if ( latt[d] == 1 ) { |  | ||||||
|       stencil_lo[d] = 0; |  | ||||||
|       stencil_hi[d] = 0; |  | ||||||
|       stencil_size[d]= 1; |  | ||||||
|      } else if ( latt[d] == 2 ) { |  | ||||||
|       stencil_lo[d] = -1; |  | ||||||
|       stencil_hi[d] = 0; |  | ||||||
|       stencil_size[d]= 2; |  | ||||||
|      } else if ( latt[d] > 2 ) { |  | ||||||
|        stencil_lo[d] = -1; |  | ||||||
|        stencil_hi[d] =  1; |  | ||||||
|        stencil_size[d]= 3; |  | ||||||
|      } |  | ||||||
|     } |  | ||||||
|   }; |  | ||||||
|  |  | ||||||
| }; |  | ||||||
|  |  | ||||||
| // Need to worry about red-black now |  | ||||||
| class NonLocalStencilGeometry4D : public NonLocalStencilGeometry { |  | ||||||
| public: |  | ||||||
|   virtual int DimSkip(void) { return 0;}; |  | ||||||
|   NonLocalStencilGeometry4D(GridCartesian *Coarse,int _hops) : NonLocalStencilGeometry(Coarse,_hops) { }; |  | ||||||
|   virtual ~NonLocalStencilGeometry4D() {}; |  | ||||||
| }; |  | ||||||
| class NonLocalStencilGeometry5D : public NonLocalStencilGeometry { |  | ||||||
| public: |  | ||||||
|   virtual int DimSkip(void) { return 1; };  |  | ||||||
|   NonLocalStencilGeometry5D(GridCartesian *Coarse,int _hops) : NonLocalStencilGeometry(Coarse,_hops)  { }; |  | ||||||
|   virtual ~NonLocalStencilGeometry5D() {}; |  | ||||||
| }; |  | ||||||
| /* |  | ||||||
|  * Bunch of different options classes |  | ||||||
|  */ |  | ||||||
| class NextToNextToNextToNearestStencilGeometry4D : public NonLocalStencilGeometry4D { |  | ||||||
| public: |  | ||||||
|   NextToNextToNextToNearestStencilGeometry4D(GridCartesian *Coarse) :  NonLocalStencilGeometry4D(Coarse,4) |  | ||||||
|   { |  | ||||||
|     this->BuildShifts(); |  | ||||||
|   }; |  | ||||||
| }; |  | ||||||
| class NextToNextToNextToNearestStencilGeometry5D : public  NonLocalStencilGeometry5D { |  | ||||||
| public: |  | ||||||
|   NextToNextToNextToNearestStencilGeometry5D(GridCartesian *Coarse) :  NonLocalStencilGeometry5D(Coarse,4) |  | ||||||
|   { |  | ||||||
|     this->BuildShifts(); |  | ||||||
|   }; |  | ||||||
| }; |  | ||||||
| class NextToNearestStencilGeometry4D : public  NonLocalStencilGeometry4D { |  | ||||||
| public: |  | ||||||
|   NextToNearestStencilGeometry4D(GridCartesian *Coarse) :  NonLocalStencilGeometry4D(Coarse,2) |  | ||||||
|   { |  | ||||||
|     this->BuildShifts(); |  | ||||||
|   }; |  | ||||||
| }; |  | ||||||
| class NextToNearestStencilGeometry5D : public  NonLocalStencilGeometry5D { |  | ||||||
| public: |  | ||||||
|   NextToNearestStencilGeometry5D(GridCartesian *Coarse) :  NonLocalStencilGeometry5D(Coarse,2) |  | ||||||
|   { |  | ||||||
|     this->BuildShifts(); |  | ||||||
|   }; |  | ||||||
| }; |  | ||||||
| class NearestStencilGeometry4D : public  NonLocalStencilGeometry4D { |  | ||||||
| public: |  | ||||||
|   NearestStencilGeometry4D(GridCartesian *Coarse) :  NonLocalStencilGeometry4D(Coarse,1) |  | ||||||
|   { |  | ||||||
|     this->BuildShifts(); |  | ||||||
|   }; |  | ||||||
| }; |  | ||||||
| class NearestStencilGeometry5D : public  NonLocalStencilGeometry5D { |  | ||||||
| public: |  | ||||||
|   NearestStencilGeometry5D(GridCartesian *Coarse) :  NonLocalStencilGeometry5D(Coarse,1) |  | ||||||
|   { |  | ||||||
|     this->BuildShifts(); |  | ||||||
|   }; |  | ||||||
| }; |  | ||||||
|  |  | ||||||
| // Fine Object == (per site) type of fine field |  | ||||||
| // nbasis      == number of deflation vectors |  | ||||||
| template<class Fobj,class CComplex,int nbasis> |  | ||||||
| class GeneralCoarsenedMatrix : public SparseMatrixBase<Lattice<iVector<CComplex,nbasis > > >  { |  | ||||||
| public: |  | ||||||
|  |  | ||||||
|   typedef GeneralCoarsenedMatrix<Fobj,CComplex,nbasis> GeneralCoarseOp; |  | ||||||
|   typedef iVector<CComplex,nbasis >           siteVector; |  | ||||||
|   typedef iMatrix<CComplex,nbasis >           siteMatrix; |  | ||||||
|   typedef Lattice<iScalar<CComplex> >         CoarseComplexField; |  | ||||||
|   typedef Lattice<siteVector>                 CoarseVector; |  | ||||||
|   typedef Lattice<iMatrix<CComplex,nbasis > > CoarseMatrix; |  | ||||||
|   typedef iMatrix<CComplex,nbasis >  Cobj; |  | ||||||
|   typedef Lattice< CComplex >   CoarseScalar; // used for inner products on fine field |  | ||||||
|   typedef Lattice<Fobj >        FineField; |  | ||||||
|   typedef CoarseVector Field; |  | ||||||
|   //////////////////// |  | ||||||
|   // Data members |  | ||||||
|   //////////////////// |  | ||||||
|   int hermitian; |  | ||||||
|   GridBase      *       _FineGrid;  |  | ||||||
|   GridCartesian *       _CoarseGrid;  |  | ||||||
|   NonLocalStencilGeometry &geom; |  | ||||||
|   PaddedCell Cell; |  | ||||||
|   GeneralLocalStencil Stencil; |  | ||||||
|    |  | ||||||
|   std::vector<CoarseMatrix> _A; |  | ||||||
|   std::vector<CoarseMatrix> _Adag; |  | ||||||
|  |  | ||||||
|   /////////////////////// |  | ||||||
|   // Interface |  | ||||||
|   /////////////////////// |  | ||||||
|   GridBase      * Grid(void)           { return _FineGrid; };   // this is all the linalg routines need to know |  | ||||||
|   GridBase      * FineGrid(void)       { return _FineGrid; };   // this is all the linalg routines need to know |  | ||||||
|   GridCartesian * CoarseGrid(void)     { return _CoarseGrid; };   // this is all the linalg routines need to know |  | ||||||
|  |  | ||||||
|  |  | ||||||
|   void ProjectNearestNeighbour(RealD shift, GeneralCoarseOp &CopyMe) |  | ||||||
|   { |  | ||||||
|     int nfound=0; |  | ||||||
|     std::cout << " ProjectNearestNeighbour "<< CopyMe._A[0].Grid()<<std::endl; |  | ||||||
|     for(int p=0;p<geom.npoint;p++){ |  | ||||||
|       for(int pp=0;pp<CopyMe.geom.npoint;pp++){ |  | ||||||
|  	// Search for the same relative shift |  | ||||||
| 	// Avoids brutal handling of Grid pointers |  | ||||||
| 	if ( CopyMe.geom.shifts[pp]==geom.shifts[p] ) { |  | ||||||
| 	  _A[p] = CopyMe.Cell.Extract(CopyMe._A[pp]); |  | ||||||
| 	  _Adag[p] = CopyMe.Cell.Extract(CopyMe._Adag[pp]); |  | ||||||
| 	  nfound++; |  | ||||||
| 	} |  | ||||||
|       } |  | ||||||
|     } |  | ||||||
|     assert(nfound==geom.npoint); |  | ||||||
|     ExchangeCoarseLinks(); |  | ||||||
|   } |  | ||||||
|    |  | ||||||
|   GeneralCoarsenedMatrix(NonLocalStencilGeometry &_geom,GridBase *FineGrid, GridCartesian * CoarseGrid) |  | ||||||
|     : geom(_geom), |  | ||||||
|       _FineGrid(FineGrid), |  | ||||||
|       _CoarseGrid(CoarseGrid), |  | ||||||
|       hermitian(1), |  | ||||||
|       Cell(_geom.Depth(),_CoarseGrid), |  | ||||||
|       Stencil(Cell.grids.back(),geom.shifts) |  | ||||||
|   { |  | ||||||
|     { |  | ||||||
|       int npoint = _geom.npoint; |  | ||||||
|       autoView( Stencil_v  , Stencil, AcceleratorRead); |  | ||||||
|       int osites=Stencil.Grid()->oSites(); |  | ||||||
|       for(int ss=0;ss<osites;ss++){ |  | ||||||
| 	for(int point=0;point<npoint;point++){ |  | ||||||
| 	  auto SE = Stencil_v.GetEntry(point,ss); |  | ||||||
| 	  int o = SE->_offset; |  | ||||||
| 	  assert( o< osites); |  | ||||||
| 	} |  | ||||||
|       }     |  | ||||||
|     } |  | ||||||
|  |  | ||||||
|     _A.resize(geom.npoint,CoarseGrid); |  | ||||||
|     _Adag.resize(geom.npoint,CoarseGrid); |  | ||||||
|   } |  | ||||||
|   void M (const CoarseVector &in, CoarseVector &out) |  | ||||||
|   { |  | ||||||
|     Mult(_A,in,out); |  | ||||||
|   } |  | ||||||
|   void Mdag (const CoarseVector &in, CoarseVector &out) |  | ||||||
|   { |  | ||||||
|     if ( hermitian ) M(in,out); |  | ||||||
|     else Mult(_Adag,in,out); |  | ||||||
|   } |  | ||||||
|   void Mult (std::vector<CoarseMatrix> &A,const CoarseVector &in, CoarseVector &out) |  | ||||||
|   { |  | ||||||
|     RealD tviews=0; |  | ||||||
|     RealD ttot=0; |  | ||||||
|     RealD tmult=0; |  | ||||||
|     RealD texch=0; |  | ||||||
|     RealD text=0; |  | ||||||
|     ttot=-usecond(); |  | ||||||
|     conformable(CoarseGrid(),in.Grid()); |  | ||||||
|     conformable(in.Grid(),out.Grid()); |  | ||||||
|     out.Checkerboard() = in.Checkerboard(); |  | ||||||
|     CoarseVector tin=in; |  | ||||||
|  |  | ||||||
|     texch-=usecond(); |  | ||||||
|     CoarseVector pin  = Cell.Exchange(tin); |  | ||||||
|     texch+=usecond(); |  | ||||||
|  |  | ||||||
|     CoarseVector pout(pin.Grid()); pout=Zero(); |  | ||||||
|  |  | ||||||
|     int npoint = geom.npoint; |  | ||||||
|     typedef LatticeView<Cobj> Aview; |  | ||||||
|        |  | ||||||
|     const int Nsimd = CComplex::Nsimd(); |  | ||||||
|      |  | ||||||
|     int osites=pin.Grid()->oSites(); |  | ||||||
|     //    int gsites=pin.Grid()->gSites(); |  | ||||||
|  |  | ||||||
|     RealD flops = 1.0* npoint * nbasis * nbasis * 8 * osites; |  | ||||||
|     RealD bytes = (1.0*osites*sizeof(siteMatrix)*npoint+2.0*osites*sizeof(siteVector))*npoint; |  | ||||||
|        |  | ||||||
|     //    for(int point=0;point<npoint;point++){ |  | ||||||
|     //      conformable(A[point],pin); |  | ||||||
|     //    } |  | ||||||
|  |  | ||||||
|     { |  | ||||||
|       tviews-=usecond(); |  | ||||||
|       autoView( in_v , pin, AcceleratorRead); |  | ||||||
|       autoView( out_v , pout, AcceleratorWrite); |  | ||||||
|       autoView( Stencil_v  , Stencil, AcceleratorRead); |  | ||||||
|       tviews+=usecond(); |  | ||||||
|        |  | ||||||
|       for(int point=0;point<npoint;point++){ |  | ||||||
| 	tviews-=usecond(); |  | ||||||
| 	autoView( A_v, A[point],AcceleratorRead); |  | ||||||
| 	tviews+=usecond(); |  | ||||||
| 	tmult-=usecond(); |  | ||||||
| 	accelerator_for(sss, osites*nbasis, Nsimd, { |  | ||||||
|  |  | ||||||
| 	    typedef decltype(coalescedRead(in_v[0]))    calcVector; |  | ||||||
|  |  | ||||||
| 	    int ss = sss/nbasis; |  | ||||||
| 	    int b  = sss%nbasis; |  | ||||||
|  |  | ||||||
| 	    auto SE  = Stencil_v.GetEntry(point,ss); |  | ||||||
| 	    auto nbr = coalescedReadGeneralPermute(in_v[SE->_offset],SE->_permute,Nd); |  | ||||||
| 	    auto res = out_v(ss)(b); |  | ||||||
| 	    for(int bb=0;bb<nbasis;bb++) { |  | ||||||
| 	      res = res + coalescedRead(A_v[ss](b,bb))*nbr(bb); |  | ||||||
| 	    } |  | ||||||
| 	    coalescedWrite(out_v[ss](b),res); |  | ||||||
| 	}); |  | ||||||
|  |  | ||||||
| 	tmult+=usecond(); |  | ||||||
|       } |  | ||||||
|     } |  | ||||||
|     text-=usecond(); |  | ||||||
|     out = Cell.Extract(pout); |  | ||||||
|     text+=usecond(); |  | ||||||
|     ttot+=usecond(); |  | ||||||
|  |  | ||||||
|     std::cout << GridLogPerformance<<"Coarse Mult Aviews "<<tviews<<" us"<<std::endl; |  | ||||||
|     std::cout << GridLogPerformance<<"Coarse Mult exch "<<texch<<" us"<<std::endl; |  | ||||||
|     std::cout << GridLogPerformance<<"Coarse Mult mult "<<tmult<<" us"<<std::endl; |  | ||||||
|     std::cout << GridLogPerformance<<"Coarse Mult ext  "<<text<<" us"<<std::endl; |  | ||||||
|     std::cout << GridLogPerformance<<"Coarse Mult tot  "<<ttot<<" us"<<std::endl; |  | ||||||
|     std::cout << GridLogPerformance<<"Coarse Kernel "<< flops/tmult<<" mflop/s"<<std::endl; |  | ||||||
|     std::cout << GridLogPerformance<<"Coarse Kernel "<< bytes/tmult<<" MB/s"<<std::endl; |  | ||||||
|     std::cout << GridLogPerformance<<"Coarse flops/s "<< flops/ttot<<" mflop/s"<<std::endl; |  | ||||||
|     std::cout << GridLogPerformance<<"Coarse bytes   "<< bytes/1e6<<" MB"<<std::endl; |  | ||||||
|   }; |  | ||||||
|  |  | ||||||
|   void PopulateAdag(void) |  | ||||||
|   { |  | ||||||
|     for(int64_t bidx=0;bidx<CoarseGrid()->gSites() ;bidx++){ |  | ||||||
|       Coordinate bcoor; |  | ||||||
|       CoarseGrid()->GlobalIndexToGlobalCoor(bidx,bcoor); |  | ||||||
|        |  | ||||||
|       for(int p=0;p<geom.npoint;p++){ |  | ||||||
| 	Coordinate scoor = bcoor; |  | ||||||
| 	for(int mu=0;mu<bcoor.size();mu++){ |  | ||||||
| 	  int L = CoarseGrid()->GlobalDimensions()[mu]; |  | ||||||
| 	  scoor[mu] = (bcoor[mu] - geom.shifts[p][mu] + L) % L; // Modulo arithmetic |  | ||||||
| 	} |  | ||||||
| 	// Flip to poke/peekLocalSite and not too bad |  | ||||||
| 	auto link = peekSite(_A[p],scoor); |  | ||||||
| 	int pp = geom.Reverse(p); |  | ||||||
| 	pokeSite(adj(link),_Adag[pp],bcoor); |  | ||||||
|       } |  | ||||||
|     } |  | ||||||
|   } |  | ||||||
|   ///////////////////////////////////////////////////////////// |  | ||||||
|   //  |  | ||||||
|   // A) Only reduced flops option is to use a padded cell of depth 4 |  | ||||||
|   // and apply MpcDagMpc in the padded cell. |  | ||||||
|   // |  | ||||||
|   // Makes for ONE application of MpcDagMpc per vector instead of 30 or 80. |  | ||||||
|   // With the effective cell size around (B+8)^4 perhaps 12^4/4^4 ratio |  | ||||||
|   // Cost is 81x more, same as stencil size. |  | ||||||
|   // |  | ||||||
|   // But: can eliminate comms and do as local dirichlet. |  | ||||||
|   // |  | ||||||
|   // Local exchange gauge field once. |  | ||||||
|   // Apply to all vectors, local only computation. |  | ||||||
|   // Must exchange ghost subcells in reverse process of PaddedCell to take inner products |  | ||||||
|   // |  | ||||||
|   // B) Can reduce cost: pad by 1, apply Deo      (4^4+6^4+8^4+8^4 )/ (4x 4^4) |  | ||||||
|   //                     pad by 2, apply Doe |  | ||||||
|   //                     pad by 3, apply Deo |  | ||||||
|   //                     then break out 8x directions; cost is ~10x MpcDagMpc per vector |  | ||||||
|   // |  | ||||||
|   // => almost factor of 10 in setup cost, excluding data rearrangement |  | ||||||
|   // |  | ||||||
|   // Intermediates -- ignore the corner terms, leave approximate and force Hermitian |  | ||||||
|   // Intermediates -- pad by 2 and apply 1+8+24 = 33 times. |  | ||||||
|   ///////////////////////////////////////////////////////////// |  | ||||||
|  |  | ||||||
|     ////////////////////////////////////////////////////////// |  | ||||||
|     // BFM HDCG style approach: Solve a system of equations to get Aij |  | ||||||
|     ////////////////////////////////////////////////////////// |  | ||||||
|     /* |  | ||||||
|      *     Here, k,l index which possible shift within the 3^Nd "ball" connected by MdagM. |  | ||||||
|      * |  | ||||||
|      *     conj(phases[block]) proj[k][ block*Nvec+j ] =  \sum_ball  e^{i q_k . delta} < phi_{block,j} | MdagM | phi_{(block+delta),i} >  |  | ||||||
|      *                                                 =  \sum_ball e^{iqk.delta} A_ji |  | ||||||
|      * |  | ||||||
|      *     Must invert matrix M_k,l = e^[i q_k . delta_l] |  | ||||||
|      * |  | ||||||
|      *     Where q_k = delta_k . (2*M_PI/global_nb[mu]) |  | ||||||
|      */ |  | ||||||
|   void CoarsenOperator(LinearOperatorBase<Lattice<Fobj> > &linop, |  | ||||||
| 		       Aggregation<Fobj,CComplex,nbasis> & Subspace) |  | ||||||
|   { |  | ||||||
|     std::cout << GridLogMessage<< "GeneralCoarsenMatrix "<< std::endl; |  | ||||||
|     GridBase *grid = FineGrid(); |  | ||||||
|  |  | ||||||
|     RealD tproj=0.0; |  | ||||||
|     RealD teigen=0.0; |  | ||||||
|     RealD tmat=0.0; |  | ||||||
|     RealD tphase=0.0; |  | ||||||
|     RealD tinv=0.0; |  | ||||||
|  |  | ||||||
|     ///////////////////////////////////////////////////////////// |  | ||||||
|     // Orthogonalise the subblocks over the basis |  | ||||||
|     ///////////////////////////////////////////////////////////// |  | ||||||
|     CoarseScalar InnerProd(CoarseGrid());  |  | ||||||
|     blockOrthogonalise(InnerProd,Subspace.subspace); |  | ||||||
|  |  | ||||||
|     const int npoint = geom.npoint; |  | ||||||
|        |  | ||||||
|     Coordinate clatt = CoarseGrid()->GlobalDimensions(); |  | ||||||
|     int Nd = CoarseGrid()->Nd(); |  | ||||||
|  |  | ||||||
|       /* |  | ||||||
|        *     Here, k,l index which possible momentum/shift within the N-points connected by MdagM. |  | ||||||
|        *     Matrix index i is mapped to this shift via  |  | ||||||
|        *               geom.shifts[i] |  | ||||||
|        * |  | ||||||
|        *     conj(pha[block]) proj[k (which mom)][j (basis vec cpt)][block]  |  | ||||||
|        *       =  \sum_{l in ball}  e^{i q_k . delta_l} < phi_{block,j} | MdagM | phi_{(block+delta_l),i} >  |  | ||||||
|        *       =  \sum_{l in ball} e^{iqk.delta_l} A_ji^{b.b+l} |  | ||||||
|        *       = M_{kl} A_ji^{b.b+l} |  | ||||||
|        * |  | ||||||
|        *     Must assemble and invert matrix M_k,l = e^[i q_k . delta_l] |  | ||||||
|        *   |  | ||||||
|        *     Where q_k = delta_k . (2*M_PI/global_nb[mu]) |  | ||||||
|        * |  | ||||||
|        *     Then A{ji}^{b,b+l} = M^{-1}_{lm} ComputeProj_{m,b,i,j} |  | ||||||
|        */ |  | ||||||
|     teigen-=usecond(); |  | ||||||
|     Eigen::MatrixXcd Mkl    = Eigen::MatrixXcd::Zero(npoint,npoint); |  | ||||||
|     Eigen::MatrixXcd invMkl = Eigen::MatrixXcd::Zero(npoint,npoint); |  | ||||||
|     ComplexD ci(0.0,1.0); |  | ||||||
|     for(int k=0;k<npoint;k++){ // Loop over momenta |  | ||||||
|  |  | ||||||
|       for(int l=0;l<npoint;l++){ // Loop over nbr relative |  | ||||||
| 	ComplexD phase(0.0,0.0); |  | ||||||
| 	for(int mu=0;mu<Nd;mu++){ |  | ||||||
| 	  RealD TwoPiL =  M_PI * 2.0/ clatt[mu]; |  | ||||||
| 	  phase=phase+TwoPiL*geom.shifts[k][mu]*geom.shifts[l][mu]; |  | ||||||
| 	} |  | ||||||
| 	phase=exp(phase*ci); |  | ||||||
| 	Mkl(k,l) = phase; |  | ||||||
|       } |  | ||||||
|     } |  | ||||||
|     invMkl = Mkl.inverse(); |  | ||||||
|     teigen+=usecond(); |  | ||||||
|  |  | ||||||
|     /////////////////////////////////////////////////////////////////////// |  | ||||||
|     // Now compute the matrix elements of linop between the orthonormal |  | ||||||
|     // set of vectors. |  | ||||||
|     /////////////////////////////////////////////////////////////////////// |  | ||||||
|     FineField phaV(grid); // Phased block basis vector |  | ||||||
|     FineField MphaV(grid);// Matrix applied |  | ||||||
|     CoarseVector coarseInner(CoarseGrid()); |  | ||||||
|  |  | ||||||
|     std::vector<CoarseVector> ComputeProj(npoint,CoarseGrid()); |  | ||||||
|     std::vector<CoarseVector>          FT(npoint,CoarseGrid()); |  | ||||||
|     for(int i=0;i<nbasis;i++){// Loop over basis vectors |  | ||||||
|       std::cout << GridLogMessage<< "CoarsenMatrixColoured vec "<<i<<"/"<<nbasis<< std::endl; |  | ||||||
|       for(int p=0;p<npoint;p++){ // Loop over momenta in npoint |  | ||||||
| 	///////////////////////////////////////////////////// |  | ||||||
| 	// Stick a phase on every block |  | ||||||
| 	///////////////////////////////////////////////////// |  | ||||||
| 	tphase-=usecond(); |  | ||||||
| 	CoarseComplexField coor(CoarseGrid()); |  | ||||||
| 	CoarseComplexField pha(CoarseGrid());	pha=Zero(); |  | ||||||
| 	for(int mu=0;mu<Nd;mu++){ |  | ||||||
| 	  LatticeCoordinate(coor,mu); |  | ||||||
| 	  RealD TwoPiL =  M_PI * 2.0/ clatt[mu]; |  | ||||||
| 	  pha = pha + (TwoPiL * geom.shifts[p][mu]) * coor; |  | ||||||
| 	} |  | ||||||
| 	pha  =exp(pha*ci); |  | ||||||
| 	phaV=Zero(); |  | ||||||
| 	blockZAXPY(phaV,pha,Subspace.subspace[i],phaV); |  | ||||||
| 	tphase+=usecond(); |  | ||||||
|  |  | ||||||
| 	///////////////////////////////////////////////////////////////////// |  | ||||||
| 	// Multiple phased subspace vector by matrix and project to subspace |  | ||||||
| 	// Remove local bulk phase to leave relative phases |  | ||||||
| 	///////////////////////////////////////////////////////////////////// |  | ||||||
| 	tmat-=usecond(); |  | ||||||
| 	linop.Op(phaV,MphaV); |  | ||||||
| 	tmat+=usecond(); |  | ||||||
|  |  | ||||||
| 	tproj-=usecond(); |  | ||||||
| 	blockProject(coarseInner,MphaV,Subspace.subspace); |  | ||||||
| 	coarseInner = conjugate(pha) * coarseInner; |  | ||||||
|  |  | ||||||
| 	ComputeProj[p] = coarseInner; |  | ||||||
| 	tproj+=usecond(); |  | ||||||
|  |  | ||||||
|       } |  | ||||||
|  |  | ||||||
|       tinv-=usecond(); |  | ||||||
|       for(int k=0;k<npoint;k++){ |  | ||||||
| 	FT[k] = Zero(); |  | ||||||
| 	for(int l=0;l<npoint;l++){ |  | ||||||
| 	  FT[k]= FT[k]+ invMkl(l,k)*ComputeProj[l]; |  | ||||||
| 	} |  | ||||||
|        |  | ||||||
| 	int osites=CoarseGrid()->oSites(); |  | ||||||
| 	autoView( A_v  , _A[k], AcceleratorWrite); |  | ||||||
| 	autoView( FT_v  , FT[k], AcceleratorRead); |  | ||||||
| 	accelerator_for(sss, osites, 1, { |  | ||||||
| 	    for(int j=0;j<nbasis;j++){ |  | ||||||
| 	      A_v[sss](j,i) = FT_v[sss](j); |  | ||||||
| 	    } |  | ||||||
|         }); |  | ||||||
|       } |  | ||||||
|       tinv+=usecond(); |  | ||||||
|     } |  | ||||||
|  |  | ||||||
|     for(int p=0;p<geom.npoint;p++){ |  | ||||||
|       Coordinate coor({0,0,0,0,0}); |  | ||||||
|       auto sval = peekSite(_A[p],coor); |  | ||||||
|     } |  | ||||||
|  |  | ||||||
|     // Only needed if nonhermitian |  | ||||||
|     if ( ! hermitian ) { |  | ||||||
|       std::cout << GridLogMessage<<"PopulateAdag  "<<std::endl; |  | ||||||
|       PopulateAdag(); |  | ||||||
|     } |  | ||||||
|  |  | ||||||
|     // Need to write something to populate Adag from A |  | ||||||
|     std::cout << GridLogMessage<<"ExchangeCoarseLinks  "<<std::endl; |  | ||||||
|     ExchangeCoarseLinks(); |  | ||||||
|     std::cout << GridLogMessage<<"CoarsenOperator eigen  "<<teigen<<" us"<<std::endl; |  | ||||||
|     std::cout << GridLogMessage<<"CoarsenOperator phase  "<<tphase<<" us"<<std::endl; |  | ||||||
|     std::cout << GridLogMessage<<"CoarsenOperator mat    "<<tmat <<" us"<<std::endl; |  | ||||||
|     std::cout << GridLogMessage<<"CoarsenOperator proj   "<<tproj<<" us"<<std::endl; |  | ||||||
|     std::cout << GridLogMessage<<"CoarsenOperator inv    "<<tinv<<" us"<<std::endl; |  | ||||||
|   } |  | ||||||
|   void ExchangeCoarseLinks(void){ |  | ||||||
|     for(int p=0;p<geom.npoint;p++){ |  | ||||||
|       std::cout << "Exchange "<<p<<std::endl; |  | ||||||
|       _A[p] = Cell.Exchange(_A[p]); |  | ||||||
|       _Adag[p]= Cell.Exchange(_Adag[p]); |  | ||||||
|     } |  | ||||||
|   } |  | ||||||
|   virtual  void Mdiag    (const Field &in, Field &out){ assert(0);}; |  | ||||||
|   virtual  void Mdir     (const Field &in, Field &out,int dir, int disp){assert(0);}; |  | ||||||
|   virtual  void MdirAll  (const Field &in, std::vector<Field> &out){assert(0);}; |  | ||||||
| }; |  | ||||||
|  |  | ||||||
|  |  | ||||||
| NAMESPACE_END(Grid); |  | ||||||
| @@ -38,7 +38,6 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk> | |||||||
|    * Vstart = P^Tx + Qb |    * Vstart = P^Tx + Qb | ||||||
|    * M1 = P^TM + Q |    * M1 = P^TM + Q | ||||||
|    * M2=M3=1 |    * M2=M3=1 | ||||||
|    * Vout = x |  | ||||||
|    */ |    */ | ||||||
| NAMESPACE_BEGIN(Grid); | NAMESPACE_BEGIN(Grid); | ||||||
|  |  | ||||||
| @@ -68,14 +67,13 @@ class TwoLevelCG : public LinearFunction<Field> | |||||||
|     grid       = fine; |     grid       = fine; | ||||||
|   }; |   }; | ||||||
|    |    | ||||||
|   virtual void operator() (const Field &src, Field &psi) |   virtual void operator() (const Field &src, Field &x) | ||||||
|   { |   { | ||||||
|     Field resid(grid); |     Field resid(grid); | ||||||
|     RealD f; |     RealD f; | ||||||
|     RealD rtzp,rtz,a,d,b; |     RealD rtzp,rtz,a,d,b; | ||||||
|     RealD rptzp; |     RealD rptzp; | ||||||
|      |      | ||||||
|     Field x(grid);  |  | ||||||
|     Field p(grid); |     Field p(grid); | ||||||
|     Field z(grid); |     Field z(grid); | ||||||
|     Field tmp(grid); |     Field tmp(grid); | ||||||
| @@ -85,7 +83,6 @@ class TwoLevelCG : public LinearFunction<Field> | |||||||
|     Field rp (grid); |     Field rp (grid); | ||||||
|      |      | ||||||
|     //Initial residual computation & set up |     //Initial residual computation & set up | ||||||
|     RealD guess = norm2(psi); |  | ||||||
|     double tn; |     double tn; | ||||||
|  |  | ||||||
|     GridStopWatch HDCGTimer; |     GridStopWatch HDCGTimer; | ||||||
| @@ -165,14 +162,22 @@ class TwoLevelCG : public LinearFunction<Field> | |||||||
| 	RealD  srcnorm = sqrt(norm2(src)); | 	RealD  srcnorm = sqrt(norm2(src)); | ||||||
| 	RealD  tmpnorm = sqrt(norm2(tmp)); | 	RealD  tmpnorm = sqrt(norm2(tmp)); | ||||||
| 	RealD  true_residual = tmpnorm/srcnorm; | 	RealD  true_residual = tmpnorm/srcnorm; | ||||||
| 	std::cout<<GridLogMessage<<"HDCG: true residual is "<<true_residual | 	std::cout<<GridLogMessage | ||||||
| 		 <<" solution "<<xnorm<<" source "<<srcnorm<<std::endl; | 		 <<"HDCG: true residual is "<<true_residual | ||||||
|  | 		 <<" solution "<<xnorm | ||||||
|  | 		 <<" source "<<srcnorm | ||||||
|  | 		 <<" mmp "<<mmpnorm	   | ||||||
|  | 		 <<std::endl; | ||||||
|  |  | ||||||
| 	return; | 	return; | ||||||
|       } |       } | ||||||
|  |  | ||||||
|     } |     } | ||||||
|     std::cout << "HDCG: Pcg not converged"<<std::endl; |     std::cout<<GridLogMessage<<"HDCG: not converged"<<std::endl; | ||||||
|  |     RealD  xnorm   = sqrt(norm2(x)); | ||||||
|  |     RealD  srcnorm = sqrt(norm2(src)); | ||||||
|  |     std::cout<<GridLogMessage<<"HDCG: non-converged solution "<<xnorm<<" source "<<srcnorm<<std::endl; | ||||||
|  |      | ||||||
|     return ; |     return ; | ||||||
|   } |   } | ||||||
|    |    | ||||||
| @@ -197,9 +202,6 @@ class TwoLevelCG : public LinearFunction<Field> | |||||||
|   ///////////////////////////////////////////////////////////////////// |   ///////////////////////////////////////////////////////////////////// | ||||||
|   // Only Def1 has non-trivial Vout. |   // Only Def1 has non-trivial Vout. | ||||||
|   ///////////////////////////////////////////////////////////////////// |   ///////////////////////////////////////////////////////////////////// | ||||||
|   virtual void   Vout  (Field & in, Field & out,Field & src){ |  | ||||||
|     out = in; |  | ||||||
|   } |  | ||||||
|  |  | ||||||
| }; | }; | ||||||
|    |    | ||||||
| @@ -321,12 +323,13 @@ public: | |||||||
|     eval(_eval) |     eval(_eval) | ||||||
|   {}; |   {}; | ||||||
|  |  | ||||||
|   // Can just inherit existing Vout |  | ||||||
|   // Can just inherit existing M2 |   // Can just inherit existing M2 | ||||||
|   // Can just inherit existing M3 |   // Can just inherit existing M3 | ||||||
|  |  | ||||||
|   // Simple vstart - do nothing |   // Simple vstart - do nothing | ||||||
|   virtual void Vstart(Field & x,const Field & src){ x=src; }; |   virtual void Vstart(Field & x,const Field & src){ | ||||||
|  |     x=src; // Could apply Q | ||||||
|  |   }; | ||||||
|  |  | ||||||
|   // Override PcgM1 |   // Override PcgM1 | ||||||
|   virtual void PcgM1(Field & in, Field & out) |   virtual void PcgM1(Field & in, Field & out) | ||||||
|   | |||||||
| @@ -457,7 +457,7 @@ until convergence | |||||||
| 	    std::vector<Field>& evec, | 	    std::vector<Field>& evec, | ||||||
| 	    Field& w,int Nm,int k) | 	    Field& w,int Nm,int k) | ||||||
|   { |   { | ||||||
|     std::cout<<GridLogIRL << "Lanczos step " <<k<<std::endl; |     std::cout<<GridLogDebug << "Lanczos step " <<k<<std::endl; | ||||||
|     const RealD tiny = 1.0e-20; |     const RealD tiny = 1.0e-20; | ||||||
|     assert( k< Nm ); |     assert( k< Nm ); | ||||||
|  |  | ||||||
| @@ -487,7 +487,7 @@ until convergence | |||||||
|  |  | ||||||
|     if(k < Nm-1) evec[k+1] = w; |     if(k < Nm-1) evec[k+1] = w; | ||||||
|  |  | ||||||
|     std::cout<<GridLogIRL << "alpha[" << k << "] = " << zalph << " beta[" << k << "] = "<<beta<<std::endl; |     std::cout<<GridLogIRL << "Lanczos step alpha[" << k << "] = " << zalph << " beta[" << k << "] = "<<beta<<std::endl; | ||||||
|     if ( beta < tiny )  |     if ( beta < tiny )  | ||||||
|       std::cout<<GridLogIRL << " beta is tiny "<<beta<<std::endl; |       std::cout<<GridLogIRL << " beta is tiny "<<beta<<std::endl; | ||||||
|  |  | ||||||
|   | |||||||
| @@ -78,7 +78,7 @@ public: | |||||||
|   void operator() (const Field &in, Field &out){ |   void operator() (const Field &in, Field &out){ | ||||||
|   |   | ||||||
|     _Guess(in,out); |     _Guess(in,out); | ||||||
|     _HermitianSolver(_Matrix,in,out);  // Mdag M out = Mdag in |     _HermitianSolver(_Matrix,in,out);  //M out = in | ||||||
|  |  | ||||||
|   }      |   }      | ||||||
| }; | }; | ||||||
|   | |||||||
| @@ -192,15 +192,16 @@ public: | |||||||
|     text+=usecond(); |     text+=usecond(); | ||||||
|     ttot+=usecond(); |     ttot+=usecond(); | ||||||
|      |      | ||||||
|     std::cout << GridLogPerformance<<"Coarse Mult Aviews "<<tviews<<" us"<<std::endl; |     std::cout << GridLogDebug<<"Coarse Mult Aviews "<<tviews<<" us"<<std::endl; | ||||||
|     std::cout << GridLogPerformance<<"Coarse Mult exch "<<texch<<" us"<<std::endl; |     std::cout << GridLogDebug<<"Coarse Mult exch "<<texch<<" us"<<std::endl; | ||||||
|     std::cout << GridLogPerformance<<"Coarse Mult mult "<<tmult<<" us"<<std::endl; |     std::cout << GridLogDebug<<"Coarse Mult mult "<<tmult<<" us"<<std::endl; | ||||||
|     std::cout << GridLogPerformance<<"Coarse Mult ext  "<<text<<" us"<<std::endl; |     std::cout << GridLogDebug<<"Coarse Mult ext  "<<text<<" us"<<std::endl; | ||||||
|     std::cout << GridLogPerformance<<"Coarse Mult tot  "<<ttot<<" us"<<std::endl; |     std::cout << GridLogDebug<<"Coarse Mult tot  "<<ttot<<" us"<<std::endl; | ||||||
|     std::cout << GridLogPerformance<<"Coarse Kernel flop/s "<< flops/tmult<<" mflop/s"<<std::endl; |     std::cout << GridLogDebug<<"Coarse Kernel flop/s "<< flops/tmult<<" mflop/s"<<std::endl; | ||||||
|     std::cout << GridLogPerformance<<"Coarse Kernel bytes/s"<< bytes/tmult<<" MB/s"<<std::endl; |     std::cout << GridLogDebug<<"Coarse Kernel bytes/s"<< bytes/tmult<<" MB/s"<<std::endl; | ||||||
|     std::cout << GridLogPerformance<<"Coarse overall flops/s "<< flops/ttot<<" mflop/s"<<std::endl; |     std::cout << GridLogDebug<<"Coarse overall flops/s "<< flops/ttot<<" mflop/s"<<std::endl; | ||||||
|     std::cout << GridLogPerformance<<"Coarse total bytes   "<< bytes/1e6<<" MB"<<std::endl; |     std::cout << GridLogDebug<<"Coarse total bytes   "<< bytes/1e6<<" MB"<<std::endl; | ||||||
|  |  | ||||||
|   }; |   }; | ||||||
|  |  | ||||||
|   void PopulateAdag(void) |   void PopulateAdag(void) | ||||||
|   | |||||||
| @@ -139,7 +139,7 @@ public: | |||||||
|     if(dim==0) conformable(old_grid,unpadded_grid); |     if(dim==0) conformable(old_grid,unpadded_grid); | ||||||
|     else       conformable(old_grid,grids[dim-1]); |     else       conformable(old_grid,grids[dim-1]); | ||||||
|  |  | ||||||
|     std::cout << " dim "<<dim<<" local "<<local << " padding to "<<plocal<<std::endl; |     //    std::cout << " dim "<<dim<<" local "<<local << " padding to "<<plocal<<std::endl; | ||||||
|  |  | ||||||
|     double tins=0, tshift=0; |     double tins=0, tshift=0; | ||||||
|  |  | ||||||
| @@ -184,7 +184,7 @@ public: | |||||||
|       } |       } | ||||||
|       tins += usecond() - t; |       tins += usecond() - t; | ||||||
|     } |     } | ||||||
|     std::cout << GridLogPerformance << "PaddedCell::Expand timings: cshift:" << tshift/1000 << "ms, insert-slice:" << tins/1000 << "ms" << std::endl; |     std::cout << GridLogDebug << "PaddedCell::Expand timings: cshift:" << tshift/1000 << "ms, insert-slice:" << tins/1000 << "ms" << std::endl; | ||||||
|      |      | ||||||
|     return padded; |     return padded; | ||||||
|   } |   } | ||||||
|   | |||||||
| @@ -50,7 +50,7 @@ void SaveOperator(Coarsened &Operator,std::string file) | |||||||
| #endif | #endif | ||||||
| } | } | ||||||
| template<class Coarsened> | template<class Coarsened> | ||||||
| void LoadOperator(Coarsened Operator,std::string file) | void LoadOperator(Coarsened &Operator,std::string file) | ||||||
| { | { | ||||||
| #ifdef HAVE_LIME | #ifdef HAVE_LIME | ||||||
|   emptyUserRecord record; |   emptyUserRecord record; | ||||||
| @@ -219,7 +219,7 @@ int main (int argc, char ** argv) | |||||||
|   //////////////////////////////////////////////////////////// |   //////////////////////////////////////////////////////////// | ||||||
|   LittleDiracOperator LittleDiracOp(geom,FrbGrid,Coarse5d); |   LittleDiracOperator LittleDiracOp(geom,FrbGrid,Coarse5d); | ||||||
|  |  | ||||||
|   bool load=false; |   bool load=true; | ||||||
|   if ( load ) { |   if ( load ) { | ||||||
|     LoadBasis(Aggregates,"Subspace.scidac"); |     LoadBasis(Aggregates,"Subspace.scidac"); | ||||||
|     LoadOperator(LittleDiracOp,"LittleDiracOp.scidac"); |     LoadOperator(LittleDiracOp,"LittleDiracOp.scidac"); | ||||||
| @@ -230,7 +230,7 @@ int main (int argc, char ** argv) | |||||||
| 				       //				     600,200,200 -- 38 iters, 162s | 				       //				     600,200,200 -- 38 iters, 162s | ||||||
| 				       //				     600,200,100 -- 38 iters, 169s | 				       //				     600,200,100 -- 38 iters, 169s | ||||||
| 				       //				     600,200,50  -- 88 iters. 370s  | 				       //				     600,200,50  -- 88 iters. 370s  | ||||||
| 				       600, | 				       800, | ||||||
| 				       200, | 				       200, | ||||||
| 				       100, | 				       100, | ||||||
| 				       0.0); | 				       0.0); | ||||||
| @@ -241,7 +241,7 @@ int main (int argc, char ** argv) | |||||||
|    |    | ||||||
|   // Try projecting to one hop only |   // Try projecting to one hop only | ||||||
|   LittleDiracOperator LittleDiracOpProj(geom_nn,FrbGrid,Coarse5d); |   LittleDiracOperator LittleDiracOpProj(geom_nn,FrbGrid,Coarse5d); | ||||||
|   LittleDiracOpProj.ProjectNearestNeighbour(0.2,LittleDiracOp); |   LittleDiracOpProj.ProjectNearestNeighbour(0.01,LittleDiracOp); // smaller shift 0.02? n | ||||||
|  |  | ||||||
|   typedef HermitianLinearOperator<LittleDiracOperator,CoarseVector> HermMatrix; |   typedef HermitianLinearOperator<LittleDiracOperator,CoarseVector> HermMatrix; | ||||||
|   HermMatrix CoarseOp     (LittleDiracOp); |   HermMatrix CoarseOp     (LittleDiracOp); | ||||||
| @@ -250,7 +250,7 @@ int main (int argc, char ** argv) | |||||||
|   ////////////////////////////////////////// |   ////////////////////////////////////////// | ||||||
|   // Build a coarse lanczos |   // Build a coarse lanczos | ||||||
|   ////////////////////////////////////////// |   ////////////////////////////////////////// | ||||||
|   Chebyshev<CoarseVector>      IRLCheby(0.5,60.0,71);  // 1 iter |   Chebyshev<CoarseVector>      IRLCheby(0.2,40.0,71);  // 1 iter | ||||||
|   FunctionHermOp<CoarseVector> IRLOpCheby(IRLCheby,CoarseOp); |   FunctionHermOp<CoarseVector> IRLOpCheby(IRLCheby,CoarseOp); | ||||||
|   PlainHermOp<CoarseVector>    IRLOp    (CoarseOp); |   PlainHermOp<CoarseVector>    IRLOp    (CoarseOp); | ||||||
|   int Nk=48; |   int Nk=48; | ||||||
| @@ -270,7 +270,6 @@ int main (int argc, char ** argv) | |||||||
|   IRL.calc(eval,evec,c_src,Nconv); |   IRL.calc(eval,evec,c_src,Nconv); | ||||||
|   DeflatedGuesser<CoarseVector> DeflCoarseGuesser(evec,eval); |   DeflatedGuesser<CoarseVector> DeflCoarseGuesser(evec,eval); | ||||||
|  |  | ||||||
|    |  | ||||||
|   ////////////////////////////////////////// |   ////////////////////////////////////////// | ||||||
|   // Build a coarse space solver |   // Build a coarse space solver | ||||||
|   ////////////////////////////////////////// |   ////////////////////////////////////////// | ||||||
| @@ -283,7 +282,8 @@ int main (int argc, char ** argv) | |||||||
|   HPDSolver<CoarseVector> HPDSolve(CoarseOp,CG,DeflCoarseGuesser); |   HPDSolver<CoarseVector> HPDSolve(CoarseOp,CG,DeflCoarseGuesser); | ||||||
|   c_res=Zero(); |   c_res=Zero(); | ||||||
|   HPDSolve(c_src,c_res); c_ref = c_res; |   HPDSolve(c_src,c_res); c_ref = c_res; | ||||||
|  |   std::cout << GridLogMessage<<"src norm "<<norm2(c_src)<<std::endl; | ||||||
|  |   std::cout << GridLogMessage<<"ref norm "<<norm2(c_ref)<<std::endl; | ||||||
|   ////////////////////////////////////////////////////////////////////////// |   ////////////////////////////////////////////////////////////////////////// | ||||||
|   // Deflated (with real op EV's) solve for the projected coarse op |   // Deflated (with real op EV's) solve for the projected coarse op | ||||||
|   // Work towards ADEF1 in the coarse space |   // Work towards ADEF1 in the coarse space | ||||||
| @@ -291,13 +291,21 @@ int main (int argc, char ** argv) | |||||||
|   HPDSolver<CoarseVector> HPDSolveProj(CoarseOpProj,CG,DeflCoarseGuesser); |   HPDSolver<CoarseVector> HPDSolveProj(CoarseOpProj,CG,DeflCoarseGuesser); | ||||||
|   c_res=Zero(); |   c_res=Zero(); | ||||||
|   HPDSolveProj(c_src,c_res); |   HPDSolveProj(c_src,c_res); | ||||||
|  |   std::cout << GridLogMessage<<"src norm "<<norm2(c_src)<<std::endl; | ||||||
|  |   std::cout << GridLogMessage<<"res norm "<<norm2(c_res)<<std::endl; | ||||||
|   c_res = c_res - c_ref; |   c_res = c_res - c_ref; | ||||||
|   std::cout << "Projected solver error "<<norm2(c_res)<<std::endl; |   std::cout << "Projected solver error "<<norm2(c_res)<<std::endl; | ||||||
|  |  | ||||||
|   ////////////////////////////////////////////////////////////////////// |   ////////////////////////////////////////////////////////////////////// | ||||||
|   // Coarse ADEF1 with deflation space |   // Coarse ADEF1 with deflation space | ||||||
|   ////////////////////////////////////////////////////////////////////// |   ////////////////////////////////////////////////////////////////////// | ||||||
|   ChebyshevSmoother<CoarseVector,HermMatrix > CoarseSmoother(4.0,45.,16,CoarseOpProj);  // 311 |   ChebyshevSmoother<CoarseVector,HermMatrix > | ||||||
|  |     CoarseSmoother(1.0,37.,8,CoarseOpProj);  // just go to sloppy 0.1 convergence | ||||||
|  |     //  CoarseSmoother(0.1,37.,8,CoarseOpProj);  // | ||||||
|  |   //  CoarseSmoother(0.5,37.,6,CoarseOpProj);  //  8 iter 0.36s | ||||||
|  |   //    CoarseSmoother(0.5,37.,12,CoarseOpProj);  // 8 iter, 0.55s | ||||||
|  |   //    CoarseSmoother(0.5,37.,8,CoarseOpProj);// 7-9 iter | ||||||
|  |   //  CoarseSmoother(1.0,37.,8,CoarseOpProj); // 0.4 - 0.5s solve to 0.04, 7-9 iter | ||||||
|   //  ChebyshevSmoother<CoarseVector,HermMatrix > CoarseSmoother(0.5,36.,10,CoarseOpProj);  // 311 |   //  ChebyshevSmoother<CoarseVector,HermMatrix > CoarseSmoother(0.5,36.,10,CoarseOpProj);  // 311 | ||||||
|  |  | ||||||
|   //////////////////////////////////////////////////////// |   //////////////////////////////////////////////////////// | ||||||
| @@ -318,19 +326,25 @@ int main (int argc, char ** argv) | |||||||
|   // HDCG 38 iters 169s |   // HDCG 38 iters 169s | ||||||
|  |  | ||||||
|   TwoLevelADEF1defl<CoarseVector> |   TwoLevelADEF1defl<CoarseVector> | ||||||
|     cADEF1(1.0e-8, 100, |     cADEF1(1.0e-8, 500, | ||||||
| 	   CoarseOp, | 	   CoarseOp, | ||||||
| 	   CoarseSmoother, | 	   CoarseSmoother, | ||||||
| 	   evec,eval); | 	   evec,eval); | ||||||
|  |  | ||||||
|   c_res=Zero(); |   c_res=Zero(); | ||||||
|   cADEF1(c_src,c_res); |   cADEF1(c_src,c_res); | ||||||
|  |   std::cout << GridLogMessage<<"src norm "<<norm2(c_src)<<std::endl; | ||||||
|  |   std::cout << GridLogMessage<<"cADEF1 res norm "<<norm2(c_res)<<std::endl; | ||||||
|   c_res = c_res - c_ref; |   c_res = c_res - c_ref; | ||||||
|   std::cout << "cADEF1 solver error "<<norm2(c_res)<<std::endl; |   std::cout << "cADEF1 solver error "<<norm2(c_res)<<std::endl; | ||||||
|    |    | ||||||
|   cADEF1.Tolerance = 1.0e-9; |   //  cADEF1.Tolerance = 4.0e-2; | ||||||
|  |   //  cADEF1.Tolerance = 1.0e-1; | ||||||
|  |   cADEF1.Tolerance = 5.0e-2; | ||||||
|   c_res=Zero(); |   c_res=Zero(); | ||||||
|   cADEF1(c_src,c_res); |   cADEF1(c_src,c_res); | ||||||
|  |   std::cout << GridLogMessage<<"src norm "<<norm2(c_src)<<std::endl; | ||||||
|  |   std::cout << GridLogMessage<<"cADEF1 res norm "<<norm2(c_res)<<std::endl; | ||||||
|   c_res = c_res - c_ref; |   c_res = c_res - c_ref; | ||||||
|   std::cout << "cADEF1 solver error "<<norm2(c_res)<<std::endl; |   std::cout << "cADEF1 solver error "<<norm2(c_res)<<std::endl; | ||||||
|    |    | ||||||
| @@ -375,16 +389,13 @@ int main (int argc, char ** argv) | |||||||
|       // Build a HDCG solver |       // Build a HDCG solver | ||||||
|       ////////////////////////////////////////// |       ////////////////////////////////////////// | ||||||
|       TwoLevelADEF2<LatticeFermion,CoarseVector,Subspace> |       TwoLevelADEF2<LatticeFermion,CoarseVector,Subspace> | ||||||
| 	HDCG(1.0e-8, 3000, | 	HDCG(1.0e-8, 100, | ||||||
| 	     FineHermOp, | 	     FineHermOp, | ||||||
| 	     Smoother, | 	     Smoother, | ||||||
| 	     HPDSolveSloppy, | 	     HPDSolveSloppy, | ||||||
| 	     HPDSolve, | 	     HPDSolve, | ||||||
| 	     Aggregates); | 	     Aggregates); | ||||||
|  |  | ||||||
|       result=Zero(); |  | ||||||
|       HDCG(src,result); |  | ||||||
|  |  | ||||||
|       TwoLevelADEF2<LatticeFermion,CoarseVector,Subspace> |       TwoLevelADEF2<LatticeFermion,CoarseVector,Subspace> | ||||||
| 	HDCGdefl(1.0e-8, 100, | 	HDCGdefl(1.0e-8, 100, | ||||||
| 		 FineHermOp, | 		 FineHermOp, | ||||||
| @@ -396,6 +407,10 @@ int main (int argc, char ** argv) | |||||||
|       result=Zero(); |       result=Zero(); | ||||||
|       HDCGdefl(src,result); |       HDCGdefl(src,result); | ||||||
|  |  | ||||||
|  |       result=Zero(); | ||||||
|  |       HDCG(src,result); | ||||||
|  |  | ||||||
|  |        | ||||||
|     } |     } | ||||||
|   } |   } | ||||||
|  |  | ||||||
|   | |||||||
							
								
								
									
										423
									
								
								tests/debug/Test_general_coarse_hdcg_phys.cc
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										423
									
								
								tests/debug/Test_general_coarse_hdcg_phys.cc
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,423 @@ | |||||||
|  |     /************************************************************************************* | ||||||
|  |  | ||||||
|  |     Grid physics library, www.github.com/paboyle/Grid  | ||||||
|  |  | ||||||
|  |     Source file: ./tests/Test_general_coarse_hdcg.cc | ||||||
|  |  | ||||||
|  |     Copyright (C) 2023 | ||||||
|  |  | ||||||
|  | Author: Peter Boyle <pboyle@bnl.gov> | ||||||
|  |  | ||||||
|  |     This program is free software; you can redistribute it and/or modify | ||||||
|  |     it under the terms of the GNU General Public License as published by | ||||||
|  |     the Free Software Foundation; either version 2 of the License, or | ||||||
|  |     (at your option) any later version. | ||||||
|  |  | ||||||
|  |     This program is distributed in the hope that it will be useful, | ||||||
|  |     but WITHOUT ANY WARRANTY; without even the implied warranty of | ||||||
|  |     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the | ||||||
|  |     GNU General Public License for more details. | ||||||
|  |  | ||||||
|  |     You should have received a copy of the GNU General Public License along | ||||||
|  |     with this program; if not, write to the Free Software Foundation, Inc., | ||||||
|  |     51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. | ||||||
|  |  | ||||||
|  |     See the full license in the file "LICENSE" in the top level distribution directory | ||||||
|  |     *************************************************************************************/ | ||||||
|  |     /*  END LEGAL */ | ||||||
|  | #include <Grid/Grid.h> | ||||||
|  | #include <Grid/lattice/PaddedCell.h> | ||||||
|  | #include <Grid/stencil/GeneralLocalStencil.h> | ||||||
|  | //#include <Grid/algorithms/GeneralCoarsenedMatrix.h> | ||||||
|  | #include <Grid/algorithms/iterative/AdefGeneric.h> | ||||||
|  |  | ||||||
|  | using namespace std; | ||||||
|  | using namespace Grid; | ||||||
|  |  | ||||||
|  | template<class Coarsened> | ||||||
|  | void SaveOperator(Coarsened &Operator,std::string file) | ||||||
|  | { | ||||||
|  | #ifdef HAVE_LIME | ||||||
|  |   emptyUserRecord record; | ||||||
|  |   ScidacWriter WR(Operator.Grid()->IsBoss()); | ||||||
|  |   assert(Operator._A.size()==Operator.geom.npoint); | ||||||
|  |   WR.open(file); | ||||||
|  |   for(int p=0;p<Operator._A.size();p++){ | ||||||
|  |     auto tmp = Operator.Cell.Extract(Operator._A[p]); | ||||||
|  |     WR.writeScidacFieldRecord(tmp,record); | ||||||
|  |   } | ||||||
|  |   WR.close(); | ||||||
|  | #endif | ||||||
|  | } | ||||||
|  | template<class Coarsened> | ||||||
|  | void LoadOperator(Coarsened &Operator,std::string file) | ||||||
|  | { | ||||||
|  | #ifdef HAVE_LIME | ||||||
|  |   emptyUserRecord record; | ||||||
|  |   Grid::ScidacReader RD ; | ||||||
|  |   RD.open(file); | ||||||
|  |   assert(Operator._A.size()==Operator.geom.npoint); | ||||||
|  |   for(int p=0;p<Operator.geom.npoint;p++){ | ||||||
|  |     conformable(Operator._A[p].Grid(),Operator.CoarseGrid()); | ||||||
|  |     RD.readScidacFieldRecord(Operator._A[p],record); | ||||||
|  |   }     | ||||||
|  |   RD.close(); | ||||||
|  |   Operator.ExchangeCoarseLinks(); | ||||||
|  | #endif | ||||||
|  | } | ||||||
|  | template<class aggregation> | ||||||
|  | void SaveBasis(aggregation &Agg,std::string file) | ||||||
|  | { | ||||||
|  | #ifdef HAVE_LIME | ||||||
|  |   emptyUserRecord record; | ||||||
|  |   ScidacWriter WR(Agg.FineGrid->IsBoss()); | ||||||
|  |   WR.open(file); | ||||||
|  |   for(int b=0;b<Agg.subspace.size();b++){ | ||||||
|  |     WR.writeScidacFieldRecord(Agg.subspace[b],record); | ||||||
|  |   } | ||||||
|  |   WR.close(); | ||||||
|  | #endif | ||||||
|  | } | ||||||
|  | template<class aggregation> | ||||||
|  | void LoadBasis(aggregation &Agg, std::string file) | ||||||
|  | { | ||||||
|  | #ifdef HAVE_LIME | ||||||
|  |   emptyUserRecord record; | ||||||
|  |   ScidacReader RD ; | ||||||
|  |   RD.open(file); | ||||||
|  |   for(int b=0;b<Agg.subspace.size();b++){ | ||||||
|  |     RD.readScidacFieldRecord(Agg.subspace[b],record); | ||||||
|  |   }     | ||||||
|  |   RD.close(); | ||||||
|  | #endif | ||||||
|  | } | ||||||
|  |  | ||||||
|  |  | ||||||
|  | template<class Field> class TestSolver : public LinearFunction<Field> { | ||||||
|  | public: | ||||||
|  |   TestSolver() {}; | ||||||
|  |   void operator() (const Field &in, Field &out){    out = Zero();  }      | ||||||
|  | }; | ||||||
|  |  | ||||||
|  |  | ||||||
|  | RealD InverseApproximation(RealD x){ | ||||||
|  |   return 1.0/x; | ||||||
|  | } | ||||||
|  |  | ||||||
|  | // Want Op in CoarsenOp to call MatPcDagMatPc | ||||||
|  | template<class Field> | ||||||
|  | class HermOpAdaptor : public LinearOperatorBase<Field> | ||||||
|  | { | ||||||
|  |   LinearOperatorBase<Field> & wrapped; | ||||||
|  | public: | ||||||
|  |   HermOpAdaptor(LinearOperatorBase<Field> &wrapme) : wrapped(wrapme)  {}; | ||||||
|  |   void Op     (const Field &in, Field &out)   { wrapped.HermOp(in,out);  } | ||||||
|  |   void HermOp(const Field &in, Field &out)    { wrapped.HermOp(in,out); } | ||||||
|  |   void AdjOp     (const Field &in, Field &out){ wrapped.HermOp(in,out);  } | ||||||
|  |   void OpDiag (const Field &in, Field &out)                  {    assert(0);  } | ||||||
|  |   void OpDir  (const Field &in, Field &out,int dir,int disp) {    assert(0);  } | ||||||
|  |   void OpDirAll  (const Field &in, std::vector<Field> &out)  {    assert(0);  }; | ||||||
|  |   void HermOpAndNorm(const Field &in, Field &out,RealD &n1,RealD &n2){    assert(0);  } | ||||||
|  | }; | ||||||
|  | template<class Field,class Matrix> class ChebyshevSmoother : public LinearFunction<Field> | ||||||
|  | { | ||||||
|  | public: | ||||||
|  |   using LinearFunction<Field>::operator(); | ||||||
|  |   typedef LinearOperatorBase<Field> FineOperator; | ||||||
|  |   FineOperator   & _SmootherOperator; | ||||||
|  |   Chebyshev<Field> Cheby; | ||||||
|  |   ChebyshevSmoother(RealD _lo,RealD _hi,int _ord, FineOperator &SmootherOperator) : | ||||||
|  |     _SmootherOperator(SmootherOperator), | ||||||
|  |     Cheby(_lo,_hi,_ord,InverseApproximation) | ||||||
|  |   { | ||||||
|  |     std::cout << GridLogMessage<<" Chebyshev smoother order "<<_ord<<" ["<<_lo<<","<<_hi<<"]"<<std::endl; | ||||||
|  |   }; | ||||||
|  |   void operator() (const Field &in, Field &out)  | ||||||
|  |   { | ||||||
|  |     Field tmp(in.Grid()); | ||||||
|  |     tmp = in; | ||||||
|  |     Cheby(_SmootherOperator,tmp,out); | ||||||
|  |   } | ||||||
|  | }; | ||||||
|  |  | ||||||
|  | int main (int argc, char ** argv) | ||||||
|  | { | ||||||
|  |   Grid_init(&argc,&argv); | ||||||
|  |  | ||||||
|  |   const int Ls=24; | ||||||
|  |   const int nbasis = 40; | ||||||
|  |   const int cb = 0 ; | ||||||
|  |   RealD mass=0.00078; | ||||||
|  |   RealD M5=1.8; | ||||||
|  |   RealD b=1.5; | ||||||
|  |   RealD c=0.5; | ||||||
|  |  | ||||||
|  |   GridCartesian         * UGrid   = SpaceTimeGrid::makeFourDimGrid(GridDefaultLatt(), | ||||||
|  | 								   GridDefaultSimd(Nd,vComplex::Nsimd()), | ||||||
|  | 								   GridDefaultMpi()); | ||||||
|  |   GridRedBlackCartesian * UrbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(UGrid); | ||||||
|  |   GridCartesian         * FGrid   = SpaceTimeGrid::makeFiveDimGrid(Ls,UGrid); | ||||||
|  |   GridRedBlackCartesian * FrbGrid = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls,UGrid); | ||||||
|  |  | ||||||
|  |   // Construct a coarsened grid with 4^4 cell | ||||||
|  |   Coordinate clatt = GridDefaultLatt(); | ||||||
|  |   for(int d=0;d<clatt.size();d++){ | ||||||
|  |     clatt[d] = clatt[d]/4; | ||||||
|  |   } | ||||||
|  |   GridCartesian *Coarse4d =  SpaceTimeGrid::makeFourDimGrid(clatt, | ||||||
|  | 							    GridDefaultSimd(Nd,vComplex::Nsimd()), | ||||||
|  | 							    GridDefaultMpi());; | ||||||
|  |   GridCartesian *Coarse5d =  SpaceTimeGrid::makeFiveDimGrid(1,Coarse4d); | ||||||
|  |  | ||||||
|  |   ///////////////////////// RNGs ///////////////////////////////// | ||||||
|  |   std::vector<int> seeds4({1,2,3,4}); | ||||||
|  |   std::vector<int> seeds5({5,6,7,8}); | ||||||
|  |   std::vector<int> cseeds({5,6,7,8}); | ||||||
|  |  | ||||||
|  |   GridParallelRNG          RNG5(FGrid);   RNG5.SeedFixedIntegers(seeds5); | ||||||
|  |   GridParallelRNG          RNG4(UGrid);   RNG4.SeedFixedIntegers(seeds4); | ||||||
|  |   GridParallelRNG          CRNG(Coarse5d);CRNG.SeedFixedIntegers(cseeds); | ||||||
|  |  | ||||||
|  |   ///////////////////////// Configuration ///////////////////////////////// | ||||||
|  |   LatticeGaugeField Umu(UGrid); | ||||||
|  |  | ||||||
|  |   FieldMetaData header; | ||||||
|  |   std::string file("ckpoint_lat.4000"); | ||||||
|  |   NerscIO::readConfiguration(Umu,header,file); | ||||||
|  |  | ||||||
|  |   //////////////////////// Fermion action ////////////////////////////////// | ||||||
|  |   MobiusFermionD Ddwf(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5,b,c); | ||||||
|  |  | ||||||
|  |   SchurDiagMooeeOperator<MobiusFermionD, LatticeFermion> HermOpEO(Ddwf); | ||||||
|  |  | ||||||
|  |   typedef HermOpAdaptor<LatticeFermionD> HermFineMatrix; | ||||||
|  |   HermFineMatrix FineHermOp(HermOpEO); | ||||||
|  |    | ||||||
|  |   LatticeFermion result(FrbGrid); result=Zero(); | ||||||
|  |  | ||||||
|  |   LatticeFermion    src(FrbGrid); random(RNG5,src); | ||||||
|  |  | ||||||
|  |   // Run power method on FineHermOp | ||||||
|  |   PowerMethod<LatticeFermion>       PM;   PM(HermOpEO,src); | ||||||
|  |  | ||||||
|  |   | ||||||
|  |   //////////////////////////////////////////////////////////// | ||||||
|  |   ///////////// Coarse basis and Little Dirac Operator /////// | ||||||
|  |   //////////////////////////////////////////////////////////// | ||||||
|  |   typedef GeneralCoarsenedMatrix<vSpinColourVector,vTComplex,nbasis> LittleDiracOperator; | ||||||
|  |   typedef LittleDiracOperator::CoarseVector CoarseVector; | ||||||
|  |  | ||||||
|  |   NextToNextToNextToNearestStencilGeometry5D geom(Coarse5d); | ||||||
|  |   NearestStencilGeometry5D geom_nn(Coarse5d); | ||||||
|  |    | ||||||
|  |   // Warning: This routine calls PVdagM.Op, not PVdagM.HermOp | ||||||
|  |   typedef Aggregation<vSpinColourVector,vTComplex,nbasis> Subspace; | ||||||
|  |   Subspace Aggregates(Coarse5d,FrbGrid,cb); | ||||||
|  |  | ||||||
|  |   //////////////////////////////////////////////////////////// | ||||||
|  |   // Need to check about red-black grid coarsening | ||||||
|  |   //////////////////////////////////////////////////////////// | ||||||
|  |   LittleDiracOperator LittleDiracOp(geom,FrbGrid,Coarse5d); | ||||||
|  |  | ||||||
|  |   bool load=true; | ||||||
|  |   if ( load ) { | ||||||
|  |     LoadBasis(Aggregates,"Subspace.scidac"); | ||||||
|  |     LoadOperator(LittleDiracOp,"LittleDiracOp.scidac"); | ||||||
|  |   } else { | ||||||
|  |     Aggregates.CreateSubspaceChebyshev(RNG5,HermOpEO,nbasis, | ||||||
|  | 				       95.0,0.1, | ||||||
|  | 				       //				     400,200,200 -- 48 iters | ||||||
|  | 				       //				     600,200,200 -- 38 iters, 162s | ||||||
|  | 				       //				     600,200,100 -- 38 iters, 169s | ||||||
|  | 				       //				     600,200,50  -- 88 iters. 370s  | ||||||
|  | 				       800, | ||||||
|  | 				       200, | ||||||
|  | 				       100, | ||||||
|  | 				       0.0); | ||||||
|  |     LittleDiracOp.CoarsenOperator(FineHermOp,Aggregates); | ||||||
|  |     SaveBasis(Aggregates,"Subspace.scidac"); | ||||||
|  |     SaveOperator(LittleDiracOp,"LittleDiracOp.scidac"); | ||||||
|  |   } | ||||||
|  |    | ||||||
|  |   // Try projecting to one hop only | ||||||
|  |   LittleDiracOperator LittleDiracOpProj(geom_nn,FrbGrid,Coarse5d); | ||||||
|  |   LittleDiracOpProj.ProjectNearestNeighbour(0.01,LittleDiracOp); // smaller shift 0.02? n | ||||||
|  |  | ||||||
|  |   typedef HermitianLinearOperator<LittleDiracOperator,CoarseVector> HermMatrix; | ||||||
|  |   HermMatrix CoarseOp     (LittleDiracOp); | ||||||
|  |   HermMatrix CoarseOpProj (LittleDiracOpProj); | ||||||
|  |    | ||||||
|  |   ////////////////////////////////////////// | ||||||
|  |   // Build a coarse lanczos | ||||||
|  |   ////////////////////////////////////////// | ||||||
|  |   Chebyshev<CoarseVector>      IRLCheby(0.2,40.0,71);  // 1 iter | ||||||
|  |   FunctionHermOp<CoarseVector> IRLOpCheby(IRLCheby,CoarseOp); | ||||||
|  |   PlainHermOp<CoarseVector>    IRLOp    (CoarseOp); | ||||||
|  |   int Nk=48; | ||||||
|  |   int Nm=64; | ||||||
|  |   int Nstop=Nk; | ||||||
|  |   ImplicitlyRestartedLanczos<CoarseVector> IRL(IRLOpCheby,IRLOp,Nstop,Nk,Nm,1.0e-5,20); | ||||||
|  |  | ||||||
|  |   int Nconv; | ||||||
|  |   std::vector<RealD>            eval(Nm); | ||||||
|  |   std::vector<CoarseVector>     evec(Nm,Coarse5d); | ||||||
|  |   CoarseVector c_src(Coarse5d); c_src=1.0; | ||||||
|  |   CoarseVector c_res(Coarse5d);  | ||||||
|  |   CoarseVector c_ref(Coarse5d);  | ||||||
|  |  | ||||||
|  |   PowerMethod<CoarseVector>       cPM;   cPM(CoarseOp,c_src); | ||||||
|  |  | ||||||
|  |   IRL.calc(eval,evec,c_src,Nconv); | ||||||
|  |   DeflatedGuesser<CoarseVector> DeflCoarseGuesser(evec,eval); | ||||||
|  |  | ||||||
|  |   ////////////////////////////////////////// | ||||||
|  |   // Build a coarse space solver | ||||||
|  |   ////////////////////////////////////////// | ||||||
|  |   int maxit=20000; | ||||||
|  |   ConjugateGradient<CoarseVector>  CG(1.0e-8,maxit,false); | ||||||
|  |   ConjugateGradient<LatticeFermionD>  CGfine(1.0e-8,10000,false); | ||||||
|  |   ZeroGuesser<CoarseVector> CoarseZeroGuesser; | ||||||
|  |  | ||||||
|  |   //  HPDSolver<CoarseVector> HPDSolve(CoarseOp,CG,CoarseZeroGuesser); | ||||||
|  |   HPDSolver<CoarseVector> HPDSolve(CoarseOp,CG,DeflCoarseGuesser); | ||||||
|  |   c_res=Zero(); | ||||||
|  |   HPDSolve(c_src,c_res); c_ref = c_res; | ||||||
|  |   std::cout << GridLogMessage<<"src norm "<<norm2(c_src)<<std::endl; | ||||||
|  |   std::cout << GridLogMessage<<"ref norm "<<norm2(c_ref)<<std::endl; | ||||||
|  |   ////////////////////////////////////////////////////////////////////////// | ||||||
|  |   // Deflated (with real op EV's) solve for the projected coarse op | ||||||
|  |   // Work towards ADEF1 in the coarse space | ||||||
|  |   ////////////////////////////////////////////////////////////////////////// | ||||||
|  |   HPDSolver<CoarseVector> HPDSolveProj(CoarseOpProj,CG,DeflCoarseGuesser); | ||||||
|  |   c_res=Zero(); | ||||||
|  |   HPDSolveProj(c_src,c_res); | ||||||
|  |   std::cout << GridLogMessage<<"src norm "<<norm2(c_src)<<std::endl; | ||||||
|  |   std::cout << GridLogMessage<<"res norm "<<norm2(c_res)<<std::endl; | ||||||
|  |   c_res = c_res - c_ref; | ||||||
|  |   std::cout << "Projected solver error "<<norm2(c_res)<<std::endl; | ||||||
|  |  | ||||||
|  |   ////////////////////////////////////////////////////////////////////// | ||||||
|  |   // Coarse ADEF1 with deflation space | ||||||
|  |   ////////////////////////////////////////////////////////////////////// | ||||||
|  |   ChebyshevSmoother<CoarseVector,HermMatrix > | ||||||
|  |     CoarseSmoother(1.0,37.,8,CoarseOpProj);  // just go to sloppy 0.1 convergence | ||||||
|  |     //  CoarseSmoother(0.1,37.,8,CoarseOpProj);  // | ||||||
|  |   //  CoarseSmoother(0.5,37.,6,CoarseOpProj);  //  8 iter 0.36s | ||||||
|  |   //    CoarseSmoother(0.5,37.,12,CoarseOpProj);  // 8 iter, 0.55s | ||||||
|  |   //    CoarseSmoother(0.5,37.,8,CoarseOpProj);// 7-9 iter | ||||||
|  |   //  CoarseSmoother(1.0,37.,8,CoarseOpProj); // 0.4 - 0.5s solve to 0.04, 7-9 iter | ||||||
|  |   //  ChebyshevSmoother<CoarseVector,HermMatrix > CoarseSmoother(0.5,36.,10,CoarseOpProj);  // 311 | ||||||
|  |  | ||||||
|  |   //////////////////////////////////////////////////////// | ||||||
|  |   // CG, Cheby mode spacing 200,200 | ||||||
|  |   // Unprojected Coarse CG solve to 1e-8 : 190 iters, 4.9s | ||||||
|  |   // Unprojected Coarse CG solve to 4e-2 :  33 iters, 0.8s | ||||||
|  |   // Projected Coarse CG solve to 1e-8 : 100 iters, 0.36s | ||||||
|  |   //////////////////////////////////////////////////////// | ||||||
|  |   // CoarseSmoother(1.0,48.,8,CoarseOpProj); 48 evecs  | ||||||
|  |   //////////////////////////////////////////////////////// | ||||||
|  |   // ADEF1 Coarse solve to 1e-8 : 44 iters, 2.34s  2.1x gain | ||||||
|  |   // ADEF1 Coarse solve to 4e-2 : 7 iters, 0.4s | ||||||
|  |   // HDCG 38 iters 162s | ||||||
|  |   // | ||||||
|  |   // CoarseSmoother(1.0,40.,8,CoarseOpProj); 48 evecs  | ||||||
|  |   // ADEF1 Coarse solve to 1e-8 : 37 iters, 2.0s  2.1x gain | ||||||
|  |   // ADEF1 Coarse solve to 4e-2 : 6 iters, 0.36s | ||||||
|  |   // HDCG 38 iters 169s | ||||||
|  |  | ||||||
|  |   TwoLevelADEF1defl<CoarseVector> | ||||||
|  |     cADEF1(1.0e-8, 500, | ||||||
|  | 	   CoarseOp, | ||||||
|  | 	   CoarseSmoother, | ||||||
|  | 	   evec,eval); | ||||||
|  |  | ||||||
|  |   c_res=Zero(); | ||||||
|  |   cADEF1(c_src,c_res); | ||||||
|  |   std::cout << GridLogMessage<<"src norm "<<norm2(c_src)<<std::endl; | ||||||
|  |   std::cout << GridLogMessage<<"cADEF1 res norm "<<norm2(c_res)<<std::endl; | ||||||
|  |   c_res = c_res - c_ref; | ||||||
|  |   std::cout << "cADEF1 solver error "<<norm2(c_res)<<std::endl; | ||||||
|  |    | ||||||
|  |   //  cADEF1.Tolerance = 4.0e-2; | ||||||
|  |   //  cADEF1.Tolerance = 1.0e-1; | ||||||
|  |   cADEF1.Tolerance = 5.0e-2; | ||||||
|  |   c_res=Zero(); | ||||||
|  |   cADEF1(c_src,c_res); | ||||||
|  |   std::cout << GridLogMessage<<"src norm "<<norm2(c_src)<<std::endl; | ||||||
|  |   std::cout << GridLogMessage<<"cADEF1 res norm "<<norm2(c_res)<<std::endl; | ||||||
|  |   c_res = c_res - c_ref; | ||||||
|  |   std::cout << "cADEF1 solver error "<<norm2(c_res)<<std::endl; | ||||||
|  |    | ||||||
|  |   ////////////////////////////////////////// | ||||||
|  |   // Build a smoother | ||||||
|  |   ////////////////////////////////////////// | ||||||
|  |   //  ChebyshevSmoother<LatticeFermionD,HermFineMatrix > Smoother(10.0,100.0,10,FineHermOp); //499 | ||||||
|  |   //  ChebyshevSmoother<LatticeFermionD,HermFineMatrix > Smoother(3.0,100.0,10,FineHermOp);  //383 | ||||||
|  |   //  ChebyshevSmoother<LatticeFermionD,HermFineMatrix > Smoother(1.0,100.0,10,FineHermOp);  //328 | ||||||
|  |   //  std::vector<RealD> los({0.5,1.0,3.0}); // 147/142/146 nbasis 1 | ||||||
|  |   //  std::vector<RealD> los({1.0,2.0}); // Nbasis 24: 88,86 iterations | ||||||
|  |   //  std::vector<RealD> los({2.0,4.0}); // Nbasis 32 == 52, iters | ||||||
|  |   //  std::vector<RealD> los({2.0,4.0}); // Nbasis 40 == 36,36 iters | ||||||
|  |  | ||||||
|  |   // | ||||||
|  |   // Turns approx 2700 iterations into 340 fine multiplies with Nbasis 40 | ||||||
|  |   // Need to measure cost of coarse space. | ||||||
|  |   // | ||||||
|  |   // -- i) Reduce coarse residual   -- 0.04 | ||||||
|  |   // -- ii) Lanczos on coarse space -- done | ||||||
|  |   // -- iii) Possible 1 hop project and/or preconditioning it - easy - PrecCG it and | ||||||
|  |   //         use a limited stencil. Reread BFM code to check on evecs / deflation strategy with prec | ||||||
|  |   // | ||||||
|  |   std::vector<RealD> los({3.0}); // Nbasis 40 == 36,36 iters | ||||||
|  |  | ||||||
|  |   //  std::vector<int> ords({7,8,10}); // Nbasis 40 == 40,38,36 iters (320,342,396 mults) | ||||||
|  |   std::vector<int> ords({7}); // Nbasis 40 == 40 iters (320 mults)   | ||||||
|  |  | ||||||
|  |   for(int l=0;l<los.size();l++){ | ||||||
|  |  | ||||||
|  |     RealD lo = los[l]; | ||||||
|  |  | ||||||
|  |     for(int o=0;o<ords.size();o++){ | ||||||
|  |  | ||||||
|  |       ConjugateGradient<CoarseVector>  CGsloppy(4.0e-2,maxit,false); | ||||||
|  |       HPDSolver<CoarseVector> HPDSolveSloppy(CoarseOp,CGsloppy,DeflCoarseGuesser); | ||||||
|  |        | ||||||
|  |       //    ChebyshevSmoother<LatticeFermionD,HermFineMatrix > Smoother(lo,92,10,FineHermOp); // 36 best case | ||||||
|  |       ChebyshevSmoother<LatticeFermionD,HermFineMatrix > Smoother(lo,92,ords[o],FineHermOp);  // 311 | ||||||
|  |  | ||||||
|  |       ////////////////////////////////////////// | ||||||
|  |       // Build a HDCG solver | ||||||
|  |       ////////////////////////////////////////// | ||||||
|  |       TwoLevelADEF2<LatticeFermion,CoarseVector,Subspace> | ||||||
|  | 	HDCG(1.0e-8, 100, | ||||||
|  | 	     FineHermOp, | ||||||
|  | 	     Smoother, | ||||||
|  | 	     HPDSolveSloppy, | ||||||
|  | 	     HPDSolve, | ||||||
|  | 	     Aggregates); | ||||||
|  |  | ||||||
|  |       TwoLevelADEF2<LatticeFermion,CoarseVector,Subspace> | ||||||
|  | 	HDCGdefl(1.0e-8, 100, | ||||||
|  | 		 FineHermOp, | ||||||
|  | 		 Smoother, | ||||||
|  | 		 cADEF1, | ||||||
|  | 		 HPDSolve, | ||||||
|  | 		 Aggregates); | ||||||
|  |        | ||||||
|  |       result=Zero(); | ||||||
|  |       HDCGdefl(src,result); | ||||||
|  |  | ||||||
|  |       result=Zero(); | ||||||
|  |       HDCG(src,result); | ||||||
|  |  | ||||||
|  |        | ||||||
|  |     } | ||||||
|  |   } | ||||||
|  |  | ||||||
|  |   // Standard CG | ||||||
|  |   result=Zero(); | ||||||
|  |   CGfine(HermOpEO, src, result); | ||||||
|  |    | ||||||
|  |   Grid_finalize(); | ||||||
|  |   return 0; | ||||||
|  | } | ||||||
		Reference in New Issue
	
	Block a user