mirror of
				https://github.com/paboyle/Grid.git
				synced 2025-10-25 10:09:34 +01:00 
			
		
		
		
	Compare commits
	
		
			49 Commits
		
	
	
		
			feature/gp
			...
			b58fd80379
		
	
	| Author | SHA1 | Date | |
|---|---|---|---|
|  | b58fd80379 | ||
|  | 7f6e0f57d0 | ||
|  | cae27678d8 | ||
|  | 48ff655bad | ||
|  | 2525ad4623 | ||
|  | e7020017c5 | ||
|  | eacebfad74 | ||
|  | 3bc2da5321 | ||
|  | 2d710d6bfd | ||
|  | 6532b7f32b | ||
|  | 7b41b92d99 | ||
|  | dd557af84b | ||
|  | 59b9d0e030 | ||
|  | b82eee4733 | ||
|  | 6a87487544 | ||
|  | fcf5023845 | ||
|  | c8adad6d8b | ||
|  | 737d3ffb98 | ||
|  | b01e67bab1 | ||
|  | 8a70314f54 | ||
|  | 36ae6e5aba | ||
|  | 9db585cfeb | ||
|  | c564611ba7 | ||
|  | e187bcb85c | ||
|  | be18ffe3b4 | ||
|  | 0d63dce4e2 | ||
|  | 26b30e1551 | ||
|  | 7fc58ac293 | ||
|  | 3a86cce8c1 | ||
|  | 37884d369f | ||
|  | 9246e653cd | ||
|  | 64283c8673 | ||
|  | 755002da9c | ||
|  | 31b8e8b437 | ||
|  | 0ec0de97e6 | ||
|  | 6c3ade5d89 | ||
|  | 980c5f9a34 | ||
|  | 471ca5f281 | ||
|  | e82ddcff5d | ||
|  | b9dcad89e8 | ||
|  | 993f43ef4a | ||
|  | 2b43308208 | ||
|  | 04a1ac3a76 | ||
|  | 990b8798bd | ||
|  | b334a73a44 | ||
|  | 5d113d1c70 | ||
|  | c14977aeab | ||
|  | 3e94838204 | ||
|  | c0a0b8ca62 | 
| @@ -69,7 +69,8 @@ NAMESPACE_CHECK(BiCGSTAB); | ||||
| #include <Grid/algorithms/iterative/PowerMethod.h> | ||||
|  | ||||
| NAMESPACE_CHECK(PowerMethod); | ||||
| #include <Grid/algorithms/CoarsenedMatrix.h> | ||||
| #include <Grid/algorithms/multigrid/MultiGrid.h> | ||||
|  | ||||
| NAMESPACE_CHECK(CoarsendMatrix); | ||||
| #include <Grid/algorithms/FFT.h> | ||||
|  | ||||
|   | ||||
| @@ -123,7 +123,7 @@ public: | ||||
| }; | ||||
|    | ||||
| template<class Fobj,class CComplex,int nbasis> | ||||
| class Aggregation   { | ||||
| class Aggregation { | ||||
| public: | ||||
|   typedef iVector<CComplex,nbasis >             siteVector; | ||||
|   typedef Lattice<siteVector>                 CoarseVector; | ||||
| @@ -158,7 +158,20 @@ public: | ||||
|     blockPromote(CoarseVec,FineVec,subspace); | ||||
|   } | ||||
|  | ||||
|   virtual void CreateSubspace(GridParallelRNG  &RNG,LinearOperatorBase<FineField> &hermop,int nn=nbasis) { | ||||
|   virtual void CreateSubspaceRandom(GridParallelRNG  &RNG) { | ||||
|     int nn=nbasis; | ||||
|     RealD scale; | ||||
|     FineField noise(FineGrid); | ||||
|     for(int b=0;b<nn;b++){ | ||||
|       subspace[b] = Zero(); | ||||
|       gaussian(RNG,noise); | ||||
|       scale = std::pow(norm2(noise),-0.5);  | ||||
|       noise=noise*scale; | ||||
|       subspace[b] = noise; | ||||
|     } | ||||
|   } | ||||
|   virtual void CreateSubspace(GridParallelRNG  &RNG,LinearOperatorBase<FineField> &hermop,int nn=nbasis) | ||||
|   { | ||||
|  | ||||
|     RealD scale; | ||||
|  | ||||
| @@ -217,6 +230,11 @@ public: | ||||
|     scale = std::pow(norm2(noise),-0.5);  | ||||
|     noise=noise*scale; | ||||
|  | ||||
|     std::cout << GridLogMessage<<" Chebyshev subspace pass-1 : ord "<<orderfilter<<" ["<<lo<<","<<hi<<"]"<<std::endl; | ||||
|     std::cout << GridLogMessage<<" Chebyshev subspace pass-2 : nbasis"<<nn<<" min " | ||||
| 	      <<ordermin<<" step "<<orderstep | ||||
| 	      <<" lo"<<filterlo<<std::endl; | ||||
|  | ||||
|     // Initial matrix element | ||||
|     hermop.Op(noise,Mn); std::cout<<GridLogMessage << "noise <n|MdagM|n> "<<norm2(Mn)<<std::endl; | ||||
|  | ||||
| @@ -290,6 +308,44 @@ public: | ||||
|     } | ||||
|     assert(b==nn); | ||||
|   } | ||||
|   virtual void CreateSubspaceChebyshev(GridParallelRNG  &RNG,LinearOperatorBase<FineField> &hermop, | ||||
| 				       int nn, | ||||
| 				       double hi, | ||||
| 				       double lo, | ||||
| 				       int orderfilter | ||||
| 				       ) { | ||||
|  | ||||
|     RealD scale; | ||||
|  | ||||
|     FineField noise(FineGrid); | ||||
|     FineField Mn(FineGrid); | ||||
|     FineField tmp(FineGrid); | ||||
|  | ||||
|     // New normalised noise | ||||
|     std::cout << GridLogMessage<<" Chebyshev subspace pure noise : ord "<<orderfilter<<" ["<<lo<<","<<hi<<"]"<<std::endl; | ||||
|     std::cout << GridLogMessage<<" Chebyshev subspace pure noise  : nbasis "<<nn<<std::endl; | ||||
|  | ||||
|  | ||||
|     for(int b =0;b<nbasis;b++) | ||||
|     { | ||||
|       gaussian(RNG,noise); | ||||
|       scale = std::pow(norm2(noise),-0.5);  | ||||
|       noise=noise*scale; | ||||
|  | ||||
|       // Initial matrix element | ||||
|       hermop.Op(noise,Mn); | ||||
|       if(b==0) std::cout<<GridLogMessage << "noise <n|MdagM|n> "<<norm2(Mn)<<std::endl; | ||||
|       // Filter | ||||
|       Chebyshev<FineField> Cheb(lo,hi,orderfilter); | ||||
|       Cheb(hermop,noise,Mn); | ||||
|       // normalise | ||||
|       scale = std::pow(norm2(Mn),-0.5); 	Mn=Mn*scale; | ||||
|       subspace[b]   = Mn; | ||||
|       hermop.Op(Mn,tmp);  | ||||
|       std::cout<<GridLogMessage << "filt ["<<b<<"] <n|MdagM|n> "<<norm2(tmp)<<std::endl; | ||||
|     } | ||||
|  | ||||
|   } | ||||
|  | ||||
| }; | ||||
|  | ||||
|   | ||||
							
								
								
									
										573
									
								
								Grid/algorithms/GeneralCoarsenedMatrix.h
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										573
									
								
								Grid/algorithms/GeneralCoarsenedMatrix.h
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,573 @@ | ||||
| /************************************************************************************* | ||||
|  | ||||
|     Grid physics library, www.github.com/paboyle/Grid  | ||||
|  | ||||
|     Source file: ./lib/algorithms/GeneralCoarsenedMatrix.h | ||||
|  | ||||
|     Copyright (C) 2015 | ||||
|  | ||||
| Author: Peter Boyle <pboyle@bnl.gov> | ||||
|  | ||||
|     This program is free software; you can redistribute it and/or modify | ||||
|     it under the terms of the GNU General Public License as published by | ||||
|     the Free Software Foundation; either version 2 of the License, or | ||||
|     (at your option) any later version. | ||||
|  | ||||
|     This program is distributed in the hope that it will be useful, | ||||
|     but WITHOUT ANY WARRANTY; without even the implied warranty of | ||||
|     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the | ||||
|     GNU General Public License for more details. | ||||
|  | ||||
|     You should have received a copy of the GNU General Public License along | ||||
|     with this program; if not, write to the Free Software Foundation, Inc., | ||||
|     51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. | ||||
|  | ||||
|     See the full license in the file "LICENSE" in the top level distribution directory | ||||
| *************************************************************************************/ | ||||
| /*  END LEGAL */ | ||||
| #pragma once | ||||
|  | ||||
| #include <Grid/qcd/QCD.h> // needed for Dagger(Yes|No), Inverse(Yes|No) | ||||
|  | ||||
| #include <Grid/lattice/PaddedCell.h> | ||||
| #include <Grid/stencil/GeneralLocalStencil.h> | ||||
|  | ||||
| NAMESPACE_BEGIN(Grid); | ||||
|  | ||||
| // Fixme need coalesced read gpermute | ||||
| template<class vobj> void gpermute(vobj & inout,int perm){ | ||||
|   vobj tmp=inout; | ||||
|   if (perm & 0x1 ) { permute(inout,tmp,0); tmp=inout;} | ||||
|   if (perm & 0x2 ) { permute(inout,tmp,1); tmp=inout;} | ||||
|   if (perm & 0x4 ) { permute(inout,tmp,2); tmp=inout;} | ||||
|   if (perm & 0x8 ) { permute(inout,tmp,3); tmp=inout;} | ||||
| } | ||||
|  | ||||
| ///////////////////////////////////////////////////////////////// | ||||
| // Reuse Aggregation class from CoarsenedMatrix for now | ||||
| // Might think about *smoothed* Aggregation | ||||
| // Equivalent of Geometry class in cartesian case | ||||
| ///////////////////////////////////////////////////////////////// | ||||
| class NonLocalStencilGeometry { | ||||
| public: | ||||
|   int depth; | ||||
|   int hops; | ||||
|   int npoint; | ||||
|   std::vector<Coordinate> shifts; | ||||
|   Coordinate stencil_size; | ||||
|   Coordinate stencil_lo; | ||||
|   Coordinate stencil_hi; | ||||
|   GridCartesian *grid; | ||||
|   GridCartesian *Grid() {return grid;}; | ||||
|   int Depth(void){return 1;};   // Ghost zone depth | ||||
|   int Hops(void){return hops;}; // # of hops=> level of corner fill in in stencil | ||||
|  | ||||
|   virtual int DimSkip(void) =0; | ||||
|  | ||||
|   virtual ~NonLocalStencilGeometry() {}; | ||||
|  | ||||
|   int  Reverse(int point) | ||||
|   { | ||||
|     int Nd = Grid()->Nd(); | ||||
|     Coordinate shft = shifts[point]; | ||||
|     Coordinate rev(Nd); | ||||
|     for(int mu=0;mu<Nd;mu++) rev[mu]= -shft[mu]; | ||||
|     for(int p=0;p<npoint;p++){ | ||||
|       if(rev==shifts[p]){ | ||||
| 	return p; | ||||
|       } | ||||
|     } | ||||
|     assert(0); | ||||
|     return -1; | ||||
|   } | ||||
|   void BuildShifts(void) | ||||
|   { | ||||
|     this->shifts.resize(0); | ||||
|     int Nd = this->grid->Nd(); | ||||
|  | ||||
|     int dd = this->DimSkip(); | ||||
|     for(int s0=this->stencil_lo[dd+0];s0<=this->stencil_hi[dd+0];s0++){ | ||||
|     for(int s1=this->stencil_lo[dd+1];s1<=this->stencil_hi[dd+1];s1++){ | ||||
|     for(int s2=this->stencil_lo[dd+2];s2<=this->stencil_hi[dd+2];s2++){ | ||||
|     for(int s3=this->stencil_lo[dd+3];s3<=this->stencil_hi[dd+3];s3++){ | ||||
|       Coordinate sft(Nd,0); | ||||
|       sft[dd+0] = s0; | ||||
|       sft[dd+1] = s1; | ||||
|       sft[dd+2] = s2; | ||||
|       sft[dd+3] = s3; | ||||
|       int nhops = abs(s0)+abs(s1)+abs(s2)+abs(s3); | ||||
|       if(nhops<=this->hops) this->shifts.push_back(sft); | ||||
|     }}}} | ||||
|     this->npoint = this->shifts.size(); | ||||
|     std::cout << GridLogMessage << "NonLocalStencilGeometry has "<< this->npoint << " terms in stencil "<<std::endl; | ||||
|   } | ||||
|    | ||||
|   NonLocalStencilGeometry(GridCartesian *_coarse_grid,int _hops) : grid(_coarse_grid), hops(_hops) | ||||
|   { | ||||
|     Coordinate latt = grid->GlobalDimensions(); | ||||
|     stencil_size.resize(grid->Nd()); | ||||
|     stencil_lo.resize(grid->Nd()); | ||||
|     stencil_hi.resize(grid->Nd()); | ||||
|     for(int d=0;d<grid->Nd();d++){ | ||||
|      if ( latt[d] == 1 ) { | ||||
|       stencil_lo[d] = 0; | ||||
|       stencil_hi[d] = 0; | ||||
|       stencil_size[d]= 1; | ||||
|      } else if ( latt[d] == 2 ) { | ||||
|       stencil_lo[d] = -1; | ||||
|       stencil_hi[d] = 0; | ||||
|       stencil_size[d]= 2; | ||||
|      } else if ( latt[d] > 2 ) { | ||||
|        stencil_lo[d] = -1; | ||||
|        stencil_hi[d] =  1; | ||||
|        stencil_size[d]= 3; | ||||
|      } | ||||
|     } | ||||
|   }; | ||||
|  | ||||
| }; | ||||
|  | ||||
| // Need to worry about red-black now | ||||
| class NonLocalStencilGeometry4D : public NonLocalStencilGeometry { | ||||
| public: | ||||
|   virtual int DimSkip(void) { return 0;}; | ||||
|   NonLocalStencilGeometry4D(GridCartesian *Coarse,int _hops) : NonLocalStencilGeometry(Coarse,_hops) { }; | ||||
|   virtual ~NonLocalStencilGeometry4D() {}; | ||||
| }; | ||||
| class NonLocalStencilGeometry5D : public NonLocalStencilGeometry { | ||||
| public: | ||||
|   virtual int DimSkip(void) { return 1; };  | ||||
|   NonLocalStencilGeometry5D(GridCartesian *Coarse,int _hops) : NonLocalStencilGeometry(Coarse,_hops)  { }; | ||||
|   virtual ~NonLocalStencilGeometry5D() {}; | ||||
| }; | ||||
| /* | ||||
|  * Bunch of different options classes | ||||
|  */ | ||||
| class NextToNextToNextToNearestStencilGeometry4D : public NonLocalStencilGeometry4D { | ||||
| public: | ||||
|   NextToNextToNextToNearestStencilGeometry4D(GridCartesian *Coarse) :  NonLocalStencilGeometry4D(Coarse,4) | ||||
|   { | ||||
|     this->BuildShifts(); | ||||
|   }; | ||||
| }; | ||||
| class NextToNextToNextToNearestStencilGeometry5D : public  NonLocalStencilGeometry5D { | ||||
| public: | ||||
|   NextToNextToNextToNearestStencilGeometry5D(GridCartesian *Coarse) :  NonLocalStencilGeometry5D(Coarse,4) | ||||
|   { | ||||
|     this->BuildShifts(); | ||||
|   }; | ||||
| }; | ||||
| class NextToNearestStencilGeometry4D : public  NonLocalStencilGeometry4D { | ||||
| public: | ||||
|   NextToNearestStencilGeometry4D(GridCartesian *Coarse) :  NonLocalStencilGeometry4D(Coarse,2) | ||||
|   { | ||||
|     this->BuildShifts(); | ||||
|   }; | ||||
| }; | ||||
| class NextToNearestStencilGeometry5D : public  NonLocalStencilGeometry5D { | ||||
| public: | ||||
|   NextToNearestStencilGeometry5D(GridCartesian *Coarse) :  NonLocalStencilGeometry5D(Coarse,2) | ||||
|   { | ||||
|     this->BuildShifts(); | ||||
|   }; | ||||
| }; | ||||
| class NearestStencilGeometry4D : public  NonLocalStencilGeometry4D { | ||||
| public: | ||||
|   NearestStencilGeometry4D(GridCartesian *Coarse) :  NonLocalStencilGeometry4D(Coarse,1) | ||||
|   { | ||||
|     this->BuildShifts(); | ||||
|   }; | ||||
| }; | ||||
| class NearestStencilGeometry5D : public  NonLocalStencilGeometry5D { | ||||
| public: | ||||
|   NearestStencilGeometry5D(GridCartesian *Coarse) :  NonLocalStencilGeometry5D(Coarse,1) | ||||
|   { | ||||
|     this->BuildShifts(); | ||||
|   }; | ||||
| }; | ||||
|  | ||||
| // Fine Object == (per site) type of fine field | ||||
| // nbasis      == number of deflation vectors | ||||
| template<class Fobj,class CComplex,int nbasis> | ||||
| class GeneralCoarsenedMatrix : public SparseMatrixBase<Lattice<iVector<CComplex,nbasis > > >  { | ||||
| public: | ||||
|  | ||||
|   typedef GeneralCoarsenedMatrix<Fobj,CComplex,nbasis> GeneralCoarseOp; | ||||
|   typedef iVector<CComplex,nbasis >           siteVector; | ||||
|   typedef iMatrix<CComplex,nbasis >           siteMatrix; | ||||
|   typedef Lattice<iScalar<CComplex> >         CoarseComplexField; | ||||
|   typedef Lattice<siteVector>                 CoarseVector; | ||||
|   typedef Lattice<iMatrix<CComplex,nbasis > > CoarseMatrix; | ||||
|   typedef iMatrix<CComplex,nbasis >  Cobj; | ||||
|   typedef Lattice< CComplex >   CoarseScalar; // used for inner products on fine field | ||||
|   typedef Lattice<Fobj >        FineField; | ||||
|   typedef CoarseVector Field; | ||||
|   //////////////////// | ||||
|   // Data members | ||||
|   //////////////////// | ||||
|   int hermitian; | ||||
|   GridBase      *       _FineGrid;  | ||||
|   GridCartesian *       _CoarseGrid;  | ||||
|   NonLocalStencilGeometry &geom; | ||||
|   PaddedCell Cell; | ||||
|   GeneralLocalStencil Stencil; | ||||
|    | ||||
|   std::vector<CoarseMatrix> _A; | ||||
|   std::vector<CoarseMatrix> _Adag; | ||||
|  | ||||
|   /////////////////////// | ||||
|   // Interface | ||||
|   /////////////////////// | ||||
|   GridBase      * Grid(void)           { return _FineGrid; };   // this is all the linalg routines need to know | ||||
|   GridBase      * FineGrid(void)       { return _FineGrid; };   // this is all the linalg routines need to know | ||||
|   GridCartesian * CoarseGrid(void)     { return _CoarseGrid; };   // this is all the linalg routines need to know | ||||
|  | ||||
|  | ||||
|   void ProjectNearestNeighbour(RealD shift, GeneralCoarseOp &CopyMe) | ||||
|   { | ||||
|     int nfound=0; | ||||
|     std::cout << " ProjectNearestNeighbour "<< CopyMe._A[0].Grid()<<std::endl; | ||||
|     for(int p=0;p<geom.npoint;p++){ | ||||
|       for(int pp=0;pp<CopyMe.geom.npoint;pp++){ | ||||
|  	// Search for the same relative shift | ||||
| 	// Avoids brutal handling of Grid pointers | ||||
| 	if ( CopyMe.geom.shifts[pp]==geom.shifts[p] ) { | ||||
| 	  _A[p] = CopyMe.Cell.Extract(CopyMe._A[pp]); | ||||
| 	  _Adag[p] = CopyMe.Cell.Extract(CopyMe._Adag[pp]); | ||||
| 	  nfound++; | ||||
| 	} | ||||
|       } | ||||
|     } | ||||
|     assert(nfound==geom.npoint); | ||||
|     ExchangeCoarseLinks(); | ||||
|   } | ||||
|    | ||||
|   GeneralCoarsenedMatrix(NonLocalStencilGeometry &_geom,GridBase *FineGrid, GridCartesian * CoarseGrid) | ||||
|     : geom(_geom), | ||||
|       _FineGrid(FineGrid), | ||||
|       _CoarseGrid(CoarseGrid), | ||||
|       hermitian(1), | ||||
|       Cell(_geom.Depth(),_CoarseGrid), | ||||
|       Stencil(Cell.grids.back(),geom.shifts) | ||||
|   { | ||||
|     { | ||||
|       int npoint = _geom.npoint; | ||||
|       autoView( Stencil_v  , Stencil, AcceleratorRead); | ||||
|       int osites=Stencil.Grid()->oSites(); | ||||
|       for(int ss=0;ss<osites;ss++){ | ||||
| 	for(int point=0;point<npoint;point++){ | ||||
| 	  auto SE = Stencil_v.GetEntry(point,ss); | ||||
| 	  int o = SE->_offset; | ||||
| 	  assert( o< osites); | ||||
| 	} | ||||
|       }     | ||||
|     } | ||||
|  | ||||
|     _A.resize(geom.npoint,CoarseGrid); | ||||
|     _Adag.resize(geom.npoint,CoarseGrid); | ||||
|   } | ||||
|   void M (const CoarseVector &in, CoarseVector &out) | ||||
|   { | ||||
|     Mult(_A,in,out); | ||||
|   } | ||||
|   void Mdag (const CoarseVector &in, CoarseVector &out) | ||||
|   { | ||||
|     if ( hermitian ) M(in,out); | ||||
|     else Mult(_Adag,in,out); | ||||
|   } | ||||
|   void Mult (std::vector<CoarseMatrix> &A,const CoarseVector &in, CoarseVector &out) | ||||
|   { | ||||
|     RealD tviews=0; | ||||
|     RealD ttot=0; | ||||
|     RealD tmult=0; | ||||
|     RealD texch=0; | ||||
|     RealD text=0; | ||||
|     ttot=-usecond(); | ||||
|     conformable(CoarseGrid(),in.Grid()); | ||||
|     conformable(in.Grid(),out.Grid()); | ||||
|     out.Checkerboard() = in.Checkerboard(); | ||||
|     CoarseVector tin=in; | ||||
|  | ||||
|     texch-=usecond(); | ||||
|     CoarseVector pin  = Cell.Exchange(tin); | ||||
|     texch+=usecond(); | ||||
|  | ||||
|     CoarseVector pout(pin.Grid()); pout=Zero(); | ||||
|  | ||||
|     int npoint = geom.npoint; | ||||
|     typedef LatticeView<Cobj> Aview; | ||||
|        | ||||
|     const int Nsimd = CComplex::Nsimd(); | ||||
|      | ||||
|     int osites=pin.Grid()->oSites(); | ||||
|     //    int gsites=pin.Grid()->gSites(); | ||||
|  | ||||
|     RealD flops = 1.0* npoint * nbasis * nbasis * 8 * osites; | ||||
|     RealD bytes = (1.0*osites*sizeof(siteMatrix)*npoint+2.0*osites*sizeof(siteVector))*npoint; | ||||
|        | ||||
|     //    for(int point=0;point<npoint;point++){ | ||||
|     //      conformable(A[point],pin); | ||||
|     //    } | ||||
|  | ||||
|     { | ||||
|       tviews-=usecond(); | ||||
|       autoView( in_v , pin, AcceleratorRead); | ||||
|       autoView( out_v , pout, AcceleratorWrite); | ||||
|       autoView( Stencil_v  , Stencil, AcceleratorRead); | ||||
|       tviews+=usecond(); | ||||
|        | ||||
|       for(int point=0;point<npoint;point++){ | ||||
| 	tviews-=usecond(); | ||||
| 	autoView( A_v, A[point],AcceleratorRead); | ||||
| 	tviews+=usecond(); | ||||
| 	tmult-=usecond(); | ||||
| 	accelerator_for(sss, osites*nbasis, Nsimd, { | ||||
|  | ||||
| 	    typedef decltype(coalescedRead(in_v[0]))    calcVector; | ||||
|  | ||||
| 	    int ss = sss/nbasis; | ||||
| 	    int b  = sss%nbasis; | ||||
|  | ||||
| 	    auto SE  = Stencil_v.GetEntry(point,ss); | ||||
| 	    auto nbr = coalescedReadGeneralPermute(in_v[SE->_offset],SE->_permute,Nd); | ||||
| 	    auto res = out_v(ss)(b); | ||||
| 	    for(int bb=0;bb<nbasis;bb++) { | ||||
| 	      res = res + coalescedRead(A_v[ss](b,bb))*nbr(bb); | ||||
| 	    } | ||||
| 	    coalescedWrite(out_v[ss](b),res); | ||||
| 	}); | ||||
|  | ||||
| 	tmult+=usecond(); | ||||
|       } | ||||
|     } | ||||
|     text-=usecond(); | ||||
|     out = Cell.Extract(pout); | ||||
|     text+=usecond(); | ||||
|     ttot+=usecond(); | ||||
|  | ||||
|     std::cout << GridLogPerformance<<"Coarse Mult Aviews "<<tviews<<" us"<<std::endl; | ||||
|     std::cout << GridLogPerformance<<"Coarse Mult exch "<<texch<<" us"<<std::endl; | ||||
|     std::cout << GridLogPerformance<<"Coarse Mult mult "<<tmult<<" us"<<std::endl; | ||||
|     std::cout << GridLogPerformance<<"Coarse Mult ext  "<<text<<" us"<<std::endl; | ||||
|     std::cout << GridLogPerformance<<"Coarse Mult tot  "<<ttot<<" us"<<std::endl; | ||||
|     std::cout << GridLogPerformance<<"Coarse Kernel "<< flops/tmult<<" mflop/s"<<std::endl; | ||||
|     std::cout << GridLogPerformance<<"Coarse Kernel "<< bytes/tmult<<" MB/s"<<std::endl; | ||||
|     std::cout << GridLogPerformance<<"Coarse flops/s "<< flops/ttot<<" mflop/s"<<std::endl; | ||||
|     std::cout << GridLogPerformance<<"Coarse bytes   "<< bytes/1e6<<" MB"<<std::endl; | ||||
|   }; | ||||
|  | ||||
|   void PopulateAdag(void) | ||||
|   { | ||||
|     for(int64_t bidx=0;bidx<CoarseGrid()->gSites() ;bidx++){ | ||||
|       Coordinate bcoor; | ||||
|       CoarseGrid()->GlobalIndexToGlobalCoor(bidx,bcoor); | ||||
|        | ||||
|       for(int p=0;p<geom.npoint;p++){ | ||||
| 	Coordinate scoor = bcoor; | ||||
| 	for(int mu=0;mu<bcoor.size();mu++){ | ||||
| 	  int L = CoarseGrid()->GlobalDimensions()[mu]; | ||||
| 	  scoor[mu] = (bcoor[mu] - geom.shifts[p][mu] + L) % L; // Modulo arithmetic | ||||
| 	} | ||||
| 	// Flip to poke/peekLocalSite and not too bad | ||||
| 	auto link = peekSite(_A[p],scoor); | ||||
| 	int pp = geom.Reverse(p); | ||||
| 	pokeSite(adj(link),_Adag[pp],bcoor); | ||||
|       } | ||||
|     } | ||||
|   } | ||||
|   ///////////////////////////////////////////////////////////// | ||||
|   //  | ||||
|   // A) Only reduced flops option is to use a padded cell of depth 4 | ||||
|   // and apply MpcDagMpc in the padded cell. | ||||
|   // | ||||
|   // Makes for ONE application of MpcDagMpc per vector instead of 30 or 80. | ||||
|   // With the effective cell size around (B+8)^4 perhaps 12^4/4^4 ratio | ||||
|   // Cost is 81x more, same as stencil size. | ||||
|   // | ||||
|   // But: can eliminate comms and do as local dirichlet. | ||||
|   // | ||||
|   // Local exchange gauge field once. | ||||
|   // Apply to all vectors, local only computation. | ||||
|   // Must exchange ghost subcells in reverse process of PaddedCell to take inner products | ||||
|   // | ||||
|   // B) Can reduce cost: pad by 1, apply Deo      (4^4+6^4+8^4+8^4 )/ (4x 4^4) | ||||
|   //                     pad by 2, apply Doe | ||||
|   //                     pad by 3, apply Deo | ||||
|   //                     then break out 8x directions; cost is ~10x MpcDagMpc per vector | ||||
|   // | ||||
|   // => almost factor of 10 in setup cost, excluding data rearrangement | ||||
|   // | ||||
|   // Intermediates -- ignore the corner terms, leave approximate and force Hermitian | ||||
|   // Intermediates -- pad by 2 and apply 1+8+24 = 33 times. | ||||
|   ///////////////////////////////////////////////////////////// | ||||
|  | ||||
|     ////////////////////////////////////////////////////////// | ||||
|     // BFM HDCG style approach: Solve a system of equations to get Aij | ||||
|     ////////////////////////////////////////////////////////// | ||||
|     /* | ||||
|      *     Here, k,l index which possible shift within the 3^Nd "ball" connected by MdagM. | ||||
|      * | ||||
|      *     conj(phases[block]) proj[k][ block*Nvec+j ] =  \sum_ball  e^{i q_k . delta} < phi_{block,j} | MdagM | phi_{(block+delta),i} >  | ||||
|      *                                                 =  \sum_ball e^{iqk.delta} A_ji | ||||
|      * | ||||
|      *     Must invert matrix M_k,l = e^[i q_k . delta_l] | ||||
|      * | ||||
|      *     Where q_k = delta_k . (2*M_PI/global_nb[mu]) | ||||
|      */ | ||||
|   void CoarsenOperator(LinearOperatorBase<Lattice<Fobj> > &linop, | ||||
| 		       Aggregation<Fobj,CComplex,nbasis> & Subspace) | ||||
|   { | ||||
|     std::cout << GridLogMessage<< "GeneralCoarsenMatrix "<< std::endl; | ||||
|     GridBase *grid = FineGrid(); | ||||
|  | ||||
|     RealD tproj=0.0; | ||||
|     RealD teigen=0.0; | ||||
|     RealD tmat=0.0; | ||||
|     RealD tphase=0.0; | ||||
|     RealD tinv=0.0; | ||||
|  | ||||
|     ///////////////////////////////////////////////////////////// | ||||
|     // Orthogonalise the subblocks over the basis | ||||
|     ///////////////////////////////////////////////////////////// | ||||
|     CoarseScalar InnerProd(CoarseGrid());  | ||||
|     blockOrthogonalise(InnerProd,Subspace.subspace); | ||||
|  | ||||
|     const int npoint = geom.npoint; | ||||
|        | ||||
|     Coordinate clatt = CoarseGrid()->GlobalDimensions(); | ||||
|     int Nd = CoarseGrid()->Nd(); | ||||
|  | ||||
|       /* | ||||
|        *     Here, k,l index which possible momentum/shift within the N-points connected by MdagM. | ||||
|        *     Matrix index i is mapped to this shift via  | ||||
|        *               geom.shifts[i] | ||||
|        * | ||||
|        *     conj(pha[block]) proj[k (which mom)][j (basis vec cpt)][block]  | ||||
|        *       =  \sum_{l in ball}  e^{i q_k . delta_l} < phi_{block,j} | MdagM | phi_{(block+delta_l),i} >  | ||||
|        *       =  \sum_{l in ball} e^{iqk.delta_l} A_ji^{b.b+l} | ||||
|        *       = M_{kl} A_ji^{b.b+l} | ||||
|        * | ||||
|        *     Must assemble and invert matrix M_k,l = e^[i q_k . delta_l] | ||||
|        *   | ||||
|        *     Where q_k = delta_k . (2*M_PI/global_nb[mu]) | ||||
|        * | ||||
|        *     Then A{ji}^{b,b+l} = M^{-1}_{lm} ComputeProj_{m,b,i,j} | ||||
|        */ | ||||
|     teigen-=usecond(); | ||||
|     Eigen::MatrixXcd Mkl    = Eigen::MatrixXcd::Zero(npoint,npoint); | ||||
|     Eigen::MatrixXcd invMkl = Eigen::MatrixXcd::Zero(npoint,npoint); | ||||
|     ComplexD ci(0.0,1.0); | ||||
|     for(int k=0;k<npoint;k++){ // Loop over momenta | ||||
|  | ||||
|       for(int l=0;l<npoint;l++){ // Loop over nbr relative | ||||
| 	ComplexD phase(0.0,0.0); | ||||
| 	for(int mu=0;mu<Nd;mu++){ | ||||
| 	  RealD TwoPiL =  M_PI * 2.0/ clatt[mu]; | ||||
| 	  phase=phase+TwoPiL*geom.shifts[k][mu]*geom.shifts[l][mu]; | ||||
| 	} | ||||
| 	phase=exp(phase*ci); | ||||
| 	Mkl(k,l) = phase; | ||||
|       } | ||||
|     } | ||||
|     invMkl = Mkl.inverse(); | ||||
|     teigen+=usecond(); | ||||
|  | ||||
|     /////////////////////////////////////////////////////////////////////// | ||||
|     // Now compute the matrix elements of linop between the orthonormal | ||||
|     // set of vectors. | ||||
|     /////////////////////////////////////////////////////////////////////// | ||||
|     FineField phaV(grid); // Phased block basis vector | ||||
|     FineField MphaV(grid);// Matrix applied | ||||
|     CoarseVector coarseInner(CoarseGrid()); | ||||
|  | ||||
|     std::vector<CoarseVector> ComputeProj(npoint,CoarseGrid()); | ||||
|     std::vector<CoarseVector>          FT(npoint,CoarseGrid()); | ||||
|     for(int i=0;i<nbasis;i++){// Loop over basis vectors | ||||
|       std::cout << GridLogMessage<< "CoarsenMatrixColoured vec "<<i<<"/"<<nbasis<< std::endl; | ||||
|       for(int p=0;p<npoint;p++){ // Loop over momenta in npoint | ||||
| 	///////////////////////////////////////////////////// | ||||
| 	// Stick a phase on every block | ||||
| 	///////////////////////////////////////////////////// | ||||
| 	tphase-=usecond(); | ||||
| 	CoarseComplexField coor(CoarseGrid()); | ||||
| 	CoarseComplexField pha(CoarseGrid());	pha=Zero(); | ||||
| 	for(int mu=0;mu<Nd;mu++){ | ||||
| 	  LatticeCoordinate(coor,mu); | ||||
| 	  RealD TwoPiL =  M_PI * 2.0/ clatt[mu]; | ||||
| 	  pha = pha + (TwoPiL * geom.shifts[p][mu]) * coor; | ||||
| 	} | ||||
| 	pha  =exp(pha*ci); | ||||
| 	phaV=Zero(); | ||||
| 	blockZAXPY(phaV,pha,Subspace.subspace[i],phaV); | ||||
| 	tphase+=usecond(); | ||||
|  | ||||
| 	///////////////////////////////////////////////////////////////////// | ||||
| 	// Multiple phased subspace vector by matrix and project to subspace | ||||
| 	// Remove local bulk phase to leave relative phases | ||||
| 	///////////////////////////////////////////////////////////////////// | ||||
| 	tmat-=usecond(); | ||||
| 	linop.Op(phaV,MphaV); | ||||
| 	tmat+=usecond(); | ||||
|  | ||||
| 	tproj-=usecond(); | ||||
| 	blockProject(coarseInner,MphaV,Subspace.subspace); | ||||
| 	coarseInner = conjugate(pha) * coarseInner; | ||||
|  | ||||
| 	ComputeProj[p] = coarseInner; | ||||
| 	tproj+=usecond(); | ||||
|  | ||||
|       } | ||||
|  | ||||
|       tinv-=usecond(); | ||||
|       for(int k=0;k<npoint;k++){ | ||||
| 	FT[k] = Zero(); | ||||
| 	for(int l=0;l<npoint;l++){ | ||||
| 	  FT[k]= FT[k]+ invMkl(l,k)*ComputeProj[l]; | ||||
| 	} | ||||
|        | ||||
| 	int osites=CoarseGrid()->oSites(); | ||||
| 	autoView( A_v  , _A[k], AcceleratorWrite); | ||||
| 	autoView( FT_v  , FT[k], AcceleratorRead); | ||||
| 	accelerator_for(sss, osites, 1, { | ||||
| 	    for(int j=0;j<nbasis;j++){ | ||||
| 	      A_v[sss](j,i) = FT_v[sss](j); | ||||
| 	    } | ||||
|         }); | ||||
|       } | ||||
|       tinv+=usecond(); | ||||
|     } | ||||
|  | ||||
|     for(int p=0;p<geom.npoint;p++){ | ||||
|       Coordinate coor({0,0,0,0,0}); | ||||
|       auto sval = peekSite(_A[p],coor); | ||||
|     } | ||||
|  | ||||
|     // Only needed if nonhermitian | ||||
|     if ( ! hermitian ) { | ||||
|       std::cout << GridLogMessage<<"PopulateAdag  "<<std::endl; | ||||
|       PopulateAdag(); | ||||
|     } | ||||
|  | ||||
|     // Need to write something to populate Adag from A | ||||
|     std::cout << GridLogMessage<<"ExchangeCoarseLinks  "<<std::endl; | ||||
|     ExchangeCoarseLinks(); | ||||
|     std::cout << GridLogMessage<<"CoarsenOperator eigen  "<<teigen<<" us"<<std::endl; | ||||
|     std::cout << GridLogMessage<<"CoarsenOperator phase  "<<tphase<<" us"<<std::endl; | ||||
|     std::cout << GridLogMessage<<"CoarsenOperator mat    "<<tmat <<" us"<<std::endl; | ||||
|     std::cout << GridLogMessage<<"CoarsenOperator proj   "<<tproj<<" us"<<std::endl; | ||||
|     std::cout << GridLogMessage<<"CoarsenOperator inv    "<<tinv<<" us"<<std::endl; | ||||
|   } | ||||
|   void ExchangeCoarseLinks(void){ | ||||
|     for(int p=0;p<geom.npoint;p++){ | ||||
|       std::cout << "Exchange "<<p<<std::endl; | ||||
|       _A[p] = Cell.Exchange(_A[p]); | ||||
|       _Adag[p]= Cell.Exchange(_Adag[p]); | ||||
|     } | ||||
|   } | ||||
|   virtual  void Mdiag    (const Field &in, Field &out){ assert(0);}; | ||||
|   virtual  void Mdir     (const Field &in, Field &out,int dir, int disp){assert(0);}; | ||||
|   virtual  void MdirAll  (const Field &in, std::vector<Field> &out){assert(0);}; | ||||
| }; | ||||
|  | ||||
|  | ||||
| NAMESPACE_END(Grid); | ||||
| @@ -90,9 +90,8 @@ public: | ||||
|     order=_order; | ||||
|        | ||||
|     if(order < 2) exit(-1); | ||||
|     Coeffs.resize(order); | ||||
|     Coeffs.assign(0.,order); | ||||
|     Coeffs[order-1] = 1.; | ||||
|     Coeffs.resize(order,0.0); | ||||
|     Coeffs[order-1] = 1.0; | ||||
|   }; | ||||
|    | ||||
|   // PB - more efficient low pass drops high modes above the low as 1/x uses all Chebyshev's. | ||||
|   | ||||
| @@ -33,15 +33,6 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk> | ||||
|    * Script A = SolverMatrix  | ||||
|    * Script P = Preconditioner | ||||
|    * | ||||
|    * Deflation methods considered | ||||
|    *      -- Solve P A x = P b        [ like Luscher ] | ||||
|    * DEF-1        M P A x = M P b     [i.e. left precon] | ||||
|    * DEF-2        P^T M A x = P^T M b | ||||
|    * ADEF-1       Preconditioner = M P + Q      [ Q + M + M A Q] | ||||
|    * ADEF-2       Preconditioner = P^T M + Q | ||||
|    * BNN          Preconditioner = P^T M P + Q | ||||
|    * BNN2         Preconditioner = M P + P^TM +Q - M P A M  | ||||
|    *  | ||||
|    * Implement ADEF-2 | ||||
|    * | ||||
|    * Vstart = P^Tx + Qb | ||||
| @@ -49,202 +40,245 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk> | ||||
|    * M2=M3=1 | ||||
|    * Vout = x | ||||
|    */ | ||||
| NAMESPACE_BEGIN(Grid); | ||||
|  | ||||
| // abstract base | ||||
| template<class Field, class CoarseField> | ||||
| class TwoLevelFlexiblePcg : public LinearFunction<Field> | ||||
| template<class Field> | ||||
| class TwoLevelCG : public LinearFunction<Field> | ||||
| { | ||||
|  public: | ||||
|   int verbose; | ||||
|   RealD   Tolerance; | ||||
|   Integer MaxIterations; | ||||
|   const int mmax = 5; | ||||
|   GridBase *grid; | ||||
|   GridBase *coarsegrid; | ||||
|  | ||||
|   LinearOperatorBase<Field>   *_Linop | ||||
|   OperatorFunction<Field>     *_Smoother, | ||||
|   LinearFunction<CoarseField> *_CoarseSolver; | ||||
|  | ||||
|   // Need somthing that knows how to get from Coarse to fine and back again | ||||
|   // Fine operator, Smoother, CoarseSolver | ||||
|   LinearOperatorBase<Field>   &_FineLinop; | ||||
|   LinearFunction<Field>   &_Smoother; | ||||
|    | ||||
|   // more most opertor functions | ||||
|   TwoLevelFlexiblePcg(RealD tol, | ||||
| 		     Integer maxit, | ||||
| 		     LinearOperatorBase<Field> *Linop, | ||||
| 		     LinearOperatorBase<Field> *SmootherLinop, | ||||
| 		     OperatorFunction<Field>   *Smoother, | ||||
| 		     OperatorFunction<CoarseField>  CoarseLinop | ||||
| 		     ) :  | ||||
|   TwoLevelCG(RealD tol, | ||||
| 	     Integer maxit, | ||||
| 	     LinearOperatorBase<Field>   &FineLinop, | ||||
| 	     LinearFunction<Field>       &Smoother, | ||||
| 	     GridBase *fine) :  | ||||
|       Tolerance(tol),  | ||||
|       MaxIterations(maxit), | ||||
|       _Linop(Linop), | ||||
|       _PreconditionerLinop(PrecLinop), | ||||
|       _Preconditioner(Preconditioner) | ||||
|   {  | ||||
|     verbose=0; | ||||
|       _FineLinop(FineLinop), | ||||
|       _Smoother(Smoother) | ||||
|   { | ||||
|     grid       = fine; | ||||
|   }; | ||||
|  | ||||
|   // The Pcg routine is common to all, but the various matrices differ from derived  | ||||
|   // implementation to derived implmentation | ||||
|   void operator() (const Field &src, Field &psi){ | ||||
|   void operator() (const Field &src, Field &psi){ | ||||
|  | ||||
|     psi.Checkerboard() = src.Checkerboard(); | ||||
|     grid             = src.Grid(); | ||||
|  | ||||
|    | ||||
|   virtual void operator() (const Field &src, Field &psi) | ||||
|   { | ||||
|     Field resid(grid); | ||||
|     RealD f; | ||||
|     RealD rtzp,rtz,a,d,b; | ||||
|     RealD rptzp; | ||||
|     RealD tn; | ||||
|     RealD guess = norm2(psi); | ||||
|     RealD ssq   = norm2(src); | ||||
|     RealD rsq   = ssq*Tolerance*Tolerance; | ||||
|      | ||||
|     ///////////////////////////// | ||||
|     // Set up history vectors | ||||
|     ///////////////////////////// | ||||
|     std::vector<Field> p  (mmax,grid); | ||||
|     std::vector<Field> mmp(mmax,grid); | ||||
|     std::vector<RealD> pAp(mmax); | ||||
|  | ||||
|     Field x  (grid); x = psi; | ||||
|     Field z  (grid); | ||||
|     Field x(grid);  | ||||
|     Field p(grid); | ||||
|     Field z(grid); | ||||
|     Field tmp(grid); | ||||
|     Field mmp(grid); | ||||
|     Field r  (grid); | ||||
|     Field mu (grid); | ||||
|    | ||||
|     Field rp (grid); | ||||
|      | ||||
|     //Initial residual computation & set up | ||||
|     RealD guess = norm2(psi); | ||||
|     double tn; | ||||
|  | ||||
|     GridStopWatch HDCGTimer; | ||||
|     HDCGTimer.Start(); | ||||
|     ////////////////////////// | ||||
|     // x0 = Vstart -- possibly modify guess | ||||
|     ////////////////////////// | ||||
|     x=src; | ||||
|     x=Zero(); | ||||
|     Vstart(x,src); | ||||
|  | ||||
|     // r0 = b -A x0 | ||||
|     HermOp(x,mmp); // Shouldn't this be something else? | ||||
|     axpy (r, -1.0,mmp[0], src);    // Recomputes r=src-Ax0 | ||||
|     _FineLinop.HermOp(x,mmp); | ||||
|  | ||||
|     axpy(r, -1.0, mmp, src);    // Recomputes r=src-x0 | ||||
|     rp=r; | ||||
|  | ||||
|     ////////////////////////////////// | ||||
|     // Compute z = M1 x | ||||
|     ////////////////////////////////// | ||||
|     M1(r,z,tmp,mp,SmootherMirs); | ||||
|     PcgM1(r,z); | ||||
|     rtzp =real(innerProduct(r,z)); | ||||
|  | ||||
|     /////////////////////////////////////// | ||||
|     // Solve for Mss mu = P A z and set p = z-mu | ||||
|     // Def2: p = 1 - Q Az = Pright z  | ||||
|     // Other algos M2 is trivial | ||||
|     // Except Def2, M2 is trivial | ||||
|     /////////////////////////////////////// | ||||
|     M2(z,p[0]); | ||||
|     p=z; | ||||
|  | ||||
|     for (int k=0;k<=MaxIterations;k++){ | ||||
|     RealD ssq =  norm2(src); | ||||
|     RealD rsq =  ssq*Tolerance*Tolerance; | ||||
|  | ||||
|     std::cout<<GridLogMessage<<"HDCG: k=0 residual "<<rtzp<<" target rsq "<<rsq<<" ssq "<<ssq<<std::endl; | ||||
|      | ||||
|       int peri_k  = k % mmax; | ||||
|       int peri_kp = (k+1) % mmax; | ||||
|     for (int k=1;k<=MaxIterations;k++){ | ||||
|  | ||||
|       rtz=rtzp; | ||||
|       d= M3(p[peri_k],mp,mmp[peri_k],tmp); | ||||
|       d= PcgM3(p,mmp); | ||||
|       a = rtz/d; | ||||
|      | ||||
|       // Memorise this | ||||
|       pAp[peri_k] = d; | ||||
|  | ||||
|       axpy(x,a,p[peri_k],x); | ||||
|       RealD rn = axpy_norm(r,-a,mmp[peri_k],r); | ||||
|       axpy(x,a,p,x); | ||||
|       RealD rn = axpy_norm(r,-a,mmp,r); | ||||
|  | ||||
|       // Compute z = M x | ||||
|       M1(r,z,tmp,mp); | ||||
|       PcgM1(r,z); | ||||
|  | ||||
|       rtzp =real(innerProduct(r,z)); | ||||
|  | ||||
|       M2(z,mu); // ADEF-2 this is identity. Axpy possible to eliminate | ||||
|  | ||||
|       p[peri_kp]=p[peri_k]; | ||||
|  | ||||
|       // Standard search direction  p -> z + b p    ; b =  | ||||
|       b = (rtzp)/rtz; | ||||
|  | ||||
|       int northog; | ||||
|       //    northog     = (peri_kp==0)?1:peri_kp; // This is the fCG(mmax) algorithm | ||||
|       northog     = (k>mmax-1)?(mmax-1):k;        // This is the fCG-Tr(mmax-1) algorithm | ||||
|      | ||||
|       for(int back=0; back < northog; back++){ | ||||
| 	int peri_back = (k-back)%mmax; | ||||
| 	RealD pbApk= real(innerProduct(mmp[peri_back],p[peri_kp])); | ||||
| 	RealD beta = -pbApk/pAp[peri_back]; | ||||
| 	axpy(p[peri_kp],beta,p[peri_back],p[peri_kp]); | ||||
|       int ipcg=1; // almost free inexact preconditioned CG | ||||
|       if (ipcg) { | ||||
| 	rptzp =real(innerProduct(rp,z)); | ||||
|       } else { | ||||
| 	rptzp =0; | ||||
|       } | ||||
|       b = (rtzp-rptzp)/rtz; | ||||
|  | ||||
|       PcgM2(z,mu); // ADEF-2 this is identity. Axpy possible to eliminate | ||||
|  | ||||
|       axpy(p,b,p,mu);  // mu = A r | ||||
|  | ||||
|       RealD rrn=sqrt(rn/ssq); | ||||
|       std::cout<<GridLogMessage<<"TwoLevelfPcg: k= "<<k<<" residual = "<<rrn<<std::endl; | ||||
|       RealD rtn=sqrt(rtz/ssq); | ||||
|       std::cout<<GridLogMessage<<"HDCG: Pcg k= "<<k<<" residual = "<<rrn<<std::endl; | ||||
|  | ||||
|       if ( ipcg ) { | ||||
| 	axpy(rp,0.0,r,r); | ||||
|       } | ||||
|  | ||||
|       // Stopping condition | ||||
|       if ( rn <= rsq ) {  | ||||
|  | ||||
| 	HermOp(x,mmp); // Shouldn't this be something else? | ||||
| 	axpy(tmp,-1.0,src,mmp[0]); | ||||
| 	 | ||||
| 	RealD psinorm = sqrt(norm2(x)); | ||||
| 	RealD srcnorm = sqrt(norm2(src)); | ||||
| 	RealD tmpnorm = sqrt(norm2(tmp)); | ||||
| 	RealD true_residual = tmpnorm/srcnorm; | ||||
| 	std::cout<<GridLogMessage<<"TwoLevelfPcg:   true residual is "<<true_residual<<std::endl; | ||||
| 	std::cout<<GridLogMessage<<"TwoLevelfPcg: target residual was"<<Tolerance<<std::endl; | ||||
| 	return k; | ||||
| 	HDCGTimer.Stop(); | ||||
| 	std::cout<<GridLogMessage<<"HDCG: Pcg converged in "<<k<<" iterations and "<<HDCGTimer.Elapsed()<<std::endl;; | ||||
|  | ||||
| 	_FineLinop.HermOp(x,mmp);			   | ||||
| 	axpy(tmp,-1.0,src,mmp); | ||||
|  | ||||
| 	RealD  mmpnorm = sqrt(norm2(mmp)); | ||||
| 	RealD  xnorm   = sqrt(norm2(x)); | ||||
| 	RealD  srcnorm = sqrt(norm2(src)); | ||||
| 	RealD  tmpnorm = sqrt(norm2(tmp)); | ||||
| 	RealD  true_residual = tmpnorm/srcnorm; | ||||
| 	std::cout<<GridLogMessage<<"HDCG: true residual is "<<true_residual | ||||
| 		 <<" solution "<<xnorm<<" source "<<srcnorm<<std::endl; | ||||
|  | ||||
| 	return; | ||||
|       } | ||||
|  | ||||
|     } | ||||
|     // Non-convergence | ||||
|     assert(0); | ||||
|     std::cout << "HDCG: Pcg not converged"<<std::endl; | ||||
|     return ; | ||||
|   } | ||||
|    | ||||
|  | ||||
|  public: | ||||
|  | ||||
|   virtual void M(Field & in,Field & out,Field & tmp) { | ||||
|   virtual void PcgM1(Field & in, Field & out)     =0; | ||||
|   virtual void Vstart(Field & x,const Field & src)=0; | ||||
|  | ||||
|   virtual void PcgM2(const Field & in, Field & out) { | ||||
|     out=in; | ||||
|   } | ||||
|  | ||||
|   virtual void M1(Field & in, Field & out) {// the smoother | ||||
|   virtual RealD PcgM3(const Field & p, Field & mmp){ | ||||
|     RealD dd; | ||||
|     _FineLinop.HermOp(p,mmp); | ||||
|     ComplexD dot = innerProduct(p,mmp); | ||||
|     dd=real(dot); | ||||
|     return dd; | ||||
|   } | ||||
|  | ||||
|   ///////////////////////////////////////////////////////////////////// | ||||
|   // Only Def1 has non-trivial Vout. | ||||
|   ///////////////////////////////////////////////////////////////////// | ||||
|   virtual void   Vout  (Field & in, Field & out,Field & src){ | ||||
|     out = in; | ||||
|   } | ||||
|  | ||||
| }; | ||||
|    | ||||
| template<class Field, class CoarseField, class Aggregation> | ||||
| class TwoLevelADEF2 : public TwoLevelCG<Field> | ||||
| { | ||||
|  public: | ||||
|   /////////////////////////////////////////////////////////////////////////////////// | ||||
|   // Need something that knows how to get from Coarse to fine and back again | ||||
|   //  void ProjectToSubspace(CoarseVector &CoarseVec,const FineField &FineVec){ | ||||
|   //  void PromoteFromSubspace(const CoarseVector &CoarseVec,FineField &FineVec){ | ||||
|   /////////////////////////////////////////////////////////////////////////////////// | ||||
|   GridBase *coarsegrid; | ||||
|   Aggregation &_Aggregates;                     | ||||
|   LinearFunction<CoarseField> &_CoarseSolver; | ||||
|   LinearFunction<CoarseField> &_CoarseSolverPrecise; | ||||
|   /////////////////////////////////////////////////////////////////////////////////// | ||||
|    | ||||
|   // more most opertor functions | ||||
|   TwoLevelADEF2(RealD tol, | ||||
| 		Integer maxit, | ||||
| 		LinearOperatorBase<Field>   &FineLinop, | ||||
| 		LinearFunction<Field>   &Smoother, | ||||
| 		LinearFunction<CoarseField>  &CoarseSolver, | ||||
| 		LinearFunction<CoarseField>  &CoarseSolverPrecise, | ||||
| 		Aggregation &Aggregates | ||||
| 		) : | ||||
|     TwoLevelCG<Field>(tol,maxit,FineLinop,Smoother,Aggregates.FineGrid), | ||||
|       _CoarseSolver(CoarseSolver), | ||||
|       _CoarseSolverPrecise(CoarseSolverPrecise), | ||||
|       _Aggregates(Aggregates) | ||||
|   { | ||||
|     coarsegrid = Aggregates.CoarseGrid; | ||||
|   }; | ||||
|  | ||||
|   virtual void PcgM1(Field & in, Field & out) | ||||
|   { | ||||
|     // [PTM+Q] in = [1 - Q A] M in + Q in = Min + Q [ in -A Min] | ||||
|     Field tmp(grid); | ||||
|     Field Min(grid); | ||||
|  | ||||
|     PcgM(in,Min); // Smoother call | ||||
|     Field tmp(this->grid); | ||||
|     Field Min(this->grid); | ||||
|     CoarseField PleftProj(this->coarsegrid); | ||||
|     CoarseField PleftMss_proj(this->coarsegrid); | ||||
|  | ||||
|     HermOp(Min,out); | ||||
|     GridStopWatch SmootherTimer; | ||||
|     GridStopWatch MatrixTimer; | ||||
|     SmootherTimer.Start(); | ||||
|     this->_Smoother(in,Min); | ||||
|     SmootherTimer.Stop(); | ||||
|  | ||||
|     MatrixTimer.Start(); | ||||
|     this->_FineLinop.HermOp(Min,out); | ||||
|     MatrixTimer.Stop(); | ||||
|     axpy(tmp,-1.0,out,in);          // tmp  = in - A Min | ||||
|  | ||||
|     ProjectToSubspace(tmp,PleftProj);      | ||||
|     ApplyInverse(PleftProj,PleftMss_proj); // Ass^{-1} [in - A Min]_s | ||||
|     PromoteFromSubspace(PleftMss_proj,tmp);// tmp = Q[in - A Min]   | ||||
|     GridStopWatch ProjTimer; | ||||
|     GridStopWatch CoarseTimer; | ||||
|     GridStopWatch PromTimer; | ||||
|     ProjTimer.Start(); | ||||
|     this->_Aggregates.ProjectToSubspace(PleftProj,tmp);      | ||||
|     ProjTimer.Stop(); | ||||
|     CoarseTimer.Start(); | ||||
|     this->_CoarseSolver(PleftProj,PleftMss_proj); // Ass^{-1} [in - A Min]_s | ||||
|     CoarseTimer.Stop(); | ||||
|     PromTimer.Start(); | ||||
|     this->_Aggregates.PromoteFromSubspace(PleftMss_proj,tmp);// tmp = Q[in - A Min]   | ||||
|     PromTimer.Stop(); | ||||
|     std::cout << GridLogPerformance << "PcgM1 breakdown "<<std::endl; | ||||
|     std::cout << GridLogPerformance << "\tSmoother   " << SmootherTimer.Elapsed() <<std::endl; | ||||
|     std::cout << GridLogPerformance << "\tMatrix     " << MatrixTimer.Elapsed() <<std::endl; | ||||
|     std::cout << GridLogPerformance << "\tProj       " << ProjTimer.Elapsed() <<std::endl; | ||||
|     std::cout << GridLogPerformance << "\tCoarse     " << CoarseTimer.Elapsed() <<std::endl; | ||||
|     std::cout << GridLogPerformance << "\tProm       " << PromTimer.Elapsed() <<std::endl; | ||||
|  | ||||
|     axpy(out,1.0,Min,tmp); // Min+tmp | ||||
|   } | ||||
|  | ||||
|   virtual void M2(const Field & in, Field & out) { | ||||
|     out=in; | ||||
|     // Must override for Def2 only | ||||
|     //  case PcgDef2: | ||||
|     //    Pright(in,out); | ||||
|     //    break; | ||||
|   } | ||||
|  | ||||
|   virtual RealD M3(const Field & p, Field & mmp){ | ||||
|     double d,dd; | ||||
|     HermOpAndNorm(p,mmp,d,dd); | ||||
|     return dd; | ||||
|     // Must override for Def1 only | ||||
|     //  case PcgDef1: | ||||
|     //    d=linop_d->Mprec(p,mmp,tmp,0,1);// Dag no | ||||
|     //      linop_d->Mprec(mmp,mp,tmp,1);// Dag yes | ||||
|     //    Pleft(mp,mmp); | ||||
|     //    d=real(linop_d->inner(p,mmp)); | ||||
|   } | ||||
|  | ||||
|   virtual void VstartDef2(Field & xconst Field & src){ | ||||
|     //case PcgDef2: | ||||
|     //case PcgAdef2:  | ||||
|     //case PcgAdef2f: | ||||
|     //case PcgV11f: | ||||
|   virtual void Vstart(Field & x,const Field & src) | ||||
|   { | ||||
|     /////////////////////////////////// | ||||
|     // Choose x_0 such that  | ||||
|     // x_0 = guess +  (A_ss^inv) r_s = guess + Ass_inv [src -Aguess] | ||||
| @@ -256,142 +290,72 @@ class TwoLevelFlexiblePcg : public LinearFunction<Field> | ||||
|     //                   = src_s - (A guess)_s - src_s  + (A guess)_s  | ||||
|     //                   = 0  | ||||
|     /////////////////////////////////// | ||||
|     Field r(grid); | ||||
|     Field mmp(grid); | ||||
|      | ||||
|     HermOp(x,mmp); | ||||
|     axpy (r, -1.0, mmp, src);        // r_{-1} = src - A x | ||||
|     ProjectToSubspace(r,PleftProj);      | ||||
|     ApplyInverseCG(PleftProj,PleftMss_proj); // Ass^{-1} r_s | ||||
|     PromoteFromSubspace(PleftMss_proj,mmp);   | ||||
|     x=x+mmp; | ||||
|     Field r(this->grid); | ||||
|     Field mmp(this->grid); | ||||
|     CoarseField PleftProj(this->coarsegrid); | ||||
|     CoarseField PleftMss_proj(this->coarsegrid); | ||||
|  | ||||
|     this->_Aggregates.ProjectToSubspace(PleftProj,src);      | ||||
|     this->_CoarseSolverPrecise(PleftProj,PleftMss_proj); // Ass^{-1} r_s | ||||
|     this->_Aggregates.PromoteFromSubspace(PleftMss_proj,x);   | ||||
|  | ||||
|   } | ||||
|  | ||||
|   virtual void Vstart(Field & x,const Field & src){ | ||||
|     return; | ||||
|   } | ||||
|  | ||||
|   ///////////////////////////////////////////////////////////////////// | ||||
|   // Only Def1 has non-trivial Vout. Override in Def1 | ||||
|   ///////////////////////////////////////////////////////////////////// | ||||
|   virtual void   Vout  (Field & in, Field & out,Field & src){ | ||||
|     out = in; | ||||
|     //case PcgDef1: | ||||
|     //    //Qb + PT x | ||||
|     //    ProjectToSubspace(src,PleftProj);      | ||||
|     //    ApplyInverse(PleftProj,PleftMss_proj); // Ass^{-1} r_s | ||||
|     //    PromoteFromSubspace(PleftMss_proj,tmp);   | ||||
|     //     | ||||
|     //    Pright(in,out); | ||||
|     //     | ||||
|     //    linop_d->axpy(out,tmp,out,1.0); | ||||
|     //    break; | ||||
|   } | ||||
|  | ||||
|   //////////////////////////////////////////////////////////////////////////////////////////////// | ||||
|   // Pright and Pleft are common to all implementations | ||||
|   //////////////////////////////////////////////////////////////////////////////////////////////// | ||||
|   virtual void Pright(Field & in,Field & out){ | ||||
|     // P_R  = [ 1              0 ]  | ||||
|     //        [ -Mss^-1 Msb    0 ]  | ||||
|     Field in_sbar(grid); | ||||
|  | ||||
|     ProjectToSubspace(in,PleftProj);      | ||||
|     PromoteFromSubspace(PleftProj,out);   | ||||
|     axpy(in_sbar,-1.0,out,in);       // in_sbar = in - in_s  | ||||
|  | ||||
|     HermOp(in_sbar,out); | ||||
|     ProjectToSubspace(out,PleftProj);           // Mssbar in_sbar  (project) | ||||
|  | ||||
|     ApplyInverse     (PleftProj,PleftMss_proj); // Mss^{-1} Mssbar  | ||||
|     PromoteFromSubspace(PleftMss_proj,out);     //  | ||||
|  | ||||
|     axpy(out,-1.0,out,in_sbar);     // in_sbar - Mss^{-1} Mssbar in_sbar | ||||
|   } | ||||
|   virtual void Pleft (Field & in,Field & out){ | ||||
|     // P_L  = [ 1  -Mbs Mss^-1]  | ||||
|     //        [ 0   0         ]  | ||||
|     Field in_sbar(grid); | ||||
|     Field    tmp2(grid); | ||||
|     Field    Mtmp(grid); | ||||
|  | ||||
|     ProjectToSubspace(in,PleftProj);      | ||||
|     PromoteFromSubspace(PleftProj,out);   | ||||
|     axpy(in_sbar,-1.0,out,in);      // in_sbar = in - in_s | ||||
|  | ||||
|     ApplyInverse(PleftProj,PleftMss_proj); // Mss^{-1} in_s | ||||
|     PromoteFromSubspace(PleftMss_proj,out); | ||||
|  | ||||
|     HermOp(out,Mtmp); | ||||
|  | ||||
|     ProjectToSubspace(Mtmp,PleftProj);      // Msbar s Mss^{-1} | ||||
|     PromoteFromSubspace(PleftProj,tmp2); | ||||
|  | ||||
|     axpy(out,-1.0,tmp2,Mtmp); | ||||
|     axpy(out,-1.0,out,in_sbar);     // in_sbar - Msbars Mss^{-1} in_s | ||||
|   } | ||||
| } | ||||
| }; | ||||
|  | ||||
| template<class Field> | ||||
| class TwoLevelFlexiblePcgADef2 : public TwoLevelFlexiblePcg<Field> { | ||||
|  public: | ||||
|   virtual void M(Field & in,Field & out,Field & tmp){ | ||||
| class TwoLevelADEF1defl : public TwoLevelCG<Field> | ||||
| { | ||||
| public: | ||||
|   const std::vector<Field> &evec; | ||||
|   const std::vector<RealD> &eval; | ||||
|    | ||||
|   TwoLevelADEF1defl(RealD tol, | ||||
| 		   Integer maxit, | ||||
| 		   LinearOperatorBase<Field>   &FineLinop, | ||||
| 		   LinearFunction<Field>   &Smoother, | ||||
| 		   std::vector<Field> &_evec, | ||||
| 		   std::vector<RealD> &_eval) :  | ||||
|     TwoLevelCG<Field>(tol,maxit,FineLinop,Smoother,_evec[0].Grid()), | ||||
|     evec(_evec), | ||||
|     eval(_eval) | ||||
|   {}; | ||||
|  | ||||
|   }  | ||||
|   virtual void M1(Field & in, Field & out,Field & tmp,Field & mp){ | ||||
|   // Can just inherit existing Vout | ||||
|   // Can just inherit existing M2 | ||||
|   // Can just inherit existing M3 | ||||
|  | ||||
|   // Simple vstart - do nothing | ||||
|   virtual void Vstart(Field & x,const Field & src){ x=src; }; | ||||
|  | ||||
|   // Override PcgM1 | ||||
|   virtual void PcgM1(Field & in, Field & out) | ||||
|   { | ||||
|     int N=evec.size(); | ||||
|     Field Pin(this->grid); | ||||
|     Field Qin(this->grid); | ||||
|  | ||||
|     //MP  + Q = M(1-AQ) + Q = M | ||||
|     // // If we are eigenvector deflating in coarse space | ||||
|     // // Q   = Sum_i |phi_i> 1/lambda_i <phi_i| | ||||
|     // // A Q = Sum_i |phi_i> <phi_i| | ||||
|     // // M(1-AQ) = M(1-proj) + Q | ||||
|     Qin.Checkerboard()=in.Checkerboard(); | ||||
|     Qin = Zero(); | ||||
|     Pin = in; | ||||
|     for (int i=0;i<N;i++) { | ||||
|       const Field& tmp = evec[i]; | ||||
|       auto ip = TensorRemove(innerProduct(tmp,in)); | ||||
|       axpy(Qin, ip / eval[i],tmp,Qin); | ||||
|       axpy(Pin, -ip ,tmp,Pin); | ||||
|     } | ||||
|  | ||||
|     this->_Smoother(Pin,out); | ||||
|  | ||||
|     out = out + Qin; | ||||
|   } | ||||
|   virtual void M2(Field & in, Field & out){ | ||||
| }; | ||||
|  | ||||
|   } | ||||
|   virtual RealD M3(Field & p, Field & mp,Field & mmp, Field & tmp){ | ||||
| NAMESPACE_END(Grid); | ||||
|  | ||||
|   } | ||||
|   virtual void Vstart(Field & in, Field & src, Field & r, Field & mp, Field & mmp, Field & tmp){ | ||||
|  | ||||
|   } | ||||
| } | ||||
| /* | ||||
| template<class Field> | ||||
| class TwoLevelFlexiblePcgAD : public TwoLevelFlexiblePcg<Field> { | ||||
|  public: | ||||
|   virtual void M(Field & in,Field & out,Field & tmp);  | ||||
|   virtual void M1(Field & in, Field & out,Field & tmp,Field & mp); | ||||
|   virtual void M2(Field & in, Field & out); | ||||
|   virtual RealD M3(Field & p, Field & mp,Field & mmp, Field & tmp); | ||||
|   virtual void Vstart(Field & in, Field & src, Field & r, Field & mp, Field & mmp, Field & tmp); | ||||
| } | ||||
|  | ||||
| template<class Field> | ||||
| class TwoLevelFlexiblePcgDef1 : public TwoLevelFlexiblePcg<Field> { | ||||
|  public: | ||||
|   virtual void M(Field & in,Field & out,Field & tmp);  | ||||
|   virtual void M1(Field & in, Field & out,Field & tmp,Field & mp); | ||||
|   virtual void M2(Field & in, Field & out); | ||||
|   virtual RealD M3(Field & p, Field & mp,Field & mmp, Field & tmp); | ||||
|   virtual void Vstart(Field & in, Field & src, Field & r, Field & mp, Field & mmp, Field & tmp); | ||||
|   virtual void   Vout  (Field & in, Field & out,Field & src,Field & tmp); | ||||
| } | ||||
|  | ||||
| template<class Field> | ||||
| class TwoLevelFlexiblePcgDef2 : public TwoLevelFlexiblePcg<Field> { | ||||
|  public: | ||||
|   virtual void M(Field & in,Field & out,Field & tmp);  | ||||
|   virtual void M1(Field & in, Field & out,Field & tmp,Field & mp); | ||||
|   virtual void M2(Field & in, Field & out); | ||||
|   virtual RealD M3(Field & p, Field & mp,Field & mmp, Field & tmp); | ||||
|   virtual void Vstart(Field & in, Field & src, Field & r, Field & mp, Field & mmp, Field & tmp); | ||||
| } | ||||
|  | ||||
| template<class Field> | ||||
| class TwoLevelFlexiblePcgV11: public TwoLevelFlexiblePcg<Field> { | ||||
|  public: | ||||
|   virtual void M(Field & in,Field & out,Field & tmp);  | ||||
|   virtual void M1(Field & in, Field & out,Field & tmp,Field & mp); | ||||
|   virtual void M2(Field & in, Field & out); | ||||
|   virtual RealD M3(Field & p, Field & mp,Field & mmp, Field & tmp); | ||||
|   virtual void Vstart(Field & in, Field & src, Field & r, Field & mp, Field & mmp, Field & tmp); | ||||
| } | ||||
| */ | ||||
| #endif | ||||
|   | ||||
| @@ -183,13 +183,13 @@ public: | ||||
| 		  << "\tTrue residual " << true_residual | ||||
| 		  << "\tTarget " << Tolerance << std::endl; | ||||
|  | ||||
|         std::cout << GridLogMessage << "Time breakdown "<<std::endl; | ||||
| 	std::cout << GridLogMessage << "\tElapsed    " << SolverTimer.Elapsed() <<std::endl; | ||||
| 	std::cout << GridLogMessage << "\tMatrix     " << MatrixTimer.Elapsed() <<std::endl; | ||||
| 	std::cout << GridLogMessage << "\tLinalg     " << LinalgTimer.Elapsed() <<std::endl; | ||||
| 	std::cout << GridLogMessage << "\tInner      " << InnerTimer.Elapsed() <<std::endl; | ||||
| 	std::cout << GridLogMessage << "\tAxpyNorm   " << AxpyNormTimer.Elapsed() <<std::endl; | ||||
| 	std::cout << GridLogMessage << "\tLinearComb " << LinearCombTimer.Elapsed() <<std::endl; | ||||
|         std::cout << GridLogPerformance << "Time breakdown "<<std::endl; | ||||
| 	std::cout << GridLogPerformance << "\tMatrix     " << MatrixTimer.Elapsed() <<std::endl; | ||||
| 	std::cout << GridLogPerformance << "\tLinalg     " << LinalgTimer.Elapsed() <<std::endl; | ||||
| 	std::cout << GridLogPerformance << "\tInner      " << InnerTimer.Elapsed() <<std::endl; | ||||
| 	std::cout << GridLogPerformance << "\tAxpyNorm   " << AxpyNormTimer.Elapsed() <<std::endl; | ||||
| 	std::cout << GridLogPerformance << "\tLinearComb " << LinearCombTimer.Elapsed() <<std::endl; | ||||
|  | ||||
| 	std::cout << GridLogDebug << "\tMobius flop rate " << DwfFlops/ usecs<< " Gflops " <<std::endl; | ||||
|  | ||||
|   | ||||
| @@ -465,7 +465,7 @@ until convergence | ||||
|  | ||||
|     Field& evec_k = evec[k]; | ||||
|  | ||||
|     _PolyOp(evec_k,w);    std::cout<<GridLogIRL << "PolyOp" <<std::endl; | ||||
|     _PolyOp(evec_k,w);    std::cout<<GridLogDebug << "PolyOp" <<std::endl; | ||||
|  | ||||
|     if(k>0) w -= lme[k-1] * evec[k-1]; | ||||
|  | ||||
| @@ -480,9 +480,9 @@ until convergence | ||||
|     lme[k] = beta; | ||||
|  | ||||
|     if ( (k>0) && ( (k % orth_period) == 0 )) { | ||||
|       std::cout<<GridLogIRL << "Orthogonalising " <<k<<std::endl; | ||||
|       std::cout<<GridLogDebug << "Orthogonalising " <<k<<std::endl; | ||||
|       orthogonalize(w,evec,k); // orthonormalise | ||||
|       std::cout<<GridLogIRL << "Orthogonalised " <<k<<std::endl; | ||||
|       std::cout<<GridLogDebug << "Orthogonalised " <<k<<std::endl; | ||||
|     } | ||||
|  | ||||
|     if(k < Nm-1) evec[k+1] = w; | ||||
| @@ -491,7 +491,7 @@ until convergence | ||||
|     if ( beta < tiny )  | ||||
|       std::cout<<GridLogIRL << " beta is tiny "<<beta<<std::endl; | ||||
|  | ||||
|     std::cout<<GridLogIRL << "Lanczos step complete " <<k<<std::endl; | ||||
|     std::cout<<GridLogDebug << "Lanczos step complete " <<k<<std::endl; | ||||
|   } | ||||
|  | ||||
|   void diagonalize_Eigen(std::vector<RealD>& lmd, std::vector<RealD>& lme,  | ||||
|   | ||||
| @@ -33,7 +33,7 @@ NAMESPACE_BEGIN(Grid); | ||||
| /////////////////////////////////////////////////////////////////////////////////////////////////////// | ||||
| // Take a matrix and form an NE solver calling a Herm solver | ||||
| /////////////////////////////////////////////////////////////////////////////////////////////////////// | ||||
| template<class Field> class NormalEquations { | ||||
| template<class Field> class NormalEquations : public LinearFunction<Field>{ | ||||
| private: | ||||
|   SparseMatrixBase<Field> & _Matrix; | ||||
|   OperatorFunction<Field> & _HermitianSolver; | ||||
| @@ -60,7 +60,7 @@ public: | ||||
|   }      | ||||
| }; | ||||
|  | ||||
| template<class Field> class HPDSolver { | ||||
| template<class Field> class HPDSolver : public LinearFunction<Field> { | ||||
| private: | ||||
|   LinearOperatorBase<Field> & _Matrix; | ||||
|   OperatorFunction<Field> & _HermitianSolver; | ||||
| @@ -84,7 +84,7 @@ public: | ||||
| }; | ||||
|  | ||||
|  | ||||
| template<class Field> class MdagMSolver { | ||||
| template<class Field> class MdagMSolver : public LinearFunction<Field> { | ||||
| private: | ||||
|   SparseMatrixBase<Field> & _Matrix; | ||||
|   OperatorFunction<Field> & _HermitianSolver; | ||||
|   | ||||
| @@ -20,7 +20,7 @@ template<class Field> class PowerMethod | ||||
|     RealD evalMaxApprox = 0.0;  | ||||
|     auto src_n = src;  | ||||
|     auto tmp = src;  | ||||
|     const int _MAX_ITER_EST_ = 50;  | ||||
|     const int _MAX_ITER_EST_ = 100;  | ||||
|  | ||||
|     for (int i=0;i<_MAX_ITER_EST_;i++) {  | ||||
|        | ||||
|   | ||||
							
								
								
									
										262
									
								
								Grid/algorithms/multigrid/Aggregates.h
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										262
									
								
								Grid/algorithms/multigrid/Aggregates.h
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,262 @@ | ||||
| /************************************************************************************* | ||||
|  | ||||
|     Grid physics library, www.github.com/paboyle/Grid  | ||||
|  | ||||
|     Source file: ./lib/algorithms/Aggregates.h | ||||
|  | ||||
|     Copyright (C) 2015 | ||||
|  | ||||
| Author: Azusa Yamaguchi <ayamaguc@staffmail.ed.ac.uk> | ||||
| Author: Peter Boyle <paboyle@ph.ed.ac.uk> | ||||
| Author: Peter Boyle <peterboyle@Peters-MacBook-Pro-2.local> | ||||
| Author: paboyle <paboyle@ph.ed.ac.uk> | ||||
|  | ||||
|     This program is free software; you can redistribute it and/or modify | ||||
|     it under the terms of the GNU General Public License as published by | ||||
|     the Free Software Foundation; either version 2 of the License, or | ||||
|     (at your option) any later version. | ||||
|  | ||||
|     This program is distributed in the hope that it will be useful, | ||||
|     but WITHOUT ANY WARRANTY; without even the implied warranty of | ||||
|     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the | ||||
|     GNU General Public License for more details. | ||||
|  | ||||
|     You should have received a copy of the GNU General Public License along | ||||
|     with this program; if not, write to the Free Software Foundation, Inc., | ||||
|     51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. | ||||
|  | ||||
|     See the full license in the file "LICENSE" in the top level distribution directory | ||||
| *************************************************************************************/ | ||||
| /*  END LEGAL */ | ||||
| #pragma once | ||||
|  | ||||
| NAMESPACE_BEGIN(Grid); | ||||
|  | ||||
| template<class Fobj,class CComplex,int nbasis> | ||||
| class Aggregation { | ||||
| public: | ||||
|   typedef iVector<CComplex,nbasis >             siteVector; | ||||
|   typedef Lattice<siteVector>                 CoarseVector; | ||||
|   typedef Lattice<iMatrix<CComplex,nbasis > > CoarseMatrix; | ||||
|  | ||||
|   typedef Lattice< CComplex >   CoarseScalar; // used for inner products on fine field | ||||
|   typedef Lattice<Fobj >        FineField; | ||||
|  | ||||
|   GridBase *CoarseGrid; | ||||
|   GridBase *FineGrid; | ||||
|   std::vector<Lattice<Fobj> > subspace; | ||||
|   int checkerboard; | ||||
|   int Checkerboard(void){return checkerboard;} | ||||
|   Aggregation(GridBase *_CoarseGrid,GridBase *_FineGrid,int _checkerboard) :  | ||||
|     CoarseGrid(_CoarseGrid), | ||||
|     FineGrid(_FineGrid), | ||||
|     subspace(nbasis,_FineGrid), | ||||
|     checkerboard(_checkerboard) | ||||
|   { | ||||
|   }; | ||||
|    | ||||
|    | ||||
|   void Orthogonalise(void){ | ||||
|     CoarseScalar InnerProd(CoarseGrid);  | ||||
|     //    std::cout << GridLogMessage <<" Block Gramm-Schmidt pass 1"<<std::endl; | ||||
|     blockOrthogonalise(InnerProd,subspace); | ||||
|   }  | ||||
|   void ProjectToSubspace(CoarseVector &CoarseVec,const FineField &FineVec){ | ||||
|     blockProject(CoarseVec,FineVec,subspace); | ||||
|   } | ||||
|   void PromoteFromSubspace(const CoarseVector &CoarseVec,FineField &FineVec){ | ||||
|     FineVec.Checkerboard() = subspace[0].Checkerboard(); | ||||
|     blockPromote(CoarseVec,FineVec,subspace); | ||||
|   } | ||||
|  | ||||
|   virtual void CreateSubspaceRandom(GridParallelRNG  &RNG) { | ||||
|     int nn=nbasis; | ||||
|     RealD scale; | ||||
|     FineField noise(FineGrid); | ||||
|     for(int b=0;b<nn;b++){ | ||||
|       subspace[b] = Zero(); | ||||
|       gaussian(RNG,noise); | ||||
|       scale = std::pow(norm2(noise),-0.5);  | ||||
|       noise=noise*scale; | ||||
|       subspace[b] = noise; | ||||
|     } | ||||
|   } | ||||
|   virtual void CreateSubspace(GridParallelRNG  &RNG,LinearOperatorBase<FineField> &hermop,int nn=nbasis) | ||||
|   { | ||||
|  | ||||
|     RealD scale; | ||||
|  | ||||
|     ConjugateGradient<FineField> CG(1.0e-2,100,false); | ||||
|     FineField noise(FineGrid); | ||||
|     FineField Mn(FineGrid); | ||||
|  | ||||
|     for(int b=0;b<nn;b++){ | ||||
|        | ||||
|       subspace[b] = Zero(); | ||||
|       gaussian(RNG,noise); | ||||
|       scale = std::pow(norm2(noise),-0.5);  | ||||
|       noise=noise*scale; | ||||
|        | ||||
|       hermop.Op(noise,Mn); std::cout<<GridLogMessage << "noise   ["<<b<<"] <n|MdagM|n> "<<norm2(Mn)<<std::endl; | ||||
|  | ||||
|       for(int i=0;i<1;i++){ | ||||
|  | ||||
| 	CG(hermop,noise,subspace[b]); | ||||
|  | ||||
| 	noise = subspace[b]; | ||||
| 	scale = std::pow(norm2(noise),-0.5);  | ||||
| 	noise=noise*scale; | ||||
|  | ||||
|       } | ||||
|  | ||||
|       hermop.Op(noise,Mn); std::cout<<GridLogMessage << "filtered["<<b<<"] <f|MdagM|f> "<<norm2(Mn)<<std::endl; | ||||
|       subspace[b]   = noise; | ||||
|  | ||||
|     } | ||||
|   } | ||||
|  | ||||
|   //////////////////////////////////////////////////////////////////////////////////////////////// | ||||
|   // World of possibilities here. But have tried quite a lot of experiments (250+ jobs run on Summit) | ||||
|   // and this is the best I found | ||||
|   //////////////////////////////////////////////////////////////////////////////////////////////// | ||||
|  | ||||
|   virtual void CreateSubspaceChebyshev(GridParallelRNG  &RNG,LinearOperatorBase<FineField> &hermop, | ||||
| 				       int nn, | ||||
| 				       double hi, | ||||
| 				       double lo, | ||||
| 				       int orderfilter, | ||||
| 				       int ordermin, | ||||
| 				       int orderstep, | ||||
| 				       double filterlo | ||||
| 				       ) { | ||||
|  | ||||
|     RealD scale; | ||||
|  | ||||
|     FineField noise(FineGrid); | ||||
|     FineField Mn(FineGrid); | ||||
|     FineField tmp(FineGrid); | ||||
|  | ||||
|     // New normalised noise | ||||
|     gaussian(RNG,noise); | ||||
|     scale = std::pow(norm2(noise),-0.5);  | ||||
|     noise=noise*scale; | ||||
|  | ||||
|     std::cout << GridLogMessage<<" Chebyshev subspace pass-1 : ord "<<orderfilter<<" ["<<lo<<","<<hi<<"]"<<std::endl; | ||||
|     std::cout << GridLogMessage<<" Chebyshev subspace pass-2 : nbasis"<<nn<<" min " | ||||
| 	      <<ordermin<<" step "<<orderstep | ||||
| 	      <<" lo"<<filterlo<<std::endl; | ||||
|  | ||||
|     // Initial matrix element | ||||
|     hermop.Op(noise,Mn); std::cout<<GridLogMessage << "noise <n|MdagM|n> "<<norm2(Mn)<<std::endl; | ||||
|  | ||||
|     int b =0; | ||||
|     { | ||||
|       // Filter | ||||
|       Chebyshev<FineField> Cheb(lo,hi,orderfilter); | ||||
|       Cheb(hermop,noise,Mn); | ||||
|       // normalise | ||||
|       scale = std::pow(norm2(Mn),-0.5); 	Mn=Mn*scale; | ||||
|       subspace[b]   = Mn; | ||||
|       hermop.Op(Mn,tmp);  | ||||
|       std::cout<<GridLogMessage << "filt ["<<b<<"] <n|MdagM|n> "<<norm2(tmp)<<std::endl; | ||||
|       b++; | ||||
|     } | ||||
|  | ||||
|     // Generate a full sequence of Chebyshevs | ||||
|     { | ||||
|       lo=filterlo; | ||||
|       noise=Mn; | ||||
|  | ||||
|       FineField T0(FineGrid); T0 = noise;   | ||||
|       FineField T1(FineGrid);  | ||||
|       FineField T2(FineGrid); | ||||
|       FineField y(FineGrid); | ||||
|        | ||||
|       FineField *Tnm = &T0; | ||||
|       FineField *Tn  = &T1; | ||||
|       FineField *Tnp = &T2; | ||||
|  | ||||
|       // Tn=T1 = (xscale M + mscale)in | ||||
|       RealD xscale = 2.0/(hi-lo); | ||||
|       RealD mscale = -(hi+lo)/(hi-lo); | ||||
|       hermop.HermOp(T0,y); | ||||
|       T1=y*xscale+noise*mscale; | ||||
|  | ||||
|       for(int n=2;n<=ordermin+orderstep*(nn-2);n++){ | ||||
| 	 | ||||
| 	hermop.HermOp(*Tn,y); | ||||
|  | ||||
| 	autoView( y_v , y, AcceleratorWrite); | ||||
| 	autoView( Tn_v , (*Tn), AcceleratorWrite); | ||||
| 	autoView( Tnp_v , (*Tnp), AcceleratorWrite); | ||||
| 	autoView( Tnm_v , (*Tnm), AcceleratorWrite); | ||||
| 	const int Nsimd = CComplex::Nsimd(); | ||||
| 	accelerator_for(ss, FineGrid->oSites(), Nsimd, { | ||||
| 	  coalescedWrite(y_v[ss],xscale*y_v(ss)+mscale*Tn_v(ss)); | ||||
| 	  coalescedWrite(Tnp_v[ss],2.0*y_v(ss)-Tnm_v(ss)); | ||||
|         }); | ||||
|  | ||||
| 	// Possible more fine grained control is needed than a linear sweep, | ||||
| 	// but huge productivity gain if this is simple algorithm and not a tunable | ||||
| 	int m =1; | ||||
| 	if ( n>=ordermin ) m=n-ordermin; | ||||
| 	if ( (m%orderstep)==0 ) {  | ||||
| 	  Mn=*Tnp; | ||||
| 	  scale = std::pow(norm2(Mn),-0.5);         Mn=Mn*scale; | ||||
| 	  subspace[b] = Mn; | ||||
| 	  hermop.Op(Mn,tmp);  | ||||
| 	  std::cout<<GridLogMessage << n<<" filt ["<<b<<"] <n|MdagM|n> "<<norm2(tmp)<<std::endl; | ||||
| 	  b++; | ||||
| 	} | ||||
|  | ||||
| 	// Cycle pointers to avoid copies | ||||
| 	FineField *swizzle = Tnm; | ||||
| 	Tnm    =Tn; | ||||
| 	Tn     =Tnp; | ||||
| 	Tnp    =swizzle; | ||||
| 	   | ||||
|       } | ||||
|     } | ||||
|     assert(b==nn); | ||||
|   } | ||||
|   virtual void CreateSubspaceChebyshev(GridParallelRNG  &RNG,LinearOperatorBase<FineField> &hermop, | ||||
| 				       int nn, | ||||
| 				       double hi, | ||||
| 				       double lo, | ||||
| 				       int orderfilter | ||||
| 				       ) { | ||||
|  | ||||
|     RealD scale; | ||||
|  | ||||
|     FineField noise(FineGrid); | ||||
|     FineField Mn(FineGrid); | ||||
|     FineField tmp(FineGrid); | ||||
|  | ||||
|     // New normalised noise | ||||
|     std::cout << GridLogMessage<<" Chebyshev subspace pure noise : ord "<<orderfilter<<" ["<<lo<<","<<hi<<"]"<<std::endl; | ||||
|     std::cout << GridLogMessage<<" Chebyshev subspace pure noise  : nbasis "<<nn<<std::endl; | ||||
|  | ||||
|  | ||||
|     for(int b =0;b<nbasis;b++) | ||||
|     { | ||||
|       gaussian(RNG,noise); | ||||
|       scale = std::pow(norm2(noise),-0.5);  | ||||
|       noise=noise*scale; | ||||
|  | ||||
|       // Initial matrix element | ||||
|       hermop.Op(noise,Mn); | ||||
|       if(b==0) std::cout<<GridLogMessage << "noise <n|MdagM|n> "<<norm2(Mn)<<std::endl; | ||||
|       // Filter | ||||
|       Chebyshev<FineField> Cheb(lo,hi,orderfilter); | ||||
|       Cheb(hermop,noise,Mn); | ||||
|       // normalise | ||||
|       scale = std::pow(norm2(Mn),-0.5); 	Mn=Mn*scale; | ||||
|       subspace[b]   = Mn; | ||||
|       hermop.Op(Mn,tmp);  | ||||
|       std::cout<<GridLogMessage << "filt ["<<b<<"] <n|MdagM|n> "<<norm2(tmp)<<std::endl; | ||||
|     } | ||||
|  | ||||
|   } | ||||
|  | ||||
| }; | ||||
| NAMESPACE_END(Grid); | ||||
							
								
								
									
										814
									
								
								Grid/algorithms/multigrid/CoarsenedMatrix.h
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										814
									
								
								Grid/algorithms/multigrid/CoarsenedMatrix.h
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,814 @@ | ||||
| /************************************************************************************* | ||||
|  | ||||
|     Grid physics library, www.github.com/paboyle/Grid  | ||||
|  | ||||
|     Source file: ./lib/algorithms/CoarsenedMatrix.h | ||||
|  | ||||
|     Copyright (C) 2015 | ||||
|  | ||||
| Author: Azusa Yamaguchi <ayamaguc@staffmail.ed.ac.uk> | ||||
| Author: Peter Boyle <paboyle@ph.ed.ac.uk> | ||||
| Author: Peter Boyle <peterboyle@Peters-MacBook-Pro-2.local> | ||||
| Author: paboyle <paboyle@ph.ed.ac.uk> | ||||
|  | ||||
|     This program is free software; you can redistribute it and/or modify | ||||
|     it under the terms of the GNU General Public License as published by | ||||
|     the Free Software Foundation; either version 2 of the License, or | ||||
|     (at your option) any later version. | ||||
|  | ||||
|     This program is distributed in the hope that it will be useful, | ||||
|     but WITHOUT ANY WARRANTY; without even the implied warranty of | ||||
|     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the | ||||
|     GNU General Public License for more details. | ||||
|  | ||||
|     You should have received a copy of the GNU General Public License along | ||||
|     with this program; if not, write to the Free Software Foundation, Inc., | ||||
|     51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. | ||||
|  | ||||
|     See the full license in the file "LICENSE" in the top level distribution directory | ||||
| *************************************************************************************/ | ||||
| /*  END LEGAL */ | ||||
| #ifndef  GRID_ALGORITHM_COARSENED_MATRIX_H | ||||
| #define  GRID_ALGORITHM_COARSENED_MATRIX_H | ||||
|  | ||||
| #include <Grid/qcd/QCD.h> // needed for Dagger(Yes|No), Inverse(Yes|No) | ||||
|  | ||||
| NAMESPACE_BEGIN(Grid); | ||||
|  | ||||
| template<class vobj,class CComplex> | ||||
| inline void blockMaskedInnerProduct(Lattice<CComplex> &CoarseInner, | ||||
| 				    const Lattice<decltype(innerProduct(vobj(),vobj()))> &FineMask, | ||||
| 				    const Lattice<vobj> &fineX, | ||||
| 				    const Lattice<vobj> &fineY) | ||||
| { | ||||
|   typedef decltype(innerProduct(vobj(),vobj())) dotp; | ||||
|  | ||||
|   GridBase *coarse(CoarseInner.Grid()); | ||||
|   GridBase *fine  (fineX.Grid()); | ||||
|  | ||||
|   Lattice<dotp> fine_inner(fine); fine_inner.Checkerboard() = fineX.Checkerboard(); | ||||
|   Lattice<dotp> fine_inner_msk(fine); | ||||
|  | ||||
|   // Multiply could be fused with innerProduct | ||||
|   // Single block sum kernel could do both masks. | ||||
|   fine_inner = localInnerProduct(fineX,fineY); | ||||
|   mult(fine_inner_msk, fine_inner,FineMask); | ||||
|   blockSum(CoarseInner,fine_inner_msk); | ||||
| } | ||||
|  | ||||
| // Fine Object == (per site) type of fine field | ||||
| // nbasis      == number of deflation vectors | ||||
| template<class Fobj,class CComplex,int nbasis> | ||||
| class CoarsenedMatrix : public CheckerBoardedSparseMatrixBase<Lattice<iVector<CComplex,nbasis > > >  { | ||||
| public: | ||||
|      | ||||
|   typedef iVector<CComplex,nbasis >           siteVector; | ||||
|   typedef Lattice<CComplex >                  CoarseComplexField; | ||||
|   typedef Lattice<siteVector>                 CoarseVector; | ||||
|   typedef Lattice<iMatrix<CComplex,nbasis > > CoarseMatrix; | ||||
|   typedef iMatrix<CComplex,nbasis >  Cobj; | ||||
|   typedef Lattice< CComplex >   CoarseScalar; // used for inner products on fine field | ||||
|   typedef Lattice<Fobj >        FineField; | ||||
|   typedef CoarseVector FermionField; | ||||
|  | ||||
|   // enrich interface, use default implementation as in FermionOperator /////// | ||||
|   void Dminus(CoarseVector const& in, CoarseVector& out) { out = in; } | ||||
|   void DminusDag(CoarseVector const& in, CoarseVector& out) { out = in; } | ||||
|   void ImportPhysicalFermionSource(CoarseVector const& input, CoarseVector& imported) { imported = input; } | ||||
|   void ImportUnphysicalFermion(CoarseVector const& input, CoarseVector& imported) { imported = input; } | ||||
|   void ExportPhysicalFermionSolution(CoarseVector const& solution, CoarseVector& exported) { exported = solution; }; | ||||
|   void ExportPhysicalFermionSource(CoarseVector const& solution, CoarseVector& exported) { exported = solution; }; | ||||
|  | ||||
|   //////////////////// | ||||
|   // Data members | ||||
|   //////////////////// | ||||
|   Geometry         geom; | ||||
|   GridBase *       _grid;  | ||||
|   GridBase*        _cbgrid; | ||||
|   int hermitian; | ||||
|  | ||||
|   CartesianStencil<siteVector,siteVector,DefaultImplParams> Stencil;  | ||||
|   CartesianStencil<siteVector,siteVector,DefaultImplParams> StencilEven; | ||||
|   CartesianStencil<siteVector,siteVector,DefaultImplParams> StencilOdd; | ||||
|  | ||||
|   std::vector<CoarseMatrix> A; | ||||
|   std::vector<CoarseMatrix> Aeven; | ||||
|   std::vector<CoarseMatrix> Aodd; | ||||
|  | ||||
|   CoarseMatrix AselfInv; | ||||
|   CoarseMatrix AselfInvEven; | ||||
|   CoarseMatrix AselfInvOdd; | ||||
|  | ||||
|   Vector<RealD> dag_factor; | ||||
|  | ||||
|   /////////////////////// | ||||
|   // Interface | ||||
|   /////////////////////// | ||||
|   GridBase * Grid(void)         { return _grid; };   // this is all the linalg routines need to know | ||||
|   GridBase * RedBlackGrid()     { return _cbgrid; }; | ||||
|  | ||||
|   int ConstEE() { return 0; } | ||||
|  | ||||
|   void M (const CoarseVector &in, CoarseVector &out) | ||||
|   { | ||||
|     conformable(_grid,in.Grid()); | ||||
|     conformable(in.Grid(),out.Grid()); | ||||
|     out.Checkerboard() = in.Checkerboard(); | ||||
|  | ||||
|     SimpleCompressor<siteVector> compressor; | ||||
|  | ||||
|     Stencil.HaloExchange(in,compressor); | ||||
|     autoView( in_v , in, AcceleratorRead); | ||||
|     autoView( out_v , out, AcceleratorWrite); | ||||
|     autoView( Stencil_v  , Stencil, AcceleratorRead); | ||||
|     int npoint = geom.npoint; | ||||
|     typedef LatticeView<Cobj> Aview; | ||||
|        | ||||
|     Vector<Aview> AcceleratorViewContainer; | ||||
|    | ||||
|     for(int p=0;p<geom.npoint;p++) AcceleratorViewContainer.push_back(A[p].View(AcceleratorRead)); | ||||
|     Aview *Aview_p = & AcceleratorViewContainer[0]; | ||||
|  | ||||
|     const int Nsimd = CComplex::Nsimd(); | ||||
|     typedef decltype(coalescedRead(in_v[0])) calcVector; | ||||
|     typedef decltype(coalescedRead(in_v[0](0))) calcComplex; | ||||
|  | ||||
|     int osites=Grid()->oSites(); | ||||
|  | ||||
|     accelerator_for(sss, Grid()->oSites()*nbasis, Nsimd, { | ||||
|       int ss = sss/nbasis; | ||||
|       int b  = sss%nbasis; | ||||
|       calcComplex res = Zero(); | ||||
|       calcVector nbr; | ||||
|       int ptype; | ||||
|       StencilEntry *SE; | ||||
|  | ||||
|       for(int point=0;point<npoint;point++){ | ||||
|  | ||||
| 	SE=Stencil_v.GetEntry(ptype,point,ss); | ||||
| 	   | ||||
| 	if(SE->_is_local) {  | ||||
| 	  nbr = coalescedReadPermute(in_v[SE->_offset],ptype,SE->_permute); | ||||
| 	} else { | ||||
| 	  nbr = coalescedRead(Stencil_v.CommBuf()[SE->_offset]); | ||||
| 	} | ||||
| 	acceleratorSynchronise(); | ||||
|  | ||||
| 	for(int bb=0;bb<nbasis;bb++) { | ||||
| 	  res = res + coalescedRead(Aview_p[point][ss](b,bb))*nbr(bb); | ||||
| 	} | ||||
|       } | ||||
|       coalescedWrite(out_v[ss](b),res); | ||||
|       }); | ||||
|  | ||||
|     for(int p=0;p<geom.npoint;p++) AcceleratorViewContainer[p].ViewClose(); | ||||
|   }; | ||||
|  | ||||
|   void Mdag (const CoarseVector &in, CoarseVector &out) | ||||
|   { | ||||
|     if(hermitian) { | ||||
|       // corresponds to Petrov-Galerkin coarsening | ||||
|       return M(in,out); | ||||
|     } else { | ||||
|       // corresponds to Galerkin coarsening | ||||
|       return MdagNonHermitian(in, out); | ||||
|     } | ||||
|   }; | ||||
|  | ||||
|   void MdagNonHermitian(const CoarseVector &in, CoarseVector &out) | ||||
|   { | ||||
|     conformable(_grid,in.Grid()); | ||||
|     conformable(in.Grid(),out.Grid()); | ||||
|     out.Checkerboard() = in.Checkerboard(); | ||||
|  | ||||
|     SimpleCompressor<siteVector> compressor; | ||||
|  | ||||
|     Stencil.HaloExchange(in,compressor); | ||||
|     autoView( in_v , in, AcceleratorRead); | ||||
|     autoView( out_v , out, AcceleratorWrite); | ||||
|     autoView( Stencil_v  , Stencil, AcceleratorRead); | ||||
|     int npoint = geom.npoint; | ||||
|     typedef LatticeView<Cobj> Aview; | ||||
|  | ||||
|     Vector<Aview> AcceleratorViewContainer; | ||||
|  | ||||
|     for(int p=0;p<geom.npoint;p++) AcceleratorViewContainer.push_back(A[p].View(AcceleratorRead)); | ||||
|     Aview *Aview_p = & AcceleratorViewContainer[0]; | ||||
|  | ||||
|     const int Nsimd = CComplex::Nsimd(); | ||||
|     typedef decltype(coalescedRead(in_v[0])) calcVector; | ||||
|     typedef decltype(coalescedRead(in_v[0](0))) calcComplex; | ||||
|  | ||||
|     int osites=Grid()->oSites(); | ||||
|  | ||||
|     Vector<int> points(geom.npoint, 0); | ||||
|     for(int p=0; p<geom.npoint; p++) | ||||
|       points[p] = geom.points_dagger[p]; | ||||
|  | ||||
|     auto points_p = &points[0]; | ||||
|  | ||||
|     RealD* dag_factor_p = &dag_factor[0]; | ||||
|  | ||||
|     accelerator_for(sss, Grid()->oSites()*nbasis, Nsimd, { | ||||
|       int ss = sss/nbasis; | ||||
|       int b  = sss%nbasis; | ||||
|       calcComplex res = Zero(); | ||||
|       calcVector nbr; | ||||
|       int ptype; | ||||
|       StencilEntry *SE; | ||||
|  | ||||
|       for(int p=0;p<npoint;p++){ | ||||
|         int point = points_p[p]; | ||||
|  | ||||
| 	SE=Stencil_v.GetEntry(ptype,point,ss); | ||||
|  | ||||
| 	if(SE->_is_local) { | ||||
| 	  nbr = coalescedReadPermute(in_v[SE->_offset],ptype,SE->_permute); | ||||
| 	} else { | ||||
| 	  nbr = coalescedRead(Stencil_v.CommBuf()[SE->_offset]); | ||||
| 	} | ||||
| 	acceleratorSynchronise(); | ||||
|  | ||||
| 	for(int bb=0;bb<nbasis;bb++) { | ||||
| 	  res = res + dag_factor_p[b*nbasis+bb]*coalescedRead(Aview_p[point][ss](b,bb))*nbr(bb); | ||||
| 	} | ||||
|       } | ||||
|       coalescedWrite(out_v[ss](b),res); | ||||
|       }); | ||||
|  | ||||
|     for(int p=0;p<geom.npoint;p++) AcceleratorViewContainer[p].ViewClose(); | ||||
|   } | ||||
|  | ||||
|   void MdirComms(const CoarseVector &in) | ||||
|   { | ||||
|     SimpleCompressor<siteVector> compressor; | ||||
|     Stencil.HaloExchange(in,compressor); | ||||
|   } | ||||
|   void MdirCalc(const CoarseVector &in, CoarseVector &out, int point) | ||||
|   { | ||||
|     conformable(_grid,in.Grid()); | ||||
|     conformable(_grid,out.Grid()); | ||||
|     out.Checkerboard() = in.Checkerboard(); | ||||
|  | ||||
|     typedef LatticeView<Cobj> Aview; | ||||
|     Vector<Aview> AcceleratorViewContainer; | ||||
|     for(int p=0;p<geom.npoint;p++) AcceleratorViewContainer.push_back(A[p].View(AcceleratorRead)); | ||||
|     Aview *Aview_p = & AcceleratorViewContainer[0]; | ||||
|  | ||||
|     autoView( out_v , out, AcceleratorWrite); | ||||
|     autoView( in_v  , in, AcceleratorRead); | ||||
|     autoView( Stencil_v  , Stencil, AcceleratorRead); | ||||
|  | ||||
|     const int Nsimd = CComplex::Nsimd(); | ||||
|     typedef decltype(coalescedRead(in_v[0])) calcVector; | ||||
|     typedef decltype(coalescedRead(in_v[0](0))) calcComplex; | ||||
|  | ||||
|     accelerator_for(sss, Grid()->oSites()*nbasis, Nsimd, { | ||||
|       int ss = sss/nbasis; | ||||
|       int b  = sss%nbasis; | ||||
|       calcComplex res = Zero(); | ||||
|       calcVector nbr; | ||||
|       int ptype; | ||||
|       StencilEntry *SE; | ||||
|  | ||||
|       SE=Stencil_v.GetEntry(ptype,point,ss); | ||||
| 	   | ||||
|       if(SE->_is_local) {  | ||||
| 	nbr = coalescedReadPermute(in_v[SE->_offset],ptype,SE->_permute); | ||||
|       } else { | ||||
| 	nbr = coalescedRead(Stencil_v.CommBuf()[SE->_offset]); | ||||
|       } | ||||
|       acceleratorSynchronise(); | ||||
|  | ||||
|       for(int bb=0;bb<nbasis;bb++) { | ||||
| 	res = res + coalescedRead(Aview_p[point][ss](b,bb))*nbr(bb); | ||||
|       } | ||||
|       coalescedWrite(out_v[ss](b),res); | ||||
|     }); | ||||
|     for(int p=0;p<geom.npoint;p++) AcceleratorViewContainer[p].ViewClose(); | ||||
|   } | ||||
|   void MdirAll(const CoarseVector &in,std::vector<CoarseVector> &out) | ||||
|   { | ||||
|     this->MdirComms(in); | ||||
|     int ndir=geom.npoint-1; | ||||
|     if ((out.size()!=ndir)&&(out.size()!=ndir+1)) {  | ||||
|       std::cout <<"MdirAll out size "<< out.size()<<std::endl; | ||||
|       std::cout <<"MdirAll ndir "<< ndir<<std::endl; | ||||
|       assert(0); | ||||
|     } | ||||
|     for(int p=0;p<ndir;p++){ | ||||
|       MdirCalc(in,out[p],p); | ||||
|     } | ||||
|   }; | ||||
|   void Mdir(const CoarseVector &in, CoarseVector &out, int dir, int disp){ | ||||
|  | ||||
|     this->MdirComms(in); | ||||
|  | ||||
|     MdirCalc(in,out,geom.point(dir,disp)); | ||||
|   }; | ||||
|  | ||||
|   void Mdiag(const CoarseVector &in, CoarseVector &out) | ||||
|   { | ||||
|     int point=geom.npoint-1; | ||||
|     MdirCalc(in, out, point); // No comms | ||||
|   }; | ||||
|  | ||||
|   void Mooee(const CoarseVector &in, CoarseVector &out) { | ||||
|     MooeeInternal(in, out, DaggerNo, InverseNo); | ||||
|   } | ||||
|  | ||||
|   void MooeeInv(const CoarseVector &in, CoarseVector &out) { | ||||
|     MooeeInternal(in, out, DaggerNo, InverseYes); | ||||
|   } | ||||
|  | ||||
|   void MooeeDag(const CoarseVector &in, CoarseVector &out) { | ||||
|     MooeeInternal(in, out, DaggerYes, InverseNo); | ||||
|   } | ||||
|  | ||||
|   void MooeeInvDag(const CoarseVector &in, CoarseVector &out) { | ||||
|     MooeeInternal(in, out, DaggerYes, InverseYes); | ||||
|   } | ||||
|  | ||||
|   void Meooe(const CoarseVector &in, CoarseVector &out) { | ||||
|     if(in.Checkerboard() == Odd) { | ||||
|       DhopEO(in, out, DaggerNo); | ||||
|     } else { | ||||
|       DhopOE(in, out, DaggerNo); | ||||
|     } | ||||
|   } | ||||
|  | ||||
|   void MeooeDag(const CoarseVector &in, CoarseVector &out) { | ||||
|     if(in.Checkerboard() == Odd) { | ||||
|       DhopEO(in, out, DaggerYes); | ||||
|     } else { | ||||
|       DhopOE(in, out, DaggerYes); | ||||
|     } | ||||
|   } | ||||
|  | ||||
|   void Dhop(const CoarseVector &in, CoarseVector &out, int dag) { | ||||
|     conformable(in.Grid(), _grid); // verifies full grid | ||||
|     conformable(in.Grid(), out.Grid()); | ||||
|  | ||||
|     out.Checkerboard() = in.Checkerboard(); | ||||
|  | ||||
|     DhopInternal(Stencil, A, in, out, dag); | ||||
|   } | ||||
|  | ||||
|   void DhopOE(const CoarseVector &in, CoarseVector &out, int dag) { | ||||
|     conformable(in.Grid(), _cbgrid);    // verifies half grid | ||||
|     conformable(in.Grid(), out.Grid()); // drops the cb check | ||||
|  | ||||
|     assert(in.Checkerboard() == Even); | ||||
|     out.Checkerboard() = Odd; | ||||
|  | ||||
|     DhopInternal(StencilEven, Aodd, in, out, dag); | ||||
|   } | ||||
|  | ||||
|   void DhopEO(const CoarseVector &in, CoarseVector &out, int dag) { | ||||
|     conformable(in.Grid(), _cbgrid);    // verifies half grid | ||||
|     conformable(in.Grid(), out.Grid()); // drops the cb check | ||||
|  | ||||
|     assert(in.Checkerboard() == Odd); | ||||
|     out.Checkerboard() = Even; | ||||
|  | ||||
|     DhopInternal(StencilOdd, Aeven, in, out, dag); | ||||
|   } | ||||
|  | ||||
|   void MooeeInternal(const CoarseVector &in, CoarseVector &out, int dag, int inv) { | ||||
|     out.Checkerboard() = in.Checkerboard(); | ||||
|     assert(in.Checkerboard() == Odd || in.Checkerboard() == Even); | ||||
|  | ||||
|     CoarseMatrix *Aself = nullptr; | ||||
|     if(in.Grid()->_isCheckerBoarded) { | ||||
|       if(in.Checkerboard() == Odd) { | ||||
|         Aself = (inv) ? &AselfInvOdd : &Aodd[geom.npoint-1]; | ||||
|         DselfInternal(StencilOdd, *Aself, in, out, dag); | ||||
|       } else { | ||||
|         Aself = (inv) ? &AselfInvEven : &Aeven[geom.npoint-1]; | ||||
|         DselfInternal(StencilEven, *Aself, in, out, dag); | ||||
|       } | ||||
|     } else { | ||||
|       Aself = (inv) ? &AselfInv : &A[geom.npoint-1]; | ||||
|       DselfInternal(Stencil, *Aself, in, out, dag); | ||||
|     } | ||||
|     assert(Aself != nullptr); | ||||
|   } | ||||
|  | ||||
|   void DselfInternal(CartesianStencil<siteVector,siteVector,DefaultImplParams> &st, CoarseMatrix &a, | ||||
|                        const CoarseVector &in, CoarseVector &out, int dag) { | ||||
|     int point = geom.npoint-1; | ||||
|     autoView( out_v, out, AcceleratorWrite); | ||||
|     autoView( in_v,  in,  AcceleratorRead); | ||||
|     autoView( st_v,  st,  AcceleratorRead); | ||||
|     autoView( a_v,   a,   AcceleratorRead); | ||||
|  | ||||
|     const int Nsimd = CComplex::Nsimd(); | ||||
|     typedef decltype(coalescedRead(in_v[0])) calcVector; | ||||
|     typedef decltype(coalescedRead(in_v[0](0))) calcComplex; | ||||
|  | ||||
|     RealD* dag_factor_p = &dag_factor[0]; | ||||
|  | ||||
|     if(dag) { | ||||
|       accelerator_for(sss, in.Grid()->oSites()*nbasis, Nsimd, { | ||||
|         int ss = sss/nbasis; | ||||
|         int b  = sss%nbasis; | ||||
|         calcComplex res = Zero(); | ||||
|         calcVector nbr; | ||||
|         int ptype; | ||||
|         StencilEntry *SE; | ||||
|  | ||||
|         SE=st_v.GetEntry(ptype,point,ss); | ||||
|  | ||||
|         if(SE->_is_local) { | ||||
|           nbr = coalescedReadPermute(in_v[SE->_offset],ptype,SE->_permute); | ||||
|         } else { | ||||
|           nbr = coalescedRead(st_v.CommBuf()[SE->_offset]); | ||||
|         } | ||||
|         acceleratorSynchronise(); | ||||
|  | ||||
|         for(int bb=0;bb<nbasis;bb++) { | ||||
|           res = res + dag_factor_p[b*nbasis+bb]*coalescedRead(a_v[ss](b,bb))*nbr(bb); | ||||
|         } | ||||
|         coalescedWrite(out_v[ss](b),res); | ||||
|       }); | ||||
|     } else { | ||||
|       accelerator_for(sss, in.Grid()->oSites()*nbasis, Nsimd, { | ||||
|         int ss = sss/nbasis; | ||||
|         int b  = sss%nbasis; | ||||
|         calcComplex res = Zero(); | ||||
|         calcVector nbr; | ||||
|         int ptype; | ||||
|         StencilEntry *SE; | ||||
|  | ||||
|         SE=st_v.GetEntry(ptype,point,ss); | ||||
|  | ||||
|         if(SE->_is_local) { | ||||
|           nbr = coalescedReadPermute(in_v[SE->_offset],ptype,SE->_permute); | ||||
|         } else { | ||||
|           nbr = coalescedRead(st_v.CommBuf()[SE->_offset]); | ||||
|         } | ||||
|         acceleratorSynchronise(); | ||||
|  | ||||
|         for(int bb=0;bb<nbasis;bb++) { | ||||
|           res = res + coalescedRead(a_v[ss](b,bb))*nbr(bb); | ||||
|         } | ||||
|         coalescedWrite(out_v[ss](b),res); | ||||
|       }); | ||||
|     } | ||||
|   } | ||||
|  | ||||
|   void DhopInternal(CartesianStencil<siteVector,siteVector,DefaultImplParams> &st, std::vector<CoarseMatrix> &a, | ||||
|                     const CoarseVector &in, CoarseVector &out, int dag) { | ||||
|     SimpleCompressor<siteVector> compressor; | ||||
|  | ||||
|     st.HaloExchange(in,compressor); | ||||
|     autoView( in_v,  in,  AcceleratorRead); | ||||
|     autoView( out_v, out, AcceleratorWrite); | ||||
|     autoView( st_v , st,  AcceleratorRead); | ||||
|     typedef LatticeView<Cobj> Aview; | ||||
|  | ||||
|     // determine in what order we need the points | ||||
|     int npoint = geom.npoint-1; | ||||
|     Vector<int> points(npoint, 0); | ||||
|     for(int p=0; p<npoint; p++) | ||||
|       points[p] = (dag && !hermitian) ? geom.points_dagger[p] : p; | ||||
|  | ||||
|     auto points_p = &points[0]; | ||||
|  | ||||
|     Vector<Aview> AcceleratorViewContainer; | ||||
|     for(int p=0;p<npoint;p++) AcceleratorViewContainer.push_back(a[p].View(AcceleratorRead)); | ||||
|     Aview *Aview_p = & AcceleratorViewContainer[0]; | ||||
|  | ||||
|     const int Nsimd = CComplex::Nsimd(); | ||||
|     typedef decltype(coalescedRead(in_v[0])) calcVector; | ||||
|     typedef decltype(coalescedRead(in_v[0](0))) calcComplex; | ||||
|  | ||||
|     RealD* dag_factor_p = &dag_factor[0]; | ||||
|  | ||||
|     if(dag) { | ||||
|       accelerator_for(sss, in.Grid()->oSites()*nbasis, Nsimd, { | ||||
|         int ss = sss/nbasis; | ||||
|         int b  = sss%nbasis; | ||||
|         calcComplex res = Zero(); | ||||
|         calcVector nbr; | ||||
|         int ptype; | ||||
|         StencilEntry *SE; | ||||
|  | ||||
|         for(int p=0;p<npoint;p++){ | ||||
|           int point = points_p[p]; | ||||
|           SE=st_v.GetEntry(ptype,point,ss); | ||||
|  | ||||
|           if(SE->_is_local) { | ||||
|             nbr = coalescedReadPermute(in_v[SE->_offset],ptype,SE->_permute); | ||||
|           } else { | ||||
|             nbr = coalescedRead(st_v.CommBuf()[SE->_offset]); | ||||
|           } | ||||
|           acceleratorSynchronise(); | ||||
|  | ||||
|           for(int bb=0;bb<nbasis;bb++) { | ||||
|             res = res + dag_factor_p[b*nbasis+bb]*coalescedRead(Aview_p[point][ss](b,bb))*nbr(bb); | ||||
|           } | ||||
|         } | ||||
|         coalescedWrite(out_v[ss](b),res); | ||||
|       }); | ||||
|     } else { | ||||
|       accelerator_for(sss, in.Grid()->oSites()*nbasis, Nsimd, { | ||||
|         int ss = sss/nbasis; | ||||
|         int b  = sss%nbasis; | ||||
|         calcComplex res = Zero(); | ||||
|         calcVector nbr; | ||||
|         int ptype; | ||||
|         StencilEntry *SE; | ||||
|  | ||||
|         for(int p=0;p<npoint;p++){ | ||||
|           int point = points_p[p]; | ||||
|           SE=st_v.GetEntry(ptype,point,ss); | ||||
|  | ||||
|           if(SE->_is_local) { | ||||
|             nbr = coalescedReadPermute(in_v[SE->_offset],ptype,SE->_permute); | ||||
|           } else { | ||||
|             nbr = coalescedRead(st_v.CommBuf()[SE->_offset]); | ||||
|           } | ||||
|           acceleratorSynchronise(); | ||||
|  | ||||
|           for(int bb=0;bb<nbasis;bb++) { | ||||
|             res = res + coalescedRead(Aview_p[point][ss](b,bb))*nbr(bb); | ||||
|           } | ||||
|         } | ||||
|         coalescedWrite(out_v[ss](b),res); | ||||
|       }); | ||||
|     } | ||||
|  | ||||
|     for(int p=0;p<npoint;p++) AcceleratorViewContainer[p].ViewClose(); | ||||
|   } | ||||
|    | ||||
|   CoarsenedMatrix(GridCartesian &CoarseGrid, int hermitian_=0) 	: | ||||
|     _grid(&CoarseGrid), | ||||
|     _cbgrid(new GridRedBlackCartesian(&CoarseGrid)), | ||||
|     geom(CoarseGrid._ndimension), | ||||
|     hermitian(hermitian_), | ||||
|     Stencil(&CoarseGrid,geom.npoint,Even,geom.directions,geom.displacements), | ||||
|     StencilEven(_cbgrid,geom.npoint,Even,geom.directions,geom.displacements), | ||||
|     StencilOdd(_cbgrid,geom.npoint,Odd,geom.directions,geom.displacements), | ||||
|     A(geom.npoint,&CoarseGrid), | ||||
|     Aeven(geom.npoint,_cbgrid), | ||||
|     Aodd(geom.npoint,_cbgrid), | ||||
|     AselfInv(&CoarseGrid), | ||||
|     AselfInvEven(_cbgrid), | ||||
|     AselfInvOdd(_cbgrid), | ||||
|     dag_factor(nbasis*nbasis) | ||||
|   { | ||||
|     fillFactor(); | ||||
|   }; | ||||
|  | ||||
|   CoarsenedMatrix(GridCartesian &CoarseGrid, GridRedBlackCartesian &CoarseRBGrid, int hermitian_=0) 	: | ||||
|  | ||||
|     _grid(&CoarseGrid), | ||||
|     _cbgrid(&CoarseRBGrid), | ||||
|     geom(CoarseGrid._ndimension), | ||||
|     hermitian(hermitian_), | ||||
|     Stencil(&CoarseGrid,geom.npoint,Even,geom.directions,geom.displacements), | ||||
|     StencilEven(&CoarseRBGrid,geom.npoint,Even,geom.directions,geom.displacements), | ||||
|     StencilOdd(&CoarseRBGrid,geom.npoint,Odd,geom.directions,geom.displacements), | ||||
|     A(geom.npoint,&CoarseGrid), | ||||
|     Aeven(geom.npoint,&CoarseRBGrid), | ||||
|     Aodd(geom.npoint,&CoarseRBGrid), | ||||
|     AselfInv(&CoarseGrid), | ||||
|     AselfInvEven(&CoarseRBGrid), | ||||
|     AselfInvOdd(&CoarseRBGrid), | ||||
|     dag_factor(nbasis*nbasis) | ||||
|   { | ||||
|     fillFactor(); | ||||
|   }; | ||||
|  | ||||
|   void fillFactor() { | ||||
|     Eigen::MatrixXd dag_factor_eigen = Eigen::MatrixXd::Ones(nbasis, nbasis); | ||||
|     if(!hermitian) { | ||||
|       const int nb = nbasis/2; | ||||
|       dag_factor_eigen.block(0,nb,nb,nb) *= -1.0; | ||||
|       dag_factor_eigen.block(nb,0,nb,nb) *= -1.0; | ||||
|     } | ||||
|  | ||||
|     // GPU readable prefactor | ||||
|     thread_for(i, nbasis*nbasis, { | ||||
|       int j = i/nbasis; | ||||
|       int k = i%nbasis; | ||||
|       dag_factor[i] = dag_factor_eigen(j, k); | ||||
|     }); | ||||
|   } | ||||
|  | ||||
|   void CoarsenOperator(GridBase *FineGrid,LinearOperatorBase<Lattice<Fobj> > &linop, | ||||
| 		       Aggregation<Fobj,CComplex,nbasis> & Subspace) | ||||
|   { | ||||
|     typedef Lattice<typename Fobj::tensor_reduced> FineComplexField; | ||||
|     typedef typename Fobj::scalar_type scalar_type; | ||||
|  | ||||
|     std::cout << GridLogMessage<< "CoarsenMatrix "<< std::endl; | ||||
|  | ||||
|     FineComplexField one(FineGrid); one=scalar_type(1.0,0.0); | ||||
|     FineComplexField zero(FineGrid); zero=scalar_type(0.0,0.0); | ||||
|  | ||||
|     std::vector<FineComplexField> masks(geom.npoint,FineGrid); | ||||
|     FineComplexField imask(FineGrid); // contributions from within this block | ||||
|     FineComplexField omask(FineGrid); // contributions from outwith this block | ||||
|  | ||||
|     FineComplexField evenmask(FineGrid); | ||||
|     FineComplexField oddmask(FineGrid);  | ||||
|  | ||||
|     FineField     phi(FineGrid); | ||||
|     FineField     tmp(FineGrid); | ||||
|     FineField     zz(FineGrid); zz=Zero(); | ||||
|     FineField    Mphi(FineGrid); | ||||
|     FineField    Mphie(FineGrid); | ||||
|     FineField    Mphio(FineGrid); | ||||
|     std::vector<FineField>     Mphi_p(geom.npoint,FineGrid); | ||||
|  | ||||
|     Lattice<iScalar<vInteger> > coor (FineGrid); | ||||
|     Lattice<iScalar<vInteger> > bcoor(FineGrid); | ||||
|     Lattice<iScalar<vInteger> > bcb  (FineGrid); bcb = Zero(); | ||||
|  | ||||
|     CoarseVector iProj(Grid());  | ||||
|     CoarseVector oProj(Grid());  | ||||
|     CoarseVector SelfProj(Grid());  | ||||
|     CoarseComplexField iZProj(Grid());  | ||||
|     CoarseComplexField oZProj(Grid());  | ||||
|  | ||||
|     CoarseScalar InnerProd(Grid());  | ||||
|  | ||||
|     std::cout << GridLogMessage<< "CoarsenMatrix Orthog "<< std::endl; | ||||
|     // Orthogonalise the subblocks over the basis | ||||
|     blockOrthogonalise(InnerProd,Subspace.subspace); | ||||
|  | ||||
|     // Compute the matrix elements of linop between this orthonormal | ||||
|     // set of vectors. | ||||
|     std::cout << GridLogMessage<< "CoarsenMatrix masks "<< std::endl; | ||||
|     int self_stencil=-1; | ||||
|     for(int p=0;p<geom.npoint;p++) | ||||
|     {  | ||||
|       int dir   = geom.directions[p]; | ||||
|       int disp  = geom.displacements[p]; | ||||
|       A[p]=Zero(); | ||||
|       if( geom.displacements[p]==0){ | ||||
| 	self_stencil=p; | ||||
|       } | ||||
|  | ||||
|       Integer block=(FineGrid->_rdimensions[dir])/(Grid()->_rdimensions[dir]); | ||||
|  | ||||
|       LatticeCoordinate(coor,dir); | ||||
|  | ||||
|       /////////////////////////////////////////////////////// | ||||
|       // Work out even and odd block checkerboarding for fast diagonal term | ||||
|       /////////////////////////////////////////////////////// | ||||
|       if ( disp==1 ) { | ||||
| 	bcb   = bcb + div(coor,block); | ||||
|       } | ||||
| 	 | ||||
|       if ( disp==0 ) { | ||||
| 	  masks[p]= Zero(); | ||||
|       } else if ( disp==1 ) { | ||||
| 	masks[p] = where(mod(coor,block)==(block-1),one,zero); | ||||
|       } else if ( disp==-1 ) { | ||||
| 	masks[p] = where(mod(coor,block)==(Integer)0,one,zero); | ||||
|       } | ||||
|     } | ||||
|     evenmask = where(mod(bcb,2)==(Integer)0,one,zero); | ||||
|     oddmask  = one-evenmask; | ||||
|  | ||||
|     assert(self_stencil!=-1); | ||||
|  | ||||
|     for(int i=0;i<nbasis;i++){ | ||||
|  | ||||
|       phi=Subspace.subspace[i]; | ||||
|  | ||||
|       std::cout << GridLogMessage<< "CoarsenMatrix vector "<<i << std::endl; | ||||
|       linop.OpDirAll(phi,Mphi_p); | ||||
|       linop.OpDiag  (phi,Mphi_p[geom.npoint-1]); | ||||
|  | ||||
|       for(int p=0;p<geom.npoint;p++){  | ||||
|  | ||||
| 	Mphi = Mphi_p[p]; | ||||
|  | ||||
| 	int dir   = geom.directions[p]; | ||||
| 	int disp  = geom.displacements[p]; | ||||
|  | ||||
| 	if ( (disp==-1) || (!hermitian ) ) { | ||||
|  | ||||
| 	  //////////////////////////////////////////////////////////////////////// | ||||
| 	  // Pick out contributions coming from this cell and neighbour cell | ||||
| 	  //////////////////////////////////////////////////////////////////////// | ||||
| 	  omask = masks[p]; | ||||
| 	  imask = one-omask; | ||||
| 	 | ||||
| 	  for(int j=0;j<nbasis;j++){ | ||||
| 	     | ||||
| 	    blockMaskedInnerProduct(oZProj,omask,Subspace.subspace[j],Mphi); | ||||
| 	     | ||||
| 	    autoView( iZProj_v , iZProj, AcceleratorRead) ; | ||||
| 	    autoView( oZProj_v , oZProj, AcceleratorRead) ; | ||||
| 	    autoView( A_p     ,  A[p], AcceleratorWrite); | ||||
| 	    autoView( A_self  , A[self_stencil], AcceleratorWrite); | ||||
|  | ||||
| 	    accelerator_for(ss, Grid()->oSites(), Fobj::Nsimd(),{ coalescedWrite(A_p[ss](j,i),oZProj_v(ss)); }); | ||||
| 	    if ( hermitian && (disp==-1) ) { | ||||
| 	      for(int pp=0;pp<geom.npoint;pp++){// Find the opposite link and set <j|A|i> = <i|A|j>* | ||||
| 		int dirp   = geom.directions[pp]; | ||||
| 		int dispp  = geom.displacements[pp]; | ||||
| 		if ( (dirp==dir) && (dispp==1) ){ | ||||
| 		  auto sft = conjugate(Cshift(oZProj,dir,1)); | ||||
| 		  autoView( sft_v    ,  sft  , AcceleratorWrite); | ||||
| 		  autoView( A_pp     ,  A[pp], AcceleratorWrite); | ||||
| 		  accelerator_for(ss, Grid()->oSites(), Fobj::Nsimd(),{ coalescedWrite(A_pp[ss](i,j),sft_v(ss)); }); | ||||
| 		} | ||||
| 	      } | ||||
| 	    } | ||||
|  | ||||
| 	  } | ||||
| 	} | ||||
|       } | ||||
|  | ||||
|       /////////////////////////////////////////// | ||||
|       // Faster alternate self coupling.. use hermiticity to save 2x | ||||
|       /////////////////////////////////////////// | ||||
|       { | ||||
| 	mult(tmp,phi,evenmask);  linop.Op(tmp,Mphie); | ||||
| 	mult(tmp,phi,oddmask );  linop.Op(tmp,Mphio); | ||||
|  | ||||
| 	{ | ||||
| 	  autoView( tmp_      , tmp, AcceleratorWrite); | ||||
| 	  autoView( evenmask_ , evenmask, AcceleratorRead); | ||||
| 	  autoView( oddmask_  ,  oddmask, AcceleratorRead); | ||||
| 	  autoView( Mphie_    ,  Mphie, AcceleratorRead); | ||||
| 	  autoView( Mphio_    ,  Mphio, AcceleratorRead); | ||||
| 	  accelerator_for(ss, FineGrid->oSites(), Fobj::Nsimd(),{  | ||||
| 	      coalescedWrite(tmp_[ss],evenmask_(ss)*Mphie_(ss) + oddmask_(ss)*Mphio_(ss)); | ||||
| 	    }); | ||||
| 	} | ||||
|  | ||||
| 	blockProject(SelfProj,tmp,Subspace.subspace); | ||||
|  | ||||
| 	autoView( SelfProj_ , SelfProj, AcceleratorRead); | ||||
| 	autoView( A_self  , A[self_stencil], AcceleratorWrite); | ||||
|  | ||||
| 	accelerator_for(ss, Grid()->oSites(), Fobj::Nsimd(),{ | ||||
| 	  for(int j=0;j<nbasis;j++){ | ||||
| 	    coalescedWrite(A_self[ss](j,i), SelfProj_(ss)(j)); | ||||
| 	  } | ||||
| 	}); | ||||
|  | ||||
|       } | ||||
|     } | ||||
|     if(hermitian) { | ||||
|       std::cout << GridLogMessage << " ForceHermitian, new code "<<std::endl; | ||||
|     } | ||||
|  | ||||
|     InvertSelfStencilLink(); std::cout << GridLogMessage << "Coarse self link inverted" << std::endl; | ||||
|     FillHalfCbs(); std::cout << GridLogMessage << "Coarse half checkerboards filled" << std::endl; | ||||
|   } | ||||
|  | ||||
|   void InvertSelfStencilLink() { | ||||
|     std::cout << GridLogDebug << "CoarsenedMatrix::InvertSelfStencilLink" << std::endl; | ||||
|     int localVolume = Grid()->lSites(); | ||||
|  | ||||
|     typedef typename Cobj::scalar_object scalar_object; | ||||
|  | ||||
|     autoView(Aself_v,    A[geom.npoint-1], CpuRead); | ||||
|     autoView(AselfInv_v, AselfInv,         CpuWrite); | ||||
|     thread_for(site, localVolume, { // NOTE: Not able to bring this to GPU because of Eigen + peek/poke | ||||
|       Eigen::MatrixXcd selfLinkEigen    = Eigen::MatrixXcd::Zero(nbasis, nbasis); | ||||
|       Eigen::MatrixXcd selfLinkInvEigen = Eigen::MatrixXcd::Zero(nbasis, nbasis); | ||||
|  | ||||
|       scalar_object selfLink    = Zero(); | ||||
|       scalar_object selfLinkInv = Zero(); | ||||
|  | ||||
|       Coordinate lcoor; | ||||
|  | ||||
|       Grid()->LocalIndexToLocalCoor(site, lcoor); | ||||
|       peekLocalSite(selfLink, Aself_v, lcoor); | ||||
|  | ||||
|       for (int i = 0; i < nbasis; ++i) | ||||
|         for (int j = 0; j < nbasis; ++j) | ||||
|           selfLinkEigen(i, j) = static_cast<ComplexD>(TensorRemove(selfLink(i, j))); | ||||
|  | ||||
|       selfLinkInvEigen = selfLinkEigen.inverse(); | ||||
|  | ||||
|       for(int i = 0; i < nbasis; ++i) | ||||
|         for(int j = 0; j < nbasis; ++j) | ||||
|           selfLinkInv(i, j) = selfLinkInvEigen(i, j); | ||||
|  | ||||
|       pokeLocalSite(selfLinkInv, AselfInv_v, lcoor); | ||||
|     }); | ||||
|   } | ||||
|  | ||||
|   void FillHalfCbs() { | ||||
|     std::cout << GridLogDebug << "CoarsenedMatrix::FillHalfCbs" << std::endl; | ||||
|     for(int p = 0; p < geom.npoint; ++p) { | ||||
|       pickCheckerboard(Even, Aeven[p], A[p]); | ||||
|       pickCheckerboard(Odd, Aodd[p], A[p]); | ||||
|     } | ||||
|     pickCheckerboard(Even, AselfInvEven, AselfInv); | ||||
|     pickCheckerboard(Odd, AselfInvOdd, AselfInv); | ||||
|   } | ||||
| }; | ||||
|  | ||||
| NAMESPACE_END(Grid); | ||||
| #endif | ||||
							
								
								
									
										418
									
								
								Grid/algorithms/multigrid/GeneralCoarsenedMatrix.h
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										418
									
								
								Grid/algorithms/multigrid/GeneralCoarsenedMatrix.h
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,418 @@ | ||||
| /************************************************************************************* | ||||
|  | ||||
|     Grid physics library, www.github.com/paboyle/Grid  | ||||
|  | ||||
|     Source file: ./lib/algorithms/GeneralCoarsenedMatrix.h | ||||
|  | ||||
|     Copyright (C) 2015 | ||||
|  | ||||
| Author: Peter Boyle <pboyle@bnl.gov> | ||||
|  | ||||
|     This program is free software; you can redistribute it and/or modify | ||||
|     it under the terms of the GNU General Public License as published by | ||||
|     the Free Software Foundation; either version 2 of the License, or | ||||
|     (at your option) any later version. | ||||
|  | ||||
|     This program is distributed in the hope that it will be useful, | ||||
|     but WITHOUT ANY WARRANTY; without even the implied warranty of | ||||
|     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the | ||||
|     GNU General Public License for more details. | ||||
|  | ||||
|     You should have received a copy of the GNU General Public License along | ||||
|     with this program; if not, write to the Free Software Foundation, Inc., | ||||
|     51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. | ||||
|  | ||||
|     See the full license in the file "LICENSE" in the top level distribution directory | ||||
| *************************************************************************************/ | ||||
| /*  END LEGAL */ | ||||
| #pragma once | ||||
|  | ||||
| #include <Grid/qcd/QCD.h> // needed for Dagger(Yes|No), Inverse(Yes|No) | ||||
|  | ||||
| #include <Grid/lattice/PaddedCell.h> | ||||
| #include <Grid/stencil/GeneralLocalStencil.h> | ||||
|  | ||||
| NAMESPACE_BEGIN(Grid); | ||||
|  | ||||
| // Fine Object == (per site) type of fine field | ||||
| // nbasis      == number of deflation vectors | ||||
| template<class Fobj,class CComplex,int nbasis> | ||||
| class GeneralCoarsenedMatrix : public SparseMatrixBase<Lattice<iVector<CComplex,nbasis > > >  { | ||||
| public: | ||||
|  | ||||
|   typedef GeneralCoarsenedMatrix<Fobj,CComplex,nbasis> GeneralCoarseOp; | ||||
|   typedef iVector<CComplex,nbasis >           siteVector; | ||||
|   typedef iMatrix<CComplex,nbasis >           siteMatrix; | ||||
|   typedef Lattice<iScalar<CComplex> >         CoarseComplexField; | ||||
|   typedef Lattice<siteVector>                 CoarseVector; | ||||
|   typedef Lattice<iMatrix<CComplex,nbasis > > CoarseMatrix; | ||||
|   typedef iMatrix<CComplex,nbasis >  Cobj; | ||||
|   typedef Lattice< CComplex >   CoarseScalar; // used for inner products on fine field | ||||
|   typedef Lattice<Fobj >        FineField; | ||||
|   typedef CoarseVector Field; | ||||
|   //////////////////// | ||||
|   // Data members | ||||
|   //////////////////// | ||||
|   int hermitian; | ||||
|   GridBase      *       _FineGrid;  | ||||
|   GridCartesian *       _CoarseGrid;  | ||||
|   NonLocalStencilGeometry &geom; | ||||
|   PaddedCell Cell; | ||||
|   GeneralLocalStencil Stencil; | ||||
|    | ||||
|   std::vector<CoarseMatrix> _A; | ||||
|   std::vector<CoarseMatrix> _Adag; | ||||
|  | ||||
|   /////////////////////// | ||||
|   // Interface | ||||
|   /////////////////////// | ||||
|   GridBase      * Grid(void)           { return _FineGrid; };   // this is all the linalg routines need to know | ||||
|   GridBase      * FineGrid(void)       { return _FineGrid; };   // this is all the linalg routines need to know | ||||
|   GridCartesian * CoarseGrid(void)     { return _CoarseGrid; };   // this is all the linalg routines need to know | ||||
|  | ||||
|   void ProjectNearestNeighbour(RealD shift, GeneralCoarseOp &CopyMe) | ||||
|   { | ||||
|     int nfound=0; | ||||
|     std::cout << GridLogMessage <<"GeneralCoarsenedMatrix::ProjectNearestNeighbour "<< CopyMe._A[0].Grid()<<std::endl; | ||||
|     for(int p=0;p<geom.npoint;p++){ | ||||
|       for(int pp=0;pp<CopyMe.geom.npoint;pp++){ | ||||
|  	// Search for the same relative shift | ||||
| 	// Avoids brutal handling of Grid pointers | ||||
| 	if ( CopyMe.geom.shifts[pp]==geom.shifts[p] ) { | ||||
| 	  _A[p] = CopyMe.Cell.Extract(CopyMe._A[pp]); | ||||
| 	  _Adag[p] = CopyMe.Cell.Extract(CopyMe._Adag[pp]); | ||||
| 	  nfound++; | ||||
| 	} | ||||
|       } | ||||
|     } | ||||
|     assert(nfound==geom.npoint); | ||||
|     ExchangeCoarseLinks(); | ||||
|   } | ||||
|    | ||||
|   GeneralCoarsenedMatrix(NonLocalStencilGeometry &_geom,GridBase *FineGrid, GridCartesian * CoarseGrid) | ||||
|     : geom(_geom), | ||||
|       _FineGrid(FineGrid), | ||||
|       _CoarseGrid(CoarseGrid), | ||||
|       hermitian(1), | ||||
|       Cell(_geom.Depth(),_CoarseGrid), | ||||
|       Stencil(Cell.grids.back(),geom.shifts) | ||||
|   { | ||||
|     { | ||||
|       int npoint = _geom.npoint; | ||||
|       autoView( Stencil_v  , Stencil, AcceleratorRead); | ||||
|       int osites=Stencil.Grid()->oSites(); | ||||
|       for(int ss=0;ss<osites;ss++){ | ||||
| 	for(int point=0;point<npoint;point++){ | ||||
| 	  auto SE = Stencil_v.GetEntry(point,ss); | ||||
| 	  int o = SE->_offset; | ||||
| 	  assert( o< osites); | ||||
| 	} | ||||
|       }     | ||||
|     } | ||||
|  | ||||
|     _A.resize(geom.npoint,CoarseGrid); | ||||
|     _Adag.resize(geom.npoint,CoarseGrid); | ||||
|   } | ||||
|   void M (const CoarseVector &in, CoarseVector &out) | ||||
|   { | ||||
|     Mult(_A,in,out); | ||||
|   } | ||||
|   void Mdag (const CoarseVector &in, CoarseVector &out) | ||||
|   { | ||||
|     if ( hermitian ) M(in,out); | ||||
|     else Mult(_Adag,in,out); | ||||
|   } | ||||
|   void Mult (std::vector<CoarseMatrix> &A,const CoarseVector &in, CoarseVector &out) | ||||
|   { | ||||
|     RealD tviews=0; | ||||
|     RealD ttot=0; | ||||
|     RealD tmult=0; | ||||
|     RealD texch=0; | ||||
|     RealD text=0; | ||||
|     ttot=-usecond(); | ||||
|     conformable(CoarseGrid(),in.Grid()); | ||||
|     conformable(in.Grid(),out.Grid()); | ||||
|     out.Checkerboard() = in.Checkerboard(); | ||||
|     CoarseVector tin=in; | ||||
|  | ||||
|     texch-=usecond(); | ||||
|     CoarseVector pin  = Cell.Exchange(tin); | ||||
|     texch+=usecond(); | ||||
|  | ||||
|     CoarseVector pout(pin.Grid()); pout=Zero(); | ||||
|  | ||||
|     int npoint = geom.npoint; | ||||
|     typedef LatticeView<Cobj> Aview; | ||||
|        | ||||
|     const int Nsimd = CComplex::Nsimd(); | ||||
|      | ||||
|     int osites=pin.Grid()->oSites(); | ||||
|     //    int gsites=pin.Grid()->gSites(); | ||||
|  | ||||
|     RealD flops = 1.0* npoint * nbasis * nbasis * 8 * osites; | ||||
|     RealD bytes = (1.0*osites*sizeof(siteMatrix)*npoint+2.0*osites*sizeof(siteVector))*npoint; | ||||
|        | ||||
|     //    for(int point=0;point<npoint;point++){ | ||||
|     //      conformable(A[point],pin); | ||||
|     //    } | ||||
|  | ||||
|     { | ||||
|       tviews-=usecond(); | ||||
|       autoView( in_v , pin, AcceleratorRead); | ||||
|       autoView( out_v , pout, AcceleratorWrite); | ||||
|       autoView( Stencil_v  , Stencil, AcceleratorRead); | ||||
|       tviews+=usecond(); | ||||
|        | ||||
|       for(int point=0;point<npoint;point++){ | ||||
| 	tviews-=usecond(); | ||||
| 	autoView( A_v, A[point],AcceleratorRead); | ||||
| 	tviews+=usecond(); | ||||
| 	tmult-=usecond(); | ||||
| 	accelerator_for(sss, osites*nbasis, Nsimd, { | ||||
|  | ||||
| 	    typedef decltype(coalescedRead(in_v[0]))    calcVector; | ||||
|  | ||||
| 	    int ss = sss/nbasis; | ||||
| 	    int b  = sss%nbasis; | ||||
|  | ||||
| 	    auto SE  = Stencil_v.GetEntry(point,ss); | ||||
| 	    auto nbr = coalescedReadGeneralPermute(in_v[SE->_offset],SE->_permute,Nd); | ||||
| 	    auto res = out_v(ss)(b); | ||||
| 	    for(int bb=0;bb<nbasis;bb++) { | ||||
| 	      res = res + coalescedRead(A_v[ss](b,bb))*nbr(bb); | ||||
| 	    } | ||||
| 	    coalescedWrite(out_v[ss](b),res); | ||||
| 	}); | ||||
|  | ||||
| 	tmult+=usecond(); | ||||
|       } | ||||
|     } | ||||
|     text-=usecond(); | ||||
|     out = Cell.Extract(pout); | ||||
|     text+=usecond(); | ||||
|     ttot+=usecond(); | ||||
|  | ||||
|     std::cout << GridLogPerformance<<"Coarse Mult Aviews "<<tviews<<" us"<<std::endl; | ||||
|     std::cout << GridLogPerformance<<"Coarse Mult exch "<<texch<<" us"<<std::endl; | ||||
|     std::cout << GridLogPerformance<<"Coarse Mult mult "<<tmult<<" us"<<std::endl; | ||||
|     std::cout << GridLogPerformance<<"Coarse Mult ext  "<<text<<" us"<<std::endl; | ||||
|     std::cout << GridLogPerformance<<"Coarse Mult tot  "<<ttot<<" us"<<std::endl; | ||||
|     std::cout << GridLogPerformance<<"Coarse Kernel flop/s "<< flops/tmult<<" mflop/s"<<std::endl; | ||||
|     std::cout << GridLogPerformance<<"Coarse Kernel bytes/s"<< bytes/tmult<<" MB/s"<<std::endl; | ||||
|     std::cout << GridLogPerformance<<"Coarse overall flops/s "<< flops/ttot<<" mflop/s"<<std::endl; | ||||
|     std::cout << GridLogPerformance<<"Coarse total bytes   "<< bytes/1e6<<" MB"<<std::endl; | ||||
|   }; | ||||
|  | ||||
|   void PopulateAdag(void) | ||||
|   { | ||||
|     for(int64_t bidx=0;bidx<CoarseGrid()->gSites() ;bidx++){ | ||||
|       Coordinate bcoor; | ||||
|       CoarseGrid()->GlobalIndexToGlobalCoor(bidx,bcoor); | ||||
|        | ||||
|       for(int p=0;p<geom.npoint;p++){ | ||||
| 	Coordinate scoor = bcoor; | ||||
| 	for(int mu=0;mu<bcoor.size();mu++){ | ||||
| 	  int L = CoarseGrid()->GlobalDimensions()[mu]; | ||||
| 	  scoor[mu] = (bcoor[mu] - geom.shifts[p][mu] + L) % L; // Modulo arithmetic | ||||
| 	} | ||||
| 	// Flip to poke/peekLocalSite and not too bad | ||||
| 	auto link = peekSite(_A[p],scoor); | ||||
| 	int pp = geom.Reverse(p); | ||||
| 	pokeSite(adj(link),_Adag[pp],bcoor); | ||||
|       } | ||||
|     } | ||||
|   } | ||||
|   ///////////////////////////////////////////////////////////// | ||||
|   //  | ||||
|   // A) Only reduced flops option is to use a padded cell of depth 4 | ||||
|   // and apply MpcDagMpc in the padded cell. | ||||
|   // | ||||
|   // Makes for ONE application of MpcDagMpc per vector instead of 30 or 80. | ||||
|   // With the effective cell size around (B+8)^4 perhaps 12^4/4^4 ratio | ||||
|   // Cost is 81x more, same as stencil size. | ||||
|   // | ||||
|   // But: can eliminate comms and do as local dirichlet. | ||||
|   // | ||||
|   // Local exchange gauge field once. | ||||
|   // Apply to all vectors, local only computation. | ||||
|   // Must exchange ghost subcells in reverse process of PaddedCell to take inner products | ||||
|   // | ||||
|   // B) Can reduce cost: pad by 1, apply Deo      (4^4+6^4+8^4+8^4 )/ (4x 4^4) | ||||
|   //                     pad by 2, apply Doe | ||||
|   //                     pad by 3, apply Deo | ||||
|   //                     then break out 8x directions; cost is ~10x MpcDagMpc per vector | ||||
|   // | ||||
|   // => almost factor of 10 in setup cost, excluding data rearrangement | ||||
|   // | ||||
|   // Intermediates -- ignore the corner terms, leave approximate and force Hermitian | ||||
|   // Intermediates -- pad by 2 and apply 1+8+24 = 33 times. | ||||
|   ///////////////////////////////////////////////////////////// | ||||
|  | ||||
|     ////////////////////////////////////////////////////////// | ||||
|     // BFM HDCG style approach: Solve a system of equations to get Aij | ||||
|     ////////////////////////////////////////////////////////// | ||||
|     /* | ||||
|      *     Here, k,l index which possible shift within the 3^Nd "ball" connected by MdagM. | ||||
|      * | ||||
|      *     conj(phases[block]) proj[k][ block*Nvec+j ] =  \sum_ball  e^{i q_k . delta} < phi_{block,j} | MdagM | phi_{(block+delta),i} >  | ||||
|      *                                                 =  \sum_ball e^{iqk.delta} A_ji | ||||
|      * | ||||
|      *     Must invert matrix M_k,l = e^[i q_k . delta_l] | ||||
|      * | ||||
|      *     Where q_k = delta_k . (2*M_PI/global_nb[mu]) | ||||
|      */ | ||||
|   void CoarsenOperator(LinearOperatorBase<Lattice<Fobj> > &linop, | ||||
| 		       Aggregation<Fobj,CComplex,nbasis> & Subspace) | ||||
|   { | ||||
|     std::cout << GridLogMessage<< "GeneralCoarsenMatrix "<< std::endl; | ||||
|     GridBase *grid = FineGrid(); | ||||
|  | ||||
|     RealD tproj=0.0; | ||||
|     RealD teigen=0.0; | ||||
|     RealD tmat=0.0; | ||||
|     RealD tphase=0.0; | ||||
|     RealD tinv=0.0; | ||||
|  | ||||
|     ///////////////////////////////////////////////////////////// | ||||
|     // Orthogonalise the subblocks over the basis | ||||
|     ///////////////////////////////////////////////////////////// | ||||
|     CoarseScalar InnerProd(CoarseGrid());  | ||||
|     blockOrthogonalise(InnerProd,Subspace.subspace); | ||||
|  | ||||
|     const int npoint = geom.npoint; | ||||
|        | ||||
|     Coordinate clatt = CoarseGrid()->GlobalDimensions(); | ||||
|     int Nd = CoarseGrid()->Nd(); | ||||
|  | ||||
|       /* | ||||
|        *     Here, k,l index which possible momentum/shift within the N-points connected by MdagM. | ||||
|        *     Matrix index i is mapped to this shift via  | ||||
|        *               geom.shifts[i] | ||||
|        * | ||||
|        *     conj(pha[block]) proj[k (which mom)][j (basis vec cpt)][block]  | ||||
|        *       =  \sum_{l in ball}  e^{i q_k . delta_l} < phi_{block,j} | MdagM | phi_{(block+delta_l),i} >  | ||||
|        *       =  \sum_{l in ball} e^{iqk.delta_l} A_ji^{b.b+l} | ||||
|        *       = M_{kl} A_ji^{b.b+l} | ||||
|        * | ||||
|        *     Must assemble and invert matrix M_k,l = e^[i q_k . delta_l] | ||||
|        *   | ||||
|        *     Where q_k = delta_k . (2*M_PI/global_nb[mu]) | ||||
|        * | ||||
|        *     Then A{ji}^{b,b+l} = M^{-1}_{lm} ComputeProj_{m,b,i,j} | ||||
|        */ | ||||
|     teigen-=usecond(); | ||||
|     Eigen::MatrixXcd Mkl    = Eigen::MatrixXcd::Zero(npoint,npoint); | ||||
|     Eigen::MatrixXcd invMkl = Eigen::MatrixXcd::Zero(npoint,npoint); | ||||
|     ComplexD ci(0.0,1.0); | ||||
|     for(int k=0;k<npoint;k++){ // Loop over momenta | ||||
|  | ||||
|       for(int l=0;l<npoint;l++){ // Loop over nbr relative | ||||
| 	ComplexD phase(0.0,0.0); | ||||
| 	for(int mu=0;mu<Nd;mu++){ | ||||
| 	  RealD TwoPiL =  M_PI * 2.0/ clatt[mu]; | ||||
| 	  phase=phase+TwoPiL*geom.shifts[k][mu]*geom.shifts[l][mu]; | ||||
| 	} | ||||
| 	phase=exp(phase*ci); | ||||
| 	Mkl(k,l) = phase; | ||||
|       } | ||||
|     } | ||||
|     invMkl = Mkl.inverse(); | ||||
|     teigen+=usecond(); | ||||
|  | ||||
|     /////////////////////////////////////////////////////////////////////// | ||||
|     // Now compute the matrix elements of linop between the orthonormal | ||||
|     // set of vectors. | ||||
|     /////////////////////////////////////////////////////////////////////// | ||||
|     FineField phaV(grid); // Phased block basis vector | ||||
|     FineField MphaV(grid);// Matrix applied | ||||
|     CoarseVector coarseInner(CoarseGrid()); | ||||
|  | ||||
|     std::vector<CoarseVector> ComputeProj(npoint,CoarseGrid()); | ||||
|     std::vector<CoarseVector>          FT(npoint,CoarseGrid()); | ||||
|     for(int i=0;i<nbasis;i++){// Loop over basis vectors | ||||
|       std::cout << GridLogMessage<< "CoarsenMatrixColoured vec "<<i<<"/"<<nbasis<< std::endl; | ||||
|       for(int p=0;p<npoint;p++){ // Loop over momenta in npoint | ||||
| 	///////////////////////////////////////////////////// | ||||
| 	// Stick a phase on every block | ||||
| 	///////////////////////////////////////////////////// | ||||
| 	tphase-=usecond(); | ||||
| 	CoarseComplexField coor(CoarseGrid()); | ||||
| 	CoarseComplexField pha(CoarseGrid());	pha=Zero(); | ||||
| 	for(int mu=0;mu<Nd;mu++){ | ||||
| 	  LatticeCoordinate(coor,mu); | ||||
| 	  RealD TwoPiL =  M_PI * 2.0/ clatt[mu]; | ||||
| 	  pha = pha + (TwoPiL * geom.shifts[p][mu]) * coor; | ||||
| 	} | ||||
| 	pha  =exp(pha*ci); | ||||
| 	phaV=Zero(); | ||||
| 	blockZAXPY(phaV,pha,Subspace.subspace[i],phaV); | ||||
| 	tphase+=usecond(); | ||||
|  | ||||
| 	///////////////////////////////////////////////////////////////////// | ||||
| 	// Multiple phased subspace vector by matrix and project to subspace | ||||
| 	// Remove local bulk phase to leave relative phases | ||||
| 	///////////////////////////////////////////////////////////////////// | ||||
| 	tmat-=usecond(); | ||||
| 	linop.Op(phaV,MphaV); | ||||
| 	tmat+=usecond(); | ||||
|  | ||||
| 	tproj-=usecond(); | ||||
| 	blockProject(coarseInner,MphaV,Subspace.subspace); | ||||
| 	coarseInner = conjugate(pha) * coarseInner; | ||||
|  | ||||
| 	ComputeProj[p] = coarseInner; | ||||
| 	tproj+=usecond(); | ||||
|  | ||||
|       } | ||||
|  | ||||
|       tinv-=usecond(); | ||||
|       for(int k=0;k<npoint;k++){ | ||||
| 	FT[k] = Zero(); | ||||
| 	for(int l=0;l<npoint;l++){ | ||||
| 	  FT[k]= FT[k]+ invMkl(l,k)*ComputeProj[l]; | ||||
| 	} | ||||
|        | ||||
| 	int osites=CoarseGrid()->oSites(); | ||||
| 	autoView( A_v  , _A[k], AcceleratorWrite); | ||||
| 	autoView( FT_v  , FT[k], AcceleratorRead); | ||||
| 	accelerator_for(sss, osites, 1, { | ||||
| 	    for(int j=0;j<nbasis;j++){ | ||||
| 	      A_v[sss](j,i) = FT_v[sss](j); | ||||
| 	    } | ||||
|         }); | ||||
|       } | ||||
|       tinv+=usecond(); | ||||
|     } | ||||
|  | ||||
|     for(int p=0;p<geom.npoint;p++){ | ||||
|       Coordinate coor({0,0,0,0,0}); | ||||
|       auto sval = peekSite(_A[p],coor); | ||||
|     } | ||||
|  | ||||
|     // Only needed if nonhermitian | ||||
|     if ( ! hermitian ) { | ||||
|       std::cout << GridLogMessage<<"PopulateAdag  "<<std::endl; | ||||
|       PopulateAdag(); | ||||
|     } | ||||
|  | ||||
|     // Need to write something to populate Adag from A | ||||
|     ExchangeCoarseLinks(); | ||||
|     std::cout << GridLogMessage<<"CoarsenOperator eigen  "<<teigen<<" us"<<std::endl; | ||||
|     std::cout << GridLogMessage<<"CoarsenOperator phase  "<<tphase<<" us"<<std::endl; | ||||
|     std::cout << GridLogMessage<<"CoarsenOperator mat    "<<tmat <<" us"<<std::endl; | ||||
|     std::cout << GridLogMessage<<"CoarsenOperator proj   "<<tproj<<" us"<<std::endl; | ||||
|     std::cout << GridLogMessage<<"CoarsenOperator inv    "<<tinv<<" us"<<std::endl; | ||||
|   } | ||||
|   void ExchangeCoarseLinks(void){ | ||||
|     for(int p=0;p<geom.npoint;p++){ | ||||
|       _A[p] = Cell.Exchange(_A[p]); | ||||
|       _Adag[p]= Cell.Exchange(_Adag[p]); | ||||
|     } | ||||
|   } | ||||
|   virtual  void Mdiag    (const Field &in, Field &out){ assert(0);}; | ||||
|   virtual  void Mdir     (const Field &in, Field &out,int dir, int disp){assert(0);}; | ||||
|   virtual  void MdirAll  (const Field &in, std::vector<Field> &out){assert(0);}; | ||||
| }; | ||||
|  | ||||
|  | ||||
| NAMESPACE_END(Grid); | ||||
							
								
								
									
										243
									
								
								Grid/algorithms/multigrid/Geometry.h
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										243
									
								
								Grid/algorithms/multigrid/Geometry.h
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,243 @@ | ||||
| /************************************************************************************* | ||||
|  | ||||
|     Grid physics library, www.github.com/paboyle/Grid  | ||||
|  | ||||
|     Source file: ./lib/algorithms/GeneralCoarsenedMatrix.h | ||||
|  | ||||
|     Copyright (C) 2015 | ||||
|  | ||||
| Author: Peter Boyle <pboyle@bnl.gov> | ||||
|  | ||||
|     This program is free software; you can redistribute it and/or modify | ||||
|     it under the terms of the GNU General Public License as published by | ||||
|     the Free Software Foundation; either version 2 of the License, or | ||||
|     (at your option) any later version. | ||||
|  | ||||
|     This program is distributed in the hope that it will be useful, | ||||
|     but WITHOUT ANY WARRANTY; without even the implied warranty of | ||||
|     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the | ||||
|     GNU General Public License for more details. | ||||
|  | ||||
|     You should have received a copy of the GNU General Public License along | ||||
|     with this program; if not, write to the Free Software Foundation, Inc., | ||||
|     51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. | ||||
|  | ||||
|     See the full license in the file "LICENSE" in the top level distribution directory | ||||
| *************************************************************************************/ | ||||
| /*  END LEGAL */ | ||||
| #pragma once | ||||
|  | ||||
| NAMESPACE_BEGIN(Grid); | ||||
|  | ||||
|  | ||||
| ///////////////////////////////////////////////////////////////// | ||||
| // Geometry class in cartesian case | ||||
| ///////////////////////////////////////////////////////////////// | ||||
|  | ||||
| class Geometry { | ||||
| public: | ||||
|   int npoint; | ||||
|   int base; | ||||
|   std::vector<int> directions   ; | ||||
|   std::vector<int> displacements; | ||||
|   std::vector<int> points_dagger; | ||||
|  | ||||
|   Geometry(int _d)  { | ||||
|      | ||||
|     base = (_d==5) ? 1:0; | ||||
|  | ||||
|     // make coarse grid stencil for 4d , not 5d | ||||
|     if ( _d==5 ) _d=4; | ||||
|  | ||||
|     npoint = 2*_d+1; | ||||
|     directions.resize(npoint); | ||||
|     displacements.resize(npoint); | ||||
|     points_dagger.resize(npoint); | ||||
|     for(int d=0;d<_d;d++){ | ||||
|       directions[d   ] = d+base; | ||||
|       directions[d+_d] = d+base; | ||||
|       displacements[d  ] = +1; | ||||
|       displacements[d+_d]= -1; | ||||
|       points_dagger[d   ] = d+_d; | ||||
|       points_dagger[d+_d] = d; | ||||
|     } | ||||
|     directions   [2*_d]=0; | ||||
|     displacements[2*_d]=0; | ||||
|     points_dagger[2*_d]=2*_d; | ||||
|   } | ||||
|  | ||||
|   int point(int dir, int disp) { | ||||
|     assert(disp == -1 || disp == 0 || disp == 1); | ||||
|     assert(base+0 <= dir && dir < base+4); | ||||
|  | ||||
|     // directions faster index = new indexing | ||||
|     // 4d (base = 0): | ||||
|     // point 0  1  2  3  4  5  6  7  8 | ||||
|     // dir   0  1  2  3  0  1  2  3  0 | ||||
|     // disp +1 +1 +1 +1 -1 -1 -1 -1  0 | ||||
|     // 5d (base = 1): | ||||
|     // point 0  1  2  3  4  5  6  7  8 | ||||
|     // dir   1  2  3  4  1  2  3  4  0 | ||||
|     // disp +1 +1 +1 +1 -1 -1 -1 -1  0 | ||||
|  | ||||
|     // displacements faster index = old indexing | ||||
|     // 4d (base = 0): | ||||
|     // point 0  1  2  3  4  5  6  7  8 | ||||
|     // dir   0  0  1  1  2  2  3  3  0 | ||||
|     // disp +1 -1 +1 -1 +1 -1 +1 -1  0 | ||||
|     // 5d (base = 1): | ||||
|     // point 0  1  2  3  4  5  6  7  8 | ||||
|     // dir   1  1  2  2  3  3  4  4  0 | ||||
|     // disp +1 -1 +1 -1 +1 -1 +1 -1  0 | ||||
|  | ||||
|     if(dir == 0 and disp == 0) | ||||
|       return 8; | ||||
|     else // New indexing | ||||
|       return (1 - disp) / 2 * 4 + dir - base; | ||||
|     // else // Old indexing | ||||
|     //   return (4 * (dir - base) + 1 - disp) / 2; | ||||
|   } | ||||
| }; | ||||
|  | ||||
| ///////////////////////////////////////////////////////////////// | ||||
| // Less local equivalent of Geometry class in cartesian case | ||||
| ///////////////////////////////////////////////////////////////// | ||||
| class NonLocalStencilGeometry { | ||||
| public: | ||||
|   int depth; | ||||
|   int hops; | ||||
|   int npoint; | ||||
|   std::vector<Coordinate> shifts; | ||||
|   Coordinate stencil_size; | ||||
|   Coordinate stencil_lo; | ||||
|   Coordinate stencil_hi; | ||||
|   GridCartesian *grid; | ||||
|   GridCartesian *Grid() {return grid;}; | ||||
|   int Depth(void){return 1;};   // Ghost zone depth | ||||
|   int Hops(void){return hops;}; // # of hops=> level of corner fill in in stencil | ||||
|  | ||||
|   virtual int DimSkip(void) =0; | ||||
|  | ||||
|   virtual ~NonLocalStencilGeometry() {}; | ||||
|  | ||||
|   int  Reverse(int point) | ||||
|   { | ||||
|     int Nd = Grid()->Nd(); | ||||
|     Coordinate shft = shifts[point]; | ||||
|     Coordinate rev(Nd); | ||||
|     for(int mu=0;mu<Nd;mu++) rev[mu]= -shft[mu]; | ||||
|     for(int p=0;p<npoint;p++){ | ||||
|       if(rev==shifts[p]){ | ||||
| 	return p; | ||||
|       } | ||||
|     } | ||||
|     assert(0); | ||||
|     return -1; | ||||
|   } | ||||
|   void BuildShifts(void) | ||||
|   { | ||||
|     this->shifts.resize(0); | ||||
|     int Nd = this->grid->Nd(); | ||||
|  | ||||
|     int dd = this->DimSkip(); | ||||
|     for(int s0=this->stencil_lo[dd+0];s0<=this->stencil_hi[dd+0];s0++){ | ||||
|     for(int s1=this->stencil_lo[dd+1];s1<=this->stencil_hi[dd+1];s1++){ | ||||
|     for(int s2=this->stencil_lo[dd+2];s2<=this->stencil_hi[dd+2];s2++){ | ||||
|     for(int s3=this->stencil_lo[dd+3];s3<=this->stencil_hi[dd+3];s3++){ | ||||
|       Coordinate sft(Nd,0); | ||||
|       sft[dd+0] = s0; | ||||
|       sft[dd+1] = s1; | ||||
|       sft[dd+2] = s2; | ||||
|       sft[dd+3] = s3; | ||||
|       int nhops = abs(s0)+abs(s1)+abs(s2)+abs(s3); | ||||
|       if(nhops<=this->hops) this->shifts.push_back(sft); | ||||
|     }}}} | ||||
|     this->npoint = this->shifts.size(); | ||||
|     std::cout << GridLogMessage << "NonLocalStencilGeometry has "<< this->npoint << " terms in stencil "<<std::endl; | ||||
|   } | ||||
|    | ||||
|   NonLocalStencilGeometry(GridCartesian *_coarse_grid,int _hops) : grid(_coarse_grid), hops(_hops) | ||||
|   { | ||||
|     Coordinate latt = grid->GlobalDimensions(); | ||||
|     stencil_size.resize(grid->Nd()); | ||||
|     stencil_lo.resize(grid->Nd()); | ||||
|     stencil_hi.resize(grid->Nd()); | ||||
|     for(int d=0;d<grid->Nd();d++){ | ||||
|      if ( latt[d] == 1 ) { | ||||
|       stencil_lo[d] = 0; | ||||
|       stencil_hi[d] = 0; | ||||
|       stencil_size[d]= 1; | ||||
|      } else if ( latt[d] == 2 ) { | ||||
|       stencil_lo[d] = -1; | ||||
|       stencil_hi[d] = 0; | ||||
|       stencil_size[d]= 2; | ||||
|      } else if ( latt[d] > 2 ) { | ||||
|        stencil_lo[d] = -1; | ||||
|        stencil_hi[d] =  1; | ||||
|        stencil_size[d]= 3; | ||||
|      } | ||||
|     } | ||||
|   }; | ||||
|  | ||||
| }; | ||||
|  | ||||
| // Need to worry about red-black now | ||||
| class NonLocalStencilGeometry4D : public NonLocalStencilGeometry { | ||||
| public: | ||||
|   virtual int DimSkip(void) { return 0;}; | ||||
|   NonLocalStencilGeometry4D(GridCartesian *Coarse,int _hops) : NonLocalStencilGeometry(Coarse,_hops) { }; | ||||
|   virtual ~NonLocalStencilGeometry4D() {}; | ||||
| }; | ||||
| class NonLocalStencilGeometry5D : public NonLocalStencilGeometry { | ||||
| public: | ||||
|   virtual int DimSkip(void) { return 1; };  | ||||
|   NonLocalStencilGeometry5D(GridCartesian *Coarse,int _hops) : NonLocalStencilGeometry(Coarse,_hops)  { }; | ||||
|   virtual ~NonLocalStencilGeometry5D() {}; | ||||
| }; | ||||
| /* | ||||
|  * Bunch of different options classes | ||||
|  */ | ||||
| class NextToNextToNextToNearestStencilGeometry4D : public NonLocalStencilGeometry4D { | ||||
| public: | ||||
|   NextToNextToNextToNearestStencilGeometry4D(GridCartesian *Coarse) :  NonLocalStencilGeometry4D(Coarse,4) | ||||
|   { | ||||
|     this->BuildShifts(); | ||||
|   }; | ||||
| }; | ||||
| class NextToNextToNextToNearestStencilGeometry5D : public  NonLocalStencilGeometry5D { | ||||
| public: | ||||
|   NextToNextToNextToNearestStencilGeometry5D(GridCartesian *Coarse) :  NonLocalStencilGeometry5D(Coarse,4) | ||||
|   { | ||||
|     this->BuildShifts(); | ||||
|   }; | ||||
| }; | ||||
| class NextToNearestStencilGeometry4D : public  NonLocalStencilGeometry4D { | ||||
| public: | ||||
|   NextToNearestStencilGeometry4D(GridCartesian *Coarse) :  NonLocalStencilGeometry4D(Coarse,2) | ||||
|   { | ||||
|     this->BuildShifts(); | ||||
|   }; | ||||
| }; | ||||
| class NextToNearestStencilGeometry5D : public  NonLocalStencilGeometry5D { | ||||
| public: | ||||
|   NextToNearestStencilGeometry5D(GridCartesian *Coarse) :  NonLocalStencilGeometry5D(Coarse,2) | ||||
|   { | ||||
|     this->BuildShifts(); | ||||
|   }; | ||||
| }; | ||||
| class NearestStencilGeometry4D : public  NonLocalStencilGeometry4D { | ||||
| public: | ||||
|   NearestStencilGeometry4D(GridCartesian *Coarse) :  NonLocalStencilGeometry4D(Coarse,1) | ||||
|   { | ||||
|     this->BuildShifts(); | ||||
|   }; | ||||
| }; | ||||
| class NearestStencilGeometry5D : public  NonLocalStencilGeometry5D { | ||||
| public: | ||||
|   NearestStencilGeometry5D(GridCartesian *Coarse) :  NonLocalStencilGeometry5D(Coarse,1) | ||||
|   { | ||||
|     this->BuildShifts(); | ||||
|   }; | ||||
| }; | ||||
|  | ||||
| NAMESPACE_END(Grid); | ||||
							
								
								
									
										33
									
								
								Grid/algorithms/multigrid/Multigrid.h
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										33
									
								
								Grid/algorithms/multigrid/Multigrid.h
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,33 @@ | ||||
|     /************************************************************************************* | ||||
|  | ||||
|     Grid physics library, www.github.com/paboyle/Grid | ||||
|  | ||||
|     Source file: Grid/algorithms/multigrid/MultiGrid.h | ||||
|  | ||||
|     Copyright (C) 2023 | ||||
|  | ||||
| Author: Peter Boyle <pboyle@bnl.gov> | ||||
|  | ||||
|     This program is free software; you can redistribute it and/or modify | ||||
|     it under the terms of the GNU General Public License as published by | ||||
|     the Free Software Foundation; either version 2 of the License, or | ||||
|     (at your option) any later version. | ||||
|  | ||||
|     This program is distributed in the hope that it will be useful, | ||||
|     but WITHOUT ANY WARRANTY; without even the implied warranty of | ||||
|     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the | ||||
|     GNU General Public License for more details. | ||||
|  | ||||
|     You should have received a copy of the GNU General Public License along | ||||
|     with this program; if not, write to the Free Software Foundation, Inc., | ||||
|     51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. | ||||
|  | ||||
|     See the full license in the file "LICENSE" in the top level distribution directory | ||||
|     *************************************************************************************/ | ||||
|     /*  END LEGAL */ | ||||
| #pragma once | ||||
|  | ||||
| #include <Grid/algorithms/multigrid/Aggregates.h> | ||||
| #include <Grid/algorithms/multigrid/Geometry.h> | ||||
| #include <Grid/algorithms/multigrid/CoarsenedMatrix.h> | ||||
| #include <Grid/algorithms/multigrid/GeneralCoarsenedMatrix.h> | ||||
| @@ -70,8 +70,8 @@ public: | ||||
|   Coordinate _istride;    // Inner stride i.e. within simd lane | ||||
|   int _osites;                  // _isites*_osites = product(dimensions). | ||||
|   int _isites; | ||||
|   int _fsites;                  // _isites*_osites = product(dimensions). | ||||
|   int _gsites; | ||||
|   int64_t _fsites;                  // _isites*_osites = product(dimensions). | ||||
|   int64_t _gsites; | ||||
|   Coordinate _slice_block;// subslice information | ||||
|   Coordinate _slice_stride; | ||||
|   Coordinate _slice_nblock; | ||||
| @@ -183,7 +183,7 @@ public: | ||||
|   inline int Nsimd(void)  const { return _isites; };// Synonymous with iSites | ||||
|   inline int oSites(void) const { return _osites; }; | ||||
|   inline int lSites(void) const { return _isites*_osites; };  | ||||
|   inline int gSites(void) const { return _isites*_osites*_Nprocessors; };  | ||||
|   inline int64_t gSites(void) const { return (int64_t)_isites*(int64_t)_osites*(int64_t)_Nprocessors; };  | ||||
|   inline int Nd    (void) const { return _ndimension;}; | ||||
|  | ||||
|   inline const Coordinate LocalStarts(void)             { return _lstart;    }; | ||||
| @@ -214,7 +214,7 @@ public: | ||||
|   //////////////////////////////////////////////////////////////// | ||||
|   // Global addressing | ||||
|   //////////////////////////////////////////////////////////////// | ||||
|   void GlobalIndexToGlobalCoor(int gidx,Coordinate &gcoor){ | ||||
|   void GlobalIndexToGlobalCoor(int64_t gidx,Coordinate &gcoor){ | ||||
|     assert(gidx< gSites()); | ||||
|     Lexicographic::CoorFromIndex(gcoor,gidx,_gdimensions); | ||||
|   } | ||||
| @@ -222,7 +222,7 @@ public: | ||||
|     assert(lidx<lSites()); | ||||
|     Lexicographic::CoorFromIndex(lcoor,lidx,_ldimensions); | ||||
|   } | ||||
|   void GlobalCoorToGlobalIndex(const Coordinate & gcoor,int & gidx){ | ||||
|   void GlobalCoorToGlobalIndex(const Coordinate & gcoor,int64_t & gidx){ | ||||
|     gidx=0; | ||||
|     int mult=1; | ||||
|     for(int mu=0;mu<_ndimension;mu++) { | ||||
|   | ||||
| @@ -360,7 +360,7 @@ public: | ||||
|  | ||||
| template<class vobj> std::ostream& operator<< (std::ostream& stream, const Lattice<vobj> &o){ | ||||
|   typedef typename vobj::scalar_object sobj; | ||||
|   for(int g=0;g<o.Grid()->_gsites;g++){ | ||||
|   for(int64_t g=0;g<o.Grid()->_gsites;g++){ | ||||
|  | ||||
|     Coordinate gcoor; | ||||
|     o.Grid()->GlobalIndexToGlobalCoor(g,gcoor); | ||||
|   | ||||
| @@ -361,9 +361,14 @@ public: | ||||
|     _bernoulli.resize(_vol,std::discrete_distribution<int32_t>{1,1}); | ||||
|     _uid.resize(_vol,std::uniform_int_distribution<uint32_t>() ); | ||||
|   } | ||||
|  | ||||
|   template <class vobj,class distribution> inline void fill(Lattice<vobj> &l,std::vector<distribution> &dist){ | ||||
|  | ||||
|   template <class vobj,class distribution> inline void fill(Lattice<vobj> &l,std::vector<distribution> &dist) | ||||
|   { | ||||
|     if ( l.Grid()->_isCheckerBoarded ) { | ||||
|       Lattice<vobj> tmp(_grid); | ||||
|       fill(tmp,dist); | ||||
|       pickCheckerboard(l.Checkerboard(),l,tmp); | ||||
|       return; | ||||
|     } | ||||
|     typedef typename vobj::scalar_object scalar_object; | ||||
|     typedef typename vobj::scalar_type scalar_type; | ||||
|     typedef typename vobj::vector_type vector_type; | ||||
| @@ -427,7 +432,7 @@ public: | ||||
| #if 1 | ||||
|     thread_for( lidx, _grid->lSites(), { | ||||
|  | ||||
| 	int gidx; | ||||
| 	int64_t gidx; | ||||
| 	int o_idx; | ||||
| 	int i_idx; | ||||
| 	int rank; | ||||
|   | ||||
| @@ -471,13 +471,13 @@ inline void blockSum(Lattice<vobj> &coarseData,const Lattice<vobj> &fineData) | ||||
|  | ||||
|   vobj zz = Zero(); | ||||
|    | ||||
|   accelerator_for(sc,coarse->oSites(),1,{ | ||||
|   accelerator_for(sc,coarse->oSites(),vobj::Nsimd(),{ | ||||
|  | ||||
|       // One thread per sub block | ||||
|       Coordinate coor_c(_ndimension); | ||||
|       Lexicographic::CoorFromIndex(coor_c,sc,coarse_rdimensions);  // Block coordinate | ||||
|  | ||||
|       vobj cd = zz; | ||||
|       auto cd = coalescedRead(zz); | ||||
|        | ||||
|       for(int sb=0;sb<blockVol;sb++){ | ||||
|  | ||||
| @@ -488,10 +488,10 @@ inline void blockSum(Lattice<vobj> &coarseData,const Lattice<vobj> &fineData) | ||||
| 	for(int d=0;d<_ndimension;d++) coor_f[d]=coor_c[d]*block_r[d] + coor_b[d]; | ||||
| 	Lexicographic::IndexFromCoor(coor_f,sf,fine_rdimensions); | ||||
|  | ||||
| 	cd=cd+fineData_p[sf]; | ||||
| 	cd=cd+coalescedRead(fineData_p[sf]); | ||||
|       } | ||||
|  | ||||
|       coarseData_p[sc] = cd; | ||||
|       coalescedWrite(coarseData_p[sc],cd); | ||||
|  | ||||
|     }); | ||||
|   return; | ||||
| @@ -1054,7 +1054,7 @@ void Replicate(const Lattice<vobj> &coarse,Lattice<vobj> & fine) | ||||
|  | ||||
|   Coordinate fcoor(nd); | ||||
|   Coordinate ccoor(nd); | ||||
|   for(int g=0;g<fg->gSites();g++){ | ||||
|   for(int64_t g=0;g<fg->gSites();g++){ | ||||
|  | ||||
|     fg->GlobalIndexToGlobalCoor(g,fcoor); | ||||
|     for(int d=0;d<nd;d++){ | ||||
|   | ||||
| @@ -63,8 +63,9 @@ public: | ||||
|     dims=_grid->Nd(); | ||||
|     AllocateGrids(); | ||||
|     Coordinate local     =unpadded_grid->LocalDimensions(); | ||||
|     Coordinate procs     =unpadded_grid->ProcessorGrid(); | ||||
|     for(int d=0;d<dims;d++){ | ||||
|       assert(local[d]>=depth); | ||||
|       if ( procs[d] > 1 ) assert(local[d]>=depth); | ||||
|     } | ||||
|   } | ||||
|   void DeleteGrids(void) | ||||
| @@ -85,8 +86,10 @@ public: | ||||
|     // expand up one dim at a time | ||||
|     for(int d=0;d<dims;d++){ | ||||
|  | ||||
|       plocal[d] += 2*depth;  | ||||
|  | ||||
|       if ( processors[d] > 1 ) {  | ||||
| 	plocal[d] += 2*depth;  | ||||
|       } | ||||
|        | ||||
|       for(int d=0;d<dims;d++){ | ||||
| 	global[d] = plocal[d]*processors[d]; | ||||
|       } | ||||
| @@ -97,11 +100,17 @@ public: | ||||
|   template<class vobj> | ||||
|   inline Lattice<vobj> Extract(const Lattice<vobj> &in) const | ||||
|   { | ||||
|     Coordinate processors=unpadded_grid->_processors; | ||||
|  | ||||
|     Lattice<vobj> out(unpadded_grid); | ||||
|  | ||||
|     Coordinate local     =unpadded_grid->LocalDimensions(); | ||||
|     Coordinate fll(dims,depth); // depends on the MPI spread | ||||
|     // depends on the MPI spread       | ||||
|     Coordinate fll(dims,depth); | ||||
|     Coordinate tll(dims,0); // depends on the MPI spread | ||||
|     for(int d=0;d<dims;d++){ | ||||
|       if( processors[d]==1 ) fll[d]=0; | ||||
|     } | ||||
|     localCopyRegion(in,out,fll,tll,local); | ||||
|     return out; | ||||
|   } | ||||
| @@ -120,6 +129,7 @@ public: | ||||
|   template<class vobj> | ||||
|   inline Lattice<vobj> Expand(int dim, const Lattice<vobj> &in, const CshiftImplBase<vobj> &cshift = CshiftImplDefault<vobj>()) const | ||||
|   { | ||||
|     Coordinate processors=unpadded_grid->_processors; | ||||
|     GridBase *old_grid = in.Grid(); | ||||
|     GridCartesian *new_grid = grids[dim];//These are new grids | ||||
|     Lattice<vobj>  padded(new_grid); | ||||
| @@ -132,36 +142,48 @@ public: | ||||
|     std::cout << " dim "<<dim<<" local "<<local << " padding to "<<plocal<<std::endl; | ||||
|  | ||||
|     double tins=0, tshift=0; | ||||
|      | ||||
|     // Middle bit | ||||
|     double t = usecond(); | ||||
|     for(int x=0;x<local[dim];x++){ | ||||
|       InsertSliceLocal(in,padded,x,depth+x,dim); | ||||
|     } | ||||
|     tins += usecond() - t; | ||||
|      | ||||
|     // High bit | ||||
|     t = usecond(); | ||||
|     shifted = cshift.Cshift(in,dim,depth); | ||||
|     tshift += usecond() - t; | ||||
|  | ||||
|     t=usecond(); | ||||
|     for(int x=0;x<depth;x++){ | ||||
|       InsertSliceLocal(shifted,padded,local[dim]-depth+x,depth+local[dim]+x,dim); | ||||
|     } | ||||
|     tins += usecond() - t; | ||||
|      | ||||
|     // Low bit | ||||
|     t = usecond(); | ||||
|     shifted = cshift.Cshift(in,dim,-depth); | ||||
|     tshift += usecond() - t; | ||||
|      | ||||
|     t = usecond(); | ||||
|     for(int x=0;x<depth;x++){ | ||||
|       InsertSliceLocal(shifted,padded,x,x,dim); | ||||
|     } | ||||
|     tins += usecond() - t; | ||||
|     int islocal = 0 ; | ||||
|     if ( processors[dim] == 1 ) islocal = 1; | ||||
|  | ||||
|     if ( islocal ) { | ||||
|        | ||||
|       double t = usecond(); | ||||
|       for(int x=0;x<local[dim];x++){ | ||||
| 	InsertSliceLocal(in,padded,x,x,dim); | ||||
|       } | ||||
|       tins += usecond() - t; | ||||
|        | ||||
|     } else {  | ||||
|       // Middle bit | ||||
|       double t = usecond(); | ||||
|       for(int x=0;x<local[dim];x++){ | ||||
| 	InsertSliceLocal(in,padded,x,depth+x,dim); | ||||
|       } | ||||
|       tins += usecond() - t; | ||||
|      | ||||
|       // High bit | ||||
|       t = usecond(); | ||||
|       shifted = cshift.Cshift(in,dim,depth); | ||||
|       tshift += usecond() - t; | ||||
|  | ||||
|       t=usecond(); | ||||
|       for(int x=0;x<depth;x++){ | ||||
| 	InsertSliceLocal(shifted,padded,local[dim]-depth+x,depth+local[dim]+x,dim); | ||||
|       } | ||||
|       tins += usecond() - t; | ||||
|      | ||||
|       // Low bit | ||||
|       t = usecond(); | ||||
|       shifted = cshift.Cshift(in,dim,-depth); | ||||
|       tshift += usecond() - t; | ||||
|      | ||||
|       t = usecond(); | ||||
|       for(int x=0;x<depth;x++){ | ||||
| 	InsertSliceLocal(shifted,padded,x,x,dim); | ||||
|       } | ||||
|       tins += usecond() - t; | ||||
|     } | ||||
|     std::cout << GridLogPerformance << "PaddedCell::Expand timings: cshift:" << tshift/1000 << "ms, insert-slice:" << tins/1000 << "ms" << std::endl; | ||||
|      | ||||
|     return padded; | ||||
|   | ||||
| @@ -487,7 +487,7 @@ public: | ||||
|     for(int mu=0;mu<Nd;mu++){ | ||||
|       { //view scope | ||||
| 	autoView( gStaple_v , gStaple, AcceleratorWrite); | ||||
| 	auto gStencil_v = gStencil.View(); | ||||
| 	auto gStencil_v = gStencil.View(AcceleratorRead); | ||||
| 	 | ||||
| 	accelerator_for(ss, ggrid->oSites(), ggrid->Nsimd(), { | ||||
| 	    decltype(coalescedRead(Ug_dirs_v[0][0])) stencil_ss; | ||||
| @@ -1199,7 +1199,7 @@ public: | ||||
|  | ||||
|       { //view scope | ||||
| 	autoView( gStaple_v , gStaple, AcceleratorWrite); | ||||
| 	auto gStencil_v = gStencil.View(); | ||||
| 	auto gStencil_v = gStencil.View(AcceleratorRead); | ||||
|  | ||||
| 	accelerator_for(ss, ggrid->oSites(), ggrid->Nsimd(), { | ||||
| 	    decltype(coalescedRead(Ug_dirs_v[0][0])) stencil_ss; | ||||
|   | ||||
| @@ -1130,6 +1130,14 @@ static_assert(sizeof(SIMD_Ftype) == sizeof(SIMD_Itype), "SIMD vector lengths inc | ||||
| #endif | ||||
| #endif | ||||
|  | ||||
| // Fixme need coalesced read gpermute | ||||
| template<class vobj> void gpermute(vobj & inout,int perm){ | ||||
|   vobj tmp=inout; | ||||
|   if (perm & 0x1 ) { permute(inout,tmp,0); tmp=inout;} | ||||
|   if (perm & 0x2 ) { permute(inout,tmp,1); tmp=inout;} | ||||
|   if (perm & 0x4 ) { permute(inout,tmp,2); tmp=inout;} | ||||
|   if (perm & 0x8 ) { permute(inout,tmp,3); tmp=inout;} | ||||
| } | ||||
|  | ||||
| NAMESPACE_END(Grid); | ||||
|  | ||||
|   | ||||
| @@ -46,7 +46,7 @@ class GeneralLocalStencilView { | ||||
|   accelerator_inline GeneralStencilEntry * GetEntry(int point,int osite) {  | ||||
|     return & this->_entries_p[point+this->_npoints*osite];  | ||||
|   } | ||||
|  | ||||
|   void ViewClose(void){}; | ||||
| }; | ||||
| //////////////////////////////////////// | ||||
| // The Stencil Class itself | ||||
| @@ -61,7 +61,7 @@ protected: | ||||
| public:  | ||||
|   GridBase *Grid(void) const { return _grid; } | ||||
|  | ||||
|   View_type View(void) const { | ||||
|   View_type View(int mode) const { | ||||
|     View_type accessor(*( (View_type *) this)); | ||||
|     return accessor; | ||||
|   } | ||||
|   | ||||
| @@ -137,6 +137,18 @@ inline void cuda_mem(void) | ||||
|     dim3 cu_blocks ((num1+nt-1)/nt,num2,1);				\ | ||||
|     LambdaApply<<<cu_blocks,cu_threads,0,computeStream>>>(num1,num2,nsimd,lambda);	\ | ||||
|   } | ||||
| #define prof_accelerator_for2dNB( iter1, num1, iter2, num2, nsimd, ... )	\ | ||||
|   {									\ | ||||
|     int nt=acceleratorThreads();					\ | ||||
|     typedef uint64_t Iterator;						\ | ||||
|     auto lambda = [=] accelerator					\ | ||||
|       (Iterator iter1,Iterator iter2,Iterator lane) mutable {		\ | ||||
|       __VA_ARGS__;							\ | ||||
|     };									\ | ||||
|     dim3 cu_threads(nsimd,acceleratorThreads(),1);			\ | ||||
|     dim3 cu_blocks ((num1+nt-1)/nt,num2,1);				\ | ||||
|     ProfileLambdaApply<<<cu_blocks,cu_threads,0,computeStream>>>(num1,num2,nsimd,lambda); \ | ||||
|   } | ||||
|  | ||||
| #define accelerator_for6dNB(iter1, num1,				\ | ||||
|                             iter2, num2,				\ | ||||
| @@ -157,6 +169,20 @@ inline void cuda_mem(void) | ||||
|     Lambda6Apply<<<cu_blocks,cu_threads,0,computeStream>>>(num1,num2,num3,num4,num5,num6,lambda); \ | ||||
|   } | ||||
|  | ||||
|  | ||||
| #define accelerator_for2dNB( iter1, num1, iter2, num2, nsimd, ... )	\ | ||||
|   {									\ | ||||
|     int nt=acceleratorThreads();					\ | ||||
|     typedef uint64_t Iterator;						\ | ||||
|     auto lambda = [=] accelerator					\ | ||||
|       (Iterator iter1,Iterator iter2,Iterator lane) mutable {		\ | ||||
|       __VA_ARGS__;							\ | ||||
|     };									\ | ||||
|     dim3 cu_threads(nsimd,acceleratorThreads(),1);			\ | ||||
|     dim3 cu_blocks ((num1+nt-1)/nt,num2,1);				\ | ||||
|     LambdaApply<<<cu_blocks,cu_threads,0,computeStream>>>(num1,num2,nsimd,lambda);	\ | ||||
|   } | ||||
|  | ||||
| template<typename lambda>  __global__ | ||||
| void LambdaApply(uint64_t num1, uint64_t num2, uint64_t num3, lambda Lambda) | ||||
| { | ||||
| @@ -168,6 +194,17 @@ void LambdaApply(uint64_t num1, uint64_t num2, uint64_t num3, lambda Lambda) | ||||
|     Lambda(x,y,z); | ||||
|   } | ||||
| } | ||||
| template<typename lambda>  __global__ | ||||
| void ProfileLambdaApply(uint64_t num1, uint64_t num2, uint64_t num3, lambda Lambda) | ||||
| { | ||||
|   // Weird permute is to make lane coalesce for large blocks | ||||
|   uint64_t x = threadIdx.y + blockDim.y*blockIdx.x; | ||||
|   uint64_t y = threadIdx.z + blockDim.z*blockIdx.y; | ||||
|   uint64_t z = threadIdx.x; | ||||
|   if ( (x < num1) && (y<num2) && (z<num3) ) { | ||||
|     Lambda(x,y,z); | ||||
|   } | ||||
| } | ||||
|  | ||||
| template<typename lambda>  __global__ | ||||
| void Lambda6Apply(uint64_t num1, uint64_t num2, uint64_t num3, | ||||
| @@ -208,6 +245,7 @@ inline void *acceleratorAllocShared(size_t bytes) | ||||
|   if( err != cudaSuccess ) { | ||||
|     ptr = (void *) NULL; | ||||
|     printf(" cudaMallocManaged failed for %d %s \n",bytes,cudaGetErrorString(err)); | ||||
|     assert(0); | ||||
|   } | ||||
|   return ptr; | ||||
| }; | ||||
| @@ -460,6 +498,9 @@ inline void acceleratorCopySynchronise(void) { hipStreamSynchronize(copyStream); | ||||
| #if defined(GRID_SYCL) || defined(GRID_CUDA) || defined(GRID_HIP) | ||||
| // FIXME -- the non-blocking nature got broken March 30 2023 by PAB | ||||
| #define accelerator_forNB( iter1, num1, nsimd, ... ) accelerator_for2dNB( iter1, num1, iter2, 1, nsimd, {__VA_ARGS__} );   | ||||
| #define prof_accelerator_for( iter1, num1, nsimd, ... ) \ | ||||
|   prof_accelerator_for2dNB( iter1, num1, iter2, 1, nsimd, {__VA_ARGS__} );\ | ||||
|   accelerator_barrier(dummy); | ||||
|  | ||||
| #define accelerator_for( iter, num, nsimd, ... )		\ | ||||
|   accelerator_forNB(iter, num, nsimd, { __VA_ARGS__ } );	\ | ||||
|   | ||||
| @@ -94,6 +94,13 @@ static constexpr int MaxDims = GRID_MAX_LATTICE_DIMENSION; | ||||
|  | ||||
| typedef AcceleratorVector<int,MaxDims> Coordinate; | ||||
|  | ||||
| template<class T,int _ndim> | ||||
| inline bool operator==(const AcceleratorVector<T,_ndim> &v,const AcceleratorVector<T,_ndim> &w) | ||||
| { | ||||
|   if (v.size()!=w.size()) return false; | ||||
|   for(int i=0;i<v.size();i++) if ( v[i]!=w[i] ) return false; | ||||
|   return true; | ||||
| } | ||||
| template<class T,int _ndim> | ||||
| inline std::ostream & operator<<(std::ostream &os, const AcceleratorVector<T,_ndim> &v) | ||||
| { | ||||
|   | ||||
| @@ -8,7 +8,7 @@ namespace Grid{ | ||||
|   public: | ||||
|  | ||||
|     template<class coor_t> | ||||
|     static accelerator_inline void CoorFromIndex (coor_t& coor,int index,const coor_t &dims){ | ||||
|     static accelerator_inline void CoorFromIndex (coor_t& coor,int64_t index,const coor_t &dims){ | ||||
|       int nd= dims.size(); | ||||
|       coor.resize(nd); | ||||
|       for(int d=0;d<nd;d++){ | ||||
| @@ -18,28 +18,45 @@ namespace Grid{ | ||||
|     } | ||||
|  | ||||
|     template<class coor_t> | ||||
|     static accelerator_inline void IndexFromCoor (const coor_t& coor,int &index,const coor_t &dims){ | ||||
|     static accelerator_inline void IndexFromCoor (const coor_t& coor,int64_t &index,const coor_t &dims){ | ||||
|       int nd=dims.size(); | ||||
|       int stride=1; | ||||
|       index=0; | ||||
|       for(int d=0;d<nd;d++){ | ||||
| 	index = index+stride*coor[d]; | ||||
| 	index = index+(int64_t)stride*coor[d]; | ||||
| 	stride=stride*dims[d]; | ||||
|       } | ||||
|     } | ||||
|     template<class coor_t> | ||||
|     static accelerator_inline void IndexFromCoor (const coor_t& coor,int &index,const coor_t &dims){ | ||||
|       int64_t index64; | ||||
|       IndexFromCoor(coor,index64,dims); | ||||
|       assert(index64<2*1024*1024*1024LL); | ||||
|       index = (int) index64; | ||||
|     } | ||||
|  | ||||
|     template<class coor_t> | ||||
|     static inline void IndexFromCoorReversed (const coor_t& coor,int &index,const coor_t &dims){ | ||||
|     static inline void IndexFromCoorReversed (const coor_t& coor,int64_t &index,const coor_t &dims){ | ||||
|       int nd=dims.size(); | ||||
|       int stride=1; | ||||
|       index=0; | ||||
|       for(int d=nd-1;d>=0;d--){ | ||||
| 	index = index+stride*coor[d]; | ||||
| 	index = index+(int64_t)stride*coor[d]; | ||||
| 	stride=stride*dims[d]; | ||||
|       } | ||||
|     } | ||||
|     template<class coor_t> | ||||
|     static inline void CoorFromIndexReversed (coor_t& coor,int index,const coor_t &dims){ | ||||
|     static inline void IndexFromCoorReversed (const coor_t& coor,int &index,const coor_t &dims){ | ||||
|       int64_t index64; | ||||
|       IndexFromCoorReversed(coor,index64,dims); | ||||
|       if ( index64>=2*1024*1024*1024LL ){ | ||||
| 	std::cout << " IndexFromCoorReversed " << coor<<" index " << index64<< " dims "<<dims<<std::endl; | ||||
|       } | ||||
|       assert(index64<2*1024*1024*1024LL); | ||||
|       index = (int) index64; | ||||
|     } | ||||
|     template<class coor_t> | ||||
|     static inline void CoorFromIndexReversed (coor_t& coor,int64_t index,const coor_t &dims){ | ||||
|       int nd= dims.size(); | ||||
|       coor.resize(nd); | ||||
|       for(int d=nd-1;d>=0;d--){ | ||||
|   | ||||
							
								
								
									
										43
									
								
								systems/Frontier/benchmarks/bench2.slurm
									
									
									
									
									
										Executable file
									
								
							
							
						
						
									
										43
									
								
								systems/Frontier/benchmarks/bench2.slurm
									
									
									
									
									
										Executable file
									
								
							| @@ -0,0 +1,43 @@ | ||||
| #!/bin/bash -l | ||||
| #SBATCH --job-name=bench | ||||
| ##SBATCH --partition=small-g | ||||
| #SBATCH --nodes=2 | ||||
| #SBATCH --ntasks-per-node=8 | ||||
| #SBATCH --cpus-per-task=7 | ||||
| #SBATCH --gpus-per-node=8 | ||||
| #SBATCH --time=00:10:00 | ||||
| #SBATCH --account=phy157_dwf | ||||
| #SBATCH --gpu-bind=none | ||||
| #SBATCH --exclusive | ||||
| #SBATCH --mem=0 | ||||
|  | ||||
| cat << EOF > select_gpu | ||||
| #!/bin/bash | ||||
| export GPU_MAP=(0 1 2 3 7 6 5 4) | ||||
| export NUMA_MAP=(3 3 1 1 2 2 0 0) | ||||
| export GPU=\${GPU_MAP[\$SLURM_LOCALID]} | ||||
| export NUMA=\${NUMA_MAP[\$SLURM_LOCALID]} | ||||
| export HIP_VISIBLE_DEVICES=\$GPU | ||||
| unset ROCR_VISIBLE_DEVICES | ||||
| echo RANK \$SLURM_LOCALID using GPU \$GPU     | ||||
| exec numactl -m \$NUMA -N \$NUMA \$* | ||||
| EOF | ||||
|  | ||||
| chmod +x ./select_gpu | ||||
|  | ||||
| root=$HOME/Frontier/Grid/systems/Frontier/ | ||||
| source ${root}/sourceme.sh | ||||
|  | ||||
| export OMP_NUM_THREADS=7 | ||||
| export MPICH_GPU_SUPPORT_ENABLED=1 | ||||
| export MPICH_SMP_SINGLE_COPY_MODE=XPMEM | ||||
|  | ||||
| for vol in 32.32.32.64 | ||||
| do | ||||
| srun ./select_gpu ./Benchmark_dwf_fp32 --mpi 2.2.2.2 --accelerator-threads 8 --comms-overlap --shm 2048 --shm-mpi 0 --grid $vol  > log.shm0.ov.$vol | ||||
| srun ./select_gpu ./Benchmark_dwf_fp32 --mpi 2.2.2.2 --accelerator-threads 8 --comms-overlap --shm 2048 --shm-mpi 1 --grid $vol  > log.shm1.ov.$vol | ||||
|  | ||||
| srun ./select_gpu ./Benchmark_dwf_fp32 --mpi 2.2.2.2 --accelerator-threads 8 --comms-sequential --shm 2048 --shm-mpi 0 --grid $vol  > log.shm0.seq.$vol | ||||
| srun ./select_gpu ./Benchmark_dwf_fp32 --mpi 2.2.2.2 --accelerator-threads 8 --comms-sequential --shm 2048 --shm-mpi 1 --grid $vol > log.shm1.seq.$vol | ||||
| done | ||||
|  | ||||
							
								
								
									
										23
									
								
								systems/Frontier/config-command
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										23
									
								
								systems/Frontier/config-command
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,23 @@ | ||||
| CLIME=`spack find --paths c-lime@2-3-9 | grep c-lime| cut -c 15-` | ||||
| ../../configure --enable-comms=mpi-auto \ | ||||
| --with-lime=$CLIME \ | ||||
| --enable-unified=no \ | ||||
| --enable-shm=nvlink \ | ||||
| --enable-tracing=timer \ | ||||
| --enable-accelerator=hip \ | ||||
| --enable-gen-simd-width=64 \ | ||||
| --disable-gparity \ | ||||
| --disable-fermion-reps \ | ||||
| --enable-simd=GPU \ | ||||
| --enable-accelerator-cshift \ | ||||
| --with-gmp=$OLCF_GMP_ROOT \ | ||||
| --with-fftw=$FFTW_DIR/.. \ | ||||
| --with-mpfr=/opt/cray/pe/gcc/mpfr/3.1.4/ \ | ||||
| --disable-fermion-reps \ | ||||
| CXX=hipcc MPICXX=mpicxx \ | ||||
| CXXFLAGS="-fPIC -I{$ROCM_PATH}/include/ -std=c++14 -I${MPICH_DIR}/include -L/lib64 " \ | ||||
|  LDFLAGS="-L/lib64 -L${MPICH_DIR}/lib -lmpi -L${CRAY_MPICH_ROOTDIR}/gtl/lib -lmpi_gtl_hsa -lamdhip64 " | ||||
|  | ||||
|  | ||||
|  | ||||
|  | ||||
							
								
								
									
										13
									
								
								systems/Frontier/mpiwrapper.sh
									
									
									
									
									
										Executable file
									
								
							
							
						
						
									
										13
									
								
								systems/Frontier/mpiwrapper.sh
									
									
									
									
									
										Executable file
									
								
							| @@ -0,0 +1,13 @@ | ||||
| #!/bin/bash | ||||
|  | ||||
| lrank=$SLURM_LOCALID | ||||
| lgpu=(0 1 2 3 7 6 5 4) | ||||
|  | ||||
| export ROCR_VISIBLE_DEVICES=${lgpu[$lrank]} | ||||
|  | ||||
| echo "`hostname` - $lrank device=$ROCR_VISIBLE_DEVICES " | ||||
|  | ||||
| $* | ||||
|  | ||||
|  | ||||
|  | ||||
							
								
								
									
										13
									
								
								systems/Frontier/sourceme.sh
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										13
									
								
								systems/Frontier/sourceme.sh
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,13 @@ | ||||
| . /autofs/nccs-svm1_home1/paboyle/Crusher/Grid/spack/share/spack/setup-env.sh | ||||
| spack load c-lime | ||||
| #export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/sw/crusher/spack-envs/base/opt/cray-sles15-zen3/gcc-11.2.0/gperftools-2.9.1-72ubwtuc5wcz2meqltbfdb76epufgzo2/lib | ||||
| module load emacs  | ||||
| module load PrgEnv-gnu | ||||
| module load rocm | ||||
| module load cray-mpich/8.1.23 | ||||
| module load gmp | ||||
| module load cray-fftw | ||||
| module load craype-accel-amd-gfx90a | ||||
| export LD_LIBRARY_PATH=/opt/gcc/mpfr/3.1.4/lib:$LD_LIBRARY_PATH | ||||
| #Hack for lib | ||||
| #export LD_LIBRARY_PATH=`pwd`:$LD_LIBRARY_PATH | ||||
							
								
								
									
										9
									
								
								systems/Frontier/wrap.sh
									
									
									
									
									
										Executable file
									
								
							
							
						
						
									
										9
									
								
								systems/Frontier/wrap.sh
									
									
									
									
									
										Executable file
									
								
							| @@ -0,0 +1,9 @@ | ||||
| #!/bin/sh | ||||
|  | ||||
| export HIP_VISIBLE_DEVICES=$ROCR_VISIBLE_DEVICES | ||||
| unset ROCR_VISIBLE_DEVICES | ||||
|  | ||||
| #rank=$SLURM_PROCID | ||||
| #rocprof -d rocprof.$rank -o rocprof.$rank/results.rank$SLURM_PROCID.csv --sys-trace $@ | ||||
|  | ||||
| $@ | ||||
| @@ -1,4 +1,3 @@ | ||||
| BREW=/opt/local/ | ||||
| MPICXX=mpicxx ../../configure --enable-simd=GEN --enable-comms=mpi-auto --enable-unified=yes --prefix $HOME/QCD/GridInstall --with-lime=/Users/peterboyle/QCD/SciDAC/install/ --with-openssl=$BREW --disable-fermion-reps --disable-gparity --disable-debug | ||||
|  | ||||
|  | ||||
|   | ||||
							
								
								
									
										235
									
								
								tests/debug/Test_general_coarse.cc
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										235
									
								
								tests/debug/Test_general_coarse.cc
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,235 @@ | ||||
|     /************************************************************************************* | ||||
|  | ||||
|     Grid physics library, www.github.com/paboyle/Grid  | ||||
|  | ||||
|     Source file: ./tests/Test_padded_cell.cc | ||||
|  | ||||
|     Copyright (C) 2023 | ||||
|  | ||||
| Author: Peter Boyle <paboyle@ph.ed.ac.uk> | ||||
|  | ||||
|     This program is free software; you can redistribute it and/or modify | ||||
|     it under the terms of the GNU General Public License as published by | ||||
|     the Free Software Foundation; either version 2 of the License, or | ||||
|     (at your option) any later version. | ||||
|  | ||||
|     This program is distributed in the hope that it will be useful, | ||||
|     but WITHOUT ANY WARRANTY; without even the implied warranty of | ||||
|     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the | ||||
|     GNU General Public License for more details. | ||||
|  | ||||
|     You should have received a copy of the GNU General Public License along | ||||
|     with this program; if not, write to the Free Software Foundation, Inc., | ||||
|     51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. | ||||
|  | ||||
|     See the full license in the file "LICENSE" in the top level distribution directory | ||||
|     *************************************************************************************/ | ||||
|     /*  END LEGAL */ | ||||
| #include <Grid/Grid.h> | ||||
| #include <Grid/lattice/PaddedCell.h> | ||||
| #include <Grid/stencil/GeneralLocalStencil.h> | ||||
|  | ||||
| #include <Grid/algorithms/iterative/PrecGeneralisedConjugateResidual.h> | ||||
| #include <Grid/algorithms/iterative/PrecGeneralisedConjugateResidualNonHermitian.h> | ||||
| #include <Grid/algorithms/iterative/BiCGSTAB.h> | ||||
|  | ||||
| using namespace std; | ||||
| using namespace Grid; | ||||
|  | ||||
| /////////////////////// | ||||
| // Tells little dirac op to use MdagM as the .Op() | ||||
| /////////////////////// | ||||
| template<class Field> | ||||
| class HermOpAdaptor : public LinearOperatorBase<Field> | ||||
| { | ||||
|   LinearOperatorBase<Field> & wrapped; | ||||
| public: | ||||
|   HermOpAdaptor(LinearOperatorBase<Field> &wrapme) : wrapped(wrapme)  {}; | ||||
|   void OpDiag (const Field &in, Field &out) {    assert(0);  } | ||||
|   void OpDir  (const Field &in, Field &out,int dir,int disp) {    assert(0);  } | ||||
|   void OpDirAll  (const Field &in, std::vector<Field> &out){    assert(0);  }; | ||||
|   void Op     (const Field &in, Field &out){ | ||||
|     wrapped.HermOp(in,out); | ||||
|   } | ||||
|   void AdjOp     (const Field &in, Field &out){ | ||||
|     wrapped.HermOp(in,out); | ||||
|   } | ||||
|   void HermOpAndNorm(const Field &in, Field &out,RealD &n1,RealD &n2){    assert(0);  } | ||||
|   void HermOp(const Field &in, Field &out){ | ||||
|     wrapped.HermOp(in,out); | ||||
|   } | ||||
| }; | ||||
|  | ||||
|  | ||||
| int main (int argc, char ** argv) | ||||
| { | ||||
|   Grid_init(&argc,&argv); | ||||
|  | ||||
|   const int Ls=4; | ||||
|  | ||||
|   GridCartesian         * UGrid   = SpaceTimeGrid::makeFourDimGrid(GridDefaultLatt(), | ||||
| 								   GridDefaultSimd(Nd,vComplex::Nsimd()), | ||||
| 								   GridDefaultMpi()); | ||||
|   GridRedBlackCartesian * UrbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(UGrid); | ||||
|  | ||||
|   GridCartesian         * FGrid   = SpaceTimeGrid::makeFiveDimGrid(Ls,UGrid); | ||||
|   GridRedBlackCartesian * FrbGrid = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls,UGrid); | ||||
|  | ||||
|   // Construct a coarsened grid | ||||
|   Coordinate clatt = GridDefaultLatt(); | ||||
|   for(int d=0;d<clatt.size();d++){ | ||||
|     clatt[d] = clatt[d]/2; | ||||
|   } | ||||
|  | ||||
|   GridCartesian *Coarse4d =  SpaceTimeGrid::makeFourDimGrid(clatt, | ||||
| 							    GridDefaultSimd(Nd,vComplex::Nsimd()), | ||||
| 							    GridDefaultMpi());; | ||||
|   GridCartesian *Coarse5d =  SpaceTimeGrid::makeFiveDimGrid(1,Coarse4d); | ||||
|  | ||||
|   std::vector<int> seeds4({1,2,3,4}); | ||||
|   std::vector<int> seeds5({5,6,7,8}); | ||||
|   std::vector<int> cseeds({5,6,7,8}); | ||||
|   GridParallelRNG          RNG5(FGrid);   RNG5.SeedFixedIntegers(seeds5); | ||||
|   GridParallelRNG          RNG4(UGrid);   RNG4.SeedFixedIntegers(seeds4); | ||||
|   GridParallelRNG          CRNG(Coarse5d);CRNG.SeedFixedIntegers(cseeds); | ||||
|  | ||||
|   LatticeFermion    src(FGrid); random(RNG5,src); | ||||
|   LatticeFermion result(FGrid); result=Zero(); | ||||
|   LatticeFermion    ref(FGrid); ref=Zero(); | ||||
|   LatticeFermion    tmp(FGrid); | ||||
|   LatticeFermion    err(FGrid); | ||||
|   LatticeGaugeField Umu(UGrid); | ||||
|   SU<Nc>::HotConfiguration(RNG4,Umu); | ||||
|   //  Umu=Zero(); | ||||
|    | ||||
|   RealD mass=0.1; | ||||
|   RealD M5=1.8; | ||||
|  | ||||
|   DomainWallFermionD Ddwf(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5); | ||||
|  | ||||
|   const int nbasis = 16; | ||||
|   const int cb = 0 ; | ||||
|   LatticeFermion prom(FGrid); | ||||
|  | ||||
|   std::vector<LatticeFermion> subspace(nbasis,FGrid); | ||||
|  | ||||
|   std::cout<<GridLogMessage<<"Calling Aggregation class" <<std::endl; | ||||
|  | ||||
|   /////////////////////////////////////////////////////////// | ||||
|   // Squared operator is in HermOp | ||||
|   /////////////////////////////////////////////////////////// | ||||
|   MdagMLinearOperator<DomainWallFermionD,LatticeFermion> HermDefOp(Ddwf); | ||||
|  | ||||
|   /////////////////////////////////////////////////// | ||||
|   // Random aggregation space | ||||
|   /////////////////////////////////////////////////// | ||||
|   std::cout<<GridLogMessage << "Building random aggregation class"<< std::endl; | ||||
|   typedef Aggregation<vSpinColourVector,vTComplex,nbasis> Subspace; | ||||
|   Subspace Aggregates(Coarse5d,FGrid,cb); | ||||
|   Aggregates.CreateSubspaceRandom(RNG5); | ||||
|  | ||||
|   /////////////////////////////////////////////////// | ||||
|   // Build little dirac op | ||||
|   /////////////////////////////////////////////////// | ||||
|   std::cout<<GridLogMessage << "Building little Dirac operator"<< std::endl; | ||||
|  | ||||
|   typedef GeneralCoarsenedMatrix<vSpinColourVector,vTComplex,nbasis> LittleDiracOperator; | ||||
|   typedef LittleDiracOperator::CoarseVector CoarseVector; | ||||
|  | ||||
|   NextToNearestStencilGeometry5D geom(Coarse5d); | ||||
|   LittleDiracOperator LittleDiracOp(geom,FGrid,Coarse5d); | ||||
|   LittleDiracOperator LittleDiracOpCol(geom,FGrid,Coarse5d); | ||||
|  | ||||
|   HermOpAdaptor<LatticeFermionD> HOA(HermDefOp); | ||||
|  | ||||
|   int pp=16; | ||||
|   LittleDiracOp.CoarsenOperator(HOA,Aggregates); | ||||
|    | ||||
|   /////////////////////////////////////////////////// | ||||
|   // Test the operator | ||||
|   /////////////////////////////////////////////////// | ||||
|   CoarseVector c_src (Coarse5d); | ||||
|   CoarseVector c_res (Coarse5d); | ||||
|   CoarseVector c_res_dag(Coarse5d); | ||||
|   CoarseVector c_proj(Coarse5d); | ||||
|  | ||||
|   subspace=Aggregates.subspace; | ||||
|  | ||||
|   //  random(CRNG,c_src); | ||||
|   c_src = 1.0; | ||||
|  | ||||
|   blockPromote(c_src,err,subspace); | ||||
|  | ||||
|   prom=Zero(); | ||||
|   for(int b=0;b<nbasis;b++){ | ||||
|     prom=prom+subspace[b]; | ||||
|   } | ||||
|   err=err-prom;  | ||||
|   std::cout<<GridLogMessage<<"Promoted back from subspace: err "<<norm2(err)<<std::endl; | ||||
|   std::cout<<GridLogMessage<<"c_src "<<norm2(c_src)<<std::endl; | ||||
|   std::cout<<GridLogMessage<<"prom  "<<norm2(prom)<<std::endl; | ||||
|  | ||||
|   HermDefOp.HermOp(prom,tmp); | ||||
|  | ||||
|   blockProject(c_proj,tmp,subspace); | ||||
|   std::cout<<GridLogMessage<<" Called Big Dirac Op "<<norm2(tmp)<<std::endl; | ||||
|  | ||||
|   std::cout<<GridLogMessage<<" Calling little Dirac Op "<<std::endl; | ||||
|   LittleDiracOp.M(c_src,c_res); | ||||
|   LittleDiracOp.Mdag(c_src,c_res_dag); | ||||
|  | ||||
|   std::cout<<GridLogMessage<<"Little dop : "<<norm2(c_res)<<std::endl; | ||||
|   std::cout<<GridLogMessage<<"Little dop dag : "<<norm2(c_res_dag)<<std::endl; | ||||
|   std::cout<<GridLogMessage<<"Big dop in subspace : "<<norm2(c_proj)<<std::endl; | ||||
|  | ||||
|   c_proj = c_proj - c_res; | ||||
|   std::cout<<GridLogMessage<<" ldop error: "<<norm2(c_proj)<<std::endl; | ||||
|  | ||||
|   c_res_dag = c_res_dag - c_res; | ||||
|   std::cout<<GridLogMessage<<"Little dopDag - dop: "<<norm2(c_res_dag)<<std::endl; | ||||
|  | ||||
|   std::cout<<GridLogMessage << "Testing Hermiticity stochastically "<< std::endl; | ||||
|   CoarseVector phi(Coarse5d); | ||||
|   CoarseVector chi(Coarse5d); | ||||
|   CoarseVector Aphi(Coarse5d); | ||||
|   CoarseVector Achi(Coarse5d); | ||||
|  | ||||
|   random(CRNG,phi); | ||||
|   random(CRNG,chi); | ||||
|  | ||||
|   std::cout<<GridLogMessage<<"Made randoms "<<norm2(phi)<<" " << norm2(chi)<<std::endl; | ||||
|  | ||||
|   LittleDiracOp.M(phi,Aphi); | ||||
|  | ||||
|   LittleDiracOp.Mdag(chi,Achi); | ||||
|  | ||||
|   std::cout<<GridLogMessage<<"Aphi "<<norm2(Aphi)<<" A chi" << norm2(Achi)<<std::endl; | ||||
|  | ||||
|   ComplexD pAc = innerProduct(chi,Aphi); | ||||
|   ComplexD cAp = innerProduct(phi,Achi); | ||||
|   ComplexD cAc = innerProduct(chi,Achi); | ||||
|   ComplexD pAp = innerProduct(phi,Aphi); | ||||
|  | ||||
|   std::cout<<GridLogMessage<< "pAc "<<pAc<<" cAp "<< cAp<< " diff "<<pAc-adj(cAp)<<std::endl; | ||||
|   std::cout<<GridLogMessage<< "pAp "<<pAp<<" cAc "<< cAc<<"Should be real"<< std::endl; | ||||
|  | ||||
|   std::cout<<GridLogMessage<<"Testing linearity"<<std::endl; | ||||
|   CoarseVector PhiPlusChi(Coarse5d); | ||||
|   CoarseVector APhiPlusChi(Coarse5d); | ||||
|   CoarseVector linerr(Coarse5d); | ||||
|   PhiPlusChi = phi+chi; | ||||
|   LittleDiracOp.M(PhiPlusChi,APhiPlusChi); | ||||
|  | ||||
|   linerr= APhiPlusChi-Aphi; | ||||
|   linerr= linerr-Achi; | ||||
|   std::cout<<GridLogMessage<<"**Diff "<<norm2(linerr)<<std::endl; | ||||
|  | ||||
|   std::cout<<GridLogMessage<<std::endl; | ||||
|   std::cout<<GridLogMessage<<std::endl; | ||||
|   std::cout<<GridLogMessage<<"*******************************************"<<std::endl; | ||||
|   std::cout<<GridLogMessage<<"*******************************************"<<std::endl; | ||||
|   std::cout<<GridLogMessage<<"*******************************************"<<std::endl; | ||||
|    | ||||
|   Grid_finalize(); | ||||
|   return 0; | ||||
| } | ||||
							
								
								
									
										408
									
								
								tests/debug/Test_general_coarse_hdcg.cc
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										408
									
								
								tests/debug/Test_general_coarse_hdcg.cc
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,408 @@ | ||||
|     /************************************************************************************* | ||||
|  | ||||
|     Grid physics library, www.github.com/paboyle/Grid  | ||||
|  | ||||
|     Source file: ./tests/Test_general_coarse_hdcg.cc | ||||
|  | ||||
|     Copyright (C) 2023 | ||||
|  | ||||
| Author: Peter Boyle <pboyle@bnl.gov> | ||||
|  | ||||
|     This program is free software; you can redistribute it and/or modify | ||||
|     it under the terms of the GNU General Public License as published by | ||||
|     the Free Software Foundation; either version 2 of the License, or | ||||
|     (at your option) any later version. | ||||
|  | ||||
|     This program is distributed in the hope that it will be useful, | ||||
|     but WITHOUT ANY WARRANTY; without even the implied warranty of | ||||
|     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the | ||||
|     GNU General Public License for more details. | ||||
|  | ||||
|     You should have received a copy of the GNU General Public License along | ||||
|     with this program; if not, write to the Free Software Foundation, Inc., | ||||
|     51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. | ||||
|  | ||||
|     See the full license in the file "LICENSE" in the top level distribution directory | ||||
|     *************************************************************************************/ | ||||
|     /*  END LEGAL */ | ||||
| #include <Grid/Grid.h> | ||||
| #include <Grid/lattice/PaddedCell.h> | ||||
| #include <Grid/stencil/GeneralLocalStencil.h> | ||||
| //#include <Grid/algorithms/GeneralCoarsenedMatrix.h> | ||||
| #include <Grid/algorithms/iterative/AdefGeneric.h> | ||||
|  | ||||
| using namespace std; | ||||
| using namespace Grid; | ||||
|  | ||||
| template<class Coarsened> | ||||
| void SaveOperator(Coarsened &Operator,std::string file) | ||||
| { | ||||
| #ifdef HAVE_LIME | ||||
|   emptyUserRecord record; | ||||
|   ScidacWriter WR(Operator.Grid()->IsBoss()); | ||||
|   assert(Operator._A.size()==Operator.geom.npoint); | ||||
|   WR.open(file); | ||||
|   for(int p=0;p<Operator._A.size();p++){ | ||||
|     auto tmp = Operator.Cell.Extract(Operator._A[p]); | ||||
|     WR.writeScidacFieldRecord(tmp,record); | ||||
|   } | ||||
|   WR.close(); | ||||
| #endif | ||||
| } | ||||
| template<class Coarsened> | ||||
| void LoadOperator(Coarsened Operator,std::string file) | ||||
| { | ||||
| #ifdef HAVE_LIME | ||||
|   emptyUserRecord record; | ||||
|   Grid::ScidacReader RD ; | ||||
|   RD.open(file); | ||||
|   assert(Operator._A.size()==Operator.geom.npoint); | ||||
|   for(int p=0;p<Operator.geom.npoint;p++){ | ||||
|     conformable(Operator._A[p].Grid(),Operator.CoarseGrid()); | ||||
|     RD.readScidacFieldRecord(Operator._A[p],record); | ||||
|   }     | ||||
|   RD.close(); | ||||
|   Operator.ExchangeCoarseLinks(); | ||||
| #endif | ||||
| } | ||||
| template<class aggregation> | ||||
| void SaveBasis(aggregation &Agg,std::string file) | ||||
| { | ||||
| #ifdef HAVE_LIME | ||||
|   emptyUserRecord record; | ||||
|   ScidacWriter WR(Agg.FineGrid->IsBoss()); | ||||
|   WR.open(file); | ||||
|   for(int b=0;b<Agg.subspace.size();b++){ | ||||
|     WR.writeScidacFieldRecord(Agg.subspace[b],record); | ||||
|   } | ||||
|   WR.close(); | ||||
| #endif | ||||
| } | ||||
| template<class aggregation> | ||||
| void LoadBasis(aggregation &Agg, std::string file) | ||||
| { | ||||
| #ifdef HAVE_LIME | ||||
|   emptyUserRecord record; | ||||
|   ScidacReader RD ; | ||||
|   RD.open(file); | ||||
|   for(int b=0;b<Agg.subspace.size();b++){ | ||||
|     RD.readScidacFieldRecord(Agg.subspace[b],record); | ||||
|   }     | ||||
|   RD.close(); | ||||
| #endif | ||||
| } | ||||
|  | ||||
|  | ||||
| template<class Field> class TestSolver : public LinearFunction<Field> { | ||||
| public: | ||||
|   TestSolver() {}; | ||||
|   void operator() (const Field &in, Field &out){    out = Zero();  }      | ||||
| }; | ||||
|  | ||||
|  | ||||
| RealD InverseApproximation(RealD x){ | ||||
|   return 1.0/x; | ||||
| } | ||||
|  | ||||
| // Want Op in CoarsenOp to call MatPcDagMatPc | ||||
| template<class Field> | ||||
| class HermOpAdaptor : public LinearOperatorBase<Field> | ||||
| { | ||||
|   LinearOperatorBase<Field> & wrapped; | ||||
| public: | ||||
|   HermOpAdaptor(LinearOperatorBase<Field> &wrapme) : wrapped(wrapme)  {}; | ||||
|   void Op     (const Field &in, Field &out)   { wrapped.HermOp(in,out);  } | ||||
|   void HermOp(const Field &in, Field &out)    { wrapped.HermOp(in,out); } | ||||
|   void AdjOp     (const Field &in, Field &out){ wrapped.HermOp(in,out);  } | ||||
|   void OpDiag (const Field &in, Field &out)                  {    assert(0);  } | ||||
|   void OpDir  (const Field &in, Field &out,int dir,int disp) {    assert(0);  } | ||||
|   void OpDirAll  (const Field &in, std::vector<Field> &out)  {    assert(0);  }; | ||||
|   void HermOpAndNorm(const Field &in, Field &out,RealD &n1,RealD &n2){    assert(0);  } | ||||
| }; | ||||
| template<class Field,class Matrix> class ChebyshevSmoother : public LinearFunction<Field> | ||||
| { | ||||
| public: | ||||
|   using LinearFunction<Field>::operator(); | ||||
|   typedef LinearOperatorBase<Field> FineOperator; | ||||
|   FineOperator   & _SmootherOperator; | ||||
|   Chebyshev<Field> Cheby; | ||||
|   ChebyshevSmoother(RealD _lo,RealD _hi,int _ord, FineOperator &SmootherOperator) : | ||||
|     _SmootherOperator(SmootherOperator), | ||||
|     Cheby(_lo,_hi,_ord,InverseApproximation) | ||||
|   { | ||||
|     std::cout << GridLogMessage<<" Chebyshev smoother order "<<_ord<<" ["<<_lo<<","<<_hi<<"]"<<std::endl; | ||||
|   }; | ||||
|   void operator() (const Field &in, Field &out)  | ||||
|   { | ||||
|     Field tmp(in.Grid()); | ||||
|     tmp = in; | ||||
|     Cheby(_SmootherOperator,tmp,out); | ||||
|   } | ||||
| }; | ||||
|  | ||||
| int main (int argc, char ** argv) | ||||
| { | ||||
|   Grid_init(&argc,&argv); | ||||
|  | ||||
|   const int Ls=16; | ||||
|   const int nbasis = 40; | ||||
|   const int cb = 0 ; | ||||
|   RealD mass=0.01; | ||||
|   RealD M5=1.8; | ||||
|   RealD b=1.5; | ||||
|   RealD c=0.5; | ||||
|  | ||||
|   GridCartesian         * UGrid   = SpaceTimeGrid::makeFourDimGrid(GridDefaultLatt(), | ||||
| 								   GridDefaultSimd(Nd,vComplex::Nsimd()), | ||||
| 								   GridDefaultMpi()); | ||||
|   GridRedBlackCartesian * UrbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(UGrid); | ||||
|   GridCartesian         * FGrid   = SpaceTimeGrid::makeFiveDimGrid(Ls,UGrid); | ||||
|   GridRedBlackCartesian * FrbGrid = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls,UGrid); | ||||
|  | ||||
|   // Construct a coarsened grid with 4^4 cell | ||||
|   Coordinate clatt = GridDefaultLatt(); | ||||
|   for(int d=0;d<clatt.size();d++){ | ||||
|     clatt[d] = clatt[d]/4; | ||||
|   } | ||||
|   GridCartesian *Coarse4d =  SpaceTimeGrid::makeFourDimGrid(clatt, | ||||
| 							    GridDefaultSimd(Nd,vComplex::Nsimd()), | ||||
| 							    GridDefaultMpi());; | ||||
|   GridCartesian *Coarse5d =  SpaceTimeGrid::makeFiveDimGrid(1,Coarse4d); | ||||
|  | ||||
|   ///////////////////////// RNGs ///////////////////////////////// | ||||
|   std::vector<int> seeds4({1,2,3,4}); | ||||
|   std::vector<int> seeds5({5,6,7,8}); | ||||
|   std::vector<int> cseeds({5,6,7,8}); | ||||
|  | ||||
|   GridParallelRNG          RNG5(FGrid);   RNG5.SeedFixedIntegers(seeds5); | ||||
|   GridParallelRNG          RNG4(UGrid);   RNG4.SeedFixedIntegers(seeds4); | ||||
|   GridParallelRNG          CRNG(Coarse5d);CRNG.SeedFixedIntegers(cseeds); | ||||
|  | ||||
|   ///////////////////////// Configuration ///////////////////////////////// | ||||
|   LatticeGaugeField Umu(UGrid); | ||||
|  | ||||
|   FieldMetaData header; | ||||
|   std::string file("ckpoint_lat.4000"); | ||||
|   NerscIO::readConfiguration(Umu,header,file); | ||||
|  | ||||
|   //////////////////////// Fermion action ////////////////////////////////// | ||||
|   MobiusFermionD Ddwf(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5,b,c); | ||||
|  | ||||
|   SchurDiagMooeeOperator<MobiusFermionD, LatticeFermion> HermOpEO(Ddwf); | ||||
|  | ||||
|   typedef HermOpAdaptor<LatticeFermionD> HermFineMatrix; | ||||
|   HermFineMatrix FineHermOp(HermOpEO); | ||||
|    | ||||
|   LatticeFermion result(FrbGrid); result=Zero(); | ||||
|  | ||||
|   LatticeFermion    src(FrbGrid); random(RNG5,src); | ||||
|  | ||||
|   // Run power method on FineHermOp | ||||
|   PowerMethod<LatticeFermion>       PM;   PM(HermOpEO,src); | ||||
|  | ||||
|   | ||||
|   //////////////////////////////////////////////////////////// | ||||
|   ///////////// Coarse basis and Little Dirac Operator /////// | ||||
|   //////////////////////////////////////////////////////////// | ||||
|   typedef GeneralCoarsenedMatrix<vSpinColourVector,vTComplex,nbasis> LittleDiracOperator; | ||||
|   typedef LittleDiracOperator::CoarseVector CoarseVector; | ||||
|  | ||||
|   NextToNextToNextToNearestStencilGeometry5D geom(Coarse5d); | ||||
|   NearestStencilGeometry5D geom_nn(Coarse5d); | ||||
|    | ||||
|   // Warning: This routine calls PVdagM.Op, not PVdagM.HermOp | ||||
|   typedef Aggregation<vSpinColourVector,vTComplex,nbasis> Subspace; | ||||
|   Subspace Aggregates(Coarse5d,FrbGrid,cb); | ||||
|  | ||||
|   //////////////////////////////////////////////////////////// | ||||
|   // Need to check about red-black grid coarsening | ||||
|   //////////////////////////////////////////////////////////// | ||||
|   LittleDiracOperator LittleDiracOp(geom,FrbGrid,Coarse5d); | ||||
|  | ||||
|   bool load=false; | ||||
|   if ( load ) { | ||||
|     LoadBasis(Aggregates,"Subspace.scidac"); | ||||
|     LoadOperator(LittleDiracOp,"LittleDiracOp.scidac"); | ||||
|   } else { | ||||
|     Aggregates.CreateSubspaceChebyshev(RNG5,HermOpEO,nbasis, | ||||
| 				       95.0,0.1, | ||||
| 				       //				     400,200,200 -- 48 iters | ||||
| 				       //				     600,200,200 -- 38 iters, 162s | ||||
| 				       //				     600,200,100 -- 38 iters, 169s | ||||
| 				       //				     600,200,50  -- 88 iters. 370s  | ||||
| 				       600, | ||||
| 				       200, | ||||
| 				       100, | ||||
| 				       0.0); | ||||
|     LittleDiracOp.CoarsenOperator(FineHermOp,Aggregates); | ||||
|     SaveBasis(Aggregates,"Subspace.scidac"); | ||||
|     SaveOperator(LittleDiracOp,"LittleDiracOp.scidac"); | ||||
|   } | ||||
|    | ||||
|   // Try projecting to one hop only | ||||
|   LittleDiracOperator LittleDiracOpProj(geom_nn,FrbGrid,Coarse5d); | ||||
|   LittleDiracOpProj.ProjectNearestNeighbour(0.2,LittleDiracOp); | ||||
|  | ||||
|   typedef HermitianLinearOperator<LittleDiracOperator,CoarseVector> HermMatrix; | ||||
|   HermMatrix CoarseOp (LittleDiracOp); | ||||
|   HermMatrix CoarseOpProj (LittleDiracOpProj); | ||||
|    | ||||
|   ////////////////////////////////////////// | ||||
|   // Build a coarse lanczos | ||||
|   ////////////////////////////////////////// | ||||
|   Chebyshev<CoarseVector>      IRLCheby(0.5,60.0,71);  // 1 iter | ||||
|   FunctionHermOp<CoarseVector> IRLOpCheby(IRLCheby,CoarseOp); | ||||
|   PlainHermOp<CoarseVector>    IRLOp    (CoarseOp); | ||||
|   int Nk=48; | ||||
|   int Nm=64; | ||||
|   int Nstop=Nk; | ||||
|   ImplicitlyRestartedLanczos<CoarseVector> IRL(IRLOpCheby,IRLOp,Nstop,Nk,Nm,1.0e-5,20); | ||||
|  | ||||
|   int Nconv; | ||||
|   std::vector<RealD>            eval(Nm); | ||||
|   std::vector<CoarseVector>     evec(Nm,Coarse5d); | ||||
|   CoarseVector c_src(Coarse5d); c_src=1.0; | ||||
|   CoarseVector c_res(Coarse5d);  | ||||
|   CoarseVector c_ref(Coarse5d);  | ||||
|  | ||||
|   PowerMethod<CoarseVector>       cPM;   cPM(CoarseOp,c_src); | ||||
|  | ||||
|   IRL.calc(eval,evec,c_src,Nconv); | ||||
|   DeflatedGuesser<CoarseVector> DeflCoarseGuesser(evec,eval); | ||||
|  | ||||
|    | ||||
|   ////////////////////////////////////////// | ||||
|   // Build a coarse space solver | ||||
|   ////////////////////////////////////////// | ||||
|   int maxit=20000; | ||||
|   ConjugateGradient<CoarseVector>  CG(1.0e-8,maxit,false); | ||||
|   ConjugateGradient<LatticeFermionD>  CGfine(1.0e-8,10000,false); | ||||
|   ZeroGuesser<CoarseVector> CoarseZeroGuesser; | ||||
|  | ||||
|   //  HPDSolver<CoarseVector> HPDSolve(CoarseOp,CG,CoarseZeroGuesser); | ||||
|   HPDSolver<CoarseVector> HPDSolve(CoarseOp,CG,DeflCoarseGuesser); | ||||
|   c_res=Zero(); | ||||
|   HPDSolve(c_src,c_res); c_ref = c_res; | ||||
|  | ||||
|   ////////////////////////////////////////////////////////////////////////// | ||||
|   // Deflated (with real op EV's) solve for the projected coarse op | ||||
|   // Work towards ADEF1 in the coarse space | ||||
|   ////////////////////////////////////////////////////////////////////////// | ||||
|   HPDSolver<CoarseVector> HPDSolveProj(CoarseOpProj,CG,DeflCoarseGuesser); | ||||
|   c_res=Zero(); | ||||
|   HPDSolveProj(c_src,c_res); | ||||
|   c_res = c_res - c_ref; | ||||
|   std::cout << "Projected solver error "<<norm2(c_res)<<std::endl; | ||||
|  | ||||
|   ////////////////////////////////////////////////////////////////////// | ||||
|   // Coarse ADEF1 with deflation space | ||||
|   ////////////////////////////////////////////////////////////////////// | ||||
|   ChebyshevSmoother<CoarseVector,HermMatrix > CoarseSmoother(4.0,45.,16,CoarseOpProj);  // 311 | ||||
|   //  ChebyshevSmoother<CoarseVector,HermMatrix > CoarseSmoother(0.5,36.,10,CoarseOpProj);  // 311 | ||||
|  | ||||
|   //////////////////////////////////////////////////////// | ||||
|   // CG, Cheby mode spacing 200,200 | ||||
|   // Unprojected Coarse CG solve to 1e-8 : 190 iters, 4.9s | ||||
|   // Unprojected Coarse CG solve to 4e-2 :  33 iters, 0.8s | ||||
|   // Projected Coarse CG solve to 1e-8 : 100 iters, 0.36s | ||||
|   //////////////////////////////////////////////////////// | ||||
|   // CoarseSmoother(1.0,48.,8,CoarseOpProj); 48 evecs  | ||||
|   //////////////////////////////////////////////////////// | ||||
|   // ADEF1 Coarse solve to 1e-8 : 44 iters, 2.34s  2.1x gain | ||||
|   // ADEF1 Coarse solve to 4e-2 : 7 iters, 0.4s | ||||
|   // HDCG 38 iters 162s | ||||
|   // | ||||
|   // CoarseSmoother(1.0,40.,8,CoarseOpProj); 48 evecs  | ||||
|   // ADEF1 Coarse solve to 1e-8 : 37 iters, 2.0s  2.1x gain | ||||
|   // ADEF1 Coarse solve to 4e-2 : 6 iters, 0.36s | ||||
|   // HDCG 38 iters 169s | ||||
|  | ||||
|   TwoLevelADEF1defl<CoarseVector> | ||||
|     cADEF1(1.0e-8, 100, | ||||
| 	   CoarseOp, | ||||
| 	   CoarseSmoother, | ||||
| 	   evec,eval); | ||||
|  | ||||
|   c_res=Zero(); | ||||
|   cADEF1(c_src,c_res); | ||||
|   c_res = c_res - c_ref; | ||||
|   std::cout << "cADEF1 solver error "<<norm2(c_res)<<std::endl; | ||||
|    | ||||
|   cADEF1.Tolerance = 1.0e-9; | ||||
|   c_res=Zero(); | ||||
|   cADEF1(c_src,c_res); | ||||
|   c_res = c_res - c_ref; | ||||
|   std::cout << "cADEF1 solver error "<<norm2(c_res)<<std::endl; | ||||
|    | ||||
|   ////////////////////////////////////////// | ||||
|   // Build a smoother | ||||
|   ////////////////////////////////////////// | ||||
|   //  ChebyshevSmoother<LatticeFermionD,HermFineMatrix > Smoother(10.0,100.0,10,FineHermOp); //499 | ||||
|   //  ChebyshevSmoother<LatticeFermionD,HermFineMatrix > Smoother(3.0,100.0,10,FineHermOp);  //383 | ||||
|   //  ChebyshevSmoother<LatticeFermionD,HermFineMatrix > Smoother(1.0,100.0,10,FineHermOp);  //328 | ||||
|   //  std::vector<RealD> los({0.5,1.0,3.0}); // 147/142/146 nbasis 1 | ||||
|   //  std::vector<RealD> los({1.0,2.0}); // Nbasis 24: 88,86 iterations | ||||
|   //  std::vector<RealD> los({2.0,4.0}); // Nbasis 32 == 52, iters | ||||
|   //  std::vector<RealD> los({2.0,4.0}); // Nbasis 40 == 36,36 iters | ||||
|  | ||||
|   // | ||||
|   // Turns approx 2700 iterations into 340 fine multiplies with Nbasis 40 | ||||
|   // Need to measure cost of coarse space. | ||||
|   // | ||||
|   // -- i) Reduce coarse residual   -- 0.04 | ||||
|   // -- ii) Lanczos on coarse space -- done | ||||
|   // -- iii) Possible 1 hop project and/or preconditioning it - easy - PrecCG it and | ||||
|   //         use a limited stencil. Reread BFM code to check on evecs / deflation strategy with prec | ||||
|   // | ||||
|   std::vector<RealD> los({3.0}); // Nbasis 40 == 36,36 iters | ||||
|  | ||||
|   //  std::vector<int> ords({7,8,10}); // Nbasis 40 == 40,38,36 iters (320,342,396 mults) | ||||
|   std::vector<int> ords({7}); // Nbasis 40 == 40 iters (320 mults)   | ||||
|  | ||||
|   for(int l=0;l<los.size();l++){ | ||||
|  | ||||
|     RealD lo = los[l]; | ||||
|  | ||||
|     for(int o=0;o<ords.size();o++){ | ||||
|  | ||||
|       ConjugateGradient<CoarseVector>  CGsloppy(4.0e-2,maxit,false); | ||||
|       HPDSolver<CoarseVector> HPDSolveSloppy(CoarseOp,CGsloppy,DeflCoarseGuesser); | ||||
|        | ||||
|       //    ChebyshevSmoother<LatticeFermionD,HermFineMatrix > Smoother(lo,92,10,FineHermOp); // 36 best case | ||||
|       ChebyshevSmoother<LatticeFermionD,HermFineMatrix > Smoother(lo,92,ords[o],FineHermOp);  // 311 | ||||
|  | ||||
|       ////////////////////////////////////////// | ||||
|       // Build a HDCG solver | ||||
|       ////////////////////////////////////////// | ||||
|       TwoLevelADEF2<LatticeFermion,CoarseVector,Subspace> | ||||
| 	HDCG(1.0e-8, 3000, | ||||
| 	     FineHermOp, | ||||
| 	     Smoother, | ||||
| 	     HPDSolveSloppy, | ||||
| 	     HPDSolve, | ||||
| 	     Aggregates); | ||||
|  | ||||
|       result=Zero(); | ||||
|       HDCG(src,result); | ||||
|  | ||||
|       TwoLevelADEF2<LatticeFermion,CoarseVector,Subspace> | ||||
| 	HDCGdefl(1.0e-8, 100, | ||||
| 		 FineHermOp, | ||||
| 		 Smoother, | ||||
| 		 cADEF1, | ||||
| 		 HPDSolve, | ||||
| 		 Aggregates); | ||||
|        | ||||
|       result=Zero(); | ||||
|       HDCGdefl(src,result); | ||||
|        | ||||
|     } | ||||
|   } | ||||
|  | ||||
|   // Standard CG | ||||
|   result=Zero(); | ||||
|   CGfine(HermOpEO, src, result); | ||||
|    | ||||
|   Grid_finalize(); | ||||
|   return 0; | ||||
| } | ||||
							
								
								
									
										267
									
								
								tests/debug/Test_general_coarse_pvdagm.cc
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										267
									
								
								tests/debug/Test_general_coarse_pvdagm.cc
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,267 @@ | ||||
| /************************************************************************************* | ||||
|  | ||||
|     Grid physics library, www.github.com/paboyle/Grid  | ||||
|  | ||||
|     Source file: ./tests/Test_padded_cell.cc | ||||
|  | ||||
|     Copyright (C) 2023 | ||||
|  | ||||
| Author: Peter Boyle <paboyle@ph.ed.ac.uk> | ||||
|  | ||||
|     This program is free software; you can redistribute it and/or modify | ||||
|     it under the terms of the GNU General Public License as published by | ||||
|     the Free Software Foundation; either version 2 of the License, or | ||||
|     (at your option) any later version. | ||||
|  | ||||
|     This program is distributed in the hope that it will be useful, | ||||
|     but WITHOUT ANY WARRANTY; without even the implied warranty of | ||||
|     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the | ||||
|     GNU General Public License for more details. | ||||
|  | ||||
|     You should have received a copy of the GNU General Public License along | ||||
|     with this program; if not, write to the Free Software Foundation, Inc., | ||||
|     51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. | ||||
|  | ||||
|     See the full license in the file "LICENSE" in the top level distribution directory | ||||
|     *************************************************************************************/ | ||||
|     /*  END LEGAL */ | ||||
| #include <Grid/Grid.h> | ||||
| #include <Grid/lattice/PaddedCell.h> | ||||
| #include <Grid/stencil/GeneralLocalStencil.h> | ||||
|  | ||||
| #include <Grid/algorithms/iterative/PrecGeneralisedConjugateResidual.h> | ||||
| #include <Grid/algorithms/iterative/PrecGeneralisedConjugateResidualNonHermitian.h> | ||||
| #include <Grid/algorithms/iterative/BiCGSTAB.h> | ||||
|  | ||||
| using namespace std; | ||||
| using namespace Grid; | ||||
|  | ||||
| template<class Field> | ||||
| class HermOpAdaptor : public LinearOperatorBase<Field> | ||||
| { | ||||
|   LinearOperatorBase<Field> & wrapped; | ||||
| public: | ||||
|   HermOpAdaptor(LinearOperatorBase<Field> &wrapme) : wrapped(wrapme)  {}; | ||||
|   void OpDiag (const Field &in, Field &out) {    assert(0);  } | ||||
|   void OpDir  (const Field &in, Field &out,int dir,int disp) {    assert(0);  } | ||||
|   void OpDirAll  (const Field &in, std::vector<Field> &out){    assert(0);  }; | ||||
|   void Op     (const Field &in, Field &out){ | ||||
|     wrapped.HermOp(in,out); | ||||
|   } | ||||
|   void AdjOp     (const Field &in, Field &out){ | ||||
|     wrapped.HermOp(in,out); | ||||
|   } | ||||
|   void HermOpAndNorm(const Field &in, Field &out,RealD &n1,RealD &n2){    assert(0);  } | ||||
|   void HermOp(const Field &in, Field &out){ | ||||
|     wrapped.HermOp(in,out); | ||||
|   } | ||||
|    | ||||
| }; | ||||
|  | ||||
| template<class Matrix,class Field> | ||||
| class PVdagMLinearOperator : public LinearOperatorBase<Field> { | ||||
|   Matrix &_Mat; | ||||
|   Matrix &_PV; | ||||
| public: | ||||
|   PVdagMLinearOperator(Matrix &Mat,Matrix &PV): _Mat(Mat),_PV(PV){}; | ||||
|  | ||||
|   void OpDiag (const Field &in, Field &out) {    assert(0);  } | ||||
|   void OpDir  (const Field &in, Field &out,int dir,int disp) {    assert(0);  } | ||||
|   void OpDirAll  (const Field &in, std::vector<Field> &out){    assert(0);  }; | ||||
|   void Op     (const Field &in, Field &out){ | ||||
|     Field tmp(in.Grid()); | ||||
|     _Mat.M(in,tmp); | ||||
|     _PV.Mdag(tmp,out); | ||||
|   } | ||||
|   void AdjOp     (const Field &in, Field &out){ | ||||
|     Field tmp(in.Grid()); | ||||
|     _PV.M(tmp,out); | ||||
|     _Mat.Mdag(in,tmp); | ||||
|   } | ||||
|   void HermOpAndNorm(const Field &in, Field &out,RealD &n1,RealD &n2){    assert(0);  } | ||||
|   void HermOp(const Field &in, Field &out){ | ||||
|     std::cout << "HermOp"<<std::endl; | ||||
|     Field tmp(in.Grid()); | ||||
|     _Mat.M(in,tmp); | ||||
|     _PV.Mdag(tmp,out); | ||||
|     _PV.M(out,tmp); | ||||
|     _Mat.Mdag(tmp,out); | ||||
|     std::cout << "HermOp done "<<norm2(out)<<std::endl; | ||||
|      | ||||
|   } | ||||
| }; | ||||
|  | ||||
| template<class Field> class DumbOperator  : public LinearOperatorBase<Field> { | ||||
| public: | ||||
|   LatticeComplex scale; | ||||
|   DumbOperator(GridBase *grid) : scale(grid) | ||||
|   { | ||||
|     scale = 0.0; | ||||
|     LatticeComplex scalesft(grid); | ||||
|     LatticeComplex scaletmp(grid); | ||||
|     for(int d=0;d<4;d++){ | ||||
|       Lattice<iScalar<vInteger> > x(grid); LatticeCoordinate(x,d+1); | ||||
|       LatticeCoordinate(scaletmp,d+1); | ||||
|       scalesft = Cshift(scaletmp,d+1,1); | ||||
|       scale = 100.0*scale + where( mod(x    ,2)==(Integer)0, scalesft,scaletmp); | ||||
|     } | ||||
|     std::cout << " scale\n" << scale << std::endl; | ||||
|   } | ||||
|   // Support for coarsening to a multigrid | ||||
|   void OpDiag (const Field &in, Field &out) {}; | ||||
|   void OpDir  (const Field &in, Field &out,int dir,int disp){}; | ||||
|   void OpDirAll  (const Field &in, std::vector<Field> &out) {}; | ||||
|  | ||||
|   void Op     (const Field &in, Field &out){ | ||||
|     out = scale * in; | ||||
|   } | ||||
|   void AdjOp  (const Field &in, Field &out){ | ||||
|     out = scale * in; | ||||
|   } | ||||
|   void HermOp(const Field &in, Field &out){ | ||||
|     double n1, n2; | ||||
|     HermOpAndNorm(in,out,n1,n2); | ||||
|   } | ||||
|   void HermOpAndNorm(const Field &in, Field &out,double &n1,double &n2){ | ||||
|     ComplexD dot; | ||||
|  | ||||
|     out = scale * in; | ||||
|  | ||||
|     dot= innerProduct(in,out); | ||||
|     n1=real(dot); | ||||
|  | ||||
|     dot = innerProduct(out,out); | ||||
|     n2=real(dot); | ||||
|   } | ||||
| }; | ||||
|  | ||||
|  | ||||
| int main (int argc, char ** argv) | ||||
| { | ||||
|   Grid_init(&argc,&argv); | ||||
|  | ||||
|   const int Ls=2; | ||||
|  | ||||
|   GridCartesian         * UGrid   = SpaceTimeGrid::makeFourDimGrid(GridDefaultLatt(), GridDefaultSimd(Nd,vComplex::Nsimd()),GridDefaultMpi()); | ||||
|   GridRedBlackCartesian * UrbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(UGrid); | ||||
|  | ||||
|   GridCartesian         * FGrid   = SpaceTimeGrid::makeFiveDimGrid(Ls,UGrid); | ||||
|   GridRedBlackCartesian * FrbGrid = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls,UGrid); | ||||
|  | ||||
|   // Construct a coarsened grid | ||||
|   Coordinate clatt = GridDefaultLatt(); | ||||
|   for(int d=0;d<clatt.size();d++){ | ||||
|     clatt[d] = clatt[d]/4; | ||||
|   } | ||||
|   GridCartesian *Coarse4d =  SpaceTimeGrid::makeFourDimGrid(clatt, GridDefaultSimd(Nd,vComplex::Nsimd()),GridDefaultMpi());; | ||||
|   GridCartesian *Coarse5d =  SpaceTimeGrid::makeFiveDimGrid(1,Coarse4d); | ||||
|  | ||||
|   std::vector<int> seeds4({1,2,3,4}); | ||||
|   std::vector<int> seeds5({5,6,7,8}); | ||||
|   std::vector<int> cseeds({5,6,7,8}); | ||||
|   GridParallelRNG          RNG5(FGrid);   RNG5.SeedFixedIntegers(seeds5); | ||||
|   GridParallelRNG          RNG4(UGrid);   RNG4.SeedFixedIntegers(seeds4); | ||||
|   GridParallelRNG          CRNG(Coarse5d);CRNG.SeedFixedIntegers(cseeds); | ||||
|  | ||||
|   LatticeFermion    src(FGrid); random(RNG5,src); | ||||
|   LatticeFermion result(FGrid); result=Zero(); | ||||
|   LatticeFermion    ref(FGrid); ref=Zero(); | ||||
|   LatticeFermion    tmp(FGrid); | ||||
|   LatticeFermion    err(FGrid); | ||||
|   LatticeGaugeField Umu(UGrid); | ||||
|  | ||||
|   FieldMetaData header; | ||||
|   std::string file("ckpoint_lat.4000"); | ||||
|   NerscIO::readConfiguration(Umu,header,file); | ||||
|   //Umu = 1.0; | ||||
|    | ||||
|   RealD mass=0.5; | ||||
|   RealD M5=1.8; | ||||
|  | ||||
|   DomainWallFermionD Ddwf(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5); | ||||
|   DomainWallFermionD Dpv(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,1.0,M5); | ||||
|  | ||||
|   const int nbasis = 1; | ||||
|   const int cb = 0 ; | ||||
|   LatticeFermion prom(FGrid); | ||||
|  | ||||
|   typedef GeneralCoarsenedMatrix<vSpinColourVector,vTComplex,nbasis> LittleDiracOperator; | ||||
|   typedef LittleDiracOperator::CoarseVector CoarseVector; | ||||
|  | ||||
|   NextToNearestStencilGeometry5D geom(Coarse5d); | ||||
|  | ||||
|   std::cout<<GridLogMessage<<std::endl; | ||||
|   std::cout<<GridLogMessage<<"*******************************************"<<std::endl; | ||||
|   std::cout<<GridLogMessage<<std::endl; | ||||
|    | ||||
|   PVdagMLinearOperator<DomainWallFermionD,LatticeFermionD> PVdagM(Ddwf,Dpv); | ||||
|   HermOpAdaptor<LatticeFermionD> HOA(PVdagM); | ||||
|  | ||||
|   // Run power method on HOA?? | ||||
|   PowerMethod<LatticeFermion>       PM;   PM(HOA,src); | ||||
|   | ||||
|   // Warning: This routine calls PVdagM.Op, not PVdagM.HermOp | ||||
|   typedef Aggregation<vSpinColourVector,vTComplex,nbasis> Subspace; | ||||
|   Subspace AggregatesPD(Coarse5d,FGrid,cb); | ||||
|   AggregatesPD.CreateSubspaceChebyshev(RNG5, | ||||
| 				       HOA, | ||||
| 				       nbasis, | ||||
| 				       5000.0, | ||||
| 				       0.02, | ||||
| 				       100, | ||||
| 				       50, | ||||
| 				       50, | ||||
| 				       0.0); | ||||
|    | ||||
|   LittleDiracOperator LittleDiracOpPV(geom,FGrid,Coarse5d); | ||||
|   LittleDiracOpPV.CoarsenOperator(PVdagM,AggregatesPD); | ||||
|  | ||||
|   std::cout<<GridLogMessage<<std::endl; | ||||
|   std::cout<<GridLogMessage<<"*******************************************"<<std::endl; | ||||
|   std::cout<<GridLogMessage<<std::endl; | ||||
|   std::cout<<GridLogMessage<<"Testing coarsened operator "<<std::endl; | ||||
|    | ||||
|   CoarseVector c_src (Coarse5d); | ||||
|   CoarseVector c_res (Coarse5d); | ||||
|   CoarseVector c_proj(Coarse5d); | ||||
|  | ||||
|   std::vector<LatticeFermion> subspace(nbasis,FGrid); | ||||
|   subspace=AggregatesPD.subspace; | ||||
|  | ||||
|   Complex one(1.0); | ||||
|   c_src = one;  // 1 in every element for vector 1. | ||||
|   blockPromote(c_src,err,subspace); | ||||
|  | ||||
|   prom=Zero(); | ||||
|   for(int b=0;b<nbasis;b++){ | ||||
|     prom=prom+subspace[b]; | ||||
|   } | ||||
|   err=err-prom;  | ||||
|   std::cout<<GridLogMessage<<"Promoted back from subspace: err "<<norm2(err)<<std::endl; | ||||
|   std::cout<<GridLogMessage<<"c_src "<<norm2(c_src)<<std::endl; | ||||
|   std::cout<<GridLogMessage<<"prom  "<<norm2(prom)<<std::endl; | ||||
|  | ||||
|   PVdagM.Op(prom,tmp); | ||||
|   blockProject(c_proj,tmp,subspace); | ||||
|   std::cout<<GridLogMessage<<" Called Big Dirac Op "<<norm2(tmp)<<std::endl; | ||||
|  | ||||
|   LittleDiracOpPV.M(c_src,c_res); | ||||
|   std::cout<<GridLogMessage<<" Called Little Dirac Op c_src "<< norm2(c_src) << "  c_res "<< norm2(c_res) <<std::endl; | ||||
|  | ||||
|   std::cout<<GridLogMessage<<"Little dop : "<<norm2(c_res)<<std::endl; | ||||
|   //  std::cout<<GridLogMessage<<" Little "<< c_res<<std::endl; | ||||
|  | ||||
|   std::cout<<GridLogMessage<<"Big dop in subspace : "<<norm2(c_proj)<<std::endl; | ||||
|   //  std::cout<<GridLogMessage<<" Big "<< c_proj<<std::endl; | ||||
|   c_proj = c_proj - c_res; | ||||
|   std::cout<<GridLogMessage<<" ldop error: "<<norm2(c_proj)<<std::endl; | ||||
|   //  std::cout<<GridLogMessage<<" error "<< c_proj<<std::endl; | ||||
|  | ||||
|   std::cout<<GridLogMessage<<std::endl; | ||||
|   std::cout<<GridLogMessage<<"*******************************************"<<std::endl; | ||||
|   std::cout<<GridLogMessage<<std::endl; | ||||
|   std::cout<<GridLogMessage << "Done "<< std::endl; | ||||
|  | ||||
|   Grid_finalize(); | ||||
|   return 0; | ||||
| } | ||||
		Reference in New Issue
	
	Block a user