mirror of
				https://github.com/paboyle/Grid.git
				synced 2025-10-31 12:04:33 +00:00 
			
		
		
		
	Compare commits
	
		
			49 Commits
		
	
	
		
			fix/HOST_N
			...
			b58fd80379
		
	
	| Author | SHA1 | Date | |
|---|---|---|---|
|  | b58fd80379 | ||
|  | 7f6e0f57d0 | ||
|  | cae27678d8 | ||
|  | 48ff655bad | ||
|  | 2525ad4623 | ||
|  | e7020017c5 | ||
|  | eacebfad74 | ||
|  | 3bc2da5321 | ||
|  | 2d710d6bfd | ||
|  | 6532b7f32b | ||
|  | 7b41b92d99 | ||
|  | dd557af84b | ||
|  | 59b9d0e030 | ||
|  | b82eee4733 | ||
|  | 6a87487544 | ||
|  | fcf5023845 | ||
|  | c8adad6d8b | ||
|  | 737d3ffb98 | ||
|  | b01e67bab1 | ||
|  | 8a70314f54 | ||
|  | 36ae6e5aba | ||
|  | 9db585cfeb | ||
|  | c564611ba7 | ||
|  | e187bcb85c | ||
|  | be18ffe3b4 | ||
|  | 0d63dce4e2 | ||
|  | 26b30e1551 | ||
|  | 7fc58ac293 | ||
|  | 3a86cce8c1 | ||
|  | 37884d369f | ||
|  | 9246e653cd | ||
|  | 64283c8673 | ||
|  | 755002da9c | ||
|  | 31b8e8b437 | ||
|  | 0ec0de97e6 | ||
|  | 6c3ade5d89 | ||
|  | 980c5f9a34 | ||
|  | 471ca5f281 | ||
|  | e82ddcff5d | ||
|  | b9dcad89e8 | ||
|  | 993f43ef4a | ||
|  | 2b43308208 | ||
|  | 04a1ac3a76 | ||
|  | 990b8798bd | ||
|  | b334a73a44 | ||
|  | 5d113d1c70 | ||
|  | c14977aeab | ||
|  | 3e94838204 | ||
|  | c0a0b8ca62 | 
| @@ -69,7 +69,8 @@ NAMESPACE_CHECK(BiCGSTAB); | |||||||
| #include <Grid/algorithms/iterative/PowerMethod.h> | #include <Grid/algorithms/iterative/PowerMethod.h> | ||||||
|  |  | ||||||
| NAMESPACE_CHECK(PowerMethod); | NAMESPACE_CHECK(PowerMethod); | ||||||
| #include <Grid/algorithms/CoarsenedMatrix.h> | #include <Grid/algorithms/multigrid/MultiGrid.h> | ||||||
|  |  | ||||||
| NAMESPACE_CHECK(CoarsendMatrix); | NAMESPACE_CHECK(CoarsendMatrix); | ||||||
| #include <Grid/algorithms/FFT.h> | #include <Grid/algorithms/FFT.h> | ||||||
|  |  | ||||||
|   | |||||||
| @@ -123,7 +123,7 @@ public: | |||||||
| }; | }; | ||||||
|    |    | ||||||
| template<class Fobj,class CComplex,int nbasis> | template<class Fobj,class CComplex,int nbasis> | ||||||
| class Aggregation   { | class Aggregation { | ||||||
| public: | public: | ||||||
|   typedef iVector<CComplex,nbasis >             siteVector; |   typedef iVector<CComplex,nbasis >             siteVector; | ||||||
|   typedef Lattice<siteVector>                 CoarseVector; |   typedef Lattice<siteVector>                 CoarseVector; | ||||||
| @@ -158,7 +158,20 @@ public: | |||||||
|     blockPromote(CoarseVec,FineVec,subspace); |     blockPromote(CoarseVec,FineVec,subspace); | ||||||
|   } |   } | ||||||
|  |  | ||||||
|   virtual void CreateSubspace(GridParallelRNG  &RNG,LinearOperatorBase<FineField> &hermop,int nn=nbasis) { |   virtual void CreateSubspaceRandom(GridParallelRNG  &RNG) { | ||||||
|  |     int nn=nbasis; | ||||||
|  |     RealD scale; | ||||||
|  |     FineField noise(FineGrid); | ||||||
|  |     for(int b=0;b<nn;b++){ | ||||||
|  |       subspace[b] = Zero(); | ||||||
|  |       gaussian(RNG,noise); | ||||||
|  |       scale = std::pow(norm2(noise),-0.5);  | ||||||
|  |       noise=noise*scale; | ||||||
|  |       subspace[b] = noise; | ||||||
|  |     } | ||||||
|  |   } | ||||||
|  |   virtual void CreateSubspace(GridParallelRNG  &RNG,LinearOperatorBase<FineField> &hermop,int nn=nbasis) | ||||||
|  |   { | ||||||
|  |  | ||||||
|     RealD scale; |     RealD scale; | ||||||
|  |  | ||||||
| @@ -217,6 +230,11 @@ public: | |||||||
|     scale = std::pow(norm2(noise),-0.5);  |     scale = std::pow(norm2(noise),-0.5);  | ||||||
|     noise=noise*scale; |     noise=noise*scale; | ||||||
|  |  | ||||||
|  |     std::cout << GridLogMessage<<" Chebyshev subspace pass-1 : ord "<<orderfilter<<" ["<<lo<<","<<hi<<"]"<<std::endl; | ||||||
|  |     std::cout << GridLogMessage<<" Chebyshev subspace pass-2 : nbasis"<<nn<<" min " | ||||||
|  | 	      <<ordermin<<" step "<<orderstep | ||||||
|  | 	      <<" lo"<<filterlo<<std::endl; | ||||||
|  |  | ||||||
|     // Initial matrix element |     // Initial matrix element | ||||||
|     hermop.Op(noise,Mn); std::cout<<GridLogMessage << "noise <n|MdagM|n> "<<norm2(Mn)<<std::endl; |     hermop.Op(noise,Mn); std::cout<<GridLogMessage << "noise <n|MdagM|n> "<<norm2(Mn)<<std::endl; | ||||||
|  |  | ||||||
| @@ -290,6 +308,44 @@ public: | |||||||
|     } |     } | ||||||
|     assert(b==nn); |     assert(b==nn); | ||||||
|   } |   } | ||||||
|  |   virtual void CreateSubspaceChebyshev(GridParallelRNG  &RNG,LinearOperatorBase<FineField> &hermop, | ||||||
|  | 				       int nn, | ||||||
|  | 				       double hi, | ||||||
|  | 				       double lo, | ||||||
|  | 				       int orderfilter | ||||||
|  | 				       ) { | ||||||
|  |  | ||||||
|  |     RealD scale; | ||||||
|  |  | ||||||
|  |     FineField noise(FineGrid); | ||||||
|  |     FineField Mn(FineGrid); | ||||||
|  |     FineField tmp(FineGrid); | ||||||
|  |  | ||||||
|  |     // New normalised noise | ||||||
|  |     std::cout << GridLogMessage<<" Chebyshev subspace pure noise : ord "<<orderfilter<<" ["<<lo<<","<<hi<<"]"<<std::endl; | ||||||
|  |     std::cout << GridLogMessage<<" Chebyshev subspace pure noise  : nbasis "<<nn<<std::endl; | ||||||
|  |  | ||||||
|  |  | ||||||
|  |     for(int b =0;b<nbasis;b++) | ||||||
|  |     { | ||||||
|  |       gaussian(RNG,noise); | ||||||
|  |       scale = std::pow(norm2(noise),-0.5);  | ||||||
|  |       noise=noise*scale; | ||||||
|  |  | ||||||
|  |       // Initial matrix element | ||||||
|  |       hermop.Op(noise,Mn); | ||||||
|  |       if(b==0) std::cout<<GridLogMessage << "noise <n|MdagM|n> "<<norm2(Mn)<<std::endl; | ||||||
|  |       // Filter | ||||||
|  |       Chebyshev<FineField> Cheb(lo,hi,orderfilter); | ||||||
|  |       Cheb(hermop,noise,Mn); | ||||||
|  |       // normalise | ||||||
|  |       scale = std::pow(norm2(Mn),-0.5); 	Mn=Mn*scale; | ||||||
|  |       subspace[b]   = Mn; | ||||||
|  |       hermop.Op(Mn,tmp);  | ||||||
|  |       std::cout<<GridLogMessage << "filt ["<<b<<"] <n|MdagM|n> "<<norm2(tmp)<<std::endl; | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |   } | ||||||
|  |  | ||||||
| }; | }; | ||||||
|  |  | ||||||
|   | |||||||
							
								
								
									
										573
									
								
								Grid/algorithms/GeneralCoarsenedMatrix.h
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										573
									
								
								Grid/algorithms/GeneralCoarsenedMatrix.h
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,573 @@ | |||||||
|  | /************************************************************************************* | ||||||
|  |  | ||||||
|  |     Grid physics library, www.github.com/paboyle/Grid  | ||||||
|  |  | ||||||
|  |     Source file: ./lib/algorithms/GeneralCoarsenedMatrix.h | ||||||
|  |  | ||||||
|  |     Copyright (C) 2015 | ||||||
|  |  | ||||||
|  | Author: Peter Boyle <pboyle@bnl.gov> | ||||||
|  |  | ||||||
|  |     This program is free software; you can redistribute it and/or modify | ||||||
|  |     it under the terms of the GNU General Public License as published by | ||||||
|  |     the Free Software Foundation; either version 2 of the License, or | ||||||
|  |     (at your option) any later version. | ||||||
|  |  | ||||||
|  |     This program is distributed in the hope that it will be useful, | ||||||
|  |     but WITHOUT ANY WARRANTY; without even the implied warranty of | ||||||
|  |     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the | ||||||
|  |     GNU General Public License for more details. | ||||||
|  |  | ||||||
|  |     You should have received a copy of the GNU General Public License along | ||||||
|  |     with this program; if not, write to the Free Software Foundation, Inc., | ||||||
|  |     51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. | ||||||
|  |  | ||||||
|  |     See the full license in the file "LICENSE" in the top level distribution directory | ||||||
|  | *************************************************************************************/ | ||||||
|  | /*  END LEGAL */ | ||||||
|  | #pragma once | ||||||
|  |  | ||||||
|  | #include <Grid/qcd/QCD.h> // needed for Dagger(Yes|No), Inverse(Yes|No) | ||||||
|  |  | ||||||
|  | #include <Grid/lattice/PaddedCell.h> | ||||||
|  | #include <Grid/stencil/GeneralLocalStencil.h> | ||||||
|  |  | ||||||
|  | NAMESPACE_BEGIN(Grid); | ||||||
|  |  | ||||||
|  | // Fixme need coalesced read gpermute | ||||||
|  | template<class vobj> void gpermute(vobj & inout,int perm){ | ||||||
|  |   vobj tmp=inout; | ||||||
|  |   if (perm & 0x1 ) { permute(inout,tmp,0); tmp=inout;} | ||||||
|  |   if (perm & 0x2 ) { permute(inout,tmp,1); tmp=inout;} | ||||||
|  |   if (perm & 0x4 ) { permute(inout,tmp,2); tmp=inout;} | ||||||
|  |   if (perm & 0x8 ) { permute(inout,tmp,3); tmp=inout;} | ||||||
|  | } | ||||||
|  |  | ||||||
|  | ///////////////////////////////////////////////////////////////// | ||||||
|  | // Reuse Aggregation class from CoarsenedMatrix for now | ||||||
|  | // Might think about *smoothed* Aggregation | ||||||
|  | // Equivalent of Geometry class in cartesian case | ||||||
|  | ///////////////////////////////////////////////////////////////// | ||||||
|  | class NonLocalStencilGeometry { | ||||||
|  | public: | ||||||
|  |   int depth; | ||||||
|  |   int hops; | ||||||
|  |   int npoint; | ||||||
|  |   std::vector<Coordinate> shifts; | ||||||
|  |   Coordinate stencil_size; | ||||||
|  |   Coordinate stencil_lo; | ||||||
|  |   Coordinate stencil_hi; | ||||||
|  |   GridCartesian *grid; | ||||||
|  |   GridCartesian *Grid() {return grid;}; | ||||||
|  |   int Depth(void){return 1;};   // Ghost zone depth | ||||||
|  |   int Hops(void){return hops;}; // # of hops=> level of corner fill in in stencil | ||||||
|  |  | ||||||
|  |   virtual int DimSkip(void) =0; | ||||||
|  |  | ||||||
|  |   virtual ~NonLocalStencilGeometry() {}; | ||||||
|  |  | ||||||
|  |   int  Reverse(int point) | ||||||
|  |   { | ||||||
|  |     int Nd = Grid()->Nd(); | ||||||
|  |     Coordinate shft = shifts[point]; | ||||||
|  |     Coordinate rev(Nd); | ||||||
|  |     for(int mu=0;mu<Nd;mu++) rev[mu]= -shft[mu]; | ||||||
|  |     for(int p=0;p<npoint;p++){ | ||||||
|  |       if(rev==shifts[p]){ | ||||||
|  | 	return p; | ||||||
|  |       } | ||||||
|  |     } | ||||||
|  |     assert(0); | ||||||
|  |     return -1; | ||||||
|  |   } | ||||||
|  |   void BuildShifts(void) | ||||||
|  |   { | ||||||
|  |     this->shifts.resize(0); | ||||||
|  |     int Nd = this->grid->Nd(); | ||||||
|  |  | ||||||
|  |     int dd = this->DimSkip(); | ||||||
|  |     for(int s0=this->stencil_lo[dd+0];s0<=this->stencil_hi[dd+0];s0++){ | ||||||
|  |     for(int s1=this->stencil_lo[dd+1];s1<=this->stencil_hi[dd+1];s1++){ | ||||||
|  |     for(int s2=this->stencil_lo[dd+2];s2<=this->stencil_hi[dd+2];s2++){ | ||||||
|  |     for(int s3=this->stencil_lo[dd+3];s3<=this->stencil_hi[dd+3];s3++){ | ||||||
|  |       Coordinate sft(Nd,0); | ||||||
|  |       sft[dd+0] = s0; | ||||||
|  |       sft[dd+1] = s1; | ||||||
|  |       sft[dd+2] = s2; | ||||||
|  |       sft[dd+3] = s3; | ||||||
|  |       int nhops = abs(s0)+abs(s1)+abs(s2)+abs(s3); | ||||||
|  |       if(nhops<=this->hops) this->shifts.push_back(sft); | ||||||
|  |     }}}} | ||||||
|  |     this->npoint = this->shifts.size(); | ||||||
|  |     std::cout << GridLogMessage << "NonLocalStencilGeometry has "<< this->npoint << " terms in stencil "<<std::endl; | ||||||
|  |   } | ||||||
|  |    | ||||||
|  |   NonLocalStencilGeometry(GridCartesian *_coarse_grid,int _hops) : grid(_coarse_grid), hops(_hops) | ||||||
|  |   { | ||||||
|  |     Coordinate latt = grid->GlobalDimensions(); | ||||||
|  |     stencil_size.resize(grid->Nd()); | ||||||
|  |     stencil_lo.resize(grid->Nd()); | ||||||
|  |     stencil_hi.resize(grid->Nd()); | ||||||
|  |     for(int d=0;d<grid->Nd();d++){ | ||||||
|  |      if ( latt[d] == 1 ) { | ||||||
|  |       stencil_lo[d] = 0; | ||||||
|  |       stencil_hi[d] = 0; | ||||||
|  |       stencil_size[d]= 1; | ||||||
|  |      } else if ( latt[d] == 2 ) { | ||||||
|  |       stencil_lo[d] = -1; | ||||||
|  |       stencil_hi[d] = 0; | ||||||
|  |       stencil_size[d]= 2; | ||||||
|  |      } else if ( latt[d] > 2 ) { | ||||||
|  |        stencil_lo[d] = -1; | ||||||
|  |        stencil_hi[d] =  1; | ||||||
|  |        stencil_size[d]= 3; | ||||||
|  |      } | ||||||
|  |     } | ||||||
|  |   }; | ||||||
|  |  | ||||||
|  | }; | ||||||
|  |  | ||||||
|  | // Need to worry about red-black now | ||||||
|  | class NonLocalStencilGeometry4D : public NonLocalStencilGeometry { | ||||||
|  | public: | ||||||
|  |   virtual int DimSkip(void) { return 0;}; | ||||||
|  |   NonLocalStencilGeometry4D(GridCartesian *Coarse,int _hops) : NonLocalStencilGeometry(Coarse,_hops) { }; | ||||||
|  |   virtual ~NonLocalStencilGeometry4D() {}; | ||||||
|  | }; | ||||||
|  | class NonLocalStencilGeometry5D : public NonLocalStencilGeometry { | ||||||
|  | public: | ||||||
|  |   virtual int DimSkip(void) { return 1; };  | ||||||
|  |   NonLocalStencilGeometry5D(GridCartesian *Coarse,int _hops) : NonLocalStencilGeometry(Coarse,_hops)  { }; | ||||||
|  |   virtual ~NonLocalStencilGeometry5D() {}; | ||||||
|  | }; | ||||||
|  | /* | ||||||
|  |  * Bunch of different options classes | ||||||
|  |  */ | ||||||
|  | class NextToNextToNextToNearestStencilGeometry4D : public NonLocalStencilGeometry4D { | ||||||
|  | public: | ||||||
|  |   NextToNextToNextToNearestStencilGeometry4D(GridCartesian *Coarse) :  NonLocalStencilGeometry4D(Coarse,4) | ||||||
|  |   { | ||||||
|  |     this->BuildShifts(); | ||||||
|  |   }; | ||||||
|  | }; | ||||||
|  | class NextToNextToNextToNearestStencilGeometry5D : public  NonLocalStencilGeometry5D { | ||||||
|  | public: | ||||||
|  |   NextToNextToNextToNearestStencilGeometry5D(GridCartesian *Coarse) :  NonLocalStencilGeometry5D(Coarse,4) | ||||||
|  |   { | ||||||
|  |     this->BuildShifts(); | ||||||
|  |   }; | ||||||
|  | }; | ||||||
|  | class NextToNearestStencilGeometry4D : public  NonLocalStencilGeometry4D { | ||||||
|  | public: | ||||||
|  |   NextToNearestStencilGeometry4D(GridCartesian *Coarse) :  NonLocalStencilGeometry4D(Coarse,2) | ||||||
|  |   { | ||||||
|  |     this->BuildShifts(); | ||||||
|  |   }; | ||||||
|  | }; | ||||||
|  | class NextToNearestStencilGeometry5D : public  NonLocalStencilGeometry5D { | ||||||
|  | public: | ||||||
|  |   NextToNearestStencilGeometry5D(GridCartesian *Coarse) :  NonLocalStencilGeometry5D(Coarse,2) | ||||||
|  |   { | ||||||
|  |     this->BuildShifts(); | ||||||
|  |   }; | ||||||
|  | }; | ||||||
|  | class NearestStencilGeometry4D : public  NonLocalStencilGeometry4D { | ||||||
|  | public: | ||||||
|  |   NearestStencilGeometry4D(GridCartesian *Coarse) :  NonLocalStencilGeometry4D(Coarse,1) | ||||||
|  |   { | ||||||
|  |     this->BuildShifts(); | ||||||
|  |   }; | ||||||
|  | }; | ||||||
|  | class NearestStencilGeometry5D : public  NonLocalStencilGeometry5D { | ||||||
|  | public: | ||||||
|  |   NearestStencilGeometry5D(GridCartesian *Coarse) :  NonLocalStencilGeometry5D(Coarse,1) | ||||||
|  |   { | ||||||
|  |     this->BuildShifts(); | ||||||
|  |   }; | ||||||
|  | }; | ||||||
|  |  | ||||||
|  | // Fine Object == (per site) type of fine field | ||||||
|  | // nbasis      == number of deflation vectors | ||||||
|  | template<class Fobj,class CComplex,int nbasis> | ||||||
|  | class GeneralCoarsenedMatrix : public SparseMatrixBase<Lattice<iVector<CComplex,nbasis > > >  { | ||||||
|  | public: | ||||||
|  |  | ||||||
|  |   typedef GeneralCoarsenedMatrix<Fobj,CComplex,nbasis> GeneralCoarseOp; | ||||||
|  |   typedef iVector<CComplex,nbasis >           siteVector; | ||||||
|  |   typedef iMatrix<CComplex,nbasis >           siteMatrix; | ||||||
|  |   typedef Lattice<iScalar<CComplex> >         CoarseComplexField; | ||||||
|  |   typedef Lattice<siteVector>                 CoarseVector; | ||||||
|  |   typedef Lattice<iMatrix<CComplex,nbasis > > CoarseMatrix; | ||||||
|  |   typedef iMatrix<CComplex,nbasis >  Cobj; | ||||||
|  |   typedef Lattice< CComplex >   CoarseScalar; // used for inner products on fine field | ||||||
|  |   typedef Lattice<Fobj >        FineField; | ||||||
|  |   typedef CoarseVector Field; | ||||||
|  |   //////////////////// | ||||||
|  |   // Data members | ||||||
|  |   //////////////////// | ||||||
|  |   int hermitian; | ||||||
|  |   GridBase      *       _FineGrid;  | ||||||
|  |   GridCartesian *       _CoarseGrid;  | ||||||
|  |   NonLocalStencilGeometry &geom; | ||||||
|  |   PaddedCell Cell; | ||||||
|  |   GeneralLocalStencil Stencil; | ||||||
|  |    | ||||||
|  |   std::vector<CoarseMatrix> _A; | ||||||
|  |   std::vector<CoarseMatrix> _Adag; | ||||||
|  |  | ||||||
|  |   /////////////////////// | ||||||
|  |   // Interface | ||||||
|  |   /////////////////////// | ||||||
|  |   GridBase      * Grid(void)           { return _FineGrid; };   // this is all the linalg routines need to know | ||||||
|  |   GridBase      * FineGrid(void)       { return _FineGrid; };   // this is all the linalg routines need to know | ||||||
|  |   GridCartesian * CoarseGrid(void)     { return _CoarseGrid; };   // this is all the linalg routines need to know | ||||||
|  |  | ||||||
|  |  | ||||||
|  |   void ProjectNearestNeighbour(RealD shift, GeneralCoarseOp &CopyMe) | ||||||
|  |   { | ||||||
|  |     int nfound=0; | ||||||
|  |     std::cout << " ProjectNearestNeighbour "<< CopyMe._A[0].Grid()<<std::endl; | ||||||
|  |     for(int p=0;p<geom.npoint;p++){ | ||||||
|  |       for(int pp=0;pp<CopyMe.geom.npoint;pp++){ | ||||||
|  |  	// Search for the same relative shift | ||||||
|  | 	// Avoids brutal handling of Grid pointers | ||||||
|  | 	if ( CopyMe.geom.shifts[pp]==geom.shifts[p] ) { | ||||||
|  | 	  _A[p] = CopyMe.Cell.Extract(CopyMe._A[pp]); | ||||||
|  | 	  _Adag[p] = CopyMe.Cell.Extract(CopyMe._Adag[pp]); | ||||||
|  | 	  nfound++; | ||||||
|  | 	} | ||||||
|  |       } | ||||||
|  |     } | ||||||
|  |     assert(nfound==geom.npoint); | ||||||
|  |     ExchangeCoarseLinks(); | ||||||
|  |   } | ||||||
|  |    | ||||||
|  |   GeneralCoarsenedMatrix(NonLocalStencilGeometry &_geom,GridBase *FineGrid, GridCartesian * CoarseGrid) | ||||||
|  |     : geom(_geom), | ||||||
|  |       _FineGrid(FineGrid), | ||||||
|  |       _CoarseGrid(CoarseGrid), | ||||||
|  |       hermitian(1), | ||||||
|  |       Cell(_geom.Depth(),_CoarseGrid), | ||||||
|  |       Stencil(Cell.grids.back(),geom.shifts) | ||||||
|  |   { | ||||||
|  |     { | ||||||
|  |       int npoint = _geom.npoint; | ||||||
|  |       autoView( Stencil_v  , Stencil, AcceleratorRead); | ||||||
|  |       int osites=Stencil.Grid()->oSites(); | ||||||
|  |       for(int ss=0;ss<osites;ss++){ | ||||||
|  | 	for(int point=0;point<npoint;point++){ | ||||||
|  | 	  auto SE = Stencil_v.GetEntry(point,ss); | ||||||
|  | 	  int o = SE->_offset; | ||||||
|  | 	  assert( o< osites); | ||||||
|  | 	} | ||||||
|  |       }     | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     _A.resize(geom.npoint,CoarseGrid); | ||||||
|  |     _Adag.resize(geom.npoint,CoarseGrid); | ||||||
|  |   } | ||||||
|  |   void M (const CoarseVector &in, CoarseVector &out) | ||||||
|  |   { | ||||||
|  |     Mult(_A,in,out); | ||||||
|  |   } | ||||||
|  |   void Mdag (const CoarseVector &in, CoarseVector &out) | ||||||
|  |   { | ||||||
|  |     if ( hermitian ) M(in,out); | ||||||
|  |     else Mult(_Adag,in,out); | ||||||
|  |   } | ||||||
|  |   void Mult (std::vector<CoarseMatrix> &A,const CoarseVector &in, CoarseVector &out) | ||||||
|  |   { | ||||||
|  |     RealD tviews=0; | ||||||
|  |     RealD ttot=0; | ||||||
|  |     RealD tmult=0; | ||||||
|  |     RealD texch=0; | ||||||
|  |     RealD text=0; | ||||||
|  |     ttot=-usecond(); | ||||||
|  |     conformable(CoarseGrid(),in.Grid()); | ||||||
|  |     conformable(in.Grid(),out.Grid()); | ||||||
|  |     out.Checkerboard() = in.Checkerboard(); | ||||||
|  |     CoarseVector tin=in; | ||||||
|  |  | ||||||
|  |     texch-=usecond(); | ||||||
|  |     CoarseVector pin  = Cell.Exchange(tin); | ||||||
|  |     texch+=usecond(); | ||||||
|  |  | ||||||
|  |     CoarseVector pout(pin.Grid()); pout=Zero(); | ||||||
|  |  | ||||||
|  |     int npoint = geom.npoint; | ||||||
|  |     typedef LatticeView<Cobj> Aview; | ||||||
|  |        | ||||||
|  |     const int Nsimd = CComplex::Nsimd(); | ||||||
|  |      | ||||||
|  |     int osites=pin.Grid()->oSites(); | ||||||
|  |     //    int gsites=pin.Grid()->gSites(); | ||||||
|  |  | ||||||
|  |     RealD flops = 1.0* npoint * nbasis * nbasis * 8 * osites; | ||||||
|  |     RealD bytes = (1.0*osites*sizeof(siteMatrix)*npoint+2.0*osites*sizeof(siteVector))*npoint; | ||||||
|  |        | ||||||
|  |     //    for(int point=0;point<npoint;point++){ | ||||||
|  |     //      conformable(A[point],pin); | ||||||
|  |     //    } | ||||||
|  |  | ||||||
|  |     { | ||||||
|  |       tviews-=usecond(); | ||||||
|  |       autoView( in_v , pin, AcceleratorRead); | ||||||
|  |       autoView( out_v , pout, AcceleratorWrite); | ||||||
|  |       autoView( Stencil_v  , Stencil, AcceleratorRead); | ||||||
|  |       tviews+=usecond(); | ||||||
|  |        | ||||||
|  |       for(int point=0;point<npoint;point++){ | ||||||
|  | 	tviews-=usecond(); | ||||||
|  | 	autoView( A_v, A[point],AcceleratorRead); | ||||||
|  | 	tviews+=usecond(); | ||||||
|  | 	tmult-=usecond(); | ||||||
|  | 	accelerator_for(sss, osites*nbasis, Nsimd, { | ||||||
|  |  | ||||||
|  | 	    typedef decltype(coalescedRead(in_v[0]))    calcVector; | ||||||
|  |  | ||||||
|  | 	    int ss = sss/nbasis; | ||||||
|  | 	    int b  = sss%nbasis; | ||||||
|  |  | ||||||
|  | 	    auto SE  = Stencil_v.GetEntry(point,ss); | ||||||
|  | 	    auto nbr = coalescedReadGeneralPermute(in_v[SE->_offset],SE->_permute,Nd); | ||||||
|  | 	    auto res = out_v(ss)(b); | ||||||
|  | 	    for(int bb=0;bb<nbasis;bb++) { | ||||||
|  | 	      res = res + coalescedRead(A_v[ss](b,bb))*nbr(bb); | ||||||
|  | 	    } | ||||||
|  | 	    coalescedWrite(out_v[ss](b),res); | ||||||
|  | 	}); | ||||||
|  |  | ||||||
|  | 	tmult+=usecond(); | ||||||
|  |       } | ||||||
|  |     } | ||||||
|  |     text-=usecond(); | ||||||
|  |     out = Cell.Extract(pout); | ||||||
|  |     text+=usecond(); | ||||||
|  |     ttot+=usecond(); | ||||||
|  |  | ||||||
|  |     std::cout << GridLogPerformance<<"Coarse Mult Aviews "<<tviews<<" us"<<std::endl; | ||||||
|  |     std::cout << GridLogPerformance<<"Coarse Mult exch "<<texch<<" us"<<std::endl; | ||||||
|  |     std::cout << GridLogPerformance<<"Coarse Mult mult "<<tmult<<" us"<<std::endl; | ||||||
|  |     std::cout << GridLogPerformance<<"Coarse Mult ext  "<<text<<" us"<<std::endl; | ||||||
|  |     std::cout << GridLogPerformance<<"Coarse Mult tot  "<<ttot<<" us"<<std::endl; | ||||||
|  |     std::cout << GridLogPerformance<<"Coarse Kernel "<< flops/tmult<<" mflop/s"<<std::endl; | ||||||
|  |     std::cout << GridLogPerformance<<"Coarse Kernel "<< bytes/tmult<<" MB/s"<<std::endl; | ||||||
|  |     std::cout << GridLogPerformance<<"Coarse flops/s "<< flops/ttot<<" mflop/s"<<std::endl; | ||||||
|  |     std::cout << GridLogPerformance<<"Coarse bytes   "<< bytes/1e6<<" MB"<<std::endl; | ||||||
|  |   }; | ||||||
|  |  | ||||||
|  |   void PopulateAdag(void) | ||||||
|  |   { | ||||||
|  |     for(int64_t bidx=0;bidx<CoarseGrid()->gSites() ;bidx++){ | ||||||
|  |       Coordinate bcoor; | ||||||
|  |       CoarseGrid()->GlobalIndexToGlobalCoor(bidx,bcoor); | ||||||
|  |        | ||||||
|  |       for(int p=0;p<geom.npoint;p++){ | ||||||
|  | 	Coordinate scoor = bcoor; | ||||||
|  | 	for(int mu=0;mu<bcoor.size();mu++){ | ||||||
|  | 	  int L = CoarseGrid()->GlobalDimensions()[mu]; | ||||||
|  | 	  scoor[mu] = (bcoor[mu] - geom.shifts[p][mu] + L) % L; // Modulo arithmetic | ||||||
|  | 	} | ||||||
|  | 	// Flip to poke/peekLocalSite and not too bad | ||||||
|  | 	auto link = peekSite(_A[p],scoor); | ||||||
|  | 	int pp = geom.Reverse(p); | ||||||
|  | 	pokeSite(adj(link),_Adag[pp],bcoor); | ||||||
|  |       } | ||||||
|  |     } | ||||||
|  |   } | ||||||
|  |   ///////////////////////////////////////////////////////////// | ||||||
|  |   //  | ||||||
|  |   // A) Only reduced flops option is to use a padded cell of depth 4 | ||||||
|  |   // and apply MpcDagMpc in the padded cell. | ||||||
|  |   // | ||||||
|  |   // Makes for ONE application of MpcDagMpc per vector instead of 30 or 80. | ||||||
|  |   // With the effective cell size around (B+8)^4 perhaps 12^4/4^4 ratio | ||||||
|  |   // Cost is 81x more, same as stencil size. | ||||||
|  |   // | ||||||
|  |   // But: can eliminate comms and do as local dirichlet. | ||||||
|  |   // | ||||||
|  |   // Local exchange gauge field once. | ||||||
|  |   // Apply to all vectors, local only computation. | ||||||
|  |   // Must exchange ghost subcells in reverse process of PaddedCell to take inner products | ||||||
|  |   // | ||||||
|  |   // B) Can reduce cost: pad by 1, apply Deo      (4^4+6^4+8^4+8^4 )/ (4x 4^4) | ||||||
|  |   //                     pad by 2, apply Doe | ||||||
|  |   //                     pad by 3, apply Deo | ||||||
|  |   //                     then break out 8x directions; cost is ~10x MpcDagMpc per vector | ||||||
|  |   // | ||||||
|  |   // => almost factor of 10 in setup cost, excluding data rearrangement | ||||||
|  |   // | ||||||
|  |   // Intermediates -- ignore the corner terms, leave approximate and force Hermitian | ||||||
|  |   // Intermediates -- pad by 2 and apply 1+8+24 = 33 times. | ||||||
|  |   ///////////////////////////////////////////////////////////// | ||||||
|  |  | ||||||
|  |     ////////////////////////////////////////////////////////// | ||||||
|  |     // BFM HDCG style approach: Solve a system of equations to get Aij | ||||||
|  |     ////////////////////////////////////////////////////////// | ||||||
|  |     /* | ||||||
|  |      *     Here, k,l index which possible shift within the 3^Nd "ball" connected by MdagM. | ||||||
|  |      * | ||||||
|  |      *     conj(phases[block]) proj[k][ block*Nvec+j ] =  \sum_ball  e^{i q_k . delta} < phi_{block,j} | MdagM | phi_{(block+delta),i} >  | ||||||
|  |      *                                                 =  \sum_ball e^{iqk.delta} A_ji | ||||||
|  |      * | ||||||
|  |      *     Must invert matrix M_k,l = e^[i q_k . delta_l] | ||||||
|  |      * | ||||||
|  |      *     Where q_k = delta_k . (2*M_PI/global_nb[mu]) | ||||||
|  |      */ | ||||||
|  |   void CoarsenOperator(LinearOperatorBase<Lattice<Fobj> > &linop, | ||||||
|  | 		       Aggregation<Fobj,CComplex,nbasis> & Subspace) | ||||||
|  |   { | ||||||
|  |     std::cout << GridLogMessage<< "GeneralCoarsenMatrix "<< std::endl; | ||||||
|  |     GridBase *grid = FineGrid(); | ||||||
|  |  | ||||||
|  |     RealD tproj=0.0; | ||||||
|  |     RealD teigen=0.0; | ||||||
|  |     RealD tmat=0.0; | ||||||
|  |     RealD tphase=0.0; | ||||||
|  |     RealD tinv=0.0; | ||||||
|  |  | ||||||
|  |     ///////////////////////////////////////////////////////////// | ||||||
|  |     // Orthogonalise the subblocks over the basis | ||||||
|  |     ///////////////////////////////////////////////////////////// | ||||||
|  |     CoarseScalar InnerProd(CoarseGrid());  | ||||||
|  |     blockOrthogonalise(InnerProd,Subspace.subspace); | ||||||
|  |  | ||||||
|  |     const int npoint = geom.npoint; | ||||||
|  |        | ||||||
|  |     Coordinate clatt = CoarseGrid()->GlobalDimensions(); | ||||||
|  |     int Nd = CoarseGrid()->Nd(); | ||||||
|  |  | ||||||
|  |       /* | ||||||
|  |        *     Here, k,l index which possible momentum/shift within the N-points connected by MdagM. | ||||||
|  |        *     Matrix index i is mapped to this shift via  | ||||||
|  |        *               geom.shifts[i] | ||||||
|  |        * | ||||||
|  |        *     conj(pha[block]) proj[k (which mom)][j (basis vec cpt)][block]  | ||||||
|  |        *       =  \sum_{l in ball}  e^{i q_k . delta_l} < phi_{block,j} | MdagM | phi_{(block+delta_l),i} >  | ||||||
|  |        *       =  \sum_{l in ball} e^{iqk.delta_l} A_ji^{b.b+l} | ||||||
|  |        *       = M_{kl} A_ji^{b.b+l} | ||||||
|  |        * | ||||||
|  |        *     Must assemble and invert matrix M_k,l = e^[i q_k . delta_l] | ||||||
|  |        *   | ||||||
|  |        *     Where q_k = delta_k . (2*M_PI/global_nb[mu]) | ||||||
|  |        * | ||||||
|  |        *     Then A{ji}^{b,b+l} = M^{-1}_{lm} ComputeProj_{m,b,i,j} | ||||||
|  |        */ | ||||||
|  |     teigen-=usecond(); | ||||||
|  |     Eigen::MatrixXcd Mkl    = Eigen::MatrixXcd::Zero(npoint,npoint); | ||||||
|  |     Eigen::MatrixXcd invMkl = Eigen::MatrixXcd::Zero(npoint,npoint); | ||||||
|  |     ComplexD ci(0.0,1.0); | ||||||
|  |     for(int k=0;k<npoint;k++){ // Loop over momenta | ||||||
|  |  | ||||||
|  |       for(int l=0;l<npoint;l++){ // Loop over nbr relative | ||||||
|  | 	ComplexD phase(0.0,0.0); | ||||||
|  | 	for(int mu=0;mu<Nd;mu++){ | ||||||
|  | 	  RealD TwoPiL =  M_PI * 2.0/ clatt[mu]; | ||||||
|  | 	  phase=phase+TwoPiL*geom.shifts[k][mu]*geom.shifts[l][mu]; | ||||||
|  | 	} | ||||||
|  | 	phase=exp(phase*ci); | ||||||
|  | 	Mkl(k,l) = phase; | ||||||
|  |       } | ||||||
|  |     } | ||||||
|  |     invMkl = Mkl.inverse(); | ||||||
|  |     teigen+=usecond(); | ||||||
|  |  | ||||||
|  |     /////////////////////////////////////////////////////////////////////// | ||||||
|  |     // Now compute the matrix elements of linop between the orthonormal | ||||||
|  |     // set of vectors. | ||||||
|  |     /////////////////////////////////////////////////////////////////////// | ||||||
|  |     FineField phaV(grid); // Phased block basis vector | ||||||
|  |     FineField MphaV(grid);// Matrix applied | ||||||
|  |     CoarseVector coarseInner(CoarseGrid()); | ||||||
|  |  | ||||||
|  |     std::vector<CoarseVector> ComputeProj(npoint,CoarseGrid()); | ||||||
|  |     std::vector<CoarseVector>          FT(npoint,CoarseGrid()); | ||||||
|  |     for(int i=0;i<nbasis;i++){// Loop over basis vectors | ||||||
|  |       std::cout << GridLogMessage<< "CoarsenMatrixColoured vec "<<i<<"/"<<nbasis<< std::endl; | ||||||
|  |       for(int p=0;p<npoint;p++){ // Loop over momenta in npoint | ||||||
|  | 	///////////////////////////////////////////////////// | ||||||
|  | 	// Stick a phase on every block | ||||||
|  | 	///////////////////////////////////////////////////// | ||||||
|  | 	tphase-=usecond(); | ||||||
|  | 	CoarseComplexField coor(CoarseGrid()); | ||||||
|  | 	CoarseComplexField pha(CoarseGrid());	pha=Zero(); | ||||||
|  | 	for(int mu=0;mu<Nd;mu++){ | ||||||
|  | 	  LatticeCoordinate(coor,mu); | ||||||
|  | 	  RealD TwoPiL =  M_PI * 2.0/ clatt[mu]; | ||||||
|  | 	  pha = pha + (TwoPiL * geom.shifts[p][mu]) * coor; | ||||||
|  | 	} | ||||||
|  | 	pha  =exp(pha*ci); | ||||||
|  | 	phaV=Zero(); | ||||||
|  | 	blockZAXPY(phaV,pha,Subspace.subspace[i],phaV); | ||||||
|  | 	tphase+=usecond(); | ||||||
|  |  | ||||||
|  | 	///////////////////////////////////////////////////////////////////// | ||||||
|  | 	// Multiple phased subspace vector by matrix and project to subspace | ||||||
|  | 	// Remove local bulk phase to leave relative phases | ||||||
|  | 	///////////////////////////////////////////////////////////////////// | ||||||
|  | 	tmat-=usecond(); | ||||||
|  | 	linop.Op(phaV,MphaV); | ||||||
|  | 	tmat+=usecond(); | ||||||
|  |  | ||||||
|  | 	tproj-=usecond(); | ||||||
|  | 	blockProject(coarseInner,MphaV,Subspace.subspace); | ||||||
|  | 	coarseInner = conjugate(pha) * coarseInner; | ||||||
|  |  | ||||||
|  | 	ComputeProj[p] = coarseInner; | ||||||
|  | 	tproj+=usecond(); | ||||||
|  |  | ||||||
|  |       } | ||||||
|  |  | ||||||
|  |       tinv-=usecond(); | ||||||
|  |       for(int k=0;k<npoint;k++){ | ||||||
|  | 	FT[k] = Zero(); | ||||||
|  | 	for(int l=0;l<npoint;l++){ | ||||||
|  | 	  FT[k]= FT[k]+ invMkl(l,k)*ComputeProj[l]; | ||||||
|  | 	} | ||||||
|  |        | ||||||
|  | 	int osites=CoarseGrid()->oSites(); | ||||||
|  | 	autoView( A_v  , _A[k], AcceleratorWrite); | ||||||
|  | 	autoView( FT_v  , FT[k], AcceleratorRead); | ||||||
|  | 	accelerator_for(sss, osites, 1, { | ||||||
|  | 	    for(int j=0;j<nbasis;j++){ | ||||||
|  | 	      A_v[sss](j,i) = FT_v[sss](j); | ||||||
|  | 	    } | ||||||
|  |         }); | ||||||
|  |       } | ||||||
|  |       tinv+=usecond(); | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     for(int p=0;p<geom.npoint;p++){ | ||||||
|  |       Coordinate coor({0,0,0,0,0}); | ||||||
|  |       auto sval = peekSite(_A[p],coor); | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     // Only needed if nonhermitian | ||||||
|  |     if ( ! hermitian ) { | ||||||
|  |       std::cout << GridLogMessage<<"PopulateAdag  "<<std::endl; | ||||||
|  |       PopulateAdag(); | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     // Need to write something to populate Adag from A | ||||||
|  |     std::cout << GridLogMessage<<"ExchangeCoarseLinks  "<<std::endl; | ||||||
|  |     ExchangeCoarseLinks(); | ||||||
|  |     std::cout << GridLogMessage<<"CoarsenOperator eigen  "<<teigen<<" us"<<std::endl; | ||||||
|  |     std::cout << GridLogMessage<<"CoarsenOperator phase  "<<tphase<<" us"<<std::endl; | ||||||
|  |     std::cout << GridLogMessage<<"CoarsenOperator mat    "<<tmat <<" us"<<std::endl; | ||||||
|  |     std::cout << GridLogMessage<<"CoarsenOperator proj   "<<tproj<<" us"<<std::endl; | ||||||
|  |     std::cout << GridLogMessage<<"CoarsenOperator inv    "<<tinv<<" us"<<std::endl; | ||||||
|  |   } | ||||||
|  |   void ExchangeCoarseLinks(void){ | ||||||
|  |     for(int p=0;p<geom.npoint;p++){ | ||||||
|  |       std::cout << "Exchange "<<p<<std::endl; | ||||||
|  |       _A[p] = Cell.Exchange(_A[p]); | ||||||
|  |       _Adag[p]= Cell.Exchange(_Adag[p]); | ||||||
|  |     } | ||||||
|  |   } | ||||||
|  |   virtual  void Mdiag    (const Field &in, Field &out){ assert(0);}; | ||||||
|  |   virtual  void Mdir     (const Field &in, Field &out,int dir, int disp){assert(0);}; | ||||||
|  |   virtual  void MdirAll  (const Field &in, std::vector<Field> &out){assert(0);}; | ||||||
|  | }; | ||||||
|  |  | ||||||
|  |  | ||||||
|  | NAMESPACE_END(Grid); | ||||||
| @@ -90,9 +90,8 @@ public: | |||||||
|     order=_order; |     order=_order; | ||||||
|        |        | ||||||
|     if(order < 2) exit(-1); |     if(order < 2) exit(-1); | ||||||
|     Coeffs.resize(order); |     Coeffs.resize(order,0.0); | ||||||
|     Coeffs.assign(0.,order); |     Coeffs[order-1] = 1.0; | ||||||
|     Coeffs[order-1] = 1.; |  | ||||||
|   }; |   }; | ||||||
|    |    | ||||||
|   // PB - more efficient low pass drops high modes above the low as 1/x uses all Chebyshev's. |   // PB - more efficient low pass drops high modes above the low as 1/x uses all Chebyshev's. | ||||||
|   | |||||||
| @@ -33,15 +33,6 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk> | |||||||
|    * Script A = SolverMatrix  |    * Script A = SolverMatrix  | ||||||
|    * Script P = Preconditioner |    * Script P = Preconditioner | ||||||
|    * |    * | ||||||
|    * Deflation methods considered |  | ||||||
|    *      -- Solve P A x = P b        [ like Luscher ] |  | ||||||
|    * DEF-1        M P A x = M P b     [i.e. left precon] |  | ||||||
|    * DEF-2        P^T M A x = P^T M b |  | ||||||
|    * ADEF-1       Preconditioner = M P + Q      [ Q + M + M A Q] |  | ||||||
|    * ADEF-2       Preconditioner = P^T M + Q |  | ||||||
|    * BNN          Preconditioner = P^T M P + Q |  | ||||||
|    * BNN2         Preconditioner = M P + P^TM +Q - M P A M  |  | ||||||
|    *  |  | ||||||
|    * Implement ADEF-2 |    * Implement ADEF-2 | ||||||
|    * |    * | ||||||
|    * Vstart = P^Tx + Qb |    * Vstart = P^Tx + Qb | ||||||
| @@ -49,202 +40,245 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk> | |||||||
|    * M2=M3=1 |    * M2=M3=1 | ||||||
|    * Vout = x |    * Vout = x | ||||||
|    */ |    */ | ||||||
|  | NAMESPACE_BEGIN(Grid); | ||||||
|  |  | ||||||
| // abstract base | template<class Field> | ||||||
| template<class Field, class CoarseField> | class TwoLevelCG : public LinearFunction<Field> | ||||||
| class TwoLevelFlexiblePcg : public LinearFunction<Field> |  | ||||||
| { | { | ||||||
|  public: |  public: | ||||||
|   int verbose; |  | ||||||
|   RealD   Tolerance; |   RealD   Tolerance; | ||||||
|   Integer MaxIterations; |   Integer MaxIterations; | ||||||
|   const int mmax = 5; |  | ||||||
|   GridBase *grid; |   GridBase *grid; | ||||||
|   GridBase *coarsegrid; |  | ||||||
|  |  | ||||||
|   LinearOperatorBase<Field>   *_Linop |   // Fine operator, Smoother, CoarseSolver | ||||||
|   OperatorFunction<Field>     *_Smoother, |   LinearOperatorBase<Field>   &_FineLinop; | ||||||
|   LinearFunction<CoarseField> *_CoarseSolver; |   LinearFunction<Field>   &_Smoother; | ||||||
|  |  | ||||||
|   // Need somthing that knows how to get from Coarse to fine and back again |  | ||||||
|    |    | ||||||
|   // more most opertor functions |   // more most opertor functions | ||||||
|   TwoLevelFlexiblePcg(RealD tol, |   TwoLevelCG(RealD tol, | ||||||
| 		     Integer maxit, | 	     Integer maxit, | ||||||
| 		     LinearOperatorBase<Field> *Linop, | 	     LinearOperatorBase<Field>   &FineLinop, | ||||||
| 		     LinearOperatorBase<Field> *SmootherLinop, | 	     LinearFunction<Field>       &Smoother, | ||||||
| 		     OperatorFunction<Field>   *Smoother, | 	     GridBase *fine) :  | ||||||
| 		     OperatorFunction<CoarseField>  CoarseLinop |  | ||||||
| 		     ) :  |  | ||||||
|       Tolerance(tol),  |       Tolerance(tol),  | ||||||
|       MaxIterations(maxit), |       MaxIterations(maxit), | ||||||
|       _Linop(Linop), |       _FineLinop(FineLinop), | ||||||
|       _PreconditionerLinop(PrecLinop), |       _Smoother(Smoother) | ||||||
|       _Preconditioner(Preconditioner) |  | ||||||
|   { |   { | ||||||
|     verbose=0; |     grid       = fine; | ||||||
|   }; |   }; | ||||||
|    |    | ||||||
|   // The Pcg routine is common to all, but the various matrices differ from derived  |   virtual void operator() (const Field &src, Field &psi) | ||||||
|   // implementation to derived implmentation |   { | ||||||
|   void operator() (const Field &src, Field &psi){ |     Field resid(grid); | ||||||
|   void operator() (const Field &src, Field &psi){ |  | ||||||
|  |  | ||||||
|     psi.Checkerboard() = src.Checkerboard(); |  | ||||||
|     grid             = src.Grid(); |  | ||||||
|  |  | ||||||
|     RealD f; |     RealD f; | ||||||
|     RealD rtzp,rtz,a,d,b; |     RealD rtzp,rtz,a,d,b; | ||||||
|     RealD rptzp; |     RealD rptzp; | ||||||
|     RealD tn; |  | ||||||
|     RealD guess = norm2(psi); |  | ||||||
|     RealD ssq   = norm2(src); |  | ||||||
|     RealD rsq   = ssq*Tolerance*Tolerance; |  | ||||||
|      |      | ||||||
|     ///////////////////////////// |     Field x(grid);  | ||||||
|     // Set up history vectors |     Field p(grid); | ||||||
|     ///////////////////////////// |     Field z(grid); | ||||||
|     std::vector<Field> p  (mmax,grid); |  | ||||||
|     std::vector<Field> mmp(mmax,grid); |  | ||||||
|     std::vector<RealD> pAp(mmax); |  | ||||||
|  |  | ||||||
|     Field x  (grid); x = psi; |  | ||||||
|     Field z  (grid); |  | ||||||
|     Field tmp(grid); |     Field tmp(grid); | ||||||
|  |     Field mmp(grid); | ||||||
|     Field r  (grid); |     Field r  (grid); | ||||||
|     Field mu (grid); |     Field mu (grid); | ||||||
|  |     Field rp (grid); | ||||||
|      |      | ||||||
|  |     //Initial residual computation & set up | ||||||
|  |     RealD guess = norm2(psi); | ||||||
|  |     double tn; | ||||||
|  |  | ||||||
|  |     GridStopWatch HDCGTimer; | ||||||
|  |     HDCGTimer.Start(); | ||||||
|     ////////////////////////// |     ////////////////////////// | ||||||
|     // x0 = Vstart -- possibly modify guess |     // x0 = Vstart -- possibly modify guess | ||||||
|     ////////////////////////// |     ////////////////////////// | ||||||
|     x=src; |     x=Zero(); | ||||||
|     Vstart(x,src); |     Vstart(x,src); | ||||||
|  |  | ||||||
|     // r0 = b -A x0 |     // r0 = b -A x0 | ||||||
|     HermOp(x,mmp); // Shouldn't this be something else? |     _FineLinop.HermOp(x,mmp); | ||||||
|     axpy (r, -1.0,mmp[0], src);    // Recomputes r=src-Ax0 |  | ||||||
|  |     axpy(r, -1.0, mmp, src);    // Recomputes r=src-x0 | ||||||
|  |     rp=r; | ||||||
|  |  | ||||||
|     ////////////////////////////////// |     ////////////////////////////////// | ||||||
|     // Compute z = M1 x |     // Compute z = M1 x | ||||||
|     ////////////////////////////////// |     ////////////////////////////////// | ||||||
|     M1(r,z,tmp,mp,SmootherMirs); |     PcgM1(r,z); | ||||||
|     rtzp =real(innerProduct(r,z)); |     rtzp =real(innerProduct(r,z)); | ||||||
|  |  | ||||||
|     /////////////////////////////////////// |     /////////////////////////////////////// | ||||||
|     // Solve for Mss mu = P A z and set p = z-mu |     // Except Def2, M2 is trivial | ||||||
|     // Def2: p = 1 - Q Az = Pright z  |  | ||||||
|     // Other algos M2 is trivial |  | ||||||
|     /////////////////////////////////////// |     /////////////////////////////////////// | ||||||
|     M2(z,p[0]); |     p=z; | ||||||
|  |  | ||||||
|     for (int k=0;k<=MaxIterations;k++){ |     RealD ssq =  norm2(src); | ||||||
|  |     RealD rsq =  ssq*Tolerance*Tolerance; | ||||||
|  |  | ||||||
|       int peri_k  = k % mmax; |     std::cout<<GridLogMessage<<"HDCG: k=0 residual "<<rtzp<<" target rsq "<<rsq<<" ssq "<<ssq<<std::endl; | ||||||
|       int peri_kp = (k+1) % mmax; |      | ||||||
|  |     for (int k=1;k<=MaxIterations;k++){ | ||||||
|  |  | ||||||
|       rtz=rtzp; |       rtz=rtzp; | ||||||
|       d= M3(p[peri_k],mp,mmp[peri_k],tmp); |       d= PcgM3(p,mmp); | ||||||
|       a = rtz/d; |       a = rtz/d; | ||||||
|  |  | ||||||
|       // Memorise this |       axpy(x,a,p,x); | ||||||
|       pAp[peri_k] = d; |       RealD rn = axpy_norm(r,-a,mmp,r); | ||||||
|  |  | ||||||
|       axpy(x,a,p[peri_k],x); |       PcgM1(r,z); | ||||||
|       RealD rn = axpy_norm(r,-a,mmp[peri_k],r); |  | ||||||
|  |  | ||||||
|       // Compute z = M x |  | ||||||
|       M1(r,z,tmp,mp); |  | ||||||
|  |  | ||||||
|       rtzp =real(innerProduct(r,z)); |       rtzp =real(innerProduct(r,z)); | ||||||
|  |  | ||||||
|       M2(z,mu); // ADEF-2 this is identity. Axpy possible to eliminate |       int ipcg=1; // almost free inexact preconditioned CG | ||||||
|  |       if (ipcg) { | ||||||
|       p[peri_kp]=p[peri_k]; | 	rptzp =real(innerProduct(rp,z)); | ||||||
|  |       } else { | ||||||
|       // Standard search direction  p -> z + b p    ; b =  | 	rptzp =0; | ||||||
|       b = (rtzp)/rtz; |  | ||||||
|  |  | ||||||
|       int northog; |  | ||||||
|       //    northog     = (peri_kp==0)?1:peri_kp; // This is the fCG(mmax) algorithm |  | ||||||
|       northog     = (k>mmax-1)?(mmax-1):k;        // This is the fCG-Tr(mmax-1) algorithm |  | ||||||
|      |  | ||||||
|       for(int back=0; back < northog; back++){ |  | ||||||
| 	int peri_back = (k-back)%mmax; |  | ||||||
| 	RealD pbApk= real(innerProduct(mmp[peri_back],p[peri_kp])); |  | ||||||
| 	RealD beta = -pbApk/pAp[peri_back]; |  | ||||||
| 	axpy(p[peri_kp],beta,p[peri_back],p[peri_kp]); |  | ||||||
|       } |       } | ||||||
|  |       b = (rtzp-rptzp)/rtz; | ||||||
|  |  | ||||||
|  |       PcgM2(z,mu); // ADEF-2 this is identity. Axpy possible to eliminate | ||||||
|  |  | ||||||
|  |       axpy(p,b,p,mu);  // mu = A r | ||||||
|  |  | ||||||
|       RealD rrn=sqrt(rn/ssq); |       RealD rrn=sqrt(rn/ssq); | ||||||
|       std::cout<<GridLogMessage<<"TwoLevelfPcg: k= "<<k<<" residual = "<<rrn<<std::endl; |       RealD rtn=sqrt(rtz/ssq); | ||||||
|  |       std::cout<<GridLogMessage<<"HDCG: Pcg k= "<<k<<" residual = "<<rrn<<std::endl; | ||||||
|  |  | ||||||
|  |       if ( ipcg ) { | ||||||
|  | 	axpy(rp,0.0,r,r); | ||||||
|  |       } | ||||||
|  |  | ||||||
|       // Stopping condition |       // Stopping condition | ||||||
|       if ( rn <= rsq ) {  |       if ( rn <= rsq ) {  | ||||||
|  |  | ||||||
| 	HermOp(x,mmp); // Shouldn't this be something else? | 	HDCGTimer.Stop(); | ||||||
| 	axpy(tmp,-1.0,src,mmp[0]); | 	std::cout<<GridLogMessage<<"HDCG: Pcg converged in "<<k<<" iterations and "<<HDCGTimer.Elapsed()<<std::endl;; | ||||||
|  |  | ||||||
| 	RealD psinorm = sqrt(norm2(x)); | 	_FineLinop.HermOp(x,mmp);			   | ||||||
| 	RealD srcnorm = sqrt(norm2(src)); | 	axpy(tmp,-1.0,src,mmp); | ||||||
| 	RealD tmpnorm = sqrt(norm2(tmp)); |  | ||||||
| 	RealD true_residual = tmpnorm/srcnorm; | 	RealD  mmpnorm = sqrt(norm2(mmp)); | ||||||
| 	std::cout<<GridLogMessage<<"TwoLevelfPcg:   true residual is "<<true_residual<<std::endl; | 	RealD  xnorm   = sqrt(norm2(x)); | ||||||
| 	std::cout<<GridLogMessage<<"TwoLevelfPcg: target residual was"<<Tolerance<<std::endl; | 	RealD  srcnorm = sqrt(norm2(src)); | ||||||
| 	return k; | 	RealD  tmpnorm = sqrt(norm2(tmp)); | ||||||
|  | 	RealD  true_residual = tmpnorm/srcnorm; | ||||||
|  | 	std::cout<<GridLogMessage<<"HDCG: true residual is "<<true_residual | ||||||
|  | 		 <<" solution "<<xnorm<<" source "<<srcnorm<<std::endl; | ||||||
|  |  | ||||||
|  | 	return; | ||||||
|       } |       } | ||||||
|  |  | ||||||
|     } |     } | ||||||
|     // Non-convergence |     std::cout << "HDCG: Pcg not converged"<<std::endl; | ||||||
|     assert(0); |     return ; | ||||||
|   } |   } | ||||||
|    |    | ||||||
|  |  | ||||||
|  public: |  public: | ||||||
|  |  | ||||||
|   virtual void M(Field & in,Field & out,Field & tmp) { |   virtual void PcgM1(Field & in, Field & out)     =0; | ||||||
|  |   virtual void Vstart(Field & x,const Field & src)=0; | ||||||
|  |  | ||||||
|  |   virtual void PcgM2(const Field & in, Field & out) { | ||||||
|  |     out=in; | ||||||
|   } |   } | ||||||
|  |  | ||||||
|   virtual void M1(Field & in, Field & out) {// the smoother |   virtual RealD PcgM3(const Field & p, Field & mmp){ | ||||||
|  |     RealD dd; | ||||||
|  |     _FineLinop.HermOp(p,mmp); | ||||||
|  |     ComplexD dot = innerProduct(p,mmp); | ||||||
|  |     dd=real(dot); | ||||||
|  |     return dd; | ||||||
|  |   } | ||||||
|  |  | ||||||
|  |   ///////////////////////////////////////////////////////////////////// | ||||||
|  |   // Only Def1 has non-trivial Vout. | ||||||
|  |   ///////////////////////////////////////////////////////////////////// | ||||||
|  |   virtual void   Vout  (Field & in, Field & out,Field & src){ | ||||||
|  |     out = in; | ||||||
|  |   } | ||||||
|  |  | ||||||
|  | }; | ||||||
|  |    | ||||||
|  | template<class Field, class CoarseField, class Aggregation> | ||||||
|  | class TwoLevelADEF2 : public TwoLevelCG<Field> | ||||||
|  | { | ||||||
|  |  public: | ||||||
|  |   /////////////////////////////////////////////////////////////////////////////////// | ||||||
|  |   // Need something that knows how to get from Coarse to fine and back again | ||||||
|  |   //  void ProjectToSubspace(CoarseVector &CoarseVec,const FineField &FineVec){ | ||||||
|  |   //  void PromoteFromSubspace(const CoarseVector &CoarseVec,FineField &FineVec){ | ||||||
|  |   /////////////////////////////////////////////////////////////////////////////////// | ||||||
|  |   GridBase *coarsegrid; | ||||||
|  |   Aggregation &_Aggregates;                     | ||||||
|  |   LinearFunction<CoarseField> &_CoarseSolver; | ||||||
|  |   LinearFunction<CoarseField> &_CoarseSolverPrecise; | ||||||
|  |   /////////////////////////////////////////////////////////////////////////////////// | ||||||
|  |    | ||||||
|  |   // more most opertor functions | ||||||
|  |   TwoLevelADEF2(RealD tol, | ||||||
|  | 		Integer maxit, | ||||||
|  | 		LinearOperatorBase<Field>   &FineLinop, | ||||||
|  | 		LinearFunction<Field>   &Smoother, | ||||||
|  | 		LinearFunction<CoarseField>  &CoarseSolver, | ||||||
|  | 		LinearFunction<CoarseField>  &CoarseSolverPrecise, | ||||||
|  | 		Aggregation &Aggregates | ||||||
|  | 		) : | ||||||
|  |     TwoLevelCG<Field>(tol,maxit,FineLinop,Smoother,Aggregates.FineGrid), | ||||||
|  |       _CoarseSolver(CoarseSolver), | ||||||
|  |       _CoarseSolverPrecise(CoarseSolverPrecise), | ||||||
|  |       _Aggregates(Aggregates) | ||||||
|  |   { | ||||||
|  |     coarsegrid = Aggregates.CoarseGrid; | ||||||
|  |   }; | ||||||
|  |  | ||||||
|  |   virtual void PcgM1(Field & in, Field & out) | ||||||
|  |   { | ||||||
|     // [PTM+Q] in = [1 - Q A] M in + Q in = Min + Q [ in -A Min] |     // [PTM+Q] in = [1 - Q A] M in + Q in = Min + Q [ in -A Min] | ||||||
|     Field tmp(grid); |  | ||||||
|     Field Min(grid); |  | ||||||
|  |  | ||||||
|     PcgM(in,Min); // Smoother call |     Field tmp(this->grid); | ||||||
|  |     Field Min(this->grid); | ||||||
|  |     CoarseField PleftProj(this->coarsegrid); | ||||||
|  |     CoarseField PleftMss_proj(this->coarsegrid); | ||||||
|  |  | ||||||
|     HermOp(Min,out); |     GridStopWatch SmootherTimer; | ||||||
|  |     GridStopWatch MatrixTimer; | ||||||
|  |     SmootherTimer.Start(); | ||||||
|  |     this->_Smoother(in,Min); | ||||||
|  |     SmootherTimer.Stop(); | ||||||
|  |  | ||||||
|  |     MatrixTimer.Start(); | ||||||
|  |     this->_FineLinop.HermOp(Min,out); | ||||||
|  |     MatrixTimer.Stop(); | ||||||
|     axpy(tmp,-1.0,out,in);          // tmp  = in - A Min |     axpy(tmp,-1.0,out,in);          // tmp  = in - A Min | ||||||
|  |  | ||||||
|     ProjectToSubspace(tmp,PleftProj);      |     GridStopWatch ProjTimer; | ||||||
|     ApplyInverse(PleftProj,PleftMss_proj); // Ass^{-1} [in - A Min]_s |     GridStopWatch CoarseTimer; | ||||||
|     PromoteFromSubspace(PleftMss_proj,tmp);// tmp = Q[in - A Min]   |     GridStopWatch PromTimer; | ||||||
|  |     ProjTimer.Start(); | ||||||
|  |     this->_Aggregates.ProjectToSubspace(PleftProj,tmp);      | ||||||
|  |     ProjTimer.Stop(); | ||||||
|  |     CoarseTimer.Start(); | ||||||
|  |     this->_CoarseSolver(PleftProj,PleftMss_proj); // Ass^{-1} [in - A Min]_s | ||||||
|  |     CoarseTimer.Stop(); | ||||||
|  |     PromTimer.Start(); | ||||||
|  |     this->_Aggregates.PromoteFromSubspace(PleftMss_proj,tmp);// tmp = Q[in - A Min]   | ||||||
|  |     PromTimer.Stop(); | ||||||
|  |     std::cout << GridLogPerformance << "PcgM1 breakdown "<<std::endl; | ||||||
|  |     std::cout << GridLogPerformance << "\tSmoother   " << SmootherTimer.Elapsed() <<std::endl; | ||||||
|  |     std::cout << GridLogPerformance << "\tMatrix     " << MatrixTimer.Elapsed() <<std::endl; | ||||||
|  |     std::cout << GridLogPerformance << "\tProj       " << ProjTimer.Elapsed() <<std::endl; | ||||||
|  |     std::cout << GridLogPerformance << "\tCoarse     " << CoarseTimer.Elapsed() <<std::endl; | ||||||
|  |     std::cout << GridLogPerformance << "\tProm       " << PromTimer.Elapsed() <<std::endl; | ||||||
|  |  | ||||||
|     axpy(out,1.0,Min,tmp); // Min+tmp |     axpy(out,1.0,Min,tmp); // Min+tmp | ||||||
|   } |   } | ||||||
|  |  | ||||||
|   virtual void M2(const Field & in, Field & out) { |   virtual void Vstart(Field & x,const Field & src) | ||||||
|     out=in; |   { | ||||||
|     // Must override for Def2 only |  | ||||||
|     //  case PcgDef2: |  | ||||||
|     //    Pright(in,out); |  | ||||||
|     //    break; |  | ||||||
|   } |  | ||||||
|  |  | ||||||
|   virtual RealD M3(const Field & p, Field & mmp){ |  | ||||||
|     double d,dd; |  | ||||||
|     HermOpAndNorm(p,mmp,d,dd); |  | ||||||
|     return dd; |  | ||||||
|     // Must override for Def1 only |  | ||||||
|     //  case PcgDef1: |  | ||||||
|     //    d=linop_d->Mprec(p,mmp,tmp,0,1);// Dag no |  | ||||||
|     //      linop_d->Mprec(mmp,mp,tmp,1);// Dag yes |  | ||||||
|     //    Pleft(mp,mmp); |  | ||||||
|     //    d=real(linop_d->inner(p,mmp)); |  | ||||||
|   } |  | ||||||
|  |  | ||||||
|   virtual void VstartDef2(Field & xconst Field & src){ |  | ||||||
|     //case PcgDef2: |  | ||||||
|     //case PcgAdef2:  |  | ||||||
|     //case PcgAdef2f: |  | ||||||
|     //case PcgV11f: |  | ||||||
|     /////////////////////////////////// |     /////////////////////////////////// | ||||||
|     // Choose x_0 such that  |     // Choose x_0 such that  | ||||||
|     // x_0 = guess +  (A_ss^inv) r_s = guess + Ass_inv [src -Aguess] |     // x_0 = guess +  (A_ss^inv) r_s = guess + Ass_inv [src -Aguess] | ||||||
| @@ -256,142 +290,72 @@ class TwoLevelFlexiblePcg : public LinearFunction<Field> | |||||||
|     //                   = src_s - (A guess)_s - src_s  + (A guess)_s  |     //                   = src_s - (A guess)_s - src_s  + (A guess)_s  | ||||||
|     //                   = 0  |     //                   = 0  | ||||||
|     /////////////////////////////////// |     /////////////////////////////////// | ||||||
|     Field r(grid); |     Field r(this->grid); | ||||||
|     Field mmp(grid); |     Field mmp(this->grid); | ||||||
|  |     CoarseField PleftProj(this->coarsegrid); | ||||||
|  |     CoarseField PleftMss_proj(this->coarsegrid); | ||||||
|  |  | ||||||
|     HermOp(x,mmp); |     this->_Aggregates.ProjectToSubspace(PleftProj,src);      | ||||||
|     axpy (r, -1.0, mmp, src);        // r_{-1} = src - A x |     this->_CoarseSolverPrecise(PleftProj,PleftMss_proj); // Ass^{-1} r_s | ||||||
|     ProjectToSubspace(r,PleftProj);      |     this->_Aggregates.PromoteFromSubspace(PleftMss_proj,x);   | ||||||
|     ApplyInverseCG(PleftProj,PleftMss_proj); // Ass^{-1} r_s |  | ||||||
|     PromoteFromSubspace(PleftMss_proj,mmp);   |  | ||||||
|     x=x+mmp; |  | ||||||
|  |  | ||||||
|   } |   } | ||||||
|  |  | ||||||
|   virtual void Vstart(Field & x,const Field & src){ | }; | ||||||
|     return; |  | ||||||
|   } |  | ||||||
|  |  | ||||||
|   ///////////////////////////////////////////////////////////////////// |  | ||||||
|   // Only Def1 has non-trivial Vout. Override in Def1 |  | ||||||
|   ///////////////////////////////////////////////////////////////////// |  | ||||||
|   virtual void   Vout  (Field & in, Field & out,Field & src){ |  | ||||||
|     out = in; |  | ||||||
|     //case PcgDef1: |  | ||||||
|     //    //Qb + PT x |  | ||||||
|     //    ProjectToSubspace(src,PleftProj);      |  | ||||||
|     //    ApplyInverse(PleftProj,PleftMss_proj); // Ass^{-1} r_s |  | ||||||
|     //    PromoteFromSubspace(PleftMss_proj,tmp);   |  | ||||||
|     //     |  | ||||||
|     //    Pright(in,out); |  | ||||||
|     //     |  | ||||||
|     //    linop_d->axpy(out,tmp,out,1.0); |  | ||||||
|     //    break; |  | ||||||
|   } |  | ||||||
|  |  | ||||||
|   //////////////////////////////////////////////////////////////////////////////////////////////// |  | ||||||
|   // Pright and Pleft are common to all implementations |  | ||||||
|   //////////////////////////////////////////////////////////////////////////////////////////////// |  | ||||||
|   virtual void Pright(Field & in,Field & out){ |  | ||||||
|     // P_R  = [ 1              0 ]  |  | ||||||
|     //        [ -Mss^-1 Msb    0 ]  |  | ||||||
|     Field in_sbar(grid); |  | ||||||
|  |  | ||||||
|     ProjectToSubspace(in,PleftProj);      |  | ||||||
|     PromoteFromSubspace(PleftProj,out);   |  | ||||||
|     axpy(in_sbar,-1.0,out,in);       // in_sbar = in - in_s  |  | ||||||
|  |  | ||||||
|     HermOp(in_sbar,out); |  | ||||||
|     ProjectToSubspace(out,PleftProj);           // Mssbar in_sbar  (project) |  | ||||||
|  |  | ||||||
|     ApplyInverse     (PleftProj,PleftMss_proj); // Mss^{-1} Mssbar  |  | ||||||
|     PromoteFromSubspace(PleftMss_proj,out);     //  |  | ||||||
|  |  | ||||||
|     axpy(out,-1.0,out,in_sbar);     // in_sbar - Mss^{-1} Mssbar in_sbar |  | ||||||
|   } |  | ||||||
|   virtual void Pleft (Field & in,Field & out){ |  | ||||||
|     // P_L  = [ 1  -Mbs Mss^-1]  |  | ||||||
|     //        [ 0   0         ]  |  | ||||||
|     Field in_sbar(grid); |  | ||||||
|     Field    tmp2(grid); |  | ||||||
|     Field    Mtmp(grid); |  | ||||||
|  |  | ||||||
|     ProjectToSubspace(in,PleftProj);      |  | ||||||
|     PromoteFromSubspace(PleftProj,out);   |  | ||||||
|     axpy(in_sbar,-1.0,out,in);      // in_sbar = in - in_s |  | ||||||
|  |  | ||||||
|     ApplyInverse(PleftProj,PleftMss_proj); // Mss^{-1} in_s |  | ||||||
|     PromoteFromSubspace(PleftMss_proj,out); |  | ||||||
|  |  | ||||||
|     HermOp(out,Mtmp); |  | ||||||
|  |  | ||||||
|     ProjectToSubspace(Mtmp,PleftProj);      // Msbar s Mss^{-1} |  | ||||||
|     PromoteFromSubspace(PleftProj,tmp2); |  | ||||||
|  |  | ||||||
|     axpy(out,-1.0,tmp2,Mtmp); |  | ||||||
|     axpy(out,-1.0,out,in_sbar);     // in_sbar - Msbars Mss^{-1} in_s |  | ||||||
|   } |  | ||||||
| } |  | ||||||
|  |  | ||||||
| template<class Field> | template<class Field> | ||||||
| class TwoLevelFlexiblePcgADef2 : public TwoLevelFlexiblePcg<Field> { | class TwoLevelADEF1defl : public TwoLevelCG<Field> | ||||||
|  public: | { | ||||||
|   virtual void M(Field & in,Field & out,Field & tmp){ | public: | ||||||
|  |   const std::vector<Field> &evec; | ||||||
|  |   const std::vector<RealD> &eval; | ||||||
|    |    | ||||||
|  |   TwoLevelADEF1defl(RealD tol, | ||||||
|  | 		   Integer maxit, | ||||||
|  | 		   LinearOperatorBase<Field>   &FineLinop, | ||||||
|  | 		   LinearFunction<Field>   &Smoother, | ||||||
|  | 		   std::vector<Field> &_evec, | ||||||
|  | 		   std::vector<RealD> &_eval) :  | ||||||
|  |     TwoLevelCG<Field>(tol,maxit,FineLinop,Smoother,_evec[0].Grid()), | ||||||
|  |     evec(_evec), | ||||||
|  |     eval(_eval) | ||||||
|  |   {}; | ||||||
|  |  | ||||||
|  |   // Can just inherit existing Vout | ||||||
|  |   // Can just inherit existing M2 | ||||||
|  |   // Can just inherit existing M3 | ||||||
|  |  | ||||||
|  |   // Simple vstart - do nothing | ||||||
|  |   virtual void Vstart(Field & x,const Field & src){ x=src; }; | ||||||
|  |  | ||||||
|  |   // Override PcgM1 | ||||||
|  |   virtual void PcgM1(Field & in, Field & out) | ||||||
|  |   { | ||||||
|  |     int N=evec.size(); | ||||||
|  |     Field Pin(this->grid); | ||||||
|  |     Field Qin(this->grid); | ||||||
|  |  | ||||||
|  |     //MP  + Q = M(1-AQ) + Q = M | ||||||
|  |     // // If we are eigenvector deflating in coarse space | ||||||
|  |     // // Q   = Sum_i |phi_i> 1/lambda_i <phi_i| | ||||||
|  |     // // A Q = Sum_i |phi_i> <phi_i| | ||||||
|  |     // // M(1-AQ) = M(1-proj) + Q | ||||||
|  |     Qin.Checkerboard()=in.Checkerboard(); | ||||||
|  |     Qin = Zero(); | ||||||
|  |     Pin = in; | ||||||
|  |     for (int i=0;i<N;i++) { | ||||||
|  |       const Field& tmp = evec[i]; | ||||||
|  |       auto ip = TensorRemove(innerProduct(tmp,in)); | ||||||
|  |       axpy(Qin, ip / eval[i],tmp,Qin); | ||||||
|  |       axpy(Pin, -ip ,tmp,Pin); | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     this->_Smoother(Pin,out); | ||||||
|  |  | ||||||
|  |     out = out + Qin; | ||||||
|   } |   } | ||||||
|   virtual void M1(Field & in, Field & out,Field & tmp,Field & mp){ | }; | ||||||
|  |  | ||||||
|   } | NAMESPACE_END(Grid); | ||||||
|   virtual void M2(Field & in, Field & out){ |  | ||||||
|  |  | ||||||
|   } |  | ||||||
|   virtual RealD M3(Field & p, Field & mp,Field & mmp, Field & tmp){ |  | ||||||
|  |  | ||||||
|   } |  | ||||||
|   virtual void Vstart(Field & in, Field & src, Field & r, Field & mp, Field & mmp, Field & tmp){ |  | ||||||
|  |  | ||||||
|   } |  | ||||||
| } |  | ||||||
| /* |  | ||||||
| template<class Field> |  | ||||||
| class TwoLevelFlexiblePcgAD : public TwoLevelFlexiblePcg<Field> { |  | ||||||
|  public: |  | ||||||
|   virtual void M(Field & in,Field & out,Field & tmp);  |  | ||||||
|   virtual void M1(Field & in, Field & out,Field & tmp,Field & mp); |  | ||||||
|   virtual void M2(Field & in, Field & out); |  | ||||||
|   virtual RealD M3(Field & p, Field & mp,Field & mmp, Field & tmp); |  | ||||||
|   virtual void Vstart(Field & in, Field & src, Field & r, Field & mp, Field & mmp, Field & tmp); |  | ||||||
| } |  | ||||||
|  |  | ||||||
| template<class Field> |  | ||||||
| class TwoLevelFlexiblePcgDef1 : public TwoLevelFlexiblePcg<Field> { |  | ||||||
|  public: |  | ||||||
|   virtual void M(Field & in,Field & out,Field & tmp);  |  | ||||||
|   virtual void M1(Field & in, Field & out,Field & tmp,Field & mp); |  | ||||||
|   virtual void M2(Field & in, Field & out); |  | ||||||
|   virtual RealD M3(Field & p, Field & mp,Field & mmp, Field & tmp); |  | ||||||
|   virtual void Vstart(Field & in, Field & src, Field & r, Field & mp, Field & mmp, Field & tmp); |  | ||||||
|   virtual void   Vout  (Field & in, Field & out,Field & src,Field & tmp); |  | ||||||
| } |  | ||||||
|  |  | ||||||
| template<class Field> |  | ||||||
| class TwoLevelFlexiblePcgDef2 : public TwoLevelFlexiblePcg<Field> { |  | ||||||
|  public: |  | ||||||
|   virtual void M(Field & in,Field & out,Field & tmp);  |  | ||||||
|   virtual void M1(Field & in, Field & out,Field & tmp,Field & mp); |  | ||||||
|   virtual void M2(Field & in, Field & out); |  | ||||||
|   virtual RealD M3(Field & p, Field & mp,Field & mmp, Field & tmp); |  | ||||||
|   virtual void Vstart(Field & in, Field & src, Field & r, Field & mp, Field & mmp, Field & tmp); |  | ||||||
| } |  | ||||||
|  |  | ||||||
| template<class Field> |  | ||||||
| class TwoLevelFlexiblePcgV11: public TwoLevelFlexiblePcg<Field> { |  | ||||||
|  public: |  | ||||||
|   virtual void M(Field & in,Field & out,Field & tmp);  |  | ||||||
|   virtual void M1(Field & in, Field & out,Field & tmp,Field & mp); |  | ||||||
|   virtual void M2(Field & in, Field & out); |  | ||||||
|   virtual RealD M3(Field & p, Field & mp,Field & mmp, Field & tmp); |  | ||||||
|   virtual void Vstart(Field & in, Field & src, Field & r, Field & mp, Field & mmp, Field & tmp); |  | ||||||
| } |  | ||||||
| */ |  | ||||||
| #endif | #endif | ||||||
|   | |||||||
| @@ -183,13 +183,13 @@ public: | |||||||
| 		  << "\tTrue residual " << true_residual | 		  << "\tTrue residual " << true_residual | ||||||
| 		  << "\tTarget " << Tolerance << std::endl; | 		  << "\tTarget " << Tolerance << std::endl; | ||||||
|  |  | ||||||
|         std::cout << GridLogMessage << "Time breakdown "<<std::endl; |  | ||||||
| 	std::cout << GridLogMessage << "\tElapsed    " << SolverTimer.Elapsed() <<std::endl; | 	std::cout << GridLogMessage << "\tElapsed    " << SolverTimer.Elapsed() <<std::endl; | ||||||
| 	std::cout << GridLogMessage << "\tMatrix     " << MatrixTimer.Elapsed() <<std::endl; |         std::cout << GridLogPerformance << "Time breakdown "<<std::endl; | ||||||
| 	std::cout << GridLogMessage << "\tLinalg     " << LinalgTimer.Elapsed() <<std::endl; | 	std::cout << GridLogPerformance << "\tMatrix     " << MatrixTimer.Elapsed() <<std::endl; | ||||||
| 	std::cout << GridLogMessage << "\tInner      " << InnerTimer.Elapsed() <<std::endl; | 	std::cout << GridLogPerformance << "\tLinalg     " << LinalgTimer.Elapsed() <<std::endl; | ||||||
| 	std::cout << GridLogMessage << "\tAxpyNorm   " << AxpyNormTimer.Elapsed() <<std::endl; | 	std::cout << GridLogPerformance << "\tInner      " << InnerTimer.Elapsed() <<std::endl; | ||||||
| 	std::cout << GridLogMessage << "\tLinearComb " << LinearCombTimer.Elapsed() <<std::endl; | 	std::cout << GridLogPerformance << "\tAxpyNorm   " << AxpyNormTimer.Elapsed() <<std::endl; | ||||||
|  | 	std::cout << GridLogPerformance << "\tLinearComb " << LinearCombTimer.Elapsed() <<std::endl; | ||||||
|  |  | ||||||
| 	std::cout << GridLogDebug << "\tMobius flop rate " << DwfFlops/ usecs<< " Gflops " <<std::endl; | 	std::cout << GridLogDebug << "\tMobius flop rate " << DwfFlops/ usecs<< " Gflops " <<std::endl; | ||||||
|  |  | ||||||
|   | |||||||
| @@ -465,7 +465,7 @@ until convergence | |||||||
|  |  | ||||||
|     Field& evec_k = evec[k]; |     Field& evec_k = evec[k]; | ||||||
|  |  | ||||||
|     _PolyOp(evec_k,w);    std::cout<<GridLogIRL << "PolyOp" <<std::endl; |     _PolyOp(evec_k,w);    std::cout<<GridLogDebug << "PolyOp" <<std::endl; | ||||||
|  |  | ||||||
|     if(k>0) w -= lme[k-1] * evec[k-1]; |     if(k>0) w -= lme[k-1] * evec[k-1]; | ||||||
|  |  | ||||||
| @@ -480,9 +480,9 @@ until convergence | |||||||
|     lme[k] = beta; |     lme[k] = beta; | ||||||
|  |  | ||||||
|     if ( (k>0) && ( (k % orth_period) == 0 )) { |     if ( (k>0) && ( (k % orth_period) == 0 )) { | ||||||
|       std::cout<<GridLogIRL << "Orthogonalising " <<k<<std::endl; |       std::cout<<GridLogDebug << "Orthogonalising " <<k<<std::endl; | ||||||
|       orthogonalize(w,evec,k); // orthonormalise |       orthogonalize(w,evec,k); // orthonormalise | ||||||
|       std::cout<<GridLogIRL << "Orthogonalised " <<k<<std::endl; |       std::cout<<GridLogDebug << "Orthogonalised " <<k<<std::endl; | ||||||
|     } |     } | ||||||
|  |  | ||||||
|     if(k < Nm-1) evec[k+1] = w; |     if(k < Nm-1) evec[k+1] = w; | ||||||
| @@ -491,7 +491,7 @@ until convergence | |||||||
|     if ( beta < tiny )  |     if ( beta < tiny )  | ||||||
|       std::cout<<GridLogIRL << " beta is tiny "<<beta<<std::endl; |       std::cout<<GridLogIRL << " beta is tiny "<<beta<<std::endl; | ||||||
|  |  | ||||||
|     std::cout<<GridLogIRL << "Lanczos step complete " <<k<<std::endl; |     std::cout<<GridLogDebug << "Lanczos step complete " <<k<<std::endl; | ||||||
|   } |   } | ||||||
|  |  | ||||||
|   void diagonalize_Eigen(std::vector<RealD>& lmd, std::vector<RealD>& lme,  |   void diagonalize_Eigen(std::vector<RealD>& lmd, std::vector<RealD>& lme,  | ||||||
|   | |||||||
| @@ -33,7 +33,7 @@ NAMESPACE_BEGIN(Grid); | |||||||
| /////////////////////////////////////////////////////////////////////////////////////////////////////// | /////////////////////////////////////////////////////////////////////////////////////////////////////// | ||||||
| // Take a matrix and form an NE solver calling a Herm solver | // Take a matrix and form an NE solver calling a Herm solver | ||||||
| /////////////////////////////////////////////////////////////////////////////////////////////////////// | /////////////////////////////////////////////////////////////////////////////////////////////////////// | ||||||
| template<class Field> class NormalEquations { | template<class Field> class NormalEquations : public LinearFunction<Field>{ | ||||||
| private: | private: | ||||||
|   SparseMatrixBase<Field> & _Matrix; |   SparseMatrixBase<Field> & _Matrix; | ||||||
|   OperatorFunction<Field> & _HermitianSolver; |   OperatorFunction<Field> & _HermitianSolver; | ||||||
| @@ -60,7 +60,7 @@ public: | |||||||
|   }      |   }      | ||||||
| }; | }; | ||||||
|  |  | ||||||
| template<class Field> class HPDSolver { | template<class Field> class HPDSolver : public LinearFunction<Field> { | ||||||
| private: | private: | ||||||
|   LinearOperatorBase<Field> & _Matrix; |   LinearOperatorBase<Field> & _Matrix; | ||||||
|   OperatorFunction<Field> & _HermitianSolver; |   OperatorFunction<Field> & _HermitianSolver; | ||||||
| @@ -84,7 +84,7 @@ public: | |||||||
| }; | }; | ||||||
|  |  | ||||||
|  |  | ||||||
| template<class Field> class MdagMSolver { | template<class Field> class MdagMSolver : public LinearFunction<Field> { | ||||||
| private: | private: | ||||||
|   SparseMatrixBase<Field> & _Matrix; |   SparseMatrixBase<Field> & _Matrix; | ||||||
|   OperatorFunction<Field> & _HermitianSolver; |   OperatorFunction<Field> & _HermitianSolver; | ||||||
|   | |||||||
| @@ -20,7 +20,7 @@ template<class Field> class PowerMethod | |||||||
|     RealD evalMaxApprox = 0.0;  |     RealD evalMaxApprox = 0.0;  | ||||||
|     auto src_n = src;  |     auto src_n = src;  | ||||||
|     auto tmp = src;  |     auto tmp = src;  | ||||||
|     const int _MAX_ITER_EST_ = 50;  |     const int _MAX_ITER_EST_ = 100;  | ||||||
|  |  | ||||||
|     for (int i=0;i<_MAX_ITER_EST_;i++) {  |     for (int i=0;i<_MAX_ITER_EST_;i++) {  | ||||||
|        |        | ||||||
|   | |||||||
							
								
								
									
										262
									
								
								Grid/algorithms/multigrid/Aggregates.h
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										262
									
								
								Grid/algorithms/multigrid/Aggregates.h
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,262 @@ | |||||||
|  | /************************************************************************************* | ||||||
|  |  | ||||||
|  |     Grid physics library, www.github.com/paboyle/Grid  | ||||||
|  |  | ||||||
|  |     Source file: ./lib/algorithms/Aggregates.h | ||||||
|  |  | ||||||
|  |     Copyright (C) 2015 | ||||||
|  |  | ||||||
|  | Author: Azusa Yamaguchi <ayamaguc@staffmail.ed.ac.uk> | ||||||
|  | Author: Peter Boyle <paboyle@ph.ed.ac.uk> | ||||||
|  | Author: Peter Boyle <peterboyle@Peters-MacBook-Pro-2.local> | ||||||
|  | Author: paboyle <paboyle@ph.ed.ac.uk> | ||||||
|  |  | ||||||
|  |     This program is free software; you can redistribute it and/or modify | ||||||
|  |     it under the terms of the GNU General Public License as published by | ||||||
|  |     the Free Software Foundation; either version 2 of the License, or | ||||||
|  |     (at your option) any later version. | ||||||
|  |  | ||||||
|  |     This program is distributed in the hope that it will be useful, | ||||||
|  |     but WITHOUT ANY WARRANTY; without even the implied warranty of | ||||||
|  |     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the | ||||||
|  |     GNU General Public License for more details. | ||||||
|  |  | ||||||
|  |     You should have received a copy of the GNU General Public License along | ||||||
|  |     with this program; if not, write to the Free Software Foundation, Inc., | ||||||
|  |     51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. | ||||||
|  |  | ||||||
|  |     See the full license in the file "LICENSE" in the top level distribution directory | ||||||
|  | *************************************************************************************/ | ||||||
|  | /*  END LEGAL */ | ||||||
|  | #pragma once | ||||||
|  |  | ||||||
|  | NAMESPACE_BEGIN(Grid); | ||||||
|  |  | ||||||
|  | template<class Fobj,class CComplex,int nbasis> | ||||||
|  | class Aggregation { | ||||||
|  | public: | ||||||
|  |   typedef iVector<CComplex,nbasis >             siteVector; | ||||||
|  |   typedef Lattice<siteVector>                 CoarseVector; | ||||||
|  |   typedef Lattice<iMatrix<CComplex,nbasis > > CoarseMatrix; | ||||||
|  |  | ||||||
|  |   typedef Lattice< CComplex >   CoarseScalar; // used for inner products on fine field | ||||||
|  |   typedef Lattice<Fobj >        FineField; | ||||||
|  |  | ||||||
|  |   GridBase *CoarseGrid; | ||||||
|  |   GridBase *FineGrid; | ||||||
|  |   std::vector<Lattice<Fobj> > subspace; | ||||||
|  |   int checkerboard; | ||||||
|  |   int Checkerboard(void){return checkerboard;} | ||||||
|  |   Aggregation(GridBase *_CoarseGrid,GridBase *_FineGrid,int _checkerboard) :  | ||||||
|  |     CoarseGrid(_CoarseGrid), | ||||||
|  |     FineGrid(_FineGrid), | ||||||
|  |     subspace(nbasis,_FineGrid), | ||||||
|  |     checkerboard(_checkerboard) | ||||||
|  |   { | ||||||
|  |   }; | ||||||
|  |    | ||||||
|  |    | ||||||
|  |   void Orthogonalise(void){ | ||||||
|  |     CoarseScalar InnerProd(CoarseGrid);  | ||||||
|  |     //    std::cout << GridLogMessage <<" Block Gramm-Schmidt pass 1"<<std::endl; | ||||||
|  |     blockOrthogonalise(InnerProd,subspace); | ||||||
|  |   }  | ||||||
|  |   void ProjectToSubspace(CoarseVector &CoarseVec,const FineField &FineVec){ | ||||||
|  |     blockProject(CoarseVec,FineVec,subspace); | ||||||
|  |   } | ||||||
|  |   void PromoteFromSubspace(const CoarseVector &CoarseVec,FineField &FineVec){ | ||||||
|  |     FineVec.Checkerboard() = subspace[0].Checkerboard(); | ||||||
|  |     blockPromote(CoarseVec,FineVec,subspace); | ||||||
|  |   } | ||||||
|  |  | ||||||
|  |   virtual void CreateSubspaceRandom(GridParallelRNG  &RNG) { | ||||||
|  |     int nn=nbasis; | ||||||
|  |     RealD scale; | ||||||
|  |     FineField noise(FineGrid); | ||||||
|  |     for(int b=0;b<nn;b++){ | ||||||
|  |       subspace[b] = Zero(); | ||||||
|  |       gaussian(RNG,noise); | ||||||
|  |       scale = std::pow(norm2(noise),-0.5);  | ||||||
|  |       noise=noise*scale; | ||||||
|  |       subspace[b] = noise; | ||||||
|  |     } | ||||||
|  |   } | ||||||
|  |   virtual void CreateSubspace(GridParallelRNG  &RNG,LinearOperatorBase<FineField> &hermop,int nn=nbasis) | ||||||
|  |   { | ||||||
|  |  | ||||||
|  |     RealD scale; | ||||||
|  |  | ||||||
|  |     ConjugateGradient<FineField> CG(1.0e-2,100,false); | ||||||
|  |     FineField noise(FineGrid); | ||||||
|  |     FineField Mn(FineGrid); | ||||||
|  |  | ||||||
|  |     for(int b=0;b<nn;b++){ | ||||||
|  |        | ||||||
|  |       subspace[b] = Zero(); | ||||||
|  |       gaussian(RNG,noise); | ||||||
|  |       scale = std::pow(norm2(noise),-0.5);  | ||||||
|  |       noise=noise*scale; | ||||||
|  |        | ||||||
|  |       hermop.Op(noise,Mn); std::cout<<GridLogMessage << "noise   ["<<b<<"] <n|MdagM|n> "<<norm2(Mn)<<std::endl; | ||||||
|  |  | ||||||
|  |       for(int i=0;i<1;i++){ | ||||||
|  |  | ||||||
|  | 	CG(hermop,noise,subspace[b]); | ||||||
|  |  | ||||||
|  | 	noise = subspace[b]; | ||||||
|  | 	scale = std::pow(norm2(noise),-0.5);  | ||||||
|  | 	noise=noise*scale; | ||||||
|  |  | ||||||
|  |       } | ||||||
|  |  | ||||||
|  |       hermop.Op(noise,Mn); std::cout<<GridLogMessage << "filtered["<<b<<"] <f|MdagM|f> "<<norm2(Mn)<<std::endl; | ||||||
|  |       subspace[b]   = noise; | ||||||
|  |  | ||||||
|  |     } | ||||||
|  |   } | ||||||
|  |  | ||||||
|  |   //////////////////////////////////////////////////////////////////////////////////////////////// | ||||||
|  |   // World of possibilities here. But have tried quite a lot of experiments (250+ jobs run on Summit) | ||||||
|  |   // and this is the best I found | ||||||
|  |   //////////////////////////////////////////////////////////////////////////////////////////////// | ||||||
|  |  | ||||||
|  |   virtual void CreateSubspaceChebyshev(GridParallelRNG  &RNG,LinearOperatorBase<FineField> &hermop, | ||||||
|  | 				       int nn, | ||||||
|  | 				       double hi, | ||||||
|  | 				       double lo, | ||||||
|  | 				       int orderfilter, | ||||||
|  | 				       int ordermin, | ||||||
|  | 				       int orderstep, | ||||||
|  | 				       double filterlo | ||||||
|  | 				       ) { | ||||||
|  |  | ||||||
|  |     RealD scale; | ||||||
|  |  | ||||||
|  |     FineField noise(FineGrid); | ||||||
|  |     FineField Mn(FineGrid); | ||||||
|  |     FineField tmp(FineGrid); | ||||||
|  |  | ||||||
|  |     // New normalised noise | ||||||
|  |     gaussian(RNG,noise); | ||||||
|  |     scale = std::pow(norm2(noise),-0.5);  | ||||||
|  |     noise=noise*scale; | ||||||
|  |  | ||||||
|  |     std::cout << GridLogMessage<<" Chebyshev subspace pass-1 : ord "<<orderfilter<<" ["<<lo<<","<<hi<<"]"<<std::endl; | ||||||
|  |     std::cout << GridLogMessage<<" Chebyshev subspace pass-2 : nbasis"<<nn<<" min " | ||||||
|  | 	      <<ordermin<<" step "<<orderstep | ||||||
|  | 	      <<" lo"<<filterlo<<std::endl; | ||||||
|  |  | ||||||
|  |     // Initial matrix element | ||||||
|  |     hermop.Op(noise,Mn); std::cout<<GridLogMessage << "noise <n|MdagM|n> "<<norm2(Mn)<<std::endl; | ||||||
|  |  | ||||||
|  |     int b =0; | ||||||
|  |     { | ||||||
|  |       // Filter | ||||||
|  |       Chebyshev<FineField> Cheb(lo,hi,orderfilter); | ||||||
|  |       Cheb(hermop,noise,Mn); | ||||||
|  |       // normalise | ||||||
|  |       scale = std::pow(norm2(Mn),-0.5); 	Mn=Mn*scale; | ||||||
|  |       subspace[b]   = Mn; | ||||||
|  |       hermop.Op(Mn,tmp);  | ||||||
|  |       std::cout<<GridLogMessage << "filt ["<<b<<"] <n|MdagM|n> "<<norm2(tmp)<<std::endl; | ||||||
|  |       b++; | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     // Generate a full sequence of Chebyshevs | ||||||
|  |     { | ||||||
|  |       lo=filterlo; | ||||||
|  |       noise=Mn; | ||||||
|  |  | ||||||
|  |       FineField T0(FineGrid); T0 = noise;   | ||||||
|  |       FineField T1(FineGrid);  | ||||||
|  |       FineField T2(FineGrid); | ||||||
|  |       FineField y(FineGrid); | ||||||
|  |        | ||||||
|  |       FineField *Tnm = &T0; | ||||||
|  |       FineField *Tn  = &T1; | ||||||
|  |       FineField *Tnp = &T2; | ||||||
|  |  | ||||||
|  |       // Tn=T1 = (xscale M + mscale)in | ||||||
|  |       RealD xscale = 2.0/(hi-lo); | ||||||
|  |       RealD mscale = -(hi+lo)/(hi-lo); | ||||||
|  |       hermop.HermOp(T0,y); | ||||||
|  |       T1=y*xscale+noise*mscale; | ||||||
|  |  | ||||||
|  |       for(int n=2;n<=ordermin+orderstep*(nn-2);n++){ | ||||||
|  | 	 | ||||||
|  | 	hermop.HermOp(*Tn,y); | ||||||
|  |  | ||||||
|  | 	autoView( y_v , y, AcceleratorWrite); | ||||||
|  | 	autoView( Tn_v , (*Tn), AcceleratorWrite); | ||||||
|  | 	autoView( Tnp_v , (*Tnp), AcceleratorWrite); | ||||||
|  | 	autoView( Tnm_v , (*Tnm), AcceleratorWrite); | ||||||
|  | 	const int Nsimd = CComplex::Nsimd(); | ||||||
|  | 	accelerator_for(ss, FineGrid->oSites(), Nsimd, { | ||||||
|  | 	  coalescedWrite(y_v[ss],xscale*y_v(ss)+mscale*Tn_v(ss)); | ||||||
|  | 	  coalescedWrite(Tnp_v[ss],2.0*y_v(ss)-Tnm_v(ss)); | ||||||
|  |         }); | ||||||
|  |  | ||||||
|  | 	// Possible more fine grained control is needed than a linear sweep, | ||||||
|  | 	// but huge productivity gain if this is simple algorithm and not a tunable | ||||||
|  | 	int m =1; | ||||||
|  | 	if ( n>=ordermin ) m=n-ordermin; | ||||||
|  | 	if ( (m%orderstep)==0 ) {  | ||||||
|  | 	  Mn=*Tnp; | ||||||
|  | 	  scale = std::pow(norm2(Mn),-0.5);         Mn=Mn*scale; | ||||||
|  | 	  subspace[b] = Mn; | ||||||
|  | 	  hermop.Op(Mn,tmp);  | ||||||
|  | 	  std::cout<<GridLogMessage << n<<" filt ["<<b<<"] <n|MdagM|n> "<<norm2(tmp)<<std::endl; | ||||||
|  | 	  b++; | ||||||
|  | 	} | ||||||
|  |  | ||||||
|  | 	// Cycle pointers to avoid copies | ||||||
|  | 	FineField *swizzle = Tnm; | ||||||
|  | 	Tnm    =Tn; | ||||||
|  | 	Tn     =Tnp; | ||||||
|  | 	Tnp    =swizzle; | ||||||
|  | 	   | ||||||
|  |       } | ||||||
|  |     } | ||||||
|  |     assert(b==nn); | ||||||
|  |   } | ||||||
|  |   virtual void CreateSubspaceChebyshev(GridParallelRNG  &RNG,LinearOperatorBase<FineField> &hermop, | ||||||
|  | 				       int nn, | ||||||
|  | 				       double hi, | ||||||
|  | 				       double lo, | ||||||
|  | 				       int orderfilter | ||||||
|  | 				       ) { | ||||||
|  |  | ||||||
|  |     RealD scale; | ||||||
|  |  | ||||||
|  |     FineField noise(FineGrid); | ||||||
|  |     FineField Mn(FineGrid); | ||||||
|  |     FineField tmp(FineGrid); | ||||||
|  |  | ||||||
|  |     // New normalised noise | ||||||
|  |     std::cout << GridLogMessage<<" Chebyshev subspace pure noise : ord "<<orderfilter<<" ["<<lo<<","<<hi<<"]"<<std::endl; | ||||||
|  |     std::cout << GridLogMessage<<" Chebyshev subspace pure noise  : nbasis "<<nn<<std::endl; | ||||||
|  |  | ||||||
|  |  | ||||||
|  |     for(int b =0;b<nbasis;b++) | ||||||
|  |     { | ||||||
|  |       gaussian(RNG,noise); | ||||||
|  |       scale = std::pow(norm2(noise),-0.5);  | ||||||
|  |       noise=noise*scale; | ||||||
|  |  | ||||||
|  |       // Initial matrix element | ||||||
|  |       hermop.Op(noise,Mn); | ||||||
|  |       if(b==0) std::cout<<GridLogMessage << "noise <n|MdagM|n> "<<norm2(Mn)<<std::endl; | ||||||
|  |       // Filter | ||||||
|  |       Chebyshev<FineField> Cheb(lo,hi,orderfilter); | ||||||
|  |       Cheb(hermop,noise,Mn); | ||||||
|  |       // normalise | ||||||
|  |       scale = std::pow(norm2(Mn),-0.5); 	Mn=Mn*scale; | ||||||
|  |       subspace[b]   = Mn; | ||||||
|  |       hermop.Op(Mn,tmp);  | ||||||
|  |       std::cout<<GridLogMessage << "filt ["<<b<<"] <n|MdagM|n> "<<norm2(tmp)<<std::endl; | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |   } | ||||||
|  |  | ||||||
|  | }; | ||||||
|  | NAMESPACE_END(Grid); | ||||||
							
								
								
									
										814
									
								
								Grid/algorithms/multigrid/CoarsenedMatrix.h
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										814
									
								
								Grid/algorithms/multigrid/CoarsenedMatrix.h
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,814 @@ | |||||||
|  | /************************************************************************************* | ||||||
|  |  | ||||||
|  |     Grid physics library, www.github.com/paboyle/Grid  | ||||||
|  |  | ||||||
|  |     Source file: ./lib/algorithms/CoarsenedMatrix.h | ||||||
|  |  | ||||||
|  |     Copyright (C) 2015 | ||||||
|  |  | ||||||
|  | Author: Azusa Yamaguchi <ayamaguc@staffmail.ed.ac.uk> | ||||||
|  | Author: Peter Boyle <paboyle@ph.ed.ac.uk> | ||||||
|  | Author: Peter Boyle <peterboyle@Peters-MacBook-Pro-2.local> | ||||||
|  | Author: paboyle <paboyle@ph.ed.ac.uk> | ||||||
|  |  | ||||||
|  |     This program is free software; you can redistribute it and/or modify | ||||||
|  |     it under the terms of the GNU General Public License as published by | ||||||
|  |     the Free Software Foundation; either version 2 of the License, or | ||||||
|  |     (at your option) any later version. | ||||||
|  |  | ||||||
|  |     This program is distributed in the hope that it will be useful, | ||||||
|  |     but WITHOUT ANY WARRANTY; without even the implied warranty of | ||||||
|  |     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the | ||||||
|  |     GNU General Public License for more details. | ||||||
|  |  | ||||||
|  |     You should have received a copy of the GNU General Public License along | ||||||
|  |     with this program; if not, write to the Free Software Foundation, Inc., | ||||||
|  |     51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. | ||||||
|  |  | ||||||
|  |     See the full license in the file "LICENSE" in the top level distribution directory | ||||||
|  | *************************************************************************************/ | ||||||
|  | /*  END LEGAL */ | ||||||
|  | #ifndef  GRID_ALGORITHM_COARSENED_MATRIX_H | ||||||
|  | #define  GRID_ALGORITHM_COARSENED_MATRIX_H | ||||||
|  |  | ||||||
|  | #include <Grid/qcd/QCD.h> // needed for Dagger(Yes|No), Inverse(Yes|No) | ||||||
|  |  | ||||||
|  | NAMESPACE_BEGIN(Grid); | ||||||
|  |  | ||||||
|  | template<class vobj,class CComplex> | ||||||
|  | inline void blockMaskedInnerProduct(Lattice<CComplex> &CoarseInner, | ||||||
|  | 				    const Lattice<decltype(innerProduct(vobj(),vobj()))> &FineMask, | ||||||
|  | 				    const Lattice<vobj> &fineX, | ||||||
|  | 				    const Lattice<vobj> &fineY) | ||||||
|  | { | ||||||
|  |   typedef decltype(innerProduct(vobj(),vobj())) dotp; | ||||||
|  |  | ||||||
|  |   GridBase *coarse(CoarseInner.Grid()); | ||||||
|  |   GridBase *fine  (fineX.Grid()); | ||||||
|  |  | ||||||
|  |   Lattice<dotp> fine_inner(fine); fine_inner.Checkerboard() = fineX.Checkerboard(); | ||||||
|  |   Lattice<dotp> fine_inner_msk(fine); | ||||||
|  |  | ||||||
|  |   // Multiply could be fused with innerProduct | ||||||
|  |   // Single block sum kernel could do both masks. | ||||||
|  |   fine_inner = localInnerProduct(fineX,fineY); | ||||||
|  |   mult(fine_inner_msk, fine_inner,FineMask); | ||||||
|  |   blockSum(CoarseInner,fine_inner_msk); | ||||||
|  | } | ||||||
|  |  | ||||||
|  | // Fine Object == (per site) type of fine field | ||||||
|  | // nbasis      == number of deflation vectors | ||||||
|  | template<class Fobj,class CComplex,int nbasis> | ||||||
|  | class CoarsenedMatrix : public CheckerBoardedSparseMatrixBase<Lattice<iVector<CComplex,nbasis > > >  { | ||||||
|  | public: | ||||||
|  |      | ||||||
|  |   typedef iVector<CComplex,nbasis >           siteVector; | ||||||
|  |   typedef Lattice<CComplex >                  CoarseComplexField; | ||||||
|  |   typedef Lattice<siteVector>                 CoarseVector; | ||||||
|  |   typedef Lattice<iMatrix<CComplex,nbasis > > CoarseMatrix; | ||||||
|  |   typedef iMatrix<CComplex,nbasis >  Cobj; | ||||||
|  |   typedef Lattice< CComplex >   CoarseScalar; // used for inner products on fine field | ||||||
|  |   typedef Lattice<Fobj >        FineField; | ||||||
|  |   typedef CoarseVector FermionField; | ||||||
|  |  | ||||||
|  |   // enrich interface, use default implementation as in FermionOperator /////// | ||||||
|  |   void Dminus(CoarseVector const& in, CoarseVector& out) { out = in; } | ||||||
|  |   void DminusDag(CoarseVector const& in, CoarseVector& out) { out = in; } | ||||||
|  |   void ImportPhysicalFermionSource(CoarseVector const& input, CoarseVector& imported) { imported = input; } | ||||||
|  |   void ImportUnphysicalFermion(CoarseVector const& input, CoarseVector& imported) { imported = input; } | ||||||
|  |   void ExportPhysicalFermionSolution(CoarseVector const& solution, CoarseVector& exported) { exported = solution; }; | ||||||
|  |   void ExportPhysicalFermionSource(CoarseVector const& solution, CoarseVector& exported) { exported = solution; }; | ||||||
|  |  | ||||||
|  |   //////////////////// | ||||||
|  |   // Data members | ||||||
|  |   //////////////////// | ||||||
|  |   Geometry         geom; | ||||||
|  |   GridBase *       _grid;  | ||||||
|  |   GridBase*        _cbgrid; | ||||||
|  |   int hermitian; | ||||||
|  |  | ||||||
|  |   CartesianStencil<siteVector,siteVector,DefaultImplParams> Stencil;  | ||||||
|  |   CartesianStencil<siteVector,siteVector,DefaultImplParams> StencilEven; | ||||||
|  |   CartesianStencil<siteVector,siteVector,DefaultImplParams> StencilOdd; | ||||||
|  |  | ||||||
|  |   std::vector<CoarseMatrix> A; | ||||||
|  |   std::vector<CoarseMatrix> Aeven; | ||||||
|  |   std::vector<CoarseMatrix> Aodd; | ||||||
|  |  | ||||||
|  |   CoarseMatrix AselfInv; | ||||||
|  |   CoarseMatrix AselfInvEven; | ||||||
|  |   CoarseMatrix AselfInvOdd; | ||||||
|  |  | ||||||
|  |   Vector<RealD> dag_factor; | ||||||
|  |  | ||||||
|  |   /////////////////////// | ||||||
|  |   // Interface | ||||||
|  |   /////////////////////// | ||||||
|  |   GridBase * Grid(void)         { return _grid; };   // this is all the linalg routines need to know | ||||||
|  |   GridBase * RedBlackGrid()     { return _cbgrid; }; | ||||||
|  |  | ||||||
|  |   int ConstEE() { return 0; } | ||||||
|  |  | ||||||
|  |   void M (const CoarseVector &in, CoarseVector &out) | ||||||
|  |   { | ||||||
|  |     conformable(_grid,in.Grid()); | ||||||
|  |     conformable(in.Grid(),out.Grid()); | ||||||
|  |     out.Checkerboard() = in.Checkerboard(); | ||||||
|  |  | ||||||
|  |     SimpleCompressor<siteVector> compressor; | ||||||
|  |  | ||||||
|  |     Stencil.HaloExchange(in,compressor); | ||||||
|  |     autoView( in_v , in, AcceleratorRead); | ||||||
|  |     autoView( out_v , out, AcceleratorWrite); | ||||||
|  |     autoView( Stencil_v  , Stencil, AcceleratorRead); | ||||||
|  |     int npoint = geom.npoint; | ||||||
|  |     typedef LatticeView<Cobj> Aview; | ||||||
|  |        | ||||||
|  |     Vector<Aview> AcceleratorViewContainer; | ||||||
|  |    | ||||||
|  |     for(int p=0;p<geom.npoint;p++) AcceleratorViewContainer.push_back(A[p].View(AcceleratorRead)); | ||||||
|  |     Aview *Aview_p = & AcceleratorViewContainer[0]; | ||||||
|  |  | ||||||
|  |     const int Nsimd = CComplex::Nsimd(); | ||||||
|  |     typedef decltype(coalescedRead(in_v[0])) calcVector; | ||||||
|  |     typedef decltype(coalescedRead(in_v[0](0))) calcComplex; | ||||||
|  |  | ||||||
|  |     int osites=Grid()->oSites(); | ||||||
|  |  | ||||||
|  |     accelerator_for(sss, Grid()->oSites()*nbasis, Nsimd, { | ||||||
|  |       int ss = sss/nbasis; | ||||||
|  |       int b  = sss%nbasis; | ||||||
|  |       calcComplex res = Zero(); | ||||||
|  |       calcVector nbr; | ||||||
|  |       int ptype; | ||||||
|  |       StencilEntry *SE; | ||||||
|  |  | ||||||
|  |       for(int point=0;point<npoint;point++){ | ||||||
|  |  | ||||||
|  | 	SE=Stencil_v.GetEntry(ptype,point,ss); | ||||||
|  | 	   | ||||||
|  | 	if(SE->_is_local) {  | ||||||
|  | 	  nbr = coalescedReadPermute(in_v[SE->_offset],ptype,SE->_permute); | ||||||
|  | 	} else { | ||||||
|  | 	  nbr = coalescedRead(Stencil_v.CommBuf()[SE->_offset]); | ||||||
|  | 	} | ||||||
|  | 	acceleratorSynchronise(); | ||||||
|  |  | ||||||
|  | 	for(int bb=0;bb<nbasis;bb++) { | ||||||
|  | 	  res = res + coalescedRead(Aview_p[point][ss](b,bb))*nbr(bb); | ||||||
|  | 	} | ||||||
|  |       } | ||||||
|  |       coalescedWrite(out_v[ss](b),res); | ||||||
|  |       }); | ||||||
|  |  | ||||||
|  |     for(int p=0;p<geom.npoint;p++) AcceleratorViewContainer[p].ViewClose(); | ||||||
|  |   }; | ||||||
|  |  | ||||||
|  |   void Mdag (const CoarseVector &in, CoarseVector &out) | ||||||
|  |   { | ||||||
|  |     if(hermitian) { | ||||||
|  |       // corresponds to Petrov-Galerkin coarsening | ||||||
|  |       return M(in,out); | ||||||
|  |     } else { | ||||||
|  |       // corresponds to Galerkin coarsening | ||||||
|  |       return MdagNonHermitian(in, out); | ||||||
|  |     } | ||||||
|  |   }; | ||||||
|  |  | ||||||
|  |   void MdagNonHermitian(const CoarseVector &in, CoarseVector &out) | ||||||
|  |   { | ||||||
|  |     conformable(_grid,in.Grid()); | ||||||
|  |     conformable(in.Grid(),out.Grid()); | ||||||
|  |     out.Checkerboard() = in.Checkerboard(); | ||||||
|  |  | ||||||
|  |     SimpleCompressor<siteVector> compressor; | ||||||
|  |  | ||||||
|  |     Stencil.HaloExchange(in,compressor); | ||||||
|  |     autoView( in_v , in, AcceleratorRead); | ||||||
|  |     autoView( out_v , out, AcceleratorWrite); | ||||||
|  |     autoView( Stencil_v  , Stencil, AcceleratorRead); | ||||||
|  |     int npoint = geom.npoint; | ||||||
|  |     typedef LatticeView<Cobj> Aview; | ||||||
|  |  | ||||||
|  |     Vector<Aview> AcceleratorViewContainer; | ||||||
|  |  | ||||||
|  |     for(int p=0;p<geom.npoint;p++) AcceleratorViewContainer.push_back(A[p].View(AcceleratorRead)); | ||||||
|  |     Aview *Aview_p = & AcceleratorViewContainer[0]; | ||||||
|  |  | ||||||
|  |     const int Nsimd = CComplex::Nsimd(); | ||||||
|  |     typedef decltype(coalescedRead(in_v[0])) calcVector; | ||||||
|  |     typedef decltype(coalescedRead(in_v[0](0))) calcComplex; | ||||||
|  |  | ||||||
|  |     int osites=Grid()->oSites(); | ||||||
|  |  | ||||||
|  |     Vector<int> points(geom.npoint, 0); | ||||||
|  |     for(int p=0; p<geom.npoint; p++) | ||||||
|  |       points[p] = geom.points_dagger[p]; | ||||||
|  |  | ||||||
|  |     auto points_p = &points[0]; | ||||||
|  |  | ||||||
|  |     RealD* dag_factor_p = &dag_factor[0]; | ||||||
|  |  | ||||||
|  |     accelerator_for(sss, Grid()->oSites()*nbasis, Nsimd, { | ||||||
|  |       int ss = sss/nbasis; | ||||||
|  |       int b  = sss%nbasis; | ||||||
|  |       calcComplex res = Zero(); | ||||||
|  |       calcVector nbr; | ||||||
|  |       int ptype; | ||||||
|  |       StencilEntry *SE; | ||||||
|  |  | ||||||
|  |       for(int p=0;p<npoint;p++){ | ||||||
|  |         int point = points_p[p]; | ||||||
|  |  | ||||||
|  | 	SE=Stencil_v.GetEntry(ptype,point,ss); | ||||||
|  |  | ||||||
|  | 	if(SE->_is_local) { | ||||||
|  | 	  nbr = coalescedReadPermute(in_v[SE->_offset],ptype,SE->_permute); | ||||||
|  | 	} else { | ||||||
|  | 	  nbr = coalescedRead(Stencil_v.CommBuf()[SE->_offset]); | ||||||
|  | 	} | ||||||
|  | 	acceleratorSynchronise(); | ||||||
|  |  | ||||||
|  | 	for(int bb=0;bb<nbasis;bb++) { | ||||||
|  | 	  res = res + dag_factor_p[b*nbasis+bb]*coalescedRead(Aview_p[point][ss](b,bb))*nbr(bb); | ||||||
|  | 	} | ||||||
|  |       } | ||||||
|  |       coalescedWrite(out_v[ss](b),res); | ||||||
|  |       }); | ||||||
|  |  | ||||||
|  |     for(int p=0;p<geom.npoint;p++) AcceleratorViewContainer[p].ViewClose(); | ||||||
|  |   } | ||||||
|  |  | ||||||
|  |   void MdirComms(const CoarseVector &in) | ||||||
|  |   { | ||||||
|  |     SimpleCompressor<siteVector> compressor; | ||||||
|  |     Stencil.HaloExchange(in,compressor); | ||||||
|  |   } | ||||||
|  |   void MdirCalc(const CoarseVector &in, CoarseVector &out, int point) | ||||||
|  |   { | ||||||
|  |     conformable(_grid,in.Grid()); | ||||||
|  |     conformable(_grid,out.Grid()); | ||||||
|  |     out.Checkerboard() = in.Checkerboard(); | ||||||
|  |  | ||||||
|  |     typedef LatticeView<Cobj> Aview; | ||||||
|  |     Vector<Aview> AcceleratorViewContainer; | ||||||
|  |     for(int p=0;p<geom.npoint;p++) AcceleratorViewContainer.push_back(A[p].View(AcceleratorRead)); | ||||||
|  |     Aview *Aview_p = & AcceleratorViewContainer[0]; | ||||||
|  |  | ||||||
|  |     autoView( out_v , out, AcceleratorWrite); | ||||||
|  |     autoView( in_v  , in, AcceleratorRead); | ||||||
|  |     autoView( Stencil_v  , Stencil, AcceleratorRead); | ||||||
|  |  | ||||||
|  |     const int Nsimd = CComplex::Nsimd(); | ||||||
|  |     typedef decltype(coalescedRead(in_v[0])) calcVector; | ||||||
|  |     typedef decltype(coalescedRead(in_v[0](0))) calcComplex; | ||||||
|  |  | ||||||
|  |     accelerator_for(sss, Grid()->oSites()*nbasis, Nsimd, { | ||||||
|  |       int ss = sss/nbasis; | ||||||
|  |       int b  = sss%nbasis; | ||||||
|  |       calcComplex res = Zero(); | ||||||
|  |       calcVector nbr; | ||||||
|  |       int ptype; | ||||||
|  |       StencilEntry *SE; | ||||||
|  |  | ||||||
|  |       SE=Stencil_v.GetEntry(ptype,point,ss); | ||||||
|  | 	   | ||||||
|  |       if(SE->_is_local) {  | ||||||
|  | 	nbr = coalescedReadPermute(in_v[SE->_offset],ptype,SE->_permute); | ||||||
|  |       } else { | ||||||
|  | 	nbr = coalescedRead(Stencil_v.CommBuf()[SE->_offset]); | ||||||
|  |       } | ||||||
|  |       acceleratorSynchronise(); | ||||||
|  |  | ||||||
|  |       for(int bb=0;bb<nbasis;bb++) { | ||||||
|  | 	res = res + coalescedRead(Aview_p[point][ss](b,bb))*nbr(bb); | ||||||
|  |       } | ||||||
|  |       coalescedWrite(out_v[ss](b),res); | ||||||
|  |     }); | ||||||
|  |     for(int p=0;p<geom.npoint;p++) AcceleratorViewContainer[p].ViewClose(); | ||||||
|  |   } | ||||||
|  |   void MdirAll(const CoarseVector &in,std::vector<CoarseVector> &out) | ||||||
|  |   { | ||||||
|  |     this->MdirComms(in); | ||||||
|  |     int ndir=geom.npoint-1; | ||||||
|  |     if ((out.size()!=ndir)&&(out.size()!=ndir+1)) {  | ||||||
|  |       std::cout <<"MdirAll out size "<< out.size()<<std::endl; | ||||||
|  |       std::cout <<"MdirAll ndir "<< ndir<<std::endl; | ||||||
|  |       assert(0); | ||||||
|  |     } | ||||||
|  |     for(int p=0;p<ndir;p++){ | ||||||
|  |       MdirCalc(in,out[p],p); | ||||||
|  |     } | ||||||
|  |   }; | ||||||
|  |   void Mdir(const CoarseVector &in, CoarseVector &out, int dir, int disp){ | ||||||
|  |  | ||||||
|  |     this->MdirComms(in); | ||||||
|  |  | ||||||
|  |     MdirCalc(in,out,geom.point(dir,disp)); | ||||||
|  |   }; | ||||||
|  |  | ||||||
|  |   void Mdiag(const CoarseVector &in, CoarseVector &out) | ||||||
|  |   { | ||||||
|  |     int point=geom.npoint-1; | ||||||
|  |     MdirCalc(in, out, point); // No comms | ||||||
|  |   }; | ||||||
|  |  | ||||||
|  |   void Mooee(const CoarseVector &in, CoarseVector &out) { | ||||||
|  |     MooeeInternal(in, out, DaggerNo, InverseNo); | ||||||
|  |   } | ||||||
|  |  | ||||||
|  |   void MooeeInv(const CoarseVector &in, CoarseVector &out) { | ||||||
|  |     MooeeInternal(in, out, DaggerNo, InverseYes); | ||||||
|  |   } | ||||||
|  |  | ||||||
|  |   void MooeeDag(const CoarseVector &in, CoarseVector &out) { | ||||||
|  |     MooeeInternal(in, out, DaggerYes, InverseNo); | ||||||
|  |   } | ||||||
|  |  | ||||||
|  |   void MooeeInvDag(const CoarseVector &in, CoarseVector &out) { | ||||||
|  |     MooeeInternal(in, out, DaggerYes, InverseYes); | ||||||
|  |   } | ||||||
|  |  | ||||||
|  |   void Meooe(const CoarseVector &in, CoarseVector &out) { | ||||||
|  |     if(in.Checkerboard() == Odd) { | ||||||
|  |       DhopEO(in, out, DaggerNo); | ||||||
|  |     } else { | ||||||
|  |       DhopOE(in, out, DaggerNo); | ||||||
|  |     } | ||||||
|  |   } | ||||||
|  |  | ||||||
|  |   void MeooeDag(const CoarseVector &in, CoarseVector &out) { | ||||||
|  |     if(in.Checkerboard() == Odd) { | ||||||
|  |       DhopEO(in, out, DaggerYes); | ||||||
|  |     } else { | ||||||
|  |       DhopOE(in, out, DaggerYes); | ||||||
|  |     } | ||||||
|  |   } | ||||||
|  |  | ||||||
|  |   void Dhop(const CoarseVector &in, CoarseVector &out, int dag) { | ||||||
|  |     conformable(in.Grid(), _grid); // verifies full grid | ||||||
|  |     conformable(in.Grid(), out.Grid()); | ||||||
|  |  | ||||||
|  |     out.Checkerboard() = in.Checkerboard(); | ||||||
|  |  | ||||||
|  |     DhopInternal(Stencil, A, in, out, dag); | ||||||
|  |   } | ||||||
|  |  | ||||||
|  |   void DhopOE(const CoarseVector &in, CoarseVector &out, int dag) { | ||||||
|  |     conformable(in.Grid(), _cbgrid);    // verifies half grid | ||||||
|  |     conformable(in.Grid(), out.Grid()); // drops the cb check | ||||||
|  |  | ||||||
|  |     assert(in.Checkerboard() == Even); | ||||||
|  |     out.Checkerboard() = Odd; | ||||||
|  |  | ||||||
|  |     DhopInternal(StencilEven, Aodd, in, out, dag); | ||||||
|  |   } | ||||||
|  |  | ||||||
|  |   void DhopEO(const CoarseVector &in, CoarseVector &out, int dag) { | ||||||
|  |     conformable(in.Grid(), _cbgrid);    // verifies half grid | ||||||
|  |     conformable(in.Grid(), out.Grid()); // drops the cb check | ||||||
|  |  | ||||||
|  |     assert(in.Checkerboard() == Odd); | ||||||
|  |     out.Checkerboard() = Even; | ||||||
|  |  | ||||||
|  |     DhopInternal(StencilOdd, Aeven, in, out, dag); | ||||||
|  |   } | ||||||
|  |  | ||||||
|  |   void MooeeInternal(const CoarseVector &in, CoarseVector &out, int dag, int inv) { | ||||||
|  |     out.Checkerboard() = in.Checkerboard(); | ||||||
|  |     assert(in.Checkerboard() == Odd || in.Checkerboard() == Even); | ||||||
|  |  | ||||||
|  |     CoarseMatrix *Aself = nullptr; | ||||||
|  |     if(in.Grid()->_isCheckerBoarded) { | ||||||
|  |       if(in.Checkerboard() == Odd) { | ||||||
|  |         Aself = (inv) ? &AselfInvOdd : &Aodd[geom.npoint-1]; | ||||||
|  |         DselfInternal(StencilOdd, *Aself, in, out, dag); | ||||||
|  |       } else { | ||||||
|  |         Aself = (inv) ? &AselfInvEven : &Aeven[geom.npoint-1]; | ||||||
|  |         DselfInternal(StencilEven, *Aself, in, out, dag); | ||||||
|  |       } | ||||||
|  |     } else { | ||||||
|  |       Aself = (inv) ? &AselfInv : &A[geom.npoint-1]; | ||||||
|  |       DselfInternal(Stencil, *Aself, in, out, dag); | ||||||
|  |     } | ||||||
|  |     assert(Aself != nullptr); | ||||||
|  |   } | ||||||
|  |  | ||||||
|  |   void DselfInternal(CartesianStencil<siteVector,siteVector,DefaultImplParams> &st, CoarseMatrix &a, | ||||||
|  |                        const CoarseVector &in, CoarseVector &out, int dag) { | ||||||
|  |     int point = geom.npoint-1; | ||||||
|  |     autoView( out_v, out, AcceleratorWrite); | ||||||
|  |     autoView( in_v,  in,  AcceleratorRead); | ||||||
|  |     autoView( st_v,  st,  AcceleratorRead); | ||||||
|  |     autoView( a_v,   a,   AcceleratorRead); | ||||||
|  |  | ||||||
|  |     const int Nsimd = CComplex::Nsimd(); | ||||||
|  |     typedef decltype(coalescedRead(in_v[0])) calcVector; | ||||||
|  |     typedef decltype(coalescedRead(in_v[0](0))) calcComplex; | ||||||
|  |  | ||||||
|  |     RealD* dag_factor_p = &dag_factor[0]; | ||||||
|  |  | ||||||
|  |     if(dag) { | ||||||
|  |       accelerator_for(sss, in.Grid()->oSites()*nbasis, Nsimd, { | ||||||
|  |         int ss = sss/nbasis; | ||||||
|  |         int b  = sss%nbasis; | ||||||
|  |         calcComplex res = Zero(); | ||||||
|  |         calcVector nbr; | ||||||
|  |         int ptype; | ||||||
|  |         StencilEntry *SE; | ||||||
|  |  | ||||||
|  |         SE=st_v.GetEntry(ptype,point,ss); | ||||||
|  |  | ||||||
|  |         if(SE->_is_local) { | ||||||
|  |           nbr = coalescedReadPermute(in_v[SE->_offset],ptype,SE->_permute); | ||||||
|  |         } else { | ||||||
|  |           nbr = coalescedRead(st_v.CommBuf()[SE->_offset]); | ||||||
|  |         } | ||||||
|  |         acceleratorSynchronise(); | ||||||
|  |  | ||||||
|  |         for(int bb=0;bb<nbasis;bb++) { | ||||||
|  |           res = res + dag_factor_p[b*nbasis+bb]*coalescedRead(a_v[ss](b,bb))*nbr(bb); | ||||||
|  |         } | ||||||
|  |         coalescedWrite(out_v[ss](b),res); | ||||||
|  |       }); | ||||||
|  |     } else { | ||||||
|  |       accelerator_for(sss, in.Grid()->oSites()*nbasis, Nsimd, { | ||||||
|  |         int ss = sss/nbasis; | ||||||
|  |         int b  = sss%nbasis; | ||||||
|  |         calcComplex res = Zero(); | ||||||
|  |         calcVector nbr; | ||||||
|  |         int ptype; | ||||||
|  |         StencilEntry *SE; | ||||||
|  |  | ||||||
|  |         SE=st_v.GetEntry(ptype,point,ss); | ||||||
|  |  | ||||||
|  |         if(SE->_is_local) { | ||||||
|  |           nbr = coalescedReadPermute(in_v[SE->_offset],ptype,SE->_permute); | ||||||
|  |         } else { | ||||||
|  |           nbr = coalescedRead(st_v.CommBuf()[SE->_offset]); | ||||||
|  |         } | ||||||
|  |         acceleratorSynchronise(); | ||||||
|  |  | ||||||
|  |         for(int bb=0;bb<nbasis;bb++) { | ||||||
|  |           res = res + coalescedRead(a_v[ss](b,bb))*nbr(bb); | ||||||
|  |         } | ||||||
|  |         coalescedWrite(out_v[ss](b),res); | ||||||
|  |       }); | ||||||
|  |     } | ||||||
|  |   } | ||||||
|  |  | ||||||
|  |   void DhopInternal(CartesianStencil<siteVector,siteVector,DefaultImplParams> &st, std::vector<CoarseMatrix> &a, | ||||||
|  |                     const CoarseVector &in, CoarseVector &out, int dag) { | ||||||
|  |     SimpleCompressor<siteVector> compressor; | ||||||
|  |  | ||||||
|  |     st.HaloExchange(in,compressor); | ||||||
|  |     autoView( in_v,  in,  AcceleratorRead); | ||||||
|  |     autoView( out_v, out, AcceleratorWrite); | ||||||
|  |     autoView( st_v , st,  AcceleratorRead); | ||||||
|  |     typedef LatticeView<Cobj> Aview; | ||||||
|  |  | ||||||
|  |     // determine in what order we need the points | ||||||
|  |     int npoint = geom.npoint-1; | ||||||
|  |     Vector<int> points(npoint, 0); | ||||||
|  |     for(int p=0; p<npoint; p++) | ||||||
|  |       points[p] = (dag && !hermitian) ? geom.points_dagger[p] : p; | ||||||
|  |  | ||||||
|  |     auto points_p = &points[0]; | ||||||
|  |  | ||||||
|  |     Vector<Aview> AcceleratorViewContainer; | ||||||
|  |     for(int p=0;p<npoint;p++) AcceleratorViewContainer.push_back(a[p].View(AcceleratorRead)); | ||||||
|  |     Aview *Aview_p = & AcceleratorViewContainer[0]; | ||||||
|  |  | ||||||
|  |     const int Nsimd = CComplex::Nsimd(); | ||||||
|  |     typedef decltype(coalescedRead(in_v[0])) calcVector; | ||||||
|  |     typedef decltype(coalescedRead(in_v[0](0))) calcComplex; | ||||||
|  |  | ||||||
|  |     RealD* dag_factor_p = &dag_factor[0]; | ||||||
|  |  | ||||||
|  |     if(dag) { | ||||||
|  |       accelerator_for(sss, in.Grid()->oSites()*nbasis, Nsimd, { | ||||||
|  |         int ss = sss/nbasis; | ||||||
|  |         int b  = sss%nbasis; | ||||||
|  |         calcComplex res = Zero(); | ||||||
|  |         calcVector nbr; | ||||||
|  |         int ptype; | ||||||
|  |         StencilEntry *SE; | ||||||
|  |  | ||||||
|  |         for(int p=0;p<npoint;p++){ | ||||||
|  |           int point = points_p[p]; | ||||||
|  |           SE=st_v.GetEntry(ptype,point,ss); | ||||||
|  |  | ||||||
|  |           if(SE->_is_local) { | ||||||
|  |             nbr = coalescedReadPermute(in_v[SE->_offset],ptype,SE->_permute); | ||||||
|  |           } else { | ||||||
|  |             nbr = coalescedRead(st_v.CommBuf()[SE->_offset]); | ||||||
|  |           } | ||||||
|  |           acceleratorSynchronise(); | ||||||
|  |  | ||||||
|  |           for(int bb=0;bb<nbasis;bb++) { | ||||||
|  |             res = res + dag_factor_p[b*nbasis+bb]*coalescedRead(Aview_p[point][ss](b,bb))*nbr(bb); | ||||||
|  |           } | ||||||
|  |         } | ||||||
|  |         coalescedWrite(out_v[ss](b),res); | ||||||
|  |       }); | ||||||
|  |     } else { | ||||||
|  |       accelerator_for(sss, in.Grid()->oSites()*nbasis, Nsimd, { | ||||||
|  |         int ss = sss/nbasis; | ||||||
|  |         int b  = sss%nbasis; | ||||||
|  |         calcComplex res = Zero(); | ||||||
|  |         calcVector nbr; | ||||||
|  |         int ptype; | ||||||
|  |         StencilEntry *SE; | ||||||
|  |  | ||||||
|  |         for(int p=0;p<npoint;p++){ | ||||||
|  |           int point = points_p[p]; | ||||||
|  |           SE=st_v.GetEntry(ptype,point,ss); | ||||||
|  |  | ||||||
|  |           if(SE->_is_local) { | ||||||
|  |             nbr = coalescedReadPermute(in_v[SE->_offset],ptype,SE->_permute); | ||||||
|  |           } else { | ||||||
|  |             nbr = coalescedRead(st_v.CommBuf()[SE->_offset]); | ||||||
|  |           } | ||||||
|  |           acceleratorSynchronise(); | ||||||
|  |  | ||||||
|  |           for(int bb=0;bb<nbasis;bb++) { | ||||||
|  |             res = res + coalescedRead(Aview_p[point][ss](b,bb))*nbr(bb); | ||||||
|  |           } | ||||||
|  |         } | ||||||
|  |         coalescedWrite(out_v[ss](b),res); | ||||||
|  |       }); | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     for(int p=0;p<npoint;p++) AcceleratorViewContainer[p].ViewClose(); | ||||||
|  |   } | ||||||
|  |    | ||||||
|  |   CoarsenedMatrix(GridCartesian &CoarseGrid, int hermitian_=0) 	: | ||||||
|  |     _grid(&CoarseGrid), | ||||||
|  |     _cbgrid(new GridRedBlackCartesian(&CoarseGrid)), | ||||||
|  |     geom(CoarseGrid._ndimension), | ||||||
|  |     hermitian(hermitian_), | ||||||
|  |     Stencil(&CoarseGrid,geom.npoint,Even,geom.directions,geom.displacements), | ||||||
|  |     StencilEven(_cbgrid,geom.npoint,Even,geom.directions,geom.displacements), | ||||||
|  |     StencilOdd(_cbgrid,geom.npoint,Odd,geom.directions,geom.displacements), | ||||||
|  |     A(geom.npoint,&CoarseGrid), | ||||||
|  |     Aeven(geom.npoint,_cbgrid), | ||||||
|  |     Aodd(geom.npoint,_cbgrid), | ||||||
|  |     AselfInv(&CoarseGrid), | ||||||
|  |     AselfInvEven(_cbgrid), | ||||||
|  |     AselfInvOdd(_cbgrid), | ||||||
|  |     dag_factor(nbasis*nbasis) | ||||||
|  |   { | ||||||
|  |     fillFactor(); | ||||||
|  |   }; | ||||||
|  |  | ||||||
|  |   CoarsenedMatrix(GridCartesian &CoarseGrid, GridRedBlackCartesian &CoarseRBGrid, int hermitian_=0) 	: | ||||||
|  |  | ||||||
|  |     _grid(&CoarseGrid), | ||||||
|  |     _cbgrid(&CoarseRBGrid), | ||||||
|  |     geom(CoarseGrid._ndimension), | ||||||
|  |     hermitian(hermitian_), | ||||||
|  |     Stencil(&CoarseGrid,geom.npoint,Even,geom.directions,geom.displacements), | ||||||
|  |     StencilEven(&CoarseRBGrid,geom.npoint,Even,geom.directions,geom.displacements), | ||||||
|  |     StencilOdd(&CoarseRBGrid,geom.npoint,Odd,geom.directions,geom.displacements), | ||||||
|  |     A(geom.npoint,&CoarseGrid), | ||||||
|  |     Aeven(geom.npoint,&CoarseRBGrid), | ||||||
|  |     Aodd(geom.npoint,&CoarseRBGrid), | ||||||
|  |     AselfInv(&CoarseGrid), | ||||||
|  |     AselfInvEven(&CoarseRBGrid), | ||||||
|  |     AselfInvOdd(&CoarseRBGrid), | ||||||
|  |     dag_factor(nbasis*nbasis) | ||||||
|  |   { | ||||||
|  |     fillFactor(); | ||||||
|  |   }; | ||||||
|  |  | ||||||
|  |   void fillFactor() { | ||||||
|  |     Eigen::MatrixXd dag_factor_eigen = Eigen::MatrixXd::Ones(nbasis, nbasis); | ||||||
|  |     if(!hermitian) { | ||||||
|  |       const int nb = nbasis/2; | ||||||
|  |       dag_factor_eigen.block(0,nb,nb,nb) *= -1.0; | ||||||
|  |       dag_factor_eigen.block(nb,0,nb,nb) *= -1.0; | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     // GPU readable prefactor | ||||||
|  |     thread_for(i, nbasis*nbasis, { | ||||||
|  |       int j = i/nbasis; | ||||||
|  |       int k = i%nbasis; | ||||||
|  |       dag_factor[i] = dag_factor_eigen(j, k); | ||||||
|  |     }); | ||||||
|  |   } | ||||||
|  |  | ||||||
|  |   void CoarsenOperator(GridBase *FineGrid,LinearOperatorBase<Lattice<Fobj> > &linop, | ||||||
|  | 		       Aggregation<Fobj,CComplex,nbasis> & Subspace) | ||||||
|  |   { | ||||||
|  |     typedef Lattice<typename Fobj::tensor_reduced> FineComplexField; | ||||||
|  |     typedef typename Fobj::scalar_type scalar_type; | ||||||
|  |  | ||||||
|  |     std::cout << GridLogMessage<< "CoarsenMatrix "<< std::endl; | ||||||
|  |  | ||||||
|  |     FineComplexField one(FineGrid); one=scalar_type(1.0,0.0); | ||||||
|  |     FineComplexField zero(FineGrid); zero=scalar_type(0.0,0.0); | ||||||
|  |  | ||||||
|  |     std::vector<FineComplexField> masks(geom.npoint,FineGrid); | ||||||
|  |     FineComplexField imask(FineGrid); // contributions from within this block | ||||||
|  |     FineComplexField omask(FineGrid); // contributions from outwith this block | ||||||
|  |  | ||||||
|  |     FineComplexField evenmask(FineGrid); | ||||||
|  |     FineComplexField oddmask(FineGrid);  | ||||||
|  |  | ||||||
|  |     FineField     phi(FineGrid); | ||||||
|  |     FineField     tmp(FineGrid); | ||||||
|  |     FineField     zz(FineGrid); zz=Zero(); | ||||||
|  |     FineField    Mphi(FineGrid); | ||||||
|  |     FineField    Mphie(FineGrid); | ||||||
|  |     FineField    Mphio(FineGrid); | ||||||
|  |     std::vector<FineField>     Mphi_p(geom.npoint,FineGrid); | ||||||
|  |  | ||||||
|  |     Lattice<iScalar<vInteger> > coor (FineGrid); | ||||||
|  |     Lattice<iScalar<vInteger> > bcoor(FineGrid); | ||||||
|  |     Lattice<iScalar<vInteger> > bcb  (FineGrid); bcb = Zero(); | ||||||
|  |  | ||||||
|  |     CoarseVector iProj(Grid());  | ||||||
|  |     CoarseVector oProj(Grid());  | ||||||
|  |     CoarseVector SelfProj(Grid());  | ||||||
|  |     CoarseComplexField iZProj(Grid());  | ||||||
|  |     CoarseComplexField oZProj(Grid());  | ||||||
|  |  | ||||||
|  |     CoarseScalar InnerProd(Grid());  | ||||||
|  |  | ||||||
|  |     std::cout << GridLogMessage<< "CoarsenMatrix Orthog "<< std::endl; | ||||||
|  |     // Orthogonalise the subblocks over the basis | ||||||
|  |     blockOrthogonalise(InnerProd,Subspace.subspace); | ||||||
|  |  | ||||||
|  |     // Compute the matrix elements of linop between this orthonormal | ||||||
|  |     // set of vectors. | ||||||
|  |     std::cout << GridLogMessage<< "CoarsenMatrix masks "<< std::endl; | ||||||
|  |     int self_stencil=-1; | ||||||
|  |     for(int p=0;p<geom.npoint;p++) | ||||||
|  |     {  | ||||||
|  |       int dir   = geom.directions[p]; | ||||||
|  |       int disp  = geom.displacements[p]; | ||||||
|  |       A[p]=Zero(); | ||||||
|  |       if( geom.displacements[p]==0){ | ||||||
|  | 	self_stencil=p; | ||||||
|  |       } | ||||||
|  |  | ||||||
|  |       Integer block=(FineGrid->_rdimensions[dir])/(Grid()->_rdimensions[dir]); | ||||||
|  |  | ||||||
|  |       LatticeCoordinate(coor,dir); | ||||||
|  |  | ||||||
|  |       /////////////////////////////////////////////////////// | ||||||
|  |       // Work out even and odd block checkerboarding for fast diagonal term | ||||||
|  |       /////////////////////////////////////////////////////// | ||||||
|  |       if ( disp==1 ) { | ||||||
|  | 	bcb   = bcb + div(coor,block); | ||||||
|  |       } | ||||||
|  | 	 | ||||||
|  |       if ( disp==0 ) { | ||||||
|  | 	  masks[p]= Zero(); | ||||||
|  |       } else if ( disp==1 ) { | ||||||
|  | 	masks[p] = where(mod(coor,block)==(block-1),one,zero); | ||||||
|  |       } else if ( disp==-1 ) { | ||||||
|  | 	masks[p] = where(mod(coor,block)==(Integer)0,one,zero); | ||||||
|  |       } | ||||||
|  |     } | ||||||
|  |     evenmask = where(mod(bcb,2)==(Integer)0,one,zero); | ||||||
|  |     oddmask  = one-evenmask; | ||||||
|  |  | ||||||
|  |     assert(self_stencil!=-1); | ||||||
|  |  | ||||||
|  |     for(int i=0;i<nbasis;i++){ | ||||||
|  |  | ||||||
|  |       phi=Subspace.subspace[i]; | ||||||
|  |  | ||||||
|  |       std::cout << GridLogMessage<< "CoarsenMatrix vector "<<i << std::endl; | ||||||
|  |       linop.OpDirAll(phi,Mphi_p); | ||||||
|  |       linop.OpDiag  (phi,Mphi_p[geom.npoint-1]); | ||||||
|  |  | ||||||
|  |       for(int p=0;p<geom.npoint;p++){  | ||||||
|  |  | ||||||
|  | 	Mphi = Mphi_p[p]; | ||||||
|  |  | ||||||
|  | 	int dir   = geom.directions[p]; | ||||||
|  | 	int disp  = geom.displacements[p]; | ||||||
|  |  | ||||||
|  | 	if ( (disp==-1) || (!hermitian ) ) { | ||||||
|  |  | ||||||
|  | 	  //////////////////////////////////////////////////////////////////////// | ||||||
|  | 	  // Pick out contributions coming from this cell and neighbour cell | ||||||
|  | 	  //////////////////////////////////////////////////////////////////////// | ||||||
|  | 	  omask = masks[p]; | ||||||
|  | 	  imask = one-omask; | ||||||
|  | 	 | ||||||
|  | 	  for(int j=0;j<nbasis;j++){ | ||||||
|  | 	     | ||||||
|  | 	    blockMaskedInnerProduct(oZProj,omask,Subspace.subspace[j],Mphi); | ||||||
|  | 	     | ||||||
|  | 	    autoView( iZProj_v , iZProj, AcceleratorRead) ; | ||||||
|  | 	    autoView( oZProj_v , oZProj, AcceleratorRead) ; | ||||||
|  | 	    autoView( A_p     ,  A[p], AcceleratorWrite); | ||||||
|  | 	    autoView( A_self  , A[self_stencil], AcceleratorWrite); | ||||||
|  |  | ||||||
|  | 	    accelerator_for(ss, Grid()->oSites(), Fobj::Nsimd(),{ coalescedWrite(A_p[ss](j,i),oZProj_v(ss)); }); | ||||||
|  | 	    if ( hermitian && (disp==-1) ) { | ||||||
|  | 	      for(int pp=0;pp<geom.npoint;pp++){// Find the opposite link and set <j|A|i> = <i|A|j>* | ||||||
|  | 		int dirp   = geom.directions[pp]; | ||||||
|  | 		int dispp  = geom.displacements[pp]; | ||||||
|  | 		if ( (dirp==dir) && (dispp==1) ){ | ||||||
|  | 		  auto sft = conjugate(Cshift(oZProj,dir,1)); | ||||||
|  | 		  autoView( sft_v    ,  sft  , AcceleratorWrite); | ||||||
|  | 		  autoView( A_pp     ,  A[pp], AcceleratorWrite); | ||||||
|  | 		  accelerator_for(ss, Grid()->oSites(), Fobj::Nsimd(),{ coalescedWrite(A_pp[ss](i,j),sft_v(ss)); }); | ||||||
|  | 		} | ||||||
|  | 	      } | ||||||
|  | 	    } | ||||||
|  |  | ||||||
|  | 	  } | ||||||
|  | 	} | ||||||
|  |       } | ||||||
|  |  | ||||||
|  |       /////////////////////////////////////////// | ||||||
|  |       // Faster alternate self coupling.. use hermiticity to save 2x | ||||||
|  |       /////////////////////////////////////////// | ||||||
|  |       { | ||||||
|  | 	mult(tmp,phi,evenmask);  linop.Op(tmp,Mphie); | ||||||
|  | 	mult(tmp,phi,oddmask );  linop.Op(tmp,Mphio); | ||||||
|  |  | ||||||
|  | 	{ | ||||||
|  | 	  autoView( tmp_      , tmp, AcceleratorWrite); | ||||||
|  | 	  autoView( evenmask_ , evenmask, AcceleratorRead); | ||||||
|  | 	  autoView( oddmask_  ,  oddmask, AcceleratorRead); | ||||||
|  | 	  autoView( Mphie_    ,  Mphie, AcceleratorRead); | ||||||
|  | 	  autoView( Mphio_    ,  Mphio, AcceleratorRead); | ||||||
|  | 	  accelerator_for(ss, FineGrid->oSites(), Fobj::Nsimd(),{  | ||||||
|  | 	      coalescedWrite(tmp_[ss],evenmask_(ss)*Mphie_(ss) + oddmask_(ss)*Mphio_(ss)); | ||||||
|  | 	    }); | ||||||
|  | 	} | ||||||
|  |  | ||||||
|  | 	blockProject(SelfProj,tmp,Subspace.subspace); | ||||||
|  |  | ||||||
|  | 	autoView( SelfProj_ , SelfProj, AcceleratorRead); | ||||||
|  | 	autoView( A_self  , A[self_stencil], AcceleratorWrite); | ||||||
|  |  | ||||||
|  | 	accelerator_for(ss, Grid()->oSites(), Fobj::Nsimd(),{ | ||||||
|  | 	  for(int j=0;j<nbasis;j++){ | ||||||
|  | 	    coalescedWrite(A_self[ss](j,i), SelfProj_(ss)(j)); | ||||||
|  | 	  } | ||||||
|  | 	}); | ||||||
|  |  | ||||||
|  |       } | ||||||
|  |     } | ||||||
|  |     if(hermitian) { | ||||||
|  |       std::cout << GridLogMessage << " ForceHermitian, new code "<<std::endl; | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     InvertSelfStencilLink(); std::cout << GridLogMessage << "Coarse self link inverted" << std::endl; | ||||||
|  |     FillHalfCbs(); std::cout << GridLogMessage << "Coarse half checkerboards filled" << std::endl; | ||||||
|  |   } | ||||||
|  |  | ||||||
|  |   void InvertSelfStencilLink() { | ||||||
|  |     std::cout << GridLogDebug << "CoarsenedMatrix::InvertSelfStencilLink" << std::endl; | ||||||
|  |     int localVolume = Grid()->lSites(); | ||||||
|  |  | ||||||
|  |     typedef typename Cobj::scalar_object scalar_object; | ||||||
|  |  | ||||||
|  |     autoView(Aself_v,    A[geom.npoint-1], CpuRead); | ||||||
|  |     autoView(AselfInv_v, AselfInv,         CpuWrite); | ||||||
|  |     thread_for(site, localVolume, { // NOTE: Not able to bring this to GPU because of Eigen + peek/poke | ||||||
|  |       Eigen::MatrixXcd selfLinkEigen    = Eigen::MatrixXcd::Zero(nbasis, nbasis); | ||||||
|  |       Eigen::MatrixXcd selfLinkInvEigen = Eigen::MatrixXcd::Zero(nbasis, nbasis); | ||||||
|  |  | ||||||
|  |       scalar_object selfLink    = Zero(); | ||||||
|  |       scalar_object selfLinkInv = Zero(); | ||||||
|  |  | ||||||
|  |       Coordinate lcoor; | ||||||
|  |  | ||||||
|  |       Grid()->LocalIndexToLocalCoor(site, lcoor); | ||||||
|  |       peekLocalSite(selfLink, Aself_v, lcoor); | ||||||
|  |  | ||||||
|  |       for (int i = 0; i < nbasis; ++i) | ||||||
|  |         for (int j = 0; j < nbasis; ++j) | ||||||
|  |           selfLinkEigen(i, j) = static_cast<ComplexD>(TensorRemove(selfLink(i, j))); | ||||||
|  |  | ||||||
|  |       selfLinkInvEigen = selfLinkEigen.inverse(); | ||||||
|  |  | ||||||
|  |       for(int i = 0; i < nbasis; ++i) | ||||||
|  |         for(int j = 0; j < nbasis; ++j) | ||||||
|  |           selfLinkInv(i, j) = selfLinkInvEigen(i, j); | ||||||
|  |  | ||||||
|  |       pokeLocalSite(selfLinkInv, AselfInv_v, lcoor); | ||||||
|  |     }); | ||||||
|  |   } | ||||||
|  |  | ||||||
|  |   void FillHalfCbs() { | ||||||
|  |     std::cout << GridLogDebug << "CoarsenedMatrix::FillHalfCbs" << std::endl; | ||||||
|  |     for(int p = 0; p < geom.npoint; ++p) { | ||||||
|  |       pickCheckerboard(Even, Aeven[p], A[p]); | ||||||
|  |       pickCheckerboard(Odd, Aodd[p], A[p]); | ||||||
|  |     } | ||||||
|  |     pickCheckerboard(Even, AselfInvEven, AselfInv); | ||||||
|  |     pickCheckerboard(Odd, AselfInvOdd, AselfInv); | ||||||
|  |   } | ||||||
|  | }; | ||||||
|  |  | ||||||
|  | NAMESPACE_END(Grid); | ||||||
|  | #endif | ||||||
							
								
								
									
										418
									
								
								Grid/algorithms/multigrid/GeneralCoarsenedMatrix.h
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										418
									
								
								Grid/algorithms/multigrid/GeneralCoarsenedMatrix.h
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,418 @@ | |||||||
|  | /************************************************************************************* | ||||||
|  |  | ||||||
|  |     Grid physics library, www.github.com/paboyle/Grid  | ||||||
|  |  | ||||||
|  |     Source file: ./lib/algorithms/GeneralCoarsenedMatrix.h | ||||||
|  |  | ||||||
|  |     Copyright (C) 2015 | ||||||
|  |  | ||||||
|  | Author: Peter Boyle <pboyle@bnl.gov> | ||||||
|  |  | ||||||
|  |     This program is free software; you can redistribute it and/or modify | ||||||
|  |     it under the terms of the GNU General Public License as published by | ||||||
|  |     the Free Software Foundation; either version 2 of the License, or | ||||||
|  |     (at your option) any later version. | ||||||
|  |  | ||||||
|  |     This program is distributed in the hope that it will be useful, | ||||||
|  |     but WITHOUT ANY WARRANTY; without even the implied warranty of | ||||||
|  |     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the | ||||||
|  |     GNU General Public License for more details. | ||||||
|  |  | ||||||
|  |     You should have received a copy of the GNU General Public License along | ||||||
|  |     with this program; if not, write to the Free Software Foundation, Inc., | ||||||
|  |     51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. | ||||||
|  |  | ||||||
|  |     See the full license in the file "LICENSE" in the top level distribution directory | ||||||
|  | *************************************************************************************/ | ||||||
|  | /*  END LEGAL */ | ||||||
|  | #pragma once | ||||||
|  |  | ||||||
|  | #include <Grid/qcd/QCD.h> // needed for Dagger(Yes|No), Inverse(Yes|No) | ||||||
|  |  | ||||||
|  | #include <Grid/lattice/PaddedCell.h> | ||||||
|  | #include <Grid/stencil/GeneralLocalStencil.h> | ||||||
|  |  | ||||||
|  | NAMESPACE_BEGIN(Grid); | ||||||
|  |  | ||||||
|  | // Fine Object == (per site) type of fine field | ||||||
|  | // nbasis      == number of deflation vectors | ||||||
|  | template<class Fobj,class CComplex,int nbasis> | ||||||
|  | class GeneralCoarsenedMatrix : public SparseMatrixBase<Lattice<iVector<CComplex,nbasis > > >  { | ||||||
|  | public: | ||||||
|  |  | ||||||
|  |   typedef GeneralCoarsenedMatrix<Fobj,CComplex,nbasis> GeneralCoarseOp; | ||||||
|  |   typedef iVector<CComplex,nbasis >           siteVector; | ||||||
|  |   typedef iMatrix<CComplex,nbasis >           siteMatrix; | ||||||
|  |   typedef Lattice<iScalar<CComplex> >         CoarseComplexField; | ||||||
|  |   typedef Lattice<siteVector>                 CoarseVector; | ||||||
|  |   typedef Lattice<iMatrix<CComplex,nbasis > > CoarseMatrix; | ||||||
|  |   typedef iMatrix<CComplex,nbasis >  Cobj; | ||||||
|  |   typedef Lattice< CComplex >   CoarseScalar; // used for inner products on fine field | ||||||
|  |   typedef Lattice<Fobj >        FineField; | ||||||
|  |   typedef CoarseVector Field; | ||||||
|  |   //////////////////// | ||||||
|  |   // Data members | ||||||
|  |   //////////////////// | ||||||
|  |   int hermitian; | ||||||
|  |   GridBase      *       _FineGrid;  | ||||||
|  |   GridCartesian *       _CoarseGrid;  | ||||||
|  |   NonLocalStencilGeometry &geom; | ||||||
|  |   PaddedCell Cell; | ||||||
|  |   GeneralLocalStencil Stencil; | ||||||
|  |    | ||||||
|  |   std::vector<CoarseMatrix> _A; | ||||||
|  |   std::vector<CoarseMatrix> _Adag; | ||||||
|  |  | ||||||
|  |   /////////////////////// | ||||||
|  |   // Interface | ||||||
|  |   /////////////////////// | ||||||
|  |   GridBase      * Grid(void)           { return _FineGrid; };   // this is all the linalg routines need to know | ||||||
|  |   GridBase      * FineGrid(void)       { return _FineGrid; };   // this is all the linalg routines need to know | ||||||
|  |   GridCartesian * CoarseGrid(void)     { return _CoarseGrid; };   // this is all the linalg routines need to know | ||||||
|  |  | ||||||
|  |   void ProjectNearestNeighbour(RealD shift, GeneralCoarseOp &CopyMe) | ||||||
|  |   { | ||||||
|  |     int nfound=0; | ||||||
|  |     std::cout << GridLogMessage <<"GeneralCoarsenedMatrix::ProjectNearestNeighbour "<< CopyMe._A[0].Grid()<<std::endl; | ||||||
|  |     for(int p=0;p<geom.npoint;p++){ | ||||||
|  |       for(int pp=0;pp<CopyMe.geom.npoint;pp++){ | ||||||
|  |  	// Search for the same relative shift | ||||||
|  | 	// Avoids brutal handling of Grid pointers | ||||||
|  | 	if ( CopyMe.geom.shifts[pp]==geom.shifts[p] ) { | ||||||
|  | 	  _A[p] = CopyMe.Cell.Extract(CopyMe._A[pp]); | ||||||
|  | 	  _Adag[p] = CopyMe.Cell.Extract(CopyMe._Adag[pp]); | ||||||
|  | 	  nfound++; | ||||||
|  | 	} | ||||||
|  |       } | ||||||
|  |     } | ||||||
|  |     assert(nfound==geom.npoint); | ||||||
|  |     ExchangeCoarseLinks(); | ||||||
|  |   } | ||||||
|  |    | ||||||
|  |   GeneralCoarsenedMatrix(NonLocalStencilGeometry &_geom,GridBase *FineGrid, GridCartesian * CoarseGrid) | ||||||
|  |     : geom(_geom), | ||||||
|  |       _FineGrid(FineGrid), | ||||||
|  |       _CoarseGrid(CoarseGrid), | ||||||
|  |       hermitian(1), | ||||||
|  |       Cell(_geom.Depth(),_CoarseGrid), | ||||||
|  |       Stencil(Cell.grids.back(),geom.shifts) | ||||||
|  |   { | ||||||
|  |     { | ||||||
|  |       int npoint = _geom.npoint; | ||||||
|  |       autoView( Stencil_v  , Stencil, AcceleratorRead); | ||||||
|  |       int osites=Stencil.Grid()->oSites(); | ||||||
|  |       for(int ss=0;ss<osites;ss++){ | ||||||
|  | 	for(int point=0;point<npoint;point++){ | ||||||
|  | 	  auto SE = Stencil_v.GetEntry(point,ss); | ||||||
|  | 	  int o = SE->_offset; | ||||||
|  | 	  assert( o< osites); | ||||||
|  | 	} | ||||||
|  |       }     | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     _A.resize(geom.npoint,CoarseGrid); | ||||||
|  |     _Adag.resize(geom.npoint,CoarseGrid); | ||||||
|  |   } | ||||||
|  |   void M (const CoarseVector &in, CoarseVector &out) | ||||||
|  |   { | ||||||
|  |     Mult(_A,in,out); | ||||||
|  |   } | ||||||
|  |   void Mdag (const CoarseVector &in, CoarseVector &out) | ||||||
|  |   { | ||||||
|  |     if ( hermitian ) M(in,out); | ||||||
|  |     else Mult(_Adag,in,out); | ||||||
|  |   } | ||||||
|  |   void Mult (std::vector<CoarseMatrix> &A,const CoarseVector &in, CoarseVector &out) | ||||||
|  |   { | ||||||
|  |     RealD tviews=0; | ||||||
|  |     RealD ttot=0; | ||||||
|  |     RealD tmult=0; | ||||||
|  |     RealD texch=0; | ||||||
|  |     RealD text=0; | ||||||
|  |     ttot=-usecond(); | ||||||
|  |     conformable(CoarseGrid(),in.Grid()); | ||||||
|  |     conformable(in.Grid(),out.Grid()); | ||||||
|  |     out.Checkerboard() = in.Checkerboard(); | ||||||
|  |     CoarseVector tin=in; | ||||||
|  |  | ||||||
|  |     texch-=usecond(); | ||||||
|  |     CoarseVector pin  = Cell.Exchange(tin); | ||||||
|  |     texch+=usecond(); | ||||||
|  |  | ||||||
|  |     CoarseVector pout(pin.Grid()); pout=Zero(); | ||||||
|  |  | ||||||
|  |     int npoint = geom.npoint; | ||||||
|  |     typedef LatticeView<Cobj> Aview; | ||||||
|  |        | ||||||
|  |     const int Nsimd = CComplex::Nsimd(); | ||||||
|  |      | ||||||
|  |     int osites=pin.Grid()->oSites(); | ||||||
|  |     //    int gsites=pin.Grid()->gSites(); | ||||||
|  |  | ||||||
|  |     RealD flops = 1.0* npoint * nbasis * nbasis * 8 * osites; | ||||||
|  |     RealD bytes = (1.0*osites*sizeof(siteMatrix)*npoint+2.0*osites*sizeof(siteVector))*npoint; | ||||||
|  |        | ||||||
|  |     //    for(int point=0;point<npoint;point++){ | ||||||
|  |     //      conformable(A[point],pin); | ||||||
|  |     //    } | ||||||
|  |  | ||||||
|  |     { | ||||||
|  |       tviews-=usecond(); | ||||||
|  |       autoView( in_v , pin, AcceleratorRead); | ||||||
|  |       autoView( out_v , pout, AcceleratorWrite); | ||||||
|  |       autoView( Stencil_v  , Stencil, AcceleratorRead); | ||||||
|  |       tviews+=usecond(); | ||||||
|  |        | ||||||
|  |       for(int point=0;point<npoint;point++){ | ||||||
|  | 	tviews-=usecond(); | ||||||
|  | 	autoView( A_v, A[point],AcceleratorRead); | ||||||
|  | 	tviews+=usecond(); | ||||||
|  | 	tmult-=usecond(); | ||||||
|  | 	accelerator_for(sss, osites*nbasis, Nsimd, { | ||||||
|  |  | ||||||
|  | 	    typedef decltype(coalescedRead(in_v[0]))    calcVector; | ||||||
|  |  | ||||||
|  | 	    int ss = sss/nbasis; | ||||||
|  | 	    int b  = sss%nbasis; | ||||||
|  |  | ||||||
|  | 	    auto SE  = Stencil_v.GetEntry(point,ss); | ||||||
|  | 	    auto nbr = coalescedReadGeneralPermute(in_v[SE->_offset],SE->_permute,Nd); | ||||||
|  | 	    auto res = out_v(ss)(b); | ||||||
|  | 	    for(int bb=0;bb<nbasis;bb++) { | ||||||
|  | 	      res = res + coalescedRead(A_v[ss](b,bb))*nbr(bb); | ||||||
|  | 	    } | ||||||
|  | 	    coalescedWrite(out_v[ss](b),res); | ||||||
|  | 	}); | ||||||
|  |  | ||||||
|  | 	tmult+=usecond(); | ||||||
|  |       } | ||||||
|  |     } | ||||||
|  |     text-=usecond(); | ||||||
|  |     out = Cell.Extract(pout); | ||||||
|  |     text+=usecond(); | ||||||
|  |     ttot+=usecond(); | ||||||
|  |  | ||||||
|  |     std::cout << GridLogPerformance<<"Coarse Mult Aviews "<<tviews<<" us"<<std::endl; | ||||||
|  |     std::cout << GridLogPerformance<<"Coarse Mult exch "<<texch<<" us"<<std::endl; | ||||||
|  |     std::cout << GridLogPerformance<<"Coarse Mult mult "<<tmult<<" us"<<std::endl; | ||||||
|  |     std::cout << GridLogPerformance<<"Coarse Mult ext  "<<text<<" us"<<std::endl; | ||||||
|  |     std::cout << GridLogPerformance<<"Coarse Mult tot  "<<ttot<<" us"<<std::endl; | ||||||
|  |     std::cout << GridLogPerformance<<"Coarse Kernel flop/s "<< flops/tmult<<" mflop/s"<<std::endl; | ||||||
|  |     std::cout << GridLogPerformance<<"Coarse Kernel bytes/s"<< bytes/tmult<<" MB/s"<<std::endl; | ||||||
|  |     std::cout << GridLogPerformance<<"Coarse overall flops/s "<< flops/ttot<<" mflop/s"<<std::endl; | ||||||
|  |     std::cout << GridLogPerformance<<"Coarse total bytes   "<< bytes/1e6<<" MB"<<std::endl; | ||||||
|  |   }; | ||||||
|  |  | ||||||
|  |   void PopulateAdag(void) | ||||||
|  |   { | ||||||
|  |     for(int64_t bidx=0;bidx<CoarseGrid()->gSites() ;bidx++){ | ||||||
|  |       Coordinate bcoor; | ||||||
|  |       CoarseGrid()->GlobalIndexToGlobalCoor(bidx,bcoor); | ||||||
|  |        | ||||||
|  |       for(int p=0;p<geom.npoint;p++){ | ||||||
|  | 	Coordinate scoor = bcoor; | ||||||
|  | 	for(int mu=0;mu<bcoor.size();mu++){ | ||||||
|  | 	  int L = CoarseGrid()->GlobalDimensions()[mu]; | ||||||
|  | 	  scoor[mu] = (bcoor[mu] - geom.shifts[p][mu] + L) % L; // Modulo arithmetic | ||||||
|  | 	} | ||||||
|  | 	// Flip to poke/peekLocalSite and not too bad | ||||||
|  | 	auto link = peekSite(_A[p],scoor); | ||||||
|  | 	int pp = geom.Reverse(p); | ||||||
|  | 	pokeSite(adj(link),_Adag[pp],bcoor); | ||||||
|  |       } | ||||||
|  |     } | ||||||
|  |   } | ||||||
|  |   ///////////////////////////////////////////////////////////// | ||||||
|  |   //  | ||||||
|  |   // A) Only reduced flops option is to use a padded cell of depth 4 | ||||||
|  |   // and apply MpcDagMpc in the padded cell. | ||||||
|  |   // | ||||||
|  |   // Makes for ONE application of MpcDagMpc per vector instead of 30 or 80. | ||||||
|  |   // With the effective cell size around (B+8)^4 perhaps 12^4/4^4 ratio | ||||||
|  |   // Cost is 81x more, same as stencil size. | ||||||
|  |   // | ||||||
|  |   // But: can eliminate comms and do as local dirichlet. | ||||||
|  |   // | ||||||
|  |   // Local exchange gauge field once. | ||||||
|  |   // Apply to all vectors, local only computation. | ||||||
|  |   // Must exchange ghost subcells in reverse process of PaddedCell to take inner products | ||||||
|  |   // | ||||||
|  |   // B) Can reduce cost: pad by 1, apply Deo      (4^4+6^4+8^4+8^4 )/ (4x 4^4) | ||||||
|  |   //                     pad by 2, apply Doe | ||||||
|  |   //                     pad by 3, apply Deo | ||||||
|  |   //                     then break out 8x directions; cost is ~10x MpcDagMpc per vector | ||||||
|  |   // | ||||||
|  |   // => almost factor of 10 in setup cost, excluding data rearrangement | ||||||
|  |   // | ||||||
|  |   // Intermediates -- ignore the corner terms, leave approximate and force Hermitian | ||||||
|  |   // Intermediates -- pad by 2 and apply 1+8+24 = 33 times. | ||||||
|  |   ///////////////////////////////////////////////////////////// | ||||||
|  |  | ||||||
|  |     ////////////////////////////////////////////////////////// | ||||||
|  |     // BFM HDCG style approach: Solve a system of equations to get Aij | ||||||
|  |     ////////////////////////////////////////////////////////// | ||||||
|  |     /* | ||||||
|  |      *     Here, k,l index which possible shift within the 3^Nd "ball" connected by MdagM. | ||||||
|  |      * | ||||||
|  |      *     conj(phases[block]) proj[k][ block*Nvec+j ] =  \sum_ball  e^{i q_k . delta} < phi_{block,j} | MdagM | phi_{(block+delta),i} >  | ||||||
|  |      *                                                 =  \sum_ball e^{iqk.delta} A_ji | ||||||
|  |      * | ||||||
|  |      *     Must invert matrix M_k,l = e^[i q_k . delta_l] | ||||||
|  |      * | ||||||
|  |      *     Where q_k = delta_k . (2*M_PI/global_nb[mu]) | ||||||
|  |      */ | ||||||
|  |   void CoarsenOperator(LinearOperatorBase<Lattice<Fobj> > &linop, | ||||||
|  | 		       Aggregation<Fobj,CComplex,nbasis> & Subspace) | ||||||
|  |   { | ||||||
|  |     std::cout << GridLogMessage<< "GeneralCoarsenMatrix "<< std::endl; | ||||||
|  |     GridBase *grid = FineGrid(); | ||||||
|  |  | ||||||
|  |     RealD tproj=0.0; | ||||||
|  |     RealD teigen=0.0; | ||||||
|  |     RealD tmat=0.0; | ||||||
|  |     RealD tphase=0.0; | ||||||
|  |     RealD tinv=0.0; | ||||||
|  |  | ||||||
|  |     ///////////////////////////////////////////////////////////// | ||||||
|  |     // Orthogonalise the subblocks over the basis | ||||||
|  |     ///////////////////////////////////////////////////////////// | ||||||
|  |     CoarseScalar InnerProd(CoarseGrid());  | ||||||
|  |     blockOrthogonalise(InnerProd,Subspace.subspace); | ||||||
|  |  | ||||||
|  |     const int npoint = geom.npoint; | ||||||
|  |        | ||||||
|  |     Coordinate clatt = CoarseGrid()->GlobalDimensions(); | ||||||
|  |     int Nd = CoarseGrid()->Nd(); | ||||||
|  |  | ||||||
|  |       /* | ||||||
|  |        *     Here, k,l index which possible momentum/shift within the N-points connected by MdagM. | ||||||
|  |        *     Matrix index i is mapped to this shift via  | ||||||
|  |        *               geom.shifts[i] | ||||||
|  |        * | ||||||
|  |        *     conj(pha[block]) proj[k (which mom)][j (basis vec cpt)][block]  | ||||||
|  |        *       =  \sum_{l in ball}  e^{i q_k . delta_l} < phi_{block,j} | MdagM | phi_{(block+delta_l),i} >  | ||||||
|  |        *       =  \sum_{l in ball} e^{iqk.delta_l} A_ji^{b.b+l} | ||||||
|  |        *       = M_{kl} A_ji^{b.b+l} | ||||||
|  |        * | ||||||
|  |        *     Must assemble and invert matrix M_k,l = e^[i q_k . delta_l] | ||||||
|  |        *   | ||||||
|  |        *     Where q_k = delta_k . (2*M_PI/global_nb[mu]) | ||||||
|  |        * | ||||||
|  |        *     Then A{ji}^{b,b+l} = M^{-1}_{lm} ComputeProj_{m,b,i,j} | ||||||
|  |        */ | ||||||
|  |     teigen-=usecond(); | ||||||
|  |     Eigen::MatrixXcd Mkl    = Eigen::MatrixXcd::Zero(npoint,npoint); | ||||||
|  |     Eigen::MatrixXcd invMkl = Eigen::MatrixXcd::Zero(npoint,npoint); | ||||||
|  |     ComplexD ci(0.0,1.0); | ||||||
|  |     for(int k=0;k<npoint;k++){ // Loop over momenta | ||||||
|  |  | ||||||
|  |       for(int l=0;l<npoint;l++){ // Loop over nbr relative | ||||||
|  | 	ComplexD phase(0.0,0.0); | ||||||
|  | 	for(int mu=0;mu<Nd;mu++){ | ||||||
|  | 	  RealD TwoPiL =  M_PI * 2.0/ clatt[mu]; | ||||||
|  | 	  phase=phase+TwoPiL*geom.shifts[k][mu]*geom.shifts[l][mu]; | ||||||
|  | 	} | ||||||
|  | 	phase=exp(phase*ci); | ||||||
|  | 	Mkl(k,l) = phase; | ||||||
|  |       } | ||||||
|  |     } | ||||||
|  |     invMkl = Mkl.inverse(); | ||||||
|  |     teigen+=usecond(); | ||||||
|  |  | ||||||
|  |     /////////////////////////////////////////////////////////////////////// | ||||||
|  |     // Now compute the matrix elements of linop between the orthonormal | ||||||
|  |     // set of vectors. | ||||||
|  |     /////////////////////////////////////////////////////////////////////// | ||||||
|  |     FineField phaV(grid); // Phased block basis vector | ||||||
|  |     FineField MphaV(grid);// Matrix applied | ||||||
|  |     CoarseVector coarseInner(CoarseGrid()); | ||||||
|  |  | ||||||
|  |     std::vector<CoarseVector> ComputeProj(npoint,CoarseGrid()); | ||||||
|  |     std::vector<CoarseVector>          FT(npoint,CoarseGrid()); | ||||||
|  |     for(int i=0;i<nbasis;i++){// Loop over basis vectors | ||||||
|  |       std::cout << GridLogMessage<< "CoarsenMatrixColoured vec "<<i<<"/"<<nbasis<< std::endl; | ||||||
|  |       for(int p=0;p<npoint;p++){ // Loop over momenta in npoint | ||||||
|  | 	///////////////////////////////////////////////////// | ||||||
|  | 	// Stick a phase on every block | ||||||
|  | 	///////////////////////////////////////////////////// | ||||||
|  | 	tphase-=usecond(); | ||||||
|  | 	CoarseComplexField coor(CoarseGrid()); | ||||||
|  | 	CoarseComplexField pha(CoarseGrid());	pha=Zero(); | ||||||
|  | 	for(int mu=0;mu<Nd;mu++){ | ||||||
|  | 	  LatticeCoordinate(coor,mu); | ||||||
|  | 	  RealD TwoPiL =  M_PI * 2.0/ clatt[mu]; | ||||||
|  | 	  pha = pha + (TwoPiL * geom.shifts[p][mu]) * coor; | ||||||
|  | 	} | ||||||
|  | 	pha  =exp(pha*ci); | ||||||
|  | 	phaV=Zero(); | ||||||
|  | 	blockZAXPY(phaV,pha,Subspace.subspace[i],phaV); | ||||||
|  | 	tphase+=usecond(); | ||||||
|  |  | ||||||
|  | 	///////////////////////////////////////////////////////////////////// | ||||||
|  | 	// Multiple phased subspace vector by matrix and project to subspace | ||||||
|  | 	// Remove local bulk phase to leave relative phases | ||||||
|  | 	///////////////////////////////////////////////////////////////////// | ||||||
|  | 	tmat-=usecond(); | ||||||
|  | 	linop.Op(phaV,MphaV); | ||||||
|  | 	tmat+=usecond(); | ||||||
|  |  | ||||||
|  | 	tproj-=usecond(); | ||||||
|  | 	blockProject(coarseInner,MphaV,Subspace.subspace); | ||||||
|  | 	coarseInner = conjugate(pha) * coarseInner; | ||||||
|  |  | ||||||
|  | 	ComputeProj[p] = coarseInner; | ||||||
|  | 	tproj+=usecond(); | ||||||
|  |  | ||||||
|  |       } | ||||||
|  |  | ||||||
|  |       tinv-=usecond(); | ||||||
|  |       for(int k=0;k<npoint;k++){ | ||||||
|  | 	FT[k] = Zero(); | ||||||
|  | 	for(int l=0;l<npoint;l++){ | ||||||
|  | 	  FT[k]= FT[k]+ invMkl(l,k)*ComputeProj[l]; | ||||||
|  | 	} | ||||||
|  |        | ||||||
|  | 	int osites=CoarseGrid()->oSites(); | ||||||
|  | 	autoView( A_v  , _A[k], AcceleratorWrite); | ||||||
|  | 	autoView( FT_v  , FT[k], AcceleratorRead); | ||||||
|  | 	accelerator_for(sss, osites, 1, { | ||||||
|  | 	    for(int j=0;j<nbasis;j++){ | ||||||
|  | 	      A_v[sss](j,i) = FT_v[sss](j); | ||||||
|  | 	    } | ||||||
|  |         }); | ||||||
|  |       } | ||||||
|  |       tinv+=usecond(); | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     for(int p=0;p<geom.npoint;p++){ | ||||||
|  |       Coordinate coor({0,0,0,0,0}); | ||||||
|  |       auto sval = peekSite(_A[p],coor); | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     // Only needed if nonhermitian | ||||||
|  |     if ( ! hermitian ) { | ||||||
|  |       std::cout << GridLogMessage<<"PopulateAdag  "<<std::endl; | ||||||
|  |       PopulateAdag(); | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     // Need to write something to populate Adag from A | ||||||
|  |     ExchangeCoarseLinks(); | ||||||
|  |     std::cout << GridLogMessage<<"CoarsenOperator eigen  "<<teigen<<" us"<<std::endl; | ||||||
|  |     std::cout << GridLogMessage<<"CoarsenOperator phase  "<<tphase<<" us"<<std::endl; | ||||||
|  |     std::cout << GridLogMessage<<"CoarsenOperator mat    "<<tmat <<" us"<<std::endl; | ||||||
|  |     std::cout << GridLogMessage<<"CoarsenOperator proj   "<<tproj<<" us"<<std::endl; | ||||||
|  |     std::cout << GridLogMessage<<"CoarsenOperator inv    "<<tinv<<" us"<<std::endl; | ||||||
|  |   } | ||||||
|  |   void ExchangeCoarseLinks(void){ | ||||||
|  |     for(int p=0;p<geom.npoint;p++){ | ||||||
|  |       _A[p] = Cell.Exchange(_A[p]); | ||||||
|  |       _Adag[p]= Cell.Exchange(_Adag[p]); | ||||||
|  |     } | ||||||
|  |   } | ||||||
|  |   virtual  void Mdiag    (const Field &in, Field &out){ assert(0);}; | ||||||
|  |   virtual  void Mdir     (const Field &in, Field &out,int dir, int disp){assert(0);}; | ||||||
|  |   virtual  void MdirAll  (const Field &in, std::vector<Field> &out){assert(0);}; | ||||||
|  | }; | ||||||
|  |  | ||||||
|  |  | ||||||
|  | NAMESPACE_END(Grid); | ||||||
							
								
								
									
										243
									
								
								Grid/algorithms/multigrid/Geometry.h
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										243
									
								
								Grid/algorithms/multigrid/Geometry.h
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,243 @@ | |||||||
|  | /************************************************************************************* | ||||||
|  |  | ||||||
|  |     Grid physics library, www.github.com/paboyle/Grid  | ||||||
|  |  | ||||||
|  |     Source file: ./lib/algorithms/GeneralCoarsenedMatrix.h | ||||||
|  |  | ||||||
|  |     Copyright (C) 2015 | ||||||
|  |  | ||||||
|  | Author: Peter Boyle <pboyle@bnl.gov> | ||||||
|  |  | ||||||
|  |     This program is free software; you can redistribute it and/or modify | ||||||
|  |     it under the terms of the GNU General Public License as published by | ||||||
|  |     the Free Software Foundation; either version 2 of the License, or | ||||||
|  |     (at your option) any later version. | ||||||
|  |  | ||||||
|  |     This program is distributed in the hope that it will be useful, | ||||||
|  |     but WITHOUT ANY WARRANTY; without even the implied warranty of | ||||||
|  |     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the | ||||||
|  |     GNU General Public License for more details. | ||||||
|  |  | ||||||
|  |     You should have received a copy of the GNU General Public License along | ||||||
|  |     with this program; if not, write to the Free Software Foundation, Inc., | ||||||
|  |     51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. | ||||||
|  |  | ||||||
|  |     See the full license in the file "LICENSE" in the top level distribution directory | ||||||
|  | *************************************************************************************/ | ||||||
|  | /*  END LEGAL */ | ||||||
|  | #pragma once | ||||||
|  |  | ||||||
|  | NAMESPACE_BEGIN(Grid); | ||||||
|  |  | ||||||
|  |  | ||||||
|  | ///////////////////////////////////////////////////////////////// | ||||||
|  | // Geometry class in cartesian case | ||||||
|  | ///////////////////////////////////////////////////////////////// | ||||||
|  |  | ||||||
|  | class Geometry { | ||||||
|  | public: | ||||||
|  |   int npoint; | ||||||
|  |   int base; | ||||||
|  |   std::vector<int> directions   ; | ||||||
|  |   std::vector<int> displacements; | ||||||
|  |   std::vector<int> points_dagger; | ||||||
|  |  | ||||||
|  |   Geometry(int _d)  { | ||||||
|  |      | ||||||
|  |     base = (_d==5) ? 1:0; | ||||||
|  |  | ||||||
|  |     // make coarse grid stencil for 4d , not 5d | ||||||
|  |     if ( _d==5 ) _d=4; | ||||||
|  |  | ||||||
|  |     npoint = 2*_d+1; | ||||||
|  |     directions.resize(npoint); | ||||||
|  |     displacements.resize(npoint); | ||||||
|  |     points_dagger.resize(npoint); | ||||||
|  |     for(int d=0;d<_d;d++){ | ||||||
|  |       directions[d   ] = d+base; | ||||||
|  |       directions[d+_d] = d+base; | ||||||
|  |       displacements[d  ] = +1; | ||||||
|  |       displacements[d+_d]= -1; | ||||||
|  |       points_dagger[d   ] = d+_d; | ||||||
|  |       points_dagger[d+_d] = d; | ||||||
|  |     } | ||||||
|  |     directions   [2*_d]=0; | ||||||
|  |     displacements[2*_d]=0; | ||||||
|  |     points_dagger[2*_d]=2*_d; | ||||||
|  |   } | ||||||
|  |  | ||||||
|  |   int point(int dir, int disp) { | ||||||
|  |     assert(disp == -1 || disp == 0 || disp == 1); | ||||||
|  |     assert(base+0 <= dir && dir < base+4); | ||||||
|  |  | ||||||
|  |     // directions faster index = new indexing | ||||||
|  |     // 4d (base = 0): | ||||||
|  |     // point 0  1  2  3  4  5  6  7  8 | ||||||
|  |     // dir   0  1  2  3  0  1  2  3  0 | ||||||
|  |     // disp +1 +1 +1 +1 -1 -1 -1 -1  0 | ||||||
|  |     // 5d (base = 1): | ||||||
|  |     // point 0  1  2  3  4  5  6  7  8 | ||||||
|  |     // dir   1  2  3  4  1  2  3  4  0 | ||||||
|  |     // disp +1 +1 +1 +1 -1 -1 -1 -1  0 | ||||||
|  |  | ||||||
|  |     // displacements faster index = old indexing | ||||||
|  |     // 4d (base = 0): | ||||||
|  |     // point 0  1  2  3  4  5  6  7  8 | ||||||
|  |     // dir   0  0  1  1  2  2  3  3  0 | ||||||
|  |     // disp +1 -1 +1 -1 +1 -1 +1 -1  0 | ||||||
|  |     // 5d (base = 1): | ||||||
|  |     // point 0  1  2  3  4  5  6  7  8 | ||||||
|  |     // dir   1  1  2  2  3  3  4  4  0 | ||||||
|  |     // disp +1 -1 +1 -1 +1 -1 +1 -1  0 | ||||||
|  |  | ||||||
|  |     if(dir == 0 and disp == 0) | ||||||
|  |       return 8; | ||||||
|  |     else // New indexing | ||||||
|  |       return (1 - disp) / 2 * 4 + dir - base; | ||||||
|  |     // else // Old indexing | ||||||
|  |     //   return (4 * (dir - base) + 1 - disp) / 2; | ||||||
|  |   } | ||||||
|  | }; | ||||||
|  |  | ||||||
|  | ///////////////////////////////////////////////////////////////// | ||||||
|  | // Less local equivalent of Geometry class in cartesian case | ||||||
|  | ///////////////////////////////////////////////////////////////// | ||||||
|  | class NonLocalStencilGeometry { | ||||||
|  | public: | ||||||
|  |   int depth; | ||||||
|  |   int hops; | ||||||
|  |   int npoint; | ||||||
|  |   std::vector<Coordinate> shifts; | ||||||
|  |   Coordinate stencil_size; | ||||||
|  |   Coordinate stencil_lo; | ||||||
|  |   Coordinate stencil_hi; | ||||||
|  |   GridCartesian *grid; | ||||||
|  |   GridCartesian *Grid() {return grid;}; | ||||||
|  |   int Depth(void){return 1;};   // Ghost zone depth | ||||||
|  |   int Hops(void){return hops;}; // # of hops=> level of corner fill in in stencil | ||||||
|  |  | ||||||
|  |   virtual int DimSkip(void) =0; | ||||||
|  |  | ||||||
|  |   virtual ~NonLocalStencilGeometry() {}; | ||||||
|  |  | ||||||
|  |   int  Reverse(int point) | ||||||
|  |   { | ||||||
|  |     int Nd = Grid()->Nd(); | ||||||
|  |     Coordinate shft = shifts[point]; | ||||||
|  |     Coordinate rev(Nd); | ||||||
|  |     for(int mu=0;mu<Nd;mu++) rev[mu]= -shft[mu]; | ||||||
|  |     for(int p=0;p<npoint;p++){ | ||||||
|  |       if(rev==shifts[p]){ | ||||||
|  | 	return p; | ||||||
|  |       } | ||||||
|  |     } | ||||||
|  |     assert(0); | ||||||
|  |     return -1; | ||||||
|  |   } | ||||||
|  |   void BuildShifts(void) | ||||||
|  |   { | ||||||
|  |     this->shifts.resize(0); | ||||||
|  |     int Nd = this->grid->Nd(); | ||||||
|  |  | ||||||
|  |     int dd = this->DimSkip(); | ||||||
|  |     for(int s0=this->stencil_lo[dd+0];s0<=this->stencil_hi[dd+0];s0++){ | ||||||
|  |     for(int s1=this->stencil_lo[dd+1];s1<=this->stencil_hi[dd+1];s1++){ | ||||||
|  |     for(int s2=this->stencil_lo[dd+2];s2<=this->stencil_hi[dd+2];s2++){ | ||||||
|  |     for(int s3=this->stencil_lo[dd+3];s3<=this->stencil_hi[dd+3];s3++){ | ||||||
|  |       Coordinate sft(Nd,0); | ||||||
|  |       sft[dd+0] = s0; | ||||||
|  |       sft[dd+1] = s1; | ||||||
|  |       sft[dd+2] = s2; | ||||||
|  |       sft[dd+3] = s3; | ||||||
|  |       int nhops = abs(s0)+abs(s1)+abs(s2)+abs(s3); | ||||||
|  |       if(nhops<=this->hops) this->shifts.push_back(sft); | ||||||
|  |     }}}} | ||||||
|  |     this->npoint = this->shifts.size(); | ||||||
|  |     std::cout << GridLogMessage << "NonLocalStencilGeometry has "<< this->npoint << " terms in stencil "<<std::endl; | ||||||
|  |   } | ||||||
|  |    | ||||||
|  |   NonLocalStencilGeometry(GridCartesian *_coarse_grid,int _hops) : grid(_coarse_grid), hops(_hops) | ||||||
|  |   { | ||||||
|  |     Coordinate latt = grid->GlobalDimensions(); | ||||||
|  |     stencil_size.resize(grid->Nd()); | ||||||
|  |     stencil_lo.resize(grid->Nd()); | ||||||
|  |     stencil_hi.resize(grid->Nd()); | ||||||
|  |     for(int d=0;d<grid->Nd();d++){ | ||||||
|  |      if ( latt[d] == 1 ) { | ||||||
|  |       stencil_lo[d] = 0; | ||||||
|  |       stencil_hi[d] = 0; | ||||||
|  |       stencil_size[d]= 1; | ||||||
|  |      } else if ( latt[d] == 2 ) { | ||||||
|  |       stencil_lo[d] = -1; | ||||||
|  |       stencil_hi[d] = 0; | ||||||
|  |       stencil_size[d]= 2; | ||||||
|  |      } else if ( latt[d] > 2 ) { | ||||||
|  |        stencil_lo[d] = -1; | ||||||
|  |        stencil_hi[d] =  1; | ||||||
|  |        stencil_size[d]= 3; | ||||||
|  |      } | ||||||
|  |     } | ||||||
|  |   }; | ||||||
|  |  | ||||||
|  | }; | ||||||
|  |  | ||||||
|  | // Need to worry about red-black now | ||||||
|  | class NonLocalStencilGeometry4D : public NonLocalStencilGeometry { | ||||||
|  | public: | ||||||
|  |   virtual int DimSkip(void) { return 0;}; | ||||||
|  |   NonLocalStencilGeometry4D(GridCartesian *Coarse,int _hops) : NonLocalStencilGeometry(Coarse,_hops) { }; | ||||||
|  |   virtual ~NonLocalStencilGeometry4D() {}; | ||||||
|  | }; | ||||||
|  | class NonLocalStencilGeometry5D : public NonLocalStencilGeometry { | ||||||
|  | public: | ||||||
|  |   virtual int DimSkip(void) { return 1; };  | ||||||
|  |   NonLocalStencilGeometry5D(GridCartesian *Coarse,int _hops) : NonLocalStencilGeometry(Coarse,_hops)  { }; | ||||||
|  |   virtual ~NonLocalStencilGeometry5D() {}; | ||||||
|  | }; | ||||||
|  | /* | ||||||
|  |  * Bunch of different options classes | ||||||
|  |  */ | ||||||
|  | class NextToNextToNextToNearestStencilGeometry4D : public NonLocalStencilGeometry4D { | ||||||
|  | public: | ||||||
|  |   NextToNextToNextToNearestStencilGeometry4D(GridCartesian *Coarse) :  NonLocalStencilGeometry4D(Coarse,4) | ||||||
|  |   { | ||||||
|  |     this->BuildShifts(); | ||||||
|  |   }; | ||||||
|  | }; | ||||||
|  | class NextToNextToNextToNearestStencilGeometry5D : public  NonLocalStencilGeometry5D { | ||||||
|  | public: | ||||||
|  |   NextToNextToNextToNearestStencilGeometry5D(GridCartesian *Coarse) :  NonLocalStencilGeometry5D(Coarse,4) | ||||||
|  |   { | ||||||
|  |     this->BuildShifts(); | ||||||
|  |   }; | ||||||
|  | }; | ||||||
|  | class NextToNearestStencilGeometry4D : public  NonLocalStencilGeometry4D { | ||||||
|  | public: | ||||||
|  |   NextToNearestStencilGeometry4D(GridCartesian *Coarse) :  NonLocalStencilGeometry4D(Coarse,2) | ||||||
|  |   { | ||||||
|  |     this->BuildShifts(); | ||||||
|  |   }; | ||||||
|  | }; | ||||||
|  | class NextToNearestStencilGeometry5D : public  NonLocalStencilGeometry5D { | ||||||
|  | public: | ||||||
|  |   NextToNearestStencilGeometry5D(GridCartesian *Coarse) :  NonLocalStencilGeometry5D(Coarse,2) | ||||||
|  |   { | ||||||
|  |     this->BuildShifts(); | ||||||
|  |   }; | ||||||
|  | }; | ||||||
|  | class NearestStencilGeometry4D : public  NonLocalStencilGeometry4D { | ||||||
|  | public: | ||||||
|  |   NearestStencilGeometry4D(GridCartesian *Coarse) :  NonLocalStencilGeometry4D(Coarse,1) | ||||||
|  |   { | ||||||
|  |     this->BuildShifts(); | ||||||
|  |   }; | ||||||
|  | }; | ||||||
|  | class NearestStencilGeometry5D : public  NonLocalStencilGeometry5D { | ||||||
|  | public: | ||||||
|  |   NearestStencilGeometry5D(GridCartesian *Coarse) :  NonLocalStencilGeometry5D(Coarse,1) | ||||||
|  |   { | ||||||
|  |     this->BuildShifts(); | ||||||
|  |   }; | ||||||
|  | }; | ||||||
|  |  | ||||||
|  | NAMESPACE_END(Grid); | ||||||
							
								
								
									
										33
									
								
								Grid/algorithms/multigrid/Multigrid.h
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										33
									
								
								Grid/algorithms/multigrid/Multigrid.h
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,33 @@ | |||||||
|  |     /************************************************************************************* | ||||||
|  |  | ||||||
|  |     Grid physics library, www.github.com/paboyle/Grid | ||||||
|  |  | ||||||
|  |     Source file: Grid/algorithms/multigrid/MultiGrid.h | ||||||
|  |  | ||||||
|  |     Copyright (C) 2023 | ||||||
|  |  | ||||||
|  | Author: Peter Boyle <pboyle@bnl.gov> | ||||||
|  |  | ||||||
|  |     This program is free software; you can redistribute it and/or modify | ||||||
|  |     it under the terms of the GNU General Public License as published by | ||||||
|  |     the Free Software Foundation; either version 2 of the License, or | ||||||
|  |     (at your option) any later version. | ||||||
|  |  | ||||||
|  |     This program is distributed in the hope that it will be useful, | ||||||
|  |     but WITHOUT ANY WARRANTY; without even the implied warranty of | ||||||
|  |     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the | ||||||
|  |     GNU General Public License for more details. | ||||||
|  |  | ||||||
|  |     You should have received a copy of the GNU General Public License along | ||||||
|  |     with this program; if not, write to the Free Software Foundation, Inc., | ||||||
|  |     51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. | ||||||
|  |  | ||||||
|  |     See the full license in the file "LICENSE" in the top level distribution directory | ||||||
|  |     *************************************************************************************/ | ||||||
|  |     /*  END LEGAL */ | ||||||
|  | #pragma once | ||||||
|  |  | ||||||
|  | #include <Grid/algorithms/multigrid/Aggregates.h> | ||||||
|  | #include <Grid/algorithms/multigrid/Geometry.h> | ||||||
|  | #include <Grid/algorithms/multigrid/CoarsenedMatrix.h> | ||||||
|  | #include <Grid/algorithms/multigrid/GeneralCoarsenedMatrix.h> | ||||||
| @@ -70,8 +70,8 @@ public: | |||||||
|   Coordinate _istride;    // Inner stride i.e. within simd lane |   Coordinate _istride;    // Inner stride i.e. within simd lane | ||||||
|   int _osites;                  // _isites*_osites = product(dimensions). |   int _osites;                  // _isites*_osites = product(dimensions). | ||||||
|   int _isites; |   int _isites; | ||||||
|   int _fsites;                  // _isites*_osites = product(dimensions). |   int64_t _fsites;                  // _isites*_osites = product(dimensions). | ||||||
|   int _gsites; |   int64_t _gsites; | ||||||
|   Coordinate _slice_block;// subslice information |   Coordinate _slice_block;// subslice information | ||||||
|   Coordinate _slice_stride; |   Coordinate _slice_stride; | ||||||
|   Coordinate _slice_nblock; |   Coordinate _slice_nblock; | ||||||
| @@ -183,7 +183,7 @@ public: | |||||||
|   inline int Nsimd(void)  const { return _isites; };// Synonymous with iSites |   inline int Nsimd(void)  const { return _isites; };// Synonymous with iSites | ||||||
|   inline int oSites(void) const { return _osites; }; |   inline int oSites(void) const { return _osites; }; | ||||||
|   inline int lSites(void) const { return _isites*_osites; };  |   inline int lSites(void) const { return _isites*_osites; };  | ||||||
|   inline int gSites(void) const { return _isites*_osites*_Nprocessors; };  |   inline int64_t gSites(void) const { return (int64_t)_isites*(int64_t)_osites*(int64_t)_Nprocessors; };  | ||||||
|   inline int Nd    (void) const { return _ndimension;}; |   inline int Nd    (void) const { return _ndimension;}; | ||||||
|  |  | ||||||
|   inline const Coordinate LocalStarts(void)             { return _lstart;    }; |   inline const Coordinate LocalStarts(void)             { return _lstart;    }; | ||||||
| @@ -214,7 +214,7 @@ public: | |||||||
|   //////////////////////////////////////////////////////////////// |   //////////////////////////////////////////////////////////////// | ||||||
|   // Global addressing |   // Global addressing | ||||||
|   //////////////////////////////////////////////////////////////// |   //////////////////////////////////////////////////////////////// | ||||||
|   void GlobalIndexToGlobalCoor(int gidx,Coordinate &gcoor){ |   void GlobalIndexToGlobalCoor(int64_t gidx,Coordinate &gcoor){ | ||||||
|     assert(gidx< gSites()); |     assert(gidx< gSites()); | ||||||
|     Lexicographic::CoorFromIndex(gcoor,gidx,_gdimensions); |     Lexicographic::CoorFromIndex(gcoor,gidx,_gdimensions); | ||||||
|   } |   } | ||||||
| @@ -222,7 +222,7 @@ public: | |||||||
|     assert(lidx<lSites()); |     assert(lidx<lSites()); | ||||||
|     Lexicographic::CoorFromIndex(lcoor,lidx,_ldimensions); |     Lexicographic::CoorFromIndex(lcoor,lidx,_ldimensions); | ||||||
|   } |   } | ||||||
|   void GlobalCoorToGlobalIndex(const Coordinate & gcoor,int & gidx){ |   void GlobalCoorToGlobalIndex(const Coordinate & gcoor,int64_t & gidx){ | ||||||
|     gidx=0; |     gidx=0; | ||||||
|     int mult=1; |     int mult=1; | ||||||
|     for(int mu=0;mu<_ndimension;mu++) { |     for(int mu=0;mu<_ndimension;mu++) { | ||||||
|   | |||||||
| @@ -360,7 +360,7 @@ public: | |||||||
|  |  | ||||||
| template<class vobj> std::ostream& operator<< (std::ostream& stream, const Lattice<vobj> &o){ | template<class vobj> std::ostream& operator<< (std::ostream& stream, const Lattice<vobj> &o){ | ||||||
|   typedef typename vobj::scalar_object sobj; |   typedef typename vobj::scalar_object sobj; | ||||||
|   for(int g=0;g<o.Grid()->_gsites;g++){ |   for(int64_t g=0;g<o.Grid()->_gsites;g++){ | ||||||
|  |  | ||||||
|     Coordinate gcoor; |     Coordinate gcoor; | ||||||
|     o.Grid()->GlobalIndexToGlobalCoor(g,gcoor); |     o.Grid()->GlobalIndexToGlobalCoor(g,gcoor); | ||||||
|   | |||||||
| @@ -361,9 +361,14 @@ public: | |||||||
|     _bernoulli.resize(_vol,std::discrete_distribution<int32_t>{1,1}); |     _bernoulli.resize(_vol,std::discrete_distribution<int32_t>{1,1}); | ||||||
|     _uid.resize(_vol,std::uniform_int_distribution<uint32_t>() ); |     _uid.resize(_vol,std::uniform_int_distribution<uint32_t>() ); | ||||||
|   } |   } | ||||||
|  |   template <class vobj,class distribution> inline void fill(Lattice<vobj> &l,std::vector<distribution> &dist) | ||||||
|   template <class vobj,class distribution> inline void fill(Lattice<vobj> &l,std::vector<distribution> &dist){ |   { | ||||||
|  |     if ( l.Grid()->_isCheckerBoarded ) { | ||||||
|  |       Lattice<vobj> tmp(_grid); | ||||||
|  |       fill(tmp,dist); | ||||||
|  |       pickCheckerboard(l.Checkerboard(),l,tmp); | ||||||
|  |       return; | ||||||
|  |     } | ||||||
|     typedef typename vobj::scalar_object scalar_object; |     typedef typename vobj::scalar_object scalar_object; | ||||||
|     typedef typename vobj::scalar_type scalar_type; |     typedef typename vobj::scalar_type scalar_type; | ||||||
|     typedef typename vobj::vector_type vector_type; |     typedef typename vobj::vector_type vector_type; | ||||||
| @@ -427,7 +432,7 @@ public: | |||||||
| #if 1 | #if 1 | ||||||
|     thread_for( lidx, _grid->lSites(), { |     thread_for( lidx, _grid->lSites(), { | ||||||
|  |  | ||||||
| 	int gidx; | 	int64_t gidx; | ||||||
| 	int o_idx; | 	int o_idx; | ||||||
| 	int i_idx; | 	int i_idx; | ||||||
| 	int rank; | 	int rank; | ||||||
|   | |||||||
| @@ -471,13 +471,13 @@ inline void blockSum(Lattice<vobj> &coarseData,const Lattice<vobj> &fineData) | |||||||
|  |  | ||||||
|   vobj zz = Zero(); |   vobj zz = Zero(); | ||||||
|    |    | ||||||
|   accelerator_for(sc,coarse->oSites(),1,{ |   accelerator_for(sc,coarse->oSites(),vobj::Nsimd(),{ | ||||||
|  |  | ||||||
|       // One thread per sub block |       // One thread per sub block | ||||||
|       Coordinate coor_c(_ndimension); |       Coordinate coor_c(_ndimension); | ||||||
|       Lexicographic::CoorFromIndex(coor_c,sc,coarse_rdimensions);  // Block coordinate |       Lexicographic::CoorFromIndex(coor_c,sc,coarse_rdimensions);  // Block coordinate | ||||||
|  |  | ||||||
|       vobj cd = zz; |       auto cd = coalescedRead(zz); | ||||||
|        |        | ||||||
|       for(int sb=0;sb<blockVol;sb++){ |       for(int sb=0;sb<blockVol;sb++){ | ||||||
|  |  | ||||||
| @@ -488,10 +488,10 @@ inline void blockSum(Lattice<vobj> &coarseData,const Lattice<vobj> &fineData) | |||||||
| 	for(int d=0;d<_ndimension;d++) coor_f[d]=coor_c[d]*block_r[d] + coor_b[d]; | 	for(int d=0;d<_ndimension;d++) coor_f[d]=coor_c[d]*block_r[d] + coor_b[d]; | ||||||
| 	Lexicographic::IndexFromCoor(coor_f,sf,fine_rdimensions); | 	Lexicographic::IndexFromCoor(coor_f,sf,fine_rdimensions); | ||||||
|  |  | ||||||
| 	cd=cd+fineData_p[sf]; | 	cd=cd+coalescedRead(fineData_p[sf]); | ||||||
|       } |       } | ||||||
|  |  | ||||||
|       coarseData_p[sc] = cd; |       coalescedWrite(coarseData_p[sc],cd); | ||||||
|  |  | ||||||
|     }); |     }); | ||||||
|   return; |   return; | ||||||
| @@ -1054,7 +1054,7 @@ void Replicate(const Lattice<vobj> &coarse,Lattice<vobj> & fine) | |||||||
|  |  | ||||||
|   Coordinate fcoor(nd); |   Coordinate fcoor(nd); | ||||||
|   Coordinate ccoor(nd); |   Coordinate ccoor(nd); | ||||||
|   for(int g=0;g<fg->gSites();g++){ |   for(int64_t g=0;g<fg->gSites();g++){ | ||||||
|  |  | ||||||
|     fg->GlobalIndexToGlobalCoor(g,fcoor); |     fg->GlobalIndexToGlobalCoor(g,fcoor); | ||||||
|     for(int d=0;d<nd;d++){ |     for(int d=0;d<nd;d++){ | ||||||
|   | |||||||
| @@ -63,8 +63,9 @@ public: | |||||||
|     dims=_grid->Nd(); |     dims=_grid->Nd(); | ||||||
|     AllocateGrids(); |     AllocateGrids(); | ||||||
|     Coordinate local     =unpadded_grid->LocalDimensions(); |     Coordinate local     =unpadded_grid->LocalDimensions(); | ||||||
|  |     Coordinate procs     =unpadded_grid->ProcessorGrid(); | ||||||
|     for(int d=0;d<dims;d++){ |     for(int d=0;d<dims;d++){ | ||||||
|       assert(local[d]>=depth); |       if ( procs[d] > 1 ) assert(local[d]>=depth); | ||||||
|     } |     } | ||||||
|   } |   } | ||||||
|   void DeleteGrids(void) |   void DeleteGrids(void) | ||||||
| @@ -85,7 +86,9 @@ public: | |||||||
|     // expand up one dim at a time |     // expand up one dim at a time | ||||||
|     for(int d=0;d<dims;d++){ |     for(int d=0;d<dims;d++){ | ||||||
|  |  | ||||||
|       plocal[d] += 2*depth;  |       if ( processors[d] > 1 ) {  | ||||||
|  | 	plocal[d] += 2*depth;  | ||||||
|  |       } | ||||||
|        |        | ||||||
|       for(int d=0;d<dims;d++){ |       for(int d=0;d<dims;d++){ | ||||||
| 	global[d] = plocal[d]*processors[d]; | 	global[d] = plocal[d]*processors[d]; | ||||||
| @@ -97,11 +100,17 @@ public: | |||||||
|   template<class vobj> |   template<class vobj> | ||||||
|   inline Lattice<vobj> Extract(const Lattice<vobj> &in) const |   inline Lattice<vobj> Extract(const Lattice<vobj> &in) const | ||||||
|   { |   { | ||||||
|  |     Coordinate processors=unpadded_grid->_processors; | ||||||
|  |  | ||||||
|     Lattice<vobj> out(unpadded_grid); |     Lattice<vobj> out(unpadded_grid); | ||||||
|  |  | ||||||
|     Coordinate local     =unpadded_grid->LocalDimensions(); |     Coordinate local     =unpadded_grid->LocalDimensions(); | ||||||
|     Coordinate fll(dims,depth); // depends on the MPI spread |     // depends on the MPI spread       | ||||||
|  |     Coordinate fll(dims,depth); | ||||||
|     Coordinate tll(dims,0); // depends on the MPI spread |     Coordinate tll(dims,0); // depends on the MPI spread | ||||||
|  |     for(int d=0;d<dims;d++){ | ||||||
|  |       if( processors[d]==1 ) fll[d]=0; | ||||||
|  |     } | ||||||
|     localCopyRegion(in,out,fll,tll,local); |     localCopyRegion(in,out,fll,tll,local); | ||||||
|     return out; |     return out; | ||||||
|   } |   } | ||||||
| @@ -120,6 +129,7 @@ public: | |||||||
|   template<class vobj> |   template<class vobj> | ||||||
|   inline Lattice<vobj> Expand(int dim, const Lattice<vobj> &in, const CshiftImplBase<vobj> &cshift = CshiftImplDefault<vobj>()) const |   inline Lattice<vobj> Expand(int dim, const Lattice<vobj> &in, const CshiftImplBase<vobj> &cshift = CshiftImplDefault<vobj>()) const | ||||||
|   { |   { | ||||||
|  |     Coordinate processors=unpadded_grid->_processors; | ||||||
|     GridBase *old_grid = in.Grid(); |     GridBase *old_grid = in.Grid(); | ||||||
|     GridCartesian *new_grid = grids[dim];//These are new grids |     GridCartesian *new_grid = grids[dim];//These are new grids | ||||||
|     Lattice<vobj>  padded(new_grid); |     Lattice<vobj>  padded(new_grid); | ||||||
| @@ -133,35 +143,47 @@ public: | |||||||
|  |  | ||||||
|     double tins=0, tshift=0; |     double tins=0, tshift=0; | ||||||
|  |  | ||||||
|     // Middle bit |     int islocal = 0 ; | ||||||
|     double t = usecond(); |     if ( processors[dim] == 1 ) islocal = 1; | ||||||
|     for(int x=0;x<local[dim];x++){ |  | ||||||
|       InsertSliceLocal(in,padded,x,depth+x,dim); |     if ( islocal ) { | ||||||
|  |        | ||||||
|  |       double t = usecond(); | ||||||
|  |       for(int x=0;x<local[dim];x++){ | ||||||
|  | 	InsertSliceLocal(in,padded,x,x,dim); | ||||||
|  |       } | ||||||
|  |       tins += usecond() - t; | ||||||
|  |        | ||||||
|  |     } else {  | ||||||
|  |       // Middle bit | ||||||
|  |       double t = usecond(); | ||||||
|  |       for(int x=0;x<local[dim];x++){ | ||||||
|  | 	InsertSliceLocal(in,padded,x,depth+x,dim); | ||||||
|  |       } | ||||||
|  |       tins += usecond() - t; | ||||||
|  |      | ||||||
|  |       // High bit | ||||||
|  |       t = usecond(); | ||||||
|  |       shifted = cshift.Cshift(in,dim,depth); | ||||||
|  |       tshift += usecond() - t; | ||||||
|  |  | ||||||
|  |       t=usecond(); | ||||||
|  |       for(int x=0;x<depth;x++){ | ||||||
|  | 	InsertSliceLocal(shifted,padded,local[dim]-depth+x,depth+local[dim]+x,dim); | ||||||
|  |       } | ||||||
|  |       tins += usecond() - t; | ||||||
|  |      | ||||||
|  |       // Low bit | ||||||
|  |       t = usecond(); | ||||||
|  |       shifted = cshift.Cshift(in,dim,-depth); | ||||||
|  |       tshift += usecond() - t; | ||||||
|  |      | ||||||
|  |       t = usecond(); | ||||||
|  |       for(int x=0;x<depth;x++){ | ||||||
|  | 	InsertSliceLocal(shifted,padded,x,x,dim); | ||||||
|  |       } | ||||||
|  |       tins += usecond() - t; | ||||||
|     } |     } | ||||||
|     tins += usecond() - t; |  | ||||||
|      |  | ||||||
|     // High bit |  | ||||||
|     t = usecond(); |  | ||||||
|     shifted = cshift.Cshift(in,dim,depth); |  | ||||||
|     tshift += usecond() - t; |  | ||||||
|  |  | ||||||
|     t=usecond(); |  | ||||||
|     for(int x=0;x<depth;x++){ |  | ||||||
|       InsertSliceLocal(shifted,padded,local[dim]-depth+x,depth+local[dim]+x,dim); |  | ||||||
|     } |  | ||||||
|     tins += usecond() - t; |  | ||||||
|      |  | ||||||
|     // Low bit |  | ||||||
|     t = usecond(); |  | ||||||
|     shifted = cshift.Cshift(in,dim,-depth); |  | ||||||
|     tshift += usecond() - t; |  | ||||||
|      |  | ||||||
|     t = usecond(); |  | ||||||
|     for(int x=0;x<depth;x++){ |  | ||||||
|       InsertSliceLocal(shifted,padded,x,x,dim); |  | ||||||
|     } |  | ||||||
|     tins += usecond() - t; |  | ||||||
|  |  | ||||||
|     std::cout << GridLogPerformance << "PaddedCell::Expand timings: cshift:" << tshift/1000 << "ms, insert-slice:" << tins/1000 << "ms" << std::endl; |     std::cout << GridLogPerformance << "PaddedCell::Expand timings: cshift:" << tshift/1000 << "ms, insert-slice:" << tins/1000 << "ms" << std::endl; | ||||||
|      |      | ||||||
|     return padded; |     return padded; | ||||||
|   | |||||||
| @@ -487,7 +487,7 @@ public: | |||||||
|     for(int mu=0;mu<Nd;mu++){ |     for(int mu=0;mu<Nd;mu++){ | ||||||
|       { //view scope |       { //view scope | ||||||
| 	autoView( gStaple_v , gStaple, AcceleratorWrite); | 	autoView( gStaple_v , gStaple, AcceleratorWrite); | ||||||
| 	auto gStencil_v = gStencil.View(); | 	auto gStencil_v = gStencil.View(AcceleratorRead); | ||||||
| 	 | 	 | ||||||
| 	accelerator_for(ss, ggrid->oSites(), ggrid->Nsimd(), { | 	accelerator_for(ss, ggrid->oSites(), ggrid->Nsimd(), { | ||||||
| 	    decltype(coalescedRead(Ug_dirs_v[0][0])) stencil_ss; | 	    decltype(coalescedRead(Ug_dirs_v[0][0])) stencil_ss; | ||||||
| @@ -1199,7 +1199,7 @@ public: | |||||||
|  |  | ||||||
|       { //view scope |       { //view scope | ||||||
| 	autoView( gStaple_v , gStaple, AcceleratorWrite); | 	autoView( gStaple_v , gStaple, AcceleratorWrite); | ||||||
| 	auto gStencil_v = gStencil.View(); | 	auto gStencil_v = gStencil.View(AcceleratorRead); | ||||||
|  |  | ||||||
| 	accelerator_for(ss, ggrid->oSites(), ggrid->Nsimd(), { | 	accelerator_for(ss, ggrid->oSites(), ggrid->Nsimd(), { | ||||||
| 	    decltype(coalescedRead(Ug_dirs_v[0][0])) stencil_ss; | 	    decltype(coalescedRead(Ug_dirs_v[0][0])) stencil_ss; | ||||||
|   | |||||||
| @@ -1130,6 +1130,14 @@ static_assert(sizeof(SIMD_Ftype) == sizeof(SIMD_Itype), "SIMD vector lengths inc | |||||||
| #endif | #endif | ||||||
| #endif | #endif | ||||||
|  |  | ||||||
|  | // Fixme need coalesced read gpermute | ||||||
|  | template<class vobj> void gpermute(vobj & inout,int perm){ | ||||||
|  |   vobj tmp=inout; | ||||||
|  |   if (perm & 0x1 ) { permute(inout,tmp,0); tmp=inout;} | ||||||
|  |   if (perm & 0x2 ) { permute(inout,tmp,1); tmp=inout;} | ||||||
|  |   if (perm & 0x4 ) { permute(inout,tmp,2); tmp=inout;} | ||||||
|  |   if (perm & 0x8 ) { permute(inout,tmp,3); tmp=inout;} | ||||||
|  | } | ||||||
|  |  | ||||||
| NAMESPACE_END(Grid); | NAMESPACE_END(Grid); | ||||||
|  |  | ||||||
|   | |||||||
| @@ -46,7 +46,7 @@ class GeneralLocalStencilView { | |||||||
|   accelerator_inline GeneralStencilEntry * GetEntry(int point,int osite) {  |   accelerator_inline GeneralStencilEntry * GetEntry(int point,int osite) {  | ||||||
|     return & this->_entries_p[point+this->_npoints*osite];  |     return & this->_entries_p[point+this->_npoints*osite];  | ||||||
|   } |   } | ||||||
|  |   void ViewClose(void){}; | ||||||
| }; | }; | ||||||
| //////////////////////////////////////// | //////////////////////////////////////// | ||||||
| // The Stencil Class itself | // The Stencil Class itself | ||||||
| @@ -61,7 +61,7 @@ protected: | |||||||
| public:  | public:  | ||||||
|   GridBase *Grid(void) const { return _grid; } |   GridBase *Grid(void) const { return _grid; } | ||||||
|  |  | ||||||
|   View_type View(void) const { |   View_type View(int mode) const { | ||||||
|     View_type accessor(*( (View_type *) this)); |     View_type accessor(*( (View_type *) this)); | ||||||
|     return accessor; |     return accessor; | ||||||
|   } |   } | ||||||
|   | |||||||
| @@ -137,6 +137,18 @@ inline void cuda_mem(void) | |||||||
|     dim3 cu_blocks ((num1+nt-1)/nt,num2,1);				\ |     dim3 cu_blocks ((num1+nt-1)/nt,num2,1);				\ | ||||||
|     LambdaApply<<<cu_blocks,cu_threads,0,computeStream>>>(num1,num2,nsimd,lambda);	\ |     LambdaApply<<<cu_blocks,cu_threads,0,computeStream>>>(num1,num2,nsimd,lambda);	\ | ||||||
|   } |   } | ||||||
|  | #define prof_accelerator_for2dNB( iter1, num1, iter2, num2, nsimd, ... )	\ | ||||||
|  |   {									\ | ||||||
|  |     int nt=acceleratorThreads();					\ | ||||||
|  |     typedef uint64_t Iterator;						\ | ||||||
|  |     auto lambda = [=] accelerator					\ | ||||||
|  |       (Iterator iter1,Iterator iter2,Iterator lane) mutable {		\ | ||||||
|  |       __VA_ARGS__;							\ | ||||||
|  |     };									\ | ||||||
|  |     dim3 cu_threads(nsimd,acceleratorThreads(),1);			\ | ||||||
|  |     dim3 cu_blocks ((num1+nt-1)/nt,num2,1);				\ | ||||||
|  |     ProfileLambdaApply<<<cu_blocks,cu_threads,0,computeStream>>>(num1,num2,nsimd,lambda); \ | ||||||
|  |   } | ||||||
|  |  | ||||||
| #define accelerator_for6dNB(iter1, num1,				\ | #define accelerator_for6dNB(iter1, num1,				\ | ||||||
|                             iter2, num2,				\ |                             iter2, num2,				\ | ||||||
| @@ -157,6 +169,20 @@ inline void cuda_mem(void) | |||||||
|     Lambda6Apply<<<cu_blocks,cu_threads,0,computeStream>>>(num1,num2,num3,num4,num5,num6,lambda); \ |     Lambda6Apply<<<cu_blocks,cu_threads,0,computeStream>>>(num1,num2,num3,num4,num5,num6,lambda); \ | ||||||
|   } |   } | ||||||
|  |  | ||||||
|  |  | ||||||
|  | #define accelerator_for2dNB( iter1, num1, iter2, num2, nsimd, ... )	\ | ||||||
|  |   {									\ | ||||||
|  |     int nt=acceleratorThreads();					\ | ||||||
|  |     typedef uint64_t Iterator;						\ | ||||||
|  |     auto lambda = [=] accelerator					\ | ||||||
|  |       (Iterator iter1,Iterator iter2,Iterator lane) mutable {		\ | ||||||
|  |       __VA_ARGS__;							\ | ||||||
|  |     };									\ | ||||||
|  |     dim3 cu_threads(nsimd,acceleratorThreads(),1);			\ | ||||||
|  |     dim3 cu_blocks ((num1+nt-1)/nt,num2,1);				\ | ||||||
|  |     LambdaApply<<<cu_blocks,cu_threads,0,computeStream>>>(num1,num2,nsimd,lambda);	\ | ||||||
|  |   } | ||||||
|  |  | ||||||
| template<typename lambda>  __global__ | template<typename lambda>  __global__ | ||||||
| void LambdaApply(uint64_t num1, uint64_t num2, uint64_t num3, lambda Lambda) | void LambdaApply(uint64_t num1, uint64_t num2, uint64_t num3, lambda Lambda) | ||||||
| { | { | ||||||
| @@ -168,6 +194,17 @@ void LambdaApply(uint64_t num1, uint64_t num2, uint64_t num3, lambda Lambda) | |||||||
|     Lambda(x,y,z); |     Lambda(x,y,z); | ||||||
|   } |   } | ||||||
| } | } | ||||||
|  | template<typename lambda>  __global__ | ||||||
|  | void ProfileLambdaApply(uint64_t num1, uint64_t num2, uint64_t num3, lambda Lambda) | ||||||
|  | { | ||||||
|  |   // Weird permute is to make lane coalesce for large blocks | ||||||
|  |   uint64_t x = threadIdx.y + blockDim.y*blockIdx.x; | ||||||
|  |   uint64_t y = threadIdx.z + blockDim.z*blockIdx.y; | ||||||
|  |   uint64_t z = threadIdx.x; | ||||||
|  |   if ( (x < num1) && (y<num2) && (z<num3) ) { | ||||||
|  |     Lambda(x,y,z); | ||||||
|  |   } | ||||||
|  | } | ||||||
|  |  | ||||||
| template<typename lambda>  __global__ | template<typename lambda>  __global__ | ||||||
| void Lambda6Apply(uint64_t num1, uint64_t num2, uint64_t num3, | void Lambda6Apply(uint64_t num1, uint64_t num2, uint64_t num3, | ||||||
| @@ -208,6 +245,7 @@ inline void *acceleratorAllocShared(size_t bytes) | |||||||
|   if( err != cudaSuccess ) { |   if( err != cudaSuccess ) { | ||||||
|     ptr = (void *) NULL; |     ptr = (void *) NULL; | ||||||
|     printf(" cudaMallocManaged failed for %d %s \n",bytes,cudaGetErrorString(err)); |     printf(" cudaMallocManaged failed for %d %s \n",bytes,cudaGetErrorString(err)); | ||||||
|  |     assert(0); | ||||||
|   } |   } | ||||||
|   return ptr; |   return ptr; | ||||||
| }; | }; | ||||||
| @@ -460,6 +498,9 @@ inline void acceleratorCopySynchronise(void) { hipStreamSynchronize(copyStream); | |||||||
| #if defined(GRID_SYCL) || defined(GRID_CUDA) || defined(GRID_HIP) | #if defined(GRID_SYCL) || defined(GRID_CUDA) || defined(GRID_HIP) | ||||||
| // FIXME -- the non-blocking nature got broken March 30 2023 by PAB | // FIXME -- the non-blocking nature got broken March 30 2023 by PAB | ||||||
| #define accelerator_forNB( iter1, num1, nsimd, ... ) accelerator_for2dNB( iter1, num1, iter2, 1, nsimd, {__VA_ARGS__} );   | #define accelerator_forNB( iter1, num1, nsimd, ... ) accelerator_for2dNB( iter1, num1, iter2, 1, nsimd, {__VA_ARGS__} );   | ||||||
|  | #define prof_accelerator_for( iter1, num1, nsimd, ... ) \ | ||||||
|  |   prof_accelerator_for2dNB( iter1, num1, iter2, 1, nsimd, {__VA_ARGS__} );\ | ||||||
|  |   accelerator_barrier(dummy); | ||||||
|  |  | ||||||
| #define accelerator_for( iter, num, nsimd, ... )		\ | #define accelerator_for( iter, num, nsimd, ... )		\ | ||||||
|   accelerator_forNB(iter, num, nsimd, { __VA_ARGS__ } );	\ |   accelerator_forNB(iter, num, nsimd, { __VA_ARGS__ } );	\ | ||||||
|   | |||||||
| @@ -94,6 +94,13 @@ static constexpr int MaxDims = GRID_MAX_LATTICE_DIMENSION; | |||||||
|  |  | ||||||
| typedef AcceleratorVector<int,MaxDims> Coordinate; | typedef AcceleratorVector<int,MaxDims> Coordinate; | ||||||
|  |  | ||||||
|  | template<class T,int _ndim> | ||||||
|  | inline bool operator==(const AcceleratorVector<T,_ndim> &v,const AcceleratorVector<T,_ndim> &w) | ||||||
|  | { | ||||||
|  |   if (v.size()!=w.size()) return false; | ||||||
|  |   for(int i=0;i<v.size();i++) if ( v[i]!=w[i] ) return false; | ||||||
|  |   return true; | ||||||
|  | } | ||||||
| template<class T,int _ndim> | template<class T,int _ndim> | ||||||
| inline std::ostream & operator<<(std::ostream &os, const AcceleratorVector<T,_ndim> &v) | inline std::ostream & operator<<(std::ostream &os, const AcceleratorVector<T,_ndim> &v) | ||||||
| { | { | ||||||
|   | |||||||
| @@ -8,7 +8,7 @@ namespace Grid{ | |||||||
|   public: |   public: | ||||||
|  |  | ||||||
|     template<class coor_t> |     template<class coor_t> | ||||||
|     static accelerator_inline void CoorFromIndex (coor_t& coor,int index,const coor_t &dims){ |     static accelerator_inline void CoorFromIndex (coor_t& coor,int64_t index,const coor_t &dims){ | ||||||
|       int nd= dims.size(); |       int nd= dims.size(); | ||||||
|       coor.resize(nd); |       coor.resize(nd); | ||||||
|       for(int d=0;d<nd;d++){ |       for(int d=0;d<nd;d++){ | ||||||
| @@ -18,28 +18,45 @@ namespace Grid{ | |||||||
|     } |     } | ||||||
|  |  | ||||||
|     template<class coor_t> |     template<class coor_t> | ||||||
|     static accelerator_inline void IndexFromCoor (const coor_t& coor,int &index,const coor_t &dims){ |     static accelerator_inline void IndexFromCoor (const coor_t& coor,int64_t &index,const coor_t &dims){ | ||||||
|       int nd=dims.size(); |       int nd=dims.size(); | ||||||
|       int stride=1; |       int stride=1; | ||||||
|       index=0; |       index=0; | ||||||
|       for(int d=0;d<nd;d++){ |       for(int d=0;d<nd;d++){ | ||||||
| 	index = index+stride*coor[d]; | 	index = index+(int64_t)stride*coor[d]; | ||||||
| 	stride=stride*dims[d]; | 	stride=stride*dims[d]; | ||||||
|       } |       } | ||||||
|     } |     } | ||||||
|  |     template<class coor_t> | ||||||
|  |     static accelerator_inline void IndexFromCoor (const coor_t& coor,int &index,const coor_t &dims){ | ||||||
|  |       int64_t index64; | ||||||
|  |       IndexFromCoor(coor,index64,dims); | ||||||
|  |       assert(index64<2*1024*1024*1024LL); | ||||||
|  |       index = (int) index64; | ||||||
|  |     } | ||||||
|  |  | ||||||
|     template<class coor_t> |     template<class coor_t> | ||||||
|     static inline void IndexFromCoorReversed (const coor_t& coor,int &index,const coor_t &dims){ |     static inline void IndexFromCoorReversed (const coor_t& coor,int64_t &index,const coor_t &dims){ | ||||||
|       int nd=dims.size(); |       int nd=dims.size(); | ||||||
|       int stride=1; |       int stride=1; | ||||||
|       index=0; |       index=0; | ||||||
|       for(int d=nd-1;d>=0;d--){ |       for(int d=nd-1;d>=0;d--){ | ||||||
| 	index = index+stride*coor[d]; | 	index = index+(int64_t)stride*coor[d]; | ||||||
| 	stride=stride*dims[d]; | 	stride=stride*dims[d]; | ||||||
|       } |       } | ||||||
|     } |     } | ||||||
|     template<class coor_t> |     template<class coor_t> | ||||||
|     static inline void CoorFromIndexReversed (coor_t& coor,int index,const coor_t &dims){ |     static inline void IndexFromCoorReversed (const coor_t& coor,int &index,const coor_t &dims){ | ||||||
|  |       int64_t index64; | ||||||
|  |       IndexFromCoorReversed(coor,index64,dims); | ||||||
|  |       if ( index64>=2*1024*1024*1024LL ){ | ||||||
|  | 	std::cout << " IndexFromCoorReversed " << coor<<" index " << index64<< " dims "<<dims<<std::endl; | ||||||
|  |       } | ||||||
|  |       assert(index64<2*1024*1024*1024LL); | ||||||
|  |       index = (int) index64; | ||||||
|  |     } | ||||||
|  |     template<class coor_t> | ||||||
|  |     static inline void CoorFromIndexReversed (coor_t& coor,int64_t index,const coor_t &dims){ | ||||||
|       int nd= dims.size(); |       int nd= dims.size(); | ||||||
|       coor.resize(nd); |       coor.resize(nd); | ||||||
|       for(int d=nd-1;d>=0;d--){ |       for(int d=nd-1;d>=0;d--){ | ||||||
|   | |||||||
							
								
								
									
										43
									
								
								systems/Frontier/benchmarks/bench2.slurm
									
									
									
									
									
										Executable file
									
								
							
							
						
						
									
										43
									
								
								systems/Frontier/benchmarks/bench2.slurm
									
									
									
									
									
										Executable file
									
								
							| @@ -0,0 +1,43 @@ | |||||||
|  | #!/bin/bash -l | ||||||
|  | #SBATCH --job-name=bench | ||||||
|  | ##SBATCH --partition=small-g | ||||||
|  | #SBATCH --nodes=2 | ||||||
|  | #SBATCH --ntasks-per-node=8 | ||||||
|  | #SBATCH --cpus-per-task=7 | ||||||
|  | #SBATCH --gpus-per-node=8 | ||||||
|  | #SBATCH --time=00:10:00 | ||||||
|  | #SBATCH --account=phy157_dwf | ||||||
|  | #SBATCH --gpu-bind=none | ||||||
|  | #SBATCH --exclusive | ||||||
|  | #SBATCH --mem=0 | ||||||
|  |  | ||||||
|  | cat << EOF > select_gpu | ||||||
|  | #!/bin/bash | ||||||
|  | export GPU_MAP=(0 1 2 3 7 6 5 4) | ||||||
|  | export NUMA_MAP=(3 3 1 1 2 2 0 0) | ||||||
|  | export GPU=\${GPU_MAP[\$SLURM_LOCALID]} | ||||||
|  | export NUMA=\${NUMA_MAP[\$SLURM_LOCALID]} | ||||||
|  | export HIP_VISIBLE_DEVICES=\$GPU | ||||||
|  | unset ROCR_VISIBLE_DEVICES | ||||||
|  | echo RANK \$SLURM_LOCALID using GPU \$GPU     | ||||||
|  | exec numactl -m \$NUMA -N \$NUMA \$* | ||||||
|  | EOF | ||||||
|  |  | ||||||
|  | chmod +x ./select_gpu | ||||||
|  |  | ||||||
|  | root=$HOME/Frontier/Grid/systems/Frontier/ | ||||||
|  | source ${root}/sourceme.sh | ||||||
|  |  | ||||||
|  | export OMP_NUM_THREADS=7 | ||||||
|  | export MPICH_GPU_SUPPORT_ENABLED=1 | ||||||
|  | export MPICH_SMP_SINGLE_COPY_MODE=XPMEM | ||||||
|  |  | ||||||
|  | for vol in 32.32.32.64 | ||||||
|  | do | ||||||
|  | srun ./select_gpu ./Benchmark_dwf_fp32 --mpi 2.2.2.2 --accelerator-threads 8 --comms-overlap --shm 2048 --shm-mpi 0 --grid $vol  > log.shm0.ov.$vol | ||||||
|  | srun ./select_gpu ./Benchmark_dwf_fp32 --mpi 2.2.2.2 --accelerator-threads 8 --comms-overlap --shm 2048 --shm-mpi 1 --grid $vol  > log.shm1.ov.$vol | ||||||
|  |  | ||||||
|  | srun ./select_gpu ./Benchmark_dwf_fp32 --mpi 2.2.2.2 --accelerator-threads 8 --comms-sequential --shm 2048 --shm-mpi 0 --grid $vol  > log.shm0.seq.$vol | ||||||
|  | srun ./select_gpu ./Benchmark_dwf_fp32 --mpi 2.2.2.2 --accelerator-threads 8 --comms-sequential --shm 2048 --shm-mpi 1 --grid $vol > log.shm1.seq.$vol | ||||||
|  | done | ||||||
|  |  | ||||||
							
								
								
									
										23
									
								
								systems/Frontier/config-command
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										23
									
								
								systems/Frontier/config-command
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,23 @@ | |||||||
|  | CLIME=`spack find --paths c-lime@2-3-9 | grep c-lime| cut -c 15-` | ||||||
|  | ../../configure --enable-comms=mpi-auto \ | ||||||
|  | --with-lime=$CLIME \ | ||||||
|  | --enable-unified=no \ | ||||||
|  | --enable-shm=nvlink \ | ||||||
|  | --enable-tracing=timer \ | ||||||
|  | --enable-accelerator=hip \ | ||||||
|  | --enable-gen-simd-width=64 \ | ||||||
|  | --disable-gparity \ | ||||||
|  | --disable-fermion-reps \ | ||||||
|  | --enable-simd=GPU \ | ||||||
|  | --enable-accelerator-cshift \ | ||||||
|  | --with-gmp=$OLCF_GMP_ROOT \ | ||||||
|  | --with-fftw=$FFTW_DIR/.. \ | ||||||
|  | --with-mpfr=/opt/cray/pe/gcc/mpfr/3.1.4/ \ | ||||||
|  | --disable-fermion-reps \ | ||||||
|  | CXX=hipcc MPICXX=mpicxx \ | ||||||
|  | CXXFLAGS="-fPIC -I{$ROCM_PATH}/include/ -std=c++14 -I${MPICH_DIR}/include -L/lib64 " \ | ||||||
|  |  LDFLAGS="-L/lib64 -L${MPICH_DIR}/lib -lmpi -L${CRAY_MPICH_ROOTDIR}/gtl/lib -lmpi_gtl_hsa -lamdhip64 " | ||||||
|  |  | ||||||
|  |  | ||||||
|  |  | ||||||
|  |  | ||||||
							
								
								
									
										13
									
								
								systems/Frontier/mpiwrapper.sh
									
									
									
									
									
										Executable file
									
								
							
							
						
						
									
										13
									
								
								systems/Frontier/mpiwrapper.sh
									
									
									
									
									
										Executable file
									
								
							| @@ -0,0 +1,13 @@ | |||||||
|  | #!/bin/bash | ||||||
|  |  | ||||||
|  | lrank=$SLURM_LOCALID | ||||||
|  | lgpu=(0 1 2 3 7 6 5 4) | ||||||
|  |  | ||||||
|  | export ROCR_VISIBLE_DEVICES=${lgpu[$lrank]} | ||||||
|  |  | ||||||
|  | echo "`hostname` - $lrank device=$ROCR_VISIBLE_DEVICES " | ||||||
|  |  | ||||||
|  | $* | ||||||
|  |  | ||||||
|  |  | ||||||
|  |  | ||||||
							
								
								
									
										13
									
								
								systems/Frontier/sourceme.sh
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										13
									
								
								systems/Frontier/sourceme.sh
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,13 @@ | |||||||
|  | . /autofs/nccs-svm1_home1/paboyle/Crusher/Grid/spack/share/spack/setup-env.sh | ||||||
|  | spack load c-lime | ||||||
|  | #export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/sw/crusher/spack-envs/base/opt/cray-sles15-zen3/gcc-11.2.0/gperftools-2.9.1-72ubwtuc5wcz2meqltbfdb76epufgzo2/lib | ||||||
|  | module load emacs  | ||||||
|  | module load PrgEnv-gnu | ||||||
|  | module load rocm | ||||||
|  | module load cray-mpich/8.1.23 | ||||||
|  | module load gmp | ||||||
|  | module load cray-fftw | ||||||
|  | module load craype-accel-amd-gfx90a | ||||||
|  | export LD_LIBRARY_PATH=/opt/gcc/mpfr/3.1.4/lib:$LD_LIBRARY_PATH | ||||||
|  | #Hack for lib | ||||||
|  | #export LD_LIBRARY_PATH=`pwd`:$LD_LIBRARY_PATH | ||||||
							
								
								
									
										9
									
								
								systems/Frontier/wrap.sh
									
									
									
									
									
										Executable file
									
								
							
							
						
						
									
										9
									
								
								systems/Frontier/wrap.sh
									
									
									
									
									
										Executable file
									
								
							| @@ -0,0 +1,9 @@ | |||||||
|  | #!/bin/sh | ||||||
|  |  | ||||||
|  | export HIP_VISIBLE_DEVICES=$ROCR_VISIBLE_DEVICES | ||||||
|  | unset ROCR_VISIBLE_DEVICES | ||||||
|  |  | ||||||
|  | #rank=$SLURM_PROCID | ||||||
|  | #rocprof -d rocprof.$rank -o rocprof.$rank/results.rank$SLURM_PROCID.csv --sys-trace $@ | ||||||
|  |  | ||||||
|  | $@ | ||||||
| @@ -1,4 +1,3 @@ | |||||||
| BREW=/opt/local/ | BREW=/opt/local/ | ||||||
| MPICXX=mpicxx ../../configure --enable-simd=GEN --enable-comms=mpi-auto --enable-unified=yes --prefix $HOME/QCD/GridInstall --with-lime=/Users/peterboyle/QCD/SciDAC/install/ --with-openssl=$BREW --disable-fermion-reps --disable-gparity --disable-debug | MPICXX=mpicxx ../../configure --enable-simd=GEN --enable-comms=mpi-auto --enable-unified=yes --prefix $HOME/QCD/GridInstall --with-lime=/Users/peterboyle/QCD/SciDAC/install/ --with-openssl=$BREW --disable-fermion-reps --disable-gparity --disable-debug | ||||||
|  |  | ||||||
|  |  | ||||||
|   | |||||||
							
								
								
									
										235
									
								
								tests/debug/Test_general_coarse.cc
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										235
									
								
								tests/debug/Test_general_coarse.cc
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,235 @@ | |||||||
|  |     /************************************************************************************* | ||||||
|  |  | ||||||
|  |     Grid physics library, www.github.com/paboyle/Grid  | ||||||
|  |  | ||||||
|  |     Source file: ./tests/Test_padded_cell.cc | ||||||
|  |  | ||||||
|  |     Copyright (C) 2023 | ||||||
|  |  | ||||||
|  | Author: Peter Boyle <paboyle@ph.ed.ac.uk> | ||||||
|  |  | ||||||
|  |     This program is free software; you can redistribute it and/or modify | ||||||
|  |     it under the terms of the GNU General Public License as published by | ||||||
|  |     the Free Software Foundation; either version 2 of the License, or | ||||||
|  |     (at your option) any later version. | ||||||
|  |  | ||||||
|  |     This program is distributed in the hope that it will be useful, | ||||||
|  |     but WITHOUT ANY WARRANTY; without even the implied warranty of | ||||||
|  |     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the | ||||||
|  |     GNU General Public License for more details. | ||||||
|  |  | ||||||
|  |     You should have received a copy of the GNU General Public License along | ||||||
|  |     with this program; if not, write to the Free Software Foundation, Inc., | ||||||
|  |     51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. | ||||||
|  |  | ||||||
|  |     See the full license in the file "LICENSE" in the top level distribution directory | ||||||
|  |     *************************************************************************************/ | ||||||
|  |     /*  END LEGAL */ | ||||||
|  | #include <Grid/Grid.h> | ||||||
|  | #include <Grid/lattice/PaddedCell.h> | ||||||
|  | #include <Grid/stencil/GeneralLocalStencil.h> | ||||||
|  |  | ||||||
|  | #include <Grid/algorithms/iterative/PrecGeneralisedConjugateResidual.h> | ||||||
|  | #include <Grid/algorithms/iterative/PrecGeneralisedConjugateResidualNonHermitian.h> | ||||||
|  | #include <Grid/algorithms/iterative/BiCGSTAB.h> | ||||||
|  |  | ||||||
|  | using namespace std; | ||||||
|  | using namespace Grid; | ||||||
|  |  | ||||||
|  | /////////////////////// | ||||||
|  | // Tells little dirac op to use MdagM as the .Op() | ||||||
|  | /////////////////////// | ||||||
|  | template<class Field> | ||||||
|  | class HermOpAdaptor : public LinearOperatorBase<Field> | ||||||
|  | { | ||||||
|  |   LinearOperatorBase<Field> & wrapped; | ||||||
|  | public: | ||||||
|  |   HermOpAdaptor(LinearOperatorBase<Field> &wrapme) : wrapped(wrapme)  {}; | ||||||
|  |   void OpDiag (const Field &in, Field &out) {    assert(0);  } | ||||||
|  |   void OpDir  (const Field &in, Field &out,int dir,int disp) {    assert(0);  } | ||||||
|  |   void OpDirAll  (const Field &in, std::vector<Field> &out){    assert(0);  }; | ||||||
|  |   void Op     (const Field &in, Field &out){ | ||||||
|  |     wrapped.HermOp(in,out); | ||||||
|  |   } | ||||||
|  |   void AdjOp     (const Field &in, Field &out){ | ||||||
|  |     wrapped.HermOp(in,out); | ||||||
|  |   } | ||||||
|  |   void HermOpAndNorm(const Field &in, Field &out,RealD &n1,RealD &n2){    assert(0);  } | ||||||
|  |   void HermOp(const Field &in, Field &out){ | ||||||
|  |     wrapped.HermOp(in,out); | ||||||
|  |   } | ||||||
|  | }; | ||||||
|  |  | ||||||
|  |  | ||||||
|  | int main (int argc, char ** argv) | ||||||
|  | { | ||||||
|  |   Grid_init(&argc,&argv); | ||||||
|  |  | ||||||
|  |   const int Ls=4; | ||||||
|  |  | ||||||
|  |   GridCartesian         * UGrid   = SpaceTimeGrid::makeFourDimGrid(GridDefaultLatt(), | ||||||
|  | 								   GridDefaultSimd(Nd,vComplex::Nsimd()), | ||||||
|  | 								   GridDefaultMpi()); | ||||||
|  |   GridRedBlackCartesian * UrbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(UGrid); | ||||||
|  |  | ||||||
|  |   GridCartesian         * FGrid   = SpaceTimeGrid::makeFiveDimGrid(Ls,UGrid); | ||||||
|  |   GridRedBlackCartesian * FrbGrid = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls,UGrid); | ||||||
|  |  | ||||||
|  |   // Construct a coarsened grid | ||||||
|  |   Coordinate clatt = GridDefaultLatt(); | ||||||
|  |   for(int d=0;d<clatt.size();d++){ | ||||||
|  |     clatt[d] = clatt[d]/2; | ||||||
|  |   } | ||||||
|  |  | ||||||
|  |   GridCartesian *Coarse4d =  SpaceTimeGrid::makeFourDimGrid(clatt, | ||||||
|  | 							    GridDefaultSimd(Nd,vComplex::Nsimd()), | ||||||
|  | 							    GridDefaultMpi());; | ||||||
|  |   GridCartesian *Coarse5d =  SpaceTimeGrid::makeFiveDimGrid(1,Coarse4d); | ||||||
|  |  | ||||||
|  |   std::vector<int> seeds4({1,2,3,4}); | ||||||
|  |   std::vector<int> seeds5({5,6,7,8}); | ||||||
|  |   std::vector<int> cseeds({5,6,7,8}); | ||||||
|  |   GridParallelRNG          RNG5(FGrid);   RNG5.SeedFixedIntegers(seeds5); | ||||||
|  |   GridParallelRNG          RNG4(UGrid);   RNG4.SeedFixedIntegers(seeds4); | ||||||
|  |   GridParallelRNG          CRNG(Coarse5d);CRNG.SeedFixedIntegers(cseeds); | ||||||
|  |  | ||||||
|  |   LatticeFermion    src(FGrid); random(RNG5,src); | ||||||
|  |   LatticeFermion result(FGrid); result=Zero(); | ||||||
|  |   LatticeFermion    ref(FGrid); ref=Zero(); | ||||||
|  |   LatticeFermion    tmp(FGrid); | ||||||
|  |   LatticeFermion    err(FGrid); | ||||||
|  |   LatticeGaugeField Umu(UGrid); | ||||||
|  |   SU<Nc>::HotConfiguration(RNG4,Umu); | ||||||
|  |   //  Umu=Zero(); | ||||||
|  |    | ||||||
|  |   RealD mass=0.1; | ||||||
|  |   RealD M5=1.8; | ||||||
|  |  | ||||||
|  |   DomainWallFermionD Ddwf(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5); | ||||||
|  |  | ||||||
|  |   const int nbasis = 16; | ||||||
|  |   const int cb = 0 ; | ||||||
|  |   LatticeFermion prom(FGrid); | ||||||
|  |  | ||||||
|  |   std::vector<LatticeFermion> subspace(nbasis,FGrid); | ||||||
|  |  | ||||||
|  |   std::cout<<GridLogMessage<<"Calling Aggregation class" <<std::endl; | ||||||
|  |  | ||||||
|  |   /////////////////////////////////////////////////////////// | ||||||
|  |   // Squared operator is in HermOp | ||||||
|  |   /////////////////////////////////////////////////////////// | ||||||
|  |   MdagMLinearOperator<DomainWallFermionD,LatticeFermion> HermDefOp(Ddwf); | ||||||
|  |  | ||||||
|  |   /////////////////////////////////////////////////// | ||||||
|  |   // Random aggregation space | ||||||
|  |   /////////////////////////////////////////////////// | ||||||
|  |   std::cout<<GridLogMessage << "Building random aggregation class"<< std::endl; | ||||||
|  |   typedef Aggregation<vSpinColourVector,vTComplex,nbasis> Subspace; | ||||||
|  |   Subspace Aggregates(Coarse5d,FGrid,cb); | ||||||
|  |   Aggregates.CreateSubspaceRandom(RNG5); | ||||||
|  |  | ||||||
|  |   /////////////////////////////////////////////////// | ||||||
|  |   // Build little dirac op | ||||||
|  |   /////////////////////////////////////////////////// | ||||||
|  |   std::cout<<GridLogMessage << "Building little Dirac operator"<< std::endl; | ||||||
|  |  | ||||||
|  |   typedef GeneralCoarsenedMatrix<vSpinColourVector,vTComplex,nbasis> LittleDiracOperator; | ||||||
|  |   typedef LittleDiracOperator::CoarseVector CoarseVector; | ||||||
|  |  | ||||||
|  |   NextToNearestStencilGeometry5D geom(Coarse5d); | ||||||
|  |   LittleDiracOperator LittleDiracOp(geom,FGrid,Coarse5d); | ||||||
|  |   LittleDiracOperator LittleDiracOpCol(geom,FGrid,Coarse5d); | ||||||
|  |  | ||||||
|  |   HermOpAdaptor<LatticeFermionD> HOA(HermDefOp); | ||||||
|  |  | ||||||
|  |   int pp=16; | ||||||
|  |   LittleDiracOp.CoarsenOperator(HOA,Aggregates); | ||||||
|  |    | ||||||
|  |   /////////////////////////////////////////////////// | ||||||
|  |   // Test the operator | ||||||
|  |   /////////////////////////////////////////////////// | ||||||
|  |   CoarseVector c_src (Coarse5d); | ||||||
|  |   CoarseVector c_res (Coarse5d); | ||||||
|  |   CoarseVector c_res_dag(Coarse5d); | ||||||
|  |   CoarseVector c_proj(Coarse5d); | ||||||
|  |  | ||||||
|  |   subspace=Aggregates.subspace; | ||||||
|  |  | ||||||
|  |   //  random(CRNG,c_src); | ||||||
|  |   c_src = 1.0; | ||||||
|  |  | ||||||
|  |   blockPromote(c_src,err,subspace); | ||||||
|  |  | ||||||
|  |   prom=Zero(); | ||||||
|  |   for(int b=0;b<nbasis;b++){ | ||||||
|  |     prom=prom+subspace[b]; | ||||||
|  |   } | ||||||
|  |   err=err-prom;  | ||||||
|  |   std::cout<<GridLogMessage<<"Promoted back from subspace: err "<<norm2(err)<<std::endl; | ||||||
|  |   std::cout<<GridLogMessage<<"c_src "<<norm2(c_src)<<std::endl; | ||||||
|  |   std::cout<<GridLogMessage<<"prom  "<<norm2(prom)<<std::endl; | ||||||
|  |  | ||||||
|  |   HermDefOp.HermOp(prom,tmp); | ||||||
|  |  | ||||||
|  |   blockProject(c_proj,tmp,subspace); | ||||||
|  |   std::cout<<GridLogMessage<<" Called Big Dirac Op "<<norm2(tmp)<<std::endl; | ||||||
|  |  | ||||||
|  |   std::cout<<GridLogMessage<<" Calling little Dirac Op "<<std::endl; | ||||||
|  |   LittleDiracOp.M(c_src,c_res); | ||||||
|  |   LittleDiracOp.Mdag(c_src,c_res_dag); | ||||||
|  |  | ||||||
|  |   std::cout<<GridLogMessage<<"Little dop : "<<norm2(c_res)<<std::endl; | ||||||
|  |   std::cout<<GridLogMessage<<"Little dop dag : "<<norm2(c_res_dag)<<std::endl; | ||||||
|  |   std::cout<<GridLogMessage<<"Big dop in subspace : "<<norm2(c_proj)<<std::endl; | ||||||
|  |  | ||||||
|  |   c_proj = c_proj - c_res; | ||||||
|  |   std::cout<<GridLogMessage<<" ldop error: "<<norm2(c_proj)<<std::endl; | ||||||
|  |  | ||||||
|  |   c_res_dag = c_res_dag - c_res; | ||||||
|  |   std::cout<<GridLogMessage<<"Little dopDag - dop: "<<norm2(c_res_dag)<<std::endl; | ||||||
|  |  | ||||||
|  |   std::cout<<GridLogMessage << "Testing Hermiticity stochastically "<< std::endl; | ||||||
|  |   CoarseVector phi(Coarse5d); | ||||||
|  |   CoarseVector chi(Coarse5d); | ||||||
|  |   CoarseVector Aphi(Coarse5d); | ||||||
|  |   CoarseVector Achi(Coarse5d); | ||||||
|  |  | ||||||
|  |   random(CRNG,phi); | ||||||
|  |   random(CRNG,chi); | ||||||
|  |  | ||||||
|  |   std::cout<<GridLogMessage<<"Made randoms "<<norm2(phi)<<" " << norm2(chi)<<std::endl; | ||||||
|  |  | ||||||
|  |   LittleDiracOp.M(phi,Aphi); | ||||||
|  |  | ||||||
|  |   LittleDiracOp.Mdag(chi,Achi); | ||||||
|  |  | ||||||
|  |   std::cout<<GridLogMessage<<"Aphi "<<norm2(Aphi)<<" A chi" << norm2(Achi)<<std::endl; | ||||||
|  |  | ||||||
|  |   ComplexD pAc = innerProduct(chi,Aphi); | ||||||
|  |   ComplexD cAp = innerProduct(phi,Achi); | ||||||
|  |   ComplexD cAc = innerProduct(chi,Achi); | ||||||
|  |   ComplexD pAp = innerProduct(phi,Aphi); | ||||||
|  |  | ||||||
|  |   std::cout<<GridLogMessage<< "pAc "<<pAc<<" cAp "<< cAp<< " diff "<<pAc-adj(cAp)<<std::endl; | ||||||
|  |   std::cout<<GridLogMessage<< "pAp "<<pAp<<" cAc "<< cAc<<"Should be real"<< std::endl; | ||||||
|  |  | ||||||
|  |   std::cout<<GridLogMessage<<"Testing linearity"<<std::endl; | ||||||
|  |   CoarseVector PhiPlusChi(Coarse5d); | ||||||
|  |   CoarseVector APhiPlusChi(Coarse5d); | ||||||
|  |   CoarseVector linerr(Coarse5d); | ||||||
|  |   PhiPlusChi = phi+chi; | ||||||
|  |   LittleDiracOp.M(PhiPlusChi,APhiPlusChi); | ||||||
|  |  | ||||||
|  |   linerr= APhiPlusChi-Aphi; | ||||||
|  |   linerr= linerr-Achi; | ||||||
|  |   std::cout<<GridLogMessage<<"**Diff "<<norm2(linerr)<<std::endl; | ||||||
|  |  | ||||||
|  |   std::cout<<GridLogMessage<<std::endl; | ||||||
|  |   std::cout<<GridLogMessage<<std::endl; | ||||||
|  |   std::cout<<GridLogMessage<<"*******************************************"<<std::endl; | ||||||
|  |   std::cout<<GridLogMessage<<"*******************************************"<<std::endl; | ||||||
|  |   std::cout<<GridLogMessage<<"*******************************************"<<std::endl; | ||||||
|  |    | ||||||
|  |   Grid_finalize(); | ||||||
|  |   return 0; | ||||||
|  | } | ||||||
							
								
								
									
										408
									
								
								tests/debug/Test_general_coarse_hdcg.cc
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										408
									
								
								tests/debug/Test_general_coarse_hdcg.cc
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,408 @@ | |||||||
|  |     /************************************************************************************* | ||||||
|  |  | ||||||
|  |     Grid physics library, www.github.com/paboyle/Grid  | ||||||
|  |  | ||||||
|  |     Source file: ./tests/Test_general_coarse_hdcg.cc | ||||||
|  |  | ||||||
|  |     Copyright (C) 2023 | ||||||
|  |  | ||||||
|  | Author: Peter Boyle <pboyle@bnl.gov> | ||||||
|  |  | ||||||
|  |     This program is free software; you can redistribute it and/or modify | ||||||
|  |     it under the terms of the GNU General Public License as published by | ||||||
|  |     the Free Software Foundation; either version 2 of the License, or | ||||||
|  |     (at your option) any later version. | ||||||
|  |  | ||||||
|  |     This program is distributed in the hope that it will be useful, | ||||||
|  |     but WITHOUT ANY WARRANTY; without even the implied warranty of | ||||||
|  |     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the | ||||||
|  |     GNU General Public License for more details. | ||||||
|  |  | ||||||
|  |     You should have received a copy of the GNU General Public License along | ||||||
|  |     with this program; if not, write to the Free Software Foundation, Inc., | ||||||
|  |     51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. | ||||||
|  |  | ||||||
|  |     See the full license in the file "LICENSE" in the top level distribution directory | ||||||
|  |     *************************************************************************************/ | ||||||
|  |     /*  END LEGAL */ | ||||||
|  | #include <Grid/Grid.h> | ||||||
|  | #include <Grid/lattice/PaddedCell.h> | ||||||
|  | #include <Grid/stencil/GeneralLocalStencil.h> | ||||||
|  | //#include <Grid/algorithms/GeneralCoarsenedMatrix.h> | ||||||
|  | #include <Grid/algorithms/iterative/AdefGeneric.h> | ||||||
|  |  | ||||||
|  | using namespace std; | ||||||
|  | using namespace Grid; | ||||||
|  |  | ||||||
|  | template<class Coarsened> | ||||||
|  | void SaveOperator(Coarsened &Operator,std::string file) | ||||||
|  | { | ||||||
|  | #ifdef HAVE_LIME | ||||||
|  |   emptyUserRecord record; | ||||||
|  |   ScidacWriter WR(Operator.Grid()->IsBoss()); | ||||||
|  |   assert(Operator._A.size()==Operator.geom.npoint); | ||||||
|  |   WR.open(file); | ||||||
|  |   for(int p=0;p<Operator._A.size();p++){ | ||||||
|  |     auto tmp = Operator.Cell.Extract(Operator._A[p]); | ||||||
|  |     WR.writeScidacFieldRecord(tmp,record); | ||||||
|  |   } | ||||||
|  |   WR.close(); | ||||||
|  | #endif | ||||||
|  | } | ||||||
|  | template<class Coarsened> | ||||||
|  | void LoadOperator(Coarsened Operator,std::string file) | ||||||
|  | { | ||||||
|  | #ifdef HAVE_LIME | ||||||
|  |   emptyUserRecord record; | ||||||
|  |   Grid::ScidacReader RD ; | ||||||
|  |   RD.open(file); | ||||||
|  |   assert(Operator._A.size()==Operator.geom.npoint); | ||||||
|  |   for(int p=0;p<Operator.geom.npoint;p++){ | ||||||
|  |     conformable(Operator._A[p].Grid(),Operator.CoarseGrid()); | ||||||
|  |     RD.readScidacFieldRecord(Operator._A[p],record); | ||||||
|  |   }     | ||||||
|  |   RD.close(); | ||||||
|  |   Operator.ExchangeCoarseLinks(); | ||||||
|  | #endif | ||||||
|  | } | ||||||
|  | template<class aggregation> | ||||||
|  | void SaveBasis(aggregation &Agg,std::string file) | ||||||
|  | { | ||||||
|  | #ifdef HAVE_LIME | ||||||
|  |   emptyUserRecord record; | ||||||
|  |   ScidacWriter WR(Agg.FineGrid->IsBoss()); | ||||||
|  |   WR.open(file); | ||||||
|  |   for(int b=0;b<Agg.subspace.size();b++){ | ||||||
|  |     WR.writeScidacFieldRecord(Agg.subspace[b],record); | ||||||
|  |   } | ||||||
|  |   WR.close(); | ||||||
|  | #endif | ||||||
|  | } | ||||||
|  | template<class aggregation> | ||||||
|  | void LoadBasis(aggregation &Agg, std::string file) | ||||||
|  | { | ||||||
|  | #ifdef HAVE_LIME | ||||||
|  |   emptyUserRecord record; | ||||||
|  |   ScidacReader RD ; | ||||||
|  |   RD.open(file); | ||||||
|  |   for(int b=0;b<Agg.subspace.size();b++){ | ||||||
|  |     RD.readScidacFieldRecord(Agg.subspace[b],record); | ||||||
|  |   }     | ||||||
|  |   RD.close(); | ||||||
|  | #endif | ||||||
|  | } | ||||||
|  |  | ||||||
|  |  | ||||||
|  | template<class Field> class TestSolver : public LinearFunction<Field> { | ||||||
|  | public: | ||||||
|  |   TestSolver() {}; | ||||||
|  |   void operator() (const Field &in, Field &out){    out = Zero();  }      | ||||||
|  | }; | ||||||
|  |  | ||||||
|  |  | ||||||
|  | RealD InverseApproximation(RealD x){ | ||||||
|  |   return 1.0/x; | ||||||
|  | } | ||||||
|  |  | ||||||
|  | // Want Op in CoarsenOp to call MatPcDagMatPc | ||||||
|  | template<class Field> | ||||||
|  | class HermOpAdaptor : public LinearOperatorBase<Field> | ||||||
|  | { | ||||||
|  |   LinearOperatorBase<Field> & wrapped; | ||||||
|  | public: | ||||||
|  |   HermOpAdaptor(LinearOperatorBase<Field> &wrapme) : wrapped(wrapme)  {}; | ||||||
|  |   void Op     (const Field &in, Field &out)   { wrapped.HermOp(in,out);  } | ||||||
|  |   void HermOp(const Field &in, Field &out)    { wrapped.HermOp(in,out); } | ||||||
|  |   void AdjOp     (const Field &in, Field &out){ wrapped.HermOp(in,out);  } | ||||||
|  |   void OpDiag (const Field &in, Field &out)                  {    assert(0);  } | ||||||
|  |   void OpDir  (const Field &in, Field &out,int dir,int disp) {    assert(0);  } | ||||||
|  |   void OpDirAll  (const Field &in, std::vector<Field> &out)  {    assert(0);  }; | ||||||
|  |   void HermOpAndNorm(const Field &in, Field &out,RealD &n1,RealD &n2){    assert(0);  } | ||||||
|  | }; | ||||||
|  | template<class Field,class Matrix> class ChebyshevSmoother : public LinearFunction<Field> | ||||||
|  | { | ||||||
|  | public: | ||||||
|  |   using LinearFunction<Field>::operator(); | ||||||
|  |   typedef LinearOperatorBase<Field> FineOperator; | ||||||
|  |   FineOperator   & _SmootherOperator; | ||||||
|  |   Chebyshev<Field> Cheby; | ||||||
|  |   ChebyshevSmoother(RealD _lo,RealD _hi,int _ord, FineOperator &SmootherOperator) : | ||||||
|  |     _SmootherOperator(SmootherOperator), | ||||||
|  |     Cheby(_lo,_hi,_ord,InverseApproximation) | ||||||
|  |   { | ||||||
|  |     std::cout << GridLogMessage<<" Chebyshev smoother order "<<_ord<<" ["<<_lo<<","<<_hi<<"]"<<std::endl; | ||||||
|  |   }; | ||||||
|  |   void operator() (const Field &in, Field &out)  | ||||||
|  |   { | ||||||
|  |     Field tmp(in.Grid()); | ||||||
|  |     tmp = in; | ||||||
|  |     Cheby(_SmootherOperator,tmp,out); | ||||||
|  |   } | ||||||
|  | }; | ||||||
|  |  | ||||||
|  | int main (int argc, char ** argv) | ||||||
|  | { | ||||||
|  |   Grid_init(&argc,&argv); | ||||||
|  |  | ||||||
|  |   const int Ls=16; | ||||||
|  |   const int nbasis = 40; | ||||||
|  |   const int cb = 0 ; | ||||||
|  |   RealD mass=0.01; | ||||||
|  |   RealD M5=1.8; | ||||||
|  |   RealD b=1.5; | ||||||
|  |   RealD c=0.5; | ||||||
|  |  | ||||||
|  |   GridCartesian         * UGrid   = SpaceTimeGrid::makeFourDimGrid(GridDefaultLatt(), | ||||||
|  | 								   GridDefaultSimd(Nd,vComplex::Nsimd()), | ||||||
|  | 								   GridDefaultMpi()); | ||||||
|  |   GridRedBlackCartesian * UrbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(UGrid); | ||||||
|  |   GridCartesian         * FGrid   = SpaceTimeGrid::makeFiveDimGrid(Ls,UGrid); | ||||||
|  |   GridRedBlackCartesian * FrbGrid = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls,UGrid); | ||||||
|  |  | ||||||
|  |   // Construct a coarsened grid with 4^4 cell | ||||||
|  |   Coordinate clatt = GridDefaultLatt(); | ||||||
|  |   for(int d=0;d<clatt.size();d++){ | ||||||
|  |     clatt[d] = clatt[d]/4; | ||||||
|  |   } | ||||||
|  |   GridCartesian *Coarse4d =  SpaceTimeGrid::makeFourDimGrid(clatt, | ||||||
|  | 							    GridDefaultSimd(Nd,vComplex::Nsimd()), | ||||||
|  | 							    GridDefaultMpi());; | ||||||
|  |   GridCartesian *Coarse5d =  SpaceTimeGrid::makeFiveDimGrid(1,Coarse4d); | ||||||
|  |  | ||||||
|  |   ///////////////////////// RNGs ///////////////////////////////// | ||||||
|  |   std::vector<int> seeds4({1,2,3,4}); | ||||||
|  |   std::vector<int> seeds5({5,6,7,8}); | ||||||
|  |   std::vector<int> cseeds({5,6,7,8}); | ||||||
|  |  | ||||||
|  |   GridParallelRNG          RNG5(FGrid);   RNG5.SeedFixedIntegers(seeds5); | ||||||
|  |   GridParallelRNG          RNG4(UGrid);   RNG4.SeedFixedIntegers(seeds4); | ||||||
|  |   GridParallelRNG          CRNG(Coarse5d);CRNG.SeedFixedIntegers(cseeds); | ||||||
|  |  | ||||||
|  |   ///////////////////////// Configuration ///////////////////////////////// | ||||||
|  |   LatticeGaugeField Umu(UGrid); | ||||||
|  |  | ||||||
|  |   FieldMetaData header; | ||||||
|  |   std::string file("ckpoint_lat.4000"); | ||||||
|  |   NerscIO::readConfiguration(Umu,header,file); | ||||||
|  |  | ||||||
|  |   //////////////////////// Fermion action ////////////////////////////////// | ||||||
|  |   MobiusFermionD Ddwf(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5,b,c); | ||||||
|  |  | ||||||
|  |   SchurDiagMooeeOperator<MobiusFermionD, LatticeFermion> HermOpEO(Ddwf); | ||||||
|  |  | ||||||
|  |   typedef HermOpAdaptor<LatticeFermionD> HermFineMatrix; | ||||||
|  |   HermFineMatrix FineHermOp(HermOpEO); | ||||||
|  |    | ||||||
|  |   LatticeFermion result(FrbGrid); result=Zero(); | ||||||
|  |  | ||||||
|  |   LatticeFermion    src(FrbGrid); random(RNG5,src); | ||||||
|  |  | ||||||
|  |   // Run power method on FineHermOp | ||||||
|  |   PowerMethod<LatticeFermion>       PM;   PM(HermOpEO,src); | ||||||
|  |  | ||||||
|  |   | ||||||
|  |   //////////////////////////////////////////////////////////// | ||||||
|  |   ///////////// Coarse basis and Little Dirac Operator /////// | ||||||
|  |   //////////////////////////////////////////////////////////// | ||||||
|  |   typedef GeneralCoarsenedMatrix<vSpinColourVector,vTComplex,nbasis> LittleDiracOperator; | ||||||
|  |   typedef LittleDiracOperator::CoarseVector CoarseVector; | ||||||
|  |  | ||||||
|  |   NextToNextToNextToNearestStencilGeometry5D geom(Coarse5d); | ||||||
|  |   NearestStencilGeometry5D geom_nn(Coarse5d); | ||||||
|  |    | ||||||
|  |   // Warning: This routine calls PVdagM.Op, not PVdagM.HermOp | ||||||
|  |   typedef Aggregation<vSpinColourVector,vTComplex,nbasis> Subspace; | ||||||
|  |   Subspace Aggregates(Coarse5d,FrbGrid,cb); | ||||||
|  |  | ||||||
|  |   //////////////////////////////////////////////////////////// | ||||||
|  |   // Need to check about red-black grid coarsening | ||||||
|  |   //////////////////////////////////////////////////////////// | ||||||
|  |   LittleDiracOperator LittleDiracOp(geom,FrbGrid,Coarse5d); | ||||||
|  |  | ||||||
|  |   bool load=false; | ||||||
|  |   if ( load ) { | ||||||
|  |     LoadBasis(Aggregates,"Subspace.scidac"); | ||||||
|  |     LoadOperator(LittleDiracOp,"LittleDiracOp.scidac"); | ||||||
|  |   } else { | ||||||
|  |     Aggregates.CreateSubspaceChebyshev(RNG5,HermOpEO,nbasis, | ||||||
|  | 				       95.0,0.1, | ||||||
|  | 				       //				     400,200,200 -- 48 iters | ||||||
|  | 				       //				     600,200,200 -- 38 iters, 162s | ||||||
|  | 				       //				     600,200,100 -- 38 iters, 169s | ||||||
|  | 				       //				     600,200,50  -- 88 iters. 370s  | ||||||
|  | 				       600, | ||||||
|  | 				       200, | ||||||
|  | 				       100, | ||||||
|  | 				       0.0); | ||||||
|  |     LittleDiracOp.CoarsenOperator(FineHermOp,Aggregates); | ||||||
|  |     SaveBasis(Aggregates,"Subspace.scidac"); | ||||||
|  |     SaveOperator(LittleDiracOp,"LittleDiracOp.scidac"); | ||||||
|  |   } | ||||||
|  |    | ||||||
|  |   // Try projecting to one hop only | ||||||
|  |   LittleDiracOperator LittleDiracOpProj(geom_nn,FrbGrid,Coarse5d); | ||||||
|  |   LittleDiracOpProj.ProjectNearestNeighbour(0.2,LittleDiracOp); | ||||||
|  |  | ||||||
|  |   typedef HermitianLinearOperator<LittleDiracOperator,CoarseVector> HermMatrix; | ||||||
|  |   HermMatrix CoarseOp (LittleDiracOp); | ||||||
|  |   HermMatrix CoarseOpProj (LittleDiracOpProj); | ||||||
|  |    | ||||||
|  |   ////////////////////////////////////////// | ||||||
|  |   // Build a coarse lanczos | ||||||
|  |   ////////////////////////////////////////// | ||||||
|  |   Chebyshev<CoarseVector>      IRLCheby(0.5,60.0,71);  // 1 iter | ||||||
|  |   FunctionHermOp<CoarseVector> IRLOpCheby(IRLCheby,CoarseOp); | ||||||
|  |   PlainHermOp<CoarseVector>    IRLOp    (CoarseOp); | ||||||
|  |   int Nk=48; | ||||||
|  |   int Nm=64; | ||||||
|  |   int Nstop=Nk; | ||||||
|  |   ImplicitlyRestartedLanczos<CoarseVector> IRL(IRLOpCheby,IRLOp,Nstop,Nk,Nm,1.0e-5,20); | ||||||
|  |  | ||||||
|  |   int Nconv; | ||||||
|  |   std::vector<RealD>            eval(Nm); | ||||||
|  |   std::vector<CoarseVector>     evec(Nm,Coarse5d); | ||||||
|  |   CoarseVector c_src(Coarse5d); c_src=1.0; | ||||||
|  |   CoarseVector c_res(Coarse5d);  | ||||||
|  |   CoarseVector c_ref(Coarse5d);  | ||||||
|  |  | ||||||
|  |   PowerMethod<CoarseVector>       cPM;   cPM(CoarseOp,c_src); | ||||||
|  |  | ||||||
|  |   IRL.calc(eval,evec,c_src,Nconv); | ||||||
|  |   DeflatedGuesser<CoarseVector> DeflCoarseGuesser(evec,eval); | ||||||
|  |  | ||||||
|  |    | ||||||
|  |   ////////////////////////////////////////// | ||||||
|  |   // Build a coarse space solver | ||||||
|  |   ////////////////////////////////////////// | ||||||
|  |   int maxit=20000; | ||||||
|  |   ConjugateGradient<CoarseVector>  CG(1.0e-8,maxit,false); | ||||||
|  |   ConjugateGradient<LatticeFermionD>  CGfine(1.0e-8,10000,false); | ||||||
|  |   ZeroGuesser<CoarseVector> CoarseZeroGuesser; | ||||||
|  |  | ||||||
|  |   //  HPDSolver<CoarseVector> HPDSolve(CoarseOp,CG,CoarseZeroGuesser); | ||||||
|  |   HPDSolver<CoarseVector> HPDSolve(CoarseOp,CG,DeflCoarseGuesser); | ||||||
|  |   c_res=Zero(); | ||||||
|  |   HPDSolve(c_src,c_res); c_ref = c_res; | ||||||
|  |  | ||||||
|  |   ////////////////////////////////////////////////////////////////////////// | ||||||
|  |   // Deflated (with real op EV's) solve for the projected coarse op | ||||||
|  |   // Work towards ADEF1 in the coarse space | ||||||
|  |   ////////////////////////////////////////////////////////////////////////// | ||||||
|  |   HPDSolver<CoarseVector> HPDSolveProj(CoarseOpProj,CG,DeflCoarseGuesser); | ||||||
|  |   c_res=Zero(); | ||||||
|  |   HPDSolveProj(c_src,c_res); | ||||||
|  |   c_res = c_res - c_ref; | ||||||
|  |   std::cout << "Projected solver error "<<norm2(c_res)<<std::endl; | ||||||
|  |  | ||||||
|  |   ////////////////////////////////////////////////////////////////////// | ||||||
|  |   // Coarse ADEF1 with deflation space | ||||||
|  |   ////////////////////////////////////////////////////////////////////// | ||||||
|  |   ChebyshevSmoother<CoarseVector,HermMatrix > CoarseSmoother(4.0,45.,16,CoarseOpProj);  // 311 | ||||||
|  |   //  ChebyshevSmoother<CoarseVector,HermMatrix > CoarseSmoother(0.5,36.,10,CoarseOpProj);  // 311 | ||||||
|  |  | ||||||
|  |   //////////////////////////////////////////////////////// | ||||||
|  |   // CG, Cheby mode spacing 200,200 | ||||||
|  |   // Unprojected Coarse CG solve to 1e-8 : 190 iters, 4.9s | ||||||
|  |   // Unprojected Coarse CG solve to 4e-2 :  33 iters, 0.8s | ||||||
|  |   // Projected Coarse CG solve to 1e-8 : 100 iters, 0.36s | ||||||
|  |   //////////////////////////////////////////////////////// | ||||||
|  |   // CoarseSmoother(1.0,48.,8,CoarseOpProj); 48 evecs  | ||||||
|  |   //////////////////////////////////////////////////////// | ||||||
|  |   // ADEF1 Coarse solve to 1e-8 : 44 iters, 2.34s  2.1x gain | ||||||
|  |   // ADEF1 Coarse solve to 4e-2 : 7 iters, 0.4s | ||||||
|  |   // HDCG 38 iters 162s | ||||||
|  |   // | ||||||
|  |   // CoarseSmoother(1.0,40.,8,CoarseOpProj); 48 evecs  | ||||||
|  |   // ADEF1 Coarse solve to 1e-8 : 37 iters, 2.0s  2.1x gain | ||||||
|  |   // ADEF1 Coarse solve to 4e-2 : 6 iters, 0.36s | ||||||
|  |   // HDCG 38 iters 169s | ||||||
|  |  | ||||||
|  |   TwoLevelADEF1defl<CoarseVector> | ||||||
|  |     cADEF1(1.0e-8, 100, | ||||||
|  | 	   CoarseOp, | ||||||
|  | 	   CoarseSmoother, | ||||||
|  | 	   evec,eval); | ||||||
|  |  | ||||||
|  |   c_res=Zero(); | ||||||
|  |   cADEF1(c_src,c_res); | ||||||
|  |   c_res = c_res - c_ref; | ||||||
|  |   std::cout << "cADEF1 solver error "<<norm2(c_res)<<std::endl; | ||||||
|  |    | ||||||
|  |   cADEF1.Tolerance = 1.0e-9; | ||||||
|  |   c_res=Zero(); | ||||||
|  |   cADEF1(c_src,c_res); | ||||||
|  |   c_res = c_res - c_ref; | ||||||
|  |   std::cout << "cADEF1 solver error "<<norm2(c_res)<<std::endl; | ||||||
|  |    | ||||||
|  |   ////////////////////////////////////////// | ||||||
|  |   // Build a smoother | ||||||
|  |   ////////////////////////////////////////// | ||||||
|  |   //  ChebyshevSmoother<LatticeFermionD,HermFineMatrix > Smoother(10.0,100.0,10,FineHermOp); //499 | ||||||
|  |   //  ChebyshevSmoother<LatticeFermionD,HermFineMatrix > Smoother(3.0,100.0,10,FineHermOp);  //383 | ||||||
|  |   //  ChebyshevSmoother<LatticeFermionD,HermFineMatrix > Smoother(1.0,100.0,10,FineHermOp);  //328 | ||||||
|  |   //  std::vector<RealD> los({0.5,1.0,3.0}); // 147/142/146 nbasis 1 | ||||||
|  |   //  std::vector<RealD> los({1.0,2.0}); // Nbasis 24: 88,86 iterations | ||||||
|  |   //  std::vector<RealD> los({2.0,4.0}); // Nbasis 32 == 52, iters | ||||||
|  |   //  std::vector<RealD> los({2.0,4.0}); // Nbasis 40 == 36,36 iters | ||||||
|  |  | ||||||
|  |   // | ||||||
|  |   // Turns approx 2700 iterations into 340 fine multiplies with Nbasis 40 | ||||||
|  |   // Need to measure cost of coarse space. | ||||||
|  |   // | ||||||
|  |   // -- i) Reduce coarse residual   -- 0.04 | ||||||
|  |   // -- ii) Lanczos on coarse space -- done | ||||||
|  |   // -- iii) Possible 1 hop project and/or preconditioning it - easy - PrecCG it and | ||||||
|  |   //         use a limited stencil. Reread BFM code to check on evecs / deflation strategy with prec | ||||||
|  |   // | ||||||
|  |   std::vector<RealD> los({3.0}); // Nbasis 40 == 36,36 iters | ||||||
|  |  | ||||||
|  |   //  std::vector<int> ords({7,8,10}); // Nbasis 40 == 40,38,36 iters (320,342,396 mults) | ||||||
|  |   std::vector<int> ords({7}); // Nbasis 40 == 40 iters (320 mults)   | ||||||
|  |  | ||||||
|  |   for(int l=0;l<los.size();l++){ | ||||||
|  |  | ||||||
|  |     RealD lo = los[l]; | ||||||
|  |  | ||||||
|  |     for(int o=0;o<ords.size();o++){ | ||||||
|  |  | ||||||
|  |       ConjugateGradient<CoarseVector>  CGsloppy(4.0e-2,maxit,false); | ||||||
|  |       HPDSolver<CoarseVector> HPDSolveSloppy(CoarseOp,CGsloppy,DeflCoarseGuesser); | ||||||
|  |        | ||||||
|  |       //    ChebyshevSmoother<LatticeFermionD,HermFineMatrix > Smoother(lo,92,10,FineHermOp); // 36 best case | ||||||
|  |       ChebyshevSmoother<LatticeFermionD,HermFineMatrix > Smoother(lo,92,ords[o],FineHermOp);  // 311 | ||||||
|  |  | ||||||
|  |       ////////////////////////////////////////// | ||||||
|  |       // Build a HDCG solver | ||||||
|  |       ////////////////////////////////////////// | ||||||
|  |       TwoLevelADEF2<LatticeFermion,CoarseVector,Subspace> | ||||||
|  | 	HDCG(1.0e-8, 3000, | ||||||
|  | 	     FineHermOp, | ||||||
|  | 	     Smoother, | ||||||
|  | 	     HPDSolveSloppy, | ||||||
|  | 	     HPDSolve, | ||||||
|  | 	     Aggregates); | ||||||
|  |  | ||||||
|  |       result=Zero(); | ||||||
|  |       HDCG(src,result); | ||||||
|  |  | ||||||
|  |       TwoLevelADEF2<LatticeFermion,CoarseVector,Subspace> | ||||||
|  | 	HDCGdefl(1.0e-8, 100, | ||||||
|  | 		 FineHermOp, | ||||||
|  | 		 Smoother, | ||||||
|  | 		 cADEF1, | ||||||
|  | 		 HPDSolve, | ||||||
|  | 		 Aggregates); | ||||||
|  |        | ||||||
|  |       result=Zero(); | ||||||
|  |       HDCGdefl(src,result); | ||||||
|  |        | ||||||
|  |     } | ||||||
|  |   } | ||||||
|  |  | ||||||
|  |   // Standard CG | ||||||
|  |   result=Zero(); | ||||||
|  |   CGfine(HermOpEO, src, result); | ||||||
|  |    | ||||||
|  |   Grid_finalize(); | ||||||
|  |   return 0; | ||||||
|  | } | ||||||
							
								
								
									
										267
									
								
								tests/debug/Test_general_coarse_pvdagm.cc
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										267
									
								
								tests/debug/Test_general_coarse_pvdagm.cc
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,267 @@ | |||||||
|  | /************************************************************************************* | ||||||
|  |  | ||||||
|  |     Grid physics library, www.github.com/paboyle/Grid  | ||||||
|  |  | ||||||
|  |     Source file: ./tests/Test_padded_cell.cc | ||||||
|  |  | ||||||
|  |     Copyright (C) 2023 | ||||||
|  |  | ||||||
|  | Author: Peter Boyle <paboyle@ph.ed.ac.uk> | ||||||
|  |  | ||||||
|  |     This program is free software; you can redistribute it and/or modify | ||||||
|  |     it under the terms of the GNU General Public License as published by | ||||||
|  |     the Free Software Foundation; either version 2 of the License, or | ||||||
|  |     (at your option) any later version. | ||||||
|  |  | ||||||
|  |     This program is distributed in the hope that it will be useful, | ||||||
|  |     but WITHOUT ANY WARRANTY; without even the implied warranty of | ||||||
|  |     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the | ||||||
|  |     GNU General Public License for more details. | ||||||
|  |  | ||||||
|  |     You should have received a copy of the GNU General Public License along | ||||||
|  |     with this program; if not, write to the Free Software Foundation, Inc., | ||||||
|  |     51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. | ||||||
|  |  | ||||||
|  |     See the full license in the file "LICENSE" in the top level distribution directory | ||||||
|  |     *************************************************************************************/ | ||||||
|  |     /*  END LEGAL */ | ||||||
|  | #include <Grid/Grid.h> | ||||||
|  | #include <Grid/lattice/PaddedCell.h> | ||||||
|  | #include <Grid/stencil/GeneralLocalStencil.h> | ||||||
|  |  | ||||||
|  | #include <Grid/algorithms/iterative/PrecGeneralisedConjugateResidual.h> | ||||||
|  | #include <Grid/algorithms/iterative/PrecGeneralisedConjugateResidualNonHermitian.h> | ||||||
|  | #include <Grid/algorithms/iterative/BiCGSTAB.h> | ||||||
|  |  | ||||||
|  | using namespace std; | ||||||
|  | using namespace Grid; | ||||||
|  |  | ||||||
|  | template<class Field> | ||||||
|  | class HermOpAdaptor : public LinearOperatorBase<Field> | ||||||
|  | { | ||||||
|  |   LinearOperatorBase<Field> & wrapped; | ||||||
|  | public: | ||||||
|  |   HermOpAdaptor(LinearOperatorBase<Field> &wrapme) : wrapped(wrapme)  {}; | ||||||
|  |   void OpDiag (const Field &in, Field &out) {    assert(0);  } | ||||||
|  |   void OpDir  (const Field &in, Field &out,int dir,int disp) {    assert(0);  } | ||||||
|  |   void OpDirAll  (const Field &in, std::vector<Field> &out){    assert(0);  }; | ||||||
|  |   void Op     (const Field &in, Field &out){ | ||||||
|  |     wrapped.HermOp(in,out); | ||||||
|  |   } | ||||||
|  |   void AdjOp     (const Field &in, Field &out){ | ||||||
|  |     wrapped.HermOp(in,out); | ||||||
|  |   } | ||||||
|  |   void HermOpAndNorm(const Field &in, Field &out,RealD &n1,RealD &n2){    assert(0);  } | ||||||
|  |   void HermOp(const Field &in, Field &out){ | ||||||
|  |     wrapped.HermOp(in,out); | ||||||
|  |   } | ||||||
|  |    | ||||||
|  | }; | ||||||
|  |  | ||||||
|  | template<class Matrix,class Field> | ||||||
|  | class PVdagMLinearOperator : public LinearOperatorBase<Field> { | ||||||
|  |   Matrix &_Mat; | ||||||
|  |   Matrix &_PV; | ||||||
|  | public: | ||||||
|  |   PVdagMLinearOperator(Matrix &Mat,Matrix &PV): _Mat(Mat),_PV(PV){}; | ||||||
|  |  | ||||||
|  |   void OpDiag (const Field &in, Field &out) {    assert(0);  } | ||||||
|  |   void OpDir  (const Field &in, Field &out,int dir,int disp) {    assert(0);  } | ||||||
|  |   void OpDirAll  (const Field &in, std::vector<Field> &out){    assert(0);  }; | ||||||
|  |   void Op     (const Field &in, Field &out){ | ||||||
|  |     Field tmp(in.Grid()); | ||||||
|  |     _Mat.M(in,tmp); | ||||||
|  |     _PV.Mdag(tmp,out); | ||||||
|  |   } | ||||||
|  |   void AdjOp     (const Field &in, Field &out){ | ||||||
|  |     Field tmp(in.Grid()); | ||||||
|  |     _PV.M(tmp,out); | ||||||
|  |     _Mat.Mdag(in,tmp); | ||||||
|  |   } | ||||||
|  |   void HermOpAndNorm(const Field &in, Field &out,RealD &n1,RealD &n2){    assert(0);  } | ||||||
|  |   void HermOp(const Field &in, Field &out){ | ||||||
|  |     std::cout << "HermOp"<<std::endl; | ||||||
|  |     Field tmp(in.Grid()); | ||||||
|  |     _Mat.M(in,tmp); | ||||||
|  |     _PV.Mdag(tmp,out); | ||||||
|  |     _PV.M(out,tmp); | ||||||
|  |     _Mat.Mdag(tmp,out); | ||||||
|  |     std::cout << "HermOp done "<<norm2(out)<<std::endl; | ||||||
|  |      | ||||||
|  |   } | ||||||
|  | }; | ||||||
|  |  | ||||||
|  | template<class Field> class DumbOperator  : public LinearOperatorBase<Field> { | ||||||
|  | public: | ||||||
|  |   LatticeComplex scale; | ||||||
|  |   DumbOperator(GridBase *grid) : scale(grid) | ||||||
|  |   { | ||||||
|  |     scale = 0.0; | ||||||
|  |     LatticeComplex scalesft(grid); | ||||||
|  |     LatticeComplex scaletmp(grid); | ||||||
|  |     for(int d=0;d<4;d++){ | ||||||
|  |       Lattice<iScalar<vInteger> > x(grid); LatticeCoordinate(x,d+1); | ||||||
|  |       LatticeCoordinate(scaletmp,d+1); | ||||||
|  |       scalesft = Cshift(scaletmp,d+1,1); | ||||||
|  |       scale = 100.0*scale + where( mod(x    ,2)==(Integer)0, scalesft,scaletmp); | ||||||
|  |     } | ||||||
|  |     std::cout << " scale\n" << scale << std::endl; | ||||||
|  |   } | ||||||
|  |   // Support for coarsening to a multigrid | ||||||
|  |   void OpDiag (const Field &in, Field &out) {}; | ||||||
|  |   void OpDir  (const Field &in, Field &out,int dir,int disp){}; | ||||||
|  |   void OpDirAll  (const Field &in, std::vector<Field> &out) {}; | ||||||
|  |  | ||||||
|  |   void Op     (const Field &in, Field &out){ | ||||||
|  |     out = scale * in; | ||||||
|  |   } | ||||||
|  |   void AdjOp  (const Field &in, Field &out){ | ||||||
|  |     out = scale * in; | ||||||
|  |   } | ||||||
|  |   void HermOp(const Field &in, Field &out){ | ||||||
|  |     double n1, n2; | ||||||
|  |     HermOpAndNorm(in,out,n1,n2); | ||||||
|  |   } | ||||||
|  |   void HermOpAndNorm(const Field &in, Field &out,double &n1,double &n2){ | ||||||
|  |     ComplexD dot; | ||||||
|  |  | ||||||
|  |     out = scale * in; | ||||||
|  |  | ||||||
|  |     dot= innerProduct(in,out); | ||||||
|  |     n1=real(dot); | ||||||
|  |  | ||||||
|  |     dot = innerProduct(out,out); | ||||||
|  |     n2=real(dot); | ||||||
|  |   } | ||||||
|  | }; | ||||||
|  |  | ||||||
|  |  | ||||||
|  | int main (int argc, char ** argv) | ||||||
|  | { | ||||||
|  |   Grid_init(&argc,&argv); | ||||||
|  |  | ||||||
|  |   const int Ls=2; | ||||||
|  |  | ||||||
|  |   GridCartesian         * UGrid   = SpaceTimeGrid::makeFourDimGrid(GridDefaultLatt(), GridDefaultSimd(Nd,vComplex::Nsimd()),GridDefaultMpi()); | ||||||
|  |   GridRedBlackCartesian * UrbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(UGrid); | ||||||
|  |  | ||||||
|  |   GridCartesian         * FGrid   = SpaceTimeGrid::makeFiveDimGrid(Ls,UGrid); | ||||||
|  |   GridRedBlackCartesian * FrbGrid = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls,UGrid); | ||||||
|  |  | ||||||
|  |   // Construct a coarsened grid | ||||||
|  |   Coordinate clatt = GridDefaultLatt(); | ||||||
|  |   for(int d=0;d<clatt.size();d++){ | ||||||
|  |     clatt[d] = clatt[d]/4; | ||||||
|  |   } | ||||||
|  |   GridCartesian *Coarse4d =  SpaceTimeGrid::makeFourDimGrid(clatt, GridDefaultSimd(Nd,vComplex::Nsimd()),GridDefaultMpi());; | ||||||
|  |   GridCartesian *Coarse5d =  SpaceTimeGrid::makeFiveDimGrid(1,Coarse4d); | ||||||
|  |  | ||||||
|  |   std::vector<int> seeds4({1,2,3,4}); | ||||||
|  |   std::vector<int> seeds5({5,6,7,8}); | ||||||
|  |   std::vector<int> cseeds({5,6,7,8}); | ||||||
|  |   GridParallelRNG          RNG5(FGrid);   RNG5.SeedFixedIntegers(seeds5); | ||||||
|  |   GridParallelRNG          RNG4(UGrid);   RNG4.SeedFixedIntegers(seeds4); | ||||||
|  |   GridParallelRNG          CRNG(Coarse5d);CRNG.SeedFixedIntegers(cseeds); | ||||||
|  |  | ||||||
|  |   LatticeFermion    src(FGrid); random(RNG5,src); | ||||||
|  |   LatticeFermion result(FGrid); result=Zero(); | ||||||
|  |   LatticeFermion    ref(FGrid); ref=Zero(); | ||||||
|  |   LatticeFermion    tmp(FGrid); | ||||||
|  |   LatticeFermion    err(FGrid); | ||||||
|  |   LatticeGaugeField Umu(UGrid); | ||||||
|  |  | ||||||
|  |   FieldMetaData header; | ||||||
|  |   std::string file("ckpoint_lat.4000"); | ||||||
|  |   NerscIO::readConfiguration(Umu,header,file); | ||||||
|  |   //Umu = 1.0; | ||||||
|  |    | ||||||
|  |   RealD mass=0.5; | ||||||
|  |   RealD M5=1.8; | ||||||
|  |  | ||||||
|  |   DomainWallFermionD Ddwf(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5); | ||||||
|  |   DomainWallFermionD Dpv(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,1.0,M5); | ||||||
|  |  | ||||||
|  |   const int nbasis = 1; | ||||||
|  |   const int cb = 0 ; | ||||||
|  |   LatticeFermion prom(FGrid); | ||||||
|  |  | ||||||
|  |   typedef GeneralCoarsenedMatrix<vSpinColourVector,vTComplex,nbasis> LittleDiracOperator; | ||||||
|  |   typedef LittleDiracOperator::CoarseVector CoarseVector; | ||||||
|  |  | ||||||
|  |   NextToNearestStencilGeometry5D geom(Coarse5d); | ||||||
|  |  | ||||||
|  |   std::cout<<GridLogMessage<<std::endl; | ||||||
|  |   std::cout<<GridLogMessage<<"*******************************************"<<std::endl; | ||||||
|  |   std::cout<<GridLogMessage<<std::endl; | ||||||
|  |    | ||||||
|  |   PVdagMLinearOperator<DomainWallFermionD,LatticeFermionD> PVdagM(Ddwf,Dpv); | ||||||
|  |   HermOpAdaptor<LatticeFermionD> HOA(PVdagM); | ||||||
|  |  | ||||||
|  |   // Run power method on HOA?? | ||||||
|  |   PowerMethod<LatticeFermion>       PM;   PM(HOA,src); | ||||||
|  |   | ||||||
|  |   // Warning: This routine calls PVdagM.Op, not PVdagM.HermOp | ||||||
|  |   typedef Aggregation<vSpinColourVector,vTComplex,nbasis> Subspace; | ||||||
|  |   Subspace AggregatesPD(Coarse5d,FGrid,cb); | ||||||
|  |   AggregatesPD.CreateSubspaceChebyshev(RNG5, | ||||||
|  | 				       HOA, | ||||||
|  | 				       nbasis, | ||||||
|  | 				       5000.0, | ||||||
|  | 				       0.02, | ||||||
|  | 				       100, | ||||||
|  | 				       50, | ||||||
|  | 				       50, | ||||||
|  | 				       0.0); | ||||||
|  |    | ||||||
|  |   LittleDiracOperator LittleDiracOpPV(geom,FGrid,Coarse5d); | ||||||
|  |   LittleDiracOpPV.CoarsenOperator(PVdagM,AggregatesPD); | ||||||
|  |  | ||||||
|  |   std::cout<<GridLogMessage<<std::endl; | ||||||
|  |   std::cout<<GridLogMessage<<"*******************************************"<<std::endl; | ||||||
|  |   std::cout<<GridLogMessage<<std::endl; | ||||||
|  |   std::cout<<GridLogMessage<<"Testing coarsened operator "<<std::endl; | ||||||
|  |    | ||||||
|  |   CoarseVector c_src (Coarse5d); | ||||||
|  |   CoarseVector c_res (Coarse5d); | ||||||
|  |   CoarseVector c_proj(Coarse5d); | ||||||
|  |  | ||||||
|  |   std::vector<LatticeFermion> subspace(nbasis,FGrid); | ||||||
|  |   subspace=AggregatesPD.subspace; | ||||||
|  |  | ||||||
|  |   Complex one(1.0); | ||||||
|  |   c_src = one;  // 1 in every element for vector 1. | ||||||
|  |   blockPromote(c_src,err,subspace); | ||||||
|  |  | ||||||
|  |   prom=Zero(); | ||||||
|  |   for(int b=0;b<nbasis;b++){ | ||||||
|  |     prom=prom+subspace[b]; | ||||||
|  |   } | ||||||
|  |   err=err-prom;  | ||||||
|  |   std::cout<<GridLogMessage<<"Promoted back from subspace: err "<<norm2(err)<<std::endl; | ||||||
|  |   std::cout<<GridLogMessage<<"c_src "<<norm2(c_src)<<std::endl; | ||||||
|  |   std::cout<<GridLogMessage<<"prom  "<<norm2(prom)<<std::endl; | ||||||
|  |  | ||||||
|  |   PVdagM.Op(prom,tmp); | ||||||
|  |   blockProject(c_proj,tmp,subspace); | ||||||
|  |   std::cout<<GridLogMessage<<" Called Big Dirac Op "<<norm2(tmp)<<std::endl; | ||||||
|  |  | ||||||
|  |   LittleDiracOpPV.M(c_src,c_res); | ||||||
|  |   std::cout<<GridLogMessage<<" Called Little Dirac Op c_src "<< norm2(c_src) << "  c_res "<< norm2(c_res) <<std::endl; | ||||||
|  |  | ||||||
|  |   std::cout<<GridLogMessage<<"Little dop : "<<norm2(c_res)<<std::endl; | ||||||
|  |   //  std::cout<<GridLogMessage<<" Little "<< c_res<<std::endl; | ||||||
|  |  | ||||||
|  |   std::cout<<GridLogMessage<<"Big dop in subspace : "<<norm2(c_proj)<<std::endl; | ||||||
|  |   //  std::cout<<GridLogMessage<<" Big "<< c_proj<<std::endl; | ||||||
|  |   c_proj = c_proj - c_res; | ||||||
|  |   std::cout<<GridLogMessage<<" ldop error: "<<norm2(c_proj)<<std::endl; | ||||||
|  |   //  std::cout<<GridLogMessage<<" error "<< c_proj<<std::endl; | ||||||
|  |  | ||||||
|  |   std::cout<<GridLogMessage<<std::endl; | ||||||
|  |   std::cout<<GridLogMessage<<"*******************************************"<<std::endl; | ||||||
|  |   std::cout<<GridLogMessage<<std::endl; | ||||||
|  |   std::cout<<GridLogMessage << "Done "<< std::endl; | ||||||
|  |  | ||||||
|  |   Grid_finalize(); | ||||||
|  |   return 0; | ||||||
|  | } | ||||||
		Reference in New Issue
	
	Block a user