mirror of
				https://github.com/paboyle/Grid.git
				synced 2025-10-25 10:09:34 +01:00 
			
		
		
		
	Compare commits
	
		
			14 Commits
		
	
	
		
			b58fd80379
			...
			rmhmc_merg
		
	
	| Author | SHA1 | Date | |
|---|---|---|---|
|  | cfa0576ffd | ||
|  | fe98e9f555 | ||
|  | 948d16fb06 | ||
|  | 58fbcaa399 | ||
|  | 9ad6836b0f | ||
|  | 026eb8a695 | ||
|  | 076580c232 | ||
|  | 7af6022a2a | ||
|  | 982a60536c | ||
|  | dc36d272ce | ||
|  | 515ff6bf62 | ||
|  | 6d0c2de399 | ||
|  | 7786ea9921 | ||
|  | d93eac7b1c | 
| @@ -69,8 +69,7 @@ NAMESPACE_CHECK(BiCGSTAB); | |||||||
| #include <Grid/algorithms/iterative/PowerMethod.h> | #include <Grid/algorithms/iterative/PowerMethod.h> | ||||||
|  |  | ||||||
| NAMESPACE_CHECK(PowerMethod); | NAMESPACE_CHECK(PowerMethod); | ||||||
| #include <Grid/algorithms/multigrid/MultiGrid.h> | #include <Grid/algorithms/CoarsenedMatrix.h> | ||||||
|  |  | ||||||
| NAMESPACE_CHECK(CoarsendMatrix); | NAMESPACE_CHECK(CoarsendMatrix); | ||||||
| #include <Grid/algorithms/FFT.h> | #include <Grid/algorithms/FFT.h> | ||||||
|  |  | ||||||
|   | |||||||
| @@ -158,20 +158,7 @@ public: | |||||||
|     blockPromote(CoarseVec,FineVec,subspace); |     blockPromote(CoarseVec,FineVec,subspace); | ||||||
|   } |   } | ||||||
|  |  | ||||||
|   virtual void CreateSubspaceRandom(GridParallelRNG  &RNG) { |   virtual void CreateSubspace(GridParallelRNG  &RNG,LinearOperatorBase<FineField> &hermop,int nn=nbasis) { | ||||||
|     int nn=nbasis; |  | ||||||
|     RealD scale; |  | ||||||
|     FineField noise(FineGrid); |  | ||||||
|     for(int b=0;b<nn;b++){ |  | ||||||
|       subspace[b] = Zero(); |  | ||||||
|       gaussian(RNG,noise); |  | ||||||
|       scale = std::pow(norm2(noise),-0.5);  |  | ||||||
|       noise=noise*scale; |  | ||||||
|       subspace[b] = noise; |  | ||||||
|     } |  | ||||||
|   } |  | ||||||
|   virtual void CreateSubspace(GridParallelRNG  &RNG,LinearOperatorBase<FineField> &hermop,int nn=nbasis) |  | ||||||
|   { |  | ||||||
|  |  | ||||||
|     RealD scale; |     RealD scale; | ||||||
|  |  | ||||||
| @@ -230,11 +217,6 @@ public: | |||||||
|     scale = std::pow(norm2(noise),-0.5);  |     scale = std::pow(norm2(noise),-0.5);  | ||||||
|     noise=noise*scale; |     noise=noise*scale; | ||||||
|  |  | ||||||
|     std::cout << GridLogMessage<<" Chebyshev subspace pass-1 : ord "<<orderfilter<<" ["<<lo<<","<<hi<<"]"<<std::endl; |  | ||||||
|     std::cout << GridLogMessage<<" Chebyshev subspace pass-2 : nbasis"<<nn<<" min " |  | ||||||
| 	      <<ordermin<<" step "<<orderstep |  | ||||||
| 	      <<" lo"<<filterlo<<std::endl; |  | ||||||
|  |  | ||||||
|     // Initial matrix element |     // Initial matrix element | ||||||
|     hermop.Op(noise,Mn); std::cout<<GridLogMessage << "noise <n|MdagM|n> "<<norm2(Mn)<<std::endl; |     hermop.Op(noise,Mn); std::cout<<GridLogMessage << "noise <n|MdagM|n> "<<norm2(Mn)<<std::endl; | ||||||
|  |  | ||||||
| @@ -308,44 +290,6 @@ public: | |||||||
|     } |     } | ||||||
|     assert(b==nn); |     assert(b==nn); | ||||||
|   } |   } | ||||||
|   virtual void CreateSubspaceChebyshev(GridParallelRNG  &RNG,LinearOperatorBase<FineField> &hermop, |  | ||||||
| 				       int nn, |  | ||||||
| 				       double hi, |  | ||||||
| 				       double lo, |  | ||||||
| 				       int orderfilter |  | ||||||
| 				       ) { |  | ||||||
|  |  | ||||||
|     RealD scale; |  | ||||||
|  |  | ||||||
|     FineField noise(FineGrid); |  | ||||||
|     FineField Mn(FineGrid); |  | ||||||
|     FineField tmp(FineGrid); |  | ||||||
|  |  | ||||||
|     // New normalised noise |  | ||||||
|     std::cout << GridLogMessage<<" Chebyshev subspace pure noise : ord "<<orderfilter<<" ["<<lo<<","<<hi<<"]"<<std::endl; |  | ||||||
|     std::cout << GridLogMessage<<" Chebyshev subspace pure noise  : nbasis "<<nn<<std::endl; |  | ||||||
|  |  | ||||||
|  |  | ||||||
|     for(int b =0;b<nbasis;b++) |  | ||||||
|     { |  | ||||||
|       gaussian(RNG,noise); |  | ||||||
|       scale = std::pow(norm2(noise),-0.5);  |  | ||||||
|       noise=noise*scale; |  | ||||||
|  |  | ||||||
|       // Initial matrix element |  | ||||||
|       hermop.Op(noise,Mn); |  | ||||||
|       if(b==0) std::cout<<GridLogMessage << "noise <n|MdagM|n> "<<norm2(Mn)<<std::endl; |  | ||||||
|       // Filter |  | ||||||
|       Chebyshev<FineField> Cheb(lo,hi,orderfilter); |  | ||||||
|       Cheb(hermop,noise,Mn); |  | ||||||
|       // normalise |  | ||||||
|       scale = std::pow(norm2(Mn),-0.5); 	Mn=Mn*scale; |  | ||||||
|       subspace[b]   = Mn; |  | ||||||
|       hermop.Op(Mn,tmp);  |  | ||||||
|       std::cout<<GridLogMessage << "filt ["<<b<<"] <n|MdagM|n> "<<norm2(tmp)<<std::endl; |  | ||||||
|     } |  | ||||||
|  |  | ||||||
|   } |  | ||||||
|  |  | ||||||
| }; | }; | ||||||
|  |  | ||||||
|   | |||||||
| @@ -1,573 +0,0 @@ | |||||||
| /************************************************************************************* |  | ||||||
|  |  | ||||||
|     Grid physics library, www.github.com/paboyle/Grid  |  | ||||||
|  |  | ||||||
|     Source file: ./lib/algorithms/GeneralCoarsenedMatrix.h |  | ||||||
|  |  | ||||||
|     Copyright (C) 2015 |  | ||||||
|  |  | ||||||
| Author: Peter Boyle <pboyle@bnl.gov> |  | ||||||
|  |  | ||||||
|     This program is free software; you can redistribute it and/or modify |  | ||||||
|     it under the terms of the GNU General Public License as published by |  | ||||||
|     the Free Software Foundation; either version 2 of the License, or |  | ||||||
|     (at your option) any later version. |  | ||||||
|  |  | ||||||
|     This program is distributed in the hope that it will be useful, |  | ||||||
|     but WITHOUT ANY WARRANTY; without even the implied warranty of |  | ||||||
|     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the |  | ||||||
|     GNU General Public License for more details. |  | ||||||
|  |  | ||||||
|     You should have received a copy of the GNU General Public License along |  | ||||||
|     with this program; if not, write to the Free Software Foundation, Inc., |  | ||||||
|     51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. |  | ||||||
|  |  | ||||||
|     See the full license in the file "LICENSE" in the top level distribution directory |  | ||||||
| *************************************************************************************/ |  | ||||||
| /*  END LEGAL */ |  | ||||||
| #pragma once |  | ||||||
|  |  | ||||||
| #include <Grid/qcd/QCD.h> // needed for Dagger(Yes|No), Inverse(Yes|No) |  | ||||||
|  |  | ||||||
| #include <Grid/lattice/PaddedCell.h> |  | ||||||
| #include <Grid/stencil/GeneralLocalStencil.h> |  | ||||||
|  |  | ||||||
| NAMESPACE_BEGIN(Grid); |  | ||||||
|  |  | ||||||
| // Fixme need coalesced read gpermute |  | ||||||
| template<class vobj> void gpermute(vobj & inout,int perm){ |  | ||||||
|   vobj tmp=inout; |  | ||||||
|   if (perm & 0x1 ) { permute(inout,tmp,0); tmp=inout;} |  | ||||||
|   if (perm & 0x2 ) { permute(inout,tmp,1); tmp=inout;} |  | ||||||
|   if (perm & 0x4 ) { permute(inout,tmp,2); tmp=inout;} |  | ||||||
|   if (perm & 0x8 ) { permute(inout,tmp,3); tmp=inout;} |  | ||||||
| } |  | ||||||
|  |  | ||||||
| ///////////////////////////////////////////////////////////////// |  | ||||||
| // Reuse Aggregation class from CoarsenedMatrix for now |  | ||||||
| // Might think about *smoothed* Aggregation |  | ||||||
| // Equivalent of Geometry class in cartesian case |  | ||||||
| ///////////////////////////////////////////////////////////////// |  | ||||||
| class NonLocalStencilGeometry { |  | ||||||
| public: |  | ||||||
|   int depth; |  | ||||||
|   int hops; |  | ||||||
|   int npoint; |  | ||||||
|   std::vector<Coordinate> shifts; |  | ||||||
|   Coordinate stencil_size; |  | ||||||
|   Coordinate stencil_lo; |  | ||||||
|   Coordinate stencil_hi; |  | ||||||
|   GridCartesian *grid; |  | ||||||
|   GridCartesian *Grid() {return grid;}; |  | ||||||
|   int Depth(void){return 1;};   // Ghost zone depth |  | ||||||
|   int Hops(void){return hops;}; // # of hops=> level of corner fill in in stencil |  | ||||||
|  |  | ||||||
|   virtual int DimSkip(void) =0; |  | ||||||
|  |  | ||||||
|   virtual ~NonLocalStencilGeometry() {}; |  | ||||||
|  |  | ||||||
|   int  Reverse(int point) |  | ||||||
|   { |  | ||||||
|     int Nd = Grid()->Nd(); |  | ||||||
|     Coordinate shft = shifts[point]; |  | ||||||
|     Coordinate rev(Nd); |  | ||||||
|     for(int mu=0;mu<Nd;mu++) rev[mu]= -shft[mu]; |  | ||||||
|     for(int p=0;p<npoint;p++){ |  | ||||||
|       if(rev==shifts[p]){ |  | ||||||
| 	return p; |  | ||||||
|       } |  | ||||||
|     } |  | ||||||
|     assert(0); |  | ||||||
|     return -1; |  | ||||||
|   } |  | ||||||
|   void BuildShifts(void) |  | ||||||
|   { |  | ||||||
|     this->shifts.resize(0); |  | ||||||
|     int Nd = this->grid->Nd(); |  | ||||||
|  |  | ||||||
|     int dd = this->DimSkip(); |  | ||||||
|     for(int s0=this->stencil_lo[dd+0];s0<=this->stencil_hi[dd+0];s0++){ |  | ||||||
|     for(int s1=this->stencil_lo[dd+1];s1<=this->stencil_hi[dd+1];s1++){ |  | ||||||
|     for(int s2=this->stencil_lo[dd+2];s2<=this->stencil_hi[dd+2];s2++){ |  | ||||||
|     for(int s3=this->stencil_lo[dd+3];s3<=this->stencil_hi[dd+3];s3++){ |  | ||||||
|       Coordinate sft(Nd,0); |  | ||||||
|       sft[dd+0] = s0; |  | ||||||
|       sft[dd+1] = s1; |  | ||||||
|       sft[dd+2] = s2; |  | ||||||
|       sft[dd+3] = s3; |  | ||||||
|       int nhops = abs(s0)+abs(s1)+abs(s2)+abs(s3); |  | ||||||
|       if(nhops<=this->hops) this->shifts.push_back(sft); |  | ||||||
|     }}}} |  | ||||||
|     this->npoint = this->shifts.size(); |  | ||||||
|     std::cout << GridLogMessage << "NonLocalStencilGeometry has "<< this->npoint << " terms in stencil "<<std::endl; |  | ||||||
|   } |  | ||||||
|    |  | ||||||
|   NonLocalStencilGeometry(GridCartesian *_coarse_grid,int _hops) : grid(_coarse_grid), hops(_hops) |  | ||||||
|   { |  | ||||||
|     Coordinate latt = grid->GlobalDimensions(); |  | ||||||
|     stencil_size.resize(grid->Nd()); |  | ||||||
|     stencil_lo.resize(grid->Nd()); |  | ||||||
|     stencil_hi.resize(grid->Nd()); |  | ||||||
|     for(int d=0;d<grid->Nd();d++){ |  | ||||||
|      if ( latt[d] == 1 ) { |  | ||||||
|       stencil_lo[d] = 0; |  | ||||||
|       stencil_hi[d] = 0; |  | ||||||
|       stencil_size[d]= 1; |  | ||||||
|      } else if ( latt[d] == 2 ) { |  | ||||||
|       stencil_lo[d] = -1; |  | ||||||
|       stencil_hi[d] = 0; |  | ||||||
|       stencil_size[d]= 2; |  | ||||||
|      } else if ( latt[d] > 2 ) { |  | ||||||
|        stencil_lo[d] = -1; |  | ||||||
|        stencil_hi[d] =  1; |  | ||||||
|        stencil_size[d]= 3; |  | ||||||
|      } |  | ||||||
|     } |  | ||||||
|   }; |  | ||||||
|  |  | ||||||
| }; |  | ||||||
|  |  | ||||||
| // Need to worry about red-black now |  | ||||||
| class NonLocalStencilGeometry4D : public NonLocalStencilGeometry { |  | ||||||
| public: |  | ||||||
|   virtual int DimSkip(void) { return 0;}; |  | ||||||
|   NonLocalStencilGeometry4D(GridCartesian *Coarse,int _hops) : NonLocalStencilGeometry(Coarse,_hops) { }; |  | ||||||
|   virtual ~NonLocalStencilGeometry4D() {}; |  | ||||||
| }; |  | ||||||
| class NonLocalStencilGeometry5D : public NonLocalStencilGeometry { |  | ||||||
| public: |  | ||||||
|   virtual int DimSkip(void) { return 1; };  |  | ||||||
|   NonLocalStencilGeometry5D(GridCartesian *Coarse,int _hops) : NonLocalStencilGeometry(Coarse,_hops)  { }; |  | ||||||
|   virtual ~NonLocalStencilGeometry5D() {}; |  | ||||||
| }; |  | ||||||
| /* |  | ||||||
|  * Bunch of different options classes |  | ||||||
|  */ |  | ||||||
| class NextToNextToNextToNearestStencilGeometry4D : public NonLocalStencilGeometry4D { |  | ||||||
| public: |  | ||||||
|   NextToNextToNextToNearestStencilGeometry4D(GridCartesian *Coarse) :  NonLocalStencilGeometry4D(Coarse,4) |  | ||||||
|   { |  | ||||||
|     this->BuildShifts(); |  | ||||||
|   }; |  | ||||||
| }; |  | ||||||
| class NextToNextToNextToNearestStencilGeometry5D : public  NonLocalStencilGeometry5D { |  | ||||||
| public: |  | ||||||
|   NextToNextToNextToNearestStencilGeometry5D(GridCartesian *Coarse) :  NonLocalStencilGeometry5D(Coarse,4) |  | ||||||
|   { |  | ||||||
|     this->BuildShifts(); |  | ||||||
|   }; |  | ||||||
| }; |  | ||||||
| class NextToNearestStencilGeometry4D : public  NonLocalStencilGeometry4D { |  | ||||||
| public: |  | ||||||
|   NextToNearestStencilGeometry4D(GridCartesian *Coarse) :  NonLocalStencilGeometry4D(Coarse,2) |  | ||||||
|   { |  | ||||||
|     this->BuildShifts(); |  | ||||||
|   }; |  | ||||||
| }; |  | ||||||
| class NextToNearestStencilGeometry5D : public  NonLocalStencilGeometry5D { |  | ||||||
| public: |  | ||||||
|   NextToNearestStencilGeometry5D(GridCartesian *Coarse) :  NonLocalStencilGeometry5D(Coarse,2) |  | ||||||
|   { |  | ||||||
|     this->BuildShifts(); |  | ||||||
|   }; |  | ||||||
| }; |  | ||||||
| class NearestStencilGeometry4D : public  NonLocalStencilGeometry4D { |  | ||||||
| public: |  | ||||||
|   NearestStencilGeometry4D(GridCartesian *Coarse) :  NonLocalStencilGeometry4D(Coarse,1) |  | ||||||
|   { |  | ||||||
|     this->BuildShifts(); |  | ||||||
|   }; |  | ||||||
| }; |  | ||||||
| class NearestStencilGeometry5D : public  NonLocalStencilGeometry5D { |  | ||||||
| public: |  | ||||||
|   NearestStencilGeometry5D(GridCartesian *Coarse) :  NonLocalStencilGeometry5D(Coarse,1) |  | ||||||
|   { |  | ||||||
|     this->BuildShifts(); |  | ||||||
|   }; |  | ||||||
| }; |  | ||||||
|  |  | ||||||
| // Fine Object == (per site) type of fine field |  | ||||||
| // nbasis      == number of deflation vectors |  | ||||||
| template<class Fobj,class CComplex,int nbasis> |  | ||||||
| class GeneralCoarsenedMatrix : public SparseMatrixBase<Lattice<iVector<CComplex,nbasis > > >  { |  | ||||||
| public: |  | ||||||
|  |  | ||||||
|   typedef GeneralCoarsenedMatrix<Fobj,CComplex,nbasis> GeneralCoarseOp; |  | ||||||
|   typedef iVector<CComplex,nbasis >           siteVector; |  | ||||||
|   typedef iMatrix<CComplex,nbasis >           siteMatrix; |  | ||||||
|   typedef Lattice<iScalar<CComplex> >         CoarseComplexField; |  | ||||||
|   typedef Lattice<siteVector>                 CoarseVector; |  | ||||||
|   typedef Lattice<iMatrix<CComplex,nbasis > > CoarseMatrix; |  | ||||||
|   typedef iMatrix<CComplex,nbasis >  Cobj; |  | ||||||
|   typedef Lattice< CComplex >   CoarseScalar; // used for inner products on fine field |  | ||||||
|   typedef Lattice<Fobj >        FineField; |  | ||||||
|   typedef CoarseVector Field; |  | ||||||
|   //////////////////// |  | ||||||
|   // Data members |  | ||||||
|   //////////////////// |  | ||||||
|   int hermitian; |  | ||||||
|   GridBase      *       _FineGrid;  |  | ||||||
|   GridCartesian *       _CoarseGrid;  |  | ||||||
|   NonLocalStencilGeometry &geom; |  | ||||||
|   PaddedCell Cell; |  | ||||||
|   GeneralLocalStencil Stencil; |  | ||||||
|    |  | ||||||
|   std::vector<CoarseMatrix> _A; |  | ||||||
|   std::vector<CoarseMatrix> _Adag; |  | ||||||
|  |  | ||||||
|   /////////////////////// |  | ||||||
|   // Interface |  | ||||||
|   /////////////////////// |  | ||||||
|   GridBase      * Grid(void)           { return _FineGrid; };   // this is all the linalg routines need to know |  | ||||||
|   GridBase      * FineGrid(void)       { return _FineGrid; };   // this is all the linalg routines need to know |  | ||||||
|   GridCartesian * CoarseGrid(void)     { return _CoarseGrid; };   // this is all the linalg routines need to know |  | ||||||
|  |  | ||||||
|  |  | ||||||
|   void ProjectNearestNeighbour(RealD shift, GeneralCoarseOp &CopyMe) |  | ||||||
|   { |  | ||||||
|     int nfound=0; |  | ||||||
|     std::cout << " ProjectNearestNeighbour "<< CopyMe._A[0].Grid()<<std::endl; |  | ||||||
|     for(int p=0;p<geom.npoint;p++){ |  | ||||||
|       for(int pp=0;pp<CopyMe.geom.npoint;pp++){ |  | ||||||
|  	// Search for the same relative shift |  | ||||||
| 	// Avoids brutal handling of Grid pointers |  | ||||||
| 	if ( CopyMe.geom.shifts[pp]==geom.shifts[p] ) { |  | ||||||
| 	  _A[p] = CopyMe.Cell.Extract(CopyMe._A[pp]); |  | ||||||
| 	  _Adag[p] = CopyMe.Cell.Extract(CopyMe._Adag[pp]); |  | ||||||
| 	  nfound++; |  | ||||||
| 	} |  | ||||||
|       } |  | ||||||
|     } |  | ||||||
|     assert(nfound==geom.npoint); |  | ||||||
|     ExchangeCoarseLinks(); |  | ||||||
|   } |  | ||||||
|    |  | ||||||
|   GeneralCoarsenedMatrix(NonLocalStencilGeometry &_geom,GridBase *FineGrid, GridCartesian * CoarseGrid) |  | ||||||
|     : geom(_geom), |  | ||||||
|       _FineGrid(FineGrid), |  | ||||||
|       _CoarseGrid(CoarseGrid), |  | ||||||
|       hermitian(1), |  | ||||||
|       Cell(_geom.Depth(),_CoarseGrid), |  | ||||||
|       Stencil(Cell.grids.back(),geom.shifts) |  | ||||||
|   { |  | ||||||
|     { |  | ||||||
|       int npoint = _geom.npoint; |  | ||||||
|       autoView( Stencil_v  , Stencil, AcceleratorRead); |  | ||||||
|       int osites=Stencil.Grid()->oSites(); |  | ||||||
|       for(int ss=0;ss<osites;ss++){ |  | ||||||
| 	for(int point=0;point<npoint;point++){ |  | ||||||
| 	  auto SE = Stencil_v.GetEntry(point,ss); |  | ||||||
| 	  int o = SE->_offset; |  | ||||||
| 	  assert( o< osites); |  | ||||||
| 	} |  | ||||||
|       }     |  | ||||||
|     } |  | ||||||
|  |  | ||||||
|     _A.resize(geom.npoint,CoarseGrid); |  | ||||||
|     _Adag.resize(geom.npoint,CoarseGrid); |  | ||||||
|   } |  | ||||||
|   void M (const CoarseVector &in, CoarseVector &out) |  | ||||||
|   { |  | ||||||
|     Mult(_A,in,out); |  | ||||||
|   } |  | ||||||
|   void Mdag (const CoarseVector &in, CoarseVector &out) |  | ||||||
|   { |  | ||||||
|     if ( hermitian ) M(in,out); |  | ||||||
|     else Mult(_Adag,in,out); |  | ||||||
|   } |  | ||||||
|   void Mult (std::vector<CoarseMatrix> &A,const CoarseVector &in, CoarseVector &out) |  | ||||||
|   { |  | ||||||
|     RealD tviews=0; |  | ||||||
|     RealD ttot=0; |  | ||||||
|     RealD tmult=0; |  | ||||||
|     RealD texch=0; |  | ||||||
|     RealD text=0; |  | ||||||
|     ttot=-usecond(); |  | ||||||
|     conformable(CoarseGrid(),in.Grid()); |  | ||||||
|     conformable(in.Grid(),out.Grid()); |  | ||||||
|     out.Checkerboard() = in.Checkerboard(); |  | ||||||
|     CoarseVector tin=in; |  | ||||||
|  |  | ||||||
|     texch-=usecond(); |  | ||||||
|     CoarseVector pin  = Cell.Exchange(tin); |  | ||||||
|     texch+=usecond(); |  | ||||||
|  |  | ||||||
|     CoarseVector pout(pin.Grid()); pout=Zero(); |  | ||||||
|  |  | ||||||
|     int npoint = geom.npoint; |  | ||||||
|     typedef LatticeView<Cobj> Aview; |  | ||||||
|        |  | ||||||
|     const int Nsimd = CComplex::Nsimd(); |  | ||||||
|      |  | ||||||
|     int osites=pin.Grid()->oSites(); |  | ||||||
|     //    int gsites=pin.Grid()->gSites(); |  | ||||||
|  |  | ||||||
|     RealD flops = 1.0* npoint * nbasis * nbasis * 8 * osites; |  | ||||||
|     RealD bytes = (1.0*osites*sizeof(siteMatrix)*npoint+2.0*osites*sizeof(siteVector))*npoint; |  | ||||||
|        |  | ||||||
|     //    for(int point=0;point<npoint;point++){ |  | ||||||
|     //      conformable(A[point],pin); |  | ||||||
|     //    } |  | ||||||
|  |  | ||||||
|     { |  | ||||||
|       tviews-=usecond(); |  | ||||||
|       autoView( in_v , pin, AcceleratorRead); |  | ||||||
|       autoView( out_v , pout, AcceleratorWrite); |  | ||||||
|       autoView( Stencil_v  , Stencil, AcceleratorRead); |  | ||||||
|       tviews+=usecond(); |  | ||||||
|        |  | ||||||
|       for(int point=0;point<npoint;point++){ |  | ||||||
| 	tviews-=usecond(); |  | ||||||
| 	autoView( A_v, A[point],AcceleratorRead); |  | ||||||
| 	tviews+=usecond(); |  | ||||||
| 	tmult-=usecond(); |  | ||||||
| 	accelerator_for(sss, osites*nbasis, Nsimd, { |  | ||||||
|  |  | ||||||
| 	    typedef decltype(coalescedRead(in_v[0]))    calcVector; |  | ||||||
|  |  | ||||||
| 	    int ss = sss/nbasis; |  | ||||||
| 	    int b  = sss%nbasis; |  | ||||||
|  |  | ||||||
| 	    auto SE  = Stencil_v.GetEntry(point,ss); |  | ||||||
| 	    auto nbr = coalescedReadGeneralPermute(in_v[SE->_offset],SE->_permute,Nd); |  | ||||||
| 	    auto res = out_v(ss)(b); |  | ||||||
| 	    for(int bb=0;bb<nbasis;bb++) { |  | ||||||
| 	      res = res + coalescedRead(A_v[ss](b,bb))*nbr(bb); |  | ||||||
| 	    } |  | ||||||
| 	    coalescedWrite(out_v[ss](b),res); |  | ||||||
| 	}); |  | ||||||
|  |  | ||||||
| 	tmult+=usecond(); |  | ||||||
|       } |  | ||||||
|     } |  | ||||||
|     text-=usecond(); |  | ||||||
|     out = Cell.Extract(pout); |  | ||||||
|     text+=usecond(); |  | ||||||
|     ttot+=usecond(); |  | ||||||
|  |  | ||||||
|     std::cout << GridLogPerformance<<"Coarse Mult Aviews "<<tviews<<" us"<<std::endl; |  | ||||||
|     std::cout << GridLogPerformance<<"Coarse Mult exch "<<texch<<" us"<<std::endl; |  | ||||||
|     std::cout << GridLogPerformance<<"Coarse Mult mult "<<tmult<<" us"<<std::endl; |  | ||||||
|     std::cout << GridLogPerformance<<"Coarse Mult ext  "<<text<<" us"<<std::endl; |  | ||||||
|     std::cout << GridLogPerformance<<"Coarse Mult tot  "<<ttot<<" us"<<std::endl; |  | ||||||
|     std::cout << GridLogPerformance<<"Coarse Kernel "<< flops/tmult<<" mflop/s"<<std::endl; |  | ||||||
|     std::cout << GridLogPerformance<<"Coarse Kernel "<< bytes/tmult<<" MB/s"<<std::endl; |  | ||||||
|     std::cout << GridLogPerformance<<"Coarse flops/s "<< flops/ttot<<" mflop/s"<<std::endl; |  | ||||||
|     std::cout << GridLogPerformance<<"Coarse bytes   "<< bytes/1e6<<" MB"<<std::endl; |  | ||||||
|   }; |  | ||||||
|  |  | ||||||
|   void PopulateAdag(void) |  | ||||||
|   { |  | ||||||
|     for(int64_t bidx=0;bidx<CoarseGrid()->gSites() ;bidx++){ |  | ||||||
|       Coordinate bcoor; |  | ||||||
|       CoarseGrid()->GlobalIndexToGlobalCoor(bidx,bcoor); |  | ||||||
|        |  | ||||||
|       for(int p=0;p<geom.npoint;p++){ |  | ||||||
| 	Coordinate scoor = bcoor; |  | ||||||
| 	for(int mu=0;mu<bcoor.size();mu++){ |  | ||||||
| 	  int L = CoarseGrid()->GlobalDimensions()[mu]; |  | ||||||
| 	  scoor[mu] = (bcoor[mu] - geom.shifts[p][mu] + L) % L; // Modulo arithmetic |  | ||||||
| 	} |  | ||||||
| 	// Flip to poke/peekLocalSite and not too bad |  | ||||||
| 	auto link = peekSite(_A[p],scoor); |  | ||||||
| 	int pp = geom.Reverse(p); |  | ||||||
| 	pokeSite(adj(link),_Adag[pp],bcoor); |  | ||||||
|       } |  | ||||||
|     } |  | ||||||
|   } |  | ||||||
|   ///////////////////////////////////////////////////////////// |  | ||||||
|   //  |  | ||||||
|   // A) Only reduced flops option is to use a padded cell of depth 4 |  | ||||||
|   // and apply MpcDagMpc in the padded cell. |  | ||||||
|   // |  | ||||||
|   // Makes for ONE application of MpcDagMpc per vector instead of 30 or 80. |  | ||||||
|   // With the effective cell size around (B+8)^4 perhaps 12^4/4^4 ratio |  | ||||||
|   // Cost is 81x more, same as stencil size. |  | ||||||
|   // |  | ||||||
|   // But: can eliminate comms and do as local dirichlet. |  | ||||||
|   // |  | ||||||
|   // Local exchange gauge field once. |  | ||||||
|   // Apply to all vectors, local only computation. |  | ||||||
|   // Must exchange ghost subcells in reverse process of PaddedCell to take inner products |  | ||||||
|   // |  | ||||||
|   // B) Can reduce cost: pad by 1, apply Deo      (4^4+6^4+8^4+8^4 )/ (4x 4^4) |  | ||||||
|   //                     pad by 2, apply Doe |  | ||||||
|   //                     pad by 3, apply Deo |  | ||||||
|   //                     then break out 8x directions; cost is ~10x MpcDagMpc per vector |  | ||||||
|   // |  | ||||||
|   // => almost factor of 10 in setup cost, excluding data rearrangement |  | ||||||
|   // |  | ||||||
|   // Intermediates -- ignore the corner terms, leave approximate and force Hermitian |  | ||||||
|   // Intermediates -- pad by 2 and apply 1+8+24 = 33 times. |  | ||||||
|   ///////////////////////////////////////////////////////////// |  | ||||||
|  |  | ||||||
|     ////////////////////////////////////////////////////////// |  | ||||||
|     // BFM HDCG style approach: Solve a system of equations to get Aij |  | ||||||
|     ////////////////////////////////////////////////////////// |  | ||||||
|     /* |  | ||||||
|      *     Here, k,l index which possible shift within the 3^Nd "ball" connected by MdagM. |  | ||||||
|      * |  | ||||||
|      *     conj(phases[block]) proj[k][ block*Nvec+j ] =  \sum_ball  e^{i q_k . delta} < phi_{block,j} | MdagM | phi_{(block+delta),i} >  |  | ||||||
|      *                                                 =  \sum_ball e^{iqk.delta} A_ji |  | ||||||
|      * |  | ||||||
|      *     Must invert matrix M_k,l = e^[i q_k . delta_l] |  | ||||||
|      * |  | ||||||
|      *     Where q_k = delta_k . (2*M_PI/global_nb[mu]) |  | ||||||
|      */ |  | ||||||
|   void CoarsenOperator(LinearOperatorBase<Lattice<Fobj> > &linop, |  | ||||||
| 		       Aggregation<Fobj,CComplex,nbasis> & Subspace) |  | ||||||
|   { |  | ||||||
|     std::cout << GridLogMessage<< "GeneralCoarsenMatrix "<< std::endl; |  | ||||||
|     GridBase *grid = FineGrid(); |  | ||||||
|  |  | ||||||
|     RealD tproj=0.0; |  | ||||||
|     RealD teigen=0.0; |  | ||||||
|     RealD tmat=0.0; |  | ||||||
|     RealD tphase=0.0; |  | ||||||
|     RealD tinv=0.0; |  | ||||||
|  |  | ||||||
|     ///////////////////////////////////////////////////////////// |  | ||||||
|     // Orthogonalise the subblocks over the basis |  | ||||||
|     ///////////////////////////////////////////////////////////// |  | ||||||
|     CoarseScalar InnerProd(CoarseGrid());  |  | ||||||
|     blockOrthogonalise(InnerProd,Subspace.subspace); |  | ||||||
|  |  | ||||||
|     const int npoint = geom.npoint; |  | ||||||
|        |  | ||||||
|     Coordinate clatt = CoarseGrid()->GlobalDimensions(); |  | ||||||
|     int Nd = CoarseGrid()->Nd(); |  | ||||||
|  |  | ||||||
|       /* |  | ||||||
|        *     Here, k,l index which possible momentum/shift within the N-points connected by MdagM. |  | ||||||
|        *     Matrix index i is mapped to this shift via  |  | ||||||
|        *               geom.shifts[i] |  | ||||||
|        * |  | ||||||
|        *     conj(pha[block]) proj[k (which mom)][j (basis vec cpt)][block]  |  | ||||||
|        *       =  \sum_{l in ball}  e^{i q_k . delta_l} < phi_{block,j} | MdagM | phi_{(block+delta_l),i} >  |  | ||||||
|        *       =  \sum_{l in ball} e^{iqk.delta_l} A_ji^{b.b+l} |  | ||||||
|        *       = M_{kl} A_ji^{b.b+l} |  | ||||||
|        * |  | ||||||
|        *     Must assemble and invert matrix M_k,l = e^[i q_k . delta_l] |  | ||||||
|        *   |  | ||||||
|        *     Where q_k = delta_k . (2*M_PI/global_nb[mu]) |  | ||||||
|        * |  | ||||||
|        *     Then A{ji}^{b,b+l} = M^{-1}_{lm} ComputeProj_{m,b,i,j} |  | ||||||
|        */ |  | ||||||
|     teigen-=usecond(); |  | ||||||
|     Eigen::MatrixXcd Mkl    = Eigen::MatrixXcd::Zero(npoint,npoint); |  | ||||||
|     Eigen::MatrixXcd invMkl = Eigen::MatrixXcd::Zero(npoint,npoint); |  | ||||||
|     ComplexD ci(0.0,1.0); |  | ||||||
|     for(int k=0;k<npoint;k++){ // Loop over momenta |  | ||||||
|  |  | ||||||
|       for(int l=0;l<npoint;l++){ // Loop over nbr relative |  | ||||||
| 	ComplexD phase(0.0,0.0); |  | ||||||
| 	for(int mu=0;mu<Nd;mu++){ |  | ||||||
| 	  RealD TwoPiL =  M_PI * 2.0/ clatt[mu]; |  | ||||||
| 	  phase=phase+TwoPiL*geom.shifts[k][mu]*geom.shifts[l][mu]; |  | ||||||
| 	} |  | ||||||
| 	phase=exp(phase*ci); |  | ||||||
| 	Mkl(k,l) = phase; |  | ||||||
|       } |  | ||||||
|     } |  | ||||||
|     invMkl = Mkl.inverse(); |  | ||||||
|     teigen+=usecond(); |  | ||||||
|  |  | ||||||
|     /////////////////////////////////////////////////////////////////////// |  | ||||||
|     // Now compute the matrix elements of linop between the orthonormal |  | ||||||
|     // set of vectors. |  | ||||||
|     /////////////////////////////////////////////////////////////////////// |  | ||||||
|     FineField phaV(grid); // Phased block basis vector |  | ||||||
|     FineField MphaV(grid);// Matrix applied |  | ||||||
|     CoarseVector coarseInner(CoarseGrid()); |  | ||||||
|  |  | ||||||
|     std::vector<CoarseVector> ComputeProj(npoint,CoarseGrid()); |  | ||||||
|     std::vector<CoarseVector>          FT(npoint,CoarseGrid()); |  | ||||||
|     for(int i=0;i<nbasis;i++){// Loop over basis vectors |  | ||||||
|       std::cout << GridLogMessage<< "CoarsenMatrixColoured vec "<<i<<"/"<<nbasis<< std::endl; |  | ||||||
|       for(int p=0;p<npoint;p++){ // Loop over momenta in npoint |  | ||||||
| 	///////////////////////////////////////////////////// |  | ||||||
| 	// Stick a phase on every block |  | ||||||
| 	///////////////////////////////////////////////////// |  | ||||||
| 	tphase-=usecond(); |  | ||||||
| 	CoarseComplexField coor(CoarseGrid()); |  | ||||||
| 	CoarseComplexField pha(CoarseGrid());	pha=Zero(); |  | ||||||
| 	for(int mu=0;mu<Nd;mu++){ |  | ||||||
| 	  LatticeCoordinate(coor,mu); |  | ||||||
| 	  RealD TwoPiL =  M_PI * 2.0/ clatt[mu]; |  | ||||||
| 	  pha = pha + (TwoPiL * geom.shifts[p][mu]) * coor; |  | ||||||
| 	} |  | ||||||
| 	pha  =exp(pha*ci); |  | ||||||
| 	phaV=Zero(); |  | ||||||
| 	blockZAXPY(phaV,pha,Subspace.subspace[i],phaV); |  | ||||||
| 	tphase+=usecond(); |  | ||||||
|  |  | ||||||
| 	///////////////////////////////////////////////////////////////////// |  | ||||||
| 	// Multiple phased subspace vector by matrix and project to subspace |  | ||||||
| 	// Remove local bulk phase to leave relative phases |  | ||||||
| 	///////////////////////////////////////////////////////////////////// |  | ||||||
| 	tmat-=usecond(); |  | ||||||
| 	linop.Op(phaV,MphaV); |  | ||||||
| 	tmat+=usecond(); |  | ||||||
|  |  | ||||||
| 	tproj-=usecond(); |  | ||||||
| 	blockProject(coarseInner,MphaV,Subspace.subspace); |  | ||||||
| 	coarseInner = conjugate(pha) * coarseInner; |  | ||||||
|  |  | ||||||
| 	ComputeProj[p] = coarseInner; |  | ||||||
| 	tproj+=usecond(); |  | ||||||
|  |  | ||||||
|       } |  | ||||||
|  |  | ||||||
|       tinv-=usecond(); |  | ||||||
|       for(int k=0;k<npoint;k++){ |  | ||||||
| 	FT[k] = Zero(); |  | ||||||
| 	for(int l=0;l<npoint;l++){ |  | ||||||
| 	  FT[k]= FT[k]+ invMkl(l,k)*ComputeProj[l]; |  | ||||||
| 	} |  | ||||||
|        |  | ||||||
| 	int osites=CoarseGrid()->oSites(); |  | ||||||
| 	autoView( A_v  , _A[k], AcceleratorWrite); |  | ||||||
| 	autoView( FT_v  , FT[k], AcceleratorRead); |  | ||||||
| 	accelerator_for(sss, osites, 1, { |  | ||||||
| 	    for(int j=0;j<nbasis;j++){ |  | ||||||
| 	      A_v[sss](j,i) = FT_v[sss](j); |  | ||||||
| 	    } |  | ||||||
|         }); |  | ||||||
|       } |  | ||||||
|       tinv+=usecond(); |  | ||||||
|     } |  | ||||||
|  |  | ||||||
|     for(int p=0;p<geom.npoint;p++){ |  | ||||||
|       Coordinate coor({0,0,0,0,0}); |  | ||||||
|       auto sval = peekSite(_A[p],coor); |  | ||||||
|     } |  | ||||||
|  |  | ||||||
|     // Only needed if nonhermitian |  | ||||||
|     if ( ! hermitian ) { |  | ||||||
|       std::cout << GridLogMessage<<"PopulateAdag  "<<std::endl; |  | ||||||
|       PopulateAdag(); |  | ||||||
|     } |  | ||||||
|  |  | ||||||
|     // Need to write something to populate Adag from A |  | ||||||
|     std::cout << GridLogMessage<<"ExchangeCoarseLinks  "<<std::endl; |  | ||||||
|     ExchangeCoarseLinks(); |  | ||||||
|     std::cout << GridLogMessage<<"CoarsenOperator eigen  "<<teigen<<" us"<<std::endl; |  | ||||||
|     std::cout << GridLogMessage<<"CoarsenOperator phase  "<<tphase<<" us"<<std::endl; |  | ||||||
|     std::cout << GridLogMessage<<"CoarsenOperator mat    "<<tmat <<" us"<<std::endl; |  | ||||||
|     std::cout << GridLogMessage<<"CoarsenOperator proj   "<<tproj<<" us"<<std::endl; |  | ||||||
|     std::cout << GridLogMessage<<"CoarsenOperator inv    "<<tinv<<" us"<<std::endl; |  | ||||||
|   } |  | ||||||
|   void ExchangeCoarseLinks(void){ |  | ||||||
|     for(int p=0;p<geom.npoint;p++){ |  | ||||||
|       std::cout << "Exchange "<<p<<std::endl; |  | ||||||
|       _A[p] = Cell.Exchange(_A[p]); |  | ||||||
|       _Adag[p]= Cell.Exchange(_Adag[p]); |  | ||||||
|     } |  | ||||||
|   } |  | ||||||
|   virtual  void Mdiag    (const Field &in, Field &out){ assert(0);}; |  | ||||||
|   virtual  void Mdir     (const Field &in, Field &out,int dir, int disp){assert(0);}; |  | ||||||
|   virtual  void MdirAll  (const Field &in, std::vector<Field> &out){assert(0);}; |  | ||||||
| }; |  | ||||||
|  |  | ||||||
|  |  | ||||||
| NAMESPACE_END(Grid); |  | ||||||
| @@ -460,6 +460,53 @@ class NonHermitianSchurDiagTwoOperator : public NonHermitianSchurOperatorBase<Fi | |||||||
|   } |   } | ||||||
| }; | }; | ||||||
|  |  | ||||||
|  | template<class Matrix,class Field> | ||||||
|  | class QuadLinearOperator : public LinearOperatorBase<Field> { | ||||||
|  |   Matrix &_Mat; | ||||||
|  | public: | ||||||
|  |   RealD a0,a1,a2; | ||||||
|  |   QuadLinearOperator(Matrix &Mat): _Mat(Mat),a0(0.),a1(0.),a2(1.) {}; | ||||||
|  |   QuadLinearOperator(Matrix &Mat, RealD _a0,RealD _a1,RealD _a2): _Mat(Mat),a0(_a0),a1(_a1),a2(_a2) {}; | ||||||
|  |   // Support for coarsening to a multigrid | ||||||
|  |   void OpDiag (const Field &in, Field &out) { | ||||||
|  |     assert(0); | ||||||
|  |     _Mat.Mdiag(in,out); | ||||||
|  |   } | ||||||
|  |   void OpDir  (const Field &in, Field &out,int dir,int disp) { | ||||||
|  |     assert(0); | ||||||
|  |     _Mat.Mdir(in,out,dir,disp); | ||||||
|  |   } | ||||||
|  |   void OpDirAll  (const Field &in, std::vector<Field> &out){ | ||||||
|  |     assert(0); | ||||||
|  |     _Mat.MdirAll(in,out); | ||||||
|  |   } | ||||||
|  |   void HermOp (const Field &in, Field &out){ | ||||||
|  | //    _Mat.M(in,out); | ||||||
|  |     Field tmp1(in.Grid()); | ||||||
|  | //    Linop.HermOpAndNorm(psi, mmp, d, b); | ||||||
|  |     _Mat.M(in,tmp1); | ||||||
|  |     _Mat.M(tmp1,out); | ||||||
|  |     out *= a2; | ||||||
|  |     axpy(out, a1, tmp1, out); | ||||||
|  |     axpy(out, a0, in, out); | ||||||
|  | //    d=real(innerProduct(psi,mmp)); | ||||||
|  | //    b=norm2(mmp); | ||||||
|  |   } | ||||||
|  |   void AdjOp     (const Field &in, Field &out){ | ||||||
|  |     assert(0); | ||||||
|  |     _Mat.M(in,out); | ||||||
|  |   } | ||||||
|  |   void HermOpAndNorm(const Field &in, Field &out,RealD &n1,RealD &n2){ | ||||||
|  |     HermOp(in,out); | ||||||
|  |     ComplexD dot= innerProduct(in,out); n1=real(dot); | ||||||
|  |     n2=norm2(out); | ||||||
|  |   } | ||||||
|  |   void Op(const Field &in, Field &out){ | ||||||
|  |     assert(0); | ||||||
|  |     _Mat.M(in,out); | ||||||
|  |   } | ||||||
|  | }; | ||||||
|  |  | ||||||
| /////////////////////////////////////////////////////////////////////////////////////////////////// | /////////////////////////////////////////////////////////////////////////////////////////////////// | ||||||
| // Left  handed Moo^-1 ; (Moo - Moe Mee^-1 Meo) psi = eta  -->  ( 1 - Moo^-1 Moe Mee^-1 Meo ) psi = Moo^-1 eta | // Left  handed Moo^-1 ; (Moo - Moe Mee^-1 Meo) psi = eta  -->  ( 1 - Moo^-1 Moe Mee^-1 Meo ) psi = Moo^-1 eta | ||||||
| // Right handed Moo^-1 ; (Moo - Moe Mee^-1 Meo) Moo^-1 Moo psi = eta  -->  ( 1 - Moe Mee^-1 Meo Moo^-1) phi=eta ; psi = Moo^-1 phi | // Right handed Moo^-1 ; (Moo - Moe Mee^-1 Meo) Moo^-1 Moo psi = eta  -->  ( 1 - Moe Mee^-1 Meo Moo^-1) phi=eta ; psi = Moo^-1 phi | ||||||
|   | |||||||
| @@ -90,8 +90,9 @@ public: | |||||||
|     order=_order; |     order=_order; | ||||||
|        |        | ||||||
|     if(order < 2) exit(-1); |     if(order < 2) exit(-1); | ||||||
|     Coeffs.resize(order,0.0); |     Coeffs.resize(order); | ||||||
|     Coeffs[order-1] = 1.0; |     Coeffs.assign(0.,order); | ||||||
|  |     Coeffs[order-1] = 1.; | ||||||
|   }; |   }; | ||||||
|    |    | ||||||
|   // PB - more efficient low pass drops high modes above the low as 1/x uses all Chebyshev's. |   // PB - more efficient low pass drops high modes above the low as 1/x uses all Chebyshev's. | ||||||
|   | |||||||
| @@ -36,11 +36,12 @@ NAMESPACE_BEGIN(Grid); | |||||||
| // Abstract base class. | // Abstract base class. | ||||||
| // Takes a matrix (Mat), a source (phi), and a vector of Fields (chi) | // Takes a matrix (Mat), a source (phi), and a vector of Fields (chi) | ||||||
| // and returns a forecasted solution to the system D*psi = phi (psi). | // and returns a forecasted solution to the system D*psi = phi (psi). | ||||||
| template<class Matrix, class Field> | // Changing to operator | ||||||
|  | template<class LinearOperatorBase, class Field> | ||||||
| class Forecast | class Forecast | ||||||
| { | { | ||||||
| public: | public: | ||||||
|   virtual Field operator()(Matrix &Mat, const Field& phi, const std::vector<Field>& chi) = 0; |   virtual Field operator()(LinearOperatorBase &Mat, const Field& phi, const std::vector<Field>& chi) = 0; | ||||||
| }; | }; | ||||||
|  |  | ||||||
| // Implementation of Brower et al.'s chronological inverter (arXiv:hep-lat/9509012), | // Implementation of Brower et al.'s chronological inverter (arXiv:hep-lat/9509012), | ||||||
| @@ -54,13 +55,13 @@ public: | |||||||
|   Field operator()(Matrix &Mat, const Field& phi, const std::vector<Field>& prev_solns) |   Field operator()(Matrix &Mat, const Field& phi, const std::vector<Field>& prev_solns) | ||||||
|   { |   { | ||||||
|     int degree = prev_solns.size(); |     int degree = prev_solns.size(); | ||||||
|  |     std::cout << GridLogMessage << "ChronoForecast: degree= " << degree << std::endl; | ||||||
|     Field chi(phi); // forecasted solution |     Field chi(phi); // forecasted solution | ||||||
|  |  | ||||||
|     // Trivial cases |     // Trivial cases | ||||||
|     if(degree == 0){ chi = Zero(); return chi; } |     if(degree == 0){ chi = Zero(); return chi; } | ||||||
|     else if(degree == 1){ return prev_solns[0]; } |     else if(degree == 1){ return prev_solns[0]; } | ||||||
|  |  | ||||||
|     //    RealD dot; |  | ||||||
|     ComplexD xp; |     ComplexD xp; | ||||||
|     Field r(phi); // residual |     Field r(phi); // residual | ||||||
|     Field Mv(phi); |     Field Mv(phi); | ||||||
| @@ -83,8 +84,9 @@ public: | |||||||
|     // Perform sparse matrix multiplication and construct rhs |     // Perform sparse matrix multiplication and construct rhs | ||||||
|     for(int i=0; i<degree; i++){ |     for(int i=0; i<degree; i++){ | ||||||
|       b[i] = innerProduct(v[i],phi); |       b[i] = innerProduct(v[i],phi); | ||||||
|       Mat.M(v[i],Mv); | //      Mat.M(v[i],Mv); | ||||||
|       Mat.Mdag(Mv,MdagMv[i]); | //      Mat.Mdag(Mv,MdagMv[i]); | ||||||
|  |       Mat.HermOp(v[i],MdagMv[i]); | ||||||
|       G[i][i] = innerProduct(v[i],MdagMv[i]); |       G[i][i] = innerProduct(v[i],MdagMv[i]); | ||||||
|     } |     } | ||||||
|  |  | ||||||
|   | |||||||
| @@ -33,6 +33,15 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk> | |||||||
|    * Script A = SolverMatrix  |    * Script A = SolverMatrix  | ||||||
|    * Script P = Preconditioner |    * Script P = Preconditioner | ||||||
|    * |    * | ||||||
|  |    * Deflation methods considered | ||||||
|  |    *      -- Solve P A x = P b        [ like Luscher ] | ||||||
|  |    * DEF-1        M P A x = M P b     [i.e. left precon] | ||||||
|  |    * DEF-2        P^T M A x = P^T M b | ||||||
|  |    * ADEF-1       Preconditioner = M P + Q      [ Q + M + M A Q] | ||||||
|  |    * ADEF-2       Preconditioner = P^T M + Q | ||||||
|  |    * BNN          Preconditioner = P^T M P + Q | ||||||
|  |    * BNN2         Preconditioner = M P + P^TM +Q - M P A M  | ||||||
|  |    *  | ||||||
|    * Implement ADEF-2 |    * Implement ADEF-2 | ||||||
|    * |    * | ||||||
|    * Vstart = P^Tx + Qb |    * Vstart = P^Tx + Qb | ||||||
| @@ -40,245 +49,202 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk> | |||||||
|    * M2=M3=1 |    * M2=M3=1 | ||||||
|    * Vout = x |    * Vout = x | ||||||
|    */ |    */ | ||||||
| NAMESPACE_BEGIN(Grid); |  | ||||||
|  |  | ||||||
| template<class Field> | // abstract base | ||||||
| class TwoLevelCG : public LinearFunction<Field> | template<class Field, class CoarseField> | ||||||
|  | class TwoLevelFlexiblePcg : public LinearFunction<Field> | ||||||
| { | { | ||||||
|  public: |  public: | ||||||
|  |   int verbose; | ||||||
|   RealD   Tolerance; |   RealD   Tolerance; | ||||||
|   Integer MaxIterations; |   Integer MaxIterations; | ||||||
|  |   const int mmax = 5; | ||||||
|   GridBase *grid; |   GridBase *grid; | ||||||
|  |   GridBase *coarsegrid; | ||||||
|  |  | ||||||
|   // Fine operator, Smoother, CoarseSolver |   LinearOperatorBase<Field>   *_Linop | ||||||
|   LinearOperatorBase<Field>   &_FineLinop; |   OperatorFunction<Field>     *_Smoother, | ||||||
|   LinearFunction<Field>   &_Smoother; |   LinearFunction<CoarseField> *_CoarseSolver; | ||||||
|  |  | ||||||
|  |   // Need somthing that knows how to get from Coarse to fine and back again | ||||||
|    |    | ||||||
|   // more most opertor functions |   // more most opertor functions | ||||||
|   TwoLevelCG(RealD tol, |   TwoLevelFlexiblePcg(RealD tol, | ||||||
| 		     Integer maxit, | 		     Integer maxit, | ||||||
| 	     LinearOperatorBase<Field>   &FineLinop, | 		     LinearOperatorBase<Field> *Linop, | ||||||
| 	     LinearFunction<Field>       &Smoother, | 		     LinearOperatorBase<Field> *SmootherLinop, | ||||||
| 	     GridBase *fine) :  | 		     OperatorFunction<Field>   *Smoother, | ||||||
|  | 		     OperatorFunction<CoarseField>  CoarseLinop | ||||||
|  | 		     ) :  | ||||||
|       Tolerance(tol),  |       Tolerance(tol),  | ||||||
|       MaxIterations(maxit), |       MaxIterations(maxit), | ||||||
|       _FineLinop(FineLinop), |       _Linop(Linop), | ||||||
|       _Smoother(Smoother) |       _PreconditionerLinop(PrecLinop), | ||||||
|  |       _Preconditioner(Preconditioner) | ||||||
|   {  |   {  | ||||||
|     grid       = fine; |     verbose=0; | ||||||
|   }; |   }; | ||||||
|  |  | ||||||
|   virtual void operator() (const Field &src, Field &psi) |   // The Pcg routine is common to all, but the various matrices differ from derived  | ||||||
|   { |   // implementation to derived implmentation | ||||||
|     Field resid(grid); |   void operator() (const Field &src, Field &psi){ | ||||||
|  |   void operator() (const Field &src, Field &psi){ | ||||||
|  |  | ||||||
|  |     psi.Checkerboard() = src.Checkerboard(); | ||||||
|  |     grid             = src.Grid(); | ||||||
|  |  | ||||||
|     RealD f; |     RealD f; | ||||||
|     RealD rtzp,rtz,a,d,b; |     RealD rtzp,rtz,a,d,b; | ||||||
|     RealD rptzp; |     RealD rptzp; | ||||||
|  |     RealD tn; | ||||||
|  |     RealD guess = norm2(psi); | ||||||
|  |     RealD ssq   = norm2(src); | ||||||
|  |     RealD rsq   = ssq*Tolerance*Tolerance; | ||||||
|      |      | ||||||
|     Field x(grid);  |     ///////////////////////////// | ||||||
|     Field p(grid); |     // Set up history vectors | ||||||
|     Field z(grid); |     ///////////////////////////// | ||||||
|  |     std::vector<Field> p  (mmax,grid); | ||||||
|  |     std::vector<Field> mmp(mmax,grid); | ||||||
|  |     std::vector<RealD> pAp(mmax); | ||||||
|  |  | ||||||
|  |     Field x  (grid); x = psi; | ||||||
|  |     Field z  (grid); | ||||||
|     Field tmp(grid); |     Field tmp(grid); | ||||||
|     Field mmp(grid); |  | ||||||
|     Field r  (grid); |     Field r  (grid); | ||||||
|     Field mu (grid); |     Field mu (grid); | ||||||
|     Field rp (grid); |  | ||||||
|    |    | ||||||
|     //Initial residual computation & set up |  | ||||||
|     RealD guess = norm2(psi); |  | ||||||
|     double tn; |  | ||||||
|  |  | ||||||
|     GridStopWatch HDCGTimer; |  | ||||||
|     HDCGTimer.Start(); |  | ||||||
|     ////////////////////////// |     ////////////////////////// | ||||||
|     // x0 = Vstart -- possibly modify guess |     // x0 = Vstart -- possibly modify guess | ||||||
|     ////////////////////////// |     ////////////////////////// | ||||||
|     x=Zero(); |     x=src; | ||||||
|     Vstart(x,src); |     Vstart(x,src); | ||||||
|  |  | ||||||
|     // r0 = b -A x0 |     // r0 = b -A x0 | ||||||
|     _FineLinop.HermOp(x,mmp); |     HermOp(x,mmp); // Shouldn't this be something else? | ||||||
|  |     axpy (r, -1.0,mmp[0], src);    // Recomputes r=src-Ax0 | ||||||
|     axpy(r, -1.0, mmp, src);    // Recomputes r=src-x0 |  | ||||||
|     rp=r; |  | ||||||
|  |  | ||||||
|     ////////////////////////////////// |     ////////////////////////////////// | ||||||
|     // Compute z = M1 x |     // Compute z = M1 x | ||||||
|     ////////////////////////////////// |     ////////////////////////////////// | ||||||
|     PcgM1(r,z); |     M1(r,z,tmp,mp,SmootherMirs); | ||||||
|     rtzp =real(innerProduct(r,z)); |     rtzp =real(innerProduct(r,z)); | ||||||
|  |  | ||||||
|     /////////////////////////////////////// |     /////////////////////////////////////// | ||||||
|     // Except Def2, M2 is trivial |     // Solve for Mss mu = P A z and set p = z-mu | ||||||
|  |     // Def2: p = 1 - Q Az = Pright z  | ||||||
|  |     // Other algos M2 is trivial | ||||||
|     /////////////////////////////////////// |     /////////////////////////////////////// | ||||||
|     p=z; |     M2(z,p[0]); | ||||||
|  |  | ||||||
|     RealD ssq =  norm2(src); |     for (int k=0;k<=MaxIterations;k++){ | ||||||
|     RealD rsq =  ssq*Tolerance*Tolerance; |  | ||||||
|      |      | ||||||
|     std::cout<<GridLogMessage<<"HDCG: k=0 residual "<<rtzp<<" target rsq "<<rsq<<" ssq "<<ssq<<std::endl; |       int peri_k  = k % mmax; | ||||||
|      |       int peri_kp = (k+1) % mmax; | ||||||
|     for (int k=1;k<=MaxIterations;k++){ |  | ||||||
|  |  | ||||||
|       rtz=rtzp; |       rtz=rtzp; | ||||||
|       d= PcgM3(p,mmp); |       d= M3(p[peri_k],mp,mmp[peri_k],tmp); | ||||||
|       a = rtz/d; |       a = rtz/d; | ||||||
|      |      | ||||||
|       axpy(x,a,p,x); |       // Memorise this | ||||||
|       RealD rn = axpy_norm(r,-a,mmp,r); |       pAp[peri_k] = d; | ||||||
|  |  | ||||||
|       PcgM1(r,z); |       axpy(x,a,p[peri_k],x); | ||||||
|  |       RealD rn = axpy_norm(r,-a,mmp[peri_k],r); | ||||||
|  |  | ||||||
|  |       // Compute z = M x | ||||||
|  |       M1(r,z,tmp,mp); | ||||||
|  |  | ||||||
|       rtzp =real(innerProduct(r,z)); |       rtzp =real(innerProduct(r,z)); | ||||||
|  |  | ||||||
|       int ipcg=1; // almost free inexact preconditioned CG |       M2(z,mu); // ADEF-2 this is identity. Axpy possible to eliminate | ||||||
|       if (ipcg) { |  | ||||||
| 	rptzp =real(innerProduct(rp,z)); |       p[peri_kp]=p[peri_k]; | ||||||
|       } else { |  | ||||||
| 	rptzp =0; |       // Standard search direction  p -> z + b p    ; b =  | ||||||
|  |       b = (rtzp)/rtz; | ||||||
|  |  | ||||||
|  |       int northog; | ||||||
|  |       //    northog     = (peri_kp==0)?1:peri_kp; // This is the fCG(mmax) algorithm | ||||||
|  |       northog     = (k>mmax-1)?(mmax-1):k;        // This is the fCG-Tr(mmax-1) algorithm | ||||||
|  |      | ||||||
|  |       for(int back=0; back < northog; back++){ | ||||||
|  | 	int peri_back = (k-back)%mmax; | ||||||
|  | 	RealD pbApk= real(innerProduct(mmp[peri_back],p[peri_kp])); | ||||||
|  | 	RealD beta = -pbApk/pAp[peri_back]; | ||||||
|  | 	axpy(p[peri_kp],beta,p[peri_back],p[peri_kp]); | ||||||
|       } |       } | ||||||
|       b = (rtzp-rptzp)/rtz; |  | ||||||
|  |  | ||||||
|       PcgM2(z,mu); // ADEF-2 this is identity. Axpy possible to eliminate |  | ||||||
|  |  | ||||||
|       axpy(p,b,p,mu);  // mu = A r |  | ||||||
|  |  | ||||||
|       RealD rrn=sqrt(rn/ssq); |       RealD rrn=sqrt(rn/ssq); | ||||||
|       RealD rtn=sqrt(rtz/ssq); |       std::cout<<GridLogMessage<<"TwoLevelfPcg: k= "<<k<<" residual = "<<rrn<<std::endl; | ||||||
|       std::cout<<GridLogMessage<<"HDCG: Pcg k= "<<k<<" residual = "<<rrn<<std::endl; |  | ||||||
|  |  | ||||||
|       if ( ipcg ) { |  | ||||||
| 	axpy(rp,0.0,r,r); |  | ||||||
|       } |  | ||||||
|  |  | ||||||
|       // Stopping condition |       // Stopping condition | ||||||
|       if ( rn <= rsq ) {  |       if ( rn <= rsq ) {  | ||||||
|  |  | ||||||
| 	HDCGTimer.Stop(); | 	HermOp(x,mmp); // Shouldn't this be something else? | ||||||
| 	std::cout<<GridLogMessage<<"HDCG: Pcg converged in "<<k<<" iterations and "<<HDCGTimer.Elapsed()<<std::endl;; | 	axpy(tmp,-1.0,src,mmp[0]); | ||||||
| 	 | 	 | ||||||
| 	_FineLinop.HermOp(x,mmp);			   | 	RealD psinorm = sqrt(norm2(x)); | ||||||
| 	axpy(tmp,-1.0,src,mmp); |  | ||||||
|  |  | ||||||
| 	RealD  mmpnorm = sqrt(norm2(mmp)); |  | ||||||
| 	RealD  xnorm   = sqrt(norm2(x)); |  | ||||||
| 	RealD srcnorm = sqrt(norm2(src)); | 	RealD srcnorm = sqrt(norm2(src)); | ||||||
| 	RealD tmpnorm = sqrt(norm2(tmp)); | 	RealD tmpnorm = sqrt(norm2(tmp)); | ||||||
| 	RealD true_residual = tmpnorm/srcnorm; | 	RealD true_residual = tmpnorm/srcnorm; | ||||||
| 	std::cout<<GridLogMessage<<"HDCG: true residual is "<<true_residual | 	std::cout<<GridLogMessage<<"TwoLevelfPcg:   true residual is "<<true_residual<<std::endl; | ||||||
| 		 <<" solution "<<xnorm<<" source "<<srcnorm<<std::endl; | 	std::cout<<GridLogMessage<<"TwoLevelfPcg: target residual was"<<Tolerance<<std::endl; | ||||||
|  | 	return k; | ||||||
| 	return; |  | ||||||
|       } |       } | ||||||
|  |  | ||||||
|     } |     } | ||||||
|     std::cout << "HDCG: Pcg not converged"<<std::endl; |     // Non-convergence | ||||||
|     return ; |     assert(0); | ||||||
|   } |   } | ||||||
|  |  | ||||||
|  |  | ||||||
|  public: |  public: | ||||||
|  |  | ||||||
|   virtual void PcgM1(Field & in, Field & out)     =0; |   virtual void M(Field & in,Field & out,Field & tmp) { | ||||||
|   virtual void Vstart(Field & x,const Field & src)=0; |  | ||||||
|  |  | ||||||
|   virtual void PcgM2(const Field & in, Field & out) { |  | ||||||
|     out=in; |  | ||||||
|   } |   } | ||||||
|  |  | ||||||
|   virtual RealD PcgM3(const Field & p, Field & mmp){ |   virtual void M1(Field & in, Field & out) {// the smoother | ||||||
|     RealD dd; |  | ||||||
|     _FineLinop.HermOp(p,mmp); |  | ||||||
|     ComplexD dot = innerProduct(p,mmp); |  | ||||||
|     dd=real(dot); |  | ||||||
|     return dd; |  | ||||||
|   } |  | ||||||
|  |  | ||||||
|   ///////////////////////////////////////////////////////////////////// |  | ||||||
|   // Only Def1 has non-trivial Vout. |  | ||||||
|   ///////////////////////////////////////////////////////////////////// |  | ||||||
|   virtual void   Vout  (Field & in, Field & out,Field & src){ |  | ||||||
|     out = in; |  | ||||||
|   } |  | ||||||
|  |  | ||||||
| }; |  | ||||||
|    |  | ||||||
| template<class Field, class CoarseField, class Aggregation> |  | ||||||
| class TwoLevelADEF2 : public TwoLevelCG<Field> |  | ||||||
| { |  | ||||||
|  public: |  | ||||||
|   /////////////////////////////////////////////////////////////////////////////////// |  | ||||||
|   // Need something that knows how to get from Coarse to fine and back again |  | ||||||
|   //  void ProjectToSubspace(CoarseVector &CoarseVec,const FineField &FineVec){ |  | ||||||
|   //  void PromoteFromSubspace(const CoarseVector &CoarseVec,FineField &FineVec){ |  | ||||||
|   /////////////////////////////////////////////////////////////////////////////////// |  | ||||||
|   GridBase *coarsegrid; |  | ||||||
|   Aggregation &_Aggregates;                     |  | ||||||
|   LinearFunction<CoarseField> &_CoarseSolver; |  | ||||||
|   LinearFunction<CoarseField> &_CoarseSolverPrecise; |  | ||||||
|   /////////////////////////////////////////////////////////////////////////////////// |  | ||||||
|    |  | ||||||
|   // more most opertor functions |  | ||||||
|   TwoLevelADEF2(RealD tol, |  | ||||||
| 		Integer maxit, |  | ||||||
| 		LinearOperatorBase<Field>   &FineLinop, |  | ||||||
| 		LinearFunction<Field>   &Smoother, |  | ||||||
| 		LinearFunction<CoarseField>  &CoarseSolver, |  | ||||||
| 		LinearFunction<CoarseField>  &CoarseSolverPrecise, |  | ||||||
| 		Aggregation &Aggregates |  | ||||||
| 		) : |  | ||||||
|     TwoLevelCG<Field>(tol,maxit,FineLinop,Smoother,Aggregates.FineGrid), |  | ||||||
|       _CoarseSolver(CoarseSolver), |  | ||||||
|       _CoarseSolverPrecise(CoarseSolverPrecise), |  | ||||||
|       _Aggregates(Aggregates) |  | ||||||
|   { |  | ||||||
|     coarsegrid = Aggregates.CoarseGrid; |  | ||||||
|   }; |  | ||||||
|  |  | ||||||
|   virtual void PcgM1(Field & in, Field & out) |  | ||||||
|   { |  | ||||||
|     // [PTM+Q] in = [1 - Q A] M in + Q in = Min + Q [ in -A Min] |     // [PTM+Q] in = [1 - Q A] M in + Q in = Min + Q [ in -A Min] | ||||||
|  |     Field tmp(grid); | ||||||
|  |     Field Min(grid); | ||||||
|  |  | ||||||
|     Field tmp(this->grid); |     PcgM(in,Min); // Smoother call | ||||||
|     Field Min(this->grid); |  | ||||||
|     CoarseField PleftProj(this->coarsegrid); |  | ||||||
|     CoarseField PleftMss_proj(this->coarsegrid); |  | ||||||
|  |  | ||||||
|     GridStopWatch SmootherTimer; |     HermOp(Min,out); | ||||||
|     GridStopWatch MatrixTimer; |  | ||||||
|     SmootherTimer.Start(); |  | ||||||
|     this->_Smoother(in,Min); |  | ||||||
|     SmootherTimer.Stop(); |  | ||||||
|  |  | ||||||
|     MatrixTimer.Start(); |  | ||||||
|     this->_FineLinop.HermOp(Min,out); |  | ||||||
|     MatrixTimer.Stop(); |  | ||||||
|     axpy(tmp,-1.0,out,in);          // tmp  = in - A Min |     axpy(tmp,-1.0,out,in);          // tmp  = in - A Min | ||||||
|  |  | ||||||
|     GridStopWatch ProjTimer; |     ProjectToSubspace(tmp,PleftProj);      | ||||||
|     GridStopWatch CoarseTimer; |     ApplyInverse(PleftProj,PleftMss_proj); // Ass^{-1} [in - A Min]_s | ||||||
|     GridStopWatch PromTimer; |     PromoteFromSubspace(PleftMss_proj,tmp);// tmp = Q[in - A Min]   | ||||||
|     ProjTimer.Start(); |  | ||||||
|     this->_Aggregates.ProjectToSubspace(PleftProj,tmp);      |  | ||||||
|     ProjTimer.Stop(); |  | ||||||
|     CoarseTimer.Start(); |  | ||||||
|     this->_CoarseSolver(PleftProj,PleftMss_proj); // Ass^{-1} [in - A Min]_s |  | ||||||
|     CoarseTimer.Stop(); |  | ||||||
|     PromTimer.Start(); |  | ||||||
|     this->_Aggregates.PromoteFromSubspace(PleftMss_proj,tmp);// tmp = Q[in - A Min]   |  | ||||||
|     PromTimer.Stop(); |  | ||||||
|     std::cout << GridLogPerformance << "PcgM1 breakdown "<<std::endl; |  | ||||||
|     std::cout << GridLogPerformance << "\tSmoother   " << SmootherTimer.Elapsed() <<std::endl; |  | ||||||
|     std::cout << GridLogPerformance << "\tMatrix     " << MatrixTimer.Elapsed() <<std::endl; |  | ||||||
|     std::cout << GridLogPerformance << "\tProj       " << ProjTimer.Elapsed() <<std::endl; |  | ||||||
|     std::cout << GridLogPerformance << "\tCoarse     " << CoarseTimer.Elapsed() <<std::endl; |  | ||||||
|     std::cout << GridLogPerformance << "\tProm       " << PromTimer.Elapsed() <<std::endl; |  | ||||||
|  |  | ||||||
|     axpy(out,1.0,Min,tmp); // Min+tmp |     axpy(out,1.0,Min,tmp); // Min+tmp | ||||||
|   } |   } | ||||||
|  |  | ||||||
|   virtual void Vstart(Field & x,const Field & src) |   virtual void M2(const Field & in, Field & out) { | ||||||
|   { |     out=in; | ||||||
|  |     // Must override for Def2 only | ||||||
|  |     //  case PcgDef2: | ||||||
|  |     //    Pright(in,out); | ||||||
|  |     //    break; | ||||||
|  |   } | ||||||
|  |  | ||||||
|  |   virtual RealD M3(const Field & p, Field & mmp){ | ||||||
|  |     double d,dd; | ||||||
|  |     HermOpAndNorm(p,mmp,d,dd); | ||||||
|  |     return dd; | ||||||
|  |     // Must override for Def1 only | ||||||
|  |     //  case PcgDef1: | ||||||
|  |     //    d=linop_d->Mprec(p,mmp,tmp,0,1);// Dag no | ||||||
|  |     //      linop_d->Mprec(mmp,mp,tmp,1);// Dag yes | ||||||
|  |     //    Pleft(mp,mmp); | ||||||
|  |     //    d=real(linop_d->inner(p,mmp)); | ||||||
|  |   } | ||||||
|  |  | ||||||
|  |   virtual void VstartDef2(Field & xconst Field & src){ | ||||||
|  |     //case PcgDef2: | ||||||
|  |     //case PcgAdef2:  | ||||||
|  |     //case PcgAdef2f: | ||||||
|  |     //case PcgV11f: | ||||||
|     /////////////////////////////////// |     /////////////////////////////////// | ||||||
|     // Choose x_0 such that  |     // Choose x_0 such that  | ||||||
|     // x_0 = guess +  (A_ss^inv) r_s = guess + Ass_inv [src -Aguess] |     // x_0 = guess +  (A_ss^inv) r_s = guess + Ass_inv [src -Aguess] | ||||||
| @@ -290,72 +256,142 @@ class TwoLevelADEF2 : public TwoLevelCG<Field> | |||||||
|     //                   = src_s - (A guess)_s - src_s  + (A guess)_s  |     //                   = src_s - (A guess)_s - src_s  + (A guess)_s  | ||||||
|     //                   = 0  |     //                   = 0  | ||||||
|     /////////////////////////////////// |     /////////////////////////////////// | ||||||
|     Field r(this->grid); |     Field r(grid); | ||||||
|     Field mmp(this->grid); |     Field mmp(grid); | ||||||
|     CoarseField PleftProj(this->coarsegrid); |  | ||||||
|     CoarseField PleftMss_proj(this->coarsegrid); |  | ||||||
|      |      | ||||||
|     this->_Aggregates.ProjectToSubspace(PleftProj,src);      |     HermOp(x,mmp); | ||||||
|     this->_CoarseSolverPrecise(PleftProj,PleftMss_proj); // Ass^{-1} r_s |     axpy (r, -1.0, mmp, src);        // r_{-1} = src - A x | ||||||
|     this->_Aggregates.PromoteFromSubspace(PleftMss_proj,x);   |     ProjectToSubspace(r,PleftProj);      | ||||||
|  |     ApplyInverseCG(PleftProj,PleftMss_proj); // Ass^{-1} r_s | ||||||
|  |     PromoteFromSubspace(PleftMss_proj,mmp);   | ||||||
|  |     x=x+mmp; | ||||||
|  |  | ||||||
|   } |   } | ||||||
|  |  | ||||||
| }; |   virtual void Vstart(Field & x,const Field & src){ | ||||||
|  |     return; | ||||||
|  |   } | ||||||
|  |  | ||||||
|  |   ///////////////////////////////////////////////////////////////////// | ||||||
|  |   // Only Def1 has non-trivial Vout. Override in Def1 | ||||||
|  |   ///////////////////////////////////////////////////////////////////// | ||||||
|  |   virtual void   Vout  (Field & in, Field & out,Field & src){ | ||||||
|  |     out = in; | ||||||
|  |     //case PcgDef1: | ||||||
|  |     //    //Qb + PT x | ||||||
|  |     //    ProjectToSubspace(src,PleftProj);      | ||||||
|  |     //    ApplyInverse(PleftProj,PleftMss_proj); // Ass^{-1} r_s | ||||||
|  |     //    PromoteFromSubspace(PleftMss_proj,tmp);   | ||||||
|  |     //     | ||||||
|  |     //    Pright(in,out); | ||||||
|  |     //     | ||||||
|  |     //    linop_d->axpy(out,tmp,out,1.0); | ||||||
|  |     //    break; | ||||||
|  |   } | ||||||
|  |  | ||||||
|  |   //////////////////////////////////////////////////////////////////////////////////////////////// | ||||||
|  |   // Pright and Pleft are common to all implementations | ||||||
|  |   //////////////////////////////////////////////////////////////////////////////////////////////// | ||||||
|  |   virtual void Pright(Field & in,Field & out){ | ||||||
|  |     // P_R  = [ 1              0 ]  | ||||||
|  |     //        [ -Mss^-1 Msb    0 ]  | ||||||
|  |     Field in_sbar(grid); | ||||||
|  |  | ||||||
|  |     ProjectToSubspace(in,PleftProj);      | ||||||
|  |     PromoteFromSubspace(PleftProj,out);   | ||||||
|  |     axpy(in_sbar,-1.0,out,in);       // in_sbar = in - in_s  | ||||||
|  |  | ||||||
|  |     HermOp(in_sbar,out); | ||||||
|  |     ProjectToSubspace(out,PleftProj);           // Mssbar in_sbar  (project) | ||||||
|  |  | ||||||
|  |     ApplyInverse     (PleftProj,PleftMss_proj); // Mss^{-1} Mssbar  | ||||||
|  |     PromoteFromSubspace(PleftMss_proj,out);     //  | ||||||
|  |  | ||||||
|  |     axpy(out,-1.0,out,in_sbar);     // in_sbar - Mss^{-1} Mssbar in_sbar | ||||||
|  |   } | ||||||
|  |   virtual void Pleft (Field & in,Field & out){ | ||||||
|  |     // P_L  = [ 1  -Mbs Mss^-1]  | ||||||
|  |     //        [ 0   0         ]  | ||||||
|  |     Field in_sbar(grid); | ||||||
|  |     Field    tmp2(grid); | ||||||
|  |     Field    Mtmp(grid); | ||||||
|  |  | ||||||
|  |     ProjectToSubspace(in,PleftProj);      | ||||||
|  |     PromoteFromSubspace(PleftProj,out);   | ||||||
|  |     axpy(in_sbar,-1.0,out,in);      // in_sbar = in - in_s | ||||||
|  |  | ||||||
|  |     ApplyInverse(PleftProj,PleftMss_proj); // Mss^{-1} in_s | ||||||
|  |     PromoteFromSubspace(PleftMss_proj,out); | ||||||
|  |  | ||||||
|  |     HermOp(out,Mtmp); | ||||||
|  |  | ||||||
|  |     ProjectToSubspace(Mtmp,PleftProj);      // Msbar s Mss^{-1} | ||||||
|  |     PromoteFromSubspace(PleftProj,tmp2); | ||||||
|  |  | ||||||
|  |     axpy(out,-1.0,tmp2,Mtmp); | ||||||
|  |     axpy(out,-1.0,out,in_sbar);     // in_sbar - Msbars Mss^{-1} in_s | ||||||
|  |   } | ||||||
|  | } | ||||||
|  |  | ||||||
| template<class Field> | template<class Field> | ||||||
| class TwoLevelADEF1defl : public TwoLevelCG<Field> | class TwoLevelFlexiblePcgADef2 : public TwoLevelFlexiblePcg<Field> { | ||||||
| { |  public: | ||||||
| public: |   virtual void M(Field & in,Field & out,Field & tmp){ | ||||||
|   const std::vector<Field> &evec; |  | ||||||
|   const std::vector<RealD> &eval; |  | ||||||
|  |  | ||||||
|   TwoLevelADEF1defl(RealD tol, |  | ||||||
| 		   Integer maxit, |  | ||||||
| 		   LinearOperatorBase<Field>   &FineLinop, |  | ||||||
| 		   LinearFunction<Field>   &Smoother, |  | ||||||
| 		   std::vector<Field> &_evec, |  | ||||||
| 		   std::vector<RealD> &_eval) :  |  | ||||||
|     TwoLevelCG<Field>(tol,maxit,FineLinop,Smoother,_evec[0].Grid()), |  | ||||||
|     evec(_evec), |  | ||||||
|     eval(_eval) |  | ||||||
|   {}; |  | ||||||
|  |  | ||||||
|   // Can just inherit existing Vout |  | ||||||
|   // Can just inherit existing M2 |  | ||||||
|   // Can just inherit existing M3 |  | ||||||
|  |  | ||||||
|   // Simple vstart - do nothing |  | ||||||
|   virtual void Vstart(Field & x,const Field & src){ x=src; }; |  | ||||||
|  |  | ||||||
|   // Override PcgM1 |  | ||||||
|   virtual void PcgM1(Field & in, Field & out) |  | ||||||
|   { |  | ||||||
|     int N=evec.size(); |  | ||||||
|     Field Pin(this->grid); |  | ||||||
|     Field Qin(this->grid); |  | ||||||
|  |  | ||||||
|     //MP  + Q = M(1-AQ) + Q = M |  | ||||||
|     // // If we are eigenvector deflating in coarse space |  | ||||||
|     // // Q   = Sum_i |phi_i> 1/lambda_i <phi_i| |  | ||||||
|     // // A Q = Sum_i |phi_i> <phi_i| |  | ||||||
|     // // M(1-AQ) = M(1-proj) + Q |  | ||||||
|     Qin.Checkerboard()=in.Checkerboard(); |  | ||||||
|     Qin = Zero(); |  | ||||||
|     Pin = in; |  | ||||||
|     for (int i=0;i<N;i++) { |  | ||||||
|       const Field& tmp = evec[i]; |  | ||||||
|       auto ip = TensorRemove(innerProduct(tmp,in)); |  | ||||||
|       axpy(Qin, ip / eval[i],tmp,Qin); |  | ||||||
|       axpy(Pin, -ip ,tmp,Pin); |  | ||||||
|   }  |   }  | ||||||
|  |   virtual void M1(Field & in, Field & out,Field & tmp,Field & mp){ | ||||||
|  |  | ||||||
|     this->_Smoother(Pin,out); |  | ||||||
|  |  | ||||||
|     out = out + Qin; |  | ||||||
|   } |   } | ||||||
| }; |   virtual void M2(Field & in, Field & out){ | ||||||
|  |  | ||||||
| NAMESPACE_END(Grid); |   } | ||||||
|  |   virtual RealD M3(Field & p, Field & mp,Field & mmp, Field & tmp){ | ||||||
|  |  | ||||||
|  |   } | ||||||
|  |   virtual void Vstart(Field & in, Field & src, Field & r, Field & mp, Field & mmp, Field & tmp){ | ||||||
|  |  | ||||||
|  |   } | ||||||
|  | } | ||||||
|  | /* | ||||||
|  | template<class Field> | ||||||
|  | class TwoLevelFlexiblePcgAD : public TwoLevelFlexiblePcg<Field> { | ||||||
|  |  public: | ||||||
|  |   virtual void M(Field & in,Field & out,Field & tmp);  | ||||||
|  |   virtual void M1(Field & in, Field & out,Field & tmp,Field & mp); | ||||||
|  |   virtual void M2(Field & in, Field & out); | ||||||
|  |   virtual RealD M3(Field & p, Field & mp,Field & mmp, Field & tmp); | ||||||
|  |   virtual void Vstart(Field & in, Field & src, Field & r, Field & mp, Field & mmp, Field & tmp); | ||||||
|  | } | ||||||
|  |  | ||||||
|  | template<class Field> | ||||||
|  | class TwoLevelFlexiblePcgDef1 : public TwoLevelFlexiblePcg<Field> { | ||||||
|  |  public: | ||||||
|  |   virtual void M(Field & in,Field & out,Field & tmp);  | ||||||
|  |   virtual void M1(Field & in, Field & out,Field & tmp,Field & mp); | ||||||
|  |   virtual void M2(Field & in, Field & out); | ||||||
|  |   virtual RealD M3(Field & p, Field & mp,Field & mmp, Field & tmp); | ||||||
|  |   virtual void Vstart(Field & in, Field & src, Field & r, Field & mp, Field & mmp, Field & tmp); | ||||||
|  |   virtual void   Vout  (Field & in, Field & out,Field & src,Field & tmp); | ||||||
|  | } | ||||||
|  |  | ||||||
|  | template<class Field> | ||||||
|  | class TwoLevelFlexiblePcgDef2 : public TwoLevelFlexiblePcg<Field> { | ||||||
|  |  public: | ||||||
|  |   virtual void M(Field & in,Field & out,Field & tmp);  | ||||||
|  |   virtual void M1(Field & in, Field & out,Field & tmp,Field & mp); | ||||||
|  |   virtual void M2(Field & in, Field & out); | ||||||
|  |   virtual RealD M3(Field & p, Field & mp,Field & mmp, Field & tmp); | ||||||
|  |   virtual void Vstart(Field & in, Field & src, Field & r, Field & mp, Field & mmp, Field & tmp); | ||||||
|  | } | ||||||
|  |  | ||||||
|  | template<class Field> | ||||||
|  | class TwoLevelFlexiblePcgV11: public TwoLevelFlexiblePcg<Field> { | ||||||
|  |  public: | ||||||
|  |   virtual void M(Field & in,Field & out,Field & tmp);  | ||||||
|  |   virtual void M1(Field & in, Field & out,Field & tmp,Field & mp); | ||||||
|  |   virtual void M2(Field & in, Field & out); | ||||||
|  |   virtual RealD M3(Field & p, Field & mp,Field & mmp, Field & tmp); | ||||||
|  |   virtual void Vstart(Field & in, Field & src, Field & r, Field & mp, Field & mmp, Field & tmp); | ||||||
|  | } | ||||||
|  | */ | ||||||
| #endif | #endif | ||||||
|   | |||||||
| @@ -183,13 +183,13 @@ public: | |||||||
| 		  << "\tTrue residual " << true_residual | 		  << "\tTrue residual " << true_residual | ||||||
| 		  << "\tTarget " << Tolerance << std::endl; | 		  << "\tTarget " << Tolerance << std::endl; | ||||||
|  |  | ||||||
|  |         std::cout << GridLogMessage << "Time breakdown "<<std::endl; | ||||||
| 	std::cout << GridLogMessage << "\tElapsed    " << SolverTimer.Elapsed() <<std::endl; | 	std::cout << GridLogMessage << "\tElapsed    " << SolverTimer.Elapsed() <<std::endl; | ||||||
|         std::cout << GridLogPerformance << "Time breakdown "<<std::endl; | 	std::cout << GridLogMessage << "\tMatrix     " << MatrixTimer.Elapsed() <<std::endl; | ||||||
| 	std::cout << GridLogPerformance << "\tMatrix     " << MatrixTimer.Elapsed() <<std::endl; | 	std::cout << GridLogMessage << "\tLinalg     " << LinalgTimer.Elapsed() <<std::endl; | ||||||
| 	std::cout << GridLogPerformance << "\tLinalg     " << LinalgTimer.Elapsed() <<std::endl; | 	std::cout << GridLogMessage << "\tInner      " << InnerTimer.Elapsed() <<std::endl; | ||||||
| 	std::cout << GridLogPerformance << "\tInner      " << InnerTimer.Elapsed() <<std::endl; | 	std::cout << GridLogMessage << "\tAxpyNorm   " << AxpyNormTimer.Elapsed() <<std::endl; | ||||||
| 	std::cout << GridLogPerformance << "\tAxpyNorm   " << AxpyNormTimer.Elapsed() <<std::endl; | 	std::cout << GridLogMessage << "\tLinearComb " << LinearCombTimer.Elapsed() <<std::endl; | ||||||
| 	std::cout << GridLogPerformance << "\tLinearComb " << LinearCombTimer.Elapsed() <<std::endl; |  | ||||||
|  |  | ||||||
| 	std::cout << GridLogDebug << "\tMobius flop rate " << DwfFlops/ usecs<< " Gflops " <<std::endl; | 	std::cout << GridLogDebug << "\tMobius flop rate " << DwfFlops/ usecs<< " Gflops " <<std::endl; | ||||||
|  |  | ||||||
|   | |||||||
| @@ -465,7 +465,7 @@ until convergence | |||||||
|  |  | ||||||
|     Field& evec_k = evec[k]; |     Field& evec_k = evec[k]; | ||||||
|  |  | ||||||
|     _PolyOp(evec_k,w);    std::cout<<GridLogDebug << "PolyOp" <<std::endl; |     _PolyOp(evec_k,w);    std::cout<<GridLogIRL << "PolyOp" <<std::endl; | ||||||
|  |  | ||||||
|     if(k>0) w -= lme[k-1] * evec[k-1]; |     if(k>0) w -= lme[k-1] * evec[k-1]; | ||||||
|  |  | ||||||
| @@ -480,9 +480,9 @@ until convergence | |||||||
|     lme[k] = beta; |     lme[k] = beta; | ||||||
|  |  | ||||||
|     if ( (k>0) && ( (k % orth_period) == 0 )) { |     if ( (k>0) && ( (k % orth_period) == 0 )) { | ||||||
|       std::cout<<GridLogDebug << "Orthogonalising " <<k<<std::endl; |       std::cout<<GridLogIRL << "Orthogonalising " <<k<<std::endl; | ||||||
|       orthogonalize(w,evec,k); // orthonormalise |       orthogonalize(w,evec,k); // orthonormalise | ||||||
|       std::cout<<GridLogDebug << "Orthogonalised " <<k<<std::endl; |       std::cout<<GridLogIRL << "Orthogonalised " <<k<<std::endl; | ||||||
|     } |     } | ||||||
|  |  | ||||||
|     if(k < Nm-1) evec[k+1] = w; |     if(k < Nm-1) evec[k+1] = w; | ||||||
| @@ -491,7 +491,7 @@ until convergence | |||||||
|     if ( beta < tiny )  |     if ( beta < tiny )  | ||||||
|       std::cout<<GridLogIRL << " beta is tiny "<<beta<<std::endl; |       std::cout<<GridLogIRL << " beta is tiny "<<beta<<std::endl; | ||||||
|  |  | ||||||
|     std::cout<<GridLogDebug << "Lanczos step complete " <<k<<std::endl; |     std::cout<<GridLogIRL << "Lanczos step complete " <<k<<std::endl; | ||||||
|   } |   } | ||||||
|  |  | ||||||
|   void diagonalize_Eigen(std::vector<RealD>& lmd, std::vector<RealD>& lme,  |   void diagonalize_Eigen(std::vector<RealD>& lmd, std::vector<RealD>& lme,  | ||||||
|   | |||||||
| @@ -33,7 +33,7 @@ NAMESPACE_BEGIN(Grid); | |||||||
| /////////////////////////////////////////////////////////////////////////////////////////////////////// | /////////////////////////////////////////////////////////////////////////////////////////////////////// | ||||||
| // Take a matrix and form an NE solver calling a Herm solver | // Take a matrix and form an NE solver calling a Herm solver | ||||||
| /////////////////////////////////////////////////////////////////////////////////////////////////////// | /////////////////////////////////////////////////////////////////////////////////////////////////////// | ||||||
| template<class Field> class NormalEquations : public LinearFunction<Field>{ | template<class Field> class NormalEquations { | ||||||
| private: | private: | ||||||
|   SparseMatrixBase<Field> & _Matrix; |   SparseMatrixBase<Field> & _Matrix; | ||||||
|   OperatorFunction<Field> & _HermitianSolver; |   OperatorFunction<Field> & _HermitianSolver; | ||||||
| @@ -60,7 +60,7 @@ public: | |||||||
|   }      |   }      | ||||||
| }; | }; | ||||||
|  |  | ||||||
| template<class Field> class HPDSolver : public LinearFunction<Field> { | template<class Field> class HPDSolver { | ||||||
| private: | private: | ||||||
|   LinearOperatorBase<Field> & _Matrix; |   LinearOperatorBase<Field> & _Matrix; | ||||||
|   OperatorFunction<Field> & _HermitianSolver; |   OperatorFunction<Field> & _HermitianSolver; | ||||||
| @@ -84,7 +84,7 @@ public: | |||||||
| }; | }; | ||||||
|  |  | ||||||
|  |  | ||||||
| template<class Field> class MdagMSolver : public LinearFunction<Field> { | template<class Field> class MdagMSolver { | ||||||
| private: | private: | ||||||
|   SparseMatrixBase<Field> & _Matrix; |   SparseMatrixBase<Field> & _Matrix; | ||||||
|   OperatorFunction<Field> & _HermitianSolver; |   OperatorFunction<Field> & _HermitianSolver; | ||||||
|   | |||||||
| @@ -20,7 +20,7 @@ template<class Field> class PowerMethod | |||||||
|     RealD evalMaxApprox = 0.0;  |     RealD evalMaxApprox = 0.0;  | ||||||
|     auto src_n = src;  |     auto src_n = src;  | ||||||
|     auto tmp = src;  |     auto tmp = src;  | ||||||
|     const int _MAX_ITER_EST_ = 100;  |     const int _MAX_ITER_EST_ = 50;  | ||||||
|  |  | ||||||
|     for (int i=0;i<_MAX_ITER_EST_;i++) {  |     for (int i=0;i<_MAX_ITER_EST_;i++) {  | ||||||
|        |        | ||||||
|   | |||||||
| @@ -1,262 +0,0 @@ | |||||||
| /************************************************************************************* |  | ||||||
|  |  | ||||||
|     Grid physics library, www.github.com/paboyle/Grid  |  | ||||||
|  |  | ||||||
|     Source file: ./lib/algorithms/Aggregates.h |  | ||||||
|  |  | ||||||
|     Copyright (C) 2015 |  | ||||||
|  |  | ||||||
| Author: Azusa Yamaguchi <ayamaguc@staffmail.ed.ac.uk> |  | ||||||
| Author: Peter Boyle <paboyle@ph.ed.ac.uk> |  | ||||||
| Author: Peter Boyle <peterboyle@Peters-MacBook-Pro-2.local> |  | ||||||
| Author: paboyle <paboyle@ph.ed.ac.uk> |  | ||||||
|  |  | ||||||
|     This program is free software; you can redistribute it and/or modify |  | ||||||
|     it under the terms of the GNU General Public License as published by |  | ||||||
|     the Free Software Foundation; either version 2 of the License, or |  | ||||||
|     (at your option) any later version. |  | ||||||
|  |  | ||||||
|     This program is distributed in the hope that it will be useful, |  | ||||||
|     but WITHOUT ANY WARRANTY; without even the implied warranty of |  | ||||||
|     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the |  | ||||||
|     GNU General Public License for more details. |  | ||||||
|  |  | ||||||
|     You should have received a copy of the GNU General Public License along |  | ||||||
|     with this program; if not, write to the Free Software Foundation, Inc., |  | ||||||
|     51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. |  | ||||||
|  |  | ||||||
|     See the full license in the file "LICENSE" in the top level distribution directory |  | ||||||
| *************************************************************************************/ |  | ||||||
| /*  END LEGAL */ |  | ||||||
| #pragma once |  | ||||||
|  |  | ||||||
| NAMESPACE_BEGIN(Grid); |  | ||||||
|  |  | ||||||
| // Owns nbasis fine-grid vectors ("subspace") and wraps the block |  | ||||||
| // orthogonalise / project / promote calls that operate on them. |  | ||||||
| // The CreateSubspace* members fill subspace[] starting from Gaussian noise |  | ||||||
| // that is normalised and then filtered through the supplied operator. |  | ||||||
| template<class Fobj,class CComplex,int nbasis> |  | ||||||
| class Aggregation { |  | ||||||
| public: |  | ||||||
|   typedef iVector<CComplex,nbasis >             siteVector; |  | ||||||
|   typedef Lattice<siteVector>                 CoarseVector; |  | ||||||
|   typedef Lattice<iMatrix<CComplex,nbasis > > CoarseMatrix; |  | ||||||
|  |  | ||||||
|   typedef Lattice< CComplex >   CoarseScalar; // used for inner products on fine field |  | ||||||
|   typedef Lattice<Fobj >        FineField; |  | ||||||
|  |  | ||||||
|   GridBase *CoarseGrid; |  | ||||||
|   GridBase *FineGrid; |  | ||||||
|   std::vector<Lattice<Fobj> > subspace; // nbasis vectors living on FineGrid |  | ||||||
|   int checkerboard; |  | ||||||
|   int Checkerboard(void){return checkerboard;} |  | ||||||
|  |  | ||||||
|   // Stores the two grid pointers and the checkerboard value, and allocates |  | ||||||
|   // nbasis fine-grid vectors. The vectors are not filled here. |  | ||||||
|   Aggregation(GridBase *_CoarseGrid,GridBase *_FineGrid,int _checkerboard) :  |  | ||||||
|     CoarseGrid(_CoarseGrid), |  | ||||||
|     FineGrid(_FineGrid), |  | ||||||
|     subspace(nbasis,_FineGrid), |  | ||||||
|     checkerboard(_checkerboard) |  | ||||||
|   { |  | ||||||
|   }; |  | ||||||
|    |  | ||||||
|   // Calls blockOrthogonalise on subspace[], handing it a coarse-grid scalar |  | ||||||
|   // field for the block inner products. |  | ||||||
|   void Orthogonalise(void){ |  | ||||||
|     CoarseScalar InnerProd(CoarseGrid);  |  | ||||||
|     //    std::cout << GridLogMessage <<" Block Gramm-Schmidt pass 1"<<std::endl; |  | ||||||
|     blockOrthogonalise(InnerProd,subspace); |  | ||||||
|   }  |  | ||||||
|   // Fine -> coarse: forwards to blockProject with subspace[] as the basis. |  | ||||||
|   void ProjectToSubspace(CoarseVector &CoarseVec,const FineField &FineVec){ |  | ||||||
|     blockProject(CoarseVec,FineVec,subspace); |  | ||||||
|   } |  | ||||||
|   // Coarse -> fine: FineVec takes the checkerboard of subspace[0], then |  | ||||||
|   // blockPromote is called with subspace[] as the basis. |  | ||||||
|   void PromoteFromSubspace(const CoarseVector &CoarseVec,FineField &FineVec){ |  | ||||||
|     FineVec.Checkerboard() = subspace[0].Checkerboard(); |  | ||||||
|     blockPromote(CoarseVec,FineVec,subspace); |  | ||||||
|   } |  | ||||||
|  |  | ||||||
|   // Fills all nbasis vectors with Gaussian noise scaled by norm2(noise)^(-1/2). |  | ||||||
|   virtual void CreateSubspaceRandom(GridParallelRNG  &RNG) { |  | ||||||
|     int nn=nbasis; |  | ||||||
|     RealD scale; |  | ||||||
|     FineField noise(FineGrid); |  | ||||||
|     for(int b=0;b<nn;b++){ |  | ||||||
|       subspace[b] = Zero(); |  | ||||||
|       gaussian(RNG,noise); |  | ||||||
|       scale = std::pow(norm2(noise),-0.5);  |  | ||||||
|       noise=noise*scale; |  | ||||||
|       subspace[b] = noise; |  | ||||||
|     } |  | ||||||
|   } |  | ||||||
|  |  | ||||||
|   // Fills subspace[0..nn-1]. For each vector: normalised Gaussian noise is |  | ||||||
|   // passed once through CG on hermop, the result is renormalised and stored. |  | ||||||
|   // norm2 of hermop.Op applied to the vector is logged before and after. |  | ||||||
|   // NOTE(review): CG(1.0e-2,100,false) is presumably (tolerance, max |  | ||||||
|   // iterations, no abort on non-convergence) - confirm against ConjugateGradient. |  | ||||||
|   virtual void CreateSubspace(GridParallelRNG  &RNG,LinearOperatorBase<FineField> &hermop,int nn=nbasis) |  | ||||||
|   { |  | ||||||
|  |  | ||||||
|     RealD scale; |  | ||||||
|  |  | ||||||
|     ConjugateGradient<FineField> CG(1.0e-2,100,false); |  | ||||||
|     FineField noise(FineGrid); |  | ||||||
|     FineField Mn(FineGrid); |  | ||||||
|  |  | ||||||
|     for(int b=0;b<nn;b++){ |  | ||||||
|        |  | ||||||
|       subspace[b] = Zero(); |  | ||||||
|       gaussian(RNG,noise); |  | ||||||
|       scale = std::pow(norm2(noise),-0.5);  |  | ||||||
|       noise=noise*scale; |  | ||||||
|        |  | ||||||
|       hermop.Op(noise,Mn); std::cout<<GridLogMessage << "noise   ["<<b<<"] <n|MdagM|n> "<<norm2(Mn)<<std::endl; |  | ||||||
|  |  | ||||||
|       // Single filtering pass (loop kept so the pass count is easy to change) |  | ||||||
|       for(int i=0;i<1;i++){ |  | ||||||
|  |  | ||||||
| 	CG(hermop,noise,subspace[b]); |  | ||||||
|  |  | ||||||
| 	noise = subspace[b]; |  | ||||||
| 	scale = std::pow(norm2(noise),-0.5);  |  | ||||||
| 	noise=noise*scale; |  | ||||||
|  |  | ||||||
|       } |  | ||||||
|  |  | ||||||
|       hermop.Op(noise,Mn); std::cout<<GridLogMessage << "filtered["<<b<<"] <f|MdagM|f> "<<norm2(Mn)<<std::endl; |  | ||||||
|       subspace[b]   = noise; |  | ||||||
|  |  | ||||||
|     } |  | ||||||
|   } |  | ||||||
|  |  | ||||||
|   //////////////////////////////////////////////////////////////////////////////////////////////// |  | ||||||
|   // World of possibilities here. But have tried quite a lot of experiments (250+ jobs run on Summit) |  | ||||||
|   // and this is the best I found |  | ||||||
|   //////////////////////////////////////////////////////////////////////////////////////////////// |  | ||||||
|  |  | ||||||
|   // Fills subspace[0..nn-1] from ONE noise vector. |  | ||||||
|   //   pass 1: subspace[0] = normalised Chebyshev(lo,hi,orderfilter) applied to the noise. |  | ||||||
|   //   pass 2: starting from subspace[0], run the three-term recurrence |  | ||||||
|   //           T_{n+1} = 2 x T_n - T_{n-1} with x = xscale*HermOp + mscale |  | ||||||
|   //           (window [filterlo,hi]) and store the normalised T_{n+1} at |  | ||||||
|   //           loop index n = ordermin, ordermin+orderstep, ... |  | ||||||
|   // The loop bound is chosen so that exactly nn vectors are produced; this |  | ||||||
|   // is checked by the final assert. |  | ||||||
|   virtual void CreateSubspaceChebyshev(GridParallelRNG  &RNG,LinearOperatorBase<FineField> &hermop, |  | ||||||
| 				       int nn, |  | ||||||
| 				       double hi, |  | ||||||
| 				       double lo, |  | ||||||
| 				       int orderfilter, |  | ||||||
| 				       int ordermin, |  | ||||||
| 				       int orderstep, |  | ||||||
| 				       double filterlo |  | ||||||
| 				       ) { |  | ||||||
|  |  | ||||||
|     RealD scale; |  | ||||||
|  |  | ||||||
|     FineField noise(FineGrid); |  | ||||||
|     FineField Mn(FineGrid); |  | ||||||
|     FineField tmp(FineGrid); |  | ||||||
|  |  | ||||||
|     // New normalised noise |  | ||||||
|     gaussian(RNG,noise); |  | ||||||
|     scale = std::pow(norm2(noise),-0.5);  |  | ||||||
|     noise=noise*scale; |  | ||||||
|  |  | ||||||
|     std::cout << GridLogMessage<<" Chebyshev subspace pass-1 : ord "<<orderfilter<<" ["<<lo<<","<<hi<<"]"<<std::endl; |  | ||||||
|     std::cout << GridLogMessage<<" Chebyshev subspace pass-2 : nbasis"<<nn<<" min " |  | ||||||
| 	      <<ordermin<<" step "<<orderstep |  | ||||||
| 	      <<" lo"<<filterlo<<std::endl; |  | ||||||
|  |  | ||||||
|     // Initial matrix element |  | ||||||
|     hermop.Op(noise,Mn); std::cout<<GridLogMessage << "noise <n|MdagM|n> "<<norm2(Mn)<<std::endl; |  | ||||||
|  |  | ||||||
|     int b =0; |  | ||||||
|     { |  | ||||||
|       // Filter |  | ||||||
|       Chebyshev<FineField> Cheb(lo,hi,orderfilter); |  | ||||||
|       Cheb(hermop,noise,Mn); |  | ||||||
|       // normalise |  | ||||||
|       scale = std::pow(norm2(Mn),-0.5); 	Mn=Mn*scale; |  | ||||||
|       subspace[b]   = Mn; |  | ||||||
|       hermop.Op(Mn,tmp);  |  | ||||||
|       std::cout<<GridLogMessage << "filt ["<<b<<"] <n|MdagM|n> "<<norm2(tmp)<<std::endl; |  | ||||||
|       b++; |  | ||||||
|     } |  | ||||||
|  |  | ||||||
|     // Generate a full sequence of Chebyshevs |  | ||||||
|     { |  | ||||||
|       lo=filterlo; |  | ||||||
|       noise=Mn; |  | ||||||
|  |  | ||||||
|       FineField T0(FineGrid); T0 = noise;   |  | ||||||
|       FineField T1(FineGrid);  |  | ||||||
|       FineField T2(FineGrid); |  | ||||||
|       FineField y(FineGrid); |  | ||||||
|        |  | ||||||
|       FineField *Tnm = &T0; |  | ||||||
|       FineField *Tn  = &T1; |  | ||||||
|       FineField *Tnp = &T2; |  | ||||||
|  |  | ||||||
|       // Tn=T1 = (xscale M + mscale)in |  | ||||||
|       RealD xscale = 2.0/(hi-lo); |  | ||||||
|       RealD mscale = -(hi+lo)/(hi-lo); |  | ||||||
|       hermop.HermOp(T0,y); |  | ||||||
|       T1=y*xscale+noise*mscale; |  | ||||||
|  |  | ||||||
|       for(int n=2;n<=ordermin+orderstep*(nn-2);n++){ |  | ||||||
| 	 |  | ||||||
| 	hermop.HermOp(*Tn,y); |  | ||||||
|  |  | ||||||
| 	autoView( y_v , y, AcceleratorWrite); |  | ||||||
| 	autoView( Tn_v , (*Tn), AcceleratorWrite); |  | ||||||
| 	autoView( Tnp_v , (*Tnp), AcceleratorWrite); |  | ||||||
| 	autoView( Tnm_v , (*Tnm), AcceleratorWrite); |  | ||||||
| 	const int Nsimd = CComplex::Nsimd(); |  | ||||||
| 	// y <- xscale*y + mscale*Tn ; Tnp <- 2*y - Tnm |  | ||||||
| 	accelerator_for(ss, FineGrid->oSites(), Nsimd, { |  | ||||||
| 	  coalescedWrite(y_v[ss],xscale*y_v(ss)+mscale*Tn_v(ss)); |  | ||||||
| 	  coalescedWrite(Tnp_v[ss],2.0*y_v(ss)-Tnm_v(ss)); |  | ||||||
|         }); |  | ||||||
|  |  | ||||||
| 	// Possible more fine grained control is needed than a linear sweep, |  | ||||||
| 	// but huge productivity gain if this is simple algorithm and not a tunable |  | ||||||
| 	// |  | ||||||
| 	// Store only at n = ordermin, ordermin+orderstep, ... |  | ||||||
| 	// The earlier form used a sentinel m=1 for n<ordermin and tested |  | ||||||
| 	// (m%orderstep)==0; with orderstep==1 that test also passed below |  | ||||||
| 	// ordermin, so extra vectors were stored and b ran past nn. |  | ||||||
| 	if ( (n>=ordermin) && ( ((n-ordermin)%orderstep)==0 ) ) {  |  | ||||||
| 	  assert(b<nn); // never write beyond the requested number of vectors |  | ||||||
| 	  Mn=*Tnp; |  | ||||||
| 	  scale = std::pow(norm2(Mn),-0.5);         Mn=Mn*scale; |  | ||||||
| 	  subspace[b] = Mn; |  | ||||||
| 	  hermop.Op(Mn,tmp);  |  | ||||||
| 	  std::cout<<GridLogMessage << n<<" filt ["<<b<<"] <n|MdagM|n> "<<norm2(tmp)<<std::endl; |  | ||||||
| 	  b++; |  | ||||||
| 	} |  | ||||||
|  |  | ||||||
| 	// Cycle pointers to avoid copies |  | ||||||
| 	FineField *swizzle = Tnm; |  | ||||||
| 	Tnm    =Tn; |  | ||||||
| 	Tn     =Tnp; |  | ||||||
| 	Tnp    =swizzle; |  | ||||||
| 	   |  | ||||||
|       } |  | ||||||
|     } |  | ||||||
|     assert(b==nn); |  | ||||||
|   } |  | ||||||
|  |  | ||||||
|   // Pure-noise variant: every vector gets its own normalised Gaussian noise, |  | ||||||
|   // filtered by Chebyshev(lo,hi,orderfilter) and renormalised. |  | ||||||
|   // NOTE(review): the loop runs to nbasis, so nn only appears in the log |  | ||||||
|   // line. Left as is because callers may rely on all nbasis vectors being |  | ||||||
|   // filled - confirm whether nn was meant to bound the loop. |  | ||||||
|   virtual void CreateSubspaceChebyshev(GridParallelRNG  &RNG,LinearOperatorBase<FineField> &hermop, |  | ||||||
| 				       int nn, |  | ||||||
| 				       double hi, |  | ||||||
| 				       double lo, |  | ||||||
| 				       int orderfilter |  | ||||||
| 				       ) { |  | ||||||
|  |  | ||||||
|     RealD scale; |  | ||||||
|  |  | ||||||
|     FineField noise(FineGrid); |  | ||||||
|     FineField Mn(FineGrid); |  | ||||||
|     FineField tmp(FineGrid); |  | ||||||
|  |  | ||||||
|     // New normalised noise |  | ||||||
|     std::cout << GridLogMessage<<" Chebyshev subspace pure noise : ord "<<orderfilter<<" ["<<lo<<","<<hi<<"]"<<std::endl; |  | ||||||
|     std::cout << GridLogMessage<<" Chebyshev subspace pure noise  : nbasis "<<nn<<std::endl; |  | ||||||
|  |  | ||||||
|  |  | ||||||
|     for(int b =0;b<nbasis;b++) |  | ||||||
|     { |  | ||||||
|       gaussian(RNG,noise); |  | ||||||
|       scale = std::pow(norm2(noise),-0.5);  |  | ||||||
|       noise=noise*scale; |  | ||||||
|  |  | ||||||
|       // Initial matrix element |  | ||||||
|       hermop.Op(noise,Mn); |  | ||||||
|       if(b==0) std::cout<<GridLogMessage << "noise <n|MdagM|n> "<<norm2(Mn)<<std::endl; |  | ||||||
|       // Filter |  | ||||||
|       Chebyshev<FineField> Cheb(lo,hi,orderfilter); |  | ||||||
|       Cheb(hermop,noise,Mn); |  | ||||||
|       // normalise |  | ||||||
|       scale = std::pow(norm2(Mn),-0.5); 	Mn=Mn*scale; |  | ||||||
|       subspace[b]   = Mn; |  | ||||||
|       hermop.Op(Mn,tmp);  |  | ||||||
|       std::cout<<GridLogMessage << "filt ["<<b<<"] <n|MdagM|n> "<<norm2(tmp)<<std::endl; |  | ||||||
|     } |  | ||||||
|  |  | ||||||
|   } |  | ||||||
|  |  | ||||||
| }; |  | ||||||
| NAMESPACE_END(Grid); |  | ||||||
| @@ -1,814 +0,0 @@ | |||||||
| /************************************************************************************* |  | ||||||
|  |  | ||||||
|     Grid physics library, www.github.com/paboyle/Grid  |  | ||||||
|  |  | ||||||
|     Source file: ./lib/algorithms/CoarsenedMatrix.h |  | ||||||
|  |  | ||||||
|     Copyright (C) 2015 |  | ||||||
|  |  | ||||||
| Author: Azusa Yamaguchi <ayamaguc@staffmail.ed.ac.uk> |  | ||||||
| Author: Peter Boyle <paboyle@ph.ed.ac.uk> |  | ||||||
| Author: Peter Boyle <peterboyle@Peters-MacBook-Pro-2.local> |  | ||||||
| Author: paboyle <paboyle@ph.ed.ac.uk> |  | ||||||
|  |  | ||||||
|     This program is free software; you can redistribute it and/or modify |  | ||||||
|     it under the terms of the GNU General Public License as published by |  | ||||||
|     the Free Software Foundation; either version 2 of the License, or |  | ||||||
|     (at your option) any later version. |  | ||||||
|  |  | ||||||
|     This program is distributed in the hope that it will be useful, |  | ||||||
|     but WITHOUT ANY WARRANTY; without even the implied warranty of |  | ||||||
|     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the |  | ||||||
|     GNU General Public License for more details. |  | ||||||
|  |  | ||||||
|     You should have received a copy of the GNU General Public License along |  | ||||||
|     with this program; if not, write to the Free Software Foundation, Inc., |  | ||||||
|     51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. |  | ||||||
|  |  | ||||||
|     See the full license in the file "LICENSE" in the top level distribution directory |  | ||||||
| *************************************************************************************/ |  | ||||||
| /*  END LEGAL */ |  | ||||||
| #ifndef  GRID_ALGORITHM_COARSENED_MATRIX_H |  | ||||||
| #define  GRID_ALGORITHM_COARSENED_MATRIX_H |  | ||||||
|  |  | ||||||
| #include <Grid/qcd/QCD.h> // needed for Dagger(Yes|No), Inverse(Yes|No) |  | ||||||
|  |  | ||||||
| NAMESPACE_BEGIN(Grid); |  | ||||||
|  |  | ||||||
| // Site-local inner product of fineX and fineY, multiplied by FineMask, then |  | ||||||
| // block-summed into CoarseInner. |  | ||||||
| template<class vobj,class CComplex> |  | ||||||
| inline void blockMaskedInnerProduct(Lattice<CComplex> &CoarseInner, |  | ||||||
| 				    const Lattice<decltype(innerProduct(vobj(),vobj()))> &FineMask, |  | ||||||
| 				    const Lattice<vobj> &fineX, |  | ||||||
| 				    const Lattice<vobj> &fineY) |  | ||||||
| { |  | ||||||
|   typedef decltype(innerProduct(vobj(),vobj())) dotp; |  | ||||||
|  |  | ||||||
|   GridBase *coarseGrid = CoarseInner.Grid(); |  | ||||||
|   GridBase *fineGrid   = fineX.Grid(); |  | ||||||
|  |  | ||||||
|   // Per-site products inherit the checkerboard of fineX |  | ||||||
|   Lattice<dotp> sitewise(fineGrid); |  | ||||||
|   sitewise.Checkerboard() = fineX.Checkerboard(); |  | ||||||
|   Lattice<dotp> masked(fineGrid); |  | ||||||
|  |  | ||||||
|   // Multiply could be fused with innerProduct |  | ||||||
|   // Single block sum kernel could do both masks. |  | ||||||
|   sitewise = localInnerProduct(fineX,fineY); |  | ||||||
|   mult(masked, sitewise, FineMask); |  | ||||||
|   blockSum(CoarseInner, masked); |  | ||||||
| } |  | ||||||
|  |  | ||||||
| // Fine Object == (per site) type of fine field |  | ||||||
| // nbasis      == number of deflation vectors |  | ||||||
| template<class Fobj,class CComplex,int nbasis> |  | ||||||
| class CoarsenedMatrix : public CheckerBoardedSparseMatrixBase<Lattice<iVector<CComplex,nbasis > > >  { |  | ||||||
| public: |  | ||||||
|      |  | ||||||
|   typedef iVector<CComplex,nbasis >           siteVector; |  | ||||||
|   typedef Lattice<CComplex >                  CoarseComplexField; |  | ||||||
|   typedef Lattice<siteVector>                 CoarseVector; |  | ||||||
|   typedef Lattice<iMatrix<CComplex,nbasis > > CoarseMatrix; |  | ||||||
|   typedef iMatrix<CComplex,nbasis >  Cobj; |  | ||||||
|   typedef Lattice< CComplex >   CoarseScalar; // used for inner products on fine field |  | ||||||
|   typedef Lattice<Fobj >        FineField; |  | ||||||
|   typedef CoarseVector FermionField; |  | ||||||
|  |  | ||||||
|   // enrich interface, use default implementation as in FermionOperator /////// |  | ||||||
|   void Dminus(CoarseVector const& in, CoarseVector& out) { out = in; } /* copies in to out unchanged */ |  | ||||||
|   void DminusDag(CoarseVector const& in, CoarseVector& out) { out = in; } /* copies in to out unchanged */ |  | ||||||
|   void ImportPhysicalFermionSource(CoarseVector const& input, CoarseVector& imported) { imported = input; } /* plain copy */ |  | ||||||
|   void ImportUnphysicalFermion(CoarseVector const& input, CoarseVector& imported) { imported = input; } /* plain copy */ |  | ||||||
|   void ExportPhysicalFermionSolution(CoarseVector const& solution, CoarseVector& exported) { exported = solution; }; /* plain copy */ |  | ||||||
|   void ExportPhysicalFermionSource(CoarseVector const& solution, CoarseVector& exported) { exported = solution; }; /* plain copy */ |  | ||||||
|  |  | ||||||
|   //////////////////// |  | ||||||
|   // Data members |  | ||||||
|   //////////////////// |  | ||||||
|   Geometry         geom; |  | ||||||
|   GridBase *       _grid;  |  | ||||||
|   GridBase*        _cbgrid; |  | ||||||
|   int hermitian; |  | ||||||
|  |  | ||||||
|   CartesianStencil<siteVector,siteVector,DefaultImplParams> Stencil;  |  | ||||||
|   CartesianStencil<siteVector,siteVector,DefaultImplParams> StencilEven; |  | ||||||
|   CartesianStencil<siteVector,siteVector,DefaultImplParams> StencilOdd; |  | ||||||
|  |  | ||||||
|   std::vector<CoarseMatrix> A; |  | ||||||
|   std::vector<CoarseMatrix> Aeven; |  | ||||||
|   std::vector<CoarseMatrix> Aodd; |  | ||||||
|  |  | ||||||
|   CoarseMatrix AselfInv; |  | ||||||
|   CoarseMatrix AselfInvEven; |  | ||||||
|   CoarseMatrix AselfInvOdd; |  | ||||||
|  |  | ||||||
|   Vector<RealD> dag_factor; |  | ||||||
|  |  | ||||||
|   /////////////////////// |  | ||||||
|   // Interface |  | ||||||
|   /////////////////////// |  | ||||||
|   GridBase * Grid(void)         { return _grid; };   // this is all the linalg routines need to know |  | ||||||
|   GridBase * RedBlackGrid()     { return _cbgrid; }; /* accessor for the _cbgrid member */ |  | ||||||
|  |  | ||||||
|   int ConstEE() { return 0; } /* always reports 0 */ |  | ||||||
|  |  | ||||||
|   void M (const CoarseVector &in, CoarseVector &out) /* out(ss)(b) = sum over stencil points and bb of A[point](ss)(b,bb) * neighbour(bb) */ |  | ||||||
|   { |  | ||||||
|     conformable(_grid,in.Grid()); |  | ||||||
|     conformable(in.Grid(),out.Grid()); |  | ||||||
|     out.Checkerboard() = in.Checkerboard(); |  | ||||||
|  |  | ||||||
|     SimpleCompressor<siteVector> compressor; |  | ||||||
|  |  | ||||||
|     Stencil.HaloExchange(in,compressor); /* presumably fills the Stencil comms buffer read via CommBuf() below - confirm */ |  | ||||||
|     autoView( in_v , in, AcceleratorRead); |  | ||||||
|     autoView( out_v , out, AcceleratorWrite); |  | ||||||
|     autoView( Stencil_v  , Stencil, AcceleratorRead); |  | ||||||
|     int npoint = geom.npoint; |  | ||||||
|     typedef LatticeView<Cobj> Aview; |  | ||||||
|        |  | ||||||
|     Vector<Aview> AcceleratorViewContainer; |  | ||||||
|    |  | ||||||
|     for(int p=0;p<geom.npoint;p++) AcceleratorViewContainer.push_back(A[p].View(AcceleratorRead)); /* one read view per stencil point; closed by hand at the end */ |  | ||||||
|     Aview *Aview_p = & AcceleratorViewContainer[0]; |  | ||||||
|  |  | ||||||
|     const int Nsimd = CComplex::Nsimd(); |  | ||||||
|     typedef decltype(coalescedRead(in_v[0])) calcVector; |  | ||||||
|     typedef decltype(coalescedRead(in_v[0](0))) calcComplex; |  | ||||||
|  |  | ||||||
|     int osites=Grid()->oSites(); /* NOTE(review): osites is not used below */ |  | ||||||
|  |  | ||||||
|     accelerator_for(sss, Grid()->oSites()*nbasis, Nsimd, { /* one work item per (site ss, output row b) */ |  | ||||||
|       int ss = sss/nbasis; |  | ||||||
|       int b  = sss%nbasis; |  | ||||||
|       calcComplex res = Zero(); |  | ||||||
|       calcVector nbr; |  | ||||||
|       int ptype; |  | ||||||
|       StencilEntry *SE; |  | ||||||
|  |  | ||||||
|       for(int point=0;point<npoint;point++){ |  | ||||||
|  |  | ||||||
| 	SE=Stencil_v.GetEntry(ptype,point,ss); |  | ||||||
| 	   |  | ||||||
| 	if(SE->_is_local) {  /* neighbour is read from in_v, with a permute controlled by ptype and SE->_permute */ |  | ||||||
| 	  nbr = coalescedReadPermute(in_v[SE->_offset],ptype,SE->_permute); |  | ||||||
| 	} else { /* neighbour is read from the stencil comms buffer */ |  | ||||||
| 	  nbr = coalescedRead(Stencil_v.CommBuf()[SE->_offset]); |  | ||||||
| 	} |  | ||||||
| 	acceleratorSynchronise(); |  | ||||||
|  |  | ||||||
| 	for(int bb=0;bb<nbasis;bb++) { /* accumulate row b of A[point] times the neighbour vector */ |  | ||||||
| 	  res = res + coalescedRead(Aview_p[point][ss](b,bb))*nbr(bb); |  | ||||||
| 	} |  | ||||||
|       } |  | ||||||
|       coalescedWrite(out_v[ss](b),res); |  | ||||||
|       }); |  | ||||||
|  |  | ||||||
|     for(int p=0;p<geom.npoint;p++) AcceleratorViewContainer[p].ViewClose(); /* matches the View() calls above */ |  | ||||||
|   }; |  | ||||||
|  |  | ||||||
|   // Adjoint application: dispatches on the hermitian flag. |  | ||||||
|   void Mdag (const CoarseVector &in, CoarseVector &out) |  | ||||||
|   { |  | ||||||
|     if(!hermitian) { |  | ||||||
|       // corresponds to Galerkin coarsening |  | ||||||
|       MdagNonHermitian(in, out); |  | ||||||
|       return; |  | ||||||
|     } |  | ||||||
|     // corresponds to Petrov-Galerkin coarsening |  | ||||||
|     M(in,out); |  | ||||||
|   }; |  | ||||||
|  |  | ||||||
|   void MdagNonHermitian(const CoarseVector &in, CoarseVector &out) /* same kernel shape as M, but visits points via geom.points_dagger and weights each term by dag_factor[b*nbasis+bb] */ |  | ||||||
|   { |  | ||||||
|     conformable(_grid,in.Grid()); |  | ||||||
|     conformable(in.Grid(),out.Grid()); |  | ||||||
|     out.Checkerboard() = in.Checkerboard(); |  | ||||||
|  |  | ||||||
|     SimpleCompressor<siteVector> compressor; |  | ||||||
|  |  | ||||||
|     Stencil.HaloExchange(in,compressor); /* presumably fills the Stencil comms buffer read via CommBuf() below - confirm */ |  | ||||||
|     autoView( in_v , in, AcceleratorRead); |  | ||||||
|     autoView( out_v , out, AcceleratorWrite); |  | ||||||
|     autoView( Stencil_v  , Stencil, AcceleratorRead); |  | ||||||
|     int npoint = geom.npoint; |  | ||||||
|     typedef LatticeView<Cobj> Aview; |  | ||||||
|  |  | ||||||
|     Vector<Aview> AcceleratorViewContainer; |  | ||||||
|  |  | ||||||
|     for(int p=0;p<geom.npoint;p++) AcceleratorViewContainer.push_back(A[p].View(AcceleratorRead)); /* one read view per stencil point; closed by hand at the end */ |  | ||||||
|     Aview *Aview_p = & AcceleratorViewContainer[0]; |  | ||||||
|  |  | ||||||
|     const int Nsimd = CComplex::Nsimd(); |  | ||||||
|     typedef decltype(coalescedRead(in_v[0])) calcVector; |  | ||||||
|     typedef decltype(coalescedRead(in_v[0](0))) calcComplex; |  | ||||||
|  |  | ||||||
|     int osites=Grid()->oSites(); /* NOTE(review): osites is not used below */ |  | ||||||
|  |  | ||||||
|     Vector<int> points(geom.npoint, 0); /* local copy of geom.points_dagger, accessed through points_p in the kernel */ |  | ||||||
|     for(int p=0; p<geom.npoint; p++) |  | ||||||
|       points[p] = geom.points_dagger[p]; |  | ||||||
|  |  | ||||||
|     auto points_p = &points[0]; |  | ||||||
|  |  | ||||||
|     RealD* dag_factor_p = &dag_factor[0]; |  | ||||||
|  |  | ||||||
|     accelerator_for(sss, Grid()->oSites()*nbasis, Nsimd, { /* one work item per (site ss, output row b) */ |  | ||||||
|       int ss = sss/nbasis; |  | ||||||
|       int b  = sss%nbasis; |  | ||||||
|       calcComplex res = Zero(); |  | ||||||
|       calcVector nbr; |  | ||||||
|       int ptype; |  | ||||||
|       StencilEntry *SE; |  | ||||||
|  |  | ||||||
|       for(int p=0;p<npoint;p++){ |  | ||||||
|         int point = points_p[p]; /* stencil point taken from the points_dagger table */ |  | ||||||
|  |  | ||||||
| 	SE=Stencil_v.GetEntry(ptype,point,ss); |  | ||||||
|  |  | ||||||
| 	if(SE->_is_local) { |  | ||||||
| 	  nbr = coalescedReadPermute(in_v[SE->_offset],ptype,SE->_permute); |  | ||||||
| 	} else { |  | ||||||
| 	  nbr = coalescedRead(Stencil_v.CommBuf()[SE->_offset]); |  | ||||||
| 	} |  | ||||||
| 	acceleratorSynchronise(); |  | ||||||
|  |  | ||||||
| 	for(int bb=0;bb<nbasis;bb++) { /* row b of A[point] times neighbour, each term scaled by dag_factor(b,bb) */ |  | ||||||
| 	  res = res + dag_factor_p[b*nbasis+bb]*coalescedRead(Aview_p[point][ss](b,bb))*nbr(bb); |  | ||||||
| 	} |  | ||||||
|       } |  | ||||||
|       coalescedWrite(out_v[ss](b),res); |  | ||||||
|       }); |  | ||||||
|  |  | ||||||
|     for(int p=0;p<geom.npoint;p++) AcceleratorViewContainer[p].ViewClose(); /* matches the View() calls above */ |  | ||||||
|   } |  | ||||||
|  |  | ||||||
|   // Communication half of the per-direction apply: runs only the stencil |  | ||||||
|   // halo exchange for `in`; no output field is written here. |  | ||||||
|   void MdirComms(const CoarseVector &in) |  | ||||||
|   { |  | ||||||
|     SimpleCompressor<siteVector> halo_compressor; |  | ||||||
|     Stencil.HaloExchange(in, halo_compressor); |  | ||||||
|   } |  | ||||||
|   void MdirCalc(const CoarseVector &in, CoarseVector &out, int point) /* applies the single stencil term A[point] to in; no halo exchange is done here */ |  | ||||||
|   { |  | ||||||
|     conformable(_grid,in.Grid()); |  | ||||||
|     conformable(_grid,out.Grid()); |  | ||||||
|     out.Checkerboard() = in.Checkerboard(); |  | ||||||
|  |  | ||||||
|     typedef LatticeView<Cobj> Aview; |  | ||||||
|     Vector<Aview> AcceleratorViewContainer; |  | ||||||
|     for(int p=0;p<geom.npoint;p++) AcceleratorViewContainer.push_back(A[p].View(AcceleratorRead)); /* views are opened for every point although only A[point] is read below */ |  | ||||||
|     Aview *Aview_p = & AcceleratorViewContainer[0]; |  | ||||||
|  |  | ||||||
|     autoView( out_v , out, AcceleratorWrite); |  | ||||||
|     autoView( in_v  , in, AcceleratorRead); |  | ||||||
|     autoView( Stencil_v  , Stencil, AcceleratorRead); |  | ||||||
|  |  | ||||||
|     const int Nsimd = CComplex::Nsimd(); |  | ||||||
|     typedef decltype(coalescedRead(in_v[0])) calcVector; |  | ||||||
|     typedef decltype(coalescedRead(in_v[0](0))) calcComplex; |  | ||||||
|  |  | ||||||
|     accelerator_for(sss, Grid()->oSites()*nbasis, Nsimd, { /* one work item per (site ss, output row b) */ |  | ||||||
|       int ss = sss/nbasis; |  | ||||||
|       int b  = sss%nbasis; |  | ||||||
|       calcComplex res = Zero(); |  | ||||||
|       calcVector nbr; |  | ||||||
|       int ptype; |  | ||||||
|       StencilEntry *SE; |  | ||||||
|  |  | ||||||
|       SE=Stencil_v.GetEntry(ptype,point,ss); |  | ||||||
| 	   |  | ||||||
|       if(SE->_is_local) {  |  | ||||||
| 	nbr = coalescedReadPermute(in_v[SE->_offset],ptype,SE->_permute); |  | ||||||
|       } else { /* reads the comms buffer; assumes a prior halo exchange such as MdirComms - TODO confirm with callers */ |  | ||||||
| 	nbr = coalescedRead(Stencil_v.CommBuf()[SE->_offset]); |  | ||||||
|       } |  | ||||||
|       acceleratorSynchronise(); |  | ||||||
|  |  | ||||||
|       for(int bb=0;bb<nbasis;bb++) { |  | ||||||
| 	res = res + coalescedRead(Aview_p[point][ss](b,bb))*nbr(bb); |  | ||||||
|       } |  | ||||||
|       coalescedWrite(out_v[ss](b),res); |  | ||||||
|     }); |  | ||||||
|     for(int p=0;p<geom.npoint;p++) AcceleratorViewContainer[p].ViewClose(); /* matches the View() calls above */ |  | ||||||
|   } |  | ||||||
|   // Applies every non-self stencil link to `in` after one halo exchange: |  | ||||||
|   // out[p] receives MdirCalc(in, p) for p in [0, npoint-1). `out` must hold |  | ||||||
|   // either npoint-1 or npoint fields; otherwise both sizes are printed and the |  | ||||||
|   // routine asserts. |  | ||||||
|   void MdirAll(const CoarseVector &in,std::vector<CoarseVector> &out) |  | ||||||
|   { |  | ||||||
|     this->MdirComms(in); |  | ||||||
|     int ndir=geom.npoint-1; |  | ||||||
|     const bool sizeAccepted = (out.size()==ndir)||(out.size()==ndir+1); |  | ||||||
|     if (!sizeAccepted) { |  | ||||||
|       std::cout <<"MdirAll out size "<< out.size()<<std::endl; |  | ||||||
|       std::cout <<"MdirAll ndir "<< ndir<<std::endl; |  | ||||||
|       assert(0); |  | ||||||
|     } |  | ||||||
|     int link=0; |  | ||||||
|     while(link<ndir){ |  | ||||||
|       MdirCalc(in,out[link],link); |  | ||||||
|       link++; |  | ||||||
|     } |  | ||||||
|   }; |  | ||||||
|   // Single-link hop: halo exchange, then apply the stencil point that |  | ||||||
|   // geom.point(dir,disp) selects. |  | ||||||
|   void Mdir(const CoarseVector &in, CoarseVector &out, int dir, int disp){ |  | ||||||
|     this->MdirComms(in); |  | ||||||
|     const int stencilPoint = geom.point(dir,disp); |  | ||||||
|     this->MdirCalc(in,out,stencilPoint); |  | ||||||
|   }; |  | ||||||
|  |  | ||||||
|   // Applies the last stencil point (index npoint-1) through MdirCalc. |  | ||||||
|   void Mdiag(const CoarseVector &in, CoarseVector &out) |  | ||||||
|   { |  | ||||||
|     // No comms: MdirComms is deliberately not called first. |  | ||||||
|     this->MdirCalc(in, out, geom.npoint-1); |  | ||||||
|   }; |  | ||||||
|  |  | ||||||
|   // Self-link term: MooeeInternal with dag=DaggerNo, inv=InverseNo. |  | ||||||
|   void Mooee(const CoarseVector &in, CoarseVector &out) { |  | ||||||
|     this->MooeeInternal(in, out, DaggerNo, InverseNo); |  | ||||||
|   } |  | ||||||
|  |  | ||||||
|   // Inverted self-link term: MooeeInternal with dag=DaggerNo, inv=InverseYes. |  | ||||||
|   void MooeeInv(const CoarseVector &in, CoarseVector &out) { |  | ||||||
|     this->MooeeInternal(in, out, DaggerNo, InverseYes); |  | ||||||
|   } |  | ||||||
|  |  | ||||||
|   // Daggered self-link term: MooeeInternal with dag=DaggerYes, inv=InverseNo. |  | ||||||
|   void MooeeDag(const CoarseVector &in, CoarseVector &out) { |  | ||||||
|     this->MooeeInternal(in, out, DaggerYes, InverseNo); |  | ||||||
|   } |  | ||||||
|  |  | ||||||
|   // Daggered, inverted self-link term: MooeeInternal with dag=DaggerYes, inv=InverseYes. |  | ||||||
|   void MooeeInvDag(const CoarseVector &in, CoarseVector &out) { |  | ||||||
|     this->MooeeInternal(in, out, DaggerYes, InverseYes); |  | ||||||
|   } |  | ||||||
|  |  | ||||||
|   // Checkerboard-changing hop without dagger: an Odd input goes through |  | ||||||
|   // DhopEO, any other input through DhopOE. |  | ||||||
|   void Meooe(const CoarseVector &in, CoarseVector &out) { |  | ||||||
|     const bool inputIsOdd = (in.Checkerboard() == Odd); |  | ||||||
|     if(!inputIsOdd) { |  | ||||||
|       DhopOE(in, out, DaggerNo); |  | ||||||
|       return; |  | ||||||
|     } |  | ||||||
|     DhopEO(in, out, DaggerNo); |  | ||||||
|   } |  | ||||||
|  |  | ||||||
|   // Daggered checkerboard-changing hop: an Odd input goes through DhopEO, |  | ||||||
|   // any other input through DhopOE, both with DaggerYes. |  | ||||||
|   void MeooeDag(const CoarseVector &in, CoarseVector &out) { |  | ||||||
|     const bool inputIsOdd = (in.Checkerboard() == Odd); |  | ||||||
|     if(!inputIsOdd) { |  | ||||||
|       DhopOE(in, out, DaggerYes); |  | ||||||
|       return; |  | ||||||
|     } |  | ||||||
|     DhopEO(in, out, DaggerYes); |  | ||||||
|   } |  | ||||||
|  |  | ||||||
|   // Full-grid hopping term. Checks that `in` lives on _grid and that `out` |  | ||||||
|   // shares its grid, copies the checkerboard tag, then runs DhopInternal on |  | ||||||
|   // the full-grid stencil with the A links. |  | ||||||
|   void Dhop(const CoarseVector &in, CoarseVector &out, int dag) { |  | ||||||
|     // verifies full grid |  | ||||||
|     conformable(in.Grid(), _grid); |  | ||||||
|     conformable(in.Grid(), out.Grid()); |  | ||||||
|     out.Checkerboard() = in.Checkerboard(); |  | ||||||
|     this->DhopInternal(Stencil, A, in, out, dag); |  | ||||||
|   } |  | ||||||
|  |  | ||||||
|   // Half-grid hop from an Even input to an Odd output, using StencilEven |  | ||||||
|   // together with the Aodd links. Asserts that the input really is Even. |  | ||||||
|   void DhopOE(const CoarseVector &in, CoarseVector &out, int dag) { |  | ||||||
|     // verifies half grid |  | ||||||
|     conformable(in.Grid(), _cbgrid); |  | ||||||
|     // drops the cb check |  | ||||||
|     conformable(in.Grid(), out.Grid()); |  | ||||||
|     assert(in.Checkerboard() == Even); |  | ||||||
|     out.Checkerboard() = Odd; |  | ||||||
|     this->DhopInternal(StencilEven, Aodd, in, out, dag); |  | ||||||
|   } |  | ||||||
|  |  | ||||||
|   // Half-grid hop from an Odd input to an Even output, using StencilOdd |  | ||||||
|   // together with the Aeven links. Asserts that the input really is Odd. |  | ||||||
|   void DhopEO(const CoarseVector &in, CoarseVector &out, int dag) { |  | ||||||
|     // verifies half grid |  | ||||||
|     conformable(in.Grid(), _cbgrid); |  | ||||||
|     // drops the cb check |  | ||||||
|     conformable(in.Grid(), out.Grid()); |  | ||||||
|     assert(in.Checkerboard() == Odd); |  | ||||||
|     out.Checkerboard() = Even; |  | ||||||
|     this->DhopInternal(StencilOdd, Aeven, in, out, dag); |  | ||||||
|   } |  | ||||||
|  |  | ||||||
|   // Shared implementation of Mooee / MooeeInv / MooeeDag / MooeeInvDag. |  | ||||||
|   //   in, out : source and destination; out takes in's checkerboard tag |  | ||||||
|   //   dag     : forwarded to DselfInternal to select the daggered kernel |  | ||||||
|   //   inv     : non-zero selects the pre-inverted self link (AselfInv*), |  | ||||||
|   //             zero selects the self link stored at index npoint-1 |  | ||||||
|   // The link and stencil are chosen from the checkerboarding of in's grid. |  | ||||||
|   // The null check now runs before the link is dereferenced; previously it |  | ||||||
|   // sat after the DselfInternal call and could never fire usefully. |  | ||||||
|   void MooeeInternal(const CoarseVector &in, CoarseVector &out, int dag, int inv) { |  | ||||||
|     out.Checkerboard() = in.Checkerboard(); |  | ||||||
|     assert(in.Checkerboard() == Odd || in.Checkerboard() == Even); |  | ||||||
|  |  | ||||||
|     const int self = geom.npoint-1; |  | ||||||
|     CoarseMatrix *Aself = nullptr; |  | ||||||
|     CartesianStencil<siteVector,siteVector,DefaultImplParams> *st = nullptr; |  | ||||||
|     if(in.Grid()->_isCheckerBoarded) { |  | ||||||
|       if(in.Checkerboard() == Odd) { |  | ||||||
|         Aself = (inv) ? &AselfInvOdd : &Aodd[self]; |  | ||||||
|         st    = &StencilOdd; |  | ||||||
|       } else { |  | ||||||
|         Aself = (inv) ? &AselfInvEven : &Aeven[self]; |  | ||||||
|         st    = &StencilEven; |  | ||||||
|       } |  | ||||||
|     } else { |  | ||||||
|       Aself = (inv) ? &AselfInv : &A[self]; |  | ||||||
|       st    = &Stencil; |  | ||||||
|     } |  | ||||||
|     assert(Aself != nullptr); |  | ||||||
|     assert(st != nullptr); |  | ||||||
|     DselfInternal(*st, *Aself, in, out, dag); |  | ||||||
|   } |  | ||||||
|  |  | ||||||
|   void DselfInternal(CartesianStencil<siteVector,siteVector,DefaultImplParams> &st, CoarseMatrix &a, |  | ||||||
|                        const CoarseVector &in, CoarseVector &out, int dag) { /* out = single link `a` applied to the self-point (npoint-1) neighbour of `in`, read through stencil `st`; no halo exchange is issued here */ |  | ||||||
|     int point = geom.npoint-1; |  | ||||||
|     autoView( out_v, out, AcceleratorWrite); |  | ||||||
|     autoView( in_v,  in,  AcceleratorRead); |  | ||||||
|     autoView( st_v,  st,  AcceleratorRead); |  | ||||||
|     autoView( a_v,   a,   AcceleratorRead); |  | ||||||
|  |  | ||||||
|     const int Nsimd = CComplex::Nsimd(); |  | ||||||
|     typedef decltype(coalescedRead(in_v[0])) calcVector; |  | ||||||
|     typedef decltype(coalescedRead(in_v[0](0))) calcComplex; |  | ||||||
|  |  | ||||||
|     RealD* dag_factor_p = &dag_factor[0]; |  | ||||||
|     /* The two branches differ only in that the dagger kernel scales each term by dag_factor[b*nbasis+bb]. */ |  | ||||||
|     if(dag) { |  | ||||||
|       accelerator_for(sss, in.Grid()->oSites()*nbasis, Nsimd, { |  | ||||||
|         int ss = sss/nbasis; |  | ||||||
|         int b  = sss%nbasis; |  | ||||||
|         calcComplex res = Zero(); |  | ||||||
|         calcVector nbr; |  | ||||||
|         int ptype; |  | ||||||
|         StencilEntry *SE; |  | ||||||
|  |  | ||||||
|         SE=st_v.GetEntry(ptype,point,ss); |  | ||||||
|         /* Neighbour is read from the local field (with permute) or from the stencil comm buffer. */ |  | ||||||
|         if(SE->_is_local) { |  | ||||||
|           nbr = coalescedReadPermute(in_v[SE->_offset],ptype,SE->_permute); |  | ||||||
|         } else { |  | ||||||
|           nbr = coalescedRead(st_v.CommBuf()[SE->_offset]); |  | ||||||
|         } |  | ||||||
|         acceleratorSynchronise(); |  | ||||||
|         /* Row b of the link matrix times the neighbour vector, with the per-entry dagger sign. */ |  | ||||||
|         for(int bb=0;bb<nbasis;bb++) { |  | ||||||
|           res = res + dag_factor_p[b*nbasis+bb]*coalescedRead(a_v[ss](b,bb))*nbr(bb); |  | ||||||
|         } |  | ||||||
|         coalescedWrite(out_v[ss](b),res); |  | ||||||
|       }); |  | ||||||
|     } else { |  | ||||||
|       accelerator_for(sss, in.Grid()->oSites()*nbasis, Nsimd, { |  | ||||||
|         int ss = sss/nbasis; |  | ||||||
|         int b  = sss%nbasis; |  | ||||||
|         calcComplex res = Zero(); |  | ||||||
|         calcVector nbr; |  | ||||||
|         int ptype; |  | ||||||
|         StencilEntry *SE; |  | ||||||
|  |  | ||||||
|         SE=st_v.GetEntry(ptype,point,ss); |  | ||||||
|  |  | ||||||
|         if(SE->_is_local) { |  | ||||||
|           nbr = coalescedReadPermute(in_v[SE->_offset],ptype,SE->_permute); |  | ||||||
|         } else { |  | ||||||
|           nbr = coalescedRead(st_v.CommBuf()[SE->_offset]); |  | ||||||
|         } |  | ||||||
|         acceleratorSynchronise(); |  | ||||||
|         /* Same product as the dagger branch, without the sign table. */ |  | ||||||
|         for(int bb=0;bb<nbasis;bb++) { |  | ||||||
|           res = res + coalescedRead(a_v[ss](b,bb))*nbr(bb); |  | ||||||
|         } |  | ||||||
|         coalescedWrite(out_v[ss](b),res); |  | ||||||
|       }); |  | ||||||
|     } |  | ||||||
|   } |  | ||||||
|  |  | ||||||
|   void DhopInternal(CartesianStencil<siteVector,siteVector,DefaultImplParams> &st, std::vector<CoarseMatrix> &a, |  | ||||||
|                     const CoarseVector &in, CoarseVector &out, int dag) { /* out = sum over the npoint-1 non-self stencil points of link a[point] applied to the neighbour of `in`; performs the halo exchange on `st` itself */ |  | ||||||
|     SimpleCompressor<siteVector> compressor; |  | ||||||
|  |  | ||||||
|     st.HaloExchange(in,compressor); |  | ||||||
|     autoView( in_v,  in,  AcceleratorRead); |  | ||||||
|     autoView( out_v, out, AcceleratorWrite); |  | ||||||
|     autoView( st_v , st,  AcceleratorRead); |  | ||||||
|     typedef LatticeView<Cobj> Aview; |  | ||||||
|  |  | ||||||
|     // determine in what order we need the points |  | ||||||
|     int npoint = geom.npoint-1; |  | ||||||
|     Vector<int> points(npoint, 0); |  | ||||||
|     for(int p=0; p<npoint; p++) |  | ||||||
|       points[p] = (dag && !hermitian) ? geom.points_dagger[p] : p; /* remapped through geom.points_dagger only for a daggered, non-hermitian operator */ |  | ||||||
|  |  | ||||||
|     auto points_p = &points[0]; |  | ||||||
|     /* Read views on the npoint-1 links, indexed in the kernels through the raw pointer Aview_p. */ |  | ||||||
|     Vector<Aview> AcceleratorViewContainer; |  | ||||||
|     for(int p=0;p<npoint;p++) AcceleratorViewContainer.push_back(a[p].View(AcceleratorRead)); |  | ||||||
|     Aview *Aview_p = & AcceleratorViewContainer[0]; |  | ||||||
|  |  | ||||||
|     const int Nsimd = CComplex::Nsimd(); |  | ||||||
|     typedef decltype(coalescedRead(in_v[0])) calcVector; |  | ||||||
|     typedef decltype(coalescedRead(in_v[0](0))) calcComplex; |  | ||||||
|  |  | ||||||
|     RealD* dag_factor_p = &dag_factor[0]; |  | ||||||
|     /* The two branches differ only in that the dagger kernel scales each term by dag_factor[b*nbasis+bb]. */ |  | ||||||
|     if(dag) { |  | ||||||
|       accelerator_for(sss, in.Grid()->oSites()*nbasis, Nsimd, { |  | ||||||
|         int ss = sss/nbasis; |  | ||||||
|         int b  = sss%nbasis; |  | ||||||
|         calcComplex res = Zero(); |  | ||||||
|         calcVector nbr; |  | ||||||
|         int ptype; |  | ||||||
|         StencilEntry *SE; |  | ||||||
|         /* Accumulate the contribution of every non-self stencil point into res. */ |  | ||||||
|         for(int p=0;p<npoint;p++){ |  | ||||||
|           int point = points_p[p]; |  | ||||||
|           SE=st_v.GetEntry(ptype,point,ss); |  | ||||||
|  |  | ||||||
|           if(SE->_is_local) { |  | ||||||
|             nbr = coalescedReadPermute(in_v[SE->_offset],ptype,SE->_permute); |  | ||||||
|           } else { |  | ||||||
|             nbr = coalescedRead(st_v.CommBuf()[SE->_offset]); |  | ||||||
|           } |  | ||||||
|           acceleratorSynchronise(); |  | ||||||
|  |  | ||||||
|           for(int bb=0;bb<nbasis;bb++) { |  | ||||||
|             res = res + dag_factor_p[b*nbasis+bb]*coalescedRead(Aview_p[point][ss](b,bb))*nbr(bb); |  | ||||||
|           } |  | ||||||
|         } |  | ||||||
|         coalescedWrite(out_v[ss](b),res); |  | ||||||
|       }); |  | ||||||
|     } else { |  | ||||||
|       accelerator_for(sss, in.Grid()->oSites()*nbasis, Nsimd, { |  | ||||||
|         int ss = sss/nbasis; |  | ||||||
|         int b  = sss%nbasis; |  | ||||||
|         calcComplex res = Zero(); |  | ||||||
|         calcVector nbr; |  | ||||||
|         int ptype; |  | ||||||
|         StencilEntry *SE; |  | ||||||
|  |  | ||||||
|         for(int p=0;p<npoint;p++){ |  | ||||||
|           int point = points_p[p]; |  | ||||||
|           SE=st_v.GetEntry(ptype,point,ss); |  | ||||||
|  |  | ||||||
|           if(SE->_is_local) { |  | ||||||
|             nbr = coalescedReadPermute(in_v[SE->_offset],ptype,SE->_permute); |  | ||||||
|           } else { |  | ||||||
|             nbr = coalescedRead(st_v.CommBuf()[SE->_offset]); |  | ||||||
|           } |  | ||||||
|           acceleratorSynchronise(); |  | ||||||
|  |  | ||||||
|           for(int bb=0;bb<nbasis;bb++) { |  | ||||||
|             res = res + coalescedRead(Aview_p[point][ss](b,bb))*nbr(bb); |  | ||||||
|           } |  | ||||||
|         } |  | ||||||
|         coalescedWrite(out_v[ss](b),res); |  | ||||||
|       }); |  | ||||||
|     } |  | ||||||
|  |  | ||||||
|     for(int p=0;p<npoint;p++) AcceleratorViewContainer[p].ViewClose(); /* close the link views opened above */ |  | ||||||
|   } |  | ||||||
|    |  | ||||||
|   CoarsenedMatrix(GridCartesian &CoarseGrid, int hermitian_=0) 	: |  | ||||||
|     _grid(&CoarseGrid), |  | ||||||
|     _cbgrid(new GridRedBlackCartesian(&CoarseGrid)), /* red-black grid allocated here with new; NOTE(review): no matching delete is visible in this part of the file, confirm ownership */ |  | ||||||
|     geom(CoarseGrid._ndimension), |  | ||||||
|     hermitian(hermitian_), |  | ||||||
|     Stencil(&CoarseGrid,geom.npoint,Even,geom.directions,geom.displacements), |  | ||||||
|     StencilEven(_cbgrid,geom.npoint,Even,geom.directions,geom.displacements), |  | ||||||
|     StencilOdd(_cbgrid,geom.npoint,Odd,geom.directions,geom.displacements), |  | ||||||
|     A(geom.npoint,&CoarseGrid), /* one full-grid link per stencil point */ |  | ||||||
|     Aeven(geom.npoint,_cbgrid), |  | ||||||
|     Aodd(geom.npoint,_cbgrid), |  | ||||||
|     AselfInv(&CoarseGrid), |  | ||||||
|     AselfInvEven(_cbgrid), |  | ||||||
|     AselfInvOdd(_cbgrid), |  | ||||||
|     dag_factor(nbasis*nbasis) /* one entry per (row,col) of a link matrix, set by fillFactor() */ |  | ||||||
|   { /* Builds the operator on CoarseGrid with an internally allocated red-black grid; hermitian_ is stored in `hermitian` before the sign table is filled. */ |  | ||||||
|     fillFactor(); |  | ||||||
|   }; |  | ||||||
|  |  | ||||||
|   CoarsenedMatrix(GridCartesian &CoarseGrid, GridRedBlackCartesian &CoarseRBGrid, int hermitian_=0) 	: |  | ||||||
|     /* Variant that uses the caller-supplied red-black grid CoarseRBGrid instead of allocating one. */ |  | ||||||
|     _grid(&CoarseGrid), |  | ||||||
|     _cbgrid(&CoarseRBGrid), |  | ||||||
|     geom(CoarseGrid._ndimension), |  | ||||||
|     hermitian(hermitian_), |  | ||||||
|     Stencil(&CoarseGrid,geom.npoint,Even,geom.directions,geom.displacements), |  | ||||||
|     StencilEven(&CoarseRBGrid,geom.npoint,Even,geom.directions,geom.displacements), |  | ||||||
|     StencilOdd(&CoarseRBGrid,geom.npoint,Odd,geom.directions,geom.displacements), |  | ||||||
|     A(geom.npoint,&CoarseGrid), /* one full-grid link per stencil point */ |  | ||||||
|     Aeven(geom.npoint,&CoarseRBGrid), |  | ||||||
|     Aodd(geom.npoint,&CoarseRBGrid), |  | ||||||
|     AselfInv(&CoarseGrid), |  | ||||||
|     AselfInvEven(&CoarseRBGrid), |  | ||||||
|     AselfInvOdd(&CoarseRBGrid), |  | ||||||
|     dag_factor(nbasis*nbasis) /* one entry per (row,col) of a link matrix, set by fillFactor() */ |  | ||||||
|   { |  | ||||||
|     fillFactor(); |  | ||||||
|   }; |  | ||||||
|  |  | ||||||
|   // Fills dag_factor, the flat row-major nbasis x nbasis table of signs used |  | ||||||
|   // by the dagger kernels. Every entry is +1 when `hermitian` is set; |  | ||||||
|   // otherwise the two off-diagonal half x half blocks (half = nbasis/2) are -1 |  | ||||||
|   // and the rest stay +1. |  | ||||||
|   void fillFactor() { |  | ||||||
|     const int half = nbasis/2; |  | ||||||
|     const bool flipOffDiagonal = !hermitian; |  | ||||||
|  |  | ||||||
|     // GPU readable prefactor |  | ||||||
|     thread_for(idx, nbasis*nbasis, { |  | ||||||
|       const int row = idx/nbasis; |  | ||||||
|       const int col = idx%nbasis; |  | ||||||
|       const bool upperRight = (row<half) && (col>=half) && (col<2*half); |  | ||||||
|       const bool lowerLeft  = (col<half) && (row>=half) && (row<2*half); |  | ||||||
|       dag_factor[idx] = (flipOffDiagonal && (upperRight || lowerLeft)) ? -1.0 : 1.0; |  | ||||||
|     }); |  | ||||||
|   } |  | ||||||
|  |  | ||||||
|   void CoarsenOperator(GridBase *FineGrid,LinearOperatorBase<Lattice<Fobj> > &linop, |  | ||||||
| 		       Aggregation<Fobj,CComplex,nbasis> & Subspace) |  | ||||||
|   { /* Builds the coarse links A[p]: orthogonalises Subspace blockwise, applies linop to each basis vector, projects the result back onto the basis, then fills AselfInv and the half-checkerboard copies. */ |  | ||||||
|     typedef Lattice<typename Fobj::tensor_reduced> FineComplexField; |  | ||||||
|     typedef typename Fobj::scalar_type scalar_type; |  | ||||||
|  |  | ||||||
|     std::cout << GridLogMessage<< "CoarsenMatrix "<< std::endl; |  | ||||||
|  |  | ||||||
|     FineComplexField one(FineGrid); one=scalar_type(1.0,0.0); |  | ||||||
|     FineComplexField zero(FineGrid); zero=scalar_type(0.0,0.0); |  | ||||||
|     /* masks[p] is a 0/1 fine-grid field per stencil point, set in the first loop below. */ |  | ||||||
|     std::vector<FineComplexField> masks(geom.npoint,FineGrid); |  | ||||||
|     FineComplexField imask(FineGrid); // contributions from within this block |  | ||||||
|     FineComplexField omask(FineGrid); // contributions from outwith this block |  | ||||||
|  |  | ||||||
|     FineComplexField evenmask(FineGrid); |  | ||||||
|     FineComplexField oddmask(FineGrid);  |  | ||||||
|  |  | ||||||
|     FineField     phi(FineGrid); |  | ||||||
|     FineField     tmp(FineGrid); |  | ||||||
|     FineField     zz(FineGrid); zz=Zero(); |  | ||||||
|     FineField    Mphi(FineGrid); |  | ||||||
|     FineField    Mphie(FineGrid); |  | ||||||
|     FineField    Mphio(FineGrid); |  | ||||||
|     std::vector<FineField>     Mphi_p(geom.npoint,FineGrid); |  | ||||||
|  |  | ||||||
|     Lattice<iScalar<vInteger> > coor (FineGrid); |  | ||||||
|     Lattice<iScalar<vInteger> > bcoor(FineGrid); |  | ||||||
|     Lattice<iScalar<vInteger> > bcb  (FineGrid); bcb = Zero(); |  | ||||||
|     /* NOTE(review): iProj, oProj, zz and bcoor are declared but not read anywhere in this function. */ |  | ||||||
|     CoarseVector iProj(Grid());  |  | ||||||
|     CoarseVector oProj(Grid());  |  | ||||||
|     CoarseVector SelfProj(Grid());  |  | ||||||
|     CoarseComplexField iZProj(Grid());  |  | ||||||
|     CoarseComplexField oZProj(Grid());  |  | ||||||
|  |  | ||||||
|     CoarseScalar InnerProd(Grid());  |  | ||||||
|  |  | ||||||
|     std::cout << GridLogMessage<< "CoarsenMatrix Orthog "<< std::endl; |  | ||||||
|     // Orthogonalise the subblocks over the basis |  | ||||||
|     blockOrthogonalise(InnerProd,Subspace.subspace); |  | ||||||
|  |  | ||||||
|     // Compute the matrix elements of linop between this orthonormal |  | ||||||
|     // set of vectors. |  | ||||||
|     std::cout << GridLogMessage<< "CoarsenMatrix masks "<< std::endl; |  | ||||||
|     int self_stencil=-1; |  | ||||||
|     for(int p=0;p<geom.npoint;p++) |  | ||||||
|     {  |  | ||||||
|       int dir   = geom.directions[p]; |  | ||||||
|       int disp  = geom.displacements[p]; |  | ||||||
|       A[p]=Zero(); |  | ||||||
|       if( geom.displacements[p]==0){ |  | ||||||
| 	self_stencil=p; |  | ||||||
|       } |  | ||||||
|       /* block = ratio of the fine to the coarse _rdimensions entry along dir. */ |  | ||||||
|       Integer block=(FineGrid->_rdimensions[dir])/(Grid()->_rdimensions[dir]); |  | ||||||
|  |  | ||||||
|       LatticeCoordinate(coor,dir); |  | ||||||
|  |  | ||||||
|       /////////////////////////////////////////////////////// |  | ||||||
|       // Work out even and odd block checkerboarding for fast diagonal term |  | ||||||
|       /////////////////////////////////////////////////////// |  | ||||||
|       if ( disp==1 ) { |  | ||||||
| 	bcb   = bcb + div(coor,block); |  | ||||||
|       } |  | ||||||
| 	/* Mask is zero for the self point, and selects the last (disp==1) or first (disp==-1) fine coordinate of each block along dir. */ |  | ||||||
|       if ( disp==0 ) { |  | ||||||
| 	  masks[p]= Zero(); |  | ||||||
|       } else if ( disp==1 ) { |  | ||||||
| 	masks[p] = where(mod(coor,block)==(block-1),one,zero); |  | ||||||
|       } else if ( disp==-1 ) { |  | ||||||
| 	masks[p] = where(mod(coor,block)==(Integer)0,one,zero); |  | ||||||
|       } |  | ||||||
|     } |  | ||||||
|     evenmask = where(mod(bcb,2)==(Integer)0,one,zero); |  | ||||||
|     oddmask  = one-evenmask; |  | ||||||
|  |  | ||||||
|     assert(self_stencil!=-1); |  | ||||||
|     /* Main loop: column i of every link comes from linop applied to basis vector i. */ |  | ||||||
|     for(int i=0;i<nbasis;i++){ |  | ||||||
|  |  | ||||||
|       phi=Subspace.subspace[i]; |  | ||||||
|  |  | ||||||
|       std::cout << GridLogMessage<< "CoarsenMatrix vector "<<i << std::endl; |  | ||||||
|       linop.OpDirAll(phi,Mphi_p); |  | ||||||
|       linop.OpDiag  (phi,Mphi_p[geom.npoint-1]); /* the OpDiag result goes into the last slot of Mphi_p */ |  | ||||||
|  |  | ||||||
|       for(int p=0;p<geom.npoint;p++){  |  | ||||||
|  |  | ||||||
| 	Mphi = Mphi_p[p]; |  | ||||||
|  |  | ||||||
| 	int dir   = geom.directions[p]; |  | ||||||
| 	int disp  = geom.displacements[p]; |  | ||||||
|  |  | ||||||
| 	if ( (disp==-1) || (!hermitian ) ) { /* when hermitian, only disp==-1 points are projected here; the disp==+1 partner is filled from the conjugated shift below */ |  | ||||||
|  |  | ||||||
| 	  //////////////////////////////////////////////////////////////////////// |  | ||||||
| 	  // Pick out contributions coming from this cell and neighbour cell |  | ||||||
| 	  //////////////////////////////////////////////////////////////////////// |  | ||||||
| 	  omask = masks[p]; |  | ||||||
| 	  imask = one-omask; /* NOTE(review): imask is not read anywhere in this function */ |  | ||||||
| 	 |  | ||||||
| 	  for(int j=0;j<nbasis;j++){ |  | ||||||
| 	     |  | ||||||
| 	    blockMaskedInnerProduct(oZProj,omask,Subspace.subspace[j],Mphi); |  | ||||||
| 	    /* NOTE(review): iZProj_v and A_self are opened below but not used inside this j loop. */ |  | ||||||
| 	    autoView( iZProj_v , iZProj, AcceleratorRead) ; |  | ||||||
| 	    autoView( oZProj_v , oZProj, AcceleratorRead) ; |  | ||||||
| 	    autoView( A_p     ,  A[p], AcceleratorWrite); |  | ||||||
| 	    autoView( A_self  , A[self_stencil], AcceleratorWrite); |  | ||||||
|  |  | ||||||
| 	    accelerator_for(ss, Grid()->oSites(), Fobj::Nsimd(),{ coalescedWrite(A_p[ss](j,i),oZProj_v(ss)); }); |  | ||||||
| 	    if ( hermitian && (disp==-1) ) { |  | ||||||
| 	      for(int pp=0;pp<geom.npoint;pp++){// Find the opposite link and set <j|A|i> = <i|A|j>* |  | ||||||
| 		int dirp   = geom.directions[pp]; |  | ||||||
| 		int dispp  = geom.displacements[pp]; |  | ||||||
| 		if ( (dirp==dir) && (dispp==1) ){ |  | ||||||
| 		  auto sft = conjugate(Cshift(oZProj,dir,1)); |  | ||||||
| 		  autoView( sft_v    ,  sft  , AcceleratorWrite); /* NOTE(review): opened for write but only read in the kernel below */ |  | ||||||
| 		  autoView( A_pp     ,  A[pp], AcceleratorWrite); |  | ||||||
| 		  accelerator_for(ss, Grid()->oSites(), Fobj::Nsimd(),{ coalescedWrite(A_pp[ss](i,j),sft_v(ss)); }); |  | ||||||
| 		} |  | ||||||
| 	      } |  | ||||||
| 	    } |  | ||||||
|  |  | ||||||
| 	  } |  | ||||||
| 	} |  | ||||||
|       } |  | ||||||
|  |  | ||||||
|       /////////////////////////////////////////// |  | ||||||
|       // Faster alternate self coupling.. use hermiticity to save 2x |  | ||||||
|       /////////////////////////////////////////// |  | ||||||
|       { |  | ||||||
| 	mult(tmp,phi,evenmask);  linop.Op(tmp,Mphie); |  | ||||||
| 	mult(tmp,phi,oddmask );  linop.Op(tmp,Mphio); |  | ||||||
| 	/* Recombine: keep Mphie where evenmask is set and Mphio where oddmask is set. */ |  | ||||||
| 	{ |  | ||||||
| 	  autoView( tmp_      , tmp, AcceleratorWrite); |  | ||||||
| 	  autoView( evenmask_ , evenmask, AcceleratorRead); |  | ||||||
| 	  autoView( oddmask_  ,  oddmask, AcceleratorRead); |  | ||||||
| 	  autoView( Mphie_    ,  Mphie, AcceleratorRead); |  | ||||||
| 	  autoView( Mphio_    ,  Mphio, AcceleratorRead); |  | ||||||
| 	  accelerator_for(ss, FineGrid->oSites(), Fobj::Nsimd(),{  |  | ||||||
| 	      coalescedWrite(tmp_[ss],evenmask_(ss)*Mphie_(ss) + oddmask_(ss)*Mphio_(ss)); |  | ||||||
| 	    }); |  | ||||||
| 	} |  | ||||||
|  |  | ||||||
| 	blockProject(SelfProj,tmp,Subspace.subspace); |  | ||||||
|  |  | ||||||
| 	autoView( SelfProj_ , SelfProj, AcceleratorRead); |  | ||||||
| 	autoView( A_self  , A[self_stencil], AcceleratorWrite); |  | ||||||
| 	/* Column i of the self link is overwritten with the projected vector. */ |  | ||||||
| 	accelerator_for(ss, Grid()->oSites(), Fobj::Nsimd(),{ |  | ||||||
| 	  for(int j=0;j<nbasis;j++){ |  | ||||||
| 	    coalescedWrite(A_self[ss](j,i), SelfProj_(ss)(j)); |  | ||||||
| 	  } |  | ||||||
| 	}); |  | ||||||
|  |  | ||||||
|       } |  | ||||||
|     } |  | ||||||
|     if(hermitian) { |  | ||||||
|       std::cout << GridLogMessage << " ForceHermitian, new code "<<std::endl; |  | ||||||
|     } |  | ||||||
|  |  | ||||||
|     InvertSelfStencilLink(); std::cout << GridLogMessage << "Coarse self link inverted" << std::endl; |  | ||||||
|     FillHalfCbs(); std::cout << GridLogMessage << "Coarse half checkerboards filled" << std::endl; |  | ||||||
|   } |  | ||||||
|  |  | ||||||
|   void InvertSelfStencilLink() { /* Fills AselfInv with the per-site matrix inverse of the link at index npoint-1, computed on the host with Eigen. */ |  | ||||||
|     std::cout << GridLogDebug << "CoarsenedMatrix::InvertSelfStencilLink" << std::endl; |  | ||||||
|     int localVolume = Grid()->lSites(); |  | ||||||
|  |  | ||||||
|     typedef typename Cobj::scalar_object scalar_object; |  | ||||||
|  |  | ||||||
|     autoView(Aself_v,    A[geom.npoint-1], CpuRead); |  | ||||||
|     autoView(AselfInv_v, AselfInv,         CpuWrite); |  | ||||||
|     thread_for(site, localVolume, { // NOTE: Not able to bring this to GPU because of Eigen + peek/poke |  | ||||||
|       Eigen::MatrixXcd selfLinkEigen    = Eigen::MatrixXcd::Zero(nbasis, nbasis); |  | ||||||
|       Eigen::MatrixXcd selfLinkInvEigen = Eigen::MatrixXcd::Zero(nbasis, nbasis); |  | ||||||
|  |  | ||||||
|       scalar_object selfLink    = Zero(); |  | ||||||
|       scalar_object selfLinkInv = Zero(); |  | ||||||
|  |  | ||||||
|       Coordinate lcoor; |  | ||||||
|  |  | ||||||
|       Grid()->LocalIndexToLocalCoor(site, lcoor); |  | ||||||
|       peekLocalSite(selfLink, Aself_v, lcoor); |  | ||||||
|       /* Copy this site's nbasis x nbasis link into an Eigen complex matrix. */ |  | ||||||
|       for (int i = 0; i < nbasis; ++i) |  | ||||||
|         for (int j = 0; j < nbasis; ++j) |  | ||||||
|           selfLinkEigen(i, j) = static_cast<ComplexD>(TensorRemove(selfLink(i, j))); |  | ||||||
|  |  | ||||||
|       selfLinkInvEigen = selfLinkEigen.inverse(); /* NOTE(review): no invertibility check is made before or after this call */ |  | ||||||
|       /* Copy the inverse back into a Grid scalar object before poking it into AselfInv. */ |  | ||||||
|       for(int i = 0; i < nbasis; ++i) |  | ||||||
|         for(int j = 0; j < nbasis; ++j) |  | ||||||
|           selfLinkInv(i, j) = selfLinkInvEigen(i, j); |  | ||||||
|  |  | ||||||
|       pokeLocalSite(selfLinkInv, AselfInv_v, lcoor); |  | ||||||
|     }); |  | ||||||
|   } |  | ||||||
|  |  | ||||||
|   // Splits every full-grid link A[p], and the inverted self link AselfInv, |  | ||||||
|   // into their Even and Odd checkerboard halves. |  | ||||||
|   void FillHalfCbs() { |  | ||||||
|     std::cout << GridLogDebug << "CoarsenedMatrix::FillHalfCbs" << std::endl; |  | ||||||
|     int link = 0; |  | ||||||
|     while(link < geom.npoint) { |  | ||||||
|       pickCheckerboard(Even, Aeven[link], A[link]); |  | ||||||
|       pickCheckerboard(Odd,  Aodd[link],  A[link]); |  | ||||||
|       ++link; |  | ||||||
|     } |  | ||||||
|     pickCheckerboard(Even, AselfInvEven, AselfInv); |  | ||||||
|     pickCheckerboard(Odd,  AselfInvOdd,  AselfInv); |  | ||||||
|   } |  | ||||||
| }; |  | ||||||
|  |  | ||||||
| NAMESPACE_END(Grid); |  | ||||||
| #endif |  | ||||||
| @@ -1,418 +0,0 @@ | |||||||
| /************************************************************************************* |  | ||||||
|  |  | ||||||
|     Grid physics library, www.github.com/paboyle/Grid  |  | ||||||
|  |  | ||||||
|     Source file: ./lib/algorithms/GeneralCoarsenedMatrix.h |  | ||||||
|  |  | ||||||
|     Copyright (C) 2015 |  | ||||||
|  |  | ||||||
| Author: Peter Boyle <pboyle@bnl.gov> |  | ||||||
|  |  | ||||||
|     This program is free software; you can redistribute it and/or modify |  | ||||||
|     it under the terms of the GNU General Public License as published by |  | ||||||
|     the Free Software Foundation; either version 2 of the License, or |  | ||||||
|     (at your option) any later version. |  | ||||||
|  |  | ||||||
|     This program is distributed in the hope that it will be useful, |  | ||||||
|     but WITHOUT ANY WARRANTY; without even the implied warranty of |  | ||||||
|     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the |  | ||||||
|     GNU General Public License for more details. |  | ||||||
|  |  | ||||||
|     You should have received a copy of the GNU General Public License along |  | ||||||
|     with this program; if not, write to the Free Software Foundation, Inc., |  | ||||||
|     51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. |  | ||||||
|  |  | ||||||
|     See the full license in the file "LICENSE" in the top level distribution directory |  | ||||||
| *************************************************************************************/ |  | ||||||
| /*  END LEGAL */ |  | ||||||
| #pragma once |  | ||||||
|  |  | ||||||
| #include <Grid/qcd/QCD.h> // needed for Dagger(Yes|No), Inverse(Yes|No) |  | ||||||
|  |  | ||||||
| #include <Grid/lattice/PaddedCell.h> |  | ||||||
| #include <Grid/stencil/GeneralLocalStencil.h> |  | ||||||
|  |  | ||||||
| NAMESPACE_BEGIN(Grid); |  | ||||||
|  |  | ||||||
| // Fine Object == (per site) type of fine field |  | ||||||
| // nbasis      == number of deflation vectors |  | ||||||
| template<class Fobj,class CComplex,int nbasis> |  | ||||||
| class GeneralCoarsenedMatrix : public SparseMatrixBase<Lattice<iVector<CComplex,nbasis > > >  { |  | ||||||
| public: |  | ||||||
|  |  | ||||||
|   typedef GeneralCoarsenedMatrix<Fobj,CComplex,nbasis> GeneralCoarseOp; |  | ||||||
|   typedef iVector<CComplex,nbasis >           siteVector; |  | ||||||
|   typedef iMatrix<CComplex,nbasis >           siteMatrix; |  | ||||||
|   typedef Lattice<iScalar<CComplex> >         CoarseComplexField; |  | ||||||
|   typedef Lattice<siteVector>                 CoarseVector; |  | ||||||
|   typedef Lattice<iMatrix<CComplex,nbasis > > CoarseMatrix; |  | ||||||
|   typedef iMatrix<CComplex,nbasis >  Cobj; |  | ||||||
|   typedef Lattice< CComplex >   CoarseScalar; // used for inner products on fine field |  | ||||||
|   typedef Lattice<Fobj >        FineField; |  | ||||||
|   typedef CoarseVector Field; |  | ||||||
|   //////////////////// |  | ||||||
|   // Data members |  | ||||||
|   //////////////////// |  | ||||||
|   int hermitian; |  | ||||||
|   GridBase      *       _FineGrid;  |  | ||||||
|   GridCartesian *       _CoarseGrid;  |  | ||||||
|   NonLocalStencilGeometry &geom; |  | ||||||
|   PaddedCell Cell; |  | ||||||
|   GeneralLocalStencil Stencil; |  | ||||||
|    |  | ||||||
|   std::vector<CoarseMatrix> _A; |  | ||||||
|   std::vector<CoarseMatrix> _Adag; |  | ||||||
|  |  | ||||||
|   /////////////////////// |  | ||||||
|   // Interface |  | ||||||
|   /////////////////////// |  | ||||||
|   GridBase      * Grid(void)           { return _FineGrid; };   // returns the fine grid pointer; NOTE(review): Field is CoarseVector in this class, so confirm the fine grid is what callers expect here |  | ||||||
|   GridBase      * FineGrid(void)       { return _FineGrid; };   // accessor for the stored fine grid pointer |  | ||||||
|   GridCartesian * CoarseGrid(void)     { return _CoarseGrid; };   // accessor for the stored coarse (Cartesian) grid pointer |  | ||||||
|  |  | ||||||
|   // Copies links from CopyMe into this operator by matching stencil shifts. |  | ||||||
|   // For each of our points, every point of CopyMe with an identical shift |  | ||||||
|   // supplies _A and _Adag, extracted through CopyMe's padded cell. Asserts |  | ||||||
|   // that exactly geom.npoint matches were found, then calls |  | ||||||
|   // ExchangeCoarseLinks(). |  | ||||||
|   // NOTE(review): the `shift` argument is not used in the body. |  | ||||||
|   void ProjectNearestNeighbour(RealD shift, GeneralCoarseOp &CopyMe) |  | ||||||
|   { |  | ||||||
|     int matched=0; |  | ||||||
|     std::cout << GridLogMessage <<"GeneralCoarsenedMatrix::ProjectNearestNeighbour "<< CopyMe._A[0].Grid()<<std::endl; |  | ||||||
|     for(int mine=0;mine<geom.npoint;mine++){ |  | ||||||
|       for(int theirs=0;theirs<CopyMe.geom.npoint;theirs++){ |  | ||||||
|         // Search for the same relative shift |  | ||||||
|         // Avoids brutal handling of Grid pointers |  | ||||||
|         const bool sameShift = ( CopyMe.geom.shifts[theirs]==geom.shifts[mine] ); |  | ||||||
|         if ( !sameShift ) continue; |  | ||||||
|         _A[mine]    = CopyMe.Cell.Extract(CopyMe._A[theirs]); |  | ||||||
|         _Adag[mine] = CopyMe.Cell.Extract(CopyMe._Adag[theirs]); |  | ||||||
|         matched++; |  | ||||||
|       } |  | ||||||
|     } |  | ||||||
|     assert(matched==geom.npoint); |  | ||||||
|     ExchangeCoarseLinks(); |  | ||||||
|   } |  | ||||||
|    |  | ||||||
|   // Builds the operator for stencil geometry _geom on the given fine and |  | ||||||
|   // coarse grids. `hermitian` is set to 1. The padded cell is built with depth |  | ||||||
|   // _geom.Depth() on the coarse grid, and the local stencil on the last grid |  | ||||||
|   // of that cell. _A and _Adag are sized to one link per stencil point. |  | ||||||
|   // |  | ||||||
|   // The initializer list is written in member declaration order (hermitian, |  | ||||||
|   // _FineGrid, _CoarseGrid, geom, Cell, Stencil). That is the order C++ |  | ||||||
|   // actually initializes members in, so the text now matches what runs and |  | ||||||
|   // no longer triggers -Wreorder. |  | ||||||
|   GeneralCoarsenedMatrix(NonLocalStencilGeometry &_geom,GridBase *FineGrid, GridCartesian * CoarseGrid) |  | ||||||
|     : hermitian(1), |  | ||||||
|       _FineGrid(FineGrid), |  | ||||||
|       _CoarseGrid(CoarseGrid), |  | ||||||
|       geom(_geom), |  | ||||||
|       Cell(_geom.Depth(),_CoarseGrid), |  | ||||||
|       Stencil(Cell.grids.back(),geom.shifts) |  | ||||||
|   { |  | ||||||
|     { |  | ||||||
|       // Sanity check: every stencil entry must point inside the padded grid. |  | ||||||
|       int npoint = _geom.npoint; |  | ||||||
|       autoView( Stencil_v  , Stencil, AcceleratorRead); |  | ||||||
|       int osites=Stencil.Grid()->oSites(); |  | ||||||
|       for(int ss=0;ss<osites;ss++){ |  | ||||||
|         for(int point=0;point<npoint;point++){ |  | ||||||
|           auto SE = Stencil_v.GetEntry(point,ss); |  | ||||||
|           int o = SE->_offset; |  | ||||||
|           assert( o< osites); |  | ||||||
|         } |  | ||||||
|       } |  | ||||||
|     } |  | ||||||
|  |  | ||||||
|     _A.resize(geom.npoint,CoarseGrid); |  | ||||||
|     _Adag.resize(geom.npoint,CoarseGrid); |  | ||||||
|   } |  | ||||||
|   // Applies the operator by handing the _A links to Mult. |  | ||||||
|   void M (const CoarseVector &in, CoarseVector &out) |  | ||||||
|   { |  | ||||||
|     this->Mult(_A,in,out); |  | ||||||
|   } |  | ||||||
|   // Applies the adjoint: when `hermitian` is set this is just M, otherwise |  | ||||||
|   // Mult is run with the separately stored _Adag links. |  | ||||||
|   void Mdag (const CoarseVector &in, CoarseVector &out) |  | ||||||
|   { |  | ||||||
|     if ( !hermitian ) { |  | ||||||
|       Mult(_Adag,in,out); |  | ||||||
|       return; |  | ||||||
|     } |  | ||||||
|     M(in,out); |  | ||||||
|   } |  | ||||||
|   void Mult (std::vector<CoarseMatrix> &A,const CoarseVector &in, CoarseVector &out) |  | ||||||
|   { |  | ||||||
|     RealD tviews=0; |  | ||||||
|     RealD ttot=0; |  | ||||||
|     RealD tmult=0; |  | ||||||
|     RealD texch=0; |  | ||||||
|     RealD text=0; |  | ||||||
|     ttot=-usecond(); |  | ||||||
|     conformable(CoarseGrid(),in.Grid()); |  | ||||||
|     conformable(in.Grid(),out.Grid()); |  | ||||||
|     out.Checkerboard() = in.Checkerboard(); |  | ||||||
|     CoarseVector tin=in; |  | ||||||
|  |  | ||||||
|     texch-=usecond(); |  | ||||||
|     CoarseVector pin  = Cell.Exchange(tin); |  | ||||||
|     texch+=usecond(); |  | ||||||
|  |  | ||||||
|     CoarseVector pout(pin.Grid()); pout=Zero(); |  | ||||||
|  |  | ||||||
|     int npoint = geom.npoint; |  | ||||||
|     typedef LatticeView<Cobj> Aview; |  | ||||||
|        |  | ||||||
|     const int Nsimd = CComplex::Nsimd(); |  | ||||||
|      |  | ||||||
|     int osites=pin.Grid()->oSites(); |  | ||||||
|     //    int gsites=pin.Grid()->gSites(); |  | ||||||
|  |  | ||||||
|     RealD flops = 1.0* npoint * nbasis * nbasis * 8 * osites; |  | ||||||
|     RealD bytes = (1.0*osites*sizeof(siteMatrix)*npoint+2.0*osites*sizeof(siteVector))*npoint; |  | ||||||
|        |  | ||||||
|     //    for(int point=0;point<npoint;point++){ |  | ||||||
|     //      conformable(A[point],pin); |  | ||||||
|     //    } |  | ||||||
|  |  | ||||||
|     { |  | ||||||
|       tviews-=usecond(); |  | ||||||
|       autoView( in_v , pin, AcceleratorRead); |  | ||||||
|       autoView( out_v , pout, AcceleratorWrite); |  | ||||||
|       autoView( Stencil_v  , Stencil, AcceleratorRead); |  | ||||||
|       tviews+=usecond(); |  | ||||||
|        |  | ||||||
|       for(int point=0;point<npoint;point++){ |  | ||||||
| 	tviews-=usecond(); |  | ||||||
| 	autoView( A_v, A[point],AcceleratorRead); |  | ||||||
| 	tviews+=usecond(); |  | ||||||
| 	tmult-=usecond(); |  | ||||||
| 	accelerator_for(sss, osites*nbasis, Nsimd, { |  | ||||||
|  |  | ||||||
| 	    typedef decltype(coalescedRead(in_v[0]))    calcVector; |  | ||||||
|  |  | ||||||
| 	    int ss = sss/nbasis; |  | ||||||
| 	    int b  = sss%nbasis; |  | ||||||
|  |  | ||||||
| 	    auto SE  = Stencil_v.GetEntry(point,ss); |  | ||||||
| 	    auto nbr = coalescedReadGeneralPermute(in_v[SE->_offset],SE->_permute,Nd); |  | ||||||
| 	    auto res = out_v(ss)(b); |  | ||||||
| 	    for(int bb=0;bb<nbasis;bb++) { |  | ||||||
| 	      res = res + coalescedRead(A_v[ss](b,bb))*nbr(bb); |  | ||||||
| 	    } |  | ||||||
| 	    coalescedWrite(out_v[ss](b),res); |  | ||||||
| 	}); |  | ||||||
|  |  | ||||||
| 	tmult+=usecond(); |  | ||||||
|       } |  | ||||||
|     } |  | ||||||
|     text-=usecond(); |  | ||||||
|     out = Cell.Extract(pout); |  | ||||||
|     text+=usecond(); |  | ||||||
|     ttot+=usecond(); |  | ||||||
|  |  | ||||||
|     std::cout << GridLogPerformance<<"Coarse Mult Aviews "<<tviews<<" us"<<std::endl; |  | ||||||
|     std::cout << GridLogPerformance<<"Coarse Mult exch "<<texch<<" us"<<std::endl; |  | ||||||
|     std::cout << GridLogPerformance<<"Coarse Mult mult "<<tmult<<" us"<<std::endl; |  | ||||||
|     std::cout << GridLogPerformance<<"Coarse Mult ext  "<<text<<" us"<<std::endl; |  | ||||||
|     std::cout << GridLogPerformance<<"Coarse Mult tot  "<<ttot<<" us"<<std::endl; |  | ||||||
|     std::cout << GridLogPerformance<<"Coarse Kernel flop/s "<< flops/tmult<<" mflop/s"<<std::endl; |  | ||||||
|     std::cout << GridLogPerformance<<"Coarse Kernel bytes/s"<< bytes/tmult<<" MB/s"<<std::endl; |  | ||||||
|     std::cout << GridLogPerformance<<"Coarse overall flops/s "<< flops/ttot<<" mflop/s"<<std::endl; |  | ||||||
|     std::cout << GridLogPerformance<<"Coarse total bytes   "<< bytes/1e6<<" MB"<<std::endl; |  | ||||||
|   }; |  | ||||||
|  |  | ||||||
|   void PopulateAdag(void) |  | ||||||
|   { |  | ||||||
|     for(int64_t bidx=0;bidx<CoarseGrid()->gSites() ;bidx++){ |  | ||||||
|       Coordinate bcoor; |  | ||||||
|       CoarseGrid()->GlobalIndexToGlobalCoor(bidx,bcoor); |  | ||||||
|        |  | ||||||
|       for(int p=0;p<geom.npoint;p++){ |  | ||||||
| 	Coordinate scoor = bcoor; |  | ||||||
| 	for(int mu=0;mu<bcoor.size();mu++){ |  | ||||||
| 	  int L = CoarseGrid()->GlobalDimensions()[mu]; |  | ||||||
| 	  scoor[mu] = (bcoor[mu] - geom.shifts[p][mu] + L) % L; // Modulo arithmetic |  | ||||||
| 	} |  | ||||||
| 	// Flip to poke/peekLocalSite and not too bad |  | ||||||
| 	auto link = peekSite(_A[p],scoor); |  | ||||||
| 	int pp = geom.Reverse(p); |  | ||||||
| 	pokeSite(adj(link),_Adag[pp],bcoor); |  | ||||||
|       } |  | ||||||
|     } |  | ||||||
|   } |  | ||||||
|   ///////////////////////////////////////////////////////////// |  | ||||||
|   //  |  | ||||||
|   // A) Only reduced flops option is to use a padded cell of depth 4 |  | ||||||
|   // and apply MpcDagMpc in the padded cell. |  | ||||||
|   // |  | ||||||
|   // Makes for ONE application of MpcDagMpc per vector instead of 30 or 80. |  | ||||||
|   // With the effective cell size around (B+8)^4 perhaps 12^4/4^4 ratio |  | ||||||
|   // Cost is 81x more, same as stencil size. |  | ||||||
|   // |  | ||||||
|   // But: can eliminate comms and do as local dirichlet. |  | ||||||
|   // |  | ||||||
|   // Local exchange gauge field once. |  | ||||||
|   // Apply to all vectors, local only computation. |  | ||||||
|   // Must exchange ghost subcells in reverse process of PaddedCell to take inner products |  | ||||||
|   // |  | ||||||
|   // B) Can reduce cost: pad by 1, apply Deo      (4^4+6^4+8^4+8^4 )/ (4x 4^4) |  | ||||||
|   //                     pad by 2, apply Doe |  | ||||||
|   //                     pad by 3, apply Deo |  | ||||||
|   //                     then break out 8x directions; cost is ~10x MpcDagMpc per vector |  | ||||||
|   // |  | ||||||
|   // => almost factor of 10 in setup cost, excluding data rearrangement |  | ||||||
|   // |  | ||||||
|   // Intermediates -- ignore the corner terms, leave approximate and force Hermitian |  | ||||||
|   // Intermediates -- pad by 2 and apply 1+8+24 = 33 times. |  | ||||||
|   ///////////////////////////////////////////////////////////// |  | ||||||
|  |  | ||||||
|     ////////////////////////////////////////////////////////// |  | ||||||
|     // BFM HDCG style approach: Solve a system of equations to get Aij |  | ||||||
|     ////////////////////////////////////////////////////////// |  | ||||||
|     /* |  | ||||||
|      *     Here, k,l index which possible shift within the 3^Nd "ball" connected by MdagM. |  | ||||||
|      * |  | ||||||
|      *     conj(phases[block]) proj[k][ block*Nvec+j ] =  \sum_ball  e^{i q_k . delta} < phi_{block,j} | MdagM | phi_{(block+delta),i} >  |  | ||||||
|      *                                                 =  \sum_ball e^{iqk.delta} A_ji |  | ||||||
|      * |  | ||||||
|      *     Must invert matrix M_k,l = e^[i q_k . delta_l] |  | ||||||
|      * |  | ||||||
|      *     Where q_k = delta_k . (2*M_PI/global_nb[mu]) |  | ||||||
|      */ |  | ||||||
|   void CoarsenOperator(LinearOperatorBase<Lattice<Fobj> > &linop, |  | ||||||
| 		       Aggregation<Fobj,CComplex,nbasis> & Subspace) |  | ||||||
|   { |  | ||||||
|     std::cout << GridLogMessage<< "GeneralCoarsenMatrix "<< std::endl; |  | ||||||
|     GridBase *grid = FineGrid(); |  | ||||||
|  |  | ||||||
|     RealD tproj=0.0; |  | ||||||
|     RealD teigen=0.0; |  | ||||||
|     RealD tmat=0.0; |  | ||||||
|     RealD tphase=0.0; |  | ||||||
|     RealD tinv=0.0; |  | ||||||
|  |  | ||||||
|     ///////////////////////////////////////////////////////////// |  | ||||||
|     // Orthogonalise the subblocks over the basis |  | ||||||
|     ///////////////////////////////////////////////////////////// |  | ||||||
|     CoarseScalar InnerProd(CoarseGrid());  |  | ||||||
|     blockOrthogonalise(InnerProd,Subspace.subspace); |  | ||||||
|  |  | ||||||
|     const int npoint = geom.npoint; |  | ||||||
|        |  | ||||||
|     Coordinate clatt = CoarseGrid()->GlobalDimensions(); |  | ||||||
|     int Nd = CoarseGrid()->Nd(); |  | ||||||
|  |  | ||||||
|       /* |  | ||||||
|        *     Here, k,l index which possible momentum/shift within the N-points connected by MdagM. |  | ||||||
|        *     Matrix index i is mapped to this shift via  |  | ||||||
|        *               geom.shifts[i] |  | ||||||
|        * |  | ||||||
|        *     conj(pha[block]) proj[k (which mom)][j (basis vec cpt)][block]  |  | ||||||
|        *       =  \sum_{l in ball}  e^{i q_k . delta_l} < phi_{block,j} | MdagM | phi_{(block+delta_l),i} >  |  | ||||||
|        *       =  \sum_{l in ball} e^{iqk.delta_l} A_ji^{b.b+l} |  | ||||||
|        *       = M_{kl} A_ji^{b.b+l} |  | ||||||
|        * |  | ||||||
|        *     Must assemble and invert matrix M_k,l = e^[i q_k . delta_l] |  | ||||||
|        *   |  | ||||||
|        *     Where q_k = delta_k . (2*M_PI/global_nb[mu]) |  | ||||||
|        * |  | ||||||
|        *     Then A{ji}^{b,b+l} = M^{-1}_{lm} ComputeProj_{m,b,i,j} |  | ||||||
|        */ |  | ||||||
|     teigen-=usecond(); |  | ||||||
|     Eigen::MatrixXcd Mkl    = Eigen::MatrixXcd::Zero(npoint,npoint); |  | ||||||
|     Eigen::MatrixXcd invMkl = Eigen::MatrixXcd::Zero(npoint,npoint); |  | ||||||
|     ComplexD ci(0.0,1.0); |  | ||||||
|     for(int k=0;k<npoint;k++){ // Loop over momenta |  | ||||||
|  |  | ||||||
|       for(int l=0;l<npoint;l++){ // Loop over nbr relative |  | ||||||
| 	ComplexD phase(0.0,0.0); |  | ||||||
| 	for(int mu=0;mu<Nd;mu++){ |  | ||||||
| 	  RealD TwoPiL =  M_PI * 2.0/ clatt[mu]; |  | ||||||
| 	  phase=phase+TwoPiL*geom.shifts[k][mu]*geom.shifts[l][mu]; |  | ||||||
| 	} |  | ||||||
| 	phase=exp(phase*ci); |  | ||||||
| 	Mkl(k,l) = phase; |  | ||||||
|       } |  | ||||||
|     } |  | ||||||
|     invMkl = Mkl.inverse(); |  | ||||||
|     teigen+=usecond(); |  | ||||||
|  |  | ||||||
|     /////////////////////////////////////////////////////////////////////// |  | ||||||
|     // Now compute the matrix elements of linop between the orthonormal |  | ||||||
|     // set of vectors. |  | ||||||
|     /////////////////////////////////////////////////////////////////////// |  | ||||||
|     FineField phaV(grid); // Phased block basis vector |  | ||||||
|     FineField MphaV(grid);// Matrix applied |  | ||||||
|     CoarseVector coarseInner(CoarseGrid()); |  | ||||||
|  |  | ||||||
|     std::vector<CoarseVector> ComputeProj(npoint,CoarseGrid()); |  | ||||||
|     std::vector<CoarseVector>          FT(npoint,CoarseGrid()); |  | ||||||
|     for(int i=0;i<nbasis;i++){// Loop over basis vectors |  | ||||||
|       std::cout << GridLogMessage<< "CoarsenMatrixColoured vec "<<i<<"/"<<nbasis<< std::endl; |  | ||||||
|       for(int p=0;p<npoint;p++){ // Loop over momenta in npoint |  | ||||||
| 	///////////////////////////////////////////////////// |  | ||||||
| 	// Stick a phase on every block |  | ||||||
| 	///////////////////////////////////////////////////// |  | ||||||
| 	tphase-=usecond(); |  | ||||||
| 	CoarseComplexField coor(CoarseGrid()); |  | ||||||
| 	CoarseComplexField pha(CoarseGrid());	pha=Zero(); |  | ||||||
| 	for(int mu=0;mu<Nd;mu++){ |  | ||||||
| 	  LatticeCoordinate(coor,mu); |  | ||||||
| 	  RealD TwoPiL =  M_PI * 2.0/ clatt[mu]; |  | ||||||
| 	  pha = pha + (TwoPiL * geom.shifts[p][mu]) * coor; |  | ||||||
| 	} |  | ||||||
| 	pha  =exp(pha*ci); |  | ||||||
| 	phaV=Zero(); |  | ||||||
| 	blockZAXPY(phaV,pha,Subspace.subspace[i],phaV); |  | ||||||
| 	tphase+=usecond(); |  | ||||||
|  |  | ||||||
| 	///////////////////////////////////////////////////////////////////// |  | ||||||
| 	// Multiple phased subspace vector by matrix and project to subspace |  | ||||||
| 	// Remove local bulk phase to leave relative phases |  | ||||||
| 	///////////////////////////////////////////////////////////////////// |  | ||||||
| 	tmat-=usecond(); |  | ||||||
| 	linop.Op(phaV,MphaV); |  | ||||||
| 	tmat+=usecond(); |  | ||||||
|  |  | ||||||
| 	tproj-=usecond(); |  | ||||||
| 	blockProject(coarseInner,MphaV,Subspace.subspace); |  | ||||||
| 	coarseInner = conjugate(pha) * coarseInner; |  | ||||||
|  |  | ||||||
| 	ComputeProj[p] = coarseInner; |  | ||||||
| 	tproj+=usecond(); |  | ||||||
|  |  | ||||||
|       } |  | ||||||
|  |  | ||||||
|       tinv-=usecond(); |  | ||||||
|       for(int k=0;k<npoint;k++){ |  | ||||||
| 	FT[k] = Zero(); |  | ||||||
| 	for(int l=0;l<npoint;l++){ |  | ||||||
| 	  FT[k]= FT[k]+ invMkl(l,k)*ComputeProj[l]; |  | ||||||
| 	} |  | ||||||
|        |  | ||||||
| 	int osites=CoarseGrid()->oSites(); |  | ||||||
| 	autoView( A_v  , _A[k], AcceleratorWrite); |  | ||||||
| 	autoView( FT_v  , FT[k], AcceleratorRead); |  | ||||||
| 	accelerator_for(sss, osites, 1, { |  | ||||||
| 	    for(int j=0;j<nbasis;j++){ |  | ||||||
| 	      A_v[sss](j,i) = FT_v[sss](j); |  | ||||||
| 	    } |  | ||||||
|         }); |  | ||||||
|       } |  | ||||||
|       tinv+=usecond(); |  | ||||||
|     } |  | ||||||
|  |  | ||||||
|     for(int p=0;p<geom.npoint;p++){ |  | ||||||
|       Coordinate coor({0,0,0,0,0}); |  | ||||||
|       auto sval = peekSite(_A[p],coor); |  | ||||||
|     } |  | ||||||
|  |  | ||||||
|     // Only needed if nonhermitian |  | ||||||
|     if ( ! hermitian ) { |  | ||||||
|       std::cout << GridLogMessage<<"PopulateAdag  "<<std::endl; |  | ||||||
|       PopulateAdag(); |  | ||||||
|     } |  | ||||||
|  |  | ||||||
|     // Need to write something to populate Adag from A |  | ||||||
|     ExchangeCoarseLinks(); |  | ||||||
|     std::cout << GridLogMessage<<"CoarsenOperator eigen  "<<teigen<<" us"<<std::endl; |  | ||||||
|     std::cout << GridLogMessage<<"CoarsenOperator phase  "<<tphase<<" us"<<std::endl; |  | ||||||
|     std::cout << GridLogMessage<<"CoarsenOperator mat    "<<tmat <<" us"<<std::endl; |  | ||||||
|     std::cout << GridLogMessage<<"CoarsenOperator proj   "<<tproj<<" us"<<std::endl; |  | ||||||
|     std::cout << GridLogMessage<<"CoarsenOperator inv    "<<tinv<<" us"<<std::endl; |  | ||||||
|   } |  | ||||||
|   void ExchangeCoarseLinks(void){ |  | ||||||
|     for(int p=0;p<geom.npoint;p++){ |  | ||||||
|       _A[p] = Cell.Exchange(_A[p]); |  | ||||||
|       _Adag[p]= Cell.Exchange(_Adag[p]); |  | ||||||
|     } |  | ||||||
|   } |  | ||||||
|   virtual  void Mdiag    (const Field &in, Field &out){ assert(0);}; |  | ||||||
|   virtual  void Mdir     (const Field &in, Field &out,int dir, int disp){assert(0);}; |  | ||||||
|   virtual  void MdirAll  (const Field &in, std::vector<Field> &out){assert(0);}; |  | ||||||
| }; |  | ||||||
|  |  | ||||||
|  |  | ||||||
| NAMESPACE_END(Grid); |  | ||||||
| @@ -1,243 +0,0 @@ | |||||||
| /************************************************************************************* |  | ||||||
|  |  | ||||||
|     Grid physics library, www.github.com/paboyle/Grid  |  | ||||||
|  |  | ||||||
|     Source file: ./lib/algorithms/GeneralCoarsenedMatrix.h |  | ||||||
|  |  | ||||||
|     Copyright (C) 2015 |  | ||||||
|  |  | ||||||
| Author: Peter Boyle <pboyle@bnl.gov> |  | ||||||
|  |  | ||||||
|     This program is free software; you can redistribute it and/or modify |  | ||||||
|     it under the terms of the GNU General Public License as published by |  | ||||||
|     the Free Software Foundation; either version 2 of the License, or |  | ||||||
|     (at your option) any later version. |  | ||||||
|  |  | ||||||
|     This program is distributed in the hope that it will be useful, |  | ||||||
|     but WITHOUT ANY WARRANTY; without even the implied warranty of |  | ||||||
|     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the |  | ||||||
|     GNU General Public License for more details. |  | ||||||
|  |  | ||||||
|     You should have received a copy of the GNU General Public License along |  | ||||||
|     with this program; if not, write to the Free Software Foundation, Inc., |  | ||||||
|     51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. |  | ||||||
|  |  | ||||||
|     See the full license in the file "LICENSE" in the top level distribution directory |  | ||||||
| *************************************************************************************/ |  | ||||||
| /*  END LEGAL */ |  | ||||||
| #pragma once |  | ||||||
|  |  | ||||||
| NAMESPACE_BEGIN(Grid); |  | ||||||
|  |  | ||||||
|  |  | ||||||
| ///////////////////////////////////////////////////////////////// |  | ||||||
| // Geometry class in cartesian case |  | ||||||
| ///////////////////////////////////////////////////////////////// |  | ||||||
|  |  | ||||||
// Nearest-neighbour stencil bookkeeping for a cartesian coarse grid.
//
// The stencil has 2*nd+1 points: nd forward hops, nd backward hops and the
// self point (stored last).  For a 5d fine theory (_d==5) the stencil is built
// over the 4 dimensions following the first one, so directions start at base=1.
class Geometry {
public:
  int npoint;                       // number of stencil points, 2*nd+1
  int base;                         // first direction index used by the stencil (1 for 5d, else 0)
  std::vector<int> directions   ;   // direction of each point
  std::vector<int> displacements;   // displacement (+1, -1 or 0) of each point
  std::vector<int> points_dagger;   // index of the point with the opposite displacement

  Geometry(int _d)  {
    
    base = (_d==5) ? 1:0;

    // make coarse grid stencil for 4d , not 5d
    if ( _d==5 ) _d=4;

    npoint = 2*_d+1;
    directions.resize(npoint);
    displacements.resize(npoint);
    points_dagger.resize(npoint);
    for(int d=0;d<_d;d++){
      directions[d   ] = d+base;
      directions[d+_d] = d+base;
      displacements[d  ] = +1;
      displacements[d+_d]= -1;
      points_dagger[d   ] = d+_d;
      points_dagger[d+_d] = d;
    }
    // Self point: recorded as direction 0, displacement 0, and is its own dagger
    directions   [2*_d]=0;
    displacements[2*_d]=0;
    points_dagger[2*_d]=2*_d;
  }

  // Map (direction, displacement) to the stencil point index.
  //   disp ==  0 : the self point, npoint-1, whatever dir is
  //   disp == +1 : dir-base
  //   disp == -1 : nd + dir-base
  // For disp != 0, dir must lie in [base, base+nd).
  int point(int dir, int disp) {
    assert(disp == -1 || disp == 0 || disp == 1);

    // directions faster index = new indexing
    // 4d (base = 0):
    // point 0  1  2  3  4  5  6  7  8
    // dir   0  1  2  3  0  1  2  3  0
    // disp +1 +1 +1 +1 -1 -1 -1 -1  0
    // 5d (base = 1):
    // point 0  1  2  3  4  5  6  7  8
    // dir   1  2  3  4  1  2  3  4  0
    // disp +1 +1 +1 +1 -1 -1 -1 -1  0

    // Number of hopping directions, recovered from the stencil size
    const int nd = (npoint - 1) / 2;

    // The self point must be resolved before the range check on dir: it is
    // stored with dir 0, which lies outside [base, base+nd) when base==1.
    if (disp == 0)
      return npoint - 1;

    assert(base <= dir && dir < base + nd);

    // (1-disp)/2 is 0 for the forward hop and 1 for the backward hop
    return (1 - disp) / 2 * nd + dir - base;
  }
};
|  |  | ||||||
| ///////////////////////////////////////////////////////////////// |  | ||||||
| // Less local equivalent of Geometry class in cartesian case |  | ||||||
| ///////////////////////////////////////////////////////////////// |  | ||||||
| class NonLocalStencilGeometry { |  | ||||||
| public: |  | ||||||
|   int depth; |  | ||||||
|   int hops; |  | ||||||
|   int npoint; |  | ||||||
|   std::vector<Coordinate> shifts; |  | ||||||
|   Coordinate stencil_size; |  | ||||||
|   Coordinate stencil_lo; |  | ||||||
|   Coordinate stencil_hi; |  | ||||||
|   GridCartesian *grid; |  | ||||||
|   GridCartesian *Grid() {return grid;}; |  | ||||||
|   int Depth(void){return 1;};   // Ghost zone depth |  | ||||||
|   int Hops(void){return hops;}; // # of hops=> level of corner fill in in stencil |  | ||||||
|  |  | ||||||
|   virtual int DimSkip(void) =0; |  | ||||||
|  |  | ||||||
|   virtual ~NonLocalStencilGeometry() {}; |  | ||||||
|  |  | ||||||
|   int  Reverse(int point) |  | ||||||
|   { |  | ||||||
|     int Nd = Grid()->Nd(); |  | ||||||
|     Coordinate shft = shifts[point]; |  | ||||||
|     Coordinate rev(Nd); |  | ||||||
|     for(int mu=0;mu<Nd;mu++) rev[mu]= -shft[mu]; |  | ||||||
|     for(int p=0;p<npoint;p++){ |  | ||||||
|       if(rev==shifts[p]){ |  | ||||||
| 	return p; |  | ||||||
|       } |  | ||||||
|     } |  | ||||||
|     assert(0); |  | ||||||
|     return -1; |  | ||||||
|   } |  | ||||||
|   void BuildShifts(void) |  | ||||||
|   { |  | ||||||
|     this->shifts.resize(0); |  | ||||||
|     int Nd = this->grid->Nd(); |  | ||||||
|  |  | ||||||
|     int dd = this->DimSkip(); |  | ||||||
|     for(int s0=this->stencil_lo[dd+0];s0<=this->stencil_hi[dd+0];s0++){ |  | ||||||
|     for(int s1=this->stencil_lo[dd+1];s1<=this->stencil_hi[dd+1];s1++){ |  | ||||||
|     for(int s2=this->stencil_lo[dd+2];s2<=this->stencil_hi[dd+2];s2++){ |  | ||||||
|     for(int s3=this->stencil_lo[dd+3];s3<=this->stencil_hi[dd+3];s3++){ |  | ||||||
|       Coordinate sft(Nd,0); |  | ||||||
|       sft[dd+0] = s0; |  | ||||||
|       sft[dd+1] = s1; |  | ||||||
|       sft[dd+2] = s2; |  | ||||||
|       sft[dd+3] = s3; |  | ||||||
|       int nhops = abs(s0)+abs(s1)+abs(s2)+abs(s3); |  | ||||||
|       if(nhops<=this->hops) this->shifts.push_back(sft); |  | ||||||
|     }}}} |  | ||||||
|     this->npoint = this->shifts.size(); |  | ||||||
|     std::cout << GridLogMessage << "NonLocalStencilGeometry has "<< this->npoint << " terms in stencil "<<std::endl; |  | ||||||
|   } |  | ||||||
|    |  | ||||||
|   NonLocalStencilGeometry(GridCartesian *_coarse_grid,int _hops) : grid(_coarse_grid), hops(_hops) |  | ||||||
|   { |  | ||||||
|     Coordinate latt = grid->GlobalDimensions(); |  | ||||||
|     stencil_size.resize(grid->Nd()); |  | ||||||
|     stencil_lo.resize(grid->Nd()); |  | ||||||
|     stencil_hi.resize(grid->Nd()); |  | ||||||
|     for(int d=0;d<grid->Nd();d++){ |  | ||||||
|      if ( latt[d] == 1 ) { |  | ||||||
|       stencil_lo[d] = 0; |  | ||||||
|       stencil_hi[d] = 0; |  | ||||||
|       stencil_size[d]= 1; |  | ||||||
|      } else if ( latt[d] == 2 ) { |  | ||||||
|       stencil_lo[d] = -1; |  | ||||||
|       stencil_hi[d] = 0; |  | ||||||
|       stencil_size[d]= 2; |  | ||||||
|      } else if ( latt[d] > 2 ) { |  | ||||||
|        stencil_lo[d] = -1; |  | ||||||
|        stencil_hi[d] =  1; |  | ||||||
|        stencil_size[d]= 3; |  | ||||||
|      } |  | ||||||
|     } |  | ||||||
|   }; |  | ||||||
|  |  | ||||||
| }; |  | ||||||
|  |  | ||||||
| // Need to worry about red-black now |  | ||||||
| class NonLocalStencilGeometry4D : public NonLocalStencilGeometry { |  | ||||||
| public: |  | ||||||
|   virtual int DimSkip(void) { return 0;}; |  | ||||||
|   NonLocalStencilGeometry4D(GridCartesian *Coarse,int _hops) : NonLocalStencilGeometry(Coarse,_hops) { }; |  | ||||||
|   virtual ~NonLocalStencilGeometry4D() {}; |  | ||||||
| }; |  | ||||||
| class NonLocalStencilGeometry5D : public NonLocalStencilGeometry { |  | ||||||
| public: |  | ||||||
|   virtual int DimSkip(void) { return 1; };  |  | ||||||
|   NonLocalStencilGeometry5D(GridCartesian *Coarse,int _hops) : NonLocalStencilGeometry(Coarse,_hops)  { }; |  | ||||||
|   virtual ~NonLocalStencilGeometry5D() {}; |  | ||||||
| }; |  | ||||||
| /* |  | ||||||
|  * Bunch of different options classes |  | ||||||
|  */ |  | ||||||
| class NextToNextToNextToNearestStencilGeometry4D : public NonLocalStencilGeometry4D { |  | ||||||
| public: |  | ||||||
|   NextToNextToNextToNearestStencilGeometry4D(GridCartesian *Coarse) :  NonLocalStencilGeometry4D(Coarse,4) |  | ||||||
|   { |  | ||||||
|     this->BuildShifts(); |  | ||||||
|   }; |  | ||||||
| }; |  | ||||||
| class NextToNextToNextToNearestStencilGeometry5D : public  NonLocalStencilGeometry5D { |  | ||||||
| public: |  | ||||||
|   NextToNextToNextToNearestStencilGeometry5D(GridCartesian *Coarse) :  NonLocalStencilGeometry5D(Coarse,4) |  | ||||||
|   { |  | ||||||
|     this->BuildShifts(); |  | ||||||
|   }; |  | ||||||
| }; |  | ||||||
| class NextToNearestStencilGeometry4D : public  NonLocalStencilGeometry4D { |  | ||||||
| public: |  | ||||||
|   NextToNearestStencilGeometry4D(GridCartesian *Coarse) :  NonLocalStencilGeometry4D(Coarse,2) |  | ||||||
|   { |  | ||||||
|     this->BuildShifts(); |  | ||||||
|   }; |  | ||||||
| }; |  | ||||||
| class NextToNearestStencilGeometry5D : public  NonLocalStencilGeometry5D { |  | ||||||
| public: |  | ||||||
|   NextToNearestStencilGeometry5D(GridCartesian *Coarse) :  NonLocalStencilGeometry5D(Coarse,2) |  | ||||||
|   { |  | ||||||
|     this->BuildShifts(); |  | ||||||
|   }; |  | ||||||
| }; |  | ||||||
| class NearestStencilGeometry4D : public  NonLocalStencilGeometry4D { |  | ||||||
| public: |  | ||||||
|   NearestStencilGeometry4D(GridCartesian *Coarse) :  NonLocalStencilGeometry4D(Coarse,1) |  | ||||||
|   { |  | ||||||
|     this->BuildShifts(); |  | ||||||
|   }; |  | ||||||
| }; |  | ||||||
| class NearestStencilGeometry5D : public  NonLocalStencilGeometry5D { |  | ||||||
| public: |  | ||||||
|   NearestStencilGeometry5D(GridCartesian *Coarse) :  NonLocalStencilGeometry5D(Coarse,1) |  | ||||||
|   { |  | ||||||
|     this->BuildShifts(); |  | ||||||
|   }; |  | ||||||
| }; |  | ||||||
|  |  | ||||||
| NAMESPACE_END(Grid); |  | ||||||
| @@ -1,33 +0,0 @@ | |||||||
|     /************************************************************************************* |  | ||||||
|  |  | ||||||
|     Grid physics library, www.github.com/paboyle/Grid |  | ||||||
|  |  | ||||||
|     Source file: Grid/algorithms/multigrid/MultiGrid.h |  | ||||||
|  |  | ||||||
|     Copyright (C) 2023 |  | ||||||
|  |  | ||||||
| Author: Peter Boyle <pboyle@bnl.gov> |  | ||||||
|  |  | ||||||
|     This program is free software; you can redistribute it and/or modify |  | ||||||
|     it under the terms of the GNU General Public License as published by |  | ||||||
|     the Free Software Foundation; either version 2 of the License, or |  | ||||||
|     (at your option) any later version. |  | ||||||
|  |  | ||||||
|     This program is distributed in the hope that it will be useful, |  | ||||||
|     but WITHOUT ANY WARRANTY; without even the implied warranty of |  | ||||||
|     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the |  | ||||||
|     GNU General Public License for more details. |  | ||||||
|  |  | ||||||
|     You should have received a copy of the GNU General Public License along |  | ||||||
|     with this program; if not, write to the Free Software Foundation, Inc., |  | ||||||
|     51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. |  | ||||||
|  |  | ||||||
|     See the full license in the file "LICENSE" in the top level distribution directory |  | ||||||
|     *************************************************************************************/ |  | ||||||
|     /*  END LEGAL */ |  | ||||||
| #pragma once |  | ||||||
|  |  | ||||||
| #include <Grid/algorithms/multigrid/Aggregates.h> |  | ||||||
| #include <Grid/algorithms/multigrid/Geometry.h> |  | ||||||
| #include <Grid/algorithms/multigrid/CoarsenedMatrix.h> |  | ||||||
| #include <Grid/algorithms/multigrid/GeneralCoarsenedMatrix.h> |  | ||||||
| @@ -70,8 +70,8 @@ public: | |||||||
|   Coordinate _istride;    // Inner stride i.e. within simd lane |   Coordinate _istride;    // Inner stride i.e. within simd lane | ||||||
|   int _osites;                  // _isites*_osites = product(dimensions). |   int _osites;                  // _isites*_osites = product(dimensions). | ||||||
|   int _isites; |   int _isites; | ||||||
|   int64_t _fsites;                  // _isites*_osites = product(dimensions). |   int _fsites;                  // _isites*_osites = product(dimensions). | ||||||
|   int64_t _gsites; |   int _gsites; | ||||||
|   Coordinate _slice_block;// subslice information |   Coordinate _slice_block;// subslice information | ||||||
|   Coordinate _slice_stride; |   Coordinate _slice_stride; | ||||||
|   Coordinate _slice_nblock; |   Coordinate _slice_nblock; | ||||||
| @@ -183,7 +183,7 @@ public: | |||||||
|   inline int Nsimd(void)  const { return _isites; };// Synonymous with iSites |   inline int Nsimd(void)  const { return _isites; };// Synonymous with iSites | ||||||
|   inline int oSites(void) const { return _osites; }; |   inline int oSites(void) const { return _osites; }; | ||||||
|   inline int lSites(void) const { return _isites*_osites; };  |   inline int lSites(void) const { return _isites*_osites; };  | ||||||
|   inline int64_t gSites(void) const { return (int64_t)_isites*(int64_t)_osites*(int64_t)_Nprocessors; };  |   inline int gSites(void) const { return _isites*_osites*_Nprocessors; };  | ||||||
|   inline int Nd    (void) const { return _ndimension;}; |   inline int Nd    (void) const { return _ndimension;}; | ||||||
|  |  | ||||||
|   inline const Coordinate LocalStarts(void)             { return _lstart;    }; |   inline const Coordinate LocalStarts(void)             { return _lstart;    }; | ||||||
| @@ -214,7 +214,7 @@ public: | |||||||
|   //////////////////////////////////////////////////////////////// |   //////////////////////////////////////////////////////////////// | ||||||
|   // Global addressing |   // Global addressing | ||||||
|   //////////////////////////////////////////////////////////////// |   //////////////////////////////////////////////////////////////// | ||||||
|   void GlobalIndexToGlobalCoor(int64_t gidx,Coordinate &gcoor){ |   void GlobalIndexToGlobalCoor(int gidx,Coordinate &gcoor){ | ||||||
|     assert(gidx< gSites()); |     assert(gidx< gSites()); | ||||||
|     Lexicographic::CoorFromIndex(gcoor,gidx,_gdimensions); |     Lexicographic::CoorFromIndex(gcoor,gidx,_gdimensions); | ||||||
|   } |   } | ||||||
| @@ -222,7 +222,7 @@ public: | |||||||
|     assert(lidx<lSites()); |     assert(lidx<lSites()); | ||||||
|     Lexicographic::CoorFromIndex(lcoor,lidx,_ldimensions); |     Lexicographic::CoorFromIndex(lcoor,lidx,_ldimensions); | ||||||
|   } |   } | ||||||
|   void GlobalCoorToGlobalIndex(const Coordinate & gcoor,int64_t & gidx){ |   void GlobalCoorToGlobalIndex(const Coordinate & gcoor,int & gidx){ | ||||||
|     gidx=0; |     gidx=0; | ||||||
|     int mult=1; |     int mult=1; | ||||||
|     for(int mu=0;mu<_ndimension;mu++) { |     for(int mu=0;mu<_ndimension;mu++) { | ||||||
|   | |||||||
| @@ -360,7 +360,7 @@ public: | |||||||
|  |  | ||||||
| template<class vobj> std::ostream& operator<< (std::ostream& stream, const Lattice<vobj> &o){ | template<class vobj> std::ostream& operator<< (std::ostream& stream, const Lattice<vobj> &o){ | ||||||
|   typedef typename vobj::scalar_object sobj; |   typedef typename vobj::scalar_object sobj; | ||||||
|   for(int64_t g=0;g<o.Grid()->_gsites;g++){ |   for(int g=0;g<o.Grid()->_gsites;g++){ | ||||||
|  |  | ||||||
|     Coordinate gcoor; |     Coordinate gcoor; | ||||||
|     o.Grid()->GlobalIndexToGlobalCoor(g,gcoor); |     o.Grid()->GlobalIndexToGlobalCoor(g,gcoor); | ||||||
|   | |||||||
| @@ -361,14 +361,9 @@ public: | |||||||
|     _bernoulli.resize(_vol,std::discrete_distribution<int32_t>{1,1}); |     _bernoulli.resize(_vol,std::discrete_distribution<int32_t>{1,1}); | ||||||
|     _uid.resize(_vol,std::uniform_int_distribution<uint32_t>() ); |     _uid.resize(_vol,std::uniform_int_distribution<uint32_t>() ); | ||||||
|   } |   } | ||||||
|   template <class vobj,class distribution> inline void fill(Lattice<vobj> &l,std::vector<distribution> &dist) |  | ||||||
|   { |   template <class vobj,class distribution> inline void fill(Lattice<vobj> &l,std::vector<distribution> &dist){ | ||||||
|     if ( l.Grid()->_isCheckerBoarded ) { |  | ||||||
|       Lattice<vobj> tmp(_grid); |  | ||||||
|       fill(tmp,dist); |  | ||||||
|       pickCheckerboard(l.Checkerboard(),l,tmp); |  | ||||||
|       return; |  | ||||||
|     } |  | ||||||
|     typedef typename vobj::scalar_object scalar_object; |     typedef typename vobj::scalar_object scalar_object; | ||||||
|     typedef typename vobj::scalar_type scalar_type; |     typedef typename vobj::scalar_type scalar_type; | ||||||
|     typedef typename vobj::vector_type vector_type; |     typedef typename vobj::vector_type vector_type; | ||||||
| @@ -432,7 +427,7 @@ public: | |||||||
| #if 1 | #if 1 | ||||||
|     thread_for( lidx, _grid->lSites(), { |     thread_for( lidx, _grid->lSites(), { | ||||||
|  |  | ||||||
| 	int64_t gidx; | 	int gidx; | ||||||
| 	int o_idx; | 	int o_idx; | ||||||
| 	int i_idx; | 	int i_idx; | ||||||
| 	int rank; | 	int rank; | ||||||
|   | |||||||
| @@ -471,13 +471,13 @@ inline void blockSum(Lattice<vobj> &coarseData,const Lattice<vobj> &fineData) | |||||||
|  |  | ||||||
|   vobj zz = Zero(); |   vobj zz = Zero(); | ||||||
|    |    | ||||||
|   accelerator_for(sc,coarse->oSites(),vobj::Nsimd(),{ |   accelerator_for(sc,coarse->oSites(),1,{ | ||||||
|  |  | ||||||
|       // One thread per sub block |       // One thread per sub block | ||||||
|       Coordinate coor_c(_ndimension); |       Coordinate coor_c(_ndimension); | ||||||
|       Lexicographic::CoorFromIndex(coor_c,sc,coarse_rdimensions);  // Block coordinate |       Lexicographic::CoorFromIndex(coor_c,sc,coarse_rdimensions);  // Block coordinate | ||||||
|  |  | ||||||
|       auto cd = coalescedRead(zz); |       vobj cd = zz; | ||||||
|        |        | ||||||
|       for(int sb=0;sb<blockVol;sb++){ |       for(int sb=0;sb<blockVol;sb++){ | ||||||
|  |  | ||||||
| @@ -488,10 +488,10 @@ inline void blockSum(Lattice<vobj> &coarseData,const Lattice<vobj> &fineData) | |||||||
| 	for(int d=0;d<_ndimension;d++) coor_f[d]=coor_c[d]*block_r[d] + coor_b[d]; | 	for(int d=0;d<_ndimension;d++) coor_f[d]=coor_c[d]*block_r[d] + coor_b[d]; | ||||||
| 	Lexicographic::IndexFromCoor(coor_f,sf,fine_rdimensions); | 	Lexicographic::IndexFromCoor(coor_f,sf,fine_rdimensions); | ||||||
|  |  | ||||||
| 	cd=cd+coalescedRead(fineData_p[sf]); | 	cd=cd+fineData_p[sf]; | ||||||
|       } |       } | ||||||
|  |  | ||||||
|       coalescedWrite(coarseData_p[sc],cd); |       coarseData_p[sc] = cd; | ||||||
|  |  | ||||||
|     }); |     }); | ||||||
|   return; |   return; | ||||||
| @@ -1054,7 +1054,7 @@ void Replicate(const Lattice<vobj> &coarse,Lattice<vobj> & fine) | |||||||
|  |  | ||||||
|   Coordinate fcoor(nd); |   Coordinate fcoor(nd); | ||||||
|   Coordinate ccoor(nd); |   Coordinate ccoor(nd); | ||||||
|   for(int64_t g=0;g<fg->gSites();g++){ |   for(int g=0;g<fg->gSites();g++){ | ||||||
|  |  | ||||||
|     fg->GlobalIndexToGlobalCoor(g,fcoor); |     fg->GlobalIndexToGlobalCoor(g,fcoor); | ||||||
|     for(int d=0;d<nd;d++){ |     for(int d=0;d<nd;d++){ | ||||||
|   | |||||||
| @@ -63,9 +63,8 @@ public: | |||||||
|     dims=_grid->Nd(); |     dims=_grid->Nd(); | ||||||
|     AllocateGrids(); |     AllocateGrids(); | ||||||
|     Coordinate local     =unpadded_grid->LocalDimensions(); |     Coordinate local     =unpadded_grid->LocalDimensions(); | ||||||
|     Coordinate procs     =unpadded_grid->ProcessorGrid(); |  | ||||||
|     for(int d=0;d<dims;d++){ |     for(int d=0;d<dims;d++){ | ||||||
|       if ( procs[d] > 1 ) assert(local[d]>=depth); |       assert(local[d]>=depth); | ||||||
|     } |     } | ||||||
|   } |   } | ||||||
|   void DeleteGrids(void) |   void DeleteGrids(void) | ||||||
| @@ -86,9 +85,7 @@ public: | |||||||
|     // expand up one dim at a time |     // expand up one dim at a time | ||||||
|     for(int d=0;d<dims;d++){ |     for(int d=0;d<dims;d++){ | ||||||
|  |  | ||||||
|       if ( processors[d] > 1 ) {  |  | ||||||
|       plocal[d] += 2*depth;  |       plocal[d] += 2*depth;  | ||||||
|       } |  | ||||||
|  |  | ||||||
|       for(int d=0;d<dims;d++){ |       for(int d=0;d<dims;d++){ | ||||||
| 	global[d] = plocal[d]*processors[d]; | 	global[d] = plocal[d]*processors[d]; | ||||||
| @@ -100,17 +97,11 @@ public: | |||||||
|   template<class vobj> |   template<class vobj> | ||||||
|   inline Lattice<vobj> Extract(const Lattice<vobj> &in) const |   inline Lattice<vobj> Extract(const Lattice<vobj> &in) const | ||||||
|   { |   { | ||||||
|     Coordinate processors=unpadded_grid->_processors; |  | ||||||
|  |  | ||||||
|     Lattice<vobj> out(unpadded_grid); |     Lattice<vobj> out(unpadded_grid); | ||||||
|  |  | ||||||
|     Coordinate local     =unpadded_grid->LocalDimensions(); |     Coordinate local     =unpadded_grid->LocalDimensions(); | ||||||
|     // depends on the MPI spread       |     Coordinate fll(dims,depth); // depends on the MPI spread | ||||||
|     Coordinate fll(dims,depth); |  | ||||||
|     Coordinate tll(dims,0); // depends on the MPI spread |     Coordinate tll(dims,0); // depends on the MPI spread | ||||||
|     for(int d=0;d<dims;d++){ |  | ||||||
|       if( processors[d]==1 ) fll[d]=0; |  | ||||||
|     } |  | ||||||
|     localCopyRegion(in,out,fll,tll,local); |     localCopyRegion(in,out,fll,tll,local); | ||||||
|     return out; |     return out; | ||||||
|   } |   } | ||||||
| @@ -129,7 +120,6 @@ public: | |||||||
|   template<class vobj> |   template<class vobj> | ||||||
|   inline Lattice<vobj> Expand(int dim, const Lattice<vobj> &in, const CshiftImplBase<vobj> &cshift = CshiftImplDefault<vobj>()) const |   inline Lattice<vobj> Expand(int dim, const Lattice<vobj> &in, const CshiftImplBase<vobj> &cshift = CshiftImplDefault<vobj>()) const | ||||||
|   { |   { | ||||||
|     Coordinate processors=unpadded_grid->_processors; |  | ||||||
|     GridBase *old_grid = in.Grid(); |     GridBase *old_grid = in.Grid(); | ||||||
|     GridCartesian *new_grid = grids[dim];//These are new grids |     GridCartesian *new_grid = grids[dim];//These are new grids | ||||||
|     Lattice<vobj>  padded(new_grid); |     Lattice<vobj>  padded(new_grid); | ||||||
| @@ -143,18 +133,6 @@ public: | |||||||
|  |  | ||||||
|     double tins=0, tshift=0; |     double tins=0, tshift=0; | ||||||
|      |      | ||||||
|     int islocal = 0 ; |  | ||||||
|     if ( processors[dim] == 1 ) islocal = 1; |  | ||||||
|  |  | ||||||
|     if ( islocal ) { |  | ||||||
|        |  | ||||||
|       double t = usecond(); |  | ||||||
|       for(int x=0;x<local[dim];x++){ |  | ||||||
| 	InsertSliceLocal(in,padded,x,x,dim); |  | ||||||
|       } |  | ||||||
|       tins += usecond() - t; |  | ||||||
|        |  | ||||||
|     } else {  |  | ||||||
|     // Middle bit |     // Middle bit | ||||||
|     double t = usecond(); |     double t = usecond(); | ||||||
|     for(int x=0;x<local[dim];x++){ |     for(int x=0;x<local[dim];x++){ | ||||||
| @@ -183,7 +161,7 @@ public: | |||||||
|       InsertSliceLocal(shifted,padded,x,x,dim); |       InsertSliceLocal(shifted,padded,x,x,dim); | ||||||
|     } |     } | ||||||
|     tins += usecond() - t; |     tins += usecond() - t; | ||||||
|     } |  | ||||||
|     std::cout << GridLogPerformance << "PaddedCell::Expand timings: cshift:" << tshift/1000 << "ms, insert-slice:" << tins/1000 << "ms" << std::endl; |     std::cout << GridLogPerformance << "PaddedCell::Expand timings: cshift:" << tshift/1000 << "ms, insert-slice:" << tins/1000 << "ms" << std::endl; | ||||||
|      |      | ||||||
|     return padded; |     return padded; | ||||||
|   | |||||||
| @@ -67,6 +67,7 @@ NAMESPACE_CHECK(Scalar); | |||||||
| #include <Grid/qcd/utils/Metric.h> | #include <Grid/qcd/utils/Metric.h> | ||||||
| NAMESPACE_CHECK(Metric); | NAMESPACE_CHECK(Metric); | ||||||
| #include <Grid/qcd/utils/CovariantLaplacian.h> | #include <Grid/qcd/utils/CovariantLaplacian.h> | ||||||
|  | #include <Grid/qcd/utils/CovariantLaplacianRat.h> | ||||||
| NAMESPACE_CHECK(CovariantLaplacian); | NAMESPACE_CHECK(CovariantLaplacian); | ||||||
|  |  | ||||||
|  |  | ||||||
|   | |||||||
| @@ -65,6 +65,19 @@ struct WilsonImplParams { | |||||||
|   } |   } | ||||||
| }; | }; | ||||||
|  |  | ||||||
|  | struct GaugeImplParams { | ||||||
|  | //  bool overlapCommsCompute; | ||||||
|  | //  AcceleratorVector<Real,Nd> twist_n_2pi_L; | ||||||
|  |   AcceleratorVector<Complex,Nd> boundary_phases; | ||||||
|  |   GaugeImplParams()  { | ||||||
|  |     boundary_phases.resize(Nd, 1.0); | ||||||
|  | //      twist_n_2pi_L.resize(Nd, 0.0); | ||||||
|  |   }; | ||||||
|  |   GaugeImplParams(const AcceleratorVector<Complex,Nd> phi) : boundary_phases(phi) { | ||||||
|  | //    twist_n_2pi_L.resize(Nd, 0.0); | ||||||
|  |   } | ||||||
|  | }; | ||||||
|  |  | ||||||
| struct StaggeredImplParams { | struct StaggeredImplParams { | ||||||
|   Coordinate dirichlet; // Blocksize of dirichlet BCs |   Coordinate dirichlet; // Blocksize of dirichlet BCs | ||||||
|   int  partialDirichlet; |   int  partialDirichlet; | ||||||
|   | |||||||
| @@ -32,7 +32,7 @@ directory | |||||||
|  |  | ||||||
| NAMESPACE_BEGIN(Grid); | NAMESPACE_BEGIN(Grid); | ||||||
|  |  | ||||||
| #define CPS_MD_TIME | #undef CPS_MD_TIME | ||||||
|  |  | ||||||
| #ifdef CPS_MD_TIME | #ifdef CPS_MD_TIME | ||||||
| #define HMC_MOMENTUM_DENOMINATOR (2.0) | #define HMC_MOMENTUM_DENOMINATOR (2.0) | ||||||
|   | |||||||
| @@ -42,9 +42,13 @@ template <class Gimpl> | |||||||
| class WilsonGaugeAction : public Action<typename Gimpl::GaugeField> { | class WilsonGaugeAction : public Action<typename Gimpl::GaugeField> { | ||||||
| public:   | public:   | ||||||
|   INHERIT_GIMPL_TYPES(Gimpl); |   INHERIT_GIMPL_TYPES(Gimpl); | ||||||
|  |   typedef GaugeImplParams ImplParams; | ||||||
|  |   ImplParams Params; | ||||||
|  |  | ||||||
|   /////////////////////////// constructors |   /////////////////////////// constructors | ||||||
|   explicit WilsonGaugeAction(RealD beta_):beta(beta_){}; |   explicit WilsonGaugeAction(RealD beta_, | ||||||
|  | 		  const ImplParams &p = ImplParams() | ||||||
|  | 		  ):beta(beta_),Params(p){}; | ||||||
|  |  | ||||||
|   virtual std::string action_name() {return "WilsonGaugeAction";} |   virtual std::string action_name() {return "WilsonGaugeAction";} | ||||||
|  |  | ||||||
| @@ -56,14 +60,53 @@ public: | |||||||
|  |  | ||||||
|   virtual void refresh(const GaugeField &U, GridSerialRNG &sRNG, GridParallelRNG &pRNG){};  // noop as no pseudoferms |   virtual void refresh(const GaugeField &U, GridSerialRNG &sRNG, GridParallelRNG &pRNG){};  // noop as no pseudoferms | ||||||
|  |  | ||||||
|  | // Umu<->U maximally confusing | ||||||
|  |   virtual void boundary(const GaugeField &Umu, GaugeField &Ub){ | ||||||
|  |     typedef typename Simd::scalar_type scalar_type; | ||||||
|  |     assert(Params.boundary_phases.size() == Nd); | ||||||
|  |     GridBase *GaugeGrid=Umu.Grid(); | ||||||
|  |     GaugeLinkField U(GaugeGrid); | ||||||
|  |     GaugeLinkField tmp(GaugeGrid); | ||||||
|  |  | ||||||
|  |     Lattice<iScalar<vInteger> > coor(GaugeGrid); | ||||||
|  |     for (int mu = 0; mu < Nd; mu++) { | ||||||
|  | 	////////// boundary phase ///////////// | ||||||
|  |       auto pha = Params.boundary_phases[mu]; | ||||||
|  |       scalar_type phase( real(pha),imag(pha) ); | ||||||
|  |       std::cout<< GridLogIterative << "[WilsonGaugeAction] boundary "<<mu<<" "<<phase<< std::endl;  | ||||||
|  |  | ||||||
|  | 	int L   = GaugeGrid->GlobalDimensions()[mu]; | ||||||
|  |         int Lmu = L - 1; | ||||||
|  |  | ||||||
|  |       LatticeCoordinate(coor, mu); | ||||||
|  |  | ||||||
|  |       U = PeekIndex<LorentzIndex>(Umu, mu); | ||||||
|  |       tmp = where(coor == Lmu, phase * U, U); | ||||||
|  |       PokeIndex<LorentzIndex>(Ub, tmp, mu); | ||||||
|  | //      PokeIndex<LorentzIndex>(Ub, U, mu); | ||||||
|  | //      PokeIndex<LorentzIndex>(Umu, tmp, mu); | ||||||
|  |  | ||||||
|  |     } | ||||||
|  |   }; | ||||||
|  |  | ||||||
|   virtual RealD S(const GaugeField &U) { |   virtual RealD S(const GaugeField &U) { | ||||||
|     RealD plaq = WilsonLoops<Gimpl>::avgPlaquette(U); |     GaugeField Ub(U.Grid()); | ||||||
|     RealD vol = U.Grid()->gSites(); |     this->boundary(U,Ub); | ||||||
|  |     static RealD lastG=0.; | ||||||
|  |     RealD plaq = WilsonLoops<Gimpl>::avgPlaquette(Ub); | ||||||
|  |     RealD vol = Ub.Grid()->gSites(); | ||||||
|     RealD action = beta * (1.0 - plaq) * (Nd * (Nd - 1.0)) * vol * 0.5; |     RealD action = beta * (1.0 - plaq) * (Nd * (Nd - 1.0)) * vol * 0.5; | ||||||
|  |     std::cout << GridLogMessage << "[WilsonGaugeAction] dH: " << action-lastG << std::endl; | ||||||
|  |     RealD plaq_o = WilsonLoops<Gimpl>::avgPlaquette(U); | ||||||
|  |     RealD action_o = beta * (1.0 - plaq_o) * (Nd * (Nd - 1.0)) * vol * 0.5; | ||||||
|  |     std::cout << GridLogMessage << "[WilsonGaugeAction] U: " << action_o <<" Ub: "<< action  << std::endl; | ||||||
|  |     lastG=action; | ||||||
|     return action; |     return action; | ||||||
|   }; |   }; | ||||||
|  |  | ||||||
|   virtual void deriv(const GaugeField &U, GaugeField &dSdU) { |   virtual void deriv(const GaugeField &U, GaugeField &dSdU) { | ||||||
|  |     GaugeField Ub(U.Grid()); | ||||||
|  |     this->boundary(U,Ub); | ||||||
|     // not optimal implementation FIXME |     // not optimal implementation FIXME | ||||||
|     // extend Ta to include Lorentz indexes |     // extend Ta to include Lorentz indexes | ||||||
|  |  | ||||||
| @@ -73,10 +116,9 @@ public: | |||||||
|     GaugeLinkField dSdU_mu(U.Grid()); |     GaugeLinkField dSdU_mu(U.Grid()); | ||||||
|     for (int mu = 0; mu < Nd; mu++) { |     for (int mu = 0; mu < Nd; mu++) { | ||||||
|  |  | ||||||
|       Umu = PeekIndex<LorentzIndex>(U, mu); |       Umu = PeekIndex<LorentzIndex>(Ub, mu); | ||||||
|        |  | ||||||
|       // Staple in direction mu |       // Staple in direction mu | ||||||
|       WilsonLoops<Gimpl>::Staple(dSdU_mu, U, mu); |       WilsonLoops<Gimpl>::Staple(dSdU_mu, Ub, mu); | ||||||
|       dSdU_mu = Ta(Umu * dSdU_mu) * factor; |       dSdU_mu = Ta(Umu * dSdU_mu) * factor; | ||||||
|        |        | ||||||
|       PokeIndex<LorentzIndex>(dSdU, dSdU_mu, mu); |       PokeIndex<LorentzIndex>(dSdU, dSdU_mu, mu); | ||||||
|   | |||||||
| @@ -178,7 +178,10 @@ NAMESPACE_BEGIN(Grid); | |||||||
|         // Use chronological inverter to forecast solutions across poles |         // Use chronological inverter to forecast solutions across poles | ||||||
|         std::vector<FermionField> prev_solns; |         std::vector<FermionField> prev_solns; | ||||||
|         if(use_heatbath_forecasting){ prev_solns.reserve(param.degree); } |         if(use_heatbath_forecasting){ prev_solns.reserve(param.degree); } | ||||||
|         ChronoForecast<AbstractEOFAFermion<Impl>, FermionField> Forecast; | 	MdagMLinearOperator<AbstractEOFAFermion<Impl> ,FermionField> MdagML(Lop); | ||||||
|  | 	MdagMLinearOperator<AbstractEOFAFermion<Impl> ,FermionField> MdagMR(Rop); | ||||||
|  | //        ChronoForecast<AbstractEOFAFermion<Impl>, FermionField> Forecast; | ||||||
|  | 	ChronoForecast<MdagMLinearOperator<AbstractEOFAFermion<Impl>, FermionField> , FermionField> Forecast; | ||||||
|  |  | ||||||
|         // \Phi = ( \alpha_{0} + \sum_{k=1}^{N_{p}} \alpha_{l} * \gamma_{l} ) * \eta |         // \Phi = ( \alpha_{0} + \sum_{k=1}^{N_{p}} \alpha_{l} * \gamma_{l} ) * \eta | ||||||
|         RealD N(PowerNegHalf.norm); |         RealD N(PowerNegHalf.norm); | ||||||
| @@ -198,7 +201,7 @@ NAMESPACE_BEGIN(Grid); | |||||||
|           heatbathRefreshShiftCoefficients(0, -gamma_l); |           heatbathRefreshShiftCoefficients(0, -gamma_l); | ||||||
|           if(use_heatbath_forecasting){ // Forecast CG guess using solutions from previous poles |           if(use_heatbath_forecasting){ // Forecast CG guess using solutions from previous poles | ||||||
|             Lop.Mdag(CG_src, Forecast_src); |             Lop.Mdag(CG_src, Forecast_src); | ||||||
|             CG_soln = Forecast(Lop, Forecast_src, prev_solns); |             CG_soln = Forecast(MdagML, Forecast_src, prev_solns); | ||||||
|             SolverHBL(Lop, CG_src, CG_soln); |             SolverHBL(Lop, CG_src, CG_soln); | ||||||
|             prev_solns.push_back(CG_soln); |             prev_solns.push_back(CG_soln); | ||||||
|           } else { |           } else { | ||||||
| @@ -225,7 +228,7 @@ NAMESPACE_BEGIN(Grid); | |||||||
| 	  heatbathRefreshShiftCoefficients(1, -gamma_l*PowerNegHalf.poles[k]); | 	  heatbathRefreshShiftCoefficients(1, -gamma_l*PowerNegHalf.poles[k]); | ||||||
|           if(use_heatbath_forecasting){ |           if(use_heatbath_forecasting){ | ||||||
|             Rop.Mdag(CG_src, Forecast_src); |             Rop.Mdag(CG_src, Forecast_src); | ||||||
|             CG_soln = Forecast(Rop, Forecast_src, prev_solns); |             CG_soln = Forecast(MdagMR, Forecast_src, prev_solns); | ||||||
|             SolverHBR(Rop, CG_src, CG_soln); |             SolverHBR(Rop, CG_src, CG_soln); | ||||||
|             prev_solns.push_back(CG_soln); |             prev_solns.push_back(CG_soln); | ||||||
|           } else { |           } else { | ||||||
|   | |||||||
| @@ -1,6 +1,6 @@ | |||||||
| #pragma once | #pragma once | ||||||
|  |  | ||||||
| #define CPS_MD_TIME  | #undef CPS_MD_TIME  | ||||||
|  |  | ||||||
| #ifdef CPS_MD_TIME | #ifdef CPS_MD_TIME | ||||||
| #define HMC_MOMENTUM_DENOMINATOR (2.0) | #define HMC_MOMENTUM_DENOMINATOR (2.0) | ||||||
|   | |||||||
| @@ -121,12 +121,19 @@ public: | |||||||
|  |  | ||||||
|   template <class SmearingPolicy> |   template <class SmearingPolicy> | ||||||
|   void Run(SmearingPolicy &S) { |   void Run(SmearingPolicy &S) { | ||||||
|     Runner(S); |     TrivialMetric<typename Implementation::Field> Mtr; | ||||||
|  |     Runner(S,Mtr); | ||||||
|  |   } | ||||||
|  |  | ||||||
|  |   template <class SmearingPolicy, class Metric> | ||||||
|  |   void Run(SmearingPolicy &S, Metric &Mtr) { | ||||||
|  |     Runner(S,Mtr); | ||||||
|   } |   } | ||||||
|  |  | ||||||
|   void Run(){ |   void Run(){ | ||||||
|     NoSmearing<Implementation> S; |     NoSmearing<Implementation> S; | ||||||
|     Runner(S); |     TrivialMetric<typename Implementation::Field> Mtr; | ||||||
|  |     Runner(S,Mtr); | ||||||
|   } |   } | ||||||
|  |  | ||||||
|   //Use the checkpointer to initialize the RNGs and the gauge field, writing the resulting gauge field into U. |   //Use the checkpointer to initialize the RNGs and the gauge field, writing the resulting gauge field into U. | ||||||
| @@ -176,15 +183,15 @@ public: | |||||||
|   ////////////////////////////////////////////////////////////////// |   ////////////////////////////////////////////////////////////////// | ||||||
|  |  | ||||||
| private: | private: | ||||||
|   template <class SmearingPolicy> |   template <class SmearingPolicy, class Metric> | ||||||
|   void Runner(SmearingPolicy &Smearing) { |   void Runner(SmearingPolicy &Smearing, Metric &Mtr) { | ||||||
|     auto UGrid = Resources.GetCartesian(); |     auto UGrid = Resources.GetCartesian(); | ||||||
|     Field U(UGrid); |     Field U(UGrid); | ||||||
|  |  | ||||||
|     initializeGaugeFieldAndRNGs(U); |     initializeGaugeFieldAndRNGs(U); | ||||||
|  |  | ||||||
|     typedef IntegratorType<SmearingPolicy> TheIntegrator; |     typedef IntegratorType<SmearingPolicy> TheIntegrator; | ||||||
|     TheIntegrator MDynamics(UGrid, Parameters.MD, TheAction, Smearing); |     TheIntegrator MDynamics(UGrid, Parameters.MD, TheAction, Smearing,Mtr); | ||||||
|  |  | ||||||
|     // Sets the momentum filter |     // Sets the momentum filter | ||||||
|     MDynamics.setMomentumFilter(*(Resources.GetMomentumFilter())); |     MDynamics.setMomentumFilter(*(Resources.GetMomentumFilter())); | ||||||
|   | |||||||
| @@ -55,6 +55,8 @@ struct HMCparameters: Serializable { | |||||||
|                                   Integer, NoMetropolisUntil, |                                   Integer, NoMetropolisUntil, | ||||||
| 				  bool, PerformRandomShift, /* @brief Randomly shift the gauge configuration at the start of a trajectory */ | 				  bool, PerformRandomShift, /* @brief Randomly shift the gauge configuration at the start of a trajectory */ | ||||||
|                                   std::string, StartingType, |                                   std::string, StartingType, | ||||||
|  | 				  Integer, SW, | ||||||
|  |                                   RealD, Kappa, | ||||||
|                                   IntegratorParameters, MD) |                                   IntegratorParameters, MD) | ||||||
|  |  | ||||||
|   HMCparameters() { |   HMCparameters() { | ||||||
| @@ -110,6 +112,8 @@ private: | |||||||
|   IntegratorType &TheIntegrator; |   IntegratorType &TheIntegrator; | ||||||
|   ObsListType Observables; |   ObsListType Observables; | ||||||
|  |  | ||||||
|  |   int traj_num; | ||||||
|  |  | ||||||
|   ///////////////////////////////////////////////////////// |   ///////////////////////////////////////////////////////// | ||||||
|   // Metropolis step |   // Metropolis step | ||||||
|   ///////////////////////////////////////////////////////// |   ///////////////////////////////////////////////////////// | ||||||
| @@ -200,14 +204,14 @@ private: | |||||||
|  |  | ||||||
|     std::cout << GridLogMessage << "--------------------------------------------------\n"; |     std::cout << GridLogMessage << "--------------------------------------------------\n"; | ||||||
|     std::cout << GridLogMessage << " Molecular Dynamics evolution "; |     std::cout << GridLogMessage << " Molecular Dynamics evolution "; | ||||||
|     TheIntegrator.integrate(U); |     TheIntegrator.integrate(U,traj_num); | ||||||
|     std::cout << GridLogMessage << "--------------------------------------------------\n"; |     std::cout << GridLogMessage << "--------------------------------------------------\n"; | ||||||
|  |  | ||||||
|     ////////////////////////////////////////////////////////////////////////////////////////////////////// |     ////////////////////////////////////////////////////////////////////////////////////////////////////// | ||||||
|     // updated state action |     // updated state action | ||||||
|     ////////////////////////////////////////////////////////////////////////////////////////////////////// |     ////////////////////////////////////////////////////////////////////////////////////////////////////// | ||||||
|     std::cout << GridLogMessage << "--------------------------------------------------\n"; |     std::cout << GridLogMessage << "--------------------------------------------------\n"; | ||||||
|     std::cout << GridLogMessage << "Compute final action"; |     std::cout << GridLogMessage << "Compute final action" <<std::endl; | ||||||
|     RealD H1 = TheIntegrator.S(U);   |     RealD H1 = TheIntegrator.S(U);   | ||||||
|     std::cout << GridLogMessage << "--------------------------------------------------\n"; |     std::cout << GridLogMessage << "--------------------------------------------------\n"; | ||||||
|  |  | ||||||
| @@ -242,7 +246,7 @@ public: | |||||||
|   HybridMonteCarlo(HMCparameters _Pams, IntegratorType &_Int, |   HybridMonteCarlo(HMCparameters _Pams, IntegratorType &_Int, | ||||||
|                    GridSerialRNG &_sRNG, GridParallelRNG &_pRNG,  |                    GridSerialRNG &_sRNG, GridParallelRNG &_pRNG,  | ||||||
|                    ObsListType _Obs, Field &_U) |                    ObsListType _Obs, Field &_U) | ||||||
|     : Params(_Pams), TheIntegrator(_Int), sRNG(_sRNG), pRNG(_pRNG), Observables(_Obs), Ucur(_U) {} |     : Params(_Pams), TheIntegrator(_Int), sRNG(_sRNG), pRNG(_pRNG), Observables(_Obs), Ucur(_U),traj_num(0) {} | ||||||
|   ~HybridMonteCarlo(){}; |   ~HybridMonteCarlo(){}; | ||||||
|  |  | ||||||
|   void evolve(void) { |   void evolve(void) { | ||||||
| @@ -258,8 +262,9 @@ public: | |||||||
|  |  | ||||||
|     for (int traj = Params.StartTrajectory; traj < FinalTrajectory; ++traj) { |     for (int traj = Params.StartTrajectory; traj < FinalTrajectory; ++traj) { | ||||||
|      |      | ||||||
|       std::cout << GridLogHMC << "-- # Trajectory = " << traj << "\n"; |  | ||||||
|  |  | ||||||
|  |       std::cout << GridLogHMC << "-- # Trajectory = " << traj << "\n"; | ||||||
|  |       traj_num=traj; | ||||||
|       if (traj < Params.StartTrajectory + Params.NoMetropolisUntil) { |       if (traj < Params.StartTrajectory + Params.NoMetropolisUntil) { | ||||||
|       	std::cout << GridLogHMC << "-- Thermalization" << std::endl; |       	std::cout << GridLogHMC << "-- Thermalization" << std::endl; | ||||||
|       } |       } | ||||||
|   | |||||||
| @@ -9,6 +9,7 @@ Copyright (C) 2015 | |||||||
| Author: Azusa Yamaguchi <ayamaguc@staffmail.ed.ac.uk> | Author: Azusa Yamaguchi <ayamaguc@staffmail.ed.ac.uk> | ||||||
| Author: Peter Boyle <paboyle@ph.ed.ac.uk> | Author: Peter Boyle <paboyle@ph.ed.ac.uk> | ||||||
| Author: Guido Cossu <cossu@post.kek.jp> | Author: Guido Cossu <cossu@post.kek.jp> | ||||||
|  | Author: Chulwoo Jung <chulwoo@bnl.gov> | ||||||
|  |  | ||||||
| This program is free software; you can redistribute it and/or modify | This program is free software; you can redistribute it and/or modify | ||||||
| it under the terms of the GNU General Public License as published by | it under the terms of the GNU General Public License as published by | ||||||
| @@ -33,6 +34,7 @@ directory | |||||||
| #define INTEGRATOR_INCLUDED | #define INTEGRATOR_INCLUDED | ||||||
|  |  | ||||||
| #include <memory> | #include <memory> | ||||||
|  | #include <Grid/parallelIO/NerscIO.h> | ||||||
|  |  | ||||||
| NAMESPACE_BEGIN(Grid); | NAMESPACE_BEGIN(Grid); | ||||||
|  |  | ||||||
| @@ -41,10 +43,19 @@ public: | |||||||
|   GRID_SERIALIZABLE_CLASS_MEMBERS(IntegratorParameters, |   GRID_SERIALIZABLE_CLASS_MEMBERS(IntegratorParameters, | ||||||
| 				  std::string, name,      // name of the integrator | 				  std::string, name,      // name of the integrator | ||||||
| 				  unsigned int, MDsteps,  // number of outer steps | 				  unsigned int, MDsteps,  // number of outer steps | ||||||
|  | 				  RealD, RMHMCTol, | ||||||
|  |                                   RealD, RMHMCCGTol, | ||||||
|  |                                   RealD, lambda0, | ||||||
|  |                                   RealD, lambda1, | ||||||
|  |                                   RealD, lambda2, | ||||||
| 				  RealD, trajL)           // trajectory length | 				  RealD, trajL)           // trajectory length | ||||||
|  |  | ||||||
|   IntegratorParameters(int MDsteps_ = 10, RealD trajL_ = 1.0) |   IntegratorParameters(int MDsteps_ = 10, RealD trajL_ = 1.0) | ||||||
|   : MDsteps(MDsteps_), |   : MDsteps(MDsteps_), | ||||||
|  |    lambda0(0.1931833275037836), | ||||||
|  |    lambda1(0.1931833275037836), | ||||||
|  |    lambda2(0.1931833275037836), | ||||||
|  |    RMHMCTol(1e-8),RMHMCCGTol(1e-8), | ||||||
|     trajL(trajL_) {}; |     trajL(trajL_) {}; | ||||||
|  |  | ||||||
|   template <class ReaderClass, typename std::enable_if<isReader<ReaderClass>::value, int >::type = 0 > |   template <class ReaderClass, typename std::enable_if<isReader<ReaderClass>::value, int >::type = 0 > | ||||||
| @@ -75,11 +86,14 @@ public: | |||||||
|   double t_U;  // Track time passing on each level and for U and for P |   double t_U;  // Track time passing on each level and for U and for P | ||||||
|   std::vector<double> t_P;   |   std::vector<double> t_P;   | ||||||
|  |  | ||||||
|   MomentaField P; | //  MomentaField P; | ||||||
|  |   GeneralisedMomenta<FieldImplementation > P; | ||||||
|   SmearingPolicy& Smearer; |   SmearingPolicy& Smearer; | ||||||
|   RepresentationPolicy Representations; |   RepresentationPolicy Representations; | ||||||
|   IntegratorParameters Params; |   IntegratorParameters Params; | ||||||
|  |  | ||||||
|  |   RealD Saux,Smom,Sg; | ||||||
|  |  | ||||||
|   //Filters allow the user to manipulate the conjugate momentum, for example to freeze links in DDHMC |   //Filters allow the user to manipulate the conjugate momentum, for example to freeze links in DDHMC | ||||||
|   //It is applied whenever the momentum is updated / refreshed |   //It is applied whenever the momentum is updated / refreshed | ||||||
|   //The default filter does nothing |   //The default filter does nothing | ||||||
| @@ -96,7 +110,16 @@ public: | |||||||
|   void update_P(Field& U, int level, double ep)  |   void update_P(Field& U, int level, double ep)  | ||||||
|   { |   { | ||||||
|     t_P[level] += ep; |     t_P[level] += ep; | ||||||
|     update_P(P, U, level, ep); |     update_P(P.Mom, U, level, ep); | ||||||
|  |  | ||||||
|  |     std::cout << GridLogIntegrator << "[" << level << "] P " << " dt " << ep << " : t_P " << t_P[level] << std::endl; | ||||||
|  |   } | ||||||
|  |  | ||||||
|  |   void update_P2(Field& U, int level, double ep)  | ||||||
|  |   { | ||||||
|  |     t_P[level] += ep; | ||||||
|  |     update_P2(P.Mom, U, level, ep); | ||||||
|  |  | ||||||
|     std::cout << GridLogIntegrator << "[" << level << "] P " << " dt " << ep << " : t_P " << t_P[level] << std::endl; |     std::cout << GridLogIntegrator << "[" << level << "] P " << " dt " << ep << " : t_P " << t_P[level] << std::endl; | ||||||
|   } |   } | ||||||
|  |  | ||||||
| @@ -119,62 +142,174 @@ public: | |||||||
|     } |     } | ||||||
|   } update_P_hireps{}; |   } update_P_hireps{}; | ||||||
|  |  | ||||||
|   |  | ||||||
|   void update_P(MomentaField& Mom, Field& U, int level, double ep) { |   void update_P(MomentaField& Mom, Field& U, int level, double ep) { | ||||||
|     // input U actually not used in the fundamental case |     // input U actually not used in the fundamental case | ||||||
|     // Fundamental updates, include smearing |     // Fundamental updates, include smearing | ||||||
|  |  | ||||||
|     for (int a = 0; a < as[level].actions.size(); ++a) { |     for (int a = 0; a < as[level].actions.size(); ++a) { | ||||||
|  |  | ||||||
|       double start_full = usecond(); |       double start_full = usecond(); | ||||||
|       Field force(U.Grid()); |       Field force(U.Grid()); | ||||||
|       conformable(U.Grid(), Mom.Grid()); |       conformable(U.Grid(), Mom.Grid()); | ||||||
|  |  | ||||||
|  |       Field& Us = Smearer.get_U(as[level].actions.at(a)->is_smeared); | ||||||
|       double start_force = usecond(); |       double start_force = usecond(); | ||||||
|  |       as[level].actions.at(a)->deriv(Us, force);  // deriv should NOT include Ta | ||||||
|  |  | ||||||
|       as[level].actions.at(a)->deriv_timer_start(); |       std::cout << GridLogIntegrator << "Smearing (on/off): " << as[level].actions.at(a)->is_smeared << std::endl; | ||||||
|       as[level].actions.at(a)->deriv(Smearer, force);  // deriv should NOT include Ta |       if (as[level].actions.at(a)->is_smeared) Smearer.smeared_force(force); | ||||||
|       as[level].actions.at(a)->deriv_timer_stop(); |  | ||||||
|  |  | ||||||
|       auto name = as[level].actions.at(a)->action_name(); |  | ||||||
|  |  | ||||||
|       force = FieldImplementation::projectForce(force); // Ta for gauge fields |       force = FieldImplementation::projectForce(force); // Ta for gauge fields | ||||||
|       double end_force = usecond(); |       double end_force = usecond(); | ||||||
|        |       Real force_abs = std::sqrt(norm2(force)/U.Grid()->gSites()); | ||||||
|       MomFilter->applyFilter(force); |       std::cout << GridLogIntegrator << "["<<level<<"]["<<a<<"] Force average: " << force_abs << std::endl; | ||||||
|  |  | ||||||
|       std::cout << GridLogIntegrator << " update_P : Level [" << level <<"]["<<a <<"] "<<name<<" dt "<<ep<<  std::endl; |  | ||||||
|        |  | ||||||
|       Real force_abs   = std::sqrt(norm2(force)/U.Grid()->gSites()); //average per-site norm.  nb. norm2(latt) = \sum_x norm2(latt[x])  |  | ||||||
|       Real impulse_abs = force_abs * ep * HMC_MOMENTUM_DENOMINATOR;     |  | ||||||
|  |  | ||||||
|       Real force_max   = std::sqrt(maxLocalNorm2(force)); |  | ||||||
|       Real impulse_max = force_max * ep * HMC_MOMENTUM_DENOMINATOR;     |  | ||||||
|  |  | ||||||
|       as[level].actions.at(a)->deriv_log(force_abs,force_max,impulse_abs,impulse_max); |  | ||||||
|        |  | ||||||
|       std::cout << GridLogIntegrator<< "["<<level<<"]["<<a<<"] dt           : " << ep <<" "<<name<<std::endl; |  | ||||||
|       std::cout << GridLogIntegrator<< "["<<level<<"]["<<a<<"] Force average: " << force_abs <<" "<<name<<std::endl; |  | ||||||
|       std::cout << GridLogIntegrator<< "["<<level<<"]["<<a<<"] Force max    : " << force_max <<" "<<name<<std::endl; |  | ||||||
|       std::cout << GridLogIntegrator<< "["<<level<<"]["<<a<<"] Fdt average  : " << impulse_abs <<" "<<name<<std::endl; |  | ||||||
|       std::cout << GridLogIntegrator<< "["<<level<<"]["<<a<<"] Fdt max      : " << impulse_max <<" "<<name<<std::endl; |  | ||||||
|  |  | ||||||
|       Mom -= force * ep* HMC_MOMENTUM_DENOMINATOR;;  |       Mom -= force * ep* HMC_MOMENTUM_DENOMINATOR;;  | ||||||
|       double end_full = usecond(); |       double end_full = usecond(); | ||||||
|       double time_full  = (end_full - start_full) / 1e3; |       double time_full  = (end_full - start_full) / 1e3; | ||||||
|       double time_force = (end_force - start_force) / 1e3; |       double time_force = (end_force - start_force) / 1e3; | ||||||
|       std::cout << GridLogMessage << "["<<level<<"]["<<a<<"] P update elapsed time: " << time_full << " ms (force: " << time_force << " ms)"  << std::endl; |       std::cout << GridLogMessage << "["<<level<<"]["<<a<<"] P update elapsed time: " << time_full << " ms (force: " << time_force << " ms)"  << std::endl; | ||||||
|  |  | ||||||
|     } |     } | ||||||
|  |  | ||||||
|     // Force from the other representations |     // Force from the other representations | ||||||
|     as[level].apply(update_P_hireps, Representations, Mom, U, ep); |     as[level].apply(update_P_hireps, Representations, Mom, U, ep); | ||||||
|  |   } | ||||||
|  |  | ||||||
|  |   // Explicit momentum update used to close a step of the implicit integrators. | ||||||
|  |   // | ||||||
|  |   // Subtracts from Mom, in this order: | ||||||
|  |   //   1. the kinetic-term derivative P.DerivativeU(P.Mom, .), evaluated after | ||||||
|  |   //      U has been imported into the metric P.M (note: the derivative is | ||||||
|  |   //      taken of P.Mom, not of the Mom argument); | ||||||
|  |   //   2. the auxiliary-field derivative, bracketed by two updates of the | ||||||
|  |   //      auxiliary momenta of size ep*0.5 each; | ||||||
|  |   //   3. the force of every action on this level, passed through | ||||||
|  |   //      Smearer.smeared_force when the action is flagged is_smeared and then | ||||||
|  |   //      through FieldImplementation::projectForce; | ||||||
|  |   //   4. the other-representation forces, via update_P_hireps. | ||||||
|  |   // Contributions 1-3 are scaled by ep * HMC_MOMENTUM_DENOMINATOR. | ||||||
|  |   // | ||||||
|  |   // Mom   : momenta, updated in place | ||||||
|  |   // U     : gauge field; imported into the metric and used for grid/volume | ||||||
|  |   // level : index into the action set `as` | ||||||
|  |   // ep    : step size | ||||||
|  |   void update_P2(MomentaField& Mom, Field& U, int level, double ep) { | ||||||
|  |     std::cout << GridLogIntegrator << "U before update_P2: " << std::sqrt(norm2(U)) << std::endl; | ||||||
|  |  | ||||||
|  |     // Generalised momenta: the derivative of the kinetic term is computed | ||||||
|  |     // first, before Mom is changed by the action derivatives below. | ||||||
|  |     MomentaField MomDer(P.Mom.Grid()); | ||||||
|  |     P.M.ImportGauge(U); | ||||||
|  |     P.DerivativeU(P.Mom, MomDer); | ||||||
|  |     std::cout << GridLogIntegrator << "MomDer update_P2: " << std::sqrt(norm2(MomDer)) << std::endl; | ||||||
|  |     Mom -= MomDer * ep * HMC_MOMENTUM_DENOMINATOR; | ||||||
|  |     std::cout << GridLogIntegrator << "Mom update_P2: " << std::sqrt(norm2(Mom)) << std::endl; | ||||||
|  |  | ||||||
|  |     // Auxiliary fields: half update, derivative, half update. | ||||||
|  |     P.update_auxiliary_momenta(ep*0.5 ); | ||||||
|  |     P.AuxiliaryFieldsDerivative(MomDer); | ||||||
|  |     // Report the auxiliary derivative itself (MomDer), matching the label. | ||||||
|  |     std::cout << GridLogIntegrator << "MomDer(Aux) update_P2: " << std::sqrt(norm2(MomDer)) << std::endl; | ||||||
|  |     Mom -= MomDer * ep * HMC_MOMENTUM_DENOMINATOR; | ||||||
|  |     P.update_auxiliary_momenta(ep*0.5 ); | ||||||
|  |  | ||||||
|  |     // Fundamental-representation actions, including smearing. | ||||||
|  |     for (int a = 0; a < as[level].actions.size(); ++a) { | ||||||
|  |       double start_full = usecond(); | ||||||
|  |       Field force(U.Grid()); | ||||||
|  |       conformable(U.Grid(), Mom.Grid()); | ||||||
|  |  | ||||||
|  |       // The smearer hands back the field this action should differentiate. | ||||||
|  |       Field& Us = Smearer.get_U(as[level].actions.at(a)->is_smeared); | ||||||
|  |       double start_force = usecond(); | ||||||
|  |       as[level].actions.at(a)->deriv(Us, force);  // deriv should NOT include Ta | ||||||
|  |  | ||||||
|  |       std::cout << GridLogIntegrator << "Smearing (on/off): " << as[level].actions.at(a)->is_smeared << std::endl; | ||||||
|  |       if (as[level].actions.at(a)->is_smeared) Smearer.smeared_force(force); | ||||||
|  |       force = FieldImplementation::projectForce(force); // Ta for gauge fields | ||||||
|  |       double end_force = usecond(); | ||||||
|  |  | ||||||
|  |       // Per-site average: norm2 sums over the lattice, gSites is the volume. | ||||||
|  |       Real force_abs = std::sqrt(norm2(force)/U.Grid()->gSites()); | ||||||
|  |       std::cout << GridLogIntegrator << "["<<level<<"]["<<a<<"] Force average: " << force_abs << std::endl; | ||||||
|  |       Mom -= force * ep* HMC_MOMENTUM_DENOMINATOR; | ||||||
|  |  | ||||||
|  |       // usecond() differences divided by 1e3 are reported as milliseconds. | ||||||
|  |       double end_full = usecond(); | ||||||
|  |       double time_full  = (end_full - start_full) / 1e3; | ||||||
|  |       double time_force = (end_force - start_force) / 1e3; | ||||||
|  |       std::cout << GridLogMessage << "["<<level<<"]["<<a<<"] P update elapsed time: " << time_full << " ms (force: " << time_force << " ms)"  << std::endl; | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     // Force from the other representations | ||||||
|  |     as[level].apply(update_P_hireps, Representations, Mom, U, ep); | ||||||
|  |   } | ||||||
|  |  | ||||||
|  |   // Implicit momentum update, solved by fixed-point iteration. | ||||||
|  |   // | ||||||
|  |   // The action forces on this level are evaluated once and summed into Msum | ||||||
|  |   // together with the auxiliary-field derivative. The new momentum is then | ||||||
|  |   // iterated as | ||||||
|  |   //   NewMom = P.Mom - HMC_MOMENTUM_DENOMINATOR * | ||||||
|  |   //            (ep*Msum + ep1*factor*dU(NewMom) + ep2*MomDer1) | ||||||
|  |   // where dU is P.DerivativeU and ep2 = ep - ep1, until the relative change | ||||||
|  |   // between successive iterates is at most Params.RMHMCTol or the iteration | ||||||
|  |   // cap is reached. The auxiliary momenta are advanced by ep1 before and by | ||||||
|  |   // ep2 after the iteration. | ||||||
|  |   // | ||||||
|  |   // U            : gauge field, imported into the metric P.M | ||||||
|  |   // level        : index into the action set `as`; t_P[level] advances by ep | ||||||
|  |   // ep           : total step size | ||||||
|  |   // ep1          : part of the step weighting the iterated derivative | ||||||
|  |   // intermediate : when true, MomDer1 is the derivative at the incoming P.Mom | ||||||
|  |   //                and factor is 1.0; otherwise MomDer1 is zero and factor 2.0 | ||||||
|  |   // | ||||||
|  |   // NOTE(review): unlike update_P2, no update_P_hireps call is made here; | ||||||
|  |   // confirm that is intended. | ||||||
|  |   void implicit_update_P(Field& U, int level, double ep, double ep1, bool intermediate = false) { | ||||||
|  |     t_P[level] += ep; | ||||||
|  |  | ||||||
|  |     double ep2= ep-ep1; | ||||||
|  |  | ||||||
|  |     std::cout << GridLogIntegrator << "[" << level << "] P " | ||||||
|  |               << " dt " << ep << " : t_P " << t_P[level] << std::endl; | ||||||
|  |     std::cout << GridLogIntegrator << "U before implicit_update_P: " << std::sqrt(norm2(U)) << std::endl; | ||||||
|  |  | ||||||
|  |     // Fundamental updates, include smearing. | ||||||
|  |     // The action derivatives do not depend on the momenta, so they are | ||||||
|  |     // computed only once, outside the iteration. | ||||||
|  |     MomentaField Msum(P.Mom.Grid()); | ||||||
|  |     Msum = Zero(); | ||||||
|  |     for (int a = 0; a < as[level].actions.size(); ++a) { | ||||||
|  |       Field force(U.Grid()); | ||||||
|  |       conformable(U.Grid(), P.Mom.Grid()); | ||||||
|  |       Field& Us = Smearer.get_U(as[level].actions.at(a)->is_smeared); | ||||||
|  |       as[level].actions.at(a)->deriv(Us, force);  // deriv should NOT include Ta | ||||||
|  |  | ||||||
|  |       std::cout << GridLogIntegrator << "Smearing (on/off): " << as[level].actions.at(a)->is_smeared << std::endl; | ||||||
|  |       if (as[level].actions.at(a)->is_smeared) Smearer.smeared_force(force); | ||||||
|  |       force = FieldImplementation::projectForce(force);  // Ta for gauge fields | ||||||
|  |       Real force_abs = std::sqrt(norm2(force) / U.Grid()->gSites()); | ||||||
|  |       std::cout << GridLogIntegrator << "|Force| site average: " << force_abs | ||||||
|  |                 << std::endl; | ||||||
|  |       Msum += force; | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     MomentaField NewMom = P.Mom; | ||||||
|  |     MomentaField OldMom = P.Mom; | ||||||
|  |     double threshold = Params.RMHMCTol; | ||||||
|  |     P.M.ImportGauge(U); | ||||||
|  |     MomentaField MomDer(P.Mom.Grid()); | ||||||
|  |     MomentaField MomDer1(P.Mom.Grid()); | ||||||
|  |     MomentaField AuxDer(P.Mom.Grid()); | ||||||
|  |     MomDer1 = Zero(); | ||||||
|  |     MomentaField diff(P.Mom.Grid()); | ||||||
|  |     double factor = 2.0; | ||||||
|  |     if (intermediate){ | ||||||
|  |       // Derivative at the incoming momentum; stays fixed during the iteration. | ||||||
|  |       P.DerivativeU(P.Mom, MomDer1); | ||||||
|  |       factor = 1.0; | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     // Auxiliary fields | ||||||
|  |     P.update_auxiliary_momenta(ep1); | ||||||
|  |     P.AuxiliaryFieldsDerivative(AuxDer); | ||||||
|  |     Msum += AuxDer; | ||||||
|  |  | ||||||
|  |     // Fixed-point iteration. It is bounded the same way as the loop in | ||||||
|  |     // implicit_update_U so that a non-converging step cannot spin forever. | ||||||
|  |     int counter = 1; | ||||||
|  |     int MaxCounter = 100; | ||||||
|  |     RealD RelativeError; | ||||||
|  |     do { | ||||||
|  |       std::cout << GridLogIntegrator << "UpdateP implicit step "<< counter << std::endl; | ||||||
|  |  | ||||||
|  |       // Compute the derivative of the kinetic term | ||||||
|  |       // with respect to the gauge field | ||||||
|  |       P.DerivativeU(NewMom, MomDer); | ||||||
|  |       Real force_abs = std::sqrt(norm2(MomDer) / U.Grid()->gSites()); | ||||||
|  |       std::cout << GridLogIntegrator << "|Force| laplacian site average: " << force_abs | ||||||
|  |                 << std::endl; | ||||||
|  |  | ||||||
|  |       NewMom = P.Mom -  HMC_MOMENTUM_DENOMINATOR * (ep*Msum + ep1* factor*MomDer + ep2* MomDer1);// simplify | ||||||
|  |       diff = NewMom - OldMom; | ||||||
|  |       counter++; | ||||||
|  |       RelativeError = std::sqrt(norm2(diff))/std::sqrt(norm2(NewMom)); | ||||||
|  |       std::cout << GridLogIntegrator << "UpdateP RelativeError: " << RelativeError << std::endl; | ||||||
|  |       OldMom = NewMom; | ||||||
|  |     } while (RelativeError > threshold && counter < MaxCounter); | ||||||
|  |  | ||||||
|  |     // counter is one ahead of the number of iterations actually performed. | ||||||
|  |     if (RelativeError > threshold) { | ||||||
|  |       std::cout << GridLogMessage << "implicit_update_P: not converged after " << counter-1 | ||||||
|  |                 << " iterations, RelativeError " << RelativeError << std::endl; | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     P.Mom = NewMom; | ||||||
|  |     std::cout << GridLogIntegrator << "NewMom implicit_update_P: " << std::sqrt(norm2(NewMom)) << std::endl; | ||||||
|  |  | ||||||
|  |     // update the auxiliary fields momenta | ||||||
|  |     P.update_auxiliary_momenta(ep2); | ||||||
|  |   } | ||||||
|  |  | ||||||
|  |   // Convenience overload: splits the step evenly, i.e. ep1 = ep*0.5. | ||||||
|  |   void implicit_update_P(Field& U, int level, double ep, bool intermediate = false) { | ||||||
|  |       implicit_update_P( U, level, ep, ep*0.5, intermediate );  | ||||||
|   } |   } | ||||||
|  |  | ||||||
|   void update_U(Field& U, double ep)  |   void update_U(Field& U, double ep)  | ||||||
|   { |   { | ||||||
|     update_U(P, U, ep); |     update_U(P.Mom, U, ep); | ||||||
|  |  | ||||||
|     t_U += ep; |     t_U += ep; | ||||||
|     int fl = levels - 1; |     int fl = levels - 1; | ||||||
| @@ -183,12 +318,8 @@ public: | |||||||
|    |    | ||||||
|   void update_U(MomentaField& Mom, Field& U, double ep)  |   void update_U(MomentaField& Mom, Field& U, double ep)  | ||||||
|   { |   { | ||||||
|     MomentaField MomFiltered(Mom.Grid()); |  | ||||||
|     MomFiltered = Mom; |  | ||||||
|     MomFilter->applyFilter(MomFiltered); |  | ||||||
|  |  | ||||||
|     // exponential of Mom*U in the gauge fields case |     // exponential of Mom*U in the gauge fields case | ||||||
|     FieldImplementation::update_field(MomFiltered, U, ep); |     FieldImplementation::update_field(Mom, U, ep); | ||||||
|  |  | ||||||
|     // Update the smeared fields, can be implemented as observer |     // Update the smeared fields, can be implemented as observer | ||||||
|     Smearer.set_Field(U); |     Smearer.set_Field(U); | ||||||
| @@ -197,18 +328,74 @@ public: | |||||||
|     Representations.update(U);  // void functions if fundamental representation |     Representations.update(U);  // void functions if fundamental representation | ||||||
|   } |   } | ||||||
|  |  | ||||||
|  |   // Implicit gauge-field update, solved by fixed-point iteration. | ||||||
|  |   // | ||||||
|  |   // Mom1 is P.DerivativeP on the metric built from the incoming U; Mom2 is the | ||||||
|  |   // same derivative on the metric built from the current iterate NewU. Each | ||||||
|  |   // iteration forms sum = Mom1*ep1 + Mom2*ep2 (ep2 = ep - ep1) and sets, per | ||||||
|  |   // direction mu, NewU_mu = ProjectOnGroup(expMat(sum_mu, 1, 12) * U_mu), | ||||||
|  |   // always starting from the incoming U. The loop stops when the relative | ||||||
|  |   // change between iterates is at most Params.RMHMCTol, or when counter | ||||||
|  |   // reaches MaxCounter. The auxiliary fields are advanced by ep1 before and | ||||||
|  |   // by ep2 after the iteration. | ||||||
|  |   // | ||||||
|  |   // U   : gauge field, overwritten with the converged iterate | ||||||
|  |   // ep  : total step size; t_U advances by ep | ||||||
|  |   // ep1 : weight of the derivative taken at the incoming U | ||||||
|  |   // | ||||||
|  |   // NOTE(review): unlike update_U, this routine does not call | ||||||
|  |   // Smearer.set_Field or Representations.update; confirm that is intended. | ||||||
|  |   void implicit_update_U(Field&U, double ep, double ep1 ){ | ||||||
|  |     double ep2=ep-ep1; | ||||||
|  |     t_U += ep; | ||||||
|  |     int fl = levels - 1; | ||||||
|  |     std::cout << GridLogIntegrator << "   " << "[" << fl << "] U " << " dt " << ep << " : t_U " << t_U << std::endl; | ||||||
|  |     std::cout << GridLogIntegrator << "U before implicit_update_U: " << std::sqrt(norm2(U)) << std::endl; | ||||||
|  |  | ||||||
|  |     MomentaField Mom1(P.Mom.Grid()); | ||||||
|  |     MomentaField Mom2(P.Mom.Grid()); | ||||||
|  |     RealD RelativeError; | ||||||
|  |     Field diff(U.Grid()); | ||||||
|  |     Real threshold =  Params.RMHMCTol; | ||||||
|  |     int counter = 1; | ||||||
|  |     int MaxCounter = 100; | ||||||
|  |  | ||||||
|  |     Field OldU = U; | ||||||
|  |     Field NewU = U; | ||||||
|  |  | ||||||
|  |     P.M.ImportGauge(U); | ||||||
|  |     P.DerivativeP(Mom1); // first term in the derivative | ||||||
|  |     std::cout << GridLogIntegrator << "implicit_update_U: Mom1: " << std::sqrt(norm2(Mom1)) << std::endl; | ||||||
|  |  | ||||||
|  |     P.update_auxiliary_fields(ep1); | ||||||
|  |  | ||||||
|  |     MomentaField sum=Mom1; | ||||||
|  |     do { | ||||||
|  |       std::cout << GridLogIntegrator << "UpdateU implicit step "<< counter << std::endl; | ||||||
|  |  | ||||||
|  |       P.DerivativeP(Mom2); // second term in the derivative, on the updated U | ||||||
|  |       // Report the freshly computed term; Mom1 does not change inside the loop. | ||||||
|  |       std::cout << GridLogIntegrator << "implicit_update_U: Mom2: " << std::sqrt(norm2(Mom2)) << std::endl; | ||||||
|  |       sum = (Mom1*ep1 + Mom2*ep2); | ||||||
|  |  | ||||||
|  |       for (int mu = 0; mu < Nd; mu++) { | ||||||
|  |         // Each iterate is built from the incoming U, not from the previous NewU. | ||||||
|  |         auto Umu = PeekIndex<LorentzIndex>(U, mu); | ||||||
|  |         auto Pmu = PeekIndex<LorentzIndex>(sum, mu); | ||||||
|  |         Umu = expMat(Pmu, 1, 12) * Umu; | ||||||
|  |         PokeIndex<LorentzIndex>(NewU, ProjectOnGroup(Umu), mu); | ||||||
|  |       } | ||||||
|  |  | ||||||
|  |       diff = NewU - OldU; | ||||||
|  |       RelativeError = std::sqrt(norm2(diff))/std::sqrt(norm2(NewU)); | ||||||
|  |       std::cout << GridLogIntegrator << "UpdateU RelativeError: " << RelativeError << std::endl; | ||||||
|  |  | ||||||
|  |       // The next DerivativeP call sees the metric of the new iterate. | ||||||
|  |       P.M.ImportGauge(NewU); | ||||||
|  |       OldU = NewU; // some redundancy to be eliminated | ||||||
|  |       counter++; | ||||||
|  |     } while (RelativeError > threshold && counter < MaxCounter); | ||||||
|  |  | ||||||
|  |     U = NewU; | ||||||
|  |     std::cout << GridLogIntegrator << "NewU implicit_update_U: " << std::sqrt(norm2(U)) << std::endl; | ||||||
|  |     P.update_auxiliary_fields(ep2); | ||||||
|  |   } | ||||||
|  |  | ||||||
|  |  | ||||||
|   virtual void step(Field& U, int level, int first, int last) = 0; |   virtual void step(Field& U, int level, int first, int last) = 0; | ||||||
|  |  | ||||||
| public: | public: | ||||||
|   Integrator(GridBase* grid, IntegratorParameters Par, |   Integrator(GridBase* grid, IntegratorParameters Par, | ||||||
|              ActionSet<Field, RepresentationPolicy>& Aset, |              ActionSet<Field, RepresentationPolicy>& Aset, | ||||||
|              SmearingPolicy& Sm) |              SmearingPolicy& Sm, Metric<MomentaField>& M) | ||||||
|     : Params(Par), |     : Params(Par), | ||||||
|       as(Aset), |       as(Aset), | ||||||
|       P(grid), |       P(grid, M), | ||||||
|       levels(Aset.size()), |       levels(Aset.size()), | ||||||
|       Smearer(Sm), |       Smearer(Sm), | ||||||
|       Representations(grid)  |       Representations(grid), | ||||||
|  |       Saux(0.),Smom(0.),Sg(0.) | ||||||
|   { |   { | ||||||
|     t_P.resize(levels, 0.0); |     t_P.resize(levels, 0.0); | ||||||
|     t_U = 0.0; |     t_U = 0.0; | ||||||
| @@ -324,7 +511,8 @@ public: | |||||||
|  |  | ||||||
|   void reverse_momenta() |   void reverse_momenta() | ||||||
|   { |   { | ||||||
|     P *= -1.0; |     P.Mom *= -1.0; | ||||||
|  |     P.AuxMom *= -1.0; | ||||||
|   } |   } | ||||||
|  |  | ||||||
|   // to be used by the actionlevel class to iterate |   // to be used by the actionlevel class to iterate | ||||||
| @@ -343,11 +531,14 @@ public: | |||||||
|   // Initialization of momenta and actions |   // Initialization of momenta and actions | ||||||
|   void refresh(Field& U,  GridSerialRNG & sRNG, GridParallelRNG& pRNG)  |   void refresh(Field& U,  GridSerialRNG & sRNG, GridParallelRNG& pRNG)  | ||||||
|   { |   { | ||||||
|     assert(P.Grid() == U.Grid()); |     assert(P.Mom.Grid() == U.Grid()); | ||||||
|     std::cout << GridLogIntegrator << "Integrator refresh" << std::endl; |     std::cout << GridLogIntegrator << "Integrator refresh" << std::endl; | ||||||
|  |  | ||||||
|     std::cout << GridLogIntegrator << "Generating momentum" << std::endl; |     std::cout << GridLogIntegrator << "Generating momentum" << std::endl; | ||||||
|     FieldImplementation::generate_momenta(P, sRNG, pRNG); | //    FieldImplementation::generate_momenta(P.Mom, sRNG, pRNG); | ||||||
|  |     P.M.ImportGauge(U); | ||||||
|  |     P.MomentaDistribution(sRNG,pRNG); | ||||||
|  |  | ||||||
|  |  | ||||||
|     // Update the smeared fields, can be implemented as observer |     // Update the smeared fields, can be implemented as observer | ||||||
|     // necessary to keep the fields updated even after a reject |     // necessary to keep the fields updated even after a reject | ||||||
| @@ -402,9 +593,22 @@ public: | |||||||
|  |  | ||||||
|     std::cout << GridLogIntegrator << "Integrator action\n"; |     std::cout << GridLogIntegrator << "Integrator action\n"; | ||||||
|  |  | ||||||
|     RealD H = - FieldImplementation::FieldSquareNorm(P)/HMC_MOMENTUM_DENOMINATOR; // - trace (P*P)/denom | //    RealD H = - FieldImplementation::FieldSquareNorm(P.Mom)/HMC_MOMENTUM_DENOMINATOR; // - trace (P*P)/denom | ||||||
|  | //    RealD Hterm; | ||||||
|  |  | ||||||
|  | //    static RealD Saux=0.,Smom=0.,Sg=0.; | ||||||
|  |  | ||||||
|  |     RealD H = - FieldImplementation::FieldSquareNorm(P.Mom)/HMC_MOMENTUM_DENOMINATOR; // - trace (P*P)/denom | ||||||
|  |     std::cout << GridLogMessage << "S:FieldSquareNorm H_p = " << H << "\n"; | ||||||
|  |     std::cout << GridLogMessage << "S:dSField = " << H-Smom << "\n"; | ||||||
|  |     Smom=H; | ||||||
|  |     P.M.ImportGauge(U); | ||||||
|  |     RealD Hterm = - P.MomentaAction(); | ||||||
|  |     std::cout << GridLogMessage << "S:Momentum action H_p = " << Hterm << "\n"; | ||||||
|  |     std::cout << GridLogMessage << "S:dSMom = " << Hterm-Saux << "\n"; | ||||||
|  |     Saux=Hterm; | ||||||
|  |     H = Hterm; | ||||||
|  |  | ||||||
|     RealD Hterm; |  | ||||||
|  |  | ||||||
|     // Actions |     // Actions | ||||||
|     for (int level = 0; level < as.size(); ++level) { |     for (int level = 0; level < as.size(); ++level) { | ||||||
| @@ -446,9 +650,18 @@ public: | |||||||
|  |  | ||||||
|     std::cout << GridLogIntegrator << "Integrator initial action\n"; |     std::cout << GridLogIntegrator << "Integrator initial action\n"; | ||||||
|  |  | ||||||
|     RealD H = - FieldImplementation::FieldSquareNorm(P)/HMC_MOMENTUM_DENOMINATOR; // - trace (P*P)/denom | //    RealD H = - FieldImplementation::FieldSquareNorm(P.Mom)/HMC_MOMENTUM_DENOMINATOR; // - trace (P*P)/denom | ||||||
|  | //    RealD Hterm; | ||||||
|     RealD Hterm; |     RealD H = - FieldImplementation::FieldSquareNorm(P.Mom)/HMC_MOMENTUM_DENOMINATOR; // - trace (P*P)/denom | ||||||
|  |     std::cout << GridLogMessage << "S:FieldSquareNorm H_p = " << H << "\n"; | ||||||
|  |     std::cout << GridLogMessage << "S:dSField = " << H-Smom << "\n"; | ||||||
|  |     Smom=H; | ||||||
|  |     P.M.ImportGauge(U); | ||||||
|  |     RealD Hterm = - P.MomentaAction(); | ||||||
|  |     std::cout << GridLogMessage << "S:Momentum action H_p = " << Hterm << "\n"; | ||||||
|  |     std::cout << GridLogMessage << "S:dSMom = " << Hterm-Saux << "\n"; | ||||||
|  |     Saux=Hterm; | ||||||
|  |     H = Hterm; | ||||||
|  |  | ||||||
|     // Actions |     // Actions | ||||||
|     for (int level = 0; level < as.size(); ++level) { |     for (int level = 0; level < as.size(); ++level) { | ||||||
| @@ -471,7 +684,7 @@ public: | |||||||
|   } |   } | ||||||
|  |  | ||||||
|    |    | ||||||
|   void integrate(Field& U)  |   void integrate(Field& U, int traj=-1 )  | ||||||
|   { |   { | ||||||
|     // reset the clocks |     // reset the clocks | ||||||
|     t_U = 0; |     t_U = 0; | ||||||
| @@ -483,6 +696,12 @@ public: | |||||||
|       int first_step = (stp == 0); |       int first_step = (stp == 0); | ||||||
|       int last_step = (stp == Params.MDsteps - 1); |       int last_step = (stp == Params.MDsteps - 1); | ||||||
|       this->step(U, 0, first_step, last_step); |       this->step(U, 0, first_step, last_step); | ||||||
|  |       if (traj>=0){ | ||||||
|  |         std::string file("./config."+std::to_string(traj)+"_"+std::to_string(stp+1) ); | ||||||
|  |         int precision32 = 0; | ||||||
|  |         int tworow      = 0; | ||||||
|  |         NerscIO::writeConfiguration(U,file,tworow,precision32); | ||||||
|  |       } | ||||||
|     } |     } | ||||||
|  |  | ||||||
|     // Check the clocks all match on all levels |     // Check the clocks all match on all levels | ||||||
| @@ -492,7 +711,6 @@ public: | |||||||
|     } |     } | ||||||
|  |  | ||||||
|     FieldImplementation::Project(U); |     FieldImplementation::Project(U); | ||||||
|  |  | ||||||
|     // and that we indeed got to the end of the trajectory |     // and that we indeed got to the end of the trajectory | ||||||
|     assert(fabs(t_U - Params.trajL) < 1.0e-6); |     assert(fabs(t_U - Params.trajL) < 1.0e-6); | ||||||
|  |  | ||||||
|   | |||||||
| @@ -102,8 +102,8 @@ public: | |||||||
|  |  | ||||||
|   std::string integrator_name(){return "LeapFrog";} |   std::string integrator_name(){return "LeapFrog";} | ||||||
|  |  | ||||||
|   LeapFrog(GridBase* grid, IntegratorParameters Par, ActionSet<Field, RepresentationPolicy>& Aset, SmearingPolicy& Sm) |   LeapFrog(GridBase* grid, IntegratorParameters Par, ActionSet<Field, RepresentationPolicy>& Aset, SmearingPolicy& Sm, Metric<Field>& M) | ||||||
|     : Integrator<FieldImplementation, SmearingPolicy, RepresentationPolicy>(grid, Par, Aset, Sm){}; |     : Integrator<FieldImplementation, SmearingPolicy, RepresentationPolicy>(grid, Par, Aset, Sm,M){}; | ||||||
|  |  | ||||||
|   void step(Field& U, int level, int _first, int _last) { |   void step(Field& U, int level, int _first, int _last) { | ||||||
|     int fl = this->as.size() - 1; |     int fl = this->as.size() - 1; | ||||||
| @@ -140,14 +140,14 @@ template <class FieldImplementation_, class SmearingPolicy, class Representation | |||||||
| class MinimumNorm2 : public Integrator<FieldImplementation_, SmearingPolicy, RepresentationPolicy>  | class MinimumNorm2 : public Integrator<FieldImplementation_, SmearingPolicy, RepresentationPolicy>  | ||||||
| { | { | ||||||
| private: | private: | ||||||
|   const RealD lambda = 0.1931833275037836; | //  const RealD lambda = 0.1931833275037836; | ||||||
|  |  | ||||||
| public: | public: | ||||||
|   typedef FieldImplementation_ FieldImplementation; |   typedef FieldImplementation_ FieldImplementation; | ||||||
|   INHERIT_FIELD_TYPES(FieldImplementation); |   INHERIT_FIELD_TYPES(FieldImplementation); | ||||||
|  |  | ||||||
|   MinimumNorm2(GridBase* grid, IntegratorParameters Par, ActionSet<Field, RepresentationPolicy>& Aset, SmearingPolicy& Sm) |   MinimumNorm2(GridBase* grid, IntegratorParameters Par, ActionSet<Field, RepresentationPolicy>& Aset, SmearingPolicy& Sm, Metric<Field>& M) | ||||||
|     : Integrator<FieldImplementation, SmearingPolicy, RepresentationPolicy>(grid, Par, Aset, Sm){}; |     : Integrator<FieldImplementation, SmearingPolicy, RepresentationPolicy>(grid, Par, Aset, Sm,M){}; | ||||||
|  |  | ||||||
|   std::string integrator_name(){return "MininumNorm2";} |   std::string integrator_name(){return "MininumNorm2";} | ||||||
|  |  | ||||||
| @@ -155,6 +155,11 @@ public: | |||||||
|     // level  : current level |     // level  : current level | ||||||
|     // fl     : final level |     // fl     : final level | ||||||
|     // eps    : current step size |     // eps    : current step size | ||||||
|  |     assert(level<3); | ||||||
|  |     RealD lambda= this->Params.lambda0; | ||||||
|  |     if (level>0) lambda= this->Params.lambda1; | ||||||
|  |     if (level>1) lambda= this->Params.lambda2; | ||||||
|  |     std::cout << GridLogMessage << "level: "<<level<< "lambda: "<<lambda<<std::endl; | ||||||
|  |  | ||||||
|     int fl = this->as.size() - 1; |     int fl = this->as.size() - 1; | ||||||
|  |  | ||||||
| @@ -210,9 +215,9 @@ public: | |||||||
|   // Looks like dH scales as dt^4. tested wilson/wilson 2 level. |   // Looks like dH scales as dt^4. tested wilson/wilson 2 level. | ||||||
|   ForceGradient(GridBase* grid, IntegratorParameters Par, |   ForceGradient(GridBase* grid, IntegratorParameters Par, | ||||||
|                 ActionSet<Field, RepresentationPolicy>& Aset, |                 ActionSet<Field, RepresentationPolicy>& Aset, | ||||||
|                 SmearingPolicy& Sm) |                 SmearingPolicy& Sm, Metric<Field>& M) | ||||||
|     : Integrator<FieldImplementation, SmearingPolicy, RepresentationPolicy>( |     : Integrator<FieldImplementation, SmearingPolicy, RepresentationPolicy>( | ||||||
| 									    grid, Par, Aset, Sm){}; | 									    grid, Par, Aset, Sm,M){}; | ||||||
|  |  | ||||||
|   std::string integrator_name(){return "ForceGradient";} |   std::string integrator_name(){return "ForceGradient";} | ||||||
|    |    | ||||||
| @@ -275,6 +280,255 @@ public: | |||||||
|   } |   } | ||||||
| }; | }; | ||||||
|  |  | ||||||
|  | //////////////////////////////// | ||||||
|  | // Riemannian Manifold HMC | ||||||
|  | // Girolami et al | ||||||
|  | //////////////////////////////// | ||||||
|  |  | ||||||
|  |  | ||||||
|  |  | ||||||
|  | // correct | ||||||
|  | // Leapfrog integrator built from the implicit momentum and gauge updates of | ||||||
|  | // the Integrator base class. | ||||||
|  | template <class FieldImplementation, class SmearingPolicy, | ||||||
|  |           class RepresentationPolicy = | ||||||
|  |               Representations<FundamentalRepresentation> > | ||||||
|  | class ImplicitLeapFrog : public Integrator<FieldImplementation, SmearingPolicy, | ||||||
|  |                                            RepresentationPolicy> { | ||||||
|  |  public: | ||||||
|  |   typedef ImplicitLeapFrog<FieldImplementation, SmearingPolicy, RepresentationPolicy> | ||||||
|  |       Algorithm; | ||||||
|  |   INHERIT_FIELD_TYPES(FieldImplementation); | ||||||
|  |  | ||||||
|  |   std::string integrator_name(){return "ImplicitLeapFrog";} | ||||||
|  |  | ||||||
|  |   // Forwards every argument, including the metric M, to the base Integrator. | ||||||
|  |   ImplicitLeapFrog(GridBase* grid, IntegratorParameters Par, | ||||||
|  |            ActionSet<Field, RepresentationPolicy>& Aset, SmearingPolicy& Sm, Metric<Field>& M) | ||||||
|  |       : Integrator<FieldImplementation, SmearingPolicy, RepresentationPolicy>( | ||||||
|  |             grid, Par, Aset, Sm, M){}; | ||||||
|  |  | ||||||
|  |   // One integration step at `level`, recursing into level+1 until the last | ||||||
|  |   // level of the action set, where the gauge field is updated. | ||||||
|  |   //   _first / _last : non-zero when this call opens / closes the trajectory | ||||||
|  |   void step(Field& U, int level, int _first, int _last) { | ||||||
|  |     const int finest = this->as.size() - 1; | ||||||
|  |     const int nsub   = this->as[level].multiplier; | ||||||
|  |  | ||||||
|  |     // Step size at this level: trajL/MDsteps divided by the multipliers of | ||||||
|  |     // levels 0..level. | ||||||
|  |     RealD dt = this->Params.trajL/this->Params.MDsteps; | ||||||
|  |     for (int l = 0; l <= level; ++l) dt /= this->as[l].multiplier; | ||||||
|  |  | ||||||
|  |     for (int sub = 0; sub < nsub; ++sub) { | ||||||
|  |       const int opening = _first && (sub == 0); | ||||||
|  |       const int closing = _last && (sub == nsub - 1); | ||||||
|  |  | ||||||
|  |       // Initial half step of the momenta. | ||||||
|  |       if (opening) this->implicit_update_P(U, level, dt / 2.0); | ||||||
|  |  | ||||||
|  |       if (level != finest) { | ||||||
|  |         // Recurse into the next level. | ||||||
|  |         this->step(U, level + 1, opening, closing); | ||||||
|  |       } else { | ||||||
|  |         // Last level: full gauge step, split evenly. | ||||||
|  |         this->implicit_update_U(U, dt, dt/2.); | ||||||
|  |       } | ||||||
|  |  | ||||||
|  |       if (closing) { | ||||||
|  |         // Final half step uses the explicit update. | ||||||
|  |         this->update_P2(U, level, dt / 2.0); | ||||||
|  |       } else { | ||||||
|  |         // Full intermediate momentum step. | ||||||
|  |         this->implicit_update_P(U, level, dt, true); | ||||||
|  |       } | ||||||
|  |     } | ||||||
|  |   } | ||||||
|  | }; | ||||||
|  |  | ||||||
|  |  | ||||||
|  | // Second-order minimum-norm integrator in which the last level of the action | ||||||
|  | // set uses the implicit momentum/gauge updates and all coarser levels use the | ||||||
|  | // explicit update_P. The lambda coefficient is read per level from | ||||||
|  | // Params.lambda0 / lambda1 / lambda2, so at most three levels are supported. | ||||||
|  | template <class FieldImplementation, class SmearingPolicy, | ||||||
|  |           class RepresentationPolicy = | ||||||
|  |               Representations<FundamentalRepresentation> > | ||||||
|  | class ImplicitMinimumNorm2 : public Integrator<FieldImplementation, SmearingPolicy, | ||||||
|  |                                        RepresentationPolicy> { | ||||||
|  |  public: | ||||||
|  |   INHERIT_FIELD_TYPES(FieldImplementation); | ||||||
|  |  | ||||||
|  |   // Forwards every argument, including the metric M, to the base Integrator. | ||||||
|  |   ImplicitMinimumNorm2(GridBase* grid, IntegratorParameters Par, | ||||||
|  |                ActionSet<Field, RepresentationPolicy>& Aset, SmearingPolicy& Sm, Metric<Field>& M) | ||||||
|  |       : Integrator<FieldImplementation, SmearingPolicy, RepresentationPolicy>( | ||||||
|  |             grid, Par, Aset, Sm, M){}; | ||||||
|  |  | ||||||
|  |   // NOTE(review): the returned name is spelled "Mininum"; left unchanged in | ||||||
|  |   // case callers compare against it. | ||||||
|  |   std::string integrator_name(){return "ImplicitMininumNorm2";} | ||||||
|  |  | ||||||
|  |   // One integration step at `level`. | ||||||
|  |   //   level          : current level | ||||||
|  |   //   _first / _last : non-zero when this call opens / closes the trajectory | ||||||
|  |   void step(Field& U, int level, int _first, int _last) { | ||||||
|  |     // fl : final level | ||||||
|  |     int fl = this->as.size() - 1; | ||||||
|  |  | ||||||
|  |     // Per-level lambda; only three coefficients exist in Params. | ||||||
|  |     assert(level<3); | ||||||
|  |     RealD lambda= this->Params.lambda0; | ||||||
|  |     if (level>0) lambda= this->Params.lambda1; | ||||||
|  |     if (level>1) lambda= this->Params.lambda2; | ||||||
|  |     std::cout << GridLogMessage << "level: "<<level<< " lambda: "<<lambda<<std::endl; | ||||||
|  |  | ||||||
|  |     // eps : current step size. | ||||||
|  |     // Nesting:  2xupdate_U of size eps/2 | ||||||
|  |     // Next level is eps/2/multiplier | ||||||
|  |     RealD eps = this->Params.trajL/this->Params.MDsteps * 2.0; | ||||||
|  |     for (int l = 0; l <= level; ++l) eps /= 2.0 * this->as[l].multiplier; | ||||||
|  |  | ||||||
|  |     int multiplier = this->as[level].multiplier; | ||||||
|  |     for (int e = 0; e < multiplier; ++e) {  // steps per step | ||||||
|  |  | ||||||
|  |       int first_step = _first && (e == 0); | ||||||
|  |       int last_step = _last && (e == multiplier - 1); | ||||||
|  |  | ||||||
|  |       if (level<fl) { | ||||||
|  |         // Coarser level: explicit momentum updates around two recursive calls. | ||||||
|  |         if (first_step) {  // initial half step | ||||||
|  |           this->update_P(U, level, lambda * eps); | ||||||
|  |         } | ||||||
|  |  | ||||||
|  |         this->step(U, level + 1, first_step, 0); | ||||||
|  |  | ||||||
|  |         this->update_P(U, level, (1.0 - 2.0 * lambda) * eps); | ||||||
|  |  | ||||||
|  |         this->step(U, level + 1, 0, last_step); | ||||||
|  |  | ||||||
|  |         // The closing momentum update is doubled unless this is the last step. | ||||||
|  |         int mm = (last_step) ? 1 : 2; | ||||||
|  |         this->update_P(U, level, lambda * eps * mm); | ||||||
|  |       } else { | ||||||
|  |         // Last level: implicit updates of momenta and gauge field. | ||||||
|  |         if (first_step) {  // initial half step | ||||||
|  |           this->implicit_update_P(U, level, lambda * eps); | ||||||
|  |         } | ||||||
|  |  | ||||||
|  |         this->implicit_update_U(U, 0.5 * eps,lambda*eps); | ||||||
|  |  | ||||||
|  |         this->implicit_update_P(U, level, (1.0 - 2.0 * lambda) * eps, true); | ||||||
|  |  | ||||||
|  |         this->implicit_update_U(U, 0.5 * eps, (0.5-lambda)*eps); | ||||||
|  |  | ||||||
|  |         if (last_step) { | ||||||
|  |           // Final momentum update uses the explicit variant. | ||||||
|  |           this->update_P2(U, level, eps * lambda); | ||||||
|  |         } else { | ||||||
|  |           this->implicit_update_P(U, level, lambda * eps*2.0, true); | ||||||
|  |         } | ||||||
|  |       } | ||||||
|  |     } | ||||||
|  |   } | ||||||
|  | }; | ||||||
|  |  | ||||||
|  | // ImplicitCampostrini integrator. | ||||||
|  | // | ||||||
|  | // step() recurses through the action levels held in this->as: | ||||||
|  | //  - on every level above the last one it applies the lambda-weighted | ||||||
|  | //    update_P / nested step() sequence (per the original author's note this | ||||||
|  | //    part is "still Omelyan" until step() accepts a variable step size); | ||||||
|  | //  - on the last level it advances U with three implicit_update_U calls of | ||||||
|  | //    size epsilon, -sigma*epsilon, epsilon, where sigma = 2^(1/3) and | ||||||
|  | //    epsilon = dt/(2 - sigma), so the three U sub-steps add up to dt. | ||||||
|  | template <class FieldImplementation, class SmearingPolicy, | ||||||
|  |           class RepresentationPolicy = | ||||||
|  |               Representations<FundamentalRepresentation> > | ||||||
|  | class ImplicitCampostrini : public Integrator<FieldImplementation, SmearingPolicy, | ||||||
|  |                                        RepresentationPolicy> { | ||||||
|  |  public: | ||||||
|  |   INHERIT_FIELD_TYPES(FieldImplementation); | ||||||
|  |  | ||||||
|  |   // Forwards every argument unchanged to the Integrator base class. | ||||||
|  |   ImplicitCampostrini(GridBase* grid, IntegratorParameters Par, | ||||||
|  |                ActionSet<Field, RepresentationPolicy>& Aset, SmearingPolicy& Sm, Metric<Field>& M) | ||||||
|  |       : Integrator<FieldImplementation, SmearingPolicy, RepresentationPolicy>( | ||||||
|  |             grid, Par, Aset, Sm, M){}; | ||||||
|  |  | ||||||
|  |   std::string integrator_name(){return "ImplicitCampostrini";} | ||||||
|  |  | ||||||
|  |   // Advance U at the given level. | ||||||
|  |   //   level  : current level; must be < 3 because only lambda0..lambda2 exist | ||||||
|  |   //   _first : non-zero => the first sub-step begins with the initial momentum half step | ||||||
|  |   //   _last  : non-zero => the last sub-step ends with a single (unmerged) momentum update | ||||||
|  |   void step(Field& U, int level, int _first, int _last) { | ||||||
|  |     int fl = this->as.size() - 1;  // index of the final level | ||||||
|  |  | ||||||
|  |     // lambda is selected per level from the integrator parameters | ||||||
|  |     assert(level<3); | ||||||
|  |     RealD lambda= this->Params.lambda0; | ||||||
|  |     if (level>0) lambda= this->Params.lambda1; | ||||||
|  |     if (level>1) lambda= this->Params.lambda2; | ||||||
|  |     // A separator is printed before "lambda:" so the two values do not run together. | ||||||
|  |     std::cout << GridLogMessage << "level: "<<level<< " lambda: "<<lambda<<std::endl; | ||||||
|  |  | ||||||
|  |     // Step size at this level: the base step is halved and divided by the | ||||||
|  |     // multiplier of every level from 0 up to and including this one. | ||||||
|  |     RealD eps = this->Params.trajL/this->Params.MDsteps * 2.0; | ||||||
|  |     for (int l = 0; l <= level; ++l) eps /= 2.0 * this->as[l].multiplier; | ||||||
|  |  | ||||||
|  |     int multiplier = this->as[level].multiplier; | ||||||
|  |  | ||||||
|  |     if(level<fl){ | ||||||
|  |       // Still Omelyan. Needs to change step() to accept variable stepsize. | ||||||
|  |       // Nesting:  2xupdate_U of size eps/2 | ||||||
|  |       // Next level is eps/2/multiplier | ||||||
|  |       for (int e = 0; e < multiplier; ++e) {  // steps per step | ||||||
|  |  | ||||||
|  |         int first_step = _first && (e == 0); | ||||||
|  |         int last_step = _last && (e == multiplier - 1); | ||||||
|  |  | ||||||
|  |         if (first_step) {  // initial half step | ||||||
|  |           this->update_P(U, level, lambda * eps); | ||||||
|  |         } | ||||||
|  |  | ||||||
|  |         this->step(U, level + 1, first_step, 0); | ||||||
|  |  | ||||||
|  |         this->update_P(U, level, (1.0 - 2.0 * lambda) * eps); | ||||||
|  |  | ||||||
|  |         this->step(U, level + 1, 0, last_step); | ||||||
|  |  | ||||||
|  |         // Unless this is the very last sub-step, the closing momentum update | ||||||
|  |         // is doubled to absorb the opening update of the next sub-step. | ||||||
|  |         int mm = (last_step) ? 1 : 2; | ||||||
|  |         this->update_P(U, level, lambda * eps * mm); | ||||||
|  |       } | ||||||
|  |     } | ||||||
|  |     else | ||||||
|  |     { // last level | ||||||
|  |       RealD sigma=pow(2.0,1./3.); | ||||||
|  |       RealD epsilon = eps/(2.0 - sigma); | ||||||
|  |  | ||||||
|  |       for (int e = 0; e < multiplier; ++e) {  // steps per step | ||||||
|  |  | ||||||
|  |         int first_step = _first && (e == 0); | ||||||
|  |         int last_step = _last && (e == multiplier - 1); | ||||||
|  |  | ||||||
|  |         // initial half step | ||||||
|  |         if (first_step) {  this->implicit_update_P(U, level, epsilon*0.5); } | ||||||
|  |         this->implicit_update_U(U, epsilon,epsilon*0.5); | ||||||
|  |         this->implicit_update_P(U, level, (1.0 - sigma) * epsilon *0.5, epsilon*0.5, true); | ||||||
|  |         this->implicit_update_U(U, -epsilon*sigma, -epsilon*sigma*0.5); | ||||||
|  |         this->implicit_update_P(U, level, (1.0 - sigma) * epsilon *0.5, -epsilon*sigma*0.5, true); | ||||||
|  |         this->implicit_update_U(U, epsilon,epsilon*0.5); | ||||||
|  |         if (last_step) { | ||||||
|  |           this->update_P2(U, level, epsilon*0.5 ); | ||||||
|  |         } else { | ||||||
|  |           // closing half step merged with the opening half step of the next sub-step | ||||||
|  |           this->implicit_update_P(U, level, epsilon,epsilon*0.5); | ||||||
|  |         } | ||||||
|  |       } | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |   } | ||||||
|  | }; | ||||||
|  |  | ||||||
| NAMESPACE_END(Grid); | NAMESPACE_END(Grid); | ||||||
|  |  | ||||||
| #endif  // INTEGRATOR_INCLUDED | #endif  // INTEGRATOR_INCLUDED | ||||||
|   | |||||||
| @@ -54,7 +54,361 @@ struct LaplacianParams : Serializable { | |||||||
|       precision(precision){}; |       precision(precision){}; | ||||||
| }; | }; | ||||||
|  |  | ||||||
|  | #define LEG_LOAD(Dir) /* Loads stencil leg Dir of site ss into chi; expects SE, st, ptype, ss, in, buf, chi and lane to exist in the expansion scope. */						 \ | ||||||
|  |   SE = st.GetEntry(ptype, Dir, ss);				 \ | ||||||
|  |   if (SE->_is_local ) { /* entry flagged local: read from the field view 'in', passing the entry's permute flag */						 \ | ||||||
|  |     int perm= SE->_permute;					 \ | ||||||
|  |     chi = coalescedReadPermute(in[SE->_offset],ptype,perm,lane); \ | ||||||
|  |   } else { /* otherwise: read from the buffer 'buf' at the same offset */							 \ | ||||||
|  |     chi = coalescedRead(buf[SE->_offset],lane);			 \ | ||||||
|  |   }								 \ | ||||||
|  |   acceleratorSynchronise(); | ||||||
|  |  | ||||||
|  | const std::vector<int> directions4D   ({Xdir,Ydir,Zdir,Tdir,Xdir,Ydir,Zdir,Tdir}); /* direction of each of the 8 stencil legs; passed to the Stencil constructor */ | ||||||
|  | const std::vector<int> displacements4D({1,1,1,1,-1,-1,-1,-1}); /* displacement of each leg: +1 for legs 0-3, -1 for legs 4-7 */ | ||||||
|  |  | ||||||
|  | template<class Gimpl,class Field> class CovariantAdjointLaplacianStencil : public SparseMatrixBase<Field> | ||||||
|  | { | ||||||
|  | public: | ||||||
|  |   INHERIT_GIMPL_TYPES(Gimpl); | ||||||
|  | //  RealD kappa; | ||||||
|  |  | ||||||
|  |   typedef typename Field::vector_object siteObject; | ||||||
|  |  | ||||||
|  |   template <typename vtype> using iImplDoubledGaugeField = iVector<iScalar<iMatrix<vtype, Nc> >, Nds>; | ||||||
|  |   typedef iImplDoubledGaugeField<Simd> SiteDoubledGaugeField; | ||||||
|  |   typedef Lattice<SiteDoubledGaugeField> DoubledGaugeField; | ||||||
|  |   typedef CartesianStencil<siteObject, siteObject, DefaultImplParams> StencilImpl; | ||||||
|  |  | ||||||
|  |   GridBase *grid; | ||||||
|  |   StencilImpl Stencil; | ||||||
|  |   SimpleCompressor<siteObject> Compressor; | ||||||
|  |   DoubledGaugeField Uds; | ||||||
|  |  | ||||||
|  |   CovariantAdjointLaplacianStencil( GridBase *_grid) /* Builds the 8-leg stencil on _grid; Uds is allocated here but only filled by a later GaugeImport() call. */ | ||||||
|  |     : grid(_grid), | ||||||
|  |       Stencil    (grid,8,Even,directions4D,displacements4D), | ||||||
|  |       Uds(grid){} | ||||||
|  |  | ||||||
|  |   CovariantAdjointLaplacianStencil(GaugeField &Umu) /* Builds the 8-leg stencil on Umu's grid and immediately imports the links via GaugeImport(Umu). */ | ||||||
|  |     : | ||||||
|  |       grid(Umu.Grid()), | ||||||
|  |       Stencil    (grid,8,Even,directions4D,displacements4D), | ||||||
|  |       Uds(grid) | ||||||
|  |   { GaugeImport(Umu); } | ||||||
|  |  | ||||||
|  |   // Fill the doubled gauge field Uds from Umu. For every direction d in | ||||||
|  |   // [0,Nd): component d receives the link U_d, and component d+4 receives | ||||||
|  |   // adj(Cshift(U_d, d, -1)). Asserts that Umu lives on this stencil's grid. | ||||||
|  |   void GaugeImport (const GaugeField &Umu) | ||||||
|  |   { | ||||||
|  |     assert(grid == Umu.Grid()); | ||||||
|  |     for (int dir = 0; dir < Nd; ++dir) { | ||||||
|  |       // forward link goes into slot dir | ||||||
|  |       auto link = PeekIndex<LorentzIndex>(Umu, dir); | ||||||
|  |       PokeIndex<LorentzIndex>(Uds, link, dir); | ||||||
|  |       // shifted, adjointed link goes into slot dir + 4 | ||||||
|  |       link = adj(Cshift(link, dir, -1)); | ||||||
|  |       PokeIndex<LorentzIndex>(Uds, link, dir + 4); | ||||||
|  |     } | ||||||
|  |   }; | ||||||
|  |    | ||||||
|  |   virtual GridBase *Grid(void) { return grid; }; /* Returns the grid pointer this stencil was constructed with. */ | ||||||
|  | //broken: compiled out via #if 0; the body refers to undeclared names (_lef, left, def) | ||||||
|  | #if 0 | ||||||
|  |   virtual void  MDeriv(const Field &_left, Field &_right,Field &_der, int mu) | ||||||
|  |   { | ||||||
|  |     /////////////////////////////////////////////// | ||||||
|  |     // Halo exchange for this geometry of stencil | ||||||
|  |     /////////////////////////////////////////////// | ||||||
|  |     Stencil.HaloExchange(_lef, Compressor); | ||||||
|  |  | ||||||
|  |     /////////////////////////////////// | ||||||
|  |     // Arithmetic expressions | ||||||
|  |     /////////////////////////////////// | ||||||
|  |     autoView( st     , Stencil    , AcceleratorRead); | ||||||
|  |     auto buf = st.CommBuf(); | ||||||
|  |  | ||||||
|  |     autoView( in     , _left    , AcceleratorRead); | ||||||
|  |     autoView( right    , _right   , AcceleratorRead); | ||||||
|  |     autoView( der    , _der   , AcceleratorWrite); | ||||||
|  |     autoView( U     , Uds    , AcceleratorRead); | ||||||
|  |  | ||||||
|  |     typedef typename Field::vector_object        vobj; | ||||||
|  |     typedef decltype(coalescedRead(left[0]))    calcObj; | ||||||
|  |     typedef decltype(coalescedRead(U[0](0))) calcLink; | ||||||
|  |  | ||||||
|  |     const int      Nsimd = vobj::Nsimd(); | ||||||
|  |     const uint64_t NN = grid->oSites(); | ||||||
|  |  | ||||||
|  |     accelerator_for( ss, NN, Nsimd, { | ||||||
|  |  | ||||||
|  | 	StencilEntry *SE; | ||||||
|  | 	 | ||||||
|  | 	const int lane=acceleratorSIMTlane(Nsimd); | ||||||
|  |  | ||||||
|  | 	calcObj chi; | ||||||
|  | 	calcObj phi; | ||||||
|  | 	calcObj res; | ||||||
|  | 	calcObj Uchi; | ||||||
|  | 	calcObj Utmp; | ||||||
|  | 	calcObj Utmp2; | ||||||
|  | 	calcLink UU; | ||||||
|  | 	calcLink Udag; | ||||||
|  | 	int ptype; | ||||||
|  |  | ||||||
|  | 	res                 = coalescedRead(def[ss]); | ||||||
|  | 	phi                 = coalescedRead(right[ss]); | ||||||
|  |  | ||||||
|  | #define LEG_LOAD_MULT_LINK(leg,polarisation)			\ | ||||||
|  | 	UU = coalescedRead(U[ss](polarisation));	\ | ||||||
|  | 	Udag = adj(UU);					\ | ||||||
|  | 	LEG_LOAD(leg);					\ | ||||||
|  | 	mult(&Utmp(), &UU, &chi());			\ | ||||||
|  | 	Utmp2 = adj(Utmp);				\ | ||||||
|  | 	mult(&Utmp(), &UU, &Utmp2());			\ | ||||||
|  | 	Utmp2 = adj(Utmp);				\ | ||||||
|  | 	mult(&Uchi(), &phi(), &Utmp2());			\ | ||||||
|  | 	res = res + Uchi; | ||||||
|  | 	 | ||||||
|  | 	LEG_LOAD_MULT_LINK(0,Xp); | ||||||
|  | 	LEG_LOAD_MULT_LINK(1,Yp); | ||||||
|  | 	LEG_LOAD_MULT_LINK(2,Zp); | ||||||
|  | 	LEG_LOAD_MULT_LINK(3,Tp); | ||||||
|  |  | ||||||
|  | 	coalescedWrite(der[ss], res,lane); | ||||||
|  |     }); | ||||||
|  |  | ||||||
|  |   }; | ||||||
|  | #endif | ||||||
|  |  | ||||||
|  |   virtual void  Morig(const Field &_in, Field &_out) /* Single-pass application: after a full halo exchange, _out[ss] = -8*_in[ss] + the sum of the LEG_LOAD_MULT term over all 8 legs. */ | ||||||
|  |   { | ||||||
|  |     /////////////////////////////////////////////// | ||||||
|  |     // Halo exchange for this geometry of stencil | ||||||
|  |     /////////////////////////////////////////////// | ||||||
|  |     Stencil.HaloExchange(_in, Compressor); | ||||||
|  |  | ||||||
|  |     /////////////////////////////////// | ||||||
|  |     // Arithmetic expressions | ||||||
|  |     /////////////////////////////////// | ||||||
|  | //    auto st = Stencil.View(AcceleratorRead); | ||||||
|  |     autoView( st     , Stencil    , AcceleratorRead); | ||||||
|  |     auto buf = st.CommBuf(); | ||||||
|  |  | ||||||
|  |     autoView( in     , _in    , AcceleratorRead); | ||||||
|  |     autoView( out    , _out   , AcceleratorWrite); | ||||||
|  |     autoView( U     , Uds    , AcceleratorRead); | ||||||
|  |  | ||||||
|  |     typedef typename Field::vector_object        vobj; | ||||||
|  |     typedef decltype(coalescedRead(in[0]))    calcObj; | ||||||
|  |     typedef decltype(coalescedRead(U[0](0))) calcLink; | ||||||
|  |  | ||||||
|  |     const int      Nsimd = vobj::Nsimd(); | ||||||
|  |     const uint64_t NN = grid->oSites(); | ||||||
|  | /* The kernel below runs over ss in [0, grid->oSites()). */ | ||||||
|  |     accelerator_for( ss, NN, Nsimd, { | ||||||
|  |  | ||||||
|  | 	StencilEntry *SE; | ||||||
|  | 	 | ||||||
|  | 	const int lane=acceleratorSIMTlane(Nsimd); | ||||||
|  |  | ||||||
|  | 	calcObj chi; | ||||||
|  | 	calcObj res; | ||||||
|  | 	calcObj Uchi; | ||||||
|  | 	calcObj Utmp; | ||||||
|  | 	calcObj Utmp2; | ||||||
|  | 	calcLink UU; | ||||||
|  | 	calcLink Udag; | ||||||
|  | 	int ptype; | ||||||
|  | /* Diagonal term: res starts as -8 * in[ss]. */ | ||||||
|  | 	res                 = coalescedRead(in[ss])*(-8.0); | ||||||
|  | /* LEG_LOAD_MULT(leg,pol): reads link U[ss](pol) and leg 'leg' into chi, then adds adj(UU*adj(UU*chi)) to res - assuming mult(r,a,b) stores a*b in r (TODO confirm). Udag is assigned but not otherwise used by the macro. */ | ||||||
|  | #define LEG_LOAD_MULT(leg,polarisation)			\ | ||||||
|  | 	UU = coalescedRead(U[ss](polarisation));	\ | ||||||
|  | 	Udag = adj(UU);					\ | ||||||
|  | 	LEG_LOAD(leg);					\ | ||||||
|  | 	mult(&Utmp(), &UU, &chi());			\ | ||||||
|  | 	Utmp2 = adj(Utmp);				\ | ||||||
|  | 	mult(&Utmp(), &UU, &Utmp2());			\ | ||||||
|  | 	Uchi = adj(Utmp);				\ | ||||||
|  | 	res = res + Uchi; | ||||||
|  | 	 | ||||||
|  | 	LEG_LOAD_MULT(0,Xp); | ||||||
|  | 	LEG_LOAD_MULT(1,Yp); | ||||||
|  | 	LEG_LOAD_MULT(2,Zp); | ||||||
|  | 	LEG_LOAD_MULT(3,Tp); | ||||||
|  | 	LEG_LOAD_MULT(4,Xm); | ||||||
|  | 	LEG_LOAD_MULT(5,Ym); | ||||||
|  | 	LEG_LOAD_MULT(6,Zm); | ||||||
|  | 	LEG_LOAD_MULT(7,Tm); | ||||||
|  |  | ||||||
|  | 	coalescedWrite(out[ss], res,lane); | ||||||
|  |     }); | ||||||
|  |  | ||||||
|  |   }; | ||||||
|  |   // Two-phase application of the stencil operator to _in, writing _out. | ||||||
|  |   // | ||||||
|  |   //  1. Gather the halo, post the communications and merge the SHM data. | ||||||
|  |   //  2. Kernel 1 (issued between CommunicateBegin and CommunicateComplete): | ||||||
|  |   //     out[ss] = -8*in[ss] + contributions of the legs whose stencil entry | ||||||
|  |   //     is flagged _is_local. | ||||||
|  |   //  3. Complete the communications and merge the received data. | ||||||
|  |   //  4. Kernel 2: re-read out[ss] and add the legs that are NOT flagged | ||||||
|  |   //     _is_local. | ||||||
|  |   // | ||||||
|  |   // Relies on the LEG_LOAD and LEG_LOAD_MULT macros defined earlier in this | ||||||
|  |   // header, which use the local names SE, st, ptype, ss, in, buf, chi, lane, | ||||||
|  |   // UU, Udag, Utmp, Utmp2, Uchi, res and U. | ||||||
|  |   virtual void  Mnew (const Field &_in, Field &_out) | ||||||
|  |   { | ||||||
|  |     /////////////////////////////////////////////// | ||||||
|  |     // Halo exchange for this geometry of stencil | ||||||
|  |     /////////////////////////////////////////////// | ||||||
|  |     std::vector<std::vector<CommsRequest_t> > requests; | ||||||
|  |     Stencil.Prepare(); | ||||||
|  |     { | ||||||
|  |       GRID_TRACE("Laplace Gather"); | ||||||
|  |       Stencil.HaloGather(_in,Compressor); | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     tracePush("Laplace Communication"); | ||||||
|  |     Stencil.CommunicateBegin(requests); | ||||||
|  |     { | ||||||
|  |       GRID_TRACE("MergeSHM"); | ||||||
|  |       Stencil.CommsMergeSHM(Compressor); | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     /////////////////////////////////// | ||||||
|  |     // Arithmetic expressions | ||||||
|  |     /////////////////////////////////// | ||||||
|  |     autoView( st     , Stencil    , AcceleratorRead); | ||||||
|  |     auto buf = st.CommBuf(); | ||||||
|  |  | ||||||
|  |     autoView( in     , _in    , AcceleratorRead); | ||||||
|  |     autoView( out    , _out   , AcceleratorWrite); | ||||||
|  |     autoView( U     , Uds    , AcceleratorRead); | ||||||
|  |  | ||||||
|  |     typedef typename Field::vector_object        vobj; | ||||||
|  |     typedef decltype(coalescedRead(in[0]))    calcObj; | ||||||
|  |     typedef decltype(coalescedRead(U[0](0))) calcLink; | ||||||
|  |  | ||||||
|  |     const int      Nsimd = vobj::Nsimd(); | ||||||
|  |     const uint64_t NN = grid->oSites(); | ||||||
|  |  | ||||||
|  |     // Kernel 1: diagonal term plus every leg flagged local. | ||||||
|  |     accelerator_for( ss, NN, Nsimd, { | ||||||
|  |  | ||||||
|  |         StencilEntry *SE; | ||||||
|  |  | ||||||
|  |         const int lane=acceleratorSIMTlane(Nsimd); | ||||||
|  |  | ||||||
|  |         calcObj chi; | ||||||
|  |         calcObj res; | ||||||
|  |         calcObj Uchi; | ||||||
|  |         calcObj Utmp; | ||||||
|  |         calcObj Utmp2; | ||||||
|  |         calcLink UU; | ||||||
|  |         calcLink Udag; | ||||||
|  |         int ptype; | ||||||
|  |  | ||||||
|  |         res                 = coalescedRead(in[ss])*(-8.0); | ||||||
|  |  | ||||||
|  |         SE = st.GetEntry(ptype, 0, ss); if (SE->_is_local ) { LEG_LOAD_MULT(0,Xp); } | ||||||
|  |         SE = st.GetEntry(ptype, 1, ss); if (SE->_is_local ) { LEG_LOAD_MULT(1,Yp); } | ||||||
|  |         SE = st.GetEntry(ptype, 2, ss); if (SE->_is_local ) { LEG_LOAD_MULT(2,Zp); } | ||||||
|  |         SE = st.GetEntry(ptype, 3, ss); if (SE->_is_local ) { LEG_LOAD_MULT(3,Tp); } | ||||||
|  |         SE = st.GetEntry(ptype, 4, ss); if (SE->_is_local ) { LEG_LOAD_MULT(4,Xm); } | ||||||
|  |         SE = st.GetEntry(ptype, 5, ss); if (SE->_is_local ) { LEG_LOAD_MULT(5,Ym); } | ||||||
|  |         SE = st.GetEntry(ptype, 6, ss); if (SE->_is_local ) { LEG_LOAD_MULT(6,Zm); } | ||||||
|  |         SE = st.GetEntry(ptype, 7, ss); if (SE->_is_local ) { LEG_LOAD_MULT(7,Tm); } | ||||||
|  |  | ||||||
|  |         coalescedWrite(out[ss], res,lane); | ||||||
|  |     }); | ||||||
|  |  | ||||||
|  |     Stencil.CommunicateComplete(requests); | ||||||
|  |     // Pop with the same label that was pushed, so the trace range is consistent. | ||||||
|  |     tracePop("Laplace Communication"); | ||||||
|  |  | ||||||
|  |     { | ||||||
|  |       GRID_TRACE("Merge"); | ||||||
|  |       Stencil.CommsMerge(Compressor); | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     // Kernel 2: start from the partial result and add every non-local leg. | ||||||
|  |     accelerator_for( ss, NN, Nsimd, { | ||||||
|  |  | ||||||
|  |         StencilEntry *SE; | ||||||
|  |  | ||||||
|  |         const int lane=acceleratorSIMTlane(Nsimd); | ||||||
|  |  | ||||||
|  |         calcObj chi; | ||||||
|  |         calcObj res; | ||||||
|  |         calcObj Uchi; | ||||||
|  |         calcObj Utmp; | ||||||
|  |         calcObj Utmp2; | ||||||
|  |         calcLink UU; | ||||||
|  |         calcLink Udag; | ||||||
|  |         int ptype; | ||||||
|  |  | ||||||
|  |         res                 = coalescedRead(out[ss]); | ||||||
|  |  | ||||||
|  |         SE = st.GetEntry(ptype, 0, ss); if ((SE->_is_local )==0){ LEG_LOAD_MULT(0,Xp); } | ||||||
|  |         SE = st.GetEntry(ptype, 1, ss); if ((SE->_is_local )==0){ LEG_LOAD_MULT(1,Yp); } | ||||||
|  |         SE = st.GetEntry(ptype, 2, ss); if ((SE->_is_local )==0){ LEG_LOAD_MULT(2,Zp); } | ||||||
|  |         SE = st.GetEntry(ptype, 3, ss); if ((SE->_is_local )==0){ LEG_LOAD_MULT(3,Tp); } | ||||||
|  |         SE = st.GetEntry(ptype, 4, ss); if ((SE->_is_local )==0){ LEG_LOAD_MULT(4,Xm); } | ||||||
|  |         SE = st.GetEntry(ptype, 5, ss); if ((SE->_is_local )==0){ LEG_LOAD_MULT(5,Ym); } | ||||||
|  |         SE = st.GetEntry(ptype, 6, ss); if ((SE->_is_local )==0){ LEG_LOAD_MULT(6,Zm); } | ||||||
|  |         SE = st.GetEntry(ptype, 7, ss); if ((SE->_is_local )==0){ LEG_LOAD_MULT(7,Tm); } | ||||||
|  |  | ||||||
|  |         coalescedWrite(out[ss], res,lane); | ||||||
|  |     }); | ||||||
|  |   }; | ||||||
|  |  | ||||||
|  |   virtual void  M(const Field &in, Field &out) {Mnew(in,out);}; /* Operator application; forwards to Mnew. */ | ||||||
|  |   virtual void  Mdag (const Field &in, Field &out) { M(in,out);}; // Laplacian is hermitian, so Mdag forwards to M | ||||||
|  |   virtual  void Mdiag    (const Field &in, Field &out)                  {assert(0);}; // Unimplemented; needed only for multigrid | ||||||
|  |   virtual  void Mdir     (const Field &in, Field &out,int dir, int disp){assert(0);}; // Unimplemented; needed only for multigrid | ||||||
|  |   virtual  void MdirAll  (const Field &in, std::vector<Field> &out)     {assert(0);}; // Unimplemented; needed only for multigrid | ||||||
|  | }; | ||||||
|  |  | ||||||
|  | #undef LEG_LOAD_MULT | ||||||
|  | #undef LEG_LOAD_MULT_LINK | ||||||
|  | #undef LEG_LOAD | ||||||
|  |  | ||||||
| //////////////////////////////////////////////////////////// | //////////////////////////////////////////////////////////// | ||||||
| // Laplacian operator L on adjoint fields | // Laplacian operator L on adjoint fields | ||||||
| @@ -76,29 +430,40 @@ class LaplacianAdjointField: public Metric<typename Impl::Field> { | |||||||
|   LaplacianParams param; |   LaplacianParams param; | ||||||
|   MultiShiftFunction PowerHalf;     |   MultiShiftFunction PowerHalf;     | ||||||
|   MultiShiftFunction PowerInvHalf;     |   MultiShiftFunction PowerInvHalf;     | ||||||
|  | //template<class Gimpl,class Field> class CovariantAdjointLaplacianStencil : public SparseMatrixBase<Field> | ||||||
|  |   CovariantAdjointLaplacianStencil<Impl,typename Impl::LinkField> LapStencil; | ||||||
|  |  | ||||||
| public: | public: | ||||||
|   INHERIT_GIMPL_TYPES(Impl); |   INHERIT_GIMPL_TYPES(Impl); | ||||||
|  |  | ||||||
|   LaplacianAdjointField(GridBase* grid, OperatorFunction<GaugeField>& S, LaplacianParams& p, const RealD k = 1.0) |   LaplacianAdjointField(GridBase* grid, OperatorFunction<GaugeField>& S, LaplacianParams& p, const RealD k = 1.0, bool if_remez=true) | ||||||
|     : U(Nd, grid), Solver(S), param(p), kappa(k){ |     : U(Nd, grid), Solver(S), param(p), kappa(k) | ||||||
|  | 	,LapStencil(grid){ | ||||||
|     AlgRemez remez(param.lo,param.hi,param.precision); |     AlgRemez remez(param.lo,param.hi,param.precision); | ||||||
|     std::cout<<GridLogMessage << "Generating degree "<<param.degree<<" for x^(1/2)"<<std::endl; |     std::cout<<GridLogMessage << "Generating degree "<<param.degree<<" for x^(1/2)"<<std::endl; | ||||||
|  |     if(if_remez){ | ||||||
|     remez.generateApprox(param.degree,1,2); |     remez.generateApprox(param.degree,1,2); | ||||||
|     PowerHalf.Init(remez,param.tolerance,false); |     PowerHalf.Init(remez,param.tolerance,false); | ||||||
|     PowerInvHalf.Init(remez,param.tolerance,true); |     PowerInvHalf.Init(remez,param.tolerance,true); | ||||||
|  |     } | ||||||
|  |     this->triv=0; | ||||||
|          |          | ||||||
|  |  | ||||||
|   }; |   }; | ||||||
|  |   LaplacianAdjointField(){this->triv=0; printf("triv=%d\n",this->Trivial());} | ||||||
|   void Mdir(const GaugeField&, GaugeField&, int, int){ assert(0);} |   void Mdir(const GaugeField&, GaugeField&, int, int){ assert(0);} | ||||||
|   void MdirAll(const GaugeField&, std::vector<GaugeField> &){ assert(0);} |   void MdirAll(const GaugeField&, std::vector<GaugeField> &){ assert(0);} | ||||||
|   void Mdiag(const GaugeField&, GaugeField&){ assert(0);} |   void Mdiag(const GaugeField&, GaugeField&){ assert(0);} | ||||||
|  |  | ||||||
|   void ImportGauge(const GaugeField& _U) { |   void ImportGauge(const GaugeField& _U) { | ||||||
|  |     RealD total=0.; | ||||||
|     for (int mu = 0; mu < Nd; mu++) { |     for (int mu = 0; mu < Nd; mu++) { | ||||||
|       U[mu] = PeekIndex<LorentzIndex>(_U, mu); |       U[mu] = PeekIndex<LorentzIndex>(_U, mu); | ||||||
|  |       total += norm2(U[mu]); | ||||||
|     } |     } | ||||||
|  |     LapStencil.GaugeImport (_U); | ||||||
|  |  | ||||||
|  |     std::cout << GridLogDebug <<"ImportGauge:norm2(U _U) = "<<total<<std::endl; | ||||||
|   } |   } | ||||||
|  |  | ||||||
|   void M(const GaugeField& in, GaugeField& out) { |   void M(const GaugeField& in, GaugeField& out) { | ||||||
| @@ -106,10 +471,12 @@ public: | |||||||
|     // test |     // test | ||||||
|     //GaugeField herm = in + adj(in); |     //GaugeField herm = in + adj(in); | ||||||
|     //std::cout << "AHermiticity: " << norm2(herm) << std::endl; |     //std::cout << "AHermiticity: " << norm2(herm) << std::endl; | ||||||
|  | //    std::cout << GridLogDebug <<"M:Kappa = "<<kappa<<std::endl; | ||||||
|  |  | ||||||
|  |     GaugeLinkField sum(in.Grid()); | ||||||
|  | #if 0 | ||||||
|     GaugeLinkField tmp(in.Grid()); |     GaugeLinkField tmp(in.Grid()); | ||||||
|     GaugeLinkField tmp2(in.Grid()); |     GaugeLinkField tmp2(in.Grid()); | ||||||
|     GaugeLinkField sum(in.Grid()); |  | ||||||
|  |  | ||||||
|     for (int nu = 0; nu < Nd; nu++) { |     for (int nu = 0; nu < Nd; nu++) { | ||||||
|       sum = Zero(); |       sum = Zero(); | ||||||
| @@ -123,10 +490,22 @@ public: | |||||||
|       out_nu = (1.0 - kappa) * in_nu - kappa / (double(4 * Nd)) * sum; |       out_nu = (1.0 - kappa) * in_nu - kappa / (double(4 * Nd)) * sum; | ||||||
|       PokeIndex<LorentzIndex>(out, out_nu, nu); |       PokeIndex<LorentzIndex>(out, out_nu, nu); | ||||||
|     } |     } | ||||||
|  | #else | ||||||
|  |     for (int nu = 0; nu < Nd; nu++) { | ||||||
|  |       GaugeLinkField in_nu = PeekIndex<LorentzIndex>(in, nu); | ||||||
|  |       GaugeLinkField out_nu(out.Grid()); | ||||||
|  |       LapStencil.M(in_nu,sum); | ||||||
|  |       out_nu = (1.0 - kappa) * in_nu - kappa / (double(4 * Nd)) * sum; | ||||||
|  |       PokeIndex<LorentzIndex>(out, out_nu, nu); | ||||||
|     } |     } | ||||||
|  | #endif | ||||||
|  | //    std::cout << GridLogDebug <<"M:norm2(out) = "<<norm2(out)<<std::endl; | ||||||
|  |   } | ||||||
|  |  | ||||||
|  |  | ||||||
|   void MDeriv(const GaugeField& in, GaugeField& der) { |   void MDeriv(const GaugeField& in, GaugeField& der) { | ||||||
|     // in is anti-hermitian |     // in is anti-hermitian | ||||||
|  | //    std::cout << GridLogDebug <<"MDeriv:Kappa = "<<kappa<<std::endl; | ||||||
|     RealD factor = -kappa / (double(4 * Nd)); |     RealD factor = -kappa / (double(4 * Nd)); | ||||||
|      |      | ||||||
|     for (int mu = 0; mu < Nd; mu++){ |     for (int mu = 0; mu < Nd; mu++){ | ||||||
| @@ -140,6 +519,7 @@ public: | |||||||
|       // adjoint in the last multiplication |       // adjoint in the last multiplication | ||||||
|       PokeIndex<LorentzIndex>(der,  -2.0 * factor * der_mu, mu); |       PokeIndex<LorentzIndex>(der,  -2.0 * factor * der_mu, mu); | ||||||
|     }  |     }  | ||||||
|  |     std::cout << GridLogDebug <<"MDeriv: Kappa= "<< kappa << " norm2(der) = "<<norm2(der)<<std::endl; | ||||||
|   } |   } | ||||||
|  |  | ||||||
|   // separating this temporarily |   // separating this temporarily | ||||||
| @@ -159,11 +539,22 @@ public: | |||||||
|       } |       } | ||||||
|       PokeIndex<LorentzIndex>(der, -factor * der_mu, mu); |       PokeIndex<LorentzIndex>(der, -factor * der_mu, mu); | ||||||
|     } |     } | ||||||
|  |     std::cout << GridLogDebug <<"MDeriv: Kappa= "<< kappa << " norm2(der) = "<<norm2(der)<<std::endl; | ||||||
|   } |   } | ||||||
|  |  | ||||||
|   void Minv(const GaugeField& in, GaugeField& inverted){ |   void Minv(const GaugeField& in, GaugeField& inverted){ | ||||||
|     HermitianLinearOperator<LaplacianAdjointField<Impl>,GaugeField> HermOp(*this); |     HermitianLinearOperator<LaplacianAdjointField<Impl>,GaugeField> HermOp(*this); | ||||||
|     Solver(HermOp, in, inverted); |     Solver(HermOp, in, inverted); | ||||||
|  |     std::cout << GridLogDebug <<"Minv:norm2(inverted) = "<<norm2(inverted)<<std::endl; | ||||||
|  |   } | ||||||
|  |  | ||||||
|  |  | ||||||
|  |   void MinvDeriv(const GaugeField& in, GaugeField& der) { | ||||||
|  |     GaugeField X(in.Grid()); | ||||||
|  |     Minv(in,X); | ||||||
|  |     MDeriv(X,der); | ||||||
|  |     der *=-1.0; | ||||||
|  |     std::cout << GridLogDebug <<"MinvDeriv:norm2(der) = "<<norm2(der)<<std::endl; | ||||||
|   } |   } | ||||||
|  |  | ||||||
|   void MSquareRoot(GaugeField& P){ |   void MSquareRoot(GaugeField& P){ | ||||||
| @@ -172,6 +563,7 @@ public: | |||||||
|     ConjugateGradientMultiShift<GaugeField> msCG(param.MaxIter,PowerHalf); |     ConjugateGradientMultiShift<GaugeField> msCG(param.MaxIter,PowerHalf); | ||||||
|     msCG(HermOp,P,Gp); |     msCG(HermOp,P,Gp); | ||||||
|     P = Gp;  |     P = Gp;  | ||||||
|  |     std::cout << GridLogDebug <<"MSquareRoot:norm2(P) = "<<norm2(P)<<std::endl; | ||||||
|   } |   } | ||||||
|  |  | ||||||
|   void MInvSquareRoot(GaugeField& P){ |   void MInvSquareRoot(GaugeField& P){ | ||||||
| @@ -180,6 +572,7 @@ public: | |||||||
|     ConjugateGradientMultiShift<GaugeField> msCG(param.MaxIter,PowerInvHalf); |     ConjugateGradientMultiShift<GaugeField> msCG(param.MaxIter,PowerInvHalf); | ||||||
|     msCG(HermOp,P,Gp); |     msCG(HermOp,P,Gp); | ||||||
|     P = Gp;  |     P = Gp;  | ||||||
|  |     std::cout << GridLogDebug <<"MInvSquareRoot:norm2(P) = "<<norm2(P)<<std::endl; | ||||||
|   } |   } | ||||||
|  |  | ||||||
|  |  | ||||||
|   | |||||||
							
								
								
									
										403
									
								
								Grid/qcd/utils/CovariantLaplacianRat.h
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										403
									
								
								Grid/qcd/utils/CovariantLaplacianRat.h
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,403 @@ | |||||||
|  | /************************************************************************************* | ||||||
|  |  | ||||||
|  | Grid physics library, www.github.com/paboyle/Grid | ||||||
|  |  | ||||||
|  | Source file: ./Grid/qcd/utils/CovariantLaplacianRat.h | ||||||
|  |  | ||||||
|  | Copyright (C) 2021 | ||||||
|  |  | ||||||
|  | Author: Chulwoo Jung <chulwoo@bnl.gov> | ||||||
|  |  | ||||||
|  | This program is free software; you can redistribute it and/or modify | ||||||
|  | it under the terms of the GNU General Public License as published by | ||||||
|  | the Free Software Foundation; either version 2 of the License, or | ||||||
|  | (at your option) any later version. | ||||||
|  |  | ||||||
|  | This program is distributed in the hope that it will be useful, | ||||||
|  | but WITHOUT ANY WARRANTY; without even the implied warranty of | ||||||
|  | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the | ||||||
|  | GNU General Public License for more details. | ||||||
|  |  | ||||||
|  | You should have received a copy of the GNU General Public License along | ||||||
|  | with this program; if not, write to the Free Software Foundation, Inc., | ||||||
|  | 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. | ||||||
|  |  | ||||||
|  | See the full license in the file "LICENSE" in the top level distribution | ||||||
|  | directory | ||||||
|  | *************************************************************************************/ | ||||||
|  | 			   /*  END LEGAL */ | ||||||
|  | #pragma once  | ||||||
|  | #define MIXED_CG | ||||||
|  | //enable/disable push_back | ||||||
|  | #undef USE_CHRONO  | ||||||
|  |  | ||||||
|  | //#include <roctracer/roctx.h> | ||||||
|  |  | ||||||
|  | NAMESPACE_BEGIN(Grid); | ||||||
|  |  | ||||||
|  | // Parameter bundle stored by LaplacianAdjointRat (as Gparam / Mparam). | ||||||
|  | // Holds four coefficient vectors of length `order`, all zero-initialised, | ||||||
|  | // together with an offset, an iteration limit, a tolerance and a precision. | ||||||
|  | struct LaplacianRatParams { | ||||||
|  |  | ||||||
|  |   RealD offset;            // always constructed as 1 | ||||||
|  |   int order;               // length of each coefficient vector below | ||||||
|  |   std::vector<RealD> a0; | ||||||
|  |   std::vector<RealD> a1; | ||||||
|  |   std::vector<RealD> b0; | ||||||
|  |   std::vector<RealD> b1; | ||||||
|  |   RealD b2; //for debugging; always constructed as 1 | ||||||
|  |   int   MaxIter; | ||||||
|  |   RealD tolerance; | ||||||
|  |   int   precision; | ||||||
|  |  | ||||||
|  |   // Members are initialised in declaration order; each coefficient vector | ||||||
|  |   // is created with `ord` zero entries. | ||||||
|  |   LaplacianRatParams(int ord = 1, | ||||||
|  |                   int maxit     = 1000, | ||||||
|  |                   RealD tol     = 1.0e-8, | ||||||
|  |                   int precision = 64) | ||||||
|  |     : offset(1.), | ||||||
|  |       order(ord), | ||||||
|  |       a0(ord,0.), | ||||||
|  |       a1(ord,0.), | ||||||
|  |       b0(ord,0.), | ||||||
|  |       b1(ord,0.), | ||||||
|  |       b2(1.), | ||||||
|  |       MaxIter(maxit), | ||||||
|  |       tolerance(tol), | ||||||
|  |       precision(precision) | ||||||
|  |   {} | ||||||
|  | }; | ||||||
|  |  | ||||||
|  |  | ||||||
|  |  | ||||||
|  | //////////////////////////////////////////////////////////// | ||||||
|  | // Laplacian operator L on adjoint fields | ||||||
|  | // | ||||||
|  | // phi: adjoint field | ||||||
|  | // L: D_mu^dag D_mu | ||||||
|  | // | ||||||
|  | // L phi(x) = Sum_mu [ U_mu(x)phi(x+mu)U_mu(x)^dag +  | ||||||
|  | //                     U_mu(x-mu)^dag phi(x-mu)U_mu(x-mu) | ||||||
|  | //                     -2phi(x)] | ||||||
|  | // | ||||||
|  | // Operator designed to be encapsulated by | ||||||
|  | // an HermitianLinearOperator<.. , ..> | ||||||
|  | //////////////////////////////////////////////////////////// | ||||||
|  |  | ||||||
|  | template <class Impl, class ImplF> | ||||||
|  | class LaplacianAdjointRat: public Metric<typename Impl::Field> { | ||||||
|  |   OperatorFunction<typename Impl::Field> &Solver; | ||||||
|  |   LaplacianRatParams Gparam; | ||||||
|  |   LaplacianRatParams Mparam; | ||||||
|  |   GridBase *grid; | ||||||
|  |   GridBase *grid_f; | ||||||
|  |   CovariantAdjointLaplacianStencil<Impl,typename Impl::LinkField> LapStencil; | ||||||
|  |   CovariantAdjointLaplacianStencil<ImplF,typename ImplF::LinkField> LapStencilF; | ||||||
|  | public: | ||||||
|  |   INHERIT_GIMPL_TYPES(Impl); | ||||||
|  | //   typedef typename GImpl::LinkField GaugeLinkField; \ | ||||||
|  | //  typedef typename GImpl::Field GaugeField;          | ||||||
|  |   typedef typename ImplF::Field GaugeFieldF; | ||||||
|  |   typedef typename ImplF::LinkField GaugeLinkFieldF; \ | ||||||
|  |   GaugeField Usav; | ||||||
|  |   GaugeFieldF UsavF; | ||||||
|  |   std::vector< std::vector<GaugeLinkField> > prev_solnsM; | ||||||
|  |   std::vector< std::vector<GaugeLinkField> > prev_solnsMinv; | ||||||
|  |   std::vector< std::vector<GaugeLinkField> > prev_solnsMDeriv; | ||||||
|  |   std::vector< std::vector<GaugeLinkField> > prev_solnsMinvDeriv; | ||||||
|  |  | ||||||
|  |   // Binds the metric to a double precision grid, a single precision grid, | ||||||
|  |   // an external solver and the two rational parameter sets (gpar -> Gparam, | ||||||
|  |   // mpar -> Mparam). Initializers are written in member declaration order, | ||||||
|  |   // which is the order in which C++ runs them regardless of how they are listed. | ||||||
|  |   LaplacianAdjointRat(GridBase* _grid, GridBase* _grid_f, OperatorFunction<GaugeField>& S, LaplacianRatParams& gpar, LaplacianRatParams& mpar) | ||||||
|  |     : Solver(S), | ||||||
|  |       Gparam(gpar), | ||||||
|  |       Mparam(mpar), | ||||||
|  |       grid(_grid), | ||||||
|  |       grid_f(_grid_f), | ||||||
|  |       LapStencil(_grid), | ||||||
|  |       LapStencilF(_grid_f), | ||||||
|  |       Usav(_grid), | ||||||
|  |       UsavF(_grid_f), | ||||||
|  |       prev_solnsM(4), | ||||||
|  |       prev_solnsMinv(4), | ||||||
|  |       prev_solnsMDeriv(4), | ||||||
|  |       prev_solnsMinvDeriv(4), | ||||||
|  |       U(Nd, _grid) | ||||||
|  |   { | ||||||
|  |     // Flag this metric as non-trivial (the Metric base class defaults triv to 1). | ||||||
|  |     this->triv=0; | ||||||
|  |   } | ||||||
|  |   // Default constructor: marks the metric as non-trivial and prints the flag. | ||||||
|  |   // NOTE(review): the reference member Solver is not initialized here, so this | ||||||
|  |   // constructor presumably cannot be instantiated - confirm whether it is needed. | ||||||
|  |   LaplacianAdjointRat() | ||||||
|  |   { | ||||||
|  |     this->triv=0; | ||||||
|  |     printf("triv=%d\n",this->Trivial()); | ||||||
|  |   } | ||||||
|  |   // Directional / diagonal applications are not implemented for this metric; | ||||||
|  |   // each of the three entry points below aborts through assert(0). | ||||||
|  |   void Mdir(const GaugeField&, GaugeField&, int, int) | ||||||
|  |   { | ||||||
|  |     assert(0); | ||||||
|  |   } | ||||||
|  |   void MdirAll(const GaugeField&, std::vector<GaugeField> &) | ||||||
|  |   { | ||||||
|  |     assert(0); | ||||||
|  |   } | ||||||
|  |   void Mdiag(const GaugeField&, GaugeField&) | ||||||
|  |   { | ||||||
|  |     assert(0); | ||||||
|  |   } | ||||||
|  |  | ||||||
|  |   // Caches the gauge configuration: one link field per direction in U, the | ||||||
|  |   // full field in Usav and a precision-changed copy in UsavF. The summed | ||||||
|  |   // norm2 of the links is reported on the debug log stream. | ||||||
|  |   void ImportGauge(const GaugeField& _U) { | ||||||
|  |     RealD linkNorm2 = 0.; | ||||||
|  |     for (int dir = 0; dir < Nd; ++dir) { | ||||||
|  |       U[dir] = PeekIndex<LorentzIndex>(_U, dir); | ||||||
|  |       linkNorm2 += norm2(U[dir]); | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     // Keep both precisions of the full field for the Laplacian stencils. | ||||||
|  |     Usav = _U; | ||||||
|  |     precisionChange(UsavF,Usav); | ||||||
|  |  | ||||||
|  |     std::cout << GridLogDebug << "ImportGauge:norm2(_U) = " << " " << linkNorm2 << std::endl; | ||||||
|  |   } | ||||||
|  |  | ||||||
|  |   // Link derivative contraction written into a Lorentz-indexed field. | ||||||
|  |   // For each direction mu the mu component of `der` is set to | ||||||
|  |   //   1/(4*Nd) * [ U_mu Cshift(left ,mu,1) U_mu^dag right | ||||||
|  |   //              + U_mu Cshift(right,mu,1) U_mu^dag left ] | ||||||
|  |   // using the links cached by ImportGauge. Start/end are logged. | ||||||
|  |   void MDerivLink(const GaugeLinkField& left, const GaugeLinkField& right, | ||||||
|  |               GaugeField& der) { | ||||||
|  |     std::cout << GridLogMessage << "MDerivLink start " << std::endl; | ||||||
|  |  | ||||||
|  |     RealD factor = -1. / (double(4 * Nd)); | ||||||
|  |     for (int mu = 0; mu < Nd; ++mu) { | ||||||
|  |       GaugeLinkField contraction(der.Grid()); | ||||||
|  |       contraction = Zero(); | ||||||
|  |       contraction += U[mu] * Cshift(left, mu, 1) * adj(U[mu]) * right; | ||||||
|  |       contraction += U[mu] * Cshift(right, mu, 1) * adj(U[mu]) * left; | ||||||
|  |       // factor is negative, so the stored term carries weight +1/(4*Nd). | ||||||
|  |       PokeIndex<LorentzIndex>(der, -factor * contraction, mu); | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     std::cout << GridLogMessage << "MDerivLink end " << std::endl; | ||||||
|  |   } | ||||||
|  |  | ||||||
|  |   // Same contraction as the GaugeField overload, but the result for direction | ||||||
|  |   // mu is stored in der[mu] instead of being poked into a Lorentz-indexed | ||||||
|  |   // field. `der` must already hold at least Nd link fields. | ||||||
|  |   void MDerivLink(const GaugeLinkField& left, const GaugeLinkField& right, | ||||||
|  |               std::vector<GaugeLinkField> & der) { | ||||||
|  |     RealD factor = -1. / (double(4 * Nd)); | ||||||
|  |  | ||||||
|  |     for (int mu = 0; mu < Nd; ++mu) { | ||||||
|  |       GaugeLinkField contraction(left.Grid()); | ||||||
|  |       contraction = Zero(); | ||||||
|  |       contraction += U[mu] * Cshift(left, mu, 1) * adj(U[mu]) * right; | ||||||
|  |       contraction += U[mu] * Cshift(right, mu, 1) * adj(U[mu]) * left; | ||||||
|  |       // factor is negative, so the stored term carries weight +1/(4*Nd). | ||||||
|  |       der[mu] = -factor*contraction; | ||||||
|  |     } | ||||||
|  |   } | ||||||
|  |  | ||||||
|  |   // Accumulates into `der` the link derivative of the rational function | ||||||
|  |   // described by `par`, contracted between `left` and `right`. | ||||||
|  |   // | ||||||
|  |   //   par        : rational coefficients (offset, a0, a1, b0, b1, b2, tolerance) | ||||||
|  |   //   left/right : Lorentz-indexed fields; each direction nu is treated separately | ||||||
|  |   //   der        : output, only ever incremented here (callers zero it first) | ||||||
|  |   //   prev_solns : per-direction solution history, used when USE_CHRONO is on | ||||||
|  |   // | ||||||
|  |   // For every direction nu: | ||||||
|  |   //   1. MinvMom[i] solves QuadOp_i x = right_nu, and | ||||||
|  |   //      GMom = offset*right_nu + sum_i (a0[i] + a1[i]*fac*Lap) MinvMom[i]. | ||||||
|  |   //   2. For every term i, MinvGMom solves QuadOp_i x = GMom, and eight | ||||||
|  |   //      MDerivLink contractions are summed with their coefficients into DerLink, | ||||||
|  |   //      which is then added to `der`. | ||||||
|  |   // | ||||||
|  |   // Fixes relative to the previous revision: | ||||||
|  |   //   * the accumulated DerLink (not the scratch tempDerLink, which only holds | ||||||
|  |   //     the last unweighted contraction) is what gets added to `der`; | ||||||
|  |   //   * MinvMom[i] is zeroed before the first solve when no forecast is made, | ||||||
|  |   //     instead of being handed to the solver unassigned; | ||||||
|  |   //   * unused temporaries were dropped. | ||||||
|  |   void MDerivInt(LaplacianRatParams &par, const GaugeField& left, const GaugeField& right, | ||||||
|  |               GaugeField& der ,  std::vector< std::vector<GaugeLinkField> >& prev_solns ) { | ||||||
|  |  | ||||||
|  |     std::cout<<GridLogMessage << "LaplaceStart " <<std::endl; | ||||||
|  |     RealD fac =  - 1. / (double(4 * Nd)) ;   // normalisation applied to every LapStencil.M result | ||||||
|  |     RealD coef=0.5; | ||||||
|  |     LapStencil.GaugeImport(Usav); | ||||||
|  |     LapStencilF.GaugeImport(UsavF); | ||||||
|  |  | ||||||
|  |     for (int nu=0;nu<Nd;nu++){ | ||||||
|  |         GaugeLinkField right_nu = PeekIndex<LorentzIndex>(right, nu); | ||||||
|  |         GaugeLinkField left_nu = PeekIndex<LorentzIndex>(left, nu); | ||||||
|  |  | ||||||
|  |         GaugeLinkField LMinvMom(left.Grid()); | ||||||
|  |         GaugeLinkField GMom(left.Grid()); | ||||||
|  |         GaugeLinkField LMinvGMom(left.Grid()); | ||||||
|  |         GaugeLinkField MinvAGMom(left.Grid()); | ||||||
|  |         GaugeLinkField LMinvAGMom(left.Grid()); | ||||||
|  |         GaugeLinkField AMinvMom(left.Grid()); | ||||||
|  |         GaugeLinkField LMinvAMom(left.Grid()); | ||||||
|  |         GaugeLinkField temp(left.Grid()); | ||||||
|  |         GaugeLinkField MinvGMom(left.Grid()); | ||||||
|  |         GaugeLinkField Gtemp2(left.Grid());   // scratch for un-normalised LapStencil.M output | ||||||
|  |  | ||||||
|  |         std::vector<GaugeLinkField> MinvMom(par.order,left.Grid()); | ||||||
|  |  | ||||||
|  |         ConjugateGradient<GaugeLinkField> CG(par.tolerance,10000,false); | ||||||
|  |  | ||||||
|  |         ChronoForecast< QuadLinearOperator<CovariantAdjointLaplacianStencil<Impl,GaugeLinkField>,GaugeLinkField> , GaugeLinkField> Forecast; | ||||||
|  |  | ||||||
|  |         GMom = par.offset * right_nu; | ||||||
|  |  | ||||||
|  |         // ---- Pass 1: solve every term against right_nu and build GMom ---- | ||||||
|  |         for(int i =0;i<par.order;i++){ | ||||||
|  |           QuadLinearOperator<CovariantAdjointLaplacianStencil<Impl,typename Impl::LinkField>,GaugeLinkField> QuadOp(LapStencil,par.b0[i],fac*par.b1[i],fac*fac*par.b2); | ||||||
|  | #if USE_CHRONO | ||||||
|  |           MinvMom[i] = Forecast(QuadOp, right_nu, prev_solns[nu]); | ||||||
|  | #else | ||||||
|  |           // The solvers take MinvMom[i] as an in/out argument (presumably the | ||||||
|  |           // initial guess); give it a defined value rather than raw storage. | ||||||
|  |           MinvMom[i] = Zero(); | ||||||
|  | #endif | ||||||
|  | #ifndef MIXED_CG | ||||||
|  |           CG(QuadOp,right_nu,MinvMom[i]); | ||||||
|  | #else | ||||||
|  |           QuadLinearOperator<CovariantAdjointLaplacianStencil<ImplF,typename ImplF::LinkField>,GaugeLinkFieldF> QuadOpF(LapStencilF,par.b0[i],fac*par.b1[i],fac*fac*par.b2); | ||||||
|  |           MixedPrecisionConjugateGradient<GaugeLinkField,GaugeLinkFieldF> MixedCG(par.tolerance,10000,10000,grid_f,QuadOpF,QuadOp); | ||||||
|  |           MixedCG.InnerTolerance=par.tolerance; | ||||||
|  |           MixedCG(right_nu,MinvMom[i]); | ||||||
|  | #endif | ||||||
|  | #if USE_CHRONO | ||||||
|  |           prev_solns[nu].push_back(MinvMom[i]); | ||||||
|  | #endif | ||||||
|  |  | ||||||
|  |           GMom += par.a0[i]*MinvMom[i]; | ||||||
|  |           LapStencil.M(MinvMom[i],Gtemp2); | ||||||
|  |           GMom += par.a1[i]*fac*Gtemp2; | ||||||
|  |         } | ||||||
|  |  | ||||||
|  |         // ---- Pass 2: solve every term against GMom and contract the force ---- | ||||||
|  |         for(int i =0;i<par.order;i++){ | ||||||
|  |           QuadLinearOperator<CovariantAdjointLaplacianStencil<Impl,typename Impl::LinkField>,GaugeLinkField> QuadOp(LapStencil,par.b0[i],fac*par.b1[i],fac*fac*par.b2); | ||||||
|  |  | ||||||
|  |           MinvGMom = Forecast(QuadOp, GMom, prev_solns[nu]); | ||||||
|  | #ifndef MIXED_CG | ||||||
|  |           CG(QuadOp,GMom,MinvGMom); | ||||||
|  |           LapStencil.M(MinvGMom, Gtemp2); LMinvGMom=fac*Gtemp2; | ||||||
|  |           CG(QuadOp,right_nu,MinvMom[i]); | ||||||
|  | #else | ||||||
|  |           QuadLinearOperator<CovariantAdjointLaplacianStencil<ImplF,typename ImplF::LinkField>,GaugeLinkFieldF> QuadOpF(LapStencilF,par.b0[i],fac*par.b1[i],fac*fac*par.b2); | ||||||
|  |           MixedPrecisionConjugateGradient<GaugeLinkField,GaugeLinkFieldF> MixedCG(par.tolerance,10000,10000,grid_f,QuadOpF,QuadOp); | ||||||
|  |           MixedCG.InnerTolerance=par.tolerance; | ||||||
|  |           MixedCG(GMom,MinvGMom); | ||||||
|  |           LapStencil.M(MinvGMom, Gtemp2); LMinvGMom=fac*Gtemp2; | ||||||
|  |           MixedCG(right_nu,MinvMom[i]); | ||||||
|  | #endif | ||||||
|  | #if USE_CHRONO | ||||||
|  |           prev_solns[nu].push_back(MinvGMom); | ||||||
|  | #endif | ||||||
|  |  | ||||||
|  |           // A = a0[i] + a1[i]*fac*Lap applied to both solutions, plus one more | ||||||
|  |           // Laplacian on top of each. | ||||||
|  |           LapStencil.M(MinvMom[i], Gtemp2); LMinvMom=fac*Gtemp2; | ||||||
|  |           AMinvMom = par.a1[i]*LMinvMom; | ||||||
|  |           AMinvMom += par.a0[i]*MinvMom[i]; | ||||||
|  |  | ||||||
|  |           LapStencil.M(AMinvMom, Gtemp2); LMinvAMom=fac*Gtemp2; | ||||||
|  |           LapStencil.M(MinvGMom, Gtemp2); temp=fac*Gtemp2; | ||||||
|  |           MinvAGMom = par.a1[i]*temp; | ||||||
|  |           MinvAGMom += par.a0[i]*MinvGMom; | ||||||
|  |           LapStencil.M(MinvAGMom, Gtemp2); LMinvAGMom=fac*Gtemp2; | ||||||
|  |  | ||||||
|  |           GaugeField tempDer(left.Grid()); | ||||||
|  |           std::vector<GaugeLinkField> DerLink(Nd,left.Grid());       // weighted sum of the contractions | ||||||
|  |           std::vector<GaugeLinkField> tempDerLink(Nd,left.Grid());   // scratch: one contraction at a time | ||||||
|  |  | ||||||
|  |           std::cout<<GridLogMessage << "force contraction "<< i <<std::endl; | ||||||
|  |  | ||||||
|  |           for (int mu=0;mu<Nd;mu++) DerLink[mu]=Zero(); | ||||||
|  |           MDerivLink(GMom,MinvMom[i],tempDerLink);       for (int mu=0;mu<Nd;mu++) DerLink[mu] += coef*2*par.a1[i]*tempDerLink[mu]; | ||||||
|  |           MDerivLink(left_nu,MinvGMom,tempDerLink);      for (int mu=0;mu<Nd;mu++) DerLink[mu] += coef*2*par.a1[i]*tempDerLink[mu]; | ||||||
|  |           MDerivLink(LMinvAGMom,MinvMom[i],tempDerLink); for (int mu=0;mu<Nd;mu++) DerLink[mu] += coef*-2.*par.b2*tempDerLink[mu]; | ||||||
|  |           MDerivLink(LMinvAMom,MinvGMom,tempDerLink);    for (int mu=0;mu<Nd;mu++) DerLink[mu] += coef*-2.*par.b2*tempDerLink[mu]; | ||||||
|  |           MDerivLink(MinvAGMom,LMinvMom,tempDerLink);    for (int mu=0;mu<Nd;mu++) DerLink[mu] += coef*-2.*par.b2*tempDerLink[mu]; | ||||||
|  |           MDerivLink(AMinvMom,LMinvGMom,tempDerLink);    for (int mu=0;mu<Nd;mu++) DerLink[mu] += coef*-2.*par.b2*tempDerLink[mu]; | ||||||
|  |           MDerivLink(MinvAGMom,MinvMom[i],tempDerLink);  for (int mu=0;mu<Nd;mu++) DerLink[mu] += coef*-2.*par.b1[i]*tempDerLink[mu]; | ||||||
|  |           MDerivLink(AMinvMom,MinvGMom,tempDerLink);     for (int mu=0;mu<Nd;mu++) DerLink[mu] += coef*-2.*par.b1[i]*tempDerLink[mu]; | ||||||
|  |  | ||||||
|  |           // Pack the accumulated sum (all eight weighted terms) and add it. | ||||||
|  |           for (int mu=0;mu<Nd;mu++) PokeIndex<LorentzIndex>(tempDer, DerLink[mu], mu); | ||||||
|  |           der += tempDer; | ||||||
|  |  | ||||||
|  |           std::cout<<GridLogMessage << "coef =  force contraction "<< i << "done "<< coef <<std::endl; | ||||||
|  |         } | ||||||
|  |     } | ||||||
|  |     std::cout<<GridLogMessage << "LaplaceEnd " <<std::endl; | ||||||
|  |   } | ||||||
|  |  | ||||||
|  |   // Single-argument form: the same field is used as both the left and the | ||||||
|  |   // right factor of the two-argument overload. | ||||||
|  |   void MDeriv(const GaugeField& in, GaugeField& der) | ||||||
|  |   { | ||||||
|  |     this->MDeriv(in, in, der); | ||||||
|  |   } | ||||||
|  |  | ||||||
|  |   // Derivative for the Mparam rational function: clears `der`, lets MDerivInt | ||||||
|  |   // accumulate into it (with the prev_solnsMDeriv history) and logs norm2(der) | ||||||
|  |   // on the debug stream. | ||||||
|  |   void MDeriv(const GaugeField& left, const GaugeField& right, | ||||||
|  |               GaugeField& der) | ||||||
|  |   { | ||||||
|  |     der = Zero(); | ||||||
|  |     MDerivInt(Mparam, left, right, der, prev_solnsMDeriv); | ||||||
|  |     std::cout << GridLogDebug << "MDeriv:norm2(der) = " << norm2(der) << std::endl; | ||||||
|  |   } | ||||||
|  |  | ||||||
|  |   // Derivative for the Gparam rational function with `in` as both factors: | ||||||
|  |   // clears `der`, lets MDerivInt accumulate into it using the member history | ||||||
|  |   // prev_solnsMinvDeriv, then logs norm2(der) on the debug stream. | ||||||
|  |   // (An unused local history vector was removed; it was never passed on.) | ||||||
|  |   void MinvDeriv(const GaugeField& in, GaugeField& der) { | ||||||
|  |     der=Zero(); | ||||||
|  |     MDerivInt(Gparam, in, in, der,prev_solnsMinvDeriv); | ||||||
|  |     std::cout <<GridLogDebug << "MinvDeriv:norm2(der) = "<<norm2(der)<<std::endl; | ||||||
|  |   } | ||||||
|  |  | ||||||
|  |  | ||||||
|  |   // Applies, in place and direction by direction, the rational function | ||||||
|  |   // described by `par` to P: | ||||||
|  |   //   P_nu <- offset*P_nu + sum_i (a0[i] + a1[i]*scale*Lap) x_i , | ||||||
|  |   // where x_i is the solver solution of QuadOp_i x = P_nu and scale = -1/(4*Nd). | ||||||
|  |   // prev_solns[nu] feeds the forecast and is only appended to when USE_CHRONO | ||||||
|  |   // is enabled. | ||||||
|  |   void MSquareRootInt(LaplacianRatParams &par, GaugeField& P, std::vector< std::vector<GaugeLinkField> > & prev_solns ){ | ||||||
|  |     typedef QuadLinearOperator<CovariantAdjointLaplacianStencil<Impl,typename Impl::LinkField>,GaugeLinkField> QuadOpD; | ||||||
|  |  | ||||||
|  |     std::cout<<GridLogMessage << "LaplaceStart " <<std::endl; | ||||||
|  |     RealD scale = -1. / (double(4 * Nd)); | ||||||
|  |     LapStencil.GaugeImport(Usav); | ||||||
|  |     LapStencilF.GaugeImport(UsavF); | ||||||
|  |  | ||||||
|  |     for(int nu=0; nu<Nd; nu++){ | ||||||
|  |       GaugeLinkField P_nu = PeekIndex<LorentzIndex>(P, nu); | ||||||
|  |  | ||||||
|  |       // Running result for this direction, seeded with the offset term. | ||||||
|  |       GaugeLinkField result(P.Grid()); | ||||||
|  |       result = par.offset * P_nu; | ||||||
|  |  | ||||||
|  |       ConjugateGradient<GaugeLinkField> CG(par.tolerance,10000); | ||||||
|  |       ChronoForecast< QuadOpD , GaugeLinkField> Forecast; | ||||||
|  |  | ||||||
|  |       GaugeLinkField soln(P.Grid()); | ||||||
|  |       GaugeLinkField lapSoln(P.Grid()); | ||||||
|  |  | ||||||
|  |       for(int i=0; i<par.order; i++){ | ||||||
|  |         QuadOpD QuadOp(LapStencil,par.b0[i],scale*par.b1[i],scale*scale*par.b2); | ||||||
|  |  | ||||||
|  |         // Starting point for the solve, built from the stored history. | ||||||
|  |         soln = Forecast(QuadOp, P_nu, prev_solns[nu]); | ||||||
|  | #ifndef MIXED_CG | ||||||
|  |         CG(QuadOp,P_nu,soln); | ||||||
|  | #else | ||||||
|  |         QuadLinearOperator<CovariantAdjointLaplacianStencil<ImplF,typename ImplF::LinkField>,GaugeLinkFieldF> QuadOpF(LapStencilF,par.b0[i],scale*par.b1[i],scale*scale*par.b2); | ||||||
|  |         MixedPrecisionConjugateGradient<GaugeLinkField,GaugeLinkFieldF> MixedCG(par.tolerance,10000,10000,grid_f,QuadOpF,QuadOp); | ||||||
|  |         MixedCG.InnerTolerance=par.tolerance; | ||||||
|  |         MixedCG(P_nu,soln); | ||||||
|  | #endif | ||||||
|  | #if USE_CHRONO | ||||||
|  |         prev_solns[nu].push_back(soln); | ||||||
|  | #endif | ||||||
|  |  | ||||||
|  |         result += par.a0[i]*soln; | ||||||
|  |         LapStencil.M(soln,lapSoln); | ||||||
|  |         result += par.a1[i]*scale*lapSoln; | ||||||
|  |       } | ||||||
|  |  | ||||||
|  |       PokeIndex<LorentzIndex>(P, result, nu); | ||||||
|  |     } | ||||||
|  |     std::cout<<GridLogMessage << "LaplaceEnd " <<std::endl; | ||||||
|  |   } | ||||||
|  |  | ||||||
|  |   // In-place application of the Mparam rational function to P, starting | ||||||
|  |   // from an empty solution history; norm2(P) is logged on the debug stream. | ||||||
|  |   void MSquareRoot(GaugeField& P) | ||||||
|  |   { | ||||||
|  |     std::vector< std::vector<GaugeLinkField> > history(4); | ||||||
|  |     MSquareRootInt(Mparam, P, history); | ||||||
|  |     std::cout << GridLogDebug << "MSquareRoot:norm2(P) = " << norm2(P) << std::endl; | ||||||
|  |   } | ||||||
|  |  | ||||||
|  |   // In-place application of the Gparam rational function to P, starting | ||||||
|  |   // from an empty solution history; norm2(P) is logged on the debug stream. | ||||||
|  |   void MInvSquareRoot(GaugeField& P) | ||||||
|  |   { | ||||||
|  |     std::vector< std::vector<GaugeLinkField> > history(4); | ||||||
|  |     MSquareRootInt(Gparam, P, history); | ||||||
|  |     std::cout << GridLogDebug << "MInvSquareRoot:norm2(P) = " << norm2(P) << std::endl; | ||||||
|  |   } | ||||||
|  |  | ||||||
|  |   // out = result of applying the Mparam rational function to `in` twice in | ||||||
|  |   // succession; both passes share one local solution history. | ||||||
|  |   void M(const GaugeField& in, GaugeField& out) | ||||||
|  |   { | ||||||
|  |       std::vector< std::vector<GaugeLinkField> > history(4); | ||||||
|  |       out = in; | ||||||
|  |       for (int pass = 0; pass < 2; ++pass) { | ||||||
|  |         MSquareRootInt(Mparam, out, history); | ||||||
|  |       } | ||||||
|  |       std::cout << GridLogDebug << "M:norm2(out) = " << norm2(out) << std::endl; | ||||||
|  |   } | ||||||
|  |  | ||||||
|  |   // inverted = result of applying the Gparam rational function to `in` twice | ||||||
|  |   // in succession; both passes share one local solution history. | ||||||
|  |   void Minv(const GaugeField& in, GaugeField& inverted) | ||||||
|  |   { | ||||||
|  |       std::vector< std::vector<GaugeLinkField> > history(4); | ||||||
|  |       inverted = in; | ||||||
|  |       for (int pass = 0; pass < 2; ++pass) { | ||||||
|  |         MSquareRootInt(Gparam, inverted, history); | ||||||
|  |       } | ||||||
|  |       std::cout << GridLogDebug << "Minv:norm2(inverted) = " << norm2(inverted) << std::endl; | ||||||
|  |   } | ||||||
|  |  | ||||||
|  |  | ||||||
|  |  | ||||||
|  | private: | ||||||
|  |   std::vector<GaugeLinkField> U; | ||||||
|  | }; | ||||||
|  | #undef MIXED_CG | ||||||
|  |  | ||||||
|  | NAMESPACE_END(Grid); | ||||||
| @@ -7,6 +7,7 @@ Source file: ./lib/qcd/hmc/integrators/Integrator.h | |||||||
| Copyright (C) 2015 | Copyright (C) 2015 | ||||||
|  |  | ||||||
| Author: Guido Cossu <guido.cossu@ed.ac.uk> | Author: Guido Cossu <guido.cossu@ed.ac.uk> | ||||||
|  | Author: Chulwoo Jung <chulwoo@bnl.gov> | ||||||
|  |  | ||||||
| This program is free software; you can redistribute it and/or modify | This program is free software; you can redistribute it and/or modify | ||||||
| it under the terms of the GNU General Public License as published by | it under the terms of the GNU General Public License as published by | ||||||
| @@ -33,7 +34,12 @@ NAMESPACE_BEGIN(Grid); | |||||||
|  |  | ||||||
| template <typename Field>  | template <typename Field>  | ||||||
| class Metric{ | class Metric{ | ||||||
|  | protected: | ||||||
|  |   int triv; | ||||||
| public: | public: | ||||||
|  |   Metric(){this->triv=1;} | ||||||
|  |   int Trivial(){ return triv;} | ||||||
|  | //printf("Metric::Trivial=%d\n",triv); ; | ||||||
|   virtual void ImportGauge(const Field&)   = 0; |   virtual void ImportGauge(const Field&)   = 0; | ||||||
|   virtual void M(const Field&, Field&)     = 0; |   virtual void M(const Field&, Field&)     = 0; | ||||||
|   virtual void Minv(const Field&, Field&)  = 0; |   virtual void Minv(const Field&, Field&)  = 0; | ||||||
| @@ -41,6 +47,8 @@ public: | |||||||
|   virtual void MInvSquareRoot(Field&) = 0; |   virtual void MInvSquareRoot(Field&) = 0; | ||||||
|   virtual void MDeriv(const Field&, Field&) = 0; |   virtual void MDeriv(const Field&, Field&) = 0; | ||||||
|   virtual void MDeriv(const Field&, const Field&, Field&) = 0; |   virtual void MDeriv(const Field&, const Field&, Field&) = 0; | ||||||
|  |   virtual void MinvDeriv(const Field&, Field&) = 0; | ||||||
|  | //  virtual void MinvDeriv(const Field&, const Field&, Field&) = 0; | ||||||
| }; | }; | ||||||
|  |  | ||||||
|  |  | ||||||
| @@ -48,23 +56,36 @@ public: | |||||||
| template <typename Field> | template <typename Field> | ||||||
| class TrivialMetric : public Metric<Field>{ | class TrivialMetric : public Metric<Field>{ | ||||||
| public: | public: | ||||||
|  | //  TrivialMetric(){this->triv=1;printf("TrivialMetric::triv=%d\n",this->Trivial());} | ||||||
|   virtual void ImportGauge(const Field&){}; |   virtual void ImportGauge(const Field&){}; | ||||||
|   virtual void M(const Field& in, Field& out){ |   virtual void M(const Field& in, Field& out){ | ||||||
|  | //    printf("M:norm=%0.15e\n",norm2(in)); | ||||||
|  |     std::cout << GridLogIntegrator << " M:norm(in)= " << std::sqrt(norm2(in)) << std::endl; | ||||||
|     out = in; |     out = in; | ||||||
|   } |   } | ||||||
|   virtual void Minv(const Field& in, Field& out){ |   virtual void Minv(const Field& in, Field& out){ | ||||||
|  |     std::cout << GridLogIntegrator << " Minv:norm(in)= " << std::sqrt(norm2(in)) << std::endl; | ||||||
|     out = in; |     out = in; | ||||||
|   } |   } | ||||||
|   virtual void MSquareRoot(Field& P){ |   virtual void MSquareRoot(Field& P){ | ||||||
|  |     std::cout << GridLogIntegrator << " MSquareRoot:norm(P)= " << std::sqrt(norm2(P)) << std::endl; | ||||||
|     // do nothing |     // do nothing | ||||||
|   } |   } | ||||||
|   virtual void MInvSquareRoot(Field& P){ |   virtual void MInvSquareRoot(Field& P){ | ||||||
|  |     std::cout << GridLogIntegrator << " MInvSquareRoot:norm(P)= " << std::sqrt(norm2(P)) << std::endl; | ||||||
|     // do nothing |     // do nothing | ||||||
|   } |   } | ||||||
|   virtual void MDeriv(const Field& in, Field& out){ |   virtual void MDeriv(const Field& in, Field& out){ | ||||||
|  |     std::cout << GridLogIntegrator << " MDeriv:norm(in)= " << std::sqrt(norm2(in)) << std::endl; | ||||||
|  |     out = Zero(); | ||||||
|  |   } | ||||||
|  |   virtual void MinvDeriv(const Field& in, Field& out){ | ||||||
|  |     std::cout << GridLogIntegrator << " MinvDeriv:norm(in)= " << std::sqrt(norm2(in)) << std::endl; | ||||||
|     out = Zero(); |     out = Zero(); | ||||||
|   } |   } | ||||||
|   virtual void MDeriv(const Field& left, const Field& right, Field& out){ |   virtual void MDeriv(const Field& left, const Field& right, Field& out){ | ||||||
|  |     std::cout << GridLogIntegrator << " MDeriv:norm(left)= " << std::sqrt(norm2(left)) << std::endl; | ||||||
|  |     std::cout << GridLogIntegrator << " MDeriv:norm(right)= " << std::sqrt(norm2(right)) << std::endl; | ||||||
|     out = Zero(); |     out = Zero(); | ||||||
|   } |   } | ||||||
|  |  | ||||||
| @@ -101,14 +122,15 @@ public: | |||||||
|     // Generate gaussian momenta |     // Generate gaussian momenta | ||||||
|     Implementation::generate_momenta(Mom, sRNG, pRNG); |     Implementation::generate_momenta(Mom, sRNG, pRNG); | ||||||
|     // Modify the distribution with the metric |     // Modify the distribution with the metric | ||||||
|  | //    if(M.Trivial()) return; | ||||||
|     M.MSquareRoot(Mom); |     M.MSquareRoot(Mom); | ||||||
|  |  | ||||||
|     if (1) { |     if (1) { | ||||||
|       // Auxiliary momenta |       // Auxiliary momenta | ||||||
|       // do nothing if trivial, so hide in the metric |       // do nothing if trivial, so hide in the metric | ||||||
|       MomentaField AuxMomTemp(Mom.Grid()); |       MomentaField AuxMomTemp(Mom.Grid()); | ||||||
|       Implementation::generate_momenta(AuxMom, sRNG, pRNG); |       Implementation::generate_momenta(AuxMom, sRNG,pRNG); | ||||||
|       Implementation::generate_momenta(AuxField, sRNG, pRNG); |       Implementation::generate_momenta(AuxField, sRNG,pRNG); | ||||||
|       // Modify the distribution with the metric |       // Modify the distribution with the metric | ||||||
|       // Aux^dag M Aux |       // Aux^dag M Aux | ||||||
|       M.MInvSquareRoot(AuxMom);  // AuxMom = M^{-1/2} AuxMomTemp |       M.MInvSquareRoot(AuxMom);  // AuxMom = M^{-1/2} AuxMomTemp | ||||||
| @@ -117,11 +139,12 @@ public: | |||||||
|  |  | ||||||
|   // Correct |   // Correct | ||||||
|   RealD MomentaAction(){ |   RealD MomentaAction(){ | ||||||
|  |     static RealD Saux=0.,Smom=0.; | ||||||
|     MomentaField inv(Mom.Grid()); |     MomentaField inv(Mom.Grid()); | ||||||
|     inv = Zero(); |     inv = Zero(); | ||||||
|     M.Minv(Mom, inv); |     M.Minv(Mom, inv); | ||||||
|     LatticeComplex Hloc(Mom.Grid()); |     LatticeComplex Hloc(Mom.Grid()); Hloc = Zero(); | ||||||
|     Hloc = Zero(); |     LatticeComplex Hloc2(Mom.Grid()); Hloc2 = Zero(); | ||||||
|     for (int mu = 0; mu < Nd; mu++) { |     for (int mu = 0; mu < Nd; mu++) { | ||||||
|       // This is not very general |       // This is not very general | ||||||
|       // hide in the metric |       // hide in the metric | ||||||
| @@ -129,8 +152,15 @@ public: | |||||||
|       auto inv_mu = PeekIndex<LorentzIndex>(inv, mu); |       auto inv_mu = PeekIndex<LorentzIndex>(inv, mu); | ||||||
|       Hloc += trace(Mom_mu * inv_mu); |       Hloc += trace(Mom_mu * inv_mu); | ||||||
|     } |     } | ||||||
|  |     auto Htmp1 = TensorRemove(sum(Hloc)); | ||||||
|  |     std::cout << GridLogMessage << "S:dSmom = " << Htmp1.real()-Smom << "\n"; | ||||||
|  |     Smom=Htmp1.real()/HMC_MOMENTUM_DENOMINATOR; | ||||||
|      |      | ||||||
|     if (1) { |  | ||||||
|  |      | ||||||
|  |  | ||||||
|  | //    if(!M.Trivial())  | ||||||
|  |     { | ||||||
|       // Auxiliary Fields |       // Auxiliary Fields | ||||||
|       // hide in the metric |       // hide in the metric | ||||||
|       M.M(AuxMom, inv); |       M.M(AuxMom, inv); | ||||||
| @@ -140,13 +170,18 @@ public: | |||||||
|         auto inv_mu = PeekIndex<LorentzIndex>(inv, mu); |         auto inv_mu = PeekIndex<LorentzIndex>(inv, mu); | ||||||
|         auto am_mu = PeekIndex<LorentzIndex>(AuxMom, mu); |         auto am_mu = PeekIndex<LorentzIndex>(AuxMom, mu); | ||||||
|         auto af_mu = PeekIndex<LorentzIndex>(AuxField, mu); |         auto af_mu = PeekIndex<LorentzIndex>(AuxField, mu); | ||||||
|         Hloc += trace(am_mu * inv_mu);// p M p |         Hloc += trace(am_mu * inv_mu); | ||||||
|         Hloc += trace(af_mu * af_mu); |         Hloc2 += trace(af_mu * af_mu); | ||||||
|       } |       } | ||||||
|     } |     } | ||||||
|  |     auto Htmp2 = TensorRemove(sum(Hloc))-Htmp1; | ||||||
|  |     std::cout << GridLogMessage << "S:dSaux = " << Htmp2.real()-Saux << "\n"; | ||||||
|  |     Saux=Htmp2.real(); | ||||||
|  |  | ||||||
|     auto Hsum = TensorRemove(sum(Hloc)); |     auto Hsum = TensorRemove(sum(Hloc))/HMC_MOMENTUM_DENOMINATOR; | ||||||
|     return Hsum.real(); |     auto Hsum2 = TensorRemove(sum(Hloc2)); | ||||||
|  |     std::cout << GridLogIntegrator << "MomentaAction: " <<  Hsum.real()+Hsum2.real() << std::endl; | ||||||
|  |     return Hsum.real()+Hsum2.real(); | ||||||
|   } |   } | ||||||
|  |  | ||||||
|   // Correct |   // Correct | ||||||
| @@ -157,15 +192,17 @@ public: | |||||||
|     MomentaField MDer(in.Grid()); |     MomentaField MDer(in.Grid()); | ||||||
|     MomentaField X(in.Grid()); |     MomentaField X(in.Grid()); | ||||||
|     X = Zero(); |     X = Zero(); | ||||||
|     M.Minv(in, X);  // X = G in |     M.MinvDeriv(in, MDer);  // MDer = U * dS/dU | ||||||
|     M.MDeriv(X, MDer);  // MDer = U * dS/dU |     der = -1.0* Implementation::projectForce(MDer);  // Ta if gauge fields | ||||||
|     der = Implementation::projectForce(MDer);  // Ta if gauge fields | //    std::cout << GridLogIntegrator << " DerivativeU: norm(in)= " << std::sqrt(norm2(in)) << std::endl; | ||||||
|  | //    std::cout << GridLogIntegrator << " DerivativeU: norm(der)= " << std::sqrt(norm2(der)) << std::endl; | ||||||
|      |      | ||||||
|   } |   } | ||||||
|  |  | ||||||
|   void AuxiliaryFieldsDerivative(MomentaField& der){ |   void AuxiliaryFieldsDerivative(MomentaField& der){ | ||||||
|     der = Zero(); |     der = Zero(); | ||||||
|     if (1){ | //    if(!M.Trivial())  | ||||||
|  |     { | ||||||
|       // Auxiliary fields |       // Auxiliary fields | ||||||
|       MomentaField der_temp(der.Grid()); |       MomentaField der_temp(der.Grid()); | ||||||
|       MomentaField X(der.Grid()); |       MomentaField X(der.Grid()); | ||||||
| @@ -173,6 +210,7 @@ public: | |||||||
|       //M.M(AuxMom, X); // X = M Aux |       //M.M(AuxMom, X); // X = M Aux | ||||||
|       // Two derivative terms |       // Two derivative terms | ||||||
|       // the Mderiv need separation of left and right terms |       // the Mderiv need separation of left and right terms | ||||||
|  |     std::cout << GridLogIntegrator << " AuxiliaryFieldsDerivative:norm(AuxMom)= " << std::sqrt(norm2(AuxMom)) << std::endl; | ||||||
|       M.MDeriv(AuxMom, der);  |       M.MDeriv(AuxMom, der);  | ||||||
|  |  | ||||||
|  |  | ||||||
| @@ -180,6 +218,7 @@ public: | |||||||
|       //M.MDeriv(X, AuxMom, der_temp); der += der_temp; |       //M.MDeriv(X, AuxMom, der_temp); der += der_temp; | ||||||
|  |  | ||||||
|       der = -1.0*Implementation::projectForce(der); |       der = -1.0*Implementation::projectForce(der); | ||||||
|  |       std::cout << GridLogIntegrator << " AuxiliaryFieldsDerivative:norm(der)= " << std::sqrt(norm2(der)) << std::endl; | ||||||
|     } |     } | ||||||
|   } |   } | ||||||
|  |  | ||||||
| @@ -189,22 +228,28 @@ public: | |||||||
|     // is the projection necessary here? |     // is the projection necessary here? | ||||||
|     // no for fields in the algebra |     // no for fields in the algebra | ||||||
|     der = Implementation::projectForce(der);  |     der = Implementation::projectForce(der);  | ||||||
|  |     std::cout << GridLogIntegrator << " DerivativeP:norm(der)= " << std::sqrt(norm2(der)) << std::endl; | ||||||
|   } |   } | ||||||
|  |  | ||||||
|   void update_auxiliary_momenta(RealD ep){ |   void update_auxiliary_momenta(RealD ep){ | ||||||
|     if(1){ |       std::cout << GridLogIntegrator << "AuxMom update_auxiliary_fields: " << std::sqrt(norm2(AuxMom)) << std::endl; | ||||||
|       AuxMom -= ep * AuxField; |       std::cout << GridLogIntegrator << "AuxField update_auxiliary_fields: " << std::sqrt(norm2(AuxField)) << std::endl; | ||||||
|  |     { | ||||||
|  |       AuxMom -= ep * AuxField * HMC_MOMENTUM_DENOMINATOR; | ||||||
|  |       std::cout << GridLogIntegrator << "AuxMom update_auxiliary_fields: " << std::sqrt(norm2(AuxMom)) << std::endl; | ||||||
|     } |     } | ||||||
|   } |   } | ||||||
|  |  | ||||||
|   void update_auxiliary_fields(RealD ep){ |   void update_auxiliary_fields(RealD ep){ | ||||||
|     if (1) { | //    if(!M.Trivial())  | ||||||
|  |     { | ||||||
|       MomentaField tmp(AuxMom.Grid()); |       MomentaField tmp(AuxMom.Grid()); | ||||||
|       MomentaField tmp2(AuxMom.Grid()); |       MomentaField tmp2(AuxMom.Grid()); | ||||||
|       M.M(AuxMom, tmp); |       M.M(AuxMom, tmp); | ||||||
|       // M.M(tmp, tmp2); |       // M.M(tmp, tmp2); | ||||||
|       AuxField += ep * tmp;  // M^2 AuxMom |       AuxField += ep * tmp;  // M^2 AuxMom | ||||||
|       // factor of 2? |       // factor of 2? | ||||||
|  |       std::cout << GridLogIntegrator << "AuxField update_auxiliary_fields: " << std::sqrt(norm2(AuxField)) << std::endl; | ||||||
|     } |     } | ||||||
|   } |   } | ||||||
|  |  | ||||||
|   | |||||||
| @@ -464,7 +464,8 @@ public: | |||||||
|   //U_padded: the gauge link fields padded out using the PaddedCell class |   //U_padded: the gauge link fields padded out using the PaddedCell class | ||||||
|   //Cell: the padded cell class |   //Cell: the padded cell class | ||||||
|   //gStencil: the precomputed generalized local stencil for the staple |   //gStencil: the precomputed generalized local stencil for the staple | ||||||
|   static void StaplePaddedAll(std::vector<GaugeMat> &staple, const std::vector<GaugeMat> &U_padded, const PaddedCell &Cell, const GeneralLocalStencil &gStencil) { |   static void StaplePaddedAll(std::vector<GaugeMat> &staple, const std::vector<GaugeMat> &U_padded, const PaddedCell &Cell, const GeneralLocalStencil &gStencil) | ||||||
|  |   { | ||||||
|     double t0 = usecond(); |     double t0 = usecond(); | ||||||
|     assert(U_padded.size() == Nd); assert(staple.size() == Nd); |     assert(U_padded.size() == Nd); assert(staple.size() == Nd); | ||||||
|     assert(U_padded[0].Grid() == (GridBase*)Cell.grids.back()); |     assert(U_padded[0].Grid() == (GridBase*)Cell.grids.back()); | ||||||
| @@ -487,9 +488,9 @@ public: | |||||||
|     for(int mu=0;mu<Nd;mu++){ |     for(int mu=0;mu<Nd;mu++){ | ||||||
|       { //view scope |       { //view scope | ||||||
| 	autoView( gStaple_v , gStaple, AcceleratorWrite); | 	autoView( gStaple_v , gStaple, AcceleratorWrite); | ||||||
| 	auto gStencil_v = gStencil.View(AcceleratorRead); | 	auto gStencil_v = gStencil.View(); | ||||||
| 	 | 	 | ||||||
| 	accelerator_for(ss, ggrid->oSites(), ggrid->Nsimd(), { | 	accelerator_for(ss, ggrid->oSites(), (size_t)ggrid->Nsimd(), { | ||||||
| 	    decltype(coalescedRead(Ug_dirs_v[0][0])) stencil_ss; | 	    decltype(coalescedRead(Ug_dirs_v[0][0])) stencil_ss; | ||||||
| 	    stencil_ss = Zero(); | 	    stencil_ss = Zero(); | ||||||
| 	    int off = outer_off; | 	    int off = outer_off; | ||||||
| @@ -1199,9 +1200,9 @@ public: | |||||||
|  |  | ||||||
|       { //view scope |       { //view scope | ||||||
| 	autoView( gStaple_v , gStaple, AcceleratorWrite); | 	autoView( gStaple_v , gStaple, AcceleratorWrite); | ||||||
| 	auto gStencil_v = gStencil.View(AcceleratorRead); | 	auto gStencil_v = gStencil.View(); | ||||||
|  |  | ||||||
| 	accelerator_for(ss, ggrid->oSites(), ggrid->Nsimd(), { | 	accelerator_for(ss, ggrid->oSites(), (size_t)ggrid->Nsimd(), { | ||||||
| 	    decltype(coalescedRead(Ug_dirs_v[0][0])) stencil_ss; | 	    decltype(coalescedRead(Ug_dirs_v[0][0])) stencil_ss; | ||||||
| 	    stencil_ss = Zero(); | 	    stencil_ss = Zero(); | ||||||
| 	    int s=offset; | 	    int s=offset; | ||||||
|   | |||||||
| @@ -1130,14 +1130,6 @@ static_assert(sizeof(SIMD_Ftype) == sizeof(SIMD_Itype), "SIMD vector lengths inc | |||||||
| #endif | #endif | ||||||
| #endif | #endif | ||||||
|  |  | ||||||
| // Fixme need coalesced read gpermute |  | ||||||
| template<class vobj> void gpermute(vobj & inout,int perm){ |  | ||||||
|   vobj tmp=inout; |  | ||||||
|   if (perm & 0x1 ) { permute(inout,tmp,0); tmp=inout;} |  | ||||||
|   if (perm & 0x2 ) { permute(inout,tmp,1); tmp=inout;} |  | ||||||
|   if (perm & 0x4 ) { permute(inout,tmp,2); tmp=inout;} |  | ||||||
|   if (perm & 0x8 ) { permute(inout,tmp,3); tmp=inout;} |  | ||||||
| } |  | ||||||
|  |  | ||||||
| NAMESPACE_END(Grid); | NAMESPACE_END(Grid); | ||||||
|  |  | ||||||
|   | |||||||
| @@ -43,10 +43,10 @@ class GeneralLocalStencilView { | |||||||
|   int                               _npoints; // Move to template param? |   int                               _npoints; // Move to template param? | ||||||
|   GeneralStencilEntry*  _entries_p; |   GeneralStencilEntry*  _entries_p; | ||||||
|  |  | ||||||
|   accelerator_inline GeneralStencilEntry * GetEntry(int point,int osite) {  |   accelerator_inline GeneralStencilEntry * GetEntry(int point,int osite) const {  | ||||||
|     return & this->_entries_p[point+this->_npoints*osite];  |     return & this->_entries_p[point+this->_npoints*osite];  | ||||||
|   } |   } | ||||||
|   void ViewClose(void){}; |  | ||||||
| }; | }; | ||||||
| //////////////////////////////////////// | //////////////////////////////////////// | ||||||
| // The Stencil Class itself | // The Stencil Class itself | ||||||
| @@ -61,7 +61,7 @@ protected: | |||||||
| public:  | public:  | ||||||
|   GridBase *Grid(void) const { return _grid; } |   GridBase *Grid(void) const { return _grid; } | ||||||
|  |  | ||||||
|   View_type View(int mode) const { |   View_type View(void) const { | ||||||
|     View_type accessor(*( (View_type *) this)); |     View_type accessor(*( (View_type *) this)); | ||||||
|     return accessor; |     return accessor; | ||||||
|   } |   } | ||||||
|   | |||||||
| @@ -137,18 +137,6 @@ inline void cuda_mem(void) | |||||||
|     dim3 cu_blocks ((num1+nt-1)/nt,num2,1);				\ |     dim3 cu_blocks ((num1+nt-1)/nt,num2,1);				\ | ||||||
|     LambdaApply<<<cu_blocks,cu_threads,0,computeStream>>>(num1,num2,nsimd,lambda);	\ |     LambdaApply<<<cu_blocks,cu_threads,0,computeStream>>>(num1,num2,nsimd,lambda);	\ | ||||||
|   } |   } | ||||||
| #define prof_accelerator_for2dNB( iter1, num1, iter2, num2, nsimd, ... )	\ |  | ||||||
|   {									\ |  | ||||||
|     int nt=acceleratorThreads();					\ |  | ||||||
|     typedef uint64_t Iterator;						\ |  | ||||||
|     auto lambda = [=] accelerator					\ |  | ||||||
|       (Iterator iter1,Iterator iter2,Iterator lane) mutable {		\ |  | ||||||
|       __VA_ARGS__;							\ |  | ||||||
|     };									\ |  | ||||||
|     dim3 cu_threads(nsimd,acceleratorThreads(),1);			\ |  | ||||||
|     dim3 cu_blocks ((num1+nt-1)/nt,num2,1);				\ |  | ||||||
|     ProfileLambdaApply<<<cu_blocks,cu_threads,0,computeStream>>>(num1,num2,nsimd,lambda); \ |  | ||||||
|   } |  | ||||||
|  |  | ||||||
| #define accelerator_for6dNB(iter1, num1,				\ | #define accelerator_for6dNB(iter1, num1,				\ | ||||||
|                             iter2, num2,				\ |                             iter2, num2,				\ | ||||||
| @@ -169,20 +157,6 @@ inline void cuda_mem(void) | |||||||
|     Lambda6Apply<<<cu_blocks,cu_threads,0,computeStream>>>(num1,num2,num3,num4,num5,num6,lambda); \ |     Lambda6Apply<<<cu_blocks,cu_threads,0,computeStream>>>(num1,num2,num3,num4,num5,num6,lambda); \ | ||||||
|   } |   } | ||||||
|  |  | ||||||
|  |  | ||||||
| #define accelerator_for2dNB( iter1, num1, iter2, num2, nsimd, ... )	\ |  | ||||||
|   {									\ |  | ||||||
|     int nt=acceleratorThreads();					\ |  | ||||||
|     typedef uint64_t Iterator;						\ |  | ||||||
|     auto lambda = [=] accelerator					\ |  | ||||||
|       (Iterator iter1,Iterator iter2,Iterator lane) mutable {		\ |  | ||||||
|       __VA_ARGS__;							\ |  | ||||||
|     };									\ |  | ||||||
|     dim3 cu_threads(nsimd,acceleratorThreads(),1);			\ |  | ||||||
|     dim3 cu_blocks ((num1+nt-1)/nt,num2,1);				\ |  | ||||||
|     LambdaApply<<<cu_blocks,cu_threads,0,computeStream>>>(num1,num2,nsimd,lambda);	\ |  | ||||||
|   } |  | ||||||
|  |  | ||||||
| template<typename lambda>  __global__ | template<typename lambda>  __global__ | ||||||
| void LambdaApply(uint64_t num1, uint64_t num2, uint64_t num3, lambda Lambda) | void LambdaApply(uint64_t num1, uint64_t num2, uint64_t num3, lambda Lambda) | ||||||
| { | { | ||||||
| @@ -194,17 +168,6 @@ void LambdaApply(uint64_t num1, uint64_t num2, uint64_t num3, lambda Lambda) | |||||||
|     Lambda(x,y,z); |     Lambda(x,y,z); | ||||||
|   } |   } | ||||||
| } | } | ||||||
| template<typename lambda>  __global__ |  | ||||||
| void ProfileLambdaApply(uint64_t num1, uint64_t num2, uint64_t num3, lambda Lambda) |  | ||||||
| { |  | ||||||
|   // Weird permute is to make lane coalesce for large blocks |  | ||||||
|   uint64_t x = threadIdx.y + blockDim.y*blockIdx.x; |  | ||||||
|   uint64_t y = threadIdx.z + blockDim.z*blockIdx.y; |  | ||||||
|   uint64_t z = threadIdx.x; |  | ||||||
|   if ( (x < num1) && (y<num2) && (z<num3) ) { |  | ||||||
|     Lambda(x,y,z); |  | ||||||
|   } |  | ||||||
| } |  | ||||||
|  |  | ||||||
| template<typename lambda>  __global__ | template<typename lambda>  __global__ | ||||||
| void Lambda6Apply(uint64_t num1, uint64_t num2, uint64_t num3, | void Lambda6Apply(uint64_t num1, uint64_t num2, uint64_t num3, | ||||||
| @@ -245,7 +208,6 @@ inline void *acceleratorAllocShared(size_t bytes) | |||||||
|   if( err != cudaSuccess ) { |   if( err != cudaSuccess ) { | ||||||
|     ptr = (void *) NULL; |     ptr = (void *) NULL; | ||||||
|     printf(" cudaMallocManaged failed for %d %s \n",bytes,cudaGetErrorString(err)); |     printf(" cudaMallocManaged failed for %d %s \n",bytes,cudaGetErrorString(err)); | ||||||
|     assert(0); |  | ||||||
|   } |   } | ||||||
|   return ptr; |   return ptr; | ||||||
| }; | }; | ||||||
| @@ -498,9 +460,6 @@ inline void acceleratorCopySynchronise(void) { hipStreamSynchronize(copyStream); | |||||||
| #if defined(GRID_SYCL) || defined(GRID_CUDA) || defined(GRID_HIP) | #if defined(GRID_SYCL) || defined(GRID_CUDA) || defined(GRID_HIP) | ||||||
| // FIXME -- the non-blocking nature got broken March 30 2023 by PAB | // FIXME -- the non-blocking nature got broken March 30 2023 by PAB | ||||||
| #define accelerator_forNB( iter1, num1, nsimd, ... ) accelerator_for2dNB( iter1, num1, iter2, 1, nsimd, {__VA_ARGS__} );   | #define accelerator_forNB( iter1, num1, nsimd, ... ) accelerator_for2dNB( iter1, num1, iter2, 1, nsimd, {__VA_ARGS__} );   | ||||||
| #define prof_accelerator_for( iter1, num1, nsimd, ... ) \ |  | ||||||
|   prof_accelerator_for2dNB( iter1, num1, iter2, 1, nsimd, {__VA_ARGS__} );\ |  | ||||||
|   accelerator_barrier(dummy); |  | ||||||
|  |  | ||||||
| #define accelerator_for( iter, num, nsimd, ... )		\ | #define accelerator_for( iter, num, nsimd, ... )		\ | ||||||
|   accelerator_forNB(iter, num, nsimd, { __VA_ARGS__ } );	\ |   accelerator_forNB(iter, num, nsimd, { __VA_ARGS__ } );	\ | ||||||
|   | |||||||
| @@ -94,13 +94,6 @@ static constexpr int MaxDims = GRID_MAX_LATTICE_DIMENSION; | |||||||
|  |  | ||||||
| typedef AcceleratorVector<int,MaxDims> Coordinate; | typedef AcceleratorVector<int,MaxDims> Coordinate; | ||||||
|  |  | ||||||
| template<class T,int _ndim> |  | ||||||
| inline bool operator==(const AcceleratorVector<T,_ndim> &v,const AcceleratorVector<T,_ndim> &w) |  | ||||||
| { |  | ||||||
|   if (v.size()!=w.size()) return false; |  | ||||||
|   for(int i=0;i<v.size();i++) if ( v[i]!=w[i] ) return false; |  | ||||||
|   return true; |  | ||||||
| } |  | ||||||
| template<class T,int _ndim> | template<class T,int _ndim> | ||||||
| inline std::ostream & operator<<(std::ostream &os, const AcceleratorVector<T,_ndim> &v) | inline std::ostream & operator<<(std::ostream &os, const AcceleratorVector<T,_ndim> &v) | ||||||
| { | { | ||||||
|   | |||||||
| @@ -8,7 +8,7 @@ namespace Grid{ | |||||||
|   public: |   public: | ||||||
|  |  | ||||||
|     template<class coor_t> |     template<class coor_t> | ||||||
|     static accelerator_inline void CoorFromIndex (coor_t& coor,int64_t index,const coor_t &dims){ |     static accelerator_inline void CoorFromIndex (coor_t& coor,int index,const coor_t &dims){ | ||||||
|       int nd= dims.size(); |       int nd= dims.size(); | ||||||
|       coor.resize(nd); |       coor.resize(nd); | ||||||
|       for(int d=0;d<nd;d++){ |       for(int d=0;d<nd;d++){ | ||||||
| @@ -18,45 +18,28 @@ namespace Grid{ | |||||||
|     } |     } | ||||||
|  |  | ||||||
|     template<class coor_t> |     template<class coor_t> | ||||||
|     static accelerator_inline void IndexFromCoor (const coor_t& coor,int64_t &index,const coor_t &dims){ |     static accelerator_inline void IndexFromCoor (const coor_t& coor,int &index,const coor_t &dims){ | ||||||
|       int nd=dims.size(); |       int nd=dims.size(); | ||||||
|       int stride=1; |       int stride=1; | ||||||
|       index=0; |       index=0; | ||||||
|       for(int d=0;d<nd;d++){ |       for(int d=0;d<nd;d++){ | ||||||
| 	index = index+(int64_t)stride*coor[d]; | 	index = index+stride*coor[d]; | ||||||
| 	stride=stride*dims[d]; | 	stride=stride*dims[d]; | ||||||
|       } |       } | ||||||
|     } |     } | ||||||
|     template<class coor_t> |  | ||||||
|     static accelerator_inline void IndexFromCoor (const coor_t& coor,int &index,const coor_t &dims){ |  | ||||||
|       int64_t index64; |  | ||||||
|       IndexFromCoor(coor,index64,dims); |  | ||||||
|       assert(index64<2*1024*1024*1024LL); |  | ||||||
|       index = (int) index64; |  | ||||||
|     } |  | ||||||
|  |  | ||||||
|     template<class coor_t> |     template<class coor_t> | ||||||
|     static inline void IndexFromCoorReversed (const coor_t& coor,int64_t &index,const coor_t &dims){ |     static inline void IndexFromCoorReversed (const coor_t& coor,int &index,const coor_t &dims){ | ||||||
|       int nd=dims.size(); |       int nd=dims.size(); | ||||||
|       int stride=1; |       int stride=1; | ||||||
|       index=0; |       index=0; | ||||||
|       for(int d=nd-1;d>=0;d--){ |       for(int d=nd-1;d>=0;d--){ | ||||||
| 	index = index+(int64_t)stride*coor[d]; | 	index = index+stride*coor[d]; | ||||||
| 	stride=stride*dims[d]; | 	stride=stride*dims[d]; | ||||||
|       } |       } | ||||||
|     } |     } | ||||||
|     template<class coor_t> |     template<class coor_t> | ||||||
|     static inline void IndexFromCoorReversed (const coor_t& coor,int &index,const coor_t &dims){ |     static inline void CoorFromIndexReversed (coor_t& coor,int index,const coor_t &dims){ | ||||||
|       int64_t index64; |  | ||||||
|       IndexFromCoorReversed(coor,index64,dims); |  | ||||||
|       if ( index64>=2*1024*1024*1024LL ){ |  | ||||||
| 	std::cout << " IndexFromCoorReversed " << coor<<" index " << index64<< " dims "<<dims<<std::endl; |  | ||||||
|       } |  | ||||||
|       assert(index64<2*1024*1024*1024LL); |  | ||||||
|       index = (int) index64; |  | ||||||
|     } |  | ||||||
|     template<class coor_t> |  | ||||||
|     static inline void CoorFromIndexReversed (coor_t& coor,int64_t index,const coor_t &dims){ |  | ||||||
|       int nd= dims.size(); |       int nd= dims.size(); | ||||||
|       coor.resize(nd); |       coor.resize(nd); | ||||||
|       for(int d=nd-1;d>=0;d--){ |       for(int d=nd-1;d>=0;d--){ | ||||||
|   | |||||||
							
								
								
									
										637
									
								
								HMC/Mobius2p1p1fEOFA_4Gev.cc
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										637
									
								
								HMC/Mobius2p1p1fEOFA_4Gev.cc
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,637 @@ | |||||||
|  | /************************************************************************************* | ||||||
|  |  | ||||||
|  | Grid physics library, www.github.com/paboyle/Grid | ||||||
|  |  | ||||||
|  | Source file:  | ||||||
|  |  | ||||||
|  | Copyright (C) 2015-2016 | ||||||
|  |  | ||||||
|  | Author: Peter Boyle <pabobyle@ph.ed.ac.uk> | ||||||
|  | Author: Guido Cossu | ||||||
|  | Author: David Murphy | ||||||
|  | Author: Chulwoo Jung <chulwoo@bnl.gov> | ||||||
|  |  | ||||||
|  | This program is free software; you can redistribute it and/or modify | ||||||
|  | it under the terms of the GNU General Public License as published by | ||||||
|  | the Free Software Foundation; either version 2 of the License, or | ||||||
|  | (at your option) any later version. | ||||||
|  |  | ||||||
|  | This program is distributed in the hope that it will be useful, | ||||||
|  | but WITHOUT ANY WARRANTY; without even the implied warranty of | ||||||
|  | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the | ||||||
|  | GNU General Public License for more details. | ||||||
|  |  | ||||||
|  | You should have received a copy of the GNU General Public License along | ||||||
|  | with this program; if not, write to the Free Software Foundation, Inc., | ||||||
|  | 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. | ||||||
|  |  | ||||||
|  | See the full license in the file "LICENSE" in the top level distribution | ||||||
|  | directory | ||||||
|  | *************************************************************************************/ | ||||||
|  | /*  END LEGAL */ | ||||||
|  | #include <Grid/Grid.h> | ||||||
|  |  | ||||||
|  | #ifdef GRID_DEFAULT_PRECISION_DOUBLE | ||||||
|  | #define MIXED_PRECISION | ||||||
|  | #endif | ||||||
|  | // second level EOFA | ||||||
|  | #undef EOFA_H | ||||||
|  | #undef USE_OBC | ||||||
|  | #define DO_IMPLICIT | ||||||
|  |  | ||||||
|  | NAMESPACE_BEGIN(Grid); | ||||||
|  |  | ||||||
|  |   /* | ||||||
|  |    * Need a plan for gauge field update for mixed precision in HMC                      (2x speed up) | ||||||
|  |    *    -- Store the single prec action operator. | ||||||
|  |    *    -- Clone the gauge field from the operator function argument. | ||||||
|  |    *    -- Build the mixed precision operator dynamically from the passed operator and single prec clone. | ||||||
|  |    */ | ||||||
|  |  | ||||||
|  |   /* | ||||||
|  |    * OperatorFunction that performs the solve with a MixedPrecisionConjugateGradient. | ||||||
|  |    * | ||||||
|  |    * The object stores references to a single and a double precision fermion operator and to | ||||||
|  |    * the Schur operators wrapping them. On every call the gauge links held by the double | ||||||
|  |    * precision fermion operator are converted to single precision and written into the single | ||||||
|  |    * precision fermion operator before the solver runs. | ||||||
|  |    * | ||||||
|  |    * Template parameters: | ||||||
|  |    *   FermionOperatorD / FermionOperatorF : double / single precision fermion operators | ||||||
|  |    *   SchurOperatorD   / SchurOperatorF   : linear operators built on those fermion operators | ||||||
|  |    */ | ||||||
|  |   template<class FermionOperatorD, class FermionOperatorF, class SchurOperatorD, class  SchurOperatorF> | ||||||
|  |   class MixedPrecisionConjugateGradientOperatorFunction : public OperatorFunction<typename FermionOperatorD::FermionField> { | ||||||
|  |   public: | ||||||
|  |     typedef typename FermionOperatorD::FermionField FieldD; | ||||||
|  |     typedef typename FermionOperatorF::FermionField FieldF; | ||||||
|  |  | ||||||
|  |     using OperatorFunction<FieldD>::operator(); | ||||||
|  |  | ||||||
|  |     RealD   Tolerance;          // Tolerance handed to the mixed precision CG | ||||||
|  |     RealD   InnerTolerance;     // Initial tolerance for inner CG. Defaults to Tolerance but can be changed. | ||||||
|  |                                 // NOTE(review): stored only; operator() below does not forward it to the solver | ||||||
|  |     Integer MaxInnerIterations; | ||||||
|  |     Integer MaxOuterIterations; | ||||||
|  |     GridBase* SinglePrecGrid4;  // Grid for single-precision fields (used for the gauge links) | ||||||
|  |     GridBase* SinglePrecGrid5;  // Grid for single-precision fields (handed to the solver) | ||||||
|  |     RealD OuterLoopNormMult;    // Stop the outer loop and move to a final double prec solve when the residual is OuterLoopNormMult * Tolerance | ||||||
|  |                                 // NOTE(review): stored only; operator() below does not forward it to the solver | ||||||
|  |  | ||||||
|  |     FermionOperatorF &FermOpF; | ||||||
|  |     FermionOperatorD &FermOpD; | ||||||
|  |     SchurOperatorF &LinOpF; | ||||||
|  |     SchurOperatorD &LinOpD; | ||||||
|  |  | ||||||
|  |     // Iteration counters. They start at zero; nothing in this class updates them afterwards. | ||||||
|  |     Integer TotalInnerIterations;     // Number of inner CG iterations | ||||||
|  |     Integer TotalOuterIterations;     // Number of restarts | ||||||
|  |     Integer TotalFinalStepIterations; // Number of CG iterations in final patch-up step | ||||||
|  |  | ||||||
|  |     // Stores the solver parameters, the single precision grids and references to the operators. | ||||||
|  |     // Only references are kept, so the four operators must outlive this object. | ||||||
|  |     // The initialiser list follows the member declaration order, which is the order in which | ||||||
|  |     // the members are actually initialised. | ||||||
|  |     MixedPrecisionConjugateGradientOperatorFunction(RealD tol, | ||||||
|  |                                                     Integer maxinnerit, | ||||||
|  |                                                     Integer maxouterit, | ||||||
|  |                                                     GridBase* _sp_grid4, | ||||||
|  |                                                     GridBase* _sp_grid5, | ||||||
|  |                                                     FermionOperatorF &_FermOpF, | ||||||
|  |                                                     FermionOperatorD &_FermOpD, | ||||||
|  |                                                     SchurOperatorF   &_LinOpF, | ||||||
|  |                                                     SchurOperatorD   &_LinOpD): | ||||||
|  |       Tolerance(tol), | ||||||
|  |       InnerTolerance(tol), | ||||||
|  |       MaxInnerIterations(maxinnerit), | ||||||
|  |       MaxOuterIterations(maxouterit), | ||||||
|  |       SinglePrecGrid4(_sp_grid4), | ||||||
|  |       SinglePrecGrid5(_sp_grid5), | ||||||
|  |       OuterLoopNormMult(100.), | ||||||
|  |       FermOpF(_FermOpF), | ||||||
|  |       FermOpD(_FermOpD), | ||||||
|  |       LinOpF(_LinOpF), | ||||||
|  |       LinOpD(_LinOpD), | ||||||
|  |       TotalInnerIterations(0), | ||||||
|  |       TotalOuterIterations(0), | ||||||
|  |       TotalFinalStepIterations(0) | ||||||
|  |     { | ||||||
|  |       /* Debugging instances of objects; references are stored | ||||||
|  |       std::cout << GridLogMessage << " Mixed precision CG wrapper LinOpF " <<std::hex<< &LinOpF<<std::dec <<std::endl; | ||||||
|  |       std::cout << GridLogMessage << " Mixed precision CG wrapper LinOpD " <<std::hex<< &LinOpD<<std::dec <<std::endl; | ||||||
|  |       std::cout << GridLogMessage << " Mixed precision CG wrapper FermOpF " <<std::hex<< &FermOpF<<std::dec <<std::endl; | ||||||
|  |       std::cout << GridLogMessage << " Mixed precision CG wrapper FermOpD " <<std::hex<< &FermOpD<<std::dec <<std::endl; | ||||||
|  |       */ | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     // Solve for psi given src with the mixed precision CG. | ||||||
|  |     //   LinOpU : operator supplied by the caller; it must wrap the same fermion matrix as LinOpD (asserted) | ||||||
|  |     //   src    : right hand side | ||||||
|  |     //   psi    : passed to the solver as its solution argument | ||||||
|  |     void operator()(LinearOperatorBase<FieldD> &LinOpU, const FieldD &src, FieldD &psi) { | ||||||
|  |  | ||||||
|  |       std::cout << GridLogMessage << " Mixed precision CG wrapper operator() "<<std::endl; | ||||||
|  |  | ||||||
|  |       // Unchecked downcast: the caller is trusted to pass a SchurOperatorD. | ||||||
|  |       SchurOperatorD * SchurOpU = static_cast<SchurOperatorD *>(&LinOpU); | ||||||
|  |  | ||||||
|  |       //      std::cout << GridLogMessage << " Mixed precision CG wrapper operator() FermOpU " <<std::hex<< &(SchurOpU->_Mat)<<std::dec <<std::endl; | ||||||
|  |       //      std::cout << GridLogMessage << " Mixed precision CG wrapper operator() FermOpD " <<std::hex<< &(LinOpD._Mat) <<std::dec <<std::endl; | ||||||
|  |       // Assumption made in code to extract gauge field | ||||||
|  |       // We could avoid storing LinopD reference altogether ? | ||||||
|  |       assert(&(SchurOpU->_Mat)==&(LinOpD._Mat)); | ||||||
|  |  | ||||||
|  |       //////////////////////////////////////////////////////////////////////////////////// | ||||||
|  |       // Must snarf a single precision copy of the gauge field in Linop_d argument | ||||||
|  |       //////////////////////////////////////////////////////////////////////////////////// | ||||||
|  |       typedef typename FermionOperatorF::GaugeLinkField GaugeLinkFieldF; | ||||||
|  |       typedef typename FermionOperatorD::GaugeLinkField GaugeLinkFieldD; | ||||||
|  |  | ||||||
|  |       GridBase * GridPtrF = SinglePrecGrid4; | ||||||
|  |       GridBase * GridPtrD = FermOpD.Umu.Grid(); | ||||||
|  |       //      std::cout << " Dim gauge field "<<GridPtrF->Nd()<<std::endl; // 4d | ||||||
|  |       //      std::cout << " Dim gauge field "<<GridPtrD->Nd()<<std::endl; // 4d | ||||||
|  |  | ||||||
|  |       //////////////////////////////////////////////////////////////////////////////////// | ||||||
|  |       // Moving this to a Clone method of fermion operator would allow to duplicate the | ||||||
|  |       // physics parameters and decrease gauge field copies | ||||||
|  |       //////////////////////////////////////////////////////////////////////////////////// | ||||||
|  |       // One link-sized temporary per precision, reused for every component. | ||||||
|  |       GaugeLinkFieldF Umu_f(GridPtrF); | ||||||
|  |       GaugeLinkFieldD Umu_d(GridPtrD); | ||||||
|  |       // The loop visits 2*Nd Lorentz components of Umu | ||||||
|  |       // (presumably the doubled gauge field stored by the fermion operator -- TODO confirm). | ||||||
|  |       const int ncomponents = Nd*2; | ||||||
|  |       for(int mu=0;mu<ncomponents;mu++){ | ||||||
|  |         Umu_d = PeekIndex<LorentzIndex>(FermOpD.Umu, mu); | ||||||
|  |         precisionChange(Umu_f,Umu_d); | ||||||
|  |         PokeIndex<LorentzIndex>(FermOpF.Umu, Umu_f, mu); | ||||||
|  |       } | ||||||
|  |       // Refresh the checkerboarded copies held by the single precision operator. | ||||||
|  |       pickCheckerboard(Even,FermOpF.UmuEven,FermOpF.Umu); | ||||||
|  |       pickCheckerboard(Odd ,FermOpF.UmuOdd ,FermOpF.Umu); | ||||||
|  |  | ||||||
|  |       //////////////////////////////////////////////////////////////////////////////////// | ||||||
|  |       // Make a mixed precision conjugate gradient | ||||||
|  |       //////////////////////////////////////////////////////////////////////////////////// | ||||||
|  |       MixedPrecisionConjugateGradient<FieldD,FieldF> MPCG(Tolerance,MaxInnerIterations,MaxOuterIterations,SinglePrecGrid5,LinOpF,LinOpD); | ||||||
|  |       std::cout << GridLogMessage << "Calling mixed precision Conjugate Gradient" <<std::endl; | ||||||
|  |       MPCG(src,psi); | ||||||
|  |     } | ||||||
|  |   }; | ||||||
|  |  | ||||||
|  | NAMESPACE_END(Grid); | ||||||
|  |  | ||||||
|  |  | ||||||
|  | int main(int argc, char **argv) { | ||||||
|  |   using namespace Grid; | ||||||
|  |  | ||||||
|  |   Grid_init(&argc, &argv); | ||||||
|  |   int threads = GridThread::GetThreads(); | ||||||
|  |   // here make a routine to print all the relevant information on the run | ||||||
|  |   std::cout << GridLogMessage << "Grid is setup to use " << threads << " threads" << std::endl; | ||||||
|  |  | ||||||
|  |    // Typedefs to simplify notation | ||||||
|  |   typedef WilsonImplR FermionImplPolicy; | ||||||
|  |   typedef MobiusFermionD FermionAction; | ||||||
|  |   typedef MobiusFermionF FermionActionF; | ||||||
|  |   typedef MobiusEOFAFermionD FermionEOFAAction; | ||||||
|  |   typedef MobiusEOFAFermionF FermionEOFAActionF; | ||||||
|  |   typedef typename FermionAction::FermionField FermionField; | ||||||
|  |   typedef typename FermionActionF::FermionField FermionFieldF; | ||||||
|  |  | ||||||
|  |   typedef Grid::XmlReader       Serialiser; | ||||||
|  |    | ||||||
|  |   //:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: | ||||||
|  |  | ||||||
|  |   HMCparameters HMCparams; | ||||||
|  | #if 1 | ||||||
|  |   { | ||||||
|  |     XmlReader  HMCrd("HMCparameters.xml"); | ||||||
|  |     read(HMCrd,"HMCparameters",HMCparams); | ||||||
|  |   } | ||||||
|  | #else | ||||||
|  |   { | ||||||
|  | //    HMCparameters HMCparams; | ||||||
|  |   //  "[HotStart, ColdStart, TepidStart, CheckpointStart]\n"; | ||||||
|  |   //  HMCparams.StartingType     =std::string("ColdStart"); | ||||||
|  |     HMCparams.StartingType     =std::string("CheckpointStart"); | ||||||
|  |     HMCparams.StartTrajectory  =7; | ||||||
|  |     HMCparams.SW  =4; | ||||||
|  |     HMCparams.Trajectories     =1000; | ||||||
|  |     HMCparams.NoMetropolisUntil=0; | ||||||
|  |     HMCparams.MD.name          =std::string("Force Gradient"); | ||||||
|  |     HMCparams.MD.MDsteps       = 10; | ||||||
|  |     HMCparams.MD.trajL         = 1.0; | ||||||
|  |   } | ||||||
|  | #endif | ||||||
|  |  | ||||||
|  | #ifdef DO_IMPLICIT | ||||||
|  | //    typedef GenericHMCRunner<ImplicitLeapFrog> HMCWrapper;  | ||||||
|  |   typedef GenericHMCRunner<ImplicitMinimumNorm2> HMCWrapper;  | ||||||
|  |   HMCparams.MD.name          =std::string("ImplicitMinimumNorm2"); | ||||||
|  | #else | ||||||
|  | //  typedef GenericHMCRunner<LeapFrog> HMCWrapper;  | ||||||
|  |   typedef GenericHMCRunner<ForceGradient> HMCWrapper;  | ||||||
|  | //  typedef GenericHMCRunner<MinimumNorm2> HMCWrapper;  | ||||||
|  |   HMCparams.MD.name          =std::string("ForceGradient"); | ||||||
|  | #endif | ||||||
|  |  | ||||||
|  |   std::cout << GridLogMessage<< HMCparams <<std::endl; | ||||||
|  |   HMCWrapper TheHMC(HMCparams); | ||||||
|  |   TheHMC.ReadCommandLine(argc, argv); | ||||||
|  |   {  | ||||||
|  |     XmlWriter HMCwr("HMCparameters.xml.out"); | ||||||
|  |     write(HMCwr,"HMCparameters",TheHMC.Parameters); | ||||||
|  |   } | ||||||
|  |  | ||||||
|  |   // Grid from the command line arguments --grid and --mpi | ||||||
|  |   TheHMC.Resources.AddFourDimGrid("gauge"); // use default simd lanes decomposition | ||||||
|  |    | ||||||
|  |   CheckpointerParameters CPparams; | ||||||
|  |   CPparams.config_prefix = "ckpoint_lat"; | ||||||
|  |   CPparams.rng_prefix    = "ckpoint_rng"; | ||||||
|  |   CPparams.saveInterval  = 1; | ||||||
|  |   CPparams.format        = "IEEE64BIG"; | ||||||
|  |   TheHMC.Resources.LoadNerscCheckpointer(CPparams); | ||||||
|  |  | ||||||
|  |   RNGModuleParameters RNGpar; | ||||||
|  |   RNGpar.serial_seeds = "1 2 3 4 5"; | ||||||
|  |   RNGpar.parallel_seeds = "6 7 8 9 10"; | ||||||
|  |   TheHMC.Resources.SetRNGSeeds(RNGpar); | ||||||
|  |  | ||||||
|  |   // Construct observables | ||||||
|  |   // here there is too much indirection  | ||||||
|  |   typedef PlaquetteMod<HMCWrapper::ImplPolicy> PlaqObs; | ||||||
|  |   TheHMC.Resources.AddObservable<PlaqObs>(); | ||||||
|  |   ////////////////////////////////////////////// | ||||||
|  |  | ||||||
|  |   const int Ls      = 12; | ||||||
|  |   Real beta         = 5.983; | ||||||
|  |   std::cout << GridLogMessage << " beta  "<< beta << std::endl; | ||||||
|  |   Real light_mass   = 0.00049; | ||||||
|  |   Real strange_mass = 0.0158; | ||||||
|  |   Real charm_mass = 0.191; | ||||||
|  |   Real pv_mass    = 1.0; | ||||||
|  |   RealD M5  = 1.4; | ||||||
|  |   RealD b   = 2.0;  | ||||||
|  |   RealD c   = 1.0; | ||||||
|  |  | ||||||
|  |   // Copied from paper | ||||||
|  | //  std::vector<Real> hasenbusch({ 0.045 }); // Paper values from F1 incorrect run | ||||||
|  |   std::vector<Real> hasenbusch({ 0.0038, 0.0145, 0.045, 0.108 , 0.25, 0.51 }); // Paper values from F1 incorrect run | ||||||
|  |   std::vector<Real> hasenbusch2({ 0.4 }); // Paper values from F1 incorrect run | ||||||
|  |  | ||||||
|  | //  RealD eofa_mass=0.05 ; | ||||||
|  |  | ||||||
|  |   /////////////////////////////////////////////////////////////////////////////////////////////// | ||||||
|  |   //Bad choices with large dH. Equalising force L2 norm was not wise. | ||||||
|  |   /////////////////////////////////////////////////////////////////////////////////////////////// | ||||||
|  |   //std::vector<Real> hasenbusch({ 0.03, 0.2, 0.3, 0.5, 0.8 });  | ||||||
|  |  | ||||||
|  |   auto GridPtr   = TheHMC.Resources.GetCartesian(); | ||||||
|  |   auto GridRBPtr = TheHMC.Resources.GetRBCartesian(); | ||||||
|  |   auto FGrid     = SpaceTimeGrid::makeFiveDimGrid(Ls,GridPtr); | ||||||
|  |   auto FrbGrid   = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls,GridPtr); | ||||||
|  |  | ||||||
|  |   Coordinate latt  = GridDefaultLatt(); | ||||||
|  |   Coordinate mpi   = GridDefaultMpi(); | ||||||
|  |   Coordinate simdF = GridDefaultSimd(Nd,vComplexF::Nsimd()); | ||||||
|  |   Coordinate simdD = GridDefaultSimd(Nd,vComplexD::Nsimd()); | ||||||
|  | //  auto GridPtrF   = SpaceTimeGrid::makeFourDimGrid(latt,simdF,mpi); | ||||||
|  |   auto UGrid_f    = SpaceTimeGrid::makeFourDimGrid(latt,simdF,mpi); | ||||||
|  |   auto GridRBPtrF = SpaceTimeGrid::makeFourDimRedBlackGrid(UGrid_f); | ||||||
|  |   auto FGridF     = SpaceTimeGrid::makeFiveDimGrid(Ls,UGrid_f); | ||||||
|  |   auto FrbGridF   = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls,UGrid_f); | ||||||
|  |  | ||||||
|  |  | ||||||
|  | #ifndef USE_OBC | ||||||
|  | //  IwasakiGaugeActionR GaugeAction(beta); | ||||||
|  |   WilsonGaugeActionR GaugeAction(beta); | ||||||
|  | #else | ||||||
|  |   std::vector<Complex> boundaryG = {1,1,1,0}; | ||||||
|  |   WilsonGaugeActionR::ImplParams ParamsG(boundaryG); | ||||||
|  |   WilsonGaugeActionR GaugeAction(beta,ParamsG); | ||||||
|  | #endif | ||||||
|  |  | ||||||
|  |   // temporarily need a gauge field | ||||||
|  |   LatticeGaugeField U(GridPtr); | ||||||
|  |   LatticeGaugeFieldF UF(UGrid_f); | ||||||
|  |  | ||||||
|  |   // These lines are unnecessary if BC are all periodic | ||||||
|  | #ifndef USE_OBC | ||||||
|  |   std::vector<Complex> boundary = {1,1,1,-1}; | ||||||
|  | #else | ||||||
|  |   std::vector<Complex> boundary = {1,1,1,0}; | ||||||
|  | #endif | ||||||
|  |   FermionAction::ImplParams Params(boundary); | ||||||
|  |   FermionActionF::ImplParams ParamsF(boundary); | ||||||
|  |    | ||||||
|  |   double ActionStoppingCondition     = 1e-8; | ||||||
|  |   double DerivativeStoppingCondition = 1e-8; | ||||||
|  |   double MaxCGIterations =  100000; | ||||||
|  |  | ||||||
|  |   //////////////////////////////////// | ||||||
|  |   // Collect actions | ||||||
|  |   //////////////////////////////////// | ||||||
|  |   ActionLevel<HMCWrapper::Field> Level1(1); | ||||||
|  |   ActionLevel<HMCWrapper::Field> Level2(HMCparams.SW); | ||||||
|  |  | ||||||
|  |   //////////////////////////////////// | ||||||
|  |   // Strange action | ||||||
|  |   //////////////////////////////////// | ||||||
|  |   typedef SchurDiagMooeeOperator<FermionActionF,FermionFieldF> LinearOperatorF; | ||||||
|  |   typedef SchurDiagMooeeOperator<FermionAction ,FermionField > LinearOperatorD; | ||||||
|  |   typedef SchurDiagMooeeOperator<FermionEOFAActionF,FermionFieldF> LinearOperatorEOFAF; | ||||||
|  |   typedef SchurDiagMooeeOperator<FermionEOFAAction ,FermionField > LinearOperatorEOFAD; | ||||||
|  |  | ||||||
|  |   typedef MixedPrecisionConjugateGradientOperatorFunction<MobiusFermionD,MobiusFermionF,LinearOperatorD,LinearOperatorF> MxPCG; | ||||||
|  |   typedef MixedPrecisionConjugateGradientOperatorFunction<MobiusEOFAFermionD,MobiusEOFAFermionF,LinearOperatorEOFAD,LinearOperatorEOFAF> MxPCG_EOFA; | ||||||
|  |  | ||||||
|  |   // DJM: setup for EOFA ratio (Mobius) | ||||||
|  |   OneFlavourRationalParams OFRp; | ||||||
|  |   OFRp.lo       = 0.99; // How do I know this on F1? | ||||||
|  |   OFRp.hi       = 20; | ||||||
|  |   OFRp.MaxIter  = 100000; | ||||||
|  |   OFRp.tolerance= 1.0e-12; | ||||||
|  |   OFRp.degree   = 12; | ||||||
|  |   OFRp.precision= 50; | ||||||
|  |  | ||||||
|  |    | ||||||
|  |   MobiusEOFAFermionD Strange_Op_L (U , *FGrid , *FrbGrid , *GridPtr , *GridRBPtr , strange_mass, strange_mass, charm_mass, 0.0, -1, M5, b, c); | ||||||
|  |   MobiusEOFAFermionF Strange_Op_LF(UF, *FGridF, *FrbGridF, *UGrid_f, *GridRBPtrF, strange_mass, strange_mass, charm_mass, 0.0, -1, M5, b, c); | ||||||
|  |   MobiusEOFAFermionD Strange_Op_R (U , *FGrid , *FrbGrid , *GridPtr , *GridRBPtr , charm_mass, strange_mass,      charm_mass, -1.0, 1, M5, b, c); | ||||||
|  |   MobiusEOFAFermionF Strange_Op_RF(UF, *FGridF, *FrbGridF, *UGrid_f, *GridRBPtrF, charm_mass, strange_mass,      charm_mass, -1.0, 1, M5, b, c); | ||||||
|  |    | ||||||
|  | #ifdef EOFA_H | ||||||
|  |   MobiusEOFAFermionD Strange2_Op_L (U , *FGrid , *FrbGrid , *GridPtr , *GridRBPtr , eofa_mass, eofa_mass, charm_mass , 0.0, -1, M5, b, c); | ||||||
|  |   MobiusEOFAFermionF Strange2_Op_LF(UF, *FGridF, *FrbGridF, *UGrid_f, *GridRBPtrF, eofa_mass, eofa_mass, charm_mass , 0.0, -1, M5, b, c); | ||||||
|  |   MobiusEOFAFermionD Strange2_Op_R (U , *FGrid , *FrbGrid , *GridPtr , *GridRBPtr , charm_mass , eofa_mass,      charm_mass , -1.0, 1, M5, b, c); | ||||||
|  |   MobiusEOFAFermionF Strange2_Op_RF(UF, *FGridF, *FrbGridF, *UGrid_f, *GridRBPtrF, charm_mass , eofa_mass,      charm_mass , -1.0, 1, M5, b, c); | ||||||
|  | #endif | ||||||
|  |  | ||||||
|  |   ConjugateGradient<FermionField>      ActionCG(ActionStoppingCondition,MaxCGIterations); | ||||||
|  |   ConjugateGradient<FermionField>  DerivativeCG(DerivativeStoppingCondition,MaxCGIterations); | ||||||
|  | #ifdef MIXED_PRECISION | ||||||
|  |   const int MX_inner = 50000; | ||||||
|  |  | ||||||
|  |   // Mixed precision EOFA | ||||||
|  |   LinearOperatorEOFAD Strange_LinOp_L (Strange_Op_L); | ||||||
|  |   LinearOperatorEOFAD Strange_LinOp_R (Strange_Op_R); | ||||||
|  |   LinearOperatorEOFAF Strange_LinOp_LF(Strange_Op_LF); | ||||||
|  |   LinearOperatorEOFAF Strange_LinOp_RF(Strange_Op_RF); | ||||||
|  |  | ||||||
|  | #ifdef EOFA_H | ||||||
|  |   // Mixed precision EOFA | ||||||
|  |   LinearOperatorEOFAD Strange2_LinOp_L (Strange2_Op_L); | ||||||
|  |   LinearOperatorEOFAD Strange2_LinOp_R (Strange2_Op_R); | ||||||
|  |   LinearOperatorEOFAF Strange2_LinOp_LF(Strange2_Op_LF); | ||||||
|  |   LinearOperatorEOFAF Strange2_LinOp_RF(Strange2_Op_RF); | ||||||
|  | #endif | ||||||
|  |  | ||||||
|  |   MxPCG_EOFA ActionCGL(ActionStoppingCondition, | ||||||
|  | 		       MX_inner, | ||||||
|  | 		       MaxCGIterations, | ||||||
|  | 		       UGrid_f, | ||||||
|  | 		       FrbGridF, | ||||||
|  | 		       Strange_Op_LF,Strange_Op_L, | ||||||
|  | 		       Strange_LinOp_LF,Strange_LinOp_L); | ||||||
|  |  | ||||||
|  | #ifdef EOFA_H | ||||||
|  |   MxPCG_EOFA ActionCGL2(ActionStoppingCondition, | ||||||
|  | 		       MX_inner, | ||||||
|  | 		       MaxCGIterations, | ||||||
|  | 		       UGrid_f, | ||||||
|  | 		       FrbGridF, | ||||||
|  | 		       Strange2_Op_LF,Strange2_Op_L, | ||||||
|  | 		       Strange2_LinOp_LF,Strange2_LinOp_L); | ||||||
|  | #endif | ||||||
|  |  | ||||||
|  |   MxPCG_EOFA DerivativeCGL(DerivativeStoppingCondition, | ||||||
|  | 			   MX_inner, | ||||||
|  | 			   MaxCGIterations, | ||||||
|  | 			   UGrid_f, | ||||||
|  | 			   FrbGridF, | ||||||
|  | 			   Strange_Op_LF,Strange_Op_L, | ||||||
|  | 			   Strange_LinOp_LF,Strange_LinOp_L); | ||||||
|  |  | ||||||
|  | #ifdef EOFA_H | ||||||
|  |   MxPCG_EOFA DerivativeCGL2(DerivativeStoppingCondition, | ||||||
|  | 			   MX_inner, | ||||||
|  | 			   MaxCGIterations, | ||||||
|  | 			   UGrid_f, | ||||||
|  | 			   FrbGridF, | ||||||
|  | 			   Strange2_Op_LF,Strange2_Op_L, | ||||||
|  | 			   Strange2_LinOp_LF,Strange2_LinOp_L); | ||||||
|  | #endif | ||||||
|  |    | ||||||
|  |   MxPCG_EOFA ActionCGR(ActionStoppingCondition, | ||||||
|  | 		       MX_inner, | ||||||
|  | 		       MaxCGIterations, | ||||||
|  | 		       UGrid_f, | ||||||
|  | 		       FrbGridF, | ||||||
|  | 		       Strange_Op_RF,Strange_Op_R, | ||||||
|  | 		       Strange_LinOp_RF,Strange_LinOp_R); | ||||||
|  |    | ||||||
|  | #ifdef EOFA_H | ||||||
|  |   MxPCG_EOFA ActionCGR2(ActionStoppingCondition, | ||||||
|  | 		       MX_inner, | ||||||
|  | 		       MaxCGIterations, | ||||||
|  | 		       UGrid_f, | ||||||
|  | 		       FrbGridF, | ||||||
|  | 		       Strange2_Op_RF,Strange2_Op_R, | ||||||
|  | 		       Strange2_LinOp_RF,Strange2_LinOp_R); | ||||||
|  | #endif | ||||||
|  |    | ||||||
|  |   MxPCG_EOFA DerivativeCGR(DerivativeStoppingCondition, | ||||||
|  | 			   MX_inner, | ||||||
|  | 			   MaxCGIterations, | ||||||
|  | 			   UGrid_f, | ||||||
|  | 			   FrbGridF, | ||||||
|  | 			   Strange_Op_RF,Strange_Op_R, | ||||||
|  | 			   Strange_LinOp_RF,Strange_LinOp_R); | ||||||
|  |    | ||||||
|  | #ifdef EOFA_H | ||||||
|  |   MxPCG_EOFA DerivativeCGR2(DerivativeStoppingCondition, | ||||||
|  | 			   MX_inner, | ||||||
|  | 			   MaxCGIterations, | ||||||
|  | 			   UGrid_f, | ||||||
|  | 			   FrbGridF, | ||||||
|  | 			   Strange2_Op_RF,Strange2_Op_R, | ||||||
|  | 			   Strange2_LinOp_RF,Strange2_LinOp_R); | ||||||
|  | #endif | ||||||
|  |    | ||||||
|  |   ExactOneFlavourRatioPseudoFermionAction<FermionImplPolicy>  | ||||||
|  |     EOFA(Strange_Op_L, Strange_Op_R,  | ||||||
|  | 	 ActionCG,  | ||||||
|  | 	 ActionCGL, ActionCGR, | ||||||
|  | 	 DerivativeCGL, DerivativeCGR, | ||||||
|  | 	 OFRp, true); | ||||||
|  |    | ||||||
|  | #ifdef EOFA_H | ||||||
|  |   ExactOneFlavourRatioPseudoFermionAction<FermionImplPolicy>  | ||||||
|  |     EOFA2(Strange2_Op_L, Strange2_Op_R,  | ||||||
|  | 	 ActionCG,  | ||||||
|  | 	 ActionCGL2, ActionCGR2, | ||||||
|  | 	 DerivativeCGL2, DerivativeCGR2, | ||||||
|  | 	 OFRp, true); | ||||||
|  | #endif | ||||||
|  |  | ||||||
|  |   Level1.push_back(&EOFA); | ||||||
|  | #ifdef EOFA_H | ||||||
|  |   Level1.push_back(&EOFA2); | ||||||
|  | #endif | ||||||
|  |  | ||||||
|  | #else | ||||||
|  |   ExactOneFlavourRatioPseudoFermionAction<FermionImplPolicy>  | ||||||
|  |     EOFA(Strange_Op_L, Strange_Op_R,  | ||||||
|  | 	 ActionCG,  | ||||||
|  | 	 ActionCG, ActionCG, | ||||||
|  | 	 ActionCG, ActionCG, | ||||||
|  | 	 //         DerivativeCG, DerivativeCG, | ||||||
|  | 	 OFRp, true); | ||||||
|  |   Level1.push_back(&EOFA); | ||||||
|  | #endif | ||||||
|  |  | ||||||
|  |   //////////////////////////////////// | ||||||
|  |   // up down action | ||||||
|  |   //////////////////////////////////// | ||||||
|  |   std::vector<Real> light_den; | ||||||
|  |   std::vector<Real> light_num; | ||||||
|  |  | ||||||
|  |   int n_hasenbusch = hasenbusch.size(); | ||||||
|  |   light_den.push_back(light_mass); | ||||||
|  |   for(int h=0;h<n_hasenbusch;h++){ | ||||||
|  |     light_den.push_back(hasenbusch[h]); | ||||||
|  |     light_num.push_back(hasenbusch[h]); | ||||||
|  |   } | ||||||
|  |   light_num.push_back(pv_mass); | ||||||
|  |  | ||||||
|  |   int n_hasenbusch2 = hasenbusch2.size(); | ||||||
|  |   light_den.push_back(charm_mass); | ||||||
|  |   for(int h=0;h<n_hasenbusch2;h++){ | ||||||
|  |     light_den.push_back(hasenbusch2[h]); | ||||||
|  |     light_num.push_back(hasenbusch2[h]); | ||||||
|  |   } | ||||||
|  |   light_num.push_back(pv_mass); | ||||||
|  |  | ||||||
|  |  | ||||||
|  |   ////////////////////////////////////////////////////////////// | ||||||
|  |   // Forced to replicate the MxPCG and DenominatorsF etc.. because | ||||||
|  |   // there is no convenient way to "Clone" physics params from double op | ||||||
|  |   // into single op for any operator pair. | ||||||
|  |   // Same issue prevents using MxPCG in the Heatbath step | ||||||
|  |   ////////////////////////////////////////////////////////////// | ||||||
|  |   std::vector<FermionAction *> Numerators; | ||||||
|  |   std::vector<FermionAction *> Denominators; | ||||||
|  |   std::vector<TwoFlavourEvenOddRatioPseudoFermionAction<FermionImplPolicy> *> Quotients; | ||||||
|  |   std::vector<MxPCG *> ActionMPCG; | ||||||
|  |   std::vector<MxPCG *> MPCG; | ||||||
|  |   std::vector<FermionActionF *> DenominatorsF; | ||||||
|  |   std::vector<LinearOperatorD *> LinOpD; | ||||||
|  |   std::vector<LinearOperatorF *> LinOpF;  | ||||||
|  |  | ||||||
|  |   for(int h=0;h<light_den.size();h++){ | ||||||
|  |  | ||||||
|  |     std::cout << GridLogMessage << " 2f quotient Action  "<< light_num[h] << " / " << light_den[h]<< std::endl; | ||||||
|  |  | ||||||
|  |     Numerators.push_back  (new FermionAction(U,*FGrid,*FrbGrid,*GridPtr,*GridRBPtr,light_num[h],M5,b,c, Params)); | ||||||
|  |     Denominators.push_back(new FermionAction(U,*FGrid,*FrbGrid,*GridPtr,*GridRBPtr,light_den[h],M5,b,c, Params)); | ||||||
|  |  | ||||||
|  | #ifdef MIXED_PRECISION | ||||||
|  |     //////////////////////////////////////////////////////////////////////////// | ||||||
|  |     // Mixed precision CG for 2f force | ||||||
|  |     //////////////////////////////////////////////////////////////////////////// | ||||||
|  |     double DerivativeStoppingConditionLoose = 1e-8; | ||||||
|  |  | ||||||
|  |     DenominatorsF.push_back(new FermionActionF(UF,*FGridF,*FrbGridF,*UGrid_f,*GridRBPtrF,light_den[h],M5,b,c, ParamsF)); | ||||||
|  |     LinOpD.push_back(new LinearOperatorD(*Denominators[h])); | ||||||
|  |     LinOpF.push_back(new LinearOperatorF(*DenominatorsF[h])); | ||||||
|  |  | ||||||
|  |     double conv  = DerivativeStoppingCondition; | ||||||
|  |     if (h<3) conv= DerivativeStoppingConditionLoose; // Relax on first two hasenbusch factors | ||||||
|  |     MPCG.push_back(new MxPCG(conv, | ||||||
|  | 			     MX_inner, | ||||||
|  | 			     MaxCGIterations, | ||||||
|  | 			     UGrid_f, | ||||||
|  | 			     FrbGridF, | ||||||
|  | 			     *DenominatorsF[h],*Denominators[h], | ||||||
|  | 			     *LinOpF[h], *LinOpD[h]) ); | ||||||
|  |  | ||||||
|  |     ActionMPCG.push_back(new MxPCG(ActionStoppingCondition, | ||||||
|  | 				   MX_inner, | ||||||
|  | 				   MaxCGIterations, | ||||||
|  | 				   UGrid_f, | ||||||
|  | 				   FrbGridF, | ||||||
|  | 				   *DenominatorsF[h],*Denominators[h], | ||||||
|  | 				   *LinOpF[h], *LinOpD[h]) ); | ||||||
|  |  | ||||||
|  |     // Heatbath is not mixed precision yet. As it inverts the numerators, this is not so important because of the raised mass. | ||||||
|  |     Quotients.push_back (new TwoFlavourEvenOddRatioPseudoFermionAction<FermionImplPolicy>(*Numerators[h],*Denominators[h],*MPCG[h],*ActionMPCG[h],ActionCG)); | ||||||
|  | #else | ||||||
|  |     //////////////////////////////////////////////////////////////////////////// | ||||||
|  |     // Standard CG for 2f force | ||||||
|  |     //////////////////////////////////////////////////////////////////////////// | ||||||
|  |     Quotients.push_back   (new TwoFlavourEvenOddRatioPseudoFermionAction<FermionImplPolicy>(*Numerators[h],*Denominators[h],DerivativeCG,ActionCG)); | ||||||
|  | #endif | ||||||
|  |  | ||||||
|  |   } | ||||||
|  |  | ||||||
|  |   for(int h=0;h<n_hasenbusch+1;h++){ | ||||||
|  |     Level1.push_back(Quotients[h]); | ||||||
|  |   } | ||||||
|  |  | ||||||
|  |   ///////////////////////////////////////////////////////////// | ||||||
|  |   // Gauge action | ||||||
|  |   ///////////////////////////////////////////////////////////// | ||||||
|  |   Level2.push_back(&GaugeAction); | ||||||
|  |   TheHMC.TheAction.push_back(Level1); | ||||||
|  |   TheHMC.TheAction.push_back(Level2); | ||||||
|  |   std::cout << GridLogMessage << " Action complete "<< std::endl; | ||||||
|  |  | ||||||
|  |   ///////////////////////////////////////////////////////////// | ||||||
|  |   // HMC parameters are serialisable | ||||||
|  |  | ||||||
|  |   NoSmearing<HMCWrapper::ImplPolicy> S; | ||||||
|  | #ifndef DO_IMPLICIT | ||||||
|  |   TrivialMetric<HMCWrapper::ImplPolicy::Field> Mtr; | ||||||
|  | #else | ||||||
|  |     LaplacianRatParams gpar(2),mpar(2); | ||||||
|  |     gpar.offset = 1.; | ||||||
|  |     gpar.a0[0] = 500.; | ||||||
|  |     gpar.a1[0] = 0.; | ||||||
|  |     gpar.b0[0] = 0.25; | ||||||
|  |     gpar.b1[0] = 1.; | ||||||
|  |     gpar.a0[1] = -500.; | ||||||
|  |     gpar.a1[1] = 0.; | ||||||
|  |     gpar.b0[1] = 0.36; | ||||||
|  |     gpar.b1[1] = 1.2; | ||||||
|  |     gpar.b2=1.; | ||||||
|  |  | ||||||
|  |     mpar.offset = 1.; | ||||||
|  |     mpar.a0[0] =  -0.850891906532; | ||||||
|  |     mpar.a1[0] = -1.54707654538; | ||||||
|  |     mpar. b0[0] = 2.85557166137; | ||||||
|  |     mpar. b1[0] = 5.74194794773; | ||||||
|  |     mpar.a0[1] = -13.5120056831218384729709214298; | ||||||
|  |     mpar.a1[1] = 1.54707654538396877086370295729; | ||||||
|  |     mpar.b0[1] = 19.2921090880640520026645390317; | ||||||
|  |     mpar.b1[1] = -3.54194794773029020262811172870; | ||||||
|  |     mpar.b2=1.; | ||||||
|  |     for(int i=0;i<2;i++){ | ||||||
|  |        gpar.a1[i] *=16.; | ||||||
|  |        gpar.b1[i] *=16.; | ||||||
|  |        mpar.a1[i] *=16.; | ||||||
|  |        mpar.b1[i] *=16.; | ||||||
|  |     } | ||||||
|  |     gpar.b2 *= 16.*16.; | ||||||
|  |     mpar.b2 *= 16.*16.; | ||||||
|  |  | ||||||
|  |     ConjugateGradient<LatticeGaugeField> CG(1.0e-8,10000); | ||||||
|  |     LaplacianParams LapPar(0.0001, 1.0, 10000, 1e-8, 12, 64); | ||||||
|  |  | ||||||
|  |     std::cout << GridLogMessage << "LaplacianRat " << std::endl; | ||||||
|  |     gpar.tolerance=HMCparams.MD.RMHMCCGTol; | ||||||
|  |     mpar.tolerance=HMCparams.MD.RMHMCCGTol; | ||||||
|  |     std::cout << GridLogMessage << "gpar offset= " << gpar.offset <<std::endl; | ||||||
|  |     std::cout << GridLogMessage << " a0= " << gpar.a0 <<std::endl; | ||||||
|  |     std::cout << GridLogMessage << " a1= " << gpar.a1 <<std::endl; | ||||||
|  |     std::cout << GridLogMessage << " b0= " << gpar.b0 <<std::endl; | ||||||
|  |     std::cout << GridLogMessage << " b1= " << gpar.b1 <<std::endl; | ||||||
|  |     std::cout << GridLogMessage << " b2= " << gpar.b2 <<std::endl ;; | ||||||
|  |  | ||||||
|  |     std::cout << GridLogMessage << "mpar offset= " << mpar.offset <<std::endl; | ||||||
|  |     std::cout << GridLogMessage << " a0= " << mpar.a0 <<std::endl; | ||||||
|  |     std::cout << GridLogMessage << " a1= " << mpar.a1 <<std::endl; | ||||||
|  |     std::cout << GridLogMessage << " b0= " << mpar.b0 <<std::endl; | ||||||
|  |     std::cout << GridLogMessage << " b1= " << mpar.b1 <<std::endl; | ||||||
|  |     std::cout << GridLogMessage << " b2= " << mpar.b2 <<std::endl; | ||||||
|  | //  Assumes PeriodicGimplR or D at the moment | ||||||
|  |     auto UGrid = TheHMC.Resources.GetCartesian("gauge"); | ||||||
|  | //    auto UGrid_f   = GridPtrF; | ||||||
|  | //  auto GridPtrF   = SpaceTimeGrid::makeFourDimGrid(latt,simdF,mpi); | ||||||
|  | //    std::cout << GridLogMessage << " UGrid= " << UGrid <<std::endl; | ||||||
|  | //    std::cout << GridLogMessage << " UGrid_f= " << UGrid_f <<std::endl; | ||||||
|  |  | ||||||
|  |     LaplacianAdjointRat<HMCWrapper::ImplPolicy, PeriodicGimplF> Mtr(UGrid, UGrid_f ,CG, gpar, mpar); | ||||||
|  | #endif | ||||||
|  |  | ||||||
|  |   std::cout << GridLogMessage << " Running the HMC "<< std::endl; | ||||||
|  |   TheHMC.Run(S,Mtr);  // no smearing | ||||||
|  |  | ||||||
|  |   Grid_finalize(); | ||||||
|  | } // main | ||||||
|  |  | ||||||
|  |  | ||||||
|  |  | ||||||
| @@ -365,9 +365,15 @@ public: | |||||||
|     GridParallelRNG          RNG5(FGrid);  RNG5.SeedFixedIntegers(seeds5); |     GridParallelRNG          RNG5(FGrid);  RNG5.SeedFixedIntegers(seeds5); | ||||||
|     std::cout << GridLogMessage << "Initialised RNGs" << std::endl; |     std::cout << GridLogMessage << "Initialised RNGs" << std::endl; | ||||||
|  |  | ||||||
|  | #if 1 | ||||||
|     typedef DomainWallFermionF Action; |     typedef DomainWallFermionF Action; | ||||||
|     typedef typename Action::FermionField Fermion; |     typedef typename Action::FermionField Fermion; | ||||||
|     typedef LatticeGaugeFieldF Gauge; |     typedef LatticeGaugeFieldF Gauge; | ||||||
|  | #else | ||||||
|  |     typedef GparityDomainWallFermionF Action; | ||||||
|  |     typedef typename Action::FermionField Fermion; | ||||||
|  |     typedef LatticeGaugeFieldF Gauge; | ||||||
|  | #endif | ||||||
|      |      | ||||||
|     ///////// Source preparation //////////// |     ///////// Source preparation //////////// | ||||||
|     Gauge Umu(UGrid);  SU<Nc>::HotConfiguration(RNG4,Umu);  |     Gauge Umu(UGrid);  SU<Nc>::HotConfiguration(RNG4,Umu);  | ||||||
| @@ -635,6 +641,170 @@ public: | |||||||
|     std::cout<<GridLogMessage << "=================================================================================="<<std::endl; |     std::cout<<GridLogMessage << "=================================================================================="<<std::endl; | ||||||
|     return mflops_best; |     return mflops_best; | ||||||
|   } |   } | ||||||
|  |  | ||||||
|  |   static double Laplace(int L) | ||||||
|  |   { | ||||||
|  |     double mflops; | ||||||
|  |     double mflops_best = 0; | ||||||
|  |     double mflops_worst= 0; | ||||||
|  |     std::vector<double> mflops_all; | ||||||
|  |  | ||||||
|  |     /////////////////////////////////////////////////////// | ||||||
|  |     // Set/Get the layout & grid size | ||||||
|  |     /////////////////////////////////////////////////////// | ||||||
|  |     int threads = GridThread::GetThreads(); | ||||||
|  |     Coordinate mpi = GridDefaultMpi(); assert(mpi.size()==4); | ||||||
|  |     Coordinate local({L,L,L,L}); | ||||||
|  |     Coordinate latt4({local[0]*mpi[0],local[1]*mpi[1],local[2]*mpi[2],local[3]*mpi[3]}); | ||||||
|  |      | ||||||
|  |     GridCartesian         * TmpGrid   = SpaceTimeGrid::makeFourDimGrid(latt4, | ||||||
|  | 								       GridDefaultSimd(Nd,vComplex::Nsimd()), | ||||||
|  | 								       GridDefaultMpi()); | ||||||
|  |     uint64_t NP = TmpGrid->RankCount(); | ||||||
|  |     uint64_t NN = TmpGrid->NodeCount(); | ||||||
|  |     NN_global=NN; | ||||||
|  |     uint64_t SHM=NP/NN; | ||||||
|  |  | ||||||
|  |  | ||||||
|  |     ///////// Welcome message //////////// | ||||||
|  |     std::cout<<GridLogMessage << "=================================================================================="<<std::endl; | ||||||
|  |     std::cout<<GridLogMessage << "Benchmark Laplace on "<<L<<"^4 local volume "<<std::endl; | ||||||
|  |     std::cout<<GridLogMessage << "* Global volume  : "<<GridCmdVectorIntToString(latt4)<<std::endl; | ||||||
|  |     std::cout<<GridLogMessage << "* ranks          : "<<NP  <<std::endl; | ||||||
|  |     std::cout<<GridLogMessage << "* nodes          : "<<NN  <<std::endl; | ||||||
|  |     std::cout<<GridLogMessage << "* ranks/node     : "<<SHM <<std::endl; | ||||||
|  |     std::cout<<GridLogMessage << "* ranks geom     : "<<GridCmdVectorIntToString(mpi)<<std::endl; | ||||||
|  |     std::cout<<GridLogMessage << "* Using "<<threads<<" threads"<<std::endl; | ||||||
|  |     std::cout<<GridLogMessage << "=================================================================================="<<std::endl; | ||||||
|  |  | ||||||
|  |     ///////// Lattice Init //////////// | ||||||
|  |     GridCartesian         * FGrid   = SpaceTimeGrid::makeFourDimGrid(latt4, GridDefaultSimd(Nd,vComplexF::Nsimd()),GridDefaultMpi()); | ||||||
|  |     GridRedBlackCartesian * FrbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(FGrid); | ||||||
|  |      | ||||||
|  |     ///////// RNG Init //////////// | ||||||
|  |     std::vector<int> seeds4({1,2,3,4}); | ||||||
|  |     GridParallelRNG          RNG4(FGrid);  RNG4.SeedFixedIntegers(seeds4); | ||||||
|  |     std::cout << GridLogMessage << "Initialised RNGs" << std::endl; | ||||||
|  |  | ||||||
|  |     RealD mass=0.1; | ||||||
|  |     RealD c1=9.0/8.0; | ||||||
|  |     RealD c2=-1.0/24.0; | ||||||
|  |     RealD u0=1.0; | ||||||
|  |  | ||||||
|  | //    typedef ImprovedStaggeredFermionF Action; | ||||||
|  | //    typedef typename Action::FermionField Fermion;  | ||||||
|  |     typedef LatticeGaugeFieldF Gauge; | ||||||
|  |      | ||||||
|  |     Gauge Umu(FGrid);  SU<Nc>::HotConfiguration(RNG4,Umu);  | ||||||
|  |  | ||||||
|  | //    typename Action::ImplParams params; | ||||||
|  | //    Action Ds(Umu,Umu,*FGrid,*FrbGrid,mass,c1,c2,u0,params); | ||||||
|  |  | ||||||
|  | //  PeriodicGimplF | ||||||
|  |     typedef typename PeriodicGimplF::LinkField GaugeLinkFieldF; | ||||||
|  |  | ||||||
|  |     ///////// Source preparation //////////// | ||||||
|  |     GaugeLinkFieldF src   (FGrid); random(RNG4,src); | ||||||
|  | //    GaugeLinkFieldF src_e (FrbGrid); | ||||||
|  | //    GaugeLinkFieldF src_o (FrbGrid); | ||||||
|  | //    GaugeLinkFieldF r_e   (FrbGrid); | ||||||
|  | //    GaugeLinkFieldF r_o   (FrbGrid); | ||||||
|  |     GaugeLinkFieldF r_eo  (FGrid); | ||||||
|  |    | ||||||
|  |     { | ||||||
|  |  | ||||||
|  |  //     pickCheckerboard(Even,src_e,src); | ||||||
|  |  //     pickCheckerboard(Odd,src_o,src); | ||||||
|  |      | ||||||
|  |       const int num_cases = 1; | ||||||
|  |       std::string fmt("G/O/C  "); | ||||||
|  |        | ||||||
|  |       controls Cases [] = { | ||||||
|  | 	{  StaggeredKernelsStatic::OptGeneric   ,  StaggeredKernelsStatic::CommsAndCompute  ,CartesianCommunicator::CommunicatorPolicyConcurrent  }, | ||||||
|  |       };  | ||||||
|  |  | ||||||
|  |       for(int c=0;c<num_cases;c++) { | ||||||
|  |         CovariantAdjointLaplacianStencil<PeriodicGimplF,typename PeriodicGimplF::LinkField> LapStencilF(FGrid); | ||||||
|  |         QuadLinearOperator<CovariantAdjointLaplacianStencil<PeriodicGimplF,typename PeriodicGimplF::LinkField>,PeriodicGimplF::LinkField> QuadOpF(LapStencilF,c2,c1,1.); | ||||||
|  |         LapStencilF.GaugeImport(Umu); | ||||||
|  | 	 | ||||||
|  |  | ||||||
|  | 	StaggeredKernelsStatic::Comms = Cases[c].CommsOverlap; | ||||||
|  | 	StaggeredKernelsStatic::Opt   = Cases[c].Opt; | ||||||
|  | 	CartesianCommunicator::SetCommunicatorPolicy(Cases[c].CommsAsynch); | ||||||
|  |        | ||||||
|  | 	std::cout<<GridLogMessage << "=================================================================================="<<std::endl; | ||||||
|  | 	if ( StaggeredKernelsStatic::Opt == StaggeredKernelsStatic::OptGeneric   ) std::cout << GridLogMessage<< "* Using Stencil Nc Laplace" <<std::endl; | ||||||
|  | 	if ( StaggeredKernelsStatic::Comms == StaggeredKernelsStatic::CommsAndCompute ) std::cout << GridLogMessage<< "* Using Overlapped Comms/Compute" <<std::endl; | ||||||
|  | 	if ( StaggeredKernelsStatic::Comms == StaggeredKernelsStatic::CommsThenCompute) std::cout << GridLogMessage<< "* Using sequential Comms/Compute" <<std::endl; | ||||||
|  | 	std::cout << GridLogMessage<< "* SINGLE precision "<<std::endl; | ||||||
|  | 	std::cout<<GridLogMessage << "=================================================================================="<<std::endl; | ||||||
|  | 	 | ||||||
|  | 	int nwarm = 10; | ||||||
|  | 	double t0=usecond(); | ||||||
|  | 	FGrid->Barrier(); | ||||||
|  | 	for(int i=0;i<nwarm;i++){ | ||||||
|  | //	  Ds.DhopEO(src_o,r_e,DaggerNo); | ||||||
|  |           QuadOpF.HermOp(src,r_eo); | ||||||
|  | 	} | ||||||
|  | 	FGrid->Barrier(); | ||||||
|  | 	double t1=usecond(); | ||||||
|  | 	uint64_t ncall = 500; | ||||||
|  |  | ||||||
|  | 	FGrid->Broadcast(0,&ncall,sizeof(ncall)); | ||||||
|  |  | ||||||
|  | 	//	std::cout << GridLogMessage << " Estimate " << ncall << " calls per second"<<std::endl; | ||||||
|  |  | ||||||
|  | 	time_statistics timestat; | ||||||
|  | 	std::vector<double> t_time(ncall); | ||||||
|  | 	for(uint64_t i=0;i<ncall;i++){ | ||||||
|  | 	  t0=usecond(); | ||||||
|  | //	  Ds.DhopEO(src_o,r_e,DaggerNo); | ||||||
|  |           QuadOpF.HermOp(src,r_eo); | ||||||
|  | 	  t1=usecond(); | ||||||
|  | 	  t_time[i] = t1-t0; | ||||||
|  | 	} | ||||||
|  | 	FGrid->Barrier(); | ||||||
|  | 	 | ||||||
|  | 	double volume=1;  for(int mu=0;mu<Nd;mu++) volume=volume*latt4[mu]; | ||||||
|  | //	double flops=(1146.0*volume)/2; | ||||||
|  | 	double flops=(2*2*8*216.0*volume); | ||||||
|  | 	double mf_hi, mf_lo, mf_err; | ||||||
|  | 	 | ||||||
|  | 	timestat.statistics(t_time); | ||||||
|  | 	mf_hi = flops/timestat.min; | ||||||
|  | 	mf_lo = flops/timestat.max; | ||||||
|  | 	mf_err= flops/timestat.min * timestat.err/timestat.mean; | ||||||
|  |  | ||||||
|  | 	mflops = flops/timestat.mean; | ||||||
|  | 	mflops_all.push_back(mflops); | ||||||
|  | 	if ( mflops_best == 0   ) mflops_best = mflops; | ||||||
|  | 	if ( mflops_worst== 0   ) mflops_worst= mflops; | ||||||
|  | 	if ( mflops>mflops_best ) mflops_best = mflops; | ||||||
|  | 	if ( mflops<mflops_worst) mflops_worst= mflops; | ||||||
|  | 	 | ||||||
|  | 	std::cout<<GridLogMessage << std::fixed << std::setprecision(1)<<"Quad mflop/s =   "<< mflops << " ("<<mf_err<<") " << mf_lo<<"-"<<mf_hi <<std::endl; | ||||||
|  | 	std::cout<<GridLogMessage << std::fixed << std::setprecision(1)<<"Quad mflop/s per rank   "<< mflops/NP<<std::endl; | ||||||
|  | 	std::cout<<GridLogMessage << std::fixed << std::setprecision(1)<<"Quad mflop/s per node   "<< mflops/NN<<std::endl; | ||||||
|  | 	FGrid->Barrier(); | ||||||
|  |        | ||||||
|  |       } | ||||||
|  |  | ||||||
|  |       std::cout<<GridLogMessage << "=================================================================================="<<std::endl; | ||||||
|  |       std::cout<<GridLogMessage << L<<"^4  Quad Best  mflop/s        =   "<< mflops_best << " ; " << mflops_best/NN<<" per node " <<std::endl; | ||||||
|  |       std::cout<<GridLogMessage << L<<"^4  Quad Worst mflop/s        =   "<< mflops_worst<< " ; " << mflops_worst/NN<<" per node " <<std::endl; | ||||||
|  |       std::cout<<GridLogMessage <<fmt << std::endl; | ||||||
|  |       std::cout<<GridLogMessage ; | ||||||
|  | 	FGrid->Barrier(); | ||||||
|  |  | ||||||
|  |       for(int i=0;i<mflops_all.size();i++){ | ||||||
|  | 	std::cout<<mflops_all[i]/NN<<" ; " ; | ||||||
|  |       } | ||||||
|  |       std::cout<<std::endl; | ||||||
|  |     } | ||||||
|  |     std::cout<<GridLogMessage << "=================================================================================="<<std::endl; | ||||||
|  |     return mflops_best; | ||||||
|  |   } | ||||||
| }; | }; | ||||||
|  |  | ||||||
|  |  | ||||||
| @@ -662,6 +832,7 @@ int main (int argc, char ** argv) | |||||||
|   std::vector<double> wilson; |   std::vector<double> wilson; | ||||||
|   std::vector<double> dwf4; |   std::vector<double> dwf4; | ||||||
|   std::vector<double> staggered; |   std::vector<double> staggered; | ||||||
|  |   std::vector<double> lap; | ||||||
|  |  | ||||||
|   int Ls=1; |   int Ls=1; | ||||||
|   std::cout<<GridLogMessage << "=================================================================================="<<std::endl; |   std::cout<<GridLogMessage << "=================================================================================="<<std::endl; | ||||||
| @@ -688,12 +859,20 @@ int main (int argc, char ** argv) | |||||||
|     staggered.push_back(result); |     staggered.push_back(result); | ||||||
|   } |   } | ||||||
|  |  | ||||||
|  |   std::cout<<GridLogMessage << "=================================================================================="<<std::endl; | ||||||
|  |   std::cout<<GridLogMessage << " Laplace QuadOp 4D " <<std::endl; | ||||||
|  |   std::cout<<GridLogMessage << "=================================================================================="<<std::endl; | ||||||
|  |   for(int l=0;l<L_list.size();l++){ | ||||||
|  |     double result = Benchmark::Laplace(L_list[l]) ; | ||||||
|  |     lap.push_back(result); | ||||||
|  |   } | ||||||
|  |  | ||||||
|   std::cout<<GridLogMessage << "=================================================================================="<<std::endl; |   std::cout<<GridLogMessage << "=================================================================================="<<std::endl; | ||||||
|   std::cout<<GridLogMessage << " Summary table Ls="<<Ls <<std::endl; |   std::cout<<GridLogMessage << " Summary table Ls="<<Ls <<std::endl; | ||||||
|   std::cout<<GridLogMessage << "=================================================================================="<<std::endl; |   std::cout<<GridLogMessage << "=================================================================================="<<std::endl; | ||||||
|   std::cout<<GridLogMessage << "L \t\t Wilson \t\t DWF4 \t\t Staggered" <<std::endl; |   std::cout<<GridLogMessage << "L \t\t Wilson \t\t DWF4 \t\t Staggered \t\t Quad Laplace" <<std::endl; | ||||||
|   for(int l=0;l<L_list.size();l++){ |   for(int l=0;l<L_list.size();l++){ | ||||||
|     std::cout<<GridLogMessage << L_list[l] <<" \t\t "<< wilson[l]<<" \t\t "<<dwf4[l] << " \t\t "<< staggered[l]<<std::endl; |     std::cout<<GridLogMessage << L_list[l] <<" \t\t "<< wilson[l]<<" \t\t "<<dwf4[l] << " \t\t "<< staggered[l]<< " \t\t "<< lap[l]<< std::endl; | ||||||
|   } |   } | ||||||
|   std::cout<<GridLogMessage << "=================================================================================="<<std::endl; |   std::cout<<GridLogMessage << "=================================================================================="<<std::endl; | ||||||
|  |  | ||||||
|   | |||||||
| @@ -41,7 +41,7 @@ AC_PROG_RANLIB | |||||||
|  |  | ||||||
| ############### Get compiler information | ############### Get compiler information | ||||||
| AC_LANG([C++]) | AC_LANG([C++]) | ||||||
| AX_CXX_COMPILE_STDCXX(14,noext,mandatory) | AX_CXX_COMPILE_STDCXX(17,noext,mandatory) | ||||||
| AX_COMPILER_VENDOR | AX_COMPILER_VENDOR | ||||||
| AC_DEFINE_UNQUOTED([CXX_COMP_VENDOR],["$ax_cv_cxx_compiler_vendor"], | AC_DEFINE_UNQUOTED([CXX_COMP_VENDOR],["$ax_cv_cxx_compiler_vendor"], | ||||||
|       [vendor of C++ compiler that will compile the code]) |       [vendor of C++ compiler that will compile the code]) | ||||||
|   | |||||||
							
								
								
									
										1018
									
								
								m4/ax_cxx_compile_stdcxx.m4
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										1018
									
								
								m4/ax_cxx_compile_stdcxx.m4
									
									
									
									
									
										Normal file
									
								
							
										
											
												File diff suppressed because it is too large
												Load Diff
											
										
									
								
							
							
								
								
									
										34
									
								
								m4/ax_cxx_compile_stdcxx_14.m4
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										34
									
								
								m4/ax_cxx_compile_stdcxx_14.m4
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,34 @@ | |||||||
|  | # ============================================================================= | ||||||
|  | #  https://www.gnu.org/software/autoconf-archive/ax_cxx_compile_stdcxx_14.html | ||||||
|  | # ============================================================================= | ||||||
|  | # | ||||||
|  | # SYNOPSIS | ||||||
|  | # | ||||||
|  | #   AX_CXX_COMPILE_STDCXX_14([ext|noext], [mandatory|optional]) | ||||||
|  | # | ||||||
|  | # DESCRIPTION | ||||||
|  | # | ||||||
|  | #   Check for baseline language coverage in the compiler for the C++14 | ||||||
|  | #   standard; if necessary, add switches to CXX and CXXCPP to enable | ||||||
|  | #   support. | ||||||
|  | # | ||||||
|  | #   This macro is a convenience alias for calling the AX_CXX_COMPILE_STDCXX | ||||||
|  | #   macro with the version set to C++14.  The two optional arguments are | ||||||
|  | #   forwarded literally as the second and third argument respectively. | ||||||
|  | #   Please see the documentation for the AX_CXX_COMPILE_STDCXX macro for | ||||||
|  | #   more information.  If you want to use this macro, you also need to | ||||||
|  | #   download the ax_cxx_compile_stdcxx.m4 file. | ||||||
|  | # | ||||||
|  | # LICENSE | ||||||
|  | # | ||||||
|  | #   Copyright (c) 2015 Moritz Klammler <moritz@klammler.eu> | ||||||
|  | # | ||||||
|  | #   Copying and distribution of this file, with or without modification, are | ||||||
|  | #   permitted in any medium without royalty provided the copyright notice | ||||||
|  | #   and this notice are preserved. This file is offered as-is, without any | ||||||
|  | #   warranty. | ||||||
|  |  | ||||||
|  | #serial 5 | ||||||
|  |  | ||||||
|  | AX_REQUIRE_DEFINED([AX_CXX_COMPILE_STDCXX]) | ||||||
|  | AC_DEFUN([AX_CXX_COMPILE_STDCXX_14], [AX_CXX_COMPILE_STDCXX([14], [$1], [$2])]) | ||||||
| @@ -1,43 +0,0 @@ | |||||||
| #!/bin/bash -l |  | ||||||
| #SBATCH --job-name=bench |  | ||||||
| ##SBATCH --partition=small-g |  | ||||||
| #SBATCH --nodes=2 |  | ||||||
| #SBATCH --ntasks-per-node=8 |  | ||||||
| #SBATCH --cpus-per-task=7 |  | ||||||
| #SBATCH --gpus-per-node=8 |  | ||||||
| #SBATCH --time=00:10:00 |  | ||||||
| #SBATCH --account=phy157_dwf |  | ||||||
| #SBATCH --gpu-bind=none |  | ||||||
| #SBATCH --exclusive |  | ||||||
| #SBATCH --mem=0 |  | ||||||
|  |  | ||||||
| cat << EOF > select_gpu |  | ||||||
| #!/bin/bash |  | ||||||
| export GPU_MAP=(0 1 2 3 7 6 5 4) |  | ||||||
| export NUMA_MAP=(3 3 1 1 2 2 0 0) |  | ||||||
| export GPU=\${GPU_MAP[\$SLURM_LOCALID]} |  | ||||||
| export NUMA=\${NUMA_MAP[\$SLURM_LOCALID]} |  | ||||||
| export HIP_VISIBLE_DEVICES=\$GPU |  | ||||||
| unset ROCR_VISIBLE_DEVICES |  | ||||||
| echo RANK \$SLURM_LOCALID using GPU \$GPU     |  | ||||||
| exec numactl -m \$NUMA -N \$NUMA \$* |  | ||||||
| EOF |  | ||||||
|  |  | ||||||
| chmod +x ./select_gpu |  | ||||||
|  |  | ||||||
| root=$HOME/Frontier/Grid/systems/Frontier/ |  | ||||||
| source ${root}/sourceme.sh |  | ||||||
|  |  | ||||||
| export OMP_NUM_THREADS=7 |  | ||||||
| export MPICH_GPU_SUPPORT_ENABLED=1 |  | ||||||
| export MPICH_SMP_SINGLE_COPY_MODE=XPMEM |  | ||||||
|  |  | ||||||
| for vol in 32.32.32.64 |  | ||||||
| do |  | ||||||
| srun ./select_gpu ./Benchmark_dwf_fp32 --mpi 2.2.2.2 --accelerator-threads 8 --comms-overlap --shm 2048 --shm-mpi 0 --grid $vol  > log.shm0.ov.$vol |  | ||||||
| srun ./select_gpu ./Benchmark_dwf_fp32 --mpi 2.2.2.2 --accelerator-threads 8 --comms-overlap --shm 2048 --shm-mpi 1 --grid $vol  > log.shm1.ov.$vol |  | ||||||
|  |  | ||||||
| srun ./select_gpu ./Benchmark_dwf_fp32 --mpi 2.2.2.2 --accelerator-threads 8 --comms-sequential --shm 2048 --shm-mpi 0 --grid $vol  > log.shm0.seq.$vol |  | ||||||
| srun ./select_gpu ./Benchmark_dwf_fp32 --mpi 2.2.2.2 --accelerator-threads 8 --comms-sequential --shm 2048 --shm-mpi 1 --grid $vol > log.shm1.seq.$vol |  | ||||||
| done |  | ||||||
|  |  | ||||||
| @@ -1,23 +0,0 @@ | |||||||
| CLIME=`spack find --paths c-lime@2-3-9 | grep c-lime| cut -c 15-` |  | ||||||
| ../../configure --enable-comms=mpi-auto \ |  | ||||||
| --with-lime=$CLIME \ |  | ||||||
| --enable-unified=no \ |  | ||||||
| --enable-shm=nvlink \ |  | ||||||
| --enable-tracing=timer \ |  | ||||||
| --enable-accelerator=hip \ |  | ||||||
| --enable-gen-simd-width=64 \ |  | ||||||
| --disable-gparity \ |  | ||||||
| --disable-fermion-reps \ |  | ||||||
| --enable-simd=GPU \ |  | ||||||
| --enable-accelerator-cshift \ |  | ||||||
| --with-gmp=$OLCF_GMP_ROOT \ |  | ||||||
| --with-fftw=$FFTW_DIR/.. \ |  | ||||||
| --with-mpfr=/opt/cray/pe/gcc/mpfr/3.1.4/ \ |  | ||||||
| --disable-fermion-reps \ |  | ||||||
| CXX=hipcc MPICXX=mpicxx \ |  | ||||||
| CXXFLAGS="-fPIC -I{$ROCM_PATH}/include/ -std=c++14 -I${MPICH_DIR}/include -L/lib64 " \ |  | ||||||
|  LDFLAGS="-L/lib64 -L${MPICH_DIR}/lib -lmpi -L${CRAY_MPICH_ROOTDIR}/gtl/lib -lmpi_gtl_hsa -lamdhip64 " |  | ||||||
|  |  | ||||||
|  |  | ||||||
|  |  | ||||||
|  |  | ||||||
| @@ -1,13 +0,0 @@ | |||||||
| #!/bin/bash |  | ||||||
|  |  | ||||||
| lrank=$SLURM_LOCALID |  | ||||||
| lgpu=(0 1 2 3 7 6 5 4) |  | ||||||
|  |  | ||||||
| export ROCR_VISIBLE_DEVICES=${lgpu[$lrank]} |  | ||||||
|  |  | ||||||
| echo "`hostname` - $lrank device=$ROCR_VISIBLE_DEVICES " |  | ||||||
|  |  | ||||||
| $* |  | ||||||
|  |  | ||||||
|  |  | ||||||
|  |  | ||||||
| @@ -1,13 +0,0 @@ | |||||||
| . /autofs/nccs-svm1_home1/paboyle/Crusher/Grid/spack/share/spack/setup-env.sh |  | ||||||
| spack load c-lime |  | ||||||
| #export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/sw/crusher/spack-envs/base/opt/cray-sles15-zen3/gcc-11.2.0/gperftools-2.9.1-72ubwtuc5wcz2meqltbfdb76epufgzo2/lib |  | ||||||
| module load emacs  |  | ||||||
| module load PrgEnv-gnu |  | ||||||
| module load rocm |  | ||||||
| module load cray-mpich/8.1.23 |  | ||||||
| module load gmp |  | ||||||
| module load cray-fftw |  | ||||||
| module load craype-accel-amd-gfx90a |  | ||||||
| export LD_LIBRARY_PATH=/opt/gcc/mpfr/3.1.4/lib:$LD_LIBRARY_PATH |  | ||||||
| #Hack for lib |  | ||||||
| #export LD_LIBRARY_PATH=`pwd`:$LD_LIBRARY_PATH |  | ||||||
| @@ -1,9 +0,0 @@ | |||||||
| #!/bin/sh |  | ||||||
|  |  | ||||||
| export HIP_VISIBLE_DEVICES=$ROCR_VISIBLE_DEVICES |  | ||||||
| unset ROCR_VISIBLE_DEVICES |  | ||||||
|  |  | ||||||
| #rank=$SLURM_PROCID |  | ||||||
| #rocprof -d rocprof.$rank -o rocprof.$rank/results.rank$SLURM_PROCID.csv --sys-trace $@ |  | ||||||
|  |  | ||||||
| $@ |  | ||||||
| @@ -1,9 +1,8 @@ | |||||||
| #!/bin/bash | #!/bin/bash | ||||||
| 
 | 
 | ||||||
| num_tile=2 | num_tile=2 | ||||||
| 
 | gpu_id=$(( (MPI_LOCALRANKID / num_tile ) )) | ||||||
| gpu_id=$(( (MPI_LOCAL_RANKID % num_tile ) )) | tile_id=$((MPI_LOCALRANKID % num_tile)) | ||||||
| tile_id=$((MPI_LOCAL_RANKID / num_tile)) |  | ||||||
| 
 | 
 | ||||||
| export ZE_AFFINITY_MASK=$gpu_id.$tile_id | export ZE_AFFINITY_MASK=$gpu_id.$tile_id | ||||||
| 
 | 
 | ||||||
| @@ -1,62 +0,0 @@ | |||||||
| #!/bin/sh |  | ||||||
| ##SBATCH -p PVC-SPR-QZEH  |  | ||||||
| ##SBATCH -p PVC-ICX-QZNW |  | ||||||
| #SBATCH -p QZ1J-ICX-PVC |  | ||||||
| ##SBATCH -p QZ1J-SPR-PVC-2C |  | ||||||
|  |  | ||||||
| #source /nfs/site/home/paboylex/ATS/GridNew/Grid/systems/PVC-nightly/setup.sh |  | ||||||
|  |  | ||||||
| export NT=8 |  | ||||||
|  |  | ||||||
| export I_MPI_OFFLOAD=1 |  | ||||||
| export I_MPI_OFFLOAD_TOPOLIB=level_zero |  | ||||||
| export I_MPI_OFFLOAD_DOMAIN_SIZE=-1 |  | ||||||
|  |  | ||||||
| # export IGC_EnableLSCFenceUGMBeforeEOT=0 |  | ||||||
| # export SYCL_PROGRAM_COMPILE_OPTIONS="-ze-opt-large-register-file=False" |  | ||||||
| export SYCL_DEVICE_FILTER=gpu,level_zero |  | ||||||
| #export IGC_ShaderDumpEnable=1  |  | ||||||
| #export IGC_DumpToCurrentDir=1 |  | ||||||
| export I_MPI_OFFLOAD_CELL=tile |  | ||||||
| export EnableImplicitScaling=0 |  | ||||||
| export EnableWalkerPartition=0 |  | ||||||
| export ZE_AFFINITY_MASK=0.0 |  | ||||||
| mpiexec -launcher ssh -n 1 -host localhost  ./Benchmark_dwf_fp32 --mpi 1.1.1.1 --grid 32.32.32.32 --accelerator-threads $NT --comms-sequential --shm-mpi 1 --device-mem 32768 |  | ||||||
|  |  | ||||||
| export ZE_AFFINITY_MASK=0 |  | ||||||
| export I_MPI_OFFLOAD_CELL=device |  | ||||||
| export EnableImplicitScaling=1 |  | ||||||
| export EnableWalkerPartition=1 |  | ||||||
|  |  | ||||||
|  |  | ||||||
|  |  | ||||||
|  |  | ||||||
|  |  | ||||||
|  |  | ||||||
|  |  | ||||||
|  |  | ||||||
|  |  | ||||||
|  |  | ||||||
|  |  | ||||||
|  |  | ||||||
|  |  | ||||||
|  |  | ||||||
|  |  | ||||||
|  |  | ||||||
|  |  | ||||||
|  |  | ||||||
|  |  | ||||||
|  |  | ||||||
| #mpiexec -launcher ssh -n 2 -host localhost  vtune -collect gpu-hotspots -knob gpu-sampling-interval=1 -data-limit=0 -r ./vtune_run4 -- ./wrap.sh ./Benchmark_dwf_fp32 --mpi 2.1.1.1 --grid 64.32.32.32 --accelerator-threads $NT --comms-overlap --shm-mpi 1 |  | ||||||
|  |  | ||||||
| #mpiexec  -launcher ssh -n 1 -host localhost ./wrap.sh ./Benchmark_dwf_fp32 --mpi 1.1.1.1 --grid 64.32.32.32 --accelerator-threads $NT --comms-overlap --shm-mpi 1 |  | ||||||
|  |  | ||||||
| #mpiexec  -launcher ssh -n 2 -host localhost ./wrap.sh ./Benchmark_dwf_fp32 --mpi 2.1.1.1 --grid 64.32.32.32 --accelerator-threads $NT --comms-sequential --shm-mpi 1 |  | ||||||
|  |  | ||||||
| #mpiexec  -launcher ssh -n 2 -host localhost ./wrap.sh ./Benchmark_dwf_fp32 --mpi 2.1.1.1 --grid 64.32.32.32 --accelerator-threads $NT --comms-overlap --shm-mpi 1 |  | ||||||
|  |  | ||||||
| #mpiexec  -launcher ssh -n 2 -host localhost ./wrap.sh ./Benchmark_dwf_fp32 --mpi 2.1.1.1 --grid 64.32.32.32 --accelerator-threads $NT --comms-sequential --shm-mpi 0 |  | ||||||
|  |  | ||||||
| #mpirun -np 2 ./wrap.sh ./Benchmark_dwf_fp32 --mpi 1.1.1.2 --grid 16.32.32.64 --accelerator-threads $NT --comms-sequential --shm-mpi 0 |  | ||||||
| #mpirun -np 2 ./wrap.sh ./Benchmark_dwf_fp32 --mpi 1.1.1.2 --grid 32.32.32.64 --accelerator-threads $NT --comms-sequential --shm-mpi 1 |  | ||||||
|  |  | ||||||
| @@ -1,33 +0,0 @@ | |||||||
| #!/bin/bash |  | ||||||
| ##SBATCH -p PVC-SPR-QZEH  |  | ||||||
| ##SBATCH -p PVC-ICX-QZNW |  | ||||||
|  |  | ||||||
| #SBATCH -p QZ1J-ICX-PVC |  | ||||||
|  |  | ||||||
| #source /nfs/site/home/paboylex/ATS/GridNew/Grid/systems/PVC-nightly/setup.sh |  | ||||||
|  |  | ||||||
| export NT=16 |  | ||||||
|  |  | ||||||
| # export IGC_EnableLSCFenceUGMBeforeEOT=0 |  | ||||||
| # export SYCL_PROGRAM_COMPILE_OPTIONS="-ze-opt-large-register-file=False" |  | ||||||
| #export IGC_ShaderDumpEnable=1  |  | ||||||
| #export IGC_DumpToCurrentDir=1 |  | ||||||
| export I_MPI_OFFLOAD=1 |  | ||||||
| export I_MPI_OFFLOAD_TOPOLIB=level_zero |  | ||||||
| export I_MPI_OFFLOAD_DOMAIN_SIZE=-1 |  | ||||||
| export SYCL_DEVICE_FILTER=gpu,level_zero |  | ||||||
| export I_MPI_OFFLOAD_CELL=tile |  | ||||||
| export EnableImplicitScaling=0 |  | ||||||
| export EnableWalkerPartition=0 |  | ||||||
| #export SYCL_PI_LEVEL_ZERO_DEVICE_SCOPE_EVENTS=1 |  | ||||||
| #export SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1 |  | ||||||
| export SYCL_PI_LEVEL_ZERO_USE_COPY_ENGINE=0 |  | ||||||
|  |  | ||||||
| for i in 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 |  | ||||||
| do |  | ||||||
| mpiexec -launcher ssh -n 2 -host localhost  ./wrap.sh ./Benchmark_dwf_fp32 --mpi 1.1.1.2 --grid 32.32.32.64 --accelerator-threads $NT  --shm-mpi 0  --device-mem 32768 > 1.1.1.2.log$i |  | ||||||
| mpiexec -launcher ssh -n 2 -host localhost  ./wrap.sh ./Benchmark_dwf_fp32 --mpi 2.1.1.1 --grid 64.32.32.32 --accelerator-threads $NT  --shm-mpi 0  --device-mem 32768 > 2.1.1.1.log$i  |  | ||||||
| done |  | ||||||
|  |  | ||||||
| mpiexec -launcher ssh -n 2 -host localhost  ./wrap.sh ./Benchmark_dwf_fp32 --mpi 2.1.1.1 --grid 64.32.32.32 --accelerator-threads $NT --comms-sequential --shm-mpi 0 |  | ||||||
|  |  | ||||||
| @@ -1,9 +0,0 @@ | |||||||
| #!/bin/sh |  | ||||||
|  |  | ||||||
| export ZE_AFFINITY_MASK=0.$MPI_LOCALRANKID |  | ||||||
|  |  | ||||||
| echo Ranke $MPI_LOCALRANKID ZE_AFFINITY_MASK is $ZE_AFFINITY_MASK |  | ||||||
|  |  | ||||||
|  |  | ||||||
|   $@ |  | ||||||
|  |  | ||||||
| @@ -1,16 +0,0 @@ | |||||||
| INSTALL=/nfs/site/home/paboylx/prereqs/ |  | ||||||
| ../../configure \ |  | ||||||
| 	--enable-simd=GPU \ |  | ||||||
| 	--enable-gen-simd-width=64 \ |  | ||||||
| 	--enable-comms=mpi-auto \ |  | ||||||
| 	--disable-accelerator-cshift \ |  | ||||||
| 	--disable-gparity \ |  | ||||||
| 	--disable-fermion-reps \ |  | ||||||
| 	--enable-shm=nvlink \ |  | ||||||
| 	--enable-accelerator=sycl \ |  | ||||||
| 	--enable-unified=no \ |  | ||||||
| 	MPICXX=mpicxx \ |  | ||||||
| 	CXX=dpcpp \ |  | ||||||
| 	LDFLAGS="-fsycl-device-code-split=per_kernel -fsycl-device-lib=all -lze_loader -L$INSTALL/lib" \ |  | ||||||
| 	CXXFLAGS="-fsycl-unnamed-lambda -fsycl -no-fma -I$INSTALL/include -Wno-tautological-compare" |  | ||||||
|  |  | ||||||
| @@ -1,18 +0,0 @@ | |||||||
| export https_proxy=http://proxy-chain.intel.com:911 |  | ||||||
| #export LD_LIBRARY_PATH=/nfs/site/home/azusayax/install/lib:$LD_LIBRARY_PATH |  | ||||||
| export LD_LIBRARY_PATH=$HOME/prereqs/lib/:$LD_LIBRARY_PATH |  | ||||||
|  |  | ||||||
| module load intel-release |  | ||||||
| module load intel-comp-rt/embargo-ci-neo |  | ||||||
|  |  | ||||||
| #source /opt/intel/oneapi/PVC_setup.sh |  | ||||||
| #source /opt/intel/oneapi/ATS_setup.sh |  | ||||||
| #module load intel-nightly/20230331 |  | ||||||
| #module load intel-comp-rt/ci-neo-master/026093 |  | ||||||
|  |  | ||||||
| #module load intel/mpich |  | ||||||
| module load intel/mpich/pvc45.3 |  | ||||||
| export PATH=~/ATS/pti-gpu/tools/onetrace/:$PATH |  | ||||||
|  |  | ||||||
| #clsh embargo-ci-neo-022845 |  | ||||||
| #source /opt/intel/vtune_amplifier/amplxe-vars.sh |  | ||||||
| @@ -20,7 +20,7 @@ unset OMP_PLACES | |||||||
|  |  | ||||||
| cd $PBS_O_WORKDIR | cd $PBS_O_WORKDIR | ||||||
|  |  | ||||||
| qsub jobscript.pbs | #qsub jobscript.pbs | ||||||
|  |  | ||||||
| echo Jobid: $PBS_JOBID | echo Jobid: $PBS_JOBID | ||||||
| echo Running on host `hostname` | echo Running on host `hostname` | ||||||
| @@ -44,3 +44,4 @@ CMD="mpiexec -np ${NTOTRANKS} -ppn ${NRANKS} -d ${NDEPTH} --cpu-bind=depth -enva | |||||||
| 	./Benchmark_dwf_fp32 --mpi 1.1.2.6 --grid 16.32.64.192 --comms-overlap \ | 	./Benchmark_dwf_fp32 --mpi 1.1.2.6 --grid 16.32.64.192 --comms-overlap \ | ||||||
| 	--shm-mpi 0 --shm 2048 --device-mem 32000 --accelerator-threads 32" | 	--shm-mpi 0 --shm 2048 --device-mem 32000 --accelerator-threads 32" | ||||||
|  |  | ||||||
|  | $CMD | ||||||
|   | |||||||
| @@ -45,8 +45,8 @@ echo "rank $PALS_RANKID ; local rank $PALS_LOCAL_RANKID ; ZE_AFFINITY_MASK=$ZE_A | |||||||
|  |  | ||||||
| if [ $PALS_LOCAL_RANKID = 0 ] | if [ $PALS_LOCAL_RANKID = 0 ] | ||||||
| then | then | ||||||
|     onetrace --chrome-device-timeline "$@" | #    onetrace --chrome-device-timeline "$@" | ||||||
| #    "$@" |     "$@" | ||||||
| else | else | ||||||
| "$@" | "$@" | ||||||
| fi | fi | ||||||
|   | |||||||
| @@ -11,6 +11,6 @@ TOOLS=$HOME/tools | |||||||
| 	--enable-unified=no \ | 	--enable-unified=no \ | ||||||
| 	MPICXX=mpicxx \ | 	MPICXX=mpicxx \ | ||||||
| 	CXX=icpx \ | 	CXX=icpx \ | ||||||
| 	LDFLAGS="-fiopenmp -fsycl -fsycl-device-code-split=per_kernel -fsycl-device-lib=all -lze_loader -lapmidg -L$TOOLS/lib64/" \ | 	LDFLAGS="-fiopenmp -fsycl -fsycl-device-code-split=per_kernel -fsycl-device-lib=all -lze_loader -L$TOOLS/lib64/" \ | ||||||
| 	CXXFLAGS="-fiopenmp -fsycl-unnamed-lambda -fsycl -I$INSTALL/include -Wno-tautological-compare -I$HOME/ -I$TOOLS/include" | 	CXXFLAGS="-fiopenmp -fsycl-unnamed-lambda -fsycl -I$INSTALL/include -Wno-tautological-compare -I$HOME/ -I$TOOLS/include" | ||||||
|  |  | ||||||
|   | |||||||
| @@ -1,3 +1,4 @@ | |||||||
| BREW=/opt/local/ | BREW=/opt/local/ | ||||||
| MPICXX=mpicxx ../../configure --enable-simd=GEN --enable-comms=mpi-auto --enable-unified=yes --prefix $HOME/QCD/GridInstall --with-lime=/Users/peterboyle/QCD/SciDAC/install/ --with-openssl=$BREW --disable-fermion-reps --disable-gparity --disable-debug | MPICXX=mpicxx ../../configure --enable-simd=GEN --enable-comms=mpi-auto --enable-unified=yes --prefix $HOME/QCD/GridInstall --with-lime=/Users/peterboyle/QCD/SciDAC/install/ --with-openssl=$BREW --disable-fermion-reps --disable-gparity --disable-debug | ||||||
|  |  | ||||||
|  |  | ||||||
|   | |||||||
| @@ -1,235 +0,0 @@ | |||||||
|     /************************************************************************************* |  | ||||||
|  |  | ||||||
|     Grid physics library, www.github.com/paboyle/Grid  |  | ||||||
|  |  | ||||||
|     Source file: ./tests/Test_padded_cell.cc |  | ||||||
|  |  | ||||||
|     Copyright (C) 2023 |  | ||||||
|  |  | ||||||
| Author: Peter Boyle <paboyle@ph.ed.ac.uk> |  | ||||||
|  |  | ||||||
|     This program is free software; you can redistribute it and/or modify |  | ||||||
|     it under the terms of the GNU General Public License as published by |  | ||||||
|     the Free Software Foundation; either version 2 of the License, or |  | ||||||
|     (at your option) any later version. |  | ||||||
|  |  | ||||||
|     This program is distributed in the hope that it will be useful, |  | ||||||
|     but WITHOUT ANY WARRANTY; without even the implied warranty of |  | ||||||
|     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the |  | ||||||
|     GNU General Public License for more details. |  | ||||||
|  |  | ||||||
|     You should have received a copy of the GNU General Public License along |  | ||||||
|     with this program; if not, write to the Free Software Foundation, Inc., |  | ||||||
|     51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. |  | ||||||
|  |  | ||||||
|     See the full license in the file "LICENSE" in the top level distribution directory |  | ||||||
|     *************************************************************************************/ |  | ||||||
|     /*  END LEGAL */ |  | ||||||
| #include <Grid/Grid.h> |  | ||||||
| #include <Grid/lattice/PaddedCell.h> |  | ||||||
| #include <Grid/stencil/GeneralLocalStencil.h> |  | ||||||
|  |  | ||||||
| #include <Grid/algorithms/iterative/PrecGeneralisedConjugateResidual.h> |  | ||||||
| #include <Grid/algorithms/iterative/PrecGeneralisedConjugateResidualNonHermitian.h> |  | ||||||
| #include <Grid/algorithms/iterative/BiCGSTAB.h> |  | ||||||
|  |  | ||||||
| using namespace std; |  | ||||||
| using namespace Grid; |  | ||||||
|  |  | ||||||
| /////////////////////// |  | ||||||
| // Tells little dirac op to use MdagM as the .Op() |  | ||||||
| /////////////////////// |  | ||||||
| template<class Field> |  | ||||||
| class HermOpAdaptor : public LinearOperatorBase<Field> |  | ||||||
| { |  | ||||||
|   LinearOperatorBase<Field> & wrapped; |  | ||||||
| public: |  | ||||||
|   HermOpAdaptor(LinearOperatorBase<Field> &wrapme) : wrapped(wrapme)  {}; |  | ||||||
|   void OpDiag (const Field &in, Field &out) {    assert(0);  } |  | ||||||
|   void OpDir  (const Field &in, Field &out,int dir,int disp) {    assert(0);  } |  | ||||||
|   void OpDirAll  (const Field &in, std::vector<Field> &out){    assert(0);  }; |  | ||||||
|   void Op     (const Field &in, Field &out){ |  | ||||||
|     wrapped.HermOp(in,out); |  | ||||||
|   } |  | ||||||
|   void AdjOp     (const Field &in, Field &out){ |  | ||||||
|     wrapped.HermOp(in,out); |  | ||||||
|   } |  | ||||||
|   void HermOpAndNorm(const Field &in, Field &out,RealD &n1,RealD &n2){    assert(0);  } |  | ||||||
|   void HermOp(const Field &in, Field &out){ |  | ||||||
|     wrapped.HermOp(in,out); |  | ||||||
|   } |  | ||||||
| }; |  | ||||||
|  |  | ||||||
|  |  | ||||||
| int main (int argc, char ** argv) |  | ||||||
| { |  | ||||||
|   Grid_init(&argc,&argv); |  | ||||||
|  |  | ||||||
|   const int Ls=4; |  | ||||||
|  |  | ||||||
|   GridCartesian         * UGrid   = SpaceTimeGrid::makeFourDimGrid(GridDefaultLatt(), |  | ||||||
| 								   GridDefaultSimd(Nd,vComplex::Nsimd()), |  | ||||||
| 								   GridDefaultMpi()); |  | ||||||
|   GridRedBlackCartesian * UrbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(UGrid); |  | ||||||
|  |  | ||||||
|   GridCartesian         * FGrid   = SpaceTimeGrid::makeFiveDimGrid(Ls,UGrid); |  | ||||||
|   GridRedBlackCartesian * FrbGrid = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls,UGrid); |  | ||||||
|  |  | ||||||
|   // Construct a coarsened grid |  | ||||||
|   Coordinate clatt = GridDefaultLatt(); |  | ||||||
|   for(int d=0;d<clatt.size();d++){ |  | ||||||
|     clatt[d] = clatt[d]/2; |  | ||||||
|   } |  | ||||||
|  |  | ||||||
|   GridCartesian *Coarse4d =  SpaceTimeGrid::makeFourDimGrid(clatt, |  | ||||||
| 							    GridDefaultSimd(Nd,vComplex::Nsimd()), |  | ||||||
| 							    GridDefaultMpi());; |  | ||||||
|   GridCartesian *Coarse5d =  SpaceTimeGrid::makeFiveDimGrid(1,Coarse4d); |  | ||||||
|  |  | ||||||
|   std::vector<int> seeds4({1,2,3,4}); |  | ||||||
|   std::vector<int> seeds5({5,6,7,8}); |  | ||||||
|   std::vector<int> cseeds({5,6,7,8}); |  | ||||||
|   GridParallelRNG          RNG5(FGrid);   RNG5.SeedFixedIntegers(seeds5); |  | ||||||
|   GridParallelRNG          RNG4(UGrid);   RNG4.SeedFixedIntegers(seeds4); |  | ||||||
|   GridParallelRNG          CRNG(Coarse5d);CRNG.SeedFixedIntegers(cseeds); |  | ||||||
|  |  | ||||||
|   LatticeFermion    src(FGrid); random(RNG5,src); |  | ||||||
|   LatticeFermion result(FGrid); result=Zero(); |  | ||||||
|   LatticeFermion    ref(FGrid); ref=Zero(); |  | ||||||
|   LatticeFermion    tmp(FGrid); |  | ||||||
|   LatticeFermion    err(FGrid); |  | ||||||
|   LatticeGaugeField Umu(UGrid); |  | ||||||
|   SU<Nc>::HotConfiguration(RNG4,Umu); |  | ||||||
|   //  Umu=Zero(); |  | ||||||
|    |  | ||||||
|   RealD mass=0.1; |  | ||||||
|   RealD M5=1.8; |  | ||||||
|  |  | ||||||
|   DomainWallFermionD Ddwf(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5); |  | ||||||
|  |  | ||||||
|   const int nbasis = 16; |  | ||||||
|   const int cb = 0 ; |  | ||||||
|   LatticeFermion prom(FGrid); |  | ||||||
|  |  | ||||||
|   std::vector<LatticeFermion> subspace(nbasis,FGrid); |  | ||||||
|  |  | ||||||
|   std::cout<<GridLogMessage<<"Calling Aggregation class" <<std::endl; |  | ||||||
|  |  | ||||||
|   /////////////////////////////////////////////////////////// |  | ||||||
|   // Squared operator is in HermOp |  | ||||||
|   /////////////////////////////////////////////////////////// |  | ||||||
|   MdagMLinearOperator<DomainWallFermionD,LatticeFermion> HermDefOp(Ddwf); |  | ||||||
|  |  | ||||||
|   /////////////////////////////////////////////////// |  | ||||||
|   // Random aggregation space |  | ||||||
|   /////////////////////////////////////////////////// |  | ||||||
|   std::cout<<GridLogMessage << "Building random aggregation class"<< std::endl; |  | ||||||
|   typedef Aggregation<vSpinColourVector,vTComplex,nbasis> Subspace; |  | ||||||
|   Subspace Aggregates(Coarse5d,FGrid,cb); |  | ||||||
|   Aggregates.CreateSubspaceRandom(RNG5); |  | ||||||
|  |  | ||||||
|   /////////////////////////////////////////////////// |  | ||||||
|   // Build little dirac op |  | ||||||
|   /////////////////////////////////////////////////// |  | ||||||
|   std::cout<<GridLogMessage << "Building little Dirac operator"<< std::endl; |  | ||||||
|  |  | ||||||
|   typedef GeneralCoarsenedMatrix<vSpinColourVector,vTComplex,nbasis> LittleDiracOperator; |  | ||||||
|   typedef LittleDiracOperator::CoarseVector CoarseVector; |  | ||||||
|  |  | ||||||
|   NextToNearestStencilGeometry5D geom(Coarse5d); |  | ||||||
|   LittleDiracOperator LittleDiracOp(geom,FGrid,Coarse5d); |  | ||||||
|   LittleDiracOperator LittleDiracOpCol(geom,FGrid,Coarse5d); |  | ||||||
|  |  | ||||||
|   HermOpAdaptor<LatticeFermionD> HOA(HermDefOp); |  | ||||||
|  |  | ||||||
|   int pp=16; |  | ||||||
|   LittleDiracOp.CoarsenOperator(HOA,Aggregates); |  | ||||||
|    |  | ||||||
|   /////////////////////////////////////////////////// |  | ||||||
|   // Test the operator |  | ||||||
|   /////////////////////////////////////////////////// |  | ||||||
|   CoarseVector c_src (Coarse5d); |  | ||||||
|   CoarseVector c_res (Coarse5d); |  | ||||||
|   CoarseVector c_res_dag(Coarse5d); |  | ||||||
|   CoarseVector c_proj(Coarse5d); |  | ||||||
|  |  | ||||||
|   subspace=Aggregates.subspace; |  | ||||||
|  |  | ||||||
|   //  random(CRNG,c_src); |  | ||||||
|   c_src = 1.0; |  | ||||||
|  |  | ||||||
|   blockPromote(c_src,err,subspace); |  | ||||||
|  |  | ||||||
|   prom=Zero(); |  | ||||||
|   for(int b=0;b<nbasis;b++){ |  | ||||||
|     prom=prom+subspace[b]; |  | ||||||
|   } |  | ||||||
|   err=err-prom;  |  | ||||||
|   std::cout<<GridLogMessage<<"Promoted back from subspace: err "<<norm2(err)<<std::endl; |  | ||||||
|   std::cout<<GridLogMessage<<"c_src "<<norm2(c_src)<<std::endl; |  | ||||||
|   std::cout<<GridLogMessage<<"prom  "<<norm2(prom)<<std::endl; |  | ||||||
|  |  | ||||||
|   HermDefOp.HermOp(prom,tmp); |  | ||||||
|  |  | ||||||
|   blockProject(c_proj,tmp,subspace); |  | ||||||
|   std::cout<<GridLogMessage<<" Called Big Dirac Op "<<norm2(tmp)<<std::endl; |  | ||||||
|  |  | ||||||
|   std::cout<<GridLogMessage<<" Calling little Dirac Op "<<std::endl; |  | ||||||
|   LittleDiracOp.M(c_src,c_res); |  | ||||||
|   LittleDiracOp.Mdag(c_src,c_res_dag); |  | ||||||
|  |  | ||||||
|   std::cout<<GridLogMessage<<"Little dop : "<<norm2(c_res)<<std::endl; |  | ||||||
|   std::cout<<GridLogMessage<<"Little dop dag : "<<norm2(c_res_dag)<<std::endl; |  | ||||||
|   std::cout<<GridLogMessage<<"Big dop in subspace : "<<norm2(c_proj)<<std::endl; |  | ||||||
|  |  | ||||||
|   c_proj = c_proj - c_res; |  | ||||||
|   std::cout<<GridLogMessage<<" ldop error: "<<norm2(c_proj)<<std::endl; |  | ||||||
|  |  | ||||||
|   c_res_dag = c_res_dag - c_res; |  | ||||||
|   std::cout<<GridLogMessage<<"Little dopDag - dop: "<<norm2(c_res_dag)<<std::endl; |  | ||||||
|  |  | ||||||
|   std::cout<<GridLogMessage << "Testing Hermiticity stochastically "<< std::endl; |  | ||||||
|   CoarseVector phi(Coarse5d); |  | ||||||
|   CoarseVector chi(Coarse5d); |  | ||||||
|   CoarseVector Aphi(Coarse5d); |  | ||||||
|   CoarseVector Achi(Coarse5d); |  | ||||||
|  |  | ||||||
|   random(CRNG,phi); |  | ||||||
|   random(CRNG,chi); |  | ||||||
|  |  | ||||||
|   std::cout<<GridLogMessage<<"Made randoms "<<norm2(phi)<<" " << norm2(chi)<<std::endl; |  | ||||||
|  |  | ||||||
|   LittleDiracOp.M(phi,Aphi); |  | ||||||
|  |  | ||||||
|   LittleDiracOp.Mdag(chi,Achi); |  | ||||||
|  |  | ||||||
|   std::cout<<GridLogMessage<<"Aphi "<<norm2(Aphi)<<" A chi" << norm2(Achi)<<std::endl; |  | ||||||
|  |  | ||||||
|   ComplexD pAc = innerProduct(chi,Aphi); |  | ||||||
|   ComplexD cAp = innerProduct(phi,Achi); |  | ||||||
|   ComplexD cAc = innerProduct(chi,Achi); |  | ||||||
|   ComplexD pAp = innerProduct(phi,Aphi); |  | ||||||
|  |  | ||||||
|   std::cout<<GridLogMessage<< "pAc "<<pAc<<" cAp "<< cAp<< " diff "<<pAc-adj(cAp)<<std::endl; |  | ||||||
|   std::cout<<GridLogMessage<< "pAp "<<pAp<<" cAc "<< cAc<<"Should be real"<< std::endl; |  | ||||||
|  |  | ||||||
|   std::cout<<GridLogMessage<<"Testing linearity"<<std::endl; |  | ||||||
|   CoarseVector PhiPlusChi(Coarse5d); |  | ||||||
|   CoarseVector APhiPlusChi(Coarse5d); |  | ||||||
|   CoarseVector linerr(Coarse5d); |  | ||||||
|   PhiPlusChi = phi+chi; |  | ||||||
|   LittleDiracOp.M(PhiPlusChi,APhiPlusChi); |  | ||||||
|  |  | ||||||
|   linerr= APhiPlusChi-Aphi; |  | ||||||
|   linerr= linerr-Achi; |  | ||||||
|   std::cout<<GridLogMessage<<"**Diff "<<norm2(linerr)<<std::endl; |  | ||||||
|  |  | ||||||
|   std::cout<<GridLogMessage<<std::endl; |  | ||||||
|   std::cout<<GridLogMessage<<std::endl; |  | ||||||
|   std::cout<<GridLogMessage<<"*******************************************"<<std::endl; |  | ||||||
|   std::cout<<GridLogMessage<<"*******************************************"<<std::endl; |  | ||||||
|   std::cout<<GridLogMessage<<"*******************************************"<<std::endl; |  | ||||||
|    |  | ||||||
|   Grid_finalize(); |  | ||||||
|   return 0; |  | ||||||
| } |  | ||||||
| @@ -1,408 +0,0 @@ | |||||||
|     /************************************************************************************* |  | ||||||
|  |  | ||||||
|     Grid physics library, www.github.com/paboyle/Grid  |  | ||||||
|  |  | ||||||
|     Source file: ./tests/Test_general_coarse_hdcg.cc |  | ||||||
|  |  | ||||||
|     Copyright (C) 2023 |  | ||||||
|  |  | ||||||
| Author: Peter Boyle <pboyle@bnl.gov> |  | ||||||
|  |  | ||||||
|     This program is free software; you can redistribute it and/or modify |  | ||||||
|     it under the terms of the GNU General Public License as published by |  | ||||||
|     the Free Software Foundation; either version 2 of the License, or |  | ||||||
|     (at your option) any later version. |  | ||||||
|  |  | ||||||
|     This program is distributed in the hope that it will be useful, |  | ||||||
|     but WITHOUT ANY WARRANTY; without even the implied warranty of |  | ||||||
|     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the |  | ||||||
|     GNU General Public License for more details. |  | ||||||
|  |  | ||||||
|     You should have received a copy of the GNU General Public License along |  | ||||||
|     with this program; if not, write to the Free Software Foundation, Inc., |  | ||||||
|     51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. |  | ||||||
|  |  | ||||||
|     See the full license in the file "LICENSE" in the top level distribution directory |  | ||||||
|     *************************************************************************************/ |  | ||||||
|     /*  END LEGAL */ |  | ||||||
| #include <Grid/Grid.h> |  | ||||||
| #include <Grid/lattice/PaddedCell.h> |  | ||||||
| #include <Grid/stencil/GeneralLocalStencil.h> |  | ||||||
| //#include <Grid/algorithms/GeneralCoarsenedMatrix.h> |  | ||||||
| #include <Grid/algorithms/iterative/AdefGeneric.h> |  | ||||||
|  |  | ||||||
| using namespace std; |  | ||||||
| using namespace Grid; |  | ||||||
|  |  | ||||||
| template<class Coarsened> |  | ||||||
| void SaveOperator(Coarsened &Operator,std::string file) |  | ||||||
| { |  | ||||||
| #ifdef HAVE_LIME |  | ||||||
|   emptyUserRecord record; |  | ||||||
|   ScidacWriter WR(Operator.Grid()->IsBoss()); |  | ||||||
|   assert(Operator._A.size()==Operator.geom.npoint); |  | ||||||
|   WR.open(file); |  | ||||||
|   for(int p=0;p<Operator._A.size();p++){ |  | ||||||
|     auto tmp = Operator.Cell.Extract(Operator._A[p]); |  | ||||||
|     WR.writeScidacFieldRecord(tmp,record); |  | ||||||
|   } |  | ||||||
|   WR.close(); |  | ||||||
| #endif |  | ||||||
| } |  | ||||||
| template<class Coarsened> |  | ||||||
| void LoadOperator(Coarsened Operator,std::string file) |  | ||||||
| { |  | ||||||
| #ifdef HAVE_LIME |  | ||||||
|   emptyUserRecord record; |  | ||||||
|   Grid::ScidacReader RD ; |  | ||||||
|   RD.open(file); |  | ||||||
|   assert(Operator._A.size()==Operator.geom.npoint); |  | ||||||
|   for(int p=0;p<Operator.geom.npoint;p++){ |  | ||||||
|     conformable(Operator._A[p].Grid(),Operator.CoarseGrid()); |  | ||||||
|     RD.readScidacFieldRecord(Operator._A[p],record); |  | ||||||
|   }     |  | ||||||
|   RD.close(); |  | ||||||
|   Operator.ExchangeCoarseLinks(); |  | ||||||
| #endif |  | ||||||
| } |  | ||||||
| template<class aggregation> |  | ||||||
| void SaveBasis(aggregation &Agg,std::string file) |  | ||||||
| { |  | ||||||
| #ifdef HAVE_LIME |  | ||||||
|   emptyUserRecord record; |  | ||||||
|   ScidacWriter WR(Agg.FineGrid->IsBoss()); |  | ||||||
|   WR.open(file); |  | ||||||
|   for(int b=0;b<Agg.subspace.size();b++){ |  | ||||||
|     WR.writeScidacFieldRecord(Agg.subspace[b],record); |  | ||||||
|   } |  | ||||||
|   WR.close(); |  | ||||||
| #endif |  | ||||||
| } |  | ||||||
| template<class aggregation> |  | ||||||
| void LoadBasis(aggregation &Agg, std::string file) |  | ||||||
| { |  | ||||||
| #ifdef HAVE_LIME |  | ||||||
|   emptyUserRecord record; |  | ||||||
|   ScidacReader RD ; |  | ||||||
|   RD.open(file); |  | ||||||
|   for(int b=0;b<Agg.subspace.size();b++){ |  | ||||||
|     RD.readScidacFieldRecord(Agg.subspace[b],record); |  | ||||||
|   }     |  | ||||||
|   RD.close(); |  | ||||||
| #endif |  | ||||||
| } |  | ||||||
|  |  | ||||||
|  |  | ||||||
| template<class Field> class TestSolver : public LinearFunction<Field> { |  | ||||||
| public: |  | ||||||
|   TestSolver() {}; |  | ||||||
|   void operator() (const Field &in, Field &out){    out = Zero();  }      |  | ||||||
| }; |  | ||||||
|  |  | ||||||
|  |  | ||||||
| // Scalar reciprocal 1/x; handed to Chebyshev by ChebyshevSmoother as the |  | ||||||
| // function to approximate. No guard against x == 0. |  | ||||||
| RealD InverseApproximation(RealD x){ |  | ||||||
|   const RealD numerator = 1.0; |  | ||||||
|   return numerator/x; |  | ||||||
| } |  | ||||||
|  |  | ||||||
| // Want Op in CoarsenOp to call MatPcDagMatPc |  | ||||||
| // Adaptor that exposes wrapped.HermOp through Op, AdjOp and HermOp alike, so |  | ||||||
| // code that calls Op on this object actually runs the wrapped HermOp. |  | ||||||
| // OpDiag, OpDir, OpDirAll and HermOpAndNorm are unsupported and assert. |  | ||||||
| template<class Field> |  | ||||||
| class HermOpAdaptor : public LinearOperatorBase<Field> |  | ||||||
| { |  | ||||||
|   LinearOperatorBase<Field> & wrapped; |  | ||||||
| public: |  | ||||||
|   HermOpAdaptor(LinearOperatorBase<Field> &wrapme) : wrapped(wrapme)  {}; |  | ||||||
|  |  | ||||||
|   // Application entry points: all three forward to the wrapped HermOp. |  | ||||||
|   void HermOp(const Field &in, Field &out) |  | ||||||
|   { |  | ||||||
|     wrapped.HermOp(in,out); |  | ||||||
|   } |  | ||||||
|   void Op(const Field &in, Field &out) |  | ||||||
|   { |  | ||||||
|     wrapped.HermOp(in,out); |  | ||||||
|   } |  | ||||||
|   void AdjOp(const Field &in, Field &out) |  | ||||||
|   { |  | ||||||
|     wrapped.HermOp(in,out); |  | ||||||
|   } |  | ||||||
|  |  | ||||||
|   // Unsupported parts of the interface. |  | ||||||
|   void HermOpAndNorm(const Field &in, Field &out,RealD &n1,RealD &n2) |  | ||||||
|   { |  | ||||||
|     assert(0); |  | ||||||
|   } |  | ||||||
|   void OpDiag(const Field &in, Field &out) |  | ||||||
|   { |  | ||||||
|     assert(0); |  | ||||||
|   } |  | ||||||
|   void OpDir(const Field &in, Field &out,int dir,int disp) |  | ||||||
|   { |  | ||||||
|     assert(0); |  | ||||||
|   } |  | ||||||
|   void OpDirAll(const Field &in, std::vector<Field> &out) |  | ||||||
|   { |  | ||||||
|     assert(0); |  | ||||||
|   } |  | ||||||
| }; |  | ||||||
| // LinearFunction smoother: applies a Chebyshev object, built from |  | ||||||
| // InverseApproximation on [_lo,_hi] at order _ord, to the referenced operator. |  | ||||||
| // The Matrix template parameter is not used inside the class. |  | ||||||
| template<class Field,class Matrix> class ChebyshevSmoother : public LinearFunction<Field> |  | ||||||
| { |  | ||||||
| public: |  | ||||||
|   using LinearFunction<Field>::operator(); |  | ||||||
|   typedef LinearOperatorBase<Field> FineOperator; |  | ||||||
|  |  | ||||||
|   FineOperator   & _SmootherOperator; // held by reference; must outlive the smoother |  | ||||||
|   Chebyshev<Field> Cheby; |  | ||||||
|  |  | ||||||
|   // Stores the operator reference, builds the polynomial and logs its parameters. |  | ||||||
|   ChebyshevSmoother(RealD _lo,RealD _hi,int _ord, FineOperator &SmootherOperator) |  | ||||||
|     : _SmootherOperator(SmootherOperator), |  | ||||||
|       Cheby(_lo,_hi,_ord,InverseApproximation) |  | ||||||
|   { |  | ||||||
|     std::cout << GridLogMessage<<" Chebyshev smoother order "<<_ord<<" ["<<_lo<<","<<_hi<<"]"<<std::endl; |  | ||||||
|   }; |  | ||||||
|  |  | ||||||
|   // out = Cheby(_SmootherOperator) applied to a private copy of in. |  | ||||||
|   void operator() (const Field &in, Field &out) |  | ||||||
|   { |  | ||||||
|     Field source(in.Grid()); |  | ||||||
|     source = in; |  | ||||||
|     Cheby(_SmootherOperator,source,out); |  | ||||||
|   } |  | ||||||
| }; |  | ||||||
|  |  | ||||||
| int main (int argc, char ** argv) |  | ||||||
| { |  | ||||||
|   Grid_init(&argc,&argv); |  | ||||||
|  |  | ||||||
|   const int Ls=16; |  | ||||||
|   const int nbasis = 40; |  | ||||||
|   const int cb = 0 ; |  | ||||||
|   RealD mass=0.01; |  | ||||||
|   RealD M5=1.8; |  | ||||||
|   RealD b=1.5; |  | ||||||
|   RealD c=0.5; |  | ||||||
|  |  | ||||||
|   GridCartesian         * UGrid   = SpaceTimeGrid::makeFourDimGrid(GridDefaultLatt(), |  | ||||||
| 								   GridDefaultSimd(Nd,vComplex::Nsimd()), |  | ||||||
| 								   GridDefaultMpi()); |  | ||||||
|   GridRedBlackCartesian * UrbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(UGrid); |  | ||||||
|   GridCartesian         * FGrid   = SpaceTimeGrid::makeFiveDimGrid(Ls,UGrid); |  | ||||||
|   GridRedBlackCartesian * FrbGrid = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls,UGrid); |  | ||||||
|  |  | ||||||
|   // Construct a coarsened grid with 4^4 cell |  | ||||||
|   Coordinate clatt = GridDefaultLatt(); |  | ||||||
|   for(int d=0;d<clatt.size();d++){ |  | ||||||
|     clatt[d] = clatt[d]/4; |  | ||||||
|   } |  | ||||||
|   GridCartesian *Coarse4d =  SpaceTimeGrid::makeFourDimGrid(clatt, |  | ||||||
| 							    GridDefaultSimd(Nd,vComplex::Nsimd()), |  | ||||||
| 							    GridDefaultMpi());; |  | ||||||
|   GridCartesian *Coarse5d =  SpaceTimeGrid::makeFiveDimGrid(1,Coarse4d); |  | ||||||
|  |  | ||||||
|   ///////////////////////// RNGs ///////////////////////////////// |  | ||||||
|   std::vector<int> seeds4({1,2,3,4}); |  | ||||||
|   std::vector<int> seeds5({5,6,7,8}); |  | ||||||
|   std::vector<int> cseeds({5,6,7,8}); |  | ||||||
|  |  | ||||||
|   GridParallelRNG          RNG5(FGrid);   RNG5.SeedFixedIntegers(seeds5); |  | ||||||
|   GridParallelRNG          RNG4(UGrid);   RNG4.SeedFixedIntegers(seeds4); |  | ||||||
|   GridParallelRNG          CRNG(Coarse5d);CRNG.SeedFixedIntegers(cseeds); |  | ||||||
|  |  | ||||||
|   ///////////////////////// Configuration ///////////////////////////////// |  | ||||||
|   LatticeGaugeField Umu(UGrid); |  | ||||||
|  |  | ||||||
|   FieldMetaData header; |  | ||||||
|   std::string file("ckpoint_lat.4000"); |  | ||||||
|   NerscIO::readConfiguration(Umu,header,file); |  | ||||||
|  |  | ||||||
|   //////////////////////// Fermion action ////////////////////////////////// |  | ||||||
|   MobiusFermionD Ddwf(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5,b,c); |  | ||||||
|  |  | ||||||
|   SchurDiagMooeeOperator<MobiusFermionD, LatticeFermion> HermOpEO(Ddwf); |  | ||||||
|  |  | ||||||
|   typedef HermOpAdaptor<LatticeFermionD> HermFineMatrix; |  | ||||||
|   HermFineMatrix FineHermOp(HermOpEO); |  | ||||||
|    |  | ||||||
|   LatticeFermion result(FrbGrid); result=Zero(); |  | ||||||
|  |  | ||||||
|   LatticeFermion    src(FrbGrid); random(RNG5,src); |  | ||||||
|  |  | ||||||
|   // Run power method on FineHermOp |  | ||||||
|   PowerMethod<LatticeFermion>       PM;   PM(HermOpEO,src); |  | ||||||
|  |  | ||||||
|   |  | ||||||
|   //////////////////////////////////////////////////////////// |  | ||||||
|   ///////////// Coarse basis and Little Dirac Operator /////// |  | ||||||
|   //////////////////////////////////////////////////////////// |  | ||||||
|   typedef GeneralCoarsenedMatrix<vSpinColourVector,vTComplex,nbasis> LittleDiracOperator; |  | ||||||
|   typedef LittleDiracOperator::CoarseVector CoarseVector; |  | ||||||
|  |  | ||||||
|   NextToNextToNextToNearestStencilGeometry5D geom(Coarse5d); |  | ||||||
|   NearestStencilGeometry5D geom_nn(Coarse5d); |  | ||||||
|    |  | ||||||
|   // Warning: This routine calls PVdagM.Op, not PVdagM.HermOp |  | ||||||
|   typedef Aggregation<vSpinColourVector,vTComplex,nbasis> Subspace; |  | ||||||
|   Subspace Aggregates(Coarse5d,FrbGrid,cb); |  | ||||||
|  |  | ||||||
|   //////////////////////////////////////////////////////////// |  | ||||||
|   // Need to check about red-black grid coarsening |  | ||||||
|   //////////////////////////////////////////////////////////// |  | ||||||
|   LittleDiracOperator LittleDiracOp(geom,FrbGrid,Coarse5d); |  | ||||||
|  |  | ||||||
|   bool load=false; |  | ||||||
|   if ( load ) { |  | ||||||
|     LoadBasis(Aggregates,"Subspace.scidac"); |  | ||||||
|     LoadOperator(LittleDiracOp,"LittleDiracOp.scidac"); |  | ||||||
|   } else { |  | ||||||
|     Aggregates.CreateSubspaceChebyshev(RNG5,HermOpEO,nbasis, |  | ||||||
| 				       95.0,0.1, |  | ||||||
| 				       //				     400,200,200 -- 48 iters |  | ||||||
| 				       //				     600,200,200 -- 38 iters, 162s |  | ||||||
| 				       //				     600,200,100 -- 38 iters, 169s |  | ||||||
| 				       //				     600,200,50  -- 88 iters. 370s  |  | ||||||
| 				       600, |  | ||||||
| 				       200, |  | ||||||
| 				       100, |  | ||||||
| 				       0.0); |  | ||||||
|     LittleDiracOp.CoarsenOperator(FineHermOp,Aggregates); |  | ||||||
|     SaveBasis(Aggregates,"Subspace.scidac"); |  | ||||||
|     SaveOperator(LittleDiracOp,"LittleDiracOp.scidac"); |  | ||||||
|   } |  | ||||||
|    |  | ||||||
|   // Try projecting to one hop only |  | ||||||
|   LittleDiracOperator LittleDiracOpProj(geom_nn,FrbGrid,Coarse5d); |  | ||||||
|   LittleDiracOpProj.ProjectNearestNeighbour(0.2,LittleDiracOp); |  | ||||||
|  |  | ||||||
|   typedef HermitianLinearOperator<LittleDiracOperator,CoarseVector> HermMatrix; |  | ||||||
|   HermMatrix CoarseOp (LittleDiracOp); |  | ||||||
|   HermMatrix CoarseOpProj (LittleDiracOpProj); |  | ||||||
|    |  | ||||||
|   ////////////////////////////////////////// |  | ||||||
|   // Build a coarse lanczos |  | ||||||
|   ////////////////////////////////////////// |  | ||||||
|   Chebyshev<CoarseVector>      IRLCheby(0.5,60.0,71);  // 1 iter |  | ||||||
|   FunctionHermOp<CoarseVector> IRLOpCheby(IRLCheby,CoarseOp); |  | ||||||
|   PlainHermOp<CoarseVector>    IRLOp    (CoarseOp); |  | ||||||
|   int Nk=48; |  | ||||||
|   int Nm=64; |  | ||||||
|   int Nstop=Nk; |  | ||||||
|   ImplicitlyRestartedLanczos<CoarseVector> IRL(IRLOpCheby,IRLOp,Nstop,Nk,Nm,1.0e-5,20); |  | ||||||
|  |  | ||||||
|   int Nconv; |  | ||||||
|   std::vector<RealD>            eval(Nm); |  | ||||||
|   std::vector<CoarseVector>     evec(Nm,Coarse5d); |  | ||||||
|   CoarseVector c_src(Coarse5d); c_src=1.0; |  | ||||||
|   CoarseVector c_res(Coarse5d);  |  | ||||||
|   CoarseVector c_ref(Coarse5d);  |  | ||||||
|  |  | ||||||
|   PowerMethod<CoarseVector>       cPM;   cPM(CoarseOp,c_src); |  | ||||||
|  |  | ||||||
|   IRL.calc(eval,evec,c_src,Nconv); |  | ||||||
|   DeflatedGuesser<CoarseVector> DeflCoarseGuesser(evec,eval); |  | ||||||
|  |  | ||||||
|    |  | ||||||
|   ////////////////////////////////////////// |  | ||||||
|   // Build a coarse space solver |  | ||||||
|   ////////////////////////////////////////// |  | ||||||
|   int maxit=20000; |  | ||||||
|   ConjugateGradient<CoarseVector>  CG(1.0e-8,maxit,false); |  | ||||||
|   ConjugateGradient<LatticeFermionD>  CGfine(1.0e-8,10000,false); |  | ||||||
|   ZeroGuesser<CoarseVector> CoarseZeroGuesser; |  | ||||||
|  |  | ||||||
|   //  HPDSolver<CoarseVector> HPDSolve(CoarseOp,CG,CoarseZeroGuesser); |  | ||||||
|   HPDSolver<CoarseVector> HPDSolve(CoarseOp,CG,DeflCoarseGuesser); |  | ||||||
|   c_res=Zero(); |  | ||||||
|   HPDSolve(c_src,c_res); c_ref = c_res; |  | ||||||
|  |  | ||||||
|   ////////////////////////////////////////////////////////////////////////// |  | ||||||
|   // Deflated (with real op EV's) solve for the projected coarse op |  | ||||||
|   // Work towards ADEF1 in the coarse space |  | ||||||
|   ////////////////////////////////////////////////////////////////////////// |  | ||||||
|   HPDSolver<CoarseVector> HPDSolveProj(CoarseOpProj,CG,DeflCoarseGuesser); |  | ||||||
|   c_res=Zero(); |  | ||||||
|   HPDSolveProj(c_src,c_res); |  | ||||||
|   c_res = c_res - c_ref; |  | ||||||
|   std::cout << "Projected solver error "<<norm2(c_res)<<std::endl; |  | ||||||
|  |  | ||||||
|   ////////////////////////////////////////////////////////////////////// |  | ||||||
|   // Coarse ADEF1 with deflation space |  | ||||||
|   ////////////////////////////////////////////////////////////////////// |  | ||||||
|   ChebyshevSmoother<CoarseVector,HermMatrix > CoarseSmoother(4.0,45.,16,CoarseOpProj);  // 311 |  | ||||||
|   //  ChebyshevSmoother<CoarseVector,HermMatrix > CoarseSmoother(0.5,36.,10,CoarseOpProj);  // 311 |  | ||||||
|  |  | ||||||
|   //////////////////////////////////////////////////////// |  | ||||||
|   // CG, Cheby mode spacing 200,200 |  | ||||||
|   // Unprojected Coarse CG solve to 1e-8 : 190 iters, 4.9s |  | ||||||
|   // Unprojected Coarse CG solve to 4e-2 :  33 iters, 0.8s |  | ||||||
|   // Projected Coarse CG solve to 1e-8 : 100 iters, 0.36s |  | ||||||
|   //////////////////////////////////////////////////////// |  | ||||||
|   // CoarseSmoother(1.0,48.,8,CoarseOpProj); 48 evecs  |  | ||||||
|   //////////////////////////////////////////////////////// |  | ||||||
|   // ADEF1 Coarse solve to 1e-8 : 44 iters, 2.34s  2.1x gain |  | ||||||
|   // ADEF1 Coarse solve to 4e-2 : 7 iters, 0.4s |  | ||||||
|   // HDCG 38 iters 162s |  | ||||||
|   // |  | ||||||
|   // CoarseSmoother(1.0,40.,8,CoarseOpProj); 48 evecs  |  | ||||||
|   // ADEF1 Coarse solve to 1e-8 : 37 iters, 2.0s  2.1x gain |  | ||||||
|   // ADEF1 Coarse solve to 4e-2 : 6 iters, 0.36s |  | ||||||
|   // HDCG 38 iters 169s |  | ||||||
|  |  | ||||||
|   TwoLevelADEF1defl<CoarseVector> |  | ||||||
|     cADEF1(1.0e-8, 100, |  | ||||||
| 	   CoarseOp, |  | ||||||
| 	   CoarseSmoother, |  | ||||||
| 	   evec,eval); |  | ||||||
|  |  | ||||||
|   c_res=Zero(); |  | ||||||
|   cADEF1(c_src,c_res); |  | ||||||
|   c_res = c_res - c_ref; |  | ||||||
|   std::cout << "cADEF1 solver error "<<norm2(c_res)<<std::endl; |  | ||||||
|    |  | ||||||
|   cADEF1.Tolerance = 1.0e-9; |  | ||||||
|   c_res=Zero(); |  | ||||||
|   cADEF1(c_src,c_res); |  | ||||||
|   c_res = c_res - c_ref; |  | ||||||
|   std::cout << "cADEF1 solver error "<<norm2(c_res)<<std::endl; |  | ||||||
|    |  | ||||||
|   ////////////////////////////////////////// |  | ||||||
|   // Build a smoother |  | ||||||
|   ////////////////////////////////////////// |  | ||||||
|   //  ChebyshevSmoother<LatticeFermionD,HermFineMatrix > Smoother(10.0,100.0,10,FineHermOp); //499 |  | ||||||
|   //  ChebyshevSmoother<LatticeFermionD,HermFineMatrix > Smoother(3.0,100.0,10,FineHermOp);  //383 |  | ||||||
|   //  ChebyshevSmoother<LatticeFermionD,HermFineMatrix > Smoother(1.0,100.0,10,FineHermOp);  //328 |  | ||||||
|   //  std::vector<RealD> los({0.5,1.0,3.0}); // 147/142/146 nbasis 1 |  | ||||||
|   //  std::vector<RealD> los({1.0,2.0}); // Nbasis 24: 88,86 iterations |  | ||||||
|   //  std::vector<RealD> los({2.0,4.0}); // Nbasis 32 == 52, iters |  | ||||||
|   //  std::vector<RealD> los({2.0,4.0}); // Nbasis 40 == 36,36 iters |  | ||||||
|  |  | ||||||
|   // |  | ||||||
|   // Turns approx 2700 iterations into 340 fine multiplies with Nbasis 40 |  | ||||||
|   // Need to measure cost of coarse space. |  | ||||||
|   // |  | ||||||
|   // -- i) Reduce coarse residual   -- 0.04 |  | ||||||
|   // -- ii) Lanczos on coarse space -- done |  | ||||||
|   // -- iii) Possible 1 hop project and/or preconditioning it - easy - PrecCG it and |  | ||||||
|   //         use a limited stencil. Reread BFM code to check on evecs / deflation strategy with prec |  | ||||||
|   // |  | ||||||
|   std::vector<RealD> los({3.0}); // Nbasis 40 == 36,36 iters |  | ||||||
|  |  | ||||||
|   //  std::vector<int> ords({7,8,10}); // Nbasis 40 == 40,38,36 iters (320,342,396 mults) |  | ||||||
|   std::vector<int> ords({7}); // Nbasis 40 == 40 iters (320 mults)   |  | ||||||
|  |  | ||||||
|   for(int l=0;l<los.size();l++){ |  | ||||||
|  |  | ||||||
|     RealD lo = los[l]; |  | ||||||
|  |  | ||||||
|     for(int o=0;o<ords.size();o++){ |  | ||||||
|  |  | ||||||
|       ConjugateGradient<CoarseVector>  CGsloppy(4.0e-2,maxit,false); |  | ||||||
|       HPDSolver<CoarseVector> HPDSolveSloppy(CoarseOp,CGsloppy,DeflCoarseGuesser); |  | ||||||
|        |  | ||||||
|       //    ChebyshevSmoother<LatticeFermionD,HermFineMatrix > Smoother(lo,92,10,FineHermOp); // 36 best case |  | ||||||
|       ChebyshevSmoother<LatticeFermionD,HermFineMatrix > Smoother(lo,92,ords[o],FineHermOp);  // 311 |  | ||||||
|  |  | ||||||
|       ////////////////////////////////////////// |  | ||||||
|       // Build a HDCG solver |  | ||||||
|       ////////////////////////////////////////// |  | ||||||
|       TwoLevelADEF2<LatticeFermion,CoarseVector,Subspace> |  | ||||||
| 	HDCG(1.0e-8, 3000, |  | ||||||
| 	     FineHermOp, |  | ||||||
| 	     Smoother, |  | ||||||
| 	     HPDSolveSloppy, |  | ||||||
| 	     HPDSolve, |  | ||||||
| 	     Aggregates); |  | ||||||
|  |  | ||||||
|       result=Zero(); |  | ||||||
|       HDCG(src,result); |  | ||||||
|  |  | ||||||
|       TwoLevelADEF2<LatticeFermion,CoarseVector,Subspace> |  | ||||||
| 	HDCGdefl(1.0e-8, 100, |  | ||||||
| 		 FineHermOp, |  | ||||||
| 		 Smoother, |  | ||||||
| 		 cADEF1, |  | ||||||
| 		 HPDSolve, |  | ||||||
| 		 Aggregates); |  | ||||||
|        |  | ||||||
|       result=Zero(); |  | ||||||
|       HDCGdefl(src,result); |  | ||||||
|        |  | ||||||
|     } |  | ||||||
|   } |  | ||||||
|  |  | ||||||
|   // Standard CG |  | ||||||
|   result=Zero(); |  | ||||||
|   CGfine(HermOpEO, src, result); |  | ||||||
|    |  | ||||||
|   Grid_finalize(); |  | ||||||
|   return 0; |  | ||||||
| } |  | ||||||
| @@ -1,267 +0,0 @@ | |||||||
| /************************************************************************************* |  | ||||||
|  |  | ||||||
|     Grid physics library, www.github.com/paboyle/Grid  |  | ||||||
|  |  | ||||||
|     Source file: ./tests/Test_padded_cell.cc |  | ||||||
|  |  | ||||||
|     Copyright (C) 2023 |  | ||||||
|  |  | ||||||
| Author: Peter Boyle <paboyle@ph.ed.ac.uk> |  | ||||||
|  |  | ||||||
|     This program is free software; you can redistribute it and/or modify |  | ||||||
|     it under the terms of the GNU General Public License as published by |  | ||||||
|     the Free Software Foundation; either version 2 of the License, or |  | ||||||
|     (at your option) any later version. |  | ||||||
|  |  | ||||||
|     This program is distributed in the hope that it will be useful, |  | ||||||
|     but WITHOUT ANY WARRANTY; without even the implied warranty of |  | ||||||
|     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the |  | ||||||
|     GNU General Public License for more details. |  | ||||||
|  |  | ||||||
|     You should have received a copy of the GNU General Public License along |  | ||||||
|     with this program; if not, write to the Free Software Foundation, Inc., |  | ||||||
|     51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. |  | ||||||
|  |  | ||||||
|     See the full license in the file "LICENSE" in the top level distribution directory |  | ||||||
|     *************************************************************************************/ |  | ||||||
|     /*  END LEGAL */ |  | ||||||
| #include <Grid/Grid.h> |  | ||||||
| #include <Grid/lattice/PaddedCell.h> |  | ||||||
| #include <Grid/stencil/GeneralLocalStencil.h> |  | ||||||
|  |  | ||||||
| #include <Grid/algorithms/iterative/PrecGeneralisedConjugateResidual.h> |  | ||||||
| #include <Grid/algorithms/iterative/PrecGeneralisedConjugateResidualNonHermitian.h> |  | ||||||
| #include <Grid/algorithms/iterative/BiCGSTAB.h> |  | ||||||
|  |  | ||||||
| using namespace std; |  | ||||||
| using namespace Grid; |  | ||||||
|  |  | ||||||
| // Adaptor whose Op, AdjOp and HermOp all forward to wrapped.HermOp. |  | ||||||
| // OpDiag, OpDir, OpDirAll and HermOpAndNorm are unsupported and assert. |  | ||||||
| template<class Field> |  | ||||||
| class HermOpAdaptor : public LinearOperatorBase<Field> |  | ||||||
| { |  | ||||||
|   LinearOperatorBase<Field> & wrapped; |  | ||||||
| public: |  | ||||||
|   HermOpAdaptor(LinearOperatorBase<Field> &wrapme) : wrapped(wrapme)  {}; |  | ||||||
|   // Forwarding entry points. |  | ||||||
|   void Op     (const Field &in, Field &out){ wrapped.HermOp(in,out); } |  | ||||||
|   void AdjOp  (const Field &in, Field &out){ wrapped.HermOp(in,out); } |  | ||||||
|   void HermOp (const Field &in, Field &out){ wrapped.HermOp(in,out); } |  | ||||||
|   // Unsupported entry points. |  | ||||||
|   void OpDiag  (const Field &in, Field &out)                  { assert(0); } |  | ||||||
|   void OpDir   (const Field &in, Field &out,int dir,int disp) { assert(0); } |  | ||||||
|   void OpDirAll(const Field &in, std::vector<Field> &out)     { assert(0); } |  | ||||||
|   void HermOpAndNorm(const Field &in, Field &out,RealD &n1,RealD &n2){ assert(0); } |  | ||||||
| }; |  | ||||||
|  |  | ||||||
| // Linear operator PV^dag M built from two matrices of the same type: |  | ||||||
| //   Op     : out = PV^dag M in |  | ||||||
| //   AdjOp  : out = M^dag PV in            (the adjoint of Op) |  | ||||||
| //   HermOp : out = M^dag PV PV^dag M in   (AdjOp applied after Op) |  | ||||||
| // Both matrices are held by reference and must outlive this object. |  | ||||||
| // OpDiag, OpDir, OpDirAll and HermOpAndNorm are unsupported and assert. |  | ||||||
| template<class Matrix,class Field> |  | ||||||
| class PVdagMLinearOperator : public LinearOperatorBase<Field> { |  | ||||||
|   Matrix &_Mat; |  | ||||||
|   Matrix &_PV; |  | ||||||
| public: |  | ||||||
|   PVdagMLinearOperator(Matrix &Mat,Matrix &PV): _Mat(Mat),_PV(PV){}; |  | ||||||
|  |  | ||||||
|   void OpDiag (const Field &in, Field &out) {    assert(0);  } |  | ||||||
|   void OpDir  (const Field &in, Field &out,int dir,int disp) {    assert(0);  } |  | ||||||
|   void OpDirAll  (const Field &in, std::vector<Field> &out){    assert(0);  }; |  | ||||||
|   void Op     (const Field &in, Field &out){ |  | ||||||
|     Field tmp(in.Grid()); |  | ||||||
|     _Mat.M(in,tmp); |  | ||||||
|     _PV.Mdag(tmp,out); |  | ||||||
|   } |  | ||||||
|   void AdjOp     (const Field &in, Field &out){ |  | ||||||
|     // (PV^dag M)^dag = M^dag PV: apply PV to the input first, then M^dag. |  | ||||||
|     // The previous body applied PV to the uninitialised tmp and never |  | ||||||
|     // produced an output that depended on in. |  | ||||||
|     Field tmp(in.Grid()); |  | ||||||
|     _PV.M(in,tmp); |  | ||||||
|     _Mat.Mdag(tmp,out); |  | ||||||
|   } |  | ||||||
|   void HermOpAndNorm(const Field &in, Field &out,RealD &n1,RealD &n2){    assert(0);  } |  | ||||||
|   void HermOp(const Field &in, Field &out){ |  | ||||||
|     std::cout << "HermOp"<<std::endl; |  | ||||||
|     Field tmp(in.Grid()); |  | ||||||
|     // Op: out = PV^dag M in |  | ||||||
|     _Mat.M(in,tmp); |  | ||||||
|     _PV.Mdag(tmp,out); |  | ||||||
|     // AdjOp on that result: out = M^dag PV out |  | ||||||
|     _PV.M(out,tmp); |  | ||||||
|     _Mat.Mdag(tmp,out); |  | ||||||
|     std::cout << "HermOp done "<<norm2(out)<<std::endl; |  | ||||||
|   } |  | ||||||
| }; |  | ||||||
|  |  | ||||||
| // Test operator whose Op and AdjOp multiply the input site-wise by a fixed |  | ||||||
| // LatticeComplex field `scale` that the constructor builds from lattice |  | ||||||
| // coordinates. OpDiag, OpDir and OpDirAll are empty stubs. |  | ||||||
| template<class Field> class DumbOperator  : public LinearOperatorBase<Field> { |  | ||||||
| public: |  | ||||||
|   LatticeComplex scale; |  | ||||||
|  |  | ||||||
|   // For dimensions 1..4 in turn: scale = 100*scale + c, where c is the |  | ||||||
|   // coordinate in that dimension, taken from the Cshift-ed coordinate field |  | ||||||
|   // on sites whose coordinate is even. The resulting field is printed. |  | ||||||
|   DumbOperator(GridBase *grid) : scale(grid) |  | ||||||
|   { |  | ||||||
|     scale = 0.0; |  | ||||||
|     LatticeComplex shifted(grid); |  | ||||||
|     LatticeComplex coord(grid); |  | ||||||
|     for(int mu=1;mu<=4;mu++){ |  | ||||||
|       Lattice<iScalar<vInteger> > icoor(grid); |  | ||||||
|       LatticeCoordinate(icoor,mu); |  | ||||||
|       LatticeCoordinate(coord,mu); |  | ||||||
|       shifted = Cshift(coord,mu,1); |  | ||||||
|       scale = 100.0*scale + where( mod(icoor,2)==(Integer)0, shifted,coord); |  | ||||||
|     } |  | ||||||
|     std::cout << " scale\n" << scale << std::endl; |  | ||||||
|   } |  | ||||||
|  |  | ||||||
|   // Support for coarsening to a multigrid |  | ||||||
|   void OpDiag (const Field &in, Field &out) {}; |  | ||||||
|   void OpDir  (const Field &in, Field &out,int dir,int disp){}; |  | ||||||
|   void OpDirAll  (const Field &in, std::vector<Field> &out) {}; |  | ||||||
|  |  | ||||||
|   void Op     (const Field &in, Field &out){ |  | ||||||
|     out = scale * in; |  | ||||||
|   } |  | ||||||
|   void AdjOp  (const Field &in, Field &out){ |  | ||||||
|     out = scale * in; |  | ||||||
|   } |  | ||||||
|   // Same product as Op; the two norms computed along the way are discarded. |  | ||||||
|   void HermOp(const Field &in, Field &out){ |  | ||||||
|     double inner, normsq; |  | ||||||
|     HermOpAndNorm(in,out,inner,normsq); |  | ||||||
|   } |  | ||||||
|   // out = scale*in, n1 = Re<in,out>, n2 = Re<out,out>. |  | ||||||
|   void HermOpAndNorm(const Field &in, Field &out,double &n1,double &n2){ |  | ||||||
|     out = scale * in; |  | ||||||
|  |  | ||||||
|     ComplexD overlap = innerProduct(in,out); |  | ||||||
|     n1=real(overlap); |  | ||||||
|  |  | ||||||
|     ComplexD selfOverlap = innerProduct(out,out); |  | ||||||
|     n2=real(selfOverlap); |  | ||||||
|   } |  | ||||||
| }; |  | ||||||
|  |  | ||||||
|  |  | ||||||
| int main (int argc, char ** argv) |  | ||||||
| { |  | ||||||
|   Grid_init(&argc,&argv); |  | ||||||
|  |  | ||||||
|   const int Ls=2; |  | ||||||
|  |  | ||||||
|   GridCartesian         * UGrid   = SpaceTimeGrid::makeFourDimGrid(GridDefaultLatt(), GridDefaultSimd(Nd,vComplex::Nsimd()),GridDefaultMpi()); |  | ||||||
|   GridRedBlackCartesian * UrbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(UGrid); |  | ||||||
|  |  | ||||||
|   GridCartesian         * FGrid   = SpaceTimeGrid::makeFiveDimGrid(Ls,UGrid); |  | ||||||
|   GridRedBlackCartesian * FrbGrid = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls,UGrid); |  | ||||||
|  |  | ||||||
|   // Construct a coarsened grid |  | ||||||
|   Coordinate clatt = GridDefaultLatt(); |  | ||||||
|   for(int d=0;d<clatt.size();d++){ |  | ||||||
|     clatt[d] = clatt[d]/4; |  | ||||||
|   } |  | ||||||
|   GridCartesian *Coarse4d =  SpaceTimeGrid::makeFourDimGrid(clatt, GridDefaultSimd(Nd,vComplex::Nsimd()),GridDefaultMpi());; |  | ||||||
|   GridCartesian *Coarse5d =  SpaceTimeGrid::makeFiveDimGrid(1,Coarse4d); |  | ||||||
|  |  | ||||||
|   std::vector<int> seeds4({1,2,3,4}); |  | ||||||
|   std::vector<int> seeds5({5,6,7,8}); |  | ||||||
|   std::vector<int> cseeds({5,6,7,8}); |  | ||||||
|   GridParallelRNG          RNG5(FGrid);   RNG5.SeedFixedIntegers(seeds5); |  | ||||||
|   GridParallelRNG          RNG4(UGrid);   RNG4.SeedFixedIntegers(seeds4); |  | ||||||
|   GridParallelRNG          CRNG(Coarse5d);CRNG.SeedFixedIntegers(cseeds); |  | ||||||
|  |  | ||||||
|   LatticeFermion    src(FGrid); random(RNG5,src); |  | ||||||
|   LatticeFermion result(FGrid); result=Zero(); |  | ||||||
|   LatticeFermion    ref(FGrid); ref=Zero(); |  | ||||||
|   LatticeFermion    tmp(FGrid); |  | ||||||
|   LatticeFermion    err(FGrid); |  | ||||||
|   LatticeGaugeField Umu(UGrid); |  | ||||||
|  |  | ||||||
|   FieldMetaData header; |  | ||||||
|   std::string file("ckpoint_lat.4000"); |  | ||||||
|   NerscIO::readConfiguration(Umu,header,file); |  | ||||||
|   //Umu = 1.0; |  | ||||||
|    |  | ||||||
|   RealD mass=0.5; |  | ||||||
|   RealD M5=1.8; |  | ||||||
|  |  | ||||||
|   DomainWallFermionD Ddwf(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5); |  | ||||||
|   DomainWallFermionD Dpv(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,1.0,M5); |  | ||||||
|  |  | ||||||
|   const int nbasis = 1; |  | ||||||
|   const int cb = 0 ; |  | ||||||
|   LatticeFermion prom(FGrid); |  | ||||||
|  |  | ||||||
|   typedef GeneralCoarsenedMatrix<vSpinColourVector,vTComplex,nbasis> LittleDiracOperator; |  | ||||||
|   typedef LittleDiracOperator::CoarseVector CoarseVector; |  | ||||||
|  |  | ||||||
|   NextToNearestStencilGeometry5D geom(Coarse5d); |  | ||||||
|  |  | ||||||
|   std::cout<<GridLogMessage<<std::endl; |  | ||||||
|   std::cout<<GridLogMessage<<"*******************************************"<<std::endl; |  | ||||||
|   std::cout<<GridLogMessage<<std::endl; |  | ||||||
|    |  | ||||||
|   PVdagMLinearOperator<DomainWallFermionD,LatticeFermionD> PVdagM(Ddwf,Dpv); |  | ||||||
|   HermOpAdaptor<LatticeFermionD> HOA(PVdagM); |  | ||||||
|  |  | ||||||
|   // Run power method on HOA?? |  | ||||||
|   PowerMethod<LatticeFermion>       PM;   PM(HOA,src); |  | ||||||
|   |  | ||||||
|   // Warning: This routine calls PVdagM.Op, not PVdagM.HermOp |  | ||||||
|   typedef Aggregation<vSpinColourVector,vTComplex,nbasis> Subspace; |  | ||||||
|   Subspace AggregatesPD(Coarse5d,FGrid,cb); |  | ||||||
|   AggregatesPD.CreateSubspaceChebyshev(RNG5, |  | ||||||
| 				       HOA, |  | ||||||
| 				       nbasis, |  | ||||||
| 				       5000.0, |  | ||||||
| 				       0.02, |  | ||||||
| 				       100, |  | ||||||
| 				       50, |  | ||||||
| 				       50, |  | ||||||
| 				       0.0); |  | ||||||
|    |  | ||||||
|   LittleDiracOperator LittleDiracOpPV(geom,FGrid,Coarse5d); |  | ||||||
|   LittleDiracOpPV.CoarsenOperator(PVdagM,AggregatesPD); |  | ||||||
|  |  | ||||||
|   std::cout<<GridLogMessage<<std::endl; |  | ||||||
|   std::cout<<GridLogMessage<<"*******************************************"<<std::endl; |  | ||||||
|   std::cout<<GridLogMessage<<std::endl; |  | ||||||
|   std::cout<<GridLogMessage<<"Testing coarsened operator "<<std::endl; |  | ||||||
|    |  | ||||||
|   CoarseVector c_src (Coarse5d); |  | ||||||
|   CoarseVector c_res (Coarse5d); |  | ||||||
|   CoarseVector c_proj(Coarse5d); |  | ||||||
|  |  | ||||||
|   std::vector<LatticeFermion> subspace(nbasis,FGrid); |  | ||||||
|   subspace=AggregatesPD.subspace; |  | ||||||
|  |  | ||||||
|   Complex one(1.0); |  | ||||||
|   c_src = one;  // 1 in every element for vector 1. |  | ||||||
|   blockPromote(c_src,err,subspace); |  | ||||||
|  |  | ||||||
|   prom=Zero(); |  | ||||||
|   for(int b=0;b<nbasis;b++){ |  | ||||||
|     prom=prom+subspace[b]; |  | ||||||
|   } |  | ||||||
|   err=err-prom;  |  | ||||||
|   std::cout<<GridLogMessage<<"Promoted back from subspace: err "<<norm2(err)<<std::endl; |  | ||||||
|   std::cout<<GridLogMessage<<"c_src "<<norm2(c_src)<<std::endl; |  | ||||||
|   std::cout<<GridLogMessage<<"prom  "<<norm2(prom)<<std::endl; |  | ||||||
|  |  | ||||||
|   PVdagM.Op(prom,tmp); |  | ||||||
|   blockProject(c_proj,tmp,subspace); |  | ||||||
|   std::cout<<GridLogMessage<<" Called Big Dirac Op "<<norm2(tmp)<<std::endl; |  | ||||||
|  |  | ||||||
|   LittleDiracOpPV.M(c_src,c_res); |  | ||||||
|   std::cout<<GridLogMessage<<" Called Little Dirac Op c_src "<< norm2(c_src) << "  c_res "<< norm2(c_res) <<std::endl; |  | ||||||
|  |  | ||||||
|   std::cout<<GridLogMessage<<"Little dop : "<<norm2(c_res)<<std::endl; |  | ||||||
|   //  std::cout<<GridLogMessage<<" Little "<< c_res<<std::endl; |  | ||||||
|  |  | ||||||
|   std::cout<<GridLogMessage<<"Big dop in subspace : "<<norm2(c_proj)<<std::endl; |  | ||||||
|   //  std::cout<<GridLogMessage<<" Big "<< c_proj<<std::endl; |  | ||||||
|   c_proj = c_proj - c_res; |  | ||||||
|   std::cout<<GridLogMessage<<" ldop error: "<<norm2(c_proj)<<std::endl; |  | ||||||
|   //  std::cout<<GridLogMessage<<" error "<< c_proj<<std::endl; |  | ||||||
|  |  | ||||||
|   std::cout<<GridLogMessage<<std::endl; |  | ||||||
|   std::cout<<GridLogMessage<<"*******************************************"<<std::endl; |  | ||||||
|   std::cout<<GridLogMessage<<std::endl; |  | ||||||
|   std::cout<<GridLogMessage << "Done "<< std::endl; |  | ||||||
|  |  | ||||||
|   Grid_finalize(); |  | ||||||
|   return 0; |  | ||||||
| } |  | ||||||
| @@ -83,8 +83,15 @@ int main(int argc, char **argv) | |||||||
|   // need wrappers of the fermionic classes  |   // need wrappers of the fermionic classes  | ||||||
|   // that have a complex construction |   // that have a complex construction | ||||||
|   // standard |   // standard | ||||||
|   RealD beta = 5.6 ; |   RealD beta = 6.6 ;  | ||||||
|  |  | ||||||
|  | #if 0 | ||||||
|   WilsonGaugeActionR Waction(beta); |   WilsonGaugeActionR Waction(beta); | ||||||
|  | #else | ||||||
|  |   std::vector<Complex> boundaryG = {1,1,1,0}; | ||||||
|  |   WilsonGaugeActionR::ImplParams ParamsG(boundaryG); | ||||||
|  |   WilsonGaugeActionR Waction(beta,ParamsG); | ||||||
|  | #endif | ||||||
|    |    | ||||||
|   ActionLevel<HMCWrapper::Field> Level1(1); |   ActionLevel<HMCWrapper::Field> Level1(1); | ||||||
|   Level1.push_back(&Waction); |   Level1.push_back(&Waction); | ||||||
|   | |||||||
							
								
								
									
										238
									
								
								tests/hmc/Test_hmc_WilsonGauge_Implicit.cc
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										238
									
								
								tests/hmc/Test_hmc_WilsonGauge_Implicit.cc
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,238 @@ | |||||||
|  | /************************************************************************************* | ||||||
|  |  | ||||||
|  | Grid physics library, www.github.com/paboyle/Grid | ||||||
|  |  | ||||||
|  | Source file: ./tests/Test_hmc_WilsonFermionGauge.cc | ||||||
|  |  | ||||||
|  | Copyright (C) 2015 | ||||||
|  |  | ||||||
|  | Author: Peter Boyle <pabobyle@ph.ed.ac.uk> | ||||||
|  | Author: neo <cossu@post.kek.jp> | ||||||
|  | Author: Guido Cossu <guido.cossu@ed.ac.uk> | ||||||
|  |  | ||||||
|  | This program is free software; you can redistribute it and/or modify | ||||||
|  | it under the terms of the GNU General Public License as published by | ||||||
|  | the Free Software Foundation; either version 2 of the License, or | ||||||
|  | (at your option) any later version. | ||||||
|  |  | ||||||
|  | This program is distributed in the hope that it will be useful, | ||||||
|  | but WITHOUT ANY WARRANTY; without even the implied warranty of | ||||||
|  | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the | ||||||
|  | GNU General Public License for more details. | ||||||
|  |  | ||||||
|  | You should have received a copy of the GNU General Public License along | ||||||
|  | with this program; if not, write to the Free Software Foundation, Inc., | ||||||
|  | 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. | ||||||
|  |  | ||||||
|  | See the full license in the file "LICENSE" in the top level distribution | ||||||
|  | directory | ||||||
|  | *************************************************************************************/ | ||||||
|  | /*  END LEGAL */ | ||||||
|  | #include <Grid/Grid.h> | ||||||
|  |  | ||||||
|  | #undef USE_OBC | ||||||
|  | #define DO_IMPLICIT | ||||||
|  |  | ||||||
|  |  | ||||||
|  | int main(int argc, char **argv)  | ||||||
|  | { | ||||||
|  |   using namespace Grid; | ||||||
|  |  | ||||||
|  |   Grid_init(&argc, &argv); | ||||||
|  |   GridLogLayout(); | ||||||
|  |    | ||||||
|  |   std::string arg; | ||||||
|  |   | ||||||
|  |   HMCparameters HMCparams; | ||||||
|  | #if 1 | ||||||
|  |   { | ||||||
|  |     XmlReader  HMCrd("HMCparameters.xml"); | ||||||
|  |     read(HMCrd,"HMCparameters",HMCparams); | ||||||
|  |   } | ||||||
|  | #else | ||||||
|  | //IntegratorParameters MD; | ||||||
|  |   std::vector<int> steps(0); | ||||||
|  |   if( GridCmdOptionExists(argv,argv+argc,"--MDsteps") ){ | ||||||
|  |     arg= GridCmdOptionPayload(argv,argv+argc,"--MDsteps"); | ||||||
|  |     GridCmdOptionIntVector(arg,steps); | ||||||
|  |     assert(steps.size()==1); | ||||||
|  |   } | ||||||
|  |   MD.trajL   = 0.001*std::sqrt(2.); | ||||||
|  |   MD.MDsteps = 1; | ||||||
|  |   if (steps.size()>0) MD.MDsteps = steps[0]; | ||||||
|  |   if( GridCmdOptionExists(argv,argv+argc,"--trajL") ){ | ||||||
|  |     arg= GridCmdOptionPayload(argv,argv+argc,"--trajL"); | ||||||
|  |     std::vector<int> traj(0); | ||||||
|  |     GridCmdOptionIntVector(arg,traj); | ||||||
|  |     assert(traj.size()==1); | ||||||
|  |     MD.trajL *= double(traj[0]); | ||||||
|  |   } | ||||||
|  |   MD.RMHMCTol=1e-8; | ||||||
|  |   MD.RMHMCCGTol=1e-8; | ||||||
|  |   std::cout << "RMHMCTol= "<<  MD.RMHMCTol<<" RMHMCCGTol= "<<MD.RMHMCCGTol<<std::endl; | ||||||
|  |  | ||||||
|  |   HMCparameters HMCparams; | ||||||
|  |   HMCparams.StartTrajectory  = 0; | ||||||
|  |   HMCparams.Trajectories     = 1; | ||||||
|  |   HMCparams.NoMetropolisUntil=  100; | ||||||
|  |   // "[HotStart, ColdStart, TepidStart, CheckpointStart]\n"; | ||||||
|  |   HMCparams.StartingType     =std::string("ColdStart"); | ||||||
|  |   HMCparams.Kappa=0.01; //checking against trivial. Pathetic. | ||||||
|  |   HMCparams.MD = MD; | ||||||
|  | #endif | ||||||
|  |  | ||||||
|  |  | ||||||
|  |  | ||||||
|  |    // Typedefs to simplify notation | ||||||
|  | #ifdef DO_IMPLICIT | ||||||
|  |   typedef GenericHMCRunner<ImplicitMinimumNorm2> HMCWrapper;  // Uses the default minimum norm | ||||||
|  | //  typedef GenericHMCRunner<ImplicitCampostrini> HMCWrapper;  // 4th order | ||||||
|  |   HMCparams.MD.name    = std::string("ImplicitMinimumNorm2"); | ||||||
|  | #else | ||||||
|  |   typedef GenericHMCRunner<MinimumNorm2> HMCWrapper;  // Uses the default minimum norm | ||||||
|  |   HMCparams.MD.name    = std::string("MinimumNorm2"); | ||||||
|  | #endif | ||||||
|  |  | ||||||
|  |  | ||||||
|  |  | ||||||
|  |   // Possibile to create the module by hand  | ||||||
|  |   // hardcoding parameters or using a Reader | ||||||
|  |  | ||||||
|  |  | ||||||
|  |   // Checkpointer definition | ||||||
|  |   CheckpointerParameters CPparams;   | ||||||
|  |   CPparams.config_prefix = "ckpoint_lat"; | ||||||
|  |   CPparams.rng_prefix = "ckpoint_rng"; | ||||||
|  |   CPparams.saveInterval = 1; | ||||||
|  |   CPparams.format = "IEEE64BIG"; | ||||||
|  |    | ||||||
|  |   HMCWrapper TheHMC(HMCparams); | ||||||
|  |   // Grid from the command line | ||||||
|  |   TheHMC.Resources.AddFourDimGrid("gauge"); | ||||||
|  |   TheHMC.Resources.LoadNerscCheckpointer(CPparams); | ||||||
|  |  | ||||||
|  |   RNGModuleParameters RNGpar; | ||||||
|  |   RNGpar.serial_seeds = "1 2 3 4 5"; | ||||||
|  |   RNGpar.parallel_seeds = "6 7 8 9 10"; | ||||||
|  |   TheHMC.Resources.SetRNGSeeds(RNGpar); | ||||||
|  |  | ||||||
|  |   // Construct observables | ||||||
|  |   // here there is too much indirection  | ||||||
|  |   typedef PlaquetteMod<HMCWrapper::ImplPolicy> PlaqObs; | ||||||
|  |   typedef TopologicalChargeMod<HMCWrapper::ImplPolicy> QObs; | ||||||
|  |   TheHMC.Resources.AddObservable<PlaqObs>(); | ||||||
|  |   TopologyObsParameters TopParams; | ||||||
|  |   TopParams.interval = 1; | ||||||
|  |   TopParams.do_smearing = true; | ||||||
|  | //  TopParams.Smearing.steps = 1600; | ||||||
|  | //  TopParams.Smearing.step_size = 0.01; | ||||||
|  |   TopParams.Smearing.init_step_size = 0.01; | ||||||
|  |   TopParams.Smearing.meas_interval = 10; | ||||||
|  |   TopParams.Smearing.maxTau = 16.0;  | ||||||
|  | //  TheHMC.Resources.AddObservable<QObs>(TopParams); | ||||||
|  |   ////////////////////////////////////////////// | ||||||
|  |  | ||||||
|  |   ///////////////////////////////////////////////////////////// | ||||||
|  |   // Collect actions, here use more encapsulation | ||||||
|  |   // need wrappers of the fermionic classes  | ||||||
|  |   // that have a complex construction | ||||||
|  |   // standard | ||||||
|  |  | ||||||
|  |   RealD beta = 6.6; | ||||||
|  |   std::cout << "Wilson Gauge beta= " <<beta <<std::endl; | ||||||
|  | #ifndef USE_OBC | ||||||
|  |   WilsonGaugeActionR Waction(beta); | ||||||
|  | #else | ||||||
|  |   std::vector<Complex> boundaryG = {1,1,1,0}; | ||||||
|  |   WilsonGaugeActionR::ImplParams ParamsG(boundaryG); | ||||||
|  |   WilsonGaugeActionR Waction(beta,ParamsG); | ||||||
|  |   std::cout << "boundaryG = " <<boundaryG  <<std::endl; | ||||||
|  | #endif | ||||||
|  |  | ||||||
|  |    | ||||||
|  |   ActionLevel<HMCWrapper::Field> Level1(1); | ||||||
|  |   Level1.push_back(&Waction); | ||||||
|  |   TheHMC.TheAction.push_back(Level1); | ||||||
|  |  | ||||||
|  |   TheHMC.ReadCommandLine(argc, argv); // these can be parameters from file | ||||||
|  |   std::cout << "trajL= " <<TheHMC.Parameters.MD.trajL <<" steps= "<<TheHMC.Parameters.MD.MDsteps << " integrator= "<<TheHMC.Parameters.MD.name<<std::endl; | ||||||
|  |  | ||||||
|  |   NoSmearing<HMCWrapper::ImplPolicy> S; | ||||||
|  | #ifndef DO_IMPLICIT | ||||||
|  |   TrivialMetric<HMCWrapper::ImplPolicy::Field> Mtr; | ||||||
|  | #else | ||||||
|  | // g_x3_2 | ||||||
|  |     LaplacianRatParams gpar(2),mpar(2); | ||||||
|  |     gpar.offset = 1.; | ||||||
|  |     gpar.a0[0] = 500.; | ||||||
|  |     gpar.a1[0] = 0.; | ||||||
|  |     gpar.b0[0] = 0.25; | ||||||
|  |     gpar.b1[0] = 1.; | ||||||
|  |     gpar.a0[1] = -500.; | ||||||
|  |     gpar.a1[1] = 0.; | ||||||
|  |     gpar.b0[1] = 0.36; | ||||||
|  |     gpar.b1[1] = 1.2; | ||||||
|  |     gpar.b2=1.; | ||||||
|  |  | ||||||
|  |     mpar.offset = 1.; | ||||||
|  |     mpar.a0[0] =  -0.850891906532; | ||||||
|  |     mpar.a1[0] = -1.54707654538; | ||||||
|  |     mpar. b0[0] = 2.85557166137; | ||||||
|  |     mpar. b1[0] = 5.74194794773; | ||||||
|  |     mpar.a0[1] = -13.5120056831218384729709214298; | ||||||
|  |     mpar.a1[1] = 1.54707654538396877086370295729; | ||||||
|  |     mpar.b0[1] = 19.2921090880640520026645390317; | ||||||
|  |     mpar.b1[1] = -3.54194794773029020262811172870; | ||||||
|  |     mpar.b2=1.; | ||||||
|  |     for(int i=0;i<2;i++){ | ||||||
|  |        gpar.a1[i] *=16.; | ||||||
|  |        gpar.b1[i] *=16.; | ||||||
|  |        mpar.a1[i] *=16.; | ||||||
|  |        mpar.b1[i] *=16.; | ||||||
|  |     } | ||||||
|  |     gpar.b2 *= 16.*16.; | ||||||
|  |     mpar.b2 *= 16.*16.; | ||||||
|  |  | ||||||
|  |     ConjugateGradient<LatticeGaugeField> CG(1.0e-8,10000); | ||||||
|  |     LaplacianParams LapPar(0.0001, 1.0, 10000, 1e-8, 12, 64); | ||||||
|  |  | ||||||
|  |     std::cout << GridLogMessage << "LaplacianRat " << std::endl; | ||||||
|  |  | ||||||
|  |     gpar.tolerance=HMCparams.MD.RMHMCCGTol; | ||||||
|  |     mpar.tolerance=HMCparams.MD.RMHMCCGTol; | ||||||
|  |  | ||||||
|  |     std::cout << GridLogMessage << "gpar offset= " << gpar.offset <<std::endl; | ||||||
|  |     std::cout << GridLogMessage << " a0= " << gpar.a0 <<std::endl; | ||||||
|  |     std::cout << GridLogMessage << " a1= " << gpar.a1 <<std::endl; | ||||||
|  |     std::cout << GridLogMessage << " b0= " << gpar.b0 <<std::endl; | ||||||
|  |     std::cout << GridLogMessage << " b1= " << gpar.b1 <<std::endl; | ||||||
|  |     std::cout << GridLogMessage << " b2= " << gpar.b2 <<std::endl ;; | ||||||
|  |  | ||||||
|  |     std::cout << GridLogMessage << "mpar offset= " << mpar.offset <<std::endl; | ||||||
|  |     std::cout << GridLogMessage << " a0= " << mpar.a0 <<std::endl; | ||||||
|  |     std::cout << GridLogMessage << " a1= " << mpar.a1 <<std::endl; | ||||||
|  |     std::cout << GridLogMessage << " b0= " << mpar.b0 <<std::endl; | ||||||
|  |     std::cout << GridLogMessage << " b1= " << mpar.b1 <<std::endl; | ||||||
|  |     std::cout << GridLogMessage << " b2= " << mpar.b2 <<std::endl; | ||||||
|  | //  Assumes PeriodicGimplR or D at the moment | ||||||
|  |     Coordinate latt  = GridDefaultLatt(); | ||||||
|  |     Coordinate mpi   = GridDefaultMpi(); | ||||||
|  |     auto UGrid = TheHMC.Resources.GetCartesian("gauge"); | ||||||
|  |     Coordinate simdF = GridDefaultSimd(Nd,vComplexF::Nsimd()); | ||||||
|  |     auto UGrid_f   = SpaceTimeGrid::makeFourDimGrid(latt,simdF,mpi); | ||||||
|  |     std::cout << GridLogMessage << " UGrid= " << UGrid <<std::endl; | ||||||
|  |     std::cout << GridLogMessage << " UGrid_f= " << UGrid_f <<std::endl; | ||||||
|  |  | ||||||
|  |     LaplacianAdjointRat<HMCWrapper::ImplPolicy, PeriodicGimplF> Mtr(UGrid, UGrid_f,CG, gpar, mpar); | ||||||
|  | #endif | ||||||
|  |   | ||||||
|  |   { | ||||||
|  |     XmlWriter HMCwr("HMCparameters.xml.out"); | ||||||
|  |     write(HMCwr,"HMCparameters",TheHMC.Parameters); | ||||||
|  |   } | ||||||
|  |  | ||||||
|  |   TheHMC.Run(S,Mtr);  // no smearing | ||||||
|  |  | ||||||
|  |   Grid_finalize(); | ||||||
|  |  | ||||||
|  | } // main | ||||||
		Reference in New Issue
	
	Block a user