mirror of https://github.com/paboyle/Grid.git
synced 2025-10-22 00:44:45 +01:00

Compare commits

77 Commits

feature/dw ... feature/la
	| Author | SHA1 | Date | |
|---|---|---|---|
|  | 27ea2afe86 | ||
|  | 78e8704eac | ||
|  | 67131d82f2 | ||
|  | 615a9448b9 | ||
|  | 00164f5ce5 | ||
|  | a7f72eb994 | ||
|  | 501fa1614a | ||
|  | 5bf42e1e15 | ||
|  | fe4d9b003c | ||
|  | 4a699b4da3 | ||
|  | 689323f4ee | ||
|  | 84b441800f | ||
|  | 1ef424b139 | ||
|  | aa66f41c69 | ||
|  | f96c800d25 | ||
|  | 32a52d7583 | ||
|  | fa04b6d3c2 | ||
|  | 7fab183c0e | ||
|  | 9ec9850bdb | ||
|  | 0c4ddaea0b | ||
|  | 00ebc150ad | ||
|  | 0f3e9ae57d | ||
|  | 034de160bf | ||
|  | 14507fd6e4 | ||
|  | 2db05ac214 | ||
|  | 31f99574fa | ||
|  | a34c8a2961 | ||
|  | ccd20df827 | ||
|  | e9be293444 | ||
|  | d577211cc3 | ||
|  | f4336e480a | ||
|  | e4d461cb03 | ||
|  | 3d63b4894e | ||
|  | 08583afaff | ||
|  | b395a312af | ||
|  | 66295b99aa | ||
|  | b8654be0ef | ||
|  | a479325349 | ||
|  | f6c3f6bf2d | ||
|  | d83868fdbb | ||
|  | 303e0b927d | ||
|  | 28ba8a0f48 | ||
|  | f9e28577f3 | ||
|  | 8a3aae98f6 | ||
|  | 8309f2364b | ||
|  | cac1750078 | ||
|  | 27936900e6 | ||
|  | e325929851 | ||
|  | 47af3565f4 | ||
|  | 4b4d187935 | ||
|  | 9aff354ab5 | ||
|  | cb9ff20249 | ||
|  | 9fe6ac71ea | ||
|  | f1fa00b71b | ||
|  | bf58557fb1 | ||
|  | 10cb37f504 | ||
|  | 1374c943d4 | ||
|  | a1d80282ec | ||
|  | 4eb8bbbebe | ||
|  | d1c6288c5f | ||
|  | dd949bc428 | ||
|  | bb7378cfc3 | ||
|  | f0e084a88c | ||
|  | 153672d8ec | ||
|  | 08ca338875 | ||
|  | f7cbf82c04 | ||
|  | 07009c569a | ||
|  | 09f4cdb11e | ||
|  | 1e54882f71 | ||
|  | d54807b8c0 | ||
|  | 5625b47c7d | ||
|  | eb6153080a | ||
|  | f7072d1ac2 | ||
|  | a6eeea777b | ||
|  | 77f7737ccc | ||
|  | f9df685cde | ||
|  | 0cd6b1858c | ||

| @@ -550,6 +550,7 @@ AC_CONFIG_FILES(tests/forces/Makefile) | ||||
| AC_CONFIG_FILES(tests/hadrons/Makefile) | ||||
| AC_CONFIG_FILES(tests/hmc/Makefile) | ||||
| AC_CONFIG_FILES(tests/solver/Makefile) | ||||
| AC_CONFIG_FILES(tests/lanczos/Makefile) | ||||
| AC_CONFIG_FILES(tests/smearing/Makefile) | ||||
| AC_CONFIG_FILES(tests/qdpxx/Makefile) | ||||
| AC_CONFIG_FILES(tests/testu01/Makefile) | ||||
|   | ||||
| @@ -103,29 +103,32 @@ namespace Grid { | ||||
|     GridBase *CoarseGrid; | ||||
|     GridBase *FineGrid; | ||||
|     std::vector<Lattice<Fobj> > subspace; | ||||
|     int checkerboard; | ||||
|  | ||||
|     Aggregation(GridBase *_CoarseGrid,GridBase *_FineGrid) :  | ||||
|       CoarseGrid(_CoarseGrid), | ||||
|   Aggregation(GridBase *_CoarseGrid,GridBase *_FineGrid,int _checkerboard) :  | ||||
|     CoarseGrid(_CoarseGrid), | ||||
|       FineGrid(_FineGrid), | ||||
|       subspace(nbasis,_FineGrid) | ||||
|       subspace(nbasis,_FineGrid), | ||||
|       checkerboard(_checkerboard) | ||||
| 	{ | ||||
| 	}; | ||||
|    | ||||
|     void Orthogonalise(void){ | ||||
|       CoarseScalar InnerProd(CoarseGrid);  | ||||
|       std::cout << GridLogMessage <<" Gramm-Schmidt pass 1"<<std::endl; | ||||
|       blockOrthogonalise(InnerProd,subspace); | ||||
|       std::cout << GridLogMessage <<" Gramm-Schmidt pass 2"<<std::endl; | ||||
|       blockOrthogonalise(InnerProd,subspace); | ||||
|       //      std::cout << GridLogMessage <<" Gramm-Schmidt checking orthogonality"<<std::endl; | ||||
|       //      CheckOrthogonal(); | ||||
|     }  | ||||
|     void CheckOrthogonal(void){ | ||||
|       CoarseVector iProj(CoarseGrid);  | ||||
|       CoarseVector eProj(CoarseGrid);  | ||||
|       Lattice<CComplex> pokey(CoarseGrid); | ||||
|  | ||||
|        | ||||
|       for(int i=0;i<nbasis;i++){ | ||||
| 	blockProject(iProj,subspace[i],subspace); | ||||
|  | ||||
| 	eProj=zero;  | ||||
| 	for(int ss=0;ss<CoarseGrid->oSites();ss++){ | ||||
| 	parallel_for(int ss=0;ss<CoarseGrid->oSites();ss++){ | ||||
| 	  eProj._odata[ss](i)=CComplex(1.0); | ||||
| 	} | ||||
| 	eProj=eProj - iProj; | ||||
| @@ -137,6 +140,7 @@ namespace Grid { | ||||
|       blockProject(CoarseVec,FineVec,subspace); | ||||
|     } | ||||
|     void PromoteFromSubspace(const CoarseVector &CoarseVec,FineField &FineVec){ | ||||
|       FineVec.checkerboard = subspace[0].checkerboard; | ||||
|       blockPromote(CoarseVec,FineVec,subspace); | ||||
|     } | ||||
|     void CreateSubspaceRandom(GridParallelRNG &RNG){ | ||||
| @@ -147,6 +151,7 @@ namespace Grid { | ||||
|       Orthogonalise(); | ||||
|     } | ||||
|  | ||||
|     /* | ||||
|     virtual void CreateSubspaceLanczos(GridParallelRNG  &RNG,LinearOperatorBase<FineField> &hermop,int nn=nbasis)  | ||||
|     { | ||||
|       // Run a Lanczos with sloppy convergence | ||||
| @@ -195,7 +200,7 @@ namespace Grid { | ||||
| 	  std::cout << GridLogMessage <<"subspace["<<b<<"] = "<<norm2(subspace[b])<<std::endl; | ||||
| 	} | ||||
|     } | ||||
|  | ||||
|     */ | ||||
|     virtual void CreateSubspace(GridParallelRNG  &RNG,LinearOperatorBase<FineField> &hermop,int nn=nbasis) { | ||||
|  | ||||
|       RealD scale; | ||||
|   | ||||
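
For orientation, here is a minimal sketch of how the extended `Aggregation` interface would be driven. This is not part of the diff: the grids, RNG and Hermitian-operator wrapper are assumed to exist, and the template parameters are illustrative only.

```cpp
// Hedged usage sketch: CoarseGrid, FineRBGrid, RNG and HermOp (a LinearOperatorBase
// on the fine field) are assumed to be set up elsewhere.
const int nbasis = 20;
typedef Aggregation<vSpinColourVector, vTComplex, nbasis> Subspace;

Subspace Aggregates(CoarseGrid, FineRBGrid, Odd); // checkerboard is now passed explicitly
Aggregates.CreateSubspace(RNG, HermOp, nbasis);   // build the near-null basis
Aggregates.Orthogonalise();                       // two block Gram-Schmidt passes, as above

Subspace::CoarseVector c_vec(CoarseGrid);
LatticeFermion         f_vec(FineRBGrid);
f_vec.checkerboard = Odd;

Aggregates.ProjectToSubspace(c_vec, f_vec);    // restrict fine -> coarse
Aggregates.PromoteFromSubspace(c_vec, f_vec);  // prolongate; FineVec inherits the subspace checkerboard
```
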
| @@ -162,15 +162,10 @@ namespace Grid { | ||||
| 	_Mat.M(in,out); | ||||
|       } | ||||
|       void HermOpAndNorm(const Field &in, Field &out,RealD &n1,RealD &n2){ | ||||
| 	ComplexD dot; | ||||
|  | ||||
| 	_Mat.M(in,out); | ||||
| 	 | ||||
| 	dot= innerProduct(in,out); | ||||
| 	n1=real(dot); | ||||
|  | ||||
| 	dot = innerProduct(out,out); | ||||
| 	n2=real(dot); | ||||
| 	ComplexD dot= innerProduct(in,out); n1=real(dot); | ||||
| 	n2=norm2(out); | ||||
|       } | ||||
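
The rewrite above is exact, not an approximation: with `out = M in`,

```latex
n_1 = \mathrm{Re}\,\langle \mathrm{in},\, M\,\mathrm{in}\rangle ,
\qquad
n_2 = \langle M\,\mathrm{in},\, M\,\mathrm{in}\rangle = \lVert \mathrm{out}\rVert^2 ,
```

so the second `innerProduct` can be replaced by `norm2(out)`, which returns the same real number directly.
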
|       void HermOp(const Field &in, Field &out){ | ||||
| 	_Mat.M(in,out); | ||||
| @@ -192,10 +187,10 @@ namespace Grid { | ||||
| 	ni=Mpc(in,tmp); | ||||
| 	no=MpcDag(tmp,out); | ||||
|       } | ||||
|       void HermOpAndNorm(const Field &in, Field &out,RealD &n1,RealD &n2){ | ||||
|       virtual void HermOpAndNorm(const Field &in, Field &out,RealD &n1,RealD &n2){ | ||||
| 	MpcDagMpc(in,out,n1,n2); | ||||
|       } | ||||
|       void HermOp(const Field &in, Field &out){ | ||||
|       virtual void HermOp(const Field &in, Field &out){ | ||||
| 	RealD n1,n2; | ||||
| 	HermOpAndNorm(in,out,n1,n2); | ||||
|       } | ||||
| @@ -212,7 +207,6 @@ namespace Grid { | ||||
|       void OpDir  (const Field &in, Field &out,int dir,int disp) { | ||||
| 	assert(0); | ||||
|       } | ||||
|  | ||||
|     }; | ||||
|     template<class Matrix,class Field> | ||||
|       class SchurDiagMooeeOperator :  public SchurOperatorBase<Field> { | ||||
| @@ -270,7 +264,6 @@ namespace Grid { | ||||
| 	return axpy_norm(out,-1.0,tmp,in); | ||||
|       } | ||||
|     }; | ||||
|  | ||||
|     template<class Matrix,class Field> | ||||
|       class SchurDiagTwoOperator :  public SchurOperatorBase<Field> { | ||||
|     protected: | ||||
| @@ -299,6 +292,45 @@ namespace Grid { | ||||
| 	return axpy_norm(out,-1.0,tmp,in); | ||||
|       } | ||||
|     }; | ||||
|     /////////////////////////////////////////////////////////////////////////////////////////////////// | ||||
|     // Left  handed Moo^-1 ; (Moo - Moe Mee^-1 Meo) psi = eta  -->  ( 1 - Moo^-1 Moe Mee^-1 Meo ) psi = Moo^-1 eta | ||||
|     // Right handed Moo^-1 ; (Moo - Moe Mee^-1 Meo) Moo^-1 Moo psi = eta  -->  ( 1 - Moe Mee^-1 Meo ) Moo^-1 phi=eta ; psi = Moo^-1 phi | ||||
|     /////////////////////////////////////////////////////////////////////////////////////////////////// | ||||
|     template<class Matrix,class Field> using SchurDiagOneRH = SchurDiagTwoOperator<Matrix,Field> ; | ||||
|     template<class Matrix,class Field> using SchurDiagOneLH = SchurDiagOneOperator<Matrix,Field> ; | ||||
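
Restating the comment above in display form (same content): starting from the Schur-complement system $(M_{oo} - M_{oe} M_{ee}^{-1} M_{eo})\,\psi = \eta$ on one checkerboard,

```latex
\begin{align}
\text{left-handed:}\quad
 &\bigl(1 - M_{oo}^{-1} M_{oe} M_{ee}^{-1} M_{eo}\bigr)\,\psi = M_{oo}^{-1}\eta, \\
\text{right-handed:}\quad
 &\bigl(1 - M_{oe} M_{ee}^{-1} M_{eo} M_{oo}^{-1}\bigr)\,\phi = \eta,
 \qquad \psi = M_{oo}^{-1}\phi .
\end{align}
```

The two `using` aliases simply map these onto the existing `SchurDiagTwoOperator` and `SchurDiagOneOperator` implementations.
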
|     /////////////////////////////////////////////////////////////////////////////////////////////////// | ||||
|     //  Staggered use | ||||
|     /////////////////////////////////////////////////////////////////////////////////////////////////// | ||||
|     template<class Matrix,class Field> | ||||
|       class SchurStaggeredOperator :  public SchurOperatorBase<Field> { | ||||
|     protected: | ||||
|       Matrix &_Mat; | ||||
|     public: | ||||
|       SchurStaggeredOperator (Matrix &Mat): _Mat(Mat){}; | ||||
|       virtual void HermOpAndNorm(const Field &in, Field &out,RealD &n1,RealD &n2){ | ||||
| 	n2 = Mpc(in,out); | ||||
| 	ComplexD dot= innerProduct(in,out); | ||||
| 	n1 = real(dot); | ||||
|       } | ||||
|       virtual void HermOp(const Field &in, Field &out){ | ||||
| 	Mpc(in,out); | ||||
|       } | ||||
|       virtual  RealD Mpc      (const Field &in, Field &out) { | ||||
| 	Field tmp(in._grid); | ||||
| 	_Mat.Meooe(in,tmp); | ||||
| 	_Mat.MooeeInv(tmp,out); | ||||
| 	_Mat.Meooe(out,tmp); | ||||
| 	_Mat.Mooee(in,out); | ||||
|         return axpy_norm(out,-1.0,tmp,out); | ||||
|       } | ||||
|       virtual  RealD MpcDag   (const Field &in, Field &out){ | ||||
| 	return Mpc(in,out); | ||||
|       } | ||||
|       virtual void MpcDagMpc(const Field &in, Field &out,RealD &ni,RealD &no) { | ||||
| 	assert(0);// Never need with staggered | ||||
|       } | ||||
|     }; | ||||
|     template<class Matrix,class Field> using SchurStagOperator = SchurStaggeredOperator<Matrix,Field>; | ||||
|  | ||||
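
A hedged sketch of how `SchurStaggeredOperator` would typically be paired with a Krylov solver; the staggered action `Ds`, the red-black grid `UrbGrid` and the source are assumed to be constructed elsewhere and are not part of this diff.

```cpp
// Sketch only: Ds (an ImprovedStaggeredFermionR action) and UrbGrid are assumed.
typedef ImprovedStaggeredFermionR::FermionField FermionField;

SchurStaggeredOperator<ImprovedStaggeredFermionR, FermionField> HermOp(Ds);
ConjugateGradient<FermionField> CG(1.0e-8, 10000);

FermionField src_o(UrbGrid), sol_o(UrbGrid);
src_o.checkerboard = Odd;
sol_o.checkerboard = Odd;
sol_o = zero;

// For staggered fermions  Mpc = M_oo - M_oe M_ee^{-1} M_eo  is itself Hermitian
// and positive, so CG is applied to Mpc directly; MpcDagMpc is never needed,
// which is why it asserts above.
CG(HermOp, src_o, sol_o);
```
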
|  | ||||
|     ///////////////////////////////////////////////////////////// | ||||
| @@ -314,6 +346,14 @@ namespace Grid { | ||||
|       virtual void operator() (const Field &in, Field &out) = 0; | ||||
|     }; | ||||
|  | ||||
|     template<class Field> class IdentityLinearFunction : public LinearFunction<Field> { | ||||
|     public: | ||||
|       void operator() (const Field &in, Field &out){ | ||||
| 	out = in; | ||||
|       }; | ||||
|     }; | ||||
|  | ||||
|  | ||||
|     ///////////////////////////////////////////////////////////// | ||||
|     // Base classes for Multishift solvers for operators | ||||
|     ///////////////////////////////////////////////////////////// | ||||
| @@ -336,6 +376,64 @@ namespace Grid { | ||||
|      }; | ||||
|     */ | ||||
|  | ||||
|   //////////////////////////////////////////////////////////////////////////////////////////// | ||||
|   // Hermitian operator Linear function and operator function | ||||
|   //////////////////////////////////////////////////////////////////////////////////////////// | ||||
|     template<class Field> | ||||
|       class HermOpOperatorFunction : public OperatorFunction<Field> { | ||||
|       void operator() (LinearOperatorBase<Field> &Linop, const Field &in, Field &out) { | ||||
| 	Linop.HermOp(in,out); | ||||
|       }; | ||||
|     }; | ||||
|  | ||||
|     template<typename Field> | ||||
|       class PlainHermOp : public LinearFunction<Field> { | ||||
|     public: | ||||
|       LinearOperatorBase<Field> &_Linop; | ||||
|        | ||||
|       PlainHermOp(LinearOperatorBase<Field>& linop) : _Linop(linop)  | ||||
|       {} | ||||
|        | ||||
|       void operator()(const Field& in, Field& out) { | ||||
| 	_Linop.HermOp(in,out); | ||||
|       } | ||||
|     }; | ||||
|  | ||||
|     template<typename Field> | ||||
|     class FunctionHermOp : public LinearFunction<Field> { | ||||
|     public: | ||||
|       OperatorFunction<Field>   & _poly; | ||||
|       LinearOperatorBase<Field> &_Linop; | ||||
|        | ||||
|       FunctionHermOp(OperatorFunction<Field> & poly,LinearOperatorBase<Field>& linop)  | ||||
| 	: _poly(poly), _Linop(linop) {}; | ||||
|        | ||||
|       void operator()(const Field& in, Field& out) { | ||||
| 	_poly(_Linop,in,out); | ||||
|       } | ||||
|     }; | ||||
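
The two wrappers give the Lanczos code a uniform `LinearFunction` interface: `PlainHermOp` applies the underlying operator as-is, while `FunctionHermOp` applies a polynomial (e.g. a Chebyshev filter) of it. A minimal sketch, assuming `HermOp` is some `LinearOperatorBase<Field>` and `grid` a suitable grid pointer, both built elsewhere:

```cpp
// Hedged sketch; Field, HermOp and grid are placeholders.
Chebyshev<Field>      Cheby(0.1, 60.0, 101);  // illustrative window and order
FunctionHermOp<Field> PolyOp(Cheby, HermOp);  // acts as Cheby(HermOp)
PlainHermOp<Field>    Op(HermOp);             // acts as HermOp itself

Field x(grid), y(grid);
PolyOp(x, y);   // y = Cheby(HermOp) x  -- used to accelerate the Lanczos iteration
Op(x, y);       // y = HermOp x        -- used when testing true eigenvalues
```
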
|  | ||||
|   template<class Field> | ||||
|   class Polynomial : public OperatorFunction<Field> { | ||||
|   private: | ||||
|     std::vector<RealD> Coeffs; | ||||
|   public: | ||||
|     Polynomial(std::vector<RealD> &_Coeffs) : Coeffs(_Coeffs) { }; | ||||
|  | ||||
|     // Implement the required interface | ||||
|     void operator() (LinearOperatorBase<Field> &Linop, const Field &in, Field &out) { | ||||
|  | ||||
|       Field AtoN(in._grid); | ||||
|       Field Mtmp(in._grid); | ||||
|       AtoN = in; | ||||
|       out = AtoN*Coeffs[0]; | ||||
|       for(int n=1;n<Coeffs.size();n++){ | ||||
| 	Mtmp = AtoN; | ||||
| 	Linop.HermOp(Mtmp,AtoN); | ||||
| 	out=out+AtoN*Coeffs[n]; | ||||
|       } | ||||
|     }; | ||||
|   }; | ||||
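
In formula form, the loop above evaluates the plain power series

```latex
\mathrm{out} \;=\; \sum_{n=0}^{N-1} c_n\, A^{\,n}\,\mathrm{in},
\qquad A \equiv \texttt{Linop.HermOp},\;\; c_n \equiv \texttt{Coeffs[n]},
```

building $A^{n}\,\mathrm{in}$ incrementally in `AtoN` rather than using Horner's scheme, so it costs one operator application per coefficient beyond the first.
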
|  | ||||
| } | ||||
|  | ||||
|   | ||||
| @@ -8,6 +8,7 @@ | ||||
|  | ||||
| Author: Peter Boyle <paboyle@ph.ed.ac.uk> | ||||
| Author: paboyle <paboyle@ph.ed.ac.uk> | ||||
| Author: Christoph Lehner <clehner@bnl.gov> | ||||
|  | ||||
|     This program is free software; you can redistribute it and/or modify | ||||
|     it under the terms of the GNU General Public License as published by | ||||
| @@ -33,41 +34,12 @@ Author: paboyle <paboyle@ph.ed.ac.uk> | ||||
|  | ||||
| namespace Grid { | ||||
|  | ||||
|   //////////////////////////////////////////////////////////////////////////////////////////// | ||||
|   // Simple general polynomial with user supplied coefficients | ||||
|   //////////////////////////////////////////////////////////////////////////////////////////// | ||||
|   template<class Field> | ||||
|   class HermOpOperatorFunction : public OperatorFunction<Field> { | ||||
|     void operator() (LinearOperatorBase<Field> &Linop, const Field &in, Field &out) { | ||||
|       Linop.HermOp(in,out); | ||||
|     }; | ||||
|   }; | ||||
|  | ||||
|   template<class Field> | ||||
|   class Polynomial : public OperatorFunction<Field> { | ||||
|   private: | ||||
|     std::vector<RealD> Coeffs; | ||||
|   public: | ||||
|     Polynomial(std::vector<RealD> &_Coeffs) : Coeffs(_Coeffs) { }; | ||||
|  | ||||
|     // Implement the required interface | ||||
|     void operator() (LinearOperatorBase<Field> &Linop, const Field &in, Field &out) { | ||||
|  | ||||
|       Field AtoN(in._grid); | ||||
|       Field Mtmp(in._grid); | ||||
|       AtoN = in; | ||||
|       out = AtoN*Coeffs[0]; | ||||
| //            std::cout <<"Poly in " <<norm2(in)<<" size "<< Coeffs.size()<<std::endl; | ||||
| //            std::cout <<"Coeffs[0]= "<<Coeffs[0]<< " 0 " <<norm2(out)<<std::endl; | ||||
|       for(int n=1;n<Coeffs.size();n++){ | ||||
| 	Mtmp = AtoN; | ||||
| 	Linop.HermOp(Mtmp,AtoN); | ||||
| 	out=out+AtoN*Coeffs[n]; | ||||
| //            std::cout <<"Coeffs "<<n<<"= "<< Coeffs[n]<< " 0 " <<std::endl; | ||||
| //		std::cout << n<<" " <<norm2(out)<<std::endl; | ||||
|       } | ||||
|     }; | ||||
|   }; | ||||
| struct ChebyParams : Serializable { | ||||
|   GRID_SERIALIZABLE_CLASS_MEMBERS(ChebyParams, | ||||
| 				  RealD, alpha,   | ||||
| 				  RealD, beta,    | ||||
| 				  int, Npoly); | ||||
| }; | ||||
|  | ||||
|   //////////////////////////////////////////////////////////////////////////////////////////// | ||||
|   // Generic Chebyshev approximations | ||||
| @@ -82,8 +54,10 @@ namespace Grid { | ||||
|  | ||||
|   public: | ||||
|     void csv(std::ostream &out){ | ||||
| 	RealD diff = hi-lo; | ||||
|       for (RealD x=lo-0.2*diff; x<hi+0.2*diff; x+=(hi-lo)/1000) { | ||||
|       RealD diff = hi-lo; | ||||
|       RealD delta = (hi-lo)*1.0e-9; | ||||
|       for (RealD x=lo; x<hi; x+=delta) { | ||||
| 	delta*=1.1; | ||||
| 	RealD f = approx(x); | ||||
| 	out<< x<<" "<<f<<std::endl; | ||||
|       } | ||||
| @@ -99,6 +73,7 @@ namespace Grid { | ||||
|     }; | ||||
|  | ||||
|     Chebyshev(){}; | ||||
|     Chebyshev(ChebyParams p){ Init(p.alpha,p.beta,p.Npoly);}; | ||||
|     Chebyshev(RealD _lo,RealD _hi,int _order, RealD (* func)(RealD) ) {Init(_lo,_hi,_order,func);}; | ||||
|     Chebyshev(RealD _lo,RealD _hi,int _order) {Init(_lo,_hi,_order);}; | ||||
|  | ||||
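
With the serialisable `ChebyParams` struct, the filter can be configured from a parameter file and constructed in one step. A minimal sketch with illustrative values (the XML reading shown in the comment is the usual Grid serialisation idiom, not something introduced by this diff):

```cpp
// Illustrative values only.
ChebyParams CPar;
CPar.alpha = 0.1;    // mapped to the lower end of the interval
CPar.beta  = 60.0;   // mapped to the upper end
CPar.Npoly = 101;    // polynomial order

Chebyshev<Field> Cheby(CPar);   // forwards to Init(CPar.alpha, CPar.beta, CPar.Npoly)

// Alternatively, populate CPar from XML:
//   XmlReader RD("params.xml");
//   read(RD, "Cheby", CPar);
```
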
| @@ -193,6 +168,47 @@ namespace Grid { | ||||
|       return sum; | ||||
|     }; | ||||
|  | ||||
|     RealD approxD(RealD x) | ||||
|     { | ||||
|       RealD Un; | ||||
|       RealD Unm; | ||||
|       RealD Unp; | ||||
|        | ||||
|       RealD y=( x-0.5*(hi+lo))/(0.5*(hi-lo)); | ||||
|        | ||||
|       RealD U0=1; | ||||
|       RealD U1=2*y; | ||||
|        | ||||
|       RealD sum; | ||||
|       sum = Coeffs[1]*U0; | ||||
|       sum+= Coeffs[2]*U1*2.0; | ||||
|        | ||||
|       Un =U1; | ||||
|       Unm=U0; | ||||
|       for(int i=2;i<order-1;i++){ | ||||
| 	Unp=2*y*Un-Unm; | ||||
| 	Unm=Un; | ||||
| 	Un =Unp; | ||||
| 	sum+= Un*Coeffs[i+1]*(i+1.0); | ||||
|       } | ||||
|       return sum/(0.5*(hi-lo)); | ||||
|     }; | ||||
|      | ||||
|     RealD approxInv(RealD z, RealD x0, int maxiter, RealD resid) { | ||||
|       RealD x = x0; | ||||
|       RealD eps; | ||||
|        | ||||
|       int i; | ||||
|       for (i=0;i<maxiter;i++) { | ||||
| 	eps = approx(x) - z; | ||||
| 	if (fabs(eps / z) < resid) | ||||
| 	  return x; | ||||
| 	x = x - eps / approxD(x); | ||||
|       } | ||||
|        | ||||
|       return std::numeric_limits<double>::quiet_NaN(); | ||||
|     } | ||||
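
The two new helpers are the derivative of the fitted polynomial and a Newton inversion of it. With the usual affine map onto $[-1,1]$ and $T_n'(y) = n\,U_{n-1}(y)$, `approxD` computes

```latex
y = \frac{x - \tfrac12(\mathrm{hi}+\mathrm{lo})}{\tfrac12(\mathrm{hi}-\mathrm{lo})},
\qquad
p'(x) = \frac{2}{\mathrm{hi}-\mathrm{lo}} \sum_{n\ge 1} n\, c_n\, U_{n-1}(y),
```

and `approxInv` solves $p(x)=z$ by Newton's method, $x_{k+1} = x_k - \bigl(p(x_k)-z\bigr)/p'(x_k)$, returning NaN if the relative residual does not drop below `resid` within `maxiter` steps.
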
|      | ||||
|     // Implement the required interface | ||||
|     void operator() (LinearOperatorBase<Field> &Linop, const Field &in, Field &out) { | ||||
|  | ||||
|   | ||||
| @@ -78,12 +78,12 @@ class ConjugateGradient : public OperatorFunction<Field> { | ||||
|     cp = a; | ||||
|     ssq = norm2(src); | ||||
|  | ||||
|     std::cout << GridLogIterative << std::setprecision(4) << "ConjugateGradient: guess " << guess << std::endl; | ||||
|     std::cout << GridLogIterative << std::setprecision(4) << "ConjugateGradient:   src " << ssq << std::endl; | ||||
|     std::cout << GridLogIterative << std::setprecision(4) << "ConjugateGradient:    mp " << d << std::endl; | ||||
|     std::cout << GridLogIterative << std::setprecision(4) << "ConjugateGradient:   mmp " << b << std::endl; | ||||
|     std::cout << GridLogIterative << std::setprecision(4) << "ConjugateGradient:  cp,r " << cp << std::endl; | ||||
|     std::cout << GridLogIterative << std::setprecision(4) << "ConjugateGradient:     p " << a << std::endl; | ||||
|     std::cout << GridLogIterative << std::setprecision(8) << "ConjugateGradient: guess " << guess << std::endl; | ||||
|     std::cout << GridLogIterative << std::setprecision(8) << "ConjugateGradient:   src " << ssq << std::endl; | ||||
|     std::cout << GridLogIterative << std::setprecision(8) << "ConjugateGradient:    mp " << d << std::endl; | ||||
|     std::cout << GridLogIterative << std::setprecision(8) << "ConjugateGradient:   mmp " << b << std::endl; | ||||
|     std::cout << GridLogIterative << std::setprecision(8) << "ConjugateGradient:  cp,r " << cp << std::endl; | ||||
|     std::cout << GridLogIterative << std::setprecision(8) << "ConjugateGradient:     p " << a << std::endl; | ||||
|  | ||||
|     RealD rsq = Tolerance * Tolerance * ssq; | ||||
|  | ||||
| @@ -92,7 +92,7 @@ class ConjugateGradient : public OperatorFunction<Field> { | ||||
|       return; | ||||
|     } | ||||
|  | ||||
|     std::cout << GridLogIterative << std::setprecision(4) | ||||
|     std::cout << GridLogIterative << std::setprecision(8) | ||||
|               << "ConjugateGradient: k=0 residual " << cp << " target " << rsq << std::endl; | ||||
|  | ||||
|     GridStopWatch LinalgTimer; | ||||
|   | ||||
| @@ -7,8 +7,9 @@ | ||||
|     Copyright (C) 2015 | ||||
|  | ||||
| Author: Peter Boyle <paboyle@ph.ed.ac.uk> | ||||
| Author: Chulwoo Jung | ||||
| Author: Guido Cossu | ||||
| Author: paboyle <paboyle@ph.ed.ac.uk> | ||||
| Author: Chulwoo Jung <chulwoo@bnl.gov> | ||||
| Author: Christoph Lehner <clehner@bnl.gov> | ||||
|  | ||||
|     This program is free software; you can redistribute it and/or modify | ||||
|     it under the terms of the GNU General Public License as published by | ||||
| @@ -27,125 +28,282 @@ Author: Guido Cossu | ||||
|     See the full license in the file "LICENSE" in the top level distribution directory | ||||
|     *************************************************************************************/ | ||||
|     /*  END LEGAL */ | ||||
| #ifndef GRID_IRL_H | ||||
| #define GRID_IRL_H | ||||
| #ifndef GRID_BIRL_H | ||||
| #define GRID_BIRL_H | ||||
|  | ||||
| #include <string.h> //memset | ||||
| //#include <zlib.h> | ||||
| #include <sys/stat.h> | ||||
|  | ||||
| namespace Grid { | ||||
| namespace Grid {  | ||||
|  | ||||
|   enum IRLdiagonalisation {  | ||||
|     IRLdiagonaliseWithDSTEGR, | ||||
|     IRLdiagonaliseWithQR, | ||||
|     IRLdiagonaliseWithEigen | ||||
|   }; | ||||
|  | ||||
| //////////////////////////////////////////////////////////////////////////////// | ||||
| // Helper class for sorting the evalues AND evectors by Field | ||||
| // Use pointer swizzle on vectors | ||||
| //////////////////////////////////////////////////////////////////////////////// | ||||
|   //////////////////////////////////////////////////////// | ||||
|   // Move following 100 LOC to lattice/Lattice_basis.h | ||||
|   //////////////////////////////////////////////////////// | ||||
| template<class Field> | ||||
| class SortEigen { | ||||
|  private: | ||||
|   static bool less_lmd(RealD left,RealD right){ | ||||
|     return left > right; | ||||
|   }   | ||||
|   static bool less_pair(std::pair<RealD,Field const*>& left, | ||||
|                         std::pair<RealD,Field const*>& right){ | ||||
|     return left.first > (right.first); | ||||
|   }   | ||||
|    | ||||
|  public: | ||||
|   void push(std::vector<RealD>& lmd,std::vector<Field>& evec,int N) { | ||||
|      | ||||
|     //////////////////////////////////////////////////////////////////////// | ||||
|     // PAB: FIXME: VERY VERY VERY wasteful: takes a copy of the entire vector set. | ||||
|     //    : The vector reorder should be done by pointer swizzle somehow | ||||
|     //////////////////////////////////////////////////////////////////////// | ||||
|     std::vector<Field> cpy(lmd.size(),evec[0]._grid); | ||||
|     for(int i=0;i<lmd.size();i++) cpy[i] = evec[i]; | ||||
|      | ||||
|     std::vector<std::pair<RealD, Field const*> > emod(lmd.size());     | ||||
| void basisOrthogonalize(std::vector<Field> &basis,Field &w,int k)  | ||||
| { | ||||
|   for(int j=0; j<k; ++j){ | ||||
|     auto ip = innerProduct(basis[j],w); | ||||
|     w = w - ip*basis[j]; | ||||
|   } | ||||
| } | ||||
|  | ||||
|     for(int i=0;i<lmd.size();++i)  emod[i] = std::pair<RealD,Field const*>(lmd[i],&cpy[i]); | ||||
|  | ||||
|     partial_sort(emod.begin(),emod.begin()+N,emod.end(),less_pair); | ||||
|  | ||||
|     typename std::vector<std::pair<RealD, Field const*> >::iterator it = emod.begin(); | ||||
|     for(int i=0;i<N;++i){ | ||||
|       lmd[i]=it->first; | ||||
|       evec[i]=*(it->second); | ||||
|       ++it; | ||||
| template<class Field> | ||||
| void basisRotate(std::vector<Field> &basis,Eigen::MatrixXd& Qt,int j0, int j1, int k0,int k1,int Nm)  | ||||
| { | ||||
|   typedef typename Field::vector_object vobj; | ||||
|   GridBase* grid = basis[0]._grid; | ||||
|        | ||||
|   parallel_region | ||||
|   { | ||||
|     std::vector < vobj > B(Nm); // Thread private | ||||
|          | ||||
|     parallel_for_internal(int ss=0;ss < grid->oSites();ss++){ | ||||
|       for(int j=j0; j<j1; ++j) B[j]=0.; | ||||
|        | ||||
|       for(int j=j0; j<j1; ++j){ | ||||
| 	for(int k=k0; k<k1; ++k){ | ||||
| 	  B[j] +=Qt(j,k) * basis[k]._odata[ss]; | ||||
| 	} | ||||
|       } | ||||
|       for(int j=j0; j<j1; ++j){ | ||||
| 	  basis[j]._odata[ss] = B[j]; | ||||
|       } | ||||
|     } | ||||
|   } | ||||
|   void push(std::vector<RealD>& lmd,int N) { | ||||
|     std::partial_sort(lmd.begin(),lmd.begin()+N,lmd.end(),less_lmd); | ||||
| } | ||||
|  | ||||
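
`basisRotate` above (and `basisRotateJ`, just below, for a single output vector) applies the small dense rotation produced by the tridiagonal diagonalisation to the Lanczos basis, site by site:

```latex
v_j \;\leftarrow\; \sum_{k=k_0}^{k_1-1} \texttt{Qt}(j,k)\, v_k ,
\qquad j = j_0,\dots,j_1-1 ,
```

with the per-site accumulator `B` kept thread-private so the site loop can run in parallel.
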
| // Extract a single rotated vector | ||||
| template<class Field> | ||||
| void basisRotateJ(Field &result,std::vector<Field> &basis,Eigen::MatrixXd& Qt,int j, int k0,int k1,int Nm)  | ||||
| { | ||||
|   typedef typename Field::vector_object vobj; | ||||
|   GridBase* grid = basis[0]._grid; | ||||
|  | ||||
|   result.checkerboard = basis[0].checkerboard; | ||||
|   parallel_for(int ss=0;ss < grid->oSites();ss++){ | ||||
|     vobj B = zero; | ||||
|     for(int k=k0; k<k1; ++k){ | ||||
|       B +=Qt(j,k) * basis[k]._odata[ss]; | ||||
|     } | ||||
|     result._odata[ss] = B; | ||||
|   } | ||||
|   bool saturated(RealD lmd, RealD thrs) { | ||||
|     return fabs(lmd) > fabs(thrs); | ||||
| } | ||||
|  | ||||
| template<class Field> | ||||
| void basisReorderInPlace(std::vector<Field> &_v,std::vector<RealD>& sort_vals, std::vector<int>& idx)  | ||||
| { | ||||
|   int vlen = idx.size(); | ||||
|  | ||||
|   assert(vlen>=1); | ||||
|   assert(vlen<=sort_vals.size()); | ||||
|   assert(vlen<=_v.size()); | ||||
|  | ||||
|   for (size_t i=0;i<vlen;i++) { | ||||
|  | ||||
|     if (idx[i] != i) { | ||||
|  | ||||
|       ////////////////////////////////////// | ||||
|       // idx[i] is a table of desired sources giving a permutation. | ||||
|       // Swap v[i] with v[idx[i]]. | ||||
|       // Find  j>i for which _vnew[j] = _vold[i], | ||||
|       // track the move idx[j] => idx[i] | ||||
|       // track the move idx[i] => i | ||||
|       ////////////////////////////////////// | ||||
|       size_t j; | ||||
|       for (j=i;j<idx.size();j++) | ||||
| 	if (idx[j]==i) | ||||
| 	  break; | ||||
|  | ||||
|       assert(idx[i] > i);     assert(j!=idx.size());      assert(idx[j]==i); | ||||
|  | ||||
|       std::swap(_v[i]._odata,_v[idx[i]]._odata); // should use vector move constructor, no data copy | ||||
|       std::swap(sort_vals[i],sort_vals[idx[i]]); | ||||
|  | ||||
|       idx[j] = idx[i]; | ||||
|       idx[i] = i; | ||||
|     } | ||||
|   } | ||||
| }; | ||||
| } | ||||
|  | ||||
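
The in-place reorder applies the sort permutation by following swap cycles, so no second copy of the (potentially very large) field set is ever made. A tiny standalone illustration of the same bookkeeping on plain `double`s (not Grid code):

```cpp
#include <cstdio>
#include <utility>
#include <vector>

// idx[i] = position (in the original ordering) of the element that should end up at slot i.
// Mirrors the swap/cycle bookkeeping of basisReorderInPlace.
void reorderInPlace(std::vector<double> &v, std::vector<int> idx) {
  for (std::size_t i = 0; i < idx.size(); i++) {
    if (idx[i] != (int)i) {
      std::size_t j = i + 1;
      while (idx[j] != (int)i) j++;   // slot j still wants the element currently at i
      std::swap(v[i], v[idx[i]]);     // bring the desired element into slot i
      idx[j] = idx[i];                // the displaced element now lives where idx[i] pointed
      idx[i] = (int)i;                // slot i is settled
    }
  }
}

int main() {
  std::vector<double> v = {10, 20, 30, 40};
  reorderInPlace(v, {2, 0, 3, 1});    // prints: 30 10 40 20
  for (double x : v) std::printf("%g ", x);
  std::printf("\n");
  return 0;
}
```
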
| inline std::vector<int> basisSortGetIndex(std::vector<RealD>& sort_vals)  | ||||
| { | ||||
|   std::vector<int> idx(sort_vals.size()); | ||||
|   std::iota(idx.begin(), idx.end(), 0); | ||||
|  | ||||
|   // sort indexes based on comparing values in v | ||||
|   std::sort(idx.begin(), idx.end(), [&sort_vals](int i1, int i2) { | ||||
|     return ::fabs(sort_vals[i1]) < ::fabs(sort_vals[i2]); | ||||
|   }); | ||||
|   return idx; | ||||
| } | ||||
|  | ||||
| template<class Field> | ||||
| void basisSortInPlace(std::vector<Field> & _v,std::vector<RealD>& sort_vals, bool reverse)  | ||||
| { | ||||
|   std::vector<int> idx = basisSortGetIndex(sort_vals); | ||||
|   if (reverse) | ||||
|     std::reverse(idx.begin(), idx.end()); | ||||
|    | ||||
|   basisReorderInPlace(_v,sort_vals,idx); | ||||
| } | ||||
|  | ||||
| // PAB: faster to compute the inner products first then fuse loops. | ||||
| // If performance critical can improve. | ||||
| template<class Field> | ||||
| void basisDeflate(const std::vector<Field> &_v,const std::vector<RealD>& eval,const Field& src_orig,Field& result) { | ||||
|   result = zero; | ||||
|   assert(_v.size()==eval.size()); | ||||
|   int N = (int)_v.size(); | ||||
|   for (int i=0;i<N;i++) { | ||||
|     const Field& tmp = _v[i]; | ||||
|     axpy(result,TensorRemove(innerProduct(tmp,src_orig)) / eval[i],tmp,result); | ||||
|   } | ||||
| } | ||||
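
`basisDeflate` is the standard eigenvector deflation for a Hermitian operator $H$: assuming the stored vectors are (approximately) orthonormal eigenvectors $v_i$ with eigenvalues $\lambda_i$,

```latex
\mathrm{result} \;=\; \sum_i \frac{\langle v_i,\ \mathrm{src}\rangle}{\lambda_i}\, v_i
\;\approx\; H^{-1}\,\mathrm{src}
\quad\text{restricted to } \mathrm{span}\{v_i\},
```

which is typically used as an initial guess handed to a subsequent solver for the remaining components.
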
|  | ||||
| ///////////////////////////////////////////////////////////// | ||||
| // Implicitly restarted lanczos | ||||
| ///////////////////////////////////////////////////////////// | ||||
| template<class Field> class ImplicitlyRestartedLanczosTester  | ||||
| { | ||||
|  public: | ||||
|   virtual int TestConvergence(int j,RealD resid,Field &evec, RealD &eval,RealD evalMaxApprox); | ||||
|   virtual int ReconstructEval(int j,RealD resid,Field &evec, RealD &eval,RealD evalMaxApprox); | ||||
| }; | ||||
|  | ||||
| enum IRLdiagonalisation {  | ||||
|   IRLdiagonaliseWithDSTEGR, | ||||
|   IRLdiagonaliseWithQR, | ||||
|   IRLdiagonaliseWithEigen | ||||
| }; | ||||
|  | ||||
| template<class Field> class ImplicitlyRestartedLanczosHermOpTester  : public ImplicitlyRestartedLanczosTester<Field> | ||||
| { | ||||
|  public: | ||||
|   LinearFunction<Field>       &_HermOpTest; | ||||
|   ImplicitlyRestartedLanczosHermOpTester(LinearFunction<Field> &HermOpTest) : _HermOpTest(HermOpTest)  {  }; | ||||
|   int ReconstructEval(int j,RealD resid,Field &B, RealD &eval,RealD evalMaxApprox) | ||||
|   { | ||||
|     return TestConvergence(j,resid,B,eval,evalMaxApprox); | ||||
|   } | ||||
|   int TestConvergence(int j,RealD eresid,Field &B, RealD &eval,RealD evalMaxApprox) | ||||
|   { | ||||
|     Field v(B); | ||||
|     RealD eval_poly = eval; | ||||
|     // Apply operator | ||||
|     _HermOpTest(B,v); | ||||
|  | ||||
|     RealD vnum = real(innerProduct(B,v)); // HermOp. | ||||
|     RealD vden = norm2(B); | ||||
|     RealD vv0  = norm2(v); | ||||
|     eval   = vnum/vden; | ||||
|     v -= eval*B; | ||||
|  | ||||
|     RealD vv = norm2(v) / ::pow(evalMaxApprox,2.0); | ||||
|  | ||||
|     std::cout.precision(13); | ||||
|     std::cout<<GridLogIRL  << "[" << std::setw(3)<<j<<"] " | ||||
| 	     <<"eval = "<<std::setw(25)<< eval << " (" << eval_poly << ")" | ||||
| 	     <<" |H B[i] - eval[i]B[i]|^2 / evalMaxApprox^2 " << std::setw(25) << vv | ||||
| 	     <<std::endl; | ||||
|  | ||||
|     int conv=0; | ||||
|     if( (vv<eresid*eresid) ) conv = 1; | ||||
|  | ||||
|     return conv; | ||||
|   } | ||||
| }; | ||||
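
The default tester's criterion in formula form: for a candidate vector $B$, with the operator supplied as `HermOpTest` and the previously estimated spectral scale $\lambda_{\max}$,

```latex
\lambda = \frac{\mathrm{Re}\,\langle B,\,H B\rangle}{\lVert B\rVert^2},
\qquad
v = H B - \lambda B,
\qquad
\text{converged} \iff \frac{\lVert v\rVert^2}{\lambda_{\max}^{2}} < \texttt{eresid}^2 ,
```

i.e. the Ritz residual is normalised by the largest-eigenvalue estimate rather than by the individual eigenvalue, so small eigenvalues are not held to a disproportionately tight absolute tolerance.
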
|  | ||||
| template<class Field>  | ||||
| class ImplicitlyRestartedLanczos { | ||||
|  | ||||
| private:        | ||||
|  | ||||
|   int MaxIter;   // Max iterations | ||||
|   int Nstop;     // Number of evecs checked for convergence | ||||
|   int Nk;        // Number of converged sought | ||||
|   int Nm;        // Nm -- total number of vectors | ||||
|   RealD eresid; | ||||
|  private: | ||||
|   const RealD small = 1.0e-8; | ||||
|   int MaxIter; | ||||
|   int MinRestart; // Minimum number of restarts; only check for convergence after | ||||
|   int Nstop;   // Number of evecs checked for convergence | ||||
|   int Nk;      // Number of converged sought | ||||
|   //  int Np;      // Np -- Number of spare vecs in krylov space //  == Nm - Nk | ||||
|   int Nm;      // Nm -- total number of vectors | ||||
|   IRLdiagonalisation diagonalisation; | ||||
|   //////////////////////////////////// | ||||
|   int orth_period; | ||||
|      | ||||
|   RealD OrthoTime; | ||||
|   RealD eresid, betastp; | ||||
|   //////////////////////////////// | ||||
|   // Embedded objects | ||||
|   //////////////////////////////////// | ||||
|            SortEigen<Field> _sort; | ||||
|   LinearOperatorBase<Field> &_Linop; | ||||
|     OperatorFunction<Field> &_poly; | ||||
|  | ||||
|   //////////////////////////////// | ||||
|   LinearFunction<Field>       &_HermOp; | ||||
|   LinearFunction<Field>       &_HermOpTest; | ||||
|   ImplicitlyRestartedLanczosTester<Field> &_Tester; | ||||
|   // Default tester provided (we need a ref to something in default case) | ||||
|   ImplicitlyRestartedLanczosHermOpTester<Field> SimpleTester; | ||||
|   ///////////////////////// | ||||
|   // Constructor | ||||
|   ///////////////////////// | ||||
|    | ||||
| public:        | ||||
|  ImplicitlyRestartedLanczos(LinearOperatorBase<Field> &Linop, // op | ||||
| 			    OperatorFunction<Field> & poly,   // polynomial | ||||
| 			    int _Nstop, // really sought vecs | ||||
| 			    int _Nk,    // sought vecs | ||||
| 			    int _Nm,    // total vecs | ||||
| 			    RealD _eresid, // resid in lmd deficit  | ||||
| 			    int _MaxIter,  // Max iterations | ||||
| 			    IRLdiagonalisation _diagonalisation= IRLdiagonaliseWithEigen ) : | ||||
|     _Linop(Linop),    _poly(poly), | ||||
|       Nstop(_Nstop), Nk(_Nk), Nm(_Nm), | ||||
|       eresid(_eresid),  MaxIter(_MaxIter), | ||||
|       diagonalisation(_diagonalisation) | ||||
|       { }; | ||||
|   ////////////////////////////////////////////////////////////////// | ||||
|   // PAB: | ||||
|   ////////////////////////////////////////////////////////////////// | ||||
|   // Too many options  & knobs. Do we really need orth_period | ||||
|   // What is the theoretical basis & guarantees of betastp ? | ||||
|   // Nstop=Nk viable? | ||||
|   // MinRestart avoidable with new convergence test? | ||||
|   // Could cut to HermOp, HermOpTest, Tester, Nk, Nm, resid, maxiter (+diagonalisation) | ||||
|   // HermOpTest could be eliminated if we dropped the Power method for max eval. | ||||
|   // -- also: The eval, eval2, eval2_copy stuff is still unnecessarily unclear | ||||
|   ////////////////////////////////////////////////////////////////// | ||||
|  ImplicitlyRestartedLanczos(LinearFunction<Field> & HermOp, | ||||
| 			    LinearFunction<Field> & HermOpTest, | ||||
| 			    ImplicitlyRestartedLanczosTester<Field> & Tester, | ||||
| 			    int _Nstop, // sought vecs | ||||
| 			    int _Nk, // sought vecs | ||||
| 			    int _Nm, // spare vecs | ||||
| 			    RealD _eresid, // resid in lmdue deficit  | ||||
| 			    int _MaxIter, // Max iterations | ||||
| 			    RealD _betastp=0.0, // if beta(k) < betastp: converged | ||||
| 			    int _MinRestart=1, int _orth_period = 1, | ||||
| 			    IRLdiagonalisation _diagonalisation= IRLdiagonaliseWithEigen) : | ||||
|     SimpleTester(HermOpTest), _HermOp(HermOp),      _HermOpTest(HermOpTest), _Tester(Tester), | ||||
|     Nstop(_Nstop)  ,      Nk(_Nk),      Nm(_Nm), | ||||
|     eresid(_eresid),      betastp(_betastp), | ||||
|     MaxIter(_MaxIter)  ,      MinRestart(_MinRestart), | ||||
|     orth_period(_orth_period), diagonalisation(_diagonalisation)  { }; | ||||
|  | ||||
|     ImplicitlyRestartedLanczos(LinearFunction<Field> & HermOp, | ||||
| 			       LinearFunction<Field> & HermOpTest, | ||||
| 			       int _Nstop, // sought vecs | ||||
| 			       int _Nk, // sought vecs | ||||
| 			       int _Nm, // spare vecs | ||||
| 			       RealD _eresid, // resid in lmdue deficit  | ||||
| 			       int _MaxIter, // Max iterations | ||||
| 			       RealD _betastp=0.0, // if beta(k) < betastp: converged | ||||
| 			       int _MinRestart=1, int _orth_period = 1, | ||||
| 			       IRLdiagonalisation _diagonalisation= IRLdiagonaliseWithEigen) : | ||||
|     SimpleTester(HermOpTest),  _HermOp(HermOp),      _HermOpTest(HermOpTest), _Tester(SimpleTester), | ||||
|     Nstop(_Nstop)  ,      Nk(_Nk),      Nm(_Nm), | ||||
|     eresid(_eresid),      betastp(_betastp), | ||||
|     MaxIter(_MaxIter)  ,      MinRestart(_MinRestart), | ||||
|     orth_period(_orth_period), diagonalisation(_diagonalisation)  { }; | ||||
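
Putting the pieces together, a hedged sketch of how the new constructors are meant to be driven (field type, grids, RNG and the underlying operator are placeholders; parameter values are illustrative only):

```cpp
// Sketch: HermOp is a LinearOperatorBase<Field>, grid a GridBase*, RNG a GridParallelRNG.
const int   Nstop = 30, Nk = 40, Nm = 80;     // Nstop <= Nk < Nm
const RealD resid = 1.0e-8;
const int   MaxIt = 10;

Chebyshev<Field>      Cheby(0.1, 60.0, 101);  // spectral window / order are examples
FunctionHermOp<Field> PolyOp(Cheby, HermOp);  // operator the Lanczos actually iterates
PlainHermOp<Field>    Op(HermOp);             // operator used to test/reconstruct eigenvalues

ImplicitlyRestartedLanczos<Field> IRL(PolyOp, Op, Nstop, Nk, Nm, resid, MaxIt);

std::vector<RealD> eval(Nm);
std::vector<Field> evec(Nm, grid);
Field src(grid);
gaussian(RNG, src);

int Nconv;
IRL.calc(eval, evec, src, Nconv);   // on return: Nconv vectors tested, basis sorted in place
```

The first argument drives the iteration (usually the Chebyshev-filtered operator); the second is used for the power-method estimate of $\lambda_{\max}$ and, via the default tester, for the convergence checks.
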
|  | ||||
|   //////////////////////////////// | ||||
|   // Helpers | ||||
|   //////////////////////////////// | ||||
|   static RealD normalise(Field& v)  | ||||
|   template<typename T>  static RealD normalise(T& v)  | ||||
|   { | ||||
|     RealD nn = norm2(v); | ||||
|     nn = sqrt(nn); | ||||
|     v = v * (1.0/nn); | ||||
|     return nn; | ||||
|   } | ||||
|    | ||||
|   void orthogonalize(Field& w, std::vector<Field>& evec, int k) | ||||
|  | ||||
|   void orthogonalize(Field& w, std::vector<Field>& evec,int k) | ||||
|   { | ||||
|     typedef typename Field::scalar_type MyComplex; | ||||
|     MyComplex ip; | ||||
|      | ||||
|     for(int j=0; j<k; ++j){ | ||||
|       ip = innerProduct(evec[j],w);  | ||||
|       w = w - ip * evec[j]; | ||||
|     } | ||||
|     OrthoTime-=usecond()/1e6; | ||||
|     basisOrthogonalize(evec,w,k); | ||||
|     normalise(w); | ||||
|     OrthoTime+=usecond()/1e6; | ||||
|   } | ||||
|  | ||||
| /* Rudy Arthur's thesis pp.137 | ||||
| @@ -165,184 +323,234 @@ repeat | ||||
|   → A V_K = V_K H_K + f_K e_K^† ;  extend to an M = K + P step factorization  A V_M = V_M H_M + f_M e_M^† | ||||
| until convergence | ||||
| */ | ||||
|   void calc(std::vector<RealD>& eval,  std::vector<Field>& evec, const Field& src, int& Nconv) | ||||
|   void calc(std::vector<RealD>& eval, std::vector<Field>& evec,  const Field& src, int& Nconv, bool reverse=true) | ||||
|   { | ||||
|     GridBase *grid = src._grid; | ||||
|     assert(grid == evec[0]._grid); | ||||
|      | ||||
|     GridBase *grid = evec[0]._grid; | ||||
|     assert(grid == src._grid); | ||||
|      | ||||
|     std::cout << GridLogMessage <<"**************************************************************************"<< std::endl; | ||||
|     std::cout << GridLogMessage <<" ImplicitlyRestartedLanczos::calc() starting iteration 0 /  "<< MaxIter<< std::endl; | ||||
|     std::cout << GridLogMessage <<"**************************************************************************"<< std::endl; | ||||
|     std::cout << GridLogMessage <<" -- seek   Nk    = " << Nk    <<" vectors"<< std::endl; | ||||
|     std::cout << GridLogMessage <<" -- accept Nstop = " << Nstop <<" vectors"<< std::endl; | ||||
|     std::cout << GridLogMessage <<" -- total  Nm    = " << Nm    <<" vectors"<< std::endl; | ||||
|     std::cout << GridLogMessage <<" -- size of eval = " << eval.size() << std::endl; | ||||
|     std::cout << GridLogMessage <<" -- size of evec = " << evec.size() << std::endl; | ||||
|     GridLogIRL.TimingMode(1); | ||||
|     std::cout << GridLogIRL <<"**************************************************************************"<< std::endl; | ||||
|     std::cout << GridLogIRL <<" ImplicitlyRestartedLanczos::calc() starting iteration 0 /  "<< MaxIter<< std::endl; | ||||
|     std::cout << GridLogIRL <<"**************************************************************************"<< std::endl; | ||||
|     std::cout << GridLogIRL <<" -- seek   Nk    = " << Nk    <<" vectors"<< std::endl; | ||||
|     std::cout << GridLogIRL <<" -- accept Nstop = " << Nstop <<" vectors"<< std::endl; | ||||
|     std::cout << GridLogIRL <<" -- total  Nm    = " << Nm    <<" vectors"<< std::endl; | ||||
|     std::cout << GridLogIRL <<" -- size of eval = " << eval.size() << std::endl; | ||||
|     std::cout << GridLogIRL <<" -- size of evec = " << evec.size() << std::endl; | ||||
|     if ( diagonalisation == IRLdiagonaliseWithDSTEGR ) { | ||||
|       std::cout << GridLogMessage << "Diagonalisation is DSTEGR "<<std::endl; | ||||
|       std::cout << GridLogIRL << "Diagonalisation is DSTEGR "<<std::endl; | ||||
|     } else if ( diagonalisation == IRLdiagonaliseWithQR ) {  | ||||
|       std::cout << GridLogMessage << "Diagonalisation is QR "<<std::endl; | ||||
|       std::cout << GridLogIRL << "Diagonalisation is QR "<<std::endl; | ||||
|     }  else if ( diagonalisation == IRLdiagonaliseWithEigen ) {  | ||||
|       std::cout << GridLogMessage << "Diagonalisation is Eigen "<<std::endl; | ||||
|       std::cout << GridLogIRL << "Diagonalisation is Eigen "<<std::endl; | ||||
|     } | ||||
|     std::cout << GridLogMessage <<"**************************************************************************"<< std::endl; | ||||
|     std::cout << GridLogIRL <<"**************************************************************************"<< std::endl; | ||||
| 	 | ||||
|     assert(Nm <= evec.size() && Nm <= eval.size()); | ||||
|      | ||||
|     assert(Nm == evec.size() && Nm == eval.size()); | ||||
|     // quickly get an idea of the largest eigenvalue to more properly normalize the residuum | ||||
|     RealD evalMaxApprox = 0.0; | ||||
|     { | ||||
|       auto src_n = src; | ||||
|       auto tmp = src; | ||||
|       const int _MAX_ITER_IRL_MEVAPP_ = 50; | ||||
|       for (int i=0;i<_MAX_ITER_IRL_MEVAPP_;i++) { | ||||
| 	_HermOpTest(src_n,tmp); | ||||
| 	RealD vnum = real(innerProduct(src_n,tmp)); // HermOp. | ||||
| 	RealD vden = norm2(src_n); | ||||
| 	RealD na = vnum/vden; | ||||
| 	if (fabs(evalMaxApprox/na - 1.0) < 0.05) | ||||
| 	  i=_MAX_ITER_IRL_MEVAPP_; | ||||
| 	evalMaxApprox = na; | ||||
| 	std::cout << GridLogIRL << " Approximation of largest eigenvalue: " << evalMaxApprox << std::endl; | ||||
| 	src_n = tmp; | ||||
|       } | ||||
|     } | ||||
| 	 | ||||
|     std::vector<RealD> lme(Nm);   | ||||
|     std::vector<RealD> lme2(Nm); | ||||
|     std::vector<RealD> eval2(Nm); | ||||
|     std::vector<RealD> eval2_copy(Nm); | ||||
|     Eigen::MatrixXd Qt = Eigen::MatrixXd::Zero(Nm,Nm); | ||||
|  | ||||
|     Eigen::MatrixXd    Qt = Eigen::MatrixXd::Zero(Nm,Nm); | ||||
|  | ||||
|     std::vector<int>   Iconv(Nm); | ||||
|     std::vector<Field>  B(Nm,grid); // waste of space replicating | ||||
|      | ||||
|     Field f(grid); | ||||
|     Field v(grid); | ||||
|      | ||||
|     int k1 = 1; | ||||
|     int k2 = Nk; | ||||
|      | ||||
|     Nconv = 0; | ||||
|      | ||||
|     RealD beta_k; | ||||
|  | ||||
|     Nconv = 0; | ||||
|    | ||||
|     // Set initial vector | ||||
|     evec[0] = src; | ||||
|     std::cout << GridLogMessage <<"norm2(src)= " << norm2(src)<<std::endl; | ||||
|      | ||||
|     normalise(evec[0]); | ||||
|     std::cout << GridLogMessage <<"norm2(evec[0])= " << norm2(evec[0]) <<std::endl; | ||||
|      | ||||
| 	 | ||||
|     // Initial Nk steps | ||||
|     OrthoTime=0.; | ||||
|     for(int k=0; k<Nk; ++k) step(eval,lme,evec,f,Nm,k); | ||||
|      | ||||
|     std::cout<<GridLogIRL <<"Initial "<< Nk <<"steps done "<<std::endl; | ||||
|     std::cout<<GridLogIRL <<"Initial steps:OrthoTime "<<OrthoTime<< "seconds"<<std::endl; | ||||
|  | ||||
|     ////////////////////////////////// | ||||
|     // Restarting loop begins | ||||
|     ////////////////////////////////// | ||||
|     int iter; | ||||
|     for(iter = 0; iter<MaxIter; ++iter){ | ||||
|        | ||||
|       OrthoTime=0.; | ||||
|  | ||||
|       std::cout<< GridLogMessage <<" **********************"<< std::endl; | ||||
|       std::cout<< GridLogMessage <<" Restart iteration = "<< iter << std::endl; | ||||
|       std::cout<< GridLogMessage <<" **********************"<< std::endl; | ||||
|        | ||||
|  | ||||
|       std::cout<<GridLogIRL <<" running "<<Nm-Nk <<" steps: "<<std::endl; | ||||
|       for(int k=Nk; k<Nm; ++k) step(eval,lme,evec,f,Nm,k); | ||||
|        | ||||
|       f *= lme[Nm-1]; | ||||
|        | ||||
|  | ||||
|       std::cout<<GridLogIRL <<" "<<Nm-Nk <<" steps done "<<std::endl; | ||||
|       std::cout<<GridLogIRL <<"Initial steps:OrthoTime "<<OrthoTime<< "seconds"<<std::endl; | ||||
| 	   | ||||
|       ////////////////////////////////// | ||||
|       // getting eigenvalues | ||||
|       ////////////////////////////////// | ||||
|       for(int k=0; k<Nm; ++k){ | ||||
| 	eval2[k] = eval[k+k1-1]; | ||||
| 	lme2[k] = lme[k+k1-1]; | ||||
|       } | ||||
|       Qt = Eigen::MatrixXd::Identity(Nm,Nm); | ||||
|       diagonalize(eval2,lme2,Nm,Nm,Qt,grid); | ||||
|       std::cout<<GridLogIRL <<" diagonalized "<<std::endl; | ||||
|  | ||||
|       ////////////////////////////////// | ||||
|       // sorting | ||||
|       _sort.push(eval2,Nm); | ||||
|        | ||||
|       ////////////////////////////////// | ||||
|       eval2_copy = eval2; | ||||
|       std::partial_sort(eval2.begin(),eval2.begin()+Nm,eval2.end(),std::greater<RealD>()); | ||||
|       std::cout<<GridLogIRL <<" evals sorted "<<std::endl; | ||||
|       const int chunk=8; | ||||
|       for(int io=0; io<k2;io+=chunk){ | ||||
| 	std::cout<<GridLogIRL << "eval "<< std::setw(3) << io ; | ||||
| 	for(int ii=0;ii<chunk;ii++){ | ||||
| 	  if ( (io+ii)<k2 ) | ||||
| 	    std::cout<< " "<< std::setw(12)<< eval2[io+ii]; | ||||
| 	} | ||||
| 	std::cout << std::endl; | ||||
|       } | ||||
|  | ||||
|       ////////////////////////////////// | ||||
|       // Implicitly shifted QR transformations | ||||
|       ////////////////////////////////// | ||||
|       Qt = Eigen::MatrixXd::Identity(Nm,Nm); | ||||
|       for(int ip=k2; ip<Nm; ++ip){  | ||||
| 	// Eigen replacement for qr_decomp ??? | ||||
| 	qr_decomp(eval,lme,Nm,Nm,Qt,eval2[ip],k1,Nm); | ||||
| 	QR_decomp(eval,lme,Nm,Nm,Qt,eval2[ip],k1,Nm); | ||||
|       } | ||||
|      | ||||
|       for(int i=0; i<(Nk+1); ++i) B[i] = 0.0; | ||||
| 	   | ||||
|       for(int j=k1-1; j<k2+1; ++j){ | ||||
| 	for(int k=0; k<Nm; ++k){ | ||||
| 	  B[j].checkerboard = evec[k].checkerboard; | ||||
| 	  B[j] += Qt(j,k) * evec[k]; | ||||
| 	} | ||||
|       } | ||||
|       for(int j=k1-1; j<k2+1; ++j) evec[j] = B[j]; | ||||
|       std::cout<<GridLogIRL <<"QR decomposed "<<std::endl; | ||||
|  | ||||
|       assert(k2<Nm);      assert(k2<Nm);      assert(k1>0); | ||||
|  | ||||
|       basisRotate(evec,Qt,k1-1,k2+1,0,Nm,Nm); /// big constraint on the basis | ||||
|       std::cout<<GridLogIRL <<"basisRotated  by Qt"<<std::endl; | ||||
|        | ||||
|       //////////////////////////////////////////////////// | ||||
|       // Compressed vector f and beta(k2) | ||||
|       //////////////////////////////////////////////////// | ||||
|       f *= Qt(k2-1,Nm-1); | ||||
|       f += lme[k2-1] * evec[k2]; | ||||
|       beta_k = norm2(f); | ||||
|       beta_k = sqrt(beta_k); | ||||
|       std::cout<< GridLogMessage<<" beta(k) = "<<beta_k<<std::endl; | ||||
|        | ||||
|       std::cout<<GridLogIRL<<" beta(k) = "<<beta_k<<std::endl; | ||||
| 	   | ||||
|       RealD betar = 1.0/beta_k; | ||||
|       evec[k2] = betar * f; | ||||
|       lme[k2-1] = beta_k; | ||||
|        | ||||
| 	   | ||||
|       //////////////////////////////////////////////////// | ||||
|       // Convergence test | ||||
|       //////////////////////////////////////////////////// | ||||
|       for(int k=0; k<Nm; ++k){     | ||||
| 	eval2[k] = eval[k]; | ||||
| 	lme2[k] = lme[k]; | ||||
|       } | ||||
|       Qt = Eigen::MatrixXd::Identity(Nm,Nm); | ||||
|       diagonalize(eval2,lme2,Nk,Nm,Qt,grid); | ||||
|        | ||||
|       for(int k = 0; k<Nk; ++k) B[k]=0.0; | ||||
|        | ||||
|       for(int j = 0; j<Nk; ++j){ | ||||
| 	for(int k = 0; k<Nk; ++k){ | ||||
| 	  B[j].checkerboard = evec[k].checkerboard; | ||||
| 	  B[j] += Qt(j,k) * evec[k]; | ||||
| 	} | ||||
|       } | ||||
|  | ||||
|       std::cout<<GridLogIRL <<" Diagonalized "<<std::endl; | ||||
| 	   | ||||
|       Nconv = 0; | ||||
|       for(int i=0; i<Nk; ++i){ | ||||
| 	 | ||||
| 	_Linop.HermOp(B[i],v); | ||||
| 	     | ||||
| 	RealD vnum = real(innerProduct(B[i],v)); // HermOp. | ||||
| 	RealD vden = norm2(B[i]); | ||||
| 	eval2[i] = vnum/vden; | ||||
| 	v -= eval2[i]*B[i]; | ||||
| 	RealD vv = norm2(v); | ||||
| 	 | ||||
| 	std::cout.precision(13); | ||||
| 	std::cout << GridLogMessage << "[" << std::setw(3)<< std::setiosflags(std::ios_base::right) <<i<<"] "; | ||||
| 	std::cout << "eval = "<<std::setw(25)<< std::setiosflags(std::ios_base::left)<< eval2[i]; | ||||
| 	std::cout << " |H B[i] - eval[i]B[i]|^2 "<< std::setw(25)<< std::setiosflags(std::ios_base::right)<< vv<< std::endl; | ||||
| 	 | ||||
| 	// change the criteria as evals are supposed to be sorted, all evals smaller(larger) than Nstop should have converged | ||||
| 	if((vv<eresid*eresid) && (i == Nconv) ){ | ||||
| 	  Iconv[Nconv] = i; | ||||
| 	  ++Nconv; | ||||
| 	} | ||||
| 	 | ||||
|       }  // i-loop end | ||||
|        | ||||
|       std::cout<< GridLogMessage <<" #modes converged: "<<Nconv<<std::endl; | ||||
|       if (iter >= MinRestart) { | ||||
|  | ||||
|       if( Nconv>=Nstop ){ | ||||
| 	goto converged; | ||||
|       } | ||||
|     } // end of iter loop | ||||
|      | ||||
|     std::cout << GridLogMessage <<"**************************************************************************"<< std::endl; | ||||
|     std::cout<< GridLogError    <<" ImplicitlyRestartedLanczos::calc() NOT converged."; | ||||
|     std::cout << GridLogMessage <<"**************************************************************************"<< std::endl; | ||||
| 	std::cout << GridLogIRL << "Test convergence: rotate subset of vectors to test convergence " << std::endl; | ||||
|  | ||||
| 	Field B(grid); B.checkerboard = evec[0].checkerboard; | ||||
|  | ||||
| 	//  power of two search pattern;  not every evalue in eval2 is assessed. | ||||
| 	for(int jj = 1; jj<=Nstop; jj*=2){ | ||||
| 	  int j = Nstop-jj; | ||||
| 	  RealD e = eval2_copy[j]; // Discard the evalue | ||||
| 	  basisRotateJ(B,evec,Qt,j,0,Nk,Nm);	     | ||||
| 	  if( _Tester.TestConvergence(j,eresid,B,e,evalMaxApprox) ) { | ||||
| 	    if ( j > Nconv ) { | ||||
| 	      Nconv=j+1; | ||||
| 	      jj=Nstop; // Terminate the scan | ||||
| 	    } | ||||
| 	  } | ||||
| 	} | ||||
| 	// Do evec[0] for good measure | ||||
| 	{  | ||||
| 	  int j=0; | ||||
| 	  RealD e = eval2_copy[0];  | ||||
| 	  basisRotateJ(B,evec,Qt,j,0,Nk,Nm);	     | ||||
| 	  _Tester.TestConvergence(j,eresid,B,e,evalMaxApprox); | ||||
| 	} | ||||
| 	// test if we converged, if so, terminate | ||||
| 	std::cout<<GridLogIRL<<" #modes converged: >= "<<Nconv<<"/"<<Nstop<<std::endl; | ||||
| 	//	if( Nconv>=Nstop || beta_k < betastp){ | ||||
| 	if( Nconv>=Nstop){ | ||||
| 	  goto converged; | ||||
| 	} | ||||
| 	   | ||||
|       } else { | ||||
| 	std::cout << GridLogIRL << "iter < MinRestart: do not yet test for convergence\n"; | ||||
|       } // end of iter loop | ||||
|     } | ||||
|  | ||||
|     std::cout<<GridLogError<<"\n NOT converged.\n"; | ||||
|     abort(); | ||||
| 	 | ||||
|   converged: | ||||
|     // Sorting | ||||
|     eval.resize(Nconv); | ||||
|     evec.resize(Nconv,grid); | ||||
|     for(int i=0; i<Nconv; ++i){ | ||||
|       eval[i] = eval2[Iconv[i]]; | ||||
|       evec[i] = B[Iconv[i]]; | ||||
|     { | ||||
|       Field B(grid); B.checkerboard = evec[0].checkerboard; | ||||
|       basisRotate(evec,Qt,0,Nk,0,Nk,Nm);	     | ||||
|       std::cout << GridLogIRL << " Rotated basis"<<std::endl; | ||||
|       Nconv=0; | ||||
|       ////////////////////////////////////////////////////////////////////// | ||||
|       // Full final convergence test; unconditionally applied | ||||
|       ////////////////////////////////////////////////////////////////////// | ||||
|       for(int j = 0; j<=Nk; j++){ | ||||
| 	B=evec[j]; | ||||
| 	if( _Tester.ReconstructEval(j,eresid,B,eval2[j],evalMaxApprox) ) { | ||||
| 	  Nconv++; | ||||
| 	} | ||||
|       } | ||||
|  | ||||
|       if ( Nconv < Nstop ) | ||||
| 	std::cout << GridLogIRL << "Nconv ("<<Nconv<<") < Nstop ("<<Nstop<<")"<<std::endl; | ||||
|  | ||||
|       eval=eval2; | ||||
|  | ||||
|       basisSortInPlace(evec,eval,reverse); | ||||
|        | ||||
|     } | ||||
|     _sort.push(eval,evec,Nconv); | ||||
|      | ||||
|     std::cout << GridLogMessage <<"**************************************************************************"<< std::endl; | ||||
|     std::cout << GridLogMessage << "ImplicitlyRestartedLanczos CONVERGED ; Summary :\n"; | ||||
|     std::cout << GridLogMessage <<"**************************************************************************"<< std::endl; | ||||
|     std::cout << GridLogMessage << " -- Iterations  = "<< iter   << "\n"; | ||||
|     std::cout << GridLogMessage << " -- beta(k)     = "<< beta_k << "\n"; | ||||
|     std::cout << GridLogMessage << " -- Nconv       = "<< Nconv  << "\n"; | ||||
|     std::cout << GridLogMessage <<"**************************************************************************"<< std::endl; | ||||
|         | ||||
|     std::cout << GridLogIRL <<"**************************************************************************"<< std::endl; | ||||
|     std::cout << GridLogIRL << "ImplicitlyRestartedLanczos CONVERGED ; Summary :\n"; | ||||
|     std::cout << GridLogIRL <<"**************************************************************************"<< std::endl; | ||||
|     std::cout << GridLogIRL << " -- Iterations  = "<< iter   << "\n"; | ||||
|     std::cout << GridLogIRL << " -- beta(k)     = "<< beta_k << "\n"; | ||||
|     std::cout << GridLogIRL << " -- Nconv       = "<< Nconv  << "\n"; | ||||
|     std::cout << GridLogIRL <<"**************************************************************************"<< std::endl; | ||||
|   } | ||||
|  | ||||
| private: | ||||
|  private: | ||||
| /* Saad PP. 195 | ||||
| 1. Choose an initial vector v1 of 2-norm unity. Set β1 ≡ 0, v0 ≡ 0 | ||||
| 2. For k = 1,2,...,m Do: | ||||
| @@ -360,28 +568,38 @@ private: | ||||
|   { | ||||
|     const RealD tiny = 1.0e-20; | ||||
|     assert( k< Nm ); | ||||
|      | ||||
|     _poly(_Linop,evec[k],w);      // 3. wk:=Avk−βkv_{k−1} | ||||
|      | ||||
|  | ||||
|     GridStopWatch gsw_op,gsw_o; | ||||
|  | ||||
|     Field& evec_k = evec[k]; | ||||
|  | ||||
|     _HermOp(evec_k,w);    std::cout<<GridLogIRL << "Poly(HermOp)" <<std::endl; | ||||
|  | ||||
|     if(k>0) w -= lme[k-1] * evec[k-1]; | ||||
|      | ||||
|     ComplexD zalph = innerProduct(evec[k],w); // 4. αk:=(wk,vk) | ||||
|  | ||||
|     ComplexD zalph = innerProduct(evec_k,w); // 4. αk:=(wk,vk) | ||||
|     RealD     alph = real(zalph); | ||||
|      | ||||
|     w = w - alph * evec[k];// 5. wk:=wk−αkvk | ||||
|      | ||||
|  | ||||
|     w = w - alph * evec_k;// 5. wk:=wk−αkvk | ||||
|  | ||||
|     RealD beta = normalise(w); // 6. βk+1 := ∥wk∥2. If βk+1 = 0 then Stop | ||||
|     // 7. vk+1 := wk/βk+1 | ||||
|      | ||||
|  | ||||
|     lmd[k] = alph; | ||||
|     lme[k] = beta; | ||||
|      | ||||
|     if ( k > 0 ) orthogonalize(w,evec,k); // orthonormalise | ||||
|     if ( k < Nm-1) evec[k+1] = w; | ||||
|      | ||||
|     if ( beta < tiny ) std::cout << GridLogMessage << " beta is tiny "<<beta<<std::endl; | ||||
|  | ||||
|     if (k>0 && k % orth_period == 0) { | ||||
|       orthogonalize(w,evec,k); // orthonormalise | ||||
|       std::cout<<GridLogIRL << "Orthogonalised " <<std::endl; | ||||
|     } | ||||
|  | ||||
|     if(k < Nm-1) evec[k+1] = w; | ||||
|  | ||||
|     std::cout<<GridLogIRL << "alpha[" << k << "] = " << zalph << " beta[" << k << "] = "<<beta<<std::endl; | ||||
|     if ( beta < tiny )  | ||||
|       std::cout<<GridLogIRL << " beta is tiny "<<beta<<std::endl; | ||||
|   } | ||||
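
The body of `step` is the textbook Lanczos three-term recurrence cited above (Saad p.195), applied to the operator wrapped in `_HermOp`:

```latex
w = A\,v_k - \beta_k v_{k-1},\qquad
\alpha_k = \langle v_k,\, w\rangle,\qquad
w \leftarrow w - \alpha_k v_k,\qquad
\beta_{k+1} = \lVert w\rVert,\qquad
v_{k+1} = w / \beta_{k+1},
```

with a full re-orthogonalisation of $w$ against $v_1,\dots,v_k$ every `orth_period` steps to suppress the loss of orthogonality from round-off.
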
|        | ||||
|  | ||||
|   void diagonalize_Eigen(std::vector<RealD>& lmd, std::vector<RealD>& lme,  | ||||
| 			 int Nk, int Nm,   | ||||
| 			 Eigen::MatrixXd & Qt, // Nm x Nm | ||||
| @@ -404,11 +622,11 @@ private: | ||||
|       } | ||||
|     } | ||||
|   } | ||||
|   /////////////////////////////////////////////////////////////////////////// | ||||
|   // File could end here if settle on Eigen ??? | ||||
|   /////////////////////////////////////////////////////////////////////////// | ||||
|  | ||||
|   void qr_decomp(std::vector<RealD>& lmd,   // Nm  | ||||
|   /////////////////////////////////////////////////////////////////////////// | ||||
|   // File could end here if settle on Eigen ??? !!! | ||||
|   /////////////////////////////////////////////////////////////////////////// | ||||
|   void QR_decomp(std::vector<RealD>& lmd,   // Nm  | ||||
| 		 std::vector<RealD>& lme,   // Nm  | ||||
| 		 int Nk, int Nm,            // Nk, Nm | ||||
| 		 Eigen::MatrixXd& Qt,       // Nm x Nm matrix | ||||
| @@ -575,51 +793,50 @@ void diagonalize_lapack(std::vector<RealD>& lmd, | ||||
| #endif | ||||
| } | ||||
|  | ||||
|   void diagonalize_QR(std::vector<RealD>& lmd, std::vector<RealD>& lme,  | ||||
| 		      int Nk, int Nm,    | ||||
| 		      Eigen::MatrixXd & Qt, | ||||
| 		      GridBase *grid) | ||||
|   { | ||||
|     int Niter = 100*Nm; | ||||
|     int kmin = 1; | ||||
|     int kmax = Nk; | ||||
|  | ||||
|     // (this should be more sophisticated) | ||||
|     for(int iter=0; iter<Niter; ++iter){ | ||||
|        | ||||
|       // determination of 2x2 leading submatrix | ||||
|       RealD dsub = lmd[kmax-1]-lmd[kmax-2]; | ||||
|       RealD dd = sqrt(dsub*dsub + 4.0*lme[kmax-2]*lme[kmax-2]); | ||||
|       RealD Dsh = 0.5*(lmd[kmax-2]+lmd[kmax-1] +dd*(dsub/fabs(dsub))); | ||||
|       // (Dsh: shift) | ||||
| 	 | ||||
|       // transformation | ||||
|       qr_decomp(lmd,lme,Nk,Nm,Qt,Dsh,kmin,kmax); // Nk, Nm | ||||
| 	 | ||||
|       // Convergence criterion (redef of kmin and kmax) | ||||
|       for(int j=kmax-1; j>= kmin; --j){ | ||||
| 	RealD dds = fabs(lmd[j-1])+fabs(lmd[j]); | ||||
| 	if(fabs(lme[j-1])+dds > dds){ | ||||
| 	  kmax = j+1; | ||||
| 	  goto continued; | ||||
| 	} | ||||
|       } | ||||
|       Niter = iter; | ||||
|       return; | ||||
|  | ||||
|     continued: | ||||
|       for(int j=0; j<kmax-1; ++j){ | ||||
| 	RealD dds = fabs(lmd[j])+fabs(lmd[j+1]); | ||||
| 	if(fabs(lme[j])+dds > dds){ | ||||
| 	  kmin = j+1; | ||||
| 	  break; | ||||
| 	} | ||||
| void diagonalize_QR(std::vector<RealD>& lmd, std::vector<RealD>& lme,  | ||||
| 		    int Nk, int Nm,    | ||||
| 		    Eigen::MatrixXd & Qt, | ||||
| 		    GridBase *grid) | ||||
| { | ||||
|   int QRiter = 100*Nm; | ||||
|   int kmin = 1; | ||||
|   int kmax = Nk; | ||||
|    | ||||
|   // (this should be more sophisticated) | ||||
|   for(int iter=0; iter<QRiter; ++iter){ | ||||
|      | ||||
|     // determination of 2x2 leading submatrix | ||||
|     RealD dsub = lmd[kmax-1]-lmd[kmax-2]; | ||||
|     RealD dd = sqrt(dsub*dsub + 4.0*lme[kmax-2]*lme[kmax-2]); | ||||
|     RealD Dsh = 0.5*(lmd[kmax-2]+lmd[kmax-1] +dd*(dsub/fabs(dsub))); | ||||
|     // (Dsh: shift) | ||||
|      | ||||
|     // transformation | ||||
|     QR_decomp(lmd,lme,Nk,Nm,Qt,Dsh,kmin,kmax); // Nk, Nm | ||||
|      | ||||
|     // Convergence criterion (redef of kmin and kmax) | ||||
|     for(int j=kmax-1; j>= kmin; --j){ | ||||
|       RealD dds = fabs(lmd[j-1])+fabs(lmd[j]); | ||||
|       if(fabs(lme[j-1])+dds > dds){ | ||||
| 	kmax = j+1; | ||||
| 	goto continued; | ||||
|       } | ||||
|     } | ||||
|     QRiter = iter; | ||||
|     return; | ||||
|      | ||||
|   continued: | ||||
|     for(int j=0; j<kmax-1; ++j){ | ||||
|       RealD dds = fabs(lmd[j])+fabs(lmd[j+1]); | ||||
|       if(fabs(lme[j])+dds > dds){ | ||||
| 	kmin = j+1; | ||||
| 	break; | ||||
|       } | ||||
|     } | ||||
|     std::cout << GridLogError << "[QL method] Error - Too many iterations: "<<Niter<<"\n"; | ||||
|     abort(); | ||||
|   } | ||||
|  | ||||
|  }; | ||||
|   std::cout << GridLogError << "[QL method] Error - Too many iterations: "<<QRiter<<"\n"; | ||||
|   abort(); | ||||
| } | ||||
| }; | ||||
| } | ||||
| #endif | ||||
|   | ||||
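A short aside on the shift used in diagonalize_QR above, read off directly from the code: with d_k = lmd[kmax-1], d_{k-1} = lmd[kmax-2] and e_{k-1} = lme[kmax-2], the trailing 2x2 block of the tridiagonal matrix and its eigenvalues are

\[
  T = \begin{pmatrix} d_{k-1} & e_{k-1} \\ e_{k-1} & d_k \end{pmatrix},
  \qquad
  \lambda_{\pm} = \tfrac{1}{2}\Bigl( d_{k-1} + d_k \pm \sqrt{(d_k - d_{k-1})^2 + 4\,e_{k-1}^2} \Bigr),
\]

and Dsh is taken with the sign of (d_k - d_{k-1}),

\[
  D_{\rm sh} = \tfrac{1}{2}\Bigl( d_{k-1} + d_k + \mathrm{sign}(d_k - d_{k-1})\,\sqrt{(d_k - d_{k-1})^2 + 4\,e_{k-1}^2} \Bigr),
\]

i.e. the eigenvalue of that block lying closer to d_k, the usual Wilkinson-style shift for an implicitly shifted QR sweep on a symmetric tridiagonal matrix.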
							
								
								
									
lib/algorithms/iterative/LocalCoherenceLanczos.h (new file, 352 lines)
									
								
							| @@ -0,0 +1,352 @@ | ||||
|     /************************************************************************************* | ||||
|  | ||||
|     Grid physics library, www.github.com/paboyle/Grid  | ||||
|  | ||||
|     Source file: ./lib/algorithms/iterative/LocalCoherenceLanczos.h | ||||
|  | ||||
|     Copyright (C) 2015 | ||||
|  | ||||
| Author: Christoph Lehner <clehner@bnl.gov> | ||||
| Author: paboyle <paboyle@ph.ed.ac.uk> | ||||
|  | ||||
|     This program is free software; you can redistribute it and/or modify | ||||
|     it under the terms of the GNU General Public License as published by | ||||
|     the Free Software Foundation; either version 2 of the License, or | ||||
|     (at your option) any later version. | ||||
|  | ||||
|     This program is distributed in the hope that it will be useful, | ||||
|     but WITHOUT ANY WARRANTY; without even the implied warranty of | ||||
|     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the | ||||
|     GNU General Public License for more details. | ||||
|  | ||||
|     You should have received a copy of the GNU General Public License along | ||||
|     with this program; if not, write to the Free Software Foundation, Inc., | ||||
|     51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. | ||||
|  | ||||
|     See the full license in the file "LICENSE" in the top level distribution directory | ||||
|     *************************************************************************************/ | ||||
|     /*  END LEGAL */ | ||||
| #ifndef GRID_LOCAL_COHERENCE_IRL_H | ||||
| #define GRID_LOCAL_COHERENCE_IRL_H | ||||
| namespace Grid {  | ||||
| struct LanczosParams : Serializable { | ||||
|  public: | ||||
|   GRID_SERIALIZABLE_CLASS_MEMBERS(LanczosParams, | ||||
| 				  ChebyParams, Cheby,/*Chebyshev*/ | ||||
| 				  int, Nstop,    /*Vecs in Lanczos must converge Nstop < Nk < Nm*/ | ||||
| 				  int, Nk,       /*Vecs in Lanczos seek converge*/ | ||||
| 				  int, Nm,       /*Total vecs in Lanczos include restart*/ | ||||
| 				  RealD, resid,  /*residual*/ | ||||
|  				  int, MaxIt,  | ||||
| 				  RealD, betastp,  /* ? */ | ||||
| 				  int, MinRes);    // Must restart | ||||
| }; | ||||
|  | ||||
| struct LocalCoherenceLanczosParams : Serializable { | ||||
|  public: | ||||
|   GRID_SERIALIZABLE_CLASS_MEMBERS(LocalCoherenceLanczosParams, | ||||
| 				  bool, doFine, | ||||
| 				  bool, doFineRead, | ||||
| 				  bool, doCoarse, | ||||
| 	       			  bool, doCoarseRead, | ||||
| 				  LanczosParams, FineParams, | ||||
| 				  LanczosParams, CoarseParams, | ||||
| 				  ChebyParams,   Smoother, | ||||
| 				  RealD        , coarse_relax_tol, | ||||
| 				  std::vector<int>, blockSize, | ||||
| 				  std::string, config, | ||||
| 				  std::vector < std::complex<double>  >, omega, | ||||
| 				  RealD, mass, | ||||
| 				  RealD, M5); | ||||
| }; | ||||
|  | ||||
| // Duplicate functionality: ProjectedHermOp could be replaced by ProjectedFunctionHermOp with the trivial (identity) function | ||||
| template<class Fobj,class CComplex,int nbasis> | ||||
| class ProjectedHermOp : public LinearFunction<Lattice<iVector<CComplex,nbasis > > > { | ||||
| public: | ||||
|   typedef iVector<CComplex,nbasis >           CoarseSiteVector; | ||||
|   typedef Lattice<CoarseSiteVector>           CoarseField; | ||||
|   typedef Lattice<CComplex>   CoarseScalar; // used for inner products on fine field | ||||
|   typedef Lattice<Fobj>          FineField; | ||||
|  | ||||
|   LinearOperatorBase<FineField> &_Linop; | ||||
|   Aggregation<Fobj,CComplex,nbasis> &_Aggregate; | ||||
|  | ||||
|   ProjectedHermOp(LinearOperatorBase<FineField>& linop,  Aggregation<Fobj,CComplex,nbasis> &aggregate) :  | ||||
|     _Linop(linop), | ||||
|     _Aggregate(aggregate)  {  }; | ||||
|  | ||||
|   void operator()(const CoarseField& in, CoarseField& out) { | ||||
|  | ||||
|     GridBase *FineGrid = _Aggregate.FineGrid; | ||||
|     FineField fin(FineGrid); | ||||
|     FineField fout(FineGrid); | ||||
|  | ||||
|     _Aggregate.PromoteFromSubspace(in,fin);    std::cout<<GridLogIRL<<"ProjectedHermop : Promote to fine"<<std::endl; | ||||
|     _Linop.HermOp(fin,fout);                   std::cout<<GridLogIRL<<"ProjectedHermop : HermOp (fine) "<<std::endl; | ||||
|     _Aggregate.ProjectToSubspace(out,fout);    std::cout<<GridLogIRL<<"ProjectedHermop : Project to coarse "<<std::endl; | ||||
|   } | ||||
| }; | ||||
|  | ||||
| template<class Fobj,class CComplex,int nbasis> | ||||
| class ProjectedFunctionHermOp : public LinearFunction<Lattice<iVector<CComplex,nbasis > > > { | ||||
| public: | ||||
|   typedef iVector<CComplex,nbasis >           CoarseSiteVector; | ||||
|   typedef Lattice<CoarseSiteVector>           CoarseField; | ||||
|   typedef Lattice<CComplex>   CoarseScalar; // used for inner products on fine field | ||||
|   typedef Lattice<Fobj>          FineField; | ||||
|  | ||||
|  | ||||
|   OperatorFunction<FineField>   & _poly; | ||||
|   LinearOperatorBase<FineField> &_Linop; | ||||
|   Aggregation<Fobj,CComplex,nbasis> &_Aggregate; | ||||
|  | ||||
|   ProjectedFunctionHermOp(OperatorFunction<FineField> & poly,LinearOperatorBase<FineField>& linop,  | ||||
| 			  Aggregation<Fobj,CComplex,nbasis> &aggregate) :  | ||||
|     _poly(poly), | ||||
|     _Linop(linop), | ||||
|     _Aggregate(aggregate)  {  }; | ||||
|  | ||||
|   void operator()(const CoarseField& in, CoarseField& out) { | ||||
|  | ||||
|     GridBase *FineGrid = _Aggregate.FineGrid; | ||||
|  | ||||
|     FineField fin(FineGrid) ;fin.checkerboard  =_Aggregate.checkerboard; | ||||
|     FineField fout(FineGrid);fout.checkerboard =_Aggregate.checkerboard; | ||||
|      | ||||
|     _Aggregate.PromoteFromSubspace(in,fin);    std::cout<<GridLogIRL<<"ProjectedFunctionHermop : Promote to fine"<<std::endl; | ||||
|     _poly(_Linop,fin,fout);                    std::cout<<GridLogIRL<<"ProjectedFunctionHermop : Poly "<<std::endl; | ||||
|     _Aggregate.ProjectToSubspace(out,fout);    std::cout<<GridLogIRL<<"ProjectedFunctionHermop : Project to coarse "<<std::endl; | ||||
|   } | ||||
| }; | ||||
|  | ||||
| template<class Fobj,class CComplex,int nbasis> | ||||
| class ImplicitlyRestartedLanczosSmoothedTester  : public ImplicitlyRestartedLanczosTester<Lattice<iVector<CComplex,nbasis > > > | ||||
| { | ||||
|  public: | ||||
|   typedef iVector<CComplex,nbasis >           CoarseSiteVector; | ||||
|   typedef Lattice<CoarseSiteVector>           CoarseField; | ||||
|   typedef Lattice<CComplex>   CoarseScalar; // used for inner products on fine field | ||||
|   typedef Lattice<Fobj>          FineField; | ||||
|  | ||||
|   LinearFunction<CoarseField> & _Poly; | ||||
|   OperatorFunction<FineField>   & _smoother; | ||||
|   LinearOperatorBase<FineField> &_Linop; | ||||
|   Aggregation<Fobj,CComplex,nbasis> &_Aggregate; | ||||
|   RealD                             _coarse_relax_tol; | ||||
|   ImplicitlyRestartedLanczosSmoothedTester(LinearFunction<CoarseField>   &Poly, | ||||
| 					   OperatorFunction<FineField>   &smoother, | ||||
| 					   LinearOperatorBase<FineField> &Linop, | ||||
| 					   Aggregation<Fobj,CComplex,nbasis> &Aggregate, | ||||
| 					   RealD coarse_relax_tol=5.0e3)  | ||||
|     : _smoother(smoother), _Linop(Linop),_Aggregate(Aggregate), _Poly(Poly), _coarse_relax_tol(coarse_relax_tol)  {    }; | ||||
|  | ||||
|   int TestConvergence(int j,RealD eresid,CoarseField &B, RealD &eval,RealD evalMaxApprox) | ||||
|   { | ||||
|     CoarseField v(B); | ||||
|     RealD eval_poly = eval; | ||||
|     // Apply operator | ||||
|     _Poly(B,v); | ||||
|  | ||||
|     RealD vnum = real(innerProduct(B,v)); // HermOp. | ||||
|     RealD vden = norm2(B); | ||||
|     RealD vv0  = norm2(v); | ||||
|     eval   = vnum/vden; | ||||
|     v -= eval*B; | ||||
|  | ||||
|     RealD vv = norm2(v) / ::pow(evalMaxApprox,2.0); | ||||
|  | ||||
|     std::cout.precision(13); | ||||
|     std::cout<<GridLogIRL  << "[" << std::setw(3)<<j<<"] " | ||||
| 	     <<"eval = "<<std::setw(25)<< eval << " (" << eval_poly << ")" | ||||
| 	     <<" |H B[i] - eval[i]B[i]|^2 / evalMaxApprox^2 " << std::setw(25) << vv | ||||
| 	     <<std::endl; | ||||
|  | ||||
|     int conv=0; | ||||
|     if( (vv<eresid*eresid) ) conv = 1; | ||||
|     return conv; | ||||
|   } | ||||
|   int ReconstructEval(int j,RealD eresid,CoarseField &B, RealD &eval,RealD evalMaxApprox) | ||||
|   { | ||||
|     GridBase *FineGrid = _Aggregate.FineGrid; | ||||
|  | ||||
|     int checkerboard   = _Aggregate.checkerboard; | ||||
|  | ||||
|     FineField fB(FineGrid);fB.checkerboard =checkerboard; | ||||
|     FineField fv(FineGrid);fv.checkerboard =checkerboard; | ||||
|  | ||||
|     _Aggregate.PromoteFromSubspace(B,fv); | ||||
|     _smoother(_Linop,fv,fB);  | ||||
|  | ||||
|     RealD eval_poly = eval; | ||||
|     _Linop.HermOp(fB,fv); | ||||
|  | ||||
|     RealD vnum = real(innerProduct(fB,fv)); // HermOp. | ||||
|     RealD vden = norm2(fB); | ||||
|     RealD vv0  = norm2(fv); | ||||
|     eval   = vnum/vden; | ||||
|     fv -= eval*fB; | ||||
|     RealD vv = norm2(fv) / ::pow(evalMaxApprox,2.0); | ||||
|  | ||||
|     std::cout.precision(13); | ||||
|     std::cout<<GridLogIRL  << "[" << std::setw(3)<<j<<"] " | ||||
| 	     <<"eval = "<<std::setw(25)<< eval << " (" << eval_poly << ")" | ||||
| 	     <<" |H B[i] - eval[i]B[i]|^2 / evalMaxApprox^2 " << std::setw(25) << vv | ||||
| 	     <<std::endl; | ||||
|     if ( j > nbasis ) eresid = eresid*_coarse_relax_tol; | ||||
|     if( (vv<eresid*eresid) ) return 1; | ||||
|     return 0; | ||||
|   } | ||||
| }; | ||||
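Both testers above apply the same acceptance rule; spelled out, with B the candidate vector, A the operator actually applied (the projected Chebyshev operator in TestConvergence; in ReconstructEval the fine Hermitian operator acting on the smoothed, promoted vector), lambda_max = evalMaxApprox and epsilon = eresid:

\[
  \lambda = \frac{\operatorname{Re}\,\langle B, A B\rangle}{\langle B, B\rangle},
  \qquad
  r = \frac{\lVert A B - \lambda B \rVert^{2}}{\lambda_{\max}^{2}},
  \qquad
  \text{accept if } r < \epsilon^{2},
\]

with ReconstructEval additionally relaxing epsilon by coarse_relax_tol for vectors beyond the first nbasis.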
|  | ||||
| //////////////////////////////////////////// | ||||
| // Make serializable Lanczos params | ||||
| //////////////////////////////////////////// | ||||
| template<class Fobj,class CComplex,int nbasis> | ||||
| class LocalCoherenceLanczos  | ||||
| { | ||||
| public: | ||||
|   typedef iVector<CComplex,nbasis >           CoarseSiteVector; | ||||
|   typedef Lattice<CComplex>                   CoarseScalar; // used for inner products on fine field | ||||
|   typedef Lattice<CoarseSiteVector>           CoarseField; | ||||
|   typedef Lattice<Fobj>                       FineField; | ||||
|  | ||||
| protected: | ||||
|   GridBase *_CoarseGrid; | ||||
|   GridBase *_FineGrid; | ||||
|   int _checkerboard; | ||||
|   LinearOperatorBase<FineField>                 & _FineOp; | ||||
|    | ||||
|   // FIXME replace Aggregation with vector of fine; the code reuse is too small for | ||||
|   // the hassle and complexity of cross coupling. | ||||
|   Aggregation<Fobj,CComplex,nbasis>               _Aggregate;   | ||||
|   std::vector<RealD>                              evals_fine; | ||||
|   std::vector<RealD>                              evals_coarse;  | ||||
|   std::vector<CoarseField>                        evec_coarse; | ||||
| public: | ||||
|   LocalCoherenceLanczos(GridBase *FineGrid, | ||||
| 		GridBase *CoarseGrid, | ||||
| 		LinearOperatorBase<FineField> &FineOp, | ||||
| 		int checkerboard) : | ||||
|     _CoarseGrid(CoarseGrid), | ||||
|     _FineGrid(FineGrid), | ||||
|     _Aggregate(CoarseGrid,FineGrid,checkerboard), | ||||
|     _FineOp(FineOp), | ||||
|     _checkerboard(checkerboard) | ||||
|   { | ||||
|     evals_fine.resize(0); | ||||
|     evals_coarse.resize(0); | ||||
|   }; | ||||
|   void Orthogonalise(void ) { _Aggregate.Orthogonalise(); } | ||||
|  | ||||
|   template<typename T>  static RealD normalise(T& v)  | ||||
|   { | ||||
|     RealD nn = norm2(v); | ||||
|     nn = ::sqrt(nn); | ||||
|     v = v * (1.0/nn); | ||||
|     return nn; | ||||
|   } | ||||
|  | ||||
|   void fakeFine(void) | ||||
|   { | ||||
|     int Nk = nbasis; | ||||
|     _Aggregate.subspace.resize(Nk,_FineGrid); | ||||
|     _Aggregate.subspace[0]=1.0; | ||||
|     _Aggregate.subspace[0].checkerboard=_checkerboard; | ||||
|     normalise(_Aggregate.subspace[0]); | ||||
|     PlainHermOp<FineField>    Op(_FineOp); | ||||
|     for(int k=1;k<Nk;k++){ | ||||
|       _Aggregate.subspace[k].checkerboard=_checkerboard; | ||||
|       Op(_Aggregate.subspace[k-1],_Aggregate.subspace[k]); | ||||
|       normalise(_Aggregate.subspace[k]); | ||||
|     } | ||||
|   } | ||||
|  | ||||
|   void testFine(RealD resid)  | ||||
|   { | ||||
|     assert(evals_fine.size() == nbasis); | ||||
|     assert(_Aggregate.subspace.size() == nbasis); | ||||
|     PlainHermOp<FineField>    Op(_FineOp); | ||||
|     ImplicitlyRestartedLanczosHermOpTester<FineField> SimpleTester(Op); | ||||
|     for(int k=0;k<nbasis;k++){ | ||||
|       assert(SimpleTester.ReconstructEval(k,resid,_Aggregate.subspace[k],evals_fine[k],1.0)==1); | ||||
|     } | ||||
|   } | ||||
|  | ||||
|   void testCoarse(RealD resid,ChebyParams cheby_smooth,RealD relax)  | ||||
|   { | ||||
|     assert(evals_fine.size() == nbasis); | ||||
|     assert(_Aggregate.subspace.size() == nbasis); | ||||
|     ////////////////////////////////////////////////////////////////////////////////////////////////// | ||||
|     // create a smoother and see if we can get a cheap convergence test and smooth inside the IRL | ||||
|     ////////////////////////////////////////////////////////////////////////////////////////////////// | ||||
|     Chebyshev<FineField>                          ChebySmooth(cheby_smooth); | ||||
|     ProjectedFunctionHermOp<Fobj,CComplex,nbasis> ChebyOp (ChebySmooth,_FineOp,_Aggregate); | ||||
|     ImplicitlyRestartedLanczosSmoothedTester<Fobj,CComplex,nbasis> ChebySmoothTester(ChebyOp,ChebySmooth,_FineOp,_Aggregate,relax); | ||||
|  | ||||
|     for(int k=0;k<evec_coarse.size();k++){ | ||||
|       if ( k < nbasis ) {  | ||||
| 	assert(ChebySmoothTester.ReconstructEval(k,resid,evec_coarse[k],evals_coarse[k],1.0)==1); | ||||
|       } else {  | ||||
| 	assert(ChebySmoothTester.ReconstructEval(k,resid*relax,evec_coarse[k],evals_coarse[k],1.0)==1); | ||||
|       } | ||||
|     } | ||||
|   } | ||||
|  | ||||
|   void calcFine(ChebyParams cheby_parms,int Nstop,int Nk,int Nm,RealD resid,  | ||||
| 		RealD MaxIt, RealD betastp, int MinRes) | ||||
|   { | ||||
|     assert(nbasis<=Nm); | ||||
|     Chebyshev<FineField>      Cheby(cheby_parms); | ||||
|     FunctionHermOp<FineField> ChebyOp(Cheby,_FineOp); | ||||
|     PlainHermOp<FineField>    Op(_FineOp); | ||||
|  | ||||
|     evals_fine.resize(Nm); | ||||
|     _Aggregate.subspace.resize(Nm,_FineGrid); | ||||
|  | ||||
|     ImplicitlyRestartedLanczos<FineField> IRL(ChebyOp,Op,Nstop,Nk,Nm,resid,MaxIt,betastp,MinRes); | ||||
|  | ||||
|     FineField src(_FineGrid); src=1.0; src.checkerboard = _checkerboard; | ||||
|  | ||||
|     int Nconv; | ||||
|     IRL.calc(evals_fine,_Aggregate.subspace,src,Nconv,false); | ||||
|      | ||||
|     // Shrink down to number saved | ||||
|     assert(Nstop>=nbasis); | ||||
|     assert(Nconv>=nbasis); | ||||
|     evals_fine.resize(nbasis); | ||||
|     _Aggregate.subspace.resize(nbasis,_FineGrid); | ||||
|   } | ||||
|   void calcCoarse(ChebyParams cheby_op,ChebyParams cheby_smooth,RealD relax, | ||||
| 		  int Nstop, int Nk, int Nm,RealD resid,  | ||||
| 		  RealD MaxIt, RealD betastp, int MinRes) | ||||
|   { | ||||
|     Chebyshev<FineField>                          Cheby(cheby_op); | ||||
|     ProjectedHermOp<Fobj,CComplex,nbasis>         Op(_FineOp,_Aggregate); | ||||
|     ProjectedFunctionHermOp<Fobj,CComplex,nbasis> ChebyOp (Cheby,_FineOp,_Aggregate); | ||||
|     ////////////////////////////////////////////////////////////////////////////////////////////////// | ||||
|     // create a smoother and see if we can get a cheap convergence test and smooth inside the IRL | ||||
|     ////////////////////////////////////////////////////////////////////////////////////////////////// | ||||
|  | ||||
|     Chebyshev<FineField>                                           ChebySmooth(cheby_smooth); | ||||
|     ImplicitlyRestartedLanczosSmoothedTester<Fobj,CComplex,nbasis> ChebySmoothTester(ChebyOp,ChebySmooth,_FineOp,_Aggregate,relax); | ||||
|  | ||||
|     evals_coarse.resize(Nm); | ||||
|     evec_coarse.resize(Nm,_CoarseGrid); | ||||
|  | ||||
|     CoarseField src(_CoarseGrid);     src=1.0;  | ||||
|  | ||||
|     ImplicitlyRestartedLanczos<CoarseField> IRL(ChebyOp,ChebyOp,ChebySmoothTester,Nstop,Nk,Nm,resid,MaxIt,betastp,MinRes); | ||||
|     int Nconv=0; | ||||
|     IRL.calc(evals_coarse,evec_coarse,src,Nconv,false); | ||||
|     assert(Nconv>=Nstop); | ||||
|     evals_coarse.resize(Nstop); | ||||
|     evec_coarse.resize (Nstop,_CoarseGrid); | ||||
|     for (int i=0;i<Nstop;i++){ | ||||
|       std::cout << i << " Coarse eval = " << evals_coarse[i]  << std::endl; | ||||
|     } | ||||
|   } | ||||
| }; | ||||
|  | ||||
| } | ||||
| #endif | ||||
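To make the intended call sequence of LocalCoherenceLanczos concrete, here is a hedged usage sketch. Only the constructor and method signatures come from the listing above; the grid pointers (FrbGrid, CoarseGrid), the fine Hermitian operator HermOp, the two LanczosParams objects (fine, coarse), the ChebyParams Smoother and the RealD relax are placeholders assumed to be set up by the calling code, and vSpinColourVector / vTComplex are the usual Grid fine-site and coarse-scalar types one would expect to plug in.

const int nbasis = 60;                                    // assumed coarse basis size

LocalCoherenceLanczos<vSpinColourVector, vTComplex, nbasis>
  LCL(FrbGrid, CoarseGrid, HermOp, Odd);                  // fine rb grid, coarse grid, fine op, cb

// 1. Fine-grid IRL builds the nbasis-vector subspace held inside _Aggregate.
LCL.calcFine(fine.Cheby, fine.Nstop, fine.Nk, fine.Nm,
             fine.resid, fine.MaxIt, fine.betastp, fine.MinRes);

// 2. Block Gram-Schmidt the subspace before coarsening.
LCL.Orthogonalise();

// 3. Coarse-grid IRL on the projected operator, with the smoothed fine-grid
//    convergence test relaxed by "relax" beyond the first nbasis vectors.
LCL.calcCoarse(coarse.Cheby, Smoother, relax,
               coarse.Nstop, coarse.Nk, coarse.Nm,
               coarse.resid, coarse.MaxIt, coarse.betastp, coarse.MinRes);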
| @@ -53,16 +53,119 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk> | ||||
|    *     M psi = eta | ||||
|    *********************** | ||||
|    *Odd | ||||
|    * i)   (D_oo)^{\dag} D_oo psi_o = (D_oo)^dag L^{-1}  eta_o | ||||
|    * i)                 D_oo psi_o =  L^{-1}  eta_o | ||||
|    *                        eta_o' = (D_oo)^dag (eta_o - Moe Mee^{-1} eta_e) | ||||
|    * | ||||
|    * Wilson: | ||||
|    *      (D_oo)^{\dag} D_oo psi_o = (D_oo)^dag L^{-1}  eta_o | ||||
|    * Stag: | ||||
|    *      D_oo psi_o = L^{-1}  eta =    (eta_o - Moe Mee^{-1} eta_e) | ||||
|    * | ||||
|    * L^{-1} eta = ( 1             0 ) ( eta_e ) | ||||
|    *              ( -Moe Mee^{-1} 1 ) ( eta_o ) | ||||
|    * | ||||
|    *Even | ||||
|    * ii)  Mee psi_e + Meo psi_o = src_e | ||||
|    * | ||||
|    *   => sol_e = M_ee^-1 * ( src_e - Meo sol_o )... | ||||
|    * | ||||
|    *  | ||||
|    * TODO: Other options: | ||||
|    *  | ||||
|    * a) change checkerboards for Schur e<->o | ||||
|    * | ||||
|    * Left precon by Moo^-1 | ||||
|    * b) Doo^{dag} M_oo^-dag Moo^-1 Doo psi_0 =  (D_oo)^dag M_oo^-dag Moo^-1 L^{-1}  eta_o | ||||
|    *                              eta_o'     = (D_oo)^dag  M_oo^-dag Moo^-1 (eta_o - Moe Mee^{-1} eta_e) | ||||
|    * | ||||
|    * Right precon by Moo^-1 | ||||
|    * c) M_oo^-dag Doo^{dag} Doo Moo^-1 phi_0 = M_oo^-dag (D_oo)^dag L^{-1}  eta_o | ||||
|    *                              eta_o'     = M_oo^-dag (D_oo)^dag (eta_o - Moe Mee^{-1} eta_e) | ||||
|    *                              psi_o = M_oo^-1 phi_o | ||||
|    * TODO: Deflation  | ||||
|    */ | ||||
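For readers following the comment block above, the factorisation behind it can be written out once (a standard derivation, in the same notation):

\[
  M = \begin{pmatrix} M_{ee} & M_{eo} \\ M_{oe} & M_{oo} \end{pmatrix}
    = \underbrace{\begin{pmatrix} 1 & 0 \\ M_{oe} M_{ee}^{-1} & 1 \end{pmatrix}}_{L}
      \begin{pmatrix} M_{ee} & M_{eo} \\ 0 & D_{oo} \end{pmatrix},
  \qquad
  D_{oo} = M_{oo} - M_{oe} M_{ee}^{-1} M_{eo},
\]

so applying L^{-1} to M psi = eta leaves an odd-only system plus the even reconstruction,

\[
  D_{oo}\,\psi_o = \eta_o - M_{oe} M_{ee}^{-1} \eta_e,
  \qquad
  \psi_e = M_{ee}^{-1}\bigl( \eta_e - M_{eo}\,\psi_o \bigr),
\]

and the Wilson branch solves the Hermitian normal equations D_{oo}^\dagger D_{oo}\,\psi_o = D_{oo}^\dagger(\eta_o - M_{oe} M_{ee}^{-1}\eta_e), exactly case (i) above.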
| namespace Grid { | ||||
|  | ||||
|   /////////////////////////////////////////////////////////////////////////////////////////////////////// | ||||
|   // Take a matrix and form a Red Black solver calling a Herm solver | ||||
|   // Use of RB info prevents making SchurRedBlackSolve conform to standard interface | ||||
|   /////////////////////////////////////////////////////////////////////////////////////////////////////// | ||||
|  | ||||
|   template<class Field> class SchurRedBlackStaggeredSolve { | ||||
|   private: | ||||
|     OperatorFunction<Field> & _HermitianRBSolver; | ||||
|     int CBfactorise; | ||||
|   public: | ||||
|  | ||||
|     ///////////////////////////////////////////////////// | ||||
|     // Wrap the usual normal equations Schur trick | ||||
|     ///////////////////////////////////////////////////// | ||||
|   SchurRedBlackStaggeredSolve(OperatorFunction<Field> &HermitianRBSolver)  : | ||||
|      _HermitianRBSolver(HermitianRBSolver)  | ||||
|     {  | ||||
|       CBfactorise=0; | ||||
|     }; | ||||
|  | ||||
|     template<class Matrix> | ||||
|       void operator() (Matrix & _Matrix,const Field &in, Field &out){ | ||||
|  | ||||
|       // FIXME CGdiagonalMee not implemented virtual function | ||||
|       // FIXME use CBfactorise to control schur decomp | ||||
|       GridBase *grid = _Matrix.RedBlackGrid(); | ||||
|       GridBase *fgrid= _Matrix.Grid(); | ||||
|  | ||||
|       SchurStaggeredOperator<Matrix,Field> _HermOpEO(_Matrix); | ||||
|   | ||||
|       Field src_e(grid); | ||||
|       Field src_o(grid); | ||||
|       Field sol_e(grid); | ||||
|       Field sol_o(grid); | ||||
|       Field   tmp(grid); | ||||
|       Field  Mtmp(grid); | ||||
|       Field resid(fgrid); | ||||
|  | ||||
|       pickCheckerboard(Even,src_e,in); | ||||
|       pickCheckerboard(Odd ,src_o,in); | ||||
|       pickCheckerboard(Even,sol_e,out); | ||||
|       pickCheckerboard(Odd ,sol_o,out); | ||||
|      | ||||
|       ///////////////////////////////////////////////////// | ||||
|       // src_o = (source_o - Moe MeeInv source_e) | ||||
|       ///////////////////////////////////////////////////// | ||||
|       _Matrix.MooeeInv(src_e,tmp);     assert(  tmp.checkerboard ==Even); | ||||
|       _Matrix.Meooe   (tmp,Mtmp);      assert( Mtmp.checkerboard ==Odd);      | ||||
|       tmp=src_o-Mtmp;                  assert(  tmp.checkerboard ==Odd);      | ||||
|  | ||||
|       src_o = tmp;     assert(src_o.checkerboard ==Odd); | ||||
|       //  _Matrix.Mooee(tmp,src_o); // Extra factor of "m" in source | ||||
|  | ||||
|       ////////////////////////////////////////////////////////////// | ||||
|       // Call the red-black solver | ||||
|       ////////////////////////////////////////////////////////////// | ||||
|       std::cout<<GridLogMessage << "SchurRedBlackStaggeredSolver calling the Mpc solver" <<std::endl; | ||||
|       _HermitianRBSolver(_HermOpEO,src_o,sol_o);  assert(sol_o.checkerboard==Odd); | ||||
|  | ||||
|       /////////////////////////////////////////////////// | ||||
|       // sol_e = M_ee^-1 * ( src_e - Meo sol_o )... | ||||
|       /////////////////////////////////////////////////// | ||||
|       _Matrix.Meooe(sol_o,tmp);        assert(  tmp.checkerboard   ==Even); | ||||
|       src_e = src_e-tmp;               assert(  src_e.checkerboard ==Even); | ||||
|       _Matrix.MooeeInv(src_e,sol_e);   assert(  sol_e.checkerboard ==Even); | ||||
|       | ||||
|       setCheckerboard(out,sol_e); assert(  sol_e.checkerboard ==Even); | ||||
|       setCheckerboard(out,sol_o); assert(  sol_o.checkerboard ==Odd ); | ||||
|  | ||||
|       // Verify the unprec residual | ||||
|       _Matrix.M(out,resid);  | ||||
|       resid = resid-in; | ||||
|       RealD ns = norm2(in); | ||||
|       RealD nr = norm2(resid); | ||||
|  | ||||
|       std::cout<<GridLogMessage << "SchurRedBlackStaggered solver true unprec resid "<< std::sqrt(nr/ns) <<" nr "<< nr <<" ns "<<ns << std::endl; | ||||
|     }      | ||||
|   }; | ||||
|   template<class Field> using SchurRedBlackStagSolve = SchurRedBlackStaggeredSolve<Field>; | ||||
|  | ||||
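A hedged usage sketch for the staggered wrapper above. Only the wrapper's interface and the member functions it calls on the matrix (Grid, RedBlackGrid, MooeeInv, Meooe, M) come from this hunk; the FermionField typedef, the grid FGrid and the matrix object Ds are placeholders assumed to exist in the calling code.

// Ds    : a checkerboarded staggered matrix providing Grid(), RedBlackGrid(),
//         MooeeInv, Meooe and M (constructed elsewhere).
// FGrid : the full (un-checkerboarded) grid on which eta and psi live.
ConjugateGradient<FermionField> CG(1.0e-8, 10000);        // inner Hermitian solver
SchurRedBlackStaggeredSolve<FermionField> SchurSolver(CG);

FermionField eta(FGrid);                                  // source, filled elsewhere
FermionField psi(FGrid); psi = zero;                      // solution
SchurSolver(Ds, eta, psi);  // forms src_o, CG-solves the odd system, reconstructs psi_e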
|   /////////////////////////////////////////////////////////////////////////////////////////////////////// | ||||
|   // Take a matrix and form a Red Black solver calling a Herm solver | ||||
|   // Use of RB info prevents making SchurRedBlackSolve conform to standard interface | ||||
| @@ -76,12 +179,10 @@ namespace Grid { | ||||
|     ///////////////////////////////////////////////////// | ||||
|     // Wrap the usual normal equations Schur trick | ||||
|     ///////////////////////////////////////////////////// | ||||
|   SchurRedBlackDiagMooeeSolve(OperatorFunction<Field> &HermitianRBSolver)  : | ||||
|      _HermitianRBSolver(HermitianRBSolver)  | ||||
|     {  | ||||
|       CBfactorise=0; | ||||
|     }; | ||||
|  | ||||
|   SchurRedBlackDiagMooeeSolve(OperatorFunction<Field> &HermitianRBSolver,int cb=0)  :  _HermitianRBSolver(HermitianRBSolver)  | ||||
|   {  | ||||
|     CBfactorise=cb; | ||||
|   }; | ||||
|     template<class Matrix> | ||||
|       void operator() (Matrix & _Matrix,const Field &in, Field &out){ | ||||
|  | ||||
| @@ -141,5 +242,166 @@ namespace Grid { | ||||
|     }      | ||||
|   }; | ||||
|  | ||||
|  | ||||
|   /////////////////////////////////////////////////////////////////////////////////////////////////////// | ||||
|   // Take a matrix and form a Red Black solver calling a Herm solver | ||||
|   // Use of RB info prevents making SchurRedBlackSolve conform to standard interface | ||||
|   /////////////////////////////////////////////////////////////////////////////////////////////////////// | ||||
|   template<class Field> class SchurRedBlackDiagTwoSolve { | ||||
|   private: | ||||
|     OperatorFunction<Field> & _HermitianRBSolver; | ||||
|     int CBfactorise; | ||||
|   public: | ||||
|  | ||||
|     ///////////////////////////////////////////////////// | ||||
|     // Wrap the usual normal equations Schur trick | ||||
|     ///////////////////////////////////////////////////// | ||||
|   SchurRedBlackDiagTwoSolve(OperatorFunction<Field> &HermitianRBSolver)  : | ||||
|      _HermitianRBSolver(HermitianRBSolver)  | ||||
|     {  | ||||
|       CBfactorise=0; | ||||
|     }; | ||||
|  | ||||
|     template<class Matrix> | ||||
|       void operator() (Matrix & _Matrix,const Field &in, Field &out){ | ||||
|  | ||||
|       // FIXME CGdiagonalMee not implemented virtual function | ||||
|       // FIXME use CBfactorise to control schur decomp | ||||
|       GridBase *grid = _Matrix.RedBlackGrid(); | ||||
|       GridBase *fgrid= _Matrix.Grid(); | ||||
|  | ||||
|       SchurDiagTwoOperator<Matrix,Field> _HermOpEO(_Matrix); | ||||
|   | ||||
|       Field src_e(grid); | ||||
|       Field src_o(grid); | ||||
|       Field sol_e(grid); | ||||
|       Field sol_o(grid); | ||||
|       Field   tmp(grid); | ||||
|       Field  Mtmp(grid); | ||||
|       Field resid(fgrid); | ||||
|  | ||||
|       pickCheckerboard(Even,src_e,in); | ||||
|       pickCheckerboard(Odd ,src_o,in); | ||||
|       pickCheckerboard(Even,sol_e,out); | ||||
|       pickCheckerboard(Odd ,sol_o,out); | ||||
|      | ||||
|       ///////////////////////////////////////////////////// | ||||
|       // src_o = Mdag * (source_o - Moe MeeInv source_e) | ||||
|       ///////////////////////////////////////////////////// | ||||
|       _Matrix.MooeeInv(src_e,tmp);     assert(  tmp.checkerboard ==Even); | ||||
|       _Matrix.Meooe   (tmp,Mtmp);      assert( Mtmp.checkerboard ==Odd);      | ||||
|       tmp=src_o-Mtmp;                  assert(  tmp.checkerboard ==Odd);      | ||||
|  | ||||
|       // get the right MpcDag | ||||
|       _HermOpEO.MpcDag(tmp,src_o);     assert(src_o.checkerboard ==Odd);        | ||||
|  | ||||
|       ////////////////////////////////////////////////////////////// | ||||
|       // Call the red-black solver | ||||
|       ////////////////////////////////////////////////////////////// | ||||
|       std::cout<<GridLogMessage << "SchurRedBlack solver calling the MpcDagMp solver" <<std::endl; | ||||
| //      _HermitianRBSolver(_HermOpEO,src_o,sol_o);  assert(sol_o.checkerboard==Odd); | ||||
|       _HermitianRBSolver(_HermOpEO,src_o,tmp);  assert(tmp.checkerboard==Odd); | ||||
|       _Matrix.MooeeInv(tmp,sol_o);        assert(  sol_o.checkerboard   ==Odd); | ||||
|  | ||||
|       /////////////////////////////////////////////////// | ||||
|       // sol_e = M_ee^-1 * ( src_e - Meo sol_o )... | ||||
|       /////////////////////////////////////////////////// | ||||
|       _Matrix.Meooe(sol_o,tmp);        assert(  tmp.checkerboard   ==Even); | ||||
|       src_e = src_e-tmp;               assert(  src_e.checkerboard ==Even); | ||||
|       _Matrix.MooeeInv(src_e,sol_e);   assert(  sol_e.checkerboard ==Even); | ||||
|       | ||||
|       setCheckerboard(out,sol_e); assert(  sol_e.checkerboard ==Even); | ||||
|       setCheckerboard(out,sol_o); assert(  sol_o.checkerboard ==Odd ); | ||||
|  | ||||
|       // Verify the unprec residual | ||||
|       _Matrix.M(out,resid);  | ||||
|       resid = resid-in; | ||||
|       RealD ns = norm2(in); | ||||
|       RealD nr = norm2(resid); | ||||
|  | ||||
|       std::cout<<GridLogMessage << "SchurRedBlackDiagTwo solver true unprec resid "<< std::sqrt(nr/ns) <<" nr "<< nr <<" ns "<<ns << std::endl; | ||||
|     }      | ||||
|   }; | ||||
|   /////////////////////////////////////////////////////////////////////////////////////////////////////// | ||||
|   // Take a matrix and form a Red Black solver calling a Herm solver | ||||
|   // Use of RB info prevents making SchurRedBlackSolve conform to standard interface | ||||
|   /////////////////////////////////////////////////////////////////////////////////////////////////////// | ||||
|   template<class Field> class SchurRedBlackDiagTwoMixed { | ||||
|   private: | ||||
|     LinearFunction<Field> & _HermitianRBSolver; | ||||
|     int CBfactorise; | ||||
|   public: | ||||
|  | ||||
|     ///////////////////////////////////////////////////// | ||||
|     // Wrap the usual normal equations Schur trick | ||||
|     ///////////////////////////////////////////////////// | ||||
|   SchurRedBlackDiagTwoMixed(LinearFunction<Field> &HermitianRBSolver)  : | ||||
|      _HermitianRBSolver(HermitianRBSolver)  | ||||
|     {  | ||||
|       CBfactorise=0; | ||||
|     }; | ||||
|  | ||||
|     template<class Matrix> | ||||
|       void operator() (Matrix & _Matrix,const Field &in, Field &out){ | ||||
|  | ||||
|       // FIXME CGdiagonalMee not implemented virtual function | ||||
|       // FIXME use CBfactorise to control schur decomp | ||||
|       GridBase *grid = _Matrix.RedBlackGrid(); | ||||
|       GridBase *fgrid= _Matrix.Grid(); | ||||
|  | ||||
|       SchurDiagTwoOperator<Matrix,Field> _HermOpEO(_Matrix); | ||||
|   | ||||
|       Field src_e(grid); | ||||
|       Field src_o(grid); | ||||
|       Field sol_e(grid); | ||||
|       Field sol_o(grid); | ||||
|       Field   tmp(grid); | ||||
|       Field  Mtmp(grid); | ||||
|       Field resid(fgrid); | ||||
|  | ||||
|       pickCheckerboard(Even,src_e,in); | ||||
|       pickCheckerboard(Odd ,src_o,in); | ||||
|       pickCheckerboard(Even,sol_e,out); | ||||
|       pickCheckerboard(Odd ,sol_o,out); | ||||
|      | ||||
|       ///////////////////////////////////////////////////// | ||||
|       // src_o = Mdag * (source_o - Moe MeeInv source_e) | ||||
|       ///////////////////////////////////////////////////// | ||||
|       _Matrix.MooeeInv(src_e,tmp);     assert(  tmp.checkerboard ==Even); | ||||
|       _Matrix.Meooe   (tmp,Mtmp);      assert( Mtmp.checkerboard ==Odd);      | ||||
|       tmp=src_o-Mtmp;                  assert(  tmp.checkerboard ==Odd);      | ||||
|  | ||||
|       // get the right MpcDag | ||||
|       _HermOpEO.MpcDag(tmp,src_o);     assert(src_o.checkerboard ==Odd);        | ||||
|  | ||||
|       ////////////////////////////////////////////////////////////// | ||||
|       // Call the red-black solver | ||||
|       ////////////////////////////////////////////////////////////// | ||||
|       std::cout<<GridLogMessage << "SchurRedBlack solver calling the MpcDagMp solver" <<std::endl; | ||||
| //      _HermitianRBSolver(_HermOpEO,src_o,sol_o);  assert(sol_o.checkerboard==Odd); | ||||
| //      _HermitianRBSolver(_HermOpEO,src_o,tmp);  assert(tmp.checkerboard==Odd); | ||||
|       _HermitianRBSolver(src_o,tmp);  assert(tmp.checkerboard==Odd); | ||||
|       _Matrix.MooeeInv(tmp,sol_o);        assert(  sol_o.checkerboard   ==Odd); | ||||
|  | ||||
|       /////////////////////////////////////////////////// | ||||
|       // sol_e = M_ee^-1 * ( src_e - Meo sol_o )... | ||||
|       /////////////////////////////////////////////////// | ||||
|       _Matrix.Meooe(sol_o,tmp);        assert(  tmp.checkerboard   ==Even); | ||||
|       src_e = src_e-tmp;               assert(  src_e.checkerboard ==Even); | ||||
|       _Matrix.MooeeInv(src_e,sol_e);   assert(  sol_e.checkerboard ==Even); | ||||
|       | ||||
|       setCheckerboard(out,sol_e); assert(  sol_e.checkerboard ==Even); | ||||
|       setCheckerboard(out,sol_o); assert(  sol_o.checkerboard ==Odd ); | ||||
|  | ||||
|       // Verify the unprec residual | ||||
|       _Matrix.M(out,resid);  | ||||
|       resid = resid-in; | ||||
|       RealD ns = norm2(in); | ||||
|       RealD nr = norm2(resid); | ||||
|  | ||||
|       std::cout<<GridLogMessage << "SchurRedBlackDiagTwo solver true unprec resid "<< std::sqrt(nr/ns) <<" nr "<< nr <<" ns "<<ns << std::endl; | ||||
|     }      | ||||
|   }; | ||||
|  | ||||
| } | ||||
| #endif | ||||
|   | ||||
| @@ -44,13 +44,20 @@ namespace Grid{ | ||||
|   class GridBase : public CartesianCommunicator , public GridThread { | ||||
|  | ||||
| public: | ||||
|  | ||||
|     int dummy; | ||||
|     // Give Lattice access | ||||
|     template<class object> friend class Lattice; | ||||
|  | ||||
|     GridBase(const std::vector<int> & processor_grid) : CartesianCommunicator(processor_grid) {}; | ||||
|     GridBase(const std::vector<int> & processor_grid, | ||||
| 	     const CartesianCommunicator &parent) : CartesianCommunicator(processor_grid,parent) {}; | ||||
| 	     const CartesianCommunicator &parent, | ||||
| 	     int &split_rank)  | ||||
|       : CartesianCommunicator(processor_grid,parent,split_rank) {}; | ||||
|     GridBase(const std::vector<int> & processor_grid, | ||||
| 	     const CartesianCommunicator &parent)  | ||||
|       : CartesianCommunicator(processor_grid,parent,dummy) {}; | ||||
|  | ||||
|     virtual ~GridBase() = default; | ||||
|  | ||||
|     // Physics Grid information. | ||||
|     std::vector<int> _simd_layout;// Which dimensions get relayed out over simd lanes. | ||||
|   | ||||
| @@ -38,7 +38,7 @@ namespace Grid{ | ||||
| class GridCartesian: public GridBase { | ||||
|  | ||||
| public: | ||||
|  | ||||
|     int dummy; | ||||
|     virtual int  CheckerBoardFromOindexTable (int Oindex) { | ||||
|       return 0; | ||||
|     } | ||||
| @@ -67,7 +67,14 @@ public: | ||||
|     GridCartesian(const std::vector<int> &dimensions, | ||||
| 		  const std::vector<int> &simd_layout, | ||||
| 		  const std::vector<int> &processor_grid, | ||||
| 		  const GridCartesian &parent) : GridBase(processor_grid,parent) | ||||
| 		  const GridCartesian &parent) : GridBase(processor_grid,parent,dummy) | ||||
|     { | ||||
|       Init(dimensions,simd_layout,processor_grid); | ||||
|     } | ||||
|     GridCartesian(const std::vector<int> &dimensions, | ||||
| 		  const std::vector<int> &simd_layout, | ||||
| 		  const std::vector<int> &processor_grid, | ||||
| 		  const GridCartesian &parent,int &split_rank) : GridBase(processor_grid,parent,split_rank) | ||||
|     { | ||||
|       Init(dimensions,simd_layout,processor_grid); | ||||
|     } | ||||
| @@ -81,6 +88,8 @@ public: | ||||
|       Init(dimensions,simd_layout,processor_grid); | ||||
|     } | ||||
|  | ||||
|     virtual ~GridCartesian() = default; | ||||
|  | ||||
|     void Init(const std::vector<int> &dimensions, | ||||
| 	      const std::vector<int> &simd_layout, | ||||
| 	      const std::vector<int> &processor_grid) | ||||
|   | ||||
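A hedged sketch of driving the extra split-grid constructor above. The lattice and processor layouts are placeholder numbers, and GridDefaultSimd / vComplex are the usual Grid helpers one would expect to use here; only the constructor shape itself comes from this hunk.

std::vector<int> latt      ({16,16,16,16});
std::vector<int> simd      = GridDefaultSimd(4, vComplex::Nsimd());
std::vector<int> mpi_parent({2,2,2,2});    // 16 ranks in the parent grid
std::vector<int> mpi_child ({1,1,2,2});    //  4 ranks per child => 4 child grids

GridCartesian Parent(latt, simd, mpi_parent);
int split_rank;                            // returns which child grid this rank landed in
GridCartesian Child (latt, simd, mpi_child, Parent, split_rank);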
| @@ -133,6 +133,8 @@ public: | ||||
|     { | ||||
|       Init(base->_fdimensions,base->_simd_layout,base->_processors,checker_dim_mask,checker_dim)  ; | ||||
|     } | ||||
|  | ||||
|     virtual ~GridRedBlackCartesian() = default; | ||||
| #if 0 | ||||
|     //////////////////////////////////////////////////////////// | ||||
|     // Create redblack grid ;; deprecate these. Should not | ||||
| @@ -205,6 +207,7 @@ public: | ||||
|         { | ||||
|           assert((_gdimensions[d] & 0x1) == 0); | ||||
|           _gdimensions[d] = _gdimensions[d] / 2; // Remove a checkerboard | ||||
| 	  _gsites /= 2; | ||||
|         } | ||||
|         _ldimensions[d] = _gdimensions[d] / _processors[d]; | ||||
|         assert(_ldimensions[d] * _processors[d] == _gdimensions[d]); | ||||
|   | ||||
| @@ -96,6 +96,138 @@ void CartesianCommunicator::GlobalSumVector(ComplexD *c,int N) | ||||
|   GlobalSumVector((double *)c,2*N); | ||||
| } | ||||
|  | ||||
|  | ||||
| #if defined( GRID_COMMS_MPI) || defined (GRID_COMMS_MPIT) || defined (GRID_COMMS_MPI3) | ||||
|  | ||||
| CartesianCommunicator::CartesianCommunicator(const std::vector<int> &processors,const CartesianCommunicator &parent,int &srank)  | ||||
| { | ||||
|   _ndimension = processors.size(); | ||||
|   assert(_ndimension == parent._ndimension); | ||||
|    | ||||
|   ////////////////////////////////////////////////////////////////////////////////////////////////////// | ||||
|   // split the communicator | ||||
|   ////////////////////////////////////////////////////////////////////////////////////////////////////// | ||||
|   int Nparent; | ||||
|   MPI_Comm_size(parent.communicator,&Nparent); | ||||
|  | ||||
|   int childsize=1; | ||||
|   for(int d=0;d<processors.size();d++) { | ||||
|     childsize *= processors[d]; | ||||
|   } | ||||
|   int Nchild = Nparent/childsize; | ||||
|   assert (childsize * Nchild == Nparent); | ||||
|  | ||||
|   std::vector<int> ccoor(_ndimension); // coor within subcommunicator | ||||
|   std::vector<int> scoor(_ndimension); // coor of split within parent | ||||
|   std::vector<int> ssize(_ndimension); // number of splits along each parent dimension | ||||
|  | ||||
|   for(int d=0;d<_ndimension;d++){ | ||||
|     ccoor[d] = parent._processor_coor[d] % processors[d]; | ||||
|     scoor[d] = parent._processor_coor[d] / processors[d]; | ||||
|     ssize[d] = parent._processors[d]     / processors[d]; | ||||
|   } | ||||
|   int crank;  // rank within subcomm ; srank is rank of subcomm within blocks of subcomms | ||||
|   // MPI uses the reverse lexicographic ordering convention to ours | ||||
|   Lexicographic::IndexFromCoorReversed(ccoor,crank,processors); | ||||
|   Lexicographic::IndexFromCoorReversed(scoor,srank,ssize); | ||||
|  | ||||
|   MPI_Comm comm_split; | ||||
|   if ( Nchild > 1 ) {  | ||||
|  | ||||
|     /* | ||||
|     std::cout << GridLogMessage<<"Child communicator of "<< std::hex << parent.communicator << std::dec<<std::endl; | ||||
|     std::cout << GridLogMessage<<" parent grid["<< parent._ndimension<<"]    "; | ||||
|     for(int d=0;d<parent._processors.size();d++)  std::cout << parent._processors[d] << " "; | ||||
|     std::cout<<std::endl; | ||||
|  | ||||
|     std::cout << GridLogMessage<<" child grid["<< _ndimension <<"]    "; | ||||
|     for(int d=0;d<processors.size();d++)  std::cout << processors[d] << " "; | ||||
|     std::cout<<std::endl; | ||||
|  | ||||
|     std::cout << GridLogMessage<<" old rank "<< parent._processor<<" coor ["<< _ndimension <<"]    "; | ||||
|     for(int d=0;d<processors.size();d++)  std::cout << parent._processor_coor[d] << " "; | ||||
|     std::cout<<std::endl; | ||||
|  | ||||
|     std::cout << GridLogMessage<<" new rank "<< crank<<" coor ["<< _ndimension <<"]    "; | ||||
|     for(int d=0;d<processors.size();d++)  std::cout << ccoor[d] << " "; | ||||
|     std::cout<<std::endl; | ||||
|  | ||||
|     std::cout << GridLogMessage<<" new coor ["<< _ndimension <<"]    "; | ||||
|     for(int d=0;d<processors.size();d++)  std::cout << parent._processor_coor[d] << " "; | ||||
|     std::cout<<std::endl; | ||||
|     */ | ||||
|  | ||||
|     int ierr= MPI_Comm_split(parent.communicator,srank,crank,&comm_split); | ||||
|     assert(ierr==0); | ||||
|     ////////////////////////////////////////////////////////////////////////////////////////////////////// | ||||
|     // Declare victory | ||||
|     ////////////////////////////////////////////////////////////////////////////////////////////////////// | ||||
|     /* | ||||
|     std::cout << GridLogMessage<<"Divided communicator "<< parent._Nprocessors<<" into " | ||||
| 	      << Nchild <<" communicators with " << childsize << " ranks"<<std::endl; | ||||
|     */ | ||||
|   } else { | ||||
|     comm_split=parent.communicator; | ||||
|     srank = 0; | ||||
|   } | ||||
|  | ||||
|   ////////////////////////////////////////////////////////////////////////////////////////////////////// | ||||
|   // Set up from the new split communicator | ||||
|   ////////////////////////////////////////////////////////////////////////////////////////////////////// | ||||
|   InitFromMPICommunicator(processors,comm_split); | ||||
| } | ||||
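A small self-contained worked instance of the coordinate arithmetic in the constructor above, with hypothetical sizes (a {4,4} parent split into {2,2} children); it only reproduces the ccoor/scoor/ssize lines and is not Grid code.

#include <cstdio>
#include <vector>

int main(void) {
  std::vector<int> parent_procs{4, 4};   // parent._processors : 16 ranks in total
  std::vector<int> child_procs {2, 2};   // processors         :  4 ranks per child
  std::vector<int> pcoor       {3, 1};   // parent._processor_coor of this rank
  // childsize = 2*2 = 4, Nchild = 16/4 = 4 sub-communicators
  for (int d = 0; d < 2; d++) {
    int ccoor = pcoor[d] % child_procs[d];          // coordinate inside its child grid
    int scoor = pcoor[d] / child_procs[d];          // which child grid along this axis
    int ssize = parent_procs[d] / child_procs[d];   // number of child grids along this axis
    printf("d=%d ccoor=%d scoor=%d ssize=%d\n", d, ccoor, scoor, ssize);
  }
  // crank (the key) and srank (the colour) are then built from ccoor and scoor with
  // Lexicographic::IndexFromCoorReversed and handed to MPI_Comm_split.
  return 0;
}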
| ////////////////////////////////////////////////////////////////////////////////////////////////////// | ||||
| // Take an MPI_Comm and self assemble | ||||
| ////////////////////////////////////////////////////////////////////////////////////////////////////// | ||||
| void CartesianCommunicator::InitFromMPICommunicator(const std::vector<int> &processors, MPI_Comm communicator_base) | ||||
| { | ||||
|   _ndimension = processors.size(); | ||||
|   _processor_coor.resize(_ndimension); | ||||
|  | ||||
|   ///////////////////////////////// | ||||
|   // Count the requested nodes | ||||
|   ///////////////////////////////// | ||||
|   _Nprocessors=1; | ||||
|   _processors = processors; | ||||
|   for(int i=0;i<_ndimension;i++){ | ||||
|     _Nprocessors*=_processors[i]; | ||||
|   } | ||||
|  | ||||
|   std::vector<int> periodic(_ndimension,1); | ||||
|   MPI_Cart_create(communicator_base, _ndimension,&_processors[0],&periodic[0],0,&communicator); | ||||
|   MPI_Comm_rank(communicator,&_processor); | ||||
|   MPI_Cart_coords(communicator,_processor,_ndimension,&_processor_coor[0]); | ||||
|  | ||||
|   if ( communicator_base != communicator_world ) { | ||||
|     std::cout << "Cartesian communicator created with a non-world communicator"<<std::endl; | ||||
|      | ||||
|     std::cout << " new communicator rank "<<_processor<< " coor ["<<_ndimension<<"] "; | ||||
|     for(int d=0;d<_processors.size();d++){ | ||||
|       std::cout << _processor_coor[d]<<" "; | ||||
|     } | ||||
|     std::cout << std::endl; | ||||
|   } | ||||
|  | ||||
|   int Size; | ||||
|   MPI_Comm_size(communicator,&Size); | ||||
|  | ||||
| #ifdef GRID_COMMS_MPIT | ||||
|   communicator_halo.resize (2*_ndimension); | ||||
|   for(int i=0;i<_ndimension*2;i++){ | ||||
|     MPI_Comm_dup(communicator,&communicator_halo[i]); | ||||
|   } | ||||
| #endif | ||||
|    | ||||
|   assert(Size==_Nprocessors); | ||||
| } | ||||
|  | ||||
| CartesianCommunicator::CartesianCommunicator(const std::vector<int> &processors)  | ||||
| { | ||||
|   InitFromMPICommunicator(processors,communicator_world); | ||||
| } | ||||
|  | ||||
| #endif | ||||
|  | ||||
| #if !defined( GRID_COMMS_MPI3)  | ||||
|  | ||||
| int                      CartesianCommunicator::NodeCount(void)    { return ProcessorCount();}; | ||||
|   | ||||
| @@ -153,12 +153,12 @@ class CartesianCommunicator { | ||||
|   // Constructors to sub-divide a parent communicator | ||||
|   // and default to comm world | ||||
|   //////////////////////////////////////////////// | ||||
|   CartesianCommunicator(const std::vector<int> &processors,const CartesianCommunicator &parent); | ||||
|   CartesianCommunicator(const std::vector<int> &processors,const CartesianCommunicator &parent,int &srank); | ||||
|   CartesianCommunicator(const std::vector<int> &pdimensions_in); | ||||
|   virtual ~CartesianCommunicator(); | ||||
|  | ||||
|  private: | ||||
| #if defined (GRID_COMMS_MPI)  | ||||
|   //|| defined (GRID_COMMS_MPI3)  | ||||
| #if defined (GRID_COMMS_MPI) || defined (GRID_COMMS_MPIT)  | ||||
|   //////////////////////////////////////////////// | ||||
|   // Private initialise from an MPI communicator | ||||
|   // Can use after an MPI_Comm_split, but hidden from user so private | ||||
| @@ -263,6 +263,27 @@ class CartesianCommunicator { | ||||
|   // Broadcast a buffer and composite larger | ||||
|   //////////////////////////////////////////////////////////// | ||||
|   void Broadcast(int root,void* data, int bytes); | ||||
|  | ||||
|   //////////////////////////////////////////////////////////// | ||||
|   // All2All down one dimension | ||||
|   //////////////////////////////////////////////////////////// | ||||
|   template<class T> void AllToAll(int dim,std::vector<T> &in, std::vector<T> &out){ | ||||
|     assert(dim>=0); | ||||
|     assert(dim<_ndimension); | ||||
|     int numnode = _processors[dim]; | ||||
|     //    std::cerr << " AllToAll in.size()  "<<in.size()<<std::endl; | ||||
|     //    std::cerr << " AllToAll out.size() "<<out.size()<<std::endl; | ||||
|     assert(in.size()==out.size()); | ||||
|     uint64_t bytes=sizeof(T); | ||||
|     uint64_t words=in.size()/numnode; | ||||
|  | ||||
|     assert(numnode * words == in.size()); | ||||
|     assert(words < (1ULL<<32)); | ||||
|  | ||||
|     AllToAll(dim,(void *)&in[0],(void *)&out[0],words,bytes); | ||||
|   } | ||||
|   void AllToAll(int dim  ,void *in,void *out,uint64_t words,uint64_t bytes); | ||||
|   void AllToAll(void  *in,void *out,uint64_t words         ,uint64_t bytes); | ||||
|    | ||||
|   template<class obj> void Broadcast(int root,obj &data) | ||||
|     { | ||||
|   | ||||
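A hedged usage sketch of the templated AllToAll declared above. The communicator object GridComm, the element type and the sizes are placeholders; the only constraints taken from the code are that in and out have equal size and that this size is a multiple of the number of ranks along the chosen dimension.

// GridComm : an existing CartesianCommunicator; dimension 0 has P ranks.
const int      P              = 4;      // assumed ranks along dimension 0
const uint64_t words_per_peer = 1024;   // elements exchanged with each of the P peers

std::vector<ComplexD> in (P * words_per_peer);   // local send buffer
std::vector<ComplexD> out(P * words_per_peer);   // local receive buffer
GridComm.AllToAll(0, in, out);          // every rank in the row swaps words_per_peer elements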
| @@ -53,94 +53,14 @@ void CartesianCommunicator::Init(int *argc, char ***argv) { | ||||
|   ShmInitGeneric(); | ||||
| } | ||||
|  | ||||
| CartesianCommunicator::CartesianCommunicator(const std::vector<int> &processors)  | ||||
| CartesianCommunicator::~CartesianCommunicator() | ||||
| { | ||||
|   InitFromMPICommunicator(processors,communicator_world); | ||||
|   //  std::cout << "Passed communicator world to a new communicator" <<communicator<<std::endl; | ||||
| } | ||||
| CartesianCommunicator::CartesianCommunicator(const std::vector<int> &processors,const CartesianCommunicator &parent)  | ||||
| { | ||||
|   _ndimension = processors.size(); | ||||
|   assert(_ndimension = parent._ndimension); | ||||
|    | ||||
|   ////////////////////////////////////////////////////////////////////////////////////////////////////// | ||||
|   // split the communicator | ||||
|   ////////////////////////////////////////////////////////////////////////////////////////////////////// | ||||
|   int Nparent; | ||||
|   MPI_Comm_size(parent.communicator,&Nparent); | ||||
|  | ||||
|   int childsize=1; | ||||
|   for(int d=0;d<processors.size();d++) { | ||||
|     childsize *= processors[d]; | ||||
|   } | ||||
|   int Nchild = Nparent/childsize; | ||||
|   assert (childsize * Nchild == Nparent); | ||||
|  | ||||
|   int prank;  MPI_Comm_rank(parent.communicator,&prank); | ||||
|   int crank = prank % childsize; | ||||
|   int ccomm = prank / childsize; | ||||
|  | ||||
|   MPI_Comm comm_split; | ||||
|   if ( Nchild > 1 ) {  | ||||
|  | ||||
|     std::cout << GridLogMessage<<"Child communicator of "<< std::hex << parent.communicator << std::dec<<std::endl; | ||||
|     std::cout << GridLogMessage<<" parent grid["<< parent._ndimension<<"]    "; | ||||
|     for(int d=0;d<parent._processors.size();d++)  std::cout << parent._processors[d] << " "; | ||||
|     std::cout<<std::endl; | ||||
|  | ||||
|     std::cout << GridLogMessage<<" child grid["<< _ndimension <<"]    "; | ||||
|     for(int d=0;d<processors.size();d++)  std::cout << processors[d] << " "; | ||||
|     std::cout<<std::endl; | ||||
|  | ||||
|     int ierr= MPI_Comm_split(parent.communicator, ccomm,crank,&comm_split); | ||||
|     assert(ierr==0); | ||||
|     ////////////////////////////////////////////////////////////////////////////////////////////////////// | ||||
|     // Declare victory | ||||
|     ////////////////////////////////////////////////////////////////////////////////////////////////////// | ||||
|     std::cout << GridLogMessage<<"Divided communicator "<< parent._Nprocessors<<" into " | ||||
| 	      <<Nchild <<" communicators with " << childsize << " ranks"<<std::endl; | ||||
|   } else { | ||||
|     comm_split=parent.communicator; | ||||
|     //    std::cout << "Passed parental communicator to a new communicator" <<std::endl; | ||||
|   } | ||||
|  | ||||
|   ////////////////////////////////////////////////////////////////////////////////////////////////////// | ||||
|   // Set up from the new split communicator | ||||
|   ////////////////////////////////////////////////////////////////////////////////////////////////////// | ||||
|   InitFromMPICommunicator(processors,comm_split); | ||||
|  | ||||
|   int MPI_is_finalised; | ||||
|   MPI_Finalized(&MPI_is_finalised); | ||||
|   if (communicator && MPI_is_finalised) | ||||
|     MPI_Comm_free(&communicator); | ||||
| } | ||||
|  | ||||
| ////////////////////////////////////////////////////////////////////////////////////////////////////// | ||||
| // Take an MPI_Comm and self assemble | ||||
| ////////////////////////////////////////////////////////////////////////////////////////////////////// | ||||
| void CartesianCommunicator::InitFromMPICommunicator(const std::vector<int> &processors, MPI_Comm communicator_base) | ||||
| { | ||||
|   //  if ( communicator_base != communicator_world ) { | ||||
|   //    std::cout << "Cartesian communicator created with a non-world communicator"<<std::endl; | ||||
|   //  } | ||||
|   _ndimension = processors.size(); | ||||
|   _processor_coor.resize(_ndimension); | ||||
|  | ||||
|   ///////////////////////////////// | ||||
|   // Count the requested nodes | ||||
|   ///////////////////////////////// | ||||
|   _Nprocessors=1; | ||||
|   _processors = processors; | ||||
|   for(int i=0;i<_ndimension;i++){ | ||||
|     _Nprocessors*=_processors[i]; | ||||
|   } | ||||
|  | ||||
|   std::vector<int> periodic(_ndimension,1); | ||||
|   MPI_Cart_create(communicator_base, _ndimension,&_processors[0],&periodic[0],1,&communicator); | ||||
|   MPI_Comm_rank(communicator,&_processor); | ||||
|   MPI_Cart_coords(communicator,_processor,_ndimension,&_processor_coor[0]); | ||||
|  | ||||
|   int Size; | ||||
|   MPI_Comm_size(communicator,&Size); | ||||
|    | ||||
|   assert(Size==_Nprocessors); | ||||
| } | ||||
| void CartesianCommunicator::GlobalSum(uint32_t &u){ | ||||
|   int ierr=MPI_Allreduce(MPI_IN_PLACE,&u,1,MPI_UINT32_T,MPI_SUM,communicator); | ||||
|   assert(ierr==0); | ||||
| @@ -276,6 +196,36 @@ void CartesianCommunicator::Broadcast(int root,void* data, int bytes) | ||||
| 		     root, | ||||
| 		     communicator); | ||||
|   assert(ierr==0); | ||||
| } | ||||
| void CartesianCommunicator::AllToAll(int dim,void  *in,void *out,uint64_t words,uint64_t bytes) | ||||
| { | ||||
|   std::vector<int> row(_ndimension,1); | ||||
|   assert(dim>=0 && dim<_ndimension); | ||||
|  | ||||
|   //  Split the communicator | ||||
|   row[dim] = _processors[dim]; | ||||
|  | ||||
|   int me; | ||||
|   CartesianCommunicator Comm(row,*this,me); | ||||
|   Comm.AllToAll(in,out,words,bytes); | ||||
| } | ||||
| void CartesianCommunicator::AllToAll(void  *in,void *out,uint64_t words,uint64_t bytes) | ||||
| { | ||||
|   // MPI is a pain and uses "int" arguments | ||||
|   // 64*64*64*128*16 == 500Million elements of data. | ||||
|   // With 24*4-byte site objects that is ~50x10^9 bytes >> 2x10^9, overflowing a 32-bit int (a Y2K-style bug). | ||||
|   // (Turns up on 32^3 x 64 Gparity too) | ||||
|   MPI_Datatype object; | ||||
|   int iwords;  | ||||
|   int ibytes; | ||||
|   iwords = words; | ||||
|   ibytes = bytes; | ||||
|   assert(words == iwords); // safe to cast to int ? | ||||
|   assert(bytes == ibytes); // safe to cast to int ? | ||||
|   MPI_Type_contiguous(ibytes,MPI_BYTE,&object); | ||||
|   MPI_Type_commit(&object); | ||||
|   MPI_Alltoall(in,iwords,object,out,iwords,object,communicator); | ||||
|   MPI_Type_free(&object); | ||||
| } | ||||
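The int-argument comment above can be made concrete with a standalone arithmetic check; the numbers are taken from that comment and nothing here is Grid code.

#include <climits>
#include <cstdint>
#include <cstdio>

int main(void) {
  uint64_t elements   = 64ULL * 64 * 64 * 128 * 16;  // ~5.4e8 elements, as in the comment
  uint64_t site_bytes = 24 * 4;                      // e.g. 24 single-precision reals per site
  uint64_t total      = elements * site_bytes;       // ~5.2e10 bytes, far beyond INT_MAX
  printf("total bytes = %llu, INT_MAX = %d\n", (unsigned long long)total, INT_MAX);
  // Passing a raw per-rank byte count to MPI_Alltoall can overflow its int argument;
  // packing site_bytes into an MPI_Type_contiguous keeps the count at the element
  // (word) level, which fits comfortably in an int.
  return 0;
}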
|   /////////////////////////////////////////////////////// | ||||
|   // Should only be used prior to Grid Init finished. | ||||
| @@ -296,5 +246,7 @@ void CartesianCommunicator::BroadcastWorld(int root,void* data, int bytes) | ||||
|   assert(ierr==0); | ||||
| } | ||||
|  | ||||
|  | ||||
|  | ||||
| } | ||||
|  | ||||
|   | ||||
| @@ -712,7 +712,8 @@ double CartesianCommunicator::StencilSendToRecvFromBegin(std::vector<CommsReques | ||||
| 							 int from, | ||||
| 							 int bytes,int dir) | ||||
| { | ||||
|   assert(dir < communicator_halo.size()); | ||||
|   int ncomm  =communicator_halo.size();  | ||||
|   int commdir=dir%ncomm; | ||||
|  | ||||
|   MPI_Request xrq; | ||||
|   MPI_Request rrq; | ||||
| @@ -732,14 +733,14 @@ double CartesianCommunicator::StencilSendToRecvFromBegin(std::vector<CommsReques | ||||
|   gfrom = MPI_UNDEFINED; | ||||
| #endif | ||||
|   if ( gfrom ==MPI_UNDEFINED) { | ||||
|     ierr=MPI_Irecv(recv, bytes, MPI_CHAR,from,from,communicator_halo[dir],&rrq); | ||||
|     ierr=MPI_Irecv(recv, bytes, MPI_CHAR,from,from,communicator_halo[commdir],&rrq); | ||||
|     assert(ierr==0); | ||||
|     list.push_back(rrq); | ||||
|     off_node_bytes+=bytes; | ||||
|   } | ||||
|  | ||||
|   if ( gdest == MPI_UNDEFINED ) { | ||||
|     ierr =MPI_Isend(xmit, bytes, MPI_CHAR,dest,_processor,communicator_halo[dir],&xrq); | ||||
|     ierr =MPI_Isend(xmit, bytes, MPI_CHAR,dest,_processor,communicator_halo[commdir],&xrq); | ||||
|     assert(ierr==0); | ||||
|     list.push_back(xrq); | ||||
|     off_node_bytes+=bytes; | ||||
|   | ||||
| @@ -53,33 +53,13 @@ void CartesianCommunicator::Init(int *argc, char ***argv) { | ||||
|   ShmInitGeneric(); | ||||
| } | ||||
|  | ||||
| CartesianCommunicator::CartesianCommunicator(const std::vector<int> &processors) | ||||
| CartesianCommunicator::~CartesianCommunicator() | ||||
| { | ||||
|   _ndimension = processors.size(); | ||||
|   std::vector<int> periodic(_ndimension,1); | ||||
|  | ||||
|   _Nprocessors=1; | ||||
|   _processors = processors; | ||||
|   _processor_coor.resize(_ndimension); | ||||
|    | ||||
|   MPI_Cart_create(communicator_world, _ndimension,&_processors[0],&periodic[0],1,&communicator); | ||||
|   MPI_Comm_rank(communicator,&_processor); | ||||
|   MPI_Cart_coords(communicator,_processor,_ndimension,&_processor_coor[0]); | ||||
|  | ||||
|   for(int i=0;i<_ndimension;i++){ | ||||
|     _Nprocessors*=_processors[i]; | ||||
|   } | ||||
|  | ||||
|   communicator_halo.resize (2*_ndimension); | ||||
|   for(int i=0;i<_ndimension*2;i++){ | ||||
|     MPI_Comm_dup(communicator,&communicator_halo[i]); | ||||
|   } | ||||
|    | ||||
|   int Size;  | ||||
|   MPI_Comm_size(communicator,&Size); | ||||
|    | ||||
|   assert(Size==_Nprocessors); | ||||
|   if (communicator && !MPI::Is_finalized()) | ||||
|     MPI_Comm_free(&communicator); | ||||
| } | ||||
|  | ||||
|  | ||||
| void CartesianCommunicator::GlobalSum(uint32_t &u){ | ||||
|   int ierr=MPI_Allreduce(MPI_IN_PLACE,&u,1,MPI_UINT32_T,MPI_SUM,communicator); | ||||
|   assert(ierr==0); | ||||
| @@ -244,13 +224,14 @@ double CartesianCommunicator::StencilSendToRecvFromBegin(std::vector<CommsReques | ||||
| { | ||||
|   int myrank = _processor; | ||||
|   int ierr; | ||||
|   assert(dir < communicator_halo.size()); | ||||
|   int ncomm  =communicator_halo.size();  | ||||
|   int commdir=dir%ncomm; | ||||
|    | ||||
|   //  std::cout << " sending on communicator "<<dir<<" " <<communicator_halo[dir]<<std::endl; | ||||
|   // Give the CPU to MPI immediately; can use threads to overlap optionally | ||||
|   MPI_Request req[2]; | ||||
|   MPI_Irecv(recv,bytes,MPI_CHAR,recv_from_rank,recv_from_rank, communicator_halo[dir],&req[1]); | ||||
|   MPI_Isend(xmit,bytes,MPI_CHAR,xmit_to_rank  ,myrank        , communicator_halo[dir],&req[0]); | ||||
|   MPI_Irecv(recv,bytes,MPI_CHAR,recv_from_rank,recv_from_rank, communicator_halo[commdir],&req[1]); | ||||
|   MPI_Isend(xmit,bytes,MPI_CHAR,xmit_to_rank  ,myrank        , communicator_halo[commdir],&req[0]); | ||||
|  | ||||
|   list.push_back(req[0]); | ||||
|   list.push_back(req[1]); | ||||
| @@ -269,13 +250,14 @@ double CartesianCommunicator::StencilSendToRecvFrom(void *xmit, | ||||
| { | ||||
|   int myrank = _processor; | ||||
|   int ierr; | ||||
|   assert(dir < communicator_halo.size()); | ||||
|    | ||||
|   //  std::cout << " sending on communicator "<<dir<<" " <<communicator_halo[dir]<<std::endl; | ||||
|   //  std::cout << " sending on communicator "<<dir<<" " <<communicator_halo.size()<<std::endl; | ||||
|  | ||||
|   int ncomm  =communicator_halo.size();  | ||||
|   int commdir=dir%ncomm; | ||||
|   // Give the CPU to MPI immediately; can use threads to overlap optionally | ||||
|   MPI_Request req[2]; | ||||
|   MPI_Irecv(recv,bytes,MPI_CHAR,recv_from_rank,recv_from_rank, communicator_halo[dir],&req[1]); | ||||
|   MPI_Isend(xmit,bytes,MPI_CHAR,xmit_to_rank  ,myrank        , communicator_halo[dir],&req[0]); | ||||
|   MPI_Irecv(recv,bytes,MPI_CHAR,recv_from_rank,recv_from_rank, communicator_halo[commdir],&req[1]); | ||||
|   MPI_Isend(xmit,bytes,MPI_CHAR,xmit_to_rank  ,myrank        , communicator_halo[commdir],&req[0]); | ||||
|   MPI_Waitall(2, req, MPI_STATUSES_IGNORE); | ||||
|   return 2.0*bytes; | ||||
| } | ||||
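
The two stencil hunks above stop indexing `communicator_halo` directly with the stencil direction and instead fold it with `commdir = dir % ncomm`, so any direction index lands inside the fixed pool of duplicated per-direction communicators. A minimal standalone MPI sketch of that mapping (illustrative only, not Grid code; the pool size `2*ndim` is an assumption):

```cpp
#include <mpi.h>
#include <cassert>
#include <vector>

int main(int argc, char **argv) {
  MPI_Init(&argc, &argv);

  const int ndim = 4;                          // assumed number of grid dimensions
  std::vector<MPI_Comm> halo(2 * ndim);        // one duplicated communicator per face
  for (auto &c : halo) MPI_Comm_dup(MPI_COMM_WORLD, &c);

  const int ncomm = (int)halo.size();
  for (int dir = 0; dir < 3 * ncomm; dir++) {  // a caller may pass dir >= ncomm
    int commdir = dir % ncomm;                 // fold back into the pool
    assert(commdir >= 0 && commdir < ncomm);
    // MPI_Isend / MPI_Irecv for this direction would use halo[commdir]
  }

  for (auto &c : halo) MPI_Comm_free(&c);
  MPI_Finalize();
  return 0;
}
```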
|   | ||||
| @@ -38,8 +38,8 @@ void CartesianCommunicator::Init(int *argc, char *** arv) | ||||
|   ShmInitGeneric(); | ||||
| } | ||||
|  | ||||
| CartesianCommunicator::CartesianCommunicator(const std::vector<int> &processors,const CartesianCommunicator &parent)  | ||||
|   : CartesianCommunicator(processors) {} | ||||
| CartesianCommunicator::CartesianCommunicator(const std::vector<int> &processors,const CartesianCommunicator &parent,int &srank)  | ||||
|   : CartesianCommunicator(processors) { srank=0;} | ||||
|  | ||||
| CartesianCommunicator::CartesianCommunicator(const std::vector<int> &processors) | ||||
| { | ||||
| @@ -56,6 +56,8 @@ CartesianCommunicator::CartesianCommunicator(const std::vector<int> &processors) | ||||
|   } | ||||
| } | ||||
|  | ||||
| CartesianCommunicator::~CartesianCommunicator(){} | ||||
|  | ||||
| void CartesianCommunicator::GlobalSum(float &){} | ||||
| void CartesianCommunicator::GlobalSumVector(float *,int N){} | ||||
| void CartesianCommunicator::GlobalSum(double &){} | ||||
| @@ -98,6 +100,14 @@ void CartesianCommunicator::SendToRecvFromComplete(std::vector<CommsRequest_t> & | ||||
| { | ||||
|   assert(0); | ||||
| } | ||||
| void CartesianCommunicator::AllToAll(int dim,void  *in,void *out,uint64_t words,uint64_t bytes) | ||||
| { | ||||
|   bcopy(in,out,bytes*words); | ||||
| } | ||||
| void CartesianCommunicator::AllToAll(void  *in,void *out,uint64_t words,uint64_t bytes) | ||||
| { | ||||
|   bcopy(in,out,bytes*words); | ||||
| } | ||||
|  | ||||
| int  CartesianCommunicator::RankWorld(void){return 0;} | ||||
| void CartesianCommunicator::Barrier(void){} | ||||
|   | ||||
| @@ -63,7 +63,7 @@ SOFTWARE. | ||||
|         #error "unsupported Clang version - see https://github.com/nlohmann/json#supported-compilers" | ||||
|     #endif | ||||
| #elif defined(__GNUC__) | ||||
|     #if (__GNUC__ * 10000 + __GNUC_MINOR__ * 100 + __GNUC_PATCHLEVEL__) < 40900 | ||||
|     #if (__GNUC__ * 10000 + __GNUC_MINOR__ * 100 + __GNUC_PATCHLEVEL__) < 40805 | ||||
|         #error "unsupported GCC version - see https://github.com/nlohmann/json#supported-compilers" | ||||
|     #endif | ||||
| #endif | ||||
|   | ||||
| @@ -109,8 +109,8 @@ inline void blockProject(Lattice<iVector<CComplex,nbasis > > &coarseData, | ||||
|  | ||||
|   coarseData=zero; | ||||
|  | ||||
|   // Loop with a cache friendly loop ordering | ||||
|   for(int sf=0;sf<fine->oSites();sf++){ | ||||
|   // Loop over coarse sites in parallel, then loop over the fine sites associated with each coarse site. | ||||
|   parallel_for(int sf=0;sf<fine->oSites();sf++){ | ||||
|  | ||||
|     int sc; | ||||
|     std::vector<int> coor_c(_ndimension); | ||||
| @@ -119,8 +119,9 @@ inline void blockProject(Lattice<iVector<CComplex,nbasis > > &coarseData, | ||||
|     for(int d=0;d<_ndimension;d++) coor_c[d]=coor_f[d]/block_r[d]; | ||||
|     Lexicographic::IndexFromCoor(coor_c,sc,coarse->_rdimensions); | ||||
|  | ||||
| PARALLEL_CRITICAL | ||||
|     for(int i=0;i<nbasis;i++) { | ||||
|        | ||||
|  | ||||
|       coarseData._odata[sc](i)=coarseData._odata[sc](i) | ||||
| 	+ innerProduct(Basis[i]._odata[sf],fineData._odata[sf]); | ||||
|  | ||||
| @@ -139,6 +140,7 @@ inline void blockZAXPY(Lattice<vobj> &fineZ, | ||||
|   GridBase * coarse= coarseA._grid; | ||||
|  | ||||
|   fineZ.checkerboard=fineX.checkerboard; | ||||
|   assert(fineX.checkerboard==fineY.checkerboard); | ||||
|   subdivides(coarse,fine); // require they map | ||||
|   conformable(fineX,fineY); | ||||
|   conformable(fineX,fineZ); | ||||
| @@ -180,9 +182,10 @@ template<class vobj,class CComplex> | ||||
|   GridBase *coarse(CoarseInner._grid); | ||||
|   GridBase *fine  (fineX._grid); | ||||
|  | ||||
|   Lattice<dotp> fine_inner(fine); | ||||
|   Lattice<dotp> fine_inner(fine); fine_inner.checkerboard = fineX.checkerboard; | ||||
|   Lattice<dotp> coarse_inner(coarse); | ||||
|  | ||||
|   // Precision promotion? | ||||
|   fine_inner = localInnerProduct(fineX,fineY); | ||||
|   blockSum(coarse_inner,fine_inner); | ||||
|   parallel_for(int ss=0;ss<coarse->oSites();ss++){ | ||||
| @@ -193,7 +196,7 @@ template<class vobj,class CComplex> | ||||
| inline void blockNormalise(Lattice<CComplex> &ip,Lattice<vobj> &fineX) | ||||
| { | ||||
|   GridBase *coarse = ip._grid; | ||||
|   Lattice<vobj> zz(fineX._grid); zz=zero; | ||||
|   Lattice<vobj> zz(fineX._grid); zz=zero; zz.checkerboard=fineX.checkerboard; | ||||
|   blockInnerProduct(ip,fineX,fineX); | ||||
|   ip = pow(ip,-0.5); | ||||
|   blockZAXPY(fineX,ip,fineX,zz); | ||||
| @@ -216,19 +219,25 @@ inline void blockSum(Lattice<vobj> &coarseData,const Lattice<vobj> &fineData) | ||||
|     block_r[d] = fine->_rdimensions[d] / coarse->_rdimensions[d]; | ||||
|   } | ||||
|  | ||||
|   // Turn this around to loop threaded over sc and interior loop  | ||||
|   // over sf would thread better | ||||
|   coarseData=zero; | ||||
|   for(int sf=0;sf<fine->oSites();sf++){ | ||||
|      | ||||
|   parallel_region { | ||||
|  | ||||
|     int sc; | ||||
|     std::vector<int> coor_c(_ndimension); | ||||
|     std::vector<int> coor_f(_ndimension); | ||||
|  | ||||
|     Lexicographic::CoorFromIndex(coor_f,sf,fine->_rdimensions); | ||||
|     for(int d=0;d<_ndimension;d++) coor_c[d]=coor_f[d]/block_r[d]; | ||||
|     Lexicographic::IndexFromCoor(coor_c,sc,coarse->_rdimensions); | ||||
|  | ||||
|     coarseData._odata[sc]=coarseData._odata[sc]+fineData._odata[sf]; | ||||
|     parallel_for_internal(int sf=0;sf<fine->oSites();sf++){ | ||||
|      | ||||
|       Lexicographic::CoorFromIndex(coor_f,sf,fine->_rdimensions); | ||||
|       for(int d=0;d<_ndimension;d++) coor_c[d]=coor_f[d]/block_r[d]; | ||||
|       Lexicographic::IndexFromCoor(coor_c,sc,coarse->_rdimensions); | ||||
|        | ||||
| PARALLEL_CRITICAL | ||||
|       coarseData._odata[sc]=coarseData._odata[sc]+fineData._odata[sf]; | ||||
|  | ||||
|     } | ||||
|   } | ||||
|   return; | ||||
| } | ||||
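
The reworked `blockSum` above threads over fine sites, but several fine sites accumulate into the same coarse site, so the shared update is wrapped in `PARALLEL_CRITICAL`. A standalone OpenMP sketch of the same pattern on plain doubles (illustrative only; Grid's macros expand to similar pragmas):

```cpp
#include <cstdio>
#include <vector>

int main() {
  const int fine = 16, block = 4, coarse = fine / block;
  std::vector<double> fineData(fine, 1.0);
  std::vector<double> coarseData(coarse, 0.0);

  #pragma omp parallel
  {
    #pragma omp for
    for (int sf = 0; sf < fine; sf++) {
      int sc = sf / block;              // coarse site this fine site feeds
      #pragma omp critical
      coarseData[sc] += fineData[sf];   // serialise the shared accumulation
    }
  }

  for (int sc = 0; sc < coarse; sc++)
    std::printf("coarse[%d] = %g\n", sc, coarseData[sc]);
  return 0;
}
```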
| @@ -238,7 +247,7 @@ inline void blockPick(GridBase *coarse,const Lattice<vobj> &unpicked,Lattice<vob | ||||
| { | ||||
|   GridBase * fine = unpicked._grid; | ||||
|  | ||||
|   Lattice<vobj> zz(fine); | ||||
|   Lattice<vobj> zz(fine); zz.checkerboard = unpicked.checkerboard; | ||||
|   Lattice<iScalar<vInteger> > fcoor(fine); | ||||
|  | ||||
|   zz = zero; | ||||
| @@ -303,20 +312,21 @@ inline void blockPromote(const Lattice<iVector<CComplex,nbasis > > &coarseData, | ||||
|   } | ||||
|  | ||||
|   // Loop with a cache friendly loop ordering | ||||
|   for(int sf=0;sf<fine->oSites();sf++){ | ||||
|  | ||||
|   parallel_region { | ||||
|     int sc; | ||||
|     std::vector<int> coor_c(_ndimension); | ||||
|     std::vector<int> coor_f(_ndimension); | ||||
|  | ||||
|     Lexicographic::CoorFromIndex(coor_f,sf,fine->_rdimensions); | ||||
|     for(int d=0;d<_ndimension;d++) coor_c[d]=coor_f[d]/block_r[d]; | ||||
|     Lexicographic::IndexFromCoor(coor_c,sc,coarse->_rdimensions); | ||||
|  | ||||
|     for(int i=0;i<nbasis;i++) { | ||||
|       if(i==0) fineData._odata[sf]=coarseData._odata[sc](i) * Basis[i]._odata[sf]; | ||||
|       else     fineData._odata[sf]=fineData._odata[sf]+coarseData._odata[sc](i)*Basis[i]._odata[sf]; | ||||
|     parallel_for_internal(int sf=0;sf<fine->oSites();sf++){ | ||||
|  | ||||
|       Lexicographic::CoorFromIndex(coor_f,sf,fine->_rdimensions); | ||||
|       for(int d=0;d<_ndimension;d++) coor_c[d]=coor_f[d]/block_r[d]; | ||||
|       Lexicographic::IndexFromCoor(coor_c,sc,coarse->_rdimensions); | ||||
|        | ||||
|       for(int i=0;i<nbasis;i++) { | ||||
| 	if(i==0) fineData._odata[sf]=coarseData._odata[sc](i) * Basis[i]._odata[sf]; | ||||
| 	else     fineData._odata[sf]=fineData._odata[sf]+coarseData._odata[sc](i)*Basis[i]._odata[sf]; | ||||
|       } | ||||
|     } | ||||
|   } | ||||
|   return; | ||||
| @@ -684,6 +694,315 @@ void precisionChange(Lattice<VobjOut> &out, const Lattice<VobjIn> &in){ | ||||
|     merge(out._odata[out_oidx], ptrs, 0); | ||||
|   } | ||||
| } | ||||
|  | ||||
| //////////////////////////////////////////////////////////////////////////////// | ||||
| // Communicate between grids | ||||
| //////////////////////////////////////////////////////////////////////////////// | ||||
| // | ||||
| // All to all plan | ||||
| // | ||||
| // Subvolume on fine grid is v.    Vectors a,b,c,d  | ||||
| // | ||||
| /////////////////////////////////////////////////////////////////////////////////////////////////////////// | ||||
| // SIMPLEST CASE: | ||||
| /////////////////////////////////////////////////////////////////////////////////////////////////////////// | ||||
| // Mesh of nodes (2); subdivide to 1 subdivision | ||||
| // | ||||
| // Lex ord:    | ||||
| //          N0 va0 vb0  N1 va1 vb1  | ||||
| // | ||||
| // For each dimension do an all to all | ||||
| // | ||||
| // full AllToAll(0) | ||||
| //          N0 va0 va1    N1 vb0 vb1 | ||||
| // | ||||
| // REARRANGE | ||||
| //          N0 va01       N1 vb01 | ||||
| // | ||||
| // Must also rearrange data into the NEW lex order of the grid at each stage: some kind of "insert/extract". | ||||
| // NB: Easiest to program if we keep everything in lex order. | ||||
| // | ||||
| /////////////////////////////////////////////////////////////////////////////////////////////////////////// | ||||
| // SIMPLE CASE: | ||||
| /////////////////////////////////////////////////////////////////////////////////////////////////////////// | ||||
| // | ||||
| // Mesh of nodes (2x2) ; subdivide to  1x1 subdivisions | ||||
| // | ||||
| // Lex ord:    | ||||
| //          N0 va0 vb0 vc0 vd0       N1 va1 vb1 vc1 vd1   | ||||
| //          N2 va2 vb2 vc2 vd2       N3 va3 vb3 vc3 vd3  | ||||
| // | ||||
| // Ratio = full[dim] / split[dim] | ||||
| // | ||||
| // For each dimension do an all to all; get Nvec -> Nvec / ratio | ||||
| //                                          Ldim -> Ldim * ratio | ||||
| //                                          LocalVol -> LocalVol * ratio | ||||
| // full AllToAll(0) | ||||
| //          N0 va0 vb0 va1 vb1       N1 vc0 vd0 vc1 vd1    | ||||
| //          N2 va2 vb2 va3 vb3       N3 vc2 vd2 vc3 vd3  | ||||
| // | ||||
| // REARRANGE | ||||
| //          N0 va01 vb01      N1 vc01 vd01 | ||||
| //          N2 va23 vb23      N3 vc23 vd23 | ||||
| // | ||||
| // full AllToAll(1)           // Not what is wanted. FIXME | ||||
| //          N0 va01 va23      N1 vc01 vc23  | ||||
| //          N2 vb01 vb23      N3 vd01 vd23 | ||||
| //  | ||||
| // REARRANGE | ||||
| //          N0 va0123      N1 vc0123 | ||||
| //          N2 vb0123      N3 vd0123 | ||||
| // | ||||
| // Must also rearrange data into the NEW lex order of the grid at each stage: some kind of "insert/extract". | ||||
| // NB: Easiest to program if we keep everything in lex order. | ||||
| // | ||||
| ///////////////////////////////////////////////////////// | ||||
|  | ||||
| template<class Vobj> | ||||
| void Grid_split(std::vector<Lattice<Vobj> > & full,Lattice<Vobj>   & split) | ||||
| { | ||||
|   typedef typename Vobj::scalar_object Sobj; | ||||
|  | ||||
|   int full_vecs   = full.size(); | ||||
|  | ||||
|   assert(full_vecs>=1); | ||||
|  | ||||
|   GridBase * full_grid = full[0]._grid; | ||||
|   GridBase *split_grid = split._grid; | ||||
|  | ||||
|   int       ndim  = full_grid->_ndimension; | ||||
|   int  full_nproc = full_grid->_Nprocessors; | ||||
|   int split_nproc =split_grid->_Nprocessors; | ||||
|  | ||||
|   //////////////////////////////// | ||||
|   // Checkerboard management | ||||
|   //////////////////////////////// | ||||
|   int cb = full[0].checkerboard; | ||||
|   split.checkerboard = cb; | ||||
|  | ||||
|   ////////////////////////////// | ||||
|   // Checks | ||||
|   ////////////////////////////// | ||||
|   assert(full_grid->_ndimension==split_grid->_ndimension); | ||||
|   for(int n=0;n<full_vecs;n++){ | ||||
|     assert(full[n].checkerboard == cb); | ||||
|     for(int d=0;d<ndim;d++){ | ||||
|       assert(full[n]._grid->_gdimensions[d]==split._grid->_gdimensions[d]); | ||||
|       assert(full[n]._grid->_fdimensions[d]==split._grid->_fdimensions[d]); | ||||
|     } | ||||
|   } | ||||
|  | ||||
|   int   nvector   =full_nproc/split_nproc;  | ||||
|   assert(nvector*split_nproc==full_nproc); | ||||
|   assert(nvector == full_vecs); | ||||
|  | ||||
|   std::vector<int> ratio(ndim); | ||||
|   for(int d=0;d<ndim;d++){ | ||||
|     ratio[d] = full_grid->_processors[d]/ split_grid->_processors[d]; | ||||
|   } | ||||
|  | ||||
|   uint64_t lsites = full_grid->lSites(); | ||||
|   uint64_t     sz = lsites * nvector; | ||||
|   std::vector<Sobj> tmpdata(sz); | ||||
|   std::vector<Sobj> alldata(sz); | ||||
|   std::vector<Sobj> scalardata(lsites);  | ||||
|  | ||||
|   for(int v=0;v<nvector;v++){ | ||||
|     unvectorizeToLexOrdArray(scalardata,full[v]);     | ||||
|     parallel_for(int site=0;site<lsites;site++){ | ||||
|       alldata[v*lsites+site] = scalardata[site]; | ||||
|     } | ||||
|   } | ||||
|  | ||||
|   int nvec = nvector; // Counts down to 1 as we collapse dims | ||||
|   std::vector<int> ldims = full_grid->_ldimensions; | ||||
|   std::vector<int> lcoor(ndim); | ||||
|  | ||||
|   for(int d=ndim-1;d>=0;d--){ | ||||
|  | ||||
|     if ( ratio[d] != 1 ) { | ||||
|  | ||||
|       full_grid ->AllToAll(d,alldata,tmpdata); | ||||
|       //      std::cout << GridLogMessage << "Grid_split: dim " <<d<<" ratio "<<ratio[d]<<" nvec "<<nvec<<" procs "<<split_grid->_processors[d]<<std::endl; | ||||
|       //      for(int v=0;v<nvec;v++){ | ||||
|       //	std::cout << "Grid_split: alldata["<<v<<"] " << alldata[v] <<std::endl; | ||||
|       //	std::cout << "Grid_split: tmpdata["<<v<<"] " << tmpdata[v] <<std::endl; | ||||
|       //      } | ||||
|       ////////////////////////////////////////// | ||||
|       //Local volume for this dimension is expanded by ratio of processor extents | ||||
|       // Number of vectors is decreased by same factor | ||||
|       // Rearrange to lexico for bigger volume | ||||
|       ////////////////////////////////////////// | ||||
|       nvec    /= ratio[d]; | ||||
|  | ||||
|       auto rdims = ldims; rdims[d]  *=   ratio[d]; | ||||
|       auto rsites= lsites*ratio[d]; | ||||
|       for(int v=0;v<nvec;v++){ | ||||
|  | ||||
| 	// For loop over each site within old subvol | ||||
| 	for(int lsite=0;lsite<lsites;lsite++){ | ||||
|  | ||||
| 	  Lexicographic::CoorFromIndex(lcoor, lsite, ldims);	   | ||||
|  | ||||
| 	  for(int r=0;r<ratio[d];r++){ // ratio*nvec terms | ||||
|  | ||||
| 	    auto rcoor = lcoor;	    rcoor[d]  += r*ldims[d]; | ||||
|  | ||||
| 	    int rsite; Lexicographic::IndexFromCoor(rcoor, rsite, rdims);	   | ||||
| 	    rsite += v * rsites; | ||||
|  | ||||
| 	    int rmul=nvec*lsites; | ||||
| 	    int vmul=     lsites; | ||||
| 	    alldata[rsite] = tmpdata[lsite+r*rmul+v*vmul]; | ||||
| 	    //	    if ( lsite==0 ) { | ||||
| 	    //	      std::cout << "Grid_split: grow alldata["<<rsite<<"] " << alldata[rsite] << " <- tmpdata["<< lsite+r*rmul+v*vmul<<"] "<<tmpdata[lsite+r*rmul+v*vmul]  <<std::endl; | ||||
| 	    //	    }	       | ||||
| 	  } | ||||
| 	} | ||||
|       } | ||||
|       ldims[d]*= ratio[d]; | ||||
|       lsites  *= ratio[d]; | ||||
|  | ||||
|       if ( split_grid->_processors[d] > 1 ) { | ||||
| 	tmpdata = alldata; | ||||
| 	split_grid->AllToAll(d,tmpdata,alldata); | ||||
|       } | ||||
|     } | ||||
|   } | ||||
|   vectorizeFromLexOrdArray(alldata,split);     | ||||
| } | ||||
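
To make the REARRANGE step concrete, here is a standalone sketch of the indexing in the loop above (`alldata[rsite] = tmpdata[lsite + r*rmul + v*vmul]`) for a single split dimension with local length 4, processor ratio 2 and four input vectors. It is illustrative only: the real code builds `rsite` from full multi-dimensional coordinates, and the entries are lattice sites rather than flat indices.

```cpp
#include <cstdio>
#include <vector>

int main() {
  const int lsites = 4, ratio = 2, nvec_in = 4;
  const int nvec   = nvec_in / ratio;          // vectors left per rank after this step
  const int rsites = lsites * ratio;           // enlarged local volume

  // tmpdata is indexed the way the loop above assumes the AllToAll delivers it:
  // chunk r (stride nvec*lsites), vector v (stride lsites), then site.
  std::vector<int> tmpdata(lsites * nvec_in);
  for (int i = 0; i < (int)tmpdata.size(); i++) tmpdata[i] = i;

  std::vector<int> alldata(rsites * nvec);
  const int rmul = nvec * lsites;
  const int vmul = lsites;
  for (int v = 0; v < nvec; v++)
    for (int lsite = 0; lsite < lsites; lsite++)
      for (int r = 0; r < ratio; r++) {
        int rsite = lsite + r * lsites + v * rsites; // lex position in the bigger volume
        alldata[rsite] = tmpdata[lsite + r * rmul + v * vmul];
      }

  for (int s = 0; s < (int)alldata.size(); s++)
    std::printf("alldata[%2d] <- tmpdata[%2d]\n", s, alldata[s]);
  return 0;
}
```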
|  | ||||
| template<class Vobj> | ||||
| void Grid_split(Lattice<Vobj> &full,Lattice<Vobj>   & split) | ||||
| { | ||||
|   int nvector = full._grid->_Nprocessors / split._grid->_Nprocessors; | ||||
|   std::vector<Lattice<Vobj> > full_v(nvector,full._grid); | ||||
|   for(int n=0;n<nvector;n++){ | ||||
|     full_v[n] = full; | ||||
|   } | ||||
|   Grid_split(full_v,split); | ||||
| } | ||||
|  | ||||
| template<class Vobj> | ||||
| void Grid_unsplit(std::vector<Lattice<Vobj> > & full,Lattice<Vobj>   & split) | ||||
| { | ||||
|   typedef typename Vobj::scalar_object Sobj; | ||||
|  | ||||
|   int full_vecs   = full.size(); | ||||
|  | ||||
|   assert(full_vecs>=1); | ||||
|  | ||||
|   GridBase * full_grid = full[0]._grid; | ||||
|   GridBase *split_grid = split._grid; | ||||
|  | ||||
|   int       ndim  = full_grid->_ndimension; | ||||
|   int  full_nproc = full_grid->_Nprocessors; | ||||
|   int split_nproc =split_grid->_Nprocessors; | ||||
|  | ||||
|   //////////////////////////////// | ||||
|   // Checkerboard management | ||||
|   //////////////////////////////// | ||||
|   int cb = full[0].checkerboard; | ||||
|   split.checkerboard = cb; | ||||
|  | ||||
|   ////////////////////////////// | ||||
|   // Checks | ||||
|   ////////////////////////////// | ||||
|   assert(full_grid->_ndimension==split_grid->_ndimension); | ||||
|   for(int n=0;n<full_vecs;n++){ | ||||
|     assert(full[n].checkerboard == cb); | ||||
|     for(int d=0;d<ndim;d++){ | ||||
|       assert(full[n]._grid->_gdimensions[d]==split._grid->_gdimensions[d]); | ||||
|       assert(full[n]._grid->_fdimensions[d]==split._grid->_fdimensions[d]); | ||||
|     } | ||||
|   } | ||||
|  | ||||
|   int   nvector   =full_nproc/split_nproc;  | ||||
|   assert(nvector*split_nproc==full_nproc); | ||||
|   assert(nvector == full_vecs); | ||||
|  | ||||
|   std::vector<int> ratio(ndim); | ||||
|   for(int d=0;d<ndim;d++){ | ||||
|     ratio[d] = full_grid->_processors[d]/ split_grid->_processors[d]; | ||||
|   } | ||||
|  | ||||
|   uint64_t lsites = full_grid->lSites(); | ||||
|   uint64_t     sz = lsites * nvector; | ||||
|   std::vector<Sobj> tmpdata(sz); | ||||
|   std::vector<Sobj> alldata(sz); | ||||
|   std::vector<Sobj> scalardata(lsites);  | ||||
|  | ||||
|   unvectorizeToLexOrdArray(alldata,split);     | ||||
|  | ||||
|   ///////////////////////////////////////////////////////////////// | ||||
|   // Start from split grid and work towards full grid | ||||
|   ///////////////////////////////////////////////////////////////// | ||||
|   std::vector<int> lcoor(ndim); | ||||
|   std::vector<int> rcoor(ndim); | ||||
|  | ||||
|   int nvec = 1; | ||||
|   lsites = split_grid->lSites(); | ||||
|   std::vector<int> ldims = split_grid->_ldimensions; | ||||
|  | ||||
|   //  for(int d=ndim-1;d>=0;d--){ | ||||
|   for(int d=0;d<ndim;d++){ | ||||
|  | ||||
|     if ( ratio[d] != 1 ) { | ||||
|  | ||||
|  | ||||
|       if ( split_grid->_processors[d] > 1 ) { | ||||
| 	tmpdata = alldata; | ||||
| 	split_grid->AllToAll(d,tmpdata,alldata); | ||||
|       } | ||||
|  | ||||
|       ////////////////////////////////////////// | ||||
|       //Local volume for this dimension is expanded by ratio of processor extents | ||||
|       // Number of vectors is decreased by same factor | ||||
|       // Rearrange to lexico for bigger volume | ||||
|       ////////////////////////////////////////// | ||||
|       auto rsites= lsites/ratio[d]; | ||||
|       auto rdims = ldims; rdims[d]/=ratio[d]; | ||||
|  | ||||
|       for(int v=0;v<nvec;v++){ | ||||
|  | ||||
| 	// rsite, rcoor --> smaller local volume | ||||
| 	// lsite, lcoor --> bigger original (single node?) volume | ||||
| 	// For loop over each site within smaller subvol | ||||
| 	for(int rsite=0;rsite<rsites;rsite++){ | ||||
|  | ||||
| 	  Lexicographic::CoorFromIndex(rcoor, rsite, rdims);	   | ||||
| 	  int lsite; | ||||
|  | ||||
| 	  for(int r=0;r<ratio[d];r++){  | ||||
|  | ||||
| 	    lcoor = rcoor; lcoor[d] += r*rdims[d]; | ||||
| 	    Lexicographic::IndexFromCoor(lcoor, lsite, ldims); lsite += v * lsites; | ||||
|  | ||||
| 	    int rmul=nvec*rsites; | ||||
| 	    int vmul=     rsites; | ||||
| 	    tmpdata[rsite+r*rmul+v*vmul]=alldata[lsite]; | ||||
|  | ||||
| 	  } | ||||
| 	} | ||||
|       } | ||||
|       nvec   *= ratio[d]; | ||||
|       ldims[d]=rdims[d]; | ||||
|       lsites  =rsites; | ||||
|  | ||||
|       full_grid ->AllToAll(d,tmpdata,alldata); | ||||
|     } | ||||
|   } | ||||
|  | ||||
|   lsites = full_grid->lSites(); | ||||
|   for(int v=0;v<nvector;v++){ | ||||
|     assert(v<full.size()); | ||||
|     parallel_for(int site=0;site<lsites;site++){ | ||||
|       scalardata[site] = alldata[v*lsites+site]; | ||||
|     } | ||||
|     vectorizeFromLexOrdArray(scalardata,full[v]);     | ||||
|   } | ||||
| } | ||||
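
A hedged usage sketch of the pair `Grid_split` / `Grid_unsplit` (not compiled here; grid construction is elided, and `LatticeFermion`, `FullGrid`, `SplitGrid`, `NprocFull`, `NprocSplit` are placeholder names). The number of right-hand sides must equal the rank ratio, as the asserts above enforce:

```cpp
// Nrhs must equal full_nproc / split_nproc (asserted inside Grid_split).
int Nrhs = NprocFull / NprocSplit;

std::vector<LatticeFermion> rhs(Nrhs, FullGrid);  // one field per sub-communicator
LatticeFermion rhs_split(SplitGrid);              // same global volume, fewer ranks

Grid_split  (rhs, rhs_split);   // scatter: each sub-communicator receives one vector
// ... solve independently on the split grid ...
Grid_unsplit(rhs, rhs_split);   // gather the results back onto the full grid
```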
|  | ||||
|   | ||||
| } | ||||
| #endif | ||||
|   | ||||
| @@ -50,7 +50,7 @@ namespace Grid { | ||||
|     return (status==0) ? res.get() : name ; | ||||
|   } | ||||
|    | ||||
| GridStopWatch Logger::StopWatch; | ||||
| GridStopWatch Logger::GlobalStopWatch; | ||||
| int Logger::timestamp; | ||||
| std::ostream Logger::devnull(0); | ||||
|  | ||||
| @@ -59,13 +59,15 @@ void GridLogTimestamp(int on){ | ||||
| } | ||||
|  | ||||
| Colours GridLogColours(0); | ||||
| GridLogger GridLogError(1, "Error", GridLogColours, "RED"); | ||||
| GridLogger GridLogIRL    (1, "IRL"   , GridLogColours, "NORMAL"); | ||||
| GridLogger GridLogSolver (1, "Solver", GridLogColours, "NORMAL"); | ||||
| GridLogger GridLogError  (1, "Error" , GridLogColours, "RED"); | ||||
| GridLogger GridLogWarning(1, "Warning", GridLogColours, "YELLOW"); | ||||
| GridLogger GridLogMessage(1, "Message", GridLogColours, "NORMAL"); | ||||
| GridLogger GridLogDebug(1, "Debug", GridLogColours, "PURPLE"); | ||||
| GridLogger GridLogDebug  (1, "Debug", GridLogColours, "PURPLE"); | ||||
| GridLogger GridLogPerformance(1, "Performance", GridLogColours, "GREEN"); | ||||
| GridLogger GridLogIterative(1, "Iterative", GridLogColours, "BLUE"); | ||||
| GridLogger GridLogIntegrator(1, "Integrator", GridLogColours, "BLUE"); | ||||
| GridLogger GridLogIterative  (1, "Iterative", GridLogColours, "BLUE"); | ||||
| GridLogger GridLogIntegrator (1, "Integrator", GridLogColours, "BLUE"); | ||||
|  | ||||
| void GridLogConfigure(std::vector<std::string> &logstreams) { | ||||
|   GridLogError.Active(0); | ||||
|   | ||||
| @@ -85,12 +85,15 @@ class Logger { | ||||
| protected: | ||||
|   Colours &Painter; | ||||
|   int active; | ||||
|   int timing_mode; | ||||
|   static int timestamp; | ||||
|   std::string name, topName; | ||||
|   std::string COLOUR; | ||||
|  | ||||
| public: | ||||
|   static GridStopWatch StopWatch; | ||||
|   static GridStopWatch GlobalStopWatch; | ||||
|   GridStopWatch         LocalStopWatch; | ||||
|   GridStopWatch *StopWatch; | ||||
|   static std::ostream devnull; | ||||
|  | ||||
|   std::string background() {return Painter.colour["NORMAL"];} | ||||
| @@ -101,22 +104,38 @@ public: | ||||
|     name(nm), | ||||
|     topName(topNm), | ||||
|     Painter(col_class), | ||||
|     COLOUR(col) {} ; | ||||
|     timing_mode(0), | ||||
|     COLOUR(col)  | ||||
|     { | ||||
|       StopWatch = & GlobalStopWatch; | ||||
|     }; | ||||
|    | ||||
|   void Active(int on) {active = on;}; | ||||
|   int  isActive(void) {return active;}; | ||||
|   static void Timestamp(int on) {timestamp = on;}; | ||||
|    | ||||
|   void Reset(void) {  | ||||
|     StopWatch->Reset();  | ||||
|     StopWatch->Start();  | ||||
|   } | ||||
|   void TimingMode(int on) {  | ||||
|     timing_mode = on;  | ||||
|     if(on) {  | ||||
|       StopWatch = &LocalStopWatch; | ||||
|       Reset();  | ||||
|     } | ||||
|   } | ||||
|  | ||||
|   friend std::ostream& operator<< (std::ostream& stream, Logger& log){ | ||||
|  | ||||
|     if ( log.active ) { | ||||
|       stream << log.background()<< std::setw(8) << std::left << log.topName << log.background()<< " : "; | ||||
|       stream << log.colour() << std::setw(10) << std::left << log.name << log.background() << " : "; | ||||
|       stream << log.background()<<  std::left << log.topName << log.background()<< " : "; | ||||
|       stream << log.colour() <<  std::left << log.name << log.background() << " : "; | ||||
|       if ( log.timestamp ) { | ||||
| 	StopWatch.Stop(); | ||||
| 	GridTime now = StopWatch.Elapsed(); | ||||
| 	StopWatch.Start(); | ||||
| 	stream << log.evidence()<< now << log.background() << " : " ; | ||||
| 	log.StopWatch->Stop(); | ||||
| 	GridTime now = log.StopWatch->Elapsed(); | ||||
| 	if ( log.timing_mode==1 ) log.StopWatch->Reset(); | ||||
| 	log.StopWatch->Start(); | ||||
| 	stream << log.evidence()<< std::setw(6)<<now << log.background() << " : " ; | ||||
|       } | ||||
|       stream << log.colour(); | ||||
|       return stream; | ||||
| @@ -135,6 +154,8 @@ public: | ||||
|  | ||||
| void GridLogConfigure(std::vector<std::string> &logstreams); | ||||
|  | ||||
| extern GridLogger GridLogIRL; | ||||
| extern GridLogger GridLogSolver; | ||||
| extern GridLogger GridLogError; | ||||
| extern GridLogger GridLogWarning; | ||||
| extern GridLogger GridLogMessage; | ||||
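
The Logger changes above give every logger its own `LocalStopWatch` and a `TimingMode` switch: with timing mode on, the timestamp printed with each message is the interval since that logger's previous message rather than the time on the shared `GlobalStopWatch`. A standalone `std::chrono` sketch of the same idea (illustrative only, not Grid's classes):

```cpp
#include <chrono>
#include <cstdio>
#include <thread>

struct TinyLogger {
  using clock = std::chrono::steady_clock;
  static clock::time_point global_start;         // shared, set once at startup
  clock::time_point local_start = clock::now();  // per-logger clock
  bool timing_mode = false;

  void log(const char *msg) {
    auto &start = timing_mode ? local_start : global_start;
    auto ms = std::chrono::duration_cast<std::chrono::milliseconds>(clock::now() - start).count();
    std::printf("[%6lld ms] %s\n", (long long)ms, msg);
    if (timing_mode) local_start = clock::now();  // next message reports an interval
  }
};
TinyLogger::clock::time_point TinyLogger::global_start = TinyLogger::clock::now();

int main() {
  TinyLogger log;
  log.timing_mode = true;   // behaves like Logger::TimingMode(1)
  log.log("start");
  std::this_thread::sleep_for(std::chrono::milliseconds(50));
  log.log("after ~50 ms of work");   // prints the interval, not time since startup
  return 0;
}
```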
|   | ||||
| @@ -261,7 +261,7 @@ class BinaryIO { | ||||
| 			      GridBase *grid, | ||||
| 			      std::vector<fobj> &iodata, | ||||
| 			      std::string file, | ||||
| 			      int offset, | ||||
| 			      Integer offset, | ||||
| 			      const std::string &format, int control, | ||||
| 			      uint32_t &nersc_csum, | ||||
| 			      uint32_t &scidac_csuma, | ||||
| @@ -356,7 +356,7 @@ class BinaryIO { | ||||
|  | ||||
|       if ( (control & BINARYIO_LEXICOGRAPHIC) && (nrank > 1) ) { | ||||
| #ifdef USE_MPI_IO | ||||
| 	std::cout<< GridLogMessage<< "MPI read I/O "<< file<< std::endl; | ||||
| 	std::cout<< GridLogMessage<<"IOobject: MPI read I/O "<< file<< std::endl; | ||||
| 	ierr=MPI_File_open(grid->communicator,(char *) file.c_str(), MPI_MODE_RDONLY, MPI_INFO_NULL, &fh);    assert(ierr==0); | ||||
| 	ierr=MPI_File_set_view(fh, disp, mpiObject, fileArray, "native", MPI_INFO_NULL);    assert(ierr==0); | ||||
| 	ierr=MPI_File_read_all(fh, &iodata[0], 1, localArray, &status);    assert(ierr==0); | ||||
| @@ -367,7 +367,7 @@ class BinaryIO { | ||||
| 	assert(0); | ||||
| #endif | ||||
|       } else { | ||||
|         std::cout << GridLogMessage << "C++ read I/O " << file << " : " | ||||
| 	std::cout << GridLogMessage <<"IOobject: C++ read I/O " << file << " : " | ||||
|                   << iodata.size() * sizeof(fobj) << " bytes" << std::endl; | ||||
|         std::ifstream fin; | ||||
|         fin.open(file, std::ios::binary | std::ios::in); | ||||
| @@ -413,9 +413,9 @@ class BinaryIO { | ||||
|       timer.Start(); | ||||
|       if ( (control & BINARYIO_LEXICOGRAPHIC) && (nrank > 1) ) { | ||||
| #ifdef USE_MPI_IO | ||||
|         std::cout << GridLogMessage << "MPI write I/O " << file << std::endl; | ||||
|         std::cout << GridLogMessage <<"IOobject: MPI write I/O " << file << std::endl; | ||||
|         ierr = MPI_File_open(grid->communicator, (char *)file.c_str(), MPI_MODE_RDWR | MPI_MODE_CREATE, MPI_INFO_NULL, &fh); | ||||
|         std::cout << GridLogMessage << "Checking for errors" << std::endl; | ||||
| 	//        std::cout << GridLogMessage << "Checking for errors" << std::endl; | ||||
|         if (ierr != MPI_SUCCESS) | ||||
|         { | ||||
|           char error_string[BUFSIZ]; | ||||
| @@ -444,48 +444,56 @@ class BinaryIO { | ||||
| 	assert(0); | ||||
| #endif | ||||
|       } else {  | ||||
|  | ||||
|         std::cout << GridLogMessage << "IOobject: C++ write I/O " << file << " : " | ||||
|                   << iodata.size() * sizeof(fobj) << " bytes" << std::endl; | ||||
|          | ||||
| 	std::ofstream fout;  | ||||
|   fout.exceptions ( std::fstream::failbit | std::fstream::badbit ); | ||||
|   try { | ||||
|     fout.open(file,std::ios::binary|std::ios::out|std::ios::in); | ||||
|   } catch (const std::fstream::failure& exc) { | ||||
|     std::cout << GridLogError << "Error in opening the file " << file << " for output" <<std::endl; | ||||
|     std::cout << GridLogError << "Exception description: " << exc.what() << std::endl; | ||||
|     std::cout << GridLogError << "Probable cause: wrong path, inaccessible location "<< std::endl; | ||||
|     #ifdef USE_MPI_IO | ||||
|     MPI_Abort(MPI_COMM_WORLD,1); | ||||
|     #else | ||||
|     exit(1); | ||||
|     #endif | ||||
|   } | ||||
| 	std::cout << GridLogMessage<< "C++ write I/O "<< file<<" : " | ||||
| 		        << iodata.size()*sizeof(fobj)<<" bytes"<<std::endl; | ||||
| 	 | ||||
|   if ( control & BINARYIO_MASTER_APPEND )  { | ||||
| 	  fout.seekp(0,fout.end); | ||||
| 	} else { | ||||
| 	  fout.seekp(offset+myrank*lsites*sizeof(fobj)); | ||||
| 	fout.exceptions ( std::fstream::failbit | std::fstream::badbit ); | ||||
| 	try { | ||||
| 	  fout.open(file,std::ios::binary|std::ios::out|std::ios::in); | ||||
| 	} catch (const std::fstream::failure& exc) { | ||||
| 	  std::cout << GridLogError << "Error in opening the file " << file << " for output" <<std::endl; | ||||
| 	  std::cout << GridLogError << "Exception description: " << exc.what() << std::endl; | ||||
| 	  std::cout << GridLogError << "Probable cause: wrong path, inaccessible location "<< std::endl; | ||||
| #ifdef USE_MPI_IO | ||||
| 	  MPI_Abort(MPI_COMM_WORLD,1); | ||||
| #else | ||||
| 	  exit(1); | ||||
| #endif | ||||
| 	} | ||||
| 	 | ||||
| 	if ( control & BINARYIO_MASTER_APPEND )  { | ||||
| 	  try { | ||||
| 	    fout.seekp(0,fout.end); | ||||
| 	  } catch (const std::fstream::failure& exc) { | ||||
| 	    std::cout << "Exception in seeking file end " << file << std::endl; | ||||
| 	  } | ||||
| 	} else { | ||||
| 	  try {  | ||||
| 	    fout.seekp(offset+myrank*lsites*sizeof(fobj)); | ||||
| 	  } catch (const std::fstream::failure& exc) { | ||||
| 	    std::cout << "Exception in seeking file " << file <<" offset "<< offset << std::endl; | ||||
| 	  } | ||||
| 	} | ||||
|    | ||||
|   try { | ||||
|   	fout.write((char *)&iodata[0],iodata.size()*sizeof(fobj));//assert( fout.fail()==0); | ||||
|   } | ||||
|   catch (const std::fstream::failure& exc) { | ||||
|     std::cout << "Exception in writing file " << file << std::endl; | ||||
|     std::cout << GridLogError << "Exception description: "<< exc.what() << std::endl; | ||||
|     #ifdef USE_MPI_IO | ||||
|     MPI_Abort(MPI_COMM_WORLD,1); | ||||
|     #else | ||||
|     exit(1); | ||||
|     #endif | ||||
|   } | ||||
|  | ||||
| 	try { | ||||
| 	  fout.write((char *)&iodata[0],iodata.size()*sizeof(fobj));//assert( fout.fail()==0); | ||||
| 	} | ||||
| 	catch (const std::fstream::failure& exc) { | ||||
| 	  std::cout << "Exception in writing file " << file << std::endl; | ||||
| 	  std::cout << GridLogError << "Exception description: "<< exc.what() << std::endl; | ||||
| #ifdef USE_MPI_IO | ||||
| 	  MPI_Abort(MPI_COMM_WORLD,1); | ||||
| #else | ||||
| 	  exit(1); | ||||
| #endif | ||||
| 	} | ||||
| 	fout.close(); | ||||
|   } | ||||
|   timer.Stop(); | ||||
|   } | ||||
|  | ||||
|       } | ||||
|       timer.Stop(); | ||||
|     } | ||||
|      | ||||
|     std::cout<<GridLogMessage<<"IOobject: "; | ||||
|     if ( control & BINARYIO_READ) std::cout << " read  "; | ||||
|     else                          std::cout << " write "; | ||||
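
The rewritten C++ write path above switches from silent flag checks to iostream exceptions: `failbit`/`badbit` are enabled on the stream and `open`, `seekp` and `write` are wrapped in try/catch so the failing step is identified before the job aborts. A minimal standalone sketch of that pattern:

```cpp
#include <fstream>
#include <iostream>

int main() {
  std::ofstream fout;
  fout.exceptions(std::fstream::failbit | std::fstream::badbit);
  try {
    fout.open("/nonexistent/dir/file.bin", std::ios::binary | std::ios::out);
    fout.seekp(0, fout.end);
    const char payload[] = "data";
    fout.write(payload, sizeof(payload));
  } catch (const std::fstream::failure &exc) {
    std::cerr << "I/O error: " << exc.what() << std::endl;
    return 1;   // a parallel job would call MPI_Abort here instead
  }
  return 0;
}
```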
| @@ -515,7 +523,7 @@ class BinaryIO { | ||||
|   static inline void readLatticeObject(Lattice<vobj> &Umu, | ||||
| 				       std::string file, | ||||
| 				       munger munge, | ||||
| 				       int offset, | ||||
| 				       Integer offset, | ||||
| 				       const std::string &format, | ||||
| 				       uint32_t &nersc_csum, | ||||
| 				       uint32_t &scidac_csuma, | ||||
| @@ -552,7 +560,7 @@ class BinaryIO { | ||||
|     static inline void writeLatticeObject(Lattice<vobj> &Umu, | ||||
| 					  std::string file, | ||||
| 					  munger munge, | ||||
| 					  int offset, | ||||
| 					  Integer offset, | ||||
| 					  const std::string &format, | ||||
| 					  uint32_t &nersc_csum, | ||||
| 					  uint32_t &scidac_csuma, | ||||
| @@ -589,7 +597,7 @@ class BinaryIO { | ||||
|   static inline void readRNG(GridSerialRNG &serial, | ||||
| 			     GridParallelRNG ¶llel, | ||||
| 			     std::string file, | ||||
| 			     int offset, | ||||
| 			     Integer offset, | ||||
| 			     uint32_t &nersc_csum, | ||||
| 			     uint32_t &scidac_csuma, | ||||
| 			     uint32_t &scidac_csumb) | ||||
| @@ -651,7 +659,7 @@ class BinaryIO { | ||||
|   static inline void writeRNG(GridSerialRNG &serial, | ||||
| 			      GridParallelRNG ¶llel, | ||||
| 			      std::string file, | ||||
| 			      int offset, | ||||
| 			      Integer offset, | ||||
| 			      uint32_t &nersc_csum, | ||||
| 			      uint32_t &scidac_csuma, | ||||
| 			      uint32_t &scidac_csumb) | ||||
|   | ||||
| @@ -147,7 +147,7 @@ namespace QCD { | ||||
|  | ||||
|    _scidacRecord = sr; | ||||
|  | ||||
|    std::cout << GridLogMessage << "Build SciDAC datatype " <<sr.datatype<<std::endl; | ||||
|    //   std::cout << GridLogMessage << "Build SciDAC datatype " <<sr.datatype<<std::endl; | ||||
|  } | ||||
|   | ||||
|  /////////////////////////////////////////////////////// | ||||
| @@ -159,7 +159,7 @@ namespace QCD { | ||||
|    uint32_t scidac_checksumb = stoull(scidacChecksum_.sumb,0,16); | ||||
|    if ( scidac_csuma !=scidac_checksuma) return 0; | ||||
|    if ( scidac_csumb !=scidac_checksumb) return 0; | ||||
|     return 1; | ||||
|    return 1; | ||||
|  } | ||||
|  | ||||
| //////////////////////////////////////////////////////////////////////////////////// | ||||
| @@ -224,7 +224,7 @@ class GridLimeReader : public BinaryIO { | ||||
|  | ||||
| 	assert(PayloadSize == file_bytes);// Must match or user error | ||||
|  | ||||
| 	off_t offset= ftell(File); | ||||
| 	uint64_t offset= ftello(File); | ||||
| 	//	std::cout << " ReadLatticeObject from offset "<<offset << std::endl; | ||||
| 	BinarySimpleMunger<sobj,sobj> munge; | ||||
| 	BinaryIO::readLatticeObject< vobj, sobj >(field, filename, munge, offset, format,nersc_csum,scidac_csuma,scidac_csumb); | ||||
| @@ -237,7 +237,7 @@ class GridLimeReader : public BinaryIO { | ||||
| 	///////////////////////////////////////////// | ||||
| 	// Verify checksums | ||||
| 	///////////////////////////////////////////// | ||||
| 	scidacChecksumVerify(scidacChecksum_,scidac_csuma,scidac_csumb); | ||||
| 	assert(scidacChecksumVerify(scidacChecksum_,scidac_csuma,scidac_csumb)==1); | ||||
| 	return; | ||||
|       } | ||||
|     } | ||||
| @@ -253,16 +253,13 @@ class GridLimeReader : public BinaryIO { | ||||
|     while ( limeReaderNextRecord(LimeR) == LIME_SUCCESS ) {  | ||||
|  | ||||
|       //      std::cout << GridLogMessage<< " readLimeObject seeking "<< record_name <<" found record :" <<limeReaderType(LimeR) <<std::endl; | ||||
|  | ||||
|       uint64_t nbytes = limeReaderBytes(LimeR);//size of this record (configuration) | ||||
|  | ||||
|       if ( !strncmp(limeReaderType(LimeR), record_name.c_str(),strlen(record_name.c_str()) )  ) { | ||||
|  | ||||
| 	//	std::cout << GridLogMessage<< " readLimeObject matches ! " << record_name <<std::endl; | ||||
|  | ||||
| 	std::vector<char> xmlc(nbytes+1,'\0'); | ||||
| 	limeReaderReadData((void *)&xmlc[0], &nbytes, LimeR);     | ||||
|  | ||||
| 	//	std::cout << GridLogMessage<< " readLimeObject matches XML " << &xmlc[0] <<std::endl; | ||||
|  | ||||
| 	XmlReader RD(&xmlc[0],""); | ||||
| @@ -332,7 +329,7 @@ class GridLimeWriter : public BinaryIO { | ||||
|     err=limeWriteRecordData(&xmlstring[0], &nbytes, LimeW); assert(err>=0); | ||||
|     err=limeWriterCloseRecord(LimeW);                       assert(err>=0); | ||||
|     limeDestroyHeader(h); | ||||
|     //    std::cout << " File offset is now"<<ftell(File) << std::endl; | ||||
|     //    std::cout << " File offset is now"<<ftello(File) << std::endl; | ||||
|   } | ||||
|   //////////////////////////////////////////// | ||||
|   // Write a generic lattice field and csum | ||||
| @@ -349,7 +346,6 @@ class GridLimeWriter : public BinaryIO { | ||||
|     uint64_t PayloadSize = sizeof(sobj) * field._grid->_gsites; | ||||
|     createLimeRecordHeader(record_name, 0, 0, PayloadSize); | ||||
|  | ||||
|  | ||||
|     //    std::cout << "W sizeof(sobj)"      <<sizeof(sobj)<<std::endl; | ||||
|     //    std::cout << "W Gsites "           <<field._grid->_gsites<<std::endl; | ||||
|     //    std::cout << "W Payload expected " <<PayloadSize<<std::endl; | ||||
| @@ -361,18 +357,20 @@ class GridLimeWriter : public BinaryIO { | ||||
|     // These are both buffered, so why I think this code is right is as follows. | ||||
|     // | ||||
|     // i)  write record header to FILE *File, telegraphing the size.  | ||||
|     // ii) ftell reads the offset from FILE *File . | ||||
|     // ii) ftello reads the offset from FILE *File . | ||||
|     // iii) iostream / MPI Open independently seek this offset. Write sequence direct to disk. | ||||
|     //      Closes iostream and flushes. | ||||
|     // iv) fseek on FILE * to end of this disjoint section. | ||||
|     //  v) Continue writing scidac record. | ||||
|     //////////////////////////////////////////////////////////////////// | ||||
|     off_t offset = ftell(File); | ||||
|     uint64_t offset = ftello(File); | ||||
|     //    std::cout << " Writing to offset "<<offset << std::endl; | ||||
|     std::string format = getFormatString<vobj>(); | ||||
|     BinarySimpleMunger<sobj,sobj> munge; | ||||
|     BinaryIO::writeLatticeObject<vobj,sobj>(field, filename, munge, offset, format,nersc_csum,scidac_csuma,scidac_csumb); | ||||
|     //    fseek(File,0,SEEK_END);    offset = ftello(File);std::cout << " offset now "<<offset << std::endl; | ||||
|     err=limeWriterCloseRecord(LimeW);  assert(err>=0); | ||||
|  | ||||
|     //////////////////////////////////////// | ||||
|     // Write checksum element, propagating forward from the BinaryIO | ||||
|     // Always pair a checksum with a binary object, and close message | ||||
| @@ -382,7 +380,7 @@ class GridLimeWriter : public BinaryIO { | ||||
|     std::stringstream streamb; streamb << std::hex << scidac_csumb; | ||||
|     checksum.suma= streama.str(); | ||||
|     checksum.sumb= streamb.str(); | ||||
|     std::cout << GridLogMessage<<" writing scidac checksums "<<std::hex<<scidac_csuma<<"/"<<scidac_csumb<<std::dec<<std::endl; | ||||
|     //    std::cout << GridLogMessage<<" writing scidac checksums "<<std::hex<<scidac_csuma<<"/"<<scidac_csumb<<std::dec<<std::endl; | ||||
|     writeLimeObject(0,1,checksum,std::string("scidacChecksum"),std::string(SCIDAC_CHECKSUM)); | ||||
|   } | ||||
| }; | ||||
| @@ -642,7 +640,7 @@ class IldgReader : public GridLimeReader { | ||||
| 	// Copy out the string | ||||
| 	std::vector<char> xmlc(nbytes+1,'\0'); | ||||
| 	limeReaderReadData((void *)&xmlc[0], &nbytes, LimeR);     | ||||
| 	std::cout << GridLogMessage<< "Non binary record :" <<limeReaderType(LimeR) <<std::endl; //<<"\n"<<(&xmlc[0])<<std::endl; | ||||
| 	//	std::cout << GridLogMessage<< "Non binary record :" <<limeReaderType(LimeR) <<std::endl; //<<"\n"<<(&xmlc[0])<<std::endl; | ||||
|  | ||||
| 	////////////////////////////////// | ||||
| 	// ILDG format record | ||||
| @@ -686,7 +684,7 @@ class IldgReader : public GridLimeReader { | ||||
| 	  std::string xmls(&xmlc[0]); | ||||
| 	  // is it a USQCD info field | ||||
| 	  if ( xmls.find(std::string("usqcdInfo")) != std::string::npos ) {  | ||||
| 	    std::cout << GridLogMessage<<"...found a usqcdInfo field"<<std::endl; | ||||
| 	    //	    std::cout << GridLogMessage<<"...found a usqcdInfo field"<<std::endl; | ||||
| 	    XmlReader RD(&xmlc[0],""); | ||||
| 	    read(RD,"usqcdInfo",usqcdInfo_); | ||||
| 	    found_usqcdInfo = 1; | ||||
| @@ -704,8 +702,7 @@ class IldgReader : public GridLimeReader { | ||||
| 	// Binary data | ||||
| 	///////////////////////////////// | ||||
| 	std::cout << GridLogMessage << "ILDG Binary record found : "  ILDG_BINARY_DATA << std::endl; | ||||
| 	off_t offset= ftell(File); | ||||
|  | ||||
| 	uint64_t offset= ftello(File); | ||||
| 	if ( format == std::string("IEEE64BIG") ) { | ||||
| 	  GaugeSimpleMunger<dobj, sobj> munge; | ||||
| 	  BinaryIO::readLatticeObject< vobj, dobj >(Umu, filename, munge, offset, format,nersc_csum,scidac_csuma,scidac_csumb); | ||||
|   | ||||
| @@ -77,7 +77,6 @@ void CayleyFermion5D<Impl>::DminusDag(const FermionField &psi, FermionField &chi | ||||
|   } | ||||
| } | ||||
|  | ||||
|  | ||||
| template<class Impl> void CayleyFermion5D<Impl>::CayleyReport(void) | ||||
| { | ||||
|   this->Report(); | ||||
| @@ -119,7 +118,6 @@ template<class Impl> void CayleyFermion5D<Impl>::CayleyZeroCounters(void) | ||||
|   MooeeInvTime=0; | ||||
| } | ||||
|  | ||||
|  | ||||
| template<class Impl>   | ||||
| void CayleyFermion5D<Impl>::M5D   (const FermionField &psi, FermionField &chi) | ||||
| { | ||||
|   | ||||
| @@ -61,10 +61,10 @@ namespace QCD { | ||||
|     } | ||||
|  | ||||
|     /*************************************************************** | ||||
|     /* Additional EOFA operators only called outside the inverter. | ||||
|     /* Since speed is not essential, simple axpby-style | ||||
|     /* implementations should be fine. | ||||
|     /***************************************************************/ | ||||
|      * Additional EOFA operators only called outside the inverter. | ||||
|      * Since speed is not essential, simple axpby-style | ||||
|      * implementations should be fine. | ||||
|      ***************************************************************/ | ||||
|     template<class Impl> | ||||
|     void DomainWallEOFAFermion<Impl>::Omega(const FermionField& psi, FermionField& Din, int sign, int dag) | ||||
|     { | ||||
| @@ -116,8 +116,8 @@ namespace QCD { | ||||
|     } | ||||
|  | ||||
|     /******************************************************************** | ||||
|     /* Performance critical fermion operators called inside the inverter | ||||
|     /********************************************************************/ | ||||
|      * Performance critical fermion operators called inside the inverter | ||||
|      ********************************************************************/ | ||||
|  | ||||
|     template<class Impl> | ||||
|     void DomainWallEOFAFermion<Impl>::M5D(const FermionField& psi, FermionField& chi) | ||||
|   | ||||
| @@ -77,11 +77,11 @@ namespace QCD { | ||||
|       } | ||||
|     } | ||||
|  | ||||
|     /*************************************************************** | ||||
|     /* Additional EOFA operators only called outside the inverter. | ||||
|     /* Since speed is not essential, simple axpby-style | ||||
|     /* implementations should be fine. | ||||
|     /***************************************************************/ | ||||
|     /**************************************************************** | ||||
|      * Additional EOFA operators only called outside the inverter.   | ||||
|      * Since speed is not essential, simple axpby-style | ||||
|      * implementations should be fine. | ||||
|      ***************************************************************/ | ||||
|     template<class Impl> | ||||
|     void MobiusEOFAFermion<Impl>::Omega(const FermionField& psi, FermionField& Din, int sign, int dag) | ||||
|     { | ||||
| @@ -194,8 +194,8 @@ namespace QCD { | ||||
|     } | ||||
|  | ||||
|     /******************************************************************** | ||||
|     /* Performance critical fermion operators called inside the inverter | ||||
|     /********************************************************************/ | ||||
|      * Performance critical fermion operators called inside the inverter | ||||
|      ********************************************************************/ | ||||
|  | ||||
|     template<class Impl> | ||||
|     void MobiusEOFAFermion<Impl>::M5D(const FermionField& psi, FermionField& chi) | ||||
|   | ||||
| @@ -231,7 +231,7 @@ class ForceGradient : public Integrator<FieldImplementation, SmearingPolicy, | ||||
|     Field Pfg(U._grid); | ||||
|     Ufg = U; | ||||
|     Pfg = zero; | ||||
|     std::cout << GridLogMessage << "FG update " << fg_dt << " " << ep | ||||
|     std::cout << GridLogIntegrator << "FG update " << fg_dt << " " << ep | ||||
|               << std::endl; | ||||
|     // prepare_fg; no prediction/result cache for now | ||||
|     // could relax CG stopping conditions for the | ||||
|   | ||||
| @@ -70,8 +70,8 @@ XmlReader::XmlReader(const char *xmlstring,string toplev) : fileName_("") | ||||
|   pugi::xml_parse_result result; | ||||
|   result = doc_.load_string(xmlstring); | ||||
|   if ( !result ) { | ||||
|     cerr << "XML error description: " << result.description() << "\n"; | ||||
|     cerr << "XML error offset     : " << result.offset        << "\n"; | ||||
|     cerr << "XML error description (from char *): " << result.description() << "\nXML\n"<< xmlstring << "\n"; | ||||
|     cerr << "XML error offset      (from char *) " << result.offset         << "\nXML\n"<< xmlstring <<"\n"; | ||||
|     abort(); | ||||
|   } | ||||
|   if ( toplev == std::string("") ) { | ||||
| @@ -87,8 +87,8 @@ XmlReader::XmlReader(const string &fileName,string toplev) : fileName_(fileName) | ||||
|   pugi::xml_parse_result result; | ||||
|   result = doc_.load_file(fileName_.c_str()); | ||||
|   if ( !result ) { | ||||
|     cerr << "XML error description: " << result.description() << "\n"; | ||||
|     cerr << "XML error offset     : " << result.offset        << "\n"; | ||||
|     cerr << "XML error description: " << result.description() <<" "<< fileName_ <<"\n"; | ||||
|     cerr << "XML error offset     : " << result.offset        <<" "<< fileName_ <<"\n"; | ||||
|     abort(); | ||||
|   } | ||||
|   if ( toplev == std::string("") ) { | ||||
|   | ||||
| @@ -51,7 +51,9 @@ Author: paboyle <paboyle@ph.ed.ac.uk> | ||||
| #define PARALLEL_CRITICAL | ||||
| #endif | ||||
|  | ||||
| #define parallel_region    PARALLEL_REGION | ||||
| #define parallel_for       PARALLEL_FOR_LOOP for | ||||
| #define parallel_for_internal PARALLEL_FOR_LOOP_INTERN for | ||||
| #define parallel_for_nest2 PARALLEL_NESTED_LOOP2 for | ||||
|  | ||||
| namespace Grid { | ||||
|   | ||||
| @@ -208,7 +208,7 @@ static int Grid_is_initialised = 0; | ||||
|  | ||||
| void Grid_init(int *argc,char ***argv) | ||||
| { | ||||
|   GridLogger::StopWatch.Start(); | ||||
|   GridLogger::GlobalStopWatch.Start(); | ||||
|  | ||||
|   std::string arg; | ||||
|  | ||||
| @@ -243,6 +243,12 @@ void Grid_init(int *argc,char ***argv) | ||||
|     fname<<CartesianCommunicator::RankWorld(); | ||||
|     fp=freopen(fname.str().c_str(),"w",stdout); | ||||
|     assert(fp!=(FILE *)NULL); | ||||
|  | ||||
|     std::ostringstream ename; | ||||
|     ename<<"Grid.stderr."; | ||||
|     ename<<CartesianCommunicator::RankWorld(); | ||||
|     fp=freopen(ename.str().c_str(),"w",stderr); | ||||
|     assert(fp!=(FILE *)NULL); | ||||
|   } | ||||
|  | ||||
|   //////////////////////////////////// | ||||
|   | ||||
| @@ -26,6 +26,25 @@ namespace Grid{ | ||||
|       } | ||||
|     } | ||||
|  | ||||
|     static inline void IndexFromCoorReversed (const std::vector<int>& coor,int &index,const std::vector<int> &dims){ | ||||
|       int nd=dims.size(); | ||||
|       int stride=1; | ||||
|       index=0; | ||||
|       for(int d=nd-1;d>=0;d--){ | ||||
| 	index = index+stride*coor[d]; | ||||
| 	stride=stride*dims[d]; | ||||
|       } | ||||
|     } | ||||
|     static inline void CoorFromIndexReversed (std::vector<int>& coor,int index,const std::vector<int> &dims){ | ||||
|       int nd= dims.size(); | ||||
|       coor.resize(nd); | ||||
|       for(int d=nd-1;d>=0;d--){ | ||||
| 	coor[d] = index % dims[d]; | ||||
| 	index   = index / dims[d]; | ||||
|       } | ||||
|     } | ||||
|  | ||||
|  | ||||
|   }; | ||||
|  | ||||
| } | ||||
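
The two helpers added above build the index with the last dimension running fastest, the mirror image of the existing `IndexFromCoor`/`CoorFromIndex`, which run dimension 0 fastest. A standalone sketch contrasting the two orderings (an illustrative reimplementation, not the Grid header):

```cpp
#include <cstdio>
#include <vector>

static int indexFromCoor(const std::vector<int> &coor, const std::vector<int> &dims) {
  int index = 0, stride = 1;
  for (size_t d = 0; d < dims.size(); d++) {        // dimension 0 fastest
    index += stride * coor[d];
    stride *= dims[d];
  }
  return index;
}

static int indexFromCoorReversed(const std::vector<int> &coor, const std::vector<int> &dims) {
  int index = 0, stride = 1;
  for (int d = (int)dims.size() - 1; d >= 0; d--) { // last dimension fastest
    index += stride * coor[d];
    stride *= dims[d];
  }
  return index;
}

int main() {
  std::vector<int> dims = {2, 3};
  std::vector<int> coor = {0, 2};
  std::printf("forward  index = %d\n", indexFromCoor(coor, dims));          // 0 + 2*2 = 4
  std::printf("reversed index = %d\n", indexFromCoorReversed(coor, dims));  // 2 + 0*3 = 2
  return 0;
}
```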
|   | ||||
| @@ -1,4 +1,4 @@ | ||||
| SUBDIRS = . core forces hmc solver debug smearing IO | ||||
| SUBDIRS = . core forces hmc solver debug smearing IO lanczos | ||||
|  | ||||
| if BUILD_CHROMA_REGRESSION | ||||
|   SUBDIRS+= qdpxx | ||||
|   | ||||
| @@ -37,8 +37,15 @@ RealD InverseApproximation(RealD x){ | ||||
| RealD SqrtApproximation(RealD x){ | ||||
|   return std::sqrt(x); | ||||
| } | ||||
| RealD Approximation32(RealD x){ | ||||
|   return std::pow(x,-1.0/32.0); | ||||
| } | ||||
| RealD Approximation2(RealD x){ | ||||
|   return std::pow(x,-1.0/2.0); | ||||
| } | ||||
|  | ||||
| RealD StepFunction(RealD x){ | ||||
|   if ( x<0.1 )  return 1.0; | ||||
|   if ( x<10.0 )  return 1.0; | ||||
|   else return 0.0; | ||||
| } | ||||
|  | ||||
| @@ -56,7 +63,6 @@ int main (int argc, char ** argv) | ||||
|  | ||||
|   Chebyshev<LatticeFermion> ChebyInv(lo,hi,2000,InverseApproximation); | ||||
|  | ||||
|  | ||||
|   { | ||||
|     std::ofstream of("chebyinv"); | ||||
|     ChebyInv.csv(of); | ||||
| @@ -78,7 +84,6 @@ int main (int argc, char ** argv) | ||||
|  | ||||
|  | ||||
|   ChebyStep.JacksonSmooth(); | ||||
|  | ||||
|   { | ||||
|     std::ofstream of("chebystepjack"); | ||||
|     ChebyStep.csv(of); | ||||
| @@ -100,5 +105,30 @@ int main (int argc, char ** argv) | ||||
|     ChebyNE.csv(of); | ||||
|   } | ||||
|  | ||||
|   lo=0.0; | ||||
|   hi=4.0; | ||||
|   Chebyshev<LatticeFermion> Cheby32(lo,hi,2000,Approximation32); | ||||
|   { | ||||
|     std::ofstream of("cheby32"); | ||||
|     Cheby32.csv(of); | ||||
|   } | ||||
|   Cheby32.JacksonSmooth(); | ||||
|   { | ||||
|     std::ofstream of("cheby32jack"); | ||||
|     Cheby32.csv(of); | ||||
|   } | ||||
|  | ||||
|   Chebyshev<LatticeFermion> ChebySqrt(lo,hi,2000,Approximation2); | ||||
|   { | ||||
|     std::ofstream of("chebysqrt"); | ||||
|     ChebySqrt.csv(of); | ||||
|   } | ||||
|   ChebySqrt.JacksonSmooth(); | ||||
|   { | ||||
|     std::ofstream of("chebysqrtjack"); | ||||
|     ChebySqrt.csv(of); | ||||
|   } | ||||
|  | ||||
|  | ||||
|   Grid_finalize(); | ||||
| } | ||||
|   | ||||
| @@ -38,11 +38,11 @@ int main (int argc, char ** argv) | ||||
|  | ||||
|   std::cout<<GridLogMessage << "Testing Remez"<<std::endl; | ||||
|  | ||||
|   double     lo=0.01; | ||||
|   double     hi=1.0; | ||||
|   double     lo=1.0e-3; | ||||
|   double     hi=5.0; | ||||
|   int precision=64; | ||||
|   int    degree=10; | ||||
|   AlgRemez remez(0.001,1.0,precision); | ||||
|   int    degree=16; | ||||
|   AlgRemez remez(lo,hi,precision); | ||||
|  | ||||
|   //////////////////////////////////////// | ||||
|   // sqrt and inverse sqrt | ||||
| @@ -50,21 +50,50 @@ int main (int argc, char ** argv) | ||||
|  | ||||
|   std::cout<<GridLogMessage << "Generating degree "<<degree<<" for x^(1/2)"<<std::endl; | ||||
|   remez.generateApprox(degree,1,2); | ||||
|   MultiShiftFunction Sqrt(remez,1.0,false); | ||||
|   MultiShiftFunction InvSqrt(remez,1.0,true); | ||||
|   MultiShiftFunction Root2(remez,1.0,false); | ||||
|   MultiShiftFunction InvRoot2(remez,1.0,true); | ||||
|  | ||||
|  | ||||
|   std::cout<<GridLogMessage << "Generating degree "<<degree<<" for x^(1/4)"<<std::endl; | ||||
|   remez.generateApprox(degree,1,4); | ||||
|   MultiShiftFunction SqrtSqrt(remez,1.0,false); | ||||
|   MultiShiftFunction InvSqrtSqrt(remez,1.0,true); | ||||
|   MultiShiftFunction Root4(remez,1.0,false); | ||||
|   MultiShiftFunction InvRoot4(remez,1.0,true); | ||||
|  | ||||
|   std::cout<<GridLogMessage << "Generating degree "<<degree<<" for x^(1/8)"<<std::endl; | ||||
|   remez.generateApprox(degree,1,8); | ||||
|   MultiShiftFunction Root8(remez,1.0,false); | ||||
|   MultiShiftFunction InvRoot8(remez,1.0,true); | ||||
|  | ||||
|   std::cout<<GridLogMessage << "Generating degree "<<degree<<" for x^(1/16)"<<std::endl; | ||||
|   remez.generateApprox(degree,1,16); | ||||
|   MultiShiftFunction Root16(remez,1.0,false); | ||||
|   MultiShiftFunction InvRoot16(remez,1.0,true); | ||||
|  | ||||
|   std::cout<<GridLogMessage << "Generating degree "<<degree<<" for x^(1/32)"<<std::endl; | ||||
|   remez.generateApprox(degree,1,32); | ||||
|   MultiShiftFunction Root32(remez,1.0,false); | ||||
|   MultiShiftFunction InvRoot32(remez,1.0,true); | ||||
|    | ||||
|   ofstream gnuplot(std::string("Sqrt.gnu"),std::ios::out|std::ios::trunc); | ||||
|   Sqrt.gnuplot(gnuplot); | ||||
|   ofstream gnuplot(std::string("Root2.gnu"),std::ios::out|std::ios::trunc); | ||||
|   Root2.gnuplot(gnuplot); | ||||
|  | ||||
|   ofstream gnuplot_i2(std::string("InvRoot2.gnu"),std::ios::out|std::ios::trunc); | ||||
|   InvRoot2.gnuplot(gnuplot_i2); | ||||
|  | ||||
|   ofstream gnuplot_i4(std::string("InvRoot4.gnu"),std::ios::out|std::ios::trunc); | ||||
|   InvRoot4.gnuplot(gnuplot_i4); | ||||
|  | ||||
|   ofstream gnuplot_i8(std::string("InvRoot8.gnu"),std::ios::out|std::ios::trunc); | ||||
|   InvRoot8.gnuplot(gnuplot_i8); | ||||
|  | ||||
|   ofstream gnuplot_i16(std::string("InvRoot16.gnu"),std::ios::out|std::ios::trunc); | ||||
|   InvRoot16.gnuplot(gnuplot_i16); | ||||
|  | ||||
|   ofstream gnuplot_i32(std::string("InvRoot32.gnu"),std::ios::out|std::ios::trunc); | ||||
|   InvRoot32.gnuplot(gnuplot_i32); | ||||
|  | ||||
|  | ||||
|  | ||||
|   ofstream gnuplot_inv(std::string("InvSqrt.gnu"),std::ios::out|std::ios::trunc); | ||||
|   InvSqrt.gnuplot(gnuplot); | ||||
|  | ||||
|   double x=0.6789; | ||||
|   double sx=std::sqrt(x); | ||||
| @@ -72,10 +101,10 @@ int main (int argc, char ** argv) | ||||
|   double isx=1.0/sx; | ||||
|   double issx=1.0/ssx; | ||||
|  | ||||
|   double asx  =Sqrt.approx(x); | ||||
|   double assx =SqrtSqrt.approx(x); | ||||
|   double aisx =InvSqrt.approx(x); | ||||
|   double aissx=InvSqrtSqrt.approx(x); | ||||
|   double asx  =Root2.approx(x); | ||||
|   double assx =Root4.approx(x); | ||||
|   double aisx =InvRoot2.approx(x); | ||||
|   double aissx=InvRoot4.approx(x); | ||||
|  | ||||
|   std::cout<<GridLogMessage << "x^(1/2) : "<<sx<<" "<<asx<<std::endl; | ||||
|   std::cout<<GridLogMessage << "x^(1/4) : "<<ssx<<" "<<assx<<std::endl; | ||||
|   | ||||
							
								
								
									
tests/lanczos/BlockProjector.h (new file, 143 lines)
							| @@ -0,0 +1,143 @@ | ||||
| namespace Grid {  | ||||
|  | ||||
| /* | ||||
|   BlockProjector | ||||
|  | ||||
|   If _HP_BLOCK_PROJECTORS_ is defined, we assume that _evec is a basis that is not | ||||
|   fully orthonormalized (to the precision of the coarse field) and we allow for higher-precision | ||||
|   coarse field than basis field. | ||||
|  | ||||
| */ | ||||
| //#define _HP_BLOCK_PROJECTORS_ | ||||
|  | ||||
| template<typename Field> | ||||
| class BlockProjector { | ||||
| public: | ||||
|  | ||||
|   BasisFieldVector<Field>& _evec; | ||||
|   BlockedGrid<Field>& _bgrid; | ||||
|  | ||||
|   BlockProjector(BasisFieldVector<Field>& evec, BlockedGrid<Field>& bgrid) : _evec(evec), _bgrid(bgrid) { | ||||
|   } | ||||
|  | ||||
|   void createOrthonormalBasis(RealD thres = 0.0) { | ||||
|  | ||||
|     GridStopWatch sw; | ||||
|     sw.Start(); | ||||
|  | ||||
|     int cnt = 0; | ||||
|  | ||||
| #pragma omp parallel shared(cnt) | ||||
|     { | ||||
|       int lcnt = 0; | ||||
|  | ||||
| #pragma omp for | ||||
|       for (int b=0;b<_bgrid._o_blocks;b++) { | ||||
| 	 | ||||
| 	for (int i=0;i<_evec._Nm;i++) { | ||||
| 	   | ||||
| 	  auto nrm0 = _bgrid.block_sp(b,_evec._v[i],_evec._v[i]); | ||||
| 	   | ||||
| 	  // |i> -= <j|i> |j> | ||||
| 	  for (int j=0;j<i;j++) { | ||||
| 	    _bgrid.block_caxpy(b,_evec._v[i],-_bgrid.block_sp(b,_evec._v[j],_evec._v[i]),_evec._v[j],_evec._v[i]); | ||||
| 	  } | ||||
| 	   | ||||
| 	  auto nrm = _bgrid.block_sp(b,_evec._v[i],_evec._v[i]); | ||||
| 	   | ||||
| 	  auto eps = nrm/nrm0; | ||||
| 	  if (Reduce(eps).real() < thres) { | ||||
| 	    lcnt++; | ||||
| 	  } | ||||
| 	   | ||||
| 	  // TODO: if norm is too small, remove this eigenvector/mark as not needed; in practice: set it to zero norm here and return a mask | ||||
| 	  // that is then used later to decide not to write certain eigenvectors to disk (add a norm calculation before subtraction step and look at nrm/nrm0 < eps to decide) | ||||
| 	  _bgrid.block_cscale(b,1.0 / sqrt(nrm),_evec._v[i]); | ||||
| 	   | ||||
| 	} | ||||
| 	 | ||||
|       } | ||||
|  | ||||
| #pragma omp critical | ||||
|       { | ||||
| 	cnt += lcnt; | ||||
|       } | ||||
|     } | ||||
|     sw.Stop(); | ||||
|     std::cout << GridLogMessage << "Gram-Schmidt to create blocked basis took " << sw.Elapsed() << " (" << ((RealD)cnt / (RealD)_bgrid._o_blocks / (RealD)_evec._Nm)  | ||||
| 	      << " below threshold)" << std::endl; | ||||
|  | ||||
|   } | ||||
|  | ||||
|   template<typename CoarseField> | ||||
|   void coarseToFine(const CoarseField& in, Field& out) { | ||||
|  | ||||
|     out = zero; | ||||
|     out.checkerboard = _evec._v[0].checkerboard; | ||||
|  | ||||
|     int Nbasis = sizeof(in._odata[0]._internal._internal) / sizeof(in._odata[0]._internal._internal[0]); | ||||
|     assert(Nbasis == _evec._Nm); | ||||
|      | ||||
| #pragma omp parallel for | ||||
|     for (int b=0;b<_bgrid._o_blocks;b++) { | ||||
|       for (int j=0;j<_evec._Nm;j++) { | ||||
| 	_bgrid.block_caxpy(b,out,in._odata[b]._internal._internal[j],_evec._v[j],out); | ||||
|       } | ||||
|     } | ||||
|  | ||||
|   } | ||||
|  | ||||
|   template<typename CoarseField> | ||||
|   void fineToCoarse(const Field& in, CoarseField& out) { | ||||
|  | ||||
|     out = zero; | ||||
|  | ||||
|     int Nbasis = sizeof(out._odata[0]._internal._internal) / sizeof(out._odata[0]._internal._internal[0]); | ||||
|     assert(Nbasis == _evec._Nm); | ||||
|  | ||||
|  | ||||
|     Field tmp(_bgrid._grid); | ||||
|     tmp = in; | ||||
|      | ||||
| #pragma omp parallel for | ||||
|     for (int b=0;b<_bgrid._o_blocks;b++) { | ||||
|       for (int j=0;j<_evec._Nm;j++) { | ||||
| 	// |rhs> -= <j|rhs> |j> | ||||
| 	auto c = _bgrid.block_sp(b,_evec._v[j],tmp); | ||||
| 	_bgrid.block_caxpy(b,tmp,-c,_evec._v[j],tmp); // may make this more numerically stable | ||||
| 	out._odata[b]._internal._internal[j] = c; | ||||
|       } | ||||
|     } | ||||
|  | ||||
|   } | ||||
|  | ||||
|   template<typename CoarseField> | ||||
|     void deflateFine(BasisFieldVector<CoarseField>& _coef,const std::vector<RealD>& eval,int N,const Field& src_orig,Field& result) { | ||||
|     result = zero; | ||||
|     for (int i=0;i<N;i++) { | ||||
|       Field tmp(result._grid); | ||||
|       coarseToFine(_coef._v[i],tmp); | ||||
|       axpy(result,TensorRemove(innerProduct(tmp,src_orig)) / eval[i],tmp,result); | ||||
|     } | ||||
|   } | ||||
|  | ||||
|   template<typename CoarseField> | ||||
|     void deflateCoarse(BasisFieldVector<CoarseField>& _coef,const std::vector<RealD>& eval,int N,const Field& src_orig,Field& result) { | ||||
|     CoarseField src_coarse(_coef._v[0]._grid); | ||||
|     CoarseField result_coarse = src_coarse; | ||||
|     result_coarse = zero; | ||||
|     fineToCoarse(src_orig,src_coarse); | ||||
|     for (int i=0;i<N;i++) { | ||||
|       axpy(result_coarse,TensorRemove(innerProduct(_coef._v[i],src_coarse)) / eval[i],_coef._v[i],result_coarse); | ||||
|     } | ||||
|     coarseToFine(result_coarse,result); | ||||
|   } | ||||
|  | ||||
|   template<typename CoarseField> | ||||
|     void deflate(BasisFieldVector<CoarseField>& _coef,const std::vector<RealD>& eval,int N,const Field& src_orig,Field& result) { | ||||
|     // Deflation on coarse Grid is much faster, so use it by default.  Deflation on fine Grid is kept for legacy reasons for now. | ||||
|     deflateCoarse(_coef,eval,N,src_orig,result); | ||||
|   } | ||||
|  | ||||
| }; | ||||
| } | ||||
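The projector above implements the usual blocked (local-coherence) transfer: fineToCoarse stores the block-wise inner products <v_j|x> as the coarse degrees of freedom, coarseToFine rebuilds sum_j c_j |v_j> block by block, and deflateCoarse applies sum_i |v_i><v_i|src>/eval_i entirely on the coarse grid. A minimal usage sketch, modelled on the precision test in Test_dwf_compressed_lanczos.cc further below; the grids, block_size and the basis evec are assumed to be set up by the surrounding code, and CoarseLatticeFermion<N> is the coarse-field alias defined in that test:

// Sketch only: a coarse -> fine -> coarse round trip with BlockProjector.
// Assumed to exist already: FrbGrid, FCoarseGrid, block_size, and
// evec = BasisFieldVector<LatticeFermion> holding Nbasis fine vectors.
BlockedGrid<LatticeFermion>    bgrid(FrbGrid, block_size);   // 5d blocking of the checkerboarded grid
BlockProjector<LatticeFermion> pr(evec, bgrid);
pr.createOrthonormalBasis();                                 // block-wise Gram-Schmidt

CoarseLatticeFermion<Nbasis> c_in(FCoarseGrid), c_out(FCoarseGrid);
LatticeFermion               fine(FrbGrid);
c_in = 1.0;
pr.coarseToFine(c_in, fine);    // prolongate: fine = sum_j c_j v_j on each block
pr.fineToCoarse(fine, c_out);   // restrict:   c_out[j] = <v_j|fine> on each block
c_out -= c_in;                  // should be small; compare norm2(c_out) to norm2(c_in)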
							
								
								
									
401  tests/lanczos/BlockedGrid.h  Normal file
							| @@ -0,0 +1,401 @@ | ||||
| namespace Grid { | ||||
|  | ||||
| template<typename Field> | ||||
| class BlockedGrid { | ||||
| public: | ||||
|   GridBase* _grid; | ||||
|   typedef typename Field::scalar_type  Coeff_t; | ||||
|   typedef typename Field::vector_type vCoeff_t; | ||||
|    | ||||
|   std::vector<int> _bs; // block size | ||||
|   std::vector<int> _nb; // number of blocks | ||||
|   std::vector<int> _l;  // local dimensions irrespective of cb | ||||
|   std::vector<int> _l_cb;  // local dimensions of checkerboarded vector | ||||
|   std::vector<int> _l_cb_o;  // local dimensions of inner checkerboarded vector | ||||
|   std::vector<int> _bs_cb; // block size in checkerboarded vector | ||||
|   std::vector<int> _nb_o; // number of blocks of simd o-sites | ||||
|  | ||||
|   int _nd, _blocks, _cf_size, _cf_block_size, _cf_o_block_size, _o_blocks, _block_sites; | ||||
|    | ||||
|   BlockedGrid(GridBase* grid, const std::vector<int>& block_size) : | ||||
|     _grid(grid), _bs(block_size), _nd((int)_bs.size()),  | ||||
|       _nb(block_size), _l(block_size), _l_cb(block_size), _nb_o(block_size), | ||||
|       _l_cb_o(block_size), _bs_cb(block_size) { | ||||
|  | ||||
|     _blocks = 1; | ||||
|     _o_blocks = 1; | ||||
|     _l = grid->FullDimensions(); | ||||
|     _l_cb = grid->LocalDimensions(); | ||||
|     _l_cb_o = grid->_rdimensions; | ||||
|  | ||||
|     _cf_size = 1; | ||||
|     _block_sites = 1; | ||||
|     for (int i=0;i<_nd;i++) { | ||||
|       _l[i] /= grid->_processors[i]; | ||||
|  | ||||
|       assert(!(_l[i] % _bs[i])); // lattice must accommodate choice of blocksize | ||||
|  | ||||
|       int r = _l[i] / _l_cb[i]; | ||||
|       assert(!(_bs[i] % r)); // checkerboarding must accommodate choice of blocksize | ||||
|       _bs_cb[i] = _bs[i] / r; | ||||
|       _block_sites *= _bs_cb[i]; | ||||
|       _nb[i] = _l[i] / _bs[i]; | ||||
|       _nb_o[i] = _nb[i] / _grid->_simd_layout[i]; | ||||
|       if (_nb[i] % _grid->_simd_layout[i]) { // simd must accommodate choice of blocksize | ||||
| 	std::cout << GridLogMessage << "Problem: _nb[" << i << "] = " << _nb[i] << " _grid->_simd_layout[" << i << "] = " << _grid->_simd_layout[i] << std::endl; | ||||
| 	assert(0); | ||||
|       } | ||||
|       _blocks *= _nb[i]; | ||||
|       _o_blocks *= _nb_o[i]; | ||||
|       _cf_size *= _l[i]; | ||||
|     } | ||||
|  | ||||
|     _cf_size *= 12 / 2; | ||||
|     _cf_block_size = _cf_size / _blocks; | ||||
|     _cf_o_block_size = _cf_size / _o_blocks; | ||||
|  | ||||
|     std::cout << GridLogMessage << "BlockedGrid:" << std::endl; | ||||
|     std::cout << GridLogMessage << " _l     = " << _l << std::endl; | ||||
|     std::cout << GridLogMessage << " _l_cb     = " << _l_cb << std::endl; | ||||
|     std::cout << GridLogMessage << " _l_cb_o     = " << _l_cb_o << std::endl; | ||||
|     std::cout << GridLogMessage << " _bs    = " << _bs << std::endl; | ||||
|     std::cout << GridLogMessage << " _bs_cb    = " << _bs_cb << std::endl; | ||||
|  | ||||
|     std::cout << GridLogMessage << " _nb    = " << _nb << std::endl; | ||||
|     std::cout << GridLogMessage << " _nb_o    = " << _nb_o << std::endl; | ||||
|     std::cout << GridLogMessage << " _blocks = " << _blocks << std::endl; | ||||
|     std::cout << GridLogMessage << " _o_blocks = " << _o_blocks << std::endl; | ||||
|     std::cout << GridLogMessage << " sizeof(vCoeff_t) = " << sizeof(vCoeff_t) << std::endl; | ||||
|     std::cout << GridLogMessage << " _cf_size = " << _cf_size << std::endl; | ||||
|     std::cout << GridLogMessage << " _cf_block_size = " << _cf_block_size << std::endl; | ||||
|     std::cout << GridLogMessage << " _block_sites = " << _block_sites << std::endl; | ||||
|     std::cout << GridLogMessage << " _grid->oSites() = " << _grid->oSites() << std::endl; | ||||
|  | ||||
|     //    _grid->Barrier(); | ||||
|     //abort(); | ||||
|   } | ||||
|  | ||||
|     void block_to_coor(int b, std::vector<int>& x0) { | ||||
|  | ||||
|       std::vector<int> bcoor; | ||||
|       bcoor.resize(_nd); | ||||
|       x0.resize(_nd); | ||||
|       assert(b < _o_blocks); | ||||
|       Lexicographic::CoorFromIndex(bcoor,b,_nb_o); | ||||
|       int i; | ||||
|  | ||||
|       for (i=0;i<_nd;i++) { | ||||
| 	x0[i] = bcoor[i]*_bs_cb[i]; | ||||
|       } | ||||
|  | ||||
|       //std::cout << GridLogMessage << "Map block b -> " << x0 << std::endl; | ||||
|  | ||||
|     } | ||||
|  | ||||
|     void block_site_to_o_coor(const std::vector<int>& x0, std::vector<int>& coor, int i) { | ||||
|       Lexicographic::CoorFromIndex(coor,i,_bs_cb); | ||||
|       for (int j=0;j<_nd;j++) | ||||
| 	coor[j] += x0[j]; | ||||
|     } | ||||
|  | ||||
|     int block_site_to_o_site(const std::vector<int>& x0, int i) { | ||||
|       std::vector<int> coor;  coor.resize(_nd); | ||||
|       block_site_to_o_coor(x0,coor,i); | ||||
|       Lexicographic::IndexFromCoor(coor,i,_l_cb_o); | ||||
|       return i; | ||||
|     } | ||||
|  | ||||
|     vCoeff_t block_sp(int b, const Field& x, const Field& y) { | ||||
|  | ||||
|       std::vector<int> x0; | ||||
|       block_to_coor(b,x0); | ||||
|  | ||||
|       vCoeff_t ret = 0.0; | ||||
|       for (int i=0;i<_block_sites;i++) { // only odd sites | ||||
| 	int ss = block_site_to_o_site(x0,i); | ||||
| 	ret += TensorRemove(innerProduct(x._odata[ss],y._odata[ss])); | ||||
|       } | ||||
|  | ||||
|       return ret; | ||||
|  | ||||
|     } | ||||
|  | ||||
|     vCoeff_t block_sp(int b, const Field& x, const std::vector< ComplexD >& y) { | ||||
|  | ||||
|       std::vector<int> x0; | ||||
|       block_to_coor(b,x0); | ||||
|  | ||||
|       constexpr int nsimd = sizeof(vCoeff_t) / sizeof(Coeff_t); | ||||
|       int lsize = _cf_o_block_size / _block_sites; | ||||
|  | ||||
|       std::vector< ComplexD > ret(nsimd); | ||||
|       for (int i=0;i<nsimd;i++) | ||||
| 	ret[i] = 0.0; | ||||
|  | ||||
|       for (int i=0;i<_block_sites;i++) { // only odd sites | ||||
| 	int ss = block_site_to_o_site(x0,i); | ||||
|  | ||||
| 	int n = lsize / nsimd; | ||||
| 	for (int l=0;l<n;l++) { | ||||
| 	  for (int j=0;j<nsimd;j++) { | ||||
| 	    int t = lsize * i + l*nsimd + j; | ||||
|  | ||||
| 	    ret[j] += conjugate(((Coeff_t*)&x._odata[ss]._internal)[l*nsimd + j]) * y[t]; | ||||
| 	  } | ||||
| 	} | ||||
|       } | ||||
|  | ||||
|       vCoeff_t vret; | ||||
|       for (int i=0;i<nsimd;i++) | ||||
| 	((Coeff_t*)&vret)[i] = (Coeff_t)ret[i]; | ||||
|  | ||||
|       return vret; | ||||
|  | ||||
|     } | ||||
|  | ||||
|     template<class T> | ||||
|       void vcaxpy(iScalar<T>& r,const vCoeff_t& a,const iScalar<T>& x,const iScalar<T>& y) { | ||||
|       vcaxpy(r._internal,a,x._internal,y._internal); | ||||
|     } | ||||
|  | ||||
|     template<class T,int N> | ||||
|       void vcaxpy(iVector<T,N>& r,const vCoeff_t& a,const iVector<T,N>& x,const iVector<T,N>& y) { | ||||
|       for (int i=0;i<N;i++) | ||||
| 	vcaxpy(r._internal[i],a,x._internal[i],y._internal[i]); | ||||
|     } | ||||
|  | ||||
|     void vcaxpy(vCoeff_t& r,const vCoeff_t& a,const vCoeff_t& x,const vCoeff_t& y) { | ||||
|       r = a*x + y; | ||||
|     } | ||||
|  | ||||
|     void block_caxpy(int b, Field& ret, const vCoeff_t& a, const Field& x, const Field& y) { | ||||
|  | ||||
|       std::vector<int> x0; | ||||
|       block_to_coor(b,x0); | ||||
|  | ||||
|       for (int i=0;i<_block_sites;i++) { // only odd sites | ||||
| 	int ss = block_site_to_o_site(x0,i); | ||||
| 	vcaxpy(ret._odata[ss],a,x._odata[ss],y._odata[ss]); | ||||
|       } | ||||
|  | ||||
|     } | ||||
|  | ||||
|     void block_caxpy(int b, std::vector< ComplexD >& ret, const vCoeff_t& a, const Field& x, const std::vector< ComplexD >& y) { | ||||
|       std::vector<int> x0; | ||||
|       block_to_coor(b,x0); | ||||
|  | ||||
|       constexpr int nsimd = sizeof(vCoeff_t) / sizeof(Coeff_t); | ||||
|       int lsize = _cf_o_block_size / _block_sites; | ||||
|  | ||||
|       for (int i=0;i<_block_sites;i++) { // only odd sites | ||||
| 	int ss = block_site_to_o_site(x0,i); | ||||
|  | ||||
| 	int n = lsize / nsimd; | ||||
| 	for (int l=0;l<n;l++) { | ||||
| 	  vCoeff_t r = a* ((vCoeff_t*)&x._odata[ss]._internal)[l]; | ||||
|  | ||||
| 	  for (int j=0;j<nsimd;j++) { | ||||
| 	    int t = lsize * i + l*nsimd + j; | ||||
| 	    ret[t] = y[t] + ((Coeff_t*)&r)[j]; | ||||
| 	  } | ||||
| 	} | ||||
|       } | ||||
|  | ||||
|     } | ||||
|  | ||||
|     void block_set(int b, Field& ret, const std::vector< ComplexD >& x) { | ||||
|       std::vector<int> x0; | ||||
|       block_to_coor(b,x0); | ||||
|  | ||||
|       int lsize = _cf_o_block_size / _block_sites; | ||||
|  | ||||
|       for (int i=0;i<_block_sites;i++) { // only odd sites | ||||
| 	int ss = block_site_to_o_site(x0,i); | ||||
|  | ||||
| 	for (int l=0;l<lsize;l++) | ||||
| 	  ((Coeff_t*)&ret._odata[ss]._internal)[l] = (Coeff_t)x[lsize * i + l]; // convert precision | ||||
|       } | ||||
|  | ||||
|     } | ||||
|  | ||||
|     void block_get(int b, const Field& ret, std::vector< ComplexD >& x) { | ||||
|       std::vector<int> x0; | ||||
|       block_to_coor(b,x0); | ||||
|  | ||||
|       int lsize = _cf_o_block_size / _block_sites; | ||||
|  | ||||
|       for (int i=0;i<_block_sites;i++) { // only odd sites | ||||
| 	int ss = block_site_to_o_site(x0,i); | ||||
|  | ||||
| 	for (int l=0;l<lsize;l++) | ||||
| 	  x[lsize * i + l] = (ComplexD)((Coeff_t*)&ret._odata[ss]._internal)[l]; | ||||
|       } | ||||
|  | ||||
|     } | ||||
|  | ||||
|     template<class T> | ||||
|     void vcscale(iScalar<T>& r,const vCoeff_t& a,const iScalar<T>& x) { | ||||
|       vcscale(r._internal,a,x._internal); | ||||
|     } | ||||
|  | ||||
|     template<class T,int N> | ||||
|     void vcscale(iVector<T,N>& r,const vCoeff_t& a,const iVector<T,N>& x) { | ||||
|       for (int i=0;i<N;i++) | ||||
| 	vcscale(r._internal[i],a,x._internal[i]); | ||||
|     } | ||||
|  | ||||
|     void vcscale(vCoeff_t& r,const vCoeff_t& a,const vCoeff_t& x) { | ||||
|       r = a*x; | ||||
|     } | ||||
|  | ||||
|     void block_cscale(int b, const vCoeff_t& a, Field& ret) { | ||||
|  | ||||
|       std::vector<int> x0; | ||||
|       block_to_coor(b,x0); | ||||
|        | ||||
|       for (int i=0;i<_block_sites;i++) { // only odd sites | ||||
| 	int ss = block_site_to_o_site(x0,i); | ||||
| 	vcscale(ret._odata[ss],a,ret._odata[ss]); | ||||
|       } | ||||
|     } | ||||
|  | ||||
|     void getCanonicalBlockOffset(int cb, std::vector<int>& x0) { | ||||
|       const int ndim = 5; | ||||
|       assert(_nb.size() == ndim); | ||||
|       std::vector<int> _nbc = { _nb[1], _nb[2], _nb[3], _nb[4], _nb[0] }; | ||||
|       std::vector<int> _bsc = { _bs[1], _bs[2], _bs[3], _bs[4], _bs[0] }; | ||||
|       x0.resize(ndim); | ||||
|  | ||||
|       assert(cb >= 0); | ||||
|       assert(cb < _nbc[0]*_nbc[1]*_nbc[2]*_nbc[3]*_nbc[4]); | ||||
|  | ||||
|       Lexicographic::CoorFromIndex(x0,cb,_nbc); | ||||
|       int i; | ||||
|  | ||||
|       for (i=0;i<ndim;i++) { | ||||
| 	x0[i] *= _bsc[i]; | ||||
|       } | ||||
|  | ||||
|       //if (cb < 2) | ||||
|       //	std::cout << GridLogMessage << "Map: " << cb << " To: " << x0 << std::endl; | ||||
|     } | ||||
|  | ||||
|     void pokeBlockOfVectorCanonical(int cb,Field& v,const std::vector<float>& buf) { | ||||
|       std::vector<int> _bsc = { _bs[1], _bs[2], _bs[3], _bs[4], _bs[0] }; | ||||
|       std::vector<int> ldim = v._grid->LocalDimensions(); | ||||
|       std::vector<int> cldim = { ldim[1], ldim[2], ldim[3], ldim[4], ldim[0] }; | ||||
|       const int _nbsc = _bs_cb[0]*_bs_cb[1]*_bs_cb[2]*_bs_cb[3]*_bs_cb[4]; | ||||
|       // take canonical block cb of v and put it in canonical ordering in buf | ||||
|       std::vector<int> cx0; | ||||
|       getCanonicalBlockOffset(cb,cx0); | ||||
|  | ||||
| #pragma omp parallel | ||||
|       { | ||||
| 	std::vector<int> co0,cl0; | ||||
| 	co0=cx0; cl0=cx0; | ||||
|  | ||||
| #pragma omp for | ||||
| 	for (int i=0;i<_nbsc;i++) { | ||||
| 	  Lexicographic::CoorFromIndex(co0,2*i,_bsc); // 2* for eo | ||||
| 	  for (int j=0;j<(int)_bsc.size();j++) | ||||
| 	    cl0[j] = cx0[j] + co0[j]; | ||||
| 	   | ||||
| 	  std::vector<int> l0 = { cl0[4], cl0[0], cl0[1], cl0[2], cl0[3] }; | ||||
| 	  int oi = v._grid->oIndex(l0); | ||||
| 	  int ii = v._grid->iIndex(l0); | ||||
| 	  int lti = i; | ||||
|  | ||||
| 	  //if (cb < 2 && i<2) | ||||
| 	  //  std::cout << GridLogMessage << "Map: " << cb << ", " << i << " To: " << cl0 << ", " << cx0 << ", " << oi << ", " << ii << std::endl; | ||||
| 	   | ||||
| 	  for (int s=0;s<4;s++) | ||||
| 	    for (int c=0;c<3;c++) { | ||||
| 	      Coeff_t& ld = ((Coeff_t*)&v._odata[oi]._internal._internal[s]._internal[c])[ii]; | ||||
| 	      int ti = 12*lti + 3*s + c; | ||||
| 	      ld = Coeff_t(buf[2*ti+0], buf[2*ti+1]); | ||||
| 	    } | ||||
| 	} | ||||
|       } | ||||
|     } | ||||
|  | ||||
|     void peekBlockOfVectorCanonical(int cb,const Field& v,std::vector<float>& buf) { | ||||
|       std::vector<int> _bsc = { _bs[1], _bs[2], _bs[3], _bs[4], _bs[0] }; | ||||
|       std::vector<int> ldim = v._grid->LocalDimensions(); | ||||
|       std::vector<int> cldim = { ldim[1], ldim[2], ldim[3], ldim[4], ldim[0] }; | ||||
|       const int _nbsc = _bs_cb[0]*_bs_cb[1]*_bs_cb[2]*_bs_cb[3]*_bs_cb[4]; | ||||
|       // take canonical block cb of v and put it in canonical ordering in buf | ||||
|       std::vector<int> cx0; | ||||
|       getCanonicalBlockOffset(cb,cx0); | ||||
|  | ||||
|       buf.resize(_cf_block_size * 2); | ||||
|  | ||||
| #pragma omp parallel | ||||
|       { | ||||
| 	std::vector<int> co0,cl0; | ||||
| 	co0=cx0; cl0=cx0; | ||||
|  | ||||
| #pragma omp for | ||||
| 	for (int i=0;i<_nbsc;i++) { | ||||
| 	  Lexicographic::CoorFromIndex(co0,2*i,_bsc); // 2* for eo | ||||
| 	  for (int j=0;j<(int)_bsc.size();j++) | ||||
| 	    cl0[j] = cx0[j] + co0[j]; | ||||
| 	   | ||||
| 	  std::vector<int> l0 = { cl0[4], cl0[0], cl0[1], cl0[2], cl0[3] }; | ||||
| 	  int oi = v._grid->oIndex(l0); | ||||
| 	  int ii = v._grid->iIndex(l0); | ||||
| 	  int lti = i; | ||||
| 	   | ||||
| 	  //if (cb < 2 && i<2) | ||||
| 	  //  std::cout << GridLogMessage << "Map: " << cb << ", " << i << " To: " << cl0 << ", " << cx0 << ", " << oi << ", " << ii << std::endl; | ||||
|  | ||||
| 	  for (int s=0;s<4;s++) | ||||
| 	    for (int c=0;c<3;c++) { | ||||
| 	      Coeff_t& ld = ((Coeff_t*)&v._odata[oi]._internal._internal[s]._internal[c])[ii]; | ||||
| 	      int ti = 12*lti + 3*s + c; | ||||
| 	      buf[2*ti+0] = ld.real(); | ||||
| 	      buf[2*ti+1] = ld.imag(); | ||||
| 	    } | ||||
| 	} | ||||
|       } | ||||
|     } | ||||
|  | ||||
|     int globalToLocalCanonicalBlock(int slot,const std::vector<int>& src_nodes,int nb) { | ||||
|       // processor coordinate | ||||
|       int _nd = (int)src_nodes.size(); | ||||
|       std::vector<int> _src_nodes = src_nodes; | ||||
|       std::vector<int> pco(_nd); | ||||
|       Lexicographic::CoorFromIndex(pco,slot,_src_nodes); | ||||
|       std::vector<int> cpco = { pco[1], pco[2], pco[3], pco[4], pco[0] }; | ||||
|  | ||||
|       // get local block | ||||
|       std::vector<int> _nbc = { _nb[1], _nb[2], _nb[3], _nb[4], _nb[0] }; | ||||
|       assert(_nd == 5); | ||||
|       std::vector<int> c_src_local_blocks(_nd); | ||||
|       for (int i=0;i<_nd;i++) { | ||||
| 	assert(_grid->_fdimensions[i] % (src_nodes[i] * _bs[i]) == 0); | ||||
| 	c_src_local_blocks[(i+4) % 5] = _grid->_fdimensions[i] / src_nodes[i] / _bs[i]; | ||||
|       } | ||||
|       std::vector<int> cbcoor(_nd); // coordinate of block in slot in canonical form | ||||
|       Lexicographic::CoorFromIndex(cbcoor,nb,c_src_local_blocks); | ||||
|  | ||||
|       // cpco, cbcoor | ||||
|       std::vector<int> clbcoor(_nd); | ||||
|       for (int i=0;i<_nd;i++) { | ||||
| 	int cgcoor = cpco[i] * c_src_local_blocks[i] + cbcoor[i]; // global block coordinate | ||||
| 	int pcoor = cgcoor / _nbc[i]; // processor coordinate in my Grid | ||||
| 	int tpcoor = _grid->_processor_coor[(i+1)%5]; | ||||
| 	if (pcoor != tpcoor) | ||||
| 	  return -1; | ||||
| 	clbcoor[i] = cgcoor - tpcoor * _nbc[i]; // canonical local block coordinate for canonical dimension i | ||||
|       } | ||||
|  | ||||
|       int lnb; | ||||
|       Lexicographic::IndexFromCoor(clbcoor,lnb,_nbc); | ||||
|       //std::cout << "Mapped slot = " << slot << " nb = " << nb << " to " << lnb << std::endl; | ||||
|       return lnb; | ||||
|     } | ||||
|  | ||||
|  | ||||
|  }; | ||||
|  | ||||
| } | ||||
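The coordinate bookkeeping above is purely lexicographic: block_to_coor unfolds the block index with Lexicographic::CoorFromIndex and scales by the checkerboarded block size, and block_site_to_o_site folds the per-block site coordinate back into an outer-site index of the local grid. A standalone sketch of the same arithmetic in plain C++ (no Grid types; the dimensions are illustrative only):

#include <cstdio>
#include <vector>

// Unfold a lexicographic index into a coordinate, first dimension running fastest
// (the same convention as Grid's Lexicographic::CoorFromIndex).
static void coor_from_index(std::vector<int>& coor, int index, const std::vector<int>& dims) {
  coor.resize(dims.size());
  for (std::size_t d = 0; d < dims.size(); d++) {
    coor[d] = index % dims[d];
    index  /= dims[d];
  }
}

// Fold a coordinate back into a lexicographic index (inverse of the above).
static int index_from_coor(const std::vector<int>& coor, const std::vector<int>& dims) {
  int index = 0;
  for (int d = (int)dims.size() - 1; d >= 0; d--)
    index = index * dims[d] + coor[d];
  return index;
}

int main() {
  std::vector<int> nb_o   = {2, 2, 2, 2};  // illustrative: blocks per dimension (simd o-blocks)
  std::vector<int> bs_cb  = {2, 2, 2, 2};  // illustrative: checkerboarded block size
  std::vector<int> l_cb_o = {4, 4, 4, 4};  // illustrative: local o-site dimensions (= nb_o * bs_cb)

  int b = 5;                               // a block index, as in block_to_coor
  std::vector<int> x0;
  coor_from_index(x0, b, nb_o);
  for (std::size_t d = 0; d < x0.size(); d++) x0[d] *= bs_cb[d];   // block origin in o-sites

  int i = 3;                               // a site inside the block, as in block_site_to_o_site
  std::vector<int> coor;
  coor_from_index(coor, i, bs_cb);
  for (std::size_t d = 0; d < coor.size(); d++) coor[d] += x0[d];

  int ss = index_from_coor(coor, l_cb_o);  // outer-site index used to address _odata[ss]
  std::printf("block %d, block-site %d -> o-site %d\n", b, i, ss);
  return 0;
}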
							
								
								
									
81  tests/lanczos/FieldBasisVector.h  Normal file
							| @@ -0,0 +1,81 @@ | ||||
| namespace Grid {  | ||||
|  | ||||
| template<class Field> | ||||
| class BasisFieldVector { | ||||
|  public: | ||||
|   int _Nm; | ||||
|  | ||||
|   typedef typename Field::scalar_type Coeff_t; | ||||
|   typedef typename Field::vector_type vCoeff_t; | ||||
|   typedef typename Field::vector_object vobj; | ||||
|   typedef typename vobj::scalar_object sobj; | ||||
|  | ||||
|   std::vector<Field> _v; // _Nfull vectors | ||||
|  | ||||
|   void report(int n,GridBase* value) { | ||||
|  | ||||
|     std::cout << GridLogMessage << "BasisFieldVector allocated:\n"; | ||||
|     std::cout << GridLogMessage << " Delta N = " << n << "\n"; | ||||
|     std::cout << GridLogMessage << " Size of full vectors (size) = " <<  | ||||
|       ((double)n*sizeof(vobj)*value->oSites() / 1024./1024./1024.) << " GB\n"; | ||||
|     std::cout << GridLogMessage << " Size = " << _v.size() << " Capacity = " << _v.capacity() << std::endl; | ||||
|  | ||||
|     value->Barrier(); | ||||
|  | ||||
| #ifdef __linux | ||||
|     if (value->IsBoss()) { | ||||
|       system("cat /proc/meminfo"); | ||||
|     } | ||||
| #endif | ||||
|  | ||||
|     value->Barrier(); | ||||
|  | ||||
|   } | ||||
|  | ||||
|   BasisFieldVector(int Nm,GridBase* value) : _Nm(Nm), _v(Nm,value) { | ||||
|     report(Nm,value); | ||||
|   } | ||||
|    | ||||
|   ~BasisFieldVector() { | ||||
|   } | ||||
|  | ||||
|   Field& operator[](int i) { | ||||
|     return _v[i]; | ||||
|   } | ||||
|  | ||||
|   void orthogonalize(Field& w, int k) { | ||||
|     basisOrthogonalize(_v,w,k); | ||||
|   } | ||||
|  | ||||
|   void rotate(Eigen::MatrixXd& Qt,int j0, int j1, int k0,int k1,int Nm) { | ||||
|     basisRotate(_v,Qt,j0,j1,k0,k1,Nm); | ||||
|   } | ||||
|  | ||||
|   size_t size() const { | ||||
|     return _Nm; | ||||
|   } | ||||
|  | ||||
|   void resize(int n) { | ||||
|     if (n > _Nm) | ||||
|       _v.reserve(n); | ||||
|      | ||||
|     _v.resize(n,_v[0]._grid); | ||||
|  | ||||
|     if (n < _Nm) | ||||
|       _v.shrink_to_fit(); | ||||
|  | ||||
|     report(n - _Nm,_v[0]._grid); | ||||
|  | ||||
|     _Nm = n; | ||||
|   } | ||||
|  | ||||
|   void sortInPlace(std::vector<RealD>& sort_vals, bool reverse) { | ||||
|     basisSortInPlace(_v,sort_vals,reverse); | ||||
|   } | ||||
|  | ||||
|   void deflate(const std::vector<RealD>& eval,const Field& src_orig,Field& result) { | ||||
|     basisDeflate(_v,eval,src_orig,result); | ||||
|   } | ||||
|  | ||||
|  };  | ||||
| } | ||||
							
								
								
									
1085  tests/lanczos/FieldVectorIO.h  Normal file (file diff suppressed because it is too large)
1  tests/lanczos/Makefile.am  Normal file
							| @@ -0,0 +1 @@ | ||||
| include Make.inc | ||||
							
								
								
									
136  tests/lanczos/Params.h  Normal file
							| @@ -0,0 +1,136 @@ | ||||
| /* | ||||
|   Params IO | ||||
|  | ||||
|   Author: Christoph Lehner | ||||
|   Date:   2017 | ||||
| */ | ||||
|  | ||||
| #define PADD(p,X) p.get(#X,X); | ||||
|  | ||||
| class Params { | ||||
|  protected: | ||||
|  | ||||
|   std::string trim(const std::string& sc) { | ||||
|     std::string s = sc; | ||||
|     s.erase(s.begin(), std::find_if(s.begin(), s.end(), | ||||
| 				    std::not1(std::ptr_fun<int, int>(std::isspace)))); | ||||
|     s.erase(std::find_if(s.rbegin(), s.rend(), | ||||
| 			 std::not1(std::ptr_fun<int, int>(std::isspace))).base(), s.end()); | ||||
|     return s; | ||||
|   } | ||||
|  | ||||
|  public: | ||||
|  | ||||
|   std::map< std::string, std::string > lines; | ||||
|   std::string _fn; | ||||
|  | ||||
|  Params(const char* fn) : _fn(fn) { | ||||
|     FILE* f = fopen(fn,"rt"); | ||||
|     assert(f); | ||||
|     while (!feof(f)) { | ||||
|       char buf[4096]; | ||||
|       if (fgets(buf,sizeof(buf),f)) { | ||||
| 	if (buf[0] != '#' && buf[0] != '\r' && buf[0] != '\n') { | ||||
| 	  char* sep = strchr(buf,'='); | ||||
| 	  assert(sep); | ||||
| 	  *sep = '\0'; | ||||
| 	  lines[trim(buf)] = trim(sep+1); | ||||
| 	} | ||||
|       } | ||||
|     }       | ||||
|     fclose(f); | ||||
|   } | ||||
|  | ||||
|   ~Params() { | ||||
|   } | ||||
|  | ||||
|   std::string loghead() { | ||||
|     return _fn + ": "; | ||||
|   } | ||||
|  | ||||
|   bool has(const char* name) { | ||||
|     auto f = lines.find(name); | ||||
|     return (f != lines.end()); | ||||
|   } | ||||
|  | ||||
|   const std::string& get(const char* name) { | ||||
|     auto f = lines.find(name); | ||||
|     if (f == lines.end()) { | ||||
|       std::cout << Grid::GridLogMessage << loghead() << "Could not find value for " << name << std::endl; | ||||
|       abort(); | ||||
|     } | ||||
|     return f->second; | ||||
|   } | ||||
|  | ||||
|   void parse(std::string& s, const std::string& cval) { | ||||
|     std::stringstream trimmer; | ||||
|     trimmer << cval; | ||||
|     s.clear(); | ||||
|     trimmer >> s; | ||||
|   } | ||||
|  | ||||
|   void parse(int& i, const std::string& cval) { | ||||
|     assert(sscanf(cval.c_str(),"%d",&i)==1); | ||||
|   } | ||||
|  | ||||
|   void parse(long long& i, const std::string& cval) { | ||||
|     assert(sscanf(cval.c_str(),"%lld",&i)==1); | ||||
|   } | ||||
|  | ||||
|   void parse(double& f, const std::string& cval) { | ||||
|     assert(sscanf(cval.c_str(),"%lf",&f)==1); | ||||
|   } | ||||
|  | ||||
|   void parse(float& f, const std::string& cval) { | ||||
|     assert(sscanf(cval.c_str(),"%f",&f)==1); | ||||
|   } | ||||
|  | ||||
|   void parse(bool& b, const std::string& cval) { | ||||
|     std::string lcval = cval; | ||||
|     std::transform(lcval.begin(), lcval.end(), lcval.begin(), ::tolower); | ||||
|     if (lcval == "true" || lcval == "yes") { | ||||
|       b = true; | ||||
|     } else if (lcval == "false" || lcval == "no") { | ||||
|       b = false; | ||||
|     } else { | ||||
|       std::cout << "Invalid value for boolean: " << cval << std::endl; | ||||
|       assert(0); | ||||
|     } | ||||
|   } | ||||
|  | ||||
|   void parse(std::complex<double>& f, const std::string& cval) { | ||||
|     double r,i; | ||||
|     assert(sscanf(cval.c_str(),"%lf %lf",&r,&i)==2); | ||||
|     f = std::complex<double>(r,i); | ||||
|   } | ||||
|  | ||||
|   void parse(std::complex<float>& f, const std::string& cval) { | ||||
|     float r,i; | ||||
|     assert(sscanf(cval.c_str(),"%f %f",&r,&i)==2); | ||||
|     f = std::complex<float>(r,i); | ||||
|   } | ||||
|  | ||||
|   template<class T> | ||||
|     void get(const char* name, std::vector<T>& v) { | ||||
|     int i = 0; | ||||
|     v.resize(0); | ||||
|     while (true) { | ||||
|       char buf[4096]; | ||||
|       sprintf(buf,"%s[%d]",name,i++); | ||||
|       if (!has(buf)) | ||||
| 	break; | ||||
|       T val; | ||||
|       parse(val,get(buf)); | ||||
|       std::cout << Grid::GridLogMessage << loghead() << "Set " << buf << " to " << val << std::endl; | ||||
|       v.push_back(val); | ||||
|     } | ||||
|   } | ||||
|  | ||||
|   template<class T> | ||||
|     void get(const char* name, T& f) { | ||||
|     parse(f,get(name)); | ||||
|     std::cout << Grid::GridLogMessage << loghead() << "Set " << name << " to " << f << std::endl; | ||||
|   } | ||||
|  | ||||
|    | ||||
| }; | ||||
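The parser reads one "name = value" pair per line, skips lines starting with '#', trims whitespace around the '=', collects vectors from consecutive name[0], name[1], ... keys, and reads complex numbers as two whitespace-separated reals. A hypothetical params.txt fragment in that format (the keys match the PADD calls in the test driver below, but the values are purely illustrative):

# params.txt -- illustrative values only, not a tuned run setup
config  = ckpoint_lat.1000
mass    = 0.01
M5      = 1.8
Nstop1  = 60
Nk1     = 70
Np1     = 30
resid1  = 1e-8
# complex entries are parsed as "re im"; Ls is taken from the number of omega entries
omega[0] = 1.0 0.0
omega[1] = 1.0 0.0
# 5d block size, one entry per dimension of the checkerboarded five-dimensional grid
block_size[0] = 2
block_size[1] = 2
block_size[2] = 2
block_size[3] = 2
block_size[4] = 2
cg_test_enabled = true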
							
								
								
									
712  tests/lanczos/Test_dwf_compressed_lanczos.cc  Normal file
							| @@ -0,0 +1,712 @@ | ||||
| /* | ||||
|   Authors: Christoph Lehner | ||||
|   Date: 2017 | ||||
|  | ||||
|   Multigrid Lanczos | ||||
|  | ||||
|  | ||||
|  | ||||
|   TODO: | ||||
|  | ||||
|   High priority: | ||||
|   - Explore filtering of the starting vector again; it should really work: if the Chebyshev is ~4 in the low-mode region and ~1 in the high-mode region, 15 applications give ~1e9 suppression | ||||
|     of the high modes, which should already create the desired invariant subspace.  Am I missing something here?  Maybe the dynamic range is dangerous, i.e., it could also kill the interesting | ||||
|     eigenrange if one is not careful. | ||||
|  | ||||
|     Better: use all Chebyshev polynomials up to order N to approximate a step function; try this!  Problem: the width of the step.  One can kill the eigenspace > 1e-3 while keeping the part < 1e-5 equal | ||||
|             to 1. | ||||
|  | ||||
|   Low priority: | ||||
|   - Given that I seem to need many restarts and a high-degree polynomial to create the basis, and this takes about 1 day, seriously consider a simple method to create a basis | ||||
|     (orthogonal Krylov with a low-order polynomial) and then fix up the lowest, say, 200 eigenvalues with one run of a high-degree polynomial (600 could be enough). | ||||
| */ | ||||
| #include <Grid/Grid.h> | ||||
| #include <Grid/algorithms/iterative/ImplicitlyRestartedLanczos.h> | ||||
| ///////////////////////////////////////////////////////////////////////////// | ||||
| // The following are now decoupled from the Lanczos and deal with grids. | ||||
| // Safe to replace functionality | ||||
| ///////////////////////////////////////////////////////////////////////////// | ||||
| #include "BlockedGrid.h" | ||||
| #include "FieldBasisVector.h" | ||||
| #include "BlockProjector.h" | ||||
| #include "FieldVectorIO.h" | ||||
| #include "Params.h" | ||||
|  | ||||
| using namespace std; | ||||
| using namespace Grid; | ||||
| using namespace Grid::QCD; | ||||
|  | ||||
| bool read_evals(GridBase* _grid, char* fn, std::vector<RealD>& evals) { | ||||
|  | ||||
|   FILE* f = 0; | ||||
|   uint32_t status = 0; | ||||
|   if (_grid->IsBoss()) { | ||||
|     f = fopen(fn,"rt"); | ||||
|     status = f ? 1 : 0; | ||||
|   } | ||||
|   _grid->GlobalSum(status); | ||||
|  | ||||
|   if (!status) | ||||
|     return false; | ||||
|  | ||||
|   uint32_t N; | ||||
|   if (f) | ||||
|     assert(fscanf(f,"%u\n",&N)==1); // N is uint32_t, so read with %u | ||||
|   else | ||||
|     N = 0; | ||||
|   _grid->GlobalSum(N); | ||||
|  | ||||
|   std::cout << "Reading " << N << " eigenvalues" << std::endl; | ||||
|  | ||||
|   evals.resize(N); | ||||
|  | ||||
|   for (int i=0;i<N;i++) { | ||||
|     if (f) | ||||
|       assert(fscanf(f,"%lf",&evals[i])==1); | ||||
|     else | ||||
|       evals[i] = 0; | ||||
|   } | ||||
|  | ||||
|   _grid->GlobalSumVector(&evals[0],evals.size()); | ||||
|  | ||||
|   if (f) | ||||
|     fclose(f); | ||||
|   return true; | ||||
| } | ||||
|  | ||||
| void write_evals(char* fn, std::vector<RealD>& evals) { | ||||
|   FILE* f = fopen(fn,"wt"); | ||||
|   assert(f); | ||||
|  | ||||
|   int N = (int)evals.size(); | ||||
|   fprintf(f,"%d\n",N); | ||||
|  | ||||
|   for (int i=0;i<N;i++) { | ||||
|     fprintf(f,"%.15E\n",evals[i]); | ||||
|   } | ||||
|  | ||||
|   fclose(f); | ||||
| } | ||||
|  | ||||
| void write_history(char* fn, std::vector<RealD>& hist) { | ||||
|   FILE* f = fopen(fn,"wt"); | ||||
|   assert(f); | ||||
|  | ||||
|   int N = (int)hist.size(); | ||||
|   for (int i=0;i<N;i++) { | ||||
|     fprintf(f,"%d %.15E\n",i,hist[i]); | ||||
|   } | ||||
|  | ||||
|   fclose(f); | ||||
| } | ||||
|  | ||||
|  | ||||
| template<typename Field> | ||||
| class CheckpointedLinearFunction : public LinearFunction<Field> { | ||||
| public: | ||||
|   LinearFunction<Field>& _op; | ||||
|   std::string _dir; | ||||
|   int _max_apply; | ||||
|   int _apply, _apply_actual; | ||||
|   GridBase* _grid; | ||||
|   FILE* _f; | ||||
|  | ||||
|   CheckpointedLinearFunction(GridBase* grid, LinearFunction<Field>& op, const char* dir,int max_apply) : _op(op), _dir(dir), _grid(grid), _f(0), | ||||
| 													 _max_apply(max_apply), _apply(0), _apply_actual(0) { | ||||
|  | ||||
|     FieldVectorIO::conditionalMkDir(dir); | ||||
|  | ||||
|     char fn[4096]; | ||||
|     sprintf(fn,"%s/ckpt_op.%4.4d",_dir.c_str(),_grid->ThisRank()); | ||||
|     printf("CheckpointLinearFunction:: file %s\n",fn); | ||||
|     _f = fopen(fn,"r+b"); | ||||
|     if (!_f) | ||||
|       _f = fopen(fn,"w+b"); | ||||
|     assert(_f); | ||||
|     fseek(_f,0,SEEK_CUR); | ||||
|  | ||||
|   } | ||||
|  | ||||
|   ~CheckpointedLinearFunction() { | ||||
|     if (_f) { | ||||
|       fclose(_f); | ||||
|       _f = 0; | ||||
|     } | ||||
|   } | ||||
|  | ||||
|   bool load_ckpt(const Field& in, Field& out) { | ||||
|  | ||||
|     off_t cur = ftello(_f); | ||||
|     fseeko(_f,0,SEEK_END); | ||||
|     if (cur == ftello(_f)) | ||||
|       return false; | ||||
|     fseeko(_f,cur,SEEK_SET); | ||||
|  | ||||
|     size_t sz = sizeof(out._odata[0]) * out._odata.size(); | ||||
|  | ||||
|     GridStopWatch gsw; | ||||
|     gsw.Start(); | ||||
|     uint32_t crc_exp; | ||||
|     assert(fread(&crc_exp,4,1,_f)==1); | ||||
|     assert(fread(&out._odata[0],sz,1,_f)==1); | ||||
|     assert(FieldVectorIO::crc32_threaded((unsigned char*)&out._odata[0],sz,0x0)==crc_exp); | ||||
|     gsw.Stop(); | ||||
|  | ||||
|     printf("CheckpointLinearFunction:: reading %lld\n",(long long)sz); | ||||
|     std::cout << GridLogMessage << "Loading " << ((RealD)sz/1024./1024./1024.) << " GB in " << gsw.Elapsed() << std::endl; | ||||
|     return true; | ||||
|   } | ||||
|  | ||||
|   void save_ckpt(const Field& in, Field& out) { | ||||
|  | ||||
|     fseek(_f,0,SEEK_CUR); // switch to write | ||||
|  | ||||
|     size_t sz = sizeof(out._odata[0]) * out._odata.size(); | ||||
|  | ||||
|     GridStopWatch gsw; | ||||
|     gsw.Start(); | ||||
|     uint32_t crc = FieldVectorIO::crc32_threaded((unsigned char*)&out._odata[0],sz,0x0); | ||||
|     assert(fwrite(&crc,4,1,_f)==1); | ||||
|     assert(fwrite(&out._odata[0],sz,1,_f)==1); | ||||
|     fflush(_f); // try this on the GPFS to suppress OPA usage for disk during dslash; this is not needed at Lustre/JLAB | ||||
|     gsw.Stop(); | ||||
|  | ||||
|     printf("CheckpointLinearFunction:: writing %lld\n",(long long)sz); | ||||
|     std::cout << GridLogMessage << "Saving " << ((RealD)sz/1024./1024./1024.) << " GB in " << gsw.Elapsed() << std::endl; | ||||
|   } | ||||
|  | ||||
|   void operator()(const Field& in, Field& out) { | ||||
|  | ||||
|     _apply++; | ||||
|  | ||||
|     if (load_ckpt(in,out)) | ||||
|       return; | ||||
|  | ||||
|     _op(in,out); | ||||
|      | ||||
|     save_ckpt(in,out); | ||||
|  | ||||
|     if (_apply_actual++ >= _max_apply) { | ||||
|       std::cout << GridLogMessage << "Maximum application of operator reached, checkpoint and finish in future job" << std::endl; | ||||
|       if (_f) { fclose(_f); _f=0; } | ||||
|       in._grid->Barrier(); | ||||
|       Grid_finalize(); | ||||
|       exit(3); | ||||
|     } | ||||
|   } | ||||
| }; | ||||
|  | ||||
| template<typename CoarseField,typename Field> | ||||
| class ProjectedFunctionHermOp : public LinearFunction<CoarseField> { | ||||
| public: | ||||
|   OperatorFunction<Field>   & _poly; | ||||
|   LinearOperatorBase<Field> &_Linop; | ||||
|   BlockProjector<Field>& _pr; | ||||
|  | ||||
|   ProjectedFunctionHermOp(BlockProjector<Field>& pr,OperatorFunction<Field> & poly,LinearOperatorBase<Field>& linop) : _poly(poly), _Linop(linop), _pr(pr) { | ||||
|   } | ||||
|  | ||||
|   void operator()(const CoarseField& in, CoarseField& out) { | ||||
|     assert(_pr._bgrid._o_blocks == in._grid->oSites()); | ||||
|  | ||||
|     Field fin(_pr._bgrid._grid); | ||||
|     Field fout(_pr._bgrid._grid); | ||||
|  | ||||
|     GridStopWatch gsw1,gsw2,gsw3; | ||||
|     // fill fin | ||||
|     gsw1.Start(); | ||||
|     _pr.coarseToFine(in,fin); | ||||
|     gsw1.Stop(); | ||||
|  | ||||
|     // apply poly | ||||
|     gsw2.Start(); | ||||
|     _poly(_Linop,fin,fout); | ||||
|     gsw2.Stop(); | ||||
|  | ||||
|     // fill out | ||||
|     gsw3.Start(); | ||||
|     _pr.fineToCoarse(fout,out); | ||||
|     gsw3.Stop(); | ||||
|  | ||||
|     auto eps = innerProduct(in,out); | ||||
|     std::cout << GridLogMessage << "Operator timing details: c2f = " << gsw1.Elapsed() << " poly = " << gsw2.Elapsed() << " f2c = " << gsw3.Elapsed() <<  | ||||
|       "   Complimentary Hermiticity check: " << eps.imag() / std::abs(eps) << std::endl; | ||||
|  | ||||
|   } | ||||
| }; | ||||
|  | ||||
| template<typename CoarseField,typename Field> | ||||
| class ProjectedHermOp : public LinearFunction<CoarseField> { | ||||
| public: | ||||
|   LinearOperatorBase<Field> &_Linop; | ||||
|   BlockProjector<Field>& _pr; | ||||
|  | ||||
|   ProjectedHermOp(BlockProjector<Field>& pr,LinearOperatorBase<Field>& linop) : _Linop(linop), _pr(pr) { | ||||
|   } | ||||
|  | ||||
|   void operator()(const CoarseField& in, CoarseField& out) { | ||||
|     assert(_pr._bgrid._o_blocks == in._grid->oSites()); | ||||
|     Field fin(_pr._bgrid._grid); | ||||
|     Field fout(_pr._bgrid._grid); | ||||
|     _pr.coarseToFine(in,fin); | ||||
|     _Linop.HermOp(fin,fout); | ||||
|     _pr.fineToCoarse(fout,out); | ||||
|  | ||||
|   } | ||||
| }; | ||||
|  | ||||
| template<typename vtype, int N > using CoarseSiteFieldGeneral = iScalar< iVector<vtype, N> >; | ||||
| template<int N> using CoarseSiteFieldD = CoarseSiteFieldGeneral< vComplexD, N >; | ||||
| template<int N> using CoarseSiteFieldF = CoarseSiteFieldGeneral< vComplexF, N >; | ||||
| template<int N> using CoarseSiteField  = CoarseSiteFieldGeneral< vComplex,  N >; | ||||
| template<int N> using CoarseLatticeFermion  = Lattice< CoarseSiteField<N> >; | ||||
| template<int N> using CoarseLatticeFermionD = Lattice< CoarseSiteFieldD<N> >; | ||||
|  | ||||
| template<typename Field,int Nstop1> | ||||
| void CoarseGridLanczos(BlockProjector<Field>& pr,RealD alpha2,RealD beta,int Npoly2, | ||||
| 		       int Nstop2,int Nk2,int Nm2,RealD resid2,RealD betastp2,int MaxIt,int MinRes2, | ||||
| 		       LinearOperatorBase<Field>& HermOp, std::vector<RealD>& eval1, bool cg_test_enabled,  | ||||
| 		       int cg_test_maxiter,int nsingle,int SkipTest2, int MaxApply2,bool smoothed_eval_enabled, | ||||
| 		       int smoothed_eval_inner,int smoothed_eval_outer,int smoothed_eval_begin, | ||||
| 		       int smoothed_eval_end,RealD smoothed_eval_inner_resid) { | ||||
|  | ||||
|   BlockedGrid<Field>& bgrid = pr._bgrid; | ||||
|   BasisFieldVector<Field>& basis = pr._evec; | ||||
|  | ||||
|  | ||||
|   std::vector<int> coarseFourDimLatt; | ||||
|   for (int i=0;i<4;i++) | ||||
|     coarseFourDimLatt.push_back(bgrid._nb[1+i] * bgrid._grid->_processors[1+i]); | ||||
|   assert(bgrid._grid->_processors[0] == 1); | ||||
|  | ||||
|   std::cout << GridLogMessage << "CoarseGrid = " << coarseFourDimLatt << " with basis = " << Nstop1 << std::endl; | ||||
|   GridCartesian         * UCoarseGrid   = SpaceTimeGrid::makeFourDimGrid(coarseFourDimLatt, GridDefaultSimd(Nd,vComplex::Nsimd()),GridDefaultMpi()); | ||||
|   GridCartesian         * FCoarseGrid   = SpaceTimeGrid::makeFiveDimGrid(bgrid._nb[0],UCoarseGrid); | ||||
|  | ||||
|   Chebyshev<Field> Cheb2(alpha2,beta,Npoly2); | ||||
|   CoarseLatticeFermion<Nstop1> src_coarse(FCoarseGrid); | ||||
|  | ||||
|   // Second round of Lanczos in blocked space | ||||
|   std::vector<RealD>         eval2(Nm2); | ||||
|   std::vector<RealD>         eval3(Nm2); | ||||
|   BasisFieldVector<CoarseLatticeFermion<Nstop1> > coef(Nm2,FCoarseGrid); | ||||
|  | ||||
|   ProjectedFunctionHermOp<CoarseLatticeFermion<Nstop1>,LatticeFermion> Op2plain(pr,Cheb2,HermOp); | ||||
|   CheckpointedLinearFunction<CoarseLatticeFermion<Nstop1> > Op2ckpt(src_coarse._grid,Op2plain,"checkpoint",MaxApply2); | ||||
|   LinearFunction< CoarseLatticeFermion<Nstop1> >* Op2; | ||||
|   if (MaxApply2) { | ||||
|     Op2 = &Op2ckpt; | ||||
|   } else { | ||||
|     Op2 = &Op2plain; | ||||
|   } | ||||
|   ProjectedHermOp<CoarseLatticeFermion<Nstop1>,LatticeFermion> Op2nopoly(pr,HermOp); | ||||
|   ImplicitlyRestartedLanczos<CoarseLatticeFermion<Nstop1> > IRL2(*Op2,*Op2,Nstop2,Nk2,Nm2,resid2,MaxIt,betastp2,MinRes2); | ||||
|  | ||||
|  | ||||
|   src_coarse = 1.0; | ||||
|    | ||||
|   // Precision test | ||||
|   { | ||||
|     Field tmp(bgrid._grid); | ||||
|     CoarseLatticeFermion<Nstop1> tmp2(FCoarseGrid); | ||||
|     CoarseLatticeFermion<Nstop1> tmp3(FCoarseGrid); | ||||
|     tmp2 = 1.0; | ||||
|     tmp3 = 1.0; | ||||
|  | ||||
|     pr.coarseToFine(tmp2,tmp); | ||||
|     pr.fineToCoarse(tmp,tmp2); | ||||
|  | ||||
|     tmp2 -= tmp3; | ||||
|     std::cout << GridLogMessage << "Precision Test c->f->c: " << norm2(tmp2) / norm2(tmp3) << std::endl; | ||||
|  | ||||
|     //bgrid._grid->Barrier(); | ||||
|     //return; | ||||
|   } | ||||
|  | ||||
|   int Nconv; | ||||
|   if (!FieldVectorIO::read_compressed_vectors("lanczos.output",pr,coef) || | ||||
|       !read_evals(UCoarseGrid,(char *)"lanczos.output/eigen-values.txt",eval3) || | ||||
|       !read_evals(UCoarseGrid,(char *)"lanczos.output/eigen-values.txt.linear",eval1) || | ||||
|       !read_evals(UCoarseGrid,(char *)"lanczos.output/eigen-values.txt.poly",eval2) | ||||
|       ) { | ||||
|      | ||||
|  | ||||
|     IRL2.calc(eval2,coef._v,src_coarse,Nconv,true); | ||||
|  | ||||
|     coef.resize(Nstop2); | ||||
|     eval2.resize(Nstop2); | ||||
|     eval3.resize(Nstop2); | ||||
|  | ||||
|     std::vector<Field> step3_cache; | ||||
|  | ||||
|     // reconstruct eigenvalues of original operator | ||||
|     for (int i=0;i<Nstop2;i++){ | ||||
|       RealD eval2_linear; | ||||
|  | ||||
|       if (i<Nstop1) { | ||||
| 	eval2_linear = eval1[i]; | ||||
|       } else { | ||||
| 	eval2_linear = eval2[i-1]; | ||||
|       } | ||||
|  | ||||
|       RealD eval2_poly = eval2[i]; | ||||
|       RealD eval_reconstruct = Cheb2.approxInv(eval2_poly,eval2_linear,100,1e-10); | ||||
|       std::cout << i << " Reconstructed eval = " << eval_reconstruct << " from guess " << eval2_linear << std::endl; | ||||
|       eval2[i] = eval_reconstruct; | ||||
|     } | ||||
|      | ||||
|     // as demonstrated in CG test below, best result from mixed determination | ||||
|     for (int i=0;i<Nstop2;i++) | ||||
|       eval3[i] = (i < Nstop1) ? eval1[i] : eval2[i]; | ||||
|      | ||||
|     for(int i=0;i<Nstop2;i++){ | ||||
|       std::cout << i<<" / "<< Nstop2<< " eigenvalue "<< eval3[i] <<std::endl; | ||||
|     }; | ||||
|      | ||||
|     // write | ||||
|     mkdir("lanczos.output",ACCESSPERMS); | ||||
|     FieldVectorIO::write_compressed_vectors("lanczos.output",pr,coef,nsingle); | ||||
|     if (bgrid._grid->IsBoss()) { | ||||
|       write_evals((char *)"lanczos.output/eigen-values.txt",eval3); | ||||
|       write_evals((char *)"lanczos.output/eigen-values.txt.linear",eval1); | ||||
|       write_evals((char *)"lanczos.output/eigen-values.txt.poly",eval2); | ||||
|     } | ||||
|  | ||||
|   } | ||||
|  | ||||
|   // fix up eigenvalues | ||||
|   if (!read_evals(UCoarseGrid,(char *)"lanczos.output/eigen-values.txt.smoothed",eval3) && smoothed_eval_enabled) { | ||||
|  | ||||
|     ConjugateGradient<LatticeFermion> CG(smoothed_eval_inner_resid, smoothed_eval_inner, false); | ||||
|  | ||||
|     LatticeFermion v_i(basis[0]._grid); | ||||
|     auto tmp = v_i; | ||||
|     auto tmp2 = v_i; | ||||
|  | ||||
|     for (int i=smoothed_eval_begin;i<smoothed_eval_end;i++) { | ||||
|  | ||||
|       GridStopWatch gsw; | ||||
|  | ||||
|       gsw.Start(); | ||||
|  | ||||
|       pr.coarseToFine(coef[i],v_i); | ||||
|       v_i.checkerboard = Odd; | ||||
|        | ||||
|       for (int j=0;j<smoothed_eval_outer;j++) { | ||||
| 	tmp=zero; | ||||
| 	//pr.deflate(coef,eval3,Nstop2,v_i,tmp); | ||||
| 	CG(HermOp, v_i, tmp); | ||||
|  | ||||
| 	v_i = 1.0 / ::sqrt( norm2(tmp) ) * tmp; | ||||
|       } | ||||
|  | ||||
|       tmp = v_i; | ||||
|  | ||||
|       HermOp.HermOp(tmp,tmp2); | ||||
|  | ||||
|       RealD ev = innerProduct(tmp,tmp2).real(); | ||||
|  | ||||
|       gsw.Stop(); | ||||
|  | ||||
|       std::cout << GridLogMessage << "Smoothed eigenvalue " << i << " from " << eval3[i] << " to " << ev << " in " << gsw.Elapsed() << std::endl; | ||||
|       //	" with effective smoother precision " << (CG.ResHistory.back() / CG.ResHistory.front() ) << std::endl; | ||||
|       //      CG.ResHistory.clear(); | ||||
|  | ||||
|       eval3[i] = ev; | ||||
|     } | ||||
|  | ||||
|     if (bgrid._grid->IsBoss()) { | ||||
|       write_evals((char *)"lanczos.output/eigen-values.txt.smoothed",eval3); | ||||
|       write_evals((char *)"lanczos.output/eigen-values.txt",eval3); // also reset this to the best ones we have available | ||||
|     } | ||||
|   } | ||||
|  | ||||
|   // do CG test with and without deflation | ||||
|   if (cg_test_enabled) { | ||||
|     ConjugateGradient<LatticeFermion> CG(1.0e-8, cg_test_maxiter, false); | ||||
|     LatticeFermion src_orig(bgrid._grid); | ||||
|     src_orig.checkerboard = Odd; | ||||
|     src_orig = 1.0; | ||||
|     src_orig = src_orig * (1.0 / ::sqrt(norm2(src_orig)) ); | ||||
|     auto result = src_orig;  | ||||
|  | ||||
|     // undeflated solve | ||||
|     std::cout << GridLogMessage << " Undeflated solve "<<std::endl; | ||||
|     result = zero; | ||||
|     CG(HermOp, src_orig, result); | ||||
|     //    if (UCoarseGrid->IsBoss()) | ||||
|     //      write_history("cg_test.undefl",CG.ResHistory); | ||||
|     //    CG.ResHistory.clear(); | ||||
|  | ||||
|     // deflated solve with all eigenvectors | ||||
|     std::cout << GridLogMessage << " Deflated solve with all evectors"<<std::endl; | ||||
|     result = zero; | ||||
|     pr.deflate(coef,eval2,Nstop2,src_orig,result); | ||||
|     CG(HermOp, src_orig, result); | ||||
|     //    if (UCoarseGrid->IsBoss()) | ||||
|     //      write_history("cg_test.defl_all",CG.ResHistory); | ||||
|     //    CG.ResHistory.clear(); | ||||
|  | ||||
|     // deflated solve with non-blocked eigenvectors | ||||
|     std::cout << GridLogMessage << " Deflated solve with non-blocked evectors"<<std::endl; | ||||
|     result = zero; | ||||
|     pr.deflate(coef,eval1,Nstop1,src_orig,result); | ||||
|     CG(HermOp, src_orig, result); | ||||
|     //    if (UCoarseGrid->IsBoss()) | ||||
|     //      write_history("cg_test.defl_full",CG.ResHistory); | ||||
|     //    CG.ResHistory.clear(); | ||||
|  | ||||
|     // deflated solve with all eigenvectors and original eigenvalues from proj | ||||
|     std::cout << GridLogMessage << " Deflated solve with all eigenvectors and original eigenvalues from proj"<<std::endl; | ||||
|     result = zero; | ||||
|     pr.deflate(coef,eval3,Nstop2,src_orig,result); | ||||
|     CG(HermOp, src_orig, result); | ||||
|     //    if (UCoarseGrid->IsBoss()) | ||||
|     //      write_history("cg_test.defl_all_ev3",CG.ResHistory); | ||||
|     //    CG.ResHistory.clear(); | ||||
|  | ||||
|   } | ||||
|    | ||||
| } | ||||
|  | ||||
|  | ||||
| template<typename Field> | ||||
| void quick_krylov_basis(BasisFieldVector<Field>& evec,Field& src,LinearFunction<Field>& Op,int Nstop) { | ||||
|   Field tmp = src; | ||||
|   Field tmp2 = tmp; | ||||
|  | ||||
|   for (int i=0;i<Nstop;i++) { | ||||
|     GridStopWatch gsw; | ||||
|     gsw.Start(); | ||||
|     Op(tmp,tmp2); | ||||
|     gsw.Stop(); | ||||
|     evec.orthogonalize(tmp2,i); | ||||
|  | ||||
|     RealD nn = norm2(tmp2); | ||||
|     nn = Grid::sqrt(nn); | ||||
|     tmp2 = tmp2 * (1.0/nn); | ||||
|  | ||||
|     evec[i] = tmp2; | ||||
|     tmp = tmp2; | ||||
|     std::cout << GridLogMessage << "Quick_krylov_basis: " << i << "/" << Nstop << " timing of operator=" << gsw.Elapsed() << std::endl; | ||||
|   } | ||||
|  | ||||
| } | ||||
|  | ||||
|  | ||||
|  | ||||
| int main (int argc, char ** argv) { | ||||
|  | ||||
|   Grid_init(&argc,&argv); | ||||
|  | ||||
|   const int MaxIt = 10000; | ||||
|  | ||||
|   int Ls; | ||||
|   RealD mass; | ||||
|   RealD M5; | ||||
|   std::vector < std::complex<double>  > omega; | ||||
|    | ||||
|   RealD alpha1, alpha2, beta; | ||||
|   int Npoly1, Npoly2; | ||||
|   int Nstop1, Nstop2; | ||||
|   int Nk1, Nk2; | ||||
|   int Np1, Np2; | ||||
|   int MinRes1, MinRes2; | ||||
|   int SkipTest2, MaxApply2; | ||||
|   bool checkpoint_basis; | ||||
|   bool cg_test_enabled; | ||||
|   bool exit_after_basis_calculation; | ||||
|   bool simple_krylov_basis; | ||||
|   int cg_test_maxiter; | ||||
|   int nsingle; // store in single precision, the rest in FP16 | ||||
|   int max_cheb_time_ms; | ||||
|   bool smoothed_eval_enabled; | ||||
|   int smoothed_eval_inner; | ||||
|   int smoothed_eval_outer; | ||||
|   int smoothed_eval_begin; | ||||
|   int smoothed_eval_end; | ||||
|   RealD smoothed_eval_inner_resid; | ||||
|  | ||||
|   // vector representation | ||||
|   std::vector<int> block_size; // 5d block size | ||||
|  | ||||
|   RealD resid1, resid2, betastp1, betastp2, basis_norm_threshold; | ||||
|  | ||||
|   std::string config; | ||||
|    | ||||
|   Params jp("params.txt"); | ||||
|   PADD(jp,Npoly1); PADD(jp,Npoly2); | ||||
|   PADD(jp,max_cheb_time_ms); | ||||
|   PADD(jp,Nstop1); PADD(jp,Nstop2); PADD(jp,MaxApply2); | ||||
|   PADD(jp,Nk1); PADD(jp,Nk2); PADD(jp,betastp1); PADD(jp,betastp2); | ||||
|   PADD(jp,Np1); PADD(jp,Np2); basis_norm_threshold = 1e-5; //PADD(jp,basis_norm_threshold); | ||||
|   PADD(jp,block_size); PADD(jp,smoothed_eval_enabled); PADD(jp,smoothed_eval_inner); | ||||
|   PADD(jp,resid1); PADD(jp,resid2); PADD(jp,smoothed_eval_outer); | ||||
|   PADD(jp,alpha1); PADD(jp,alpha2); PADD(jp,smoothed_eval_begin); | ||||
|   PADD(jp,MinRes1); PADD(jp,MinRes2); PADD(jp,smoothed_eval_end); | ||||
|   PADD(jp,beta); PADD(jp,mass); PADD(jp,smoothed_eval_inner_resid); | ||||
|   PADD(jp,omega); PADD(jp,config);  | ||||
|   PADD(jp,M5); PADD(jp,cg_test_enabled); | ||||
|   PADD(jp,cg_test_maxiter); PADD(jp,checkpoint_basis); | ||||
|   PADD(jp,nsingle); PADD(jp,exit_after_basis_calculation); | ||||
|   PADD(jp,simple_krylov_basis); PADD(jp,SkipTest2); | ||||
|  | ||||
|   Ls = (int)omega.size(); | ||||
|  | ||||
|   // Grids | ||||
|   GridCartesian         * UGrid   = SpaceTimeGrid::makeFourDimGrid(GridDefaultLatt(), GridDefaultSimd(Nd,vComplex::Nsimd()),GridDefaultMpi()); | ||||
|   GridCartesian         * UGridHP = SpaceTimeGrid::makeFourDimGrid(GridDefaultLatt(), GridDefaultSimd(Nd,vComplexD::Nsimd()),GridDefaultMpi()); | ||||
|   GridRedBlackCartesian * UrbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(UGrid); | ||||
|   GridRedBlackCartesian * UrbGridHP = SpaceTimeGrid::makeFourDimRedBlackGrid(UGridHP); | ||||
|   GridCartesian         * FGrid   = SpaceTimeGrid::makeFiveDimGrid(Ls,UGrid); | ||||
|   GridCartesian         * FGridHP   = SpaceTimeGrid::makeFiveDimGrid(Ls,UGridHP); | ||||
|   GridRedBlackCartesian * FrbGrid = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls,UGrid); | ||||
|   GridRedBlackCartesian * FrbGridHP = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls,UGridHP); | ||||
|  | ||||
|   // Gauge field | ||||
|   LatticeGaugeField Umu(UGrid); | ||||
|   FieldMetaData header; | ||||
|   NerscIO::readConfiguration(Umu,header,config); | ||||
|   std::cout << GridLogMessage << "Lattice dimensions: " << GridDefaultLatt() | ||||
|             << "   Ls: " << Ls << std::endl; | ||||
|  | ||||
|   // ZMobius EO Operator | ||||
|   ZMobiusFermionR Ddwf(Umu, *FGrid, *FrbGrid, *UGrid, *UrbGrid, mass, M5, omega,1.,0.); | ||||
|   SchurDiagTwoOperator<ZMobiusFermionR,LatticeFermion> HermOp(Ddwf); | ||||
|  | ||||
|   // Eigenvector storage | ||||
|   const int Nm1 = Np1 + Nk1; | ||||
|   const int Nm2 = Np2 + Nk2; // maximum number of vectors we need to keep | ||||
|   std::cout << GridLogMessage << "Keep " << Nm1 << " full vectors" << std::endl; | ||||
|   std::cout << GridLogMessage << "Keep " << Nm2 << " total vectors" << std::endl; | ||||
|   assert(Nm2 >= Nm1); | ||||
|   BasisFieldVector<LatticeFermion> evec(Nm1,FrbGrid); // start off with keeping full vectors | ||||
|  | ||||
|   // First and second cheby | ||||
|   Chebyshev<LatticeFermion> Cheb1(alpha1,beta,Npoly1); | ||||
|   FunctionHermOp<LatticeFermion> Op1(Cheb1,HermOp); | ||||
|   PlainHermOp<LatticeFermion> Op1test(HermOp); | ||||
|  | ||||
|   // Eigenvalue storage | ||||
|   std::vector<RealD>          eval1(evec.size()); | ||||
|  | ||||
|   // Construct source vector | ||||
|   LatticeFermion    src(FrbGrid); | ||||
|   { | ||||
|     src=1.0; | ||||
|     src.checkerboard = Odd; | ||||
|  | ||||
|     // normalize | ||||
|     RealD nn = norm2(src); | ||||
|     nn = Grid::sqrt(nn); | ||||
|     src = src * (1.0/nn); | ||||
|   } | ||||
|  | ||||
|   // Do a benchmark and exit quickly if performance is too low (ugly but needed due to performance fluctuations) | ||||
|   if (max_cheb_time_ms) { | ||||
|     // one round of warmup | ||||
|     auto tmp = src; | ||||
|     GridStopWatch gsw1,gsw2; | ||||
|     gsw1.Start(); | ||||
|     Cheb1(HermOp,src,tmp); | ||||
|     gsw1.Stop(); | ||||
|     Ddwf.ZeroCounters(); | ||||
|     gsw2.Start(); | ||||
|     Cheb1(HermOp,src,tmp); | ||||
|     gsw2.Stop(); | ||||
|     Ddwf.Report(); | ||||
|     std::cout << GridLogMessage << "Performance check; warmup = " << gsw1.Elapsed() << "  test = " << gsw2.Elapsed() << std::endl; | ||||
|     int ms = (int)(gsw2.useconds()/1e3); | ||||
|     if (ms > max_cheb_time_ms) { | ||||
|       std::cout << GridLogMessage << "Performance too poor: " << ms << " ms, cutoff = " << max_cheb_time_ms << " ms" << std::endl; | ||||
|       Grid_finalize(); | ||||
|       return 2; | ||||
|     } | ||||
|  | ||||
|   } | ||||
|  | ||||
|   // First round of Lanczos to get low mode basis | ||||
|   ImplicitlyRestartedLanczos<LatticeFermion> IRL1(Op1,Op1test,Nstop1,Nk1,Nm1,resid1,MaxIt,betastp1,MinRes1); | ||||
|   int Nconv; | ||||
|  | ||||
|   char tag[1024]; | ||||
|   if (!FieldVectorIO::read_argonne(evec,(char *)"checkpoint") || !read_evals(UGrid,(char *)"checkpoint/eigen-values.txt",eval1)) { | ||||
|  | ||||
|     if (simple_krylov_basis) { | ||||
|       quick_krylov_basis(evec,src,Op1,Nstop1); | ||||
|     } else { | ||||
|       IRL1.calc(eval1,evec._v,src,Nconv,false); | ||||
|     } | ||||
|     evec.resize(Nstop1); // and throw away superfluous | ||||
|     eval1.resize(Nstop1); | ||||
|     if (checkpoint_basis) | ||||
|       FieldVectorIO::write_argonne(evec,(char *)"checkpoint"); | ||||
|     if (UGrid->IsBoss() && checkpoint_basis) | ||||
|       write_evals((char *)"checkpoint/eigen-values.txt",eval1); | ||||
|  | ||||
|     Ddwf.Report(); | ||||
|  | ||||
|     if (exit_after_basis_calculation) { | ||||
|       Grid_finalize(); | ||||
|       return 0; | ||||
|     } | ||||
|   } | ||||
|  | ||||
|   // now test eigenvectors | ||||
|   if (!simple_krylov_basis) { | ||||
|     for (int i=0;i<Nstop1;i++){ | ||||
|       auto B = evec[i]; | ||||
|       auto tmp = B; | ||||
|       auto v = B; | ||||
|        | ||||
|       { | ||||
| 	HermOp.HermOp(B,v); | ||||
| 	 | ||||
| 	RealD vnum = real(innerProduct(B,v)); // HermOp. | ||||
| 	RealD vden = norm2(B); | ||||
| 	RealD vv0 = norm2(v); | ||||
| 	RealD eval2 = vnum/vden; | ||||
| 	v -= eval2*B; | ||||
| 	RealD vv = norm2(v); | ||||
| 	 | ||||
| 	std::cout << i << " OP eval = " << eval2 << " (" << eval1[i] << ") " | ||||
| 		  << "res2 = " << vv << " norm2 = " << norm2(B) << std::endl; | ||||
|       } | ||||
|     } | ||||
|   } | ||||
|  | ||||
|   // do second step only if needed | ||||
|   if (Nstop1 <= Nstop2) { | ||||
|      | ||||
|     // Now setup blocking | ||||
|     assert(evec.size() == Nstop1); | ||||
|     BlockedGrid<LatticeFermion> bgrid(FrbGrid, block_size); | ||||
|     BlockProjector<LatticeFermion> pr(evec,bgrid); | ||||
|     pr.createOrthonormalBasis(basis_norm_threshold); | ||||
|     pr.createOrthonormalBasis(basis_norm_threshold); // another round due to precision issues created by local coherence | ||||
|  | ||||
|     constexpr int common_basis_sizes[] = { 60, 250, 400 }; | ||||
|     constexpr int n_common_basis_sizes = sizeof(common_basis_sizes) / sizeof(common_basis_sizes[0]); | ||||
|     switch (Nstop1) { | ||||
| #define BASIS(n) case common_basis_sizes[n]:\ | ||||
|       CoarseGridLanczos<LatticeFermion,common_basis_sizes[n]>\ | ||||
| 	(pr,alpha2,beta,Npoly2,Nstop2,Nk2,Nm2,resid2,betastp2,MaxIt,MinRes2,HermOp,eval1, \ | ||||
| 	 cg_test_enabled,cg_test_maxiter,nsingle,SkipTest2, \ | ||||
| 	 MaxApply2,smoothed_eval_enabled,smoothed_eval_inner,smoothed_eval_outer, \ | ||||
| 	 smoothed_eval_begin,smoothed_eval_end,smoothed_eval_inner_resid); break; | ||||
|       BASIS(0); | ||||
|       BASIS(1); | ||||
|       BASIS(2); | ||||
|     default: | ||||
|       std::cout << GridLogMessage << "Basis size " << Nstop1 << " must be added at compile-time" << std::endl; | ||||
|       std::cout << GridLogMessage << "Currently available sizes: " << std::endl; | ||||
|       for (int i=0;i<n_common_basis_sizes;i++) { | ||||
| 	std::cout << GridLogMessage << "  " << common_basis_sizes[i] << std::endl; | ||||
|       } | ||||
|     } | ||||
|  | ||||
|   } | ||||
|      | ||||
|   Grid_finalize(); | ||||
| } | ||||
|  | ||||
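The switch over common_basis_sizes above is there because the coarse basis size is a template parameter of CoarseGridLanczos, so only sizes instantiated at compile time can be selected at run time. A minimal sketch of that dispatch pattern (runCoarse and dispatchCoarse are hypothetical stand-ins, not names from the commit):

#include <iostream>

// Stand-in for CoarseGridLanczos<LatticeFermion,nbasis>(...): the basis size fixes
// the length of the coarse site vector, so it must be a compile-time constant.
template<int nbasis> void runCoarse() {
  std::cout << "coarse Lanczos with nbasis = " << nbasis << std::endl;
}

void dispatchCoarse(int Nstop1) {
  switch (Nstop1) {              // map the run-time request onto the compiled instances
  case  60: runCoarse<60>();  break;
  case 250: runCoarse<250>(); break;
  case 400: runCoarse<400>(); break;
  default:
    std::cout << "Basis size " << Nstop1 << " must be added at compile-time" << std::endl;
  }
}

int main() { dispatchCoarse(60); }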
							
								
								
									
tests/lanczos/Test_dwf_compressed_lanczos_reorg.cc (new file, 254 lines)
							| @@ -0,0 +1,254 @@ | ||||
|     /************************************************************************************* | ||||
|  | ||||
|     Grid physics library, www.github.com/paboyle/Grid  | ||||
|  | ||||
|     Source file: ./tests/Test_dwf_compressed_lanczos_reorg.cc | ||||
|  | ||||
|     Copyright (C) 2017 | ||||
|  | ||||
| Author: Leans heavily on Christoph Lehner's code | ||||
| Author: Peter Boyle <paboyle@ph.ed.ac.uk> | ||||
|  | ||||
|     This program is free software; you can redistribute it and/or modify | ||||
|     it under the terms of the GNU General Public License as published by | ||||
|     the Free Software Foundation; either version 2 of the License, or | ||||
|     (at your option) any later version. | ||||
|  | ||||
|     This program is distributed in the hope that it will be useful, | ||||
|     but WITHOUT ANY WARRANTY; without even the implied warranty of | ||||
|     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the | ||||
|     GNU General Public License for more details. | ||||
|  | ||||
|     You should have received a copy of the GNU General Public License along | ||||
|     with this program; if not, write to the Free Software Foundation, Inc., | ||||
|     51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. | ||||
|  | ||||
|     See the full license in the file "LICENSE" in the top level distribution directory | ||||
|     *************************************************************************************/ | ||||
|     /*  END LEGAL */ | ||||
| /* | ||||
|  *  Reimplement the badly named "multigrid" lanczos as compressed Lanczos using the features  | ||||
|  *  in Grid that were intended to be used to support blocked Aggregates, from | ||||
|  */ | ||||
| #include <Grid/Grid.h> | ||||
| #include <Grid/algorithms/iterative/ImplicitlyRestartedLanczos.h> | ||||
| #include <Grid/algorithms/iterative/LocalCoherenceLanczos.h> | ||||
|  | ||||
| using namespace std; | ||||
| using namespace Grid; | ||||
| using namespace Grid::QCD; | ||||
|  | ||||
| template<class Fobj,class CComplex,int nbasis> | ||||
| class LocalCoherenceLanczosScidac : public LocalCoherenceLanczos<Fobj,CComplex,nbasis> | ||||
| {  | ||||
| public: | ||||
|   typedef iVector<CComplex,nbasis >           CoarseSiteVector; | ||||
|   typedef Lattice<CoarseSiteVector>           CoarseField; | ||||
|   typedef Lattice<CComplex>   CoarseScalar; // used for inner products on fine field | ||||
|   typedef Lattice<Fobj>          FineField; | ||||
|  | ||||
|   LocalCoherenceLanczosScidac(GridBase *FineGrid,GridBase *CoarseGrid, | ||||
| 			      LinearOperatorBase<FineField> &FineOp, | ||||
| 			      int checkerboard)  | ||||
|     // Base constructor | ||||
|     : LocalCoherenceLanczos<Fobj,CComplex,nbasis>(FineGrid,CoarseGrid,FineOp,checkerboard)  | ||||
|   {}; | ||||
|  | ||||
|   void checkpointFine(std::string evecs_file,std::string evals_file) | ||||
|   { | ||||
|     assert(this->_Aggregate.subspace.size()==nbasis); | ||||
|     emptyUserRecord record; | ||||
|     Grid::QCD::ScidacWriter WR; | ||||
|     WR.open(evecs_file); | ||||
|     for(int k=0;k<nbasis;k++) { | ||||
|       WR.writeScidacFieldRecord(this->_Aggregate.subspace[k],record); | ||||
|     } | ||||
|     WR.close(); | ||||
|      | ||||
|     XmlWriter WRx(evals_file); | ||||
|     write(WRx,"evals",this->evals_fine); | ||||
|   } | ||||
|  | ||||
|   void checkpointFineRestore(std::string evecs_file,std::string evals_file) | ||||
|   { | ||||
|     this->evals_fine.resize(nbasis); | ||||
|     this->_Aggregate.subspace.resize(nbasis,this->_FineGrid); | ||||
|      | ||||
|     std::cout << GridLogIRL<< "checkpointFineRestore:  Reading evals from "<<evals_file<<std::endl; | ||||
|     XmlReader RDx(evals_file); | ||||
|     read(RDx,"evals",this->evals_fine); | ||||
|      | ||||
|     assert(this->evals_fine.size()==nbasis); | ||||
|      | ||||
|     std::cout << GridLogIRL<< "checkpointFineRestore:  Reading evecs from "<<evecs_file<<std::endl; | ||||
|     emptyUserRecord record; | ||||
|     Grid::QCD::ScidacReader RD ; | ||||
|     RD.open(evecs_file); | ||||
|     for(int k=0;k<nbasis;k++) { | ||||
|       this->_Aggregate.subspace[k].checkerboard=this->_checkerboard; | ||||
|       RD.readScidacFieldRecord(this->_Aggregate.subspace[k],record); | ||||
|        | ||||
|     } | ||||
|     RD.close(); | ||||
|   } | ||||
|  | ||||
|   void checkpointCoarse(std::string evecs_file,std::string evals_file) | ||||
|   { | ||||
|     int n = this->evec_coarse.size(); | ||||
|     emptyUserRecord record; | ||||
|     Grid::QCD::ScidacWriter WR; | ||||
|     WR.open(evecs_file); | ||||
|     for(int k=0;k<n;k++) { | ||||
|       WR.writeScidacFieldRecord(this->evec_coarse[k],record); | ||||
|     } | ||||
|     WR.close(); | ||||
|      | ||||
|     XmlWriter WRx(evals_file); | ||||
|     write(WRx,"evals",this->evals_coarse); | ||||
|   } | ||||
|  | ||||
|   void checkpointCoarseRestore(std::string evecs_file,std::string evals_file,int nvec) | ||||
|   { | ||||
|     std::cout << "resizing coarse vecs to " << nvec<< std::endl; | ||||
|     this->evals_coarse.resize(nvec); | ||||
|     this->evec_coarse.resize(nvec,this->_CoarseGrid); | ||||
|     std::cout << GridLogIRL<< "checkpointCoarseRestore:  Reading evals from "<<evals_file<<std::endl; | ||||
|     XmlReader RDx(evals_file); | ||||
|     read(RDx,"evals",this->evals_coarse); | ||||
|  | ||||
|     assert(this->evals_coarse.size()==nvec); | ||||
|     emptyUserRecord record; | ||||
|     std::cout << GridLogIRL<< "checkpointCoarseRestore:  Reading evecs from "<<evecs_file<<std::endl; | ||||
|     Grid::QCD::ScidacReader RD ; | ||||
|     RD.open(evecs_file); | ||||
|     for(int k=0;k<nvec;k++) { | ||||
|       RD.readScidacFieldRecord(this->evec_coarse[k],record); | ||||
|     } | ||||
|     RD.close(); | ||||
|   } | ||||
| }; | ||||
|  | ||||
| int main (int argc, char ** argv) { | ||||
|  | ||||
|   Grid_init(&argc,&argv); | ||||
|   GridLogIRL.TimingMode(1); | ||||
|  | ||||
|   LocalCoherenceLanczosParams Params; | ||||
|   { | ||||
|     Params.omega.resize(10); | ||||
|     Params.blockSize.resize(5); | ||||
|     XmlWriter writer("Params_template.xml"); | ||||
|     write(writer,"Params",Params); | ||||
|     std::cout << GridLogMessage << " Written Params_template.xml" <<std::endl; | ||||
|   } | ||||
|    | ||||
|   {  | ||||
|     XmlReader reader(std::string("./Params.xml")); | ||||
|     read(reader, "Params", Params); | ||||
|   } | ||||
|  | ||||
|   int     Ls = (int)Params.omega.size(); | ||||
|   RealD mass = Params.mass; | ||||
|   RealD M5   = Params.M5; | ||||
|   std::vector<int> blockSize = Params.blockSize; | ||||
|  | ||||
|   // Grids | ||||
|   GridCartesian         * UGrid     = SpaceTimeGrid::makeFourDimGrid(GridDefaultLatt(), | ||||
| 								     GridDefaultSimd(Nd,vComplex::Nsimd()), | ||||
| 								     GridDefaultMpi()); | ||||
|   GridRedBlackCartesian * UrbGrid   = SpaceTimeGrid::makeFourDimRedBlackGrid(UGrid); | ||||
|   GridCartesian         * FGrid     = SpaceTimeGrid::makeFiveDimGrid(Ls,UGrid); | ||||
|   GridRedBlackCartesian * FrbGrid   = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls,UGrid); | ||||
|  | ||||
|   std::vector<int> fineLatt     = GridDefaultLatt(); | ||||
|   int dims=fineLatt.size(); | ||||
|   assert(blockSize.size()==dims+1); | ||||
|   std::vector<int> coarseLatt(dims); | ||||
|   std::vector<int> coarseLatt5d ; | ||||
|  | ||||
|   for (int d=0;d<coarseLatt.size();d++){ | ||||
|     coarseLatt[d] = fineLatt[d]/blockSize[d];    assert(coarseLatt[d]*blockSize[d]==fineLatt[d]); | ||||
|   } | ||||
|  | ||||
|   std::cout << GridLogMessage<< " 5d coarse lattice is "; | ||||
|   for (int i=0;i<coarseLatt.size();i++){ | ||||
|     std::cout << coarseLatt[i]<<"x"; | ||||
|   }  | ||||
|   int cLs = Ls/blockSize[dims]; assert(cLs*blockSize[dims]==Ls); | ||||
|   std::cout << cLs<<std::endl; | ||||
|    | ||||
|   GridCartesian         * CoarseGrid4    = SpaceTimeGrid::makeFourDimGrid(coarseLatt, GridDefaultSimd(Nd,vComplex::Nsimd()),GridDefaultMpi()); | ||||
|   GridRedBlackCartesian * CoarseGrid4rb  = SpaceTimeGrid::makeFourDimRedBlackGrid(CoarseGrid4); | ||||
|   GridCartesian         * CoarseGrid5    = SpaceTimeGrid::makeFiveDimGrid(cLs,CoarseGrid4); | ||||
|   GridRedBlackCartesian * CoarseGrid5rb  = SpaceTimeGrid::makeFourDimRedBlackGrid(CoarseGrid5); | ||||
|  | ||||
|   // Gauge field | ||||
|   LatticeGaugeField Umu(UGrid); | ||||
|   FieldMetaData header; | ||||
|   NerscIO::readConfiguration(Umu,header,Params.config); | ||||
|   std::cout << GridLogMessage << "Lattice dimensions: " << GridDefaultLatt() << "   Ls: " << Ls << std::endl; | ||||
|  | ||||
|   // ZMobius EO Operator | ||||
|   ZMobiusFermionR Ddwf(Umu, *FGrid, *FrbGrid, *UGrid, *UrbGrid, mass, M5, Params.omega,1.,0.); | ||||
|   SchurDiagTwoOperator<ZMobiusFermionR,LatticeFermion> HermOp(Ddwf); | ||||
|  | ||||
|   // Eigenvector storage | ||||
|   LanczosParams fine  =Params.FineParams;   | ||||
|   LanczosParams coarse=Params.CoarseParams;   | ||||
|  | ||||
|   const int Ns1 = fine.Nstop;   const int Ns2 = coarse.Nstop; | ||||
|   const int Nk1 = fine.Nk;      const int Nk2 = coarse.Nk; | ||||
|   const int Nm1 = fine.Nm;      const int Nm2 = coarse.Nm; | ||||
|  | ||||
|   std::cout << GridLogMessage << "Keep " << fine.Nstop   << " fine   vectors" << std::endl; | ||||
|   std::cout << GridLogMessage << "Keep " << coarse.Nstop << " coarse vectors" << std::endl; | ||||
|   assert(Nm2 >= Nm1); | ||||
|  | ||||
|   const int nbasis= 60; | ||||
|   assert(nbasis==Ns1); | ||||
|   LocalCoherenceLanczosScidac<vSpinColourVector,vTComplex,nbasis> _LocalCoherenceLanczos(FrbGrid,CoarseGrid5rb,HermOp,Odd); | ||||
|   std::cout << GridLogMessage << "Constructed LocalCoherenceLanczos" << std::endl; | ||||
|  | ||||
|   assert( (Params.doFine)||(Params.doFineRead)); | ||||
|  | ||||
|   if ( Params.doFine ) {  | ||||
|     std::cout << GridLogMessage << "Performing fine grid IRL Nstop "<< Ns1 << " Nk "<<Nk1<<" Nm "<<Nm1<< std::endl; | ||||
|     _LocalCoherenceLanczos.calcFine(fine.Cheby, | ||||
| 		 fine.Nstop,fine.Nk,fine.Nm, | ||||
| 		 fine.resid,fine.MaxIt,  | ||||
| 		 fine.betastp,fine.MinRes); | ||||
|  | ||||
|     std::cout << GridLogIRL<<"Checkpointing Fine evecs"<<std::endl; | ||||
|     _LocalCoherenceLanczos.checkpointFine(std::string("evecs.scidac"),std::string("evals.xml")); | ||||
|     _LocalCoherenceLanczos.testFine(fine.resid*100.0); // Coarse check | ||||
|     _LocalCoherenceLanczos.Orthogonalise(); | ||||
|   } | ||||
|  | ||||
|   if ( Params.doFineRead ) {  | ||||
|     _LocalCoherenceLanczos.checkpointFineRestore(std::string("evecs.scidac"),std::string("evals.xml")); | ||||
|     _LocalCoherenceLanczos.testFine(fine.resid*100.0); // Coarse check | ||||
|     _LocalCoherenceLanczos.Orthogonalise(); | ||||
|   } | ||||
|  | ||||
|   if ( Params.doCoarse ) { | ||||
|     std::cout << GridLogMessage << "Orthogonalising " << nbasis<<" Nm "<<Nm2<< std::endl; | ||||
|      | ||||
|     std::cout << GridLogMessage << "Performing coarse grid IRL Nstop "<< Ns2<< " Nk "<<Nk2<<" Nm "<<Nm2<< std::endl; | ||||
|     _LocalCoherenceLanczos.calcCoarse(coarse.Cheby,Params.Smoother,Params.coarse_relax_tol, | ||||
| 			      coarse.Nstop, coarse.Nk,coarse.Nm, | ||||
| 			      coarse.resid, coarse.MaxIt,  | ||||
| 			      coarse.betastp,coarse.MinRes); | ||||
|  | ||||
|  | ||||
|     std::cout << GridLogIRL<<"Checkpointing coarse evecs"<<std::endl; | ||||
|     _LocalCoherenceLanczos.checkpointCoarse(std::string("evecs.coarse.scidac"),std::string("evals.coarse.xml")); | ||||
|   } | ||||
|  | ||||
|   if ( Params.doCoarseRead ) { | ||||
|     // Verify we can reread ??? | ||||
|     _LocalCoherenceLanczos.checkpointCoarseRestore(std::string("evecs.coarse.scidac"),std::string("evals.coarse.xml"),coarse.Nstop); | ||||
|     _LocalCoherenceLanczos.testCoarse(coarse.resid*100.0,Params.Smoother,Params.coarse_relax_tol); // Coarse check | ||||
|   } | ||||
|   Grid_finalize(); | ||||
| } | ||||
|  | ||||
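The compression in Test_dwf_compressed_lanczos_reorg.cc rests on local coherence: the nbasis fine eigenvectors held in _Aggregate.subspace, once block-orthonormalised, approximate higher eigenvectors block by block, so the coarse-grid Lanczos only manipulates nbasis complex coordinates per block instead of full fine-grid vectors. A minimal sketch of the restriction/prolongation pair this relies on, reusing the grids and types of the test (a sketch, not part of the commit):

// Assumes: FrbGrid, CoarseGrid5rb, nbasis and the block-orthonormalised fine
// eigenvectors `subspace` (i.e. _Aggregate.subspace) from the test above.
typedef Lattice<iVector<vTComplex,nbasis> > CoarseField;

LatticeFermion fine (FrbGrid);        // fine-grid vector to be compressed
CoarseField    coord(CoarseGrid5rb);  // its coordinates in the block basis

blockProject(coord, fine, subspace);  // restrict:  per block b, coord_b[k] = <subspace[k], fine>_b
blockPromote(coord, fine, subspace);  // prolongate: fine = sum_k coord_b[k] * subspace[k] on each block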
| @@ -84,11 +84,12 @@ int main (int argc, char ** argv) | ||||
| 
 | ||||
|   std::vector<double> Coeffs { 0.,-1.}; | ||||
|   Polynomial<FermionField> PolyX(Coeffs); | ||||
|   Chebyshev<FermionField> Cheb(0.2,5.,11); | ||||
| //  ChebyshevLanczos<LatticeFermion> Cheb(9.,1.,0.,20); | ||||
| //  Cheb.csv(std::cout); | ||||
| //  exit(-24); | ||||
|   ImplicitlyRestartedLanczos<FermionField> IRL(HermOp,Cheb,Nstop,Nk,Nm,resid,MaxIt); | ||||
|   Chebyshev<FermionField> Cheby(0.2,5.,11); | ||||
|  | ||||
|   FunctionHermOp<FermionField> OpCheby(Cheby,HermOp); | ||||
|      PlainHermOp<FermionField> Op     (HermOp); | ||||
| 
 | ||||
|   ImplicitlyRestartedLanczos<FermionField> IRL(OpCheby,Op,Nstop,Nk,Nm,resid,MaxIt); | ||||
| 
 | ||||
|    | ||||
|   std::vector<RealD>          eval(Nm); | ||||
| @@ -119,12 +119,13 @@ int main (int argc, char ** argv) | ||||
|   RealD beta  = 0.1; | ||||
|   RealD mu    = 0.0; | ||||
|   int order = 11; | ||||
|   ChebyshevLanczos<LatticeComplex> Cheby(alpha,beta,mu,order); | ||||
|   Chebyshev<LatticeComplex> Cheby(alpha,beta,order); | ||||
|   std::ofstream file("cheby.dat"); | ||||
|   Cheby.csv(file); | ||||
| 
 | ||||
|   HermOpOperatorFunction<LatticeComplex> X; | ||||
|   DumbOperator<LatticeComplex> HermOp(grid); | ||||
|   FunctionHermOp<LatticeComplex> OpCheby(Cheby,HermOp); | ||||
|      PlainHermOp<LatticeComplex> Op(HermOp); | ||||
| 
 | ||||
|   const int Nk = 40; | ||||
|   const int Nm = 80; | ||||
| @@ -133,8 +134,9 @@ int main (int argc, char ** argv) | ||||
|   int Nconv; | ||||
|   RealD eresid = 1.0e-6; | ||||
| 
 | ||||
|   ImplicitlyRestartedLanczos<LatticeComplex> IRL(HermOp,X,Nk,Nk,Nm,eresid,Nit); | ||||
|   ImplicitlyRestartedLanczos<LatticeComplex> ChebyIRL(HermOp,Cheby,Nk,Nk,Nm,eresid,Nit); | ||||
| 
 | ||||
|   ImplicitlyRestartedLanczos<LatticeComplex> IRL(Op,Op,Nk,Nk,Nm,eresid,Nit); | ||||
|   ImplicitlyRestartedLanczos<LatticeComplex> ChebyIRL(OpCheby,Op,Nk,Nk,Nm,eresid,Nit); | ||||
| 
 | ||||
|   LatticeComplex src(grid); gaussian(RNG,src); | ||||
|   { | ||||
| @@ -86,9 +86,12 @@ int main(int argc, char** argv) { | ||||
|  | ||||
|   std::vector<double> Coeffs{0, 1.}; | ||||
|   Polynomial<FermionField> PolyX(Coeffs); | ||||
|   Chebyshev<FermionField> Cheb(0.0, 10., 12); | ||||
|   ImplicitlyRestartedLanczos<FermionField> IRL(HermOp, PolyX, Nstop, Nk, Nm, | ||||
|                                                resid, MaxIt); | ||||
|   Chebyshev<FermionField> Cheby(0.0, 10., 12); | ||||
|  | ||||
|   FunctionHermOp<FermionField> OpCheby(Cheby,HermOp); | ||||
|      PlainHermOp<FermionField> Op     (HermOp); | ||||
|  | ||||
|   ImplicitlyRestartedLanczos<FermionField> IRL(OpCheby, Op, Nstop, Nk, Nm, resid, MaxIt); | ||||
|  | ||||
|   std::vector<RealD> eval(Nm); | ||||
|   FermionField src(FGrid); | ||||
| @@ -555,13 +555,13 @@ int main (int argc, char ** argv) | ||||
|   std::cout<<GridLogMessage << "Calling Aggregation class to build subspace" <<std::endl; | ||||
|   std::cout<<GridLogMessage << "**************************************************"<< std::endl; | ||||
|   MdagMLinearOperator<DomainWallFermionR,LatticeFermion> HermDefOp(Ddwf); | ||||
|   Subspace Aggregates(Coarse5d,FGrid); | ||||
|   Subspace Aggregates(Coarse5d,FGrid,0); | ||||
|   //  Aggregates.CreateSubspace(RNG5,HermDefOp,nbasis); | ||||
|   assert ( (nbasis & 0x1)==0); | ||||
|   int nb=nbasis/2; | ||||
|   std::cout<<GridLogMessage << " nbasis/2 = "<<nb<<std::endl; | ||||
|   //  Aggregates.CreateSubspace(RNG5,HermDefOp,nb); | ||||
|   Aggregates.CreateSubspaceLanczos(RNG5,HermDefOp,nb); | ||||
|   Aggregates.CreateSubspace(RNG5,HermDefOp,nb); | ||||
|   //  Aggregates.CreateSubspaceLanczos(RNG5,HermDefOp,nb); | ||||
|   for(int n=0;n<nb;n++){ | ||||
|     G5R5(Aggregates.subspace[n+nb],Aggregates.subspace[n]); | ||||
|     std::cout<<GridLogMessage<<n<<" subspace "<<norm2(Aggregates.subspace[n+nb])<<" "<<norm2(Aggregates.subspace[n]) <<std::endl; | ||||
|   | ||||
| @@ -38,7 +38,7 @@ int main (int argc, char ** argv) | ||||
|   typedef typename DomainWallFermionR::ComplexField ComplexField;  | ||||
|   typename DomainWallFermionR::ImplParams params;  | ||||
|  | ||||
|   const int Ls=8; | ||||
|   const int Ls=4; | ||||
|  | ||||
|   Grid_init(&argc,&argv); | ||||
|  | ||||
| @@ -47,42 +47,51 @@ int main (int argc, char ** argv) | ||||
|   std::vector<int> mpi_layout  = GridDefaultMpi(); | ||||
|   std::vector<int> mpi_split (mpi_layout.size(),1); | ||||
|  | ||||
|   std::cout << "UGrid (world root)"<<std::endl; | ||||
|   GridCartesian         * UGrid   = SpaceTimeGrid::makeFourDimGrid(GridDefaultLatt(), GridDefaultSimd(Nd,vComplex::Nsimd()),GridDefaultMpi()); | ||||
|  | ||||
|   std::cout << "FGrid (child of UGrid)"<<std::endl; | ||||
|   GridCartesian         * FGrid   = SpaceTimeGrid::makeFiveDimGrid(Ls,UGrid); | ||||
|  | ||||
|   int nrhs = UGrid->RankCount() ; | ||||
|   GridRedBlackCartesian * rbGrid  = SpaceTimeGrid::makeFourDimRedBlackGrid(UGrid); | ||||
|   GridRedBlackCartesian * FrbGrid = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls,UGrid); | ||||
|  | ||||
|   ///////////////////////////////////////////// | ||||
|   // Split into 1^4 mpi communicators | ||||
|   ///////////////////////////////////////////// | ||||
|   std::cout << "SGrid (world root)"<<std::endl; | ||||
|   for(int i=0;i<argc;i++){ | ||||
|     if(std::string(argv[i]) == "--split"){ | ||||
|       for(int k=0;k<mpi_layout.size();k++){ | ||||
| 	std::stringstream ss;  | ||||
| 	ss << argv[i+1+k];  | ||||
| 	ss >> mpi_split[k]; | ||||
|       } | ||||
|       break; | ||||
|     } | ||||
|   } | ||||
|  | ||||
|   int nrhs = 1; | ||||
|   int me; | ||||
|   for(int i=0;i<mpi_layout.size();i++) nrhs *= (mpi_layout[i]/mpi_split[i]); | ||||
|  | ||||
|   GridCartesian         * SGrid = new GridCartesian(GridDefaultLatt(), | ||||
| 						    GridDefaultSimd(Nd,vComplex::Nsimd()), | ||||
| 						    mpi_split, | ||||
| 						    *UGrid);  | ||||
| 						    *UGrid,me);  | ||||
|  | ||||
|   GridCartesian         * SFGrid   = SpaceTimeGrid::makeFiveDimGrid(Ls,SGrid); | ||||
|   std::cout << "SFGrid"<<std::endl; | ||||
|   GridRedBlackCartesian * SrbGrid  = SpaceTimeGrid::makeFourDimRedBlackGrid(SGrid); | ||||
|   std::cout << "SrbGrid"<<std::endl; | ||||
|   GridRedBlackCartesian * SFrbGrid = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls,SGrid); | ||||
|   std::cout << "SFrbGrid"<<std::endl; | ||||
|  | ||||
|   /////////////////////////////////////////////// | ||||
|   // Set up the problem as a 4d spreadout job | ||||
|   /////////////////////////////////////////////// | ||||
|   std::vector<int> seeds({1,2,3,4}); | ||||
|  | ||||
|   GridParallelRNG pRNG(UGrid );  pRNG.SeedFixedIntegers(seeds); | ||||
|   GridParallelRNG pRNG5(FGrid);  pRNG5.SeedFixedIntegers(seeds); | ||||
|   std::vector<FermionField>    src(nrhs,FGrid); | ||||
|   std::vector<FermionField> src_chk(nrhs,FGrid); | ||||
|   std::vector<FermionField> result(nrhs,FGrid); | ||||
|   FermionField tmp(FGrid); | ||||
|  | ||||
|   for(int s=0;s<nrhs;s++) random(pRNG5,src[s]); | ||||
|   for(int s=0;s<nrhs;s++) result[s] = zero; | ||||
|   for(int s=0;s<nrhs;s++) result[s]=zero; | ||||
|  | ||||
|   LatticeGaugeField Umu(UGrid); SU3::HotConfiguration(pRNG,Umu); | ||||
|  | ||||
| @@ -96,9 +105,11 @@ int main (int argc, char ** argv) | ||||
|   emptyUserRecord record; | ||||
|   std::string file("./scratch.scidac"); | ||||
|   std::string filef("./scratch.scidac.ferm"); | ||||
|   int me = UGrid->ThisRank(); | ||||
|  | ||||
|   LatticeGaugeField s_Umu(SGrid); | ||||
|   FermionField s_src(SFGrid); | ||||
|   FermionField s_src_split(SFGrid); | ||||
|   FermionField s_tmp(SFGrid); | ||||
|   FermionField s_res(SFGrid); | ||||
|  | ||||
|   { | ||||
| @@ -157,6 +168,24 @@ int main (int argc, char ** argv) | ||||
|     FGrid->Barrier(); | ||||
|   } | ||||
|  | ||||
|   /////////////////////////////////////////////////////////////// | ||||
|   // split the source out using MPI instead of I/O | ||||
|   /////////////////////////////////////////////////////////////// | ||||
|   std::cout << GridLogMessage << " Splitting the grid data "<<std::endl; | ||||
|   Grid_split  (src,s_src_split); | ||||
|   std::cout << GridLogMessage << " Finished splitting the grid data "<<std::endl; | ||||
|   for(int n=0;n<nrhs;n++){ | ||||
|     std::cout <<GridLogMessage<<"Full "<< n <<" "<< norm2(src[n])<<std::endl; | ||||
|   } | ||||
|   s_tmp = s_src_split - s_src; | ||||
|   for(int n=0;n<nrhs;n++){ | ||||
|     FGrid->Barrier(); | ||||
|     if ( n==me ) { | ||||
|       std::cout << GridLogMessage<<"Split "<< me << " " << norm2(s_src_split) << " " << norm2(s_src)<< " diff " << norm2(s_tmp)<<std::endl; | ||||
|     } | ||||
|     FGrid->Barrier(); | ||||
|   } | ||||
|  | ||||
|  | ||||
|   /////////////////////////////////////////////////////////////// | ||||
|   // Set up N-solvers as trivially parallel | ||||
| @@ -164,6 +193,7 @@ int main (int argc, char ** argv) | ||||
|  | ||||
|   RealD mass=0.01; | ||||
|   RealD M5=1.8; | ||||
|   DomainWallFermionR Dchk(Umu,*FGrid,*FrbGrid,*UGrid,*rbGrid,mass,M5); | ||||
|   DomainWallFermionR Ddwf(s_Umu,*SFGrid,*SFrbGrid,*SGrid,*SrbGrid,mass,M5); | ||||
|  | ||||
|   std::cout << GridLogMessage << "****************************************************************** "<<std::endl; | ||||
| @@ -171,25 +201,40 @@ int main (int argc, char ** argv) | ||||
|   std::cout << GridLogMessage << "****************************************************************** "<<std::endl; | ||||
|  | ||||
|   MdagMLinearOperator<DomainWallFermionR,FermionField> HermOp(Ddwf); | ||||
|   ConjugateGradient<FermionField> CG((1.0e-8/(me+1)),10000); | ||||
|   MdagMLinearOperator<DomainWallFermionR,FermionField> HermOpCk(Dchk); | ||||
|   ConjugateGradient<FermionField> CG((1.0e-5/(me+1)),10000); | ||||
|   s_res = zero; | ||||
|   CG(HermOp,s_src,s_res); | ||||
|  | ||||
|   /////////////////////////////////////// | ||||
|   // Share the information | ||||
|   /////////////////////////////////////// | ||||
|   ///////////////////////////////////////////////////////////// | ||||
|   // Report how long they all took | ||||
|   ///////////////////////////////////////////////////////////// | ||||
|   std::vector<uint32_t> iterations(nrhs,0); | ||||
|   iterations[me] = CG.IterationsToComplete; | ||||
|  | ||||
|   for(int n=0;n<nrhs;n++){ | ||||
|     UGrid->GlobalSum(iterations[n]); | ||||
|     std::cout << GridLogMessage<<" Rank "<<n<<" "<< iterations[n]<<" CG iterations"<<std::endl; | ||||
|   } | ||||
|  | ||||
|   ///////////////////////////////////////////////////////////// | ||||
|   // Report how long they all took | ||||
|   // Gather and residual check on the results | ||||
|   ///////////////////////////////////////////////////////////// | ||||
|   for(int r=0;r<nrhs;r++){ | ||||
|     std::cout << GridLogMessage<<" Rank "<<r<<" "<< iterations[r]<<" CG iterations"<<std::endl; | ||||
|   std::cout << GridLogMessage<< "Unsplitting the result"<<std::endl; | ||||
|   Grid_unsplit(result,s_res); | ||||
|   /* | ||||
|   Grid_unsplit(src_chk,s_src); | ||||
|   for(int n=0;n<nrhs;n++){ | ||||
|     tmp = src[n]-src_chk[n]; | ||||
|     std::cout << " src_chk "<<n<<" "<<norm2(src_chk[n])<<" " <<norm2(src[n])<<" " <<norm2(tmp)<< std::endl; | ||||
|     std::cout << " diff " <<tmp<<std::endl; | ||||
|   } | ||||
|   */ | ||||
|   std::cout << GridLogMessage<< "Checking the residuals"<<std::endl; | ||||
|   for(int n=0;n<nrhs;n++){ | ||||
|     HermOpCk.HermOp(result[n],tmp); tmp = tmp - src[n]; | ||||
|     std::cout << GridLogMessage<<" resid["<<n<<"]  "<< norm2(tmp)<<std::endl; | ||||
|   } | ||||
|  | ||||
|   Grid_finalize(); | ||||
| } | ||||
|   | ||||
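Stripped of the I/O cross-checks, the pattern this change (and the new multi-RHS tests below) exercises is: scatter the nrhs full-grid sources onto the split communicators, run one ordinary CG per sub-grid, gather the solutions back, and verify them with an operator built on the original grid. A condensed sketch using the names from the test:

Grid_split  (src, s_src);        // nrhs full-grid sources -> one source per sub-communicator
CG(HermOp, s_src, s_res);        // each sub-communicator runs its own independent solve
Grid_unsplit(result, s_res);     // gather the per-sub-grid solutions back into full-grid fields

for (int n = 0; n < nrhs; n++) { // residual check against the unsplit operator Dchk
  HermOpCk.HermOp(result[n], tmp);
  tmp = tmp - src[n];
  std::cout << GridLogMessage << " resid[" << n << "] " << norm2(tmp) << std::endl;
}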
							
								
								
									
tests/solver/Test_dwf_mrhs_cg_mpi.cc (new file, 223 lines)
							| @@ -0,0 +1,223 @@ | ||||
|    /************************************************************************************* | ||||
|  | ||||
|     Grid physics library, www.github.com/paboyle/Grid  | ||||
|  | ||||
|     Source file: ./tests/Test_dwf_mrhs_cg.cc | ||||
|  | ||||
|     Copyright (C) 2015 | ||||
|  | ||||
| Author: Peter Boyle <paboyle@ph.ed.ac.uk> | ||||
|  | ||||
|     This program is free software; you can redistribute it and/or modify | ||||
|     it under the terms of the GNU General Public License as published by | ||||
|     the Free Software Foundation; either version 2 of the License, or | ||||
|     (at your option) any later version. | ||||
|  | ||||
|     This program is distributed in the hope that it will be useful, | ||||
|     but WITHOUT ANY WARRANTY; without even the implied warranty of | ||||
|     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the | ||||
|     GNU General Public License for more details. | ||||
|  | ||||
|     You should have received a copy of the GNU General Public License along | ||||
|     with this program; if not, write to the Free Software Foundation, Inc., | ||||
|     51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. | ||||
|  | ||||
|     See the full license in the file "LICENSE" in the top level distribution directory | ||||
|     *************************************************************************************/ | ||||
|     /*  END LEGAL */ | ||||
| #include <Grid/Grid.h> | ||||
| #include <Grid/algorithms/iterative/BlockConjugateGradient.h> | ||||
|  | ||||
| using namespace std; | ||||
| using namespace Grid; | ||||
| using namespace Grid::QCD; | ||||
|  | ||||
| int main (int argc, char ** argv) | ||||
| { | ||||
|   typedef typename DomainWallFermionR::FermionField FermionField;  | ||||
|   typedef typename DomainWallFermionR::ComplexField ComplexField;  | ||||
|   typename DomainWallFermionR::ImplParams params;  | ||||
|  | ||||
|   const int Ls=4; | ||||
|  | ||||
|   Grid_init(&argc,&argv); | ||||
|  | ||||
|   std::vector<int> latt_size   = GridDefaultLatt(); | ||||
|   std::vector<int> simd_layout = GridDefaultSimd(Nd,vComplex::Nsimd()); | ||||
|   std::vector<int> mpi_layout  = GridDefaultMpi(); | ||||
|   std::vector<int> mpi_split (mpi_layout.size(),1); | ||||
|  | ||||
|   GridCartesian         * UGrid   = SpaceTimeGrid::makeFourDimGrid(GridDefaultLatt(),  | ||||
| 								   GridDefaultSimd(Nd,vComplex::Nsimd()), | ||||
| 								   GridDefaultMpi()); | ||||
|   GridCartesian         * FGrid   = SpaceTimeGrid::makeFiveDimGrid(Ls,UGrid); | ||||
|   GridRedBlackCartesian * rbGrid  = SpaceTimeGrid::makeFourDimRedBlackGrid(UGrid); | ||||
|   GridRedBlackCartesian * FrbGrid = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls,UGrid); | ||||
|  | ||||
|   ///////////////////////////////////////////// | ||||
|   // Split into 1^4 mpi communicators | ||||
|   ///////////////////////////////////////////// | ||||
|  | ||||
|   for(int i=0;i<argc;i++){ | ||||
|     if(std::string(argv[i]) == "--split"){ | ||||
|       for(int k=0;k<mpi_layout.size();k++){ | ||||
| 	std::stringstream ss;  | ||||
| 	ss << argv[i+1+k];  | ||||
| 	ss >> mpi_split[k]; | ||||
|       } | ||||
|       break; | ||||
|     } | ||||
|   } | ||||
|  | ||||
|   int nrhs = 1; | ||||
|   int me; | ||||
|   for(int i=0;i<mpi_layout.size();i++) nrhs *= (mpi_layout[i]/mpi_split[i]); | ||||
|  | ||||
|   GridCartesian         * SGrid = new GridCartesian(GridDefaultLatt(), | ||||
| 						    GridDefaultSimd(Nd,vComplex::Nsimd()), | ||||
| 						    mpi_split, | ||||
| 						    *UGrid,me);  | ||||
|  | ||||
|   GridCartesian         * SFGrid   = SpaceTimeGrid::makeFiveDimGrid(Ls,SGrid); | ||||
|   GridRedBlackCartesian * SrbGrid  = SpaceTimeGrid::makeFourDimRedBlackGrid(SGrid); | ||||
|   GridRedBlackCartesian * SFrbGrid = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls,SGrid); | ||||
|  | ||||
|   /////////////////////////////////////////////// | ||||
|   // Set up the problem as a 4d spreadout job | ||||
|   /////////////////////////////////////////////// | ||||
|   std::vector<int> seeds({1,2,3,4}); | ||||
|  | ||||
|   GridParallelRNG pRNG(UGrid );  pRNG.SeedFixedIntegers(seeds); | ||||
|   GridParallelRNG pRNG5(FGrid);  pRNG5.SeedFixedIntegers(seeds); | ||||
|   std::vector<FermionField>    src(nrhs,FGrid); | ||||
|   std::vector<FermionField> src_chk(nrhs,FGrid); | ||||
|   std::vector<FermionField> result(nrhs,FGrid); | ||||
|   FermionField tmp(FGrid); | ||||
|  | ||||
|   for(int s=0;s<nrhs;s++) result[s]=zero; | ||||
| #undef LEXICO_TEST | ||||
| #ifdef LEXICO_TEST | ||||
|   { | ||||
|     LatticeFermion lex(FGrid);  lex = zero; | ||||
|     LatticeFermion ftmp(FGrid); | ||||
|     Integer stride =10000; | ||||
|     double nrm; | ||||
|     LatticeComplex coor(FGrid); | ||||
|     for(int d=0;d<5;d++){ | ||||
|       LatticeCoordinate(coor,d); | ||||
|       ftmp = stride; | ||||
|       ftmp = ftmp * coor; | ||||
|       lex = lex + ftmp; | ||||
|       stride=stride/10; | ||||
|     } | ||||
|     for(int s=0;s<nrhs;s++) { | ||||
|       src[s]=lex; | ||||
|       ftmp = 1000*1000*s; | ||||
|       src[s] = src[s] + ftmp; | ||||
|     }     | ||||
|   } | ||||
| #else | ||||
|   for(int s=0;s<nrhs;s++) { | ||||
|     random(pRNG5,src[s]); | ||||
|     tmp = 100.0*s; | ||||
|     src[s] = (src[s] * 0.1) + tmp; | ||||
|     std::cout << " src ]"<<s<<"] "<<norm2(src[s])<<std::endl; | ||||
|   } | ||||
| #endif | ||||
|  | ||||
|   for(int n =0 ; n< nrhs ; n++) {  | ||||
|     std::cout << " src"<<n<<"\n"<< src[n] <<std::endl; | ||||
|   } | ||||
|  | ||||
|   LatticeGaugeField Umu(UGrid); SU3::HotConfiguration(pRNG,Umu); | ||||
|  | ||||
|   ///////////////// | ||||
|   // MPI only sends | ||||
|   ///////////////// | ||||
|   LatticeGaugeField s_Umu(SGrid); | ||||
|   FermionField s_src(SFGrid); | ||||
|   FermionField s_tmp(SFGrid); | ||||
|   FermionField s_res(SFGrid); | ||||
|  | ||||
|   /////////////////////////////////////////////////////////////// | ||||
|   // split the source out using MPI instead of I/O | ||||
|   /////////////////////////////////////////////////////////////// | ||||
|   Grid_split  (Umu,s_Umu); | ||||
|   Grid_split  (src,s_src); | ||||
|   std::cout << " split rank  " <<me << " s_src "<<norm2(s_src)<<std::endl; | ||||
|   std::cout << " s_src\n "<< s_src <<std::endl; | ||||
|  | ||||
| #ifdef LEXICO_TEST | ||||
|   FermionField s_src_tmp(SFGrid); | ||||
|   FermionField s_src_diff(SFGrid); | ||||
|   { | ||||
|     LatticeFermion lex(SFGrid);  lex = zero; | ||||
|     LatticeFermion ftmp(SFGrid); | ||||
|     Integer stride =10000; | ||||
|     double nrm; | ||||
|     LatticeComplex coor(SFGrid); | ||||
|     for(int d=0;d<5;d++){ | ||||
|       LatticeCoordinate(coor,d); | ||||
|       ftmp = stride; | ||||
|       ftmp = ftmp * coor; | ||||
|       lex = lex + ftmp; | ||||
|       stride=stride/10; | ||||
|     } | ||||
|     s_src_tmp=lex; | ||||
|     ftmp = 1000*1000*me; | ||||
|     s_src_tmp = s_src_tmp + ftmp; | ||||
|   } | ||||
|   s_src_diff = s_src_tmp - s_src; | ||||
|   std::cout << " s_src_diff " << norm2(s_src_diff)<<std::endl; | ||||
|  | ||||
|   std::cout << " s_src \n" << s_src << std::endl; | ||||
|   std::cout << " s_src_tmp \n" << s_src_tmp << std::endl; | ||||
|   std::cout << " s_src_diff \n" << s_src_diff << std::endl; | ||||
| #endif | ||||
|  | ||||
|   /////////////////////////////////////////////////////////////// | ||||
|   // Set up N-solvers as trivially parallel | ||||
|   /////////////////////////////////////////////////////////////// | ||||
|   RealD mass=0.01; | ||||
|   RealD M5=1.8; | ||||
|   DomainWallFermionR Dchk(Umu,*FGrid,*FrbGrid,*UGrid,*rbGrid,mass,M5); | ||||
|   DomainWallFermionR Ddwf(s_Umu,*SFGrid,*SFrbGrid,*SGrid,*SrbGrid,mass,M5); | ||||
|  | ||||
|   std::cout << GridLogMessage << "****************************************************************** "<<std::endl; | ||||
|   std::cout << GridLogMessage << " Calling DWF CG "<<std::endl; | ||||
|   std::cout << GridLogMessage << "****************************************************************** "<<std::endl; | ||||
|  | ||||
|   MdagMLinearOperator<DomainWallFermionR,FermionField> HermOp(Ddwf); | ||||
|   MdagMLinearOperator<DomainWallFermionR,FermionField> HermOpCk(Dchk); | ||||
|   ConjugateGradient<FermionField> CG((1.0e-5),10000); | ||||
|   s_res = zero; | ||||
|   CG(HermOp,s_src,s_res); | ||||
|  | ||||
|   std::cout << " s_res norm "<<norm2(s_res)<<std::endl; | ||||
|   ///////////////////////////////////////////////////////////// | ||||
|   // Report how long they all took | ||||
|   ///////////////////////////////////////////////////////////// | ||||
|   std::vector<uint32_t> iterations(nrhs,0); | ||||
|   iterations[me] = CG.IterationsToComplete; | ||||
|  | ||||
|   for(int n=0;n<nrhs;n++){ | ||||
|     UGrid->GlobalSum(iterations[n]); | ||||
|     std::cout << GridLogMessage<<" Rank "<<n<<" "<< iterations[n]<<" CG iterations"<<std::endl; | ||||
|   } | ||||
|  | ||||
|   ///////////////////////////////////////////////////////////// | ||||
|   // Gather and residual check on the results | ||||
|   ///////////////////////////////////////////////////////////// | ||||
|   std::cout << GridLogMessage<< "Unsplitting the result"<<std::endl; | ||||
|   Grid_unsplit(result,s_res); | ||||
|  | ||||
|  | ||||
|   std::cout << GridLogMessage<< "Checking the residuals"<<std::endl; | ||||
|   for(int n=0;n<nrhs;n++){ | ||||
|     std::cout << " res["<<n<<"] norm "<<norm2(result[n])<<std::endl; | ||||
|     HermOpCk.HermOp(result[n],tmp); tmp = tmp - src[n]; | ||||
|     std::cout << GridLogMessage<<" resid["<<n<<"]  "<< norm2(tmp)/norm2(src[n])<<std::endl; | ||||
|   } | ||||
|  | ||||
|   Grid_finalize(); | ||||
| } | ||||
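The --split option parsed above takes one integer per MPI dimension, giving the processor layout of each sub-communicator, and nrhs is the product of mpi_layout[i]/mpi_split[i]. A hypothetical invocation, assuming Grid's usual --grid and --mpi command-line options:

mpirun -np 16 ./Test_dwf_mrhs_cg_mpi --grid 16.16.16.16 --mpi 2.2.2.2 --split 1 1 1 1

would cut the 2.2.2.2 communicator into sixteen single-rank sub-grids, so nrhs = 16 sources are solved concurrently, each sub-grid holding the full 16^4 x Ls volume.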
							
								
								
									
tests/solver/Test_dwf_mrhs_cg_mpieo.cc (new file, 164 lines)
							| @@ -0,0 +1,164 @@ | ||||
|     /************************************************************************************* | ||||
|  | ||||
|     Grid physics library, www.github.com/paboyle/Grid  | ||||
|  | ||||
|     Source file: ./tests/Test_dwf_mrhs_cg.cc | ||||
|  | ||||
|     Copyright (C) 2015 | ||||
|  | ||||
| Author: Peter Boyle <paboyle@ph.ed.ac.uk> | ||||
|  | ||||
|     This program is free software; you can redistribute it and/or modify | ||||
|     it under the terms of the GNU General Public License as published by | ||||
|     the Free Software Foundation; either version 2 of the License, or | ||||
|     (at your option) any later version. | ||||
|  | ||||
|     This program is distributed in the hope that it will be useful, | ||||
|     but WITHOUT ANY WARRANTY; without even the implied warranty of | ||||
|     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the | ||||
|     GNU General Public License for more details. | ||||
|  | ||||
|     You should have received a copy of the GNU General Public License along | ||||
|     with this program; if not, write to the Free Software Foundation, Inc., | ||||
|     51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. | ||||
|  | ||||
|     See the full license in the file "LICENSE" in the top level distribution directory | ||||
|     *************************************************************************************/ | ||||
|     /*  END LEGAL */ | ||||
| #include <Grid/Grid.h> | ||||
| #include <Grid/algorithms/iterative/BlockConjugateGradient.h> | ||||
|  | ||||
| using namespace std; | ||||
| using namespace Grid; | ||||
| using namespace Grid::QCD; | ||||
|  | ||||
| int main (int argc, char ** argv) | ||||
| { | ||||
|   typedef typename DomainWallFermionR::FermionField FermionField;  | ||||
|   typedef typename DomainWallFermionR::ComplexField ComplexField;  | ||||
|   typename DomainWallFermionR::ImplParams params;  | ||||
|  | ||||
|   const int Ls=4; | ||||
|  | ||||
|   Grid_init(&argc,&argv); | ||||
|  | ||||
|   std::vector<int> latt_size   = GridDefaultLatt(); | ||||
|   std::vector<int> simd_layout = GridDefaultSimd(Nd,vComplex::Nsimd()); | ||||
|   std::vector<int> mpi_layout  = GridDefaultMpi(); | ||||
|   std::vector<int> mpi_split (mpi_layout.size(),1); | ||||
|  | ||||
|   GridCartesian         * UGrid   = SpaceTimeGrid::makeFourDimGrid(GridDefaultLatt(),  | ||||
| 								   GridDefaultSimd(Nd,vComplex::Nsimd()), | ||||
| 								   GridDefaultMpi()); | ||||
|   GridCartesian         * FGrid   = SpaceTimeGrid::makeFiveDimGrid(Ls,UGrid); | ||||
|   GridRedBlackCartesian * rbGrid  = SpaceTimeGrid::makeFourDimRedBlackGrid(UGrid); | ||||
|   GridRedBlackCartesian * FrbGrid = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls,UGrid); | ||||
|  | ||||
|   int nrhs = UGrid->RankCount() ; | ||||
|  | ||||
|   ///////////////////////////////////////////// | ||||
|   // Split into 1^4 mpi communicators | ||||
|   ///////////////////////////////////////////// | ||||
|   int me; | ||||
|   GridCartesian         * SGrid = new GridCartesian(GridDefaultLatt(), | ||||
| 						    GridDefaultSimd(Nd,vComplex::Nsimd()), | ||||
| 						    mpi_split, | ||||
| 						    *UGrid,me);  | ||||
|  | ||||
|   GridCartesian         * SFGrid   = SpaceTimeGrid::makeFiveDimGrid(Ls,SGrid); | ||||
|   GridRedBlackCartesian * SrbGrid  = SpaceTimeGrid::makeFourDimRedBlackGrid(SGrid); | ||||
|   GridRedBlackCartesian * SFrbGrid = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls,SGrid); | ||||
|  | ||||
|   /////////////////////////////////////////////// | ||||
|   // Set up the problem as a 4d spreadout job | ||||
|   /////////////////////////////////////////////// | ||||
|   std::vector<int> seeds({1,2,3,4}); | ||||
|  | ||||
|   GridParallelRNG pRNG(UGrid );  pRNG.SeedFixedIntegers(seeds); | ||||
|   GridParallelRNG pRNG5(FGrid);  pRNG5.SeedFixedIntegers(seeds); | ||||
|   std::vector<FermionField>    src(nrhs,FGrid); | ||||
|   std::vector<FermionField> src_chk(nrhs,FGrid); | ||||
|   std::vector<FermionField> result(nrhs,FGrid); | ||||
|   FermionField tmp(FGrid); | ||||
|  | ||||
|   std::vector<FermionField> src_e(nrhs,FrbGrid); | ||||
|   std::vector<FermionField> src_o(nrhs,FrbGrid); | ||||
|  | ||||
|   for(int s=0;s<nrhs;s++) random(pRNG5,src[s]); | ||||
|   for(int s=0;s<nrhs;s++) result[s]=zero; | ||||
|  | ||||
|   LatticeGaugeField Umu(UGrid); SU3::HotConfiguration(pRNG,Umu); | ||||
|  | ||||
|   ///////////////// | ||||
|   // MPI only sends | ||||
|   ///////////////// | ||||
|   LatticeGaugeField s_Umu(SGrid); | ||||
|   FermionField s_src(SFGrid); | ||||
|   FermionField s_src_e(SFrbGrid); | ||||
|   FermionField s_src_o(SFrbGrid); | ||||
|   FermionField s_tmp(SFGrid); | ||||
|   FermionField s_res(SFGrid); | ||||
|  | ||||
|   /////////////////////////////////////////////////////////////// | ||||
|   // split the source out using MPI instead of I/O | ||||
|   /////////////////////////////////////////////////////////////// | ||||
|   Grid_split  (Umu,s_Umu); | ||||
|   Grid_split  (src,s_src); | ||||
|  | ||||
|   /////////////////////////////////////////////////////////////// | ||||
|   // Check even odd cases | ||||
|   /////////////////////////////////////////////////////////////// | ||||
|   for(int s=0;s<nrhs;s++){ | ||||
|     pickCheckerboard(Odd , src_o[s], src[s]); | ||||
|     pickCheckerboard(Even, src_e[s], src[s]); | ||||
|   } | ||||
|   Grid_split  (src_e,s_src_e); | ||||
|   Grid_split  (src_o,s_src_o); | ||||
|   setCheckerboard(s_tmp, s_src_o); | ||||
|   setCheckerboard(s_tmp, s_src_e); | ||||
|   s_tmp = s_tmp - s_src; | ||||
|   std::cout << GridLogMessage<<" EvenOdd Difference " <<norm2(s_tmp)<<std::endl; | ||||
|  | ||||
|   /////////////////////////////////////////////////////////////// | ||||
|   // Set up N-solvers as trivially parallel | ||||
|   /////////////////////////////////////////////////////////////// | ||||
|   RealD mass=0.01; | ||||
|   RealD M5=1.8; | ||||
|   DomainWallFermionR Dchk(Umu,*FGrid,*FrbGrid,*UGrid,*rbGrid,mass,M5); | ||||
|   DomainWallFermionR Ddwf(s_Umu,*SFGrid,*SFrbGrid,*SGrid,*SrbGrid,mass,M5); | ||||
|  | ||||
|   std::cout << GridLogMessage << "****************************************************************** "<<std::endl; | ||||
|   std::cout << GridLogMessage << " Calling DWF CG "<<std::endl; | ||||
|   std::cout << GridLogMessage << "****************************************************************** "<<std::endl; | ||||
|  | ||||
|   MdagMLinearOperator<DomainWallFermionR,FermionField> HermOp(Ddwf); | ||||
|   MdagMLinearOperator<DomainWallFermionR,FermionField> HermOpCk(Dchk); | ||||
|   ConjugateGradient<FermionField> CG((1.0e-8/(me+1)),10000); | ||||
|   s_res = zero; | ||||
|   CG(HermOp,s_src,s_res); | ||||
|  | ||||
|   ///////////////////////////////////////////////////////////// | ||||
|   // Report how long they all took | ||||
|   ///////////////////////////////////////////////////////////// | ||||
|   std::vector<uint32_t> iterations(nrhs,0); | ||||
|   iterations[me] = CG.IterationsToComplete; | ||||
|  | ||||
|   for(int n=0;n<nrhs;n++){ | ||||
|     UGrid->GlobalSum(iterations[n]); | ||||
|     std::cout << GridLogMessage<<" Rank "<<n<<" "<< iterations[n]<<" CG iterations"<<std::endl; | ||||
|   } | ||||
|  | ||||
|   ///////////////////////////////////////////////////////////// | ||||
|   // Gather and residual check on the results | ||||
|   ///////////////////////////////////////////////////////////// | ||||
|   std::cout << GridLogMessage<< "Unsplitting the result"<<std::endl; | ||||
|   Grid_unsplit(result,s_res); | ||||
|  | ||||
|   std::cout << GridLogMessage<< "Checking the residuals"<<std::endl; | ||||
|   for(int n=0;n<nrhs;n++){ | ||||
|     HermOpCk.HermOp(result[n],tmp); tmp = tmp - src[n]; | ||||
|     std::cout << GridLogMessage<<" resid["<<n<<"]  "<< norm2(tmp)<<std::endl; | ||||
|   } | ||||
|  | ||||
|   Grid_finalize(); | ||||
| } | ||||
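The even/odd section above checks that grid splitting commutes with the checkerboard decomposition: the odd and even halves are split separately, reassembled with setCheckerboard on the sub-grid, and compared against the directly split field. The underlying round trip, as a stand-alone sketch (not part of the commit):

FermionField e_half(FrbGrid), o_half(FrbGrid), rebuilt(FGrid);
rebuilt = zero;

pickCheckerboard(Even, e_half, src[0]);   // extract the even-parity sites
pickCheckerboard(Odd , o_half, src[0]);   // extract the odd-parity sites
setCheckerboard(rebuilt, e_half);         // each call fills the sites of its own parity
setCheckerboard(rebuilt, o_half);

rebuilt = rebuilt - src[0];               // should vanish identically
std::cout << "even/odd round-trip difference " << norm2(rebuilt) << std::endl;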
							
								
								
									
tests/solver/Test_split_grid.cc (new file, 157 lines)
							| @@ -0,0 +1,157 @@ | ||||
|     /************************************************************************************* | ||||
|  | ||||
|     Grid physics library, www.github.com/paboyle/Grid  | ||||
|  | ||||
|     Source file: ./tests/Test_dwf_mrhs_cg.cc | ||||
|  | ||||
|     Copyright (C) 2015 | ||||
|  | ||||
| Author: Peter Boyle <paboyle@ph.ed.ac.uk> | ||||
|  | ||||
|     This program is free software; you can redistribute it and/or modify | ||||
|     it under the terms of the GNU General Public License as published by | ||||
|     the Free Software Foundation; either version 2 of the License, or | ||||
|     (at your option) any later version. | ||||
|  | ||||
|     This program is distributed in the hope that it will be useful, | ||||
|     but WITHOUT ANY WARRANTY; without even the implied warranty of | ||||
|     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the | ||||
|     GNU General Public License for more details. | ||||
|  | ||||
|     You should have received a copy of the GNU General Public License along | ||||
|     with this program; if not, write to the Free Software Foundation, Inc., | ||||
|     51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. | ||||
|  | ||||
|     See the full license in the file "LICENSE" in the top level distribution directory | ||||
|     *************************************************************************************/ | ||||
|     /*  END LEGAL */ | ||||
| #include <Grid/Grid.h> | ||||
| #include <Grid/algorithms/iterative/BlockConjugateGradient.h> | ||||
|  | ||||
| using namespace std; | ||||
| using namespace Grid; | ||||
| using namespace Grid::QCD; | ||||
|  | ||||
| int main (int argc, char ** argv) | ||||
| { | ||||
|   typedef typename DomainWallFermionR::FermionField FermionField;  | ||||
|   typedef typename DomainWallFermionR::ComplexField ComplexField;  | ||||
|   typename DomainWallFermionR::ImplParams params;  | ||||
|  | ||||
|   const int Ls=4; | ||||
|  | ||||
|   Grid_init(&argc,&argv); | ||||
|  | ||||
|   std::vector<int> latt_size   = GridDefaultLatt(); | ||||
|   std::vector<int> simd_layout = GridDefaultSimd(Nd,vComplex::Nsimd()); | ||||
|   std::vector<int> mpi_layout  = GridDefaultMpi(); | ||||
|   std::vector<int> mpi_split (mpi_layout.size(),1); | ||||
|  | ||||
|   GridCartesian         * UGrid   = SpaceTimeGrid::makeFourDimGrid(GridDefaultLatt(), GridDefaultSimd(Nd,vComplex::Nsimd()),GridDefaultMpi()); | ||||
|   GridCartesian         * FGrid   = SpaceTimeGrid::makeFiveDimGrid(Ls,UGrid); | ||||
|   GridRedBlackCartesian * rbGrid  = SpaceTimeGrid::makeFourDimRedBlackGrid(UGrid); | ||||
|   GridRedBlackCartesian * FrbGrid = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls,UGrid); | ||||
|  | ||||
|   ///////////////////////////////////////////// | ||||
|   // Split into 1^4 mpi communicators | ||||
|   ///////////////////////////////////////////// | ||||
|  | ||||
|   for(int i=0;i<argc;i++){ | ||||
|     if(std::string(argv[i]) == "--split"){ | ||||
|       for(int k=0;k<mpi_layout.size();k++){ | ||||
| 	std::stringstream ss;  | ||||
| 	ss << argv[i+1+k];  | ||||
| 	ss >> mpi_split[k]; | ||||
|       } | ||||
|       break; | ||||
|     } | ||||
|   } | ||||
|  | ||||
|   int nrhs = 1; | ||||
|   for(int i=0;i<mpi_layout.size();i++) nrhs *= (mpi_layout[i]/mpi_split[i]); | ||||
|  | ||||
|   GridCartesian         * SGrid = new GridCartesian(GridDefaultLatt(), | ||||
| 						    GridDefaultSimd(Nd,vComplex::Nsimd()), | ||||
| 						    mpi_split, | ||||
| 						    *UGrid);  | ||||
|  | ||||
|   GridCartesian         * SFGrid   = SpaceTimeGrid::makeFiveDimGrid(Ls,SGrid); | ||||
|   GridRedBlackCartesian * SrbGrid  = SpaceTimeGrid::makeFourDimRedBlackGrid(SGrid); | ||||
|   GridRedBlackCartesian * SFrbGrid = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls,SGrid); | ||||
|  | ||||
|   /////////////////////////////////////////////// | ||||
|   // Set up the problem as a 4d spreadout job | ||||
|   /////////////////////////////////////////////// | ||||
|   std::vector<int> seeds({1,2,3,4}); | ||||
|  | ||||
|   GridParallelRNG pRNG(UGrid );  pRNG.SeedFixedIntegers(seeds); | ||||
|   GridParallelRNG pRNG5(FGrid);  pRNG5.SeedFixedIntegers(seeds); | ||||
|   std::vector<FermionField>    src(nrhs,FGrid); | ||||
|   std::vector<FermionField> src_chk(nrhs,FGrid); | ||||
|   std::vector<FermionField> result(nrhs,FGrid); | ||||
|   FermionField tmp(FGrid); | ||||
|  | ||||
|   for(int s=0;s<nrhs;s++) random(pRNG5,src[s]); | ||||
|   for(int s=0;s<nrhs;s++) result[s]=zero; | ||||
|  | ||||
|   LatticeGaugeField Umu(UGrid); SU3::HotConfiguration(pRNG,Umu); | ||||
|  | ||||
|   ///////////////// | ||||
|   // MPI only sends | ||||
|   ///////////////// | ||||
|   int me = UGrid->ThisRank(); | ||||
|  | ||||
|   LatticeGaugeField s_Umu(SGrid); | ||||
|   FermionField s_src(SFGrid); | ||||
|   FermionField s_tmp(SFGrid); | ||||
|   FermionField s_res(SFGrid); | ||||
|  | ||||
|   /////////////////////////////////////////////////////////////// | ||||
|   // split the source out using MPI instead of I/O | ||||
|   /////////////////////////////////////////////////////////////// | ||||
|   Grid_split  (Umu,s_Umu); | ||||
|   Grid_split  (src,s_src); | ||||
|  | ||||
|   /////////////////////////////////////////////////////////////// | ||||
|   // Set up N-solvers as trivially parallel | ||||
|   /////////////////////////////////////////////////////////////// | ||||
|   RealD mass=0.01; | ||||
|   RealD M5=1.8; | ||||
|   DomainWallFermionR Dchk(Umu,*FGrid,*FrbGrid,*UGrid,*rbGrid,mass,M5); | ||||
|   DomainWallFermionR Ddwf(s_Umu,*SFGrid,*SFrbGrid,*SGrid,*SrbGrid,mass,M5); | ||||
|  | ||||
|   std::cout << GridLogMessage << "****************************************************************** "<<std::endl; | ||||
|   std::cout << GridLogMessage << " Calling DWF CG "<<std::endl; | ||||
|   std::cout << GridLogMessage << "****************************************************************** "<<std::endl; | ||||
|  | ||||
|   MdagMLinearOperator<DomainWallFermionR,FermionField> HermOp(Ddwf); | ||||
|   MdagMLinearOperator<DomainWallFermionR,FermionField> HermOpCk(Dchk); | ||||
|   ConjugateGradient<FermionField> CG((1.0e-8/(me+1)),10000); | ||||
|   s_res = zero; | ||||
|   CG(HermOp,s_src,s_res); | ||||
|  | ||||
|   ///////////////////////////////////////////////////////////// | ||||
|   // Report how long they all took | ||||
|   ///////////////////////////////////////////////////////////// | ||||
|   std::vector<uint32_t> iterations(nrhs,0); | ||||
|   iterations[me] = CG.IterationsToComplete; | ||||
|  | ||||
|   for(int n=0;n<nrhs;n++){ | ||||
|     UGrid->GlobalSum(iterations[n]); | ||||
|     std::cout << GridLogMessage<<" Rank "<<n<<" "<< iterations[n]<<" CG iterations"<<std::endl; | ||||
|   } | ||||
|  | ||||
|   ///////////////////////////////////////////////////////////// | ||||
|   // Gather and residual check on the results | ||||
|   ///////////////////////////////////////////////////////////// | ||||
|   std::cout << GridLogMessage<< "Unsplitting the result"<<std::endl; | ||||
|   Grid_unsplit(result,s_res); | ||||
|  | ||||
|   std::cout << GridLogMessage<< "Checking the residuals"<<std::endl; | ||||
|   for(int n=0;n<nrhs;n++){ | ||||
|     HermOpCk.HermOp(result[n],tmp); tmp = tmp - src[n]; | ||||
|     std::cout << GridLogMessage<<" resid["<<n<<"]  "<< norm2(tmp)<<std::endl; | ||||
|   } | ||||
|  | ||||
|   Grid_finalize(); | ||||
| } | ||||
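Note the per-sub-grid stopping condition 1.0e-8/(me+1): each split communicator gets a slightly tighter target than the previous one, presumably so that the iteration counts gathered by GlobalSum differ visibly between ranks and the report is easy to sanity-check. For four sub-grids (me = 0..3) the targets work out to 1.0e-8, 5.0e-9, 3.3e-9 and 2.5e-9.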
							
								
								
									
tests/solver/Test_staggered_block_cg_prec.cc (new file, 130 lines)
							| @@ -0,0 +1,130 @@ | ||||
|     /************************************************************************************* | ||||
|  | ||||
|     Grid physics library, www.github.com/paboyle/Grid  | ||||
|  | ||||
|     Source file: ./tests/Test_wilson_cg_unprec.cc | ||||
|  | ||||
|     Copyright (C) 2015 | ||||
|  | ||||
| Author: Azusa Yamaguchi <ayamaguc@staffmail.ed.ac.uk> | ||||
| Author: Peter Boyle <paboyle@ph.ed.ac.uk> | ||||
|  | ||||
|     This program is free software; you can redistribute it and/or modify | ||||
|     it under the terms of the GNU General Public License as published by | ||||
|     the Free Software Foundation; either version 2 of the License, or | ||||
|     (at your option) any later version. | ||||
|  | ||||
|     This program is distributed in the hope that it will be useful, | ||||
|     but WITHOUT ANY WARRANTY; without even the implied warranty of | ||||
|     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the | ||||
|     GNU General Public License for more details. | ||||
|  | ||||
|     You should have received a copy of the GNU General Public License along | ||||
|     with this program; if not, write to the Free Software Foundation, Inc., | ||||
|     51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. | ||||
|  | ||||
|     See the full license in the file "LICENSE" in the top level distribution directory | ||||
|     *************************************************************************************/ | ||||
|     /*  END LEGAL */ | ||||
| #include <Grid/Grid.h> | ||||
|  | ||||
| using namespace std; | ||||
| using namespace Grid; | ||||
| using namespace Grid::QCD; | ||||
|  | ||||
| template<class d> | ||||
| struct scal { | ||||
|   d internal; | ||||
| }; | ||||
|  | ||||
|   Gamma::Algebra Gmu [] = { | ||||
|     Gamma::Algebra::GammaX, | ||||
|     Gamma::Algebra::GammaY, | ||||
|     Gamma::Algebra::GammaZ, | ||||
|     Gamma::Algebra::GammaT | ||||
|   }; | ||||
|  | ||||
| int main (int argc, char ** argv) | ||||
| { | ||||
|   typedef typename ImprovedStaggeredFermion5DR::FermionField FermionField;  | ||||
|   typedef typename ImprovedStaggeredFermion5DR::ComplexField ComplexField;  | ||||
|   typename ImprovedStaggeredFermion5DR::ImplParams params;  | ||||
|  | ||||
|   const int Ls=8; | ||||
|  | ||||
|   Grid_init(&argc,&argv); | ||||
|  | ||||
|   std::vector<int> latt_size   = GridDefaultLatt(); | ||||
|   std::vector<int> simd_layout = GridDefaultSimd(Nd,vComplex::Nsimd()); | ||||
|   std::vector<int> mpi_layout  = GridDefaultMpi(); | ||||
|  | ||||
|   GridCartesian         * UGrid   = SpaceTimeGrid::makeFourDimGrid(GridDefaultLatt(), GridDefaultSimd(Nd,vComplex::Nsimd()),GridDefaultMpi()); | ||||
|   GridRedBlackCartesian * UrbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(UGrid); | ||||
|   GridCartesian         * FGrid   = SpaceTimeGrid::makeFiveDimGrid(Ls,UGrid); | ||||
|   GridRedBlackCartesian * FrbGrid = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls,UGrid); | ||||
|  | ||||
|   std::vector<int> seeds({1,2,3,4}); | ||||
|   GridParallelRNG pRNG(UGrid );  pRNG.SeedFixedIntegers(seeds); | ||||
|   GridParallelRNG pRNG5(FGrid);  pRNG5.SeedFixedIntegers(seeds); | ||||
|  | ||||
|   FermionField src(FGrid); random(pRNG5,src); | ||||
|   FermionField src_o(FrbGrid);   pickCheckerboard(Odd,src_o,src); | ||||
|   FermionField result_o(FrbGrid); result_o=zero;  | ||||
|   RealD nrm = norm2(src); | ||||
|  | ||||
|   LatticeGaugeField Umu(UGrid); SU3::HotConfiguration(pRNG,Umu); | ||||
|  | ||||
|   RealD mass=0.003; | ||||
|   ImprovedStaggeredFermion5DR Ds(Umu,Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass);  | ||||
|   SchurStaggeredOperator<ImprovedStaggeredFermion5DR,FermionField> HermOp(Ds); | ||||
|  | ||||
|   ConjugateGradient<FermionField> CG(1.0e-8,10000); | ||||
|   int blockDim = 0; | ||||
|   BlockConjugateGradient<FermionField>    BCGrQ(BlockCGrQ,blockDim,1.0e-8,10000); | ||||
|   BlockConjugateGradient<FermionField>    BCG  (BlockCG,blockDim,1.0e-8,10000); | ||||
|   BlockConjugateGradient<FermionField>    mCG  (CGmultiRHS,blockDim,1.0e-8,10000); | ||||
|  | ||||
|   std::cout << GridLogMessage << "****************************************************************** "<<std::endl; | ||||
|   std::cout << GridLogMessage << " Calling 4d CG "<<std::endl; | ||||
|   std::cout << GridLogMessage << "****************************************************************** "<<std::endl; | ||||
|   ImprovedStaggeredFermionR Ds4d(Umu,Umu,*UGrid,*UrbGrid,mass); | ||||
|   SchurStaggeredOperator<ImprovedStaggeredFermionR,FermionField> HermOp4d(Ds4d); | ||||
|   FermionField src4d(UGrid); random(pRNG,src4d); | ||||
|   FermionField src4d_o(UrbGrid);   pickCheckerboard(Odd,src4d_o,src4d); | ||||
|   FermionField result4d_o(UrbGrid);  | ||||
|  | ||||
|   result4d_o=zero; | ||||
|   CG(HermOp4d,src4d_o,result4d_o); | ||||
|   std::cout << GridLogMessage << "************************************************************************ "<<std::endl; | ||||
|  | ||||
|  | ||||
|   std::cout << GridLogMessage << "************************************************************************ "<<std::endl; | ||||
|   std::cout << GridLogMessage << " Calling 5d CG for "<<Ls <<" right hand sides" <<std::endl; | ||||
|   std::cout << GridLogMessage << "************************************************************************ "<<std::endl; | ||||
|   Ds.ZeroCounters(); | ||||
|   result_o=zero; | ||||
|   CG(HermOp,src_o,result_o); | ||||
|   Ds.Report(); | ||||
|   std::cout << GridLogMessage << "************************************************************************ "<<std::endl; | ||||
|  | ||||
|   std::cout << GridLogMessage << "************************************************************************ "<<std::endl; | ||||
|   std::cout << GridLogMessage << " Calling multiRHS CG for "<<Ls <<" right hand sides" <<std::endl; | ||||
|   std::cout << GridLogMessage << "************************************************************************ "<<std::endl; | ||||
|   Ds.ZeroCounters(); | ||||
|   result_o=zero; | ||||
|   mCG(HermOp,src_o,result_o); | ||||
|   Ds.Report(); | ||||
|   std::cout << GridLogMessage << "************************************************************************ "<<std::endl; | ||||
|  | ||||
|   std::cout << GridLogMessage << "************************************************************************ "<<std::endl; | ||||
|   std::cout << GridLogMessage << " Calling Block CG for "<<Ls <<" right hand sides" <<std::endl; | ||||
|   std::cout << GridLogMessage << "************************************************************************ "<<std::endl; | ||||
|   Ds.ZeroCounters(); | ||||
|   result_o=zero; | ||||
|   BCGrQ(HermOp,src_o,result_o); | ||||
|   Ds.Report(); | ||||
|   std::cout << GridLogMessage << "************************************************************************ "<<std::endl; | ||||
|  | ||||
|  | ||||
|   Grid_finalize(); | ||||
| } | ||||
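The test above packs Ls = 8 independent 4D right-hand sides into the fifth dimension of a 5D staggered operator and solves the same odd-checkerboard Schur system with plain CG, multi-RHS CG (CGmultiRHS) and BlockCGrQ, with a single-RHS 4D CG as a baseline. The essential call pattern is small; a minimal sketch, reusing only the classes and names already set up in the test (grids, gauge field Umu, mass and the random 5D source src):

    // Sketch of the multi-RHS pattern exercised above; assumes FGrid, FrbGrid,
    // UGrid, UrbGrid, Umu, mass and the random 5D source `src` from the test.
    ImprovedStaggeredFermion5DR Ds(Umu,Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass);
    SchurStaggeredOperator<ImprovedStaggeredFermion5DR,FermionField> HermOp(Ds);

    FermionField src_o(FrbGrid);  pickCheckerboard(Odd,src_o,src);
    FermionField sol_o(FrbGrid);  sol_o = zero;

    // Block (orthogonal) dimension 0 is the s index of a five-dimensional grid,
    // so each s-slice is treated as an independent right-hand side.
    BlockConjugateGradient<FermionField> BCGrQ(BlockCGrQ,0,1.0e-8,10000);
    BCGrQ(HermOp,src_o,sol_o);

The multi-RHS and block variants constructed in the test (mCG, BCG) are invoked the same way; only the enum passed to the constructor selects the algorithm.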
| @@ -48,7 +48,6 @@ struct scal { | ||||
| int main (int argc, char ** argv) | ||||
| { | ||||
|   typedef typename ImprovedStaggeredFermionR::FermionField FermionField;  | ||||
|   typedef typename ImprovedStaggeredFermionR::ComplexField ComplexField;  | ||||
|   typename ImprovedStaggeredFermionR::ImplParams params;  | ||||
|  | ||||
|   Grid_init(&argc,&argv); | ||||
| @@ -71,7 +70,7 @@ int main (int argc, char ** argv) | ||||
|     volume=volume*latt_size[mu]; | ||||
|   }   | ||||
|    | ||||
|   RealD mass=0.1; | ||||
|   RealD mass=0.003; | ||||
|   ImprovedStaggeredFermionR Ds(Umu,Umu,Grid,RBGrid,mass); | ||||
|  | ||||
|   FermionField res_o(&RBGrid);  | ||||
| @@ -79,9 +78,14 @@ int main (int argc, char ** argv) | ||||
|   pickCheckerboard(Odd,src_o,src); | ||||
|   res_o=zero; | ||||
|  | ||||
|   SchurDiagMooeeOperator<ImprovedStaggeredFermionR,FermionField> HermOpEO(Ds); | ||||
|   SchurStaggeredOperator<ImprovedStaggeredFermionR,FermionField> HermOpEO(Ds); | ||||
|   ConjugateGradient<FermionField> CG(1.0e-8,10000); | ||||
|   CG(HermOpEO,src_o,res_o); | ||||
|  | ||||
|   FermionField tmp(&RBGrid); | ||||
|  | ||||
|   HermOpEO.Mpc(res_o,tmp); | ||||
|   std::cout << "check Mpc resid " << axpy_norm(tmp,-1.0,src_o,tmp)/norm2(src_o) << "\n"; | ||||
|  | ||||
|   Grid_finalize(); | ||||
| } | ||||
|   | ||||
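The hunks above switch the even-odd test from SchurDiagMooeeOperator to SchurStaggeredOperator, lower the mass from 0.1 to 0.003, and add an explicit verification that the CG solution satisfies the preconditioned system: Mpc is applied to the solution and the squared relative residual ||Mpc x_o - b_o||^2 / ||b_o||^2 is printed. The same check can be packaged as a small helper; a sketch, using only the Mpc, axpy_norm and norm2 calls that already appear in the diff (the helper name itself is hypothetical):

    // Hypothetical helper, not part of the diff: squared relative residual of an
    // even-odd (Schur) preconditioned staggered solve.
    template<class Matrix,class Field>
    RealD SchurRelResidSq(SchurStaggeredOperator<Matrix,Field> &HermOp,
                          const Field &src_o,const Field &sol_o)
    {
      Field tmp(src_o._grid);                 // same checkerboarded grid as the source
      HermOp.Mpc(sol_o,tmp);                  // tmp = Mpc * sol_o
      return axpy_norm(tmp,-1.0,src_o,tmp)/norm2(src_o);  // ||Mpc x - b||^2 / ||b||^2
    }

Calling std::cout << SchurRelResidSq(HermOpEO,src_o,res_o) << std::endl; immediately after the CG should reproduce the number printed by the added lines.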
							
								
								
									
76  tests/solver/Test_staggered_cg_schur.cc  Normal file
							| @@ -0,0 +1,76 @@ | ||||
|     /************************************************************************************* | ||||
|  | ||||
|     Grid physics library, www.github.com/paboyle/Grid  | ||||
|  | ||||
|     Source file: ./tests/solver/Test_staggered_cg_schur.cc | ||||
|  | ||||
|     Copyright (C) 2015 | ||||
|  | ||||
| Author: Peter Boyle <paboyle@ph.ed.ac.uk> | ||||
|  | ||||
|     This program is free software; you can redistribute it and/or modify | ||||
|     it under the terms of the GNU General Public License as published by | ||||
|     the Free Software Foundation; either version 2 of the License, or | ||||
|     (at your option) any later version. | ||||
|  | ||||
|     This program is distributed in the hope that it will be useful, | ||||
|     but WITHOUT ANY WARRANTY; without even the implied warranty of | ||||
|     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the | ||||
|     GNU General Public License for more details. | ||||
|  | ||||
|     You should have received a copy of the GNU General Public License along | ||||
|     with this program; if not, write to the Free Software Foundation, Inc., | ||||
|     51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. | ||||
|  | ||||
|     See the full license in the file "LICENSE" in the top level distribution directory | ||||
|     *************************************************************************************/ | ||||
|     /*  END LEGAL */ | ||||
| #include <Grid/Grid.h> | ||||
|  | ||||
| using namespace std; | ||||
| using namespace Grid; | ||||
| using namespace Grid::QCD; | ||||
|  | ||||
| template<class d> | ||||
| struct scal { | ||||
|   d internal; | ||||
| }; | ||||
|  | ||||
|   Gamma::Algebra Gmu [] = { | ||||
|     Gamma::Algebra::GammaX, | ||||
|     Gamma::Algebra::GammaY, | ||||
|     Gamma::Algebra::GammaZ, | ||||
|     Gamma::Algebra::GammaT | ||||
|   }; | ||||
|  | ||||
| int main (int argc, char ** argv) | ||||
| { | ||||
|   typedef typename ImprovedStaggeredFermionR::FermionField FermionField;  | ||||
|   typename ImprovedStaggeredFermionR::ImplParams params;  | ||||
|   Grid_init(&argc,&argv); | ||||
|  | ||||
|   std::vector<int> latt_size   = GridDefaultLatt(); | ||||
|   std::vector<int> simd_layout = GridDefaultSimd(Nd,vComplex::Nsimd()); | ||||
|   std::vector<int> mpi_layout  = GridDefaultMpi(); | ||||
|   GridCartesian               Grid(latt_size,simd_layout,mpi_layout); | ||||
|   GridRedBlackCartesian     RBGrid(&Grid); | ||||
|  | ||||
|   std::vector<int> seeds({1,2,3,4}); | ||||
|   GridParallelRNG          pRNG(&Grid);  pRNG.SeedFixedIntegers(seeds); | ||||
|  | ||||
|   LatticeGaugeField Umu(&Grid); SU3::HotConfiguration(pRNG,Umu); | ||||
|  | ||||
|   FermionField    src(&Grid); random(pRNG,src); | ||||
|   FermionField result(&Grid); result=zero; | ||||
|   FermionField  resid(&Grid);  | ||||
|  | ||||
|   RealD mass=0.1; | ||||
|   ImprovedStaggeredFermionR Ds(Umu,Umu,Grid,RBGrid,mass); | ||||
|  | ||||
|   ConjugateGradient<FermionField> CG(1.0e-8,10000); | ||||
|   SchurRedBlackStaggeredSolve<FermionField> SchurSolver(CG); | ||||
|  | ||||
|   SchurSolver(Ds,src,result); | ||||
|    | ||||
|   Grid_finalize(); | ||||
| } | ||||
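The new file exercises SchurRedBlackStaggeredSolve: given the full (both-checkerboard) source it forms the odd-site Schur system, solves it with the supplied ConjugateGradient, and reconstructs the full solution in `result`. The `resid` field is declared but never used; a natural follow-up check, sketched here under the assumption that Ds.M applies the full unpreconditioned operator (the generic FermionOperator interface) and that the reconstructed `result` solves Ds.M result = src:

    // Sketch only, not part of the test: residual of the reconstructed full solution.
    Ds.M(result,resid);                                     // resid = Ds * result
    RealD r2 = axpy_norm(resid,-1.0,src,resid)/norm2(src);  // ||Ds*result - src||^2 / ||src||^2
    std::cout << "full-system residual^2 " << r2 << std::endl;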