mirror of
				https://github.com/paboyle/Grid.git
				synced 2025-10-25 18:19:34 +01:00 
			
		
		
		
	Compare commits
	
		
			1 Commits
		
	
	
		
			0.8.2
			...
			feature/np
		
	
	| Author | SHA1 | Date | |
|---|---|---|---|
|  | ac1d655de8 | 
							
								
								
									
										1
									
								
								.gitignore
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										1
									
								
								.gitignore
									
									
									
									
										vendored
									
									
								
							| @@ -114,4 +114,3 @@ gh-pages/ | ||||
| ##################### | ||||
| Grid/qcd/spin/gamma-gen/*.h | ||||
| Grid/qcd/spin/gamma-gen/*.cc | ||||
| Grid/util/Version.h | ||||
|   | ||||
| @@ -1,5 +0,0 @@ | ||||
| Version : 0.8.0 | ||||
|  | ||||
| - Clang 3.5 and above, ICPC v16 and above, GCC 6.3 and above recommended | ||||
| - MPI and MPI3 comms optimisations for KNL and OPA finished | ||||
| - Half precision comms | ||||
|   | ||||
| @@ -42,7 +42,6 @@ Author: paboyle <paboyle@ph.ed.ac.uk> | ||||
| #include <Grid/GridQCDcore.h> | ||||
| #include <Grid/qcd/action/Action.h> | ||||
| #include <Grid/qcd/utils/GaugeFix.h> | ||||
| #include <Grid/qcd/utils/CovariantSmearing.h> | ||||
| #include <Grid/qcd/smearing/Smearing.h> | ||||
| #include <Grid/parallelIO/MetaData.h> | ||||
| #include <Grid/qcd/hmc/HMC_aggregate.h> | ||||
|   | ||||
| @@ -48,16 +48,14 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk> | ||||
| #include <Grid/algorithms/iterative/ConjugateGradientMixedPrec.h> | ||||
| #include <Grid/algorithms/iterative/BlockConjugateGradient.h> | ||||
| #include <Grid/algorithms/iterative/ConjugateGradientReliableUpdate.h> | ||||
| #include <Grid/algorithms/iterative/MinimalResidual.h> | ||||
| #include <Grid/algorithms/iterative/GeneralisedMinimalResidual.h> | ||||
| #include <Grid/algorithms/iterative/CommunicationAvoidingGeneralisedMinimalResidual.h> | ||||
| #include <Grid/algorithms/iterative/FlexibleGeneralisedMinimalResidual.h> | ||||
| #include <Grid/algorithms/iterative/FlexibleCommunicationAvoidingGeneralisedMinimalResidual.h> | ||||
| #include <Grid/algorithms/iterative/MixedPrecisionFlexibleGeneralisedMinimalResidual.h> | ||||
| #include <Grid/algorithms/iterative/ImplicitlyRestartedLanczos.h> | ||||
| #include <Grid/algorithms/iterative/PowerMethod.h> | ||||
|  | ||||
| #include <Grid/algorithms/CoarsenedMatrix.h> | ||||
| #include <Grid/algorithms/FFT.h> | ||||
|  | ||||
| // EigCg | ||||
| // Pcg | ||||
| // Hdcg | ||||
| // GCR | ||||
| // etc.. | ||||
|  | ||||
| #endif | ||||
|   | ||||
| @@ -211,7 +211,6 @@ namespace Grid { | ||||
|  | ||||
|       for(int b=0;b<nn;b++){ | ||||
| 	 | ||||
| 	subspace[b] = zero; | ||||
| 	gaussian(RNG,noise); | ||||
| 	scale = std::pow(norm2(noise),-0.5);  | ||||
| 	noise=noise*scale; | ||||
| @@ -297,57 +296,12 @@ namespace Grid { | ||||
|     }; | ||||
|  | ||||
|     RealD Mdag (const CoarseVector &in, CoarseVector &out){  | ||||
|       // // corresponds to Petrov-Galerkin coarsening | ||||
|       // return M(in,out); | ||||
|  | ||||
|       // corresponds to Galerkin coarsening | ||||
|       CoarseVector tmp(Grid()); | ||||
|       G5C(tmp, in); | ||||
|       M(tmp, out); | ||||
|       G5C(out, out); | ||||
|       return norm2(out); | ||||
|       return M(in,out); | ||||
|     }; | ||||
|  | ||||
|     void Mdir(const CoarseVector &in, CoarseVector &out, int dir, int disp){ | ||||
|  | ||||
|       conformable(_grid,in._grid); | ||||
|       conformable(in._grid,out._grid); | ||||
|  | ||||
|       SimpleCompressor<siteVector> compressor; | ||||
|       Stencil.HaloExchange(in,compressor); | ||||
|  | ||||
|       auto point = [dir, disp](){ | ||||
|         if(dir == 0 and disp == 0) | ||||
|           return 8; | ||||
|         else | ||||
|           return (4 * dir + 1 - disp) / 2; | ||||
|       }(); | ||||
|  | ||||
|       parallel_for(int ss=0;ss<Grid()->oSites();ss++){ | ||||
|         siteVector res = zero; | ||||
|         siteVector nbr; | ||||
|         int ptype; | ||||
|         StencilEntry *SE; | ||||
|  | ||||
|         SE=Stencil.GetEntry(ptype,point,ss); | ||||
|  | ||||
|         if(SE->_is_local&&SE->_permute) { | ||||
|           permute(nbr,in._odata[SE->_offset],ptype); | ||||
|         } else if(SE->_is_local) { | ||||
|           nbr = in._odata[SE->_offset]; | ||||
|         } else { | ||||
|           nbr = Stencil.CommBuf()[SE->_offset]; | ||||
|         } | ||||
|  | ||||
|         res = res + A[point]._odata[ss]*nbr; | ||||
|  | ||||
|         vstream(out._odata[ss],res); | ||||
|       } | ||||
|     }; | ||||
|  | ||||
|     void Mdiag(const CoarseVector &in, CoarseVector &out){ | ||||
|       Mdir(in, out, 0, 0); // use the self coupling (= last) point of the stencil | ||||
|     }; | ||||
|     // Defer support for further coarsening for now | ||||
|     void Mdiag    (const CoarseVector &in,  CoarseVector &out){}; | ||||
|     void Mdir     (const CoarseVector &in,  CoarseVector &out,int dir, int disp){}; | ||||
|  | ||||
|     CoarsenedMatrix(GridCartesian &CoarseGrid) 	:  | ||||
|  | ||||
| @@ -463,7 +417,7 @@ namespace Grid { | ||||
|       std::cout<<GridLogMessage<<"Computed Coarse Operator"<<std::endl; | ||||
| #endif | ||||
|       //      ForceHermitian(); | ||||
|       // AssertHermitian(); | ||||
|       AssertHermitian(); | ||||
|       // ForceDiagonal(); | ||||
|     } | ||||
|     void ForceDiagonal(void) { | ||||
|   | ||||
| @@ -212,8 +212,9 @@ namespace Grid { | ||||
|     }; | ||||
|     template<class Matrix,class Field> | ||||
|       class SchurDiagMooeeOperator :  public SchurOperatorBase<Field> { | ||||
|     public: | ||||
|     protected: | ||||
|       Matrix &_Mat; | ||||
|     public: | ||||
|       SchurDiagMooeeOperator (Matrix &Mat): _Mat(Mat){}; | ||||
|       virtual  RealD Mpc      (const Field &in, Field &out) { | ||||
|       Field tmp(in._grid); | ||||
| @@ -379,12 +380,6 @@ namespace Grid { | ||||
|     template<class Field> class OperatorFunction { | ||||
|     public: | ||||
|       virtual void operator() (LinearOperatorBase<Field> &Linop, const Field &in, Field &out) = 0; | ||||
|       virtual void operator() (LinearOperatorBase<Field> &Linop, const std::vector<Field> &in,std::vector<Field> &out) { | ||||
| 	assert(in.size()==out.size()); | ||||
| 	for(int k=0;k<in.size();k++){ | ||||
| 	  (*this)(Linop,in[k],out[k]); | ||||
| 	} | ||||
|       }; | ||||
|     }; | ||||
|  | ||||
|     template<class Field> class LinearFunction { | ||||
|   | ||||
| @@ -55,14 +55,6 @@ namespace Grid { | ||||
|     template<class Field> class CheckerBoardedSparseMatrixBase : public SparseMatrixBase<Field> { | ||||
|     public: | ||||
|       virtual GridBase *RedBlackGrid(void)=0; | ||||
|  | ||||
|       ////////////////////////////////////////////////////////////////////// | ||||
|       // Query the even even properties to make algorithmic decisions | ||||
|       ////////////////////////////////////////////////////////////////////// | ||||
|       virtual RealD  Mass(void)        { return 0.0; }; | ||||
|       virtual int    ConstEE(void)     { return 1; }; // Disable assumptions unless overridden | ||||
|       virtual int    isTrivialEE(void) { return 0; }; // by a derived class that knows better | ||||
|  | ||||
|       // half checkerboard operaions | ||||
|       virtual  void Meooe    (const Field &in, Field &out)=0; | ||||
|       virtual  void Mooee    (const Field &in, Field &out)=0; | ||||
|   | ||||
| @@ -33,7 +33,7 @@ directory | ||||
|  | ||||
| namespace Grid { | ||||
|  | ||||
| enum BlockCGtype { BlockCG, BlockCGrQ, CGmultiRHS, BlockCGVec, BlockCGrQVec }; | ||||
| enum BlockCGtype { BlockCG, BlockCGrQ, CGmultiRHS }; | ||||
|  | ||||
| ////////////////////////////////////////////////////////////////////////// | ||||
| // Block conjugate gradient. Dimension zero should be the block direction | ||||
| @@ -42,6 +42,7 @@ template <class Field> | ||||
| class BlockConjugateGradient : public OperatorFunction<Field> { | ||||
|  public: | ||||
|  | ||||
|  | ||||
|   typedef typename Field::scalar_type scomplex; | ||||
|  | ||||
|   int blockDim ; | ||||
| @@ -53,15 +54,21 @@ class BlockConjugateGradient : public OperatorFunction<Field> { | ||||
|   RealD Tolerance; | ||||
|   Integer MaxIterations; | ||||
|   Integer IterationsToComplete; //Number of iterations the CG took to finish. Filled in upon completion | ||||
|   Integer PrintInterval; //GridLogMessages or Iterative | ||||
|    | ||||
|   BlockConjugateGradient(BlockCGtype cgtype,int _Orthog,RealD tol, Integer maxit, bool err_on_no_conv = true) | ||||
|     : Tolerance(tol), CGtype(cgtype),   blockDim(_Orthog),  MaxIterations(maxit), ErrorOnNoConverge(err_on_no_conv),PrintInterval(100) | ||||
|     : Tolerance(tol), CGtype(cgtype),   blockDim(_Orthog),  MaxIterations(maxit), ErrorOnNoConverge(err_on_no_conv) | ||||
|   {}; | ||||
|  | ||||
| //////////////////////////////////////////////////////////////////////////////////////////////////// | ||||
| // Thin QR factorisation (google it) | ||||
| //////////////////////////////////////////////////////////////////////////////////////////////////// | ||||
| void ThinQRfact (Eigen::MatrixXcd &m_rr, | ||||
| 		 Eigen::MatrixXcd &C, | ||||
| 		 Eigen::MatrixXcd &Cinv, | ||||
| 		 Field & Q, | ||||
| 		 const Field & R) | ||||
| { | ||||
|   int Orthog = blockDim; // First dimension is block dim; this is an assumption | ||||
|   //////////////////////////////////////////////////////////////////////////////////////////////////// | ||||
|   //Dimensions | ||||
|   // R_{ferm x Nblock} =  Q_{ferm x Nblock} x  C_{Nblock x Nblock} -> ferm x Nblock | ||||
| @@ -78,20 +85,22 @@ class BlockConjugateGradient : public OperatorFunction<Field> { | ||||
|   // Cdag C = Rdag R ; passes. | ||||
|   // QdagQ  = 1      ; passes | ||||
|   //////////////////////////////////////////////////////////////////////////////////////////////////// | ||||
| void ThinQRfact (Eigen::MatrixXcd &m_rr, | ||||
| 		 Eigen::MatrixXcd &C, | ||||
| 		 Eigen::MatrixXcd &Cinv, | ||||
| 		 Field & Q, | ||||
| 		 const Field & R) | ||||
| { | ||||
|   int Orthog = blockDim; // First dimension is block dim; this is an assumption | ||||
|   sliceInnerProductMatrix(m_rr,R,R,Orthog); | ||||
|  | ||||
|   // Force manifest hermitian to avoid rounding related | ||||
|   m_rr = 0.5*(m_rr+m_rr.adjoint()); | ||||
|  | ||||
|   Eigen::MatrixXcd L    = m_rr.llt().matrixL();  | ||||
| #if 0 | ||||
|   std::cout << " Calling Cholesky  ldlt on m_rr "  << m_rr <<std::endl; | ||||
|   Eigen::MatrixXcd L_ldlt = m_rr.ldlt().matrixL();  | ||||
|   std::cout << " Called Cholesky  ldlt on m_rr "  << L_ldlt <<std::endl; | ||||
|   auto  D_ldlt = m_rr.ldlt().vectorD();  | ||||
|   std::cout << " Called Cholesky  ldlt on m_rr "  << D_ldlt <<std::endl; | ||||
| #endif | ||||
|  | ||||
|   //  std::cout << " Calling Cholesky  llt on m_rr "  <<std::endl; | ||||
|   Eigen::MatrixXcd L    = m_rr.llt().matrixL();  | ||||
|   //  std::cout << " Called Cholesky  llt on m_rr "  << L <<std::endl; | ||||
|   C    = L.adjoint(); | ||||
|   Cinv = C.inverse(); | ||||
|   //////////////////////////////////////////////////////////////////////////////////////////////////// | ||||
| @@ -103,25 +112,6 @@ void ThinQRfact (Eigen::MatrixXcd &m_rr, | ||||
|   //////////////////////////////////////////////////////////////////////////////////////////////////// | ||||
|   sliceMulMatrix(Q,Cinv,R,Orthog); | ||||
| } | ||||
| // see comments above | ||||
| void ThinQRfact (Eigen::MatrixXcd &m_rr, | ||||
| 		 Eigen::MatrixXcd &C, | ||||
| 		 Eigen::MatrixXcd &Cinv, | ||||
| 		 std::vector<Field> & Q, | ||||
| 		 const std::vector<Field> & R) | ||||
| { | ||||
|   InnerProductMatrix(m_rr,R,R); | ||||
|  | ||||
|   m_rr = 0.5*(m_rr+m_rr.adjoint()); | ||||
|  | ||||
|   Eigen::MatrixXcd L    = m_rr.llt().matrixL();  | ||||
|  | ||||
|   C    = L.adjoint(); | ||||
|   Cinv = C.inverse(); | ||||
|  | ||||
|   MulMatrix(Q,Cinv,R); | ||||
| } | ||||
|  | ||||
| //////////////////////////////////////////////////////////////////////////////////////////////////// | ||||
| // Call one of several implementations | ||||
| //////////////////////////////////////////////////////////////////////////////////////////////////// | ||||
| @@ -129,20 +119,14 @@ void operator()(LinearOperatorBase<Field> &Linop, const Field &Src, Field &Psi) | ||||
| { | ||||
|   if ( CGtype == BlockCGrQ ) { | ||||
|     BlockCGrQsolve(Linop,Src,Psi); | ||||
|   } else if (CGtype == BlockCG ) { | ||||
|     BlockCGsolve(Linop,Src,Psi); | ||||
|   } else if (CGtype == CGmultiRHS ) { | ||||
|     CGmultiRHSsolve(Linop,Src,Psi); | ||||
|   } else { | ||||
|     assert(0); | ||||
|   } | ||||
| } | ||||
| virtual void operator()(LinearOperatorBase<Field> &Linop, const std::vector<Field> &Src, std::vector<Field> &Psi)  | ||||
| { | ||||
|   if ( CGtype == BlockCGrQVec ) { | ||||
|     BlockCGrQsolveVec(Linop,Src,Psi); | ||||
|   } else { | ||||
|     assert(0); | ||||
|   } | ||||
| } | ||||
|  | ||||
| //////////////////////////////////////////////////////////////////////////// | ||||
| // BlockCGrQ implementation: | ||||
| @@ -155,8 +139,7 @@ void BlockCGrQsolve(LinearOperatorBase<Field> &Linop, const Field &B, Field &X) | ||||
| { | ||||
|   int Orthog = blockDim; // First dimension is block dim; this is an assumption | ||||
|   Nblock = B._grid->_fdimensions[Orthog]; | ||||
| /* FAKE */ | ||||
|   Nblock=8; | ||||
|  | ||||
|   std::cout<<GridLogMessage<<" Block Conjugate Gradient : Orthog "<<Orthog<<" Nblock "<<Nblock<<std::endl; | ||||
|  | ||||
|   X.checkerboard = B.checkerboard; | ||||
| @@ -219,10 +202,15 @@ void BlockCGrQsolve(LinearOperatorBase<Field> &Linop, const Field &B, Field &X) | ||||
|   std::cout << GridLogMessage<<"BlockCGrQ algorithm initialisation " <<std::endl; | ||||
|  | ||||
|   //1.  QC = R = B-AX, D = Q     ; QC => Thin QR factorisation (google it) | ||||
|  | ||||
|   Linop.HermOp(X, AD); | ||||
|   tmp = B - AD;   | ||||
|  | ||||
|   //std::cout << GridLogMessage << " initial tmp " << norm2(tmp)<< std::endl; | ||||
|   ThinQRfact (m_rr, m_C, m_Cinv, Q, tmp); | ||||
|   //std::cout << GridLogMessage << " initial Q " << norm2(Q)<< std::endl; | ||||
|   //std::cout << GridLogMessage << " m_rr " << m_rr<<std::endl; | ||||
|   //std::cout << GridLogMessage << " m_C " << m_C<<std::endl; | ||||
|   //std::cout << GridLogMessage << " m_Cinv " << m_Cinv<<std::endl; | ||||
|   D=Q; | ||||
|  | ||||
|   std::cout << GridLogMessage<<"BlockCGrQ computed initial residual and QR fact " <<std::endl; | ||||
| @@ -244,12 +232,14 @@ void BlockCGrQsolve(LinearOperatorBase<Field> &Linop, const Field &B, Field &X) | ||||
|     MatrixTimer.Start(); | ||||
|     Linop.HermOp(D, Z);       | ||||
|     MatrixTimer.Stop(); | ||||
|     //std::cout << GridLogMessage << " norm2 Z " <<norm2(Z)<<std::endl; | ||||
|  | ||||
|     //4. M  = [D^dag Z]^{-1} | ||||
|     sliceInnerTimer.Start(); | ||||
|     sliceInnerProductMatrix(m_DZ,D,Z,Orthog); | ||||
|     sliceInnerTimer.Stop(); | ||||
|     m_M       = m_DZ.inverse(); | ||||
|     //std::cout << GridLogMessage << " m_DZ " <<m_DZ<<std::endl; | ||||
|      | ||||
|     //5. X  = X + D MC | ||||
|     m_tmp     = m_M * m_C; | ||||
| @@ -267,7 +257,6 @@ void BlockCGrQsolve(LinearOperatorBase<Field> &Linop, const Field &B, Field &X) | ||||
|      | ||||
|     //7. D  = Q + D S^dag | ||||
|     m_tmp = m_S.adjoint(); | ||||
|  | ||||
|     sliceMaddTimer.Start(); | ||||
|     sliceMaddMatrix(D,m_tmp,D,Q,Orthog); | ||||
|     sliceMaddTimer.Stop(); | ||||
| @@ -328,6 +317,152 @@ void BlockCGrQsolve(LinearOperatorBase<Field> &Linop, const Field &B, Field &X) | ||||
|   IterationsToComplete = k; | ||||
| } | ||||
| ////////////////////////////////////////////////////////////////////////// | ||||
| // Block conjugate gradient; Original O'Leary Dimension zero should be the block direction | ||||
| ////////////////////////////////////////////////////////////////////////// | ||||
| void BlockCGsolve(LinearOperatorBase<Field> &Linop, const Field &Src, Field &Psi)  | ||||
| { | ||||
|   int Orthog = blockDim; // First dimension is block dim; this is an assumption | ||||
|   Nblock = Src._grid->_fdimensions[Orthog]; | ||||
|  | ||||
|   std::cout<<GridLogMessage<<" Block Conjugate Gradient : Orthog "<<Orthog<<" Nblock "<<Nblock<<std::endl; | ||||
|  | ||||
|   Psi.checkerboard = Src.checkerboard; | ||||
|   conformable(Psi, Src); | ||||
|  | ||||
|   Field P(Src); | ||||
|   Field AP(Src); | ||||
|   Field R(Src); | ||||
|    | ||||
|   Eigen::MatrixXcd m_pAp    = Eigen::MatrixXcd::Identity(Nblock,Nblock); | ||||
|   Eigen::MatrixXcd m_pAp_inv= Eigen::MatrixXcd::Identity(Nblock,Nblock); | ||||
|   Eigen::MatrixXcd m_rr     = Eigen::MatrixXcd::Zero(Nblock,Nblock); | ||||
|   Eigen::MatrixXcd m_rr_inv = Eigen::MatrixXcd::Zero(Nblock,Nblock); | ||||
|  | ||||
|   Eigen::MatrixXcd m_alpha      = Eigen::MatrixXcd::Zero(Nblock,Nblock); | ||||
|   Eigen::MatrixXcd m_beta   = Eigen::MatrixXcd::Zero(Nblock,Nblock); | ||||
|  | ||||
|   // Initial residual computation & set up | ||||
|   std::vector<RealD> residuals(Nblock); | ||||
|   std::vector<RealD> ssq(Nblock); | ||||
|  | ||||
|   sliceNorm(ssq,Src,Orthog); | ||||
|   RealD sssum=0; | ||||
|   for(int b=0;b<Nblock;b++) sssum+=ssq[b]; | ||||
|  | ||||
|   sliceNorm(residuals,Src,Orthog); | ||||
|   for(int b=0;b<Nblock;b++){ assert(std::isnan(residuals[b])==0); } | ||||
|  | ||||
|   sliceNorm(residuals,Psi,Orthog); | ||||
|   for(int b=0;b<Nblock;b++){ assert(std::isnan(residuals[b])==0); } | ||||
|  | ||||
|   // Initial search dir is guess | ||||
|   Linop.HermOp(Psi, AP); | ||||
|    | ||||
|  | ||||
|   /************************************************************************ | ||||
|    * Block conjugate gradient (Stephen Pickles, thesis 1995, pp 71, O Leary 1980) | ||||
|    ************************************************************************ | ||||
|    * O'Leary : R = B - A X | ||||
|    * O'Leary : P = M R ; preconditioner M = 1 | ||||
|    * O'Leary : alpha = PAP^{-1} RMR | ||||
|    * O'Leary : beta  = RMR^{-1}_old RMR_new | ||||
|    * O'Leary : X=X+Palpha | ||||
|    * O'Leary : R_new=R_old-AP alpha | ||||
|    * O'Leary : P=MR_new+P beta | ||||
|    */ | ||||
|  | ||||
|   R = Src - AP;   | ||||
|   P = R; | ||||
|   sliceInnerProductMatrix(m_rr,R,R,Orthog); | ||||
|  | ||||
|   GridStopWatch sliceInnerTimer; | ||||
|   GridStopWatch sliceMaddTimer; | ||||
|   GridStopWatch MatrixTimer; | ||||
|   GridStopWatch SolverTimer; | ||||
|   SolverTimer.Start(); | ||||
|  | ||||
|   int k; | ||||
|   for (k = 1; k <= MaxIterations; k++) { | ||||
|  | ||||
|     RealD rrsum=0; | ||||
|     for(int b=0;b<Nblock;b++) rrsum+=real(m_rr(b,b)); | ||||
|  | ||||
|     std::cout << GridLogIterative << "\titeration "<<k<<" rr_sum "<<rrsum<<" ssq_sum "<< sssum | ||||
| 	      <<" / "<<std::sqrt(rrsum/sssum) <<std::endl; | ||||
|  | ||||
|     MatrixTimer.Start(); | ||||
|     Linop.HermOp(P, AP); | ||||
|     MatrixTimer.Stop(); | ||||
|  | ||||
|     // Alpha | ||||
|     sliceInnerTimer.Start(); | ||||
|     sliceInnerProductMatrix(m_pAp,P,AP,Orthog); | ||||
|     sliceInnerTimer.Stop(); | ||||
|     m_pAp_inv = m_pAp.inverse(); | ||||
|     m_alpha   = m_pAp_inv * m_rr ; | ||||
|  | ||||
|     // Psi, R update | ||||
|     sliceMaddTimer.Start(); | ||||
|     sliceMaddMatrix(Psi,m_alpha, P,Psi,Orthog);     // add alpha *  P to psi | ||||
|     sliceMaddMatrix(R  ,m_alpha,AP,  R,Orthog,-1.0);// sub alpha * AP to resid | ||||
|     sliceMaddTimer.Stop(); | ||||
|  | ||||
|     // Beta | ||||
|     m_rr_inv = m_rr.inverse(); | ||||
|     sliceInnerTimer.Start(); | ||||
|     sliceInnerProductMatrix(m_rr,R,R,Orthog); | ||||
|     sliceInnerTimer.Stop(); | ||||
|     m_beta = m_rr_inv *m_rr; | ||||
|  | ||||
|     // Search update | ||||
|     sliceMaddTimer.Start(); | ||||
|     sliceMaddMatrix(AP,m_beta,P,R,Orthog); | ||||
|     sliceMaddTimer.Stop(); | ||||
|     P= AP; | ||||
|  | ||||
|     /********************* | ||||
|      * convergence monitor | ||||
|      ********************* | ||||
|      */ | ||||
|     RealD max_resid=0; | ||||
|     RealD rr; | ||||
|     for(int b=0;b<Nblock;b++){ | ||||
|       rr = real(m_rr(b,b))/ssq[b]; | ||||
|       if ( rr > max_resid ) max_resid = rr; | ||||
|     } | ||||
|      | ||||
|     if ( max_resid < Tolerance*Tolerance ) {  | ||||
|  | ||||
|       SolverTimer.Stop(); | ||||
|  | ||||
|       std::cout << GridLogMessage<<"BlockCG converged in "<<k<<" iterations"<<std::endl; | ||||
|       for(int b=0;b<Nblock;b++){ | ||||
| 	std::cout << GridLogMessage<< "\t\tblock "<<b<<" computed resid " | ||||
| 		  << std::sqrt(real(m_rr(b,b))/ssq[b])<<std::endl; | ||||
|       } | ||||
|       std::cout << GridLogMessage<<"\tMax residual is "<<std::sqrt(max_resid)<<std::endl; | ||||
|  | ||||
|       Linop.HermOp(Psi, AP); | ||||
|       AP = AP-Src; | ||||
|       std::cout << GridLogMessage <<"\t True residual is " << std::sqrt(norm2(AP)/norm2(Src)) <<std::endl; | ||||
|  | ||||
|       std::cout << GridLogMessage << "Time Breakdown "<<std::endl; | ||||
|       std::cout << GridLogMessage << "\tElapsed    " << SolverTimer.Elapsed()     <<std::endl; | ||||
|       std::cout << GridLogMessage << "\tMatrix     " << MatrixTimer.Elapsed()     <<std::endl; | ||||
|       std::cout << GridLogMessage << "\tInnerProd  " << sliceInnerTimer.Elapsed() <<std::endl; | ||||
|       std::cout << GridLogMessage << "\tMaddMatrix " << sliceMaddTimer.Elapsed()  <<std::endl; | ||||
| 	     | ||||
|       IterationsToComplete = k; | ||||
|       return; | ||||
|     } | ||||
|  | ||||
|   } | ||||
|   std::cout << GridLogMessage << "BlockConjugateGradient did NOT converge" << std::endl; | ||||
|  | ||||
|   if (ErrorOnNoConverge) assert(0); | ||||
|   IterationsToComplete = k; | ||||
| } | ||||
| ////////////////////////////////////////////////////////////////////////// | ||||
| // multiRHS conjugate gradient. Dimension zero should be the block direction | ||||
| // Use this for spread out across nodes | ||||
| ////////////////////////////////////////////////////////////////////////// | ||||
| @@ -465,233 +600,6 @@ void CGmultiRHSsolve(LinearOperatorBase<Field> &Linop, const Field &Src, Field & | ||||
|   IterationsToComplete = k; | ||||
| } | ||||
|  | ||||
| void InnerProductMatrix(Eigen::MatrixXcd &m , const std::vector<Field> &X, const std::vector<Field> &Y){ | ||||
|   for(int b=0;b<Nblock;b++){ | ||||
|   for(int bp=0;bp<Nblock;bp++) { | ||||
|     m(b,bp) = innerProduct(X[b],Y[bp]);   | ||||
|   }} | ||||
| } | ||||
| void MaddMatrix(std::vector<Field> &AP, Eigen::MatrixXcd &m , const std::vector<Field> &X,const std::vector<Field> &Y,RealD scale=1.0){ | ||||
|   // Should make this cache friendly with site outermost, parallel_for | ||||
|   // Deal with case AP aliases with either Y or X | ||||
|   std::vector<Field> tmp(Nblock,X[0]); | ||||
|   for(int b=0;b<Nblock;b++){ | ||||
|     tmp[b]   = Y[b]; | ||||
|     for(int bp=0;bp<Nblock;bp++) { | ||||
|       tmp[b] = tmp[b] + (scale*m(bp,b))*X[bp];  | ||||
|     } | ||||
|   } | ||||
|   for(int b=0;b<Nblock;b++){ | ||||
|     AP[b] = tmp[b]; | ||||
|   } | ||||
| } | ||||
| void MulMatrix(std::vector<Field> &AP, Eigen::MatrixXcd &m , const std::vector<Field> &X){ | ||||
|   // Should make this cache friendly with site outermost, parallel_for | ||||
|   for(int b=0;b<Nblock;b++){ | ||||
|     AP[b] = zero; | ||||
|     for(int bp=0;bp<Nblock;bp++) { | ||||
|       AP[b] += (m(bp,b))*X[bp];  | ||||
|     } | ||||
|   } | ||||
| } | ||||
| double normv(const std::vector<Field> &P){ | ||||
|   double nn = 0.0; | ||||
|   for(int b=0;b<Nblock;b++) { | ||||
|     nn+=norm2(P[b]); | ||||
|   } | ||||
|   return nn; | ||||
| } | ||||
|  | ||||
| //////////////////////////////////////////////////////////////////////////// | ||||
| // BlockCGrQvec implementation: | ||||
| //-------------------------- | ||||
| // X is guess/Solution | ||||
| // B is RHS | ||||
| // Solve A X_i = B_i    ;        i refers to Nblock index | ||||
| //////////////////////////////////////////////////////////////////////////// | ||||
| void BlockCGrQsolveVec(LinearOperatorBase<Field> &Linop, const std::vector<Field> &B, std::vector<Field> &X)  | ||||
| { | ||||
|   Nblock = B.size(); | ||||
|   assert(Nblock == X.size()); | ||||
|  | ||||
|   std::cout<<GridLogMessage<<" Block Conjugate Gradient Vec rQ : Nblock "<<Nblock<<std::endl; | ||||
|  | ||||
|   for(int b=0;b<Nblock;b++){  | ||||
|     X[b].checkerboard = B[b].checkerboard; | ||||
|     conformable(X[b], B[b]); | ||||
|     conformable(X[b], X[0]);  | ||||
|   } | ||||
|  | ||||
|   Field Fake(B[0]); | ||||
|  | ||||
|   std::vector<Field> tmp(Nblock,Fake); | ||||
|   std::vector<Field>   Q(Nblock,Fake); | ||||
|   std::vector<Field>   D(Nblock,Fake); | ||||
|   std::vector<Field>   Z(Nblock,Fake); | ||||
|   std::vector<Field>  AD(Nblock,Fake); | ||||
|  | ||||
|   Eigen::MatrixXcd m_DZ     = Eigen::MatrixXcd::Identity(Nblock,Nblock); | ||||
|   Eigen::MatrixXcd m_M      = Eigen::MatrixXcd::Identity(Nblock,Nblock); | ||||
|   Eigen::MatrixXcd m_rr     = Eigen::MatrixXcd::Zero(Nblock,Nblock); | ||||
|  | ||||
|   Eigen::MatrixXcd m_C      = Eigen::MatrixXcd::Zero(Nblock,Nblock); | ||||
|   Eigen::MatrixXcd m_Cinv   = Eigen::MatrixXcd::Zero(Nblock,Nblock); | ||||
|   Eigen::MatrixXcd m_S      = Eigen::MatrixXcd::Zero(Nblock,Nblock); | ||||
|   Eigen::MatrixXcd m_Sinv   = Eigen::MatrixXcd::Zero(Nblock,Nblock); | ||||
|  | ||||
|   Eigen::MatrixXcd m_tmp    = Eigen::MatrixXcd::Identity(Nblock,Nblock); | ||||
|   Eigen::MatrixXcd m_tmp1   = Eigen::MatrixXcd::Identity(Nblock,Nblock); | ||||
|  | ||||
|   // Initial residual computation & set up | ||||
|   std::vector<RealD> residuals(Nblock); | ||||
|   std::vector<RealD> ssq(Nblock); | ||||
|  | ||||
|   RealD sssum=0; | ||||
|   for(int b=0;b<Nblock;b++){ ssq[b] = norm2(B[b]);} | ||||
|   for(int b=0;b<Nblock;b++) sssum+=ssq[b]; | ||||
|  | ||||
|   for(int b=0;b<Nblock;b++){ residuals[b] = norm2(B[b]);} | ||||
|   for(int b=0;b<Nblock;b++){ assert(std::isnan(residuals[b])==0); } | ||||
|  | ||||
|   for(int b=0;b<Nblock;b++){ residuals[b] = norm2(X[b]);} | ||||
|   for(int b=0;b<Nblock;b++){ assert(std::isnan(residuals[b])==0); } | ||||
|  | ||||
|   /************************************************************************ | ||||
|    * Block conjugate gradient rQ (Sebastien Birk Thesis, after Dubrulle 2001) | ||||
|    ************************************************************************ | ||||
|    * Dimensions: | ||||
|    * | ||||
|    *   X,B==(Nferm x Nblock) | ||||
|    *   A==(Nferm x Nferm) | ||||
|    *   | ||||
|    * Nferm = Nspin x Ncolour x Ncomplex x Nlattice_site | ||||
|    *  | ||||
|    * QC = R = B-AX, D = Q     ; QC => Thin QR factorisation (google it) | ||||
|    * for k:  | ||||
|    *   Z  = AD | ||||
|    *   M  = [D^dag Z]^{-1} | ||||
|    *   X  = X + D MC | ||||
|    *   QS = Q - ZM | ||||
|    *   D  = Q + D S^dag | ||||
|    *   C  = S C | ||||
|    */ | ||||
|   /////////////////////////////////////// | ||||
|   // Initial block: initial search dir is guess | ||||
|   /////////////////////////////////////// | ||||
|   std::cout << GridLogMessage<<"BlockCGrQvec algorithm initialisation " <<std::endl; | ||||
|  | ||||
|   //1.  QC = R = B-AX, D = Q     ; QC => Thin QR factorisation (google it) | ||||
|   for(int b=0;b<Nblock;b++) { | ||||
|     Linop.HermOp(X[b], AD[b]); | ||||
|     tmp[b] = B[b] - AD[b];   | ||||
|   } | ||||
|  | ||||
|   ThinQRfact (m_rr, m_C, m_Cinv, Q, tmp); | ||||
|  | ||||
|   for(int b=0;b<Nblock;b++) D[b]=Q[b]; | ||||
|  | ||||
|   std::cout << GridLogMessage<<"BlockCGrQ vec computed initial residual and QR fact " <<std::endl; | ||||
|  | ||||
|   /////////////////////////////////////// | ||||
|   // Timers | ||||
|   /////////////////////////////////////// | ||||
|   GridStopWatch sliceInnerTimer; | ||||
|   GridStopWatch sliceMaddTimer; | ||||
|   GridStopWatch QRTimer; | ||||
|   GridStopWatch MatrixTimer; | ||||
|   GridStopWatch SolverTimer; | ||||
|   SolverTimer.Start(); | ||||
|  | ||||
|   int k; | ||||
|   for (k = 1; k <= MaxIterations; k++) { | ||||
|  | ||||
|     //3. Z  = AD | ||||
|     MatrixTimer.Start(); | ||||
|     for(int b=0;b<Nblock;b++) Linop.HermOp(D[b], Z[b]);       | ||||
|     MatrixTimer.Stop(); | ||||
|  | ||||
|     //4. M  = [D^dag Z]^{-1} | ||||
|     sliceInnerTimer.Start(); | ||||
|     InnerProductMatrix(m_DZ,D,Z); | ||||
|     sliceInnerTimer.Stop(); | ||||
|     m_M       = m_DZ.inverse(); | ||||
|      | ||||
|     //5. X  = X + D MC | ||||
|     m_tmp     = m_M * m_C; | ||||
|     sliceMaddTimer.Start(); | ||||
|     MaddMatrix(X,m_tmp, D,X);      | ||||
|     sliceMaddTimer.Stop(); | ||||
|  | ||||
|     //6. QS = Q - ZM | ||||
|     sliceMaddTimer.Start(); | ||||
|     MaddMatrix(tmp,m_M,Z,Q,-1.0); | ||||
|     sliceMaddTimer.Stop(); | ||||
|     QRTimer.Start(); | ||||
|     ThinQRfact (m_rr, m_S, m_Sinv, Q, tmp); | ||||
|     QRTimer.Stop(); | ||||
|      | ||||
|     //7. D  = Q + D S^dag | ||||
|     m_tmp = m_S.adjoint(); | ||||
|     sliceMaddTimer.Start(); | ||||
|     MaddMatrix(D,m_tmp,D,Q); | ||||
|     sliceMaddTimer.Stop(); | ||||
|  | ||||
|     //8. C  = S C | ||||
|     m_C = m_S*m_C; | ||||
|      | ||||
|     /********************* | ||||
|      * convergence monitor | ||||
|      ********************* | ||||
|      */ | ||||
|     m_rr = m_C.adjoint() * m_C; | ||||
|  | ||||
|     RealD max_resid=0; | ||||
|     RealD rrsum=0; | ||||
|     RealD rr; | ||||
|  | ||||
|     for(int b=0;b<Nblock;b++) { | ||||
|       rrsum+=real(m_rr(b,b)); | ||||
|       rr = real(m_rr(b,b))/ssq[b]; | ||||
|       if ( rr > max_resid ) max_resid = rr; | ||||
|     } | ||||
|  | ||||
|     std::cout << GridLogIterative << "\t Block Iteration "<<k<<" ave resid "<< sqrt(rrsum/sssum) << " max "<< sqrt(max_resid) <<std::endl; | ||||
|  | ||||
|     if ( max_resid < Tolerance*Tolerance ) {  | ||||
|  | ||||
|       SolverTimer.Stop(); | ||||
|  | ||||
|       std::cout << GridLogMessage<<"BlockCGrQ converged in "<<k<<" iterations"<<std::endl; | ||||
|  | ||||
|       for(int b=0;b<Nblock;b++){ | ||||
| 	std::cout << GridLogMessage<< "\t\tblock "<<b<<" computed resid "<< std::sqrt(real(m_rr(b,b))/ssq[b])<<std::endl; | ||||
|       } | ||||
|       std::cout << GridLogMessage<<"\tMax residual is "<<std::sqrt(max_resid)<<std::endl; | ||||
|  | ||||
|       for(int b=0;b<Nblock;b++) Linop.HermOp(X[b], AD[b]); | ||||
|       for(int b=0;b<Nblock;b++) AD[b] = AD[b]-B[b]; | ||||
|       std::cout << GridLogMessage <<"\t True residual is " << std::sqrt(normv(AD)/normv(B)) <<std::endl; | ||||
|  | ||||
|       std::cout << GridLogMessage << "Time Breakdown "<<std::endl; | ||||
|       std::cout << GridLogMessage << "\tElapsed    " << SolverTimer.Elapsed()     <<std::endl; | ||||
|       std::cout << GridLogMessage << "\tMatrix     " << MatrixTimer.Elapsed()     <<std::endl; | ||||
|       std::cout << GridLogMessage << "\tInnerProd  " << sliceInnerTimer.Elapsed() <<std::endl; | ||||
|       std::cout << GridLogMessage << "\tMaddMatrix " << sliceMaddTimer.Elapsed()  <<std::endl; | ||||
|       std::cout << GridLogMessage << "\tThinQRfact " << QRTimer.Elapsed()  <<std::endl; | ||||
| 	     | ||||
|       IterationsToComplete = k; | ||||
|       return; | ||||
|     } | ||||
|  | ||||
|   } | ||||
|   std::cout << GridLogMessage << "BlockConjugateGradient(rQ) did NOT converge" << std::endl; | ||||
|  | ||||
|   if (ErrorOnNoConverge) assert(0); | ||||
|   IterationsToComplete = k; | ||||
| } | ||||
|  | ||||
|  | ||||
|  | ||||
| }; | ||||
|  | ||||
| } | ||||
|   | ||||
| @@ -1,244 +0,0 @@ | ||||
| /************************************************************************************* | ||||
|  | ||||
| Grid physics library, www.github.com/paboyle/Grid | ||||
|  | ||||
| Source file: ./lib/algorithms/iterative/CommunicationAvoidingGeneralisedMinimalResidual.h | ||||
|  | ||||
| Copyright (C) 2015 | ||||
|  | ||||
| Author: Daniel Richtmann <daniel.richtmann@ur.de> | ||||
|  | ||||
| This program is free software; you can redistribute it and/or modify | ||||
| it under the terms of the GNU General Public License as published by | ||||
| the Free Software Foundation; either version 2 of the License, or | ||||
| (at your option) any later version. | ||||
|  | ||||
| This program is distributed in the hope that it will be useful, | ||||
| but WITHOUT ANY WARRANTY; without even the implied warranty of | ||||
| MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the | ||||
| GNU General Public License for more details. | ||||
|  | ||||
| You should have received a copy of the GNU General Public License along | ||||
| with this program; if not, write to the Free Software Foundation, Inc., | ||||
| 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. | ||||
|  | ||||
| See the full license in the file "LICENSE" in the top level distribution | ||||
| directory | ||||
| *************************************************************************************/ | ||||
| /*  END LEGAL */ | ||||
| #ifndef GRID_COMMUNICATION_AVOIDING_GENERALISED_MINIMAL_RESIDUAL_H | ||||
| #define GRID_COMMUNICATION_AVOIDING_GENERALISED_MINIMAL_RESIDUAL_H | ||||
|  | ||||
| namespace Grid { | ||||
|  | ||||
| template<class Field> | ||||
| class CommunicationAvoidingGeneralisedMinimalResidual : public OperatorFunction<Field> { | ||||
|  public: | ||||
|   bool ErrorOnNoConverge; // Throw an assert when CAGMRES fails to converge, | ||||
|                           // defaults to true | ||||
|  | ||||
|   RealD   Tolerance; | ||||
|  | ||||
|   Integer MaxIterations; | ||||
|   Integer RestartLength; | ||||
|   Integer MaxNumberOfRestarts; | ||||
|   Integer IterationCount; // Number of iterations the CAGMRES took to finish, | ||||
|                           // filled in upon completion | ||||
|  | ||||
|   GridStopWatch MatrixTimer; | ||||
|   GridStopWatch LinalgTimer; | ||||
|   GridStopWatch QrTimer; | ||||
|   GridStopWatch CompSolutionTimer; | ||||
|  | ||||
|   Eigen::MatrixXcd H; | ||||
|  | ||||
|   std::vector<std::complex<double>> y; | ||||
|   std::vector<std::complex<double>> gamma; | ||||
|   std::vector<std::complex<double>> c; | ||||
|   std::vector<std::complex<double>> s; | ||||
|  | ||||
|   CommunicationAvoidingGeneralisedMinimalResidual(RealD   tol, | ||||
|                                                   Integer maxit, | ||||
|                                                   Integer restart_length, | ||||
|                                                   bool    err_on_no_conv = true) | ||||
|       : Tolerance(tol) | ||||
|       , MaxIterations(maxit) | ||||
|       , RestartLength(restart_length) | ||||
|       , MaxNumberOfRestarts(MaxIterations/RestartLength + ((MaxIterations%RestartLength == 0) ? 0 : 1)) | ||||
|       , ErrorOnNoConverge(err_on_no_conv) | ||||
|       , H(Eigen::MatrixXcd::Zero(RestartLength, RestartLength + 1)) // sizes taken from DD-αAMG code base | ||||
|       , y(RestartLength + 1, 0.) | ||||
|       , gamma(RestartLength + 1, 0.) | ||||
|       , c(RestartLength + 1, 0.) | ||||
|       , s(RestartLength + 1, 0.) {}; | ||||
|  | ||||
|   void operator()(LinearOperatorBase<Field> &LinOp, const Field &src, Field &psi) { | ||||
|  | ||||
|     std::cout << GridLogWarning << "This algorithm currently doesn't differ from regular GMRES" << std::endl; | ||||
|  | ||||
|     psi.checkerboard = src.checkerboard; | ||||
|     conformable(psi, src); | ||||
|  | ||||
|     RealD guess = norm2(psi); | ||||
|     assert(std::isnan(guess) == 0); | ||||
|  | ||||
|     RealD cp; | ||||
|     RealD ssq = norm2(src); | ||||
|     RealD rsq = Tolerance * Tolerance * ssq; | ||||
|  | ||||
|     Field r(src._grid); | ||||
|  | ||||
|     std::cout << std::setprecision(4) << std::scientific; | ||||
|     std::cout << GridLogIterative << "CommunicationAvoidingGeneralisedMinimalResidual: guess " << guess << std::endl; | ||||
|     std::cout << GridLogIterative << "CommunicationAvoidingGeneralisedMinimalResidual:   src " << ssq   << std::endl; | ||||
|  | ||||
|     MatrixTimer.Reset(); | ||||
|     LinalgTimer.Reset(); | ||||
|     QrTimer.Reset(); | ||||
|     CompSolutionTimer.Reset(); | ||||
|  | ||||
|     GridStopWatch SolverTimer; | ||||
|     SolverTimer.Start(); | ||||
|  | ||||
|     IterationCount = 0; | ||||
|  | ||||
|     for (int k=0; k<MaxNumberOfRestarts; k++) { | ||||
|  | ||||
|       cp = outerLoopBody(LinOp, src, psi, rsq); | ||||
|  | ||||
|       // Stopping condition | ||||
|       if (cp <= rsq) { | ||||
|  | ||||
|         SolverTimer.Stop(); | ||||
|  | ||||
|         LinOp.Op(psi,r); | ||||
|         axpy(r,-1.0,src,r); | ||||
|  | ||||
|         RealD srcnorm       = sqrt(ssq); | ||||
|         RealD resnorm       = sqrt(norm2(r)); | ||||
|         RealD true_residual = resnorm / srcnorm; | ||||
|  | ||||
|         std::cout << GridLogMessage        << "CommunicationAvoidingGeneralisedMinimalResidual: Converged on iteration " << IterationCount | ||||
|                   << " computed residual " << sqrt(cp / ssq) | ||||
|                   << " true residual "     << true_residual | ||||
|                   << " target "            << Tolerance << std::endl; | ||||
|  | ||||
|         std::cout << GridLogMessage << "CAGMRES Time elapsed: Total   " <<       SolverTimer.Elapsed() << std::endl; | ||||
|         std::cout << GridLogMessage << "CAGMRES Time elapsed: Matrix  " <<       MatrixTimer.Elapsed() << std::endl; | ||||
|         std::cout << GridLogMessage << "CAGMRES Time elapsed: Linalg  " <<       LinalgTimer.Elapsed() << std::endl; | ||||
|         std::cout << GridLogMessage << "CAGMRES Time elapsed: QR      " <<           QrTimer.Elapsed() << std::endl; | ||||
|         std::cout << GridLogMessage << "CAGMRES Time elapsed: CompSol " << CompSolutionTimer.Elapsed() << std::endl; | ||||
|         return; | ||||
|       } | ||||
|     } | ||||
|  | ||||
|     std::cout << GridLogMessage << "CommunicationAvoidingGeneralisedMinimalResidual did NOT converge" << std::endl; | ||||
|  | ||||
|     if (ErrorOnNoConverge) | ||||
|       assert(0); | ||||
|   } | ||||
|  | ||||
|   RealD outerLoopBody(LinearOperatorBase<Field> &LinOp, const Field &src, Field &psi, RealD rsq) { | ||||
|  | ||||
|     RealD cp = 0; | ||||
|  | ||||
|     Field w(src._grid); | ||||
|     Field r(src._grid); | ||||
|  | ||||
|     // this should probably be made a class member so that it is only allocated once, not in every restart | ||||
|     std::vector<Field> v(RestartLength + 1, src._grid); for (auto &elem : v) elem = zero; | ||||
|  | ||||
|     MatrixTimer.Start(); | ||||
|     LinOp.Op(psi, w); | ||||
|     MatrixTimer.Stop(); | ||||
|  | ||||
|     LinalgTimer.Start(); | ||||
|     r = src - w; | ||||
|  | ||||
|     gamma[0] = sqrt(norm2(r)); | ||||
|  | ||||
|     v[0] = (1. / gamma[0]) * r; | ||||
|     LinalgTimer.Stop(); | ||||
|  | ||||
|     for (int i=0; i<RestartLength; i++) { | ||||
|  | ||||
|       IterationCount++; | ||||
|  | ||||
|       arnoldiStep(LinOp, v, w, i); | ||||
|  | ||||
|       qrUpdate(i); | ||||
|  | ||||
|       cp = std::norm(gamma[i+1]); | ||||
|  | ||||
|       std::cout << GridLogIterative << "CommunicationAvoidingGeneralisedMinimalResidual: Iteration " << IterationCount | ||||
|                 << " residual " << cp << " target " << rsq << std::endl; | ||||
|  | ||||
|       if ((i == RestartLength - 1) || (IterationCount == MaxIterations) || (cp <= rsq)) { | ||||
|  | ||||
|         computeSolution(v, psi, i); | ||||
|  | ||||
|         return cp; | ||||
|       } | ||||
|     } | ||||
|  | ||||
|     assert(0); // Never reached | ||||
|     return cp; | ||||
|   } | ||||
|  | ||||
|   void arnoldiStep(LinearOperatorBase<Field> &LinOp, std::vector<Field> &v, Field &w, int iter) { | ||||
|  | ||||
|     MatrixTimer.Start(); | ||||
|     LinOp.Op(v[iter], w); | ||||
|     MatrixTimer.Stop(); | ||||
|  | ||||
|     LinalgTimer.Start(); | ||||
|     for (int i = 0; i <= iter; ++i) { | ||||
|       H(iter, i) = innerProduct(v[i], w); | ||||
|       w = w - H(iter, i) * v[i]; | ||||
|     } | ||||
|  | ||||
|     H(iter, iter + 1) = sqrt(norm2(w)); | ||||
|     v[iter + 1] = (1. / H(iter, iter + 1)) * w; | ||||
|     LinalgTimer.Stop(); | ||||
|   } | ||||
|  | ||||
|   void qrUpdate(int iter) { | ||||
|  | ||||
|     QrTimer.Start(); | ||||
|     for (int i = 0; i < iter ; ++i) { | ||||
|       auto tmp       = -s[i] * H(iter, i) + c[i] * H(iter, i + 1); | ||||
|       H(iter, i)     = std::conj(c[i]) * H(iter, i) + std::conj(s[i]) * H(iter, i + 1); | ||||
|       H(iter, i + 1) = tmp; | ||||
|     } | ||||
|  | ||||
|     // Compute new Givens Rotation | ||||
|     ComplexD nu = sqrt(std::norm(H(iter, iter)) + std::norm(H(iter, iter + 1))); | ||||
|     c[iter]     = H(iter, iter) / nu; | ||||
|     s[iter]     = H(iter, iter + 1) / nu; | ||||
|  | ||||
|     // Apply new Givens rotation | ||||
|     H(iter, iter)     = nu; | ||||
|     H(iter, iter + 1) = 0.; | ||||
|  | ||||
|     gamma[iter + 1] = -s[iter] * gamma[iter]; | ||||
|     gamma[iter]     = std::conj(c[iter]) * gamma[iter]; | ||||
|     QrTimer.Stop(); | ||||
|   } | ||||
|  | ||||
|   void computeSolution(std::vector<Field> const &v, Field &psi, int iter) { | ||||
|  | ||||
|     CompSolutionTimer.Start(); | ||||
|     for (int i = iter; i >= 0; i--) { | ||||
|       y[i] = gamma[i]; | ||||
|       for (int k = i + 1; k <= iter; k++) | ||||
|         y[i] = y[i] - H(k, i) * y[k]; | ||||
|       y[i] = y[i] / H(i, i); | ||||
|     } | ||||
|  | ||||
|     for (int i = 0; i <= iter; i++) | ||||
|       psi = psi + v[i] * y[i]; | ||||
|     CompSolutionTimer.Stop(); | ||||
|   } | ||||
| }; | ||||
| } | ||||
| #endif | ||||
| @@ -89,8 +89,6 @@ class ConjugateGradient : public OperatorFunction<Field> { | ||||
|  | ||||
|     // Check if guess is really REALLY good :) | ||||
|     if (cp <= rsq) { | ||||
|       std::cout << GridLogMessage << "ConjugateGradient guess is converged already " << std::endl; | ||||
|       IterationsToComplete = 0;	 | ||||
|       return; | ||||
|     } | ||||
|  | ||||
| @@ -106,7 +104,7 @@ class ConjugateGradient : public OperatorFunction<Field> { | ||||
|  | ||||
|     SolverTimer.Start(); | ||||
|     int k; | ||||
|     for (k = 1; k <= MaxIterations; k++) { | ||||
|     for (k = 1; k <= MaxIterations*1000; k++) { | ||||
|       c = cp; | ||||
|  | ||||
|       MatrixTimer.Start(); | ||||
| @@ -135,7 +133,7 @@ class ConjugateGradient : public OperatorFunction<Field> { | ||||
|       LinalgTimer.Stop(); | ||||
|  | ||||
|       std::cout << GridLogIterative << "ConjugateGradient: Iteration " << k | ||||
|                 << " residual^2 " << sqrt(cp/ssq) << " target " << Tolerance << std::endl; | ||||
|                 << " residual " << cp << " target " << rsq << std::endl; | ||||
|  | ||||
|       // Stopping condition | ||||
|       if (cp <= rsq) { | ||||
| @@ -152,13 +150,13 @@ class ConjugateGradient : public OperatorFunction<Field> { | ||||
| 	std::cout << GridLogMessage << "\tTrue residual " << true_residual<<std::endl; | ||||
| 	std::cout << GridLogMessage << "\tTarget " << Tolerance << std::endl; | ||||
|  | ||||
|         std::cout << GridLogMessage << "Time breakdown "<<std::endl; | ||||
| 	std::cout << GridLogMessage << "\tElapsed    " << SolverTimer.Elapsed() <<std::endl; | ||||
| 	std::cout << GridLogMessage << "\tMatrix     " << MatrixTimer.Elapsed() <<std::endl; | ||||
| 	std::cout << GridLogMessage << "\tLinalg     " << LinalgTimer.Elapsed() <<std::endl; | ||||
| 	std::cout << GridLogMessage << "\tInner      " << InnerTimer.Elapsed() <<std::endl; | ||||
| 	std::cout << GridLogMessage << "\tAxpyNorm   " << AxpyNormTimer.Elapsed() <<std::endl; | ||||
| 	std::cout << GridLogMessage << "\tLinearComb " << LinearCombTimer.Elapsed() <<std::endl; | ||||
|         std::cout << GridLogPerformance << "Time breakdown "<<std::endl; | ||||
| 	std::cout << GridLogPerformance << "\tElapsed    " << SolverTimer.Elapsed() <<std::endl; | ||||
| 	std::cout << GridLogPerformance << "\tMatrix     " << MatrixTimer.Elapsed() <<std::endl; | ||||
| 	std::cout << GridLogPerformance << "\tLinalg     " << LinalgTimer.Elapsed() <<std::endl; | ||||
| 	std::cout << GridLogPerformance << "\tInner      " << InnerTimer.Elapsed() <<std::endl; | ||||
| 	std::cout << GridLogPerformance << "\tAxpyNorm   " << AxpyNormTimer.Elapsed() <<std::endl; | ||||
| 	std::cout << GridLogPerformance << "\tLinearComb " << LinearCombTimer.Elapsed() <<std::endl; | ||||
|  | ||||
|         if (ErrorOnNoConverge) assert(true_residual / Tolerance < 10000.0); | ||||
|  | ||||
| @@ -167,7 +165,8 @@ class ConjugateGradient : public OperatorFunction<Field> { | ||||
|         return; | ||||
|       } | ||||
|     } | ||||
|     std::cout << GridLogMessage << "ConjugateGradient did NOT converge "<<k<<" / "<< MaxIterations<< std::endl; | ||||
|     std::cout << GridLogMessage << "ConjugateGradient did NOT converge" | ||||
|               << std::endl; | ||||
|  | ||||
|     if (ErrorOnNoConverge) assert(0); | ||||
|     IterationsToComplete = k; | ||||
|   | ||||
| @@ -30,11 +30,8 @@ Author: Christopher Kelly <ckelly@phys.columbia.edu> | ||||
|  | ||||
| namespace Grid { | ||||
|  | ||||
|  | ||||
|   //Mixed precision restarted defect correction CG | ||||
|   template<class FieldD,class FieldF,  | ||||
|     typename std::enable_if< getPrecision<FieldD>::value == 2, int>::type = 0, | ||||
|     typename std::enable_if< getPrecision<FieldF>::value == 1, int>::type = 0>  | ||||
|   template<class FieldD,class FieldF, typename std::enable_if< getPrecision<FieldD>::value == 2, int>::type = 0,typename std::enable_if< getPrecision<FieldF>::value == 1, int>::type = 0>  | ||||
|   class MixedPrecisionConjugateGradient : public LinearFunction<FieldD> { | ||||
|   public:                                                 | ||||
|     RealD   Tolerance; | ||||
| @@ -53,12 +50,7 @@ namespace Grid { | ||||
|     //Option to speed up *inner single precision* solves using a LinearFunction that produces a guess | ||||
|     LinearFunction<FieldF> *guesser; | ||||
|      | ||||
|     MixedPrecisionConjugateGradient(RealD tol,  | ||||
| 				    Integer maxinnerit,  | ||||
| 				    Integer maxouterit,  | ||||
| 				    GridBase* _sp_grid,  | ||||
| 				    LinearOperatorBase<FieldF> &_Linop_f,  | ||||
| 				    LinearOperatorBase<FieldD> &_Linop_d) : | ||||
|     MixedPrecisionConjugateGradient(RealD tol, Integer maxinnerit, Integer maxouterit, GridBase* _sp_grid, LinearOperatorBase<FieldF> &_Linop_f, LinearOperatorBase<FieldD> &_Linop_d) : | ||||
|       Linop_f(_Linop_f), Linop_d(_Linop_d), | ||||
|       Tolerance(tol), InnerTolerance(tol), MaxInnerIterations(maxinnerit), MaxOuterIterations(maxouterit), SinglePrecGrid(_sp_grid), | ||||
|       OuterLoopNormMult(100.), guesser(NULL){ }; | ||||
| @@ -157,8 +149,6 @@ namespace Grid { | ||||
|     } | ||||
|   }; | ||||
|  | ||||
|  | ||||
|  | ||||
| } | ||||
|  | ||||
| #endif | ||||
|   | ||||
| @@ -35,11 +35,7 @@ class ZeroGuesser: public LinearFunction<Field> { | ||||
| public: | ||||
|   virtual void operator()(const Field &src, Field &guess) { guess = zero; }; | ||||
| }; | ||||
| template<class Field> | ||||
| class DoNothingGuesser: public LinearFunction<Field> { | ||||
| public: | ||||
|   virtual void operator()(const Field &src, Field &guess) {  }; | ||||
| }; | ||||
|  | ||||
| template<class Field> | ||||
| class SourceGuesser: public LinearFunction<Field> { | ||||
| public: | ||||
|   | ||||
| @@ -1,256 +0,0 @@ | ||||
| /************************************************************************************* | ||||
|  | ||||
| Grid physics library, www.github.com/paboyle/Grid | ||||
|  | ||||
| Source file: ./lib/algorithms/iterative/FlexibleCommunicationAvoidingGeneralisedMinimalResidual.h | ||||
|  | ||||
| Copyright (C) 2015 | ||||
|  | ||||
| Author: Daniel Richtmann <daniel.richtmann@ur.de> | ||||
|  | ||||
| This program is free software; you can redistribute it and/or modify | ||||
| it under the terms of the GNU General Public License as published by | ||||
| the Free Software Foundation; either version 2 of the License, or | ||||
| (at your option) any later version. | ||||
|  | ||||
| This program is distributed in the hope that it will be useful, | ||||
| but WITHOUT ANY WARRANTY; without even the implied warranty of | ||||
| MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the | ||||
| GNU General Public License for more details. | ||||
|  | ||||
| You should have received a copy of the GNU General Public License along | ||||
| with this program; if not, write to the Free Software Foundation, Inc., | ||||
| 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. | ||||
|  | ||||
| See the full license in the file "LICENSE" in the top level distribution | ||||
| directory | ||||
| *************************************************************************************/ | ||||
| /*  END LEGAL */ | ||||
| #ifndef GRID_FLEXIBLE_COMMUNICATION_AVOIDING_GENERALISED_MINIMAL_RESIDUAL_H | ||||
| #define GRID_FLEXIBLE_COMMUNICATION_AVOIDING_GENERALISED_MINIMAL_RESIDUAL_H | ||||
|  | ||||
| namespace Grid { | ||||
|  | ||||
| template<class Field> | ||||
| class FlexibleCommunicationAvoidingGeneralisedMinimalResidual : public OperatorFunction<Field> { | ||||
|  public: | ||||
|   bool ErrorOnNoConverge; // Throw an assert when FCAGMRES fails to converge, | ||||
|                           // defaults to true | ||||
|  | ||||
|   RealD   Tolerance; | ||||
|  | ||||
|   Integer MaxIterations; | ||||
|   Integer RestartLength; | ||||
|   Integer MaxNumberOfRestarts; | ||||
|   Integer IterationCount; // Number of iterations the FCAGMRES took to finish, | ||||
|                           // filled in upon completion | ||||
|  | ||||
|   GridStopWatch MatrixTimer; | ||||
|   GridStopWatch PrecTimer; | ||||
|   GridStopWatch LinalgTimer; | ||||
|   GridStopWatch QrTimer; | ||||
|   GridStopWatch CompSolutionTimer; | ||||
|  | ||||
|   Eigen::MatrixXcd H; | ||||
|  | ||||
|   std::vector<std::complex<double>> y; | ||||
|   std::vector<std::complex<double>> gamma; | ||||
|   std::vector<std::complex<double>> c; | ||||
|   std::vector<std::complex<double>> s; | ||||
|  | ||||
|   LinearFunction<Field> &Preconditioner; | ||||
|  | ||||
|   FlexibleCommunicationAvoidingGeneralisedMinimalResidual(RealD   tol, | ||||
|                                                           Integer maxit, | ||||
|                                                           LinearFunction<Field> &Prec, | ||||
|                                                           Integer restart_length, | ||||
|                                                           bool    err_on_no_conv = true) | ||||
|       : Tolerance(tol) | ||||
|       , MaxIterations(maxit) | ||||
|       , RestartLength(restart_length) | ||||
|       , MaxNumberOfRestarts(MaxIterations/RestartLength + ((MaxIterations%RestartLength == 0) ? 0 : 1)) | ||||
|       , ErrorOnNoConverge(err_on_no_conv) | ||||
|       , H(Eigen::MatrixXcd::Zero(RestartLength, RestartLength + 1)) // sizes taken from DD-αAMG code base | ||||
|       , y(RestartLength + 1, 0.) | ||||
|       , gamma(RestartLength + 1, 0.) | ||||
|       , c(RestartLength + 1, 0.) | ||||
|       , s(RestartLength + 1, 0.) | ||||
|       , Preconditioner(Prec) {}; | ||||
|  | ||||
|   void operator()(LinearOperatorBase<Field> &LinOp, const Field &src, Field &psi) { | ||||
|  | ||||
|     std::cout << GridLogWarning << "This algorithm currently doesn't differ from regular FGMRES" << std::endl; | ||||
|  | ||||
|     psi.checkerboard = src.checkerboard; | ||||
|     conformable(psi, src); | ||||
|  | ||||
|     RealD guess = norm2(psi); | ||||
|     assert(std::isnan(guess) == 0); | ||||
|  | ||||
|     RealD cp; | ||||
|     RealD ssq = norm2(src); | ||||
|     RealD rsq = Tolerance * Tolerance * ssq; | ||||
|  | ||||
|     Field r(src._grid); | ||||
|  | ||||
|     std::cout << std::setprecision(4) << std::scientific; | ||||
|     std::cout << GridLogIterative << "FlexibleCommunicationAvoidingGeneralisedMinimalResidual: guess " << guess << std::endl; | ||||
|     std::cout << GridLogIterative << "FlexibleCommunicationAvoidingGeneralisedMinimalResidual:   src " << ssq   << std::endl; | ||||
|  | ||||
|     PrecTimer.Reset(); | ||||
|     MatrixTimer.Reset(); | ||||
|     LinalgTimer.Reset(); | ||||
|     QrTimer.Reset(); | ||||
|     CompSolutionTimer.Reset(); | ||||
|  | ||||
|     GridStopWatch SolverTimer; | ||||
|     SolverTimer.Start(); | ||||
|  | ||||
|     IterationCount = 0; | ||||
|  | ||||
|     for (int k=0; k<MaxNumberOfRestarts; k++) { | ||||
|  | ||||
|       cp = outerLoopBody(LinOp, src, psi, rsq); | ||||
|  | ||||
|       // Stopping condition | ||||
|       if (cp <= rsq) { | ||||
|  | ||||
|         SolverTimer.Stop(); | ||||
|  | ||||
|         LinOp.Op(psi,r); | ||||
|         axpy(r,-1.0,src,r); | ||||
|  | ||||
|         RealD srcnorm       = sqrt(ssq); | ||||
|         RealD resnorm       = sqrt(norm2(r)); | ||||
|         RealD true_residual = resnorm / srcnorm; | ||||
|  | ||||
|         std::cout << GridLogMessage        << "FlexibleCommunicationAvoidingGeneralisedMinimalResidual: Converged on iteration " << IterationCount | ||||
|                   << " computed residual " << sqrt(cp / ssq) | ||||
|                   << " true residual "     << true_residual | ||||
|                   << " target "            << Tolerance << std::endl; | ||||
|  | ||||
|         std::cout << GridLogMessage << "FCAGMRES Time elapsed: Total   " <<       SolverTimer.Elapsed() << std::endl; | ||||
|         std::cout << GridLogMessage << "FCAGMRES Time elapsed: Precon  " <<         PrecTimer.Elapsed() << std::endl; | ||||
|         std::cout << GridLogMessage << "FCAGMRES Time elapsed: Matrix  " <<       MatrixTimer.Elapsed() << std::endl; | ||||
|         std::cout << GridLogMessage << "FCAGMRES Time elapsed: Linalg  " <<       LinalgTimer.Elapsed() << std::endl; | ||||
|         std::cout << GridLogMessage << "FCAGMRES Time elapsed: QR      " <<           QrTimer.Elapsed() << std::endl; | ||||
|         std::cout << GridLogMessage << "FCAGMRES Time elapsed: CompSol " << CompSolutionTimer.Elapsed() << std::endl; | ||||
|         return; | ||||
|       } | ||||
|     } | ||||
|  | ||||
|     std::cout << GridLogMessage << "FlexibleCommunicationAvoidingGeneralisedMinimalResidual did NOT converge" << std::endl; | ||||
|  | ||||
|     if (ErrorOnNoConverge) | ||||
|       assert(0); | ||||
|   } | ||||
|  | ||||
|   RealD outerLoopBody(LinearOperatorBase<Field> &LinOp, const Field &src, Field &psi, RealD rsq) { | ||||
|  | ||||
|     RealD cp = 0; | ||||
|  | ||||
|     Field w(src._grid); | ||||
|     Field r(src._grid); | ||||
|  | ||||
|     // these should probably be made class members so that they are only allocated once, not in every restart | ||||
|     std::vector<Field> v(RestartLength + 1, src._grid); for (auto &elem : v) elem = zero; | ||||
|     std::vector<Field> z(RestartLength + 1, src._grid); for (auto &elem : z) elem = zero; | ||||
|  | ||||
|     MatrixTimer.Start(); | ||||
|     LinOp.Op(psi, w); | ||||
|     MatrixTimer.Stop(); | ||||
|  | ||||
|     LinalgTimer.Start(); | ||||
|     r = src - w; | ||||
|  | ||||
|     gamma[0] = sqrt(norm2(r)); | ||||
|  | ||||
|     v[0] = (1. / gamma[0]) * r; | ||||
|     LinalgTimer.Stop(); | ||||
|  | ||||
|     for (int i=0; i<RestartLength; i++) { | ||||
|  | ||||
|       IterationCount++; | ||||
|  | ||||
|       arnoldiStep(LinOp, v, z, w, i); | ||||
|  | ||||
|       qrUpdate(i); | ||||
|  | ||||
|       cp = std::norm(gamma[i+1]); | ||||
|  | ||||
|       std::cout << GridLogIterative << "FlexibleCommunicationAvoidingGeneralisedMinimalResidual: Iteration " << IterationCount | ||||
|                 << " residual " << cp << " target " << rsq << std::endl; | ||||
|  | ||||
|       if ((i == RestartLength - 1) || (IterationCount == MaxIterations) || (cp <= rsq)) { | ||||
|  | ||||
|         computeSolution(z, psi, i); | ||||
|  | ||||
|         return cp; | ||||
|       } | ||||
|     } | ||||
|  | ||||
|     assert(0); // Never reached | ||||
|     return cp; | ||||
|   } | ||||
|  | ||||
|   void arnoldiStep(LinearOperatorBase<Field> &LinOp, std::vector<Field> &v, std::vector<Field> &z, Field &w, int iter) { | ||||
|  | ||||
|     PrecTimer.Start(); | ||||
|     Preconditioner(v[iter], z[iter]); | ||||
|     PrecTimer.Stop(); | ||||
|  | ||||
|     MatrixTimer.Start(); | ||||
|     LinOp.Op(z[iter], w); | ||||
|     MatrixTimer.Stop(); | ||||
|  | ||||
|     LinalgTimer.Start(); | ||||
|     for (int i = 0; i <= iter; ++i) { | ||||
|       H(iter, i) = innerProduct(v[i], w); | ||||
|       w = w - H(iter, i) * v[i]; | ||||
|     } | ||||
|  | ||||
|     H(iter, iter + 1) = sqrt(norm2(w)); | ||||
|     v[iter + 1] = (1. / H(iter, iter + 1)) * w; | ||||
|     LinalgTimer.Stop(); | ||||
|   } | ||||
|  | ||||
|   void qrUpdate(int iter) { | ||||
|  | ||||
|     QrTimer.Start(); | ||||
|     for (int i = 0; i < iter ; ++i) { | ||||
|       auto tmp       = -s[i] * H(iter, i) + c[i] * H(iter, i + 1); | ||||
|       H(iter, i)     = std::conj(c[i]) * H(iter, i) + std::conj(s[i]) * H(iter, i + 1); | ||||
|       H(iter, i + 1) = tmp; | ||||
|     } | ||||
|  | ||||
|     // Compute new Givens Rotation | ||||
|     ComplexD nu = sqrt(std::norm(H(iter, iter)) + std::norm(H(iter, iter + 1))); | ||||
|     c[iter]     = H(iter, iter) / nu; | ||||
|     s[iter]     = H(iter, iter + 1) / nu; | ||||
|  | ||||
|     // Apply new Givens rotation | ||||
|     H(iter, iter)     = nu; | ||||
|     H(iter, iter + 1) = 0.; | ||||
|  | ||||
|     gamma[iter + 1] = -s[iter] * gamma[iter]; | ||||
|     gamma[iter]     = std::conj(c[iter]) * gamma[iter]; | ||||
|     QrTimer.Stop(); | ||||
|   } | ||||
|  | ||||
|   void computeSolution(std::vector<Field> const &z, Field &psi, int iter) { | ||||
|  | ||||
|     CompSolutionTimer.Start(); | ||||
|     for (int i = iter; i >= 0; i--) { | ||||
|       y[i] = gamma[i]; | ||||
|       for (int k = i + 1; k <= iter; k++) | ||||
|         y[i] = y[i] - H(k, i) * y[k]; | ||||
|       y[i] = y[i] / H(i, i); | ||||
|     } | ||||
|  | ||||
|     for (int i = 0; i <= iter; i++) | ||||
|       psi = psi + z[i] * y[i]; | ||||
|     CompSolutionTimer.Stop(); | ||||
|   } | ||||
| }; | ||||
| } | ||||
| #endif | ||||
| @@ -1,254 +0,0 @@ | ||||
| /************************************************************************************* | ||||
|  | ||||
| Grid physics library, www.github.com/paboyle/Grid | ||||
|  | ||||
| Source file: ./lib/algorithms/iterative/FlexibleGeneralisedMinimalResidual.h | ||||
|  | ||||
| Copyright (C) 2015 | ||||
|  | ||||
| Author: Daniel Richtmann <daniel.richtmann@ur.de> | ||||
|  | ||||
| This program is free software; you can redistribute it and/or modify | ||||
| it under the terms of the GNU General Public License as published by | ||||
| the Free Software Foundation; either version 2 of the License, or | ||||
| (at your option) any later version. | ||||
|  | ||||
| This program is distributed in the hope that it will be useful, | ||||
| but WITHOUT ANY WARRANTY; without even the implied warranty of | ||||
| MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the | ||||
| GNU General Public License for more details. | ||||
|  | ||||
| You should have received a copy of the GNU General Public License along | ||||
| with this program; if not, write to the Free Software Foundation, Inc., | ||||
| 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. | ||||
|  | ||||
| See the full license in the file "LICENSE" in the top level distribution | ||||
| directory | ||||
| *************************************************************************************/ | ||||
| /*  END LEGAL */ | ||||
| #ifndef GRID_FLEXIBLE_GENERALISED_MINIMAL_RESIDUAL_H | ||||
| #define GRID_FLEXIBLE_GENERALISED_MINIMAL_RESIDUAL_H | ||||
|  | ||||
| namespace Grid { | ||||
|  | ||||
| template<class Field> | ||||
| class FlexibleGeneralisedMinimalResidual : public OperatorFunction<Field> { | ||||
|  public: | ||||
|   bool ErrorOnNoConverge; // Throw an assert when FGMRES fails to converge, | ||||
|                           // defaults to true | ||||
|  | ||||
|   RealD   Tolerance; | ||||
|  | ||||
|   Integer MaxIterations; | ||||
|   Integer RestartLength; | ||||
|   Integer MaxNumberOfRestarts; | ||||
|   Integer IterationCount; // Number of iterations the FGMRES took to finish, | ||||
|                           // filled in upon completion | ||||
|  | ||||
|   GridStopWatch MatrixTimer; | ||||
|   GridStopWatch PrecTimer; | ||||
|   GridStopWatch LinalgTimer; | ||||
|   GridStopWatch QrTimer; | ||||
|   GridStopWatch CompSolutionTimer; | ||||
|  | ||||
|   Eigen::MatrixXcd H; | ||||
|  | ||||
|   std::vector<std::complex<double>> y; | ||||
|   std::vector<std::complex<double>> gamma; | ||||
|   std::vector<std::complex<double>> c; | ||||
|   std::vector<std::complex<double>> s; | ||||
|  | ||||
|   LinearFunction<Field> &Preconditioner; | ||||
|  | ||||
|   FlexibleGeneralisedMinimalResidual(RealD   tol, | ||||
|                                      Integer maxit, | ||||
|                                      LinearFunction<Field> &Prec, | ||||
|                                      Integer restart_length, | ||||
|                                      bool    err_on_no_conv = true) | ||||
|       : Tolerance(tol) | ||||
|       , MaxIterations(maxit) | ||||
|       , RestartLength(restart_length) | ||||
|       , MaxNumberOfRestarts(MaxIterations/RestartLength + ((MaxIterations%RestartLength == 0) ? 0 : 1)) | ||||
|       , ErrorOnNoConverge(err_on_no_conv) | ||||
|       , H(Eigen::MatrixXcd::Zero(RestartLength, RestartLength + 1)) // sizes taken from DD-αAMG code base | ||||
|       , y(RestartLength + 1, 0.) | ||||
|       , gamma(RestartLength + 1, 0.) | ||||
|       , c(RestartLength + 1, 0.) | ||||
|       , s(RestartLength + 1, 0.) | ||||
|       , Preconditioner(Prec) {}; | ||||
|  | ||||
|   // Solves LinOp * psi = src with restarted, flexibly preconditioned GMRES. | ||||
|   // psi is used as the starting guess and is updated in place. At most | ||||
|   // MaxNumberOfRestarts restart cycles are run; the solve stops as soon as a | ||||
|   // cycle returns a squared residual estimate that does not exceed | ||||
|   // Tolerance^2 * norm2(src). If no cycle gets there, a message is logged and, | ||||
|   // when ErrorOnNoConverge is set, the run is aborted through assert(0). | ||||
|   void operator()(LinearOperatorBase<Field> &LinOp, const Field &src, Field &psi) { | ||||
|  | ||||
|     psi.checkerboard = src.checkerboard; | ||||
|     conformable(psi, src); | ||||
|  | ||||
|     // Refuse to start from a guess whose norm is NaN | ||||
|     const RealD guessNorm2 = norm2(psi); | ||||
|     assert(!std::isnan(guessNorm2)); | ||||
|  | ||||
|     const RealD srcNorm2    = norm2(src); | ||||
|     const RealD targetNorm2 = Tolerance * Tolerance * srcNorm2; | ||||
|  | ||||
|     // Scratch field, only used for the true residual once a cycle reports success | ||||
|     Field trueResidual(src._grid); | ||||
|  | ||||
|     std::cout << std::setprecision(4) << std::scientific; | ||||
|     std::cout << GridLogIterative << "FlexibleGeneralisedMinimalResidual: guess " << guessNorm2 << std::endl; | ||||
|     std::cout << GridLogIterative << "FlexibleGeneralisedMinimalResidual:   src " << srcNorm2   << std::endl; | ||||
|  | ||||
|     // The per-phase timers are members, so wipe what a previous solve left behind | ||||
|     PrecTimer.Reset(); | ||||
|     MatrixTimer.Reset(); | ||||
|     LinalgTimer.Reset(); | ||||
|     QrTimer.Reset(); | ||||
|     CompSolutionTimer.Reset(); | ||||
|  | ||||
|     GridStopWatch SolverTimer; | ||||
|     SolverTimer.Start(); | ||||
|  | ||||
|     IterationCount = 0; | ||||
|  | ||||
|     for (int restart = 0; restart < MaxNumberOfRestarts; ++restart) { | ||||
|  | ||||
|       const RealD estimate = outerLoopBody(LinOp, src, psi, targetNorm2); | ||||
|  | ||||
|       // Written as a negated <= so that a NaN estimate counts as "not converged" | ||||
|       if (!(estimate <= targetNorm2)) | ||||
|         continue; | ||||
|  | ||||
|       SolverTimer.Stop(); | ||||
|  | ||||
|       // Recompute LinOp * psi - src from scratch to report the accuracy actually reached | ||||
|       LinOp.Op(psi, trueResidual); | ||||
|       axpy(trueResidual, -1.0, src, trueResidual); | ||||
|  | ||||
|       const RealD srcNorm      = sqrt(srcNorm2); | ||||
|       const RealD resNorm      = sqrt(norm2(trueResidual)); | ||||
|       const RealD relativeTrue = resNorm / srcNorm; | ||||
|  | ||||
|       std::cout << GridLogMessage        << "FlexibleGeneralisedMinimalResidual: Converged on iteration " << IterationCount | ||||
|                 << " computed residual " << sqrt(estimate / srcNorm2) | ||||
|                 << " true residual "     << relativeTrue | ||||
|                 << " target "            << Tolerance << std::endl; | ||||
|  | ||||
|       std::cout << GridLogMessage << "FGMRES Time elapsed: Total   " <<       SolverTimer.Elapsed() << std::endl; | ||||
|       std::cout << GridLogMessage << "FGMRES Time elapsed: Precon  " <<         PrecTimer.Elapsed() << std::endl; | ||||
|       std::cout << GridLogMessage << "FGMRES Time elapsed: Matrix  " <<       MatrixTimer.Elapsed() << std::endl; | ||||
|       std::cout << GridLogMessage << "FGMRES Time elapsed: Linalg  " <<       LinalgTimer.Elapsed() << std::endl; | ||||
|       std::cout << GridLogMessage << "FGMRES Time elapsed: QR      " <<           QrTimer.Elapsed() << std::endl; | ||||
|       std::cout << GridLogMessage << "FGMRES Time elapsed: CompSol " << CompSolutionTimer.Elapsed() << std::endl; | ||||
|       return; | ||||
|     } | ||||
|  | ||||
|     std::cout << GridLogMessage << "FlexibleGeneralisedMinimalResidual did NOT converge" << std::endl; | ||||
|  | ||||
|     if (ErrorOnNoConverge) | ||||
|       assert(0); | ||||
|   } | ||||
|  | ||||
|   // Runs one restart cycle of at most RestartLength Arnoldi steps, starting | ||||
|   // from the current psi, and updates psi in place before returning. | ||||
|   // | ||||
|   // LinOp : operator of the system being solved | ||||
|   // src   : right-hand side | ||||
|   // psi   : in: current iterate, out: iterate after this cycle | ||||
|   // rsq   : squared residual norm at which the cycle may stop early | ||||
|   // | ||||
|   // Returns the squared residual estimate |gamma[i+1]|^2 of the last step | ||||
|   // taken, or 0 when psi already reproduces src exactly. | ||||
|   RealD outerLoopBody(LinearOperatorBase<Field> &LinOp, const Field &src, Field &psi, RealD rsq) { | ||||
|  | ||||
|     RealD cp = 0; | ||||
|  | ||||
|     Field w(src._grid); | ||||
|     Field r(src._grid); | ||||
|  | ||||
|     MatrixTimer.Start(); | ||||
|     LinOp.Op(psi, w); | ||||
|     MatrixTimer.Stop(); | ||||
|  | ||||
|     LinalgTimer.Start(); | ||||
|     r = src - w; | ||||
|  | ||||
|     const RealD rnorm2 = norm2(r); | ||||
|  | ||||
|     // A vanishing residual means psi is already exact. Normalising r below would | ||||
|     // divide by zero and fill the basis with NaNs, after which the convergence | ||||
|     // test can never succeed, so report a zero residual straight away instead. | ||||
|     if (rnorm2 == 0.) { | ||||
|       LinalgTimer.Stop(); | ||||
|       return cp; | ||||
|     } | ||||
|  | ||||
|     // these should probably be made class members so that they are only allocated once, not in every restart | ||||
|     std::vector<Field> v(RestartLength + 1, src._grid); for (auto &elem : v) elem = zero; | ||||
|     std::vector<Field> z(RestartLength + 1, src._grid); for (auto &elem : z) elem = zero; | ||||
|  | ||||
|     gamma[0] = sqrt(rnorm2); | ||||
|  | ||||
|     // First basis vector: the normalised starting residual | ||||
|     v[0] = (1. / gamma[0]) * r; | ||||
|     LinalgTimer.Stop(); | ||||
|  | ||||
|     for (int i=0; i<RestartLength; i++) { | ||||
|  | ||||
|       IterationCount++; | ||||
|  | ||||
|       arnoldiStep(LinOp, v, z, w, i); | ||||
|  | ||||
|       qrUpdate(i); | ||||
|  | ||||
|       // After the rotation, gamma[i+1] carries the current residual estimate | ||||
|       cp = std::norm(gamma[i+1]); | ||||
|  | ||||
|       std::cout << GridLogIterative << "FlexibleGeneralisedMinimalResidual: Iteration " << IterationCount | ||||
|                 << " residual " << cp << " target " << rsq << std::endl; | ||||
|  | ||||
|       // Leave the cycle when it is full, the iteration budget is spent, or the target is met | ||||
|       if ((i == RestartLength - 1) || (IterationCount == MaxIterations) || (cp <= rsq)) { | ||||
|  | ||||
|         computeSolution(z, psi, i); | ||||
|  | ||||
|         return cp; | ||||
|       } | ||||
|     } | ||||
|  | ||||
|     assert(0); // Never reached | ||||
|     return cp; | ||||
|   } | ||||
|  | ||||
|   // Performs Arnoldi step number iter of the current restart cycle: | ||||
|   // z[iter] = Preconditioner(v[iter]), w = LinOp * z[iter], after which w is | ||||
|   // orthogonalised against v[0..iter] one vector at a time. The projection | ||||
|   // coefficients are stored in H(iter, 0..iter), the norm of the remainder in | ||||
|   // H(iter, iter + 1), and the normalised remainder becomes v[iter + 1]. | ||||
|   void arnoldiStep(LinearOperatorBase<Field> &LinOp, std::vector<Field> &v, std::vector<Field> &z, Field &w, int iter) { | ||||
|  | ||||
|     const int next = iter + 1; | ||||
|  | ||||
|     PrecTimer.Start(); | ||||
|     Preconditioner(v[iter], z[iter]); | ||||
|     PrecTimer.Stop(); | ||||
|  | ||||
|     MatrixTimer.Start(); | ||||
|     LinOp.Op(z[iter], w); | ||||
|     MatrixTimer.Stop(); | ||||
|  | ||||
|     LinalgTimer.Start(); | ||||
|  | ||||
|     // Every coefficient is taken from the w left by the previous subtraction, | ||||
|     // so the two statements in the loop body must stay in this order | ||||
|     for (int basis = 0; basis < next; ++basis) { | ||||
|       H(iter, basis) = innerProduct(v[basis], w); | ||||
|       w = w - H(iter, basis) * v[basis]; | ||||
|     } | ||||
|  | ||||
|     H(iter, next) = sqrt(norm2(w)); | ||||
|     v[next] = (1. / H(iter, next)) * w; | ||||
|  | ||||
|     LinalgTimer.Stop(); | ||||
|   } | ||||
|  | ||||
|   // Brings the coefficients written by Arnoldi step iter (H(iter, 0..iter+1)) | ||||
|   // to triangular form: the stored rotations (c[i], s[i]) with i < iter are | ||||
|   // replayed first, then rotation number iter is built so that | ||||
|   // H(iter, iter + 1) becomes zero, and finally that rotation is applied to | ||||
|   // gamma[iter] and gamma[iter + 1]. | ||||
|   void qrUpdate(int iter) { | ||||
|  | ||||
|     QrTimer.Start(); | ||||
|  | ||||
|     const int next = iter + 1; | ||||
|  | ||||
|     // Replay the earlier rotations; both entries of a pair are read before either is overwritten | ||||
|     for (int rot = 0; rot < iter; ++rot) { | ||||
|       const std::complex<double> upper = H(iter, rot); | ||||
|       const std::complex<double> lower = H(iter, rot + 1); | ||||
|  | ||||
|       H(iter, rot)     = std::conj(c[rot]) * upper + std::conj(s[rot]) * lower; | ||||
|       H(iter, rot + 1) = -s[rot] * upper + c[rot] * lower; | ||||
|     } | ||||
|  | ||||
|     // Compute new Givens Rotation | ||||
|     const ComplexD nu = sqrt(std::norm(H(iter, iter)) + std::norm(H(iter, next))); | ||||
|     c[iter] = H(iter, iter) / nu; | ||||
|     s[iter] = H(iter, next) / nu; | ||||
|  | ||||
|     // Apply new Givens rotation | ||||
|     H(iter, iter) = nu; | ||||
|     H(iter, next) = 0.; | ||||
|  | ||||
|     // Both new gamma entries are derived from the value gamma[iter] had on entry | ||||
|     const std::complex<double> gammaOld = gamma[iter]; | ||||
|     gamma[next] = -s[iter] * gammaOld; | ||||
|     gamma[iter] = std::conj(c[iter]) * gammaOld; | ||||
|  | ||||
|     QrTimer.Stop(); | ||||
|   } | ||||
|  | ||||
|   // Back-substitutes through the triangular system held in H (entries H(k, i) | ||||
|   // with i <= k <= iter) using gamma[0..iter] as right-hand side, leaves the | ||||
|   // result in y[0..iter], and then adds sum_i z[i] * y[i] to psi. | ||||
|   void computeSolution(std::vector<Field> const &z, Field &psi, int iter) { | ||||
|  | ||||
|     CompSolutionTimer.Start(); | ||||
|  | ||||
|     // Last unknown first: y[row] only depends on the y[col] with col > row | ||||
|     for (int row = iter; row >= 0; --row) { | ||||
|       std::complex<double> acc = gamma[row]; | ||||
|       for (int col = row + 1; col <= iter; ++col) | ||||
|         acc = acc - H(col, row) * y[col]; | ||||
|       y[row] = acc / H(row, row); | ||||
|     } | ||||
|  | ||||
|     for (int idx = 0; idx <= iter; ++idx) | ||||
|       psi = psi + z[idx] * y[idx]; | ||||
|  | ||||
|     CompSolutionTimer.Stop(); | ||||
|   } | ||||
| }; | ||||
| } | ||||
| #endif | ||||
| @@ -1,242 +0,0 @@ | ||||
| /************************************************************************************* | ||||
|  | ||||
| Grid physics library, www.github.com/paboyle/Grid | ||||
|  | ||||
| Source file: ./lib/algorithms/iterative/GeneralisedMinimalResidual.h | ||||
|  | ||||
| Copyright (C) 2015 | ||||
|  | ||||
| Author: Daniel Richtmann <daniel.richtmann@ur.de> | ||||
|  | ||||
| This program is free software; you can redistribute it and/or modify | ||||
| it under the terms of the GNU General Public License as published by | ||||
| the Free Software Foundation; either version 2 of the License, or | ||||
| (at your option) any later version. | ||||
|  | ||||
| This program is distributed in the hope that it will be useful, | ||||
| but WITHOUT ANY WARRANTY; without even the implied warranty of | ||||
| MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the | ||||
| GNU General Public License for more details. | ||||
|  | ||||
| You should have received a copy of the GNU General Public License along | ||||
| with this program; if not, write to the Free Software Foundation, Inc., | ||||
| 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. | ||||
|  | ||||
| See the full license in the file "LICENSE" in the top level distribution | ||||
| directory | ||||
| *************************************************************************************/ | ||||
| /*  END LEGAL */ | ||||
| #ifndef GRID_GENERALISED_MINIMAL_RESIDUAL_H | ||||
| #define GRID_GENERALISED_MINIMAL_RESIDUAL_H | ||||
|  | ||||
| namespace Grid { | ||||
|  | ||||
| // Restarted GMRES solver for LinOp * psi = src without preconditioning. | ||||
| // The Arnoldi coefficients are kept in H and reduced to triangular form step | ||||
| // by step with Givens rotations (c, s); gamma carries the rotated right-hand | ||||
| // side, whose trailing entry is used as the residual estimate. | ||||
| template<class Field> | ||||
| class GeneralisedMinimalResidual : public OperatorFunction<Field> { | ||||
|  public: | ||||
|   bool ErrorOnNoConverge; // Throw an assert when GMRES fails to converge, | ||||
|                           // defaults to true | ||||
|  | ||||
|   RealD   Tolerance; | ||||
|  | ||||
|   Integer MaxIterations; | ||||
|   Integer RestartLength; | ||||
|   Integer MaxNumberOfRestarts; | ||||
|   Integer IterationCount; // Number of iterations the GMRES took to finish, | ||||
|                           // filled in upon completion | ||||
|  | ||||
|   GridStopWatch MatrixTimer; | ||||
|   GridStopWatch LinalgTimer; | ||||
|   GridStopWatch QrTimer; | ||||
|   GridStopWatch CompSolutionTimer; | ||||
|  | ||||
|   Eigen::MatrixXcd H; // H(iter, i): coefficient i produced by Arnoldi step iter | ||||
|  | ||||
|   std::vector<std::complex<double>> y;     // solution of the small triangular system | ||||
|   std::vector<std::complex<double>> gamma; // rotated right-hand side | ||||
|   std::vector<std::complex<double>> c;     // Givens rotation coefficients | ||||
|   std::vector<std::complex<double>> s;     // Givens rotation coefficients | ||||
|  | ||||
|   // tol            : relative residual target; the solve stops once the squared | ||||
|   //                  residual estimate is <= tol^2 * norm2(src) | ||||
|   // maxit          : upper bound on the total number of Arnoldi steps | ||||
|   // restart_length : Arnoldi steps per restart cycle; must be non-zero because it | ||||
|   //                  is used as a divisor when computing MaxNumberOfRestarts | ||||
|   // err_on_no_conv : assert(0) when the iteration budget runs out unconverged | ||||
|   // | ||||
|   // The initialisers are listed in declaration order, which is the order in | ||||
|   // which the members are actually initialised. | ||||
|   GeneralisedMinimalResidual(RealD   tol, | ||||
|                              Integer maxit, | ||||
|                              Integer restart_length, | ||||
|                              bool    err_on_no_conv = true) | ||||
|       : ErrorOnNoConverge(err_on_no_conv) | ||||
|       , Tolerance(tol) | ||||
|       , MaxIterations(maxit) | ||||
|       , RestartLength(restart_length) | ||||
|       , MaxNumberOfRestarts(MaxIterations/RestartLength + ((MaxIterations%RestartLength == 0) ? 0 : 1)) | ||||
|       , IterationCount(0) | ||||
|       , H(Eigen::MatrixXcd::Zero(RestartLength, RestartLength + 1)) // sizes taken from DD-αAMG code base | ||||
|       , y(RestartLength + 1, 0.) | ||||
|       , gamma(RestartLength + 1, 0.) | ||||
|       , c(RestartLength + 1, 0.) | ||||
|       , s(RestartLength + 1, 0.) {} | ||||
|  | ||||
|   // Solves LinOp * psi = src. psi is used as the starting guess and is updated | ||||
|   // in place. Runs at most MaxNumberOfRestarts restart cycles; logs and, when | ||||
|   // ErrorOnNoConverge is set, asserts if none of them reaches the target. | ||||
|   void operator()(LinearOperatorBase<Field> &LinOp, const Field &src, Field &psi) { | ||||
|  | ||||
|     psi.checkerboard = src.checkerboard; | ||||
|     conformable(psi, src); | ||||
|  | ||||
|     RealD guess = norm2(psi); | ||||
|     assert(std::isnan(guess) == 0); | ||||
|  | ||||
|     RealD cp; | ||||
|     RealD ssq = norm2(src); | ||||
|     RealD rsq = Tolerance * Tolerance * ssq; // squared residual norm to reach | ||||
|  | ||||
|     Field r(src._grid); | ||||
|  | ||||
|     std::cout << std::setprecision(4) << std::scientific; | ||||
|     std::cout << GridLogIterative << "GeneralisedMinimalResidual: guess " << guess << std::endl; | ||||
|     std::cout << GridLogIterative << "GeneralisedMinimalResidual:   src " << ssq   << std::endl; | ||||
|  | ||||
|     MatrixTimer.Reset(); | ||||
|     LinalgTimer.Reset(); | ||||
|     QrTimer.Reset(); | ||||
|     CompSolutionTimer.Reset(); | ||||
|  | ||||
|     GridStopWatch SolverTimer; | ||||
|     SolverTimer.Start(); | ||||
|  | ||||
|     IterationCount = 0; | ||||
|  | ||||
|     for (int k=0; k<MaxNumberOfRestarts; k++) { | ||||
|  | ||||
|       cp = outerLoopBody(LinOp, src, psi, rsq); | ||||
|  | ||||
|       // Stopping condition | ||||
|       if (cp <= rsq) { | ||||
|  | ||||
|         SolverTimer.Stop(); | ||||
|  | ||||
|         // Recompute LinOp * psi - src to report the accuracy actually reached | ||||
|         LinOp.Op(psi,r); | ||||
|         axpy(r,-1.0,src,r); | ||||
|  | ||||
|         RealD srcnorm       = sqrt(ssq); | ||||
|         RealD resnorm       = sqrt(norm2(r)); | ||||
|         RealD true_residual = resnorm / srcnorm; | ||||
|  | ||||
|         std::cout << GridLogMessage        << "GeneralisedMinimalResidual: Converged on iteration " << IterationCount | ||||
|                   << " computed residual " << sqrt(cp / ssq) | ||||
|                   << " true residual "     << true_residual | ||||
|                   << " target "            << Tolerance << std::endl; | ||||
|  | ||||
|         std::cout << GridLogMessage << "GMRES Time elapsed: Total   " <<       SolverTimer.Elapsed() << std::endl; | ||||
|         std::cout << GridLogMessage << "GMRES Time elapsed: Matrix  " <<       MatrixTimer.Elapsed() << std::endl; | ||||
|         std::cout << GridLogMessage << "GMRES Time elapsed: Linalg  " <<       LinalgTimer.Elapsed() << std::endl; | ||||
|         std::cout << GridLogMessage << "GMRES Time elapsed: QR      " <<           QrTimer.Elapsed() << std::endl; | ||||
|         std::cout << GridLogMessage << "GMRES Time elapsed: CompSol " << CompSolutionTimer.Elapsed() << std::endl; | ||||
|         return; | ||||
|       } | ||||
|     } | ||||
|  | ||||
|     std::cout << GridLogMessage << "GeneralisedMinimalResidual did NOT converge" << std::endl; | ||||
|  | ||||
|     if (ErrorOnNoConverge) | ||||
|       assert(0); | ||||
|   } | ||||
|  | ||||
|   // Runs one restart cycle of at most RestartLength Arnoldi steps from the | ||||
|   // current psi and updates psi in place. Returns the squared residual | ||||
|   // estimate |gamma[i+1]|^2 of the last step taken, or 0 when psi already | ||||
|   // reproduces src exactly. | ||||
|   RealD outerLoopBody(LinearOperatorBase<Field> &LinOp, const Field &src, Field &psi, RealD rsq) { | ||||
|  | ||||
|     RealD cp = 0; | ||||
|  | ||||
|     Field w(src._grid); | ||||
|     Field r(src._grid); | ||||
|  | ||||
|     MatrixTimer.Start(); | ||||
|     LinOp.Op(psi, w); | ||||
|     MatrixTimer.Stop(); | ||||
|  | ||||
|     LinalgTimer.Start(); | ||||
|     r = src - w; | ||||
|  | ||||
|     const RealD rnorm2 = norm2(r); | ||||
|  | ||||
|     // A vanishing residual means psi is already exact. Normalising r below would | ||||
|     // divide by zero and fill the basis with NaNs, after which the convergence | ||||
|     // test can never succeed, so report a zero residual straight away instead. | ||||
|     if (rnorm2 == 0.) { | ||||
|       LinalgTimer.Stop(); | ||||
|       return cp; | ||||
|     } | ||||
|  | ||||
|     // this should probably be made a class member so that it is only allocated once, not in every restart | ||||
|     std::vector<Field> v(RestartLength + 1, src._grid); for (auto &elem : v) elem = zero; | ||||
|  | ||||
|     gamma[0] = sqrt(rnorm2); | ||||
|  | ||||
|     // First basis vector: the normalised starting residual | ||||
|     v[0] = (1. / gamma[0]) * r; | ||||
|     LinalgTimer.Stop(); | ||||
|  | ||||
|     for (int i=0; i<RestartLength; i++) { | ||||
|  | ||||
|       IterationCount++; | ||||
|  | ||||
|       arnoldiStep(LinOp, v, w, i); | ||||
|  | ||||
|       qrUpdate(i); | ||||
|  | ||||
|       // After the rotation, gamma[i+1] carries the current residual estimate | ||||
|       cp = std::norm(gamma[i+1]); | ||||
|  | ||||
|       std::cout << GridLogIterative << "GeneralisedMinimalResidual: Iteration " << IterationCount | ||||
|                 << " residual " << cp << " target " << rsq << std::endl; | ||||
|  | ||||
|       // Leave the cycle when it is full, the iteration budget is spent, or the target is met | ||||
|       if ((i == RestartLength - 1) || (IterationCount == MaxIterations) || (cp <= rsq)) { | ||||
|  | ||||
|         computeSolution(v, psi, i); | ||||
|  | ||||
|         return cp; | ||||
|       } | ||||
|     } | ||||
|  | ||||
|     assert(0); // Never reached | ||||
|     return cp; | ||||
|   } | ||||
|  | ||||
|   // Arnoldi step number iter: w = LinOp * v[iter], orthogonalised against | ||||
|   // v[0..iter] one vector at a time. Coefficients go to H(iter, 0..iter), the | ||||
|   // norm of the remainder to H(iter, iter + 1), the normalised remainder to v[iter + 1]. | ||||
|   void arnoldiStep(LinearOperatorBase<Field> &LinOp, std::vector<Field> &v, Field &w, int iter) { | ||||
|  | ||||
|     MatrixTimer.Start(); | ||||
|     LinOp.Op(v[iter], w); | ||||
|     MatrixTimer.Stop(); | ||||
|  | ||||
|     LinalgTimer.Start(); | ||||
|     // Each coefficient is taken from the w left by the previous subtraction | ||||
|     for (int i = 0; i <= iter; ++i) { | ||||
|       H(iter, i) = innerProduct(v[i], w); | ||||
|       w = w - H(iter, i) * v[i]; | ||||
|     } | ||||
|  | ||||
|     H(iter, iter + 1) = sqrt(norm2(w)); | ||||
|     v[iter + 1] = (1. / H(iter, iter + 1)) * w; | ||||
|     LinalgTimer.Stop(); | ||||
|   } | ||||
|  | ||||
|   // Replays the stored rotations on the coefficients of step iter, builds | ||||
|   // rotation number iter so that H(iter, iter + 1) becomes zero, and applies | ||||
|   // it to gamma[iter] and gamma[iter + 1]. | ||||
|   void qrUpdate(int iter) { | ||||
|  | ||||
|     QrTimer.Start(); | ||||
|     for (int i = 0; i < iter ; ++i) { | ||||
|       // tmp keeps the new lower entry until the upper one has been computed from the old pair | ||||
|       auto tmp       = -s[i] * H(iter, i) + c[i] * H(iter, i + 1); | ||||
|       H(iter, i)     = std::conj(c[i]) * H(iter, i) + std::conj(s[i]) * H(iter, i + 1); | ||||
|       H(iter, i + 1) = tmp; | ||||
|     } | ||||
|  | ||||
|     // Compute new Givens Rotation | ||||
|     ComplexD nu = sqrt(std::norm(H(iter, iter)) + std::norm(H(iter, iter + 1))); | ||||
|     c[iter]     = H(iter, iter) / nu; | ||||
|     s[iter]     = H(iter, iter + 1) / nu; | ||||
|  | ||||
|     // Apply new Givens rotation | ||||
|     H(iter, iter)     = nu; | ||||
|     H(iter, iter + 1) = 0.; | ||||
|  | ||||
|     // gamma[iter + 1] must be set first: it needs gamma[iter] as it was on entry | ||||
|     gamma[iter + 1] = -s[iter] * gamma[iter]; | ||||
|     gamma[iter]     = std::conj(c[iter]) * gamma[iter]; | ||||
|     QrTimer.Stop(); | ||||
|   } | ||||
|  | ||||
|   // Back-substitutes through the triangular system in H with right-hand side | ||||
|   // gamma[0..iter], leaves the result in y[0..iter], and adds sum_i v[i] * y[i] to psi. | ||||
|   void computeSolution(std::vector<Field> const &v, Field &psi, int iter) { | ||||
|  | ||||
|     CompSolutionTimer.Start(); | ||||
|     for (int i = iter; i >= 0; i--) { | ||||
|       y[i] = gamma[i]; | ||||
|       for (int k = i + 1; k <= iter; k++) | ||||
|         y[i] = y[i] - H(k, i) * y[k]; | ||||
|       y[i] = y[i] / H(i, i); | ||||
|     } | ||||
|  | ||||
|     for (int i = 0; i <= iter; i++) | ||||
|       psi = psi + v[i] * y[i]; | ||||
|     CompSolutionTimer.Stop(); | ||||
|   } | ||||
| }; | ||||
| } | ||||
| #endif | ||||
| @@ -1,156 +0,0 @@ | ||||
| /************************************************************************************* | ||||
|  | ||||
| Grid physics library, www.github.com/paboyle/Grid | ||||
|  | ||||
| Source file: ./lib/algorithms/iterative/MinimalResidual.h | ||||
|  | ||||
| Copyright (C) 2015 | ||||
|  | ||||
| Author: Daniel Richtmann <daniel.richtmann@ur.de> | ||||
|  | ||||
| This program is free software; you can redistribute it and/or modify | ||||
| it under the terms of the GNU General Public License as published by | ||||
| the Free Software Foundation; either version 2 of the License, or | ||||
| (at your option) any later version. | ||||
|  | ||||
| This program is distributed in the hope that it will be useful, | ||||
| but WITHOUT ANY WARRANTY; without even the implied warranty of | ||||
| MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the | ||||
| GNU General Public License for more details. | ||||
|  | ||||
| You should have received a copy of the GNU General Public License along | ||||
| with this program; if not, write to the Free Software Foundation, Inc., | ||||
| 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. | ||||
|  | ||||
| See the full license in the file "LICENSE" in the top level distribution | ||||
| directory | ||||
| *************************************************************************************/ | ||||
| /*  END LEGAL */ | ||||
| #ifndef GRID_MINIMAL_RESIDUAL_H | ||||
| #define GRID_MINIMAL_RESIDUAL_H | ||||
|  | ||||
| namespace Grid { | ||||
|  | ||||
| // Minimal-residual iteration for Linop * psi = src. Each step moves psi along | ||||
| // the current residual r by a = overRelaxParam * <Mr, r> / norm2(Mr), where | ||||
| // Mr = Linop * r, and updates r accordingly. | ||||
| template<class Field> class MinimalResidual : public OperatorFunction<Field> { | ||||
|  public: | ||||
|   bool ErrorOnNoConverge; // throw an assert when the MR fails to converge. | ||||
|                           // Defaults true. | ||||
|   RealD   Tolerance; | ||||
|   Integer MaxIterations; | ||||
|   RealD   overRelaxParam; | ||||
|   Integer IterationsToComplete; // Number of iterations the MR took to finish. | ||||
|                                 // Filled in upon completion | ||||
|  | ||||
|   // tol            : relative residual target; the solve stops once | ||||
|   //                  norm2(r) <= tol^2 * norm2(src) | ||||
|   // maxit          : maximum number of iterations | ||||
|   // ovrelparam     : factor applied to the step length of every iteration | ||||
|   // err_on_no_conv : assert when the iteration budget runs out unconverged | ||||
|   // | ||||
|   // The initialisers are listed in declaration order, which is the order in | ||||
|   // which the members are actually initialised. | ||||
|   MinimalResidual(RealD tol, Integer maxit, Real ovrelparam = 1.0, bool err_on_no_conv = true) | ||||
|     : ErrorOnNoConverge(err_on_no_conv), Tolerance(tol), MaxIterations(maxit), overRelaxParam(ovrelparam), IterationsToComplete(0) {} | ||||
|  | ||||
|   // Solves Linop * psi = src. psi is used as the starting guess and is updated | ||||
|   // in place; IterationsToComplete is set on every exit path. | ||||
|   void operator()(LinearOperatorBase<Field> &Linop, const Field &src, Field &psi) { | ||||
|  | ||||
|     psi.checkerboard = src.checkerboard; | ||||
|     conformable(psi, src); | ||||
|  | ||||
|     Complex a, c; | ||||
|     // d is first computed inside the loop; it is zero-initialised so that the | ||||
|     // "mp" log line below does not read an indeterminate value | ||||
|     Real    d = 0; | ||||
|  | ||||
|     Field Mr(src); | ||||
|     Field r(src); | ||||
|  | ||||
|     // Initial residual computation & set up | ||||
|     RealD guess = norm2(psi); | ||||
|     assert(std::isnan(guess) == 0); | ||||
|  | ||||
|     RealD ssq = norm2(src); | ||||
|     RealD rsq = Tolerance * Tolerance * ssq; // squared residual norm to reach | ||||
|  | ||||
|     Linop.Op(psi, Mr); | ||||
|  | ||||
|     r = src - Mr; | ||||
|  | ||||
|     RealD cp = norm2(r); | ||||
|  | ||||
|     std::cout << std::setprecision(4) << std::scientific; | ||||
|     std::cout << GridLogIterative << "MinimalResidual: guess " << guess << std::endl; | ||||
|     std::cout << GridLogIterative << "MinimalResidual:   src " << ssq << std::endl; | ||||
|     std::cout << GridLogIterative << "MinimalResidual:    mp " << d << std::endl; | ||||
|     std::cout << GridLogIterative << "MinimalResidual:  cp,r " << cp << std::endl; | ||||
|  | ||||
|     // The starting guess is already good enough: nothing to iterate | ||||
|     if (cp <= rsq) { | ||||
|       IterationsToComplete = 0; | ||||
|       return; | ||||
|     } | ||||
|  | ||||
|     std::cout << GridLogIterative << "MinimalResidual: k=0 residual " << cp << " target " << rsq << std::endl; | ||||
|  | ||||
|     GridStopWatch LinalgTimer; | ||||
|     GridStopWatch MatrixTimer; | ||||
|     GridStopWatch SolverTimer; | ||||
|  | ||||
|     SolverTimer.Start(); | ||||
|     int k; | ||||
|     for (k = 1; k <= MaxIterations; k++) { | ||||
|  | ||||
|       MatrixTimer.Start(); | ||||
|       Linop.Op(r, Mr); | ||||
|       MatrixTimer.Stop(); | ||||
|  | ||||
|       LinalgTimer.Start(); | ||||
|  | ||||
|       c = innerProduct(Mr, r); | ||||
|  | ||||
|       d = norm2(Mr); | ||||
|  | ||||
|       // Step length along r, scaled by the over-relaxation factor | ||||
|       a = c / d; | ||||
|  | ||||
|       a = a * overRelaxParam; | ||||
|  | ||||
|       psi = psi + r * a; | ||||
|  | ||||
|       // Update the residual with the same step instead of recomputing it from psi | ||||
|       r = r - Mr * a; | ||||
|  | ||||
|       cp = norm2(r); | ||||
|  | ||||
|       LinalgTimer.Stop(); | ||||
|  | ||||
|       std::cout << GridLogIterative << "MinimalResidual: Iteration " << k | ||||
|                 << " residual " << cp << " target " << rsq << std::endl; | ||||
|       std::cout << GridLogDebug << "a = " << a << " c = " << c << " d = " << d << std::endl; | ||||
|  | ||||
|       // Stopping condition | ||||
|       if (cp <= rsq) { | ||||
|         SolverTimer.Stop(); | ||||
|  | ||||
|         // Recompute src - Linop * psi to report the accuracy actually reached | ||||
|         Linop.Op(psi, Mr); | ||||
|         r = src - Mr; | ||||
|  | ||||
|         RealD srcnorm       = sqrt(ssq); | ||||
|         RealD resnorm       = sqrt(norm2(r)); | ||||
|         RealD true_residual = resnorm / srcnorm; | ||||
|  | ||||
|         std::cout << GridLogMessage        << "MinimalResidual Converged on iteration " << k | ||||
|                   << " computed residual " << sqrt(cp / ssq) | ||||
|                   << " true residual "     << true_residual | ||||
|                   << " target "            << Tolerance << std::endl; | ||||
|  | ||||
|         std::cout << GridLogMessage << "MR Time elapsed: Total   " << SolverTimer.Elapsed() << std::endl; | ||||
|         std::cout << GridLogMessage << "MR Time elapsed: Matrix  " << MatrixTimer.Elapsed() << std::endl; | ||||
|         std::cout << GridLogMessage << "MR Time elapsed: Linalg  " << LinalgTimer.Elapsed() << std::endl; | ||||
|  | ||||
|         // Guards against an updated residual that has drifted far from the true one | ||||
|         if (ErrorOnNoConverge) | ||||
|           assert(true_residual / Tolerance < 10000.0); | ||||
|  | ||||
|         IterationsToComplete = k; | ||||
|  | ||||
|         return; | ||||
|       } | ||||
|     } | ||||
|  | ||||
|     std::cout << GridLogMessage << "MinimalResidual did NOT converge" | ||||
|               << std::endl; | ||||
|  | ||||
|     if (ErrorOnNoConverge) | ||||
|       assert(0); | ||||
|  | ||||
|     // NOTE(review): the loop leaves k at MaxIterations + 1, so this records one | ||||
|     // more than the number of iterations actually run - confirm whether callers rely on it | ||||
|     IterationsToComplete = k; | ||||
|   } | ||||
| }; | ||||
| } // namespace Grid | ||||
| #endif | ||||
| @@ -1,273 +0,0 @@ | ||||
| /************************************************************************************* | ||||
|  | ||||
| Grid physics library, www.github.com/paboyle/Grid | ||||
|  | ||||
| Source file: ./lib/algorithms/iterative/MixedPrecisionFlexibleGeneralisedMinimalResidual.h | ||||
|  | ||||
| Copyright (C) 2015 | ||||
|  | ||||
| Author: Daniel Richtmann <daniel.richtmann@ur.de> | ||||
|  | ||||
| This program is free software; you can redistribute it and/or modify | ||||
| it under the terms of the GNU General Public License as published by | ||||
| the Free Software Foundation; either version 2 of the License, or | ||||
| (at your option) any later version. | ||||
|  | ||||
| This program is distributed in the hope that it will be useful, | ||||
| but WITHOUT ANY WARRANTY; without even the implied warranty of | ||||
| MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the | ||||
| GNU General Public License for more details. | ||||
|  | ||||
| You should have received a copy of the GNU General Public License along | ||||
| with this program; if not, write to the Free Software Foundation, Inc., | ||||
| 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. | ||||
|  | ||||
| See the full license in the file "LICENSE" in the top level distribution | ||||
| directory | ||||
| *************************************************************************************/ | ||||
| /*  END LEGAL */ | ||||
| #ifndef GRID_MIXED_PRECISION_FLEXIBLE_GENERALISED_MINIMAL_RESIDUAL_H | ||||
| #define GRID_MIXED_PRECISION_FLEXIBLE_GENERALISED_MINIMAL_RESIDUAL_H | ||||
|  | ||||
| namespace Grid { | ||||
|  | ||||
| template<class FieldD, class FieldF, typename std::enable_if<getPrecision<FieldD>::value == 2, int>::type = 0, typename std::enable_if< getPrecision<FieldF>::value == 1, int>::type = 0> | ||||
| class MixedPrecisionFlexibleGeneralisedMinimalResidual : public OperatorFunction<FieldD> { | ||||
|  public: | ||||
|   bool ErrorOnNoConverge; // Throw an assert when MPFGMRES fails to converge, | ||||
|                           // defaults to true | ||||
|  | ||||
|   RealD   Tolerance; | ||||
|  | ||||
|   Integer MaxIterations; | ||||
|   Integer RestartLength; | ||||
|   Integer MaxNumberOfRestarts; | ||||
|   Integer IterationCount; // Number of iterations the MPFGMRES took to finish, | ||||
|                           // filled in upon completion | ||||
|  | ||||
|   GridStopWatch MatrixTimer; | ||||
|   GridStopWatch PrecTimer; | ||||
|   GridStopWatch LinalgTimer; | ||||
|   GridStopWatch QrTimer; | ||||
|   GridStopWatch CompSolutionTimer; | ||||
|   GridStopWatch ChangePrecTimer; | ||||
|  | ||||
|   Eigen::MatrixXcd H; | ||||
|  | ||||
|   std::vector<std::complex<double>> y; | ||||
|   std::vector<std::complex<double>> gamma; | ||||
|   std::vector<std::complex<double>> c; | ||||
|   std::vector<std::complex<double>> s; | ||||
|  | ||||
|   GridBase* SinglePrecGrid; | ||||
|  | ||||
|   LinearFunction<FieldF> &Preconditioner; | ||||
|  | ||||
|   MixedPrecisionFlexibleGeneralisedMinimalResidual(RealD   tol, | ||||
|                                                    Integer maxit, | ||||
|                                                    GridBase * sp_grid, | ||||
|                                                    LinearFunction<FieldF> &Prec, | ||||
|                                                    Integer restart_length, | ||||
|                                                    bool    err_on_no_conv = true) | ||||
|       : Tolerance(tol) | ||||
|       , MaxIterations(maxit) | ||||
|       , RestartLength(restart_length) | ||||
|       , MaxNumberOfRestarts(MaxIterations/RestartLength + ((MaxIterations%RestartLength == 0) ? 0 : 1)) | ||||
|       , ErrorOnNoConverge(err_on_no_conv) | ||||
|       , H(Eigen::MatrixXcd::Zero(RestartLength, RestartLength + 1)) // sizes taken from DD-αAMG code base | ||||
|       , y(RestartLength + 1, 0.) | ||||
|       , gamma(RestartLength + 1, 0.) | ||||
|       , c(RestartLength + 1, 0.) | ||||
|       , s(RestartLength + 1, 0.) | ||||
|       , SinglePrecGrid(sp_grid) | ||||
|       , Preconditioner(Prec) {}; | ||||
|  | ||||
|   void operator()(LinearOperatorBase<FieldD> &LinOp, const FieldD &src, FieldD &psi) { | ||||
|  | ||||
|     psi.checkerboard = src.checkerboard; | ||||
|     conformable(psi, src); | ||||
|  | ||||
|     RealD guess = norm2(psi); | ||||
|     assert(std::isnan(guess) == 0); | ||||
|  | ||||
|     RealD cp; | ||||
|     RealD ssq = norm2(src); | ||||
|     RealD rsq = Tolerance * Tolerance * ssq; | ||||
|  | ||||
|     FieldD r(src._grid); | ||||
|  | ||||
|     std::cout << std::setprecision(4) << std::scientific; | ||||
|     std::cout << GridLogIterative << "MPFGMRES: guess " << guess << std::endl; | ||||
|     std::cout << GridLogIterative << "MPFGMRES:   src " << ssq   << std::endl; | ||||
|  | ||||
|     PrecTimer.Reset(); | ||||
|     MatrixTimer.Reset(); | ||||
|     LinalgTimer.Reset(); | ||||
|     QrTimer.Reset(); | ||||
|     CompSolutionTimer.Reset(); | ||||
|     ChangePrecTimer.Reset(); | ||||
|  | ||||
|     GridStopWatch SolverTimer; | ||||
|     SolverTimer.Start(); | ||||
|  | ||||
|     IterationCount = 0; | ||||
|  | ||||
|     for (int k=0; k<MaxNumberOfRestarts; k++) { | ||||
|  | ||||
|       cp = outerLoopBody(LinOp, src, psi, rsq); | ||||
|  | ||||
|       // Stopping condition | ||||
|       if (cp <= rsq) { | ||||
|  | ||||
|         SolverTimer.Stop(); | ||||
|  | ||||
|         LinOp.Op(psi,r); | ||||
|         axpy(r,-1.0,src,r); | ||||
|  | ||||
|         RealD srcnorm       = sqrt(ssq); | ||||
|         RealD resnorm       = sqrt(norm2(r)); | ||||
|         RealD true_residual = resnorm / srcnorm; | ||||
|  | ||||
|         std::cout << GridLogMessage        << "MPFGMRES: Converged on iteration " << IterationCount | ||||
|                   << " computed residual " << sqrt(cp / ssq) | ||||
|                   << " true residual "     << true_residual | ||||
|                   << " target "            << Tolerance << std::endl; | ||||
|  | ||||
|         std::cout << GridLogMessage << "MPFGMRES Time elapsed: Total      " <<       SolverTimer.Elapsed() << std::endl; | ||||
|         std::cout << GridLogMessage << "MPFGMRES Time elapsed: Precon     " <<         PrecTimer.Elapsed() << std::endl; | ||||
|         std::cout << GridLogMessage << "MPFGMRES Time elapsed: Matrix     " <<       MatrixTimer.Elapsed() << std::endl; | ||||
|         std::cout << GridLogMessage << "MPFGMRES Time elapsed: Linalg     " <<       LinalgTimer.Elapsed() << std::endl; | ||||
|         std::cout << GridLogMessage << "MPFGMRES Time elapsed: QR         " <<           QrTimer.Elapsed() << std::endl; | ||||
|         std::cout << GridLogMessage << "MPFGMRES Time elapsed: CompSol    " << CompSolutionTimer.Elapsed() << std::endl; | ||||
|         std::cout << GridLogMessage << "MPFGMRES Time elapsed: PrecChange " <<   ChangePrecTimer.Elapsed() << std::endl; | ||||
|         return; | ||||
|       } | ||||
|     } | ||||
|  | ||||
|     std::cout << GridLogMessage << "MPFGMRES did NOT converge" << std::endl; | ||||
|  | ||||
|     if (ErrorOnNoConverge) | ||||
|       assert(0); | ||||
|   } | ||||
|  | ||||
|   RealD outerLoopBody(LinearOperatorBase<FieldD> &LinOp, const FieldD &src, FieldD &psi, RealD rsq) { | ||||
|  | ||||
|     RealD cp = 0; | ||||
|  | ||||
|     FieldD w(src._grid); | ||||
|     FieldD r(src._grid); | ||||
|  | ||||
|     // these should probably be made class members so that they are only allocated once, not in every restart | ||||
|     std::vector<FieldD> v(RestartLength + 1, src._grid); for (auto &elem : v) elem = zero; | ||||
|     std::vector<FieldD> z(RestartLength + 1, src._grid); for (auto &elem : z) elem = zero; | ||||
|  | ||||
|     MatrixTimer.Start(); | ||||
|     LinOp.Op(psi, w); | ||||
|     MatrixTimer.Stop(); | ||||
|  | ||||
|     LinalgTimer.Start(); | ||||
|     r = src - w; | ||||
|  | ||||
|     gamma[0] = sqrt(norm2(r)); | ||||
|  | ||||
|     v[0] = (1. / gamma[0]) * r; | ||||
|     LinalgTimer.Stop(); | ||||
|  | ||||
|     for (int i=0; i<RestartLength; i++) { | ||||
|  | ||||
|       IterationCount++; | ||||
|  | ||||
|       arnoldiStep(LinOp, v, z, w, i); | ||||
|  | ||||
|       qrUpdate(i); | ||||
|  | ||||
|       cp = std::norm(gamma[i+1]); | ||||
|  | ||||
|       std::cout << GridLogIterative << "MPFGMRES: Iteration " << IterationCount | ||||
|                 << " residual " << cp << " target " << rsq << std::endl; | ||||
|  | ||||
|       if ((i == RestartLength - 1) || (IterationCount == MaxIterations) || (cp <= rsq)) { | ||||
|  | ||||
|         computeSolution(z, psi, i); | ||||
|  | ||||
|         return cp; | ||||
|       } | ||||
|     } | ||||
|  | ||||
|     assert(0); // Never reached | ||||
|     return cp; | ||||
|   } | ||||
|  | ||||
|   void arnoldiStep(LinearOperatorBase<FieldD> &LinOp, std::vector<FieldD> &v, std::vector<FieldD> &z, FieldD &w, int iter) { | ||||
|  | ||||
|     FieldF v_f(SinglePrecGrid); | ||||
|     FieldF z_f(SinglePrecGrid); | ||||
|  | ||||
|     ChangePrecTimer.Start(); | ||||
|     precisionChange(v_f, v[iter]); | ||||
|     precisionChange(z_f, z[iter]); | ||||
|     ChangePrecTimer.Stop(); | ||||
|  | ||||
|     PrecTimer.Start(); | ||||
|     Preconditioner(v_f, z_f); | ||||
|     PrecTimer.Stop(); | ||||
|  | ||||
|     ChangePrecTimer.Start(); | ||||
|     precisionChange(z[iter], z_f); | ||||
|     ChangePrecTimer.Stop(); | ||||
|  | ||||
|     MatrixTimer.Start(); | ||||
|     LinOp.Op(z[iter], w); | ||||
|     MatrixTimer.Stop(); | ||||
|  | ||||
|     LinalgTimer.Start(); | ||||
|     for (int i = 0; i <= iter; ++i) { | ||||
|       H(iter, i) = innerProduct(v[i], w); | ||||
|       w = w - H(iter, i) * v[i]; | ||||
|     } | ||||
|  | ||||
|     H(iter, iter + 1) = sqrt(norm2(w)); | ||||
|     v[iter + 1] = (1. / H(iter, iter + 1)) * w; | ||||
|     LinalgTimer.Stop(); | ||||
|   } | ||||
|  | ||||
|   // Updates the rotation-based factorisation for index `iter`: the stored rotations | ||||
|   // (c[k], s[k]) with k < iter are applied to H(iter, k) and H(iter, k + 1), then a new | ||||
|   // rotation (c[iter], s[iter]) is built that sets H(iter, iter + 1) to zero, and the | ||||
|   // same rotation is applied to gamma[iter] / gamma[iter + 1]. | ||||
|   void qrUpdate(int iter) { | ||||
|  | ||||
|     QrTimer.Start(); | ||||
|     const int next = iter + 1; | ||||
|  | ||||
|     // Apply the previously computed rotations | ||||
|     for (int k = 0; k < iter; ++k) { | ||||
|       auto second = -s[k] * H(iter, k) + c[k] * H(iter, k + 1); | ||||
|       H(iter, k)     = std::conj(c[k]) * H(iter, k) + std::conj(s[k]) * H(iter, k + 1); | ||||
|       H(iter, k + 1) = second; | ||||
|     } | ||||
|  | ||||
|     // New rotation from the two trailing entries | ||||
|     ComplexD nu = sqrt(std::norm(H(iter, iter)) + std::norm(H(iter, next))); | ||||
|     c[iter]     = H(iter, iter) / nu; | ||||
|     s[iter]     = H(iter, next) / nu; | ||||
|  | ||||
|     // Result of applying the new rotation to those two entries | ||||
|     H(iter, iter) = nu; | ||||
|     H(iter, next) = 0.; | ||||
|  | ||||
|     // gamma[next] must be formed first: it needs gamma[iter] before it is overwritten | ||||
|     gamma[next] = -s[iter] * gamma[iter]; | ||||
|     gamma[iter] = std::conj(c[iter]) * gamma[iter]; | ||||
|     QrTimer.Stop(); | ||||
|   } | ||||
|  | ||||
|   // Solves for the coefficients y[0..iter] by back-substitution, using gamma as the | ||||
|   // right-hand side and the entries H(col, row) with col >= row, then adds | ||||
|   // sum over i of z[i] * y[i] onto psi. | ||||
|   void computeSolution(std::vector<FieldD> const &z, FieldD &psi, int iter) { | ||||
|  | ||||
|     CompSolutionTimer.Start(); | ||||
|     // Work from the last coefficient down to the first | ||||
|     for (int row = iter; row >= 0; --row) { | ||||
|       y[row] = gamma[row]; | ||||
|       for (int col = row + 1; col <= iter; ++col) { | ||||
|         y[row] = y[row] - H(col, row) * y[col]; | ||||
|       } | ||||
|       y[row] = y[row] / H(row, row); | ||||
|     } | ||||
|  | ||||
|     // Accumulate the update onto psi | ||||
|     for (int col = 0; col <= iter; ++col) { | ||||
|       psi = psi + z[col] * y[col]; | ||||
|     } | ||||
|     CompSolutionTimer.Stop(); | ||||
|   } | ||||
| }; | ||||
| } | ||||
| #endif | ||||
| @@ -1,45 +0,0 @@ | ||||
| #pragma once | ||||
| namespace Grid { | ||||
| // Power-iteration estimate of the largest eigenvalue of an operator applied through HermOp.HermOp. | ||||
| template<class Field> class PowerMethod | ||||
| { | ||||
|  public: | ||||
|  | ||||
|   // Scales v in place by 1/sqrt(norm2(v)) and returns sqrt(norm2(v)) as evaluated before scaling. | ||||
|   template<typename T>  static RealD normalise(T& v) | ||||
|   { | ||||
|     RealD nn = norm2(v); | ||||
|     nn = sqrt(nn); | ||||
|     v = v * (1.0/nn); | ||||
|     return nn; | ||||
|   } | ||||
|  | ||||
|   // Iterates at most _MAX_ITER_EST_ (50) times starting from src. Each pass normalises the | ||||
|   // current vector v, applies HermOp.HermOp to it and forms real(<v|Av>) / norm2(v). | ||||
|   // Returns that value as soon as (previous estimate / current estimate) is within 0.01 of 1, | ||||
|   // or unconditionally on the last pass. Intermediate estimates are written to GridLogMessage. | ||||
|   RealD operator()(LinearOperatorBase<Field> &HermOp, const Field &src) | ||||
|   { | ||||
|     // quickly get an idea of the largest eigenvalue to more properly normalize the residuum | ||||
|     RealD evalMaxApprox = 0.0; | ||||
|     auto src_n = src; | ||||
|     auto tmp = src; | ||||
|     const int _MAX_ITER_EST_ = 50; | ||||
|  | ||||
|     for (int i=0;i<_MAX_ITER_EST_;i++) { | ||||
|  | ||||
|       normalise(src_n); | ||||
|       HermOp.HermOp(src_n,tmp); | ||||
|       RealD vnum = real(innerProduct(src_n,tmp)); // HermOp. | ||||
|       RealD vden = norm2(src_n); | ||||
|       RealD na = vnum/vden; | ||||
|  | ||||
|       // Converged relative to the previous estimate, or out of iterations: return the current value | ||||
|       if ( (fabs(evalMaxApprox/na - 1.0) < 0.01) || (i==_MAX_ITER_EST_-1) ) { | ||||
|         evalMaxApprox = na; | ||||
|         return evalMaxApprox; | ||||
|       } | ||||
|       evalMaxApprox = na; | ||||
|       std::cout << GridLogMessage << " Approximation of largest eigenvalue: " << evalMaxApprox << std::endl; | ||||
|       // The operator output becomes the next iterate (normalised at the top of the loop) | ||||
|       src_n = tmp; | ||||
|     } | ||||
|     // The loop always returns on its final pass, so this point is never reached | ||||
|     assert(0); | ||||
|     return 0; | ||||
|   } | ||||
| }; | ||||
| } | ||||
| @@ -139,10 +139,7 @@ namespace Grid { | ||||
|       MatTimer.Start(); | ||||
|       Linop.HermOpAndNorm(psi,Az,zAz,zAAz);  | ||||
|       MatTimer.Stop(); | ||||
|  | ||||
|       LinalgTimer.Start(); | ||||
|       r=src-Az; | ||||
|       LinalgTimer.Stop(); | ||||
|        | ||||
|       ///////////////////// | ||||
|       // p = Prec(r) | ||||
| @@ -155,10 +152,8 @@ namespace Grid { | ||||
|       Linop.HermOp(z,tmp);  | ||||
|       MatTimer.Stop(); | ||||
|  | ||||
|       LinalgTimer.Start(); | ||||
|       ttmp=tmp; | ||||
|       tmp=tmp-r; | ||||
|       LinalgTimer.Stop(); | ||||
|  | ||||
|       /* | ||||
|       std::cout<<GridLogMessage<<r<<std::endl; | ||||
| @@ -171,14 +166,12 @@ namespace Grid { | ||||
|       Linop.HermOpAndNorm(z,Az,zAz,zAAz);  | ||||
|       MatTimer.Stop(); | ||||
|  | ||||
|       LinalgTimer.Start(); | ||||
|       //p[0],q[0],qq[0]  | ||||
|       p[0]= z; | ||||
|       q[0]= Az; | ||||
|       qq[0]= zAAz; | ||||
|  | ||||
|       cp =norm2(r); | ||||
|       LinalgTimer.Stop(); | ||||
|  | ||||
|       for(int k=0;k<nstep;k++){ | ||||
|  | ||||
| @@ -188,14 +181,12 @@ namespace Grid { | ||||
| 	int peri_k = k %mmax; | ||||
| 	int peri_kp= kp%mmax; | ||||
|  | ||||
|         LinalgTimer.Start(); | ||||
| 	rq= real(innerProduct(r,q[peri_k])); // what if rAr not real? | ||||
| 	a = rq/qq[peri_k]; | ||||
|  | ||||
| 	axpy(psi,a,p[peri_k],psi);          | ||||
|  | ||||
| 	cp = axpy_norm(r,-a,q[peri_k],r);   | ||||
|         LinalgTimer.Stop(); | ||||
|  | ||||
| 	if((k==nstep-1)||(cp<rsq)){ | ||||
| 	  return cp; | ||||
| @@ -211,8 +202,6 @@ namespace Grid { | ||||
| 	Linop.HermOpAndNorm(z,Az,zAz,zAAz); | ||||
| 	Linop.HermOp(z,tmp); | ||||
| 	MatTimer.Stop(); | ||||
|  | ||||
|         LinalgTimer.Start(); | ||||
|         tmp=tmp-r; | ||||
| 	std::cout<<GridLogMessage<< " Preconditioner resid " <<sqrt(norm2(tmp)/norm2(r))<<std::endl;  | ||||
|  | ||||
| @@ -230,9 +219,9 @@ namespace Grid { | ||||
|  | ||||
| 	} | ||||
| 	qq[peri_kp]=norm2(q[peri_kp]); // could use axpy_norm | ||||
|         LinalgTimer.Stop(); | ||||
|       } | ||||
|  | ||||
|  | ||||
|       } | ||||
|       assert(0); // never reached | ||||
|       return cp; | ||||
|     } | ||||
|   | ||||
| @@ -87,25 +87,228 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk> | ||||
| namespace Grid { | ||||
|  | ||||
|   /////////////////////////////////////////////////////////////////////////////////////////////////////// | ||||
|   // Use base class to share code | ||||
|   // Take a matrix and form a Red Black solver calling a Herm solver | ||||
|   // Use of RB info prevents making SchurRedBlackSolve conform to standard interface | ||||
|   /////////////////////////////////////////////////////////////////////////////////////////////////////// | ||||
|   // Now make the norm reflect extra factor of Mee | ||||
|   template<class Field> class SchurRedBlackStaggeredSolve { /* Even/odd (red-black) Schur-decomposed solve built on SchurStaggeredOperator */ | ||||
|   private: | ||||
|     OperatorFunction<Field> & _HermitianRBSolver; /* solver invoked on the odd-checkerboard system */ | ||||
|     int CBfactorise; /* set to 0 by the constructor; not read elsewhere in this class */ | ||||
|     bool subGuess; /* when true, the initial guess is subtracted from the odd solution after the solve */ | ||||
|   public: | ||||
|  | ||||
|     ///////////////////////////////////////////////////// | ||||
|     // Wrap the usual normal equations Schur trick: the constructor keeps a reference to the solver and records initSubGuess | ||||
|     ///////////////////////////////////////////////////// | ||||
|   SchurRedBlackStaggeredSolve(OperatorFunction<Field> &HermitianRBSolver, const bool initSubGuess = false)  : | ||||
|      _HermitianRBSolver(HermitianRBSolver)  | ||||
|     {  | ||||
|       CBfactorise=0; | ||||
|       subtractGuess(initSubGuess); | ||||
|     }; | ||||
|     void subtractGuess(const bool initSubGuess) /* setter for subGuess */ | ||||
|     { | ||||
|       subGuess = initSubGuess; | ||||
|     } | ||||
|     bool isSubtractGuess(void) /* getter for subGuess */ | ||||
|     { | ||||
|       return subGuess; | ||||
|     } | ||||
|     /* Convenience overload: forwards to the four-argument operator() with a ZeroGuesser<Field> */ | ||||
|     template<class Matrix> | ||||
|     void operator() (Matrix & _Matrix,const Field &in, Field &out){ | ||||
|       ZeroGuesser<Field> guess; | ||||
|       (*this)(_Matrix,in,out,guess); | ||||
|     } | ||||
|     template<class Matrix, class Guesser> | ||||
|     void operator() (Matrix & _Matrix,const Field &in, Field &out, Guesser &guess){ | ||||
|       /* Splits `in` by checkerboard, solves the odd system with _HermitianRBSolver starting from guess(src_o, sol_o), rebuilds the even part and writes both into `out` */ | ||||
|       // FIXME CGdiagonalMee not implemented virtual function | ||||
|       // FIXME use CBfactorise to control schur decomp | ||||
|       GridBase *grid = _Matrix.RedBlackGrid(); | ||||
|       GridBase *fgrid= _Matrix.Grid(); | ||||
|  | ||||
|       SchurStaggeredOperator<Matrix,Field> _HermOpEO(_Matrix); | ||||
|   | ||||
|       Field src_e(grid); | ||||
|       Field src_o(grid); | ||||
|       Field sol_e(grid); | ||||
|       Field sol_o(grid); | ||||
|       Field   tmp(grid); | ||||
|       Field  Mtmp(grid); | ||||
|       Field resid(fgrid); | ||||
|        | ||||
|       std::cout << GridLogMessage << " SchurRedBlackStaggeredSolve " <<std::endl; | ||||
|       pickCheckerboard(Even,src_e,in); | ||||
|       pickCheckerboard(Odd ,src_o,in); | ||||
|       pickCheckerboard(Even,sol_e,out); | ||||
|       pickCheckerboard(Odd ,sol_o,out); | ||||
|       std::cout << GridLogMessage << " SchurRedBlackStaggeredSolve checkerboards picked" <<std::endl; | ||||
|      | ||||
|       ///////////////////////////////////////////////////// | ||||
|       // src_o = (source_o - Moe MeeInv source_e) | ||||
|       ///////////////////////////////////////////////////// | ||||
|       _Matrix.MooeeInv(src_e,tmp);     assert(  tmp.checkerboard ==Even); | ||||
|       _Matrix.Meooe   (tmp,Mtmp);      assert( Mtmp.checkerboard ==Odd);      | ||||
|       tmp=src_o-Mtmp;                  assert(  tmp.checkerboard ==Odd);      | ||||
|  | ||||
|       //src_o = tmp;     assert(src_o.checkerboard ==Odd); | ||||
|       _Matrix.Mooee(tmp,src_o); // Extra factor of "m" in source from dumb choice of matrix norm. | ||||
|  | ||||
|       ////////////////////////////////////////////////////////////// | ||||
|       // Call the red-black solver | ||||
|       ////////////////////////////////////////////////////////////// | ||||
|       std::cout<<GridLogMessage << "SchurRedBlackStaggeredSolver calling the Mpc solver" <<std::endl; | ||||
|       guess(src_o, sol_o); | ||||
|       Mtmp = sol_o; /* keep the initial guess so it can be subtracted after the solve */ | ||||
|       _HermitianRBSolver(_HermOpEO,src_o,sol_o);  assert(sol_o.checkerboard==Odd); | ||||
|       std::cout<<GridLogMessage << "SchurRedBlackStaggeredSolver called  the Mpc solver" <<std::endl; | ||||
|       // Fionn A2A boolean behavioural control | ||||
|       if (subGuess)        sol_o = sol_o-Mtmp; | ||||
|  | ||||
|       /////////////////////////////////////////////////// | ||||
|       // sol_e = M_ee^-1 * ( src_e - Meo sol_o )... | ||||
|       /////////////////////////////////////////////////// | ||||
|       _Matrix.Meooe(sol_o,tmp);        assert(  tmp.checkerboard   ==Even); | ||||
|       src_e = src_e-tmp;               assert(  src_e.checkerboard ==Even); | ||||
|       _Matrix.MooeeInv(src_e,sol_e);   assert(  sol_e.checkerboard ==Even); | ||||
|       | ||||
|       std::cout<<GridLogMessage << "SchurRedBlackStaggeredSolver reconstructed other CB" <<std::endl; | ||||
|       setCheckerboard(out,sol_e); assert(  sol_e.checkerboard ==Even); | ||||
|       setCheckerboard(out,sol_o); assert(  sol_o.checkerboard ==Odd ); | ||||
|       std::cout<<GridLogMessage << "SchurRedBlackStaggeredSolver inserted solution" <<std::endl; | ||||
|  | ||||
|       // Verify the unprec residual: logs sqrt(norm2(M out - in) / norm2(in)); only done when the guess was not subtracted | ||||
|       if ( ! subGuess ) { | ||||
|         _Matrix.M(out,resid);  | ||||
|         resid = resid-in; | ||||
|         RealD ns = norm2(in); | ||||
|         RealD nr = norm2(resid); | ||||
|         std::cout<<GridLogMessage << "SchurRedBlackStaggered solver true unprec resid "<< std::sqrt(nr/ns) <<" nr "<< nr <<" ns "<<ns << std::endl; | ||||
|       } else { | ||||
|         std::cout << GridLogMessage << "Guess subtracted after solve." << std::endl; | ||||
|       } | ||||
|     }      | ||||
|   }; | ||||
|   template<class Field> using SchurRedBlackStagSolve = SchurRedBlackStaggeredSolve<Field>; | ||||
|  | ||||
|   /////////////////////////////////////////////////////////////////////////////////////////////////////// | ||||
|   // Take a matrix and form a Red Black solver calling a Herm solver | ||||
|   // Use of RB info prevents making SchurRedBlackSolve conform to standard interface | ||||
|   /////////////////////////////////////////////////////////////////////////////////////////////////////// | ||||
|   template<class Field> class SchurRedBlackBase { | ||||
|   protected: | ||||
|     typedef CheckerBoardedSparseMatrixBase<Field> Matrix; | ||||
|   template<class Field> class SchurRedBlackDiagMooeeSolve { /* Even/odd (red-black) Schur-decomposed solve built on SchurDiagMooeeOperator */ | ||||
|   private: | ||||
|     OperatorFunction<Field> & _HermitianRBSolver; /* solver invoked on the odd-checkerboard system */ | ||||
|     int CBfactorise; /* set from the constructor argument cb; not read elsewhere in this class */ | ||||
|     bool subGuess; /* when true, the initial guess is subtracted from the odd solution after the solve */ | ||||
|     bool useSolnAsInitGuess; // if true user-supplied solution vector is used as initial guess for solver | ||||
|   public: | ||||
|     /* NOTE(review): the useSolnAsInitGuess member above and the four SchurRedBlackBase rows below look like removed-side diff rows spliced in by the page rendering - confirm against the repository */ | ||||
|     SchurRedBlackBase(OperatorFunction<Field> &HermitianRBSolver, const bool initSubGuess = false, | ||||
|         const bool _solnAsInitGuess = false)  : | ||||
|     _HermitianRBSolver(HermitianRBSolver), | ||||
|     useSolnAsInitGuess(_solnAsInitGuess) | ||||
|     ///////////////////////////////////////////////////// | ||||
|     // Wrap the usual normal equations Schur trick: the constructor keeps a reference to the solver, stores cb in CBfactorise and records initSubGuess | ||||
|     ///////////////////////////////////////////////////// | ||||
|   SchurRedBlackDiagMooeeSolve(OperatorFunction<Field> &HermitianRBSolver,int cb=0, const bool initSubGuess = false)  :  _HermitianRBSolver(HermitianRBSolver)  | ||||
|   {  | ||||
|     CBfactorise=cb; | ||||
|     subtractGuess(initSubGuess); | ||||
|   }; | ||||
|     void subtractGuess(const bool initSubGuess) /* setter for subGuess */ | ||||
|     { | ||||
|       subGuess = initSubGuess; | ||||
|     } | ||||
|     bool isSubtractGuess(void) /* getter for subGuess */ | ||||
|     { | ||||
|       return subGuess; | ||||
|     } | ||||
|     template<class Matrix> | ||||
|     void operator() (Matrix & _Matrix,const Field &in, Field &out){ | ||||
|       ZeroGuesser<Field> guess; | ||||
|       (*this)(_Matrix,in,out,guess); | ||||
|     } | ||||
|     template<class Matrix, class Guesser> | ||||
|     void operator() (Matrix & _Matrix,const Field &in, Field &out,Guesser &guess){ | ||||
|       /* Splits `in` by checkerboard, applies MpcDag to the reduced odd source, solves with _HermitianRBSolver starting from guess(src_o, sol_o), rebuilds the even part and writes both into `out` */ | ||||
|       // FIXME CGdiagonalMee not implemented virtual function | ||||
|       // FIXME use CBfactorise to control schur decomp | ||||
|       GridBase *grid = _Matrix.RedBlackGrid(); | ||||
|       GridBase *fgrid= _Matrix.Grid(); | ||||
|  | ||||
|       SchurDiagMooeeOperator<Matrix,Field> _HermOpEO(_Matrix); | ||||
|   | ||||
|       Field src_e(grid); | ||||
|       Field src_o(grid); | ||||
|       Field sol_e(grid); | ||||
|       Field sol_o(grid); | ||||
|       Field   tmp(grid); | ||||
|       Field  Mtmp(grid); | ||||
|       Field resid(fgrid); | ||||
|  | ||||
|       pickCheckerboard(Even,src_e,in); | ||||
|       pickCheckerboard(Odd ,src_o,in); | ||||
|       pickCheckerboard(Even,sol_e,out); | ||||
|       pickCheckerboard(Odd ,sol_o,out); | ||||
|      | ||||
|       ///////////////////////////////////////////////////// | ||||
|       // src_o = Mdag * (source_o - Moe MeeInv source_e) | ||||
|       ///////////////////////////////////////////////////// | ||||
|       _Matrix.MooeeInv(src_e,tmp);     assert(  tmp.checkerboard ==Even); | ||||
|       _Matrix.Meooe   (tmp,Mtmp);      assert( Mtmp.checkerboard ==Odd);      | ||||
|       tmp=src_o-Mtmp;                  assert(  tmp.checkerboard ==Odd);      | ||||
|  | ||||
|       // get the right MpcDag | ||||
|       _HermOpEO.MpcDag(tmp,src_o);     assert(src_o.checkerboard ==Odd);        | ||||
|  | ||||
|       ////////////////////////////////////////////////////////////// | ||||
|       // Call the red-black solver | ||||
|       ////////////////////////////////////////////////////////////// | ||||
|       std::cout<<GridLogMessage << "SchurRedBlack solver calling the MpcDagMp solver" <<std::endl; | ||||
|       guess(src_o,sol_o); | ||||
|       Mtmp = sol_o; /* keep the initial guess so it can be subtracted after the solve */ | ||||
|       _HermitianRBSolver(_HermOpEO,src_o,sol_o);  assert(sol_o.checkerboard==Odd); | ||||
|       // Fionn A2A boolean behavioural control | ||||
|       if (subGuess)        sol_o = sol_o-Mtmp; | ||||
|  | ||||
|       /////////////////////////////////////////////////// | ||||
|       // sol_e = M_ee^-1 * ( src_e - Meo sol_o )... | ||||
|       /////////////////////////////////////////////////// | ||||
|       _Matrix.Meooe(sol_o,tmp);        assert(  tmp.checkerboard   ==Even); | ||||
|       src_e = src_e-tmp;               assert(  src_e.checkerboard ==Even); | ||||
|       _Matrix.MooeeInv(src_e,sol_e);   assert(  sol_e.checkerboard ==Even); | ||||
|       | ||||
|       setCheckerboard(out,sol_e); assert(  sol_e.checkerboard ==Even); | ||||
|       setCheckerboard(out,sol_o); assert(  sol_o.checkerboard ==Odd ); | ||||
|  | ||||
|       // Verify the unprec residual: logs sqrt(norm2(M out - in) / norm2(in)); only done when the guess was not subtracted | ||||
|       if ( ! subGuess ) { | ||||
|         _Matrix.M(out,resid);  | ||||
|         resid = resid-in; | ||||
|         RealD ns = norm2(in); | ||||
|         RealD nr = norm2(resid); | ||||
|  | ||||
|         std::cout<<GridLogMessage << "SchurRedBlackDiagMooee solver true unprec resid "<< std::sqrt(nr/ns) <<" nr "<< nr <<" ns "<<ns << std::endl; | ||||
|       } else { | ||||
|         std::cout << GridLogMessage << "Guess subtracted after solve." << std::endl; | ||||
|       } | ||||
|     }      | ||||
|   }; | ||||
|  | ||||
|  | ||||
|   /////////////////////////////////////////////////////////////////////////////////////////////////////// | ||||
|   // Take a matrix and form a Red Black solver calling a Herm solver | ||||
|   // Use of RB info prevents making SchurRedBlackSolve conform to standard interface | ||||
|   /////////////////////////////////////////////////////////////////////////////////////////////////////// | ||||
|   template<class Field> class SchurRedBlackDiagTwoSolve { | ||||
|   private: | ||||
|     OperatorFunction<Field> & _HermitianRBSolver; | ||||
|     int CBfactorise; | ||||
|     bool subGuess; | ||||
|   public: | ||||
|  | ||||
|     ///////////////////////////////////////////////////// | ||||
|     // Wrap the usual normal equations Schur trick | ||||
|     ///////////////////////////////////////////////////// | ||||
|   SchurRedBlackDiagTwoSolve(OperatorFunction<Field> &HermitianRBSolver, const bool initSubGuess = false)  : | ||||
|      _HermitianRBSolver(HermitianRBSolver)  | ||||
|     {  | ||||
|       CBfactorise = 0; | ||||
|       subtractGuess(initSubGuess); | ||||
| @@ -119,90 +322,12 @@ namespace Grid { | ||||
|       return subGuess; | ||||
|     } | ||||
|  | ||||
|     ///////////////////////////////////////////////////////////// | ||||
|     // Shared code | ||||
|     ///////////////////////////////////////////////////////////// | ||||
|     template<class Matrix> | ||||
|     void operator() (Matrix & _Matrix,const Field &in, Field &out){ | ||||
|       ZeroGuesser<Field> guess; | ||||
|       (*this)(_Matrix,in,out,guess); | ||||
|     } | ||||
|     void operator()(Matrix &_Matrix, const std::vector<Field> &in, std::vector<Field> &out)  | ||||
|     { | ||||
|       ZeroGuesser<Field> guess; | ||||
|       (*this)(_Matrix,in,out,guess); | ||||
|     } | ||||
|  | ||||
|     template<class Guesser> | ||||
|     void operator()(Matrix &_Matrix, const std::vector<Field> &in, std::vector<Field> &out,Guesser &guess)  | ||||
|     { | ||||
|       GridBase *grid = _Matrix.RedBlackGrid(); | ||||
|       GridBase *fgrid= _Matrix.Grid(); | ||||
|       int nblock = in.size(); | ||||
|  | ||||
|       std::vector<Field> src_o(nblock,grid); | ||||
|       std::vector<Field> sol_o(nblock,grid); | ||||
|        | ||||
|       std::vector<Field> guess_save; | ||||
|  | ||||
|       Field resid(fgrid); | ||||
|       Field tmp(grid); | ||||
|  | ||||
|       //////////////////////////////////////////////// | ||||
|       // Prepare RedBlack source | ||||
|       //////////////////////////////////////////////// | ||||
|       for(int b=0;b<nblock;b++){ | ||||
| 	RedBlackSource(_Matrix,in[b],tmp,src_o[b]); | ||||
|       } | ||||
|       //////////////////////////////////////////////// | ||||
|       // Make the guesses | ||||
|       //////////////////////////////////////////////// | ||||
|       if ( subGuess ) guess_save.resize(nblock,grid); | ||||
|  | ||||
|       for(int b=0;b<nblock;b++){ | ||||
|         if(useSolnAsInitGuess) { | ||||
|           pickCheckerboard(Odd, sol_o[b], out[b]); | ||||
|         } else { | ||||
|           guess(src_o[b],sol_o[b]);  | ||||
|         } | ||||
|  | ||||
| 	if ( subGuess ) {  | ||||
| 	  guess_save[b] = sol_o[b]; | ||||
| 	} | ||||
|       } | ||||
|       ////////////////////////////////////////////////////////////// | ||||
|       // Call the block solver | ||||
|       ////////////////////////////////////////////////////////////// | ||||
|       std::cout<<GridLogMessage << "SchurRedBlackBase calling the solver for "<<nblock<<" RHS" <<std::endl; | ||||
|       RedBlackSolve(_Matrix,src_o,sol_o); | ||||
|  | ||||
|       //////////////////////////////////////////////// | ||||
|       // A2A boolean behavioural control & reconstruct other checkerboard | ||||
|       //////////////////////////////////////////////// | ||||
|       for(int b=0;b<nblock;b++) { | ||||
|  | ||||
| 	if (subGuess)   sol_o[b] = sol_o[b] - guess_save[b]; | ||||
|  | ||||
| 	///////// Needs even source ////////////// | ||||
| 	pickCheckerboard(Even,tmp,in[b]); | ||||
| 	RedBlackSolution(_Matrix,sol_o[b],tmp,out[b]); | ||||
|  | ||||
| 	///////////////////////////////////////////////// | ||||
| 	// Check unprec residual if possible | ||||
| 	///////////////////////////////////////////////// | ||||
| 	if ( ! subGuess ) { | ||||
| 	  _Matrix.M(out[b],resid);  | ||||
| 	  resid = resid-in[b]; | ||||
| 	  RealD ns = norm2(in[b]); | ||||
| 	  RealD nr = norm2(resid); | ||||
| 	 | ||||
| 	  std::cout<<GridLogMessage<< "SchurRedBlackBase solver true unprec resid["<<b<<"] "<<std::sqrt(nr/ns) << std::endl; | ||||
| 	} else { | ||||
| 	  std::cout<<GridLogMessage<< "SchurRedBlackBase Guess subtracted after solve["<<b<<"] " << std::endl; | ||||
| 	} | ||||
|  | ||||
|       } | ||||
|     } | ||||
|     template<class Guesser> | ||||
|     template<class Matrix,class Guesser> | ||||
|     void operator() (Matrix & _Matrix,const Field &in, Field &out,Guesser &guess){ | ||||
|  | ||||
|       // FIXME CGdiagonalMee not implemented virtual function | ||||
| @@ -210,109 +335,42 @@ namespace Grid { | ||||
|       GridBase *grid = _Matrix.RedBlackGrid(); | ||||
|       GridBase *fgrid= _Matrix.Grid(); | ||||
|  | ||||
|       Field resid(fgrid); | ||||
|       Field src_o(grid); | ||||
|       SchurDiagTwoOperator<Matrix,Field> _HermOpEO(_Matrix); | ||||
|   | ||||
|       Field src_e(grid); | ||||
|       Field src_o(grid); | ||||
|       Field sol_e(grid); | ||||
|       Field sol_o(grid); | ||||
|  | ||||
|       //////////////////////////////////////////////// | ||||
|       // RedBlack source | ||||
|       //////////////////////////////////////////////// | ||||
|       RedBlackSource(_Matrix,in,src_e,src_o); | ||||
|  | ||||
|       //////////////////////////////// | ||||
|       // Construct the guess | ||||
|       //////////////////////////////// | ||||
|       if(useSolnAsInitGuess) { | ||||
|         pickCheckerboard(Odd, sol_o, out); | ||||
|       } else { | ||||
|         guess(src_o,sol_o); | ||||
|       } | ||||
|  | ||||
|       Field  guess_save(grid); | ||||
|       guess_save = sol_o; | ||||
|  | ||||
|       ////////////////////////////////////////////////////////////// | ||||
|       // Call the red-black solver | ||||
|       ////////////////////////////////////////////////////////////// | ||||
|       RedBlackSolve(_Matrix,src_o,sol_o); | ||||
|  | ||||
|       //////////////////////////////////////////////// | ||||
|       // Fionn A2A boolean behavioural control | ||||
|       //////////////////////////////////////////////// | ||||
|       if (subGuess)      sol_o= sol_o-guess_save; | ||||
|  | ||||
|       /////////////////////////////////////////////////// | ||||
|       // RedBlack solution needs the even source | ||||
|       /////////////////////////////////////////////////// | ||||
|       RedBlackSolution(_Matrix,sol_o,src_e,out); | ||||
|  | ||||
|       // Verify the unprec residual | ||||
|       if ( ! subGuess ) { | ||||
|         _Matrix.M(out,resid);  | ||||
|         resid = resid-in; | ||||
|         RealD ns = norm2(in); | ||||
|         RealD nr = norm2(resid); | ||||
|  | ||||
|         std::cout<<GridLogMessage << "SchurRedBlackBase solver true unprec resid "<< std::sqrt(nr/ns) << std::endl; | ||||
|       } else { | ||||
|         std::cout << GridLogMessage << "SchurRedBlackBase Guess subtracted after solve." << std::endl; | ||||
|       } | ||||
|     }      | ||||
|      | ||||
|     ///////////////////////////////////////////////////////////// | ||||
|     // Override in derived.  | ||||
|     ///////////////////////////////////////////////////////////// | ||||
|     virtual void RedBlackSource  (Matrix & _Matrix,const Field &src, Field &src_e,Field &src_o)                =0; | ||||
|     virtual void RedBlackSolution(Matrix & _Matrix,const Field &sol_o, const Field &src_e,Field &sol)          =0; | ||||
|     virtual void RedBlackSolve   (Matrix & _Matrix,const Field &src_o, Field &sol_o)                           =0; | ||||
|     virtual void RedBlackSolve   (Matrix & _Matrix,const std::vector<Field> &src_o,  std::vector<Field> &sol_o)=0; | ||||
|  | ||||
|   }; | ||||
|  | ||||
|   template<class Field> class SchurRedBlackStaggeredSolve : public SchurRedBlackBase<Field> { | ||||
|   public: | ||||
|     typedef CheckerBoardedSparseMatrixBase<Field> Matrix; | ||||
|  | ||||
|     SchurRedBlackStaggeredSolve(OperatorFunction<Field> &HermitianRBSolver, const bool initSubGuess = false, | ||||
|         const bool _solnAsInitGuess = false)  | ||||
|       :    SchurRedBlackBase<Field> (HermitianRBSolver,initSubGuess,_solnAsInitGuess)  | ||||
|     { | ||||
|     } | ||||
|  | ||||
|     ////////////////////////////////////////////////////// | ||||
|     // Override RedBlack specialisation | ||||
|     ////////////////////////////////////////////////////// | ||||
|     virtual void RedBlackSource(Matrix & _Matrix,const Field &src, Field &src_e,Field &src_o) | ||||
|     { | ||||
|       GridBase *grid = _Matrix.RedBlackGrid(); | ||||
|       GridBase *fgrid= _Matrix.Grid(); | ||||
|  | ||||
|       Field   tmp(grid); | ||||
|       Field  Mtmp(grid); | ||||
|       Field resid(fgrid); | ||||
|  | ||||
|       pickCheckerboard(Even,src_e,src); | ||||
|       pickCheckerboard(Odd ,src_o,src); | ||||
|       pickCheckerboard(Even,src_e,in); | ||||
|       pickCheckerboard(Odd ,src_o,in); | ||||
|       pickCheckerboard(Even,sol_e,out); | ||||
|       pickCheckerboard(Odd ,sol_o,out); | ||||
|      | ||||
|       ///////////////////////////////////////////////////// | ||||
|       // src_o = (source_o - Moe MeeInv source_e) | ||||
|       // src_o = Mdag * (source_o - Moe MeeInv source_e) | ||||
|       ///////////////////////////////////////////////////// | ||||
|       _Matrix.MooeeInv(src_e,tmp);     assert(  tmp.checkerboard ==Even); | ||||
|       _Matrix.Meooe   (tmp,Mtmp);      assert( Mtmp.checkerboard ==Odd);      | ||||
|       tmp=src_o-Mtmp;                  assert(  tmp.checkerboard ==Odd);      | ||||
|  | ||||
|       _Matrix.Mooee(tmp,src_o); // Extra factor of "m" in source from dumb choice of matrix norm. | ||||
|     } | ||||
|     virtual void RedBlackSolution(Matrix & _Matrix,const Field &sol_o, const Field &src_e_c,Field &sol) | ||||
|     { | ||||
|       GridBase *grid = _Matrix.RedBlackGrid(); | ||||
|       GridBase *fgrid= _Matrix.Grid(); | ||||
|       // get the right MpcDag | ||||
|       _HermOpEO.MpcDag(tmp,src_o);     assert(src_o.checkerboard ==Odd);        | ||||
|  | ||||
|       Field   tmp(grid); | ||||
|       Field   sol_e(grid); | ||||
|       Field   src_e(grid); | ||||
|  | ||||
|       src_e = src_e_c; // Const correctness | ||||
|       ////////////////////////////////////////////////////////////// | ||||
|       // Call the red-black solver | ||||
|       ////////////////////////////////////////////////////////////// | ||||
|       std::cout<<GridLogMessage << "SchurRedBlack solver calling the MpcDagMp solver" <<std::endl; | ||||
| //      _HermitianRBSolver(_HermOpEO,src_o,sol_o);  assert(sol_o.checkerboard==Odd); | ||||
|       guess(src_o,tmp); | ||||
|       Mtmp = tmp; | ||||
|       _HermitianRBSolver(_HermOpEO,src_o,tmp);  assert(tmp.checkerboard==Odd); | ||||
|       // Fionn A2A boolean behavioural control | ||||
|       if (subGuess)      tmp = tmp-Mtmp; | ||||
|       _Matrix.MooeeInv(tmp,sol_o);       assert(  sol_o.checkerboard   ==Odd); | ||||
|  | ||||
|       /////////////////////////////////////////////////// | ||||
|       // sol_e = M_ee^-1 * ( src_e - Meo sol_o )... | ||||
| @@ -321,118 +379,78 @@ namespace Grid { | ||||
|       src_e = src_e-tmp;               assert(  src_e.checkerboard ==Even); | ||||
|       _Matrix.MooeeInv(src_e,sol_e);   assert(  sol_e.checkerboard ==Even); | ||||
|       | ||||
|       setCheckerboard(sol,sol_e); assert(  sol_e.checkerboard ==Even); | ||||
|       setCheckerboard(sol,sol_o); assert(  sol_o.checkerboard ==Odd ); | ||||
|     } | ||||
|     virtual void RedBlackSolve   (Matrix & _Matrix,const Field &src_o, Field &sol_o) | ||||
|     { | ||||
|       SchurStaggeredOperator<Matrix,Field> _HermOpEO(_Matrix); | ||||
|       this->_HermitianRBSolver(_HermOpEO,src_o,sol_o);  assert(sol_o.checkerboard==Odd); | ||||
|     }; | ||||
|     virtual void RedBlackSolve   (Matrix & _Matrix,const std::vector<Field> &src_o,  std::vector<Field> &sol_o) | ||||
|     { | ||||
|       SchurStaggeredOperator<Matrix,Field> _HermOpEO(_Matrix); | ||||
|       this->_HermitianRBSolver(_HermOpEO,src_o,sol_o);  | ||||
|     } | ||||
|   }; | ||||
|   template<class Field> using SchurRedBlackStagSolve = SchurRedBlackStaggeredSolve<Field>; | ||||
|       setCheckerboard(out,sol_e); assert(  sol_e.checkerboard ==Even); | ||||
|       setCheckerboard(out,sol_o); assert(  sol_o.checkerboard ==Odd ); | ||||
|  | ||||
|       // Verify the unprec residual | ||||
|       if ( ! subGuess ) { | ||||
|         _Matrix.M(out,resid);  | ||||
|         resid = resid-in; | ||||
|         RealD ns = norm2(in); | ||||
|         RealD nr = norm2(resid); | ||||
|  | ||||
|         std::cout<<GridLogMessage << "SchurRedBlackDiagTwo solver true unprec resid "<< std::sqrt(nr/ns) <<" nr "<< nr <<" ns "<<ns << std::endl; | ||||
|       } else { | ||||
|         std::cout << GridLogMessage << "Guess subtracted after solve." << std::endl; | ||||
|       } | ||||
|     }      | ||||
|   }; | ||||
|   /////////////////////////////////////////////////////////////////////////////////////////////////////// | ||||
|   // Site diagonal has Mooee on it. | ||||
|   // Take a matrix and form a Red Black solver calling a Herm solver | ||||
|   // Use of RB info prevents making SchurRedBlackSolve conform to standard interface | ||||
|   /////////////////////////////////////////////////////////////////////////////////////////////////////// | ||||
|   template<class Field> class SchurRedBlackDiagMooeeSolve : public SchurRedBlackBase<Field> { | ||||
|   template<class Field> class SchurRedBlackDiagTwoMixed { | ||||
|   private: | ||||
|     LinearFunction<Field> & _HermitianRBSolver; | ||||
|     int CBfactorise; | ||||
|     bool subGuess; | ||||
|   public: | ||||
|     typedef CheckerBoardedSparseMatrixBase<Field> Matrix; | ||||
|  | ||||
|     SchurRedBlackDiagMooeeSolve(OperatorFunction<Field> &HermitianRBSolver, const bool initSubGuess = false, | ||||
|         const bool _solnAsInitGuess = false)   | ||||
|       : SchurRedBlackBase<Field> (HermitianRBSolver,initSubGuess,_solnAsInitGuess) {}; | ||||
|  | ||||
|  | ||||
|     ////////////////////////////////////////////////////// | ||||
|     // Override RedBlack specialisation | ||||
|     ////////////////////////////////////////////////////// | ||||
|     virtual void RedBlackSource(Matrix & _Matrix,const Field &src, Field &src_e,Field &src_o) | ||||
|     { | ||||
|       GridBase *grid = _Matrix.RedBlackGrid(); | ||||
|       GridBase *fgrid= _Matrix.Grid(); | ||||
|  | ||||
|       Field   tmp(grid); | ||||
|       Field  Mtmp(grid); | ||||
|  | ||||
|       pickCheckerboard(Even,src_e,src); | ||||
|       pickCheckerboard(Odd ,src_o,src); | ||||
|  | ||||
|       ///////////////////////////////////////////////////// | ||||
|       // src_o = Mdag * (source_o - Moe MeeInv source_e) | ||||
|       ///////////////////////////////////////////////////// | ||||
|       _Matrix.MooeeInv(src_e,tmp);     assert(  tmp.checkerboard ==Even); | ||||
|       _Matrix.Meooe   (tmp,Mtmp);      assert( Mtmp.checkerboard ==Odd);      | ||||
|       tmp=src_o-Mtmp;                  assert(  tmp.checkerboard ==Odd);      | ||||
|  | ||||
|       // get the right MpcDag | ||||
|       SchurDiagMooeeOperator<Matrix,Field> _HermOpEO(_Matrix); | ||||
|       _HermOpEO.MpcDag(tmp,src_o);     assert(src_o.checkerboard ==Odd);        | ||||
|  | ||||
|     } | ||||
|     virtual void RedBlackSolution(Matrix & _Matrix,const Field &sol_o, const Field &src_e,Field &sol) | ||||
|     { | ||||
|       GridBase *grid = _Matrix.RedBlackGrid(); | ||||
|       GridBase *fgrid= _Matrix.Grid(); | ||||
|  | ||||
|       Field   tmp(grid); | ||||
|       Field  sol_e(grid); | ||||
|       Field  src_e_i(grid); | ||||
|       /////////////////////////////////////////////////// | ||||
|       // sol_e = M_ee^-1 * ( src_e - Meo sol_o )... | ||||
|       /////////////////////////////////////////////////// | ||||
|       _Matrix.Meooe(sol_o,tmp);          assert(  tmp.checkerboard   ==Even); | ||||
|       src_e_i = src_e-tmp;               assert(  src_e_i.checkerboard ==Even); | ||||
|       _Matrix.MooeeInv(src_e_i,sol_e);   assert(  sol_e.checkerboard ==Even); | ||||
|       | ||||
|       setCheckerboard(sol,sol_e); assert(  sol_e.checkerboard ==Even); | ||||
|       setCheckerboard(sol,sol_o); assert(  sol_o.checkerboard ==Odd ); | ||||
|     } | ||||
|     virtual void RedBlackSolve   (Matrix & _Matrix,const Field &src_o, Field &sol_o) | ||||
|     { | ||||
|       SchurDiagMooeeOperator<Matrix,Field> _HermOpEO(_Matrix); | ||||
|       this->_HermitianRBSolver(_HermOpEO,src_o,sol_o);  assert(sol_o.checkerboard==Odd); | ||||
|     }; | ||||
|     virtual void RedBlackSolve   (Matrix & _Matrix,const std::vector<Field> &src_o,  std::vector<Field> &sol_o) | ||||
|     { | ||||
|       SchurDiagMooeeOperator<Matrix,Field> _HermOpEO(_Matrix); | ||||
|       this->_HermitianRBSolver(_HermOpEO,src_o,sol_o);  | ||||
|     } | ||||
|   }; | ||||
|  | ||||
|   /////////////////////////////////////////////////////////////////////////////////////////////////////// | ||||
|   // Site diagonal is identity, right preconditioned by Mee^inv | ||||
|   // ( 1 - Meo Moo^inv Moe Mee^inv  ) phi =( 1 - Meo Moo^inv Moe Mee^inv  ) Mee psi = eta | ||||
|   //=> psi = MeeInv phi | ||||
|   /////////////////////////////////////////////////////////////////////////////////////////////////////// | ||||
|   template<class Field> class SchurRedBlackDiagTwoSolve : public SchurRedBlackBase<Field> { | ||||
|   public: | ||||
|     typedef CheckerBoardedSparseMatrixBase<Field> Matrix; | ||||
|  | ||||
|     ///////////////////////////////////////////////////// | ||||
|     // Wrap the usual normal equations Schur trick | ||||
|     ///////////////////////////////////////////////////// | ||||
|   SchurRedBlackDiagTwoSolve(OperatorFunction<Field> &HermitianRBSolver, const bool initSubGuess = false, | ||||
|       const bool _solnAsInitGuess = false)   | ||||
|     : SchurRedBlackBase<Field>(HermitianRBSolver,initSubGuess,_solnAsInitGuess) {}; | ||||
|  | ||||
|     virtual void RedBlackSource(Matrix & _Matrix,const Field &src, Field &src_e,Field &src_o) | ||||
|   SchurRedBlackDiagTwoMixed(LinearFunction<Field> &HermitianRBSolver, const bool initSubGuess = false)  : | ||||
|      _HermitianRBSolver(HermitianRBSolver)  | ||||
|     {  | ||||
|       CBfactorise=0; | ||||
|       subtractGuess(initSubGuess); | ||||
|     }; | ||||
|     void subtractGuess(const bool initSubGuess) | ||||
|     { | ||||
|       subGuess = initSubGuess; | ||||
|     } | ||||
|     bool isSubtractGuess(void) | ||||
|     { | ||||
|       return subGuess; | ||||
|     } | ||||
|  | ||||
|     template<class Matrix> | ||||
|     void operator() (Matrix & _Matrix,const Field &in, Field &out){ | ||||
|       ZeroGuesser<Field> guess; | ||||
|       (*this)(_Matrix,in,out,guess); | ||||
|     } | ||||
|     template<class Matrix, class Guesser> | ||||
|     void operator() (Matrix & _Matrix,const Field &in, Field &out,Guesser &guess){ | ||||
|  | ||||
|       // FIXME CGdiagonalMee not implemented virtual function | ||||
|       // FIXME use CBfactorise to control schur decomp | ||||
|       GridBase *grid = _Matrix.RedBlackGrid(); | ||||
|       GridBase *fgrid= _Matrix.Grid(); | ||||
|  | ||||
|       SchurDiagTwoOperator<Matrix,Field> _HermOpEO(_Matrix); | ||||
|   | ||||
|       Field src_e(grid); | ||||
|       Field src_o(grid); | ||||
|       Field sol_e(grid); | ||||
|       Field sol_o(grid); | ||||
|       Field   tmp(grid); | ||||
|       Field  Mtmp(grid); | ||||
|       Field resid(fgrid); | ||||
|  | ||||
|       pickCheckerboard(Even,src_e,src); | ||||
|       pickCheckerboard(Odd ,src_o,src); | ||||
|       pickCheckerboard(Even,src_e,in); | ||||
|       pickCheckerboard(Odd ,src_o,in); | ||||
|       pickCheckerboard(Even,sol_e,out); | ||||
|       pickCheckerboard(Odd ,sol_o,out); | ||||
|      | ||||
|       ///////////////////////////////////////////////////// | ||||
|       // src_o = Mdag * (source_o - Moe MeeInv source_e) | ||||
| @@ -443,44 +461,43 @@ namespace Grid { | ||||
|  | ||||
|       // get the right MpcDag | ||||
|       _HermOpEO.MpcDag(tmp,src_o);     assert(src_o.checkerboard ==Odd);        | ||||
|     } | ||||
|  | ||||
|     virtual void RedBlackSolution(Matrix & _Matrix,const Field &sol_o, const Field &src_e,Field &sol) | ||||
|     { | ||||
|       GridBase *grid = _Matrix.RedBlackGrid(); | ||||
|       GridBase *fgrid= _Matrix.Grid(); | ||||
|  | ||||
|       Field   sol_o_i(grid); | ||||
|       Field   tmp(grid); | ||||
|       Field   sol_e(grid); | ||||
|  | ||||
|       //////////////////////////////////////////////// | ||||
|       // MooeeInv due to preconditioning | ||||
|       //////////////////////////////////////////////// | ||||
|       _Matrix.MooeeInv(sol_o,tmp); | ||||
|       sol_o_i = tmp; | ||||
|       ////////////////////////////////////////////////////////////// | ||||
|       // Call the red-black solver | ||||
|       ////////////////////////////////////////////////////////////// | ||||
|       std::cout<<GridLogMessage << "SchurRedBlack solver calling the MpcDagMp solver" <<std::endl; | ||||
| //      _HermitianRBSolver(_HermOpEO,src_o,sol_o);  assert(sol_o.checkerboard==Odd); | ||||
| //      _HermitianRBSolver(_HermOpEO,src_o,tmp);  assert(tmp.checkerboard==Odd); | ||||
|       guess(src_o,tmp); | ||||
|       Mtmp = tmp; | ||||
|       _HermitianRBSolver(_HermOpEO,src_o,tmp);  assert(tmp.checkerboard==Odd); | ||||
|       // Fionn A2A boolean behavioural control | ||||
|       if (subGuess)      tmp = tmp-Mtmp; | ||||
|       _Matrix.MooeeInv(tmp,sol_o);        assert(  sol_o.checkerboard   ==Odd); | ||||
|  | ||||
|       /////////////////////////////////////////////////// | ||||
|       // sol_e = M_ee^-1 * ( src_e - Meo sol_o )... | ||||
|       /////////////////////////////////////////////////// | ||||
|       _Matrix.Meooe(sol_o_i,tmp);    assert(  tmp.checkerboard   ==Even); | ||||
|       tmp = src_e-tmp;               assert(  src_e.checkerboard ==Even); | ||||
|       _Matrix.MooeeInv(tmp,sol_e);   assert(  sol_e.checkerboard ==Even); | ||||
|       _Matrix.Meooe(sol_o,tmp);        assert(  tmp.checkerboard   ==Even); | ||||
|       src_e = src_e-tmp;               assert(  src_e.checkerboard ==Even); | ||||
|       _Matrix.MooeeInv(src_e,sol_e);   assert(  sol_e.checkerboard ==Even); | ||||
|       | ||||
|       setCheckerboard(sol,sol_e);    assert(  sol_e.checkerboard ==Even); | ||||
|       setCheckerboard(sol,sol_o_i);  assert(  sol_o_i.checkerboard ==Odd ); | ||||
|     }; | ||||
|       setCheckerboard(out,sol_e); assert(  sol_e.checkerboard ==Even); | ||||
|       setCheckerboard(out,sol_o); assert(  sol_o.checkerboard ==Odd ); | ||||
|  | ||||
|     virtual void RedBlackSolve   (Matrix & _Matrix,const Field &src_o, Field &sol_o) | ||||
|     { | ||||
|       SchurDiagTwoOperator<Matrix,Field> _HermOpEO(_Matrix); | ||||
|       this->_HermitianRBSolver(_HermOpEO,src_o,sol_o); | ||||
|     }; | ||||
|     virtual void RedBlackSolve   (Matrix & _Matrix,const std::vector<Field> &src_o,  std::vector<Field> &sol_o) | ||||
|     { | ||||
|       SchurDiagTwoOperator<Matrix,Field> _HermOpEO(_Matrix); | ||||
|       this->_HermitianRBSolver(_HermOpEO,src_o,sol_o);  | ||||
|       // Verify the unprec residual | ||||
|       if ( ! subGuess ) { | ||||
|         _Matrix.M(out,resid);  | ||||
|         resid = resid-in; | ||||
|         RealD ns = norm2(in); | ||||
|         RealD nr = norm2(resid); | ||||
|  | ||||
|         std::cout << GridLogMessage << "SchurRedBlackDiagTwo solver true unprec resid " << std::sqrt(nr / ns) << " nr " << nr << " ns " << ns << std::endl; | ||||
|       } else { | ||||
|         std::cout << GridLogMessage << "Guess subtracted after solve." << std::endl; | ||||
|       } | ||||
|     }      | ||||
|   }; | ||||
|  | ||||
| } | ||||
| #endif | ||||
|   | ||||
| @@ -50,15 +50,15 @@ void CartesianCommunicator::Init(int *argc, char ***argv) | ||||
|       assert(0); | ||||
|   } | ||||
|  | ||||
|   Grid_quiesce_nodes(); | ||||
|  | ||||
|   // Never clean up as done once. | ||||
|   MPI_Comm_dup (MPI_COMM_WORLD,&communicator_world); | ||||
|  | ||||
|   Grid_quiesce_nodes(); | ||||
|   GlobalSharedMemory::Init(communicator_world); | ||||
|   GlobalSharedMemory::SharedMemoryAllocate( | ||||
| 		   GlobalSharedMemory::MAX_MPI_SHM_BYTES, | ||||
| 		   GlobalSharedMemory::Hugepages); | ||||
|   Grid_unquiesce_nodes(); | ||||
| } | ||||
|  | ||||
| /////////////////////////////////////////////////////////////////////////// | ||||
| @@ -107,7 +107,8 @@ CartesianCommunicator::CartesianCommunicator(const std::vector<int> &processors) | ||||
| ////////////////////////////////// | ||||
| CartesianCommunicator::CartesianCommunicator(const std::vector<int> &processors,const CartesianCommunicator &parent,int &srank)     | ||||
| { | ||||
|   _ndimension = processors.size();  assert(_ndimension>=1); | ||||
|   _ndimension = processors.size(); | ||||
|  | ||||
|   int parent_ndimension = parent._ndimension; assert(_ndimension >= parent._ndimension); | ||||
|   std::vector<int> parent_processor_coor(_ndimension,0); | ||||
|   std::vector<int> parent_processors    (_ndimension,1); | ||||
| @@ -123,8 +124,10 @@ CartesianCommunicator::CartesianCommunicator(const std::vector<int> &processors, | ||||
|   // split the communicator | ||||
|   ////////////////////////////////////////////////////////////////////////////////////////////////////// | ||||
|   //  int Nparent = parent._processors ;  | ||||
|   //  std::cout << " splitting from communicator "<<parent.communicator <<std::endl; | ||||
|   int Nparent; | ||||
|   MPI_Comm_size(parent.communicator,&Nparent); | ||||
|   //  std::cout << " Parent size  "<<Nparent <<std::endl; | ||||
|  | ||||
|   int childsize=1; | ||||
|   for(int d=0;d<processors.size();d++) { | ||||
| @@ -133,6 +136,8 @@ CartesianCommunicator::CartesianCommunicator(const std::vector<int> &processors, | ||||
|   int Nchild = Nparent/childsize; | ||||
|   assert (childsize * Nchild == Nparent); | ||||
|  | ||||
|   //  std::cout << " child size  "<<childsize <<std::endl; | ||||
|  | ||||
|   std::vector<int> ccoor(_ndimension); // coor within subcommunicator | ||||
|   std::vector<int> scoor(_ndimension); // coor of split within parent | ||||
|   std::vector<int> ssize(_ndimension); // coor of split within parent | ||||
|   | ||||
| @@ -52,7 +52,7 @@ CartesianCommunicator::CartesianCommunicator(const std::vector<int> &processors, | ||||
| CartesianCommunicator::CartesianCommunicator(const std::vector<int> &processors) | ||||
| { | ||||
|   _processors = processors; | ||||
|   _ndimension = processors.size();  assert(_ndimension>=1); | ||||
|   _ndimension = processors.size(); | ||||
|   _processor_coor.resize(_ndimension); | ||||
|    | ||||
|   // Require 1^N processor grid for fake | ||||
|   | ||||
| @@ -103,8 +103,6 @@ class GlobalSharedMemory { | ||||
|   ////////////////////////////////////////////////////////////////////////////////////// | ||||
|   static void Init(Grid_MPI_Comm comm); // Typically MPI_COMM_WORLD | ||||
|   static void OptimalCommunicator(const std::vector<int> &processors,Grid_MPI_Comm & optimal_comm);  // Turns MPI_COMM_WORLD into right layout for Cartesian | ||||
|   static void OptimalCommunicatorHypercube(const std::vector<int> &processors,Grid_MPI_Comm & optimal_comm);  // Turns MPI_COMM_WORLD into right layout for Cartesian | ||||
|   static void OptimalCommunicatorSharedMemory(const std::vector<int> &processors,Grid_MPI_Comm & optimal_comm);  // Turns MPI_COMM_WORLD into right layout for Cartesian | ||||
|   /////////////////////////////////////////////////// | ||||
|   // Provide shared memory facilities off comm world | ||||
|   /////////////////////////////////////////////////// | ||||
|   | ||||
| @@ -132,22 +132,7 @@ int Log2Size(int TwoToPower,int MAXLOG2) | ||||
| } | ||||
| void GlobalSharedMemory::OptimalCommunicator(const std::vector<int> &processors,Grid_MPI_Comm & optimal_comm) | ||||
| { | ||||
|   ////////////////////////////////////////////////////////////////////////////// | ||||
|   // Look and see if it looks like an HPE 8600 based on hostname conventions | ||||
|   ////////////////////////////////////////////////////////////////////////////// | ||||
|   const int namelen = _POSIX_HOST_NAME_MAX; | ||||
|   char name[namelen]; | ||||
|   int R; | ||||
|   int I; | ||||
|   int N; | ||||
|   gethostname(name,namelen); | ||||
|   int nscan = sscanf(name,"r%di%dn%d",&R,&I,&N) ; | ||||
|  | ||||
|   if(nscan==3) OptimalCommunicatorHypercube(processors,optimal_comm); | ||||
|   else         OptimalCommunicatorSharedMemory(processors,optimal_comm); | ||||
| } | ||||
| void GlobalSharedMemory::OptimalCommunicatorHypercube(const std::vector<int> &processors,Grid_MPI_Comm & optimal_comm) | ||||
| { | ||||
| #ifdef HYPERCUBE | ||||
|   //////////////////////////////////////////////////////////////// | ||||
|   // Assert power of two shm_size. | ||||
|   //////////////////////////////////////////////////////////////// | ||||
| @@ -268,9 +253,7 @@ void GlobalSharedMemory::OptimalCommunicatorHypercube(const std::vector<int> &pr | ||||
|   ///////////////////////////////////////////////////////////////// | ||||
|   int ierr= MPI_Comm_split(WorldComm,0,rank,&optimal_comm); | ||||
|   assert(ierr==0); | ||||
| } | ||||
| void GlobalSharedMemory::OptimalCommunicatorSharedMemory(const std::vector<int> &processors,Grid_MPI_Comm & optimal_comm) | ||||
| { | ||||
| #else  | ||||
|   //////////////////////////////////////////////////////////////// | ||||
|   // Assert power of two shm_size. | ||||
|   //////////////////////////////////////////////////////////////// | ||||
| @@ -323,6 +306,7 @@ void GlobalSharedMemory::OptimalCommunicatorSharedMemory(const std::vector<int> | ||||
|   ///////////////////////////////////////////////////////////////// | ||||
|   int ierr= MPI_Comm_split(WorldComm,0,rank,&optimal_comm); | ||||
|   assert(ierr==0); | ||||
| #endif | ||||
| } | ||||
| //////////////////////////////////////////////////////////////////////////////////////////// | ||||
| // SHMGET | ||||
| @@ -353,7 +337,7 @@ void GlobalSharedMemory::SharedMemoryAllocate(uint64_t bytes, int flags) | ||||
|         int errsv = errno; | ||||
|         printf("Errno %d\n",errsv); | ||||
|         printf("key   %d\n",key); | ||||
|         printf("size  %ld\n",size); | ||||
|         printf("size  %lld\n",size); | ||||
|         printf("flags %d\n",flags); | ||||
|         perror("shmget"); | ||||
|         exit(1); | ||||
| @@ -429,7 +413,7 @@ void GlobalSharedMemory::SharedMemoryAllocate(uint64_t bytes, int flags) | ||||
|     assert(((uint64_t)ptr&0x3F)==0); | ||||
|     close(fd); | ||||
|     WorldShmCommBufs[r] =ptr; | ||||
|     //    std::cout << "Set WorldShmCommBufs["<<r<<"]="<<ptr<< "("<< bytes<< "bytes)"<<std::endl; | ||||
|     std::cout << "Set WorldShmCommBufs["<<r<<"]="<<ptr<< "("<< bytes<< "bytes)"<<std::endl; | ||||
|   } | ||||
|   _ShmAlloc=1; | ||||
|   _ShmAllocBytes  = bytes; | ||||
| @@ -471,7 +455,7 @@ void GlobalSharedMemory::SharedMemoryAllocate(uint64_t bytes, int flags) | ||||
|     assert(((uint64_t)ptr&0x3F)==0); | ||||
|     close(fd); | ||||
|     WorldShmCommBufs[r] =ptr; | ||||
|     //    std::cout << "Set WorldShmCommBufs["<<r<<"]="<<ptr<< "("<< bytes<< "bytes)"<<std::endl; | ||||
|     std::cout << "Set WorldShmCommBufs["<<r<<"]="<<ptr<< "("<< bytes<< "bytes)"<<std::endl; | ||||
|   } | ||||
|   _ShmAlloc=1; | ||||
|   _ShmAllocBytes  = bytes; | ||||
| @@ -515,7 +499,7 @@ void GlobalSharedMemory::SharedMemoryAllocate(uint64_t bytes, int flags) | ||||
| #endif | ||||
|       void * ptr =  mmap(NULL,size, PROT_READ | PROT_WRITE, mmap_flag, fd, 0); | ||||
|        | ||||
|       //      std::cout << "Set WorldShmCommBufs["<<r<<"]="<<ptr<< "("<< size<< "bytes)"<<std::endl; | ||||
|       std::cout << "Set WorldShmCommBufs["<<r<<"]="<<ptr<< "("<< size<< "bytes)"<<std::endl; | ||||
|       if ( ptr == (void * )MAP_FAILED ) {        | ||||
| 	perror("failed mmap");      | ||||
| 	assert(0);     | ||||
|   | ||||
| @@ -85,7 +85,7 @@ class LatticeTrinaryExpression :public std::pair<Op,std::tuple<T1,T2,T3> >, publ | ||||
|  | ||||
| void inline conformable(GridBase *lhs,GridBase *rhs) | ||||
| { | ||||
|   assert((lhs == rhs) && " conformable check pointers mismatch "); | ||||
|   assert(lhs == rhs); | ||||
| } | ||||
|  | ||||
| template<class vobj> | ||||
|   | ||||
| @@ -464,11 +464,9 @@ void InsertSliceLocal(const Lattice<vobj> &lowDim, Lattice<vobj> & higherDim,int | ||||
|   assert(orthog>=0); | ||||
|  | ||||
|   for(int d=0;d<nh;d++){ | ||||
|     if ( d!=orthog ) { | ||||
|     assert(lg->_processors[d]  == hg->_processors[d]); | ||||
|     assert(lg->_ldimensions[d] == hg->_ldimensions[d]); | ||||
|   } | ||||
|   } | ||||
|  | ||||
|   // the above should guarantee that the operations are local | ||||
|   parallel_for(int idx=0;idx<lg->lSites();idx++){ | ||||
| @@ -487,7 +485,7 @@ void InsertSliceLocal(const Lattice<vobj> &lowDim, Lattice<vobj> & higherDim,int | ||||
|  | ||||
|  | ||||
| template<class vobj> | ||||
| void ExtractSliceLocal(Lattice<vobj> &lowDim,const Lattice<vobj> & higherDim,int slice_lo,int slice_hi, int orthog) | ||||
| void ExtractSliceLocal(Lattice<vobj> &lowDim, const Lattice<vobj> & higherDim,int slice_lo,int slice_hi, int orthog) | ||||
| { | ||||
|   typedef typename vobj::scalar_object sobj; | ||||
|  | ||||
| @@ -501,11 +499,9 @@ void ExtractSliceLocal(Lattice<vobj> &lowDim,const Lattice<vobj> & higherDim,int | ||||
|   assert(orthog>=0); | ||||
|  | ||||
|   for(int d=0;d<nh;d++){ | ||||
|     if ( d!=orthog ) { | ||||
|     assert(lg->_processors[d]  == hg->_processors[d]); | ||||
|     assert(lg->_ldimensions[d] == hg->_ldimensions[d]); | ||||
|   } | ||||
|   } | ||||
|  | ||||
|   // the above should guarantee that the operations are local | ||||
|   parallel_for(int idx=0;idx<lg->lSites();idx++){ | ||||
| @@ -524,7 +520,7 @@ void ExtractSliceLocal(Lattice<vobj> &lowDim,const Lattice<vobj> & higherDim,int | ||||
|  | ||||
|  | ||||
| template<class vobj> | ||||
| void Replicate(Lattice<vobj> &coarse,Lattice<vobj> & fine) | ||||
| void Replicate(const Lattice<vobj> &coarse,Lattice<vobj> & fine) | ||||
| { | ||||
|   typedef typename vobj::scalar_object sobj; | ||||
|  | ||||
|   | ||||
| @@ -59,7 +59,6 @@ void GridLogTimestamp(int on){ | ||||
| } | ||||
|  | ||||
| Colours GridLogColours(0); | ||||
| GridLogger GridLogMG     (1, "MG"    , GridLogColours, "NORMAL"); | ||||
| GridLogger GridLogIRL    (1, "IRL"   , GridLogColours, "NORMAL"); | ||||
| GridLogger GridLogSolver (1, "Solver", GridLogColours, "NORMAL"); | ||||
| GridLogger GridLogError  (1, "Error" , GridLogColours, "RED"); | ||||
| @@ -77,7 +76,7 @@ void GridLogConfigure(std::vector<std::string> &logstreams) { | ||||
|   GridLogIterative.Active(0); | ||||
|   GridLogDebug.Active(0); | ||||
|   GridLogPerformance.Active(0); | ||||
|   GridLogIntegrator.Active(1); | ||||
|   GridLogIntegrator.Active(0); | ||||
|   GridLogColours.Active(0); | ||||
|  | ||||
|   for (int i = 0; i < logstreams.size(); i++) { | ||||
| @@ -86,7 +85,8 @@ void GridLogConfigure(std::vector<std::string> &logstreams) { | ||||
|     if (logstreams[i] == std::string("NoMessage")) GridLogMessage.Active(0); | ||||
|     if (logstreams[i] == std::string("Iterative")) GridLogIterative.Active(1); | ||||
|     if (logstreams[i] == std::string("Debug")) GridLogDebug.Active(1); | ||||
|     if (logstreams[i] == std::string("Performance")) GridLogPerformance.Active(1); | ||||
|     if (logstreams[i] == std::string("Performance")) | ||||
|       GridLogPerformance.Active(1); | ||||
|     if (logstreams[i] == std::string("Integrator")) GridLogIntegrator.Active(1); | ||||
|     if (logstreams[i] == std::string("Colours")) GridLogColours.Active(1); | ||||
|   } | ||||
|   | ||||
| @@ -146,11 +146,9 @@ public: | ||||
|       if ( log.timestamp ) { | ||||
| 	log.StopWatch->Stop(); | ||||
| 	GridTime now = log.StopWatch->Elapsed(); | ||||
| 	 | ||||
| 	if ( log.timing_mode==1 ) log.StopWatch->Reset(); | ||||
| 	log.StopWatch->Start(); | ||||
| 	stream << log.evidence() | ||||
| 	       << now	       << log.background() << " : " ; | ||||
| 	stream << log.evidence()<< std::setw(6)<<now << log.background() << " : " ; | ||||
|       } | ||||
|       stream << log.colour(); | ||||
|       return stream; | ||||
| @@ -169,7 +167,6 @@ public: | ||||
|  | ||||
| void GridLogConfigure(std::vector<std::string> &logstreams); | ||||
|  | ||||
| extern GridLogger GridLogMG; | ||||
| extern GridLogger GridLogIRL; | ||||
| extern GridLogger GridLogSolver; | ||||
| extern GridLogger GridLogError; | ||||
|   | ||||
| @@ -1,3 +0,0 @@ | ||||
| #include <Grid/GridCore.h> | ||||
|  | ||||
| int Grid::BinaryIO::latticeWriteMaxRetry = -1; | ||||
| @@ -81,7 +81,6 @@ inline void removeWhitespace(std::string &key) | ||||
| /////////////////////////////////////////////////////////////////////////////////////////////////// | ||||
| class BinaryIO { | ||||
|  public: | ||||
|   static int latticeWriteMaxRetry; | ||||
|  | ||||
|   ///////////////////////////////////////////////////////////////////////////// | ||||
|   // more byte manipulation helpers | ||||
| @@ -210,10 +209,10 @@ PARALLEL_CRITICAL | ||||
|   static inline void le32toh_v(void *file_object,uint64_t bytes) | ||||
|   { | ||||
|     uint32_t *fp = (uint32_t *)file_object; | ||||
|     uint32_t f; | ||||
|  | ||||
|     uint64_t count = bytes/sizeof(uint32_t); | ||||
|     parallel_for(uint64_t i=0;i<count;i++){   | ||||
|       uint32_t f; | ||||
|       f = fp[i]; | ||||
|       // got network order and the network to host | ||||
|       f = ((f&0xFF)<<24) | ((f&0xFF00)<<8) | ((f&0xFF0000)>>8) | ((f&0xFF000000UL)>>24) ;  | ||||
| @@ -235,9 +234,10 @@ PARALLEL_CRITICAL | ||||
|   static inline void le64toh_v(void *file_object,uint64_t bytes) | ||||
|   { | ||||
|     uint64_t *fp = (uint64_t *)file_object; | ||||
|     uint64_t f,g; | ||||
|      | ||||
|     uint64_t count = bytes/sizeof(uint64_t); | ||||
|     parallel_for(uint64_t i=0;i<count;i++){   | ||||
|       uint64_t f,g; | ||||
|       f = fp[i]; | ||||
|       // got network order and the network to host | ||||
|       g = ((f&0xFF)<<24) | ((f&0xFF00)<<8) | ((f&0xFF0000)>>8) | ((f&0xFF000000UL)>>24) ;  | ||||
| @@ -348,8 +348,7 @@ PARALLEL_CRITICAL | ||||
|     int ieee32    = (format == std::string("IEEE32")); | ||||
|     int ieee64big = (format == std::string("IEEE64BIG")); | ||||
|     int ieee64    = (format == std::string("IEEE64")); | ||||
|     assert(ieee64||ieee32|ieee64big||ieee32big); | ||||
|     assert((ieee64+ieee32+ieee64big+ieee32big)==1); | ||||
|  | ||||
|     ////////////////////////////////////////////////////////////////////////////// | ||||
|     // Do the I/O | ||||
|     ////////////////////////////////////////////////////////////////////////////// | ||||
| @@ -371,7 +370,7 @@ PARALLEL_CRITICAL | ||||
| #endif | ||||
|       } else { | ||||
| 	std::cout << GridLogMessage <<"IOobject: C++ read I/O " << file << " : " | ||||
|                   << iodata.size() * sizeof(fobj) << " bytes and offset " << offset << std::endl; | ||||
|                   << iodata.size() * sizeof(fobj) << " bytes" << std::endl; | ||||
|         std::ifstream fin; | ||||
| 	fin.open(file, std::ios::binary | std::ios::in); | ||||
|         if (control & BINARYIO_MASTER_APPEND) | ||||
| @@ -583,9 +582,7 @@ PARALLEL_CRITICAL | ||||
|     typedef typename vobj::scalar_object sobj; | ||||
|     typedef typename vobj::Realified::scalar_type word;    word w=0; | ||||
|     GridBase *grid = Umu._grid; | ||||
|     uint64_t lsites = grid->lSites(), offsetCopy = offset; | ||||
|     int attemptsLeft = std::max(0, BinaryIO::latticeWriteMaxRetry); | ||||
|     bool checkWrite = (BinaryIO::latticeWriteMaxRetry >= 0); | ||||
|     uint64_t lsites = grid->lSites(); | ||||
|  | ||||
|     std::vector<sobj> scalardata(lsites);  | ||||
|     std::vector<fobj>     iodata(lsites); // Munge, checksum, byte order in here | ||||
| @@ -600,36 +597,9 @@ PARALLEL_CRITICAL | ||||
|  | ||||
|     grid->Barrier(); | ||||
|     timer.Stop(); | ||||
|     while (attemptsLeft >= 0) | ||||
|     { | ||||
|       grid->Barrier(); | ||||
|  | ||||
|     IOobject(w,grid,iodata,file,offset,format,BINARYIO_WRITE|BINARYIO_LEXICOGRAPHIC, | ||||
| 	     nersc_csum,scidac_csuma,scidac_csumb); | ||||
|       if (checkWrite) | ||||
|       { | ||||
|         std::vector<fobj> ckiodata(lsites); | ||||
|         uint32_t          cknersc_csum, ckscidac_csuma, ckscidac_csumb; | ||||
|         uint64_t          ckoffset = offsetCopy; | ||||
|  | ||||
|         std::cout << GridLogMessage << "writeLatticeObject: read back object" << std::endl; | ||||
|         grid->Barrier(); | ||||
|         IOobject(w,grid,ckiodata,file,ckoffset,format,BINARYIO_READ|BINARYIO_LEXICOGRAPHIC, | ||||
| 	               cknersc_csum,ckscidac_csuma,ckscidac_csumb); | ||||
|         if ((cknersc_csum != nersc_csum) or (ckscidac_csuma != scidac_csuma) or (ckscidac_csumb != scidac_csumb)) | ||||
|         { | ||||
|           std::cout << GridLogMessage << "writeLatticeObject: read test checksum failure, re-writing (" << attemptsLeft << " attempt(s) remaining)" << std::endl; | ||||
|           offset = offsetCopy; | ||||
|           parallel_for(uint64_t x=0;x<lsites;x++) munge(scalardata[x],iodata[x]); | ||||
|         } | ||||
|         else | ||||
|         { | ||||
|           std::cout << GridLogMessage << "writeLatticeObject: read test checksum correct" << std::endl; | ||||
|           break; | ||||
|         } | ||||
|       } | ||||
|       attemptsLeft--; | ||||
|     } | ||||
|      | ||||
|  | ||||
|     std::cout<<GridLogMessage<<"writeLatticeObject: unvectorize overhead "<<timer.Elapsed()  <<std::endl; | ||||
|   } | ||||
| @@ -755,6 +725,5 @@ PARALLEL_CRITICAL | ||||
|     std::cout << GridLogMessage << "RNG state overhead " << timer.Elapsed() << std::endl; | ||||
|   } | ||||
| }; | ||||
|  | ||||
| } | ||||
| #endif | ||||
|   | ||||
| @@ -46,12 +46,6 @@ extern "C" { | ||||
| namespace Grid { | ||||
| namespace QCD { | ||||
|  | ||||
| #define GRID_FIELD_NORM "FieldNormMetaData" | ||||
| #define GRID_FIELD_NORM_CALC(FieldNormMetaData_, n2ck) \ | ||||
| 0.5*fabs(FieldNormMetaData_.norm2 - n2ck)/(FieldNormMetaData_.norm2 + n2ck) | ||||
| #define GRID_FIELD_NORM_CHECK(FieldNormMetaData_, n2ck) \ | ||||
| assert(GRID_FIELD_NORM_CALC(FieldNormMetaData_, n2ck) < 1.0e-5); | ||||
|  | ||||
|   ///////////////////////////////// | ||||
|   // Encode word types as strings | ||||
|   ///////////////////////////////// | ||||
| @@ -211,7 +205,6 @@ class GridLimeReader : public BinaryIO { | ||||
|   { | ||||
|     typedef typename vobj::scalar_object sobj; | ||||
|     scidacChecksum scidacChecksum_; | ||||
|     FieldNormMetaData  FieldNormMetaData_; | ||||
|     uint32_t nersc_csum,scidac_csuma,scidac_csumb; | ||||
|  | ||||
|     std::string format = getFormatString<vobj>(); | ||||
| @@ -240,52 +233,20 @@ class GridLimeReader : public BinaryIO { | ||||
| 	//	std::cout << " ReadLatticeObject from offset "<<offset << std::endl; | ||||
| 	BinarySimpleMunger<sobj,sobj> munge; | ||||
| 	BinaryIO::readLatticeObject< vobj, sobj >(field, filename, munge, offset, format,nersc_csum,scidac_csuma,scidac_csumb); | ||||
| 	std::cout << GridLogMessage << "SciDAC checksum A " << std::hex << scidac_csuma << std::dec << std::endl; | ||||
| 	std::cout << GridLogMessage << "SciDAC checksum B " << std::hex << scidac_csumb << std::dec << std::endl; | ||||
|  | ||||
| 	///////////////////////////////////////////// | ||||
| 	// Insist checksum is next record | ||||
| 	///////////////////////////////////////////// | ||||
| 	readScidacChecksum(scidacChecksum_,FieldNormMetaData_); | ||||
| 	readLimeObject(scidacChecksum_,std::string("scidacChecksum"),std::string(SCIDAC_CHECKSUM)); | ||||
|  | ||||
| 	///////////////////////////////////////////// | ||||
| 	// Verify checksums | ||||
| 	///////////////////////////////////////////// | ||||
| 	if(FieldNormMetaData_.norm2 != 0.0){  | ||||
| 	  RealD n2ck = norm2(field); | ||||
| 	  std::cout << GridLogMessage << "Field norm: metadata= " << FieldNormMetaData_.norm2  | ||||
|               << " / field= " << n2ck << " / rdiff= " << GRID_FIELD_NORM_CALC(FieldNormMetaData_,n2ck) << std::endl; | ||||
| 	  GRID_FIELD_NORM_CHECK(FieldNormMetaData_,n2ck); | ||||
| 	} | ||||
| 	assert(scidacChecksumVerify(scidacChecksum_,scidac_csuma,scidac_csumb)==1); | ||||
|  | ||||
| 	// find out if next field is a GridFieldNorm | ||||
| 	return; | ||||
|       } | ||||
|     } | ||||
|   } | ||||
|   void readScidacChecksum(scidacChecksum     &scidacChecksum_, | ||||
| 			  FieldNormMetaData  &FieldNormMetaData_) | ||||
|   { | ||||
|     FieldNormMetaData_.norm2 =0.0; | ||||
|     std::string scidac_str(SCIDAC_CHECKSUM); | ||||
|     std::string field_norm_str(GRID_FIELD_NORM); | ||||
|     while ( limeReaderNextRecord(LimeR) == LIME_SUCCESS ) {  | ||||
|       uint64_t nbytes = limeReaderBytes(LimeR);//size of this record (configuration) | ||||
|       std::vector<char> xmlc(nbytes+1,'\0'); | ||||
|       limeReaderReadData((void *)&xmlc[0], &nbytes, LimeR);     | ||||
|       std::string xmlstring = std::string(&xmlc[0]); | ||||
|       XmlReader RD(xmlstring, true, ""); | ||||
|       if ( !strncmp(limeReaderType(LimeR), field_norm_str.c_str(),strlen(field_norm_str.c_str()) )  ) { | ||||
| 	//	std::cout << "FieldNormMetaData "<<xmlstring<<std::endl; | ||||
| 	read(RD,field_norm_str,FieldNormMetaData_); | ||||
|       } | ||||
|       if ( !strncmp(limeReaderType(LimeR), scidac_str.c_str(),strlen(scidac_str.c_str()) )  ) { | ||||
| 	//	std::cout << SCIDAC_CHECKSUM << " " <<xmlstring<<std::endl; | ||||
| 	read(RD,std::string("scidacChecksum"),scidacChecksum_); | ||||
| 	return; | ||||
|       }       | ||||
|     } | ||||
|     assert(0); | ||||
|   } | ||||
|   //////////////////////////////////////////// | ||||
|   // Read a generic serialisable object | ||||
|   //////////////////////////////////////////// | ||||
| @@ -428,8 +389,6 @@ class GridLimeWriter : public BinaryIO | ||||
|     GridBase *grid = field._grid; | ||||
|     assert(boss_node == field._grid->IsBoss() ); | ||||
|  | ||||
|     FieldNormMetaData FNMD; FNMD.norm2 = norm2(field); | ||||
|  | ||||
|     //////////////////////////////////////////// | ||||
|     // Create record header | ||||
|     //////////////////////////////////////////// | ||||
| @@ -488,7 +447,6 @@ class GridLimeWriter : public BinaryIO | ||||
|     checksum.suma= streama.str(); | ||||
|     checksum.sumb= streamb.str(); | ||||
|     if ( boss_node ) {  | ||||
|       writeLimeObject(0,0,FNMD,std::string(GRID_FIELD_NORM),std::string(GRID_FIELD_NORM)); | ||||
|       writeLimeObject(0,1,checksum,std::string("scidacChecksum"),std::string(SCIDAC_CHECKSUM)); | ||||
|     } | ||||
|   } | ||||
| @@ -666,12 +624,6 @@ class IldgWriter : public ScidacWriter { | ||||
|     assert(header.nd==4); | ||||
|     assert(header.nd==header.dimension.size()); | ||||
|  | ||||
|     ////////////////////////////////////////////////////////////////////////////// | ||||
|     // Field norm tests | ||||
|     ////////////////////////////////////////////////////////////////////////////// | ||||
|     FieldNormMetaData FieldNormMetaData_; | ||||
|     FieldNormMetaData_.norm2 = norm2(Umu); | ||||
|  | ||||
|     ////////////////////////////////////////////////////////////////////////////// | ||||
|     // Fill the USQCD info field | ||||
|     ////////////////////////////////////////////////////////////////////////////// | ||||
| @@ -680,12 +632,11 @@ class IldgWriter : public ScidacWriter { | ||||
|     info.plaq   = header.plaquette; | ||||
|     info.linktr = header.link_trace; | ||||
|  | ||||
|     //    std::cout << GridLogMessage << " Writing config; IldgIO n2 "<< FieldNormMetaData_.norm2<<std::endl; | ||||
|     std::cout << GridLogMessage << " Writing config; IldgIO "<<std::endl; | ||||
|     ////////////////////////////////////////////// | ||||
|     // Fill the Lime file record by record | ||||
|     ////////////////////////////////////////////// | ||||
|     writeLimeObject(1,0,header ,std::string("FieldMetaData"),std::string(GRID_FORMAT)); // Open message  | ||||
|     writeLimeObject(0,0,FieldNormMetaData_,FieldNormMetaData_.SerialisableClassName(),std::string(GRID_FIELD_NORM)); | ||||
|     writeLimeObject(0,0,_scidacFile,_scidacFile.SerialisableClassName(),std::string(SCIDAC_PRIVATE_FILE_XML)); | ||||
|     writeLimeObject(0,1,info,info.SerialisableClassName(),std::string(SCIDAC_FILE_XML)); | ||||
|     writeLimeObject(1,0,_scidacRecord,_scidacRecord.SerialisableClassName(),std::string(SCIDAC_PRIVATE_RECORD_XML)); | ||||
| @@ -728,7 +679,6 @@ class IldgReader : public GridLimeReader { | ||||
|     std::string    ildgLFN_       ; | ||||
|     scidacChecksum scidacChecksum_;  | ||||
|     usqcdInfo      usqcdInfo_     ; | ||||
|     FieldNormMetaData FieldNormMetaData_; | ||||
|  | ||||
|     // track what we read from file | ||||
|     int found_ildgFormat    =0; | ||||
| @@ -737,7 +687,7 @@ class IldgReader : public GridLimeReader { | ||||
|     int found_usqcdInfo     =0; | ||||
|     int found_ildgBinary =0; | ||||
|     int found_FieldMetaData =0; | ||||
|     int found_FieldNormMetaData =0; | ||||
|  | ||||
|     uint32_t nersc_csum; | ||||
|     uint32_t scidac_csuma; | ||||
|     uint32_t scidac_csumb; | ||||
| @@ -824,17 +774,11 @@ class IldgReader : public GridLimeReader { | ||||
| 	  found_scidacChecksum = 1; | ||||
| 	} | ||||
|  | ||||
| 	if ( !strncmp(limeReaderType(LimeR), GRID_FIELD_NORM,strlen(GRID_FIELD_NORM)) ) {  | ||||
| 	  XmlReader RD(xmlstring, true, ""); | ||||
| 	  read(RD,GRID_FIELD_NORM,FieldNormMetaData_); | ||||
| 	  found_FieldNormMetaData = 1; | ||||
| 	} | ||||
|  | ||||
|       } else {   | ||||
| 	///////////////////////////////// | ||||
| 	// Binary data | ||||
| 	///////////////////////////////// | ||||
| 	//	std::cout << GridLogMessage << "ILDG Binary record found : "  ILDG_BINARY_DATA << std::endl; | ||||
| 	std::cout << GridLogMessage << "ILDG Binary record found : "  ILDG_BINARY_DATA << std::endl; | ||||
| 	uint64_t offset= ftello(File); | ||||
| 	if ( format == std::string("IEEE64BIG") ) { | ||||
| 	  GaugeSimpleMunger<dobj, sobj> munge; | ||||
| @@ -901,13 +845,6 @@ class IldgReader : public GridLimeReader { | ||||
|     //////////////////////////////////////////////////////////// | ||||
|     // Really really want to mandate a scidac checksum | ||||
|     //////////////////////////////////////////////////////////// | ||||
|     if ( found_FieldNormMetaData ) {  | ||||
|       RealD nn = norm2(Umu); | ||||
|       GRID_FIELD_NORM_CHECK(FieldNormMetaData_,nn); | ||||
|       std::cout << GridLogMessage<<"FieldNormMetaData matches " << std::endl; | ||||
|     }  else {  | ||||
|       std::cout << GridLogWarning<<"FieldNormMetaData not found. " << std::endl; | ||||
|     } | ||||
|     if ( found_scidacChecksum ) { | ||||
|       FieldMetaData_.scidac_checksuma = stoull(scidacChecksum_.suma,0,16); | ||||
|       FieldMetaData_.scidac_checksumb = stoull(scidacChecksum_.sumb,0,16); | ||||
|   | ||||
| @@ -56,10 +56,6 @@ namespace Grid { | ||||
|   //////////////////////////////////////////////////////////////////////////////// | ||||
|   // header specification/interpretation | ||||
|   //////////////////////////////////////////////////////////////////////////////// | ||||
|     class FieldNormMetaData : Serializable { | ||||
|     public: | ||||
|       GRID_SERIALIZABLE_CLASS_MEMBERS(FieldNormMetaData, double, norm2); | ||||
|     }; | ||||
|     class FieldMetaData : Serializable { | ||||
|     public: | ||||
|  | ||||
|   | ||||
| @@ -49,38 +49,20 @@ inline double usecond(void) { | ||||
|  | ||||
| typedef  std::chrono::system_clock          GridClock; | ||||
| typedef  std::chrono::time_point<GridClock> GridTimePoint; | ||||
|  | ||||
| typedef  std::chrono::seconds               GridSecs; | ||||
| typedef  std::chrono::milliseconds          GridMillisecs; | ||||
| typedef  std::chrono::microseconds          GridUsecs; | ||||
| typedef  std::chrono::microseconds          GridTime; | ||||
| typedef  std::chrono::microseconds          GridUsecs; | ||||
|  | ||||
| inline std::ostream& operator<< (std::ostream & stream, const GridSecs & time) | ||||
| inline std::ostream& operator<< (std::ostream & stream, const std::chrono::milliseconds & time) | ||||
| { | ||||
|   stream << time.count()<<" s"; | ||||
|   stream << time.count()<<" ms"; | ||||
|   return stream; | ||||
| } | ||||
| inline std::ostream& operator<< (std::ostream & stream, const GridMillisecs & now) | ||||
| inline std::ostream& operator<< (std::ostream & stream, const std::chrono::microseconds & time) | ||||
| { | ||||
|   GridSecs second(1); | ||||
|   auto     secs       = now/second ;  | ||||
|   auto     subseconds = now%second ; | ||||
|   auto     fill       = stream.fill(); | ||||
|   stream << secs<<"."<<std::setw(3)<<std::setfill('0')<<subseconds.count()<<" s"; | ||||
|   stream.fill(fill); | ||||
|   stream << time.count()<<" usec"; | ||||
|   return stream; | ||||
| } | ||||
| inline std::ostream& operator<< (std::ostream & stream, const GridUsecs & now) | ||||
| { | ||||
|   GridSecs second(1); | ||||
|   auto     seconds    = now/second ;  | ||||
|   auto     subseconds = now%second ; | ||||
|   auto     fill       = stream.fill(); | ||||
|   stream << seconds<<"."<<std::setw(6)<<std::setfill('0')<<subseconds.count()<<" s"; | ||||
|   stream.fill(fill); | ||||
|   return stream; | ||||
| } | ||||
|  | ||||
|   | ||||
| class GridStopWatch { | ||||
| private: | ||||
|   | ||||
| @@ -44,15 +44,12 @@ namespace QCD { | ||||
|    | ||||
|   struct WilsonImplParams { | ||||
|     bool overlapCommsCompute; | ||||
|     std::vector<Real> twist_n_2pi_L; | ||||
|     std::vector<Complex> boundary_phases; | ||||
|     WilsonImplParams() : overlapCommsCompute(false) { | ||||
|       boundary_phases.resize(Nd, 1.0); | ||||
|       twist_n_2pi_L.resize(Nd, 0.0); | ||||
|     }; | ||||
|     WilsonImplParams(const std::vector<Complex> phi) : boundary_phases(phi), overlapCommsCompute(false) { | ||||
|       twist_n_2pi_L.resize(Nd, 0.0); | ||||
|     } | ||||
|     WilsonImplParams(const std::vector<Complex> phi) | ||||
|       : boundary_phases(phi), overlapCommsCompute(false) {} | ||||
|   }; | ||||
|  | ||||
|   struct StaggeredImplParams { | ||||
| @@ -66,8 +63,7 @@ namespace QCD { | ||||
| 				    int,   MaxIter,  | ||||
| 				    RealD, tolerance,  | ||||
| 				    int,   degree,  | ||||
| 				    int,   precision, | ||||
| 				    int,   BoundsCheckFreq); | ||||
| 				    int,   precision); | ||||
|      | ||||
|     // MaxIter and tolerance, vectors?? | ||||
|      | ||||
| @@ -77,15 +73,13 @@ namespace QCD { | ||||
| 				int _maxit     = 1000, | ||||
| 				RealD tol      = 1.0e-8,  | ||||
|                            	int _degree    = 10, | ||||
| 				int _precision = 64, | ||||
| 				int _BoundsCheckFreq=20) | ||||
| 				int _precision = 64) | ||||
|       : lo(_lo), | ||||
| 	hi(_hi), | ||||
| 	MaxIter(_maxit), | ||||
| 	tolerance(tol), | ||||
| 	degree(_degree), | ||||
|         precision(_precision), | ||||
|         BoundsCheckFreq(_BoundsCheckFreq){}; | ||||
| 	precision(_precision){}; | ||||
|   }; | ||||
|    | ||||
|    | ||||
|   | ||||
| @@ -43,7 +43,7 @@ namespace Grid { | ||||
|      INHERIT_IMPL_TYPES(Impl); | ||||
|     public: | ||||
|  | ||||
|       void FreePropagator(const FermionField &in,FermionField &out,RealD mass,std::vector<Complex> boundary, std::vector<double> twist, bool fiveD) { | ||||
|       void FreePropagator(const FermionField &in,FermionField &out,RealD mass, std::vector<double> twist, bool fiveD) { | ||||
| 	FermionField in_k(in._grid); | ||||
| 	FermionField prop_k(in._grid); | ||||
|  | ||||
| @@ -53,22 +53,17 @@ namespace Grid { | ||||
| 	ComplexField coor(in._grid); | ||||
| 	ComplexField ph(in._grid);  ph = zero; | ||||
| 	FermionField in_buf(in._grid); in_buf = zero; | ||||
| 	Scalar ci(0.0,1.0); | ||||
| 	Complex ci(0.0,1.0); | ||||
| 	assert(twist.size() == Nd);//check that twist is Nd | ||||
| 	assert(boundary.size() == Nd);//check that boundary conditions is Nd | ||||
| 	int shift = 0; | ||||
| 	if(fiveD) shift = 1; | ||||
| 	for(unsigned int nu = 0; nu < Nd; nu++) | ||||
| 	{ | ||||
| 	  // Shift coordinate lattice index by 1 to account for 5th dimension. | ||||
|           LatticeCoordinate(coor, nu + shift); | ||||
| 	  double boundary_phase = ::acos(real(boundary[nu])); | ||||
| 	  ph = ph + boundary_phase*coor*((1./(in._grid->_fdimensions[nu+shift]))); | ||||
| 	  //momenta for propagator shifted by twist+boundary | ||||
| 	  twist[nu] = twist[nu] + boundary_phase/((2.0*M_PI)); | ||||
| 	  ph = ph + twist[nu]*coor*((1./(in._grid->_fdimensions[nu+shift]))); | ||||
| 	} | ||||
| 	in_buf = exp(ci*ph*(-1.0))*in; | ||||
|  | ||||
| 	in_buf = exp((Real)(2.0*M_PI)*ci*ph*(-1.0))*in; | ||||
|  | ||||
| 	if(fiveD){//FFT only on temporal and spatial dimensions | ||||
|           std::vector<int> mask(Nd+1,1); mask[0] = 0; | ||||
| @@ -81,28 +76,25 @@ namespace Grid { | ||||
|           this->MomentumSpacePropagatorHt(prop_k,in_k,mass,twist); | ||||
| 	  theFFT.FFT_all_dim(out,prop_k,FFT::backward); | ||||
|         } | ||||
|  | ||||
| 	//phase for boundary condition | ||||
| 	out = out * exp(ci*ph); | ||||
| 	out = out * exp((Real)(2.0*M_PI)*ci*ph); | ||||
|       }; | ||||
|  | ||||
|       virtual void FreePropagator(const FermionField &in,FermionField &out,RealD mass,std::vector<Complex> boundary,std::vector<double> twist) { | ||||
|       virtual void FreePropagator(const FermionField &in,FermionField &out,RealD mass,std::vector<double> twist) { | ||||
|         bool fiveD = true; //5d propagator by default | ||||
| 	FreePropagator(in,out,mass,boundary,twist,fiveD); | ||||
|         FreePropagator(in,out,mass,twist,fiveD); | ||||
|       }; | ||||
|  | ||||
|       virtual void FreePropagator(const FermionField &in,FermionField &out,RealD mass, bool fiveD) { | ||||
| 	std::vector<double> twist(Nd,0.0); //default: periodic boundaries in all directions | ||||
| 	std::vector<Complex> boundary; | ||||
| 	for(int i=0;i<Nd;i++) boundary.push_back(1);//default: periodic boundary conditions | ||||
| 	FreePropagator(in,out,mass,boundary,twist,fiveD); | ||||
|         FreePropagator(in,out,mass,twist,fiveD); | ||||
|       }; | ||||
|  | ||||
|       virtual void FreePropagator(const FermionField &in,FermionField &out,RealD mass) { | ||||
|         bool fiveD = true; //5d propagator by default | ||||
| 	std::vector<double> twist(Nd,0.0); //default: twist angle 0 | ||||
| 	std::vector<Complex> boundary; | ||||
| 	for(int i=0;i<Nd;i++) boundary.push_back(1); //default: periodic boundary conditions | ||||
| 	FreePropagator(in,out,mass,boundary,twist,fiveD); | ||||
| 	std::vector<double> twist(Nd,0.0); //default: periodic boundaries in all directions | ||||
|         FreePropagator(in,out,mass,twist,fiveD); | ||||
|       }; | ||||
|  | ||||
|       virtual void   Instantiatable(void) {}; | ||||
|   | ||||
| @@ -64,6 +64,11 @@ namespace Grid { | ||||
|       virtual RealD  M    (const FermionField &in, FermionField &out)=0; | ||||
|       virtual RealD  Mdag (const FermionField &in, FermionField &out)=0; | ||||
|  | ||||
|       // Query the even even properties to make algorithmic decisions | ||||
|       virtual int    ConstEE(void) { return 1; }; // clover returns zero as EE depends on gauge field | ||||
|       virtual int    isTrivialEE(void) { return 0; }; | ||||
|       virtual RealD  Mass(void) {return 0.0;}; | ||||
|  | ||||
|       // half checkerboard operations | ||||
|       virtual void   Meooe       (const FermionField &in, FermionField &out)=0; | ||||
|       virtual void   MeooeDag    (const FermionField &in, FermionField &out)=0; | ||||
| @@ -96,7 +101,7 @@ namespace Grid { | ||||
|  | ||||
|       virtual void  MomentumSpacePropagator(FermionField &out,const FermionField &in,RealD _m,std::vector<double> twist) { assert(0);}; | ||||
|  | ||||
|       virtual void  FreePropagator(const FermionField &in,FermionField &out,RealD mass,std::vector<Complex> boundary,std::vector<double> twist) { | ||||
|       virtual void  FreePropagator(const FermionField &in,FermionField &out,RealD mass,std::vector<double> twist) { | ||||
| 	FFT theFFT((GridCartesian *) in._grid); | ||||
|  | ||||
| 	FermionField in_k(in._grid); | ||||
| @@ -106,33 +111,26 @@ namespace Grid { | ||||
| 	ComplexField coor(in._grid); | ||||
| 	ComplexField ph(in._grid);  ph = zero; | ||||
| 	FermionField in_buf(in._grid); in_buf = zero; | ||||
| 	Scalar ci(0.0,1.0); | ||||
| 	Complex ci(0.0,1.0); | ||||
| 	assert(twist.size() == Nd);//check that twist is Nd | ||||
| 	assert(boundary.size() == Nd);//check that boundary conditions is Nd | ||||
| 	for(unsigned int nu = 0; nu < Nd; nu++) | ||||
| 	{ | ||||
|           LatticeCoordinate(coor, nu); | ||||
| 	  double boundary_phase = ::acos(real(boundary[nu])); | ||||
| 	  ph = ph + boundary_phase*coor*((1./(in._grid->_fdimensions[nu]))); | ||||
| 	  //momenta for propagator shifted by twist+boundary | ||||
| 	  twist[nu] = twist[nu] + boundary_phase/((2.0*M_PI)); | ||||
| 	  ph = ph + twist[nu]*coor*((1./(in._grid->_fdimensions[nu]))); | ||||
| 	} | ||||
| 	in_buf = exp(ci*ph*(-1.0))*in; | ||||
| 	in_buf = exp((Real)(2.0*M_PI)*ci*ph*(-1.0))*in; | ||||
|  | ||||
| 	theFFT.FFT_all_dim(in_k,in_buf,FFT::forward); | ||||
|         this->MomentumSpacePropagator(prop_k,in_k,mass,twist); | ||||
| 	theFFT.FFT_all_dim(out,prop_k,FFT::backward); | ||||
|  | ||||
| 	//phase for boundary condition | ||||
| 	out = out * exp(ci*ph); | ||||
| 	out = out * exp((Real)(2.0*M_PI)*ci*ph); | ||||
|  | ||||
|       }; | ||||
|  | ||||
|       virtual void FreePropagator(const FermionField &in,FermionField &out,RealD mass) { | ||||
| 		std::vector<Complex> boundary; | ||||
| 		for(int i=0;i<Nd;i++) boundary.push_back(1);//default: periodic boundary conditions | ||||
| 		std::vector<double> twist(Nd,0.0); //default: periodic boundaries in all directions | ||||
| 	        FreePropagator(in,out,mass,boundary,twist); | ||||
| 	        FreePropagator(in,out,mass,twist); | ||||
|       }; | ||||
|  | ||||
|       /////////////////////////////////////////////// | ||||
| @@ -143,7 +141,6 @@ namespace Grid { | ||||
|       ////////////////////////////////////////////////////////////////////// | ||||
|       // Conserved currents, either contract at sink or insert sequentially. | ||||
|       ////////////////////////////////////////////////////////////////////// | ||||
|  | ||||
|       virtual void ContractConservedCurrent(PropagatorField &q_in_1, | ||||
|                                             PropagatorField &q_in_2, | ||||
|                                             PropagatorField &q_out, | ||||
| @@ -156,12 +153,6 @@ namespace Grid { | ||||
|                                        unsigned int tmin,  | ||||
|                                        unsigned int tmax, | ||||
|                                        ComplexField &lattice_cmplx)=0; | ||||
|  | ||||
|       // Only reimplemented in Wilson5D  | ||||
|       // Default to just a zero correlation function | ||||
|       virtual void ContractJ5q(FermionField &q_in   ,ComplexField &J5q) { J5q=zero; }; | ||||
|       virtual void ContractJ5q(PropagatorField &q_in,ComplexField &J5q) { J5q=zero; }; | ||||
|  | ||||
|       /////////////////////////////////////////////// | ||||
|       // Physical field import/export | ||||
|       /////////////////////////////////////////////// | ||||
|   | ||||
| @@ -240,30 +240,16 @@ namespace QCD { | ||||
|       GaugeLinkField tmp(GaugeGrid); | ||||
|  | ||||
|       Lattice<iScalar<vInteger> > coor(GaugeGrid); | ||||
|       //////////////////////////////////////////////////// | ||||
|       // apply any boundary phase or twists | ||||
|       //////////////////////////////////////////////////// | ||||
|       for (int mu = 0; mu < Nd; mu++) { | ||||
|  | ||||
| 	////////// boundary phase ///////////// | ||||
| 	      auto pha = Params.boundary_phases[mu]; | ||||
| 	      scalar_type phase( real(pha),imag(pha) ); | ||||
|  | ||||
| 	int L   = GaugeGrid->GlobalDimensions()[mu]; | ||||
|         int Lmu = L - 1; | ||||
|         int Lmu = GaugeGrid->GlobalDimensions()[mu] - 1; | ||||
|  | ||||
|         LatticeCoordinate(coor, mu); | ||||
|  | ||||
|         U = PeekIndex<LorentzIndex>(Umu, mu); | ||||
|  | ||||
| 	// apply any twists | ||||
| 	RealD theta = Params.twist_n_2pi_L[mu] * 2*M_PI / L; | ||||
| 	if ( theta != 0.0) {  | ||||
| 	  scalar_type twphase(::cos(theta),::sin(theta)); | ||||
| 	  U = twphase*U; | ||||
| 	  std::cout << GridLogMessage << " Twist ["<<mu<<"] "<< Params.twist_n_2pi_L[mu]<< " phase"<<phase <<std::endl; | ||||
| 	} | ||||
|  | ||||
|         tmp = where(coor == Lmu, phase * U, U); | ||||
|         PokeIndex<LorentzIndex>(Uds, tmp, mu); | ||||
|  | ||||
|   | ||||
| @@ -26,7 +26,7 @@ See the full license in the file "LICENSE" in the top level distribution | ||||
| directory | ||||
| *************************************************************************************/ | ||||
| /*  END LEGAL */ | ||||
| #include <Grid/Grid.h> | ||||
| #include <Grid.h> | ||||
|  | ||||
| namespace Grid { | ||||
| namespace QCD { | ||||
|   | ||||
| @@ -26,11 +26,11 @@ Author: paboyle <paboyle@ph.ed.ac.uk> | ||||
|     See the full license in the file "LICENSE" in the top level distribution directory | ||||
|     *************************************************************************************/ | ||||
|     /*  END LEGAL */ | ||||
| #include <Grid/Grid.h> | ||||
| #include <Grid.h> | ||||
|  | ||||
| #ifdef AVX512 | ||||
| #include <Grid/simd/Intel512common.h> | ||||
| #include <Grid/simd/Intel512avx.h> | ||||
| #include <simd/Intel512common.h> | ||||
| #include <simd/Intel512avx.h> | ||||
| #endif | ||||
|  | ||||
| // Interleave operations from two directions | ||||
| @@ -679,7 +679,7 @@ void StaggeredKernels<Impl>::DhopSiteAsm(StencilImpl &st, LebesgueOrder &lo, | ||||
|   gauge3 =(uint64_t)&UU._odata[sU]( T );  | ||||
|    | ||||
|   // This is the single precision 5th direction vectorised kernel | ||||
| #include <Grid/simd/Intel512single.h> | ||||
| #include <simd/Intel512single.h> | ||||
| template <> void StaggeredKernels<StaggeredVec5dImplF>::DhopSiteAsm(StencilImpl &st, LebesgueOrder &lo,  | ||||
| 								    DoubledGaugeField &U, DoubledGaugeField &UUU, | ||||
| 								    SiteSpinor *buf, int LLs, int sU,  | ||||
| @@ -732,7 +732,7 @@ template <> void StaggeredKernels<StaggeredVec5dImplF>::DhopSiteAsm(StencilImpl | ||||
|     | ||||
| } | ||||
|  | ||||
| #include <Grid/simd/Intel512double.h> | ||||
| #include <simd/Intel512double.h> | ||||
| template <> void StaggeredKernels<StaggeredVec5dImplD>::DhopSiteAsm(StencilImpl &st, LebesgueOrder &lo,  | ||||
| 								    DoubledGaugeField &U, DoubledGaugeField &UUU, | ||||
| 								    SiteSpinor *buf, int LLs, int sU,  | ||||
| @@ -816,7 +816,7 @@ template <> void StaggeredKernels<StaggeredVec5dImplD>::DhopSiteAsm(StencilImpl | ||||
|  | ||||
|   // This is the single precision 5th direction vectorised kernel | ||||
|  | ||||
| #include <Grid/simd/Intel512single.h> | ||||
| #include <simd/Intel512single.h> | ||||
| template <> void StaggeredKernels<StaggeredImplF>::DhopSiteAsm(StencilImpl &st, LebesgueOrder &lo,  | ||||
| 							       DoubledGaugeField &U, DoubledGaugeField &UUU, | ||||
| 							       SiteSpinor *buf, int LLs, int sU,  | ||||
| @@ -884,7 +884,7 @@ template <> void StaggeredKernels<StaggeredImplF>::DhopSiteAsm(StencilImpl &st, | ||||
| #endif | ||||
| } | ||||
|  | ||||
| #include <Grid/simd/Intel512double.h> | ||||
| #include <simd/Intel512double.h> | ||||
| template <> void StaggeredKernels<StaggeredImplD>::DhopSiteAsm(StencilImpl &st, LebesgueOrder &lo,  | ||||
| 							       DoubledGaugeField &U, DoubledGaugeField &UUU, | ||||
| 							       SiteSpinor *buf, int LLs, int sU,  | ||||
|   | ||||
| @@ -26,7 +26,7 @@ Author: paboyle <paboyle@ph.ed.ac.uk> | ||||
|     See the full license in the file "LICENSE" in the top level distribution directory | ||||
|     *************************************************************************************/ | ||||
|     /*  END LEGAL */ | ||||
| #include <Grid/Grid.h> | ||||
| #include <Grid.h> | ||||
|  | ||||
|  | ||||
| #define LOAD_CHI(b)		\ | ||||
|   | ||||
| @@ -67,7 +67,6 @@ public: | ||||
| public: | ||||
|   typedef WilsonFermion<Impl> WilsonBase; | ||||
|  | ||||
|   virtual int    ConstEE(void)     { return 0; }; | ||||
|   virtual void Instantiatable(void){}; | ||||
|   // Constructors | ||||
|   WilsonCloverFermion(GaugeField &_Umu, GridCartesian &Fgrid, | ||||
|   | ||||
| @@ -939,75 +939,6 @@ void WilsonFermion5D<Impl>::MomentumSpacePropagatorHw(FermionField &out,const Fe | ||||
|     merge(qSiteRev, qSiteVec); \ | ||||
| } | ||||
|  | ||||
| //          psi = chiralProjectPlus(Result_s[Ls/2-1]); | ||||
| //          psi+= chiralProjectMinus(Result_s[Ls/2]); | ||||
| //         PJ5q+=localInnerProduct(psi,psi); | ||||
|  | ||||
| template<class vobj>  | ||||
| Lattice<vobj> spProj5p(const Lattice<vobj> & in) | ||||
| { | ||||
|   GridBase *grid=in._grid; | ||||
|   Gamma G5(Gamma::Algebra::Gamma5); | ||||
|   Lattice<vobj> ret(grid); | ||||
|   parallel_for(int ss=0;ss<grid->oSites();ss++){ | ||||
|     ret._odata[ss] = in._odata[ss] + G5*in._odata[ss]; | ||||
|   } | ||||
|   return ret; | ||||
| } | ||||
| template<class vobj>  | ||||
| Lattice<vobj> spProj5m(const Lattice<vobj> & in) | ||||
| { | ||||
|   Gamma G5(Gamma::Algebra::Gamma5); | ||||
|   GridBase *grid=in._grid; | ||||
|   Lattice<vobj> ret(grid); | ||||
|   parallel_for(int ss=0;ss<grid->oSites();ss++){ | ||||
|     ret._odata[ss] = in._odata[ss] - G5*in._odata[ss]; | ||||
|   } | ||||
|   return ret; | ||||
| } | ||||
|  | ||||
| template <class Impl> | ||||
| void WilsonFermion5D<Impl>::ContractJ5q(FermionField &q_in,ComplexField &J5q) | ||||
| { | ||||
|   conformable(GaugeGrid(), J5q._grid); | ||||
|   conformable(q_in._grid, FermionGrid()); | ||||
|  | ||||
|   // 4d field | ||||
|   int Ls = this->Ls; | ||||
|   FermionField psi(GaugeGrid()); | ||||
|   FermionField p_plus (GaugeGrid()); | ||||
|   FermionField p_minus(GaugeGrid()); | ||||
|   FermionField p(GaugeGrid()); | ||||
|  | ||||
|   ExtractSlice(p_plus , q_in, Ls/2   , 0); | ||||
|   ExtractSlice(p_minus, q_in, Ls/2-1 , 0); | ||||
|   p_plus = spProj5p(p_plus ); | ||||
|   p_minus= spProj5m(p_minus); | ||||
|   p=p_plus+p_minus; | ||||
|   J5q = localInnerProduct(p,p); | ||||
| } | ||||
|  | ||||
| template <class Impl> | ||||
| void WilsonFermion5D<Impl>::ContractJ5q(PropagatorField &q_in,ComplexField &J5q) | ||||
| { | ||||
|   conformable(GaugeGrid(), J5q._grid); | ||||
|   conformable(q_in._grid, FermionGrid()); | ||||
|  | ||||
|   // 4d field | ||||
|   int Ls = this->Ls; | ||||
|   PropagatorField psi(GaugeGrid()); | ||||
|   PropagatorField p_plus (GaugeGrid()); | ||||
|   PropagatorField p_minus(GaugeGrid()); | ||||
|   PropagatorField p(GaugeGrid()); | ||||
|  | ||||
|   ExtractSlice(p_plus , q_in, Ls/2   , 0); | ||||
|   ExtractSlice(p_minus, q_in, Ls/2-1 , 0); | ||||
|   p_plus = spProj5p(p_plus ); | ||||
|   p_minus= spProj5m(p_minus); | ||||
|   p=p_plus+p_minus; | ||||
|   J5q = localInnerProduct(p,p); | ||||
| } | ||||
|  | ||||
| template <class Impl> | ||||
| void WilsonFermion5D<Impl>::ContractConservedCurrent(PropagatorField &q_in_1, | ||||
|                                                      PropagatorField &q_in_2, | ||||
| @@ -1018,7 +949,6 @@ void WilsonFermion5D<Impl>::ContractConservedCurrent(PropagatorField &q_in_1, | ||||
|     conformable(q_in_1._grid, FermionGrid()); | ||||
|     conformable(q_in_1._grid, q_in_2._grid); | ||||
|     conformable(_FourDimGrid, q_out._grid); | ||||
|  | ||||
|     PropagatorField tmp1(FermionGrid()), tmp2(FermionGrid()); | ||||
|     unsigned int LLs = q_in_1._grid->_rdimensions[0]; | ||||
|     q_out = zero; | ||||
| @@ -1065,6 +995,7 @@ void WilsonFermion5D<Impl>::ContractConservedCurrent(PropagatorField &q_in_1, | ||||
| } | ||||
|  | ||||
|  | ||||
|  | ||||
| template <class Impl> | ||||
| void WilsonFermion5D<Impl>::SeqConservedCurrent(PropagatorField &q_in,  | ||||
|                                                 PropagatorField &q_out, | ||||
|   | ||||
| @@ -230,10 +230,6 @@ namespace QCD { | ||||
|                              unsigned int tmin,  | ||||
|                              unsigned int tmax, | ||||
| 			     ComplexField &lattice_cmplx); | ||||
|  | ||||
|     void ContractJ5q(PropagatorField &q_in,ComplexField &J5q); | ||||
|     void ContractJ5q(FermionField &q_in,ComplexField &J5q); | ||||
|  | ||||
|   }; | ||||
|  | ||||
| }} | ||||
|   | ||||
| @@ -81,8 +81,8 @@ WilsonKernels<Impl >::AsmDhopSiteDagExt(StencilImpl &st,LebesgueOrder & lo,Doubl | ||||
|   assert(0); | ||||
| } | ||||
|  | ||||
| #include <Grid/qcd/action/fermion/WilsonKernelsAsmAvx512.h> | ||||
| #include <Grid/qcd/action/fermion/WilsonKernelsAsmQPX.h> | ||||
| #include <qcd/action/fermion/WilsonKernelsAsmAvx512.h> | ||||
| #include <qcd/action/fermion/WilsonKernelsAsmQPX.h> | ||||
|  | ||||
| #define INSTANTIATE_ASM(A)\ | ||||
| template void WilsonKernels<A>::AsmDhopSite(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, SiteHalfSpinor *buf,\ | ||||
|   | ||||
| @@ -29,14 +29,6 @@ directory | ||||
| #ifndef GRID_GAUGE_IMPL_TYPES_H | ||||
| #define GRID_GAUGE_IMPL_TYPES_H | ||||
|  | ||||
| #define CPS_MD_TIME | ||||
|  | ||||
| #ifdef CPS_MD_TIME | ||||
| #define HMC_MOMENTUM_DENOMINATOR (2.0) | ||||
| #else | ||||
| #define HMC_MOMENTUM_DENOMINATOR (1.0) | ||||
| #endif | ||||
|  | ||||
| namespace Grid { | ||||
| namespace QCD { | ||||
|  | ||||
| @@ -46,7 +38,6 @@ namespace QCD { | ||||
|  | ||||
| #define INHERIT_GIMPL_TYPES(GImpl)                  \ | ||||
|   typedef typename GImpl::Simd Simd;                \ | ||||
|   typedef typename GImpl::Scalar Scalar;	    \ | ||||
|   typedef typename GImpl::LinkField GaugeLinkField; \ | ||||
|   typedef typename GImpl::Field GaugeField;         \ | ||||
|   typedef typename GImpl::ComplexField ComplexField;\ | ||||
| @@ -64,8 +55,7 @@ namespace QCD { | ||||
| template <class S, int Nrepresentation = Nc, int Nexp = 12 > class GaugeImplTypes { | ||||
| public: | ||||
|   typedef S Simd; | ||||
|   typedef typename Simd::scalar_type scalar_type; | ||||
|   typedef scalar_type Scalar; | ||||
|  | ||||
|   template <typename vtype> using iImplScalar     = iScalar<iScalar<iScalar<vtype> > >; | ||||
|   template <typename vtype> using iImplGaugeLink  = iScalar<iScalar<iMatrix<vtype, Nrepresentation> > >; | ||||
|   template <typename vtype> using iImplGaugeField = iVector<iScalar<iMatrix<vtype, Nrepresentation> >, Nd>; | ||||
| @@ -97,32 +87,12 @@ public: | ||||
|   /////////////////////////////////////////////////////////// | ||||
|   // Move these to another class | ||||
|   // HMC auxiliary functions | ||||
|   static inline void generate_momenta(Field &P, GridParallelRNG &pRNG)  | ||||
|   { | ||||
|     // Zbigniew Srocinsky thesis: | ||||
|     // | ||||
|     // P(p) =  N \Prod_{x\mu}e^-{1/2 Tr (p^2_mux)} | ||||
|     //  | ||||
|     // p_x,mu = c_x,mu,a T_a | ||||
|     // | ||||
|     // Tr p^2 =  sum_a,x,mu 1/2 (c_x,mu,a)^2 | ||||
|     // | ||||
|     // Which implies P(p) =  N \Prod_{x,\mu,a} e^-{1/4 c_xmua^2  } | ||||
|     // | ||||
|     //                    =  N \Prod_{x,\mu,a} e^-{1/2 (c_xmua/sqrt{2})^2  } | ||||
|     //  | ||||
|     // Expect c' = cxmua/sqrt(2) to be a unit variance gaussian. | ||||
|     // | ||||
|     // Expect cxmua to have standard deviation sqrt(2), i.e. variance 2. | ||||
|     // | ||||
|     // Must scale the momentum by sqrt(2) to invoke CPS and UKQCD conventions | ||||
|     // | ||||
|   static inline void generate_momenta(Field &P, GridParallelRNG &pRNG) { | ||||
|     // specific for SU gauge fields | ||||
|     LinkField Pmu(P._grid); | ||||
|     Pmu = Zero(); | ||||
|     Pmu = zero; | ||||
|     for (int mu = 0; mu < Nd; mu++) { | ||||
|       SU<Nrepresentation>::GaussianFundamentalLieAlgebraMatrix(pRNG, Pmu); | ||||
|       RealD scale = ::sqrt(HMC_MOMENTUM_DENOMINATOR) ; | ||||
|       Pmu = Pmu*scale; | ||||
|       PokeIndex<LorentzIndex>(P, Pmu, mu); | ||||
|     } | ||||
|   } | ||||
|   | ||||
| @@ -4,11 +4,9 @@ | ||||
|   | ||||
|  Source file: ./lib/qcd/action/gauge/Photon.h | ||||
|   | ||||
| Copyright (C) 2015-2018 | ||||
|  Copyright (C) 2015 | ||||
|   | ||||
|  Author: Peter Boyle <paboyle@ph.ed.ac.uk> | ||||
|  Author: Antonin Portelli <antonin.portelli@me.com> | ||||
|  Author: James Harrison <J.Harrison@soton.ac.uk> | ||||
|   | ||||
|  This program is free software; you can redistribute it and/or modify | ||||
|  it under the terms of the GNU General Public License as published by | ||||
| @@ -32,13 +30,11 @@ Copyright (C) 2015-2018 | ||||
|  | ||||
| namespace Grid{ | ||||
| namespace QCD{ | ||||
|  | ||||
|   template <class S> | ||||
|   class QedGImpl | ||||
|   class QedGimpl | ||||
|   { | ||||
|   public: | ||||
|     typedef S Simd; | ||||
|     typedef typename Simd::scalar_type Scalar; | ||||
|      | ||||
|     template <typename vtype> | ||||
|     using iImplGaugeLink  = iScalar<iScalar<iScalar<vtype>>>; | ||||
| @@ -47,27 +43,27 @@ namespace QCD{ | ||||
|      | ||||
|     typedef iImplGaugeLink<Simd>  SiteLink; | ||||
|     typedef iImplGaugeField<Simd> SiteField; | ||||
|     typedef SiteLink              SiteComplex; | ||||
|     typedef SiteField             SiteComplex; | ||||
|      | ||||
|     typedef Lattice<SiteLink>  LinkField; | ||||
|     typedef Lattice<SiteField> Field; | ||||
|     typedef Field              ComplexField; | ||||
|   }; | ||||
|    | ||||
|   typedef QedGImpl<vComplex> QedGImplR; | ||||
|   typedef QedGimpl<vComplex> QedGimplR; | ||||
|    | ||||
|   template <class GImpl> | ||||
|   template<class Gimpl> | ||||
|   class Photon | ||||
|   { | ||||
|   public: | ||||
|     INHERIT_GIMPL_TYPES(GImpl); | ||||
|     typedef typename SiteGaugeLink::scalar_object ScalarSite; | ||||
|     typedef typename ScalarSite::scalar_type      ScalarComplex; | ||||
|     INHERIT_GIMPL_TYPES(Gimpl); | ||||
|     GRID_SERIALIZABLE_ENUM(Gauge, undef, feynman, 1, coulomb, 2, landau, 3); | ||||
|     GRID_SERIALIZABLE_ENUM(ZmScheme, undef, qedL, 1, qedTL, 2); | ||||
|     GRID_SERIALIZABLE_ENUM(ZmScheme, undef, qedL, 1, qedTL, 2, qedInf, 3); | ||||
|   public: | ||||
|     Photon(GridBase *grid, Gauge gauge, ZmScheme zmScheme, std::vector<Real> improvement); | ||||
|     Photon(GridBase *grid, Gauge gauge, ZmScheme zmScheme); | ||||
|     Photon(Gauge gauge, ZmScheme zmScheme); | ||||
|     Photon(Gauge gauge, ZmScheme zmScheme, std::vector<Real> improvements); | ||||
|     Photon(Gauge gauge, ZmScheme zmScheme, Real G0); | ||||
|     Photon(Gauge gauge, ZmScheme zmScheme, std::vector<Real> improvements, Real G0); | ||||
|     virtual ~Photon(void) = default; | ||||
|     void FreePropagator(const GaugeField &in, GaugeField &out); | ||||
|     void MomentumSpacePropagator(const GaugeField &in, GaugeField &out); | ||||
| @@ -77,255 +73,345 @@ namespace QCD{ | ||||
|                          const GaugeLinkField &weight); | ||||
|     void UnitField(GaugeField &out); | ||||
|   private: | ||||
|     void makeSpatialNorm(LatticeInteger &spNrm); | ||||
|     void makeKHat(std::vector<GaugeLinkField> &khat); | ||||
|     void makeInvKHatSquared(GaugeLinkField &out); | ||||
|     void infVolPropagator(GaugeLinkField &out); | ||||
|     void invKHatSquared(GaugeLinkField &out); | ||||
|     void zmSub(GaugeLinkField &out); | ||||
|     void transverseProjectSpatial(GaugeField &out); | ||||
|     void gaugeTransform(GaugeField &out); | ||||
|   private: | ||||
|     GridBase          *grid_; | ||||
|     Gauge    gauge_; | ||||
|     ZmScheme zmScheme_; | ||||
|     std::vector<Real>  improvement_; | ||||
|     Real     G0_; | ||||
|   }; | ||||
|  | ||||
|   typedef Photon<QedGImplR>  PhotonR; | ||||
|   typedef Photon<QedGimplR>  PhotonR; | ||||
|    | ||||
|   template<class GImpl> | ||||
|   Photon<GImpl>::Photon(GridBase *grid, Gauge gauge, ZmScheme zmScheme, | ||||
|   template<class Gimpl> | ||||
|   Photon<Gimpl>::Photon(Gauge gauge, ZmScheme zmScheme) | ||||
|   : gauge_(gauge), zmScheme_(zmScheme), improvement_(std::vector<Real>()), | ||||
|     G0_(0.15493339023106021408483720810737508876916113364521) | ||||
|   {} | ||||
|  | ||||
|   template<class Gimpl> | ||||
|   Photon<Gimpl>::Photon(Gauge gauge, ZmScheme zmScheme, | ||||
|                         std::vector<Real> improvements) | ||||
|   : grid_(grid), gauge_(gauge), zmScheme_(zmScheme), improvement_(improvements) | ||||
|   : gauge_(gauge), zmScheme_(zmScheme), improvement_(improvements), | ||||
|     G0_(0.15493339023106021408483720810737508876916113364521) | ||||
|   {} | ||||
|  | ||||
|   template<class GImpl> | ||||
|   Photon<GImpl>::Photon(GridBase *grid, Gauge gauge, ZmScheme zmScheme) | ||||
|   : Photon(grid, gauge, zmScheme, std::vector<Real>()) | ||||
|   template<class Gimpl> | ||||
|   Photon<Gimpl>::Photon(Gauge gauge, ZmScheme zmScheme, Real G0) | ||||
|   : gauge_(gauge), zmScheme_(zmScheme), improvement_(std::vector<Real>()), G0_(G0) | ||||
|   {} | ||||
|  | ||||
|   template<class GImpl> | ||||
|   void Photon<GImpl>::FreePropagator(const GaugeField &in, GaugeField &out) | ||||
|   { | ||||
|     FFT        theFFT(dynamic_cast<GridCartesian *>(grid_)); | ||||
|     GaugeField in_k(grid_); | ||||
|     GaugeField prop_k(grid_); | ||||
|   template<class Gimpl> | ||||
|   Photon<Gimpl>::Photon(Gauge gauge, ZmScheme zmScheme, | ||||
|                         std::vector<Real> improvements, Real G0) | ||||
|   : gauge_(gauge), zmScheme_(zmScheme), improvement_(improvements), G0_(G0) | ||||
|   {} | ||||
|  | ||||
|     theFFT.FFT_all_dim(in_k, in, FFT::forward); | ||||
|     MomentumSpacePropagator(prop_k, in_k); | ||||
|     theFFT.FFT_all_dim(out, prop_k, FFT::backward); | ||||
|   template<class Gimpl> | ||||
|   void Photon<Gimpl>::FreePropagator (const GaugeField &in,GaugeField &out) | ||||
|   { | ||||
|     FFT theFFT(in._grid); | ||||
|      | ||||
|     GaugeField in_k(in._grid); | ||||
|     GaugeField prop_k(in._grid); | ||||
|      | ||||
|     theFFT.FFT_all_dim(in_k,in,FFT::forward); | ||||
|     MomentumSpacePropagator(prop_k,in_k); | ||||
|     theFFT.FFT_all_dim(out,prop_k,FFT::backward); | ||||
|   } | ||||
|  | ||||
|   template<class GImpl> | ||||
|   void Photon<GImpl>::makeSpatialNorm(LatticeInteger &spNrm) | ||||
|   template<class Gimpl> | ||||
|   void Photon<Gimpl>::infVolPropagator(GaugeLinkField &out) | ||||
|   { | ||||
|     LatticeInteger   coor(grid_); | ||||
|     std::vector<int> l = grid_->FullDimensions(); | ||||
|  | ||||
|     spNrm = zero; | ||||
|     for(int mu = 0; mu < grid_->Nd() - 1; mu++) | ||||
|     { | ||||
|       LatticeCoordinate(coor, mu); | ||||
|       coor  = where(coor < Integer(l[mu]/2), coor, coor - Integer(l[mu])); | ||||
|       spNrm = spNrm + coor*coor; | ||||
|     } | ||||
|   } | ||||
|  | ||||
|   template<class GImpl> | ||||
|   void Photon<GImpl>::makeKHat(std::vector<GaugeLinkField> &khat) | ||||
|   { | ||||
|     const unsigned int nd = grid_->Nd(); | ||||
|     std::vector<int>   l  = grid_->FullDimensions(); | ||||
|     Complex            ci(0., 1.); | ||||
|  | ||||
|     khat.resize(nd, grid_); | ||||
|     for (unsigned int mu = 0; mu < nd; ++mu) | ||||
|     { | ||||
|       Real piL = M_PI/l[mu]; | ||||
|  | ||||
|       LatticeCoordinate(khat[mu], mu); | ||||
|       khat[mu] = exp(piL*ci*khat[mu])*2.*sin(piL*khat[mu]); | ||||
|     } | ||||
|   } | ||||
|  | ||||
|   template<class GImpl> | ||||
|   void Photon<GImpl>::makeInvKHatSquared(GaugeLinkField &out) | ||||
|   { | ||||
|     std::vector<GaugeLinkField> khat; | ||||
|     GaugeLinkField              lone(grid_); | ||||
|     const unsigned int          nd = grid_->Nd(); | ||||
|     std::vector<int>            zm(nd, 0); | ||||
|     ScalarSite                  one = ScalarComplex(1., 0.), z = ScalarComplex(0., 0.); | ||||
|     auto               *grid = dynamic_cast<GridCartesian *>(out._grid); | ||||
|     LatticeReal        xmu(grid); | ||||
|     GaugeLinkField     one(grid); | ||||
|     const unsigned int nd    = grid->_ndimension; | ||||
|     std::vector<int>   &l    = grid->_fdimensions; | ||||
|     std::vector<int>   x0(nd,0); | ||||
|     TComplex           Tone  = Complex(1.0,0.0); | ||||
|     TComplex           Tzero = Complex(G0_,0.0); | ||||
|     FFT                fft(grid); | ||||
|      | ||||
|     one = Complex(1.0,0.0); | ||||
|     out = zero; | ||||
|     makeKHat(khat); | ||||
|     for(int mu = 0; mu < nd; mu++) | ||||
|     { | ||||
|       out = out + khat[mu]*conjugate(khat[mu]); | ||||
|       LatticeCoordinate(xmu,mu); | ||||
|       Real lo2 = l[mu]/2.0; | ||||
|       xmu = where(xmu < lo2, xmu, xmu-double(l[mu])); | ||||
|       out = out + toComplex(4*M_PI*M_PI*xmu*xmu); | ||||
|     } | ||||
|     lone = ScalarComplex(1., 0.); | ||||
|     pokeSite(one, out, zm); | ||||
|     out = lone/out; | ||||
|     pokeSite(z, out, zm); | ||||
|     pokeSite(Tone, out, x0); | ||||
|     out = one/out; | ||||
|     pokeSite(Tzero, out, x0); | ||||
|     fft.FFT_all_dim(out, out, FFT::forward); | ||||
|   } | ||||
|    | ||||
|   template<class GImpl> | ||||
|   void Photon<GImpl>::zmSub(GaugeLinkField &out) | ||||
|   template<class Gimpl> | ||||
|   void Photon<Gimpl>::invKHatSquared(GaugeLinkField &out) | ||||
|   { | ||||
|     GridBase           *grid = out._grid; | ||||
|     GaugeLinkField     kmu(grid), one(grid); | ||||
|     const unsigned int nd    = grid->_ndimension; | ||||
|     std::vector<int>   &l    = grid->_fdimensions; | ||||
|     std::vector<int>   zm(nd,0); | ||||
|     TComplex           Tone = Complex(1.0,0.0); | ||||
|     TComplex           Tzero= Complex(0.0,0.0); | ||||
|      | ||||
|     one = Complex(1.0,0.0); | ||||
|     out = zero; | ||||
|     for(int mu = 0; mu < nd; mu++) | ||||
|     { | ||||
|       Real twoPiL = M_PI*2./l[mu]; | ||||
|        | ||||
|       LatticeCoordinate(kmu,mu); | ||||
|       kmu = 2.*sin(.5*twoPiL*kmu); | ||||
|       out = out + kmu*kmu; | ||||
|     } | ||||
|     pokeSite(Tone, out, zm); | ||||
|     out = one/out; | ||||
|     pokeSite(Tzero, out, zm); | ||||
|   } | ||||
|    | ||||
|   template<class Gimpl> | ||||
|   void Photon<Gimpl>::zmSub(GaugeLinkField &out) | ||||
|   { | ||||
|     GridBase           *grid = out._grid; | ||||
|     const unsigned int nd    = grid->_ndimension; | ||||
|     std::vector<int>   &l    = grid->_fdimensions; | ||||
|      | ||||
|     switch (zmScheme_) | ||||
|     { | ||||
|       case ZmScheme::qedTL: | ||||
|       { | ||||
|         std::vector<int> zm(grid_->Nd(), 0); | ||||
|         ScalarSite       z = ScalarComplex(0., 0.); | ||||
|         std::vector<int> zm(nd,0); | ||||
|         TComplex         Tzero = Complex(0.0,0.0); | ||||
|          | ||||
|         pokeSite(Tzero, out, zm); | ||||
|          | ||||
|         pokeSite(z, out, zm); | ||||
|         break; | ||||
|       } | ||||
|       case ZmScheme::qedL: | ||||
|       { | ||||
|         LatticeInteger spNrm(grid_); | ||||
|         LatticeInteger spNrm(grid), coor(grid); | ||||
|         GaugeLinkField z(grid); | ||||
|          | ||||
|         makeSpatialNorm(spNrm); | ||||
|         spNrm = zero; | ||||
|         for(int d = 0; d < grid->_ndimension - 1; d++) | ||||
|         { | ||||
|           LatticeCoordinate(coor,d); | ||||
|           coor = where(coor < Integer(l[d]/2), coor, coor-Integer(l[d])); | ||||
|           spNrm = spNrm + coor*coor; | ||||
|         } | ||||
|         out = where(spNrm == Integer(0), 0.*out, out); | ||||
|  | ||||
|         // IR improvement | ||||
|         for(int i = 0; i < improvement_.size(); i++) | ||||
|         { | ||||
|           Real f = sqrt(improvement_[i] + 1); | ||||
|           out = where(spNrm == Integer(i + 1), f*out, out); | ||||
|           Real f = sqrt(improvement_[i]+1); | ||||
|           out = where(spNrm == Integer(i+1), f*out, out); | ||||
|         } | ||||
|         break; | ||||
|       } | ||||
|       default: | ||||
|         assert(0); | ||||
|         break; | ||||
|     } | ||||
|   } | ||||
|  | ||||
|   template<class GImpl> | ||||
|   void Photon<GImpl>::transverseProjectSpatial(GaugeField &out) | ||||
|   { | ||||
|     const unsigned int          nd = grid_->Nd(); | ||||
|     GaugeLinkField              invKHat(grid_), cst(grid_), spdiv(grid_); | ||||
|     LatticeInteger              spNrm(grid_); | ||||
|     std::vector<GaugeLinkField> khat, a(nd, grid_), aProj(nd, grid_); | ||||
|  | ||||
|     invKHat = zero; | ||||
|     makeSpatialNorm(spNrm); | ||||
|     makeKHat(khat); | ||||
|     for (unsigned int mu = 0; mu < nd; ++mu) | ||||
|     { | ||||
|       a[mu] = peekLorentz(out, mu); | ||||
|       if (mu < nd - 1) | ||||
|       { | ||||
|         invKHat += khat[mu]*conjugate(khat[mu]); | ||||
|       } | ||||
|     } | ||||
|     cst     = ScalarComplex(1., 0.); | ||||
|     invKHat = where(spNrm == Integer(0), cst, invKHat); | ||||
|     invKHat = cst/invKHat; | ||||
|     cst     = zero; | ||||
|     invKHat = where(spNrm == Integer(0), cst, invKHat); | ||||
|     spdiv   = zero; | ||||
|     for (unsigned int nu = 0; nu < nd - 1; ++nu) | ||||
|     { | ||||
|       spdiv += conjugate(khat[nu])*a[nu]; | ||||
|     } | ||||
|     spdiv *= invKHat; | ||||
|     for (unsigned int mu = 0; mu < nd; ++mu) | ||||
|     { | ||||
|       aProj[mu] = a[mu] - khat[mu]*spdiv; | ||||
|       pokeLorentz(out, aProj[mu], mu); | ||||
|     } | ||||
|   } | ||||
|  | ||||
|   template<class GImpl> | ||||
|   void Photon<GImpl>::gaugeTransform(GaugeField &out) | ||||
|   { | ||||
|     switch (gauge_) | ||||
|     { | ||||
|       case Gauge::feynman: | ||||
|         break; | ||||
|       case Gauge::coulomb: | ||||
|         transverseProjectSpatial(out); | ||||
|         break; | ||||
|       case Gauge::landau: | ||||
|         assert(0); | ||||
|         break; | ||||
|       default: | ||||
|         assert(0); | ||||
|         break; | ||||
|     } | ||||
|   } | ||||
|  | ||||
|   template<class GImpl> | ||||
|   void Photon<GImpl>::MomentumSpacePropagator(const GaugeField &in, | ||||
|   template<class Gimpl> | ||||
|   void Photon<Gimpl>::MomentumSpacePropagator(const GaugeField &in, | ||||
|                                                GaugeField &out) | ||||
|   { | ||||
|     LatticeComplex momProp(grid_); | ||||
|   GridBase           *grid = out._grid; | ||||
|     LatticeComplex     momProp(grid); | ||||
|      | ||||
|     makeInvKHatSquared(momProp); | ||||
|     switch (zmScheme_) | ||||
|     { | ||||
|       case ZmScheme::qedTL: | ||||
|       case ZmScheme::qedL: | ||||
|       { | ||||
|         invKHatSquared(momProp); | ||||
|         zmSub(momProp); | ||||
|         break; | ||||
|       } | ||||
|       case ZmScheme::qedInf: | ||||
|       { | ||||
|         infVolPropagator(momProp); | ||||
|         break; | ||||
|       } | ||||
|       default: | ||||
|         break; | ||||
|     } | ||||
|      | ||||
|     out = in*momProp; | ||||
|   } | ||||
|    | ||||
|   template<class GImpl> | ||||
|   void Photon<GImpl>::StochasticWeight(GaugeLinkField &weight) | ||||
|   template<class Gimpl> | ||||
|   void Photon<Gimpl>::StochasticWeight(GaugeLinkField &weight) | ||||
|   { | ||||
|     const unsigned int nd  = grid_->Nd(); | ||||
|     std::vector<int>   l   = grid_->FullDimensions(); | ||||
|     Integer            vol = 1; | ||||
|     auto               *grid     = dynamic_cast<GridCartesian *>(weight._grid); | ||||
|     const unsigned int nd        = grid->_ndimension; | ||||
|     std::vector<int>   latt_size = grid->_fdimensions; | ||||
|      | ||||
|     for(unsigned int mu = 0; mu < nd; mu++) | ||||
|     switch (zmScheme_) | ||||
|     { | ||||
|       vol = vol*l[mu]; | ||||
|       case ZmScheme::qedTL: | ||||
|       case ZmScheme::qedL: | ||||
|       { | ||||
|         Integer vol = 1; | ||||
|         for(int d = 0; d < nd; d++) | ||||
|         { | ||||
|           vol = vol * latt_size[d]; | ||||
|         } | ||||
|     makeInvKHatSquared(weight); | ||||
|         invKHatSquared(weight); | ||||
|         weight = sqrt(vol)*sqrt(weight); | ||||
|         zmSub(weight); | ||||
|         break; | ||||
|       } | ||||
|       case ZmScheme::qedInf: | ||||
|       { | ||||
|         infVolPropagator(weight); | ||||
|         weight = sqrt(real(weight)); | ||||
|         break; | ||||
|       } | ||||
|       default: | ||||
|         break; | ||||
|     } | ||||
|   } | ||||
|    | ||||
|   template<class GImpl> | ||||
|   void Photon<GImpl>::StochasticField(GaugeField &out, GridParallelRNG &rng) | ||||
|   template<class Gimpl> | ||||
|   void Photon<Gimpl>::StochasticField(GaugeField &out, GridParallelRNG &rng) | ||||
|   { | ||||
|     GaugeLinkField weight(grid_); | ||||
|     auto           *grid = dynamic_cast<GridCartesian *>(out._grid); | ||||
|     GaugeLinkField weight(grid); | ||||
|      | ||||
|     StochasticWeight(weight); | ||||
|     StochasticField(out, rng, weight); | ||||
|   } | ||||
|    | ||||
|   template<class GImpl> | ||||
|   void Photon<GImpl>::StochasticField(GaugeField &out, GridParallelRNG &rng, | ||||
|   template<class Gimpl> | ||||
|   void Photon<Gimpl>::StochasticField(GaugeField &out, GridParallelRNG &rng, | ||||
|                                       const GaugeLinkField &weight) | ||||
|   { | ||||
|     const unsigned int nd = grid_->Nd(); | ||||
|     GaugeLinkField     r(grid_); | ||||
|     GaugeField         aTilde(grid_); | ||||
|     FFT                fft(dynamic_cast<GridCartesian *>(grid_)); | ||||
|     auto               *grid = dynamic_cast<GridCartesian *>(out._grid); | ||||
|     const unsigned int nd = grid->_ndimension; | ||||
|     GaugeLinkField     r(grid); | ||||
|     GaugeField         aTilde(grid); | ||||
|     FFT                fft(grid); | ||||
|      | ||||
|     for(unsigned int mu = 0; mu < nd; mu++) | ||||
|     switch (zmScheme_) | ||||
|     { | ||||
|       case ZmScheme::qedTL: | ||||
|       case ZmScheme::qedL: | ||||
|       { | ||||
|         for(int mu = 0; mu < nd; mu++) | ||||
|         { | ||||
|           gaussian(rng, r); | ||||
|           r = weight*r; | ||||
|           pokeLorentz(aTilde, r, mu); | ||||
|         } | ||||
|     gaugeTransform(aTilde); | ||||
|         break; | ||||
|       } | ||||
|       case ZmScheme::qedInf: | ||||
|       { | ||||
|         Complex                    shift(1., 1.); // This needs to be a GaugeLink element? | ||||
|         for(int mu = 0; mu < nd; mu++) | ||||
|         { | ||||
|           bernoulli(rng, r); | ||||
|           r = weight*(2.*r - shift); | ||||
|           pokeLorentz(aTilde, r, mu); | ||||
|         } | ||||
|         break; | ||||
|       } | ||||
|       default: | ||||
|         break; | ||||
|     } | ||||
|  | ||||
|     fft.FFT_all_dim(out, aTilde, FFT::backward); | ||||
|      | ||||
|     out = real(out); | ||||
|   } | ||||
|  | ||||
|   template<class GImpl> | ||||
|   void Photon<GImpl>::UnitField(GaugeField &out) | ||||
|   template<class Gimpl> | ||||
|   void Photon<Gimpl>::UnitField(GaugeField &out) | ||||
|   { | ||||
|     const unsigned int nd = grid_->Nd(); | ||||
|     GaugeLinkField     r(grid_); | ||||
|     auto               *grid = dynamic_cast<GridCartesian *>(out._grid); | ||||
|     const unsigned int nd = grid->_ndimension; | ||||
|     GaugeLinkField     r(grid); | ||||
|      | ||||
|     r = ScalarComplex(1., 0.); | ||||
|     for(unsigned int mu = 0; mu < nd; mu++) | ||||
|     r = Complex(1.0,0.0); | ||||
|  | ||||
|     for(int mu = 0; mu < nd; mu++) | ||||
|     { | ||||
|       pokeLorentz(out, r, mu); | ||||
|     } | ||||
|      | ||||
|     out = real(out); | ||||
|   } | ||||
| //  template<class Gimpl> | ||||
| //  void Photon<Gimpl>::FeynmanGaugeMomentumSpacePropagator_L(GaugeField &out, | ||||
| //                                                            const GaugeField &in) | ||||
| //  { | ||||
| //     | ||||
| //    FeynmanGaugeMomentumSpacePropagator_TL(out,in); | ||||
| //     | ||||
| //    GridBase *grid = out._grid; | ||||
| //    LatticeInteger     coor(grid); | ||||
| //    GaugeField zz(grid); zz=zero; | ||||
| //     | ||||
| //    // xyzt | ||||
| //    for(int d = 0; d < grid->_ndimension-1;d++){ | ||||
| //      LatticeCoordinate(coor,d); | ||||
| //      out = where(coor==Integer(0),zz,out); | ||||
| //    } | ||||
| //  } | ||||
| //   | ||||
| //  template<class Gimpl> | ||||
| //  void Photon<Gimpl>::FeynmanGaugeMomentumSpacePropagator_TL(GaugeField &out, | ||||
| //                                                             const GaugeField &in) | ||||
| //  { | ||||
| //     | ||||
| //    // what type LatticeComplex | ||||
| //    GridBase *grid = out._grid; | ||||
| //    int nd = grid->_ndimension; | ||||
| //     | ||||
| //    typedef typename GaugeField::vector_type vector_type; | ||||
| //    typedef typename GaugeField::scalar_type ScalComplex; | ||||
| //    typedef Lattice<iSinglet<vector_type> > LatComplex; | ||||
| //     | ||||
| //    std::vector<int> latt_size   = grid->_fdimensions; | ||||
| //     | ||||
| //    LatComplex denom(grid); denom= zero; | ||||
| //    LatComplex   one(grid); one = ScalComplex(1.0,0.0); | ||||
| //    LatComplex   kmu(grid); | ||||
| //     | ||||
| //    ScalComplex ci(0.0,1.0); | ||||
| //    // momphase = n * 2pi / L | ||||
| //    for(int mu=0;mu<Nd;mu++) { | ||||
| //       | ||||
| //      LatticeCoordinate(kmu,mu); | ||||
| //       | ||||
| //      RealD TwoPiL =  M_PI * 2.0/ latt_size[mu]; | ||||
| //       | ||||
| //      kmu = TwoPiL * kmu ; | ||||
| //       | ||||
| //      denom = denom + 4.0*sin(kmu*0.5)*sin(kmu*0.5); // Wilson term | ||||
| //    } | ||||
| //    std::vector<int> zero_mode(nd,0); | ||||
| //    TComplexD Tone = ComplexD(1.0,0.0); | ||||
| //    TComplexD Tzero= ComplexD(0.0,0.0); | ||||
| //     | ||||
| //    pokeSite(Tone,denom,zero_mode); | ||||
| //     | ||||
| //    denom= one/denom; | ||||
| //     | ||||
| //    pokeSite(Tzero,denom,zero_mode); | ||||
| //     | ||||
| //    out = zero; | ||||
| //    out = in*denom; | ||||
| //  }; | ||||
|    | ||||
| }} | ||||
| #endif | ||||
|   | ||||
| @@ -1,53 +0,0 @@ | ||||
| #pragma once | ||||
|  | ||||
| namespace Grid{ | ||||
|   namespace QCD{ | ||||
|  | ||||
|     template<class Field> | ||||
|     void HighBoundCheck(LinearOperatorBase<Field> &HermOp,  | ||||
| 			Field &Phi, | ||||
| 			RealD hi) | ||||
|     { | ||||
|       // Eigenvalue bound check at high end | ||||
|       PowerMethod<Field> power_method; | ||||
|       auto lambda_max = power_method(HermOp,Phi); | ||||
|       std::cout << GridLogMessage << "Pseudofermion action lamda_max "<<lambda_max<<"( bound "<<hi<<")"<<std::endl; | ||||
|       assert( (lambda_max < hi) && " High Bounds Check on operator failed" ); | ||||
|     } | ||||
|        | ||||
|     template<class Field> void InverseSqrtBoundsCheck(int MaxIter,double tol, | ||||
| 						       LinearOperatorBase<Field> &HermOp, | ||||
| 						       Field &GaussNoise, | ||||
| 						       MultiShiftFunction &PowerNegHalf)  | ||||
|     { | ||||
|       GridBase *FermionGrid = GaussNoise._grid; | ||||
|  | ||||
|       Field X(FermionGrid); | ||||
|       Field Y(FermionGrid); | ||||
|       Field Z(FermionGrid); | ||||
|  | ||||
|       X=GaussNoise; | ||||
|       RealD Nx = norm2(X); | ||||
|  | ||||
|       ConjugateGradientMultiShift<Field> msCG(MaxIter,PowerNegHalf); | ||||
|       msCG(HermOp,X,Y); | ||||
|       msCG(HermOp,Y,Z); | ||||
|  | ||||
|       RealD Nz = norm2(Z); | ||||
|  | ||||
|       HermOp.HermOp(Z,Y); | ||||
|       RealD Ny = norm2(Y); | ||||
|  | ||||
|       X=X-Y; | ||||
|       RealD Nd = norm2(X); | ||||
|       std::cout << "************************* "<<std::endl; | ||||
|       std::cout << " noise                         = "<<Nx<<std::endl; | ||||
|       std::cout << " (MdagM^-1/2)^2  noise         = "<<Nz<<std::endl; | ||||
|       std::cout << " MdagM (MdagM^-1/2)^2  noise   = "<<Ny<<std::endl; | ||||
|       std::cout << " noise - MdagM (MdagM^-1/2)^2  noise   = "<<Nd<<std::endl; | ||||
|       std::cout << "************************* "<<std::endl; | ||||
|       assert( (std::sqrt(Nd/Nx)<tol) && " InverseSqrtBoundsCheck "); | ||||
|     } | ||||
|  | ||||
|   } | ||||
| } | ||||
| @@ -58,30 +58,13 @@ namespace QCD{ | ||||
|       bool use_heatbath_forecasting; | ||||
|       AbstractEOFAFermion<Impl>& Lop; // the basic LH operator | ||||
|       AbstractEOFAFermion<Impl>& Rop; // the basic RH operator | ||||
|       SchurRedBlackDiagMooeeSolve<FermionField> SolverHB; | ||||
|       SchurRedBlackDiagMooeeSolve<FermionField> SolverL; | ||||
|       SchurRedBlackDiagMooeeSolve<FermionField> SolverR; | ||||
|       SchurRedBlackDiagMooeeSolve<FermionField> DerivativeSolverL; | ||||
|       SchurRedBlackDiagMooeeSolve<FermionField> DerivativeSolverR; | ||||
|       SchurRedBlackDiagMooeeSolve<FermionField> Solver; | ||||
|       FermionField Phi; // the pseudofermion field for this trajectory | ||||
|  | ||||
|     public: | ||||
|  | ||||
|       ExactOneFlavourRatioPseudoFermionAction(AbstractEOFAFermion<Impl>& _Lop,  | ||||
| 					      AbstractEOFAFermion<Impl>& _Rop, | ||||
| 					      OperatorFunction<FermionField>& HeatbathCG,  | ||||
| 					      OperatorFunction<FermionField>& ActionCGL, OperatorFunction<FermionField>& ActionCGR,  | ||||
| 					      OperatorFunction<FermionField>& DerivCGL , OperatorFunction<FermionField>& DerivCGR,  | ||||
| 					      Params& p,  | ||||
| 					      bool use_fc=false) :  | ||||
|         Lop(_Lop),  | ||||
| 	Rop(_Rop),  | ||||
| 	SolverHB(HeatbathCG,false,true), | ||||
| 	SolverL(ActionCGL, false, true), SolverR(ActionCGR, false, true),  | ||||
| 	DerivativeSolverL(DerivCGL, false, true), DerivativeSolverR(DerivCGR, false, true),  | ||||
| 	Phi(_Lop.FermionGrid()),  | ||||
| 	param(p),  | ||||
|         use_heatbath_forecasting(use_fc) | ||||
|       ExactOneFlavourRatioPseudoFermionAction(AbstractEOFAFermion<Impl>& _Lop, AbstractEOFAFermion<Impl>& _Rop, | ||||
|         OperatorFunction<FermionField>& S, Params& p, bool use_fc=false) : Lop(_Lop), Rop(_Rop), Solver(S), | ||||
|         Phi(_Lop.FermionGrid()), param(p), use_heatbath_forecasting(use_fc) | ||||
|       { | ||||
|         AlgRemez remez(param.lo, param.hi, param.precision); | ||||
|  | ||||
| @@ -115,9 +98,6 @@ namespace QCD{ | ||||
|       // We generate a Gaussian noise vector \eta, and then compute | ||||
|       //  \Phi = M_{\rm EOFA}^{-1/2} * \eta | ||||
|       // using a rational approximation to the inverse square root | ||||
|       // | ||||
|       // As a check of rational require \Phi^dag M_{EOFA} \Phi == eta^dag M^-1/2^dag M M^-1/2 eta = eta^dag eta | ||||
|       // | ||||
|       virtual void refresh(const GaugeField& U, GridParallelRNG& pRNG) | ||||
|       { | ||||
|         Lop.ImportGauge(U); | ||||
| @@ -138,6 +118,7 @@ namespace QCD{ | ||||
|         RealD scale = std::sqrt(0.5); | ||||
|         gaussian(pRNG,eta); | ||||
|         eta = eta * scale; | ||||
|         printf("Heatbath source vector: <\\eta|\\eta> = %1.15e\n", norm2(eta)); | ||||
|  | ||||
|         // \Phi = ( \alpha_{0} + \sum_{k=1}^{N_{p}} \alpha_{l} * \gamma_{l} ) * \eta | ||||
|         RealD N(PowerNegHalf.norm); | ||||
| @@ -158,11 +139,11 @@ namespace QCD{ | ||||
|           if(use_heatbath_forecasting){ // Forecast CG guess using solutions from previous poles | ||||
|             Lop.Mdag(CG_src, Forecast_src); | ||||
|             CG_soln = Forecast(Lop, Forecast_src, prev_solns); | ||||
|             SolverHB(Lop, CG_src, CG_soln); | ||||
|             Solver(Lop, CG_src, CG_soln); | ||||
|             prev_solns.push_back(CG_soln); | ||||
|           } else { | ||||
|             CG_soln = zero; // Just use zero as the initial guess | ||||
|             SolverHB(Lop, CG_src, CG_soln); | ||||
|             Solver(Lop, CG_src, CG_soln); | ||||
|           } | ||||
|           Lop.Dtilde(CG_soln, tmp[0]); // We actually solved Cayley preconditioned system: transform back | ||||
|           tmp[1] = tmp[1] + ( PowerNegHalf.residues[k]*gamma_l*gamma_l*Lop.k ) * tmp[0]; | ||||
| @@ -185,11 +166,11 @@ namespace QCD{ | ||||
|           if(use_heatbath_forecasting){ | ||||
|             Rop.Mdag(CG_src, Forecast_src); | ||||
|             CG_soln = Forecast(Rop, Forecast_src, prev_solns); | ||||
|             SolverHB(Rop, CG_src, CG_soln); | ||||
|             Solver(Rop, CG_src, CG_soln); | ||||
|             prev_solns.push_back(CG_soln); | ||||
|           } else { | ||||
|             CG_soln = zero; | ||||
|             SolverHB(Rop, CG_src, CG_soln); | ||||
|             Solver(Rop, CG_src, CG_soln); | ||||
|           } | ||||
|           Rop.Dtilde(CG_soln, tmp[0]); // We actually solved Cayley preconditioned system: transform back | ||||
|           tmp[1] = tmp[1] - ( PowerNegHalf.residues[k]*gamma_l*gamma_l*Rop.k ) * tmp[0]; | ||||
| @@ -201,47 +182,8 @@ namespace QCD{ | ||||
|         // Reset shift coefficients for energy and force evals | ||||
|         Lop.RefreshShiftCoefficients(0.0); | ||||
|         Rop.RefreshShiftCoefficients(-1.0); | ||||
|  | ||||
| 	// Bounds check | ||||
| 	RealD EtaDagEta = norm2(eta); | ||||
| 	//	RealD PhiDagMPhi= norm2(eta); | ||||
|  | ||||
|       }; | ||||
|  | ||||
|       void Meofa(const GaugeField& U,const FermionField &phi, FermionField & Mphi)  | ||||
|       { | ||||
| #if 0 | ||||
|         Lop.ImportGauge(U); | ||||
|         Rop.ImportGauge(U); | ||||
|  | ||||
|         FermionField spProj_Phi(Lop.FermionGrid()); | ||||
| 	FermionField mPhi(Lop.FermionGrid()); | ||||
|         std::vector<FermionField> tmp(2, Lop.FermionGrid()); | ||||
| 	mPhi = phi; | ||||
| 	 | ||||
|         // LH term: S = S - k <\Phi| P_{-} \Omega_{-}^{\dagger} H(mf)^{-1} \Omega_{-} P_{-} |\Phi> | ||||
|         spProj(Phi, spProj_Phi, -1, Lop.Ls); | ||||
|         Lop.Omega(spProj_Phi, tmp[0], -1, 0); | ||||
|         G5R5(tmp[1], tmp[0]); | ||||
|         tmp[0] = zero; | ||||
|         SolverL(Lop, tmp[1], tmp[0]); | ||||
|         Lop.Dtilde(tmp[0], tmp[1]); // We actually solved Cayley preconditioned system: transform back | ||||
|         Lop.Omega(tmp[1], tmp[0], -1, 1); | ||||
| 	mPhi = mPhi -  Lop.k * innerProduct(spProj_Phi, tmp[0]).real(); | ||||
|  | ||||
|         // RH term: S = S + k <\Phi| P_{+} \Omega_{+}^{\dagger} ( H(mb) | ||||
|         //               - \Delta_{+}(mf,mb) P_{+} )^{-1} \Omega_{-} P_{-} |\Phi> | ||||
|         spProj(Phi, spProj_Phi, 1, Rop.Ls); | ||||
|         Rop.Omega(spProj_Phi, tmp[0], 1, 0); | ||||
|         G5R5(tmp[1], tmp[0]); | ||||
|         tmp[0] = zero; | ||||
|         SolverR(Rop, tmp[1], tmp[0]); | ||||
|         Rop.Dtilde(tmp[0], tmp[1]); | ||||
|         Rop.Omega(tmp[1], tmp[0], 1, 1); | ||||
|         action += Rop.k * innerProduct(spProj_Phi, tmp[0]).real(); | ||||
| #endif | ||||
|       } | ||||
|  | ||||
|       // EOFA action: see Eqn. (10) of arXiv:1706.05843 | ||||
|       virtual RealD S(const GaugeField& U) | ||||
|       { | ||||
| @@ -259,7 +201,7 @@ namespace QCD{ | ||||
|         Lop.Omega(spProj_Phi, tmp[0], -1, 0); | ||||
|         G5R5(tmp[1], tmp[0]); | ||||
|         tmp[0] = zero; | ||||
|         SolverL(Lop, tmp[1], tmp[0]); | ||||
|         Solver(Lop, tmp[1], tmp[0]); | ||||
|         Lop.Dtilde(tmp[0], tmp[1]); // We actually solved Cayley preconditioned system: transform back | ||||
|         Lop.Omega(tmp[1], tmp[0], -1, 1); | ||||
|         action -= Lop.k * innerProduct(spProj_Phi, tmp[0]).real(); | ||||
| @@ -270,7 +212,7 @@ namespace QCD{ | ||||
|         Rop.Omega(spProj_Phi, tmp[0], 1, 0); | ||||
|         G5R5(tmp[1], tmp[0]); | ||||
|         tmp[0] = zero; | ||||
|         SolverR(Rop, tmp[1], tmp[0]); | ||||
|         Solver(Rop, tmp[1], tmp[0]); | ||||
|         Rop.Dtilde(tmp[0], tmp[1]); | ||||
|         Rop.Omega(tmp[1], tmp[0], 1, 1); | ||||
|         action += Rop.k * innerProduct(spProj_Phi, tmp[0]).real(); | ||||
| @@ -292,22 +234,17 @@ namespace QCD{ | ||||
|  | ||||
|         GaugeField force(Lop.GaugeGrid()); | ||||
|  | ||||
| 	///////////////////////////////////////////// | ||||
| 	// PAB:  | ||||
| 	//   Optional single precision derivative ? | ||||
| 	///////////////////////////////////////////// | ||||
|  | ||||
|         // LH: dSdU = k \chi_{L}^{\dagger} \gamma_{5} R_{5} ( \partial_{x,\mu} D_{w} ) \chi_{L} | ||||
|         //     \chi_{L} = H(mf)^{-1} \Omega_{-} P_{-} \Phi | ||||
|         spProj(Phi, spProj_Phi, -1, Lop.Ls); | ||||
|         Lop.Omega(spProj_Phi, Omega_spProj_Phi, -1, 0); | ||||
|         G5R5(CG_src, Omega_spProj_Phi); | ||||
|         spProj_Phi = zero; | ||||
|         DerivativeSolverL(Lop, CG_src, spProj_Phi); | ||||
|         Solver(Lop, CG_src, spProj_Phi); | ||||
|         Lop.Dtilde(spProj_Phi, Chi); | ||||
|         G5R5(g5_R5_Chi, Chi); | ||||
|         Lop.MDeriv(force, g5_R5_Chi, Chi, DaggerNo); | ||||
|         dSdU = -Lop.k * force; | ||||
|         dSdU = Lop.k * force; | ||||
|  | ||||
|         // RH: dSdU = dSdU - k \chi_{R}^{\dagger} \gamma_{5} R_{5} ( \partial_{x,\mu} D_{w} ) \chi_{} | ||||
|         //     \chi_{R} = ( H(mb) - \Delta_{+}(mf,mb) P_{+} )^{-1} \Omega_{+} P_{+} \Phi | ||||
| @@ -315,11 +252,11 @@ namespace QCD{ | ||||
|         Rop.Omega(spProj_Phi, Omega_spProj_Phi, 1, 0); | ||||
|         G5R5(CG_src, Omega_spProj_Phi); | ||||
|         spProj_Phi = zero; | ||||
|         DerivativeSolverR(Rop, CG_src, spProj_Phi); | ||||
|         Solver(Rop, CG_src, spProj_Phi); | ||||
|         Rop.Dtilde(spProj_Phi, Chi); | ||||
|         G5R5(g5_R5_Chi, Chi); | ||||
|         Lop.MDeriv(force, g5_R5_Chi, Chi, DaggerNo); | ||||
|         dSdU = dSdU + Rop.k * force; | ||||
|         dSdU = dSdU - Rop.k * force; | ||||
|       }; | ||||
|   }; | ||||
| }} | ||||
|   | ||||
| @@ -157,13 +157,6 @@ class OneFlavourEvenOddRationalPseudoFermionAction | ||||
|  | ||||
|     msCG(Mpc, PhiOdd, Y); | ||||
|  | ||||
|     if ( (rand()%param.BoundsCheckFreq)==0 ) {  | ||||
|       FermionField gauss(FermOp.FermionRedBlackGrid()); | ||||
|       gauss = PhiOdd; | ||||
|       HighBoundCheck(Mpc,gauss,param.hi); | ||||
|       InverseSqrtBoundsCheck(param.MaxIter,param.tolerance*100,Mpc,gauss,PowerNegHalf); | ||||
|     } | ||||
|  | ||||
|     RealD action = norm2(Y); | ||||
|     std::cout << GridLogMessage << "Pseudofermion action FIXME -- is -1/4 " | ||||
|                                    "solve or -1/2 solve faster??? " | ||||
|   | ||||
| @@ -170,14 +170,6 @@ namespace Grid{ | ||||
| 	ConjugateGradientMultiShift<FermionField> msCG_M(param.MaxIter,PowerNegQuarter); | ||||
| 	msCG_M(MdagM,X,Y); | ||||
|  | ||||
| 	// Randomly apply rational bounds checks. | ||||
| 	if ( (rand()%param.BoundsCheckFreq)==0 ) {  | ||||
| 	  FermionField gauss(NumOp.FermionRedBlackGrid()); | ||||
| 	  gauss = PhiOdd; | ||||
| 	  HighBoundCheck(MdagM,gauss,param.hi); | ||||
| 	  InverseSqrtBoundsCheck(param.MaxIter,param.tolerance*100,MdagM,gauss,PowerNegHalf); | ||||
| 	} | ||||
|  | ||||
| 	//  Phidag VdagV^1/4 MdagM^-1/4  MdagM^-1/4 VdagV^1/4 Phi | ||||
| 	RealD action = norm2(Y); | ||||
|  | ||||
|   | ||||
| @@ -143,14 +143,6 @@ namespace Grid{ | ||||
|  | ||||
| 	msCG(MdagMOp,Phi,Y); | ||||
|  | ||||
| 	if ( (rand()%param.BoundsCheckFreq)==0 ) {  | ||||
| 	  FermionField gauss(FermOp.FermionGrid()); | ||||
| 	  gauss = Phi; | ||||
| 	  HighBoundCheck(MdagMOp,gauss,param.hi); | ||||
| 	  InverseSqrtBoundsCheck(param.MaxIter,param.tolerance*100,MdagMOp,gauss,PowerNegHalf); | ||||
| 	} | ||||
|  | ||||
|  | ||||
| 	RealD action = norm2(Y); | ||||
| 	std::cout << GridLogMessage << "Pseudofermion action FIXME -- is -1/4 solve or -1/2 solve faster??? "<<action<<std::endl; | ||||
| 	return action; | ||||
|   | ||||
| @@ -156,14 +156,6 @@ namespace Grid{ | ||||
| 	ConjugateGradientMultiShift<FermionField> msCG_M(param.MaxIter,PowerNegQuarter); | ||||
| 	msCG_M(MdagM,X,Y); | ||||
|  | ||||
| 	// Randomly apply rational bounds checks. | ||||
| 	if ( (rand()%param.BoundsCheckFreq)==0 ) {  | ||||
| 	  FermionField gauss(NumOp.FermionGrid()); | ||||
| 	  gauss = Phi; | ||||
| 	  HighBoundCheck(MdagM,gauss,param.hi); | ||||
| 	  InverseSqrtBoundsCheck(param.MaxIter,param.tolerance*100,MdagM,gauss,PowerNegHalf); | ||||
| 	} | ||||
|  | ||||
| 	//  Phidag VdagV^1/4 MdagM^-1/4  MdagM^-1/4 VdagV^1/4 Phi | ||||
| 	RealD action = norm2(Y); | ||||
|  | ||||
|   | ||||
| @@ -29,9 +29,6 @@ directory | ||||
| #ifndef QCD_PSEUDOFERMION_AGGREGATE_H | ||||
| #define QCD_PSEUDOFERMION_AGGREGATE_H | ||||
|  | ||||
| // Rational functions | ||||
| #include <Grid/qcd/action/pseudofermion/Bounds.h> | ||||
|  | ||||
| #include <Grid/qcd/action/pseudofermion/EvenOddSchurDifferentiable.h> | ||||
| #include <Grid/qcd/action/pseudofermion/TwoFlavour.h> | ||||
| #include <Grid/qcd/action/pseudofermion/TwoFlavourRatio.h> | ||||
|   | ||||
| @@ -85,20 +85,21 @@ class TwoFlavourPseudoFermionAction : public Action<typename Impl::GaugeField> { | ||||
|     // and must multiply by 0.707.... | ||||
|     // | ||||
|     // Chroma has this scale factor: two_flavor_monomial_w.h | ||||
|     // CPS uses this factor | ||||
|     // IroIro: does not use this scale. It is absorbed by a change of vars | ||||
|     //         in the Phi integral, and thus is only an irrelevant prefactor for | ||||
|     //         the partition function. | ||||
|     // | ||||
|  | ||||
|     const RealD scale = std::sqrt(0.5); | ||||
|     RealD scale = std::sqrt(0.5); | ||||
|  | ||||
|     FermionField eta(FermOp.FermionGrid()); | ||||
|  | ||||
|     gaussian(pRNG, eta); eta = scale *eta; | ||||
|     gaussian(pRNG, eta); | ||||
|  | ||||
|     FermOp.ImportGauge(U); | ||||
|     FermOp.Mdag(eta, Phi); | ||||
|  | ||||
|     Phi = Phi * scale; | ||||
|   }; | ||||
|  | ||||
|   ////////////////////////////////////////////////////// | ||||
|   | ||||
| @@ -46,7 +46,6 @@ namespace Grid{ | ||||
|  | ||||
|       OperatorFunction<FermionField> &DerivativeSolver; | ||||
|       OperatorFunction<FermionField> &ActionSolver; | ||||
|       OperatorFunction<FermionField> &HeatbathSolver; | ||||
|  | ||||
|       FermionField PhiOdd;   // the pseudo fermion field for this trajectory | ||||
|       FermionField PhiEven;  // the pseudo fermion field for this trajectory | ||||
| @@ -55,18 +54,11 @@ namespace Grid{ | ||||
|       TwoFlavourEvenOddRatioPseudoFermionAction(FermionOperator<Impl>  &_NumOp,  | ||||
|                                                 FermionOperator<Impl>  &_DenOp,  | ||||
|                                                 OperatorFunction<FermionField> & DS, | ||||
|                                                 OperatorFunction<FermionField> & AS ) :  | ||||
|       TwoFlavourEvenOddRatioPseudoFermionAction(_NumOp,_DenOp, DS,AS,AS) {}; | ||||
|  | ||||
|       TwoFlavourEvenOddRatioPseudoFermionAction(FermionOperator<Impl>  &_NumOp,  | ||||
|                                                 FermionOperator<Impl>  &_DenOp,  | ||||
|                                                 OperatorFunction<FermionField> & DS, | ||||
|                                                 OperatorFunction<FermionField> & AS, OperatorFunction<FermionField> & HS) : | ||||
|                                                 OperatorFunction<FermionField> & AS) : | ||||
|       NumOp(_NumOp),  | ||||
|       DenOp(_DenOp),  | ||||
|       DerivativeSolver(DS),  | ||||
|       ActionSolver(AS), | ||||
|       HeatbathSolver(HS), | ||||
|       PhiEven(_NumOp.FermionRedBlackGrid()), | ||||
|       PhiOdd(_NumOp.FermionRedBlackGrid())  | ||||
|         { | ||||
| @@ -119,7 +111,7 @@ namespace Grid{ | ||||
|         // Odd det factors | ||||
|         Mpc.MpcDag(etaOdd,PhiOdd); | ||||
|         tmp=zero; | ||||
|         HeatbathSolver(Vpc,PhiOdd,tmp); | ||||
|         ActionSolver(Vpc,PhiOdd,tmp); | ||||
|         Vpc.Mpc(tmp,PhiOdd);             | ||||
|  | ||||
|         // Even det factors | ||||
|   | ||||
| @@ -54,7 +54,7 @@ public: | ||||
|  | ||||
|   template <class ReaderClass, typename std::enable_if<isReader<ReaderClass>::value, int >::type = 0 > | ||||
|   IntegratorParameters(ReaderClass & Reader){ | ||||
|     std::cout << GridLogMessage << "Reading integrator\n"; | ||||
|     std::cout << "Reading integrator\n"; | ||||
|         read(Reader, "Integrator", *this); | ||||
|   } | ||||
|  | ||||
| @@ -88,7 +88,8 @@ class Integrator { | ||||
|     t_P[level] += ep; | ||||
|     update_P(P, U, level, ep); | ||||
|  | ||||
|     std::cout << GridLogIntegrator << "[" << level << "] P " << " dt " << ep << " : t_P " << t_P[level] << std::endl; | ||||
|     std::cout << GridLogIntegrator << "[" << level << "] P " | ||||
|               << " dt " << ep << " : t_P " << t_P[level] << std::endl; | ||||
|   } | ||||
|  | ||||
|   // to be used by the actionlevel class to iterate | ||||
| @@ -104,7 +105,7 @@ class Integrator { | ||||
|         GF force = Rep.RtoFundamentalProject(forceR);  // Ta for the fundamental rep | ||||
|         Real force_abs = std::sqrt(norm2(force)/(U._grid->gSites())); | ||||
|         std::cout << GridLogIntegrator << "Hirep Force average: " << force_abs << std::endl; | ||||
| 	Mom -= force * ep* HMC_MOMENTUM_DENOMINATOR;;  | ||||
|         Mom -= force * ep ; | ||||
|       } | ||||
|     } | ||||
|   } update_P_hireps{}; | ||||
| @@ -128,11 +129,11 @@ class Integrator { | ||||
|       double end_force = usecond(); | ||||
|       Real force_abs = std::sqrt(norm2(force)/U._grid->gSites()); | ||||
|       std::cout << GridLogIntegrator << "["<<level<<"]["<<a<<"] Force average: " << force_abs << std::endl; | ||||
|       Mom -= force * ep* HMC_MOMENTUM_DENOMINATOR;;  | ||||
|       Mom -= force * ep;  | ||||
|       double end_full = usecond(); | ||||
|       double time_full  = (end_full - start_full) / 1e3; | ||||
|       double time_force = (end_force - start_force) / 1e3; | ||||
|       std::cout << GridLogMessage << "["<<level<<"]["<<a<<"] P update elapsed time: " << time_full << " ms (force: " << time_force << " ms)"  << std::endl; | ||||
|       std::cout << GridLogIntegrator << "["<<level<<"]["<<a<<"] P update elapsed time: " << time_full << " ms (force: " << time_force << " ms)"  << std::endl; | ||||
|     } | ||||
|  | ||||
|     // Force from the other representations | ||||
| @@ -237,7 +238,8 @@ class Integrator { | ||||
|       for (int actionID = 0; actionID < as[level].actions.size(); ++actionID) { | ||||
|         // get gauge field from the SmearingPolicy and | ||||
|         // based on the boolean is_smeared in actionID | ||||
|         Field& Us = Smearer.get_U(as[level].actions.at(actionID)->is_smeared); | ||||
|         Field& Us = | ||||
|             Smearer.get_U(as[level].actions.at(actionID)->is_smeared); | ||||
|         as[level].actions.at(actionID)->refresh(Us, pRNG); | ||||
|       } | ||||
|  | ||||
| @@ -250,11 +252,13 @@ class Integrator { | ||||
|   // over the representations | ||||
|   struct _S { | ||||
|     template <class FieldType, class Repr> | ||||
|     void operator()(std::vector<Action<FieldType>*> repr_set, Repr& Rep, int level, RealD& H) { | ||||
|     void operator()(std::vector<Action<FieldType>*> repr_set, Repr& Rep, | ||||
|                     int level, RealD& H) { | ||||
|        | ||||
|       for (int a = 0; a < repr_set.size(); ++a) { | ||||
|         RealD Hterm = repr_set.at(a)->S(Rep.U); | ||||
|         std::cout << GridLogMessage << "S Level " << level << " term " << a << " H Hirep = " << Hterm << std::endl; | ||||
|         std::cout << GridLogMessage << "S Level " << level << " term " << a | ||||
|                   << " H Hirep = " << Hterm << std::endl; | ||||
|         H += Hterm; | ||||
|  | ||||
|       } | ||||
| @@ -264,21 +268,20 @@ class Integrator { | ||||
|   // Calculate action | ||||
|   RealD S(Field& U) {  // here also U not used | ||||
|  | ||||
|     std::cout << GridLogIntegrator << "Integrator action\n"; | ||||
|  | ||||
|     RealD H = - FieldImplementation::FieldSquareNorm(P)/HMC_MOMENTUM_DENOMINATOR; // - trace (P*P)/denom | ||||
|  | ||||
|     RealD H = - FieldImplementation::FieldSquareNorm(P); // - trace (P*P) | ||||
|     RealD Hterm; | ||||
|     std::cout << GridLogMessage << "Momentum action H_p = " << H << "\n"; | ||||
|  | ||||
|     // Actions | ||||
|     for (int level = 0; level < as.size(); ++level) { | ||||
|       for (int actionID = 0; actionID < as[level].actions.size(); ++actionID) { | ||||
|         // get gauge field from the SmearingPolicy and | ||||
|         // based on the boolean is_smeared in actionID | ||||
|         Field& Us = Smearer.get_U(as[level].actions.at(actionID)->is_smeared); | ||||
|         std::cout << GridLogMessage << "S [" << level << "][" << actionID << "] action eval " << std::endl; | ||||
|         Field& Us = | ||||
|             Smearer.get_U(as[level].actions.at(actionID)->is_smeared); | ||||
|         Hterm = as[level].actions.at(actionID)->S(Us); | ||||
|         std::cout << GridLogMessage << "S [" << level << "][" << actionID << "] H = " << Hterm << std::endl; | ||||
|         std::cout << GridLogMessage << "S Level " << level << " term " | ||||
|                   << actionID << " H = " << Hterm << std::endl; | ||||
|         H += Hterm; | ||||
|       } | ||||
|       as[level].apply(S_hireps, Representations, level, H); | ||||
| @@ -303,7 +306,8 @@ class Integrator { | ||||
|     // Check the clocks all match on all levels | ||||
|     for (int level = 0; level < as.size(); ++level) { | ||||
|       assert(fabs(t_U - t_P[level]) < 1.0e-6);  // must be the same | ||||
|       std::cout << GridLogIntegrator << " times[" << level << "]= " << t_P[level] << " " << t_U << std::endl; | ||||
|       std::cout << GridLogIntegrator << " times[" << level | ||||
|                 << "]= " << t_P[level] << " " << t_U << std::endl; | ||||
|     } | ||||
|  | ||||
|     // and that we indeed got to the end of the trajectory | ||||
|   | ||||
| @@ -231,7 +231,8 @@ class ForceGradient : public Integrator<FieldImplementation, SmearingPolicy, | ||||
|     Field Pfg(U._grid); | ||||
|     Ufg = U; | ||||
|     Pfg = zero; | ||||
|     std::cout << GridLogIntegrator << "FG update " << fg_dt << " " << ep << std::endl; | ||||
|     std::cout << GridLogIntegrator << "FG update " << fg_dt << " " << ep | ||||
|               << std::endl; | ||||
|     // prepare_fg; no prediction/result cache for now | ||||
|     // could relax CG stopping conditions for the | ||||
|     // derivatives in the small step since the force gets multiplied by | ||||
| @@ -270,7 +271,8 @@ class ForceGradient : public Integrator<FieldImplementation, SmearingPolicy, | ||||
|         this->step(U, level + 1, first_step, 0); | ||||
|       } | ||||
|  | ||||
|       this->FG_update_P(U, level, 2 * Chi / ((1.0 - 2.0 * lambda) * eps), (1.0 - 2.0 * lambda) * eps); | ||||
|       this->FG_update_P(U, level, 2 * Chi / ((1.0 - 2.0 * lambda) * eps), | ||||
|                         (1.0 - 2.0 * lambda) * eps); | ||||
|  | ||||
|       if (level == fl) {  // lowest level | ||||
|         this->update_U(U, 0.5 * eps); | ||||
|   | ||||
| @@ -11,24 +11,6 @@ const std::array<const Gamma, 4> Gamma::gmu = {{ | ||||
|   Gamma(Gamma::Algebra::GammaZ), | ||||
|   Gamma(Gamma::Algebra::GammaT)}}; | ||||
|  | ||||
| const std::array<const Gamma, 16> Gamma::gall = {{ | ||||
|   Gamma(Gamma::Algebra::Identity), | ||||
|   Gamma(Gamma::Algebra::Gamma5), | ||||
|   Gamma(Gamma::Algebra::GammaX), | ||||
|   Gamma(Gamma::Algebra::GammaY), | ||||
|   Gamma(Gamma::Algebra::GammaZ), | ||||
|   Gamma(Gamma::Algebra::GammaT), | ||||
|   Gamma(Gamma::Algebra::GammaXGamma5), | ||||
|   Gamma(Gamma::Algebra::GammaYGamma5), | ||||
|   Gamma(Gamma::Algebra::GammaZGamma5), | ||||
|   Gamma(Gamma::Algebra::GammaTGamma5), | ||||
|   Gamma(Gamma::Algebra::SigmaXT),       | ||||
|   Gamma(Gamma::Algebra::SigmaXY),       | ||||
|   Gamma(Gamma::Algebra::SigmaXZ),       | ||||
|   Gamma(Gamma::Algebra::SigmaYT), | ||||
|   Gamma(Gamma::Algebra::SigmaYZ), | ||||
|   Gamma(Gamma::Algebra::SigmaZT)}}; | ||||
|  | ||||
| const std::array<const char *, Gamma::nGamma> Gamma::name = {{ | ||||
|   "-Gamma5      ", | ||||
|   "Gamma5       ", | ||||
|   | ||||
| @@ -48,7 +48,6 @@ class Gamma { | ||||
|     static const std::array<std::array<Algebra, nGamma>, nGamma> mul; | ||||
|     static const std::array<Algebra, nGamma>                     adj; | ||||
|     static const std::array<const Gamma, 4>                      gmu; | ||||
|     static const std::array<const Gamma, 16>                     gall; | ||||
|     Algebra                                                      g; | ||||
|   public: | ||||
|     Gamma(Algebra initg): g(initg) {}   | ||||
|   | ||||
| @@ -10,10 +10,10 @@ | ||||
| NotebookFileLineBreakTest | ||||
| NotebookFileLineBreakTest | ||||
| NotebookDataPosition[       158,          7] | ||||
| NotebookDataLength[     67118,       1714] | ||||
| NotebookOptionsPosition[     63485,       1652] | ||||
| NotebookOutlinePosition[     63842,       1668] | ||||
| CellTagsIndexPosition[     63799,       1665] | ||||
| NotebookDataLength[     75090,       1956] | ||||
| NotebookOptionsPosition[     69536,       1867] | ||||
| NotebookOutlinePosition[     69898,       1883] | ||||
| CellTagsIndexPosition[     69855,       1880] | ||||
| WindowFrame->Normal*) | ||||
|  | ||||
| (* Beginning of Notebook Content *) | ||||
| @@ -76,6 +76,234 @@ Cell[BoxData["\<\"/Users/antonin/Development/Grid/lib/qcd/spin/gamma-gen\"\>"]\ | ||||
|  | ||||
| Cell[CellGroupData[{ | ||||
|  | ||||
| Cell[BoxData[ | ||||
|  RowBox[{"FactorInteger", "[", "3152", "]"}]], "Input", | ||||
|  CellChangeTimes->{{3.7432347536316767`*^9, 3.7432347764739027`*^9}, { | ||||
|   3.743234833567358*^9,  | ||||
|   3.743234862146022*^9}},ExpressionUUID->"d1a0fd03-85e1-43af-ba80-\ | ||||
| 3ca4235675d8"], | ||||
|  | ||||
| Cell[BoxData[ | ||||
|  RowBox[{"{",  | ||||
|   RowBox[{ | ||||
|    RowBox[{"{",  | ||||
|     RowBox[{"2", ",", "4"}], "}"}], ",",  | ||||
|    RowBox[{"{",  | ||||
|     RowBox[{"197", ",", "1"}], "}"}]}], "}"}]], "Output", | ||||
|  CellChangeTimes->{{3.743234836792224*^9,  | ||||
|   3.743234862493619*^9}},ExpressionUUID->"16d3f953-4b24-4ed2-ae62-\ | ||||
| 306dcab66ca7"] | ||||
| }, Open  ]], | ||||
|  | ||||
| Cell[CellGroupData[{ | ||||
|  | ||||
| Cell[BoxData[ | ||||
|  RowBox[{"sol", "=",  | ||||
|   RowBox[{"Solve", "[",  | ||||
|    RowBox[{ | ||||
|     RowBox[{ | ||||
|      RowBox[{ | ||||
|       SuperscriptBox["x", "2"], "+",  | ||||
|       SuperscriptBox["y", "2"], "+",  | ||||
|       SuperscriptBox["z", "2"]}], "\[Equal]", "2"}], ",",  | ||||
|     RowBox[{"{",  | ||||
|      RowBox[{"x", ",", "y", ",", "z"}], "}"}], ",", "Integers"}],  | ||||
|    "]"}]}]], "Input", | ||||
|  CellChangeTimes->{{3.743235304127721*^9,  | ||||
|   3.7432353087929983`*^9}},ExpressionUUID->"f0fa2a5c-3d81-4d75-a447-\ | ||||
| 50c7ca3459ff"], | ||||
|  | ||||
| Cell[BoxData[ | ||||
|  RowBox[{"{",  | ||||
|   RowBox[{ | ||||
|    RowBox[{"{",  | ||||
|     RowBox[{ | ||||
|      RowBox[{"x", "\[Rule]",  | ||||
|       RowBox[{"-", "1"}]}], ",",  | ||||
|      RowBox[{"y", "\[Rule]",  | ||||
|       RowBox[{"-", "1"}]}], ",",  | ||||
|      RowBox[{"z", "\[Rule]", "0"}]}], "}"}], ",",  | ||||
|    RowBox[{"{",  | ||||
|     RowBox[{ | ||||
|      RowBox[{"x", "\[Rule]",  | ||||
|       RowBox[{"-", "1"}]}], ",",  | ||||
|      RowBox[{"y", "\[Rule]", "0"}], ",",  | ||||
|      RowBox[{"z", "\[Rule]",  | ||||
|       RowBox[{"-", "1"}]}]}], "}"}], ",",  | ||||
|    RowBox[{"{",  | ||||
|     RowBox[{ | ||||
|      RowBox[{"x", "\[Rule]",  | ||||
|       RowBox[{"-", "1"}]}], ",",  | ||||
|      RowBox[{"y", "\[Rule]", "0"}], ",",  | ||||
|      RowBox[{"z", "\[Rule]", "1"}]}], "}"}], ",",  | ||||
|    RowBox[{"{",  | ||||
|     RowBox[{ | ||||
|      RowBox[{"x", "\[Rule]",  | ||||
|       RowBox[{"-", "1"}]}], ",",  | ||||
|      RowBox[{"y", "\[Rule]", "1"}], ",",  | ||||
|      RowBox[{"z", "\[Rule]", "0"}]}], "}"}], ",",  | ||||
|    RowBox[{"{",  | ||||
|     RowBox[{ | ||||
|      RowBox[{"x", "\[Rule]", "0"}], ",",  | ||||
|      RowBox[{"y", "\[Rule]",  | ||||
|       RowBox[{"-", "1"}]}], ",",  | ||||
|      RowBox[{"z", "\[Rule]",  | ||||
|       RowBox[{"-", "1"}]}]}], "}"}], ",",  | ||||
|    RowBox[{"{",  | ||||
|     RowBox[{ | ||||
|      RowBox[{"x", "\[Rule]", "0"}], ",",  | ||||
|      RowBox[{"y", "\[Rule]",  | ||||
|       RowBox[{"-", "1"}]}], ",",  | ||||
|      RowBox[{"z", "\[Rule]", "1"}]}], "}"}], ",",  | ||||
|    RowBox[{"{",  | ||||
|     RowBox[{ | ||||
|      RowBox[{"x", "\[Rule]", "0"}], ",",  | ||||
|      RowBox[{"y", "\[Rule]", "1"}], ",",  | ||||
|      RowBox[{"z", "\[Rule]",  | ||||
|       RowBox[{"-", "1"}]}]}], "}"}], ",",  | ||||
|    RowBox[{"{",  | ||||
|     RowBox[{ | ||||
|      RowBox[{"x", "\[Rule]", "0"}], ",",  | ||||
|      RowBox[{"y", "\[Rule]", "1"}], ",",  | ||||
|      RowBox[{"z", "\[Rule]", "1"}]}], "}"}], ",",  | ||||
|    RowBox[{"{",  | ||||
|     RowBox[{ | ||||
|      RowBox[{"x", "\[Rule]", "1"}], ",",  | ||||
|      RowBox[{"y", "\[Rule]",  | ||||
|       RowBox[{"-", "1"}]}], ",",  | ||||
|      RowBox[{"z", "\[Rule]", "0"}]}], "}"}], ",",  | ||||
|    RowBox[{"{",  | ||||
|     RowBox[{ | ||||
|      RowBox[{"x", "\[Rule]", "1"}], ",",  | ||||
|      RowBox[{"y", "\[Rule]", "0"}], ",",  | ||||
|      RowBox[{"z", "\[Rule]",  | ||||
|       RowBox[{"-", "1"}]}]}], "}"}], ",",  | ||||
|    RowBox[{"{",  | ||||
|     RowBox[{ | ||||
|      RowBox[{"x", "\[Rule]", "1"}], ",",  | ||||
|      RowBox[{"y", "\[Rule]", "0"}], ",",  | ||||
|      RowBox[{"z", "\[Rule]", "1"}]}], "}"}], ",",  | ||||
|    RowBox[{"{",  | ||||
|     RowBox[{ | ||||
|      RowBox[{"x", "\[Rule]", "1"}], ",",  | ||||
|      RowBox[{"y", "\[Rule]", "1"}], ",",  | ||||
|      RowBox[{"z", "\[Rule]", "0"}]}], "}"}]}], "}"}]], "Output", | ||||
|  CellChangeTimes->{{3.743235305220907*^9,  | ||||
|   3.743235309139554*^9}},ExpressionUUID->"d9825c95-24bb-442a-8734-\ | ||||
| 4c0f47e99dfc"] | ||||
| }, Open  ]], | ||||
|  | ||||
| Cell[BoxData[ | ||||
|  RowBox[{ | ||||
|   RowBox[{"xmlElem", "[", "x_", "]"}], ":=",  | ||||
|   RowBox[{"Print", "[",  | ||||
|    RowBox[{"\"\<<elem>\>\"", "<>",  | ||||
|     RowBox[{"ToString", "[",  | ||||
|      RowBox[{"x", "[",  | ||||
|       RowBox[{"[", "1", "]"}], "]"}], "]"}], "<>", "\"\< \>\"", "<>",  | ||||
|     RowBox[{"ToString", "[",  | ||||
|      RowBox[{"x", "[",  | ||||
|       RowBox[{"[", "2", "]"}], "]"}], "]"}], "<>", "\"\< \>\"", "<>",  | ||||
|     RowBox[{"ToString", "[",  | ||||
|      RowBox[{"x", "[",  | ||||
|       RowBox[{"[", "3", "]"}], "]"}], "]"}], "<>", "\"\<</elem>\>\""}],  | ||||
|    "]"}]}]], "Input", | ||||
|  CellChangeTimes->{{3.74323534002862*^9, 3.743235351000985*^9}, { | ||||
|   3.743235403233039*^9, 3.743235413488028*^9}, {3.743235473169856*^9,  | ||||
|   3.7432354747126904`*^9}},ExpressionUUID->"aea76313-c89e-45e8-b429-\ | ||||
| 3f454091666d"], | ||||
|  | ||||
| Cell[CellGroupData[{ | ||||
|  | ||||
| Cell[BoxData[ | ||||
|  RowBox[{ | ||||
|   RowBox[{ | ||||
|    RowBox[{"xmlElem", "[",  | ||||
|     RowBox[{ | ||||
|      RowBox[{"{",  | ||||
|       RowBox[{"x", ",", "y", ",", "z"}], "}"}], "/.", "#"}], "]"}], "&"}], "/@", | ||||
|    "sol"}]], "Input", | ||||
|  CellChangeTimes->{{3.743235415820318*^9,  | ||||
|   3.743235467025091*^9}},ExpressionUUID->"07da3998-8eab-40ba-8c0b-\ | ||||
| ac6b130cb4fb"], | ||||
|  | ||||
| Cell[CellGroupData[{ | ||||
|  | ||||
| Cell[BoxData["\<\"<elem>-1 -1 0</elem>\"\>"], "Print", | ||||
|  CellChangeTimes->{ | ||||
|   3.743235476581676*^9},ExpressionUUID->"c577ba06-b67a-405a-9ff5-\ | ||||
| 2bf7dc898d03"], | ||||
|  | ||||
| Cell[BoxData["\<\"<elem>-1 0 -1</elem>\"\>"], "Print", | ||||
|  CellChangeTimes->{ | ||||
|   3.743235476588011*^9},ExpressionUUID->"d041aa36-0cea-457c-9d4b-\ | ||||
| 1fe9be66e2ab"], | ||||
|  | ||||
| Cell[BoxData["\<\"<elem>-1 0 1</elem>\"\>"], "Print", | ||||
|  CellChangeTimes->{ | ||||
|   3.743235476596887*^9},ExpressionUUID->"bf141b55-86b2-4430-a994-\ | ||||
| 5c03d5a19441"], | ||||
|  | ||||
| Cell[BoxData["\<\"<elem>-1 1 0</elem>\"\>"], "Print", | ||||
|  CellChangeTimes->{ | ||||
|   3.743235476605785*^9},ExpressionUUID->"4968a660-4ecf-4b66-9071-\ | ||||
| 8bd798c18d21"], | ||||
|  | ||||
| Cell[BoxData["\<\"<elem>0 -1 -1</elem>\"\>"], "Print", | ||||
|  CellChangeTimes->{ | ||||
|   3.743235476613523*^9},ExpressionUUID->"4e22d943-2680-416b-a1d7-\ | ||||
| a16ca20b781f"], | ||||
|  | ||||
| Cell[BoxData["\<\"<elem>0 -1 1</elem>\"\>"], "Print", | ||||
|  CellChangeTimes->{ | ||||
|   3.7432354766218576`*^9},ExpressionUUID->"6dd38385-08b3-4dd9-932f-\ | ||||
| 98a00c6db1b2"], | ||||
|  | ||||
| Cell[BoxData["\<\"<elem>0 1 -1</elem>\"\>"], "Print", | ||||
|  CellChangeTimes->{ | ||||
|   3.743235476629427*^9},ExpressionUUID->"ef3baad3-91d1-4735-9a22-\ | ||||
| 53495a624c15"], | ||||
|  | ||||
| Cell[BoxData["\<\"<elem>0 1 1</elem>\"\>"], "Print", | ||||
|  CellChangeTimes->{ | ||||
|   3.743235476638257*^9},ExpressionUUID->"413fbb68-5017-4272-a62a-\ | ||||
| fa234e6daaea"], | ||||
|  | ||||
| Cell[BoxData["\<\"<elem>1 -1 0</elem>\"\>"], "Print", | ||||
|  CellChangeTimes->{ | ||||
|   3.743235476646203*^9},ExpressionUUID->"3a832a60-ae00-414b-a9ac-\ | ||||
| f5e86e67e917"], | ||||
|  | ||||
| Cell[BoxData["\<\"<elem>1 0 -1</elem>\"\>"], "Print", | ||||
|  CellChangeTimes->{ | ||||
|   3.743235476653907*^9},ExpressionUUID->"bfc79ef6-f6c7-4f1e-88e8-\ | ||||
| 005ac314be9c"], | ||||
|  | ||||
| Cell[BoxData["\<\"<elem>1 0 1</elem>\"\>"], "Print", | ||||
|  CellChangeTimes->{ | ||||
|   3.743235476662575*^9},ExpressionUUID->"0f892891-f885-489c-9925-\ | ||||
| ddef4d698410"], | ||||
|  | ||||
| Cell[BoxData["\<\"<elem>1 1 0</elem>\"\>"], "Print", | ||||
|  CellChangeTimes->{ | ||||
|   3.7432354766702337`*^9},ExpressionUUID->"2906f190-e673-4f33-9c34-\ | ||||
| e8e56efe7a27"] | ||||
| }, Open  ]], | ||||
|  | ||||
| Cell[BoxData[ | ||||
|  RowBox[{"{",  | ||||
|   RowBox[{ | ||||
|   "Null", ",", "Null", ",", "Null", ",", "Null", ",", "Null", ",", "Null",  | ||||
|    ",", "Null", ",", "Null", ",", "Null", ",", "Null", ",", "Null", ",",  | ||||
|    "Null"}], "}"}]], "Output", | ||||
|  CellChangeTimes->{ | ||||
|   3.7432354246225967`*^9, {3.7432354674878073`*^9,  | ||||
|    3.743235476678007*^9}},ExpressionUUID->"500ca3c1-88d8-46e5-a1a1-\ | ||||
| 86a7878e5638"] | ||||
| }, Open  ]], | ||||
|  | ||||
| Cell[CellGroupData[{ | ||||
|  | ||||
| Cell["Clifford algebra generation", "Section", | ||||
|  CellChangeTimes->{{3.6942089434583883`*^9,  | ||||
|   3.694208978559093*^9}},ExpressionUUID->"a5b064b3-3011-4922-8559-\ | ||||
| @@ -820,10 +1048,9 @@ generated by the Mathematica notebook gamma-gen/gamma-gen.nb\n\n#include \ | ||||
|        "\"\<    static const std::array<const char *, nGamma>                \ | ||||
| name;\n    static const std::array<std::array<Algebra, nGamma>, nGamma> mul;\n\ | ||||
|     static const std::array<Algebra, nGamma>                     adj;\n    \ | ||||
| static const std::array<const Gamma, 4>                      gmu;\n    static \ | ||||
| const std::array<const Gamma, 16>                     gall;\n    Algebra      \ | ||||
|                                                 g;\n  public:\n    \ | ||||
| Gamma(Algebra initg): g(initg) {}  \n};\n\n\>\""}]}], ";",  | ||||
| static const std::array<const Gamma, 4>                      gmu;\n    \ | ||||
| Algebra                                                      g;\n  public:\n  \ | ||||
|   Gamma(Algebra initg): g(initg) {}  \n};\n\n\>\""}]}], ";",  | ||||
|      "\[IndentingNewLine]",  | ||||
|      RowBox[{"out", " ", "=",  | ||||
|       RowBox[{"out", "<>", "funcCode"}]}], ";", "\[IndentingNewLine]",  | ||||
| @@ -849,8 +1076,7 @@ Gamma(Algebra initg): g(initg) {}  \n};\n\n\>\""}]}], ";", | ||||
|    3.694963343265525*^9}, {3.694964367519239*^9, 3.69496439461199*^9}, { | ||||
|    3.694964462130747*^9, 3.6949644669959793`*^9}, 3.694964509762739*^9, { | ||||
|    3.694964705045744*^9, 3.694964723148797*^9}, {3.694964992988984*^9,  | ||||
|    3.6949649968504257`*^9}, {3.758291687176977*^9,  | ||||
|    3.758291694181189*^9}},ExpressionUUID->"c7103bd6-b539-4495-b98c-\ | ||||
|    3.6949649968504257`*^9}},ExpressionUUID->"c7103bd6-b539-4495-b98c-\ | ||||
| d4d12ac6cad8"], | ||||
|  | ||||
| Cell["Gamma enum generation:", "Text", | ||||
| @@ -1519,17 +1745,8 @@ namespace QCD {\>\""}]}], ";", "\[IndentingNewLine]", | ||||
|        "\"\<\n\nconst std::array<const Gamma, 4> Gamma::gmu = {{\n  \ | ||||
| Gamma(Gamma::Algebra::GammaX),\n  Gamma(Gamma::Algebra::GammaY),\n  \ | ||||
| Gamma(Gamma::Algebra::GammaZ),\n  Gamma(Gamma::Algebra::GammaT)}};\n\nconst \ | ||||
| std::array<const Gamma, 16> Gamma::gall = {{\n  \ | ||||
| Gamma(Gamma::Algebra::Identity),\n  Gamma(Gamma::Algebra::Gamma5),\n  \ | ||||
| Gamma(Gamma::Algebra::GammaX),\n  Gamma(Gamma::Algebra::GammaY),\n  \ | ||||
| Gamma(Gamma::Algebra::GammaZ),\n  Gamma(Gamma::Algebra::GammaT),\n  \ | ||||
| Gamma(Gamma::Algebra::GammaXGamma5),\n  Gamma(Gamma::Algebra::GammaYGamma5),\n\ | ||||
|   Gamma(Gamma::Algebra::GammaZGamma5),\n  \ | ||||
| Gamma(Gamma::Algebra::GammaTGamma5),\n  Gamma(Gamma::Algebra::SigmaXT),      \ | ||||
| \n  Gamma(Gamma::Algebra::SigmaXY),      \n  Gamma(Gamma::Algebra::SigmaXZ),  \ | ||||
|     \n  Gamma(Gamma::Algebra::SigmaYT),\n  Gamma(Gamma::Algebra::SigmaYZ),\n  \ | ||||
| Gamma(Gamma::Algebra::SigmaZT)}};\n\nconst std::array<const char *, \ | ||||
| Gamma::nGamma> Gamma::name = {{\n\>\""}]}], ";", "\[IndentingNewLine]",  | ||||
| std::array<const char *, Gamma::nGamma> Gamma::name = {{\n\>\""}]}], ";",  | ||||
|      "\[IndentingNewLine]",  | ||||
|      RowBox[{"Do", "[", "\[IndentingNewLine]",  | ||||
|       RowBox[{ | ||||
|        RowBox[{"out", " ", "=", " ",  | ||||
| @@ -1630,9 +1847,7 @@ Gamma::nGamma> Gamma::mul = {{\\n\>\""}]}], ";", "\[IndentingNewLine]", | ||||
|    3.694963031525289*^9}, {3.694963065828494*^9, 3.694963098327538*^9}, { | ||||
|    3.6949632020836153`*^9, 3.6949632715940027`*^9}, {3.694963440035037*^9,  | ||||
|    3.6949634418966017`*^9}, {3.6949651447067547`*^9, 3.694965161228381*^9}, { | ||||
|    3.694967957845581*^9, 3.694967958364184*^9}, {3.758291673792514*^9,  | ||||
|    3.758291676983432*^9}},ExpressionUUID->"b1b309f8-a3a7-4081-a781-\ | ||||
| c3845e3cd372"], | ||||
|    3.694967957845581*^9, 3.694967958364184*^9}}], | ||||
|  | ||||
| Cell[BoxData[ | ||||
|  RowBox[{ | ||||
| @@ -1652,8 +1867,8 @@ Cell[BoxData[""], "Input", | ||||
| }, | ||||
| WindowSize->{1246, 1005}, | ||||
| WindowMargins->{{282, Automatic}, {Automatic, 14}}, | ||||
| FrontEndVersion->"11.3 for Mac OS X x86 (32-bit, 64-bit Kernel) (March 5, \ | ||||
| 2018)", | ||||
| FrontEndVersion->"11.2 for Mac OS X x86 (32-bit, 64-bit Kernel) (September \ | ||||
| 10, 2017)", | ||||
| StyleDefinitions->"Default.nb" | ||||
| ] | ||||
| (* End of Notebook Content *) | ||||
| @@ -1673,48 +1888,75 @@ Cell[1948, 43, 570, 11, 73, "Input",ExpressionUUID->"5c937a3e-adfd-4d7e-8fde-afb | ||||
| Cell[2521, 56, 1172, 17, 34, "Output",ExpressionUUID->"72817ba6-2f6a-4a4d-8212-6f0970f49e7c"] | ||||
| }, Open  ]], | ||||
| Cell[CellGroupData[{ | ||||
| Cell[3730, 78, 174, 3, 67, "Section",ExpressionUUID->"a5b064b3-3011-4922-8559-ead857cad102"], | ||||
| Cell[3907, 83, 535, 16, 52, "Input",ExpressionUUID->"aa28f02b-31e1-4df2-9b5d-482177464b59"], | ||||
| Cell[4445, 101, 250, 4, 35, "Text",ExpressionUUID->"c8896b88-f1db-4ce4-b7a6-0c9838bdb8f1"], | ||||
| Cell[4698, 107, 5511, 169, 425, "Input",ExpressionUUID->"52a96ff6-047e-4043-86d0-e303866e5f8e"], | ||||
| Cell[CellGroupData[{ | ||||
| Cell[10234, 280, 2183, 58, 135, "Input",ExpressionUUID->"8b0f4955-2c3f-418c-9226-9be8f87621e8"], | ||||
| Cell[12420, 340, 1027, 27, 56, "Output",ExpressionUUID->"edd0619f-6f12-4070-a1d2-6b547877fadc"] | ||||
| Cell[3730, 78, 248, 5, 30, "Input",ExpressionUUID->"d1a0fd03-85e1-43af-ba80-3ca4235675d8"], | ||||
| Cell[3981, 85, 299, 9, 34, "Output",ExpressionUUID->"16d3f953-4b24-4ed2-ae62-306dcab66ca7"] | ||||
| }, Open  ]], | ||||
| Cell[CellGroupData[{ | ||||
| Cell[13484, 372, 1543, 46, 114, "Input",ExpressionUUID->"fb45123c-c610-4075-99b0-7cd71c728ae7"], | ||||
| Cell[15030, 420, 1311, 32, 87, "Output",ExpressionUUID->"2ae14565-b412-4dc0-9dce-bd6c1ba5ef27"] | ||||
| Cell[4317, 99, 469, 14, 33, "Input",ExpressionUUID->"f0fa2a5c-3d81-4d75-a447-50c7ca3459ff"], | ||||
| Cell[4789, 115, 2423, 77, 56, "Output",ExpressionUUID->"d9825c95-24bb-442a-8734-4c0f47e99dfc"] | ||||
| }, Open  ]], | ||||
| Cell[16356, 455, 179, 3, 35, "Text",ExpressionUUID->"af247231-a58d-417b-987a-26908dafffdb"], | ||||
| Cell[16538, 460, 2175, 65, 94, "Input",ExpressionUUID->"7c44cadd-e488-4f51-87d8-c64eef11f40c"], | ||||
| Cell[18716, 527, 193, 3, 35, "Text",ExpressionUUID->"856f1746-1107-4509-a5ce-ac9c7f56cdb1"], | ||||
| Cell[7227, 195, 751, 18, 30, "Input",ExpressionUUID->"aea76313-c89e-45e8-b429-3f454091666d"], | ||||
| Cell[CellGroupData[{ | ||||
| Cell[18934, 534, 536, 16, 30, "Input",ExpressionUUID->"8674484a-8543-434f-b177-3b27f9353212"], | ||||
| Cell[19473, 552, 1705, 35, 87, "Output",ExpressionUUID->"c3b3f84d-91f6-41af-af6b-a394ca020511"] | ||||
| Cell[8003, 217, 323, 10, 30, "Input",ExpressionUUID->"07da3998-8eab-40ba-8c0b-ac6b130cb4fb"], | ||||
| Cell[CellGroupData[{ | ||||
| Cell[8351, 231, 156, 3, 24, "Print",ExpressionUUID->"c577ba06-b67a-405a-9ff5-2bf7dc898d03"], | ||||
| Cell[8510, 236, 156, 3, 24, "Print",ExpressionUUID->"d041aa36-0cea-457c-9d4b-1fe9be66e2ab"], | ||||
| Cell[8669, 241, 155, 3, 24, "Print",ExpressionUUID->"bf141b55-86b2-4430-a994-5c03d5a19441"], | ||||
| Cell[8827, 246, 155, 3, 24, "Print",ExpressionUUID->"4968a660-4ecf-4b66-9071-8bd798c18d21"], | ||||
| Cell[8985, 251, 156, 3, 24, "Print",ExpressionUUID->"4e22d943-2680-416b-a1d7-a16ca20b781f"], | ||||
| Cell[9144, 256, 157, 3, 24, "Print",ExpressionUUID->"6dd38385-08b3-4dd9-932f-98a00c6db1b2"], | ||||
| Cell[9304, 261, 155, 3, 24, "Print",ExpressionUUID->"ef3baad3-91d1-4735-9a22-53495a624c15"], | ||||
| Cell[9462, 266, 154, 3, 24, "Print",ExpressionUUID->"413fbb68-5017-4272-a62a-fa234e6daaea"], | ||||
| Cell[9619, 271, 155, 3, 24, "Print",ExpressionUUID->"3a832a60-ae00-414b-a9ac-f5e86e67e917"], | ||||
| Cell[9777, 276, 155, 3, 24, "Print",ExpressionUUID->"bfc79ef6-f6c7-4f1e-88e8-005ac314be9c"], | ||||
| Cell[9935, 281, 154, 3, 24, "Print",ExpressionUUID->"0f892891-f885-489c-9925-ddef4d698410"], | ||||
| Cell[10092, 286, 156, 3, 24, "Print",ExpressionUUID->"2906f190-e673-4f33-9c34-e8e56efe7a27"] | ||||
| }, Open  ]], | ||||
| Cell[10263, 292, 376, 9, 34, "Output",ExpressionUUID->"500ca3c1-88d8-46e5-a1a1-86a7878e5638"] | ||||
| }, Open  ]], | ||||
| Cell[21193, 590, 170, 3, 35, "Text",ExpressionUUID->"518a3040-54b1-4d43-8947-5c7d12efa94d"], | ||||
| Cell[CellGroupData[{ | ||||
| Cell[21388, 597, 536, 14, 30, "Input",ExpressionUUID->"61a2e974-2b39-4a07-8043-2dfd39a70569"], | ||||
| Cell[21927, 613, 6754, 167, 303, "Output",ExpressionUUID->"73480ac0-3043-4077-80cc-b952a94c822a"] | ||||
| Cell[10676, 306, 174, 3, 67, "Section",ExpressionUUID->"a5b064b3-3011-4922-8559-ead857cad102"], | ||||
| Cell[10853, 311, 535, 16, 52, "Input",ExpressionUUID->"aa28f02b-31e1-4df2-9b5d-482177464b59"], | ||||
| Cell[11391, 329, 250, 4, 35, "Text",ExpressionUUID->"c8896b88-f1db-4ce4-b7a6-0c9838bdb8f1"], | ||||
| Cell[11644, 335, 5511, 169, 425, "Input",ExpressionUUID->"52a96ff6-047e-4043-86d0-e303866e5f8e"], | ||||
| Cell[CellGroupData[{ | ||||
| Cell[17180, 508, 2183, 58, 135, "Input",ExpressionUUID->"8b0f4955-2c3f-418c-9226-9be8f87621e8"], | ||||
| Cell[19366, 568, 1027, 27, 67, "Output",ExpressionUUID->"edd0619f-6f12-4070-a1d2-6b547877fadc"] | ||||
| }, Open  ]], | ||||
| Cell[CellGroupData[{ | ||||
| Cell[20430, 600, 1543, 46, 114, "Input",ExpressionUUID->"fb45123c-c610-4075-99b0-7cd71c728ae7"], | ||||
| Cell[21976, 648, 1311, 32, 98, "Output",ExpressionUUID->"2ae14565-b412-4dc0-9dce-bd6c1ba5ef27"] | ||||
| }, Open  ]], | ||||
| Cell[23302, 683, 179, 3, 35, "Text",ExpressionUUID->"af247231-a58d-417b-987a-26908dafffdb"], | ||||
| Cell[23484, 688, 2175, 65, 94, "Input",ExpressionUUID->"7c44cadd-e488-4f51-87d8-c64eef11f40c"], | ||||
| Cell[25662, 755, 193, 3, 35, "Text",ExpressionUUID->"856f1746-1107-4509-a5ce-ac9c7f56cdb1"], | ||||
| Cell[CellGroupData[{ | ||||
| Cell[25880, 762, 536, 16, 30, "Input",ExpressionUUID->"8674484a-8543-434f-b177-3b27f9353212"], | ||||
| Cell[26419, 780, 1705, 35, 87, "Output",ExpressionUUID->"c3b3f84d-91f6-41af-af6b-a394ca020511"] | ||||
| }, Open  ]], | ||||
| Cell[28139, 818, 170, 3, 35, "Text",ExpressionUUID->"518a3040-54b1-4d43-8947-5c7d12efa94d"], | ||||
| Cell[CellGroupData[{ | ||||
| Cell[28334, 825, 536, 14, 30, "Input",ExpressionUUID->"61a2e974-2b39-4a07-8043-2dfd39a70569"], | ||||
| Cell[28873, 841, 6754, 167, 303, "Output",ExpressionUUID->"73480ac0-3043-4077-80cc-b952a94c822a"] | ||||
| }, Open  ]] | ||||
| }, Open  ]], | ||||
| Cell[CellGroupData[{ | ||||
| Cell[28730, 786, 226, 4, 67, "Section",ExpressionUUID->"4e833cd6-9f0e-4aa3-a873-3d579e874720"], | ||||
| Cell[28959, 792, 188, 4, 44, "Text",ExpressionUUID->"6d27fc04-3a60-4e03-8df7-3dd3aeee35b4"], | ||||
| Cell[29150, 798, 3104, 55, 724, "Input",ExpressionUUID->"c7103bd6-b539-4495-b98c-d4d12ac6cad8"], | ||||
| Cell[32257, 855, 221, 4, 44, "Text",ExpressionUUID->"0625593d-290f-4a39-9d80-8e2c6fdbc94e"], | ||||
| Cell[32481, 861, 4936, 150, 682, "Input",ExpressionUUID->"1ad4904c-352f-4b1d-a7c7-91e1b0549409"], | ||||
| Cell[37420, 1013, 2645, 56, 199, "Input",ExpressionUUID->"0221674f-9b63-4662-91bc-ccc8c6ae9589"], | ||||
| Cell[40068, 1071, 209, 4, 44, "Text",ExpressionUUID->"d2d2257a-487b-416f-bc40-abd4482225f7"], | ||||
| Cell[40280, 1077, 15306, 397, 2131, "Input",ExpressionUUID->"daea68a9-c9e8-46ab-9bc8-5186e2cf477c"], | ||||
| Cell[55589, 1476, 137, 2, 44, "Text",ExpressionUUID->"76ba9d5a-7ee3-4888-be7e-6377003275e8"], | ||||
| Cell[55729, 1480, 521, 12, 30, "Input",ExpressionUUID->"4ec61f4c-3fd3-49ea-b5ef-6f7f04a16b34"] | ||||
| Cell[35676, 1014, 226, 4, 67, "Section",ExpressionUUID->"4e833cd6-9f0e-4aa3-a873-3d579e874720"], | ||||
| Cell[35905, 1020, 188, 4, 44, "Text",ExpressionUUID->"6d27fc04-3a60-4e03-8df7-3dd3aeee35b4"], | ||||
| Cell[36096, 1026, 2980, 53, 703, "Input",ExpressionUUID->"c7103bd6-b539-4495-b98c-d4d12ac6cad8"], | ||||
| Cell[39079, 1081, 221, 4, 44, "Text",ExpressionUUID->"0625593d-290f-4a39-9d80-8e2c6fdbc94e"], | ||||
| Cell[39303, 1087, 4936, 150, 682, "Input",ExpressionUUID->"1ad4904c-352f-4b1d-a7c7-91e1b0549409"], | ||||
| Cell[44242, 1239, 2645, 56, 199, "Input",ExpressionUUID->"0221674f-9b63-4662-91bc-ccc8c6ae9589"], | ||||
| Cell[46890, 1297, 209, 4, 44, "Text",ExpressionUUID->"d2d2257a-487b-416f-bc40-abd4482225f7"], | ||||
| Cell[47102, 1303, 15306, 397, 2131, "Input",ExpressionUUID->"daea68a9-c9e8-46ab-9bc8-5186e2cf477c"], | ||||
| Cell[62411, 1702, 137, 2, 44, "Text",ExpressionUUID->"76ba9d5a-7ee3-4888-be7e-6377003275e8"], | ||||
| Cell[62551, 1706, 521, 12, 30, "Input",ExpressionUUID->"4ec61f4c-3fd3-49ea-b5ef-6f7f04a16b34"] | ||||
| }, Open  ]], | ||||
| Cell[CellGroupData[{ | ||||
| Cell[56287, 1497, 167, 2, 67, "Section",ExpressionUUID->"a4458b3a-09b5-4e36-a1fc-781d6702b2dc"], | ||||
| Cell[56457, 1501, 6464, 133, 1207, "Input",ExpressionUUID->"b1b309f8-a3a7-4081-a781-c3845e3cd372"], | ||||
| Cell[62924, 1636, 448, 10, 30, "Input",ExpressionUUID->"cba42949-b0f2-42ce-aebd-ffadfd83ef88"], | ||||
| Cell[63375, 1648, 94, 1, 30, "Input",ExpressionUUID->"6175b72c-af9f-43c2-b4ca-bd84c48a456d"] | ||||
| Cell[63109, 1723, 167, 2, 67, "Section",ExpressionUUID->"a4458b3a-09b5-4e36-a1fc-781d6702b2dc"], | ||||
| Cell[63279, 1727, 5693, 122, 829, "Input",ExpressionUUID->"b1b309f8-a3a7-4081-a781-c3845e3cd372"], | ||||
| Cell[68975, 1851, 448, 10, 30, "Input",ExpressionUUID->"cba42949-b0f2-42ce-aebd-ffadfd83ef88"], | ||||
| Cell[69426, 1863, 94, 1, 30, "Input",ExpressionUUID->"6175b72c-af9f-43c2-b4ca-bd84c48a456d"] | ||||
| }, Open  ]] | ||||
| } | ||||
| ] | ||||
|   | ||||
| @@ -987,17 +987,16 @@ void A2Autils<FImpl>::ContractWWVV(std::vector<PropagatorField> &WWVV, | ||||
|       for(int s=0;s<N_s;s++){ | ||||
| 	auto tmp1 = vs[s]._odata[ss]; | ||||
| 	vobj tmp2 = zero; | ||||
|   vobj tmp3 = zero; | ||||
|  | ||||
| 	for(int d=d_o;d<MIN(d_o+d_unroll,N_d);d++){ | ||||
| 	  Scalar_v coeff = WW_sd(t,s,d); | ||||
| 	  tmp3 = conjugate(vd[d]._odata[ss]); | ||||
| 	  mac(&tmp2, &coeff, &tmp3); | ||||
| 	  mac(&tmp2 ,& coeff, & vd[d]._odata[ss]); | ||||
| 	} | ||||
|  | ||||
| 	////////////////////////// | ||||
| 	// Fast outer product of tmp1 with a sum of terms suppressed by d_unroll | ||||
| 	////////////////////////// | ||||
| 	tmp2 = conjugate(tmp2); | ||||
| 	for(int s1=0;s1<Ns;s1++){ | ||||
| 	for(int s2=0;s2<Ns;s2++){ | ||||
| 	  WWVV[t]._odata[ss]()(s1,s2)(0,0) += tmp1()(s1)(0)*tmp2()(s2)(0); | ||||
|   | ||||
| @@ -1,87 +0,0 @@ | ||||
| /************************************************************************************* | ||||
|  | ||||
| Grid physics library, www.github.com/paboyle/Grid | ||||
|  | ||||
| Source file: ./lib/qcd/action/scalar/CovariantLaplacian.h | ||||
|  | ||||
| Copyright (C) 2016 | ||||
|  | ||||
| Author: Azusa Yamaguchi | ||||
|  | ||||
| This program is free software; you can redistribute it and/or modify | ||||
| it under the terms of the GNU General Public License as published by | ||||
| the Free Software Foundation; either version 2 of the License, or | ||||
| (at your option) any later version. | ||||
|  | ||||
| This program is distributed in the hope that it will be useful, | ||||
| but WITHOUT ANY WARRANTY; without even the implied warranty of | ||||
| MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the | ||||
| GNU General Public License for more details. | ||||
|  | ||||
| You should have received a copy of the GNU General Public License along | ||||
| with this program; if not, write to the Free Software Foundation, Inc., | ||||
| 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. | ||||
|  | ||||
| See the full license in the file "LICENSE" in the top level distribution | ||||
| directory | ||||
| *************************************************************************************/ | ||||
| #pragma once | ||||
|  | ||||
| namespace Grid { | ||||
| namespace QCD { | ||||
|  | ||||
| template <class Gimpl> class CovariantSmearing : public Gimpl  | ||||
| { | ||||
| public: | ||||
|   INHERIT_GIMPL_TYPES(Gimpl); | ||||
|  | ||||
|   typedef typename Gimpl::GaugeLinkField GaugeMat; | ||||
|   typedef typename Gimpl::GaugeField GaugeLorentz; | ||||
|  | ||||
|   template<typename T> | ||||
|   static void GaussianSmear(const std::vector<LatticeColourMatrix>& U,  | ||||
| 			    T& chi,  | ||||
| 			    const Real& width, int Iterations, int orthog) | ||||
|   { | ||||
|     GridBase *grid = chi._grid; | ||||
|     T psi(grid); | ||||
|  | ||||
|     //////////////////////////////////////////////////////////////////////////////////// | ||||
|     // Follow Chroma conventions for width to keep compatibility with previous data | ||||
|     // Free field iterates  | ||||
|     //   chi = (1 - w^2/4N p^2)^N chi | ||||
|     // | ||||
|     //       ~ (e^(-w^2/4N p^2)^N chi | ||||
|     //       ~ (e^(-w^2/4 p^2) chi | ||||
|     //       ~ (e^(-w'^2/2 p^2) chi          [ w' = w/sqrt(2) ] | ||||
|     // | ||||
|     // Which in coordinate space is proportional to | ||||
|     // | ||||
|     //   e^(-x^2/w^2) = e^(-x^2/2w'^2)  | ||||
|     // | ||||
|     // The 4 is a bit unconventional from Gaussian width perspective, but... it's Chroma convention. | ||||
|     // 2nd derivative approx d^2/dx^2  =  x+mu + x-mu - 2x | ||||
|     // | ||||
|     // d^2/dx^2 = - p^2 | ||||
|     // | ||||
|     // chi = ( 1 + w^2/4N d^2/dx^2 )^N chi | ||||
|     // | ||||
|     //////////////////////////////////////////////////////////////////////////////////// | ||||
|     Real coeff = (width*width) / Real(4*Iterations); | ||||
|    | ||||
|     int dims = Nd; | ||||
|     if( orthog < Nd ) dims=Nd-1; | ||||
|  | ||||
|     for(int n = 0; n < Iterations; ++n) { | ||||
|       psi = (-2.0*dims)*chi; | ||||
|       for(int mu=0;mu<Nd;mu++) { | ||||
| 	if ( mu != orthog ) {  | ||||
| 	  psi = psi + Gimpl::CovShiftForward(U[mu],mu,chi);     | ||||
| 	  psi = psi + Gimpl::CovShiftBackward(U[mu],mu,chi);     | ||||
| 	} | ||||
|       } | ||||
|       chi = chi + coeff*psi; | ||||
|     } | ||||
|   } | ||||
| }; | ||||
| }} | ||||
| @@ -31,7 +31,6 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk> | ||||
| namespace Grid { | ||||
| namespace QCD { | ||||
|  | ||||
|  | ||||
| template <class Gimpl>  | ||||
| class FourierAcceleratedGaugeFixer  : public Gimpl { | ||||
|  public: | ||||
| @@ -46,22 +45,13 @@ class FourierAcceleratedGaugeFixer  : public Gimpl { | ||||
|       A[mu] = Ta(U[mu]) * cmi; | ||||
|     } | ||||
|   } | ||||
|   static void DmuAmu(const std::vector<GaugeMat> &A,GaugeMat &dmuAmu,int orthog) { | ||||
|   static void DmuAmu(const std::vector<GaugeMat> &A,GaugeMat &dmuAmu) { | ||||
|     dmuAmu=zero; | ||||
|     for(int mu=0;mu<Nd;mu++){ | ||||
|       if ( mu != orthog ) { | ||||
|       dmuAmu = dmuAmu + A[mu] - Cshift(A[mu],mu,-1); | ||||
|     } | ||||
|   }   | ||||
|   }   | ||||
|  | ||||
|   static void SteepestDescentGaugeFix(GaugeLorentz &Umu,Real & alpha,int maxiter,Real Omega_tol, Real Phi_tol,bool Fourier=false,int orthog=-1) { | ||||
|     GridBase *grid = Umu._grid; | ||||
|     GaugeMat xform(grid); | ||||
|     SteepestDescentGaugeFix(Umu,xform,alpha,maxiter,Omega_tol,Phi_tol,Fourier,orthog); | ||||
|   } | ||||
|   static void SteepestDescentGaugeFix(GaugeLorentz &Umu,GaugeMat &xform,Real & alpha,int maxiter,Real Omega_tol, Real Phi_tol,bool Fourier=false,int orthog=-1) { | ||||
|  | ||||
|   static void SteepestDescentGaugeFix(GaugeLorentz &Umu,Real & alpha,int maxiter,Real Omega_tol, Real Phi_tol,bool Fourier=false) { | ||||
|     GridBase *grid = Umu._grid; | ||||
|  | ||||
|     Real org_plaq      =WilsonLoops<Gimpl>::avgPlaquette(Umu); | ||||
| @@ -69,35 +59,16 @@ class FourierAcceleratedGaugeFixer  : public Gimpl { | ||||
|     Real old_trace = org_link_trace; | ||||
|     Real trG; | ||||
|  | ||||
|     xform=1.0; | ||||
|  | ||||
|     std::vector<GaugeMat> U(Nd,grid); | ||||
|  | ||||
|                  GaugeMat dmuAmu(grid); | ||||
|  | ||||
|     { | ||||
|       Real plaq      =WilsonLoops<Gimpl>::avgPlaquette(Umu); | ||||
|       Real link_trace=WilsonLoops<Gimpl>::linkTrace(Umu);  | ||||
|       if( (orthog>=0) && (orthog<Nd) ){ | ||||
| 	std::cout << GridLogMessage << " Gauge fixing to Coulomb gauge time="<<orthog<< " plaq= "<<plaq<<" link trace = "<<link_trace<<  std::endl; | ||||
|       } else {  | ||||
| 	std::cout << GridLogMessage << " Gauge fixing to Landau gauge plaq= "<<plaq<<" link trace = "<<link_trace<<  std::endl; | ||||
|       } | ||||
|     } | ||||
|     for(int i=0;i<maxiter;i++){ | ||||
|  | ||||
|       for(int mu=0;mu<Nd;mu++) U[mu]= PeekIndex<LorentzIndex>(Umu,mu); | ||||
|  | ||||
|       if ( Fourier==false ) {  | ||||
| 	trG = SteepestDescentStep(U,xform,alpha,dmuAmu,orthog); | ||||
| 	trG = SteepestDescentStep(U,alpha,dmuAmu); | ||||
|       } else {  | ||||
| 	trG = FourierAccelSteepestDescentStep(U,xform,alpha,dmuAmu,orthog); | ||||
| 	trG = FourierAccelSteepestDescentStep(U,alpha,dmuAmu); | ||||
|       } | ||||
|  | ||||
|       //      std::cout << GridLogMessage << "trG   "<< trG<< std::endl; | ||||
|       //      std::cout << GridLogMessage << "xform "<< norm2(xform)<< std::endl; | ||||
|       //      std::cout << GridLogMessage << "dmuAmu "<< norm2(dmuAmu)<< std::endl; | ||||
|  | ||||
|       for(int mu=0;mu<Nd;mu++) PokeIndex<LorentzIndex>(Umu,U[mu],mu); | ||||
|       // Monitor progress and convergence test  | ||||
|       // infrequently to minimise cost overhead | ||||
| @@ -113,6 +84,7 @@ class FourierAcceleratedGaugeFixer  : public Gimpl { | ||||
| 	Real Phi  = 1.0 - old_trace / link_trace ; | ||||
| 	Real Omega= 1.0 - trG; | ||||
|  | ||||
|  | ||||
| 	std::cout << GridLogMessage << " Iteration "<<i<< " Phi= "<<Phi<< " Omega= " << Omega<< " trG " << trG <<std::endl; | ||||
| 	if ( (Omega < Omega_tol) && ( ::fabs(Phi) < Phi_tol) ) { | ||||
| 	  std::cout << GridLogMessage << "Converged ! "<<std::endl; | ||||
| @@ -124,26 +96,25 @@ class FourierAcceleratedGaugeFixer  : public Gimpl { | ||||
|       } | ||||
|     } | ||||
|   }; | ||||
|   static Real SteepestDescentStep(std::vector<GaugeMat> &U,GaugeMat &xform,Real & alpha, GaugeMat & dmuAmu,int orthog) { | ||||
|   static Real SteepestDescentStep(std::vector<GaugeMat> &U,Real & alpha, GaugeMat & dmuAmu) { | ||||
|     GridBase *grid = U[0]._grid; | ||||
|  | ||||
|     std::vector<GaugeMat> A(Nd,grid); | ||||
|     GaugeMat g(grid); | ||||
|  | ||||
|     GaugeLinkToLieAlgebraField(U,A); | ||||
|     ExpiAlphaDmuAmu(A,g,alpha,dmuAmu,orthog); | ||||
|     ExpiAlphaDmuAmu(A,g,alpha,dmuAmu); | ||||
|  | ||||
|  | ||||
|     Real vol = grid->gSites(); | ||||
|     Real trG = TensorRemove(sum(trace(g))).real()/vol/Nc; | ||||
|  | ||||
|     xform = g*xform ; | ||||
|     SU<Nc>::GaugeTransform(U,g); | ||||
|  | ||||
|     return trG; | ||||
|   } | ||||
|  | ||||
|   static Real FourierAccelSteepestDescentStep(std::vector<GaugeMat> &U,GaugeMat &xform,Real & alpha, GaugeMat & dmuAmu,int orthog) { | ||||
|   static Real FourierAccelSteepestDescentStep(std::vector<GaugeMat> &U,Real & alpha, GaugeMat & dmuAmu) { | ||||
|  | ||||
|     GridBase *grid = U[0]._grid; | ||||
|  | ||||
| @@ -162,41 +133,38 @@ class FourierAcceleratedGaugeFixer  : public Gimpl { | ||||
|  | ||||
|     GaugeLinkToLieAlgebraField(U,A); | ||||
|  | ||||
|     DmuAmu(A,dmuAmu,orthog); | ||||
|     DmuAmu(A,dmuAmu); | ||||
|  | ||||
|     std::vector<int> mask(Nd,1); | ||||
|     for(int mu=0;mu<Nd;mu++) if (mu==orthog) mask[mu]=0; | ||||
|     theFFT.FFT_dim_mask(dmuAmu_p,dmuAmu,mask,FFT::forward); | ||||
|     theFFT.FFT_all_dim(dmuAmu_p,dmuAmu,FFT::forward); | ||||
|  | ||||
|     ////////////////////////////////// | ||||
|     // Work out Fp = psq_max/ psq... | ||||
|     // Avoid singularities in Fp | ||||
|     ////////////////////////////////// | ||||
|     std::vector<int> latt_size = grid->GlobalDimensions(); | ||||
|     std::vector<int> coor(grid->_ndimension,0); | ||||
|     for(int mu=0;mu<Nd;mu++) { | ||||
|       if ( mu != orthog ) {  | ||||
|  | ||||
|       Real TwoPiL =  M_PI * 2.0/ latt_size[mu]; | ||||
|       LatticeCoordinate(pmu,mu); | ||||
|       pmu = TwoPiL * pmu ; | ||||
|       psq = psq + 4.0*sin(pmu*0.5)*sin(pmu*0.5);  | ||||
|     } | ||||
|     } | ||||
|  | ||||
|     Complex psqMax(16.0); | ||||
|     Fp =  psqMax*one/psq; | ||||
|  | ||||
|     pokeSite(TComplex(16.0),Fp,coor); | ||||
|     if( (orthog>=0) && (orthog<Nd) ){ | ||||
|       for(int t=0;t<grid->GlobalDimensions()[orthog];t++){ | ||||
| 	coor[orthog]=t; | ||||
| 	pokeSite(TComplex(16.0),Fp,coor); | ||||
|       } | ||||
|     } | ||||
|     /* | ||||
|     static int once; | ||||
|     if ( once == 0 ) {  | ||||
|       std::cout << " Fp " << Fp <<std::endl; | ||||
|       once ++; | ||||
|       }*/ | ||||
|  | ||||
|     pokeSite(TComplex(1.0),Fp,coor); | ||||
|  | ||||
|     dmuAmu_p  = dmuAmu_p * Fp;  | ||||
|  | ||||
|     theFFT.FFT_dim_mask(dmuAmu,dmuAmu_p,mask,FFT::backward); | ||||
|     theFFT.FFT_all_dim(dmuAmu,dmuAmu_p,FFT::backward); | ||||
|  | ||||
|     GaugeMat ciadmam(grid); | ||||
|     Complex cialpha(0.0,-alpha); | ||||
| @@ -205,17 +173,16 @@ class FourierAcceleratedGaugeFixer  : public Gimpl { | ||||
|  | ||||
|     Real trG = TensorRemove(sum(trace(g))).real()/vol/Nc; | ||||
|  | ||||
|     xform = g*xform ; | ||||
|     SU<Nc>::GaugeTransform(U,g); | ||||
|  | ||||
|     return trG; | ||||
|   } | ||||
|  | ||||
|   static void ExpiAlphaDmuAmu(const std::vector<GaugeMat> &A,GaugeMat &g,Real & alpha, GaugeMat &dmuAmu,int orthog) { | ||||
|   static void ExpiAlphaDmuAmu(const std::vector<GaugeMat> &A,GaugeMat &g,Real & alpha, GaugeMat &dmuAmu) { | ||||
|     GridBase *grid = g._grid; | ||||
|     Complex cialpha(0.0,-alpha); | ||||
|     GaugeMat ciadmam(grid); | ||||
|     DmuAmu(A,dmuAmu,orthog); | ||||
|     DmuAmu(A,dmuAmu); | ||||
|     ciadmam = dmuAmu*cialpha; | ||||
|     SU<Nc>::taExp(ciadmam,g); | ||||
|   }   | ||||
|   | ||||
| @@ -173,39 +173,6 @@ void G5R5(Lattice<vobj> &z,const Lattice<vobj> &x) | ||||
|     } | ||||
|   } | ||||
| } | ||||
| } | ||||
|  | ||||
| // I explicitly need these outside the QCD namespace | ||||
| template<typename vobj> | ||||
| void G5C(Lattice<vobj> &z, const Lattice<vobj> &x) | ||||
| { | ||||
|   GridBase *grid = x._grid; | ||||
|   z.checkerboard = x.checkerboard; | ||||
|   conformable(x, z); | ||||
|  | ||||
|   QCD::Gamma G5(QCD::Gamma::Algebra::Gamma5); | ||||
|   z = G5 * x; | ||||
| } | ||||
|  | ||||
| template<class CComplex, int nbasis> | ||||
| void G5C(Lattice<iVector<CComplex, nbasis>> &z, const Lattice<iVector<CComplex, nbasis>> &x) | ||||
| { | ||||
|   GridBase *grid = x._grid; | ||||
|   z.checkerboard = x.checkerboard; | ||||
|   conformable(x, z); | ||||
|  | ||||
|   static_assert(nbasis % 2 == 0, ""); | ||||
|   int nb = nbasis / 2; | ||||
|  | ||||
|   parallel_for(int ss = 0; ss < grid->oSites(); ss++) { | ||||
|     for(int n = 0; n < nb; ++n) { | ||||
|       z._odata[ss](n) = x._odata[ss](n); | ||||
|     } | ||||
|     for(int n = nb; n < nbasis; ++n) { | ||||
|       z._odata[ss](n) = -x._odata[ss](n); | ||||
|     } | ||||
|   } | ||||
| } | ||||
|  | ||||
| } | ||||
| }} | ||||
| #endif  | ||||
|   | ||||
| @@ -676,18 +676,10 @@ class SU { | ||||
|     } | ||||
|   } | ||||
| /* | ||||
|  * Fundamental rep gauge xform | ||||
|  */ | ||||
|   template<typename Fundamental,typename GaugeMat> | ||||
|   static void GaugeTransformFundamental( Fundamental &ferm, GaugeMat &g){ | ||||
|     GridBase *grid = ferm._grid; | ||||
|     conformable(grid,g._grid); | ||||
|     ferm = g*ferm; | ||||
|   } | ||||
| /* | ||||
|  * Adjoint rep gauge xform | ||||
|  */ | ||||
|   template<typename GaugeField,typename GaugeMat> | ||||
|  add GaugeTrans | ||||
| */ | ||||
|  | ||||
| template<typename GaugeField,typename GaugeMat> | ||||
|   static void GaugeTransform( GaugeField &Umu, GaugeMat &g){ | ||||
|     GridBase *grid = Umu._grid; | ||||
|     conformable(grid,g._grid); | ||||
|   | ||||
| @@ -6,12 +6,10 @@ | ||||
|  | ||||
|     Copyright (C) 2015 | ||||
|  | ||||
|     Author: Azusa Yamaguchi <ayamaguc@staffmail.ed.ac.uk> | ||||
|     Author: Peter Boyle <paboyle@ph.ed.ac.uk> | ||||
|     Author: neo <cossu@post.kek.jp> | ||||
|     Author: paboyle <paboyle@ph.ed.ac.uk> | ||||
|     Author: James Harrison <J.Harrison@soton.ac.uk> | ||||
|     Author: Antonin Portelli <antonin.portelli@me.com> | ||||
| Author: Azusa Yamaguchi <ayamaguc@staffmail.ed.ac.uk> | ||||
| Author: Peter Boyle <paboyle@ph.ed.ac.uk> | ||||
| Author: neo <cossu@post.kek.jp> | ||||
| Author: paboyle <paboyle@ph.ed.ac.uk> | ||||
|  | ||||
|     This program is free software; you can redistribute it and/or modify | ||||
|     it under the terms of the GNU General Public License as published by | ||||
| @@ -647,184 +645,6 @@ static void StapleMult(GaugeMat &staple, const GaugeLorentz &Umu, int mu) { | ||||
|       } | ||||
|     } | ||||
|   } | ||||
|  | ||||
|   ////////////////////////////////////////////////// | ||||
|   // Wilson loop of size (R1, R2), oriented in mu,nu plane | ||||
|   ////////////////////////////////////////////////// | ||||
|   static void wilsonLoop(GaugeMat &wl, const std::vector<GaugeMat> &U, | ||||
|                            const int Rmu, const int Rnu, | ||||
|                            const int mu, const int nu) { | ||||
|     wl = U[nu]; | ||||
|  | ||||
|     for(int i = 0; i < Rnu-1; i++){ | ||||
|       wl = Gimpl::CovShiftForward(U[nu], nu, wl); | ||||
|     } | ||||
|  | ||||
|     for(int i = 0; i < Rmu; i++){ | ||||
|       wl = Gimpl::CovShiftForward(U[mu], mu, wl); | ||||
|     } | ||||
|  | ||||
|     for(int i = 0; i < Rnu; i++){ | ||||
|       wl = Gimpl::CovShiftBackward(U[nu], nu, wl); | ||||
|     } | ||||
|  | ||||
|     for(int i = 0; i < Rmu; i++){ | ||||
|       wl = Gimpl::CovShiftBackward(U[mu], mu, wl); | ||||
|     } | ||||
|   } | ||||
|   ////////////////////////////////////////////////// | ||||
|   // trace of Wilson Loop oriented in mu,nu plane | ||||
|   ////////////////////////////////////////////////// | ||||
|   static void traceWilsonLoop(LatticeComplex &wl, | ||||
|                                 const std::vector<GaugeMat> &U, | ||||
|                                 const int Rmu, const int Rnu, | ||||
|                                 const int mu, const int nu) { | ||||
|     GaugeMat sp(U[0]._grid); | ||||
|     wilsonLoop(sp, U, Rmu, Rnu, mu, nu); | ||||
|     wl = trace(sp); | ||||
|   } | ||||
|   ////////////////////////////////////////////////// | ||||
|   // sum over all planes of Wilson loop | ||||
|   ////////////////////////////////////////////////// | ||||
|   static void siteWilsonLoop(LatticeComplex &Wl, | ||||
|                             const std::vector<GaugeMat> &U, | ||||
|                             const int R1, const int R2) { | ||||
|     LatticeComplex siteWl(U[0]._grid); | ||||
|     Wl = zero; | ||||
|     for (int mu = 1; mu < U[0]._grid->_ndimension; mu++) { | ||||
|       for (int nu = 0; nu < mu; nu++) { | ||||
|         traceWilsonLoop(siteWl, U, R1, R2, mu, nu); | ||||
|         Wl = Wl + siteWl; | ||||
|         traceWilsonLoop(siteWl, U, R2, R1, mu, nu); | ||||
|         Wl = Wl + siteWl; | ||||
|       } | ||||
|     } | ||||
|   } | ||||
|   ////////////////////////////////////////////////// | ||||
|   // sum over planes of Wilson loop with length R1 | ||||
|   // in the time direction | ||||
|   ////////////////////////////////////////////////// | ||||
|   static void siteTimelikeWilsonLoop(LatticeComplex &Wl, | ||||
|                             const std::vector<GaugeMat> &U, | ||||
|                             const int R1, const int R2) { | ||||
|     LatticeComplex siteWl(U[0]._grid); | ||||
|  | ||||
|     int ndim = U[0]._grid->_ndimension; | ||||
|  | ||||
|     Wl = zero; | ||||
|     for (int nu = 0; nu < ndim - 1; nu++) { | ||||
|       traceWilsonLoop(siteWl, U, R1, R2, ndim-1, nu); | ||||
|       Wl = Wl + siteWl; | ||||
|     } | ||||
|   } | ||||
|   ////////////////////////////////////////////////// | ||||
|   // sum Wilson loop over all planes orthogonal to the time direction | ||||
|   ////////////////////////////////////////////////// | ||||
|   static void siteSpatialWilsonLoop(LatticeComplex &Wl, | ||||
|                             const std::vector<GaugeMat> &U, | ||||
|                             const int R1, const int R2) { | ||||
|     LatticeComplex siteWl(U[0]._grid); | ||||
|  | ||||
|     Wl = zero; | ||||
|     for (int mu = 1; mu < U[0]._grid->_ndimension - 1; mu++) { | ||||
|       for (int nu = 0; nu < mu; nu++) { | ||||
|         traceWilsonLoop(siteWl, U, R1, R2, mu, nu); | ||||
|         Wl = Wl + siteWl; | ||||
|         traceWilsonLoop(siteWl, U, R2, R1, mu, nu); | ||||
|         Wl = Wl + siteWl; | ||||
|       } | ||||
|     } | ||||
|   } | ||||
|   ////////////////////////////////////////////////// | ||||
|   // sum over all x,y,z,t and over all planes of Wilson loop | ||||
|   ////////////////////////////////////////////////// | ||||
|   static Real sumWilsonLoop(const GaugeLorentz &Umu, | ||||
|                             const int R1, const int R2) { | ||||
|     std::vector<GaugeMat> U(4, Umu._grid); | ||||
|  | ||||
|     for (int mu = 0; mu < Umu._grid->_ndimension; mu++) { | ||||
|       U[mu] = PeekIndex<LorentzIndex>(Umu, mu); | ||||
|     } | ||||
|  | ||||
|     LatticeComplex Wl(Umu._grid); | ||||
|  | ||||
|     siteWilsonLoop(Wl, U, R1, R2); | ||||
|  | ||||
|     TComplex Tp = sum(Wl); | ||||
|     Complex p = TensorRemove(Tp); | ||||
|     return p.real(); | ||||
|   } | ||||
|   ////////////////////////////////////////////////// | ||||
|   // sum over all x,y,z,t and over all planes of timelike Wilson loop | ||||
|   ////////////////////////////////////////////////// | ||||
|   static Real sumTimelikeWilsonLoop(const GaugeLorentz &Umu, | ||||
|                             const int R1, const int R2) { | ||||
|     std::vector<GaugeMat> U(4, Umu._grid); | ||||
|  | ||||
|     for (int mu = 0; mu < Umu._grid->_ndimension; mu++) { | ||||
|       U[mu] = PeekIndex<LorentzIndex>(Umu, mu); | ||||
|     } | ||||
|  | ||||
|     LatticeComplex Wl(Umu._grid); | ||||
|  | ||||
|     siteTimelikeWilsonLoop(Wl, U, R1, R2); | ||||
|  | ||||
|     TComplex Tp = sum(Wl); | ||||
|     Complex p = TensorRemove(Tp); | ||||
|     return p.real(); | ||||
|   } | ||||
|   ////////////////////////////////////////////////// | ||||
|   // sum over all x,y,z,t and over all planes of spatial Wilson loop | ||||
|   ////////////////////////////////////////////////// | ||||
|   static Real sumSpatialWilsonLoop(const GaugeLorentz &Umu, | ||||
|                             const int R1, const int R2) { | ||||
|     std::vector<GaugeMat> U(4, Umu._grid); | ||||
|  | ||||
|     for (int mu = 0; mu < Umu._grid->_ndimension; mu++) { | ||||
|       U[mu] = PeekIndex<LorentzIndex>(Umu, mu); | ||||
|     } | ||||
|  | ||||
|     LatticeComplex Wl(Umu._grid); | ||||
|  | ||||
|     siteSpatialWilsonLoop(Wl, U, R1, R2); | ||||
|  | ||||
|     TComplex Tp = sum(Wl); | ||||
|     Complex p = TensorRemove(Tp); | ||||
|     return p.real(); | ||||
|   } | ||||
|   ////////////////////////////////////////////////// | ||||
|   // average over all x,y,z,t and over all planes of Wilson loop | ||||
|   ////////////////////////////////////////////////// | ||||
|   static Real avgWilsonLoop(const GaugeLorentz &Umu, | ||||
|                             const int R1, const int R2) { | ||||
|     int ndim = Umu._grid->_ndimension; | ||||
|     Real sumWl = sumWilsonLoop(Umu, R1, R2); | ||||
|     Real vol = Umu._grid->gSites(); | ||||
|     Real faces = 1.0 * ndim * (ndim - 1); | ||||
|     return sumWl / vol / faces / Nc; // Nc dependent... FIXME | ||||
|   } | ||||
|   ////////////////////////////////////////////////// | ||||
|   // average over all x,y,z,t and over all planes of timelike Wilson loop | ||||
|   ////////////////////////////////////////////////// | ||||
|   static Real avgTimelikeWilsonLoop(const GaugeLorentz &Umu, | ||||
|                             const int R1, const int R2) { | ||||
|     int ndim = Umu._grid->_ndimension; | ||||
|     Real sumWl = sumTimelikeWilsonLoop(Umu, R1, R2); | ||||
|     Real vol = Umu._grid->gSites(); | ||||
|     Real faces = 1.0 * (ndim - 1); | ||||
|     return sumWl / vol / faces / Nc; // Nc dependent... FIXME | ||||
|   } | ||||
|   ////////////////////////////////////////////////// | ||||
|   // average over all x,y,z,t and over all planes of spatial Wilson loop | ||||
|   ////////////////////////////////////////////////// | ||||
|   static Real avgSpatialWilsonLoop(const GaugeLorentz &Umu, | ||||
|                             const int R1, const int R2) { | ||||
|     int ndim = Umu._grid->_ndimension; | ||||
|     Real sumWl = sumSpatialWilsonLoop(Umu, R1, R2); | ||||
|     Real vol = Umu._grid->gSites(); | ||||
|     Real faces = 1.0 * (ndim - 1) * (ndim - 2); | ||||
|     return sumWl / vol / faces / Nc; // Nc dependent... FIXME | ||||
|   } | ||||
| }; | ||||
|  | ||||
| typedef WilsonLoops<PeriodicGimplR> ColourWilsonLoops; | ||||
|   | ||||
| @@ -33,72 +33,8 @@ Author: Guido Cossu <guido.cossu@ed.ac.uk> | ||||
| #include <type_traits> | ||||
| #include <Grid/tensors/Tensors.h> | ||||
| #include <Grid/serialisation/VectorUtils.h> | ||||
| #include <Grid/Eigen/unsupported/CXX11/Tensor> | ||||
|  | ||||
| namespace Grid { | ||||
|   namespace EigenIO { | ||||
|     // EigenIO works for scalars that are not just Grid supported scalars | ||||
|     template<typename T, typename V = void> struct is_complex : public std::false_type {}; | ||||
|     // Support all complex types (not just Grid complex types) - even if the definitions overlap (!) | ||||
|     template<typename T> struct is_complex<             T , typename | ||||
|         std::enable_if< ::Grid::is_complex<             T >::value>::type> : public std::true_type {}; | ||||
|     template<typename T> struct is_complex<std::complex<T>, typename | ||||
|         std::enable_if<!::Grid::is_complex<std::complex<T>>::value>::type> : public std::true_type {}; | ||||
|  | ||||
|     // Helpers to support I/O for Eigen tensors of arithmetic scalars, complex types, or Grid tensors | ||||
|     template<typename T, typename V = void> struct is_scalar : public std::false_type {}; | ||||
|     template<typename T> struct is_scalar<T, typename std::enable_if<std::is_arithmetic<T>::value || is_complex<T>::value>::type> : public std::true_type {}; | ||||
|  | ||||
|     // Is this an Eigen tensor | ||||
|     template<typename T> struct is_tensor : std::integral_constant<bool, | ||||
|       std::is_base_of<Eigen::TensorBase<T, Eigen::ReadOnlyAccessors>, T>::value> {}; | ||||
|  | ||||
|     // Is this an Eigen tensor of a supported scalar | ||||
|     template<typename T, typename V = void> struct is_tensor_of_scalar : public std::false_type {}; | ||||
|     template<typename T> struct is_tensor_of_scalar<T, typename std::enable_if<is_tensor<T>::value && is_scalar<typename T::Scalar>::value>::type> : public std::true_type {}; | ||||
|  | ||||
|     // Is this an Eigen tensor of a supported container | ||||
|     template<typename T, typename V = void> struct is_tensor_of_container : public std::false_type {}; | ||||
|     template<typename T> struct is_tensor_of_container<T, typename std::enable_if<is_tensor<T>::value && isGridTensor<typename T::Scalar>::value>::type> : public std::true_type {}; | ||||
|  | ||||
|     // These traits describe the scalars inside Eigen tensors | ||||
|     // I wish I could define these in reference to the scalar type (so there would be fewer traits defined) | ||||
|     // but I'm unable to find a syntax to make this work | ||||
|     template<typename T, typename V = void> struct Traits {}; | ||||
|     // Traits are the default for scalars, or come from GridTypeMapper for GridTensors | ||||
|     template<typename T> struct Traits<T, typename std::enable_if<is_tensor_of_scalar<T>::value>::type> | ||||
|       : public GridTypeMapper_Base { | ||||
|       using scalar_type   = typename T::Scalar; // ultimate base scalar | ||||
|       static constexpr bool is_complex = ::Grid::EigenIO::is_complex<scalar_type>::value; | ||||
|     }; | ||||
|     // Traits are the default for scalars, or come from GridTypeMapper for GridTensors | ||||
|     template<typename T> struct Traits<T, typename std::enable_if<is_tensor_of_container<T>::value>::type> { | ||||
|       using BaseTraits  = GridTypeMapper<typename T::Scalar>; | ||||
|       using scalar_type = typename BaseTraits::scalar_type; // ultimate base scalar | ||||
|       static constexpr bool   is_complex = ::Grid::EigenIO::is_complex<scalar_type>::value; | ||||
|       static constexpr int   TensorLevel = BaseTraits::TensorLevel; | ||||
|       static constexpr int          Rank = BaseTraits::Rank; | ||||
|       static constexpr std::size_t count = BaseTraits::count; | ||||
|       static constexpr int Dimension(int dim) { return BaseTraits::Dimension(dim); } | ||||
|     }; | ||||
|  | ||||
|     // Is this a fixed-size Eigen tensor | ||||
|     template<typename T> struct is_tensor_fixed : public std::false_type {}; | ||||
|     template<typename Scalar_, typename Dimensions_, int Options_, typename IndexType> | ||||
|     struct is_tensor_fixed<Eigen::TensorFixedSize<Scalar_, Dimensions_, Options_, IndexType>> | ||||
|         : public std::true_type {}; | ||||
|     template<typename Scalar_, typename Dimensions_, int Options_, typename IndexType, | ||||
|               int MapOptions_, template <class> class MapPointer_> | ||||
|     struct is_tensor_fixed<Eigen::TensorMap<Eigen::TensorFixedSize<Scalar_, Dimensions_, | ||||
|                                             Options_, IndexType>, MapOptions_, MapPointer_>> | ||||
|         : public std::true_type {}; | ||||
|  | ||||
|     // Is this a variable-size Eigen tensor | ||||
|     template<typename T, typename V = void> struct is_tensor_variable : public std::false_type {}; | ||||
|     template<typename T> struct is_tensor_variable<T, typename std::enable_if<is_tensor<T>::value | ||||
|         && !is_tensor_fixed<T>::value>::type> : public std::true_type {}; | ||||
|   } | ||||
|  | ||||
|   // Abstract writer/reader classes //////////////////////////////////////////// | ||||
|   // static polymorphism implemented using CRTP idiom | ||||
|   class Serializable; | ||||
| @@ -113,10 +49,10 @@ namespace Grid { | ||||
|     void push(const std::string &s); | ||||
|     void pop(void); | ||||
|     template <typename U> | ||||
|     typename std::enable_if<std::is_base_of<Serializable, U>::value>::type | ||||
|     typename std::enable_if<std::is_base_of<Serializable, U>::value, void>::type | ||||
|     write(const std::string& s, const U &output); | ||||
|     template <typename U> | ||||
|     typename std::enable_if<!std::is_base_of<Serializable, U>::value && !EigenIO::is_tensor<U>::value>::type | ||||
|     typename std::enable_if<!std::is_base_of<Serializable, U>::value, void>::type | ||||
|     write(const std::string& s, const U &output); | ||||
|     template <typename U> | ||||
|     void write(const std::string &s, const iScalar<U> &output); | ||||
| @@ -124,42 +60,6 @@ namespace Grid { | ||||
|     void write(const std::string &s, const iVector<U, N> &output); | ||||
|     template <typename U, int N> | ||||
|     void write(const std::string &s, const iMatrix<U, N> &output); | ||||
|     template <typename ETensor> | ||||
|     typename std::enable_if<EigenIO::is_tensor<ETensor>::value>::type | ||||
|     write(const std::string &s, const ETensor &output); | ||||
|  | ||||
|     // Helper functions for Scalar vs Container specialisations | ||||
|     template <typename ETensor> | ||||
|     inline typename std::enable_if<EigenIO::is_tensor_of_scalar<ETensor>::value, | ||||
|     const typename ETensor::Scalar *>::type | ||||
|     getFirstScalar(const ETensor &output) | ||||
|     { | ||||
|       return output.data(); | ||||
|     } | ||||
|      | ||||
|     template <typename ETensor> | ||||
|     inline typename std::enable_if<EigenIO::is_tensor_of_container<ETensor>::value, | ||||
|     const typename EigenIO::Traits<ETensor>::scalar_type *>::type | ||||
|     getFirstScalar(const ETensor &output) | ||||
|     { | ||||
|       return output.data()->begin(); | ||||
|     } | ||||
|      | ||||
|     template <typename S> | ||||
|     inline typename std::enable_if<EigenIO::is_scalar<S>::value, void>::type | ||||
|     copyScalars(S * &pCopy, const S &Source) | ||||
|     { | ||||
|       * pCopy ++ = Source; | ||||
|     } | ||||
|      | ||||
|     template <typename S> | ||||
|     inline typename std::enable_if<isGridTensor<S>::value, void>::type | ||||
|     copyScalars(typename GridTypeMapper<S>::scalar_type * &pCopy, const S &Source) | ||||
|     { | ||||
|       for( const typename GridTypeMapper<S>::scalar_type &item : Source ) | ||||
|         * pCopy ++ = item; | ||||
|     } | ||||
|  | ||||
|     void         scientificFormat(const bool set); | ||||
|     bool         isScientific(void); | ||||
|     void         setPrecision(const unsigned int prec); | ||||
| @@ -183,8 +83,7 @@ namespace Grid { | ||||
|     typename std::enable_if<std::is_base_of<Serializable, U>::value, void>::type | ||||
|     read(const std::string& s, U &output); | ||||
|     template <typename U> | ||||
|     typename std::enable_if<!std::is_base_of<Serializable, U>::value | ||||
|                          && !EigenIO::is_tensor<U>::value, void>::type | ||||
|     typename std::enable_if<!std::is_base_of<Serializable, U>::value, void>::type | ||||
|     read(const std::string& s, U &output); | ||||
|     template <typename U> | ||||
|     void read(const std::string &s, iScalar<U> &output); | ||||
| @@ -192,32 +91,6 @@ namespace Grid { | ||||
|     void read(const std::string &s, iVector<U, N> &output); | ||||
|     template <typename U, int N> | ||||
|     void read(const std::string &s, iMatrix<U, N> &output); | ||||
|     template <typename ETensor> | ||||
|     typename std::enable_if<EigenIO::is_tensor<ETensor>::value, void>::type | ||||
|     read(const std::string &s, ETensor &output); | ||||
|     template <typename ETensor> | ||||
|     typename std::enable_if<EigenIO::is_tensor_fixed<ETensor>::value, void>::type | ||||
|     Reshape(ETensor &t, const std::array<typename ETensor::Index, ETensor::NumDimensions> &dims ); | ||||
|     template <typename ETensor> | ||||
|     typename std::enable_if<EigenIO::is_tensor_variable<ETensor>::value, void>::type | ||||
|     Reshape(ETensor &t, const std::array<typename ETensor::Index, ETensor::NumDimensions> &dims ); | ||||
|    | ||||
|     // Helper functions for Scalar vs Container specialisations | ||||
|     template <typename S> | ||||
|     inline typename std::enable_if<EigenIO::is_scalar<S>::value, void>::type | ||||
|     copyScalars(S &Dest, const S * &pSource) | ||||
|     { | ||||
|       Dest = * pSource ++; | ||||
|     } | ||||
|      | ||||
|     template <typename S> | ||||
|     inline typename std::enable_if<isGridTensor<S>::value, void>::type | ||||
|     copyScalars(S &Dest, const typename GridTypeMapper<S>::scalar_type * &pSource) | ||||
|     { | ||||
|       for( typename GridTypeMapper<S>::scalar_type &item : Dest ) | ||||
|         item = * pSource ++; | ||||
|     } | ||||
|      | ||||
|   protected: | ||||
|     template <typename U> | ||||
|     void fromString(U &output, const std::string &s); | ||||
| @@ -262,14 +135,12 @@ namespace Grid { | ||||
|    | ||||
|   template <typename T> | ||||
|   template <typename U> | ||||
|   typename std::enable_if<!std::is_base_of<Serializable, U>::value | ||||
|                        && !EigenIO::is_tensor<U>::value, void>::type | ||||
|   typename std::enable_if<!std::is_base_of<Serializable, U>::value, void>::type | ||||
|   Writer<T>::write(const std::string &s, const U &output) | ||||
|   { | ||||
|     upcast->writeDefault(s, output); | ||||
|   } | ||||
|  | ||||
|  | ||||
|   template <typename T> | ||||
|   template <typename U> | ||||
|   void Writer<T>::write(const std::string &s, const iScalar<U> &output) | ||||
| @@ -291,57 +162,6 @@ namespace Grid { | ||||
|     upcast->writeDefault(s, tensorToVec(output)); | ||||
|   } | ||||
|  | ||||
|   // Eigen::Tensors of Grid tensors (iScalar, iVector, iMatrix) | ||||
|   template <typename T> | ||||
|   template <typename ETensor> | ||||
|   typename std::enable_if<EigenIO::is_tensor<ETensor>::value, void>::type | ||||
|   Writer<T>::write(const std::string &s, const ETensor &output) | ||||
|   { | ||||
|     using Index = typename ETensor::Index; | ||||
|     using Container = typename ETensor::Scalar; // NB: could be same as scalar | ||||
|     using Traits = EigenIO::Traits<ETensor>; | ||||
|     using Scalar = typename Traits::scalar_type; // type of the underlying scalar | ||||
|     constexpr unsigned int TensorRank{ETensor::NumIndices}; | ||||
|     constexpr unsigned int ContainerRank{Traits::Rank}; // Only non-zero for containers | ||||
|     constexpr unsigned int TotalRank{TensorRank + ContainerRank}; | ||||
|     const Index NumElements{output.size()}; | ||||
|     assert( NumElements > 0 ); | ||||
|  | ||||
|     // Get the dimensionality of the tensor | ||||
|     std::vector<std::size_t>  TotalDims(TotalRank); | ||||
|     for(auto i = 0; i < TensorRank; i++ ) { | ||||
|       auto dim = output.dimension(i); | ||||
|       TotalDims[i] = static_cast<size_t>(dim); | ||||
|       assert( TotalDims[i] == dim ); // check we didn't lose anything in the conversion | ||||
|     } | ||||
|     for(auto i = 0; i < ContainerRank; i++ ) | ||||
|       TotalDims[TensorRank + i] = Traits::Dimension(i); | ||||
|  | ||||
|     // If the Tensor isn't in Row-Major order, then we'll need to copy it's data | ||||
|     const bool CopyData{NumElements > 1 && ETensor::Layout != Eigen::StorageOptions::RowMajor}; | ||||
|     const Scalar * pWriteBuffer; | ||||
|     std::vector<Scalar> CopyBuffer; | ||||
|     const Index TotalNumElements = NumElements * Traits::count; | ||||
|     if( !CopyData ) { | ||||
|       pWriteBuffer = getFirstScalar( output ); | ||||
|     } else { | ||||
|       // Regardless of the Eigen::Tensor storage order, the copy will be Row Major | ||||
|       CopyBuffer.resize( TotalNumElements ); | ||||
|       Scalar * pCopy = &CopyBuffer[0]; | ||||
|       pWriteBuffer = pCopy; | ||||
|       std::array<Index, TensorRank> MyIndex; | ||||
|       for( auto &idx : MyIndex ) idx = 0; | ||||
|       for( auto n = 0; n < NumElements; n++ ) { | ||||
|         const Container & c = output( MyIndex ); | ||||
|         copyScalars( pCopy, c ); | ||||
|         // Now increment the index | ||||
|         for( int i = output.NumDimensions - 1; i >= 0 && ++MyIndex[i] == output.dimension(i); i-- ) | ||||
|           MyIndex[i] = 0; | ||||
|       } | ||||
|     } | ||||
|     upcast->template writeMultiDim<Scalar>(s, TotalDims, pWriteBuffer, TotalNumElements); | ||||
|   } | ||||
|  | ||||
|   template <typename T> | ||||
|   void Writer<T>::scientificFormat(const bool set) | ||||
|   { | ||||
| @@ -395,8 +215,7 @@ namespace Grid { | ||||
|    | ||||
|   template <typename T> | ||||
|   template <typename U> | ||||
|   typename std::enable_if<!std::is_base_of<Serializable, U>::value | ||||
|                        && !EigenIO::is_tensor<U>::value, void>::type | ||||
|   typename std::enable_if<!std::is_base_of<Serializable, U>::value, void>::type | ||||
|   Reader<T>::read(const std::string &s, U &output) | ||||
|   { | ||||
|     upcast->readDefault(s, output); | ||||
| @@ -432,79 +251,6 @@ namespace Grid { | ||||
|     vecToTensor(output, v); | ||||
|   } | ||||
|  | ||||
|   template <typename T> | ||||
|   template <typename ETensor> | ||||
|   typename std::enable_if<EigenIO::is_tensor<ETensor>::value, void>::type | ||||
|   Reader<T>::read(const std::string &s, ETensor &output) | ||||
|   { | ||||
|     using Index = typename ETensor::Index; | ||||
|     using Container = typename ETensor::Scalar; // NB: could be same as scalar | ||||
|     using Traits = EigenIO::Traits<ETensor>; | ||||
|     using Scalar = typename Traits::scalar_type; // type of the underlying scalar | ||||
|     constexpr unsigned int TensorRank{ETensor::NumIndices}; | ||||
|     constexpr unsigned int ContainerRank{Traits::Rank}; // Only non-zero for containers | ||||
|     constexpr unsigned int TotalRank{TensorRank + ContainerRank}; | ||||
|     using ETDims = std::array<Index, TensorRank>; // Dimensions of the tensor | ||||
|  | ||||
|     // read the (flat) data and dimensionality | ||||
|     std::vector<std::size_t> dimData; | ||||
|     std::vector<Scalar> buf; | ||||
|     upcast->readMultiDim( s, buf, dimData ); | ||||
|     assert(dimData.size() == TotalRank && "EigenIO: Tensor rank mismatch" ); | ||||
|     // Make sure that the number of elements read matches dimensions read | ||||
|     std::size_t NumContainers = 1; | ||||
|     for( auto i = 0 ; i < TensorRank ; i++ ) | ||||
|       NumContainers *= dimData[i]; | ||||
|     // If our scalar object is a Container, make sure it's dimensions match what we read back | ||||
|     std::size_t ElementsPerContainer = 1; | ||||
|     for( auto i = 0 ; i < ContainerRank ; i++ ) { | ||||
|       assert( dimData[TensorRank+i] == Traits::Dimension(i) && "Tensor Container dimensions don't match data" ); | ||||
|       ElementsPerContainer *= dimData[TensorRank+i]; | ||||
|     } | ||||
|     assert( NumContainers * ElementsPerContainer == buf.size() && "EigenIO: Number of elements != product of dimensions" ); | ||||
|     // Now see whether the tensor is the right shape, or can be made to be | ||||
|     const auto & dims = output.dimensions(); | ||||
|     bool bShapeOK = (output.data() != nullptr); | ||||
|     for( auto i = 0; bShapeOK && i < TensorRank ; i++ ) | ||||
|       if( dims[i] != dimData[i] ) | ||||
|         bShapeOK = false; | ||||
|     // Make the tensor the same size as the data read | ||||
|     ETDims MyIndex; | ||||
|     if( !bShapeOK ) { | ||||
|       for( auto i = 0 ; i < TensorRank ; i++ ) | ||||
|         MyIndex[i] = dimData[i]; | ||||
|       Reshape(output, MyIndex); | ||||
|     } | ||||
|     // Copy the data into the tensor | ||||
|     for( auto &d : MyIndex ) d = 0; | ||||
|     const Scalar * pSource = &buf[0]; | ||||
|     for( std::size_t n = 0 ; n < NumContainers ; n++ ) { | ||||
|       Container & c = output( MyIndex ); | ||||
|       copyScalars( c, pSource ); | ||||
|       // Now increment the index | ||||
|       for( int i = TensorRank - 1; i != -1 && ++MyIndex[i] == dims[i]; i-- ) | ||||
|         MyIndex[i] = 0; | ||||
|     } | ||||
|     assert( pSource == &buf[NumContainers * ElementsPerContainer] ); | ||||
|   } | ||||
|  | ||||
|   template <typename T> | ||||
|   template <typename ETensor> | ||||
|   typename std::enable_if<EigenIO::is_tensor_fixed<ETensor>::value, void>::type | ||||
|   Reader<T>::Reshape(ETensor &t, const std::array<typename ETensor::Index, ETensor::NumDimensions> &dims ) | ||||
|   { | ||||
|     assert( 0 && "EigenIO: Fixed tensor dimensions can't be changed" ); | ||||
|   } | ||||
|  | ||||
|   template <typename T> | ||||
|   template <typename ETensor> | ||||
|   typename std::enable_if<EigenIO::is_tensor_variable<ETensor>::value, void>::type | ||||
|   Reader<T>::Reshape(ETensor &t, const std::array<typename ETensor::Index, ETensor::NumDimensions> &dims ) | ||||
|   { | ||||
|     //t.reshape( dims ); | ||||
|     t.resize( dims ); | ||||
|   } | ||||
|  | ||||
|   template <typename T> | ||||
|   template <typename U> | ||||
|   void Reader<T>::fromString(U &output, const std::string &s) | ||||
| @@ -543,68 +289,6 @@ namespace Grid { | ||||
|     { | ||||
|       return os; | ||||
|     } | ||||
|  | ||||
|     template <typename T1, typename T2> | ||||
|     static inline typename std::enable_if<!EigenIO::is_tensor<T1>::value || !EigenIO::is_tensor<T2>::value, bool>::type | ||||
|     CompareMember(const T1 &lhs, const T2 &rhs) { | ||||
|       return lhs == rhs; | ||||
|     } | ||||
|  | ||||
|     template <typename T1, typename T2> | ||||
|     static inline typename std::enable_if<EigenIO::is_tensor<T1>::value && EigenIO::is_tensor<T2>::value, bool>::type | ||||
|     CompareMember(const T1 &lhs, const T2 &rhs) { | ||||
|       // First check whether dimensions match (Eigen tensor library will assert if they don't match) | ||||
|       bool bReturnValue = (T1::NumIndices == T2::NumIndices); | ||||
|       for( auto i = 0 ; bReturnValue && i < T1::NumIndices ; i++ ) | ||||
|           bReturnValue = ( lhs.dimension(i) == rhs.dimension(i) ); | ||||
|       if( bReturnValue ) { | ||||
|         Eigen::Tensor<bool, 0, T1::Options> bResult = (lhs == rhs).all(); | ||||
|         bReturnValue = bResult(0); | ||||
|       } | ||||
|       return bReturnValue; | ||||
|     } | ||||
|  | ||||
|     template <typename T> | ||||
|     static inline typename std::enable_if<EigenIO::is_tensor<T>::value, bool>::type | ||||
|     CompareMember(const std::vector<T> &lhs, const std::vector<T> &rhs) { | ||||
|       const auto NumElements = lhs.size(); | ||||
|       bool bResult = ( NumElements == rhs.size() ); | ||||
|       for( auto i = 0 ; i < NumElements && bResult ; i++ ) | ||||
|         bResult = CompareMember(lhs[i], rhs[i]); | ||||
|       return bResult; | ||||
|     } | ||||
|  | ||||
|     template <typename T> | ||||
|     static inline typename std::enable_if<!EigenIO::is_tensor<T>::value, void>::type | ||||
|     WriteMember(std::ostream &os, const T &object) { | ||||
|       os << object; | ||||
|     } | ||||
|      | ||||
|     template <typename T> | ||||
|     static inline typename std::enable_if<EigenIO::is_tensor<T>::value, void>::type | ||||
|     WriteMember(std::ostream &os, const T &object) { | ||||
|       using Index = typename T::Index; | ||||
|       const Index NumElements{object.size()}; | ||||
|       assert( NumElements > 0 ); | ||||
|       Index count = 1; | ||||
|       os << "T<"; | ||||
|       for( int i = 0; i < T::NumIndices; i++ ) { | ||||
|         Index dim = object.dimension(i); | ||||
|         count *= dim; | ||||
|         if( i ) | ||||
|           os << ","; | ||||
|         os << dim; | ||||
|       } | ||||
|       assert( count == NumElements && "Number of elements doesn't match tensor dimensions" ); | ||||
|       os << ">{"; | ||||
|       const typename T::Scalar * p = object.data(); | ||||
|       for( Index i = 0; i < count; i++ ) { | ||||
|         if( i ) | ||||
|           os << ","; | ||||
|         os << *p++; | ||||
|       } | ||||
|       os << "}"; | ||||
|     } | ||||
|   }; | ||||
|    | ||||
|   // Generic writer interface ////////////////////////////////////////////////// | ||||
|   | ||||
| @@ -51,8 +51,6 @@ namespace Grid { | ||||
|     template <typename U> | ||||
|     void writeDefault(const std::string &s, const std::vector<U> &x); | ||||
|     void writeDefault(const std::string &s, const char *x); | ||||
|     template <typename U> | ||||
|     void writeMultiDim(const std::string &s, const std::vector<size_t> & Dimensions, const U * pDataRowMajor, size_t NumElements); | ||||
|   private: | ||||
|     std::ofstream file_; | ||||
|   }; | ||||
| @@ -68,8 +66,6 @@ namespace Grid { | ||||
|     void readDefault(const std::string &s, U &output); | ||||
|     template <typename U> | ||||
|     void readDefault(const std::string &s, std::vector<U> &output); | ||||
|     template <typename U> | ||||
|     void readMultiDim(const std::string &s, std::vector<U> &buf, std::vector<size_t> &dim); | ||||
|   private: | ||||
|     std::ifstream file_; | ||||
|   }; | ||||
| @@ -96,27 +92,6 @@ namespace Grid { | ||||
|     } | ||||
|   } | ||||
|    | ||||
|   template <typename U> | ||||
|   void BinaryWriter::writeMultiDim(const std::string &s, const std::vector<size_t> & Dimensions, const U * pDataRowMajor, size_t NumElements) | ||||
|   { | ||||
|     uint64_t rank = static_cast<uint64_t>( Dimensions.size() ); | ||||
|     uint64_t tmp = 1; | ||||
|     for( auto i = 0 ; i < rank ; i++ ) | ||||
|       tmp *= Dimensions[i]; | ||||
|     assert( tmp == NumElements && "Dimensions don't match size of data being written" ); | ||||
|     // Total number of elements | ||||
|     write("", tmp); | ||||
|     // Number of dimensions | ||||
|     write("", rank); | ||||
|     // Followed by each dimension | ||||
|     for( auto i = 0 ; i < rank ; i++ ) { | ||||
|       tmp = Dimensions[i]; | ||||
|       write("", tmp); | ||||
|     } | ||||
|     for( auto i = 0; i < NumElements; ++i) | ||||
|       write("", pDataRowMajor[i]); | ||||
|   } | ||||
|  | ||||
|   // Reader template implementation //////////////////////////////////////////// | ||||
|   template <typename U> | ||||
|   void BinaryReader::readDefault(const std::string &s, U &output) | ||||
| @@ -139,30 +114,6 @@ namespace Grid { | ||||
|       read("", output[i]); | ||||
|     } | ||||
|   } | ||||
|  | ||||
|   template <typename U> | ||||
|   void BinaryReader::readMultiDim(const std::string &s, std::vector<U> &buf, std::vector<size_t> &dim) | ||||
|   { | ||||
|     // Number of elements | ||||
|     uint64_t NumElements; | ||||
|     read("", NumElements); | ||||
|     // Number of dimensions | ||||
|     uint64_t rank; | ||||
|     read("", rank); | ||||
|     // Followed by each dimension | ||||
|     uint64_t count = 1; | ||||
|     dim.resize(rank); | ||||
|     uint64_t tmp; | ||||
|     for( auto i = 0 ; i < rank ; i++ ) { | ||||
|       read("", tmp); | ||||
|       dim[i] = tmp; | ||||
|       count *= tmp; | ||||
|     } | ||||
|     assert( count == NumElements && "Dimensions don't match size of data being read" ); | ||||
|     buf.resize(count); | ||||
|     for( auto i = 0; i < count; ++i) | ||||
|       read("", buf[i]); | ||||
|   } | ||||
| } | ||||
|  | ||||
| #endif | ||||
|   | ||||
| @@ -61,9 +61,9 @@ Group & Hdf5Writer::getGroup(void) | ||||
| } | ||||
|  | ||||
| // Reader implementation /////////////////////////////////////////////////////// | ||||
| Hdf5Reader::Hdf5Reader(const std::string &fileName, const bool readOnly) | ||||
| Hdf5Reader::Hdf5Reader(const std::string &fileName) | ||||
| : fileName_(fileName) | ||||
| , file_(fileName.c_str(), readOnly ? H5F_ACC_RDONLY : H5F_ACC_RDWR) | ||||
| , file_(fileName.c_str(), H5F_ACC_RDWR) | ||||
| { | ||||
|   group_ = file_.openGroup("/"); | ||||
|   readSingleAttribute(dataSetThres_, HDF5_GRID_GUARD "dataset_threshold", | ||||
|   | ||||
| @@ -3,7 +3,6 @@ | ||||
|  | ||||
| #include <stack> | ||||
| #include <string> | ||||
| #include <list> | ||||
| #include <vector> | ||||
| #include <H5Cpp.h> | ||||
| #include <Grid/tensors/Tensors.h> | ||||
| @@ -39,8 +38,6 @@ namespace Grid | ||||
|     template <typename U> | ||||
|     typename std::enable_if<!element<std::vector<U>>::is_number, void>::type | ||||
|     writeDefault(const std::string &s, const std::vector<U> &x); | ||||
|     template <typename U> | ||||
|     void writeMultiDim(const std::string &s, const std::vector<size_t> & Dimensions, const U * pDataRowMajor, size_t NumElements); | ||||
|     H5NS::Group & getGroup(void); | ||||
|   private: | ||||
|     template <typename U> | ||||
| @@ -51,13 +48,13 @@ namespace Grid | ||||
|     std::vector<std::string> path_; | ||||
|     H5NS::H5File             file_; | ||||
|     H5NS::Group              group_; | ||||
|     const unsigned int       dataSetThres_{HDF5_DEF_DATASET_THRES}; | ||||
|     unsigned int             dataSetThres_{HDF5_DEF_DATASET_THRES}; | ||||
|   }; | ||||
|    | ||||
|   class Hdf5Reader: public Reader<Hdf5Reader> | ||||
|   { | ||||
|   public: | ||||
|     Hdf5Reader(const std::string &fileName, const bool readOnly = true); | ||||
|     Hdf5Reader(const std::string &fileName); | ||||
|     virtual ~Hdf5Reader(void) = default; | ||||
|     bool push(const std::string &s); | ||||
|     void pop(void); | ||||
| @@ -69,8 +66,6 @@ namespace Grid | ||||
|     template <typename U> | ||||
|     typename std::enable_if<!element<std::vector<U>>::is_number, void>::type | ||||
|     readDefault(const std::string &s, std::vector<U> &x); | ||||
|     template <typename U> | ||||
|     void readMultiDim(const std::string &s, std::vector<U> &buf, std::vector<size_t> &dim); | ||||
|     H5NS::Group & getGroup(void); | ||||
|   private: | ||||
|     template <typename U> | ||||
| @@ -106,75 +101,6 @@ namespace Grid | ||||
|   template <> | ||||
|   void Hdf5Writer::writeDefault(const std::string &s, const std::string &x); | ||||
|    | ||||
|   template <typename U> | ||||
|   void Hdf5Writer::writeMultiDim(const std::string &s, const std::vector<size_t> & Dimensions, const U * pDataRowMajor, size_t NumElements) | ||||
|   { | ||||
|     // Hdf5 needs the dimensions as hsize_t | ||||
|     const int rank = static_cast<int>(Dimensions.size()); | ||||
|     std::vector<hsize_t> dim(rank); | ||||
|     for(int i = 0; i < rank; i++) | ||||
|       dim[i] = Dimensions[i]; | ||||
|     // write the entire dataset to file | ||||
|     H5NS::DataSpace dataSpace(rank, dim.data()); | ||||
|  | ||||
|     if (NumElements > dataSetThres_) | ||||
|     { | ||||
|       // Make sure 1) each dimension; and 2) chunk size is < 4GB | ||||
|       const hsize_t MaxElements = ( sizeof( U ) == 1 ) ? 0xffffffff : 0x100000000 / sizeof( U ); | ||||
|       hsize_t ElementsPerChunk = 1; | ||||
|       bool bTooBig = false; | ||||
|       for( int i = rank - 1 ; i != -1 ; i-- ) { | ||||
|         auto &d = dim[i]; | ||||
|         if( bTooBig ) | ||||
|           d = 1; // Chunk size is already as big as can be - remaining dimensions = 1 | ||||
|         else { | ||||
|           // If individual dimension too big, reduce by prime factors if possible | ||||
|           while( d > MaxElements && ( d & 1 ) == 0 ) | ||||
|             d >>= 1; | ||||
|           const char ErrorMsg[] = " dimension > 4GB and not divisible by 2^n. " | ||||
|                                   "Hdf5IO chunk size will be inefficient. NB Serialisation is not intended for large datasets - please consider alternatives."; | ||||
|           if( d > MaxElements ) { | ||||
|             std::cout << GridLogWarning << "Individual" << ErrorMsg << std::endl; | ||||
|             hsize_t quotient = d / MaxElements; | ||||
|             if( d % MaxElements ) | ||||
|               quotient++; | ||||
|             d /= quotient; | ||||
|           } | ||||
|           // Now make sure overall size is not too big | ||||
|           hsize_t OverflowCheck = ElementsPerChunk; | ||||
|           ElementsPerChunk *= d; | ||||
|           assert( OverflowCheck == ElementsPerChunk / d && "Product of dimensions overflowed hsize_t" ); | ||||
|           // If product of dimensions too big, reduce by prime factors | ||||
|           while( ElementsPerChunk > MaxElements && ( ElementsPerChunk & 1 ) == 0 ) { | ||||
|             bTooBig = true; | ||||
|             d >>= 1; | ||||
|             ElementsPerChunk >>= 1; | ||||
|           } | ||||
|           if( ElementsPerChunk > MaxElements ) { | ||||
|             std::cout << GridLogWarning << "Product of" << ErrorMsg << std::endl; | ||||
|             hsize_t quotient = ElementsPerChunk / MaxElements; | ||||
|             if( ElementsPerChunk % MaxElements ) | ||||
|               quotient++; | ||||
|             d /= quotient; | ||||
|             ElementsPerChunk /= quotient; | ||||
|           } | ||||
|         } | ||||
|       } | ||||
|       H5NS::DataSet           dataSet; | ||||
|       H5NS::DSetCreatPropList plist; | ||||
|       plist.setChunk(rank, dim.data()); | ||||
|       plist.setFletcher32(); | ||||
|       dataSet = group_.createDataSet(s, Hdf5Type<U>::type(), dataSpace, plist); | ||||
|       dataSet.write(pDataRowMajor, Hdf5Type<U>::type()); | ||||
|     } | ||||
|     else | ||||
|     { | ||||
|       H5NS::Attribute attribute; | ||||
|       attribute = group_.createAttribute(s, Hdf5Type<U>::type(), dataSpace); | ||||
|       attribute.write(Hdf5Type<U>::type(), pDataRowMajor); | ||||
|     } | ||||
|   } | ||||
|  | ||||
|   template <typename U> | ||||
|   typename std::enable_if<element<std::vector<U>>::is_number, void>::type | ||||
|   Hdf5Writer::writeDefault(const std::string &s, const std::vector<U> &x) | ||||
| @@ -184,11 +110,31 @@ namespace Grid | ||||
|      | ||||
|     // flatten the vector and getting dimensions | ||||
|     Flatten<std::vector<U>> flat(x); | ||||
|     std::vector<size_t> dim; | ||||
|     std::vector<hsize_t> dim; | ||||
|     const auto           &flatx = flat.getFlatVector(); | ||||
|      | ||||
|     for (auto &d: flat.getDim()) | ||||
|     { | ||||
|       dim.push_back(d); | ||||
|     writeMultiDim<Element>(s, dim, &flatx[0], flatx.size()); | ||||
|     } | ||||
|      | ||||
|     // write to file | ||||
|     H5NS::DataSpace dataSpace(dim.size(), dim.data()); | ||||
|      | ||||
|     if (flatx.size() > dataSetThres_) | ||||
|     { | ||||
|       H5NS::DataSet dataSet; | ||||
|        | ||||
|       dataSet = group_.createDataSet(s, Hdf5Type<Element>::type(), dataSpace); | ||||
|       dataSet.write(flatx.data(), Hdf5Type<Element>::type()); | ||||
|     } | ||||
|     else | ||||
|     { | ||||
|       H5NS::Attribute attribute; | ||||
|        | ||||
|       attribute = group_.createAttribute(s, Hdf5Type<Element>::type(), dataSpace); | ||||
|       attribute.write(Hdf5Type<Element>::type(), flatx.data()); | ||||
|     } | ||||
|   } | ||||
|    | ||||
|   template <typename U> | ||||
| @@ -226,7 +172,8 @@ namespace Grid | ||||
|   void Hdf5Reader::readDefault(const std::string &s, std::string &x); | ||||
|    | ||||
|   template <typename U> | ||||
|   void Hdf5Reader::readMultiDim(const std::string &s, std::vector<U> &buf, std::vector<size_t> &dim) | ||||
|   typename std::enable_if<element<std::vector<U>>::is_number, void>::type | ||||
|   Hdf5Reader::readDefault(const std::string &s, std::vector<U> &x) | ||||
|   { | ||||
|     // alias to element type | ||||
|     typedef typename element<std::vector<U>>::type Element; | ||||
| @@ -234,6 +181,7 @@ namespace Grid | ||||
|     // read the dimensions | ||||
|     H5NS::DataSpace       dataSpace; | ||||
|     std::vector<hsize_t>  hdim; | ||||
|     std::vector<size_t>   dim; | ||||
|     hsize_t               size = 1; | ||||
|      | ||||
|     if (group_.attrExists(s)) | ||||
| @@ -253,7 +201,7 @@ namespace Grid | ||||
|     } | ||||
|      | ||||
|     // read the flat vector | ||||
|     buf.resize(size); | ||||
|     std::vector<Element> buf(size); | ||||
|  | ||||
|     if (size > dataSetThres_) | ||||
|     { | ||||
| @@ -269,18 +217,6 @@ namespace Grid | ||||
|       attribute = group_.openAttribute(s); | ||||
|       attribute.read(Hdf5Type<Element>::type(), buf.data()); | ||||
|     } | ||||
|   } | ||||
|  | ||||
|   template <typename U> | ||||
|   typename std::enable_if<element<std::vector<U>>::is_number, void>::type | ||||
|   Hdf5Reader::readDefault(const std::string &s, std::vector<U> &x) | ||||
|   { | ||||
|     // alias to element type | ||||
|     typedef typename element<std::vector<U>>::type Element; | ||||
|  | ||||
|     std::vector<size_t>   dim; | ||||
|     std::vector<Element>  buf; | ||||
|     readMultiDim( s, buf, dim ); | ||||
|      | ||||
|     // reconstruct the multidimensional vector | ||||
|     Reconstruct<std::vector<U>> r(buf, dim); | ||||
|   | ||||
| @@ -109,8 +109,8 @@ THE SOFTWARE. | ||||
| ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// | ||||
|  | ||||
| #define GRID_MACRO_MEMBER(A,B)        A B; | ||||
| #define GRID_MACRO_COMP_MEMBER(A,B) result = (result and CompareMember(lhs. B, rhs. B)); | ||||
| #define GRID_MACRO_OS_WRITE_MEMBER(A,B) os<< #A <<" " #B << " = "; WriteMember( os, obj. B ); os << " ; " <<std::endl; | ||||
| #define GRID_MACRO_COMP_MEMBER(A,B) result = (result and (lhs. B == rhs. B)); | ||||
| #define GRID_MACRO_OS_WRITE_MEMBER(A,B) os<< #A <<" " #B << " = " << obj. B << " ; " <<std::endl; | ||||
| #define GRID_MACRO_READ_MEMBER(A,B) Grid::read(RD,#B,obj. B); | ||||
| #define GRID_MACRO_WRITE_MEMBER(A,B) Grid::write(WR,#B,obj. B); | ||||
|  | ||||
|   | ||||
| @@ -51,8 +51,6 @@ namespace Grid | ||||
|     void writeDefault(const std::string &s, const U &x); | ||||
|     template <typename U> | ||||
|     void writeDefault(const std::string &s, const std::vector<U> &x); | ||||
|     template <typename U> | ||||
|     void writeMultiDim(const std::string &s, const std::vector<size_t> & Dimensions, const U * pDataRowMajor, size_t NumElements); | ||||
|   private: | ||||
|     void indent(void); | ||||
|   private: | ||||
| @@ -71,8 +69,6 @@ namespace Grid | ||||
|     void readDefault(const std::string &s, U &output); | ||||
|     template <typename U> | ||||
|     void readDefault(const std::string &s, std::vector<U> &output); | ||||
|     template <typename U> | ||||
|     void readMultiDim(const std::string &s, std::vector<U> &buf, std::vector<size_t> &dim); | ||||
|   private: | ||||
|     void checkIndent(void); | ||||
|   private: | ||||
| @@ -100,17 +96,6 @@ namespace Grid | ||||
|     } | ||||
|   } | ||||
|    | ||||
|   template <typename U> | ||||
|   void TextWriter::writeMultiDim(const std::string &s, const std::vector<size_t> & Dimensions, const U * pDataRowMajor, size_t NumElements) | ||||
|   { | ||||
|     uint64_t Rank = Dimensions.size(); | ||||
|     write(s, Rank); | ||||
|     for( uint64_t d : Dimensions ) | ||||
|       write(s, d); | ||||
|     while( NumElements-- ) | ||||
|       write(s, *pDataRowMajor++); | ||||
|   } | ||||
|  | ||||
|   // Reader template implementation //////////////////////////////////////////// | ||||
|   template <typename U> | ||||
|   void TextReader::readDefault(const std::string &s, U &output) | ||||
| @@ -136,23 +121,6 @@ namespace Grid | ||||
|       read("", output[i]); | ||||
|     } | ||||
|   } | ||||
|  | ||||
|   template <typename U> | ||||
|   void TextReader::readMultiDim(const std::string &s, std::vector<U> &buf, std::vector<size_t> &dim) | ||||
|   { | ||||
|     const char sz[] = ""; | ||||
|     uint64_t Rank; | ||||
|     read(sz, Rank); | ||||
|     dim.resize( Rank ); | ||||
|     size_t NumElements = 1; | ||||
|     for( auto &d : dim ) { | ||||
|       read(sz, d); | ||||
|       NumElements *= d; | ||||
|     } | ||||
|     buf.resize( NumElements ); | ||||
|     for( auto &x : buf ) | ||||
|       read(s, x); | ||||
|   } | ||||
| } | ||||
|  | ||||
| #endif | ||||
|   | ||||
| @@ -1,32 +1,3 @@ | ||||
| /************************************************************************************* | ||||
|   | ||||
|  Grid physics library, www.github.com/paboyle/Grid | ||||
|   | ||||
|  Source file: ./Grid/serialisation/VectorUtils.h | ||||
|   | ||||
|  Copyright (C) 2015 | ||||
|   | ||||
|  Author: Antonin Portelli <antonin.portelli@me.com> | ||||
|  Author: Peter Boyle <paboyle@ph.ed.ac.uk> | ||||
|  Author: paboyle <paboyle@ph.ed.ac.uk> | ||||
|   | ||||
|  This program is free software; you can redistribute it and/or modify | ||||
|  it under the terms of the GNU General Public License as published by | ||||
|  the Free Software Foundation; either version 2 of the License, or | ||||
|  (at your option) any later version. | ||||
|   | ||||
|  This program is distributed in the hope that it will be useful, | ||||
|  but WITHOUT ANY WARRANTY; without even the implied warranty of | ||||
|  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the | ||||
|  GNU General Public License for more details. | ||||
|   | ||||
|  You should have received a copy of the GNU General Public License along | ||||
|  with this program; if not, write to the Free Software Foundation, Inc., | ||||
|  51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. | ||||
|   | ||||
|  See the full license in the file "LICENSE" in the top level distribution directory | ||||
|  *************************************************************************************/ | ||||
| /*  END LEGAL */ | ||||
| #ifndef GRID_SERIALISATION_VECTORUTILS_H | ||||
| #define GRID_SERIALISATION_VECTORUTILS_H | ||||
|  | ||||
| @@ -82,17 +53,6 @@ namespace Grid { | ||||
|     return os; | ||||
|   } | ||||
|    | ||||
|   // std::vector<std::vector<...>> nested to specified Rank ////////////////////////////////// | ||||
|   template<typename T, unsigned int Rank> | ||||
|   struct NestedStdVector { | ||||
|     typedef typename std::vector<typename NestedStdVector<T, Rank - 1>::type> type; | ||||
|   }; | ||||
|    | ||||
|   template<typename T> | ||||
|   struct NestedStdVector<T,0> { | ||||
|     typedef T type; | ||||
|   }; | ||||
|    | ||||
|   // Grid scalar tensors to nested std::vectors ////////////////////////////////// | ||||
|   template <typename T> | ||||
|   struct TensorToVec | ||||
|   | ||||
| @@ -57,8 +57,6 @@ namespace Grid | ||||
|     void writeDefault(const std::string &s, const U &x); | ||||
|     template <typename U> | ||||
|     void writeDefault(const std::string &s, const std::vector<U> &x); | ||||
|     template <typename U> | ||||
|     void writeMultiDim(const std::string &s, const std::vector<size_t> & Dimensions, const U * pDataRowMajor, size_t NumElements); | ||||
|     std::string docString(void); | ||||
|     std::string string(void); | ||||
|   private: | ||||
| @@ -81,8 +79,6 @@ namespace Grid | ||||
|     void readDefault(const std::string &s, U &output); | ||||
|     template <typename U> | ||||
|     void readDefault(const std::string &s, std::vector<U> &output); | ||||
|     template <typename U> | ||||
|     void readMultiDim(const std::string &s, std::vector<U> &buf, std::vector<size_t> &dim); | ||||
|     void readCurrentSubtree(std::string &s); | ||||
|   private: | ||||
|     void checkParse(const pugi::xml_parse_result &result, const std::string name); | ||||
| @@ -126,41 +122,9 @@ namespace Grid | ||||
|   void XmlWriter::writeDefault(const std::string &s, const std::vector<U> &x) | ||||
|   { | ||||
|     push(s); | ||||
|     for( auto &u : x ) | ||||
|     for (auto &x_i: x) | ||||
|     { | ||||
|       write("elem", u); | ||||
|     } | ||||
|     pop(); | ||||
|   } | ||||
|  | ||||
|   template <typename U> | ||||
|   void XmlWriter::writeMultiDim(const std::string &s, const std::vector<size_t> & Dimensions, const U * pDataRowMajor, size_t NumElements) | ||||
|   { | ||||
|     push(s); | ||||
|     size_t count = 1; | ||||
|     const int Rank = static_cast<int>( Dimensions.size() ); | ||||
|     write("rank", Rank ); | ||||
|     std::vector<size_t> MyIndex( Rank ); | ||||
|     for( auto d : Dimensions ) { | ||||
|       write("dim", d); | ||||
|       count *= d; | ||||
|     } | ||||
|     assert( count == NumElements && "XmlIO : element count doesn't match dimensions" ); | ||||
|     static const char sName[] = "tensor"; | ||||
|     for( int i = 0 ; i < Rank ; i++ ) { | ||||
|       MyIndex[i] = 0; | ||||
|       push(sName); | ||||
|     } | ||||
|     while (NumElements--) { | ||||
|       write("elem", *pDataRowMajor++); | ||||
|       int i; | ||||
|       for( i = Rank - 1 ; i != -1 && ++MyIndex[i] == Dimensions[i] ; i-- ) | ||||
|         MyIndex[i] = 0; | ||||
|       int Rollover = Rank - 1 - i; | ||||
|       for( i = 0 ; i < Rollover ; i++ ) | ||||
|         pop(); | ||||
|       for( i = 0 ; NumElements && i < Rollover ; i++ ) | ||||
|         push(sName); | ||||
|       write("elem", x_i); | ||||
|     } | ||||
|     pop(); | ||||
|   } | ||||
| @@ -181,66 +145,25 @@ namespace Grid | ||||
|   template <typename U> | ||||
|   void XmlReader::readDefault(const std::string &s, std::vector<U> &output) | ||||
|   { | ||||
|     std::string    buf; | ||||
|     unsigned int   i = 0; | ||||
|      | ||||
|     if (!push(s)) | ||||
|     { | ||||
|       std::cout << GridLogWarning << "XML: cannot open node '" << s << "'"; | ||||
|       std::cout << std::endl; | ||||
|     } else { | ||||
|       for(unsigned int i = 0; node_.child("elem"); ) | ||||
|  | ||||
|       return;  | ||||
|     } | ||||
|     while (node_.child("elem")) | ||||
|     { | ||||
|       output.resize(i + 1); | ||||
|         read("elem", output[i++]); | ||||
|       read("elem", output[i]); | ||||
|       node_.child("elem").set_name("elem-done"); | ||||
|       i++; | ||||
|     } | ||||
|     pop(); | ||||
|   } | ||||
|   } | ||||
|    | ||||
|   template <typename U> | ||||
|   void XmlReader::readMultiDim(const std::string &s, std::vector<U> &buf, std::vector<size_t> &dim) | ||||
|   { | ||||
|     if (!push(s)) | ||||
|     { | ||||
|       std::cout << GridLogWarning << "XML: cannot open node '" << s << "'"; | ||||
|       std::cout << std::endl; | ||||
|     } else { | ||||
|       static const char sName[] = "tensor"; | ||||
|       static const char sNameDone[] = "tensor-done"; | ||||
|       int Rank; | ||||
|       read("rank", Rank); | ||||
|       dim.resize( Rank ); | ||||
|       size_t NumElements = 1; | ||||
|       for( auto &d : dim ) | ||||
|       { | ||||
|         read("dim", d); | ||||
|         node_.child("dim").set_name("dim-done"); | ||||
|         NumElements *= d; | ||||
|       } | ||||
|       buf.resize( NumElements ); | ||||
|       std::vector<size_t> MyIndex( Rank ); | ||||
|       for( int i = 0 ; i < Rank ; i++ ) { | ||||
|         MyIndex[i] = 0; | ||||
|         push(sName); | ||||
|       } | ||||
|  | ||||
|       for( auto &x : buf ) | ||||
|       { | ||||
|         NumElements--; | ||||
|         read("elem", x); | ||||
|         node_.child("elem").set_name("elem-done"); | ||||
|         int i; | ||||
|         for( i = Rank - 1 ; i != -1 && ++MyIndex[i] == dim[i] ; i-- ) | ||||
|           MyIndex[i] = 0; | ||||
|         int Rollover = Rank - 1 - i; | ||||
|         for( i = 0 ; i < Rollover ; i++ ) { | ||||
|           node_.set_name(sNameDone); | ||||
|           pop(); | ||||
|         } | ||||
|         for( i = 0 ; NumElements && i < Rollover ; i++ ) | ||||
|           push(sName); | ||||
|       } | ||||
|       pop(); | ||||
|     } | ||||
|   } | ||||
| } | ||||
| #endif | ||||
|   | ||||
| @@ -485,6 +485,83 @@ namespace Optimization { | ||||
|   // Some Template specialization | ||||
|  | ||||
|   // Hack for CLANG until mm512_reduce_add_ps etc... are implemented in GCC and Clang releases | ||||
| #ifndef __INTEL_COMPILER | ||||
| #warning "Slow reduction due to incomplete reduce intrinsics" | ||||
|   //Complex float Reduce: three permute+add folds, then conv.f[0] and conv.f[1] form the returned ComplexF | ||||
|   template<> | ||||
|     inline Grid::ComplexF Reduce<Grid::ComplexF, __m512>::operator()(__m512 in){ | ||||
|     __m512 v1,v2; | ||||
|     v1=Optimization::Permute::Permute0(in); // avx 512; __m512 = 16 floats, i.e. 8 complex singles (not quad) | ||||
|     v1= _mm512_add_ps(v1,in); | ||||
|     v2=Optimization::Permute::Permute1(v1);  | ||||
|     v1 = _mm512_add_ps(v1,v2); | ||||
|     v2=Optimization::Permute::Permute2(v1);  | ||||
|     v1 = _mm512_add_ps(v1,v2); | ||||
|     u512f conv; conv.v = v1; | ||||
|     return Grid::ComplexF(conv.f[0],conv.f[1]); | ||||
|   } | ||||
|    | ||||
|   //Real float Reduce: four permute+add folds (Permute0..Permute3), result is read from conv.f[0] | ||||
|   template<> | ||||
|     inline Grid::RealF Reduce<Grid::RealF, __m512>::operator()(__m512 in){ | ||||
|     __m512 v1,v2; | ||||
|     v1 = Optimization::Permute::Permute0(in); // avx 512; __m512 = 16 single-precision floats | ||||
|     v1 = _mm512_add_ps(v1,in); | ||||
|     v2 = Optimization::Permute::Permute1(v1);  | ||||
|     v1 = _mm512_add_ps(v1,v2); | ||||
|     v2 = Optimization::Permute::Permute2(v1);  | ||||
|     v1 = _mm512_add_ps(v1,v2); | ||||
|     v2 = Optimization::Permute::Permute3(v1);  | ||||
|     v1 = _mm512_add_ps(v1,v2); | ||||
|     u512f conv; conv.v=v1; | ||||
|     return conv.f[0]; | ||||
|   } | ||||
|    | ||||
|    | ||||
|   //Complex double Reduce: two permute+add folds, then conv.f[0] and conv.f[1] form the returned ComplexD | ||||
|   template<> | ||||
|     inline Grid::ComplexD Reduce<Grid::ComplexD, __m512d>::operator()(__m512d in){ | ||||
|     __m512d v1,v2; | ||||
|     v1 = Optimization::Permute::Permute0(in); // avx 512; __m512d = 8 doubles, i.e. 4 complex doubles | ||||
|     v1 = _mm512_add_pd(v1,in);                // first fold: input plus its Permute0 image | ||||
|     v2 = Optimization::Permute::Permute1(v1); // second fold must permute the partial sums, not `in`, | ||||
|     v1 = _mm512_add_pd(v1,v2);                // otherwise the first fold is overwritten and lost | ||||
|     u512d conv; conv.v = v1; | ||||
|     return Grid::ComplexD(conv.f[0],conv.f[1]); | ||||
|   } | ||||
|    | ||||
|   //Real double Reduce: three permute+add folds (Permute0..Permute2), result is read from conv.f[0] | ||||
|   template<> | ||||
|     inline Grid::RealD Reduce<Grid::RealD, __m512d>::operator()(__m512d in){ | ||||
|     __m512d v1,v2; | ||||
|     v1 = Optimization::Permute::Permute0(in); // avx 512; __m512d = 8 doubles (not quad) | ||||
|     v1 = _mm512_add_pd(v1,in); | ||||
|       v2 = Optimization::Permute::Permute1(v1);  | ||||
|       v1 = _mm512_add_pd(v1,v2); | ||||
|       v2 = Optimization::Permute::Permute2(v1);  | ||||
|       v1 = _mm512_add_pd(v1,v2); | ||||
|      u512d conv; conv.v = v1; | ||||
|      return conv.f[0]; | ||||
|   } | ||||
|    | ||||
|   //Integer Reduce: sums the 32-bit lanes of a __m512i and returns the total as a scalar | ||||
|   template<> | ||||
|   inline Integer Reduce<Integer, __m512i>::operator()(__m512i in){ | ||||
|     // No full vector reduce, use AVX to add upper and lower halves of register | ||||
|     // and perform AVX reduction. | ||||
|     __m256i v1, v2, v3; | ||||
|     __m128i u1, u2, ret; | ||||
|     v1  = _mm512_castsi512_si256(in);       // lower half (the cast keeps the low 256 bits) | ||||
|     v2  = _mm512_extracti32x8_epi32(in, 1); // upper half (index 1 selects the high 256 bits) | ||||
|     v3  = _mm256_add_epi32(v1, v2); | ||||
|     v1  = _mm256_hadd_epi32(v3, v3); | ||||
|     v2  = _mm256_hadd_epi32(v1, v1); | ||||
|     u1  = _mm256_castsi256_si128(v2);        // lower half (the cast keeps the low 128 bits) | ||||
|     u2  = _mm256_extracti128_si256(v2, 1);  // upper half (index 1 selects the high 128 bits) | ||||
|     ret = _mm_add_epi32(u1, u2); | ||||
|     return _mm_cvtsi128_si32(ret); | ||||
|   } | ||||
| #else | ||||
|   //Complex float Reduce | ||||
|   template<> | ||||
|   inline Grid::ComplexF Reduce<Grid::ComplexF, __m512>::operator()(__m512 in){ | ||||
| @@ -513,6 +590,8 @@ namespace Optimization { | ||||
|   inline Integer Reduce<Integer, __m512i>::operator()(__m512i in){ | ||||
|     return _mm512_reduce_add_epi32(in); | ||||
|   } | ||||
| #endif | ||||
|    | ||||
|    | ||||
| } | ||||
|  | ||||
|   | ||||
| @@ -10,7 +10,6 @@ Author: Azusa Yamaguchi <ayamaguc@staffmail.ed.ac.uk> | ||||
| Author: Guido Cossu <cossu@iroiro-pc.kek.jp> | ||||
| Author: Peter Boyle <paboyle@ph.ed.ac.uk> | ||||
| Author: neo <cossu@post.kek.jp> | ||||
| Author: Michael Marshall <michael.marshall@ed.ac.au> | ||||
|  | ||||
| This program is free software; you can redistribute it and/or modify | ||||
| it under the terms of the GNU General Public License as published by | ||||
| @@ -90,25 +89,17 @@ template <typename Condition, typename ReturnType> using NotEnableIf = Invoke<st | ||||
| //////////////////////////////////////////////////////// | ||||
| // Check for complexity with type traits | ||||
| template <typename T> struct is_complex : public std::false_type {}; | ||||
| template <> struct is_complex<ComplexD> : public std::true_type {}; | ||||
| template <> struct is_complex<ComplexF> : public std::true_type {}; | ||||
| template <> struct is_complex<std::complex<double> > : public std::true_type {}; | ||||
| template <> struct is_complex<std::complex<float> > : public std::true_type {}; | ||||
|  | ||||
| template<typename T, typename V=void> struct is_real : public std::false_type {}; | ||||
| template<typename T> struct is_real<T, typename std::enable_if<std::is_floating_point<T>::value, | ||||
|   void>::type> : public std::true_type {}; | ||||
|  | ||||
| template<typename T, typename V=void> struct is_integer : public std::false_type {}; | ||||
| template<typename T> struct is_integer<T, typename std::enable_if<std::is_integral<T>::value, | ||||
|   void>::type> : public std::true_type {}; | ||||
|    | ||||
| template <typename T>              using IfReal    = Invoke<std::enable_if<is_real<T>::value, int> >; | ||||
| template <typename T>              using IfReal    = Invoke<std::enable_if<std::is_floating_point<T>::value, int> >; | ||||
| template <typename T>              using IfComplex = Invoke<std::enable_if<is_complex<T>::value, int> >; | ||||
| template <typename T>              using IfInteger = Invoke<std::enable_if<is_integer<T>::value, int> >; | ||||
| template <typename T>              using IfInteger = Invoke<std::enable_if<std::is_integral<T>::value, int> >; | ||||
| template <typename T1,typename T2> using IfSame    = Invoke<std::enable_if<std::is_same<T1,T2>::value, int> >; | ||||
|  | ||||
| template <typename T>              using IfNotReal    = Invoke<std::enable_if<!is_real<T>::value, int> >; | ||||
| template <typename T>              using IfNotReal    = Invoke<std::enable_if<!std::is_floating_point<T>::value, int> >; | ||||
| template <typename T>              using IfNotComplex = Invoke<std::enable_if<!is_complex<T>::value, int> >; | ||||
| template <typename T>              using IfNotInteger = Invoke<std::enable_if<!is_integer<T>::value, int> >; | ||||
| template <typename T>              using IfNotInteger = Invoke<std::enable_if<!std::is_integral<T>::value, int> >; | ||||
| template <typename T1,typename T2> using IfNotSame    = Invoke<std::enable_if<!std::is_same<T1,T2>::value, int> >; | ||||
|  | ||||
| //////////////////////////////////////////////////////// | ||||
| @@ -866,10 +857,8 @@ template <typename T> | ||||
| struct is_simd : public std::false_type {}; | ||||
| template <> struct is_simd<vRealF>     : public std::true_type {}; | ||||
| template <> struct is_simd<vRealD>     : public std::true_type {}; | ||||
| template <> struct is_simd<vRealH>     : public std::true_type {}; | ||||
| template <> struct is_simd<vComplexF>  : public std::true_type {}; | ||||
| template <> struct is_simd<vComplexD>  : public std::true_type {}; | ||||
| template <> struct is_simd<vComplexH>  : public std::true_type {}; | ||||
| template <> struct is_simd<vInteger>   : public std::true_type {}; | ||||
|  | ||||
| template <typename T> using IfSimd    = Invoke<std::enable_if<is_simd<T>::value, int> >; | ||||
|   | ||||
| @@ -5,7 +5,6 @@ Copyright (C) 2015 | ||||
|  | ||||
| Author: Azusa Yamaguchi <ayamaguc@staffmail.ed.ac.uk> | ||||
| Author: Peter Boyle <paboyle@ph.ed.ac.uk> | ||||
| Author: Michael Marshall <michael.marshall@ed.ac.au> | ||||
|  | ||||
| This program is free software; you can redistribute it and/or modify | ||||
| it under the terms of the GNU General Public License as published by | ||||
| @@ -43,26 +42,27 @@ namespace Grid { | ||||
| // | ||||
| class GridTensorBase {}; | ||||
|  | ||||
| // Too late to remove these traits from Grid Tensors, so inherit from GridTypeMapper | ||||
| #define GridVector_CopyTraits \ | ||||
|   using element = vtype; \ | ||||
|   using scalar_type     = typename Traits::scalar_type; \ | ||||
|   using vector_type     = typename Traits::vector_type; \ | ||||
|   using vector_typeD    = typename Traits::vector_typeD; \ | ||||
|   using tensor_reduced  = typename Traits::tensor_reduced; \ | ||||
|   using scalar_object   = typename Traits::scalar_object; \ | ||||
|   using Complexified    = typename Traits::Complexified; \ | ||||
|   using Realified       = typename Traits::Realified; \ | ||||
|   using DoublePrecision = typename Traits::DoublePrecision; \ | ||||
|   static constexpr int TensorLevel = Traits::TensorLevel | ||||
|  | ||||
| template <class vtype> | ||||
| class iScalar { | ||||
|  public: | ||||
|   vtype _internal; | ||||
|  | ||||
|   using Traits = GridTypeMapper<iScalar<vtype> >; | ||||
|   GridVector_CopyTraits; | ||||
|   typedef vtype element; | ||||
|   typedef typename GridTypeMapper<vtype>::scalar_type scalar_type; | ||||
|   typedef typename GridTypeMapper<vtype>::vector_type vector_type; | ||||
|   typedef typename GridTypeMapper<vtype>::vector_typeD vector_typeD; | ||||
|   typedef typename GridTypeMapper<vtype>::tensor_reduced tensor_reduced_v; | ||||
|   typedef typename GridTypeMapper<vtype>::scalar_object recurse_scalar_object; | ||||
|   typedef iScalar<tensor_reduced_v> tensor_reduced; | ||||
|   typedef iScalar<recurse_scalar_object> scalar_object; | ||||
|   // substitutes a real or complex version with same tensor structure | ||||
|   typedef iScalar<typename GridTypeMapper<vtype>::Complexified> Complexified; | ||||
|   typedef iScalar<typename GridTypeMapper<vtype>::Realified> Realified; | ||||
|  | ||||
|   // get double precision version | ||||
|   typedef iScalar<typename GridTypeMapper<vtype>::DoublePrecision> DoublePrecision; | ||||
|    | ||||
|   enum { TensorLevel = GridTypeMapper<vtype>::TensorLevel + 1 }; | ||||
|  | ||||
|   // Scalar no action | ||||
|   //  template<int Level> using tensor_reduce_level = typename | ||||
| @@ -173,10 +173,7 @@ class iScalar { | ||||
|     return stream; | ||||
|   }; | ||||
|  | ||||
|   strong_inline const scalar_type * begin() const { return reinterpret_cast<const scalar_type *>(&_internal); } | ||||
|   strong_inline       scalar_type * begin()       { return reinterpret_cast<      scalar_type *>(&_internal); } | ||||
|   strong_inline const scalar_type * end()   const { return begin() + Traits::count; } | ||||
|   strong_inline       scalar_type * end()         { return begin() + Traits::count; } | ||||
|  | ||||
| }; | ||||
| /////////////////////////////////////////////////////////// | ||||
| // Allows turning scalar<scalar<scalar<double>>> back into double. | ||||
| @@ -197,8 +194,21 @@ class iVector { | ||||
|  public: | ||||
|   vtype _internal[N]; | ||||
|  | ||||
|   using Traits = GridTypeMapper<iVector<vtype, N> >; | ||||
|   GridVector_CopyTraits; | ||||
|   typedef vtype element; | ||||
|   typedef typename GridTypeMapper<vtype>::scalar_type scalar_type; | ||||
|   typedef typename GridTypeMapper<vtype>::vector_type vector_type; | ||||
|   typedef typename GridTypeMapper<vtype>::vector_typeD vector_typeD; | ||||
|   typedef typename GridTypeMapper<vtype>::tensor_reduced tensor_reduced_v; | ||||
|   typedef typename GridTypeMapper<vtype>::scalar_object recurse_scalar_object; | ||||
|   typedef iScalar<tensor_reduced_v> tensor_reduced; | ||||
|   typedef iVector<recurse_scalar_object, N> scalar_object; | ||||
|  | ||||
|   // substitutes a real or complex version with same tensor structure | ||||
|   typedef iVector<typename GridTypeMapper<vtype>::Complexified, N> Complexified; | ||||
|   typedef iVector<typename GridTypeMapper<vtype>::Realified, N> Realified; | ||||
|  | ||||
|   // get double precision version | ||||
|   typedef iVector<typename GridTypeMapper<vtype>::DoublePrecision, N> DoublePrecision; | ||||
|    | ||||
|   template <class T, typename std::enable_if<!isGridTensor<T>::value, T>::type | ||||
|                          * = nullptr> | ||||
| @@ -208,6 +218,7 @@ class iVector { | ||||
|     return *this; | ||||
|   } | ||||
|  | ||||
|   enum { TensorLevel = GridTypeMapper<vtype>::TensorLevel + 1 }; | ||||
|   iVector(const Zero &z) { *this = zero; }; | ||||
|   iVector() = default; | ||||
|   /* | ||||
| @@ -292,11 +303,6 @@ class iVector { | ||||
|   //    strong_inline vtype && operator ()(int i) { | ||||
|   //      return _internal[i]; | ||||
|   //    } | ||||
|  | ||||
|   strong_inline const scalar_type * begin() const { return reinterpret_cast<const scalar_type *>(_internal); } | ||||
|   strong_inline       scalar_type * begin()       { return reinterpret_cast<      scalar_type *>(_internal); } | ||||
|   strong_inline const scalar_type * end()   const { return begin() + Traits::count; } | ||||
|   strong_inline       scalar_type * end()         { return begin() + Traits::count; } | ||||
| }; | ||||
|  | ||||
| template <class vtype, int N> | ||||
| @@ -304,8 +310,25 @@ class iMatrix { | ||||
|  public: | ||||
|   vtype _internal[N][N]; | ||||
|  | ||||
|   using Traits = GridTypeMapper<iMatrix<vtype, N> >; | ||||
|   GridVector_CopyTraits; | ||||
|   typedef vtype element; | ||||
|   typedef typename GridTypeMapper<vtype>::scalar_type scalar_type; | ||||
|   typedef typename GridTypeMapper<vtype>::vector_type vector_type; | ||||
|   typedef typename GridTypeMapper<vtype>::vector_typeD vector_typeD; | ||||
|   typedef typename GridTypeMapper<vtype>::tensor_reduced tensor_reduced_v; | ||||
|   typedef typename GridTypeMapper<vtype>::scalar_object recurse_scalar_object; | ||||
|  | ||||
|   // substitutes a real or complex version with same tensor structure | ||||
|   typedef iMatrix<typename GridTypeMapper<vtype>::Complexified, N> Complexified; | ||||
|   typedef iMatrix<typename GridTypeMapper<vtype>::Realified, N> Realified; | ||||
|  | ||||
|   // get double precision version | ||||
|   typedef iMatrix<typename GridTypeMapper<vtype>::DoublePrecision, N> DoublePrecision; | ||||
|    | ||||
|   // Tensor removal | ||||
|   typedef iScalar<tensor_reduced_v> tensor_reduced; | ||||
|   typedef iMatrix<recurse_scalar_object, N> scalar_object; | ||||
|  | ||||
|   enum { TensorLevel = GridTypeMapper<vtype>::TensorLevel + 1 }; | ||||
|  | ||||
|   iMatrix(const Zero &z) { *this = zero; }; | ||||
|   iMatrix() = default; | ||||
| @@ -435,11 +458,6 @@ class iMatrix { | ||||
|   //  strong_inline vtype && operator ()(int i,int j) { | ||||
|   //    return _internal[i][j]; | ||||
|   //  } | ||||
|  | ||||
|   strong_inline const scalar_type * begin() const { return reinterpret_cast<const scalar_type *>(_internal[0]); } | ||||
|   strong_inline       scalar_type * begin()       { return reinterpret_cast<      scalar_type *>(_internal[0]); } | ||||
|   strong_inline const scalar_type * end()   const { return begin() + Traits::count; } | ||||
|   strong_inline       scalar_type * end()         { return begin() + Traits::count; } | ||||
| }; | ||||
|  | ||||
| template <class v> | ||||
| @@ -462,3 +480,6 @@ void vprefetch(const iMatrix<v, N> &vv) { | ||||
| } | ||||
| } | ||||
| #endif | ||||
|  | ||||
|  | ||||
|  | ||||
|   | ||||
| @@ -5,7 +5,6 @@ | ||||
| Author: Azusa Yamaguchi <ayamaguc@staffmail.ed.ac.uk> | ||||
| Author: Peter Boyle <paboyle@ph.ed.ac.uk> | ||||
| Author: Christopher Kelly <ckelly@phys.columbia.edu> | ||||
| Author: Michael Marshall <michael.marshall@ed.ac.au> | ||||
|     This program is free software; you can redistribute it and/or modify | ||||
|     it under the terms of the GNU General Public License as published by | ||||
|     the Free Software Foundation; either version 2 of the License, or | ||||
| @@ -27,17 +26,6 @@ Author: Michael Marshall <michael.marshall@ed.ac.au> | ||||
|  | ||||
| namespace Grid { | ||||
|  | ||||
|   // Forward declarations | ||||
|   template<class T>        class iScalar; | ||||
|   template<class T, int N> class iVector; | ||||
|   template<class T, int N> class iMatrix; | ||||
|  | ||||
|   // These are the Grid tensors | ||||
|   template<typename T>     struct isGridTensor                : public std::false_type { static constexpr bool notvalue = true; }; | ||||
|   template<class T>        struct isGridTensor<iScalar<T>>    : public std::true_type  { static constexpr bool notvalue = false; }; | ||||
|   template<class T, int N> struct isGridTensor<iVector<T, N>> : public std::true_type  { static constexpr bool notvalue = false; }; | ||||
|   template<class T, int N> struct isGridTensor<iMatrix<T, N>> : public std::true_type  { static constexpr bool notvalue = false; }; | ||||
|  | ||||
| ////////////////////////////////////////////////////////////////////////////////// | ||||
| // Want to recurse: GridTypeMapper<Matrix<vComplexD> >::scalar_type == ComplexD. | ||||
| // Use of a helper class like this allows us to template specialise and "dress" | ||||
| @@ -53,25 +41,24 @@ namespace Grid { | ||||
| // | ||||
| ////////////////////////////////////////////////////////////////////////////////// | ||||
|    | ||||
|   // This saves repeating common properties for supported Grid Scalar types | ||||
|   // TensorLevel    How many nested grid tensors | ||||
|   // Rank           Rank of the grid tensor | ||||
|   // count          Total number of elements, i.e. product of dimensions | ||||
|   // Dimension(dim) Size of dimension dim | ||||
|   struct GridTypeMapper_Base { | ||||
|     static constexpr int TensorLevel = 0; | ||||
|     static constexpr int Rank = 0; | ||||
|     static constexpr std::size_t count = 1; | ||||
|     static constexpr int Dimension(int dim) { return 0; } | ||||
|   template <class T> class GridTypeMapper { | ||||
|   public: | ||||
|     typedef typename T::scalar_type scalar_type; | ||||
|     typedef typename T::vector_type vector_type; | ||||
|     typedef typename T::vector_typeD vector_typeD; | ||||
|     typedef typename T::tensor_reduced tensor_reduced; | ||||
|     typedef typename T::scalar_object scalar_object; | ||||
|     typedef typename T::Complexified Complexified; | ||||
|     typedef typename T::Realified Realified; | ||||
|     typedef typename T::DoublePrecision DoublePrecision; | ||||
|     enum { TensorLevel = T::TensorLevel }; | ||||
|   }; | ||||
|  | ||||
| ////////////////////////////////////////////////////////////////////////////////// | ||||
| // Recursion stops with these template specialisations | ||||
| ////////////////////////////////////////////////////////////////////////////////// | ||||
|  | ||||
|   template<typename T> struct GridTypeMapper {}; | ||||
|  | ||||
|   template<> struct GridTypeMapper<RealF> : public GridTypeMapper_Base { | ||||
|   template<> class GridTypeMapper<RealF> { | ||||
|   public: | ||||
|     typedef RealF scalar_type; | ||||
|     typedef RealF vector_type; | ||||
|     typedef RealD vector_typeD; | ||||
| @@ -80,8 +67,10 @@ namespace Grid { | ||||
|     typedef ComplexF Complexified; | ||||
|     typedef RealF Realified; | ||||
|     typedef RealD DoublePrecision; | ||||
|     enum { TensorLevel = 0 }; | ||||
|   }; | ||||
|   template<> struct GridTypeMapper<RealD> : public GridTypeMapper_Base { | ||||
|   template<> class GridTypeMapper<RealD> { | ||||
|   public: | ||||
|     typedef RealD scalar_type; | ||||
|     typedef RealD vector_type; | ||||
|     typedef RealD vector_typeD; | ||||
| @@ -90,8 +79,10 @@ namespace Grid { | ||||
|     typedef ComplexD Complexified; | ||||
|     typedef RealD Realified; | ||||
|     typedef RealD DoublePrecision; | ||||
|     enum { TensorLevel = 0 }; | ||||
|   }; | ||||
|   template<> struct GridTypeMapper<ComplexF> : public GridTypeMapper_Base { | ||||
|   template<> class GridTypeMapper<ComplexF> { | ||||
|   public: | ||||
|     typedef ComplexF scalar_type; | ||||
|     typedef ComplexF vector_type; | ||||
|     typedef ComplexD vector_typeD; | ||||
| @@ -100,8 +91,10 @@ namespace Grid { | ||||
|     typedef ComplexF Complexified; | ||||
|     typedef RealF Realified; | ||||
|     typedef ComplexD DoublePrecision; | ||||
|     enum { TensorLevel = 0 }; | ||||
|   }; | ||||
|   template<> struct GridTypeMapper<ComplexD> : public GridTypeMapper_Base { | ||||
|   template<> class GridTypeMapper<ComplexD> { | ||||
|   public: | ||||
|     typedef ComplexD scalar_type; | ||||
|     typedef ComplexD vector_type; | ||||
|     typedef ComplexD vector_typeD; | ||||
| @@ -110,8 +103,10 @@ namespace Grid { | ||||
|     typedef ComplexD Complexified; | ||||
|     typedef RealD Realified; | ||||
|     typedef ComplexD DoublePrecision; | ||||
|     enum { TensorLevel = 0 }; | ||||
|   }; | ||||
|   template<> struct GridTypeMapper<Integer> : public GridTypeMapper_Base { | ||||
|   template<> class GridTypeMapper<Integer> { | ||||
|   public: | ||||
|     typedef Integer scalar_type; | ||||
|     typedef Integer vector_type; | ||||
|     typedef Integer vector_typeD; | ||||
| @@ -120,9 +115,11 @@ namespace Grid { | ||||
|     typedef void Complexified; | ||||
|     typedef void Realified; | ||||
|     typedef void DoublePrecision; | ||||
|     enum { TensorLevel = 0 }; | ||||
|   }; | ||||
|  | ||||
|   template<> struct GridTypeMapper<vRealF> : public GridTypeMapper_Base { | ||||
|   template<> class GridTypeMapper<vRealF> { | ||||
|   public: | ||||
|     typedef RealF  scalar_type; | ||||
|     typedef vRealF vector_type; | ||||
|     typedef vRealD vector_typeD; | ||||
| @@ -131,8 +128,10 @@ namespace Grid { | ||||
|     typedef vComplexF Complexified; | ||||
|     typedef vRealF Realified; | ||||
|     typedef vRealD DoublePrecision; | ||||
|     enum { TensorLevel = 0 }; | ||||
|   }; | ||||
|   template<> struct GridTypeMapper<vRealD> : public GridTypeMapper_Base { | ||||
|   template<> class GridTypeMapper<vRealD> { | ||||
|   public: | ||||
|     typedef RealD  scalar_type; | ||||
|     typedef vRealD vector_type; | ||||
|     typedef vRealD vector_typeD; | ||||
| @@ -141,20 +140,10 @@ namespace Grid { | ||||
|     typedef vComplexD Complexified; | ||||
|     typedef vRealD Realified; | ||||
|     typedef vRealD DoublePrecision; | ||||
|     enum { TensorLevel = 0 }; | ||||
|   }; | ||||
|   template<> struct GridTypeMapper<vRealH> : public GridTypeMapper_Base { | ||||
|     // Fixme this is incomplete until Grid supports fp16 or bfp16 arithmetic types | ||||
|     typedef RealF  scalar_type; | ||||
|     typedef vRealH vector_type; | ||||
|     typedef vRealD vector_typeD; | ||||
|     typedef vRealH tensor_reduced; | ||||
|     typedef RealF  scalar_object; | ||||
|     typedef vComplexH Complexified; | ||||
|     typedef vRealH Realified; | ||||
|     typedef vRealD DoublePrecision; | ||||
|   }; | ||||
|   template<> struct GridTypeMapper<vComplexH> : public GridTypeMapper_Base { | ||||
|     // Fixme this is incomplete until Grid supports fp16 or bfp16 arithmetic types | ||||
|   template<> class GridTypeMapper<vComplexH> { | ||||
|   public: | ||||
|     typedef ComplexF  scalar_type; | ||||
|     typedef vComplexH vector_type; | ||||
|     typedef vComplexD vector_typeD; | ||||
| @@ -163,8 +152,10 @@ namespace Grid { | ||||
|     typedef vComplexH Complexified; | ||||
|     typedef vRealH Realified; | ||||
|     typedef vComplexD DoublePrecision; | ||||
|     enum { TensorLevel = 0 }; | ||||
|   }; | ||||
|   template<> struct GridTypeMapper<vComplexF> : public GridTypeMapper_Base { | ||||
|   template<> class GridTypeMapper<vComplexF> { | ||||
|   public: | ||||
|     typedef ComplexF  scalar_type; | ||||
|     typedef vComplexF vector_type; | ||||
|     typedef vComplexD vector_typeD; | ||||
| @@ -173,8 +164,10 @@ namespace Grid { | ||||
|     typedef vComplexF Complexified; | ||||
|     typedef vRealF Realified; | ||||
|     typedef vComplexD DoublePrecision; | ||||
|     enum { TensorLevel = 0 }; | ||||
|   }; | ||||
|   template<> struct GridTypeMapper<vComplexD> : public GridTypeMapper_Base { | ||||
|   template<> class GridTypeMapper<vComplexD> { | ||||
|   public: | ||||
|     typedef ComplexD  scalar_type; | ||||
|     typedef vComplexD vector_type; | ||||
|     typedef vComplexD vector_typeD; | ||||
| @@ -183,8 +176,10 @@ namespace Grid { | ||||
|     typedef vComplexD Complexified; | ||||
|     typedef vRealD Realified; | ||||
|     typedef vComplexD DoublePrecision; | ||||
|     enum { TensorLevel = 0 }; | ||||
|   }; | ||||
|   template<> struct GridTypeMapper<vInteger> : public GridTypeMapper_Base { | ||||
|   template<> class GridTypeMapper<vInteger> { | ||||
|   public: | ||||
|     typedef  Integer scalar_type; | ||||
|     typedef vInteger vector_type; | ||||
|     typedef vInteger vector_typeD; | ||||
| @@ -193,52 +188,57 @@ namespace Grid { | ||||
|     typedef void Complexified; | ||||
|     typedef void Realified; | ||||
|     typedef void DoublePrecision; | ||||
|     enum { TensorLevel = 0 }; | ||||
|   }; | ||||
|  | ||||
| #define GridTypeMapper_RepeatedTypes \ | ||||
|   using BaseTraits   = GridTypeMapper<T>; \ | ||||
|   using scalar_type  = typename BaseTraits::scalar_type; \ | ||||
|   using vector_type  = typename BaseTraits::vector_type; \ | ||||
|   using vector_typeD = typename BaseTraits::vector_typeD; \ | ||||
|   static constexpr int TensorLevel = BaseTraits::TensorLevel + 1 | ||||
|  | ||||
|   template<typename T> struct GridTypeMapper<iScalar<T>> { | ||||
|     GridTypeMapper_RepeatedTypes; | ||||
|     using tensor_reduced  = iScalar<typename BaseTraits::tensor_reduced>; | ||||
|     using scalar_object   = iScalar<typename BaseTraits::scalar_object>; | ||||
|     using Complexified    = iScalar<typename BaseTraits::Complexified>; | ||||
|     using Realified       = iScalar<typename BaseTraits::Realified>; | ||||
|     using DoublePrecision = iScalar<typename BaseTraits::DoublePrecision>; | ||||
|     static constexpr int Rank = BaseTraits::Rank + 1; | ||||
|     static constexpr std::size_t count = BaseTraits::count; | ||||
|     static constexpr int Dimension(int dim) { | ||||
|       return ( dim == 0 ) ? 1 : BaseTraits::Dimension(dim - 1); } | ||||
|   // First some of my own traits | ||||
|   template<typename T> struct isGridTensor { | ||||
|     static const bool value = true; | ||||
|     static const bool notvalue = false; | ||||
|   }; | ||||
|  | ||||
|   template<typename T, int N> struct GridTypeMapper<iVector<T, N>> { | ||||
|     GridTypeMapper_RepeatedTypes; | ||||
|     using tensor_reduced  = iScalar<typename BaseTraits::tensor_reduced>; | ||||
|     using scalar_object   = iVector<typename BaseTraits::scalar_object,   N>; | ||||
|     using Complexified    = iVector<typename BaseTraits::Complexified,    N>; | ||||
|     using Realified       = iVector<typename BaseTraits::Realified,       N>; | ||||
|     using DoublePrecision = iVector<typename BaseTraits::DoublePrecision, N>; | ||||
|     static constexpr int Rank = BaseTraits::Rank + 1; | ||||
|     static constexpr std::size_t count = BaseTraits::count * N; | ||||
|     static constexpr int Dimension(int dim) { | ||||
|       return ( dim == 0 ) ? N : BaseTraits::Dimension(dim - 1); } | ||||
|   template<> struct isGridTensor<int > { | ||||
|     static const bool value = false; | ||||
|     static const bool notvalue = true; | ||||
|   }; | ||||
|  | ||||
|   template<typename T, int N> struct GridTypeMapper<iMatrix<T, N>> { | ||||
|     GridTypeMapper_RepeatedTypes; | ||||
|     using tensor_reduced  = iScalar<typename BaseTraits::tensor_reduced>; | ||||
|     using scalar_object   = iMatrix<typename BaseTraits::scalar_object,   N>; | ||||
|     using Complexified    = iMatrix<typename BaseTraits::Complexified,    N>; | ||||
|     using Realified       = iMatrix<typename BaseTraits::Realified,       N>; | ||||
|     using DoublePrecision = iMatrix<typename BaseTraits::DoublePrecision, N>; | ||||
|     static constexpr int Rank = BaseTraits::Rank + 2; | ||||
|     static constexpr std::size_t count = BaseTraits::count * N * N; | ||||
|     static constexpr int Dimension(int dim) { | ||||
|       return ( dim == 0 || dim == 1 ) ? N : BaseTraits::Dimension(dim - 2); } | ||||
|   template<> struct isGridTensor<RealD > { /* Each explicit specialisation in this run flags the named type as NOT a tensor (value = false), unlike the primary template which sets value = true */ | ||||
|     static const bool value = false; | ||||
|     static const bool notvalue = true; /* always the negation of value */ | ||||
|   }; | ||||
|   template<> struct isGridTensor<RealF > { | ||||
|     static const bool value = false; | ||||
|     static const bool notvalue = true; | ||||
|   }; | ||||
|   template<> struct isGridTensor<ComplexD > { | ||||
|     static const bool value = false; | ||||
|     static const bool notvalue = true; | ||||
|   }; | ||||
|   template<> struct isGridTensor<ComplexF > { | ||||
|     static const bool value = false; | ||||
|     static const bool notvalue = true; | ||||
|   }; | ||||
|   template<> struct isGridTensor<Integer > { | ||||
|     static const bool value = false; | ||||
|     static const bool notvalue = true; | ||||
|   }; | ||||
|   template<> struct isGridTensor<vRealD > { /* the v-prefixed types get the same non-tensor flags as the plain ones above */ | ||||
|     static const bool value = false; | ||||
|     static const bool notvalue = true; | ||||
|   }; | ||||
|   template<> struct isGridTensor<vRealF > { | ||||
|     static const bool value = false; | ||||
|     static const bool notvalue = true; | ||||
|   }; | ||||
|   template<> struct isGridTensor<vComplexD > { | ||||
|     static const bool value = false; | ||||
|     static const bool notvalue = true; | ||||
|   }; | ||||
|   template<> struct isGridTensor<vComplexF > { | ||||
|     static const bool value = false; | ||||
|     static const bool notvalue = true; | ||||
|   }; | ||||
|   template<> struct isGridTensor<vInteger > { | ||||
|     static const bool value = false; | ||||
|     static const bool notvalue = true; | ||||
|   }; | ||||
|  | ||||
|   // Match the index | ||||
| @@ -263,12 +263,19 @@ namespace Grid { | ||||
|     typedef T type; | ||||
|   }; | ||||
|    | ||||
|   //Query whether a tensor or Lattice<Tensor> is SIMD vector or scalar | ||||
|   template<typename T, typename V=void> struct isSIMDvectorized : public std::false_type {}; | ||||
|   template<typename U> struct isSIMDvectorized<U, typename std::enable_if< !std::is_same< | ||||
|     typename GridTypeMapper<typename getVectorType<U>::type>::scalar_type, | ||||
|     typename GridTypeMapper<typename getVectorType<U>::type>::vector_type>::value, void>::type> | ||||
|   : public std::true_type {}; | ||||
|   //Query if a tensor or Lattice<Tensor> is SIMD vector or scalar | ||||
|   template<typename T> /* Trait: value is true when, for getVectorType<T>::type, GridTypeMapper reports a scalar_type that differs from its vector_type */ | ||||
|   class isSIMDvectorized{ | ||||
|     template<typename U> | ||||
|     static typename std::enable_if< !std::is_same< typename GridTypeMapper<typename getVectorType<U>::type>::scalar_type,    | ||||
|       typename GridTypeMapper<typename getVectorType<U>::type>::vector_type>::value, char>::type test(void *); /* declared only; takes part in overload resolution only when the two types differ, and then returns char */ | ||||
|  | ||||
|     template<typename U> | ||||
|     static double test(...); /* declared only; fallback overload returning double, used when the overload above is disabled */ | ||||
|    | ||||
|   public: | ||||
|     enum {value = sizeof(test<T>(0)) == sizeof(char) }; /* nothing is called at run time: only the return-type size of the selected overload is inspected */ | ||||
|   }; | ||||
|    | ||||
|   //Get the precision of a Lattice, tensor or scalar type in units of sizeof(float) | ||||
|   template<typename T> | ||||
|   | ||||
| @@ -47,7 +47,6 @@ Author: paboyle <paboyle@ph.ed.ac.uk> | ||||
| #else | ||||
| #define PARALLEL_FOR_LOOP | ||||
| #define PARALLEL_FOR_LOOP_INTERN | ||||
| #define PARALLEL_FOR_LOOP_REDUCE(op, var) | ||||
| #define PARALLEL_NESTED_LOOP2 | ||||
| #define PARALLEL_NESTED_LOOP5 | ||||
| #define PARALLEL_REGION | ||||
| @@ -59,7 +58,6 @@ Author: paboyle <paboyle@ph.ed.ac.uk> | ||||
| #define parallel_for_internal PARALLEL_FOR_LOOP_INTERN for | ||||
| #define parallel_for_nest2 PARALLEL_NESTED_LOOP2 for | ||||
| #define parallel_for_nest5 PARALLEL_NESTED_LOOP5 for | ||||
| #define parallel_critical PARALLEL_CRITICAL | ||||
|  | ||||
| namespace Grid { | ||||
|  | ||||
|   | ||||
| @@ -289,11 +289,6 @@ void Grid_init(int *argc,char ***argv) | ||||
|     std::cout << "MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the"<<std::endl; | ||||
|     std::cout << "GNU General Public License for more details."<<std::endl; | ||||
|     printHash(); | ||||
|   #ifdef GRID_BUILD_REF | ||||
|   #define _GRID_BUILD_STR(x) #x | ||||
|   #define GRID_BUILD_STR(x) _GRID_BUILD_STR(x) | ||||
|     std::cout << "Build " << GRID_BUILD_STR(GRID_BUILD_REF) << std::endl; | ||||
|   #endif | ||||
|     std::cout << std::endl; | ||||
|   } | ||||
|  | ||||
|   | ||||
| @@ -28,31 +28,16 @@ | ||||
| extern "C" { | ||||
| #include <openssl/sha.h> | ||||
| } | ||||
| #ifdef USE_IPP | ||||
| #include "ipp.h" | ||||
| #endif | ||||
|  | ||||
| #pragma once | ||||
|  | ||||
| class GridChecksum | ||||
| { | ||||
| public: | ||||
|   static inline uint32_t crc32(const void *data, size_t bytes) | ||||
|   static inline uint32_t crc32(void *data,size_t bytes) | ||||
|   { | ||||
|     return ::crc32(0L,(unsigned char *)data,bytes); | ||||
|   } | ||||
|  | ||||
| #ifdef USE_IPP | ||||
|   static inline uint32_t crc32c(const void* data, size_t bytes) | ||||
|   { | ||||
|       uint32_t crc32c = ~(uint32_t)0; | ||||
|       ippsCRC32C_8u(reinterpret_cast<const unsigned char *>(data), bytes, &crc32c); | ||||
|       ippsSwapBytes_32u_I(&crc32c, 1); | ||||
|    | ||||
|       return ~crc32c; | ||||
|   } | ||||
| #endif | ||||
|  | ||||
|   template <typename T> | ||||
|   static inline std::string sha256_string(const std::vector<T> &hash) | ||||
|   { | ||||
|   | ||||
| @@ -1,6 +0,0 @@ | ||||
| SUBDIRS = .  | ||||
|  | ||||
| include Make.inc | ||||
|  | ||||
|  | ||||
|  | ||||
| @@ -1,198 +0,0 @@ | ||||
| /************************************************************************************* | ||||
|  | ||||
| Grid physics library, www.github.com/paboyle/Grid | ||||
|  | ||||
| Source file: ./tests/Test_hmc_EODWFRatio.cc | ||||
|  | ||||
| Copyright (C) 2015-2016 | ||||
|  | ||||
| Author: Peter Boyle <paboyle@ph.ed.ac.uk> | ||||
| Author: Guido Cossu <guido.cossu@ed.ac.uk> | ||||
|  | ||||
| This program is free software; you can redistribute it and/or modify | ||||
| it under the terms of the GNU General Public License as published by | ||||
| the Free Software Foundation; either version 2 of the License, or | ||||
| (at your option) any later version. | ||||
|  | ||||
| This program is distributed in the hope that it will be useful, | ||||
| but WITHOUT ANY WARRANTY; without even the implied warranty of | ||||
| MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the | ||||
| GNU General Public License for more details. | ||||
|  | ||||
| You should have received a copy of the GNU General Public License along | ||||
| with this program; if not, write to the Free Software Foundation, Inc., | ||||
| 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. | ||||
|  | ||||
| See the full license in the file "LICENSE" in the top level distribution | ||||
| directory | ||||
| *************************************************************************************/ | ||||
| /*  END LEGAL */ | ||||
| #include <Grid/Grid.h> | ||||
|  | ||||
| int main(int argc, char **argv) { /* Test driver: configures and runs an HMC with an Iwasaki gauge action plus two-flavour even-odd ratio pseudofermion actions built on MobiusFermionR operators */ | ||||
|   using namespace Grid; | ||||
|   using namespace Grid::QCD; | ||||
|  | ||||
|   Grid_init(&argc, &argv); | ||||
|   int threads = GridThread::GetThreads(); | ||||
|   // TODO: add a routine that prints all the relevant information on the run | ||||
|   std::cout << GridLogMessage << "Grid is setup to use " << threads << " threads" << std::endl; | ||||
|  | ||||
|    // Typedefs to simplify notation | ||||
|   typedef WilsonImplR FermionImplPolicy; | ||||
|   typedef MobiusFermionR FermionAction; | ||||
|   typedef typename FermionAction::FermionField FermionField; | ||||
|  | ||||
|   typedef Grid::XmlReader       Serialiser; /* not referenced again within this main */ | ||||
|    | ||||
|   //:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: | ||||
|   IntegratorParameters MD; | ||||
|   //  typedef GenericHMCRunner<LeapFrog> HMCWrapper;  | ||||
|   //  MD.name    = std::string("Leap Frog"); | ||||
|   //  typedef GenericHMCRunner<ForceGradient> HMCWrapper;  | ||||
|   //  MD.name    = std::string("Force Gradient"); | ||||
|   typedef GenericHMCRunner<MinimumNorm2> HMCWrapper;  | ||||
|   MD.name    = std::string("MinimumNorm2"); | ||||
|   MD.MDsteps = 20; | ||||
|   MD.trajL   = 1.0; | ||||
|    | ||||
|   HMCparameters HMCparams; | ||||
|   HMCparams.StartTrajectory  = 0; | ||||
|   HMCparams.Trajectories     = 200; | ||||
|   HMCparams.NoMetropolisUntil=  20; | ||||
|   // Accepted StartingType values: "[HotStart, ColdStart, TepidStart, CheckpointStart]\n"; | ||||
|   HMCparams.StartingType     =std::string("ColdStart"); | ||||
|   HMCparams.MD = MD; | ||||
|   HMCWrapper TheHMC(HMCparams); | ||||
|  | ||||
|   // Grid from the command line arguments --grid and --mpi | ||||
|   TheHMC.Resources.AddFourDimGrid("gauge"); // use default simd lanes decomposition | ||||
|    | ||||
|   CheckpointerParameters CPparams; | ||||
|   CPparams.config_prefix = "ckpoint_EODWF_lat"; | ||||
|   CPparams.rng_prefix    = "ckpoint_EODWF_rng"; | ||||
|   CPparams.saveInterval  = 10; | ||||
|   CPparams.format        = "IEEE64BIG"; | ||||
|   TheHMC.Resources.LoadNerscCheckpointer(CPparams); | ||||
|  | ||||
|   RNGModuleParameters RNGpar; | ||||
|   RNGpar.serial_seeds = "1 2 3 4 5"; | ||||
|   RNGpar.parallel_seeds = "6 7 8 9 10"; | ||||
|   TheHMC.Resources.SetRNGSeeds(RNGpar); | ||||
|  | ||||
|   // Construct observables | ||||
|   // here there is too much indirection  | ||||
|   typedef PlaquetteMod<HMCWrapper::ImplPolicy> PlaqObs; | ||||
|   TheHMC.Resources.AddObservable<PlaqObs>(); | ||||
|   ////////////////////////////////////////////// | ||||
|  | ||||
|   const int Ls      = 16; | ||||
|   Real beta         = 2.13; | ||||
|   Real light_mass   = 0.01; | ||||
|   Real strange_mass = 0.04; | ||||
|   Real pv_mass      = 1.0; | ||||
|   RealD M5  = 1.8; | ||||
|   RealD b   = 1.0; // Original note: "Scale factor two". NOTE(review): with b = 1.0 and c = 0.0 this note looks stale - confirm | ||||
|   RealD c   = 0.0; | ||||
|  | ||||
|   OneFlavourRationalParams OFRp; /* consumed only by StrangePseudoFermion below */ | ||||
|   OFRp.lo       = 1.0e-2; | ||||
|   OFRp.hi       = 64; | ||||
|   OFRp.MaxIter  = 10000; | ||||
|   OFRp.tolerance= 1.0e-10; | ||||
|   OFRp.degree   = 14; | ||||
|   OFRp.precision= 40; | ||||
|  | ||||
|   std::vector<Real> hasenbusch({ 0.1 }); /* intermediate masses inserted between light_mass and pv_mass in the quotient chain built below */ | ||||
|  | ||||
|   auto GridPtr   = TheHMC.Resources.GetCartesian(); | ||||
|   auto GridRBPtr = TheHMC.Resources.GetRBCartesian(); | ||||
|   auto FGrid     = SpaceTimeGrid::makeFiveDimGrid(Ls,GridPtr); | ||||
|   auto FrbGrid   = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls,GridPtr); | ||||
|  | ||||
|   IwasakiGaugeActionR GaugeAction(beta); | ||||
|  | ||||
|   // temporarily need a gauge field | ||||
|   LatticeGaugeField U(GridPtr); | ||||
|  | ||||
|   // These lines are unnecessary if BC are all periodic | ||||
|   std::vector<Complex> boundary = {1,1,1,-1}; | ||||
|   FermionAction::ImplParams Params(boundary); | ||||
|    | ||||
|   double StoppingCondition = 1e-10; | ||||
|   double MaxCGIterations = 30000; /* NOTE(review): an iteration count held in a double - confirm the parameter type ConjugateGradient expects */ | ||||
|   ConjugateGradient<FermionField>  CG(StoppingCondition,MaxCGIterations); | ||||
|  | ||||
|   //////////////////////////////////// | ||||
|   // Collect actions | ||||
|   //////////////////////////////////// | ||||
|   ActionLevel<HMCWrapper::Field> Level1(1); | ||||
|   ActionLevel<HMCWrapper::Field> Level2(4); | ||||
|  | ||||
|   //////////////////////////////////// | ||||
|   // Strange action | ||||
|   //////////////////////////////////// | ||||
|  | ||||
|   //  FermionAction StrangeOp(U,*FGrid,*FrbGrid,*GridPtr,*GridRBPtr,light_mass,M5,b,c, Params); | ||||
|   //  DomainWallEOFAFermionR Strange_Op_L(Umu, *FGrid, *FrbGrid, *UGrid, *UrbGrid, mf, mf, mb, shift_L, pm, M5); | ||||
|   //  DomainWallEOFAFermionR Strange_Op_R(Umu, *FGrid, *FrbGrid, *UGrid, *UrbGrid, mb, mf, mb, shift_R, pm, M5); | ||||
|   //  ExactOneFlavourRatioPseudoFermionAction EOFA(Strange_Op_L,Strange_Op_R,CG,ofp, false); | ||||
|  | ||||
|   FermionAction StrangeOp (U,*FGrid,*FrbGrid,*GridPtr,*GridRBPtr,strange_mass,M5,b,c, Params); | ||||
|   FermionAction StrangePauliVillarsOp(U,*FGrid,*FrbGrid,*GridPtr,*GridRBPtr,pv_mass,  M5,b,c, Params); | ||||
|  | ||||
|   //  OneFlavourEvenOddRatioRationalPseudoFermionAction<FermionImplPolicy> StrangePseudoFermion(StrangePauliVillarsOp,StrangeOp,OFRp); | ||||
|   OneFlavourRatioRationalPseudoFermionAction<FermionImplPolicy> StrangePseudoFermion(StrangePauliVillarsOp,StrangeOp,OFRp); /* constructed but never added to an action level: every push_back of it below is commented out */ | ||||
|   //  TwoFlavourRationalTesterPseudoFermionAction<FermionImplPolicy> StrangePseudoFermion1F(StrangeOp,OFRp); | ||||
|   //  TwoFlavourPseudoFermionAction<FermionImplPolicy> StrangePseudoFermion2F(StrangeOp,CG,CG); | ||||
|   //  Level1.push_back(&StrangePseudoFermion2F); | ||||
|   //  Level1.push_back(&StrangePseudoFermion); | ||||
|  | ||||
|   //////////////////////////////////// | ||||
|   // up down action | ||||
|   //////////////////////////////////// | ||||
|   std::vector<Real> light_den; | ||||
|   std::vector<Real> light_num; | ||||
|  | ||||
|   int n_hasenbusch = hasenbusch.size(); | ||||
|   light_den.push_back(light_mass); /* denominators: light_mass followed by the hasenbusch masses */ | ||||
|   for(int h=0;h<n_hasenbusch;h++){ | ||||
|     light_den.push_back(hasenbusch[h]); | ||||
|     light_num.push_back(hasenbusch[h]); | ||||
|   } | ||||
|   light_num.push_back(pv_mass); /* numerators: the hasenbusch masses followed by pv_mass, so both lists hold n_hasenbusch+1 entries */ | ||||
|  | ||||
|   std::vector<FermionAction *> Numerators; | ||||
|   std::vector<FermionAction *> Denominators; | ||||
|   std::vector<TwoFlavourEvenOddRatioPseudoFermionAction<FermionImplPolicy> *> Quotients; | ||||
|  | ||||
|   for(int h=0;h<n_hasenbusch+1;h++){ /* one quotient action per (numerator mass, denominator mass) pair */ | ||||
|     std::cout << GridLogMessage << " 2f quotient Action  "<< light_num[h] << " / " << light_den[h]<< std::endl; | ||||
|     Numerators.push_back  (new FermionAction(U,*FGrid,*FrbGrid,*GridPtr,*GridRBPtr,light_num[h],M5,b,c, Params)); /* heap-allocated; no matching delete appears in this main */ | ||||
|     Denominators.push_back(new FermionAction(U,*FGrid,*FrbGrid,*GridPtr,*GridRBPtr,light_den[h],M5,b,c, Params)); | ||||
|     Quotients.push_back   (new TwoFlavourEvenOddRatioPseudoFermionAction<FermionImplPolicy>(*Numerators[h],*Denominators[h],CG,CG)); | ||||
|   } | ||||
|  | ||||
|   for(int h=0;h<n_hasenbusch+1;h++){ | ||||
|     Level1.push_back(Quotients[h]); | ||||
|   } | ||||
|  | ||||
|   ///////////////////////////////////////////////////////////// | ||||
|   // Gauge action | ||||
|   ///////////////////////////////////////////////////////////// | ||||
|   Level2.push_back(&GaugeAction); | ||||
|   TheHMC.TheAction.push_back(Level1); | ||||
|   TheHMC.TheAction.push_back(Level2); | ||||
|   std::cout << GridLogMessage << " Action complete "<< std::endl; | ||||
|  | ||||
|   ///////////////////////////////////////////////////////////// | ||||
|   // HMC parameters are serialisable | ||||
|  | ||||
|   std::cout << GridLogMessage << " Running the HMC "<< std::endl; | ||||
|   TheHMC.Run();  // no smearing | ||||
|  | ||||
|   Grid_finalize(); | ||||
| } // main | ||||
|  | ||||
|  | ||||
|  | ||||
| @@ -1,452 +0,0 @@ | ||||
| /************************************************************************************* | ||||
|  | ||||
| Grid physics library, www.github.com/paboyle/Grid | ||||
|  | ||||
| Source file:  | ||||
|  | ||||
| Copyright (C) 2015-2016 | ||||
|  | ||||
| Author: Peter Boyle <paboyle@ph.ed.ac.uk> | ||||
| Author: Guido Cossu | ||||
| Author: David Murphy | ||||
|  | ||||
| This program is free software; you can redistribute it and/or modify | ||||
| it under the terms of the GNU General Public License as published by | ||||
| the Free Software Foundation; either version 2 of the License, or | ||||
| (at your option) any later version. | ||||
|  | ||||
| This program is distributed in the hope that it will be useful, | ||||
| but WITHOUT ANY WARRANTY; without even the implied warranty of | ||||
| MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the | ||||
| GNU General Public License for more details. | ||||
|  | ||||
| You should have received a copy of the GNU General Public License along | ||||
| with this program; if not, write to the Free Software Foundation, Inc., | ||||
| 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. | ||||
|  | ||||
| See the full license in the file "LICENSE" in the top level distribution | ||||
| directory | ||||
| *************************************************************************************/ | ||||
| /*  END LEGAL */ | ||||
| #include <Grid/Grid.h> | ||||
|  | ||||
| #ifdef GRID_DEFAULT_PRECISION_DOUBLE | ||||
| #define MIXED_PRECISION | ||||
| #endif | ||||
|  | ||||
| namespace Grid{  | ||||
|   namespace QCD{ | ||||
|  | ||||
|   /* | ||||
|    * Need a plan for gauge field update for mixed precision in HMC                      (2x speed up) | ||||
|    *    -- Store the single prec action operator. | ||||
|    *    -- Clone the gauge field from the operator function argument. | ||||
|    *    -- Build the mixed precision operator dynamically from the passed operator and single prec clone. | ||||
|    */ | ||||
|  | ||||
|   /* | ||||
|    * OperatorFunction that solves with a mixed precision conjugate gradient. | ||||
|    * | ||||
|    * The object stores references to a double and a single precision fermion operator | ||||
|    * (FermOpD / FermOpF) and to the matching Schur operators (LinOpD / LinOpF). | ||||
|    * On every call operator() | ||||
|    *   1. asserts that the operator passed in wraps the same matrix object as LinOpD, | ||||
|    *   2. copies the gauge links held in FermOpD.Umu into FermOpF.Umu with precisionChange | ||||
|    *      and refreshes FermOpF.UmuEven / FermOpF.UmuOdd with pickCheckerboard, | ||||
|    *   3. builds a MixedPrecisionConjugateGradient from Tolerance, MaxInnerIterations, | ||||
|    *      MaxOuterIterations, SinglePrecGrid5, LinOpF and LinOpD and applies it to (src, psi). | ||||
|    * | ||||
|    * InnerTolerance and OuterLoopNormMult are stored but are not forwarded to the solver | ||||
|    * by this class. The Total*Iterations counters start at zero and are not updated by | ||||
|    * this class. | ||||
|    */ | ||||
|   template<class FermionOperatorD, class FermionOperatorF, class SchurOperatorD, class  SchurOperatorF>  | ||||
|   class MixedPrecisionConjugateGradientOperatorFunction : public OperatorFunction<typename FermionOperatorD::FermionField> { | ||||
|   public: | ||||
|     typedef typename FermionOperatorD::FermionField FieldD; | ||||
|     typedef typename FermionOperatorF::FermionField FieldF; | ||||
|  | ||||
|     RealD   Tolerance; | ||||
|     RealD   InnerTolerance; //Initial tolerance for inner CG. Defaults to Tolerance but can be changed | ||||
|     Integer MaxInnerIterations; | ||||
|     Integer MaxOuterIterations; | ||||
|     GridBase* SinglePrecGrid4; //Grid for single-precision fields | ||||
|     GridBase* SinglePrecGrid5; //Grid for single-precision fields | ||||
|     RealD OuterLoopNormMult; //Stop the outer loop and move to a final double prec solve when the residual is OuterLoopNormMult * Tolerance | ||||
|  | ||||
|     FermionOperatorF &FermOpF; | ||||
|     FermionOperatorD &FermOpD; | ||||
|     SchurOperatorF &LinOpF; | ||||
|     SchurOperatorD &LinOpD; | ||||
|  | ||||
|     Integer TotalInnerIterations; //Number of inner CG iterations | ||||
|     Integer TotalOuterIterations; //Number of restarts | ||||
|     Integer TotalFinalStepIterations; //Number of CG iterations in final patch-up step | ||||
|  | ||||
|     // Stores the solver parameters, the two single precision grids and references to the | ||||
|     // four operators. The initialiser list follows the member declaration order, and the | ||||
|     // iteration counters are zeroed so that they never hold indeterminate values. | ||||
|     MixedPrecisionConjugateGradientOperatorFunction(RealD tol,  | ||||
|                                                     Integer maxinnerit,  | ||||
|                                                     Integer maxouterit,  | ||||
|                                                     GridBase* _sp_grid4,  | ||||
|                                                     GridBase* _sp_grid5,  | ||||
|                                                     FermionOperatorF &_FermOpF, | ||||
|                                                     FermionOperatorD &_FermOpD, | ||||
|                                                     SchurOperatorF   &_LinOpF, | ||||
|                                                     SchurOperatorD   &_LinOpD):  | ||||
|       Tolerance(tol),  | ||||
|       InnerTolerance(tol),  | ||||
|       MaxInnerIterations(maxinnerit),  | ||||
|       MaxOuterIterations(maxouterit),  | ||||
|       SinglePrecGrid4(_sp_grid4), | ||||
|       SinglePrecGrid5(_sp_grid5), | ||||
|       OuterLoopNormMult(100.), | ||||
|       FermOpF(_FermOpF), | ||||
|       FermOpD(_FermOpD), | ||||
|       LinOpF(_LinOpF), | ||||
|       LinOpD(_LinOpD), | ||||
|       TotalInnerIterations(0), | ||||
|       TotalOuterIterations(0), | ||||
|       TotalFinalStepIterations(0) | ||||
|     {  | ||||
|       /* Debugging instances of objects; references are stored | ||||
|       std::cout << GridLogMessage << " Mixed precision CG wrapper LinOpF " <<std::hex<< &LinOpF<<std::dec <<std::endl; | ||||
|       std::cout << GridLogMessage << " Mixed precision CG wrapper LinOpD " <<std::hex<< &LinOpD<<std::dec <<std::endl; | ||||
|       std::cout << GridLogMessage << " Mixed precision CG wrapper FermOpF " <<std::hex<< &FermOpF<<std::dec <<std::endl; | ||||
|       std::cout << GridLogMessage << " Mixed precision CG wrapper FermOpD " <<std::hex<< &FermOpD<<std::dec <<std::endl; | ||||
|       */ | ||||
|     } | ||||
|  | ||||
|     // Solve for psi given src. LinOpU must be a SchurOperatorD wrapping the same matrix | ||||
|     // object as the stored LinOpD; this is checked with an assert only. | ||||
|     void operator()(LinearOperatorBase<FieldD> &LinOpU, const FieldD &src, FieldD &psi) { | ||||
|  | ||||
|       std::cout << GridLogMessage << " Mixed precision CG wrapper operator() "<<std::endl; | ||||
|  | ||||
|       SchurOperatorD * SchurOpU = static_cast<SchurOperatorD *>(&LinOpU); | ||||
|        | ||||
|       //      std::cout << GridLogMessage << " Mixed precision CG wrapper operator() FermOpU " <<std::hex<< &(SchurOpU->_Mat)<<std::dec <<std::endl; | ||||
|       //      std::cout << GridLogMessage << " Mixed precision CG wrapper operator() FermOpD " <<std::hex<< &(LinOpD._Mat) <<std::dec <<std::endl; | ||||
|       // Assumption made in code to extract gauge field | ||||
|       // We could avoid storing LinopD reference altogether ? | ||||
|       assert(&(SchurOpU->_Mat)==&(LinOpD._Mat)); | ||||
|  | ||||
|       //////////////////////////////////////////////////////////////////////////////////// | ||||
|       // Must snarf a single precision copy of the gauge field in Linop_d argument | ||||
|       //////////////////////////////////////////////////////////////////////////////////// | ||||
|       typedef typename FermionOperatorF::GaugeLinkField GaugeLinkFieldF; | ||||
|       typedef typename FermionOperatorD::GaugeLinkField GaugeLinkFieldD; | ||||
|  | ||||
|       GridBase * GridPtrF = SinglePrecGrid4; | ||||
|       GridBase * GridPtrD = FermOpD.Umu._grid; | ||||
|       GaugeLinkFieldF Umu_f(GridPtrF); | ||||
|       //      std::cout << " Dim gauge field "<<GridPtrF->Nd()<<std::endl; // 4d | ||||
|       //      std::cout << " Dim gauge field "<<GridPtrD->Nd()<<std::endl; // 4d | ||||
|  | ||||
|       //////////////////////////////////////////////////////////////////////////////////// | ||||
|       // Moving this to a Clone method of fermion operator would allow to duplicate the  | ||||
|       // physics parameters and decrease gauge field copies | ||||
|       //////////////////////////////////////////////////////////////////////////////////// | ||||
|       GaugeLinkFieldD Umu_d(GridPtrD); | ||||
|       // Copy each of the Nd*2 Lorentz components: peek in double, convert, poke in single | ||||
|       for(int mu=0;mu<Nd*2;mu++){  | ||||
|         Umu_d = PeekIndex<LorentzIndex>(FermOpD.Umu, mu); | ||||
|         precisionChange(Umu_f,Umu_d); | ||||
|         PokeIndex<LorentzIndex>(FermOpF.Umu, Umu_f, mu); | ||||
|       } | ||||
|       // Keep the checkerboarded copies in step with the freshly written single precision links | ||||
|       pickCheckerboard(Even,FermOpF.UmuEven,FermOpF.Umu); | ||||
|       pickCheckerboard(Odd ,FermOpF.UmuOdd ,FermOpF.Umu); | ||||
|  | ||||
|       //////////////////////////////////////////////////////////////////////////////////// | ||||
|       // Could test to make sure that LinOpF and LinOpD agree to single prec? | ||||
|       //////////////////////////////////////////////////////////////////////////////////// | ||||
|       /* | ||||
|       GridBase *Fgrid = psi._grid; | ||||
|       FieldD tmp2(Fgrid); | ||||
|       FieldD tmp1(Fgrid); | ||||
|       LinOpU.Op(src,tmp1); | ||||
|       LinOpD.Op(src,tmp2); | ||||
|       std::cout << " Double gauge field "<< norm2(FermOpD.Umu)<<std::endl; | ||||
|       std::cout << " Single gauge field "<< norm2(FermOpF.Umu)<<std::endl; | ||||
|       std::cout << " Test of operators "<<norm2(tmp1)<<std::endl; | ||||
|       std::cout << " Test of operators "<<norm2(tmp2)<<std::endl; | ||||
|       tmp1=tmp1-tmp2; | ||||
|       std::cout << " Test of operators diff "<<norm2(tmp1)<<std::endl; | ||||
|       */ | ||||
|  | ||||
|       //////////////////////////////////////////////////////////////////////////////////// | ||||
|       // Make a mixed precision conjugate gradient | ||||
|       //////////////////////////////////////////////////////////////////////////////////// | ||||
|       MixedPrecisionConjugateGradient<FieldD,FieldF> MPCG(Tolerance,MaxInnerIterations,MaxOuterIterations,SinglePrecGrid5,LinOpF,LinOpD); | ||||
|       std::cout << GridLogMessage << "Calling mixed precision Conjugate Gradient" <<std::endl; | ||||
|       MPCG(src,psi); | ||||
|     } | ||||
|   }; | ||||
| }}; | ||||
|  | ||||
| int main(int argc, char **argv) { | ||||
|   using namespace Grid; | ||||
|   using namespace Grid::QCD; | ||||
|  | ||||
|   Grid_init(&argc, &argv); | ||||
|   int threads = GridThread::GetThreads(); | ||||
|   // HMC driver: EOFA strange action + Hasenbusch light quotients (Level1) and Iwasaki gauge action (Level2). TODO: make a routine to print all the relevant information on the run | ||||
|   std::cout << GridLogMessage << "Grid is setup to use " << threads << " threads" << std::endl; | ||||
|  | ||||
|    // Typedefs to simplify notation; the *F types are the ones built on UF and the vComplexF grids below | ||||
|   typedef WilsonImplR FermionImplPolicy; | ||||
|   typedef MobiusFermionR FermionAction; | ||||
|   typedef MobiusFermionF FermionActionF; | ||||
|   typedef MobiusEOFAFermionR FermionEOFAAction; | ||||
|   typedef MobiusEOFAFermionF FermionEOFAActionF; | ||||
|   typedef typename FermionAction::FermionField FermionField; | ||||
|   typedef typename FermionActionF::FermionField FermionFieldF; | ||||
|  | ||||
|   typedef Grid::XmlReader       Serialiser; | ||||
|    | ||||
|   //:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: | ||||
|   IntegratorParameters MD; | ||||
|   //  typedef GenericHMCRunner<LeapFrog> HMCWrapper;  | ||||
|   //  MD.name    = std::string("Leap Frog"); | ||||
|   typedef GenericHMCRunner<ForceGradient> HMCWrapper;  | ||||
|   MD.name    = std::string("Force Gradient"); | ||||
|   //  typedef GenericHMCRunner<MinimumNorm2> HMCWrapper;  | ||||
|   //  MD.name    = std::string("MinimumNorm2"); | ||||
|   MD.MDsteps = 6; | ||||
|   MD.trajL   = 1.0; | ||||
|    | ||||
|   HMCparameters HMCparams; | ||||
|   HMCparams.StartTrajectory  = 590; | ||||
|   HMCparams.Trajectories     = 1000; | ||||
|   HMCparams.NoMetropolisUntil=  0; | ||||
|   //  Starting type options: "[HotStart, ColdStart, TepidStart, CheckpointStart]\n"; | ||||
|   //  HMCparams.StartingType     =std::string("ColdStart"); | ||||
|   HMCparams.StartingType     =std::string("CheckpointStart"); | ||||
|   HMCparams.MD = MD; | ||||
|   HMCWrapper TheHMC(HMCparams); | ||||
|  | ||||
|   // Grid from the command line arguments --grid and --mpi | ||||
|   TheHMC.Resources.AddFourDimGrid("gauge"); // use default simd lanes decomposition | ||||
|    | ||||
|   CheckpointerParameters CPparams; | ||||
|   CPparams.config_prefix = "ckpoint_EODWF_lat"; | ||||
|   CPparams.rng_prefix    = "ckpoint_EODWF_rng"; | ||||
|   CPparams.saveInterval  = 10; | ||||
|   CPparams.format        = "IEEE64BIG"; | ||||
|   TheHMC.Resources.LoadNerscCheckpointer(CPparams); | ||||
|  | ||||
|   RNGModuleParameters RNGpar; | ||||
|   RNGpar.serial_seeds = "1 2 3 4 5"; | ||||
|   RNGpar.parallel_seeds = "6 7 8 9 10"; | ||||
|   TheHMC.Resources.SetRNGSeeds(RNGpar); | ||||
|  | ||||
|   // Construct observables (only the plaquette is registered) | ||||
|   // NOTE: there is too much indirection here  | ||||
|   typedef PlaquetteMod<HMCWrapper::ImplPolicy> PlaqObs; | ||||
|   TheHMC.Resources.AddObservable<PlaqObs>(); | ||||
|   ////////////////////////////////////////////// | ||||
|  | ||||
|   const int Ls      = 16; | ||||
|   Real beta         = 2.13; | ||||
|   Real light_mass   = 0.01; | ||||
|   Real strange_mass = 0.04; | ||||
|   Real pv_mass      = 1.0; | ||||
|   RealD M5  = 1.8; | ||||
|   RealD b   = 1.0;  | ||||
|   RealD c   = 0.0; | ||||
|  | ||||
|   std::vector<Real> hasenbusch({ 0.1, 0.3, 0.6 }); | ||||
|  | ||||
|   auto GridPtr   = TheHMC.Resources.GetCartesian(); | ||||
|   auto GridRBPtr = TheHMC.Resources.GetRBCartesian(); | ||||
|   auto FGrid     = SpaceTimeGrid::makeFiveDimGrid(Ls,GridPtr); | ||||
|   auto FrbGrid   = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls,GridPtr); | ||||
|  | ||||
|   std::vector<int> latt  = GridDefaultLatt(); | ||||
|   std::vector<int> mpi   = GridDefaultMpi(); | ||||
|   std::vector<int> simdF = GridDefaultSimd(Nd,vComplexF::Nsimd()); | ||||
|   std::vector<int> simdD = GridDefaultSimd(Nd,vComplexD::Nsimd()); | ||||
|   auto GridPtrF   = SpaceTimeGrid::makeFourDimGrid(latt,simdF,mpi); | ||||
|   auto GridRBPtrF = SpaceTimeGrid::makeFourDimRedBlackGrid(GridPtrF); | ||||
|   auto FGridF     = SpaceTimeGrid::makeFiveDimGrid(Ls,GridPtrF); | ||||
|   auto FrbGridF   = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls,GridPtrF); | ||||
|  | ||||
|   IwasakiGaugeActionR GaugeAction(beta); | ||||
|  | ||||
|   // temporarily need a gauge field (U is passed to the R operators below, UF to the F operators) | ||||
|   LatticeGaugeField U(GridPtr); | ||||
|   LatticeGaugeFieldF UF(GridPtrF); | ||||
|  | ||||
|   // These lines are unnecessary if BC are all periodic (here the fourth entry is -1) | ||||
|   std::vector<Complex> boundary = {1,1,1,-1}; | ||||
|   FermionAction::ImplParams Params(boundary); | ||||
|   FermionActionF::ImplParams ParamsF(boundary); | ||||
|    | ||||
|   double ActionStoppingCondition     = 1e-10; | ||||
|   double DerivativeStoppingCondition = 1e-6; | ||||
|   double MaxCGIterations = 30000; | ||||
|  | ||||
|   //////////////////////////////////// | ||||
|   // Collect actions: Level1 receives the fermion actions, Level2 the gauge action | ||||
|   //////////////////////////////////// | ||||
|   ActionLevel<HMCWrapper::Field> Level1(1); | ||||
|   ActionLevel<HMCWrapper::Field> Level2(8); | ||||
|  | ||||
|   //////////////////////////////////// | ||||
|   // Strange action | ||||
|   //////////////////////////////////// | ||||
|   typedef SchurDiagMooeeOperator<FermionActionF,FermionFieldF> LinearOperatorF; | ||||
|   typedef SchurDiagMooeeOperator<FermionAction ,FermionField > LinearOperatorD; | ||||
|   typedef SchurDiagMooeeOperator<FermionEOFAActionF,FermionFieldF> LinearOperatorEOFAF; | ||||
|   typedef SchurDiagMooeeOperator<FermionEOFAAction ,FermionField > LinearOperatorEOFAD; | ||||
|  | ||||
|   typedef MixedPrecisionConjugateGradientOperatorFunction<MobiusFermionD,MobiusFermionF,LinearOperatorD,LinearOperatorF> MxPCG; | ||||
|   typedef MixedPrecisionConjugateGradientOperatorFunction<MobiusEOFAFermionD,MobiusEOFAFermionF,LinearOperatorEOFAD,LinearOperatorEOFAF> MxPCG_EOFA; | ||||
|  | ||||
|   // DJM: setup for EOFA ratio (Mobius) | ||||
|   OneFlavourRationalParams OFRp; | ||||
|   OFRp.lo       = 0.1; | ||||
|   OFRp.hi       = 25.0; | ||||
|   OFRp.MaxIter  = 10000; | ||||
|   OFRp.tolerance= 1.0e-9; | ||||
|   OFRp.degree   = 14; | ||||
|   OFRp.precision= 50; | ||||
|  | ||||
|    | ||||
|   MobiusEOFAFermionR Strange_Op_L (U , *FGrid , *FrbGrid , *GridPtr , *GridRBPtr , strange_mass, strange_mass, pv_mass, 0.0, -1, M5, b, c); | ||||
|   MobiusEOFAFermionF Strange_Op_LF(UF, *FGridF, *FrbGridF, *GridPtrF, *GridRBPtrF, strange_mass, strange_mass, pv_mass, 0.0, -1, M5, b, c); | ||||
|   MobiusEOFAFermionR Strange_Op_R (U , *FGrid , *FrbGrid , *GridPtr , *GridRBPtr , pv_mass, strange_mass,      pv_mass, -1.0, 1, M5, b, c); | ||||
|   MobiusEOFAFermionF Strange_Op_RF(UF, *FGridF, *FrbGridF, *GridPtrF, *GridRBPtrF, pv_mass, strange_mass,      pv_mass, -1.0, 1, M5, b, c); | ||||
|  | ||||
|   ConjugateGradient<FermionField>      ActionCG(ActionStoppingCondition,MaxCGIterations); | ||||
|   ConjugateGradient<FermionField>  DerivativeCG(DerivativeStoppingCondition,MaxCGIterations); | ||||
| #ifdef MIXED_PRECISION | ||||
|   const int MX_inner = 1000; | ||||
|   // Mixed precision EOFA: separate action and derivative solvers for the L and the R operator | ||||
|   LinearOperatorEOFAD Strange_LinOp_L (Strange_Op_L); | ||||
|   LinearOperatorEOFAD Strange_LinOp_R (Strange_Op_R); | ||||
|   LinearOperatorEOFAF Strange_LinOp_LF(Strange_Op_LF); | ||||
|   LinearOperatorEOFAF Strange_LinOp_RF(Strange_Op_RF); | ||||
|  | ||||
|   MxPCG_EOFA ActionCGL(ActionStoppingCondition, | ||||
| 		       MX_inner, | ||||
| 		       MaxCGIterations, | ||||
| 		       GridPtrF, | ||||
| 		       FrbGridF, | ||||
| 		       Strange_Op_LF,Strange_Op_L, | ||||
| 		       Strange_LinOp_LF,Strange_LinOp_L); | ||||
|  | ||||
|   MxPCG_EOFA DerivativeCGL(DerivativeStoppingCondition, | ||||
| 			   MX_inner, | ||||
| 			   MaxCGIterations, | ||||
| 			   GridPtrF, | ||||
| 			   FrbGridF, | ||||
| 			   Strange_Op_LF,Strange_Op_L, | ||||
| 			   Strange_LinOp_LF,Strange_LinOp_L); | ||||
|    | ||||
|   MxPCG_EOFA ActionCGR(ActionStoppingCondition, | ||||
| 		       MX_inner, | ||||
| 		       MaxCGIterations, | ||||
| 		       GridPtrF, | ||||
| 		       FrbGridF, | ||||
| 		       Strange_Op_RF,Strange_Op_R, | ||||
| 		       Strange_LinOp_RF,Strange_LinOp_R); | ||||
|    | ||||
|   MxPCG_EOFA DerivativeCGR(DerivativeStoppingCondition, | ||||
| 			   MX_inner, | ||||
| 			   MaxCGIterations, | ||||
| 			   GridPtrF, | ||||
| 			   FrbGridF, | ||||
| 			   Strange_Op_RF,Strange_Op_R, | ||||
| 			   Strange_LinOp_RF,Strange_LinOp_R); | ||||
|  | ||||
|   ExactOneFlavourRatioPseudoFermionAction<FermionImplPolicy>  | ||||
|     EOFA(Strange_Op_L, Strange_Op_R,  | ||||
| 	 ActionCG,  | ||||
| 	 ActionCGL, ActionCGR, | ||||
| 	 DerivativeCGL, DerivativeCGR, | ||||
| 	 OFRp, true); | ||||
| #else | ||||
|   ExactOneFlavourRatioPseudoFermionAction<FermionImplPolicy>  | ||||
|     EOFA(Strange_Op_L, Strange_Op_R,  | ||||
| 	 ActionCG, | ||||
| 	 ActionCG, ActionCG, | ||||
| 	 DerivativeCG, DerivativeCG, | ||||
| 	 OFRp, true); | ||||
| #endif | ||||
|   Level1.push_back(&EOFA); | ||||
|  | ||||
|   //////////////////////////////////// | ||||
|   // up down action: chain of mass quotients light_num[h] / light_den[h] through the hasenbusch masses | ||||
|   //////////////////////////////////// | ||||
|   std::vector<Real> light_den; | ||||
|   std::vector<Real> light_num; | ||||
|  | ||||
|   int n_hasenbusch = hasenbusch.size(); | ||||
|   light_den.push_back(light_mass); | ||||
|   for(int h=0;h<n_hasenbusch;h++){ | ||||
|     light_den.push_back(hasenbusch[h]); | ||||
|     light_num.push_back(hasenbusch[h]); | ||||
|   } | ||||
|   light_num.push_back(pv_mass); | ||||
|  | ||||
|   ////////////////////////////////////////////////////////////// | ||||
|   // Forced to replicate the MxPCG and DenominatorsF etc., because | ||||
|   // there is no convenient way to "Clone" physics params from the double op | ||||
|   // into the single op for any operator pair. | ||||
|   // The same issue prevents using MxPCG in the Heatbath step. | ||||
|   ////////////////////////////////////////////////////////////// | ||||
|   std::vector<FermionAction *> Numerators; | ||||
|   std::vector<FermionAction *> Denominators; | ||||
|   std::vector<TwoFlavourEvenOddRatioPseudoFermionAction<FermionImplPolicy> *> Quotients; | ||||
|   std::vector<MxPCG *> ActionMPCG; | ||||
|   std::vector<MxPCG *> MPCG; | ||||
|   std::vector<FermionActionF *> DenominatorsF; | ||||
|   std::vector<LinearOperatorD *> LinOpD; | ||||
|   std::vector<LinearOperatorF *> LinOpF;  | ||||
|  | ||||
|   for(int h=0;h<n_hasenbusch+1;h++){ | ||||
|  | ||||
|     std::cout << GridLogMessage << " 2f quotient Action  "<< light_num[h] << " / " << light_den[h]<< std::endl; | ||||
|  | ||||
|     Numerators.push_back  (new FermionAction(U,*FGrid,*FrbGrid,*GridPtr,*GridRBPtr,light_num[h],M5,b,c, Params)); | ||||
|     Denominators.push_back(new FermionAction(U,*FGrid,*FrbGrid,*GridPtr,*GridRBPtr,light_den[h],M5,b,c, Params)); | ||||
|  | ||||
| #ifdef MIXED_PRECISION | ||||
|     //////////////////////////////////////////////////////////////////////////// | ||||
|     // Mixed precision CG for 2f force (and action) solves on the denominator operator | ||||
|     //////////////////////////////////////////////////////////////////////////// | ||||
|  | ||||
|     DenominatorsF.push_back(new FermionActionF(UF,*FGridF,*FrbGridF,*GridPtrF,*GridRBPtrF,light_den[h],M5,b,c, ParamsF)); | ||||
|     LinOpD.push_back(new LinearOperatorD(*Denominators[h])); | ||||
|     LinOpF.push_back(new LinearOperatorF(*DenominatorsF[h])); | ||||
|  | ||||
|     MPCG.push_back(new MxPCG(DerivativeStoppingCondition, | ||||
| 			     MX_inner, | ||||
| 			     MaxCGIterations, | ||||
| 			     GridPtrF, | ||||
| 			     FrbGridF, | ||||
| 			     *DenominatorsF[h],*Denominators[h], | ||||
| 			     *LinOpF[h], *LinOpD[h]) ); | ||||
|  | ||||
|     ActionMPCG.push_back(new MxPCG(ActionStoppingCondition, | ||||
| 				   MX_inner, | ||||
| 				   MaxCGIterations, | ||||
| 				   GridPtrF, | ||||
| 				   FrbGridF, | ||||
| 				   *DenominatorsF[h],*Denominators[h], | ||||
| 				   *LinOpF[h], *LinOpD[h]) ); | ||||
|  | ||||
|     // Heatbath is not mixed precision yet (plain ActionCG). It inverts the numerators, so this is less important because of their raised mass. | ||||
|     Quotients.push_back (new TwoFlavourEvenOddRatioPseudoFermionAction<FermionImplPolicy>(*Numerators[h],*Denominators[h],*MPCG[h],*ActionMPCG[h],ActionCG)); | ||||
| #else | ||||
|     //////////////////////////////////////////////////////////////////////////// | ||||
|     // Standard CG for 2f force | ||||
|     //////////////////////////////////////////////////////////////////////////// | ||||
|     Quotients.push_back   (new TwoFlavourEvenOddRatioPseudoFermionAction<FermionImplPolicy>(*Numerators[h],*Denominators[h],DerivativeCG,ActionCG)); | ||||
| #endif | ||||
|  | ||||
|   } | ||||
|  | ||||
|   for(int h=0;h<n_hasenbusch+1;h++){ | ||||
|     Level1.push_back(Quotients[h]); | ||||
|   } | ||||
|  | ||||
|   ///////////////////////////////////////////////////////////// | ||||
|   // Gauge action | ||||
|   ///////////////////////////////////////////////////////////// | ||||
|   Level2.push_back(&GaugeAction); | ||||
|   TheHMC.TheAction.push_back(Level1); | ||||
|   TheHMC.TheAction.push_back(Level2); | ||||
|   std::cout << GridLogMessage << " Action complete "<< std::endl; | ||||
|  | ||||
|   ///////////////////////////////////////////////////////////// | ||||
|   // HMC parameters are serialisable | ||||
|  | ||||
|   std::cout << GridLogMessage << " Running the HMC "<< std::endl; | ||||
|   TheHMC.Run();  // no smearing | ||||
|  | ||||
|   Grid_finalize(); | ||||
| } // main | ||||
|  | ||||
|  | ||||
|  | ||||
| @@ -1,198 +0,0 @@ | ||||
| /************************************************************************************* | ||||
|  | ||||
| Grid physics library, www.github.com/paboyle/Grid | ||||
|  | ||||
| Source file: ./tests/Test_hmc_EODWFRatio.cc | ||||
|  | ||||
| Copyright (C) 2015-2016 | ||||
|  | ||||
| Author: Peter Boyle <paboyle@ph.ed.ac.uk> | ||||
| Author: Guido Cossu <guido.cossu@ed.ac.uk> | ||||
|  | ||||
| This program is free software; you can redistribute it and/or modify | ||||
| it under the terms of the GNU General Public License as published by | ||||
| the Free Software Foundation; either version 2 of the License, or | ||||
| (at your option) any later version. | ||||
|  | ||||
| This program is distributed in the hope that it will be useful, | ||||
| but WITHOUT ANY WARRANTY; without even the implied warranty of | ||||
| MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the | ||||
| GNU General Public License for more details. | ||||
|  | ||||
| You should have received a copy of the GNU General Public License along | ||||
| with this program; if not, write to the Free Software Foundation, Inc., | ||||
| 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. | ||||
|  | ||||
| See the full license in the file "LICENSE" in the top level distribution | ||||
| directory | ||||
| *************************************************************************************/ | ||||
| /*  END LEGAL */ | ||||
| #include <Grid/Grid.h> | ||||
|  | ||||
| int main(int argc, char **argv) { | ||||
|   using namespace Grid; | ||||
|   using namespace Grid::QCD; | ||||
|  | ||||
|   Grid_init(&argc, &argv); | ||||
|   int threads = GridThread::GetThreads(); | ||||
|   // HMC driver: one-flavour rational-ratio strange action + Hasenbusch light quotients (Level1) and Iwasaki gauge action (Level2). TODO: make a routine to print all the relevant information on the run | ||||
|   std::cout << GridLogMessage << "Grid is setup to use " << threads << " threads" << std::endl; | ||||
|  | ||||
|    // Typedefs to simplify notation | ||||
|   typedef WilsonImplR FermionImplPolicy; | ||||
|   typedef MobiusFermionR FermionAction; | ||||
|   typedef typename FermionAction::FermionField FermionField; | ||||
|  | ||||
|   typedef Grid::XmlReader       Serialiser; | ||||
|    | ||||
|   //:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: | ||||
|   IntegratorParameters MD; | ||||
|   //  typedef GenericHMCRunner<LeapFrog> HMCWrapper;  | ||||
|   //  MD.name    = std::string("Leap Frog"); | ||||
|   //  typedef GenericHMCRunner<ForceGradient> HMCWrapper;  | ||||
|   //  MD.name    = std::string("Force Gradient"); | ||||
|   typedef GenericHMCRunner<MinimumNorm2> HMCWrapper;  | ||||
|   MD.name    = std::string("MinimumNorm2"); | ||||
|   MD.MDsteps = 20; | ||||
|   MD.trajL   = 1.0; | ||||
|    | ||||
|   HMCparameters HMCparams; | ||||
|   HMCparams.StartTrajectory  = 30; | ||||
|   HMCparams.Trajectories     = 200; | ||||
|   HMCparams.NoMetropolisUntil=  0; | ||||
|   // Starting type options: "[HotStart, ColdStart, TepidStart, CheckpointStart]\n"; | ||||
|   //  HMCparams.StartingType     =std::string("ColdStart"); | ||||
|   HMCparams.StartingType     =std::string("CheckpointStart"); | ||||
|   HMCparams.MD = MD; | ||||
|   HMCWrapper TheHMC(HMCparams); | ||||
|  | ||||
|   // Grid from the command line arguments --grid and --mpi | ||||
|   TheHMC.Resources.AddFourDimGrid("gauge"); // use default simd lanes decomposition | ||||
|    | ||||
|   CheckpointerParameters CPparams; | ||||
|   CPparams.config_prefix = "ckpoint_EODWF_lat"; | ||||
|   CPparams.rng_prefix    = "ckpoint_EODWF_rng"; | ||||
|   CPparams.saveInterval  = 10; | ||||
|   CPparams.format        = "IEEE64BIG"; | ||||
|   TheHMC.Resources.LoadNerscCheckpointer(CPparams); | ||||
|  | ||||
|   RNGModuleParameters RNGpar; | ||||
|   RNGpar.serial_seeds = "1 2 3 4 5"; | ||||
|   RNGpar.parallel_seeds = "6 7 8 9 10"; | ||||
|   TheHMC.Resources.SetRNGSeeds(RNGpar); | ||||
|  | ||||
|   // Construct observables (only the plaquette is registered) | ||||
|   // NOTE: there is too much indirection here  | ||||
|   typedef PlaquetteMod<HMCWrapper::ImplPolicy> PlaqObs; | ||||
|   TheHMC.Resources.AddObservable<PlaqObs>(); | ||||
|   ////////////////////////////////////////////// | ||||
|  | ||||
|   const int Ls      = 16; | ||||
|   Real beta         = 2.13; | ||||
|   Real light_mass   = 0.01; | ||||
|   Real strange_mass = 0.04; | ||||
|   Real pv_mass      = 1.0; | ||||
|   RealD M5  = 1.8; | ||||
|   RealD b   = 1.0;  | ||||
|   RealD c   = 0.0; | ||||
|    | ||||
|   // FIXME: | ||||
|   // Same OFRp in MC and MD  | ||||
|   // Need to mix precision too | ||||
|   OneFlavourRationalParams OFRp; | ||||
|   OFRp.lo       = 4.0e-3; | ||||
|   OFRp.hi       = 30.0; | ||||
|   OFRp.MaxIter  = 10000; | ||||
|   OFRp.tolerance= 1.0e-10; | ||||
|   OFRp.degree   = 16; | ||||
|   OFRp.precision= 50; | ||||
|  | ||||
|   std::vector<Real> hasenbusch({ 0.1 }); | ||||
|  | ||||
|   auto GridPtr   = TheHMC.Resources.GetCartesian(); | ||||
|   auto GridRBPtr = TheHMC.Resources.GetRBCartesian(); | ||||
|   auto FGrid     = SpaceTimeGrid::makeFiveDimGrid(Ls,GridPtr); | ||||
|   auto FrbGrid   = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls,GridPtr); | ||||
|  | ||||
|   IwasakiGaugeActionR GaugeAction(beta); | ||||
|  | ||||
|   // temporarily need a gauge field (passed to every operator constructor below) | ||||
|   LatticeGaugeField U(GridPtr); | ||||
|  | ||||
|   // These lines are unnecessary if BC are all periodic (here the fourth entry is -1) | ||||
|   std::vector<Complex> boundary = {1,1,1,-1}; | ||||
|   FermionAction::ImplParams Params(boundary); | ||||
|    | ||||
|   double StoppingCondition = 1e-10; | ||||
|   double MaxCGIterations = 30000; | ||||
|   ConjugateGradient<FermionField>  CG(StoppingCondition,MaxCGIterations); | ||||
|  | ||||
|   //////////////////////////////////// | ||||
|   // Collect actions: Level1 receives the fermion actions, Level2 the gauge action | ||||
|   //////////////////////////////////// | ||||
|   ActionLevel<HMCWrapper::Field> Level1(1); | ||||
|   ActionLevel<HMCWrapper::Field> Level2(4); | ||||
|  | ||||
|   //////////////////////////////////// | ||||
|   // Strange action | ||||
|   //////////////////////////////////// | ||||
|  | ||||
|   //  FermionAction StrangeOp(U,*FGrid,*FrbGrid,*GridPtr,*GridRBPtr,light_mass,M5,b,c, Params); | ||||
|   //  DomainWallEOFAFermionR Strange_Op_L(Umu, *FGrid, *FrbGrid, *UGrid, *UrbGrid, mf, mf, mb, shift_L, pm, M5); | ||||
|   //  DomainWallEOFAFermionR Strange_Op_R(Umu, *FGrid, *FrbGrid, *UGrid, *UrbGrid, mb, mf, mb, shift_R, pm, M5); | ||||
|   //  ExactOneFlavourRatioPseudoFermionAction EOFA(Strange_Op_L,Strange_Op_R,CG,ofp, false); | ||||
|  | ||||
|   FermionAction StrangeOp (U,*FGrid,*FrbGrid,*GridPtr,*GridRBPtr,strange_mass,M5,b,c, Params); | ||||
|   FermionAction StrangePauliVillarsOp(U,*FGrid,*FrbGrid,*GridPtr,*GridRBPtr,pv_mass,  M5,b,c, Params); | ||||
|  | ||||
|   OneFlavourEvenOddRatioRationalPseudoFermionAction<FermionImplPolicy> StrangePseudoFermion(StrangePauliVillarsOp,StrangeOp,OFRp); | ||||
|   Level1.push_back(&StrangePseudoFermion); | ||||
|  | ||||
|   //////////////////////////////////// | ||||
|   // up down action: chain of mass quotients light_num[h] / light_den[h] through the hasenbusch masses | ||||
|   //////////////////////////////////// | ||||
|   std::vector<Real> light_den; | ||||
|   std::vector<Real> light_num; | ||||
|  | ||||
|   int n_hasenbusch = hasenbusch.size(); | ||||
|   light_den.push_back(light_mass); | ||||
|   for(int h=0;h<n_hasenbusch;h++){ | ||||
|     light_den.push_back(hasenbusch[h]); | ||||
|     light_num.push_back(hasenbusch[h]); | ||||
|   } | ||||
|   light_num.push_back(pv_mass); | ||||
|  | ||||
|   std::vector<FermionAction *> Numerators; | ||||
|   std::vector<FermionAction *> Denominators; | ||||
|   std::vector<TwoFlavourEvenOddRatioPseudoFermionAction<FermionImplPolicy> *> Quotients; | ||||
|  | ||||
|   for(int h=0;h<n_hasenbusch+1;h++){ | ||||
|     std::cout << GridLogMessage << " 2f quotient Action  "<< light_num[h] << " / " << light_den[h]<< std::endl; | ||||
|     Numerators.push_back  (new FermionAction(U,*FGrid,*FrbGrid,*GridPtr,*GridRBPtr,light_num[h],M5,b,c, Params)); | ||||
|     Denominators.push_back(new FermionAction(U,*FGrid,*FrbGrid,*GridPtr,*GridRBPtr,light_den[h],M5,b,c, Params)); | ||||
|     Quotients.push_back   (new TwoFlavourEvenOddRatioPseudoFermionAction<FermionImplPolicy>(*Numerators[h],*Denominators[h],CG,CG)); | ||||
|   } | ||||
|  | ||||
|   for(int h=0;h<n_hasenbusch+1;h++){ | ||||
|     Level1.push_back(Quotients[h]); | ||||
|   } | ||||
|  | ||||
|   ///////////////////////////////////////////////////////////// | ||||
|   // Gauge action | ||||
|   ///////////////////////////////////////////////////////////// | ||||
|   Level2.push_back(&GaugeAction); | ||||
|   TheHMC.TheAction.push_back(Level1); | ||||
|   TheHMC.TheAction.push_back(Level2); | ||||
|   std::cout << GridLogMessage << " Action complete "<< std::endl; | ||||
|  | ||||
|   ///////////////////////////////////////////////////////////// | ||||
|   // HMC parameters are serialisable | ||||
|  | ||||
|   std::cout << GridLogMessage << " Running the HMC "<< std::endl; | ||||
|   TheHMC.Run();  // no smearing | ||||
|  | ||||
|   Grid_finalize(); | ||||
| } // main | ||||
|  | ||||
|  | ||||
|  | ||||
							
								
								
									
										109
									
								
								HMC/README
									
									
									
									
									
								
							
							
						
						
									
										109
									
								
								HMC/README
									
									
									
									
									
								
							| @@ -1,109 +0,0 @@ | ||||
| ******************************************************************** | ||||
| TODO:  | ||||
| ******************************************************************** | ||||
|  | ||||
| i) Got mixed precision in 2f and EOFA force and action solves. | ||||
|    But need mixed precision in the heatbath solve. Best for Fermop to have a "clone" method, to | ||||
|    reduce the number of solver and action objects. Needed ideally for the EOFA heatbath. | ||||
|    15% perhaps | ||||
|    Combine with 2x trajectory length? | ||||
|  | ||||
| ii) Rational on EOFA HB  -- relax order | ||||
|                          -- Test the approx as per David email | ||||
|  | ||||
| Resume / roll.sh  | ||||
|  | ||||
| ---------------------------------------------------------------- | ||||
|  | ||||
| - 16^3 Currently 10 traj per hour | ||||
|  | ||||
| - EOFA use a different derivative solver from action solver | ||||
| - EOFA fix David's hack to the SchurRedBlack guessing | ||||
|  | ||||
| *** Reduce precision/tolerance  in EOFA with second CG param.                          (10% speed up) | ||||
| *** Force gradient - reduced precision solve for the gradient                          (4/3x speedup) | ||||
|  | ||||
|  | ||||
| *** Need a plan for gauge field update for mixed precision in HMC                      (2x speed up) | ||||
|     -- Store the single prec action operator. | ||||
|     -- Clone the gauge field from the operator function argument. | ||||
|     -- Build the mixed precision operator dynamically from the passed operator and single prec clone. | ||||
|  | ||||
| *** Mixed precision CG into EOFA portion          | ||||
| *** Further reduce precision in forces to 10^-6 ? | ||||
|  | ||||
| *** Overall: a 3x or so is still possible => 500s -> 160s and 20 traj per hour on 16^3. | ||||
|  | ||||
| - Use mixed precision CG in HMC                            | ||||
| - SchurRedBlack.h: stop use of operator function; use LinearOperator or similar instead. | ||||
| - Or make an OperatorFunction for mixed precision as a wrapper | ||||
|  | ||||
| ******************************************************************** | ||||
| * Signed off 2+1f HMC with Hasenbusch and strange RHMC 16^3 x 32 DWF Ls=16 Plaquette 0.5883 ish | ||||
| * Signed off 2+1f HMC with Hasenbusch and strange EOFA 16^3 x 32 DWF Ls=16 Plaquette 0.5883 ish | ||||
| * Wilson plaquette cross checked against CPS and literature GwilsonFnone | ||||
| ******************************************************************** | ||||
|  | ||||
| ******************************************************************** | ||||
| * RHMC: Timesteps & eigenranges matched from previous CPS 16^3 x 32 runs: | ||||
| ******************************************************************** | ||||
|  | ||||
| **** | ||||
| Strange (m=0.04)  has eigenspan  | ||||
| ****  | ||||
| 16^3 done as 1+1+1 with separate PV's.  | ||||
| /dirac1/archive/QCDOC/host/QCDDWF/DWF/2+1f/16nt32/IWASAKI/b2.13/ls16/M1_8/ms0.04/mu0.01/rhmc_multitimescale/evol5/work | ||||
| **** | ||||
| 2+1f 16^3  - [ 4e-4, 2.42 ]    for strange | ||||
|  | ||||
| **** | ||||
| 24^3 done as 1+1+1 at strange, and single quotient https://arxiv.org/pdf/0804.0473.pdf Eq 83, | ||||
| **** | ||||
| double lambda_low =   4.0000000000000002e-04 <- strange | ||||
| double lambda_low =   1.0000000000000000e-02 <- pauli villars | ||||
| And high = 2.5 | ||||
|  | ||||
| Array bsn_mass[3] = {  | ||||
| double bsn_mass[0] =   1.0000000000000000e+00 | ||||
| double bsn_mass[1] =   1.0000000000000000e+00 | ||||
| double bsn_mass[2] =   1.0000000000000000e+00 | ||||
| } | ||||
| Array frm_mass[3] = {  | ||||
| double frm_mass[0] =   4.0000000000000001e-02 | ||||
| double frm_mass[1] =   4.0000000000000001e-02 | ||||
| double frm_mass[2] =   4.0000000000000001e-02 | ||||
| } | ||||
|  | ||||
| *** | ||||
| 32^3  | ||||
| /dirac1/archive/QCDOC/host/QCDDWF/DWF/2+1f/32nt64/IWASAKI/b2.25/ls16/M1_8/ms0.03/mu0.004/evol6/work | ||||
| *** | ||||
| Similar det scheme | ||||
| double lambda_low =   4.0000000000000002e-04 | ||||
| double lambda_low =   1.0000000000000000e-02 | ||||
|  | ||||
| Array bsn_mass[3] = {  | ||||
| double bsn_mass[0] =   1.0000000000000000e+00 | ||||
| double bsn_mass[1] =   1.0000000000000000e+00 | ||||
| double bsn_mass[2] =   1.0000000000000000e+00 | ||||
| } | ||||
| Array frm_mass[3] = {  | ||||
| double frm_mass[0] =   3.0000000000000002e-02 | ||||
| double frm_mass[1] =   3.0000000000000002e-02 | ||||
| double frm_mass[2] =   3.0000000000000002e-02 | ||||
| } | ||||
|  | ||||
| ******************************************************************** | ||||
| * Grid: Power method bounds check | ||||
| ******************************************************************** | ||||
| - Finding largest eigenvalue approx 25 not 2.5 | ||||
| - Conventions: | ||||
|  | ||||
| Grid MpcDagMpc based on: | ||||
|  | ||||
|    (Moo-Moe Mee^-1 Meo)^dag(Moo-Moe Mee^-1 Meo) | ||||
|  | ||||
| - with  Moo = 5-M5 = 3.2 | ||||
| - CPS use(d) Moo = 1 | ||||
| - Eigenrange in Grid is 3.2^2 rescaled so factor of 10 accounted for | ||||
|  | ||||
| @@ -4,7 +4,7 @@ Grid physics library, www.github.com/paboyle/Grid | ||||
|  | ||||
| Source file: Hadrons/A2AMatrix.hpp | ||||
|  | ||||
| Copyright (C) 2015-2019 | ||||
| Copyright (C) 2015-2018 | ||||
|  | ||||
| Author: Antonin Portelli <antonin.portelli@me.com> | ||||
| Author: Peter Boyle <paboyle@ph.ed.ac.uk> | ||||
| @@ -32,19 +32,11 @@ See the full license in the file "LICENSE" in the top level distribution directo | ||||
| #include <Hadrons/Global.hpp> | ||||
| #include <Hadrons/TimerArray.hpp> | ||||
| #include <Grid/Eigen/unsupported/CXX11/Tensor> | ||||
| #ifdef USE_MKL | ||||
| #include "mkl.h" | ||||
| #include "mkl_cblas.h" | ||||
| #endif | ||||
|  | ||||
| #ifndef HADRONS_A2AM_NAME  | ||||
| #define HADRONS_A2AM_NAME "a2aMatrix" | ||||
| #endif | ||||
|  | ||||
| #ifndef HADRONS_A2AM_IO_TYPE | ||||
| #define HADRONS_A2AM_IO_TYPE ComplexF | ||||
| #endif | ||||
|  | ||||
| #define HADRONS_A2AM_PARALLEL_IO | ||||
|  | ||||
| BEGIN_HADRONS_NAMESPACE | ||||
| @@ -59,12 +51,6 @@ BEGIN_HADRONS_NAMESPACE | ||||
| template <typename T> | ||||
| using A2AMatrixSet = Eigen::TensorMap<Eigen::Tensor<T, 5, Eigen::RowMajor>>; | ||||
|  | ||||
| template <typename T> | ||||
| using A2AMatrix = Eigen::Matrix<T, -1, -1, Eigen::RowMajor>; | ||||
|  | ||||
| template <typename T> | ||||
| using A2AMatrixTr = Eigen::Matrix<T, -1, -1, Eigen::ColMajor>; | ||||
|  | ||||
| /****************************************************************************** | ||||
|  *                      Abstract class for A2A kernels                        * | ||||
|  ******************************************************************************/ | ||||
| @@ -90,15 +76,10 @@ public: | ||||
|     // constructors | ||||
|     A2AMatrixIo(void) = default; | ||||
|     A2AMatrixIo(std::string filename, std::string dataname,  | ||||
|                 const unsigned int nt, const unsigned int ni = 0, | ||||
|                 const unsigned int nj = 0); | ||||
|                 const unsigned int nt, const unsigned int ni, | ||||
|                 const unsigned int nj); | ||||
|     // destructor | ||||
|     ~A2AMatrixIo(void) = default; | ||||
|     // access | ||||
|     unsigned int getNi(void) const; | ||||
|     unsigned int getNj(void) const; | ||||
|     unsigned int getNt(void) const; | ||||
|     size_t       getSize(void) const; | ||||
|     // file allocation | ||||
|     template <typename MetadataType> | ||||
|     void initFile(const MetadataType &d, const unsigned int chunkSize); | ||||
| @@ -107,11 +88,9 @@ public: | ||||
|                    const unsigned int blockSizei, const unsigned int blockSizej); | ||||
|     void saveBlock(const A2AMatrixSet<T> &m, const unsigned int ext, const unsigned int str, | ||||
|                    const unsigned int i, const unsigned int j); | ||||
|     template <template <class> class Vec, typename VecT> | ||||
|     void load(Vec<VecT> &v, double *tRead = nullptr); | ||||
| private: | ||||
|     std::string  filename_{""}, dataname_{""}; | ||||
|     unsigned int nt_{0}, ni_{0}, nj_{0}; | ||||
|     std::string  filename_, dataname_; | ||||
|     unsigned int nt_, ni_, nj_; | ||||
| }; | ||||
|  | ||||
| /****************************************************************************** | ||||
| @@ -157,226 +136,6 @@ private: | ||||
|     std::vector<IoHelper> nodeIo_; | ||||
| }; | ||||
|  | ||||
| /****************************************************************************** | ||||
|  *                       A2A matrix contraction kernels                       * | ||||
|  ******************************************************************************/ | ||||
| class A2AContraction | ||||
| { | ||||
| public: | ||||
|     // accTrMul(acc, a, b): acc += tr(a*b) | ||||
|     template <typename C, typename MatLeft, typename MatRight> | ||||
|     static inline void accTrMul(C &acc, const MatLeft &a, const MatRight &b) | ||||
|     { | ||||
|         if ((MatLeft::Options == Eigen::RowMajor) and | ||||
|             (MatRight::Options == Eigen::ColMajor)) | ||||
|         { | ||||
|             parallel_for (unsigned int r = 0; r < a.rows(); ++r) | ||||
|             { | ||||
|                 C tmp; | ||||
| #ifdef USE_MKL | ||||
|                 dotuRow(tmp, r, a, b); | ||||
| #else | ||||
|                 tmp = a.row(r).conjugate().dot(b.col(r)); | ||||
| #endif | ||||
|                 parallel_critical | ||||
|                 { | ||||
|                     acc += tmp; | ||||
|                 } | ||||
|             } | ||||
|         } | ||||
|         else | ||||
|         { | ||||
|             parallel_for (unsigned int c = 0; c < a.cols(); ++c) | ||||
|             { | ||||
|                 C tmp; | ||||
| #ifdef USE_MKL  | ||||
|                 dotuCol(tmp, c, a, b); | ||||
| #else | ||||
|                 tmp = a.col(c).conjugate().dot(b.row(c)); | ||||
| #endif | ||||
|                 parallel_critical | ||||
|                 { | ||||
|                     acc += tmp; | ||||
|                 } | ||||
|             } | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     template <typename MatLeft, typename MatRight> | ||||
|     static inline double accTrMulFlops(const MatLeft &a, const MatRight &b) | ||||
|     { | ||||
|         double n = a.rows()*a.cols(); | ||||
|  | ||||
|         return 8.*n; | ||||
|     } | ||||
|  | ||||
|     // mul(res, a, b): res = a*b | ||||
| #ifdef USE_MKL | ||||
|     // double-precision complex product res = a*b through cblas_zgemm; res is | ||||
|     // resized to a.rows() x b.cols() when its current shape differs. Nothing is | ||||
|     // computed if Options is neither Eigen::RowMajor nor Eigen::ColMajor. | ||||
|     template <template <class, int...> class Mat, int... Opts> | ||||
|     static inline void mul(Mat<ComplexD, Opts...> &res, | ||||
|                            const Mat<ComplexD, Opts...> &a, | ||||
|                            const Mat<ComplexD, Opts...> &b) | ||||
|     { | ||||
|         typedef Mat<ComplexD, Opts...> MatType; | ||||
|  | ||||
|         static const ComplexD one(1., 0.), zero(0., 0.); | ||||
|         const bool sameShape = (res.rows() == a.rows()) and | ||||
|                                (res.cols() == b.cols()); | ||||
|  | ||||
|         if (!sameShape) | ||||
|         { | ||||
|             res.resize(a.rows(), b.cols()); | ||||
|         } | ||||
|         if (MatType::Options == Eigen::ColMajor) | ||||
|         { | ||||
|             // column-major storage: row counts are passed as leading dimensions | ||||
|             cblas_zgemm(CblasColMajor, CblasNoTrans, CblasNoTrans, a.rows(), | ||||
|                         b.cols(), a.cols(), &one, a.data(), a.rows(), b.data(), | ||||
|                         b.rows(), &zero, res.data(), res.rows()); | ||||
|         } | ||||
|         else if (MatType::Options == Eigen::RowMajor) | ||||
|         { | ||||
|             // row-major storage: column counts are passed as leading dimensions | ||||
|             cblas_zgemm(CblasRowMajor, CblasNoTrans, CblasNoTrans, a.rows(), | ||||
|                         b.cols(), a.cols(), &one, a.data(), a.cols(), b.data(), | ||||
|                         b.cols(), &zero, res.data(), res.cols()); | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     // single-precision complex product res = a*b through cblas_cgemm; res is | ||||
|     // resized to a.rows() x b.cols() when its current shape differs. Nothing is | ||||
|     // computed if Options is neither Eigen::RowMajor nor Eigen::ColMajor. | ||||
|     template <template <class, int...> class Mat, int... Opts> | ||||
|     static inline void mul(Mat<ComplexF, Opts...> &res, | ||||
|                            const Mat<ComplexF, Opts...> &a, | ||||
|                            const Mat<ComplexF, Opts...> &b) | ||||
|     { | ||||
|         typedef Mat<ComplexF, Opts...> MatType; | ||||
|  | ||||
|         static const ComplexF one(1., 0.), zero(0., 0.); | ||||
|         const bool sameShape = (res.rows() == a.rows()) and | ||||
|                                (res.cols() == b.cols()); | ||||
|  | ||||
|         if (!sameShape) | ||||
|         { | ||||
|             res.resize(a.rows(), b.cols()); | ||||
|         } | ||||
|         if (MatType::Options == Eigen::ColMajor) | ||||
|         { | ||||
|             // column-major storage: row counts are passed as leading dimensions | ||||
|             cblas_cgemm(CblasColMajor, CblasNoTrans, CblasNoTrans, a.rows(), | ||||
|                         b.cols(), a.cols(), &one, a.data(), a.rows(), b.data(), | ||||
|                         b.rows(), &zero, res.data(), res.rows()); | ||||
|         } | ||||
|         else if (MatType::Options == Eigen::RowMajor) | ||||
|         { | ||||
|             // row-major storage: column counts are passed as leading dimensions | ||||
|             cblas_cgemm(CblasRowMajor, CblasNoTrans, CblasNoTrans, a.rows(), | ||||
|                         b.cols(), a.cols(), &one, a.data(), a.cols(), b.data(), | ||||
|                         b.cols(), &zero, res.data(), res.cols()); | ||||
|         } | ||||
|     } | ||||
| #else | ||||
|     // fallback used when USE_MKL is not defined: the product is delegated to | ||||
|     // the operator* and assignment of the Mat type itself | ||||
|     template <typename Mat> | ||||
|     static inline void mul(Mat &res, const Mat &a, const Mat &b) | ||||
|     { | ||||
|         res = a*b; | ||||
|     } | ||||
| #endif | ||||
|     // mulFlops(a, b): returns nr*nr*(6*nc + 2*(nc - 1)) with nr = a.rows() and | ||||
|     // nc = a.cols(); only the shape of a is used | ||||
|     template <typename Mat> | ||||
|     static inline double mulFlops(const Mat &a, const Mat &b) | ||||
|     { | ||||
|         const double nRow     = a.rows(); | ||||
|         const double nCol     = a.cols(); | ||||
|         const double perEntry = 6.*nCol + 2.*(nCol - 1.); | ||||
|  | ||||
|         return nRow*nRow*perEntry; | ||||
|     } | ||||
| private: | ||||
|     // makeDotRowPt: set (aPt, aInc) to the start and stride of row aRow of a, | ||||
|     // and (bPt, bInc) to the start and stride of column aRow of b, computed | ||||
|     // from the raw data pointers and the storage order of each matrix. | ||||
|     // An output pair is left untouched when the corresponding Options value is | ||||
|     // neither Eigen::RowMajor nor Eigen::ColMajor. | ||||
|     template <typename C, typename MatLeft, typename MatRight> | ||||
|     static inline void makeDotRowPt(C * &aPt, unsigned int &aInc, C * &bPt, | ||||
|                                     unsigned int &bInc, const unsigned int aRow, | ||||
|                                     const MatLeft &a, const MatRight &b) | ||||
|     { | ||||
|         const bool leftRowMajor  = (MatLeft::Options == Eigen::RowMajor); | ||||
|         const bool leftColMajor  = (MatLeft::Options == Eigen::ColMajor); | ||||
|         const bool rightRowMajor = (MatRight::Options == Eigen::RowMajor); | ||||
|         const bool rightColMajor = (MatRight::Options == Eigen::ColMajor); | ||||
|  | ||||
|         // column aRow of b | ||||
|         if (rightRowMajor) | ||||
|         { | ||||
|             bInc = b.cols(); | ||||
|             bPt  = b.data() + aRow; | ||||
|         } | ||||
|         else if (rightColMajor) | ||||
|         { | ||||
|             bInc = 1; | ||||
|             bPt  = b.data() + aRow*b.rows(); | ||||
|         } | ||||
|         // row aRow of a | ||||
|         if (leftRowMajor) | ||||
|         { | ||||
|             aInc = 1; | ||||
|             aPt  = a.data() + aRow*a.cols(); | ||||
|         } | ||||
|         else if (leftColMajor) | ||||
|         { | ||||
|             aInc = a.rows(); | ||||
|             aPt  = a.data() + aRow; | ||||
|         } | ||||
|     } | ||||
|  | ||||
| #ifdef USE_MKL | ||||
|     // makeDotColPt: set (aPt, aInc) to the start and stride of column aCol of | ||||
|     // a, and (bPt, bInc) to the start and stride of row aCol of b, computed | ||||
|     // from the raw data pointers and the storage order of each matrix. | ||||
|     // An output pair is left untouched when the corresponding Options value is | ||||
|     // neither Eigen::RowMajor nor Eigen::ColMajor. | ||||
|     template <typename C, typename MatLeft, typename MatRight> | ||||
|     static inline void makeDotColPt(C * &aPt, unsigned int &aInc, C * &bPt, | ||||
|                                     unsigned int &bInc, const unsigned int aCol, | ||||
|                                     const MatLeft &a, const MatRight &b) | ||||
|     { | ||||
|         const bool leftRowMajor  = (MatLeft::Options == Eigen::RowMajor); | ||||
|         const bool leftColMajor  = (MatLeft::Options == Eigen::ColMajor); | ||||
|         const bool rightRowMajor = (MatRight::Options == Eigen::RowMajor); | ||||
|         const bool rightColMajor = (MatRight::Options == Eigen::ColMajor); | ||||
|  | ||||
|         // row aCol of b | ||||
|         if (rightRowMajor) | ||||
|         { | ||||
|             bInc = 1; | ||||
|             bPt  = b.data() + aCol*b.cols(); | ||||
|         } | ||||
|         else if (rightColMajor) | ||||
|         { | ||||
|             bInc = b.rows(); | ||||
|             bPt  = b.data() + aCol; | ||||
|         } | ||||
|         // column aCol of a | ||||
|         if (leftRowMajor) | ||||
|         { | ||||
|             aInc = a.cols(); | ||||
|             aPt  = a.data() + aCol; | ||||
|         } | ||||
|         else if (leftColMajor) | ||||
|         { | ||||
|             aInc = 1; | ||||
|             aPt  = a.data() + aCol*a.rows(); | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     // single precision: res = dot product of row aRow of a with column aRow of | ||||
|     // b over a.cols() terms, computed by cblas_cdotu_sub | ||||
|     template <typename MatLeft, typename MatRight> | ||||
|     static inline void dotuRow(ComplexF &res, const unsigned int aRow, | ||||
|                                const MatLeft &a, const MatRight &b) | ||||
|     { | ||||
|         const ComplexF *rowPt; | ||||
|         const ComplexF *colPt; | ||||
|         unsigned int   rowInc; | ||||
|         unsigned int   colInc; | ||||
|  | ||||
|         makeDotRowPt(rowPt, rowInc, colPt, colInc, aRow, a, b); | ||||
|         cblas_cdotu_sub(a.cols(), rowPt, rowInc, colPt, colInc, &res); | ||||
|     } | ||||
|  | ||||
|     // single precision: res = dot product of column aCol of a with row aCol of | ||||
|     // b over a.rows() terms, computed by cblas_cdotu_sub | ||||
|     template <typename MatLeft, typename MatRight> | ||||
|     static inline void dotuCol(ComplexF &res, const unsigned int aCol, | ||||
|                                const MatLeft &a, const MatRight &b) | ||||
|     { | ||||
|         const ComplexF *colPt; | ||||
|         const ComplexF *rowPt; | ||||
|         unsigned int   colInc; | ||||
|         unsigned int   rowInc; | ||||
|  | ||||
|         makeDotColPt(colPt, colInc, rowPt, rowInc, aCol, a, b); | ||||
|         cblas_cdotu_sub(a.rows(), colPt, colInc, rowPt, rowInc, &res); | ||||
|     } | ||||
|  | ||||
|     // double precision: res = dot product of row aRow of a with column aRow of | ||||
|     // b over a.cols() terms, computed by cblas_zdotu_sub | ||||
|     template <typename MatLeft, typename MatRight> | ||||
|     static inline void dotuRow(ComplexD &res, const unsigned int aRow, | ||||
|                                const MatLeft &a, const MatRight &b) | ||||
|     { | ||||
|         const ComplexD *rowPt; | ||||
|         const ComplexD *colPt; | ||||
|         unsigned int   rowInc; | ||||
|         unsigned int   colInc; | ||||
|  | ||||
|         makeDotRowPt(rowPt, rowInc, colPt, colInc, aRow, a, b); | ||||
|         cblas_zdotu_sub(a.cols(), rowPt, rowInc, colPt, colInc, &res); | ||||
|     } | ||||
|  | ||||
|     // double precision: res = dot product of column aCol of a with row aCol of | ||||
|     // b over a.rows() terms, computed by cblas_zdotu_sub | ||||
|     template <typename MatLeft, typename MatRight> | ||||
|     static inline void dotuCol(ComplexD &res, const unsigned int aCol, | ||||
|                                const MatLeft &a, const MatRight &b) | ||||
|     { | ||||
|         const ComplexD *colPt; | ||||
|         const ComplexD *rowPt; | ||||
|         unsigned int   colInc; | ||||
|         unsigned int   rowInc; | ||||
|  | ||||
|         makeDotColPt(colPt, colInc, rowPt, rowInc, aCol, a, b); | ||||
|         cblas_zdotu_sub(a.rows(), colPt, colInc, rowPt, rowInc, &res); | ||||
|     } | ||||
| #endif | ||||
| }; | ||||
|  | ||||
| /****************************************************************************** | ||||
|  *                     A2AMatrixIo template implementation                    * | ||||
|  ******************************************************************************/ | ||||
| @@ -389,31 +148,6 @@ A2AMatrixIo<T>::A2AMatrixIo(std::string filename, std::string dataname, | ||||
| , nt_(nt), ni_(ni), nj_(nj) | ||||
| {} | ||||
|  | ||||
| // access ////////////////////////////////////////////////////////////////////// | ||||
| // value of nt_ held by this object | ||||
| template <typename T> | ||||
| unsigned int A2AMatrixIo<T>::getNt(void) const | ||||
| { | ||||
|     return this->nt_; | ||||
| } | ||||
|  | ||||
| // value of ni_ held by this object | ||||
| template <typename T> | ||||
| unsigned int A2AMatrixIo<T>::getNi(void) const | ||||
| { | ||||
|     return this->ni_; | ||||
| } | ||||
|  | ||||
| // value of nj_ held by this object | ||||
| template <typename T> | ||||
| unsigned int A2AMatrixIo<T>::getNj(void) const | ||||
| { | ||||
|     return this->nj_; | ||||
| } | ||||
|  | ||||
| // size in bytes of nt_*ni_*nj_ elements of type T | ||||
| template <typename T> | ||||
| size_t A2AMatrixIo<T>::getSize(void) const | ||||
| { | ||||
|     // widen to size_t before multiplying: the getters return unsigned int, so | ||||
|     // the dimensions are presumably unsigned int and their product could wrap | ||||
|     // around before being scaled by sizeof(T) | ||||
|     return static_cast<size_t>(nt_)*ni_*nj_*sizeof(T); | ||||
| } | ||||
|  | ||||
| // file allocation ///////////////////////////////////////////////////////////// | ||||
| template <typename T> | ||||
| template <typename MetadataType> | ||||
| @@ -437,12 +171,11 @@ void A2AMatrixIo<T>::initFile(const MetadataType &d, const unsigned int chunkSiz | ||||
|     } | ||||
|  | ||||
|     // create the dataset | ||||
|     Hdf5Reader reader(filename_, false); | ||||
|     Hdf5Reader reader(filename_); | ||||
|  | ||||
|     push(reader, dataname_); | ||||
|     auto &group = reader.getGroup(); | ||||
|     plist.setChunk(chunk.size(), chunk.data()); | ||||
|     plist.setFletcher32(); | ||||
|     dataset = group.createDataSet(HADRONS_A2AM_NAME, Hdf5Type<T>::type(), dataspace, plist); | ||||
| #else | ||||
|     HADRONS_ERROR(Implementation, "all-to-all matrix I/O needs HDF5 library"); | ||||
| @@ -458,7 +191,7 @@ void A2AMatrixIo<T>::saveBlock(const T *data, | ||||
|                                const unsigned int blockSizej) | ||||
| { | ||||
| #ifdef HAVE_HDF5 | ||||
|     Hdf5Reader           reader(filename_, false); | ||||
|     Hdf5Reader           reader(filename_); | ||||
|     std::vector<hsize_t> count = {nt_, blockSizei, blockSizej}, | ||||
|                          offset = {0, static_cast<hsize_t>(i), | ||||
|                                    static_cast<hsize_t>(j)}, | ||||
| @@ -493,80 +226,6 @@ void A2AMatrixIo<T>::saveBlock(const A2AMatrixSet<T> &m, | ||||
|     saveBlock(m.data() + offset, i, j, blockSizei, blockSizej); | ||||
| } | ||||
|  | ||||
| // Read the HADRONS_A2AM_NAME dataset one time slice at a time, from the last | ||||
| // slice to the first, storing slice t in v[t] after a cast to VecT. | ||||
| // If ni_ or nj_ is zero, both are taken from the dataset extent; otherwise the | ||||
| // extent is checked against nt_ x ni_ x nj_ (when that product is non-zero). | ||||
| // If tRead is not null it receives the accumulated time spent in dataset.read, | ||||
| // in the units returned by usecond(). | ||||
| // Raises a Size error on rank or extent mismatch, and an Implementation error | ||||
| // when built without HDF5. | ||||
| template <typename T> | ||||
| template <template <class> class Vec, typename VecT> | ||||
| void A2AMatrixIo<T>::load(Vec<VecT> &v, double *tRead) | ||||
| { | ||||
| #ifdef HAVE_HDF5 | ||||
|     Hdf5Reader           reader(filename_); | ||||
|     std::vector<hsize_t> hdim; | ||||
|     H5NS::DataSet        dataset; | ||||
|     H5NS::DataSpace      dataspace; | ||||
|     H5NS::CompType       datatype; | ||||
|      | ||||
|     push(reader, dataname_); | ||||
|     auto &group = reader.getGroup(); | ||||
|     dataset     = group.openDataSet(HADRONS_A2AM_NAME); | ||||
|     datatype    = dataset.getCompType(); | ||||
|     dataspace   = dataset.getSpace(); | ||||
|     hdim.resize(dataspace.getSimpleExtentNdims()); | ||||
|     dataspace.getSimpleExtentDims(hdim.data()); | ||||
|     // hdim[0], hdim[1] and hdim[2] are indexed below | ||||
|     if (hdim.size() != 3) | ||||
|     { | ||||
|         HADRONS_ERROR(Size, "all-to-all matrix dataset has rank " | ||||
|             + std::to_string(hdim.size()) + ", expected 3"); | ||||
|     } | ||||
|     if ((nt_*ni_*nj_ != 0) and | ||||
|         ((hdim[0] != nt_) or (hdim[1] != ni_) or (hdim[2] != nj_))) | ||||
|     { | ||||
|         HADRONS_ERROR(Size, "all-to-all matrix size mismatch (got " | ||||
|             + std::to_string(hdim[0]) + "x" + std::to_string(hdim[1]) + "x" | ||||
|             + std::to_string(hdim[2]) + ", expected " | ||||
|             + std::to_string(nt_) + "x" + std::to_string(ni_) + "x" | ||||
|             + std::to_string(nj_) + ")"); | ||||
|     } | ||||
|     else if (ni_*nj_ == 0) | ||||
|     { | ||||
|         if (hdim[0] != nt_) | ||||
|         { | ||||
|             HADRONS_ERROR(Size, "all-to-all time size mismatch (got " | ||||
|                 + std::to_string(hdim[0]) + ", expected " | ||||
|                 + std::to_string(nt_) + ")"); | ||||
|         } | ||||
|         // matrix dimensions not set by the caller: adopt the ones on file | ||||
|         ni_ = hdim[1]; | ||||
|         nj_ = hdim[2]; | ||||
|     } | ||||
|  | ||||
|     A2AMatrix<T>         buf(ni_, nj_); | ||||
|     std::vector<hsize_t> count    = {1, static_cast<hsize_t>(ni_), | ||||
|                                      static_cast<hsize_t>(nj_)}, | ||||
|                          stride   = {1, 1, 1}, | ||||
|                          block    = {1, 1, 1}, | ||||
|                          memCount = {static_cast<hsize_t>(ni_), | ||||
|                                      static_cast<hsize_t>(nj_)}; | ||||
|     H5NS::DataSpace      memspace(memCount.size(), memCount.data()); | ||||
|  | ||||
|     std::cout << "Loading timeslice"; | ||||
|     std::cout.flush(); | ||||
|     // tRead is optional: every access is guarded against a null pointer | ||||
|     if (tRead) *tRead = 0.; | ||||
|     // count down with tp1 = t + 1 so that the unsigned index never wraps | ||||
|     for (unsigned int tp1 = nt_; tp1 > 0; --tp1) | ||||
|     { | ||||
|         unsigned int         t      = tp1 - 1; | ||||
|         std::vector<hsize_t> offset = {static_cast<hsize_t>(t), 0, 0}; | ||||
|          | ||||
|         if (t % 10 == 0) | ||||
|         { | ||||
|             std::cout << " " << t; | ||||
|             std::cout.flush(); | ||||
|         } | ||||
|         dataspace.selectHyperslab(H5S_SELECT_SET, count.data(), offset.data(), | ||||
|                                   stride.data(), block.data()); | ||||
|         if (tRead) *tRead -= usecond();     | ||||
|         dataset.read(buf.data(), datatype, memspace, dataspace); | ||||
|         if (tRead) *tRead += usecond(); | ||||
|         v[t] = buf.template cast<VecT>(); | ||||
|     } | ||||
|     std::cout << std::endl; | ||||
| #else | ||||
|     HADRONS_ERROR(Implementation, "all-to-all matrix I/O needs HDF5 library"); | ||||
| #endif | ||||
| } | ||||
|  | ||||
| /****************************************************************************** | ||||
|  *               A2AMatrixBlockComputation template implementation            * | ||||
|  ******************************************************************************/ | ||||
|   | ||||
| @@ -4,7 +4,7 @@ Grid physics library, www.github.com/paboyle/Grid | ||||
|  | ||||
| Source file: Hadrons/A2AVectors.hpp | ||||
|  | ||||
| Copyright (C) 2015-2019 | ||||
| Copyright (C) 2015-2018 | ||||
|  | ||||
| Author: Antonin Portelli <antonin.portelli@me.com> | ||||
| Author: fionnoh <fionnoh@gmail.com> | ||||
| @@ -36,7 +36,7 @@ See the full license in the file "LICENSE" in the top level distribution directo | ||||
| BEGIN_HADRONS_NAMESPACE | ||||
|  | ||||
| /****************************************************************************** | ||||
|  *                 Class to generate V & W all-to-all vectors                 * | ||||
|  *               Classes to generate V & W all-to-all vectors                 * | ||||
|  ******************************************************************************/ | ||||
| template <typename FImpl> | ||||
| class A2AVectorsSchurDiagTwo | ||||
| @@ -70,42 +70,6 @@ private: | ||||
|     SchurDiagTwoOperator<FMat, FermionField> op_; | ||||
| }; | ||||
|  | ||||
| /****************************************************************************** | ||||
|  *                  Methods for V & W all-to-all vectors I/O                  * | ||||
|  ******************************************************************************/ | ||||
| class A2AVectorsIo | ||||
| { | ||||
| public: | ||||
|     // metadata stored with each vector: its position in the set | ||||
|     struct Record: Serializable | ||||
|     { | ||||
|         GRID_SERIALIZABLE_CLASS_MEMBERS(Record, | ||||
|                                         unsigned int, index); | ||||
|         Record(void): index(0) {} | ||||
|     }; | ||||
| public: | ||||
|     // write/read a set of vectors, either to one file per vector | ||||
|     // (multiFile) or to a single file | ||||
|     template <typename Field> | ||||
|     static void write(const std::string fileStem, std::vector<Field> &vec, | ||||
|                       const bool multiFile, const int trajectory = -1); | ||||
|     template <typename Field> | ||||
|     static void read(std::vector<Field> &vec, const std::string fileStem, | ||||
|                      const bool multiFile, const int trajectory = -1); | ||||
| private: | ||||
|     // build "<stem>[.<traj>]" in multi-file mode and "<stem>[.<traj>].bin" | ||||
|     // otherwise; the trajectory part is omitted when traj is negative | ||||
|     static inline std::string vecFilename(const std::string stem, const int traj, | ||||
|                                           const bool multiFile) | ||||
|     { | ||||
|         std::string name = stem; | ||||
|  | ||||
|         if (traj >= 0) | ||||
|         { | ||||
|             name += "." + std::to_string(traj); | ||||
|         } | ||||
|         if (!multiFile) | ||||
|         { | ||||
|             name += ".bin"; | ||||
|         } | ||||
|  | ||||
|         return name; | ||||
|     } | ||||
| }; | ||||
|  | ||||
| /****************************************************************************** | ||||
|  *               A2AVectorsSchurDiagTwo template implementation               * | ||||
|  ******************************************************************************/ | ||||
| @@ -253,90 +217,6 @@ void A2AVectorsSchurDiagTwo<FImpl>::makeHighModeW5D(FermionField &wout_4d, | ||||
|     } | ||||
| } | ||||
|  | ||||
| /****************************************************************************** | ||||
|  *               all-to-all vectors I/O template implementation               * | ||||
|  ******************************************************************************/ | ||||
| // Write every element of vec with a Record carrying its index. | ||||
| // multiFile: one file "<name>/elem<i>.bin" per vector, each opened and closed | ||||
| // in turn; otherwise all vectors go to the single file <name>, where <name> is | ||||
| // given by vecFilename(fileStem, trajectory, multiFile). | ||||
| // Raises a Size error if vec is empty. | ||||
| template <typename Field> | ||||
| void A2AVectorsIo::write(const std::string fileStem, std::vector<Field> &vec, | ||||
|                          const bool multiFile, const int trajectory) | ||||
| { | ||||
|     // the grid is taken from vec[0], so an empty set cannot be written | ||||
|     if (vec.empty()) | ||||
|     { | ||||
|         HADRONS_ERROR(Size, "cannot write an empty set of all-to-all vectors"); | ||||
|     } | ||||
|  | ||||
|     Record       record; | ||||
|     GridBase     *grid = vec[0]._grid; | ||||
|     ScidacWriter binWriter(grid->IsBoss()); | ||||
|     std::string  filename = vecFilename(fileStem, trajectory, multiFile); | ||||
|  | ||||
|     if (multiFile) | ||||
|     { | ||||
|         std::string fullFilename; | ||||
|  | ||||
|         for (unsigned int i = 0; i < vec.size(); ++i) | ||||
|         { | ||||
|             fullFilename = filename + "/elem" + std::to_string(i) + ".bin"; | ||||
|  | ||||
|             LOG(Message) << "Writing vector " << i << std::endl; | ||||
|             makeFileDir(fullFilename, grid); | ||||
|             binWriter.open(fullFilename); | ||||
|             record.index = i; | ||||
|             binWriter.writeScidacFieldRecord(vec[i], record); | ||||
|             binWriter.close(); | ||||
|         } | ||||
|     } | ||||
|     else | ||||
|     { | ||||
|         makeFileDir(filename, grid); | ||||
|         binWriter.open(filename); | ||||
|         for (unsigned int i = 0; i < vec.size(); ++i) | ||||
|         { | ||||
|             LOG(Message) << "Writing vector " << i << std::endl; | ||||
|             record.index = i; | ||||
|             binWriter.writeScidacFieldRecord(vec[i], record); | ||||
|         } | ||||
|         binWriter.close(); | ||||
|     } | ||||
| } | ||||
|  | ||||
| // Fill every element of vec from disk and check that the index stored in each | ||||
| // record matches the element position (Io error otherwise). | ||||
| // multiFile: element i is read from "<name>/elem<i>.bin"; otherwise all | ||||
| // elements are read in order from the single file <name>, where <name> is | ||||
| // given by vecFilename(fileStem, trajectory, multiFile). | ||||
| template <typename Field> | ||||
| void A2AVectorsIo::read(std::vector<Field> &vec, const std::string fileStem, | ||||
|                         const bool multiFile, const int trajectory) | ||||
| { | ||||
|     Record       header; | ||||
|     ScidacReader scidac; | ||||
|     std::string  base = vecFilename(fileStem, trajectory, multiFile); | ||||
|  | ||||
|     if (!multiFile) | ||||
|     { | ||||
|         // single file: opened once, records read sequentially | ||||
|         scidac.open(base); | ||||
|         for (unsigned int idx = 0; idx < vec.size(); ++idx) | ||||
|         { | ||||
|             LOG(Message) << "Reading vector " << idx << std::endl; | ||||
|             scidac.readScidacFieldRecord(vec[idx], header); | ||||
|             if (header.index != idx) | ||||
|             { | ||||
|                 HADRONS_ERROR(Io, "vector index mismatch"); | ||||
|             } | ||||
|         } | ||||
|         scidac.close(); | ||||
|     } | ||||
|     else | ||||
|     { | ||||
|         // one file per element, opened and closed in turn | ||||
|         for (unsigned int idx = 0; idx < vec.size(); ++idx) | ||||
|         { | ||||
|             const std::string path = base + "/elem" + std::to_string(idx) | ||||
|                                      + ".bin"; | ||||
|  | ||||
|             LOG(Message) << "Reading vector " << idx << std::endl; | ||||
|             scidac.open(path); | ||||
|             scidac.readScidacFieldRecord(vec[idx], header); | ||||
|             scidac.close(); | ||||
|             if (header.index != idx) | ||||
|             { | ||||
|                 HADRONS_ERROR(Io, "vector index mismatch"); | ||||
|             } | ||||
|         } | ||||
|     } | ||||
| } | ||||
|  | ||||
| END_HADRONS_NAMESPACE | ||||
|  | ||||
| #endif // A2A_Vectors_hpp_ | ||||
|   | ||||
| @@ -4,7 +4,7 @@ Grid physics library, www.github.com/paboyle/Grid | ||||
|  | ||||
| Source file: Hadrons/Application.cc | ||||
|  | ||||
| Copyright (C) 2015-2019 | ||||
| Copyright (C) 2015-2018 | ||||
|  | ||||
| Author: Antonin Portelli <antonin.portelli@me.com> | ||||
|  | ||||
| @@ -48,9 +48,6 @@ Application::Application(void) | ||||
| { | ||||
|     initLogger(); | ||||
|     auto dim = GridDefaultLatt(), mpi = GridDefaultMpi(), loc(dim); | ||||
|  | ||||
|     if (dim.size()) | ||||
|     { | ||||
|     locVol_ = 1; | ||||
|     for (unsigned int d = 0; d < dim.size(); ++d) | ||||
|     { | ||||
| @@ -73,7 +70,6 @@ Application::Application(void) | ||||
|                  << MACOUT(HADRONS_DEFAULT_LANCZOS_NBASIS) << std::endl; | ||||
|     LOG(Message) << "Schur decomposition     : " << MACOUTS(HADRONS_DEFAULT_SCHUR) << std::endl; | ||||
|     LOG(Message) << std::endl; | ||||
|     } | ||||
| } | ||||
|  | ||||
| Application::Application(const Application::GlobalPar &par) | ||||
| @@ -112,28 +108,10 @@ void Application::run(void) | ||||
|         HADRONS_ERROR(Definition, "run id is empty"); | ||||
|     } | ||||
|     LOG(Message) << "RUN ID '" << getPar().runId << "'" << std::endl; | ||||
|     BinaryIO::latticeWriteMaxRetry = getPar().parallelWriteMaxRetry; | ||||
|     LOG(Message) << "Attempt(s) for resilient parallel I/O: "  | ||||
|                  << BinaryIO::latticeWriteMaxRetry << std::endl; | ||||
|     vm().setRunId(getPar().runId); | ||||
|     vm().printContent(); | ||||
|     env().printContent(); | ||||
|     if (getPar().saveSchedule or getPar().scheduleFile.empty()) | ||||
|     { | ||||
|     schedule(); | ||||
|         if (getPar().saveSchedule) | ||||
|         { | ||||
|             std::string filename; | ||||
|  | ||||
|             filename = (getPar().scheduleFile.empty()) ?  | ||||
|                          "hadrons.sched" : getPar().scheduleFile; | ||||
|             saveSchedule(filename); | ||||
|         } | ||||
|     } | ||||
|     else | ||||
|     { | ||||
|         loadSchedule(getPar().scheduleFile); | ||||
|     } | ||||
|     printSchedule(); | ||||
|     if (!getPar().graphFile.empty()) | ||||
|     { | ||||
| @@ -180,13 +158,12 @@ void Application::parseParameterFile(const std::string parameterFileName) | ||||
|     pop(reader); | ||||
| } | ||||
|  | ||||
| void Application::saveParameterFile(const std::string parameterFileName, unsigned int prec) | ||||
| void Application::saveParameterFile(const std::string parameterFileName) | ||||
| { | ||||
|     LOG(Message) << "Saving application to '" << parameterFileName << "'..." << std::endl; | ||||
|     if (env().getGrid()->IsBoss()) | ||||
|     { | ||||
|         XmlWriter          writer(parameterFileName); | ||||
|         writer.setPrecision(prec); | ||||
|         ObjectId           id; | ||||
|         const unsigned int nMod = vm().getNModule(); | ||||
|  | ||||
|   | ||||
| @@ -4,7 +4,7 @@ Grid physics library, www.github.com/paboyle/Grid | ||||
|  | ||||
| Source file: Hadrons/Application.hpp | ||||
|  | ||||
| Copyright (C) 2015-2019 | ||||
| Copyright (C) 2015-2018 | ||||
|  | ||||
| Author: Antonin Portelli <antonin.portelli@me.com> | ||||
|  | ||||
| @@ -56,11 +56,7 @@ public: | ||||
|                                         TrajRange,                  trajCounter, | ||||
|                                         VirtualMachine::GeneticPar, genetic, | ||||
|                                         std::string,                runId, | ||||
|                                         std::string,                graphFile, | ||||
|                                         std::string,                scheduleFile, | ||||
|                                         bool,                       saveSchedule, | ||||
|                                         int,                        parallelWriteMaxRetry); | ||||
|         GlobalPar(void): parallelWriteMaxRetry{-1} {} | ||||
|                                         std::string,                graphFile); | ||||
|     }; | ||||
| public: | ||||
|     // constructors | ||||
| @@ -81,7 +77,7 @@ public: | ||||
|     void run(void); | ||||
|     // XML parameter file I/O | ||||
|     void parseParameterFile(const std::string parameterFileName); | ||||
|     void saveParameterFile(const std::string parameterFileName, unsigned int prec=15); | ||||
|     void saveParameterFile(const std::string parameterFileName); | ||||
|     // schedule computation | ||||
|     void schedule(void); | ||||
|     void saveSchedule(const std::string filename); | ||||
|   | ||||
| @@ -4,11 +4,9 @@ Grid physics library, www.github.com/paboyle/Grid | ||||
|  | ||||
| Source file: Hadrons/DilutedNoise.hpp | ||||
|  | ||||
| Copyright (C) 2015-2019 | ||||
| Copyright (C) 2015-2018 | ||||
|  | ||||
| Author: Antonin Portelli <antonin.portelli@me.com> | ||||
| Author: Vera Guelpers <Vera.Guelpers@ed.ac.uk> | ||||
| Author: Vera Guelpers <vmg1n14@soton.ac.uk> | ||||
|  | ||||
| This program is free software; you can redistribute it and/or modify | ||||
| it under the terms of the GNU General Public License as published by | ||||
| @@ -78,22 +76,6 @@ private: | ||||
|     unsigned int nt_; | ||||
| }; | ||||
|  | ||||
| // DilutedNoise specialisation holding nSrc*Ns*FImpl::Dimension vectors (see | ||||
| // the constructor), filled by generateNoise | ||||
| template <typename FImpl> | ||||
| class FullVolumeSpinColorDiagonalNoise: public DilutedNoise<FImpl> | ||||
| { | ||||
| public: | ||||
|     using FermionField = typename FImpl::FermionField; | ||||
| public: | ||||
|     // constructor/destructor | ||||
|     FullVolumeSpinColorDiagonalNoise(GridCartesian *g, unsigned int nSrc); | ||||
|     virtual ~FullVolumeSpinColorDiagonalNoise(void) = default; | ||||
|     // generate noise | ||||
|     virtual void generateNoise(GridParallelRNG &rng); | ||||
| private: | ||||
|     // number of sources passed to the constructor | ||||
|     unsigned int nSrc_; | ||||
| }; | ||||
|  | ||||
|  | ||||
| /****************************************************************************** | ||||
|  *                    DilutedNoise template implementation                    * | ||||
|  ******************************************************************************/ | ||||
| @@ -204,47 +186,6 @@ void TimeDilutedSpinColorDiagonalNoise<FImpl>::generateNoise(GridParallelRNG &rn | ||||
|     } | ||||
| } | ||||
|  | ||||
| /****************************************************************************** | ||||
|  *        FullVolumeSpinColorDiagonalNoise template implementation           * | ||||
|  ******************************************************************************/ | ||||
| // the base class is sized for nSrc*Ns*FImpl::Dimension noise vectors | ||||
| template <typename FImpl> | ||||
| FullVolumeSpinColorDiagonalNoise<FImpl>::FullVolumeSpinColorDiagonalNoise( | ||||
|     GridCartesian *g, unsigned int nSrc) | ||||
| : DilutedNoise<FImpl>(g, nSrc*Ns*FImpl::Dimension) | ||||
| , nSrc_(nSrc) | ||||
| {} | ||||
|  | ||||
| // Fill the nSrc_*Ns*nc noise vectors: a complex field eta is drawn with | ||||
| // bernoulli(), mapped through (2*eta - (1+i))/sqrt(2), and then placed in a | ||||
| // single spin component s and a single colour component c of an otherwise zero | ||||
| // vector, for every (n, s, c) in that loop order. | ||||
| template <typename FImpl> | ||||
| void FullVolumeSpinColorDiagonalNoise<FImpl>::generateNoise(GridParallelRNG &rng) | ||||
| { | ||||
|     typedef decltype(peekColour((*this)[0], 0)) SpinField; | ||||
|  | ||||
|     auto                       &noise = *this; | ||||
|     auto                       g      = this->getGrid(); | ||||
|     auto                       nc     = FImpl::Dimension; | ||||
|     Complex                    shift(1., 1.); | ||||
|     LatticeComplex             eta(g); | ||||
|     SpinField                  etas(g); | ||||
|     unsigned int               i = 0; | ||||
|  | ||||
|     // NOTE(review): eta is drawn once, outside the source loop, so all nSrc_ | ||||
|     // sources are built from the same field -- confirm whether an independent | ||||
|     // draw per source was intended before changing the RNG consumption. | ||||
|     bernoulli(rng, eta); | ||||
|     eta = (2.*eta - shift)*(1./::sqrt(2.)); | ||||
|     for (unsigned int n = 0; n < nSrc_; ++n) | ||||
|     { | ||||
|         for (unsigned int s = 0; s < Ns; ++s) | ||||
|         { | ||||
|             // eta in spin component s only | ||||
|             etas = zero; | ||||
|             pokeSpin(etas, eta, s); | ||||
|             for (unsigned int c = 0; c < nc; ++c) | ||||
|             { | ||||
|                 // etas in colour component c only | ||||
|                 noise[i] = zero; | ||||
|                 pokeColour(noise[i], etas, c); | ||||
|                 i++; | ||||
|             } | ||||
|         } | ||||
|     } | ||||
| } | ||||
|  | ||||
| END_HADRONS_NAMESPACE | ||||
|  | ||||
| #endif // Hadrons_DilutedNoise_hpp_ | ||||
|   | ||||
| @@ -4,7 +4,7 @@ Grid physics library, www.github.com/paboyle/Grid | ||||
|  | ||||
| Source file: Hadrons/DiskVector.hpp | ||||
|  | ||||
| Copyright (C) 2015-2019 | ||||
| Copyright (C) 2015-2018 | ||||
|  | ||||
| Author: Antonin Portelli <antonin.portelli@me.com> | ||||
|  | ||||
| @@ -29,7 +29,6 @@ See the full license in the file "LICENSE" in the top level distribution directo | ||||
| #define Hadrons_DiskVector_hpp_ | ||||
|  | ||||
| #include <Hadrons/Global.hpp> | ||||
| #include <Hadrons/A2AMatrix.hpp> | ||||
| #include <deque> | ||||
| #include <sys/stat.h> | ||||
| #include <ftw.h> | ||||
| @@ -60,18 +59,14 @@ public: | ||||
|         : master_(master), cmaster_(master), i_(i) {} | ||||
|  | ||||
|         // operator=: somebody is trying to store a vector element | ||||
|         // write to cache and tag as modified | ||||
|         // write to disk and cache | ||||
|         T &operator=(const T &obj) const | ||||
|         { | ||||
|             auto &cache    = *master_.cachePtr_; | ||||
|             auto &modified = *master_.modifiedPtr_; | ||||
|             auto &index    = *master_.indexPtr_; | ||||
|  | ||||
|             DV_DEBUG_MSG(&master_, "writing to " << i_); | ||||
|             master_.cacheInsert(i_, obj); | ||||
|             modified[index.at(i_)] = true; | ||||
|             master_.save(master_.filename(i_), obj); | ||||
|              | ||||
|             return cache[index.at(i_)]; | ||||
|             return master_.cachePtr_->at(i_); | ||||
|         } | ||||
|  | ||||
|         // implicit cast to const object reference and redirection | ||||
| @@ -88,7 +83,6 @@ public: | ||||
| public: | ||||
|     DiskVectorBase(const std::string dirname, const unsigned int size = 0, | ||||
|                    const unsigned int cacheSize = 1, const bool clean = true); | ||||
|     DiskVectorBase(DiskVectorBase<T> &&v) = default; | ||||
|     virtual ~DiskVectorBase(void); | ||||
|     const T & operator[](const unsigned int i) const; | ||||
|     RwAccessHelper operator[](const unsigned int i); | ||||
| @@ -109,10 +103,7 @@ private: | ||||
|     bool                                       clean_; | ||||
|     // using pointers to allow modifications when class is const | ||||
|     // semantic: const means data unmodified, but cache modification allowed | ||||
|     std::unique_ptr<std::vector<T>>                       cachePtr_; | ||||
|     std::unique_ptr<std::vector<bool>>                    modifiedPtr_; | ||||
|     std::unique_ptr<std::map<unsigned int, unsigned int>> indexPtr_; | ||||
|     std::unique_ptr<std::stack<unsigned int>>             freePtr_; | ||||
|     std::unique_ptr<std::map<unsigned int, T>> cachePtr_; | ||||
|     std::unique_ptr<std::deque<unsigned int>>  loadsPtr_;                 | ||||
| }; | ||||
|  | ||||
| @@ -144,7 +135,7 @@ private: | ||||
|  *                      Specialisation for Eigen matrices                     * | ||||
|  ******************************************************************************/ | ||||
| template <typename T> | ||||
| using EigenDiskVectorMat = A2AMatrix<T>; | ||||
| using EigenDiskVectorMat = Eigen::Matrix<T, Eigen::Dynamic, Eigen::Dynamic>; | ||||
|  | ||||
| template <typename T> | ||||
| class EigenDiskVector: public DiskVectorBase<EigenDiskVectorMat<T>> | ||||
| @@ -162,30 +153,23 @@ private: | ||||
|     virtual void load(EigenDiskVectorMat<T> &obj, const std::string filename) const | ||||
|     { | ||||
|         std::ifstream              f(filename, std::ios::binary); | ||||
|         uint32_t      crc, check; | ||||
|         std::vector<unsigned char> hash(SHA256_DIGEST_LENGTH); | ||||
|         Eigen::Index               nRow, nCol; | ||||
|         size_t                     matSize; | ||||
|         double        tRead, tHash; | ||||
|         double                     t; | ||||
|  | ||||
|         f.read(reinterpret_cast<char *>(&crc), sizeof(crc)); | ||||
|         f.read(reinterpret_cast<char *>(&nRow), sizeof(nRow)); | ||||
|         f.read(reinterpret_cast<char *>(&nCol), sizeof(nCol)); | ||||
|         f.read(reinterpret_cast<char *>(hash.data()), hash.size()*sizeof(unsigned char)); | ||||
|         f.read(reinterpret_cast<char *>(&nRow), sizeof(Eigen::Index)); | ||||
|         f.read(reinterpret_cast<char *>(&nCol), sizeof(Eigen::Index)); | ||||
|         obj.resize(nRow, nCol); | ||||
|         matSize = nRow*nCol*sizeof(T); | ||||
|         tRead  = -usecond(); | ||||
|         t  = -usecond(); | ||||
|         f.read(reinterpret_cast<char *>(obj.data()), matSize); | ||||
|         tRead += usecond(); | ||||
|         tHash  = -usecond(); | ||||
| #ifdef USE_IPP | ||||
|         check  = GridChecksum::crc32c(obj.data(), matSize); | ||||
| #else | ||||
|         check  = GridChecksum::crc32(obj.data(), matSize); | ||||
| #endif | ||||
|         tHash += usecond(); | ||||
|         DV_DEBUG_MSG(this, "Eigen read " << tRead/1.0e6 << " sec " << matSize/tRead*1.0e6/1024/1024 << " MB/s"); | ||||
|         DV_DEBUG_MSG(this, "Eigen crc32 " << std::hex << check << std::dec  | ||||
|                      << " " << tHash/1.0e6 << " sec " << matSize/tHash*1.0e6/1024/1024 << " MB/s"); | ||||
|         if (crc != check) | ||||
|         t += usecond(); | ||||
|         DV_DEBUG_MSG(this, "Eigen read " << matSize/t*1.0e6/1024/1024 << " MB/s"); | ||||
|         auto check = GridChecksum::sha256(obj.data(), matSize); | ||||
|         DV_DEBUG_MSG(this, "Eigen sha256 " << GridChecksum::sha256_string(check)); | ||||
|         if (hash != check) | ||||
|         { | ||||
|             HADRONS_ERROR(Io, "checksum failed") | ||||
|         } | ||||
| @@ -194,30 +178,23 @@ private: | ||||
|     virtual void save(const std::string filename, const EigenDiskVectorMat<T> &obj) const | ||||
|     { | ||||
|         std::ofstream              f(filename, std::ios::binary); | ||||
|         uint32_t      crc; | ||||
|         std::vector<unsigned char> hash(SHA256_DIGEST_LENGTH); | ||||
|         Eigen::Index               nRow, nCol; | ||||
|         size_t                     matSize; | ||||
|         double        tWrite, tHash; | ||||
|         double                     t; | ||||
|          | ||||
|         nRow    = obj.rows(); | ||||
|         nCol    = obj.cols(); | ||||
|         matSize = nRow*nCol*sizeof(T); | ||||
|         tHash   = -usecond(); | ||||
| #ifdef USE_IPP | ||||
|         crc     = GridChecksum::crc32c(obj.data(), matSize); | ||||
| #else | ||||
|         crc     = GridChecksum::crc32(obj.data(), matSize); | ||||
| #endif | ||||
|         tHash  += usecond(); | ||||
|         f.write(reinterpret_cast<char *>(&crc), sizeof(crc)); | ||||
|         f.write(reinterpret_cast<char *>(&nRow), sizeof(nRow)); | ||||
|         f.write(reinterpret_cast<char *>(&nCol), sizeof(nCol)); | ||||
|         tWrite = -usecond(); | ||||
|         hash    = GridChecksum::sha256(obj.data(), matSize); | ||||
|         DV_DEBUG_MSG(this, "Eigen sha256 " << GridChecksum::sha256_string(hash)); | ||||
|         f.write(reinterpret_cast<char *>(hash.data()), hash.size()*sizeof(unsigned char)); | ||||
|         f.write(reinterpret_cast<char *>(&nRow), sizeof(Eigen::Index)); | ||||
|         f.write(reinterpret_cast<char *>(&nCol), sizeof(Eigen::Index)); | ||||
|         t  = -usecond(); | ||||
|         f.write(reinterpret_cast<const char *>(obj.data()), matSize); | ||||
|         tWrite += usecond(); | ||||
|         DV_DEBUG_MSG(this, "Eigen write " << tWrite/1.0e6 << " sec " << matSize/tWrite*1.0e6/1024/1024 << " MB/s"); | ||||
|         DV_DEBUG_MSG(this, "Eigen crc32 " << std::hex << crc << std::dec | ||||
|                      << " " << tHash/1.0e6 << " sec " << matSize/tHash*1.0e6/1024/1024 << " MB/s"); | ||||
|         t += usecond(); | ||||
|         DV_DEBUG_MSG(this, "Eigen write " << matSize/t*1.0e6/1024/1024 << " MB/s"); | ||||
|     } | ||||
| }; | ||||
|  | ||||
| @@ -230,10 +207,7 @@ DiskVectorBase<T>::DiskVectorBase(const std::string dirname, | ||||
|                                   const unsigned int cacheSize, | ||||
|                                   const bool clean) | ||||
| : dirname_(dirname), size_(size), cacheSize_(cacheSize), clean_(clean) | ||||
| , cachePtr_(new std::vector<T>(size)) | ||||
| , modifiedPtr_(new std::vector<bool>(size, false)) | ||||
| , indexPtr_(new std::map<unsigned int, unsigned int>()) | ||||
| , freePtr_(new std::stack<unsigned int>) | ||||
| , cachePtr_(new std::map<unsigned int, T>()) | ||||
| , loadsPtr_(new std::deque<unsigned int>()) | ||||
| { | ||||
|     struct stat s; | ||||
| @@ -243,10 +217,6 @@ DiskVectorBase<T>::DiskVectorBase(const std::string dirname, | ||||
|         HADRONS_ERROR(Io, "directory '" + dirname + "' already exists") | ||||
|     } | ||||
|     mkdir(dirname); | ||||
|     for (unsigned int i = 0; i < cacheSize_; ++i) | ||||
|     { | ||||
|         freePtr_->push(i); | ||||
|     } | ||||
| } | ||||
|  | ||||
| template <typename T> | ||||
| @@ -262,8 +232,6 @@ template <typename T> | ||||
| const T & DiskVectorBase<T>::operator[](const unsigned int i) const | ||||
| { | ||||
|     auto &cache  = *cachePtr_; | ||||
|     auto &index   = *indexPtr_; | ||||
|     auto &freeInd = *freePtr_; | ||||
|     auto &loads  = *loadsPtr_; | ||||
|  | ||||
|     DV_DEBUG_MSG(this, "accessing " << i << " (RO)"); | ||||
| @@ -273,7 +241,7 @@ const T & DiskVectorBase<T>::operator[](const unsigned int i) const | ||||
|         HADRONS_ERROR(Size, "index out of range"); | ||||
|     } | ||||
|     const_cast<double &>(access_)++; | ||||
|     if (index.find(i) == index.end()) | ||||
|     if (cache.find(i) == cache.end()) | ||||
|     { | ||||
|         // cache miss | ||||
|         DV_DEBUG_MSG(this, "cache miss"); | ||||
| @@ -300,7 +268,7 @@ const T & DiskVectorBase<T>::operator[](const unsigned int i) const | ||||
|     DV_DEBUG_MSG(this, "in cache: " << msg); | ||||
| #endif | ||||
|  | ||||
|     return cache[index.at(i)]; | ||||
|     return cache.at(i); | ||||
| } | ||||
|  | ||||
| template <typename T> | ||||
| @@ -339,23 +307,12 @@ template <typename T> | ||||
| void DiskVectorBase<T>::evict(void) const | ||||
| { | ||||
|     auto &cache = *cachePtr_; | ||||
|     auto &modified = *modifiedPtr_; | ||||
|     auto &index    = *indexPtr_; | ||||
|     auto &freeInd  = *freePtr_; | ||||
|     auto &loads = *loadsPtr_; | ||||
|  | ||||
|     if (index.size() >= cacheSize_) | ||||
|     if (cache.size() >= cacheSize_) | ||||
|     { | ||||
|         unsigned int i = loads.front(); | ||||
|          | ||||
|         DV_DEBUG_MSG(this, "evicting " << i); | ||||
|         if (modified[index.at(i)]) | ||||
|         { | ||||
|             DV_DEBUG_MSG(this, "element " << i << " modified, saving to disk"); | ||||
|             save(filename(i), cache[index.at(i)]); | ||||
|         } | ||||
|         freeInd.push(index.at(i)); | ||||
|         index.erase(i); | ||||
|         DV_DEBUG_MSG(this, "evicting " << loads.front()); | ||||
|         cache.erase(loads.front()); | ||||
|         loads.pop_front(); | ||||
|     } | ||||
| } | ||||
| @@ -364,57 +321,29 @@ template <typename T> | ||||
| void DiskVectorBase<T>::fetch(const unsigned int i) const | ||||
| { | ||||
|     auto &cache = *cachePtr_; | ||||
|     auto &modified = *modifiedPtr_; | ||||
|     auto &index    = *indexPtr_; | ||||
|     auto &freeInd  = *freePtr_; | ||||
|     auto &loads = *loadsPtr_; | ||||
|  | ||||
|     struct stat s; | ||||
|  | ||||
|     DV_DEBUG_MSG(this, "loading " << i << " from disk"); | ||||
|  | ||||
|     evict(); | ||||
|      | ||||
|     if(stat(filename(i).c_str(), &s) != 0) | ||||
|     { | ||||
|         HADRONS_ERROR(Io, "disk vector element " + std::to_string(i) + " uninitialised"); | ||||
|     } | ||||
|     index[i] = freeInd.top(); | ||||
|     freeInd.pop(); | ||||
|     load(cache[index.at(i)], filename(i)); | ||||
|     load(cache[i], filename(i)); | ||||
|     loads.push_back(i); | ||||
|     modified[index.at(i)] = false; | ||||
| } | ||||
|  | ||||
| template <typename T> | ||||
| void DiskVectorBase<T>::cacheInsert(const unsigned int i, const T &obj) const | ||||
| { | ||||
|     auto &cache = *cachePtr_; | ||||
|     auto &modified = *modifiedPtr_; | ||||
|     auto &index    = *indexPtr_; | ||||
|     auto &freeInd  = *freePtr_; | ||||
|     auto &loads = *loadsPtr_; | ||||
|  | ||||
|     // cache miss, evict and store | ||||
|     if (index.find(i) == index.end()) | ||||
|     { | ||||
|     evict(); | ||||
|         index[i] = freeInd.top(); | ||||
|         freeInd.pop(); | ||||
|         cache[index.at(i)] = obj; | ||||
|     cache[i] = obj; | ||||
|     loads.push_back(i); | ||||
|         modified[index.at(i)] = false; | ||||
|     } | ||||
|     // cache hit, modify current value | ||||
|     else | ||||
|     { | ||||
|         auto pos = std::find(loads.begin(), loads.end(), i); | ||||
|          | ||||
|         cache[index.at(i)]    = obj; | ||||
|         modified[index.at(i)] = true; | ||||
|         loads.erase(pos); | ||||
|         loads.push_back(i); | ||||
|     } | ||||
|  | ||||
| #ifdef DV_DEBUG | ||||
|     std::string msg; | ||||
|   | ||||
| @@ -4,7 +4,7 @@ Grid physics library, www.github.com/paboyle/Grid | ||||
|  | ||||
| Source file: Hadrons/EigenPack.hpp | ||||
|  | ||||
| Copyright (C) 2015-2019 | ||||
| Copyright (C) 2015-2018 | ||||
|  | ||||
| Author: Antonin Portelli <antonin.portelli@me.com> | ||||
|  | ||||
| @@ -309,8 +309,6 @@ class CoarseEigenPack: public EigenPack<FineF, FineFIo> | ||||
| { | ||||
| public: | ||||
|     typedef CoarseF CoarseField;          | ||||
|     typedef CoarseFIo CoarseFieldIo; | ||||
| public:       | ||||
|     std::vector<CoarseF> evecCoarse; | ||||
|     std::vector<RealD>   evalCoarse; | ||||
| public: | ||||
|   | ||||
| @@ -4,7 +4,7 @@ Grid physics library, www.github.com/paboyle/Grid | ||||
|  | ||||
| Source file: Hadrons/Environment.cc | ||||
|  | ||||
| Copyright (C) 2015-2019 | ||||
| Copyright (C) 2015-2018 | ||||
|  | ||||
| Author: Antonin Portelli <antonin.portelli@me.com> | ||||
|  | ||||
| @@ -45,11 +45,13 @@ Environment::Environment(void) | ||||
| { | ||||
|     dim_         = GridDefaultLatt(); | ||||
|     nd_          = dim_.size(); | ||||
|     createGrid<vComplex>(1); | ||||
|     vol_ = 1.; | ||||
|     for (auto d: dim_) | ||||
|     { | ||||
|         vol_ *= d; | ||||
|     } | ||||
|     rng4d_.reset(new GridParallelRNG(getGrid())); | ||||
| } | ||||
|  | ||||
| // grids /////////////////////////////////////////////////////////////////////// | ||||
| @@ -74,13 +76,8 @@ double Environment::getVolume(void) const | ||||
| } | ||||
|  | ||||
| // random number generator ///////////////////////////////////////////////////// | ||||
| GridParallelRNG * Environment::get4dRng(void) | ||||
| GridParallelRNG * Environment::get4dRng(void) const | ||||
| { | ||||
|     if (rng4d_ == nullptr) | ||||
|     { | ||||
|         rng4d_.reset(new GridParallelRNG(getGrid())); | ||||
|     } | ||||
|  | ||||
|     return rng4d_.get(); | ||||
| } | ||||
|  | ||||
|   | ||||
| @@ -4,7 +4,7 @@ Grid physics library, www.github.com/paboyle/Grid | ||||
|  | ||||
| Source file: Hadrons/Environment.hpp | ||||
|  | ||||
| Copyright (C) 2015-2019 | ||||
| Copyright (C) 2015-2018 | ||||
|  | ||||
| Author: Antonin Portelli <antonin.portelli@me.com> | ||||
|  | ||||
| @@ -113,7 +113,7 @@ public: | ||||
|     unsigned int            getNd(void) const; | ||||
|     double                  getVolume(void) const; | ||||
|     // random number generator | ||||
|     GridParallelRNG *       get4dRng(void); | ||||
|     GridParallelRNG *       get4dRng(void) const; | ||||
|     // general memory management | ||||
|     void                    addObject(const std::string name, | ||||
|                                       const int moduleAddress = -1); | ||||
| @@ -182,7 +182,7 @@ private: | ||||
|     std::map<CoarseGridKey, GridPt>     gridCoarse5d_; | ||||
|     unsigned int                        nd_; | ||||
|     // random number generator | ||||
|     RngPt                               rng4d_{nullptr}; | ||||
|     RngPt                               rng4d_; | ||||
|     // object store | ||||
|     std::vector<ObjInfo>                object_; | ||||
|     std::map<std::string, unsigned int> objectAddress_; | ||||
|   | ||||
| @@ -4,7 +4,7 @@ Grid physics library, www.github.com/paboyle/Grid | ||||
|  | ||||
| Source file: Hadrons/Exceptions.cc | ||||
|  | ||||
| Copyright (C) 2015-2019 | ||||
| Copyright (C) 2015-2018 | ||||
|  | ||||
| Author: Antonin Portelli <antonin.portelli@me.com> | ||||
|  | ||||
|   | ||||
| @@ -4,7 +4,7 @@ Grid physics library, www.github.com/paboyle/Grid | ||||
|  | ||||
| Source file: Hadrons/Exceptions.hpp | ||||
|  | ||||
| Copyright (C) 2015-2019 | ||||
| Copyright (C) 2015-2018 | ||||
|  | ||||
| Author: Antonin Portelli <antonin.portelli@me.com> | ||||
|  | ||||
|   | ||||
| @@ -4,7 +4,7 @@ Grid physics library, www.github.com/paboyle/Grid | ||||
|  | ||||
| Source file: Hadrons/Factory.hpp | ||||
|  | ||||
| Copyright (C) 2015-2019 | ||||
| Copyright (C) 2015-2018 | ||||
|  | ||||
| Author: Antonin Portelli <antonin.portelli@me.com> | ||||
|  | ||||
|   | ||||
| @@ -4,7 +4,7 @@ Grid physics library, www.github.com/paboyle/Grid | ||||
|  | ||||
| Source file: Hadrons/GeneticScheduler.hpp | ||||
|  | ||||
| Copyright (C) 2015-2019 | ||||
| Copyright (C) 2015-2018 | ||||
|  | ||||
| Author: Antonin Portelli <antonin.portelli@me.com> | ||||
|  | ||||
|   | ||||
| @@ -4,7 +4,7 @@ Grid physics library, www.github.com/paboyle/Grid | ||||
|  | ||||
| Source file: Hadrons/Global.cc | ||||
|  | ||||
| Copyright (C) 2015-2019 | ||||
| Copyright (C) 2015-2018 | ||||
|  | ||||
| Author: Antonin Portelli <antonin.portelli@me.com> | ||||
|  | ||||
| @@ -166,13 +166,7 @@ std::string Hadrons::dirname(const std::string &s) | ||||
|  | ||||
| void Hadrons::makeFileDir(const std::string filename, GridBase *g) | ||||
| { | ||||
|     bool doIt = true; | ||||
|  | ||||
|     if (g) | ||||
|     { | ||||
|         doIt = g->IsBoss(); | ||||
|     } | ||||
|     if (doIt) | ||||
|     if (g->IsBoss()) | ||||
|     { | ||||
|         std::string dir    = dirname(filename); | ||||
|         int         status = mkdir(dir); | ||||
|   | ||||
Some files were not shown because too many files have changed in this diff Show More
		Reference in New Issue
	
	Block a user