diff --git a/Grid/algorithms/LinearOperator.h b/Grid/algorithms/LinearOperator.h index b86863b8..a1be48f4 100644 --- a/Grid/algorithms/LinearOperator.h +++ b/Grid/algorithms/LinearOperator.h @@ -178,7 +178,7 @@ namespace Grid { ////////////////////////////////////////////////////////// template - class SchurOperatorBase : public LinearOperatorBase { + class SchurOperatorBase : public LinearOperatorBase { public: virtual RealD Mpc (const Field &in, Field &out) =0; virtual RealD MpcDag (const Field &in, Field &out) =0; @@ -211,10 +211,9 @@ namespace Grid { } }; template - class SchurDiagMooeeOperator : public SchurOperatorBase { - protected: - Matrix &_Mat; + class SchurDiagMooeeOperator : public SchurOperatorBase { public: + Matrix &_Mat; SchurDiagMooeeOperator (Matrix &Mat): _Mat(Mat){}; virtual RealD Mpc (const Field &in, Field &out) { Field tmp(in._grid); diff --git a/Grid/algorithms/iterative/ConjugateGradient.h b/Grid/algorithms/iterative/ConjugateGradient.h index c8daa995..c1717e2a 100644 --- a/Grid/algorithms/iterative/ConjugateGradient.h +++ b/Grid/algorithms/iterative/ConjugateGradient.h @@ -89,6 +89,8 @@ class ConjugateGradient : public OperatorFunction { // Check if guess is really REALLY good :) if (cp <= rsq) { + std::cout << GridLogMessage << "ConjugateGradient guess is converged already " << std::endl; + IterationsToComplete = 0; return; } @@ -104,7 +106,7 @@ class ConjugateGradient : public OperatorFunction { SolverTimer.Start(); int k; - for (k = 1; k <= MaxIterations*1000; k++) { + for (k = 1; k <= MaxIterations; k++) { c = cp; MatrixTimer.Start(); @@ -165,8 +167,7 @@ class ConjugateGradient : public OperatorFunction { return; } } - std::cout << GridLogMessage << "ConjugateGradient did NOT converge" - << std::endl; + std::cout << GridLogMessage << "ConjugateGradient did NOT converge "< namespace Grid { + //Mixed precision restarted defect correction CG - template::value == 2, int>::type = 0,typename std::enable_if< getPrecision::value == 1, int>::type = 0> + template::value == 2, int>::type = 0, + typename std::enable_if< getPrecision::value == 1, int>::type = 0> class MixedPrecisionConjugateGradient : public LinearFunction { public: RealD Tolerance; @@ -50,7 +53,12 @@ namespace Grid { //Option to speed up *inner single precision* solves using a LinearFunction that produces a guess LinearFunction *guesser; - MixedPrecisionConjugateGradient(RealD tol, Integer maxinnerit, Integer maxouterit, GridBase* _sp_grid, LinearOperatorBase &_Linop_f, LinearOperatorBase &_Linop_d) : + MixedPrecisionConjugateGradient(RealD tol, + Integer maxinnerit, + Integer maxouterit, + GridBase* _sp_grid, + LinearOperatorBase &_Linop_f, + LinearOperatorBase &_Linop_d) : Linop_f(_Linop_f), Linop_d(_Linop_d), Tolerance(tol), InnerTolerance(tol), MaxInnerIterations(maxinnerit), MaxOuterIterations(maxouterit), SinglePrecGrid(_sp_grid), OuterLoopNormMult(100.), guesser(NULL){ }; @@ -149,6 +157,8 @@ namespace Grid { } }; + + } #endif diff --git a/Grid/algorithms/iterative/Deflation.h b/Grid/algorithms/iterative/Deflation.h index ceba1c09..8df67b3c 100644 --- a/Grid/algorithms/iterative/Deflation.h +++ b/Grid/algorithms/iterative/Deflation.h @@ -35,7 +35,11 @@ class ZeroGuesser: public LinearFunction { public: virtual void operator()(const Field &src, Field &guess) { guess = zero; }; }; - +template +class DoNothingGuesser: public LinearFunction { +public: + virtual void operator()(const Field &src, Field &guess) { }; +}; template class SourceGuesser: public LinearFunction { public: diff --git a/Grid/algorithms/iterative/SchurRedBlack.h b/Grid/algorithms/iterative/SchurRedBlack.h index 6c1e1f1c..9f63e8c0 100644 --- a/Grid/algorithms/iterative/SchurRedBlack.h +++ b/Grid/algorithms/iterative/SchurRedBlack.h @@ -261,7 +261,7 @@ namespace Grid { } ///////////////////////////////////////////////////////////// - // Override in derived. Not virtual as template methods + // Override in derived. ///////////////////////////////////////////////////////////// virtual void RedBlackSource (Matrix & _Matrix,const Field &src, Field &src_e,Field &src_o) =0; virtual void RedBlackSolution(Matrix & _Matrix,const Field &sol_o, const Field &src_e,Field &sol) =0; diff --git a/Grid/communicator/SharedMemory.h b/Grid/communicator/SharedMemory.h index 9f6b1a25..63cfc7a6 100644 --- a/Grid/communicator/SharedMemory.h +++ b/Grid/communicator/SharedMemory.h @@ -103,6 +103,8 @@ class GlobalSharedMemory { ////////////////////////////////////////////////////////////////////////////////////// static void Init(Grid_MPI_Comm comm); // Typically MPI_COMM_WORLD static void OptimalCommunicator(const std::vector &processors,Grid_MPI_Comm & optimal_comm); // Turns MPI_COMM_WORLD into right layout for Cartesian + static void OptimalCommunicatorHypercube(const std::vector &processors,Grid_MPI_Comm & optimal_comm); // Turns MPI_COMM_WORLD into right layout for Cartesian + static void OptimalCommunicatorSharedMemory(const std::vector &processors,Grid_MPI_Comm & optimal_comm); // Turns MPI_COMM_WORLD into right layout for Cartesian /////////////////////////////////////////////////// // Provide shared memory facilities off comm world /////////////////////////////////////////////////// diff --git a/Grid/communicator/SharedMemoryMPI.cc b/Grid/communicator/SharedMemoryMPI.cc index d3b094ad..b2a6ab53 100644 --- a/Grid/communicator/SharedMemoryMPI.cc +++ b/Grid/communicator/SharedMemoryMPI.cc @@ -132,7 +132,22 @@ int Log2Size(int TwoToPower,int MAXLOG2) } void GlobalSharedMemory::OptimalCommunicator(const std::vector &processors,Grid_MPI_Comm & optimal_comm) { -#ifdef HYPERCUBE + ////////////////////////////////////////////////////////////////////////////// + // Look and see if it looks like an HPE 8600 based on hostname conventions + ////////////////////////////////////////////////////////////////////////////// + const int namelen = _POSIX_HOST_NAME_MAX; + char name[namelen]; + int R; + int I; + int N; + gethostname(name,namelen); + int nscan = sscanf(name,"r%di%dn%d",&R,&I,&N) ; + + if(nscan==3) OptimalCommunicatorHypercube(processors,optimal_comm); + else OptimalCommunicatorSharedMemory(processors,optimal_comm); +} +void GlobalSharedMemory::OptimalCommunicatorHypercube(const std::vector &processors,Grid_MPI_Comm & optimal_comm) +{ //////////////////////////////////////////////////////////////// // Assert power of two shm_size. //////////////////////////////////////////////////////////////// @@ -253,7 +268,9 @@ void GlobalSharedMemory::OptimalCommunicator(const std::vector &processors, ///////////////////////////////////////////////////////////////// int ierr= MPI_Comm_split(WorldComm,0,rank,&optimal_comm); assert(ierr==0); -#else +} +void GlobalSharedMemory::OptimalCommunicatorSharedMemory(const std::vector &processors,Grid_MPI_Comm & optimal_comm) +{ //////////////////////////////////////////////////////////////// // Assert power of two shm_size. //////////////////////////////////////////////////////////////// @@ -306,7 +323,6 @@ void GlobalSharedMemory::OptimalCommunicator(const std::vector &processors, ///////////////////////////////////////////////////////////////// int ierr= MPI_Comm_split(WorldComm,0,rank,&optimal_comm); assert(ierr==0); -#endif } //////////////////////////////////////////////////////////////////////////////////////////// // SHMGET @@ -337,7 +353,7 @@ void GlobalSharedMemory::SharedMemoryAllocate(uint64_t bytes, int flags) int errsv = errno; printf("Errno %d\n",errsv); printf("key %d\n",key); - printf("size %lld\n",size); + printf("size %ld\n",size); printf("flags %d\n",flags); perror("shmget"); exit(1); diff --git a/Grid/qcd/action/pseudofermion/ExactOneFlavourRatio.h b/Grid/qcd/action/pseudofermion/ExactOneFlavourRatio.h index 54a2c594..25285565 100644 --- a/Grid/qcd/action/pseudofermion/ExactOneFlavourRatio.h +++ b/Grid/qcd/action/pseudofermion/ExactOneFlavourRatio.h @@ -58,13 +58,29 @@ namespace QCD{ bool use_heatbath_forecasting; AbstractEOFAFermion& Lop; // the basic LH operator AbstractEOFAFermion& Rop; // the basic RH operator - SchurRedBlackDiagMooeeSolve Solver; + SchurRedBlackDiagMooeeSolve SolverHB; + SchurRedBlackDiagMooeeSolve SolverL; + SchurRedBlackDiagMooeeSolve SolverR; + SchurRedBlackDiagMooeeSolve DerivativeSolverL; + SchurRedBlackDiagMooeeSolve DerivativeSolverR; FermionField Phi; // the pseudofermion field for this trajectory public: - ExactOneFlavourRatioPseudoFermionAction(AbstractEOFAFermion& _Lop, AbstractEOFAFermion& _Rop, - OperatorFunction& S, Params& p, bool use_fc=false) : Lop(_Lop), Rop(_Rop), - Solver(S, false, true), Phi(_Lop.FermionGrid()), param(p), use_heatbath_forecasting(use_fc) + ExactOneFlavourRatioPseudoFermionAction(AbstractEOFAFermion& _Lop, + AbstractEOFAFermion& _Rop, + OperatorFunction& HeatbathCG, + OperatorFunction& ActionCGL, OperatorFunction& ActionCGR, + OperatorFunction& DerivCGL , OperatorFunction& DerivCGR, + Params& p, + bool use_fc=false) : + Lop(_Lop), + Rop(_Rop), + SolverHB(HeatbathCG,false,true), + SolverL(ActionCGL, false, true), SolverR(ActionCGR, false, true), + DerivativeSolverL(DerivCGL, false, true), DerivativeSolverR(DerivCGR, false, true), + Phi(_Lop.FermionGrid()), + param(p), + use_heatbath_forecasting(use_fc) { AlgRemez remez(param.lo, param.hi, param.precision); @@ -98,6 +114,9 @@ namespace QCD{ // We generate a Gaussian noise vector \eta, and then compute // \Phi = M_{\rm EOFA}^{-1/2} * \eta // using a rational approximation to the inverse square root + // + // As a check of rational require \Phi^dag M_{EOFA} \Phi == eta^dag M^-1/2^dag M M^-1/2 eta = eta^dag eta + // virtual void refresh(const GaugeField& U, GridParallelRNG& pRNG) { Lop.ImportGauge(U); @@ -118,7 +137,6 @@ namespace QCD{ RealD scale = std::sqrt(0.5); gaussian(pRNG,eta); eta = eta * scale; - printf("Heatbath source vector: <\\eta|\\eta> = %1.15e\n", norm2(eta)); // \Phi = ( \alpha_{0} + \sum_{k=1}^{N_{p}} \alpha_{l} * \gamma_{l} ) * \eta RealD N(PowerNegHalf.norm); @@ -139,11 +157,11 @@ namespace QCD{ if(use_heatbath_forecasting){ // Forecast CG guess using solutions from previous poles Lop.Mdag(CG_src, Forecast_src); CG_soln = Forecast(Lop, Forecast_src, prev_solns); - Solver(Lop, CG_src, CG_soln); + SolverHB(Lop, CG_src, CG_soln); prev_solns.push_back(CG_soln); } else { CG_soln = zero; // Just use zero as the initial guess - Solver(Lop, CG_src, CG_soln); + SolverHB(Lop, CG_src, CG_soln); } Lop.Dtilde(CG_soln, tmp[0]); // We actually solved Cayley preconditioned system: transform back tmp[1] = tmp[1] + ( PowerNegHalf.residues[k]*gamma_l*gamma_l*Lop.k ) * tmp[0]; @@ -166,11 +184,11 @@ namespace QCD{ if(use_heatbath_forecasting){ Rop.Mdag(CG_src, Forecast_src); CG_soln = Forecast(Rop, Forecast_src, prev_solns); - Solver(Rop, CG_src, CG_soln); + SolverHB(Rop, CG_src, CG_soln); prev_solns.push_back(CG_soln); } else { CG_soln = zero; - Solver(Rop, CG_src, CG_soln); + SolverHB(Rop, CG_src, CG_soln); } Rop.Dtilde(CG_soln, tmp[0]); // We actually solved Cayley preconditioned system: transform back tmp[1] = tmp[1] - ( PowerNegHalf.residues[k]*gamma_l*gamma_l*Rop.k ) * tmp[0]; @@ -182,8 +200,47 @@ namespace QCD{ // Reset shift coefficients for energy and force evals Lop.RefreshShiftCoefficients(0.0); Rop.RefreshShiftCoefficients(-1.0); + + // Bounds check + RealD EtaDagEta = norm2(eta); + // RealD PhiDagMPhi= norm2(eta); + }; + void Meofa(const GaugeField& U,const FermionField &phi, FermionField & Mphi) + { +#if 0 + Lop.ImportGauge(U); + Rop.ImportGauge(U); + + FermionField spProj_Phi(Lop.FermionGrid()); + FermionField mPhi(Lop.FermionGrid()); + std::vector tmp(2, Lop.FermionGrid()); + mPhi = phi; + + // LH term: S = S - k <\Phi| P_{-} \Omega_{-}^{\dagger} H(mf)^{-1} \Omega_{-} P_{-} |\Phi> + spProj(Phi, spProj_Phi, -1, Lop.Ls); + Lop.Omega(spProj_Phi, tmp[0], -1, 0); + G5R5(tmp[1], tmp[0]); + tmp[0] = zero; + SolverL(Lop, tmp[1], tmp[0]); + Lop.Dtilde(tmp[0], tmp[1]); // We actually solved Cayley preconditioned system: transform back + Lop.Omega(tmp[1], tmp[0], -1, 1); + mPhi = mPhi - Lop.k * innerProduct(spProj_Phi, tmp[0]).real(); + + // RH term: S = S + k <\Phi| P_{+} \Omega_{+}^{\dagger} ( H(mb) + // - \Delta_{+}(mf,mb) P_{+} )^{-1} \Omega_{-} P_{-} |\Phi> + spProj(Phi, spProj_Phi, 1, Rop.Ls); + Rop.Omega(spProj_Phi, tmp[0], 1, 0); + G5R5(tmp[1], tmp[0]); + tmp[0] = zero; + SolverR(Rop, tmp[1], tmp[0]); + Rop.Dtilde(tmp[0], tmp[1]); + Rop.Omega(tmp[1], tmp[0], 1, 1); + action += Rop.k * innerProduct(spProj_Phi, tmp[0]).real(); +#endif + } + // EOFA action: see Eqn. (10) of arXiv:1706.05843 virtual RealD S(const GaugeField& U) { @@ -201,7 +258,7 @@ namespace QCD{ Lop.Omega(spProj_Phi, tmp[0], -1, 0); G5R5(tmp[1], tmp[0]); tmp[0] = zero; - Solver(Lop, tmp[1], tmp[0]); + SolverL(Lop, tmp[1], tmp[0]); Lop.Dtilde(tmp[0], tmp[1]); // We actually solved Cayley preconditioned system: transform back Lop.Omega(tmp[1], tmp[0], -1, 1); action -= Lop.k * innerProduct(spProj_Phi, tmp[0]).real(); @@ -212,7 +269,7 @@ namespace QCD{ Rop.Omega(spProj_Phi, tmp[0], 1, 0); G5R5(tmp[1], tmp[0]); tmp[0] = zero; - Solver(Rop, tmp[1], tmp[0]); + SolverR(Rop, tmp[1], tmp[0]); Rop.Dtilde(tmp[0], tmp[1]); Rop.Omega(tmp[1], tmp[0], 1, 1); action += Rop.k * innerProduct(spProj_Phi, tmp[0]).real(); @@ -245,7 +302,7 @@ namespace QCD{ Lop.Omega(spProj_Phi, Omega_spProj_Phi, -1, 0); G5R5(CG_src, Omega_spProj_Phi); spProj_Phi = zero; - Solver(Lop, CG_src, spProj_Phi); + DerivativeSolverL(Lop, CG_src, spProj_Phi); Lop.Dtilde(spProj_Phi, Chi); G5R5(g5_R5_Chi, Chi); Lop.MDeriv(force, g5_R5_Chi, Chi, DaggerNo); @@ -257,7 +314,7 @@ namespace QCD{ Rop.Omega(spProj_Phi, Omega_spProj_Phi, 1, 0); G5R5(CG_src, Omega_spProj_Phi); spProj_Phi = zero; - Solver(Rop, CG_src, spProj_Phi); + DerivativeSolverR(Rop, CG_src, spProj_Phi); Rop.Dtilde(spProj_Phi, Chi); G5R5(g5_R5_Chi, Chi); Lop.MDeriv(force, g5_R5_Chi, Chi, DaggerNo); diff --git a/Grid/qcd/action/pseudofermion/TwoFlavourEvenOddRatio.h b/Grid/qcd/action/pseudofermion/TwoFlavourEvenOddRatio.h index 0f57fe9c..e9a8853a 100644 --- a/Grid/qcd/action/pseudofermion/TwoFlavourEvenOddRatio.h +++ b/Grid/qcd/action/pseudofermion/TwoFlavourEvenOddRatio.h @@ -46,6 +46,7 @@ namespace Grid{ OperatorFunction &DerivativeSolver; OperatorFunction &ActionSolver; + OperatorFunction &HeatbathSolver; FermionField PhiOdd; // the pseudo fermion field for this trajectory FermionField PhiEven; // the pseudo fermion field for this trajectory @@ -54,11 +55,18 @@ namespace Grid{ TwoFlavourEvenOddRatioPseudoFermionAction(FermionOperator &_NumOp, FermionOperator &_DenOp, OperatorFunction & DS, - OperatorFunction & AS) : + OperatorFunction & AS ) : + TwoFlavourEvenOddRatioPseudoFermionAction(_NumOp,_DenOp, DS,AS,AS) {}; + + TwoFlavourEvenOddRatioPseudoFermionAction(FermionOperator &_NumOp, + FermionOperator &_DenOp, + OperatorFunction & DS, + OperatorFunction & AS, OperatorFunction & HS) : NumOp(_NumOp), DenOp(_DenOp), DerivativeSolver(DS), ActionSolver(AS), + HeatbathSolver(HS), PhiEven(_NumOp.FermionRedBlackGrid()), PhiOdd(_NumOp.FermionRedBlackGrid()) { @@ -111,7 +119,7 @@ namespace Grid{ // Odd det factors Mpc.MpcDag(etaOdd,PhiOdd); tmp=zero; - ActionSolver(Vpc,PhiOdd,tmp); + HeatbathSolver(Vpc,PhiOdd,tmp); Vpc.Mpc(tmp,PhiOdd); // Even det factors diff --git a/Grid/qcd/hmc/integrators/Integrator.h b/Grid/qcd/hmc/integrators/Integrator.h index 7d65ed60..9d4376fc 100644 --- a/Grid/qcd/hmc/integrators/Integrator.h +++ b/Grid/qcd/hmc/integrators/Integrator.h @@ -54,7 +54,7 @@ public: template ::value, int >::type = 0 > IntegratorParameters(ReaderClass & Reader){ - std::cout << "Reading integrator\n"; + std::cout << GridLogMessage << "Reading integrator\n"; read(Reader, "Integrator", *this); } @@ -132,7 +132,7 @@ class Integrator { double end_full = usecond(); double time_full = (end_full - start_full) / 1e3; double time_force = (end_force - start_force) / 1e3; - std::cout << GridLogIntegrator << "["<is_smeared); + Field& Us = Smearer.get_U(as[level].actions.at(actionID)->is_smeared); as[level].actions.at(actionID)->refresh(Us, pRNG); } @@ -251,13 +250,11 @@ class Integrator { // over the representations struct _S { template - void operator()(std::vector*> repr_set, Repr& Rep, - int level, RealD& H) { + void operator()(std::vector*> repr_set, Repr& Rep, int level, RealD& H) { for (int a = 0; a < repr_set.size(); ++a) { RealD Hterm = repr_set.at(a)->S(Rep.U); - std::cout << GridLogMessage << "S Level " << level << " term " << a - << " H Hirep = " << Hterm << std::endl; + std::cout << GridLogMessage << "S Level " << level << " term " << a << " H Hirep = " << Hterm << std::endl; H += Hterm; } @@ -267,9 +264,10 @@ class Integrator { // Calculate action RealD S(Field& U) { // here also U not used + std::cout << GridLogIntegrator << "Integrator action\n"; RealD H = - FieldImplementation::FieldSquareNorm(P)/HMC_MOMENTUM_DENOMINATOR; // - trace (P*P)/denom - std::cout << " Momentum hamiltonian "<< -H<is_smeared); + std::cout << GridLogMessage << "S [" << level << "][" << actionID << "] action eval " << std::endl; Hterm = as[level].actions.at(actionID)->S(Us); - std::cout << GridLogMessage << "S Level " << level << " term " - << actionID << " H = " << Hterm << std::endl; + std::cout << GridLogMessage << "S [" << level << "][" << actionID << "] H = " << Hterm << std::endl; H += Hterm; } as[level].apply(S_hireps, Representations, level, H); @@ -305,8 +303,7 @@ class Integrator { // Check the clocks all match on all levels for (int level = 0; level < as.size(); ++level) { assert(fabs(t_U - t_P[level]) < 1.0e-6); // must be the same - std::cout << GridLogIntegrator << " times[" << level - << "]= " << t_P[level] << " " << t_U << std::endl; + std::cout << GridLogIntegrator << " times[" << level << "]= " << t_P[level] << " " << t_U << std::endl; } // and that we indeed got to the end of the trajectory diff --git a/Grid/qcd/hmc/integrators/Integrator_algorithm.h b/Grid/qcd/hmc/integrators/Integrator_algorithm.h index 13a37aeb..47574026 100644 --- a/Grid/qcd/hmc/integrators/Integrator_algorithm.h +++ b/Grid/qcd/hmc/integrators/Integrator_algorithm.h @@ -231,8 +231,7 @@ class ForceGradient : public Integratorstep(U, level + 1, first_step, 0); } - this->FG_update_P(U, level, 2 * Chi / ((1.0 - 2.0 * lambda) * eps), - (1.0 - 2.0 * lambda) * eps); + this->FG_update_P(U, level, 2 * Chi / ((1.0 - 2.0 * lambda) * eps), (1.0 - 2.0 * lambda) * eps); if (level == fl) { // lowest level this->update_U(U, 0.5 * eps); diff --git a/HMC/Mobius2p1fEOFA.cc b/HMC/Mobius2p1fEOFA.cc index a6e42ceb..61b06829 100644 --- a/HMC/Mobius2p1fEOFA.cc +++ b/HMC/Mobius2p1fEOFA.cc @@ -30,6 +30,137 @@ directory /* END LEGAL */ #include +#define MIXED_PRECISION + +namespace Grid{ + namespace QCD{ + + /* + * Need a plan for gauge field update for mixed precision in HMC (2x speed up) + * -- Store the single prec action operator. + * -- Clone the gauge field from the operator function argument. + * -- Build the mixed precision operator dynamically from the passed operator and single prec clone. + */ + + template + class MixedPrecisionConjugateGradientOperatorFunction : public OperatorFunction { + public: + typedef typename FermionOperatorD::FermionField FieldD; + typedef typename FermionOperatorF::FermionField FieldF; + + RealD Tolerance; + RealD InnerTolerance; //Initial tolerance for inner CG. Defaults to Tolerance but can be changed + Integer MaxInnerIterations; + Integer MaxOuterIterations; + GridBase* SinglePrecGrid4; //Grid for single-precision fields + GridBase* SinglePrecGrid5; //Grid for single-precision fields + RealD OuterLoopNormMult; //Stop the outer loop and move to a final double prec solve when the residual is OuterLoopNormMult * Tolerance + + FermionOperatorF &FermOpF; + FermionOperatorD &FermOpD;; + SchurOperatorF &LinOpF; + SchurOperatorD &LinOpD; + + Integer TotalInnerIterations; //Number of inner CG iterations + Integer TotalOuterIterations; //Number of restarts + Integer TotalFinalStepIterations; //Number of CG iterations in final patch-up step + + MixedPrecisionConjugateGradientOperatorFunction(RealD tol, + Integer maxinnerit, + Integer maxouterit, + GridBase* _sp_grid4, + GridBase* _sp_grid5, + FermionOperatorF &_FermOpF, + FermionOperatorD &_FermOpD, + SchurOperatorF &_LinOpF, + SchurOperatorD &_LinOpD): + LinOpF(_LinOpF), + LinOpD(_LinOpD), + FermOpF(_FermOpF), + FermOpD(_FermOpD), + Tolerance(tol), + InnerTolerance(tol), + MaxInnerIterations(maxinnerit), + MaxOuterIterations(maxouterit), + SinglePrecGrid4(_sp_grid4), + SinglePrecGrid5(_sp_grid5), + OuterLoopNormMult(100.) + { + /* Debugging instances of objects; references are stored + std::cout << GridLogMessage << " Mixed precision CG wrapper LinOpF " < &LinOpU, const FieldD &src, FieldD &psi) { + + std::cout << GridLogMessage << " Mixed precision CG wrapper operator() "<(&LinOpU); + + // std::cout << GridLogMessage << " Mixed precision CG wrapper operator() FermOpU " <_Mat)<_Mat)==&(LinOpD._Mat)); + + //////////////////////////////////////////////////////////////////////////////////// + // Must snarf a single precision copy of the gauge field in Linop_d argument + //////////////////////////////////////////////////////////////////////////////////// + typedef typename FermionOperatorF::GaugeField GaugeFieldF; + typedef typename FermionOperatorF::GaugeLinkField GaugeLinkFieldF; + typedef typename FermionOperatorD::GaugeField GaugeFieldD; + typedef typename FermionOperatorD::GaugeLinkField GaugeLinkFieldD; + + GridBase * GridPtrF = SinglePrecGrid4; + GridBase * GridPtrD = FermOpD.Umu._grid; + GaugeFieldF U_f (GridPtrF); + GaugeLinkFieldF Umu_f(GridPtrF); + // std::cout << " Dim gauge field "<Nd()<Nd()<(FermOpD.Umu, mu); + precisionChange(Umu_f,Umu_d); + PokeIndex(FermOpF.Umu, Umu_f, mu); + } + pickCheckerboard(Even,FermOpF.UmuEven,FermOpF.Umu); + pickCheckerboard(Odd ,FermOpF.UmuOdd ,FermOpF.Umu); + + //////////////////////////////////////////////////////////////////////////////////// + // Could test to make sure that LinOpF and LinOpD agree to single prec? + //////////////////////////////////////////////////////////////////////////////////// + /* + GridBase *Fgrid = psi._grid; + FieldD tmp2(Fgrid); + FieldD tmp1(Fgrid); + LinOpU.Op(src,tmp1); + LinOpD.Op(src,tmp2); + std::cout << " Double gauge field "<< norm2(FermOpD.Umu)< MPCG(Tolerance,MaxInnerIterations,MaxOuterIterations,SinglePrecGrid5,LinOpF,LinOpD); + std::cout << GridLogMessage << "Calling mixed precision Conjugate Gradient" < HMCWrapper; // MD.name = std::string("MinimumNorm2"); - MD.MDsteps = 8; + MD.MDsteps = 6; MD.trajL = 1.0; HMCparameters HMCparams; - HMCparams.StartTrajectory = 70; - HMCparams.Trajectories = 200; + HMCparams.StartTrajectory = 590; + HMCparams.Trajectories = 1000; HMCparams.NoMetropolisUntil= 0; // "[HotStart, ColdStart, TepidStart, CheckpointStart]\n"; // HMCparams.StartingType =std::string("ColdStart"); @@ -97,42 +232,53 @@ int main(int argc, char **argv) { RealD b = 1.0; RealD c = 0.0; - std::vector hasenbusch({ 0.1, 0.3 }); + std::vector hasenbusch({ 0.1, 0.3, 0.6 }); auto GridPtr = TheHMC.Resources.GetCartesian(); auto GridRBPtr = TheHMC.Resources.GetRBCartesian(); auto FGrid = SpaceTimeGrid::makeFiveDimGrid(Ls,GridPtr); auto FrbGrid = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls,GridPtr); + std::vector latt = GridDefaultLatt(); + std::vector mpi = GridDefaultMpi(); + std::vector simdF = GridDefaultSimd(Nd,vComplexF::Nsimd()); + std::vector simdD = GridDefaultSimd(Nd,vComplexD::Nsimd()); + auto GridPtrF = SpaceTimeGrid::makeFourDimGrid(latt,simdF,mpi); + auto GridRBPtrF = SpaceTimeGrid::makeFourDimRedBlackGrid(GridPtrF); + auto FGridF = SpaceTimeGrid::makeFiveDimGrid(Ls,GridPtrF); + auto FrbGridF = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls,GridPtrF); + IwasakiGaugeActionR GaugeAction(beta); // temporarily need a gauge field LatticeGaugeField U(GridPtr); + LatticeGaugeFieldF UF(GridPtrF); // These lines are unecessary if BC are all periodic std::vector boundary = {1,1,1,-1}; FermionAction::ImplParams Params(boundary); + FermionActionF::ImplParams ParamsF(boundary); double ActionStoppingCondition = 1e-10; - double DerivativeStoppingCondition = 1e-7; + double DerivativeStoppingCondition = 1e-6; double MaxCGIterations = 30000; - ConjugateGradient ActionCG(ActionStoppingCondition,MaxCGIterations); - ConjugateGradient DerivativeCG(DerivativeStoppingCondition,MaxCGIterations); //////////////////////////////////// // Collect actions //////////////////////////////////// ActionLevel Level1(1); - ActionLevel Level2(4); + ActionLevel Level2(8); //////////////////////////////////// // Strange action //////////////////////////////////// + typedef SchurDiagMooeeOperator LinearOperatorF; + typedef SchurDiagMooeeOperator LinearOperatorD; + typedef SchurDiagMooeeOperator LinearOperatorEOFAF; + typedef SchurDiagMooeeOperator LinearOperatorEOFAD; - // FermionAction StrangeOp (U,*FGrid,*FrbGrid,*GridPtr,*GridRBPtr,strange_mass,M5,b,c, Params); - // FermionAction StrangePauliVillarsOp(U,*FGrid,*FrbGrid,*GridPtr,*GridRBPtr,pv_mass, M5,b,c, Params); - // OneFlavourEvenOddRatioRationalPseudoFermionAction StrangePseudoFermion(StrangePauliVillarsOp,StrangeOp,OFRp); - // Level1.push_back(&StrangePseudoFermion); + typedef MixedPrecisionConjugateGradientOperatorFunction MxPCG; + typedef MixedPrecisionConjugateGradientOperatorFunction MxPCG_EOFA; // DJM: setup for EOFA ratio (Mobius) OneFlavourRationalParams OFRp; @@ -143,9 +289,67 @@ int main(int argc, char **argv) { OFRp.degree = 14; OFRp.precision= 50; - MobiusEOFAFermionR Strange_Op_L(U, *FGrid, *FrbGrid, *GridPtr, *GridRBPtr, strange_mass, strange_mass, pv_mass, 0.0, -1, M5, b, c); - MobiusEOFAFermionR Strange_Op_R(U, *FGrid, *FrbGrid, *GridPtr, *GridRBPtr, pv_mass, strange_mass, pv_mass, -1.0, 1, M5, b, c); - ExactOneFlavourRatioPseudoFermionAction EOFA(Strange_Op_L, Strange_Op_R, ActionCG, OFRp, true); + + MobiusEOFAFermionR Strange_Op_L (U , *FGrid , *FrbGrid , *GridPtr , *GridRBPtr , strange_mass, strange_mass, pv_mass, 0.0, -1, M5, b, c); + MobiusEOFAFermionF Strange_Op_LF(UF, *FGridF, *FrbGridF, *GridPtrF, *GridRBPtrF, strange_mass, strange_mass, pv_mass, 0.0, -1, M5, b, c); + MobiusEOFAFermionR Strange_Op_R (U , *FGrid , *FrbGrid , *GridPtr , *GridRBPtr , pv_mass, strange_mass, pv_mass, -1.0, 1, M5, b, c); + MobiusEOFAFermionF Strange_Op_RF(UF, *FGridF, *FrbGridF, *GridPtrF, *GridRBPtrF, pv_mass, strange_mass, pv_mass, -1.0, 1, M5, b, c); + + ConjugateGradient ActionCG(ActionStoppingCondition,MaxCGIterations); + ConjugateGradient DerivativeCG(DerivativeStoppingCondition,MaxCGIterations); +#ifdef MIXED_PRECISION + const int MX_inner = 1000; + // Mixed precision EOFA + LinearOperatorEOFAD Strange_LinOp_L (Strange_Op_L); + LinearOperatorEOFAD Strange_LinOp_R (Strange_Op_R); + LinearOperatorEOFAF Strange_LinOp_LF(Strange_Op_LF); + LinearOperatorEOFAF Strange_LinOp_RF(Strange_Op_RF); + + MxPCG_EOFA ActionCGL(ActionStoppingCondition, + MX_inner, + MaxCGIterations, + GridPtrF, + FrbGridF, + Strange_Op_LF,Strange_Op_L, + Strange_LinOp_LF,Strange_LinOp_L); + + MxPCG_EOFA DerivativeCGL(DerivativeStoppingCondition, + MX_inner, + MaxCGIterations, + GridPtrF, + FrbGridF, + Strange_Op_LF,Strange_Op_L, + Strange_LinOp_LF,Strange_LinOp_L); + + MxPCG_EOFA ActionCGR(ActionStoppingCondition, + MX_inner, + MaxCGIterations, + GridPtrF, + FrbGridF, + Strange_Op_RF,Strange_Op_R, + Strange_LinOp_RF,Strange_LinOp_R); + + MxPCG_EOFA DerivativeCGR(DerivativeStoppingCondition, + MX_inner, + MaxCGIterations, + GridPtrF, + FrbGridF, + Strange_Op_RF,Strange_Op_R, + Strange_LinOp_RF,Strange_LinOp_R); + + ExactOneFlavourRatioPseudoFermionAction + EOFA(Strange_Op_L, Strange_Op_R, + ActionCG, + ActionCGL, ActionCGR, + DerivativeCGL, DerivativeCGR, + OFRp, true); +#else + ExactOneFlavourRatioPseudoFermionAction + EOFA(Strange_Op_L, Strange_Op_R, + ActionCG, ActionCG, + DerivativeCG, DerivativeCG, + OFRp, true); +#endif Level1.push_back(&EOFA); //////////////////////////////////// @@ -162,15 +366,62 @@ int main(int argc, char **argv) { } light_num.push_back(pv_mass); + ////////////////////////////////////////////////////////////// + // Forced to replicate the MxPCG and DenominatorsF etc.. because + // there is no convenient way to "Clone" physics params from double op + // into single op for any operator pair. + // Same issue prevents using MxPCG in the Heatbath step + ////////////////////////////////////////////////////////////// std::vector Numerators; std::vector Denominators; std::vector *> Quotients; + std::vector ActionMPCG; + std::vector MPCG; + std::vector DenominatorsF; + std::vector LinOpD; + std::vector LinOpF; for(int h=0;h(*Numerators[h],*Denominators[h],*MPCG[h],*ActionMPCG[h],ActionCG)); +#else + //////////////////////////////////////////////////////////////////////////// + // Standard CG for 2f force + //////////////////////////////////////////////////////////////////////////// Quotients.push_back (new TwoFlavourEvenOddRatioPseudoFermionAction(*Numerators[h],*Denominators[h],DerivativeCG,ActionCG)); +#endif + } for(int h=0;h 500s -> 160s and 20 traj per hour on 16^3. + +- Use mixed precision CG in HMC +- SchurRedBlack.h: stop use of operator function; use LinearOperator or similar instead. +- Or make an OperatorFunction for mixed precision as a wrapper + +******************************************************************** +* Signed off 2+1f HMC with Hasenbush and strange RHMC 16^3 x 32 DWF Ls=16 Plaquette 0.5883 ish +* Signed off 2+1f HMC with Hasenbush and strange EOFA 16^3 x 32 DWF Ls=16 Plaquette 0.5883 ish +* Wilson plaquette cross checked against CPS and literature GwilsonFnone +******************************************************************** + +******************************************************************** +* RHMC: Timesteps & eigenranges matched from previous CPS 16^3 x 32 runs: +******************************************************************** + +**** Strange (m=0.04) has eigenspan **** 16^3 done as 1+1+1 with separate PV's. diff --git a/tests/IO/Test_serialisation.cc b/tests/IO/Test_serialisation.cc index d1ec1309..522e0cb5 100644 --- a/tests/IO/Test_serialisation.cc +++ b/tests/IO/Test_serialisation.cc @@ -370,5 +370,18 @@ int main(int argc,char **argv) tensorConvTest(rng, SpinMatrix); tensorConvTest(rng, SpinVector); + { + HMCparameters HMCparams; + HMCparams.StartingType =std::string("CheckpointStart"); + HMCparams.StartTrajectory =7; + HMCparams.Trajectories =1000; + HMCparams.NoMetropolisUntil=0; + HMCparams.MD.name =std::string("Force Gradient"); + HMCparams.MD.MDsteps = 10; + HMCparams.MD.trajL = 1.0; + + XmlWriter HMCwr("HMCparameters.xml"); + write(HMCwr,"HMCparameters",HMCparams); + } Grid_finalize(); }