From e1124d9572fd582738d1440c83a470f915764210 Mon Sep 17 00:00:00 2001 From: Peter Boyle Date: Tue, 23 Apr 2019 21:50:15 +0100 Subject: [PATCH 01/13] Integrator verbosity updates --- Grid/qcd/hmc/integrators/Integrator.h | 21 +++++++------------ .../hmc/integrators/Integrator_algorithm.h | 6 ++---- 2 files changed, 10 insertions(+), 17 deletions(-) diff --git a/Grid/qcd/hmc/integrators/Integrator.h b/Grid/qcd/hmc/integrators/Integrator.h index 7d65ed60..940a35d3 100644 --- a/Grid/qcd/hmc/integrators/Integrator.h +++ b/Grid/qcd/hmc/integrators/Integrator.h @@ -54,7 +54,7 @@ public: template ::value, int >::type = 0 > IntegratorParameters(ReaderClass & Reader){ - std::cout << "Reading integrator\n"; + std::cout << GridLogMessage << "Reading integrator\n"; read(Reader, "Integrator", *this); } @@ -132,7 +132,7 @@ class Integrator { double end_full = usecond(); double time_full = (end_full - start_full) / 1e3; double time_force = (end_force - start_force) / 1e3; - std::cout << GridLogIntegrator << "["<is_smeared); + Field& Us = Smearer.get_U(as[level].actions.at(actionID)->is_smeared); as[level].actions.at(actionID)->refresh(Us, pRNG); } @@ -251,13 +250,11 @@ class Integrator { // over the representations struct _S { template - void operator()(std::vector*> repr_set, Repr& Rep, - int level, RealD& H) { + void operator()(std::vector*> repr_set, Repr& Rep, int level, RealD& H) { for (int a = 0; a < repr_set.size(); ++a) { RealD Hterm = repr_set.at(a)->S(Rep.U); - std::cout << GridLogMessage << "S Level " << level << " term " << a - << " H Hirep = " << Hterm << std::endl; + std::cout << GridLogMessage << "S Level " << level << " term " << a << " H Hirep = " << Hterm << std::endl; H += Hterm; } @@ -269,7 +266,7 @@ class Integrator { RealD H = - FieldImplementation::FieldSquareNorm(P)/HMC_MOMENTUM_DENOMINATOR; // - trace (P*P)/denom - std::cout << " Momentum hamiltonian "<< -H<is_smeared); Hterm = as[level].actions.at(actionID)->S(Us); - std::cout << GridLogMessage << "S Level " << level << " term " - << actionID << " H = " << Hterm << std::endl; + std::cout << GridLogMessage << "S [" << level << "][" << actionID << "] H = " << Hterm << std::endl; H += Hterm; } as[level].apply(S_hireps, Representations, level, H); @@ -305,8 +301,7 @@ class Integrator { // Check the clocks all match on all levels for (int level = 0; level < as.size(); ++level) { assert(fabs(t_U - t_P[level]) < 1.0e-6); // must be the same - std::cout << GridLogIntegrator << " times[" << level - << "]= " << t_P[level] << " " << t_U << std::endl; + std::cout << GridLogIntegrator << " times[" << level << "]= " << t_P[level] << " " << t_U << std::endl; } // and that we indeed got to the end of the trajectory diff --git a/Grid/qcd/hmc/integrators/Integrator_algorithm.h b/Grid/qcd/hmc/integrators/Integrator_algorithm.h index 13a37aeb..47574026 100644 --- a/Grid/qcd/hmc/integrators/Integrator_algorithm.h +++ b/Grid/qcd/hmc/integrators/Integrator_algorithm.h @@ -231,8 +231,7 @@ class ForceGradient : public Integratorstep(U, level + 1, first_step, 0); } - this->FG_update_P(U, level, 2 * Chi / ((1.0 - 2.0 * lambda) * eps), - (1.0 - 2.0 * lambda) * eps); + this->FG_update_P(U, level, 2 * Chi / ((1.0 - 2.0 * lambda) * eps), (1.0 - 2.0 * lambda) * eps); if (level == fl) { // lowest level this->update_U(U, 0.5 * eps); From b0de7ab7db06748f470cde7acc7e08062af03243 Mon Sep 17 00:00:00 2001 From: Peter Boyle Date: Tue, 23 Apr 2019 21:50:45 +0100 Subject: [PATCH 02/13] Extra do nothing guesser --- Grid/algorithms/iterative/Deflation.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/Grid/algorithms/iterative/Deflation.h b/Grid/algorithms/iterative/Deflation.h index ceba1c09..8df67b3c 100644 --- a/Grid/algorithms/iterative/Deflation.h +++ b/Grid/algorithms/iterative/Deflation.h @@ -35,7 +35,11 @@ class ZeroGuesser: public LinearFunction { public: virtual void operator()(const Field &src, Field &guess) { guess = zero; }; }; - +template +class DoNothingGuesser: public LinearFunction { +public: + virtual void operator()(const Field &src, Field &guess) { }; +}; template class SourceGuesser: public LinearFunction { public: From b595f58e4cb13801545ec8ac35d1ec80adee82d5 Mon Sep 17 00:00:00 2001 From: Peter Boyle Date: Tue, 23 Apr 2019 21:51:23 +0100 Subject: [PATCH 03/13] Allow HMC to acces matrix --- Grid/algorithms/LinearOperator.h | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/Grid/algorithms/LinearOperator.h b/Grid/algorithms/LinearOperator.h index b86863b8..a1be48f4 100644 --- a/Grid/algorithms/LinearOperator.h +++ b/Grid/algorithms/LinearOperator.h @@ -178,7 +178,7 @@ namespace Grid { ////////////////////////////////////////////////////////// template - class SchurOperatorBase : public LinearOperatorBase { + class SchurOperatorBase : public LinearOperatorBase { public: virtual RealD Mpc (const Field &in, Field &out) =0; virtual RealD MpcDag (const Field &in, Field &out) =0; @@ -211,10 +211,9 @@ namespace Grid { } }; template - class SchurDiagMooeeOperator : public SchurOperatorBase { - protected: - Matrix &_Mat; + class SchurDiagMooeeOperator : public SchurOperatorBase { public: + Matrix &_Mat; SchurDiagMooeeOperator (Matrix &Mat): _Mat(Mat){}; virtual RealD Mpc (const Field &in, Field &out) { Field tmp(in._grid); From 6505efcb57e078104a0cb275501833b548007e38 Mon Sep 17 00:00:00 2001 From: Peter Boyle Date: Tue, 23 Apr 2019 21:51:57 +0100 Subject: [PATCH 04/13] Set iteration count if guess is already good --- Grid/algorithms/iterative/ConjugateGradient.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/Grid/algorithms/iterative/ConjugateGradient.h b/Grid/algorithms/iterative/ConjugateGradient.h index c8daa995..25d206f5 100644 --- a/Grid/algorithms/iterative/ConjugateGradient.h +++ b/Grid/algorithms/iterative/ConjugateGradient.h @@ -89,6 +89,8 @@ class ConjugateGradient : public OperatorFunction { // Check if guess is really REALLY good :) if (cp <= rsq) { + std::cout << GridLogMessage << "ConjugateGradient guess is converged already " << std::endl; + IterationsToComplete = 0; return; } From 5921b1d2b9ef3a44ebefaae3d1b82bf118882f0e Mon Sep 17 00:00:00 2001 From: Peter Boyle Date: Tue, 23 Apr 2019 21:52:33 +0100 Subject: [PATCH 05/13] Layout/whitespace changes --- .../iterative/ConjugateGradientMixedPrec.h | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/Grid/algorithms/iterative/ConjugateGradientMixedPrec.h b/Grid/algorithms/iterative/ConjugateGradientMixedPrec.h index c7332455..4c2cf12f 100644 --- a/Grid/algorithms/iterative/ConjugateGradientMixedPrec.h +++ b/Grid/algorithms/iterative/ConjugateGradientMixedPrec.h @@ -30,8 +30,11 @@ Author: Christopher Kelly namespace Grid { + //Mixed precision restarted defect correction CG - template::value == 2, int>::type = 0,typename std::enable_if< getPrecision::value == 1, int>::type = 0> + template::value == 2, int>::type = 0, + typename std::enable_if< getPrecision::value == 1, int>::type = 0> class MixedPrecisionConjugateGradient : public LinearFunction { public: RealD Tolerance; @@ -50,7 +53,12 @@ namespace Grid { //Option to speed up *inner single precision* solves using a LinearFunction that produces a guess LinearFunction *guesser; - MixedPrecisionConjugateGradient(RealD tol, Integer maxinnerit, Integer maxouterit, GridBase* _sp_grid, LinearOperatorBase &_Linop_f, LinearOperatorBase &_Linop_d) : + MixedPrecisionConjugateGradient(RealD tol, + Integer maxinnerit, + Integer maxouterit, + GridBase* _sp_grid, + LinearOperatorBase &_Linop_f, + LinearOperatorBase &_Linop_d) : Linop_f(_Linop_f), Linop_d(_Linop_d), Tolerance(tol), InnerTolerance(tol), MaxInnerIterations(maxinnerit), MaxOuterIterations(maxouterit), SinglePrecGrid(_sp_grid), OuterLoopNormMult(100.), guesser(NULL){ }; @@ -149,6 +157,8 @@ namespace Grid { } }; + + } #endif From 262a73c964540686f2e72ccce8bed27df6da43b1 Mon Sep 17 00:00:00 2001 From: Peter Boyle Date: Tue, 23 Apr 2019 21:52:58 +0100 Subject: [PATCH 06/13] COmment improvement --- Grid/algorithms/iterative/SchurRedBlack.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Grid/algorithms/iterative/SchurRedBlack.h b/Grid/algorithms/iterative/SchurRedBlack.h index 6c1e1f1c..9f63e8c0 100644 --- a/Grid/algorithms/iterative/SchurRedBlack.h +++ b/Grid/algorithms/iterative/SchurRedBlack.h @@ -261,7 +261,7 @@ namespace Grid { } ///////////////////////////////////////////////////////////// - // Override in derived. Not virtual as template methods + // Override in derived. ///////////////////////////////////////////////////////////// virtual void RedBlackSource (Matrix & _Matrix,const Field &src, Field &src_e,Field &src_o) =0; virtual void RedBlackSolution(Matrix & _Matrix,const Field &sol_o, const Field &src_e,Field &sol) =0; From 73d467699766e82b0efaff5dab392941108846cf Mon Sep 17 00:00:00 2001 From: Peter Boyle Date: Tue, 23 Apr 2019 21:53:44 +0100 Subject: [PATCH 07/13] Action and Deriv solvers allowed to differ --- .../pseudofermion/ExactOneFlavourRatio.h | 21 ++++++++++++++----- 1 file changed, 16 insertions(+), 5 deletions(-) diff --git a/Grid/qcd/action/pseudofermion/ExactOneFlavourRatio.h b/Grid/qcd/action/pseudofermion/ExactOneFlavourRatio.h index 54a2c594..feba94c8 100644 --- a/Grid/qcd/action/pseudofermion/ExactOneFlavourRatio.h +++ b/Grid/qcd/action/pseudofermion/ExactOneFlavourRatio.h @@ -59,12 +59,23 @@ namespace QCD{ AbstractEOFAFermion& Lop; // the basic LH operator AbstractEOFAFermion& Rop; // the basic RH operator SchurRedBlackDiagMooeeSolve Solver; + SchurRedBlackDiagMooeeSolve DerivativeSolver; FermionField Phi; // the pseudofermion field for this trajectory public: - ExactOneFlavourRatioPseudoFermionAction(AbstractEOFAFermion& _Lop, AbstractEOFAFermion& _Rop, - OperatorFunction& S, Params& p, bool use_fc=false) : Lop(_Lop), Rop(_Rop), - Solver(S, false, true), Phi(_Lop.FermionGrid()), param(p), use_heatbath_forecasting(use_fc) + ExactOneFlavourRatioPseudoFermionAction(AbstractEOFAFermion& _Lop, + AbstractEOFAFermion& _Rop, + OperatorFunction& ActionCG, + OperatorFunction& DerivCG, + Params& p, + bool use_fc=false) : + Lop(_Lop), + Rop(_Rop), + Solver(ActionCG, false, true), + DerivativeSolver(DerivCG, false, true), + Phi(_Lop.FermionGrid()), + param(p), + use_heatbath_forecasting(use_fc) { AlgRemez remez(param.lo, param.hi, param.precision); @@ -245,7 +256,7 @@ namespace QCD{ Lop.Omega(spProj_Phi, Omega_spProj_Phi, -1, 0); G5R5(CG_src, Omega_spProj_Phi); spProj_Phi = zero; - Solver(Lop, CG_src, spProj_Phi); + DerivativeSolver(Lop, CG_src, spProj_Phi); Lop.Dtilde(spProj_Phi, Chi); G5R5(g5_R5_Chi, Chi); Lop.MDeriv(force, g5_R5_Chi, Chi, DaggerNo); @@ -257,7 +268,7 @@ namespace QCD{ Rop.Omega(spProj_Phi, Omega_spProj_Phi, 1, 0); G5R5(CG_src, Omega_spProj_Phi); spProj_Phi = zero; - Solver(Rop, CG_src, spProj_Phi); + DerivativeSolver(Rop, CG_src, spProj_Phi); Rop.Dtilde(spProj_Phi, Chi); G5R5(g5_R5_Chi, Chi); Lop.MDeriv(force, g5_R5_Chi, Chi, DaggerNo); From 7894ea6263a04e1a5bebfdaf7b424b2c18aacc55 Mon Sep 17 00:00:00 2001 From: Peter Boyle Date: Tue, 23 Apr 2019 21:54:19 +0100 Subject: [PATCH 08/13] Now have mixed precision solves in the 2f sector --- HMC/Mobius2p1fEOFA.cc | 197 +++++++++++++++++++++++++++++++++++++++--- 1 file changed, 185 insertions(+), 12 deletions(-) diff --git a/HMC/Mobius2p1fEOFA.cc b/HMC/Mobius2p1fEOFA.cc index a6e42ceb..8f093d44 100644 --- a/HMC/Mobius2p1fEOFA.cc +++ b/HMC/Mobius2p1fEOFA.cc @@ -30,6 +30,134 @@ directory /* END LEGAL */ #include +namespace Grid{ + namespace QCD{ + + /* + * Need a plan for gauge field update for mixed precision in HMC (2x speed up) + * -- Store the single prec action operator. + * -- Clone the gauge field from the operator function argument. + * -- Build the mixed precision operator dynamically from the passed operator and single prec clone. + */ + + template + class MixedPrecisionConjugateGradientOperatorFunction : public OperatorFunction { + public: + typedef typename FermionOperatorD::FermionField FieldD; + typedef typename FermionOperatorF::FermionField FieldF; + + RealD Tolerance; + RealD InnerTolerance; //Initial tolerance for inner CG. Defaults to Tolerance but can be changed + Integer MaxInnerIterations; + Integer MaxOuterIterations; + GridBase* SinglePrecGrid4; //Grid for single-precision fields + GridBase* SinglePrecGrid5; //Grid for single-precision fields + RealD OuterLoopNormMult; //Stop the outer loop and move to a final double prec solve when the residual is OuterLoopNormMult * Tolerance + + FermionOperatorF &FermOpF; + FermionOperatorD &FermOpD;; + SchurOperatorF &LinOpF; + SchurOperatorD &LinOpD; + + Integer TotalInnerIterations; //Number of inner CG iterations + Integer TotalOuterIterations; //Number of restarts + Integer TotalFinalStepIterations; //Number of CG iterations in final patch-up step + + //Option to speed up *inner single precision* solves using a LinearFunction that produces a guess + LinearFunction *guesser; + + MixedPrecisionConjugateGradientOperatorFunction(RealD tol, + Integer maxinnerit, + Integer maxouterit, + GridBase* _sp_grid4, + GridBase* _sp_grid5, + FermionOperatorF &_FermOpF, + FermionOperatorD &_FermOpD, + SchurOperatorF &_LinOpF, + SchurOperatorD &_LinOpD): + LinOpF(_LinOpF), + LinOpD(_LinOpD), + FermOpF(_FermOpF), + FermOpD(_FermOpD), + Tolerance(tol), + InnerTolerance(tol), + MaxInnerIterations(maxinnerit), + MaxOuterIterations(maxouterit), + SinglePrecGrid4(_sp_grid4), + SinglePrecGrid5(_sp_grid5), + OuterLoopNormMult(100.), + guesser(NULL) + { + std::cout << GridLogMessage << " Mixed precision CG wrapper LinOpF " < &g){ + guesser = &g; + } + + void operator()(LinearOperatorBase &LinOpU, const FieldD &src, FieldD &psi) { + + + SchurOperatorD * SchurOpU = static_cast(&LinOpU); + + std::cout << GridLogMessage << " Mixed precision CG wrapper FermOpD " <_Mat)<_Mat)==&(LinOpD._Mat)); + + //////////////////////////////////////////////////////////////////////////////////// + // Must snarf a single precision copy of the gauge field in Linop_d argument + //////////////////////////////////////////////////////////////////////////////////// + typedef typename FermionOperatorF::GaugeField GaugeFieldF; + typedef typename FermionOperatorF::GaugeLinkField GaugeLinkFieldF; + typedef typename FermionOperatorD::GaugeField GaugeFieldD; + typedef typename FermionOperatorD::GaugeLinkField GaugeLinkFieldD; + + GridBase * GridPtrF = SinglePrecGrid4; + GridBase * GridPtrD = FermOpD.Umu._grid; + GaugeFieldF U_f (GridPtrF); + GaugeLinkFieldF Umu_f(GridPtrF); + std::cout << " Dim gauge field "<Nd()<Nd()<(FermOpD.Umu, mu); + precisionChange(Umu_f,Umu_d); + PokeIndex(FermOpF.Umu, Umu_f, mu); + } + pickCheckerboard(Even,FermOpF.UmuEven,FermOpF.Umu); + pickCheckerboard(Odd ,FermOpF.UmuOdd ,FermOpF.Umu); + + + //////////////////////////////////////////////////////////////////////////////////// + // Could test to make sure that LinOpF and LinOpD agree to single prec? + //////////////////////////////////////////////////////////////////////////////////// + GridBase *Fgrid = psi._grid; + FieldD tmp2(Fgrid); + FieldD tmp1(Fgrid); + LinOpU.Op(src,tmp1); + LinOpD.Op(src,tmp2); + std::cout << " Double gauge field "<< norm2(FermOpD.Umu)< MPCG(Tolerance,MaxInnerIterations,MaxOuterIterations,SinglePrecGrid5,LinOpF,LinOpD); + std::cout << GridLogMessage << "Calling mixed precision Conjugate Gradient" < HMCWrapper; // MD.name = std::string("MinimumNorm2"); - MD.MDsteps = 8; + MD.MDsteps = 6; MD.trajL = 1.0; HMCparameters HMCparams; - HMCparams.StartTrajectory = 70; - HMCparams.Trajectories = 200; + HMCparams.StartTrajectory = 530; + HMCparams.Trajectories = 1000; HMCparams.NoMetropolisUntil= 0; // "[HotStart, ColdStart, TepidStart, CheckpointStart]\n"; // HMCparams.StartingType =std::string("ColdStart"); @@ -97,24 +227,35 @@ int main(int argc, char **argv) { RealD b = 1.0; RealD c = 0.0; - std::vector hasenbusch({ 0.1, 0.3 }); + std::vector hasenbusch({ 0.1, 0.3, 0.6 }); auto GridPtr = TheHMC.Resources.GetCartesian(); auto GridRBPtr = TheHMC.Resources.GetRBCartesian(); auto FGrid = SpaceTimeGrid::makeFiveDimGrid(Ls,GridPtr); auto FrbGrid = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls,GridPtr); + std::vector latt = GridDefaultLatt(); + std::vector mpi = GridDefaultMpi(); + std::vector simdF = GridDefaultSimd(Nd,vComplexF::Nsimd()); + std::vector simdD = GridDefaultSimd(Nd,vComplexD::Nsimd()); + auto GridPtrF = SpaceTimeGrid::makeFourDimGrid(latt,simdF,mpi); + auto GridRBPtrF = SpaceTimeGrid::makeFourDimRedBlackGrid(GridPtrF); + auto FGridF = SpaceTimeGrid::makeFiveDimGrid(Ls,GridPtrF); + auto FrbGridF = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls,GridPtrF); + IwasakiGaugeActionR GaugeAction(beta); // temporarily need a gauge field LatticeGaugeField U(GridPtr); + LatticeGaugeFieldF UF(GridPtrF); // These lines are unecessary if BC are all periodic std::vector boundary = {1,1,1,-1}; FermionAction::ImplParams Params(boundary); + FermionActionF::ImplParams ParamsF(boundary); double ActionStoppingCondition = 1e-10; - double DerivativeStoppingCondition = 1e-7; + double DerivativeStoppingCondition = 1e-6; double MaxCGIterations = 30000; ConjugateGradient ActionCG(ActionStoppingCondition,MaxCGIterations); ConjugateGradient DerivativeCG(DerivativeStoppingCondition,MaxCGIterations); @@ -123,17 +264,12 @@ int main(int argc, char **argv) { // Collect actions //////////////////////////////////// ActionLevel Level1(1); - ActionLevel Level2(4); + ActionLevel Level2(8); //////////////////////////////////// // Strange action //////////////////////////////////// - // FermionAction StrangeOp (U,*FGrid,*FrbGrid,*GridPtr,*GridRBPtr,strange_mass,M5,b,c, Params); - // FermionAction StrangePauliVillarsOp(U,*FGrid,*FrbGrid,*GridPtr,*GridRBPtr,pv_mass, M5,b,c, Params); - // OneFlavourEvenOddRatioRationalPseudoFermionAction StrangePseudoFermion(StrangePauliVillarsOp,StrangeOp,OFRp); - // Level1.push_back(&StrangePseudoFermion); - // DJM: setup for EOFA ratio (Mobius) OneFlavourRationalParams OFRp; OFRp.lo = 0.1; @@ -145,7 +281,8 @@ int main(int argc, char **argv) { MobiusEOFAFermionR Strange_Op_L(U, *FGrid, *FrbGrid, *GridPtr, *GridRBPtr, strange_mass, strange_mass, pv_mass, 0.0, -1, M5, b, c); MobiusEOFAFermionR Strange_Op_R(U, *FGrid, *FrbGrid, *GridPtr, *GridRBPtr, pv_mass, strange_mass, pv_mass, -1.0, 1, M5, b, c); - ExactOneFlavourRatioPseudoFermionAction EOFA(Strange_Op_L, Strange_Op_R, ActionCG, OFRp, true); + + ExactOneFlavourRatioPseudoFermionAction EOFA(Strange_Op_L, Strange_Op_R, ActionCG, DerivativeCG, OFRp, true); Level1.push_back(&EOFA); //////////////////////////////////// @@ -162,15 +299,51 @@ int main(int argc, char **argv) { } light_num.push_back(pv_mass); + typedef SchurDiagMooeeOperator LinearOperatorF; + typedef SchurDiagMooeeOperator LinearOperatorD; + std::vector Numerators; std::vector Denominators; + std::vector LinOpD; + + std::vector DenominatorsF; + std::vector LinOpF; + + typedef MixedPrecisionConjugateGradientOperatorFunction MxPCG; + std::vector MPCG; + std::vector *> Quotients; for(int h=0;h(*Numerators[h],*Denominators[h],DerivativeCG,ActionCG)); +#else + //////////////////////////////////////////////////////////////////////////// + // Mixed precision CG for 2f force + //////////////////////////////////////////////////////////////////////////// + DenominatorsF.push_back(new FermionActionF(UF,*FGridF,*FrbGridF,*GridPtrF,*GridRBPtrF,light_den[h],M5,b,c, ParamsF)); + LinOpF.push_back(new LinearOperatorF(*DenominatorsF[h])); + LinOpD.push_back(new LinearOperatorD(*Denominators[h])); + MPCG.push_back(new MxPCG(DerivativeStoppingCondition, + 200, + MaxCGIterations, + GridPtrF, + FrbGridF, + *DenominatorsF[h],*Denominators[h], + *LinOpF[h], *LinOpD[h]) ); + Quotients.push_back (new TwoFlavourEvenOddRatioPseudoFermionAction(*Numerators[h],*Denominators[h],*MPCG[h],ActionCG)); +#endif + } for(int h=0;h Date: Tue, 23 Apr 2019 21:54:45 +0100 Subject: [PATCH 09/13] Evolving HMC status --- HMC/README | 41 +++++++++++++++++++++++++++++++++-------- 1 file changed, 33 insertions(+), 8 deletions(-) diff --git a/HMC/README b/HMC/README index 7bea2782..bf97701d 100644 --- a/HMC/README +++ b/HMC/README @@ -1,14 +1,39 @@ -* Sign off 2+1f HMC with Hasenbush and strange RHMC - -- Wilson plaquette cross checked against CPS and literature GwilsonFnone -- Timesteps matched - -- Use 16^3x32 - ******************************************************************** -* From previous CPS runs: +TODO: ******************************************************************** +- Currently 10 traj per hour + +- EOFA use a different derivative solver from action solver +- EOFA fix Davids hack to the SchurRedBlack guessing + +*** Reduce precision/tolerance in EOFA with second CG param. (10% speed up) +*** Force gradient - reduced precision solve for the gradient (4/3x speedup) +*** Need a plan for gauge field update for mixed precision in HMC (2x speed up) + -- Store the single prec action operator. + -- Clone the gauge field from the operator function argument. + -- Build the mixed precision operator dynamically from the passed operator and single prec clone. + +*** Mixed precision CG into EOFA portion +*** Further reduce precision in forces to 10^-6 ? + +*** Overall: a 3x or so is still possible => 500s -> 160s and 20 traj per hour on 16^3. + +- Use mixed precision CG in HMC +- SchurRedBlack.h: stop use of operator function; use LinearOperator or similar instead. +- Or make an OperatorFunction for mixed precision as a wrapper + +******************************************************************** +* Signed off 2+1f HMC with Hasenbush and strange RHMC 16^3 x 32 DWF Ls=16 Plaquette 0.5883 ish +* Signed off 2+1f HMC with Hasenbush and strange EOFA 16^3 x 32 DWF Ls=16 Plaquette 0.5883 ish +* Wilson plaquette cross checked against CPS and literature GwilsonFnone +******************************************************************** + +******************************************************************** +* RHMC: Timesteps & eigenranges matched from previous CPS 16^3 x 32 runs: +******************************************************************** + +**** Strange (m=0.04) has eigenspan **** 16^3 done as 1+1+1 with separate PV's. From 94ebcf551c1402d7ebff22959f0ff40361f66205 Mon Sep 17 00:00:00 2001 From: Peter Boyle Date: Wed, 24 Apr 2019 06:28:14 +0100 Subject: [PATCH 10/13] Iteratoin range fix --- Grid/algorithms/iterative/ConjugateGradient.h | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/Grid/algorithms/iterative/ConjugateGradient.h b/Grid/algorithms/iterative/ConjugateGradient.h index 25d206f5..c1717e2a 100644 --- a/Grid/algorithms/iterative/ConjugateGradient.h +++ b/Grid/algorithms/iterative/ConjugateGradient.h @@ -106,7 +106,7 @@ class ConjugateGradient : public OperatorFunction { SolverTimer.Start(); int k; - for (k = 1; k <= MaxIterations*1000; k++) { + for (k = 1; k <= MaxIterations; k++) { c = cp; MatrixTimer.Start(); @@ -167,8 +167,7 @@ class ConjugateGradient : public OperatorFunction { return; } } - std::cout << GridLogMessage << "ConjugateGradient did NOT converge" - << std::endl; + std::cout << GridLogMessage << "ConjugateGradient did NOT converge "< Date: Wed, 24 Apr 2019 06:29:52 +0100 Subject: [PATCH 11/13] Verbose --- Grid/qcd/hmc/integrators/Integrator.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/Grid/qcd/hmc/integrators/Integrator.h b/Grid/qcd/hmc/integrators/Integrator.h index 940a35d3..9d4376fc 100644 --- a/Grid/qcd/hmc/integrators/Integrator.h +++ b/Grid/qcd/hmc/integrators/Integrator.h @@ -264,6 +264,7 @@ class Integrator { // Calculate action RealD S(Field& U) { // here also U not used + std::cout << GridLogIntegrator << "Integrator action\n"; RealD H = - FieldImplementation::FieldSquareNorm(P)/HMC_MOMENTUM_DENOMINATOR; // - trace (P*P)/denom @@ -275,6 +276,7 @@ class Integrator { // get gauge field from the SmearingPolicy and // based on the boolean is_smeared in actionID Field& Us = Smearer.get_U(as[level].actions.at(actionID)->is_smeared); + std::cout << GridLogMessage << "S [" << level << "][" << actionID << "] action eval " << std::endl; Hterm = as[level].actions.at(actionID)->S(Us); std::cout << GridLogMessage << "S [" << level << "][" << actionID << "] H = " << Hterm << std::endl; H += Hterm; From 2095c12eac86057fb9159cfda3d562dc6ef0aecb Mon Sep 17 00:00:00 2001 From: Peter Boyle Date: Wed, 22 May 2019 09:54:21 +0100 Subject: [PATCH 12/13] Make detection of HPE 8600 automatic --- Grid/communicator/SharedMemory.h | 2 ++ Grid/communicator/SharedMemoryMPI.cc | 24 ++++++++++++++++++++---- 2 files changed, 22 insertions(+), 4 deletions(-) diff --git a/Grid/communicator/SharedMemory.h b/Grid/communicator/SharedMemory.h index 9f6b1a25..63cfc7a6 100644 --- a/Grid/communicator/SharedMemory.h +++ b/Grid/communicator/SharedMemory.h @@ -103,6 +103,8 @@ class GlobalSharedMemory { ////////////////////////////////////////////////////////////////////////////////////// static void Init(Grid_MPI_Comm comm); // Typically MPI_COMM_WORLD static void OptimalCommunicator(const std::vector &processors,Grid_MPI_Comm & optimal_comm); // Turns MPI_COMM_WORLD into right layout for Cartesian + static void OptimalCommunicatorHypercube(const std::vector &processors,Grid_MPI_Comm & optimal_comm); // Turns MPI_COMM_WORLD into right layout for Cartesian + static void OptimalCommunicatorSharedMemory(const std::vector &processors,Grid_MPI_Comm & optimal_comm); // Turns MPI_COMM_WORLD into right layout for Cartesian /////////////////////////////////////////////////// // Provide shared memory facilities off comm world /////////////////////////////////////////////////// diff --git a/Grid/communicator/SharedMemoryMPI.cc b/Grid/communicator/SharedMemoryMPI.cc index d3b094ad..b2a6ab53 100644 --- a/Grid/communicator/SharedMemoryMPI.cc +++ b/Grid/communicator/SharedMemoryMPI.cc @@ -132,7 +132,22 @@ int Log2Size(int TwoToPower,int MAXLOG2) } void GlobalSharedMemory::OptimalCommunicator(const std::vector &processors,Grid_MPI_Comm & optimal_comm) { -#ifdef HYPERCUBE + ////////////////////////////////////////////////////////////////////////////// + // Look and see if it looks like an HPE 8600 based on hostname conventions + ////////////////////////////////////////////////////////////////////////////// + const int namelen = _POSIX_HOST_NAME_MAX; + char name[namelen]; + int R; + int I; + int N; + gethostname(name,namelen); + int nscan = sscanf(name,"r%di%dn%d",&R,&I,&N) ; + + if(nscan==3) OptimalCommunicatorHypercube(processors,optimal_comm); + else OptimalCommunicatorSharedMemory(processors,optimal_comm); +} +void GlobalSharedMemory::OptimalCommunicatorHypercube(const std::vector &processors,Grid_MPI_Comm & optimal_comm) +{ //////////////////////////////////////////////////////////////// // Assert power of two shm_size. //////////////////////////////////////////////////////////////// @@ -253,7 +268,9 @@ void GlobalSharedMemory::OptimalCommunicator(const std::vector &processors, ///////////////////////////////////////////////////////////////// int ierr= MPI_Comm_split(WorldComm,0,rank,&optimal_comm); assert(ierr==0); -#else +} +void GlobalSharedMemory::OptimalCommunicatorSharedMemory(const std::vector &processors,Grid_MPI_Comm & optimal_comm) +{ //////////////////////////////////////////////////////////////// // Assert power of two shm_size. //////////////////////////////////////////////////////////////// @@ -306,7 +323,6 @@ void GlobalSharedMemory::OptimalCommunicator(const std::vector &processors, ///////////////////////////////////////////////////////////////// int ierr= MPI_Comm_split(WorldComm,0,rank,&optimal_comm); assert(ierr==0); -#endif } //////////////////////////////////////////////////////////////////////////////////////////// // SHMGET @@ -337,7 +353,7 @@ void GlobalSharedMemory::SharedMemoryAllocate(uint64_t bytes, int flags) int errsv = errno; printf("Errno %d\n",errsv); printf("key %d\n",key); - printf("size %lld\n",size); + printf("size %ld\n",size); printf("flags %d\n",flags); perror("shmget"); exit(1); From 44b53c3ba275a5389110b52afcf43038474ca970 Mon Sep 17 00:00:00 2001 From: Peter Boyle Date: Wed, 22 May 2019 09:56:26 +0100 Subject: [PATCH 13/13] F1 ensemble running with 96%~ acceptance etc.. --- .../pseudofermion/ExactOneFlavourRatio.h | 76 ++++++-- .../pseudofermion/TwoFlavourEvenOddRatio.h | 12 +- HMC/Mobius2p1fEOFA.cc | 162 +++++++++++++----- HMC/README | 17 +- tests/IO/Test_serialisation.cc | 13 ++ 5 files changed, 220 insertions(+), 60 deletions(-) diff --git a/Grid/qcd/action/pseudofermion/ExactOneFlavourRatio.h b/Grid/qcd/action/pseudofermion/ExactOneFlavourRatio.h index feba94c8..25285565 100644 --- a/Grid/qcd/action/pseudofermion/ExactOneFlavourRatio.h +++ b/Grid/qcd/action/pseudofermion/ExactOneFlavourRatio.h @@ -58,21 +58,26 @@ namespace QCD{ bool use_heatbath_forecasting; AbstractEOFAFermion& Lop; // the basic LH operator AbstractEOFAFermion& Rop; // the basic RH operator - SchurRedBlackDiagMooeeSolve Solver; - SchurRedBlackDiagMooeeSolve DerivativeSolver; + SchurRedBlackDiagMooeeSolve SolverHB; + SchurRedBlackDiagMooeeSolve SolverL; + SchurRedBlackDiagMooeeSolve SolverR; + SchurRedBlackDiagMooeeSolve DerivativeSolverL; + SchurRedBlackDiagMooeeSolve DerivativeSolverR; FermionField Phi; // the pseudofermion field for this trajectory public: ExactOneFlavourRatioPseudoFermionAction(AbstractEOFAFermion& _Lop, AbstractEOFAFermion& _Rop, - OperatorFunction& ActionCG, - OperatorFunction& DerivCG, + OperatorFunction& HeatbathCG, + OperatorFunction& ActionCGL, OperatorFunction& ActionCGR, + OperatorFunction& DerivCGL , OperatorFunction& DerivCGR, Params& p, bool use_fc=false) : Lop(_Lop), Rop(_Rop), - Solver(ActionCG, false, true), - DerivativeSolver(DerivCG, false, true), + SolverHB(HeatbathCG,false,true), + SolverL(ActionCGL, false, true), SolverR(ActionCGR, false, true), + DerivativeSolverL(DerivCGL, false, true), DerivativeSolverR(DerivCGR, false, true), Phi(_Lop.FermionGrid()), param(p), use_heatbath_forecasting(use_fc) @@ -109,6 +114,9 @@ namespace QCD{ // We generate a Gaussian noise vector \eta, and then compute // \Phi = M_{\rm EOFA}^{-1/2} * \eta // using a rational approximation to the inverse square root + // + // As a check of rational require \Phi^dag M_{EOFA} \Phi == eta^dag M^-1/2^dag M M^-1/2 eta = eta^dag eta + // virtual void refresh(const GaugeField& U, GridParallelRNG& pRNG) { Lop.ImportGauge(U); @@ -129,7 +137,6 @@ namespace QCD{ RealD scale = std::sqrt(0.5); gaussian(pRNG,eta); eta = eta * scale; - printf("Heatbath source vector: <\\eta|\\eta> = %1.15e\n", norm2(eta)); // \Phi = ( \alpha_{0} + \sum_{k=1}^{N_{p}} \alpha_{l} * \gamma_{l} ) * \eta RealD N(PowerNegHalf.norm); @@ -150,11 +157,11 @@ namespace QCD{ if(use_heatbath_forecasting){ // Forecast CG guess using solutions from previous poles Lop.Mdag(CG_src, Forecast_src); CG_soln = Forecast(Lop, Forecast_src, prev_solns); - Solver(Lop, CG_src, CG_soln); + SolverHB(Lop, CG_src, CG_soln); prev_solns.push_back(CG_soln); } else { CG_soln = zero; // Just use zero as the initial guess - Solver(Lop, CG_src, CG_soln); + SolverHB(Lop, CG_src, CG_soln); } Lop.Dtilde(CG_soln, tmp[0]); // We actually solved Cayley preconditioned system: transform back tmp[1] = tmp[1] + ( PowerNegHalf.residues[k]*gamma_l*gamma_l*Lop.k ) * tmp[0]; @@ -177,11 +184,11 @@ namespace QCD{ if(use_heatbath_forecasting){ Rop.Mdag(CG_src, Forecast_src); CG_soln = Forecast(Rop, Forecast_src, prev_solns); - Solver(Rop, CG_src, CG_soln); + SolverHB(Rop, CG_src, CG_soln); prev_solns.push_back(CG_soln); } else { CG_soln = zero; - Solver(Rop, CG_src, CG_soln); + SolverHB(Rop, CG_src, CG_soln); } Rop.Dtilde(CG_soln, tmp[0]); // We actually solved Cayley preconditioned system: transform back tmp[1] = tmp[1] - ( PowerNegHalf.residues[k]*gamma_l*gamma_l*Rop.k ) * tmp[0]; @@ -193,8 +200,47 @@ namespace QCD{ // Reset shift coefficients for energy and force evals Lop.RefreshShiftCoefficients(0.0); Rop.RefreshShiftCoefficients(-1.0); + + // Bounds check + RealD EtaDagEta = norm2(eta); + // RealD PhiDagMPhi= norm2(eta); + }; + void Meofa(const GaugeField& U,const FermionField &phi, FermionField & Mphi) + { +#if 0 + Lop.ImportGauge(U); + Rop.ImportGauge(U); + + FermionField spProj_Phi(Lop.FermionGrid()); + FermionField mPhi(Lop.FermionGrid()); + std::vector tmp(2, Lop.FermionGrid()); + mPhi = phi; + + // LH term: S = S - k <\Phi| P_{-} \Omega_{-}^{\dagger} H(mf)^{-1} \Omega_{-} P_{-} |\Phi> + spProj(Phi, spProj_Phi, -1, Lop.Ls); + Lop.Omega(spProj_Phi, tmp[0], -1, 0); + G5R5(tmp[1], tmp[0]); + tmp[0] = zero; + SolverL(Lop, tmp[1], tmp[0]); + Lop.Dtilde(tmp[0], tmp[1]); // We actually solved Cayley preconditioned system: transform back + Lop.Omega(tmp[1], tmp[0], -1, 1); + mPhi = mPhi - Lop.k * innerProduct(spProj_Phi, tmp[0]).real(); + + // RH term: S = S + k <\Phi| P_{+} \Omega_{+}^{\dagger} ( H(mb) + // - \Delta_{+}(mf,mb) P_{+} )^{-1} \Omega_{-} P_{-} |\Phi> + spProj(Phi, spProj_Phi, 1, Rop.Ls); + Rop.Omega(spProj_Phi, tmp[0], 1, 0); + G5R5(tmp[1], tmp[0]); + tmp[0] = zero; + SolverR(Rop, tmp[1], tmp[0]); + Rop.Dtilde(tmp[0], tmp[1]); + Rop.Omega(tmp[1], tmp[0], 1, 1); + action += Rop.k * innerProduct(spProj_Phi, tmp[0]).real(); +#endif + } + // EOFA action: see Eqn. (10) of arXiv:1706.05843 virtual RealD S(const GaugeField& U) { @@ -212,7 +258,7 @@ namespace QCD{ Lop.Omega(spProj_Phi, tmp[0], -1, 0); G5R5(tmp[1], tmp[0]); tmp[0] = zero; - Solver(Lop, tmp[1], tmp[0]); + SolverL(Lop, tmp[1], tmp[0]); Lop.Dtilde(tmp[0], tmp[1]); // We actually solved Cayley preconditioned system: transform back Lop.Omega(tmp[1], tmp[0], -1, 1); action -= Lop.k * innerProduct(spProj_Phi, tmp[0]).real(); @@ -223,7 +269,7 @@ namespace QCD{ Rop.Omega(spProj_Phi, tmp[0], 1, 0); G5R5(tmp[1], tmp[0]); tmp[0] = zero; - Solver(Rop, tmp[1], tmp[0]); + SolverR(Rop, tmp[1], tmp[0]); Rop.Dtilde(tmp[0], tmp[1]); Rop.Omega(tmp[1], tmp[0], 1, 1); action += Rop.k * innerProduct(spProj_Phi, tmp[0]).real(); @@ -256,7 +302,7 @@ namespace QCD{ Lop.Omega(spProj_Phi, Omega_spProj_Phi, -1, 0); G5R5(CG_src, Omega_spProj_Phi); spProj_Phi = zero; - DerivativeSolver(Lop, CG_src, spProj_Phi); + DerivativeSolverL(Lop, CG_src, spProj_Phi); Lop.Dtilde(spProj_Phi, Chi); G5R5(g5_R5_Chi, Chi); Lop.MDeriv(force, g5_R5_Chi, Chi, DaggerNo); @@ -268,7 +314,7 @@ namespace QCD{ Rop.Omega(spProj_Phi, Omega_spProj_Phi, 1, 0); G5R5(CG_src, Omega_spProj_Phi); spProj_Phi = zero; - DerivativeSolver(Rop, CG_src, spProj_Phi); + DerivativeSolverR(Rop, CG_src, spProj_Phi); Rop.Dtilde(spProj_Phi, Chi); G5R5(g5_R5_Chi, Chi); Lop.MDeriv(force, g5_R5_Chi, Chi, DaggerNo); diff --git a/Grid/qcd/action/pseudofermion/TwoFlavourEvenOddRatio.h b/Grid/qcd/action/pseudofermion/TwoFlavourEvenOddRatio.h index 0f57fe9c..e9a8853a 100644 --- a/Grid/qcd/action/pseudofermion/TwoFlavourEvenOddRatio.h +++ b/Grid/qcd/action/pseudofermion/TwoFlavourEvenOddRatio.h @@ -46,6 +46,7 @@ namespace Grid{ OperatorFunction &DerivativeSolver; OperatorFunction &ActionSolver; + OperatorFunction &HeatbathSolver; FermionField PhiOdd; // the pseudo fermion field for this trajectory FermionField PhiEven; // the pseudo fermion field for this trajectory @@ -54,11 +55,18 @@ namespace Grid{ TwoFlavourEvenOddRatioPseudoFermionAction(FermionOperator &_NumOp, FermionOperator &_DenOp, OperatorFunction & DS, - OperatorFunction & AS) : + OperatorFunction & AS ) : + TwoFlavourEvenOddRatioPseudoFermionAction(_NumOp,_DenOp, DS,AS,AS) {}; + + TwoFlavourEvenOddRatioPseudoFermionAction(FermionOperator &_NumOp, + FermionOperator &_DenOp, + OperatorFunction & DS, + OperatorFunction & AS, OperatorFunction & HS) : NumOp(_NumOp), DenOp(_DenOp), DerivativeSolver(DS), ActionSolver(AS), + HeatbathSolver(HS), PhiEven(_NumOp.FermionRedBlackGrid()), PhiOdd(_NumOp.FermionRedBlackGrid()) { @@ -111,7 +119,7 @@ namespace Grid{ // Odd det factors Mpc.MpcDag(etaOdd,PhiOdd); tmp=zero; - ActionSolver(Vpc,PhiOdd,tmp); + HeatbathSolver(Vpc,PhiOdd,tmp); Vpc.Mpc(tmp,PhiOdd); // Even det factors diff --git a/HMC/Mobius2p1fEOFA.cc b/HMC/Mobius2p1fEOFA.cc index 8f093d44..61b06829 100644 --- a/HMC/Mobius2p1fEOFA.cc +++ b/HMC/Mobius2p1fEOFA.cc @@ -30,6 +30,8 @@ directory /* END LEGAL */ #include +#define MIXED_PRECISION + namespace Grid{ namespace QCD{ @@ -63,9 +65,6 @@ namespace Grid{ Integer TotalOuterIterations; //Number of restarts Integer TotalFinalStepIterations; //Number of CG iterations in final patch-up step - //Option to speed up *inner single precision* solves using a LinearFunction that produces a guess - LinearFunction *guesser; - MixedPrecisionConjugateGradientOperatorFunction(RealD tol, Integer maxinnerit, Integer maxouterit, @@ -85,27 +84,26 @@ namespace Grid{ MaxOuterIterations(maxouterit), SinglePrecGrid4(_sp_grid4), SinglePrecGrid5(_sp_grid5), - OuterLoopNormMult(100.), - guesser(NULL) + OuterLoopNormMult(100.) { + /* Debugging instances of objects; references are stored std::cout << GridLogMessage << " Mixed precision CG wrapper LinOpF " < &g){ - guesser = &g; - } - void operator()(LinearOperatorBase &LinOpU, const FieldD &src, FieldD &psi) { + std::cout << GridLogMessage << " Mixed precision CG wrapper operator() "<(&LinOpU); - std::cout << GridLogMessage << " Mixed precision CG wrapper FermOpD " <_Mat)<_Mat)<_Mat)==&(LinOpD._Mat)); //////////////////////////////////////////////////////////////////////////////////// @@ -120,9 +118,13 @@ namespace Grid{ GridBase * GridPtrD = FermOpD.Umu._grid; GaugeFieldF U_f (GridPtrF); GaugeLinkFieldF Umu_f(GridPtrF); - std::cout << " Dim gauge field "<Nd()<Nd()<Nd()<Nd()<(FermOpD.Umu, mu); @@ -131,11 +133,11 @@ namespace Grid{ } pickCheckerboard(Even,FermOpF.UmuEven,FermOpF.Umu); pickCheckerboard(Odd ,FermOpF.UmuOdd ,FermOpF.Umu); - //////////////////////////////////////////////////////////////////////////////////// // Could test to make sure that LinOpF and LinOpD agree to single prec? //////////////////////////////////////////////////////////////////////////////////// + /* GridBase *Fgrid = psi._grid; FieldD tmp2(Fgrid); FieldD tmp1(Fgrid); @@ -147,6 +149,7 @@ namespace Grid{ std::cout << " Test of operators "< ActionCG(ActionStoppingCondition,MaxCGIterations); - ConjugateGradient DerivativeCG(DerivativeStoppingCondition,MaxCGIterations); //////////////////////////////////// // Collect actions @@ -269,6 +272,13 @@ int main(int argc, char **argv) { //////////////////////////////////// // Strange action //////////////////////////////////// + typedef SchurDiagMooeeOperator LinearOperatorF; + typedef SchurDiagMooeeOperator LinearOperatorD; + typedef SchurDiagMooeeOperator LinearOperatorEOFAF; + typedef SchurDiagMooeeOperator LinearOperatorEOFAD; + + typedef MixedPrecisionConjugateGradientOperatorFunction MxPCG; + typedef MixedPrecisionConjugateGradientOperatorFunction MxPCG_EOFA; // DJM: setup for EOFA ratio (Mobius) OneFlavourRationalParams OFRp; @@ -279,10 +289,67 @@ int main(int argc, char **argv) { OFRp.degree = 14; OFRp.precision= 50; - MobiusEOFAFermionR Strange_Op_L(U, *FGrid, *FrbGrid, *GridPtr, *GridRBPtr, strange_mass, strange_mass, pv_mass, 0.0, -1, M5, b, c); - MobiusEOFAFermionR Strange_Op_R(U, *FGrid, *FrbGrid, *GridPtr, *GridRBPtr, pv_mass, strange_mass, pv_mass, -1.0, 1, M5, b, c); + + MobiusEOFAFermionR Strange_Op_L (U , *FGrid , *FrbGrid , *GridPtr , *GridRBPtr , strange_mass, strange_mass, pv_mass, 0.0, -1, M5, b, c); + MobiusEOFAFermionF Strange_Op_LF(UF, *FGridF, *FrbGridF, *GridPtrF, *GridRBPtrF, strange_mass, strange_mass, pv_mass, 0.0, -1, M5, b, c); + MobiusEOFAFermionR Strange_Op_R (U , *FGrid , *FrbGrid , *GridPtr , *GridRBPtr , pv_mass, strange_mass, pv_mass, -1.0, 1, M5, b, c); + MobiusEOFAFermionF Strange_Op_RF(UF, *FGridF, *FrbGridF, *GridPtrF, *GridRBPtrF, pv_mass, strange_mass, pv_mass, -1.0, 1, M5, b, c); - ExactOneFlavourRatioPseudoFermionAction EOFA(Strange_Op_L, Strange_Op_R, ActionCG, DerivativeCG, OFRp, true); + ConjugateGradient ActionCG(ActionStoppingCondition,MaxCGIterations); + ConjugateGradient DerivativeCG(DerivativeStoppingCondition,MaxCGIterations); +#ifdef MIXED_PRECISION + const int MX_inner = 1000; + // Mixed precision EOFA + LinearOperatorEOFAD Strange_LinOp_L (Strange_Op_L); + LinearOperatorEOFAD Strange_LinOp_R (Strange_Op_R); + LinearOperatorEOFAF Strange_LinOp_LF(Strange_Op_LF); + LinearOperatorEOFAF Strange_LinOp_RF(Strange_Op_RF); + + MxPCG_EOFA ActionCGL(ActionStoppingCondition, + MX_inner, + MaxCGIterations, + GridPtrF, + FrbGridF, + Strange_Op_LF,Strange_Op_L, + Strange_LinOp_LF,Strange_LinOp_L); + + MxPCG_EOFA DerivativeCGL(DerivativeStoppingCondition, + MX_inner, + MaxCGIterations, + GridPtrF, + FrbGridF, + Strange_Op_LF,Strange_Op_L, + Strange_LinOp_LF,Strange_LinOp_L); + + MxPCG_EOFA ActionCGR(ActionStoppingCondition, + MX_inner, + MaxCGIterations, + GridPtrF, + FrbGridF, + Strange_Op_RF,Strange_Op_R, + Strange_LinOp_RF,Strange_LinOp_R); + + MxPCG_EOFA DerivativeCGR(DerivativeStoppingCondition, + MX_inner, + MaxCGIterations, + GridPtrF, + FrbGridF, + Strange_Op_RF,Strange_Op_R, + Strange_LinOp_RF,Strange_LinOp_R); + + ExactOneFlavourRatioPseudoFermionAction + EOFA(Strange_Op_L, Strange_Op_R, + ActionCG, + ActionCGL, ActionCGR, + DerivativeCGL, DerivativeCGR, + OFRp, true); +#else + ExactOneFlavourRatioPseudoFermionAction + EOFA(Strange_Op_L, Strange_Op_R, + ActionCG, ActionCG, + DerivativeCG, DerivativeCG, + OFRp, true); +#endif Level1.push_back(&EOFA); //////////////////////////////////// @@ -299,21 +366,20 @@ int main(int argc, char **argv) { } light_num.push_back(pv_mass); - typedef SchurDiagMooeeOperator LinearOperatorF; - typedef SchurDiagMooeeOperator LinearOperatorD; - + ////////////////////////////////////////////////////////////// + // Forced to replicate the MxPCG and DenominatorsF etc.. because + // there is no convenient way to "Clone" physics params from double op + // into single op for any operator pair. + // Same issue prevents using MxPCG in the Heatbath step + ////////////////////////////////////////////////////////////// std::vector Numerators; std::vector Denominators; - std::vector LinOpD; - - std::vector DenominatorsF; - std::vector LinOpF; - - typedef MixedPrecisionConjugateGradientOperatorFunction MxPCG; - std::vector MPCG; - std::vector *> Quotients; + std::vector ActionMPCG; + std::vector MPCG; + std::vector DenominatorsF; + std::vector LinOpD; + std::vector LinOpF; for(int h=0;h(*Numerators[h],*Denominators[h],DerivativeCG,ActionCG)); -#else +#ifdef MIXED_PRECISION //////////////////////////////////////////////////////////////////////////// // Mixed precision CG for 2f force //////////////////////////////////////////////////////////////////////////// + DenominatorsF.push_back(new FermionActionF(UF,*FGridF,*FrbGridF,*GridPtrF,*GridRBPtrF,light_den[h],M5,b,c, ParamsF)); - LinOpF.push_back(new LinearOperatorF(*DenominatorsF[h])); LinOpD.push_back(new LinearOperatorD(*Denominators[h])); + LinOpF.push_back(new LinearOperatorF(*DenominatorsF[h])); + MPCG.push_back(new MxPCG(DerivativeStoppingCondition, - 200, + MX_inner, MaxCGIterations, GridPtrF, FrbGridF, *DenominatorsF[h],*Denominators[h], *LinOpF[h], *LinOpD[h]) ); - Quotients.push_back (new TwoFlavourEvenOddRatioPseudoFermionAction(*Numerators[h],*Denominators[h],*MPCG[h],ActionCG)); + + ActionMPCG.push_back(new MxPCG(ActionStoppingCondition, + MX_inner, + MaxCGIterations, + GridPtrF, + FrbGridF, + *DenominatorsF[h],*Denominators[h], + *LinOpF[h], *LinOpD[h]) ); + + // Heatbath not mixed yet. As inverts numerators not so important as raised mass. + Quotients.push_back (new TwoFlavourEvenOddRatioPseudoFermionAction(*Numerators[h],*Denominators[h],*MPCG[h],*ActionMPCG[h],ActionCG)); +#else + //////////////////////////////////////////////////////////////////////////// + // Standard CG for 2f force + //////////////////////////////////////////////////////////////////////////// + Quotients.push_back (new TwoFlavourEvenOddRatioPseudoFermionAction(*Numerators[h],*Denominators[h],DerivativeCG,ActionCG)); #endif } diff --git a/HMC/README b/HMC/README index bf97701d..ad1af242 100644 --- a/HMC/README +++ b/HMC/README @@ -2,13 +2,28 @@ TODO: ******************************************************************** -- Currently 10 traj per hour +i) Got mixed precision in 2f and EOFA force and action solves. + But need mixed precision in the heatbath solve. Best for Fermop to have a "clone" method, to + reduce the number of solver and action objects. Needed ideally for the EOFA heatbath. + 15% perhaps + Combine with 2x trajectory length? + +ii) Rational on EOFA HB -- relax order + -- Test the approx as per David email + +Resume / roll.sh + +---------------------------------------------------------------- + +- 16^3 Currently 10 traj per hour - EOFA use a different derivative solver from action solver - EOFA fix Davids hack to the SchurRedBlack guessing *** Reduce precision/tolerance in EOFA with second CG param. (10% speed up) *** Force gradient - reduced precision solve for the gradient (4/3x speedup) + + *** Need a plan for gauge field update for mixed precision in HMC (2x speed up) -- Store the single prec action operator. -- Clone the gauge field from the operator function argument. diff --git a/tests/IO/Test_serialisation.cc b/tests/IO/Test_serialisation.cc index d1ec1309..522e0cb5 100644 --- a/tests/IO/Test_serialisation.cc +++ b/tests/IO/Test_serialisation.cc @@ -370,5 +370,18 @@ int main(int argc,char **argv) tensorConvTest(rng, SpinMatrix); tensorConvTest(rng, SpinVector); + { + HMCparameters HMCparams; + HMCparams.StartingType =std::string("CheckpointStart"); + HMCparams.StartTrajectory =7; + HMCparams.Trajectories =1000; + HMCparams.NoMetropolisUntil=0; + HMCparams.MD.name =std::string("Force Gradient"); + HMCparams.MD.MDsteps = 10; + HMCparams.MD.trajL = 1.0; + + XmlWriter HMCwr("HMCparameters.xml"); + write(HMCwr,"HMCparameters",HMCparams); + } Grid_finalize(); }