diff --git a/Grid/qcd/action/pseudofermion/ExactOneFlavourRatio.h b/Grid/qcd/action/pseudofermion/ExactOneFlavourRatio.h index feba94c8..25285565 100644 --- a/Grid/qcd/action/pseudofermion/ExactOneFlavourRatio.h +++ b/Grid/qcd/action/pseudofermion/ExactOneFlavourRatio.h @@ -58,21 +58,26 @@ namespace QCD{ bool use_heatbath_forecasting; AbstractEOFAFermion& Lop; // the basic LH operator AbstractEOFAFermion& Rop; // the basic RH operator - SchurRedBlackDiagMooeeSolve Solver; - SchurRedBlackDiagMooeeSolve DerivativeSolver; + SchurRedBlackDiagMooeeSolve SolverHB; + SchurRedBlackDiagMooeeSolve SolverL; + SchurRedBlackDiagMooeeSolve SolverR; + SchurRedBlackDiagMooeeSolve DerivativeSolverL; + SchurRedBlackDiagMooeeSolve DerivativeSolverR; FermionField Phi; // the pseudofermion field for this trajectory public: ExactOneFlavourRatioPseudoFermionAction(AbstractEOFAFermion& _Lop, AbstractEOFAFermion& _Rop, - OperatorFunction& ActionCG, - OperatorFunction& DerivCG, + OperatorFunction& HeatbathCG, + OperatorFunction& ActionCGL, OperatorFunction& ActionCGR, + OperatorFunction& DerivCGL , OperatorFunction& DerivCGR, Params& p, bool use_fc=false) : Lop(_Lop), Rop(_Rop), - Solver(ActionCG, false, true), - DerivativeSolver(DerivCG, false, true), + SolverHB(HeatbathCG,false,true), + SolverL(ActionCGL, false, true), SolverR(ActionCGR, false, true), + DerivativeSolverL(DerivCGL, false, true), DerivativeSolverR(DerivCGR, false, true), Phi(_Lop.FermionGrid()), param(p), use_heatbath_forecasting(use_fc) @@ -109,6 +114,9 @@ namespace QCD{ // We generate a Gaussian noise vector \eta, and then compute // \Phi = M_{\rm EOFA}^{-1/2} * \eta // using a rational approximation to the inverse square root + // + // As a check of rational require \Phi^dag M_{EOFA} \Phi == eta^dag M^-1/2^dag M M^-1/2 eta = eta^dag eta + // virtual void refresh(const GaugeField& U, GridParallelRNG& pRNG) { Lop.ImportGauge(U); @@ -129,7 +137,6 @@ namespace QCD{ RealD scale = std::sqrt(0.5); gaussian(pRNG,eta); eta = eta * scale; - printf("Heatbath source vector: <\\eta|\\eta> = %1.15e\n", norm2(eta)); // \Phi = ( \alpha_{0} + \sum_{k=1}^{N_{p}} \alpha_{l} * \gamma_{l} ) * \eta RealD N(PowerNegHalf.norm); @@ -150,11 +157,11 @@ namespace QCD{ if(use_heatbath_forecasting){ // Forecast CG guess using solutions from previous poles Lop.Mdag(CG_src, Forecast_src); CG_soln = Forecast(Lop, Forecast_src, prev_solns); - Solver(Lop, CG_src, CG_soln); + SolverHB(Lop, CG_src, CG_soln); prev_solns.push_back(CG_soln); } else { CG_soln = zero; // Just use zero as the initial guess - Solver(Lop, CG_src, CG_soln); + SolverHB(Lop, CG_src, CG_soln); } Lop.Dtilde(CG_soln, tmp[0]); // We actually solved Cayley preconditioned system: transform back tmp[1] = tmp[1] + ( PowerNegHalf.residues[k]*gamma_l*gamma_l*Lop.k ) * tmp[0]; @@ -177,11 +184,11 @@ namespace QCD{ if(use_heatbath_forecasting){ Rop.Mdag(CG_src, Forecast_src); CG_soln = Forecast(Rop, Forecast_src, prev_solns); - Solver(Rop, CG_src, CG_soln); + SolverHB(Rop, CG_src, CG_soln); prev_solns.push_back(CG_soln); } else { CG_soln = zero; - Solver(Rop, CG_src, CG_soln); + SolverHB(Rop, CG_src, CG_soln); } Rop.Dtilde(CG_soln, tmp[0]); // We actually solved Cayley preconditioned system: transform back tmp[1] = tmp[1] - ( PowerNegHalf.residues[k]*gamma_l*gamma_l*Rop.k ) * tmp[0]; @@ -193,8 +200,47 @@ namespace QCD{ // Reset shift coefficients for energy and force evals Lop.RefreshShiftCoefficients(0.0); Rop.RefreshShiftCoefficients(-1.0); + + // Bounds check + RealD EtaDagEta = norm2(eta); + // RealD PhiDagMPhi= norm2(eta); + }; + void Meofa(const GaugeField& U,const FermionField &phi, FermionField & Mphi) + { +#if 0 + Lop.ImportGauge(U); + Rop.ImportGauge(U); + + FermionField spProj_Phi(Lop.FermionGrid()); + FermionField mPhi(Lop.FermionGrid()); + std::vector tmp(2, Lop.FermionGrid()); + mPhi = phi; + + // LH term: S = S - k <\Phi| P_{-} \Omega_{-}^{\dagger} H(mf)^{-1} \Omega_{-} P_{-} |\Phi> + spProj(Phi, spProj_Phi, -1, Lop.Ls); + Lop.Omega(spProj_Phi, tmp[0], -1, 0); + G5R5(tmp[1], tmp[0]); + tmp[0] = zero; + SolverL(Lop, tmp[1], tmp[0]); + Lop.Dtilde(tmp[0], tmp[1]); // We actually solved Cayley preconditioned system: transform back + Lop.Omega(tmp[1], tmp[0], -1, 1); + mPhi = mPhi - Lop.k * innerProduct(spProj_Phi, tmp[0]).real(); + + // RH term: S = S + k <\Phi| P_{+} \Omega_{+}^{\dagger} ( H(mb) + // - \Delta_{+}(mf,mb) P_{+} )^{-1} \Omega_{-} P_{-} |\Phi> + spProj(Phi, spProj_Phi, 1, Rop.Ls); + Rop.Omega(spProj_Phi, tmp[0], 1, 0); + G5R5(tmp[1], tmp[0]); + tmp[0] = zero; + SolverR(Rop, tmp[1], tmp[0]); + Rop.Dtilde(tmp[0], tmp[1]); + Rop.Omega(tmp[1], tmp[0], 1, 1); + action += Rop.k * innerProduct(spProj_Phi, tmp[0]).real(); +#endif + } + // EOFA action: see Eqn. (10) of arXiv:1706.05843 virtual RealD S(const GaugeField& U) { @@ -212,7 +258,7 @@ namespace QCD{ Lop.Omega(spProj_Phi, tmp[0], -1, 0); G5R5(tmp[1], tmp[0]); tmp[0] = zero; - Solver(Lop, tmp[1], tmp[0]); + SolverL(Lop, tmp[1], tmp[0]); Lop.Dtilde(tmp[0], tmp[1]); // We actually solved Cayley preconditioned system: transform back Lop.Omega(tmp[1], tmp[0], -1, 1); action -= Lop.k * innerProduct(spProj_Phi, tmp[0]).real(); @@ -223,7 +269,7 @@ namespace QCD{ Rop.Omega(spProj_Phi, tmp[0], 1, 0); G5R5(tmp[1], tmp[0]); tmp[0] = zero; - Solver(Rop, tmp[1], tmp[0]); + SolverR(Rop, tmp[1], tmp[0]); Rop.Dtilde(tmp[0], tmp[1]); Rop.Omega(tmp[1], tmp[0], 1, 1); action += Rop.k * innerProduct(spProj_Phi, tmp[0]).real(); @@ -256,7 +302,7 @@ namespace QCD{ Lop.Omega(spProj_Phi, Omega_spProj_Phi, -1, 0); G5R5(CG_src, Omega_spProj_Phi); spProj_Phi = zero; - DerivativeSolver(Lop, CG_src, spProj_Phi); + DerivativeSolverL(Lop, CG_src, spProj_Phi); Lop.Dtilde(spProj_Phi, Chi); G5R5(g5_R5_Chi, Chi); Lop.MDeriv(force, g5_R5_Chi, Chi, DaggerNo); @@ -268,7 +314,7 @@ namespace QCD{ Rop.Omega(spProj_Phi, Omega_spProj_Phi, 1, 0); G5R5(CG_src, Omega_spProj_Phi); spProj_Phi = zero; - DerivativeSolver(Rop, CG_src, spProj_Phi); + DerivativeSolverR(Rop, CG_src, spProj_Phi); Rop.Dtilde(spProj_Phi, Chi); G5R5(g5_R5_Chi, Chi); Lop.MDeriv(force, g5_R5_Chi, Chi, DaggerNo); diff --git a/Grid/qcd/action/pseudofermion/TwoFlavourEvenOddRatio.h b/Grid/qcd/action/pseudofermion/TwoFlavourEvenOddRatio.h index 0f57fe9c..e9a8853a 100644 --- a/Grid/qcd/action/pseudofermion/TwoFlavourEvenOddRatio.h +++ b/Grid/qcd/action/pseudofermion/TwoFlavourEvenOddRatio.h @@ -46,6 +46,7 @@ namespace Grid{ OperatorFunction &DerivativeSolver; OperatorFunction &ActionSolver; + OperatorFunction &HeatbathSolver; FermionField PhiOdd; // the pseudo fermion field for this trajectory FermionField PhiEven; // the pseudo fermion field for this trajectory @@ -54,11 +55,18 @@ namespace Grid{ TwoFlavourEvenOddRatioPseudoFermionAction(FermionOperator &_NumOp, FermionOperator &_DenOp, OperatorFunction & DS, - OperatorFunction & AS) : + OperatorFunction & AS ) : + TwoFlavourEvenOddRatioPseudoFermionAction(_NumOp,_DenOp, DS,AS,AS) {}; + + TwoFlavourEvenOddRatioPseudoFermionAction(FermionOperator &_NumOp, + FermionOperator &_DenOp, + OperatorFunction & DS, + OperatorFunction & AS, OperatorFunction & HS) : NumOp(_NumOp), DenOp(_DenOp), DerivativeSolver(DS), ActionSolver(AS), + HeatbathSolver(HS), PhiEven(_NumOp.FermionRedBlackGrid()), PhiOdd(_NumOp.FermionRedBlackGrid()) { @@ -111,7 +119,7 @@ namespace Grid{ // Odd det factors Mpc.MpcDag(etaOdd,PhiOdd); tmp=zero; - ActionSolver(Vpc,PhiOdd,tmp); + HeatbathSolver(Vpc,PhiOdd,tmp); Vpc.Mpc(tmp,PhiOdd); // Even det factors diff --git a/HMC/Mobius2p1fEOFA.cc b/HMC/Mobius2p1fEOFA.cc index 8f093d44..61b06829 100644 --- a/HMC/Mobius2p1fEOFA.cc +++ b/HMC/Mobius2p1fEOFA.cc @@ -30,6 +30,8 @@ directory /* END LEGAL */ #include +#define MIXED_PRECISION + namespace Grid{ namespace QCD{ @@ -63,9 +65,6 @@ namespace Grid{ Integer TotalOuterIterations; //Number of restarts Integer TotalFinalStepIterations; //Number of CG iterations in final patch-up step - //Option to speed up *inner single precision* solves using a LinearFunction that produces a guess - LinearFunction *guesser; - MixedPrecisionConjugateGradientOperatorFunction(RealD tol, Integer maxinnerit, Integer maxouterit, @@ -85,27 +84,26 @@ namespace Grid{ MaxOuterIterations(maxouterit), SinglePrecGrid4(_sp_grid4), SinglePrecGrid5(_sp_grid5), - OuterLoopNormMult(100.), - guesser(NULL) + OuterLoopNormMult(100.) { + /* Debugging instances of objects; references are stored std::cout << GridLogMessage << " Mixed precision CG wrapper LinOpF " < &g){ - guesser = &g; - } - void operator()(LinearOperatorBase &LinOpU, const FieldD &src, FieldD &psi) { + std::cout << GridLogMessage << " Mixed precision CG wrapper operator() "<(&LinOpU); - std::cout << GridLogMessage << " Mixed precision CG wrapper FermOpD " <_Mat)<_Mat)<_Mat)==&(LinOpD._Mat)); //////////////////////////////////////////////////////////////////////////////////// @@ -120,9 +118,13 @@ namespace Grid{ GridBase * GridPtrD = FermOpD.Umu._grid; GaugeFieldF U_f (GridPtrF); GaugeLinkFieldF Umu_f(GridPtrF); - std::cout << " Dim gauge field "<Nd()<Nd()<Nd()<Nd()<(FermOpD.Umu, mu); @@ -131,11 +133,11 @@ namespace Grid{ } pickCheckerboard(Even,FermOpF.UmuEven,FermOpF.Umu); pickCheckerboard(Odd ,FermOpF.UmuOdd ,FermOpF.Umu); - //////////////////////////////////////////////////////////////////////////////////// // Could test to make sure that LinOpF and LinOpD agree to single prec? //////////////////////////////////////////////////////////////////////////////////// + /* GridBase *Fgrid = psi._grid; FieldD tmp2(Fgrid); FieldD tmp1(Fgrid); @@ -147,6 +149,7 @@ namespace Grid{ std::cout << " Test of operators "< ActionCG(ActionStoppingCondition,MaxCGIterations); - ConjugateGradient DerivativeCG(DerivativeStoppingCondition,MaxCGIterations); //////////////////////////////////// // Collect actions @@ -269,6 +272,13 @@ int main(int argc, char **argv) { //////////////////////////////////// // Strange action //////////////////////////////////// + typedef SchurDiagMooeeOperator LinearOperatorF; + typedef SchurDiagMooeeOperator LinearOperatorD; + typedef SchurDiagMooeeOperator LinearOperatorEOFAF; + typedef SchurDiagMooeeOperator LinearOperatorEOFAD; + + typedef MixedPrecisionConjugateGradientOperatorFunction MxPCG; + typedef MixedPrecisionConjugateGradientOperatorFunction MxPCG_EOFA; // DJM: setup for EOFA ratio (Mobius) OneFlavourRationalParams OFRp; @@ -279,10 +289,67 @@ int main(int argc, char **argv) { OFRp.degree = 14; OFRp.precision= 50; - MobiusEOFAFermionR Strange_Op_L(U, *FGrid, *FrbGrid, *GridPtr, *GridRBPtr, strange_mass, strange_mass, pv_mass, 0.0, -1, M5, b, c); - MobiusEOFAFermionR Strange_Op_R(U, *FGrid, *FrbGrid, *GridPtr, *GridRBPtr, pv_mass, strange_mass, pv_mass, -1.0, 1, M5, b, c); + + MobiusEOFAFermionR Strange_Op_L (U , *FGrid , *FrbGrid , *GridPtr , *GridRBPtr , strange_mass, strange_mass, pv_mass, 0.0, -1, M5, b, c); + MobiusEOFAFermionF Strange_Op_LF(UF, *FGridF, *FrbGridF, *GridPtrF, *GridRBPtrF, strange_mass, strange_mass, pv_mass, 0.0, -1, M5, b, c); + MobiusEOFAFermionR Strange_Op_R (U , *FGrid , *FrbGrid , *GridPtr , *GridRBPtr , pv_mass, strange_mass, pv_mass, -1.0, 1, M5, b, c); + MobiusEOFAFermionF Strange_Op_RF(UF, *FGridF, *FrbGridF, *GridPtrF, *GridRBPtrF, pv_mass, strange_mass, pv_mass, -1.0, 1, M5, b, c); - ExactOneFlavourRatioPseudoFermionAction EOFA(Strange_Op_L, Strange_Op_R, ActionCG, DerivativeCG, OFRp, true); + ConjugateGradient ActionCG(ActionStoppingCondition,MaxCGIterations); + ConjugateGradient DerivativeCG(DerivativeStoppingCondition,MaxCGIterations); +#ifdef MIXED_PRECISION + const int MX_inner = 1000; + // Mixed precision EOFA + LinearOperatorEOFAD Strange_LinOp_L (Strange_Op_L); + LinearOperatorEOFAD Strange_LinOp_R (Strange_Op_R); + LinearOperatorEOFAF Strange_LinOp_LF(Strange_Op_LF); + LinearOperatorEOFAF Strange_LinOp_RF(Strange_Op_RF); + + MxPCG_EOFA ActionCGL(ActionStoppingCondition, + MX_inner, + MaxCGIterations, + GridPtrF, + FrbGridF, + Strange_Op_LF,Strange_Op_L, + Strange_LinOp_LF,Strange_LinOp_L); + + MxPCG_EOFA DerivativeCGL(DerivativeStoppingCondition, + MX_inner, + MaxCGIterations, + GridPtrF, + FrbGridF, + Strange_Op_LF,Strange_Op_L, + Strange_LinOp_LF,Strange_LinOp_L); + + MxPCG_EOFA ActionCGR(ActionStoppingCondition, + MX_inner, + MaxCGIterations, + GridPtrF, + FrbGridF, + Strange_Op_RF,Strange_Op_R, + Strange_LinOp_RF,Strange_LinOp_R); + + MxPCG_EOFA DerivativeCGR(DerivativeStoppingCondition, + MX_inner, + MaxCGIterations, + GridPtrF, + FrbGridF, + Strange_Op_RF,Strange_Op_R, + Strange_LinOp_RF,Strange_LinOp_R); + + ExactOneFlavourRatioPseudoFermionAction + EOFA(Strange_Op_L, Strange_Op_R, + ActionCG, + ActionCGL, ActionCGR, + DerivativeCGL, DerivativeCGR, + OFRp, true); +#else + ExactOneFlavourRatioPseudoFermionAction + EOFA(Strange_Op_L, Strange_Op_R, + ActionCG, ActionCG, + DerivativeCG, DerivativeCG, + OFRp, true); +#endif Level1.push_back(&EOFA); //////////////////////////////////// @@ -299,21 +366,20 @@ int main(int argc, char **argv) { } light_num.push_back(pv_mass); - typedef SchurDiagMooeeOperator LinearOperatorF; - typedef SchurDiagMooeeOperator LinearOperatorD; - + ////////////////////////////////////////////////////////////// + // Forced to replicate the MxPCG and DenominatorsF etc.. because + // there is no convenient way to "Clone" physics params from double op + // into single op for any operator pair. + // Same issue prevents using MxPCG in the Heatbath step + ////////////////////////////////////////////////////////////// std::vector Numerators; std::vector Denominators; - std::vector LinOpD; - - std::vector DenominatorsF; - std::vector LinOpF; - - typedef MixedPrecisionConjugateGradientOperatorFunction MxPCG; - std::vector MPCG; - std::vector *> Quotients; + std::vector ActionMPCG; + std::vector MPCG; + std::vector DenominatorsF; + std::vector LinOpD; + std::vector LinOpF; for(int h=0;h(*Numerators[h],*Denominators[h],DerivativeCG,ActionCG)); -#else +#ifdef MIXED_PRECISION //////////////////////////////////////////////////////////////////////////// // Mixed precision CG for 2f force //////////////////////////////////////////////////////////////////////////// + DenominatorsF.push_back(new FermionActionF(UF,*FGridF,*FrbGridF,*GridPtrF,*GridRBPtrF,light_den[h],M5,b,c, ParamsF)); - LinOpF.push_back(new LinearOperatorF(*DenominatorsF[h])); LinOpD.push_back(new LinearOperatorD(*Denominators[h])); + LinOpF.push_back(new LinearOperatorF(*DenominatorsF[h])); + MPCG.push_back(new MxPCG(DerivativeStoppingCondition, - 200, + MX_inner, MaxCGIterations, GridPtrF, FrbGridF, *DenominatorsF[h],*Denominators[h], *LinOpF[h], *LinOpD[h]) ); - Quotients.push_back (new TwoFlavourEvenOddRatioPseudoFermionAction(*Numerators[h],*Denominators[h],*MPCG[h],ActionCG)); + + ActionMPCG.push_back(new MxPCG(ActionStoppingCondition, + MX_inner, + MaxCGIterations, + GridPtrF, + FrbGridF, + *DenominatorsF[h],*Denominators[h], + *LinOpF[h], *LinOpD[h]) ); + + // Heatbath not mixed yet. As inverts numerators not so important as raised mass. + Quotients.push_back (new TwoFlavourEvenOddRatioPseudoFermionAction(*Numerators[h],*Denominators[h],*MPCG[h],*ActionMPCG[h],ActionCG)); +#else + //////////////////////////////////////////////////////////////////////////// + // Standard CG for 2f force + //////////////////////////////////////////////////////////////////////////// + Quotients.push_back (new TwoFlavourEvenOddRatioPseudoFermionAction(*Numerators[h],*Denominators[h],DerivativeCG,ActionCG)); #endif } diff --git a/HMC/README b/HMC/README index bf97701d..ad1af242 100644 --- a/HMC/README +++ b/HMC/README @@ -2,13 +2,28 @@ TODO: ******************************************************************** -- Currently 10 traj per hour +i) Got mixed precision in 2f and EOFA force and action solves. + But need mixed precision in the heatbath solve. Best for Fermop to have a "clone" method, to + reduce the number of solver and action objects. Needed ideally for the EOFA heatbath. + 15% perhaps + Combine with 2x trajectory length? + +ii) Rational on EOFA HB -- relax order + -- Test the approx as per David email + +Resume / roll.sh + +---------------------------------------------------------------- + +- 16^3 Currently 10 traj per hour - EOFA use a different derivative solver from action solver - EOFA fix Davids hack to the SchurRedBlack guessing *** Reduce precision/tolerance in EOFA with second CG param. (10% speed up) *** Force gradient - reduced precision solve for the gradient (4/3x speedup) + + *** Need a plan for gauge field update for mixed precision in HMC (2x speed up) -- Store the single prec action operator. -- Clone the gauge field from the operator function argument. diff --git a/tests/IO/Test_serialisation.cc b/tests/IO/Test_serialisation.cc index d1ec1309..522e0cb5 100644 --- a/tests/IO/Test_serialisation.cc +++ b/tests/IO/Test_serialisation.cc @@ -370,5 +370,18 @@ int main(int argc,char **argv) tensorConvTest(rng, SpinMatrix); tensorConvTest(rng, SpinVector); + { + HMCparameters HMCparams; + HMCparams.StartingType =std::string("CheckpointStart"); + HMCparams.StartTrajectory =7; + HMCparams.Trajectories =1000; + HMCparams.NoMetropolisUntil=0; + HMCparams.MD.name =std::string("Force Gradient"); + HMCparams.MD.MDsteps = 10; + HMCparams.MD.trajL = 1.0; + + XmlWriter HMCwr("HMCparameters.xml"); + write(HMCwr,"HMCparameters",HMCparams); + } Grid_finalize(); }