mirror of
https://github.com/paboyle/Grid.git
synced 2024-11-10 07:55:35 +00:00
Fixed an obscure but reproducible hang in the RHMC: the randomized bounds check was triggered by a random number that wasn't synchronized across the nodes, so some nodes could enter the check while others skipped it.
HMC now also reports the "L-infinity norm" of the impulse, i.e. the largest per-site norm.
This commit is contained in:
parent
c2676853ca
commit
daa095c519
@ -232,13 +232,19 @@ NAMESPACE_BEGIN(Grid);
|
||||
multiShiftInverse(Denominator, ApproxNegHalfPowerAction, param.MaxIter, X,Y);
|
||||
|
||||
// Randomly apply rational bounds checks.
|
||||
if ( param.BoundsCheckFreq != 0 && (rand()%param.BoundsCheckFreq)==0 ) {
|
||||
int rcheck = rand();
|
||||
CartesianCommunicator::BroadcastWorld(0,(void *)&rcheck,sizeof(int)); //make sure all nodes have the same number or you will sporadically hang and spend days trying to find out why (trust me - CK)
|
||||
|
||||
if ( param.BoundsCheckFreq != 0 && (rcheck % param.BoundsCheckFreq)==0 ) {
|
||||
std::cout<<GridLogMessage << action_name() << " compute action: doing bounds check" << std::endl;
|
||||
FermionField gauss(NumOp.FermionRedBlackGrid());
|
||||
gauss = PhiOdd;
|
||||
SchurDifferentiableOperator<Impl> MdagM(DenOp);
|
||||
std::cout<<GridLogMessage << action_name() << " compute action: checking high bounds" << std::endl;
|
||||
HighBoundCheck(MdagM,gauss,param.hi);
|
||||
std::cout<<GridLogMessage << action_name() << " compute action: full approximation" << std::endl;
|
||||
InversePowerBoundsCheck(param.inv_pow,param.MaxIter,param.action_tolerance*100,MdagM,gauss,ApproxNegPowerAction);
|
||||
std::cout<<GridLogMessage << action_name() << " compute action: bounds check complete" << std::endl;
|
||||
}
|
||||
|
||||
// Phidag VdagV^1/(2*inv_pow) MdagM^-1/(2*inv_pow) MdagM^-1/(2*inv_pow) VdagV^1/(2*inv_pow) Phi
|
||||
|
@ -124,8 +124,14 @@ protected:
|
||||
if (as[level].actions.at(a)->is_smeared) Smearer.smeared_force(force);
|
||||
force = FieldImplementation::projectForce(force); // Ta for gauge fields
|
||||
double end_force = usecond();
|
||||
Real force_abs = std::sqrt(norm2(force)/U.Grid()->gSites());
|
||||
std::cout << GridLogIntegrator << "["<<level<<"]["<<a<<"] Force average: " << force_abs << " Time step: " << ep << " Impulse average: " << force_abs * ep * HMC_MOMENTUM_DENOMINATOR << std::endl;
|
||||
|
||||
Real force_abs = std::sqrt(norm2(force)/U.Grid()->gSites()); //average per-site norm. nb. norm2(latt) = \sum_x norm2(latt[x])
|
||||
Real impulse_abs = force_abs * ep * HMC_MOMENTUM_DENOMINATOR;
|
||||
|
||||
Real max_force_abs = std::sqrt(maxLocalNorm2(force));
|
||||
Real max_impulse_abs = max_force_abs * ep * HMC_MOMENTUM_DENOMINATOR;
|
||||
|
||||
std::cout << GridLogIntegrator << "["<<level<<"]["<<a<<"] Force average: " << force_abs << " Max force: " << max_force_abs << " Time step: " << ep << " Impulse average: " << impulse_abs << " Max impulse: " << max_impulse_abs << std::endl;
|
||||
Mom -= force * ep* HMC_MOMENTUM_DENOMINATOR;;
|
||||
double end_full = usecond();
|
||||
double time_full = (end_full - start_full) / 1e3;
|
||||
|
@ -284,6 +284,7 @@ int main(int argc, char **argv) {
|
||||
typedef typename FermionActionF::FermionField FermionFieldF;
|
||||
|
||||
typedef GeneralEvenOddRatioRationalMixedPrecPseudoFermionAction<FermionImplPolicyD,FermionImplPolicyF> MixedPrecRHMC;
|
||||
typedef GeneralEvenOddRatioRationalPseudoFermionAction<FermionImplPolicyD> DoublePrecRHMC;
|
||||
|
||||
//::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
|
||||
IntegratorParameters MD;
|
||||
@ -380,8 +381,10 @@ int main(int argc, char **argv) {
|
||||
rat_act_params_l.precision= 60;
|
||||
rat_act_params_l.MaxIter = 10000;
|
||||
user_params.rat_quo_l.Export(rat_act_params_l);
|
||||
std::cout << GridLogMessage << " Light quark bounds check every " << rat_act_params_l.BoundsCheckFreq << " trajectories (avg)" << std::endl;
|
||||
|
||||
MixedPrecRHMC Quotient_l(Denominator_lD, Numerator_lD, Denominator_lF, Numerator_lF, rat_act_params_l, user_params.rat_quo_l.reliable_update_freq);
|
||||
//DoublePrecRHMC Quotient_l(Denominator_lD, Numerator_lD, rat_act_params_l);
|
||||
Level1.push_back(&Quotient_l);
|
||||
|
||||
|
||||
@ -399,8 +402,10 @@ int main(int argc, char **argv) {
|
||||
rat_act_params_s.precision= 60;
|
||||
rat_act_params_s.MaxIter = 10000;
|
||||
user_params.rat_quo_s.Export(rat_act_params_s);
|
||||
// Report the bounds-check frequency of the heavy (strange) quark parameters exported just above, not the light-quark ones.
std::cout << GridLogMessage << " Heavy quark bounds check every " << rat_act_params_s.BoundsCheckFreq << " trajectories (avg)" << std::endl;
|
||||
|
||||
MixedPrecRHMC Quotient_s(Denominator_sD, Numerator_sD, Denominator_sF, Numerator_sF, rat_act_params_s, user_params.rat_quo_s.reliable_update_freq);
|
||||
//DoublePrecRHMC Quotient_s(Denominator_sD, Numerator_sD, rat_act_params_s);
|
||||
Level1.push_back(&Quotient_s);
|
||||
|
||||
|
||||
@ -435,8 +440,8 @@ int main(int argc, char **argv) {
|
||||
TheHMC.initializeGaugeFieldAndRNGs(Ud);
|
||||
if(eigenrange_l) computeEigenvalues<FermionActionD, FermionFieldD>(lanc_params_l, FGridD, FrbGridD, Ud, Numerator_lD, TheHMC.Resources.GetParallelRNG());
|
||||
if(eigenrange_s) computeEigenvalues<FermionActionD, FermionFieldD>(lanc_params_s, FGridD, FrbGridD, Ud, Numerator_sD, TheHMC.Resources.GetParallelRNG());
|
||||
if(tune_rhmc_l) checkRHMC<FermionActionD, FermionFieldD, MixedPrecRHMC>(FGridD, FrbGridD, Ud, Numerator_lD, Denominator_lD, Quotient_l, TheHMC.Resources.GetParallelRNG(), 2, "light");
|
||||
if(tune_rhmc_s) checkRHMC<FermionActionD, FermionFieldD, MixedPrecRHMC>(FGridD, FrbGridD, Ud, Numerator_sD, Denominator_sD, Quotient_s, TheHMC.Resources.GetParallelRNG(), 4, "strange");
|
||||
if(tune_rhmc_l) checkRHMC<FermionActionD, FermionFieldD, decltype(Quotient_l)>(FGridD, FrbGridD, Ud, Numerator_lD, Denominator_lD, Quotient_l, TheHMC.Resources.GetParallelRNG(), 2, "light");
|
||||
if(tune_rhmc_s) checkRHMC<FermionActionD, FermionFieldD, decltype(Quotient_s)>(FGridD, FrbGridD, Ud, Numerator_sD, Denominator_sD, Quotient_s, TheHMC.Resources.GetParallelRNG(), 4, "strange");
|
||||
|
||||
std::cout << GridLogMessage << " Done" << std::endl;
|
||||
Grid_finalize();
|
||||
|
Loading…
Reference in New Issue
Block a user