diff --git a/Grid/qcd/action/ActionBase.h b/Grid/qcd/action/ActionBase.h index 86457400..d34702c1 100644 --- a/Grid/qcd/action/ActionBase.h +++ b/Grid/qcd/action/ActionBase.h @@ -44,7 +44,7 @@ public: ConfigurationBase() {} virtual ~ConfigurationBase() {} virtual void set_Field(Field& U) =0; - virtual void smeared_force(Field&) const = 0; + virtual void smeared_force(Field&) = 0; virtual Field& get_SmearedU() =0; virtual Field &get_U(bool smeared = false) = 0; }; diff --git a/Grid/qcd/hmc/HMC.h b/Grid/qcd/hmc/HMC.h index d4739fb0..c56ae6ad 100644 --- a/Grid/qcd/hmc/HMC.h +++ b/Grid/qcd/hmc/HMC.h @@ -283,12 +283,13 @@ public: std::cout << GridLogHMC << "Total time for trajectory (s): " << (t1-t0)/1e6 << std::endl; TheIntegrator.print_timer(); - + + TheIntegrator.Smearer.set_Field(Ucur); for (int obs = 0; obs < Observables.size(); obs++) { std::cout << GridLogDebug << "Observables # " << obs << std::endl; std::cout << GridLogDebug << "Observables total " << Observables.size() << std::endl; std::cout << GridLogDebug << "Observables pointer " << Observables[obs] << std::endl; - Observables[obs]->TrajectoryComplete(traj + 1, Ucur, sRNG, pRNG); + Observables[obs]->TrajectoryComplete(traj + 1, TheIntegrator.Smearer, sRNG, pRNG); } std::cout << GridLogHMC << ":::::::::::::::::::::::::::::::::::::::::::" << std::endl; } diff --git a/Grid/qcd/hmc/checkpointers/BaseCheckpointer.h b/Grid/qcd/hmc/checkpointers/BaseCheckpointer.h index c09fdeeb..72f5e39b 100644 --- a/Grid/qcd/hmc/checkpointers/BaseCheckpointer.h +++ b/Grid/qcd/hmc/checkpointers/BaseCheckpointer.h @@ -35,13 +35,16 @@ class CheckpointerParameters : Serializable { public: GRID_SERIALIZABLE_CLASS_MEMBERS(CheckpointerParameters, std::string, config_prefix, + std::string, smeared_prefix, std::string, rng_prefix, int, saveInterval, + bool, saveSmeared, std::string, format, ); - CheckpointerParameters(std::string cf = "cfg", std::string rn = "rng", + CheckpointerParameters(std::string cf = "cfg", std::string sf="cfg_smr" , std::string rn = "rng", int savemodulo = 1, const std::string &f = "IEEE64BIG") : config_prefix(cf), + smeared_prefix(sf), rng_prefix(rn), saveInterval(savemodulo), format(f){}; @@ -61,13 +64,21 @@ template class BaseHmcCheckpointer : public HmcObservable { public: void build_filenames(int traj, CheckpointerParameters &Params, - std::string &conf_file, std::string &rng_file) { + std::string &conf_file, + std::string &smear_file, + std::string &rng_file) { { std::ostringstream os; os << Params.rng_prefix << "." << traj; rng_file = os.str(); } + { + std::ostringstream os; + os << Params.smeared_prefix << "." << traj; + smear_file = os.str(); + } + { std::ostringstream os; os << Params.config_prefix << "." << traj; @@ -84,6 +95,11 @@ public: } virtual void initialize(const CheckpointerParameters &Params) = 0; + virtual void TrajectoryComplete(int traj, + typename Impl::Field &U, + GridSerialRNG &sRNG, + GridParallelRNG &pRNG) { assert(0); } ; // HMC should pass the smart config with smeared and unsmeared + virtual void CheckpointRestore(int traj, typename Impl::Field &U, GridSerialRNG &sRNG, GridParallelRNG &pRNG) = 0; diff --git a/Grid/qcd/hmc/checkpointers/BinaryCheckpointer.h b/Grid/qcd/hmc/checkpointers/BinaryCheckpointer.h index ef9e6194..a91ec67f 100644 --- a/Grid/qcd/hmc/checkpointers/BinaryCheckpointer.h +++ b/Grid/qcd/hmc/checkpointers/BinaryCheckpointer.h @@ -61,11 +61,14 @@ public: fout.close(); } - void TrajectoryComplete(int traj, Field &U, GridSerialRNG &sRNG, GridParallelRNG &pRNG) { + void TrajectoryComplete(int traj, + ConfigurationBase &SmartConfig, + GridSerialRNG &sRNG, GridParallelRNG &pRNG) + { if ((traj % Params.saveInterval) == 0) { - std::string config, rng; - this->build_filenames(traj, Params, config, rng); + std::string config, rng, smr; + this->build_filenames(traj, Params, config, smr, rng); uint32_t nersc_csum; uint32_t scidac_csuma; @@ -74,9 +77,15 @@ public: BinarySimpleUnmunger munge; truncate(rng); BinaryIO::writeRNG(sRNG, pRNG, rng, 0,nersc_csum,scidac_csuma,scidac_csumb); - truncate(config); + std::cout << GridLogMessage << "Written Binary RNG " << rng + << " checksum " << std::hex + << nersc_csum <<"/" + << scidac_csuma <<"/" + << scidac_csumb + << std::dec << std::endl; - BinaryIO::writeLatticeObject(U, config, munge, 0, Params.format, + truncate(config); + BinaryIO::writeLatticeObject(SmartConfig.get_U(false), config, munge, 0, Params.format, nersc_csum,scidac_csuma,scidac_csumb); std::cout << GridLogMessage << "Written Binary Configuration " << config @@ -85,6 +94,18 @@ public: << scidac_csuma <<"/" << scidac_csumb << std::dec << std::endl; + + if ( Params.saveSmeared ) { + truncate(smr); + BinaryIO::writeLatticeObject(SmartConfig.get_U(true), smr, munge, 0, Params.format, + nersc_csum,scidac_csuma,scidac_csumb); + std::cout << GridLogMessage << "Written Binary Smeared Configuration " << smr + << " checksum " << std::hex + << nersc_csum <<"/" + << scidac_csuma <<"/" + << scidac_csumb + << std::dec << std::endl; + } } }; diff --git a/Grid/qcd/hmc/checkpointers/ILDGCheckpointer.h b/Grid/qcd/hmc/checkpointers/ILDGCheckpointer.h index 1bb8aa1a..12d391ef 100644 --- a/Grid/qcd/hmc/checkpointers/ILDGCheckpointer.h +++ b/Grid/qcd/hmc/checkpointers/ILDGCheckpointer.h @@ -69,17 +69,27 @@ public: } } - void TrajectoryComplete(int traj, GaugeField &U, GridSerialRNG &sRNG, + void TrajectoryComplete(int traj, + ConfigurationBase &SmartConfig, + GridSerialRNG &sRNG, GridParallelRNG &pRNG) { if ((traj % Params.saveInterval) == 0) { - std::string config, rng; + std::string config, rng, smr; this->build_filenames(traj, Params, config, rng); - GridBase *grid = U.Grid(); + GridBase *grid = SmartConfig.get_U(false).Grid(); uint32_t nersc_csum,scidac_csuma,scidac_csumb; BinaryIO::writeRNG(sRNG, pRNG, rng, 0,nersc_csum,scidac_csuma,scidac_csumb); + std::cout << GridLogMessage << "Written BINARY RNG " << rng + << " checksum " << std::hex + << nersc_csum<<"/" + << scidac_csuma<<"/" + << scidac_csumb + << std::dec << std::endl; + + IldgWriter _IldgWriter(grid->IsBoss()); _IldgWriter.open(config); - _IldgWriter.writeConfiguration(U, traj, config, config); + _IldgWriter.writeConfiguration(SmartConfig.get_U(false), traj, config, config); _IldgWriter.close(); std::cout << GridLogMessage << "Written ILDG Configuration on " << config @@ -88,6 +98,21 @@ public: << scidac_csuma<<"/" << scidac_csumb << std::dec << std::endl; + + if ( Params.saveSmeared ) { + IldgWriter _IldgWriter(grid->IsBoss()); + _IldgWriter.open(smr); + _IldgWriter.writeConfiguration(SmartConfig.get_U(true), traj, config, config); + _IldgWriter.close(); + + std::cout << GridLogMessage << "Written ILDG Configuration on " << smr + << " checksum " << std::hex + << nersc_csum<<"/" + << scidac_csuma<<"/" + << scidac_csumb + << std::dec << std::endl; + } + } }; diff --git a/Grid/qcd/hmc/checkpointers/NerscCheckpointer.h b/Grid/qcd/hmc/checkpointers/NerscCheckpointer.h index 4534e4c4..79424300 100644 --- a/Grid/qcd/hmc/checkpointers/NerscCheckpointer.h +++ b/Grid/qcd/hmc/checkpointers/NerscCheckpointer.h @@ -52,23 +52,29 @@ public: Params.format = "IEEE64BIG"; // fixed, overwrite any other choice } - void TrajectoryComplete(int traj, GaugeField &U, GridSerialRNG &sRNG, - GridParallelRNG &pRNG) { + virtual void TrajectoryComplete(int traj, + ConfigurationBase &SmartConfig, + GridSerialRNG &sRNG, + GridParallelRNG &pRNG) + { if ((traj % Params.saveInterval) == 0) { - std::string config, rng; - this->build_filenames(traj, Params, config, rng); - + std::string config, rng, smr; + this->build_filenames(traj, Params, config, smr, rng); + int precision32 = 1; int tworow = 0; NerscIO::writeRNGState(sRNG, pRNG, rng); - NerscIO::writeConfiguration(U, config, tworow, precision32); + NerscIO::writeConfiguration(SmartConfig.get_U(false), config, tworow, precision32); + if ( Params.saveSmeared ) { + NerscIO::writeConfiguration(SmartConfig.get_U(true), smr, tworow, precision32); + } } }; void CheckpointRestore(int traj, GaugeField &U, GridSerialRNG &sRNG, GridParallelRNG &pRNG) { - std::string config, rng; - this->build_filenames(traj, Params, config, rng); + std::string config, rng, smr; + this->build_filenames(traj, Params, config, smr, rng ); this->check_filename(rng); this->check_filename(config); diff --git a/Grid/qcd/hmc/checkpointers/ScidacCheckpointer.h b/Grid/qcd/hmc/checkpointers/ScidacCheckpointer.h index 986585ea..259a44b0 100644 --- a/Grid/qcd/hmc/checkpointers/ScidacCheckpointer.h +++ b/Grid/qcd/hmc/checkpointers/ScidacCheckpointer.h @@ -70,19 +70,37 @@ class ScidacHmcCheckpointer : public BaseHmcCheckpointer { } } - void TrajectoryComplete(int traj, Field &U, GridSerialRNG &sRNG, + void TrajectoryComplete(int traj, + ConfigurationBase &SmartConfig, + GridSerialRNG &sRNG, GridParallelRNG &pRNG) { if ((traj % Params.saveInterval) == 0) { - std::string config, rng; - this->build_filenames(traj, Params, config, rng); - GridBase *grid = U.Grid(); + std::string config, rng,smr; + this->build_filenames(traj, Params, config, smr, rng); + GridBase *grid = SmartConfig.get_U(false).Grid(); uint32_t nersc_csum,scidac_csuma,scidac_csumb; BinaryIO::writeRNG(sRNG, pRNG, rng, 0,nersc_csum,scidac_csuma,scidac_csumb); - ScidacWriter _ScidacWriter(grid->IsBoss()); - _ScidacWriter.open(config); - _ScidacWriter.writeScidacFieldRecord(U, MData); - _ScidacWriter.close(); + std::cout << GridLogMessage << "Written Binary RNG " << rng + << " checksum " << std::hex + << nersc_csum <<"/" + << scidac_csuma <<"/" + << scidac_csumb + << std::dec << std::endl; + + { + ScidacWriter _ScidacWriter(grid->IsBoss()); + _ScidacWriter.open(config); + _ScidacWriter.writeScidacFieldRecord(SmartConfig.get_U(false), MData); + _ScidacWriter.close(); + } + + if ( Params.saveSmeared ) { + ScidacWriter _ScidacWriter(grid->IsBoss()); + _ScidacWriter.open(smr); + _ScidacWriter.writeScidacFieldRecord(SmartConfig.get_U(true), MData); + _ScidacWriter.close(); + } std::cout << GridLogMessage << "Written Scidac Configuration on " << config << std::endl; } }; diff --git a/Grid/qcd/hmc/integrators/Integrator.h b/Grid/qcd/hmc/integrators/Integrator.h index 62e7ad44..4dd5a634 100644 --- a/Grid/qcd/hmc/integrators/Integrator.h +++ b/Grid/qcd/hmc/integrators/Integrator.h @@ -66,6 +66,7 @@ public: template class Integrator { protected: +public: typedef FieldImplementation_ FieldImplementation; typedef typename FieldImplementation::Field MomentaField; //for readability typedef typename FieldImplementation::Field Field; @@ -96,7 +97,6 @@ protected: { t_P[level] += ep; update_P(P, U, level, ep); - std::cout << GridLogIntegrator << "[" << level << "] P " << " dt " << ep << " : t_P " << t_P[level] << std::endl; } @@ -130,28 +130,20 @@ protected: Field force(U.Grid()); conformable(U.Grid(), Mom.Grid()); - Field& Us = Smearer.get_U(as[level].actions.at(a)->is_smeared); double start_force = usecond(); - std::cout << GridLogMessage << "AuditForce["<deriv_timer_start(); - as[level].actions.at(a)->deriv(Us, force); // deriv should NOT include Ta + as[level].actions.at(a)->deriv(Smearer, force); // deriv should NOT include Ta as[level].actions.at(a)->deriv_timer_stop(); - std::cout << GridLogMessage << "AuditForce["<is_smeared << std::endl; auto name = as[level].actions.at(a)->action_name(); - if (as[level].actions.at(a)->is_smeared) Smearer.smeared_force(force); force = FieldImplementation::projectForce(force); // Ta for gauge fields double end_force = usecond(); - - // DumpSliceNorm("force ",force,Nd-1); + MomFilter->applyFilter(force); + std::cout << GridLogIntegrator << " update_P : Level [" << level <<"]["<gSites()); //average per-site norm. nb. norm2(latt) = \sum_x norm2(latt[x]) Real impulse_abs = force_abs * ep * HMC_MOMENTUM_DENOMINATOR; @@ -377,14 +369,9 @@ public: auto name = as[level].actions.at(actionID)->action_name(); std::cout << GridLogMessage << "refresh [" << level << "][" << actionID << "] "<is_smeared); - - std::cout << GridLogMessage << "AuditRefresh["<refresh_timer_start(); - as[level].actions.at(actionID)->refresh(Us, sRNG, pRNG); + as[level].actions.at(actionID)->refresh(Smearer, sRNG, pRNG); as[level].actions.at(actionID)->refresh_timer_stop(); - std::cout << GridLogMessage << "AuditRefresh["<is_smeared); std::cout << GridLogMessage << "S [" << level << "][" << actionID << "] action eval " << std::endl; as[level].actions.at(actionID)->S_timer_start(); - Hterm = as[level].actions.at(actionID)->S(Us); + Hterm = as[level].actions.at(actionID)->S(Smearer); as[level].actions.at(actionID)->S_timer_stop(); std::cout << GridLogMessage << "S [" << level << "][" << actionID << "] H = " << Hterm << std::endl; H += Hterm; @@ -469,12 +455,11 @@ public: for (int actionID = 0; actionID < as[level].actions.size(); ++actionID) { // get gauge field from the SmearingPolicy and // based on the boolean is_smeared in actionID - Field& Us = Smearer.get_U(as[level].actions.at(actionID)->is_smeared); std::cout << GridLogMessage << "S [" << level << "][" << actionID << "] action eval " << std::endl; - as[level].actions.at(actionID)->S_timer_start(); - Hterm = as[level].actions.at(actionID)->Sinitial(Us); - as[level].actions.at(actionID)->S_timer_stop(); + as[level].actions.at(actionID)->S_timer_start(); + Hterm = as[level].actions.at(actionID)->S(Smearer); + as[level].actions.at(actionID)->S_timer_stop(); std::cout << GridLogMessage << "S [" << level << "][" << actionID << "] H = " << Hterm << std::endl; H += Hterm; diff --git a/Grid/qcd/observables/hmc_observable.h b/Grid/qcd/observables/hmc_observable.h index c28c376d..277ded28 100644 --- a/Grid/qcd/observables/hmc_observable.h +++ b/Grid/qcd/observables/hmc_observable.h @@ -34,6 +34,13 @@ NAMESPACE_BEGIN(Grid); template class HmcObservable { public: + virtual void TrajectoryComplete(int traj, + ConfigurationBase &SmartConfig, + GridSerialRNG &sRNG, + GridParallelRNG &pRNG) + { + TrajectoryComplete(traj,SmartConfig.get_U(false),sRNG,pRNG); // Unsmeared observable + }; virtual void TrajectoryComplete(int traj, Field &U, GridSerialRNG &sRNG, diff --git a/Grid/qcd/observables/plaquette.h b/Grid/qcd/observables/plaquette.h index f038e455..9f67c7cb 100644 --- a/Grid/qcd/observables/plaquette.h +++ b/Grid/qcd/observables/plaquette.h @@ -42,6 +42,18 @@ public: // necessary for HmcObservable compatibility typedef typename Impl::Field Field; + virtual void TrajectoryComplete(int traj, + ConfigurationBase &SmartConfig, + GridSerialRNG &sRNG, + GridParallelRNG &pRNG) + { + std::cout << GridLogMessage << "+++++++++++++++++++"< 0) { @@ -272,14 +272,16 @@ public: } double end = usecond(); double time = (end - start)/ 1e3; - std::cout << GridLogMessage << "Smearing force in " << time << " ms" << std::endl; + std::cout << GridLogMessage << " GaugeConfiguration: Smeared Force chain rule took " << time << " ms" << std::endl; } // if smearingLevels = 0 do nothing + SigmaTilde=Gimpl::projectForce(SigmaTilde); // Ta + } //==================================================================== - GaugeField& get_SmearedU() { return SmearedSet[smearingLevels - 1]; } + virtual GaugeField& get_SmearedU() { return SmearedSet[smearingLevels - 1]; } - GaugeField &get_U(bool smeared = false) + virtual GaugeField &get_U(bool smeared = false) { // get the config, thin links by default if (smeared) diff --git a/Grid/qcd/smearing/GaugeConfigurationMasked.h b/Grid/qcd/smearing/GaugeConfigurationMasked.h index 23c8dba9..4309c470 100644 --- a/Grid/qcd/smearing/GaugeConfigurationMasked.h +++ b/Grid/qcd/smearing/GaugeConfigurationMasked.h @@ -131,6 +131,7 @@ public: AdjMatrixField X(grid); Complex ci(0,1); + RealD t0 = usecond(); Ident = ComplexD(1.0); for(int d=0;dStoutSmearing->BaseSmear(C, U); Cmu = peekLorentz(C, mu); @@ -169,7 +172,10 @@ public: ////////////////////////////////////////////////////////////////// // Ta so Z lives in Lie algabra Zx = Ta(Cmu * adj(Umu[mu])); + time+=usecond(); + std::cout << GridLogMessage << "Z took "< dJdX; dJdX.resize(8,grid); AdjMatrixField tbXn(grid); AdjMatrixField sumXtbX(grid); @@ -220,12 +232,17 @@ public: } dJdX[b] = -dt2; } + time+=usecond(); + std::cout << GridLogMessage << "dJx took "<& Stout,bool domask=false) + SmearedConfigurationMasked(GridCartesian* _UGrid, unsigned int Nsmear, Smear_Stout& Stout) : SmearedConfiguration(_UGrid, Nsmear,Stout) { - if(domask) assert(Nsmear%(2*Nd)==0); // Or multiply by 8?? + assert(Nsmear%(2*Nd)==0); // Or multiply by 8?? // was resized in base class assert(this->SmearedSet.size()==Nsmear); @@ -712,26 +751,20 @@ public: for (unsigned int i = 0; i < this->smearingLevels; ++i) { masks.push_back(*(new LatticeLorentzComplex(_UGrid))); - if (domask) { - int mu= (i/2) %Nd; - int cb= (i%2); - LatticeComplex tmpcb(UrbGrid); + int mu= (i/2) %Nd; + int cb= (i%2); + LatticeComplex tmpcb(UrbGrid); - masks[i]=Zero(); - //////////////////// - // Setup the mask - //////////////////// - tmp = Zero(); - pickCheckerboard(cb,tmpcb,one); - setCheckerboard(tmp,tmpcb); - PokeIndex(masks[i],tmp, mu); + masks[i]=Zero(); + //////////////////// + // Setup the mask + //////////////////// + tmp = Zero(); + pickCheckerboard(cb,tmpcb,one); + setCheckerboard(tmp,tmpcb); + PokeIndex(masks[i],tmp, mu); - } else { - for(int mu=0;mu(masks[i],one, mu); - } - } } delete UrbGrid; } @@ -764,10 +797,14 @@ public: tmp_mu = peekLorentz(*this->ThinLinks, mu) * peekLorentz(force, mu); pokeLorentz(SigmaTilde, tmp_mu, mu); } + + double end = usecond(); double time = (end - start)/ 1e3; - std::cout << GridLogMessage << " GaugeConfigurationMasked: Smeared Force chain rule took " << time << " ms" << std::endl; + std::cout << GridLogMessage << " GaugeConfigurationMasked: Smeared Force chain rule took " << time << " ms" << std::endl; + } // if smearingLevels = 0 do nothing + SigmaTilde=Gimpl::projectForce(SigmaTilde); // Ta } }; diff --git a/Grid/qcd/smearing/JacobianAction.h b/Grid/qcd/smearing/JacobianAction.h index 2ab2c083..6db59bec 100644 --- a/Grid/qcd/smearing/JacobianAction.h +++ b/Grid/qcd/smearing/JacobianAction.h @@ -2,15 +2,11 @@ Grid physics library, www.github.com/paboyle/Grid -Source file: ./lib/qcd/action/gauge/WilsonGaugeAction.h +Source file: ./lib/qcd/action/gauge/JacobianAction.h Copyright (C) 2015 -Author: Azusa Yamaguchi Author: Peter Boyle -Author: neo -Author: paboyle -Author: Guido Cossu This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by diff --git a/Grid/qcd/utils/SUn.h b/Grid/qcd/utils/SUn.h index aaabc4d4..00cd7f40 100644 --- a/Grid/qcd/utils/SUn.h +++ b/Grid/qcd/utils/SUn.h @@ -34,6 +34,59 @@ directory NAMESPACE_BEGIN(Grid); +template +Lattice > > > Determinant(const Lattice > > > &Umu) +{ + GridBase *grid=Umu.Grid(); + auto lvol = grid->lSites(); + Lattice > > > ret(grid); + + autoView(Umu_v,Umu,CpuRead); + autoView(ret_v,ret,CpuWrite); + thread_for(site,lvol,{ + Eigen::MatrixXcd EigenU = Eigen::MatrixXcd::Zero(N,N); + Coordinate lcoor; + grid->LocalIndexToLocalCoor(site, lcoor); + iScalar > > Us; + peekLocalSite(Us, Umu_v, lcoor); + for(int i=0;i +static void ProjectSUn(Lattice > > > &Umu) +{ + Umu = ProjectOnGroup(Umu); + auto det = Determinant(Umu); + + det = conjugate(det); + + for(int i=0;i(Umu,N-1,i); + element = element * det; + PokeIndex(Umu,element,Nc-1,i); + } +} +template +static void ProjectSUn(Lattice >,Nd> > &U) +{ + GridBase *grid=U.Grid(); + // Reunitarise + for(int mu=0;mu(U,mu); + Umu = ProjectOnGroup(Umu); + ProjectSUn(Umu); + PokeIndex(U,Umu,mu); + } +} + template class SU { public: @@ -741,8 +794,14 @@ public: typedef Lattice LatticeMatrixType; LatticeMatrixType Umu(out.Grid()); + LatticeMatrixType tmp(out.Grid()); for (int mu = 0; mu < Nd; mu++) { - LieRandomize(pRNG, Umu, 1.0); + // LieRandomize(pRNG, Umu, 1.0); + // PokeIndex(out, Umu, mu); + gaussian(pRNG,Umu); + tmp = Ta(Umu); + taExp(tmp,Umu); + ProjectSUn(Umu); PokeIndex(out, Umu, mu); } } @@ -798,30 +857,6 @@ public: } }; -template -LatticeComplexD Determinant(const Lattice > > > &Umu) -{ - GridBase *grid=Umu.Grid(); - auto lvol = grid->lSites(); - LatticeComplexD ret(grid); - - autoView(Umu_v,Umu,CpuRead); - autoView(ret_v,ret,CpuWrite); - thread_for(site,lvol,{ - Eigen::MatrixXcd EigenU = Eigen::MatrixXcd::Zero(N,N); - Coordinate lcoor; - grid->LocalIndexToLocalCoor(site, lcoor); - iScalar > > Us; - peekLocalSite(Us, Umu_v, lcoor); - for(int i=0;i Lattice > > > Inverse(const Lattice > > > &Umu) { @@ -851,32 +886,6 @@ Lattice > > > Inverse(const Lattice -static void ProjectSUn(Lattice > > > &Umu) -{ - Umu = ProjectOnGroup(Umu); - auto det = Determinant(Umu); - - det = conjugate(det); - - for(int i=0;i(Umu,N-1,i); - element = element * det; - PokeIndex(Umu,element,Nc-1,i); - } -} -template -static void ProjectSUn(Lattice >,Nd> > &U) -{ - GridBase *grid=U.Grid(); - // Reunitarise - for(int mu=0;mu(U,mu); - Umu = ProjectOnGroup(Umu); - ProjectSUn(Umu); - PokeIndex(U,Umu,mu); - } -} // Explicit specialisation for SU(3). // Explicit specialisation for SU(3). static void diff --git a/systems/Sunspot/benchmarks/bench.pbs b/systems/Sunspot/benchmarks/bench.pbs new file mode 100644 index 00000000..f1a26aa4 --- /dev/null +++ b/systems/Sunspot/benchmarks/bench.pbs @@ -0,0 +1,46 @@ +#!/bin/bash + +#PBS -l select=1:system=sunspot,place=scatter +#PBS -A LatticeQCD_aesp_CNDA +#PBS -l walltime=01:00:00 +#PBS -N dwf +#PBS -k doe + +HDIR=/home/paboyle/ +module use /soft/testing/modulefiles/ +module load intel-UMD23.05.25593.11/23.05.25593.11 +module load tools/pti-gpu +export LD_LIBRARY_PATH=$HDIR/tools/lib64:$LD_LIBRARY_PATH +export PATH=$HDIR/tools/bin:$PATH + +export TZ='/usr/share/zoneinfo/US/Central' +export OMP_PROC_BIND=spread +export OMP_NUM_THREADS=3 +unset OMP_PLACES + +cd $PBS_O_WORKDIR + +qsub jobscript.pbs + +echo Jobid: $PBS_JOBID +echo Running on host `hostname` +echo Running on nodes `cat $PBS_NODEFILE` + +echo NODES +cat $PBS_NODEFILE +NNODES=`wc -l < $PBS_NODEFILE` +NRANKS=12 # Number of MPI ranks per node +NDEPTH=4 # Number of hardware threads per rank, spacing between MPI ranks on a node +NTHREADS=$OMP_NUM_THREADS # Number of OMP threads per rank, given to OMP_NUM_THREADS + +NTOTRANKS=$(( NNODES * NRANKS )) + +echo "NUM_NODES=${NNODES} TOTAL_RANKS=${NTOTRANKS} RANKS_PER_NODE=${NRANKS} THREADS_PER_RANK=${OMP_NUM_THREADS}" +echo "OMP_PROC_BIND=$OMP_PROC_BIND OMP_PLACES=$OMP_PLACES" + + +CMD="mpiexec -np ${NTOTRANKS} -ppn ${NRANKS} -d ${NDEPTH} --cpu-bind=depth -envall \ + ./gpu_tile_compact.sh \ + ./Benchmark_dwf_fp32 --mpi 1.1.2.6 --grid 16.32.64.192 --comms-overlap \ + --shm-mpi 0 --shm 2048 --device-mem 32000 --accelerator-threads 32" + diff --git a/systems/Sunspot/benchmarks/gpu_tile_compact.sh b/systems/Sunspot/benchmarks/gpu_tile_compact.sh new file mode 100755 index 00000000..ec532b1b --- /dev/null +++ b/systems/Sunspot/benchmarks/gpu_tile_compact.sh @@ -0,0 +1,52 @@ +#!/bin/bash + +display_help() { + echo " Will map gpu tile to rank in compact and then round-robin fashion" + echo " Usage (only work for one node of ATS/PVC):" + echo " mpiexec --np N gpu_tile_compact.sh ./a.out" + echo + echo " Example 3 GPU of 2 Tiles with 7 Ranks:" + echo " 0 Rank 0.0" + echo " 1 Rank 0.1" + echo " 2 Rank 1.0" + echo " 3 Rank 1.1" + echo " 4 Rank 2.0" + echo " 5 Rank 2.1" + echo " 6 Rank 0.0" + echo + echo " Hacked together by apl@anl.gov, please contact if bug found" + exit 1 +} + +#This give the exact GPU count i915 knows about and I use udev to only enumerate the devices with physical presence. +#works? num_gpu=$(/usr/bin/udevadm info /sys/module/i915/drivers/pci\:i915/* |& grep -v Unknown | grep -c "P: /devices") +num_gpu=6 +num_tile=2 + +if [ "$#" -eq 0 ] || [ "$1" == "--help" ] || [ "$1" == "-h" ] || [ "$num_gpu" = 0 ]; then + display_help +fi + +gpu_id=$(( (PALS_LOCAL_RANKID / num_tile ) % num_gpu )) +tile_id=$((PALS_LOCAL_RANKID % num_tile)) + +unset EnableWalkerPartition +export EnableImplicitScaling=0 +export ZE_ENABLE_PCI_ID_DEVICE_ORDER=1 +export ZE_AFFINITY_MASK=$gpu_id.$tile_id +export ONEAPI_DEVICE_FILTER=gpu,level_zero +export SYCL_PI_LEVEL_ZERO_DEVICE_SCOPE_EVENTS=0 +export SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1 +export SYCL_PI_LEVEL_ZERO_USE_COPY_ENGINE=0:2 +export SYCL_PI_LEVEL_ZERO_USE_COPY_ENGINE_FOR_D2D_COPY=1 +#export SYCL_PI_LEVEL_ZERO_USM_RESIDENT=1 + +echo "rank $PALS_RANKID ; local rank $PALS_LOCAL_RANKID ; ZE_AFFINITY_MASK=$ZE_AFFINITY_MASK" + +if [ $PALS_LOCAL_RANKID = 0 ] +then + onetrace --chrome-device-timeline "$@" +# "$@" +else +"$@" +fi diff --git a/systems/Sunspot/config-command b/systems/Sunspot/config-command new file mode 100644 index 00000000..7adf7117 --- /dev/null +++ b/systems/Sunspot/config-command @@ -0,0 +1,16 @@ +TOOLS=$HOME/tools +../../configure \ + --enable-simd=GPU \ + --enable-gen-simd-width=64 \ + --enable-comms=mpi-auto \ + --enable-accelerator-cshift \ + --disable-gparity \ + --disable-fermion-reps \ + --enable-shm=nvlink \ + --enable-accelerator=sycl \ + --enable-unified=no \ + MPICXX=mpicxx \ + CXX=icpx \ + LDFLAGS="-fiopenmp -fsycl -fsycl-device-code-split=per_kernel -fsycl-device-lib=all -lze_loader -lapmidg -L$TOOLS/lib64/" \ + CXXFLAGS="-fiopenmp -fsycl-unnamed-lambda -fsycl -I$INSTALL/include -Wno-tautological-compare -I$HOME/ -I$TOOLS/include" + diff --git a/systems/mac-arm/config-command-mpi b/systems/mac-arm/config-command-mpi index a140a2c5..84a656f4 100644 --- a/systems/mac-arm/config-command-mpi +++ b/systems/mac-arm/config-command-mpi @@ -1,4 +1,4 @@ BREW=/opt/local/ -MPICXX=mpicxx CXX=c++-12 ../../configure --enable-simd=GEN --enable-comms=mpi-auto --enable-unified=yes --prefix $HOME/QCD/GridInstall --with-lime=/Users/peterboyle/QCD/SciDAC/install/ --with-openssl=$BREW --disable-fermion-reps --disable-gparity +MPICXX=mpicxx CXX=c++-12 ../../configure --enable-simd=GEN --enable-comms=mpi-auto --enable-unified=yes --prefix $HOME/QCD/GridInstall --with-lime=/Users/peterboyle/QCD/SciDAC/install/ --with-openssl=$BREW --disable-fermion-reps --disable-gparity --disable-debug diff --git a/tests/forces/Test_fthmc.cc b/tests/forces/Test_fthmc.cc index 0a64cdb9..50f60f99 100644 --- a/tests/forces/Test_fthmc.cc +++ b/tests/forces/Test_fthmc.cc @@ -32,9 +32,12 @@ Author: Peter Boyle using namespace std; using namespace Grid; +typedef MobiusFermionD FermionAction; +typedef WilsonImplD FimplD; +typedef WilsonImplD FermionImplPolicy; template -void ForceTest(Action &action,SmearedConfigurationMasked & smU,MomentumFilterBase &Filter) +void ForceTest(Action &action,ConfigurationBase & smU,MomentumFilterBase &Filter) { LatticeGaugeField U = smU.get_U(false); // unsmeared config GridBase *UGrid = U.Grid(); @@ -51,14 +54,14 @@ void ForceTest(Action &action,SmearedConfigurationMasked &action,SmearedConfigurationMasked &action,SmearedConfigurationMasked(UdSdU,mu); Pmu= PeekIndex(P,mu); - dS = dS - trace(Pmu*UdSdUmu)*eps*2.0*2.0; + dS = dS - trace(Pmu*UdSdUmu)*eps*2.0*HMC_MOMENTUM_DENOMINATOR; } ComplexD dSpred = sum(dS); RealD diff = S2-S1-dSpred.real(); @@ -125,8 +128,11 @@ int main (int argc, char ** argv) const int Ls=12; const int Nt = latt_size[3]; GridCartesian * UGrid = SpaceTimeGrid::makeFourDimGrid(GridDefaultLatt(), GridDefaultSimd(Nd,vComplex::Nsimd()),GridDefaultMpi()); + GridRedBlackCartesian * UrbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(UGrid); GridCartesian * FGrid = SpaceTimeGrid::makeFiveDimGrid(Ls,UGrid); + GridRedBlackCartesian * FrbGrid = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls,UGrid); + ///////////////////// Gauge Field and Gauge Forces //////////////////////////// LatticeGaugeField U(UGrid); @@ -141,17 +147,40 @@ int main (int argc, char ** argv) #endif - RealD beta=6.0; - WilsonGaugeActionR PlaqAction(beta); - IwasakiGaugeActionR RectAction(beta); + WilsonGaugeActionR PlaqAction(6.0); + IwasakiGaugeActionR RectAction(2.13); + PlaqAction.is_smeared = true; + RectAction.is_smeared = true; + //////////////////////////////////// + // Fermion Action + //////////////////////////////////// + RealD mass=0.01; + RealD pvmass=1.0; + RealD M5=1.8; + RealD b=1.5; + RealD c=0.5; + + // Double versions + std::vector boundary = {1,1,1,-1}; + FermionAction::ImplParams Params(boundary); + FermionAction DdwfPeriodic(U,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5,b,c,Params); + FermionAction PVPeriodic (U,*FGrid,*FrbGrid,*UGrid,*UrbGrid,pvmass,M5,b,c,Params); + double StoppingCondition = 1.0e-8; + double MaxCGIterations = 50000; + ConjugateGradient CG(StoppingCondition,MaxCGIterations); + + TwoFlavourRatioPseudoFermionAction Nf2(PVPeriodic, DdwfPeriodic,CG,CG); + Nf2.is_smeared = true; + //////////////////////////////////////////////// // Plaquette only FTHMC smearer //////////////////////////////////////////////// double rho = 0.1; Smear_Stout Smearer(rho); - SmearedConfigurationMasked SmartConfig(UGrid,2*Nd,Smearer,true); + SmearedConfigurationMasked SmartConfig(UGrid,2*Nd,Smearer); + SmearedConfiguration StoutConfig(UGrid,1,Smearer); JacobianAction Jacobian(&SmartConfig); @@ -159,12 +188,32 @@ int main (int argc, char ** argv) // Run some tests //////////////////////////////////////////////// MomentumFilterNone FilterNone; + + std::cout << " ********* FIELD TRANSFORM SMEARING ***** "<(PlaqAction,SmartConfig,FilterNone); + SmartConfig.set_Field(U); ForceTest(RectAction,SmartConfig,FilterNone); + SmartConfig.set_Field(U); ForceTest(Jacobian,SmartConfig,FilterNone); + SmartConfig.set_Field(U); + ForceTest(Nf2,SmartConfig,FilterNone); + + std::cout << " ********* STOUT SMEARING ***** "<(PlaqAction,StoutConfig,FilterNone); + + StoutConfig.set_Field(U); + ForceTest(RectAction,StoutConfig,FilterNone); + + StoutConfig.set_Field(U); + ForceTest(Nf2,StoutConfig,FilterNone); + + Grid_finalize(); }