From 6e4a06e180f7500df13ceea362b71294b8da74ff Mon Sep 17 00:00:00 2001 From: Antonin Portelli Date: Thu, 20 Oct 2016 15:04:00 +0100 Subject: [PATCH 001/177] qed-fvol: initial commit --- Makefile.am | 2 +- configure.ac | 2 ++ programs/Makefile.am | 1 + programs/qed-fvol/Global.cc | 11 +++++++++ programs/qed-fvol/Global.hpp | 42 +++++++++++++++++++++++++++++++++++ programs/qed-fvol/Makefile.am | 9 ++++++++ programs/qed-fvol/qed-fvol.cc | 36 ++++++++++++++++++++++++++++++ 7 files changed, 102 insertions(+), 1 deletion(-) create mode 100644 programs/Makefile.am create mode 100644 programs/qed-fvol/Global.cc create mode 100644 programs/qed-fvol/Global.hpp create mode 100644 programs/qed-fvol/Makefile.am create mode 100644 programs/qed-fvol/qed-fvol.cc diff --git a/Makefile.am b/Makefile.am index 90c5cd71..8cc860a9 100644 --- a/Makefile.am +++ b/Makefile.am @@ -1,5 +1,5 @@ # additional include paths necessary to compile the C++ library -SUBDIRS = lib benchmarks tests +SUBDIRS = lib benchmarks tests programs AM_CXXFLAGS += -I$(top_builddir)/include ACLOCAL_AMFLAGS = -I m4 diff --git a/configure.ac b/configure.ac index 7bcdc49f..81ced467 100644 --- a/configure.ac +++ b/configure.ac @@ -326,6 +326,8 @@ AC_CONFIG_FILES(tests/hmc/Makefile) AC_CONFIG_FILES(tests/solver/Makefile) AC_CONFIG_FILES(tests/qdpxx/Makefile) AC_CONFIG_FILES(benchmarks/Makefile) +AC_CONFIG_FILES(programs/Makefile) +AC_CONFIG_FILES(programs/qed-fvol/Makefile) AC_OUTPUT echo " diff --git a/programs/Makefile.am b/programs/Makefile.am new file mode 100644 index 00000000..ff7f6584 --- /dev/null +++ b/programs/Makefile.am @@ -0,0 +1 @@ +SUBDIRS = qed-fvol diff --git a/programs/qed-fvol/Global.cc b/programs/qed-fvol/Global.cc new file mode 100644 index 00000000..57ed97cc --- /dev/null +++ b/programs/qed-fvol/Global.cc @@ -0,0 +1,11 @@ +#include + +using namespace Grid; +using namespace QCD; +using namespace QedFVol; + +QedFVolLogger QedFVol::QedFVolLogError(1,"Error"); +QedFVolLogger 
QedFVol::QedFVolLogWarning(1,"Warning"); +QedFVolLogger QedFVol::QedFVolLogMessage(1,"Message"); +QedFVolLogger QedFVol::QedFVolLogIterative(1,"Iterative"); +QedFVolLogger QedFVol::QedFVolLogDebug(1,"Debug"); diff --git a/programs/qed-fvol/Global.hpp b/programs/qed-fvol/Global.hpp new file mode 100644 index 00000000..7f07200d --- /dev/null +++ b/programs/qed-fvol/Global.hpp @@ -0,0 +1,42 @@ +#ifndef QedFVol_Global_hpp_ +#define QedFVol_Global_hpp_ + +#include + +#define BEGIN_QEDFVOL_NAMESPACE \ +namespace Grid {\ +using namespace QCD;\ +namespace QedFVol {\ +using Grid::operator<<; +#define END_QEDFVOL_NAMESPACE }} + +/* the 'using Grid::operator<<;' statement prevents a very nasty compilation + * error with GCC (clang compiles fine without it). + */ + +BEGIN_QEDFVOL_NAMESPACE + +class QedFVolLogger: public Logger +{ +public: + QedFVolLogger(int on, std::string nm): Logger("QedFVol", on, nm, + GridLogColours, "BLACK"){}; +}; + +#define LOG(channel) std::cout << QedFVolLog##channel +#define QEDFVOL_ERROR(msg)\ +LOG(Error) << msg << " (" << __FUNCTION__ << " at " << __FILE__ << ":"\ + << __LINE__ << ")" << std::endl;\ +abort(); + +#define DEBUG_VAR(var) LOG(Debug) << #var << "= " << (var) << std::endl; + +extern QedFVolLogger QedFVolLogError; +extern QedFVolLogger QedFVolLogWarning; +extern QedFVolLogger QedFVolLogMessage; +extern QedFVolLogger QedFVolLogIterative; +extern QedFVolLogger QedFVolLogDebug; + +END_QEDFVOL_NAMESPACE + +#endif // QedFVol_Global_hpp_ diff --git a/programs/qed-fvol/Makefile.am b/programs/qed-fvol/Makefile.am new file mode 100644 index 00000000..cd762e94 --- /dev/null +++ b/programs/qed-fvol/Makefile.am @@ -0,0 +1,9 @@ +AM_CXXFLAGS += -I$(top_srcdir)/programs -I../$(top_srcdir)/programs + +bin_PROGRAMS = qed-fvol + +qed_fvol_SOURCES = \ + qed-fvol.cc \ + Global.cc + +qed_fvol_LDADD = -lGrid diff --git a/programs/qed-fvol/qed-fvol.cc b/programs/qed-fvol/qed-fvol.cc new file mode 100644 index 00000000..bb3204c6 --- /dev/null +++ 
b/programs/qed-fvol/qed-fvol.cc @@ -0,0 +1,36 @@ +#include + +using namespace Grid; +using namespace QCD; +using namespace QedFVol; + +int main(int argc, char *argv[]) +{ + // parse command line + std::string parameterFileName; + + if (argc < 2) + { + std::cerr << "usage: " << argv[0] << " [Grid options]"; + std::cerr << std::endl; + std::exit(EXIT_FAILURE); + } + parameterFileName = argv[1]; + + // initialization + Grid_init(&argc, &argv); + QedFVolLogError.Active(GridLogError.isActive()); + QedFVolLogWarning.Active(GridLogWarning.isActive()); + QedFVolLogMessage.Active(GridLogMessage.isActive()); + QedFVolLogIterative.Active(GridLogIterative.isActive()); + QedFVolLogDebug.Active(GridLogDebug.isActive()); + LOG(Message) << "Grid initialized" << std::endl; + + + + // epilogue + LOG(Message) << "Grid is finalizing now" << std::endl; + Grid_finalize(); + + return EXIT_SUCCESS; +} From 0d889b70410bfdaf70b5cbffe2fb92157943cc03 Mon Sep 17 00:00:00 2001 From: Antonin Portelli Date: Fri, 21 Oct 2016 15:21:32 +0100 Subject: [PATCH 002/177] QedFVol: first attempt at generating a QED field --- programs/qed-fvol/qed-fvol.cc | 38 ++++++++++++++++++++++++++++++++++- 1 file changed, 37 insertions(+), 1 deletion(-) diff --git a/programs/qed-fvol/qed-fvol.cc b/programs/qed-fvol/qed-fvol.cc index bb3204c6..53e01de9 100644 --- a/programs/qed-fvol/qed-fvol.cc +++ b/programs/qed-fvol/qed-fvol.cc @@ -4,6 +4,31 @@ using namespace Grid; using namespace QCD; using namespace QedFVol; +template +class QedGimpl +{ +public: + typedef S Simd; + + template + using iImplGaugeLink = iScalar>>; + template + using iImplGaugeField = iVector>, Nd>; + + typedef iImplGaugeLink SiteGaugeLink; + typedef iImplGaugeField SiteGaugeField; + + typedef Lattice GaugeLinkField; // bit ugly naming; polarised + // gauge field, lorentz... 
all + // ugly + typedef Lattice GaugeField; +}; + +typedef QedGimpl QedGimplR; +typedef Photon PhotonR; +typedef PhotonR::GaugeField EmField; +typedef PhotonR::GaugeLinkField EmComp; + int main(int argc, char *argv[]) { // parse command line @@ -26,8 +51,19 @@ int main(int argc, char *argv[]) QedFVolLogDebug.Active(GridLogDebug.isActive()); LOG(Message) << "Grid initialized" << std::endl; + // QED stuff + std::vector latt_size = GridDefaultLatt(); + std::vector simd_layout = GridDefaultSimd(4, vComplex::Nsimd()); + std::vector mpi_layout = GridDefaultMpi(); + GridCartesian grid(latt_size,simd_layout,mpi_layout); + GridParallelRNG pRNG(&grid); + PhotonR photon(PhotonR::Gauge::Feynman, + PhotonR::ZmScheme::QedL); + EmField a(&grid); + + pRNG.SeedRandomDevice(); + photon.StochasticField(a, pRNG); - // epilogue LOG(Message) << "Grid is finalizing now" << std::endl; Grid_finalize(); From 3ab4c8c0bbde6a572d41074405c2baa8e9a0119c Mon Sep 17 00:00:00 2001 From: James Harrison Date: Tue, 25 Oct 2016 13:32:02 +0100 Subject: [PATCH 003/177] QedFVol: calculate plaquette and 2x2 Wilson loop of stochastic QED field --- programs/qed-fvol/WilsonLoops.h | 167 ++++++++++++++++++++++++++++++++ programs/qed-fvol/qed-fvol.cc | 45 +++++++++ 2 files changed, 212 insertions(+) create mode 100644 programs/qed-fvol/WilsonLoops.h diff --git a/programs/qed-fvol/WilsonLoops.h b/programs/qed-fvol/WilsonLoops.h new file mode 100644 index 00000000..610fdc7b --- /dev/null +++ b/programs/qed-fvol/WilsonLoops.h @@ -0,0 +1,167 @@ +#ifndef QEDFVOL_WILSONLOOPS_H +#define QEDFVOL_WILSONLOOPS_H + +#include + +BEGIN_QEDFVOL_NAMESPACE + +template class WilsonLoops : public Gimpl { +public: + INHERIT_GIMPL_TYPES(Gimpl); + + typedef typename Gimpl::GaugeLinkField GaugeMat; + typedef typename Gimpl::GaugeField GaugeLorentz; + + ////////////////////////////////////////////////// + // directed plaquette oriented in mu,nu plane + ////////////////////////////////////////////////// + static void 
dirPlaquette(GaugeMat &plaq, const std::vector &U, + const int mu, const int nu) { + // Annoyingly, must use either scope resolution to find dependent base + // class, + // or this-> ; there is no "this" in a static method. This forces explicit + // Gimpl scope + // resolution throughout the usage in this file, and rather defeats the + // purpose of deriving + // from Gimpl. + plaq = Gimpl::CovShiftBackward( + U[mu], mu, Gimpl::CovShiftBackward( + U[nu], nu, Gimpl::CovShiftForward(U[mu], mu, U[nu]))); + } + ////////////////////////////////////////////////// + // trace of directed plaquette oriented in mu,nu plane + ////////////////////////////////////////////////// + static void traceDirPlaquette(LatticeComplex &plaq, + const std::vector &U, const int mu, + const int nu) { + GaugeMat sp(U[0]._grid); + dirPlaquette(sp, U, mu, nu); + plaq = trace(sp); + } + ////////////////////////////////////////////////// + // sum over all planes of plaquette + ////////////////////////////////////////////////// + static void sitePlaquette(LatticeComplex &Plaq, + const std::vector &U) { + LatticeComplex sitePlaq(U[0]._grid); + Plaq = zero; + for (int mu = 1; mu < Nd; mu++) { + for (int nu = 0; nu < mu; nu++) { + traceDirPlaquette(sitePlaq, U, mu, nu); + Plaq = Plaq + sitePlaq; + } + } + } + ////////////////////////////////////////////////// + // sum over all x,y,z,t and over all planes of plaquette + ////////////////////////////////////////////////// + static RealD sumPlaquette(const GaugeLorentz &Umu) { + std::vector U(4, Umu._grid); + + for (int mu = 0; mu < Nd; mu++) { + U[mu] = PeekIndex(Umu, mu); + } + + LatticeComplex Plaq(Umu._grid); + + sitePlaquette(Plaq, U); + + TComplex Tp = sum(Plaq); + Complex p = TensorRemove(Tp); + return p.real(); + } + ////////////////////////////////////////////////// + // average over all x,y,z,t and over all planes of plaquette + ////////////////////////////////////////////////// + static RealD avgPlaquette(const GaugeLorentz &Umu) { + RealD 
sumplaq = sumPlaquette(Umu); + double vol = Umu._grid->gSites(); + double faces = (1.0 * Nd * (Nd - 1)) / 2.0; + return sumplaq / vol / faces / Nc; // Nd , Nc dependent... FIXME + } + + ////////////////////////////////////////////////// + // Wilson loop of size (R1, R2), oriented in mu,nu plane + ////////////////////////////////////////////////// + static void wilsonLoop(GaugeMat &wl, const std::vector &U, + const int Rmu, const int Rnu, + const int mu, const int nu) { + wl = U[nu]; + + for(int i = 0; i < Rnu-1; i++){ + wl = Gimpl::CovShiftForward(U[nu], nu, wl); + } + + for(int i = 0; i < Rmu; i++){ + wl = Gimpl::CovShiftForward(U[mu], mu, wl); + } + + for(int i = 0; i < Rnu; i++){ + wl = Gimpl::CovShiftBackward(U[nu], nu, wl); + } + + for(int i = 0; i < Rmu; i++){ + wl = Gimpl::CovShiftBackward(U[mu], mu, wl); + } + } + ////////////////////////////////////////////////// + // trace of Wilson Loop oriented in mu,nu plane + ////////////////////////////////////////////////// + static void traceWilsonLoop(LatticeComplex &wl, + const std::vector &U, + const int Rmu, const int Rnu, + const int mu, const int nu) { + GaugeMat sp(U[0]._grid); + WilsonLoop(sp, U, Rmu, Rnu, mu, nu); + wl = trace(sp); + } + ////////////////////////////////////////////////// + // sum over all planes of Wilson loop + ////////////////////////////////////////////////// + static void siteWilsonLoop(LatticeComplex &Wl, + const std::vector &U + const int R1, const int R2) { + LatticeComplex siteWl(U[0]._grid); + Wl = zero; + for (int mu = 1; mu < Nd; mu++) { + for (int nu = 0; nu < mu; nu++) { + traceWilsonLoop(siteWl, U, R1, R2, mu, nu); + Wl = Wl + siteWl; + traceWilsonLoop(siteWl, U, R2, R1, mu, nu); + Wl = Wl + siteWl; + } + } + } + ////////////////////////////////////////////////// + // sum over all x,y,z,t and over all planes of Wilson loop + ////////////////////////////////////////////////// + static RealD sumWilsonLoop(const GaugeLorentz &Umu, + const int R1, const int R2) { + std::vector 
U(4, Umu._grid); + + for (int mu = 0; mu < Nd; mu++) { + U[mu] = PeekIndex(Umu, mu); + } + + LatticeComplex Wl(Umu._grid); + + siteWilsonLoop(Wl, U, R1, R2); + + TComplex Tp = sum(Wl); + Complex p = TensorRemove(Tp); + return p.real(); + } + ////////////////////////////////////////////////// + // average over all x,y,z,t and over all planes of Wilson loop + ////////////////////////////////////////////////// + static RealD avgPlaquette(const GaugeLorentz &Umu, + const int R1, const int R2) { + RealD sumWl = sumWilsonLoop(Umu); + double vol = Umu._grid->gSites(); + double faces = 1.0 * Nd * (Nd - 1); + return sumWl / vol / faces / Nc; // Nd , Nc dependent... FIXME + } + +END_QEDFVOL_NAMESPACE + +#endif // QEDFVOL_WILSONLOOPS_H \ No newline at end of file diff --git a/programs/qed-fvol/qed-fvol.cc b/programs/qed-fvol/qed-fvol.cc index 53e01de9..02c36a67 100644 --- a/programs/qed-fvol/qed-fvol.cc +++ b/programs/qed-fvol/qed-fvol.cc @@ -64,6 +64,51 @@ int main(int argc, char *argv[]) pRNG.SeedRandomDevice(); photon.StochasticField(a, pRNG); + // Calculate log of plaquette + EmComp plaqA(&grid); + EmComp wlA(&grid); + EmComp tmp(&grid); + std::vector a_comp(4, &grid); + + for (int dir = 0; dir < Nd; dir++) { + a_comp[dir] = PeekIndex(a, dir); + } + + plaqA = zero; + wlA = zero; + + for(int mu = 1; mu < Nd; mu++) { + for(int nu = 0; nu < mu; nu++) { + tmp = a_comp[mu] + Cshift(a_comp[nu], mu, 1) - Cshift(a_comp[mu], nu, 1) - a_comp[nu]; + plaqA = plaqA + cos(tmp); + + tmp = a_comp[mu] + Cshift(a_comp[mu], mu, 1) + + Cshift(a_comp[nu], mu, 2) + Cshift(Cshift(a_comp[nu], mu, 2), nu, 1) + - Cshift(Cshift(a_comp[mu], nu, 2), mu, 1) - Cshift(a_comp[mu], nu, 2) + - Cshift(a_comp[nu], nu, 1) - a_comp[nu]; + wlA = wlA + cos(tmp); + } + } + + double vol = grid.gSites(); + double faces = (1.0 * Nd * (Nd - 1)) / 2.0; + + Complex avgPlaqA = sum(trace(plaqA)); + avgPlaqA = avgPlaqA / vol / faces; + + Complex avgWlA = sum(trace(wlA)); + avgWlA = avgWlA / vol / faces; + + TComplex 
tplaqsite; + LatticeComplex plaqtrace = trace(plaqA); + std::vector site0 = {0,0,0,0}; + peekSite(tplaqsite, plaqtrace, site0); + Complex plaqsite = TensorRemove(tplaqsite); + + LOG(Message) << "Plaquette average: " << avgPlaqA << std::endl; + LOG(Message) << "2x2 Wilson Loop average: " << avgWlA << std::endl; + LOG(Message) << "Plaquette (one site): " << plaqsite / faces << std::endl; + // epilogue LOG(Message) << "Grid is finalizing now" << std::endl; Grid_finalize(); From 78c7bcee36f7d937c8f5c6afe0f2088f85ebda51 Mon Sep 17 00:00:00 2001 From: James Harrison Date: Tue, 1 Nov 2016 13:30:11 +0000 Subject: [PATCH 004/177] QedFVol: Change variables of type "double" to type "Real". --- programs/qed-fvol/qed-fvol.cc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/programs/qed-fvol/qed-fvol.cc b/programs/qed-fvol/qed-fvol.cc index 02c36a67..fd780edf 100644 --- a/programs/qed-fvol/qed-fvol.cc +++ b/programs/qed-fvol/qed-fvol.cc @@ -90,8 +90,8 @@ int main(int argc, char *argv[]) } } - double vol = grid.gSites(); - double faces = (1.0 * Nd * (Nd - 1)) / 2.0; + Real vol = grid.gSites(); + Real faces = (1.0 * Nd * (Nd - 1)) / 2.0; Complex avgPlaqA = sum(trace(plaqA)); avgPlaqA = avgPlaqA / vol / faces; From c30d96ea5097ab1760a3f0a6ea1aed8ac1e6142b Mon Sep 17 00:00:00 2001 From: James Harrison Date: Wed, 9 Nov 2016 11:06:20 +0000 Subject: [PATCH 005/177] QedFVol: x86intrin.h namespace fix --- lib/PerfCount.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/lib/PerfCount.h b/lib/PerfCount.h index 9ac58883..5ab07c02 100644 --- a/lib/PerfCount.h +++ b/lib/PerfCount.h @@ -43,6 +43,9 @@ Author: paboyle #else #include #endif +#ifdef __x86_64__ +#include +#endif namespace Grid { @@ -86,7 +89,6 @@ inline uint64_t cyclecount(void){ return tmp; } #elif defined __x86_64__ -#include inline uint64_t cyclecount(void){ return __rdtsc(); // unsigned int dummy; From cf167d0cd1c561bed3557eaf89350b8d8eb8d9b1 Mon Sep 17 00:00:00 2001 From: James Harrison 
Date: Mon, 14 Nov 2016 17:02:29 +0000 Subject: [PATCH 006/177] QedFVol: implement exponentiation of photon field --- programs/qed-fvol/WilsonLoops.h | 19 ++++++++++--------- programs/qed-fvol/qed-fvol.cc | 32 +++++++++++++++++++++++++------- 2 files changed, 35 insertions(+), 16 deletions(-) diff --git a/programs/qed-fvol/WilsonLoops.h b/programs/qed-fvol/WilsonLoops.h index 610fdc7b..c40fbaf3 100644 --- a/programs/qed-fvol/WilsonLoops.h +++ b/programs/qed-fvol/WilsonLoops.h @@ -5,7 +5,7 @@ BEGIN_QEDFVOL_NAMESPACE -template class WilsonLoops : public Gimpl { +template class NewWilsonLoops : public Gimpl { public: INHERIT_GIMPL_TYPES(Gimpl); @@ -55,7 +55,7 @@ public: ////////////////////////////////////////////////// // sum over all x,y,z,t and over all planes of plaquette ////////////////////////////////////////////////// - static RealD sumPlaquette(const GaugeLorentz &Umu) { + static Real sumPlaquette(const GaugeLorentz &Umu) { std::vector U(4, Umu._grid); for (int mu = 0; mu < Nd; mu++) { @@ -73,8 +73,8 @@ public: ////////////////////////////////////////////////// // average over all x,y,z,t and over all planes of plaquette ////////////////////////////////////////////////// - static RealD avgPlaquette(const GaugeLorentz &Umu) { - RealD sumplaq = sumPlaquette(Umu); + static Real avgPlaquette(const GaugeLorentz &Umu) { + Real sumplaq = sumPlaquette(Umu); double vol = Umu._grid->gSites(); double faces = (1.0 * Nd * (Nd - 1)) / 2.0; return sumplaq / vol / faces / Nc; // Nd , Nc dependent... 
FIXME @@ -112,14 +112,14 @@ public: const int Rmu, const int Rnu, const int mu, const int nu) { GaugeMat sp(U[0]._grid); - WilsonLoop(sp, U, Rmu, Rnu, mu, nu); + wilsonLoop(sp, U, Rmu, Rnu, mu, nu); wl = trace(sp); } ////////////////////////////////////////////////// // sum over all planes of Wilson loop ////////////////////////////////////////////////// static void siteWilsonLoop(LatticeComplex &Wl, - const std::vector &U + const std::vector &U, const int R1, const int R2) { LatticeComplex siteWl(U[0]._grid); Wl = zero; @@ -135,7 +135,7 @@ public: ////////////////////////////////////////////////// // sum over all x,y,z,t and over all planes of Wilson loop ////////////////////////////////////////////////// - static RealD sumWilsonLoop(const GaugeLorentz &Umu, + static Real sumWilsonLoop(const GaugeLorentz &Umu, const int R1, const int R2) { std::vector U(4, Umu._grid); @@ -154,13 +154,14 @@ public: ////////////////////////////////////////////////// // average over all x,y,z,t and over all planes of Wilson loop ////////////////////////////////////////////////// - static RealD avgPlaquette(const GaugeLorentz &Umu, + static Real avgWilsonLoop(const GaugeLorentz &Umu, const int R1, const int R2) { - RealD sumWl = sumWilsonLoop(Umu); + Real sumWl = sumWilsonLoop(Umu, R1, R2); double vol = Umu._grid->gSites(); double faces = 1.0 * Nd * (Nd - 1); return sumWl / vol / faces / Nc; // Nd , Nc dependent... 
FIXME } +}; END_QEDFVOL_NAMESPACE diff --git a/programs/qed-fvol/qed-fvol.cc b/programs/qed-fvol/qed-fvol.cc index fd780edf..68705b8f 100644 --- a/programs/qed-fvol/qed-fvol.cc +++ b/programs/qed-fvol/qed-fvol.cc @@ -1,4 +1,5 @@ #include +#include using namespace Grid; using namespace QCD; @@ -24,10 +25,11 @@ public: typedef Lattice GaugeField; }; -typedef QedGimpl QedGimplR; -typedef Photon PhotonR; -typedef PhotonR::GaugeField EmField; -typedef PhotonR::GaugeLinkField EmComp; +typedef QedGimpl QedGimplR; +typedef PeriodicGaugeImpl QedPeriodicGimplR; +typedef Photon PhotonR; +typedef PhotonR::GaugeField EmField; +typedef PhotonR::GaugeLinkField EmComp; int main(int argc, char *argv[]) { @@ -60,11 +62,18 @@ int main(int argc, char *argv[]) PhotonR photon(PhotonR::Gauge::Feynman, PhotonR::ZmScheme::QedL); EmField a(&grid); + EmField expA(&grid); + + Real avgPlaqAexp, avgWl2x2Aexp; pRNG.SeedRandomDevice(); photon.StochasticField(a, pRNG); - // Calculate log of plaquette + // Exponentiate photon field + Complex imag_unit(0, 1); + expA = exp(imag_unit*0.5*(a+conjugate(a))); + + // Calculate plaquette from photon field EmComp plaqA(&grid); EmComp wlA(&grid); EmComp tmp(&grid); @@ -105,8 +114,17 @@ int main(int argc, char *argv[]) peekSite(tplaqsite, plaqtrace, site0); Complex plaqsite = TensorRemove(tplaqsite); - LOG(Message) << "Plaquette average: " << avgPlaqA << std::endl; - LOG(Message) << "2x2 Wilson Loop average: " << avgWlA << std::endl; + // Calculate plaquette from exponentiated photon field + avgPlaqAexp = NewWilsonLoops::avgPlaquette(expA); + avgWl2x2Aexp = NewWilsonLoops::avgWilsonLoop(expA, 2, 2); + + avgPlaqAexp = avgPlaqAexp*3; + avgWl2x2Aexp = avgWl2x2Aexp*3; + + LOG(Message) << "Plaquette average (from A): " << avgPlaqA << std::endl; + LOG(Message) << "Plaquette average (from exp(A)): " << avgPlaqAexp << std::endl; + LOG(Message) << "2x2 Wilson Loop average (from A): " << avgWlA << std::endl; + LOG(Message) << "2x2 Wilson Loop average (from exp(A)): " 
<< avgWl2x2Aexp << std::endl; LOG(Message) << "Plaquette (one site): " << plaqsite / faces << std::endl; // epilogue From f4ebea3381046026276864f3f908cb10b114d6a5 Mon Sep 17 00:00:00 2001 From: James Harrison Date: Mon, 14 Nov 2016 17:51:53 +0000 Subject: [PATCH 007/177] QedFVol: add functions for computing spatial and timelike Wilson loops --- programs/qed-fvol/WilsonLoops.h | 117 +++++++++++++++++++++++++++++--- programs/qed-fvol/qed-fvol.cc | 8 ++- 2 files changed, 114 insertions(+), 11 deletions(-) diff --git a/programs/qed-fvol/WilsonLoops.h b/programs/qed-fvol/WilsonLoops.h index c40fbaf3..98db6b7a 100644 --- a/programs/qed-fvol/WilsonLoops.h +++ b/programs/qed-fvol/WilsonLoops.h @@ -45,7 +45,7 @@ public: const std::vector &U) { LatticeComplex sitePlaq(U[0]._grid); Plaq = zero; - for (int mu = 1; mu < Nd; mu++) { + for (int mu = 1; mu < U[0]._grid->_ndimension; mu++) { for (int nu = 0; nu < mu; nu++) { traceDirPlaquette(sitePlaq, U, mu, nu); Plaq = Plaq + sitePlaq; @@ -58,7 +58,7 @@ public: static Real sumPlaquette(const GaugeLorentz &Umu) { std::vector U(4, Umu._grid); - for (int mu = 0; mu < Nd; mu++) { + for (int mu = 0; mu < Umu._grid->_ndimension; mu++) { U[mu] = PeekIndex(Umu, mu); } @@ -74,10 +74,11 @@ public: // average over all x,y,z,t and over all planes of plaquette ////////////////////////////////////////////////// static Real avgPlaquette(const GaugeLorentz &Umu) { + int ndim = Umu._grid->_ndimension; Real sumplaq = sumPlaquette(Umu); - double vol = Umu._grid->gSites(); - double faces = (1.0 * Nd * (Nd - 1)) / 2.0; - return sumplaq / vol / faces / Nc; // Nd , Nc dependent... FIXME + Real vol = Umu._grid->gSites(); + Real faces = (1.0 * ndim * (ndim - 1)) / 2.0; + return sumplaq / vol / faces / Nc; // Nc dependent... 
FIXME } ////////////////////////////////////////////////// @@ -123,7 +124,42 @@ public: const int R1, const int R2) { LatticeComplex siteWl(U[0]._grid); Wl = zero; - for (int mu = 1; mu < Nd; mu++) { + for (int mu = 1; mu < U[0]._grid->_ndimension; mu++) { + for (int nu = 0; nu < mu; nu++) { + traceWilsonLoop(siteWl, U, R1, R2, mu, nu); + Wl = Wl + siteWl; + traceWilsonLoop(siteWl, U, R2, R1, mu, nu); + Wl = Wl + siteWl; + } + } + } + ////////////////////////////////////////////////// + // sum over planes of Wilson loop with length R1 + // in the time direction + ////////////////////////////////////////////////// + static void siteTimelikeWilsonLoop(LatticeComplex &Wl, + const std::vector &U, + const int R1, const int R2) { + LatticeComplex siteWl(U[0]._grid); + + int ndim = U[0]._grid->_ndimension; + + Wl = zero; + for (int nu = 0; nu < ndim - 1; nu++) { + traceWilsonLoop(siteWl, U, R1, R2, ndim-1, nu); + Wl = Wl + siteWl; + } + } + ////////////////////////////////////////////////// + // sum Wilson loop over all planes orthogonal to the time direction + ////////////////////////////////////////////////// + static void siteSpatialWilsonLoop(LatticeComplex &Wl, + const std::vector &U, + const int R1, const int R2) { + LatticeComplex siteWl(U[0]._grid); + + Wl = zero; + for (int mu = 1; mu < U[0]._grid->_ndimension - 1; mu++) { for (int nu = 0; nu < mu; nu++) { traceWilsonLoop(siteWl, U, R1, R2, mu, nu); Wl = Wl + siteWl; @@ -139,7 +175,7 @@ public: const int R1, const int R2) { std::vector U(4, Umu._grid); - for (int mu = 0; mu < Nd; mu++) { + for (int mu = 0; mu < Umu._grid->_ndimension; mu++) { U[mu] = PeekIndex(Umu, mu); } @@ -152,14 +188,75 @@ public: return p.real(); } ////////////////////////////////////////////////// + // sum over all x,y,z,t and over all planes of timelike Wilson loop + ////////////////////////////////////////////////// + static Real sumTimelikeWilsonLoop(const GaugeLorentz &Umu, + const int R1, const int R2) { + std::vector U(4, Umu._grid); 
+ + for (int mu = 0; mu < Umu._grid->_ndimension; mu++) { + U[mu] = PeekIndex(Umu, mu); + } + + LatticeComplex Wl(Umu._grid); + + siteTimelikeWilsonLoop(Wl, U, R1, R2); + + TComplex Tp = sum(Wl); + Complex p = TensorRemove(Tp); + return p.real(); + } + ////////////////////////////////////////////////// + // sum over all x,y,z,t and over all planes of spatial Wilson loop + ////////////////////////////////////////////////// + static Real sumSpatialWilsonLoop(const GaugeLorentz &Umu, + const int R1, const int R2) { + std::vector U(4, Umu._grid); + + for (int mu = 0; mu < Umu._grid->_ndimension; mu++) { + U[mu] = PeekIndex(Umu, mu); + } + + LatticeComplex Wl(Umu._grid); + + siteSpatialWilsonLoop(Wl, U, R1, R2); + + TComplex Tp = sum(Wl); + Complex p = TensorRemove(Tp); + return p.real(); + } + ////////////////////////////////////////////////// // average over all x,y,z,t and over all planes of Wilson loop ////////////////////////////////////////////////// static Real avgWilsonLoop(const GaugeLorentz &Umu, const int R1, const int R2) { + int ndim = Umu._grid->_ndimension; Real sumWl = sumWilsonLoop(Umu, R1, R2); - double vol = Umu._grid->gSites(); - double faces = 1.0 * Nd * (Nd - 1); - return sumWl / vol / faces / Nc; // Nd , Nc dependent... FIXME + Real vol = Umu._grid->gSites(); + Real faces = 1.0 * ndim * (ndim - 1); + return sumWl / vol / faces / Nc; // Nc dependent... FIXME + } + ////////////////////////////////////////////////// + // average over all x,y,z,t and over all planes of timelike Wilson loop + ////////////////////////////////////////////////// + static Real avgTimelikeWilsonLoop(const GaugeLorentz &Umu, + const int R1, const int R2) { + int ndim = Umu._grid->_ndimension; + Real sumWl = sumTimelikeWilsonLoop(Umu, R1, R2); + Real vol = Umu._grid->gSites(); + Real faces = 1.0 * (ndim - 1); + return sumWl / vol / faces / Nc; // Nc dependent... 
FIXME + } + ////////////////////////////////////////////////// + // average over all x,y,z,t and over all planes of spatial Wilson loop + ////////////////////////////////////////////////// + static Real avgSpatialWilsonLoop(const GaugeLorentz &Umu, + const int R1, const int R2) { + int ndim = Umu._grid->_ndimension; + Real sumWl = sumSpatialWilsonLoop(Umu, R1, R2); + Real vol = Umu._grid->gSites(); + Real faces = 1.0 * (ndim - 1) * (ndim - 2); + return sumWl / vol / faces / Nc; // Nc dependent... FIXME } }; diff --git a/programs/qed-fvol/qed-fvol.cc b/programs/qed-fvol/qed-fvol.cc index 68705b8f..d026057e 100644 --- a/programs/qed-fvol/qed-fvol.cc +++ b/programs/qed-fvol/qed-fvol.cc @@ -64,7 +64,7 @@ int main(int argc, char *argv[]) EmField a(&grid); EmField expA(&grid); - Real avgPlaqAexp, avgWl2x2Aexp; + Real avgPlaqAexp, avgWl2x2Aexp, avgWl2x2Aexp_time, avgWl2x2Aexp_space; pRNG.SeedRandomDevice(); photon.StochasticField(a, pRNG); @@ -117,14 +117,20 @@ int main(int argc, char *argv[]) // Calculate plaquette from exponentiated photon field avgPlaqAexp = NewWilsonLoops::avgPlaquette(expA); avgWl2x2Aexp = NewWilsonLoops::avgWilsonLoop(expA, 2, 2); + avgWl2x2Aexp_time = NewWilsonLoops::avgTimelikeWilsonLoop(expA, 2, 2); + avgWl2x2Aexp_space = NewWilsonLoops::avgSpatialWilsonLoop(expA, 2, 2); avgPlaqAexp = avgPlaqAexp*3; avgWl2x2Aexp = avgWl2x2Aexp*3; + avgWl2x2Aexp_time = avgWl2x2Aexp_time*3; + avgWl2x2Aexp_space = avgWl2x2Aexp_space*3; LOG(Message) << "Plaquette average (from A): " << avgPlaqA << std::endl; LOG(Message) << "Plaquette average (from exp(A)): " << avgPlaqAexp << std::endl; LOG(Message) << "2x2 Wilson Loop average (from A): " << avgWlA << std::endl; LOG(Message) << "2x2 Wilson Loop average (from exp(A)): " << avgWl2x2Aexp << std::endl; + LOG(Message) << "2x2 Wilson Loop timelike average (from exp(A)): " << avgWl2x2Aexp_time << std::endl; + LOG(Message) << "2x2 Wilson Loop spatial average (from exp(A)): " << avgWl2x2Aexp_space << std::endl; LOG(Message) 
<< "Plaquette (one site): " << plaqsite / faces << std::endl; // epilogue From 92ec3404f8a404e7d6420ebfa0f113af5eb6ec6d Mon Sep 17 00:00:00 2001 From: James Harrison Date: Mon, 14 Nov 2016 17:59:02 +0000 Subject: [PATCH 008/177] Set imaginary part of stochastic QED field to zero after FFT into position space --- lib/qcd/action/gauge/Photon.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/lib/qcd/action/gauge/Photon.h b/lib/qcd/action/gauge/Photon.h index 852ecb3e..ca0a8d40 100644 --- a/lib/qcd/action/gauge/Photon.h +++ b/lib/qcd/action/gauge/Photon.h @@ -172,6 +172,8 @@ namespace QCD{ pokeLorentz(aTilde, r, mu); } fft.FFT_all_dim(out, aTilde, FFT::backward); + + out = 0.5*(out + conjugate(out)); } // template // void Photon::FeynmanGaugeMomentumSpacePropagator_L(GaugeField &out, From a71b69389b6fc7360dbebbf5ed8d4fa3a6952016 Mon Sep 17 00:00:00 2001 From: James Harrison Date: Mon, 14 Nov 2016 18:23:04 +0000 Subject: [PATCH 009/177] QedFVol: calculate square Wilson loops up to 10x10 --- programs/qed-fvol/qed-fvol.cc | 74 +++++++---------------------------- 1 file changed, 14 insertions(+), 60 deletions(-) diff --git a/programs/qed-fvol/qed-fvol.cc b/programs/qed-fvol/qed-fvol.cc index d026057e..31312b1e 100644 --- a/programs/qed-fvol/qed-fvol.cc +++ b/programs/qed-fvol/qed-fvol.cc @@ -64,75 +64,29 @@ int main(int argc, char *argv[]) EmField a(&grid); EmField expA(&grid); - Real avgPlaqAexp, avgWl2x2Aexp, avgWl2x2Aexp_time, avgWl2x2Aexp_space; + Real wlA, logWlA; pRNG.SeedRandomDevice(); photon.StochasticField(a, pRNG); // Exponentiate photon field Complex imag_unit(0, 1); - expA = exp(imag_unit*0.5*(a+conjugate(a))); + expA = exp(imag_unit*a); - // Calculate plaquette from photon field - EmComp plaqA(&grid); - EmComp wlA(&grid); - EmComp tmp(&grid); - std::vector a_comp(4, &grid); - - for (int dir = 0; dir < Nd; dir++) { - a_comp[dir] = PeekIndex(a, dir); + // Calculate Wilson loops + for(int i=1; i<=10; i++){ + LOG(Message) << i << 'x' << i << " Wilson loop" << 
std::endl; + wlA = NewWilsonLoops::avgWilsonLoop(expA, i, i) * 3; + logWlA = -2*log(wlA); + LOG(Message) << "-2log(W) average: " << logWlA << std::endl; + wlA = NewWilsonLoops::avgTimelikeWilsonLoop(expA, i, i) * 3; + logWlA = -2*log(wlA); + LOG(Message) << "-2log(W) timelike: " << logWlA << std::endl; + wlA = NewWilsonLoops::avgSpatialWilsonLoop(expA, i, i) * 3; + logWlA = -2*log(wlA); + LOG(Message) << "-2log(W) spatial: " << logWlA << std::endl; } - plaqA = zero; - wlA = zero; - - for(int mu = 1; mu < Nd; mu++) { - for(int nu = 0; nu < mu; nu++) { - tmp = a_comp[mu] + Cshift(a_comp[nu], mu, 1) - Cshift(a_comp[mu], nu, 1) - a_comp[nu]; - plaqA = plaqA + cos(tmp); - - tmp = a_comp[mu] + Cshift(a_comp[mu], mu, 1) - + Cshift(a_comp[nu], mu, 2) + Cshift(Cshift(a_comp[nu], mu, 2), nu, 1) - - Cshift(Cshift(a_comp[mu], nu, 2), mu, 1) - Cshift(a_comp[mu], nu, 2) - - Cshift(a_comp[nu], nu, 1) - a_comp[nu]; - wlA = wlA + cos(tmp); - } - } - - Real vol = grid.gSites(); - Real faces = (1.0 * Nd * (Nd - 1)) / 2.0; - - Complex avgPlaqA = sum(trace(plaqA)); - avgPlaqA = avgPlaqA / vol / faces; - - Complex avgWlA = sum(trace(wlA)); - avgWlA = avgWlA / vol / faces; - - TComplex tplaqsite; - LatticeComplex plaqtrace = trace(plaqA); - std::vector site0 = {0,0,0,0}; - peekSite(tplaqsite, plaqtrace, site0); - Complex plaqsite = TensorRemove(tplaqsite); - - // Calculate plaquette from exponentiated photon field - avgPlaqAexp = NewWilsonLoops::avgPlaquette(expA); - avgWl2x2Aexp = NewWilsonLoops::avgWilsonLoop(expA, 2, 2); - avgWl2x2Aexp_time = NewWilsonLoops::avgTimelikeWilsonLoop(expA, 2, 2); - avgWl2x2Aexp_space = NewWilsonLoops::avgSpatialWilsonLoop(expA, 2, 2); - - avgPlaqAexp = avgPlaqAexp*3; - avgWl2x2Aexp = avgWl2x2Aexp*3; - avgWl2x2Aexp_time = avgWl2x2Aexp_time*3; - avgWl2x2Aexp_space = avgWl2x2Aexp_space*3; - - LOG(Message) << "Plaquette average (from A): " << avgPlaqA << std::endl; - LOG(Message) << "Plaquette average (from exp(A)): " << avgPlaqAexp << std::endl; - 
LOG(Message) << "2x2 Wilson Loop average (from A): " << avgWlA << std::endl; - LOG(Message) << "2x2 Wilson Loop average (from exp(A)): " << avgWl2x2Aexp << std::endl; - LOG(Message) << "2x2 Wilson Loop timelike average (from exp(A)): " << avgWl2x2Aexp_time << std::endl; - LOG(Message) << "2x2 Wilson Loop spatial average (from exp(A)): " << avgWl2x2Aexp_space << std::endl; - LOG(Message) << "Plaquette (one site): " << plaqsite / faces << std::endl; - // epilogue LOG(Message) << "Grid is finalizing now" << std::endl; Grid_finalize(); From 739c2308b5ce9a9464dbbd9057dbe49f6b04cf59 Mon Sep 17 00:00:00 2001 From: James Harrison Date: Tue, 15 Nov 2016 13:07:52 +0000 Subject: [PATCH 010/177] Set imaginary part of stochastic QED field to zero using real() instead of conjugate(). --- lib/qcd/action/gauge/Photon.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/qcd/action/gauge/Photon.h b/lib/qcd/action/gauge/Photon.h index ca0a8d40..b6c1b76f 100644 --- a/lib/qcd/action/gauge/Photon.h +++ b/lib/qcd/action/gauge/Photon.h @@ -173,7 +173,7 @@ namespace QCD{ } fft.FFT_all_dim(out, aTilde, FFT::backward); - out = 0.5*(out + conjugate(out)); + out = real(out); } // template // void Photon::FeynmanGaugeMomentumSpacePropagator_L(GaugeField &out, From 6ad73145bc9754a5f26093eee5a34473ba0cff82 Mon Sep 17 00:00:00 2001 From: James Harrison Date: Wed, 30 Nov 2016 15:17:22 +0000 Subject: [PATCH 011/177] Calculate Wilson loop average over multiple configurations. 
--- programs/qed-fvol/qed-fvol.cc | 47 +++++++++++++++++++++++------------ 1 file changed, 31 insertions(+), 16 deletions(-) diff --git a/programs/qed-fvol/qed-fvol.cc b/programs/qed-fvol/qed-fvol.cc index 31312b1e..f0f5079f 100644 --- a/programs/qed-fvol/qed-fvol.cc +++ b/programs/qed-fvol/qed-fvol.cc @@ -31,6 +31,9 @@ typedef Photon PhotonR; typedef PhotonR::GaugeField EmField; typedef PhotonR::GaugeLinkField EmComp; +const int NCONFIGS = 10; +const int NWILSON = 10; + int main(int argc, char *argv[]) { // parse command line @@ -64,27 +67,39 @@ int main(int argc, char *argv[]) EmField a(&grid); EmField expA(&grid); - Real wlA, logWlA; + Complex imag_unit(0, 1); + + Real wlA; + std::vector logWlAvg(NWILSON, 0.0), logWlTime(NWILSON, 0.0), logWlSpace(NWILSON, 0.0); pRNG.SeedRandomDevice(); - photon.StochasticField(a, pRNG); - // Exponentiate photon field - Complex imag_unit(0, 1); - expA = exp(imag_unit*a); + LOG(Message) << "Wilson loop calculation beginning" << std::endl; + for(int ic = 0; ic < NCONFIGS; ic++){ + LOG(Message) << "Configuration " << ic <::avgWilsonLoop(expA, iw, iw) * 3; + logWlAvg[iw-1] -= 2*log(wlA); + wlA = NewWilsonLoops::avgTimelikeWilsonLoop(expA, iw, iw) * 3; + logWlTime[iw-1] -= 2*log(wlA); + wlA = NewWilsonLoops::avgSpatialWilsonLoop(expA, iw, iw) * 3; + logWlSpace[iw-1] -= 2*log(wlA); + } + } + LOG(Message) << "Wilson loop calculation completed" << std::endl; + // Calculate Wilson loops - for(int i=1; i<=10; i++){ - LOG(Message) << i << 'x' << i << " Wilson loop" << std::endl; - wlA = NewWilsonLoops::avgWilsonLoop(expA, i, i) * 3; - logWlA = -2*log(wlA); - LOG(Message) << "-2log(W) average: " << logWlA << std::endl; - wlA = NewWilsonLoops::avgTimelikeWilsonLoop(expA, i, i) * 3; - logWlA = -2*log(wlA); - LOG(Message) << "-2log(W) timelike: " << logWlA << std::endl; - wlA = NewWilsonLoops::avgSpatialWilsonLoop(expA, i, i) * 3; - logWlA = -2*log(wlA); - LOG(Message) << "-2log(W) spatial: " << logWlA << std::endl; + for(int iw=1; iw<=10; 
iw++){ + LOG(Message) << iw << 'x' << iw << " Wilson loop" << std::endl; + LOG(Message) << "-2log(W) average: " << logWlAvg[iw-1]/NCONFIGS << std::endl; + LOG(Message) << "-2log(W) timelike: " << logWlTime[iw-1]/NCONFIGS << std::endl; + LOG(Message) << "-2log(W) spatial: " << logWlSpace[iw-1]/NCONFIGS << std::endl; } // epilogue From 2e3c5890b6035a4c9d661102c2117c53f93f00fd Mon Sep 17 00:00:00 2001 From: Antonin Portelli Date: Thu, 15 Dec 2016 20:06:46 +0000 Subject: [PATCH 012/177] qed-fvol: build fix --- extras/Makefile.am | 2 +- extras/qed-fvol/Makefile.am | 2 +- lib/qcd/action/Actions.h | 1 + 3 files changed, 3 insertions(+), 2 deletions(-) diff --git a/extras/Makefile.am b/extras/Makefile.am index d8c2b675..416a9fc8 100644 --- a/extras/Makefile.am +++ b/extras/Makefile.am @@ -1 +1 @@ -SUBDIRS = Hadrons \ No newline at end of file +SUBDIRS = Hadrons qed-fvol \ No newline at end of file diff --git a/extras/qed-fvol/Makefile.am b/extras/qed-fvol/Makefile.am index cd762e94..0a9030c7 100644 --- a/extras/qed-fvol/Makefile.am +++ b/extras/qed-fvol/Makefile.am @@ -1,4 +1,4 @@ -AM_CXXFLAGS += -I$(top_srcdir)/programs -I../$(top_srcdir)/programs +AM_CXXFLAGS += -I$(top_srcdir)/extras bin_PROGRAMS = qed-fvol diff --git a/lib/qcd/action/Actions.h b/lib/qcd/action/Actions.h index 4a30f8c3..fea75f8a 100644 --- a/lib/qcd/action/Actions.h +++ b/lib/qcd/action/Actions.h @@ -57,6 +57,7 @@ Author: paboyle //////////////////////////////////////////// // Gauge Actions //////////////////////////////////////////// +#include #include #include From 2af9ab903445291377bb323ee349ddf9c7e94abf Mon Sep 17 00:00:00 2001 From: Antonin Portelli Date: Tue, 20 Dec 2016 12:40:26 +0100 Subject: [PATCH 013/177] old Makefile cleaning --- programs/Makefile.am | 1 - 1 file changed, 1 deletion(-) delete mode 100644 programs/Makefile.am diff --git a/programs/Makefile.am b/programs/Makefile.am deleted file mode 100644 index ff7f6584..00000000 --- a/programs/Makefile.am +++ /dev/null @@ -1 +0,0 @@ 
-SUBDIRS = qed-fvol From 9ac3ac41df095e3208c126f4b52bdf9f1b58937a Mon Sep 17 00:00:00 2001 From: Antonin Portelli Date: Tue, 20 Dec 2016 12:41:01 +0100 Subject: [PATCH 014/177] serialisable Photon parameters --- lib/qcd/action/gauge/Photon.h | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/lib/qcd/action/gauge/Photon.h b/lib/qcd/action/gauge/Photon.h index b6c1b76f..bbe3ebf7 100644 --- a/lib/qcd/action/gauge/Photon.h +++ b/lib/qcd/action/gauge/Photon.h @@ -28,6 +28,7 @@ #ifndef QCD_PHOTON_ACTION_H #define QCD_PHOTON_ACTION_H + namespace Grid{ namespace QCD{ @@ -36,8 +37,8 @@ namespace QCD{ { public: INHERIT_GIMPL_TYPES(Gimpl); - enum class Gauge {Feynman, Coulomb, Landau}; - enum class ZmScheme {QedL, QedTL}; + GRID_SERIALIZABLE_ENUM(Gauge, undef, feynman, 1, coulomb, 2, landau, 3); + GRID_SERIALIZABLE_ENUM(ZmScheme, undef, qedL, 1, qedTL, 2); public: Photon(Gauge gauge, ZmScheme zmScheme); virtual ~Photon(void) = default; @@ -104,7 +105,7 @@ namespace QCD{ switch (zmScheme_) { - case ZmScheme::QedTL: + case ZmScheme::qedTL: { std::vector zm(nd,0); TComplex Tzero = Complex(0.0,0.0); @@ -113,7 +114,7 @@ namespace QCD{ break; } - case ZmScheme::QedL: + case ZmScheme::qedL: { LatticeInteger spNrm(grid), coor(grid); GaugeLinkField z(grid); From db9c28a773c5d93d3c757ea4f8af75876106b948 Mon Sep 17 00:00:00 2001 From: Antonin Portelli Date: Tue, 20 Dec 2016 12:41:39 +0100 Subject: [PATCH 015/177] qed-fvol: Photon parameter name fix --- extras/qed-fvol/qed-fvol.cc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/extras/qed-fvol/qed-fvol.cc b/extras/qed-fvol/qed-fvol.cc index f0f5079f..951c36ad 100644 --- a/extras/qed-fvol/qed-fvol.cc +++ b/extras/qed-fvol/qed-fvol.cc @@ -62,8 +62,8 @@ int main(int argc, char *argv[]) std::vector mpi_layout = GridDefaultMpi(); GridCartesian grid(latt_size,simd_layout,mpi_layout); GridParallelRNG pRNG(&grid); - PhotonR photon(PhotonR::Gauge::Feynman, - PhotonR::ZmScheme::QedL); + PhotonR 
photon(PhotonR::Gauge::feynman, + PhotonR::ZmScheme::qedL); EmField a(&grid); EmField expA(&grid); From 17b3a10d46e46823f0a380647c4953c2ffd74ea4 Mon Sep 17 00:00:00 2001 From: Antonin Portelli Date: Thu, 22 Dec 2016 00:29:19 +0100 Subject: [PATCH 016/177] stochastic QED: function to cache 1/sqrt(khat^2) --- lib/qcd/action/gauge/Photon.h | 47 +++++++++++++++++++++++++---------- 1 file changed, 34 insertions(+), 13 deletions(-) diff --git a/lib/qcd/action/gauge/Photon.h b/lib/qcd/action/gauge/Photon.h index bbe3ebf7..faa63b42 100644 --- a/lib/qcd/action/gauge/Photon.h +++ b/lib/qcd/action/gauge/Photon.h @@ -44,7 +44,10 @@ namespace QCD{ virtual ~Photon(void) = default; void FreePropagator(const GaugeField &in, GaugeField &out); void MomentumSpacePropagator(const GaugeField &in, GaugeField &out); + void StochasticWeight(GaugeLinkField &weight); void StochasticField(GaugeField &out, GridParallelRNG &rng); + void StochasticField(GaugeField &out, GridParallelRNG &rng, + const GaugeLinkField &weight); private: void invKHatSquared(GaugeLinkField &out); void zmSub(GaugeLinkField &out); @@ -148,32 +151,50 @@ namespace QCD{ } template - void Photon::StochasticField(GaugeField &out, GridParallelRNG &rng) + void Photon::StochasticWeight(GaugeLinkField &weight) { - auto *grid = dynamic_cast(out._grid); - const unsigned int nd = grid->_ndimension; - std::vector latt_size = grid->_fdimensions; - GaugeLinkField sqrtK2Inv(grid), r(grid); - GaugeField aTilde(grid); - FFT fft(grid); + auto *grid = dynamic_cast(weight._grid); + const unsigned int nd = grid->_ndimension; + std::vector latt_size = grid->_fdimensions; Integer vol = 1; for(int d = 0; d < nd; d++) { vol = vol * latt_size[d]; } - - invKHatSquared(sqrtK2Inv); - sqrtK2Inv = sqrt(vol*real(sqrtK2Inv)); - zmSub(sqrtK2Inv); + invKHatSquared(weight); + weight = sqrt(vol*real(weight)); + zmSub(weight); + } + + template + void Photon::StochasticField(GaugeField &out, GridParallelRNG &rng) + { + auto *grid = dynamic_cast(out._grid); + 
GaugeLinkField weight(grid); + + StochasticWeight(weight); + StochasticField(out, rng, weight); + } + + template + void Photon::StochasticField(GaugeField &out, GridParallelRNG &rng, + const GaugeLinkField &weight) + { + auto *grid = dynamic_cast(out._grid); + const unsigned int nd = grid->_ndimension; + GaugeLinkField r(grid); + GaugeField aTilde(grid); + FFT fft(grid); + for(int mu = 0; mu < nd; mu++) { gaussian(rng, r); - r = sqrtK2Inv*r; + r = weight*r; pokeLorentz(aTilde, r, mu); } fft.FFT_all_dim(out, aTilde, FFT::backward); - + out = real(out); } // template From 4c3fd9fa3f6976c1297715d1e2239797bb0dd45b Mon Sep 17 00:00:00 2001 From: Antonin Portelli Date: Thu, 22 Dec 2016 00:29:41 +0100 Subject: [PATCH 017/177] stochastic QED field module in Hadrons --- extras/Hadrons/Modules.hpp | 1 + extras/Hadrons/Modules/MGauge/StochEm.cc | 88 +++++++++++++++++++++ extras/Hadrons/Modules/MGauge/StochEm.hpp | 96 +++++++++++++++++++++++ extras/Hadrons/modules.inc | 2 + 4 files changed, 187 insertions(+) create mode 100644 extras/Hadrons/Modules/MGauge/StochEm.cc create mode 100644 extras/Hadrons/Modules/MGauge/StochEm.hpp diff --git a/extras/Hadrons/Modules.hpp b/extras/Hadrons/Modules.hpp index 77ae08b7..5d1a456c 100644 --- a/extras/Hadrons/Modules.hpp +++ b/extras/Hadrons/Modules.hpp @@ -32,6 +32,7 @@ See the full license in the file "LICENSE" in the top level distribution directo #include #include #include +#include #include #include #include diff --git a/extras/Hadrons/Modules/MGauge/StochEm.cc b/extras/Hadrons/Modules/MGauge/StochEm.cc new file mode 100644 index 00000000..c7a9fc4f --- /dev/null +++ b/extras/Hadrons/Modules/MGauge/StochEm.cc @@ -0,0 +1,88 @@ +/************************************************************************************* + +Grid physics library, www.github.com/paboyle/Grid + +Source file: extras/Hadrons/Modules/MGauge/StochEm.cc + +Copyright (C) 2015 +Copyright (C) 2016 + + +This program is free software; you can redistribute it and/or modify 
+it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along +with this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + +See the full license in the file "LICENSE" in the top level distribution directory +*************************************************************************************/ +/* END LEGAL */ +#include + +using namespace Grid; +using namespace Hadrons; +using namespace MGauge; + +/****************************************************************************** +* TStochEm implementation * +******************************************************************************/ +// constructor ///////////////////////////////////////////////////////////////// +TStochEm::TStochEm(const std::string name) +: Module(name) +{} + +// dependencies/products /////////////////////////////////////////////////////// +std::vector TStochEm::getInput(void) +{ + std::vector in; + + return in; +} + +std::vector TStochEm::getOutput(void) +{ + std::vector out = {getName()}; + + return out; +} + +// setup /////////////////////////////////////////////////////////////////////// +void TStochEm::setup(void) +{ + if (!env().hasRegisteredObject("_" + getName() + "_weight")) + { + env().registerLattice("_" + getName() + "_weight"); + } + env().registerLattice(getName()); +} + +// execution /////////////////////////////////////////////////////////////////// +void TStochEm::execute(void) +{ + PhotonR photon(par().gauge, par().zmScheme); + EmField &a = *env().createLattice(getName()); + EmComp *w; 
+ + if (!env().hasCreatedObject("_" + getName() + "_weight")) + { + LOG(Message) << "Caching stochatic EM potential weight (gauge: " + << par().gauge << ", zero-mode scheme: " + << par().zmScheme << ")..." << std::endl; + w = env().createLattice("_" + getName() + "_weight"); + photon.StochasticWeight(*w); + } + else + { + w = env().getObject("_" + getName() + "_weight"); + } + LOG(Message) << "Generating stochatic EM potential..." << std::endl; + photon.StochasticField(a, *env().get4dRng(), *w); +} diff --git a/extras/Hadrons/Modules/MGauge/StochEm.hpp b/extras/Hadrons/Modules/MGauge/StochEm.hpp new file mode 100644 index 00000000..04a7c48c --- /dev/null +++ b/extras/Hadrons/Modules/MGauge/StochEm.hpp @@ -0,0 +1,96 @@ +/************************************************************************************* + +Grid physics library, www.github.com/paboyle/Grid + +Source file: extras/Hadrons/Modules/MGauge/StochEm.hpp + +Copyright (C) 2015 +Copyright (C) 2016 + + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along +with this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+ +See the full license in the file "LICENSE" in the top level distribution directory +*************************************************************************************/ +/* END LEGAL */ +#ifndef Hadrons_StochEm_hpp_ +#define Hadrons_StochEm_hpp_ + +#include +#include +#include + +BEGIN_HADRONS_NAMESPACE + +/****************************************************************************** + * StochEm * + ******************************************************************************/ +BEGIN_MODULE_NAMESPACE(MGauge) + +template +class QedGimpl +{ +public: + typedef S Simd; + + template + using iImplGaugeLink = iScalar>>; + template + using iImplGaugeField = iVector>, Nd>; + + typedef iImplGaugeLink SiteGaugeLink; + typedef iImplGaugeField SiteGaugeField; + + typedef Lattice GaugeLinkField; + typedef Lattice GaugeField; +}; + +typedef QedGimpl QedGimplR; +typedef Photon PhotonR; + +class StochEmPar: Serializable +{ +public: + GRID_SERIALIZABLE_CLASS_MEMBERS(StochEmPar, + PhotonR::Gauge, gauge, + PhotonR::ZmScheme, zmScheme); +}; + +class TStochEm: public Module +{ +public: + typedef PhotonR::GaugeField EmField; + typedef PhotonR::GaugeLinkField EmComp; +public: + // constructor + TStochEm(const std::string name); + // destructor + virtual ~TStochEm(void) = default; + // dependency relation + virtual std::vector getInput(void); + virtual std::vector getOutput(void); + // setup + virtual void setup(void); + // execution + virtual void execute(void); +}; + +MODULE_REGISTER_NS(StochEm, TStochEm, MGauge); + +END_MODULE_NAMESPACE + +END_HADRONS_NAMESPACE + +#endif // Hadrons_StochEm_hpp_ diff --git a/extras/Hadrons/modules.inc b/extras/Hadrons/modules.inc index 4251ffa3..8b559024 100644 --- a/extras/Hadrons/modules.inc +++ b/extras/Hadrons/modules.inc @@ -1,6 +1,7 @@ modules_cc =\ Modules/MGauge/Load.cc \ Modules/MGauge/Random.cc \ + Modules/MGauge/StochEm.cc \ Modules/MGauge/Unit.cc modules_hpp =\ @@ -10,6 +11,7 @@ modules_hpp =\ Modules/MContraction/Meson.hpp \ 
Modules/MGauge/Load.hpp \ Modules/MGauge/Random.hpp \ + Modules/MGauge/StochEm.hpp \ Modules/MGauge/Unit.hpp \ Modules/MSolver/RBPrecCG.hpp \ Modules/MSource/Point.hpp \ From 8c3cc3236447b4b8eef95a29da1b48166b5eb03d Mon Sep 17 00:00:00 2001 From: Antonin Portelli Date: Thu, 29 Dec 2016 22:42:58 +0100 Subject: [PATCH 018/177] Scalar action --- lib/qcd/action/Actions.h | 5 + lib/qcd/action/scalar/Scalar.h | 211 +++++++++++++++++++++++++++++++++ 2 files changed, 216 insertions(+) create mode 100644 lib/qcd/action/scalar/Scalar.h diff --git a/lib/qcd/action/Actions.h b/lib/qcd/action/Actions.h index fea75f8a..efd6a5bc 100644 --- a/lib/qcd/action/Actions.h +++ b/lib/qcd/action/Actions.h @@ -292,4 +292,9 @@ typedef MobiusFermion GparityMobiusFermionD; #include #include +//////////////////// +// Scalar actions +//////////////////// +#include + #endif diff --git a/lib/qcd/action/scalar/Scalar.h b/lib/qcd/action/scalar/Scalar.h new file mode 100644 index 00000000..194f6767 --- /dev/null +++ b/lib/qcd/action/scalar/Scalar.h @@ -0,0 +1,211 @@ +#ifndef QCD_SCALAR_ACTION_H +#define QCD_SCALAR_ACTION_H + +#define INHERIT_SIMPL_TYPES(Impl)\ +typedef typename Impl::SiteScalar SiteScalar; \ +typedef typename Impl::SiteSpinor SiteSpinor; \ +typedef typename Impl::SitePropagator SitePropagator; \ +typedef typename Impl::ScalarField ScalarField; \ +typedef typename Impl::FermionField FermionField; \ +typedef typename Impl::PropagatorField PropagatorField; \ +typedef typename Impl::StencilImpl StencilImpl; + +namespace Grid{ +namespace QCD{ + // Scalar implementation class /////////////////////////////////////////////// + // FIXME: it is not very nice to have the FImpl aliases + template , + class _Coeff_t = RealD> + class ScalarImpl: + public PeriodicGaugeImpl> + { + public: + static constexpr unsigned int rDim = Representation::Dimension; + public: + // gauge types + typedef PeriodicGaugeImpl> Gimpl; + INHERIT_GIMPL_TYPES(Gimpl); + // site types + // (using classes instead of aliases 
to allow for partial specialisation) + template + class iImplScalar + { + public: + typedef iScalar>> type; + }; + template + class iImplScalar + { + public: + typedef iScalar>> type; + }; + template + class iImplPropagator + { + public: + typedef iScalar>> type; + }; + template + class iImplPropagator + { + public: + typedef iScalar>> type; + }; + // type aliases + typedef typename iImplScalar::type SiteScalar; + typedef SiteScalar SiteSpinor; + typedef typename iImplPropagator::type SitePropagator; + typedef Lattice ScalarField; + typedef ScalarField FermionField; + typedef Lattice PropagatorField; + typedef CartesianStencil StencilImpl; + }; + + // single scalar implementation + typedef ScalarImpl ScalarImplR; + + // Scalar action ///////////////////////////////////////////////////////////// + template + class Scalar: + public CheckerBoardedSparseMatrixBase, + public SImpl + { + public: + INHERIT_GIMPL_TYPES(SImpl); + INHERIT_SIMPL_TYPES(SImpl); + public: + // constructor + Scalar(GaugeField &_Umu, GridCartesian &Sgrid, GridRedBlackCartesian &Hgrid, + RealD _mass) + : _grid(&Sgrid) + , _cbgrid(&Hgrid) + , mass(_mass) + , Lebesgue(_grid) + , LebesgueEvenOdd(_cbgrid) + , Umu(&Sgrid) + , UmuEven(&Hgrid) + , UmuOdd(&Hgrid) + { + Umu = _Umu; + pickCheckerboard(Even, UmuEven, Umu); + pickCheckerboard(Odd, UmuOdd, Umu); + } + // grid access + virtual GridBase *RedBlackGrid(void) {return _grid;} + // half checkerboard operations + // FIXME: do implementation + virtual void Meooe(const ScalarField &in, ScalarField &out) + { + assert(0); + } + virtual void Mooee(const ScalarField &in, ScalarField &out) + { + assert(0); + } + virtual void MooeeInv(const ScalarField &in, ScalarField &out) + { + assert(0); + } + virtual void MeooeDag(const ScalarField &in, ScalarField &out) + { + assert(0); + } + virtual void MooeeDag(const ScalarField &in, ScalarField &out) + { + assert(0); + } + virtual void MooeeInvDag(const ScalarField &in, ScalarField &out) + { + assert(0); + } + // 
free propagators + static void MomentumSpacePropagator(ScalarField &out, RealD m); + static void FreePropagator(const ScalarField &in, ScalarField &out, + const ScalarField &momKernel); + static void FreePropagator(const ScalarField &in, ScalarField &out, RealD m); + public: + RealD mass; + + GridBase *_grid; + GridBase *_cbgrid; + + // Defines the stencils for even and odd + StencilImpl Stencil; + StencilImpl StencilEven; + StencilImpl StencilOdd; + + // Copy of the gauge field, with even and odd subsets + GaugeField Umu; + GaugeField UmuEven; + GaugeField UmuOdd; + + LebesgueOrder Lebesgue; + LebesgueOrder LebesgueEvenOdd; + }; + + template + void Scalar::MomentumSpacePropagator(ScalarField &out, RealD m) + { + GridBase *grid = out._grid; + ScalarField kmu(grid); + const unsigned int nd = grid->_ndimension; + std::vector &l = grid->_fdimensions; + + out = m*m; + for(int mu = 0; mu < nd; mu++) + { + Real twoPiL = M_PI*2./l[mu]; + + LatticeCoordinate(kmu,mu); + kmu = 2.*sin(.5*twoPiL*kmu); + out = out + kmu*kmu; + } + } + + template + void Scalar::FreePropagator(const ScalarField &in, ScalarField &out, + const ScalarField &FTKernel) + { + FFT fft((GridCartesian *)in._grid); + ScalarField inFT(in._grid); + + fft.FFT_all_dim(inFT, in, FFT::forward); + inFT = inFT*FTKernel; + fft.FFT_all_dim(out, inFT, FFT::backward); + } + + template + void Scalar::FreePropagator(const ScalarField &in, ScalarField &out, + RealD m) + { + ScalarField FTKernel(in._grid); + + MomentumSpacePropagator(FTKernel, m); + FreePropagator(in, out, FTKernel); + } + + template + void ScalarToProp(typename SImpl::PropagatorField &p, + const typename SImpl::ScalarField &s, + const int c) + { + for(int i = 0; i < SImpl::rDim; ++i) + { + pokeColour(p, peekColour(s, i), i); + } + } + + template + void PropToScalar(typename SImpl::ScalarField &s, + const typename SImpl::PropagatorField &p, + const int c) + { + for(int i = 0; i < SImpl::rDim; ++i) + { + pokeColour(s, peekColour(p, i), i); + } + } +}} + 
+#endif From afbf7d4c37df8f134aa6bb191d1fd29d1709b16e Mon Sep 17 00:00:00 2001 From: Antonin Portelli Date: Thu, 29 Dec 2016 22:43:38 +0100 Subject: [PATCH 019/177] QED Gimpl moved in Photon.h --- extras/Hadrons/Modules/MGauge/StochEm.hpp | 21 --------------------- extras/qed-fvol/qed-fvol.cc | 22 ---------------------- lib/qcd/action/gauge/Photon.h | 22 +++++++++++++++++++++- 3 files changed, 21 insertions(+), 44 deletions(-) diff --git a/extras/Hadrons/Modules/MGauge/StochEm.hpp b/extras/Hadrons/Modules/MGauge/StochEm.hpp index 04a7c48c..50a77435 100644 --- a/extras/Hadrons/Modules/MGauge/StochEm.hpp +++ b/extras/Hadrons/Modules/MGauge/StochEm.hpp @@ -39,27 +39,6 @@ BEGIN_HADRONS_NAMESPACE ******************************************************************************/ BEGIN_MODULE_NAMESPACE(MGauge) -template -class QedGimpl -{ -public: - typedef S Simd; - - template - using iImplGaugeLink = iScalar>>; - template - using iImplGaugeField = iVector>, Nd>; - - typedef iImplGaugeLink SiteGaugeLink; - typedef iImplGaugeField SiteGaugeField; - - typedef Lattice GaugeLinkField; - typedef Lattice GaugeField; -}; - -typedef QedGimpl QedGimplR; -typedef Photon PhotonR; - class StochEmPar: Serializable { public: diff --git a/extras/qed-fvol/qed-fvol.cc b/extras/qed-fvol/qed-fvol.cc index 951c36ad..3ecac2fc 100644 --- a/extras/qed-fvol/qed-fvol.cc +++ b/extras/qed-fvol/qed-fvol.cc @@ -5,29 +5,7 @@ using namespace Grid; using namespace QCD; using namespace QedFVol; -template -class QedGimpl -{ -public: - typedef S Simd; - - template - using iImplGaugeLink = iScalar>>; - template - using iImplGaugeField = iVector>, Nd>; - - typedef iImplGaugeLink SiteGaugeLink; - typedef iImplGaugeField SiteGaugeField; - - typedef Lattice GaugeLinkField; // bit ugly naming; polarised - // gauge field, lorentz... 
all - // ugly - typedef Lattice GaugeField; -}; - -typedef QedGimpl QedGimplR; typedef PeriodicGaugeImpl QedPeriodicGimplR; -typedef Photon PhotonR; typedef PhotonR::GaugeField EmField; typedef PhotonR::GaugeLinkField EmComp; diff --git a/lib/qcd/action/gauge/Photon.h b/lib/qcd/action/gauge/Photon.h index faa63b42..73405297 100644 --- a/lib/qcd/action/gauge/Photon.h +++ b/lib/qcd/action/gauge/Photon.h @@ -28,9 +28,27 @@ #ifndef QCD_PHOTON_ACTION_H #define QCD_PHOTON_ACTION_H - namespace Grid{ namespace QCD{ + template + class QedGimpl + { + public: + typedef S Simd; + + template + using iImplGaugeLink = iScalar>>; + template + using iImplGaugeField = iVector>, Nd>; + + typedef iImplGaugeLink SiteGaugeLink; + typedef iImplGaugeField SiteGaugeField; + + typedef Lattice GaugeLinkField; + typedef Lattice GaugeField; + }; + + typedef QedGimpl QedGimplR; template class Photon @@ -56,6 +74,8 @@ namespace QCD{ ZmScheme zmScheme_; }; + typedef Photon PhotonR; + template Photon::Photon(Gauge gauge, ZmScheme zmScheme) : gauge_(gauge), zmScheme_(zmScheme) From 4c60e31070f3d05ba7e52c15c4bf6e59644a046a Mon Sep 17 00:00:00 2001 From: Antonin Portelli Date: Thu, 29 Dec 2016 22:44:08 +0100 Subject: [PATCH 020/177] Hadrons: code cleaning --- extras/Hadrons/Modules/Quark.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/extras/Hadrons/Modules/Quark.hpp b/extras/Hadrons/Modules/Quark.hpp index e441a096..0cf7314b 100644 --- a/extras/Hadrons/Modules/Quark.hpp +++ b/extras/Hadrons/Modules/Quark.hpp @@ -133,7 +133,7 @@ void TQuark::execute(void) for (unsigned int c = 0; c < Nc; ++c) { LOG(Message) << "Inversion for spin= " << s << ", color= " << c - << std::endl; + << std::endl; // source conversion for 4D sources if (!env().isObject5d(par().source)) { From bbc0eff078cfd331ca31f2dd0c95b3030ee8a261 Mon Sep 17 00:00:00 2001 From: Antonin Portelli Date: Thu, 29 Dec 2016 22:44:22 +0100 Subject: [PATCH 021/177] Hadrons: scalar sources --- 
extras/Hadrons/Modules/MSource/Point.hpp | 5 +++-- extras/Hadrons/Modules/MSource/Z2.hpp | 5 +++-- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/extras/Hadrons/Modules/MSource/Point.hpp b/extras/Hadrons/Modules/MSource/Point.hpp index a0ecbc2a..8d0b4de8 100644 --- a/extras/Hadrons/Modules/MSource/Point.hpp +++ b/extras/Hadrons/Modules/MSource/Point.hpp @@ -63,7 +63,7 @@ template class TPoint: public Module { public: - TYPE_ALIASES(FImpl,); + FERM_TYPE_ALIASES(FImpl,); public: // constructor TPoint(const std::string name); @@ -78,7 +78,8 @@ public: virtual void execute(void); }; -MODULE_REGISTER_NS(Point, TPoint, MSource); +MODULE_REGISTER_NS(Point, TPoint, MSource); +MODULE_REGISTER_NS(ScalarPoint, TPoint, MSource); /****************************************************************************** * TPoint template implementation * diff --git a/extras/Hadrons/Modules/MSource/Z2.hpp b/extras/Hadrons/Modules/MSource/Z2.hpp index cd5727be..6fa49cfe 100644 --- a/extras/Hadrons/Modules/MSource/Z2.hpp +++ b/extras/Hadrons/Modules/MSource/Z2.hpp @@ -67,7 +67,7 @@ template class TZ2: public Module { public: - TYPE_ALIASES(FImpl,); + FERM_TYPE_ALIASES(FImpl,); public: // constructor TZ2(const std::string name); @@ -82,7 +82,8 @@ public: virtual void execute(void); }; -MODULE_REGISTER_NS(Z2, TZ2, MSource); +MODULE_REGISTER_NS(Z2, TZ2, MSource); +MODULE_REGISTER_NS(ScalarZ2, TZ2, MSource); /****************************************************************************** * TZ2 template implementation * From 673994b281e6c464b4021c62c80a9976e0035176 Mon Sep 17 00:00:00 2001 From: Antonin Portelli Date: Thu, 29 Dec 2016 22:44:58 +0100 Subject: [PATCH 022/177] Hadrons: modules for scalar propagators --- extras/Hadrons/Global.hpp | 25 ++++++-- extras/Hadrons/Modules.hpp | 30 +--------- extras/Hadrons/Modules/MScalar/ChargedProp.cc | 40 +++++++++++++ .../Hadrons/Modules/MScalar/ChargedProp.hpp | 44 ++++++++++++++ extras/Hadrons/Modules/MScalar/FreeProp.cc | 57 
+++++++++++++++++++ extras/Hadrons/Modules/MScalar/FreeProp.hpp | 47 +++++++++++++++ extras/Hadrons/modules.inc | 6 +- 7 files changed, 215 insertions(+), 34 deletions(-) create mode 100644 extras/Hadrons/Modules/MScalar/ChargedProp.cc create mode 100644 extras/Hadrons/Modules/MScalar/ChargedProp.hpp create mode 100644 extras/Hadrons/Modules/MScalar/FreeProp.cc create mode 100644 extras/Hadrons/Modules/MScalar/FreeProp.hpp diff --git a/extras/Hadrons/Global.hpp b/extras/Hadrons/Global.hpp index 81afab13..bcb282fc 100644 --- a/extras/Hadrons/Global.hpp +++ b/extras/Hadrons/Global.hpp @@ -51,23 +51,38 @@ using Grid::operator<<; * error with GCC 5 (clang & GCC 6 compile fine without it). */ -// FIXME: find a way to do that in a more general fashion #ifndef FIMPL #define FIMPL WilsonImplR #endif +#ifndef SIMPL +#define SIMPL ScalarImplR +#endif BEGIN_HADRONS_NAMESPACE // type aliases -#define TYPE_ALIASES(FImpl, suffix)\ +#define FERM_TYPE_ALIASES(FImpl, suffix)\ typedef FermionOperator FMat##suffix; \ typedef typename FImpl::FermionField FermionField##suffix; \ typedef typename FImpl::PropagatorField PropagatorField##suffix; \ -typedef typename FImpl::SitePropagator SitePropagator##suffix; \ -typedef typename FImpl::DoubledGaugeField DoubledGaugeField##suffix;\ -typedef std::function SolverFn##suffix; +#define TYPE_ALIASES(FImpl, suffix)\ +FERM_TYPE_ALIASES(FImpl, suffix)\ +GAUGE_TYPE_ALIASES(FImpl, suffix)\ +SOLVER_TYPE_ALIASES(FImpl, suffix) + // logger class HadronsLogger: public Logger { diff --git a/extras/Hadrons/Modules.hpp b/extras/Hadrons/Modules.hpp index 5d1a456c..ad31d2a7 100644 --- a/extras/Hadrons/Modules.hpp +++ b/extras/Hadrons/Modules.hpp @@ -1,31 +1,3 @@ -/************************************************************************************* - -Grid physics library, www.github.com/paboyle/Grid - -Source file: extras/Hadrons/Modules.hpp - -Copyright (C) 2015 -Copyright (C) 2016 - -Author: Antonin Portelli - -This program is free software; you can 
redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; either version 2 of the License, or -(at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License along -with this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. - -See the full license in the file "LICENSE" in the top level distribution directory -*************************************************************************************/ -/* END LEGAL */ #include #include #include @@ -34,6 +6,8 @@ See the full license in the file "LICENSE" in the top level distribution directo #include #include #include +#include +#include #include #include #include diff --git a/extras/Hadrons/Modules/MScalar/ChargedProp.cc b/extras/Hadrons/Modules/MScalar/ChargedProp.cc new file mode 100644 index 00000000..1137c6f0 --- /dev/null +++ b/extras/Hadrons/Modules/MScalar/ChargedProp.cc @@ -0,0 +1,40 @@ +#include + +using namespace Grid; +using namespace Hadrons; +using namespace MScalar; + +/****************************************************************************** +* TChargedProp implementation * +******************************************************************************/ +// constructor ///////////////////////////////////////////////////////////////// +TChargedProp::TChargedProp(const std::string name) +: Module(name) +{} + +// dependencies/products /////////////////////////////////////////////////////// +std::vector TChargedProp::getInput(void) +{ + std::vector in; + + return in; +} + +std::vector TChargedProp::getOutput(void) +{ + std::vector out = {getName()}; + + return out; +} + +// setup 
/////////////////////////////////////////////////////////////////////// +void TChargedProp::setup(void) +{ + +} + +// execution /////////////////////////////////////////////////////////////////// +void TChargedProp::execute(void) +{ + +} diff --git a/extras/Hadrons/Modules/MScalar/ChargedProp.hpp b/extras/Hadrons/Modules/MScalar/ChargedProp.hpp new file mode 100644 index 00000000..7a60c2ad --- /dev/null +++ b/extras/Hadrons/Modules/MScalar/ChargedProp.hpp @@ -0,0 +1,44 @@ +#ifndef Hadrons_ChargedProp_hpp_ +#define Hadrons_ChargedProp_hpp_ + +#include +#include +#include + +BEGIN_HADRONS_NAMESPACE + +/****************************************************************************** + * ChargedProp * + ******************************************************************************/ +BEGIN_MODULE_NAMESPACE(MScalar) + +class ChargedPropPar: Serializable +{ +public: + GRID_SERIALIZABLE_CLASS_MEMBERS(ChargedPropPar, + unsigned int, i); +}; + +class TChargedProp: public Module +{ +public: + // constructor + TChargedProp(const std::string name); + // destructor + virtual ~TChargedProp(void) = default; + // dependency relation + virtual std::vector getInput(void); + virtual std::vector getOutput(void); + // setup + virtual void setup(void); + // execution + virtual void execute(void); +}; + +MODULE_REGISTER_NS(ChargedProp, TChargedProp, MScalar); + +END_MODULE_NAMESPACE + +END_HADRONS_NAMESPACE + +#endif // Hadrons_ChargedProp_hpp_ diff --git a/extras/Hadrons/Modules/MScalar/FreeProp.cc b/extras/Hadrons/Modules/MScalar/FreeProp.cc new file mode 100644 index 00000000..7419a954 --- /dev/null +++ b/extras/Hadrons/Modules/MScalar/FreeProp.cc @@ -0,0 +1,57 @@ +#include + +using namespace Grid; +using namespace Hadrons; +using namespace MScalar; + +/****************************************************************************** +* TFreeProp implementation * +******************************************************************************/ +// constructor 
///////////////////////////////////////////////////////////////// +TFreeProp::TFreeProp(const std::string name) +: Module(name) +{} + +// dependencies/products /////////////////////////////////////////////////////// +std::vector TFreeProp::getInput(void) +{ + std::vector in = {par().source}; + + return in; +} + +std::vector TFreeProp::getOutput(void) +{ + std::vector out = {getName()}; + + return out; +} + +// setup /////////////////////////////////////////////////////////////////////// +void TFreeProp::setup(void) +{ + env().registerLattice(getName()); +} + +// execution /////////////////////////////////////////////////////////////////// +void TFreeProp::execute(void) +{ + ScalarField &prop = *env().createLattice(getName()); + ScalarField &source = *env().getObject(par().source); + ScalarField *momKernel; + std::string kerName = "_" + getName() + "_momKernel"; + + if (!env().hasCreatedObject(kerName)) + { + LOG(Message) << "Caching momentum space free scalar propagator" + << "(mass= " << par().mass << ")..." << std::endl; + momKernel = env().template createLattice(kerName); + Scalar::MomentumSpacePropagator(*momKernel, par().mass); + } + else + { + momKernel = env().getObject(kerName); + } + LOG(Message) << "Computing free scalar propagator..." 
<< std::endl; + Scalar::FreePropagator(source, prop, *momKernel); +} diff --git a/extras/Hadrons/Modules/MScalar/FreeProp.hpp b/extras/Hadrons/Modules/MScalar/FreeProp.hpp new file mode 100644 index 00000000..6a0cd930 --- /dev/null +++ b/extras/Hadrons/Modules/MScalar/FreeProp.hpp @@ -0,0 +1,47 @@ +#ifndef Hadrons_FreeProp_hpp_ +#define Hadrons_FreeProp_hpp_ + +#include +#include +#include + +BEGIN_HADRONS_NAMESPACE + +/****************************************************************************** + * FreeProp * + ******************************************************************************/ +BEGIN_MODULE_NAMESPACE(MScalar) + +class FreePropPar: Serializable +{ +public: + GRID_SERIALIZABLE_CLASS_MEMBERS(FreePropPar, + std::string, source, + double, mass); +}; + +class TFreeProp: public Module +{ +public: + SCALAR_TYPE_ALIASES(SIMPL,); +public: + // constructor + TFreeProp(const std::string name); + // destructor + virtual ~TFreeProp(void) = default; + // dependency relation + virtual std::vector getInput(void); + virtual std::vector getOutput(void); + // setup + virtual void setup(void); + // execution + virtual void execute(void); +}; + +MODULE_REGISTER_NS(FreeProp, TFreeProp, MScalar); + +END_MODULE_NAMESPACE + +END_HADRONS_NAMESPACE + +#endif // Hadrons_FreeProp_hpp_ diff --git a/extras/Hadrons/modules.inc b/extras/Hadrons/modules.inc index 8b559024..b091c38b 100644 --- a/extras/Hadrons/modules.inc +++ b/extras/Hadrons/modules.inc @@ -2,7 +2,9 @@ modules_cc =\ Modules/MGauge/Load.cc \ Modules/MGauge/Random.cc \ Modules/MGauge/StochEm.cc \ - Modules/MGauge/Unit.cc + Modules/MGauge/Unit.cc \ + Modules/MScalar/ChargedProp.cc \ + Modules/MScalar/FreeProp.cc modules_hpp =\ Modules/MAction/DWF.hpp \ @@ -13,6 +15,8 @@ modules_hpp =\ Modules/MGauge/Random.hpp \ Modules/MGauge/StochEm.hpp \ Modules/MGauge/Unit.hpp \ + Modules/MScalar/ChargedProp.hpp \ + Modules/MScalar/FreeProp.hpp \ Modules/MSolver/RBPrecCG.hpp \ Modules/MSource/Point.hpp \ Modules/MSource/SeqGamma.hpp 
\ From 82b3f546970fde47b1e1220679f3c6c772d5eff0 Mon Sep 17 00:00:00 2001 From: Antonin Portelli Date: Thu, 5 Jan 2017 14:58:07 +0000 Subject: [PATCH 023/177] scalar free propagator fix --- lib/qcd/action/scalar/Scalar.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/lib/qcd/action/scalar/Scalar.h b/lib/qcd/action/scalar/Scalar.h index 194f6767..c053e15e 100644 --- a/lib/qcd/action/scalar/Scalar.h +++ b/lib/qcd/action/scalar/Scalar.h @@ -148,10 +148,11 @@ namespace QCD{ void Scalar::MomentumSpacePropagator(ScalarField &out, RealD m) { GridBase *grid = out._grid; - ScalarField kmu(grid); + ScalarField kmu(grid), one(grid); const unsigned int nd = grid->_ndimension; std::vector &l = grid->_fdimensions; + one = Complex(1.0,0.0); out = m*m; for(int mu = 0; mu < nd; mu++) { @@ -161,6 +162,7 @@ namespace QCD{ kmu = 2.*sin(.5*twoPiL*kmu); out = out + kmu*kmu; } + out = one/out; } template From 97843e2b5818667ab5f6802003bb4c04d6076503 Mon Sep 17 00:00:00 2001 From: Antonin Portelli Date: Thu, 5 Jan 2017 14:58:55 +0000 Subject: [PATCH 024/177] Hadrons: free scalar buffer fix and output --- extras/Hadrons/Modules/MScalar/FreeProp.cc | 34 ++++++++++++++++++--- extras/Hadrons/Modules/MScalar/FreeProp.hpp | 5 +-- 2 files changed, 32 insertions(+), 7 deletions(-) diff --git a/extras/Hadrons/Modules/MScalar/FreeProp.cc b/extras/Hadrons/Modules/MScalar/FreeProp.cc index 7419a954..ba85e041 100644 --- a/extras/Hadrons/Modules/MScalar/FreeProp.cc +++ b/extras/Hadrons/Modules/MScalar/FreeProp.cc @@ -1,11 +1,13 @@ #include +#define KERNAME "_" + getName() + "_momKernel" + using namespace Grid; using namespace Hadrons; using namespace MScalar; /****************************************************************************** -* TFreeProp implementation * +* TFreeProp implementation * ******************************************************************************/ // constructor ///////////////////////////////////////////////////////////////// TFreeProp::TFreeProp(const 
std::string name) @@ -30,6 +32,12 @@ std::vector TFreeProp::getOutput(void) // setup /////////////////////////////////////////////////////////////////////// void TFreeProp::setup(void) { + std::string kerName = KERNAME; + + if (!env().hasRegisteredObject(kerName)) + { + env().registerLattice(kerName); + } env().registerLattice(getName()); } @@ -39,13 +47,13 @@ void TFreeProp::execute(void) ScalarField &prop = *env().createLattice(getName()); ScalarField &source = *env().getObject(par().source); ScalarField *momKernel; - std::string kerName = "_" + getName() + "_momKernel"; - + std::string kerName = KERNAME; + if (!env().hasCreatedObject(kerName)) { LOG(Message) << "Caching momentum space free scalar propagator" - << "(mass= " << par().mass << ")..." << std::endl; - momKernel = env().template createLattice(kerName); + << " (mass= " << par().mass << ")..." << std::endl; + momKernel = env().createLattice(kerName); Scalar::MomentumSpacePropagator(*momKernel, par().mass); } else @@ -54,4 +62,20 @@ void TFreeProp::execute(void) } LOG(Message) << "Computing free scalar propagator..." << std::endl; Scalar::FreePropagator(source, prop, *momKernel); + + if (!par().output.empty()) + { + TextWriter writer(par().output + "." 
+ + std::to_string(env().getTrajectory())); + std::vector buf; + std::vector result; + + sliceSum(prop, buf, Tp); + result.resize(buf.size()); + for (unsigned int t = 0; t < buf.size(); ++t) + { + result[t] = TensorRemove(buf[t]); + } + write(writer, "prop", result); + } } diff --git a/extras/Hadrons/Modules/MScalar/FreeProp.hpp b/extras/Hadrons/Modules/MScalar/FreeProp.hpp index 6a0cd930..81bb8121 100644 --- a/extras/Hadrons/Modules/MScalar/FreeProp.hpp +++ b/extras/Hadrons/Modules/MScalar/FreeProp.hpp @@ -8,7 +8,7 @@ BEGIN_HADRONS_NAMESPACE /****************************************************************************** - * FreeProp * + * FreeProp * ******************************************************************************/ BEGIN_MODULE_NAMESPACE(MScalar) @@ -17,7 +17,8 @@ class FreePropPar: Serializable public: GRID_SERIALIZABLE_CLASS_MEMBERS(FreePropPar, std::string, source, - double, mass); + double, mass, + std::string, output); }; class TFreeProp: public Module From fc760016b3e12b5fea25c8ca288525d4e2dad7c7 Mon Sep 17 00:00:00 2001 From: Antonin Portelli Date: Wed, 11 Jan 2017 18:39:58 +0000 Subject: [PATCH 025/177] More uniform cache name for scalar momentum propagators --- extras/Hadrons/Modules.hpp | 1 + extras/Hadrons/Modules/MScalar/FreeProp.cc | 22 ++++++++++----------- extras/Hadrons/Modules/MScalar/FreeProp.hpp | 2 ++ extras/Hadrons/Modules/MScalar/Scalar.hpp | 6 ++++++ extras/Hadrons/modules.inc | 1 + 5 files changed, 20 insertions(+), 12 deletions(-) create mode 100644 extras/Hadrons/Modules/MScalar/Scalar.hpp diff --git a/extras/Hadrons/Modules.hpp b/extras/Hadrons/Modules.hpp index ad31d2a7..a25419c5 100644 --- a/extras/Hadrons/Modules.hpp +++ b/extras/Hadrons/Modules.hpp @@ -8,6 +8,7 @@ #include #include #include +#include #include #include #include diff --git a/extras/Hadrons/Modules/MScalar/FreeProp.cc b/extras/Hadrons/Modules/MScalar/FreeProp.cc index ba85e041..f0a503ff 100644 --- a/extras/Hadrons/Modules/MScalar/FreeProp.cc +++ 
b/extras/Hadrons/Modules/MScalar/FreeProp.cc @@ -1,6 +1,5 @@ #include - -#define KERNAME "_" + getName() + "_momKernel" +#include using namespace Grid; using namespace Hadrons; @@ -32,11 +31,11 @@ std::vector TFreeProp::getOutput(void) // setup /////////////////////////////////////////////////////////////////////// void TFreeProp::setup(void) { - std::string kerName = KERNAME; + freeMomPropName_ = FREEMOMPROP(par().mass); - if (!env().hasRegisteredObject(kerName)) + if (!env().hasRegisteredObject(freeMomPropName_)) { - env().registerLattice(kerName); + env().registerLattice(freeMomPropName_); } env().registerLattice(getName()); } @@ -46,22 +45,21 @@ void TFreeProp::execute(void) { ScalarField &prop = *env().createLattice(getName()); ScalarField &source = *env().getObject(par().source); - ScalarField *momKernel; - std::string kerName = KERNAME; + ScalarField *freeMomProp; - if (!env().hasCreatedObject(kerName)) + if (!env().hasCreatedObject(freeMomPropName_)) { LOG(Message) << "Caching momentum space free scalar propagator" << " (mass= " << par().mass << ")..." << std::endl; - momKernel = env().createLattice(kerName); - Scalar::MomentumSpacePropagator(*momKernel, par().mass); + freeMomProp = env().createLattice(freeMomPropName_); + Scalar::MomentumSpacePropagator(*freeMomProp, par().mass); } else { - momKernel = env().getObject(kerName); + freeMomProp = env().getObject(freeMomPropName_); } LOG(Message) << "Computing free scalar propagator..." 
<< std::endl; - Scalar::FreePropagator(source, prop, *momKernel); + Scalar::FreePropagator(source, prop, *freeMomProp); if (!par().output.empty()) { diff --git a/extras/Hadrons/Modules/MScalar/FreeProp.hpp b/extras/Hadrons/Modules/MScalar/FreeProp.hpp index 81bb8121..29f15eda 100644 --- a/extras/Hadrons/Modules/MScalar/FreeProp.hpp +++ b/extras/Hadrons/Modules/MScalar/FreeProp.hpp @@ -37,6 +37,8 @@ public: virtual void setup(void); // execution virtual void execute(void); +private: + std::string freeMomPropName_; }; MODULE_REGISTER_NS(FreeProp, TFreeProp, MScalar); diff --git a/extras/Hadrons/Modules/MScalar/Scalar.hpp b/extras/Hadrons/Modules/MScalar/Scalar.hpp new file mode 100644 index 00000000..db702ff2 --- /dev/null +++ b/extras/Hadrons/Modules/MScalar/Scalar.hpp @@ -0,0 +1,6 @@ +#ifndef Hadrons_Scalar_hpp_ +#define Hadrons_Scalar_hpp_ + +#define FREEMOMPROP(m) "_scalar_mom_prop_" + std::to_string(m) + +#endif // Hadrons_Scalar_hpp_ diff --git a/extras/Hadrons/modules.inc b/extras/Hadrons/modules.inc index b091c38b..dfbe85ff 100644 --- a/extras/Hadrons/modules.inc +++ b/extras/Hadrons/modules.inc @@ -17,6 +17,7 @@ modules_hpp =\ Modules/MGauge/Unit.hpp \ Modules/MScalar/ChargedProp.hpp \ Modules/MScalar/FreeProp.hpp \ + Modules/MScalar/Scalar.hpp \ Modules/MSolver/RBPrecCG.hpp \ Modules/MSource/Point.hpp \ Modules/MSource/SeqGamma.hpp \ From ad98b6193d4b08ad42c9da79370b4ccd7382b4cb Mon Sep 17 00:00:00 2001 From: Antonin Portelli Date: Wed, 11 Jan 2017 18:40:43 +0000 Subject: [PATCH 026/177] creating the necessary caches for the FFT EM scalar propagator --- extras/Hadrons/Modules/MScalar/ChargedProp.cc | 68 ++++++++++++++++++- .../Hadrons/Modules/MScalar/ChargedProp.hpp | 10 ++- 2 files changed, 74 insertions(+), 4 deletions(-) diff --git a/extras/Hadrons/Modules/MScalar/ChargedProp.cc b/extras/Hadrons/Modules/MScalar/ChargedProp.cc index 1137c6f0..1cd0cae6 100644 --- a/extras/Hadrons/Modules/MScalar/ChargedProp.cc +++ 
b/extras/Hadrons/Modules/MScalar/ChargedProp.cc @@ -1,4 +1,5 @@ #include +#include using namespace Grid; using namespace Hadrons; @@ -15,7 +16,7 @@ TChargedProp::TChargedProp(const std::string name) // dependencies/products /////////////////////////////////////////////////////// std::vector TChargedProp::getInput(void) { - std::vector in; + std::vector in = {par().source, par().emField}; return in; } @@ -30,11 +31,72 @@ std::vector TChargedProp::getOutput(void) // setup /////////////////////////////////////////////////////////////////////// void TChargedProp::setup(void) { - + freeMomPropName_ = FREEMOMPROP(par().mass); + shiftedMomPropName_.clear(); + for (unsigned int mu = 0; mu < env().getNd(); ++mu) + { + shiftedMomPropName_.push_back(freeMomPropName_ + "_" + + std::to_string(mu)); + } + if (!env().hasRegisteredObject(freeMomPropName_)) + { + env().registerLattice(freeMomPropName_); + } + if (!env().hasRegisteredObject(shiftedMomPropName_[0])) + { + for (unsigned int mu = 0; mu < env().getNd(); ++mu) + { + env().registerLattice(shiftedMomPropName_[mu]); + } + } + env().registerLattice(getName()); + } // execution /////////////////////////////////////////////////////////////////// void TChargedProp::execute(void) { - + ScalarField &prop = *env().createLattice(getName()); + ScalarField &source = *env().getObject(par().source); + ScalarField *freeMomProp; + std::vector shiftedMomProp; + Complex ci(0.0,1.0); + + if (!env().hasCreatedObject(freeMomPropName_)) + { + LOG(Message) << "Caching momentum space free scalar propagator" + << " (mass= " << par().mass << ")..." 
<< std::endl; + freeMomProp = env().createLattice(freeMomPropName_); + Scalar::MomentumSpacePropagator(*freeMomProp, par().mass); + } + else + { + freeMomProp = env().getObject(freeMomPropName_); + } + if (!env().hasCreatedObject(shiftedMomPropName_[0])) + { + std::vector &l = env().getGrid()->_fdimensions; + + LOG(Message) << "Caching shifted momentum space free scalar propagator" + << " (mass= " << par().mass << ")..." << std::endl; + for (unsigned int mu = 0; mu < env().getNd(); ++mu) + { + Real twoPiL = M_PI*2./l[mu]; + + shiftedMomProp.push_back( + env().createLattice(shiftedMomPropName_[mu])); + LatticeCoordinate(*(shiftedMomProp[mu]), mu); + *(shiftedMomProp[mu]) = exp(ci*twoPiL*(*(shiftedMomProp[mu]))) + *(*freeMomProp); + } + } + else + { + for (unsigned int mu = 0; mu < env().getNd(); ++mu) + { + shiftedMomProp.push_back( + env().getObject(shiftedMomPropName_[mu])); + } + } + } diff --git a/extras/Hadrons/Modules/MScalar/ChargedProp.hpp b/extras/Hadrons/Modules/MScalar/ChargedProp.hpp index 7a60c2ad..91ea2355 100644 --- a/extras/Hadrons/Modules/MScalar/ChargedProp.hpp +++ b/extras/Hadrons/Modules/MScalar/ChargedProp.hpp @@ -16,11 +16,16 @@ class ChargedPropPar: Serializable { public: GRID_SERIALIZABLE_CLASS_MEMBERS(ChargedPropPar, - unsigned int, i); + std::string, emField, + std::string, source, + double, mass, + std::string, output); }; class TChargedProp: public Module { +public: + SCALAR_TYPE_ALIASES(SIMPL,); public: // constructor TChargedProp(const std::string name); @@ -33,6 +38,9 @@ public: virtual void setup(void); // execution virtual void execute(void); +private: + std::string freeMomPropName_; + std::vector shiftedMomPropName_; }; MODULE_REGISTER_NS(ChargedProp, TChargedProp, MScalar); From 889d828bc289d2ee4ea5939af29ff56fe7466db5 Mon Sep 17 00:00:00 2001 From: Antonin Portelli Date: Thu, 12 Jan 2017 18:17:44 +0000 Subject: [PATCH 027/177] Code cleaning --- extras/Hadrons/Modules/MScalar/ChargedProp.cc | 2 +- 
extras/Hadrons/Modules/MScalar/ChargedProp.hpp | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/extras/Hadrons/Modules/MScalar/ChargedProp.cc b/extras/Hadrons/Modules/MScalar/ChargedProp.cc index 1cd0cae6..ff53fa0b 100644 --- a/extras/Hadrons/Modules/MScalar/ChargedProp.cc +++ b/extras/Hadrons/Modules/MScalar/ChargedProp.cc @@ -6,7 +6,7 @@ using namespace Hadrons; using namespace MScalar; /****************************************************************************** -* TChargedProp implementation * +* TChargedProp implementation * ******************************************************************************/ // constructor ///////////////////////////////////////////////////////////////// TChargedProp::TChargedProp(const std::string name) diff --git a/extras/Hadrons/Modules/MScalar/ChargedProp.hpp b/extras/Hadrons/Modules/MScalar/ChargedProp.hpp index 91ea2355..001f6494 100644 --- a/extras/Hadrons/Modules/MScalar/ChargedProp.hpp +++ b/extras/Hadrons/Modules/MScalar/ChargedProp.hpp @@ -8,7 +8,7 @@ BEGIN_HADRONS_NAMESPACE /****************************************************************************** - * ChargedProp * + * Charged scalar propagator * ******************************************************************************/ BEGIN_MODULE_NAMESPACE(MScalar) From 65987a8a5810434b3f7ee54e8b6a4cf400108c74 Mon Sep 17 00:00:00 2001 From: Antonin Portelli Date: Thu, 12 Jan 2017 20:44:23 +0000 Subject: [PATCH 028/177] First implementation of the scalar QED propagator, runs but absolutely not checked --- extras/Hadrons/Modules/MScalar/ChargedProp.cc | 122 +++++++++++++++--- .../Hadrons/Modules/MScalar/ChargedProp.hpp | 13 +- 2 files changed, 115 insertions(+), 20 deletions(-) diff --git a/extras/Hadrons/Modules/MScalar/ChargedProp.cc b/extras/Hadrons/Modules/MScalar/ChargedProp.cc index ff53fa0b..dd260798 100644 --- a/extras/Hadrons/Modules/MScalar/ChargedProp.cc +++ b/extras/Hadrons/Modules/MScalar/ChargedProp.cc @@ -32,23 +32,28 @@ std::vector 
TChargedProp::getOutput(void) void TChargedProp::setup(void) { freeMomPropName_ = FREEMOMPROP(par().mass); - shiftedMomPropName_.clear(); + phaseName_.clear(); for (unsigned int mu = 0; mu < env().getNd(); ++mu) { - shiftedMomPropName_.push_back(freeMomPropName_ + "_" + phaseName_.push_back(freeMomPropName_ + "_" + std::to_string(mu)); } + GFSrcName_ = "_" + getName() + "_DinvSrc"; if (!env().hasRegisteredObject(freeMomPropName_)) { env().registerLattice(freeMomPropName_); } - if (!env().hasRegisteredObject(shiftedMomPropName_[0])) + if (!env().hasRegisteredObject(phaseName_[0])) { for (unsigned int mu = 0; mu < env().getNd(); ++mu) { - env().registerLattice(shiftedMomPropName_[mu]); + env().registerLattice(phaseName_[mu]); } } + if (!env().hasRegisteredObject(GFSrcName_)) + { + env().registerLattice(GFSrcName_); + } env().registerLattice(getName()); } @@ -56,24 +61,26 @@ void TChargedProp::setup(void) // execution /////////////////////////////////////////////////////////////////// void TChargedProp::execute(void) { + // CACHING ANALYTIC EXPRESSIONS ScalarField &prop = *env().createLattice(getName()); ScalarField &source = *env().getObject(par().source); - ScalarField *freeMomProp; - std::vector shiftedMomProp; - Complex ci(0.0,1.0); + Complex ci(0.0,1.0); + FFT fft(env().getGrid()); + // cache free scalar propagator if (!env().hasCreatedObject(freeMomPropName_)) { LOG(Message) << "Caching momentum space free scalar propagator" << " (mass= " << par().mass << ")..." 
<< std::endl; - freeMomProp = env().createLattice(freeMomPropName_); - Scalar::MomentumSpacePropagator(*freeMomProp, par().mass); + freeMomProp_ = env().createLattice(freeMomPropName_); + Scalar::MomentumSpacePropagator(*freeMomProp_, par().mass); } else { - freeMomProp = env().getObject(freeMomPropName_); + freeMomProp_ = env().getObject(freeMomPropName_); } - if (!env().hasCreatedObject(shiftedMomPropName_[0])) + // cache phases + if (!env().hasCreatedObject(phaseName_[0])) { std::vector &l = env().getGrid()->_fdimensions; @@ -83,20 +90,99 @@ void TChargedProp::execute(void) { Real twoPiL = M_PI*2./l[mu]; - shiftedMomProp.push_back( - env().createLattice(shiftedMomPropName_[mu])); - LatticeCoordinate(*(shiftedMomProp[mu]), mu); - *(shiftedMomProp[mu]) = exp(ci*twoPiL*(*(shiftedMomProp[mu]))) - *(*freeMomProp); + phase_.push_back(env().createLattice(phaseName_[mu])); + LatticeCoordinate(*(phase_[mu]), mu); + *(phase_[mu]) = exp(ci*twoPiL*(*(phase_[mu]))); } } else { for (unsigned int mu = 0; mu < env().getNd(); ++mu) { - shiftedMomProp.push_back( - env().getObject(shiftedMomPropName_[mu])); + phase_.push_back(env().getObject(phaseName_[mu])); } } + // cache G*F*src + if (!env().hasCreatedObject(GFSrcName_)) + + { + GFSrc_ = env().createLattice(GFSrcName_); + fft.FFT_all_dim(*GFSrc_, source, FFT::forward); + *GFSrc_ = (*freeMomProp_)*(*GFSrc_); + } + else + { + GFSrc_ = env().getObject(GFSrcName_); + } + // PROPAGATOR CALCULATION + ScalarField buf(env().getGrid()); + ScalarField &GFSrc = *GFSrc_, &G = *freeMomProp_; + double q = par().charge; + + // G*F*Src + prop = GFSrc; + // - q*G*momD1*G*F*Src (momD1 = F*D1*Finv) + buf = GFSrc; + momD1(buf, fft); + buf = G*buf; + prop = prop - q*buf; + // + q^2*G*momD1*G*momD1*G*F*Src (here buf = G*momD1*G*F*Src) + momD1(buf, fft); + prop = prop + q*q*G*buf; + // + q^2*G*momD2*G*F*Src (momD1 = F*D2*Finv) + buf = GFSrc; + momD2(buf, fft); + prop = prop + q*q*G*buf; + // final FT + fft.FFT_all_dim(prop, prop, FFT::backward); +} + 
+void TChargedProp::momD1(ScalarField &s, FFT &fft) +{ + EmField &A = *env().getObject(par().emField); + ScalarField buf(env().getGrid()), Amu(env().getGrid()); + Complex ci(0.0,1.0); + + for (unsigned int mu = 0; mu < env().getNd(); ++mu) + { + Amu = peekLorentz(A, mu); + fft.FFT_all_dim(buf, s, FFT::backward); + buf = Amu*buf; + fft.FFT_all_dim(buf, buf, FFT::forward); + s = s + ci*adj(*phase_[mu])*buf; + } + for (unsigned int mu = 0; mu < env().getNd(); ++mu) + { + Amu = peekLorentz(A, mu); + buf = (*phase_[mu])*s; + fft.FFT_all_dim(buf, buf, FFT::backward); + buf = Amu*buf; + fft.FFT_all_dim(buf, buf, FFT::forward); + s = s - ci*buf; + } +} + +void TChargedProp::momD2(ScalarField &s, FFT &fft) +{ + EmField &A = *env().getObject(par().emField); + ScalarField buf(env().getGrid()), Amu(env().getGrid()); + + for (unsigned int mu = 0; mu < env().getNd(); ++mu) + { + Amu = peekLorentz(A, mu); + fft.FFT_all_dim(buf, s, FFT::backward); + buf = Amu*Amu*buf; + fft.FFT_all_dim(buf, buf, FFT::forward); + s = s + .5*adj(*phase_[mu])*buf; + } + for (unsigned int mu = 0; mu < env().getNd(); ++mu) + { + Amu = peekLorentz(A, mu); + buf = (*phase_[mu])*s; + fft.FFT_all_dim(buf, buf, FFT::backward); + buf = Amu*Amu*buf; + fft.FFT_all_dim(buf, buf, FFT::forward); + s = s + .5*buf; + } } diff --git a/extras/Hadrons/Modules/MScalar/ChargedProp.hpp b/extras/Hadrons/Modules/MScalar/ChargedProp.hpp index 001f6494..8bb5faa0 100644 --- a/extras/Hadrons/Modules/MScalar/ChargedProp.hpp +++ b/extras/Hadrons/Modules/MScalar/ChargedProp.hpp @@ -19,6 +19,7 @@ public: std::string, emField, std::string, source, double, mass, + double, charge, std::string, output); }; @@ -26,6 +27,8 @@ class TChargedProp: public Module { public: SCALAR_TYPE_ALIASES(SIMPL,); + typedef PhotonR::GaugeField EmField; + typedef PhotonR::GaugeLinkField EmComp; public: // constructor TChargedProp(const std::string name); @@ -39,8 +42,14 @@ public: // execution virtual void execute(void); private: - std::string 
freeMomPropName_; - std::vector shiftedMomPropName_; + void momD1(ScalarField &s, FFT &fft); + void momD2(ScalarField &s, FFT &fft); +private: + std::string freeMomPropName_, GFSrcName_; + std::vector phaseName_; + ScalarField *freeMomProp_, *GFSrc_; + std::vector phase_; + EmField *A; }; MODULE_REGISTER_NS(ChargedProp, TChargedProp, MScalar); From 92f8950a5658f75fa4e184fdffda492d5e45b200 Mon Sep 17 00:00:00 2001 From: Antonin Portelli Date: Fri, 13 Jan 2017 13:30:56 +0000 Subject: [PATCH 029/177] Charged scalar prop: cleaning and output --- extras/Hadrons/Modules/MScalar/ChargedProp.cc | 61 +++++++++++++------ 1 file changed, 42 insertions(+), 19 deletions(-) diff --git a/extras/Hadrons/Modules/MScalar/ChargedProp.cc b/extras/Hadrons/Modules/MScalar/ChargedProp.cc index dd260798..f8323705 100644 --- a/extras/Hadrons/Modules/MScalar/ChargedProp.cc +++ b/extras/Hadrons/Modules/MScalar/ChargedProp.cc @@ -35,8 +35,7 @@ void TChargedProp::setup(void) phaseName_.clear(); for (unsigned int mu = 0; mu < env().getNd(); ++mu) { - phaseName_.push_back(freeMomPropName_ + "_" - + std::to_string(mu)); + phaseName_.push_back("_shiftphase_" + std::to_string(mu)); } GFSrcName_ = "_" + getName() + "_DinvSrc"; if (!env().hasRegisteredObject(freeMomPropName_)) @@ -55,14 +54,12 @@ void TChargedProp::setup(void) env().registerLattice(GFSrcName_); } env().registerLattice(getName()); - } // execution /////////////////////////////////////////////////////////////////// void TChargedProp::execute(void) { // CACHING ANALYTIC EXPRESSIONS - ScalarField &prop = *env().createLattice(getName()); ScalarField &source = *env().getObject(par().source); Complex ci(0.0,1.0); FFT fft(env().getGrid()); @@ -79,13 +76,24 @@ void TChargedProp::execute(void) { freeMomProp_ = env().getObject(freeMomPropName_); } + // cache G*F*src + if (!env().hasCreatedObject(GFSrcName_)) + + { + GFSrc_ = env().createLattice(GFSrcName_); + fft.FFT_all_dim(*GFSrc_, source, FFT::forward); + *GFSrc_ = (*freeMomProp_)*(*GFSrc_); 
+ } + else + { + GFSrc_ = env().getObject(GFSrcName_); + } // cache phases if (!env().hasCreatedObject(phaseName_[0])) { std::vector &l = env().getGrid()->_fdimensions; - LOG(Message) << "Caching shifted momentum space free scalar propagator" - << " (mass= " << par().mass << ")..." << std::endl; + LOG(Message) << "Caching shift phases..." << std::endl; for (unsigned int mu = 0; mu < env().getNd(); ++mu) { Real twoPiL = M_PI*2./l[mu]; @@ -102,20 +110,13 @@ void TChargedProp::execute(void) phase_.push_back(env().getObject(phaseName_[mu])); } } - // cache G*F*src - if (!env().hasCreatedObject(GFSrcName_)) - - { - GFSrc_ = env().createLattice(GFSrcName_); - fft.FFT_all_dim(*GFSrc_, source, FFT::forward); - *GFSrc_ = (*freeMomProp_)*(*GFSrc_); - } - else - { - GFSrc_ = env().getObject(GFSrcName_); - } - + // PROPAGATOR CALCULATION + LOG(Message) << "Computing charged scalar propagator" + << " (mass= " << par().mass + << ", charge= " << par().charge << ")..." << std::endl; + + ScalarField &prop = *env().createLattice(getName()); ScalarField buf(env().getGrid()); ScalarField &GFSrc = *GFSrc_, &G = *freeMomProp_; double q = par().charge; @@ -136,6 +137,28 @@ void TChargedProp::execute(void) prop = prop + q*q*G*buf; // final FT fft.FFT_all_dim(prop, prop, FFT::backward); + + // OUTPUT IF NECESSARY + if (!par().output.empty()) + { + std::string filename = par().output + "." + + std::to_string(env().getTrajectory()); + + LOG(Message) << "Saving zero-momentum projection to '" + << filename << "'..." 
<< std::endl; + + TextWriter writer(filename); + std::vector vecBuf; + std::vector result; + + sliceSum(prop, vecBuf, Tp); + result.resize(vecBuf.size()); + for (unsigned int t = 0; t < vecBuf.size(); ++t) + { + result[t] = TensorRemove(vecBuf[t]); + } + write(writer, "prop", result); + } } void TChargedProp::momD1(ScalarField &s, FFT &fft) From ae99e99da235ebf5d9a149ef4a7ad0b93d1f7474 Mon Sep 17 00:00:00 2001 From: James Harrison Date: Mon, 23 Jan 2017 17:27:50 +0000 Subject: [PATCH 030/177] Fixed bug in ChargedProp --- extras/Hadrons/Modules/MScalar/ChargedProp.cc | 40 ++++++++++++------- 1 file changed, 26 insertions(+), 14 deletions(-) diff --git a/extras/Hadrons/Modules/MScalar/ChargedProp.cc b/extras/Hadrons/Modules/MScalar/ChargedProp.cc index f8323705..d88fdc45 100644 --- a/extras/Hadrons/Modules/MScalar/ChargedProp.cc +++ b/extras/Hadrons/Modules/MScalar/ChargedProp.cc @@ -123,18 +123,22 @@ void TChargedProp::execute(void) // G*F*Src prop = GFSrc; + // - q*G*momD1*G*F*Src (momD1 = F*D1*Finv) buf = GFSrc; momD1(buf, fft); buf = G*buf; prop = prop - q*buf; + // + q^2*G*momD1*G*momD1*G*F*Src (here buf = G*momD1*G*F*Src) momD1(buf, fft); prop = prop + q*q*G*buf; - // + q^2*G*momD2*G*F*Src (momD1 = F*D2*Finv) + + // - q^2*G*momD2*G*F*Src (momD2 = F*D2*Finv) buf = GFSrc; momD2(buf, fft); - prop = prop + q*q*G*buf; + prop = prop - q*q*G*buf; + // final FT fft.FFT_all_dim(prop, prop, FFT::backward); @@ -164,16 +168,18 @@ void TChargedProp::execute(void) void TChargedProp::momD1(ScalarField &s, FFT &fft) { EmField &A = *env().getObject(par().emField); - ScalarField buf(env().getGrid()), Amu(env().getGrid()); + ScalarField buf(env().getGrid()), fs(env().getGrid()), result(env().getGrid()), Amu(env().getGrid()); Complex ci(0.0,1.0); - + + result = zero; + + fft.FFT_all_dim(fs, s, FFT::backward); for (unsigned int mu = 0; mu < env().getNd(); ++mu) { Amu = peekLorentz(A, mu); - fft.FFT_all_dim(buf, s, FFT::backward); - buf = Amu*buf; + buf = Amu*fs; 
fft.FFT_all_dim(buf, buf, FFT::forward); - s = s + ci*adj(*phase_[mu])*buf; + result = result + ci*adj(*phase_[mu])*buf; } for (unsigned int mu = 0; mu < env().getNd(); ++mu) { @@ -182,22 +188,26 @@ void TChargedProp::momD1(ScalarField &s, FFT &fft) fft.FFT_all_dim(buf, buf, FFT::backward); buf = Amu*buf; fft.FFT_all_dim(buf, buf, FFT::forward); - s = s - ci*buf; + result = result - ci*buf; } + + s = result; } void TChargedProp::momD2(ScalarField &s, FFT &fft) { EmField &A = *env().getObject(par().emField); - ScalarField buf(env().getGrid()), Amu(env().getGrid()); + ScalarField buf(env().getGrid()), fs(env().getGrid()), result(env().getGrid()), Amu(env().getGrid()); + + result = zero; + fft.FFT_all_dim(fs, s, FFT::backward); for (unsigned int mu = 0; mu < env().getNd(); ++mu) { - Amu = peekLorentz(A, mu); - fft.FFT_all_dim(buf, s, FFT::backward); - buf = Amu*Amu*buf; + Amu = peekLorentz(A, mu); + buf = Amu*Amu*fs; fft.FFT_all_dim(buf, buf, FFT::forward); - s = s + .5*adj(*phase_[mu])*buf; + result = result + .5*adj(*phase_[mu])*buf; } for (unsigned int mu = 0; mu < env().getNd(); ++mu) { @@ -206,6 +216,8 @@ void TChargedProp::momD2(ScalarField &s, FFT &fft) fft.FFT_all_dim(buf, buf, FFT::backward); buf = Amu*Amu*buf; fft.FFT_all_dim(buf, buf, FFT::forward); - s = s + .5*buf; + result = result + .5*buf; } + + s = result; } From f65a585236f420d7ed966b8f9e0b7cbfb0857d8c Mon Sep 17 00:00:00 2001 From: James Harrison Date: Thu, 26 Jan 2017 15:02:30 +0000 Subject: [PATCH 031/177] ChargedProp: Switch to HDF5 output --- extras/Hadrons/Modules/MScalar/ChargedProp.cc | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/extras/Hadrons/Modules/MScalar/ChargedProp.cc b/extras/Hadrons/Modules/MScalar/ChargedProp.cc index d88fdc45..f2890b2a 100644 --- a/extras/Hadrons/Modules/MScalar/ChargedProp.cc +++ b/extras/Hadrons/Modules/MScalar/ChargedProp.cc @@ -151,7 +151,7 @@ void TChargedProp::execute(void) LOG(Message) << "Saving zero-momentum projection to '" << 
filename << "'..." << std::endl; - TextWriter writer(filename); + Hdf5Writer writer(filename); std::vector vecBuf; std::vector result; @@ -161,6 +161,7 @@ void TChargedProp::execute(void) { result[t] = TensorRemove(vecBuf[t]); } + write(writer, "charge", q); write(writer, "prop", result); } } From ee93f0218bebd84d98211d0ed87cff48951d76d7 Mon Sep 17 00:00:00 2001 From: James Harrison Date: Fri, 27 Jan 2017 12:22:48 +0000 Subject: [PATCH 032/177] ChargedProp: remove ScalarField fs --- extras/Hadrons/Modules/MScalar/ChargedProp.cc | 38 ++++++++++--------- 1 file changed, 20 insertions(+), 18 deletions(-) diff --git a/extras/Hadrons/Modules/MScalar/ChargedProp.cc b/extras/Hadrons/Modules/MScalar/ChargedProp.cc index f2890b2a..40d4504c 100644 --- a/extras/Hadrons/Modules/MScalar/ChargedProp.cc +++ b/extras/Hadrons/Modules/MScalar/ChargedProp.cc @@ -169,19 +169,12 @@ void TChargedProp::execute(void) void TChargedProp::momD1(ScalarField &s, FFT &fft) { EmField &A = *env().getObject(par().emField); - ScalarField buf(env().getGrid()), fs(env().getGrid()), result(env().getGrid()), Amu(env().getGrid()); + ScalarField buf(env().getGrid()), result(env().getGrid()), + Amu(env().getGrid()); Complex ci(0.0,1.0); result = zero; - fft.FFT_all_dim(fs, s, FFT::backward); - for (unsigned int mu = 0; mu < env().getNd(); ++mu) - { - Amu = peekLorentz(A, mu); - buf = Amu*fs; - fft.FFT_all_dim(buf, buf, FFT::forward); - result = result + ci*adj(*phase_[mu])*buf; - } for (unsigned int mu = 0; mu < env().getNd(); ++mu) { Amu = peekLorentz(A, mu); @@ -191,6 +184,14 @@ void TChargedProp::momD1(ScalarField &s, FFT &fft) fft.FFT_all_dim(buf, buf, FFT::forward); result = result - ci*buf; } + fft.FFT_all_dim(s, s, FFT::backward); + for (unsigned int mu = 0; mu < env().getNd(); ++mu) + { + Amu = peekLorentz(A, mu); + buf = Amu*s; + fft.FFT_all_dim(buf, buf, FFT::forward); + result = result + ci*adj(*phase_[mu])*buf; + } s = result; } @@ -198,18 +199,11 @@ void TChargedProp::momD1(ScalarField &s, 
FFT &fft) void TChargedProp::momD2(ScalarField &s, FFT &fft) { EmField &A = *env().getObject(par().emField); - ScalarField buf(env().getGrid()), fs(env().getGrid()), result(env().getGrid()), Amu(env().getGrid()); + ScalarField buf(env().getGrid()), result(env().getGrid()), + Amu(env().getGrid()); result = zero; - fft.FFT_all_dim(fs, s, FFT::backward); - for (unsigned int mu = 0; mu < env().getNd(); ++mu) - { - Amu = peekLorentz(A, mu); - buf = Amu*Amu*fs; - fft.FFT_all_dim(buf, buf, FFT::forward); - result = result + .5*adj(*phase_[mu])*buf; - } for (unsigned int mu = 0; mu < env().getNd(); ++mu) { Amu = peekLorentz(A, mu); @@ -219,6 +213,14 @@ void TChargedProp::momD2(ScalarField &s, FFT &fft) fft.FFT_all_dim(buf, buf, FFT::forward); result = result + .5*buf; } + fft.FFT_all_dim(s, s, FFT::backward); + for (unsigned int mu = 0; mu < env().getNd(); ++mu) + { + Amu = peekLorentz(A, mu); + buf = Amu*Amu*s; + fft.FFT_all_dim(buf, buf, FFT::forward); + result = result + .5*adj(*phase_[mu])*buf; + } s = result; } From b39f0d1fb675f453e710ed953583bb68bfe2b18f Mon Sep 17 00:00:00 2001 From: Antonin Portelli Date: Fri, 27 Jan 2017 18:12:35 -0800 Subject: [PATCH 033/177] Hadrons: default I/O to HDF5 if possible, XML otherwise --- extras/Hadrons/Global.hpp | 9 +++++++++ extras/Hadrons/Modules/MScalar/ChargedProp.cc | 2 +- 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/extras/Hadrons/Global.hpp b/extras/Hadrons/Global.hpp index bcb282fc..8dbb08ca 100644 --- a/extras/Hadrons/Global.hpp +++ b/extras/Hadrons/Global.hpp @@ -160,6 +160,15 @@ std::string typeName(void) return typeName(typeIdPt()); } +// default writers/readers +#ifdef HAVE_HDF5 +typedef Hdf5Reader CorrReader; +typedef Hdf5Writer CorrWriter; +#else +typedef XmlReader CorrReader; +typedef XmlWriter CorrWriter; +#endif + END_HADRONS_NAMESPACE #endif // Hadrons_Global_hpp_ diff --git a/extras/Hadrons/Modules/MScalar/ChargedProp.cc b/extras/Hadrons/Modules/MScalar/ChargedProp.cc index 40d4504c..dc6481f3 
100644 --- a/extras/Hadrons/Modules/MScalar/ChargedProp.cc +++ b/extras/Hadrons/Modules/MScalar/ChargedProp.cc @@ -151,7 +151,7 @@ void TChargedProp::execute(void) LOG(Message) << "Saving zero-momentum projection to '" << filename << "'..." << std::endl; - Hdf5Writer writer(filename); + CorrWriter writer(filename); std::vector vecBuf; std::vector result; From 831ca4e3bf8e0b4231f395b2af9308e007b73186 Mon Sep 17 00:00:00 2001 From: Guido Cossu Date: Tue, 14 Mar 2017 14:55:18 +0900 Subject: [PATCH 034/177] Added Scalar action for fields in the adjoint representation --- lib/qcd/action/Actions.h | 5 + lib/qcd/action/scalar/ScalarAction.h | 61 ++++++----- lib/qcd/action/scalar/ScalarImpl.h | 93 ++++++++-------- .../action/scalar/ScalarInteractionAction.h | 84 +++++++-------- lib/qcd/hmc/GenericHMCrunner.h | 3 + lib/qcd/representations/hmc_types.h | 2 +- tests/hmc/Test_hmc_ScalarActionNxN.cc | 100 ++++++++++++++++++ 7 files changed, 227 insertions(+), 121 deletions(-) create mode 100644 tests/hmc/Test_hmc_ScalarActionNxN.cc diff --git a/lib/qcd/action/Actions.h b/lib/qcd/action/Actions.h index daf64f3d..0214b8f4 100644 --- a/lib/qcd/action/Actions.h +++ b/lib/qcd/action/Actions.h @@ -69,6 +69,7 @@ Author: paboyle //////////////////////////////////////////// #include #include +#include namespace Grid { namespace QCD { @@ -106,6 +107,10 @@ typedef ScalarAction ScalarActionR; typedef ScalarAction ScalarActionF; typedef ScalarAction ScalarActionD; +typedef ScalarInteractionAction ScalarAdjActionR; +typedef ScalarInteractionAction ScalarAdjActionF; +typedef ScalarInteractionAction ScalarAdjActionD; + }} //////////////////////////////////////////////////////////////////////////////////////////////////// diff --git a/lib/qcd/action/scalar/ScalarAction.h b/lib/qcd/action/scalar/ScalarAction.h index f10ec9a6..2c82d2e3 100644 --- a/lib/qcd/action/scalar/ScalarAction.h +++ b/lib/qcd/action/scalar/ScalarAction.h @@ -6,10 +6,10 @@ Copyright (C) 2015 -Author: Azusa Yamaguchi -Author: 
Peter Boyle -Author: neo -Author: paboyle + Author: Azusa Yamaguchi + Author: Peter Boyle + Author: neo + Author: paboyle This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -35,50 +35,49 @@ directory namespace Grid { // FIXME drop the QCD namespace everywhere here - - template - class ScalarAction : public QCD::Action { - public: + +template +class ScalarAction : public QCD::Action { + public: INHERIT_FIELD_TYPES(Impl); - - private: + + private: RealD mass_square; RealD lambda; - - public: - ScalarAction(RealD ms, RealD l) : mass_square(ms), lambda(l){}; - virtual std::string LogParameters(){ + public: + ScalarAction(RealD ms, RealD l) : mass_square(ms), lambda(l) {} + + virtual std::string LogParameters() { std::stringstream sstream; sstream << GridLogMessage << "[ScalarAction] lambda : " << lambda << std::endl; sstream << GridLogMessage << "[ScalarAction] mass_square : " << mass_square << std::endl; return sstream.str(); - } - - virtual std::string action_name(){return "ScalarAction";} - - virtual void refresh(const Field &U, - GridParallelRNG &pRNG){}; // noop as no pseudoferms - + virtual std::string action_name() {return "ScalarAction";} + + virtual void refresh(const Field &U, GridParallelRNG &pRNG) {} // noop as no pseudoferms + virtual RealD S(const Field &p) { return (mass_square * 0.5 + QCD::Nd) * ScalarObs::sumphisquared(p) + - (lambda / 24.) * ScalarObs::sumphifourth(p) + - ScalarObs::sumphider(p); + (lambda / 24.) * ScalarObs::sumphifourth(p) + + ScalarObs::sumphider(p); }; - + virtual void deriv(const Field &p, - Field &force) { + Field &force) { Field tmp(p._grid); Field p2(p._grid); ScalarObs::phisquared(p2, p); tmp = -(Cshift(p, 0, -1) + Cshift(p, 0, 1)); for (int mu = 1; mu < QCD::Nd; mu++) tmp -= Cshift(p, mu, -1) + Cshift(p, mu, 1); - - force=+(mass_square + 2. * QCD::Nd) * p + (lambda / 6.) 
* p2 * p + tmp; - }; - }; - -} // Grid + + force =+(mass_square + 2. * QCD::Nd) * p + (lambda / 6.) * p2 * p + tmp; + } +}; + + + +} // namespace Grid #endif // SCALAR_ACTION_H diff --git a/lib/qcd/action/scalar/ScalarImpl.h b/lib/qcd/action/scalar/ScalarImpl.h index ee2d2fb8..6d14b61a 100644 --- a/lib/qcd/action/scalar/ScalarImpl.h +++ b/lib/qcd/action/scalar/ScalarImpl.h @@ -5,96 +5,99 @@ namespace Grid { //namespace QCD { - template - class ScalarImplTypes { - public: +template +class ScalarImplTypes { + public: typedef S Simd; - + template using iImplField = iScalar > >; - + typedef iImplField SiteField; - - + typedef Lattice Field; - - static inline void generate_momenta(Field& P, GridParallelRNG& pRNG){ + + static inline void generate_momenta(Field& P, GridParallelRNG& pRNG) { gaussian(pRNG, P); } - + static inline Field projectForce(Field& P){return P;} - - static inline void update_field(Field& P, Field& U, double ep){ + + static inline void update_field(Field& P, Field& U, double ep) { U += P*ep; } - - static inline RealD FieldSquareNorm(Field& U){ + + static inline RealD FieldSquareNorm(Field& U) { return (- sum(trace(U*U))/2.0); } - + static inline void HotConfiguration(GridParallelRNG &pRNG, Field &U) { gaussian(pRNG, U); } - + static inline void TepidConfiguration(GridParallelRNG &pRNG, Field &U) { gaussian(pRNG, U); } - + static inline void ColdConfiguration(GridParallelRNG &pRNG, Field &U) { U = 1.0; } - + }; template - class ScalarMatrixImplTypes { + class ScalarAdjMatrixImplTypes { public: typedef S Simd; - template using iImplField = iScalar > >; - + typedef iImplField SiteField; - - + typedef Lattice Field; - - static inline void generate_momenta(Field& P, GridParallelRNG& pRNG){ - gaussian(pRNG, P); + + static inline void generate_momenta(Field& P, GridParallelRNG& pRNG) { + QCD::SU::GaussianFundamentalLieAlgebraMatrix(pRNG, P); } - - static inline Field projectForce(Field& P){return P;} - - static inline void update_field(Field& P, Field& U, 
double ep){ + + static inline Field projectForce(Field& P) {return P;} + + static inline void update_field(Field& P, Field& U, double ep) { U += P*ep; } - - static inline RealD FieldSquareNorm(Field& U){ - return (TensorRemove(- sum(trace(U*U))*0.5).real()); + + static inline RealD FieldSquareNorm(Field& U) { + return (TensorRemove(sum(trace(U*U))).real()); } - + static inline void HotConfiguration(GridParallelRNG &pRNG, Field &U) { - gaussian(pRNG, U); + QCD::SU::LieRandomize(pRNG, U); } - + static inline void TepidConfiguration(GridParallelRNG &pRNG, Field &U) { - gaussian(pRNG, U); + QCD::SU::LieRandomize(pRNG, U, 0.01); } - + static inline void ColdConfiguration(GridParallelRNG &pRNG, Field &U) { - U = 1.0; + U = zero; } - + }; - - + + typedef ScalarImplTypes ScalarImplR; typedef ScalarImplTypes ScalarImplF; typedef ScalarImplTypes ScalarImplD; - - //} -} + + // Hardcoding here the size of the matrices + typedef ScalarAdjMatrixImplTypes ScalarAdjImplR; + typedef ScalarAdjMatrixImplTypes ScalarAdjImplF; + typedef ScalarAdjMatrixImplTypes ScalarAdjImplD; + + + //} +} #endif diff --git a/lib/qcd/action/scalar/ScalarInteractionAction.h b/lib/qcd/action/scalar/ScalarInteractionAction.h index bd54a010..2607b041 100644 --- a/lib/qcd/action/scalar/ScalarInteractionAction.h +++ b/lib/qcd/action/scalar/ScalarInteractionAction.h @@ -6,10 +6,7 @@ Copyright (C) 2015 -Author: Azusa Yamaguchi -Author: Peter Boyle -Author: neo -Author: paboyle + Author: Guido Cossu This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -30,55 +27,54 @@ directory *************************************************************************************/ /* END LEGAL */ -#ifndef SCALAR_ACTION_H -#define SCALAR_ACTION_H +#ifndef SCALAR_INT_ACTION_H +#define SCALAR_INT_ACTION_H namespace Grid { // FIXME drop the QCD namespace everywhere here - - template - class ScalarInteractionAction : public QCD::Action { - public: 
- INHERIT_FIELD_TYPES(Impl); - - private: + +template +class ScalarInteractionAction : public QCD::Action { RealD mass_square; RealD lambda; - - public: - ScalarAction(RealD ms, RealD l) : mass_square(ms), lambda(l){}; - virtual std::string LogParameters(){ + public: + INHERIT_FIELD_TYPES(Impl); + ScalarInteractionAction(RealD ms, RealD l) : mass_square(ms), lambda(l) {} + + virtual std::string LogParameters() { std::stringstream sstream; sstream << GridLogMessage << "[ScalarAction] lambda : " << lambda << std::endl; sstream << GridLogMessage << "[ScalarAction] mass_square : " << mass_square << std::endl; return sstream.str(); - } - - virtual std::string action_name(){return "ScalarAction";} - - virtual void refresh(const Field &U, - GridParallelRNG &pRNG){}; // noop as no pseudoferms - - virtual RealD S(const Field &p) { - return (mass_square * 0.5 + QCD::Nd) * ScalarObs::sumphisquared(p) + - (lambda / 24.) * ScalarObs::sumphifourth(p) + - ScalarObs::sumphider(p); - }; - - virtual void deriv(const Field &p, - Field &force) { - Field tmp(p._grid); - Field p2(p._grid); - ScalarObs::phisquared(p2, p); - tmp = -(Cshift(p, 0, -1) + Cshift(p, 0, 1)); - for (int mu = 1; mu < QCD::Nd; mu++) tmp -= Cshift(p, mu, -1) + Cshift(p, mu, 1); - - force=+(mass_square + 2. * QCD::Nd) * p + (lambda / 6.) 
* p2 * p + tmp; - }; - }; - -} // Grid -#endif // SCALAR_ACTION_H + virtual std::string action_name() {return "ScalarAction";} + + virtual void refresh(const Field &U, + GridParallelRNG &pRNG) {} // noop as no pseudoferms + + virtual RealD S(const Field &p) { + Field action(p._grid); + Field pshift(p._grid); + Field phisquared(p._grid); + phisquared = p*p; + action = (2.0*QCD::Nd + mass_square)*phisquared + lambda*phisquared*phisquared; + for (int mu = 0; mu < QCD::Nd; mu++) { + pshift = Cshift(p, mu, +1); // not efficient implement with stencils + action -= pshift*p + p*pshift; + } + return -(TensorRemove(sum(trace(action)))).real(); + }; + + virtual void deriv(const Field &p, + Field &force) { + force = (2.0*QCD::Nd + mass_square)*p + 2.0*lambda*p*p*p; + // following is inefficient + for (int mu = 0; mu < QCD::Nd; mu++) force -= Cshift(p, mu, -1) + Cshift(p, mu, 1); + } +}; + +} // namespace Grid + +#endif // SCALAR_INT_ACTION_H diff --git a/lib/qcd/hmc/GenericHMCrunner.h b/lib/qcd/hmc/GenericHMCrunner.h index 66b16435..a97fb4e4 100644 --- a/lib/qcd/hmc/GenericHMCrunner.h +++ b/lib/qcd/hmc/GenericHMCrunner.h @@ -202,6 +202,9 @@ using GenericHMCRunnerTemplate = HMCWrapperTemplate ScalarGenericHMCRunner; +typedef HMCWrapperTemplate + ScalarAdjGenericHMCRunner; + } // namespace QCD } // namespace Grid diff --git a/lib/qcd/representations/hmc_types.h b/lib/qcd/representations/hmc_types.h index 3701c9b2..b4991941 100644 --- a/lib/qcd/representations/hmc_types.h +++ b/lib/qcd/representations/hmc_types.h @@ -62,7 +62,7 @@ class Representations { typedef Representations NoHirep; typedef Representations > ScalarFields; - //typedef Representations > ScalarMatrixFields; +typedef Representations > ScalarMatrixFields; // Helper classes to access the elements // Strips the first N parameters from the tuple diff --git a/tests/hmc/Test_hmc_ScalarActionNxN.cc b/tests/hmc/Test_hmc_ScalarActionNxN.cc new file mode 100644 index 00000000..8b93efde --- /dev/null +++ 
b/tests/hmc/Test_hmc_ScalarActionNxN.cc @@ -0,0 +1,100 @@ +/************************************************************************************* + +Grid physics library, www.github.com/paboyle/Grid + +Source file: ./tests/Test_hmc_WilsonFermionGauge.cc + +Copyright (C) 2016 + +Author: Guido Cossu + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along +with this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + +See the full license in the file "LICENSE" in the top level distribution directory +*************************************************************************************/ +/* END LEGAL */ +#include +namespace Grid{ +class ScalarActionParameters : Serializable { + public: + GRID_SERIALIZABLE_CLASS_MEMBERS(ScalarActionParameters, + double, mass_squared, + double, lambda); +}; + +} +int main(int argc, char **argv) { + using namespace Grid; + using namespace Grid::QCD; + + Grid_init(&argc, &argv); + int threads = GridThread::GetThreads(); + // here make a routine to print all the relevant information on the run + std::cout << GridLogMessage << "Grid is setup to use " << threads << " threads" << std::endl; + + // Typedefs to simplify notation + typedef ScalarAdjGenericHMCRunner HMCWrapper; // Uses the default minimum norm, real scalar fields + + //:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: + HMCWrapper TheHMC; + + // Grid from the command line + GridModule ScalarGrid; 
+ ScalarGrid.set_full( SpaceTimeGrid::makeFourDimGrid( + GridDefaultLatt(), GridDefaultSimd(Nd, vComplex::Nsimd()), + GridDefaultMpi())); + ScalarGrid.set_rb(SpaceTimeGrid::makeFourDimRedBlackGrid(ScalarGrid.get_full())); + TheHMC.Resources.AddGrid("scalar", ScalarGrid); + // Possibile to create the module by hand + // hardcoding parameters or using a Reader + + // Checkpointer definition + CheckpointerParameters CPparams; + CPparams.config_prefix = "ckpoint_scalar_lat"; + CPparams.rng_prefix = "ckpoint_scalar_rng"; + CPparams.saveInterval = 50; + CPparams.format = "IEEE64BIG"; + + TheHMC.Resources.LoadBinaryCheckpointer(CPparams); + + RNGModuleParameters RNGpar; + RNGpar.serial_seeds = "1 2 3 4 5"; + RNGpar.parallel_seeds = "6 7 8 9 10"; + TheHMC.Resources.SetRNGSeeds(RNGpar); + ///////////////////////////////////////////////////////////// + // Collect actions, here use more encapsulation + + // Scalar action in adjoint representation + ScalarActionParameters SPar; + SPar.mass_squared = 0.5; + SPar.lambda = 0.1; + ScalarAdjActionR Saction(SPar.mass_squared, SPar.lambda); + + // Collect actions + ActionLevel Level1(1); + Level1.push_back(&Saction); + TheHMC.TheAction.push_back(Level1); + ///////////////////////////////////////////////////////////// + + // HMC parameters are serialisable + TheHMC.Parameters.MD.MDsteps = 10; + TheHMC.Parameters.MD.trajL = 1.0; + + TheHMC.ReadCommandLine(argc, argv); + TheHMC.Run(); + + Grid_finalize(); + +} // main From 38806343a873ea10264c79103db31182d6770947 Mon Sep 17 00:00:00 2001 From: Guido Cossu Date: Wed, 15 Mar 2017 15:16:16 +0900 Subject: [PATCH 035/177] Improving efficiency of the force term --- .../action/scalar/ScalarInteractionAction.h | 91 ++++++++++++++++--- tests/Test_stencil.cc | 43 +++++---- tests/hmc/Test_hmc_ScalarActionNxN.cc | 11 +-- 3 files changed, 104 insertions(+), 41 deletions(-) diff --git a/lib/qcd/action/scalar/ScalarInteractionAction.h b/lib/qcd/action/scalar/ScalarInteractionAction.h index 
2607b041..5a322a5e 100644 --- a/lib/qcd/action/scalar/ScalarInteractionAction.h +++ b/lib/qcd/action/scalar/ScalarInteractionAction.h @@ -30,17 +30,34 @@ directory #ifndef SCALAR_INT_ACTION_H #define SCALAR_INT_ACTION_H + +// Note: this action can completely absorb the ScalarAction for real float fields +// use the scalarObjs to generalise the structure + namespace Grid { // FIXME drop the QCD namespace everywhere here template class ScalarInteractionAction : public QCD::Action { +public: + INHERIT_FIELD_TYPES(Impl); +private: RealD mass_square; RealD lambda; + + typedef typename Field::vector_object vobj; + typedef CartesianStencil Stencil; + + SimpleCompressor compressor; + int npoint = 8; + std::vector directions = {0,1,2,3,0,1,2,3}; // forcing 4 dimensions + std::vector displacements = {1,1,1,1, -1,-1,-1,-1}; + + public: - INHERIT_FIELD_TYPES(Impl); - ScalarInteractionAction(RealD ms, RealD l) : mass_square(ms), lambda(l) {} + + ScalarInteractionAction(RealD ms, RealD l) : mass_square(ms), lambda(l){} virtual std::string LogParameters() { std::stringstream sstream; @@ -51,27 +68,75 @@ class ScalarInteractionAction : public QCD::Action { virtual std::string action_name() {return "ScalarAction";} - virtual void refresh(const Field &U, - GridParallelRNG &pRNG) {} // noop as no pseudoferms + virtual void refresh(const Field &U, GridParallelRNG &pRNG) {} virtual RealD S(const Field &p) { - Field action(p._grid); - Field pshift(p._grid); - Field phisquared(p._grid); + static Stencil phiStencil(p._grid, npoint, 0, directions, displacements); + phiStencil.HaloExchange(p, compressor); + + Field action(p._grid), pshift(p._grid), phisquared(p._grid); phisquared = p*p; action = (2.0*QCD::Nd + mass_square)*phisquared + lambda*phisquared*phisquared; for (int mu = 0; mu < QCD::Nd; mu++) { - pshift = Cshift(p, mu, +1); // not efficient implement with stencils - action -= pshift*p + p*pshift; + // pshift = Cshift(p, mu, +1); // not efficient, implement with stencils + 
PARALLEL_FOR_LOOP + for (int i = 0; i < p._grid->oSites(); i++) { + int permute_type; + StencilEntry *SE; + vobj temp2; + vobj *temp; + vobj *t_p; + + SE = phiStencil.GetEntry(permute_type, mu, i); + t_p = &p._odata[i]; + if ( SE->_is_local ) { + temp = &p._odata[SE->_offset]; + if ( SE->_permute ) { + permute(temp2, *temp, permute_type); + action._odata[i] -= temp2*(*t_p) + (*t_p)*temp2; + } else { + action._odata[i] -= *temp*(*t_p) + (*t_p)*(*temp); + } + } else { + action._odata[i] -= phiStencil.CommBuf()[SE->_offset]*(*t_p) + (*t_p)*phiStencil.CommBuf()[SE->_offset]; + } + } + // action -= pshift*p + p*pshift; } + // NB the trace in the algebra is normalised to 1/2 + // minus sign coming from the antihermitian fields return -(TensorRemove(sum(trace(action)))).real(); }; - virtual void deriv(const Field &p, - Field &force) { + virtual void deriv(const Field &p, Field &force) { force = (2.0*QCD::Nd + mass_square)*p + 2.0*lambda*p*p*p; - // following is inefficient - for (int mu = 0; mu < QCD::Nd; mu++) force -= Cshift(p, mu, -1) + Cshift(p, mu, 1); + // move this outside + static Stencil phiStencil(p._grid, npoint, 0, directions, displacements); + phiStencil.HaloExchange(p, compressor); + + //for (int mu = 0; mu < QCD::Nd; mu++) force -= Cshift(p, mu, -1) + Cshift(p, mu, 1); + for (int point = 0; point < npoint; point++) { + PARALLEL_FOR_LOOP + for (int i = 0; i < p._grid->oSites(); i++) { + vobj *temp; + vobj temp2; + int permute_type; + StencilEntry *SE; + SE = phiStencil.GetEntry(permute_type, point, i); + + if ( SE->_is_local ) { + temp = &p._odata[SE->_offset]; + if ( SE->_permute ) { + permute(temp2, *temp, permute_type); + force._odata[i] -= temp2; + } else { + force._odata[i] -= *temp; + } + } else { + force._odata[i] -= phiStencil.CommBuf()[SE->_offset]; + } + } + } } }; diff --git a/tests/Test_stencil.cc b/tests/Test_stencil.cc index 1b71b8a5..1d35e1bb 100644 --- a/tests/Test_stencil.cc +++ b/tests/Test_stencil.cc @@ -1,6 +1,6 @@ 
/************************************************************************************* - Grid physics library, www.github.com/paboyle/Grid + Grid physics library, www.github.com/paboyle/Grid Source file: ./tests/Test_stencil.cc @@ -33,9 +33,8 @@ using namespace std; using namespace Grid; using namespace Grid::QCD; -int main (int argc, char ** argv) -{ - Grid_init(&argc,&argv); +int main(int argc, char ** argv) { + Grid_init(&argc, &argv); // typedef LatticeColourMatrix Field; typedef LatticeComplex Field; @@ -47,7 +46,7 @@ int main (int argc, char ** argv) std::vector mpi_layout = GridDefaultMpi(); double volume = latt_size[0]*latt_size[1]*latt_size[2]*latt_size[3]; - + GridCartesian Fine(latt_size,simd_layout,mpi_layout); GridRedBlackCartesian rbFine(latt_size,simd_layout,mpi_layout); GridParallelRNG fRNG(&Fine); @@ -55,14 +54,14 @@ int main (int argc, char ** argv) // fRNG.SeedRandomDevice(); std::vector seeds({1,2,3,4}); fRNG.SeedFixedIntegers(seeds); - + Field Foo(&Fine); Field Bar(&Fine); Field Check(&Fine); Field Diff(&Fine); LatticeComplex lex(&Fine); - lex = zero; + lex = zero; random(fRNG,Foo); gaussian(fRNG,Bar); @@ -98,7 +97,7 @@ int main (int argc, char ** argv) Fine.oCoorFromOindex(ocoor,o); ocoor[dir]=(ocoor[dir]+disp)%Fine._rdimensions[dir]; } - + SimpleCompressor compress; myStencil.HaloExchange(Foo,compress); @@ -106,16 +105,16 @@ int main (int argc, char ** argv) // Implement a stencil code that should agree with cshift! 
for(int i=0;ioSites();i++){ - + int permute_type; StencilEntry *SE; SE = myStencil.GetEntry(permute_type,0,i); - + if ( SE->_is_local && SE->_permute ) permute(Check._odata[i],Foo._odata[SE->_offset],permute_type); else if (SE->_is_local) Check._odata[i] = Foo._odata[SE->_offset]; - else + else Check._odata[i] = myStencil.CommBuf()[SE->_offset]; } @@ -144,7 +143,7 @@ int main (int argc, char ** argv) <<") " < compress; EStencil.HaloExchange(EFoo,compress); OStencil.HaloExchange(OFoo,compress); - + Bar = Cshift(Foo,dir,disp); if ( disp & 0x1 ) { ECheck.checkerboard = Even; OCheck.checkerboard = Odd; - } else { + } else { ECheck.checkerboard = Odd; OCheck.checkerboard = Even; } @@ -206,7 +205,7 @@ int main (int argc, char ** argv) permute(OCheck._odata[i],EFoo._odata[SE->_offset],permute_type); else if (SE->_is_local) OCheck._odata[i] = EFoo._odata[SE->_offset]; - else + else OCheck._odata[i] = EStencil.CommBuf()[SE->_offset]; } for(int i=0;ioSites();i++){ @@ -214,18 +213,18 @@ int main (int argc, char ** argv) StencilEntry *SE; SE = OStencil.GetEntry(permute_type,0,i); // std::cout << "ODD source "<< i<<" -> " <_offset << " "<< SE->_is_local<_is_local && SE->_permute ) permute(ECheck._odata[i],OFoo._odata[SE->_offset],permute_type); else if (SE->_is_local) ECheck._odata[i] = OFoo._odata[SE->_offset]; - else + else ECheck._odata[i] = OStencil.CommBuf()[SE->_offset]; } - + setCheckerboard(Check,ECheck); setCheckerboard(Check,OCheck); - + Real nrmC = norm2(Check); Real nrmB = norm2(Bar); Diff = Check-Bar; @@ -248,10 +247,10 @@ int main (int argc, char ** argv) diff =norm2(ddiff); if ( diff > 0){ std::cout <<"Coor (" << coor[0]<<","< -namespace Grid{ +namespace Grid { class ScalarActionParameters : Serializable { public: GRID_SERIALIZABLE_CLASS_MEMBERS(ScalarActionParameters, @@ -44,7 +44,7 @@ int main(int argc, char **argv) { // here make a routine to print all the relevant information on the run std::cout << GridLogMessage << "Grid is setup to use " << threads << " 
threads" << std::endl; - // Typedefs to simplify notation + // Typedefs to simplify notation typedef ScalarAdjGenericHMCRunner HMCWrapper; // Uses the default minimum norm, real scalar fields //:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: @@ -52,7 +52,7 @@ int main(int argc, char **argv) { // Grid from the command line GridModule ScalarGrid; - ScalarGrid.set_full( SpaceTimeGrid::makeFourDimGrid( + ScalarGrid.set_full(SpaceTimeGrid::makeFourDimGrid( GridDefaultLatt(), GridDefaultSimd(Nd, vComplex::Nsimd()), GridDefaultMpi())); ScalarGrid.set_rb(SpaceTimeGrid::makeFourDimRedBlackGrid(ScalarGrid.get_full())); @@ -89,12 +89,11 @@ int main(int argc, char **argv) { ///////////////////////////////////////////////////////////// // HMC parameters are serialisable - TheHMC.Parameters.MD.MDsteps = 10; + TheHMC.Parameters.MD.MDsteps = 20; TheHMC.Parameters.MD.trajL = 1.0; TheHMC.ReadCommandLine(argc, argv); TheHMC.Run(); Grid_finalize(); - -} // main +} // main From 038b6ee9cdfc5902b27a8645b1f1758c9db3656f Mon Sep 17 00:00:00 2001 From: Guido Cossu Date: Thu, 16 Mar 2017 01:09:24 +0900 Subject: [PATCH 036/177] Fixing JSON compilation error --- lib/json/json.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/json/json.hpp b/lib/json/json.hpp index 97214f0b..bfb38c3e 100644 --- a/lib/json/json.hpp +++ b/lib/json/json.hpp @@ -64,7 +64,7 @@ SOFTWARE. 
#endif #elif defined(__GNUC__) #define GCC_VERSION (__GNUC__ * 10000 + __GNUC_MINOR__ * 100 + __GNUC_PATCHLEVEL__) - #if GCC_VERSION < 40900 + #if GCC_VERSION < 40800 #error "unsupported GCC version - see https://github.com/nlohmann/json#supported-compilers" #endif #endif From 7b03d8d0879d7f7922b8867eefa9346cb0e5c425 Mon Sep 17 00:00:00 2001 From: Guido Cossu Date: Wed, 5 Apr 2017 16:17:46 +0100 Subject: [PATCH 037/177] Fixing the remaining merge conflicts --- lib/qcd/action/scalar/Scalar.h | 5 +++++ tests/Test_stencil.cc | 7 ------- 2 files changed, 5 insertions(+), 7 deletions(-) diff --git a/lib/qcd/action/scalar/Scalar.h b/lib/qcd/action/scalar/Scalar.h index e5bea275..cae38360 100644 --- a/lib/qcd/action/scalar/Scalar.h +++ b/lib/qcd/action/scalar/Scalar.h @@ -31,6 +31,7 @@ directory #include #include +#include namespace Grid { namespace QCD { @@ -39,6 +40,10 @@ namespace QCD { typedef ScalarAction ScalarActionF; typedef ScalarAction ScalarActionD; + typedef ScalarInteractionAction ScalarAdjActionR; + typedef ScalarInteractionAction ScalarAdjActionF; + typedef ScalarInteractionAction ScalarAdjActionD; + } } diff --git a/tests/Test_stencil.cc b/tests/Test_stencil.cc index 2a4744f3..fa4b0b57 100644 --- a/tests/Test_stencil.cc +++ b/tests/Test_stencil.cc @@ -189,13 +189,6 @@ int main(int argc, char ** argv) { SimpleCompressor compress; -<<<<<<< HEAD - EStencil.HaloExchange(EFoo,compress); - OStencil.HaloExchange(OFoo,compress); - -======= - ->>>>>>> feature/hmc_generalise Bar = Cshift(Foo,dir,disp); if ( disp & 0x1 ) { From 140741875555fb9c788e78bb8b6080e480776c0f Mon Sep 17 00:00:00 2001 From: Antonin Portelli Date: Thu, 13 Apr 2017 15:32:30 +0100 Subject: [PATCH 038/177] Old qed-fvol program build disabled --- extras/Makefile.am | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/extras/Makefile.am b/extras/Makefile.am index 416a9fc8..d8c2b675 100644 --- a/extras/Makefile.am +++ b/extras/Makefile.am @@ -1 +1 @@ -SUBDIRS = Hadrons qed-fvol \ No 
newline at end of file +SUBDIRS = Hadrons \ No newline at end of file From 741bc836f69d37623cba76cf4aee06dee3f6c84e Mon Sep 17 00:00:00 2001 From: Guido Cossu Date: Fri, 5 May 2017 17:36:43 +0100 Subject: [PATCH 039/177] Exposing support for Ncolours and Ndimensions and JSON input file for the ScalarAction --- lib/qcd/action/scalar/Scalar.h | 6 +- lib/qcd/action/scalar/ScalarImpl.h | 5 +- .../action/scalar/ScalarInteractionAction.h | 152 +++++++++--------- lib/qcd/hmc/GenericHMCrunner.h | 3 + lib/qcd/representations/hmc_types.h | 3 + lib/stencil/Stencil.h | 2 +- tests/hmc/Test_hmc_ScalarActionNxN.cc | 104 ++++++++---- 7 files changed, 168 insertions(+), 107 deletions(-) diff --git a/lib/qcd/action/scalar/Scalar.h b/lib/qcd/action/scalar/Scalar.h index cae38360..485a6765 100644 --- a/lib/qcd/action/scalar/Scalar.h +++ b/lib/qcd/action/scalar/Scalar.h @@ -40,9 +40,9 @@ namespace QCD { typedef ScalarAction ScalarActionF; typedef ScalarAction ScalarActionD; - typedef ScalarInteractionAction ScalarAdjActionR; - typedef ScalarInteractionAction ScalarAdjActionF; - typedef ScalarInteractionAction ScalarAdjActionD; + template using ScalarAdjActionR = ScalarInteractionAction, Dimensions>; + template using ScalarAdjActionF = ScalarInteractionAction, Dimensions>; + template using ScalarAdjActionD = ScalarInteractionAction, Dimensions>; } } diff --git a/lib/qcd/action/scalar/ScalarImpl.h b/lib/qcd/action/scalar/ScalarImpl.h index 6d14b61a..8b5e3aa2 100644 --- a/lib/qcd/action/scalar/ScalarImpl.h +++ b/lib/qcd/action/scalar/ScalarImpl.h @@ -96,7 +96,10 @@ class ScalarImplTypes { typedef ScalarAdjMatrixImplTypes ScalarAdjImplF; typedef ScalarAdjMatrixImplTypes ScalarAdjImplD; - + template using ScalarNxNAdjImplR = ScalarAdjMatrixImplTypes; + template using ScalarNxNAdjImplF = ScalarAdjMatrixImplTypes; + template using ScalarNxNAdjImplD = ScalarAdjMatrixImplTypes; + //} } diff --git a/lib/qcd/action/scalar/ScalarInteractionAction.h b/lib/qcd/action/scalar/ScalarInteractionAction.h 
index 5a322a5e..ca8207bd 100644 --- a/lib/qcd/action/scalar/ScalarInteractionAction.h +++ b/lib/qcd/action/scalar/ScalarInteractionAction.h @@ -37,11 +37,11 @@ directory namespace Grid { // FIXME drop the QCD namespace everywhere here -template -class ScalarInteractionAction : public QCD::Action { -public: + template + class ScalarInteractionAction : public QCD::Action { + public: INHERIT_FIELD_TYPES(Impl); -private: + private: RealD mass_square; RealD lambda; @@ -50,14 +50,19 @@ private: typedef CartesianStencil Stencil; SimpleCompressor compressor; - int npoint = 8; - std::vector directions = {0,1,2,3,0,1,2,3}; // forcing 4 dimensions - std::vector displacements = {1,1,1,1, -1,-1,-1,-1}; + int npoint = 2*Ndim; + std::vector directions;// = {0,1,2,3,0,1,2,3}; // forcing 4 dimensions + std::vector displacements;// = {1,1,1,1, -1,-1,-1,-1}; - public: + public: - ScalarInteractionAction(RealD ms, RealD l) : mass_square(ms), lambda(l){} + ScalarInteractionAction(RealD ms, RealD l) : mass_square(ms), lambda(l), displacements(2*Ndim,0), directions(2*Ndim,0){ + for (int mu = 0 ; mu < Ndim; mu++){ + directions[mu] = mu; directions[mu+Ndim] = mu; + displacements[mu] = 1; displacements[mu+Ndim] = -1; + } + } virtual std::string LogParameters() { std::stringstream sstream; @@ -71,75 +76,74 @@ private: virtual void refresh(const Field &U, GridParallelRNG &pRNG) {} virtual RealD S(const Field &p) { - static Stencil phiStencil(p._grid, npoint, 0, directions, displacements); - phiStencil.HaloExchange(p, compressor); - - Field action(p._grid), pshift(p._grid), phisquared(p._grid); - phisquared = p*p; - action = (2.0*QCD::Nd + mass_square)*phisquared + lambda*phisquared*phisquared; - for (int mu = 0; mu < QCD::Nd; mu++) { - // pshift = Cshift(p, mu, +1); // not efficient, implement with stencils - PARALLEL_FOR_LOOP - for (int i = 0; i < p._grid->oSites(); i++) { - int permute_type; - StencilEntry *SE; - vobj temp2; - vobj *temp; - vobj *t_p; - - SE = 
phiStencil.GetEntry(permute_type, mu, i); - t_p = &p._odata[i]; - if ( SE->_is_local ) { - temp = &p._odata[SE->_offset]; - if ( SE->_permute ) { - permute(temp2, *temp, permute_type); - action._odata[i] -= temp2*(*t_p) + (*t_p)*temp2; - } else { - action._odata[i] -= *temp*(*t_p) + (*t_p)*(*temp); - } - } else { - action._odata[i] -= phiStencil.CommBuf()[SE->_offset]*(*t_p) + (*t_p)*phiStencil.CommBuf()[SE->_offset]; - } - } - // action -= pshift*p + p*pshift; - } - // NB the trace in the algebra is normalised to 1/2 - // minus sign coming from the antihermitian fields - return -(TensorRemove(sum(trace(action)))).real(); + assert(p._grid->Nd() == Ndim); + static Stencil phiStencil(p._grid, npoint, 0, directions, displacements); + phiStencil.HaloExchange(p, compressor); + Field action(p._grid), pshift(p._grid), phisquared(p._grid); + phisquared = p*p; + action = (2.0*Ndim + mass_square)*phisquared + lambda*phisquared*phisquared; + for (int mu = 0; mu < Ndim; mu++) { + // pshift = Cshift(p, mu, +1); // not efficient, implement with stencils + parallel_for (int i = 0; i < p._grid->oSites(); i++) { + int permute_type; + StencilEntry *SE; + vobj temp2; + vobj *temp; + vobj *t_p; + + SE = phiStencil.GetEntry(permute_type, mu, i); + t_p = &p._odata[i]; + if ( SE->_is_local ) { + temp = &p._odata[SE->_offset]; + if ( SE->_permute ) { + permute(temp2, *temp, permute_type); + action._odata[i] -= temp2*(*t_p) + (*t_p)*temp2; + } else { + action._odata[i] -= *temp*(*t_p) + (*t_p)*(*temp); + } + } else { + action._odata[i] -= phiStencil.CommBuf()[SE->_offset]*(*t_p) + (*t_p)*phiStencil.CommBuf()[SE->_offset]; + } + } + // action -= pshift*p + p*pshift; + } + // NB the trace in the algebra is normalised to 1/2 + // minus sign coming from the antihermitian fields + return -(TensorRemove(sum(trace(action)))).real(); }; virtual void deriv(const Field &p, Field &force) { - force = (2.0*QCD::Nd + mass_square)*p + 2.0*lambda*p*p*p; - // move this outside - static Stencil 
phiStencil(p._grid, npoint, 0, directions, displacements); - phiStencil.HaloExchange(p, compressor); - - //for (int mu = 0; mu < QCD::Nd; mu++) force -= Cshift(p, mu, -1) + Cshift(p, mu, 1); - for (int point = 0; point < npoint; point++) { - PARALLEL_FOR_LOOP - for (int i = 0; i < p._grid->oSites(); i++) { - vobj *temp; - vobj temp2; - int permute_type; - StencilEntry *SE; - SE = phiStencil.GetEntry(permute_type, point, i); - - if ( SE->_is_local ) { - temp = &p._odata[SE->_offset]; - if ( SE->_permute ) { - permute(temp2, *temp, permute_type); - force._odata[i] -= temp2; - } else { - force._odata[i] -= *temp; - } - } else { - force._odata[i] -= phiStencil.CommBuf()[SE->_offset]; - } - } - } + assert(p._grid->Nd() == Ndim); + force = (2.0*Ndim + mass_square)*p + 2.0*lambda*p*p*p; + // move this outside + static Stencil phiStencil(p._grid, npoint, 0, directions, displacements); + phiStencil.HaloExchange(p, compressor); + + //for (int mu = 0; mu < QCD::Nd; mu++) force -= Cshift(p, mu, -1) + Cshift(p, mu, 1); + for (int point = 0; point < npoint; point++) { + parallel_for (int i = 0; i < p._grid->oSites(); i++) { + vobj *temp; + vobj temp2; + int permute_type; + StencilEntry *SE; + SE = phiStencil.GetEntry(permute_type, point, i); + + if ( SE->_is_local ) { + temp = &p._odata[SE->_offset]; + if ( SE->_permute ) { + permute(temp2, *temp, permute_type); + force._odata[i] -= temp2; + } else { + force._odata[i] -= *temp; + } + } else { + force._odata[i] -= phiStencil.CommBuf()[SE->_offset]; + } + } + } } -}; - + }; + } // namespace Grid #endif // SCALAR_INT_ACTION_H diff --git a/lib/qcd/hmc/GenericHMCrunner.h b/lib/qcd/hmc/GenericHMCrunner.h index 353b4905..4f6c1af0 100644 --- a/lib/qcd/hmc/GenericHMCrunner.h +++ b/lib/qcd/hmc/GenericHMCrunner.h @@ -210,6 +210,9 @@ typedef HMCWrapperTemplate typedef HMCWrapperTemplate ScalarAdjGenericHMCRunner; +template +using ScalarNxNAdjGenericHMCRunner = HMCWrapperTemplate < ScalarNxNAdjImplR, MinimumNorm2, ScalarNxNMatrixFields >; + 
} // namespace QCD } // namespace Grid diff --git a/lib/qcd/representations/hmc_types.h b/lib/qcd/representations/hmc_types.h index b4991941..3fee377e 100644 --- a/lib/qcd/representations/hmc_types.h +++ b/lib/qcd/representations/hmc_types.h @@ -64,6 +64,9 @@ typedef Representations NoHirep; typedef Representations > ScalarFields; typedef Representations > ScalarMatrixFields; +template < int Colours> +using ScalarNxNMatrixFields = Representations::Field> >; + // Helper classes to access the elements // Strips the first N parameters from the tuple // sequence of classes to obtain the S sequence diff --git a/lib/stencil/Stencil.h b/lib/stencil/Stencil.h index d1c28e78..887142c4 100644 --- a/lib/stencil/Stencil.h +++ b/lib/stencil/Stencil.h @@ -286,7 +286,7 @@ class CartesianStencil { // Stencil runs along coordinate axes only; NO diagonal { int dimension = _directions[point]; int displacement = _distances[point]; - + int fd = _grid->_fdimensions[dimension]; int rd = _grid->_rdimensions[dimension]; diff --git a/tests/hmc/Test_hmc_ScalarActionNxN.cc b/tests/hmc/Test_hmc_ScalarActionNxN.cc index f63936b5..b3ce6840 100644 --- a/tests/hmc/Test_hmc_ScalarActionNxN.cc +++ b/tests/hmc/Test_hmc_ScalarActionNxN.cc @@ -32,68 +32,116 @@ class ScalarActionParameters : Serializable { GRID_SERIALIZABLE_CLASS_MEMBERS(ScalarActionParameters, double, mass_squared, double, lambda); + + template + ScalarActionParameters(Reader& Reader){ + read(Reader, "ScalarAction", *this); + } + }; } int main(int argc, char **argv) { using namespace Grid; using namespace Grid::QCD; - + typedef Grid::JSONReader Serialiser; + Grid_init(&argc, &argv); int threads = GridThread::GetThreads(); // here make a routine to print all the relevant information on the run std::cout << GridLogMessage << "Grid is setup to use " << threads << " threads" << std::endl; // Typedefs to simplify notation - typedef ScalarAdjGenericHMCRunner HMCWrapper; // Uses the default minimum norm, real scalar fields - + constexpr int 
Ncolours = 4; + constexpr int Ndimensions = 3; + typedef ScalarNxNAdjGenericHMCRunner HMCWrapper; // Uses the default minimum norm, real scalar fields + typedef ScalarAdjActionR ScalarAction; //:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: HMCWrapper TheHMC; + TheHMC.ReadCommandLine(argc, argv); + + if (TheHMC.ParameterFile.empty()){ + std::cout << "Input file not specified." + << "Use --ParameterFile option in the command line.\nAborting" + << std::endl; + exit(1); + } + Serialiser Reader(TheHMC.ParameterFile); // Grid from the command line GridModule ScalarGrid; - ScalarGrid.set_full(SpaceTimeGrid::makeFourDimGrid( - GridDefaultLatt(), GridDefaultSimd(Nd, vComplex::Nsimd()), - GridDefaultMpi())); - ScalarGrid.set_rb(SpaceTimeGrid::makeFourDimRedBlackGrid(ScalarGrid.get_full())); + if (GridDefaultLatt().size() != Ndimensions){ + std::cout << "Incorrect dimension of the grid\n. Expected dim="<< Ndimensions << std::endl; + exit(1); + } + if (GridDefaultMpi().size() != Ndimensions){ + std::cout << "Incorrect dimension of the mpi grid\n. 
Expected dim="<< Ndimensions << std::endl; + exit(1); + } + ScalarGrid.set_full(new GridCartesian(GridDefaultLatt(),GridDefaultSimd(Ndimensions, vComplex::Nsimd()),GridDefaultMpi())); + ScalarGrid.set_rb(new GridRedBlackCartesian(ScalarGrid.get_full())); TheHMC.Resources.AddGrid("scalar", ScalarGrid); - // Possibile to create the module by hand - // hardcoding parameters or using a Reader + std::cout << "Lattice size : " << GridDefaultLatt() << std::endl; // Checkpointer definition - CheckpointerParameters CPparams; - CPparams.config_prefix = "ckpoint_scalar_lat"; - CPparams.rng_prefix = "ckpoint_scalar_rng"; - CPparams.saveInterval = 50; - CPparams.format = "IEEE64BIG"; - + CheckpointerParameters CPparams(Reader); TheHMC.Resources.LoadBinaryCheckpointer(CPparams); - RNGModuleParameters RNGpar; - RNGpar.serial_seeds = "1 2 3 4 5"; - RNGpar.parallel_seeds = "6 7 8 9 10"; + RNGModuleParameters RNGpar(Reader); TheHMC.Resources.SetRNGSeeds(RNGpar); ///////////////////////////////////////////////////////////// // Collect actions, here use more encapsulation // Scalar action in adjoint representation - ScalarActionParameters SPar; - SPar.mass_squared = 0.5; - SPar.lambda = 0.1; - ScalarAdjActionR Saction(SPar.mass_squared, SPar.lambda); + ScalarActionParameters SPar(Reader); + ScalarAction Saction(SPar.mass_squared, SPar.lambda); // Collect actions - ActionLevel Level1(1); + ActionLevel> Level1(1); Level1.push_back(&Saction); TheHMC.TheAction.push_back(Level1); ///////////////////////////////////////////////////////////// + TheHMC.Parameters.initialize(Reader); - // HMC parameters are serialisable - TheHMC.Parameters.MD.MDsteps = 20; - TheHMC.Parameters.MD.trajL = 1.0; - - TheHMC.ReadCommandLine(argc, argv); TheHMC.Run(); Grid_finalize(); } // main + +/* Examples for input files + +JSON + +{ + "Checkpointer": { + "config_prefix": "ckpoint_scalar_lat", + "rng_prefix": "ckpoint_scalar_rng", + "saveInterval": 1, + "format": "IEEE64BIG" + }, + "RandomNumberGenerator": { + 
"serial_seeds": "1 2 3 4 6", + "parallel_seeds": "6 7 8 9 11" + }, + "ScalarAction":{ + "mass_squared": 0.5, + "lambda": 0.1 + }, + "HMC":{ + "StartTrajectory": 0, + "Trajectories": 100, + "MetropolisTest": true, + "NoMetropolisUntil": 10, + "StartingType": "HotStart", + "MD":{ + "name": "MinimumNorm2", + "MDsteps": 15, + "trajL": 2.0 + } + } +} + + +XML example not provided yet + +*/ \ No newline at end of file From 43c817cc67c6447bbf69bfc7d7772fba4e7ff9eb Mon Sep 17 00:00:00 2001 From: Antonin Portelli Date: Thu, 11 May 2017 00:07:17 +0100 Subject: [PATCH 040/177] Scalar action: const fix --- lib/qcd/action/scalar/ScalarInteractionAction.h | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/lib/qcd/action/scalar/ScalarInteractionAction.h b/lib/qcd/action/scalar/ScalarInteractionAction.h index ca8207bd..5f4c630c 100644 --- a/lib/qcd/action/scalar/ScalarInteractionAction.h +++ b/lib/qcd/action/scalar/ScalarInteractionAction.h @@ -88,8 +88,7 @@ namespace Grid { int permute_type; StencilEntry *SE; vobj temp2; - vobj *temp; - vobj *t_p; + const vobj *temp, *t_p; SE = phiStencil.GetEntry(permute_type, mu, i); t_p = &p._odata[i]; @@ -122,7 +121,7 @@ namespace Grid { //for (int mu = 0; mu < QCD::Nd; mu++) force -= Cshift(p, mu, -1) + Cshift(p, mu, 1); for (int point = 0; point < npoint; point++) { parallel_for (int i = 0; i < p._grid->oSites(); i++) { - vobj *temp; + const vobj *temp; vobj temp2; int permute_type; StencilEntry *SE; From d1ece741370d1b829f5946afc7c21c585a158d31 Mon Sep 17 00:00:00 2001 From: Antonin Portelli Date: Thu, 11 May 2017 11:40:44 +0100 Subject: [PATCH 041/177] HMC scalar test: magnetisation measurement --- tests/hmc/Test_hmc_ScalarActionNxN.cc | 54 ++++++++++++++++++++++++--- 1 file changed, 49 insertions(+), 5 deletions(-) diff --git a/tests/hmc/Test_hmc_ScalarActionNxN.cc b/tests/hmc/Test_hmc_ScalarActionNxN.cc index b3ce6840..bcaee31d 100644 --- a/tests/hmc/Test_hmc_ScalarActionNxN.cc +++ 
b/tests/hmc/Test_hmc_ScalarActionNxN.cc @@ -39,11 +39,50 @@ class ScalarActionParameters : Serializable { } }; - } + +using namespace Grid; +using namespace Grid::QCD; + +template +class MagLogger : public HmcObservable { +public: + typedef typename Impl::Field Field; + typedef typename Impl::Simd::scalar_type Trace; + + void TrajectoryComplete(int traj, + Field &U, + GridSerialRNG &sRNG, + GridParallelRNG &pRNG) { + + int def_prec = std::cout.precision(); + + std::cout << std::setprecision(std::numeric_limits::digits10 + 1); + std::cout << GridLogMessage + << "m= " << TensorRemove(trace(sum(U))) << std::endl; + std::cout << GridLogMessage + << "m^2= " << TensorRemove(trace(sum(U)*sum(U))) << std::endl; + std::cout.precision(def_prec); + + } +private: + +}; + +template +class MagMod: public ObservableModule, NoParameters>{ + typedef ObservableModule, NoParameters> ObsBase; + using ObsBase::ObsBase; // for constructors + + // acquire resource + virtual void initialize(){ + this->ObservablePtr.reset(new MagLogger()); + } +public: + MagMod(): ObsBase(NoParameters()){} +}; + int main(int argc, char **argv) { - using namespace Grid; - using namespace Grid::QCD; typedef Grid::JSONReader Serialiser; Grid_init(&argc, &argv); @@ -52,7 +91,7 @@ int main(int argc, char **argv) { std::cout << GridLogMessage << "Grid is setup to use " << threads << " threads" << std::endl; // Typedefs to simplify notation - constexpr int Ncolours = 4; + constexpr int Ncolours = 2; constexpr int Ndimensions = 3; typedef ScalarNxNAdjGenericHMCRunner HMCWrapper; // Uses the default minimum norm, real scalar fields typedef ScalarAdjActionR ScalarAction; @@ -89,6 +128,11 @@ int main(int argc, char **argv) { RNGModuleParameters RNGpar(Reader); TheHMC.Resources.SetRNGSeeds(RNGpar); + + // Construct observables + typedef MagMod MagObs; + TheHMC.Resources.AddObservable(); + ///////////////////////////////////////////////////////////// // Collect actions, here use more encapsulation @@ -144,4 +188,4 @@ 
JSON XML example not provided yet -*/ \ No newline at end of file +*/ From 3f858d675557536feb6bac6312e4205c987857d9 Mon Sep 17 00:00:00 2001 From: Antonin Portelli Date: Wed, 17 May 2017 13:25:14 +0200 Subject: [PATCH 042/177] Scalar: phi^2 observable --- tests/hmc/Test_hmc_ScalarActionNxN.cc | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tests/hmc/Test_hmc_ScalarActionNxN.cc b/tests/hmc/Test_hmc_ScalarActionNxN.cc index bcaee31d..a7490f51 100644 --- a/tests/hmc/Test_hmc_ScalarActionNxN.cc +++ b/tests/hmc/Test_hmc_ScalarActionNxN.cc @@ -62,6 +62,8 @@ public: << "m= " << TensorRemove(trace(sum(U))) << std::endl; std::cout << GridLogMessage << "m^2= " << TensorRemove(trace(sum(U)*sum(U))) << std::endl; + std::cout << GridLogMessage + << "phi^2= " << TensorRemove(sum(trace(U*U))) << std::endl; std::cout.precision(def_prec); } From a8c10b1933948d491371da0d4df32cb3059c3b97 Mon Sep 17 00:00:00 2001 From: paboyle Date: Thu, 25 May 2017 11:43:33 +0100 Subject: [PATCH 043/177] Use a global-X x Local-Y chunksize for parallel binary I/O. Gives O(32 x 8 x 18*8*8) chunk size on configuration I/O. At 150KB should be getting close to packet sizes and 4MB filesystem block sizes that are reasonably (!?) performant. We shall see once I move this off my laptop and over to BNL and time it. 
--- lib/parallelIO/BinaryIO.h | 196 +++++++++++++++++++++----------------- lib/parallelIO/NerscIO.h | 6 +- 2 files changed, 113 insertions(+), 89 deletions(-) diff --git a/lib/parallelIO/BinaryIO.h b/lib/parallelIO/BinaryIO.h index afa7eb2e..ab449f92 100644 --- a/lib/parallelIO/BinaryIO.h +++ b/lib/parallelIO/BinaryIO.h @@ -217,32 +217,34 @@ class BinaryIO { Umu = zero; uint32_t csum=0; uint64_t bytes=0; - fobj file_object; - sobj munged; - + + int lx = grid->_fdimensions[0]; + std::vector file_object(lx); + std::vector munged(lx); for(int t=0;t_fdimensions[3];t++){ for(int z=0;z_fdimensions[2];z++){ for(int y=0;y_fdimensions[1];y++){ - for(int x=0;x_fdimensions[0];x++){ - - std::vector site({x,y,z,t}); - + { + bytes += sizeof(fobj)*lx; if (grid->IsBoss()) { - fin.read((char *)&file_object, sizeof(file_object));assert( fin.fail()==0); - bytes += sizeof(file_object); - if (ieee32big) be32toh_v((void *)&file_object, sizeof(file_object)); - if (ieee32) le32toh_v((void *)&file_object, sizeof(file_object)); - if (ieee64big) be64toh_v((void *)&file_object, sizeof(file_object)); - if (ieee64) le64toh_v((void *)&file_object, sizeof(file_object)); - - munge(file_object, munged, csum); + fin.read((char *)&file_object[0], sizeof(fobj)*lx); assert( fin.fail()==0); + for(int x=0;x site({x,y,z,t}); + // The boss who read the file has their value poked + pokeSite(munged[x],Umu,site); } - // The boss who read the file has their value poked - pokeSite(munged,Umu,site); }}}} timer.Stop(); std::cout<Broadcast(0,(void *)&csum,sizeof(csum)); return csum; @@ -274,31 +276,34 @@ class BinaryIO { } uint64_t bytes=0; uint32_t csum=0; - fobj file_object; - sobj unmunged; + int lx = grid->_fdimensions[0]; + std::vector file_object(lx); + std::vector unmunged(lx); for(int t=0;t_fdimensions[3];t++){ for(int z=0;z_fdimensions[2];z++){ for(int y=0;y_fdimensions[1];y++){ - for(int x=0;x_fdimensions[0];x++){ + { - std::vector site({x,y,z,t}); + std::vector site({0,y,z,t}); // peek & write - 
peekSite(unmunged,Umu,site); - - munge(unmunged,file_object,csum); - + for(int x=0;xIsBoss() ) { - if(ieee32big) htobe32_v((void *)&file_object,sizeof(file_object)); - if(ieee32) htole32_v((void *)&file_object,sizeof(file_object)); - if(ieee64big) htobe64_v((void *)&file_object,sizeof(file_object)); - if(ieee64) htole64_v((void *)&file_object,sizeof(file_object)); - - // NB could gather an xstrip as an optimisation. - fout.write((char *)&file_object,sizeof(file_object));assert( fout.fail()==0); - bytes+=sizeof(file_object); + for(int x=0;xGlobalIndexToGlobalCoor(gidx,gcoor); grid->GlobalCoorToRankIndex(rank,o_idx,i_idx,gcoor); int l_idx=parallel.generator_idx(o_idx,i_idx); - //std::cout << GridLogDebug << "l_idx " << l_idx << " o_idx " << o_idx - // << " i_idx " << i_idx << " rank " << rank << std::endl; if ( grid->IsBoss() ) { fin.read((char *)&saved[0],bytes);assert( fin.fail()==0); @@ -460,14 +463,12 @@ class BinaryIO { int ieee64 = (format == std::string("IEEE64")); - // Take into account block size of parallel file systems want about - // 4-16MB chunks. // Ideally one reader/writer per xy plane and read these contiguously // with comms from nominated I/O nodes. 
std::ifstream fin; int nd = grid->_ndimension; - std::vector parallel(nd,1); + std::vector parallel(nd,1); parallel[0] = 0; std::vector ioproc (nd); std::vector start(nd); std::vector range(nd); @@ -479,9 +480,15 @@ class BinaryIO { uint64_t slice_vol = 1; int IOnode = 1; - for(int d=0;d_ndimension;d++) { + int gstrip = grid->_gdimensions[0]; + int lstrip = grid->_ldimensions[0]; - if ( d == 0 ) parallel[d] = 0; + int chunk ; + if ( nd==1) chunk = gstrip; + else chunk = gstrip*grid->_ldimensions[1]; + + for(int d=0;d_ndimension;d++) { + if (parallel[d]) { range[d] = grid->_ldimensions[d]; start[d] = grid->_processor_coor[d]*range[d]; @@ -500,13 +507,16 @@ class BinaryIO { uint32_t tmp = IOnode; grid->GlobalSum(tmp); std::cout<< std::dec ; - std::cout<< GridLogMessage<< "Parallel read I/O to "<< file << " with " <_ndimension;d++){ std::cout<< range[d]; if( d< grid->_ndimension-1 ) std::cout<< " x "; } std::cout << std::endl; + std::cout<< GridLogMessage<< "Parallel I/O local strip size is "<< lstrip <ThisRank(); int iorank = grid->RankFromProcessorCoor(ioproc); - if (!ILDG.is_ILDG) - if ( IOnode ) { - fin.open(file,std::ios::binary|std::ios::in); - } + if (!ILDG.is_ILDG) { + if ( IOnode ) { + fin.open(file,std::ios::binary|std::ios::in); + } + } ////////////////////////////////////////////////////////// // Find the location of each site and send to primary node @@ -528,16 +539,15 @@ class BinaryIO { Umu = zero; static uint32_t csum; csum=0;//static for SHMEM - fobj fileObj; - static sobj siteObj; // Static to place in symmetric region for SHMEM + std::vector fileObj(chunk); // FIXME + std::vector siteObj(chunk); // Use comm allocator to place in symmetric region for SHMEM - // need to implement these loops in Nd independent way with a lexico conversion - for(int tlex=0;tlex tsite(nd); // temporary mixed up site std::vector gsite(nd); std::vector lsite(nd); - std::vector iosite(nd); Lexicographic::CoorFromIndex(tsite,tlex,range); @@ -546,53 +556,68 @@ class BinaryIO 
{ gsite[d] = tsite[d]+start[d]; // global site } - - ///////////////////////// - // Get the rank of owner of data - ///////////////////////// + /////////////////////////////////////////// + // Get the global lexico base of the chunk + /////////////////////////////////////////// int rank, o_idx,i_idx, g_idx; grid->GlobalCoorToRankIndex(rank,o_idx,i_idx,gsite); grid->GlobalCoorToGlobalIndex(gsite,g_idx); - + //////////////////////////////// // iorank reads from the seek //////////////////////////////// if (myrank == iorank) { - if (ILDG.is_ILDG){ - // use C-LIME to populate the record - #ifdef HAVE_LIME - uint64_t sizeFO = sizeof(fileObj); +#ifdef HAVE_LIME + // use C-LIME to populate the record + uint64_t sizeFO = sizeof(fobj)*chunk; limeReaderSeek(ILDG.LR, g_idx*sizeFO, SEEK_SET); - int status = limeReaderReadData((void *)&fileObj, &sizeFO, ILDG.LR); - #endif + int status = limeReaderReadData((void *)&fileObj[0], &sizeFO, ILDG.LR); +#endif } else{ - fin.seekg(offset+g_idx*sizeof(fileObj)); - fin.read((char *)&fileObj,sizeof(fileObj)); + fin.seekg(offset+g_idx*sizeof(fobj)); + fin.read((char *)&fileObj[0],sizeof(fobj)*chunk); } - bytes+=sizeof(fileObj); + bytes+=sizeof(fobj)*chunk; - if(ieee32big) be32toh_v((void *)&fileObj,sizeof(fileObj)); - if(ieee32) le32toh_v((void *)&fileObj,sizeof(fileObj)); - if(ieee64big) be64toh_v((void *)&fileObj,sizeof(fileObj)); - if(ieee64) le64toh_v((void *)&fileObj,sizeof(fileObj)); + if(ieee32big) be32toh_v((void *)&fileObj[0],sizeof(fobj)*chunk); + if(ieee32) le32toh_v((void *)&fileObj[0],sizeof(fobj)*chunk); + if(ieee64big) be64toh_v((void *)&fileObj[0],sizeof(fobj)*chunk); + if(ieee64) le64toh_v((void *)&fileObj[0],sizeof(fobj)*chunk); - munge(fileObj,siteObj,csum); + for(int c=0;cSendRecvPacket((void *)&siteObj,(void *)&siteObj,iorank,rank,sizeof(siteObj)); + for(int cc=0;cc_ldimensions[d]; // local site + gsite[d] = tsite[d]+start[d]; // global site } + grid->GlobalCoorToRankIndex(rank,o_idx,i_idx,gsite); + + if ( rank != 
iorank ) { + if ( (myrank == rank) || (myrank==iorank) ) { + grid->SendRecvPacket((void *)&siteObj[cc],(void *)&siteObj[cc],iorank,rank,sizeof(sobj)*lstrip); + } + } + // Poke at destination + if ( myrank == rank ) { + for(int x=0;xBarrier(); // necessary? } - // Poke at destination - if ( myrank == rank ) { - pokeLocalSite(siteObj,Umu,lsite); - } - grid->Barrier(); // necessary? } grid->GlobalSum(csum); @@ -601,7 +626,7 @@ class BinaryIO { timer.Stop(); std::cout< tsite(nd); // temporary mixed up site std::vector gsite(nd); std::vector lsite(nd); - std::vector iosite(nd); Lexicographic::CoorFromIndex(tsite, tlex, range); diff --git a/lib/parallelIO/NerscIO.h b/lib/parallelIO/NerscIO.h index f0159d41..cd20c841 100644 --- a/lib/parallelIO/NerscIO.h +++ b/lib/parallelIO/NerscIO.h @@ -30,6 +30,9 @@ #ifndef GRID_NERSC_IO_H #define GRID_NERSC_IO_H +#define PARALLEL_READ +#undef PARALLEL_WRITE + #include #include #include @@ -326,8 +329,6 @@ namespace Grid { ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////// // Now the meat: the object readers ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -#define PARALLEL_READ -#define PARALLEL_WRITE template static inline void readConfiguration(Lattice > &Umu,NerscField& header,std::string file) @@ -399,6 +400,7 @@ namespace Grid { <<" header "< Date: Thu, 25 May 2017 13:32:24 +0100 Subject: [PATCH 044/177] Attempts to speed up the parallel IO --- lib/parallelIO/BinaryIO.h | 204 +++++++++++++++++++++----------------- lib/parallelIO/NerscIO.h | 18 +++- tests/IO/Test_nersc_io.cc | 2 +- 3 files changed, 133 insertions(+), 91 deletions(-) diff --git a/lib/parallelIO/BinaryIO.h b/lib/parallelIO/BinaryIO.h index ab449f92..c1fca348 100644 --- a/lib/parallelIO/BinaryIO.h +++ b/lib/parallelIO/BinaryIO.h @@ -228,11 +228,11 @@ class BinaryIO { bytes += sizeof(fobj)*lx; if (grid->IsBoss()) { fin.read((char 
*)&file_object[0], sizeof(fobj)*lx); assert( fin.fail()==0); + if (ieee32big) be32toh_v((void *)&file_object[0], sizeof(fobj)*lx); + if (ieee32) le32toh_v((void *)&file_object[0], sizeof(fobj)*lx); + if (ieee64big) be64toh_v((void *)&file_object[0], sizeof(fobj)*lx); + if (ieee64) le64toh_v((void *)&file_object[0], sizeof(fobj)*lx); for(int x=0;xIsBoss() ) { for(int x=0;xThisRank() ){ - // std::cout << "rank" << rank<<" Getting state for index "<Broadcast(rank, (void *)&saved[0], bytes); + + if ( rank != 0 ) { + grid->Broadcast(rank, (void *)&saved[0], bytes); + } + + grid->Barrier(); if ( grid->IsBoss() ) { Uint32Checksum((uint32_t *)&saved[0],bytes,csum); @@ -370,8 +375,9 @@ class BinaryIO { grid->Broadcast(0, (void *)&csum, sizeof(csum)); - if (grid->IsBoss()) + if (grid->IsBoss()) { fout.close(); + } timer.Stop(); @@ -426,6 +432,7 @@ class BinaryIO { } grid->Broadcast(0,(void *)&saved[0],bytes); + grid->Barrier(); if( rank == grid->ThisRank() ){ parallel.SetState(saved,l_idx); @@ -434,8 +441,8 @@ class BinaryIO { if ( grid->IsBoss() ) { fin.read((char *)&saved[0],bytes);assert( fin.fail()==0); - serial.SetState(saved,0); Uint32Checksum((uint32_t *)&saved[0],bytes,csum); + serial.SetState(saved,0); } std::cout << GridLogMessage << "RNG file checksum " << std::hex << csum << std::dec << std::endl; @@ -445,7 +452,6 @@ class BinaryIO { return csum; } - template static inline uint32_t readObjectParallel(Lattice &Umu, std::string file, @@ -528,6 +534,10 @@ class BinaryIO { if (!ILDG.is_ILDG) { if ( IOnode ) { fin.open(file,std::ios::binary|std::ios::in); + if ( !fin.is_open() ) { + std::cout << GridLogMessage << "readObjectParallel: Error opening file " << file << std::endl; + exit(0); + } } } @@ -540,7 +550,7 @@ class BinaryIO { static uint32_t csum; csum=0;//static for SHMEM std::vector fileObj(chunk); // FIXME - std::vector siteObj(chunk); // Use comm allocator to place in symmetric region for SHMEM + std::vector siteObj(chunk); // Use alignedAllocator to place in 
symmetric region for SHMEM // need to implement these loops in Nd independent way with a lexico conversion for(int tlex=0;tlex gsite(nd); std::vector lsite(nd); - Lexicographic::CoorFromIndex(tsite,tlex,range); - - for(int d=0;d_ldimensions[d]; // local site - gsite[d] = tsite[d]+start[d]; // global site - } + int rank, o_idx,i_idx, g_idx; /////////////////////////////////////////// // Get the global lexico base of the chunk /////////////////////////////////////////// - int rank, o_idx,i_idx, g_idx; + Lexicographic::CoorFromIndex(tsite,tlex,range); + for(int d=0;dGlobalCoorToRankIndex(rank,o_idx,i_idx,gsite); grid->GlobalCoorToGlobalIndex(gsite,g_idx); @@ -571,11 +577,14 @@ class BinaryIO { if (ILDG.is_ILDG){ #ifdef HAVE_LIME // use C-LIME to populate the record - uint64_t sizeFO = sizeof(fobj)*chunk; + uint64_t sizeFO = sizeof(fobj); + uint64_t sizeChunk= sizeFO*chunk; limeReaderSeek(ILDG.LR, g_idx*sizeFO, SEEK_SET); - int status = limeReaderReadData((void *)&fileObj[0], &sizeFO, ILDG.LR); + int status = limeReaderReadData((void *)&fileObj[0], &sizeChunk, ILDG.LR); +#else + assert(0); #endif - } else{ + } else { fin.seekg(offset+g_idx*sizeof(fobj)); fin.read((char *)&fileObj[0],sizeof(fobj)*chunk); } @@ -630,6 +639,7 @@ class BinaryIO { return csum; } + ////////////////////////////////////////////////////////// // Parallel writer ////////////////////////////////////////////////////////// @@ -643,9 +653,9 @@ class BinaryIO { GridBase *grid = Umu._grid; int ieee32big = (format == std::string("IEEE32BIG")); - int ieee32 = (format == std::string("IEEE32")); + int ieee32 = (format == std::string("IEEE32")); int ieee64big = (format == std::string("IEEE64BIG")); - int ieee64 = (format == std::string("IEEE64")); + int ieee64 = (format == std::string("IEEE64")); if (!(ieee32big || ieee32 || ieee64big || ieee64)) { std::cout << GridLogError << "Unrecognized file format " << format << std::endl; @@ -658,7 +668,9 @@ class BinaryIO { assert(grid->CheckerBoarded(d) == 0); } - 
std::vector parallel(nd, 1); + // Parallel in yzt, serial funnelled in "x". + // gx x ly chunk size + std::vector parallel(nd, 1); parallel[0] = 0; std::vector ioproc(nd); std::vector start(nd); std::vector range(nd); @@ -666,9 +678,13 @@ class BinaryIO { uint64_t slice_vol = 1; int IOnode = 1; + int gstrip = grid->_gdimensions[0]; + int lstrip = grid->_ldimensions[0]; + int chunk; + if ( nd==1) chunk = gstrip; + else chunk = gstrip*grid->_ldimensions[1]; for (int d = 0; d < grid->_ndimension; d++) { - if (d != grid->_ndimension - 1) parallel[d] = 0; if (parallel[d]) { range[d] = grid->_ldimensions[d]; @@ -688,14 +704,16 @@ class BinaryIO { { uint32_t tmp = IOnode; grid->GlobalSum(tmp); - std::cout<< GridLogMessage<< "Parallel write I/O from "<< file - << " with " <_ndimension;d++){ std::cout<< range[d]; if( d< grid->_ndimension-1 ) std::cout<< " x "; } std::cout << std::endl; + std::cout<< GridLogMessage<< "Parallel I/O local strip size is "<< lstrip <RankFromProcessorCoor(ioproc); // Take into account block size of parallel file systems want about - // 4-16MB chunks. // Ideally one reader/writer per xy plane and read these contiguously // with comms from nominated I/O nodes. 
std::ofstream fout; - if (!ILDG.is_ILDG) - if (IOnode){ - fout.open(file, std::ios::binary | std::ios::in | std::ios::out); - if (!fout.is_open()) { - std::cout << GridLogMessage << "writeObjectParallel: Error opening file " << file - << std::endl; - exit(0); - } - } - - + if (!ILDG.is_ILDG) { + if (IOnode){ + fout.open(file, std::ios::binary | std::ios::in | std::ios::out); + if (!fout.is_open()) { + std::cout << GridLogMessage << "writeObjectParallel: Error opening file " << file << std::endl; + exit(0); + } + } + } + ////////////////////////////////////////////////////////// // Find the location of each site and send to primary node // Take loop order from Chroma; defines loop order now that NERSC doc no @@ -729,72 +745,82 @@ class BinaryIO { ////////////////////////////////////////////////////////// uint32_t csum = 0; - fobj fileObj; - static sobj siteObj; // static for SHMEM target; otherwise dynamic allocate - // with AlignedAllocator + std::vector fileObj(chunk); + std::vector siteObj(chunk); // should aggregate a whole chunk and then write. 
// need to implement these loops in Nd independent way with a lexico // conversion - for (int tlex = 0; tlex < slice_vol; tlex++) { + for (int tlex = 0; tlex < slice_vol; tlex+=chunk) { std::vector tsite(nd); // temporary mixed up site std::vector gsite(nd); std::vector lsite(nd); - Lexicographic::CoorFromIndex(tsite, tlex, range); - - for(int d = 0;d < nd; d++){ - lsite[d] = tsite[d] % grid->_ldimensions[d]; // local site - gsite[d] = tsite[d] + start[d]; // global site - } - - ///////////////////////// - // Get the rank of owner of data - ///////////////////////// int rank, o_idx, i_idx, g_idx; - grid->GlobalCoorToRankIndex(rank, o_idx, i_idx, gsite); - grid->GlobalCoorToGlobalIndex(gsite, g_idx); - //////////////////////////////// - // iorank writes from the seek - //////////////////////////////// + // Possibly do transport through pt2pt + for(int cc=0;ccSendRecvPacket((void *)&siteObj,(void *)&siteObj,rank,iorank,sizeof(siteObj)); + for(int d=0;d_ldimensions[d]; // local site + gsite[d] = tsite[d]+start[d]; // global site + } + grid->GlobalCoorToRankIndex(rank,o_idx,i_idx,gsite); + + // Owner of data peeks it over lstrip + if ( myrank == rank ) { + for(int x=0;xSendRecvPacket((void *)&siteObj[cc],(void *)&siteObj[cc],rank,iorank,sizeof(sobj)*lstrip); + } } } grid->Barrier(); // necessary? + ///////////////////////// + // Get the global lexico base of the chunk + ///////////////////////// + Lexicographic::CoorFromIndex(tsite, tlex, range); + for(int d = 0;d < nd; d++){ gsite[d] = tsite[d] + start[d];} + grid->GlobalCoorToRankIndex(rank, o_idx, i_idx, gsite); + grid->GlobalCoorToGlobalIndex(gsite, g_idx); + if (myrank == iorank) { - munge(siteObj, fileObj, csum); - if (ieee32big) htobe32_v((void *)&fileObj, sizeof(fileObj)); - if (ieee32) htole32_v((void *)&fileObj, sizeof(fileObj)); - if (ieee64big) htobe64_v((void *)&fileObj, sizeof(fileObj)); - if (ieee64) htole64_v((void *)&fileObj, sizeof(fileObj)); + for(int c=0;cBarrier(); // necessary? 
- if (IOnode) - fout.close(); - + if (!ILDG.is_ILDG) { + if (IOnode) { + fout.close(); + } + } return csum; } diff --git a/lib/parallelIO/NerscIO.h b/lib/parallelIO/NerscIO.h index cd20c841..cf3e41e4 100644 --- a/lib/parallelIO/NerscIO.h +++ b/lib/parallelIO/NerscIO.h @@ -31,7 +31,7 @@ #define GRID_NERSC_IO_H #define PARALLEL_READ -#undef PARALLEL_WRITE +#define PARALLEL_WRITE #include #include @@ -401,6 +401,18 @@ namespace Grid { std::cout<= 1.0e-5 ) { + std::cout << " Plaquette mismatch "< uint32_t csum=BinaryIO::readRNGSerial(serial,parallel,file,offset); + if ( csum != header.checksum ) { + std::cerr << "checksum mismatch "< U(4,&Fine); - SU3::ColdConfiguration(pRNGa,Umu); + SU3::HotConfiguration(pRNGa,Umu); NerscField header; std::string file("./ckpoint_lat.4000"); From 69470ccc10e688908b9d17ea94a6e18759a8dc1a Mon Sep 17 00:00:00 2001 From: paboyle Date: Thu, 25 May 2017 13:41:26 +0100 Subject: [PATCH 045/177] Update to do list --- TODO | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/TODO b/TODO index 672879cd..a5d4cabd 100644 --- a/TODO +++ b/TODO @@ -2,9 +2,9 @@ TODO: --------------- Peter's work list: -2)- Precision conversion and sort out localConvert <-- -3)- Remove DenseVector, DenseMatrix; Use Eigen instead. <-- started -4)- Binary I/O speed up & x-strips +1)- Precision conversion and sort out localConvert <-- +2)- Remove DenseVector, DenseMatrix; Use Eigen instead. <-- + -- Profile CG, BlockCG, etc... 
Flop count/rate -- PARTIAL, time but no flop/s yet -- Physical propagator interface -- Conserved currents @@ -13,6 +13,7 @@ Peter's work list: -- HDCR resume Recent DONE +-- Binary I/O speed up & x-strips <-- DONE -- Cut down the exterior overhead <-- DONE -- Interior legs from SHM comms <-- DONE -- Half-precision comms <-- DONE From 725c513d9421732e212fe693120de64020299275 Mon Sep 17 00:00:00 2001 From: Peter Boyle Date: Mon, 29 May 2017 16:47:32 -0400 Subject: [PATCH 046/177] Better MPI3 benchmarking --- benchmarks/Benchmark_comms.cc | 127 ++++++++++++++++++++-------------- 1 file changed, 75 insertions(+), 52 deletions(-) diff --git a/benchmarks/Benchmark_comms.cc b/benchmarks/Benchmark_comms.cc index ce881ef6..532532f8 100644 --- a/benchmarks/Benchmark_comms.cc +++ b/benchmarks/Benchmark_comms.cc @@ -66,7 +66,7 @@ int main (int argc, char ** argv) int threads = GridThread::GetThreads(); std::cout<1) nmu++; @@ -88,6 +88,9 @@ int main (int argc, char ** argv) lat*mpi_layout[3]}); GridCartesian Grid(latt_size,simd_layout,mpi_layout); + RealD Nrank = Grid._Nprocessors; + RealD Nnode = Grid.NodeCount(); + RealD ppn = Nrank/Nnode; std::vector > xbuf(8,std::vector(lat*lat*lat*Ls)); std::vector > rbuf(8,std::vector(lat*lat*lat*Ls)); @@ -132,13 +135,13 @@ int main (int argc, char ** argv) } Grid.SendToRecvFromComplete(requests); Grid.Barrier(); - double stop=usecond(); - t_time[i] = stop-start; // microseconds + double stop=usecond(); + t_time[i] = stop-start; // microseconds } timestat.statistics(t_time); - double dbytes = bytes; + double dbytes = bytes*ppn; double xbytes = dbytes*2.0*ncomm; double rbytes = xbytes; double bidibytes = xbytes+rbytes; @@ -165,6 +168,9 @@ int main (int argc, char ** argv) std::vector latt_size ({lat,lat,lat,lat}); GridCartesian Grid(latt_size,simd_layout,mpi_layout); + RealD Nrank = Grid._Nprocessors; + RealD Nnode = Grid.NodeCount(); + RealD ppn = Nrank/Nnode; std::vector > xbuf(8,std::vector(lat*lat*lat*Ls)); std::vector > 
rbuf(8,std::vector(lat*lat*lat*Ls)); @@ -213,14 +219,14 @@ int main (int argc, char ** argv) } } Grid.Barrier(); - double stop=usecond(); - t_time[i] = stop-start; // microseconds + double stop=usecond(); + t_time[i] = stop-start; // microseconds } timestat.statistics(t_time); - double dbytes = bytes; + double dbytes = bytes*ppn; double xbytes = dbytes*2.0*ncomm; double rbytes = xbytes; double bidibytes = xbytes+rbytes; @@ -251,6 +257,9 @@ int main (int argc, char ** argv) lat*mpi_layout[3]}); GridCartesian Grid(latt_size,simd_layout,mpi_layout); + RealD Nrank = Grid._Nprocessors; + RealD Nnode = Grid.NodeCount(); + RealD ppn = Nrank/Nnode; std::vector xbuf(8); std::vector rbuf(8); @@ -258,59 +267,66 @@ int main (int argc, char ** argv) for(int d=0;d<8;d++){ xbuf[d] = (HalfSpinColourVectorD *)Grid.ShmBufferMalloc(lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD)); rbuf[d] = (HalfSpinColourVectorD *)Grid.ShmBufferMalloc(lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD)); + bzero((void *)xbuf[d],lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD)); + bzero((void *)rbuf[d],lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD)); } int ncomm; int bytes=lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD); + double dbytes; for(int i=0;i requests; - ncomm=0; for(int mu=0;mu<4;mu++){ + if (mpi_layout[mu]>1 ) { ncomm++; int comm_proc=1; int xmit_to_rank; int recv_from_rank; - Grid.ShiftedRanks(mu,comm_proc,xmit_to_rank,recv_from_rank); - Grid.StencilSendToRecvFromBegin(requests, - (void *)&xbuf[mu][0], - xmit_to_rank, - (void *)&rbuf[mu][0], - recv_from_rank, - bytes); + dbytes+= + Grid.StencilSendToRecvFromBegin(requests, + (void *)&xbuf[mu][0], + xmit_to_rank, + (void *)&rbuf[mu][0], + recv_from_rank, + bytes); comm_proc = mpi_layout[mu]-1; Grid.ShiftedRanks(mu,comm_proc,xmit_to_rank,recv_from_rank); - Grid.StencilSendToRecvFromBegin(requests, - (void *)&xbuf[mu+4][0], - xmit_to_rank, - (void *)&rbuf[mu+4][0], - recv_from_rank, - bytes); + dbytes+= + Grid.StencilSendToRecvFromBegin(requests, + (void 
*)&xbuf[mu+4][0], + xmit_to_rank, + (void *)&rbuf[mu+4][0], + recv_from_rank, + bytes); } } Grid.StencilSendToRecvFromComplete(requests); Grid.Barrier(); - double stop=usecond(); - t_time[i] = stop-start; // microseconds - + double stop=usecond(); + t_time[i] = stop-start; // microseconds + } timestat.statistics(t_time); - double dbytes = bytes; - double xbytes = dbytes*2.0*ncomm; - double rbytes = xbytes; - double bidibytes = xbytes+rbytes; + dbytes=dbytes*ppn; + double xbytes = dbytes*0.5; + double rbytes = dbytes*0.5; + double bidibytes = dbytes; std::cout< xbuf(8); std::vector rbuf(8); @@ -345,16 +364,18 @@ int main (int argc, char ** argv) for(int d=0;d<8;d++){ xbuf[d] = (HalfSpinColourVectorD *)Grid.ShmBufferMalloc(lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD)); rbuf[d] = (HalfSpinColourVectorD *)Grid.ShmBufferMalloc(lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD)); + bzero((void *)xbuf[d],lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD)); + bzero((void *)rbuf[d],lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD)); } int ncomm; int bytes=lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD); - + double dbytes; for(int i=0;i requests; - + dbytes=0; ncomm=0; for(int mu=0;mu<4;mu++){ @@ -366,41 +387,43 @@ int main (int argc, char ** argv) int recv_from_rank; Grid.ShiftedRanks(mu,comm_proc,xmit_to_rank,recv_from_rank); - Grid.StencilSendToRecvFromBegin(requests, - (void *)&xbuf[mu][0], - xmit_to_rank, - (void *)&rbuf[mu][0], - recv_from_rank, - bytes); + dbytes+= + Grid.StencilSendToRecvFromBegin(requests, + (void *)&xbuf[mu][0], + xmit_to_rank, + (void *)&rbuf[mu][0], + recv_from_rank, + bytes); Grid.StencilSendToRecvFromComplete(requests); requests.resize(0); comm_proc = mpi_layout[mu]-1; Grid.ShiftedRanks(mu,comm_proc,xmit_to_rank,recv_from_rank); - Grid.StencilSendToRecvFromBegin(requests, - (void *)&xbuf[mu+4][0], - xmit_to_rank, - (void *)&rbuf[mu+4][0], - recv_from_rank, - bytes); + dbytes+= + Grid.StencilSendToRecvFromBegin(requests, + (void *)&xbuf[mu+4][0], + xmit_to_rank, + 
(void *)&rbuf[mu+4][0], + recv_from_rank, + bytes); Grid.StencilSendToRecvFromComplete(requests); requests.resize(0); } } - Grid.Barrier(); - double stop=usecond(); - t_time[i] = stop-start; // microseconds - + Grid.Barrier(); + double stop=usecond(); + t_time[i] = stop-start; // microseconds + } timestat.statistics(t_time); - double dbytes = bytes; - double xbytes = dbytes*2.0*ncomm; - double rbytes = xbytes; - double bidibytes = xbytes+rbytes; + dbytes=dbytes*ppn; + double xbytes = dbytes*0.5; + double rbytes = dbytes*0.5; + double bidibytes = dbytes; std::cout< Date: Tue, 30 May 2017 23:37:02 +0100 Subject: [PATCH 047/177] Precision safe compile --- tests/forces/Test_contfrac_force.cc | 2 +- tests/forces/Test_dwf_force.cc | 2 +- tests/forces/Test_dwf_gpforce.cc | 6 +++--- tests/forces/Test_gp_rect_force.cc | 2 +- tests/forces/Test_gpdwf_force.cc | 2 +- tests/forces/Test_gpwilson_force.cc | 2 +- tests/forces/Test_laplacian_force.cc | 2 +- tests/forces/Test_mobius_force.cc | 2 +- tests/forces/Test_partfrac_force.cc | 2 +- tests/forces/Test_rect_force.cc | 2 +- tests/forces/Test_wilson_force.cc | 6 +++--- tests/forces/Test_zmobius_force.cc | 2 +- 12 files changed, 16 insertions(+), 16 deletions(-) diff --git a/tests/forces/Test_contfrac_force.cc b/tests/forces/Test_contfrac_force.cc index 227ad5a0..2afb4dde 100644 --- a/tests/forces/Test_contfrac_force.cc +++ b/tests/forces/Test_contfrac_force.cc @@ -139,7 +139,7 @@ int main (int argc, char ** argv) } - Complex dSpred = sum(dS); + ComplexD dSpred = sum(dS); std::cout << GridLogMessage << " S "< Date: Tue, 30 May 2017 23:38:02 +0100 Subject: [PATCH 048/177] Cleaner code --- lib/simd/Grid_vector_types.h | 31 +++++++------------------------ lib/simd/Grid_vector_unops.h | 7 ------- 2 files changed, 7 insertions(+), 31 deletions(-) diff --git a/lib/simd/Grid_vector_types.h b/lib/simd/Grid_vector_types.h index 0048382f..1ebe7379 100644 --- a/lib/simd/Grid_vector_types.h +++ b/lib/simd/Grid_vector_types.h @@ -327,10 
+327,6 @@ class Grid_simd { // provides support /////////////////////////////////////// - //#if (__GNUC__ == 5 ) || ( ( __GNUC__ == 6 ) && __GNUC_MINOR__ < 3 ) - //#pragma GCC push_options - //#pragma GCC optimize ("O0") - //#endif template friend inline Grid_simd SimdApply(const functor &func, const Grid_simd &v) { Grid_simd ret; @@ -364,9 +360,6 @@ class Grid_simd { ret.v = cx.v; return ret; } - //#if (__GNUC__ == 5 ) || ( ( __GNUC__ == 6 ) && __GNUC_MINOR__ < 3 ) - //#pragma GCC pop_options - //#endif /////////////////////// // Exchange // Al Ah , Bl Bh -> Al Bl Ah,Bh @@ -428,7 +421,6 @@ class Grid_simd { }; // end of Grid_simd class definition - inline void permute(ComplexD &y,ComplexD b, int perm) { y=b; } inline void permute(ComplexF &y,ComplexF b, int perm) { y=b; } inline void permute(RealD &y,RealD b, int perm) { y=b; } @@ -838,8 +830,6 @@ inline void precisionChange(vComplexD *out,vComplexF *in,int nvec){ precisionCha inline void precisionChange(vComplexD *out,vComplexH *in,int nvec){ precisionChange((vRealD *)out,(vRealH *)in,nvec);} inline void precisionChange(vComplexF *out,vComplexH *in,int nvec){ precisionChange((vRealF *)out,(vRealH *)in,nvec);} - - // Check our vector types are of an appropriate size. 
#if defined QPX static_assert(2*sizeof(SIMD_Ftype) == sizeof(SIMD_Dtype), "SIMD vector lengths incorrect"); @@ -854,21 +844,14 @@ static_assert(sizeof(SIMD_Ftype) == sizeof(SIMD_Itype), "SIMD vector lengths inc ///////////////////////////////////////// template struct is_simd : public std::false_type {}; -template <> -struct is_simd : public std::true_type {}; -template <> -struct is_simd : public std::true_type {}; -template <> -struct is_simd : public std::true_type {}; -template <> -struct is_simd : public std::true_type {}; -template <> -struct is_simd : public std::true_type {}; +template <> struct is_simd : public std::true_type {}; +template <> struct is_simd : public std::true_type {}; +template <> struct is_simd : public std::true_type {}; +template <> struct is_simd : public std::true_type {}; +template <> struct is_simd : public std::true_type {}; -template -using IfSimd = Invoke::value, int> >; -template -using IfNotSimd = Invoke::value, unsigned> >; +template using IfSimd = Invoke::value, int> >; +template using IfNotSimd = Invoke::value, unsigned> >; } #endif diff --git a/lib/simd/Grid_vector_unops.h b/lib/simd/Grid_vector_unops.h index 2afac190..2244566f 100644 --- a/lib/simd/Grid_vector_unops.h +++ b/lib/simd/Grid_vector_unops.h @@ -179,13 +179,6 @@ inline Grid_simd div(const Grid_simd &r, Integer y) { //////////////////////////////////////////////////////////////////////////// // Allows us to assign into **conformable** real vectors from complex //////////////////////////////////////////////////////////////////////////// -// template < class S, class V > -// inline auto ComplexRemove(const Grid_simd &c) -> -// Grid_simd::Real,V> { -// Grid_simd::Real,V> ret; -// ret.v = c.v; -// return ret; -// } template struct AndFunctor { scalar operator()(const scalar &x, const scalar &y) const { return x & y; } From 58e8d0a10d69c794c2839e6b9093bee3c2b32da2 Mon Sep 17 00:00:00 2001 From: paboyle Date: Tue, 30 May 2017 23:38:30 +0100 Subject: [PATCH 049/177] 
reverse direction lexico mapping --- lib/lattice/Lattice_transfer.h | 50 +++++++++++++++++++++++++++++++++- 1 file changed, 49 insertions(+), 1 deletion(-) diff --git a/lib/lattice/Lattice_transfer.h b/lib/lattice/Lattice_transfer.h index 68de52d0..c8ba0928 100644 --- a/lib/lattice/Lattice_transfer.h +++ b/lib/lattice/Lattice_transfer.h @@ -551,7 +551,10 @@ void Replicate(Lattice &coarse,Lattice & fine) //Copy SIMD-vectorized lattice to array of scalar objects in lexicographic order template -typename std::enable_if::value && !isSIMDvectorized::value, void>::type unvectorizeToLexOrdArray(std::vector &out, const Lattice &in){ +typename std::enable_if::value && !isSIMDvectorized::value, void>::type +unvectorizeToLexOrdArray(std::vector &out, const Lattice &in) +{ + typedef typename vobj::vector_type vtype; GridBase* in_grid = in._grid; @@ -590,6 +593,51 @@ typename std::enable_if::value && !isSIMDvectorized extract1(in_vobj, out_ptrs, 0); } } +//Copy SIMD-vectorized lattice to array of scalar objects in lexicographic order +template +typename std::enable_if::value && !isSIMDvectorized::value, void>::type +vectorizeFromLexOrdArray(std::vector &in, const Lattice &out) +{ + + typedef typename vobj::vector_type vtype; + + GridBase* grid = out._grid; + assert(in.size()==grid->lSites()); + + int ndim = grid->Nd(); + int nsimd = vtype::Nsimd(); + + std::vector > icoor(nsimd); + + for(int lane=0; lane < nsimd; lane++){ + icoor[lane].resize(ndim); + grid->iCoorFromIindex(icoor[lane],lane); + } + + parallel_for(int oidx = 0; oidx < grid->oSites(); oidx++){ //loop over outer index + //Assemble vector of pointers to output elements + std::vector ptrs(nsimd); + + std::vector ocoor(ndim); + grid->oCoorFromOindex(ocoor, oidx); + + std::vector lcoor(grid->Nd()); + + for(int lane=0; lane < nsimd; lane++){ + for(int mu=0;mu_rdimensions[mu]*icoor[lane][mu]; + + int lex; + Lexicographic::IndexFromCoor(lcoor, lex, grid->_ldimensions); + ptrs[lane] = &in[lex]; + } + + //pack from those 
ptrs + vobj vecobj; + merge1(vecobj, ptrs, 0); + out._odata[oidx] = vecobj; + } +} //Convert a Lattice from one precision to another template From e30fa9f4b8fcce40211e69d598617992899b03d4 Mon Sep 17 00:00:00 2001 From: paboyle Date: Tue, 30 May 2017 23:39:16 +0100 Subject: [PATCH 050/177] RankCount; need to clean up ambigious ProcessCount --- lib/communicator/Communicator_base.cc | 2 ++ lib/communicator/Communicator_base.h | 2 ++ lib/communicator/Communicator_mpi3.cc | 1 + 3 files changed, 5 insertions(+) diff --git a/lib/communicator/Communicator_base.cc b/lib/communicator/Communicator_base.cc index 98d2abf4..557fef48 100644 --- a/lib/communicator/Communicator_base.cc +++ b/lib/communicator/Communicator_base.cc @@ -60,6 +60,7 @@ void CartesianCommunicator::ShmBufferFreeAll(void) { ///////////////////////////////// // Grid information queries ///////////////////////////////// +int CartesianCommunicator::Dimensions(void) { return _ndimension; }; int CartesianCommunicator::IsBoss(void) { return _processor==0; }; int CartesianCommunicator::BossRank(void) { return 0; }; int CartesianCommunicator::ThisRank(void) { return _processor; }; @@ -91,6 +92,7 @@ void CartesianCommunicator::GlobalSumVector(ComplexD *c,int N) #if !defined( GRID_COMMS_MPI3) && !defined (GRID_COMMS_MPI3L) int CartesianCommunicator::NodeCount(void) { return ProcessorCount();}; +int CartesianCommunicator::RankCount(void) { return ProcessorCount();}; double CartesianCommunicator::StencilSendToRecvFromBegin(std::vector &list, void *xmit, diff --git a/lib/communicator/Communicator_base.h b/lib/communicator/Communicator_base.h index e0b9f2c3..23d4f647 100644 --- a/lib/communicator/Communicator_base.h +++ b/lib/communicator/Communicator_base.h @@ -148,6 +148,7 @@ class CartesianCommunicator { int RankFromProcessorCoor(std::vector &coor); void ProcessorCoorFromRank(int rank,std::vector &coor); + int Dimensions(void) ; int IsBoss(void) ; int BossRank(void) ; int ThisRank(void) ; @@ -155,6 +156,7 @@ class 
CartesianCommunicator { const std::vector & ProcessorGrid(void) ; int ProcessorCount(void) ; int NodeCount(void) ; + int RankCount(void) ; //////////////////////////////////////////////////////////////////////////////// // very VERY rarely (Log, serial RNG) we need world without a grid diff --git a/lib/communicator/Communicator_mpi3.cc b/lib/communicator/Communicator_mpi3.cc index a8bffc14..54a0f9b5 100644 --- a/lib/communicator/Communicator_mpi3.cc +++ b/lib/communicator/Communicator_mpi3.cc @@ -65,6 +65,7 @@ std::vector CartesianCommunicator::MyGroup; std::vector CartesianCommunicator::ShmCommBufs; int CartesianCommunicator::NodeCount(void) { return GroupSize;}; +int CartesianCommunicator::RankCount(void) { return WorldSize;}; #undef FORCE_COMMS From 53a9aeb9653a312ffed057eccf65f7de0e193742 Mon Sep 17 00:00:00 2001 From: paboyle Date: Tue, 30 May 2017 23:39:53 +0100 Subject: [PATCH 051/177] Cosmetic only --- lib/tensors/Tensor_traits.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/tensors/Tensor_traits.h b/lib/tensors/Tensor_traits.h index ab20b807..c1ef397a 100644 --- a/lib/tensors/Tensor_traits.h +++ b/lib/tensors/Tensor_traits.h @@ -281,8 +281,8 @@ namespace Grid { template class getPrecision{ public: - typedef typename getVectorType::type vector_obj; //get the vector_obj (i.e. a grid Tensor) if its a Lattice, do nothing otherwise (i.e. if fundamental or grid Tensor) - + //get the vector_obj (i.e. a grid Tensor) if its a Lattice, do nothing otherwise (i.e. if fundamental or grid Tensor) + typedef typename getVectorType::type vector_obj; typedef typename GridTypeMapper::scalar_type scalar_type; //get the associated scalar type. 
Works on fundamental and tensor types typedef typename GridTypeMapper::Realified real_scalar_type; //remove any std::complex wrapper, should get us to the fundamental type From ef1b7db374ede8eee0011b1db3fc6cd076d9bfb8 Mon Sep 17 00:00:00 2001 From: paboyle Date: Tue, 30 May 2017 23:40:11 +0100 Subject: [PATCH 052/177] Diff comparison check --- tests/IO/Test_nersc_io.cc | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/tests/IO/Test_nersc_io.cc b/tests/IO/Test_nersc_io.cc index cf919a7d..8507df13 100644 --- a/tests/IO/Test_nersc_io.cc +++ b/tests/IO/Test_nersc_io.cc @@ -88,7 +88,12 @@ int main (int argc, char ** argv) int precision32 = 0; int tworow = 0; NerscIO::writeConfiguration(Umu,file,tworow,precision32); + Umu_saved = Umu; NerscIO::readConfiguration(Umu,header,file); + Umu_diff = Umu - Umu_saved; + //std::cout << "Umu_save "< Date: Tue, 30 May 2017 23:40:39 +0100 Subject: [PATCH 053/177] Beginning move to MPI IO --- lib/parallelIO/NerscIO.h | 43 ++++++++++++++++++++++++++++++---------- 1 file changed, 33 insertions(+), 10 deletions(-) diff --git a/lib/parallelIO/NerscIO.h b/lib/parallelIO/NerscIO.h index cf3e41e4..ab535dac 100644 --- a/lib/parallelIO/NerscIO.h +++ b/lib/parallelIO/NerscIO.h @@ -30,7 +30,10 @@ #ifndef GRID_NERSC_IO_H #define GRID_NERSC_IO_H -#define PARALLEL_READ +#undef PARALLEL_READ +#undef SERIAL_READ +#define MPI_READ + #define PARALLEL_WRITE #include @@ -355,7 +358,12 @@ namespace Grid { #ifdef PARALLEL_READ csum=BinaryIO::readObjectParallel, LorentzColour2x3F> (Umu,file,Nersc3x2munger(), offset,format); -#else +#endif +#ifdef MPI_READ + csum=BinaryIO::readObjectMPI, LorentzColour2x3F> + (Umu,file,Nersc3x2munger(), offset,format); +#endif +#ifdef SERIAL_READ csum=BinaryIO::readObjectSerial, LorentzColour2x3F> (Umu,file,Nersc3x2munger(), offset,format); #endif @@ -364,7 +372,12 @@ namespace Grid { #ifdef PARALLEL_READ csum=BinaryIO::readObjectParallel, LorentzColour2x3D> (Umu,file,Nersc3x2munger(),offset,format); 
-#else +#endif +#ifdef MPI_READ + csum=BinaryIO::readObjectMPI, LorentzColour2x3D> + (Umu,file,Nersc3x2munger(),offset,format); +#endif +#ifdef SERIAL_READ csum=BinaryIO::readObjectSerial, LorentzColour2x3D> (Umu,file,Nersc3x2munger(),offset,format); #endif @@ -374,7 +387,12 @@ namespace Grid { #ifdef PARALLEL_READ csum=BinaryIO::readObjectParallel,LorentzColourMatrixF> (Umu,file,NerscSimpleMunger(),offset,format); -#else +#endif +#ifdef MPI_READ + csum=BinaryIO::readObjectMPI,LorentzColourMatrixF> + (Umu,file,NerscSimpleMunger(),offset,format); +#endif +#ifdef SERIAL_READ csum=BinaryIO::readObjectSerial,LorentzColourMatrixF> (Umu,file,NerscSimpleMunger(),offset,format); #endif @@ -383,7 +401,12 @@ namespace Grid { #ifdef PARALLEL_READ csum=BinaryIO::readObjectParallel,LorentzColourMatrixD> (Umu,file,NerscSimpleMunger(),offset,format); -#else +#endif +#ifdef MPI_READ + csum=BinaryIO::readObjectMPI,LorentzColourMatrixD> + (Umu,file,NerscSimpleMunger(),offset,format); +#endif +#ifdef SERIAL_READ csum=BinaryIO::readObjectSerial,LorentzColourMatrixD> (Umu,file,NerscSimpleMunger(),offset,format); #endif @@ -411,13 +434,13 @@ namespace Grid { std::cerr << " plaqs " << clone.plaquette << " " << header.plaquette << std::endl; std::cerr << " trace " << clone.link_trace<< " " << header.link_trace<< std::endl; std::cerr << " csum " < From 1e429a0d57aa4c5efaa458a198cd7d7a49cb2f34 Mon Sep 17 00:00:00 2001 From: paboyle Date: Tue, 30 May 2017 23:41:07 +0100 Subject: [PATCH 054/177] Added MPI version --- lib/parallelIO/BinaryIO.h | 145 +++++++++++++++++++++++++++++++++++++- 1 file changed, 144 insertions(+), 1 deletion(-) diff --git a/lib/parallelIO/BinaryIO.h b/lib/parallelIO/BinaryIO.h index c1fca348..cbc619ef 100644 --- a/lib/parallelIO/BinaryIO.h +++ b/lib/parallelIO/BinaryIO.h @@ -250,6 +250,149 @@ class BinaryIO { return csum; } + template + static inline uint32_t readObjectMPI(Lattice &Umu,std::string file,munger munge,int offset,const std::string &format) + { + typedef 
typename vobj::scalar_object sobj; + + GridBase *grid = Umu._grid; + + std::cout<< GridLogMessage<< "MPI read I/O "<< file<< std::endl; + GridStopWatch timer; timer.Start(); + + Umu = zero; + uint32_t csum=0; + uint64_t bytes=0; + + int ndim = grid->Dimensions(); + int nrank = grid->ProcessorCount(); + int myrank = grid->ThisRank(); + + std::vector psizes = grid->ProcessorGrid(); + std::vector pcoor = grid->ThisProcessorCoor(); + std::vector gLattice= grid->GlobalDimensions(); + std::vector lLattice= grid->LocalDimensions(); + + std::vector distribs(ndim,MPI_DISTRIBUTE_BLOCK); + std::vector dargs (ndim,MPI_DISTRIBUTE_DFLT_DARG); + + std::vector lStart(ndim); + std::vector gStart(ndim); + + // Flatten the file + int lsites = grid->lSites(); + std::vector scalardata(lsites); + std::vector iodata(lsites); // Munge, checksum, byte order in here + + for(int d=0;dcommunicator, file.c_str(), MPI_MODE_RDONLY, MPI_INFO_NULL, &fh); + assert(ierr==0); + ierr=MPI_File_set_view(fh, disp, mpiObject, fileArray, "native", MPI_INFO_NULL); + // std::cout<< "MPI File set view returned " <GlobalSum(csum); + grid->Barrier(); + + vectorizeFromLexOrdArray(scalardata,Umu); + + timer.Stop(); + std::cout< static inline uint32_t writeObjectSerial(Lattice &Umu,std::string file,munger munge,int offset, const std::string & format) @@ -597,7 +740,7 @@ class BinaryIO { for(int c=0;c Date: Thu, 1 Jun 2017 17:36:18 -0400 Subject: [PATCH 055/177] As local vols increase, use 64 bits for safety --- lib/lattice/Lattice_transfer.h | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/lib/lattice/Lattice_transfer.h b/lib/lattice/Lattice_transfer.h index c8ba0928..cbf31f86 100644 --- a/lib/lattice/Lattice_transfer.h +++ b/lib/lattice/Lattice_transfer.h @@ -595,8 +595,9 @@ unvectorizeToLexOrdArray(std::vector &out, const Lattice &in) } //Copy SIMD-vectorized lattice to array of scalar objects in lexicographic order template -typename std::enable_if::value && 
!isSIMDvectorized::value, void>::type -vectorizeFromLexOrdArray(std::vector &in, const Lattice &out) +typename std::enable_if::value + && !isSIMDvectorized::value, void>::type +vectorizeFromLexOrdArray( std::vector &in, Lattice &out) { typedef typename vobj::vector_type vtype; @@ -614,7 +615,7 @@ vectorizeFromLexOrdArray(std::vector &in, const Lattice &out) grid->iCoorFromIindex(icoor[lane],lane); } - parallel_for(int oidx = 0; oidx < grid->oSites(); oidx++){ //loop over outer index + parallel_for(uint64_t oidx = 0; oidx < grid->oSites(); oidx++){ //loop over outer index //Assemble vector of pointers to output elements std::vector ptrs(nsimd); @@ -624,8 +625,10 @@ vectorizeFromLexOrdArray(std::vector &in, const Lattice &out) std::vector lcoor(grid->Nd()); for(int lane=0; lane < nsimd; lane++){ - for(int mu=0;mu_rdimensions[mu]*icoor[lane][mu]; + } int lex; Lexicographic::IndexFromCoor(lcoor, lex, grid->_ldimensions); @@ -663,7 +666,7 @@ void precisionChange(Lattice &out, const Lattice &in){ std::vector in_slex_conv(in_grid->lSites()); unvectorizeToLexOrdArray(in_slex_conv, in); - parallel_for(int out_oidx=0;out_oidxoSites();out_oidx++){ + parallel_for(uint64_t out_oidx=0;out_oidxoSites();out_oidx++){ std::vector out_ocoor(ndim); out_grid->oCoorFromOindex(out_ocoor, out_oidx); From 21421656abb44bc872ca85c3364eed638fff8a5f Mon Sep 17 00:00:00 2001 From: Peter Boyle Date: Thu, 1 Jun 2017 17:36:53 -0400 Subject: [PATCH 056/177] Big changes improving the code to use MPI IO --- lib/parallelIO/BinaryIO.h | 1065 +++++++++++-------------------------- 1 file changed, 297 insertions(+), 768 deletions(-) diff --git a/lib/parallelIO/BinaryIO.h b/lib/parallelIO/BinaryIO.h index cbc619ef..13341927 100644 --- a/lib/parallelIO/BinaryIO.h +++ b/lib/parallelIO/BinaryIO.h @@ -38,7 +38,12 @@ #include #include +namespace Grid { + +///////////////////////////////////////////////////////////////////////////////// +// Byte reversal garbage 
+///////////////////////////////////////////////////////////////////////////////// inline uint32_t byte_reverse32(uint32_t f) { f = ((f&0xFF)<<24) | ((f&0xFF00)<<8) | ((f&0xFF0000)>>8) | ((f&0xFF000000UL)>>24) ; return f; @@ -60,63 +65,155 @@ inline uint64_t Grid_ntohll(uint64_t A) { } #endif -namespace Grid { - - // A little helper - inline void removeWhitespace(std::string &key) - { - key.erase(std::remove_if(key.begin(), key.end(), ::isspace),key.end()); +///////////////////////////////////////////////////////////////////////////////// +// Simple classes for precision conversion +///////////////////////////////////////////////////////////////////////////////// +template +struct BinarySimpleUnmunger { + typedef typename getPrecision::real_scalar_type fobj_stype; + typedef typename getPrecision::real_scalar_type sobj_stype; + + void operator()(sobj &in, fobj &out) { + // take word by word and transform accoding to the status + fobj_stype *out_buffer = (fobj_stype *)&out; + sobj_stype *in_buffer = (sobj_stype *)∈ + size_t fobj_words = sizeof(out) / sizeof(fobj_stype); + size_t sobj_words = sizeof(in) / sizeof(sobj_stype); + assert(fobj_words == sobj_words); + + for (unsigned int word = 0; word < sobj_words; word++) + out_buffer[word] = in_buffer[word]; // type conversion on the fly + } +}; +template +struct BinarySimpleMunger { + typedef typename getPrecision::real_scalar_type fobj_stype; + typedef typename getPrecision::real_scalar_type sobj_stype; + + void operator()(fobj &in, sobj &out) { + // take word by word and transform accoding to the status + fobj_stype *in_buffer = (fobj_stype *)∈ + sobj_stype *out_buffer = (sobj_stype *)&out; + size_t fobj_words = sizeof(in) / sizeof(fobj_stype); + size_t sobj_words = sizeof(out) / sizeof(sobj_stype); + assert(fobj_words == sobj_words); + + for (unsigned int word = 0; word < sobj_words; word++) + out_buffer[word] = in_buffer[word]; // type conversion on the fly + + } +}; +// A little helper +inline void 
removeWhitespace(std::string &key) +{ + key.erase(std::remove_if(key.begin(), key.end(), ::isspace),key.end()); +} + +/////////////////////////////////////////////////////////////////////////////////////////////////// +// Static class holding the parallel IO code +// Could just use a namespace +/////////////////////////////////////////////////////////////////////////////////////////////////// class BinaryIO { - public: + ///////////////////////////////////////////////////////////////////////////// + // more byte manipulation helpers + ///////////////////////////////////////////////////////////////////////////// + static inline void Uint32Checksum(uint32_t *buf,uint64_t buf_size_bytes,uint32_t &csum) + { +#pragma omp parallel + { + uint32_t csum_thr=0; + uint64_t count = buf_size_bytes/sizeof(uint32_t); +#pragma omp for + for(uint64_t i=0;i>8) | ((f&0xFF000000UL)>>24) ; fp[i] = ntohl(f); } } - // BE is same as network - static inline void be64toh_v(void *file_object,uint32_t bytes) + static inline void be64toh_v(void *file_object,uint64_t bytes) { uint64_t * f = (uint64_t *)file_object; - for(int i=0;i*sizeof(uint64_t)>8) | ((f&0xFF000000UL)>>24) ; @@ -126,143 +223,23 @@ class BinaryIO { fp[i] = Grid_ntohll(g); } } - - template static inline void Uint32Checksum(Lattice &lat,munger munge,uint32_t &csum) + ///////////////////////////////////////////////////////////////////////////// + // Real action: + // Read or Write distributed lexico array of ANY object to a specific location in file + ////////////////////////////////////////////////////////////////////////////////////// + template + static inline uint32_t IOobject(word w, + GridBase *grid, + std::vector &iodata, + std::string file, + int offset, + const std::string &format, int doread) { - typedef typename vobj::scalar_object sobj; - GridBase *grid = lat._grid ; - std::cout <Barrier(); + GridStopWatch timer; + GridStopWatch bstimer; - csum = 0; - std::vector lcoor; - for(int l=0;llSites();l++){ - 
Lexicographic::CoorFromIndex(lcoor,l,grid->_ldimensions); - peekLocalSite(siteObj,lat,lcoor); - munge(siteObj,fileObj,csum); - } - grid->GlobalSum(csum); - } - - static inline void Uint32Checksum(uint32_t *buf,uint32_t buf_size_bytes,uint32_t &csum) - { - for(int i=0;i*sizeof(uint32_t) - struct BinarySimpleUnmunger { - typedef typename getPrecision::real_scalar_type fobj_stype; - typedef typename getPrecision::real_scalar_type sobj_stype; - - void operator()(sobj &in, fobj &out, uint32_t &csum) { - // take word by word and transform accoding to the status - fobj_stype *out_buffer = (fobj_stype *)&out; - sobj_stype *in_buffer = (sobj_stype *)&in; - size_t fobj_words = sizeof(out) / sizeof(fobj_stype); - size_t sobj_words = sizeof(in) / sizeof(sobj_stype); - assert(fobj_words == sobj_words); - - for (unsigned int word = 0; word < sobj_words; word++) - out_buffer[word] = in_buffer[word]; // type conversion on the fly - - BinaryIO::Uint32Checksum((uint32_t *)&out, sizeof(out), csum); - } - }; - - template - struct BinarySimpleMunger { - typedef typename getPrecision::real_scalar_type fobj_stype; - typedef typename getPrecision::real_scalar_type sobj_stype; - - void operator()(fobj &in, sobj &out, uint32_t &csum) { - // take word by word and transform accoding to the status - fobj_stype *in_buffer = (fobj_stype *)&in; - sobj_stype *out_buffer = (sobj_stype *)&out; - size_t fobj_words = sizeof(in) / sizeof(fobj_stype); - size_t sobj_words = sizeof(out) / sizeof(sobj_stype); - assert(fobj_words == sobj_words); - - for (unsigned int word = 0; word < sobj_words; word++) - out_buffer[word] = in_buffer[word]; // type conversion on the fly - - BinaryIO::Uint32Checksum((uint32_t *)&in, sizeof(in), csum); - } - }; - - template - static inline uint32_t readObjectSerial(Lattice &Umu,std::string file,munger munge,int offset,const std::string &format) - { - typedef typename vobj::scalar_object sobj; - - GridBase *grid = Umu._grid; - - std::cout<< GridLogMessage<< "Serial read I/O "<< 
file<< std::endl; - GridStopWatch timer; timer.Start(); - - int ieee32big = (format == std::string("IEEE32BIG")); - int ieee32 = (format == std::string("IEEE32")); - int ieee64big = (format == std::string("IEEE64BIG")); - int ieee64 = (format == std::string("IEEE64")); - - // Find the location of each site and send to primary node - // Take loop order from Chroma; defines loop order now that NERSC doc no longer - // available (how short sighted is that?) - std::ifstream fin(file,std::ios::binary|std::ios::in); - fin.seekg(offset); - - Umu = zero; uint32_t csum=0; - uint64_t bytes=0; - - int lx = grid->_fdimensions[0]; - std::vector file_object(lx); - std::vector munged(lx); - for(int t=0;t_fdimensions[3];t++){ - for(int z=0;z_fdimensions[2];z++){ - for(int y=0;y_fdimensions[1];y++){ - { - bytes += sizeof(fobj)*lx; - if (grid->IsBoss()) { - fin.read((char *)&file_object[0], sizeof(fobj)*lx); assert( fin.fail()==0); - if (ieee32big) be32toh_v((void *)&file_object[0], sizeof(fobj)*lx); - if (ieee32) le32toh_v((void *)&file_object[0], sizeof(fobj)*lx); - if (ieee64big) be64toh_v((void *)&file_object[0], sizeof(fobj)*lx); - if (ieee64) le64toh_v((void *)&file_object[0], sizeof(fobj)*lx); - for(int x=0;x site({x,y,z,t}); - // The boss who read the file has their value poked - pokeSite(munged[x],Umu,site); - } - }}}} - timer.Stop(); - std::cout<Broadcast(0,(void *)&csum,sizeof(csum)); - return csum; - } - - template - static inline uint32_t readObjectMPI(Lattice &Umu,std::string file,munger munge,int offset,const std::string &format) - { - typedef typename vobj::scalar_object sobj; - - GridBase *grid = Umu._grid; - - std::cout<< GridLogMessage<< "MPI read I/O "<< file<< std::endl; - GridStopWatch timer; timer.Start(); - - Umu = zero; - uint32_t csum=0; - uint64_t bytes=0; int ndim = grid->Dimensions(); int nrank = grid->ProcessorCount(); @@ -280,9 +257,8 @@ class BinaryIO { std::vector gStart(ndim); // Flatten the file - int lsites = grid->lSites(); - std::vector 
scalardata(lsites); - std::vector iodata(lsites); // Munge, checksum, byte order in here + uint64_t lsites = grid->lSites(); + iodata.resize(lsites); for(int d=0;dcommunicator, file.c_str(), MPI_MODE_RDONLY, MPI_INFO_NULL, &fh); - assert(ierr==0); - ierr=MPI_File_set_view(fh, disp, mpiObject, fileArray, "native", MPI_INFO_NULL); - // std::cout<< "MPI File set view returned " <communicator, file.c_str(), MPI_MODE_RDONLY, MPI_INFO_NULL, &fh); assert(ierr==0); + ierr=MPI_File_set_view(fh, disp, mpiObject, fileArray, "native", MPI_INFO_NULL); assert(ierr==0); + ierr=MPI_File_read_all(fh, &iodata[0], 1, localArray, &status); assert(ierr==0); + timer.Stop(); + grid->Barrier(); + + bstimer.Start(); + if (ieee32big) be32toh_v((void *)&iodata[0], sizeof(fobj)*lsites,csum); + if (ieee32) le32toh_v((void *)&iodata[0], sizeof(fobj)*lsites,csum); + if (ieee64big) be64toh_v((void *)&iodata[0], sizeof(fobj)*lsites,csum); + if (ieee64) le64toh_v((void *)&iodata[0], sizeof(fobj)*lsites,csum); + bstimer.Stop(); + + } else { + std::cout<< GridLogMessage<< "MPI write I/O "<< file<< std::endl; + bstimer.Start(); + if (ieee32big) htobe32_v((void *)&iodata[0], sizeof(fobj)*lsites,csum); + if (ieee32) htole32_v((void *)&iodata[0], sizeof(fobj)*lsites,csum); + if (ieee64big) htobe64_v((void *)&iodata[0], sizeof(fobj)*lsites,csum); + if (ieee64) htole64_v((void *)&iodata[0], sizeof(fobj)*lsites,csum); + bstimer.Stop(); + + grid->Barrier(); + + timer.Start(); + ierr=MPI_File_open(grid->communicator, file.c_str(), MPI_MODE_RDWR|MPI_MODE_CREATE,MPI_INFO_NULL, &fh); assert(ierr==0); + ierr=MPI_File_set_view(fh, disp, mpiObject, fileArray, "native", MPI_INFO_NULL); assert(ierr==0); + ierr=MPI_File_write_all(fh, &iodata[0], 1, localArray, &status); assert(ierr==0); + timer.Stop(); + + } + ////////////////////////////////////////////////////////////////////////////// - // Munge [ .e.g 3rd row recon ] + // Finish up MPI I/O 
////////////////////////////////////////////////////////////////////////////// - for(int x=0;xBarrier(); grid->GlobalSum(csum); grid->Barrier(); - vectorizeFromLexOrdArray(scalardata,Umu); - - timer.Stop(); - std::cout< - static inline uint32_t writeObjectSerial(Lattice &Umu,std::string file,munger munge,int offset, - const std::string & format) + ///////////////////////////////////////////////////////////////////////////// + // Read a Lattice of object + ////////////////////////////////////////////////////////////////////////////////////// + template + static inline uint32_t readLatticeObject(Lattice &Umu,std::string file,munger munge,int offset,const std::string &format) { - typedef typename vobj::scalar_object sobj; + typedef typename vobj::scalar_object sobj; + typedef typename vobj::Realified::scalar_type word; word w=0; GridBase *grid = Umu._grid; + int lsites = grid->lSites(); - int ieee32big = (format == std::string("IEEE32BIG")); - int ieee32 = (format == std::string("IEEE32")); - int ieee64big = (format == std::string("IEEE64BIG")); - int ieee64 = (format == std::string("IEEE64")); - - ////////////////////////////////////////////////// - // Serialise through node zero - ////////////////////////////////////////////////// - std::cout<< GridLogMessage<< "Serial write I/O "<< file< scalardata(lsites); + std::vector iodata(lsites); // Munge, checksum, byte order in here - std::ofstream fout; - if ( grid->IsBoss() ) { - fout.open(file,std::ios::binary|std::ios::out|std::ios::in); - fout.seekp(offset); - } - uint64_t bytes=0; - uint32_t csum=0; - int lx = grid->_fdimensions[0]; - std::vector file_object(lx); - std::vector unmunged(lx); - for(int t=0;t_fdimensions[3];t++){ - for(int z=0;z_fdimensions[2];z++){ - for(int y=0;y_fdimensions[1];y++){ - { + int doread=1; + uint32_t csum= IOobject(w,grid,iodata,file,offset,format,doread); - std::vector site({0,y,z,t}); - // peek & write - for(int x=0;xIsBoss() ) { - for(int x=0;xBarrier(); timer.Stop(); - 
std::cout<Broadcast(0,(void *)&csum,sizeof(csum)); + std::cout< + static inline uint32_t writeLatticeObject(Lattice &Umu,std::string file,munger munge,int offset,const std::string &format) + { + typedef typename vobj::scalar_object sobj; + typedef typename vobj::Realified::scalar_type word; word w=0; + GridBase *grid = Umu._grid; + int lsites = grid->lSites(); + + std::vector scalardata(lsites); + std::vector iodata(lsites); // Munge, checksum, byte order in here + + ////////////////////////////////////////////////////////////////////////////// + // Munge [ .e.g 3rd row recon ] + ////////////////////////////////////////////////////////////////////////////// + GridStopWatch timer; timer.Start(); + unvectorizeToLexOrdArray(scalardata,Umu); + + parallel_for(int x=0;xBarrier(); + timer.Stop(); + + int dowrite=0; + uint32_t csum= IOobject(w,grid,iodata,file,offset,format,dowrite); + + std::cout< RNGstate; + typedef RngStateType word; word w=0; - GridBase *grid = parallel._grid; - int gsites = grid->_gsites; - - GridStopWatch timer; timer.Start(); - ////////////////////////////////////////////////// - // Serialise through node zero - ////////////////////////////////////////////////// - std::ofstream fout; - if (grid->IsBoss()) { - fout.open(file, std::ios::binary | std::ios::out); - if (!fout.is_open()) { - std::cout << GridLogMessage << "writeRNGSerial: Error opening file " << file << std::endl; - exit(0);// write better error handling - } - fout.seekp(offset); - } - - std::cout << GridLogMessage << "Serial RNG write I/O on file " << file << std::endl; uint32_t csum = 0; - std::vector saved(RngStateCount); - int bytes = sizeof(RngStateType) * saved.size(); - std::cout << GridLogDebug << "RngStateCount: " << RngStateCount << std::endl; - std::cout << GridLogDebug << "Type has " << bytes << " bytes" << std::endl; - std::vector gcoor; - - for(int gidx=0;gidxGlobalIndexToGlobalCoor(gidx,gcoor); - grid->GlobalCoorToRankIndex(rank,o_idx,i_idx,gcoor); - int 
l_idx=parallel.generator_idx(o_idx,i_idx); - - if( rank == grid->ThisRank() ){ - parallel.GetState(saved,l_idx); - } - - if ( rank != 0 ) { - grid->Broadcast(rank, (void *)&saved[0], bytes); - } - - grid->Barrier(); - - if ( grid->IsBoss() ) { - Uint32Checksum((uint32_t *)&saved[0],bytes,csum); - fout.write((char *)&saved[0],bytes);assert( fout.fail()==0); - } - - } - - if ( grid->IsBoss() ) { - serial.GetState(saved,0); - Uint32Checksum((uint32_t *)&saved[0],bytes,csum); - fout.write((char *)&saved[0],bytes);assert( fout.fail()==0); - } - - grid->Broadcast(0, (void *)&csum, sizeof(csum)); - - if (grid->IsBoss()) { - fout.close(); - } - - timer.Stop(); - - std::cout << GridLogMessage << "RNG file checksum " << std::hex << csum << std::dec << std::endl; - std::cout << GridLogMessage << "RNG state saved in " << timer.Elapsed() << std::endl; - return csum; - } - - - static inline uint32_t readRNGSerial(GridSerialRNG &serial,GridParallelRNG &parallel,std::string file,int offset) - { - typedef typename GridSerialRNG::RngStateType RngStateType; - const int RngStateCount = GridSerialRNG::RngStateCount; + std::string format = "IEEE32BIG"; GridBase *grid = parallel._grid; - int gsites = grid->_gsites; + int gsites = grid->gSites(); + int lsites = grid->lSites(); - ////////////////////////////////////////////////// - // Serialise through node zero - ////////////////////////////////////////////////// - std::cout<< GridLogMessage<< "Serial RNG read I/O of file "<IsBoss()) { - fin.open(file, std::ios::binary | std::ios::in); - if (!fin.is_open()) { - std::cout << GridLogMessage << "readRNGSerial: Error opening file " << file << std::endl; - exit(0);// write better error handling - } - fin.seekg(offset); - } - - - uint32_t csum=0; - std::vector saved(RngStateCount); - int bytes = sizeof(RngStateType)*saved.size(); - std::cout << GridLogDebug << "RngStateCount: " << RngStateCount << std::endl; - std::cout << GridLogDebug << "Type has " << bytes << " bytes" << std::endl; - std::vector 
gcoor; - - std::cout << GridLogDebug << "gsites: " << gsites << " loop" << std::endl; - for(int gidx=0;gidxGlobalIndexToGlobalCoor(gidx,gcoor); - grid->GlobalCoorToRankIndex(rank,o_idx,i_idx,gcoor); - int l_idx=parallel.generator_idx(o_idx,i_idx); - - if ( grid->IsBoss() ) { - fin.read((char *)&saved[0],bytes);assert( fin.fail()==0); - Uint32Checksum((uint32_t *)&saved[0],bytes,csum); - } - - grid->Broadcast(0,(void *)&saved[0],bytes); - grid->Barrier(); - - if( rank == grid->ThisRank() ){ - parallel.SetState(saved,l_idx); - } - } - - if ( grid->IsBoss() ) { - fin.read((char *)&saved[0],bytes);assert( fin.fail()==0); - Uint32Checksum((uint32_t *)&saved[0],bytes,csum); - serial.SetState(saved,0); - } - - std::cout << GridLogMessage << "RNG file checksum " << std::hex << csum << std::dec << std::endl; - - grid->Broadcast(0,(void *)&csum,sizeof(csum)); - - return csum; - } - - template - static inline uint32_t readObjectParallel(Lattice &Umu, - std::string file, - munger munge, - int offset, - const std::string &format, - ILDGtype ILDG = ILDGtype()) { - typedef typename vobj::scalar_object sobj; - - GridBase *grid = Umu._grid; - - int ieee32big = (format == std::string("IEEE32BIG")); - int ieee32 = (format == std::string("IEEE32")); - int ieee64big = (format == std::string("IEEE64BIG")); - int ieee64 = (format == std::string("IEEE64")); - - - // Ideally one reader/writer per xy plane and read these contiguously - // with comms from nominated I/O nodes. 
- std::ifstream fin; - - int nd = grid->_ndimension; - std::vector parallel(nd,1); parallel[0] = 0; - std::vector ioproc (nd); - std::vector start(nd); - std::vector range(nd); - - for(int d=0;dCheckerBoarded(d) == 0); - } - - uint64_t slice_vol = 1; - - int IOnode = 1; - int gstrip = grid->_gdimensions[0]; - int lstrip = grid->_ldimensions[0]; - - int chunk ; - if ( nd==1) chunk = gstrip; - else chunk = gstrip*grid->_ldimensions[1]; - - for(int d=0;d_ndimension;d++) { - - if (parallel[d]) { - range[d] = grid->_ldimensions[d]; - start[d] = grid->_processor_coor[d]*range[d]; - ioproc[d]= grid->_processor_coor[d]; - } else { - range[d] = grid->_gdimensions[d]; - start[d] = 0; - ioproc[d]= 0; - - if ( grid->_processor_coor[d] != 0 ) IOnode = 0; - } - slice_vol = slice_vol * range[d]; - } - - { - uint32_t tmp = IOnode; - grid->GlobalSum(tmp); - std::cout<< std::dec ; - std::cout<< GridLogMessage<< "Parallel read I/O from "<< file << " with " <_ndimension;d++){ - std::cout<< range[d]; - if( d< grid->_ndimension-1 ) - std::cout<< " x "; - } - std::cout << std::endl; - std::cout<< GridLogMessage<< "Parallel I/O local strip size is "<< lstrip <ThisRank(); - int iorank = grid->RankFromProcessorCoor(ioproc); - - if (!ILDG.is_ILDG) { - if ( IOnode ) { - fin.open(file,std::ios::binary|std::ios::in); - if ( !fin.is_open() ) { - std::cout << GridLogMessage << "readObjectParallel: Error opening file " << file << std::endl; - exit(0); - } - } - } - - ////////////////////////////////////////////////////////// - // Find the location of each site and send to primary node - // Take loop order from Chroma; defines loop order now that NERSC doc no longer - // available (how short sighted is that?) 
- ////////////////////////////////////////////////////////// - Umu = zero; - static uint32_t csum; csum=0;//static for SHMEM - - std::vector fileObj(chunk); // FIXME - std::vector siteObj(chunk); // Use alignedAllocator to place in symmetric region for SHMEM - - // need to implement these loops in Nd independent way with a lexico conversion - for(int tlex=0;tlex tsite(nd); // temporary mixed up site - std::vector gsite(nd); - std::vector lsite(nd); - - int rank, o_idx,i_idx, g_idx; - - /////////////////////////////////////////// - // Get the global lexico base of the chunk - /////////////////////////////////////////// - Lexicographic::CoorFromIndex(tsite,tlex,range); - for(int d=0;dGlobalCoorToRankIndex(rank,o_idx,i_idx,gsite); - grid->GlobalCoorToGlobalIndex(gsite,g_idx); - - //////////////////////////////// - // iorank reads from the seek - //////////////////////////////// - if (myrank == iorank) { - - if (ILDG.is_ILDG){ -#ifdef HAVE_LIME - // use C-LIME to populate the record - uint64_t sizeFO = sizeof(fobj); - uint64_t sizeChunk= sizeFO*chunk; - limeReaderSeek(ILDG.LR, g_idx*sizeFO, SEEK_SET); - int status = limeReaderReadData((void *)&fileObj[0], &sizeChunk, ILDG.LR); -#else - assert(0); -#endif - } else { - fin.seekg(offset+g_idx*sizeof(fobj)); - fin.read((char *)&fileObj[0],sizeof(fobj)*chunk); - } - bytes+=sizeof(fobj)*chunk; - - if(ieee32big) be32toh_v((void *)&fileObj[0],sizeof(fobj)*chunk); - if(ieee32) le32toh_v((void *)&fileObj[0],sizeof(fobj)*chunk); - if(ieee64big) be64toh_v((void *)&fileObj[0],sizeof(fobj)*chunk); - if(ieee64) le64toh_v((void *)&fileObj[0],sizeof(fobj)*chunk); - - for(int c=0;c_ldimensions[d]; // local site - gsite[d] = tsite[d]+start[d]; // global site - } - grid->GlobalCoorToRankIndex(rank,o_idx,i_idx,gsite); - - if ( rank != iorank ) { - if ( (myrank == rank) || (myrank==iorank) ) { - grid->SendRecvPacket((void *)&siteObj[cc],(void *)&siteObj[cc],iorank,rank,sizeof(sobj)*lstrip); - } - } - // Poke at destination - if ( myrank == 
rank ) { - for(int x=0;xBarrier(); // necessary? - } - } - - grid->GlobalSum(csum); - grid->GlobalSum(bytes); - grid->Barrier(); - - timer.Stop(); - std::cout< - static inline uint32_t writeObjectParallel(Lattice &Umu, - std::string file, munger munge, - int offset, - const std::string &format, - ILDGtype ILDG = ILDGtype()) { - typedef typename vobj::scalar_object sobj; - GridBase *grid = Umu._grid; - - int ieee32big = (format == std::string("IEEE32BIG")); - int ieee32 = (format == std::string("IEEE32")); - int ieee64big = (format == std::string("IEEE64BIG")); - int ieee64 = (format == std::string("IEEE64")); - - if (!(ieee32big || ieee32 || ieee64big || ieee64)) { - std::cout << GridLogError << "Unrecognized file format " << format << std::endl; - std::cout << GridLogError << "Allowed: IEEE32BIG | IEEE32 | IEEE64BIG | IEEE64" << std::endl; - exit(0); - } - - int nd = grid->_ndimension; - for (int d = 0; d < nd; d++) { - assert(grid->CheckerBoarded(d) == 0); - } - - // Parallel in yzt, serial funnelled in "x". 
- // gx x ly chunk size - std::vector parallel(nd, 1); parallel[0] = 0; - std::vector ioproc(nd); - std::vector start(nd); - std::vector range(nd); - - uint64_t slice_vol = 1; - - int IOnode = 1; - int gstrip = grid->_gdimensions[0]; - int lstrip = grid->_ldimensions[0]; - int chunk; - if ( nd==1) chunk = gstrip; - else chunk = gstrip*grid->_ldimensions[1]; - - for (int d = 0; d < grid->_ndimension; d++) { - - if (parallel[d]) { - range[d] = grid->_ldimensions[d]; - start[d] = grid->_processor_coor[d]*range[d]; - ioproc[d]= grid->_processor_coor[d]; - } else { - range[d] = grid->_gdimensions[d]; - start[d] = 0; - ioproc[d]= 0; - - if ( grid->_processor_coor[d] != 0 ) IOnode = 0; - } - - slice_vol = slice_vol * range[d]; - } - - { - uint32_t tmp = IOnode; - grid->GlobalSum(tmp); - std::cout<< GridLogMessage<< "Parallel write I/O from "<< file << " with " <_ndimension;d++){ - std::cout<< range[d]; - if( d< grid->_ndimension-1 ) - std::cout<< " x "; - } - std::cout << std::endl; - std::cout<< GridLogMessage<< "Parallel I/O local strip size is "<< lstrip < iodata(lsites); + csum= IOobject(w,grid,iodata,file,offset,format,doread); + timer.Start(); - uint64_t bytes=0; - - int myrank = grid->ThisRank(); - int iorank = grid->RankFromProcessorCoor(ioproc); - - // Take into account block size of parallel file systems want about - // Ideally one reader/writer per xy plane and read these contiguously - // with comms from nominated I/O nodes. 
- std::ofstream fout; - if (!ILDG.is_ILDG) { - if (IOnode){ - fout.open(file, std::ios::binary | std::ios::in | std::ios::out); - if (!fout.is_open()) { - std::cout << GridLogMessage << "writeObjectParallel: Error opening file " << file << std::endl; - exit(0); - } - } + parallel_for(int lidx=0;lidx tmp(RngStateCount); + std::copy(iodata[lidx].begin(),iodata[lidx].end(),tmp.begin()); + parallel.SetState(tmp,lidx); } - - ////////////////////////////////////////////////////////// - // Find the location of each site and send to primary node - // Take loop order from Chroma; defines loop order now that NERSC doc no - // longer - // available (how short sighted is that?) - ////////////////////////////////////////////////////////// + timer.Stop(); + + std::cout << GridLogMessage << "RNG file checksum " << std::hex << csum << std::dec << std::endl; + std::cout << GridLogMessage << "RNG state overhead " << timer.Elapsed() << std::endl; + return csum; + } + ///////////////////////////////////////////////////////////////////////////// + // Write a RNG; lexico map to an array of state and use IOobject + ////////////////////////////////////////////////////////////////////////////////////// + static inline uint32_t writeRNG(GridSerialRNG &serial,GridParallelRNG &parallel,std::string file,int offset) + { + typedef typename GridSerialRNG::RngStateType RngStateType; + typedef RngStateType word; word w=0; + const int RngStateCount = GridSerialRNG::RngStateCount; + typedef std::array RNGstate; uint32_t csum = 0; - std::vector fileObj(chunk); - std::vector siteObj(chunk); - // should aggregate a whole chunk and then write. 
- // need to implement these loops in Nd independent way with a lexico - // conversion - for (int tlex = 0; tlex < slice_vol; tlex+=chunk) { + GridBase *grid = parallel._grid; + int gsites = grid->gSites(); + int lsites = grid->lSites(); - std::vector tsite(nd); // temporary mixed up site - std::vector gsite(nd); - std::vector lsite(nd); + GridStopWatch timer; + std::string format = "IEEE32BIG"; - int rank, o_idx, i_idx, g_idx; + std::cout << GridLogMessage << "RNG write I/O on file " << file << std::endl; - // Possibly do transport through pt2pt - for(int cc=0;cc_ldimensions[d]; // local site - gsite[d] = tsite[d]+start[d]; // global site - } - grid->GlobalCoorToRankIndex(rank,o_idx,i_idx,gsite); - - // Owner of data peeks it over lstrip - if ( myrank == rank ) { - for(int x=0;xSendRecvPacket((void *)&siteObj[cc],(void *)&siteObj[cc],rank,iorank,sizeof(sobj)*lstrip); - } - } - } - - grid->Barrier(); // necessary? - - ///////////////////////// - // Get the global lexico base of the chunk - ///////////////////////// - Lexicographic::CoorFromIndex(tsite, tlex, range); - for(int d = 0;d < nd; d++){ gsite[d] = tsite[d] + start[d];} - grid->GlobalCoorToRankIndex(rank, o_idx, i_idx, gsite); - grid->GlobalCoorToGlobalIndex(gsite, g_idx); - - if (myrank == iorank) { - - for(int c=0;c iodata(lsites); + parallel_for(int lidx=0;lidx tmp(RngStateCount); + parallel.GetState(tmp,lidx); + std::copy(tmp.begin(),tmp.end(),iodata[lidx].begin()); } - - grid->GlobalSum(csum); - grid->GlobalSum(bytes); - timer.Stop(); - std::cout << GridLogPerformance << "writeObjectParallel: wrote " << bytes - << " bytes in " << timer.Elapsed() << " " - << (double)bytes / timer.useconds() << " MB/s " << std::endl; - grid->Barrier(); // necessary? 
- if (!ILDG.is_ILDG) { - if (IOnode) { - fout.close(); - } - } + int dowrite=0; + csum= IOobject(w,grid,iodata,file,offset,format,dowrite); + std::cout << GridLogMessage << "RNG file checksum " << std::hex << csum << std::dec << std::endl; + std::cout << GridLogMessage << "RNG state overhead " << timer.Elapsed() << std::endl; return csum; } }; } - #endif From 1a1f6d55f9ac7c94b7ddd1f129d26ddf87d29c9c Mon Sep 17 00:00:00 2001 From: Peter Boyle Date: Thu, 1 Jun 2017 17:37:26 -0400 Subject: [PATCH 057/177] Roll over to MPI IO for parallel IO --- lib/parallelIO/NerscIO.h | 175 ++++++++++++--------------------------- 1 file changed, 52 insertions(+), 123 deletions(-) diff --git a/lib/parallelIO/NerscIO.h b/lib/parallelIO/NerscIO.h index ab535dac..ba9d23de 100644 --- a/lib/parallelIO/NerscIO.h +++ b/lib/parallelIO/NerscIO.h @@ -30,12 +30,6 @@ #ifndef GRID_NERSC_IO_H #define GRID_NERSC_IO_H -#undef PARALLEL_READ -#undef SERIAL_READ -#define MPI_READ - -#define PARALLEL_WRITE - #include #include #include @@ -133,10 +127,6 @@ namespace Grid { ////////////////////////////////////////////////////////////////////// // Utilities ; these are QCD aware ////////////////////////////////////////////////////////////////////// - inline void NerscChecksum(uint32_t *buf,uint32_t buf_size_bytes,uint32_t &csum) - { - BinaryIO::Uint32Checksum(buf,buf_size_bytes,csum); - } inline void reconstruct3(LorentzColourMatrix & cm) { const int x=0; @@ -151,43 +141,38 @@ namespace Grid { template struct NerscSimpleMunger{ - void operator()(fobj &in, sobj &out, uint32_t &csum) { + void operator()(fobj &in, sobj &out) { for (int mu = 0; mu < Nd; mu++) { for (int i = 0; i < Nc; i++) { - for (int j = 0; j < Nc; j++) { - out(mu)()(i, j) = in(mu)()(i, j); - } - } + for (int j = 0; j < Nc; j++) { + out(mu)()(i, j) = in(mu)()(i, j); + }} } - NerscChecksum((uint32_t *)&in, sizeof(in), csum); }; }; template struct NerscSimpleUnmunger { - void operator()(sobj &in, fobj &out, uint32_t &csum) { + + void 
operator()(sobj &in, fobj &out) { for (int mu = 0; mu < Nd; mu++) { for (int i = 0; i < Nc; i++) { - for (int j = 0; j < Nc; j++) { - out(mu)()(i, j) = in(mu)()(i, j); - } - } + for (int j = 0; j < Nc; j++) { + out(mu)()(i, j) = in(mu)()(i, j); + }} } - NerscChecksum((uint32_t *)&out, sizeof(out), csum); }; }; template struct Nersc3x2munger{ - void operator() (fobj &in,sobj &out,uint32_t &csum){ - - NerscChecksum((uint32_t *)&in,sizeof(in),csum); + void operator() (fobj &in,sobj &out){ for(int mu=0;mu<4;mu++){ for(int i=0;i<2;i++){ - for(int j=0;j<3;j++){ - out(mu)()(i,j) = in(mu)(i)(j); - }} + for(int j=0;j<3;j++){ + out(mu)()(i,j) = in(mu)(i)(j); + }} } reconstruct3(out); } @@ -196,18 +181,13 @@ namespace Grid { template struct Nersc3x2unmunger{ - void operator() (sobj &in,fobj &out,uint32_t &csum){ - - + void operator() (sobj &in,fobj &out){ for(int mu=0;mu<4;mu++){ for(int i=0;i<2;i++){ - for(int j=0;j<3;j++){ - out(mu)(i)(j) = in(mu)()(i,j); - }} + for(int j=0;j<3;j++){ + out(mu)(i)(j) = in(mu)()(i,j); + }} } - - NerscChecksum((uint32_t *)&out,sizeof(out),csum); - } }; @@ -333,9 +313,9 @@ namespace Grid { // Now the meat: the object readers ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////// - template - static inline void readConfiguration(Lattice > &Umu,NerscField& header,std::string file) - { + template + static inline void readConfiguration(Lattice > &Umu,NerscField& header,std::string file) + { typedef Lattice > GaugeField; GridBase *grid = Umu._grid; @@ -354,62 +334,22 @@ namespace Grid { // depending on datatype, set up munger; // munger is a function of if ( header.data_type == std::string("4D_SU3_GAUGE") ) { - if ( ieee32 || ieee32big ) { -#ifdef PARALLEL_READ - csum=BinaryIO::readObjectParallel, LorentzColour2x3F> - (Umu,file,Nersc3x2munger(), offset,format); -#endif -#ifdef MPI_READ - csum=BinaryIO::readObjectMPI, LorentzColour2x3F> - (Umu,file,Nersc3x2munger(), offset,format); 
-#endif -#ifdef SERIAL_READ - csum=BinaryIO::readObjectSerial, LorentzColour2x3F> - (Umu,file,Nersc3x2munger(), offset,format); -#endif - } - if ( ieee64 || ieee64big ) { -#ifdef PARALLEL_READ - csum=BinaryIO::readObjectParallel, LorentzColour2x3D> - (Umu,file,Nersc3x2munger(),offset,format); -#endif -#ifdef MPI_READ - csum=BinaryIO::readObjectMPI, LorentzColour2x3D> - (Umu,file,Nersc3x2munger(),offset,format); -#endif -#ifdef SERIAL_READ - csum=BinaryIO::readObjectSerial, LorentzColour2x3D> - (Umu,file,Nersc3x2munger(),offset,format); -#endif - } - } else if ( header.data_type == std::string("4D_SU3_GAUGE_3x3") ) { if ( ieee32 || ieee32big ) { -#ifdef PARALLEL_READ - csum=BinaryIO::readObjectParallel,LorentzColourMatrixF> - (Umu,file,NerscSimpleMunger(),offset,format); -#endif -#ifdef MPI_READ - csum=BinaryIO::readObjectMPI,LorentzColourMatrixF> - (Umu,file,NerscSimpleMunger(),offset,format); -#endif -#ifdef SERIAL_READ - csum=BinaryIO::readObjectSerial,LorentzColourMatrixF> - (Umu,file,NerscSimpleMunger(),offset,format); -#endif + csum=BinaryIO::readLatticeObject, LorentzColour2x3F> + (Umu,file,Nersc3x2munger(), offset,format); } if ( ieee64 || ieee64big ) { -#ifdef PARALLEL_READ - csum=BinaryIO::readObjectParallel,LorentzColourMatrixD> + csum=BinaryIO::readLatticeObject, LorentzColour2x3D> + (Umu,file,Nersc3x2munger(),offset,format); + } + } else if ( header.data_type == std::string("4D_SU3_GAUGE_3x3") ) { + if ( ieee32 || ieee32big ) { + csum=BinaryIO::readLatticeObject,LorentzColourMatrixF> + (Umu,file,NerscSimpleMunger(),offset,format); + } + if ( ieee64 || ieee64big ) { + csum=BinaryIO::readLatticeObject,LorentzColourMatrixD> (Umu,file,NerscSimpleMunger(),offset,format); -#endif -#ifdef MPI_READ - csum=BinaryIO::readObjectMPI,LorentzColourMatrixD> - (Umu,file,NerscSimpleMunger(),offset,format); -#endif -#ifdef SERIAL_READ - csum=BinaryIO::readObjectSerial,LorentzColourMatrixD> - (Umu,file,NerscSimpleMunger(),offset,format); -#endif } } else { assert(0); @@ 
-434,14 +374,14 @@ namespace Grid { std::cerr << " plaqs " << clone.plaquette << " " << header.plaquette << std::endl; std::cerr << " trace " << clone.link_trace<< " " << header.link_trace<< std::endl; std::cerr << " csum " < static inline void writeConfiguration(Lattice > &Umu,std::string file, int two_row,int bits32) @@ -466,41 +406,29 @@ namespace Grid { NerscStatistics(Umu,header); NerscMachineCharacteristics(header); - uint32_t csum; int offset; truncate(file); if ( two_row ) { - header.floating_point = std::string("IEEE64BIG"); header.data_type = std::string("4D_SU3_GAUGE"); Nersc3x2unmunger munge; - BinaryIO::Uint32Checksum(Umu, munge,header.checksum); offset = writeHeader(header,file); -#ifdef PARALLEL_WRITE - csum=BinaryIO::writeObjectParallel(Umu,file,munge,offset,header.floating_point); -#else - csum=BinaryIO::writeObjectSerial(Umu,file,munge,offset,header.floating_point); -#endif + header.checksum=BinaryIO::writeLatticeObject(Umu,file,munge,offset,header.floating_point); + writeHeader(header,file); } else { header.floating_point = std::string("IEEE64BIG"); header.data_type = std::string("4D_SU3_GAUGE_3x3"); NerscSimpleUnmunger munge; - BinaryIO::Uint32Checksum(Umu, munge,header.checksum); offset = writeHeader(header,file); -#ifdef PARALLEL_WRITE - csum=BinaryIO::writeObjectParallel(Umu,file,munge,offset,header.floating_point); -#else - csum=BinaryIO::writeObjectSerial(Umu,file,munge,offset,header.floating_point); -#endif + header.checksum=BinaryIO::writeLatticeObject(Umu,file,munge,offset,header.floating_point); + writeHeader(header,file); } - - std::cout< - uint32_t csum=BinaryIO::readRNGSerial(serial,parallel,file,offset); + uint32_t csum=BinaryIO::readRNG(serial,parallel,file,offset); if ( csum != header.checksum ) { std::cerr << "checksum mismatch "< Date: Thu, 1 Jun 2017 17:38:18 -0400 Subject: [PATCH 058/177] Roll over to MPI version of I/O --- tests/IO/Test_nersc_io.cc | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff 
--git a/tests/IO/Test_nersc_io.cc b/tests/IO/Test_nersc_io.cc index 8507df13..0a0f8977 100644 --- a/tests/IO/Test_nersc_io.cc +++ b/tests/IO/Test_nersc_io.cc @@ -38,10 +38,13 @@ int main (int argc, char ** argv) { Grid_init(&argc,&argv); + std::cout < simd_layout = GridDefaultSimd(4,vComplex::Nsimd()); std::vector mpi_layout = GridDefaultMpi(); - std::vector latt_size ({16,16,16,16}); + std::vector latt_size ({48,48,48,96}); + //std::vector latt_size ({32,32,32,32}); + //std::vector latt_size ({16,16,16,32}); std::vector clatt_size ({4,4,4,8}); int orthodir=3; int orthosz =latt_size[orthodir]; @@ -49,14 +52,17 @@ int main (int argc, char ** argv) GridCartesian Fine(latt_size,simd_layout,mpi_layout); GridCartesian Coarse(clatt_size,simd_layout,mpi_layout); + GridParallelRNG pRNGa(&Fine); GridParallelRNG pRNGb(&Fine); GridSerialRNG sRNGa; GridSerialRNG sRNGb; + std::cout <({45,12,81,9})); sRNGa.SeedFixedIntegers(std::vector({45,12,81,9})); - + std::cout < Plaq_T(orthosz); sliceSum(Plaq,Plaq_T,Nd-1); From 094c3d091afb3f29e7e370562cb0def29b3b26f0 Mon Sep 17 00:00:00 2001 From: paboyle Date: Fri, 2 Jun 2017 00:38:58 +0100 Subject: [PATCH 059/177] Improved and RNG's now survive checkpoint --- lib/parallelIO/BinaryIO.h | 175 ++++++++++++------ .../hmc/checkpointers/BinaryCheckpointer.h | 12 +- tests/IO/Test_nersc_io.cc | 4 +- 3 files changed, 124 insertions(+), 67 deletions(-) diff --git a/lib/parallelIO/BinaryIO.h b/lib/parallelIO/BinaryIO.h index 13341927..e427a25b 100644 --- a/lib/parallelIO/BinaryIO.h +++ b/lib/parallelIO/BinaryIO.h @@ -133,7 +133,6 @@ class BinaryIO { } #pragma omp critical csum = csum + csum_thr; - } } // Network is big endian @@ -227,13 +226,20 @@ class BinaryIO { // Real action: // Read or Write distributed lexico array of ANY object to a specific location in file ////////////////////////////////////////////////////////////////////////////////////// + + static const int BINARYIO_MASTER_APPEND = 0x10; + static const int BINARYIO_UNORDERED = 0x08; + 
static const int BINARYIO_LEXICOGRAPHIC = 0x04; + static const int BINARYIO_READ = 0x02; + static const int BINARYIO_WRITE = 0x01; + template - static inline uint32_t IOobject(word w, - GridBase *grid, - std::vector &iodata, - std::string file, - int offset, - const std::string &format, int doread) + static inline uint32_t IOobject(word w, + GridBase *grid, + std::vector &iodata, + std::string file, + int offset, + const std::string &format, int control) { grid->Barrier(); GridStopWatch timer; @@ -250,21 +256,24 @@ class BinaryIO { std::vector gLattice= grid->GlobalDimensions(); std::vector lLattice= grid->LocalDimensions(); - std::vector distribs(ndim,MPI_DISTRIBUTE_BLOCK); - std::vector dargs (ndim,MPI_DISTRIBUTE_DFLT_DARG); - std::vector lStart(ndim); std::vector gStart(ndim); // Flatten the file uint64_t lsites = grid->lSites(); - iodata.resize(lsites); - + if ( control & BINARYIO_MASTER_APPEND ) { + assert(iodata.size()==1); + } else { + assert(lsites==iodata.size()); + } for(int d=0;d distribs(ndim,MPI_DISTRIBUTE_BLOCK); + std::vector dargs (ndim,MPI_DISTRIBUTE_DFLT_DARG); MPI_Datatype mpiObject; MPI_Datatype fileArray; MPI_Datatype localArray; @@ -281,7 +290,6 @@ class BinaryIO { numword = sizeof(fobj)/sizeof(double); mpiword = MPI_DOUBLE; } - ////////////////////////////////////////////////////////////////////////////// // Sobj in MPI phrasing @@ -301,6 +309,7 @@ class BinaryIO { ////////////////////////////////////////////////////////////////////////////// ierr=MPI_Type_create_subarray(ndim,&lLattice[0],&lLattice[0],&lStart[0],MPI_ORDER_FORTRAN, mpiObject,&localArray); assert(ierr==0); ierr=MPI_Type_commit(&localArray); assert(ierr==0); +#endif ////////////////////////////////////////////////////////////////////////////// // Byte order @@ -311,55 +320,91 @@ class BinaryIO { int ieee64 = (format == std::string("IEEE64")); ////////////////////////////////////////////////////////////////////////////// - // Do the MPI I/O read + // Do the I/O 
////////////////////////////////////////////////////////////////////////////// - if ( doread ) { - std::cout<< GridLogMessage<< "MPI read I/O "<< file<< std::endl; + if ( control & BINARYIO_READ ) { + timer.Start(); - ierr=MPI_File_open(grid->communicator, file.c_str(), MPI_MODE_RDONLY, MPI_INFO_NULL, &fh); assert(ierr==0); - ierr=MPI_File_set_view(fh, disp, mpiObject, fileArray, "native", MPI_INFO_NULL); assert(ierr==0); - ierr=MPI_File_read_all(fh, &iodata[0], 1, localArray, &status); assert(ierr==0); + + if ( (control & BINARYIO_LEXICOGRAPHIC) && (nrank > 1) ) { +#ifdef USE_MPI_IO + std::cout<< GridLogMessage<< "MPI read I/O "<< file<< std::endl; + ierr=MPI_File_open(grid->communicator, file.c_str(), MPI_MODE_RDONLY, MPI_INFO_NULL, &fh); assert(ierr==0); + ierr=MPI_File_set_view(fh, disp, mpiObject, fileArray, "native", MPI_INFO_NULL); assert(ierr==0); + ierr=MPI_File_read_all(fh, &iodata[0], 1, localArray, &status); assert(ierr==0); + MPI_File_close(&fh); + MPI_Type_free(&fileArray); + MPI_Type_free(&localArray); +#else + assert(0); +#endif + } else { + std::cout<< GridLogMessage<< "C++ read I/O "<< file<< std::endl; + std::ifstream fin; + fin.open(file,std::ios::binary|std::ios::in); + if ( control & BINARYIO_MASTER_APPEND ) { + fin.seekg(-sizeof(fobj),fin.end); + } else { + fin.seekg(offset+myrank*lsites*sizeof(fobj)); + } + fin.read((char *)&iodata[0],iodata.size()*sizeof(fobj));assert( fin.fail()==0); + fin.close(); + } timer.Stop(); grid->Barrier(); bstimer.Start(); - if (ieee32big) be32toh_v((void *)&iodata[0], sizeof(fobj)*lsites,csum); - if (ieee32) le32toh_v((void *)&iodata[0], sizeof(fobj)*lsites,csum); - if (ieee64big) be64toh_v((void *)&iodata[0], sizeof(fobj)*lsites,csum); - if (ieee64) le64toh_v((void *)&iodata[0], sizeof(fobj)*lsites,csum); + if (ieee32big) be32toh_v((void *)&iodata[0], sizeof(fobj)*iodata.size(),csum); + if (ieee32) le32toh_v((void *)&iodata[0], sizeof(fobj)*iodata.size(),csum); + if (ieee64big) be64toh_v((void *)&iodata[0], 
sizeof(fobj)*iodata.size(),csum); + if (ieee64) le64toh_v((void *)&iodata[0], sizeof(fobj)*iodata.size(),csum); bstimer.Stop(); - - } else { - std::cout<< GridLogMessage<< "MPI write I/O "<< file<< std::endl; - bstimer.Start(); - if (ieee32big) htobe32_v((void *)&iodata[0], sizeof(fobj)*lsites,csum); - if (ieee32) htole32_v((void *)&iodata[0], sizeof(fobj)*lsites,csum); - if (ieee64big) htobe64_v((void *)&iodata[0], sizeof(fobj)*lsites,csum); - if (ieee64) htole64_v((void *)&iodata[0], sizeof(fobj)*lsites,csum); - bstimer.Stop(); - - grid->Barrier(); - - timer.Start(); - ierr=MPI_File_open(grid->communicator, file.c_str(), MPI_MODE_RDWR|MPI_MODE_CREATE,MPI_INFO_NULL, &fh); assert(ierr==0); - ierr=MPI_File_set_view(fh, disp, mpiObject, fileArray, "native", MPI_INFO_NULL); assert(ierr==0); - ierr=MPI_File_write_all(fh, &iodata[0], 1, localArray, &status); assert(ierr==0); - timer.Stop(); - } - - ////////////////////////////////////////////////////////////////////////////// - // Finish up MPI I/O - ////////////////////////////////////////////////////////////////////////////// - MPI_File_close(&fh); - MPI_Type_free(&fileArray); - MPI_Type_free(&localArray); + + if ( control & BINARYIO_WRITE ) { + + bstimer.Start(); + if (ieee32big) htobe32_v((void *)&iodata[0], sizeof(fobj)*iodata.size(),csum); + if (ieee32) htole32_v((void *)&iodata[0], sizeof(fobj)*iodata.size(),csum); + if (ieee64big) htobe64_v((void *)&iodata[0], sizeof(fobj)*iodata.size(),csum); + if (ieee64) htole64_v((void *)&iodata[0], sizeof(fobj)*iodata.size(),csum); + bstimer.Stop(); + + grid->Barrier(); + + timer.Start(); + if ( (control & BINARYIO_LEXICOGRAPHIC) && (nrank > 1) ) { +#ifdef USE_MPI_IO + std::cout<< GridLogMessage<< "MPI write I/O "<< file<< std::endl; + ierr=MPI_File_open(grid->communicator, file.c_str(), MPI_MODE_RDWR|MPI_MODE_CREATE,MPI_INFO_NULL, &fh); assert(ierr==0); + ierr=MPI_File_set_view(fh, disp, mpiObject, fileArray, "native", MPI_INFO_NULL); assert(ierr==0); + 
ierr=MPI_File_write_all(fh, &iodata[0], 1, localArray, &status); assert(ierr==0); + MPI_File_close(&fh); + MPI_Type_free(&fileArray); + MPI_Type_free(&localArray); +#else + assert(0); +#endif + } else { + std::cout<< GridLogMessage<< "C++ write I/O "<< file<< std::endl; + std::ofstream fout; + fout.open(file,std::ios::binary|std::ios::out|std::ios::in); + if ( control & BINARYIO_MASTER_APPEND ) { + fout.seekp(0,fout.end); + } else { + fout.seekp(offset+myrank*lsites*sizeof(fobj)); + } + fout.write((char *)&iodata[0],iodata.size()*sizeof(fobj));assert( fout.fail()==0); + fout.close(); + } + timer.Stop(); + } std::cout< scalardata(lsites); std::vector iodata(lsites); // Munge, checksum, byte order in here - int doread=1; - uint32_t csum= IOobject(w,grid,iodata,file,offset,format,doread); + uint32_t csum= IOobject(w,grid,iodata,file,offset,format,BINARYIO_READ|BINARYIO_LEXICOGRAPHIC); GridStopWatch timer; timer.Start(); @@ -432,8 +476,7 @@ class BinaryIO { grid->Barrier(); timer.Stop(); - int dowrite=0; - uint32_t csum= IOobject(w,grid,iodata,file,offset,format,dowrite); + uint32_t csum= IOobject(w,grid,iodata,file,offset,format,BINARYIO_WRITE|BINARYIO_LEXICOGRAPHIC); std::cout< iodata(lsites); - csum= IOobject(w,grid,iodata,file,offset,format,doread); + csum= IOobject(w,grid,iodata,file,offset,format,BINARYIO_READ|BINARYIO_LEXICOGRAPHIC); timer.Start(); parallel_for(int lidx=0;lidx tmp(RngStateCount); + std::copy(iodata[0].begin(),iodata[0].end(),tmp.begin()); + serial.SetState(tmp,0); + } + std::cout << GridLogMessage << "RNG file checksum " << std::hex << csum << std::dec << std::endl; std::cout << GridLogMessage << "RNG state overhead " << timer.Elapsed() << std::endl; return csum; @@ -507,9 +557,16 @@ class BinaryIO { } timer.Stop(); - int dowrite=0; - csum= IOobject(w,grid,iodata,file,offset,format,dowrite); + csum= IOobject(w,grid,iodata,file,offset,format,BINARYIO_WRITE|BINARYIO_LEXICOGRAPHIC); + iodata.resize(1); + { + std::vector tmp(RngStateCount); + 
serial.GetState(tmp,0); + std::copy(tmp.begin(),tmp.end(),iodata[0].begin()); + } + csum+= IOobject(w,grid,iodata,file,offset,format,BINARYIO_WRITE|BINARYIO_MASTER_APPEND); + std::cout << GridLogMessage << "RNG file checksum " << std::hex << csum << std::dec << std::endl; std::cout << GridLogMessage << "RNG state overhead " << timer.Elapsed() << std::endl; return csum; diff --git a/lib/qcd/hmc/checkpointers/BinaryCheckpointer.h b/lib/qcd/hmc/checkpointers/BinaryCheckpointer.h index 251ed042..6116a46c 100644 --- a/lib/qcd/hmc/checkpointers/BinaryCheckpointer.h +++ b/lib/qcd/hmc/checkpointers/BinaryCheckpointer.h @@ -68,11 +68,11 @@ class BinaryHmcCheckpointer : public BaseHmcCheckpointer { std::string config, rng; this->build_filenames(traj, Params, config, rng); - BinaryIO::BinarySimpleUnmunger munge; + BinarySimpleUnmunger munge; truncate(rng); - BinaryIO::writeRNGSerial(sRNG, pRNG, rng, 0); + BinaryIO::writeRNG(sRNG, pRNG, rng, 0); truncate(config); - uint32_t csum = BinaryIO::writeObjectParallel( + uint32_t csum = BinaryIO::writeLatticeObject( U, config, munge, 0, Params.format); std::cout << GridLogMessage << "Written Binary Configuration " << config @@ -85,9 +85,9 @@ class BinaryHmcCheckpointer : public BaseHmcCheckpointer { std::string config, rng; this->build_filenames(traj, Params, config, rng); - BinaryIO::BinarySimpleMunger munge; - BinaryIO::readRNGSerial(sRNG, pRNG, rng, 0); - uint32_t csum = BinaryIO::readObjectParallel( + BinarySimpleMunger munge; + BinaryIO::readRNG(sRNG, pRNG, rng, 0); + uint32_t csum = BinaryIO::readLatticeObject( U, config, munge, 0, Params.format); std::cout << GridLogMessage << "Read Binary Configuration " << config diff --git a/tests/IO/Test_nersc_io.cc b/tests/IO/Test_nersc_io.cc index 0a0f8977..14c6080d 100644 --- a/tests/IO/Test_nersc_io.cc +++ b/tests/IO/Test_nersc_io.cc @@ -42,9 +42,9 @@ int main (int argc, char ** argv) std::vector simd_layout = GridDefaultSimd(4,vComplex::Nsimd()); std::vector mpi_layout = 
GridDefaultMpi(); - std::vector latt_size ({48,48,48,96}); + //std::vector latt_size ({48,48,48,96}); //std::vector latt_size ({32,32,32,32}); - //std::vector latt_size ({16,16,16,32}); + std::vector latt_size ({16,16,16,32}); std::vector clatt_size ({4,4,4,8}); int orthodir=3; int orthosz =latt_size[orthodir]; From 092dcd4e04c1e069fe63984cfc7d9f1a0da9e703 Mon Sep 17 00:00:00 2001 From: paboyle Date: Fri, 2 Jun 2017 22:50:25 +0100 Subject: [PATCH 060/177] MPI I/O only if MPI compiled --- lib/parallelIO/BinaryIO.h | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/lib/parallelIO/BinaryIO.h b/lib/parallelIO/BinaryIO.h index e427a25b..8b8d4165 100644 --- a/lib/parallelIO/BinaryIO.h +++ b/lib/parallelIO/BinaryIO.h @@ -29,12 +29,16 @@ #ifndef GRID_BINARY_IO_H #define GRID_BINARY_IO_H - -#include "IldgIOtypes.h" +#if defined(GRID_COMMS_MPI) || defined(GRID_COMMS_MPI3) +#define USE_MPI_IO +#else +#undef USE_MPI_IO +#endif #ifdef HAVE_ENDIAN_H #include #endif + #include #include From 22749699a30da633f58a4d47642721c639048f31 Mon Sep 17 00:00:00 2001 From: Antonin Portelli Date: Tue, 6 Jun 2017 11:45:30 -0500 Subject: [PATCH 061/177] Fixes after merge and point sink module --- extras/Hadrons/Environment.cc | 56 ++++++++- extras/Hadrons/Environment.hpp | 44 ++++++- extras/Hadrons/Global.hpp | 9 +- extras/Hadrons/Modules.hpp | 1 + extras/Hadrons/Modules/MAction/DWF.hpp | 8 +- extras/Hadrons/Modules/MAction/Wilson.hpp | 6 +- .../Hadrons/Modules/MContraction/Baryon.hpp | 14 +-- .../Hadrons/Modules/MContraction/DiscLoop.hpp | 8 +- .../Hadrons/Modules/MContraction/Gamma3pt.hpp | 12 +- extras/Hadrons/Modules/MContraction/Meson.hpp | 102 ++++++++++------ .../Modules/MContraction/WeakHamiltonian.hpp | 8 +- .../MContraction/WeakHamiltonianEye.hpp | 6 +- .../MContraction/WeakHamiltonianNonEye.hpp | 6 +- .../MContraction/WeakNeutral4ptDisc.hpp | 6 +- extras/Hadrons/Modules/MGauge/Load.hpp | 6 +- extras/Hadrons/Modules/MGauge/Random.hpp | 6 +- 
extras/Hadrons/Modules/MGauge/StochEm.hpp | 6 +- extras/Hadrons/Modules/MGauge/Unit.hpp | 6 +- extras/Hadrons/Modules/MLoop/NoiseLoop.hpp | 8 +- .../Hadrons/Modules/MScalar/ChargedProp.hpp | 6 +- extras/Hadrons/Modules/MScalar/FreeProp.hpp | 6 +- extras/Hadrons/Modules/MSink/Point.hpp | 114 ++++++++++++++++++ extras/Hadrons/Modules/MSolver/RBPrecCG.hpp | 8 +- extras/Hadrons/Modules/MSource/Point.hpp | 6 +- extras/Hadrons/Modules/MSource/SeqGamma.hpp | 8 +- extras/Hadrons/Modules/MSource/Wall.hpp | 8 +- extras/Hadrons/Modules/MSource/Z2.hpp | 6 +- extras/Hadrons/Modules/Quark.hpp | 2 +- .../templates/Module_in_NS.hpp.template | 6 +- .../templates/Module_tmp_in_NS.hpp.template | 6 +- extras/Hadrons/modules.inc | 1 + tests/hadrons/Test_hadrons_spectrum.cc | 24 ++-- 32 files changed, 385 insertions(+), 134 deletions(-) create mode 100644 extras/Hadrons/Modules/MSink/Point.hpp diff --git a/extras/Hadrons/Environment.cc b/extras/Hadrons/Environment.cc index 37f2a3d7..0e7a4326 100644 --- a/extras/Hadrons/Environment.cc +++ b/extras/Hadrons/Environment.cc @@ -41,9 +41,10 @@ using namespace Hadrons; // constructor ///////////////////////////////////////////////////////////////// Environment::Environment(void) { - nd_ = GridDefaultLatt().size(); + dim_ = GridDefaultLatt(); + nd_ = dim_.size(); grid4d_.reset(SpaceTimeGrid::makeFourDimGrid( - GridDefaultLatt(), GridDefaultSimd(nd_, vComplex::Nsimd()), + dim_, GridDefaultSimd(nd_, vComplex::Nsimd()), GridDefaultMpi())); gridRb4d_.reset(SpaceTimeGrid::makeFourDimRedBlackGrid(grid4d_.get())); auto loc = getGrid()->LocalDimensions(); @@ -132,6 +133,16 @@ unsigned int Environment::getNd(void) const return nd_; } +std::vector Environment::getDim(void) const +{ + return dim_; +} + +int Environment::getDim(const unsigned int mu) const +{ + return dim_[mu]; +} + // random number generator ///////////////////////////////////////////////////// void Environment::setSeed(const std::vector &seed) { @@ -271,6 +282,21 @@ std::string 
Environment::getModuleType(const std::string name) const return getModuleType(getModuleAddress(name)); } +std::string Environment::getModuleNamespace(const unsigned int address) const +{ + std::string type = getModuleType(address), ns; + + auto pos2 = type.rfind("::"); + auto pos1 = type.rfind("::", pos2 - 2); + + return type.substr(pos1 + 2, pos2 - pos1 - 2); +} + +std::string Environment::getModuleNamespace(const std::string name) const +{ + return getModuleNamespace(getModuleAddress(name)); +} + bool Environment::hasModule(const unsigned int address) const { return (address < module_.size()); @@ -492,7 +518,14 @@ std::string Environment::getObjectType(const unsigned int address) const { if (hasRegisteredObject(address)) { - return typeName(object_[address].type); + if (object_[address].type) + { + return typeName(object_[address].type); + } + else + { + return ""; + } } else if (hasObject(address)) { @@ -532,6 +565,23 @@ Environment::Size Environment::getObjectSize(const std::string name) const return getObjectSize(getObjectAddress(name)); } +unsigned int Environment::getObjectModule(const unsigned int address) const +{ + if (hasObject(address)) + { + return object_[address].module; + } + else + { + HADRON_ERROR("no object with address " + std::to_string(address)); + } +} + +unsigned int Environment::getObjectModule(const std::string name) const +{ + return getObjectModule(getObjectAddress(name)); +} + unsigned int Environment::getObjectLs(const unsigned int address) const { if (hasRegisteredObject(address)) diff --git a/extras/Hadrons/Environment.hpp b/extras/Hadrons/Environment.hpp index 2628e5a0..13264bd5 100644 --- a/extras/Hadrons/Environment.hpp +++ b/extras/Hadrons/Environment.hpp @@ -106,6 +106,8 @@ public: void createGrid(const unsigned int Ls); GridCartesian * getGrid(const unsigned int Ls = 1) const; GridRedBlackCartesian * getRbGrid(const unsigned int Ls = 1) const; + std::vector getDim(void) const; + int getDim(const unsigned int mu) const; unsigned 
int getNd(void) const; // random number generator void setSeed(const std::vector &seed); @@ -131,6 +133,8 @@ public: std::string getModuleName(const unsigned int address) const; std::string getModuleType(const unsigned int address) const; std::string getModuleType(const std::string name) const; + std::string getModuleNamespace(const unsigned int address) const; + std::string getModuleNamespace(const std::string name) const; bool hasModule(const unsigned int address) const; bool hasModule(const std::string name) const; Graph makeModuleGraph(void) const; @@ -171,6 +175,8 @@ public: std::string getObjectType(const std::string name) const; Size getObjectSize(const unsigned int address) const; Size getObjectSize(const std::string name) const; + unsigned int getObjectModule(const unsigned int address) const; + unsigned int getObjectModule(const std::string name) const; unsigned int getObjectLs(const unsigned int address) const; unsigned int getObjectLs(const std::string name) const; bool hasObject(const unsigned int address) const; @@ -181,6 +187,10 @@ public: bool hasCreatedObject(const std::string name) const; bool isObject5d(const unsigned int address) const; bool isObject5d(const std::string name) const; + template + bool isObjectOfType(const unsigned int address) const; + template + bool isObjectOfType(const std::string name) const; Environment::Size getTotalSize(void) const; void addOwnership(const unsigned int owner, const unsigned int property); @@ -197,6 +207,7 @@ private: bool dryRun_{false}; unsigned int traj_, locVol_; // grids + std::vector dim_; GridPt grid4d_; std::map grid5d_; GridRbPt gridRb4d_; @@ -343,7 +354,7 @@ T * Environment::getObject(const unsigned int address) const else { HADRON_ERROR("object with address " + std::to_string(address) + - " does not have type '" + typeid(T).name() + + " does not have type '" + typeName(&typeid(T)) + "' (has type '" + getObjectType(address) + "')"); } } @@ -380,6 +391,37 @@ T * Environment::createLattice(const 
std::string name) return createLattice(getObjectAddress(name)); } +template +bool Environment::isObjectOfType(const unsigned int address) const +{ + if (hasRegisteredObject(address)) + { + if (auto h = dynamic_cast *>(object_[address].data.get())) + { + return true; + } + else + { + return false; + } + } + else if (hasObject(address)) + { + HADRON_ERROR("object with address " + std::to_string(address) + + " exists but is not registered"); + } + else + { + HADRON_ERROR("no object with address " + std::to_string(address)); + } +} + +template +bool Environment::isObjectOfType(const std::string name) const +{ + return isObjectOfType(getObjectAddress(name)); +} + END_HADRONS_NAMESPACE #endif // Hadrons_Environment_hpp_ diff --git a/extras/Hadrons/Global.hpp b/extras/Hadrons/Global.hpp index 3ff79ea3..9de01623 100644 --- a/extras/Hadrons/Global.hpp +++ b/extras/Hadrons/Global.hpp @@ -65,7 +65,9 @@ BEGIN_HADRONS_NAMESPACE typedef FermionOperator FMat##suffix; \ typedef typename FImpl::FermionField FermionField##suffix; \ typedef typename FImpl::PropagatorField PropagatorField##suffix; \ -typedef typename FImpl::SitePropagator SitePropagator##suffix; +typedef typename FImpl::SitePropagator SitePropagator##suffix; \ +typedef std::vector \ + SlicedPropagator##suffix; #define GAUGE_TYPE_ALIASES(FImpl, suffix)\ typedef typename FImpl::DoubledGaugeField DoubledGaugeField##suffix; @@ -78,7 +80,10 @@ typedef typename SImpl::Field PropagatorField##suffix; typedef std::function SolverFn##suffix; -#define TYPE_ALIASES(FImpl, suffix)\ +#define SINK_TYPE_ALIASES(suffix)\ +typedef std::function SinkFn##suffix; + +#define FGS_TYPE_ALIASES(FImpl, suffix)\ FERM_TYPE_ALIASES(FImpl, suffix)\ GAUGE_TYPE_ALIASES(FImpl, suffix)\ SOLVER_TYPE_ALIASES(FImpl, suffix) diff --git a/extras/Hadrons/Modules.hpp b/extras/Hadrons/Modules.hpp index 7155c02a..42a1f651 100644 --- a/extras/Hadrons/Modules.hpp +++ b/extras/Hadrons/Modules.hpp @@ -16,6 +16,7 @@ #include #include #include +#include #include 
#include #include diff --git a/extras/Hadrons/Modules/MAction/DWF.hpp b/extras/Hadrons/Modules/MAction/DWF.hpp index 880fe7b9..78e0916c 100644 --- a/extras/Hadrons/Modules/MAction/DWF.hpp +++ b/extras/Hadrons/Modules/MAction/DWF.hpp @@ -27,8 +27,8 @@ See the full license in the file "LICENSE" in the top level distribution directo *************************************************************************************/ /* END LEGAL */ -#ifndef Hadrons_DWF_hpp_ -#define Hadrons_DWF_hpp_ +#ifndef Hadrons_MAction_DWF_hpp_ +#define Hadrons_MAction_DWF_hpp_ #include #include @@ -56,7 +56,7 @@ template class TDWF: public Module { public: - TYPE_ALIASES(FImpl,); + FGS_TYPE_ALIASES(FImpl,); public: // constructor TDWF(const std::string name); @@ -137,4 +137,4 @@ END_MODULE_NAMESPACE END_HADRONS_NAMESPACE -#endif // Hadrons_DWF_hpp_ +#endif // Hadrons_MAction_DWF_hpp_ diff --git a/extras/Hadrons/Modules/MAction/Wilson.hpp b/extras/Hadrons/Modules/MAction/Wilson.hpp index 4b84bda5..aab54245 100644 --- a/extras/Hadrons/Modules/MAction/Wilson.hpp +++ b/extras/Hadrons/Modules/MAction/Wilson.hpp @@ -27,8 +27,8 @@ See the full license in the file "LICENSE" in the top level distribution directo *************************************************************************************/ /* END LEGAL */ -#ifndef Hadrons_Wilson_hpp_ -#define Hadrons_Wilson_hpp_ +#ifndef Hadrons_MAction_Wilson_hpp_ +#define Hadrons_MAction_Wilson_hpp_ #include #include @@ -54,7 +54,7 @@ template class TWilson: public Module { public: - TYPE_ALIASES(FImpl,); + FGS_TYPE_ALIASES(FImpl,); public: // constructor TWilson(const std::string name); diff --git a/extras/Hadrons/Modules/MContraction/Baryon.hpp b/extras/Hadrons/Modules/MContraction/Baryon.hpp index be7d919c..78bde5a2 100644 --- a/extras/Hadrons/Modules/MContraction/Baryon.hpp +++ b/extras/Hadrons/Modules/MContraction/Baryon.hpp @@ -27,8 +27,8 @@ See the full license in the file "LICENSE" in the top level distribution directo 
*************************************************************************************/ /* END LEGAL */ -#ifndef Hadrons_Baryon_hpp_ -#define Hadrons_Baryon_hpp_ +#ifndef Hadrons_MContraction_Baryon_hpp_ +#define Hadrons_MContraction_Baryon_hpp_ #include #include @@ -55,9 +55,9 @@ template class TBaryon: public Module { public: - TYPE_ALIASES(FImpl1, 1); - TYPE_ALIASES(FImpl2, 2); - TYPE_ALIASES(FImpl3, 3); + FERM_TYPE_ALIASES(FImpl1, 1); + FERM_TYPE_ALIASES(FImpl2, 2); + FERM_TYPE_ALIASES(FImpl3, 3); class Result: Serializable { public: @@ -121,11 +121,11 @@ void TBaryon::execute(void) // FIXME: do contractions - write(writer, "meson", result); + // write(writer, "meson", result); } END_MODULE_NAMESPACE END_HADRONS_NAMESPACE -#endif // Hadrons_Baryon_hpp_ +#endif // Hadrons_MContraction_Baryon_hpp_ diff --git a/extras/Hadrons/Modules/MContraction/DiscLoop.hpp b/extras/Hadrons/Modules/MContraction/DiscLoop.hpp index 4ad12e90..4f782cd3 100644 --- a/extras/Hadrons/Modules/MContraction/DiscLoop.hpp +++ b/extras/Hadrons/Modules/MContraction/DiscLoop.hpp @@ -26,8 +26,8 @@ See the full license in the file "LICENSE" in the top level distribution directo *************************************************************************************/ /* END LEGAL */ -#ifndef Hadrons_DiscLoop_hpp_ -#define Hadrons_DiscLoop_hpp_ +#ifndef Hadrons_MContraction_DiscLoop_hpp_ +#define Hadrons_MContraction_DiscLoop_hpp_ #include #include @@ -52,7 +52,7 @@ public: template class TDiscLoop: public Module { - TYPE_ALIASES(FImpl,); + FERM_TYPE_ALIASES(FImpl,); class Result: Serializable { public: @@ -141,4 +141,4 @@ END_MODULE_NAMESPACE END_HADRONS_NAMESPACE -#endif // Hadrons_DiscLoop_hpp_ +#endif // Hadrons_MContraction_DiscLoop_hpp_ diff --git a/extras/Hadrons/Modules/MContraction/Gamma3pt.hpp b/extras/Hadrons/Modules/MContraction/Gamma3pt.hpp index e5e73fa6..7f643d49 100644 --- a/extras/Hadrons/Modules/MContraction/Gamma3pt.hpp +++ b/extras/Hadrons/Modules/MContraction/Gamma3pt.hpp @@ -26,8 
+26,8 @@ See the full license in the file "LICENSE" in the top level distribution directo *************************************************************************************/ /* END LEGAL */ -#ifndef Hadrons_Gamma3pt_hpp_ -#define Hadrons_Gamma3pt_hpp_ +#ifndef Hadrons_MContraction_Gamma3pt_hpp_ +#define Hadrons_MContraction_Gamma3pt_hpp_ #include #include @@ -72,9 +72,9 @@ public: template class TGamma3pt: public Module { - TYPE_ALIASES(FImpl1, 1); - TYPE_ALIASES(FImpl2, 2); - TYPE_ALIASES(FImpl3, 3); + FERM_TYPE_ALIASES(FImpl1, 1); + FERM_TYPE_ALIASES(FImpl2, 2); + FERM_TYPE_ALIASES(FImpl3, 3); class Result: Serializable { public: @@ -167,4 +167,4 @@ END_MODULE_NAMESPACE END_HADRONS_NAMESPACE -#endif // Hadrons_Gamma3pt_hpp_ +#endif // Hadrons_MContraction_Gamma3pt_hpp_ diff --git a/extras/Hadrons/Modules/MContraction/Meson.hpp b/extras/Hadrons/Modules/MContraction/Meson.hpp index 09c2a6e1..7810326a 100644 --- a/extras/Hadrons/Modules/MContraction/Meson.hpp +++ b/extras/Hadrons/Modules/MContraction/Meson.hpp @@ -29,8 +29,8 @@ See the full license in the file "LICENSE" in the top level distribution directo *************************************************************************************/ /* END LEGAL */ -#ifndef Hadrons_Meson_hpp_ -#define Hadrons_Meson_hpp_ +#ifndef Hadrons_MContraction_Meson_hpp_ +#define Hadrons_MContraction_Meson_hpp_ #include #include @@ -69,7 +69,7 @@ public: std::string, q1, std::string, q2, std::string, gammas, - std::string, mom, + std::string, sink, std::string, output); }; @@ -77,8 +77,10 @@ template class TMeson: public Module { public: - TYPE_ALIASES(FImpl1, 1); - TYPE_ALIASES(FImpl2, 2); + FERM_TYPE_ALIASES(FImpl1, 1); + FERM_TYPE_ALIASES(FImpl2, 2); + FERM_TYPE_ALIASES(ScalarImplCR, Scalar); + SINK_TYPE_ALIASES(Scalar); class Result: Serializable { public: @@ -115,7 +117,7 @@ TMeson::TMeson(const std::string name) template std::vector TMeson::getInput(void) { - std::vector input = {par().q1, par().q2}; + std::vector input = 
{par().q1, par().q2, par().sink}; return input; } @@ -154,6 +156,9 @@ void TMeson::parseGammaString(std::vector &gammaList) // execution /////////////////////////////////////////////////////////////////// +#define mesonConnected(q1, q2, gSnk, gSrc) \ +(g5*(gSnk))*(q1)*(adj(gSrc)*g5)*adj(q2) + template void TMeson::execute(void) { @@ -161,43 +166,72 @@ void TMeson::execute(void) << " quarks '" << par().q1 << "' and '" << par().q2 << "'" << std::endl; - CorrWriter writer(par().output); - PropagatorField1 &q1 = *env().template getObject(par().q1); - PropagatorField2 &q2 = *env().template getObject(par().q2); - LatticeComplex c(env().getGrid()); - Gamma g5(Gamma::Algebra::Gamma5); - std::vector gammaList; + CorrWriter writer(par().output); std::vector buf; std::vector result; - std::vector p; - - p = strToVec(par().mom); - LatticeComplex ph(env().getGrid()), coor(env().getGrid()); - Complex i(0.0,1.0); - ph = zero; - for(unsigned int mu = 0; mu < env().getNd(); mu++) - { - LatticeCoordinate(coor, mu); - ph = ph + p[mu]*coor*((1./(env().getGrid()->_fdimensions[mu]))); - } - ph = exp((Real)(2*M_PI)*i*ph); + Gamma g5(Gamma::Algebra::Gamma5); + std::vector gammaList; + int nt = env().getDim(Tp); parseGammaString(gammaList); - result.resize(gammaList.size()); for (unsigned int i = 0; i < result.size(); ++i) { - Gamma gSnk(gammaList[i].first); - Gamma gSrc(gammaList[i].second); - c = trace((g5*gSnk)*q1*(adj(gSrc)*g5)*adj(q2))*ph; - sliceSum(c, buf, Tp); - result[i].gamma_snk = gammaList[i].first; result[i].gamma_src = gammaList[i].second; - result[i].corr.resize(buf.size()); - for (unsigned int t = 0; t < buf.size(); ++t) + result[i].corr.resize(nt); + } + if (env().template isObjectOfType(par().q1) and + env().template isObjectOfType(par().q2)) + { + SlicedPropagator1 &q1 = *env().template getObject(par().q1); + SlicedPropagator2 &q2 = *env().template getObject(par().q2); + + LOG(Message) << "(propagator already sinked)" << std::endl; + for (unsigned int i = 0; i < 
result.size(); ++i) { - result[i].corr[t] = TensorRemove(buf[t]); + Gamma gSnk(gammaList[i].first); + Gamma gSrc(gammaList[i].second); + + for (unsigned int t = 0; t < buf.size(); ++t) + { + result[i].corr[t] = TensorRemove(trace(mesonConnected(q1[t], q2[t], gSnk, gSrc))); + } + } + } + else + { + PropagatorField1 &q1 = *env().template getObject(par().q1); + PropagatorField2 &q2 = *env().template getObject(par().q2); + LatticeComplex c(env().getGrid()); + + LOG(Message) << "(using sink '" << par().sink << "')" << std::endl; + for (unsigned int i = 0; i < result.size(); ++i) + { + Gamma gSnk(gammaList[i].first); + Gamma gSrc(gammaList[i].second); + std::string ns; + + ns = env().getModuleNamespace(env().getObjectModule(par().sink)); + if (ns == "MSource") + { + PropagatorField1 &sink = + *env().template getObject(par().sink); + + c = trace(mesonConnected(q1, q2, gSnk, gSrc)*sink); + sliceSum(c, buf, Tp); + } + else if (ns == "MSink") + { + SinkFnScalar &sink = *env().template getObject(par().sink); + + c = trace(mesonConnected(q1, q2, gSnk, gSrc)); + buf = sink(c); + } + for (unsigned int t = 0; t < buf.size(); ++t) + { + result[i].corr[t] = TensorRemove(buf[t]); + } } } write(writer, "meson", result); @@ -207,4 +241,4 @@ END_MODULE_NAMESPACE END_HADRONS_NAMESPACE -#endif // Hadrons_Meson_hpp_ +#endif // Hadrons_MContraction_Meson_hpp_ diff --git a/extras/Hadrons/Modules/MContraction/WeakHamiltonian.hpp b/extras/Hadrons/Modules/MContraction/WeakHamiltonian.hpp index 23482feb..0a3c2e31 100644 --- a/extras/Hadrons/Modules/MContraction/WeakHamiltonian.hpp +++ b/extras/Hadrons/Modules/MContraction/WeakHamiltonian.hpp @@ -26,8 +26,8 @@ See the full license in the file "LICENSE" in the top level distribution directo *************************************************************************************/ /* END LEGAL */ -#ifndef Hadrons_WeakHamiltonian_hpp_ -#define Hadrons_WeakHamiltonian_hpp_ +#ifndef Hadrons_MContraction_WeakHamiltonian_hpp_ +#define 
Hadrons_MContraction_WeakHamiltonian_hpp_ #include #include @@ -83,7 +83,7 @@ public: class T##modname: public Module\ {\ public:\ - TYPE_ALIASES(FIMPL,)\ + FERM_TYPE_ALIASES(FIMPL,)\ class Result: Serializable\ {\ public:\ @@ -111,4 +111,4 @@ END_MODULE_NAMESPACE END_HADRONS_NAMESPACE -#endif // Hadrons_WeakHamiltonian_hpp_ +#endif // Hadrons_MContraction_WeakHamiltonian_hpp_ diff --git a/extras/Hadrons/Modules/MContraction/WeakHamiltonianEye.hpp b/extras/Hadrons/Modules/MContraction/WeakHamiltonianEye.hpp index 2ee87895..3a2b9309 100644 --- a/extras/Hadrons/Modules/MContraction/WeakHamiltonianEye.hpp +++ b/extras/Hadrons/Modules/MContraction/WeakHamiltonianEye.hpp @@ -26,8 +26,8 @@ See the full license in the file "LICENSE" in the top level distribution directo *************************************************************************************/ /* END LEGAL */ -#ifndef Hadrons_WeakHamiltonianEye_hpp_ -#define Hadrons_WeakHamiltonianEye_hpp_ +#ifndef Hadrons_MContraction_WeakHamiltonianEye_hpp_ +#define Hadrons_MContraction_WeakHamiltonianEye_hpp_ #include @@ -55,4 +55,4 @@ END_MODULE_NAMESPACE END_HADRONS_NAMESPACE -#endif // Hadrons_WeakHamiltonianEye_hpp_ +#endif // Hadrons_MContraction_WeakHamiltonianEye_hpp_ diff --git a/extras/Hadrons/Modules/MContraction/WeakHamiltonianNonEye.hpp b/extras/Hadrons/Modules/MContraction/WeakHamiltonianNonEye.hpp index 69bb8005..eb5abe3c 100644 --- a/extras/Hadrons/Modules/MContraction/WeakHamiltonianNonEye.hpp +++ b/extras/Hadrons/Modules/MContraction/WeakHamiltonianNonEye.hpp @@ -26,8 +26,8 @@ See the full license in the file "LICENSE" in the top level distribution directo *************************************************************************************/ /* END LEGAL */ -#ifndef Hadrons_WeakHamiltonianNonEye_hpp_ -#define Hadrons_WeakHamiltonianNonEye_hpp_ +#ifndef Hadrons_MContraction_WeakHamiltonianNonEye_hpp_ +#define Hadrons_MContraction_WeakHamiltonianNonEye_hpp_ #include @@ -54,4 +54,4 @@ END_MODULE_NAMESPACE 
END_HADRONS_NAMESPACE -#endif // Hadrons_WeakHamiltonianNonEye_hpp_ +#endif // Hadrons_MContraction_WeakHamiltonianNonEye_hpp_ diff --git a/extras/Hadrons/Modules/MContraction/WeakNeutral4ptDisc.hpp b/extras/Hadrons/Modules/MContraction/WeakNeutral4ptDisc.hpp index c0d8f829..f26d4636 100644 --- a/extras/Hadrons/Modules/MContraction/WeakNeutral4ptDisc.hpp +++ b/extras/Hadrons/Modules/MContraction/WeakNeutral4ptDisc.hpp @@ -26,8 +26,8 @@ See the full license in the file "LICENSE" in the top level distribution directo *************************************************************************************/ /* END LEGAL */ -#ifndef Hadrons_WeakNeutral4ptDisc_hpp_ -#define Hadrons_WeakNeutral4ptDisc_hpp_ +#ifndef Hadrons_MContraction_WeakNeutral4ptDisc_hpp_ +#define Hadrons_MContraction_WeakNeutral4ptDisc_hpp_ #include @@ -56,4 +56,4 @@ END_MODULE_NAMESPACE END_HADRONS_NAMESPACE -#endif // Hadrons_WeakNeutral4ptDisc_hpp_ +#endif // Hadrons_MContraction_WeakNeutral4ptDisc_hpp_ diff --git a/extras/Hadrons/Modules/MGauge/Load.hpp b/extras/Hadrons/Modules/MGauge/Load.hpp index c41f9b8c..5ff6da0f 100644 --- a/extras/Hadrons/Modules/MGauge/Load.hpp +++ b/extras/Hadrons/Modules/MGauge/Load.hpp @@ -27,8 +27,8 @@ See the full license in the file "LICENSE" in the top level distribution directo *************************************************************************************/ /* END LEGAL */ -#ifndef Hadrons_Load_hpp_ -#define Hadrons_Load_hpp_ +#ifndef Hadrons_MGauge_Load_hpp_ +#define Hadrons_MGauge_Load_hpp_ #include #include @@ -70,4 +70,4 @@ END_MODULE_NAMESPACE END_HADRONS_NAMESPACE -#endif // Hadrons_Load_hpp_ +#endif // Hadrons_MGauge_Load_hpp_ diff --git a/extras/Hadrons/Modules/MGauge/Random.hpp b/extras/Hadrons/Modules/MGauge/Random.hpp index e3fbcf1a..a97d25cf 100644 --- a/extras/Hadrons/Modules/MGauge/Random.hpp +++ b/extras/Hadrons/Modules/MGauge/Random.hpp @@ -27,8 +27,8 @@ See the full license in the file "LICENSE" in the top level distribution directo 
*************************************************************************************/ /* END LEGAL */ -#ifndef Hadrons_Random_hpp_ -#define Hadrons_Random_hpp_ +#ifndef Hadrons_MGauge_Random_hpp_ +#define Hadrons_MGauge_Random_hpp_ #include #include @@ -63,4 +63,4 @@ END_MODULE_NAMESPACE END_HADRONS_NAMESPACE -#endif // Hadrons_Random_hpp_ +#endif // Hadrons_MGauge_Random_hpp_ diff --git a/extras/Hadrons/Modules/MGauge/StochEm.hpp b/extras/Hadrons/Modules/MGauge/StochEm.hpp index 50a77435..12ce9fdc 100644 --- a/extras/Hadrons/Modules/MGauge/StochEm.hpp +++ b/extras/Hadrons/Modules/MGauge/StochEm.hpp @@ -25,8 +25,8 @@ with this program; if not, write to the Free Software Foundation, Inc., See the full license in the file "LICENSE" in the top level distribution directory *************************************************************************************/ /* END LEGAL */ -#ifndef Hadrons_StochEm_hpp_ -#define Hadrons_StochEm_hpp_ +#ifndef Hadrons_MGauge_StochEm_hpp_ +#define Hadrons_MGauge_StochEm_hpp_ #include #include @@ -72,4 +72,4 @@ END_MODULE_NAMESPACE END_HADRONS_NAMESPACE -#endif // Hadrons_StochEm_hpp_ +#endif // Hadrons_MGauge_StochEm_hpp_ diff --git a/extras/Hadrons/Modules/MGauge/Unit.hpp b/extras/Hadrons/Modules/MGauge/Unit.hpp index 2ff10bfd..7cd15ef7 100644 --- a/extras/Hadrons/Modules/MGauge/Unit.hpp +++ b/extras/Hadrons/Modules/MGauge/Unit.hpp @@ -27,8 +27,8 @@ See the full license in the file "LICENSE" in the top level distribution directo *************************************************************************************/ /* END LEGAL */ -#ifndef Hadrons_Unit_hpp_ -#define Hadrons_Unit_hpp_ +#ifndef Hadrons_MGauge_Unit_hpp_ +#define Hadrons_MGauge_Unit_hpp_ #include #include @@ -63,4 +63,4 @@ END_MODULE_NAMESPACE END_HADRONS_NAMESPACE -#endif // Hadrons_Unit_hpp_ +#endif // Hadrons_MGauge_Unit_hpp_ diff --git a/extras/Hadrons/Modules/MLoop/NoiseLoop.hpp b/extras/Hadrons/Modules/MLoop/NoiseLoop.hpp index 3d2850d1..5d2c4a13 100644 --- 
a/extras/Hadrons/Modules/MLoop/NoiseLoop.hpp +++ b/extras/Hadrons/Modules/MLoop/NoiseLoop.hpp @@ -26,8 +26,8 @@ See the full license in the file "LICENSE" in the top level distribution directo *************************************************************************************/ /* END LEGAL */ -#ifndef Hadrons_NoiseLoop_hpp_ -#define Hadrons_NoiseLoop_hpp_ +#ifndef Hadrons_MLoop_NoiseLoop_hpp_ +#define Hadrons_MLoop_NoiseLoop_hpp_ #include #include @@ -65,7 +65,7 @@ template class TNoiseLoop: public Module { public: - TYPE_ALIASES(FImpl,); + FERM_TYPE_ALIASES(FImpl,); public: // constructor TNoiseLoop(const std::string name); @@ -129,4 +129,4 @@ END_MODULE_NAMESPACE END_HADRONS_NAMESPACE -#endif // Hadrons_NoiseLoop_hpp_ +#endif // Hadrons_MLoop_NoiseLoop_hpp_ diff --git a/extras/Hadrons/Modules/MScalar/ChargedProp.hpp b/extras/Hadrons/Modules/MScalar/ChargedProp.hpp index 8bb5faa0..fbe75c05 100644 --- a/extras/Hadrons/Modules/MScalar/ChargedProp.hpp +++ b/extras/Hadrons/Modules/MScalar/ChargedProp.hpp @@ -1,5 +1,5 @@ -#ifndef Hadrons_ChargedProp_hpp_ -#define Hadrons_ChargedProp_hpp_ +#ifndef Hadrons_MScalar_ChargedProp_hpp_ +#define Hadrons_MScalar_ChargedProp_hpp_ #include #include @@ -58,4 +58,4 @@ END_MODULE_NAMESPACE END_HADRONS_NAMESPACE -#endif // Hadrons_ChargedProp_hpp_ +#endif // Hadrons_MScalar_ChargedProp_hpp_ diff --git a/extras/Hadrons/Modules/MScalar/FreeProp.hpp b/extras/Hadrons/Modules/MScalar/FreeProp.hpp index 29f15eda..97cf288a 100644 --- a/extras/Hadrons/Modules/MScalar/FreeProp.hpp +++ b/extras/Hadrons/Modules/MScalar/FreeProp.hpp @@ -1,5 +1,5 @@ -#ifndef Hadrons_FreeProp_hpp_ -#define Hadrons_FreeProp_hpp_ +#ifndef Hadrons_MScalar_FreeProp_hpp_ +#define Hadrons_MScalar_FreeProp_hpp_ #include #include @@ -47,4 +47,4 @@ END_MODULE_NAMESPACE END_HADRONS_NAMESPACE -#endif // Hadrons_FreeProp_hpp_ +#endif // Hadrons_MScalar_FreeProp_hpp_ diff --git a/extras/Hadrons/Modules/MSink/Point.hpp b/extras/Hadrons/Modules/MSink/Point.hpp new file mode 
100644 index 00000000..7b3aa9de --- /dev/null +++ b/extras/Hadrons/Modules/MSink/Point.hpp @@ -0,0 +1,114 @@ +#ifndef Hadrons_MSink_Point_hpp_ +#define Hadrons_MSink_Point_hpp_ + +#include +#include +#include + +BEGIN_HADRONS_NAMESPACE + +/****************************************************************************** + * Point * + ******************************************************************************/ +BEGIN_MODULE_NAMESPACE(MSink) + +class PointPar: Serializable +{ +public: + GRID_SERIALIZABLE_CLASS_MEMBERS(PointPar, + std::string, mom); +}; + +template +class TPoint: public Module +{ +public: + FERM_TYPE_ALIASES(FImpl,); + SINK_TYPE_ALIASES(); +public: + // constructor + TPoint(const std::string name); + // destructor + virtual ~TPoint(void) = default; + // dependency relation + virtual std::vector getInput(void); + virtual std::vector getOutput(void); + // setup + virtual void setup(void); + // execution + virtual void execute(void); +}; + +MODULE_REGISTER_NS(Point, TPoint, MSink); +MODULE_REGISTER_NS(ScalarPoint, TPoint, MSink); + +/****************************************************************************** + * TPoint implementation * + ******************************************************************************/ +// constructor ///////////////////////////////////////////////////////////////// +template +TPoint::TPoint(const std::string name) +: Module(name) +{} + +// dependencies/products /////////////////////////////////////////////////////// +template +std::vector TPoint::getInput(void) +{ + std::vector in; + + return in; +} + +template +std::vector TPoint::getOutput(void) +{ + std::vector out = {getName()}; + + return out; +} + +// setup /////////////////////////////////////////////////////////////////////// +template +void TPoint::setup(void) +{ + unsigned int size; + + size = env().template lattice4dSize(); + env().registerObject(getName(), size); +} + +// execution /////////////////////////////////////////////////////////////////// +template 
+void TPoint::execute(void) +{ + std::vector p = strToVec(par().mom); + LatticeComplex ph(env().getGrid()), coor(env().getGrid()); + Complex i(0.0,1.0); + + LOG(Message) << "Setting up point sink function for momentum [" + << par().mom << "]" << std::endl; + ph = zero; + for(unsigned int mu = 0; mu < env().getNd(); mu++) + { + LatticeCoordinate(coor, mu); + ph = ph + (p[mu]/env().getGrid()->_fdimensions[mu])*coor; + } + ph = exp((Real)(2*M_PI)*i*ph); + auto sink = [ph](const PropagatorField &field) + { + SlicedPropagator res; + PropagatorField tmp = ph*field; + + sliceSum(tmp, res, Tp); + + return res; + }; + env().setObject(getName(), new SinkFn(sink)); +} + +END_MODULE_NAMESPACE + +END_HADRONS_NAMESPACE + +#endif // Hadrons_MSink_Point_hpp_ diff --git a/extras/Hadrons/Modules/MSolver/RBPrecCG.hpp b/extras/Hadrons/Modules/MSolver/RBPrecCG.hpp index d7220271..b1f63a5d 100644 --- a/extras/Hadrons/Modules/MSolver/RBPrecCG.hpp +++ b/extras/Hadrons/Modules/MSolver/RBPrecCG.hpp @@ -27,8 +27,8 @@ See the full license in the file "LICENSE" in the top level distribution directo *************************************************************************************/ /* END LEGAL */ -#ifndef Hadrons_RBPrecCG_hpp_ -#define Hadrons_RBPrecCG_hpp_ +#ifndef Hadrons_MSolver_RBPrecCG_hpp_ +#define Hadrons_MSolver_RBPrecCG_hpp_ #include #include @@ -53,7 +53,7 @@ template class TRBPrecCG: public Module { public: - TYPE_ALIASES(FImpl,); + FGS_TYPE_ALIASES(FImpl,); public: // constructor TRBPrecCG(const std::string name); @@ -129,4 +129,4 @@ END_MODULE_NAMESPACE END_HADRONS_NAMESPACE -#endif // Hadrons_RBPrecCG_hpp_ +#endif // Hadrons_MSolver_RBPrecCG_hpp_ diff --git a/extras/Hadrons/Modules/MSource/Point.hpp b/extras/Hadrons/Modules/MSource/Point.hpp index 3c0fc9a1..0c415807 100644 --- a/extras/Hadrons/Modules/MSource/Point.hpp +++ b/extras/Hadrons/Modules/MSource/Point.hpp @@ -27,8 +27,8 @@ See the full license in the file "LICENSE" in the top level distribution directo 
*************************************************************************************/ /* END LEGAL */ -#ifndef Hadrons_Point_hpp_ -#define Hadrons_Point_hpp_ +#ifndef Hadrons_MSource_Point_hpp_ +#define Hadrons_MSource_Point_hpp_ #include #include @@ -133,4 +133,4 @@ END_MODULE_NAMESPACE END_HADRONS_NAMESPACE -#endif // Hadrons_Point_hpp_ +#endif // Hadrons_MSource_Point_hpp_ diff --git a/extras/Hadrons/Modules/MSource/SeqGamma.hpp b/extras/Hadrons/Modules/MSource/SeqGamma.hpp index 366ebee7..e2129a46 100644 --- a/extras/Hadrons/Modules/MSource/SeqGamma.hpp +++ b/extras/Hadrons/Modules/MSource/SeqGamma.hpp @@ -28,8 +28,8 @@ See the full license in the file "LICENSE" in the top level distribution directo *************************************************************************************/ /* END LEGAL */ -#ifndef Hadrons_SeqGamma_hpp_ -#define Hadrons_SeqGamma_hpp_ +#ifndef Hadrons_MSource_SeqGamma_hpp_ +#define Hadrons_MSource_SeqGamma_hpp_ #include #include @@ -72,7 +72,7 @@ template class TSeqGamma: public Module { public: - TYPE_ALIASES(FImpl,); + FGS_TYPE_ALIASES(FImpl,); public: // constructor TSeqGamma(const std::string name); @@ -161,4 +161,4 @@ END_MODULE_NAMESPACE END_HADRONS_NAMESPACE -#endif // Hadrons_SeqGamma_hpp_ +#endif // Hadrons_MSource_SeqGamma_hpp_ diff --git a/extras/Hadrons/Modules/MSource/Wall.hpp b/extras/Hadrons/Modules/MSource/Wall.hpp index 8722876f..4de37e4d 100644 --- a/extras/Hadrons/Modules/MSource/Wall.hpp +++ b/extras/Hadrons/Modules/MSource/Wall.hpp @@ -26,8 +26,8 @@ See the full license in the file "LICENSE" in the top level distribution directo *************************************************************************************/ /* END LEGAL */ -#ifndef Hadrons_WallSource_hpp_ -#define Hadrons_WallSource_hpp_ +#ifndef Hadrons_MSource_WallSource_hpp_ +#define Hadrons_MSource_WallSource_hpp_ #include #include @@ -64,7 +64,7 @@ template class TWall: public Module { public: - TYPE_ALIASES(FImpl,); + FERM_TYPE_ALIASES(FImpl,); 
public: // constructor TWall(const std::string name); @@ -144,4 +144,4 @@ END_MODULE_NAMESPACE END_HADRONS_NAMESPACE -#endif // Hadrons_WallSource_hpp_ +#endif // Hadrons_MSource_WallSource_hpp_ diff --git a/extras/Hadrons/Modules/MSource/Z2.hpp b/extras/Hadrons/Modules/MSource/Z2.hpp index 761ae139..a7f7a3e6 100644 --- a/extras/Hadrons/Modules/MSource/Z2.hpp +++ b/extras/Hadrons/Modules/MSource/Z2.hpp @@ -27,8 +27,8 @@ See the full license in the file "LICENSE" in the top level distribution directo *************************************************************************************/ /* END LEGAL */ -#ifndef Hadrons_Z2_hpp_ -#define Hadrons_Z2_hpp_ +#ifndef Hadrons_MSource_Z2_hpp_ +#define Hadrons_MSource_Z2_hpp_ #include #include @@ -149,4 +149,4 @@ END_MODULE_NAMESPACE END_HADRONS_NAMESPACE -#endif // Hadrons_Z2_hpp_ +#endif // Hadrons_MSource_Z2_hpp_ diff --git a/extras/Hadrons/Modules/Quark.hpp b/extras/Hadrons/Modules/Quark.hpp index c0d1f65a..cf7d4c28 100644 --- a/extras/Hadrons/Modules/Quark.hpp +++ b/extras/Hadrons/Modules/Quark.hpp @@ -51,7 +51,7 @@ template class TQuark: public Module { public: - TYPE_ALIASES(FImpl,); + FGS_TYPE_ALIASES(FImpl,); public: // constructor TQuark(const std::string name); diff --git a/extras/Hadrons/Modules/templates/Module_in_NS.hpp.template b/extras/Hadrons/Modules/templates/Module_in_NS.hpp.template index ece2bb58..ea77b12a 100644 --- a/extras/Hadrons/Modules/templates/Module_in_NS.hpp.template +++ b/extras/Hadrons/Modules/templates/Module_in_NS.hpp.template @@ -1,5 +1,5 @@ -#ifndef Hadrons____FILEBASENAME____hpp_ -#define Hadrons____FILEBASENAME____hpp_ +#ifndef Hadrons____NAMESPACE_______FILEBASENAME____hpp_ +#define Hadrons____NAMESPACE_______FILEBASENAME____hpp_ #include #include @@ -41,4 +41,4 @@ END_MODULE_NAMESPACE END_HADRONS_NAMESPACE -#endif // Hadrons____FILEBASENAME____hpp_ +#endif // Hadrons____NAMESPACE_______FILEBASENAME____hpp_ diff --git a/extras/Hadrons/Modules/templates/Module_tmp_in_NS.hpp.template 
b/extras/Hadrons/Modules/templates/Module_tmp_in_NS.hpp.template index a330652d..b79c0ad3 100644 --- a/extras/Hadrons/Modules/templates/Module_tmp_in_NS.hpp.template +++ b/extras/Hadrons/Modules/templates/Module_tmp_in_NS.hpp.template @@ -1,5 +1,5 @@ -#ifndef Hadrons____FILEBASENAME____hpp_ -#define Hadrons____FILEBASENAME____hpp_ +#ifndef Hadrons____NAMESPACE_______FILEBASENAME____hpp_ +#define Hadrons____NAMESPACE_______FILEBASENAME____hpp_ #include #include @@ -82,4 +82,4 @@ END_MODULE_NAMESPACE END_HADRONS_NAMESPACE -#endif // Hadrons____FILEBASENAME____hpp_ +#endif // Hadrons____NAMESPACE_______FILEBASENAME____hpp_ diff --git a/extras/Hadrons/modules.inc b/extras/Hadrons/modules.inc index 3cf69144..f51ede5a 100644 --- a/extras/Hadrons/modules.inc +++ b/extras/Hadrons/modules.inc @@ -28,6 +28,7 @@ modules_hpp =\ Modules/MScalar/ChargedProp.hpp \ Modules/MScalar/FreeProp.hpp \ Modules/MScalar/Scalar.hpp \ + Modules/MSink/Point.hpp \ Modules/MSolver/RBPrecCG.hpp \ Modules/MSource/Point.hpp \ Modules/MSource/SeqGamma.hpp \ diff --git a/tests/hadrons/Test_hadrons_spectrum.cc b/tests/hadrons/Test_hadrons_spectrum.cc index 55f3346e..8f7b30c8 100644 --- a/tests/hadrons/Test_hadrons_spectrum.cc +++ b/tests/hadrons/Test_hadrons_spectrum.cc @@ -63,6 +63,10 @@ int main(int argc, char *argv[]) MSource::Point::Par ptPar; ptPar.position = "0 0 0 0"; application.createModule("pt", ptPar); + // sink + MSink::Point::Par sinkPar; + sinkPar.mom = "0 0 0"; + application.createModule("sink", sinkPar); // set fermion boundary conditions to be periodic space, antiperiodic time. std::string boundary = "1 1 1 -1"; @@ -98,19 +102,19 @@ int main(int argc, char *argv[]) { MContraction::Meson::Par mesPar; - mesPar.output = "mesons/pt_" + flavour[i] + flavour[j]; - mesPar.q1 = "Qpt_" + flavour[i]; - mesPar.q2 = "Qpt_" + flavour[j]; - mesPar.gammas = "all"; - mesPar.mom = "0. 0. 0. 
0."; + mesPar.output = "mesons/pt_" + flavour[i] + flavour[j]; + mesPar.q1 = "Qpt_" + flavour[i]; + mesPar.q2 = "Qpt_" + flavour[j]; + mesPar.gammas = "all"; + mesPar.sink = "sink"; application.createModule("meson_pt_" + flavour[i] + flavour[j], mesPar); - mesPar.output = "mesons/Z2_" + flavour[i] + flavour[j]; - mesPar.q1 = "QZ2_" + flavour[i]; - mesPar.q2 = "QZ2_" + flavour[j]; - mesPar.gammas = "all"; - mesPar.mom = "0. 0. 0. 0."; + mesPar.output = "mesons/Z2_" + flavour[i] + flavour[j]; + mesPar.q1 = "QZ2_" + flavour[i]; + mesPar.q2 = "QZ2_" + flavour[j]; + mesPar.gammas = "all"; + mesPar.sink = "sink"; application.createModule("meson_Z2_" + flavour[i] + flavour[j], mesPar); From 5f55bca378f0e379b8595a82d096e79e8a7ed92d Mon Sep 17 00:00:00 2001 From: Antonin Portelli Date: Wed, 7 Jun 2017 20:10:48 -0500 Subject: [PATCH 062/177] Hadrons: Quark module renamed MFermion::GaugeProp --- extras/Hadrons/Modules.hpp | 2 +- .../{Quark.hpp => MFermion/GaugeProp.hpp} | 79 +++++++------------ extras/Hadrons/modules.inc | 4 +- tests/hadrons/Test_hadrons_meson_3pt.cc | 14 ++-- tests/hadrons/Test_hadrons_spectrum.cc | 6 +- 5 files changed, 42 insertions(+), 63 deletions(-) rename extras/Hadrons/Modules/{Quark.hpp => MFermion/GaugeProp.hpp} (65%) diff --git a/extras/Hadrons/Modules.hpp b/extras/Hadrons/Modules.hpp index 42a1f651..c27254aa 100644 --- a/extras/Hadrons/Modules.hpp +++ b/extras/Hadrons/Modules.hpp @@ -8,6 +8,7 @@ #include #include #include +#include #include #include #include @@ -22,4 +23,3 @@ #include #include #include -#include diff --git a/extras/Hadrons/Modules/Quark.hpp b/extras/Hadrons/Modules/MFermion/GaugeProp.hpp similarity index 65% rename from extras/Hadrons/Modules/Quark.hpp rename to extras/Hadrons/Modules/MFermion/GaugeProp.hpp index cf7d4c28..b4f9edcc 100644 --- a/extras/Hadrons/Modules/Quark.hpp +++ b/extras/Hadrons/Modules/MFermion/GaugeProp.hpp @@ -1,34 +1,5 @@ -/************************************************************************************* 
- -Grid physics library, www.github.com/paboyle/Grid - -Source file: extras/Hadrons/Modules/Quark.hpp - -Copyright (C) 2015 -Copyright (C) 2016 - -Author: Antonin Portelli - -This program is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; either version 2 of the License, or -(at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License along -with this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. - -See the full license in the file "LICENSE" in the top level distribution directory -*************************************************************************************/ -/* END LEGAL */ - -#ifndef Hadrons_Quark_hpp_ -#define Hadrons_Quark_hpp_ +#ifndef Hadrons_MFermion_GaugeProp_hpp_ +#define Hadrons_MFermion_GaugeProp_hpp_ #include #include @@ -37,27 +8,29 @@ See the full license in the file "LICENSE" in the top level distribution directo BEGIN_HADRONS_NAMESPACE /****************************************************************************** - * TQuark * + * GaugeProp * ******************************************************************************/ -class QuarkPar: Serializable +BEGIN_MODULE_NAMESPACE(MFermion) + +class GaugePropPar: Serializable { public: - GRID_SERIALIZABLE_CLASS_MEMBERS(QuarkPar, + GRID_SERIALIZABLE_CLASS_MEMBERS(GaugePropPar, std::string, source, std::string, solver); }; template -class TQuark: public Module +class TGaugeProp: public Module { public: FGS_TYPE_ALIASES(FImpl,); public: // constructor - TQuark(const std::string name); + TGaugeProp(const std::string name); // destructor - 
virtual ~TQuark(void) = default; - // dependencies/products + virtual ~TGaugeProp(void) = default; + // dependency relation virtual std::vector getInput(void); virtual std::vector getOutput(void); // setup @@ -69,20 +42,20 @@ private: SolverFn *solver_{nullptr}; }; -MODULE_REGISTER(Quark, TQuark); +MODULE_REGISTER_NS(GaugeProp, TGaugeProp, MFermion); /****************************************************************************** - * TQuark implementation * + * TGaugeProp implementation * ******************************************************************************/ // constructor ///////////////////////////////////////////////////////////////// template -TQuark::TQuark(const std::string name) -: Module(name) +TGaugeProp::TGaugeProp(const std::string name) +: Module(name) {} // dependencies/products /////////////////////////////////////////////////////// template -std::vector TQuark::getInput(void) +std::vector TGaugeProp::getInput(void) { std::vector in = {par().source, par().solver}; @@ -90,7 +63,7 @@ std::vector TQuark::getInput(void) } template -std::vector TQuark::getOutput(void) +std::vector TGaugeProp::getOutput(void) { std::vector out = {getName(), getName() + "_5d"}; @@ -99,7 +72,7 @@ std::vector TQuark::getOutput(void) // setup /////////////////////////////////////////////////////////////////////// template -void TQuark::setup(void) +void TGaugeProp::setup(void) { Ls_ = env().getObjectLs(par().solver); env().template registerLattice(getName()); @@ -111,13 +84,13 @@ void TQuark::setup(void) // execution /////////////////////////////////////////////////////////////////// template -void TQuark::execute(void) +void TGaugeProp::execute(void) { LOG(Message) << "Computing quark propagator '" << getName() << "'" - << std::endl; + << std::endl; FermionField source(env().getGrid(Ls_)), sol(env().getGrid(Ls_)), - tmp(env().getGrid()); + tmp(env().getGrid()); std::string propName = (Ls_ == 1) ? 
getName() : (getName() + "_5d"); PropagatorField &prop = *env().template createLattice(propName); PropagatorField &fullSrc = *env().template getObject(par().source); @@ -128,12 +101,12 @@ void TQuark::execute(void) } LOG(Message) << "Inverting using solver '" << par().solver - << "' on source '" << par().source << "'" << std::endl; + << "' on source '" << par().source << "'" << std::endl; for (unsigned int s = 0; s < Ns; ++s) for (unsigned int c = 0; c < Nc; ++c) { LOG(Message) << "Inversion for spin= " << s << ", color= " << c - << std::endl; + << std::endl; // source conversion for 4D sources if (!env().isObject5d(par().source)) { @@ -170,7 +143,7 @@ void TQuark::execute(void) if (Ls_ > 1) { PropagatorField &p4d = - *env().template getObject(getName()); + *env().template getObject(getName()); axpby_ssp_pminus(sol, 0., sol, 1., sol, 0, 0); axpby_ssp_pplus(sol, 1., sol, 1., sol, 0, Ls_-1); @@ -180,6 +153,8 @@ void TQuark::execute(void) } } +END_MODULE_NAMESPACE + END_HADRONS_NAMESPACE -#endif // Hadrons_Quark_hpp_ +#endif // Hadrons_MFermion_GaugeProp_hpp_ diff --git a/extras/Hadrons/modules.inc b/extras/Hadrons/modules.inc index f51ede5a..669b08ba 100644 --- a/extras/Hadrons/modules.inc +++ b/extras/Hadrons/modules.inc @@ -20,6 +20,7 @@ modules_hpp =\ Modules/MContraction/WeakHamiltonianEye.hpp \ Modules/MContraction/WeakHamiltonianNonEye.hpp \ Modules/MContraction/WeakNeutral4ptDisc.hpp \ + Modules/MFermion/GaugeProp.hpp \ Modules/MGauge/Load.hpp \ Modules/MGauge/Random.hpp \ Modules/MGauge/StochEm.hpp \ @@ -33,6 +34,5 @@ modules_hpp =\ Modules/MSource/Point.hpp \ Modules/MSource/SeqGamma.hpp \ Modules/MSource/Wall.hpp \ - Modules/MSource/Z2.hpp \ - Modules/Quark.hpp + Modules/MSource/Z2.hpp diff --git a/tests/hadrons/Test_hadrons_meson_3pt.cc b/tests/hadrons/Test_hadrons_meson_3pt.cc index 7e487153..382c39d4 100644 --- a/tests/hadrons/Test_hadrons_meson_3pt.cc +++ b/tests/hadrons/Test_hadrons_meson_3pt.cc @@ -65,6 +65,10 @@ int main(int argc, char *argv[]) // 
set fermion boundary conditions to be periodic space, antiperiodic time. std::string boundary = "1 1 1 -1"; + // sink + MSink::Point::Par sinkPar; + sinkPar.mom = "0 0 0"; + application.createModule("sink", sinkPar); for (unsigned int i = 0; i < flavour.size(); ++i) { // actions @@ -115,15 +119,15 @@ int main(int argc, char *argv[]) } // propagators - Quark::Par quarkPar; + MFermion::GaugeProp::Par quarkPar; quarkPar.solver = "CG_" + flavour[i]; quarkPar.source = srcName; - application.createModule(qName[i], quarkPar); + application.createModule(qName[i], quarkPar); for (unsigned int mu = 0; mu < Nd; ++mu) { quarkPar.source = seqName[i][mu]; seqName[i][mu] = "Q_" + flavour[i] + "-" + seqName[i][mu]; - application.createModule(seqName[i][mu], quarkPar); + application.createModule(seqName[i][mu], quarkPar); } } @@ -136,7 +140,7 @@ int main(int argc, char *argv[]) mesPar.q1 = qName[i]; mesPar.q2 = qName[j]; mesPar.gammas = "all"; - mesPar.mom = "0. 0. 0. 0."; + mesPar.sink = "sink"; application.createModule("meson_Z2_" + std::to_string(t) + "_" @@ -155,7 +159,7 @@ int main(int argc, char *argv[]) mesPar.q1 = qName[i]; mesPar.q2 = seqName[j][mu]; mesPar.gammas = "all"; - mesPar.mom = "0. 0. 0. 
0."; + mesPar.sink = "sink"; application.createModule("3pt_Z2_" + std::to_string(t) + "_" diff --git a/tests/hadrons/Test_hadrons_spectrum.cc b/tests/hadrons/Test_hadrons_spectrum.cc index 8f7b30c8..801674f7 100644 --- a/tests/hadrons/Test_hadrons_spectrum.cc +++ b/tests/hadrons/Test_hadrons_spectrum.cc @@ -90,12 +90,12 @@ int main(int argc, char *argv[]) solverPar); // propagators - Quark::Par quarkPar; + MFermion::GaugeProp::Par quarkPar; quarkPar.solver = "CG_" + flavour[i]; quarkPar.source = "pt"; - application.createModule("Qpt_" + flavour[i], quarkPar); + application.createModule("Qpt_" + flavour[i], quarkPar); quarkPar.source = "z2"; - application.createModule("QZ2_" + flavour[i], quarkPar); + application.createModule("QZ2_" + flavour[i], quarkPar); } for (unsigned int i = 0; i < flavour.size(); ++i) for (unsigned int j = i; j < flavour.size(); ++j) From 24908162970faae02a878ce3298d3ebc79a47fb9 Mon Sep 17 00:00:00 2001 From: Antonin Portelli Date: Wed, 7 Jun 2017 20:11:02 -0500 Subject: [PATCH 063/177] Hadrons: rare kaon program removed --- tests/hadrons/Test_hadrons.hpp | 368 ------------------------- tests/hadrons/Test_hadrons_rarekaon.cc | 342 ----------------------- 2 files changed, 710 deletions(-) delete mode 100644 tests/hadrons/Test_hadrons.hpp delete mode 100644 tests/hadrons/Test_hadrons_rarekaon.cc diff --git a/tests/hadrons/Test_hadrons.hpp b/tests/hadrons/Test_hadrons.hpp deleted file mode 100644 index 26d02a5c..00000000 --- a/tests/hadrons/Test_hadrons.hpp +++ /dev/null @@ -1,368 +0,0 @@ -/******************************************************************************* - Grid physics library, www.github.com/paboyle/Grid - - Source file: tests/hadrons/Test_hadrons.hpp - - Copyright (C) 2017 - - Author: Andrew Lawson - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2 of the License, or - (at your option) any 
later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License along - with this program; if not, write to the Free Software Foundation, Inc., - 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. - - See the full license in the file "LICENSE" in the top level distribution - directory. - *******************************************************************************/ - -#include - -using namespace Grid; -using namespace Hadrons; - -/******************************************************************************* - * Macros to reduce code duplication. - ******************************************************************************/ -// Useful definitions -#define ZERO_MOM "0. 0. 0. 0." -#define INIT_INDEX(s, n) (std::string(s) + "_" + std::to_string(n)) -#define ADD_INDEX(s, n) (s + "_" + std::to_string(n)) -#define LABEL_3PT(s, t1, t2) ADD_INDEX(INIT_INDEX(s, t1), t2) -#define LABEL_4PT(s, t1, t2, t3) ADD_INDEX(ADD_INDEX(INIT_INDEX(s, t1), t2), t3) -#define LABEL_4PT_NOISE(s, t1, t2, t3, nn) ADD_INDEX(ADD_INDEX(ADD_INDEX(INIT_INDEX(s, t1), t2), t3), nn) - -// Wall source/sink macros -#define NAME_3MOM_WALL_SOURCE(t, mom) ("wall_" + std::to_string(t) + "_" + mom) -#define NAME_WALL_SOURCE(t) NAME_3MOM_WALL_SOURCE(t, ZERO_MOM) -#define NAME_POINT_SOURCE(pos) ("point_" + pos) - -#define MAKE_3MOM_WALL_PROP(tW, mom, propName, solver)\ -{\ - std::string srcName = NAME_3MOM_WALL_SOURCE(tW, mom);\ - makeWallSource(application, srcName, tW, mom);\ - makePropagator(application, propName, srcName, solver);\ -} - -#define MAKE_WALL_PROP(tW, propName, solver)\ - MAKE_3MOM_WALL_PROP(tW, ZERO_MOM, propName, solver) - -// Sequential source macros -#define MAKE_SEQUENTIAL_PROP(tS, qSrc, mom, propName, solver)\ -{\ 
- std::string srcName = ADD_INDEX(qSrc + "_seq", tS);\ - makeSequentialSource(application, srcName, qSrc, tS, mom);\ - makePropagator(application, propName, srcName, solver);\ -} - -// Point source macros -#define MAKE_POINT_PROP(pos, propName, solver)\ -{\ - std::string srcName = NAME_POINT_SOURCE(pos);\ - makePointSource(application, srcName, pos);\ - makePropagator(application, propName, srcName, solver);\ -} - -/******************************************************************************* - * Functions for propagator construction. - ******************************************************************************/ - -/******************************************************************************* - * Name: makePointSource - * Purpose: Construct point source and add to application module. - * Parameters: application - main application that stores modules. - * srcName - name of source module to create. - * pos - Position of point source. - * Returns: None. - ******************************************************************************/ -inline void makePointSource(Application &application, std::string srcName, - std::string pos) -{ - // If the source already exists, don't make the module again. - if (!(Environment::getInstance().hasModule(srcName))) - { - MSource::Point::Par pointPar; - pointPar.position = pos; - application.createModule(srcName, pointPar); - } -} - -/******************************************************************************* - * Name: makeSequentialSource - * Purpose: Construct sequential source and add to application module. - * Parameters: application - main application that stores modules. - * srcName - name of source module to create. - * qSrc - Input quark for sequential inversion. - * tS - sequential source timeslice. - * mom - momentum insertion (default is zero). - * Returns: None. 
- ******************************************************************************/ -inline void makeSequentialSource(Application &application, std::string srcName, - std::string qSrc, unsigned int tS, - std::string mom = ZERO_MOM) -{ - // If the source already exists, don't make the module again. - if (!(Environment::getInstance().hasModule(srcName))) - { - MSource::SeqGamma::Par seqPar; - seqPar.q = qSrc; - seqPar.tA = tS; - seqPar.tB = tS; - seqPar.mom = mom; - seqPar.gamma = Gamma::Algebra::GammaT; - application.createModule(srcName, seqPar); - } -} - -/******************************************************************************* - * Name: makeWallSource - * Purpose: Construct wall source and add to application module. - * Parameters: application - main application that stores modules. - * srcName - name of source module to create. - * tW - wall source timeslice. - * mom - momentum insertion (default is zero). - * Returns: None. - ******************************************************************************/ -inline void makeWallSource(Application &application, std::string srcName, - unsigned int tW, std::string mom = ZERO_MOM) -{ - // If the source already exists, don't make the module again. - if (!(Environment::getInstance().hasModule(srcName))) - { - MSource::Wall::Par wallPar; - wallPar.tW = tW; - wallPar.mom = mom; - application.createModule(srcName, wallPar); - } -} - -/******************************************************************************* - * Name: makeWallSink - * Purpose: Wall sink smearing of a propagator. - * Parameters: application - main application that stores modules. - * propName - name of input propagator. - * wallName - name of smeared propagator. - * mom - momentum insertion (default is zero). - * Returns: None. 
- ******************************************************************************/ -inline void makeWallSink(Application &application, std::string propName, - std::string wallName, std::string mom = ZERO_MOM) -{ - // If the propagator has already been smeared, don't smear it again. - // Temporarily removed, strategy for sink smearing likely to change. - /*if (!(Environment::getInstance().hasModule(wallName))) - { - MSink::Wall::Par wallPar; - wallPar.q = propName; - wallPar.mom = mom; - application.createModule(wallName, wallPar); - }*/ -} - -/******************************************************************************* - * Name: makePropagator - * Purpose: Construct source and propagator then add to application module. - * Parameters: application - main application that stores modules. - * propName - name of propagator module to create. - * srcName - name of source module to use. - * solver - solver to use (default is CG). - * Returns: None. - ******************************************************************************/ -inline void makePropagator(Application &application, std::string &propName, - std::string &srcName, std::string &solver) -{ - // If the propagator already exists, don't make the module again. - if (!(Environment::getInstance().hasModule(propName))) - { - Quark::Par quarkPar; - quarkPar.source = srcName; - quarkPar.solver = solver; - application.createModule(propName, quarkPar); - } -} - -/******************************************************************************* - * Name: makeLoop - * Purpose: Use noise source and inversion result to make loop propagator, then - * add to application module. - * Parameters: application - main application that stores modules. - * propName - name of propagator module to create. - * srcName - name of noise source module to use. - * resName - name of inversion result on given noise source. - * Returns: None. 
- ******************************************************************************/ -inline void makeLoop(Application &application, std::string &propName, - std::string &srcName, std::string &resName) -{ - // If the loop propagator already exists, don't make the module again. - if (!(Environment::getInstance().hasModule(propName))) - { - MLoop::NoiseLoop::Par loopPar; - loopPar.q = resName; - loopPar.eta = srcName; - application.createModule(propName, loopPar); - } -} - -/******************************************************************************* - * Contraction module creation. - ******************************************************************************/ - -/******************************************************************************* - * Name: mesonContraction - * Purpose: Create meson contraction module and add to application module. - * Parameters: application - main application that stores modules. - * npt - specify n-point correlator (for labelling). - * q1 - quark propagator 1. - * q2 - quark propagator 2. - * label - unique label to construct module name. - * mom - momentum to project (default is zero) - * gammas - gamma insertions at source and sink. - * Returns: None. 
- ******************************************************************************/ -inline void mesonContraction(Application &application, unsigned int npt, - std::string &q1, std::string &q2, - std::string &label, - std::string mom = ZERO_MOM, - std::string gammas = "") -{ - std::string modName = std::to_string(npt) + "pt_" + label; - if (!(Environment::getInstance().hasModule(modName))) - { - MContraction::Meson::Par mesPar; - mesPar.output = std::to_string(npt) + "pt/" + label; - mesPar.q1 = q1; - mesPar.q2 = q2; - mesPar.mom = mom; - mesPar.gammas = gammas; - application.createModule(modName, mesPar); - } - } - -/******************************************************************************* - * Name: gamma3ptContraction - * Purpose: Create gamma3pt contraction module and add to application module. - * Parameters: application - main application that stores modules. - * npt - specify n-point correlator (for labelling). - * q1 - quark propagator 1. - * q2 - quark propagator 2. - * q3 - quark propagator 3. - * label - unique label to construct module name. - * gamma - gamma insertions between q2 and q3. - * Returns: None. 
- ******************************************************************************/ -inline void gamma3ptContraction(Application &application, unsigned int npt, - std::string &q1, std::string &q2, - std::string &q3, std::string &label, - Gamma::Algebra gamma = Gamma::Algebra::Identity) -{ - std::string modName = std::to_string(npt) + "pt_" + label; - if (!(Environment::getInstance().hasModule(modName))) - { - MContraction::Gamma3pt::Par gamma3ptPar; - gamma3ptPar.output = std::to_string(npt) + "pt/" + label; - gamma3ptPar.q1 = q1; - gamma3ptPar.q2 = q2; - gamma3ptPar.q3 = q3; - gamma3ptPar.gamma = gamma; - application.createModule(modName, gamma3ptPar); - } - } - -/******************************************************************************* - * Name: weakContraction[Eye,NonEye] - * Purpose: Create Weak Hamiltonian contraction module for Eye/NonEye topology - * and add to application module. - * Parameters: application - main application that stores modules. - * npt - specify n-point correlator (for labelling). - * q1 - quark propagator 1. - * q2 - quark propagator 2. - * q3 - quark propagator 3. - * q4 - quark propagator 4. - * label - unique label to construct module name. - * Returns: None. 
- ******************************************************************************/ -#define HW_CONTRACTION(top) \ -inline void weakContraction##top(Application &application, unsigned int npt,\ - std::string &q1, std::string &q2, \ - std::string &q3, std::string &q4, \ - std::string &label)\ -{\ - std::string modName = std::to_string(npt) + "pt_" + label;\ - if (!(Environment::getInstance().hasModule(modName)))\ - {\ - MContraction::WeakHamiltonian##top::Par weakPar;\ - weakPar.output = std::to_string(npt) + "pt/" + label;\ - weakPar.q1 = q1;\ - weakPar.q2 = q2;\ - weakPar.q3 = q3;\ - weakPar.q4 = q4;\ - application.createModule(modName, weakPar);\ - }\ -} -HW_CONTRACTION(Eye) // weakContractionEye -HW_CONTRACTION(NonEye) // weakContractionNonEye - -/******************************************************************************* - * Name: disc0Contraction - * Purpose: Create contraction module for 4pt Weak Hamiltonian + current - * disconnected topology for neutral mesons and add to application - * module. - * Parameters: application - main application that stores modules. - * q1 - quark propagator 1. - * q2 - quark propagator 2. - * q3 - quark propagator 3. - * q4 - quark propagator 4. - * label - unique label to construct module name. - * Returns: None. 
- ******************************************************************************/ -inline void disc0Contraction(Application &application, - std::string &q1, std::string &q2, - std::string &q3, std::string &q4, - std::string &label) -{ - std::string modName = "4pt_" + label; - if (!(Environment::getInstance().hasModule(modName))) - { - MContraction::WeakNeutral4ptDisc::Par disc0Par; - disc0Par.output = "4pt/" + label; - disc0Par.q1 = q1; - disc0Par.q2 = q2; - disc0Par.q3 = q3; - disc0Par.q4 = q4; - application.createModule(modName, disc0Par); - } - } - -/******************************************************************************* - * Name: discLoopContraction - * Purpose: Create contraction module for disconnected loop and add to - * application module. - * Parameters: application - main application that stores modules. - * q_loop - loop quark propagator. - * modName - unique module name. - * gamma - gamma matrix to use in contraction. - * Returns: None. - ******************************************************************************/ -inline void discLoopContraction(Application &application, - std::string &q_loop, std::string &modName, - Gamma::Algebra gamma = Gamma::Algebra::Identity) -{ - if (!(Environment::getInstance().hasModule(modName))) - { - MContraction::DiscLoop::Par discPar; - discPar.output = "disc/" + modName; - discPar.q_loop = q_loop; - discPar.gamma = gamma; - application.createModule(modName, discPar); - } - } diff --git a/tests/hadrons/Test_hadrons_rarekaon.cc b/tests/hadrons/Test_hadrons_rarekaon.cc deleted file mode 100644 index ab4d3ef1..00000000 --- a/tests/hadrons/Test_hadrons_rarekaon.cc +++ /dev/null @@ -1,342 +0,0 @@ -/******************************************************************************* - Grid physics library, www.github.com/paboyle/Grid - - Source file: tests/hadrons/Test_hadrons_rarekaon.cc - - Copyright (C) 2017 - - Author: Andrew Lawson - - This program is free software; you can redistribute it and/or modify - it under 
the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License along - with this program; if not, write to the Free Software Foundation, Inc., - 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. - - See the full license in the file "LICENSE" in the top level distribution - directory. - *******************************************************************************/ - -#include "Test_hadrons.hpp" - -using namespace Grid; -using namespace Hadrons; - -enum quarks -{ - light = 0, - strange = 1, - charm = 2 -}; - -int main(int argc, char *argv[]) -{ - // parse command line ////////////////////////////////////////////////////// - std::string configStem; - - if (argc < 2) - { - std::cerr << "usage: " << argv[0] << " [Grid options]"; - std::cerr << std::endl; - std::exit(EXIT_FAILURE); - } - configStem = argv[1]; - - // initialization ////////////////////////////////////////////////////////// - Grid_init(&argc, &argv); - HadronsLogError.Active(GridLogError.isActive()); - HadronsLogWarning.Active(GridLogWarning.isActive()); - HadronsLogMessage.Active(GridLogMessage.isActive()); - HadronsLogIterative.Active(GridLogIterative.isActive()); - HadronsLogDebug.Active(GridLogDebug.isActive()); - LOG(Message) << "Grid initialized" << std::endl; - - // run setup /////////////////////////////////////////////////////////////// - Application application; - std::vector mass = {.01, .04, .2}; - std::vector flavour = {"l", "s", "c"}; - std::vector solvers = {"CG_l", "CG_s", "CG_c"}; - std::string kmom = "0. 0. 0. 0."; - std::string pmom = "1. 0. 0. 
0."; - std::string qmom = "-1. 0. 0. 0."; - std::string mqmom = "1. 0. 0. 0."; - std::vector tKs = {0}; - unsigned int dt_pi = 16; - std::vector tJs = {8}; - unsigned int n_noise = 1; - unsigned int nt = 32; - bool do_disconnected(false); - - // Global parameters. - Application::GlobalPar globalPar; - globalPar.trajCounter.start = 1500; - globalPar.trajCounter.end = 1520; - globalPar.trajCounter.step = 20; - globalPar.seed = "1 2 3 4"; - globalPar.genetic.maxGen = 1000; - globalPar.genetic.maxCstGen = 200; - globalPar.genetic.popSize = 20; - globalPar.genetic.mutationRate = .1; - application.setPar(globalPar); - - // gauge field - if (configStem == "None") - { - application.createModule("gauge"); - } - else - { - MGauge::Load::Par gaugePar; - gaugePar.file = configStem; - application.createModule("gauge", gaugePar); - } - - // set fermion boundary conditions to be periodic space, antiperiodic time. - std::string boundary = "1 1 1 -1"; - - for (unsigned int i = 0; i < flavour.size(); ++i) - { - // actions - MAction::DWF::Par actionPar; - actionPar.gauge = "gauge"; - actionPar.Ls = 16; - actionPar.M5 = 1.8; - actionPar.mass = mass[i]; - actionPar.boundary = boundary; - application.createModule("DWF_" + flavour[i], actionPar); - - // solvers - // RBPrecCG -> CG - MSolver::RBPrecCG::Par solverPar; - solverPar.action = "DWF_" + flavour[i]; - solverPar.residual = 1.0e-8; - application.createModule(solvers[i], - solverPar); - } - - // Create noise propagators for loops. 
- std::vector noiseSrcs; - std::vector> noiseRes; - std::vector> noiseProps; - if (n_noise > 0) - { - MSource::Z2::Par noisePar; - noisePar.tA = 0; - noisePar.tB = nt - 1; - std::string loop_stem = "loop_"; - - noiseRes.resize(flavour.size()); - noiseProps.resize(flavour.size()); - for (unsigned int nn = 0; nn < n_noise; ++nn) - { - std::string eta = INIT_INDEX("noise", nn); - application.createModule(eta, noisePar); - noiseSrcs.push_back(eta); - - for (unsigned int f = 0; f < flavour.size(); ++f) - { - std::string loop_prop = INIT_INDEX(loop_stem + flavour[f], nn); - std::string loop_res = loop_prop + "_res"; - makePropagator(application, loop_res, eta, solvers[f]); - makeLoop(application, loop_prop, eta, loop_res); - noiseRes[f].push_back(loop_res); - noiseProps[f].push_back(loop_prop); - } - } - } - - // Translate rare kaon decay across specified timeslices. - for (unsigned int i = 0; i < tKs.size(); ++i) - { - // Zero-momentum wall source propagators for kaon and pion. - unsigned int tK = tKs[i]; - unsigned int tpi = (tK + dt_pi) % nt; - std::string q_Kl_0 = INIT_INDEX("Q_l_0", tK); - std::string q_pil_0 = INIT_INDEX("Q_l_0", tpi); - MAKE_WALL_PROP(tK, q_Kl_0, solvers[light]); - MAKE_WALL_PROP(tpi, q_pil_0, solvers[light]); - - // Wall sources for kaon and pion with momentum insertion. If either - // p or k are zero, or p = k, re-use the existing name to avoid - // duplicating a propagator. - std::string q_Ks_k = INIT_INDEX("Q_Ks_k", tK); - std::string q_Ks_p = INIT_INDEX((kmom == pmom) ? "Q_Ks_k" : "Q_Ks_p", tK); - std::string q_pil_k = INIT_INDEX((kmom == ZERO_MOM) ? "Q_l_0" : "Q_l_k", tpi); - std::string q_pil_p = INIT_INDEX((pmom == kmom) ? q_pil_k : ((pmom == ZERO_MOM) ? 
"Q_l_0" : "Q_l_p"), tpi); - MAKE_3MOM_WALL_PROP(tK, kmom, q_Ks_k, solvers[strange]); - MAKE_3MOM_WALL_PROP(tK, pmom, q_Ks_p, solvers[strange]); - MAKE_3MOM_WALL_PROP(tpi, kmom, q_pil_k, solvers[light]); - MAKE_3MOM_WALL_PROP(tpi, pmom, q_pil_p, solvers[light]); - - /*********************************************************************** - * CONTRACTIONS: pi and K 2pt contractions with mom = p, k. - **********************************************************************/ - // Wall-Point - std::string PW_K_k = INIT_INDEX("PW_K_k", tK); - std::string PW_K_p = INIT_INDEX("PW_K_p", tK); - std::string PW_pi_k = INIT_INDEX("PW_pi_k", tpi); - std::string PW_pi_p = INIT_INDEX("PW_pi_p", tpi); - mesonContraction(application, 2, q_Kl_0, q_Ks_k, PW_K_k, kmom); - mesonContraction(application, 2, q_Kl_0, q_Ks_p, PW_K_p, pmom); - mesonContraction(application, 2, q_pil_k, q_pil_0, PW_pi_k, kmom); - mesonContraction(application, 2, q_pil_p, q_pil_0, PW_pi_p, pmom); - // Wall-Wall, to be done - requires modification of meson module. - - /*********************************************************************** - * CONTRACTIONS: 3pt Weak Hamiltonian, C & W (non-Eye type) classes. - **********************************************************************/ - std::string HW_CW_k = LABEL_3PT("HW_CW_k", tK, tpi); - std::string HW_CW_p = LABEL_3PT("HW_CW_p", tK, tpi); - weakContractionNonEye(application, 3, q_Kl_0, q_Ks_k, q_pil_k, q_pil_0, HW_CW_k); - weakContractionNonEye(application, 3, q_Kl_0, q_Ks_p, q_pil_p, q_pil_0, HW_CW_p); - - /*********************************************************************** - * CONTRACTIONS: 3pt sd insertion. - **********************************************************************/ - // Note: eventually will use wall sink smeared q_Kl_0 instead. 
- std::string sd_k = LABEL_3PT("sd_k", tK, tpi); - std::string sd_p = LABEL_3PT("sd_p", tK, tpi); - gamma3ptContraction(application, 3, q_Kl_0, q_Ks_k, q_pil_k, sd_k); - gamma3ptContraction(application, 3, q_Kl_0, q_Ks_p, q_pil_p, sd_p); - - for (unsigned int nn = 0; nn < n_noise; ++nn) - { - /******************************************************************* - * CONTRACTIONS: 3pt Weak Hamiltonian, S and E (Eye type) classes. - ******************************************************************/ - // Note: eventually will use wall sink smeared q_Kl_0 instead. - for (unsigned int f = 0; f < flavour.size(); ++f) - { - if ((f != strange) || do_disconnected) - { - std::string HW_SE_k = LABEL_3PT("HW_SE_k_" + flavour[f], tK, tpi); - std::string HW_SE_p = LABEL_3PT("HW_SE_p_" + flavour[f], tK, tpi); - std::string loop_q = noiseProps[f][nn]; - weakContractionEye(application, 3, q_Kl_0, q_Ks_k, q_pil_k, loop_q, HW_CW_k); - weakContractionEye(application, 3, q_Kl_0, q_Ks_p, q_pil_p, loop_q, HW_CW_p); - } - } - } - - // Perform separate contractions for each t_J position. - for (unsigned int j = 0; j < tJs.size(); ++j) - { - // Sequential sources for current insertions. Local for now, - // gamma_0 only. - unsigned int tJ = (tJs[j] + tK) % nt; - MSource::SeqGamma::Par seqPar; - std::string q_KlCl_q = LABEL_3PT("Q_KlCl_q", tK, tJ); - std::string q_KsCs_mq = LABEL_3PT("Q_KsCs_mq", tK, tJ); - std::string q_pilCl_q = LABEL_3PT("Q_pilCl_q", tpi, tJ); - std::string q_pilCl_mq = LABEL_3PT("Q_pilCl_mq", tpi, tJ); - MAKE_SEQUENTIAL_PROP(tJ, q_Kl_0, qmom, q_KlCl_q, solvers[light]); - MAKE_SEQUENTIAL_PROP(tJ, q_Ks_k, mqmom, q_KsCs_mq, solvers[strange]); - MAKE_SEQUENTIAL_PROP(tJ, q_pil_p, qmom, q_pilCl_q, solvers[light]); - MAKE_SEQUENTIAL_PROP(tJ, q_pil_0, mqmom, q_pilCl_mq, solvers[light]); - - /******************************************************************* - * CONTRACTIONS: pi and K 3pt contractions with current insertion. 
- ******************************************************************/ - // Wall-Point - std::string C_PW_Kl = LABEL_3PT("C_PW_Kl", tK, tJ); - std::string C_PW_Ksb = LABEL_3PT("C_PW_Ksb", tK, tJ); - std::string C_PW_pilb = LABEL_3PT("C_PW_pilb", tK, tJ); - std::string C_PW_pil = LABEL_3PT("C_PW_pil", tK, tJ); - mesonContraction(application, 3, q_KlCl_q, q_Ks_k, C_PW_Kl, pmom); - mesonContraction(application, 3, q_Kl_0, q_KsCs_mq, C_PW_Ksb, pmom); - mesonContraction(application, 3, q_pil_0, q_pilCl_q, C_PW_pilb, kmom); - mesonContraction(application, 3, q_pilCl_mq, q_pil_p, C_PW_pil, kmom); - // Wall-Wall, to be done. - - /******************************************************************* - * CONTRACTIONS: 4pt contractions, C & W classes. - ******************************************************************/ - std::string CW_Kl = LABEL_4PT("CW_Kl", tK, tJ, tpi); - std::string CW_Ksb = LABEL_4PT("CW_Ksb", tK, tJ, tpi); - std::string CW_pilb = LABEL_4PT("CW_pilb", tK, tJ, tpi); - std::string CW_pil = LABEL_4PT("CW_pil", tK, tJ, tpi); - weakContractionNonEye(application, 4, q_KlCl_q, q_Ks_k, q_pil_p, q_pil_0, CW_Kl); - weakContractionNonEye(application, 4, q_Kl_0, q_KsCs_mq, q_pil_p, q_pil_0, CW_Ksb); - weakContractionNonEye(application, 4, q_Kl_0, q_Ks_k, q_pilCl_q, q_pil_0, CW_pilb); - weakContractionNonEye(application, 4, q_Kl_0, q_Ks_k, q_pil_p, q_pilCl_mq, CW_pil); - - /******************************************************************* - * CONTRACTIONS: 4pt contractions, sd insertions. - ******************************************************************/ - // Note: eventually will use wall sink smeared q_Kl_0/q_KlCl_q instead. 
- std::string sd_Kl = LABEL_4PT("sd_Kl", tK, tJ, tpi); - std::string sd_Ksb = LABEL_4PT("sd_Ksb", tK, tJ, tpi); - std::string sd_pilb = LABEL_4PT("sd_pilb", tK, tJ, tpi); - gamma3ptContraction(application, 4, q_KlCl_q, q_Ks_k, q_pil_p, sd_Kl); - gamma3ptContraction(application, 4, q_Kl_0, q_KsCs_mq, q_pil_p, sd_Ksb); - gamma3ptContraction(application, 4, q_Kl_0, q_Ks_k, q_pilCl_q, sd_pilb); - - // Sequential sources for each noise propagator. - for (unsigned int nn = 0; nn < n_noise; ++nn) - { - std::string loop_stem = "loop_"; - - // Contraction required for each quark flavour - alternatively - // drop the strange loop if not performing disconnected - // contractions or neglecting H_W operators Q_3 -> Q_10. - for (unsigned int f = 0; f < flavour.size(); ++f) - { - if ((f != strange) || do_disconnected) - { - std::string eta = noiseSrcs[nn]; - std::string loop_q = noiseProps[f][nn]; - std::string loop_qCq = LABEL_3PT(loop_stem + flavour[f], tJ, nn); - std::string loop_qCq_res = loop_qCq + "_res"; - MAKE_SEQUENTIAL_PROP(tJ, noiseRes[f][nn], qmom, - loop_qCq_res, solvers[f]); - makeLoop(application, loop_qCq, eta, loop_qCq_res); - - /******************************************************* - * CONTRACTIONS: 4pt contractions, S & E classes. - ******************************************************/ - // Note: eventually will use wall sink smeared q_Kl_0/q_KlCl_q instead. 
- std::string SE_Kl = LABEL_4PT_NOISE("SE_Kl", tK, tJ, tpi, nn); - std::string SE_Ksb = LABEL_4PT_NOISE("SE_Ksb", tK, tJ, tpi, nn); - std::string SE_pilb = LABEL_4PT_NOISE("SE_pilb", tK, tJ, tpi, nn); - std::string SE_loop = LABEL_4PT_NOISE("SE_loop", tK, tJ, tpi, nn); - weakContractionEye(application, 4, q_KlCl_q, q_Ks_k, q_pil_p, loop_q, SE_Kl); - weakContractionEye(application, 4, q_Kl_0, q_KsCs_mq, q_pil_p, loop_q, SE_Ksb); - weakContractionEye(application, 4, q_Kl_0, q_Ks_k, q_pilCl_q, loop_q, SE_pilb); - weakContractionEye(application, 4, q_Kl_0, q_Ks_k, q_pil_p, loop_qCq, SE_loop); - - /******************************************************* - * CONTRACTIONS: 4pt contractions, pi0 disconnected - * loop. - ******************************************************/ - std::string disc0 = LABEL_4PT_NOISE("disc0", tK, tJ, tpi, nn); - disc0Contraction(application, q_Kl_0, q_Ks_k, q_pilCl_q, loop_q, disc0); - - /******************************************************* - * CONTRACTIONS: Disconnected loop. 
- ******************************************************/ - std::string discLoop = "disc_" + loop_qCq; - discLoopContraction(application, loop_qCq, discLoop); - } - } - } - } - } - // execution - std::string par_file_name = "rarekaon_000_100_tK0_tpi16_tJ8_noloop_mc0.2.xml"; - application.saveParameterFile(par_file_name); - application.run(); - - // epilogue - LOG(Message) << "Grid is finalizing now" << std::endl; - Grid_finalize(); - - return EXIT_SUCCESS; -} From 2bc4d0a20ec038786f6544783b368fed3bbfb804 Mon Sep 17 00:00:00 2001 From: Azusa Yamaguchi Date: Thu, 8 Jun 2017 22:21:25 +0100 Subject: [PATCH 064/177] Move code into utils --- tests/core/Test_fft_gfix.cc | 242 ++++-------------------------------- 1 file changed, 26 insertions(+), 216 deletions(-) diff --git a/tests/core/Test_fft_gfix.cc b/tests/core/Test_fft_gfix.cc index 7938241e..9732eb85 100644 --- a/tests/core/Test_fft_gfix.cc +++ b/tests/core/Test_fft_gfix.cc @@ -28,212 +28,6 @@ Author: Peter Boyle /* END LEGAL */ #include -using namespace Grid; -using namespace Grid::QCD; - -template -class FourierAcceleratedGaugeFixer : public Gimpl { - public: - INHERIT_GIMPL_TYPES(Gimpl); - - typedef typename Gimpl::GaugeLinkField GaugeMat; - typedef typename Gimpl::GaugeField GaugeLorentz; - - static void GaugeLinkToLieAlgebraField(const std::vector &U,std::vector &A) { - for(int mu=0;mu &A,GaugeMat &dmuAmu) { - dmuAmu=zero; - for(int mu=0;mu::avgPlaquette(Umu); - Real org_link_trace=WilsonLoops::linkTrace(Umu); - Real old_trace = org_link_trace; - Real trG; - - std::vector U(Nd,grid); - GaugeMat dmuAmu(grid); - - for(int i=0;i(Umu,mu); - //trG = SteepestDescentStep(U,alpha,dmuAmu); - trG = FourierAccelSteepestDescentStep(U,alpha,dmuAmu); - for(int mu=0;mu(Umu,U[mu],mu); - // Monitor progress and convergence test - // infrequently to minimise cost overhead - if ( i %20 == 0 ) { - Real plaq =WilsonLoops::avgPlaquette(Umu); - Real link_trace=WilsonLoops::linkTrace(Umu); - - std::cout << GridLogMessage << " 
Iteration "< &U,Real & alpha, GaugeMat & dmuAmu) { - GridBase *grid = U[0]._grid; - - std::vector A(Nd,grid); - GaugeMat g(grid); - - GaugeLinkToLieAlgebraField(U,A); - ExpiAlphaDmuAmu(A,g,alpha,dmuAmu); - - - Real vol = grid->gSites(); - Real trG = TensorRemove(sum(trace(g))).real()/vol/Nc; - - SU::GaugeTransform(U,g); - - return trG; - } - - static Real FourierAccelSteepestDescentStep(std::vector &U,Real & alpha, GaugeMat & dmuAmu) { - - GridBase *grid = U[0]._grid; - - Real vol = grid->gSites(); - - FFT theFFT((GridCartesian *)grid); - - LatticeComplex Fp(grid); - LatticeComplex psq(grid); psq=zero; - LatticeComplex pmu(grid); - LatticeComplex one(grid); one = Complex(1.0,0.0); - - GaugeMat g(grid); - GaugeMat dmuAmu_p(grid); - std::vector A(Nd,grid); - - GaugeLinkToLieAlgebraField(U,A); - - DmuAmu(A,dmuAmu); - - theFFT.FFT_all_dim(dmuAmu_p,dmuAmu,FFT::forward); - - ////////////////////////////////// - // Work out Fp = psq_max/ psq... - ////////////////////////////////// - std::vector latt_size = grid->GlobalDimensions(); - std::vector coor(grid->_ndimension,0); - for(int mu=0;mu::taExp(ciadmam,g); - - Real trG = TensorRemove(sum(trace(g))).real()/vol/Nc; - - SU::GaugeTransform(U,g); - - return trG; - } - - static void ExpiAlphaDmuAmu(const std::vector &A,GaugeMat &g,Real & alpha, GaugeMat &dmuAmu) { - GridBase *grid = g._grid; - Complex cialpha(0.0,-alpha); - GaugeMat ciadmam(grid); - DmuAmu(A,dmuAmu); - ciadmam = dmuAmu*cialpha; - SU::taExp(ciadmam,g); - } -/* - //////////////////////////////////////////////////////////////// - // NB The FT for fields living on links has an extra phase in it - // Could add these to the FFT class as a later task since this code - // might be reused elsewhere ???? 
- //////////////////////////////////////////////////////////////// - static void InverseFourierTransformAmu(FFT &theFFT,const std::vector &Ap,std::vector &Ax) { - GridBase * grid = theFFT.Grid(); - std::vector latt_size = grid->GlobalDimensions(); - - ComplexField pmu(grid); - ComplexField pha(grid); - GaugeMat Apha(grid); - - Complex ci(0.0,1.0); - - for(int mu=0;mu &Ax,std::vector &Ap) { - GridBase * grid = theFFT.Grid(); - std::vector latt_size = grid->GlobalDimensions(); - - ComplexField pmu(grid); - ComplexField pha(grid); - Complex ci(0.0,1.0); - - // Sign convention for FFTW calls: - // A(x)= Sum_p e^ipx A(p) / V - // A(p)= Sum_p e^-ipx A(x) - - for(int mu=0;mu seeds({1,2,3,4}); @@ -264,22 +58,24 @@ int main (int argc, char ** argv) std::cout<< "*****************************************************************" <::avgPlaquette(Umu); std::cout << " Initial plaquette "<::SteepestDescentGaugeFix(Umu,alpha,10000,1.0e-10, 1.0e-10); + Umu = Urnd; + FourierAcceleratedGaugeFixer::SteepestDescentGaugeFix(Umu,alpha,10000,1.0e-12, 1.0e-12,false); plaq=WilsonLoops::avgPlaquette(Umu); std::cout << " Final plaquette "<::SteepestDescentGaugeFix(Umu,alpha,10000,1.0e-12, 1.0e-12,true); - // std::cout<< "*****************************************************************" <::avgPlaquette(Umu); + std::cout << " Final plaquette "<::avgPlaquette(Umu); + std::cout << " Initial plaquette "<::SteepestDescentGaugeFix(Umu,alpha,10000,1.0e-12, 1.0e-12,true); + + plaq=WilsonLoops::avgPlaquette(Umu); + std::cout << " Final plaquette "< Date: Thu, 8 Jun 2017 22:21:50 +0100 Subject: [PATCH 065/177] Move Gfix into utils --- lib/Grid.h | 1 + 1 file changed, 1 insertion(+) diff --git a/lib/Grid.h b/lib/Grid.h index 543b0330..bf548211 100644 --- a/lib/Grid.h +++ b/lib/Grid.h @@ -41,6 +41,7 @@ Author: paboyle #include #include #include +#include #include #include From 70ab598c96401761996b78f6d0343f16267c6e73 Mon Sep 17 00:00:00 2001 From: Azusa Yamaguchi Date: Thu, 8 Jun 2017 22:22:23 +0100 
Subject: [PATCH 066/177] Move gfix into utils --- lib/qcd/utils/GaugeFix.h | 188 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 188 insertions(+) create mode 100644 lib/qcd/utils/GaugeFix.h diff --git a/lib/qcd/utils/GaugeFix.h b/lib/qcd/utils/GaugeFix.h new file mode 100644 index 00000000..4ff216e4 --- /dev/null +++ b/lib/qcd/utils/GaugeFix.h @@ -0,0 +1,188 @@ + /************************************************************************************* + + grid` physics library, www.github.com/paboyle/Grid + + Copyright (C) 2015 + +Author: Azusa Yamaguchi +Author: Peter Boyle + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along + with this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+ + See the full license in the file "LICENSE" in the top level distribution directory + *************************************************************************************/ + /* END LEGAL */ +//#include + +using namespace Grid; +using namespace Grid::QCD; + +template +class FourierAcceleratedGaugeFixer : public Gimpl { + public: + INHERIT_GIMPL_TYPES(Gimpl); + + typedef typename Gimpl::GaugeLinkField GaugeMat; + typedef typename Gimpl::GaugeField GaugeLorentz; + + static void GaugeLinkToLieAlgebraField(const std::vector &U,std::vector &A) { + for(int mu=0;mu &A,GaugeMat &dmuAmu) { + dmuAmu=zero; + for(int mu=0;mu::avgPlaquette(Umu); + Real org_link_trace=WilsonLoops::linkTrace(Umu); + Real old_trace = org_link_trace; + Real trG; + + std::vector U(Nd,grid); + GaugeMat dmuAmu(grid); + + for(int i=0;i(Umu,mu); + if ( Fourier==false ) { + trG = SteepestDescentStep(U,alpha,dmuAmu); + } else { + trG = FourierAccelSteepestDescentStep(U,alpha,dmuAmu); + } + for(int mu=0;mu(Umu,U[mu],mu); + // Monitor progress and convergence test + // infrequently to minimise cost overhead + if ( i %20 == 0 ) { + Real plaq =WilsonLoops::avgPlaquette(Umu); + Real link_trace=WilsonLoops::linkTrace(Umu); + + if (Fourier) + std::cout << GridLogMessage << "Fourier Iteration "< &U,Real & alpha, GaugeMat & dmuAmu) { + GridBase *grid = U[0]._grid; + + std::vector A(Nd,grid); + GaugeMat g(grid); + + GaugeLinkToLieAlgebraField(U,A); + ExpiAlphaDmuAmu(A,g,alpha,dmuAmu); + + + Real vol = grid->gSites(); + Real trG = TensorRemove(sum(trace(g))).real()/vol/Nc; + + SU::GaugeTransform(U,g); + + return trG; + } + + static Real FourierAccelSteepestDescentStep(std::vector &U,Real & alpha, GaugeMat & dmuAmu) { + + GridBase *grid = U[0]._grid; + + Real vol = grid->gSites(); + + FFT theFFT((GridCartesian *)grid); + + LatticeComplex Fp(grid); + LatticeComplex psq(grid); psq=zero; + LatticeComplex pmu(grid); + LatticeComplex one(grid); one = Complex(1.0,0.0); + + GaugeMat g(grid); + GaugeMat dmuAmu_p(grid); + 
std::vector A(Nd,grid); + + GaugeLinkToLieAlgebraField(U,A); + + DmuAmu(A,dmuAmu); + + theFFT.FFT_all_dim(dmuAmu_p,dmuAmu,FFT::forward); + + ////////////////////////////////// + // Work out Fp = psq_max/ psq... + ////////////////////////////////// + std::vector latt_size = grid->GlobalDimensions(); + std::vector coor(grid->_ndimension,0); + for(int mu=0;mu::taExp(ciadmam,g); + + Real trG = TensorRemove(sum(trace(g))).real()/vol/Nc; + + SU::GaugeTransform(U,g); + + return trG; + } + + static void ExpiAlphaDmuAmu(const std::vector &A,GaugeMat &g,Real & alpha, GaugeMat &dmuAmu) { + GridBase *grid = g._grid; + Complex cialpha(0.0,-alpha); + GaugeMat ciadmam(grid); + DmuAmu(A,dmuAmu); + ciadmam = dmuAmu*cialpha; + SU::taExp(ciadmam,g); + } +}; + From 3bfd1f13e67735d2273127eefabafb779c00996d Mon Sep 17 00:00:00 2001 From: paboyle Date: Sun, 11 Jun 2017 23:14:10 +0100 Subject: [PATCH 067/177] I/O improvements --- benchmarks/Benchmark_memory_bandwidth.cc | 4 +- benchmarks/Benchmark_su3.cc | 4 +- configure.ac | 2 +- extras/Hadrons/Modules/MGauge/Load.cc | 4 +- lib/Grid.h | 1 + lib/GridStd.h | 1 + lib/cartesian/Cartesian_base.h | 9 +- lib/cartesian/Cartesian_full.h | 8 +- lib/cartesian/Cartesian_red_black.h | 4 + lib/communicator/Communicator_base.h | 2 + lib/communicator/Communicator_mpi.cc | 8 + lib/communicator/Communicator_mpi3.cc | 8 + lib/communicator/Communicator_none.cc | 2 + lib/parallelIO/BinaryIO.h | 249 ++++++--- lib/parallelIO/IldgIO.h | 472 ++++++++++++------ lib/parallelIO/IldgIOtypes.h | 110 ++-- lib/parallelIO/NerscIO.h | 301 +++-------- .../hmc/checkpointers/BinaryCheckpointer.h | 38 +- lib/qcd/hmc/checkpointers/ILDGCheckpointer.h | 32 +- lib/qcd/hmc/checkpointers/NerscCheckpointer.h | 2 +- lib/qcd/utils/Utils.h | 3 - lib/serialisation/XmlIO.cc | 58 ++- lib/serialisation/XmlIO.h | 11 +- tests/IO/Test_nersc_io.cc | 4 +- tests/IO/Test_nersc_read.cc | 2 +- tests/IO/Test_serialisation.cc | 19 +- 26 files changed, 779 insertions(+), 579 deletions(-) diff --git 
a/benchmarks/Benchmark_memory_bandwidth.cc b/benchmarks/Benchmark_memory_bandwidth.cc index d57c4df5..1aa088f8 100644 --- a/benchmarks/Benchmark_memory_bandwidth.cc +++ b/benchmarks/Benchmark_memory_bandwidth.cc @@ -55,8 +55,8 @@ int main (int argc, char ** argv) std::cout< latt_size ({lat*mpi_layout[0],lat*mpi_layout[1],lat*mpi_layout[2],lat*mpi_layout[3]}); diff --git a/benchmarks/Benchmark_su3.cc b/benchmarks/Benchmark_su3.cc index 1321715a..3d7f9bc9 100644 --- a/benchmarks/Benchmark_su3.cc +++ b/benchmarks/Benchmark_su3.cc @@ -35,9 +35,9 @@ using namespace Grid::QCD; int main (int argc, char ** argv) { Grid_init(&argc,&argv); -#define LMAX (32) +#define LMAX (64) - int Nloop=200; + int Nloop=20; std::vector simd_layout = GridDefaultSimd(Nd,vComplex::Nsimd()); std::vector mpi_layout = GridDefaultMpi(); diff --git a/configure.ac b/configure.ac index 62b7545b..2fc9dfec 100644 --- a/configure.ac +++ b/configure.ac @@ -27,7 +27,7 @@ AX_GXX_VERSION AC_DEFINE_UNQUOTED([GXX_VERSION],["$GXX_VERSION"], [version of g++ that will compile the code]) -CXXFLAGS="-O3 $CXXFLAGS" +CXXFLAGS="-g $CXXFLAGS" ############### Checks for typedefs, structures, and compiler characteristics diff --git a/extras/Hadrons/Modules/MGauge/Load.cc b/extras/Hadrons/Modules/MGauge/Load.cc index e5ee8abb..062e7e98 100644 --- a/extras/Hadrons/Modules/MGauge/Load.cc +++ b/extras/Hadrons/Modules/MGauge/Load.cc @@ -65,7 +65,7 @@ void TLoad::setup(void) // execution /////////////////////////////////////////////////////////////////// void TLoad::execute(void) { - NerscField header; + FieldMetaData header; std::string fileName = par().file + "." 
+ std::to_string(env().getTrajectory()); @@ -74,5 +74,5 @@ void TLoad::execute(void) LatticeGaugeField &U = *env().createLattice(getName()); NerscIO::readConfiguration(U, header, fileName); LOG(Message) << "NERSC header:" << std::endl; - dump_nersc_header(header, LOG(Message)); + dump_meta_data(header, LOG(Message)); } diff --git a/lib/Grid.h b/lib/Grid.h index 543b0330..ce16894f 100644 --- a/lib/Grid.h +++ b/lib/Grid.h @@ -42,6 +42,7 @@ Author: paboyle #include #include #include +#include #include #endif diff --git a/lib/GridStd.h b/lib/GridStd.h index fb5e5b21..959ba9ac 100644 --- a/lib/GridStd.h +++ b/lib/GridStd.h @@ -18,6 +18,7 @@ #include #include #include +#include /////////////////// // Grid config diff --git a/lib/cartesian/Cartesian_base.h b/lib/cartesian/Cartesian_base.h index b31b3b5f..0db6ce0d 100644 --- a/lib/cartesian/Cartesian_base.h +++ b/lib/cartesian/Cartesian_base.h @@ -50,7 +50,6 @@ public: GridBase(const std::vector & processor_grid) : CartesianCommunicator(processor_grid) {}; - // Physics Grid information. std::vector _simd_layout;// Which dimensions get relayed out over simd lanes. std::vector _fdimensions;// (full) Global dimensions of array prior to cb removal @@ -63,13 +62,12 @@ public: int _isites; int _fsites; // _isites*_osites = product(dimensions). int _gsites; - std::vector _slice_block; // subslice information + std::vector _slice_block;// subslice information std::vector _slice_stride; std::vector _slice_nblock; - // Might need these at some point - // std::vector _lstart; // local start of array in gcoors. 
_processor_coor[d]*_ldimensions[d] - // std::vector _lend; // local end of array in gcoors _processor_coor[d]*_ldimensions[d]+_ldimensions_[d]-1 + std::vector _lstart; // local start of array in gcoors _processor_coor[d]*_ldimensions[d] + std::vector _lend ; // local end of array in gcoors _processor_coor[d]*_ldimensions[d]+_ldimensions_[d]-1 public: @@ -176,6 +174,7 @@ public: inline int gSites(void) const { return _isites*_osites*_Nprocessors; }; inline int Nd (void) const { return _ndimension;}; + inline const std::vector LocalStarts(void) { return _lstart; }; inline const std::vector &FullDimensions(void) { return _fdimensions;}; inline const std::vector &GlobalDimensions(void) { return _gdimensions;}; inline const std::vector &LocalDimensions(void) { return _ldimensions;}; diff --git a/lib/cartesian/Cartesian_full.h b/lib/cartesian/Cartesian_full.h index 7e29d311..b0e47fa4 100644 --- a/lib/cartesian/Cartesian_full.h +++ b/lib/cartesian/Cartesian_full.h @@ -76,6 +76,8 @@ public: _ldimensions.resize(_ndimension); _rdimensions.resize(_ndimension); _simd_layout.resize(_ndimension); + _lstart.resize(_ndimension); + _lend.resize(_ndimension); _ostride.resize(_ndimension); _istride.resize(_ndimension); @@ -94,8 +96,10 @@ public: // Use a reduced simd grid _ldimensions[d]= _gdimensions[d]/_processors[d]; //local dimensions _rdimensions[d]= _ldimensions[d]/_simd_layout[d]; //overdecomposition - _osites *= _rdimensions[d]; - _isites *= _simd_layout[d]; + _lstart[d] = _processor_coor[d]*_ldimensions[d]; + _lend[d] = _processor_coor[d]*_ldimensions[d]+_ldimensions[d]-1; + _osites *= _rdimensions[d]; + _isites *= _simd_layout[d]; // Addressing support if ( d==0 ) { diff --git a/lib/cartesian/Cartesian_red_black.h b/lib/cartesian/Cartesian_red_black.h index 2f132c19..3037de00 100644 --- a/lib/cartesian/Cartesian_red_black.h +++ b/lib/cartesian/Cartesian_red_black.h @@ -151,6 +151,8 @@ public: _ldimensions.resize(_ndimension); _rdimensions.resize(_ndimension); 
_simd_layout.resize(_ndimension); + _lstart.resize(_ndimension); + _lend.resize(_ndimension); _ostride.resize(_ndimension); _istride.resize(_ndimension); @@ -169,6 +171,8 @@ public: _gdimensions[d] = _gdimensions[d]/2; // Remove a checkerboard } _ldimensions[d] = _gdimensions[d]/_processors[d]; + _lstart[d] = _processor_coor[d]*_ldimensions[d]; + _lend[d] = _processor_coor[d]*_ldimensions[d]+_ldimensions[d]-1; // Use a reduced simd grid _simd_layout[d] = simd_layout[d]; diff --git a/lib/communicator/Communicator_base.h b/lib/communicator/Communicator_base.h index 23d4f647..12a8429f 100644 --- a/lib/communicator/Communicator_base.h +++ b/lib/communicator/Communicator_base.h @@ -177,6 +177,8 @@ class CartesianCommunicator { void GlobalSumVector(ComplexF *c,int N); void GlobalSum(ComplexD &c); void GlobalSumVector(ComplexD *c,int N); + void GlobalXOR(uint32_t &); + void GlobalXOR(uint64_t &); template void GlobalSum(obj &o){ typedef typename obj::scalar_type scalar_type; diff --git a/lib/communicator/Communicator_mpi.cc b/lib/communicator/Communicator_mpi.cc index 470a06c7..bd2a62fb 100644 --- a/lib/communicator/Communicator_mpi.cc +++ b/lib/communicator/Communicator_mpi.cc @@ -83,6 +83,14 @@ void CartesianCommunicator::GlobalSum(uint64_t &u){ int ierr=MPI_Allreduce(MPI_IN_PLACE,&u,1,MPI_UINT64_T,MPI_SUM,communicator); assert(ierr==0); } +void CartesianCommunicator::GlobalXOR(uint32_t &u){ + int ierr=MPI_Allreduce(MPI_IN_PLACE,&u,1,MPI_UINT32_T,MPI_BXOR,communicator); + assert(ierr==0); +} +void CartesianCommunicator::GlobalXOR(uint64_t &u){ + int ierr=MPI_Allreduce(MPI_IN_PLACE,&u,1,MPI_UINT64_T,MPI_BXOR,communicator); + assert(ierr==0); +} void CartesianCommunicator::GlobalSum(float &f){ int ierr=MPI_Allreduce(MPI_IN_PLACE,&f,1,MPI_FLOAT,MPI_SUM,communicator); assert(ierr==0); diff --git a/lib/communicator/Communicator_mpi3.cc b/lib/communicator/Communicator_mpi3.cc index 54a0f9b5..632eb991 100644 --- a/lib/communicator/Communicator_mpi3.cc +++ 
b/lib/communicator/Communicator_mpi3.cc @@ -510,6 +510,14 @@ void CartesianCommunicator::GlobalSum(uint64_t &u){ int ierr=MPI_Allreduce(MPI_IN_PLACE,&u,1,MPI_UINT64_T,MPI_SUM,communicator); assert(ierr==0); } +void CartesianCommunicator::GlobalXOR(uint32_t &u){ + int ierr=MPI_Allreduce(MPI_IN_PLACE,&u,1,MPI_UINT32_T,MPI_BXOR,communicator); + assert(ierr==0); +} +void CartesianCommunicator::GlobalXOR(uint64_t &u){ + int ierr=MPI_Allreduce(MPI_IN_PLACE,&u,1,MPI_UINT64_T,MPI_BXOR,communicator); + assert(ierr==0); +} void CartesianCommunicator::GlobalSum(float &f){ int ierr=MPI_Allreduce(MPI_IN_PLACE,&f,1,MPI_FLOAT,MPI_SUM,communicator); assert(ierr==0); diff --git a/lib/communicator/Communicator_none.cc b/lib/communicator/Communicator_none.cc index ace2868b..5319ab93 100644 --- a/lib/communicator/Communicator_none.cc +++ b/lib/communicator/Communicator_none.cc @@ -59,6 +59,8 @@ void CartesianCommunicator::GlobalSum(double &){} void CartesianCommunicator::GlobalSum(uint32_t &){} void CartesianCommunicator::GlobalSum(uint64_t &){} void CartesianCommunicator::GlobalSumVector(double *,int N){} +void CartesianCommunicator::GlobalXOR(uint32_t &){} +void CartesianCommunicator::GlobalXOR(uint64_t &){} void CartesianCommunicator::SendRecvPacket(void *xmit, void *recv, diff --git a/lib/parallelIO/BinaryIO.h b/lib/parallelIO/BinaryIO.h index 8b8d4165..bc3da38b 100644 --- a/lib/parallelIO/BinaryIO.h +++ b/lib/parallelIO/BinaryIO.h @@ -125,57 +125,94 @@ class BinaryIO { ///////////////////////////////////////////////////////////////////////////// // more byte manipulation helpers ///////////////////////////////////////////////////////////////////////////// - static inline void Uint32Checksum(uint32_t *buf,uint64_t buf_size_bytes,uint32_t &csum) + + template static inline void Uint32Checksum(Lattice &lat, + uint32_t &nersc_csum, + uint32_t &scidac_csuma, + uint32_t &scidac_csumb) + { + typedef typename vobj::scalar_object sobj; + + GridBase *grid = lat._grid; + int lsites = 
grid->lSites(); + + std::vector scalardata(lsites); + unvectorizeToLexOrdArray(scalardata,lat); + + Uint32Checksum(grid,scalardata,nersc_csum,scidac_csuma,scidac_csumb); + } + + template + static inline void Uint32Checksum(GridBase *grid, + std::vector &fbuf, + uint32_t &nersc_csum, + uint32_t &scidac_csuma, + uint32_t &scidac_csumb) + { + const uint64_t size32 = sizeof(fobj)/sizeof(uint32_t); + + + int nd = grid->_ndimension; + + uint64_t lsites =grid->lSites(); + std::vector local_vol =grid->LocalDimensions(); + std::vector local_start =grid->LocalStarts(); + std::vector global_vol =grid->FullDimensions(); + #pragma omp parallel { - uint32_t csum_thr=0; - uint64_t count = buf_size_bytes/sizeof(uint32_t); + std::vector coor(nd); + uint32_t nersc_csum_thr=0; + uint32_t scidac_csuma_thr=0; + uint32_t scidac_csumb_thr=0; + uint32_t site_crc=0; + uint32_t zcrc = crc32(0L, Z_NULL, 0); + #pragma omp for - for(uint64_t i=0;i>(32-gsite29); + scidac_csumb_thr ^= site_crc<>(32-gsite31); } + #pragma omp critical - csum = csum + csum_thr; + { + nersc_csum += nersc_csum_thr; + scidac_csuma^= scidac_csuma_thr; + scidac_csumb^= scidac_csumb_thr; + } } } + // Network is big endian - static inline void htobe32_v(void *file_object,uint64_t bytes,uint32_t &csum){ - Uint32Checksum((uint32_t *)file_object,bytes,csum); - htobe32_v(file_object,bytes); - } - static inline void htobe64_v(void *file_object,uint64_t bytes,uint32_t &csum){ - Uint32Checksum((uint32_t *)file_object,bytes,csum); - htobe64_v(file_object,bytes); - } - static inline void htole32_v(void *file_object,uint64_t bytes,uint32_t &csum){ - Uint32Checksum((uint32_t *)file_object,bytes,csum); - htole32_v(file_object,bytes); - } - static inline void htole64_v(void *file_object,uint64_t bytes,uint32_t &csum){ - Uint32Checksum((uint32_t *)file_object,bytes,csum); - htole64_v(file_object,bytes); - } - static inline void be32toh_v(void *file_object,uint64_t bytes,uint32_t &csum){ - be32toh_v(file_object,bytes); - 
Uint32Checksum((uint32_t *)file_object,bytes,csum); - } - static inline void be64toh_v(void *file_object,uint64_t bytes,uint32_t &csum){ - be64toh_v(file_object,bytes); - Uint32Checksum((uint32_t *)file_object,bytes,csum); - } - static inline void le32toh_v(void *file_object,uint64_t bytes,uint32_t &csum){ - le32toh_v(file_object,bytes); - Uint32Checksum((uint32_t *)file_object,bytes,csum); - } - static inline void le64toh_v(void *file_object,uint64_t bytes,uint32_t &csum){ - le64toh_v(file_object,bytes); - Uint32Checksum((uint32_t *)file_object,bytes,csum); - } - static inline void htobe32_v(void *file_object,uint64_t bytes){ be32toh_v(file_object,bytes);} - static inline void htobe64_v(void *file_object,uint64_t bytes){ be64toh_v(file_object,bytes);} - static inline void htole32_v(void *file_object,uint64_t bytes){ le32toh_v(file_object,bytes);} - static inline void htole64_v(void *file_object,uint64_t bytes){ le64toh_v(file_object,bytes);} + static inline void htobe32_v(void *file_object,uint32_t bytes){ be32toh_v(file_object,bytes);} + static inline void htobe64_v(void *file_object,uint32_t bytes){ be64toh_v(file_object,bytes);} + static inline void htole32_v(void *file_object,uint32_t bytes){ le32toh_v(file_object,bytes);} + static inline void htole64_v(void *file_object,uint32_t bytes){ le64toh_v(file_object,bytes);} static inline void be32toh_v(void *file_object,uint64_t bytes) { @@ -199,6 +236,7 @@ class BinaryIO { fp[i] = ntohl(f); } } + // BE is same as network static inline void be64toh_v(void *file_object,uint64_t bytes) { @@ -238,18 +276,23 @@ class BinaryIO { static const int BINARYIO_WRITE = 0x01; template - static inline uint32_t IOobject(word w, - GridBase *grid, - std::vector &iodata, - std::string file, - int offset, - const std::string &format, int control) + static inline void IOobject(word w, + GridBase *grid, + std::vector &iodata, + std::string file, + int offset, + const std::string &format, int control, + uint32_t &nersc_csum, + uint32_t 
&scidac_csuma, + uint32_t &scidac_csumb) { grid->Barrier(); GridStopWatch timer; GridStopWatch bstimer; - uint32_t csum=0; + nersc_csum=0; + scidac_csuma=0; + scidac_csumb=0; int ndim = grid->Dimensions(); int nrank = grid->ProcessorCount(); @@ -359,20 +402,22 @@ class BinaryIO { grid->Barrier(); bstimer.Start(); - if (ieee32big) be32toh_v((void *)&iodata[0], sizeof(fobj)*iodata.size(),csum); - if (ieee32) le32toh_v((void *)&iodata[0], sizeof(fobj)*iodata.size(),csum); - if (ieee64big) be64toh_v((void *)&iodata[0], sizeof(fobj)*iodata.size(),csum); - if (ieee64) le64toh_v((void *)&iodata[0], sizeof(fobj)*iodata.size(),csum); + if (ieee32big) be32toh_v((void *)&iodata[0], sizeof(fobj)*iodata.size()); + if (ieee32) le32toh_v((void *)&iodata[0], sizeof(fobj)*iodata.size()); + if (ieee64big) be64toh_v((void *)&iodata[0], sizeof(fobj)*iodata.size()); + if (ieee64) le64toh_v((void *)&iodata[0], sizeof(fobj)*iodata.size()); + Uint32Checksum(grid,iodata,nersc_csum,scidac_csuma,scidac_csumb); bstimer.Stop(); } if ( control & BINARYIO_WRITE ) { bstimer.Start(); - if (ieee32big) htobe32_v((void *)&iodata[0], sizeof(fobj)*iodata.size(),csum); - if (ieee32) htole32_v((void *)&iodata[0], sizeof(fobj)*iodata.size(),csum); - if (ieee64big) htobe64_v((void *)&iodata[0], sizeof(fobj)*iodata.size(),csum); - if (ieee64) htole64_v((void *)&iodata[0], sizeof(fobj)*iodata.size(),csum); + Uint32Checksum(grid,iodata,nersc_csum,scidac_csuma,scidac_csumb); + if (ieee32big) htobe32_v((void *)&iodata[0], sizeof(fobj)*iodata.size()); + if (ieee32) htole32_v((void *)&iodata[0], sizeof(fobj)*iodata.size()); + if (ieee64big) htobe64_v((void *)&iodata[0], sizeof(fobj)*iodata.size()); + if (ieee64) htole64_v((void *)&iodata[0], sizeof(fobj)*iodata.size()); bstimer.Stop(); grid->Barrier(); @@ -418,17 +463,27 @@ class BinaryIO { // Safety check ////////////////////////////////////////////////////////////////////////////// grid->Barrier(); - grid->GlobalSum(csum); + grid->GlobalSum(nersc_csum); + 
grid->GlobalXOR(scidac_csuma); + grid->GlobalXOR(scidac_csumb); grid->Barrier(); - - return csum; + // std::cout << "Binary IO NERSC checksum 0x"< - static inline uint32_t readLatticeObject(Lattice &Umu,std::string file,munger munge,int offset,const std::string &format) + static inline void readLatticeObject(Lattice &Umu, + std::string file, + munger munge, + int offset, + const std::string &format, + uint32_t &nersc_csum, + uint32_t &scidac_csuma, + uint32_t &scidac_csumb) { typedef typename vobj::scalar_object sobj; typedef typename vobj::Realified::scalar_type word; word w=0; @@ -439,7 +494,8 @@ class BinaryIO { std::vector scalardata(lsites); std::vector iodata(lsites); // Munge, checksum, byte order in here - uint32_t csum= IOobject(w,grid,iodata,file,offset,format,BINARYIO_READ|BINARYIO_LEXICOGRAPHIC); + IOobject(w,grid,iodata,file,offset,format,BINARYIO_READ|BINARYIO_LEXICOGRAPHIC, + nersc_csum,scidac_csuma,scidac_csumb); GridStopWatch timer; timer.Start(); @@ -451,15 +507,20 @@ class BinaryIO { timer.Stop(); std::cout< - static inline uint32_t writeLatticeObject(Lattice &Umu,std::string file,munger munge,int offset,const std::string &format) + static inline void writeLatticeObject(Lattice &Umu, + std::string file, + munger munge, + int offset, + const std::string &format, + uint32_t &nersc_csum, + uint32_t &scidac_csuma, + uint32_t &scidac_csumb) { typedef typename vobj::scalar_object sobj; typedef typename vobj::Realified::scalar_type word; word w=0; @@ -480,36 +541,45 @@ class BinaryIO { grid->Barrier(); timer.Stop(); - uint32_t csum= IOobject(w,grid,iodata,file,offset,format,BINARYIO_WRITE|BINARYIO_LEXICOGRAPHIC); + IOobject(w,grid,iodata,file,offset,format,BINARYIO_WRITE|BINARYIO_LEXICOGRAPHIC, + nersc_csum,scidac_csuma,scidac_csumb); std::cout< RNGstate; typedef RngStateType word; word w=0; - uint32_t csum = 0; std::string format = "IEEE32BIG"; GridBase *grid = parallel._grid; int gsites = grid->gSites(); int lsites = grid->lSites(); + uint32_t 
nersc_csum_tmp; + uint32_t scidac_csuma_tmp; + uint32_t scidac_csumb_tmp; + GridStopWatch timer; std::cout << GridLogMessage << "RNG read I/O on file " << file << std::endl; std::vector iodata(lsites); - csum= IOobject(w,grid,iodata,file,offset,format,BINARYIO_READ|BINARYIO_LEXICOGRAPHIC); + IOobject(w,grid,iodata,file,offset,format,BINARYIO_READ|BINARYIO_LEXICOGRAPHIC, + nersc_csum,scidac_csuma,scidac_csumb); timer.Start(); parallel_for(int lidx=0;lidx tmp(RngStateCount); std::copy(iodata[0].begin(),iodata[0].end(),tmp.begin()); serial.SetState(tmp,0); } - std::cout << GridLogMessage << "RNG file checksum " << std::hex << csum << std::dec << std::endl; + nersc_csum = nersc_csum + nersc_csum_tmp; + scidac_csuma = scidac_csuma ^ scidac_csuma_tmp; + scidac_csumb = scidac_csumb ^ scidac_csumb_tmp; + + // std::cout << GridLogMessage << "RNG file nersc_checksum " << std::hex << nersc_csum << std::dec << std::endl; + // std::cout << GridLogMessage << "RNG file scidac_checksuma " << std::hex << scidac_csuma << std::dec << std::endl; + // std::cout << GridLogMessage << "RNG file scidac_checksumb " << std::hex << scidac_csumb << std::dec << std::endl; + std::cout << GridLogMessage << "RNG state overhead " << timer.Elapsed() << std::endl; - return csum; } ///////////////////////////////////////////////////////////////////////////// // Write a RNG; lexico map to an array of state and use IOobject ////////////////////////////////////////////////////////////////////////////////////// - static inline uint32_t writeRNG(GridSerialRNG &serial,GridParallelRNG ¶llel,std::string file,int offset) + static inline void writeRNG(GridSerialRNG &serial, + GridParallelRNG ¶llel, + std::string file, + int offset, + uint32_t &nersc_csum, + uint32_t &scidac_csuma, + uint32_t &scidac_csumb) { typedef typename GridSerialRNG::RngStateType RngStateType; typedef RngStateType word; word w=0; const int RngStateCount = GridSerialRNG::RngStateCount; typedef std::array RNGstate; - uint32_t csum = 0; - 
GridBase *grid = parallel._grid; int gsites = grid->gSites(); int lsites = grid->lSites(); + uint32_t nersc_csum_tmp; + uint32_t scidac_csuma_tmp; + uint32_t scidac_csumb_tmp; + GridStopWatch timer; std::string format = "IEEE32BIG"; @@ -561,7 +647,8 @@ class BinaryIO { } timer.Stop(); - csum= IOobject(w,grid,iodata,file,offset,format,BINARYIO_WRITE|BINARYIO_LEXICOGRAPHIC); + IOobject(w,grid,iodata,file,offset,format,BINARYIO_WRITE|BINARYIO_LEXICOGRAPHIC, + nersc_csum,scidac_csuma,scidac_csumb); iodata.resize(1); { @@ -569,11 +656,11 @@ class BinaryIO { serial.GetState(tmp,0); std::copy(tmp.begin(),tmp.end(),iodata[0].begin()); } - csum+= IOobject(w,grid,iodata,file,offset,format,BINARYIO_WRITE|BINARYIO_MASTER_APPEND); + IOobject(w,grid,iodata,file,offset,format,BINARYIO_WRITE|BINARYIO_MASTER_APPEND, + nersc_csum_tmp,scidac_csuma_tmp,scidac_csumb_tmp); - std::cout << GridLogMessage << "RNG file checksum " << std::hex << csum << std::dec << std::endl; + // std::cout << GridLogMessage << "RNG file checksum " << std::hex << csum << std::dec << std::endl; std::cout << GridLogMessage << "RNG state overhead " << timer.Elapsed() << std::endl; - return csum; } }; } diff --git a/lib/parallelIO/IldgIO.h b/lib/parallelIO/IldgIO.h index 0912e2f6..237edf43 100644 --- a/lib/parallelIO/IldgIO.h +++ b/lib/parallelIO/IldgIO.h @@ -43,201 +43,351 @@ extern "C" { // for linkage #include "lime.h" } + +// Unused SCIDAC records names +// SCIDAC_PRIVATE_FILE_XML "scidac-private-file-xml" +// SCIDAC_SITELIST "scidac-sitelist" +// SCIDAC_FILE_XML "scidac-file-xml" +// SCIDAC_RIVATE_RECORD_XML "scidac-private-record-xml" +// SCIDAC_RECORD_XML "scidac-record-xml" +// SCIDAC_BINARY_DATA "scidac-binary-data" +// +// Scidac checksum: CRC32 every site, xor reduce some hash of this. 
+// https://github.com/usqcd-software/qio/blob/master/lib/dml/DML_utils.c + namespace Grid { namespace QCD { -inline void ILDGGrid(GridBase *grid, ILDGField &header) { - assert(grid->_ndimension == 4); // emit error if not - header.dimension.resize(4); - header.boundary.resize(4); - for (int d = 0; d < 4; d++) { - header.dimension[d] = grid->_fdimensions[d]; - // Read boundary conditions from ... ? - header.boundary[d] = std::string("periodic"); - } -} - -inline void ILDGChecksum(uint32_t *buf, uint32_t buf_size_bytes, - uint32_t &csum) { - BinaryIO::Uint32Checksum(buf, buf_size_bytes, csum); -} - -////////////////////////////////////////////////////////////////////// -// Utilities ; these are QCD aware -////////////////////////////////////////////////////////////////////// -template -inline void ILDGStatistics(GaugeField &data, ILDGField &header) { - // How to convert data precision etc... - header.link_trace = Grid::QCD::WilsonLoops::linkTrace(data); - header.plaquette = Grid::QCD::WilsonLoops::avgPlaquette(data); - // header.polyakov = -} - -// Forcing QCD here -template -struct ILDGMunger { - void operator()(fobj &in, sobj &out, uint32_t &csum) { - for (int mu = 0; mu < 4; mu++) { - for (int i = 0; i < 3; i++) { - for (int j = 0; j < 3; j++) { - out(mu)()(i, j) = in(mu)()(i, j); - } - } - } - ILDGChecksum((uint32_t *)&in, sizeof(in), csum); - }; -}; - -template -struct ILDGUnmunger { - void operator()(sobj &in, fobj &out, uint32_t &csum) { - for (int mu = 0; mu < 4; mu++) { - for (int i = 0; i < 3; i++) { - for (int j = 0; j < 3; j++) { - out(mu)()(i, j) = in(mu)()(i, j); - } - } - } - ILDGChecksum((uint32_t *)&out, sizeof(out), csum); - }; -}; - -//////////////////////////////////////////////////////////////////////////////// -// Write and read from fstream; compute header offset for payload -//////////////////////////////////////////////////////////////////////////////// -enum ILDGstate {ILDGread, ILDGwrite}; - -class ILDGIO : public BinaryIO { - FILE *File; 
- LimeWriter *LimeW; - LimeRecordHeader *LimeHeader; - LimeReader *LimeR; - std::string filename; - - +class IldgIO : public BinaryIO { public: - ILDGIO(std::string file, ILDGstate RW) { - filename = file; - if (RW == ILDGwrite){ - File = fopen(file.c_str(), "w"); - // check if opened correctly - LimeW = limeCreateWriter(File); - } else { - File = fopen(file.c_str(), "r"); - // check if opened correctly - - LimeR = limeCreateReader(File); - } - } - - ~ILDGIO() { fclose(File); } - - int createHeader(std::string message, int MB, int ME, size_t PayloadSize, LimeWriter* L){ + static int createHeader(std::string message, int MB, int ME, size_t PayloadSize, LimeWriter* L) + { LimeRecordHeader *h; h = limeCreateHeader(MB, ME, const_cast(message.c_str()), PayloadSize); - int status = limeWriteRecordHeader(h, L); - if (status < 0) { - std::cerr << "ILDG Header error\n"; - return status; - } + assert(limeWriteRecordHeader(h, L) >= 0); limeDestroyHeader(h); return LIME_SUCCESS; } - unsigned int writeHeader(ILDGField &header) { - // write header in LIME - n_uint64_t nbytes; - int MB_flag = 1, ME_flag = 0; - - char message[] = "ildg-format"; - nbytes = strlen(message); - LimeHeader = limeCreateHeader(MB_flag, ME_flag, message, nbytes); - limeWriteRecordHeader(LimeHeader, LimeW); - limeDestroyHeader(LimeHeader); - // save the xml header here - // use the xml_writer to c++ streams in pugixml - // and convert to char message - limeWriteRecordData(message, &nbytes, LimeW); + template + static void writeLimeObject(int MB,int ME,serialisable_object &object,std::string object_name,std::string record_name, LimeWriter *LimeW) + { + std::string xmlstring; + { + XmlWriter WR("",""); + write(WR,object_name,object); + xmlstring = WR.XmlString(); + } + uint64_t nbytes = xmlstring.size(); + LimeRecordHeader *h = limeCreateHeader(MB, ME,(char *)record_name.c_str(), nbytes); + assert(limeWriteRecordHeader(h, LimeW)>=0); + assert(limeWriteRecordData(&xmlstring[0], &nbytes, LimeW)>=0); 
limeWriterCloseRecord(LimeW); - - return 0; + limeDestroyHeader(h); } - unsigned int readHeader(ILDGField &header) { + static unsigned int writeHeader(FieldMetaData &header, LimeWriter *LimeW) { + + uint64_t nbytes; + + ildgFormat ildgfmt ; + usqcdInfo info; + + ////////////////////////////////////////////////////// + // Fill ILDG header data struct + ////////////////////////////////////////////////////// + ildgfmt.field = std::string("su3gauge"); + ildgfmt.precision = 64; + ildgfmt.version = 1.0; + ildgfmt.lx = header.dimension[0]; + ildgfmt.ly = header.dimension[1]; + ildgfmt.lz = header.dimension[2]; + ildgfmt.lt = header.dimension[3]; + assert(header.nd==4); + assert(header.nd==header.dimension.size()); + + info.version=1.0; + info.plaq = header.plaquette; + info.linktr = header.link_trace; + + // Following scidac file downloaded from NERSC under MILC + // Begin message, keep open on successive records + //Message 1 + // Type: scidac-private-file-xml 1.1416 16 16 48 0 + // Type: scidac-file-xml MILC ILDG archival gauge configuration + //Message 2 + // Type: scidac-private-record-xml 1.0Thu May 11 00:11:33 2006 UTC0 + // QDP_F3_ColorMatrixF3724 + // Type: scidac-record-xml + // Type: ildg-format + // Type: ildg-data-lfn + // Type: ildg-binary-data + // Type: scidac-checksum + + writeLimeObject(1,0,header ,std::string("FieldMetaData"),std::string(GRID_FORMAT),LimeW); + writeLimeObject(0,0,info ,std::string("usqcdInfo" ),std::string(USQCD_INFO ),LimeW); + writeLimeObject(0,0,ildgfmt,std::string("ildgFormat") ,std::string(ILDG_FORMAT),LimeW); + // LFN is not a serializable object + { + std::string LFN = header.ildg_lfn; + uint64_t PayloadSize = LFN.size(); + createHeader(ILDG_DATA_LFN, 0 , 0, PayloadSize, LimeW); + limeWriteRecordData(const_cast(LFN.c_str()), &PayloadSize, LimeW); + limeWriterCloseRecord(LimeW); + } return 0; } template - uint32_t readConfiguration(Lattice > &Umu) { - typedef Lattice > GaugeField; - typedef LorentzColourMatrixD sobjd; - typedef 
LorentzColourMatrixF sobjf; - typedef iLorentzColourMatrix itype; - typedef LorentzColourMatrix sobj; - GridBase *grid = Umu._grid; + static void writeConfiguration(std::string filename,Lattice > &Umu, std::string format) { - ILDGField header; - readHeader(header); + FILE *File = fopen(filename.c_str(), "w"); + LimeWriter *LimeW = limeCreateWriter(File); - // now just the conf, ignore the header - std::string format = std::string("IEEE64BIG"); - do {limeReaderNextRecord(LimeR);} - while (strncmp(limeReaderType(LimeR), "ildg-binary-data",16)); - - n_uint64_t nbytes = limeReaderBytes(LimeR);//size of this record (configuration) - - - ILDGtype ILDGt(true, LimeR); - // this is special for double prec data, just for the moment - uint32_t csum = BinaryIO::readObjectParallel< itype, sobjd >( - Umu, filename, ILDGMunger(), 0, format, ILDGt); - - // Check configuration - // todo - - return csum; - } - - template - uint32_t writeConfiguration(Lattice > &Umu, std::string format) { typedef Lattice > GaugeField; typedef iLorentzColourMatrix vobj; typedef typename vobj::scalar_object sobj; typedef LorentzColourMatrixD fobj; - ILDGField header; - // fill the header + GridBase * grid = Umu._grid; + + //////////////////////////////////////// + // fill the headers + //////////////////////////////////////// + FieldMetaData header; + + GridMetaData(grid,header); + GaugeStatistics(Umu,header); + MachineCharacteristics(header); + + assert( (format=="IEEE64BIG") || (format=="IEEE32BIG")); header.floating_point = format; + header.checksum = 0x0; // unused in ILDG + writeHeader(header,LimeW); - ILDGUnmunger munge; - unsigned int offset = writeHeader(header); - - BinaryIO::Uint32Checksum(Umu, munge, header.checksum); - + //////////////////////////////////////// // Write data record header - n_uint64_t PayloadSize = sizeof(fobj) * Umu._grid->_gsites; - createHeader("ildg-binary-data", 0, 1, PayloadSize, LimeW); - - ILDGtype ILDGt(true, LimeW); - uint32_t csum = BinaryIO::writeObjectParallel( 
- Umu, filename, munge, 0, header.floating_point, ILDGt); - + //////////////////////////////////////// + uint64_t PayloadSize = sizeof(fobj) * Umu._grid->_gsites; + createHeader(ILDG_BINARY_DATA, 0, 0, PayloadSize, LimeW); + + off_t offset = ftell(File); + uint32_t nersc_csum,scidac_csuma,scidac_csumb; + GaugeSimpleMunger munge; + BinaryIO::writeLatticeObject(Umu, filename, munge, offset, header.floating_point, + nersc_csum,scidac_csuma,scidac_csumb); limeWriterCloseRecord(LimeW); - // Last record - // the logical file name LNF - // look into documentation on how to generate this string - std::string LNF = "empty"; + //////////////////////////////////////// + // Write checksum element, propagaing forward from the BinaryIO + //////////////////////////////////////// + scidacChecksum checksum; + checksum.suma= scidac_csuma; + checksum.sumb= scidac_csumb; + // std::cout << " writing scidac checksums "< + static void readConfiguration(std::string filename,Lattice > &Umu, FieldMetaData &FieldMetaData_) { + + typedef Lattice > GaugeField; + typedef LorentzColourMatrixD sobjd; + typedef LorentzColourMatrixF sobjf; + typedef iLorentzColourMatrix itype; + typedef LorentzColourMatrix sobj; + + GridBase *grid = Umu._grid; + + std::vector dims = Umu._grid->FullDimensions(); + assert(dims.size()==4); + + FILE *File = fopen(filename.c_str(), "r"); + LimeReader *LimeR = limeCreateReader(File); - PayloadSize = sizeof(LNF); - createHeader("ildg-binary-lfn", 1 , 1, PayloadSize, LimeW); - limeWriteRecordData(const_cast(LNF.c_str()), &PayloadSize, LimeW); + // Metadata holders + ildgFormat ildgFormat_ ; + std::string ildgLFN_ ; + scidacChecksum scidacChecksum_; + usqcdInfo usqcdInfo_ ; - limeWriterCloseRecord(LimeW); + // track what we read from file + int found_ildgFormat =0; + int found_ildgLFN =0; + int found_scidacChecksum=0; + int found_usqcdInfo =0; + int found_ildgBinary =0; + int found_FieldMetaData =0; - return csum; + uint32_t nersc_csum; + uint32_t scidac_csuma; + uint32_t 
scidac_csumb; + + // Binary format + std::string format; + + ////////////////////////////////////////////////////////////////////////// + // Loop over all records + // -- Order is poorly guaranteed except ILDG header preceeds binary section. + // -- Run like an event loop. + // -- Impose trust hierarchy. Grid takes precedence & look for ILDG, and failing + // that Scidac. + // -- Insist on Scidac checksum record. + ////////////////////////////////////////////////////////////////////////// + + while ( limeReaderNextRecord(LimeR) == LIME_SUCCESS ) { + + uint64_t nbytes = limeReaderBytes(LimeR);//size of this record (configuration) + + ////////////////////////////////////////////////////////////////// + // If not BINARY_DATA read a string and parse + ////////////////////////////////////////////////////////////////// + if ( strncmp(limeReaderType(LimeR), ILDG_BINARY_DATA,strlen(ILDG_BINARY_DATA) ) ) { + + // Copy out the string + std::vector xmlc(nbytes+1,'\0'); + limeReaderReadData((void *)&xmlc[0], &nbytes, LimeR); + std::cout << GridLogMessage<< "Non binary record :" < munge; + BinaryIO::readLatticeObject< itype, sobjd >(Umu, filename, munge, offset, format, + nersc_csum,scidac_csuma,scidac_csumb); + found_ildgBinary = 1; + } + + } + + ////////////////////////////////////////////////////// + // Minimally must find binary segment and checksum + ////////////////////////////////////////////////////// + assert(found_ildgBinary); + assert(found_scidacChecksum); + + // Must find something with the lattice dimensions + assert(found_FieldMetaData||found_ildgFormat); + + if ( found_FieldMetaData ) { + + std::cout << GridLogMessage<<"a Grid MetaData was record found: configuration was probably written by Grid ! Yay ! "<(Umu,checker); + assert(fabs(checker.plaquette - FieldMetaData_.plaquette )<1.0e-5); + assert(fabs(checker.link_trace - FieldMetaData_.link_trace)<1.0e-5); + std::cout << GridLogMessage<<"Plaquette and link trace match " << std::endl; + } } // format for RNG? 
Now just binary out diff --git a/lib/parallelIO/IldgIOtypes.h b/lib/parallelIO/IldgIOtypes.h index 4c7a1edd..8e1316eb 100644 --- a/lib/parallelIO/IldgIOtypes.h +++ b/lib/parallelIO/IldgIOtypes.h @@ -34,47 +34,83 @@ extern "C" { // for linkage namespace Grid { -struct ILDGtype { - bool is_ILDG; - LimeWriter* LW; - LimeReader* LR; +#define GRID_FORMAT "grid-format" +#define ILDG_FORMAT "ildg-format" +#define ILDG_BINARY_DATA "ildg-binary-data" +#define ILDG_DATA_LFN "ildg-data-lfn" +#define USQCD_INFO "usqcdInfo" +#define SCIDAC_CHECKSUM "scidac-checksum" - ILDGtype(bool is, LimeWriter* L) : is_ILDG(is), LW(L), LR(NULL) {} - ILDGtype(bool is, LimeReader* L) : is_ILDG(is), LW(NULL), LR(L) {} - ILDGtype() : is_ILDG(false), LW(NULL), LR(NULL) {} +///////////////////////////////////////////////////////////////////////////////// +// Data representation of records that enter ILDG and SciDac formats +///////////////////////////////////////////////////////////////////////////////// +struct ildgFormat : Serializable { +public: + GRID_SERIALIZABLE_CLASS_MEMBERS(ildgFormat, + double, version, + std::string, field, + int, precision, + int, lx, + int, ly, + int, lz, + int, lt); + ildgFormat() { + version=1.0; + }; }; - -class ILDGField { +struct usqcdInfo : Serializable { public: - // header strings (not in order) - std::vector dimension; - std::vector boundary; - int data_start; - std::string hdr_version; - std::string storage_format; - // Checks on data - double link_trace; - double plaquette; - uint32_t checksum; - unsigned int sequence_number; - std::string data_type; - std::string ensemble_id; - std::string ensemble_label; - std::string creator; - std::string creator_hardware; - std::string creation_date; - std::string archive_date; - std::string floating_point; + GRID_SERIALIZABLE_CLASS_MEMBERS(usqcdInfo, + double, version, + double, plaq, + double, linktr, + std::string, info); + usqcdInfo() { + version=1.0; + }; +}; + +struct usqcdPropFile : Serializable { + public: + 
GRID_SERIALIZABLE_CLASS_MEMBERS(usqcdPropFile, + double, version, + std::string, type, + std::string, info); + usqcdPropFile() { + version=1.0; + }; +}; +struct usqcdSourceInfo : Serializable { + public: + GRID_SERIALIZABLE_CLASS_MEMBERS(usqcdSourceInfo, + double, version, + std::string, info); + usqcdSourceInfo() { + version=1.0; + }; +}; +struct usqcdPropInfo : Serializable { + public: + GRID_SERIALIZABLE_CLASS_MEMBERS(usqcdPropInfo, + double, version, + int, spin, + int, color, + std::string, info); + usqcdPropInfo() { + version=1.0; + }; +}; +struct scidacChecksum : Serializable { + public: + GRID_SERIALIZABLE_CLASS_MEMBERS(scidacChecksum, + double, version, + uint32_t, suma, + uint32_t, sumb); + scidacChecksum() { + version=1.0; + suma=sumb=0; + }; }; } -#else -namespace Grid { - -struct ILDGtype { - bool is_ILDG; - ILDGtype() : is_ILDG(false) {} -}; -} - #endif #endif diff --git a/lib/parallelIO/NerscIO.h b/lib/parallelIO/NerscIO.h index ba9d23de..cc37b537 100644 --- a/lib/parallelIO/NerscIO.h +++ b/lib/parallelIO/NerscIO.h @@ -30,168 +30,11 @@ #ifndef GRID_NERSC_IO_H #define GRID_NERSC_IO_H -#include -#include -#include -#include -#include - -#include -#include -#include - namespace Grid { namespace QCD { using namespace Grid; - //////////////////////////////////////////////////////////////////////////////// - // Some data types for intermediate storage - //////////////////////////////////////////////////////////////////////////////// - template using iLorentzColour2x3 = iVector, 2>, 4 >; - - typedef iLorentzColour2x3 LorentzColour2x3; - typedef iLorentzColour2x3 LorentzColour2x3F; - typedef iLorentzColour2x3 LorentzColour2x3D; - - //////////////////////////////////////////////////////////////////////////////// - // header specification/interpretation - //////////////////////////////////////////////////////////////////////////////// - class NerscField { - public: - // header strings (not in order) - int dimension[4]; - std::string boundary[4]; - int 
data_start; - std::string hdr_version; - std::string storage_format; - // Checks on data - double link_trace; - double plaquette; - uint32_t checksum; - unsigned int sequence_number; - std::string data_type; - std::string ensemble_id ; - std::string ensemble_label ; - std::string creator ; - std::string creator_hardware ; - std::string creation_date ; - std::string archive_date ; - std::string floating_point; - }; - - ////////////////////////////////////////////////////////////////////// - // Bit and Physical Checksumming and QA of data - ////////////////////////////////////////////////////////////////////// - - inline void NerscGrid(GridBase *grid,NerscField &header) - { - assert(grid->_ndimension==4); - for(int d=0;d<4;d++) { - header.dimension[d] = grid->_fdimensions[d]; - } - for(int d=0;d<4;d++) { - header.boundary[d] = std::string("PERIODIC"); - } - } - template - inline void NerscStatistics(GaugeField & data,NerscField &header) - { - // How to convert data precision etc... - header.link_trace=Grid::QCD::WilsonLoops::linkTrace(data); - header.plaquette =Grid::QCD::WilsonLoops::avgPlaquette(data); - } - - inline void NerscMachineCharacteristics(NerscField &header) - { - // Who - struct passwd *pw = getpwuid (getuid()); - if (pw) header.creator = std::string(pw->pw_name); - - // When - std::time_t t = std::time(nullptr); - std::tm tm = *std::localtime(&t); - std::ostringstream oss; - // oss << std::put_time(&tm, "%c %Z"); - header.creation_date = oss.str(); - header.archive_date = header.creation_date; - - // What - struct utsname name; uname(&name); - header.creator_hardware = std::string(name.nodename)+"-"; - header.creator_hardware+= std::string(name.machine)+"-"; - header.creator_hardware+= std::string(name.sysname)+"-"; - header.creator_hardware+= std::string(name.release); - - } - ////////////////////////////////////////////////////////////////////// - // Utilities ; these are QCD aware - 
////////////////////////////////////////////////////////////////////// - inline void reconstruct3(LorentzColourMatrix & cm) - { - const int x=0; - const int y=1; - const int z=2; - for(int mu=0;mu<4;mu++){ - cm(mu)()(2,x) = adj(cm(mu)()(0,y)*cm(mu)()(1,z)-cm(mu)()(0,z)*cm(mu)()(1,y)); //x= yz-zy - cm(mu)()(2,y) = adj(cm(mu)()(0,z)*cm(mu)()(1,x)-cm(mu)()(0,x)*cm(mu)()(1,z)); //y= zx-xz - cm(mu)()(2,z) = adj(cm(mu)()(0,x)*cm(mu)()(1,y)-cm(mu)()(0,y)*cm(mu)()(1,x)); //z= xy-yx - } - } - - template - struct NerscSimpleMunger{ - void operator()(fobj &in, sobj &out) { - for (int mu = 0; mu < Nd; mu++) { - for (int i = 0; i < Nc; i++) { - for (int j = 0; j < Nc; j++) { - out(mu)()(i, j) = in(mu)()(i, j); - }} - } - }; - }; - - template - struct NerscSimpleUnmunger { - - void operator()(sobj &in, fobj &out) { - for (int mu = 0; mu < Nd; mu++) { - for (int i = 0; i < Nc; i++) { - for (int j = 0; j < Nc; j++) { - out(mu)()(i, j) = in(mu)()(i, j); - }} - } - }; - }; - - template - struct Nersc3x2munger{ - - void operator() (fobj &in,sobj &out){ - for(int mu=0;mu<4;mu++){ - for(int i=0;i<2;i++){ - for(int j=0;j<3;j++){ - out(mu)()(i,j) = in(mu)(i)(j); - }} - } - reconstruct3(out); - } - }; - - template - struct Nersc3x2unmunger{ - - void operator() (sobj &in,fobj &out){ - for(int mu=0;mu<4;mu++){ - for(int i=0;i<2;i++){ - for(int j=0;j<3;j++){ - out(mu)(i)(j) = in(mu)()(i,j); - }} - } - } - }; - - //////////////////////////////////////////////////////////////////////////////// // Write and read from fstream; comput header offset for payload //////////////////////////////////////////////////////////////////////////////// @@ -202,42 +45,17 @@ namespace Grid { std::ofstream fout(file,std::ios::out); } -#define dump_nersc_header(field, s) \ - s << "BEGIN_HEADER" << std::endl; \ - s << "HDR_VERSION = " << field.hdr_version << std::endl; \ - s << "DATATYPE = " << field.data_type << std::endl; \ - s << "STORAGE_FORMAT = " << field.storage_format << std::endl; \ - for(int 
i=0;i<4;i++){ \ - s << "DIMENSION_" << i+1 << " = " << field.dimension[i] << std::endl ; \ - } \ - s << "LINK_TRACE = " << std::setprecision(10) << field.link_trace << std::endl; \ - s << "PLAQUETTE = " << std::setprecision(10) << field.plaquette << std::endl; \ - for(int i=0;i<4;i++){ \ - s << "BOUNDARY_"< header; @@ -309,19 +127,21 @@ namespace Grid { return field.data_start; } - ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////// - // Now the meat: the object readers - ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + // Now the meat: the object readers + ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////// template - static inline void readConfiguration(Lattice > &Umu,NerscField& header,std::string file) + static inline void readConfiguration(Lattice > &Umu, + FieldMetaData& header, + std::string file) { typedef Lattice > GaugeField; GridBase *grid = Umu._grid; int offset = readHeader(file,Umu._grid,header); - NerscField clone(header); + FieldMetaData clone(header); std::string format(header.floating_point); @@ -330,34 +150,38 @@ namespace Grid { int ieee64big = (format == std::string("IEEE64BIG")); int ieee64 = (format == std::string("IEEE64")); - uint32_t csum; + uint32_t nersc_csum,scidac_csuma,scidac_csumb; // depending on datatype, set up munger; // munger is a function of if ( header.data_type == std::string("4D_SU3_GAUGE") ) { if ( ieee32 || ieee32big ) { - csum=BinaryIO::readLatticeObject, LorentzColour2x3F> - (Umu,file,Nersc3x2munger(), offset,format); + BinaryIO::readLatticeObject, LorentzColour2x3F> + (Umu,file,Gauge3x2munger(), offset,format, + nersc_csum,scidac_csuma,scidac_csumb); } if ( ieee64 || ieee64big ) { - 
csum=BinaryIO::readLatticeObject, LorentzColour2x3D> - (Umu,file,Nersc3x2munger(),offset,format); + BinaryIO::readLatticeObject, LorentzColour2x3D> + (Umu,file,Gauge3x2munger(),offset,format, + nersc_csum,scidac_csuma,scidac_csumb); } } else if ( header.data_type == std::string("4D_SU3_GAUGE_3x3") ) { if ( ieee32 || ieee32big ) { - csum=BinaryIO::readLatticeObject,LorentzColourMatrixF> - (Umu,file,NerscSimpleMunger(),offset,format); + BinaryIO::readLatticeObject,LorentzColourMatrixF> + (Umu,file,GaugeSimpleMunger(),offset,format, + nersc_csum,scidac_csuma,scidac_csumb); } if ( ieee64 || ieee64big ) { - csum=BinaryIO::readLatticeObject,LorentzColourMatrixD> - (Umu,file,NerscSimpleMunger(),offset,format); + BinaryIO::readLatticeObject,LorentzColourMatrixD> + (Umu,file,GaugeSimpleMunger(),offset,format, + nersc_csum,scidac_csuma,scidac_csumb); } } else { assert(0); } - NerscStatistics(Umu,clone); + GaugeStatistics(Umu,clone); - std::cout< - static inline void writeConfiguration(Lattice > &Umu,std::string file, int two_row,int bits32) + static inline void writeConfiguration(Lattice > &Umu, + std::string file, + int two_row, + int bits32) { typedef Lattice > GaugeField; typedef iLorentzColourMatrix vobj; typedef typename vobj::scalar_object sobj; + FieldMetaData header; + /////////////////////////////////////////// // Following should become arguments - NerscField header; + /////////////////////////////////////////// header.sequence_number = 1; header.ensemble_id = "UKQCD"; header.ensemble_label = "DWF"; @@ -402,32 +231,31 @@ namespace Grid { GridBase *grid = Umu._grid; - NerscGrid(grid,header); - NerscStatistics(Umu,header); - NerscMachineCharacteristics(header); + GridMetaData(grid,header); + assert(header.nd==4); + GaugeStatistics(Umu,header); + MachineCharacteristics(header); int offset; truncate(file); - if ( two_row ) { - header.floating_point = std::string("IEEE64BIG"); - header.data_type = std::string("4D_SU3_GAUGE"); - Nersc3x2unmunger munge; - offset = 
writeHeader(header,file); - header.checksum=BinaryIO::writeLatticeObject(Umu,file,munge,offset,header.floating_point); - writeHeader(header,file); - } else { - header.floating_point = std::string("IEEE64BIG"); - header.data_type = std::string("4D_SU3_GAUGE_3x3"); - NerscSimpleUnmunger munge; - offset = writeHeader(header,file); - header.checksum=BinaryIO::writeLatticeObject(Umu,file,munge,offset,header.floating_point); - writeHeader(header,file); - } + // Sod it -- always write 3x3 double + header.floating_point = std::string("IEEE64BIG"); + header.data_type = std::string("4D_SU3_GAUGE_3x3"); + GaugeSimpleUnmunger munge; + offset = writeHeader(header,file); + + uint32_t nersc_csum,scidac_csuma,scidac_csumb; + BinaryIO::writeLatticeObject(Umu,file,munge,offset,header.floating_point, + nersc_csum,scidac_csuma,scidac_csumb); + header.checksum = nersc_csum; + writeHeader(header,file); + std::cout< - uint32_t csum=BinaryIO::readRNG(serial,parallel,file,offset); + uint32_t nersc_csum,scidac_csuma,scidac_csumb; + BinaryIO::readRNG(serial,parallel,file,offset,nersc_csum,scidac_csuma,scidac_csumb); - if ( csum != header.checksum ) { - std::cerr << "checksum mismatch "< { fout.close(); } - void TrajectoryComplete(int traj, Field &U, GridSerialRNG &sRNG, - GridParallelRNG &pRNG) { + void TrajectoryComplete(int traj, Field &U, GridSerialRNG &sRNG, GridParallelRNG &pRNG) { + if ((traj % Params.saveInterval) == 0) { std::string config, rng; this->build_filenames(traj, Params, config, rng); + uint32_t nersc_csum; + uint32_t scidac_csuma; + uint32_t scidac_csumb; + BinarySimpleUnmunger munge; truncate(rng); - BinaryIO::writeRNG(sRNG, pRNG, rng, 0); + BinaryIO::writeRNG(sRNG, pRNG, rng, 0,nersc_csum,scidac_csuma,scidac_csumb); truncate(config); - uint32_t csum = BinaryIO::writeLatticeObject( - U, config, munge, 0, Params.format); + + BinaryIO::writeLatticeObject(U, config, munge, 0, Params.format, + nersc_csum,scidac_csuma,scidac_csumb); std::cout << GridLogMessage << "Written 
Binary Configuration " << config - << " checksum " << std::hex << csum << std::dec << std::endl; + << " checksum " << std::hex + << nersc_csum <<"/" + << scidac_csuma <<"/" + << scidac_csumb + << std::dec << std::endl; } + }; - void CheckpointRestore(int traj, Field &U, GridSerialRNG &sRNG, - GridParallelRNG &pRNG) { + void CheckpointRestore(int traj, Field &U, GridSerialRNG &sRNG, GridParallelRNG &pRNG) { std::string config, rng; this->build_filenames(traj, Params, config, rng); BinarySimpleMunger munge; - BinaryIO::readRNG(sRNG, pRNG, rng, 0); - uint32_t csum = BinaryIO::readLatticeObject( - U, config, munge, 0, Params.format); + uint32_t nersc_csum; + uint32_t scidac_csuma; + uint32_t scidac_csumb; + BinaryIO::readRNG(sRNG, pRNG, rng, 0,nersc_csum,scidac_csuma,scidac_csumb); + BinaryIO::readLatticeObject(U, config, munge, 0, Params.format, + nersc_csum,scidac_csuma,scidac_csumb); + std::cout << GridLogMessage << "Read Binary Configuration " << config - << " checksum " << std::hex << csum << std::dec << std::endl; + << " checksums " << std::hex << nersc_csum<<"/"< { // check here that the format is valid int ieee32big = (Params.format == std::string("IEEE32BIG")); - int ieee32 = (Params.format == std::string("IEEE32")); + int ieee32 = (Params.format == std::string("IEEE32")); int ieee64big = (Params.format == std::string("IEEE64BIG")); - int ieee64 = (Params.format == std::string("IEEE64")); + int ieee64 = (Params.format == std::string("IEEE64")); if (!(ieee64big || ieee32 || ieee32big || ieee64)) { std::cout << GridLogError << "Unrecognized file format " << Params.format @@ -74,13 +74,17 @@ class ILDGHmcCheckpointer : public BaseHmcCheckpointer { if ((traj % Params.saveInterval) == 0) { std::string config, rng; this->build_filenames(traj, Params, config, rng); - - ILDGIO IO(config, ILDGwrite); - BinaryIO::writeRNGSerial(sRNG, pRNG, rng, 0); - uint32_t csum = IO.writeConfiguration(U, Params.format); + + uint32_t nersc_csum,scidac_csuma,scidac_csumb; + 
BinaryIO::writeRNG(sRNG, pRNG, rng, 0,nersc_csum,scidac_csuma,scidac_csumb); + IldgIO::writeConfiguration(config,U, Params.format); std::cout << GridLogMessage << "Written ILDG Configuration on " << config - << " checksum " << std::hex << csum << std::dec << std::endl; + << " checksum " << std::hex + << nersc_csum<<"/" + << scidac_csuma<<"/" + << scidac_csumb + << std::dec << std::endl; } }; @@ -89,12 +93,18 @@ class ILDGHmcCheckpointer : public BaseHmcCheckpointer { std::string config, rng; this->build_filenames(traj, Params, config, rng); - ILDGIO IO(config, ILDGread); - BinaryIO::readRNGSerial(sRNG, pRNG, rng, 0); - uint32_t csum = IO.readConfiguration(U); // format from the header + uint32_t nersc_csum,scidac_csuma,scidac_csumb; + BinaryIO::readRNG(sRNG, pRNG, rng, 0,nersc_csum,scidac_csuma,scidac_csumb); + + FieldMetaData header; + IldgIO::readConfiguration(config,U,header); // format from the header std::cout << GridLogMessage << "Read ILDG Configuration from " << config - << " checksum " << std::hex << csum << std::dec << std::endl; + << " checksum " << std::hex + << nersc_csum<<"/" + << scidac_csuma<<"/" + << scidac_csumb + << std::dec << std::endl; }; }; } diff --git a/lib/qcd/hmc/checkpointers/NerscCheckpointer.h b/lib/qcd/hmc/checkpointers/NerscCheckpointer.h index 395369a0..a4b1b480 100644 --- a/lib/qcd/hmc/checkpointers/NerscCheckpointer.h +++ b/lib/qcd/hmc/checkpointers/NerscCheckpointer.h @@ -70,7 +70,7 @@ class NerscHmcCheckpointer : public BaseHmcCheckpointer { std::string config, rng; this->build_filenames(traj, Params, config, rng); - NerscField header; + FieldMetaData header; NerscIO::readRNGState(sRNG, pRNG, header, rng); NerscIO::readConfiguration(U, header, config); }; diff --git a/lib/qcd/utils/Utils.h b/lib/qcd/utils/Utils.h index 61c81cb5..1786db54 100644 --- a/lib/qcd/utils/Utils.h +++ b/lib/qcd/utils/Utils.h @@ -12,7 +12,4 @@ #include #include - - - #endif diff --git a/lib/serialisation/XmlIO.cc b/lib/serialisation/XmlIO.cc index 
b04263c9..a132a2f0 100644 --- a/lib/serialisation/XmlIO.cc +++ b/lib/serialisation/XmlIO.cc @@ -32,16 +32,21 @@ using namespace Grid; using namespace std; // Writer implementation /////////////////////////////////////////////////////// -XmlWriter::XmlWriter(const string &fileName) -: fileName_(fileName) +XmlWriter::XmlWriter(const string &fileName, string toplev) : fileName_(fileName) { - node_ = doc_.append_child(); - node_.set_name("grid"); + if ( toplev == std::string("") ) { + node_=doc_; + } else { + node_=doc_.append_child(); + node_.set_name(toplev.c_str()); + } } XmlWriter::~XmlWriter(void) { - doc_.save_file(fileName_.c_str(), " "); + if ( fileName_ != std::string("") ) { + doc_.save_file(fileName_.c_str(), " "); + } } void XmlWriter::push(const string &s) @@ -53,21 +58,44 @@ void XmlWriter::pop(void) { node_ = node_.parent(); } - -// Reader implementation /////////////////////////////////////////////////////// -XmlReader::XmlReader(const string &fileName) -: fileName_(fileName) +std::string XmlWriter::XmlString(void) { - pugi::xml_parse_result result = doc_.load_file(fileName_.c_str()); - - if ( !result ) - { + std::ostringstream oss; + doc_.save(oss); + return oss.str(); +} + +XmlReader::XmlReader(const char *xmlstring,string toplev) : fileName_("") +{ + pugi::xml_parse_result result; + result = doc_.load_string(xmlstring); + if ( !result ) { cerr << "XML error description: " << result.description() << "\n"; cerr << "XML error offset : " << result.offset << "\n"; abort(); } - - node_ = doc_.child("grid"); + if ( toplev == std::string("") ) { + node_ = doc_; + } else { + node_ = doc_.child(toplev.c_str()); + } +} + +// Reader implementation /////////////////////////////////////////////////////// +XmlReader::XmlReader(const string &fileName,string toplev) : fileName_(fileName) +{ + pugi::xml_parse_result result; + result = doc_.load_file(fileName_.c_str()); + if ( !result ) { + cerr << "XML error description: " << result.description() << "\n"; + cerr << 
"XML error offset : " << result.offset << "\n"; + abort(); + } + if ( toplev == std::string("") ) { + node_ = doc_; + } else { + node_ = doc_.child(toplev.c_str()); + } } bool XmlReader::push(const string &s) diff --git a/lib/serialisation/XmlIO.h b/lib/serialisation/XmlIO.h index f333b9aa..fcdbf1e4 100644 --- a/lib/serialisation/XmlIO.h +++ b/lib/serialisation/XmlIO.h @@ -44,10 +44,9 @@ namespace Grid { class XmlWriter: public Writer - { - + { public: - XmlWriter(const std::string &fileName); + XmlWriter(const std::string &fileName,std::string toplev = std::string("grid") ); virtual ~XmlWriter(void); void push(const std::string &s); void pop(void); @@ -55,6 +54,7 @@ namespace Grid void writeDefault(const std::string &s, const U &x); template void writeDefault(const std::string &s, const std::vector &x); + std::string XmlString(void); private: pugi::xml_document doc_; pugi::xml_node node_; @@ -64,7 +64,8 @@ namespace Grid class XmlReader: public Reader { public: - XmlReader(const std::string &fileName); + XmlReader(const char *xmlstring,std::string toplev = std::string("grid") ); + XmlReader(const std::string &fileName,std::string toplev = std::string("grid") ); virtual ~XmlReader(void) = default; bool push(const std::string &s); void pop(void); @@ -118,7 +119,7 @@ namespace Grid std::string buf; readDefault(s, buf); - std::cout << s << " " << buf << std::endl; + // std::cout << s << " " << buf << std::endl; fromString(output, buf); } diff --git a/tests/IO/Test_nersc_io.cc b/tests/IO/Test_nersc_io.cc index 14c6080d..ca04e623 100644 --- a/tests/IO/Test_nersc_io.cc +++ b/tests/IO/Test_nersc_io.cc @@ -64,8 +64,8 @@ int main (int argc, char ** argv) std::cout < U(4,&Fine); - NerscField header; + FieldMetaData header; std::string file("./ckpoint_lat"); NerscIO::readConfiguration(Umu,header,file); diff --git a/tests/IO/Test_serialisation.cc b/tests/IO/Test_serialisation.cc index 7d911dfd..ceddee77 100644 --- a/tests/IO/Test_serialisation.cc +++ 
b/tests/IO/Test_serialisation.cc @@ -31,6 +31,7 @@ Author: Peter Boyle using namespace Grid; +using namespace Grid::QCD; GRID_SERIALIZABLE_ENUM(myenum, undef, red, 1, blue, 2, green, 3); @@ -62,6 +63,7 @@ public: } }; + int16_t i16 = 1; uint16_t u16 = 2; int32_t i32 = 3; @@ -237,7 +239,22 @@ int main(int argc,char **argv) std::cout << "Loaded (JSON) -----------------" << std::endl; std::cout << jcopy1 << std::endl << jveccopy1 << std::endl; } - + + { + ildgFormat format; + format.version =1.0; + format.field =std::string("su3gauge"); + format.precision =32; + format.lx =24; + format.ly =24; + format.lz =24; + format.lt =48; + XmlWriter WR("ildg-format.xml",""); + XmlWriter WRs("",""); + write(WR,"ildgFormat",format); + write(WRs,"ildgFormat",format); + std::cout << " XmlString: " < Date: Sun, 11 Jun 2017 23:19:20 +0100 Subject: [PATCH 068/177] New files --- lib/parallelIO/MetaData.h | 223 ++++++++++++++++++++++++++++++++++++++ tests/IO/Test_ildg_io.cc | 93 ++++++++++++++++ 2 files changed, 316 insertions(+) create mode 100644 lib/parallelIO/MetaData.h create mode 100644 tests/IO/Test_ildg_io.cc diff --git a/lib/parallelIO/MetaData.h b/lib/parallelIO/MetaData.h new file mode 100644 index 00000000..e91371b8 --- /dev/null +++ b/lib/parallelIO/MetaData.h @@ -0,0 +1,223 @@ +/************************************************************************************* + + Grid physics library, www.github.com/paboyle/Grid + + Source file: ./lib/parallelIO/NerscIO.h + + Copyright (C) 2015 + + + Author: Peter Boyle + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along + with this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + + See the full license in the file "LICENSE" in the top level distribution directory +*************************************************************************************/ +/* END LEGAL */ + +#include +#include +#include +#include +#include +#include +#include +#include + +namespace Grid { + namespace QCD { + + using namespace Grid; + + //////////////////////////////////////////////////////////////////////////////// + // header specification/interpretation + //////////////////////////////////////////////////////////////////////////////// + class FieldMetaData : Serializable { + public: + + GRID_SERIALIZABLE_CLASS_MEMBERS(FieldMetaData, + int, nd, + std::vector, dimension, + std::vector, boundary, + int, data_start, + std::string, hdr_version, + std::string, storage_format, + double, link_trace, + double, plaquette, + uint32_t, checksum, + uint32_t, scidac_checksuma, + uint32_t, scidac_checksumb, + unsigned int, sequence_number, + std::string, data_type, + std::string, ensemble_id, + std::string, ensemble_label, + std::string, ildg_lfn, + std::string, creator, + std::string, creator_hardware, + std::string, creation_date, + std::string, archive_date, + std::string, floating_point); + }; + + ////////////////////////////////////////////////////////////////////// + // Bit and Physical Checksumming and QA of data + ////////////////////////////////////////////////////////////////////// + inline void GridMetaData(GridBase *grid,FieldMetaData &header) + { + int nd = grid->_ndimension; + header.nd = nd; + header.dimension.resize(nd); + header.boundary.resize(nd); + for(int d=0;d_fdimensions[d]; + } + for(int d=0;d + inline void GaugeStatistics(GaugeField & data,FieldMetaData &header) + { + // How to convert 
data precision etc... + header.link_trace=Grid::QCD::WilsonLoops::linkTrace(data); + header.plaquette =Grid::QCD::WilsonLoops::avgPlaquette(data); + } + + inline void MachineCharacteristics(FieldMetaData &header) + { + // Who + struct passwd *pw = getpwuid (getuid()); + if (pw) header.creator = std::string(pw->pw_name); + + // When + std::time_t t = std::time(nullptr); + std::tm tm = *std::localtime(&t); + std::ostringstream oss; + oss << std::put_time(&tm, "%c %Z"); + header.creation_date = oss.str(); + header.archive_date = header.creation_date; + + // What + struct utsname name; uname(&name); + header.creator_hardware = std::string(name.nodename)+"-"; + header.creator_hardware+= std::string(name.machine)+"-"; + header.creator_hardware+= std::string(name.sysname)+"-"; + header.creator_hardware+= std::string(name.release); + } + +#define dump_meta_data(field, s) \ + s << "BEGIN_HEADER" << std::endl; \ + s << "HDR_VERSION = " << field.hdr_version << std::endl; \ + s << "DATATYPE = " << field.data_type << std::endl; \ + s << "STORAGE_FORMAT = " << field.storage_format << std::endl; \ + for(int i=0;i<4;i++){ \ + s << "DIMENSION_" << i+1 << " = " << field.dimension[i] << std::endl ; \ + } \ + s << "LINK_TRACE = " << std::setprecision(10) << field.link_trace << std::endl; \ + s << "PLAQUETTE = " << std::setprecision(10) << field.plaquette << std::endl; \ + for(int i=0;i<4;i++){ \ + s << "BOUNDARY_"< using iLorentzColour2x3 = iVector, 2>, Nd >; + + typedef iLorentzColour2x3 LorentzColour2x3; + typedef iLorentzColour2x3 LorentzColour2x3F; + typedef iLorentzColour2x3 LorentzColour2x3D; + + template + struct GaugeSimpleMunger{ + void operator()(fobj &in, sobj &out) { + for (int mu = 0; mu < Nd; mu++) { + for (int i = 0; i < Nc; i++) { + for (int j = 0; j < Nc; j++) { + out(mu)()(i, j) = in(mu)()(i, j); + }} + } + }; + }; + + template + struct GaugeSimpleUnmunger { + + void operator()(sobj &in, fobj &out) { + for (int mu = 0; mu < Nd; mu++) { + for (int i = 0; i < Nc; i++) 
{ + for (int j = 0; j < Nc; j++) { + out(mu)()(i, j) = in(mu)()(i, j); + }} + } + }; + }; + + template + struct Gauge3x2munger{ + void operator() (fobj &in,sobj &out){ + for(int mu=0;mu + struct Gauge3x2unmunger{ + void operator() (sobj &in,fobj &out){ + for(int mu=0;mu +Author: Peter Boyle +Author: paboyle + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along + with this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+ + See the full license in the file "LICENSE" in the top level distribution directory + *************************************************************************************/ + /* END LEGAL */ +#include + +using namespace std; +using namespace Grid; +using namespace Grid::QCD; + + +int main (int argc, char ** argv) +{ + Grid_init(&argc,&argv); + + std::cout < simd_layout = GridDefaultSimd(4,vComplex::Nsimd()); + std::vector mpi_layout = GridDefaultMpi(); + //std::vector latt_size ({48,48,48,96}); + //std::vector latt_size ({32,32,32,32}); + std::vector latt_size ({16,16,16,32}); + std::vector clatt_size ({4,4,4,8}); + int orthodir=3; + int orthosz =latt_size[orthodir]; + + GridCartesian Fine(latt_size,simd_layout,mpi_layout); + GridCartesian Coarse(clatt_size,simd_layout,mpi_layout); + + + GridParallelRNG pRNGa(&Fine); + GridParallelRNG pRNGb(&Fine); + GridSerialRNG sRNGa; + GridSerialRNG sRNGb; + + std::cout <({45,12,81,9})); + sRNGa.SeedFixedIntegers(std::vector({45,12,81,9})); + std::cout < U(4,&Fine); + + SU3::HotConfiguration(pRNGa,Umu); + + + FieldMetaData header; + + std::cout < Date: Mon, 12 Jun 2017 00:41:21 +0100 Subject: [PATCH 070/177] Odd new error on G++ 49 on travis --- lib/serialisation/MacroMagic.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/serialisation/MacroMagic.h b/lib/serialisation/MacroMagic.h index a864989c..04f1b401 100644 --- a/lib/serialisation/MacroMagic.h +++ b/lib/serialisation/MacroMagic.h @@ -110,7 +110,7 @@ THE SOFTWARE. #define GRID_MACRO_MEMBER(A,B) A B; #define GRID_MACRO_COMP_MEMBER(A,B) result = (result and (lhs. B == rhs. B)); -#define GRID_MACRO_OS_WRITE_MEMBER(A,B) os<< #A <<" "#B <<" = "<< obj. 
B <<" ; " < Date: Tue, 13 Jun 2017 10:48:43 +0100 Subject: [PATCH 071/177] gcc 4.9 fix --- lib/parallelIO/MetaData.h | 8 +++++--- tests/IO/Test_serialisation.cc | 5 ++--- 2 files changed, 7 insertions(+), 6 deletions(-) diff --git a/lib/parallelIO/MetaData.h b/lib/parallelIO/MetaData.h index 85a6e0b9..1bad07f2 100644 --- a/lib/parallelIO/MetaData.h +++ b/lib/parallelIO/MetaData.h @@ -37,9 +37,6 @@ #include namespace Grid { - namespace QCD { - - using namespace Grid; //////////////////////////////////////////////////////////////////////////////// // header specification/interpretation @@ -71,6 +68,11 @@ namespace Grid { std::string, floating_point); }; + namespace QCD { + + using namespace Grid; + + ////////////////////////////////////////////////////////////////////// // Bit and Physical Checksumming and QA of data ////////////////////////////////////////////////////////////////////// diff --git a/tests/IO/Test_serialisation.cc b/tests/IO/Test_serialisation.cc index ceddee77..6d918787 100644 --- a/tests/IO/Test_serialisation.cc +++ b/tests/IO/Test_serialisation.cc @@ -45,8 +45,8 @@ public: double, y, bool , b, std::vector, array, - std::vector>, twodimarray, - std::vector>>, cmplx3darray + std::vector >, twodimarray, + std::vector > >, cmplx3darray ); myclass() {} myclass(int i) @@ -63,7 +63,6 @@ public: } }; - int16_t i16 = 1; uint16_t u16 = 2; int32_t i32 = 3; From 0494feec98f1c53b2ac20cab2a4e159637ade84f Mon Sep 17 00:00:00 2001 From: paboyle Date: Tue, 13 Jun 2017 12:00:23 +0100 Subject: [PATCH 072/177] Libz dependency --- configure.ac | 4 ++++ lib/parallelIO/IldgIO.h | 3 +++ 2 files changed, 7 insertions(+) diff --git a/configure.ac b/configure.ac index 2fc9dfec..f7284d48 100644 --- a/configure.ac +++ b/configure.ac @@ -184,6 +184,10 @@ AC_SEARCH_LIBS([limeCreateReader], [lime], In order to use ILGG file format please install or provide the correct path to your installation Info at: http://usqcd.jlab.org/usqcd-docs/c-lime/)]) +AC_SEARCH_LIBS([crc32], [z], + 
[AC_DEFINE([HAVE_ZLIB], [1], [Define to 1 if you have the `LIBZ' library])] + [have_zlib=true], + [AC_MSG_ERROR(zlib library was not found in your system.)]) AC_SEARCH_LIBS([H5Fopen], [hdf5_cpp], [AC_DEFINE([HAVE_HDF5], [1], [Define to 1 if you have the `HDF5' library])] diff --git a/lib/parallelIO/IldgIO.h b/lib/parallelIO/IldgIO.h index df840fb2..a6810b0d 100644 --- a/lib/parallelIO/IldgIO.h +++ b/lib/parallelIO/IldgIO.h @@ -379,6 +379,9 @@ class IldgIO : public BinaryIO { assert( scidac_csuma ==FieldMetaData_.scidac_checksuma); assert( scidac_csumb ==FieldMetaData_.scidac_checksumb); std::cout << GridLogMessage<<"SciDAC checksums match " << std::endl; + } else { + std::cout << GridLogWarning<<"SciDAC checksums not found. This is unsafe. " << std::endl; + //Could choose to fail ? } if ( found_FieldMetaData || found_usqcdInfo ) { From 91199a8ea0907ff1b074066ae566a318b803e437 Mon Sep 17 00:00:00 2001 From: paboyle Date: Tue, 13 Jun 2017 12:21:29 +0100 Subject: [PATCH 073/177] openmpi is not const safe --- lib/parallelIO/BinaryIO.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/parallelIO/BinaryIO.h b/lib/parallelIO/BinaryIO.h index bc3da38b..7226ccba 100644 --- a/lib/parallelIO/BinaryIO.h +++ b/lib/parallelIO/BinaryIO.h @@ -376,7 +376,7 @@ class BinaryIO { if ( (control & BINARYIO_LEXICOGRAPHIC) && (nrank > 1) ) { #ifdef USE_MPI_IO std::cout<< GridLogMessage<< "MPI read I/O "<< file<< std::endl; - ierr=MPI_File_open(grid->communicator, file.c_str(), MPI_MODE_RDONLY, MPI_INFO_NULL, &fh); assert(ierr==0); + ierr=MPI_File_open(grid->communicator,(char *) file.c_str(), MPI_MODE_RDONLY, MPI_INFO_NULL, &fh); assert(ierr==0); ierr=MPI_File_set_view(fh, disp, mpiObject, fileArray, "native", MPI_INFO_NULL); assert(ierr==0); ierr=MPI_File_read_all(fh, &iodata[0], 1, localArray, &status); assert(ierr==0); MPI_File_close(&fh); @@ -426,7 +426,7 @@ class BinaryIO { if ( (control & BINARYIO_LEXICOGRAPHIC) && (nrank > 1) ) { #ifdef USE_MPI_IO std::cout<< 
GridLogMessage<< "MPI write I/O "<< file<< std::endl; - ierr=MPI_File_open(grid->communicator, file.c_str(), MPI_MODE_RDWR|MPI_MODE_CREATE,MPI_INFO_NULL, &fh); assert(ierr==0); + ierr=MPI_File_open(grid->communicator,(char *) file.c_str(), MPI_MODE_RDWR|MPI_MODE_CREATE,MPI_INFO_NULL, &fh); assert(ierr==0); ierr=MPI_File_set_view(fh, disp, mpiObject, fileArray, "native", MPI_INFO_NULL); assert(ierr==0); ierr=MPI_File_write_all(fh, &iodata[0], 1, localArray, &status); assert(ierr==0); MPI_File_close(&fh); From e7564f8330eceac22e73b61cca4110bdb2ea5b09 Mon Sep 17 00:00:00 2001 From: paboyle Date: Tue, 13 Jun 2017 12:22:50 +0100 Subject: [PATCH 074/177] Starting a test for reading an ILDG file. --- tests/IO/Test_ildg_read.cc | 112 +++++++++++++++++++++++++++++++++++++ 1 file changed, 112 insertions(+) create mode 100644 tests/IO/Test_ildg_read.cc diff --git a/tests/IO/Test_ildg_read.cc b/tests/IO/Test_ildg_read.cc new file mode 100644 index 00000000..70a46dbf --- /dev/null +++ b/tests/IO/Test_ildg_read.cc @@ -0,0 +1,112 @@ + /************************************************************************************* + + Grid physics library, www.github.com/paboyle/Grid + + Source file: ./tests/Test_nersc_io.cc + + Copyright (C) 2015 + +Author: Azusa Yamaguchi +Author: Peter Boyle +Author: paboyle + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + You should have received a copy of the GNU General Public License along + with this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + + See the full license in the file "LICENSE" in the top level distribution directory + *************************************************************************************/ + /* END LEGAL */ +#include + +using namespace std; +using namespace Grid; +using namespace Grid::QCD; + + +int main (int argc, char ** argv) +{ + Grid_init(&argc,&argv); + + + std::vector simd_layout = GridDefaultSimd(4,vComplex::Nsimd()); + std::vector mpi_layout = GridDefaultMpi(); + std::vector latt_size = GridDefaultLatt(); + int orthodir=3; + int orthosz =latt_size[orthodir]; + + GridCartesian Fine(latt_size,simd_layout,mpi_layout); + + LatticeGaugeField Umu(&Fine); + std::vector U(4,&Fine); + + FieldMetaData header; + std::string file("./ckpoint_lat"); + IldgIO::readConfiguration(file,Umu,header); + + for(int mu=0;mu(Umu,mu); + } + + // Painful ; fix syntactical niceness + LatticeComplex LinkTrace(&Fine); + LinkTrace=zero; + for(int mu=0;mu Plaq_T(orthosz); + sliceSum(Plaq,Plaq_T,Nd-1); + int Nt = Plaq_T.size(); + + TComplex Plaq_T_sum; + Plaq_T_sum=zero; + for(int t=0;t Date: Wed, 14 Jun 2017 05:19:17 +0100 Subject: [PATCH 075/177] Serialisation no compile fix --- tests/IO/Test_serialisation.cc | 15 --------------- 1 file changed, 15 deletions(-) diff --git a/tests/IO/Test_serialisation.cc b/tests/IO/Test_serialisation.cc index 6d918787..d5b52044 100644 --- a/tests/IO/Test_serialisation.cc +++ b/tests/IO/Test_serialisation.cc @@ -239,21 +239,6 @@ int main(int argc,char **argv) std::cout << jcopy1 << std::endl << jveccopy1 << std::endl; } - { - ildgFormat format; - format.version =1.0; - format.field =std::string("su3gauge"); - format.precision =32; - format.lx =24; - format.ly =24; - format.lz =24; - format.lt =48; - XmlWriter WR("ildg-format.xml",""); - XmlWriter WRs("",""); 
- write(WR,"ildgFormat",format); - write(WRs,"ildgFormat",format); - std::cout << " XmlString: " < Date: Wed, 14 Jun 2017 10:53:39 +0100 Subject: [PATCH 076/177] QPX exchange support --- lib/simd/Grid_qpx.h | 43 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 43 insertions(+) diff --git a/lib/simd/Grid_qpx.h b/lib/simd/Grid_qpx.h index cbca9118..9fc8ef3c 100644 --- a/lib/simd/Grid_qpx.h +++ b/lib/simd/Grid_qpx.h @@ -375,6 +375,49 @@ namespace Optimization { FLOAT_WRAP_2(operator(), inline) }; + ////////////////////////////////////////////// + // Exchange support +#define FLOAT_WRAP_EXCHANGE(fn) \ + static inline void fn(vector4float &out1, vector4float &out2, \ + vector4float in1, vector4float in2) \ + { \ + vector4double out1d, out2d, in1d, in2d; \ + in1d = Vset()(in1); \ + in2d = Vset()(in2); \ + fn(out1d, out2d, in1d, in2d); \ + Vstore()(out1d, out1); \ + Vstore()(out2d, out2); \ + } + + struct Exchange{ + + // double precision + static inline void Exchange0(vector4double &out1, vector4double &out2, + vector4double in1, vector4double in2) { + out1 = vec_perm(in1, in2, vec_gpci(0145)); + out2 = vec_perm(in1, in2, vec_gpci(02367)); + } + static inline void Exchange1(vector4double &out1, vector4double &out2, + vector4double in1, vector4double in2) { + out1 = vec_perm(in1, in2, vec_gpci(0426)); + out2 = vec_perm(in1, in2, vec_gpci(01537)); + } + static inline void Exchange2(vector4double &out1, vector4double &out2, + vector4double in1, vector4double in2) { + assert(0); + } + static inline void Exchange3(vector4double &out1, vector4double &out2, + vector4double in1, vector4double in2) { + assert(0); + } + + // single precision + FLOAT_WRAP_EXCHANGE(Exchange0); + FLOAT_WRAP_EXCHANGE(Exchange1); + FLOAT_WRAP_EXCHANGE(Exchange2); + FLOAT_WRAP_EXCHANGE(Exchange3); + }; + struct Permute{ //Complex double static inline vector4double Permute0(vector4double v){ //0123 -> 2301 From 735cbdb983703fd3ffadc6133d792b4d058a897b Mon Sep 17 00:00:00 2001 From: Lanny91 
Date: Wed, 14 Jun 2017 10:55:10 +0100 Subject: [PATCH 077/177] QPX Integer reduction (+ integer reduction test) --- lib/simd/Grid_qpx.h | 11 +++++++---- tests/Test_simd.cc | 47 +++++++++++++++++++++++++++++++++++++++++++-- 2 files changed, 52 insertions(+), 6 deletions(-) diff --git a/lib/simd/Grid_qpx.h b/lib/simd/Grid_qpx.h index 9fc8ef3c..00dbace5 100644 --- a/lib/simd/Grid_qpx.h +++ b/lib/simd/Grid_qpx.h @@ -540,10 +540,13 @@ namespace Optimization { //Integer Reduce template<> - inline Integer Reduce::operator()(int in){ - // FIXME unimplemented - printf("Reduce : Missing integer implementation -> FIX\n"); - assert(0); + inline Integer Reduce::operator()(veci in){ + Integer a = 0; + for (unsigned int i = 0; i < W::r; ++i) + { + a += in.v[i]; + } + return a; } } diff --git a/tests/Test_simd.cc b/tests/Test_simd.cc index c0bbef1d..b2e8d68e 100644 --- a/tests/Test_simd.cc +++ b/tests/Test_simd.cc @@ -183,8 +183,6 @@ void IntTester(const functor &func) { typedef Integer scal; typedef vInteger vec; - GridSerialRNG sRNG; - sRNG.SeedFixedIntegers(std::vector({45,12,81,9})); int Nsimd = vec::Nsimd(); @@ -287,6 +285,50 @@ void ReductionTester(const functor &func) } +template +void IntReductionTester(const functor &func) +{ + int Nsimd = vec::Nsimd(); + + std::vector input1(Nsimd); + std::vector input2(Nsimd); + reduced result(0); + reduced reference(0); + reduced tmp; + + std::vector > buf(3); + vec & v_input1 = buf[0]; + vec & v_input2 = buf[1]; + + for(int i=0;i(v_input1,input1); + merge(v_input2,input2); + + func.template vfunc(result,v_input1,v_input2); + + for(int i=0;i(tmp,input1[i],input2[i]); + reference+=tmp; + } + + std::cout<(funcReduce()); std::cout< Date: Fri, 16 Jun 2017 15:04:26 +0100 Subject: [PATCH 078/177] Placeholder precision change functions to allow Grid to compile with QPX (warning: no actual functionality) --- lib/simd/Grid_qpx.h | 36 ++++++++++++++++++++++++++++++++++++ 1 file changed, 36 insertions(+) diff --git a/lib/simd/Grid_qpx.h 
b/lib/simd/Grid_qpx.h index 00dbace5..8de7bde8 100644 --- a/lib/simd/Grid_qpx.h +++ b/lib/simd/Grid_qpx.h @@ -374,6 +374,41 @@ namespace Optimization { // Complex float FLOAT_WRAP_2(operator(), inline) }; +#define USE_FP16 + struct PrecisionChange { + static inline vech StoH (const vector4float &a, const vector4float &b) { + vech ret; + std::cout << GridLogError << "QPX single to half precision conversion not yet supported." << std::endl; + assert(0); + return ret; + } + static inline void HtoS (vech h, vector4float &sa, vector4float &sb) { + std::cout << GridLogError << "QPX half to single precision conversion not yet supported." << std::endl; + assert(0); + } + static inline vector4float DtoS (vector4double a, vector4double b) { + vector4float ret; + std::cout << GridLogError << "QPX double to single precision conversion not yet supported." << std::endl; + assert(0); + return ret; + } + static inline void StoD (vector4float s, vector4double &a, vector4double &b) { + std::cout << GridLogError << "QPX single to double precision conversion not yet supported." << std::endl; + assert(0); + } + static inline vech DtoH (vector4double a, vector4double b, + vector4double c, vector4double d) { + vech ret; + std::cout << GridLogError << "QPX double to half precision conversion not yet supported." << std::endl; + assert(0); + return ret; + } + static inline void HtoD (vech h, vector4double &a, vector4double &b, + vector4double &c, vector4double &d) { + std::cout << GridLogError << "QPX half to double precision conversion not yet supported." 
<< std::endl; + assert(0); + } + }; ////////////////////////////////////////////// // Exchange support @@ -552,6 +587,7 @@ namespace Optimization { //////////////////////////////////////////////////////////////////////////////// // Here assign types +typedef Optimization::vech SIMD_Htype; // Half precision type typedef Optimization::vector4float SIMD_Ftype; // Single precision type typedef vector4double SIMD_Dtype; // Double precision type typedef Optimization::veci SIMD_Itype; // Integer type From a833f88c3237f9c941e9eb79ad459d0e260d2a2b Mon Sep 17 00:00:00 2001 From: Lanny91 Date: Fri, 16 Jun 2017 15:58:47 +0100 Subject: [PATCH 079/177] Added missing SIMD integer reduction implementation for AVX, AVX-512, SSE4, IMCI --- lib/simd/Grid_avx.h | 25 ++++++++++++++++++++++--- lib/simd/Grid_avx512.h | 22 +++++++++++++++++++--- lib/simd/Grid_imci.h | 4 +--- lib/simd/Grid_sse4.h | 6 +++--- 4 files changed, 45 insertions(+), 12 deletions(-) diff --git a/lib/simd/Grid_avx.h b/lib/simd/Grid_avx.h index 52be9c05..57d9064d 100644 --- a/lib/simd/Grid_avx.h +++ b/lib/simd/Grid_avx.h @@ -701,9 +701,28 @@ namespace Optimization { //Integer Reduce template<> inline Integer Reduce::operator()(__m256i in){ - // FIXME unimplemented - printf("Reduce : Missing integer implementation -> FIX\n"); - assert(0); + __m128i ret; +#if defined (AVX2) + // AVX2 horizontal adds within upper and lower halves of register; use + // SSE to add upper and lower halves for result. + __m256i v1, v2; + __m128i u1, u2; + v1 = _mm256_hadd_epi32(in, in); + v2 = _mm256_hadd_epi32(v1, v1); + u1 = _mm256_castsi256_si128(v2); // upper half + u2 = _mm256_extracti128_si256(v2, 1); // lower half + ret = _mm256_add_epi32(u1, u2); +#else + // No AVX horizontal add; extract upper and lower halves of register & use + // SSE intrinsics. 
+ __m128i u1, u2, u3; + u1 = _mm256_extractf128_si256(in, 0); // upper half + u2 = _mm256_extractf128_si256(in, 1); // lower half + u3 = _mm_add_epi32(u1, u2); + u1 = _mm_hadd_epi32(u3, u3); + ret = _mm_hadd_epi32(u1, u1); +#endif + return _mm_cvtsi128_si32(ret); } } diff --git a/lib/simd/Grid_avx512.h b/lib/simd/Grid_avx512.h index ba054665..458a8f7c 100644 --- a/lib/simd/Grid_avx512.h +++ b/lib/simd/Grid_avx512.h @@ -543,6 +543,24 @@ namespace Optimization { u512d conv; conv.v = v1; return conv.f[0]; } + + //Integer Reduce + template<> + inline Integer Reduce::operator()(__m512i in){ + // No full vector reduce, use AVX to add upper and lower halves of register + // and perform AVX reduction. + __m256i v1, v2, v3; + __m128i u1, u2, ret; + v1 = _mm512_castsi512_si256(in); // upper half + v2 = _mm512_extracti32x8_epi32(in, 1); // lower half + v3 = _mm256_add_epi32(v1, v2); + v1 = _mm256_hadd_epi32(v3, v3); + v2 = _mm256_hadd_epi32(v1, v1); + u1 = _mm256_castsi256_si128(v2) // upper half + u2 = _mm256_extracti128_si256(v2, 1); // lower half + ret = _mm256_add_epi32(u1, u2); + return _mm_cvtsi128_si32(ret); + } #else //Complex float Reduce template<> @@ -570,9 +588,7 @@ namespace Optimization { //Integer Reduce template<> inline Integer Reduce::operator()(__m512i in){ - // FIXME unimplemented - printf("Reduce : Missing integer implementation -> FIX\n"); - assert(0); + return _mm512_reduce_add_epi32(in); } #endif diff --git a/lib/simd/Grid_imci.h b/lib/simd/Grid_imci.h index 173e57d8..a1dae565 100644 --- a/lib/simd/Grid_imci.h +++ b/lib/simd/Grid_imci.h @@ -401,9 +401,7 @@ namespace Optimization { //Integer Reduce template<> inline Integer Reduce::operator()(__m512i in){ - // FIXME unimplemented - printf("Reduce : Missing integer implementation -> FIX\n"); - assert(0); + return _mm512_reduce_add_epi32(in); } diff --git a/lib/simd/Grid_sse4.h b/lib/simd/Grid_sse4.h index 2fb2df76..0b1f9ffb 100644 --- a/lib/simd/Grid_sse4.h +++ b/lib/simd/Grid_sse4.h @@ -570,9 +570,9 @@ 
namespace Optimization { //Integer Reduce template<> inline Integer Reduce::operator()(__m128i in){ - // FIXME unimplemented - printf("Reduce : Missing integer implementation -> FIX\n"); - assert(0); + __m128i v1 = _mm_hadd_epi32(in, in); + __m128i v2 = _mm_hadd_epi32(v1, v1); + return _mm_cvtsi128_si32(v2); } } From d57217017075d38c8f170fe7b141ea6d7f662c16 Mon Sep 17 00:00:00 2001 From: paboyle Date: Sun, 18 Jun 2017 00:10:20 +0100 Subject: [PATCH 080/177] Update for SciDAC --- lib/GridStd.h | 1 + 1 file changed, 1 insertion(+) diff --git a/lib/GridStd.h b/lib/GridStd.h index 959ba9ac..097e62ab 100644 --- a/lib/GridStd.h +++ b/lib/GridStd.h @@ -7,6 +7,7 @@ #include #include #include +#include #include #include #include From 8e9be9f84f0aa38e94dfafa81d525526fbed9bc1 Mon Sep 17 00:00:00 2001 From: paboyle Date: Sun, 18 Jun 2017 00:10:42 +0100 Subject: [PATCH 081/177] Updates for SciDAC IO --- lib/parallelIO/BinaryIO.h | 135 +++++++++++++++++--------------------- 1 file changed, 59 insertions(+), 76 deletions(-) diff --git a/lib/parallelIO/BinaryIO.h b/lib/parallelIO/BinaryIO.h index 7226ccba..117bec01 100644 --- a/lib/parallelIO/BinaryIO.h +++ b/lib/parallelIO/BinaryIO.h @@ -69,46 +69,6 @@ inline uint64_t Grid_ntohll(uint64_t A) { } #endif -///////////////////////////////////////////////////////////////////////////////// -// Simple classes for precision conversion -///////////////////////////////////////////////////////////////////////////////// -template -struct BinarySimpleUnmunger { - typedef typename getPrecision::real_scalar_type fobj_stype; - typedef typename getPrecision::real_scalar_type sobj_stype; - - void operator()(sobj &in, fobj &out) { - // take word by word and transform accoding to the status - fobj_stype *out_buffer = (fobj_stype *)&out; - sobj_stype *in_buffer = (sobj_stype *)∈ - size_t fobj_words = sizeof(out) / sizeof(fobj_stype); - size_t sobj_words = sizeof(in) / sizeof(sobj_stype); - assert(fobj_words == sobj_words); - - for (unsigned int word 
= 0; word < sobj_words; word++) - out_buffer[word] = in_buffer[word]; // type conversion on the fly - - } -}; - -template -struct BinarySimpleMunger { - typedef typename getPrecision::real_scalar_type fobj_stype; - typedef typename getPrecision::real_scalar_type sobj_stype; - - void operator()(fobj &in, sobj &out) { - // take word by word and transform accoding to the status - fobj_stype *in_buffer = (fobj_stype *)∈ - sobj_stype *out_buffer = (sobj_stype *)&out; - size_t fobj_words = sizeof(in) / sizeof(fobj_stype); - size_t sobj_words = sizeof(out) / sizeof(sobj_stype); - assert(fobj_words == sobj_words); - - for (unsigned int word = 0; word < sobj_words; word++) - out_buffer[word] = in_buffer[word]; // type conversion on the fly - - } -}; // A little helper inline void removeWhitespace(std::string &key) { @@ -126,11 +86,7 @@ class BinaryIO { // more byte manipulation helpers ///////////////////////////////////////////////////////////////////////////// - template static inline void Uint32Checksum(Lattice &lat, - uint32_t &nersc_csum, - uint32_t &scidac_csuma, - uint32_t &scidac_csumb) - + template static inline void Uint32Checksum(Lattice &lat,uint32_t &nersc_csum) { typedef typename vobj::scalar_object sobj; @@ -140,15 +96,38 @@ class BinaryIO { std::vector scalardata(lsites); unvectorizeToLexOrdArray(scalardata,lat); - Uint32Checksum(grid,scalardata,nersc_csum,scidac_csuma,scidac_csumb); + NerscChecksum(grid,scalardata,nersc_csum); } - template - static inline void Uint32Checksum(GridBase *grid, - std::vector &fbuf, - uint32_t &nersc_csum, - uint32_t &scidac_csuma, - uint32_t &scidac_csumb) + template static inline void NerscChecksum(GridBase *grid,std::vector &fbuf,uint32_t &nersc_csum) + { + const uint64_t size32 = sizeof(fobj)/sizeof(uint32_t); + + + uint64_t lsites =grid->lSites(); + if (fbuf.size()==1) { + lsites=1; + } + +#pragma omp parallel + { + uint32_t nersc_csum_thr=0; + +#pragma omp for + for(uint64_t local_site=0;local_site static inline void 
ScidacChecksum(GridBase *grid,std::vector &fbuf,uint32_t &scidac_csuma,uint32_t &scidac_csumb) { const uint64_t size32 = sizeof(fobj)/sizeof(uint32_t); @@ -156,6 +135,9 @@ class BinaryIO { int nd = grid->_ndimension; uint64_t lsites =grid->lSites(); + if (fbuf.size()==1) { + lsites=1; + } std::vector local_vol =grid->LocalDimensions(); std::vector local_start =grid->LocalStarts(); std::vector global_vol =grid->FullDimensions(); @@ -163,21 +145,15 @@ class BinaryIO { #pragma omp parallel { std::vector coor(nd); - uint32_t nersc_csum_thr=0; uint32_t scidac_csuma_thr=0; uint32_t scidac_csumb_thr=0; uint32_t site_crc=0; - uint32_t zcrc = crc32(0L, Z_NULL, 0); #pragma omp for for(uint64_t local_site=0;local_site>(32-gsite29); scidac_csumb_thr ^= site_crc<>(32-gsite31); } #pragma omp critical { - nersc_csum += nersc_csum_thr; scidac_csuma^= scidac_csuma_thr; scidac_csumb^= scidac_csumb_thr; } @@ -386,7 +363,8 @@ class BinaryIO { assert(0); #endif } else { - std::cout<< GridLogMessage<< "C++ read I/O "<< file<< std::endl; + std::cout<< GridLogMessage<< "C++ read I/O "<< file<<" : " + << iodata.size()*sizeof(fobj)<<" bytes"<Barrier(); bstimer.Start(); + ScidacChecksum(grid,iodata,scidac_csuma,scidac_csumb); if (ieee32big) be32toh_v((void *)&iodata[0], sizeof(fobj)*iodata.size()); if (ieee32) le32toh_v((void *)&iodata[0], sizeof(fobj)*iodata.size()); if (ieee64big) be64toh_v((void *)&iodata[0], sizeof(fobj)*iodata.size()); if (ieee64) le64toh_v((void *)&iodata[0], sizeof(fobj)*iodata.size()); - Uint32Checksum(grid,iodata,nersc_csum,scidac_csuma,scidac_csumb); + NerscChecksum(grid,iodata,nersc_csum); bstimer.Stop(); } if ( control & BINARYIO_WRITE ) { bstimer.Start(); - Uint32Checksum(grid,iodata,nersc_csum,scidac_csuma,scidac_csumb); + NerscChecksum(grid,iodata,nersc_csum); if (ieee32big) htobe32_v((void *)&iodata[0], sizeof(fobj)*iodata.size()); if (ieee32) htole32_v((void *)&iodata[0], sizeof(fobj)*iodata.size()); if (ieee64big) htobe64_v((void *)&iodata[0], 
sizeof(fobj)*iodata.size()); if (ieee64) htole64_v((void *)&iodata[0], sizeof(fobj)*iodata.size()); + ScidacChecksum(grid,iodata,scidac_csuma,scidac_csumb); bstimer.Stop(); grid->Barrier(); @@ -436,9 +416,9 @@ class BinaryIO { assert(0); #endif } else { - std::cout<< GridLogMessage<< "C++ write I/O "<< file<< std::endl; - std::ofstream fout; - fout.open(file,std::ios::binary|std::ios::out|std::ios::in); + std::ofstream fout; fout.open(file,std::ios::binary|std::ios::out|std::ios::in); + std::cout<< GridLogMessage<< "C++ write I/O "<< file<<" : " + << iodata.size()*sizeof(fobj)<<" bytes"<GlobalXOR(scidac_csuma); grid->GlobalXOR(scidac_csumb); grid->Barrier(); - // std::cout << "Binary IO NERSC checksum 0x"< Date: Sun, 18 Jun 2017 00:11:02 +0100 Subject: [PATCH 082/177] SciDAC I/O and ILDG improvements --- lib/parallelIO/IldgIO.h | 552 ++++++++++++++++++++++++++--------- lib/parallelIO/IldgIOtypes.h | 149 ++++++++-- 2 files changed, 551 insertions(+), 150 deletions(-) diff --git a/lib/parallelIO/IldgIO.h b/lib/parallelIO/IldgIO.h index a6810b0d..9a1612d5 100644 --- a/lib/parallelIO/IldgIO.h +++ b/lib/parallelIO/IldgIO.h @@ -27,6 +27,7 @@ directory #ifndef GRID_ILDG_IO_H #define GRID_ILDG_IO_H +#ifdef HAVE_LIME #include #include #include @@ -37,31 +38,153 @@ directory #include #include -#ifdef HAVE_LIME - +//Lime is a must have for this functionality extern "C" { // for linkage #include "lime.h" } - -// Unused SCIDAC records names -// SCIDAC_PRIVATE_FILE_XML "scidac-private-file-xml" -// SCIDAC_SITELIST "scidac-sitelist" -// SCIDAC_FILE_XML "scidac-file-xml" -// SCIDAC_RIVATE_RECORD_XML "scidac-private-record-xml" -// SCIDAC_RECORD_XML "scidac-record-xml" -// SCIDAC_BINARY_DATA "scidac-binary-data" -// -// Scidac checksum: CRC32 every site, xor reduce some hash of this. 
-// https://github.com/usqcd-software/qio/blob/master/lib/dml/DML_utils.c - namespace Grid { namespace QCD { -class IldgIO : public BinaryIO { + template inline std::string ScidacWordMnemonic(void){ return std::string("unknown"); } + template<> inline std::string ScidacWordMnemonic (void){ return std::string("D"); } + template<> inline std::string ScidacWordMnemonic (void){ return std::string("F"); } + template<> inline std::string ScidacWordMnemonic< int32_t>(void){ return std::string("I32_t"); } + template<> inline std::string ScidacWordMnemonic(void){ return std::string("U32_t"); } + template<> inline std::string ScidacWordMnemonic< int64_t>(void){ return std::string("I64_t"); } + template<> inline std::string ScidacWordMnemonic(void){ return std::string("U64_t"); } + + template std::string ScidacRecordTypeString(int &colors, int &spins, int & typesize,int &datacount) { + + typedef typename getPrecision::real_scalar_type stype; + + int _ColourN = indexRank(); + int _ColourScalar = isScalar(); + int _ColourVector = isVector(); + int _ColourMatrix = isMatrix(); + + int _SpinN = indexRank(); + int _SpinScalar = isScalar(); + int _SpinVector = isVector(); + int _SpinMatrix = isMatrix(); + + int _LorentzN = indexRank(); + int _LorentzScalar = isScalar(); + int _LorentzVector = isVector(); + int _LorentzMatrix = isMatrix(); + + std::stringstream stream; + + stream << "GRID_"; + stream << ScidacWordMnemonic(); + + // std::cout << " Lorentz N/S/V/M : " << _LorentzN<<" "<<_LorentzScalar<<"/"<<_LorentzVector<<"/"<<_LorentzMatrix< std::string ScidacRecordTypeString(Lattice & lat,int &colors, int &spins, int & typesize,int &datacount) { + return ScidacRecordTypeString(colors,spins,typesize,datacount); + }; + + template void ScidacMetaData(Lattice & field, + FieldMetaData &header, + scidacRecord & _scidacRecord, + scidacFile & _scidacFile) + { + typedef typename getPrecision::real_scalar_type stype; + + ///////////////////////////////////// + // Pull Grid's metadata + 
///////////////////////////////////// + PrepareMetaData(field,header); + + ///////////////////////////////////// + // Scidac Private File structure + ///////////////////////////////////// + _scidacFile = scidacFile(field._grid); + + ///////////////////////////////////// + // Scidac Private Record structure + ///////////////////////////////////// + scidacRecord sr; + sr.datatype = ScidacRecordTypeString(field,sr.colors,sr.spins,sr.typesize,sr.datacount); + sr.date = header.creation_date; + sr.precision = ScidacWordMnemonic(); + sr.recordtype = GRID_IO_FIELD; + + _scidacRecord = sr; + + std::cout << GridLogMessage << "Build SciDAC datatype " < + int readObject(serialisable_object &object,std::string object_name,std::string record_name) + + int createLimeRecordHeader(std::string message, int MB, int ME, size_t PayloadSize); + template + int writeObject(int MB,int ME,serialisable_object &object,std::string object_name,std::string record_name) + template + int writeLimeLatticeBinaryObject(Lattice &field,std::string filename,std::string record_name) + */ + /////////////////////////////////////////////////////// + // Lime utility functions + /////////////////////////////////////////////////////// + + static int createLimeRecordHeader(std::string message, int MB, int ME, size_t PayloadSize, LimeWriter* L) { LimeRecordHeader *h; h = limeCreateHeader(MB, ME, const_cast(message.c_str()), PayloadSize); @@ -70,6 +193,9 @@ class IldgIO : public BinaryIO { return LIME_SUCCESS; } + //////////////////////////////////////////// + // Write a generic serialisable object + //////////////////////////////////////////// template static void writeLimeObject(int MB,int ME,serialisable_object &object,std::string object_name,std::string record_name, LimeWriter *LimeW) { @@ -81,24 +207,232 @@ class IldgIO : public BinaryIO { } uint64_t nbytes = xmlstring.size(); LimeRecordHeader *h = limeCreateHeader(MB, ME,(char *)record_name.c_str(), nbytes); - assert(limeWriteRecordHeader(h, LimeW)>=0); - 
assert(limeWriteRecordData(&xmlstring[0], &nbytes, LimeW)>=0); - limeWriterCloseRecord(LimeW); + int err=limeWriteRecordHeader(h, LimeW); assert(err>=0); + err=limeWriteRecordData(&xmlstring[0], &nbytes, LimeW); assert(err>=0); + err=limeWriterCloseRecord(LimeW); assert(err>=0); limeDestroyHeader(h); } + //////////////////////////////////////////// + // Read a generic serialisable object + //////////////////////////////////////////// + template + static void readLimeObject(serialisable_object &object,std::string object_name,std::string record_name, LimeReader *LimeR) + { + std::string xmlstring; + // should this be a do while; can we miss a first record?? + while ( limeReaderNextRecord(LimeR) == LIME_SUCCESS ) { - static unsigned int writeHeader(FieldMetaData &header, LimeWriter *LimeW) { + uint64_t nbytes = limeReaderBytes(LimeR);//size of this record (configuration) + + if ( strncmp(limeReaderType(LimeR), record_name.c_str(),strlen(record_name.c_str()) ) ) { + std::vector xmlc(nbytes+1,'\0'); + limeReaderReadData((void *)&xmlc[0], &nbytes, LimeR); + XmlReader RD(&xmlc[0],""); + read(RD,object_name,object); + return; + } + + } + assert(0); + } + + //////////////////////////////////////////// + // Read a generic lattice field and verify checksum + //////////////////////////////////////////// + template + static void readLimeLatticeBinaryObject(Lattice &field,std::string filename,std::string record_name,FILE *File, LimeReader *LimeR) + { + typedef typename vobj::scalar_object sobj; + scidacChecksum scidacChecksum_; + uint32_t nersc_csum,scidac_csuma,scidac_csumb; + + std::string format = getFormatString(); + + while ( limeReaderNextRecord(LimeR) == LIME_SUCCESS ) { + + std::cout << GridLogMessage << limeReaderType(LimeR) < munge; + BinaryIO::readLatticeObject< sobj, sobj >(field, filename, munge, offset, format,nersc_csum,scidac_csuma,scidac_csumb); + + ///////////////////////////////////////////// + // Insist checksum is next record + 
///////////////////////////////////////////// + readLimeObject(scidacChecksum_,std::string("scidacChecksum"),record_name,LimeR); + + ///////////////////////////////////////////// + // Verify checksums + ///////////////////////////////////////////// + scidacChecksumVerify(scidacChecksum_,scidac_csuma,scidac_csumb); + return; + } + } + } + + //////////////////////////////////////////// + // Write a generic lattice field and csum + //////////////////////////////////////////// + template + static void writeLimeLatticeBinaryObject(Lattice &field,std::string filename,std::string record_name,FILE *File, LimeWriter *LimeW) + { + + //////////////////////////////////////////// + // Create record header + //////////////////////////////////////////// + typedef typename vobj::scalar_object sobj; + int err; + uint32_t nersc_csum,scidac_csuma,scidac_csumb; + uint64_t PayloadSize = sizeof(sobj) * field._grid->_gsites; + createLimeRecordHeader(record_name, 0, 0, PayloadSize, LimeW); + + //////////////////////////////////////////////////////////////////// + // NB: FILE and iostream are jointly writing disjoint sequences in the + // the same file through different file handles (integer units). + // + // These are both buffered, so why I think this code is right is as follows. + // + // i) write record header to FILE *File, telegraphing the size. + // ii) ftell reads the offset from FILE *File . + // iii) iostream / MPI Open independently seek this offset. Write sequence direct to disk. + // Closes iostream and flushes. + // iv) fseek on FILE * to end of this disjoint section. + // v) Continue writing scidac record. 
+ //////////////////////////////////////////////////////////////////// + off_t offset = ftell(File); + std::string format = getFormatString(); + BinarySimpleMunger munge; + BinaryIO::writeLatticeObject(field, filename, munge, offset, format,nersc_csum,scidac_csuma,scidac_csumb); + err=limeWriterCloseRecord(LimeW); assert(err>=0); + //////////////////////////////////////// + // Write checksum element, propagaing forward from the BinaryIO + // Always pair a checksum with a binary object, and close message + //////////////////////////////////////// + scidacChecksum checksum; + std::stringstream streama; streama << std::hex << scidac_csuma; + std::stringstream streamb; streamb << std::hex << scidac_csumb; + checksum.suma= streama.str(); + checksum.sumb= streamb.str(); + std::cout << GridLogMessage<<" writing scidac checksums "< + int open(std::string filename,GridBase *grid,userFile &_userFile,int volfmt) { + + } + void close(void) { + + } + template + int writeScidacField(Lattice &field,userRecord &_userRecord,int volfmt) + template + int readScidacField(Lattice &field,userRecord &_userRecord,int volfmt) + */ + //////////////////////////////////////////////// + // Write generic lattice field in scidac format + //////////////////////////////////////////////// + template + static void writeScidacField(std::string filename,Lattice &field,userFile _userFile,userRecord _userRecord) + { + typedef typename vobj::scalar_object sobj; + uint64_t nbytes; + GridBase * grid = field._grid; + + //////////////////////////////////////// + // fill the Grid header + //////////////////////////////////////// + FieldMetaData header; + scidacRecord _scidacRecord; + scidacFile _scidacFile; + + ScidacMetaData(field,header,_scidacRecord,_scidacFile); + + ////////////////////////////////////////////// + // Fill the Lime file record by record + ////////////////////////////////////////////// + FILE *File = fopen(filename.c_str(), "w"); + LimeWriter *LimeW = limeCreateWriter(File); + assert(LimeW 
!= NULL ); + + writeLimeObject(1,0,header ,std::string("FieldMetaData"),std::string(GRID_FORMAT),LimeW); // Open message + writeLimeObject(0,0,_userFile,_userFile.SerialisableClassName(),std::string(SCIDAC_FILE_XML),LimeW); + writeLimeObject(0,0,_scidacFile,_scidacFile.SerialisableClassName(),std::string(SCIDAC_PRIVATE_FILE_XML),LimeW); + writeLimeObject(0,0,_userRecord,_userRecord.SerialisableClassName(),std::string(SCIDAC_RECORD_XML),LimeW); + writeLimeObject(0,0,_scidacRecord,_scidacRecord.SerialisableClassName(),std::string(SCIDAC_PRIVATE_RECORD_XML),LimeW); + writeLimeLatticeBinaryObject(field,filename,std::string(ILDG_BINARY_DATA),File,LimeW); // Closes message with checksum + + limeDestroyWriter(LimeW); + fclose(File); + } +}; + +class IldgIO : public ScidacIO { + public: + + /////////////////////////////////// + // A little helper + /////////////////////////////////// + static void writeLimeIldgLFN(std::string &LFN,LimeWriter *LimeW) + { + uint64_t PayloadSize = LFN.size(); + int err; + createLimeRecordHeader(ILDG_DATA_LFN, 0 , 0, PayloadSize, LimeW); + err=limeWriteRecordData(const_cast(LFN.c_str()), &PayloadSize, LimeW); assert(err>=0); + err=limeWriterCloseRecord(LimeW); assert(err>=0); + } + + //////////////////////////////////////////////////////////////// + // Special ILDG operations ; gauge configs only. + // Don't require scidac records EXCEPT checksum + // Use Grid MetaData object if present. 
+ //////////////////////////////////////////////////////////////// + template + static void writeConfiguration(std::string filename,Lattice > &Umu) + { + GridBase * grid = Umu._grid; + typedef Lattice > GaugeField; + typedef iLorentzColourMatrix vobj; + typedef typename vobj::scalar_object sobj; uint64_t nbytes; - ildgFormat ildgfmt ; - usqcdInfo info; + //////////////////////////////////////// + // fill the Grid header + //////////////////////////////////////// + FieldMetaData header; + scidacRecord _scidacRecord; + scidacFile _scidacFile; + + ScidacMetaData(Umu,header,_scidacRecord,_scidacFile); + + std::string format = header.floating_point; + + assert ( (format == std::string("IEEE32BIG")) + ||(format == std::string("IEEE64BIG")) ); ////////////////////////////////////////////////////// // Fill ILDG header data struct ////////////////////////////////////////////////////// + ildgFormat ildgfmt ; ildgfmt.field = std::string("su3gauge"); - ildgfmt.precision = 64; + + if ( format == std::string("IEEE32BIG") ) { + ildgfmt.precision = 32; + } else { + ildgfmt.precision = 64; + } ildgfmt.version = 1.0; ildgfmt.lx = header.dimension[0]; ildgfmt.ly = header.dimension[1]; @@ -107,108 +441,59 @@ class IldgIO : public BinaryIO { assert(header.nd==4); assert(header.nd==header.dimension.size()); + ////////////////////////////////////////////////////////////////////////////// + // Fill the USQCD info field + ////////////////////////////////////////////////////////////////////////////// + usqcdInfo info; info.version=1.0; info.plaq = header.plaquette; info.linktr = header.link_trace; - // Following scidac file downloaded from NERSC under MILC - // Begin message, keep open on successive records - //Message 1 - // Type: scidac-private-file-xml 1.1416 16 16 48 0 - // Type: scidac-file-xml MILC ILDG archival gauge configuration - //Message 2 - // Type: scidac-private-record-xml 1.0Thu May 11 00:11:33 2006 UTC0 - // QDP_F3_ColorMatrixF3724 - // Type: scidac-record-xml - // Type: 
ildg-format - // Type: ildg-data-lfn - // Type: ildg-binary-data - // Type: scidac-checksum - - writeLimeObject(1,0,header ,std::string("FieldMetaData"),std::string(GRID_FORMAT),LimeW); - writeLimeObject(0,0,info ,std::string("usqcdInfo" ),std::string(USQCD_INFO ),LimeW); - writeLimeObject(0,0,ildgfmt,std::string("ildgFormat") ,std::string(ILDG_FORMAT),LimeW); - // LFN is not a serializable object - { - std::string LFN = header.ildg_lfn; - uint64_t PayloadSize = LFN.size(); - createHeader(ILDG_DATA_LFN, 0 , 0, PayloadSize, LimeW); - limeWriteRecordData(const_cast(LFN.c_str()), &PayloadSize, LimeW); - limeWriterCloseRecord(LimeW); - } - return 0; - } - - template - static void writeConfiguration(std::string filename,Lattice > &Umu, std::string format) { + std::cout << GridLogMessage << " Writing config; IldgIO "< > GaugeField; - typedef iLorentzColourMatrix vobj; - typedef typename vobj::scalar_object sobj; - typedef LorentzColourMatrixD fobj; - - GridBase * grid = Umu._grid; - - //////////////////////////////////////// - // fill the headers - //////////////////////////////////////// - FieldMetaData header; - - GridMetaData(grid,header); - GaugeStatistics(Umu,header); - MachineCharacteristics(header); - - assert( (format=="IEEE64BIG") || (format=="IEEE32BIG")); - header.floating_point = format; - header.checksum = 0x0; // unused in ILDG - writeHeader(header,LimeW); - - //////////////////////////////////////// - // Write data record header - //////////////////////////////////////// - uint64_t PayloadSize = sizeof(fobj) * Umu._grid->_gsites; - createHeader(ILDG_BINARY_DATA, 0, 0, PayloadSize, LimeW); - - off_t offset = ftell(File); - uint32_t nersc_csum,scidac_csuma,scidac_csumb; - GaugeSimpleMunger munge; - BinaryIO::writeLatticeObject(Umu, filename, munge, offset, header.floating_point, - nersc_csum,scidac_csuma,scidac_csumb); - limeWriterCloseRecord(LimeW); - - //////////////////////////////////////// - // Write checksum element, propagaing forward from the 
BinaryIO - //////////////////////////////////////// - scidacChecksum checksum; - checksum.suma= scidac_csuma; - checksum.sumb= scidac_csumb; - // std::cout << " writing scidac checksums "< static void readConfiguration(std::string filename,Lattice > &Umu, FieldMetaData &FieldMetaData_) { typedef Lattice > GaugeField; - typedef LorentzColourMatrixD sobjd; - typedef LorentzColourMatrixF sobjf; - typedef iLorentzColourMatrix itype; - typedef LorentzColourMatrix sobj; + typedef typename GaugeField::vector_object vobj; + typedef typename vobj::scalar_object sobj; + + typedef LorentzColourMatrixF fobj; + typedef LorentzColourMatrixD dobj; GridBase *grid = Umu._grid; std::vector dims = Umu._grid->FullDimensions(); + assert(dims.size()==4); FILE *File = fopen(filename.c_str(), "r"); LimeReader *LimeR = limeCreateReader(File); - // Metadata holders ildgFormat ildgFormat_ ; std::string ildgLFN_ ; @@ -263,8 +548,6 @@ class IldgIO : public BinaryIO { if ( ildgFormat_.precision == 64 ) format = std::string("IEEE64BIG"); if ( ildgFormat_.precision == 32 ) format = std::string("IEEE32BIG"); - // std::cout << "This is an ILDG format record : "< munge; - BinaryIO::readLatticeObject< itype, sobjd >(Umu, filename, munge, offset, format, - nersc_csum,scidac_csuma,scidac_csumb); + + if ( format == std::string("IEEE64BIG") ) { + GaugeSimpleMunger munge; + BinaryIO::readLatticeObject< vobj, dobj >(Umu, filename, munge, offset, format,nersc_csum,scidac_csuma,scidac_csumb); + } else { + GaugeSimpleMunger munge; + BinaryIO::readLatticeObject< vobj, fobj >(Umu, filename, munge, offset, format,nersc_csum,scidac_csuma,scidac_csumb); + } + found_ildgBinary = 1; } @@ -328,8 +610,10 @@ class IldgIO : public BinaryIO { ////////////////////////////////////////////////////// // Minimally must find binary segment and checksum + // Since this is an ILDG reader require ILDG format ////////////////////////////////////////////////////// assert(found_ildgBinary); + assert(found_ildgFormat); 
assert(found_scidacChecksum); // Must find something with the lattice dimensions @@ -337,9 +621,7 @@ class IldgIO : public BinaryIO { if ( found_FieldMetaData ) { - std::cout << GridLogMessage<<"a Grid MetaData was record found: configuration was probably written by Grid ! Yay ! "<(Umu,checker); + GaugeStatistics(Umu,checker); assert(fabs(checker.plaquette - FieldMetaData_.plaquette )<1.0e-5); assert(fabs(checker.link_trace - FieldMetaData_.link_trace)<1.0e-5); std::cout << GridLogMessage<<"Plaquette and link trace match " << std::endl; } } + }; - // format for RNG? Now just binary out -}; -} -} +}} //HAVE_LIME #endif diff --git a/lib/parallelIO/IldgIOtypes.h b/lib/parallelIO/IldgIOtypes.h index 8e1316eb..c3a5321c 100644 --- a/lib/parallelIO/IldgIOtypes.h +++ b/lib/parallelIO/IldgIOtypes.h @@ -34,16 +34,110 @@ extern "C" { // for linkage namespace Grid { +///////////////////////////////////////////////////////////////////////////////// +// Data representation of records that enter ILDG and SciDac formats +///////////////////////////////////////////////////////////////////////////////// + #define GRID_FORMAT "grid-format" #define ILDG_FORMAT "ildg-format" #define ILDG_BINARY_DATA "ildg-binary-data" #define ILDG_DATA_LFN "ildg-data-lfn" -#define USQCD_INFO "usqcdInfo" -#define SCIDAC_CHECKSUM "scidac-checksum" +#define SCIDAC_CHECKSUM "scidac-checksum" +#define SCIDAC_PRIVATE_FILE_XML "scidac-private-file-xml" +#define SCIDAC_FILE_XML "scidac-file-xml" +#define SCIDAC_PRIVATE_RECORD_XML "scidac-private-record-xml" +#define SCIDAC_RECORD_XML "scidac-record-xml" +#define SCIDAC_BINARY_DATA "scidac-binary-data" +// Unused SCIDAC records names; could move to support this functionality +#define SCIDAC_SITELIST "scidac-sitelist" + + //////////////////////////////////////////////////////////// + const int GRID_IO_SINGLEFILE = 0; // hardcode lift from QIO compat + const int GRID_IO_MULTIFILE = 1; // hardcode lift from QIO compat + const int GRID_IO_FIELD = 0; // hardcode 
lift from QIO compat + const int GRID_IO_GLOBAL = 1; // hardcode lift from QIO compat + //////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////////////////////// -// Data representation of records that enter ILDG and SciDac formats +// QIO uses mandatory "private" records fixed format +// Private is in principle "opaque" however it can't be changed now because that would break existing +// file compatability, so should be correct to assume the undocumented but defacto file structure. ///////////////////////////////////////////////////////////////////////////////// + +//////////////////////// +// Scidac private file xml +// 1.1416 16 16 32 0 +//////////////////////// +struct scidacFile : Serializable { + public: + GRID_SERIALIZABLE_CLASS_MEMBERS(scidacFile, + double, version, + int, spacetime, + std::string, dims, // must convert to int + int, volfmt); + + std::vector getDimensions(void) { + std::stringstream stream(dims); + std::vector dimensions; + int n; + while(stream >> n){ + dimensions.push_back(n); + } + return dimensions; + } + + void setDimensions(std::vector dimensions) { + char delimiter = ' '; + std::stringstream stream; + for(int i=0;i_ndimension; + setDimensions(grid->FullDimensions()); + volfmt = GRID_IO_SINGLEFILE; + } + +}; + +/////////////////////////////////////////////////////////////////////// +// scidac-private-record-xml : example +// +// 1.1Tue Jul 26 21:14:44 2011 UTC0 +// QDP_D3_ColorMatrixD34 +// 1444 +// +/////////////////////////////////////////////////////////////////////// + +struct scidacRecord : Serializable { + public: + GRID_SERIALIZABLE_CLASS_MEMBERS(scidacRecord, + double, version, + std::string, date, + int, recordtype, + std::string, datatype, + std::string, precision, + int, colors, + int, spins, + int, typesize, + int, datacount); + + scidacRecord() { version =1.0; } + +}; + +//////////////////////// +// ILDG format +//////////////////////// struct 
ildgFormat : Serializable { public: GRID_SERIALIZABLE_CLASS_MEMBERS(ildgFormat, @@ -54,10 +148,11 @@ public: int, ly, int, lz, int, lt); - ildgFormat() { - version=1.0; - }; + ildgFormat() { version=1.0; }; }; +//////////////////////// +// USQCD info +//////////////////////// struct usqcdInfo : Serializable { public: GRID_SERIALIZABLE_CLASS_MEMBERS(usqcdInfo, @@ -69,7 +164,36 @@ struct usqcdInfo : Serializable { version=1.0; }; }; +//////////////////////// +// Scidac Checksum +//////////////////////// +struct scidacChecksum : Serializable { + public: + GRID_SERIALIZABLE_CLASS_MEMBERS(scidacChecksum, + double, version, + std::string, suma, + std::string, sumb); + scidacChecksum() { + version=1.0; + }; +}; +//////////////////////////////////////////////////////////////////////////////////////////////////////////////// +// Type: scidac-file-xml MILC ILDG archival gauge configuration +//////////////////////////////////////////////////////////////////////////////////////////////////////////////// +//////////////////////////////////////////////////////////////////////////////////////////////////////////////// +// Type: +//////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +//////////////////////// +// Scidac private file xml +// 1.1416 16 16 32 0 +//////////////////////// + +#if 0 +//////////////////////////////////////////////////////////////////////////////////////// +// From http://www.physics.utah.edu/~detar/scidac/qio_2p3.pdf +//////////////////////////////////////////////////////////////////////////////////////// struct usqcdPropFile : Serializable { public: GRID_SERIALIZABLE_CLASS_MEMBERS(usqcdPropFile, @@ -100,17 +224,8 @@ struct usqcdPropInfo : Serializable { version=1.0; }; }; -struct scidacChecksum : Serializable { - public: - GRID_SERIALIZABLE_CLASS_MEMBERS(scidacChecksum, - double, version, - uint32_t, suma, - uint32_t, sumb); - scidacChecksum() { - version=1.0; - suma=sumb=0; - }; -}; +#endif + } 
#endif #endif From ae4de947989d1c9299b7dbeb8c1a570f745a84d7 Mon Sep 17 00:00:00 2001 From: paboyle Date: Sun, 18 Jun 2017 00:11:23 +0100 Subject: [PATCH 083/177] SciDAC I/O support --- lib/parallelIO/MetaData.h | 124 ++++++++++++++++++++++++++++++++++---- lib/parallelIO/NerscIO.h | 4 +- 2 files changed, 114 insertions(+), 14 deletions(-) diff --git a/lib/parallelIO/MetaData.h b/lib/parallelIO/MetaData.h index 1bad07f2..6d45d0a5 100644 --- a/lib/parallelIO/MetaData.h +++ b/lib/parallelIO/MetaData.h @@ -38,9 +38,24 @@ namespace Grid { - //////////////////////////////////////////////////////////////////////////////// - // header specification/interpretation - //////////////////////////////////////////////////////////////////////////////// + /////////////////////////////////////////////////////// + // Precision mapping + /////////////////////////////////////////////////////// + template static std::string getFormatString (void) + { + std::string format; + typedef typename getPrecision::real_scalar_type stype; + if ( sizeof(stype) == sizeof(float) ) { + format = std::string("IEEE32BIG"); + } + if ( sizeof(stype) == sizeof(double) ) { + format = std::string("IEEE64BIG"); + } + return format; + } + //////////////////////////////////////////////////////////////////////////////// + // header specification/interpretation + //////////////////////////////////////////////////////////////////////////////// class FieldMetaData : Serializable { public: @@ -66,8 +81,15 @@ namespace Grid { std::string, creation_date, std::string, archive_date, std::string, floating_point); + FieldMetaData(void) { + nd=4; + dimension.resize(4); + boundary.resize(4); + } }; + + namespace QCD { using namespace Grid; @@ -89,13 +111,6 @@ namespace Grid { header.boundary[d] = std::string("PERIODIC"); } } - template - inline void GaugeStatistics(GaugeField & data,FieldMetaData &header) - { - // How to convert data precision etc... 
- header.link_trace=Grid::QCD::WilsonLoops::linkTrace(data); - header.plaquette =Grid::QCD::WilsonLoops::avgPlaquette(data); - } inline void MachineCharacteristics(FieldMetaData &header) { @@ -133,7 +148,7 @@ namespace Grid { s << "BOUNDARY_"< inline void PrepareMetaData(Lattice & field, FieldMetaData &header) +{ + GridBase *grid = field._grid; + std::string format = getFormatString(); + header.floating_point = format; + header.checksum = 0x0; // Nersc checksum unused in ILDG, Scidac + GridMetaData(grid,header); + MachineCharacteristics(header); + } + inline void GaugeStatistics(Lattice & data,FieldMetaData &header) + { + // How to convert data precision etc... + header.link_trace=Grid::QCD::WilsonLoops::linkTrace(data); + header.plaquette =Grid::QCD::WilsonLoops::avgPlaquette(data); + } + inline void GaugeStatistics(Lattice & data,FieldMetaData &header) + { + // How to convert data precision etc... + header.link_trace=Grid::QCD::WilsonLoops::linkTrace(data); + header.plaquette =Grid::QCD::WilsonLoops::avgPlaquette(data); + } + template<> inline void PrepareMetaData(Lattice & field, FieldMetaData &header) + { + + GridBase *grid = field._grid; + std::string format = getFormatString(); + header.floating_point = format; + header.checksum = 0x0; // Nersc checksum unused in ILDG, Scidac + GridMetaData(grid,header); + GaugeStatistics(field,header); + MachineCharacteristics(header); + } + template<> inline void PrepareMetaData(Lattice & field, FieldMetaData &header) + { + GridBase *grid = field._grid; + std::string format = getFormatString(); + header.floating_point = format; + header.checksum = 0x0; // Nersc checksum unused in ILDG, Scidac + GridMetaData(grid,header); + GaugeStatistics(field,header); + MachineCharacteristics(header); + } ////////////////////////////////////////////////////////////////////// // Utilities ; these are QCD aware @@ -171,6 +228,48 @@ namespace Grid { typedef iLorentzColour2x3 LorentzColour2x3F; typedef iLorentzColour2x3 LorentzColour2x3D; 
+///////////////////////////////////////////////////////////////////////////////// +// Simple classes for precision conversion +///////////////////////////////////////////////////////////////////////////////// +template +struct BinarySimpleUnmunger { + typedef typename getPrecision::real_scalar_type fobj_stype; + typedef typename getPrecision::real_scalar_type sobj_stype; + + void operator()(sobj &in, fobj &out) { + // take word by word and transform accoding to the status + fobj_stype *out_buffer = (fobj_stype *)&out; + sobj_stype *in_buffer = (sobj_stype *)∈ + size_t fobj_words = sizeof(out) / sizeof(fobj_stype); + size_t sobj_words = sizeof(in) / sizeof(sobj_stype); + assert(fobj_words == sobj_words); + + for (unsigned int word = 0; word < sobj_words; word++) + out_buffer[word] = in_buffer[word]; // type conversion on the fly + + } +}; + +template +struct BinarySimpleMunger { + typedef typename getPrecision::real_scalar_type fobj_stype; + typedef typename getPrecision::real_scalar_type sobj_stype; + + void operator()(fobj &in, sobj &out) { + // take word by word and transform accoding to the status + fobj_stype *in_buffer = (fobj_stype *)∈ + sobj_stype *out_buffer = (sobj_stype *)&out; + size_t fobj_words = sizeof(in) / sizeof(fobj_stype); + size_t sobj_words = sizeof(out) / sizeof(sobj_stype); + assert(fobj_words == sobj_words); + + for (unsigned int word = 0; word < sobj_words; word++) + out_buffer[word] = in_buffer[word]; // type conversion on the fly + + } +}; + + template struct GaugeSimpleMunger{ void operator()(fobj &in, sobj &out) { @@ -220,6 +319,7 @@ namespace Grid { } } }; - } + + } diff --git a/lib/parallelIO/NerscIO.h b/lib/parallelIO/NerscIO.h index cc37b537..786839f2 100644 --- a/lib/parallelIO/NerscIO.h +++ b/lib/parallelIO/NerscIO.h @@ -179,7 +179,7 @@ namespace Grid { assert(0); } - GaugeStatistics(Umu,clone); + GaugeStatistics(Umu,clone); std::cout<(Umu,header); + GaugeStatistics(Umu,header); MachineCharacteristics(header); int offset; From 
46879e165814015c8d82195771573df01a1edd66 Mon Sep 17 00:00:00 2001 From: paboyle Date: Sun, 18 Jun 2017 00:11:45 +0100 Subject: [PATCH 084/177] Complex defined in Impl even for gauge. --- lib/qcd/action/fermion/FermionOperatorImpl.h | 6 ------ lib/qcd/action/gauge/GaugeImplTypes.h | 16 +++++++++++----- 2 files changed, 11 insertions(+), 11 deletions(-) diff --git a/lib/qcd/action/fermion/FermionOperatorImpl.h b/lib/qcd/action/fermion/FermionOperatorImpl.h index 20458b6d..524179f5 100644 --- a/lib/qcd/action/fermion/FermionOperatorImpl.h +++ b/lib/qcd/action/fermion/FermionOperatorImpl.h @@ -644,19 +644,16 @@ class StaggeredImpl : public PeriodicGaugeImpl using iImplScalar = iScalar > >; template using iImplSpinor = iScalar > >; template using iImplHalfSpinor = iScalar > >; template using iImplDoubledGaugeField = iVector >, Nds>; template using iImplPropagator = iScalar > >; - typedef iImplScalar SiteComplex; typedef iImplSpinor SiteSpinor; typedef iImplHalfSpinor SiteHalfSpinor; typedef iImplDoubledGaugeField SiteDoubledGaugeField; typedef iImplPropagator SitePropagator; - typedef Lattice ComplexField; typedef Lattice FermionField; typedef Lattice DoubledGaugeField; typedef Lattice PropagatorField; @@ -775,7 +772,6 @@ class StaggeredImpl : public PeriodicGaugeImpl using iImplScalar = iScalar > >; template using iImplSpinor = iScalar > >; template using iImplHalfSpinor = iScalar > >; template using iImplDoubledGaugeField = iVector >, Nds>; @@ -792,12 +788,10 @@ class StaggeredImpl : public PeriodicGaugeImpl DoubledGaugeField; typedef Lattice PropagatorField; - typedef iImplScalar SiteComplex; typedef iImplSpinor SiteSpinor; typedef iImplHalfSpinor SiteHalfSpinor; - typedef Lattice ComplexField; typedef Lattice FermionField; typedef SimpleCompressor Compressor; diff --git a/lib/qcd/action/gauge/GaugeImplTypes.h b/lib/qcd/action/gauge/GaugeImplTypes.h index 9d36eead..0c0df219 100644 --- a/lib/qcd/action/gauge/GaugeImplTypes.h +++ b/lib/qcd/action/gauge/GaugeImplTypes.h 
@@ -40,12 +40,15 @@ namespace QCD { typedef typename GImpl::Simd Simd; \ typedef typename GImpl::LinkField GaugeLinkField; \ typedef typename GImpl::Field GaugeField; \ + typedef typename GImpl::ComplexField ComplexField;\ typedef typename GImpl::SiteField SiteGaugeField; \ + typedef typename GImpl::SiteComplex SiteComplex; \ typedef typename GImpl::SiteLink SiteGaugeLink; -#define INHERIT_FIELD_TYPES(Impl) \ - typedef typename Impl::Simd Simd; \ - typedef typename Impl::SiteField SiteField; \ +#define INHERIT_FIELD_TYPES(Impl) \ + typedef typename Impl::Simd Simd; \ + typedef typename Impl::ComplexField ComplexField; \ + typedef typename Impl::SiteField SiteField; \ typedef typename Impl::Field Field; // hardcodes the exponential approximation in the template @@ -53,12 +56,15 @@ template class GaugeImplType public: typedef S Simd; - template using iImplGaugeLink = iScalar>>; - template using iImplGaugeField = iVector>, Nd>; + template using iImplScalar = iScalar > >; + template using iImplGaugeLink = iScalar > >; + template using iImplGaugeField = iVector >, Nd>; + typedef iImplScalar SiteComplex; typedef iImplGaugeLink SiteLink; typedef iImplGaugeField SiteField; + typedef Lattice ComplexField; typedef Lattice LinkField; typedef Lattice Field; From b96daf53a0c060c530eee3769861133d764589cf Mon Sep 17 00:00:00 2001 From: paboyle Date: Sun, 18 Jun 2017 00:12:15 +0100 Subject: [PATCH 085/177] Query tensor structures --- lib/tensors/Tensor_index.h | 60 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 60 insertions(+) diff --git a/lib/tensors/Tensor_index.h b/lib/tensors/Tensor_index.h index 7f34f3ac..f114baf8 100644 --- a/lib/tensors/Tensor_index.h +++ b/lib/tensors/Tensor_index.h @@ -47,6 +47,28 @@ template class TensorIndexRecursion { public: + + //////////////////////////////////////////////////// + // Type Queries + //////////////////////////////////////////////////// + template static inline int indexRank(const iScalar tmp) { return 
TensorIndexRecursion::indexRank(tmp._internal); } + template static inline int indexRank(const iVector tmp){ return TensorIndexRecursion::indexRank(tmp._internal[0]); } + template static inline int indexRank(const iMatrix tmp){ return TensorIndexRecursion::indexRank(tmp._internal[0][0]); } + + template static inline int isScalar(const iScalar tmp) { return TensorIndexRecursion::isScalar(tmp._internal); } + template static inline int isScalar(const iVector tmp){ return TensorIndexRecursion::isScalar(tmp._internal[0]); } + template static inline int isScalar(const iMatrix tmp){ return TensorIndexRecursion::isScalar(tmp._internal[0][0]); } + + template static inline int isVector(const iScalar tmp) { return TensorIndexRecursion::isVector(tmp._internal); } + template static inline int isVector(const iVector tmp){ return TensorIndexRecursion::isVector(tmp._internal[0]); } + template static inline int isVector(const iMatrix tmp){ return TensorIndexRecursion::isVector(tmp._internal[0][0]); } + + template static inline int isMatrix(const iScalar tmp) { return TensorIndexRecursion::isMatrix(tmp._internal); } + template static inline int isMatrix(const iVector tmp){ return TensorIndexRecursion::isMatrix(tmp._internal[0]); } + template static inline int isMatrix(const iMatrix tmp){ return TensorIndexRecursion::isMatrix(tmp._internal[0][0]); } + //////////////////////////////////////////////////// + // Trace + //////////////////////////////////////////////////// template static auto traceIndex(const iScalar arg) -> iScalar::traceIndex(arg._internal))> { @@ -215,6 +237,24 @@ class TensorIndexRecursion { template<> class TensorIndexRecursion<0> { public: + //////////////////////////////////////////////////// + // Type Queries + //////////////////////////////////////////////////// + template static inline int indexRank(const iScalar tmp) { return 1; } + template static inline int indexRank(const iVector tmp){ return N; } + template static inline int indexRank(const iMatrix tmp){ 
return N; } + + template static inline int isScalar(const iScalar tmp) { return true;} + template static inline int isScalar(const iVector tmp){ return false;} + template static inline int isScalar(const iMatrix tmp){ return false;} + + template static inline int isVector(const iScalar tmp) { return false;} + template static inline int isVector(const iVector tmp){ return true;} + template static inline int isVector(const iMatrix tmp){ return false;} + + template static inline int isMatrix(const iScalar tmp) { return false;} + template static inline int isMatrix(const iVector tmp){ return false;} + template static inline int isMatrix(const iMatrix tmp){ return true;} ///////////////////////////////////////// // Ends recursion for trace (scalar/vector/matrix) @@ -302,6 +342,26 @@ class TensorIndexRecursion<0> { //////////////////////////////////////////////////////////////////////////////////////////////////////// // External wrappers //////////////////////////////////////////////////////////////////////////////////////////////////////// +template inline int indexRank(void) +{ + vtype tmp; + return TensorIndexRecursion::indexRank(tmp); +} +template inline int isScalar(void) +{ + vtype tmp; + return TensorIndexRecursion::isScalar(tmp); +} +template inline int isVector(void) +{ + vtype tmp; + return TensorIndexRecursion::isVector(tmp); +} +template inline int isMatrix(void) +{ + vtype tmp; + return TensorIndexRecursion::isMatrix(tmp); +} template inline auto traceIndex (const vtype &arg) -> RemoveCRV(TensorIndexRecursion::traceIndex(arg)) { From ae39ec85a3b89072d9ea325cb953068a064ec822 Mon Sep 17 00:00:00 2001 From: paboyle Date: Sun, 18 Jun 2017 00:12:48 +0100 Subject: [PATCH 086/177] ComplexField defined --- lib/qcd/utils/WilsonLoops.h | 34 +++++++++++++++++----------------- 1 file changed, 17 insertions(+), 17 deletions(-) diff --git a/lib/qcd/utils/WilsonLoops.h b/lib/qcd/utils/WilsonLoops.h index 5382882e..ea713ec8 100644 --- a/lib/qcd/utils/WilsonLoops.h +++ 
b/lib/qcd/utils/WilsonLoops.h @@ -73,7 +73,7 @@ public: ////////////////////////////////////////////////// // trace of directed plaquette oriented in mu,nu plane ////////////////////////////////////////////////// - static void traceDirPlaquette(LatticeComplex &plaq, + static void traceDirPlaquette(ComplexField &plaq, const std::vector &U, const int mu, const int nu) { GaugeMat sp(U[0]._grid); @@ -83,9 +83,9 @@ public: ////////////////////////////////////////////////// // sum over all planes of plaquette ////////////////////////////////////////////////// - static void sitePlaquette(LatticeComplex &Plaq, + static void sitePlaquette(ComplexField &Plaq, const std::vector &U) { - LatticeComplex sitePlaq(U[0]._grid); + ComplexField sitePlaq(U[0]._grid); Plaq = zero; for (int mu = 1; mu < Nd; mu++) { for (int nu = 0; nu < mu; nu++) { @@ -104,11 +104,11 @@ public: U[mu] = PeekIndex(Umu, mu); } - LatticeComplex Plaq(Umu._grid); + ComplexField Plaq(Umu._grid); sitePlaquette(Plaq, U); - TComplex Tp = sum(Plaq); - Complex p = TensorRemove(Tp); + auto Tp = sum(Plaq); + auto p = TensorRemove(Tp); return p.real(); } @@ -129,15 +129,15 @@ public: static RealD linkTrace(const GaugeLorentz &Umu) { std::vector U(Nd, Umu._grid); - LatticeComplex Tr(Umu._grid); + ComplexField Tr(Umu._grid); Tr = zero; for (int mu = 0; mu < Nd; mu++) { U[mu] = PeekIndex(Umu, mu); Tr = Tr + trace(U[mu]); } - TComplex Tp = sum(Tr); - Complex p = TensorRemove(Tp); + auto Tp = sum(Tr); + auto p = TensorRemove(Tp); double vol = Umu._grid->gSites(); @@ -330,8 +330,8 @@ public: double coeff = 8.0/(32.0*M_PI*M_PI); - LatticeComplex qfield = coeff*trace(Bx*Ex + By*Ey + Bz*Ez); - TComplex Tq = sum(qfield); + ComplexField qfield = coeff*trace(Bx*Ex + By*Ey + Bz*Ez); + auto Tq = sum(qfield); return TensorRemove(Tq).real(); } @@ -350,16 +350,16 @@ public: adj(Gimpl::CovShiftForward( U[nu], nu, Gimpl::CovShiftForward(U[nu], nu, U[mu]))); } - static void traceDirRectangle(LatticeComplex &rect, + static void 
traceDirRectangle(ComplexField &rect, const std::vector &U, const int mu, const int nu) { GaugeMat sp(U[0]._grid); dirRectangle(sp, U, mu, nu); rect = trace(sp); } - static void siteRectangle(LatticeComplex &Rect, + static void siteRectangle(ComplexField &Rect, const std::vector &U) { - LatticeComplex siteRect(U[0]._grid); + ComplexField siteRect(U[0]._grid); Rect = zero; for (int mu = 1; mu < Nd; mu++) { for (int nu = 0; nu < mu; nu++) { @@ -379,12 +379,12 @@ public: U[mu] = PeekIndex(Umu, mu); } - LatticeComplex Rect(Umu._grid); + ComplexField Rect(Umu._grid); siteRectangle(Rect, U); - TComplex Tp = sum(Rect); - Complex p = TensorRemove(Tp); + auto Tp = sum(Rect); + auto p = TensorRemove(Tp); return p.real(); } ////////////////////////////////////////////////// From 1d18d95d4f1457e2f37f0237db79873a346873df Mon Sep 17 00:00:00 2001 From: paboyle Date: Sun, 18 Jun 2017 00:13:03 +0100 Subject: [PATCH 087/177] Class name return --- lib/serialisation/MacroMagic.h | 1 + 1 file changed, 1 insertion(+) diff --git a/lib/serialisation/MacroMagic.h b/lib/serialisation/MacroMagic.h index 04f1b401..774c947f 100644 --- a/lib/serialisation/MacroMagic.h +++ b/lib/serialisation/MacroMagic.h @@ -115,6 +115,7 @@ THE SOFTWARE. #define GRID_MACRO_WRITE_MEMBER(A,B) Grid::write(WR,#B,obj. 
B); #define GRID_SERIALIZABLE_CLASS_MEMBERS(cname,...)\ + std::string SerialisableClassName(void) {return std::string(#cname);} \ GRID_MACRO_EVAL(GRID_MACRO_MAP(GRID_MACRO_MEMBER,__VA_ARGS__))\ template \ static inline void write(Writer &WR,const std::string &s, const cname &obj){ \ From e6d984b484f9679bf1240414b2df239bc888e595 Mon Sep 17 00:00:00 2001 From: paboyle Date: Sun, 18 Jun 2017 00:13:22 +0100 Subject: [PATCH 088/177] ILDG tests --- tests/IO/Test_ildg_io.cc | 2 +- tests/IO/Test_ildg_read.cc | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/IO/Test_ildg_io.cc b/tests/IO/Test_ildg_io.cc index 1408c638..199773ab 100644 --- a/tests/IO/Test_ildg_io.cc +++ b/tests/IO/Test_ildg_io.cc @@ -78,7 +78,7 @@ int main (int argc, char ** argv) std::cout < U(4,&Fine); FieldMetaData header; - std::string file("./ckpoint_lat"); + std::string file("./ildg.file"); IldgIO::readConfiguration(file,Umu,header); for(int mu=0;mu Date: Mon, 19 Jun 2017 01:01:48 +0100 Subject: [PATCH 089/177] Update to enable multiple records per file more consistent with SciDAC. open, close, write records... 
--- lib/parallelIO/IldgIO.h | 285 ++++++++++--------- lib/qcd/hmc/checkpointers/ILDGCheckpointer.h | 10 +- tests/IO/Test_ildg_io.cc | 10 +- tests/IO/Test_ildg_read.cc | 5 +- 4 files changed, 173 insertions(+), 137 deletions(-) diff --git a/lib/parallelIO/IldgIO.h b/lib/parallelIO/IldgIO.h index 9a1612d5..1d1b5e0c 100644 --- a/lib/parallelIO/IldgIO.h +++ b/lib/parallelIO/IldgIO.h @@ -38,14 +38,17 @@ directory #include #include -//Lime is a must have for this functionality -extern "C" { // for linkage +//C-Lime is a must have for this functionality +extern "C" { #include "lime.h" } namespace Grid { namespace QCD { + ///////////////////////////////// + // Encode word types as strings + ///////////////////////////////// template inline std::string ScidacWordMnemonic(void){ return std::string("unknown"); } template<> inline std::string ScidacWordMnemonic (void){ return std::string("D"); } template<> inline std::string ScidacWordMnemonic (void){ return std::string("F"); } @@ -54,6 +57,9 @@ namespace QCD { template<> inline std::string ScidacWordMnemonic< int64_t>(void){ return std::string("I64_t"); } template<> inline std::string ScidacWordMnemonic(void){ return std::string("U64_t"); } + ///////////////////////////////////////// + // Encode a generic tensor as a string + ///////////////////////////////////////// template std::string ScidacRecordTypeString(int &colors, int &spins, int & typesize,int &datacount) { typedef typename getPrecision::real_scalar_type stype; @@ -113,6 +119,10 @@ namespace QCD { return ScidacRecordTypeString(colors,spins,typesize,datacount); }; + + //////////////////////////////////////////////////////////// + // Helper to fill out metadata + //////////////////////////////////////////////////////////// template void ScidacMetaData(Lattice & field, FieldMetaData &header, scidacRecord & _scidacRecord, @@ -159,88 +169,38 @@ namespace QCD { //////////////////////////////////////////////////////////////////////////////////// // Lime, ILDG and Scidac 
I/O classes //////////////////////////////////////////////////////////////////////////////////// -class LimeIO : public BinaryIO { +class GridLimeReader : public BinaryIO { public: - /////////////////////////////////////////////////// // FIXME: format for RNG? Now just binary out instead - // FIXME: Make interface able to write multiple records - // FIXME: Split into LimeReader and LimeWriter /////////////////////////////////////////////////// - /* - FILE * File; - LimeWriter LimeW; - LimeReader LimeR; - template - int readObject(serialisable_object &object,std::string object_name,std::string record_name) - int createLimeRecordHeader(std::string message, int MB, int ME, size_t PayloadSize); - template - int writeObject(int MB,int ME,serialisable_object &object,std::string object_name,std::string record_name) - template - int writeLimeLatticeBinaryObject(Lattice &field,std::string filename,std::string record_name) - */ - /////////////////////////////////////////////////////// - // Lime utility functions - /////////////////////////////////////////////////////// + FILE *File; + LimeReader *LimeR; + std::string filename; - static int createLimeRecordHeader(std::string message, int MB, int ME, size_t PayloadSize, LimeWriter* L) - { - LimeRecordHeader *h; - h = limeCreateHeader(MB, ME, const_cast(message.c_str()), PayloadSize); - assert(limeWriteRecordHeader(h, L) >= 0); - limeDestroyHeader(h); - return LIME_SUCCESS; - } - - //////////////////////////////////////////// - // Write a generic serialisable object - //////////////////////////////////////////// - template - static void writeLimeObject(int MB,int ME,serialisable_object &object,std::string object_name,std::string record_name, LimeWriter *LimeW) - { - std::string xmlstring; - { - XmlWriter WR("",""); - write(WR,object_name,object); - xmlstring = WR.XmlString(); - } - uint64_t nbytes = xmlstring.size(); - LimeRecordHeader *h = limeCreateHeader(MB, ME,(char *)record_name.c_str(), nbytes); - int 
err=limeWriteRecordHeader(h, LimeW); assert(err>=0); - err=limeWriteRecordData(&xmlstring[0], &nbytes, LimeW); assert(err>=0); - err=limeWriterCloseRecord(LimeW); assert(err>=0); - limeDestroyHeader(h); - } - //////////////////////////////////////////// - // Read a generic serialisable object - //////////////////////////////////////////// - template - static void readLimeObject(serialisable_object &object,std::string object_name,std::string record_name, LimeReader *LimeR) - { - std::string xmlstring; - // should this be a do while; can we miss a first record?? - while ( limeReaderNextRecord(LimeR) == LIME_SUCCESS ) { - - uint64_t nbytes = limeReaderBytes(LimeR);//size of this record (configuration) - - if ( strncmp(limeReaderType(LimeR), record_name.c_str(),strlen(record_name.c_str()) ) ) { - std::vector xmlc(nbytes+1,'\0'); - limeReaderReadData((void *)&xmlc[0], &nbytes, LimeR); - XmlReader RD(&xmlc[0],""); - read(RD,object_name,object); - return; - } - - } - assert(0); - } + ///////////////////////////////////////////// + // Open the file + ///////////////////////////////////////////// + void open(std::string &_filename) + { + filename= _filename; + File = fopen(filename.c_str(), "r"); + LimeR = limeCreateReader(File); + } + ///////////////////////////////////////////// + // Close the file + ///////////////////////////////////////////// + void close(void){ + fclose(File); + // limeDestroyReader(LimeR); + } //////////////////////////////////////////// // Read a generic lattice field and verify checksum //////////////////////////////////////////// template - static void readLimeLatticeBinaryObject(Lattice &field,std::string filename,std::string record_name,FILE *File, LimeReader *LimeR) + void readLimeLatticeBinaryObject(Lattice &field,std::string record_name) { typedef typename vobj::scalar_object sobj; scidacChecksum scidacChecksum_; @@ -262,7 +222,7 @@ class LimeIO : public BinaryIO { ///////////////////////////////////////////// // Insist checksum is next 
record ///////////////////////////////////////////// - readLimeObject(scidacChecksum_,std::string("scidacChecksum"),record_name,LimeR); + readLimeObject(scidacChecksum_,std::string("scidacChecksum"),record_name); ///////////////////////////////////////////// // Verify checksums @@ -272,14 +232,91 @@ class LimeIO : public BinaryIO { } } } + //////////////////////////////////////////// + // Read a generic serialisable object + //////////////////////////////////////////// + template + void readLimeObject(serialisable_object &object,std::string object_name,std::string record_name) + { + std::string xmlstring; + // should this be a do while; can we miss a first record?? + while ( limeReaderNextRecord(LimeR) == LIME_SUCCESS ) { + uint64_t nbytes = limeReaderBytes(LimeR);//size of this record (configuration) + + if ( strncmp(limeReaderType(LimeR), record_name.c_str(),strlen(record_name.c_str()) ) ) { + std::vector xmlc(nbytes+1,'\0'); + limeReaderReadData((void *)&xmlc[0], &nbytes, LimeR); + XmlReader RD(&xmlc[0],""); + read(RD,object_name,object); + return; + } + + } + assert(0); + } +}; + +class GridLimeWriter : public BinaryIO { + public: + /////////////////////////////////////////////////// + // FIXME: format for RNG? 
Now just binary out instead + /////////////////////////////////////////////////// + + FILE *File; + LimeWriter *LimeW; + std::string filename; + + void open(std::string &_filename) { + filename= _filename; + File = fopen(filename.c_str(), "w"); + LimeW = limeCreateWriter(File); assert(LimeW != NULL ); + } + ///////////////////////////////////////////// + // Close the file + ///////////////////////////////////////////// + void close(void) { + fclose(File); + // limeDestroyWriter(LimeW); + } + /////////////////////////////////////////////////////// + // Lime utility functions + /////////////////////////////////////////////////////// + int createLimeRecordHeader(std::string message, int MB, int ME, size_t PayloadSize) + { + LimeRecordHeader *h; + h = limeCreateHeader(MB, ME, const_cast(message.c_str()), PayloadSize); + assert(limeWriteRecordHeader(h, LimeW) >= 0); + limeDestroyHeader(h); + return LIME_SUCCESS; + } + //////////////////////////////////////////// + // Write a generic serialisable object + //////////////////////////////////////////// + template + void writeLimeObject(int MB,int ME,serialisable_object &object,std::string object_name,std::string record_name) + { + std::string xmlstring; + { + XmlWriter WR("",""); + write(WR,object_name,object); + xmlstring = WR.XmlString(); + } + uint64_t nbytes = xmlstring.size(); + int err; + LimeRecordHeader *h = limeCreateHeader(MB, ME,(char *)record_name.c_str(), nbytes); assert(h!= NULL); + + err=limeWriteRecordHeader(h, LimeW); assert(err>=0); + err=limeWriteRecordData(&xmlstring[0], &nbytes, LimeW); assert(err>=0); + err=limeWriterCloseRecord(LimeW); assert(err>=0); + limeDestroyHeader(h); + } //////////////////////////////////////////// // Write a generic lattice field and csum //////////////////////////////////////////// template - static void writeLimeLatticeBinaryObject(Lattice &field,std::string filename,std::string record_name,FILE *File, LimeWriter *LimeW) + void writeLimeLatticeBinaryObject(Lattice 
&field,std::string record_name) { - //////////////////////////////////////////// // Create record header //////////////////////////////////////////// @@ -287,7 +324,7 @@ class LimeIO : public BinaryIO { int err; uint32_t nersc_csum,scidac_csuma,scidac_csumb; uint64_t PayloadSize = sizeof(sobj) * field._grid->_gsites; - createLimeRecordHeader(record_name, 0, 0, PayloadSize, LimeW); + createLimeRecordHeader(record_name, 0, 0, PayloadSize); //////////////////////////////////////////////////////////////////// // NB: FILE and iostream are jointly writing disjoint sequences in the @@ -317,34 +354,25 @@ class LimeIO : public BinaryIO { checksum.suma= streama.str(); checksum.sumb= streamb.str(); std::cout << GridLogMessage<<" writing scidac checksums "< - int open(std::string filename,GridBase *grid,userFile &_userFile,int volfmt) { - - } - void close(void) { - - } - template - int writeScidacField(Lattice &field,userRecord &_userRecord,int volfmt) - template - int readScidacField(Lattice &field,userRecord &_userRecord,int volfmt) - */ + + template + void writeScidacFileRecord(GridBase *grid,SerialisableUserFile &_userFile) + { + scidacFile _scidacFile(grid); + writeLimeObject(1,0,_scidacFile,_scidacFile.SerialisableClassName(),std::string(SCIDAC_PRIVATE_FILE_XML)); + writeLimeObject(0,1,_userFile,_userFile.SerialisableClassName(),std::string(SCIDAC_FILE_XML)); + } //////////////////////////////////////////////// // Write generic lattice field in scidac format //////////////////////////////////////////////// - template - static void writeScidacField(std::string filename,Lattice &field,userFile _userFile,userRecord _userRecord) + template + void writeScidacFieldRecord(Lattice &field,userRecord _userRecord) { typedef typename vobj::scalar_object sobj; uint64_t nbytes; @@ -362,34 +390,25 @@ class ScidacIO : public LimeIO { ////////////////////////////////////////////// // Fill the Lime file record by record ////////////////////////////////////////////// - FILE *File = 
fopen(filename.c_str(), "w"); - LimeWriter *LimeW = limeCreateWriter(File); - assert(LimeW != NULL ); - - writeLimeObject(1,0,header ,std::string("FieldMetaData"),std::string(GRID_FORMAT),LimeW); // Open message - writeLimeObject(0,0,_userFile,_userFile.SerialisableClassName(),std::string(SCIDAC_FILE_XML),LimeW); - writeLimeObject(0,0,_scidacFile,_scidacFile.SerialisableClassName(),std::string(SCIDAC_PRIVATE_FILE_XML),LimeW); - writeLimeObject(0,0,_userRecord,_userRecord.SerialisableClassName(),std::string(SCIDAC_RECORD_XML),LimeW); - writeLimeObject(0,0,_scidacRecord,_scidacRecord.SerialisableClassName(),std::string(SCIDAC_PRIVATE_RECORD_XML),LimeW); - writeLimeLatticeBinaryObject(field,filename,std::string(ILDG_BINARY_DATA),File,LimeW); // Closes message with checksum - - limeDestroyWriter(LimeW); - fclose(File); + writeLimeObject(1,0,header ,std::string("FieldMetaData"),std::string(GRID_FORMAT)); // Open message + writeLimeObject(0,0,_userRecord,_userRecord.SerialisableClassName(),std::string(SCIDAC_RECORD_XML)); + writeLimeObject(0,0,_scidacRecord,_scidacRecord.SerialisableClassName(),std::string(SCIDAC_PRIVATE_RECORD_XML)); + writeLimeLatticeBinaryObject(field,std::string(ILDG_BINARY_DATA)); // Closes message with checksum } }; -class IldgIO : public ScidacIO { +class IldgWriter : public ScidacWriter { public: /////////////////////////////////// // A little helper /////////////////////////////////// - static void writeLimeIldgLFN(std::string &LFN,LimeWriter *LimeW) + void writeLimeIldgLFN(std::string &LFN) { uint64_t PayloadSize = LFN.size(); int err; - createLimeRecordHeader(ILDG_DATA_LFN, 0 , 0, PayloadSize, LimeW); - err=limeWriteRecordData(const_cast(LFN.c_str()), &PayloadSize, LimeW); assert(err>=0); + createLimeRecordHeader(ILDG_DATA_LFN, 0 , 0, PayloadSize); + err=limeWriteRecordData(const_cast(LFN.c_str()), &PayloadSize,LimeW); assert(err>=0); err=limeWriterCloseRecord(LimeW); assert(err>=0); } @@ -399,7 +418,7 @@ class IldgIO : public ScidacIO { // 
Use Grid MetaData object if present. //////////////////////////////////////////////////////////////// template - static void writeConfiguration(std::string filename,Lattice > &Umu) + void writeConfiguration(Lattice > &Umu,int sequence,std::string LFN,std::string description) { GridBase * grid = Umu._grid; typedef Lattice > GaugeField; @@ -418,6 +437,10 @@ class IldgIO : public ScidacIO { ScidacMetaData(Umu,header,_scidacRecord,_scidacFile); std::string format = header.floating_point; + header.ensemble_id = description; + header.ensemble_label = description; + header.sequence_number = sequence; + header.ildg_lfn = LFN; assert ( (format == std::string("IEEE32BIG")) ||(format == std::string("IEEE64BIG")) ); @@ -453,20 +476,21 @@ class IldgIO : public ScidacIO { ////////////////////////////////////////////// // Fill the Lime file record by record ////////////////////////////////////////////// - - FILE *File = fopen(filename.c_str(), "w"); - LimeWriter *LimeW = limeCreateWriter(File); assert(LimeW != NULL); - writeLimeObject(1,0,header ,std::string("FieldMetaData"),std::string(GRID_FORMAT),LimeW); // Open message - writeLimeObject(0,0,info,info.SerialisableClassName(),std::string(SCIDAC_FILE_XML),LimeW); - writeLimeObject(0,0,_scidacFile,_scidacFile.SerialisableClassName(),std::string(SCIDAC_PRIVATE_FILE_XML),LimeW); - writeLimeObject(0,0,info,info.SerialisableClassName(),std::string(SCIDAC_RECORD_XML),LimeW); - writeLimeObject(0,0,_scidacRecord,_scidacRecord.SerialisableClassName(),std::string(SCIDAC_PRIVATE_RECORD_XML),LimeW); - writeLimeObject(0,0,ildgfmt,std::string("ildgFormat") ,std::string(ILDG_FORMAT),LimeW); // rec - writeLimeIldgLFN(header.ildg_lfn, LimeW); // rec - writeLimeLatticeBinaryObject(Umu,filename,std::string(ILDG_BINARY_DATA),File,LimeW); // Closes message with checksum - limeDestroyWriter(LimeW); + writeLimeObject(1,0,header ,std::string("FieldMetaData"),std::string(GRID_FORMAT)); // Open message + 
writeLimeObject(0,0,_scidacFile,_scidacFile.SerialisableClassName(),std::string(SCIDAC_PRIVATE_FILE_XML)); + writeLimeObject(0,1,info,info.SerialisableClassName(),std::string(SCIDAC_FILE_XML)); + writeLimeObject(1,0,_scidacRecord,_scidacRecord.SerialisableClassName(),std::string(SCIDAC_PRIVATE_RECORD_XML)); + writeLimeObject(0,0,info,info.SerialisableClassName(),std::string(SCIDAC_RECORD_XML)); + writeLimeObject(0,0,ildgfmt,std::string("ildgFormat") ,std::string(ILDG_FORMAT)); // rec + writeLimeIldgLFN(header.ildg_lfn); // rec + writeLimeLatticeBinaryObject(Umu,std::string(ILDG_BINARY_DATA)); // Closes message with checksum + // limeDestroyWriter(LimeW); fclose(File); } +}; + +class IldgReader : public GridLimeReader { + public: //////////////////////////////////////////////////////////////// // Read either Grid/SciDAC/ILDG configuration @@ -476,7 +500,7 @@ class IldgIO : public ScidacIO { // Else use SciDAC MetaData object if present. //////////////////////////////////////////////////////////////// template - static void readConfiguration(std::string filename,Lattice > &Umu, FieldMetaData &FieldMetaData_) { + void readConfiguration(Lattice > &Umu, FieldMetaData &FieldMetaData_) { typedef Lattice > GaugeField; typedef typename GaugeField::vector_object vobj; @@ -491,9 +515,6 @@ class IldgIO : public ScidacIO { assert(dims.size()==4); - FILE *File = fopen(filename.c_str(), "r"); - LimeReader *LimeR = limeCreateReader(File); - // Metadata holders ildgFormat ildgFormat_ ; std::string ildgLFN_ ; diff --git a/lib/qcd/hmc/checkpointers/ILDGCheckpointer.h b/lib/qcd/hmc/checkpointers/ILDGCheckpointer.h index b72fc6f7..118a8e25 100644 --- a/lib/qcd/hmc/checkpointers/ILDGCheckpointer.h +++ b/lib/qcd/hmc/checkpointers/ILDGCheckpointer.h @@ -77,7 +77,10 @@ class ILDGHmcCheckpointer : public BaseHmcCheckpointer { uint32_t nersc_csum,scidac_csuma,scidac_csumb; BinaryIO::writeRNG(sRNG, pRNG, rng, 0,nersc_csum,scidac_csuma,scidac_csumb); - IldgIO::writeConfiguration(config,U, 
Params.format); + IldgWriter _IldgWriter; + _IldgWriter.open(config); + _IldgWriter.writeConfiguration(U, traj, config, config); + _IldgWriter.close(); std::cout << GridLogMessage << "Written ILDG Configuration on " << config << " checksum " << std::hex @@ -97,7 +100,10 @@ class ILDGHmcCheckpointer : public BaseHmcCheckpointer { BinaryIO::readRNG(sRNG, pRNG, rng, 0,nersc_csum,scidac_csuma,scidac_csumb); FieldMetaData header; - IldgIO::readConfiguration(config,U,header); // format from the header + IldgReader _IldgReader; + _IldgReader.open(config); + _IldgReader.readConfiguration(config,U,header); // format from the header + _IldgReader.close(); std::cout << GridLogMessage << "Read ILDG Configuration from " << config << " checksum " << std::hex diff --git a/tests/IO/Test_ildg_io.cc b/tests/IO/Test_ildg_io.cc index 199773ab..e3e9d385 100644 --- a/tests/IO/Test_ildg_io.cc +++ b/tests/IO/Test_ildg_io.cc @@ -78,13 +78,19 @@ int main (int argc, char ** argv) std::cout <(Umu,mu); From 8b7049f737617f67815433b52a7888874f7ffec1 Mon Sep 17 00:00:00 2001 From: paboyle Date: Mon, 19 Jun 2017 08:46:07 +0100 Subject: [PATCH 090/177] Improved detectino of usqcdInfo for plaq/linktr --- lib/parallelIO/IldgIO.h | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/lib/parallelIO/IldgIO.h b/lib/parallelIO/IldgIO.h index 1d1b5e0c..17ce4a06 100644 --- a/lib/parallelIO/IldgIO.h +++ b/lib/parallelIO/IldgIO.h @@ -598,9 +598,14 @@ class IldgReader : public GridLimeReader { } if ( !strncmp(limeReaderType(LimeR), SCIDAC_RECORD_XML,strlen(SCIDAC_RECORD_XML)) ) { - XmlReader RD(&xmlc[0],""); - read(RD,"usqcdInfo",usqcdInfo_); - found_usqcdInfo = 1; + std::string xmls(&xmlc[0]); + // is it a USQCD info field + if ( xmls.find(std::string("usqcdInfo")) != std::string::npos ) { + std::cout << GridLogMessage<<"...found a usqcdInfo field"< Date: Mon, 19 Jun 2017 14:04:21 +0100 Subject: [PATCH 091/177] Block solver improvements --- .../iterative/BlockConjugateGradient.h | 19 
+- lib/lattice/Lattice_reduction.h | 189 +++++++++++++++++- .../solver/Test_staggered_block_cg_unprec.cc | 7 +- 3 files changed, 192 insertions(+), 23 deletions(-) diff --git a/lib/algorithms/iterative/BlockConjugateGradient.h b/lib/algorithms/iterative/BlockConjugateGradient.h index d90194ae..53e11fa7 100644 --- a/lib/algorithms/iterative/BlockConjugateGradient.h +++ b/lib/algorithms/iterative/BlockConjugateGradient.h @@ -42,7 +42,7 @@ class BlockConjugateGradient : public OperatorFunction { typedef typename Field::scalar_type scomplex; - const int blockDim = 0; + int blockDim ; int Nblock; bool ErrorOnNoConverge; // throw an assert when the CG fails to converge. @@ -51,14 +51,15 @@ class BlockConjugateGradient : public OperatorFunction { Integer MaxIterations; Integer IterationsToComplete; //Number of iterations the CG took to finish. Filled in upon completion - BlockConjugateGradient(RealD tol, Integer maxit, bool err_on_no_conv = true) + BlockConjugateGradient(int _Orthog,RealD tol, Integer maxit, bool err_on_no_conv = true) : Tolerance(tol), + blockDim(_Orthog), MaxIterations(maxit), ErrorOnNoConverge(err_on_no_conv){}; void operator()(LinearOperatorBase &Linop, const Field &Src, Field &Psi) { - int Orthog = 0; // First dimension is block dim + int Orthog = blockDim; // First dimension is block dim; this is an assumption Nblock = Src._grid->_fdimensions[Orthog]; std::cout< &Linop, const Field &Src, Field &Psi) Linop.HermOp(Psi, AP); AP = AP-Src; - std::cout << GridLogMessage <<"\tTrue residual is " << std::sqrt(norm2(AP)/norm2(Src)) < { typedef typename Field::scalar_type scomplex; - const int blockDim = 0; - + int blockDim; int Nblock; bool ErrorOnNoConverge; // throw an assert when the CG fails to converge. // Defaults true. @@ -218,14 +218,15 @@ class MultiRHSConjugateGradient : public OperatorFunction { Integer MaxIterations; Integer IterationsToComplete; //Number of iterations the CG took to finish. 
Filled in upon completion - MultiRHSConjugateGradient(RealD tol, Integer maxit, bool err_on_no_conv = true) + MultiRHSConjugateGradient(int Orthog,RealD tol, Integer maxit, bool err_on_no_conv = true) : Tolerance(tol), + blockDim(Orthog), MaxIterations(maxit), ErrorOnNoConverge(err_on_no_conv){}; void operator()(LinearOperatorBase &Linop, const Field &Src, Field &Psi) { - int Orthog = 0; // First dimension is block dim + int Orthog = blockDim; // First dimension is block dim Nblock = Src._grid->_fdimensions[Orthog]; std::cout< &Linop, const Field &Src, Field &Psi) MatrixTimer.Stop(); // Alpha - // sliceInnerProductVectorTest(v_pAp_test,P,AP,Orthog); sliceInnerTimer.Start(); sliceInnerProductVector(v_pAp,P,AP,Orthog); sliceInnerTimer.Stop(); for(int b=0;b &R,Eigen::MatrixXcd &aa,const Lattice typedef typename vobj::vector_type vector_type; int Nblock = X._grid->GlobalDimensions()[Orthog]; - + GridBase *FullGrid = X._grid; GridBase *SliceGrid = makeSubSliceGrid(FullGrid,Orthog); - + Lattice Xslice(SliceGrid); Lattice Rslice(SliceGrid); - + +#if 0 + // R[i] = Y[i] + X[j] a(j,i) for(int i=0;i &R,Eigen::MatrixXcd &aa,const Lattice } InsertSlice(Rslice,R,i,Orthog); } +#endif +#if 0 + int nh = FullGrid->_ndimension; + int nl = SliceGrid->_ndimension; + +#pragma omp parallel +{ + + std::vector lcoor(nl); // sliced coor + std::vector hcoor(nh); // unsliced coor + std::vector s_x(Nblock); + +#pragma omp for + for(int idx=0;idxlSites();idx++){ + + SliceGrid->LocalIndexToLocalCoor(idx,lcoor); + + int ddl=0; + for(int d=0;d_simd_layout[Orthog]==1); + int nh = FullGrid->_ndimension; + int nl = SliceGrid->_ndimension; + + + //FIXME package in a convenient iterator + //Should loop over a plane orthogonal to direction "Orthog" + int stride=FullGrid->_slice_stride[Orthog]; + int block =FullGrid->_slice_block [Orthog]; + int nblock=FullGrid->_slice_nblock[Orthog]; + int ostride=FullGrid->_ostride[Orthog]; +#pragma omp parallel + { + + std::vector s_x(Nblock); + +#pragma omp for 
collapse(2) + for(int n=0;n static void sliceInnerProductMatrix( Eigen::MatrixXcd &mat, const Lattice &lhs,const Lattice &rhs,int Orthog) { @@ -497,7 +581,8 @@ static void sliceInnerProductMatrix( Eigen::MatrixXcd &mat, const Lattice Lattice Rslice(SliceGrid); mat = Eigen::MatrixXcd::Zero(Nblock,Nblock); - + +#if 0 for(int i=0;i mat(i,j) = innerProduct(Lslice,Rslice); } } -#undef FORCE_DIAG -#ifdef FORCE_DIAG - for(int i=0;i_ndimension; + int nl = SliceGrid->_ndimension; + +#pragma omp parallel +{ + std::vector lcoor(nl); // sliced coor + std::vector hcoor(nh); // unsliced coor + std::vector Left(Nblock); + std::vector Right(Nblock); + Eigen::MatrixXcd mat_thread = Eigen::MatrixXcd::Zero(Nblock,Nblock); + +#pragma omp for + for(int idx=0;idxlSites();idx++){ + + SliceGrid->LocalIndexToLocalCoor(idx,lcoor); + + int ddl=0; + for(int d=0;d ip = innerProduct(Left[i],Right[j]); + mat_thread(i,j) += ip; + }} + } + +#pragma omp critical + { + mat += mat_thread; + } + +} +#endif + +#if 1 + assert( FullGrid->_simd_layout[Orthog]==1); + int nh = FullGrid->_ndimension; + int nl = SliceGrid->_ndimension; + + //FIXME package in a convenient iterator + //Should loop over a plane orthogonal to direction "Orthog" + int stride=FullGrid->_slice_stride[Orthog]; + int block =FullGrid->_slice_block [Orthog]; + int nblock=FullGrid->_slice_nblock[Orthog]; + int ostride=FullGrid->_ostride[Orthog]; + + typedef typename vobj::vector_typeD vector_typeD; + +#pragma omp parallel + { + std::vector Left(Nblock); + std::vector Right(Nblock); + Eigen::MatrixXcd mat_thread = Eigen::MatrixXcd::Zero(Nblock,Nblock); + +#pragma omp for collapse(2) + for(int n=0;n HermOp(Ds); ConjugateGradient CG(1.0e-8,10000); - BlockConjugateGradient BCG(1.0e-8,10000); - MultiRHSConjugateGradient mCG(1.0e-8,10000); + int blockDim = 0; + BlockConjugateGradient BCG(blockDim,1.0e-8,10000); + MultiRHSConjugateGradient mCG(blockDim,1.0e-8,10000); std::cout << GridLogMessage << 
"************************************************************************ "< Date: Mon, 19 Jun 2017 22:03:03 +0100 Subject: [PATCH 092/177] No compile make tests fix --- lib/simd/Grid_vector_types.h | 4 ++-- lib/tensors/Tensor_class.h | 9 ++++++++- lib/tensors/Tensor_exp.h | 7 +++++-- tests/core/Test_GaugeAction.cc | 2 +- tests/core/Test_RectPlaq.cc | 2 +- tests/core/Test_main.cc | 2 +- 6 files changed, 18 insertions(+), 8 deletions(-) diff --git a/lib/simd/Grid_vector_types.h b/lib/simd/Grid_vector_types.h index 1ebe7379..e05fecc4 100644 --- a/lib/simd/Grid_vector_types.h +++ b/lib/simd/Grid_vector_types.h @@ -751,8 +751,8 @@ inline Grid_simd, V> toComplex(const Grid_simd &in) { conv.v = in.v; for (int i = 0; i < Rsimd::Nsimd(); i += 2) { - assert(conv.s[i + 1] == - conv.s[i]); // trap any cases where real was not duplicated + assert(conv.s[i + 1] == conv.s[i]); + // trap any cases where real was not duplicated // indicating the SIMD grids of real and imag assignment did not correctly // match conv.s[i + 1] = 0.0; // zero imaginary parts diff --git a/lib/tensors/Tensor_class.h b/lib/tensors/Tensor_class.h index cb90da6c..c7f868db 100644 --- a/lib/tensors/Tensor_class.h +++ b/lib/tensors/Tensor_class.h @@ -156,11 +156,18 @@ class iScalar { // convert from a something to a scalar via constructor of something arg template ::value, T>::type * = nullptr> - strong_inline iScalar operator=(T arg) { + strong_inline iScalar operator=(T arg) { _internal = arg; return *this; } + // Convert elements + template + strong_inline iScalar operator=(iScalar &&arg) { + _internal = arg._internal; + return *this; + } + friend std::ostream &operator<<(std::ostream &stream,const iScalar &o) { stream << "S {" << o._internal << "}"; return stream; diff --git a/lib/tensors/Tensor_exp.h b/lib/tensors/Tensor_exp.h index e18fed70..f7eee8f0 100644 --- a/lib/tensors/Tensor_exp.h +++ b/lib/tensors/Tensor_exp.h @@ -80,8 +80,11 @@ template inline iVector Exponentiate(const iVector mat iQ2 = 
arg*arg*alpha*alpha; mat iQ3 = arg*iQ2*alpha; // sign in c0 from the conventions on the Ta - c0 = -imag( trace(iQ3) ) * one_over_three; - c1 = -real( trace(iQ2) ) * one_over_two; + scalar imQ3, reQ2; + imQ3 = imag( trace(iQ3) ); + reQ2 = real( trace(iQ2) ); + c0 = -imQ3 * one_over_three; + c1 = -reQ2 * one_over_two; // Cayley Hamilton checks to machine precision, tested tmp = c1 * one_over_three; diff --git a/tests/core/Test_GaugeAction.cc b/tests/core/Test_GaugeAction.cc index 2f0535f1..572f19fb 100644 --- a/tests/core/Test_GaugeAction.cc +++ b/tests/core/Test_GaugeAction.cc @@ -73,7 +73,7 @@ int main (int argc, char ** argv) std::vector U(4,&Fine); - NerscField header; + FieldMetaData header; std::string file("./ckpoint_lat.4000"); NerscIO::readConfiguration(Umu,header,file); diff --git a/tests/core/Test_RectPlaq.cc b/tests/core/Test_RectPlaq.cc index 9154f879..2e9cc832 100644 --- a/tests/core/Test_RectPlaq.cc +++ b/tests/core/Test_RectPlaq.cc @@ -90,7 +90,7 @@ int main (int argc, char ** argv) std::vector U(4,&Fine); - NerscField header; + FieldMetaData header; std::string file("./ckpoint_lat.4000"); NerscIO::readConfiguration(Umu,header,file); diff --git a/tests/core/Test_main.cc b/tests/core/Test_main.cc index 921298c7..378f49bd 100644 --- a/tests/core/Test_main.cc +++ b/tests/core/Test_main.cc @@ -336,7 +336,7 @@ int main(int argc, char **argv) { std::cout << GridLogMessage << "norm cMmat : " << norm2(cMat) << std::endl; - cMat = expMat(cMat, ComplexD(1.0, 0.0)); + cMat = expMat(cMat,1.0);// ComplexD(1.0, 0.0)); std::cout << GridLogMessage << "norm expMat: " << norm2(cMat) << std::endl; peekSite(cm, cMat, mysite); From 0a8faac2713c981be4a61c06d90ce0d6c5de211a Mon Sep 17 00:00:00 2001 From: Azusa Yamaguchi Date: Mon, 19 Jun 2017 22:54:18 +0100 Subject: [PATCH 093/177] Fix make tests compile --- lib/qcd/action/scalar/ScalarImpl.h | 13 ++++++++----- tests/debug/Test_cayley_ldop_cr.cc | 2 +- tests/solver/Test_dwf_hdcr.cc | 2 +- 3 files changed, 10 insertions(+), 
7 deletions(-) diff --git a/lib/qcd/action/scalar/ScalarImpl.h b/lib/qcd/action/scalar/ScalarImpl.h index ee2d2fb8..0116b4f9 100644 --- a/lib/qcd/action/scalar/ScalarImpl.h +++ b/lib/qcd/action/scalar/ScalarImpl.h @@ -15,6 +15,8 @@ namespace Grid { typedef iImplField SiteField; + template using iImplScalar= iScalar > >; + typedef iImplScalar ComplexField; typedef Lattice Field; @@ -51,13 +53,14 @@ namespace Grid { public: typedef S Simd; - template - using iImplField = iScalar > >; - + template using iImplField = iScalar > >; + typedef iImplField SiteField; - - typedef Lattice Field; + + template using iImplScalar= iScalar > >; + typedef iImplScalar ComplexField; + static inline void generate_momenta(Field& P, GridParallelRNG& pRNG){ gaussian(pRNG, P); diff --git a/tests/debug/Test_cayley_ldop_cr.cc b/tests/debug/Test_cayley_ldop_cr.cc index dfda43d2..cbefdd46 100644 --- a/tests/debug/Test_cayley_ldop_cr.cc +++ b/tests/debug/Test_cayley_ldop_cr.cc @@ -67,7 +67,7 @@ int main (int argc, char ** argv) LatticeFermion err(FGrid); LatticeGaugeField Umu(UGrid); - NerscField header; + FieldMetaData header; std::string file("./ckpoint_lat.400"); NerscIO::readConfiguration(Umu,header,file); diff --git a/tests/solver/Test_dwf_hdcr.cc b/tests/solver/Test_dwf_hdcr.cc index 64ca0b33..c553ba0a 100644 --- a/tests/solver/Test_dwf_hdcr.cc +++ b/tests/solver/Test_dwf_hdcr.cc @@ -516,7 +516,7 @@ int main (int argc, char ** argv) LatticeColourMatrix U(UGrid); LatticeColourMatrix zz(UGrid); - NerscField header; + FieldMetaData header; std::string file("./ckpoint_lat.4000"); NerscIO::readConfiguration(Umu,header,file); From e9cc21900f00b81a17ab87d649e014edc99c636b Mon Sep 17 00:00:00 2001 From: Azusa Yamaguchi Date: Tue, 20 Jun 2017 12:37:41 +0100 Subject: [PATCH 094/177] Block solver complete for staggered. Now stable on mass 0.003 and gives 8x (!) speed up on Haswell laptop vs. standard CG for 8 RHS solves. 166 iterations vs. 537 iterations so algorithmic gain + 2x in flop rate gain. 
Better than a slap in the face with a wet kipper. --- .../iterative/BlockConjugateGradient.h | 295 ++++++++++++++++-- lib/lattice/Lattice_reduction.h | 235 +++----------- .../solver/Test_staggered_block_cg_unprec.cc | 13 +- 3 files changed, 321 insertions(+), 222 deletions(-) diff --git a/lib/algorithms/iterative/BlockConjugateGradient.h b/lib/algorithms/iterative/BlockConjugateGradient.h index 53e11fa7..f8b83b1f 100644 --- a/lib/algorithms/iterative/BlockConjugateGradient.h +++ b/lib/algorithms/iterative/BlockConjugateGradient.h @@ -33,6 +33,8 @@ directory namespace Grid { +enum BlockCGtype { BlockCG, BlockCGrQ, CGmultiRHS }; + ////////////////////////////////////////////////////////////////////////// // Block conjugate gradient. Dimension zero should be the block direction ////////////////////////////////////////////////////////////////////////// @@ -40,24 +42,274 @@ template class BlockConjugateGradient : public OperatorFunction { public: + typedef typename Field::scalar_type scomplex; int blockDim ; - int Nblock; + + BlockCGtype CGtype; bool ErrorOnNoConverge; // throw an assert when the CG fails to converge. // Defaults true. RealD Tolerance; Integer MaxIterations; Integer IterationsToComplete; //Number of iterations the CG took to finish. 
Filled in upon completion - BlockConjugateGradient(int _Orthog,RealD tol, Integer maxit, bool err_on_no_conv = true) + BlockConjugateGradient(BlockCGtype cgtype,int _Orthog,RealD tol, Integer maxit, bool err_on_no_conv = true) : Tolerance(tol), + CGtype(cgtype), blockDim(_Orthog), MaxIterations(maxit), ErrorOnNoConverge(err_on_no_conv){}; +//////////////////////////////////////////////////////////////////////////////////////////////////// +// Thin QR factorisation (google it) +//////////////////////////////////////////////////////////////////////////////////////////////////// +void ThinQRfact (Eigen::MatrixXcd &m_rr, + Eigen::MatrixXcd &C, + Eigen::MatrixXcd &Cinv, + Field & Q, + const Field & R) +{ + int Orthog = blockDim; // First dimension is block dim; this is an assumption + //////////////////////////////////////////////////////////////////////////////////////////////////// + //Dimensions + // R_{ferm x Nblock} = Q_{ferm x Nblock} x C_{Nblock x Nblock} -> ferm x Nblock + // + // Rdag R = m_rr = Herm = L L^dag <-- Cholesky decomposition (LLT routine in Eigen) + // + // Q C = R => Q = R C^{-1} + // + // Want Ident = Q^dag Q = C^{-dag} R^dag R C^{-1} = C^{-dag} L L^dag C^{-1} = 1_{Nblock x Nblock} + // + // Set C = L^{dag}, and then Q^dag Q = ident + // + // Checks: + // Cdag C = Rdag R ; passes. 
+ // QdagQ = 1 ; passes + //////////////////////////////////////////////////////////////////////////////////////////////////// + sliceInnerProductMatrix(m_rr,R,R,Orthog); + + //////////////////////////////////////////////////////////////////////////////////////////////////// + // Cholesky from Eigen + // There exists a ldlt that is documented as more stable + //////////////////////////////////////////////////////////////////////////////////////////////////// + Eigen::MatrixXcd L = m_rr.llt().matrixL(); + + C = L.adjoint(); + Cinv = C.inverse(); + + //////////////////////////////////////////////////////////////////////////////////////////////////// + // Q = R C^{-1} + // + // Q_j = R_i Cinv(i,j) + // + // NB maddMatrix conventions are Right multiplication X[j] a[j,i] already + //////////////////////////////////////////////////////////////////////////////////////////////////// + // FIXME:: make a sliceMulMatrix to avoid zero vector + sliceMulMatrix(Q,Cinv,R,Orthog); +} +//////////////////////////////////////////////////////////////////////////////////////////////////// +// Call one of several implementations +//////////////////////////////////////////////////////////////////////////////////////////////////// void operator()(LinearOperatorBase &Linop, const Field &Src, Field &Psi) +{ + if ( CGtype == BlockCGrQ ) { + BlockCGrQsolve(Linop,Src,Psi); + } else if (CGtype == BlockCG ) { + BlockCGsolve(Linop,Src,Psi); + } else if (CGtype == CGmultiRHS ) { + CGmultiRHSsolve(Linop,Src,Psi); + } else { + assert(0); + } +} + +//////////////////////////////////////////////////////////////////////////// +// BlockCGrQ implementation: +//-------------------------- +// X is guess/Solution +// B is RHS +// Solve A X_i = B_i ; i refers to Nblock index +//////////////////////////////////////////////////////////////////////////// +void BlockCGrQsolve(LinearOperatorBase &Linop, const Field &B, Field &X) +{ + int Orthog = blockDim; // First dimension is block dim; this is an assumption + 
Nblock = B._grid->_fdimensions[Orthog]; + + std::cout< residuals(Nblock); + std::vector ssq(Nblock); + + sliceNorm(ssq,B,Orthog); + RealD sssum=0; + for(int b=0;b Thin QR factorisation (google it) + * for k: + * Z = AD + * M = [D^dag Z]^{-1} + * X = X + D MC + * QS = Q - ZM + * D = Q + D S^dag + * C = S C + */ + /////////////////////////////////////// + // Initial block: initial search dir is guess + /////////////////////////////////////// + std::cout << GridLogMessage<<"BlockCGrQ algorithm initialisation " < Thin QR factorisation (google it) + + Linop.HermOp(X, AD); + tmp = B - AD; + ThinQRfact (m_rr, m_C, m_Cinv, Q, tmp); + D=Q; + + std::cout << GridLogMessage<<"BlockCGrQ computed initial residual and QR fact " < max_resid ) max_resid = rr; + } + + std::cout << GridLogIterative << "\titeration "< &Linop, const Field &Src, Field &Psi) { int Orthog = blockDim; // First dimension is block dim; this is an assumption Nblock = Src._grid->_fdimensions[Orthog]; @@ -163,8 +415,9 @@ void operator()(LinearOperatorBase &Linop, const Field &Src, Field &Psi) ********************* */ RealD max_resid=0; + RealD rr; for(int b=0;b max_resid ) max_resid = rr; } @@ -174,13 +427,14 @@ void operator()(LinearOperatorBase &Linop, const Field &Src, Field &Psi) std::cout << GridLogMessage<<"BlockCG converged in "< &Linop, const Field &Src, Field &Psi) if (ErrorOnNoConverge) assert(0); IterationsToComplete = k; } -}; - - ////////////////////////////////////////////////////////////////////////// // multiRHS conjugate gradient. Dimension zero should be the block direction +// Use this for spread out across nodes ////////////////////////////////////////////////////////////////////////// -template -class MultiRHSConjugateGradient : public OperatorFunction { - public: - - typedef typename Field::scalar_type scomplex; - - int blockDim; - int Nblock; - bool ErrorOnNoConverge; // throw an assert when the CG fails to converge. - // Defaults true. 
- RealD Tolerance; - Integer MaxIterations; - Integer IterationsToComplete; //Number of iterations the CG took to finish. Filled in upon completion - - MultiRHSConjugateGradient(int Orthog,RealD tol, Integer maxit, bool err_on_no_conv = true) - : Tolerance(tol), - blockDim(Orthog), - MaxIterations(maxit), - ErrorOnNoConverge(err_on_no_conv){}; - -void operator()(LinearOperatorBase &Linop, const Field &Src, Field &Psi) +void CGmultiRHSsolve(LinearOperatorBase &Linop, const Field &Src, Field &Psi) { int Orthog = blockDim; // First dimension is block dim Nblock = Src._grid->_fdimensions[Orthog]; @@ -331,7 +563,7 @@ void operator()(LinearOperatorBase &Linop, const Field &Src, Field &Psi) std::cout << GridLogMessage<<"MultiRHS solver converged in " < &Linop, const Field &Src, Field &Psi) if (ErrorOnNoConverge) assert(0); IterationsToComplete = k; } + }; - - } #endif diff --git a/lib/lattice/Lattice_reduction.h b/lib/lattice/Lattice_reduction.h index 78f88ce3..c5b20f3c 100644 --- a/lib/lattice/Lattice_reduction.h +++ b/lib/lattice/Lattice_reduction.h @@ -369,71 +369,6 @@ static void sliceMaddVector(Lattice &R,std::vector &a,const Lattice } }; - -/* -template -static void sliceMaddVectorSlow (Lattice &R,std::vector &a,const Lattice &X,const Lattice &Y, - int Orthog,RealD scale=1.0) -{ - // FIXME: Implementation is slow - // Best base the linear combination by constructing a - // set of vectors of size grid->_rdimensions[Orthog]. 
- typedef typename vobj::scalar_object sobj; - typedef typename vobj::scalar_type scalar_type; - typedef typename vobj::vector_type vector_type; - - int Nblock = X._grid->GlobalDimensions()[Orthog]; - - GridBase *FullGrid = X._grid; - GridBase *SliceGrid = makeSubSliceGrid(FullGrid,Orthog); - - Lattice Xslice(SliceGrid); - Lattice Rslice(SliceGrid); - // If we based this on Cshift it would work for spread out - // but it would be even slower - for(int i=0;i -static void sliceInnerProductVectorSlow( std::vector & vec, const Lattice &lhs,const Lattice &rhs,int Orthog) - { - // FIXME: Implementation is slow - // Look at localInnerProduct implementation, - // and do inside a site loop with block strided iterators - typedef typename vobj::scalar_object sobj; - typedef typename vobj::scalar_type scalar_type; - typedef typename vobj::vector_type vector_type; - typedef typename vobj::tensor_reduced scalar; - typedef typename scalar::scalar_object scomplex; - - int Nblock = lhs._grid->GlobalDimensions()[Orthog]; - vec.resize(Nblock); - std::vector sip(Nblock); - Lattice IP(lhs._grid); - IP=localInnerProduct(lhs,rhs); - sliceSum(IP,sip,Orthog); - - for(int ss=0;ss_rdimensions[Orthog]. 
-////////////////////////////////////////////////////////////////////////////////////////// - inline GridBase *makeSubSliceGrid(const GridBase *BlockSolverGrid,int Orthog) { int NN = BlockSolverGrid->_ndimension; @@ -453,7 +388,6 @@ inline GridBase *makeSubSliceGrid(const GridBase *BlockSolverGrid,int Or return (GridBase *)new GridCartesian(latt_phys,simd_phys,mpi_phys); } - template static void sliceMaddMatrix (Lattice &R,Eigen::MatrixXcd &aa,const Lattice &X,const Lattice &Y,int Orthog,RealD scale=1.0) { @@ -469,64 +403,10 @@ static void sliceMaddMatrix (Lattice &R,Eigen::MatrixXcd &aa,const Lattice Lattice Xslice(SliceGrid); Lattice Rslice(SliceGrid); -#if 0 - // R[i] = Y[i] + X[j] a(j,i) - for(int i=0;i_ndimension; - int nl = SliceGrid->_ndimension; - -#pragma omp parallel -{ - - std::vector lcoor(nl); // sliced coor - std::vector hcoor(nh); // unsliced coor - std::vector s_x(Nblock); - -#pragma omp for - for(int idx=0;idxlSites();idx++){ - - SliceGrid->LocalIndexToLocalCoor(idx,lcoor); - - int ddl=0; - for(int d=0;d_simd_layout[Orthog]==1); int nh = FullGrid->_ndimension; int nl = SliceGrid->_ndimension; - //FIXME package in a convenient iterator //Should loop over a plane orthogonal to direction "Orthog" int stride=FullGrid->_slice_stride[Orthog]; @@ -535,7 +415,6 @@ static void sliceMaddMatrix (Lattice &R,Eigen::MatrixXcd &aa,const Lattice int ostride=FullGrid->_ostride[Orthog]; #pragma omp parallel { - std::vector s_x(Nblock); #pragma omp for collapse(2) @@ -543,13 +422,11 @@ static void sliceMaddMatrix (Lattice &R,Eigen::MatrixXcd &aa,const Lattice for(int b=0;b &R,Eigen::MatrixXcd &aa,const Lattice } }} } -#endif +}; + +template +static void sliceMulMatrix (Lattice &R,Eigen::MatrixXcd &aa,const Lattice &X,int Orthog,RealD scale=1.0) +{ + typedef typename vobj::scalar_object sobj; + typedef typename vobj::scalar_type scalar_type; + typedef typename vobj::vector_type vector_type; + + int Nblock = X._grid->GlobalDimensions()[Orthog]; + + GridBase *FullGrid = 
X._grid; + GridBase *SliceGrid = makeSubSliceGrid(FullGrid,Orthog); + + Lattice Xslice(SliceGrid); + Lattice Rslice(SliceGrid); + + assert( FullGrid->_simd_layout[Orthog]==1); + int nh = FullGrid->_ndimension; + int nl = SliceGrid->_ndimension; + + //FIXME package in a convenient iterator + //Should loop over a plane orthogonal to direction "Orthog" + int stride=FullGrid->_slice_stride[Orthog]; + int block =FullGrid->_slice_block [Orthog]; + int nblock=FullGrid->_slice_nblock[Orthog]; + int ostride=FullGrid->_ostride[Orthog]; +#pragma omp parallel + { + std::vector s_x(Nblock); + +#pragma omp for collapse(2) + for(int n=0;n static void sliceInnerProductMatrix( Eigen::MatrixXcd &mat, const Lattice &lhs,const Lattice &rhs,int Orthog) { - // FIXME: Implementation is slow - // Not sure of best solution.. think about it typedef typename vobj::scalar_object sobj; typedef typename vobj::scalar_type scalar_type; typedef typename vobj::vector_type vector_type; @@ -582,63 +507,6 @@ static void sliceInnerProductMatrix( Eigen::MatrixXcd &mat, const Lattice mat = Eigen::MatrixXcd::Zero(Nblock,Nblock); -#if 0 - for(int i=0;i_ndimension; - int nl = SliceGrid->_ndimension; - -#pragma omp parallel -{ - std::vector lcoor(nl); // sliced coor - std::vector hcoor(nh); // unsliced coor - std::vector Left(Nblock); - std::vector Right(Nblock); - Eigen::MatrixXcd mat_thread = Eigen::MatrixXcd::Zero(Nblock,Nblock); - -#pragma omp for - for(int idx=0;idxlSites();idx++){ - - SliceGrid->LocalIndexToLocalCoor(idx,lcoor); - - int ddl=0; - for(int d=0;d ip = innerProduct(Left[i],Right[j]); - mat_thread(i,j) += ip; - }} - } - -#pragma omp critical - { - mat += mat_thread; - } - -} -#endif - -#if 1 assert( FullGrid->_simd_layout[Orthog]==1); int nh = FullGrid->_ndimension; int nl = SliceGrid->_ndimension; @@ -681,7 +549,6 @@ static void sliceInnerProductMatrix( Eigen::MatrixXcd &mat, const Lattice mat += mat_thread; } } -#endif return; } diff --git a/tests/solver/Test_staggered_block_cg_unprec.cc 
b/tests/solver/Test_staggered_block_cg_unprec.cc index 8da93195..8db41e98 100644 --- a/tests/solver/Test_staggered_block_cg_unprec.cc +++ b/tests/solver/Test_staggered_block_cg_unprec.cc @@ -51,7 +51,7 @@ int main (int argc, char ** argv) typedef typename ImprovedStaggeredFermion5DR::ComplexField ComplexField; typename ImprovedStaggeredFermion5DR::ImplParams params; - const int Ls=4; + const int Ls=8; Grid_init(&argc,&argv); @@ -80,12 +80,13 @@ int main (int argc, char ** argv) ConjugateGradient CG(1.0e-8,10000); int blockDim = 0; - BlockConjugateGradient BCG(blockDim,1.0e-8,10000); - MultiRHSConjugateGradient mCG(blockDim,1.0e-8,10000); + BlockConjugateGradient BCGrQ(BlockCGrQ,blockDim,1.0e-8,10000); + BlockConjugateGradient BCG (BlockCG,blockDim,1.0e-8,10000); + BlockConjugateGradient mCG (CGmultiRHS,blockDim,1.0e-8,10000); - std::cout << GridLogMessage << "************************************************************************ "< HermOp4d(Ds4d); FermionField src4d(UGrid); random(pRNG,src4d); @@ -112,7 +113,7 @@ int main (int argc, char ** argv) std::cout << GridLogMessage << " Calling Block CG for "< Date: Tue, 20 Jun 2017 17:24:55 +0100 Subject: [PATCH 095/177] various compatibility fixes after merge --- lib/qcd/action/gauge/Photon.h | 6 ++++-- lib/qcd/action/scalar/ScalarImpl.h | 17 +++++++++++++---- tests/IO/Test_ildg_io.cc | 2 ++ tests/IO/Test_ildg_read.cc | 2 ++ 4 files changed, 21 insertions(+), 6 deletions(-) diff --git a/lib/qcd/action/gauge/Photon.h b/lib/qcd/action/gauge/Photon.h index 1512d4e3..7e21a1de 100644 --- a/lib/qcd/action/gauge/Photon.h +++ b/lib/qcd/action/gauge/Photon.h @@ -41,11 +41,13 @@ namespace QCD{ template using iImplGaugeField = iVector>, Nd>; - typedef iImplGaugeLink SiteLink; + typedef iImplGaugeLink SiteLink; typedef iImplGaugeField SiteField; + typedef SiteField SiteComplex; - typedef Lattice LinkField; + typedef Lattice LinkField; typedef Lattice Field; + typedef Field ComplexField; }; typedef QedGimpl QedGimplR; diff --git 
a/lib/qcd/action/scalar/ScalarImpl.h b/lib/qcd/action/scalar/ScalarImpl.h index 868bfc84..5342a1fa 100644 --- a/lib/qcd/action/scalar/ScalarImpl.h +++ b/lib/qcd/action/scalar/ScalarImpl.h @@ -15,8 +15,10 @@ class ScalarImplTypes { typedef iImplField SiteField; typedef SiteField SitePropagator; + typedef SiteField SiteComplex; typedef Lattice Field; + typedef Field ComplexField; typedef Field FermionField; typedef Field PropagatorField; @@ -92,11 +94,18 @@ class ScalarImplTypes { public: typedef S Simd; template - using iImplField = iScalar > >; + using iImplField = iScalar>>; + template + using iImplComplex = iScalar>>; - typedef iImplField SiteField; - - typedef Lattice Field; + typedef iImplField SiteField; + typedef SiteField SitePropagator; + typedef iImplComplex SiteComplex; + + typedef Lattice Field; + typedef Lattice ComplexField; + typedef Field FermionField; + typedef Field PropagatorField; static inline void generate_momenta(Field& P, GridParallelRNG& pRNG) { QCD::SU::GaussianFundamentalLieAlgebraMatrix(pRNG, P); diff --git a/tests/IO/Test_ildg_io.cc b/tests/IO/Test_ildg_io.cc index e3e9d385..6aac2e38 100644 --- a/tests/IO/Test_ildg_io.cc +++ b/tests/IO/Test_ildg_io.cc @@ -36,6 +36,7 @@ using namespace Grid::QCD; int main (int argc, char ** argv) { +#ifdef HAVE_LIME Grid_init(&argc,&argv); std::cout < Date: Tue, 20 Jun 2017 18:46:01 +0100 Subject: [PATCH 096/177] Improved the lancos --- TODO | 28 +- lib/algorithms/densematrix/DenseMatrix.h | 137 --- lib/algorithms/densematrix/Francis.h | 525 ---------- lib/algorithms/densematrix/Householder.h | 242 ----- .../iterative/ImplicitlyRestartedLanczos.h | 987 ++++-------------- lib/qcd/hmc/checkpointers/ILDGCheckpointer.h | 2 +- tests/solver/Test_dwf_lanczos.cc | 2 +- 7 files changed, 211 insertions(+), 1712 deletions(-) delete mode 100644 lib/algorithms/densematrix/DenseMatrix.h delete mode 100644 lib/algorithms/densematrix/Francis.h delete mode 100644 lib/algorithms/densematrix/Householder.h diff --git a/TODO 
b/TODO index a5d4cabd..eeb7dfa5 100644 --- a/TODO +++ b/TODO @@ -1,24 +1,28 @@ TODO: --------------- -Peter's work list: -1)- Precision conversion and sort out localConvert <-- -2)- Remove DenseVector, DenseMatrix; Use Eigen instead. <-- - --- Profile CG, BlockCG, etc... Flop count/rate -- PARTIAL, time but no flop/s yet --- Physical propagator interface --- Conserved currents --- GaugeFix into central location --- Multigrid Wilson and DWF, compare to other Multigrid implementations --- HDCR resume +Large item work list: +1)- Lanczos Remove DenseVector, DenseMatrix; Use Eigen instead. <-- +2)- MultiRHS with spread out extra dim +3)- BG/Q port and check +4)- Precision conversion and sort out localConvert <-- partial + - Consistent linear solver flop count/rate -- PARTIAL, time but no flop/s yet +5)- Physical propagator interface +6)- Conserved currents +7)- Multigrid Wilson and DWF, compare to other Multigrid implementations +8)- HDCR resume Recent DONE +-- GaugeFix into central location <-- DONE +-- Scidac and Ildg metadata handling <-- DONE +-- Binary I/O MPI2 IO <-- DONE -- Binary I/O speed up & x-strips <-- DONE -- Cut down the exterior overhead <-- DONE -- Interior legs from SHM comms <-- DONE -- Half-precision comms <-- DONE --- Merge high precision reduction into develop --- multiRHS DWF; benchmark on Cori/BNL for comms elimination +-- Merge high precision reduction into develop <-- DONE +-- BlockCG, BCGrQ <-- DONE +-- multiRHS DWF; benchmark on Cori/BNL for comms elimination <-- DONE -- slice* linalg routines for multiRHS, BlockCG ----- diff --git a/lib/algorithms/densematrix/DenseMatrix.h b/lib/algorithms/densematrix/DenseMatrix.h deleted file mode 100644 index d86add21..00000000 --- a/lib/algorithms/densematrix/DenseMatrix.h +++ /dev/null @@ -1,137 +0,0 @@ - /************************************************************************************* - - Grid physics library, www.github.com/paboyle/Grid - - Source file: ./lib/algorithms/iterative/DenseMatrix.h - 
- Copyright (C) 2015 - -Author: Peter Boyle -Author: paboyle - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License along - with this program; if not, write to the Free Software Foundation, Inc., - 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. - - See the full license in the file "LICENSE" in the top level distribution directory - *************************************************************************************/ - /* END LEGAL */ -#ifndef GRID_DENSE_MATRIX_H -#define GRID_DENSE_MATRIX_H - -namespace Grid { - ///////////////////////////////////////////////////////////// - // Matrix untils - ///////////////////////////////////////////////////////////// - -template using DenseVector = std::vector; -template using DenseMatrix = DenseVector >; - -template void Size(DenseVector & vec, int &N) -{ - N= vec.size(); -} -template void Size(DenseMatrix & mat, int &N,int &M) -{ - N= mat.size(); - M= mat[0].size(); -} - -template void SizeSquare(DenseMatrix & mat, int &N) -{ - int M; Size(mat,N,M); - assert(N==M); -} - -template void Resize(DenseVector & mat, int N) { - mat.resize(N); -} -template void Resize(DenseMatrix & mat, int N, int M) { - mat.resize(N); - for(int i=0;i void Fill(DenseMatrix & mat, T&val) { - int N,M; - Size(mat,N,M); - for(int i=0;i DenseMatrix Transpose(DenseMatrix & mat){ - int N,M; - Size(mat,N,M); - DenseMatrix C; Resize(C,M,N); - for(int i=0;i void Unity(DenseMatrix &A){ - int N; SizeSquare(A,N); - for(int i=0;i -void 
PlusUnit(DenseMatrix & A,T c){ - int dim; SizeSquare(A,dim); - for(int i=0;i -DenseMatrix HermitianConj(DenseMatrix &mat){ - - int dim; SizeSquare(mat,dim); - - DenseMatrix C; Resize(C,dim,dim); - - for(int i=0;i -DenseMatrix GetSubMtx(DenseMatrix &A,int row_st, int row_end, int col_st, int col_end) -{ - DenseMatrix H; Resize(H,row_end - row_st,col_end-col_st); - - for(int i = row_st; i - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License along - with this program; if not, write to the Free Software Foundation, Inc., - 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. - - See the full license in the file "LICENSE" in the top level distribution directory - *************************************************************************************/ - /* END LEGAL */ -#ifndef FRANCIS_H -#define FRANCIS_H - -#include -#include -#include -#include -#include -#include -#include -#include -#include - -//#include -//#include -//#include - -namespace Grid { - -template int SymmEigensystem(DenseMatrix &Ain, DenseVector &evals, DenseMatrix &evecs, RealD small); -template int Eigensystem(DenseMatrix &Ain, DenseVector &evals, DenseMatrix &evecs, RealD small); - -/** - Find the eigenvalues of an upper hessenberg matrix using the Francis QR algorithm. 
-H = - x x x x x x x x x - x x x x x x x x x - 0 x x x x x x x x - 0 0 x x x x x x x - 0 0 0 x x x x x x - 0 0 0 0 x x x x x - 0 0 0 0 0 x x x x - 0 0 0 0 0 0 x x x - 0 0 0 0 0 0 0 x x -Factorization is P T P^H where T is upper triangular (mod cc blocks) and P is orthagonal/unitary. -**/ -template -int QReigensystem(DenseMatrix &Hin, DenseVector &evals, DenseMatrix &evecs, RealD small) -{ - DenseMatrix H = Hin; - - int N ; SizeSquare(H,N); - int M = N; - - Fill(evals,0); - Fill(evecs,0); - - T s,t,x=0,y=0,z=0; - T u,d; - T apd,amd,bc; - DenseVector p(N,0); - T nrm = Norm(H); ///DenseMatrix Norm - int n, m; - int e = 0; - int it = 0; - int tot_it = 0; - int l = 0; - int r = 0; - DenseMatrix P; Resize(P,N,N); Unity(P); - DenseVector trows(N,0); - - /// Check if the matrix is really hessenberg, if not abort - RealD sth = 0; - for(int j=0;j small){ - std::cout << "Non hessenberg H = " << sth << " > " << small << std::endl; - exit(1); - } - } - } - - do{ - std::cout << "Francis QR Step N = " << N << std::endl; - /** Check for convergence - x x x x x - 0 x x x x - 0 0 x x x - 0 0 x x x - 0 0 0 0 x - for this matrix l = 4 - **/ - do{ - l = Chop_subdiag(H,nrm,e,small); - r = 0; ///May have converged on more than one eval - ///Single eval - if(l == N-1){ - evals[e] = H[l][l]; - N--; e++; r++; it = 0; - } - ///RealD eval - if(l == N-2){ - trows[l+1] = 1; ///Needed for UTSolve - apd = H[l][l] + H[l+1][l+1]; - amd = H[l][l] - H[l+1][l+1]; - bc = (T)4.0*H[l+1][l]*H[l][l+1]; - evals[e] = (T)0.5*( apd + sqrt(amd*amd + bc) ); - evals[e+1] = (T)0.5*( apd - sqrt(amd*amd + bc) ); - N-=2; e+=2; r++; it = 0; - } - } while(r>0); - - if(N ==0) break; - - DenseVector ck; Resize(ck,3); - DenseVector v; Resize(v,3); - - for(int m = N-3; m >= l; m--){ - ///Starting vector essentially random shift. 
- if(it%10 == 0 && N >= 3 && it > 0){ - s = (T)1.618033989*( abs( H[N-1][N-2] ) + abs( H[N-2][N-3] ) ); - t = (T)0.618033989*( abs( H[N-1][N-2] ) + abs( H[N-2][N-3] ) ); - x = H[m][m]*H[m][m] + H[m][m+1]*H[m+1][m] - s*H[m][m] + t; - y = H[m+1][m]*(H[m][m] + H[m+1][m+1] - s); - z = H[m+1][m]*H[m+2][m+1]; - } - ///Starting vector implicit Q theorem - else{ - s = (H[N-2][N-2] + H[N-1][N-1]); - t = (H[N-2][N-2]*H[N-1][N-1] - H[N-2][N-1]*H[N-1][N-2]); - x = H[m][m]*H[m][m] + H[m][m+1]*H[m+1][m] - s*H[m][m] + t; - y = H[m+1][m]*(H[m][m] + H[m+1][m+1] - s); - z = H[m+1][m]*H[m+2][m+1]; - } - ck[0] = x; ck[1] = y; ck[2] = z; - - if(m == l) break; - - /** Some stupid thing from numerical recipies, seems to work**/ - // PAB.. for heaven's sake quote page, purpose, evidence it works. - // what sort of comment is that!?!?!? - u=abs(H[m][m-1])*(abs(y)+abs(z)); - d=abs(x)*(abs(H[m-1][m-1])+abs(H[m][m])+abs(H[m+1][m+1])); - if ((T)abs(u+d) == (T)abs(d) ){ - l = m; break; - } - - //if (u < small){l = m; break;} - } - if(it > 100000){ - std::cout << "QReigensystem: bugger it got stuck after 100000 iterations" << std::endl; - std::cout << "got " << e << " evals " << l << " " << N << std::endl; - exit(1); - } - normalize(ck); ///Normalization cancels in PHP anyway - T beta; - Householder_vector(ck, 0, 2, v, beta); - Householder_mult(H,v,beta,0,l,l+2,0); - Householder_mult(H,v,beta,0,l,l+2,1); - ///Accumulate eigenvector - Householder_mult(P,v,beta,0,l,l+2,1); - int sw = 0; ///Are we on the last row? 
- for(int k=l;k(ck, 0, 2-sw, v, beta); - Householder_mult(H,v, beta,0,k+1,k+3-sw,0); - Householder_mult(H,v, beta,0,k+1,k+3-sw,1); - ///Accumulate eigenvector - Householder_mult(P,v, beta,0,k+1,k+3-sw,1); - } - it++; - tot_it++; - }while(N > 1); - N = evals.size(); - ///Annoying - UT solves in reverse order; - DenseVector tmp; Resize(tmp,N); - for(int i=0;i -int my_Wilkinson(DenseMatrix &Hin, DenseVector &evals, DenseMatrix &evecs, RealD small) -{ - /** - Find the eigenvalues of an upper Hessenberg matrix using the Wilkinson QR algorithm. - H = - x x 0 0 0 0 - x x x 0 0 0 - 0 x x x 0 0 - 0 0 x x x 0 - 0 0 0 x x x - 0 0 0 0 x x - Factorization is P T P^H where T is upper triangular (mod cc blocks) and P is orthagonal/unitary. **/ - return my_Wilkinson(Hin, evals, evecs, small, small); -} - -template -int my_Wilkinson(DenseMatrix &Hin, DenseVector &evals, DenseMatrix &evecs, RealD small, RealD tol) -{ - int N; SizeSquare(Hin,N); - int M = N; - - ///I don't want to modify the input but matricies must be passed by reference - //Scale a matrix by its "norm" - //RealD Hnorm = abs( Hin.LargestDiag() ); H = H*(1.0/Hnorm); - DenseMatrix H; H = Hin; - - RealD Hnorm = abs(Norm(Hin)); - H = H * (1.0 / Hnorm); - - // TODO use openmp and memset - Fill(evals,0); - Fill(evecs,0); - - T s, t, x = 0, y = 0, z = 0; - T u, d; - T apd, amd, bc; - DenseVector p; Resize(p,N); Fill(p,0); - - T nrm = Norm(H); ///DenseMatrix Norm - int n, m; - int e = 0; - int it = 0; - int tot_it = 0; - int l = 0; - int r = 0; - DenseMatrix P; Resize(P,N,N); - Unity(P); - DenseVector trows(N, 0); - /// Check if the matrix is really symm tridiag - RealD sth = 0; - for(int j = 0; j < N; ++j) - { - for(int i = j + 2; i < N; ++i) - { - if(abs(H[i][j]) > tol || abs(H[j][i]) > tol) - { - std::cout << "Non Tridiagonal H(" << i << ","<< j << ") = |" << Real( real( H[j][i] ) ) << "| > " << tol << std::endl; - std::cout << "Warning tridiagonalize and call again" << std::endl; - // exit(1); // see what is going on - 
//return; - } - } - } - - do{ - do{ - //Jasper - //Check if the subdiagonal term is small enough ( 0); - //Jasper - //Already converged - //-------------- - if(N == 0) break; - - DenseVector ck,v; Resize(ck,2); Resize(v,2); - - for(int m = N - 3; m >= l; m--) - { - ///Starting vector essentially random shift. - if(it%10 == 0 && N >= 3 && it > 0) - { - t = abs(H[N - 1][N - 2]) + abs(H[N - 2][N - 3]); - x = H[m][m] - t; - z = H[m + 1][m]; - } else { - ///Starting vector implicit Q theorem - d = (H[N - 2][N - 2] - H[N - 1][N - 1]) * (T) 0.5; - t = H[N - 1][N - 1] - H[N - 1][N - 2] * H[N - 1][N - 2] - / (d + sign(d) * sqrt(d * d + H[N - 1][N - 2] * H[N - 1][N - 2])); - x = H[m][m] - t; - z = H[m + 1][m]; - } - //Jasper - //why it is here???? - //----------------------- - if(m == l) - break; - - u = abs(H[m][m - 1]) * (abs(y) + abs(z)); - d = abs(x) * (abs(H[m - 1][m - 1]) + abs(H[m][m]) + abs(H[m + 1][m + 1])); - if ((T)abs(u + d) == (T)abs(d)) - { - l = m; - break; - } - } - //Jasper - if(it > 1000000) - { - std::cout << "Wilkinson: bugger it got stuck after 100000 iterations" << std::endl; - std::cout << "got " << e << " evals " << l << " " << N << std::endl; - exit(1); - } - // - T s, c; - Givens_calc(x, z, c, s); - Givens_mult(H, l, l + 1, c, -s, 0); - Givens_mult(H, l, l + 1, c, s, 1); - Givens_mult(P, l, l + 1, c, s, 1); - // - for(int k = l; k < N - 2; ++k) - { - x = H.A[k + 1][k]; - z = H.A[k + 2][k]; - Givens_calc(x, z, c, s); - Givens_mult(H, k + 1, k + 2, c, -s, 0); - Givens_mult(H, k + 1, k + 2, c, s, 1); - Givens_mult(P, k + 1, k + 2, c, s, 1); - } - it++; - tot_it++; - }while(N > 1); - - N = evals.size(); - ///Annoying - UT solves in reverse order; - DenseVector tmp(N); - for(int i = 0; i < N; ++i) - tmp[i] = evals[N-i-1]; - evals = tmp; - // - UTeigenvectors(H, trows, evals, evecs); - //UTSymmEigenvectors(H, trows, evals, evecs); - for(int i = 0; i < evals.size(); ++i) - { - evecs[i] = P * evecs[i]; - normalize(evecs[i]); - evals[i] = evals[i] * Hnorm; - 
} - // // FIXME this is to test - // Hin.write("evecs3", evecs); - // Hin.write("evals3", evals); - // // check rsd - // for(int i = 0; i < M; i++) { - // vector Aevec = Hin * evecs[i]; - // RealD norm2(0.); - // for(int j = 0; j < M; j++) { - // norm2 += (Aevec[j] - evals[i] * evecs[i][j]) * (Aevec[j] - evals[i] * evecs[i][j]); - // } - // } - return tot_it; -} - -template -void Hess(DenseMatrix &A, DenseMatrix &Q, int start){ - - /** - turn a matrix A = - x x x x x - x x x x x - x x x x x - x x x x x - x x x x x - into - x x x x x - x x x x x - 0 x x x x - 0 0 x x x - 0 0 0 x x - with householder rotations - Slow. - */ - int N ; SizeSquare(A,N); - DenseVector p; Resize(p,N); Fill(p,0); - - for(int k=start;k ck,v; Resize(ck,N-k-1); Resize(v,N-k-1); - for(int i=k+1;i(ck, 0, ck.size()-1, v, beta); ///Householder vector - Householder_mult(A,v,beta,start,k+1,N-1,0); ///A -> PA - Householder_mult(A,v,beta,start,k+1,N-1,1); ///PA -> PAP^H - ///Accumulate eigenvector - Householder_mult(Q,v,beta,start,k+1,N-1,1); ///Q -> QP^H - } - /*for(int l=0;l -void Tri(DenseMatrix &A, DenseMatrix &Q, int start){ -///Tridiagonalize a matrix - int N; SizeSquare(A,N); - Hess(A,Q,start); - /*for(int l=0;l -void ForceTridiagonal(DenseMatrix &A){ -///Tridiagonalize a matrix - int N ; SizeSquare(A,N); - for(int l=0;l -int my_SymmEigensystem(DenseMatrix &Ain, DenseVector &evals, DenseVector > &evecs, RealD small){ - ///Solve a symmetric eigensystem, not necessarily in tridiagonal form - int N; SizeSquare(Ain,N); - DenseMatrix A; A = Ain; - DenseMatrix Q; Resize(Q,N,N); Unity(Q); - Tri(A,Q,0); - int it = my_Wilkinson(A, evals, evecs, small); - for(int k=0;k -int Wilkinson(DenseMatrix &Ain, DenseVector &evals, DenseVector > &evecs, RealD small){ - return my_Wilkinson(Ain, evals, evecs, small); -} - -template -int SymmEigensystem(DenseMatrix &Ain, DenseVector &evals, DenseVector > &evecs, RealD small){ - return my_SymmEigensystem(Ain, evals, evecs, small); -} - -template -int 
Eigensystem(DenseMatrix &Ain, DenseVector &evals, DenseVector > &evecs, RealD small){ -///Solve a general eigensystem, not necessarily in tridiagonal form - int N = Ain.dim; - DenseMatrix A(N); A = Ain; - DenseMatrix Q(N);Q.Unity(); - Hess(A,Q,0); - int it = QReigensystem(A, evals, evecs, small); - for(int k=0;k - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License along - with this program; if not, write to the Free Software Foundation, Inc., - 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
- - See the full license in the file "LICENSE" in the top level distribution directory - *************************************************************************************/ - /* END LEGAL */ -#ifndef HOUSEHOLDER_H -#define HOUSEHOLDER_H - -#define TIMER(A) std::cout << GridLogMessage << __FUNC__ << " file "<< __FILE__ <<" line " << __LINE__ << std::endl; -#define ENTER() std::cout << GridLogMessage << "ENTRY "<<__FUNC__ << " file "<< __FILE__ <<" line " << __LINE__ << std::endl; -#define LEAVE() std::cout << GridLogMessage << "EXIT "<<__FUNC__ << " file "<< __FILE__ <<" line " << __LINE__ << std::endl; - -#include -#include -#include -#include -#include -#include -#include -#include -#include - -namespace Grid { -/** Comparison function for finding the max element in a vector **/ -template bool cf(T i, T j) { - return abs(i) < abs(j); -} - -/** - Calculate a real Givens angle - **/ -template inline void Givens_calc(T y, T z, T &c, T &s){ - - RealD mz = (RealD)abs(z); - - if(mz==0.0){ - c = 1; s = 0; - } - if(mz >= (RealD)abs(y)){ - T t = -y/z; - s = (T)1.0 / sqrt ((T)1.0 + t * t); - c = s * t; - } else { - T t = -z/y; - c = (T)1.0 / sqrt ((T)1.0 + t * t); - s = c * t; - } -} - -template inline void Givens_mult(DenseMatrix &A, int i, int k, T c, T s, int dir) -{ - int q ; SizeSquare(A,q); - - if(dir == 0){ - for(int j=0;j inline void Householder_vector(DenseVector input, int k, int j, DenseVector &v, T &beta) -{ - int N ; Size(input,N); - T m = *max_element(input.begin() + k, input.begin() + j + 1, cf ); - - if(abs(m) > 0.0){ - T alpha = 0; - - for(int i=k; i 0.0) v[k] = v[k] + (v[k]/abs(v[k]))*alpha; - else v[k] = -alpha; - } else{ - for(int i=k; i inline void Householder_vector(DenseVector input, int k, int j, int dir, DenseVector &v, T &beta) -{ - int N = input.size(); - T m = *max_element(input.begin() + k, input.begin() + j + 1, cf); - - if(abs(m) > 0.0){ - T alpha = 0; - - for(int i=k; i 0.0) v[dir] = v[dir] + (v[dir]/abs(v[dir]))*alpha; - else v[dir] = 
-alpha; - }else{ - for(int i=k; i inline void Householder_mult(DenseMatrix &A , DenseVector v, T beta, int l, int k, int j, int trans) -{ - int N ; SizeSquare(A,N); - - if(abs(beta) > 0.0){ - for(int p=l; p inline void Householder_mult_tri(DenseMatrix &A , DenseVector v, T beta, int l, int M, int k, int j, int trans) -{ - if(abs(beta) > 0.0){ - - int N ; SizeSquare(A,N); - - DenseMatrix tmp; Resize(tmp,N,N); Fill(tmp,0); - - T s; - for(int p=l; p +template using DenseVector = std::vector; + +//#include #include namespace Grid { @@ -47,104 +49,85 @@ namespace Grid { ///////////////////////////////////////////////////////////// // Implicitly restarted lanczos ///////////////////////////////////////////////////////////// - - template - class ImplicitlyRestartedLanczos { +class ImplicitlyRestartedLanczos { - const RealD small = 1.0e-16; public: - int lock; - int get; - int Niter; - int converged; + int Niter; // Max iterations + int Nstop; // Number of evecs checked for convergence + int Nk; // Number of converged sought + int Nm; // Nm -- total number of vectors - int Nstop; // Number of evecs checked for convergence - int Nk; // Number of converged sought - int Np; // Np -- Number of spare vecs in kryloc space - int Nm; // Nm -- total number of vectors + RealD eresid; - RealD eresid; + //////////////////////////////////// + // Embedded objects + //////////////////////////////////// + SortEigen _sort; + LinearOperatorBase &_Linop; + OperatorFunction &_poly; - SortEigen _sort; + ///////////////////////// + // Constructor + ///////////////////////// + ImplicitlyRestartedLanczos(LinearOperatorBase &Linop, // op + OperatorFunction & poly, // polynmial + int _Nstop, // sought vecs + int _Nk, // sought vecs + int _Nm, // total vecs + RealD _eresid, // resid in lmdue deficit + int _Niter) : // Max iterations + _Linop(Linop), _poly(poly), + Nstop(_Nstop), Nk(_Nk), Nm(_Nm), + eresid(_eresid), Niter(_Niter) { }; -// GridCartesian &_fgrid; - - LinearOperatorBase &_Linop; - - 
OperatorFunction &_poly; - - ///////////////////////// - // Constructor - ///////////////////////// - void init(void){}; - void Abort(int ff, DenseVector &evals, DenseVector > &evecs); - - ImplicitlyRestartedLanczos( - LinearOperatorBase &Linop, // op - OperatorFunction & poly, // polynmial - int _Nstop, // sought vecs - int _Nk, // sought vecs - int _Nm, // spare vecs - RealD _eresid, // resid in lmdue deficit - int _Niter) : // Max iterations - _Linop(Linop), - _poly(poly), - Nstop(_Nstop), - Nk(_Nk), - Nm(_Nm), - eresid(_eresid), - Niter(_Niter) - { - Np = Nm-Nk; assert(Np>0); - }; - - ImplicitlyRestartedLanczos( - LinearOperatorBase &Linop, // op +#if 0 + ImplicitlyRestartedLanczos(LinearOperatorBase &Linop, // op OperatorFunction & poly, // polynmial int _Nk, // sought vecs - int _Nm, // spare vecs + int _Nm, // total vecs RealD _eresid, // resid in lmdue deficit int _Niter) : // Max iterations - _Linop(Linop), - _poly(poly), - Nstop(_Nk), - Nk(_Nk), - Nm(_Nm), - eresid(_eresid), - Niter(_Niter) - { - Np = Nm-Nk; assert(Np>0); - }; + _Linop(Linop), _poly(poly), + Nstop(_Nk), Nk(_Nk), Nm(_Nm), + eresid(_eresid), Niter(_Niter) { }; +#endif - ///////////////////////// - // Sanity checked this routine (step) against Saad. 
- ///////////////////////// - void RitzMatrix(DenseVector& evec,int k){ +#if 0 + void calc(DenseVector& eval, + DenseVector& evec, + const Field& src, + int& Nconv); - if(1) return; + void step(DenseVector& lmd, + DenseVector& lme, + DenseVector& evec, + Field& w,int Nm,int k); - GridBase *grid = evec[0]._grid; - Field w(grid); - std::cout << "RitzMatrix "<1 ) { - if (abs(in) >1.0e-9 ) { - std::cout<<"oops"< &Qt) ; + + static RealD normalise(Field& v) ; + void orthogonalize(Field& w, DenseVector& evec, int k); + void diagonalize(DenseVector& lmd, + DenseVector& lme, + int N2, int N1, + DenseVector& Qt, + GridBase *grid); + + void qr_decomp(DenseVector& lmd, + DenseVector& lme, + int Nk, int Nm, + DenseVector& Qt, + RealD Dsh, int kmin, int kmax); + +#ifdef USE_LAPACK + void diagonalize_lapack(DenseVector& lmd, + DenseVector& lme, + int N1, int N2, + DenseVector& Qt, + GridBase *grid); +#endif +#endif /* Saad PP. 195 1. Choose an initial vector v1 of 2-norm unity. Set β1 ≡ 0, v0 ≡ 0 @@ -161,12 +144,12 @@ public: DenseVector& evec, Field& w,int Nm,int k) { + const RealD tiny = 1.0e-20; assert( k< Nm ); _poly(_Linop,evec[k],w); // 3. wk:=Avk−βkv_{k−1} - if(k>0){ - w -= lme[k-1] * evec[k-1]; - } + + if(k>0) w -= lme[k-1] * evec[k-1]; ComplexD zalph = innerProduct(evec[k],w); // 4. αk:=(wk,vk) RealD alph = real(zalph); @@ -176,29 +159,20 @@ public: RealD beta = normalise(w); // 6. βk+1 := ∥wk∥2. If βk+1 = 0 then Stop // 7. 
vk+1 := wk/βk+1 -// std::cout << "alpha = " << zalph << " beta "<0) { - orthogonalize(w,evec,k); // orthonormalise - } - - if(k < Nm-1) evec[k+1] = w; + if ( k > 0 ) orthogonalize(w,evec,k); // orthonormalise + if ( k < Nm-1) evec[k+1] = w; + + if ( beta < tiny ) std::cout << " beta is tiny "<& lmd, - DenseVector& lme, - int Nk, - int Nm, - DenseVector& Qt, - RealD Dsh, - int kmin, - int kmax) + + void qr_decomp(DenseVector& lmd, // Nm + DenseVector& lme, // Nm + int Nk, int Nm, + DenseVector& Qt, // Nm x Nm matrix + RealD Dsh, int kmin, int kmax) { int k = kmin-1; RealD x; @@ -218,7 +192,7 @@ public: lme[k+1] = c*lme[k+1]; for(int i=0; i& lmd, - DenseVector& lme, - int N1, - int N2, - DenseVector& Qt, - GridBase *grid){ - const int size = Nm; -// tevals.resize(size); -// tevecs.resize(size); - int NN = N1; - double evals_tmp[NN]; - double evec_tmp[NN][NN]; - memset(evec_tmp[0],0,sizeof(double)*NN*NN); -// double AA[NN][NN]; - double DD[NN]; - double EE[NN]; - for (int i = 0; i< NN; i++) - for (int j = i - 1; j <= i + 1; j++) - if ( j < NN && j >= 0 ) { - if (i==j) DD[i] = lmd[i]; - if (i==j) evals_tmp[i] = lmd[i]; - if (j==(i-1)) EE[j] = lme[j]; + DenseVector& lme, + int N1, + int N2, + DenseVector& Qt, + GridBase *grid) + { + const int size = Nm; + int NN = N1; + double evals_tmp[NN]; + double evec_tmp[NN][NN]; + memset(evec_tmp[0],0,sizeof(double)*NN*NN); + double DD[NN]; + double EE[NN]; + for (int i = 0; i< NN; i++) { + for (int j = i - 1; j <= i + 1; j++) { + if ( j < NN && j >= 0 ) { + if (i==j) DD[i] = lmd[i]; + if (i==j) evals_tmp[i] = lmd[i]; + if (j==(i-1)) EE[j] = lme[j]; + } + } } - int evals_found; - int lwork = ( (18*NN) > (1+4*NN+NN*NN)? 
(18*NN):(1+4*NN+NN*NN)) ; - int liwork = 3+NN*10 ; - int iwork[liwork]; - double work[lwork]; - int isuppz[2*NN]; - char jobz = 'V'; // calculate evals & evecs - char range = 'I'; // calculate all evals - // char range = 'A'; // calculate all evals - char uplo = 'U'; // refer to upper half of original matrix - char compz = 'I'; // Compute eigenvectors of tridiagonal matrix - int ifail[NN]; - int info; -// int total = QMP_get_number_of_nodes(); -// int node = QMP_get_node_number(); -// GridBase *grid = evec[0]._grid; - int total = grid->_Nprocessors; - int node = grid->_processor; - int interval = (NN/total)+1; - double vl = 0.0, vu = 0.0; - int il = interval*node+1 , iu = interval*(node+1); - if (iu > NN) iu=NN; - double tol = 0.0; - if (1) { - memset(evals_tmp,0,sizeof(double)*NN); - if ( il <= NN){ - printf("total=%d node=%d il=%d iu=%d\n",total,node,il,iu); - LAPACK_dstegr(&jobz, &range, &NN, - (double*)DD, (double*)EE, - &vl, &vu, &il, &iu, // these four are ignored if second parameteris 'A' - &tol, // tolerance - &evals_found, evals_tmp, (double*)evec_tmp, &NN, - isuppz, - work, &lwork, iwork, &liwork, - &info); - for (int i = iu-1; i>= il-1; i--){ - printf("node=%d evals_found=%d evals_tmp[%d] = %g\n",node,evals_found, i - (il-1),evals_tmp[i - (il-1)]); - evals_tmp[i] = evals_tmp[i - (il-1)]; - if (il>1) evals_tmp[i-(il-1)]=0.; - for (int j = 0; j< NN; j++){ - evec_tmp[i][j] = evec_tmp[i - (il-1)][j]; - if (il>1) evec_tmp[i-(il-1)][j]=0.; - } - } + int evals_found; + int lwork = ( (18*NN) > (1+4*NN+NN*NN)? 
(18*NN):(1+4*NN+NN*NN)) ; + int liwork = 3+NN*10 ; + int iwork[liwork]; + double work[lwork]; + int isuppz[2*NN]; + char jobz = 'V'; // calculate evals & evecs + char range = 'I'; // calculate all evals + // char range = 'A'; // calculate all evals + char uplo = 'U'; // refer to upper half of original matrix + char compz = 'I'; // Compute eigenvectors of tridiagonal matrix + int ifail[NN]; + int info; + int total = grid->_Nprocessors; + int node = grid->_processor; + int interval = (NN/total)+1; + double vl = 0.0, vu = 0.0; + int il = interval*node+1 , iu = interval*(node+1); + if (iu > NN) iu=NN; + double tol = 0.0; + if (1) { + memset(evals_tmp,0,sizeof(double)*NN); + if ( il <= NN){ + LAPACK_dstegr(&jobz, &range, &NN, + (double*)DD, (double*)EE, + &vl, &vu, &il, &iu, // these four are ignored if second parameteris 'A' + &tol, // tolerance + &evals_found, evals_tmp, (double*)evec_tmp, &NN, + isuppz, + work, &lwork, iwork, &liwork, + &info); + for (int i = iu-1; i>= il-1; i--){ + evals_tmp[i] = evals_tmp[i - (il-1)]; + if (il>1) evals_tmp[i-(il-1)]=0.; + for (int j = 0; j< NN; j++){ + evec_tmp[i][j] = evec_tmp[i - (il-1)][j]; + if (il>1) evec_tmp[i-(il-1)][j]=0.; + } + } + } + { + grid->GlobalSumVector(evals_tmp,NN); + grid->GlobalSumVector((double*)evec_tmp,NN*NN); + } + } + // cheating a bit. + // It is better to sort instead of just reversing it, + // but the document of the routine says evals are sorted in increasing order. + // qr gives evals in decreasing order. + for(int i=0;iGlobalSumVector(evals_tmp,NN); - grid->GlobalSumVector((double*)evec_tmp,NN*NN); - } - } -// cheating a bit. It is better to sort instead of just reversing it, but the document of the routine says evals are sorted in increasing order. qr gives evals in decreasing order. 
- for(int i=0;i& lmd, DenseVector& lme, int N2, @@ -354,24 +324,23 @@ public: if(!check_lapack) return diagonalize_lapack(lmd,lme,N2,N1,Qt,grid); - DenseVector lmd2(N1); - DenseVector lme2(N1); - DenseVector Qt2(N1*N1); - for(int k=0; k lmd2(N1); + DenseVector lme2(N1); + DenseVector Qt2(N1*N1); + for(int k=0; k lmd3(N2); - for(int k=0; k lmd3(N2); + for(int k=0; kSMALL) std::cout <<"lmd(qr) lmd(lapack) "<< k << ": " << lmd2[k] <<" "<< lmd3[k] <SMALL) std::cout <<"lme(qr)-lme(lapack) "<< k << ": " << lme2[k] - lme[k] <SMALL) std::cout <<"Qt(qr)-Qt(lapack) "<< k << ": " << Qt2[k] - Qt[k] < &Qt) { for(int i=0; i & bq, - Field &bf, - DenseMatrix &H){ - - GridBase *grid = bq[0]._grid; - - RealD beta; - RealD sqbt; - RealD alpha; - - for(int i=start;i 1) std::cout << "orthagonality refined " << re << " times" < evals, - DenseVector evecs){ - int N= evals.size(); - _sort.push(evals,evecs, evals.size(),N); - } - - void ImplicitRestart(int TM, DenseVector &evals, DenseVector > &evecs, DenseVector &bq, Field &bf, int cont) - { - std::cout << "ImplicitRestart begin. 
Eigensort starting\n"; - - DenseMatrix H; Resize(H,Nm,Nm); - - EigenSort(evals, evecs); - - ///Assign shifts - int K=Nk; - int M=Nm; - int P=Np; - int converged=0; - if(K - converged < 4) P = (M - K-1); //one - // DenseVector shifts(P + shift_extra.size()); - DenseVector shifts(P); - for(int k = 0; k < P; ++k) - shifts[k] = evals[k]; - - /// Shift to form a new H and q - DenseMatrix Q; Resize(Q,TM,TM); - Unity(Q); - Shift(Q, shifts); // H is implicitly passed in in Rudy's Shift routine - - int ff = K; - - /// Shifted H defines a new K step Arnoldi factorization - RealD beta = H[ff][ff-1]; - RealD sig = Q[TM - 1][ff - 1]; - std::cout << "beta = " << beta << " sig = " << real(sig) < q Q - times_real(bq, Q, TM); - - std::cout << norm2(bq[0]) << " -- after " << ff < &bq, Field &bf, DenseVector > & evecs,DenseVector &evals) - { - init(); - - int M=Nm; - - DenseMatrix H; Resize(H,Nm,Nm); - Resize(evals,Nm); - Resize(evecs,Nm); - - int ff = Lanczos_Factor(0, M, cont, bq,bf,H); // 0--M to begin with - - if(ff < M) { - std::cout << "Krylov: aborting ff "< " << it << std::endl; - int lock_num = lock ? converged : 0; - DenseVector tevals(M - lock_num ); - DenseMatrix tevecs; Resize(tevecs,M - lock_num,M - lock_num); - - //check residual of polynominal - TestConv(H,M, tevals, tevecs); - - if(converged >= Nk) - break; - - ImplicitRestart(ff, tevals,tevecs,H); - } - Wilkinson(H, evals, evecs, small); - // Check(); - - std::cout << "Done "< & H,DenseMatrix &Q, DenseVector shifts) { - - int P; Size(shifts,P); - int M; SizeSquare(Q,M); - - Unity(Q); - - int lock_num = lock ? 
converged : 0; - - RealD t_Househoulder_vector(0.0); - RealD t_Househoulder_mult(0.0); - - for(int i=0;i ck(3), v(3); - - x = H[lock_num+0][lock_num+0]-shifts[i]; - y = H[lock_num+1][lock_num+0]; - ck[0] = x; ck[1] = y; ck[2] = 0; - - normalise(ck); ///Normalization cancels in PHP anyway - RealD beta; - - Householder_vector(ck, 0, 2, v, beta); - Householder_mult(H,v,beta,0,lock_num+0,lock_num+2,0); - Householder_mult(H,v,beta,0,lock_num+0,lock_num+2,1); - ///Accumulate eigenvector - Householder_mult(Q,v,beta,0,lock_num+0,lock_num+2,1); - - int sw = 0; - for(int k=lock_num+0;k(ck, 0, 2-sw, v, beta); - Householder_mult(H,v, beta,0,k+1,k+3-sw,0); - Householder_mult(H,v, beta,0,k+1,k+3-sw,1); - ///Accumulate eigenvector - Householder_mult(Q,v, beta,0,k+1,k+3-sw,1); - } - } - } - - void TestConv(DenseMatrix & H,int SS, - DenseVector &bq, Field &bf, - DenseVector &tevals, DenseVector > &tevecs, - int lock, int converged) - { - std::cout << "Converged " << converged << " so far." << std::endl; - int lock_num = lock ? 
converged : 0; - int M = Nm; - - ///Active Factorization - DenseMatrix AH; Resize(AH,SS - lock_num,SS - lock_num ); - - AH = GetSubMtx(H,lock_num, SS, lock_num, SS); - - int NN=tevals.size(); - int AHsize=SS-lock_num; - - RealD small=1.0e-16; - Wilkinson(AH, tevals, tevecs, small); - - EigenSort(tevals, tevecs); - - RealD resid_nrm= norm2(bf); - - if(!lock) converged = 0; -#if 0 - for(int i = SS - lock_num - 1; i >= SS - Nk && i >= 0; --i){ - - RealD diff = 0; - diff = abs( tevecs[i][Nm - 1 - lock_num] ) * resid_nrm; - - std::cout << "residual estimate " << SS-1-i << " " << diff << " of (" << tevals[i] << ")" << std::endl; - - if(diff < converged) { - - if(lock) { - - DenseMatrix Q; Resize(Q,M,M); - bool herm = true; - - Lock(H, Q, tevals[i], converged, small, SS, herm); - - times_real(bq, Q, bq.size()); - bf = Q[M - 1][M - 1]* bf; - lock_num++; - } - converged++; - std::cout << " converged on eval " << converged << " of " << Nk << std::endl; - } else { - break; - } - } -#endif - std::cout << "Got " << converged << " so far " < &evals, - DenseVector > &evecs) { - - DenseVector goodval(this->get); - - EigenSort(evals,evecs); - - int NM = Nm; - - DenseVector< DenseVector > V; Size(V,NM); - DenseVector QZ(NM*NM); - - for(int i = 0; i < NM; i++){ - for(int j = 0; j < NM; j++){ - // evecs[i][j]; - } - } - } - - -/** - There is some matrix Q such that for any vector y - Q.e_1 = y and Q is unitary. -**/ - template - static T orthQ(DenseMatrix &Q, DenseVector y){ - int N = y.size(); //Matrix Size - Fill(Q,0.0); - T tau; - for(int i=0;i 0.0){ - - T gam = conj( (y[j]/tau)/tau0 ); - for(int k=0;k<=j-1;k++){ - Q[k][j]=-gam*y[k]; - } - Q[j][j]=tau0/tau; - } else { - Q[j-1][j]=1.0; - } - tau0 = tau; - } - return tau; - } - -/** - There is some matrix Q such that for any vector y - Q.e_k = y and Q is unitary. 
-**/ - template< class T> - static T orthU(DenseMatrix &Q, DenseVector y){ - T tau = orthQ(Q,y); - SL(Q); - return tau; - } - - -/** - Wind up with a matrix with the first con rows untouched - -say con = 2 - Q is such that Qdag H Q has {x, x, val, 0, 0, 0, 0, ...} as 1st colum - and the matrix is upper hessenberg - and with f and Q appropriately modidied with Q is the arnoldi factorization - -**/ - -template -static void Lock(DenseMatrix &H, // Hess mtx - DenseMatrix &Q, // Lock Transform - T val, // value to be locked - int con, // number already locked - RealD small, - int dfg, - bool herm) -{ - //ForceTridiagonal(H); - - int M = H.dim; - DenseVector vec; Resize(vec,M-con); - - DenseMatrix AH; Resize(AH,M-con,M-con); - AH = GetSubMtx(H,con, M, con, M); - - DenseMatrix QQ; Resize(QQ,M-con,M-con); - - Unity(Q); Unity(QQ); - - DenseVector evals; Resize(evals,M-con); - DenseMatrix evecs; Resize(evecs,M-con,M-con); - - Wilkinson(AH, evals, evecs, small); - - int k=0; - RealD cold = abs( val - evals[k]); - for(int i=1;icon+2; j--){ - - DenseMatrix U; Resize(U,j-1-con,j-1-con); - DenseVector z; Resize(z,j-1-con); - T nm = norm(z); - for(int k = con+0;k Hb; Resize(Hb,j-1-con,M); - - for(int a = 0;a Qb; Resize(Qb,M,M); - - for(int a = 0;a Hc; Resize(Hc,M,M); - - for(int a = 0;a { FieldMetaData header; IldgReader _IldgReader; _IldgReader.open(config); - _IldgReader.readConfiguration(config,U,header); // format from the header + _IldgReader.readConfiguration(U,header); // format from the header _IldgReader.close(); std::cout << GridLogMessage << "Read ILDG Configuration from " << config diff --git a/tests/solver/Test_dwf_lanczos.cc b/tests/solver/Test_dwf_lanczos.cc index bb978186..48cca378 100644 --- a/tests/solver/Test_dwf_lanczos.cc +++ b/tests/solver/Test_dwf_lanczos.cc @@ -54,7 +54,7 @@ int main (int argc, char ** argv) GridParallelRNG RNG5rb(FrbGrid); RNG5.SeedFixedIntegers(seeds5); LatticeGaugeField Umu(UGrid); - SU3::TepidConfiguration(RNG4, Umu); + 
SU3::HotConfiguration(RNG4, Umu); std::vector U(4,UGrid); for(int mu=0;mu Date: Wed, 21 Jun 2017 02:26:03 +0100 Subject: [PATCH 097/177] Simplified lanczos, added Eigen diagonalisation. Curious if we can deprecate dependencly on BLAS. Will see when we get 48^3 running on our BG/Q port --- .../iterative/BlockConjugateGradient.h | 7 +- lib/algorithms/iterative/EigenSort.h | 81 -- .../iterative/ImplicitlyRestartedLanczos.h | 1074 +++++++++-------- tests/solver/Test_dwf_lanczos.cc | 9 +- 4 files changed, 547 insertions(+), 624 deletions(-) delete mode 100644 lib/algorithms/iterative/EigenSort.h diff --git a/lib/algorithms/iterative/BlockConjugateGradient.h b/lib/algorithms/iterative/BlockConjugateGradient.h index f8b83b1f..9418f63c 100644 --- a/lib/algorithms/iterative/BlockConjugateGradient.h +++ b/lib/algorithms/iterative/BlockConjugateGradient.h @@ -56,11 +56,8 @@ class BlockConjugateGradient : public OperatorFunction { Integer IterationsToComplete; //Number of iterations the CG took to finish. 
Filled in upon completion BlockConjugateGradient(BlockCGtype cgtype,int _Orthog,RealD tol, Integer maxit, bool err_on_no_conv = true) - : Tolerance(tol), - CGtype(cgtype), - blockDim(_Orthog), - MaxIterations(maxit), - ErrorOnNoConverge(err_on_no_conv){}; + : Tolerance(tol), CGtype(cgtype), blockDim(_Orthog), MaxIterations(maxit), ErrorOnNoConverge(err_on_no_conv) + {}; //////////////////////////////////////////////////////////////////////////////////////////////////// // Thin QR factorisation (google it) diff --git a/lib/algorithms/iterative/EigenSort.h b/lib/algorithms/iterative/EigenSort.h deleted file mode 100644 index 23621544..00000000 --- a/lib/algorithms/iterative/EigenSort.h +++ /dev/null @@ -1,81 +0,0 @@ - /************************************************************************************* - - Grid physics library, www.github.com/paboyle/Grid - - Source file: ./lib/algorithms/iterative/EigenSort.h - - Copyright (C) 2015 - -Author: Peter Boyle - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License along - with this program; if not, write to the Free Software Foundation, Inc., - 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
- - See the full license in the file "LICENSE" in the top level distribution directory - *************************************************************************************/ - /* END LEGAL */ -#ifndef GRID_EIGENSORT_H -#define GRID_EIGENSORT_H - - -namespace Grid { - ///////////////////////////////////////////////////////////// - // Eigen sorter to begin with - ///////////////////////////////////////////////////////////// - -template -class SortEigen { - private: - -//hacking for testing for now - private: - static bool less_lmd(RealD left,RealD right){ - return left > right; - } - static bool less_pair(std::pair& left, - std::pair& right){ - return left.first > (right.first); - } - - - public: - - void push(DenseVector& lmd, - DenseVector& evec,int N) { - DenseVector cpy(lmd.size(),evec[0]._grid); - for(int i=0;i > emod(lmd.size()); - for(int i=0;i(lmd[i],&cpy[i]); - - partial_sort(emod.begin(),emod.begin()+N,emod.end(),less_pair); - - typename DenseVector >::iterator it = emod.begin(); - for(int i=0;ifirst; - evec[i]=*(it->second); - ++it; - } - } - void push(DenseVector& lmd,int N) { - std::partial_sort(lmd.begin(),lmd.begin()+N,lmd.end(),less_lmd); - } - bool saturated(RealD lmd, RealD thrs) { - return fabs(lmd) > fabs(thrs); - } -}; - -} -#endif diff --git a/lib/algorithms/iterative/ImplicitlyRestartedLanczos.h b/lib/algorithms/iterative/ImplicitlyRestartedLanczos.h index acd67592..571bf1b2 100644 --- a/lib/algorithms/iterative/ImplicitlyRestartedLanczos.h +++ b/lib/algorithms/iterative/ImplicitlyRestartedLanczos.h @@ -7,7 +7,8 @@ Copyright (C) 2015 Author: Peter Boyle -Author: paboyle +Author: Chulwoo Jung +Author: Guido Cossu This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -31,35 +32,71 @@ Author: paboyle #include //memset -#ifdef USE_LAPACK -void LAPACK_dstegr(char *jobz, char *range, int *n, double *d, double *e, - double *vl, double *vu, int *il, int *iu, double 
*abstol, - int *m, double *w, double *z, int *ldz, int *isuppz, - double *work, int *lwork, int *iwork, int *liwork, - int *info); -#endif - -template using DenseVector = std::vector; - -//#include -#include - namespace Grid { + enum IRLdiagonalisation { + IRLdiagonaliseWithDSTEGR, + IRLdiagonaliseWithQR, + IRLdiagonaliseWithEigen + }; + //////////////////////////////////////////////////////////////////////////////// + // Helper class for sorting the evalues AND evectors by Field + // Use pointer swizzle on vectors + //////////////////////////////////////////////////////////////////////////////// +template +class SortEigen { + private: + static bool less_lmd(RealD left,RealD right){ + return left > right; + } + static bool less_pair(std::pair& left, + std::pair& right){ + return left.first > (right.first); + } + + public: + void push(std::vector& lmd,std::vector& evec,int N) { + + //////////////////////////////////////////////////////////////////////// + // PAB: FIXME: VERY VERY VERY wasteful: takes a copy of the entire vector set. 
+ // : The vector reorder should be done by pointer swizzle somehow + //////////////////////////////////////////////////////////////////////// + std::vector cpy(lmd.size(),evec[0]._grid); + for(int i=0;i > emod(lmd.size()); + + for(int i=0;i(lmd[i],&cpy[i]); + + partial_sort(emod.begin(),emod.begin()+N,emod.end(),less_pair); + + typename std::vector >::iterator it = emod.begin(); + for(int i=0;ifirst; + evec[i]=*(it->second); + ++it; + } + } + void push(std::vector& lmd,int N) { + std::partial_sort(lmd.begin(),lmd.begin()+N,lmd.end(),less_lmd); + } + bool saturated(RealD lmd, RealD thrs) { + return fabs(lmd) > fabs(thrs); + } +}; + ///////////////////////////////////////////////////////////// // Implicitly restarted lanczos ///////////////////////////////////////////////////////////// template class ImplicitlyRestartedLanczos { - -public: - int Niter; // Max iterations - int Nstop; // Number of evecs checked for convergence - int Nk; // Number of converged sought - int Nm; // Nm -- total number of vectors - +private: + int MaxIter; // Max iterations + int Nstop; // Number of evecs checked for convergence + int Nk; // Number of converged sought + int Nm; // Nm -- total number of vectors RealD eresid; - + IRLdiagonalisation diagonalisation; //////////////////////////////////// // Embedded objects //////////////////////////////////// @@ -70,362 +107,20 @@ public: ///////////////////////// // Constructor ///////////////////////// +public: ImplicitlyRestartedLanczos(LinearOperatorBase &Linop, // op - OperatorFunction & poly, // polynmial - int _Nstop, // sought vecs + OperatorFunction & poly, // polynomial + int _Nstop, // really sought vecs int _Nk, // sought vecs int _Nm, // total vecs - RealD _eresid, // resid in lmdue deficit - int _Niter) : // Max iterations + RealD _eresid, // resid in lmd deficit + int _MaxIter, // Max iterations + IRLdiagonalisation _diagonalisation= IRLdiagonaliseWithEigen ) : _Linop(Linop), _poly(poly), - Nstop(_Nstop), Nk(_Nk), Nm(_Nm), - 
eresid(_eresid), Niter(_Niter) { }; - -#if 0 - ImplicitlyRestartedLanczos(LinearOperatorBase &Linop, // op - OperatorFunction & poly, // polynmial - int _Nk, // sought vecs - int _Nm, // total vecs - RealD _eresid, // resid in lmdue deficit - int _Niter) : // Max iterations - _Linop(Linop), _poly(poly), - Nstop(_Nk), Nk(_Nk), Nm(_Nm), - eresid(_eresid), Niter(_Niter) { }; -#endif - -#if 0 - void calc(DenseVector& eval, - DenseVector& evec, - const Field& src, - int& Nconv); - - void step(DenseVector& lmd, - DenseVector& lme, - DenseVector& evec, - Field& w,int Nm,int k); - - void setUnit_Qt(int Nm, DenseVector &Qt) ; - - static RealD normalise(Field& v) ; - void orthogonalize(Field& w, DenseVector& evec, int k); - void diagonalize(DenseVector& lmd, - DenseVector& lme, - int N2, int N1, - DenseVector& Qt, - GridBase *grid); - - void qr_decomp(DenseVector& lmd, - DenseVector& lme, - int Nk, int Nm, - DenseVector& Qt, - RealD Dsh, int kmin, int kmax); - -#ifdef USE_LAPACK - void diagonalize_lapack(DenseVector& lmd, - DenseVector& lme, - int N1, int N2, - DenseVector& Qt, - GridBase *grid); -#endif -#endif - -/* Saad PP. 195 -1. Choose an initial vector v1 of 2-norm unity. Set β1 ≡ 0, v0 ≡ 0 -2. For k = 1,2,...,m Do: -3. wk:=Avk−βkv_{k−1} -4. αk:=(wk,vk) // -5. wk:=wk−αkvk // wk orthog vk -6. βk+1 := ∥wk∥2. If βk+1 = 0 then Stop -7. vk+1 := wk/βk+1 -8. EndDo - */ - void step(DenseVector& lmd, - DenseVector& lme, - DenseVector& evec, - Field& w,int Nm,int k) - { - const RealD tiny = 1.0e-20; - assert( k< Nm ); - - _poly(_Linop,evec[k],w); // 3. wk:=Avk−βkv_{k−1} - - if(k>0) w -= lme[k-1] * evec[k-1]; - - ComplexD zalph = innerProduct(evec[k],w); // 4. αk:=(wk,vk) - RealD alph = real(zalph); - - w = w - alph * evec[k];// 5. wk:=wk−αkvk - - RealD beta = normalise(w); // 6. βk+1 := ∥wk∥2. If βk+1 = 0 then Stop - // 7. 
vk+1 := wk/βk+1 - - lmd[k] = alph; - lme[k] = beta; - - if ( k > 0 ) orthogonalize(w,evec,k); // orthonormalise - if ( k < Nm-1) evec[k+1] = w; - - if ( beta < tiny ) std::cout << " beta is tiny "<& lmd, // Nm - DenseVector& lme, // Nm - int Nk, int Nm, - DenseVector& Qt, // Nm x Nm matrix - RealD Dsh, int kmin, int kmax) - { - int k = kmin-1; - RealD x; - - RealD Fden = 1.0/hypot(lmd[k]-Dsh,lme[k]); - RealD c = ( lmd[k] -Dsh) *Fden; - RealD s = -lme[k] *Fden; - - RealD tmpa1 = lmd[k]; - RealD tmpa2 = lmd[k+1]; - RealD tmpb = lme[k]; - - lmd[k] = c*c*tmpa1 +s*s*tmpa2 -2.0*c*s*tmpb; - lmd[k+1] = s*s*tmpa1 +c*c*tmpa2 +2.0*c*s*tmpb; - lme[k] = c*s*(tmpa1-tmpa2) +(c*c-s*s)*tmpb; - x =-s*lme[k+1]; - lme[k+1] = c*lme[k+1]; - - for(int i=0; i& lmd, - DenseVector& lme, - int N1, - int N2, - DenseVector& Qt, - GridBase *grid) - { - const int size = Nm; - int NN = N1; - double evals_tmp[NN]; - double evec_tmp[NN][NN]; - memset(evec_tmp[0],0,sizeof(double)*NN*NN); - double DD[NN]; - double EE[NN]; - for (int i = 0; i< NN; i++) { - for (int j = i - 1; j <= i + 1; j++) { - if ( j < NN && j >= 0 ) { - if (i==j) DD[i] = lmd[i]; - if (i==j) evals_tmp[i] = lmd[i]; - if (j==(i-1)) EE[j] = lme[j]; - } - } - } - int evals_found; - int lwork = ( (18*NN) > (1+4*NN+NN*NN)? 
(18*NN):(1+4*NN+NN*NN)) ; - int liwork = 3+NN*10 ; - int iwork[liwork]; - double work[lwork]; - int isuppz[2*NN]; - char jobz = 'V'; // calculate evals & evecs - char range = 'I'; // calculate all evals - // char range = 'A'; // calculate all evals - char uplo = 'U'; // refer to upper half of original matrix - char compz = 'I'; // Compute eigenvectors of tridiagonal matrix - int ifail[NN]; - int info; - int total = grid->_Nprocessors; - int node = grid->_processor; - int interval = (NN/total)+1; - double vl = 0.0, vu = 0.0; - int il = interval*node+1 , iu = interval*(node+1); - if (iu > NN) iu=NN; - double tol = 0.0; - if (1) { - memset(evals_tmp,0,sizeof(double)*NN); - if ( il <= NN){ - LAPACK_dstegr(&jobz, &range, &NN, - (double*)DD, (double*)EE, - &vl, &vu, &il, &iu, // these four are ignored if second parameteris 'A' - &tol, // tolerance - &evals_found, evals_tmp, (double*)evec_tmp, &NN, - isuppz, - work, &lwork, iwork, &liwork, - &info); - for (int i = iu-1; i>= il-1; i--){ - evals_tmp[i] = evals_tmp[i - (il-1)]; - if (il>1) evals_tmp[i-(il-1)]=0.; - for (int j = 0; j< NN; j++){ - evec_tmp[i][j] = evec_tmp[i - (il-1)][j]; - if (il>1) evec_tmp[i-(il-1)][j]=0.; - } - } - } - { - grid->GlobalSumVector(evals_tmp,NN); - grid->GlobalSumVector((double*)evec_tmp,NN*NN); - } - } - // cheating a bit. - // It is better to sort instead of just reversing it, - // but the document of the routine says evals are sorted in increasing order. - // qr gives evals in decreasing order. 
- for(int i=0;i& lmd, - DenseVector& lme, - int N2, - int N1, - DenseVector& Qt, - GridBase *grid) - { - -#ifdef USE_LAPACK - const int check_lapack=0; // just use lapack if 0, check against lapack if 1 - - if(!check_lapack) - return diagonalize_lapack(lmd,lme,N2,N1,Qt,grid); - - DenseVector lmd2(N1); - DenseVector lme2(N1); - DenseVector Qt2(N1*N1); - for(int k=0; k= kmin; --j){ - RealD dds = fabs(lmd[j-1])+fabs(lmd[j]); - if(fabs(lme[j-1])+dds > dds){ - kmax = j+1; - goto continued; - } - } - Niter = iter; -#ifdef USE_LAPACK - if(check_lapack){ - const double SMALL=1e-8; - diagonalize_lapack(lmd2,lme2,N2,N1,Qt2,grid); - DenseVector lmd3(N2); - for(int k=0; kSMALL) std::cout <<"lmd(qr) lmd(lapack) "<< k << ": " << lmd2[k] <<" "<< lmd3[k] < dds){ - kmin = j+1; - break; - } - } - } - std::cout << "[QL method] Error - Too many iteration: "<& evec, - int k) - { - typedef typename Field::scalar_type MyComplex; - MyComplex ip; - - if ( 0 ) { - for(int j=0; j &Qt) { - for(int i=0; i& eval, - DenseVector& evec, - const Field& src, - int& Nconv) - { - - GridBase *grid = evec[0]._grid; - assert(grid == src._grid); - - std::cout << " -- seek Nk = " << Nk <<" vectors"<< std::endl; - std::cout << " -- accept Nstop = " << Nstop <<" vectors"<< std::endl; - std::cout << " -- total Nm = " << Nm <<" vectors"<< std::endl; - std::cout << " -- size of eval = " << eval.size() << std::endl; - std::cout << " -- size of evec = " << evec.size() << std::endl; - - assert(Nm == evec.size() && Nm == eval.size()); - - DenseVector lme(Nm); - DenseVector lme2(Nm); - DenseVector eval2(Nm); - DenseVector Qt(Nm*Nm); - DenseVector Iconv(Nm); - - DenseVector B(Nm,grid); // waste of space replicating - - Field f(grid); - Field v(grid); - - int k1 = 1; - int k2 = Nk; - - Nconv = 0; - - RealD beta_k; - - // Set initial vector - evec[0] = src; - std:: cout <<"norm2(src)= " << norm2(src)<& eval, std::vector& evec, const Field& src, int& Nconv) + { - for(int i=0; i<(Nk+1); ++i) B[i] = 0.0; - - for(int 
j=k1-1; j=Nstop ){ - goto converged; - } - } // end of iter loop + GridBase *grid = evec[0]._grid; + assert(grid == src._grid); + + std::cout << GridLogMessage <<"**************************************************************************"<< std::endl; + std::cout << GridLogMessage <<" ImplicitlyRestartedLanczos::calc() starting iteration 0 / "<< MaxIter<< std::endl; + std::cout << GridLogMessage <<"**************************************************************************"<< std::endl; + std::cout << GridLogMessage <<" -- seek Nk = " << Nk <<" vectors"<< std::endl; + std::cout << GridLogMessage <<" -- accept Nstop = " << Nstop <<" vectors"<< std::endl; + std::cout << GridLogMessage <<" -- total Nm = " << Nm <<" vectors"<< std::endl; + std::cout << GridLogMessage <<" -- size of eval = " << eval.size() << std::endl; + std::cout << GridLogMessage <<" -- size of evec = " << evec.size() << std::endl; + if ( diagonalisation == IRLdiagonaliseWithDSTEGR ) { + std::cout << GridLogMessage << "Diagonalisation is DSTEGR "< lme(Nm); + std::vector lme2(Nm); + std::vector eval2(Nm); + Eigen::MatrixXd Qt = Eigen::MatrixXd::Zero(Nm,Nm); + std::vector Iconv(Nm); + + std::vector B(Nm,grid); // waste of space replicating + + Field f(grid); + Field v(grid); + + int k1 = 1; + int k2 = Nk; + + Nconv = 0; + + RealD beta_k; + + // Set initial vector + evec[0] = src; + std::cout << GridLogMessage <<"norm2(src)= " << norm2(src)<=Nstop ){ + goto converged; + } + } // end of iter loop + + std::cout << GridLogMessage <<"**************************************************************************"<< std::endl; + std::cout<< GridLogError <<" ImplicitlyRestartedLanczos::calc() NOT converged."; + std::cout << GridLogMessage <<"**************************************************************************"<< std::endl; + abort(); + + converged: + // Sorting + eval.resize(Nconv); + evec.resize(Nconv,grid); + for(int i=0; i& lmd, + std::vector& lme, + std::vector& evec, + Field& w,int Nm,int k) + { + const 
RealD tiny = 1.0e-20; + assert( k< Nm ); + + _poly(_Linop,evec[k],w); // 3. wk:=Avk−βkv_{k−1} + + if(k>0) w -= lme[k-1] * evec[k-1]; + + ComplexD zalph = innerProduct(evec[k],w); // 4. αk:=(wk,vk) + RealD alph = real(zalph); + + w = w - alph * evec[k];// 5. wk:=wk−αkvk + + RealD beta = normalise(w); // 6. βk+1 := ∥wk∥2. If βk+1 = 0 then Stop + // 7. vk+1 := wk/βk+1 + + lmd[k] = alph; + lme[k] = beta; + + if ( k > 0 ) orthogonalize(w,evec,k); // orthonormalise + if ( k < Nm-1) evec[k+1] = w; + + if ( beta < tiny ) std::cout << GridLogMessage << " beta is tiny "<& lmd, // Nm + std::vector& lme, // Nm + int Nk, int Nm, // Nk, Nm + Eigen::MatrixXd& Qt, // Nm x Nm matrix + RealD Dsh, int kmin, int kmax) + { + int k = kmin-1; + RealD x; + + RealD Fden = 1.0/hypot(lmd[k]-Dsh,lme[k]); + RealD c = ( lmd[k] -Dsh) *Fden; + RealD s = -lme[k] *Fden; + + RealD tmpa1 = lmd[k]; + RealD tmpa2 = lmd[k+1]; + RealD tmpb = lme[k]; + + lmd[k] = c*c*tmpa1 +s*s*tmpa2 -2.0*c*s*tmpb; + lmd[k+1] = s*s*tmpa1 +c*c*tmpa2 +2.0*c*s*tmpb; + lme[k] = c*s*(tmpa1-tmpa2) +(c*c-s*s)*tmpb; + x =-s*lme[k+1]; + lme[k+1] = c*lme[k+1]; + + for(int i=0; i& lmd, std::vector& lme, + int Nk, int Nm, + Eigen::MatrixXd & Qt, + GridBase *grid) + { + Qt = Eigen::MatrixXd::Identity(Nm,Nm); + if ( diagonalisation == IRLdiagonaliseWithDSTEGR ) { + diagonalize_lapack(lmd,lme,Nk,Nm,Qt,grid); + } else if ( diagonalisation == IRLdiagonaliseWithQR ) { + diagonalize_QR(lmd,lme,Nk,Nm,Qt,grid); + } else if ( diagonalisation == IRLdiagonaliseWithEigen ) { + diagonalize_Eigen(lmd,lme,Nk,Nm,Qt,grid); + } else { + assert(0); + } + } + +#ifdef USE_LAPACK +void LAPACK_dstegr(char *jobz, char *range, int *n, double *d, double *e, + double *vl, double *vu, int *il, int *iu, double *abstol, + int *m, double *w, double *z, int *ldz, int *isuppz, + double *work, int *lwork, int *iwork, int *liwork, + int *info); #endif +void diagonalize_lapack(std::vector& lmd, + std::vector& lme, + int Nk, int Nm, + Eigen::MatrixXd& Qt, + GridBase 
*grid) +{ +#ifdef USE_LAPACK + const int size = Nm; + int NN = Nk; + double evals_tmp[NN]; + double evec_tmp[NN][NN]; + memset(evec_tmp[0],0,sizeof(double)*NN*NN); + double DD[NN]; + double EE[NN]; + for (int i = 0; i< NN; i++) { + for (int j = i - 1; j <= i + 1; j++) { + if ( j < NN && j >= 0 ) { + if (i==j) DD[i] = lmd[i]; + if (i==j) evals_tmp[i] = lmd[i]; + if (j==(i-1)) EE[j] = lme[j]; + } + } + } + int evals_found; + int lwork = ( (18*NN) > (1+4*NN+NN*NN)? (18*NN):(1+4*NN+NN*NN)) ; + int liwork = 3+NN*10 ; + int iwork[liwork]; + double work[lwork]; + int isuppz[2*NN]; + char jobz = 'V'; // calculate evals & evecs + char range = 'I'; // calculate all evals + // char range = 'A'; // calculate all evals + char uplo = 'U'; // refer to upper half of original matrix + char compz = 'I'; // Compute eigenvectors of tridiagonal matrix + int ifail[NN]; + int info; + int total = grid->_Nprocessors; + int node = grid->_processor; + int interval = (NN/total)+1; + double vl = 0.0, vu = 0.0; + int il = interval*node+1 , iu = interval*(node+1); + if (iu > NN) iu=NN; + double tol = 0.0; + if (1) { + memset(evals_tmp,0,sizeof(double)*NN); + if ( il <= NN){ + LAPACK_dstegr(&jobz, &range, &NN, + (double*)DD, (double*)EE, + &vl, &vu, &il, &iu, // these four are ignored if second parameteris 'A' + &tol, // tolerance + &evals_found, evals_tmp, (double*)evec_tmp, &NN, + isuppz, + work, &lwork, iwork, &liwork, + &info); + for (int i = iu-1; i>= il-1; i--){ + evals_tmp[i] = evals_tmp[i - (il-1)]; + if (il>1) evals_tmp[i-(il-1)]=0.; + for (int j = 0; j< NN; j++){ + evec_tmp[i][j] = evec_tmp[i - (il-1)][j]; + if (il>1) evec_tmp[i-(il-1)][j]=0.; + } + } + } + { + grid->GlobalSumVector(evals_tmp,NN); + grid->GlobalSumVector((double*)evec_tmp,NN*NN); + } + } + // Safer to sort instead of just reversing it, + // but the document of the routine says evals are sorted in increasing order. + // qr gives evals in decreasing order. 
+ for(int i=0;i& lmd, std::vector& lme, + int Nk, int Nm, + Eigen::MatrixXd & Qt, + GridBase *grid) + { + int Niter = 100*Nm; + int kmin = 1; + int kmax = Nk; + + // (this should be more sophisticated) + for(int iter=0; iter= kmin; --j){ + RealD dds = fabs(lmd[j-1])+fabs(lmd[j]); + if(fabs(lme[j-1])+dds > dds){ + kmax = j+1; + goto continued; + } + } + Niter = iter; + return; + + continued: + for(int j=0; j dds){ + kmin = j+1; + break; + } + } + } + std::cout << GridLogError << "[QL method] Error - Too many iteration: "<& lmd, std::vector& lme, + int Nk, int Nm, + Eigen::MatrixXd & Qt, // Nm x Nm + GridBase *grid) + { + Eigen::MatrixXd TriDiag = Eigen::MatrixXd::Zero(Nk,Nk); + + for(int i=0;i eigensolver(TriDiag); + + for (int i = 0; i < Nk; i++) { + lmd[Nk-1-i] = eigensolver.eigenvalues()(i); + } + for (int i = 0; i < Nk; i++) { + for (int j = 0; j < Nk; j++) { + Qt(Nk-1-i,j) = eigensolver.eigenvectors()(j,i); + } + } + } + + + static RealD normalise(Field& v) + { + RealD nn = norm2(v); + nn = sqrt(nn); + v = v * (1.0/nn); + return nn; + } + + void orthogonalize(Field& w, std::vector& evec, int k) + { + typedef typename Field::scalar_type MyComplex; + MyComplex ip; + + for(int j=0; j eval(Nm); - FermionField src(FrbGrid); gaussian(RNG5rb,src); + FermionField src(FrbGrid); + gaussian(RNG5rb,src); std::vector evec(Nm,FrbGrid); for(int i=0;i<1;i++){ - std::cout << i<<" / "<< Nm<< " grid pointer "< Date: Wed, 21 Jun 2017 02:50:09 +0100 Subject: [PATCH 098/177] Clean up finished. 
Could shrink Lanczos to around 400 lines at a push --- .../iterative/ImplicitlyRestartedLanczos.h | 114 +++++++++--------- tests/debug/Test_synthetic_lanczos.cc | 4 +- 2 files changed, 62 insertions(+), 56 deletions(-) diff --git a/lib/algorithms/iterative/ImplicitlyRestartedLanczos.h b/lib/algorithms/iterative/ImplicitlyRestartedLanczos.h index 571bf1b2..a8723f32 100644 --- a/lib/algorithms/iterative/ImplicitlyRestartedLanczos.h +++ b/lib/algorithms/iterative/ImplicitlyRestartedLanczos.h @@ -39,10 +39,11 @@ namespace Grid { IRLdiagonaliseWithQR, IRLdiagonaliseWithEigen }; - //////////////////////////////////////////////////////////////////////////////// - // Helper class for sorting the evalues AND evectors by Field - // Use pointer swizzle on vectors - //////////////////////////////////////////////////////////////////////////////// + +//////////////////////////////////////////////////////////////////////////////// +// Helper class for sorting the evalues AND evectors by Field +// Use pointer swizzle on vectors +//////////////////////////////////////////////////////////////////////////////// template class SortEigen { private: @@ -90,7 +91,9 @@ class SortEigen { ///////////////////////////////////////////////////////////// template class ImplicitlyRestartedLanczos { + private: + int MaxIter; // Max iterations int Nstop; // Number of evecs checked for convergence int Nk; // Number of converged sought @@ -122,6 +125,29 @@ public: diagonalisation(_diagonalisation) { }; + //////////////////////////////// + // Helpers + //////////////////////////////// + static RealD normalise(Field& v) + { + RealD nn = norm2(v); + nn = sqrt(nn); + v = v * (1.0/nn); + return nn; + } + + void orthogonalize(Field& w, std::vector& evec, int k) + { + typedef typename Field::scalar_type MyComplex; + MyComplex ip; + + for(int j=0; j K P = M − K † @@ -167,9 +193,10 @@ until convergence std::vector lme(Nm); std::vector lme2(Nm); std::vector eval2(Nm); - Eigen::MatrixXd Qt = 
Eigen::MatrixXd::Zero(Nm,Nm); - std::vector Iconv(Nm); + Eigen::MatrixXd Qt = Eigen::MatrixXd::Zero(Nm,Nm); + + std::vector Iconv(Nm); std::vector B(Nm,grid); // waste of space replicating Field f(grid); @@ -218,6 +245,7 @@ until convergence // Implicitly shifted QR transformations Qt = Eigen::MatrixXd::Identity(Nm,Nm); for(int ip=k2; ip& lmd, std::vector& lme, + int Nk, int Nm, + Eigen::MatrixXd & Qt, // Nm x Nm + GridBase *grid) + { + Eigen::MatrixXd TriDiag = Eigen::MatrixXd::Zero(Nk,Nk); + + for(int i=0;i eigensolver(TriDiag); + + for (int i = 0; i < Nk; i++) { + lmd[Nk-1-i] = eigensolver.eigenvalues()(i); + } + for (int i = 0; i < Nk; i++) { + for (int j = 0; j < Nk; j++) { + Qt(Nk-1-i,j) = eigensolver.eigenvectors()(j,i); + } + } + } + /////////////////////////////////////////////////////////////////////////// + // File could end here if settle on Eigen ??? + /////////////////////////////////////////////////////////////////////////// + void qr_decomp(std::vector& lmd, // Nm std::vector& lme, // Nm int Nk, int Nm, // Nk, Nm @@ -570,50 +620,6 @@ void diagonalize_lapack(std::vector& lmd, abort(); } - void diagonalize_Eigen(std::vector& lmd, std::vector& lme, - int Nk, int Nm, - Eigen::MatrixXd & Qt, // Nm x Nm - GridBase *grid) - { - Eigen::MatrixXd TriDiag = Eigen::MatrixXd::Zero(Nk,Nk); - - for(int i=0;i eigensolver(TriDiag); - - for (int i = 0; i < Nk; i++) { - lmd[Nk-1-i] = eigensolver.eigenvalues()(i); - } - for (int i = 0; i < Nk; i++) { - for (int j = 0; j < Nk; j++) { - Qt(Nk-1-i,j) = eigensolver.eigenvectors()(j,i); - } - } - } - - - static RealD normalise(Field& v) - { - RealD nn = norm2(v); - nn = sqrt(nn); - v = v * (1.0/nn); - return nn; - } - - void orthogonalize(Field& w, std::vector& evec, int k) - { - typedef typename Field::scalar_type MyComplex; - MyComplex ip; - - for(int j=0; j IRL(HermOp,X,Nk,Nm,eresid,Nit); - ImplicitlyRestartedLanczos ChebyIRL(HermOp,Cheby,Nk,Nm,eresid,Nit); + ImplicitlyRestartedLanczos IRL(HermOp,X,Nk,Nk,Nm,eresid,Nit); 
+ ImplicitlyRestartedLanczos ChebyIRL(HermOp,Cheby,Nk,Nk,Nm,eresid,Nit); LatticeComplex src(grid); gaussian(RNG,src); { From ef4f2b8c410d449ff0beea1682cfc3de9bda3f79 Mon Sep 17 00:00:00 2001 From: paboyle Date: Wed, 21 Jun 2017 09:22:20 +0100 Subject: [PATCH 099/177] todo update --- TODO | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/TODO b/TODO index eeb7dfa5..8f80903e 100644 --- a/TODO +++ b/TODO @@ -2,8 +2,8 @@ TODO: --------------- Large item work list: -1)- Lanczos Remove DenseVector, DenseMatrix; Use Eigen instead. <-- -2)- MultiRHS with spread out extra dim +1)- MultiRHS with spread out extra dim +2)- Christoph's local basis expansion Lanczos 3)- BG/Q port and check 4)- Precision conversion and sort out localConvert <-- partial - Consistent linear solver flop count/rate -- PARTIAL, time but no flop/s yet @@ -13,6 +13,7 @@ Large item work list: 8)- HDCR resume Recent DONE +-- Lanczos Remove DenseVector, DenseMatrix; Use Eigen instead. <-- DONE -- GaugeFix into central location <-- DONE -- Scidac and Ildg metadata handling <-- DONE -- Binary I/O MPI2 IO <-- DONE From 9e56c6573007ccc857571aefa2ce3b6851f7b891 Mon Sep 17 00:00:00 2001 From: paboyle Date: Wed, 21 Jun 2017 14:02:58 +0100 Subject: [PATCH 100/177] Updated TODO list --- TODO | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/TODO b/TODO index 8f80903e..001c6c0c 100644 --- a/TODO +++ b/TODO @@ -2,7 +2,8 @@ TODO: --------------- Large item work list: -1)- MultiRHS with spread out extra dim +1)- MultiRHS with spread out extra dim -- Go through filesystem with SciDAC I/O + 2)- Christoph's local basis expansion Lanczos 3)- BG/Q port and check 4)- Precision conversion and sort out localConvert <-- partial From af71c63f4ce48ccbe9bfdaf40d4171913483add7 Mon Sep 17 00:00:00 2001 From: Lanny91 Date: Fri, 23 Jun 2017 11:03:12 +0200 Subject: [PATCH 101/177] AVX2 fix --- lib/simd/Grid_avx.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/simd/Grid_avx.h 
b/lib/simd/Grid_avx.h index 57d9064d..f4634432 100644 --- a/lib/simd/Grid_avx.h +++ b/lib/simd/Grid_avx.h @@ -711,7 +711,7 @@ namespace Optimization { v2 = _mm256_hadd_epi32(v1, v1); u1 = _mm256_castsi256_si128(v2); // upper half u2 = _mm256_extracti128_si256(v2, 1); // lower half - ret = _mm256_add_epi32(u1, u2); + ret = _mm_add_epi32(u1, u2); #else // No AVX horizontal add; extract upper and lower halves of register & use // SSE intrinsics. From 56abbdf4c2fa3848fe9037cf95cf5e4930631d3a Mon Sep 17 00:00:00 2001 From: Lanny91 Date: Fri, 23 Jun 2017 11:09:14 +0200 Subject: [PATCH 102/177] AVX512 integer reduce fix (for non-intel compiler) --- lib/simd/Grid_avx512.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/simd/Grid_avx512.h b/lib/simd/Grid_avx512.h index 458a8f7c..85d27421 100644 --- a/lib/simd/Grid_avx512.h +++ b/lib/simd/Grid_avx512.h @@ -558,7 +558,7 @@ namespace Optimization { v2 = _mm256_hadd_epi32(v1, v1); u1 = _mm256_castsi256_si128(v2) // upper half u2 = _mm256_extracti128_si256(v2, 1); // lower half - ret = _mm256_add_epi32(u1, u2); + ret = _mm_add_epi32(u1, u2); return _mm_cvtsi128_si32(ret); } #else From 869b99ec1efde04d94bdd02eb041a457accb930e Mon Sep 17 00:00:00 2001 From: paboyle Date: Sat, 24 Jun 2017 10:55:54 +0100 Subject: [PATCH 103/177] Threaded calls to multiple communicators --- lib/communicator/Communicator_mpit.cc | 260 ++++++++++++++++++++++++++ 1 file changed, 260 insertions(+) create mode 100644 lib/communicator/Communicator_mpit.cc diff --git a/lib/communicator/Communicator_mpit.cc b/lib/communicator/Communicator_mpit.cc new file mode 100644 index 00000000..07522900 --- /dev/null +++ b/lib/communicator/Communicator_mpit.cc @@ -0,0 +1,260 @@ + /************************************************************************************* + + Grid physics library, www.github.com/paboyle/Grid + + Source file: ./lib/communicator/Communicator_mpi.cc + + Copyright (C) 2015 + +Author: Peter Boyle + + This program is free 
software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along + with this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + + See the full license in the file "LICENSE" in the top level distribution directory + *************************************************************************************/ + /* END LEGAL */ +#include +#include +#include +#include + +namespace Grid { + + +/////////////////////////////////////////////////////////////////////////////////////////////////// +// Info that is setup once and indept of cartesian layout +/////////////////////////////////////////////////////////////////////////////////////////////////// +MPI_Comm CartesianCommunicator::communicator_world; + +// Should error check all MPI calls. 
+void CartesianCommunicator::Init(int *argc, char ***argv) { + int flag; + int provided; + MPI_Initialized(&flag); // needed to coexist with other libs apparently + if ( !flag ) { + MPI_Init_thread(argc,argv,MPI_THREAD_MULTIPLE,&provided); + if ( provided != MPI_THREAD_MULTIPLE ) { + QCD::WilsonKernelsStatic::Comms = QCD::WilsonKernelsStatic::CommsThenCompute; + } + } + MPI_Comm_dup (MPI_COMM_WORLD,&communicator_world); + ShmInitGeneric(); +} + +CartesianCommunicator::CartesianCommunicator(const std::vector &processors) +{ + _ndimension = processors.size(); + std::vector periodic(_ndimension,1); + + _Nprocessors=1; + _processors = processors; + _processor_coor.resize(_ndimension); + + MPI_Cart_create(communicator_world, _ndimension,&_processors[0],&periodic[0],1,&communicator); + MPI_Comm_rank(communicator,&_processor); + MPI_Cart_coords(communicator,_processor,_ndimension,&_processor_coor[0]); + + for(int i=0;i<_ndimension;i++){ + _Nprocessors*=_processors[i]; + } + + communicator_halo.resize (2*_ndimension); + for(int i=0;i<_ndimension*2;i++){ + MPI_Comm_dup(communicator,&communicator_halo[i]); + } + + int Size; + MPI_Comm_size(communicator,&Size); + + assert(Size==_Nprocessors); +} +void CartesianCommunicator::GlobalSum(uint32_t &u){ + int ierr=MPI_Allreduce(MPI_IN_PLACE,&u,1,MPI_UINT32_T,MPI_SUM,communicator); + assert(ierr==0); +} +void CartesianCommunicator::GlobalSum(uint64_t &u){ + int ierr=MPI_Allreduce(MPI_IN_PLACE,&u,1,MPI_UINT64_T,MPI_SUM,communicator); + assert(ierr==0); +} +void CartesianCommunicator::GlobalXOR(uint32_t &u){ + int ierr=MPI_Allreduce(MPI_IN_PLACE,&u,1,MPI_UINT32_T,MPI_BXOR,communicator); + assert(ierr==0); +} +void CartesianCommunicator::GlobalXOR(uint64_t &u){ + int ierr=MPI_Allreduce(MPI_IN_PLACE,&u,1,MPI_UINT64_T,MPI_BXOR,communicator); + assert(ierr==0); +} +void CartesianCommunicator::GlobalSum(float &f){ + int ierr=MPI_Allreduce(MPI_IN_PLACE,&f,1,MPI_FLOAT,MPI_SUM,communicator); + assert(ierr==0); +} +void 
CartesianCommunicator::GlobalSumVector(float *f,int N) +{ + int ierr=MPI_Allreduce(MPI_IN_PLACE,f,N,MPI_FLOAT,MPI_SUM,communicator); + assert(ierr==0); +} +void CartesianCommunicator::GlobalSum(double &d) +{ + int ierr = MPI_Allreduce(MPI_IN_PLACE,&d,1,MPI_DOUBLE,MPI_SUM,communicator); + assert(ierr==0); +} +void CartesianCommunicator::GlobalSumVector(double *d,int N) +{ + int ierr = MPI_Allreduce(MPI_IN_PLACE,d,N,MPI_DOUBLE,MPI_SUM,communicator); + assert(ierr==0); +} +void CartesianCommunicator::ShiftedRanks(int dim,int shift,int &source,int &dest) +{ + int ierr=MPI_Cart_shift(communicator,dim,shift,&source,&dest); + assert(ierr==0); +} +int CartesianCommunicator::RankFromProcessorCoor(std::vector &coor) +{ + int rank; + int ierr=MPI_Cart_rank (communicator, &coor[0], &rank); + assert(ierr==0); + return rank; +} +void CartesianCommunicator::ProcessorCoorFromRank(int rank, std::vector &coor) +{ + coor.resize(_ndimension); + int ierr=MPI_Cart_coords (communicator, rank, _ndimension,&coor[0]); + assert(ierr==0); +} + +// Basic Halo comms primitive +void CartesianCommunicator::SendToRecvFrom(void *xmit, + int dest, + void *recv, + int from, + int bytes) +{ + std::vector reqs(0); + SendToRecvFromBegin(reqs,xmit,dest,recv,from,bytes); + SendToRecvFromComplete(reqs); +} + +void CartesianCommunicator::SendRecvPacket(void *xmit, + void *recv, + int sender, + int receiver, + int bytes) +{ + MPI_Status stat; + assert(sender != receiver); + int tag = sender; + if ( _processor == sender ) { + MPI_Send(xmit, bytes, MPI_CHAR,receiver,tag,communicator); + } + if ( _processor == receiver ) { + MPI_Recv(recv, bytes, MPI_CHAR,sender,tag,communicator,&stat); + } +} + +// Basic Halo comms primitive +void CartesianCommunicator::SendToRecvFromBegin(std::vector &list, + void *xmit, + int dest, + void *recv, + int from, + int bytes) +{ + int myrank = _processor; + int ierr; + if ( CommunicatorPolicy == CommunicatorPolicyConcurrent ) { + MPI_Request xrq; + MPI_Request rrq; + + ierr 
=MPI_Irecv(recv, bytes, MPI_CHAR,from,from,communicator,&rrq); + ierr|=MPI_Isend(xmit, bytes, MPI_CHAR,dest,_processor,communicator,&xrq); + + assert(ierr==0); + list.push_back(xrq); + list.push_back(rrq); + } else { + // Give the CPU to MPI immediately; can use threads to overlap optionally + ierr=MPI_Sendrecv(xmit,bytes,MPI_CHAR,dest,myrank, + recv,bytes,MPI_CHAR,from, from, + communicator,MPI_STATUS_IGNORE); + assert(ierr==0); + } +} +void CartesianCommunicator::SendToRecvFromComplete(std::vector &list) +{ + if ( CommunicatorPolicy == CommunicatorPolicyConcurrent ) { + int nreq=list.size(); + std::vector status(nreq); + int ierr = MPI_Waitall(nreq,&list[0],&status[0]); + assert(ierr==0); + } +} + +void CartesianCommunicator::Barrier(void) +{ + int ierr = MPI_Barrier(communicator); + assert(ierr==0); +} + +void CartesianCommunicator::Broadcast(int root,void* data, int bytes) +{ + int ierr=MPI_Bcast(data, + bytes, + MPI_BYTE, + root, + communicator); + assert(ierr==0); +} + /////////////////////////////////////////////////////// + // Should only be used prior to Grid Init finished. + // Check for this? 
+ /////////////////////////////////////////////////////// +int CartesianCommunicator::RankWorld(void){ + int r; + MPI_Comm_rank(communicator_world,&r); + return r; +} +void CartesianCommunicator::BroadcastWorld(int root,void* data, int bytes) +{ + int ierr= MPI_Bcast(data, + bytes, + MPI_BYTE, + root, + communicator_world); + assert(ierr==0); +} + + double CartesianCommunicator::StencilSendToRecvFromBegin(int dir, + std::vector &list, + void *xmit, + int xmit_to_rank, + void *recv, + int recv_from_rank, + int bytes) +{ + int myrank = _processor; + int ierr; + // Give the CPU to MPI immediately; can use threads to overlap optionally + ierr=MPI_Sendrecv(xmit,bytes,MPI_CHAR,dest,myrank, + recv,bytes,MPI_CHAR,from, from, + communicator_halo[dir],MPI_STATUS_IGNORE); + assert(ierr==0); + return 2.0*bytes; +} +void CartesianCommunicator::StencilSendToRecvFromComplete(std::vector &waitall){ }; + + + +} + From d2e8372df3c0a39b9eb2c000c7f190c670a75501 Mon Sep 17 00:00:00 2001 From: Antonin Portelli Date: Sat, 24 Jun 2017 23:03:39 +0100 Subject: [PATCH 104/177] SU(N) algebra fix (was not working) --- lib/qcd/utils/SUn.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/lib/qcd/utils/SUn.h b/lib/qcd/utils/SUn.h index 99a620bc..8f0c0a7b 100644 --- a/lib/qcd/utils/SUn.h +++ b/lib/qcd/utils/SUn.h @@ -716,8 +716,7 @@ template for (int a = 0; a < AdjointDimension; a++) { generator(a, Ta); - auto tmp = - 2.0 * (trace(timesI(Ta) * in)) * scale;// 2.0 for the normalization of the trace in the fundamental rep - pokeColour(h_out, tmp, a); + pokeColour(h_out, - 2.0 * (trace(timesI(Ta) * in)) * scale, a); } } From 0af740dc1521656ee549094fea038176791d6cac Mon Sep 17 00:00:00 2001 From: Antonin Portelli Date: Sat, 24 Jun 2017 23:04:05 +0100 Subject: [PATCH 105/177] minor scalar HMC code improvement --- lib/qcd/action/scalar/ScalarImpl.h | 8 +++++--- lib/qcd/action/scalar/ScalarInteractionAction.h | 2 +- lib/qcd/hmc/HMC.h | 2 +- lib/qcd/hmc/HMCResourceManager.h | 3 ++- 4 
files changed, 9 insertions(+), 6 deletions(-) diff --git a/lib/qcd/action/scalar/ScalarImpl.h b/lib/qcd/action/scalar/ScalarImpl.h index 5342a1fa..174553a2 100644 --- a/lib/qcd/action/scalar/ScalarImpl.h +++ b/lib/qcd/action/scalar/ScalarImpl.h @@ -93,6 +93,8 @@ class ScalarImplTypes { class ScalarAdjMatrixImplTypes { public: typedef S Simd; + typedef QCD::SU Group; + template using iImplField = iScalar>>; template @@ -108,7 +110,7 @@ class ScalarImplTypes { typedef Field PropagatorField; static inline void generate_momenta(Field& P, GridParallelRNG& pRNG) { - QCD::SU::GaussianFundamentalLieAlgebraMatrix(pRNG, P); + Group::GaussianFundamentalLieAlgebraMatrix(pRNG, P); } static inline Field projectForce(Field& P) {return P;} @@ -122,11 +124,11 @@ class ScalarImplTypes { } static inline void HotConfiguration(GridParallelRNG &pRNG, Field &U) { - QCD::SU::LieRandomize(pRNG, U); + Group::LieRandomize(pRNG, U); } static inline void TepidConfiguration(GridParallelRNG &pRNG, Field &U) { - QCD::SU::LieRandomize(pRNG, U, 0.01); + Group::LieRandomize(pRNG, U, 0.01); } static inline void ColdConfiguration(GridParallelRNG &pRNG, Field &U) { diff --git a/lib/qcd/action/scalar/ScalarInteractionAction.h b/lib/qcd/action/scalar/ScalarInteractionAction.h index 5f4c630c..1ff8fd37 100644 --- a/lib/qcd/action/scalar/ScalarInteractionAction.h +++ b/lib/qcd/action/scalar/ScalarInteractionAction.h @@ -98,7 +98,7 @@ namespace Grid { permute(temp2, *temp, permute_type); action._odata[i] -= temp2*(*t_p) + (*t_p)*temp2; } else { - action._odata[i] -= *temp*(*t_p) + (*t_p)*(*temp); + action._odata[i] -= (*temp)*(*t_p) + (*t_p)*(*temp); } } else { action._odata[i] -= phiStencil.CommBuf()[SE->_offset]*(*t_p) + (*t_p)*phiStencil.CommBuf()[SE->_offset]; diff --git a/lib/qcd/hmc/HMC.h b/lib/qcd/hmc/HMC.h index ac690b60..5688bb24 100644 --- a/lib/qcd/hmc/HMC.h +++ b/lib/qcd/hmc/HMC.h @@ -76,7 +76,7 @@ struct HMCparameters: Serializable { template < class ReaderClass > void initialize(Reader 
&TheReader){ - std::cout << "Reading HMC\n"; + std::cout << GridLogMessage << "Reading HMC\n"; read(TheReader, "HMC", *this); } diff --git a/lib/qcd/hmc/HMCResourceManager.h b/lib/qcd/hmc/HMCResourceManager.h index 9f4c99a9..cf0000ed 100644 --- a/lib/qcd/hmc/HMCResourceManager.h +++ b/lib/qcd/hmc/HMCResourceManager.h @@ -253,6 +253,7 @@ class HMCResourceManager { template void AddObservable(Types&&... Args){ ObservablesList.push_back(std::unique_ptr(new T(std::forward(Args)...))); + ObservablesList.back()->print_parameters(); } std::vector* > GetObservables(){ @@ -297,4 +298,4 @@ private: } } -#endif // HMC_RESOURCE_MANAGER_H \ No newline at end of file +#endif // HMC_RESOURCE_MANAGER_H From 54e94360ad06cde7edbaeede2cf18eb0d5a1227b Mon Sep 17 00:00:00 2001 From: paboyle Date: Sat, 24 Jun 2017 23:10:24 +0100 Subject: [PATCH 106/177] Experimental: Multiple communicators to see if we can avoid thread locks in --enable-comms=mpit --- benchmarks/Benchmark_comms.cc | 27 ++++++----- configure.ac | 10 ++--- lib/Makefile.am | 4 +- lib/communicator/Communicator_base.cc | 22 +++++---- lib/communicator/Communicator_base.h | 20 +++++---- lib/communicator/Communicator_mpi3.cc | 12 ++--- lib/communicator/Communicator_mpit.cc | 26 ++++++----- lib/cshift/Cshift.h | 2 +- lib/log/Log.cc | 2 +- lib/parallelIO/BinaryIO.h | 2 +- lib/qcd/action/fermion/WilsonFermion5D.cc | 55 +++++++++++------------ lib/stencil/Stencil.h | 45 ++++++++++++++++--- lib/util/Init.cc | 2 +- 13 files changed, 139 insertions(+), 90 deletions(-) diff --git a/benchmarks/Benchmark_comms.cc b/benchmarks/Benchmark_comms.cc index 532532f8..753b8a58 100644 --- a/benchmarks/Benchmark_comms.cc +++ b/benchmarks/Benchmark_comms.cc @@ -68,7 +68,7 @@ int main (int argc, char ** argv) int Nloop=100; int nmu=0; - int maxlat=24; + int maxlat=32; for(int mu=0;mu1) nmu++; std::cout << GridLogMessage << "Number of iterations to average: "<< Nloop << std::endl; @@ -80,7 +80,7 @@ int main (int argc, char ** argv) std::cout< 
latt_size ({lat*mpi_layout[0], lat*mpi_layout[1], @@ -163,7 +163,7 @@ int main (int argc, char ** argv) header(); for(int lat=4;lat<=maxlat;lat+=4){ - for(int Ls=8;Ls<=32;Ls*=2){ + for(int Ls=8;Ls<=8;Ls*=2){ std::vector latt_size ({lat,lat,lat,lat}); @@ -249,7 +249,7 @@ int main (int argc, char ** argv) header(); for(int lat=4;lat<=maxlat;lat+=4){ - for(int Ls=8;Ls<=32;Ls*=2){ + for(int Ls=8;Ls<=8;Ls*=2){ std::vector latt_size ({lat*mpi_layout[0], lat*mpi_layout[1], @@ -299,7 +299,7 @@ int main (int argc, char ** argv) xmit_to_rank, (void *)&rbuf[mu][0], recv_from_rank, - bytes); + bytes,mu); comm_proc = mpi_layout[mu]-1; @@ -310,11 +310,11 @@ int main (int argc, char ** argv) xmit_to_rank, (void *)&rbuf[mu+4][0], recv_from_rank, - bytes); + bytes,mu+4); } } - Grid.StencilSendToRecvFromComplete(requests); + Grid.StencilSendToRecvFromComplete(requests,0); Grid.Barrier(); double stop=usecond(); t_time[i] = stop-start; // microseconds @@ -346,7 +346,7 @@ int main (int argc, char ** argv) header(); for(int lat=4;lat<=maxlat;lat+=4){ - for(int Ls=8;Ls<=32;Ls*=2){ + for(int Ls=8;Ls<=8;Ls*=2){ std::vector latt_size ({lat*mpi_layout[0], lat*mpi_layout[1], @@ -393,8 +393,8 @@ int main (int argc, char ** argv) xmit_to_rank, (void *)&rbuf[mu][0], recv_from_rank, - bytes); - Grid.StencilSendToRecvFromComplete(requests); + bytes,mu); + Grid.StencilSendToRecvFromComplete(requests,mu); requests.resize(0); comm_proc = mpi_layout[mu]-1; @@ -406,8 +406,8 @@ int main (int argc, char ** argv) xmit_to_rank, (void *)&rbuf[mu+4][0], recv_from_rank, - bytes); - Grid.StencilSendToRecvFromComplete(requests); + bytes,mu+4); + Grid.StencilSendToRecvFromComplete(requests,mu+4); requests.resize(0); } @@ -435,6 +435,9 @@ int main (int argc, char ** argv) } } + std::cout< &list, - void *xmit, - int xmit_to_rank, - void *recv, - int recv_from_rank, - int bytes) + void *xmit, + int xmit_to_rank, + void *recv, + int recv_from_rank, + int bytes, int dir) { + // Discard the "dir" 
SendToRecvFromBegin(list,xmit,xmit_to_rank,recv,recv_from_rank,bytes); return 2.0*bytes; } -void CartesianCommunicator::StencilSendToRecvFromComplete(std::vector &waitall) +void CartesianCommunicator::StencilSendToRecvFromComplete(std::vector &waitall,int dir) { SendToRecvFromComplete(waitall); } +#endif + +#if !defined( GRID_COMMS_MPI3) + void CartesianCommunicator::StencilBarrier(void){}; commVector CartesianCommunicator::ShmBufStorageVector; diff --git a/lib/communicator/Communicator_base.h b/lib/communicator/Communicator_base.h index 12a8429f..4e471b43 100644 --- a/lib/communicator/Communicator_base.h +++ b/lib/communicator/Communicator_base.h @@ -38,7 +38,7 @@ Author: Peter Boyle #ifdef GRID_COMMS_MPI3 #include #endif -#ifdef GRID_COMMS_MPI3L +#ifdef GRID_COMMS_MPIT #include #endif #ifdef GRID_COMMS_SHMEM @@ -64,7 +64,7 @@ class CartesianCommunicator { std::vector _processor_coor; // linear processor coordinate unsigned long _ndimension; -#if defined (GRID_COMMS_MPI) || defined (GRID_COMMS_MPI3) || defined (GRID_COMMS_MPI3L) +#if defined (GRID_COMMS_MPI) || defined (GRID_COMMS_MPI3) || defined (GRID_COMMS_MPIT) static MPI_Comm communicator_world; MPI_Comm communicator; typedef MPI_Request CommsRequest_t; @@ -72,6 +72,10 @@ class CartesianCommunicator { typedef int CommsRequest_t; #endif +#if defined (GRID_COMMS_MPIT) + std::vector communicator_halo; +#endif + //////////////////////////////////////////////////////////////////// // Helper functionality for SHM Windows common to all other impls //////////////////////////////////////////////////////////////////// @@ -212,13 +216,13 @@ class CartesianCommunicator { void SendToRecvFromComplete(std::vector &waitall); double StencilSendToRecvFromBegin(std::vector &list, - void *xmit, - int xmit_to_rank, - void *recv, - int recv_from_rank, - int bytes); + void *xmit, + int xmit_to_rank, + void *recv, + int recv_from_rank, + int bytes,int dir); - void StencilSendToRecvFromComplete(std::vector &waitall); + void 
StencilSendToRecvFromComplete(std::vector &waitall,int i); void StencilBarrier(void); //////////////////////////////////////////////////////////// diff --git a/lib/communicator/Communicator_mpi3.cc b/lib/communicator/Communicator_mpi3.cc index 632eb991..8046fef6 100644 --- a/lib/communicator/Communicator_mpi3.cc +++ b/lib/communicator/Communicator_mpi3.cc @@ -600,11 +600,11 @@ void CartesianCommunicator::SendToRecvFromBegin(std::vector &lis } double CartesianCommunicator::StencilSendToRecvFromBegin(std::vector &list, - void *xmit, - int dest, - void *recv, - int from, - int bytes) + void *xmit, + int dest, + void *recv, + int from, + int bytes,int dir) { MPI_Request xrq; MPI_Request rrq; @@ -643,7 +643,7 @@ double CartesianCommunicator::StencilSendToRecvFromBegin(std::vector &waitall) +void CartesianCommunicator::StencilSendToRecvFromComplete(std::vector &waitall,int dir) { SendToRecvFromComplete(waitall); } diff --git a/lib/communicator/Communicator_mpit.cc b/lib/communicator/Communicator_mpit.cc index 07522900..24a518ec 100644 --- a/lib/communicator/Communicator_mpit.cc +++ b/lib/communicator/Communicator_mpit.cc @@ -235,24 +235,30 @@ void CartesianCommunicator::BroadcastWorld(int root,void* data, int bytes) assert(ierr==0); } - double CartesianCommunicator::StencilSendToRecvFromBegin(int dir, - std::vector &list, - void *xmit, - int xmit_to_rank, - void *recv, - int recv_from_rank, - int bytes) +double CartesianCommunicator::StencilSendToRecvFromBegin(std::vector &list, + void *xmit, + int xmit_to_rank, + void *recv, + int recv_from_rank, + int bytes,int dir) { + int myrank = _processor; int ierr; + assert(dir < communicator_halo.size()); + + // std::cout << " sending on communicator "< &waitall){ }; +void CartesianCommunicator::StencilSendToRecvFromComplete(std::vector &waitall,int dir) +{ + // Do nothing +}; diff --git a/lib/cshift/Cshift.h b/lib/cshift/Cshift.h index cd162e35..7d0caeee 100644 --- a/lib/cshift/Cshift.h +++ b/lib/cshift/Cshift.h @@ -42,7 +42,7 
@@ Author: Peter Boyle #include #endif -#ifdef GRID_COMMS_MPI3L +#ifdef GRID_COMMS_MPIT #include #endif diff --git a/lib/log/Log.cc b/lib/log/Log.cc index 69a9a0a8..65dc2812 100644 --- a/lib/log/Log.cc +++ b/lib/log/Log.cc @@ -95,7 +95,7 @@ void GridLogConfigure(std::vector &logstreams) { //////////////////////////////////////////////////////////// void Grid_quiesce_nodes(void) { int me = 0; -#if defined(GRID_COMMS_MPI) || defined(GRID_COMMS_MPI3) || defined(GRID_COMMS_MPI3L) +#if defined(GRID_COMMS_MPI) || defined(GRID_COMMS_MPI3) || defined(GRID_COMMS_MPIT) MPI_Comm_rank(MPI_COMM_WORLD, &me); #endif #ifdef GRID_COMMS_SHMEM diff --git a/lib/parallelIO/BinaryIO.h b/lib/parallelIO/BinaryIO.h index 117bec01..480afa01 100644 --- a/lib/parallelIO/BinaryIO.h +++ b/lib/parallelIO/BinaryIO.h @@ -29,7 +29,7 @@ #ifndef GRID_BINARY_IO_H #define GRID_BINARY_IO_H -#if defined(GRID_COMMS_MPI) || defined(GRID_COMMS_MPI3) +#if defined(GRID_COMMS_MPI) || defined(GRID_COMMS_MPI3) || defined(GRID_COMMS_MPIT) #define USE_MPI_IO #else #undef USE_MPI_IO diff --git a/lib/qcd/action/fermion/WilsonFermion5D.cc b/lib/qcd/action/fermion/WilsonFermion5D.cc index 27319fb0..6a6bc1f8 100644 --- a/lib/qcd/action/fermion/WilsonFermion5D.cc +++ b/lib/qcd/action/fermion/WilsonFermion5D.cc @@ -379,7 +379,6 @@ void WilsonFermion5D::DhopInternalOverlappedComms(StencilImpl & st, Lebesg { #ifdef GRID_OMP // assert((dag==DaggerNo) ||(dag==DaggerYes)); - typedef CartesianCommunicator::CommsRequest_t CommsRequest_t; Compressor compressor(dag); @@ -388,46 +387,46 @@ void WilsonFermion5D::DhopInternalOverlappedComms(StencilImpl & st, Lebesg DhopFaceTime-=usecond(); st.HaloExchangeOptGather(in,compressor); + st.CommsMergeSHM(compressor);// Could do this inside parallel region overlapped with comms DhopFaceTime+=usecond(); - std::vector > reqs; // Rely on async comms; start comms before merge of local data + DhopComputeTime-=usecond(); DhopCommTime-=usecond(); - st.CommunicateBegin(reqs); - - 
DhopFaceTime-=usecond(); - st.CommsMergeSHM(compressor); - DhopFaceTime+=usecond(); - - // Perhaps use omp task and region #pragma omp parallel { - int nthreads = omp_get_num_threads(); - int me = omp_get_thread_num(); - int myoff, mywork; + // Should time this somehow; hard as the threads fork nowait + st.CommunicateThreaded(); - GridThread::GetWork(len,me-1,mywork,myoff,nthreads-1); - int sF = LLs * myoff; - - if ( me == 0 ) { - st.CommunicateComplete(reqs); - DhopCommTime+=usecond(); - } else { - // Interior links in stencil - if ( me==1 ) DhopComputeTime-=usecond(); - if (dag == DaggerYes) Kernels::DhopSiteDag(st,lo,U,st.CommBuf(),sF,myoff,LLs,mywork,in,out,1,0); - else Kernels::DhopSite(st,lo,U,st.CommBuf(),sF,myoff,LLs,mywork,in,out,1,0); - if ( me==1 ) DhopComputeTime+=usecond(); + if (dag == DaggerYes) { +#pragma omp for + for (int ss = 0; ss < U._grid->oSites(); ss++) { + int sU = ss; + int sF = LLs * sU; + Kernels::DhopSiteDag(st,lo,U,st.CommBuf(),sF,sU,LLs,1,in,out,1,0); + } + } else { +#pragma omp for + for (int ss = 0; ss < U._grid->oSites(); ss++) { + int sU = ss; + int sF = LLs * sU; + Kernels::DhopSite(st,lo,U,st.CommBuf(),sF,sU,LLs,1,in,out,1,0); } } +#pragma omp single + DhopComputeTime+=usecond(); + +#pragma omp taskwait + +#pragma omp single + DhopCommTime+=usecond(); + } // Closes parallel region and waits the comms (I hope) + DhopFaceTime-=usecond(); st.CommsMerge(compressor); DhopFaceTime+=usecond(); - // Load imbalance alert. Should use dynamic schedule OMP for loop - // Perhaps create a list of only those sites with face work, and - // load balance process the list. 
DhopComputeTime2-=usecond(); if (dag == DaggerYes) { int sz=st.surface_list.size(); @@ -448,11 +447,9 @@ void WilsonFermion5D::DhopInternalOverlappedComms(StencilImpl & st, Lebesg #else assert(0); #endif - } - template void WilsonFermion5D::DhopInternalSerialComms(StencilImpl & st, LebesgueOrder &lo, DoubledGaugeField & U, diff --git a/lib/stencil/Stencil.h b/lib/stencil/Stencil.h index 2894778a..17db64d8 100644 --- a/lib/stencil/Stencil.h +++ b/lib/stencil/Stencil.h @@ -248,24 +248,57 @@ class CartesianStencil { // Stencil runs along coordinate axes only; NO diagonal ////////////////////////////////////////// // Comms packet queue for asynch thread ////////////////////////////////////////// + void CommunicateThreaded() + { + for(int i=0;i reqs; + bytes=_grid->StencilSendToRecvFromBegin(reqs, + Packets[i].send_buf, + Packets[i].to_rank, + Packets[i].recv_buf, + Packets[i].from_rank, + Packets[i].bytes,i); + _grid->StencilSendToRecvFromComplete(reqs,i); + // Last task logged; this is approximate but hard to catch + // the last to complete + stop = usecond(); + stop = stop - start; + + if ( i==0 ) commtime+=stop; + +#pragma omp critical + { + comms_bytes+=bytes; + } + + } + } + + } void CommunicateBegin(std::vector > &reqs) { reqs.resize(Packets.size()); commtime-=usecond(); for(int i=0;iStencilSendToRecvFromBegin(reqs[i], - Packets[i].send_buf, - Packets[i].to_rank, - Packets[i].recv_buf, - Packets[i].from_rank, - Packets[i].bytes); + Packets[i].send_buf, + Packets[i].to_rank, + Packets[i].recv_buf, + Packets[i].from_rank, + Packets[i].bytes,i); } } void CommunicateComplete(std::vector > &reqs) { for(int i=0;iStencilSendToRecvFromComplete(reqs[i]); + _grid->StencilSendToRecvFromComplete(reqs[i],i); } commtime+=usecond(); } diff --git a/lib/util/Init.cc b/lib/util/Init.cc index fe3b1734..fc701ac1 100644 --- a/lib/util/Init.cc +++ b/lib/util/Init.cc @@ -393,7 +393,7 @@ void Grid_init(int *argc,char ***argv) void Grid_finalize(void) { -#if defined (GRID_COMMS_MPI) || 
defined (GRID_COMMS_MPI3) +#if defined (GRID_COMMS_MPI) || defined (GRID_COMMS_MPI3) || defined (GRID_COMMS_MPIT) MPI_Finalize(); Grid_unquiesce_nodes(); #endif From 7d7220cbd72278050a1cfda6a083a87b85fecbca Mon Sep 17 00:00:00 2001 From: Antonin Portelli Date: Tue, 27 Jun 2017 14:38:45 +0100 Subject: [PATCH 107/177] scalar: lambda/4! convention --- lib/qcd/action/scalar/ScalarInteractionAction.h | 4 ++-- tests/hmc/Test_hmc_ScalarActionNxN.cc | 8 ++++---- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/lib/qcd/action/scalar/ScalarInteractionAction.h b/lib/qcd/action/scalar/ScalarInteractionAction.h index 1ff8fd37..ac2d4fbb 100644 --- a/lib/qcd/action/scalar/ScalarInteractionAction.h +++ b/lib/qcd/action/scalar/ScalarInteractionAction.h @@ -81,7 +81,7 @@ namespace Grid { phiStencil.HaloExchange(p, compressor); Field action(p._grid), pshift(p._grid), phisquared(p._grid); phisquared = p*p; - action = (2.0*Ndim + mass_square)*phisquared + lambda*phisquared*phisquared; + action = (2.0*Ndim + mass_square)*phisquared + lambda/24.*phisquared*phisquared; for (int mu = 0; mu < Ndim; mu++) { // pshift = Cshift(p, mu, +1); // not efficient, implement with stencils parallel_for (int i = 0; i < p._grid->oSites(); i++) { @@ -113,7 +113,7 @@ namespace Grid { virtual void deriv(const Field &p, Field &force) { assert(p._grid->Nd() == Ndim); - force = (2.0*Ndim + mass_square)*p + 2.0*lambda*p*p*p; + force = (2.0*Ndim + mass_square)*p + lambda/12.*p*p*p; // move this outside static Stencil phiStencil(p._grid, npoint, 0, directions, displacements); phiStencil.HaloExchange(p, compressor); diff --git a/tests/hmc/Test_hmc_ScalarActionNxN.cc b/tests/hmc/Test_hmc_ScalarActionNxN.cc index a7490f51..a4dad1a3 100644 --- a/tests/hmc/Test_hmc_ScalarActionNxN.cc +++ b/tests/hmc/Test_hmc_ScalarActionNxN.cc @@ -45,7 +45,7 @@ using namespace Grid; using namespace Grid::QCD; template -class MagLogger : public HmcObservable { +class MagMeas : public HmcObservable { public: typedef 
typename Impl::Field Field; typedef typename Impl::Simd::scalar_type Trace; @@ -72,13 +72,13 @@ private: }; template -class MagMod: public ObservableModule, NoParameters>{ - typedef ObservableModule, NoParameters> ObsBase; +class MagMod: public ObservableModule, NoParameters>{ + typedef ObservableModule, NoParameters> ObsBase; using ObsBase::ObsBase; // for constructors // acquire resource virtual void initialize(){ - this->ObservablePtr.reset(new MagLogger()); + this->ObservablePtr.reset(new MagMeas()); } public: MagMod(): ObsBase(NoParameters()){} From 15e87a460725f07dd380bd21b538b43b687a0551 Mon Sep 17 00:00:00 2001 From: Antonin Portelli Date: Tue, 27 Jun 2017 14:39:27 +0100 Subject: [PATCH 108/177] HDF5 IO fix --- lib/serialisation/Hdf5IO.cc | 4 +++- lib/serialisation/Hdf5IO.h | 2 +- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/lib/serialisation/Hdf5IO.cc b/lib/serialisation/Hdf5IO.cc index b9bb0b87..1fb7be0c 100644 --- a/lib/serialisation/Hdf5IO.cc +++ b/lib/serialisation/Hdf5IO.cc @@ -65,10 +65,12 @@ Hdf5Reader::Hdf5Reader(const std::string &fileName) Hdf5Type::type()); } -void Hdf5Reader::push(const std::string &s) +bool Hdf5Reader::push(const std::string &s) { group_ = group_.openGroup(s); path_.push_back(s); + + return true; } void Hdf5Reader::pop(void) diff --git a/lib/serialisation/Hdf5IO.h b/lib/serialisation/Hdf5IO.h index 2f891cd4..94ad9736 100644 --- a/lib/serialisation/Hdf5IO.h +++ b/lib/serialisation/Hdf5IO.h @@ -54,7 +54,7 @@ namespace Grid public: Hdf5Reader(const std::string &fileName); virtual ~Hdf5Reader(void) = default; - void push(const std::string &s); + bool push(const std::string &s); void pop(void); template void readDefault(const std::string &s, U &output); From 07de925127e15fe7b43e31a9e9f3f2298f5f4261 Mon Sep 17 00:00:00 2001 From: Antonin Portelli Date: Wed, 28 Jun 2017 12:45:44 +0100 Subject: [PATCH 109/177] minor scalar action fixes --- lib/qcd/action/scalar/ScalarImpl.h | 4 ++-- 
lib/qcd/action/scalar/ScalarInteractionAction.h | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/lib/qcd/action/scalar/ScalarImpl.h b/lib/qcd/action/scalar/ScalarImpl.h index 174553a2..f85ab840 100644 --- a/lib/qcd/action/scalar/ScalarImpl.h +++ b/lib/qcd/action/scalar/ScalarImpl.h @@ -124,11 +124,11 @@ class ScalarImplTypes { } static inline void HotConfiguration(GridParallelRNG &pRNG, Field &U) { - Group::LieRandomize(pRNG, U); + Group::GaussianFundamentalLieAlgebraMatrix(pRNG, U); } static inline void TepidConfiguration(GridParallelRNG &pRNG, Field &U) { - Group::LieRandomize(pRNG, U, 0.01); + Group::GaussianFundamentalLieAlgebraMatrix(pRNG, U, 0.01); } static inline void ColdConfiguration(GridParallelRNG &pRNG, Field &U) { diff --git a/lib/qcd/action/scalar/ScalarInteractionAction.h b/lib/qcd/action/scalar/ScalarInteractionAction.h index ac2d4fbb..4d189352 100644 --- a/lib/qcd/action/scalar/ScalarInteractionAction.h +++ b/lib/qcd/action/scalar/ScalarInteractionAction.h @@ -81,7 +81,7 @@ namespace Grid { phiStencil.HaloExchange(p, compressor); Field action(p._grid), pshift(p._grid), phisquared(p._grid); phisquared = p*p; - action = (2.0*Ndim + mass_square)*phisquared + lambda/24.*phisquared*phisquared; + action = (2.0*Ndim + mass_square)*phisquared - lambda/24.*phisquared*phisquared; for (int mu = 0; mu < Ndim; mu++) { // pshift = Cshift(p, mu, +1); // not efficient, implement with stencils parallel_for (int i = 0; i < p._grid->oSites(); i++) { @@ -113,7 +113,7 @@ namespace Grid { virtual void deriv(const Field &p, Field &force) { assert(p._grid->Nd() == Ndim); - force = (2.0*Ndim + mass_square)*p + lambda/12.*p*p*p; + force = (2.0*Ndim + mass_square)*p - lambda/12.*p*p*p; // move this outside static Stencil phiStencil(p._grid, npoint, 0, directions, displacements); phiStencil.HaloExchange(p, compressor); From 08e04b96761a03c703899a7ee6ca3f42dddcf2d2 Mon Sep 17 00:00:00 2001 From: Peter Boyle Date: Wed, 28 Jun 2017 15:30:06 +0100 Subject: 
[PATCH 110/177] Better benchmarks --- benchmarks/Benchmark_memory_bandwidth.cc | 44 ++++++++++---------- benchmarks/Benchmark_su3.cc | 52 ++++++++++++------------ 2 files changed, 48 insertions(+), 48 deletions(-) diff --git a/benchmarks/Benchmark_memory_bandwidth.cc b/benchmarks/Benchmark_memory_bandwidth.cc index 1aa088f8..1136dfe0 100644 --- a/benchmarks/Benchmark_memory_bandwidth.cc +++ b/benchmarks/Benchmark_memory_bandwidth.cc @@ -55,9 +55,9 @@ int main (int argc, char ** argv) std::cout< latt_size ({lat*mpi_layout[0],lat*mpi_layout[1],lat*mpi_layout[2],lat*mpi_layout[3]}); int vol = latt_size[0]*latt_size[1]*latt_size[2]*latt_size[3]; @@ -65,11 +65,11 @@ int main (int argc, char ** argv) uint64_t Nloop=NLOOP; - // GridParallelRNG pRNG(&Grid); pRNG.SeedFixedIntegers(std::vector({45,12,81,9}); + GridParallelRNG pRNG(&Grid); pRNG.SeedFixedIntegers(std::vector({45,12,81,9})); - LatticeVec z(&Grid); //random(pRNG,z); - LatticeVec x(&Grid); //random(pRNG,x); - LatticeVec y(&Grid); //random(pRNG,y); + LatticeVec z(&Grid); random(pRNG,z); + LatticeVec x(&Grid); random(pRNG,x); + LatticeVec y(&Grid); random(pRNG,y); double a=2.0; @@ -94,17 +94,17 @@ int main (int argc, char ** argv) std::cout< latt_size ({lat*mpi_layout[0],lat*mpi_layout[1],lat*mpi_layout[2],lat*mpi_layout[3]}); int vol = latt_size[0]*latt_size[1]*latt_size[2]*latt_size[3]; GridCartesian Grid(latt_size,simd_layout,mpi_layout); - // GridParallelRNG pRNG(&Grid); pRNG.SeedFixedIntegers(std::vector({45,12,81,9}); + GridParallelRNG pRNG(&Grid); pRNG.SeedFixedIntegers(std::vector({45,12,81,9})); - LatticeVec z(&Grid); //random(pRNG,z); - LatticeVec x(&Grid); //random(pRNG,x); - LatticeVec y(&Grid); //random(pRNG,y); + LatticeVec z(&Grid); random(pRNG,z); + LatticeVec x(&Grid); random(pRNG,x); + LatticeVec y(&Grid); random(pRNG,y); double a=2.0; uint64_t Nloop=NLOOP; @@ -129,7 +129,7 @@ int main (int argc, char ** argv) std::cout< latt_size 
({lat*mpi_layout[0],lat*mpi_layout[1],lat*mpi_layout[2],lat*mpi_layout[3]}); @@ -138,11 +138,11 @@ int main (int argc, char ** argv) GridCartesian Grid(latt_size,simd_layout,mpi_layout); - // GridParallelRNG pRNG(&Grid); pRNG.SeedFixedIntegers(std::vector({45,12,81,9}); + GridParallelRNG pRNG(&Grid); pRNG.SeedFixedIntegers(std::vector({45,12,81,9})); - LatticeVec z(&Grid); //random(pRNG,z); - LatticeVec x(&Grid); //random(pRNG,x); - LatticeVec y(&Grid); //random(pRNG,y); + LatticeVec z(&Grid); random(pRNG,z); + LatticeVec x(&Grid); random(pRNG,x); + LatticeVec y(&Grid); random(pRNG,y); RealD a=2.0; @@ -166,17 +166,17 @@ int main (int argc, char ** argv) std::cout< latt_size ({lat*mpi_layout[0],lat*mpi_layout[1],lat*mpi_layout[2],lat*mpi_layout[3]}); int vol = latt_size[0]*latt_size[1]*latt_size[2]*latt_size[3]; uint64_t Nloop=NLOOP; GridCartesian Grid(latt_size,simd_layout,mpi_layout); - // GridParallelRNG pRNG(&Grid); pRNG.SeedFixedIntegers(std::vector({45,12,81,9}); - LatticeVec z(&Grid); //random(pRNG,z); - LatticeVec x(&Grid); //random(pRNG,x); - LatticeVec y(&Grid); //random(pRNG,y); + GridParallelRNG pRNG(&Grid); pRNG.SeedFixedIntegers(std::vector({45,12,81,9})); + LatticeVec z(&Grid); random(pRNG,z); + LatticeVec x(&Grid); random(pRNG,x); + LatticeVec y(&Grid); random(pRNG,y); RealD a=2.0; Real nn; double start=usecond(); diff --git a/benchmarks/Benchmark_su3.cc b/benchmarks/Benchmark_su3.cc index 3d7f9bc9..035af2d9 100644 --- a/benchmarks/Benchmark_su3.cc +++ b/benchmarks/Benchmark_su3.cc @@ -37,12 +37,12 @@ int main (int argc, char ** argv) Grid_init(&argc,&argv); #define LMAX (64) - int Nloop=20; + int64_t Nloop=20; std::vector simd_layout = GridDefaultSimd(Nd,vComplex::Nsimd()); std::vector mpi_layout = GridDefaultMpi(); - int threads = GridThread::GetThreads(); + int64_t threads = GridThread::GetThreads(); std::cout< latt_size ({lat*mpi_layout[0],lat*mpi_layout[1],lat*mpi_layout[2],lat*mpi_layout[3]}); - int vol = 
latt_size[0]*latt_size[1]*latt_size[2]*latt_size[3]; + int64_t vol = latt_size[0]*latt_size[1]*latt_size[2]*latt_size[3]; GridCartesian Grid(latt_size,simd_layout,mpi_layout); - // GridParallelRNG pRNG(&Grid); pRNG.SeedFixedIntegers(std::vector({45,12,81,9}); + GridParallelRNG pRNG(&Grid); pRNG.SeedFixedIntegers(std::vector({45,12,81,9})); - LatticeColourMatrix z(&Grid);// random(pRNG,z); - LatticeColourMatrix x(&Grid);// random(pRNG,x); - LatticeColourMatrix y(&Grid);// random(pRNG,y); + LatticeColourMatrix z(&Grid); random(pRNG,z); + LatticeColourMatrix x(&Grid); random(pRNG,x); + LatticeColourMatrix y(&Grid); random(pRNG,y); double start=usecond(); - for(int i=0;i latt_size ({lat*mpi_layout[0],lat*mpi_layout[1],lat*mpi_layout[2],lat*mpi_layout[3]}); - int vol = latt_size[0]*latt_size[1]*latt_size[2]*latt_size[3]; + int64_t vol = latt_size[0]*latt_size[1]*latt_size[2]*latt_size[3]; GridCartesian Grid(latt_size,simd_layout,mpi_layout); - // GridParallelRNG pRNG(&Grid); pRNG.SeedFixedIntegers(std::vector({45,12,81,9}); + GridParallelRNG pRNG(&Grid); pRNG.SeedFixedIntegers(std::vector({45,12,81,9})); - LatticeColourMatrix z(&Grid); //random(pRNG,z); - LatticeColourMatrix x(&Grid); //random(pRNG,x); - LatticeColourMatrix y(&Grid); //random(pRNG,y); + LatticeColourMatrix z(&Grid); random(pRNG,z); + LatticeColourMatrix x(&Grid); random(pRNG,x); + LatticeColourMatrix y(&Grid); random(pRNG,y); double start=usecond(); - for(int i=0;i latt_size ({lat*mpi_layout[0],lat*mpi_layout[1],lat*mpi_layout[2],lat*mpi_layout[3]}); - int vol = latt_size[0]*latt_size[1]*latt_size[2]*latt_size[3]; + int64_t vol = latt_size[0]*latt_size[1]*latt_size[2]*latt_size[3]; GridCartesian Grid(latt_size,simd_layout,mpi_layout); - // GridParallelRNG pRNG(&Grid); pRNG.SeedFixedIntegers(std::vector({45,12,81,9}); + GridParallelRNG pRNG(&Grid); pRNG.SeedFixedIntegers(std::vector({45,12,81,9})); - LatticeColourMatrix z(&Grid); //random(pRNG,z); - LatticeColourMatrix x(&Grid); //random(pRNG,x); - 
LatticeColourMatrix y(&Grid); //random(pRNG,y); + LatticeColourMatrix z(&Grid); random(pRNG,z); + LatticeColourMatrix x(&Grid); random(pRNG,x); + LatticeColourMatrix y(&Grid); random(pRNG,y); double start=usecond(); - for(int i=0;i latt_size ({lat*mpi_layout[0],lat*mpi_layout[1],lat*mpi_layout[2],lat*mpi_layout[3]}); - int vol = latt_size[0]*latt_size[1]*latt_size[2]*latt_size[3]; + int64_t vol = latt_size[0]*latt_size[1]*latt_size[2]*latt_size[3]; GridCartesian Grid(latt_size,simd_layout,mpi_layout); - // GridParallelRNG pRNG(&Grid); pRNG.SeedFixedIntegers(std::vector({45,12,81,9}); + GridParallelRNG pRNG(&Grid); pRNG.SeedFixedIntegers(std::vector({45,12,81,9})); - LatticeColourMatrix z(&Grid); //random(pRNG,z); - LatticeColourMatrix x(&Grid); //random(pRNG,x); - LatticeColourMatrix y(&Grid); //random(pRNG,y); + LatticeColourMatrix z(&Grid); random(pRNG,z); + LatticeColourMatrix x(&Grid); random(pRNG,x); + LatticeColourMatrix y(&Grid); random(pRNG,y); double start=usecond(); - for(int i=0;i Date: Wed, 28 Jun 2017 23:27:02 +0100 Subject: [PATCH 111/177] Improved threaded comms benchmark --- TODO | 11 ++-- benchmarks/Benchmark_comms.cc | 94 +++++++++++++++++++++++++++++++++++ 2 files changed, 102 insertions(+), 3 deletions(-) diff --git a/TODO b/TODO index 001c6c0c..3d29215e 100644 --- a/TODO +++ b/TODO @@ -2,10 +2,13 @@ TODO: --------------- Large item work list: -1)- MultiRHS with spread out extra dim -- Go through filesystem with SciDAC I/O +1)- I/O; There appear to be issues with MPI IO and NERSC with large files. + Possible 2GB limit reappeared. GPFS driver in Intel MPI. 
+ +2)- BG/Q port and check + +3)- Christoph's local basis expansion Lanczos; port to use Lattice_transfer features -2)- Christoph's local basis expansion Lanczos -3)- BG/Q port and check 4)- Precision conversion and sort out localConvert <-- partial - Consistent linear solver flop count/rate -- PARTIAL, time but no flop/s yet 5)- Physical propagator interface @@ -14,6 +17,8 @@ Large item work list: 8)- HDCR resume Recent DONE + +-- MultiRHS with spread out extra dim -- Go through filesystem with SciDAC I/O. <--- DONE -- Lanczos Remove DenseVector, DenseMatrix; Use Eigen instead. <-- DONE -- GaugeFix into central location <-- DONE -- Scidac and Ildg metadata handling <-- DONE diff --git a/benchmarks/Benchmark_comms.cc b/benchmarks/Benchmark_comms.cc index 753b8a58..698f9d25 100644 --- a/benchmarks/Benchmark_comms.cc +++ b/benchmarks/Benchmark_comms.cc @@ -435,6 +435,100 @@ int main (int argc, char ** argv) } } + + + + std::cout< latt_size ({lat*mpi_layout[0], + lat*mpi_layout[1], + lat*mpi_layout[2], + lat*mpi_layout[3]}); + + GridCartesian Grid(latt_size,simd_layout,mpi_layout); + RealD Nrank = Grid._Nprocessors; + RealD Nnode = Grid.NodeCount(); + RealD ppn = Nrank/Nnode; + + std::vector xbuf(8); + std::vector rbuf(8); + Grid.ShmBufferFreeAll(); + for(int d=0;d<8;d++){ + xbuf[d] = (HalfSpinColourVectorD *)Grid.ShmBufferMalloc(lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD)); + rbuf[d] = (HalfSpinColourVectorD *)Grid.ShmBufferMalloc(lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD)); + bzero((void *)xbuf[d],lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD)); + bzero((void *)rbuf[d],lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD)); + } + + int ncomm; + int bytes=lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD); + double dbytes; + for(int i=0;i requests; + dbytes=0; + ncomm=0; + + parallel_for(int dir=0;dir<8;dir++){ + + double tbytes; + int mu =dir % 4; + + if (mpi_layout[mu]>1 ) { + + ncomm++; + int xmit_to_rank; + int recv_from_rank; + if ( dir == mu ) { + int comm_proc=1; + 
Grid.ShiftedRanks(mu,comm_proc,xmit_to_rank,recv_from_rank); + } else { + int comm_proc = mpi_layout[mu]-1; + Grid.ShiftedRanks(mu,comm_proc,xmit_to_rank,recv_from_rank); + } + tbytes= Grid.StencilSendToRecvFromBegin(requests, + (void *)&xbuf[dir][0], + xmit_to_rank, + (void *)&rbuf[dir][0], + recv_from_rank, + bytes,dir); + Grid.StencilSendToRecvFromComplete(requests,dir); + requests.resize(0); + +#pragma omp atomic + dbytes+=tbytes; + } + } + Grid.Barrier(); + double stop=usecond(); + t_time[i] = stop-start; // microseconds + } + + timestat.statistics(t_time); + + dbytes=dbytes*ppn; + double xbytes = dbytes*0.5; + double rbytes = dbytes*0.5; + double bidibytes = dbytes; + + + std::cout< Date: Thu, 29 Jun 2017 11:30:29 +0100 Subject: [PATCH 112/177] Small corrections to the NEON port --- configure.ac | 2 +- lib/qcd/smearing/WilsonFlow.h | 9 ++++----- lib/simd/Grid_neon.h | 15 +++++---------- lib/simd/Grid_vector_types.h | 2 +- 4 files changed, 11 insertions(+), 17 deletions(-) diff --git a/configure.ac b/configure.ac index a69b97e3..75cf7891 100644 --- a/configure.ac +++ b/configure.ac @@ -250,7 +250,7 @@ case ${ax_cv_cxx_compiler_vendor} in SIMD_FLAGS='';; NEONv8) AC_DEFINE([NEONV8],[1],[ARMv8 NEON]) - SIMD_FLAGS='';; + SIMD_FLAGS='-march=armv8-a';; QPX|BGQ) AC_DEFINE([QPX],[1],[QPX intrinsics for BG/Q]) SIMD_FLAGS='';; diff --git a/lib/qcd/smearing/WilsonFlow.h b/lib/qcd/smearing/WilsonFlow.h index 5e9f2d95..4f5c0d43 100644 --- a/lib/qcd/smearing/WilsonFlow.h +++ b/lib/qcd/smearing/WilsonFlow.h @@ -108,7 +108,7 @@ void WilsonFlow::evolve_step_adaptive(typename Gimpl::GaugeField &U, Real if (maxTau - taus < epsilon){ epsilon = maxTau-taus; } - std::cout << GridLogMessage << "Integration epsilon : " << epsilon << std::endl; + //std::cout << GridLogMessage << "Integration epsilon : " << epsilon << std::endl; GaugeField Z(U._grid); GaugeField Zprime(U._grid); GaugeField tmp(U._grid), Uprime(U._grid); @@ -138,10 +138,10 @@ void 
WilsonFlow::evolve_step_adaptive(typename Gimpl::GaugeField &U, Real // adjust integration step taus += epsilon; - std::cout << GridLogMessage << "Adjusting integration step with distance: " << diff << std::endl; + //std::cout << GridLogMessage << "Adjusting integration step with distance: " << diff << std::endl; epsilon = epsilon*0.95*std::pow(1e-4/diff,1./3.); - std::cout << GridLogMessage << "New epsilon : " << epsilon << std::endl; + //std::cout << GridLogMessage << "New epsilon : " << epsilon << std::endl; } @@ -166,7 +166,6 @@ void WilsonFlow::smear(GaugeField& out, const GaugeField& in) const { out = in; for (unsigned int step = 1; step <= Nstep; step++) { auto start = std::chrono::high_resolution_clock::now(); - std::cout << GridLogMessage << "Evolution time :"<< tau(step) << std::endl; evolve_step(out); auto end = std::chrono::high_resolution_clock::now(); std::chrono::duration diff = end - start; @@ -191,7 +190,7 @@ void WilsonFlow::smear_adaptive(GaugeField& out, const GaugeField& in, Re unsigned int step = 0; do{ step++; - std::cout << GridLogMessage << "Evolution time :"<< taus << std::endl; + //std::cout << GridLogMessage << "Evolution time :"<< taus << std::endl; evolve_step_adaptive(out, maxTau); std::cout << GridLogMessage << "[WilsonFlow] Energy density (plaq) : " << step << " " diff --git a/lib/simd/Grid_neon.h b/lib/simd/Grid_neon.h index 38815389..d6eb9c5a 100644 --- a/lib/simd/Grid_neon.h +++ b/lib/simd/Grid_neon.h @@ -6,9 +6,9 @@ Copyright (C) 2015 -Author: Nils Meyer -Author: Peter Boyle -Author: neo + Author: Nils Meyer + Author: Peter Boyle + Author: neo This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -27,7 +27,7 @@ Author: neo See the full license in the file "LICENSE" in the top level distribution directory *************************************************************************************/ /* END LEGAL */ 
-//---------------------------------------------------------------------- + /* ARMv8 NEON intrinsics layer by @@ -37,9 +37,6 @@ Author: neo SFB/TRR55 */ -//---------------------------------------------------------------------- -//#ifndef ARM_NEON -//#define ARM_NEON #ifndef GEN_SIMD_WIDTH #define GEN_SIMD_WIDTH 16u @@ -606,6 +603,4 @@ namespace Optimization { typedef Optimization::TimesMinusI TimesMinusISIMD; typedef Optimization::TimesI TimesISIMD; -} - -//#endif // ARM_NEON +} \ No newline at end of file diff --git a/lib/simd/Grid_vector_types.h b/lib/simd/Grid_vector_types.h index e05fecc4..27585547 100644 --- a/lib/simd/Grid_vector_types.h +++ b/lib/simd/Grid_vector_types.h @@ -53,7 +53,7 @@ directory #if defined IMCI #include "Grid_imci.h" #endif -#ifdef NEONv8 +#ifdef NEONV8 #include "Grid_neon.h" #endif #if defined QPX From bf630a6821ea8923fc9690a03f621f6d69b31f4e Mon Sep 17 00:00:00 2001 From: Guido Cossu Date: Thu, 29 Jun 2017 11:42:25 +0100 Subject: [PATCH 113/177] README file update --- README.md | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index 9432abe1..5d168298 100644 --- a/README.md +++ b/README.md @@ -18,7 +18,7 @@ License: GPL v2. -Last update Nov 2016. +Last update June 2017. _Please do not send pull requests to the `master` branch which is reserved for releases._ @@ -78,14 +78,17 @@ optimally use MPI, OpenMP and SIMD parallelism under the hood. This is a signifi for most programmers. The layout transformations are parametrised by the SIMD vector length. This adapts according to the architecture. -Presently SSE4 (128 bit) AVX, AVX2, QPX (256 bit), IMCI, and AVX512 (512 bit) targets are supported (ARM NEON on the way). +Presently SSE4, ARM NEON (128 bits) AVX, AVX2, QPX (256 bits), IMCI and AVX512 (512 bits) targets are supported. -These are presented as `vRealF`, `vRealD`, `vComplexF`, and `vComplexD` internal vector data types. These may be useful in themselves for other programmers. 
+These are presented as `vRealF`, `vRealD`, `vComplexF`, and `vComplexD` internal vector data types. The corresponding scalar types are named `RealF`, `RealD`, `ComplexF` and `ComplexD`. MPI, OpenMP, and SIMD parallelism are present in the library. Please see https://arxiv.org/abs/1512.03487 for more detail. +### Required libraries +Grid requires [GMP](https://gmplib.org/), [MPFR](http://www.mpfr.org/) and optionally [HDF5](https://support.hdfgroup.org/HDF5/) and [LIME](http://usqcd-software.github.io/c-lime/) (for ILDG file format support) to be installed. + ### Quick start First, start by cloning the repository: @@ -173,7 +176,8 @@ The following options can be use with the `--enable-simd=` option to target diff | `AVXFMA4` | AVX (256 bit) + FMA4 | | `AVX2` | AVX 2 (256 bit) | | `AVX512` | AVX 512 bit | -| `QPX` | QPX (256 bit) | +| `NEONv8` | ARM NEON (128 bit) | +| `QPX` | IBM QPX (256 bit) | Alternatively, some CPU codenames can be directly used: From 09d09d0fe5bce853e1b42115371cd935a4e29cc0 Mon Sep 17 00:00:00 2001 From: Guido Cossu Date: Thu, 29 Jun 2017 11:48:11 +0100 Subject: [PATCH 114/177] Update README.md --- README.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index 5d168298..1f0b450c 100644 --- a/README.md +++ b/README.md @@ -84,7 +84,7 @@ These are presented as `vRealF`, `vRealD`, `vComplexF`, and `vComplexD` internal The corresponding scalar types are named `RealF`, `RealD`, `ComplexF` and `ComplexD`. MPI, OpenMP, and SIMD parallelism are present in the library. -Please see https://arxiv.org/abs/1512.03487 for more detail. +Please see [this paper](https://arxiv.org/abs/1512.03487) for more detail. ### Required libraries Grid requires [GMP](https://gmplib.org/), [MPFR](http://www.mpfr.org/) and optionally [HDF5](https://support.hdfgroup.org/HDF5/) and [LIME](http://usqcd-software.github.io/c-lime/) (for ILDG file format support) to be installed. 
@@ -176,7 +176,7 @@ The following options can be use with the `--enable-simd=` option to target diff | `AVXFMA4` | AVX (256 bit) + FMA4 | | `AVX2` | AVX 2 (256 bit) | | `AVX512` | AVX 512 bit | -| `NEONv8` | ARM NEON (128 bit) | +| `NEONv8` | [ARM NEON](http://infocenter.arm.com/help/index.jsp?topic=/com.arm.doc.den0024a/ch07s03.html) (128 bit) | | `QPX` | IBM QPX (256 bit) | Alternatively, some CPU codenames can be directly used: @@ -216,4 +216,4 @@ where `` is the UNIX prefix where GMP and MPFR are installed. If you are w --with-mpfr= \ --enable-mkl \ CXX=CC CC=cc -``` \ No newline at end of file +``` From ac1f1838bc9c143a3e2091e75d3f68e4455d0231 Mon Sep 17 00:00:00 2001 From: Peter Boyle Date: Fri, 30 Jun 2017 10:15:32 +0100 Subject: [PATCH 115/177] KNL only --- lib/perfmon/PerfCount.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/perfmon/PerfCount.cc b/lib/perfmon/PerfCount.cc index 4778295a..c6f92b9f 100644 --- a/lib/perfmon/PerfCount.cc +++ b/lib/perfmon/PerfCount.cc @@ -40,7 +40,7 @@ const PerformanceCounter::PerformanceCounterConfig PerformanceCounter::Performan { PERF_TYPE_HARDWARE, PERF_COUNT_HW_CPU_CYCLES , "CPUCYCLES.........." , INSTRUCTIONS}, { PERF_TYPE_HARDWARE, PERF_COUNT_HW_INSTRUCTIONS , "INSTRUCTIONS......." 
, CPUCYCLES }, // 4 -#ifdef AVX512 +#ifdef KNL { PERF_TYPE_RAW, RawConfig(0x40,0x04), "ALL_LOADS..........", CPUCYCLES }, { PERF_TYPE_RAW, RawConfig(0x01,0x04), "L1_MISS_LOADS......", L1D_READ_ACCESS }, { PERF_TYPE_RAW, RawConfig(0x40,0x04), "ALL_LOADS..........", L1D_READ_ACCESS }, From 2d3737a133b6f1208849cd8580badba4ff152a4d Mon Sep 17 00:00:00 2001 From: Peter Boyle Date: Fri, 30 Jun 2017 10:15:59 +0100 Subject: [PATCH 116/177] O3, KNL --- configure.ac | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/configure.ac b/configure.ac index f7284d48..8175e8b0 100644 --- a/configure.ac +++ b/configure.ac @@ -27,7 +27,7 @@ AX_GXX_VERSION AC_DEFINE_UNQUOTED([GXX_VERSION],["$GXX_VERSION"], [version of g++ that will compile the code]) -CXXFLAGS="-g $CXXFLAGS" +CXXFLAGS="-O3 $CXXFLAGS" ############### Checks for typedefs, structures, and compiler characteristics @@ -241,6 +241,7 @@ case ${ax_cv_cxx_compiler_vendor} in SIMD_FLAGS='';; KNL) AC_DEFINE([AVX512],[1],[AVX512 intrinsics]) + AC_DEFINE([KNL],[1],[Knights landing processor]) SIMD_FLAGS='-march=knl';; GEN) AC_DEFINE([GEN],[1],[generic vector code]) @@ -276,6 +277,7 @@ case ${ax_cv_cxx_compiler_vendor} in SIMD_FLAGS='';; KNL) AC_DEFINE([AVX512],[1],[AVX512 intrinsics for Knights Landing]) + AC_DEFINE([KNL],[1],[Knights landing processor]) SIMD_FLAGS='-xmic-avx512';; GEN) AC_DEFINE([GEN],[1],[generic vector code]) From 694b305cab39e1b7870ca57107521679486c611a Mon Sep 17 00:00:00 2001 From: Peter Boyle Date: Fri, 30 Jun 2017 10:16:13 +0100 Subject: [PATCH 117/177] Update to reporting --- benchmarks/Benchmark_dwf.cc | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/benchmarks/Benchmark_dwf.cc b/benchmarks/Benchmark_dwf.cc index a071c050..d50cc3a0 100644 --- a/benchmarks/Benchmark_dwf.cc +++ b/benchmarks/Benchmark_dwf.cc @@ -165,7 +165,7 @@ int main (int argc, char ** argv) std::cout << GridLogMessage<< 
"*****************************************************************" <Barrier(); Dw.ZeroCounters(); @@ -302,6 +302,7 @@ int main (int argc, char ** argv) std::cout<< "sD ERR \n " << err < Date: Fri, 30 Jun 2017 10:16:35 +0100 Subject: [PATCH 118/177] Switch off counters by default --- benchmarks/Benchmark_dwf.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/benchmarks/Benchmark_dwf.cc b/benchmarks/Benchmark_dwf.cc index d50cc3a0..7814ec7d 100644 --- a/benchmarks/Benchmark_dwf.cc +++ b/benchmarks/Benchmark_dwf.cc @@ -383,7 +383,7 @@ int main (int argc, char ** argv) assert(error<1.0e-4); } - if(1){ + if(0){ std::cout << "Single cache warm call to sDw.Dhop " < Date: Fri, 30 Jun 2017 10:23:51 +0100 Subject: [PATCH 119/177] Interleave code path; not enabled --- lib/stencil/Lebesgue.cc | 25 ++++++++++++++++++++++++- lib/stencil/Lebesgue.h | 2 ++ 2 files changed, 26 insertions(+), 1 deletion(-) diff --git a/lib/stencil/Lebesgue.cc b/lib/stencil/Lebesgue.cc index 4551878c..0c644fc1 100644 --- a/lib/stencil/Lebesgue.cc +++ b/lib/stencil/Lebesgue.cc @@ -51,8 +51,31 @@ LebesgueOrder::LebesgueOrder(GridBase *_grid) if ( Block[0]==0) ZGraph(); else if ( Block[1]==0) NoBlocking(); else CartesianBlocking(); -} + if (0) { + std::cout << "Thread Interleaving"< reorder = _LebesgueReorder; + std::vector throrder; + int vol = _LebesgueReorder.size(); + int threads = GridThread::GetThreads(); + int blockbits=3; + int blocklen = 8; + int msk = 0x7; + + for(int t=0;t> blockbits) % threads == t ) { + throrder.push_back(reorder[ss]); + } + } + } + _LebesgueReorder = throrder; +} void LebesgueOrder::NoBlocking(void) { std::cout< & xi, std::vector &dims); + void ThreadInterleave(void); + private: std::vector _LebesgueReorder; From f20eceb6cd6469c496e07e01055a08c0e0e4f7c8 Mon Sep 17 00:00:00 2001 From: Peter Boyle Date: Fri, 30 Jun 2017 10:48:27 +0100 Subject: [PATCH 120/177] First touch once per page in a threaded loop --- lib/allocator/AlignedAllocator.h | 13 
++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/lib/allocator/AlignedAllocator.h b/lib/allocator/AlignedAllocator.h index 6e85ab27..54090024 100644 --- a/lib/allocator/AlignedAllocator.h +++ b/lib/allocator/AlignedAllocator.h @@ -98,7 +98,12 @@ public: #else if ( ptr == (_Tp *) NULL ) ptr = (_Tp *) memalign(128,bytes); #endif - + // First touch optimise in threaded loop + uint8_t *cp = (uint8_t *)ptr; +#pragma omp parallel for + for(size_type n=0;n Date: Fri, 30 Jun 2017 10:49:08 +0100 Subject: [PATCH 121/177] Guard first touch --- lib/allocator/AlignedAllocator.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/lib/allocator/AlignedAllocator.h b/lib/allocator/AlignedAllocator.h index 54090024..4513ce26 100644 --- a/lib/allocator/AlignedAllocator.h +++ b/lib/allocator/AlignedAllocator.h @@ -100,7 +100,9 @@ public: #endif // First touch optimise in threaded loop uint8_t *cp = (uint8_t *)ptr; +#ifdef GRID_OMP #pragma omp parallel for +#endif for(size_type n=0;n Date: Fri, 30 Jun 2017 10:53:22 +0100 Subject: [PATCH 122/177] Best option for Xeon cache blocking set --- lib/stencil/Lebesgue.cc | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/lib/stencil/Lebesgue.cc b/lib/stencil/Lebesgue.cc index 0c644fc1..2880e4b6 100644 --- a/lib/stencil/Lebesgue.cc +++ b/lib/stencil/Lebesgue.cc @@ -32,8 +32,11 @@ Author: paboyle namespace Grid { int LebesgueOrder::UseLebesgueOrder; +#ifdef KNL std::vector LebesgueOrder::Block({8,2,2,2}); - +#else +std::vector LebesgueOrder::Block({2,2,2,2}); +#endif LebesgueOrder::IndexInteger LebesgueOrder::alignup(IndexInteger n){ n--; // 1000 0011 --> 1000 0010 n |= n >> 1; // 1000 0010 | 0100 0001 = 1100 0011 From f3b0a92e71af2577afb68c3021b1f9a8467f3e8e Mon Sep 17 00:00:00 2001 From: Peter Boyle Date: Sat, 1 Jul 2017 09:48:00 +0100 Subject: [PATCH 123/177] Update README.md --- README.md | 101 ++++++++++++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 94 insertions(+), 7 deletions(-) 
diff --git a/README.md b/README.md index 1f0b450c..072f7404 100644 --- a/README.md +++ b/README.md @@ -158,7 +158,6 @@ The following options can be use with the `--enable-comms=` option to target dif | `none` | no communications | | `mpi[-auto]` | MPI communications | | `mpi3[-auto]` | MPI communications using MPI 3 shared memory | -| `mpi3l[-auto]` | MPI communications using MPI 3 shared memory and leader model | | `shmem ` | Cray SHMEM communications | For the MPI interfaces the optional `-auto` suffix instructs the `configure` scripts to determine all the necessary compilation and linking flags. This is done by extracting the informations from the MPI wrapper specified in the environment variable `MPICXX` (if not specified `configure` will scan though a list of default names). The `-auto` suffix is not supported by the Cray environment wrapper scripts. Use the standard versions instead. @@ -199,21 +198,109 @@ The following configuration is recommended for the Intel Knights Landing platfor ``` bash ../configure --enable-precision=double\ --enable-simd=KNL \ - --enable-comms=mpi-auto \ - --with-gmp= \ - --with-mpfr= \ + --enable-comms=mpi-auto \ --enable-mkl \ CXX=icpc MPICXX=mpiicpc ``` +The MKL flag enables use of BLAS and FFTW from the Intel Math Kernels Library. -where `` is the UNIX prefix where GMP and MPFR are installed. If you are working on a Cray machine that does not use the `mpiicpc` wrapper, please use: +If you are working on a Cray machine that does not use the `mpiicpc` wrapper, please use: ``` bash ../configure --enable-precision=double\ --enable-simd=KNL \ --enable-comms=mpi \ - --with-gmp= \ - --with-mpfr= \ --enable-mkl \ CXX=CC CC=cc ``` + +If gmp and mpfr are NOT in standard places (/usr/) these flags may be needed: +``` --with-gmp= \ + --with-mpfr= \ +``` +where `` is the UNIX prefix where GMP and MPFR are installed. 
+ +Knight's Landing with Intel Omnipath adapters with two adapters per node +presently performs better with use of more than one rank per node, using shared memory +for interior communication. This is the mpi3 communications implementation. +We recommend four ranks per node for best performance, but optimum is local volume dependent. + +``` bash +../configure --enable-precision=double\ + --enable-simd=KNL \ + --enable-comms=mpi3 \ + --enable-mkl \ + CXX=mpiicpc +``` + +### Build setup for Intel Haswell Xeon platform + +The following configuration is recommended for the Intel Knights Landing platform: + +``` bash +../configure --enable-precision=double\ + --enable-simd=AVX2 \ + --enable-comms=mpi3 \ + --enable-mkl \ + CXX=mpiicpc +``` +The MKL flag enables use of BLAS and FFTW from the Intel Math Kernels Library. + +If gmp and mpfr are NOT in standard places (/usr/) these flags may be needed: +``` --with-gmp= \ + --with-mpfr= \ +``` +where `` is the UNIX prefix where GMP and MPFR are installed. + +If you are working on a Cray machine that does not use the `mpiicpc` wrapper, please use: + +``` bash +../configure --enable-precision=double\ + --enable-simd=AVX2 \ + --enable-comms=mpi3 \ + --enable-mkl \ + CXX=CC CC=cc +``` +Since Dual socket nodes are commonplace, we recommend MPI-3 as the default with the use of +one rank per socket. If using the Intel MPI library, threads should be pinned to NUMA domains using +``` + export I_MPI_PIN=1 +``` +This is the default. + +### Build setup for Intel Skylake Xeon platform + +The following configuration is recommended for the Intel Knights Landing platform: + +``` bash +../configure --enable-precision=double\ + --enable-simd=AVX512 \ + --enable-comms=mpi3 \ + --enable-mkl \ + CXX=mpiicpc +``` +The MKL flag enables use of BLAS and FFTW from the Intel Math Kernels Library. 
+ +If gmp and mpfr are NOT in standard places (/usr/) these flags may be needed: +``` --with-gmp= \ + --with-mpfr= \ +``` +where `` is the UNIX prefix where GMP and MPFR are installed. + +If you are working on a Cray machine that does not use the `mpiicpc` wrapper, please use: + +``` bash +../configure --enable-precision=double\ + --enable-simd=AVX512 \ + --enable-comms=mpi3 \ + --enable-mkl \ + CXX=CC CC=cc +``` +Since Dual socket nodes are commonplace, we recommend MPI-3 as the default with the use of +one rank per socket. If using the Intel MPI library, threads should be pinned to NUMA domains using +``` + export I_MPI_PIN=1 +``` +This is the default. + + From e18929eaa0c8e6de539abf2c2ef259ea0816ea7e Mon Sep 17 00:00:00 2001 From: Peter Boyle Date: Sat, 1 Jul 2017 09:53:15 +0100 Subject: [PATCH 124/177] Update README.md --- README.md | 23 +++++++++++++---------- 1 file changed, 13 insertions(+), 10 deletions(-) diff --git a/README.md b/README.md index 072f7404..f3645b3a 100644 --- a/README.md +++ b/README.md @@ -215,7 +215,8 @@ If you are working on a Cray machine that does not use the `mpiicpc` wrapper, pl ``` If gmp and mpfr are NOT in standard places (/usr/) these flags may be needed: -``` --with-gmp= \ +``` bash + --with-gmp= \ --with-mpfr= \ ``` where `` is the UNIX prefix where GMP and MPFR are installed. 
@@ -228,26 +229,27 @@ We recommend four ranks per node for best performance, but optimum is local volu ``` bash ../configure --enable-precision=double\ --enable-simd=KNL \ - --enable-comms=mpi3 \ + --enable-comms=mpi3-auto \ --enable-mkl \ - CXX=mpiicpc + CC=icpc MPICXX=mpiicpc ``` ### Build setup for Intel Haswell Xeon platform -The following configuration is recommended for the Intel Knights Landing platform: +The following configuration is recommended for the Intel Haswell platform: ``` bash ../configure --enable-precision=double\ --enable-simd=AVX2 \ - --enable-comms=mpi3 \ + --enable-comms=mpi3-auto \ --enable-mkl \ - CXX=mpiicpc + CXX=icpc MPICXX=mpiicpc ``` The MKL flag enables use of BLAS and FFTW from the Intel Math Kernels Library. If gmp and mpfr are NOT in standard places (/usr/) these flags may be needed: -``` --with-gmp= \ +``` bash + --with-gmp= \ --with-mpfr= \ ``` where `` is the UNIX prefix where GMP and MPFR are installed. @@ -270,7 +272,7 @@ This is the default. ### Build setup for Intel Skylake Xeon platform -The following configuration is recommended for the Intel Knights Landing platform: +The following configuration is recommended for the Intel Skylake platform: ``` bash ../configure --enable-precision=double\ @@ -282,7 +284,8 @@ The following configuration is recommended for the Intel Knights Landing platfor The MKL flag enables use of BLAS and FFTW from the Intel Math Kernels Library. If gmp and mpfr are NOT in standard places (/usr/) these flags may be needed: -``` --with-gmp= \ +``` bash + --with-gmp= \ --with-mpfr= \ ``` where `` is the UNIX prefix where GMP and MPFR are installed. @@ -298,7 +301,7 @@ If you are working on a Cray machine that does not use the `mpiicpc` wrapper, pl ``` Since Dual socket nodes are commonplace, we recommend MPI-3 as the default with the use of one rank per socket. If using the Intel MPI library, threads should be pinned to NUMA domains using -``` +``` bash export I_MPI_PIN=1 ``` This is the default. 
From 251a97fe1be59f28686e1d07f8576c7d9f815517 Mon Sep 17 00:00:00 2001 From: Peter Boyle Date: Sat, 1 Jul 2017 09:55:36 +0100 Subject: [PATCH 125/177] Update README.md --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index f3645b3a..f9fd7ab5 100644 --- a/README.md +++ b/README.md @@ -301,7 +301,7 @@ If you are working on a Cray machine that does not use the `mpiicpc` wrapper, pl ``` Since Dual socket nodes are commonplace, we recommend MPI-3 as the default with the use of one rank per socket. If using the Intel MPI library, threads should be pinned to NUMA domains using -``` bash +``` export I_MPI_PIN=1 ``` This is the default. From 1354b46338bfaaa338e4e3ad7430e8b8fe087057 Mon Sep 17 00:00:00 2001 From: Peter Boyle Date: Sat, 1 Jul 2017 10:04:32 +0100 Subject: [PATCH 126/177] Update README.md --- README.md | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/README.md b/README.md index f9fd7ab5..8f0babd9 100644 --- a/README.md +++ b/README.md @@ -306,4 +306,20 @@ one rank per socket. If using the Intel MPI library, threads should be pinned to ``` This is the default. +### Build setup for laptops, other compilers, non-cluster builds + +Many versions of g++ and clang++ work with Grid, and involve merely replacing CXX (and MPICXX), +and omit the enable-mkl flag. + +Single node builds are enabled with +``` + --enable-comms=none +``` + +FFTW support that is not in the default search path may then be enabled with +``` + --with-fftw= +``` + +BLAS will not be compiled in by default, and Lanczos will default to Eigen diagonalisation. From 3d09e3e9e0c3b24e1646db3083aba01537bcf88a Mon Sep 17 00:00:00 2001 From: Peter Boyle Date: Sat, 1 Jul 2017 10:05:46 +0100 Subject: [PATCH 127/177] Update README.md --- README.md | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/README.md b/README.md index 8f0babd9..8f7a3d42 100644 --- a/README.md +++ b/README.md @@ -306,6 +306,14 @@ one rank per socket. 
If using the Intel MPI library, threads should be pinned to ``` This is the default. +### Build setup for BlueGene/Q + +To be written... + +### Build setup for ARM Neon + +To be written.. + ### Build setup for laptops, other compilers, non-cluster builds Many versions of g++ and clang++ work with Grid, and involve merely replacing CXX (and MPICXX), From 37263fd9b181f1190ff201203da6ac6a431e045d Mon Sep 17 00:00:00 2001 From: Peter Boyle Date: Sat, 1 Jul 2017 10:06:24 +0100 Subject: [PATCH 128/177] Update README.md --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 8f7a3d42..afb751f5 100644 --- a/README.md +++ b/README.md @@ -312,7 +312,7 @@ To be written... ### Build setup for ARM Neon -To be written.. +To be written... ### Build setup for laptops, other compilers, non-cluster builds From b68ad0cc0bf6ab479199020fd6b976229c0cb047 Mon Sep 17 00:00:00 2001 From: Peter Boyle Date: Sat, 1 Jul 2017 10:20:07 +0100 Subject: [PATCH 129/177] Update README.md --- README.md | 74 ++++++++++++++++++++++++++++++++----------------------- 1 file changed, 43 insertions(+), 31 deletions(-) diff --git a/README.md b/README.md index afb751f5..a786bc6c 100644 --- a/README.md +++ b/README.md @@ -22,6 +22,37 @@ Last update June 2017. _Please do not send pull requests to the `master` branch which is reserved for releases._ + + +### Description +This library provides data parallel C++ container classes with internal memory layout +that is transformed to map efficiently to SIMD architectures. CSHIFT facilities +are provided, similar to HPF and cmfortran, and user control is given over the mapping of +array indices to both MPI tasks and SIMD processing elements. + +* Identically shaped arrays then be processed with perfect data parallelisation. +* Such identically shaped arrays are called conformable arrays. 
+ +The transformation is based on the observation that Cartesian array processing involves +identical processing to be performed on different regions of the Cartesian array. + +The library will both geometrically decompose into MPI tasks and across SIMD lanes. +Local vector loops are parallelised with OpenMP pragmas. + +Data parallel array operations can then be specified with a SINGLE data parallel paradigm, but +optimally use MPI, OpenMP and SIMD parallelism under the hood. This is a significant simplification +for most programmers. + +The layout transformations are parametrised by the SIMD vector length. This adapts according to the architecture. +Presently SSE4, ARM NEON (128 bits) AVX, AVX2, QPX (256 bits), IMCI and AVX512 (512 bits) targets are supported. + +These are presented as `vRealF`, `vRealD`, `vComplexF`, and `vComplexD` internal vector data types. +The corresponding scalar types are named `RealF`, `RealD`, `ComplexF` and `ComplexD`. + +MPI, OpenMP, and SIMD parallelism are present in the library. +Please see [this paper](https://arxiv.org/abs/1512.03487) for more detail. + + ### Compilers Intel ICPC v16.0.3 and later @@ -56,38 +87,19 @@ When you file an issue, please go though the following checklist: 6. Attach the output of `make V=1`. 7. Describe the issue and any previous attempt to solve it. If relevant, show how to reproduce the issue using a minimal working example. - - -### Description -This library provides data parallel C++ container classes with internal memory layout -that is transformed to map efficiently to SIMD architectures. CSHIFT facilities -are provided, similar to HPF and cmfortran, and user control is given over the mapping of -array indices to both MPI tasks and SIMD processing elements. - -* Identically shaped arrays then be processed with perfect data parallelisation. -* Such identically shaped arrays are called conformable arrays. 
- -The transformation is based on the observation that Cartesian array processing involves -identical processing to be performed on different regions of the Cartesian array. - -The library will both geometrically decompose into MPI tasks and across SIMD lanes. -Local vector loops are parallelised with OpenMP pragmas. - -Data parallel array operations can then be specified with a SINGLE data parallel paradigm, but -optimally use MPI, OpenMP and SIMD parallelism under the hood. This is a significant simplification -for most programmers. - -The layout transformations are parametrised by the SIMD vector length. This adapts according to the architecture. -Presently SSE4, ARM NEON (128 bits) AVX, AVX2, QPX (256 bits), IMCI and AVX512 (512 bits) targets are supported. - -These are presented as `vRealF`, `vRealD`, `vComplexF`, and `vComplexD` internal vector data types. -The corresponding scalar types are named `RealF`, `RealD`, `ComplexF` and `ComplexD`. - -MPI, OpenMP, and SIMD parallelism are present in the library. -Please see [this paper](https://arxiv.org/abs/1512.03487) for more detail. - ### Required libraries -Grid requires [GMP](https://gmplib.org/), [MPFR](http://www.mpfr.org/) and optionally [HDF5](https://support.hdfgroup.org/HDF5/) and [LIME](http://usqcd-software.github.io/c-lime/) (for ILDG file format support) to be installed. +Grid requires: +[GMP](https://gmplib.org/), +[MPFR](http://www.mpfr.org/) + +Bootstrapping grid downloads and uses for internal dense matrix (non-QCD operations) the Eigen library. 
+ +Grid optionally uses: +[HDF5](https://support.hdfgroup.org/HDF5/) +[LIME](http://usqcd-software.github.io/c-lime/) (for ILDG file format support) +[FFTW](http://www.fftw.org) (Either generic or via the Intel MKL library) +[LAPACK]( either generic or Intel MKL library) + ### Quick start First, start by cloning the repository: From 7b0237b0819d6981767a0189f7550546a58a8683 Mon Sep 17 00:00:00 2001 From: Peter Boyle Date: Sat, 1 Jul 2017 10:24:41 +0100 Subject: [PATCH 130/177] Update README.md --- README.md | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index a786bc6c..3572be26 100644 --- a/README.md +++ b/README.md @@ -89,16 +89,22 @@ When you file an issue, please go though the following checklist: ### Required libraries Grid requires: + [GMP](https://gmplib.org/), + [MPFR](http://www.mpfr.org/) Bootstrapping grid downloads and uses for internal dense matrix (non-QCD operations) the Eigen library. Grid optionally uses: + [HDF5](https://support.hdfgroup.org/HDF5/) -[LIME](http://usqcd-software.github.io/c-lime/) (for ILDG file format support) -[FFTW](http://www.fftw.org) (Either generic or via the Intel MKL library) -[LAPACK]( either generic or Intel MKL library) + +[LIME](http://usqcd-software.github.io/c-lime/) for ILDG and SciDAC file format support. + +[FFTW](http://www.fftw.org) either generic version or via the Intel MKL library. + +LAPACK either generic version or Intel MKL library. 
### Quick start From 40e119c61cac619b7fa1874e5fa7ccdc1dcb77cb Mon Sep 17 00:00:00 2001 From: Peter Boyle Date: Sat, 8 Jul 2017 22:27:11 -0400 Subject: [PATCH 131/177] NUMA improvements worth preserving from AMD EPYC tests --- benchmarks/Benchmark_memory_bandwidth.cc | 48 ++++++++++++------------ lib/allocator/AlignedAllocator.h | 3 +- lib/communicator/Communicator_mpi3.cc | 20 +++++++++- 3 files changed, 45 insertions(+), 26 deletions(-) diff --git a/benchmarks/Benchmark_memory_bandwidth.cc b/benchmarks/Benchmark_memory_bandwidth.cc index 1136dfe0..848f271d 100644 --- a/benchmarks/Benchmark_memory_bandwidth.cc +++ b/benchmarks/Benchmark_memory_bandwidth.cc @@ -60,16 +60,16 @@ int main (int argc, char ** argv) for(int lat=8;lat<=lmax;lat+=8){ std::vector latt_size ({lat*mpi_layout[0],lat*mpi_layout[1],lat*mpi_layout[2],lat*mpi_layout[3]}); - int vol = latt_size[0]*latt_size[1]*latt_size[2]*latt_size[3]; + int64_t vol= latt_size[0]*latt_size[1]*latt_size[2]*latt_size[3]; GridCartesian Grid(latt_size,simd_layout,mpi_layout); uint64_t Nloop=NLOOP; - GridParallelRNG pRNG(&Grid); pRNG.SeedFixedIntegers(std::vector({45,12,81,9})); + // GridParallelRNG pRNG(&Grid); pRNG.SeedFixedIntegers(std::vector({45,12,81,9})); - LatticeVec z(&Grid); random(pRNG,z); - LatticeVec x(&Grid); random(pRNG,x); - LatticeVec y(&Grid); random(pRNG,y); + LatticeVec z(&Grid);// random(pRNG,z); + LatticeVec x(&Grid);// random(pRNG,x); + LatticeVec y(&Grid);// random(pRNG,y); double a=2.0; @@ -83,7 +83,7 @@ int main (int argc, char ** argv) double time = (stop-start)/Nloop*1000; double flops=vol*Nvec*2;// mul,add - double bytes=3*vol*Nvec*sizeof(Real); + double bytes=3.0*vol*Nvec*sizeof(Real); std::cout< latt_size ({lat*mpi_layout[0],lat*mpi_layout[1],lat*mpi_layout[2],lat*mpi_layout[3]}); - int vol = latt_size[0]*latt_size[1]*latt_size[2]*latt_size[3]; + int64_t vol= latt_size[0]*latt_size[1]*latt_size[2]*latt_size[3]; GridCartesian Grid(latt_size,simd_layout,mpi_layout); - GridParallelRNG 
pRNG(&Grid); pRNG.SeedFixedIntegers(std::vector({45,12,81,9})); + // GridParallelRNG pRNG(&Grid); pRNG.SeedFixedIntegers(std::vector({45,12,81,9})); - LatticeVec z(&Grid); random(pRNG,z); - LatticeVec x(&Grid); random(pRNG,x); - LatticeVec y(&Grid); random(pRNG,y); + LatticeVec z(&Grid);// random(pRNG,z); + LatticeVec x(&Grid);// random(pRNG,x); + LatticeVec y(&Grid);// random(pRNG,y); double a=2.0; uint64_t Nloop=NLOOP; @@ -119,7 +119,7 @@ int main (int argc, char ** argv) double time = (stop-start)/Nloop*1000; double flops=vol*Nvec*2;// mul,add - double bytes=3*vol*Nvec*sizeof(Real); + double bytes=3.0*vol*Nvec*sizeof(Real); std::cout< latt_size ({lat*mpi_layout[0],lat*mpi_layout[1],lat*mpi_layout[2],lat*mpi_layout[3]}); - int vol = latt_size[0]*latt_size[1]*latt_size[2]*latt_size[3]; + int64_t vol= latt_size[0]*latt_size[1]*latt_size[2]*latt_size[3]; uint64_t Nloop=NLOOP; GridCartesian Grid(latt_size,simd_layout,mpi_layout); - GridParallelRNG pRNG(&Grid); pRNG.SeedFixedIntegers(std::vector({45,12,81,9})); + // GridParallelRNG pRNG(&Grid); pRNG.SeedFixedIntegers(std::vector({45,12,81,9})); - LatticeVec z(&Grid); random(pRNG,z); - LatticeVec x(&Grid); random(pRNG,x); - LatticeVec y(&Grid); random(pRNG,y); + LatticeVec z(&Grid);// random(pRNG,z); + LatticeVec x(&Grid);// random(pRNG,x); + LatticeVec y(&Grid);// random(pRNG,y); RealD a=2.0; @@ -154,7 +154,7 @@ int main (int argc, char ** argv) double stop=usecond(); double time = (stop-start)/Nloop*1000; - double bytes=2*vol*Nvec*sizeof(Real); + double bytes=2.0*vol*Nvec*sizeof(Real); double flops=vol*Nvec*1;// mul std::cout< latt_size ({lat*mpi_layout[0],lat*mpi_layout[1],lat*mpi_layout[2],lat*mpi_layout[3]}); - int vol = latt_size[0]*latt_size[1]*latt_size[2]*latt_size[3]; + int64_t vol= latt_size[0]*latt_size[1]*latt_size[2]*latt_size[3]; uint64_t Nloop=NLOOP; GridCartesian Grid(latt_size,simd_layout,mpi_layout); - GridParallelRNG pRNG(&Grid); pRNG.SeedFixedIntegers(std::vector({45,12,81,9})); - LatticeVec 
z(&Grid); random(pRNG,z); - LatticeVec x(&Grid); random(pRNG,x); - LatticeVec y(&Grid); random(pRNG,y); + // GridParallelRNG pRNG(&Grid); pRNG.SeedFixedIntegers(std::vector({45,12,81,9})); + LatticeVec z(&Grid);// random(pRNG,z); + LatticeVec x(&Grid);// random(pRNG,x); + LatticeVec y(&Grid);// random(pRNG,y); RealD a=2.0; Real nn; double start=usecond(); @@ -187,7 +187,7 @@ int main (int argc, char ** argv) double stop=usecond(); double time = (stop-start)/Nloop*1000; - double bytes=vol*Nvec*sizeof(Real); + double bytes=1.0*vol*Nvec*sizeof(Real); double flops=vol*Nvec*2;// mul,add std::cout< #include #include #include -//#include +#include +#include #ifndef SHM_HUGETLB #define SHM_HUGETLB 04000 #endif @@ -214,6 +215,23 @@ void CartesianCommunicator::Init(int *argc, char ***argv) { void * ptr = mmap(NULL,size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0); if ( ptr == MAP_FAILED ) { perror("failed mmap"); assert(0); } assert(((uint64_t)ptr&0x3F)==0); + + int status; + int flags=MPOL_MF_MOVE; +#ifdef KNL + int nodes=1; // numa domain == MCDRAM + // Find out if in SNC2,SNC4 mode ? +#else + int nodes=r; // numa domain == MPI ID +#endif + unsigned long count=1; + for(uint64_t page=0;page Date: Sun, 9 Jul 2017 00:11:54 +0100 Subject: [PATCH 132/177] Update README.md --- README.md | 54 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 54 insertions(+) diff --git a/README.md b/README.md index 3572be26..e0a9bb14 100644 --- a/README.md +++ b/README.md @@ -324,6 +324,60 @@ one rank per socket. If using the Intel MPI library, threads should be pinned to ``` This is the default. +### Build setup for AMD EPYC / RYZEN + +The AMD EPYC is a multichip module comprising 32 cores spread over four distinct chips each with 8 cores. +So, even with a single socket node there is a quad-chip module. Dual socket nodes with 64 cores total +are common. Each chip within the module exposes a separate NUMA domain. 
+There are four NUMA domains per socket and we recommend one MPI rank per NUMA domain. +MPI-3 is recommended with the use of four ranks per socket, +and 8 threads per rank. + +The following configuration is recommended for the AMD EPYC platform. + +``` bash +../configure --enable-precision=double\ + --enable-simd=AVX2 \ + --enable-comms=mpi3 \ + CXX=mpicxx +``` + +If gmp and mpfr are NOT in standard places (/usr/) these flags may be needed: +``` bash + --with-gmp= \ + --with-mpfr= \ +``` +where `` is the UNIX prefix where GMP and MPFR are installed. + +Using MPICH and g++ v4.9.2, best performance can be obtained using explicit GOMP_CPU_AFFINITY flags for each MPI rank. +This can be done by invoking MPI on a wrapper script omp_bind.sh to handle this. + +It is recommended to run 8 MPI ranks on a single dual socket AMD EPYC, with 8 threads per rank using MPI3 and +shared memory to communicate within this node: + +mpirun -np 8 ./omp_bind.sh ./Benchmark_dwf --mpi 2.2.2.1 --dslash-unroll --threads 8 --grid 16.16.16.16 --cacheblocking 4.4.4.4 + +Where omp_bind.sh does the following: +``` +#!/bin/bash + +numanode=` expr $PMI_RANK % 8 ` +basecore=`expr $numanode \* 16` +core0=`expr $basecore + 0 ` +core1=`expr $basecore + 2 ` +core2=`expr $basecore + 4 ` +core3=`expr $basecore + 6 ` +core4=`expr $basecore + 8 ` +core5=`expr $basecore + 10 ` +core6=`expr $basecore + 12 ` +core7=`expr $basecore + 14 ` + +export GOMP_CPU_AFFINITY="$core0 $core1 $core2 $core3 $core4 $core5 $core6 $core7" +echo GOMP_CUP_AFFINITY $GOMP_CPU_AFFINITY + +$@ +``` + ### Build setup for BlueGene/Q To be written... 
From dc6f078246b006ad1b3e61c513273b73f8f0da81 Mon Sep 17 00:00:00 2001 From: azusayamaguchi Date: Tue, 11 Jul 2017 14:15:08 +0100 Subject: [PATCH 133/177] fixed the header file for mpi3 --- configure.ac | 8 +++++++- lib/communicator/Communicator_mpi3.cc | 18 +++++++++++------- 2 files changed, 18 insertions(+), 8 deletions(-) diff --git a/configure.ac b/configure.ac index 8c43d67a..dc6754da 100644 --- a/configure.ac +++ b/configure.ac @@ -51,6 +51,7 @@ AC_CHECK_HEADERS(malloc/malloc.h) AC_CHECK_HEADERS(malloc.h) AC_CHECK_HEADERS(endian.h) AC_CHECK_HEADERS(execinfo.h) +AC_CHECK_HEADERS(numaif.h) AC_CHECK_DECLS([ntohll],[], [], [[#include ]]) AC_CHECK_DECLS([be64toh],[], [], [[#include ]]) @@ -186,9 +187,14 @@ Info at: http://usqcd.jlab.org/usqcd-docs/c-lime/)]) AC_SEARCH_LIBS([crc32], [z], [AC_DEFINE([HAVE_ZLIB], [1], [Define to 1 if you have the `LIBZ' library])] - [have_zlib=true], + [have_zlib=true] [LIBS="${LIBS} -lz"], [AC_MSG_ERROR(zlib library was not found in your system.)]) +AC_SEARCH_LIBS([move_pages], [numa], + [AC_DEFINE([HAVE_LIBNUMA], [1], [Define to 1 if you have the `LIBNUMA' library])] + [have_libnuma=true] [LIBS="${LIBS} -lnuma"], + [AC_MSG_WARN(libnuma library was not found in your system. 
Some optimisations will not apply)]) + AC_SEARCH_LIBS([H5Fopen], [hdf5_cpp], [AC_DEFINE([HAVE_HDF5], [1], [Define to 1 if you have the `HDF5' library])] [have_hdf5=true] diff --git a/lib/communicator/Communicator_mpi3.cc b/lib/communicator/Communicator_mpi3.cc index f5646d44..4192300b 100644 --- a/lib/communicator/Communicator_mpi3.cc +++ b/lib/communicator/Communicator_mpi3.cc @@ -38,7 +38,9 @@ Author: Peter Boyle #include #include #include +#ifdef HAVE_NUMAIF_H #include +#endif #ifndef SHM_HUGETLB #define SHM_HUGETLB 04000 #endif @@ -216,6 +218,8 @@ void CartesianCommunicator::Init(int *argc, char ***argv) { if ( ptr == MAP_FAILED ) { perror("failed mmap"); assert(0); } assert(((uint64_t)ptr&0x3F)==0); + // Try to force numa domain on the shm segment if we have numaif.h +#ifdef HAVE_NUMAIF_H int status; int flags=MPOL_MF_MOVE; #ifdef KNL @@ -225,13 +229,13 @@ void CartesianCommunicator::Init(int *argc, char ***argv) { int nodes=r; // numa domain == MPI ID #endif unsigned long count=1; - for(uint64_t page=0;page Date: Wed, 12 Jul 2017 15:01:48 +0100 Subject: [PATCH 134/177] For test/solver Fixed --- lib/lattice/Lattice_reduction.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/lib/lattice/Lattice_reduction.h b/lib/lattice/Lattice_reduction.h index c5b20f3c..38982891 100644 --- a/lib/lattice/Lattice_reduction.h +++ b/lib/lattice/Lattice_reduction.h @@ -540,7 +540,8 @@ static void sliceInnerProductMatrix( Eigen::MatrixXcd &mat, const Lattice for(int i=0;i Date: Fri, 14 Jul 2017 22:52:16 +0100 Subject: [PATCH 135/177] Update README.md --- README.md | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/README.md b/README.md index e0a9bb14..ea20d0ec 100644 --- a/README.md +++ b/README.md @@ -324,6 +324,17 @@ one rank per socket. If using the Intel MPI library, threads should be pinned to ``` This is the default. 
+** Expected Skylake Gold 6148 dual socket (single prec, single node 20+20 cores) performance using NUMA MPI mapping): ** + +mpirun -n 2 benchmarks/Benchmark_dwf --grid 16.16.16.16 --mpi 2.1.1.1 --cacheblocking 2.2.2.2 --dslash-asm --shm 1024 --threads 18 +Average mflops/s per call per node (full): ** 498739 ** 4d vec +Average mflops/s per call per node (full): ** 457786 ** 4d vec, fp16 comms +Average mflops/s per call per node (full): ** 572645 ** 5d vec +Average mflops/s per call per node (full): ** 721206 ** 5d vec, red black +Average mflops/s per call per node (full): ** 634542 ** 4d vec, red black + + + ### Build setup for AMD EPYC / RYZEN The AMD EPYC is a multichip module comprising 32 cores spread over four distinct chips each with 8 cores. @@ -378,6 +389,17 @@ echo GOMP_CUP_AFFINITY $GOMP_CPU_AFFINITY $@ ``` +Performance: + +** Expected EPYC 7601 Gold 6148 dual socket (single prec, single node 20+20 cores) performance using NUMA MPI mapping): ** + +mpirun -np 8 ./omp_bind.sh ./Benchmark_dwf --threads 8 --mpi 2.2.2.1 --dslash-unroll --grid 16.16.16.16 --cacheblocking 4.4.4.4 +Average mflops/s per call per node (full): **420235** 4d vec +Average mflops/s per call per node (full): **437617** 4d vec, fp16 comms +Average mflops/s per call per node (full): **522988** 5d vec +Average mflops/s per call per node (full): **588984** 5d vec, red black +Average mflops/s per call per node (full): **508423** 4d vec, red black + ### Build setup for BlueGene/Q To be written... From 169f4b2711f0131f1909738c2b631ced3e47c9e1 Mon Sep 17 00:00:00 2001 From: Peter Boyle Date: Fri, 14 Jul 2017 22:56:06 +0100 Subject: [PATCH 136/177] Update README.md --- README.md | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/README.md b/README.md index ea20d0ec..124c7bfa 100644 --- a/README.md +++ b/README.md @@ -327,11 +327,11 @@ This is the default. 
** Expected Skylake Gold 6148 dual socket (single prec, single node 20+20 cores) performance using NUMA MPI mapping): ** mpirun -n 2 benchmarks/Benchmark_dwf --grid 16.16.16.16 --mpi 2.1.1.1 --cacheblocking 2.2.2.2 --dslash-asm --shm 1024 --threads 18 -Average mflops/s per call per node (full): ** 498739 ** 4d vec -Average mflops/s per call per node (full): ** 457786 ** 4d vec, fp16 comms -Average mflops/s per call per node (full): ** 572645 ** 5d vec -Average mflops/s per call per node (full): ** 721206 ** 5d vec, red black -Average mflops/s per call per node (full): ** 634542 ** 4d vec, red black +- Average mflops/s per call per node (full): 498739 : 4d vec +- Average mflops/s per call per node (full): 457786 : 4d vec, fp16 comms +- Average mflops/s per call per node (full): 572645 : 5d vec +- Average mflops/s per call per node (full): 721206 : 5d vec, red black +- Average mflops/s per call per node (full): 634542 : 4d vec, red black @@ -391,14 +391,14 @@ $@ Performance: -** Expected EPYC 7601 Gold 6148 dual socket (single prec, single node 20+20 cores) performance using NUMA MPI mapping): ** +### Expected EPYC 7601 Gold 6148 dual socket (single prec, single node 20+20 cores) performance using NUMA MPI mapping): ** mpirun -np 8 ./omp_bind.sh ./Benchmark_dwf --threads 8 --mpi 2.2.2.1 --dslash-unroll --grid 16.16.16.16 --cacheblocking 4.4.4.4 -Average mflops/s per call per node (full): **420235** 4d vec -Average mflops/s per call per node (full): **437617** 4d vec, fp16 comms -Average mflops/s per call per node (full): **522988** 5d vec -Average mflops/s per call per node (full): **588984** 5d vec, red black -Average mflops/s per call per node (full): **508423** 4d vec, red black +- Average mflops/s per call per node (full): 420235 : 4d vec +- Average mflops/s per call per node (full): 437617 : 4d vec, fp16 comms +- Average mflops/s per call per node (full): 522988 : 5d vec +- Average mflops/s per call per node (full): 588984 : 5d vec, red black +- Average mflops/s 
per call per node (full): 508423 : 4d vec, red black ### Build setup for BlueGene/Q From f038c6babe1ec5cd3772c4bcb892d19709dc96f5 Mon Sep 17 00:00:00 2001 From: Peter Boyle Date: Fri, 14 Jul 2017 22:59:16 +0100 Subject: [PATCH 137/177] Update README.md --- README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 124c7bfa..a185063e 100644 --- a/README.md +++ b/README.md @@ -324,7 +324,7 @@ one rank per socket. If using the Intel MPI library, threads should be pinned to ``` This is the default. -** Expected Skylake Gold 6148 dual socket (single prec, single node 20+20 cores) performance using NUMA MPI mapping): ** +#### Expected Skylake Gold 6148 dual socket (single prec, single node 20+20 cores) performance using NUMA MPI mapping): mpirun -n 2 benchmarks/Benchmark_dwf --grid 16.16.16.16 --mpi 2.1.1.1 --cacheblocking 2.2.2.2 --dslash-asm --shm 1024 --threads 18 - Average mflops/s per call per node (full): 498739 : 4d vec @@ -391,7 +391,7 @@ $@ Performance: -### Expected EPYC 7601 Gold 6148 dual socket (single prec, single node 20+20 cores) performance using NUMA MPI mapping): ** +#### Expected AMD EPYC 7601 dual socket (single prec, single node 32+32 cores) performance using NUMA MPI mapping): mpirun -np 8 ./omp_bind.sh ./Benchmark_dwf --threads 8 --mpi 2.2.2.1 --dslash-unroll --grid 16.16.16.16 --cacheblocking 4.4.4.4 - Average mflops/s per call per node (full): 420235 : 4d vec From fe4912880d3ceaf96023e5074682cc4ee43cb871 Mon Sep 17 00:00:00 2001 From: Peter Boyle Date: Mon, 17 Jul 2017 09:53:07 +0100 Subject: [PATCH 138/177] Update README.md --- README.md | 13 +++---------- 1 file changed, 3 insertions(+), 10 deletions(-) diff --git a/README.md b/README.md index a185063e..1e0988f3 100644 --- a/README.md +++ b/README.md @@ -327,12 +327,8 @@ This is the default. 
#### Expected Skylake Gold 6148 dual socket (single prec, single node 20+20 cores) performance using NUMA MPI mapping): mpirun -n 2 benchmarks/Benchmark_dwf --grid 16.16.16.16 --mpi 2.1.1.1 --cacheblocking 2.2.2.2 --dslash-asm --shm 1024 --threads 18 -- Average mflops/s per call per node (full): 498739 : 4d vec -- Average mflops/s per call per node (full): 457786 : 4d vec, fp16 comms -- Average mflops/s per call per node (full): 572645 : 5d vec -- Average mflops/s per call per node (full): 721206 : 5d vec, red black -- Average mflops/s per call per node (full): 634542 : 4d vec, red black +TBA ### Build setup for AMD EPYC / RYZEN @@ -394,11 +390,8 @@ Performance: #### Expected AMD EPYC 7601 dual socket (single prec, single node 32+32 cores) performance using NUMA MPI mapping): mpirun -np 8 ./omp_bind.sh ./Benchmark_dwf --threads 8 --mpi 2.2.2.1 --dslash-unroll --grid 16.16.16.16 --cacheblocking 4.4.4.4 -- Average mflops/s per call per node (full): 420235 : 4d vec -- Average mflops/s per call per node (full): 437617 : 4d vec, fp16 comms -- Average mflops/s per call per node (full): 522988 : 5d vec -- Average mflops/s per call per node (full): 588984 : 5d vec, red black -- Average mflops/s per call per node (full): 508423 : 4d vec, red black + +TBA ### Build setup for BlueGene/Q From 0f214ad427c2f903bc5effeb453f5bed27034cc5 Mon Sep 17 00:00:00 2001 From: Christopher Kelly Date: Fri, 21 Jul 2017 11:13:51 -0400 Subject: [PATCH 139/177] Moved FourierAcceleratedGaugeFixer into Grid::QCD namespace and removed 'using namespace' directives from header --- lib/qcd/utils/GaugeFix.h | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/lib/qcd/utils/GaugeFix.h b/lib/qcd/utils/GaugeFix.h index 4ff216e4..f2ea1aa2 100644 --- a/lib/qcd/utils/GaugeFix.h +++ b/lib/qcd/utils/GaugeFix.h @@ -26,12 +26,12 @@ Author: Peter Boyle /* END LEGAL */ //#include -using namespace Grid; -using namespace Grid::QCD; +namespace Grid { +namespace QCD { template class 
FourierAcceleratedGaugeFixer : public Gimpl { - public: + public: INHERIT_GIMPL_TYPES(Gimpl); typedef typename Gimpl::GaugeLinkField GaugeMat; @@ -186,3 +186,5 @@ class FourierAcceleratedGaugeFixer : public Gimpl { } }; +} +} From 56967818626452a318c058684b9594adca4f7fa4 Mon Sep 17 00:00:00 2001 From: Guido Cossu Date: Wed, 26 Jul 2017 12:07:34 +0100 Subject: [PATCH 140/177] Debug error in Tensor mult --- lib/tensors/Tensor_arith_mul.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/lib/tensors/Tensor_arith_mul.h b/lib/tensors/Tensor_arith_mul.h index c24853b7..a474db9c 100644 --- a/lib/tensors/Tensor_arith_mul.h +++ b/lib/tensors/Tensor_arith_mul.h @@ -98,7 +98,9 @@ template strong_inline void mult(iVector * __restrict__ ret, const iVector * __restrict__ rhs, const iScalar * __restrict__ lhs){ - mult(ret,lhs,rhs); + for(int c1=0;c1_internal[c1],&rhs->_internal[c1],&lhs->_internal); + } } From 237cfd11ab493e1ea8ffaf24fc1da5171b8b929a Mon Sep 17 00:00:00 2001 From: Guido Cossu Date: Wed, 26 Jul 2017 12:08:51 +0100 Subject: [PATCH 141/177] Solving the spurious O2 flags --- configure.ac | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/configure.ac b/configure.ac index dc6754da..a028fb0a 100644 --- a/configure.ac +++ b/configure.ac @@ -13,6 +13,10 @@ m4_ifdef([AM_SILENT_RULES], [AM_SILENT_RULES([yes])]) ################ Get git info #AC_REVISION([m4_esyscmd_s([./scripts/configure.commit])]) +################ Set flags +# do not move! 
+CXXFLAGS="-O3 $CXXFLAGS" + ############### Checks for programs AC_PROG_CXX AC_PROG_RANLIB @@ -27,7 +31,6 @@ AX_GXX_VERSION AC_DEFINE_UNQUOTED([GXX_VERSION],["$GXX_VERSION"], [version of g++ that will compile the code]) -CXXFLAGS="-O3 $CXXFLAGS" ############### Checks for typedefs, structures, and compiler characteristics From 7abc5613bde6fb4e704145b0f2a4c8fa19090944 Mon Sep 17 00:00:00 2001 From: Guido Cossu Date: Wed, 26 Jul 2017 16:21:17 +0100 Subject: [PATCH 142/177] Added smearing to the topological charge observable --- lib/qcd/modules/ObservableModules.h | 15 ++--- lib/qcd/observables/topological_charge.h | 70 +++++++++++++++++++++--- tests/hmc/Test_hmc_WilsonGauge.cc | 5 +- 3 files changed, 73 insertions(+), 17 deletions(-) diff --git a/lib/qcd/modules/ObservableModules.h b/lib/qcd/modules/ObservableModules.h index 579fc1ec..24511617 100644 --- a/lib/qcd/modules/ObservableModules.h +++ b/lib/qcd/modules/ObservableModules.h @@ -84,8 +84,6 @@ class PlaquetteMod: public ObservableModule, NoParameters> typedef ObservableModule, NoParameters> ObsBase; using ObsBase::ObsBase; // for constructors - - // acquire resource virtual void initialize(){ this->ObservablePtr.reset(new PlaquetteLogger()); @@ -94,23 +92,22 @@ class PlaquetteMod: public ObservableModule, NoParameters> PlaquetteMod(): ObsBase(NoParameters()){} }; + template < class Impl > -class TopologicalChargeMod: public ObservableModule, NoParameters>{ - typedef ObservableModule, NoParameters> ObsBase; +class TopologicalChargeMod: public ObservableModule, TopologyObsParameters>{ + typedef ObservableModule, TopologyObsParameters> ObsBase; using ObsBase::ObsBase; // for constructors - - // acquire resource virtual void initialize(){ - this->ObservablePtr.reset(new TopologicalCharge()); + this->ObservablePtr.reset(new TopologicalCharge(this->Par_)); } public: - TopologicalChargeMod(): ObsBase(NoParameters()){} + TopologicalChargeMod(TopologyObsParameters Par): ObsBase(Par){} + TopologicalChargeMod(): 
ObsBase(){} }; - }// QCD temporarily here diff --git a/lib/qcd/observables/topological_charge.h b/lib/qcd/observables/topological_charge.h index 5d09c420..c2c419fb 100644 --- a/lib/qcd/observables/topological_charge.h +++ b/lib/qcd/observables/topological_charge.h @@ -33,9 +33,45 @@ directory namespace Grid { namespace QCD { +struct TopologySmearingParameters : Serializable { + GRID_SERIALIZABLE_CLASS_MEMBERS(TopologySmearingParameters, + int, steps, + float, step_size, + int, meas_interval, + float, maxTau); + + TopologySmearingParameters(int s = 0, float ss = 0.0f, int mi = 0, float mT = 0.0f): + steps(s), step_size(ss), meas_interval(mi), maxTau(mT){} + + template < class ReaderClass > + TopologySmearingParameters(Reader& Reader){ + read(Reader, "Smearing", *this); + } +}; + + + +struct TopologyObsParameters : Serializable { + GRID_SERIALIZABLE_CLASS_MEMBERS(TopologyObsParameters, + int, interval, + bool, do_smearing, + TopologySmearingParameters, Smearing); + + TopologyObsParameters(int interval = 1, bool smearing = false): + interval(interval), Smearing(smearing){} + + template + TopologyObsParameters(Reader& Reader){ + read(Reader, "TopologyMeasurement", *this); + } +}; + + // this is only defined for a gauge theory template class TopologicalCharge : public HmcObservable { + TopologyObsParameters Pars; + public: // here forces the Impl to be of gauge fields // if not the compiler will complain @@ -44,20 +80,40 @@ class TopologicalCharge : public HmcObservable { // necessary for HmcObservable compatibility typedef typename Impl::Field Field; + TopologicalCharge(int interval = 1, bool do_smearing = false): + Pars(interval, do_smearing){} + + TopologicalCharge(TopologyObsParameters P):Pars(P){ + std::cout << GridLogDebug << "Creating TopologicalCharge " << std::endl; + } + void TrajectoryComplete(int traj, Field &U, GridSerialRNG &sRNG, GridParallelRNG &pRNG) { - Real q = WilsonLoops::TopologicalCharge(U); + if (traj%Pars.interval == 0){ + // Smearing + Field 
Usmear = U; + int def_prec = std::cout.precision(); + + if (Pars.do_smearing){ + // using wilson flow by default here + std::cout << "1. " << Pars.Smearing.step_size << std::endl; + WilsonFlow WF(Pars.Smearing.steps, Pars.Smearing.step_size, Pars.Smearing.meas_interval); + WF.smear_adaptive(Usmear, U, Pars.Smearing.maxTau); + Real T0 = WF.energyDensityPlaquette(Usmear); + std::cout << GridLogMessage << std::setprecision(std::numeric_limits::digits10 + 1) + << "T0 : [ " << traj << " ] "<< T0 << std::endl; + } - int def_prec = std::cout.precision(); + Real q = WilsonLoops::TopologicalCharge(Usmear); + std::cout << GridLogMessage + << std::setprecision(std::numeric_limits::digits10 + 1) + << "Topological Charge: [ " << traj << " ] "<< q << std::endl; - std::cout << GridLogMessage - << std::setprecision(std::numeric_limits::digits10 + 1) - << "Topological Charge: [ " << traj << " ] "<< q << std::endl; - - std::cout.precision(def_prec); + std::cout.precision(def_prec); + } } }; diff --git a/tests/hmc/Test_hmc_WilsonGauge.cc b/tests/hmc/Test_hmc_WilsonGauge.cc index b2d5fb02..4cf6d923 100644 --- a/tests/hmc/Test_hmc_WilsonGauge.cc +++ b/tests/hmc/Test_hmc_WilsonGauge.cc @@ -66,7 +66,10 @@ int main(int argc, char **argv) { typedef PlaquetteMod PlaqObs; typedef TopologicalChargeMod QObs; TheHMC.Resources.AddObservable(); - TheHMC.Resources.AddObservable(); + TopologyObsParameters TopParams; + TopParams.interval = 1; + TopParams.do_smearing = false; + TheHMC.Resources.AddObservable(TopParams); ////////////////////////////////////////////// ///////////////////////////////////////////////////////////// From c0485d799d915637fdc455dfa900ee9786f7cd69 Mon Sep 17 00:00:00 2001 From: Guido Cossu Date: Wed, 26 Jul 2017 16:26:04 +0100 Subject: [PATCH 143/177] Explicit parameter declaration in the WilsonGauge test --- lib/qcd/observables/topological_charge.h | 1 - tests/hmc/Test_hmc_WilsonGauge.cc | 8 ++++++-- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git 
a/lib/qcd/observables/topological_charge.h b/lib/qcd/observables/topological_charge.h index c2c419fb..5af8d77b 100644 --- a/lib/qcd/observables/topological_charge.h +++ b/lib/qcd/observables/topological_charge.h @@ -99,7 +99,6 @@ class TopologicalCharge : public HmcObservable { if (Pars.do_smearing){ // using wilson flow by default here - std::cout << "1. " << Pars.Smearing.step_size << std::endl; WilsonFlow WF(Pars.Smearing.steps, Pars.Smearing.step_size, Pars.Smearing.meas_interval); WF.smear_adaptive(Usmear, U, Pars.Smearing.maxTau); Real T0 = WF.energyDensityPlaquette(Usmear); diff --git a/tests/hmc/Test_hmc_WilsonGauge.cc b/tests/hmc/Test_hmc_WilsonGauge.cc index 4cf6d923..05bf81a2 100644 --- a/tests/hmc/Test_hmc_WilsonGauge.cc +++ b/tests/hmc/Test_hmc_WilsonGauge.cc @@ -67,8 +67,12 @@ int main(int argc, char **argv) { typedef TopologicalChargeMod QObs; TheHMC.Resources.AddObservable(); TopologyObsParameters TopParams; - TopParams.interval = 1; - TopParams.do_smearing = false; + TopParams.interval = 5; + TopParams.do_smearing = true; + TopParams.Smearing.steps = 200; + TopParams.Smearing.step_size = 0.01; + TopParams.Smearing.meas_interval = 50; + TopParams.Smearing.maxTau = 2.0; TheHMC.Resources.AddObservable(TopParams); ////////////////////////////////////////////// From c7036f671754710c41de00cb0fa90a6e35104467 Mon Sep 17 00:00:00 2001 From: Guido Cossu Date: Thu, 27 Jul 2017 11:15:09 +0100 Subject: [PATCH 144/177] Adding checks for libm and libstdc++ --- configure.ac | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/configure.ac b/configure.ac index a028fb0a..bf078b13 100644 --- a/configure.ac +++ b/configure.ac @@ -58,6 +58,10 @@ AC_CHECK_HEADERS(numaif.h) AC_CHECK_DECLS([ntohll],[], [], [[#include ]]) AC_CHECK_DECLS([be64toh],[], [], [[#include ]]) +############## Standard libraries +AC_CHECK_LIB([m],[cos]) +AC_CHECK_LIB([stdc++],[abort]) + ############### GMP and MPFR AC_ARG_WITH([gmp], [AS_HELP_STRING([--with-gmp=prefix], From 
8bd869da37fc3911665213f96e431e3b60cb0332 Mon Sep 17 00:00:00 2001 From: Guido Cossu Date: Thu, 27 Jul 2017 15:12:50 +0100 Subject: [PATCH 145/177] Correcting a bug in the IO routines --- lib/parallelIO/BinaryIO.h | 133 ++++++++++++++++++++++++-------------- 1 file changed, 86 insertions(+), 47 deletions(-) diff --git a/lib/parallelIO/BinaryIO.h b/lib/parallelIO/BinaryIO.h index 117bec01..108e7ef8 100644 --- a/lib/parallelIO/BinaryIO.h +++ b/lib/parallelIO/BinaryIO.h @@ -98,35 +98,39 @@ class BinaryIO { NerscChecksum(grid,scalardata,nersc_csum); } - - template static inline void NerscChecksum(GridBase *grid,std::vector &fbuf,uint32_t &nersc_csum) + + template + static inline void NerscChecksum(GridBase *grid, std::vector &fbuf, uint32_t &nersc_csum) { - const uint64_t size32 = sizeof(fobj)/sizeof(uint32_t); + const uint64_t size32 = sizeof(fobj) / sizeof(uint32_t); - - uint64_t lsites =grid->lSites(); - if (fbuf.size()==1) { - lsites=1; + uint64_t lsites = grid->lSites(); + if (fbuf.size() == 1) + { + lsites = 1; } -#pragma omp parallel - { - uint32_t nersc_csum_thr=0; + #pragma omp parallel + { + uint32_t nersc_csum_thr = 0; -#pragma omp for - for(uint64_t local_site=0;local_site static inline void ScidacChecksum(GridBase *grid,std::vector &fbuf,uint32_t &scidac_csuma,uint32_t &scidac_csumb) { const uint64_t size32 = sizeof(fobj)/sizeof(uint32_t); @@ -266,7 +270,7 @@ class BinaryIO { grid->Barrier(); GridStopWatch timer; GridStopWatch bstimer; - + nersc_csum=0; scidac_csuma=0; scidac_csumb=0; @@ -362,18 +366,22 @@ class BinaryIO { #else assert(0); #endif - } else { - std::cout<< GridLogMessage<< "C++ read I/O "<< file<<" : " - << iodata.size()*sizeof(fobj)<<" bytes"<Barrier(); - grid->GlobalSum(nersc_csum); - grid->GlobalXOR(scidac_csuma); - grid->GlobalXOR(scidac_csumb); - grid->Barrier(); + // if the data size is 1 we do not want to sum over the MPI ranks + if (iodata.size() != 1){ + grid->Barrier(); + grid->GlobalSum(nersc_csum); + grid->GlobalXOR(scidac_csuma); 
+ grid->GlobalXOR(scidac_csumb); + grid->Barrier(); + } } ///////////////////////////////////////////////////////////////////////////// @@ -546,9 +585,9 @@ class BinaryIO { int gsites = grid->gSites(); int lsites = grid->lSites(); - uint32_t nersc_csum_tmp; - uint32_t scidac_csuma_tmp; - uint32_t scidac_csumb_tmp; + uint32_t nersc_csum_tmp = 0; + uint32_t scidac_csuma_tmp = 0; + uint32_t scidac_csumb_tmp = 0; GridStopWatch timer; From 14d53e1c9eb8eb1ef684148728c075813814612e Mon Sep 17 00:00:00 2001 From: Peter Boyle Date: Sat, 29 Jul 2017 13:06:53 -0400 Subject: [PATCH 146/177] Threaded MPI calls patches --- benchmarks/Benchmark_dwf.cc | 2 +- lib/allocator/AlignedAllocator.h | 10 ++- lib/communicator/Communicator_base.cc | 4 +- lib/communicator/Communicator_base.h | 14 ++++- lib/communicator/Communicator_mpit.cc | 25 +++++++- lib/qcd/action/fermion/WilsonFermion5D.cc | 74 ++++++++++++++--------- lib/stencil/Stencil.h | 59 +++++++++--------- lib/util/Init.cc | 6 +- 8 files changed, 128 insertions(+), 66 deletions(-) diff --git a/benchmarks/Benchmark_dwf.cc b/benchmarks/Benchmark_dwf.cc index a071c050..0264905c 100644 --- a/benchmarks/Benchmark_dwf.cc +++ b/benchmarks/Benchmark_dwf.cc @@ -489,7 +489,7 @@ int main (int argc, char ** argv) //assert(norm2(src_e)<1.0e-4); //assert(norm2(src_o)<1.0e-4); - + exit(0); Grid_finalize(); } diff --git a/lib/allocator/AlignedAllocator.h b/lib/allocator/AlignedAllocator.h index 6e85ab27..7fd9496f 100644 --- a/lib/allocator/AlignedAllocator.h +++ b/lib/allocator/AlignedAllocator.h @@ -92,11 +92,15 @@ public: size_type bytes = __n*sizeof(_Tp); _Tp *ptr = (_Tp *) PointerCache::Lookup(bytes); - + ////////////////// + // Hack 2MB align; could make option probably doesn't need configurability + ////////////////// +//define GRID_ALLOC_ALIGN (128) +#define GRID_ALLOC_ALIGN (2*1024*1024) #ifdef HAVE_MM_MALLOC_H - if ( ptr == (_Tp *) NULL ) ptr = (_Tp *) _mm_malloc(bytes,128); + if ( ptr == (_Tp *) NULL ) ptr = (_Tp *) 
_mm_malloc(bytes,GRID_ALLOC_ALIGN); #else - if ( ptr == (_Tp *) NULL ) ptr = (_Tp *) memalign(128,bytes); + if ( ptr == (_Tp *) NULL ) ptr = (_Tp *) memalign(GRID_ALLOC_ALIGN,bytes); #endif return ptr; diff --git a/lib/communicator/Communicator_base.cc b/lib/communicator/Communicator_base.cc index a5edf8e9..67bfaed0 100644 --- a/lib/communicator/Communicator_base.cc +++ b/lib/communicator/Communicator_base.cc @@ -34,7 +34,9 @@ namespace Grid { /////////////////////////////////////////////////////////////// void * CartesianCommunicator::ShmCommBuf; uint64_t CartesianCommunicator::MAX_MPI_SHM_BYTES = 128*1024*1024; -CartesianCommunicator::CommunicatorPolicy_t CartesianCommunicator::CommunicatorPolicy= CartesianCommunicator::CommunicatorPolicyConcurrent; +CartesianCommunicator::CommunicatorPolicy_t +CartesianCommunicator::CommunicatorPolicy= CartesianCommunicator::CommunicatorPolicyConcurrent; +int CartesianCommunicator::nCommThreads = -1; ///////////////////////////////// // Alloc, free shmem region diff --git a/lib/communicator/Communicator_base.h b/lib/communicator/Communicator_base.h index 4e471b43..84dbedb4 100644 --- a/lib/communicator/Communicator_base.h +++ b/lib/communicator/Communicator_base.h @@ -54,8 +54,9 @@ class CartesianCommunicator { // 128MB shared memory for comms enought for 48^4 local vol comms // Give external control (command line override?) of this - static const int MAXLOG2RANKSPERNODE = 16; - static uint64_t MAX_MPI_SHM_BYTES; + static const int MAXLOG2RANKSPERNODE = 16; + static uint64_t MAX_MPI_SHM_BYTES; + static int nCommThreads; // Communicator should know nothing of the physics grid, only processor grid. 
int _Nprocessors; // How many in all @@ -125,7 +126,7 @@ class CartesianCommunicator { enum CommunicatorPolicy_t { CommunicatorPolicyConcurrent, CommunicatorPolicySequential }; static CommunicatorPolicy_t CommunicatorPolicy; static void SetCommunicatorPolicy(CommunicatorPolicy_t policy ) { CommunicatorPolicy = policy; } - + size_t heap_top; size_t heap_bytes; @@ -215,6 +216,12 @@ class CartesianCommunicator { void SendToRecvFromComplete(std::vector &waitall); + double StencilSendToRecvFrom(void *xmit, + int xmit_to_rank, + void *recv, + int recv_from_rank, + int bytes,int dir); + double StencilSendToRecvFromBegin(std::vector &list, void *xmit, int xmit_to_rank, @@ -222,6 +229,7 @@ class CartesianCommunicator { int recv_from_rank, int bytes,int dir); + void StencilSendToRecvFromComplete(std::vector &waitall,int i); void StencilBarrier(void); diff --git a/lib/communicator/Communicator_mpit.cc b/lib/communicator/Communicator_mpit.cc index 24a518ec..f522701c 100644 --- a/lib/communicator/Communicator_mpit.cc +++ b/lib/communicator/Communicator_mpit.cc @@ -242,7 +242,8 @@ double CartesianCommunicator::StencilSendToRecvFromBegin(std::vector &waitall,int dir) { diff --git a/lib/qcd/action/fermion/WilsonFermion5D.cc b/lib/qcd/action/fermion/WilsonFermion5D.cc index 6a6bc1f8..0b6c9e3d 100644 --- a/lib/qcd/action/fermion/WilsonFermion5D.cc +++ b/lib/qcd/action/fermion/WilsonFermion5D.cc @@ -391,37 +391,57 @@ void WilsonFermion5D::DhopInternalOverlappedComms(StencilImpl & st, Lebesg DhopFaceTime+=usecond(); // Rely on async comms; start comms before merge of local data - DhopComputeTime-=usecond(); - DhopCommTime-=usecond(); -#pragma omp parallel + double ctime=0; + double ptime=0; + // DhopComputeTime-=usecond(); + // DhopCommTime-=usecond(); +#pragma omp parallel reduction(max:ctime) reduction(max:ptime) { - // Should time this somehow; hard as the threads fork nowait - st.CommunicateThreaded(); - - if (dag == DaggerYes) { -#pragma omp for - for (int ss = 0; ss < 
U._grid->oSites(); ss++) { - int sU = ss; - int sF = LLs * sU; - Kernels::DhopSiteDag(st,lo,U,st.CommBuf(),sF,sU,LLs,1,in,out,1,0); + int tid = omp_get_thread_num(); + int nthreads = omp_get_num_threads(); + int ncomms = CartesianCommunicator::nCommThreads; + if (ncomms == -1) ncomms = st.Packets.size(); + assert(nthreads > ncomms); + if (tid >= ncomms) { + double start = usecond(); + nthreads -= ncomms; + int ttid = tid - ncomms; + int n = U._grid->oSites(); + int chunk = n / nthreads; + int rem = n % nthreads; + int myblock, myn; + if (ttid < rem) { + myblock = ttid * chunk + ttid; + myn = chunk+1; + } else { + myblock = ttid*chunk + rem; + myn = chunk; + } + + // do the compute + if (dag == DaggerYes) { + for (int ss = myblock; ss < myblock+myn; ++ss) { + int sU = ss; + int sF = LLs * sU; + Kernels::DhopSiteDag(st,lo,U,st.CommBuf(),sF,sU,LLs,1,in,out,1,0); + } + } else { + for (int ss = myblock; ss < myblock+myn; ++ss) { + int sU = ss; + int sF = LLs * sU; + Kernels::DhopSite(st,lo,U,st.CommBuf(),sF,sU,LLs,1,in,out,1,0); + } + } + ptime = usecond() - start; } - } else { -#pragma omp for - for (int ss = 0; ss < U._grid->oSites(); ss++) { - int sU = ss; - int sF = LLs * sU; - Kernels::DhopSite(st,lo,U,st.CommBuf(),sF,sU,LLs,1,in,out,1,0); + { + double start = usecond(); + st.CommunicateThreaded(); + ctime = usecond() - start; } } -#pragma omp single - DhopComputeTime+=usecond(); - -#pragma omp taskwait - -#pragma omp single - DhopCommTime+=usecond(); - } // Closes parallel region and waits the comms (I hope) - + DhopCommTime += ctime; + DhopComputeTime+=ptime; DhopFaceTime-=usecond(); st.CommsMerge(compressor); diff --git a/lib/stencil/Stencil.h b/lib/stencil/Stencil.h index 17db64d8..d1d7a7e0 100644 --- a/lib/stencil/Stencil.h +++ b/lib/stencil/Stencil.h @@ -185,6 +185,8 @@ class CartesianStencil { // Stencil runs along coordinate axes only; NO diagonal double splicetime; double nosplicetime; double calls; + std::vector comms_bytesthr; + std::vector commtimethr; 
//////////////////////////////////////// // Stencil query @@ -250,36 +252,22 @@ class CartesianStencil { // Stencil runs along coordinate axes only; NO diagonal ////////////////////////////////////////// void CommunicateThreaded() { - for(int i=0;i reqs; - bytes=_grid->StencilSendToRecvFromBegin(reqs, - Packets[i].send_buf, - Packets[i].to_rank, - Packets[i].recv_buf, - Packets[i].from_rank, - Packets[i].bytes,i); - _grid->StencilSendToRecvFromComplete(reqs,i); - // Last task logged; this is approximate but hard to catch - // the last to complete - stop = usecond(); - stop = stop - start; - - if ( i==0 ) commtime+=stop; - -#pragma omp critical - { - comms_bytes+=bytes; - } - + // must be called in parallel region + int mythread = omp_get_thread_num(); + int nthreads = CartesianCommunicator::nCommThreads; + if (nthreads == -1) nthreads = Packets.size(); + if (mythread < nthreads) { + for (int i = mythread; i < Packets.size(); i += nthreads) { + double start = usecond(); + uint64_t bytes = _grid->StencilSendToRecvFrom(Packets[i].send_buf, + Packets[i].to_rank, + Packets[i].recv_buf, + Packets[i].from_rank, + Packets[i].bytes,i); + comms_bytesthr[mythread] += bytes; + commtimethr[mythread] += usecond() - start; } } - } void CommunicateBegin(std::vector > &reqs) { @@ -475,7 +463,10 @@ class CartesianStencil { // Stencil runs along coordinate axes only; NO diagonal int checkerboard, const std::vector &directions, const std::vector &distances) - : _permute_type(npoints), _comm_buf_size(npoints) + : _permute_type(npoints), + _comm_buf_size(npoints), + comms_bytesthr(npoints), + commtimethr(npoints) { face_table_computed=0; _npoints = npoints; @@ -1029,6 +1020,8 @@ class CartesianStencil { // Stencil runs along coordinate axes only; NO diagonal void ZeroCounters(void) { gathertime = 0.; commtime = 0.; + memset(&commtimethr[0], 0, sizeof(commtimethr)); + memset(&comms_bytesthr[0], 0, sizeof(comms_bytesthr)); halogtime = 0.; mergetime = 0.; decompresstime = 0.; @@ -1044,6 
+1037,14 @@ class CartesianStencil { // Stencil runs along coordinate axes only; NO diagonal #define PRINTIT(A) AVERAGE(A); std::cout << GridLogMessage << " Stencil " << #A << " "<< A/calls<_Nprocessors; RealD NN = _grid->NodeCount(); + double t = 0; + // if commtimethr is set they were all done in parallel so take the max + // but add up the bytes + for (int i = 0; i < 8; ++i) { + comms_bytes += comms_bytesthr[i]; + if (t < commtimethr[i]) t = commtimethr[i]; + } + commtime += t; _grid->GlobalSum(commtime); commtime/=NP; if ( calls > 0. ) { diff --git a/lib/util/Init.cc b/lib/util/Init.cc index fc701ac1..ef875429 100644 --- a/lib/util/Init.cc +++ b/lib/util/Init.cc @@ -359,7 +359,11 @@ void Grid_init(int *argc,char ***argv) if( GridCmdOptionExists(*argv,*argv+*argc,"--lebesgue") ){ LebesgueOrder::UseLebesgueOrder=1; } - + CartesianCommunicator::nCommThreads = -1; + if( GridCmdOptionExists(*argv,*argv+*argc,"--commthreads") ){ + arg= GridCmdOptionPayload(*argv,*argv+*argc,"--commthreads"); + GridCmdOptionInt(arg,CartesianCommunicator::nCommThreads); + } if( GridCmdOptionExists(*argv,*argv+*argc,"--cacheblocking") ){ arg= GridCmdOptionPayload(*argv,*argv+*argc,"--cacheblocking"); GridCmdOptionIntVector(arg,LebesgueOrder::Block); From 175f393f9d1b3dda4da435a6d995003eddb7b257 Mon Sep 17 00:00:00 2001 From: Guido Cossu Date: Fri, 4 Aug 2017 12:14:10 +0100 Subject: [PATCH 147/177] Binary IO error checking --- lib/parallelIO/BinaryIO.h | 34 +++++++++++++++++++++++++++------- 1 file changed, 27 insertions(+), 7 deletions(-) diff --git a/lib/parallelIO/BinaryIO.h b/lib/parallelIO/BinaryIO.h index 108e7ef8..f56f6514 100644 --- a/lib/parallelIO/BinaryIO.h +++ b/lib/parallelIO/BinaryIO.h @@ -413,13 +413,33 @@ class BinaryIO { timer.Start(); if ( (control & BINARYIO_LEXICOGRAPHIC) && (nrank > 1) ) { #ifdef USE_MPI_IO - std::cout<< GridLogMessage<< "MPI write I/O "<< file<< std::endl; - ierr=MPI_File_open(grid->communicator,(char *) file.c_str(), 
MPI_MODE_RDWR|MPI_MODE_CREATE,MPI_INFO_NULL, &fh); assert(ierr==0); - ierr=MPI_File_set_view(fh, disp, mpiObject, fileArray, "native", MPI_INFO_NULL); assert(ierr==0); - ierr=MPI_File_write_all(fh, &iodata[0], 1, localArray, &status); assert(ierr==0); - MPI_File_close(&fh); - MPI_Type_free(&fileArray); - MPI_Type_free(&localArray); + std::cout << GridLogMessage << "MPI write I/O " << file << std::endl; + ierr = MPI_File_open(grid->communicator, (char *)file.c_str(), MPI_MODE_RDWR | MPI_MODE_CREATE, MPI_INFO_NULL, &fh); + std::cout << GridLogMessage << "Checking for errors" << std::endl; + if (ierr != MPI_SUCCESS) + { + char error_string[BUFSIZ]; + int length_of_error_string, error_class; + + MPI_Error_class(ierr, &error_class); + MPI_Error_string(error_class, error_string, &length_of_error_string); + fprintf(stderr, "%3d: %s\n", myrank, error_string); + MPI_Error_string(ierr, error_string, &length_of_error_string); + fprintf(stderr, "%3d: %s\n", myrank, error_string); + MPI_Abort(MPI_COMM_WORLD, 1); //assert(ierr == 0); + } + + std::cout << GridLogDebug << "MPI read I/O set view " << file << std::endl; + ierr = MPI_File_set_view(fh, disp, mpiObject, fileArray, "native", MPI_INFO_NULL); + assert(ierr == 0); + + std::cout << GridLogDebug << "MPI read I/O write all " << file << std::endl; + ierr = MPI_File_write_all(fh, &iodata[0], 1, localArray, &status); + assert(ierr == 0); + + MPI_File_close(&fh); + MPI_Type_free(&fileArray); + MPI_Type_free(&localArray); #else assert(0); #endif From 4fe182e5a7c4b1d1dddc022706a71f1c0432cda5 Mon Sep 17 00:00:00 2001 From: Guido Cossu Date: Sun, 6 Aug 2017 10:46:19 +0100 Subject: [PATCH 148/177] Added high level HMC support for overriding default SIMD lane decomposition --- lib/cartesian/Cartesian_base.h | 23 ++++---- lib/qcd/hmc/HMCResourceManager.h | 14 ++++- lib/qcd/hmc/HMC_GridModules.h | 92 +++++++++++++++++++++-------- lib/util/Init.cc | 2 +- tests/hmc/Test_hmc_EOMobiusRatio.cc | 13 ++-- 5 files changed, 98 insertions(+), 46 
deletions(-) diff --git a/lib/cartesian/Cartesian_base.h b/lib/cartesian/Cartesian_base.h index 0db6ce0d..f4f9a269 100644 --- a/lib/cartesian/Cartesian_base.h +++ b/lib/cartesian/Cartesian_base.h @@ -185,17 +185,18 @@ public: //////////////////////////////////////////////////////////////// void show_decomposition(){ - std::cout << GridLogMessage << "Full Dimensions : " << _fdimensions << std::endl; - std::cout << GridLogMessage << "Global Dimensions : " << _gdimensions << std::endl; - std::cout << GridLogMessage << "Local Dimensions : " << _ldimensions << std::endl; - std::cout << GridLogMessage << "Reduced Dimensions : " << _rdimensions << std::endl; - std::cout << GridLogMessage << "Outer strides : " << _ostride << std::endl; - std::cout << GridLogMessage << "Inner strides : " << _istride << std::endl; - std::cout << GridLogMessage << "iSites : " << _isites << std::endl; - std::cout << GridLogMessage << "oSites : " << _osites << std::endl; - std::cout << GridLogMessage << "lSites : " << lSites() << std::endl; - std::cout << GridLogMessage << "gSites : " << gSites() << std::endl; - std::cout << GridLogMessage << "Nd : " << _ndimension << std::endl; + std::cout << GridLogMessage << "\tFull Dimensions : " << _fdimensions << std::endl; + std::cout << GridLogMessage << "\tSIMD layout : " << _simd_layout << std::endl; + std::cout << GridLogMessage << "\tGlobal Dimensions : " << _gdimensions << std::endl; + std::cout << GridLogMessage << "\tLocal Dimensions : " << _ldimensions << std::endl; + std::cout << GridLogMessage << "\tReduced Dimensions : " << _rdimensions << std::endl; + std::cout << GridLogMessage << "\tOuter strides : " << _ostride << std::endl; + std::cout << GridLogMessage << "\tInner strides : " << _istride << std::endl; + std::cout << GridLogMessage << "\tiSites : " << _isites << std::endl; + std::cout << GridLogMessage << "\toSites : " << _osites << std::endl; + std::cout << GridLogMessage << "\tlSites : " << lSites() << std::endl; + std::cout << 
GridLogMessage << "\tgSites : " << gSites() << std::endl; + std::cout << GridLogMessage << "\tNd : " << _ndimension << std::endl; } //////////////////////////////////////////////////////////////// diff --git a/lib/qcd/hmc/HMCResourceManager.h b/lib/qcd/hmc/HMCResourceManager.h index cf0000ed..3e20a8c1 100644 --- a/lib/qcd/hmc/HMCResourceManager.h +++ b/lib/qcd/hmc/HMCResourceManager.h @@ -165,7 +165,7 @@ class HMCResourceManager { // Grids ////////////////////////////////////////////////////////////// - void AddGrid(std::string s, GridModule& M) { + void AddGrid(const std::string s, GridModule& M) { // Check for name clashes auto search = Grids.find(s); if (search != Grids.end()) { @@ -174,14 +174,24 @@ class HMCResourceManager { exit(1); } Grids[s] = std::move(M); + std::cout << GridLogMessage << "::::::::::::::::::::::::::::::::::::::::" < Mod; AddGrid(s, Mod); } + // Add a named grid set, 4d shortcut + tweak simd lanes + void AddFourDimGrid(const std::string s, const std::vector simd_decomposition) { + GridFourDimModule Mod(simd_decomposition); + AddGrid(s, Mod); + } GridCartesian* GetCartesian(std::string s = "") { diff --git a/lib/qcd/hmc/HMC_GridModules.h b/lib/qcd/hmc/HMC_GridModules.h index 8331c02b..0f34e9a7 100644 --- a/lib/qcd/hmc/HMC_GridModules.h +++ b/lib/qcd/hmc/HMC_GridModules.h @@ -33,28 +33,29 @@ directory namespace Grid { // Resources -// Modules for grids +// Modules for grids // Introduce another namespace HMCModules? 
-class GridModuleParameters: Serializable{ +class GridModuleParameters: Serializable{ public: GRID_SERIALIZABLE_CLASS_MEMBERS(GridModuleParameters, std::string, lattice, std::string, mpi); - std::vector getLattice(){return strToVec(lattice);} - std::vector getMpi() {return strToVec(mpi);} + std::vector getLattice() const {return strToVec(lattice);} + std::vector getMpi() const {return strToVec(mpi);} - void check(){ - if (getLattice().size() != getMpi().size()) { - std::cout << GridLogError + + void check() const { + if (getLattice().size() != getMpi().size() ) { + std::cout << GridLogError << "Error in GridModuleParameters: lattice and mpi dimensions " "do not match" << std::endl; exit(1); } - } + } template GridModuleParameters(Reader& Reader, std::string n = "LatticeGrid"):name(n) { @@ -75,51 +76,94 @@ private: // Lower level class class GridModule { public: - GridCartesian* get_full() { + GridCartesian* get_full() { std::cout << GridLogDebug << "Getting cartesian in module"<< std::endl; return grid_.get(); } - GridRedBlackCartesian* get_rb() { + GridRedBlackCartesian* get_rb() { std::cout << GridLogDebug << "Getting rb-cartesian in module"<< std::endl; return rbgrid_.get(); } void set_full(GridCartesian* grid) { grid_.reset(grid); } void set_rb(GridRedBlackCartesian* rbgrid) { rbgrid_.reset(rbgrid); } + void show_full_decomposition(){ grid_->show_decomposition(); } + void show_rb_decomposition(){ rbgrid_->show_decomposition(); } protected: std::unique_ptr grid_; std::unique_ptr rbgrid_; - + }; //////////////////////////////////// // Classes for the user //////////////////////////////////// // Note: the space time grid should be out of the QCD namespace -template< class vector_type> -class GridFourDimModule : public GridModule { - public: - GridFourDimModule() { +template +class GridFourDimModule : public GridModule +{ +public: + GridFourDimModule() + { using namespace QCD; set_full(SpaceTimeGrid::makeFourDimGrid( - GridDefaultLatt(), GridDefaultSimd(4, 
vector_type::Nsimd()), + GridDefaultLatt(), + GridDefaultSimd(4, vector_type::Nsimd()), GridDefaultMpi())); set_rb(SpaceTimeGrid::makeFourDimRedBlackGrid(grid_.get())); } - GridFourDimModule(GridModuleParameters Params) { + GridFourDimModule(const std::vector tweak_simd) + { + using namespace QCD; + if (tweak_simd.size() != 4) + { + std::cout << GridLogError + << "Error in GridFourDimModule: SIMD size different from 4" + << std::endl; + exit(1); + } + + // Checks that the product agrees with the expectation + int simd_sum = 1; + for (auto &n : tweak_simd) + simd_sum *= n; + std::cout << GridLogDebug << "TweakSIMD: " << tweak_simd << " Sum: " << simd_sum << std::endl; + + if (simd_sum == vector_type::Nsimd()) + { + set_full(SpaceTimeGrid::makeFourDimGrid( + GridDefaultLatt(), + tweak_simd, + GridDefaultMpi())); + set_rb(SpaceTimeGrid::makeFourDimRedBlackGrid(grid_.get())); + } + else + { + std::cout << GridLogError + << "Error in GridFourDimModule: SIMD lanes must sum to " + << vector_type::Nsimd() + << std::endl; + } + } + + GridFourDimModule(const GridModuleParameters Params) + { using namespace QCD; - Params.check(); std::vector lattice_v = Params.getLattice(); std::vector mpi_v = Params.getMpi(); - if (lattice_v.size() == 4) { + if (lattice_v.size() == 4) + { set_full(SpaceTimeGrid::makeFourDimGrid( - lattice_v, GridDefaultSimd(4, vector_type::Nsimd()), + lattice_v, + GridDefaultSimd(4, vector_type::Nsimd()), mpi_v)); set_rb(SpaceTimeGrid::makeFourDimRedBlackGrid(grid_.get())); - } else { - std::cout << GridLogError - << "Error in GridFourDimModule: lattice dimension different from 4" - << std::endl; + } + else + { + std::cout << GridLogError + << "Error in GridFourDimModule: lattice dimension different from 4" + << std::endl; exit(1); } } diff --git a/lib/util/Init.cc b/lib/util/Init.cc index fe3b1734..35a569ba 100644 --- a/lib/util/Init.cc +++ b/lib/util/Init.cc @@ -377,7 +377,7 @@ void Grid_init(int *argc,char ***argv) std::cout << GridLogDebug << "Requesting 
"<< CartesianCommunicator::MAX_MPI_SHM_BYTES <<" byte stencil comms buffers "< - //FermionParameters(Reader& Reader){ - // read(Reader, "Mobius", *this); - //} - }; @@ -113,9 +107,12 @@ int main(int argc, char **argv) { bool ApplySmearing = MyParams.Mobius.ApplySmearing; + // Use this if you want to tweak the default decomposition + std::vector simd_lanes({2,2,1,1}); - // Grid from the command line - TheHMC.Resources.AddFourDimGrid("gauge"); + // Grid from the command line arguments --grid and --mpi + // drop the simd_lanes argument to fall back to the default decomposition for the SIMD lanes + TheHMC.Resources.AddFourDimGrid("gauge", simd_lanes); // Possibile to create the module by hand // hardcoding parameters or using a Reader From dbe4d7850c1e132f538e4aead7869ba703a21ec5 Mon Sep 17 00:00:00 2001 From: Guido Cossu Date: Sun, 6 Aug 2017 10:49:45 +0100 Subject: [PATCH 149/177] Make a test file compatible with all architectures --- tests/hmc/Test_hmc_EOMobiusRatio.cc | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/tests/hmc/Test_hmc_EOMobiusRatio.cc b/tests/hmc/Test_hmc_EOMobiusRatio.cc index 4b4555e3..d6ca96db 100644 --- a/tests/hmc/Test_hmc_EOMobiusRatio.cc +++ b/tests/hmc/Test_hmc_EOMobiusRatio.cc @@ -108,11 +108,16 @@ int main(int argc, char **argv) { // Use this if you want to tweak the default decomposition - std::vector simd_lanes({2,2,1,1}); + // commented out as very architecture speficic + + //std::vector simd_lanes({2,2,1,1}); // Grid from the command line arguments --grid and --mpi // drop the simd_lanes argument to fall back to the default decomposition for the SIMD lanes - TheHMC.Resources.AddFourDimGrid("gauge", simd_lanes); + + //TheHMC.Resources.AddFourDimGrid("gauge", simd_lanes); // tweak the SIMD lanes + TheHMC.Resources.AddFourDimGrid("gauge"); // use default simd lanes decomposition + // Possibile to create the module by hand // hardcoding parameters or using a Reader From 06e6f8de00528ede75f248f98d48eca715d79630 
Mon Sep 17 00:00:00 2001 From: Guido Cossu Date: Tue, 8 Aug 2017 10:22:12 +0100 Subject: [PATCH 150/177] Check that the reduced dim is an integer --- lib/cartesian/Cartesian_red_black.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/lib/cartesian/Cartesian_red_black.h b/lib/cartesian/Cartesian_red_black.h index 3037de00..e58999c5 100644 --- a/lib/cartesian/Cartesian_red_black.h +++ b/lib/cartesian/Cartesian_red_black.h @@ -176,7 +176,8 @@ public: // Use a reduced simd grid _simd_layout[d] = simd_layout[d]; - _rdimensions[d]= _ldimensions[d]/_simd_layout[d]; + _rdimensions[d]= _ldimensions[d]/_simd_layout[d]; // this is not checking if this is integer + assert(_rdimensions[d]*_simd_layout[d] == _ldimensions[d]); assert(_rdimensions[d]>0); // all elements of a simd vector must have same checkerboard. From 44051aecd1eb0abc7a61ac814654491804455347 Mon Sep 17 00:00:00 2001 From: Guido Cossu Date: Tue, 8 Aug 2017 10:31:12 +0100 Subject: [PATCH 151/177] Checking for integer divisions in cartesian full --- lib/cartesian/Cartesian_full.h | 130 +++++++++++++++++---------------- 1 file changed, 67 insertions(+), 63 deletions(-) diff --git a/lib/cartesian/Cartesian_full.h b/lib/cartesian/Cartesian_full.h index b0e47fa4..815e3b22 100644 --- a/lib/cartesian/Cartesian_full.h +++ b/lib/cartesian/Cartesian_full.h @@ -62,77 +62,81 @@ public: return shift; } GridCartesian(const std::vector &dimensions, - const std::vector &simd_layout, - const std::vector &processor_grid - ) : GridBase(processor_grid) + const std::vector &simd_layout, + const std::vector &processor_grid) : GridBase(processor_grid) { - /////////////////////// - // Grid information - /////////////////////// - _ndimension = dimensions.size(); - - _fdimensions.resize(_ndimension); - _gdimensions.resize(_ndimension); - _ldimensions.resize(_ndimension); - _rdimensions.resize(_ndimension); - _simd_layout.resize(_ndimension); - _lstart.resize(_ndimension); - _lend.resize(_ndimension); - - 
_ostride.resize(_ndimension); - _istride.resize(_ndimension); - - _fsites = _gsites = _osites = _isites = 1; + /////////////////////// + // Grid information + /////////////////////// + _ndimension = dimensions.size(); - for(int d=0;d<_ndimension;d++){ - _fdimensions[d] = dimensions[d]; // Global dimensions - _gdimensions[d] = _fdimensions[d]; // Global dimensions - _simd_layout[d] = simd_layout[d]; - _fsites = _fsites * _fdimensions[d]; - _gsites = _gsites * _gdimensions[d]; + _fdimensions.resize(_ndimension); + _gdimensions.resize(_ndimension); + _ldimensions.resize(_ndimension); + _rdimensions.resize(_ndimension); + _simd_layout.resize(_ndimension); + _lstart.resize(_ndimension); + _lend.resize(_ndimension); - //FIXME check for exact division + _ostride.resize(_ndimension); + _istride.resize(_ndimension); - // Use a reduced simd grid - _ldimensions[d]= _gdimensions[d]/_processors[d]; //local dimensions - _rdimensions[d]= _ldimensions[d]/_simd_layout[d]; //overdecomposition - _lstart[d] = _processor_coor[d]*_ldimensions[d]; - _lend[d] = _processor_coor[d]*_ldimensions[d]+_ldimensions[d]-1; - _osites *= _rdimensions[d]; - _isites *= _simd_layout[d]; - - // Addressing support - if ( d==0 ) { - _ostride[d] = 1; - _istride[d] = 1; - } else { - _ostride[d] = _ostride[d-1]*_rdimensions[d-1]; - _istride[d] = _istride[d-1]*_simd_layout[d-1]; - } + _fsites = _gsites = _osites = _isites = 1; + + for (int d = 0; d < _ndimension; d++) + { + _fdimensions[d] = dimensions[d]; // Global dimensions + _gdimensions[d] = _fdimensions[d]; // Global dimensions + _simd_layout[d] = simd_layout[d]; + _fsites = _fsites * _fdimensions[d]; + _gsites = _gsites * _gdimensions[d]; + + // Use a reduced simd grid + _ldimensions[d] = _gdimensions[d] / _processors[d]; //local dimensions + assert(_ldimensions[d] * _processors[d] == _gdimensions[d]); + + _rdimensions[d] = _ldimensions[d] / _simd_layout[d]; //overdecomposition + assert(_rdimensions[d] * _simd_layout[d] == _ldimensions[d]); + + 
_lstart[d] = _processor_coor[d] * _ldimensions[d]; + _lend[d] = _processor_coor[d] * _ldimensions[d] + _ldimensions[d] - 1; + _osites *= _rdimensions[d]; + _isites *= _simd_layout[d]; + + // Addressing support + if (d == 0) + { + _ostride[d] = 1; + _istride[d] = 1; } - - /////////////////////// - // subplane information - /////////////////////// - _slice_block.resize(_ndimension); - _slice_stride.resize(_ndimension); - _slice_nblock.resize(_ndimension); - - int block =1; - int nblock=1; - for(int d=0;d<_ndimension;d++) nblock*=_rdimensions[d]; - - for(int d=0;d<_ndimension;d++){ - nblock/=_rdimensions[d]; - _slice_block[d] =block; - _slice_stride[d]=_ostride[d]*_rdimensions[d]; - _slice_nblock[d]=nblock; - block = block*_rdimensions[d]; + else + { + _ostride[d] = _ostride[d - 1] * _rdimensions[d - 1]; + _istride[d] = _istride[d - 1] * _simd_layout[d - 1]; } + } + /////////////////////// + // subplane information + /////////////////////// + _slice_block.resize(_ndimension); + _slice_stride.resize(_ndimension); + _slice_nblock.resize(_ndimension); + + int block = 1; + int nblock = 1; + for (int d = 0; d < _ndimension; d++) + nblock *= _rdimensions[d]; + + for (int d = 0; d < _ndimension; d++) + { + nblock /= _rdimensions[d]; + _slice_block[d] = block; + _slice_stride[d] = _ostride[d] * _rdimensions[d]; + _slice_nblock[d] = nblock; + block = block * _rdimensions[d]; + } }; }; - - } #endif From 8a3fe60a27e4573faca940efd33d18a7d468c764 Mon Sep 17 00:00:00 2001 From: Guido Cossu Date: Tue, 8 Aug 2017 11:36:20 +0100 Subject: [PATCH 152/177] Added more asserts at grid creation time --- lib/cartesian/Cartesian_red_black.h | 192 +++++++++++++++------------- 1 file changed, 105 insertions(+), 87 deletions(-) diff --git a/lib/cartesian/Cartesian_red_black.h b/lib/cartesian/Cartesian_red_black.h index e58999c5..b1a5b9ef 100644 --- a/lib/cartesian/Cartesian_red_black.h +++ b/lib/cartesian/Cartesian_red_black.h @@ -131,21 +131,21 @@ public: 
Init(dimensions,simd_layout,processor_grid,checker_dim_mask,0); } void Init(const std::vector &dimensions, - const std::vector &simd_layout, - const std::vector &processor_grid, - const std::vector &checker_dim_mask, - int checker_dim) + const std::vector &simd_layout, + const std::vector &processor_grid, + const std::vector &checker_dim_mask, + int checker_dim) { - /////////////////////// - // Grid information - /////////////////////// + /////////////////////// + // Grid information + /////////////////////// _checker_dim = checker_dim; - assert(checker_dim_mask[checker_dim]==1); + assert(checker_dim_mask[checker_dim] == 1); _ndimension = dimensions.size(); - assert(checker_dim_mask.size()==_ndimension); - assert(processor_grid.size()==_ndimension); - assert(simd_layout.size()==_ndimension); - + assert(checker_dim_mask.size() == _ndimension); + assert(processor_grid.size() == _ndimension); + assert(simd_layout.size() == _ndimension); + _fdimensions.resize(_ndimension); _gdimensions.resize(_ndimension); _ldimensions.resize(_ndimension); @@ -153,115 +153,133 @@ public: _simd_layout.resize(_ndimension); _lstart.resize(_ndimension); _lend.resize(_ndimension); - + _ostride.resize(_ndimension); _istride.resize(_ndimension); - + _fsites = _gsites = _osites = _isites = 1; - - _checker_dim_mask=checker_dim_mask; - for(int d=0;d<_ndimension;d++){ - _fdimensions[d] = dimensions[d]; - _gdimensions[d] = _fdimensions[d]; - _fsites = _fsites * _fdimensions[d]; - _gsites = _gsites * _gdimensions[d]; - - if (d==_checker_dim) { - _gdimensions[d] = _gdimensions[d]/2; // Remove a checkerboard - } - _ldimensions[d] = _gdimensions[d]/_processors[d]; - _lstart[d] = _processor_coor[d]*_ldimensions[d]; - _lend[d] = _processor_coor[d]*_ldimensions[d]+_ldimensions[d]-1; + _checker_dim_mask = checker_dim_mask; - // Use a reduced simd grid - _simd_layout[d] = simd_layout[d]; - _rdimensions[d]= _ldimensions[d]/_simd_layout[d]; // this is not checking if this is integer - 
assert(_rdimensions[d]*_simd_layout[d] == _ldimensions[d]); - assert(_rdimensions[d]>0); + for (int d = 0; d < _ndimension; d++) + { + _fdimensions[d] = dimensions[d]; + _gdimensions[d] = _fdimensions[d]; + _fsites = _fsites * _fdimensions[d]; + _gsites = _gsites * _gdimensions[d]; - // all elements of a simd vector must have same checkerboard. - // If Ls vectorised, this must still be the case; e.g. dwf rb5d - if ( _simd_layout[d]>1 ) { - if ( checker_dim_mask[d] ) { - assert( (_rdimensions[d]&0x1) == 0 ); - } - } + if (d == _checker_dim) + { + assert((_gdimensions[d] & 0x1) == 0); + _gdimensions[d] = _gdimensions[d] / 2; // Remove a checkerboard + } + _ldimensions[d] = _gdimensions[d] / _processors[d]; + assert(_ldimensions[d] * _processors[d] == _gdimensions[d]); + _lstart[d] = _processor_coor[d] * _ldimensions[d]; + _lend[d] = _processor_coor[d] * _ldimensions[d] + _ldimensions[d] - 1; - _osites *= _rdimensions[d]; - _isites *= _simd_layout[d]; - - // Addressing support - if ( d==0 ) { - _ostride[d] = 1; - _istride[d] = 1; - } else { - _ostride[d] = _ostride[d-1]*_rdimensions[d-1]; - _istride[d] = _istride[d-1]*_simd_layout[d-1]; - } + // Use a reduced simd grid + _simd_layout[d] = simd_layout[d]; + _rdimensions[d] = _ldimensions[d] / _simd_layout[d]; // this is not checking if this is integer + assert(_rdimensions[d] * _simd_layout[d] == _ldimensions[d]); + assert(_rdimensions[d] > 0); + // all elements of a simd vector must have same checkerboard. + // If Ls vectorised, this must still be the case; e.g. 
dwf rb5d + if (_simd_layout[d] > 1) + { + if (checker_dim_mask[d]) + { + assert((_rdimensions[d] & 0x1) == 0); + } + } + _osites *= _rdimensions[d]; + _isites *= _simd_layout[d]; + + // Addressing support + if (d == 0) + { + _ostride[d] = 1; + _istride[d] = 1; + } + else + { + _ostride[d] = _ostride[d - 1] * _rdimensions[d - 1]; + _istride[d] = _istride[d - 1] * _simd_layout[d - 1]; + } } - + //////////////////////////////////////////////////////////////////////////////////////////// // subplane information //////////////////////////////////////////////////////////////////////////////////////////// _slice_block.resize(_ndimension); _slice_stride.resize(_ndimension); _slice_nblock.resize(_ndimension); - - int block =1; - int nblock=1; - for(int d=0;d<_ndimension;d++) nblock*=_rdimensions[d]; - - for(int d=0;d<_ndimension;d++){ - nblock/=_rdimensions[d]; - _slice_block[d] =block; - _slice_stride[d]=_ostride[d]*_rdimensions[d]; - _slice_nblock[d]=nblock; - block = block*_rdimensions[d]; + + int block = 1; + int nblock = 1; + for (int d = 0; d < _ndimension; d++) + nblock *= _rdimensions[d]; + + for (int d = 0; d < _ndimension; d++) + { + nblock /= _rdimensions[d]; + _slice_block[d] = block; + _slice_stride[d] = _ostride[d] * _rdimensions[d]; + _slice_nblock[d] = nblock; + block = block * _rdimensions[d]; } //////////////////////////////////////////////// // Create a checkerboard lookup table //////////////////////////////////////////////// int rvol = 1; - for(int d=0;d<_ndimension;d++){ - rvol=rvol * _rdimensions[d]; + for (int d = 0; d < _ndimension; d++) + { + rvol = rvol * _rdimensions[d]; } _checker_board.resize(rvol); - for(int osite=0;osite<_osites;osite++){ - _checker_board[osite] = CheckerBoardFromOindex (osite); + for (int osite = 0; osite < _osites; osite++) + { + _checker_board[osite] = CheckerBoardFromOindex(osite); } - }; -protected: + + protected: virtual int oIndex(std::vector &coor) { - int idx=0; - for(int d=0;d<_ndimension;d++) { - if( 
d==_checker_dim ) { - idx+=_ostride[d]*((coor[d]/2)%_rdimensions[d]); - } else { - idx+=_ostride[d]*(coor[d]%_rdimensions[d]); - } + int idx = 0; + for (int d = 0; d < _ndimension; d++) + { + if (d == _checker_dim) + { + idx += _ostride[d] * ((coor[d] / 2) % _rdimensions[d]); + } + else + { + idx += _ostride[d] * (coor[d] % _rdimensions[d]); + } } return idx; }; - + virtual int iIndex(std::vector &lcoor) { - int idx=0; - for(int d=0;d<_ndimension;d++) { - if( d==_checker_dim ) { - idx+=_istride[d]*(lcoor[d]/(2*_rdimensions[d])); - } else { - idx+=_istride[d]*(lcoor[d]/_rdimensions[d]); - } - } - return idx; + int idx = 0; + for (int d = 0; d < _ndimension; d++) + { + if (d == _checker_dim) + { + idx += _istride[d] * (lcoor[d] / (2 * _rdimensions[d])); + } + else + { + idx += _istride[d] * (lcoor[d] / _rdimensions[d]); + } + } + return idx; } }; - } #endif From fd367d8bfd95ec193b9528c59d7846508bf82296 Mon Sep 17 00:00:00 2001 From: Guido Cossu Date: Wed, 16 Aug 2017 09:42:57 +0100 Subject: [PATCH 153/177] Debugging the PointerCache --- lib/allocator/AlignedAllocator.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/allocator/AlignedAllocator.cc b/lib/allocator/AlignedAllocator.cc index 4249a72e..04de20bf 100644 --- a/lib/allocator/AlignedAllocator.cc +++ b/lib/allocator/AlignedAllocator.cc @@ -11,7 +11,7 @@ int PointerCache::victim; void *PointerCache::Insert(void *ptr,size_t bytes) { - if (bytes < 4096 ) return NULL; + if (bytes < 4096 ) return ptr; #ifdef GRID_OMP assert(omp_in_parallel()==0); From bcefdd7c4eff147242ededf040653449c2d573c9 Mon Sep 17 00:00:00 2001 From: Peter Boyle Date: Sat, 19 Aug 2017 12:49:02 -0400 Subject: [PATCH 154/177] Align both allocator calls to 2MB --- lib/allocator/AlignedAllocator.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/allocator/AlignedAllocator.h b/lib/allocator/AlignedAllocator.h index 7fd9496f..39734b53 100644 --- a/lib/allocator/AlignedAllocator.h +++ 
b/lib/allocator/AlignedAllocator.h @@ -186,9 +186,9 @@ public: pointer allocate(size_type __n, const void* _p= 0) { #ifdef HAVE_MM_MALLOC_H - _Tp * ptr = (_Tp *) _mm_malloc(__n*sizeof(_Tp),128); + _Tp * ptr = (_Tp *) _mm_malloc(__n*sizeof(_Tp),GRID_ALLOC_ALIGN); #else - _Tp * ptr = (_Tp *) memalign(128,__n*sizeof(_Tp)); + _Tp * ptr = (_Tp *) memalign(GRID_ALLOC_ALIGN,__n*sizeof(_Tp)); #endif return ptr; } From 9e658de2383620b5aa002f319b85442ab24d8115 Mon Sep 17 00:00:00 2001 From: Peter Boyle Date: Sat, 19 Aug 2017 12:52:44 -0400 Subject: [PATCH 155/177] Use Vector --- benchmarks/Benchmark_comms.cc | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/benchmarks/Benchmark_comms.cc b/benchmarks/Benchmark_comms.cc index 698f9d25..491fba1e 100644 --- a/benchmarks/Benchmark_comms.cc +++ b/benchmarks/Benchmark_comms.cc @@ -92,8 +92,8 @@ int main (int argc, char ** argv) RealD Nnode = Grid.NodeCount(); RealD ppn = Nrank/Nnode; - std::vector > xbuf(8,std::vector(lat*lat*lat*Ls)); - std::vector > rbuf(8,std::vector(lat*lat*lat*Ls)); + Vector > xbuf(8,Vector(lat*lat*lat*Ls)); + Vector > rbuf(8,Vector(lat*lat*lat*Ls)); int ncomm; int bytes=lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD); @@ -172,8 +172,8 @@ int main (int argc, char ** argv) RealD Nnode = Grid.NodeCount(); RealD ppn = Nrank/Nnode; - std::vector > xbuf(8,std::vector(lat*lat*lat*Ls)); - std::vector > rbuf(8,std::vector(lat*lat*lat*Ls)); + Vector > xbuf(8,Vector(lat*lat*lat*Ls)); + Vector > rbuf(8,Vector(lat*lat*lat*Ls)); int ncomm; From d6472eda8d00c8d0ffc60760a4dd9462702ac00b Mon Sep 17 00:00:00 2001 From: Peter Boyle Date: Sat, 19 Aug 2017 12:53:18 -0400 Subject: [PATCH 156/177] Use mmap --- lib/communicator/Communicator_base.cc | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/lib/communicator/Communicator_base.cc b/lib/communicator/Communicator_base.cc index 67bfaed0..6767495f 100644 --- a/lib/communicator/Communicator_base.cc +++ b/lib/communicator/Communicator_base.cc @@ 
-26,6 +26,10 @@ Author: Peter Boyle *************************************************************************************/ /* END LEGAL */ #include +#include +#include +#include +#include namespace Grid { @@ -129,8 +133,15 @@ void *CartesianCommunicator::ShmBufferTranslate(int rank,void * local_p) { return NULL; } void CartesianCommunicator::ShmInitGeneric(void){ +#if 1 + ShmCommBuf =(void *) mmap(NULL, MAX_MPI_SHM_BYTES, PROT_READ | PROT_WRITE, MAP_HUGETLB| MAP_SHARED | MAP_ANONYMOUS, -1, 0); + if (ShmCommBuf == (void *)MAP_FAILED) exit(EXIT_FAILURE); + std::cout << "ShmCommBuf "< Date: Sat, 19 Aug 2017 12:53:59 -0400 Subject: [PATCH 157/177] Enable blocking stencil send --- lib/communicator/Communicator_mpit.cc | 5 ----- 1 file changed, 5 deletions(-) diff --git a/lib/communicator/Communicator_mpit.cc b/lib/communicator/Communicator_mpit.cc index f522701c..c0fb47fd 100644 --- a/lib/communicator/Communicator_mpit.cc +++ b/lib/communicator/Communicator_mpit.cc @@ -242,20 +242,15 @@ double CartesianCommunicator::StencilSendToRecvFromBegin(std::vector Date: Sat, 19 Aug 2017 13:18:50 -0400 Subject: [PATCH 158/177] Fix mpi 3 interface change --- lib/communicator/Communicator_mpi3.cc | 11 +++++++++++ lib/communicator/Communicator_mpit.cc | 25 +++++++------------------ 2 files changed, 18 insertions(+), 18 deletions(-) diff --git a/lib/communicator/Communicator_mpi3.cc b/lib/communicator/Communicator_mpi3.cc index 46e4745c..e6e33d33 100644 --- a/lib/communicator/Communicator_mpi3.cc +++ b/lib/communicator/Communicator_mpi3.cc @@ -621,6 +621,17 @@ void CartesianCommunicator::SendToRecvFromBegin(std::vector &lis } } +double CartesianCommunicator::StencilSendToRecvFrom( void *xmit, + int dest, + void *recv, + int from, + int bytes,int dir) +{ + std::vector list; + StencilSendToRecvFromBegin(list,xmit,dest,recv,from,bytes,dir); + StencilSendToRecvFromComplete(list,dir); +} + double CartesianCommunicator::StencilSendToRecvFromBegin(std::vector &list, void *xmit, int dest, 
diff --git a/lib/communicator/Communicator_mpit.cc b/lib/communicator/Communicator_mpit.cc index c0fb47fd..9a9b26d2 100644 --- a/lib/communicator/Communicator_mpit.cc +++ b/lib/communicator/Communicator_mpit.cc @@ -242,17 +242,12 @@ double CartesianCommunicator::StencilSendToRecvFromBegin(std::vector &waitall,int dir) +{ + // Do nothing +}; double CartesianCommunicator::StencilSendToRecvFrom(void *xmit, int xmit_to_rank, void *recv, @@ -266,17 +261,11 @@ double CartesianCommunicator::StencilSendToRecvFrom(void *xmit, // std::cout << " sending on communicator "< &waitall,int dir) -{ - // Do nothing -}; From bfef525ed2474c0cfe1047e0351ab58ce525ff10 Mon Sep 17 00:00:00 2001 From: paboyle Date: Sat, 19 Aug 2017 23:10:12 +0100 Subject: [PATCH 159/177] New benchmark prep --- benchmarks/Benchmark_ITT.cc | 518 ++++++++++++++++++++++++++++++++++++ 1 file changed, 518 insertions(+) create mode 100644 benchmarks/Benchmark_ITT.cc diff --git a/benchmarks/Benchmark_ITT.cc b/benchmarks/Benchmark_ITT.cc new file mode 100644 index 00000000..4f16b1de --- /dev/null +++ b/benchmarks/Benchmark_ITT.cc @@ -0,0 +1,518 @@ + /************************************************************************************* + + Grid physics library, www.github.com/paboyle/Grid + + Source file: ./benchmarks/Benchmark_memory_bandwidth.cc + + Copyright (C) 2015 + +Author: Peter Boyle +Author: paboyle + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + You should have received a copy of the GNU General Public License along + with this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + + See the full license in the file "LICENSE" in the top level distribution directory + *************************************************************************************/ + /* END LEGAL */ +#include + +using namespace std; +using namespace Grid; +using namespace Grid::QCD; + + +struct time_statistics{ + double mean; + double err; + double min; + double max; + + void statistics(std::vector v){ + double sum = std::accumulate(v.begin(), v.end(), 0.0); + mean = sum / v.size(); + + std::vector diff(v.size()); + std::transform(v.begin(), v.end(), diff.begin(), [=](double x) { return x - mean; }); + double sq_sum = std::inner_product(diff.begin(), diff.end(), diff.begin(), 0.0); + err = std::sqrt(sq_sum / (v.size()*(v.size() - 1))); + + auto result = std::minmax_element(v.begin(), v.end()); + min = *result.first; + max = *result.second; +} +}; + +void comms_header(){ + std::cout < simd_layout = GridDefaultSimd(Nd,vComplexD::Nsimd()); + std::vector mpi_layout = GridDefaultMpi(); + + std::vector t_time(Nloop); + time_statistics timestat; + + std::cout< latt_size ({lat*mpi_layout[0], + lat*mpi_layout[1], + lat*mpi_layout[2], + lat*mpi_layout[3]}); + + GridCartesian Grid(latt_size,simd_layout,mpi_layout); + RealD Nrank = Grid._Nprocessors; + RealD Nnode = Grid.NodeCount(); + RealD ppn = Nrank/Nnode; + + std::vector xbuf(8); + std::vector rbuf(8); + Grid.ShmBufferFreeAll(); + for(int d=0;d<8;d++){ + xbuf[d] = (HalfSpinColourVectorD *)Grid.ShmBufferMalloc(lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD)); + rbuf[d] = (HalfSpinColourVectorD *)Grid.ShmBufferMalloc(lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD)); + bzero((void *)xbuf[d],lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD)); + bzero((void *)rbuf[d],lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD)); + } + + int ncomm; + 
int bytes=lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD); + double dbytes; + for(int i=0;i requests; + dbytes=0; + ncomm=0; + + parallel_for(int dir=0;dir<8;dir++){ + + double tbytes; + int mu =dir % 4; + + if (mpi_layout[mu]>1 ) { + + ncomm++; + int xmit_to_rank; + int recv_from_rank; + if ( dir == mu ) { + int comm_proc=1; + Grid.ShiftedRanks(mu,comm_proc,xmit_to_rank,recv_from_rank); + } else { + int comm_proc = mpi_layout[mu]-1; + Grid.ShiftedRanks(mu,comm_proc,xmit_to_rank,recv_from_rank); + } +#if 1 + tbytes= Grid.StencilSendToRecvFromBegin(requests, + (void *)&xbuf[dir][0], + xmit_to_rank, + (void *)&rbuf[dir][0], + recv_from_rank, + bytes,dir); + Grid.StencilSendToRecvFromComplete(requests,dir); +#endif + requests.resize(0); + +#pragma omp atomic + dbytes+=tbytes; + } + } + Grid.Barrier(); + double stop=usecond(); + t_time[i] = stop-start; // microseconds + } + + timestat.statistics(t_time); + + dbytes=dbytes*ppn; + double xbytes = dbytes*0.5; + double rbytes = dbytes*0.5; + double bidibytes = dbytes; + + + std::cout< > LatticeVec; + typedef iVector Vec; + + std::vector simd_layout = GridDefaultSimd(Nd,vReal::Nsimd()); + std::vector mpi_layout = GridDefaultMpi(); + + std::cout<({45,12,81,9})); + for(int lat=8;lat<=lmax;lat+=4){ + + std::vector latt_size ({lat*mpi_layout[0],lat*mpi_layout[1],lat*mpi_layout[2],lat*mpi_layout[3]}); + int64_t vol= latt_size[0]*latt_size[1]*latt_size[2]*latt_size[3]; + GridCartesian Grid(latt_size,simd_layout,mpi_layout); + + Vec rn ; random(sRNG,rn); + + LatticeVec z(&Grid); z=rn; + LatticeVec x(&Grid); x=rn; + LatticeVec y(&Grid); y=rn; + double a=2.0; + + uint64_t Nloop=NLOOP; + + double start=usecond(); + for(int i=0;i mpi = GridDefaultMpi(); assert(mpi.size()==4); + std::vector local({L,L,L,L}); + + GridCartesian * TmpGrid = SpaceTimeGrid::makeFourDimGrid(std::vector({64,64,64,64}), + GridDefaultSimd(Nd,vComplex::Nsimd()),GridDefaultMpi()); + uint64_t NP = TmpGrid->RankCount(); + uint64_t NN = TmpGrid->NodeCount(); + uint64_t 
SHM=NP/NN; + + std::vector internal; + if ( SHM == 1 ) internal = std::vector({1,1,1,1}); + else if ( SHM == 2 ) internal = std::vector({2,1,1,1}); + else if ( SHM == 4 ) internal = std::vector({2,2,1,1}); + else if ( SHM == 8 ) internal = std::vector({2,2,2,1}); + else assert(0); + + std::vector nodes({mpi[0]/internal[0],mpi[1]/internal[1],mpi[2]/internal[2],mpi[3]/internal[3]}); + std::vector latt4({local[0]*nodes[0],local[1]*nodes[1],local[2]*nodes[2],local[3]*nodes[3]}); + + ///////// Welcome message //////////// + std::cout< seeds4({1,2,3,4}); + std::vector seeds5({5,6,7,8}); + GridParallelRNG RNG4(UGrid); RNG4.SeedFixedIntegers(seeds4); + GridParallelRNG RNG5(FGrid); RNG5.SeedFixedIntegers(seeds5); + std::cout << GridLogMessage << "Initialised RNGs" << std::endl; + + ///////// Source preparation //////////// + LatticeFermion src (FGrid); random(RNG5,src); + LatticeFermion ref (FGrid); + LatticeFermion tmp (FGrid); + + RealD N2 = 1.0/::sqrt(norm2(src)); + src = src*N2; + + LatticeGaugeField Umu(UGrid); SU3::HotConfiguration(RNG4,Umu); + + DomainWallFermionR Dw(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5); + + //////////////////////////////////// + // Naive wilson implementation + //////////////////////////////////// + { + LatticeGaugeField Umu5d(FGrid); + std::vector U(4,FGrid); + for(int ss=0;ssoSites();ss++){ + for(int s=0;s(Umu5d,mu); + } + for(int mu=0;muBarrier(); + for(int i=0;iBarrier(); + double t1=usecond(); + uint64_t ncall = (uint64_t) 2.5*1000.0*1000.0*nwarm/(t1-t0); + FGrid->Broadcast(0,&ncall,sizeof(ncall)); + + // std::cout << GridLogMessage << " Estimate " << ncall << " calls per second"< t_time(ncall); + for(uint64_t i=0;iBarrier(); + + double volume=Ls; for(int mu=0;mumflops_best ) mflops_best = mflops; + if ( mflops({2,2,2,2}); + + Benchmark::Decomposition(); + + int do_memory=1; + int do_comms =1; + int do_su3 =0; + int do_wilson=1; + int do_dwf =1; + + if ( do_memory ) { + std::cout< Date: Sat, 19 Aug 2017 23:11:30 +0100 Subject: [PATCH 
160/177] Update TODO --- TODO | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/TODO b/TODO index 001c6c0c..cccc5f45 100644 --- a/TODO +++ b/TODO @@ -2,18 +2,18 @@ TODO: --------------- Large item work list: -1)- MultiRHS with spread out extra dim -- Go through filesystem with SciDAC I/O +1)- BG/Q port and check 2)- Christoph's local basis expansion Lanczos -3)- BG/Q port and check -4)- Precision conversion and sort out localConvert <-- partial +3)- Precision conversion and sort out localConvert <-- partial - Consistent linear solver flop count/rate -- PARTIAL, time but no flop/s yet -5)- Physical propagator interface -6)- Conserved currents -7)- Multigrid Wilson and DWF, compare to other Multigrid implementations -8)- HDCR resume +4)- Physical propagator interface +5)- Conserved currents +6)- Multigrid Wilson and DWF, compare to other Multigrid implementations +7)- HDCR resume Recent DONE +-- MultiRHS with spread out extra dim -- Go through filesystem with SciDAC I/O -- Lanczos Remove DenseVector, DenseMatrix; Use Eigen instead. 
<-- DONE -- GaugeFix into central location <-- DONE -- Scidac and Ildg metadata handling <-- DONE From a446d95c3393d697f987434ac594950d18017b7a Mon Sep 17 00:00:00 2001 From: paboyle Date: Sun, 20 Aug 2017 01:10:50 +0100 Subject: [PATCH 161/177] Trying to pass TeamCity and Travis --- benchmarks/Benchmark_ITT.cc | 12 ++++++------ lib/communicator/Communicator_base.cc | 6 +++++- lib/communicator/Communicator_base.h | 19 +++++++++++++------ lib/communicator/Communicator_mpi3.cc | 17 +++++++++++++---- lib/qcd/action/fermion/WilsonFermion5D.cc | 18 ++++++++++-------- lib/stencil/Stencil.h | 7 ++++++- lib/util/Init.cc | 18 ++++++++++++++---- 7 files changed, 67 insertions(+), 30 deletions(-) diff --git a/benchmarks/Benchmark_ITT.cc b/benchmarks/Benchmark_ITT.cc index 4f16b1de..9bf7d0a5 100644 --- a/benchmarks/Benchmark_ITT.cc +++ b/benchmarks/Benchmark_ITT.cc @@ -218,7 +218,7 @@ public: std::cout<({45,12,81,9})); for(int lat=8;lat<=lmax;lat+=4){ @@ -368,7 +368,7 @@ public: const int num_cases = 4; #endif controls Cases [] = { -#if defined(AVX512) +#ifdef AVX512 { QCD::WilsonKernelsStatic::OptInlineAsm , QCD::WilsonKernelsStatic::CommsAndCompute ,CartesianCommunicator::CommunicatorPolicySequential }, { QCD::WilsonKernelsStatic::OptInlineAsm , QCD::WilsonKernelsStatic::CommsThenCompute ,CartesianCommunicator::CommunicatorPolicySequential }, #endif @@ -380,6 +380,10 @@ public: for(int c=0;cBarrier(); diff --git a/lib/communicator/Communicator_base.cc b/lib/communicator/Communicator_base.cc index 6767495f..3ce3a774 100644 --- a/lib/communicator/Communicator_base.cc +++ b/lib/communicator/Communicator_base.cc @@ -41,6 +41,7 @@ uint64_t CartesianCommunicator::MAX_MPI_SHM_BYTES = 128*1024*1024; CartesianCommunicator::CommunicatorPolicy_t CartesianCommunicator::CommunicatorPolicy= CartesianCommunicator::CommunicatorPolicyConcurrent; int CartesianCommunicator::nCommThreads = -1; +int CartesianCommunicator::Hugepages = 0; ///////////////////////////////// // Alloc, free shmem 
region @@ -134,7 +135,10 @@ void *CartesianCommunicator::ShmBufferTranslate(int rank,void * local_p) { } void CartesianCommunicator::ShmInitGeneric(void){ #if 1 - ShmCommBuf =(void *) mmap(NULL, MAX_MPI_SHM_BYTES, PROT_READ | PROT_WRITE, MAP_HUGETLB| MAP_SHARED | MAP_ANONYMOUS, -1, 0); + + int mmap_flag = MAP_SHARED | MAP_ANONYMOUS; + if ( Hugepages ) mmap_flag |= MAP_HUGETLB; + ShmCommBuf =(void *) mmap(NULL, MAX_MPI_SHM_BYTES, PROT_READ | PROT_WRITE, mmap_flag, -1, 0); if (ShmCommBuf == (void *)MAP_FAILED) exit(EXIT_FAILURE); std::cout << "ShmCommBuf "< #ifdef HAVE_NUMAIF_H #include #endif + +// Make up for linex deficiencies #ifndef SHM_HUGETLB -#define SHM_HUGETLB 04000 +#define SHM_HUGETLB 0x0 +#endif +#ifndef MAP_HUGETLB +#define MAP_HUGETLB 0x0 #endif namespace Grid { @@ -213,8 +218,11 @@ void CartesianCommunicator::Init(int *argc, char ***argv) { int fd=shm_open(shm_name,O_RDWR|O_CREAT,0666); if ( fd < 0 ) { perror("failed shm_open"); assert(0); } ftruncate(fd, size); + + int mmap_flag = MAP_SHARED; + if (Hugepages) mmap_flag |= MAP_HUGETLB; + void * ptr = mmap(NULL,size, PROT_READ | PROT_WRITE, mmap_flag, fd, 0); - void * ptr = mmap(NULL,size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0); if ( ptr == MAP_FAILED ) { perror("failed mmap"); assert(0); } assert(((uint64_t)ptr&0x3F)==0); @@ -628,8 +636,9 @@ double CartesianCommunicator::StencilSendToRecvFrom( void *xmit, int bytes,int dir) { std::vector list; - StencilSendToRecvFromBegin(list,xmit,dest,recv,from,bytes,dir); + double offbytes = StencilSendToRecvFromBegin(list,xmit,dest,recv,from,bytes,dir); StencilSendToRecvFromComplete(list,dir); + return offbytes; } double CartesianCommunicator::StencilSendToRecvFromBegin(std::vector &list, @@ -671,7 +680,7 @@ double CartesianCommunicator::StencilSendToRecvFromBegin(std::vectorStencilSendToRecvFromComplete(list); + this->StencilSendToRecvFromComplete(list,dir); } return off_node_bytes; diff --git a/lib/qcd/action/fermion/WilsonFermion5D.cc 
b/lib/qcd/action/fermion/WilsonFermion5D.cc index 0b6c9e3d..404ecce0 100644 --- a/lib/qcd/action/fermion/WilsonFermion5D.cc +++ b/lib/qcd/action/fermion/WilsonFermion5D.cc @@ -135,10 +135,11 @@ WilsonFermion5D::WilsonFermion5D(GaugeField &_Umu, template void WilsonFermion5D::Report(void) { - std::vector latt = GridDefaultLatt(); - RealD volume = Ls; for(int mu=0;mu_Nprocessors; - RealD NN = _FourDimGrid->NodeCount(); + RealD NP = _FourDimGrid->_Nprocessors; + RealD NN = _FourDimGrid->NodeCount(); + RealD volume = Ls; + std::vector latt = _FourDimGrid->GlobalDimensions(); + for(int mu=0;mu 0 ) { std::cout << GridLogMessage << "#### Dhop calls report " << std::endl; @@ -390,17 +391,18 @@ void WilsonFermion5D::DhopInternalOverlappedComms(StencilImpl & st, Lebesg st.CommsMergeSHM(compressor);// Could do this inside parallel region overlapped with comms DhopFaceTime+=usecond(); - // Rely on async comms; start comms before merge of local data double ctime=0; double ptime=0; - // DhopComputeTime-=usecond(); - // DhopCommTime-=usecond(); + + ////////////////////////////////////////////////////////////////////////////////////////////////////// + // Ugly explicit thread mapping introduced for OPA reasons. 
+ ////////////////////////////////////////////////////////////////////////////////////////////////////// #pragma omp parallel reduction(max:ctime) reduction(max:ptime) { int tid = omp_get_thread_num(); int nthreads = omp_get_num_threads(); int ncomms = CartesianCommunicator::nCommThreads; - if (ncomms == -1) ncomms = st.Packets.size(); + if (ncomms == -1) ncomms = 1; assert(nthreads > ncomms); if (tid >= ncomms) { double start = usecond(); diff --git a/lib/stencil/Stencil.h b/lib/stencil/Stencil.h index d1d7a7e0..cca67587 100644 --- a/lib/stencil/Stencil.h +++ b/lib/stencil/Stencil.h @@ -252,10 +252,15 @@ class CartesianStencil { // Stencil runs along coordinate axes only; NO diagonal ////////////////////////////////////////// void CommunicateThreaded() { +#ifdef GRID_OMP // must be called in parallel region int mythread = omp_get_thread_num(); int nthreads = CartesianCommunicator::nCommThreads; - if (nthreads == -1) nthreads = Packets.size(); +#else + int mythread = 0; + int nthreads = 1; +#endif + if (nthreads == -1) nthreads = 1; if (mythread < nthreads) { for (int i = mythread; i < Packets.size(); i += nthreads) { double start = usecond(); diff --git a/lib/util/Init.cc b/lib/util/Init.cc index 39a726cf..3fd8b4cd 100644 --- a/lib/util/Init.cc +++ b/lib/util/Init.cc @@ -222,6 +222,11 @@ void Grid_init(int *argc,char ***argv) CartesianCommunicator::MAX_MPI_SHM_BYTES = MB*1024*1024; } + if( GridCmdOptionExists(*argv,*argv+*argc,"--shm-hugepages") ){ + CartesianCommunicator::Hugepages = 1; + } + + if( GridCmdOptionExists(*argv,*argv+*argc,"--debug-signals") ){ Grid_debug_handler_init(); } @@ -304,6 +309,7 @@ void Grid_init(int *argc,char ***argv) std::cout< Date: Sun, 20 Aug 2017 01:27:48 +0100 Subject: [PATCH 162/177] Switch off comms for now until feature/multi-communicator is merged --- benchmarks/Benchmark_ITT.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/benchmarks/Benchmark_ITT.cc b/benchmarks/Benchmark_ITT.cc index 4f16b1de..91524149 
100644 --- a/benchmarks/Benchmark_ITT.cc +++ b/benchmarks/Benchmark_ITT.cc @@ -160,7 +160,7 @@ public: int comm_proc = mpi_layout[mu]-1; Grid.ShiftedRanks(mu,comm_proc,xmit_to_rank,recv_from_rank); } -#if 1 +#if 0 tbytes= Grid.StencilSendToRecvFromBegin(requests, (void *)&xbuf[dir][0], xmit_to_rank, From 11062fb6861153ffafa6d821f8ee53f01f5f72a4 Mon Sep 17 00:00:00 2001 From: paboyle Date: Sun, 20 Aug 2017 01:37:07 +0100 Subject: [PATCH 163/177] Comms none fail fix --- lib/communicator/Communicator_base.cc | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/lib/communicator/Communicator_base.cc b/lib/communicator/Communicator_base.cc index 3ce3a774..2e6626be 100644 --- a/lib/communicator/Communicator_base.cc +++ b/lib/communicator/Communicator_base.cc @@ -102,6 +102,18 @@ int CartesianCommunicator::NodeCount(void) { return Proc int CartesianCommunicator::RankCount(void) { return ProcessorCount();}; #endif #if !defined( GRID_COMMS_MPI3) && !defined (GRID_COMMS_MPIT) +double CartesianCommunicator::StencilSendToRecvFrom( void *xmit, + int xmit_to_rank, + void *recv, + int recv_from_rank, + int bytes, int dir) +{ + std::vector list; + // Discard the "dir" + SendToRecvFromBegin (list,xmit,xmit_to_rank,recv,recv_from_rank,bytes); + SendToRecvFromComplete(list); + return 2.0*bytes; +} double CartesianCommunicator::StencilSendToRecvFromBegin(std::vector &list, void *xmit, int xmit_to_rank, From 1cdf99966810227f180452393973c87ae4a301c4 Mon Sep 17 00:00:00 2001 From: paboyle Date: Sun, 20 Aug 2017 02:39:10 +0100 Subject: [PATCH 164/177] Moving multicommunicator into mpi3 also for threading --- lib/communicator/Communicator_base.h | 8 ++++---- lib/communicator/Communicator_mpi3.cc | 12 ++++++++++-- 2 files changed, 14 insertions(+), 6 deletions(-) diff --git a/lib/communicator/Communicator_base.h b/lib/communicator/Communicator_base.h index ac7d94f3..ac866ced 100644 --- a/lib/communicator/Communicator_base.h +++ b/lib/communicator/Communicator_base.h @@ -78,15 
+78,15 @@ class CartesianCommunicator { #if defined (GRID_COMMS_MPI) || defined (GRID_COMMS_MPI3) || defined (GRID_COMMS_MPIT) static MPI_Comm communicator_world; - MPI_Comm communicator; + + MPI_Comm communicator; + std::vector communicator_halo; + typedef MPI_Request CommsRequest_t; #else typedef int CommsRequest_t; #endif -#if defined (GRID_COMMS_MPIT) - std::vector communicator_halo; -#endif //////////////////////////////////////////////////////////////////// // Helper functionality for SHM Windows common to all other impls diff --git a/lib/communicator/Communicator_mpi3.cc b/lib/communicator/Communicator_mpi3.cc index 4f769971..9e5dfb97 100644 --- a/lib/communicator/Communicator_mpi3.cc +++ b/lib/communicator/Communicator_mpi3.cc @@ -405,8 +405,14 @@ CartesianCommunicator::CartesianCommunicator(const std::vector &processors) { int ierr; communicator=communicator_world; + _ndimension = processors.size(); + communicator_halo.resize (2*_ndimension); + for(int i=0;i<_ndimension*2;i++){ + MPI_Comm_dup(communicator,&communicator_halo[i]); + } + //////////////////////////////////////////////////////////////// // Assert power of two shm_size. 
//////////////////////////////////////////////////////////////// @@ -648,6 +654,8 @@ double CartesianCommunicator::StencilSendToRecvFromBegin(std::vector Date: Sun, 20 Aug 2017 02:53:12 +0100 Subject: [PATCH 165/177] finalise issue on new OPA revert --- benchmarks/Benchmark_dwf.cc | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/benchmarks/Benchmark_dwf.cc b/benchmarks/Benchmark_dwf.cc index 98ce0a07..3858226e 100644 --- a/benchmarks/Benchmark_dwf.cc +++ b/benchmarks/Benchmark_dwf.cc @@ -503,9 +503,9 @@ int main (int argc, char ** argv) std::cout< Date: Sun, 20 Aug 2017 03:08:54 +0100 Subject: [PATCH 166/177] MAP_HUGETLB portability fix --- lib/communicator/Communicator_base.cc | 2 ++ lib/communicator/Communicator_mpi3.cc | 15 +++++++-------- 2 files changed, 9 insertions(+), 8 deletions(-) diff --git a/lib/communicator/Communicator_base.cc b/lib/communicator/Communicator_base.cc index 2e6626be..3378c56a 100644 --- a/lib/communicator/Communicator_base.cc +++ b/lib/communicator/Communicator_base.cc @@ -149,7 +149,9 @@ void CartesianCommunicator::ShmInitGeneric(void){ #if 1 int mmap_flag = MAP_SHARED | MAP_ANONYMOUS; +#ifdef MAP_HUGETLB if ( Hugepages ) mmap_flag |= MAP_HUGETLB; +#endif ShmCommBuf =(void *) mmap(NULL, MAX_MPI_SHM_BYTES, PROT_READ | PROT_WRITE, mmap_flag, -1, 0); if (ShmCommBuf == (void *)MAP_FAILED) exit(EXIT_FAILURE); std::cout << "ShmCommBuf "< #include #endif -// Make up for linex deficiencies -#ifndef SHM_HUGETLB -#define SHM_HUGETLB 0x0 -#endif -#ifndef MAP_HUGETLB -#define MAP_HUGETLB 0x0 -#endif namespace Grid { @@ -220,7 +213,9 @@ void CartesianCommunicator::Init(int *argc, char ***argv) { ftruncate(fd, size); int mmap_flag = MAP_SHARED; +#ifdef MAP_HUGETLB if (Hugepages) mmap_flag |= MAP_HUGETLB; +#endif void * ptr = mmap(NULL,size, PROT_READ | PROT_WRITE, mmap_flag, fd, 0); if ( ptr == MAP_FAILED ) { perror("failed mmap"); assert(0); } @@ -274,7 +269,11 @@ void CartesianCommunicator::Init(int *argc, char ***argv) { 
for(int r=0;r Date: Wed, 23 Aug 2017 15:07:18 +0100 Subject: [PATCH 167/177] Staggered multinode block cg debugged. Missing global sum. Code stalls and resumes on KNL at cambridge. Curious. CG iterations 23ms each, then 3200 ms pauses. Mean bandwidth reports as 200MB/s. Comms dominant in the report. However, the time behaviour suggests it is *bursty*.... Could be swap to disk? --- .../iterative/BlockConjugateGradient.h | 9 ++- lib/lattice/Lattice_reduction.h | 38 ++++++++---- .../fermion/ImprovedStaggeredFermion5D.cc | 60 +++++++++++++++++++ .../fermion/ImprovedStaggeredFermion5D.h | 10 ++++ .../solver/Test_staggered_block_cg_unprec.cc | 8 ++- 5 files changed, 110 insertions(+), 15 deletions(-) diff --git a/lib/algorithms/iterative/BlockConjugateGradient.h b/lib/algorithms/iterative/BlockConjugateGradient.h index 9418f63c..d7817c05 100644 --- a/lib/algorithms/iterative/BlockConjugateGradient.h +++ b/lib/algorithms/iterative/BlockConjugateGradient.h @@ -199,7 +199,12 @@ void BlockCGrQsolve(LinearOperatorBase &Linop, const Field &B, Field &X) Linop.HermOp(X, AD); tmp = B - AD; + //std::cout << GridLogMessage << " initial tmp " << norm2(tmp)<< std::endl; ThinQRfact (m_rr, m_C, m_Cinv, Q, tmp); + //std::cout << GridLogMessage << " initial Q " << norm2(Q)<< std::endl; + //std::cout << GridLogMessage << " m_rr " << m_rr< &Linop, const Field &B, Field &X) MatrixTimer.Start(); Linop.HermOp(D, Z); MatrixTimer.Stop(); + //std::cout << GridLogMessage << " norm2 Z " < &R,std::vector &a,const Lattice } }; +/* inline GridBase *makeSubSliceGrid(const GridBase *BlockSolverGrid,int Orthog) { int NN = BlockSolverGrid->_ndimension; @@ -387,6 +388,7 @@ inline GridBase *makeSubSliceGrid(const GridBase *BlockSolverGrid,int Or } return (GridBase *)new GridCartesian(latt_phys,simd_phys,mpi_phys); } +*/ template static void sliceMaddMatrix (Lattice &R,Eigen::MatrixXcd &aa,const Lattice &X,const Lattice &Y,int Orthog,RealD scale=1.0) @@ -398,14 +400,15 @@ static void sliceMaddMatrix 
(Lattice &R,Eigen::MatrixXcd &aa,const Lattice int Nblock = X._grid->GlobalDimensions()[Orthog]; GridBase *FullGrid = X._grid; - GridBase *SliceGrid = makeSubSliceGrid(FullGrid,Orthog); + // GridBase *SliceGrid = makeSubSliceGrid(FullGrid,Orthog); - Lattice Xslice(SliceGrid); - Lattice Rslice(SliceGrid); + // Lattice Xslice(SliceGrid); + // Lattice Rslice(SliceGrid); assert( FullGrid->_simd_layout[Orthog]==1); int nh = FullGrid->_ndimension; - int nl = SliceGrid->_ndimension; + // int nl = SliceGrid->_ndimension; + int nl = nh-1; //FIXME package in a convenient iterator //Should loop over a plane orthogonal to direction "Orthog" @@ -448,14 +451,14 @@ static void sliceMulMatrix (Lattice &R,Eigen::MatrixXcd &aa,const Lattice< int Nblock = X._grid->GlobalDimensions()[Orthog]; GridBase *FullGrid = X._grid; - GridBase *SliceGrid = makeSubSliceGrid(FullGrid,Orthog); - - Lattice Xslice(SliceGrid); - Lattice Rslice(SliceGrid); + // GridBase *SliceGrid = makeSubSliceGrid(FullGrid,Orthog); + // Lattice Xslice(SliceGrid); + // Lattice Rslice(SliceGrid); assert( FullGrid->_simd_layout[Orthog]==1); int nh = FullGrid->_ndimension; - int nl = SliceGrid->_ndimension; + // int nl = SliceGrid->_ndimension; + int nl=1; //FIXME package in a convenient iterator //Should loop over a plane orthogonal to direction "Orthog" @@ -498,18 +501,19 @@ static void sliceInnerProductMatrix( Eigen::MatrixXcd &mat, const Lattice typedef typename vobj::vector_type vector_type; GridBase *FullGrid = lhs._grid; - GridBase *SliceGrid = makeSubSliceGrid(FullGrid,Orthog); + // GridBase *SliceGrid = makeSubSliceGrid(FullGrid,Orthog); int Nblock = FullGrid->GlobalDimensions()[Orthog]; - Lattice Lslice(SliceGrid); - Lattice Rslice(SliceGrid); + // Lattice Lslice(SliceGrid); + // Lattice Rslice(SliceGrid); mat = Eigen::MatrixXcd::Zero(Nblock,Nblock); assert( FullGrid->_simd_layout[Orthog]==1); int nh = FullGrid->_ndimension; - int nl = SliceGrid->_ndimension; + // int nl = SliceGrid->_ndimension; + int nl = 
nh-1; //FIXME package in a convenient iterator //Should loop over a plane orthogonal to direction "Orthog" @@ -550,6 +554,14 @@ static void sliceInnerProductMatrix( Eigen::MatrixXcd &mat, const Lattice mat += mat_thread; } } + + for(int i=0;iGlobalSum(sum); + mat(i,j)=sum; + }} + return; } diff --git a/lib/qcd/action/fermion/ImprovedStaggeredFermion5D.cc b/lib/qcd/action/fermion/ImprovedStaggeredFermion5D.cc index 61a3c559..7d988d89 100644 --- a/lib/qcd/action/fermion/ImprovedStaggeredFermion5D.cc +++ b/lib/qcd/action/fermion/ImprovedStaggeredFermion5D.cc @@ -230,8 +230,15 @@ void ImprovedStaggeredFermion5D::DhopInternal(StencilImpl & st, LebesgueOr { Compressor compressor; int LLs = in._grid->_rdimensions[0]; + + + + DhopTotalTime -= usecond(); + DhopCommTime -= usecond(); st.HaloExchange(in,compressor); + DhopCommTime += usecond(); + DhopComputeTime -= usecond(); // Dhop takes the 4d grid from U, and makes a 5d index for fermion if (dag == DaggerYes) { parallel_for (int ss = 0; ss < U._grid->oSites(); ss++) { @@ -244,12 +251,15 @@ void ImprovedStaggeredFermion5D::DhopInternal(StencilImpl & st, LebesgueOr Kernels::DhopSite(st,lo,U,UUU,st.CommBuf(),LLs,sU,in,out); } } + DhopComputeTime += usecond(); + DhopTotalTime += usecond(); } template void ImprovedStaggeredFermion5D::DhopOE(const FermionField &in, FermionField &out,int dag) { + DhopCalls+=1; conformable(in._grid,FermionRedBlackGrid()); // verifies half grid conformable(in._grid,out._grid); // drops the cb check @@ -261,6 +271,7 @@ void ImprovedStaggeredFermion5D::DhopOE(const FermionField &in, FermionFie template void ImprovedStaggeredFermion5D::DhopEO(const FermionField &in, FermionField &out,int dag) { + DhopCalls+=1; conformable(in._grid,FermionRedBlackGrid()); // verifies half grid conformable(in._grid,out._grid); // drops the cb check @@ -272,6 +283,7 @@ void ImprovedStaggeredFermion5D::DhopEO(const FermionField &in, FermionFie template void ImprovedStaggeredFermion5D::Dhop(const FermionField &in, 
FermionField &out,int dag) { + DhopCalls+=2; conformable(in._grid,FermionGrid()); // verifies full grid conformable(in._grid,out._grid); @@ -280,6 +292,54 @@ void ImprovedStaggeredFermion5D::Dhop(const FermionField &in, FermionField DhopInternal(Stencil,Lebesgue,Umu,UUUmu,in,out,dag); } +template +void ImprovedStaggeredFermion5D::Report(void) +{ + std::vector latt = GridDefaultLatt(); + RealD volume = Ls; for(int mu=0;mu_Nprocessors; + RealD NN = _FourDimGrid->NodeCount(); + + std::cout << GridLogMessage << "#### Dhop calls report " << std::endl; + + std::cout << GridLogMessage << "ImprovedStaggeredFermion5D Number of DhopEO Calls : " + << DhopCalls << std::endl; + std::cout << GridLogMessage << "ImprovedStaggeredFermion5D TotalTime /Calls : " + << DhopTotalTime / DhopCalls << " us" << std::endl; + std::cout << GridLogMessage << "ImprovedStaggeredFermion5D CommTime /Calls : " + << DhopCommTime / DhopCalls << " us" << std::endl; + std::cout << GridLogMessage << "ImprovedStaggeredFermion5D ComputeTime/Calls : " + << DhopComputeTime / DhopCalls << " us" << std::endl; + + // Average the compute time + _FourDimGrid->GlobalSum(DhopComputeTime); + DhopComputeTime/=NP; + + RealD mflops = 1154*volume*DhopCalls/DhopComputeTime/2; // 2 for red black counting + std::cout << GridLogMessage << "Average mflops/s per call : " << mflops << std::endl; + std::cout << GridLogMessage << "Average mflops/s per call per rank : " << mflops/NP << std::endl; + std::cout << GridLogMessage << "Average mflops/s per call per node : " << mflops/NN << std::endl; + + RealD Fullmflops = 1154*volume*DhopCalls/(DhopTotalTime)/2; // 2 for red black counting + std::cout << GridLogMessage << "Average mflops/s per call (full) : " << Fullmflops << std::endl; + std::cout << GridLogMessage << "Average mflops/s per call per rank (full): " << Fullmflops/NP << std::endl; + std::cout << GridLogMessage << "Average mflops/s per call per node (full): " << Fullmflops/NN << std::endl; + + std::cout << GridLogMessage 
<< "ImprovedStaggeredFermion5D Stencil" < +void ImprovedStaggeredFermion5D::ZeroCounters(void) +{ + DhopCalls = 0; + DhopTotalTime = 0; + DhopCommTime = 0; + DhopComputeTime = 0; + Stencil.ZeroCounters(); + StencilEven.ZeroCounters(); + StencilOdd.ZeroCounters(); +} ///////////////////////////////////////////////////////////////////////// // Implement the general interface. Here we use SAME mass on all slices diff --git a/lib/qcd/action/fermion/ImprovedStaggeredFermion5D.h b/lib/qcd/action/fermion/ImprovedStaggeredFermion5D.h index 4961da49..ca1a955a 100644 --- a/lib/qcd/action/fermion/ImprovedStaggeredFermion5D.h +++ b/lib/qcd/action/fermion/ImprovedStaggeredFermion5D.h @@ -55,6 +55,16 @@ namespace QCD { FermionField _tmp; FermionField &tmp(void) { return _tmp; } + //////////////////////////////////////// + // Performance monitoring + //////////////////////////////////////// + void Report(void); + void ZeroCounters(void); + double DhopTotalTime; + double DhopCalls; + double DhopCommTime; + double DhopComputeTime; + /////////////////////////////////////////////////////////////// // Implement the abstract base /////////////////////////////////////////////////////////////// diff --git a/tests/solver/Test_staggered_block_cg_unprec.cc b/tests/solver/Test_staggered_block_cg_unprec.cc index 8db41e98..f54bc3b2 100644 --- a/tests/solver/Test_staggered_block_cg_unprec.cc +++ b/tests/solver/Test_staggered_block_cg_unprec.cc @@ -75,7 +75,7 @@ int main (int argc, char ** argv) LatticeGaugeField Umu(UGrid); SU3::HotConfiguration(pRNG,Umu); RealD mass=0.003; - ImprovedStaggeredFermion5DR Ds(Umu,Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass); + ImprovedStaggeredFermion5DR Ds(Umu,Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass); MdagMLinearOperator HermOp(Ds); ConjugateGradient CG(1.0e-8,10000); @@ -99,21 +99,27 @@ int main (int argc, char ** argv) std::cout << GridLogMessage << " Calling 5d CG for "< Date: Thu, 24 Aug 2017 10:17:52 +0100 Subject: [PATCH 168/177] FFT test compile fixed --- 
lib/qcd/utils/GaugeFix.h | 3 +++ tests/core/Test_fft_gfix.cc | 4 ++++ 2 files changed, 7 insertions(+) diff --git a/lib/qcd/utils/GaugeFix.h b/lib/qcd/utils/GaugeFix.h index f2ea1aa2..c4ea31aa 100644 --- a/lib/qcd/utils/GaugeFix.h +++ b/lib/qcd/utils/GaugeFix.h @@ -26,6 +26,8 @@ Author: Peter Boyle /* END LEGAL */ //#include +#ifndef GRID_QCD_GAUGE_FIX_H +#define GRID_QCD_GAUGE_FIX_H namespace Grid { namespace QCD { @@ -188,3 +190,4 @@ class FourierAcceleratedGaugeFixer : public Gimpl { } } +#endif diff --git a/tests/core/Test_fft_gfix.cc b/tests/core/Test_fft_gfix.cc index 9732eb85..916c4b0b 100644 --- a/tests/core/Test_fft_gfix.cc +++ b/tests/core/Test_fft_gfix.cc @@ -28,6 +28,9 @@ Author: Peter Boyle /* END LEGAL */ #include +using namespace Grid; +using namespace Grid::QCD; + int main (int argc, char ** argv) { std::vector seeds({1,2,3,4}); @@ -82,6 +85,7 @@ int main (int argc, char ** argv) Uorg = Uorg - Umu; std::cout << " Norm Difference "<< norm2(Uorg) << std::endl; + std::cout << " Norm "<< norm2(Umu) << std::endl; std::cout<< "*****************************************************************" < Date: Thu, 24 Aug 2017 18:17:09 +0100 Subject: [PATCH 169/177] CI update --- .travis.yml | 68 ----------------------------------------------------- README.md | 16 +------------ 2 files changed, 1 insertion(+), 83 deletions(-) diff --git a/.travis.yml b/.travis.yml index 64dae823..7d8203ce 100644 --- a/.travis.yml +++ b/.travis.yml @@ -9,68 +9,6 @@ matrix: - os: osx osx_image: xcode8.3 compiler: clang - - compiler: gcc - dist: trusty - sudo: required - addons: - apt: - sources: - - ubuntu-toolchain-r-test - packages: - - g++-4.9 - - libmpfr-dev - - libgmp-dev - - libmpc-dev - - libopenmpi-dev - - openmpi-bin - - binutils-dev - env: VERSION=-4.9 - - compiler: gcc - dist: trusty - sudo: required - addons: - apt: - sources: - - ubuntu-toolchain-r-test - packages: - - g++-5 - - libmpfr-dev - - libgmp-dev - - libmpc-dev - - libopenmpi-dev - - openmpi-bin - - binutils-dev 
- env: VERSION=-5 - - compiler: clang - dist: trusty - addons: - apt: - sources: - - ubuntu-toolchain-r-test - packages: - - g++-4.8 - - libmpfr-dev - - libgmp-dev - - libmpc-dev - - libopenmpi-dev - - openmpi-bin - - binutils-dev - env: CLANG_LINK=http://llvm.org/releases/3.8.0/clang+llvm-3.8.0-x86_64-linux-gnu-ubuntu-14.04.tar.xz - - compiler: clang - dist: trusty - addons: - apt: - sources: - - ubuntu-toolchain-r-test - packages: - - g++-4.8 - - libmpfr-dev - - libgmp-dev - - libmpc-dev - - libopenmpi-dev - - openmpi-bin - - binutils-dev - env: CLANG_LINK=http://llvm.org/releases/3.7.0/clang+llvm-3.7.0-x86_64-linux-gnu-ubuntu-14.04.tar.xz before_install: - export GRIDDIR=`pwd` @@ -106,9 +44,3 @@ script: - make -j4 - ./benchmarks/Benchmark_dwf --threads 1 --debug-signals - make check - - echo make clean - - if [[ "$TRAVIS_OS_NAME" == "linux" ]] && [[ "$CC" == "clang" ]]; then ../configure --enable-precision=single --enable-simd=SSE4 --enable-comms=mpi-auto ; fi - - if [[ "$TRAVIS_OS_NAME" == "linux" ]] && [[ "$CC" == "clang" ]]; then make -j4; fi - - if [[ "$TRAVIS_OS_NAME" == "linux" ]] && [[ "$CC" == "clang" ]]; then mpirun.openmpi -n 2 ./benchmarks/Benchmark_dwf --threads 1 --mpi 2.1.1.1; fi - - diff --git a/README.md b/README.md index 1e0988f3..13dd6996 100644 --- a/README.md +++ b/README.md @@ -1,18 +1,4 @@ -# Grid - - - - - - - - - -
Last stable release - -
Development branch - -
+# Grid [![Teamcity status](http://ci.cliath.ph.ed.ac.uk/app/rest/builds/aggregated/strob:(buildType:(affectedProject(id:Grid)),branch:name:develop)/statusIcon.svg)](http://ci.cliath.ph.ed.ac.uk/project.html?projectId=Grid&tab=projectOverview) [![Travis status](https://travis-ci.org/paboyle/Grid.svg?branch=develop)](https://travis-ci.org/paboyle/Grid) **Data parallel C++ mathematical object library.** From c3b1263e75212356fc1aa061cd226db70f4f00fc Mon Sep 17 00:00:00 2001 From: Peter Boyle Date: Fri, 25 Aug 2017 09:25:54 +0100 Subject: [PATCH 170/177] Benchmark prep --- benchmarks/Benchmark_ITT.cc | 322 +++++++++++++++++++--- benchmarks/Benchmark_comms.cc | 30 +- lib/allocator/AlignedAllocator.h | 5 + lib/communicator/Communicator_base.cc | 6 +- lib/communicator/Communicator_mpi3.cc | 5 +- lib/communicator/Communicator_mpit.cc | 19 +- lib/qcd/action/fermion/CayleyFermion5D.cc | 12 +- lib/qcd/action/fermion/WilsonCompressor.h | 41 ++- lib/qcd/action/fermion/WilsonFermion5D.cc | 11 + lib/stencil/Stencil.h | 114 +++++++- 10 files changed, 494 insertions(+), 71 deletions(-) diff --git a/benchmarks/Benchmark_ITT.cc b/benchmarks/Benchmark_ITT.cc index 9bf7d0a5..c5226ee1 100644 --- a/benchmarks/Benchmark_ITT.cc +++ b/benchmarks/Benchmark_ITT.cc @@ -32,6 +32,19 @@ using namespace std; using namespace Grid; using namespace Grid::QCD; +typedef WilsonFermion5D WilsonFermion5DR; +typedef WilsonFermion5D WilsonFermion5DF; +typedef WilsonFermion5D WilsonFermion5DD; + + +std::vector L_list; +std::vector Ls_list; +std::vector mflop_list; + +double mflop_ref; +double mflop_ref_err; + +int NN_global; struct time_statistics{ double mean; @@ -95,13 +108,15 @@ public: static void Comms(void) { - int Nloop=100; + int Nloop=1000; int nmu=0; int maxlat=32; std::vector simd_layout = GridDefaultSimd(Nd,vComplexD::Nsimd()); std::vector mpi_layout = GridDefaultMpi(); + for(int mu=0;mu1) nmu++; + std::vector t_time(Nloop); time_statistics timestat; @@ -133,13 +148,14 @@ public: bzero((void 
*)rbuf[d],lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD)); } - int ncomm; int bytes=lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD); + int ncomm; double dbytes; + std::vector times(Nloop); for(int i=0;i requests; dbytes=0; ncomm=0; @@ -150,7 +166,6 @@ public: if (mpi_layout[mu]>1 ) { - ncomm++; int xmit_to_rank; int recv_from_rank; if ( dir == mu ) { @@ -160,18 +175,18 @@ public: int comm_proc = mpi_layout[mu]-1; Grid.ShiftedRanks(mu,comm_proc,xmit_to_rank,recv_from_rank); } -#if 1 - tbytes= Grid.StencilSendToRecvFromBegin(requests, - (void *)&xbuf[dir][0], - xmit_to_rank, - (void *)&rbuf[dir][0], - recv_from_rank, - bytes,dir); - Grid.StencilSendToRecvFromComplete(requests,dir); -#endif - requests.resize(0); - + tbytes= Grid.StencilSendToRecvFrom((void *)&xbuf[dir][0], xmit_to_rank, + (void *)&rbuf[dir][0], recv_from_rank, + bytes,dir); + +#ifdef GRID_OMP #pragma omp atomic +#endif + ncomm++; + +#ifdef GRID_OMP +#pragma omp atomic +#endif dbytes+=tbytes; } } @@ -181,13 +196,15 @@ public: } timestat.statistics(t_time); + // for(int i=0;i({45,12,81,9})); for(int lat=8;lat<=lmax;lat+=4){ @@ -253,8 +271,7 @@ public: } }; - - static void DWF(int Ls,int L) + static double DWF5(int Ls,int L) { RealD mass=0.1; RealD M5 =1.8; @@ -262,6 +279,7 @@ public: double mflops; double mflops_best = 0; double mflops_worst= 0; + std::vector mflops_all; /////////////////////////////////////////////////////// // Set/Get the layout & grid size @@ -274,6 +292,189 @@ public: GridDefaultSimd(Nd,vComplex::Nsimd()),GridDefaultMpi()); uint64_t NP = TmpGrid->RankCount(); uint64_t NN = TmpGrid->NodeCount(); + NN_global=NN; + uint64_t SHM=NP/NN; + + std::vector internal; + if ( SHM == 1 ) internal = std::vector({1,1,1,1}); + else if ( SHM == 2 ) internal = std::vector({2,1,1,1}); + else if ( SHM == 4 ) internal = std::vector({2,2,1,1}); + else if ( SHM == 8 ) internal = std::vector({2,2,2,1}); + else assert(0); + + std::vector 
nodes({mpi[0]/internal[0],mpi[1]/internal[1],mpi[2]/internal[2],mpi[3]/internal[3]}); + std::vector latt4({local[0]*nodes[0],local[1]*nodes[1],local[2]*nodes[2],local[3]*nodes[3]}); + + ///////// Welcome message //////////// + std::cout< seeds4({1,2,3,4}); + std::vector seeds5({5,6,7,8}); + GridParallelRNG RNG4(UGrid); RNG4.SeedFixedIntegers(seeds4); + GridParallelRNG RNG5(sFGrid); RNG5.SeedFixedIntegers(seeds5); + std::cout << GridLogMessage << "Initialised RNGs" << std::endl; + + ///////// Source preparation //////////// + LatticeFermion src (sFGrid); random(RNG5,src); + LatticeFermion tmp (sFGrid); + + RealD N2 = 1.0/::sqrt(norm2(src)); + src = src*N2; + + LatticeGaugeField Umu(UGrid); SU3::HotConfiguration(RNG4,Umu); + + WilsonFermion5DR sDw(Umu,*sFGrid,*sFrbGrid,*sUGrid,*sUrbGrid,M5); + LatticeFermion src_e (sFrbGrid); + LatticeFermion src_o (sFrbGrid); + LatticeFermion r_e (sFrbGrid); + LatticeFermion r_o (sFrbGrid); + LatticeFermion r_eo (sFGrid); + LatticeFermion err (sFGrid); + { + + pickCheckerboard(Even,src_e,src); + pickCheckerboard(Odd,src_o,src); + +#if defined(AVX512) + const int num_cases = 6; + std::string fmt("A/S ; A/O ; U/S ; U/O ; G/S ; G/O "); +#else + const int num_cases = 4; + std::string fmt("U/S ; U/O ; G/S ; G/O "); +#endif + controls Cases [] = { +#ifdef AVX512 + { QCD::WilsonKernelsStatic::OptInlineAsm , QCD::WilsonKernelsStatic::CommsThenCompute ,CartesianCommunicator::CommunicatorPolicySequential }, + { QCD::WilsonKernelsStatic::OptInlineAsm , QCD::WilsonKernelsStatic::CommsAndCompute ,CartesianCommunicator::CommunicatorPolicySequential }, +#endif + { QCD::WilsonKernelsStatic::OptHandUnroll, QCD::WilsonKernelsStatic::CommsThenCompute ,CartesianCommunicator::CommunicatorPolicySequential }, + { QCD::WilsonKernelsStatic::OptHandUnroll, QCD::WilsonKernelsStatic::CommsAndCompute ,CartesianCommunicator::CommunicatorPolicySequential }, + { QCD::WilsonKernelsStatic::OptGeneric , QCD::WilsonKernelsStatic::CommsThenCompute 
,CartesianCommunicator::CommunicatorPolicySequential }, + { QCD::WilsonKernelsStatic::OptGeneric , QCD::WilsonKernelsStatic::CommsAndCompute ,CartesianCommunicator::CommunicatorPolicySequential } + }; + + for(int c=0;cBarrier(); + for(int i=0;iBarrier(); + double t1=usecond(); + // uint64_t ncall = (uint64_t) 2.5*1000.0*1000.0*nwarm/(t1-t0); + // if (ncall < 500) ncall = 500; + uint64_t ncall = 1000; + + sFGrid->Broadcast(0,&ncall,sizeof(ncall)); + + // std::cout << GridLogMessage << " Estimate " << ncall << " calls per second"< t_time(ncall); + for(uint64_t i=0;iBarrier(); + + double volume=Ls; for(int mu=0;mumflops_best ) mflops_best = mflops; + if ( mflops mflops_all; + + /////////////////////////////////////////////////////// + // Set/Get the layout & grid size + /////////////////////////////////////////////////////// + int threads = GridThread::GetThreads(); + std::vector mpi = GridDefaultMpi(); assert(mpi.size()==4); + std::vector local({L,L,L,L}); + + GridCartesian * TmpGrid = SpaceTimeGrid::makeFourDimGrid(std::vector({64,64,64,64}), + GridDefaultSimd(Nd,vComplex::Nsimd()),GridDefaultMpi()); + uint64_t NP = TmpGrid->RankCount(); + uint64_t NN = TmpGrid->NodeCount(); + NN_global=NN; uint64_t SHM=NP/NN; std::vector internal; @@ -364,13 +565,15 @@ public: #if defined(AVX512) const int num_cases = 6; + std::string fmt("A/S ; A/O ; U/S ; U/O ; G/S ; G/O "); #else const int num_cases = 4; + std::string fmt("U/S ; U/O ; G/S ; G/O "); #endif controls Cases [] = { #ifdef AVX512 - { QCD::WilsonKernelsStatic::OptInlineAsm , QCD::WilsonKernelsStatic::CommsAndCompute ,CartesianCommunicator::CommunicatorPolicySequential }, { QCD::WilsonKernelsStatic::OptInlineAsm , QCD::WilsonKernelsStatic::CommsThenCompute ,CartesianCommunicator::CommunicatorPolicySequential }, + { QCD::WilsonKernelsStatic::OptInlineAsm , QCD::WilsonKernelsStatic::CommsAndCompute ,CartesianCommunicator::CommunicatorPolicySequential }, #endif { QCD::WilsonKernelsStatic::OptHandUnroll, 
QCD::WilsonKernelsStatic::CommsThenCompute ,CartesianCommunicator::CommunicatorPolicySequential }, { QCD::WilsonKernelsStatic::OptHandUnroll, QCD::WilsonKernelsStatic::CommsAndCompute ,CartesianCommunicator::CommunicatorPolicySequential }, @@ -394,7 +597,7 @@ public: if ( sizeof(Real)==8 ) std::cout << GridLogMessage<< "* DOUBLE precision "<Barrier(); for(int i=0;iBarrier(); double t1=usecond(); - uint64_t ncall = (uint64_t) 2.5*1000.0*1000.0*nwarm/(t1-t0); + // uint64_t ncall = (uint64_t) 2.5*1000.0*1000.0*nwarm/(t1-t0); + // if (ncall < 500) ncall = 500; + uint64_t ncall = 1000; + FGrid->Broadcast(0,&ncall,sizeof(ncall)); // std::cout << GridLogMessage << " Estimate " << ncall << " calls per second"<mflops_best ) mflops_best = mflops; @@ -450,12 +656,20 @@ public: } std::cout< L_list({8,12,16,24}); + std::vector wilson; + std::vector dwf4; + std::vector dwf5; + if ( do_wilson ) { int Ls=1; std::cout< > xbuf(8,Vector(lat*lat*lat*Ls)); - Vector > rbuf(8,Vector(lat*lat*lat*Ls)); + std::vector > xbuf(8); + std::vector > rbuf(8); int ncomm; int bytes=lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD); + for(int mu=0;mu<8;mu++){ + xbuf[mu].resize(lat*lat*lat*Ls); + rbuf[mu].resize(lat*lat*lat*Ls); + // std::cout << " buffers " << std::hex << (uint64_t)&xbuf[mu][0] <<" " << (uint64_t)&rbuf[mu][0] < > xbuf(8,Vector(lat*lat*lat*Ls)); - Vector > rbuf(8,Vector(lat*lat*lat*Ls)); + std::vector > xbuf(8); + std::vector > rbuf(8); + for(int mu=0;mu<8;mu++){ + xbuf[mu].resize(lat*lat*lat*Ls); + rbuf[mu].resize(lat*lat*lat*Ls); + // std::cout << " buffers " << std::hex << (uint64_t)&xbuf[mu][0] <<" " << (uint64_t)&rbuf[mu][0] < &waitall,int dir) { - // Do nothing + int nreq=waitall.size(); + MPI_Waitall(nreq, &waitall[0], MPI_STATUSES_IGNORE); }; double CartesianCommunicator::StencilSendToRecvFrom(void *xmit, int xmit_to_rank, @@ -262,7 +275,7 @@ double CartesianCommunicator::StencilSendToRecvFrom(void *xmit, // Give the CPU to MPI immediately; can use threads to overlap optionally 
MPI_Request req[2]; MPI_Irecv(recv,bytes,MPI_CHAR,recv_from_rank,recv_from_rank, communicator_halo[dir],&req[1]); - MPI_Isend(xmit,bytes,MPI_CHAR,xmit_to_rank,myrank, communicator_halo[dir], &req[0]); + MPI_Isend(xmit,bytes,MPI_CHAR,xmit_to_rank ,myrank , communicator_halo[dir],&req[0]); MPI_Waitall(2, req, MPI_STATUSES_IGNORE); return 2.0*bytes; } diff --git a/lib/qcd/action/fermion/CayleyFermion5D.cc b/lib/qcd/action/fermion/CayleyFermion5D.cc index 46ba3793..5e67d1f1 100644 --- a/lib/qcd/action/fermion/CayleyFermion5D.cc +++ b/lib/qcd/action/fermion/CayleyFermion5D.cc @@ -429,7 +429,7 @@ void CayleyFermion5D::SetCoefficientsInternal(RealD zolo_hi,std::vectorM5) +1.0); - // assert(fabs(bee[i])>0.0); + assert(fabs(bee[i])>0.0); cee[i]=as[i]*(1.0-cs[i]*(4.0-this->M5)); beo[i]=as[i]*bs[i]; ceo[i]=-as[i]*cs[i]; @@ -455,11 +455,17 @@ void CayleyFermion5D::SetCoefficientsInternal(RealD zolo_hi,std::vector0.0); + assert(fabs(bee[0])>0.0); lee[i] =-cee[i+1]/bee[i]; // sub-diag entry on the ith column leem[i]=mass*cee[Ls-1]/bee[0]; - for(int j=0;j0.0); + leem[i]*= aee[j]/bee[j+1]; + } uee[i] =-aee[i]/bee[i]; // up-diag entry on the ith row @@ -478,7 +484,7 @@ void CayleyFermion5D::SetCoefficientsInternal(RealD zolo_hi,std::vector0.0); + assert(fabs(bee[j])>0.0); delta_d *= cee[j]/bee[j]; } dee[Ls-1] += delta_d; diff --git a/lib/qcd/action/fermion/WilsonCompressor.h b/lib/qcd/action/fermion/WilsonCompressor.h index 96cbe1ec..30c6d838 100644 --- a/lib/qcd/action/fermion/WilsonCompressor.h +++ b/lib/qcd/action/fermion/WilsonCompressor.h @@ -238,7 +238,35 @@ template using WilsonCompressor = WilsonCom template class WilsonStencil : public CartesianStencil { public: - + double timer0; + double timer1; + double timer2; + double timer3; + double timer4; + double timer5; + double timer6; + uint64_t callsi; + void ZeroCountersi(void) + { + std::cout << GridLogMessage << " ZeroCountersi()"< same_node; @@ -252,6 +280,7 @@ public: : CartesianStencil 
(grid,npoints,checkerboard,directions,distances) , same_node(npoints) { + ZeroCountersi(); surface_list.resize(0); }; @@ -282,17 +311,25 @@ public: { std::vector > reqs; this->HaloExchangeOptGather(source,compress); + double t1=usecond(); this->CommunicateBegin(reqs); this->CommunicateComplete(reqs); + double t2=usecond(); timer1 += t2-t1; this->CommsMerge(compress); + double t3=usecond(); timer2 += t3-t2; this->CommsMergeSHM(compress); + double t4=usecond(); timer3 += t4-t3; } template void HaloExchangeOptGather(const Lattice &source,compressor &compress) { this->Prepare(); + double t0=usecond(); this->HaloGatherOpt(source,compress); + double t1=usecond(); + timer0 += t1-t0; + callsi++; } template @@ -304,7 +341,9 @@ public: typedef typename compressor::SiteHalfSpinor SiteHalfSpinor; typedef typename compressor::SiteHalfCommSpinor SiteHalfCommSpinor; + this->mpi3synctime_g-=usecond(); this->_grid->StencilBarrier(); + this->mpi3synctime_g+=usecond(); assert(source._grid==this->_grid); this->halogtime-=usecond(); diff --git a/lib/qcd/action/fermion/WilsonFermion5D.cc b/lib/qcd/action/fermion/WilsonFermion5D.cc index 404ecce0..c5b0f872 100644 --- a/lib/qcd/action/fermion/WilsonFermion5D.cc +++ b/lib/qcd/action/fermion/WilsonFermion5D.cc @@ -185,6 +185,11 @@ void WilsonFermion5D::Report(void) std::cout << GridLogMessage << "WilsonFermion5D StencilEven"< 0){ + std::cout << GridLogMessage << "WilsonFermion5D Stencil Reporti()" < @@ -204,6 +209,9 @@ void WilsonFermion5D::ZeroCounters(void) { Stencil.ZeroCounters(); StencilEven.ZeroCounters(); StencilOdd.ZeroCounters(); + Stencil.ZeroCountersi(); + StencilEven.ZeroCountersi(); + StencilOdd.ZeroCountersi(); } @@ -445,6 +453,9 @@ void WilsonFermion5D::DhopInternalOverlappedComms(StencilImpl & st, Lebesg DhopCommTime += ctime; DhopComputeTime+=ptime; + // First to enter, last to leave timing + st.CollateThreads(); + DhopFaceTime-=usecond(); st.CommsMerge(compressor); DhopFaceTime+=usecond(); diff --git 
a/lib/stencil/Stencil.h b/lib/stencil/Stencil.h index cca67587..ad454bcb 100644 --- a/lib/stencil/Stencil.h +++ b/lib/stencil/Stencil.h @@ -176,6 +176,9 @@ class CartesianStencil { // Stencil runs along coordinate axes only; NO diagonal // Timing info; ugly; possibly temporary ///////////////////////////////////////// double commtime; + double mpi3synctime; + double mpi3synctime_g; + double shmmergetime; double gathertime; double gathermtime; double halogtime; @@ -185,8 +188,10 @@ class CartesianStencil { // Stencil runs along coordinate axes only; NO diagonal double splicetime; double nosplicetime; double calls; - std::vector comms_bytesthr; - std::vector commtimethr; + std::vector comm_bytes_thr; + std::vector comm_time_thr; + std::vector comm_enter_thr; + std::vector comm_leave_thr; //////////////////////////////////////// // Stencil query @@ -262,18 +267,45 @@ class CartesianStencil { // Stencil runs along coordinate axes only; NO diagonal #endif if (nthreads == -1) nthreads = 1; if (mythread < nthreads) { + comm_enter_thr[mythread] = usecond(); for (int i = mythread; i < Packets.size(); i += nthreads) { - double start = usecond(); uint64_t bytes = _grid->StencilSendToRecvFrom(Packets[i].send_buf, Packets[i].to_rank, Packets[i].recv_buf, Packets[i].from_rank, Packets[i].bytes,i); - comms_bytesthr[mythread] += bytes; - commtimethr[mythread] += usecond() - start; + comm_bytes_thr[mythread] += bytes; } + comm_leave_thr[mythread]= usecond(); + comm_time_thr[mythread] += comm_leave_thr[mythread] - comm_enter_thr[mythread]; } } + + void CollateThreads(void) + { + int nthreads = CartesianCommunicator::nCommThreads; + double first=0.0; + double last =0.0; + + for(int t=0;t 0.0) && ( t0 < first ) ) first = t0; // min time seen + + if ( t1 > last ) last = t1; // max time seen + + } + commtime+= last-first; + } void CommunicateBegin(std::vector > &reqs) { reqs.resize(Packets.size()); @@ -295,14 +327,48 @@ class CartesianStencil { // Stencil runs along coordinate axes 
only; NO diagonal } commtime+=usecond(); } + void Communicate(void) + { +#ifdef GRID_OMP +#pragma omp parallel + { + // must be called in parallel region + int mythread = omp_get_thread_num(); + int maxthreads= omp_get_max_threads(); + int nthreads = CartesianCommunicator::nCommThreads; + assert(nthreads <= maxthreads); + + if (nthreads == -1) nthreads = 1; +#else + int mythread = 0; + int nthreads = 1; +#endif + if (mythread < nthreads) { + for (int i = mythread; i < Packets.size(); i += nthreads) { + double start = usecond(); + comm_bytes_thr[mythread] += _grid->StencilSendToRecvFrom(Packets[i].send_buf, + Packets[i].to_rank, + Packets[i].recv_buf, + Packets[i].from_rank, + Packets[i].bytes,i); + comm_time_thr[mythread] += usecond() - start; + } + } +#ifdef GRID_OMP + } +#endif + } template void HaloExchange(const Lattice &source,compressor &compress) { std::vector > reqs; Prepare(); HaloGather(source,compress); + // Concurrent CommunicateBegin(reqs); CommunicateComplete(reqs); + // Sequential + // Communicate(); CommsMergeSHM(compress); CommsMerge(compress); } @@ -363,7 +429,9 @@ class CartesianStencil { // Stencil runs along coordinate axes only; NO diagonal template void HaloGather(const Lattice &source,compressor &compress) { + mpi3synctime_g-=usecond(); _grid->StencilBarrier();// Synch shared memory on a single nodes + mpi3synctime_g+=usecond(); // conformable(source._grid,_grid); assert(source._grid==_grid); @@ -423,8 +491,12 @@ class CartesianStencil { // Stencil runs along coordinate axes only; NO diagonal CommsMerge(decompress,Mergers,Decompressions); } template void CommsMergeSHM(decompressor decompress) { + mpi3synctime-=usecond(); _grid->StencilBarrier();// Synch shared memory on a single nodes + mpi3synctime+=usecond(); + shmmergetime-=usecond(); CommsMerge(decompress,MergersSHM,DecompressionsSHM); + shmmergetime+=usecond(); } template @@ -470,8 +542,10 @@ class CartesianStencil { // Stencil runs along coordinate axes only; NO diagonal const 
std::vector &distances) : _permute_type(npoints), _comm_buf_size(npoints), - comms_bytesthr(npoints), - commtimethr(npoints) + comm_bytes_thr(npoints), + comm_enter_thr(npoints), + comm_leave_thr(npoints), + comm_time_thr(npoints) { face_table_computed=0; _npoints = npoints; @@ -1025,8 +1099,15 @@ class CartesianStencil { // Stencil runs along coordinate axes only; NO diagonal void ZeroCounters(void) { gathertime = 0.; commtime = 0.; - memset(&commtimethr[0], 0, sizeof(commtimethr)); - memset(&comms_bytesthr[0], 0, sizeof(comms_bytesthr)); + mpi3synctime=0.; + mpi3synctime_g=0.; + shmmergetime=0.; + for(int i=0;i<_npoints;i++){ + comm_time_thr[i]=0; + comm_bytes_thr[i]=0; + comm_enter_thr[i]=0; + comm_leave_thr[i]=0; + } halogtime = 0.; mergetime = 0.; decompresstime = 0.; @@ -1043,13 +1124,17 @@ class CartesianStencil { // Stencil runs along coordinate axes only; NO diagonal RealD NP = _grid->_Nprocessors; RealD NN = _grid->NodeCount(); double t = 0; - // if commtimethr is set they were all done in parallel so take the max + // if comm_time_thr is set they were all done in parallel so take the max // but add up the bytes + int threaded = 0 ; for (int i = 0; i < 8; ++i) { - comms_bytes += comms_bytesthr[i]; - if (t < commtimethr[i]) t = commtimethr[i]; + if ( comm_time_thr[i]>0.0 ) { + threaded = 1; + comms_bytes += comm_bytes_thr[i]; + if (t < comm_time_thr[i]) t = comm_time_thr[i]; + } } - commtime += t; + if (threaded) commtime += t; _grid->GlobalSum(commtime); commtime/=NP; if ( calls > 0. ) { @@ -1065,6 +1150,9 @@ class CartesianStencil { // Stencil runs along coordinate axes only; NO diagonal std::cout << GridLogMessage << " Stencil " << comms_bytes/commtime/1000. 
<< " GB/s per rank"< Date: Fri, 25 Aug 2017 11:41:01 +0100 Subject: [PATCH 171/177] updated from cambridge mpi3 shakeout --- benchmarks/Benchmark_ITT.cc | 4 ++-- lib/qcd/action/fermion/WilsonCompressor.h | 7 +++++-- lib/stencil/Stencil.h | 8 ++++---- 3 files changed, 11 insertions(+), 8 deletions(-) diff --git a/benchmarks/Benchmark_ITT.cc b/benchmarks/Benchmark_ITT.cc index c5226ee1..bd75dd8e 100644 --- a/benchmarks/Benchmark_ITT.cc +++ b/benchmarks/Benchmark_ITT.cc @@ -108,7 +108,7 @@ public: static void Comms(void) { - int Nloop=1000; + int Nloop=200; int nmu=0; int maxlat=32; @@ -197,7 +197,7 @@ public: timestat.statistics(t_time); // for(int i=0;i > reqs; this->HaloExchangeOptGather(source,compress); double t1=usecond(); - this->CommunicateBegin(reqs); - this->CommunicateComplete(reqs); + // Asynchronous MPI calls multidirectional, Isend etc... + // this->CommunicateBegin(reqs); + // this->CommunicateComplete(reqs); + // Non-overlapped directions within a thread. Asynchronous calls except MPI3, threaded up to comm threads ways. 
+ this->Communicate(); double t2=usecond(); timer1 += t2-t1; this->CommsMerge(compress); double t3=usecond(); timer2 += t3-t2; diff --git a/lib/stencil/Stencil.h b/lib/stencil/Stencil.h index ad454bcb..cd0792d5 100644 --- a/lib/stencil/Stencil.h +++ b/lib/stencil/Stencil.h @@ -365,10 +365,10 @@ class CartesianStencil { // Stencil runs along coordinate axes only; NO diagonal Prepare(); HaloGather(source,compress); // Concurrent - CommunicateBegin(reqs); - CommunicateComplete(reqs); - // Sequential - // Communicate(); + //CommunicateBegin(reqs); + //CommunicateComplete(reqs); + // Sequential, possibly threaded + Communicate(); CommsMergeSHM(compress); CommsMerge(compress); } From 3a582174053732f4e5645367b750fd446d8fcb1d Mon Sep 17 00:00:00 2001 From: Peter Boyle Date: Fri, 25 Aug 2017 14:29:53 +0100 Subject: [PATCH 172/177] Updated --- benchmarks/Benchmark_ITT.cc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/benchmarks/Benchmark_ITT.cc b/benchmarks/Benchmark_ITT.cc index bd75dd8e..2edae8d0 100644 --- a/benchmarks/Benchmark_ITT.cc +++ b/benchmarks/Benchmark_ITT.cc @@ -386,7 +386,7 @@ public: if ( sizeof(Real)==8 ) std::cout << GridLogMessage<< "* DOUBLE precision "<Barrier(); for(int i=0;iBroadcast(0,&ncall,sizeof(ncall)); From d0f3d525d5dfb6cd7a2f5fe3be5a69c7ddc1306e Mon Sep 17 00:00:00 2001 From: Peter Boyle Date: Fri, 25 Aug 2017 19:33:54 +0100 Subject: [PATCH 173/177] Optimal block size for KNL --- benchmarks/Benchmark_ITT.cc | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/benchmarks/Benchmark_ITT.cc b/benchmarks/Benchmark_ITT.cc index 2edae8d0..c0ce451f 100644 --- a/benchmarks/Benchmark_ITT.cc +++ b/benchmarks/Benchmark_ITT.cc @@ -679,8 +679,11 @@ int main (int argc, char ** argv) Grid_init(&argc,&argv); CartesianCommunicator::SetCommunicatorPolicy(CartesianCommunicator::CommunicatorPolicySequential); +#ifdef KNL + LebesgueOrder::Block = std::vector({8,2,2,2}); +#else LebesgueOrder::Block = std::vector({2,2,2,2}); - 
+#endif Benchmark::Decomposition(); int do_memory=1; From f68b5de9c8798779ef2657b9c2d469174ae8f53a Mon Sep 17 00:00:00 2001 From: paboyle Date: Fri, 25 Aug 2017 19:35:21 +0100 Subject: [PATCH 174/177] No compile fix on Clang --- lib/qcd/action/fermion/CayleyFermion5D.cc | 12 ++++++------ lib/qcd/action/fermion/WilsonCompressor.h | 4 ---- lib/qcd/action/fermion/WilsonFermion5D.cc | 5 +++-- 3 files changed, 9 insertions(+), 12 deletions(-) diff --git a/lib/qcd/action/fermion/CayleyFermion5D.cc b/lib/qcd/action/fermion/CayleyFermion5D.cc index 5e67d1f1..838b1c3d 100644 --- a/lib/qcd/action/fermion/CayleyFermion5D.cc +++ b/lib/qcd/action/fermion/CayleyFermion5D.cc @@ -414,7 +414,7 @@ void CayleyFermion5D::SetCoefficientsInternal(RealD zolo_hi,std::vector0.0); + assert(omega[i]!=Coeff_t(0.0)); bs[i] = 0.5*(bpc/omega[i] + bmc); cs[i] = 0.5*(bpc/omega[i] - bmc); } @@ -429,7 +429,7 @@ void CayleyFermion5D::SetCoefficientsInternal(RealD zolo_hi,std::vectorM5) +1.0); - assert(fabs(bee[i])>0.0); + assert(bee[i]!=Coeff_t(0.0)); cee[i]=as[i]*(1.0-cs[i]*(4.0-this->M5)); beo[i]=as[i]*bs[i]; ceo[i]=-as[i]*cs[i]; @@ -456,14 +456,14 @@ void CayleyFermion5D::SetCoefficientsInternal(RealD zolo_hi,std::vector0.0); - assert(fabs(bee[0])>0.0); + assert(bee[i]!=Coeff_t(0.0)); + assert(bee[0]!=Coeff_t(0.0)); lee[i] =-cee[i+1]/bee[i]; // sub-diag entry on the ith column leem[i]=mass*cee[Ls-1]/bee[0]; for(int j=0;j0.0); + assert(bee[j+1]!=Coeff_t(0.0)); leem[i]*= aee[j]/bee[j+1]; } @@ -484,7 +484,7 @@ void CayleyFermion5D::SetCoefficientsInternal(RealD zolo_hi,std::vector0.0); + assert(bee[j] != Coeff_t(0.0)); delta_d *= cee[j]/bee[j]; } dee[Ls-1] += delta_d; diff --git a/lib/qcd/action/fermion/WilsonCompressor.h b/lib/qcd/action/fermion/WilsonCompressor.h index 406476b0..cc5c3c63 100644 --- a/lib/qcd/action/fermion/WilsonCompressor.h +++ b/lib/qcd/action/fermion/WilsonCompressor.h @@ -248,7 +248,6 @@ public: uint64_t callsi; void ZeroCountersi(void) { - std::cout << GridLogMessage << " 
ZeroCountersi()"<_npoints;point++){ same_node[point] = this->SameNode(point); - // std::cout << " dir " <HaloGatherDir(source,XpCompress,Xp,face_idx)); assert(same_node[Yp]==this->HaloGatherDir(source,YpCompress,Yp,face_idx)); assert(same_node[Zp]==this->HaloGatherDir(source,ZpCompress,Zp,face_idx)); diff --git a/lib/qcd/action/fermion/WilsonFermion5D.cc b/lib/qcd/action/fermion/WilsonFermion5D.cc index c5b0f872..1da58ddb 100644 --- a/lib/qcd/action/fermion/WilsonFermion5D.cc +++ b/lib/qcd/action/fermion/WilsonFermion5D.cc @@ -123,12 +123,13 @@ WilsonFermion5D::WilsonFermion5D(GaugeField &_Umu, int vol4; vol4=FourDimGrid.oSites(); Stencil.BuildSurfaceList(LLs,vol4); + vol4=FourDimRedBlackGrid.oSites(); StencilEven.BuildSurfaceList(LLs,vol4); StencilOdd.BuildSurfaceList(LLs,vol4); - std::cout << GridLogMessage << " SurfaceLists "<< Stencil.surface_list.size() - <<" " << StencilEven.surface_list.size()< Date: Fri, 25 Aug 2017 20:43:37 +0100 Subject: [PATCH 175/177] Fix --- benchmarks/Benchmark_ITT.cc | 1 + 1 file changed, 1 insertion(+) diff --git a/benchmarks/Benchmark_ITT.cc b/benchmarks/Benchmark_ITT.cc index 58fdb84a..c0ce451f 100644 --- a/benchmarks/Benchmark_ITT.cc +++ b/benchmarks/Benchmark_ITT.cc @@ -181,6 +181,7 @@ public: #ifdef GRID_OMP #pragma omp atomic +#endif ncomm++; #ifdef GRID_OMP From 54a5e6c1d0ec1cf1b66dac5ba407db49bc7e1016 Mon Sep 17 00:00:00 2001 From: paboyle Date: Fri, 25 Aug 2017 22:36:08 +0100 Subject: [PATCH 176/177] Check if we get huge pages on linux. Larry Meadows piece of magic. 
--- lib/allocator/AlignedAllocator.cc | 33 +++++++++++++++++++++++++++++++ lib/allocator/AlignedAllocator.h | 2 ++ 2 files changed, 35 insertions(+) diff --git a/lib/allocator/AlignedAllocator.cc b/lib/allocator/AlignedAllocator.cc index 04de20bf..764bd732 100644 --- a/lib/allocator/AlignedAllocator.cc +++ b/lib/allocator/AlignedAllocator.cc @@ -63,4 +63,37 @@ void *PointerCache::Lookup(size_t bytes) { return NULL; } + +void check_huge_pages(void *Buf,uint64_t BYTES) +{ +#ifdef __linux__ + int fd = open("/proc/self/pagemap", O_RDONLY); + assert(fd >= 0); + const int page_size = 4096; + uint64_t virt_pfn = (uint64_t)Buf / page_size; + off_t offset = sizeof(uint64_t) * virt_pfn; + uint64_t npages = (BYTES + page_size-1) / page_size; + uint64_t pagedata[npages]; + uint64_t ret = lseek(fd, offset, SEEK_SET); + assert(ret == offset); + ret = ::read(fd, pagedata, sizeof(uint64_t)*npages); + assert(ret == sizeof(uint64_t) * npages); + int nhugepages = npages / 512; + int n4ktotal, nnothuge; + n4ktotal = 0; + nnothuge = 0; + for (int i = 0; i < nhugepages; ++i) { + uint64_t baseaddr = (pagedata[i*512] & 0x7fffffffffffffULL) * page_size; + for (int j = 0; j < 512; ++j) { + uint64_t pageaddr = (pagedata[i*512+j] & 0x7fffffffffffffULL) * page_size; + ++n4ktotal; + if (pageaddr != baseaddr + j * page_size) + ++nnothuge; + } + } + int rank = CartesianCommunicator::RankWorld(); + printf("rank %d Allocated %d 4k pages, %d not in huge pages\n", rank, n4ktotal, nnothuge); +#endif +} + } diff --git a/lib/allocator/AlignedAllocator.h b/lib/allocator/AlignedAllocator.h index c5ad0883..e64a5949 100644 --- a/lib/allocator/AlignedAllocator.h +++ b/lib/allocator/AlignedAllocator.h @@ -64,6 +64,8 @@ namespace Grid { }; + void check_huge_pages(void *Buf,uint64_t BYTES); + //////////////////////////////////////////////////////////////////// // A lattice of something, but assume the something is SIMDized. 
//////////////////////////////////////////////////////////////////// From 4b4c2a715b319bcc7060ef9ae8aa983c49471167 Mon Sep 17 00:00:00 2001 From: paboyle Date: Sat, 26 Aug 2017 11:38:04 +0100 Subject: [PATCH 177/177] fcntl.h needed --- lib/allocator/AlignedAllocator.cc | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/lib/allocator/AlignedAllocator.cc b/lib/allocator/AlignedAllocator.cc index 764bd732..967b2571 100644 --- a/lib/allocator/AlignedAllocator.cc +++ b/lib/allocator/AlignedAllocator.cc @@ -1,7 +1,5 @@ - - - #include +#include namespace Grid {