From a04eb7df5d53e72ce501d5e13c8ef051d3a28442 Mon Sep 17 00:00:00 2001 From: Guido Cossu Date: Fri, 24 Mar 2017 12:43:28 +0900 Subject: [PATCH 001/377] Starting Clover term --- .vscode/settings.json | 3 + lib/qcd/action/fermion/WilsonCloverFermion.cc | 98 +++++++++++++++++++ lib/qcd/action/fermion/WilsonCloverFermion.h | 76 ++++++++++++++ 3 files changed, 177 insertions(+) create mode 100644 .vscode/settings.json create mode 100644 lib/qcd/action/fermion/WilsonCloverFermion.cc create mode 100644 lib/qcd/action/fermion/WilsonCloverFermion.h diff --git a/.vscode/settings.json b/.vscode/settings.json new file mode 100644 index 00000000..20af2f68 --- /dev/null +++ b/.vscode/settings.json @@ -0,0 +1,3 @@ +// Place your settings in this file to overwrite default and user settings. +{ +} \ No newline at end of file diff --git a/lib/qcd/action/fermion/WilsonCloverFermion.cc b/lib/qcd/action/fermion/WilsonCloverFermion.cc new file mode 100644 index 00000000..1d59474e --- /dev/null +++ b/lib/qcd/action/fermion/WilsonCloverFermion.cc @@ -0,0 +1,98 @@ + /************************************************************************************* + + Grid physics library, www.github.com/paboyle/Grid + + Source file: ./lib/qcd/action/fermion/WilsonTMFermion.cc + + Copyright (C) 2015 + +Author: paboyle + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + You should have received a copy of the GNU General Public License along + with this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + + See the full license in the file "LICENSE" in the top level distribution directory + *************************************************************************************/ + /* END LEGAL */ +#include + +namespace Grid { +namespace QCD { + + /* + * BF sequence + * + void bfmbase::MooeeInv(Fermion_t psi, + Fermion_t chi, + int dag, int cb) + + double m = this->mass; + double tm = this->twistedmass; + double mtil = 4.0+this->mass; + + double sq = mtil*mtil + tm*tm; + + double a = mtil/sq; + double b = -tm /sq; + if(dag) b=-b; + axpibg5x(chi,psi,a,b); + + void bfmbase::Mooee(Fermion_t psi, + Fermion_t chi, + int dag,int cb) + double a = 4.0+this->mass; + double b = this->twistedmass; + if(dag) b=-b; + axpibg5x(chi,psi,a,b); + */ + + template + void WilsonTMFermion::Mooee(const FermionField &in, FermionField &out) { + RealD a = 4.0+this->mass; + RealD b = this->mu; + out.checkerboard = in.checkerboard; + axpibg5x(out,in,a,b); + } + template + void WilsonTMFermion::MooeeDag(const FermionField &in, FermionField &out) { + RealD a = 4.0+this->mass; + RealD b = -this->mu; + out.checkerboard = in.checkerboard; + axpibg5x(out,in,a,b); + } + template + void WilsonTMFermion::MooeeInv(const FermionField &in, FermionField &out) { + RealD m = this->mass; + RealD tm = this->mu; + RealD mtil = 4.0+this->mass; + RealD sq = mtil*mtil+tm*tm; + RealD a = mtil/sq; + RealD b = -tm /sq; + axpibg5x(out,in,a,b); + } + template + void WilsonTMFermion::MooeeInvDag(const FermionField &in, FermionField &out) { + RealD m = this->mass; + RealD tm = this->mu; + RealD mtil = 4.0+this->mass; + RealD sq = mtil*mtil+tm*tm; + RealD a = mtil/sq; + RealD b = tm /sq; + axpibg5x(out,in,a,b); + } + + FermOpTemplateInstantiate(WilsonTMFermion); + +} +} diff --git 
a/lib/qcd/action/fermion/WilsonCloverFermion.h b/lib/qcd/action/fermion/WilsonCloverFermion.h new file mode 100644 index 00000000..5901cb2f --- /dev/null +++ b/lib/qcd/action/fermion/WilsonCloverFermion.h @@ -0,0 +1,76 @@ + /************************************************************************************* + + Grid physics library, www.github.com/paboyle/Grid + + Source file: ./lib/qcd/action/fermion/WilsonTMFermion.h + + Copyright (C) 2015 + +Author: paboyle + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along + with this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+ + See the full license in the file "LICENSE" in the top level distribution directory + *************************************************************************************/ + /* END LEGAL */ +#ifndef GRID_QCD_WILSON_TM_FERMION_H +#define GRID_QCD_WILSON_TM_FERMION_H + +#include + +namespace Grid { + + namespace QCD { + + template + class WilsonTMFermion : public WilsonFermion + { + public: + INHERIT_IMPL_TYPES(Impl); + public: + + virtual void Instantiatable(void) {}; + // Constructors + WilsonTMFermion(GaugeField &_Umu, + GridCartesian &Fgrid, + GridRedBlackCartesian &Hgrid, + RealD _mass, + RealD _mu, + const ImplParams &p= ImplParams() + ) : + WilsonFermion(_Umu, + Fgrid, + Hgrid, + _mass,p) + + { + mu = _mu; + } + + + // allow override for twisted mass and clover + virtual void Mooee(const FermionField &in, FermionField &out) ; + virtual void MooeeDag(const FermionField &in, FermionField &out) ; + virtual void MooeeInv(const FermionField &in, FermionField &out) ; + virtual void MooeeInvDag(const FermionField &in, FermionField &out) ; + + private: + RealD mu; // TwistedMass parameter + + }; + +}} + +#endif From 5fdc05782b4578b21ba66421ec58753c32ec9350 Mon Sep 17 00:00:00 2001 From: Guido Cossu Date: Mon, 27 Mar 2017 10:54:16 +0900 Subject: [PATCH 002/377] More in the clover fermion class --- lib/qcd/action/fermion/WilsonCloverFermion.h | 81 +++++++++----------- lib/qcd/action/fermion/WilsonFermion.h | 4 +- 2 files changed, 40 insertions(+), 45 deletions(-) diff --git a/lib/qcd/action/fermion/WilsonCloverFermion.h b/lib/qcd/action/fermion/WilsonCloverFermion.h index 5901cb2f..41131e5d 100644 --- a/lib/qcd/action/fermion/WilsonCloverFermion.h +++ b/lib/qcd/action/fermion/WilsonCloverFermion.h @@ -1,12 +1,13 @@ - /************************************************************************************* +/************************************************************************************* - Grid physics library, www.github.com/paboyle/Grid + Grid physics library, 
www.github.com/paboyle/Grid Source file: ./lib/qcd/action/fermion/WilsonTMFermion.h - Copyright (C) 2015 + Copyright (C) 2017 Author: paboyle +Author: Guido Cossu This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -24,53 +25,47 @@ Author: paboyle See the full license in the file "LICENSE" in the top level distribution directory *************************************************************************************/ - /* END LEGAL */ -#ifndef GRID_QCD_WILSON_TM_FERMION_H -#define GRID_QCD_WILSON_TM_FERMION_H +/* END LEGAL */ +#ifndef GRID_QCD_WILSON_CLOVER_FERMION_H +#define GRID_QCD_WILSON_CLOVER_FERMION_H #include namespace Grid { +namespace QCD { - namespace QCD { +template +class WilsonCloverFermion : public WilsonFermion { +public: + INHERIT_IMPL_TYPES(Impl); - template - class WilsonTMFermion : public WilsonFermion - { - public: - INHERIT_IMPL_TYPES(Impl); - public: +public: + virtual void Instantiatable(void){}; + // Constructors + WilsonCloverFermion(GaugeField &_Umu, GridCartesian &Fgrid, + GridRedBlackCartesian &Hgrid, + RealD _mass, + RealD _csw, + const ImplParams &p = ImplParams()) : WilsonFermion(_Umu, + Fgrid, + Hgrid, + _mass, p) + { + csw = _csw; + } - virtual void Instantiatable(void) {}; - // Constructors - WilsonTMFermion(GaugeField &_Umu, - GridCartesian &Fgrid, - GridRedBlackCartesian &Hgrid, - RealD _mass, - RealD _mu, - const ImplParams &p= ImplParams() - ) : - WilsonFermion(_Umu, - Fgrid, - Hgrid, - _mass,p) + virtual RealD M(const FermionField& in, FermionField& out); + virtual RealD Mdag(const FermionField& in, FermionField& out); - { - mu = _mu; - } + virtual void Mooee(const FermionField &in, FermionField &out); + virtual void MooeeDag(const FermionField &in, FermionField &out); + virtual void MooeeInv(const FermionField &in, FermionField &out); + virtual void MooeeInvDag(const FermionField &in, FermionField &out); +private: + RealD csw; // Clover 
coefficient +}; +} +} - // allow override for twisted mass and clover - virtual void Mooee(const FermionField &in, FermionField &out) ; - virtual void MooeeDag(const FermionField &in, FermionField &out) ; - virtual void MooeeInv(const FermionField &in, FermionField &out) ; - virtual void MooeeInvDag(const FermionField &in, FermionField &out) ; - - private: - RealD mu; // TwistedMass parameter - - }; - -}} - -#endif +#endif // GRID_QCD_WILSON_CLOVER_FERMION_H diff --git a/lib/qcd/action/fermion/WilsonFermion.h b/lib/qcd/action/fermion/WilsonFermion.h index 933be732..50f4f884 100644 --- a/lib/qcd/action/fermion/WilsonFermion.h +++ b/lib/qcd/action/fermion/WilsonFermion.h @@ -65,8 +65,8 @@ class WilsonFermion : public WilsonKernels, public WilsonFermionStatic { // override multiply; cut number routines if pass dagger argument // and also make interface more uniformly consistent ////////////////////////////////////////////////////////////////// - RealD M(const FermionField &in, FermionField &out); - RealD Mdag(const FermionField &in, FermionField &out); + virtual RealD M(const FermionField &in, FermionField &out); + virtual RealD Mdag(const FermionField &in, FermionField &out); ///////////////////////////////////////////////////////// // half checkerboard operations From fff484eca5a88b01c61b1a638faf75d4b00e5304 Mon Sep 17 00:00:00 2001 From: Guido Cossu Date: Mon, 27 Mar 2017 15:12:57 +0900 Subject: [PATCH 003/377] Populating Clover fermions methods --- lib/qcd/action/Actions.h | 11 +- lib/qcd/action/fermion/WilsonCloverFermion.cc | 134 ++++++++++-------- lib/qcd/action/fermion/WilsonCloverFermion.h | 28 +++- 3 files changed, 103 insertions(+), 70 deletions(-) diff --git a/lib/qcd/action/Actions.h b/lib/qcd/action/Actions.h index 4a30f8c3..b825ac49 100644 --- a/lib/qcd/action/Actions.h +++ b/lib/qcd/action/Actions.h @@ -142,11 +142,10 @@ typedef SymanzikGaugeAction ConjugateSymanzikGaugeAction //////////////////////////////////////////// #include // 4d wilson like 
-#include // 4d wilson like +#include // 4d wilson like +#include // 4d wilson like #include // 5d base used by all 5d overlap types -//#include - #include // Cayley types #include #include @@ -188,10 +187,16 @@ typedef WilsonFermion WilsonTwoIndexSymmetricFermi typedef WilsonFermion WilsonTwoIndexSymmetricFermionF; typedef WilsonFermion WilsonTwoIndexSymmetricFermionD; +// Twisted mass fermion typedef WilsonTMFermion WilsonTMFermionR; typedef WilsonTMFermion WilsonTMFermionF; typedef WilsonTMFermion WilsonTMFermionD; +// Clover fermions +typedef WilsonCloverFermion WilsonCloverFermionR; +typedef WilsonCloverFermion WilsonCloverFermionF; +typedef WilsonCloverFermion WilsonCloverFermionD; + typedef DomainWallFermion DomainWallFermionR; typedef DomainWallFermion DomainWallFermionF; typedef DomainWallFermion DomainWallFermionD; diff --git a/lib/qcd/action/fermion/WilsonCloverFermion.cc b/lib/qcd/action/fermion/WilsonCloverFermion.cc index 1d59474e..0cc82f62 100644 --- a/lib/qcd/action/fermion/WilsonCloverFermion.cc +++ b/lib/qcd/action/fermion/WilsonCloverFermion.cc @@ -1,12 +1,13 @@ - /************************************************************************************* +/************************************************************************************* - Grid physics library, www.github.com/paboyle/Grid + Grid physics library, www.github.com/paboyle/Grid - Source file: ./lib/qcd/action/fermion/WilsonTMFermion.cc + Source file: ./lib/qcd/action/fermion/WilsonCloverFermion.cc - Copyright (C) 2015 + Copyright (C) 2017 -Author: paboyle + Author: paboyle + Author: Guido Cossu This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -24,75 +25,82 @@ Author: paboyle See the full license in the file "LICENSE" in the top level distribution directory *************************************************************************************/ - /* END LEGAL */ +/* END LEGAL */ #include +#include 
namespace Grid { namespace QCD { - /* - * BF sequence - * - void bfmbase::MooeeInv(Fermion_t psi, - Fermion_t chi, - int dag, int cb) + template + void WilsonCloverFermion::AddCloverTerm(const FermionField& in, + FermionField& out){ + FermionField tmp(out._grid); + tmp = zero; + // the product sigma_munu Fmunu is hermitian + tmp += Bx*(Gamma(Gamma::Algebra::SigmaYZ)*in); + tmp += By*(Gamma(Gamma::Algebra::MinusSigmaXZ)*in); + tmp += Bz*(Gamma(Gamma::Algebra::SigmaXY)*in); + tmp += Ex*(Gamma(Gamma::Algebra::MinusSigmaXT)*in); + tmp += Ey*(Gamma(Gamma::Algebra::MinusSigmaYT)*in); + tmp += Ez*(Gamma(Gamma::Algebra::MinusSigmaZT)*in); + out += tmp*csw; // check signs - double m = this->mass; - double tm = this->twistedmass; - double mtil = 4.0+this->mass; + } - double sq = mtil*mtil + tm*tm; - double a = mtil/sq; - double b = -tm /sq; - if(dag) b=-b; - axpibg5x(chi,psi,a,b); - - void bfmbase::Mooee(Fermion_t psi, - Fermion_t chi, - int dag,int cb) - double a = 4.0+this->mass; - double b = this->twistedmass; - if(dag) b=-b; - axpibg5x(chi,psi,a,b); - */ - - template - void WilsonTMFermion::Mooee(const FermionField &in, FermionField &out) { - RealD a = 4.0+this->mass; - RealD b = this->mu; + template + RealD WilsonCloverFermion::M(const FermionField& in, FermionField& out) { + // Wilson term out.checkerboard = in.checkerboard; - axpibg5x(out,in,a,b); - } - template - void WilsonTMFermion::MooeeDag(const FermionField &in, FermionField &out) { - RealD a = 4.0+this->mass; - RealD b = -this->mu; - out.checkerboard = in.checkerboard; - axpibg5x(out,in,a,b); - } - template - void WilsonTMFermion::MooeeInv(const FermionField &in, FermionField &out) { - RealD m = this->mass; - RealD tm = this->mu; - RealD mtil = 4.0+this->mass; - RealD sq = mtil*mtil+tm*tm; - RealD a = mtil/sq; - RealD b = -tm /sq; - axpibg5x(out,in,a,b); - } - template - void WilsonTMFermion::MooeeInvDag(const FermionField &in, FermionField &out) { - RealD m = this->mass; - RealD tm = this->mu; - RealD mtil = 
4.0+this->mass; - RealD sq = mtil*mtil+tm*tm; - RealD a = mtil/sq; - RealD b = tm /sq; - axpibg5x(out,in,a,b); + this->Dhop(in, out, DaggerNo); + // Clover term + // apply the sigma and Fmunu + AddCloverTerm(in, out); + // overall factor + return axpy_norm(out, 4 + this->mass, in, out); } - FermOpTemplateInstantiate(WilsonTMFermion); + template + RealD WilsonCloverFermion::Mdag(const FermionField& in, FermionField& out) { + // Wilson term + out.checkerboard = in.checkerboard; + this->Dhop(in, out, DaggerYes); + // Clover term + // apply the sigma and Fmunu + AddCloverTerm(in, out); + return axpy_norm(out, 4 + this->mass, in, out); + } + + template + void WilsonCloverFermion::ImportGauge(const GaugeField& _Umu) { + this->ImportGauge(_Umu); + // Compute the field strength terms + + // Invert the Moo, Mee terms (?) + } + + + template + void WilsonCloverFermion::Mooee(const FermionField &in, FermionField &out) { + out.checkerboard = in.checkerboard; + + } + + template + void WilsonCloverFermion::MooeeDag(const FermionField &in, FermionField &out) { + assert(0); // not implemented yet + } + template + void WilsonCloverFermion::MooeeInv(const FermionField &in, FermionField &out) { + assert(0); // not implemented yet + } + template + void WilsonCloverFermion::MooeeInvDag(const FermionField &in, FermionField &out) { + assert(0); // not implemented yet + } + + FermOpTemplateInstantiate(WilsonCloverFermion); } } diff --git a/lib/qcd/action/fermion/WilsonCloverFermion.h b/lib/qcd/action/fermion/WilsonCloverFermion.h index 41131e5d..d3785cac 100644 --- a/lib/qcd/action/fermion/WilsonCloverFermion.h +++ b/lib/qcd/action/fermion/WilsonCloverFermion.h @@ -2,12 +2,12 @@ Grid physics library, www.github.com/paboyle/Grid - Source file: ./lib/qcd/action/fermion/WilsonTMFermion.h + Source file: ./lib/qcd/action/fermion/WilsonCloverFermion.h Copyright (C) 2017 -Author: paboyle -Author: Guido Cossu + Author: paboyle + Author: Guido Cossu This program is free software; you can 
redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -40,6 +40,8 @@ public: INHERIT_IMPL_TYPES(Impl); public: + typedef WilsonFermion WilsonBase; + virtual void Instantiatable(void){}; // Constructors WilsonCloverFermion(GaugeField &_Umu, GridCartesian &Fgrid, @@ -49,9 +51,17 @@ public: const ImplParams &p = ImplParams()) : WilsonFermion(_Umu, Fgrid, Hgrid, - _mass, p) + _mass, p), + Bx(_Umu._grid), + By(_Umu._grid), + Bz(_Umu._grid), + Ex(_Umu._grid), + Ey(_Umu._grid), + Ez(_Umu._grid) { csw = _csw; + assert(Nd == 4); // require 4 dimensions + } virtual RealD M(const FermionField& in, FermionField& out); @@ -62,8 +72,18 @@ public: virtual void MooeeInv(const FermionField &in, FermionField &out); virtual void MooeeInvDag(const FermionField &in, FermionField &out); + void ImportGauge(const GaugeField &_Umu); private: + // here fixing the 4 dimensions, make it more general? + + // Field strengths + GaugeLinkField Bx, By, Bz, Ex, Ey, Ez; + RealD csw; // Clover coefficient + + + // Methods + void AddCloverTerm(const FermionField& in, FermionField& out); }; } } From 5e549ebd8b645ba6b7c39548b1ded42b629c6011 Mon Sep 17 00:00:00 2001 From: Guido Cossu Date: Mon, 27 Mar 2017 16:43:15 +0900 Subject: [PATCH 004/377] Adding force terms --- lib/qcd/QCD.h | 6 ++- lib/qcd/action/fermion/WilsonCloverFermion.cc | 29 ++++++++++- lib/qcd/action/fermion/WilsonCloverFermion.h | 6 ++- lib/qcd/utils/WilsonLoops.h | 50 +++++++++++++++++++ 4 files changed, 88 insertions(+), 3 deletions(-) diff --git a/lib/qcd/QCD.h b/lib/qcd/QCD.h index f434bdd9..1c24ecdd 100644 --- a/lib/qcd/QCD.h +++ b/lib/qcd/QCD.h @@ -35,7 +35,11 @@ namespace Grid{ namespace QCD { - + static const int Xdir = 0; + static const int Ydir = 1; + static const int Zdir = 2; + static const int Tdir = 3; + static const int Xp = 0; static const int Yp = 1; static const int Zp = 2; diff --git a/lib/qcd/action/fermion/WilsonCloverFermion.cc 
b/lib/qcd/action/fermion/WilsonCloverFermion.cc index 0cc82f62..b94c72c0 100644 --- a/lib/qcd/action/fermion/WilsonCloverFermion.cc +++ b/lib/qcd/action/fermion/WilsonCloverFermion.cc @@ -76,6 +76,12 @@ namespace QCD { void WilsonCloverFermion::ImportGauge(const GaugeField& _Umu) { this->ImportGauge(_Umu); // Compute the field strength terms + WilsonLoops::FieldStrength(Bx, _Umu, Ydir, Zdir); + WilsonLoops::FieldStrength(By, _Umu, Zdir, Xdir); + WilsonLoops::FieldStrength(Bz, _Umu, Xdir, Ydir); + WilsonLoops::FieldStrength(Ex, _Umu, Tdir, Xdir); + WilsonLoops::FieldStrength(Ey, _Umu, Tdir, Ydir); + WilsonLoops::FieldStrength(Ez, _Umu, Tdir, Zdir); // Invert the Moo, Mee terms (?) } @@ -84,7 +90,7 @@ namespace QCD { template void WilsonCloverFermion::Mooee(const FermionField &in, FermionField &out) { out.checkerboard = in.checkerboard; - + assert(0); // to be completed } template @@ -100,6 +106,27 @@ namespace QCD { assert(0); // not implemented yet } + // Derivative parts + template + void WilsonCloverFermion::MDeriv(GaugeField&mat, const FermionField&U, const FermionField&V, int dag){ + GaugeField tmp(mat._grid); + this->DhopDeriv(mat, U, V, dag); + MooDeriv(tmp, U, V, dag); + mat += tmp; + } + + // Derivative parts + template + void WilsonCloverFermion::MooDeriv(GaugeField&mat, const FermionField&U, const FermionField&V, int dag){ + assert(0); // not implemented yet + } + + // Derivative parts + template + void WilsonCloverFermion::MeeDeriv(GaugeField&mat, const FermionField&U, const FermionField&V, int dag){ + assert(0); // not implemented yet + } + FermOpTemplateInstantiate(WilsonCloverFermion); } diff --git a/lib/qcd/action/fermion/WilsonCloverFermion.h b/lib/qcd/action/fermion/WilsonCloverFermion.h index d3785cac..e942de1f 100644 --- a/lib/qcd/action/fermion/WilsonCloverFermion.h +++ b/lib/qcd/action/fermion/WilsonCloverFermion.h @@ -61,7 +61,6 @@ public: { csw = _csw; assert(Nd == 4); // require 4 dimensions - } virtual RealD M(const FermionField& in, 
FermionField& out); @@ -72,6 +71,11 @@ public: virtual void MooeeInv(const FermionField &in, FermionField &out); virtual void MooeeInvDag(const FermionField &in, FermionField &out); + virtual void MDeriv(GaugeField&mat, const FermionField&U, const FermionField&V, int dag); + virtual void MooDeriv(GaugeField&mat, const FermionField&U, const FermionField&V, int dag); + virtual void MeeDeriv(GaugeField&mat, const FermionField&U, const FermionField&V, int dag); + + void ImportGauge(const GaugeField &_Umu); private: // here fixing the 4 dimensions, make it more general? diff --git a/lib/qcd/utils/WilsonLoops.h b/lib/qcd/utils/WilsonLoops.h index 03d45c07..ca2b2b8d 100644 --- a/lib/qcd/utils/WilsonLoops.h +++ b/lib/qcd/utils/WilsonLoops.h @@ -256,6 +256,56 @@ public: } } + //////////////////////////////////////////////////////////////////////// + // the sum over all staples on each site in direction mu,nu, lower part + //////////////////////////////////////////////////////////////////////// + static void StapleLower(GaugeMat &staple, const GaugeLorentz &Umu, int mu, + int nu) { + if (nu != mu) { + GridBase *grid = Umu._grid; + + std::vector U(Nd, grid); + for (int d = 0; d < Nd; d++) { + U[d] = PeekIndex(Umu, d);// some redundant copies + } + + // mu + // ^ + // |__> nu + + // __ + // | + // |__ + // + // + staple = Gimpl::ShiftStaple( + Gimpl::CovShiftBackward(U[nu], nu, + Gimpl::CovShiftBackward(U[mu], mu, U[nu])), + mu); + } + } + + ////////////////////////////////////////////////////// + // Field Strength + ////////////////////////////////////////////////////// + static void FieldStrength(GaugeMat &FS, const GaugeLorentz &Umu, int mu, int nu){ + // Fmn +--<--+ Ut +--<--+ + // | | | | + // (x)+-->--+ +-->--+(x) + // | | | | + // +--<--+ +--<--+ + + GaugeMat Vup(Umu._grid), Vdn(Umu._grid); + StapleUpper(Vup, Umu, mu, nu);// coalesce these two (up low) + StapleLower(Vdn, Umu, mu, nu); + GaugeMat v = adj(Vup) - adj(Vdn); + GaugeMat u = PeekIndex(Umu, mu); // some 
redundant copies + GaugeMat vu = v*u; + FS = 0.25*Ta(u*v - Cshift(vu, mu, +1)); + } + + + ////////////////////////////////////////////////////// // Similar to above for rectangle is required ////////////////////////////////////////////////////// From 3750b9ffeefb31e8bfb9b30b2266b83acdc7d4ff Mon Sep 17 00:00:00 2001 From: Guido Cossu Date: Mon, 27 Mar 2017 16:53:32 +0900 Subject: [PATCH 005/377] Deleting MPI test for OSX in Travis --- .travis.yml | 2 -- 1 file changed, 2 deletions(-) diff --git a/.travis.yml b/.travis.yml index ae3efda8..055adee1 100644 --- a/.travis.yml +++ b/.travis.yml @@ -102,5 +102,3 @@ script: - ../configure --enable-precision=single --enable-simd=SSE4 --enable-comms=mpi-auto - make -j4 - if [[ "$TRAVIS_OS_NAME" == "linux" ]]; then mpirun.openmpi -n 2 ./benchmarks/Benchmark_dwf --threads 1 --mpi 2.1.1.1; fi - - if [[ "$TRAVIS_OS_NAME" == "osx" ]]; then mpirun -n 2 ./benchmarks/Benchmark_dwf --threads 1 --mpi 2.1.1.1; fi - From 1ed69816b9ecb9838c0a9d613e08fc2fc042afa2 Mon Sep 17 00:00:00 2001 From: Guido Cossu Date: Thu, 30 Mar 2017 11:14:27 +0100 Subject: [PATCH 006/377] First steps for the force term --- lib/qcd/action/fermion/WilsonCloverFermion.cc | 13 ++++++++++++- lib/qcd/utils/WilsonLoops.h | 2 +- 2 files changed, 13 insertions(+), 2 deletions(-) diff --git a/lib/qcd/action/fermion/WilsonCloverFermion.cc b/lib/qcd/action/fermion/WilsonCloverFermion.cc index b94c72c0..7e51dcfe 100644 --- a/lib/qcd/action/fermion/WilsonCloverFermion.cc +++ b/lib/qcd/action/fermion/WilsonCloverFermion.cc @@ -82,8 +82,10 @@ namespace QCD { WilsonLoops::FieldStrength(Ex, _Umu, Tdir, Xdir); WilsonLoops::FieldStrength(Ey, _Umu, Tdir, Ydir); WilsonLoops::FieldStrength(Ez, _Umu, Tdir, Zdir); + // Save the contracted term with sigma + // into a dense matrix site by site - // Invert the Moo, Mee terms (?) 
+ // Invert the Moo, Mee terms (using Eigen) } @@ -110,6 +112,14 @@ namespace QCD { template void WilsonCloverFermion::MDeriv(GaugeField&mat, const FermionField&U, const FermionField&V, int dag){ GaugeField tmp(mat._grid); + + conformable(U._grid, _grid); + conformable(U._grid, V._grid); + conformable(U._grid, mat._grid); + + mat.checkerboard = U.checkerboard; + tmp.checkerboard = U.checkerboard; + this->DhopDeriv(mat, U, V, dag); MooDeriv(tmp, U, V, dag); mat += tmp; @@ -118,6 +128,7 @@ namespace QCD { // Derivative parts template void WilsonCloverFermion::MooDeriv(GaugeField&mat, const FermionField&U, const FermionField&V, int dag){ + assert(0); // not implemented yet } diff --git a/lib/qcd/utils/WilsonLoops.h b/lib/qcd/utils/WilsonLoops.h index ca2b2b8d..1984d2b8 100644 --- a/lib/qcd/utils/WilsonLoops.h +++ b/lib/qcd/utils/WilsonLoops.h @@ -301,7 +301,7 @@ public: GaugeMat v = adj(Vup) - adj(Vdn); GaugeMat u = PeekIndex(Umu, mu); // some redundant copies GaugeMat vu = v*u; - FS = 0.25*Ta(u*v - Cshift(vu, mu, +1)); + FS = 0.25*Ta(u*v + Cshift(vu, mu, +1));// need jsut the antihermite part } From b8ae787b5e3e50646085b5ea0dc74dd825dd9489 Mon Sep 17 00:00:00 2001 From: Guido Cossu Date: Thu, 30 Mar 2017 11:33:15 +0100 Subject: [PATCH 007/377] Correcting a simple typo --- lib/qcd/action/fermion/WilsonCloverFermion.cc | 1 - lib/qcd/utils/WilsonLoops.h | 2 +- 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/lib/qcd/action/fermion/WilsonCloverFermion.cc b/lib/qcd/action/fermion/WilsonCloverFermion.cc index 7e51dcfe..5d5d6622 100644 --- a/lib/qcd/action/fermion/WilsonCloverFermion.cc +++ b/lib/qcd/action/fermion/WilsonCloverFermion.cc @@ -113,7 +113,6 @@ namespace QCD { void WilsonCloverFermion::MDeriv(GaugeField&mat, const FermionField&U, const FermionField&V, int dag){ GaugeField tmp(mat._grid); - conformable(U._grid, _grid); conformable(U._grid, V._grid); conformable(U._grid, mat._grid); diff --git a/lib/qcd/utils/WilsonLoops.h 
b/lib/qcd/utils/WilsonLoops.h index 1984d2b8..143b283a 100644 --- a/lib/qcd/utils/WilsonLoops.h +++ b/lib/qcd/utils/WilsonLoops.h @@ -301,7 +301,7 @@ public: GaugeMat v = adj(Vup) - adj(Vdn); GaugeMat u = PeekIndex(Umu, mu); // some redundant copies GaugeMat vu = v*u; - FS = 0.25*Ta(u*v + Cshift(vu, mu, +1));// need jsut the antihermite part + FS = 0.25*Ta(u*v + Cshift(vu, mu, +1)); } From 6fd82228bfcc949e60ba229600d44c251caffd38 Mon Sep 17 00:00:00 2001 From: Guido Cossu Date: Wed, 5 Apr 2017 10:51:44 +0100 Subject: [PATCH 008/377] Working on the derivative --- lib/qcd/action/fermion/WilsonCloverFermion.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/qcd/action/fermion/WilsonCloverFermion.cc b/lib/qcd/action/fermion/WilsonCloverFermion.cc index 5d5d6622..e588de42 100644 --- a/lib/qcd/action/fermion/WilsonCloverFermion.cc +++ b/lib/qcd/action/fermion/WilsonCloverFermion.cc @@ -127,7 +127,7 @@ namespace QCD { // Derivative parts template void WilsonCloverFermion::MooDeriv(GaugeField&mat, const FermionField&U, const FermionField&V, int dag){ - + // Compute the 8 terms of the derivative assert(0); // not implemented yet } From ac1253bb764fe3424f1afa87dea4ed349ef24dd9 Mon Sep 17 00:00:00 2001 From: Lanny91 Date: Mon, 10 Apr 2017 17:42:55 +0100 Subject: [PATCH 009/377] Corrected solver in rare kaon test --- tests/hadrons/Test_hadrons_rarekaon.cc | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/tests/hadrons/Test_hadrons_rarekaon.cc b/tests/hadrons/Test_hadrons_rarekaon.cc index 26622525..7c76312d 100644 --- a/tests/hadrons/Test_hadrons_rarekaon.cc +++ b/tests/hadrons/Test_hadrons_rarekaon.cc @@ -109,11 +109,10 @@ int main(int argc, char *argv[]) application.createModule("DWF_" + flavour[i], actionPar); // solvers - // RBPrecCG -> CG MSolver::CG::Par solverPar; solverPar.action = "DWF_" + flavour[i]; solverPar.residual = 1.0e-8; - application.createModule(solvers[i], + application.createModule(solvers[i], solverPar); } From 
af2d6ce2e08d54ea7a46e5eeb56e0374905d9343 Mon Sep 17 00:00:00 2001 From: Lanny91 Date: Fri, 10 Mar 2017 14:59:11 +0000 Subject: [PATCH 010/377] Encapsulated 4D->5D and 5D->4D conversions in separate functions & added corresponding tests. --- extras/Hadrons/Modules/Quark.hpp | 32 ++++-- tests/hadrons/Test_hadrons_quark.cc | 156 ++++++++++++++++++++++++++++ 2 files changed, 179 insertions(+), 9 deletions(-) create mode 100644 tests/hadrons/Test_hadrons_quark.cc diff --git a/extras/Hadrons/Modules/Quark.hpp b/extras/Hadrons/Modules/Quark.hpp index be7426ab..c08e0192 100644 --- a/extras/Hadrons/Modules/Quark.hpp +++ b/extras/Hadrons/Modules/Quark.hpp @@ -36,6 +36,27 @@ See the full license in the file "LICENSE" in the top level distribution directo BEGIN_HADRONS_NAMESPACE +/****************************************************************************** + * 5D -> 4D and 4D -> 5D conversions. * + ******************************************************************************/ +template // Note that 5D object is modified. 
+inline void make_4D(Lattice &in_5d, Lattice &out_4d, int Ls) +{ + axpby_ssp_pminus(in_5d, 0., in_5d, 1., in_5d, 0, 0); + axpby_ssp_pplus(in_5d, 1., in_5d, 1., in_5d, 0, Ls-1); + ExtractSlice(out_4d, in_5d, 0, 0); +} + +template +inline void make_5D(const Lattice &in_4d, Lattice &out_5d, int Ls) +{ + out_5d = zero; + InsertSlice(in_4d, out_5d, 0, 0); + InsertSlice(in_4d, out_5d, Ls-1, 0); + axpby_ssp_pplus(out_5d, 0., out_5d, 1., out_5d, 0, 0); + axpby_ssp_pminus(out_5d, 0., out_5d, 1., out_5d, Ls-1, Ls-1); +} + /****************************************************************************** * TQuark * ******************************************************************************/ @@ -143,12 +164,8 @@ void TQuark::execute(void) } else { - source = zero; PropToFerm(tmp, fullSrc, s, c); - InsertSlice(tmp, source, 0, 0); - InsertSlice(tmp, source, Ls_-1, 0); - axpby_ssp_pplus(source, 0., source, 1., source, 0, 0); - axpby_ssp_pminus(source, 0., source, 1., source, Ls_-1, Ls_-1); + make_5D(tmp, source, Ls_); } } // source conversion for 5D sources @@ -171,10 +188,7 @@ void TQuark::execute(void) { PropagatorField &p4d = *env().template getObject(getName()); - - axpby_ssp_pminus(sol, 0., sol, 1., sol, 0, 0); - axpby_ssp_pplus(sol, 1., sol, 1., sol, 0, Ls_-1); - ExtractSlice(tmp, sol, 0, 0); + make_4D(sol, tmp, Ls_); FermToProp(p4d, tmp, s, c); } } diff --git a/tests/hadrons/Test_hadrons_quark.cc b/tests/hadrons/Test_hadrons_quark.cc new file mode 100644 index 00000000..6a142ff6 --- /dev/null +++ b/tests/hadrons/Test_hadrons_quark.cc @@ -0,0 +1,156 @@ +/******************************************************************************* + Grid physics library, www.github.com/paboyle/Grid + + Source file: tests/hadrons/Test_hadrons_quark.cc + + Copyright (C) 2017 + + Author: Andrew Lawson + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 
of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along + with this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + + See the full license in the file "LICENSE" in the top level distribution + directory. + *******************************************************************************/ + +#include "Test_hadrons.hpp" +#include + +using namespace Grid; +using namespace QCD; +using namespace Hadrons; + +/******************************************************************************* + * Unit test functions within Quark module. + ******************************************************************************/ + +// Alternative 4D & 5D projections +template +inline void make_4D_with_gammas(Lattice &in_5d, Lattice &out_4d, int Ls) +{ + GridBase *_grid(out_4d._grid); + Lattice tmp(_grid); + Gamma G5(Gamma::Algebra::Gamma5); + + ExtractSlice(tmp, in_5d, 0, 0); + out_4d = 0.5 * (tmp - G5*tmp); + ExtractSlice(tmp, in_5d, Ls - 1, 0); + out_4d += 0.5 * (tmp + G5*tmp); +} + +template +inline void make_5D_with_gammas(Lattice &in_4d, Lattice &out_5d, int Ls) +{ + out_5d = zero; + Gamma G5(Gamma::Algebra::Gamma5); + GridBase *_grid(in_4d._grid); + Lattice tmp(_grid); + + tmp = 0.5 * (in_4d + G5*in_4d); + InsertSlice(tmp, out_5d, 0, 0); + tmp = 0.5 * (in_4d - G5*in_4d); + InsertSlice(tmp, out_5d, Ls - 1, 0); +} + +int main(int argc, char **argv) +{ + /*************************************************************************** + * Initialisation. 
+ **************************************************************************/ + Grid_init(&argc, &argv); + + std::vector latt_size = GridDefaultLatt(); + std::vector simd_layout = GridDefaultSimd(Nd,vComplex::Nsimd()); + std::vector mpi_layout = GridDefaultMpi(); + + const int Ls = 8; + + GridCartesian UGrid(latt_size,simd_layout,mpi_layout); + GridCartesian *FGrid = SpaceTimeGrid::makeFiveDimGrid(Ls, &UGrid); + GridSerialRNG sRNG; + GridParallelRNG pRNG(&UGrid); + + std::vector seeds4({1,2,3,4}); + std::vector seeds5({5,6,7,8}); + GridParallelRNG rng4(&UGrid); + GridParallelRNG rng5(FGrid); + rng4.SeedFixedIntegers(seeds4); + rng5.SeedFixedIntegers(seeds5); + + /*************************************************************************** + * Build a 4D random source, and convert it to 5D. + **************************************************************************/ + LatticeFermion test4(&UGrid); + LatticeFermion test5(FGrid); + LatticeFermion check5(FGrid); + + gaussian(rng4, test4); + make_5D(test4, test5, Ls); + make_5D_with_gammas(test4, check5, Ls); + test5 -= check5; + std::cout << "4D -> 5D comparison, diff = " << Grid::sqrt(norm2(test5)) << std::endl; + + /*************************************************************************** + * Build a 5D random source, and project down to 4D. + **************************************************************************/ + LatticeFermion check4(&UGrid); + gaussian(rng5, test5); + check5 = test5; + + make_4D(test5, test4, Ls); + make_4D_with_gammas(check5, check4, Ls); + test4 -= check4; + std::cout << "5D -> 4D comparison, diff = " << Grid::sqrt(norm2(test4)) << std::endl; + + /*************************************************************************** + * Convert a propagator to a fermion & back. 
+ **************************************************************************/ + LatticeFermion ferm(&UGrid); + LatticePropagator prop(&UGrid), ref(&UGrid); + gaussian(rng4, prop); + + // Define variables for sanity checking a single site. + typename SpinColourVector::scalar_object fermSite; + typename SpinColourMatrix::scalar_object propSite; + std::vector site(Nd, 0); + + for (int s = 0; s < Ns; ++s) + for (int c = 0; c < Nc; ++c) + { + ref = prop; + PropToFerm(ferm, prop, s, c); + FermToProp(prop, ferm, s, c); + + std::cout << "Spin = " << s << ", Colour = " << c << std::endl; + ref -= prop; + std::cout << "Prop->Ferm->Prop test, diff = " << Grid::sqrt(norm2(ref)) << std::endl; + + peekSite(fermSite, ferm, site); + peekSite(propSite, prop, site); + for (int s2 = 0; s2 < Ns; ++s2) + for (int c2 = 0; c2 < Nc; ++c2) + { + //if (propSite()(s2, s)(c2, c) != fermSite()(s2)(c2)) + //{ + std::cout << propSite()(s2, s)(c2, c) << " != " + << fermSite()(s2)(c2) << " for spin = " << s2 + << ", col = " << c2 << std::endl; + //} + } + } + + Grid_finalize(); + return EXIT_SUCCESS; +} From c382c351a5d088f6af5734ab5dbc11aec53e3cf4 Mon Sep 17 00:00:00 2001 From: Lanny91 Date: Fri, 10 Mar 2017 15:05:59 +0000 Subject: [PATCH 011/377] Quark test output correction. 
--- tests/hadrons/Test_hadrons_quark.cc | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/hadrons/Test_hadrons_quark.cc b/tests/hadrons/Test_hadrons_quark.cc index 6a142ff6..5b9d0ce1 100644 --- a/tests/hadrons/Test_hadrons_quark.cc +++ b/tests/hadrons/Test_hadrons_quark.cc @@ -142,12 +142,12 @@ int main(int argc, char **argv) for (int s2 = 0; s2 < Ns; ++s2) for (int c2 = 0; c2 < Nc; ++c2) { - //if (propSite()(s2, s)(c2, c) != fermSite()(s2)(c2)) - //{ + if (propSite()(s2, s)(c2, c) != fermSite()(s2)(c2)) + { std::cout << propSite()(s2, s)(c2, c) << " != " << fermSite()(s2)(c2) << " for spin = " << s2 << ", col = " << c2 << std::endl; - //} + } } } From b69499630276813e217ac211e765d13c9a4e16a0 Mon Sep 17 00:00:00 2001 From: Guido Cossu Date: Fri, 14 Apr 2017 13:30:14 +0100 Subject: [PATCH 012/377] adding comments --- lib/qcd/action/fermion/WilsonCloverFermion.cc | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/lib/qcd/action/fermion/WilsonCloverFermion.cc b/lib/qcd/action/fermion/WilsonCloverFermion.cc index e588de42..c7fd211d 100644 --- a/lib/qcd/action/fermion/WilsonCloverFermion.cc +++ b/lib/qcd/action/fermion/WilsonCloverFermion.cc @@ -128,6 +128,17 @@ namespace QCD { template void WilsonCloverFermion::MooDeriv(GaugeField&mat, const FermionField&U, const FermionField&V, int dag){ // Compute the 8 terms of the derivative + + // Pseudocode + // Using Chroma as a template + + // for loop on mu and nu, but upper matrix + // take the outer product factor * U x (sigma_mu_nu V) + + // derivative of loops + // end of loop + + assert(0); // not implemented yet } From 1425afc72feae364a2629f30eb6c34783d6374eb Mon Sep 17 00:00:00 2001 From: Lanny91 Date: Tue, 25 Apr 2017 17:26:56 +0100 Subject: [PATCH 013/377] Rare Kaon test fix --- tests/hadrons/Test_hadrons_rarekaon.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/hadrons/Test_hadrons_rarekaon.cc b/tests/hadrons/Test_hadrons_rarekaon.cc index 
7c76312d..9d35c1bc 100644 --- a/tests/hadrons/Test_hadrons_rarekaon.cc +++ b/tests/hadrons/Test_hadrons_rarekaon.cc @@ -109,7 +109,7 @@ int main(int argc, char *argv[]) application.createModule("DWF_" + flavour[i], actionPar); // solvers - MSolver::CG::Par solverPar; + MSolver::RBPrecCG::Par solverPar; solverPar.action = "DWF_" + flavour[i]; solverPar.residual = 1.0e-8; application.createModule(solvers[i], From 44260643f6b2d61d8ea6a2543c67e08a76dab748 Mon Sep 17 00:00:00 2001 From: Lanny91 Date: Tue, 25 Apr 2017 18:00:24 +0100 Subject: [PATCH 014/377] First conserved current implementation for Wilson fermions only. Not implemented for Gparity or 5D-vectorised Wilson fermions. --- lib/qcd/QCD.h | 8 ++ lib/qcd/action/fermion/FermionOperator.h | 15 ++++ .../fermion/ImprovedStaggeredFermion.cc | 25 ++++++ .../action/fermion/ImprovedStaggeredFermion.h | 16 ++++ .../fermion/ImprovedStaggeredFermion5D.cc | 24 +++++ .../fermion/ImprovedStaggeredFermion5D.h | 15 ++++ lib/qcd/action/fermion/WilsonFermion.cc | 47 ++++++++++ lib/qcd/action/fermion/WilsonFermion.h | 16 ++++ lib/qcd/action/fermion/WilsonFermion5D.cc | 77 ++++++++++++++++ lib/qcd/action/fermion/WilsonFermion5D.h | 15 ++++ lib/qcd/action/fermion/WilsonKernels.cc | 89 +++++++++++++++++++ lib/qcd/action/fermion/WilsonKernels.h | 18 ++++ 12 files changed, 365 insertions(+) diff --git a/lib/qcd/QCD.h b/lib/qcd/QCD.h index 6e6144da..c66c7b13 100644 --- a/lib/qcd/QCD.h +++ b/lib/qcd/QCD.h @@ -489,6 +489,14 @@ namespace QCD { return traceIndex(lhs); } + ////////////////////////////////////////// + // Current types + ////////////////////////////////////////// + GRID_SERIALIZABLE_ENUM(Current, undef, + Vector, 0, + Axial, 1, + Tadpole, 2); + } //namespace QCD } // Grid diff --git a/lib/qcd/action/fermion/FermionOperator.h b/lib/qcd/action/fermion/FermionOperator.h index 676a0e83..144b70f6 100644 --- a/lib/qcd/action/fermion/FermionOperator.h +++ b/lib/qcd/action/fermion/FermionOperator.h @@ -112,6 +112,21 @@ namespace Grid 
{ /////////////////////////////////////////////// virtual void ImportGauge(const GaugeField & _U)=0; + ////////////////////////////////////////////////////////////////////// + // Conserved currents, either contract at sink or insert sequentially. + ////////////////////////////////////////////////////////////////////// + virtual void ContractConservedCurrent(PropagatorField &q_in_1, + PropagatorField &q_in_2, + PropagatorField &q_out, + Current curr_type, + unsigned int mu)=0; + virtual void SeqConservedCurrent(PropagatorField &q_in, + PropagatorField &q_out, + Current curr_type, + unsigned int mu, + std::vector mom, + unsigned int tmin, + unsigned int tmax)=0; }; } diff --git a/lib/qcd/action/fermion/ImprovedStaggeredFermion.cc b/lib/qcd/action/fermion/ImprovedStaggeredFermion.cc index 2ba4f4af..ef8c79bd 100644 --- a/lib/qcd/action/fermion/ImprovedStaggeredFermion.cc +++ b/lib/qcd/action/fermion/ImprovedStaggeredFermion.cc @@ -395,6 +395,31 @@ void ImprovedStaggeredFermion::DhopInternal(StencilImpl &st, LebesgueOrder } }; +//////////////////////////////////////////////////////// +// Conserved current - not yet implemented. 
+//////////////////////////////////////////////////////// +template +void ImprovedStaggeredFermion::ContractConservedCurrent(PropagatorField &q_in_1, + PropagatorField &q_in_2, + PropagatorField &q_out, + Current curr_type, + unsigned int mu) +{ + assert(0); +} + +template +void ImprovedStaggeredFermion::SeqConservedCurrent(PropagatorField &q_in, + PropagatorField &q_out, + Current curr_type, + unsigned int mu, + std::vector mom, + unsigned int tmin, + unsigned int tmax) +{ + assert(0); +} + FermOpStaggeredTemplateInstantiate(ImprovedStaggeredFermion); //AdjointFermOpTemplateInstantiate(ImprovedStaggeredFermion); diff --git a/lib/qcd/action/fermion/ImprovedStaggeredFermion.h b/lib/qcd/action/fermion/ImprovedStaggeredFermion.h index 7d1f2996..9d5270c6 100644 --- a/lib/qcd/action/fermion/ImprovedStaggeredFermion.h +++ b/lib/qcd/action/fermion/ImprovedStaggeredFermion.h @@ -157,6 +157,22 @@ class ImprovedStaggeredFermion : public StaggeredKernels, public ImprovedS LebesgueOrder Lebesgue; LebesgueOrder LebesgueEvenOdd; + + /////////////////////////////////////////////////////////////// + // Conserved current utilities + /////////////////////////////////////////////////////////////// + void ContractConservedCurrent(PropagatorField &q_in_1, + PropagatorField &q_in_2, + PropagatorField &q_out, + Current curr_type, + unsigned int mu); + void SeqConservedCurrent(PropagatorField &q_in, + PropagatorField &q_out, + Current curr_type, + unsigned int mu, + std::vector mom, + unsigned int tmin, + unsigned int tmax); }; typedef ImprovedStaggeredFermion ImprovedStaggeredFermionF; diff --git a/lib/qcd/action/fermion/ImprovedStaggeredFermion5D.cc b/lib/qcd/action/fermion/ImprovedStaggeredFermion5D.cc index 61a3c559..293077f7 100644 --- a/lib/qcd/action/fermion/ImprovedStaggeredFermion5D.cc +++ b/lib/qcd/action/fermion/ImprovedStaggeredFermion5D.cc @@ -345,6 +345,30 @@ void ImprovedStaggeredFermion5D::MooeeInvDag(const FermionField &in, MooeeInv(in, out); } 
+//////////////////////////////////////////////////////// +// Conserved current - not yet implemented. +//////////////////////////////////////////////////////// +template +void ImprovedStaggeredFermion5D::ContractConservedCurrent(PropagatorField &q_in_1, + PropagatorField &q_in_2, + PropagatorField &q_out, + Current curr_type, + unsigned int mu) +{ + assert(0); +} + +template +void ImprovedStaggeredFermion5D::SeqConservedCurrent(PropagatorField &q_in, + PropagatorField &q_out, + Current curr_type, + unsigned int mu, + std::vector mom, + unsigned int tmin, + unsigned int tmax) +{ + assert(0); +} FermOpStaggeredTemplateInstantiate(ImprovedStaggeredFermion5D); FermOpStaggeredVec5dTemplateInstantiate(ImprovedStaggeredFermion5D); diff --git a/lib/qcd/action/fermion/ImprovedStaggeredFermion5D.h b/lib/qcd/action/fermion/ImprovedStaggeredFermion5D.h index 4961da49..1c540892 100644 --- a/lib/qcd/action/fermion/ImprovedStaggeredFermion5D.h +++ b/lib/qcd/action/fermion/ImprovedStaggeredFermion5D.h @@ -160,6 +160,21 @@ namespace QCD { // Comms buffer std::vector > comm_buf; + /////////////////////////////////////////////////////////////// + // Conserved current utilities + /////////////////////////////////////////////////////////////// + void ContractConservedCurrent(PropagatorField &q_in_1, + PropagatorField &q_in_2, + PropagatorField &q_out, + Current curr_type, + unsigned int mu); + void SeqConservedCurrent(PropagatorField &q_in, + PropagatorField &q_out, + Current curr_type, + unsigned int mu, + std::vector mom, + unsigned int tmin, + unsigned int tmax); }; }} diff --git a/lib/qcd/action/fermion/WilsonFermion.cc b/lib/qcd/action/fermion/WilsonFermion.cc index 32083d5e..839f5215 100644 --- a/lib/qcd/action/fermion/WilsonFermion.cc +++ b/lib/qcd/action/fermion/WilsonFermion.cc @@ -347,6 +347,53 @@ void WilsonFermion::DhopInternal(StencilImpl &st, LebesgueOrder &lo, } }; +/******************************************************************************* + * Conserved current 
utilities for Wilson fermions, for contracting propagators + * to make a conserved current sink or inserting the conserved current + * sequentially. + ******************************************************************************/ +template +void WilsonFermion::ContractConservedCurrent(PropagatorField &q_in_1, + PropagatorField &q_in_2, + PropagatorField &q_out, + Current curr_type, + unsigned int mu) +{ + Gamma g5(Gamma::Algebra::Gamma5); + conformable(_grid, q_in_1._grid); + conformable(_grid, q_in_2._grid); + conformable(_grid, q_out._grid); + Kernels::ContractConservedCurrentInternal(q_in_1, q_in_2, q_out, + Umu, curr_type, mu); +} + +template +void WilsonFermion::SeqConservedCurrent(PropagatorField &q_in, + PropagatorField &q_out, + Current curr_type, + unsigned int mu, + std::vector mom, + unsigned int tmin, + unsigned int tmax) +{ + conformable(_grid, q_in._grid); + conformable(_grid, q_out._grid); + Lattice> ph(_grid), coor(_grid); + Complex i(0.0,1.0); + + // Momentum projection + ph = zero; + for(unsigned int mu = 0; mu < Nd - 1; mu++) + { + LatticeCoordinate(coor, mu); + ph = ph + mom[mu]*coor*((1./(_grid->_fdimensions[mu]))); + } + ph = exp((Real)(2*M_PI)*i*ph); + + Kernels::SeqConservedCurrentInternal(q_in, q_out, Umu, curr_type, mu, ph, + tmin, tmax); +} + FermOpTemplateInstantiate(WilsonFermion); AdjointFermOpTemplateInstantiate(WilsonFermion); TwoIndexFermOpTemplateInstantiate(WilsonFermion); diff --git a/lib/qcd/action/fermion/WilsonFermion.h b/lib/qcd/action/fermion/WilsonFermion.h index 933be732..feba40ed 100644 --- a/lib/qcd/action/fermion/WilsonFermion.h +++ b/lib/qcd/action/fermion/WilsonFermion.h @@ -146,6 +146,22 @@ class WilsonFermion : public WilsonKernels, public WilsonFermionStatic { LebesgueOrder Lebesgue; LebesgueOrder LebesgueEvenOdd; + + /////////////////////////////////////////////////////////////// + // Conserved current utilities + /////////////////////////////////////////////////////////////// + void 
ContractConservedCurrent(PropagatorField &q_in_1, + PropagatorField &q_in_2, + PropagatorField &q_out, + Current curr_type, + unsigned int mu); + void SeqConservedCurrent(PropagatorField &q_in, + PropagatorField &q_out, + Current curr_type, + unsigned int mu, + std::vector mom, + unsigned int tmin, + unsigned int tmax); }; typedef WilsonFermion WilsonFermionF; diff --git a/lib/qcd/action/fermion/WilsonFermion5D.cc b/lib/qcd/action/fermion/WilsonFermion5D.cc index 88bc425a..d0d3d055 100644 --- a/lib/qcd/action/fermion/WilsonFermion5D.cc +++ b/lib/qcd/action/fermion/WilsonFermion5D.cc @@ -679,6 +679,83 @@ void WilsonFermion5D::MomentumSpacePropagatorHw(FermionField &out,const Fe } +/******************************************************************************* + * Conserved current utilities for Wilson fermions, for contracting propagators + * to make a conserved current sink or inserting the conserved current + * sequentially. + ******************************************************************************/ +template +void WilsonFermion5D::ContractConservedCurrent(PropagatorField &q_in_1, + PropagatorField &q_in_2, + PropagatorField &q_out, + Current curr_type, + unsigned int mu) +{ + conformable(q_in_1._grid, FermionGrid()); + conformable(q_in_1._grid, q_in_2._grid); + conformable(_FourDimGrid, q_out._grid); + + PropagatorField q1_s(_FourDimGrid); + PropagatorField q2_s(_FourDimGrid); + PropagatorField tmp(_FourDimGrid); + + // Contract across 5th dimension. + q_out = zero; + for (int s = 0; s < Ls; ++s) + { + ExtractSlice(q1_s, q_in_1, 0, s); + ExtractSlice(q2_s, q_in_2, 0, Ls - s - 1); + Kernels::ContractConservedCurrentInternal(q1_s, q2_s, tmp, Umu, curr_type, mu); + + // Axial current sign + Real G_s = (curr_type == Current::Axial) ? ((s < Ls/2) ? -1. : 1.) 
: 1.; + q_out += G_s*tmp; + } +} + + +template +void WilsonFermion5D::SeqConservedCurrent(PropagatorField &q_in, + PropagatorField &q_out, + Current curr_type, + unsigned int mu, + std::vector mom, + unsigned int tmin, + unsigned int tmax) +{ + conformable(q_in._grid, FermionGrid()); + conformable(q_in._grid, q_out._grid); + Lattice> ph(_FourDimGrid), coor(_FourDimGrid); + Complex i(0.0, 1.0); + + // Momentum projection + ph = zero; + for(unsigned int nu = 0; nu < Nd - 1; nu++) + { + LatticeCoordinate(coor, nu); + ph = ph + mom[nu]*coor*((1./(_FourDimGrid->_fdimensions[nu]))); + } + ph = exp((Real)(2*M_PI)*i*ph); + + // Sequential insertion + Kernels::SeqConservedCurrentInternal(q_in, q_out, Umu, curr_type, + mu, ph, tmin, tmax); + + // Axial current sign. + if (curr_type == Current::Axial) + { + SitePropagator result; + parallel_for(int sU = 0; sU < Umu._grid->oSites(); sU++) + { + int sF = sU * Ls; + for (int s = 0; s < Ls/2; s++) + { + vstream(q_out._odata[sF], -q_out._odata[sF]); + sF++; + } + } + } +} FermOpTemplateInstantiate(WilsonFermion5D); GparityFermOpTemplateInstantiate(WilsonFermion5D); diff --git a/lib/qcd/action/fermion/WilsonFermion5D.h b/lib/qcd/action/fermion/WilsonFermion5D.h index e87e927e..d66f4a1d 100644 --- a/lib/qcd/action/fermion/WilsonFermion5D.h +++ b/lib/qcd/action/fermion/WilsonFermion5D.h @@ -214,6 +214,21 @@ namespace QCD { // Comms buffer std::vector > comm_buf; + /////////////////////////////////////////////////////////////// + // Conserved current utilities + /////////////////////////////////////////////////////////////// + void ContractConservedCurrent(PropagatorField &q_in_1, + PropagatorField &q_in_2, + PropagatorField &q_out, + Current curr_type, + unsigned int mu); + void SeqConservedCurrent(PropagatorField &q_in, + PropagatorField &q_out, + Current curr_type, + unsigned int mu, + std::vector mom, + unsigned int tmin, + unsigned int tmax); }; }} diff --git a/lib/qcd/action/fermion/WilsonKernels.cc 
b/lib/qcd/action/fermion/WilsonKernels.cc index 6e72e089..fbf8dc00 100644 --- a/lib/qcd/action/fermion/WilsonKernels.cc +++ b/lib/qcd/action/fermion/WilsonKernels.cc @@ -567,6 +567,95 @@ void WilsonKernels::DhopDir( StencilImpl &st, DoubledGaugeField &U,SiteHal vstream(out._odata[sF], result); } +/******************************************************************************* + * Conserved current utilities for Wilson fermions, for contracting propagators + * to make a conserved current sink or inserting the conserved current + * sequentially. Common to both 4D and 5D. + ******************************************************************************/ +#define WilsonCurrentFwd(expr, mu) (0.5*(Gamma::gmu[mu]*expr - expr)) +#define WilsonCurrentBwd(expr, mu) (0.5*(Gamma::gmu[mu]*expr + expr)) + +template +void WilsonKernels::ContractConservedCurrentInternal(const PropagatorField &q_in_1, + const PropagatorField &q_in_2, + PropagatorField &q_out, + DoubledGaugeField &U, + Current curr_type, + unsigned int mu) +{ + Gamma g5(Gamma::Algebra::Gamma5); + PropagatorField tmp(q_out._grid); + GaugeLinkField Umu(U._grid); + Umu = PeekIndex(U, mu); + + tmp = this->CovShiftForward(Umu, mu, q_in_1); + q_out = (g5*adj(q_in_2)*g5)*WilsonCurrentFwd(tmp, mu); + + tmp = adj(Umu)*q_in_1; + q_out += (g5*adj(this->CovShiftForward(Umu, mu, q_in_2))*g5)*WilsonCurrentBwd(q_in_1, mu); +} + + +template +void WilsonKernels::SeqConservedCurrentInternal(const PropagatorField &q_in, + PropagatorField &q_out, + DoubledGaugeField &U, + Current curr_type, + unsigned int mu, + Lattice> &ph, + unsigned int tmin, + unsigned int tmax) +{ + int tshift = (mu == Nd - 1) ? 1 : 0; + Real G_T = (curr_type == Current::Tadpole) ? -1. 
: 1.; + PropagatorField tmp(q_in._grid); + GaugeLinkField Umu(U._grid); + Umu = PeekIndex(U, mu); + Lattice> t(q_in._grid); + + tmp = this->CovShiftForward(Umu, mu, q_in)*ph; + where((t >= tmin) and (t <= tmax), tmp, 0.*tmp); + q_out = G_T*WilsonCurrentFwd(tmp, mu); + + tmp = q_in*ph; + tmp = this->CovShiftBackward(Umu, mu, tmp); + where((t >= tmin + tshift) and (t <= tmax + tshift), tmp, 0.*tmp); + q_out += WilsonCurrentBwd(tmp, mu); +} + + +// GParity, (Z)DomainWallVec5D -> require special implementation +#define NO_CURR(Impl) \ +template <> void \ +WilsonKernels::ContractConservedCurrentInternal(const PropagatorField &q_in_1, \ + const PropagatorField &q_in_2, \ + PropagatorField &q_out, \ + DoubledGaugeField &U, \ + Current curr_type, \ + unsigned int mu) \ +{ \ + assert(0); \ +} \ +template <> void \ +WilsonKernels::SeqConservedCurrentInternal(const PropagatorField &q_in, \ + PropagatorField &q_out, \ + DoubledGaugeField &U, \ + Current curr_type, \ + unsigned int mu, \ + Lattice> &ph, \ + unsigned int tmin, \ + unsigned int tmax) \ +{ \ + assert(0); \ +} + +NO_CURR(GparityWilsonImplF); +NO_CURR(GparityWilsonImplD); +NO_CURR(DomainWallVec5dImplF); +NO_CURR(DomainWallVec5dImplD); +NO_CURR(ZDomainWallVec5dImplF); +NO_CURR(ZDomainWallVec5dImplD); + FermOpTemplateInstantiate(WilsonKernels); AdjointFermOpTemplateInstantiate(WilsonKernels); TwoIndexFermOpTemplateInstantiate(WilsonKernels); diff --git a/lib/qcd/action/fermion/WilsonKernels.h b/lib/qcd/action/fermion/WilsonKernels.h index 20ee87f2..34820274 100644 --- a/lib/qcd/action/fermion/WilsonKernels.h +++ b/lib/qcd/action/fermion/WilsonKernels.h @@ -166,6 +166,24 @@ public: void DhopDir(StencilImpl &st, DoubledGaugeField &U,SiteHalfSpinor * buf, int sF, int sU, const FermionField &in, FermionField &out, int dirdisp, int gamma); + ////////////////////////////////////////////////////////////////////////////// + // Utilities for inserting Wilson conserved current. 
+ ////////////////////////////////////////////////////////////////////////////// + void ContractConservedCurrentInternal(const PropagatorField &q_in_1, + const PropagatorField &q_in_2, + PropagatorField &q_out, + DoubledGaugeField &U, + Current curr_type, + unsigned int mu); + void SeqConservedCurrentInternal(const PropagatorField &q_in, + PropagatorField &q_out, + DoubledGaugeField &U, + Current curr_type, + unsigned int mu, + Lattice> &ph, + unsigned int tmin, + unsigned int tmax); + private: // Specialised variants void GenericDhopSite(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, SiteHalfSpinor * buf, From dc5a6404eadc237985a1b4ffac7b8a51760e6bc4 Mon Sep 17 00:00:00 2001 From: Lanny91 Date: Tue, 25 Apr 2017 22:08:33 +0100 Subject: [PATCH 015/377] Hadrons: modules for testing conserved current contractions and sequential insertion. --- extras/Hadrons/Modules.hpp | 3 + .../Modules/MContraction/WardIdentity.hpp | 151 ++++++++++++++++++ .../Modules/MContraction/WardIdentitySeq.hpp | 117 ++++++++++++++ .../Hadrons/Modules/MSource/SeqConserved.hpp | 129 +++++++++++++++ extras/Hadrons/modules.inc | 3 + 5 files changed, 403 insertions(+) create mode 100644 extras/Hadrons/Modules/MContraction/WardIdentity.hpp create mode 100644 extras/Hadrons/Modules/MContraction/WardIdentitySeq.hpp create mode 100644 extras/Hadrons/Modules/MSource/SeqConserved.hpp diff --git a/extras/Hadrons/Modules.hpp b/extras/Hadrons/Modules.hpp index 05ad1697..67762246 100644 --- a/extras/Hadrons/Modules.hpp +++ b/extras/Hadrons/Modules.hpp @@ -32,6 +32,8 @@ See the full license in the file "LICENSE" in the top level distribution directo #include #include #include +#include +#include #include #include #include @@ -42,6 +44,7 @@ See the full license in the file "LICENSE" in the top level distribution directo #include #include #include +#include #include #include #include diff --git a/extras/Hadrons/Modules/MContraction/WardIdentity.hpp 
b/extras/Hadrons/Modules/MContraction/WardIdentity.hpp new file mode 100644 index 00000000..39221148 --- /dev/null +++ b/extras/Hadrons/Modules/MContraction/WardIdentity.hpp @@ -0,0 +1,151 @@ +#ifndef Hadrons_WardIdentity_hpp_ +#define Hadrons_WardIdentity_hpp_ + +#include +#include +#include + +BEGIN_HADRONS_NAMESPACE + +/* + Ward Identity contractions + ----------------------------- + + * options: + - q: propagator, 5D if available (string) + - q4d: 4D propagator, duplicate of q if q is not 5D (string) + - action: action module used for propagator solution (string) + - mass: mass of quark (double) +*/ + +/****************************************************************************** + * WardIdentity * + ******************************************************************************/ +BEGIN_MODULE_NAMESPACE(MContraction) + +class WardIdentityPar: Serializable +{ +public: + GRID_SERIALIZABLE_CLASS_MEMBERS(WardIdentityPar, + std::string, q, + std::string, q4d, + std::string, action, + double, mass); +}; + +template +class TWardIdentity: public Module +{ +public: + TYPE_ALIASES(FImpl,); +public: + // constructor + TWardIdentity(const std::string name); + // destructor + virtual ~TWardIdentity(void) = default; + // dependency relation + virtual std::vector getInput(void); + virtual std::vector getOutput(void); + // setup + virtual void setup(void); + // execution + virtual void execute(void); +private: + unsigned int Ls_; +}; + +MODULE_REGISTER_NS(WardIdentity, TWardIdentity, MContraction); + +/****************************************************************************** + * TWardIdentity implementation * + ******************************************************************************/ +// constructor ///////////////////////////////////////////////////////////////// +template +TWardIdentity::TWardIdentity(const std::string name) +: Module(name) +{} + +// dependencies/products /////////////////////////////////////////////////////// +template +std::vector 
TWardIdentity::getInput(void) +{ + std::vector in = {par().q, par().q4d, par().action}; + + return in; +} + +template +std::vector TWardIdentity::getOutput(void) +{ + std::vector out = {getName()}; + + return out; +} + +// setup /////////////////////////////////////////////////////////////////////// +template +void TWardIdentity::setup(void) +{ + Ls_ = env().getObjectLs(par().q); +} + +// execution /////////////////////////////////////////////////////////////////// +template +void TWardIdentity::execute(void) +{ + LOG(Message) << "Performing Ward Identity checks for quark '" << par().q + << "'." << std::endl; + + PropagatorField psi(env().getGrid()), tmp(env().getGrid()); + PropagatorField q = *env().template getObject(par().q); + PropagatorField q4d = *env().template getObject(par().q4d); + FMat &act = *(env().template getObject(par().action)); + Gamma g5(Gamma::Algebra::Gamma5); + LatticeComplex PP(env().getGrid()), PA(env().getGrid()), + c(env().getGrid()), PJ5q(env().getGrid()), + vector_WI(env().getGrid()), defect(env().getGrid()); + c = zero; PJ5q = zero; vector_WI = zero; defect = zero; + std::vector Vmu(Nd, c); + std::vector Amu(Nd, c); + + // Get PP, PA, V_mu, A_mu for 4D. + PP = trace(adj(q4d)*q4d); + PA = trace(adj(q4d)*g5*q4d); + for (unsigned int mu = 0; mu < Nd; ++mu) + { + act.ContractConservedCurrent(q, q, tmp, Current::Vector, mu); + Vmu[mu] = trace(g5*tmp); + act.ContractConservedCurrent(q, q, tmp, Current::Axial, mu); + Amu[mu] = trace(g5*tmp); + } + + // Get PJ5q for 5D (zero for 4D). 
+ if (Ls_ > 1) + { + ExtractSlice(psi, q, 0, Ls_/2 - 1); + psi = 0.5 * (psi + g5*psi); + ExtractSlice(tmp, q, 0, Ls_/2); + psi += 0.5 * (tmp - g5*tmp); + PJ5q = trace(adj(psi)*psi); + } + + // Test ward identities, D_mu V_mu = 0; D_mu A_mu = 2m + 2 PJ5q + for (unsigned int mu = 0; mu < Nd; ++mu) + { + vector_WI += Vmu[mu] - Cshift(Vmu[mu], mu, -1); + defect += Amu[mu] - Cshift(Amu[mu], mu, -1); + } + defect -= 2.*PJ5q; + defect -= 2.*(par().mass)*PP; + + LOG(Message) << "Vector Ward Identity check Delta_mu V_mu = " + << norm2(vector_WI) << std::endl; + LOG(Message) << "Axial Ward Identity defect Delta_mu A_mu = " + << norm2(defect) << std::endl; +} + +END_MODULE_NAMESPACE + +END_HADRONS_NAMESPACE + +#endif // Hadrons_WardIdentity_hpp_ diff --git a/extras/Hadrons/Modules/MContraction/WardIdentitySeq.hpp b/extras/Hadrons/Modules/MContraction/WardIdentitySeq.hpp new file mode 100644 index 00000000..3e72c11e --- /dev/null +++ b/extras/Hadrons/Modules/MContraction/WardIdentitySeq.hpp @@ -0,0 +1,117 @@ +#ifndef Hadrons_WardIdentitySeq_hpp_ +#define Hadrons_WardIdentitySeq_hpp_ + +#include +#include +#include + +BEGIN_HADRONS_NAMESPACE + +/* + Ward Identity contractions using sequential propagators. + ----------------------------- + + * options: + - q_x: propagator, mu = x current insertion (string). + - q_y: propagator, mu = y current insertion (string). + - q_z: propagator, mu = z current insertion (string). + - q_t: propagator, mu = t current insertion (string). 
+*/ + +/****************************************************************************** + * WardIdentitySeq * + ******************************************************************************/ +BEGIN_MODULE_NAMESPACE(MContraction) + +class WardIdentitySeqPar: Serializable +{ +public: + GRID_SERIALIZABLE_CLASS_MEMBERS(WardIdentitySeqPar, + std::string, q_x, + std::string, q_y, + std::string, q_z, + std::string, q_t); +}; + +template +class TWardIdentitySeq: public Module +{ +public: + TYPE_ALIASES(FImpl,); +public: + // constructor + TWardIdentitySeq(const std::string name); + // destructor + virtual ~TWardIdentitySeq(void) = default; + // dependency relation + virtual std::vector getInput(void); + virtual std::vector getOutput(void); + // setup + virtual void setup(void); + // execution + virtual void execute(void); +}; + +MODULE_REGISTER_NS(WardIdentitySeq, TWardIdentitySeq, MContraction); + +/****************************************************************************** + * TWardIdentitySeq implementation * + ******************************************************************************/ +// constructor ///////////////////////////////////////////////////////////////// +template +TWardIdentitySeq::TWardIdentitySeq(const std::string name) +: Module(name) +{} + +// dependencies/products /////////////////////////////////////////////////////// +template +std::vector TWardIdentitySeq::getInput(void) +{ + std::vector in = {par().q_x, par().q_y, par().q_z, par().q_t}; + + return in; +} + +template +std::vector TWardIdentitySeq::getOutput(void) +{ + std::vector out = {getName()}; + + return out; +} + +// setup /////////////////////////////////////////////////////////////////////// +template +void TWardIdentitySeq::setup(void) +{ + +} + +// execution /////////////////////////////////////////////////////////////////// +template +void TWardIdentitySeq::execute(void) +{ + LatticeComplex vector_WI(env().getGrid()), c(env().getGrid()); + PropagatorField q_x = *env().template 
getObject(par().q_x); + PropagatorField q_y = *env().template getObject(par().q_y); + PropagatorField q_z = *env().template getObject(par().q_z); + PropagatorField q_t = *env().template getObject(par().q_t); + PropagatorField *q[Nd] = {&q_x, &q_y, &q_z, &q_t}; + Gamma g5(Gamma::Algebra::Gamma5); + + // Check D_mu V_mu = 0 + for (unsigned int mu = 0; mu < Nd; ++mu) + { + c = trace(g5*(*q[mu])); + vector_WI += c - Cshift(c, mu, -1); + } + + LOG(Message) << "Ward Identity checks for sequential vector current " + << "insertion = " << norm2(vector_WI) << std::endl; +} + +END_MODULE_NAMESPACE + +END_HADRONS_NAMESPACE + +#endif // Hadrons_WardIdentitySeq_hpp_ diff --git a/extras/Hadrons/Modules/MSource/SeqConserved.hpp b/extras/Hadrons/Modules/MSource/SeqConserved.hpp new file mode 100644 index 00000000..7d4974f4 --- /dev/null +++ b/extras/Hadrons/Modules/MSource/SeqConserved.hpp @@ -0,0 +1,129 @@ +#ifndef Hadrons_SeqConserved_hpp_ +#define Hadrons_SeqConserved_hpp_ + +#include +#include +#include + +BEGIN_HADRONS_NAMESPACE + +/* + + Sequential source + ----------------------------- + * src_x = q_x * theta(x_3 - tA) * theta(tB - x_3) * J_mu * exp(i x.mom) + + * options: + - q: input propagator (string) + - action: fermion action used for propagator q (string) + - tA: begin timeslice (integer) + - tB: end timesilce (integer) + - curr_type: type of conserved current to insert (Current) + - mu: Lorentz index of current to insert (integer) + - mom: momentum insertion, space-separated float sequence (e.g ".1 .2 1. 
0.") + + */ + +/****************************************************************************** + * SeqConserved * + ******************************************************************************/ +BEGIN_MODULE_NAMESPACE(MSource) + +class SeqConservedPar: Serializable +{ +public: + GRID_SERIALIZABLE_CLASS_MEMBERS(SeqConservedPar, + std::string, q, + std::string, action, + unsigned int, tA, + unsigned int, tB, + Current, curr_type, + unsigned int, mu, + std::string, mom); +}; + +template +class TSeqConserved: public Module +{ +public: + TYPE_ALIASES(FImpl,); +public: + // constructor + TSeqConserved(const std::string name); + // destructor + virtual ~TSeqConserved(void) = default; + // dependency relation + virtual std::vector getInput(void); + virtual std::vector getOutput(void); + // setup + virtual void setup(void); + // execution + virtual void execute(void); +}; + +MODULE_REGISTER_NS(SeqConserved, TSeqConserved, MSource); + +/****************************************************************************** + * TSeqConserved implementation * + ******************************************************************************/ +// constructor ///////////////////////////////////////////////////////////////// +template +TSeqConserved::TSeqConserved(const std::string name) +: Module(name) +{} + +// dependencies/products /////////////////////////////////////////////////////// +template +std::vector TSeqConserved::getInput(void) +{ + std::vector in; + + return in; +} + +template +std::vector TSeqConserved::getOutput(void) +{ + std::vector out = {getName()}; + + return out; +} + +// setup /////////////////////////////////////////////////////////////////////// +template +void TSeqConserved::setup(void) +{ + +} + +// execution /////////////////////////////////////////////////////////////////// +template +void TSeqConserved::execute(void) +{ + if (par().tA == par().tB) + { + LOG(Message) << "Generating sequential source with conserved " + << par().curr_type << " current insertion 
(mu = " + << par().mu << ") at " << "t = " << par().tA << std::endl; + } + else + { + LOG(Message) << "Generating sequential source with conserved " + << par().curr_type << " current insertion (mu = " + << par().mu << ") for " << par().tA << " <= t <= " + << par().tB << std::endl; + } + PropagatorField &src = *env().template createLattice(getName()); + PropagatorField &q = *env().template getObject(par().q); + FMat &mat = *(env().template getObject(par().action)); + + std::vector mom = strToVec(par().mom); + mat.SeqConservedCurrent(q, src, par().curr_type, par().mu, + mom, par().tA, par().tB); +} + +END_MODULE_NAMESPACE + +END_HADRONS_NAMESPACE + +#endif // Hadrons_SeqConserved_hpp_ diff --git a/extras/Hadrons/modules.inc b/extras/Hadrons/modules.inc index af291631..32655c3b 100644 --- a/extras/Hadrons/modules.inc +++ b/extras/Hadrons/modules.inc @@ -13,6 +13,8 @@ modules_hpp =\ Modules/MContraction/DiscLoop.hpp \ Modules/MContraction/Gamma3pt.hpp \ Modules/MContraction/Meson.hpp \ + Modules/MContraction/WardIdentity.hpp \ + Modules/MContraction/WardIdentitySeq.hpp \ Modules/MContraction/WeakHamiltonian.hpp \ Modules/MContraction/WeakHamiltonianEye.hpp \ Modules/MContraction/WeakHamiltonianNonEye.hpp \ @@ -23,6 +25,7 @@ modules_hpp =\ Modules/MLoop/NoiseLoop.hpp \ Modules/MSolver/RBPrecCG.hpp \ Modules/MSource/Point.hpp \ + Modules/MSource/SeqConserved.hpp \ Modules/MSource/SeqGamma.hpp \ Modules/MSource/Wall.hpp \ Modules/MSource/Z2.hpp \ From 159770e21ba2515d145bb331305593474ce33b01 Mon Sep 17 00:00:00 2001 From: Lanny91 Date: Wed, 26 Apr 2017 09:32:57 +0100 Subject: [PATCH 016/377] Legal Banners added --- .../Modules/MContraction/WardIdentity.hpp | 28 +++++++++++++++++++ .../Modules/MContraction/WardIdentitySeq.hpp | 28 +++++++++++++++++++ .../Hadrons/Modules/MSource/SeqConserved.hpp | 28 +++++++++++++++++++ 3 files changed, 84 insertions(+) diff --git a/extras/Hadrons/Modules/MContraction/WardIdentity.hpp b/extras/Hadrons/Modules/MContraction/WardIdentity.hpp 
index 39221148..355126da 100644 --- a/extras/Hadrons/Modules/MContraction/WardIdentity.hpp +++ b/extras/Hadrons/Modules/MContraction/WardIdentity.hpp @@ -1,3 +1,31 @@ +/************************************************************************************* + +Grid physics library, www.github.com/paboyle/Grid + +Source file: extras/Hadrons/Modules/MContraction/WardIdentity.hpp + +Copyright (C) 2017 + +Author: Andrew Lawson + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along +with this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+ +See the full license in the file "LICENSE" in the top level distribution directory +*************************************************************************************/ +/* END LEGAL */ + #ifndef Hadrons_WardIdentity_hpp_ #define Hadrons_WardIdentity_hpp_ diff --git a/extras/Hadrons/Modules/MContraction/WardIdentitySeq.hpp b/extras/Hadrons/Modules/MContraction/WardIdentitySeq.hpp index 3e72c11e..31409925 100644 --- a/extras/Hadrons/Modules/MContraction/WardIdentitySeq.hpp +++ b/extras/Hadrons/Modules/MContraction/WardIdentitySeq.hpp @@ -1,3 +1,31 @@ +/************************************************************************************* + +Grid physics library, www.github.com/paboyle/Grid + +Source file: extras/Hadrons/Modules/MContraction/WardIdentitySeq.hpp + +Copyright (C) 2017 + +Author: Andrew Lawson + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along +with this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+ +See the full license in the file "LICENSE" in the top level distribution directory +*************************************************************************************/ +/* END LEGAL */ + #ifndef Hadrons_WardIdentitySeq_hpp_ #define Hadrons_WardIdentitySeq_hpp_ diff --git a/extras/Hadrons/Modules/MSource/SeqConserved.hpp b/extras/Hadrons/Modules/MSource/SeqConserved.hpp index 7d4974f4..ccfb68f4 100644 --- a/extras/Hadrons/Modules/MSource/SeqConserved.hpp +++ b/extras/Hadrons/Modules/MSource/SeqConserved.hpp @@ -1,3 +1,31 @@ +/************************************************************************************* + +Grid physics library, www.github.com/paboyle/Grid + +Source file: extras/Hadrons/Modules/MContraction/SeqConserved.hpp + +Copyright (C) 2017 + +Author: Andrew Lawson + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along +with this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+ +See the full license in the file "LICENSE" in the top level distribution directory +*************************************************************************************/ +/* END LEGAL */ + #ifndef Hadrons_SeqConserved_hpp_ #define Hadrons_SeqConserved_hpp_ From a39daecb62a0bf8a128f4650d311badaa1659fda Mon Sep 17 00:00:00 2001 From: Lanny91 Date: Wed, 26 Apr 2017 12:39:07 +0100 Subject: [PATCH 017/377] Removed make_5D const declaration to avoid compilation error --- extras/Hadrons/Modules/Quark.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/extras/Hadrons/Modules/Quark.hpp b/extras/Hadrons/Modules/Quark.hpp index c08e0192..fff34edf 100644 --- a/extras/Hadrons/Modules/Quark.hpp +++ b/extras/Hadrons/Modules/Quark.hpp @@ -48,7 +48,7 @@ inline void make_4D(Lattice &in_5d, Lattice &out_4d, int Ls) } template -inline void make_5D(const Lattice &in_4d, Lattice &out_5d, int Ls) +inline void make_5D(Lattice &in_4d, Lattice &out_5d, int Ls) { out_5d = zero; InsertSlice(in_4d, out_5d, 0, 0); From 6299dd35f57b03131447d70f7b8e7f002dc4cdf9 Mon Sep 17 00:00:00 2001 From: Lanny91 Date: Wed, 26 Apr 2017 12:41:39 +0100 Subject: [PATCH 018/377] Hadrons: Added test of conserved current code. Tests Ward identities for conserved vector and partially conserved axial currents. 
--- .../hadrons/Test_hadrons_conserved_current.cc | 127 ++++++++++++++++++ 1 file changed, 127 insertions(+) create mode 100644 tests/hadrons/Test_hadrons_conserved_current.cc diff --git a/tests/hadrons/Test_hadrons_conserved_current.cc b/tests/hadrons/Test_hadrons_conserved_current.cc new file mode 100644 index 00000000..df774ac0 --- /dev/null +++ b/tests/hadrons/Test_hadrons_conserved_current.cc @@ -0,0 +1,127 @@ +/******************************************************************************* + Grid physics library, www.github.com/paboyle/Grid + + Source file: tests/hadrons/Test_hadrons_conserved_current.cc + + Copyright (C) 2017 + + Author: Andrew Lawson + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along + with this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + + See the full license in the file "LICENSE" in the top level distribution + directory. 
+ *******************************************************************************/ + +#include "Test_hadrons.hpp" + +using namespace Grid; +using namespace Hadrons; + +int main(int argc, char *argv[]) +{ + // initialization ////////////////////////////////////////////////////////// + Grid_init(&argc, &argv); + HadronsLogError.Active(GridLogError.isActive()); + HadronsLogWarning.Active(GridLogWarning.isActive()); + HadronsLogMessage.Active(GridLogMessage.isActive()); + HadronsLogIterative.Active(GridLogIterative.isActive()); + HadronsLogDebug.Active(GridLogDebug.isActive()); + LOG(Message) << "Grid initialized" << std::endl; + + // run setup /////////////////////////////////////////////////////////////// + Application application; + unsigned int nt = GridDefaultLatt()[Tp]; + double mass = 0.04; + + // global parameters + Application::GlobalPar globalPar; + globalPar.trajCounter.start = 1500; + globalPar.trajCounter.end = 1520; + globalPar.trajCounter.step = 20; + globalPar.seed = "1 2 3 4"; + globalPar.genetic.maxGen = 1000; + globalPar.genetic.maxCstGen = 200; + globalPar.genetic.popSize = 20; + globalPar.genetic.mutationRate = .1; + application.setPar(globalPar); + + // gauge field + application.createModule("gauge"); + + // action + std::string actionName = "DWF"; + MAction::DWF::Par actionPar; + actionPar.gauge = "gauge"; + actionPar.Ls = 12; + actionPar.M5 = 1.8; + actionPar.mass = mass; + application.createModule(actionName, actionPar); + + // solver + std::string solverName = "CG"; + MSolver::RBPrecCG::Par solverPar; + solverPar.action = actionName; + solverPar.residual = 1.0e-8; + application.createModule(solverName, + solverPar); + + // Conserved current sink contractions: use a single point propagator. 
+ std::string pointProp = "q_0"; + std::string pos = "0 0 0 0"; + std::string modName = "Ward Identity Test"; + MAKE_POINT_PROP(pos, pointProp, solverName); + if (!(Environment::getInstance().hasModule(modName))) + { + MContraction::WardIdentity::Par wiPar; + wiPar.q = pointProp + "_5d"; + wiPar.q4d = pointProp; + wiPar.action = actionName; + wiPar.mass = mass; + application.createModule(modName, wiPar); + } + + // Conserved current contractions with sequential insertion of vector + // current. + std::string q_x = "q_x"; + std::string q_y = "q_y"; + std::string q_z = "q_z"; + std::string q_t = "q_t"; + std::string mom = ZERO_MOM; + modName = "Sequential Ward Identity Test"; + MAKE_SEQUENTIAL_PROP(nt/2, pointProp, mom, q_x, solverName); + MAKE_SEQUENTIAL_PROP(nt/2, pointProp, mom, q_y, solverName); + MAKE_SEQUENTIAL_PROP(nt/2, pointProp, mom, q_z, solverName); + MAKE_SEQUENTIAL_PROP(nt/2, pointProp, mom, q_t, solverName); + if (!(Environment::getInstance().hasModule(modName))) + { + MContraction::WardIdentitySeq::Par wiPar; + wiPar.q_x = q_x; + wiPar.q_y = q_y; + wiPar.q_z = q_z; + wiPar.q_t = q_t; + application.createModule(modName, wiPar); + } + + // execution + application.saveParameterFile("ConservedCurrentTest.xml"); + application.run(); + + // epilogue + LOG(Message) << "Grid is finalizing now" << std::endl; + Grid_finalize(); + + return EXIT_SUCCESS; +} \ No newline at end of file From d2003f24f49b68f49fd0cb5764ef32f6ef4cd498 Mon Sep 17 00:00:00 2001 From: Lanny91 Date: Wed, 26 Apr 2017 17:25:28 +0100 Subject: [PATCH 019/377] Corrected incorrect usage of ExtractSlice for conserved current code. 
--- extras/Hadrons/Modules/MContraction/WardIdentity.hpp | 4 ++-- lib/qcd/action/fermion/WilsonFermion5D.cc | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/extras/Hadrons/Modules/MContraction/WardIdentity.hpp b/extras/Hadrons/Modules/MContraction/WardIdentity.hpp index 355126da..41d8c6d1 100644 --- a/extras/Hadrons/Modules/MContraction/WardIdentity.hpp +++ b/extras/Hadrons/Modules/MContraction/WardIdentity.hpp @@ -150,9 +150,9 @@ void TWardIdentity::execute(void) // Get PJ5q for 5D (zero for 4D). if (Ls_ > 1) { - ExtractSlice(psi, q, 0, Ls_/2 - 1); + ExtractSlice(psi, q, Ls_/2 - 1, 0); psi = 0.5 * (psi + g5*psi); - ExtractSlice(tmp, q, 0, Ls_/2); + ExtractSlice(tmp, q, Ls_/2, 0); psi += 0.5 * (tmp - g5*tmp); PJ5q = trace(adj(psi)*psi); } diff --git a/lib/qcd/action/fermion/WilsonFermion5D.cc b/lib/qcd/action/fermion/WilsonFermion5D.cc index d0d3d055..99ff0dc1 100644 --- a/lib/qcd/action/fermion/WilsonFermion5D.cc +++ b/lib/qcd/action/fermion/WilsonFermion5D.cc @@ -703,8 +703,8 @@ void WilsonFermion5D::ContractConservedCurrent(PropagatorField &q_in_1, q_out = zero; for (int s = 0; s < Ls; ++s) { - ExtractSlice(q1_s, q_in_1, 0, s); - ExtractSlice(q2_s, q_in_2, 0, Ls - s - 1); + ExtractSlice(q1_s, q_in_1, s, 0); + ExtractSlice(q2_s, q_in_2, Ls - s - 1, 0); Kernels::ContractConservedCurrentInternal(q1_s, q2_s, tmp, Umu, curr_type, mu); // Axial current sign From a6ccbbe1080f8c7d2bd193e08ad38b97d0f5af1d Mon Sep 17 00:00:00 2001 From: Lanny91 Date: Fri, 28 Apr 2017 10:43:47 +0100 Subject: [PATCH 020/377] Conserved current sequential source now registered properly and fixed module inputs. 
--- extras/Hadrons/Modules/MSource/SeqConserved.hpp | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/extras/Hadrons/Modules/MSource/SeqConserved.hpp b/extras/Hadrons/Modules/MSource/SeqConserved.hpp index ccfb68f4..6e5fb197 100644 --- a/extras/Hadrons/Modules/MSource/SeqConserved.hpp +++ b/extras/Hadrons/Modules/MSource/SeqConserved.hpp @@ -104,7 +104,7 @@ TSeqConserved::TSeqConserved(const std::string name) template std::vector TSeqConserved::getInput(void) { - std::vector in; + std::vector in = {par().q, par().action}; return in; } @@ -121,7 +121,8 @@ std::vector TSeqConserved::getOutput(void) template void TSeqConserved::setup(void) { - + auto Ls_ = env().getObjectLs(par().action); + env().template registerLattice(getName(), Ls_); } // execution /////////////////////////////////////////////////////////////////// From 5553b8d2b86a1fa4b44a304e4fab9ab3e8001f16 Mon Sep 17 00:00:00 2001 From: Guido Cossu Date: Fri, 28 Apr 2017 15:23:34 +0100 Subject: [PATCH 021/377] Clover term compiles, not tested --- lib/qcd/action/fermion/Fermion.h | 2 +- lib/qcd/action/fermion/WilsonCloverFermion.cc | 144 ++++++---- lib/qcd/action/fermion/WilsonCloverFermion.h | 59 ++-- tests/core/Test_wilson_clover.cc | 251 ++++++++++++++++++ 4 files changed, 383 insertions(+), 73 deletions(-) create mode 100644 tests/core/Test_wilson_clover.cc diff --git a/lib/qcd/action/fermion/Fermion.h b/lib/qcd/action/fermion/Fermion.h index 2eaf42fa..9d999c6d 100644 --- a/lib/qcd/action/fermion/Fermion.h +++ b/lib/qcd/action/fermion/Fermion.h @@ -49,7 +49,7 @@ Author: Peter Boyle #include // 4d wilson like #include // 4d wilson like -#include // 4d wilson like +#include // 4d wilson clover fermions #include // 5d base used by all 5d overlap types #include diff --git a/lib/qcd/action/fermion/WilsonCloverFermion.cc b/lib/qcd/action/fermion/WilsonCloverFermion.cc index c7fd211d..ebea565b 100644 --- a/lib/qcd/action/fermion/WilsonCloverFermion.cc +++ 
b/lib/qcd/action/fermion/WilsonCloverFermion.cc @@ -27,28 +27,35 @@ *************************************************************************************/ /* END LEGAL */ #include +#include #include namespace Grid { namespace QCD { - template - void WilsonCloverFermion::AddCloverTerm(const FermionField& in, - FermionField& out){ - FermionField tmp(out._grid); - tmp = zero; - // the product sigma_munu Fmunu is hermitian - tmp += Bx*(Gamma(Gamma::Algebra::SigmaYZ)*in); - tmp += By*(Gamma(Gamma::Algebra::MinusSigmaXZ)*in); - tmp += Bz*(Gamma(Gamma::Algebra::SigmaXY)*in); - tmp += Ex*(Gamma(Gamma::Algebra::MinusSigmaXT)*in); - tmp += Ey*(Gamma(Gamma::Algebra::MinusSigmaYT)*in); - tmp += Ez*(Gamma(Gamma::Algebra::MinusSigmaZT)*in); - out += tmp*csw; // check signs - - } +//WilsonLoop::CloverPlaquette +///////////////////////////////////////////////////// +//// Clover plaquette combination in mu,nu plane with Double Stored U +//////////////////////////////////////////////////// +//static void CloverPlaquette(GaugeMat &Q, const std::vector &U, +// const int mu, const int nu){ +// Q = zero; +// Q += Gimpl::CovShiftBackward( +// U[mu], mu, Gimpl::CovShiftBackward( +// U[nu], nu, Gimpl::CovShiftForward(U[mu], mu, U[nu] ))); +// Q += Gimpl::CovShiftForward( +// U[mu], mu, Gimpl::CovShiftForward( +// U[nu], nu, Gimpl::CovShiftBackward(U[mu], mu, U[nu+Nd] ))); +// Q += Gimpl::CovShiftBackward( +// U[nu], nu, Gimpl::CovShiftForward( +// U[mu], mu, Gimpl::CovShiftForward(U[nu], nu, U[mu+Nd] ))); +// Q += Gimpl::CovShiftForward( +// U[mu], mu, Gimpl::CovShiftBackward( +// U[nu], nu, Gimpl::CovShiftBackward(U[mu], mu, U[nu] ))); +// } +// *NOT* EO template RealD WilsonCloverFermion::M(const FermionField& in, FermionField& out) { // Wilson term @@ -56,7 +63,7 @@ namespace QCD { this->Dhop(in, out, DaggerNo); // Clover term // apply the sigma and Fmunu - AddCloverTerm(in, out); + Mooee(in, out); // overall factor return axpy_norm(out, 4 + this->mass, in, out); } @@ -68,13 +75,16 @@ 
namespace QCD { this->Dhop(in, out, DaggerYes); // Clover term // apply the sigma and Fmunu - AddCloverTerm(in, out); + MooeeDag(in, out); return axpy_norm(out, 4 + this->mass, in, out); } template void WilsonCloverFermion::ImportGauge(const GaugeField& _Umu) { this->ImportGauge(_Umu); + GridBase* grid = _Umu._grid; + assert(Nd==4); //only works in 4 dim + typename Impl::GaugeLinkField Bx(grid), By(grid), Bz(grid), Ex(grid), Ey(grid), Ez(grid); // Compute the field strength terms WilsonLoops::FieldStrength(Bx, _Umu, Ydir, Zdir); WilsonLoops::FieldStrength(By, _Umu, Zdir, Xdir); @@ -82,31 +92,77 @@ namespace QCD { WilsonLoops::FieldStrength(Ex, _Umu, Tdir, Xdir); WilsonLoops::FieldStrength(Ey, _Umu, Tdir, Ydir); WilsonLoops::FieldStrength(Ez, _Umu, Tdir, Zdir); - // Save the contracted term with sigma - // into a dense matrix site by site - // Invert the Moo, Mee terms (using Eigen) + // Compute the Clover Operator acting on Colour and Spin + CloverTerm = fillClover(Bx)*(Gamma(Gamma::Algebra::SigmaYZ)); + CloverTerm += fillClover(By)*(Gamma(Gamma::Algebra::MinusSigmaXZ)); + CloverTerm += fillClover(Bz)*(Gamma(Gamma::Algebra::SigmaXY)); + CloverTerm += fillClover(Ex)*(Gamma(Gamma::Algebra::MinusSigmaXT)); + CloverTerm += fillClover(Ey)*(Gamma(Gamma::Algebra::MinusSigmaYT)); + CloverTerm += fillClover(Ez)*(Gamma(Gamma::Algebra::MinusSigmaZT)); + CloverTerm *= csw; + + + int lvol = _Umu._grid->lSites(); + int DimRep = Impl::Dimension; + + Eigen::MatrixXcd EigenCloverOp = Eigen::MatrixXcd::Zero(Ns*DimRep,Ns*DimRep); + Eigen::MatrixXcd EigenInvCloverOp = Eigen::MatrixXcd::Zero(Ns*DimRep,Ns*DimRep); + + std::vector lcoor; + typename SiteCloverType::scalar_object Qx = zero, Qxinv = zero; + + for (int site = 0; site < lvol; site++){ + grid->LocalIndexToLocalCoor(site,lcoor); + EigenCloverOp=Eigen::MatrixXcd::Zero(Ns*DimRep,Ns*DimRep); + peekLocalSite(Qx,CloverTerm,lcoor); + Qxinv = zero; + for(int j = 0; j < Ns; j++) + for (int k = 0; k < Ns; k++) + for(int a = 0; a < 
DimRep; a++) + for(int b = 0; b < DimRep; b++) + EigenCloverOp(a+j*DimRep,b+k*DimRep) = Qx()(j,k)(a,b); + + EigenInvCloverOp = EigenCloverOp.inverse(); + for(int j = 0; j < Ns; j++) + for (int k = 0; k < Ns; k++) + for(int a = 0; a < DimRep; a++) + for(int b = 0; b < DimRep; b++) + Qxinv()(j,k)(a,b) = EigenInvCloverOp(a+j*DimRep,b+k*DimRep); + + pokeLocalSite(Qxinv,CloverTermInv,lcoor); + } + } + + template + void WilsonCloverFermion::Mooee(const FermionField &in, FermionField &out){ + this -> MooeeInternal(in, out, DaggerNo, InverseNo); + } + + template + void WilsonCloverFermion::MooeeDag(const FermionField &in, FermionField &out){ + this -> MooeeInternal(in, out, DaggerNo, InverseYes); + } + + template + void WilsonCloverFermion::MooeeInv(const FermionField &in, FermionField &out){ + this -> MooeeInternal(in, out, DaggerNo, InverseYes); + } + + template + void WilsonCloverFermion::MooeeInvDag(const FermionField &in, FermionField &out){ + this -> MooeeInternal(in, out, DaggerNo, InverseYes); } template - void WilsonCloverFermion::Mooee(const FermionField &in, FermionField &out) { - out.checkerboard = in.checkerboard; - assert(0); // to be completed - } + void WilsonCloverFermion::MooeeInternal(const FermionField &in, FermionField &out, int dag, int inv){ + out.checkerboard = in.checkerboard; + CloverFieldType *Clover; - template - void WilsonCloverFermion::MooeeDag(const FermionField &in, FermionField &out) { - assert(0); // not implemented yet - } - template - void WilsonCloverFermion::MooeeInv(const FermionField &in, FermionField &out) { - assert(0); // not implemented yet - } - template - void WilsonCloverFermion::MooeeInvDag(const FermionField &in, FermionField &out) { - assert(0); // not implemented yet - } + Clover = (inv) ? 
&CloverTermInv : &CloverTerm; + if(dag){ out = adj(*Clover)*in;} else {out = *Clover*in;} + } // MooeeInternal // Derivative parts template @@ -128,17 +184,6 @@ namespace QCD { template void WilsonCloverFermion::MooDeriv(GaugeField&mat, const FermionField&U, const FermionField&V, int dag){ // Compute the 8 terms of the derivative - - // Pseudocode - // Using Chroma as a template - - // for loop on mu and nu, but upper matrix - // take the outer product factor * U x (sigma_mu_nu V) - - // derivative of loops - // end of loop - - assert(0); // not implemented yet } @@ -148,7 +193,10 @@ namespace QCD { assert(0); // not implemented yet } - FermOpTemplateInstantiate(WilsonCloverFermion); +FermOpTemplateInstantiate(WilsonCloverFermion); // now only for the fundamental representation +//AdjointFermOpTemplateInstantiate(WilsonCloverFermion); +//TwoIndexFermOpTemplateInstantiate(WilsonCloverFermion); +//GparityFermOpTemplateInstantiate(WilsonCloverFermion); } } diff --git a/lib/qcd/action/fermion/WilsonCloverFermion.h b/lib/qcd/action/fermion/WilsonCloverFermion.h index e942de1f..0fa0d57d 100644 --- a/lib/qcd/action/fermion/WilsonCloverFermion.h +++ b/lib/qcd/action/fermion/WilsonCloverFermion.h @@ -31,14 +31,20 @@ #include -namespace Grid { -namespace QCD { +namespace Grid +{ +namespace QCD +{ template -class WilsonCloverFermion : public WilsonFermion { +class WilsonCloverFermion : public WilsonFermion +{ public: + // Types definitions INHERIT_IMPL_TYPES(Impl); - + template using iImplClover = iScalar, Ns> >; + typedef iImplClover SiteCloverType; + typedef Lattice CloverFieldType; public: typedef WilsonFermion WilsonBase; @@ -51,43 +57,48 @@ public: const ImplParams &p = ImplParams()) : WilsonFermion(_Umu, Fgrid, Hgrid, - _mass, p), - Bx(_Umu._grid), - By(_Umu._grid), - Bz(_Umu._grid), - Ex(_Umu._grid), - Ey(_Umu._grid), - Ez(_Umu._grid) + _mass, p), + CloverTerm(&Fgrid), + CloverTermInv(&Fgrid) { csw = _csw; assert(Nd == 4); // require 4 dimensions } - virtual RealD 
M(const FermionField& in, FermionField& out); - virtual RealD Mdag(const FermionField& in, FermionField& out); + virtual RealD M(const FermionField &in, FermionField &out); + virtual RealD Mdag(const FermionField &in, FermionField &out); virtual void Mooee(const FermionField &in, FermionField &out); virtual void MooeeDag(const FermionField &in, FermionField &out); virtual void MooeeInv(const FermionField &in, FermionField &out); virtual void MooeeInvDag(const FermionField &in, FermionField &out); + virtual void MooeeInternal(const FermionField &in, FermionField &out, int dag, int inv); - virtual void MDeriv(GaugeField&mat, const FermionField&U, const FermionField&V, int dag); - virtual void MooDeriv(GaugeField&mat, const FermionField&U, const FermionField&V, int dag); - virtual void MeeDeriv(GaugeField&mat, const FermionField&U, const FermionField&V, int dag); - + virtual void MDeriv(GaugeField &mat, const FermionField &U, const FermionField &V, int dag); + virtual void MooDeriv(GaugeField &mat, const FermionField &U, const FermionField &V, int dag); + virtual void MeeDeriv(GaugeField &mat, const FermionField &U, const FermionField &V, int dag); void ImportGauge(const GaugeField &_Umu); + private: // here fixing the 4 dimensions, make it more general? 
- // Field strengths - GaugeLinkField Bx, By, Bz, Ex, Ey, Ez; + RealD csw; // Clover coefficient + CloverFieldType CloverTerm, CloverTermInv; // Clover term + // eventually these two can be compressed into 6x6 blocks instead of the 12x12 + // using the DeGrand-Rossi basis for the gamma matrices - RealD csw; // Clover coefficient - - - // Methods - void AddCloverTerm(const FermionField& in, FermionField& out); + CloverFieldType fillClover(const GaugeLinkField& F){ + CloverFieldType T(F._grid); + PARALLEL_FOR_LOOP + for (int i = 0; i < CloverTerm._grid->oSites(); i++){ + for (int s1 = 0; s1 < Nc; s1++) + for (int s2 = 0; s2 < Nc; s2++) + T._odata[i]()(s1,s2) = F._odata[i]()(); + } + return T; + } + }; } } diff --git a/tests/core/Test_wilson_clover.cc b/tests/core/Test_wilson_clover.cc new file mode 100644 index 00000000..08516d80 --- /dev/null +++ b/tests/core/Test_wilson_clover.cc @@ -0,0 +1,251 @@ + /************************************************************************************* + + Grid physics library, www.github.com/paboyle/Grid + + Source file: ./benchmarks/Benchmark_wilson.cc + + Copyright (C) 2015 + + Author: Guido Cossu + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along + with this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+ + See the full license in the file "LICENSE" in the top level distribution directory + *************************************************************************************/ + /* END LEGAL */ +#include + +using namespace std; +using namespace Grid; +using namespace Grid::QCD; + +int main (int argc, char ** argv) +{ + Grid_init(&argc,&argv); + + std::vector latt_size = GridDefaultLatt(); + std::vector simd_layout = GridDefaultSimd(Nd,vComplex::Nsimd()); + std::vector mpi_layout = GridDefaultMpi(); + GridCartesian Grid(latt_size,simd_layout,mpi_layout); + GridRedBlackCartesian RBGrid(latt_size,simd_layout,mpi_layout); + + int threads = GridThread::GetThreads(); + std::cout< seeds({1,2,3,4}); + GridParallelRNG pRNG(&Grid); + pRNG.SeedFixedIntegers(seeds); + // pRNG.SeedFixedIntegers(std::vector({45,12,81,9}); + + typedef typename WilsonCloverFermionR::FermionField FermionField; + typename WilsonCloverFermionR::ImplParams params; + + FermionField src (&Grid); random(pRNG,src); + FermionField result(&Grid); result=zero; + FermionField ref(&Grid); ref=zero; + FermionField tmp(&Grid); tmp=zero; + FermionField err(&Grid); tmp=zero; + FermionField phi (&Grid); random(pRNG,phi); + FermionField chi (&Grid); random(pRNG,chi); + LatticeGaugeField Umu(&Grid); SU3::HotConfiguration(pRNG,Umu); + std::vector U(4,&Grid); + + + double volume=1; + for(int mu=0;mu(Umu,mu); + /* Debug force unit + U[mu] = 1.0; + PokeIndex(Umu,U[mu],mu); + */ + } + + ref = zero; + + RealD mass=0.1; + RealD csw = 1.0; + + { // Simple clover implementation + + // ref = ref + mass * src; + } + + WilsonCloverFermionR Dwc(Umu,Grid,RBGrid,mass,csw,params); + + + std::cout< * = < chi | Deo^dag| phi> "< HermOpEO(Dwc); + HermOpEO.MpcDagMpc(chi_e,dchi_e,t1,t2); + HermOpEO.MpcDagMpc(chi_o,dchi_o,t1,t2); + + HermOpEO.MpcDagMpc(phi_e,dphi_e,t1,t2); + HermOpEO.MpcDagMpc(phi_o,dphi_o,t1,t2); + + pDce = innerProduct(phi_e,dchi_e); + pDco = innerProduct(phi_o,dchi_o); + cDpe = innerProduct(chi_e,dphi_e); + cDpo = 
innerProduct(chi_o,dphi_o); + + std::cout< Date: Fri, 28 Apr 2017 15:27:49 +0100 Subject: [PATCH 022/377] SitePropagator redefined to be a scalar object in TYPE_ALIASES. --- extras/Hadrons/Global.hpp | 14 +++++++------- extras/Hadrons/Modules/MSource/Point.hpp | 2 +- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/extras/Hadrons/Global.hpp b/extras/Hadrons/Global.hpp index 3e11ddf8..ebf93283 100644 --- a/extras/Hadrons/Global.hpp +++ b/extras/Hadrons/Global.hpp @@ -60,13 +60,13 @@ BEGIN_HADRONS_NAMESPACE // type aliases #define TYPE_ALIASES(FImpl, suffix)\ -typedef FermionOperator FMat##suffix; \ -typedef typename FImpl::FermionField FermionField##suffix; \ -typedef typename FImpl::PropagatorField PropagatorField##suffix; \ -typedef typename FImpl::SitePropagator SitePropagator##suffix; \ -typedef typename FImpl::DoubledGaugeField DoubledGaugeField##suffix;\ -typedef std::function SolverFn##suffix; +typedef FermionOperator FMat##suffix; \ +typedef typename FImpl::FermionField FermionField##suffix; \ +typedef typename FImpl::PropagatorField PropagatorField##suffix; \ +typedef typename FImpl::SitePropagator::scalar_object SitePropagator##suffix; \ +typedef typename FImpl::DoubledGaugeField DoubledGaugeField##suffix;\ +typedef std::function SolverFn##suffix; // logger class HadronsLogger: public Logger diff --git a/extras/Hadrons/Modules/MSource/Point.hpp b/extras/Hadrons/Modules/MSource/Point.hpp index a0ecbc2a..36e1cc5b 100644 --- a/extras/Hadrons/Modules/MSource/Point.hpp +++ b/extras/Hadrons/Modules/MSource/Point.hpp @@ -118,7 +118,7 @@ template void TPoint::execute(void) { std::vector position = strToVec(par().position); - typename SitePropagator::scalar_object id; + SitePropagator id; LOG(Message) << "Creating point source at position [" << par().position << "]" << std::endl; From 99a73f4287cf0eb595595851b0330e4c0e7c9dc1 Mon Sep 17 00:00:00 2001 From: Guido Cossu Date: Fri, 28 Apr 2017 15:51:05 +0100 Subject: [PATCH 023/377] Correcting the M and 
Mdag in the clover term --- lib/qcd/action/fermion/WilsonCloverFermion.cc | 277 +++++++++--------- 1 file changed, 146 insertions(+), 131 deletions(-) diff --git a/lib/qcd/action/fermion/WilsonCloverFermion.cc b/lib/qcd/action/fermion/WilsonCloverFermion.cc index ebea565b..3ab481ce 100644 --- a/lib/qcd/action/fermion/WilsonCloverFermion.cc +++ b/lib/qcd/action/fermion/WilsonCloverFermion.cc @@ -30,13 +30,15 @@ #include #include -namespace Grid { -namespace QCD { +namespace Grid +{ +namespace QCD +{ -//WilsonLoop::CloverPlaquette -///////////////////////////////////////////////////// -//// Clover plaquette combination in mu,nu plane with Double Stored U -//////////////////////////////////////////////////// +//WilsonLoop::CloverPlaquette +///////////////////////////////////////////////////// +//// Clover plaquette combination in mu,nu plane with Double Stored U +//////////////////////////////////////////////////// //static void CloverPlaquette(GaugeMat &Q, const std::vector &U, // const int mu, const int nu){ // Q = zero; @@ -54,149 +56,162 @@ namespace QCD { // U[nu], nu, Gimpl::CovShiftBackward(U[mu], mu, U[nu] ))); // } +// *NOT* EO +template +RealD WilsonCloverFermion::M(const FermionField &in, FermionField &out) +{ + // Wilson term + out.checkerboard = in.checkerboard; + this->Dhop(in, out, DaggerNo); + // Clover term + // apply the sigma and Fmunu + FermionField temp(out._grid); + Mooee(in, temp); + // overall factor + out += temp; + return axpy_norm(out, 4 + this->mass, in, out); +} -// *NOT* EO - template - RealD WilsonCloverFermion::M(const FermionField& in, FermionField& out) { - // Wilson term - out.checkerboard = in.checkerboard; - this->Dhop(in, out, DaggerNo); - // Clover term - // apply the sigma and Fmunu - Mooee(in, out); - // overall factor - return axpy_norm(out, 4 + this->mass, in, out); +template +RealD WilsonCloverFermion::Mdag(const FermionField &in, FermionField &out) +{ + // Wilson term + out.checkerboard = in.checkerboard; + this->Dhop(in, 
out, DaggerYes); + // Clover term + // apply the sigma and Fmunu + FermionField temp(out._grid); + MooeeDag(in, temp); + out+=temp; + return axpy_norm(out, 4 + this->mass, in, out); +} + +template +void WilsonCloverFermion::ImportGauge(const GaugeField &_Umu) +{ + this->ImportGauge(_Umu); + GridBase *grid = _Umu._grid; + assert(Nd == 4); // only works in 4 dim + typename Impl::GaugeLinkField Bx(grid), By(grid), Bz(grid), Ex(grid), Ey(grid), Ez(grid); + + // Compute the field strength terms + WilsonLoops::FieldStrength(Bx, _Umu, Ydir, Zdir); + WilsonLoops::FieldStrength(By, _Umu, Zdir, Xdir); + WilsonLoops::FieldStrength(Bz, _Umu, Xdir, Ydir); + WilsonLoops::FieldStrength(Ex, _Umu, Tdir, Xdir); + WilsonLoops::FieldStrength(Ey, _Umu, Tdir, Ydir); + WilsonLoops::FieldStrength(Ez, _Umu, Tdir, Zdir); + + // Compute the Clover Operator acting on Colour and Spin + CloverTerm = fillClover(Bx) * (Gamma(Gamma::Algebra::SigmaYZ)); + CloverTerm += fillClover(By) * (Gamma(Gamma::Algebra::MinusSigmaXZ)); + CloverTerm += fillClover(Bz) * (Gamma(Gamma::Algebra::SigmaXY)); + CloverTerm += fillClover(Ex) * (Gamma(Gamma::Algebra::MinusSigmaXT)); + CloverTerm += fillClover(Ey) * (Gamma(Gamma::Algebra::MinusSigmaYT)); + CloverTerm += fillClover(Ez) * (Gamma(Gamma::Algebra::MinusSigmaZT)); + CloverTerm *= csw; + + int lvol = _Umu._grid->lSites(); + int DimRep = Impl::Dimension; + + Eigen::MatrixXcd EigenCloverOp = Eigen::MatrixXcd::Zero(Ns * DimRep, Ns * DimRep); + Eigen::MatrixXcd EigenInvCloverOp = Eigen::MatrixXcd::Zero(Ns * DimRep, Ns * DimRep); + + std::vector lcoor; + typename SiteCloverType::scalar_object Qx = zero, Qxinv = zero; + + for (int site = 0; site < lvol; site++) + { + grid->LocalIndexToLocalCoor(site, lcoor); + EigenCloverOp = Eigen::MatrixXcd::Zero(Ns * DimRep, Ns * DimRep); + peekLocalSite(Qx, CloverTerm, lcoor); + Qxinv = zero; + for (int j = 0; j < Ns; j++) + for (int k = 0; k < Ns; k++) + for (int a = 0; a < DimRep; a++) + for (int b = 0; b < DimRep; b++) + 
EigenCloverOp(a + j * DimRep, b + k * DimRep) = Qx()(j, k)(a, b); + + EigenInvCloverOp = EigenCloverOp.inverse(); + for (int j = 0; j < Ns; j++) + for (int k = 0; k < Ns; k++) + for (int a = 0; a < DimRep; a++) + for (int b = 0; b < DimRep; b++) + Qxinv()(j, k)(a, b) = EigenInvCloverOp(a + j * DimRep, b + k * DimRep); + + pokeLocalSite(Qxinv, CloverTermInv, lcoor); } +} - template - RealD WilsonCloverFermion::Mdag(const FermionField& in, FermionField& out) { - // Wilson term - out.checkerboard = in.checkerboard; - this->Dhop(in, out, DaggerYes); - // Clover term - // apply the sigma and Fmunu - MooeeDag(in, out); - return axpy_norm(out, 4 + this->mass, in, out); - } +template +void WilsonCloverFermion::Mooee(const FermionField &in, FermionField &out) +{ + this->MooeeInternal(in, out, DaggerNo, InverseNo); +} - template - void WilsonCloverFermion::ImportGauge(const GaugeField& _Umu) { - this->ImportGauge(_Umu); - GridBase* grid = _Umu._grid; - assert(Nd==4); //only works in 4 dim - typename Impl::GaugeLinkField Bx(grid), By(grid), Bz(grid), Ex(grid), Ey(grid), Ez(grid); - // Compute the field strength terms - WilsonLoops::FieldStrength(Bx, _Umu, Ydir, Zdir); - WilsonLoops::FieldStrength(By, _Umu, Zdir, Xdir); - WilsonLoops::FieldStrength(Bz, _Umu, Xdir, Ydir); - WilsonLoops::FieldStrength(Ex, _Umu, Tdir, Xdir); - WilsonLoops::FieldStrength(Ey, _Umu, Tdir, Ydir); - WilsonLoops::FieldStrength(Ez, _Umu, Tdir, Zdir); +template +void WilsonCloverFermion::MooeeDag(const FermionField &in, FermionField &out) +{ + this->MooeeInternal(in, out, DaggerNo, InverseYes); +} - // Compute the Clover Operator acting on Colour and Spin - CloverTerm = fillClover(Bx)*(Gamma(Gamma::Algebra::SigmaYZ)); - CloverTerm += fillClover(By)*(Gamma(Gamma::Algebra::MinusSigmaXZ)); - CloverTerm += fillClover(Bz)*(Gamma(Gamma::Algebra::SigmaXY)); - CloverTerm += fillClover(Ex)*(Gamma(Gamma::Algebra::MinusSigmaXT)); - CloverTerm += fillClover(Ey)*(Gamma(Gamma::Algebra::MinusSigmaYT)); - CloverTerm += 
fillClover(Ez)*(Gamma(Gamma::Algebra::MinusSigmaZT)); - CloverTerm *= csw; +template +void WilsonCloverFermion::MooeeInv(const FermionField &in, FermionField &out) +{ + this->MooeeInternal(in, out, DaggerNo, InverseYes); +} +template +void WilsonCloverFermion::MooeeInvDag(const FermionField &in, FermionField &out) +{ + this->MooeeInternal(in, out, DaggerNo, InverseYes); +} - int lvol = _Umu._grid->lSites(); - int DimRep = Impl::Dimension; +template +void WilsonCloverFermion::MooeeInternal(const FermionField &in, FermionField &out, int dag, int inv) +{ + out.checkerboard = in.checkerboard; + CloverFieldType *Clover; - Eigen::MatrixXcd EigenCloverOp = Eigen::MatrixXcd::Zero(Ns*DimRep,Ns*DimRep); - Eigen::MatrixXcd EigenInvCloverOp = Eigen::MatrixXcd::Zero(Ns*DimRep,Ns*DimRep); - - std::vector lcoor; - typename SiteCloverType::scalar_object Qx = zero, Qxinv = zero; + Clover = (inv) ? &CloverTermInv : &CloverTerm; + if (dag){ out = adj(*Clover) * in;} else { out = *Clover * in;} +} // MooeeInternal - for (int site = 0; site < lvol; site++){ - grid->LocalIndexToLocalCoor(site,lcoor); - EigenCloverOp=Eigen::MatrixXcd::Zero(Ns*DimRep,Ns*DimRep); - peekLocalSite(Qx,CloverTerm,lcoor); - Qxinv = zero; - for(int j = 0; j < Ns; j++) - for (int k = 0; k < Ns; k++) - for(int a = 0; a < DimRep; a++) - for(int b = 0; b < DimRep; b++) - EigenCloverOp(a+j*DimRep,b+k*DimRep) = Qx()(j,k)(a,b); +// Derivative parts +template +void WilsonCloverFermion::MDeriv(GaugeField &mat, const FermionField &U, const FermionField &V, int dag) +{ + GaugeField tmp(mat._grid); - EigenInvCloverOp = EigenCloverOp.inverse(); - for(int j = 0; j < Ns; j++) - for (int k = 0; k < Ns; k++) - for(int a = 0; a < DimRep; a++) - for(int b = 0; b < DimRep; b++) - Qxinv()(j,k)(a,b) = EigenInvCloverOp(a+j*DimRep,b+k*DimRep); + conformable(U._grid, V._grid); + conformable(U._grid, mat._grid); - pokeLocalSite(Qxinv,CloverTermInv,lcoor); - } - } + mat.checkerboard = U.checkerboard; + tmp.checkerboard = U.checkerboard; - 
template - void WilsonCloverFermion::Mooee(const FermionField &in, FermionField &out){ - this -> MooeeInternal(in, out, DaggerNo, InverseNo); - } + this->DhopDeriv(mat, U, V, dag); + MooDeriv(tmp, U, V, dag); + mat += tmp; +} - template - void WilsonCloverFermion::MooeeDag(const FermionField &in, FermionField &out){ - this -> MooeeInternal(in, out, DaggerNo, InverseYes); - } +// Derivative parts +template +void WilsonCloverFermion::MooDeriv(GaugeField &mat, const FermionField &U, const FermionField &V, int dag) +{ + // Compute the 8 terms of the derivative + assert(0); // not implemented yet +} - template - void WilsonCloverFermion::MooeeInv(const FermionField &in, FermionField &out){ - this -> MooeeInternal(in, out, DaggerNo, InverseYes); - } - - template - void WilsonCloverFermion::MooeeInvDag(const FermionField &in, FermionField &out){ - this -> MooeeInternal(in, out, DaggerNo, InverseYes); - } - - - template - void WilsonCloverFermion::MooeeInternal(const FermionField &in, FermionField &out, int dag, int inv){ - out.checkerboard = in.checkerboard; - CloverFieldType *Clover; - - Clover = (inv) ? 
&CloverTermInv : &CloverTerm; - if(dag){ out = adj(*Clover)*in;} else {out = *Clover*in;} - } // MooeeInternal - - // Derivative parts - template - void WilsonCloverFermion::MDeriv(GaugeField&mat, const FermionField&U, const FermionField&V, int dag){ - GaugeField tmp(mat._grid); - - conformable(U._grid, V._grid); - conformable(U._grid, mat._grid); - - mat.checkerboard = U.checkerboard; - tmp.checkerboard = U.checkerboard; - - this->DhopDeriv(mat, U, V, dag); - MooDeriv(tmp, U, V, dag); - mat += tmp; - } - - // Derivative parts - template - void WilsonCloverFermion::MooDeriv(GaugeField&mat, const FermionField&U, const FermionField&V, int dag){ - // Compute the 8 terms of the derivative - assert(0); // not implemented yet - } - - // Derivative parts - template - void WilsonCloverFermion::MeeDeriv(GaugeField&mat, const FermionField&U, const FermionField&V, int dag){ - assert(0); // not implemented yet - } +// Derivative parts +template +void WilsonCloverFermion::MeeDeriv(GaugeField &mat, const FermionField &U, const FermionField &V, int dag) +{ + assert(0); // not implemented yet +} FermOpTemplateInstantiate(WilsonCloverFermion); // now only for the fundamental representation //AdjointFermOpTemplateInstantiate(WilsonCloverFermion); //TwoIndexFermOpTemplateInstantiate(WilsonCloverFermion); //GparityFermOpTemplateInstantiate(WilsonCloverFermion); - } } From b9356d38662144acfa5b3f1d5184123b9769598d Mon Sep 17 00:00:00 2001 From: Lanny91 Date: Fri, 28 Apr 2017 16:46:40 +0100 Subject: [PATCH 024/377] Added more complete test of sequential insertion of conserved current. 
--- extras/Hadrons/Modules.hpp | 1 + .../Modules/MUtilities/TestSeqConserved.hpp | 166 ++++++++++++++++++ extras/Hadrons/modules.inc | 1 + 3 files changed, 168 insertions(+) create mode 100644 extras/Hadrons/Modules/MUtilities/TestSeqConserved.hpp diff --git a/extras/Hadrons/Modules.hpp b/extras/Hadrons/Modules.hpp index 67762246..0286333c 100644 --- a/extras/Hadrons/Modules.hpp +++ b/extras/Hadrons/Modules.hpp @@ -48,4 +48,5 @@ See the full license in the file "LICENSE" in the top level distribution directo #include #include #include +#include #include diff --git a/extras/Hadrons/Modules/MUtilities/TestSeqConserved.hpp b/extras/Hadrons/Modules/MUtilities/TestSeqConserved.hpp new file mode 100644 index 00000000..0730b8ed --- /dev/null +++ b/extras/Hadrons/Modules/MUtilities/TestSeqConserved.hpp @@ -0,0 +1,166 @@ +/************************************************************************************* + +Grid physics library, www.github.com/paboyle/Grid + +Source file: extras/Hadrons/Modules/MUtilities/TestSeqConserved.hpp + +Copyright (C) 2017 + +Author: Andrew Lawson + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along +with this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+ +See the full license in the file "LICENSE" in the top level distribution directory +*************************************************************************************/ +/* END LEGAL */ + +#ifndef Hadrons_TestSeqConserved_hpp_ +#define Hadrons_TestSeqConserved_hpp_ + +#include +#include +#include + +BEGIN_HADRONS_NAMESPACE + +/* + Ward Identity contractions using sequential propagators. + ----------------------------- + + * options: + - q: point source propagator, 5D if available (string) + - q4d: 4D point source propagator, duplicate of q if q is 4D (string) + - qSeq: result of sequential insertion of conserved current using q (string) + - action: action used for computation of q (string) + - origin: string giving point source origin of q (string) + - t_J: time at which sequential current is inserted (int) + - mu: Lorentz index of current inserted (int) + - curr: current type, e.g. vector/axial (Current) +*/ + +/****************************************************************************** + * TestSeqConserved * + ******************************************************************************/ +BEGIN_MODULE_NAMESPACE(MUtilities) + +class TestSeqConservedPar: Serializable +{ +public: + GRID_SERIALIZABLE_CLASS_MEMBERS(TestSeqConservedPar, + std::string, q, + std::string, q4d, + std::string, qSeq, + std::string, action, + std::string, origin, + unsigned int, t_J, + unsigned int, mu, + Current, curr); +}; + +template +class TTestSeqConserved: public Module +{ +public: + TYPE_ALIASES(FImpl,); +public: + // constructor + TTestSeqConserved(const std::string name); + // destructor + virtual ~TTestSeqConserved(void) = default; + // dependency relation + virtual std::vector getInput(void); + virtual std::vector getOutput(void); + // setup + virtual void setup(void); + // execution + virtual void execute(void); +}; + +MODULE_REGISTER_NS(TestSeqConserved, TTestSeqConserved, MUtilities); + +/****************************************************************************** + * 
TTestSeqConserved implementation * + ******************************************************************************/ +// constructor ///////////////////////////////////////////////////////////////// +template +TTestSeqConserved::TTestSeqConserved(const std::string name) +: Module(name) +{} + +// dependencies/products /////////////////////////////////////////////////////// +template +std::vector TTestSeqConserved::getInput(void) +{ + std::vector in = {par().q, par().q4d, + par().qSeq, par().action}; + + return in; +} + +template +std::vector TTestSeqConserved::getOutput(void) +{ + std::vector out = {getName()}; + + return out; +} + +// setup /////////////////////////////////////////////////////////////////////// +template +void TTestSeqConserved::setup(void) +{ + +} + +// execution /////////////////////////////////////////////////////////////////// +template +void TTestSeqConserved::execute(void) +{ + PropagatorField tmp(env().getGrid()); + PropagatorField &q = *env().template getObject(par().q); + PropagatorField &q4d = *env().template getObject(par().q4d); + PropagatorField &qSeq = *env().template getObject(par().qSeq); + FMat &act = *(env().template getObject(par().action)); + Gamma g5(Gamma::Algebra::Gamma5); + SitePropagator qSite; + LatticeComplex c(env().getGrid()); + Complex seq_res, check_res; + std::vector check_buf; + + // Check sequential insertion of current gives same result as conserved + // current sink upon contraction. Assume q uses a point source. 
+ std::vector siteCoord; + siteCoord = strToVec(par().origin); + peekSite(qSite, q, siteCoord); + seq_res = trace(g5*qSite); + + act.ContractConservedCurrent(q, q, tmp, par().curr, par().mu); + c = trace(tmp); + sliceSum(c, check_buf, Tp); + check_res = TensorRemove(check_buf[par().t_J]); + + // Check difference = 0 + check_res -= seq_res; + + LOG(Message) << "Consistency check for sequential conserved " + << par().curr << " current insertion = " << abs(check_res) + << std::endl; +} + +END_MODULE_NAMESPACE + +END_HADRONS_NAMESPACE + +#endif // Hadrons_TestSeqConserved_hpp_ diff --git a/extras/Hadrons/modules.inc b/extras/Hadrons/modules.inc index 32655c3b..4ab51ce0 100644 --- a/extras/Hadrons/modules.inc +++ b/extras/Hadrons/modules.inc @@ -29,5 +29,6 @@ modules_hpp =\ Modules/MSource/SeqGamma.hpp \ Modules/MSource/Wall.hpp \ Modules/MSource/Z2.hpp \ + Modules/MUtilities/TestSeqConserved.hpp \ Modules/Quark.hpp From db14fb30df901c5e6bb445a2c59a55bf28ac75d5 Mon Sep 17 00:00:00 2001 From: Lanny91 Date: Fri, 28 Apr 2017 16:48:00 +0100 Subject: [PATCH 025/377] Hadrons: overhaul of conserved current test --- tests/hadrons/Test_hadrons.hpp | 125 +++++++++++++++++- .../hadrons/Test_hadrons_conserved_current.cc | 54 ++++---- 2 files changed, 145 insertions(+), 34 deletions(-) diff --git a/tests/hadrons/Test_hadrons.hpp b/tests/hadrons/Test_hadrons.hpp index 26d02a5c..c4dcedaf 100644 --- a/tests/hadrons/Test_hadrons.hpp +++ b/tests/hadrons/Test_hadrons.hpp @@ -40,6 +40,7 @@ using namespace Hadrons; #define LABEL_3PT(s, t1, t2) ADD_INDEX(INIT_INDEX(s, t1), t2) #define LABEL_4PT(s, t1, t2, t3) ADD_INDEX(ADD_INDEX(INIT_INDEX(s, t1), t2), t3) #define LABEL_4PT_NOISE(s, t1, t2, t3, nn) ADD_INDEX(ADD_INDEX(ADD_INDEX(INIT_INDEX(s, t1), t2), t3), nn) +#define LABEL_5D(s) s + "_5d"; // Wall source/sink macros #define NAME_3MOM_WALL_SOURCE(t, mom) ("wall_" + std::to_string(t) + "_" + mom) @@ -123,6 +124,44 @@ inline void makeSequentialSource(Application &application, std::string 
srcName, } } +/******************************************************************************* + * Name: makeConservedSequentialSource + * Purpose: Construct sequential source with conserved current insertion and + * add to application module. + * Parameters: application - main application that stores modules. + * srcName - name of source module to create. + * qSrc - Input quark for sequential inversion. + * actionName - action corresponding to quark. + * tS - sequential source timeslice. + * curr - conserved current type to insert. + * mu - Lorentz index of current to insert. + * mom - momentum insertion (default is zero). + * Returns: None. + ******************************************************************************/ +inline void makeConservedSequentialSource(Application &application, + std::string &srcName, + std::string &qSrc, + std::string &actionName, + unsigned int tS, + Current curr, + unsigned int mu, + std::string mom = ZERO_MOM) +{ + // If the source already exists, don't make the module again. + if (!(Environment::getInstance().hasModule(srcName))) + { + MSource::SeqConserved::Par seqPar; + seqPar.q = qSrc; + seqPar.action = actionName; + seqPar.tA = tS; + seqPar.tB = tS; + seqPar.curr_type = curr; + seqPar.mu = mu; + seqPar.mom = mom; + application.createModule(srcName, seqPar); + } +} + /******************************************************************************* * Name: makeWallSource * Purpose: Construct wall source and add to application module. @@ -132,7 +171,7 @@ inline void makeSequentialSource(Application &application, std::string srcName, * mom - momentum insertion (default is zero). * Returns: None. ******************************************************************************/ -inline void makeWallSource(Application &application, std::string srcName, +inline void makeWallSource(Application &application, std::string &srcName, unsigned int tW, std::string mom = ZERO_MOM) { // If the source already exists, don't make the module again. 
@@ -154,8 +193,8 @@ inline void makeWallSource(Application &application, std::string srcName, * mom - momentum insertion (default is zero). * Returns: None. ******************************************************************************/ -inline void makeWallSink(Application &application, std::string propName, - std::string wallName, std::string mom = ZERO_MOM) +inline void makeWallSink(Application &application, std::string &propName, + std::string &wallName, std::string mom = ZERO_MOM) { // If the propagator has already been smeared, don't smear it again. // Temporarily removed, strategy for sink smearing likely to change. @@ -365,4 +404,82 @@ inline void discLoopContraction(Application &application, discPar.gamma = gamma; application.createModule(modName, discPar); } - } +} + +/******************************************************************************* + * Name: makeWITest + * Purpose: Create module to test Ward Identities for conserved current + * contractions and add to application module. + * Parameters: application - main application that stores modules. + * modName - name of module to create. + * propName - 4D quark propagator. + * actionName - action used to compute quark propagator. + * mass - mass of quark. + * Ls - length of 5th dimension (default = 1). + * Returns: None. 
+ ******************************************************************************/ +inline void makeWITest(Application &application, std::string &modName, + std::string &propName, std::string &actionName, + double mass, unsigned int Ls = 1) +{ + if (!(Environment::getInstance().hasModule(modName))) + { + MContraction::WardIdentity::Par wiPar; + if (Ls > 1) + { + wiPar.q = LABEL_5D(propName); + } + else + { + wiPar.q = propName; + } + wiPar.q4d = propName; + wiPar.action = actionName; + wiPar.mass = mass; + application.createModule(modName, wiPar); + } +} + +/******************************************************************************* + * Name: makeSeqTest + * Purpose: Create module to test sequential insertion of conserved current + * and add to application module. + * Parameters: application - main application that stores modules. + * modName - name of module to create. + * propName - 4D quark propagator. + * seqProp - 4D quark propagator with sequential insertion of + * conserved current. + * actionName - action used to compute quark propagators. + * t_J - time at which sequential current is inserted. + * mu - Lorentz index of sequential current. + * curr - type of conserved current inserted. + * Ls - length of 5th dimension (default = 1). + * Returns: None. 
+ ******************************************************************************/ +inline void makeSeqTest(Application &application, std::string &modName, + std::string &propName, std::string &seqName, + std::string &actionName, std::string &origin, + unsigned int t_J, unsigned int mu, Current curr, + unsigned int Ls = 1) +{ + if (!(Environment::getInstance().hasModule(modName))) + { + MUtilities::TestSeqConserved::Par seqPar; + if (Ls > 1) + { + seqPar.q = LABEL_5D(propName); + } + else + { + seqPar.q = propName; + } + seqPar.q4d = propName; + seqPar.qSeq = seqName; + seqPar.action = actionName; + seqPar.origin = origin; + seqPar.t_J = t_J; + seqPar.mu = mu; + seqPar.curr = curr; + application.createModule(modName, seqPar); + } +} diff --git a/tests/hadrons/Test_hadrons_conserved_current.cc b/tests/hadrons/Test_hadrons_conserved_current.cc index df774ac0..a11a3530 100644 --- a/tests/hadrons/Test_hadrons_conserved_current.cc +++ b/tests/hadrons/Test_hadrons_conserved_current.cc @@ -45,6 +45,7 @@ int main(int argc, char *argv[]) Application application; unsigned int nt = GridDefaultLatt()[Tp]; double mass = 0.04; + unsigned int Ls = 12; // global parameters Application::GlobalPar globalPar; @@ -65,7 +66,7 @@ int main(int argc, char *argv[]) std::string actionName = "DWF"; MAction::DWF::Par actionPar; actionPar.gauge = "gauge"; - actionPar.Ls = 12; + actionPar.Ls = Ls; actionPar.M5 = 1.8; actionPar.mass = mass; application.createModule(actionName, actionPar); @@ -83,37 +84,30 @@ int main(int argc, char *argv[]) std::string pos = "0 0 0 0"; std::string modName = "Ward Identity Test"; MAKE_POINT_PROP(pos, pointProp, solverName); - if (!(Environment::getInstance().hasModule(modName))) - { - MContraction::WardIdentity::Par wiPar; - wiPar.q = pointProp + "_5d"; - wiPar.q4d = pointProp; - wiPar.action = actionName; - wiPar.mass = mass; - application.createModule(modName, wiPar); - } + makeWITest(application, modName, pointProp, actionName, mass, Ls); - // Conserved current 
contractions with sequential insertion of vector + // Conserved current contractions with sequential insertion of vector/axial // current. - std::string q_x = "q_x"; - std::string q_y = "q_y"; - std::string q_z = "q_z"; - std::string q_t = "q_t"; - std::string mom = ZERO_MOM; - modName = "Sequential Ward Identity Test"; - MAKE_SEQUENTIAL_PROP(nt/2, pointProp, mom, q_x, solverName); - MAKE_SEQUENTIAL_PROP(nt/2, pointProp, mom, q_y, solverName); - MAKE_SEQUENTIAL_PROP(nt/2, pointProp, mom, q_z, solverName); - MAKE_SEQUENTIAL_PROP(nt/2, pointProp, mom, q_t, solverName); - if (!(Environment::getInstance().hasModule(modName))) - { - MContraction::WardIdentitySeq::Par wiPar; - wiPar.q_x = q_x; - wiPar.q_y = q_y; - wiPar.q_z = q_z; - wiPar.q_t = q_t; - application.createModule(modName, wiPar); - } + std::string mom = ZERO_MOM; + unsigned int t_J = nt/2; + std::string seqPropA = ADD_INDEX(pointProp + "_seq_A", t_J); + std::string seqPropV = ADD_INDEX(pointProp + "_seq_V", t_J); + std::string seqSrcA = seqPropA + "_src"; + std::string seqSrcV = seqPropV + "_src"; + std::string point5d = LABEL_5D(pointProp); + makeConservedSequentialSource(application, seqSrcA, point5d, + actionName, t_J, Current::Axial, Tp, mom); + makePropagator(application, seqPropA, seqSrcA, solverName); + makeConservedSequentialSource(application, seqSrcV, point5d, + actionName, t_J, Current::Vector, Tp, mom); + makePropagator(application, seqPropV, seqSrcV, solverName); + + std::string modNameA = "Axial Sequential Test"; + std::string modNameV = "Vector Sequential Test"; + makeSeqTest(application, modNameA, pointProp, seqPropA, + actionName, pos, t_J, Tp, Current::Axial, Ls); + makeSeqTest(application, modNameV, pointProp, seqPropV, + actionName, pos, t_J, Tp, Current::Vector, Ls); // execution application.saveParameterFile("ConservedCurrentTest.xml"); From 51d84ec057a80b9898369c7181dacce9979a945d Mon Sep 17 00:00:00 2001 From: Lanny91 Date: Fri, 28 Apr 2017 16:49:14 +0100 Subject: [PATCH 026/377] 
Bugfixes in Wilson 5D sequential conserved current insertion --- lib/qcd/action/fermion/WilsonFermion5D.cc | 24 ++++++++++------------- 1 file changed, 10 insertions(+), 14 deletions(-) diff --git a/lib/qcd/action/fermion/WilsonFermion5D.cc b/lib/qcd/action/fermion/WilsonFermion5D.cc index 99ff0dc1..bae5ae70 100644 --- a/lib/qcd/action/fermion/WilsonFermion5D.cc +++ b/lib/qcd/action/fermion/WilsonFermion5D.cc @@ -726,6 +726,8 @@ void WilsonFermion5D::SeqConservedCurrent(PropagatorField &q_in, conformable(q_in._grid, FermionGrid()); conformable(q_in._grid, q_out._grid); Lattice> ph(_FourDimGrid), coor(_FourDimGrid); + PropagatorField q_in_s(_FourDimGrid); + PropagatorField q_out_s(_FourDimGrid); Complex i(0.0, 1.0); // Momentum projection @@ -737,23 +739,17 @@ void WilsonFermion5D::SeqConservedCurrent(PropagatorField &q_in, } ph = exp((Real)(2*M_PI)*i*ph); - // Sequential insertion - Kernels::SeqConservedCurrentInternal(q_in, q_out, Umu, curr_type, - mu, ph, tmin, tmax); - - // Axial current sign. 
- if (curr_type == Current::Axial) + // Sequential insertion across 5th dimension + for (int s = 0; s < Ls; s++) { - SitePropagator result; - parallel_for(int sU = 0; sU < Umu._grid->oSites(); sU++) + ExtractSlice(q_in_s, q_in, s, 0); + Kernels::SeqConservedCurrentInternal(q_in_s, q_out_s, Umu, curr_type, + mu, ph, tmin, tmax); + if ((curr_type == Current::Axial) && (s < Ls/2)) { - int sF = sU * Ls; - for (int s = 0; s < Ls/2; s++) - { - vstream(q_out._odata[sF], -q_out._odata[sF]); - sF++; - } + q_out_s = -q_out_s; } + InsertSlice(q_out_s, q_out, s, 0); } } From 49331a3e72b6b644cd48424b8566440a7338fa64 Mon Sep 17 00:00:00 2001 From: Lanny91 Date: Fri, 28 Apr 2017 16:50:17 +0100 Subject: [PATCH 027/377] Minor improvements to Ward Identity checks --- .../Hadrons/Modules/MContraction/WardIdentity.hpp | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/extras/Hadrons/Modules/MContraction/WardIdentity.hpp b/extras/Hadrons/Modules/MContraction/WardIdentity.hpp index 41d8c6d1..d312bd4d 100644 --- a/extras/Hadrons/Modules/MContraction/WardIdentity.hpp +++ b/extras/Hadrons/Modules/MContraction/WardIdentity.hpp @@ -115,6 +115,10 @@ template void TWardIdentity::setup(void) { Ls_ = env().getObjectLs(par().q); + if (Ls_ != env().getObjectLs(par().action)) + { + HADRON_ERROR("Ls mismatch between quark action and propagator"); + } } // execution /////////////////////////////////////////////////////////////////// @@ -125,8 +129,8 @@ void TWardIdentity::execute(void) << "'." 
<< std::endl; PropagatorField psi(env().getGrid()), tmp(env().getGrid()); - PropagatorField q = *env().template getObject(par().q); - PropagatorField q4d = *env().template getObject(par().q4d); + PropagatorField &q = *env().template getObject(par().q); + PropagatorField &q4d = *env().template getObject(par().q4d); FMat &act = *(env().template getObject(par().action)); Gamma g5(Gamma::Algebra::Gamma5); LatticeComplex PP(env().getGrid()), PA(env().getGrid()), @@ -142,7 +146,7 @@ void TWardIdentity::execute(void) for (unsigned int mu = 0; mu < Nd; ++mu) { act.ContractConservedCurrent(q, q, tmp, Current::Vector, mu); - Vmu[mu] = trace(g5*tmp); + Vmu[mu] = trace(tmp); act.ContractConservedCurrent(q, q, tmp, Current::Axial, mu); Amu[mu] = trace(g5*tmp); } @@ -170,6 +174,9 @@ void TWardIdentity::execute(void) << norm2(vector_WI) << std::endl; LOG(Message) << "Axial Ward Identity defect Delta_mu A_mu = " << norm2(defect) << std::endl; + LOG(Message) << "norm2(PP) = " << norm2(PP) << std::endl; + LOG(Message) << "norm2(PA) = " << norm2(PA) << std::endl; + LOG(Message) << "norm2(PJ5q) = " << norm2(PJ5q) << std::endl; } END_MODULE_NAMESPACE From 62a64d9108cf260cb0f5d3dd18aa0695568cc432 Mon Sep 17 00:00:00 2001 From: Guido Cossu Date: Mon, 1 May 2017 11:06:21 +0100 Subject: [PATCH 028/377] EO support, wip --- .vscode/settings.json | 38 +++++++++++++++++++ lib/qcd/action/fermion/WilsonCloverFermion.cc | 15 +++++++- lib/qcd/action/fermion/WilsonCloverFermion.h | 8 +++- tests/core/Test_wilson_clover.cc | 3 +- 4 files changed, 60 insertions(+), 4 deletions(-) diff --git a/.vscode/settings.json b/.vscode/settings.json index 20af2f68..dd8f0473 100644 --- a/.vscode/settings.json +++ b/.vscode/settings.json @@ -1,3 +1,41 @@ // Place your settings in this file to overwrite default and user settings. 
{ + "files.associations": { + "cctype": "cpp", + "clocale": "cpp", + "cmath": "cpp", + "csignal": "cpp", + "cstdarg": "cpp", + "cstddef": "cpp", + "cstdio": "cpp", + "cstdlib": "cpp", + "cstring": "cpp", + "ctime": "cpp", + "cwchar": "cpp", + "cwctype": "cpp", + "array": "cpp", + "atomic": "cpp", + "*.tcc": "cpp", + "chrono": "cpp", + "codecvt": "cpp", + "condition_variable": "cpp", + "cstdint": "cpp", + "exception": "cpp", + "slist": "cpp", + "functional": "cpp", + "initializer_list": "cpp", + "iosfwd": "cpp", + "limits": "cpp", + "memory": "cpp", + "mutex": "cpp", + "new": "cpp", + "ratio": "cpp", + "stdexcept": "cpp", + "system_error": "cpp", + "thread": "cpp", + "tuple": "cpp", + "type_traits": "cpp", + "typeinfo": "cpp", + "utility": "cpp" + } } \ No newline at end of file diff --git a/lib/qcd/action/fermion/WilsonCloverFermion.cc b/lib/qcd/action/fermion/WilsonCloverFermion.cc index 3ab481ce..e1900830 100644 --- a/lib/qcd/action/fermion/WilsonCloverFermion.cc +++ b/lib/qcd/action/fermion/WilsonCloverFermion.cc @@ -91,7 +91,6 @@ void WilsonCloverFermion::ImportGauge(const GaugeField &_Umu) { this->ImportGauge(_Umu); GridBase *grid = _Umu._grid; - assert(Nd == 4); // only works in 4 dim typename Impl::GaugeLinkField Bx(grid), By(grid), Bz(grid), Ex(grid), Ey(grid), Ez(grid); // Compute the field strength terms @@ -140,6 +139,11 @@ void WilsonCloverFermion::ImportGauge(const GaugeField &_Umu) Qxinv()(j, k)(a, b) = EigenInvCloverOp(a + j * DimRep, b + k * DimRep); pokeLocalSite(Qxinv, CloverTermInv, lcoor); + // Separate the even and odd parts. 
+ pickCheckerboard(Even, CloverTermEven, CloverTerm); + pickCheckerboard( Odd, CloverTermOdd, CloverTerm); + pickCheckerboard(Even, CloverTermInvEven, CloverTermInv); + pickCheckerboard( Odd, CloverTermInvOdd, CloverTermInv); } } @@ -172,8 +176,15 @@ void WilsonCloverFermion::MooeeInternal(const FermionField &in, FermionFie { out.checkerboard = in.checkerboard; CloverFieldType *Clover; + if (in.checkerboard == Odd){ + std::cout << "Calling clover term Odd" << std::endl; + Clover = (inv) ? &CloverTermInvOdd : &CloverTermOdd; + } + if (in.checkerboard == Even){ + std::cout << "Calling clover term Even" << std::endl; + Clover = (inv) ? &CloverTermInvEven : &CloverTermEven; + } - Clover = (inv) ? &CloverTermInv : &CloverTerm; if (dag){ out = adj(*Clover) * in;} else { out = *Clover * in;} } // MooeeInternal diff --git a/lib/qcd/action/fermion/WilsonCloverFermion.h b/lib/qcd/action/fermion/WilsonCloverFermion.h index 0fa0d57d..c9e7be39 100644 --- a/lib/qcd/action/fermion/WilsonCloverFermion.h +++ b/lib/qcd/action/fermion/WilsonCloverFermion.h @@ -59,7 +59,11 @@ public: Hgrid, _mass, p), CloverTerm(&Fgrid), - CloverTermInv(&Fgrid) + CloverTermInv(&Fgrid), + CloverTermEven(&Hgrid), + CloverTermOdd(&Hgrid), + CloverTermInvEven(&Hgrid), + CloverTermInvOdd(&Hgrid) { csw = _csw; assert(Nd == 4); // require 4 dimensions @@ -85,6 +89,8 @@ private: RealD csw; // Clover coefficient CloverFieldType CloverTerm, CloverTermInv; // Clover term + CloverFieldType CloverTermEven, CloverTermOdd; + CloverFieldType CloverTermInvEven, CloverTermInvOdd; // Clover term // eventually these two can be compressed into 6x6 blocks instead of the 12x12 // using the DeGrand-Rossi basis for the gamma matrices diff --git a/tests/core/Test_wilson_clover.cc b/tests/core/Test_wilson_clover.cc index 08516d80..3df69e3b 100644 --- a/tests/core/Test_wilson_clover.cc +++ b/tests/core/Test_wilson_clover.cc @@ -191,8 +191,9 @@ int main (int argc, char ** argv) Dwc.MooeeInv(src_e,phi_e); Dwc.Mooee(chi_o,src_o); 
+exit(1); Dwc.MooeeInv(src_o,phi_o); - + setCheckerboard(phi,phi_e); setCheckerboard(phi,phi_o); From 77e0af9c2eca8ce816b6c6a54bc2d0edef26e213 Mon Sep 17 00:00:00 2001 From: Lanny91 Date: Fri, 5 May 2017 12:27:50 +0100 Subject: [PATCH 029/377] Compilation fix after merge - conserved current code not yet operational for vectorised 5D or Gparity Impl. --- lib/qcd/action/fermion/WilsonKernels.cc | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/lib/qcd/action/fermion/WilsonKernels.cc b/lib/qcd/action/fermion/WilsonKernels.cc index 5deec27c..8dc6bd8c 100644 --- a/lib/qcd/action/fermion/WilsonKernels.cc +++ b/lib/qcd/action/fermion/WilsonKernels.cc @@ -365,10 +365,16 @@ WilsonKernels::SeqConservedCurrentInternal(const PropagatorField &q_in, NO_CURR(GparityWilsonImplF); NO_CURR(GparityWilsonImplD); +NO_CURR(GparityWilsonImplFH); +NO_CURR(GparityWilsonImplDF); NO_CURR(DomainWallVec5dImplF); NO_CURR(DomainWallVec5dImplD); +NO_CURR(DomainWallVec5dImplFH); +NO_CURR(DomainWallVec5dImplDF); NO_CURR(ZDomainWallVec5dImplF); NO_CURR(ZDomainWallVec5dImplD); +NO_CURR(ZDomainWallVec5dImplFH); +NO_CURR(ZDomainWallVec5dImplDF); FermOpTemplateInstantiate(WilsonKernels); AdjointFermOpTemplateInstantiate(WilsonKernels); From d44cc204d166e19c0198ab9176061d3d8595930a Mon Sep 17 00:00:00 2001 From: Lanny91 Date: Fri, 12 May 2017 14:58:17 +0100 Subject: [PATCH 030/377] Added test module for sequential gamma matrix insertion --- extras/Hadrons/Modules.hpp | 1 + .../Modules/MUtilities/TestSeqGamma.hpp | 119 ++++++++++++++++++ extras/Hadrons/modules.inc | 1 + 3 files changed, 121 insertions(+) create mode 100644 extras/Hadrons/Modules/MUtilities/TestSeqGamma.hpp diff --git a/extras/Hadrons/Modules.hpp b/extras/Hadrons/Modules.hpp index 0286333c..dd6a6010 100644 --- a/extras/Hadrons/Modules.hpp +++ b/extras/Hadrons/Modules.hpp @@ -49,4 +49,5 @@ See the full license in the file "LICENSE" in the top level distribution directo #include #include #include +#include #include diff --git 
a/extras/Hadrons/Modules/MUtilities/TestSeqGamma.hpp b/extras/Hadrons/Modules/MUtilities/TestSeqGamma.hpp new file mode 100644 index 00000000..b3e99617 --- /dev/null +++ b/extras/Hadrons/Modules/MUtilities/TestSeqGamma.hpp @@ -0,0 +1,119 @@ +#ifndef Hadrons_TestSeqGamma_hpp_ +#define Hadrons_TestSeqGamma_hpp_ + +#include +#include +#include + +BEGIN_HADRONS_NAMESPACE + +/****************************************************************************** + * TestSeqGamma * + ******************************************************************************/ +BEGIN_MODULE_NAMESPACE(MUtilities) + +class TestSeqGammaPar: Serializable +{ +public: + GRID_SERIALIZABLE_CLASS_MEMBERS(TestSeqGammaPar, + std::string, q, + std::string, qSeq, + std::string, origin, + Gamma::Algebra, gamma, + unsigned int, t_g); +}; + +template +class TTestSeqGamma: public Module +{ +public: + TYPE_ALIASES(FImpl,); +public: + // constructor + TTestSeqGamma(const std::string name); + // destructor + virtual ~TTestSeqGamma(void) = default; + // dependency relation + virtual std::vector getInput(void); + virtual std::vector getOutput(void); + // setup + virtual void setup(void); + // execution + virtual void execute(void); +}; + +MODULE_REGISTER_NS(TestSeqGamma, TTestSeqGamma, MUtilities); + +/****************************************************************************** + * TTestSeqGamma implementation * + ******************************************************************************/ +// constructor ///////////////////////////////////////////////////////////////// +template +TTestSeqGamma::TTestSeqGamma(const std::string name) +: Module(name) +{} + +// dependencies/products /////////////////////////////////////////////////////// +template +std::vector TTestSeqGamma::getInput(void) +{ + std::vector in = {par().q, par().qSeq}; + + return in; +} + +template +std::vector TTestSeqGamma::getOutput(void) +{ + std::vector out = {getName()}; + + return out; +} + +// setup 
/////////////////////////////////////////////////////////////////////// +template +void TTestSeqGamma::setup(void) +{ + +} + +// execution /////////////////////////////////////////////////////////////////// +template +void TTestSeqGamma::execute(void) +{ + PropagatorField &q = *env().template getObject(par().q); + PropagatorField &qSeq = *env().template getObject(par().qSeq); + LatticeComplex c(env().getGrid()); + Gamma g5(Gamma::Algebra::Gamma5); + Gamma g(par().gamma); + SitePropagator qSite; + Complex test, check; + std::vector check_buf; + + // Check sequential insertion of gamma matrix gives same result as + // insertion of gamma at sink upon contraction. Assume q uses a point + // source. + std::vector siteCoord; + siteCoord = strToVec(par().origin); + peekSite(qSite, qSeq, siteCoord); + test = trace(g*qSite); + + c = trace(adj(g)*g5*adj(q)*g5*g*q); + sliceSum(c, check_buf, Tp); + check = TensorRemove(check_buf[par().t_g]); + + LOG(Message) << "Seq Result = " << abs(test) << std::endl; + LOG(Message) << "Reference = " << abs(check) << std::endl; + + // Check difference = 0 + check -= test; + + LOG(Message) << "Consistency check for sequential " << par().gamma + << " insertion = " << abs(check) << std::endl; +} + +END_MODULE_NAMESPACE + +END_HADRONS_NAMESPACE + +#endif // Hadrons_TestSeqGamma_hpp_ diff --git a/extras/Hadrons/modules.inc b/extras/Hadrons/modules.inc index 4ab51ce0..0364502a 100644 --- a/extras/Hadrons/modules.inc +++ b/extras/Hadrons/modules.inc @@ -30,5 +30,6 @@ modules_hpp =\ Modules/MSource/Wall.hpp \ Modules/MSource/Z2.hpp \ Modules/MUtilities/TestSeqConserved.hpp \ + Modules/MUtilities/TestSeqGamma.hpp \ Modules/Quark.hpp From 98f610ce5384f75883d7e4a31be54e55c6251410 Mon Sep 17 00:00:00 2001 From: Lanny91 Date: Fri, 12 May 2017 16:15:26 +0100 Subject: [PATCH 031/377] Reduced code duplication in hadron tests --- tests/hadrons/Test_hadrons.hpp | 96 ++++++++++++++++++++++++++ tests/hadrons/Test_hadrons_rarekaon.cc | 41 +++-------- 2 files 
changed, 107 insertions(+), 30 deletions(-) diff --git a/tests/hadrons/Test_hadrons.hpp b/tests/hadrons/Test_hadrons.hpp index c4dcedaf..c0a596b5 100644 --- a/tests/hadrons/Test_hadrons.hpp +++ b/tests/hadrons/Test_hadrons.hpp @@ -33,6 +33,30 @@ using namespace Hadrons; /******************************************************************************* * Macros to reduce code duplication. ******************************************************************************/ +// Common initialisation +#define HADRONS_DEFAULT_INIT \ + Grid_init(&argc, &argv); \ + HadronsLogError.Active(GridLogError.isActive()); \ + HadronsLogWarning.Active(GridLogWarning.isActive()); \ + HadronsLogMessage.Active(GridLogMessage.isActive()); \ + HadronsLogIterative.Active(GridLogIterative.isActive()); \ + HadronsLogDebug.Active(GridLogDebug.isActive()); \ + LOG(Message) << "Grid initialized" << std::endl; + +#define HADRONS_DEFAULT_GLOBALS(application) \ +{ \ + Application::GlobalPar globalPar; \ + globalPar.trajCounter.start = 1500; \ + globalPar.trajCounter.end = 1520; \ + globalPar.trajCounter.step = 20; \ + globalPar.seed = "1 2 3 4"; \ + globalPar.genetic.maxGen = 1000; \ + globalPar.genetic.maxCstGen = 200; \ + globalPar.genetic.popSize = 20; \ + globalPar.genetic.mutationRate = .1; \ + application.setPar(globalPar); \ +} + // Useful definitions #define ZERO_MOM "0. 0. 0. 0." #define INIT_INDEX(s, n) (std::string(s) + "_" + std::to_string(n)) @@ -73,10 +97,82 @@ using namespace Hadrons; makePropagator(application, propName, srcName, solver);\ } +/******************************************************************************* + * Action setups. + ******************************************************************************/ + +/******************************************************************************* + * Name: makeWilsonAction + * Parameters: application - main application that stores modules. + * actionName - name of action module to create. + * gaugeField - gauge field module. 
+ * mass - quark mass. + * Returns: None. + ******************************************************************************/ +inline void makeWilsonAction(Application &application, std::string actionName, + std::string &gaugeField, double mass) +{ + if (!(Environment::getInstance().hasModule(actionName))) + { + MAction::Wilson::Par actionPar; + actionPar.gauge = gaugeField; + actionPar.mass = mass; + application.createModule(actionName, actionPar); + } +} + +/******************************************************************************* + * Name: makeDWFAction + * Parameters: application - main application that stores modules. + * actionName - name of action module to create. + * gaugeField - gauge field module. + * mass - quark mass. + * M5 - domain wall height. + * Ls - fifth dimension extent. + * Returns: None. + ******************************************************************************/ +inline void makeDWFAction(Application &application, std::string actionName, + std::string &gaugeField, double mass, double M5, + unsigned int Ls) +{ + if (!(Environment::getInstance().hasModule(actionName))) + { + MAction::DWF::Par actionPar; + actionPar.gauge = gaugeField; + actionPar.Ls = Ls; + actionPar.M5 = M5; + actionPar.mass = mass; + application.createModule(actionName, actionPar); + } +} + /******************************************************************************* * Functions for propagator construction. ******************************************************************************/ +/******************************************************************************* + * Name: makeRBPrecCGSolver + * Purpose: Make RBPrecCG solver module for specified action. + * Parameters: application - main application that stores modules. + * solverName - name of solver module to create. + * actionName - action module corresponding to propagators to be + * computed. + * residual - CG target residual. + * Returns: None. 
+ ******************************************************************************/ +inline void makeRBPrecCGSolver(Application &application, std::string &solverName, + std::string &actionName, double residual = 1e-8) +{ + if (!(Environment::getInstance().hasModule(solverName))) + { + MSolver::RBPrecCG::Par solverPar; + solverPar.action = actionName; + solverPar.residual = residual; + application.createModule(solverName, + solverPar); + } +} + /******************************************************************************* * Name: makePointSource * Purpose: Construct point source and add to application module. diff --git a/tests/hadrons/Test_hadrons_rarekaon.cc b/tests/hadrons/Test_hadrons_rarekaon.cc index 9d35c1bc..1b5a45d9 100644 --- a/tests/hadrons/Test_hadrons_rarekaon.cc +++ b/tests/hadrons/Test_hadrons_rarekaon.cc @@ -51,13 +51,7 @@ int main(int argc, char *argv[]) configStem = argv[1]; // initialization ////////////////////////////////////////////////////////// - Grid_init(&argc, &argv); - HadronsLogError.Active(GridLogError.isActive()); - HadronsLogWarning.Active(GridLogWarning.isActive()); - HadronsLogMessage.Active(GridLogMessage.isActive()); - HadronsLogIterative.Active(GridLogIterative.isActive()); - HadronsLogDebug.Active(GridLogDebug.isActive()); - LOG(Message) << "Grid initialized" << std::endl; + HADRONS_DEFAULT_INIT; // run setup /////////////////////////////////////////////////////////////// Application application; @@ -74,46 +68,33 @@ int main(int argc, char *argv[]) unsigned int n_noise = 1; unsigned int nt = 32; bool do_disconnected(false); + Gamma::Algebra gT = Gamma::Algebra::GammaT; + unsigned int Ls = 16; + double M5 = 1.8; // Global parameters. 
- Application::GlobalPar globalPar; - globalPar.trajCounter.start = 1500; - globalPar.trajCounter.end = 1520; - globalPar.trajCounter.step = 20; - globalPar.seed = "1 2 3 4"; - globalPar.genetic.maxGen = 1000; - globalPar.genetic.maxCstGen = 200; - globalPar.genetic.popSize = 20; - globalPar.genetic.mutationRate = .1; - application.setPar(globalPar); + HADRONS_DEFAULT_GLOBALS(application); // gauge field + std::string gaugeField = "gauge"; if (configStem == "None") { - application.createModule("gauge"); + application.createModule(gaugeField); } else { MGauge::Load::Par gaugePar; gaugePar.file = configStem; - application.createModule("gauge", gaugePar); + application.createModule(gaugeField, gaugePar); } for (unsigned int i = 0; i < flavour.size(); ++i) { // actions - MAction::DWF::Par actionPar; - actionPar.gauge = "gauge"; - actionPar.Ls = 16; - actionPar.M5 = 1.8; - actionPar.mass = mass[i]; - application.createModule("DWF_" + flavour[i], actionPar); + std::string actionName = "DWF_" + flavour[i]; + makeDWFAction(application, actionName, gaugeField, mass[i], M5, Ls); // solvers - MSolver::RBPrecCG::Par solverPar; - solverPar.action = "DWF_" + flavour[i]; - solverPar.residual = 1.0e-8; - application.createModule(solvers[i], - solverPar); + makeRBPrecCGSolver(application, solvers[i], actionName); } // Create noise propagators for loops. 
From c2010f21aba12b3f2fd7166211b6c3243b428ed9 Mon Sep 17 00:00:00 2001 From: Lanny91 Date: Fri, 12 May 2017 16:23:01 +0100 Subject: [PATCH 032/377] Added sequential propagator test for gamma matrix insertion --- tests/hadrons/Test_hadrons.hpp | 42 ++++++++++-- tests/hadrons/Test_hadrons_rarekaon.cc | 10 +-- tests/hadrons/Test_hadrons_seq_gamma.cc | 89 +++++++++++++++++++++++++ 3 files changed, 131 insertions(+), 10 deletions(-) create mode 100644 tests/hadrons/Test_hadrons_seq_gamma.cc diff --git a/tests/hadrons/Test_hadrons.hpp b/tests/hadrons/Test_hadrons.hpp index c0a596b5..61e90bac 100644 --- a/tests/hadrons/Test_hadrons.hpp +++ b/tests/hadrons/Test_hadrons.hpp @@ -82,11 +82,11 @@ using namespace Hadrons; MAKE_3MOM_WALL_PROP(tW, ZERO_MOM, propName, solver) // Sequential source macros -#define MAKE_SEQUENTIAL_PROP(tS, qSrc, mom, propName, solver)\ +#define MAKE_SEQUENTIAL_PROP(tS, qSrc, mom, seqPropName, solver, gamma)\ {\ - std::string srcName = ADD_INDEX(qSrc + "_seq", tS);\ - makeSequentialSource(application, srcName, qSrc, tS, mom);\ - makePropagator(application, propName, srcName, solver);\ + std::string srcName = seqPropName + "_src";\ + makeSequentialSource(application, srcName, qSrc, tS, gamma, mom);\ + makePropagator(application, seqPropName, srcName, solver);\ } // Point source macros @@ -205,6 +205,7 @@ inline void makePointSource(Application &application, std::string srcName, ******************************************************************************/ inline void makeSequentialSource(Application &application, std::string srcName, std::string qSrc, unsigned int tS, + Gamma::Algebra gamma = Gamma::Algebra::GammaT, std::string mom = ZERO_MOM) { // If the source already exists, don't make the module again. 
@@ -215,7 +216,7 @@ inline void makeSequentialSource(Application &application, std::string srcName, seqPar.tA = tS; seqPar.tB = tS; seqPar.mom = mom; - seqPar.gamma = Gamma::Algebra::GammaT; + seqPar.gamma = gamma; application.createModule(srcName, seqPar); } } @@ -579,3 +580,34 @@ inline void makeSeqTest(Application &application, std::string &modName, application.createModule(modName, seqPar); } } + +/******************************************************************************* + * Name: makeSeqGamComparison + * Purpose: Create module to compare sequential insertion of gamma matrix + * against sink contraction and add to application module. + * Parameters: application - main application that stores modules. + * modName - name of module to create. + * propName - 4D quark propagator. + * seqProp - 4D quark propagator with sequential insertion of + * gamma matrix. + * gamma - Inserted gamma matrix. + * t_g - time at which gamma matrix is inserted + * sequentially. + * Returns: None. + ******************************************************************************/ +inline void makeSeqGamComparison(Application &application, std::string &modName, + std::string &propName, std::string &seqProp, + std::string &origin, Gamma::Algebra gamma, + unsigned int t_g) +{ + if (!(Environment::getInstance().hasModule(modName))) + { + MUtilities::TestSeqGamma::Par seqPar; + seqPar.q = propName; + seqPar.qSeq = seqProp; + seqPar.origin = origin; + seqPar.t_g = t_g; + seqPar.gamma = gamma; + application.createModule(modName, seqPar); + } +} diff --git a/tests/hadrons/Test_hadrons_rarekaon.cc b/tests/hadrons/Test_hadrons_rarekaon.cc index 1b5a45d9..3a642f24 100644 --- a/tests/hadrons/Test_hadrons_rarekaon.cc +++ b/tests/hadrons/Test_hadrons_rarekaon.cc @@ -212,10 +212,10 @@ int main(int argc, char *argv[]) std::string q_KsCs_mq = LABEL_3PT("Q_KsCs_mq", tK, tJ); std::string q_pilCl_q = LABEL_3PT("Q_pilCl_q", tpi, tJ); std::string q_pilCl_mq = LABEL_3PT("Q_pilCl_mq", tpi, tJ); - 
MAKE_SEQUENTIAL_PROP(tJ, q_Kl_0, qmom, q_KlCl_q, solvers[light]); - MAKE_SEQUENTIAL_PROP(tJ, q_Ks_k, mqmom, q_KsCs_mq, solvers[strange]); - MAKE_SEQUENTIAL_PROP(tJ, q_pil_p, qmom, q_pilCl_q, solvers[light]); - MAKE_SEQUENTIAL_PROP(tJ, q_pil_0, mqmom, q_pilCl_mq, solvers[light]); + MAKE_SEQUENTIAL_PROP(tJ, q_Kl_0, qmom, q_KlCl_q, solvers[light], gT); + MAKE_SEQUENTIAL_PROP(tJ, q_Ks_k, mqmom, q_KsCs_mq, solvers[strange], gT); + MAKE_SEQUENTIAL_PROP(tJ, q_pil_p, qmom, q_pilCl_q, solvers[light], gT); + MAKE_SEQUENTIAL_PROP(tJ, q_pil_0, mqmom, q_pilCl_mq, solvers[light], gT); /******************************************************************* * CONTRACTIONS: pi and K 3pt contractions with current insertion. @@ -271,7 +271,7 @@ int main(int argc, char *argv[]) std::string loop_qCq = LABEL_3PT(loop_stem + flavour[f], tJ, nn); std::string loop_qCq_res = loop_qCq + "_res"; MAKE_SEQUENTIAL_PROP(tJ, noiseRes[f][nn], qmom, - loop_qCq_res, solvers[f]); + loop_qCq_res, solvers[f], gT); makeLoop(application, loop_qCq, eta, loop_qCq_res); /******************************************************* diff --git a/tests/hadrons/Test_hadrons_seq_gamma.cc b/tests/hadrons/Test_hadrons_seq_gamma.cc new file mode 100644 index 00000000..22c35ecb --- /dev/null +++ b/tests/hadrons/Test_hadrons_seq_gamma.cc @@ -0,0 +1,89 @@ +/******************************************************************************* + Grid physics library, www.github.com/paboyle/Grid + + Source file: tests/hadrons/Test_hadrons_seq_gamma.cc + + Copyright (C) 2017 + + Author: Andrew Lawson + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along + with this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + + See the full license in the file "LICENSE" in the top level distribution + directory. + *******************************************************************************/ + +#include "Test_hadrons.hpp" + +using namespace Grid; +using namespace QCD; +using namespace Hadrons; + +/******************************************************************************* + * Consistency test for sequential gamma insertion. + ******************************************************************************/ + +int main(int argc, char *argv[]) +{ + // initialization ////////////////////////////////////////////////////////// + HADRONS_DEFAULT_INIT; + + // run setup /////////////////////////////////////////////////////////////// + Application application; + unsigned int nt = GridDefaultLatt()[Tp]; + unsigned int tS = nt / 2; + unsigned int Ls = 12; + double mass = 0.04; + double M5 = 1.8; + + // global parameters + HADRONS_DEFAULT_GLOBALS(application); + + // gauge field + std::string gaugeField = "gauge"; + application.createModule(gaugeField); + + // action + std::string actionName = "DWF"; + makeDWFAction(application, actionName, gaugeField, mass, M5, Ls); + + // solver + std::string solverName = "CG"; + makeRBPrecCGSolver(application, solverName, actionName); + + // test sequential propagator, with g5 insertion. 
+ Gamma::Algebra g = Gamma::Algebra::Gamma5; + std::string pointProp = "q_0"; + std::string point5d = LABEL_5D(pointProp); + std::string origin = "0 0 0 0"; + MAKE_POINT_PROP(origin, pointProp, solverName); + + std::string seqProp = ADD_INDEX(pointProp + "_seqg5", tS); + std::string seqSrc = seqProp + "_src"; + MAKE_SEQUENTIAL_PROP(tS, pointProp, ZERO_MOM, seqProp, solverName, g); + + std::string modName = "Test g5 sequential insertion"; + makeSeqGamComparison(application, modName, pointProp, seqProp, origin, g, tS); + + // execution + application.saveParameterFile("SeqGamma5Test.xml"); + application.run(); + + // epilogue + LOG(Message) << "Grid is finalizing now" << std::endl; + Grid_finalize(); + + return EXIT_SUCCESS; +} \ No newline at end of file From 34332fe3934754025446cec92b6c099c6828df9f Mon Sep 17 00:00:00 2001 From: Lanny91 Date: Fri, 12 May 2017 16:30:43 +0100 Subject: [PATCH 033/377] Improvement to sequential conserved current insertion tests --- .../Modules/MUtilities/TestSeqConserved.hpp | 45 +++-- tests/hadrons/Test_hadrons.hpp | 31 ++-- .../hadrons/Test_hadrons_conserved_current.cc | 156 +++++++++++------- 3 files changed, 131 insertions(+), 101 deletions(-) diff --git a/extras/Hadrons/Modules/MUtilities/TestSeqConserved.hpp b/extras/Hadrons/Modules/MUtilities/TestSeqConserved.hpp index 0730b8ed..3ae1b8b0 100644 --- a/extras/Hadrons/Modules/MUtilities/TestSeqConserved.hpp +++ b/extras/Hadrons/Modules/MUtilities/TestSeqConserved.hpp @@ -41,7 +41,6 @@ BEGIN_HADRONS_NAMESPACE * options: - q: point source propagator, 5D if available (string) - - q4d: 4D point source propagator, duplicate of q if q is 4D (string) - qSeq: result of sequential insertion of conserved current using q (string) - action: action used for computation of q (string) - origin: string giving point source origin of q (string) @@ -60,7 +59,6 @@ class TestSeqConservedPar: Serializable public: GRID_SERIALIZABLE_CLASS_MEMBERS(TestSeqConservedPar, std::string, q, - std::string, q4d, 
std::string, qSeq, std::string, action, std::string, origin, @@ -103,8 +101,7 @@ TTestSeqConserved::TTestSeqConserved(const std::string name) template std::vector TTestSeqConserved::getInput(void) { - std::vector in = {par().q, par().q4d, - par().qSeq, par().action}; + std::vector in = {par().q, par().qSeq, par().action}; return in; } @@ -121,7 +118,11 @@ std::vector TTestSeqConserved::getOutput(void) template void TTestSeqConserved::setup(void) { - + auto Ls = env().getObjectLs(par().q); + if (Ls != env().getObjectLs(par().action)) + { + HADRON_ERROR("Ls mismatch between quark action and propagator"); + } } // execution /////////////////////////////////////////////////////////////////// @@ -130,33 +131,43 @@ void TTestSeqConserved::execute(void) { PropagatorField tmp(env().getGrid()); PropagatorField &q = *env().template getObject(par().q); - PropagatorField &q4d = *env().template getObject(par().q4d); PropagatorField &qSeq = *env().template getObject(par().qSeq); FMat &act = *(env().template getObject(par().action)); Gamma g5(Gamma::Algebra::Gamma5); + Gamma::Algebra gA = (par().curr == Current::Axial) ? + Gamma::Algebra::Gamma5 : + Gamma::Algebra::Identity; + Gamma g(gA); SitePropagator qSite; - LatticeComplex c(env().getGrid()); - Complex seq_res, check_res; - std::vector check_buf; + Complex test_S, test_V, check_S, check_V; + std::vector check_buf; // Check sequential insertion of current gives same result as conserved // current sink upon contraction. Assume q uses a point source. 
std::vector siteCoord; siteCoord = strToVec(par().origin); - peekSite(qSite, q, siteCoord); - seq_res = trace(g5*qSite); + peekSite(qSite, qSeq, siteCoord); + test_S = trace(qSite*g); + test_V = trace(qSite*g*Gamma::gmu[par().mu]); act.ContractConservedCurrent(q, q, tmp, par().curr, par().mu); - c = trace(tmp); - sliceSum(c, check_buf, Tp); - check_res = TensorRemove(check_buf[par().t_J]); + sliceSum(tmp, check_buf, Tp); + check_S = TensorRemove(trace(check_buf[par().t_J]*g)); + check_V = TensorRemove(trace(check_buf[par().t_J]*g*Gamma::gmu[par().mu])); + + LOG(Message) << "Test S = " << abs(test_S) << std::endl; + LOG(Message) << "Test V = " << abs(test_V) << std::endl; + LOG(Message) << "Check S = " << abs(check_S) << std::endl; + LOG(Message) << "Check V = " << abs(check_V) << std::endl; // Check difference = 0 - check_res -= seq_res; + check_S -= test_S; + check_V -= test_V; LOG(Message) << "Consistency check for sequential conserved " - << par().curr << " current insertion = " << abs(check_res) - << std::endl; + << par().curr << " current insertion: " << std::endl; + LOG(Message) << "Check S = " << abs(check_S) << std::endl; + LOG(Message) << "Check V = " << abs(check_V) << std::endl; } END_MODULE_NAMESPACE diff --git a/tests/hadrons/Test_hadrons.hpp b/tests/hadrons/Test_hadrons.hpp index 61e90bac..1b038388 100644 --- a/tests/hadrons/Test_hadrons.hpp +++ b/tests/hadrons/Test_hadrons.hpp @@ -538,39 +538,30 @@ inline void makeWITest(Application &application, std::string &modName, } /******************************************************************************* - * Name: makeSeqTest - * Purpose: Create module to test sequential insertion of conserved current - * and add to application module. + * Name: makeSeqCurrComparison + * Purpose: Create module to compare sequential insertion of conserved current + * against sink contraction and add to application module. * Parameters: application - main application that stores modules. 
* modName - name of module to create. - * propName - 4D quark propagator. - * seqProp - 4D quark propagator with sequential insertion of + * propName - quark propagator (point source), 5D if available. + * seqName - 4D quark propagator with sequential insertion of * conserved current. * actionName - action used to compute quark propagators. + * origin - origin of point source propagator. * t_J - time at which sequential current is inserted. * mu - Lorentz index of sequential current. * curr - type of conserved current inserted. - * Ls - length of 5th dimension (default = 1). * Returns: None. ******************************************************************************/ -inline void makeSeqTest(Application &application, std::string &modName, - std::string &propName, std::string &seqName, - std::string &actionName, std::string &origin, - unsigned int t_J, unsigned int mu, Current curr, - unsigned int Ls = 1) +inline void makeSeqCurrComparison(Application &application, std::string &modName, + std::string &propName, std::string &seqName, + std::string &actionName, std::string &origin, + unsigned int t_J, unsigned int mu, Current curr) { if (!(Environment::getInstance().hasModule(modName))) { MUtilities::TestSeqConserved::Par seqPar; - if (Ls > 1) - { - seqPar.q = LABEL_5D(propName); - } - else - { - seqPar.q = propName; - } - seqPar.q4d = propName; + seqPar.q = propName; seqPar.qSeq = seqName; seqPar.action = actionName; seqPar.origin = origin; diff --git a/tests/hadrons/Test_hadrons_conserved_current.cc b/tests/hadrons/Test_hadrons_conserved_current.cc index a11a3530..080fef73 100644 --- a/tests/hadrons/Test_hadrons_conserved_current.cc +++ b/tests/hadrons/Test_hadrons_conserved_current.cc @@ -30,84 +30,112 @@ using namespace Grid; using namespace Hadrons; +inline void setupSeqCurrTests(Application &application, std::string modStem, + std::string &pointProp, std::string &seqStem, + std::string &actionName, std::string &solverName, + std::string &origin, Current curr, 
+ unsigned int t_J, unsigned int mu, + unsigned int Ls = 1) +{ + std::string modName = ADD_INDEX(modStem, mu); + std::string seqProp = ADD_INDEX(seqStem, mu); + std::string seqSrc = seqProp + "_src"; + + // 5D actions require 5D propagator as input for conserved current + // insertions. + std::string propIn; + if (Ls > 1) + { + propIn = LABEL_5D(pointProp); + } + else + { + propIn = pointProp; + } + + makeConservedSequentialSource(application, seqSrc, propIn, + actionName, t_J, curr, mu); + makePropagator(application, seqProp, seqSrc, solverName); + makeSeqCurrComparison(application, modName, propIn, seqProp, + actionName, origin, t_J, mu, curr); +} + +inline void setupWardIdentityTests(Application &application, + std::string &actionName, + double mass, + unsigned int Ls = 1, + bool perform_axial_tests = false) +{ + // solver + std::string solverName = actionName + "_CG"; + makeRBPrecCGSolver(application, solverName, actionName); + + unsigned int nt = GridDefaultLatt()[Tp]; + unsigned int t_J = nt/2; + + /*************************************************************************** + * Conserved current sink contractions: use a single point propagator for + * the Ward Identity test. + **************************************************************************/ + std::string pointProp = actionName + "_q_0"; + std::string origin = "0 0 0 0"; + std::string modName = actionName + " Ward Identity Test"; + MAKE_POINT_PROP(origin, pointProp, solverName); + makeWITest(application, modName, pointProp, actionName, mass, Ls); + + /*************************************************************************** + * Conserved current tests with sequential insertion of vector/axial + * current. If above Ward Identity passes, sufficient to test sequential + * insertion of conserved current agrees with contracted version. + **************************************************************************/ + // Compare sequential insertion to contraction. 
Should be enough to perform + // for time and one space component. + std::string seqStem = ADD_INDEX(pointProp + "seq_V", t_J); + std::string modStem = actionName + " Vector Sequential Test mu"; + setupSeqCurrTests(application, modStem, pointProp, seqStem, actionName, + solverName, origin, Current::Vector, t_J, Tp, Ls); + setupSeqCurrTests(application, modStem, pointProp, seqStem, actionName, + solverName, origin, Current::Vector, t_J, Xp, Ls); + + // Perform axial tests only if partially-conserved axial current exists for + // the action. + if (perform_axial_tests) + { + seqStem = ADD_INDEX(pointProp + "seq_A", t_J); + modStem = actionName + " Axial Sequential Test mu"; + setupSeqCurrTests(application, modStem, pointProp, seqStem, actionName, + solverName, origin, Current::Axial, t_J, Tp, Ls); + setupSeqCurrTests(application, modStem, pointProp, seqStem, actionName, + solverName, origin, Current::Axial, t_J, Xp, Ls); + } +} + int main(int argc, char *argv[]) { // initialization ////////////////////////////////////////////////////////// - Grid_init(&argc, &argv); - HadronsLogError.Active(GridLogError.isActive()); - HadronsLogWarning.Active(GridLogWarning.isActive()); - HadronsLogMessage.Active(GridLogMessage.isActive()); - HadronsLogIterative.Active(GridLogIterative.isActive()); - HadronsLogDebug.Active(GridLogDebug.isActive()); - LOG(Message) << "Grid initialized" << std::endl; - + HADRONS_DEFAULT_INIT; + // run setup /////////////////////////////////////////////////////////////// Application application; - unsigned int nt = GridDefaultLatt()[Tp]; double mass = 0.04; + double M5 = 1.8; unsigned int Ls = 12; // global parameters - Application::GlobalPar globalPar; - globalPar.trajCounter.start = 1500; - globalPar.trajCounter.end = 1520; - globalPar.trajCounter.step = 20; - globalPar.seed = "1 2 3 4"; - globalPar.genetic.maxGen = 1000; - globalPar.genetic.maxCstGen = 200; - globalPar.genetic.popSize = 20; - globalPar.genetic.mutationRate = .1; - 
application.setPar(globalPar); + HADRONS_DEFAULT_GLOBALS(application); // gauge field - application.createModule("gauge"); + std::string gaugeField = "gauge"; + application.createModule(gaugeField); - // action + // Setup each action and the conserved current tests relevant to it. std::string actionName = "DWF"; - MAction::DWF::Par actionPar; - actionPar.gauge = "gauge"; - actionPar.Ls = Ls; - actionPar.M5 = 1.8; - actionPar.mass = mass; - application.createModule(actionName, actionPar); + makeDWFAction(application, actionName, gaugeField, mass, M5, Ls); + setupWardIdentityTests(application, actionName, mass, Ls, true); - // solver - std::string solverName = "CG"; - MSolver::RBPrecCG::Par solverPar; - solverPar.action = actionName; - solverPar.residual = 1.0e-8; - application.createModule(solverName, - solverPar); - - // Conserved current sink contractions: use a single point propagator. - std::string pointProp = "q_0"; - std::string pos = "0 0 0 0"; - std::string modName = "Ward Identity Test"; - MAKE_POINT_PROP(pos, pointProp, solverName); - makeWITest(application, modName, pointProp, actionName, mass, Ls); - - // Conserved current contractions with sequential insertion of vector/axial - // current. 
- std::string mom = ZERO_MOM; - unsigned int t_J = nt/2; - std::string seqPropA = ADD_INDEX(pointProp + "_seq_A", t_J); - std::string seqPropV = ADD_INDEX(pointProp + "_seq_V", t_J); - std::string seqSrcA = seqPropA + "_src"; - std::string seqSrcV = seqPropV + "_src"; - std::string point5d = LABEL_5D(pointProp); - makeConservedSequentialSource(application, seqSrcA, point5d, - actionName, t_J, Current::Axial, Tp, mom); - makePropagator(application, seqPropA, seqSrcA, solverName); - makeConservedSequentialSource(application, seqSrcV, point5d, - actionName, t_J, Current::Vector, Tp, mom); - makePropagator(application, seqPropV, seqSrcV, solverName); - - std::string modNameA = "Axial Sequential Test"; - std::string modNameV = "Vector Sequential Test"; - makeSeqTest(application, modNameA, pointProp, seqPropA, - actionName, pos, t_J, Tp, Current::Axial, Ls); - makeSeqTest(application, modNameV, pointProp, seqPropV, - actionName, pos, t_J, Tp, Current::Vector, Ls); + actionName = "Wilson"; + makeWilsonAction(application, actionName, gaugeField, mass); + setupWardIdentityTests(application, actionName, mass); // execution application.saveParameterFile("ConservedCurrentTest.xml"); From 08b314fd0fadd012492710bf57fd17b37ea9cf54 Mon Sep 17 00:00:00 2001 From: Lanny91 Date: Thu, 18 May 2017 13:16:14 +0100 Subject: [PATCH 034/377] Hadrons: conserved current test fixes. Axial current tests now also optional. 
--- .../Modules/MContraction/WardIdentity.hpp | 106 ++++++++++-------- tests/hadrons/Test_hadrons.hpp | 13 ++- .../hadrons/Test_hadrons_conserved_current.cc | 3 +- 3 files changed, 69 insertions(+), 53 deletions(-) diff --git a/extras/Hadrons/Modules/MContraction/WardIdentity.hpp b/extras/Hadrons/Modules/MContraction/WardIdentity.hpp index d312bd4d..fa51ce95 100644 --- a/extras/Hadrons/Modules/MContraction/WardIdentity.hpp +++ b/extras/Hadrons/Modules/MContraction/WardIdentity.hpp @@ -40,10 +40,10 @@ BEGIN_HADRONS_NAMESPACE ----------------------------- * options: - - q: propagator, 5D if available (string) - - q4d: 4D propagator, duplicate of q if q is not 5D (string) - - action: action module used for propagator solution (string) - - mass: mass of quark (double) + - q: propagator, 5D if available (string) + - action: action module used for propagator solution (string) + - mass: mass of quark (double) + - test_axial: whether or not to test PCAC relation. */ /****************************************************************************** @@ -56,9 +56,9 @@ class WardIdentityPar: Serializable public: GRID_SERIALIZABLE_CLASS_MEMBERS(WardIdentityPar, std::string, q, - std::string, q4d, std::string, action, - double, mass); + double, mass, + bool, test_axial); }; template @@ -97,7 +97,7 @@ TWardIdentity::TWardIdentity(const std::string name) template std::vector TWardIdentity::getInput(void) { - std::vector in = {par().q, par().q4d, par().action}; + std::vector in = {par().q, par().action}; return in; } @@ -128,55 +128,69 @@ void TWardIdentity::execute(void) LOG(Message) << "Performing Ward Identity checks for quark '" << par().q << "'." 
<< std::endl; - PropagatorField psi(env().getGrid()), tmp(env().getGrid()); + PropagatorField psi(env().getGrid()), tmp(env().getGrid()), + vector_WI(env().getGrid()); PropagatorField &q = *env().template getObject(par().q); - PropagatorField &q4d = *env().template getObject(par().q4d); FMat &act = *(env().template getObject(par().action)); Gamma g5(Gamma::Algebra::Gamma5); - LatticeComplex PP(env().getGrid()), PA(env().getGrid()), - c(env().getGrid()), PJ5q(env().getGrid()), - vector_WI(env().getGrid()), defect(env().getGrid()); - c = zero; PJ5q = zero; vector_WI = zero; defect = zero; - std::vector Vmu(Nd, c); - std::vector Amu(Nd, c); - - // Get PP, PA, V_mu, A_mu for 4D. - PP = trace(adj(q4d)*q4d); - PA = trace(adj(q4d)*g5*q4d); + + // Compute D_mu V_mu, D here is backward derivative. + vector_WI = zero; for (unsigned int mu = 0; mu < Nd; ++mu) { act.ContractConservedCurrent(q, q, tmp, Current::Vector, mu); - Vmu[mu] = trace(tmp); - act.ContractConservedCurrent(q, q, tmp, Current::Axial, mu); - Amu[mu] = trace(g5*tmp); + tmp -= Cshift(tmp, mu, -1); + vector_WI += tmp; } - // Get PJ5q for 5D (zero for 4D). 
- if (Ls_ > 1) - { - ExtractSlice(psi, q, Ls_/2 - 1, 0); - psi = 0.5 * (psi + g5*psi); - ExtractSlice(tmp, q, Ls_/2, 0); - psi += 0.5 * (tmp - g5*tmp); - PJ5q = trace(adj(psi)*psi); - } - - // Test ward identities, D_mu V_mu = 0; D_mu A_mu = 2m + 2 PJ5q - for (unsigned int mu = 0; mu < Nd; ++mu) - { - vector_WI += Vmu[mu] - Cshift(Vmu[mu], mu, -1); - defect += Amu[mu] - Cshift(Amu[mu], mu, -1); - } - defect -= 2.*PJ5q; - defect -= 2.*(par().mass)*PP; - LOG(Message) << "Vector Ward Identity check Delta_mu V_mu = " << norm2(vector_WI) << std::endl; - LOG(Message) << "Axial Ward Identity defect Delta_mu A_mu = " - << norm2(defect) << std::endl; - LOG(Message) << "norm2(PP) = " << norm2(PP) << std::endl; - LOG(Message) << "norm2(PA) = " << norm2(PA) << std::endl; - LOG(Message) << "norm2(PJ5q) = " << norm2(PJ5q) << std::endl; + + if (par().test_axial) + { + LatticeComplex PP(env().getGrid()), axial_defect(env().getGrid()), + PJ5q(env().getGrid()); + + // Compute D_mu A_mu, D is backwards derivative. + axial_defect = zero; + for (unsigned int mu = 0; mu < Nd; ++mu) + { + act.ContractConservedCurrent(q, q, tmp, Current::Axial, mu); + tmp -= Cshift(tmp, mu, -1); + axial_defect += trace(g5*tmp); + } + + // Get PJ5q for 5D (zero for 4D) and PP. 
+ PJ5q = zero; + if (Ls_ > 1) + { + // PP + ExtractSlice(tmp, q, 0, 0); + psi = (tmp - g5*tmp); + ExtractSlice(tmp, q, Ls_ - 1, 0); + psi += (tmp + g5*tmp); + PP = trace(adj(psi)*psi); + + // P5Jq + ExtractSlice(tmp, q, Ls_/2 - 1, 0); + psi = 0.5 * (tmp + g5*tmp); + ExtractSlice(tmp, q, Ls_/2, 0); + psi += 0.5 * (tmp - g5*tmp); + PJ5q = trace(adj(psi)*psi); + } + else + { + PP = trace(adj(q)*q); + } + + // Test ward identities, D_mu V_mu = 0; D_mu A_mu = 2m + 2 PJ5q + axial_defect -= 2.*PJ5q; + axial_defect -= 2.*(par().mass)*PP; + LOG(Message) << "Axial Ward Identity defect Delta_mu A_mu = " + << norm2(axial_defect) << std::endl; + LOG(Message) << "norm2(PP) = " << norm2(PP) << std::endl; + LOG(Message) << "norm2(PJ5q) = " << norm2(PJ5q) << std::endl; + } } END_MODULE_NAMESPACE diff --git a/tests/hadrons/Test_hadrons.hpp b/tests/hadrons/Test_hadrons.hpp index 1b038388..6dbe3425 100644 --- a/tests/hadrons/Test_hadrons.hpp +++ b/tests/hadrons/Test_hadrons.hpp @@ -513,26 +513,27 @@ inline void discLoopContraction(Application &application, * actionName - action used to compute quark propagator. * mass - mass of quark. * Ls - length of 5th dimension (default = 1). + * test_axial - whether or not to check PCAC relation. * Returns: None. 
******************************************************************************/ inline void makeWITest(Application &application, std::string &modName, std::string &propName, std::string &actionName, - double mass, unsigned int Ls = 1) + double mass, unsigned int Ls = 1, bool test_axial = false) { if (!(Environment::getInstance().hasModule(modName))) { MContraction::WardIdentity::Par wiPar; if (Ls > 1) { - wiPar.q = LABEL_5D(propName); + wiPar.q = LABEL_5D(propName); } else { - wiPar.q = propName; + wiPar.q = propName; } - wiPar.q4d = propName; - wiPar.action = actionName; - wiPar.mass = mass; + wiPar.action = actionName; + wiPar.mass = mass; + wiPar.test_axial = test_axial; application.createModule(modName, wiPar); } } diff --git a/tests/hadrons/Test_hadrons_conserved_current.cc b/tests/hadrons/Test_hadrons_conserved_current.cc index 080fef73..37ef30d9 100644 --- a/tests/hadrons/Test_hadrons_conserved_current.cc +++ b/tests/hadrons/Test_hadrons_conserved_current.cc @@ -81,7 +81,8 @@ inline void setupWardIdentityTests(Application &application, std::string origin = "0 0 0 0"; std::string modName = actionName + " Ward Identity Test"; MAKE_POINT_PROP(origin, pointProp, solverName); - makeWITest(application, modName, pointProp, actionName, mass, Ls); + makeWITest(application, modName, pointProp, actionName, mass, Ls, + perform_axial_tests); /*************************************************************************** * Conserved current tests with sequential insertion of vector/axial From eec79e0a1e8cdaf58c55547d520e2b9e9a894898 Mon Sep 17 00:00:00 2001 From: Lanny91 Date: Mon, 5 Jun 2017 11:55:41 +0100 Subject: [PATCH 035/377] Ward Identity test improvements and conserved current bug fixes --- .../Modules/MContraction/WardIdentity.hpp | 35 ++++++++++++------- lib/qcd/action/fermion/WilsonFermion5D.cc | 10 ++++-- lib/qcd/action/fermion/WilsonKernels.cc | 21 +++++------ 3 files changed, 42 insertions(+), 24 deletions(-) diff --git 
a/extras/Hadrons/Modules/MContraction/WardIdentity.hpp b/extras/Hadrons/Modules/MContraction/WardIdentity.hpp index fa51ce95..7fc7d15d 100644 --- a/extras/Hadrons/Modules/MContraction/WardIdentity.hpp +++ b/extras/Hadrons/Modules/MContraction/WardIdentity.hpp @@ -143,6 +143,7 @@ void TWardIdentity::execute(void) vector_WI += tmp; } + // Test ward identity D_mu V_mu = 0; LOG(Message) << "Vector Ward Identity check Delta_mu V_mu = " << norm2(vector_WI) << std::endl; @@ -150,28 +151,29 @@ void TWardIdentity::execute(void) { LatticeComplex PP(env().getGrid()), axial_defect(env().getGrid()), PJ5q(env().getGrid()); + std::vector axial_buf; - // Compute D_mu A_mu, D is backwards derivative. + // Compute , D is backwards derivative. axial_defect = zero; for (unsigned int mu = 0; mu < Nd; ++mu) { act.ContractConservedCurrent(q, q, tmp, Current::Axial, mu); tmp -= Cshift(tmp, mu, -1); - axial_defect += trace(g5*tmp); + axial_defect += 2.*trace(g5*tmp); } - // Get PJ5q for 5D (zero for 4D) and PP. + // Get for 5D (zero for 4D) and . 
PJ5q = zero; if (Ls_ > 1) { - // PP + // ExtractSlice(tmp, q, 0, 0); - psi = (tmp - g5*tmp); + psi = 0.5 * (tmp - g5*tmp); ExtractSlice(tmp, q, Ls_ - 1, 0); - psi += (tmp + g5*tmp); + psi += 0.5 * (tmp + g5*tmp); PP = trace(adj(psi)*psi); - // P5Jq + // ExtractSlice(tmp, q, Ls_/2 - 1, 0); psi = 0.5 * (tmp + g5*tmp); ExtractSlice(tmp, q, Ls_/2, 0); @@ -183,13 +185,22 @@ void TWardIdentity::execute(void) PP = trace(adj(q)*q); } - // Test ward identities, D_mu V_mu = 0; D_mu A_mu = 2m + 2 PJ5q - axial_defect -= 2.*PJ5q; - axial_defect -= 2.*(par().mass)*PP; + // Test ward identity = 2m + 2 + LOG(Message) << "|D_mu A_mu|^2 = " << norm2(axial_defect) << std::endl; + LOG(Message) << "|PP|^2 = " << norm2(PP) << std::endl; + LOG(Message) << "|PJ5q|^2 = " << norm2(PJ5q) << std::endl; LOG(Message) << "Axial Ward Identity defect Delta_mu A_mu = " << norm2(axial_defect) << std::endl; - LOG(Message) << "norm2(PP) = " << norm2(PP) << std::endl; - LOG(Message) << "norm2(PJ5q) = " << norm2(PJ5q) << std::endl; + + // Axial defect by timeslice. + axial_defect -= 2.*(par().mass*PP + PJ5q); + LOG(Message) << "Check Axial defect by timeslice" << std::endl; + sliceSum(axial_defect, axial_buf, Tp); + for (int t = 0; t < axial_buf.size(); ++t) + { + LOG(Message) << "t = " << t << ": " + << TensorRemove(axial_buf[t]) << std::endl; + } } } diff --git a/lib/qcd/action/fermion/WilsonFermion5D.cc b/lib/qcd/action/fermion/WilsonFermion5D.cc index f616a080..3bbc03b4 100644 --- a/lib/qcd/action/fermion/WilsonFermion5D.cc +++ b/lib/qcd/action/fermion/WilsonFermion5D.cc @@ -700,8 +700,14 @@ void WilsonFermion5D::ContractConservedCurrent(PropagatorField &q_in_1, Kernels::ContractConservedCurrentInternal(q1_s, q2_s, tmp, Umu, curr_type, mu); // Axial current sign - Real G_s = (curr_type == Current::Axial) ? ((s < Ls/2) ? -1. : 1.) 
: 1.; - q_out += G_s*tmp; + if ((curr_type == Current::Axial) && (s < (Ls / 2))) + { + q_out -= tmp; + } + else + { + q_out += tmp; + } } } diff --git a/lib/qcd/action/fermion/WilsonKernels.cc b/lib/qcd/action/fermion/WilsonKernels.cc index 8dc6bd8c..802c0940 100644 --- a/lib/qcd/action/fermion/WilsonKernels.cc +++ b/lib/qcd/action/fermion/WilsonKernels.cc @@ -286,8 +286,9 @@ void WilsonKernels::DhopDir( StencilImpl &st, DoubledGaugeField &U,SiteHal * to make a conserved current sink or inserting the conserved current * sequentially. Common to both 4D and 5D. ******************************************************************************/ -#define WilsonCurrentFwd(expr, mu) (0.5*(Gamma::gmu[mu]*expr - expr)) -#define WilsonCurrentBwd(expr, mu) (0.5*(Gamma::gmu[mu]*expr + expr)) +// N.B. Functions below assume a -1/2 factor within U. +#define WilsonCurrentFwd(expr, mu) ((expr - Gamma::gmu[mu]*expr)) +#define WilsonCurrentBwd(expr, mu) ((expr + Gamma::gmu[mu]*expr)) template void WilsonKernels::ContractConservedCurrentInternal(const PropagatorField &q_in_1, @@ -300,13 +301,13 @@ void WilsonKernels::ContractConservedCurrentInternal(const PropagatorField Gamma g5(Gamma::Algebra::Gamma5); PropagatorField tmp(q_out._grid); GaugeLinkField Umu(U._grid); - Umu = PeekIndex(U, mu); + Umu = PeekIndex(U, mu); tmp = this->CovShiftForward(Umu, mu, q_in_1); q_out = (g5*adj(q_in_2)*g5)*WilsonCurrentFwd(tmp, mu); - tmp = adj(Umu)*q_in_1; - q_out += (g5*adj(this->CovShiftForward(Umu, mu, q_in_2))*g5)*WilsonCurrentBwd(q_in_1, mu); + tmp = this->CovShiftForward(Umu, mu, q_in_2); + q_out -= (g5*adj(tmp)*g5)*WilsonCurrentBwd(q_in_1, mu); } @@ -320,21 +321,21 @@ void WilsonKernels::SeqConservedCurrentInternal(const PropagatorField &q_i unsigned int tmin, unsigned int tmax) { - int tshift = (mu == Nd - 1) ? 1 : 0; + int tshift = (mu == Tp) ? 1 : 0; Real G_T = (curr_type == Current::Tadpole) ? -1. 
: 1.; PropagatorField tmp(q_in._grid); GaugeLinkField Umu(U._grid); - Umu = PeekIndex(U, mu); + Umu = PeekIndex(U, mu); Lattice> t(q_in._grid); tmp = this->CovShiftForward(Umu, mu, q_in)*ph; - where((t >= tmin) and (t <= tmax), tmp, 0.*tmp); + tmp = where((t >= tmin) and (t <= tmax), tmp, 0.*tmp); q_out = G_T*WilsonCurrentFwd(tmp, mu); tmp = q_in*ph; tmp = this->CovShiftBackward(Umu, mu, tmp); - where((t >= tmin + tshift) and (t <= tmax + tshift), tmp, 0.*tmp); - q_out += WilsonCurrentBwd(tmp, mu); + tmp = where((t >= tmin + tshift) and (t <= tmax + tshift), tmp, 0.*tmp); + q_out -= WilsonCurrentBwd(tmp, mu); } From 622a21bec673ccf3a3b895584678afacd1f59c4b Mon Sep 17 00:00:00 2001 From: Lanny91 Date: Mon, 5 Jun 2017 15:55:32 +0100 Subject: [PATCH 036/377] Improvements to sequential conserved current test and small bugfix. --- .../Modules/MUtilities/TestSeqConserved.hpp | 18 ++++++++++++------ lib/qcd/action/fermion/WilsonKernels.cc | 1 + 2 files changed, 13 insertions(+), 6 deletions(-) diff --git a/extras/Hadrons/Modules/MUtilities/TestSeqConserved.hpp b/extras/Hadrons/Modules/MUtilities/TestSeqConserved.hpp index 3ae1b8b0..eccb00cc 100644 --- a/extras/Hadrons/Modules/MUtilities/TestSeqConserved.hpp +++ b/extras/Hadrons/Modules/MUtilities/TestSeqConserved.hpp @@ -140,7 +140,8 @@ void TTestSeqConserved::execute(void) Gamma g(gA); SitePropagator qSite; Complex test_S, test_V, check_S, check_V; - std::vector check_buf; + std::vector check_buf; + LatticeComplex c(env().getGrid()); // Check sequential insertion of current gives same result as conserved // current sink upon contraction. Assume q uses a point source. 
@@ -151,9 +152,14 @@ void TTestSeqConserved::execute(void) test_V = trace(qSite*g*Gamma::gmu[par().mu]); act.ContractConservedCurrent(q, q, tmp, par().curr, par().mu); - sliceSum(tmp, check_buf, Tp); - check_S = TensorRemove(trace(check_buf[par().t_J]*g)); - check_V = TensorRemove(trace(check_buf[par().t_J]*g*Gamma::gmu[par().mu])); + + c = trace(tmp*g); + sliceSum(c, check_buf, Tp); + check_S = TensorRemove(check_buf[par().t_J]); + + c = trace(tmp*g*Gamma::gmu[par().mu]); + sliceSum(c, check_buf, Tp); + check_V = TensorRemove(check_buf[par().t_J]); LOG(Message) << "Test S = " << abs(test_S) << std::endl; LOG(Message) << "Test V = " << abs(test_V) << std::endl; @@ -166,8 +172,8 @@ void TTestSeqConserved::execute(void) LOG(Message) << "Consistency check for sequential conserved " << par().curr << " current insertion: " << std::endl; - LOG(Message) << "Check S = " << abs(check_S) << std::endl; - LOG(Message) << "Check V = " << abs(check_V) << std::endl; + LOG(Message) << "Diff S = " << abs(check_S) << std::endl; + LOG(Message) << "Diff V = " << abs(check_V) << std::endl; } END_MODULE_NAMESPACE diff --git a/lib/qcd/action/fermion/WilsonKernels.cc b/lib/qcd/action/fermion/WilsonKernels.cc index 802c0940..8d5406f4 100644 --- a/lib/qcd/action/fermion/WilsonKernels.cc +++ b/lib/qcd/action/fermion/WilsonKernels.cc @@ -327,6 +327,7 @@ void WilsonKernels::SeqConservedCurrentInternal(const PropagatorField &q_i GaugeLinkField Umu(U._grid); Umu = PeekIndex(U, mu); Lattice> t(q_in._grid); + LatticeCoordinate(t, mu); tmp = this->CovShiftForward(Umu, mu, q_in)*ph; tmp = where((t >= tmin) and (t <= tmax), tmp, 0.*tmp); From c504b4dbad611b8f36599fb5d6202a85b465134d Mon Sep 17 00:00:00 2001 From: Lanny91 Date: Mon, 5 Jun 2017 15:56:43 +0100 Subject: [PATCH 037/377] Code cleaning --- extras/Hadrons/Modules/MContraction/WardIdentity.hpp | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/extras/Hadrons/Modules/MContraction/WardIdentity.hpp 
b/extras/Hadrons/Modules/MContraction/WardIdentity.hpp index 7fc7d15d..fb2ea173 100644 --- a/extras/Hadrons/Modules/MContraction/WardIdentity.hpp +++ b/extras/Hadrons/Modules/MContraction/WardIdentity.hpp @@ -128,12 +128,11 @@ void TWardIdentity::execute(void) LOG(Message) << "Performing Ward Identity checks for quark '" << par().q << "'." << std::endl; - PropagatorField psi(env().getGrid()), tmp(env().getGrid()), - vector_WI(env().getGrid()); + PropagatorField tmp(env().getGrid()), vector_WI(env().getGrid()); PropagatorField &q = *env().template getObject(par().q); FMat &act = *(env().template getObject(par().action)); Gamma g5(Gamma::Algebra::Gamma5); - + // Compute D_mu V_mu, D here is backward derivative. vector_WI = zero; for (unsigned int mu = 0; mu < Nd; ++mu) @@ -149,6 +148,7 @@ void TWardIdentity::execute(void) if (par().test_axial) { + PropagatorField psi(env().getGrid()); LatticeComplex PP(env().getGrid()), axial_defect(env().getGrid()), PJ5q(env().getGrid()); std::vector axial_buf; @@ -159,7 +159,7 @@ void TWardIdentity::execute(void) { act.ContractConservedCurrent(q, q, tmp, Current::Axial, mu); tmp -= Cshift(tmp, mu, -1); - axial_defect += 2.*trace(g5*tmp); + axial_defect += trace(g5*tmp); } // Get for 5D (zero for 4D) and . @@ -191,7 +191,7 @@ void TWardIdentity::execute(void) LOG(Message) << "|PJ5q|^2 = " << norm2(PJ5q) << std::endl; LOG(Message) << "Axial Ward Identity defect Delta_mu A_mu = " << norm2(axial_defect) << std::endl; - + // Axial defect by timeslice. axial_defect -= 2.*(par().mass*PP + PJ5q); LOG(Message) << "Check Axial defect by timeslice" << std::endl; From e5c8b7369e2cb259379d987260cf21f2b96e404f Mon Sep 17 00:00:00 2001 From: Lanny91 Date: Tue, 6 Jun 2017 14:19:10 +0100 Subject: [PATCH 038/377] Boundary condition option in quark actions for hadrons tests. 
--- tests/hadrons/Test_hadrons.hpp | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/tests/hadrons/Test_hadrons.hpp b/tests/hadrons/Test_hadrons.hpp index 6dbe3425..a554425d 100644 --- a/tests/hadrons/Test_hadrons.hpp +++ b/tests/hadrons/Test_hadrons.hpp @@ -107,16 +107,20 @@ using namespace Hadrons; * actionName - name of action module to create. * gaugeField - gauge field module. * mass - quark mass. + * boundary - fermion boundary conditions (default to periodic + * space, antiperiodic time). * Returns: None. ******************************************************************************/ inline void makeWilsonAction(Application &application, std::string actionName, - std::string &gaugeField, double mass) + std::string &gaugeField, double mass, + std::string boundary = "1 1 1 -1") { if (!(Environment::getInstance().hasModule(actionName))) { MAction::Wilson::Par actionPar; actionPar.gauge = gaugeField; actionPar.mass = mass; + actionPar.boundary = boundary; application.createModule(actionName, actionPar); } } @@ -129,11 +133,13 @@ inline void makeWilsonAction(Application &application, std::string actionName, * mass - quark mass. * M5 - domain wall height. * Ls - fifth dimension extent. + * boundary - fermion boundary conditions (default to periodic + * space, antiperiodic time). * Returns: None. 
******************************************************************************/ inline void makeDWFAction(Application &application, std::string actionName, std::string &gaugeField, double mass, double M5, - unsigned int Ls) + unsigned int Ls, std::string boundary = "1 1 1 -1") { if (!(Environment::getInstance().hasModule(actionName))) { @@ -142,6 +148,7 @@ inline void makeDWFAction(Application &application, std::string actionName, actionPar.Ls = Ls; actionPar.M5 = M5; actionPar.mass = mass; + actionPar.boundary = boundary; application.createModule(actionName, actionPar); } } From 8d442b502dc59f7fe4407b02142677299ac63740 Mon Sep 17 00:00:00 2001 From: Lanny91 Date: Tue, 6 Jun 2017 17:06:40 +0100 Subject: [PATCH 039/377] Sequential current fix for spacial indices. --- lib/qcd/action/fermion/WilsonKernels.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/qcd/action/fermion/WilsonKernels.cc b/lib/qcd/action/fermion/WilsonKernels.cc index 8d5406f4..62ae93fa 100644 --- a/lib/qcd/action/fermion/WilsonKernels.cc +++ b/lib/qcd/action/fermion/WilsonKernels.cc @@ -327,7 +327,7 @@ void WilsonKernels::SeqConservedCurrentInternal(const PropagatorField &q_i GaugeLinkField Umu(U._grid); Umu = PeekIndex(U, mu); Lattice> t(q_in._grid); - LatticeCoordinate(t, mu); + LatticeCoordinate(t, Tp); tmp = this->CovShiftForward(Umu, mu, q_in)*ph; tmp = where((t >= tmin) and (t <= tmax), tmp, 0.*tmp); From 60f11bfd72f2c74cfdb0b91eaa9f44d80dd9946c Mon Sep 17 00:00:00 2001 From: Lanny91 Date: Wed, 7 Jun 2017 12:34:47 +0100 Subject: [PATCH 040/377] Removed redundant test module --- extras/Hadrons/Modules.hpp | 1 - .../Modules/MContraction/WardIdentitySeq.hpp | 145 ------------------ extras/Hadrons/modules.inc | 1 - 3 files changed, 147 deletions(-) delete mode 100644 extras/Hadrons/Modules/MContraction/WardIdentitySeq.hpp diff --git a/extras/Hadrons/Modules.hpp b/extras/Hadrons/Modules.hpp index dd6a6010..53ec346c 100644 --- a/extras/Hadrons/Modules.hpp +++ 
b/extras/Hadrons/Modules.hpp @@ -33,7 +33,6 @@ See the full license in the file "LICENSE" in the top level distribution directo #include #include #include -#include #include #include #include diff --git a/extras/Hadrons/Modules/MContraction/WardIdentitySeq.hpp b/extras/Hadrons/Modules/MContraction/WardIdentitySeq.hpp deleted file mode 100644 index 31409925..00000000 --- a/extras/Hadrons/Modules/MContraction/WardIdentitySeq.hpp +++ /dev/null @@ -1,145 +0,0 @@ -/************************************************************************************* - -Grid physics library, www.github.com/paboyle/Grid - -Source file: extras/Hadrons/Modules/MContraction/WardIdentitySeq.hpp - -Copyright (C) 2017 - -Author: Andrew Lawson - -This program is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; either version 2 of the License, or -(at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License along -with this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. - -See the full license in the file "LICENSE" in the top level distribution directory -*************************************************************************************/ -/* END LEGAL */ - -#ifndef Hadrons_WardIdentitySeq_hpp_ -#define Hadrons_WardIdentitySeq_hpp_ - -#include -#include -#include - -BEGIN_HADRONS_NAMESPACE - -/* - Ward Identity contractions using sequential propagators. - ----------------------------- - - * options: - - q_x: propagator, mu = x current insertion (string). - - q_y: propagator, mu = y current insertion (string). 
- - q_z: propagator, mu = z current insertion (string). - - q_t: propagator, mu = t current insertion (string). -*/ - -/****************************************************************************** - * WardIdentitySeq * - ******************************************************************************/ -BEGIN_MODULE_NAMESPACE(MContraction) - -class WardIdentitySeqPar: Serializable -{ -public: - GRID_SERIALIZABLE_CLASS_MEMBERS(WardIdentitySeqPar, - std::string, q_x, - std::string, q_y, - std::string, q_z, - std::string, q_t); -}; - -template -class TWardIdentitySeq: public Module -{ -public: - TYPE_ALIASES(FImpl,); -public: - // constructor - TWardIdentitySeq(const std::string name); - // destructor - virtual ~TWardIdentitySeq(void) = default; - // dependency relation - virtual std::vector getInput(void); - virtual std::vector getOutput(void); - // setup - virtual void setup(void); - // execution - virtual void execute(void); -}; - -MODULE_REGISTER_NS(WardIdentitySeq, TWardIdentitySeq, MContraction); - -/****************************************************************************** - * TWardIdentitySeq implementation * - ******************************************************************************/ -// constructor ///////////////////////////////////////////////////////////////// -template -TWardIdentitySeq::TWardIdentitySeq(const std::string name) -: Module(name) -{} - -// dependencies/products /////////////////////////////////////////////////////// -template -std::vector TWardIdentitySeq::getInput(void) -{ - std::vector in = {par().q_x, par().q_y, par().q_z, par().q_t}; - - return in; -} - -template -std::vector TWardIdentitySeq::getOutput(void) -{ - std::vector out = {getName()}; - - return out; -} - -// setup /////////////////////////////////////////////////////////////////////// -template -void TWardIdentitySeq::setup(void) -{ - -} - -// execution /////////////////////////////////////////////////////////////////// -template -void TWardIdentitySeq::execute(void) 
-{ - LatticeComplex vector_WI(env().getGrid()), c(env().getGrid()); - PropagatorField q_x = *env().template getObject(par().q_x); - PropagatorField q_y = *env().template getObject(par().q_y); - PropagatorField q_z = *env().template getObject(par().q_z); - PropagatorField q_t = *env().template getObject(par().q_t); - PropagatorField *q[Nd] = {&q_x, &q_y, &q_z, &q_t}; - Gamma g5(Gamma::Algebra::Gamma5); - - // Check D_mu V_mu = 0 - for (unsigned int mu = 0; mu < Nd; ++mu) - { - c = trace(g5*(*q[mu])); - vector_WI += c - Cshift(c, mu, -1); - } - - LOG(Message) << "Ward Identity checks for sequential vector current " - << "insertion = " << norm2(vector_WI) << std::endl; -} - -END_MODULE_NAMESPACE - -END_HADRONS_NAMESPACE - -#endif // Hadrons_WardIdentitySeq_hpp_ diff --git a/extras/Hadrons/modules.inc b/extras/Hadrons/modules.inc index 0364502a..b57aa577 100644 --- a/extras/Hadrons/modules.inc +++ b/extras/Hadrons/modules.inc @@ -14,7 +14,6 @@ modules_hpp =\ Modules/MContraction/Gamma3pt.hpp \ Modules/MContraction/Meson.hpp \ Modules/MContraction/WardIdentity.hpp \ - Modules/MContraction/WardIdentitySeq.hpp \ Modules/MContraction/WeakHamiltonian.hpp \ Modules/MContraction/WeakHamiltonianEye.hpp \ Modules/MContraction/WeakHamiltonianNonEye.hpp \ From b8e45ae490729a9ed79983974e1eeec1778a1e8d Mon Sep 17 00:00:00 2001 From: Lanny91 Date: Wed, 7 Jun 2017 16:26:22 +0100 Subject: [PATCH 041/377] Fixed remaining fermion type aliases after merge. 
--- extras/Hadrons/Modules/MContraction/WardIdentity.hpp | 2 +- extras/Hadrons/Modules/MSource/SeqConserved.hpp | 2 +- extras/Hadrons/Modules/MUtilities/TestSeqConserved.hpp | 2 +- extras/Hadrons/Modules/MUtilities/TestSeqGamma.hpp | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/extras/Hadrons/Modules/MContraction/WardIdentity.hpp b/extras/Hadrons/Modules/MContraction/WardIdentity.hpp index fb2ea173..82b0317a 100644 --- a/extras/Hadrons/Modules/MContraction/WardIdentity.hpp +++ b/extras/Hadrons/Modules/MContraction/WardIdentity.hpp @@ -65,7 +65,7 @@ template class TWardIdentity: public Module { public: - TYPE_ALIASES(FImpl,); + FERM_TYPE_ALIASES(FImpl,); public: // constructor TWardIdentity(const std::string name); diff --git a/extras/Hadrons/Modules/MSource/SeqConserved.hpp b/extras/Hadrons/Modules/MSource/SeqConserved.hpp index 6e5fb197..67086f11 100644 --- a/extras/Hadrons/Modules/MSource/SeqConserved.hpp +++ b/extras/Hadrons/Modules/MSource/SeqConserved.hpp @@ -74,7 +74,7 @@ template class TSeqConserved: public Module { public: - TYPE_ALIASES(FImpl,); + FERM_TYPE_ALIASES(FImpl,); public: // constructor TSeqConserved(const std::string name); diff --git a/extras/Hadrons/Modules/MUtilities/TestSeqConserved.hpp b/extras/Hadrons/Modules/MUtilities/TestSeqConserved.hpp index eccb00cc..faebab0a 100644 --- a/extras/Hadrons/Modules/MUtilities/TestSeqConserved.hpp +++ b/extras/Hadrons/Modules/MUtilities/TestSeqConserved.hpp @@ -71,7 +71,7 @@ template class TTestSeqConserved: public Module { public: - TYPE_ALIASES(FImpl,); + FERM_TYPE_ALIASES(FImpl,); public: // constructor TTestSeqConserved(const std::string name); diff --git a/extras/Hadrons/Modules/MUtilities/TestSeqGamma.hpp b/extras/Hadrons/Modules/MUtilities/TestSeqGamma.hpp index b3e99617..1b057c29 100644 --- a/extras/Hadrons/Modules/MUtilities/TestSeqGamma.hpp +++ b/extras/Hadrons/Modules/MUtilities/TestSeqGamma.hpp @@ -27,7 +27,7 @@ template class TTestSeqGamma: public Module { public: - 
TYPE_ALIASES(FImpl,); + FERM_TYPE_ALIASES(FImpl,); public: // constructor TTestSeqGamma(const std::string name); From 2d433ba30720f621b4d0a1bae91434aa4a42fe36 Mon Sep 17 00:00:00 2001 From: Lanny91 Date: Mon, 12 Jun 2017 10:32:14 +0100 Subject: [PATCH 042/377] Changed header include guards to match new convention --- extras/Hadrons/Modules/MContraction/WardIdentity.hpp | 4 ++-- extras/Hadrons/Modules/MSource/SeqConserved.hpp | 4 ++-- extras/Hadrons/Modules/MUtilities/TestSeqConserved.hpp | 4 ++-- extras/Hadrons/Modules/MUtilities/TestSeqGamma.hpp | 4 ++-- 4 files changed, 8 insertions(+), 8 deletions(-) diff --git a/extras/Hadrons/Modules/MContraction/WardIdentity.hpp b/extras/Hadrons/Modules/MContraction/WardIdentity.hpp index 82b0317a..8a56e0eb 100644 --- a/extras/Hadrons/Modules/MContraction/WardIdentity.hpp +++ b/extras/Hadrons/Modules/MContraction/WardIdentity.hpp @@ -26,8 +26,8 @@ See the full license in the file "LICENSE" in the top level distribution directo *************************************************************************************/ /* END LEGAL */ -#ifndef Hadrons_WardIdentity_hpp_ -#define Hadrons_WardIdentity_hpp_ +#ifndef Hadrons_MContraction_WardIdentity_hpp_ +#define Hadrons_MContraction_WardIdentity_hpp_ #include #include diff --git a/extras/Hadrons/Modules/MSource/SeqConserved.hpp b/extras/Hadrons/Modules/MSource/SeqConserved.hpp index 67086f11..86a7dfb9 100644 --- a/extras/Hadrons/Modules/MSource/SeqConserved.hpp +++ b/extras/Hadrons/Modules/MSource/SeqConserved.hpp @@ -26,8 +26,8 @@ See the full license in the file "LICENSE" in the top level distribution directo *************************************************************************************/ /* END LEGAL */ -#ifndef Hadrons_SeqConserved_hpp_ -#define Hadrons_SeqConserved_hpp_ +#ifndef Hadrons_MSource_SeqConserved_hpp_ +#define Hadrons_MSource_SeqConserved_hpp_ #include #include diff --git a/extras/Hadrons/Modules/MUtilities/TestSeqConserved.hpp 
b/extras/Hadrons/Modules/MUtilities/TestSeqConserved.hpp index faebab0a..b085eb8c 100644 --- a/extras/Hadrons/Modules/MUtilities/TestSeqConserved.hpp +++ b/extras/Hadrons/Modules/MUtilities/TestSeqConserved.hpp @@ -26,8 +26,8 @@ See the full license in the file "LICENSE" in the top level distribution directo *************************************************************************************/ /* END LEGAL */ -#ifndef Hadrons_TestSeqConserved_hpp_ -#define Hadrons_TestSeqConserved_hpp_ +#ifndef Hadrons_MUtilities_TestSeqConserved_hpp_ +#define Hadrons_MUtilities_TestSeqConserved_hpp_ #include #include diff --git a/extras/Hadrons/Modules/MUtilities/TestSeqGamma.hpp b/extras/Hadrons/Modules/MUtilities/TestSeqGamma.hpp index 1b057c29..3dbd7d63 100644 --- a/extras/Hadrons/Modules/MUtilities/TestSeqGamma.hpp +++ b/extras/Hadrons/Modules/MUtilities/TestSeqGamma.hpp @@ -1,5 +1,5 @@ -#ifndef Hadrons_TestSeqGamma_hpp_ -#define Hadrons_TestSeqGamma_hpp_ +#ifndef Hadrons_MUtilities_TestSeqGamma_hpp_ +#define Hadrons_MUtilities_TestSeqGamma_hpp_ #include #include From 5633a2db20e99cec2b5f11906632beb20eaadb31 Mon Sep 17 00:00:00 2001 From: Lanny91 Date: Mon, 12 Jun 2017 10:41:02 +0100 Subject: [PATCH 043/377] Faster implementation of conserved current site contraction. Added 5D vectorised support, but not G-parity. 
--- lib/qcd/action/fermion/FermionOperatorImpl.h | 29 ++++- lib/qcd/action/fermion/WilsonFermion.cc | 20 ++- lib/qcd/action/fermion/WilsonFermion5D.cc | 48 ++++--- lib/qcd/action/fermion/WilsonKernels.cc | 124 +++++++++++++++---- lib/qcd/action/fermion/WilsonKernels.h | 26 +++- 5 files changed, 198 insertions(+), 49 deletions(-) diff --git a/lib/qcd/action/fermion/FermionOperatorImpl.h b/lib/qcd/action/fermion/FermionOperatorImpl.h index 20458b6d..f330fb0d 100644 --- a/lib/qcd/action/fermion/FermionOperatorImpl.h +++ b/lib/qcd/action/fermion/FermionOperatorImpl.h @@ -212,6 +212,13 @@ namespace QCD { StencilImpl &St) { mult(&phi(), &U(mu), &chi()); } + + inline void multLinkProp(SitePropagator &phi, + const SiteDoubledGaugeField &U, + const SitePropagator &chi, + int mu) { + mult(&phi(), &U(mu), &chi()); + } template inline void loadLinkElement(Simd ®, ref &memory) { @@ -340,7 +347,20 @@ class DomainWallVec5dImpl : public PeriodicGaugeImpl< GaugeImplTypes< S,Nrepres } mult(&phi(), &UU(), &chi()); } - + + inline void multLinkProp(SitePropagator &phi, + const SiteDoubledGaugeField &U, + const SitePropagator &chi, + int mu) { + SiteGaugeLink UU; + for (int i = 0; i < Nrepresentation; i++) { + for (int j = 0; j < Nrepresentation; j++) { + vsplat(UU()()(i, j), U(mu)()(i, j)); + } + } + mult(&phi(), &UU(), &chi()); + } + inline void DoubleStore(GridBase *GaugeGrid, DoubledGaugeField &Uds,const GaugeField &Umu) { SiteScalarGaugeField ScalarUmu; @@ -538,6 +558,13 @@ class GparityWilsonImpl : public ConjugateGaugeImpl::ContractConservedCurrent(PropagatorField &q_in_1, conformable(_grid, q_in_1._grid); conformable(_grid, q_in_2._grid); conformable(_grid, q_out._grid); - Kernels::ContractConservedCurrentInternal(q_in_1, q_in_2, q_out, - Umu, curr_type, mu); + PropagatorField tmp(_grid); + q_out = zero; + + // Forward, need q1(x + mu), q2(x) + tmp = Cshift(q_in_1, mu, 1); + parallel_for (unsigned int sU = 0; sU < Umu._grid->oSites(); ++sU) + { + 
Kernels::ContractConservedCurrentSiteFwd(tmp, q_in_2, q_out, Umu, + mu, sU, sU, sU, sU); + } + + // Backward, need q1(x), q2(x + mu) + tmp = Cshift(q_in_2, mu, 1); + parallel_for (unsigned int sU = 0; sU < Umu._grid->oSites(); ++sU) + { + Kernels::ContractConservedCurrentSiteBwd(q_in_1, tmp, q_out, Umu, + mu, sU, sU, sU, sU); + } } template diff --git a/lib/qcd/action/fermion/WilsonFermion5D.cc b/lib/qcd/action/fermion/WilsonFermion5D.cc index 3bbc03b4..b69a18ba 100644 --- a/lib/qcd/action/fermion/WilsonFermion5D.cc +++ b/lib/qcd/action/fermion/WilsonFermion5D.cc @@ -687,26 +687,44 @@ void WilsonFermion5D::ContractConservedCurrent(PropagatorField &q_in_1, conformable(q_in_1._grid, q_in_2._grid); conformable(_FourDimGrid, q_out._grid); - PropagatorField q1_s(_FourDimGrid); - PropagatorField q2_s(_FourDimGrid); - PropagatorField tmp(_FourDimGrid); - - // Contract across 5th dimension. + PropagatorField tmp(FermionGrid()); q_out = zero; - for (int s = 0; s < Ls; ++s) - { - ExtractSlice(q1_s, q_in_1, s, 0); - ExtractSlice(q2_s, q_in_2, Ls - s - 1, 0); - Kernels::ContractConservedCurrentInternal(q1_s, q2_s, tmp, Umu, curr_type, mu); - // Axial current sign - if ((curr_type == Current::Axial) && (s < (Ls / 2))) + // Forward, need q1(x + mu, s), q2(x, Ls - 1 - s). 5D lattice so shift + // 4D coordinate mu by one. + tmp = Cshift(q_in_1, mu + 1, 1); + parallel_for (unsigned int sU = 0; sU < Umu._grid->oSites(); ++sU) + { + unsigned int sF1 = sU * Ls; + unsigned int sF2 = (sU + 1) * Ls - 1; + for (int s = 0; s < Ls; ++s) { - q_out -= tmp; + bool axial_sign = ((curr_type == Current::Axial) && (s < (Ls / 2))) ? \ + true : false; + Kernels::ContractConservedCurrentSiteFwd(tmp, q_in_2, q_out, Umu, + mu, sF1, sF2, sU, sU, + axial_sign); + sF1++; + sF2--; } - else + } + + // Backward, need q1(x, s), q2(x + mu, Ls - 1 - s). 5D lattice so shift + // 4D coordinate mu by one. 
+ tmp = Cshift(q_in_2, mu + 1, 1); + parallel_for (unsigned int sU = 0; sU < Umu._grid->oSites(); ++sU) + { + unsigned int sF1 = sU * Ls; + unsigned int sF2 = (sU + 1) * Ls - 1; + for (int s = 0; s < Ls; ++s) { - q_out += tmp; + bool axial_sign = ((curr_type == Current::Axial) && (s < (Ls / 2))) ? \ + true : false; + Kernels::ContractConservedCurrentSiteBwd(q_in_1, tmp, q_out, Umu, + mu, sF1, sF2, sU, sU, + axial_sign); + sF1++; + sF2--; } } } diff --git a/lib/qcd/action/fermion/WilsonKernels.cc b/lib/qcd/action/fermion/WilsonKernels.cc index 62ae93fa..c519dc56 100644 --- a/lib/qcd/action/fermion/WilsonKernels.cc +++ b/lib/qcd/action/fermion/WilsonKernels.cc @@ -290,26 +290,110 @@ void WilsonKernels::DhopDir( StencilImpl &st, DoubledGaugeField &U,SiteHal #define WilsonCurrentFwd(expr, mu) ((expr - Gamma::gmu[mu]*expr)) #define WilsonCurrentBwd(expr, mu) ((expr + Gamma::gmu[mu]*expr)) +/******************************************************************************* + * Name: ContractConservedCurrentSiteFwd + * Operation: (1/2) * q2[x] * U(x) * (g[mu] - 1) * q1[x + mu] + * Notes: - DoubledGaugeField U assumed to contain -1/2 factor. + * - Pass in q_in_1 shifted in +ve mu direction. 
+ ******************************************************************************/ template -void WilsonKernels::ContractConservedCurrentInternal(const PropagatorField &q_in_1, - const PropagatorField &q_in_2, - PropagatorField &q_out, - DoubledGaugeField &U, - Current curr_type, - unsigned int mu) +void WilsonKernels::ContractConservedCurrentSiteFwd( + const PropagatorField &q_in_1, + const PropagatorField &q_in_2, + PropagatorField &q_out, + DoubledGaugeField &U, + unsigned int mu, + unsigned int sF_in_1, + unsigned int sF_in_2, + unsigned int sF_out, + unsigned int sU, + bool switch_sign) { + SitePropagator result, tmp; Gamma g5(Gamma::Algebra::Gamma5); - PropagatorField tmp(q_out._grid); - GaugeLinkField Umu(U._grid); - Umu = PeekIndex(U, mu); - - tmp = this->CovShiftForward(Umu, mu, q_in_1); - q_out = (g5*adj(q_in_2)*g5)*WilsonCurrentFwd(tmp, mu); - - tmp = this->CovShiftForward(Umu, mu, q_in_2); - q_out -= (g5*adj(tmp)*g5)*WilsonCurrentBwd(q_in_1, mu); + multLinkProp(tmp, U._odata[sU], q_in_1._odata[sF_in_1], mu); + result = g5 * adj(q_in_2._odata[sF_in_2]) * g5 * WilsonCurrentFwd(tmp, mu); + if (switch_sign) + { + q_out._odata[sF_out] -= result; + } + else + { + q_out._odata[sF_out] += result; + } } +/******************************************************************************* + * Name: ContractConservedCurrentSiteBwd + * Operation: (1/2) * q2[x + mu] * U^dag(x) * (g[mu] + 1) * q1[x] + * Notes: - DoubledGaugeField U assumed to contain -1/2 factor. + * - Pass in q_in_2 shifted in +ve mu direction. 
+ ******************************************************************************/ +template +void WilsonKernels::ContractConservedCurrentSiteBwd( + const PropagatorField &q_in_1, + const PropagatorField &q_in_2, + PropagatorField &q_out, + DoubledGaugeField &U, + unsigned int mu, + unsigned int sF_in_1, + unsigned int sF_in_2, + unsigned int sF_out, + unsigned int sU, + bool switch_sign) +{ + SitePropagator result, tmp; + Gamma g5(Gamma::Algebra::Gamma5); + multLinkProp(tmp, U._odata[sU], q_in_1._odata[sF_in_1], mu + Nd); + result = g5 * adj(q_in_2._odata[sF_in_2]) * g5 * WilsonCurrentBwd(tmp, mu); + if (switch_sign) + { + q_out._odata[sF_out] += result; + } + else + { + q_out._odata[sF_out] -= result; + } +} + +// G-parity requires more specialised implementation. +#define NO_CURR_SITE(Impl) \ +template <> \ +void WilsonKernels::ContractConservedCurrentSiteFwd( \ + const PropagatorField &q_in_1, \ + const PropagatorField &q_in_2, \ + PropagatorField &q_out, \ + DoubledGaugeField &U, \ + unsigned int mu, \ + unsigned int sF_in_1, \ + unsigned int sF_in_2, \ + unsigned int sF_out, \ + unsigned int sU, \ + bool switch_sign) \ +{ \ + assert(0); \ +} \ +template <> \ +void WilsonKernels::ContractConservedCurrentSiteBwd( \ + const PropagatorField &q_in_1, \ + const PropagatorField &q_in_2, \ + PropagatorField &q_out, \ + DoubledGaugeField &U, \ + unsigned int mu, \ + unsigned int sF_in_1, \ + unsigned int sF_in_2, \ + unsigned int sF_out, \ + unsigned int sU, \ + bool switch_sign) \ +{ \ + assert(0); \ +} + +NO_CURR_SITE(GparityWilsonImplF); +NO_CURR_SITE(GparityWilsonImplD); +NO_CURR_SITE(GparityWilsonImplFH); +NO_CURR_SITE(GparityWilsonImplDF); + template void WilsonKernels::SeqConservedCurrentInternal(const PropagatorField &q_in, @@ -342,16 +426,6 @@ void WilsonKernels::SeqConservedCurrentInternal(const PropagatorField &q_i // GParity, (Z)DomainWallVec5D -> require special implementation #define NO_CURR(Impl) \ -template <> void \ 
-WilsonKernels::ContractConservedCurrentInternal(const PropagatorField &q_in_1, \ - const PropagatorField &q_in_2, \ - PropagatorField &q_out, \ - DoubledGaugeField &U, \ - Current curr_type, \ - unsigned int mu) \ -{ \ - assert(0); \ -} \ template <> void \ WilsonKernels::SeqConservedCurrentInternal(const PropagatorField &q_in, \ PropagatorField &q_out, \ diff --git a/lib/qcd/action/fermion/WilsonKernels.h b/lib/qcd/action/fermion/WilsonKernels.h index 25c956ef..95155ccc 100644 --- a/lib/qcd/action/fermion/WilsonKernels.h +++ b/lib/qcd/action/fermion/WilsonKernels.h @@ -183,12 +183,26 @@ public: ////////////////////////////////////////////////////////////////////////////// // Utilities for inserting Wilson conserved current. ////////////////////////////////////////////////////////////////////////////// - void ContractConservedCurrentInternal(const PropagatorField &q_in_1, - const PropagatorField &q_in_2, - PropagatorField &q_out, - DoubledGaugeField &U, - Current curr_type, - unsigned int mu); + void ContractConservedCurrentSiteFwd(const PropagatorField &q_in_1, + const PropagatorField &q_in_2, + PropagatorField &q_out, + DoubledGaugeField &U, + unsigned int mu, + unsigned int sF_in_1, + unsigned int sF_in_2, + unsigned int sF_out, + unsigned int sU, + bool switch_sign = false); + void ContractConservedCurrentSiteBwd(const PropagatorField &q_in_1, + const PropagatorField &q_in_2, + PropagatorField &q_out, + DoubledGaugeField &U, + unsigned int mu, + unsigned int sF_in_1, + unsigned int sF_in_2, + unsigned int sF_out, + unsigned int sU, + bool switch_sign = false); void SeqConservedCurrentInternal(const PropagatorField &q_in, PropagatorField &q_out, DoubledGaugeField &U, From 41af8c12d70145320a1f2fd924464802f26cffff Mon Sep 17 00:00:00 2001 From: Lanny91 Date: Fri, 16 Jun 2017 16:38:59 +0100 Subject: [PATCH 044/377] Code cleaning for conserved current contractions. Will now be easier to implement mobius conserved current. 
--- lib/qcd/action/fermion/WilsonFermion.cc | 26 ++++---- lib/qcd/action/fermion/WilsonFermion5D.cc | 40 ++++--------- lib/qcd/action/fermion/WilsonKernels.cc | 72 ++++++++++------------- lib/qcd/action/fermion/WilsonKernels.h | 22 +++---- 4 files changed, 64 insertions(+), 96 deletions(-) diff --git a/lib/qcd/action/fermion/WilsonFermion.cc b/lib/qcd/action/fermion/WilsonFermion.cc index 28842cdd..eff7d958 100644 --- a/lib/qcd/action/fermion/WilsonFermion.cc +++ b/lib/qcd/action/fermion/WilsonFermion.cc @@ -361,23 +361,23 @@ void WilsonFermion::ContractConservedCurrent(PropagatorField &q_in_1, conformable(_grid, q_in_1._grid); conformable(_grid, q_in_2._grid); conformable(_grid, q_out._grid); - PropagatorField tmp(_grid); + PropagatorField tmp1(_grid), tmp2(_grid); q_out = zero; - // Forward, need q1(x + mu), q2(x) - tmp = Cshift(q_in_1, mu, 1); + // Forward, need q1(x + mu), q2(x). Backward, need q1(x), q2(x + mu). + // Inefficient comms method but not performance critical. + tmp1 = Cshift(q_in_1, mu, 1); + tmp2 = Cshift(q_in_2, mu, 1); parallel_for (unsigned int sU = 0; sU < Umu._grid->oSites(); ++sU) { - Kernels::ContractConservedCurrentSiteFwd(tmp, q_in_2, q_out, Umu, - mu, sU, sU, sU, sU); - } - - // Backward, need q1(x), q2(x + mu) - tmp = Cshift(q_in_2, mu, 1); - parallel_for (unsigned int sU = 0; sU < Umu._grid->oSites(); ++sU) - { - Kernels::ContractConservedCurrentSiteBwd(q_in_1, tmp, q_out, Umu, - mu, sU, sU, sU, sU); + Kernels::ContractConservedCurrentSiteFwd(tmp1._odata[sU], + q_in_2._odata[sU], + q_out._odata[sU], + Umu, sU, mu); + Kernels::ContractConservedCurrentSiteBwd(q_in_1._odata[sU], + tmp2._odata[sU], + q_out._odata[sU], + Umu, sU, mu); } } diff --git a/lib/qcd/action/fermion/WilsonFermion5D.cc b/lib/qcd/action/fermion/WilsonFermion5D.cc index b69a18ba..76218098 100644 --- a/lib/qcd/action/fermion/WilsonFermion5D.cc +++ b/lib/qcd/action/fermion/WilsonFermion5D.cc @@ -686,13 +686,13 @@ void 
WilsonFermion5D::ContractConservedCurrent(PropagatorField &q_in_1, conformable(q_in_1._grid, FermionGrid()); conformable(q_in_1._grid, q_in_2._grid); conformable(_FourDimGrid, q_out._grid); - - PropagatorField tmp(FermionGrid()); + PropagatorField tmp1(FermionGrid()), tmp2(FermionGrid()); q_out = zero; - // Forward, need q1(x + mu, s), q2(x, Ls - 1 - s). 5D lattice so shift - // 4D coordinate mu by one. - tmp = Cshift(q_in_1, mu + 1, 1); + // Forward, need q1(x + mu, s), q2(x, Ls - 1 - s). Backward, need q1(x, s), + // q2(x + mu, Ls - 1 - s). 5D lattice so shift 4D coordinate mu by one. + tmp1 = Cshift(q_in_1, mu + 1, 1); + tmp2 = Cshift(q_in_2, mu + 1, 1); parallel_for (unsigned int sU = 0; sU < Umu._grid->oSites(); ++sU) { unsigned int sF1 = sU * Ls; @@ -701,28 +701,14 @@ void WilsonFermion5D::ContractConservedCurrent(PropagatorField &q_in_1, { bool axial_sign = ((curr_type == Current::Axial) && (s < (Ls / 2))) ? \ true : false; - Kernels::ContractConservedCurrentSiteFwd(tmp, q_in_2, q_out, Umu, - mu, sF1, sF2, sU, sU, - axial_sign); - sF1++; - sF2--; - } - } - - // Backward, need q1(x, s), q2(x + mu, Ls - 1 - s). 5D lattice so shift - // 4D coordinate mu by one. - tmp = Cshift(q_in_2, mu + 1, 1); - parallel_for (unsigned int sU = 0; sU < Umu._grid->oSites(); ++sU) - { - unsigned int sF1 = sU * Ls; - unsigned int sF2 = (sU + 1) * Ls - 1; - for (int s = 0; s < Ls; ++s) - { - bool axial_sign = ((curr_type == Current::Axial) && (s < (Ls / 2))) ? 
\ - true : false; - Kernels::ContractConservedCurrentSiteBwd(q_in_1, tmp, q_out, Umu, - mu, sF1, sF2, sU, sU, - axial_sign); + Kernels::ContractConservedCurrentSiteFwd(tmp1._odata[sF1], + q_in_2._odata[sF2], + q_out._odata[sU], + Umu, sU, mu, axial_sign); + Kernels::ContractConservedCurrentSiteBwd(q_in_1._odata[sF1], + tmp2._odata[sF2], + q_out._odata[sU], + Umu, sU, mu, axial_sign); sF1++; sF2--; } diff --git a/lib/qcd/action/fermion/WilsonKernels.cc b/lib/qcd/action/fermion/WilsonKernels.cc index c519dc56..6b193766 100644 --- a/lib/qcd/action/fermion/WilsonKernels.cc +++ b/lib/qcd/action/fermion/WilsonKernels.cc @@ -298,28 +298,25 @@ void WilsonKernels::DhopDir( StencilImpl &st, DoubledGaugeField &U,SiteHal ******************************************************************************/ template void WilsonKernels::ContractConservedCurrentSiteFwd( - const PropagatorField &q_in_1, - const PropagatorField &q_in_2, - PropagatorField &q_out, + const SitePropagator &q_in_1, + const SitePropagator &q_in_2, + SitePropagator &q_out, DoubledGaugeField &U, - unsigned int mu, - unsigned int sF_in_1, - unsigned int sF_in_2, - unsigned int sF_out, unsigned int sU, + unsigned int mu, bool switch_sign) { SitePropagator result, tmp; Gamma g5(Gamma::Algebra::Gamma5); - multLinkProp(tmp, U._odata[sU], q_in_1._odata[sF_in_1], mu); - result = g5 * adj(q_in_2._odata[sF_in_2]) * g5 * WilsonCurrentFwd(tmp, mu); + Impl::multLinkProp(tmp, U._odata[sU], q_in_1, mu); + result = g5 * adj(q_in_2) * g5 * WilsonCurrentFwd(tmp, mu); if (switch_sign) { - q_out._odata[sF_out] -= result; + q_out -= result; } else { - q_out._odata[sF_out] += result; + q_out += result; } } @@ -331,28 +328,25 @@ void WilsonKernels::ContractConservedCurrentSiteFwd( ******************************************************************************/ template void WilsonKernels::ContractConservedCurrentSiteBwd( - const PropagatorField &q_in_1, - const PropagatorField &q_in_2, - PropagatorField &q_out, + const SitePropagator 
&q_in_1, + const SitePropagator &q_in_2, + SitePropagator &q_out, DoubledGaugeField &U, - unsigned int mu, - unsigned int sF_in_1, - unsigned int sF_in_2, - unsigned int sF_out, unsigned int sU, + unsigned int mu, bool switch_sign) { SitePropagator result, tmp; Gamma g5(Gamma::Algebra::Gamma5); - multLinkProp(tmp, U._odata[sU], q_in_1._odata[sF_in_1], mu + Nd); - result = g5 * adj(q_in_2._odata[sF_in_2]) * g5 * WilsonCurrentBwd(tmp, mu); + Impl::multLinkProp(tmp, U._odata[sU], q_in_1, mu + Nd); + result = g5 * adj(q_in_2) * g5 * WilsonCurrentBwd(tmp, mu); if (switch_sign) { - q_out._odata[sF_out] += result; + q_out += result; } else { - q_out._odata[sF_out] -= result; + q_out -= result; } } @@ -360,31 +354,25 @@ void WilsonKernels::ContractConservedCurrentSiteBwd( #define NO_CURR_SITE(Impl) \ template <> \ void WilsonKernels::ContractConservedCurrentSiteFwd( \ - const PropagatorField &q_in_1, \ - const PropagatorField &q_in_2, \ - PropagatorField &q_out, \ - DoubledGaugeField &U, \ - unsigned int mu, \ - unsigned int sF_in_1, \ - unsigned int sF_in_2, \ - unsigned int sF_out, \ - unsigned int sU, \ - bool switch_sign) \ + const SitePropagator &q_in_1, \ + const SitePropagator &q_in_2, \ + SitePropagator &q_out, \ + DoubledGaugeField &U, \ + unsigned int sU, \ + unsigned int mu, \ + bool switch_sign) \ { \ assert(0); \ } \ template <> \ void WilsonKernels::ContractConservedCurrentSiteBwd( \ - const PropagatorField &q_in_1, \ - const PropagatorField &q_in_2, \ - PropagatorField &q_out, \ - DoubledGaugeField &U, \ - unsigned int mu, \ - unsigned int sF_in_1, \ - unsigned int sF_in_2, \ - unsigned int sF_out, \ - unsigned int sU, \ - bool switch_sign) \ + const SitePropagator &q_in_1, \ + const SitePropagator &q_in_2, \ + SitePropagator &q_out, \ + DoubledGaugeField &U, \ + unsigned int mu, \ + unsigned int sU, \ + bool switch_sign) \ { \ assert(0); \ } diff --git a/lib/qcd/action/fermion/WilsonKernels.h b/lib/qcd/action/fermion/WilsonKernels.h index 95155ccc..0294c740 
100644 --- a/lib/qcd/action/fermion/WilsonKernels.h +++ b/lib/qcd/action/fermion/WilsonKernels.h @@ -183,25 +183,19 @@ public: ////////////////////////////////////////////////////////////////////////////// // Utilities for inserting Wilson conserved current. ////////////////////////////////////////////////////////////////////////////// - void ContractConservedCurrentSiteFwd(const PropagatorField &q_in_1, - const PropagatorField &q_in_2, - PropagatorField &q_out, + void ContractConservedCurrentSiteFwd(const SitePropagator &q_in_1, + const SitePropagator &q_in_2, + SitePropagator &q_out, DoubledGaugeField &U, - unsigned int mu, - unsigned int sF_in_1, - unsigned int sF_in_2, - unsigned int sF_out, unsigned int sU, + unsigned int mu, bool switch_sign = false); - void ContractConservedCurrentSiteBwd(const PropagatorField &q_in_1, - const PropagatorField &q_in_2, - PropagatorField &q_out, + void ContractConservedCurrentSiteBwd(const SitePropagator &q_in_1, + const SitePropagator &q_in_2, + SitePropagator &q_out, DoubledGaugeField &U, - unsigned int mu, - unsigned int sF_in_1, - unsigned int sF_in_2, - unsigned int sF_out, unsigned int sU, + unsigned int mu, bool switch_sign = false); void SeqConservedCurrentInternal(const PropagatorField &q_in, PropagatorField &q_out, From 1bd311ba9ccd8506d13064cb6f6829515a0f0240 Mon Sep 17 00:00:00 2001 From: Lanny91 Date: Fri, 16 Jun 2017 16:43:15 +0100 Subject: [PATCH 045/377] Faster sequential conserved current implementation, now compatible with 5D vectorisation & G-parity. 
--- lib/qcd/action/fermion/WilsonFermion.cc | 41 ++++++++- lib/qcd/action/fermion/WilsonFermion5D.cc | 68 +++++++++++--- lib/qcd/action/fermion/WilsonKernels.cc | 105 ++++++++++++---------- lib/qcd/action/fermion/WilsonKernels.h | 22 +++-- 4 files changed, 164 insertions(+), 72 deletions(-) diff --git a/lib/qcd/action/fermion/WilsonFermion.cc b/lib/qcd/action/fermion/WilsonFermion.cc index eff7d958..b986edd7 100644 --- a/lib/qcd/action/fermion/WilsonFermion.cc +++ b/lib/qcd/action/fermion/WilsonFermion.cc @@ -394,6 +394,8 @@ void WilsonFermion::SeqConservedCurrent(PropagatorField &q_in, conformable(_grid, q_out._grid); Lattice> ph(_grid), coor(_grid); Complex i(0.0,1.0); + PropagatorField tmpFwd(_grid), tmpBwd(_grid), tmp(_grid); + int tshift = (mu == Tp) ? 1 : 0; // Momentum projection ph = zero; @@ -404,8 +406,43 @@ void WilsonFermion::SeqConservedCurrent(PropagatorField &q_in, } ph = exp((Real)(2*M_PI)*i*ph); - Kernels::SeqConservedCurrentInternal(q_in, q_out, Umu, curr_type, mu, ph, - tmin, tmax); + q_out = zero; + LatticeInteger coords(_grid); + LatticeCoordinate(coords, Tp); + + // Need q(x + mu) and q(x - mu). + tmp = Cshift(q_in, mu, 1); + tmpFwd = tmp*ph; + tmp = ph*q_in; + tmpBwd = Cshift(tmp, mu, -1); + + parallel_for (unsigned int sU = 0; sU < Umu._grid->oSites(); ++sU) + { + // Compute the sequential conserved current insertion only if our simd + // object contains a timeslice we need. + vInteger t_mask = ((coords._odata[sU] >= tmin) && + (coords._odata[sU] <= tmax)); + Integer timeSlices = Reduce(t_mask); + + if (timeSlices > 0) + { + Kernels::SeqConservedCurrentSiteFwd(tmpFwd._odata[sU], + q_out._odata[sU], + Umu, sU, mu, t_mask); + } + + // Repeat for backward direction. 
+ t_mask = ((coords._odata[sU] >= (tmin + tshift)) && + (coords._odata[sU] <= (tmax + tshift))); + timeSlices = Reduce(t_mask); + + if (timeSlices > 0) + { + Kernels::SeqConservedCurrentSiteBwd(tmpBwd._odata[sU], + q_out._odata[sU], + Umu, sU, mu, t_mask); + } + } } FermOpTemplateInstantiate(WilsonFermion); diff --git a/lib/qcd/action/fermion/WilsonFermion5D.cc b/lib/qcd/action/fermion/WilsonFermion5D.cc index 76218098..5daed3de 100644 --- a/lib/qcd/action/fermion/WilsonFermion5D.cc +++ b/lib/qcd/action/fermion/WilsonFermion5D.cc @@ -727,31 +727,73 @@ void WilsonFermion5D::SeqConservedCurrent(PropagatorField &q_in, { conformable(q_in._grid, FermionGrid()); conformable(q_in._grid, q_out._grid); - Lattice> ph(_FourDimGrid), coor(_FourDimGrid); - PropagatorField q_in_s(_FourDimGrid); - PropagatorField q_out_s(_FourDimGrid); + Lattice> ph(FermionGrid()), coor(FermionGrid()); + PropagatorField tmpFwd(FermionGrid()), tmpBwd(FermionGrid()), + tmp(FermionGrid()); Complex i(0.0, 1.0); + int tshift = (mu == Tp) ? 1 : 0; - // Momentum projection + // Momentum projection. ph = zero; for(unsigned int nu = 0; nu < Nd - 1; nu++) { - LatticeCoordinate(coor, nu); + // Shift coordinate lattice index by 1 to account for 5th dimension. + LatticeCoordinate(coor, nu + 1); ph = ph + mom[nu]*coor*((1./(_FourDimGrid->_fdimensions[nu]))); } ph = exp((Real)(2*M_PI)*i*ph); - // Sequential insertion across 5th dimension - for (int s = 0; s < Ls; s++) + q_out = zero; + LatticeInteger coords(_FourDimGrid); + LatticeCoordinate(coords, Tp); + + // Need q(x + mu, s) and q(x - mu, s). 5D lattice so shift 4D coordinate mu + // by one. 
+ tmp = Cshift(q_in, mu + 1, 1); + tmpFwd = tmp*ph; + tmp = ph*q_in; + tmpBwd = Cshift(tmp, mu + 1, -1); + + parallel_for (unsigned int sU = 0; sU < Umu._grid->oSites(); ++sU) { - ExtractSlice(q_in_s, q_in, s, 0); - Kernels::SeqConservedCurrentInternal(q_in_s, q_out_s, Umu, curr_type, - mu, ph, tmin, tmax); - if ((curr_type == Current::Axial) && (s < Ls/2)) + // Compute the sequential conserved current insertion only if our simd + // object contains a timeslice we need. + vInteger t_mask = ((coords._odata[sU] >= tmin) && + (coords._odata[sU] <= tmax)); + Integer timeSlices = Reduce(t_mask); + + if (timeSlices > 0) { - q_out_s = -q_out_s; + unsigned int sF = sU * Ls; + for (unsigned int s = 0; s < Ls; ++s) + { + bool axial_sign = ((curr_type == Current::Axial) && (s < (Ls / 2))) ? \ + true : false; + Kernels::SeqConservedCurrentSiteFwd(tmpFwd._odata[sF], + q_out._odata[sF], Umu, sU, + mu, t_mask, axial_sign); + ++sF; + } + } + + // Repeat for backward direction. + t_mask = ((coords._odata[sU] >= (tmin + tshift)) && + (coords._odata[sU] <= (tmax + tshift))); + timeSlices = Reduce(t_mask); + + if (timeSlices > 0) + { + unsigned int sF = sU * Ls; + for (unsigned int s = 0; s < Ls; ++s) + { + bool axial_sign = ((curr_type == Current::Axial) && (s < (Ls / 2))) ? 
\ + true : false; + Kernels::SeqConservedCurrentSiteBwd(tmpBwd._odata[sF], + q_out._odata[sF], Umu, sU, + mu, t_mask, axial_sign); + ++sF; + } } - InsertSlice(q_out_s, q_out, s, 0); } } diff --git a/lib/qcd/action/fermion/WilsonKernels.cc b/lib/qcd/action/fermion/WilsonKernels.cc index 6b193766..dc66db23 100644 --- a/lib/qcd/action/fermion/WilsonKernels.cc +++ b/lib/qcd/action/fermion/WilsonKernels.cc @@ -383,63 +383,70 @@ NO_CURR_SITE(GparityWilsonImplFH); NO_CURR_SITE(GparityWilsonImplDF); -template -void WilsonKernels::SeqConservedCurrentInternal(const PropagatorField &q_in, - PropagatorField &q_out, - DoubledGaugeField &U, - Current curr_type, - unsigned int mu, - Lattice> &ph, - unsigned int tmin, - unsigned int tmax) +/******************************************************************************* + * Name: SeqConservedCurrentSiteFwd + * Operation: (1/2) * U(x) * (g[mu] - 1) * q[x + mu] + * Notes: - DoubledGaugeField U assumed to contain -1/2 factor. + * - Pass in q_in shifted in +ve mu direction. + ******************************************************************************/ +template +void WilsonKernels::SeqConservedCurrentSiteFwd(const SitePropagator &q_in, + SitePropagator &q_out, + DoubledGaugeField &U, + unsigned int sU, + unsigned int mu, + vInteger t_mask, + bool switch_sign) { - int tshift = (mu == Tp) ? 1 : 0; - Real G_T = (curr_type == Current::Tadpole) ? -1. : 1.; - PropagatorField tmp(q_in._grid); - GaugeLinkField Umu(U._grid); - Umu = PeekIndex(U, mu); - Lattice> t(q_in._grid); - LatticeCoordinate(t, Tp); + SitePropagator result; + Impl::multLinkProp(result, U._odata[sU], q_in, mu); + result = WilsonCurrentFwd(result, mu); - tmp = this->CovShiftForward(Umu, mu, q_in)*ph; - tmp = where((t >= tmin) and (t <= tmax), tmp, 0.*tmp); - q_out = G_T*WilsonCurrentFwd(tmp, mu); + // Zero any unwanted timeslice entries. 
+ result = predicatedWhere(t_mask, result, 0.*result); - tmp = q_in*ph; - tmp = this->CovShiftBackward(Umu, mu, tmp); - tmp = where((t >= tmin + tshift) and (t <= tmax + tshift), tmp, 0.*tmp); - q_out -= WilsonCurrentBwd(tmp, mu); + if (switch_sign) + { + q_out -= result; + } + else + { + q_out += result; + } } +/******************************************************************************* + * Name: SeqConservedCurrentSiteBwd + * Operation: (1/2) * U^dag(x) * (g[mu] + 1) * q[x - mu] + * Notes: - DoubledGaugeField U assumed to contain -1/2 factor. + * - Pass in q_in shifted in -ve mu direction. + ******************************************************************************/ +template +void WilsonKernels::SeqConservedCurrentSiteBwd(const SitePropagator &q_in, + SitePropagator &q_out, + DoubledGaugeField &U, + unsigned int sU, + unsigned int mu, + vInteger t_mask, + bool switch_sign) +{ + SitePropagator result; + Impl::multLinkProp(result, U._odata[sU], q_in, mu + Nd); + result = WilsonCurrentBwd(result, mu); -// GParity, (Z)DomainWallVec5D -> require special implementation -#define NO_CURR(Impl) \ -template <> void \ -WilsonKernels::SeqConservedCurrentInternal(const PropagatorField &q_in, \ - PropagatorField &q_out, \ - DoubledGaugeField &U, \ - Current curr_type, \ - unsigned int mu, \ - Lattice> &ph, \ - unsigned int tmin, \ - unsigned int tmax) \ -{ \ - assert(0); \ + // Zero any unwanted timeslice entries.
+ result = predicatedWhere(t_mask, result, 0.*result); + + if (switch_sign) + { + q_out += result; + } + else + { + q_out -= result; + } } -NO_CURR(GparityWilsonImplF); -NO_CURR(GparityWilsonImplD); -NO_CURR(GparityWilsonImplFH); -NO_CURR(GparityWilsonImplDF); -NO_CURR(DomainWallVec5dImplF); -NO_CURR(DomainWallVec5dImplD); -NO_CURR(DomainWallVec5dImplFH); -NO_CURR(DomainWallVec5dImplDF); -NO_CURR(ZDomainWallVec5dImplF); -NO_CURR(ZDomainWallVec5dImplD); -NO_CURR(ZDomainWallVec5dImplFH); -NO_CURR(ZDomainWallVec5dImplDF); - FermOpTemplateInstantiate(WilsonKernels); AdjointFermOpTemplateInstantiate(WilsonKernels); TwoIndexFermOpTemplateInstantiate(WilsonKernels); diff --git a/lib/qcd/action/fermion/WilsonKernels.h b/lib/qcd/action/fermion/WilsonKernels.h index 0294c740..ed8d6be9 100644 --- a/lib/qcd/action/fermion/WilsonKernels.h +++ b/lib/qcd/action/fermion/WilsonKernels.h @@ -197,14 +197,20 @@ public: unsigned int sU, unsigned int mu, bool switch_sign = false); - void SeqConservedCurrentInternal(const PropagatorField &q_in, - PropagatorField &q_out, - DoubledGaugeField &U, - Current curr_type, - unsigned int mu, - Lattice> &ph, - unsigned int tmin, - unsigned int tmax); + void SeqConservedCurrentSiteFwd(const SitePropagator &q_in, + SitePropagator &q_out, + DoubledGaugeField &U, + unsigned int sU, + unsigned int mu, + vInteger t_mask, + bool switch_sign = false); + void SeqConservedCurrentSiteBwd(const SitePropagator &q_in, + SitePropagator &q_out, + DoubledGaugeField &U, + unsigned int sU, + unsigned int mu, + vInteger t_mask, + bool switch_sign = false); private: // Specialised variants From 284ee194b115b98b0060998f4dcdf48d2049aab6 Mon Sep 17 00:00:00 2001 From: Antonin Portelli Date: Mon, 19 Jun 2017 14:38:15 +0100 Subject: [PATCH 046/377] JSON update --- lib/json/json.hpp | 3457 +++++++++++++++++++++++++++------------------ 1 file changed, 2092 insertions(+), 1365 deletions(-) diff --git a/lib/json/json.hpp b/lib/json/json.hpp index d5dc111e..6dfc1831 100644 --- 
a/lib/json/json.hpp +++ b/lib/json/json.hpp @@ -1,7 +1,7 @@ /* __ _____ _____ _____ __| | __| | | | JSON for Modern C++ -| | |__ | | | | | | version 2.0.10 +| | |__ | | | | | | version 2.1.1 |_____|_____|_____|_|___| https://github.com/nlohmann/json Licensed under the MIT License . @@ -28,22 +28,23 @@ SOFTWARE. #ifndef NLOHMANN_JSON_HPP #define NLOHMANN_JSON_HPP -#include -#include // all_of, for_each, transform + +#include // all_of, copy, fill, find, for_each, none_of, remove, reverse, transform #include // array #include // assert #include // isdigit #include // and, not, or -#include // isfinite, ldexp, signbit +#include // isfinite, labs, ldexp, signbit #include // nullptr_t, ptrdiff_t, size_t #include // int64_t, uint64_t -#include // strtod, strtof, strtold, strtoul +#include // abort, strtod, strtof, strtold, strtoul, strtoll, strtoull #include // strlen +#include // forward_list #include // function, hash, less #include // initializer_list #include // setw #include // istream, ostream -#include // advance, begin, bidirectional_iterator_tag, distance, end, inserter, iterator, iterator_traits, next, random_access_iterator_tag, reverse_iterator +#include // advance, begin, back_inserter, bidirectional_iterator_tag, distance, end, inserter, iterator, iterator_traits, next, random_access_iterator_tag, reverse_iterator #include // numeric_limits #include // locale #include // map @@ -52,19 +53,17 @@ SOFTWARE. 
#include // stringstream #include // domain_error, invalid_argument, out_of_range #include // getline, stoi, string, to_string -#include // add_pointer, enable_if, is_arithmetic, is_base_of, is_const, is_constructible, is_convertible, is_floating_point, is_integral, is_nothrow_move_assignable, std::is_nothrow_move_constructible, std::is_pointer, std::is_reference, std::is_same, remove_const, remove_pointer, remove_reference +#include // add_pointer, conditional, decay, enable_if, false_type, integral_constant, is_arithmetic, is_base_of, is_const, is_constructible, is_convertible, is_default_constructible, is_enum, is_floating_point, is_integral, is_nothrow_move_assignable, is_nothrow_move_constructible, is_pointer, is_reference, is_same, is_scalar, is_signed, remove_const, remove_cv, remove_pointer, remove_reference, true_type, underlying_type #include // declval, forward, make_pair, move, pair, swap #include // vector // exclude unsupported compilers #if defined(__clang__) - #define CLANG_VERSION (__clang_major__ * 10000 + __clang_minor__ * 100 + __clang_patchlevel__) - #if CLANG_VERSION < 30400 + #if (__clang_major__ * 10000 + __clang_minor__ * 100 + __clang_patchlevel__) < 30400 #error "unsupported Clang version - see https://github.com/nlohmann/json#supported-compilers" #endif #elif defined(__GNUC__) - #define GCC_VERSION (__GNUC__ * 10000 + __GNUC_MINOR__ * 100 + __GNUC_PATCHLEVEL__) - #if GCC_VERSION < 40800 + #if (__GNUC__ * 10000 + __GNUC_MINOR__ * 100 + __GNUC_PATCHLEVEL__) < 40900 #error "unsupported GCC version - see https://github.com/nlohmann/json#supported-compilers" #endif #endif @@ -90,6 +89,17 @@ SOFTWARE. 
#define JSON_DEPRECATED #endif +// allow to disable exceptions +#if not defined(JSON_NOEXCEPTION) || defined(__EXCEPTIONS) + #define JSON_THROW(exception) throw exception + #define JSON_TRY try + #define JSON_CATCH(exception) catch(exception) +#else + #define JSON_THROW(exception) std::abort() + #define JSON_TRY if(true) + #define JSON_CATCH(exception) if(false) +#endif + /*! @brief namespace for Niels Lohmann @see https://github.com/nlohmann @@ -98,38 +108,842 @@ SOFTWARE. namespace nlohmann { - /*! @brief unnamed namespace with internal helper functions + +This namespace collects some functions that could not be defined inside the +@ref basic_json class. + +@since version 2.1.0 +*/ +namespace detail +{ +/////////////////////////// +// JSON type enumeration // +/////////////////////////// + +/*! +@brief the JSON type enumeration + +This enumeration collects the different JSON types. It is internally used to +distinguish the stored values, and the functions @ref basic_json::is_null(), +@ref basic_json::is_object(), @ref basic_json::is_array(), +@ref basic_json::is_string(), @ref basic_json::is_boolean(), +@ref basic_json::is_number() (with @ref basic_json::is_number_integer(), +@ref basic_json::is_number_unsigned(), and @ref basic_json::is_number_float()), +@ref basic_json::is_discarded(), @ref basic_json::is_primitive(), and +@ref basic_json::is_structured() rely on it. + +@note There are three enumeration entries (number_integer, number_unsigned, and +number_float), because the library distinguishes these three types for numbers: +@ref basic_json::number_unsigned_t is used for unsigned integers, +@ref basic_json::number_integer_t is used for signed integers, and +@ref basic_json::number_float_t is used for floating-point numbers or to +approximate integers which do not fit in the limits of their respective type. 
+ +@sa @ref basic_json::basic_json(const value_t value_type) -- create a JSON +value with the default value for a given type + @since version 1.0.0 */ -namespace +enum class value_t : uint8_t { + null, ///< null value + object, ///< object (unordered set of name/value pairs) + array, ///< array (ordered collection of values) + string, ///< string value + boolean, ///< boolean value + number_integer, ///< number value (signed integer) + number_unsigned, ///< number value (unsigned integer) + number_float, ///< number value (floating-point) + discarded ///< discarded by the the parser callback function +}; + +/*! +@brief comparison operator for JSON types + +Returns an ordering that is similar to Python: +- order: null < boolean < number < object < array < string +- furthermore, each type is not smaller than itself + +@since version 1.0.0 +*/ +inline bool operator<(const value_t lhs, const value_t rhs) noexcept +{ + static constexpr std::array order = {{ + 0, // null + 3, // object + 4, // array + 5, // string + 1, // boolean + 2, // integer + 2, // unsigned + 2, // float + } + }; + + // discarded values are not comparable + if (lhs == value_t::discarded or rhs == value_t::discarded) + { + return false; + } + + return order[static_cast(lhs)] < + order[static_cast(rhs)]; +} + + +///////////// +// helpers // +///////////// + +// alias templates to reduce boilerplate +template +using enable_if_t = typename std::enable_if::type; + +template +using uncvref_t = typename std::remove_cv::type>::type; + +// taken from http://stackoverflow.com/a/26936864/266378 +template +using is_unscoped_enum = + std::integral_constant::value and + std::is_enum::value>; + +/* +Implementation of two C++17 constructs: conjunction, negation. This is needed +to avoid evaluating all the traits in a condition + +For example: not std::is_same::value and has_value_type::value +will not compile when T = void (on MSVC at least). 
Whereas +conjunction>, has_value_type>::value will +stop evaluating if negation<...>::value == false + +Please note that those constructs must be used with caution, since symbols can +become very long quickly (which can slow down compilation and cause MSVC +internal compiler errors). Only use it when you have to (see example ahead). +*/ +template struct conjunction : std::true_type {}; +template struct conjunction : B1 {}; +template +struct conjunction : std::conditional, B1>::type {}; + +template struct negation : std::integral_constant < bool, !B::value > {}; + +// dispatch utility (taken from ranges-v3) +template struct priority_tag : priority_tag < N - 1 > {}; +template<> struct priority_tag<0> {}; + + +////////////////// +// constructors // +////////////////// + +template struct external_constructor; + +template<> +struct external_constructor +{ + template + static void construct(BasicJsonType& j, typename BasicJsonType::boolean_t b) noexcept + { + j.m_type = value_t::boolean; + j.m_value = b; + j.assert_invariant(); + } +}; + +template<> +struct external_constructor +{ + template + static void construct(BasicJsonType& j, const typename BasicJsonType::string_t& s) + { + j.m_type = value_t::string; + j.m_value = s; + j.assert_invariant(); + } +}; + +template<> +struct external_constructor +{ + template + static void construct(BasicJsonType& j, typename BasicJsonType::number_float_t val) noexcept + { + // replace infinity and NAN by null + if (not std::isfinite(val)) + { + j = BasicJsonType{}; + } + else + { + j.m_type = value_t::number_float; + j.m_value = val; + } + j.assert_invariant(); + } +}; + +template<> +struct external_constructor +{ + template + static void construct(BasicJsonType& j, typename BasicJsonType::number_unsigned_t val) noexcept + { + j.m_type = value_t::number_unsigned; + j.m_value = val; + j.assert_invariant(); + } +}; + +template<> +struct external_constructor +{ + template + static void construct(BasicJsonType& j, typename 
BasicJsonType::number_integer_t val) noexcept + { + j.m_type = value_t::number_integer; + j.m_value = val; + j.assert_invariant(); + } +}; + +template<> +struct external_constructor +{ + template + static void construct(BasicJsonType& j, const typename BasicJsonType::array_t& arr) + { + j.m_type = value_t::array; + j.m_value = arr; + j.assert_invariant(); + } + + template::value, + int> = 0> + static void construct(BasicJsonType& j, const CompatibleArrayType& arr) + { + using std::begin; + using std::end; + j.m_type = value_t::array; + j.m_value.array = j.template create(begin(arr), end(arr)); + j.assert_invariant(); + } +}; + +template<> +struct external_constructor +{ + template + static void construct(BasicJsonType& j, const typename BasicJsonType::object_t& obj) + { + j.m_type = value_t::object; + j.m_value = obj; + j.assert_invariant(); + } + + template::value, + int> = 0> + static void construct(BasicJsonType& j, const CompatibleObjectType& obj) + { + using std::begin; + using std::end; + + j.m_type = value_t::object; + j.m_value.object = j.template create(begin(obj), end(obj)); + j.assert_invariant(); + } +}; + + +//////////////////////// +// has_/is_ functions // +//////////////////////// + /*! @brief Helper to determine whether there's a key_type for T. -Thus helper is used to tell associative containers apart from other containers +This helper is used to tell associative containers apart from other containers such as sequence containers. For instance, `std::map` passes the test as it contains a `mapped_type`, whereas `std::vector` fails the test. 
@sa http://stackoverflow.com/a/7728728/266378 @since version 1.0.0, overworked in version 2.0.6 */ -template -struct has_mapped_type -{ - private: - template - static int detect(U&&); +#define NLOHMANN_JSON_HAS_HELPER(type) \ + template struct has_##type { \ + private: \ + template \ + static int detect(U &&); \ + static void detect(...); \ + public: \ + static constexpr bool value = \ + std::is_integral()))>::value; \ + } - static void detect(...); - public: - static constexpr bool value = - std::is_integral()))>::value; +NLOHMANN_JSON_HAS_HELPER(mapped_type); +NLOHMANN_JSON_HAS_HELPER(key_type); +NLOHMANN_JSON_HAS_HELPER(value_type); +NLOHMANN_JSON_HAS_HELPER(iterator); + +#undef NLOHMANN_JSON_HAS_HELPER + + +template +struct is_compatible_object_type_impl : std::false_type {}; + +template +struct is_compatible_object_type_impl +{ + static constexpr auto value = + std::is_constructible::value and + std::is_constructible::value; }; +template +struct is_compatible_object_type +{ + static auto constexpr value = is_compatible_object_type_impl < + conjunction>, + has_mapped_type, + has_key_type>::value, + typename BasicJsonType::object_t, CompatibleObjectType >::value; +}; + +template +struct is_basic_json_nested_type +{ + static auto constexpr value = std::is_same::value or + std::is_same::value or + std::is_same::value or + std::is_same::value or + std::is_same::value; +}; + +template +struct is_compatible_array_type +{ + static auto constexpr value = + conjunction>, + negation>, + negation>, + negation>, + has_value_type, + has_iterator>::value; +}; + +template +struct is_compatible_integer_type_impl : std::false_type {}; + +template +struct is_compatible_integer_type_impl +{ + // is there an assert somewhere on overflows? 
+ using RealLimits = std::numeric_limits; + using CompatibleLimits = std::numeric_limits; + + static constexpr auto value = + std::is_constructible::value and + CompatibleLimits::is_integer and + RealLimits::is_signed == CompatibleLimits::is_signed; +}; + +template +struct is_compatible_integer_type +{ + static constexpr auto value = + is_compatible_integer_type_impl < + std::is_integral::value and + not std::is_same::value, + RealIntegerType, CompatibleNumberIntegerType > ::value; +}; + + +// trait checking if JSONSerializer::from_json(json const&, udt&) exists +template +struct has_from_json +{ + private: + // also check the return type of from_json + template::from_json( + std::declval(), std::declval()))>::value>> + static int detect(U&&); + static void detect(...); + + public: + static constexpr bool value = std::is_integral>()))>::value; +}; + +// This trait checks if JSONSerializer::from_json(json const&) exists +// this overload is used for non-default-constructible user-defined-types +template +struct has_non_default_from_json +{ + private: + template < + typename U, + typename = enable_if_t::from_json(std::declval()))>::value >> + static int detect(U&&); + static void detect(...); + + public: + static constexpr bool value = std::is_integral>()))>::value; +}; + +// This trait checks if BasicJsonType::json_serializer::to_json exists +template +struct has_to_json +{ + private: + template::to_json( + std::declval(), std::declval()))> + static int detect(U&&); + static void detect(...); + + public: + static constexpr bool value = std::is_integral>()))>::value; +}; + + +///////////// +// to_json // +///////////// + +template::value, int> = 0> +void to_json(BasicJsonType& j, T b) noexcept +{ + external_constructor::construct(j, b); } +template::value, int> = 0> +void to_json(BasicJsonType& j, const CompatibleString& s) +{ + external_constructor::construct(j, s); +} + +template::value, int> = 0> +void to_json(BasicJsonType& j, FloatType val) noexcept +{ + 
external_constructor::construct(j, static_cast(val)); +} + +template < + typename BasicJsonType, typename CompatibleNumberUnsignedType, + enable_if_t::value, int> = 0 > +void to_json(BasicJsonType& j, CompatibleNumberUnsignedType val) noexcept +{ + external_constructor::construct(j, static_cast(val)); +} + +template < + typename BasicJsonType, typename CompatibleNumberIntegerType, + enable_if_t::value, int> = 0 > +void to_json(BasicJsonType& j, CompatibleNumberIntegerType val) noexcept +{ + external_constructor::construct(j, static_cast(val)); +} + +template::value, int> = 0> +void to_json(BasicJsonType& j, UnscopedEnumType e) noexcept +{ + external_constructor::construct(j, e); +} + +template < + typename BasicJsonType, typename CompatibleArrayType, + enable_if_t < + is_compatible_array_type::value or + std::is_same::value, + int > = 0 > +void to_json(BasicJsonType& j, const CompatibleArrayType& arr) +{ + external_constructor::construct(j, arr); +} + +template < + typename BasicJsonType, typename CompatibleObjectType, + enable_if_t::value, + int> = 0 > +void to_json(BasicJsonType& j, const CompatibleObjectType& arr) +{ + external_constructor::construct(j, arr); +} + + +/////////////// +// from_json // +/////////////// + +// overloads for basic_json template parameters +template::value and + not std::is_same::value, + int> = 0> +void get_arithmetic_value(const BasicJsonType& j, ArithmeticType& val) +{ + switch (static_cast(j)) + { + case value_t::number_unsigned: + { + val = static_cast( + *j.template get_ptr()); + break; + } + case value_t::number_integer: + { + val = static_cast( + *j.template get_ptr()); + break; + } + case value_t::number_float: + { + val = static_cast( + *j.template get_ptr()); + break; + } + default: + { + JSON_THROW( + std::domain_error("type must be number, but is " + j.type_name())); + } + } +} + +template +void from_json(const BasicJsonType& j, typename BasicJsonType::boolean_t& b) +{ + if (not j.is_boolean()) + { + 
JSON_THROW(std::domain_error("type must be boolean, but is " + j.type_name())); + } + b = *j.template get_ptr(); +} + +template +void from_json(const BasicJsonType& j, typename BasicJsonType::string_t& s) +{ + if (not j.is_string()) + { + JSON_THROW(std::domain_error("type must be string, but is " + j.type_name())); + } + s = *j.template get_ptr(); +} + +template +void from_json(const BasicJsonType& j, typename BasicJsonType::number_float_t& val) +{ + get_arithmetic_value(j, val); +} + +template +void from_json(const BasicJsonType& j, typename BasicJsonType::number_unsigned_t& val) +{ + get_arithmetic_value(j, val); +} + +template +void from_json(const BasicJsonType& j, typename BasicJsonType::number_integer_t& val) +{ + get_arithmetic_value(j, val); +} + +template::value, int> = 0> +void from_json(const BasicJsonType& j, UnscopedEnumType& e) +{ + typename std::underlying_type::type val; + get_arithmetic_value(j, val); + e = static_cast(val); +} + +template +void from_json(const BasicJsonType& j, typename BasicJsonType::array_t& arr) +{ + if (not j.is_array()) + { + JSON_THROW(std::domain_error("type must be array, but is " + j.type_name())); + } + arr = *j.template get_ptr(); +} + +// forward_list doesn't have an insert method +template +void from_json(const BasicJsonType& j, std::forward_list& l) +{ + // do not perform the check when user wants to retrieve jsons + // (except when it's null.. ?) 
+ if (j.is_null()) + { + JSON_THROW(std::domain_error("type must be array, but is " + j.type_name())); + } + if (not std::is_same::value) + { + if (not j.is_array()) + { + JSON_THROW(std::domain_error("type must be array, but is " + j.type_name())); + } + } + for (auto it = j.rbegin(), end = j.rend(); it != end; ++it) + { + l.push_front(it->template get()); + } +} + +template +void from_json_array_impl(const BasicJsonType& j, CompatibleArrayType& arr, priority_tag<0>) +{ + using std::begin; + using std::end; + + std::transform(j.begin(), j.end(), + std::inserter(arr, end(arr)), [](const BasicJsonType & i) + { + // get() returns *this, this won't call a from_json + // method when value_type is BasicJsonType + return i.template get(); + }); +} + +template +auto from_json_array_impl(const BasicJsonType& j, CompatibleArrayType& arr, priority_tag<1>) +-> decltype( + arr.reserve(std::declval()), + void()) +{ + using std::begin; + using std::end; + + arr.reserve(j.size()); + std::transform( + j.begin(), j.end(), std::inserter(arr, end(arr)), [](const BasicJsonType & i) + { + // get() returns *this, this won't call a from_json + // method when value_type is BasicJsonType + return i.template get(); + }); +} + +template::value and + not std::is_same::value, int> = 0> +void from_json(const BasicJsonType& j, CompatibleArrayType& arr) +{ + if (j.is_null()) + { + JSON_THROW(std::domain_error("type must be array, but is " + j.type_name())); + } + + // when T == BasicJsonType, do not check if value_t is correct + if (not std::is_same::value) + { + if (not j.is_array()) + { + JSON_THROW(std::domain_error("type must be array, but is " + j.type_name())); + } + } + from_json_array_impl(j, arr, priority_tag<1> {}); +} + +template::value, int> = 0> +void from_json(const BasicJsonType& j, CompatibleObjectType& obj) +{ + if (not j.is_object()) + { + JSON_THROW(std::domain_error("type must be object, but is " + j.type_name())); + } + + auto inner_object = j.template get_ptr(); + using 
std::begin; + using std::end; + // we could avoid the assignment, but this might require a for loop, which + // might be less efficient than the container constructor for some + // containers (would it?) + obj = CompatibleObjectType(begin(*inner_object), end(*inner_object)); +} + +// overload for arithmetic types, not chosen for basic_json template arguments +// (BooleanType, etc..); note: Is it really necessary to provide explicit +// overloads for boolean_t etc. in case of a custom BooleanType which is not +// an arithmetic type? +template::value and + not std::is_same::value and + not std::is_same::value and + not std::is_same::value and + not std::is_same::value, + int> = 0> +void from_json(const BasicJsonType& j, ArithmeticType& val) +{ + switch (static_cast(j)) + { + case value_t::number_unsigned: + { + val = static_cast(*j.template get_ptr()); + break; + } + case value_t::number_integer: + { + val = static_cast(*j.template get_ptr()); + break; + } + case value_t::number_float: + { + val = static_cast(*j.template get_ptr()); + break; + } + case value_t::boolean: + { + val = static_cast(*j.template get_ptr()); + break; + } + default: + { + JSON_THROW(std::domain_error("type must be number, but is " + j.type_name())); + } + } +} + +struct to_json_fn +{ + private: + template + auto call(BasicJsonType& j, T&& val, priority_tag<1>) const noexcept(noexcept(to_json(j, std::forward(val)))) + -> decltype(to_json(j, std::forward(val)), void()) + { + return to_json(j, std::forward(val)); + } + + template + void call(BasicJsonType&, T&&, priority_tag<0>) const noexcept + { + static_assert(sizeof(BasicJsonType) == 0, + "could not find to_json() method in T's namespace"); + } + + public: + template + void operator()(BasicJsonType& j, T&& val) const + noexcept(noexcept(std::declval().call(j, std::forward(val), priority_tag<1> {}))) + { + return call(j, std::forward(val), priority_tag<1> {}); + } +}; + +struct from_json_fn +{ + private: + template + auto call(const 
BasicJsonType& j, T& val, priority_tag<1>) const + noexcept(noexcept(from_json(j, val))) + -> decltype(from_json(j, val), void()) + { + return from_json(j, val); + } + + template + void call(const BasicJsonType&, T&, priority_tag<0>) const noexcept + { + static_assert(sizeof(BasicJsonType) == 0, + "could not find from_json() method in T's namespace"); + } + + public: + template + void operator()(const BasicJsonType& j, T& val) const + noexcept(noexcept(std::declval().call(j, val, priority_tag<1> {}))) + { + return call(j, val, priority_tag<1> {}); + } +}; + +// taken from ranges-v3 +template +struct static_const +{ + static constexpr T value{}; +}; + +template +constexpr T static_const::value; +} // namespace detail + + +/// namespace to hold default `to_json` / `from_json` functions +namespace +{ +constexpr const auto& to_json = detail::static_const::value; +constexpr const auto& from_json = detail::static_const::value; +} + + +/*! +@brief default JSONSerializer template argument + +This serializer ignores the template arguments and uses ADL +([argument-dependent lookup](http://en.cppreference.com/w/cpp/language/adl)) +for serialization. +*/ +template +struct adl_serializer +{ + /*! + @brief convert a JSON value to any value type + + This function is usually called by the `get()` function of the + @ref basic_json class (either explicit or via conversion operators). + + @param[in] j JSON value to read from + @param[in,out] val value to write to + */ + template + static void from_json(BasicJsonType&& j, ValueType& val) noexcept( + noexcept(::nlohmann::from_json(std::forward(j), val))) + { + ::nlohmann::from_json(std::forward(j), val); + } + + /*! + @brief convert any value type to a JSON value + + This function is usually called by the constructors of the @ref basic_json + class. 
+ + @param[in,out] j JSON value to write to + @param[in] val value to read from + */ + template + static void to_json(BasicJsonType& j, ValueType&& val) noexcept( + noexcept(::nlohmann::to_json(j, std::forward(val)))) + { + ::nlohmann::to_json(j, std::forward(val)); + } +}; + + /*! @brief a class to store JSON values @@ -149,11 +963,14 @@ default; will be used in @ref number_integer_t) default; will be used in @ref number_float_t) @tparam AllocatorType type of the allocator to use (`std::allocator` by default) +@tparam JSONSerializer the serializer to resolve internal calls to `to_json()` +and `from_json()` (@ref adl_serializer by default) @requirement The class satisfies the following concept requirements: - Basic - [DefaultConstructible](http://en.cppreference.com/w/cpp/concept/DefaultConstructible): - JSON values can be default constructed. The result will be a JSON null value. + JSON values can be default constructed. The result will be a JSON null + value. - [MoveConstructible](http://en.cppreference.com/w/cpp/concept/MoveConstructible): A JSON value can be constructed from an rvalue argument. - [CopyConstructible](http://en.cppreference.com/w/cpp/concept/CopyConstructible): @@ -168,8 +985,8 @@ default) - [StandardLayoutType](http://en.cppreference.com/w/cpp/concept/StandardLayoutType): JSON values have [standard layout](http://en.cppreference.com/w/cpp/language/data_members#Standard_layout): - All non-static data members are private and standard layout types, the class - has no virtual functions or (virtual) base classes. + All non-static data members are private and standard layout types, the + class has no virtual functions or (virtual) base classes. 
- Library-wide - [EqualityComparable](http://en.cppreference.com/w/cpp/concept/EqualityComparable): JSON values can be compared with `==`, see @ref @@ -216,21 +1033,26 @@ template < class NumberIntegerType = std::int64_t, class NumberUnsignedType = std::uint64_t, class NumberFloatType = double, - template class AllocatorType = std::allocator + template class AllocatorType = std::allocator, + template class JSONSerializer = adl_serializer > class basic_json { private: + template friend struct detail::external_constructor; /// workaround type for MSVC using basic_json_t = basic_json; + AllocatorType, JSONSerializer>; public: + using value_t = detail::value_t; // forward declarations template class iter_impl; template class json_reverse_iterator; class json_pointer; + template + using json_serializer = JSONSerializer; ///////////////////// // container types // @@ -282,6 +1104,84 @@ class basic_json return allocator_type(); } + /*! + @brief returns version information on the library + + This function returns a JSON object with information about the library, + including the version number and information on the platform and compiler. + + @return JSON object holding version information + key | description + ----------- | --------------- + `compiler` | Information on the used compiler. It is an object with the following keys: `c++` (the used C++ standard), `family` (the compiler family; possible values are `clang`, `icc`, `gcc`, `ilecpp`, `msvc`, `pgcpp`, `sunpro`, and `unknown`), and `version` (the compiler version). + `copyright` | The copyright line for the library as string. + `name` | The name of the library as string. + `platform` | The used platform as string. Possible values are `win32`, `linux`, `apple`, `unix`, and `unknown`. + `url` | The URL of the project as string. + `version` | The version of the library. 
It is an object with the following keys: `major`, `minor`, and `patch` as defined by [Semantic Versioning](http://semver.org), and `string` (the version string). + + @liveexample{The following code shows an example output of the `meta()` + function.,meta} + + @complexity Constant. + + @since 2.1.0 + */ + static basic_json meta() + { + basic_json result; + + result["copyright"] = "(C) 2013-2017 Niels Lohmann"; + result["name"] = "JSON for Modern C++"; + result["url"] = "https://github.com/nlohmann/json"; + result["version"] = + { + {"string", "2.1.1"}, + {"major", 2}, + {"minor", 1}, + {"patch", 1} + }; + +#ifdef _WIN32 + result["platform"] = "win32"; +#elif defined __linux__ + result["platform"] = "linux"; +#elif defined __APPLE__ + result["platform"] = "apple"; +#elif defined __unix__ + result["platform"] = "unix"; +#else + result["platform"] = "unknown"; +#endif + +#if defined(__clang__) + result["compiler"] = {{"family", "clang"}, {"version", __clang_version__}}; +#elif defined(__ICC) || defined(__INTEL_COMPILER) + result["compiler"] = {{"family", "icc"}, {"version", __INTEL_COMPILER}}; +#elif defined(__GNUC__) || defined(__GNUG__) + result["compiler"] = {{"family", "gcc"}, {"version", std::to_string(__GNUC__) + "." + std::to_string(__GNUC_MINOR__) + "." 
+ std::to_string(__GNUC_PATCHLEVEL__)}}; +#elif defined(__HP_cc) || defined(__HP_aCC) + result["compiler"] = "hp" +#elif defined(__IBMCPP__) + result["compiler"] = {{"family", "ilecpp"}, {"version", __IBMCPP__}}; +#elif defined(_MSC_VER) + result["compiler"] = {{"family", "msvc"}, {"version", _MSC_VER}}; +#elif defined(__PGI) + result["compiler"] = {{"family", "pgcpp"}, {"version", __PGI}}; +#elif defined(__SUNPRO_CC) + result["compiler"] = {{"family", "sunpro"}, {"version", __SUNPRO_CC}}; +#else + result["compiler"] = {{"family", "unknown"}, {"version", "unknown"}}; +#endif + +#ifdef __cplusplus + result["compiler"]["c++"] = std::to_string(__cplusplus); +#else + result["compiler"]["c++"] = "unknown"; +#endif + return result; + } + /////////////////////////// // JSON value data types // @@ -449,6 +1349,12 @@ class basic_json std::string @endcode + #### Encoding + + Strings are stored in UTF-8 encoding. Therefore, functions like + `std::string::size()` or `std::string::length()` return the number of + bytes in the string rather than the number of characters or glyphs. + #### String comparison [RFC 7159](http://rfc7159.net/rfc7159) states: @@ -713,47 +1619,6 @@ class basic_json /// @} - - /////////////////////////// - // JSON type enumeration // - /////////////////////////// - - /*! - @brief the JSON type enumeration - - This enumeration collects the different JSON types. It is internally used - to distinguish the stored values, and the functions @ref is_null(), @ref - is_object(), @ref is_array(), @ref is_string(), @ref is_boolean(), @ref - is_number() (with @ref is_number_integer(), @ref is_number_unsigned(), and - @ref is_number_float()), @ref is_discarded(), @ref is_primitive(), and - @ref is_structured() rely on it. 
- - @note There are three enumeration entries (number_integer, - number_unsigned, and number_float), because the library distinguishes - these three types for numbers: @ref number_unsigned_t is used for unsigned - integers, @ref number_integer_t is used for signed integers, and @ref - number_float_t is used for floating-point numbers or to approximate - integers which do not fit in the limits of their respective type. - - @sa @ref basic_json(const value_t value_type) -- create a JSON value with - the default value for a given type - - @since version 1.0.0 - */ - enum class value_t : uint8_t - { - null, ///< null value - object, ///< object (unordered set of name/value pairs) - array, ///< array (ordered collection of values) - string, ///< string value - boolean, ///< boolean value - number_integer, ///< number value (signed integer) - number_unsigned, ///< number value (unsigned integer) - number_float, ///< number value (floating-point) - discarded ///< discarded by the the parser callback function - }; - - private: /// helper for exception-safe object creation @@ -767,7 +1632,7 @@ class basic_json }; std::unique_ptr object(alloc.allocate(1), deleter); alloc.construct(object.get(), std::forward(args)...); - assert(object.get() != nullptr); + assert(object != nullptr); return object.release(); } @@ -882,7 +1747,7 @@ class basic_json { if (t == value_t::null) { - throw std::domain_error("961c151d2e87f2686a955a9be24d316f1362bf21 2.0.10"); // LCOV_EXCL_LINE + JSON_THROW(std::domain_error("961c151d2e87f2686a955a9be24d316f1362bf21 2.1.1")); // LCOV_EXCL_LINE } break; } @@ -1046,18 +1911,6 @@ class basic_json @liveexample{The following code shows the constructor for different @ref value_t values,basic_json__value_t} - @sa @ref basic_json(std::nullptr_t) -- create a `null` value - @sa @ref basic_json(boolean_t value) -- create a boolean value - @sa @ref basic_json(const string_t&) -- create a string value - @sa @ref basic_json(const object_t&) -- create a object value - 
@sa @ref basic_json(const array_t&) -- create a array value - @sa @ref basic_json(const number_float_t) -- create a number - (floating-point) value - @sa @ref basic_json(const number_integer_t) -- create a number (integer) - value - @sa @ref basic_json(const number_unsigned_t) -- create a number (unsigned) - value - @since version 1.0.0 */ basic_json(const value_t value_type) @@ -1091,474 +1944,69 @@ class basic_json } /*! - @brief create an object (explicit) + @brief create a JSON value - Create an object JSON value with a given content. + This is a "catch all" constructor for all compatible JSON types; that is, + types for which a `to_json()` method exsits. The constructor forwards the + parameter @a val to that method (to `json_serializer::to_json` method + with `U = uncvref_t`, to be exact). - @param[in] val a value for the object + Template type @a CompatibleType includes, but is not limited to, the + following types: + - **arrays**: @ref array_t and all kinds of compatible containers such as + `std::vector`, `std::deque`, `std::list`, `std::forward_list`, + `std::array`, `std::set`, `std::unordered_set`, `std::multiset`, and + `unordered_multiset` with a `value_type` from which a @ref basic_json + value can be constructed. + - **objects**: @ref object_t and all kinds of compatible associative + containers such as `std::map`, `std::unordered_map`, `std::multimap`, + and `std::unordered_multimap` with a `key_type` compatible to + @ref string_t and a `value_type` from which a @ref basic_json value can + be constructed. + - **strings**: @ref string_t, string literals, and all compatible string + containers can be used. + - **numbers**: @ref number_integer_t, @ref number_unsigned_t, + @ref number_float_t, and all convertible number types such as `int`, + `size_t`, `int64_t`, `float` or `double` can be used. + - **boolean**: @ref boolean_t / `bool` can be used. - @complexity Linear in the size of the passed @a val. + See the examples below. 
- @throw std::bad_alloc if allocation for object value fails + @tparam CompatibleType a type such that: + - @a CompatibleType is not derived from `std::istream`, + - @a CompatibleType is not @ref basic_json (to avoid hijacking copy/move + constructors), + - @a CompatibleType is not a @ref basic_json nested type (e.g., + @ref json_pointer, @ref iterator, etc ...) + - @ref @ref json_serializer has a + `to_json(basic_json_t&, CompatibleType&&)` method - @liveexample{The following code shows the constructor with an @ref - object_t parameter.,basic_json__object_t} + @tparam U = `uncvref_t` - @sa @ref basic_json(const CompatibleObjectType&) -- create an object value - from a compatible STL container + @param[in] val the value to be forwarded - @since version 1.0.0 - */ - basic_json(const object_t& val) - : m_type(value_t::object), m_value(val) - { - assert_invariant(); - } + @complexity Usually linear in the size of the passed @a val, also + depending on the implementation of the called `to_json()` + method. - /*! - @brief create an object (implicit) - - Create an object JSON value with a given content. This constructor allows - any type @a CompatibleObjectType that can be used to construct values of - type @ref object_t. - - @tparam CompatibleObjectType An object type whose `key_type` and - `value_type` is compatible to @ref object_t. Examples include `std::map`, - `std::unordered_map`, `std::multimap`, and `std::unordered_multimap` with - a `key_type` of `std::string`, and a `value_type` from which a @ref - basic_json value can be constructed. - - @param[in] val a value for the object - - @complexity Linear in the size of the passed @a val. 
- - @throw std::bad_alloc if allocation for object value fails + @throw what `json_serializer::to_json()` throws @liveexample{The following code shows the constructor with several - compatible object type parameters.,basic_json__CompatibleObjectType} + compatible types.,basic_json__CompatibleType} - @sa @ref basic_json(const object_t&) -- create an object value - - @since version 1.0.0 + @since version 2.1.0 */ - template::value and - std::is_constructible::value, int>::type = 0> - basic_json(const CompatibleObjectType& val) - : m_type(value_t::object) - { - using std::begin; - using std::end; - m_value.object = create(begin(val), end(val)); - assert_invariant(); - } - - /*! - @brief create an array (explicit) - - Create an array JSON value with a given content. - - @param[in] val a value for the array - - @complexity Linear in the size of the passed @a val. - - @throw std::bad_alloc if allocation for array value fails - - @liveexample{The following code shows the constructor with an @ref array_t - parameter.,basic_json__array_t} - - @sa @ref basic_json(const CompatibleArrayType&) -- create an array value - from a compatible STL containers - - @since version 1.0.0 - */ - basic_json(const array_t& val) - : m_type(value_t::array), m_value(val) - { - assert_invariant(); - } - - /*! - @brief create an array (implicit) - - Create an array JSON value with a given content. This constructor allows - any type @a CompatibleArrayType that can be used to construct values of - type @ref array_t. - - @tparam CompatibleArrayType An object type whose `value_type` is - compatible to @ref array_t. Examples include `std::vector`, `std::deque`, - `std::list`, `std::forward_list`, `std::array`, `std::set`, - `std::unordered_set`, `std::multiset`, and `unordered_multiset` with a - `value_type` from which a @ref basic_json value can be constructed. - - @param[in] val a value for the array - - @complexity Linear in the size of the passed @a val. 
- - @throw std::bad_alloc if allocation for array value fails - - @liveexample{The following code shows the constructor with several - compatible array type parameters.,basic_json__CompatibleArrayType} - - @sa @ref basic_json(const array_t&) -- create an array value - - @since version 1.0.0 - */ - template::value and - not std::is_same::value and - not std::is_same::value and - not std::is_same::value and - not std::is_same::value and - not std::is_same::value and - std::is_constructible::value, int>::type = 0> - basic_json(const CompatibleArrayType& val) - : m_type(value_t::array) - { - using std::begin; - using std::end; - m_value.array = create(begin(val), end(val)); - assert_invariant(); - } - - /*! - @brief create a string (explicit) - - Create an string JSON value with a given content. - - @param[in] val a value for the string - - @complexity Linear in the size of the passed @a val. - - @throw std::bad_alloc if allocation for string value fails - - @liveexample{The following code shows the constructor with an @ref - string_t parameter.,basic_json__string_t} - - @sa @ref basic_json(const typename string_t::value_type*) -- create a - string value from a character pointer - @sa @ref basic_json(const CompatibleStringType&) -- create a string value - from a compatible string container - - @since version 1.0.0 - */ - basic_json(const string_t& val) - : m_type(value_t::string), m_value(val) - { - assert_invariant(); - } - - /*! - @brief create a string (explicit) - - Create a string JSON value with a given content. - - @param[in] val a literal value for the string - - @complexity Linear in the size of the passed @a val. 
- - @throw std::bad_alloc if allocation for string value fails - - @liveexample{The following code shows the constructor with string literal - parameter.,basic_json__string_t_value_type} - - @sa @ref basic_json(const string_t&) -- create a string value - @sa @ref basic_json(const CompatibleStringType&) -- create a string value - from a compatible string container - - @since version 1.0.0 - */ - basic_json(const typename string_t::value_type* val) - : basic_json(string_t(val)) - { - assert_invariant(); - } - - /*! - @brief create a string (implicit) - - Create a string JSON value with a given content. - - @param[in] val a value for the string - - @tparam CompatibleStringType an string type which is compatible to @ref - string_t, for instance `std::string`. - - @complexity Linear in the size of the passed @a val. - - @throw std::bad_alloc if allocation for string value fails - - @liveexample{The following code shows the construction of a string value - from a compatible type.,basic_json__CompatibleStringType} - - @sa @ref basic_json(const string_t&) -- create a string value - @sa @ref basic_json(const typename string_t::value_type*) -- create a - string value from a character pointer - - @since version 1.0.0 - */ - template::value, int>::type = 0> - basic_json(const CompatibleStringType& val) - : basic_json(string_t(val)) - { - assert_invariant(); - } - - /*! - @brief create a boolean (explicit) - - Creates a JSON boolean type from a given value. - - @param[in] val a boolean value to store - - @complexity Constant. - - @liveexample{The example below demonstrates boolean - values.,basic_json__boolean_t} - - @since version 1.0.0 - */ - basic_json(boolean_t val) noexcept - : m_type(value_t::boolean), m_value(val) - { - assert_invariant(); - } - - /*! - @brief create an integer number (explicit) - - Create an integer number JSON value with a given content. - - @tparam T A helper type to remove this function via SFINAE in case @ref - number_integer_t is the same as `int`. 
In this case, this constructor - would have the same signature as @ref basic_json(const int value). Note - the helper type @a T is not visible in this constructor's interface. - - @param[in] val an integer to create a JSON number from - - @complexity Constant. - - @liveexample{The example below shows the construction of an integer - number value.,basic_json__number_integer_t} - - @sa @ref basic_json(const int) -- create a number value (integer) - @sa @ref basic_json(const CompatibleNumberIntegerType) -- create a number - value (integer) from a compatible number type - - @since version 1.0.0 - */ - template::value) and - std::is_same::value, int>::type = 0> - basic_json(const number_integer_t val) noexcept - : m_type(value_t::number_integer), m_value(val) - { - assert_invariant(); - } - - - /*! - @brief create an integer number from an enum type (explicit) - - Create an integer number JSON value with a given content. - - @param[in] val an integer to create a JSON number from - - @note This constructor allows to pass enums directly to a constructor. As - C++ has no way of specifying the type of an anonymous enum explicitly, we - can only rely on the fact that such values implicitly convert to int. As - int may already be the same type of number_integer_t, we may need to - switch off the constructor @ref basic_json(const number_integer_t). - - @complexity Constant. - - @liveexample{The example below shows the construction of an integer - number value from an anonymous enum.,basic_json__const_int} - - @sa @ref basic_json(const number_integer_t) -- create a number value - (integer) - @sa @ref basic_json(const CompatibleNumberIntegerType) -- create a number - value (integer) from a compatible number type - - @since version 1.0.0 - */ - basic_json(const int val) noexcept - : m_type(value_t::number_integer), - m_value(static_cast(val)) - { - assert_invariant(); - } - - /*! 
- @brief create an integer number (implicit) - - Create an integer number JSON value with a given content. This constructor - allows any type @a CompatibleNumberIntegerType that can be used to - construct values of type @ref number_integer_t. - - @tparam CompatibleNumberIntegerType An integer type which is compatible to - @ref number_integer_t. Examples include the types `int`, `int32_t`, - `long`, and `short`. - - @param[in] val an integer to create a JSON number from - - @complexity Constant. - - @liveexample{The example below shows the construction of several integer - number values from compatible - types.,basic_json__CompatibleIntegerNumberType} - - @sa @ref basic_json(const number_integer_t) -- create a number value - (integer) - @sa @ref basic_json(const int) -- create a number value (integer) - - @since version 1.0.0 - */ - template::value and - std::numeric_limits::is_integer and - std::numeric_limits::is_signed, - CompatibleNumberIntegerType>::type = 0> - basic_json(const CompatibleNumberIntegerType val) noexcept - : m_type(value_t::number_integer), - m_value(static_cast(val)) - { - assert_invariant(); - } - - /*! - @brief create an unsigned integer number (explicit) - - Create an unsigned integer number JSON value with a given content. - - @tparam T helper type to compare number_unsigned_t and unsigned int (not - visible in) the interface. - - @param[in] val an integer to create a JSON number from - - @complexity Constant. - - @sa @ref basic_json(const CompatibleNumberUnsignedType) -- create a number - value (unsigned integer) from a compatible number type - - @since version 2.0.0 - */ - template::value) and - std::is_same::value, int>::type = 0> - basic_json(const number_unsigned_t val) noexcept - : m_type(value_t::number_unsigned), m_value(val) - { - assert_invariant(); - } - - /*! - @brief create an unsigned number (implicit) - - Create an unsigned number JSON value with a given content. 
This - constructor allows any type @a CompatibleNumberUnsignedType that can be - used to construct values of type @ref number_unsigned_t. - - @tparam CompatibleNumberUnsignedType An integer type which is compatible - to @ref number_unsigned_t. Examples may include the types `unsigned int`, - `uint32_t`, or `unsigned short`. - - @param[in] val an unsigned integer to create a JSON number from - - @complexity Constant. - - @sa @ref basic_json(const number_unsigned_t) -- create a number value - (unsigned) - - @since version 2.0.0 - */ - template::value and - std::numeric_limits::is_integer and - not std::numeric_limits::is_signed, - CompatibleNumberUnsignedType>::type = 0> - basic_json(const CompatibleNumberUnsignedType val) noexcept - : m_type(value_t::number_unsigned), - m_value(static_cast(val)) - { - assert_invariant(); - } - - /*! - @brief create a floating-point number (explicit) - - Create a floating-point number JSON value with a given content. - - @param[in] val a floating-point value to create a JSON number from - - @note [RFC 7159](http://www.rfc-editor.org/rfc/rfc7159.txt), section 6 - disallows NaN values: - > Numeric values that cannot be represented in the grammar below (such as - > Infinity and NaN) are not permitted. - In case the parameter @a val is not a number, a JSON null value is created - instead. - - @complexity Constant. - - @liveexample{The following example creates several floating-point - values.,basic_json__number_float_t} - - @sa @ref basic_json(const CompatibleNumberFloatType) -- create a number - value (floating-point) from a compatible number type - - @since version 1.0.0 - */ - basic_json(const number_float_t val) noexcept - : m_type(value_t::number_float), m_value(val) - { - // replace infinity and NAN by null - if (not std::isfinite(val)) - { - m_type = value_t::null; - m_value = json_value(); - } - - assert_invariant(); - } - - /*! 
- @brief create an floating-point number (implicit) - - Create an floating-point number JSON value with a given content. This - constructor allows any type @a CompatibleNumberFloatType that can be used - to construct values of type @ref number_float_t. - - @tparam CompatibleNumberFloatType A floating-point type which is - compatible to @ref number_float_t. Examples may include the types `float` - or `double`. - - @param[in] val a floating-point to create a JSON number from - - @note [RFC 7159](http://www.rfc-editor.org/rfc/rfc7159.txt), section 6 - disallows NaN values: - > Numeric values that cannot be represented in the grammar below (such as - > Infinity and NaN) are not permitted. - In case the parameter @a val is not a number, a JSON null value is - created instead. - - @complexity Constant. - - @liveexample{The example below shows the construction of several - floating-point number values from compatible - types.,basic_json__CompatibleNumberFloatType} - - @sa @ref basic_json(const number_float_t) -- create a number value - (floating-point) - - @since version 1.0.0 - */ - template::value and - std::is_floating_point::value>::type> - basic_json(const CompatibleNumberFloatType val) noexcept - : basic_json(number_float_t(val)) + template, + detail::enable_if_t::value and + not std::is_same::value and + not detail::is_basic_json_nested_type< + basic_json_t, U>::value and + detail::has_to_json::value, + int> = 0> + basic_json(CompatibleType && val) noexcept(noexcept(JSONSerializer::to_json( + std::declval(), std::forward(val)))) { + JSONSerializer::to_json(*this, std::forward(val)); assert_invariant(); } @@ -1655,7 +2103,7 @@ class basic_json // if object is wanted but impossible, throw an exception if (manual_type == value_t::object and not is_an_object) { - throw std::domain_error("cannot create object from initializer list"); + JSON_THROW(std::domain_error("cannot create object from initializer list")); } } @@ -1833,7 +2281,7 @@ class basic_json // make sure 
iterator fits the current value if (first.m_object != last.m_object) { - throw std::domain_error("iterators are not compatible"); + JSON_THROW(std::domain_error("iterators are not compatible")); } // copy type from first iterator @@ -1850,7 +2298,7 @@ class basic_json { if (not first.m_it.primitive_iterator.is_begin() or not last.m_it.primitive_iterator.is_end()) { - throw std::out_of_range("iterators out of range"); + JSON_THROW(std::out_of_range("iterators out of range")); } break; } @@ -1895,19 +2343,21 @@ class basic_json case value_t::object: { - m_value.object = create(first.m_it.object_iterator, last.m_it.object_iterator); + m_value.object = create(first.m_it.object_iterator, + last.m_it.object_iterator); break; } case value_t::array: { - m_value.array = create(first.m_it.array_iterator, last.m_it.array_iterator); + m_value.array = create(first.m_it.array_iterator, + last.m_it.array_iterator); break; } default: { - throw std::domain_error("cannot use construct with iterators from " + first.m_object->type_name()); + JSON_THROW(std::domain_error("cannot use construct with iterators from " + first.m_object->type_name())); } } @@ -2197,14 +2647,6 @@ class basic_json string_t dump(const int indent = -1) const { std::stringstream ss; - // fix locale problems - ss.imbue(std::locale::classic()); - - // 6, 15 or 16 digits of precision allows round-trip IEEE 754 - // string->float->string, string->double->string or string->long - // double->string; to be safe, we read this value from - // std::numeric_limits::digits10 - ss.precision(std::numeric_limits::digits10); if (indent >= 0) { @@ -2580,244 +3022,97 @@ class basic_json // value access // ////////////////// - /// get an object (explicit) - template::value and - std::is_convertible::value, int>::type = 0> - T get_impl(T*) const - { - if (is_object()) - { - return T(m_value.object->begin(), m_value.object->end()); - } - else - { - throw std::domain_error("type must be object, but is " + type_name()); - } - } - - /// 
get an object (explicit) - object_t get_impl(object_t*) const - { - if (is_object()) - { - return *(m_value.object); - } - else - { - throw std::domain_error("type must be object, but is " + type_name()); - } - } - - /// get an array (explicit) - template::value and - not std::is_same::value and - not std::is_arithmetic::value and - not std::is_convertible::value and - not has_mapped_type::value, int>::type = 0> - T get_impl(T*) const - { - if (is_array()) - { - T to_vector; - std::transform(m_value.array->begin(), m_value.array->end(), - std::inserter(to_vector, to_vector.end()), [](basic_json i) - { - return i.get(); - }); - return to_vector; - } - else - { - throw std::domain_error("type must be array, but is " + type_name()); - } - } - - /// get an array (explicit) - template::value and - not std::is_same::value, int>::type = 0> - std::vector get_impl(std::vector*) const - { - if (is_array()) - { - std::vector to_vector; - to_vector.reserve(m_value.array->size()); - std::transform(m_value.array->begin(), m_value.array->end(), - std::inserter(to_vector, to_vector.end()), [](basic_json i) - { - return i.get(); - }); - return to_vector; - } - else - { - throw std::domain_error("type must be array, but is " + type_name()); - } - } - - /// get an array (explicit) - template::value and - not has_mapped_type::value, int>::type = 0> - T get_impl(T*) const - { - if (is_array()) - { - return T(m_value.array->begin(), m_value.array->end()); - } - else - { - throw std::domain_error("type must be array, but is " + type_name()); - } - } - - /// get an array (explicit) - array_t get_impl(array_t*) const - { - if (is_array()) - { - return *(m_value.array); - } - else - { - throw std::domain_error("type must be array, but is " + type_name()); - } - } - - /// get a string (explicit) - template::value, int>::type = 0> - T get_impl(T*) const - { - if (is_string()) - { - return *m_value.string; - } - else - { - throw std::domain_error("type must be string, but is " + type_name()); 
- } - } - - /// get a number (explicit) - template::value, int>::type = 0> - T get_impl(T*) const - { - switch (m_type) - { - case value_t::number_integer: - { - return static_cast(m_value.number_integer); - } - - case value_t::number_unsigned: - { - return static_cast(m_value.number_unsigned); - } - - case value_t::number_float: - { - return static_cast(m_value.number_float); - } - - default: - { - throw std::domain_error("type must be number, but is " + type_name()); - } - } - } - /// get a boolean (explicit) - constexpr boolean_t get_impl(boolean_t*) const + boolean_t get_impl(boolean_t* /*unused*/) const { - return is_boolean() - ? m_value.boolean - : throw std::domain_error("type must be boolean, but is " + type_name()); + if (is_boolean()) + { + return m_value.boolean; + } + + JSON_THROW(std::domain_error("type must be boolean, but is " + type_name())); } /// get a pointer to the value (object) - object_t* get_impl_ptr(object_t*) noexcept + object_t* get_impl_ptr(object_t* /*unused*/) noexcept { return is_object() ? m_value.object : nullptr; } /// get a pointer to the value (object) - constexpr const object_t* get_impl_ptr(const object_t*) const noexcept + constexpr const object_t* get_impl_ptr(const object_t* /*unused*/) const noexcept { return is_object() ? m_value.object : nullptr; } /// get a pointer to the value (array) - array_t* get_impl_ptr(array_t*) noexcept + array_t* get_impl_ptr(array_t* /*unused*/) noexcept { return is_array() ? m_value.array : nullptr; } /// get a pointer to the value (array) - constexpr const array_t* get_impl_ptr(const array_t*) const noexcept + constexpr const array_t* get_impl_ptr(const array_t* /*unused*/) const noexcept { return is_array() ? m_value.array : nullptr; } /// get a pointer to the value (string) - string_t* get_impl_ptr(string_t*) noexcept + string_t* get_impl_ptr(string_t* /*unused*/) noexcept { return is_string() ? 
m_value.string : nullptr; } /// get a pointer to the value (string) - constexpr const string_t* get_impl_ptr(const string_t*) const noexcept + constexpr const string_t* get_impl_ptr(const string_t* /*unused*/) const noexcept { return is_string() ? m_value.string : nullptr; } /// get a pointer to the value (boolean) - boolean_t* get_impl_ptr(boolean_t*) noexcept + boolean_t* get_impl_ptr(boolean_t* /*unused*/) noexcept { return is_boolean() ? &m_value.boolean : nullptr; } /// get a pointer to the value (boolean) - constexpr const boolean_t* get_impl_ptr(const boolean_t*) const noexcept + constexpr const boolean_t* get_impl_ptr(const boolean_t* /*unused*/) const noexcept { return is_boolean() ? &m_value.boolean : nullptr; } /// get a pointer to the value (integer number) - number_integer_t* get_impl_ptr(number_integer_t*) noexcept + number_integer_t* get_impl_ptr(number_integer_t* /*unused*/) noexcept { return is_number_integer() ? &m_value.number_integer : nullptr; } /// get a pointer to the value (integer number) - constexpr const number_integer_t* get_impl_ptr(const number_integer_t*) const noexcept + constexpr const number_integer_t* get_impl_ptr(const number_integer_t* /*unused*/) const noexcept { return is_number_integer() ? &m_value.number_integer : nullptr; } /// get a pointer to the value (unsigned number) - number_unsigned_t* get_impl_ptr(number_unsigned_t*) noexcept + number_unsigned_t* get_impl_ptr(number_unsigned_t* /*unused*/) noexcept { return is_number_unsigned() ? &m_value.number_unsigned : nullptr; } /// get a pointer to the value (unsigned number) - constexpr const number_unsigned_t* get_impl_ptr(const number_unsigned_t*) const noexcept + constexpr const number_unsigned_t* get_impl_ptr(const number_unsigned_t* /*unused*/) const noexcept { return is_number_unsigned() ? 
&m_value.number_unsigned : nullptr; } /// get a pointer to the value (floating-point number) - number_float_t* get_impl_ptr(number_float_t*) noexcept + number_float_t* get_impl_ptr(number_float_t* /*unused*/) noexcept { return is_number_float() ? &m_value.number_float : nullptr; } /// get a pointer to the value (floating-point number) - constexpr const number_float_t* get_impl_ptr(const number_float_t*) const noexcept + constexpr const number_float_t* get_impl_ptr(const number_float_t* /*unused*/) const noexcept { return is_number_float() ? &m_value.number_float : nullptr; } @@ -2846,34 +3141,69 @@ class basic_json { return *ptr; } - else - { - throw std::domain_error("incompatible ReferenceType for get_ref, actual type is " + - obj.type_name()); - } + + JSON_THROW(std::domain_error("incompatible ReferenceType for get_ref, actual type is " + + obj.type_name())); } public: - /// @name value access /// Direct access to the stored value of a JSON value. /// @{ + /*! + @brief get special-case overload + + This overloads avoids a lot of template boilerplate, it can be seen as the + identity method + + @tparam BasicJsonType == @ref basic_json + + @return a copy of *this + + @complexity Constant. + + @since version 2.1.0 + */ + template < + typename BasicJsonType, + detail::enable_if_t::type, + basic_json_t>::value, + int> = 0 > + basic_json get() const + { + return *this; + } + /*! @brief get a value (explicit) - Explicit type conversion between the JSON value and a compatible value. + Explicit type conversion between the JSON value and a compatible value + which is [CopyConstructible](http://en.cppreference.com/w/cpp/concept/CopyConstructible) + and [DefaultConstructible](http://en.cppreference.com/w/cpp/concept/DefaultConstructible). + The value is converted by calling the @ref json_serializer + `from_json()` method. 
- @tparam ValueType non-pointer type compatible to the JSON value, for - instance `int` for JSON integer numbers, `bool` for JSON booleans, or - `std::vector` types for JSON arrays + The function is equivalent to executing + @code {.cpp} + ValueType ret; + JSONSerializer::from_json(*this, ret); + return ret; + @endcode - @return copy of the JSON value, converted to type @a ValueType + This overloads is chosen if: + - @a ValueType is not @ref basic_json, + - @ref json_serializer has a `from_json()` method of the form + `void from_json(const @ref basic_json&, ValueType&)`, and + - @ref json_serializer does not have a `from_json()` method of + the form `ValueType from_json(const @ref basic_json&)` - @throw std::domain_error in case passed type @a ValueType is incompatible - to JSON; example: `"type must be object, but is null"` + @tparam ValueTypeCV the provided value type + @tparam ValueType the returned value type - @complexity Linear in the size of the JSON value. + @return copy of the JSON value, converted to @a ValueType + + @throw what @ref json_serializer `from_json()` method throws @liveexample{The example below shows several conversions from JSON values to other types. There a few things to note: (1) Floating-point numbers can @@ -2882,21 +3212,75 @@ class basic_json associative containers such as `std::unordered_map`.,get__ValueType_const} - @internal - The idea of using a casted null pointer to choose the correct - implementation is from . 
- @endinternal - - @sa @ref operator ValueType() const for implicit conversion - @sa @ref get() for pointer-member access - - @since version 1.0.0 + @since version 2.1.0 */ - template::value, int>::type = 0> - ValueType get() const + template < + typename ValueTypeCV, + typename ValueType = detail::uncvref_t, + detail::enable_if_t < + not std::is_same::value and + detail::has_from_json::value and + not detail::has_non_default_from_json::value, + int > = 0 > + ValueType get() const noexcept(noexcept( + JSONSerializer::from_json(std::declval(), std::declval()))) { - return get_impl(static_cast(nullptr)); + // we cannot static_assert on ValueTypeCV being non-const, because + // there is support for get(), which is why we + // still need the uncvref + static_assert(not std::is_reference::value, + "get() cannot be used with reference types, you might want to use get_ref()"); + static_assert(std::is_default_constructible::value, + "types must be DefaultConstructible when used with get()"); + + ValueType ret; + JSONSerializer::from_json(*this, ret); + return ret; + } + + /*! + @brief get a value (explicit); special case + + Explicit type conversion between the JSON value and a compatible value + which is **not** [CopyConstructible](http://en.cppreference.com/w/cpp/concept/CopyConstructible) + and **not** [DefaultConstructible](http://en.cppreference.com/w/cpp/concept/DefaultConstructible). + The value is converted by calling the @ref json_serializer + `from_json()` method. + + The function is equivalent to executing + @code {.cpp} + return JSONSerializer::from_json(*this); + @endcode + + This overloads is chosen if: + - @a ValueType is not @ref basic_json and + - @ref json_serializer has a `from_json()` method of the form + `ValueType from_json(const @ref basic_json&)` + + @note If @ref json_serializer has both overloads of + `from_json()`, this one is chosen. 
+ + @tparam ValueTypeCV the provided value type + @tparam ValueType the returned value type + + @return copy of the JSON value, converted to @a ValueType + + @throw what @ref json_serializer `from_json()` method throws + + @since version 2.1.0 + */ + template < + typename ValueTypeCV, + typename ValueType = detail::uncvref_t, + detail::enable_if_t::value and + detail::has_non_default_from_json::value, int> = 0 > + ValueType get() const noexcept(noexcept( + JSONSerializer::from_json(std::declval()))) + { + static_assert(not std::is_reference::value, + "get() cannot be used with reference types, you might want to use get_ref()"); + return JSONSerializer::from_json(*this); } /*! @@ -3026,7 +3410,7 @@ class basic_json /*! @brief get a reference value (implicit) - Implict reference access to the internally stored JSON value. No copies + Implicit reference access to the internally stored JSON value. No copies are made. @warning Writing data to the referee of the result yields an undefined @@ -3101,7 +3485,7 @@ class basic_json template < typename ValueType, typename std::enable_if < not std::is_pointer::value and not std::is_same::value -#ifndef _MSC_VER // Fix for issue #167 operator<< abiguity under VS2015 +#ifndef _MSC_VER // fix for issue #167 operator<< ambiguity under VS2015 and not std::is_same>::value #endif , int >::type = 0 > @@ -3149,19 +3533,19 @@ class basic_json // at only works for arrays if (is_array()) { - try + JSON_TRY { return m_value.array->at(idx); } - catch (std::out_of_range&) + JSON_CATCH (std::out_of_range&) { // create better exception explanation - throw std::out_of_range("array index " + std::to_string(idx) + " is out of range"); + JSON_THROW(std::out_of_range("array index " + std::to_string(idx) + " is out of range")); } } else { - throw std::domain_error("cannot use at() with " + type_name()); + JSON_THROW(std::domain_error("cannot use at() with " + type_name())); } } @@ -3192,19 +3576,19 @@ class basic_json // at only works for arrays if 
(is_array()) { - try + JSON_TRY { return m_value.array->at(idx); } - catch (std::out_of_range&) + JSON_CATCH (std::out_of_range&) { // create better exception explanation - throw std::out_of_range("array index " + std::to_string(idx) + " is out of range"); + JSON_THROW(std::out_of_range("array index " + std::to_string(idx) + " is out of range")); } } else { - throw std::domain_error("cannot use at() with " + type_name()); + JSON_THROW(std::domain_error("cannot use at() with " + type_name())); } } @@ -3239,19 +3623,19 @@ class basic_json // at only works for objects if (is_object()) { - try + JSON_TRY { return m_value.object->at(key); } - catch (std::out_of_range&) + JSON_CATCH (std::out_of_range&) { // create better exception explanation - throw std::out_of_range("key '" + key + "' not found"); + JSON_THROW(std::out_of_range("key '" + key + "' not found")); } } else { - throw std::domain_error("cannot use at() with " + type_name()); + JSON_THROW(std::domain_error("cannot use at() with " + type_name())); } } @@ -3286,19 +3670,19 @@ class basic_json // at only works for objects if (is_object()) { - try + JSON_TRY { return m_value.object->at(key); } - catch (std::out_of_range&) + JSON_CATCH (std::out_of_range&) { // create better exception explanation - throw std::out_of_range("key '" + key + "' not found"); + JSON_THROW(std::out_of_range("key '" + key + "' not found")); } } else { - throw std::domain_error("cannot use at() with " + type_name()); + JSON_THROW(std::domain_error("cannot use at() with " + type_name())); } } @@ -3350,10 +3734,8 @@ class basic_json return m_value.array->operator[](idx); } - else - { - throw std::domain_error("cannot use operator[] with " + type_name()); - } + + JSON_THROW(std::domain_error("cannot use operator[] with " + type_name())); } /*! 
@@ -3382,10 +3764,8 @@ class basic_json { return m_value.array->operator[](idx); } - else - { - throw std::domain_error("cannot use operator[] with " + type_name()); - } + + JSON_THROW(std::domain_error("cannot use operator[] with " + type_name())); } /*! @@ -3430,10 +3810,8 @@ class basic_json { return m_value.object->operator[](key); } - else - { - throw std::domain_error("cannot use operator[] with " + type_name()); - } + + JSON_THROW(std::domain_error("cannot use operator[] with " + type_name())); } /*! @@ -3474,10 +3852,8 @@ class basic_json assert(m_value.object->find(key) != m_value.object->end()); return m_value.object->find(key)->second; } - else - { - throw std::domain_error("cannot use operator[] with " + type_name()); - } + + JSON_THROW(std::domain_error("cannot use operator[] with " + type_name())); } /*! @@ -3591,10 +3967,8 @@ class basic_json { return m_value.object->operator[](key); } - else - { - throw std::domain_error("cannot use operator[] with " + type_name()); - } + + JSON_THROW(std::domain_error("cannot use operator[] with " + type_name())); } /*! @@ -3636,10 +4010,8 @@ class basic_json assert(m_value.object->find(key) != m_value.object->end()); return m_value.object->find(key)->second; } - else - { - throw std::domain_error("cannot use operator[] with " + type_name()); - } + + JSON_THROW(std::domain_error("cannot use operator[] with " + type_name())); } /*! 
@@ -3703,14 +4075,12 @@ class basic_json { return *it; } - else - { - return default_value; - } + + return default_value; } else { - throw std::domain_error("cannot use value() with " + type_name()); + JSON_THROW(std::domain_error("cannot use value() with " + type_name())); } } @@ -3772,19 +4142,17 @@ class basic_json if (is_object()) { // if pointer resolves a value, return it or use default value - try + JSON_TRY { return ptr.get_checked(this); } - catch (std::out_of_range&) + JSON_CATCH (std::out_of_range&) { return default_value; } } - else - { - throw std::domain_error("cannot use value() with " + type_name()); - } + + JSON_THROW(std::domain_error("cannot use value() with " + type_name())); } /*! @@ -3910,7 +4278,7 @@ class basic_json @complexity The complexity depends on the type: - objects: amortized constant - - arrays: linear in distance between pos and the end of the container + - arrays: linear in distance between @a pos and the end of the container - strings: linear in the length of the string - other types: constant @@ -3935,7 +4303,7 @@ class basic_json // make sure iterator fits the current value if (this != pos.m_object) { - throw std::domain_error("iterator does not fit current value"); + JSON_THROW(std::domain_error("iterator does not fit current value")); } IteratorType result = end(); @@ -3950,7 +4318,7 @@ class basic_json { if (not pos.m_it.primitive_iterator.is_begin()) { - throw std::out_of_range("iterator out of range"); + JSON_THROW(std::out_of_range("iterator out of range")); } if (is_string()) @@ -3980,7 +4348,7 @@ class basic_json default: { - throw std::domain_error("cannot use erase() with " + type_name()); + JSON_THROW(std::domain_error("cannot use erase() with " + type_name())); } } @@ -4042,7 +4410,7 @@ class basic_json // make sure iterator fits the current value if (this != first.m_object or this != last.m_object) { - throw std::domain_error("iterators do not fit current value"); + JSON_THROW(std::domain_error("iterators do not 
fit current value")); } IteratorType result = end(); @@ -4057,7 +4425,7 @@ class basic_json { if (not first.m_it.primitive_iterator.is_begin() or not last.m_it.primitive_iterator.is_end()) { - throw std::out_of_range("iterators out of range"); + JSON_THROW(std::out_of_range("iterators out of range")); } if (is_string()) @@ -4089,7 +4457,7 @@ class basic_json default: { - throw std::domain_error("cannot use erase() with " + type_name()); + JSON_THROW(std::domain_error("cannot use erase() with " + type_name())); } } @@ -4132,10 +4500,8 @@ class basic_json { return m_value.object->erase(key); } - else - { - throw std::domain_error("cannot use erase() with " + type_name()); - } + + JSON_THROW(std::domain_error("cannot use erase() with " + type_name())); } /*! @@ -4169,14 +4535,14 @@ class basic_json { if (idx >= size()) { - throw std::out_of_range("array index " + std::to_string(idx) + " is out of range"); + JSON_THROW(std::out_of_range("array index " + std::to_string(idx) + " is out of range")); } m_value.array->erase(m_value.array->begin() + static_cast(idx)); } else { - throw std::domain_error("cannot use erase() with " + type_name()); + JSON_THROW(std::domain_error("cannot use erase() with " + type_name())); } } @@ -4894,7 +5260,7 @@ class basic_json // push_back only works for null objects or arrays if (not(is_null() or is_array())) { - throw std::domain_error("cannot use push_back() with " + type_name()); + JSON_THROW(std::domain_error("cannot use push_back() with " + type_name())); } // transform null object into an array @@ -4930,7 +5296,7 @@ class basic_json // push_back only works for null objects or arrays if (not(is_null() or is_array())) { - throw std::domain_error("cannot use push_back() with " + type_name()); + JSON_THROW(std::domain_error("cannot use push_back() with " + type_name())); } // transform null object into an array @@ -4980,7 +5346,7 @@ class basic_json // push_back only works for null objects or objects if (not(is_null() or is_object())) { - 
throw std::domain_error("cannot use push_back() with " + type_name()); + JSON_THROW(std::domain_error("cannot use push_back() with " + type_name())); } // transform null object into an object @@ -5080,7 +5446,7 @@ class basic_json // emplace_back only works for null objects or arrays if (not(is_null() or is_array())) { - throw std::domain_error("cannot use emplace_back() with " + type_name()); + JSON_THROW(std::domain_error("cannot use emplace_back() with " + type_name())); } // transform null object into an array @@ -5098,8 +5464,8 @@ class basic_json /*! @brief add an object to an object if key does not exist - Inserts a new element into a JSON object constructed in-place with the given - @a args if there is no element with the key in the container. If the + Inserts a new element into a JSON object constructed in-place with the + given @a args if there is no element with the key in the container. If the function is called on a JSON null value, an empty object is created before appending the value created from @a args. @@ -5128,7 +5494,7 @@ class basic_json // emplace only works for null objects or arrays if (not(is_null() or is_object())) { - throw std::domain_error("cannot use emplace() with " + type_name()); + JSON_THROW(std::domain_error("cannot use emplace() with " + type_name())); } // transform null object into an object @@ -5164,8 +5530,8 @@ class basic_json @throw std::domain_error if @a pos is not an iterator of *this; example: `"iterator does not fit current value"` - @complexity Constant plus linear in the distance between pos and end of the - container. + @complexity Constant plus linear in the distance between @a pos and end of + the container. 
@liveexample{The example shows how `insert()` is used.,insert} @@ -5179,7 +5545,7 @@ class basic_json // check if iterator pos fits to this JSON value if (pos.m_object != this) { - throw std::domain_error("iterator does not fit current value"); + JSON_THROW(std::domain_error("iterator does not fit current value")); } // insert to array and return iterator @@ -5187,10 +5553,8 @@ class basic_json result.m_it.array_iterator = m_value.array->insert(pos.m_it.array_iterator, val); return result; } - else - { - throw std::domain_error("cannot use insert() with " + type_name()); - } + + JSON_THROW(std::domain_error("cannot use insert() with " + type_name())); } /*! @@ -5234,7 +5598,7 @@ class basic_json // check if iterator pos fits to this JSON value if (pos.m_object != this) { - throw std::domain_error("iterator does not fit current value"); + JSON_THROW(std::domain_error("iterator does not fit current value")); } // insert to array and return iterator @@ -5242,10 +5606,8 @@ class basic_json result.m_it.array_iterator = m_value.array->insert(pos.m_it.array_iterator, cnt, val); return result; } - else - { - throw std::domain_error("cannot use insert() with " + type_name()); - } + + JSON_THROW(std::domain_error("cannot use insert() with " + type_name())); } /*! 
@@ -5283,24 +5645,24 @@ class basic_json // insert only works for arrays if (not is_array()) { - throw std::domain_error("cannot use insert() with " + type_name()); + JSON_THROW(std::domain_error("cannot use insert() with " + type_name())); } // check if iterator pos fits to this JSON value if (pos.m_object != this) { - throw std::domain_error("iterator does not fit current value"); + JSON_THROW(std::domain_error("iterator does not fit current value")); } // check if range iterators belong to the same JSON object if (first.m_object != last.m_object) { - throw std::domain_error("iterators do not fit"); + JSON_THROW(std::domain_error("iterators do not fit")); } if (first.m_object == this or last.m_object == this) { - throw std::domain_error("passed iterators may not belong to container"); + JSON_THROW(std::domain_error("passed iterators may not belong to container")); } // insert to array and return iterator @@ -5341,13 +5703,13 @@ class basic_json // insert only works for arrays if (not is_array()) { - throw std::domain_error("cannot use insert() with " + type_name()); + JSON_THROW(std::domain_error("cannot use insert() with " + type_name())); } // check if iterator pos fits to this JSON value if (pos.m_object != this) { - throw std::domain_error("iterator does not fit current value"); + JSON_THROW(std::domain_error("iterator does not fit current value")); } // insert to array and return iterator @@ -5395,8 +5757,8 @@ class basic_json @param[in,out] other array to exchange the contents with - @throw std::domain_error when JSON value is not an array; example: `"cannot - use swap() with string"` + @throw std::domain_error when JSON value is not an array; example: + `"cannot use swap() with string"` @complexity Constant. 
@@ -5414,7 +5776,7 @@ class basic_json } else { - throw std::domain_error("cannot use swap() with " + type_name()); + JSON_THROW(std::domain_error("cannot use swap() with " + type_name())); } } @@ -5447,7 +5809,7 @@ class basic_json } else { - throw std::domain_error("cannot use swap() with " + type_name()); + JSON_THROW(std::domain_error("cannot use swap() with " + type_name())); } } @@ -5480,13 +5842,13 @@ class basic_json } else { - throw std::domain_error("cannot use swap() with " + type_name()); + JSON_THROW(std::domain_error("cannot use swap() with " + type_name())); } } /// @} - + public: ////////////////////////////////////////// // lexicographical comparison operators // ////////////////////////////////////////// @@ -5494,40 +5856,6 @@ class basic_json /// @name lexicographical comparison operators /// @{ - private: - /*! - @brief comparison operator for JSON types - - Returns an ordering that is similar to Python: - - order: null < boolean < number < object < array < string - - furthermore, each type is not smaller than itself - - @since version 1.0.0 - */ - friend bool operator<(const value_t lhs, const value_t rhs) noexcept - { - static constexpr std::array order = {{ - 0, // null - 3, // object - 4, // array - 5, // string - 1, // boolean - 2, // integer - 2, // unsigned - 2, // float - } - }; - - // discarded values are not comparable - if (lhs == value_t::discarded or rhs == value_t::discarded) - { - return false; - } - - return order[static_cast(lhs)] < order[static_cast(rhs)]; - } - - public: /*! @brief comparison: equal @@ -5628,34 +5956,24 @@ class basic_json /*! @brief comparison: equal - - The functions compares the given JSON value against a null pointer. As the - null pointer can be used to initialize a JSON value to null, a comparison - of JSON value @a v with a null pointer should be equivalent to call - `v.is_null()`. - - @param[in] v JSON value to consider - @return whether @a v is null - - @complexity Constant. 
- - @liveexample{The example compares several JSON types to the null pointer. - ,operator__equal__nullptr_t} - - @since version 1.0.0 + @copydoc operator==(const_reference, const_reference) */ - friend bool operator==(const_reference v, std::nullptr_t) noexcept + template::value, int>::type = 0> + friend bool operator==(const_reference lhs, const ScalarType rhs) noexcept { - return v.is_null(); + return (lhs == basic_json(rhs)); } /*! @brief comparison: equal - @copydoc operator==(const_reference, std::nullptr_t) + @copydoc operator==(const_reference, const_reference) */ - friend bool operator==(std::nullptr_t, const_reference v) noexcept + template::value, int>::type = 0> + friend bool operator==(const ScalarType lhs, const_reference rhs) noexcept { - return v.is_null(); + return (basic_json(lhs) == rhs); } /*! @@ -5681,34 +5999,24 @@ class basic_json /*! @brief comparison: not equal - - The functions compares the given JSON value against a null pointer. As the - null pointer can be used to initialize a JSON value to null, a comparison - of JSON value @a v with a null pointer should be equivalent to call - `not v.is_null()`. - - @param[in] v JSON value to consider - @return whether @a v is not null - - @complexity Constant. - - @liveexample{The example compares several JSON types to the null pointer. - ,operator__notequal__nullptr_t} - - @since version 1.0.0 + @copydoc operator!=(const_reference, const_reference) */ - friend bool operator!=(const_reference v, std::nullptr_t) noexcept + template::value, int>::type = 0> + friend bool operator!=(const_reference lhs, const ScalarType rhs) noexcept { - return not v.is_null(); + return (lhs != basic_json(rhs)); } /*! 
@brief comparison: not equal - @copydoc operator!=(const_reference, std::nullptr_t) + @copydoc operator!=(const_reference, const_reference) */ - friend bool operator!=(std::nullptr_t, const_reference v) noexcept + template::value, int>::type = 0> + friend bool operator!=(const ScalarType lhs, const_reference rhs) noexcept { - return not v.is_null(); + return (basic_json(lhs) != rhs); } /*! @@ -5899,10 +6207,6 @@ class basic_json `std::setw(4)` on @a o sets the indentation level to `4` and the serialization result is the same as calling `dump(4)`. - @note During serializaion, the locale and the precision of the output - stream @a o are changed. The original values are restored when the - function returns. - @param[in,out] o stream to serialize to @param[in] j JSON value to serialize @@ -5924,22 +6228,9 @@ class basic_json // reset width to 0 for subsequent calls to this stream o.width(0); - // fix locale problems - const auto old_locale = o.imbue(std::locale::classic()); - // set precision - - // 6, 15 or 16 digits of precision allows round-trip IEEE 754 - // string->float->string, string->double->string or string->long - // double->string; to be safe, we read this value from - // std::numeric_limits::digits10 - const auto old_precision = o.precision(std::numeric_limits::digits10); - // do the actual serialization j.dump(o, pretty_print, static_cast(indentation)); - // reset locale and precision - o.imbue(old_locale); - o.precision(old_precision); return o; } @@ -6123,7 +6414,7 @@ class basic_json { // assertion to check that the iterator range is indeed contiguous, // see http://stackoverflow.com/a/35008842/266378 for more discussion - assert(std::accumulate(first, last, std::make_pair(true, 0), + assert(std::accumulate(first, last, std::pair(true, 0), [&first](std::pair res, decltype(*first) val) { res.first &= (val == *(std::next(std::addressof(*first), res.second++))); @@ -6246,6 +6537,11 @@ class basic_json /// @{ private: + /*! 
+ @note Some code in the switch cases has been copied, because otherwise + copilers would complain about implicit fallthrough and there is no + portable attribute to mute such warnings. + */ template static void add_to_vector(std::vector& vec, size_t bytes, const T number) { @@ -6255,24 +6551,31 @@ class basic_json { case 8: { - vec.push_back(static_cast((number >> 070) & 0xff)); - vec.push_back(static_cast((number >> 060) & 0xff)); - vec.push_back(static_cast((number >> 050) & 0xff)); - vec.push_back(static_cast((number >> 040) & 0xff)); - // intentional fall-through + vec.push_back(static_cast((static_cast(number) >> 070) & 0xff)); + vec.push_back(static_cast((static_cast(number) >> 060) & 0xff)); + vec.push_back(static_cast((static_cast(number) >> 050) & 0xff)); + vec.push_back(static_cast((static_cast(number) >> 040) & 0xff)); + vec.push_back(static_cast((number >> 030) & 0xff)); + vec.push_back(static_cast((number >> 020) & 0xff)); + vec.push_back(static_cast((number >> 010) & 0xff)); + vec.push_back(static_cast(number & 0xff)); + break; } case 4: { vec.push_back(static_cast((number >> 030) & 0xff)); vec.push_back(static_cast((number >> 020) & 0xff)); - // intentional fall-through + vec.push_back(static_cast((number >> 010) & 0xff)); + vec.push_back(static_cast(number & 0xff)); + break; } case 2: { vec.push_back(static_cast((number >> 010) & 0xff)); - // intentional fall-through + vec.push_back(static_cast(number & 0xff)); + break; } case 1: @@ -6324,11 +6627,11 @@ class basic_json { if (current_index + sizeof(T) + 1 > vec.size()) { - throw std::out_of_range("cannot read " + std::to_string(sizeof(T)) + " bytes from vector"); + JSON_THROW(std::out_of_range("cannot read " + std::to_string(sizeof(T)) + " bytes from vector")); } T result; - uint8_t* ptr = reinterpret_cast(&result); + auto* ptr = reinterpret_cast(&result); for (size_t i = 0; i < sizeof(T); ++i) { *ptr++ = vec[current_index + sizeof(T) - i]; @@ -6369,32 +6672,33 @@ class basic_json if 
(j.m_value.number_integer >= 0) { // MessagePack does not differentiate between positive - // signed integers and unsigned integers. Therefore, we used - // the code from the value_t::number_unsigned case here. + // signed integers and unsigned integers. Therefore, we + // used the code from the value_t::number_unsigned case + // here. if (j.m_value.number_unsigned < 128) { // positive fixnum add_to_vector(v, 1, j.m_value.number_unsigned); } - else if (j.m_value.number_unsigned <= UINT8_MAX) + else if (j.m_value.number_unsigned <= std::numeric_limits::max()) { // uint 8 v.push_back(0xcc); add_to_vector(v, 1, j.m_value.number_unsigned); } - else if (j.m_value.number_unsigned <= UINT16_MAX) + else if (j.m_value.number_unsigned <= std::numeric_limits::max()) { // uint 16 v.push_back(0xcd); add_to_vector(v, 2, j.m_value.number_unsigned); } - else if (j.m_value.number_unsigned <= UINT32_MAX) + else if (j.m_value.number_unsigned <= std::numeric_limits::max()) { // uint 32 v.push_back(0xce); add_to_vector(v, 4, j.m_value.number_unsigned); } - else if (j.m_value.number_unsigned <= UINT64_MAX) + else if (j.m_value.number_unsigned <= std::numeric_limits::max()) { // uint 64 v.push_back(0xcf); @@ -6408,25 +6712,25 @@ class basic_json // negative fixnum add_to_vector(v, 1, j.m_value.number_integer); } - else if (j.m_value.number_integer >= INT8_MIN and j.m_value.number_integer <= INT8_MAX) + else if (j.m_value.number_integer >= std::numeric_limits::min() and j.m_value.number_integer <= std::numeric_limits::max()) { // int 8 v.push_back(0xd0); add_to_vector(v, 1, j.m_value.number_integer); } - else if (j.m_value.number_integer >= INT16_MIN and j.m_value.number_integer <= INT16_MAX) + else if (j.m_value.number_integer >= std::numeric_limits::min() and j.m_value.number_integer <= std::numeric_limits::max()) { // int 16 v.push_back(0xd1); add_to_vector(v, 2, j.m_value.number_integer); } - else if (j.m_value.number_integer >= INT32_MIN and j.m_value.number_integer <= INT32_MAX) + 
else if (j.m_value.number_integer >= std::numeric_limits::min() and j.m_value.number_integer <= std::numeric_limits::max()) { // int 32 v.push_back(0xd2); add_to_vector(v, 4, j.m_value.number_integer); } - else if (j.m_value.number_integer >= INT64_MIN and j.m_value.number_integer <= INT64_MAX) + else if (j.m_value.number_integer >= std::numeric_limits::min() and j.m_value.number_integer <= std::numeric_limits::max()) { // int 64 v.push_back(0xd3); @@ -6443,25 +6747,25 @@ class basic_json // positive fixnum add_to_vector(v, 1, j.m_value.number_unsigned); } - else if (j.m_value.number_unsigned <= UINT8_MAX) + else if (j.m_value.number_unsigned <= std::numeric_limits::max()) { // uint 8 v.push_back(0xcc); add_to_vector(v, 1, j.m_value.number_unsigned); } - else if (j.m_value.number_unsigned <= UINT16_MAX) + else if (j.m_value.number_unsigned <= std::numeric_limits::max()) { // uint 16 v.push_back(0xcd); add_to_vector(v, 2, j.m_value.number_unsigned); } - else if (j.m_value.number_unsigned <= UINT32_MAX) + else if (j.m_value.number_unsigned <= std::numeric_limits::max()) { // uint 32 v.push_back(0xce); add_to_vector(v, 4, j.m_value.number_unsigned); } - else if (j.m_value.number_unsigned <= UINT64_MAX) + else if (j.m_value.number_unsigned <= std::numeric_limits::max()) { // uint 64 v.push_back(0xcf); @@ -6474,7 +6778,7 @@ class basic_json { // float 64 v.push_back(0xcb); - const uint8_t* helper = reinterpret_cast(&(j.m_value.number_float)); + const auto* helper = reinterpret_cast(&(j.m_value.number_float)); for (size_t i = 0; i < 8; ++i) { v.push_back(helper[7 - i]); @@ -6618,19 +6922,19 @@ class basic_json { add_to_vector(v, 1, j.m_value.number_integer); } - else if (j.m_value.number_integer <= UINT8_MAX) + else if (j.m_value.number_integer <= std::numeric_limits::max()) { v.push_back(0x18); // one-byte uint8_t add_to_vector(v, 1, j.m_value.number_integer); } - else if (j.m_value.number_integer <= UINT16_MAX) + else if (j.m_value.number_integer <= 
std::numeric_limits::max()) { v.push_back(0x19); // two-byte uint16_t add_to_vector(v, 2, j.m_value.number_integer); } - else if (j.m_value.number_integer <= UINT32_MAX) + else if (j.m_value.number_integer <= std::numeric_limits::max()) { v.push_back(0x1a); // four-byte uint32_t @@ -6645,26 +6949,26 @@ class basic_json } else { - // The conversions below encode the sign in the first byte, - // and the value is converted to a positive number. + // The conversions below encode the sign in the first + // byte, and the value is converted to a positive number. const auto positive_number = -1 - j.m_value.number_integer; if (j.m_value.number_integer >= -24) { v.push_back(static_cast(0x20 + positive_number)); } - else if (positive_number <= UINT8_MAX) + else if (positive_number <= std::numeric_limits::max()) { // int 8 v.push_back(0x38); add_to_vector(v, 1, positive_number); } - else if (positive_number <= UINT16_MAX) + else if (positive_number <= std::numeric_limits::max()) { // int 16 v.push_back(0x39); add_to_vector(v, 2, positive_number); } - else if (positive_number <= UINT32_MAX) + else if (positive_number <= std::numeric_limits::max()) { // int 32 v.push_back(0x3a); @@ -6717,7 +7021,7 @@ class basic_json { // Double-Precision Float v.push_back(0xfb); - const uint8_t* helper = reinterpret_cast(&(j.m_value.number_float)); + const auto* helper = reinterpret_cast(&(j.m_value.number_float)); for (size_t i = 0; i < 8; ++i) { v.push_back(helper[7 - i]); @@ -6730,7 +7034,7 @@ class basic_json const auto N = j.m_value.string->size(); if (N <= 0x17) { - v.push_back(0x60 + N); // 1 byte for string + size + v.push_back(0x60 + static_cast(N)); // 1 byte for string + size } else if (N <= 0xff) { @@ -6766,7 +7070,7 @@ class basic_json const auto N = j.m_value.array->size(); if (N <= 0x17) { - v.push_back(0x80 + N); // 1 byte for array + size + v.push_back(0x80 + static_cast(N)); // 1 byte for array + size } else if (N <= 0xff) { @@ -6804,7 +7108,7 @@ class basic_json const auto N 
= j.m_value.object->size(); if (N <= 0x17) { - v.push_back(0xa0 + N); // 1 byte for object + size + v.push_back(0xa0 + static_cast(N)); // 1 byte for object + size } else if (N <= 0xff) { @@ -6851,12 +7155,12 @@ class basic_json To secure the access to the byte vector during CBOR/MessagePack deserialization, bytes are copied from the vector into buffers. This - function checks if the number of bytes to copy (@a len) does not exceed the - size @s size of the vector. Additionally, an @a offset is given from where - to start reading the bytes. + function checks if the number of bytes to copy (@a len) does not exceed + the size @s size of the vector. Additionally, an @a offset is given from + where to start reading the bytes. - This function checks whether reading the bytes is safe; that is, offset is a - valid index in the vector, offset+len + This function checks whether reading the bytes is safe; that is, offset is + a valid index in the vector, offset+len @param[in] size size of the byte vector @param[in] len number of bytes to read @@ -6873,19 +7177,19 @@ class basic_json // simple case: requested length is greater than the vector's length if (len > size or offset > size) { - throw std::out_of_range("len out of range"); + JSON_THROW(std::out_of_range("len out of range")); } // second case: adding offset would result in overflow if ((size > (std::numeric_limits::max() - offset))) { - throw std::out_of_range("len+offset out of range"); + JSON_THROW(std::out_of_range("len+offset out of range")); } // last case: reading past the end of the vector if (len + offset > size) { - throw std::out_of_range("len+offset out of range"); + JSON_THROW(std::out_of_range("len+offset out of range")); } } @@ -6917,7 +7221,7 @@ class basic_json { return v[current_idx]; } - else if (v[current_idx] <= 0x8f) // fixmap + if (v[current_idx] <= 0x8f) // fixmap { basic_json result = value_t::object; const size_t len = v[current_idx] & 0x0f; @@ -6973,11 +7277,10 @@ class basic_json case 0xca: 
// float 32 { // copy bytes in reverse order into the double variable - check_length(v.size(), sizeof(float), 1); float res; for (size_t byte = 0; byte < sizeof(float); ++byte) { - reinterpret_cast(&res)[sizeof(float) - byte - 1] = v[current_idx + 1 + byte]; + reinterpret_cast(&res)[sizeof(float) - byte - 1] = v.at(current_idx + 1 + byte); } idx += sizeof(float); // skip content bytes return res; @@ -6986,11 +7289,10 @@ class basic_json case 0xcb: // float 64 { // copy bytes in reverse order into the double variable - check_length(v.size(), sizeof(double), 1); double res; for (size_t byte = 0; byte < sizeof(double); ++byte) { - reinterpret_cast(&res)[sizeof(double) - byte - 1] = v[current_idx + 1 + byte]; + reinterpret_cast(&res)[sizeof(double) - byte - 1] = v.at(current_idx + 1 + byte); } idx += sizeof(double); // skip content bytes return res; @@ -7123,7 +7425,7 @@ class basic_json default: { - throw std::invalid_argument("error parsing a msgpack @ " + std::to_string(current_idx) + ": " + std::to_string(static_cast(v[current_idx]))); + JSON_THROW(std::invalid_argument("error parsing a msgpack @ " + std::to_string(current_idx) + ": " + std::to_string(static_cast(v[current_idx])))); } } } @@ -7552,7 +7854,6 @@ class basic_json case 0xf9: // Half-Precision Float (two-byte IEEE 754) { - check_length(v.size(), 2, 1); idx += 2; // skip two content bytes // code from RFC 7049, Appendix D, Figure 3: @@ -7562,7 +7863,7 @@ class basic_json // include at least decoding support for them even without such // support. An example of a small decoder for half-precision // floating-point numbers in the C language is shown in Fig. 3. - const int half = (v[current_idx + 1] << 8) + v[current_idx + 2]; + const int half = (v.at(current_idx + 1) << 8) + v.at(current_idx + 2); const int exp = (half >> 10) & 0x1f; const int mant = half & 0x3ff; double val; @@ -7576,19 +7877,20 @@ class basic_json } else { - val = mant == 0 ? INFINITY : NAN; + val = mant == 0 + ? 
std::numeric_limits::infinity() + : std::numeric_limits::quiet_NaN(); } - return half & 0x8000 ? -val : val; + return (half & 0x8000) != 0 ? -val : val; } case 0xfa: // Single-Precision Float (four-byte IEEE 754) { // copy bytes in reverse order into the float variable - check_length(v.size(), sizeof(float), 1); float res; for (size_t byte = 0; byte < sizeof(float); ++byte) { - reinterpret_cast(&res)[sizeof(float) - byte - 1] = v[current_idx + 1 + byte]; + reinterpret_cast(&res)[sizeof(float) - byte - 1] = v.at(current_idx + 1 + byte); } idx += sizeof(float); // skip content bytes return res; @@ -7596,12 +7898,11 @@ class basic_json case 0xfb: // Double-Precision Float (eight-byte IEEE 754) { - check_length(v.size(), sizeof(double), 1); // copy bytes in reverse order into the double variable double res; for (size_t byte = 0; byte < sizeof(double); ++byte) { - reinterpret_cast(&res)[sizeof(double) - byte - 1] = v[current_idx + 1 + byte]; + reinterpret_cast(&res)[sizeof(double) - byte - 1] = v.at(current_idx + 1 + byte); } idx += sizeof(double); // skip content bytes return res; @@ -7609,7 +7910,7 @@ class basic_json default: // anything else (0xFF is handled inside the other types) { - throw std::invalid_argument("error parsing a CBOR @ " + std::to_string(current_idx) + ": " + std::to_string(static_cast(v[current_idx]))); + JSON_THROW(std::invalid_argument("error parsing a CBOR @ " + std::to_string(current_idx) + ": " + std::to_string(static_cast(v[current_idx])))); } } } @@ -7631,9 +7932,11 @@ class basic_json vector in MessagePack format.,to_msgpack} @sa http://msgpack.org - @sa @ref from_msgpack(const std::vector&) for the analogous - deserialization + @sa @ref from_msgpack(const std::vector&, const size_t) for the + analogous deserialization @sa @ref to_cbor(const basic_json& for the related CBOR format + + @since version 2.0.9 */ static std::vector to_msgpack(const basic_json& j) { @@ -7649,6 +7952,7 @@ class basic_json serialization format. 
@param[in] v a byte vector in MessagePack format + @param[in] start_index the index to start reading from @a v (0 by default) @return deserialized JSON value @throw std::invalid_argument if unsupported features from MessagePack were @@ -7662,11 +7966,15 @@ class basic_json @sa http://msgpack.org @sa @ref to_msgpack(const basic_json&) for the analogous serialization - @sa @ref from_cbor(const std::vector&) for the related CBOR format + @sa @ref from_cbor(const std::vector&, const size_t) for the + related CBOR format + + @since version 2.0.9, parameter @a start_index since 2.1.1 */ - static basic_json from_msgpack(const std::vector& v) + static basic_json from_msgpack(const std::vector& v, + const size_t start_index = 0) { - size_t i = 0; + size_t i = start_index; return from_msgpack_internal(v, i); } @@ -7687,9 +7995,11 @@ class basic_json vector in CBOR format.,to_cbor} @sa http://cbor.io - @sa @ref from_cbor(const std::vector&) for the analogous - deserialization + @sa @ref from_cbor(const std::vector&, const size_t) for the + analogous deserialization @sa @ref to_msgpack(const basic_json& for the related MessagePack format + + @since version 2.0.9 */ static std::vector to_cbor(const basic_json& j) { @@ -7705,6 +8015,7 @@ class basic_json (Concise Binary Object Representation) serialization format. 
@param[in] v a byte vector in CBOR format + @param[in] start_index the index to start reading from @a v (0 by default) @return deserialized JSON value @throw std::invalid_argument if unsupported features from CBOR were used in @@ -7718,18 +8029,20 @@ class basic_json @sa http://cbor.io @sa @ref to_cbor(const basic_json&) for the analogous serialization - @sa @ref from_msgpack(const std::vector&) for the related - MessagePack format + @sa @ref from_msgpack(const std::vector&, const size_t) for the + related MessagePack format + + @since version 2.0.9, parameter @a start_index since 2.1.1 */ - static basic_json from_cbor(const std::vector& v) + static basic_json from_cbor(const std::vector& v, + const size_t start_index = 0) { - size_t i = 0; + size_t i = start_index; return from_cbor_internal(v, i); } /// @} - private: /////////////////////////// // convenience functions // /////////////////////////// @@ -7744,29 +8057,35 @@ class basic_json @complexity Constant. - @since version 1.0.0 + @liveexample{The following code exemplifies `type_name()` for all JSON + types.,type_name} + + @since version 1.0.0, public since 2.1.0 */ std::string type_name() const { - switch (m_type) { - case value_t::null: - return "null"; - case value_t::object: - return "object"; - case value_t::array: - return "array"; - case value_t::string: - return "string"; - case value_t::boolean: - return "boolean"; - case value_t::discarded: - return "discarded"; - default: - return "number"; + switch (m_type) + { + case value_t::null: + return "null"; + case value_t::object: + return "object"; + case value_t::array: + return "array"; + case value_t::string: + return "string"; + case value_t::boolean: + return "boolean"; + case value_t::discarded: + return "discarded"; + default: + return "number"; + } } } + private: /*! 
@brief calculates the extra space to escape a JSON string @@ -7801,10 +8120,8 @@ class basic_json // from c (1 byte) to \uxxxx (6 bytes) return res + 5; } - else - { - return res; - } + + return res; } } }); @@ -7930,6 +8247,154 @@ class basic_json return result; } + + /*! + @brief locale-independent serialization for built-in arithmetic types + */ + struct numtostr + { + public: + template + numtostr(NumberType value) + { + x_write(value, std::is_integral()); + } + + const char* c_str() const + { + return m_buf.data(); + } + + private: + /// a (hopefully) large enough character buffer + std::array < char, 64 > m_buf{{}}; + + template + void x_write(NumberType x, /*is_integral=*/std::true_type) + { + // special case for "0" + if (x == 0) + { + m_buf[0] = '0'; + return; + } + + const bool is_negative = x < 0; + size_t i = 0; + + // spare 1 byte for '\0' + while (x != 0 and i < m_buf.size() - 1) + { + const auto digit = std::labs(static_cast(x % 10)); + m_buf[i++] = static_cast('0' + digit); + x /= 10; + } + + // make sure the number has been processed completely + assert(x == 0); + + if (is_negative) + { + // make sure there is capacity for the '-' + assert(i < m_buf.size() - 2); + m_buf[i++] = '-'; + } + + std::reverse(m_buf.begin(), m_buf.begin() + i); + } + + template + void x_write(NumberType x, /*is_integral=*/std::false_type) + { + // special case for 0.0 and -0.0 + if (x == 0) + { + size_t i = 0; + if (std::signbit(x)) + { + m_buf[i++] = '-'; + } + m_buf[i++] = '0'; + m_buf[i++] = '.'; + m_buf[i] = '0'; + return; + } + + // get number of digits for a text -> float -> text round-trip + static constexpr auto d = std::numeric_limits::digits10; + + // the actual conversion + const auto written_bytes = snprintf(m_buf.data(), m_buf.size(), "%.*g", d, x); + + // negative value indicates an error + assert(written_bytes > 0); + // check if buffer was large enough + assert(static_cast(written_bytes) < m_buf.size()); + + // read information from locale + const auto loc 
= localeconv(); + assert(loc != nullptr); + const char thousands_sep = !loc->thousands_sep ? '\0' + : loc->thousands_sep[0]; + + const char decimal_point = !loc->decimal_point ? '\0' + : loc->decimal_point[0]; + + // erase thousands separator + if (thousands_sep != '\0') + { + const auto end = std::remove(m_buf.begin(), m_buf.begin() + written_bytes, thousands_sep); + std::fill(end, m_buf.end(), '\0'); + } + + // convert decimal point to '.' + if (decimal_point != '\0' and decimal_point != '.') + { + for (auto& c : m_buf) + { + if (c == decimal_point) + { + c = '.'; + break; + } + } + } + + // determine if need to append ".0" + size_t i = 0; + bool value_is_int_like = true; + for (i = 0; i < m_buf.size(); ++i) + { + // break when end of number is reached + if (m_buf[i] == '\0') + { + break; + } + + // check if we find non-int character + value_is_int_like = value_is_int_like and m_buf[i] != '.' and + m_buf[i] != 'e' and m_buf[i] != 'E'; + } + + if (value_is_int_like) + { + // there must be 2 bytes left for ".0" + assert((i + 2) < m_buf.size()); + // we write to the end of the number + assert(m_buf[i] == '\0'); + assert(m_buf[i - 1] != '\0'); + + // add ".0" + m_buf[i] = '.'; + m_buf[i + 1] = '0'; + + // the resulting string is properly terminated + assert(m_buf[i + 2] == '\0'); + } + } + }; + + /*! @brief internal implementation of the serialization function @@ -8049,27 +8514,19 @@ class basic_json case value_t::number_integer: { - o << m_value.number_integer; + o << numtostr(m_value.number_integer).c_str(); return; } case value_t::number_unsigned: { - o << m_value.number_unsigned; + o << numtostr(m_value.number_unsigned).c_str(); return; } case value_t::number_float: { - if (m_value.number_float == 0) - { - // special case for zero to get "0.0"/"-0.0" - o << (std::signbit(m_value.number_float) ? 
"-0.0" : "0.0"); - } - else - { - o << m_value.number_float; - } + o << numtostr(m_value.number_float).c_str(); return; } @@ -8116,6 +8573,11 @@ class basic_json class primitive_iterator_t { public: + + difference_type get_value() const noexcept + { + return m_it; + } /// set iterator to a defined beginning void set_begin() noexcept { @@ -8140,16 +8602,89 @@ class basic_json return (m_it == end_value); } - /// return reference to the value to change and compare - operator difference_type& () noexcept + friend constexpr bool operator==(primitive_iterator_t lhs, primitive_iterator_t rhs) noexcept { - return m_it; + return lhs.m_it == rhs.m_it; } - /// return value to compare - constexpr operator difference_type () const noexcept + friend constexpr bool operator!=(primitive_iterator_t lhs, primitive_iterator_t rhs) noexcept { - return m_it; + return !(lhs == rhs); + } + + friend constexpr bool operator<(primitive_iterator_t lhs, primitive_iterator_t rhs) noexcept + { + return lhs.m_it < rhs.m_it; + } + + friend constexpr bool operator<=(primitive_iterator_t lhs, primitive_iterator_t rhs) noexcept + { + return lhs.m_it <= rhs.m_it; + } + + friend constexpr bool operator>(primitive_iterator_t lhs, primitive_iterator_t rhs) noexcept + { + return lhs.m_it > rhs.m_it; + } + + friend constexpr bool operator>=(primitive_iterator_t lhs, primitive_iterator_t rhs) noexcept + { + return lhs.m_it >= rhs.m_it; + } + + primitive_iterator_t operator+(difference_type i) + { + auto result = *this; + result += i; + return result; + } + + friend constexpr difference_type operator-(primitive_iterator_t lhs, primitive_iterator_t rhs) noexcept + { + return lhs.m_it - rhs.m_it; + } + + friend std::ostream& operator<<(std::ostream& os, primitive_iterator_t it) + { + return os << it.m_it; + } + + primitive_iterator_t& operator++() + { + ++m_it; + return *this; + } + + primitive_iterator_t operator++(int) + { + auto result = *this; + m_it++; + return result; + } + + primitive_iterator_t& 
operator--() + { + --m_it; + return *this; + } + + primitive_iterator_t operator--(int) + { + auto result = *this; + m_it--; + return result; + } + + primitive_iterator_t& operator+=(difference_type n) + { + m_it += n; + return *this; + } + + primitive_iterator_t& operator-=(difference_type n) + { + m_it -= n; + return *this; } private: @@ -8501,7 +9036,7 @@ class basic_json case basic_json::value_t::null: { - throw std::out_of_range("cannot get value"); + JSON_THROW(std::out_of_range("cannot get value")); } default: @@ -8510,10 +9045,8 @@ class basic_json { return *m_object; } - else - { - throw std::out_of_range("cannot get value"); - } + + JSON_THROW(std::out_of_range("cannot get value")); } } } @@ -8546,10 +9079,8 @@ class basic_json { return m_object; } - else - { - throw std::out_of_range("cannot get value"); - } + + JSON_THROW(std::out_of_range("cannot get value")); } } } @@ -8649,7 +9180,7 @@ class basic_json // if objects are not the same, the comparison is undefined if (m_object != other.m_object) { - throw std::domain_error("cannot compare iterators of different containers"); + JSON_THROW(std::domain_error("cannot compare iterators of different containers")); } assert(m_object != nullptr); @@ -8691,7 +9222,7 @@ class basic_json // if objects are not the same, the comparison is undefined if (m_object != other.m_object) { - throw std::domain_error("cannot compare iterators of different containers"); + JSON_THROW(std::domain_error("cannot compare iterators of different containers")); } assert(m_object != nullptr); @@ -8700,7 +9231,7 @@ class basic_json { case basic_json::value_t::object: { - throw std::domain_error("cannot compare order of object iterators"); + JSON_THROW(std::domain_error("cannot compare order of object iterators")); } case basic_json::value_t::array: @@ -8754,7 +9285,7 @@ class basic_json { case basic_json::value_t::object: { - throw std::domain_error("cannot use offsets with object iterators"); + JSON_THROW(std::domain_error("cannot use 
offsets with object iterators")); } case basic_json::value_t::array: @@ -8816,7 +9347,7 @@ class basic_json { case basic_json::value_t::object: { - throw std::domain_error("cannot use offsets with object iterators"); + JSON_THROW(std::domain_error("cannot use offsets with object iterators")); } case basic_json::value_t::array: @@ -8843,7 +9374,7 @@ class basic_json { case basic_json::value_t::object: { - throw std::domain_error("cannot use operator[] for object iterators"); + JSON_THROW(std::domain_error("cannot use operator[] for object iterators")); } case basic_json::value_t::array: @@ -8853,19 +9384,17 @@ class basic_json case basic_json::value_t::null: { - throw std::out_of_range("cannot get value"); + JSON_THROW(std::out_of_range("cannot get value")); } default: { - if (m_it.primitive_iterator == -n) + if (m_it.primitive_iterator.get_value() == -n) { return *m_object; } - else - { - throw std::out_of_range("cannot get value"); - } + + JSON_THROW(std::out_of_range("cannot get value")); } } } @@ -8882,10 +9411,8 @@ class basic_json { return m_it.object_iterator->first; } - else - { - throw std::domain_error("cannot use key() for non-object iterators"); - } + + JSON_THROW(std::domain_error("cannot use key() for non-object iterators")); } /*! 
@@ -9040,7 +9567,9 @@ class basic_json literal_false, ///< the `false` literal literal_null, ///< the `null` literal value_string, ///< a string -- use get_string() for actual value - value_number, ///< a number -- use get_number() for actual value + value_unsigned, ///< an unsigned integer -- use get_number() for actual value + value_integer, ///< a signed integer -- use get_number() for actual value + value_float, ///< an floating point number -- use get_number() for actual value begin_array, ///< the character for array begin `[` begin_object, ///< the character for object begin `{` end_array, ///< the character for array end `]` @@ -9070,7 +9599,7 @@ class basic_json // immediately abort if stream is erroneous if (s.fail()) { - throw std::invalid_argument("stream error"); + JSON_THROW(std::invalid_argument("stream error")); } // fill buffer @@ -9137,7 +9666,7 @@ class basic_json } else { - throw std::invalid_argument("missing or wrong low surrogate"); + JSON_THROW(std::invalid_argument("missing or wrong low surrogate")); } } @@ -9171,7 +9700,7 @@ class basic_json } else { - throw std::out_of_range("code points above 0x10FFFF are invalid"); + JSON_THROW(std::out_of_range("code points above 0x10FFFF are invalid")); } return result; @@ -9192,7 +9721,9 @@ class basic_json return "null literal"; case token_type::value_string: return "string literal"; - case token_type::value_number: + case lexer::token_type::value_unsigned: + case lexer::token_type::value_integer: + case lexer::token_type::value_float: return "number literal"; case token_type::begin_array: return "'['"; @@ -9465,37 +9996,47 @@ basic_json_parser_12: } if (yych <= '0') { - goto basic_json_parser_13; + goto basic_json_parser_43; } if (yych <= '9') { - goto basic_json_parser_15; + goto basic_json_parser_45; } goto basic_json_parser_5; basic_json_parser_13: yyaccept = 1; yych = *(m_marker = ++m_cursor); - if (yych <= 'D') + if (yych <= '9') { if (yych == '.') { - goto basic_json_parser_43; + goto 
basic_json_parser_47; + } + if (yych >= '0') + { + goto basic_json_parser_48; } } else { if (yych <= 'E') { - goto basic_json_parser_44; + if (yych >= 'E') + { + goto basic_json_parser_51; + } } - if (yych == 'e') + else { - goto basic_json_parser_44; + if (yych == 'e') + { + goto basic_json_parser_51; + } } } basic_json_parser_14: { - last_token_type = token_type::value_number; + last_token_type = token_type::value_unsigned; break; } basic_json_parser_15: @@ -9514,7 +10055,7 @@ basic_json_parser_15: { if (yych == '.') { - goto basic_json_parser_43; + goto basic_json_parser_47; } goto basic_json_parser_14; } @@ -9522,11 +10063,11 @@ basic_json_parser_15: { if (yych <= 'E') { - goto basic_json_parser_44; + goto basic_json_parser_51; } if (yych == 'e') { - goto basic_json_parser_44; + goto basic_json_parser_51; } goto basic_json_parser_14; } @@ -9553,7 +10094,7 @@ basic_json_parser_23: yych = *(m_marker = ++m_cursor); if (yych == 'a') { - goto basic_json_parser_45; + goto basic_json_parser_52; } goto basic_json_parser_5; basic_json_parser_24: @@ -9561,7 +10102,7 @@ basic_json_parser_24: yych = *(m_marker = ++m_cursor); if (yych == 'u') { - goto basic_json_parser_46; + goto basic_json_parser_53; } goto basic_json_parser_5; basic_json_parser_25: @@ -9569,7 +10110,7 @@ basic_json_parser_25: yych = *(m_marker = ++m_cursor); if (yych == 'r') { - goto basic_json_parser_47; + goto basic_json_parser_54; } goto basic_json_parser_5; basic_json_parser_26: @@ -9651,13 +10192,27 @@ basic_json_parser_31: } basic_json_parser_32: m_cursor = m_marker; - if (yyaccept == 0) + if (yyaccept <= 1) { - goto basic_json_parser_5; + if (yyaccept == 0) + { + goto basic_json_parser_5; + } + else + { + goto basic_json_parser_14; + } } else { - goto basic_json_parser_14; + if (yyaccept == 2) + { + goto basic_json_parser_44; + } + else + { + goto basic_json_parser_58; + } } basic_json_parser_33: ++m_cursor; @@ -9738,7 +10293,7 @@ basic_json_parser_35: } if (yych <= 'u') { - goto 
basic_json_parser_48; + goto basic_json_parser_55; } goto basic_json_parser_32; } @@ -9857,64 +10412,159 @@ basic_json_parser_42: } goto basic_json_parser_32; basic_json_parser_43: + yyaccept = 2; + yych = *(m_marker = ++m_cursor); + if (yych <= '9') + { + if (yych == '.') + { + goto basic_json_parser_47; + } + if (yych >= '0') + { + goto basic_json_parser_48; + } + } + else + { + if (yych <= 'E') + { + if (yych >= 'E') + { + goto basic_json_parser_51; + } + } + else + { + if (yych == 'e') + { + goto basic_json_parser_51; + } + } + } +basic_json_parser_44: + { + last_token_type = token_type::value_integer; + break; + } +basic_json_parser_45: + yyaccept = 2; + m_marker = ++m_cursor; + if ((m_limit - m_cursor) < 3) + { + fill_line_buffer(3); // LCOV_EXCL_LINE + } + yych = *m_cursor; + if (yych <= '9') + { + if (yych == '.') + { + goto basic_json_parser_47; + } + if (yych <= '/') + { + goto basic_json_parser_44; + } + goto basic_json_parser_45; + } + else + { + if (yych <= 'E') + { + if (yych <= 'D') + { + goto basic_json_parser_44; + } + goto basic_json_parser_51; + } + else + { + if (yych == 'e') + { + goto basic_json_parser_51; + } + goto basic_json_parser_44; + } + } +basic_json_parser_47: yych = *++m_cursor; if (yych <= '/') { goto basic_json_parser_32; } if (yych <= '9') - { - goto basic_json_parser_49; - } - goto basic_json_parser_32; -basic_json_parser_44: - yych = *++m_cursor; - if (yych <= ',') - { - if (yych == '+') - { - goto basic_json_parser_51; - } - goto basic_json_parser_32; - } - else - { - if (yych <= '-') - { - goto basic_json_parser_51; - } - if (yych <= '/') - { - goto basic_json_parser_32; - } - if (yych <= '9') - { - goto basic_json_parser_52; - } - goto basic_json_parser_32; - } -basic_json_parser_45: - yych = *++m_cursor; - if (yych == 'l') - { - goto basic_json_parser_54; - } - goto basic_json_parser_32; -basic_json_parser_46: - yych = *++m_cursor; - if (yych == 'l') - { - goto basic_json_parser_55; - } - goto basic_json_parser_32; 
-basic_json_parser_47: - yych = *++m_cursor; - if (yych == 'u') { goto basic_json_parser_56; } goto basic_json_parser_32; basic_json_parser_48: + ++m_cursor; + if (m_limit <= m_cursor) + { + fill_line_buffer(1); // LCOV_EXCL_LINE + } + yych = *m_cursor; + if (yych <= '/') + { + goto basic_json_parser_50; + } + if (yych <= '9') + { + goto basic_json_parser_48; + } +basic_json_parser_50: + { + last_token_type = token_type::parse_error; + break; + } +basic_json_parser_51: + yych = *++m_cursor; + if (yych <= ',') + { + if (yych == '+') + { + goto basic_json_parser_59; + } + goto basic_json_parser_32; + } + else + { + if (yych <= '-') + { + goto basic_json_parser_59; + } + if (yych <= '/') + { + goto basic_json_parser_32; + } + if (yych <= '9') + { + goto basic_json_parser_60; + } + goto basic_json_parser_32; + } +basic_json_parser_52: + yych = *++m_cursor; + if (yych == 'l') + { + goto basic_json_parser_62; + } + goto basic_json_parser_32; +basic_json_parser_53: + yych = *++m_cursor; + if (yych == 'l') + { + goto basic_json_parser_63; + } + goto basic_json_parser_32; +basic_json_parser_54: + yych = *++m_cursor; + if (yych == 'u') + { + goto basic_json_parser_64; + } + goto basic_json_parser_32; +basic_json_parser_55: ++m_cursor; if (m_limit <= m_cursor) { @@ -9929,7 +10579,7 @@ basic_json_parser_48: } if (yych <= '9') { - goto basic_json_parser_57; + goto basic_json_parser_65; } goto basic_json_parser_32; } @@ -9937,7 +10587,7 @@ basic_json_parser_48: { if (yych <= 'F') { - goto basic_json_parser_57; + goto basic_json_parser_65; } if (yych <= '`') { @@ -9945,12 +10595,12 @@ basic_json_parser_48: } if (yych <= 'f') { - goto basic_json_parser_57; + goto basic_json_parser_65; } goto basic_json_parser_32; } -basic_json_parser_49: - yyaccept = 1; +basic_json_parser_56: + yyaccept = 3; m_marker = ++m_cursor; if ((m_limit - m_cursor) < 3) { @@ -9961,27 +10611,30 @@ basic_json_parser_49: { if (yych <= '/') { - goto basic_json_parser_14; + goto basic_json_parser_58; } if (yych 
<= '9') { - goto basic_json_parser_49; + goto basic_json_parser_56; } - goto basic_json_parser_14; } else { if (yych <= 'E') { - goto basic_json_parser_44; + goto basic_json_parser_51; } if (yych == 'e') { - goto basic_json_parser_44; + goto basic_json_parser_51; } - goto basic_json_parser_14; } -basic_json_parser_51: +basic_json_parser_58: + { + last_token_type = token_type::value_float; + break; + } +basic_json_parser_59: yych = *++m_cursor; if (yych <= '/') { @@ -9991,7 +10644,7 @@ basic_json_parser_51: { goto basic_json_parser_32; } -basic_json_parser_52: +basic_json_parser_60: ++m_cursor; if (m_limit <= m_cursor) { @@ -10000,35 +10653,35 @@ basic_json_parser_52: yych = *m_cursor; if (yych <= '/') { - goto basic_json_parser_14; + goto basic_json_parser_58; } if (yych <= '9') { - goto basic_json_parser_52; + goto basic_json_parser_60; } - goto basic_json_parser_14; -basic_json_parser_54: + goto basic_json_parser_58; +basic_json_parser_62: yych = *++m_cursor; if (yych == 's') { - goto basic_json_parser_58; + goto basic_json_parser_66; } goto basic_json_parser_32; -basic_json_parser_55: +basic_json_parser_63: yych = *++m_cursor; if (yych == 'l') { - goto basic_json_parser_59; + goto basic_json_parser_67; } goto basic_json_parser_32; -basic_json_parser_56: +basic_json_parser_64: yych = *++m_cursor; if (yych == 'e') { - goto basic_json_parser_61; + goto basic_json_parser_69; } goto basic_json_parser_32; -basic_json_parser_57: +basic_json_parser_65: ++m_cursor; if (m_limit <= m_cursor) { @@ -10043,7 +10696,7 @@ basic_json_parser_57: } if (yych <= '9') { - goto basic_json_parser_63; + goto basic_json_parser_71; } goto basic_json_parser_32; } @@ -10051,7 +10704,7 @@ basic_json_parser_57: { if (yych <= 'F') { - goto basic_json_parser_63; + goto basic_json_parser_71; } if (yych <= '`') { @@ -10059,30 +10712,30 @@ basic_json_parser_57: } if (yych <= 'f') { - goto basic_json_parser_63; + goto basic_json_parser_71; } goto basic_json_parser_32; } -basic_json_parser_58: 
+basic_json_parser_66: yych = *++m_cursor; if (yych == 'e') { - goto basic_json_parser_64; + goto basic_json_parser_72; } goto basic_json_parser_32; -basic_json_parser_59: +basic_json_parser_67: ++m_cursor; { last_token_type = token_type::literal_null; break; } -basic_json_parser_61: +basic_json_parser_69: ++m_cursor; { last_token_type = token_type::literal_true; break; } -basic_json_parser_63: +basic_json_parser_71: ++m_cursor; if (m_limit <= m_cursor) { @@ -10097,7 +10750,7 @@ basic_json_parser_63: } if (yych <= '9') { - goto basic_json_parser_66; + goto basic_json_parser_74; } goto basic_json_parser_32; } @@ -10105,7 +10758,7 @@ basic_json_parser_63: { if (yych <= 'F') { - goto basic_json_parser_66; + goto basic_json_parser_74; } if (yych <= '`') { @@ -10113,17 +10766,17 @@ basic_json_parser_63: } if (yych <= 'f') { - goto basic_json_parser_66; + goto basic_json_parser_74; } goto basic_json_parser_32; } -basic_json_parser_64: +basic_json_parser_72: ++m_cursor; { last_token_type = token_type::literal_false; break; } -basic_json_parser_66: +basic_json_parser_74: ++m_cursor; if (m_limit <= m_cursor) { @@ -10210,7 +10863,7 @@ basic_json_parser_66: assert(m_marker == nullptr or m_marker <= m_limit); // number of processed characters (p) - const size_t num_processed_chars = static_cast(m_start - m_content); + const auto num_processed_chars = static_cast(m_start - m_content); // offset for m_marker wrt. to m_start const auto offset_marker = (m_marker == nullptr) ? 0 : m_marker - m_start; // number of unprocessed characters (u) @@ -10220,7 +10873,7 @@ basic_json_parser_66: if (m_stream == nullptr or m_stream->eof()) { // m_start may or may not be pointing into m_line_buffer at - // this point. We trust the standand library to do the right + // this point. We trust the standard library to do the right // thing. 
See http://stackoverflow.com/q/28142011/266378 m_line_buffer.assign(m_start, m_limit); @@ -10308,7 +10961,7 @@ basic_json_parser_66: m_start + 1 + x < m_cursor - 1 must hold to loop indefinitely. This can be rephrased to m_cursor - m_start - 2 > x. With the precondition, we x <= 0, meaning that the loop condition holds - indefinitly if i is always decreased. However, observe that the value + indefinitely if i is always decreased. However, observe that the value of i is strictly increasing with each iteration, as it is incremented by 1 in the iteration expression and never decremented inside the loop body. Hence, the loop condition will eventually be false which @@ -10403,7 +11056,7 @@ basic_json_parser_66: // make sure there is a subsequent unicode if ((i + 6 >= m_limit) or * (i + 5) != '\\' or * (i + 6) != 'u') { - throw std::invalid_argument("missing low surrogate"); + JSON_THROW(std::invalid_argument("missing low surrogate")); } // get code yyyy from uxxxx\uyyyy @@ -10416,7 +11069,7 @@ basic_json_parser_66: else if (codepoint >= 0xDC00 and codepoint <= 0xDFFF) { // we found a lone low surrogate - throw std::invalid_argument("missing high surrogate"); + JSON_THROW(std::invalid_argument("missing high surrogate")); } else { @@ -10434,59 +11087,155 @@ basic_json_parser_66: return result; } - /*! - @brief parse floating point number - - This function (and its overloads) serves to select the most approprate - standard floating point number parsing function based on the type - supplied via the first parameter. Set this to @a - static_cast(nullptr). - - @param[in,out] endptr recieves a pointer to the first character after - the number - - @return the floating point number - */ - long double str_to_float_t(long double* /* type */, char** endptr) const - { - return std::strtold(reinterpret_cast(m_start), endptr); - } /*! - @brief parse floating point number + @brief parse string into a built-in arithmetic type as if the current + locale is POSIX. 
- This function (and its overloads) serves to select the most approprate - standard floating point number parsing function based on the type - supplied via the first parameter. Set this to @a - static_cast(nullptr). + @note in floating-point case strtod may parse past the token's end - + this is not an error - @param[in,out] endptr recieves a pointer to the first character after - the number - - @return the floating point number + @note any leading blanks are not handled */ - double str_to_float_t(double* /* type */, char** endptr) const + struct strtonum { - return std::strtod(reinterpret_cast(m_start), endptr); - } + public: + strtonum(const char* start, const char* end) + : m_start(start), m_end(end) + {} - /*! - @brief parse floating point number + /*! + @return true iff parsed successfully as number of type T - This function (and its overloads) serves to select the most approprate - standard floating point number parsing function based on the type - supplied via the first parameter. Set this to @a - static_cast(nullptr). 
+ @param[in,out] val shall contain parsed value, or undefined value + if could not parse + */ + template::value>::type> + bool to(T& val) const + { + return parse(val, std::is_integral()); + } - @param[in,out] endptr recieves a pointer to the first character after - the number + private: + const char* const m_start = nullptr; + const char* const m_end = nullptr; - @return the floating point number - */ - float str_to_float_t(float* /* type */, char** endptr) const - { - return std::strtof(reinterpret_cast(m_start), endptr); - } + // floating-point conversion + + // overloaded wrappers for strtod/strtof/strtold + // that will be called from parse + static void strtof(float& f, const char* str, char** endptr) + { + f = std::strtof(str, endptr); + } + + static void strtof(double& f, const char* str, char** endptr) + { + f = std::strtod(str, endptr); + } + + static void strtof(long double& f, const char* str, char** endptr) + { + f = std::strtold(str, endptr); + } + + template + bool parse(T& value, /*is_integral=*/std::false_type) const + { + // replace decimal separator with locale-specific version, + // when necessary; data will point to either the original + // string, or buf, or tempstr containing the fixed string. + std::string tempstr; + std::array buf; + const size_t len = static_cast(m_end - m_start); + + // lexer will reject empty numbers + assert(len > 0); + + // since dealing with strtod family of functions, we're + // getting the decimal point char from the C locale facilities + // instead of C++'s numpunct facet of the current std::locale + const auto loc = localeconv(); + assert(loc != nullptr); + const char decimal_point_char = (loc->decimal_point == nullptr) ? '.' 
: loc->decimal_point[0]; + + const char* data = m_start; + + if (decimal_point_char != '.') + { + const size_t ds_pos = static_cast(std::find(m_start, m_end, '.') - m_start); + + if (ds_pos != len) + { + // copy the data into the local buffer or tempstr, if + // buffer is too small; replace decimal separator, and + // update data to point to the modified bytes + if ((len + 1) < buf.size()) + { + std::copy(m_start, m_end, buf.begin()); + buf[len] = 0; + buf[ds_pos] = decimal_point_char; + data = buf.data(); + } + else + { + tempstr.assign(m_start, m_end); + tempstr[ds_pos] = decimal_point_char; + data = tempstr.c_str(); + } + } + } + + char* endptr = nullptr; + value = 0; + // this calls appropriate overload depending on T + strtof(value, data, &endptr); + + // parsing was successful iff strtof parsed exactly the number + // of characters determined by the lexer (len) + const bool ok = (endptr == (data + len)); + + if (ok and (value == static_cast(0.0)) and (*data == '-')) + { + // some implementations forget to negate the zero + value = -0.0; + } + + return ok; + } + + // integral conversion + + signed long long parse_integral(char** endptr, /*is_signed*/std::true_type) const + { + return std::strtoll(m_start, endptr, 10); + } + + unsigned long long parse_integral(char** endptr, /*is_signed*/std::false_type) const + { + return std::strtoull(m_start, endptr, 10); + } + + template + bool parse(T& value, /*is_integral=*/std::true_type) const + { + char* endptr = nullptr; + errno = 0; // these are thread-local + const auto x = parse_integral(&endptr, std::is_signed()); + + // called right overload? 
+ static_assert(std::is_signed() == std::is_signed(), ""); + + value = static_cast(x); + + return (x == static_cast(value)) // x fits into destination T + and (x < 0) == (value < 0) // preserved sign + //and ((x != 0) or is_integral()) // strto[u]ll did nto fail + and (errno == 0) // strto[u]ll did not overflow + and (m_start < m_end) // token was not empty + and (endptr == m_end); // parsed entire token exactly + } + }; /*! @brief return number value for number tokens @@ -10495,125 +11244,84 @@ basic_json_parser_66: number type (either integer, unsigned integer or floating point), which is passed back to the caller via the result parameter. - This function parses the integer component up to the radix point or - exponent while collecting information about the 'floating point - representation', which it stores in the result parameter. If there is - no radix point or exponent, and the number can fit into a @ref - number_integer_t or @ref number_unsigned_t then it sets the result - parameter accordingly. + integral numbers that don't fit into the the range of the respective + type are parsed as number_float_t - If the number is a floating point number the number is then parsed - using @a std:strtod (or @a std:strtof or @a std::strtold). + floating-point values do not satisfy std::isfinite predicate + are converted to value_t::null - @param[out] result @ref basic_json object to receive the number, or - NAN if the conversion read past the current token. The latter case - needs to be treated by the caller function. + throws if the entire string [m_start .. m_cursor) cannot be + interpreted as a number + + @param[out] result @ref basic_json object to receive the number. 
+ @param[in] token the type of the number token */ - void get_number(basic_json& result) const + bool get_number(basic_json& result, const token_type token) const { assert(m_start != nullptr); + assert(m_start < m_cursor); + assert((token == token_type::value_unsigned) or + (token == token_type::value_integer) or + (token == token_type::value_float)); - const lexer::lexer_char_t* curptr = m_start; + strtonum num_converter(reinterpret_cast(m_start), + reinterpret_cast(m_cursor)); - // accumulate the integer conversion result (unsigned for now) - number_unsigned_t value = 0; - - // maximum absolute value of the relevant integer type - number_unsigned_t max; - - // temporarily store the type to avoid unecessary bitfield access - value_t type; - - // look for sign - if (*curptr == '-') + switch (token) { - type = value_t::number_integer; - max = static_cast((std::numeric_limits::max)()) + 1; - curptr++; - } - else - { - type = value_t::number_unsigned; - max = static_cast((std::numeric_limits::max)()); - } - - // count the significant figures - for (; curptr < m_cursor; curptr++) - { - // quickly skip tests if a digit - if (*curptr < '0' || *curptr > '9') + case lexer::token_type::value_unsigned: { - if (*curptr == '.') + number_unsigned_t val; + if (num_converter.to(val)) { - // don't count '.' 
but change to float - type = value_t::number_float; - continue; + // parsing successful + result.m_type = value_t::number_unsigned; + result.m_value = val; + return true; } - // assume exponent (if not then will fail parse): change to - // float, stop counting and record exponent details - type = value_t::number_float; break; } - // skip if definitely not an integer - if (type != value_t::number_float) + case lexer::token_type::value_integer: { - auto digit = static_cast(*curptr - '0'); - - // overflow if value * 10 + digit > max, move terms around - // to avoid overflow in intermediate values - if (value > (max - digit) / 10) + number_integer_t val; + if (num_converter.to(val)) { - // overflow - type = value_t::number_float; - } - else - { - // no overflow - value = value * 10 + digit; + // parsing successful + result.m_type = value_t::number_integer; + result.m_value = val; + return true; } + break; + } + + default: + { + break; } } - // save the value (if not a float) - if (type == value_t::number_unsigned) + // parse float (either explicitly or because a previous conversion + // failed) + number_float_t val; + if (num_converter.to(val)) { - result.m_value.number_unsigned = value; - } - else if (type == value_t::number_integer) - { - // invariant: if we parsed a '-', the absolute value is between - // 0 (we allow -0) and max == -INT64_MIN - assert(value >= 0); - assert(value <= max); - - if (value == max) - { - // we cannot simply negate value (== max == -INT64_MIN), - // see https://github.com/nlohmann/json/issues/389 - result.m_value.number_integer = static_cast(INT64_MIN); - } - else - { - // all other values can be negated safely - result.m_value.number_integer = -static_cast(value); - } - } - else - { - // parse with strtod - result.m_value.number_float = str_to_float_t(static_cast(nullptr), NULL); + // parsing successful + result.m_type = value_t::number_float; + result.m_value = val; // replace infinity and NAN by null if (not 
std::isfinite(result.m_value.number_float)) { - type = value_t::null; + result.m_type = value_t::null; result.m_value = basic_json::json_value(); } + + return true; } - // save the type - result.m_type = type; + // couldn't parse number in any format + return false; } private: @@ -10857,9 +11565,11 @@ basic_json_parser_66: break; } - case lexer::token_type::value_number: + case lexer::token_type::value_unsigned: + case lexer::token_type::value_integer: + case lexer::token_type::value_float: { - m_lexer.get_number(result); + m_lexer.get_number(result, last_token); get_token(); break; } @@ -10894,7 +11604,7 @@ basic_json_parser_66: "'") : lexer::token_type_name(last_token)); error_msg += "; expected " + lexer::token_type_name(t); - throw std::invalid_argument(error_msg); + JSON_THROW(std::invalid_argument(error_msg)); } } @@ -10906,7 +11616,7 @@ basic_json_parser_66: error_msg += (last_token == lexer::token_type::parse_error ? ("'" + m_lexer.get_token_string() + "'") : lexer::token_type_name(last_token)); - throw std::invalid_argument(error_msg); + JSON_THROW(std::invalid_argument(error_msg)); } } @@ -11002,7 +11712,7 @@ basic_json_parser_66: { if (is_root()) { - throw std::domain_error("JSON pointer has no parent"); + JSON_THROW(std::domain_error("JSON pointer has no parent")); } auto last = reference_tokens.back(); @@ -11020,7 +11730,7 @@ basic_json_parser_66: { if (is_root()) { - throw std::domain_error("JSON pointer has no parent"); + JSON_THROW(std::domain_error("JSON pointer has no parent")); } json_pointer result = *this; @@ -11081,7 +11791,7 @@ basic_json_parser_66: */ default: { - throw std::domain_error("invalid value to unflatten"); + JSON_THROW(std::domain_error("invalid value to unflatten")); } } } @@ -11149,12 +11859,12 @@ basic_json_parser_66: // error condition (cf. RFC 6901, Sect. 
4) if (reference_token.size() > 1 and reference_token[0] == '0') { - throw std::domain_error("array index must not begin with '0'"); + JSON_THROW(std::domain_error("array index must not begin with '0'")); } if (reference_token == "-") { - // explicityly treat "-" as index beyond the end + // explicitly treat "-" as index beyond the end ptr = &ptr->operator[](ptr->m_value.array->size()); } else @@ -11167,7 +11877,7 @@ basic_json_parser_66: default: { - throw std::out_of_range("unresolved reference token '" + reference_token + "'"); + JSON_THROW(std::out_of_range("unresolved reference token '" + reference_token + "'")); } } } @@ -11193,15 +11903,15 @@ basic_json_parser_66: if (reference_token == "-") { // "-" always fails the range check - throw std::out_of_range("array index '-' (" + - std::to_string(ptr->m_value.array->size()) + - ") is out of range"); + JSON_THROW(std::out_of_range("array index '-' (" + + std::to_string(ptr->m_value.array->size()) + + ") is out of range")); } // error condition (cf. RFC 6901, Sect. 4) if (reference_token.size() > 1 and reference_token[0] == '0') { - throw std::domain_error("array index must not begin with '0'"); + JSON_THROW(std::domain_error("array index must not begin with '0'")); } // note: at performs range check @@ -11211,7 +11921,7 @@ basic_json_parser_66: default: { - throw std::out_of_range("unresolved reference token '" + reference_token + "'"); + JSON_THROW(std::out_of_range("unresolved reference token '" + reference_token + "'")); } } } @@ -11245,15 +11955,15 @@ basic_json_parser_66: if (reference_token == "-") { // "-" cannot be used for const access - throw std::out_of_range("array index '-' (" + - std::to_string(ptr->m_value.array->size()) + - ") is out of range"); + JSON_THROW(std::out_of_range("array index '-' (" + + std::to_string(ptr->m_value.array->size()) + + ") is out of range")); } // error condition (cf. RFC 6901, Sect. 
4) if (reference_token.size() > 1 and reference_token[0] == '0') { - throw std::domain_error("array index must not begin with '0'"); + JSON_THROW(std::domain_error("array index must not begin with '0'")); } // use unchecked array access @@ -11263,7 +11973,7 @@ basic_json_parser_66: default: { - throw std::out_of_range("unresolved reference token '" + reference_token + "'"); + JSON_THROW(std::out_of_range("unresolved reference token '" + reference_token + "'")); } } } @@ -11289,15 +11999,15 @@ basic_json_parser_66: if (reference_token == "-") { // "-" always fails the range check - throw std::out_of_range("array index '-' (" + - std::to_string(ptr->m_value.array->size()) + - ") is out of range"); + JSON_THROW(std::out_of_range("array index '-' (" + + std::to_string(ptr->m_value.array->size()) + + ") is out of range")); } // error condition (cf. RFC 6901, Sect. 4) if (reference_token.size() > 1 and reference_token[0] == '0') { - throw std::domain_error("array index must not begin with '0'"); + JSON_THROW(std::domain_error("array index must not begin with '0'")); } // note: at performs range check @@ -11307,7 +12017,7 @@ basic_json_parser_66: default: { - throw std::out_of_range("unresolved reference token '" + reference_token + "'"); + JSON_THROW(std::out_of_range("unresolved reference token '" + reference_token + "'")); } } } @@ -11329,7 +12039,7 @@ basic_json_parser_66: // check if nonempty reference string begins with slash if (reference_string[0] != '/') { - throw std::domain_error("JSON pointer must be empty or begin with '/'"); + JSON_THROW(std::domain_error("JSON pointer must be empty or begin with '/'")); } // extract the reference tokens: @@ -11337,7 +12047,7 @@ basic_json_parser_66: // - start: position after the previous slash for ( // search for the first slash after the first character - size_t slash = reference_string.find_first_of("/", 1), + size_t slash = reference_string.find_first_of('/', 1), // set the beginning of the first reference token start = 
1; // we can stop if start == string::npos+1 = 0 @@ -11346,16 +12056,16 @@ basic_json_parser_66: // (will eventually be 0 if slash == std::string::npos) start = slash + 1, // find next slash - slash = reference_string.find_first_of("/", start)) + slash = reference_string.find_first_of('/', start)) { // use the text between the beginning of the reference token // (start) and the last slash (slash). auto reference_token = reference_string.substr(start, slash - start); // check reference tokens are properly escaped - for (size_t pos = reference_token.find_first_of("~"); + for (size_t pos = reference_token.find_first_of('~'); pos != std::string::npos; - pos = reference_token.find_first_of("~", pos + 1)) + pos = reference_token.find_first_of('~', pos + 1)) { assert(reference_token[pos] == '~'); @@ -11364,7 +12074,7 @@ basic_json_parser_66: (reference_token[pos + 1] != '0' and reference_token[pos + 1] != '1')) { - throw std::domain_error("escape error: '~' must be followed with '0' or '1'"); + JSON_THROW(std::domain_error("escape error: '~' must be followed with '0' or '1'")); } } @@ -11490,7 +12200,7 @@ basic_json_parser_66: { if (not value.is_object()) { - throw std::domain_error("only objects can be unflattened"); + JSON_THROW(std::domain_error("only objects can be unflattened")); } basic_json result; @@ -11500,7 +12210,7 @@ basic_json_parser_66: { if (not element.second.is_primitive()) { - throw std::domain_error("values in object must be primitive"); + JSON_THROW(std::domain_error("values in object must be primitive")); } // assign value to reference pointed to by JSON pointer; Note @@ -11515,6 +12225,18 @@ basic_json_parser_66: } private: + friend bool operator==(json_pointer const& lhs, + json_pointer const& rhs) noexcept + { + return lhs.reference_tokens == rhs.reference_tokens; + } + + friend bool operator!=(json_pointer const& lhs, + json_pointer const& rhs) noexcept + { + return !(lhs == rhs); + } + /// the reference tokens std::vector reference_tokens {}; }; 
@@ -11649,7 +12371,7 @@ basic_json_parser_66: primitive. The original JSON value can be restored using the @ref unflatten() function. - @return an object that maps JSON pointers to primitve values + @return an object that maps JSON pointers to primitive values @note Empty objects and arrays are flattened to `null` and will not be reconstructed correctly by the @ref unflatten() function. @@ -11716,7 +12438,7 @@ basic_json_parser_66: [JSON Patch](http://jsonpatch.com) defines a JSON document structure for expressing a sequence of operations to apply to a JSON) document. With - this funcion, a JSON Patch is applied to the current JSON value by + this function, a JSON Patch is applied to the current JSON value by executing all operations from the patch. @param[in] json_patch JSON patch document @@ -11829,7 +12551,7 @@ basic_json_parser_66: if (static_cast(idx) > parent.size()) { // avoid undefined behavior - throw std::out_of_range("array index " + std::to_string(idx) + " is out of range"); + JSON_THROW(std::out_of_range("array index " + std::to_string(idx) + " is out of range")); } else { @@ -11867,7 +12589,7 @@ basic_json_parser_66: } else { - throw std::out_of_range("key '" + last_path + "' not found"); + JSON_THROW(std::out_of_range("key '" + last_path + "' not found")); } } else if (parent.is_array()) @@ -11881,10 +12603,10 @@ basic_json_parser_66: if (not json_patch.is_array()) { // a JSON patch must be an array of objects - throw std::invalid_argument("JSON patch must be an array of objects"); + JSON_THROW(std::invalid_argument("JSON patch must be an array of objects")); } - // iterate and apply th eoperations + // iterate and apply the operations for (const auto& val : json_patch) { // wrapper to get a value for an operation @@ -11901,13 +12623,13 @@ basic_json_parser_66: // check if desired value is present if (it == val.m_value.object->end()) { - throw std::invalid_argument(error_msg + " must have member '" + member + "'"); + 
JSON_THROW(std::invalid_argument(error_msg + " must have member '" + member + "'")); } // check if result is of type string if (string_type and not it->second.is_string()) { - throw std::invalid_argument(error_msg + " must have string member '" + member + "'"); + JSON_THROW(std::invalid_argument(error_msg + " must have string member '" + member + "'")); } // no error: return value @@ -11917,7 +12639,7 @@ basic_json_parser_66: // type check if (not val.is_object()) { - throw std::invalid_argument("JSON patch must be an array of objects"); + JSON_THROW(std::invalid_argument("JSON patch must be an array of objects")); } // collect mandatory members @@ -11976,13 +12698,13 @@ basic_json_parser_66: case patch_operations::test: { bool success = false; - try + JSON_TRY { // check if "value" matches the one at "path" // the "path" location must exist - use at() success = (result.at(ptr) == get_value("test", "value", false)); } - catch (std::out_of_range&) + JSON_CATCH (std::out_of_range&) { // ignore out of range errors: success remains false } @@ -11990,7 +12712,7 @@ basic_json_parser_66: // throw an exception if test fails if (not success) { - throw std::domain_error("unsuccessful: " + val.dump()); + JSON_THROW(std::domain_error("unsuccessful: " + val.dump())); } break; @@ -12000,7 +12722,7 @@ basic_json_parser_66: { // op must be "add", "remove", "replace", "move", "copy", or // "test" - throw std::invalid_argument("operation value '" + op + "' is invalid"); + JSON_THROW(std::invalid_argument("operation value '" + op + "' is invalid")); } } } @@ -12023,8 +12745,8 @@ basic_json_parser_66: @note Currently, only `remove`, `add`, and `replace` operations are generated. 
- @param[in] source JSON value to copare from - @param[in] target JSON value to copare against + @param[in] source JSON value to compare from + @param[in] target JSON value to compare against @param[in] path helper value to create JSON pointers @return a JSON patch to convert the @a source to @a target @@ -12175,7 +12897,6 @@ basic_json_parser_66: /// @} }; - ///////////// // presets // ///////////// @@ -12189,7 +12910,7 @@ uses the standard template types. @since version 1.0.0 */ using json = basic_json<>; -} +} // namespace nlohmann /////////////////////// @@ -12230,7 +12951,7 @@ struct hash return h(j.dump()); } }; -} +} // namespace std /*! @brief user-defined string literal for JSON values @@ -12273,4 +12994,10 @@ inline nlohmann::json::json_pointer operator "" _json_pointer(const char* s, std #pragma GCC diagnostic pop #endif +// clean up +#undef JSON_CATCH +#undef JSON_DEPRECATED +#undef JSON_THROW +#undef JSON_TRY + #endif From b672717096aa5940a99cc5ddf0aa23974de06bbe Mon Sep 17 00:00:00 2001 From: Antonin Portelli Date: Mon, 19 Jun 2017 14:38:39 +0100 Subject: [PATCH 047/377] Test_serialiation update for JSON --- tests/IO/Test_serialisation.cc | 93 ++++++++++++++++++---------------- 1 file changed, 49 insertions(+), 44 deletions(-) diff --git a/tests/IO/Test_serialisation.cc b/tests/IO/Test_serialisation.cc index d5b52044..3988784c 100644 --- a/tests/IO/Test_serialisation.cc +++ b/tests/IO/Test_serialisation.cc @@ -151,6 +151,11 @@ int main(int argc,char **argv) ioTest("iotest.dat", obj, "text (object) "); ioTest("iotest.dat", vec, "text (vector of objects)"); ioTest("iotest.dat", pair, "text (pair of objects)"); + //// text + ioTest("iotest.json", obj, "JSON (object) "); + ioTest("iotest.json", vec, "JSON (vector of objects)"); + ioTest("iotest.json", pair, "JSON (pair of objects)"); + //// HDF5 #undef HAVE_HDF5 #ifdef HAVE_HDF5 @@ -194,50 +199,50 @@ int main(int argc,char **argv) std::cout << std::endl; - std::cout << ".:::::: Testing JSON classes "<< 
std::endl; - - - { - JSONWriter JW("bother.json"); - - // test basic type writing - push(JW,"BasicTypes"); - write(JW,std::string("i16"),i16); - write(JW,"u16",u16); - write(JW,"i32",i32); - write(JW,"u32",u32); - write(JW,"i64",i64); - write(JW,"u64",u64); - write(JW,"f",f); - write(JW,"d",d); - write(JW,"b",b); - pop(JW); - - // test serializable class writing - myclass obj(1234); // non-trivial constructor - std::cout << "-- serialisable class writing to 'bother.json'..." << std::endl; - write(JW,"obj",obj); - JW.write("obj2", obj); - - std::cout << obj << std::endl; - - std::vector vec; - vec.push_back(myclass(1234)); - vec.push_back(myclass(5678)); - vec.push_back(myclass(3838)); - write(JW, "objvec", vec); - - } - - { - JSONReader RD("bother.json"); - myclass jcopy1; - std::vector jveccopy1; - read(RD,"obj",jcopy1); - read(RD,"objvec", jveccopy1); - std::cout << "Loaded (JSON) -----------------" << std::endl; - std::cout << jcopy1 << std::endl << jveccopy1 << std::endl; - } +// std::cout << ".:::::: Testing JSON classes "<< std::endl; +// +// +// { +// JSONWriter JW("bother.json"); +// +// // test basic type writing +// push(JW,"BasicTypes"); +// write(JW,std::string("i16"),i16); +// write(JW,"u16",u16); +// write(JW,"i32",i32); +// write(JW,"u32",u32); +// write(JW,"i64",i64); +// write(JW,"u64",u64); +// write(JW,"f",f); +// write(JW,"d",d); +// write(JW,"b",b); +// pop(JW); +// +// // test serializable class writing +// myclass obj(1234); // non-trivial constructor +// std::cout << "-- serialisable class writing to 'bother.json'..." 
<< std::endl; +// write(JW,"obj",obj); +// JW.write("obj2", obj); +// +// std::cout << obj << std::endl; +// +// std::vector vec; +// vec.push_back(myclass(1234)); +// vec.push_back(myclass(5678)); +// vec.push_back(myclass(3838)); +// write(JW, "objvec", vec); +// +// } +// +// { +// JSONReader RD("bother.json"); +// myclass jcopy1; +// std::vector jveccopy1; +// read(RD,"obj",jcopy1); +// read(RD,"objvec", jveccopy1); +// std::cout << "Loaded (JSON) -----------------" << std::endl; +// std::cout << jcopy1 << std::endl << jveccopy1 << std::endl; +// } /* // This is still work in progress From b9104f307241558b2d8ec47ee9814a162da5bda8 Mon Sep 17 00:00:00 2001 From: paboyle Date: Wed, 21 Jun 2017 21:08:03 +0100 Subject: [PATCH 048/377] Block CG --- lib/algorithms/Algorithms.h | 17 +---------------- 1 file changed, 1 insertion(+), 16 deletions(-) diff --git a/lib/algorithms/Algorithms.h b/lib/algorithms/Algorithms.h index 5123c7a1..3b62c753 100644 --- a/lib/algorithms/Algorithms.h +++ b/lib/algorithms/Algorithms.h @@ -44,30 +44,15 @@ Author: Peter Boyle #include #include #include - -// Lanczos support -//#include +#include #include #include #include -// Eigen/lanczos // EigCg -// MCR // Pcg -// Multishift CG // Hdcg // GCR // etc.. -// integrator/Leapfrog -// integrator/Omelyan -// integrator/ForceGradient - -// montecarlo/hmc -// montecarlo/rhmc -// montecarlo/metropolis -// etc... 
- - #endif From 1d7aa673a4cebfde47afb7da8ec9b10a3f681b28 Mon Sep 17 00:00:00 2001 From: paboyle Date: Wed, 21 Jun 2017 21:08:53 +0100 Subject: [PATCH 049/377] Include BlockCG by default --- tests/solver/Test_staggered_block_cg_unprec.cc | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/solver/Test_staggered_block_cg_unprec.cc b/tests/solver/Test_staggered_block_cg_unprec.cc index 8db41e98..5d449f73 100644 --- a/tests/solver/Test_staggered_block_cg_unprec.cc +++ b/tests/solver/Test_staggered_block_cg_unprec.cc @@ -27,7 +27,6 @@ Author: Peter Boyle *************************************************************************************/ /* END LEGAL */ #include -#include using namespace std; using namespace Grid; From 6ebf9f15b7de6b074feb74af0c5037da0fd0730d Mon Sep 17 00:00:00 2001 From: paboyle Date: Thu, 22 Jun 2017 08:14:34 +0100 Subject: [PATCH 050/377] Splitting communicators first cut --- benchmarks/Benchmark_staggered.cc | 2 +- benchmarks/Benchmark_wilson.cc | 2 +- benchmarks/Benchmark_wilson_sweep.cc | 2 +- lib/cartesian/Cartesian_base.h | 2 + lib/cartesian/Cartesian_full.h | 23 +++++- lib/cartesian/Cartesian_red_black.h | 45 +++++++++-- lib/communicator/Communicator_base.cc | 2 +- lib/communicator/Communicator_base.h | 18 ++++- lib/communicator/Communicator_mpi.cc | 76 ++++++++++++++++--- lib/communicator/Communicator_mpi3.cc | 9 +++ lib/communicator/Communicator_none.cc | 3 + lib/communicator/Communicator_shmem.cc | 5 ++ lib/qcd/utils/SpaceTimeGrid.cc | 45 ++++++----- lib/util/Lexicographic.h | 4 +- tests/Test_stencil.cc | 2 +- tests/core/Test_cshift_red_black.cc | 2 +- tests/core/Test_cshift_red_black_rotate.cc | 2 +- tests/core/Test_fft.cc | 2 +- tests/core/Test_gpwilson_even_odd.cc | 2 +- tests/core/Test_main.cc | 2 +- tests/core/Test_staggered.cc | 2 +- tests/core/Test_wilson_even_odd.cc | 2 +- .../core/Test_wilson_twisted_mass_even_odd.cc | 2 +- tests/forces/Test_gp_rect_force.cc | 2 +- tests/forces/Test_laplacian_force.cc | 2 +- 
tests/forces/Test_rect_force.cc | 2 +- tests/forces/Test_wilson_force.cc | 2 +- tests/smearing/Test_WilsonFlow.cc | 4 +- tests/solver/Test_laplacian.cc | 2 +- tests/solver/Test_staggered_cg_unprec.cc | 2 +- tests/solver/Test_wilson_cg_prec.cc | 2 +- tests/solver/Test_wilson_cg_schur.cc | 2 +- tests/solver/Test_wilson_cg_unprec.cc | 2 +- tests/solver/Test_wilson_cr_unprec.cc | 2 +- 34 files changed, 215 insertions(+), 65 deletions(-) diff --git a/benchmarks/Benchmark_staggered.cc b/benchmarks/Benchmark_staggered.cc index dc2dcf91..f5325b28 100644 --- a/benchmarks/Benchmark_staggered.cc +++ b/benchmarks/Benchmark_staggered.cc @@ -40,7 +40,7 @@ int main (int argc, char ** argv) std::vector simd_layout = GridDefaultSimd(Nd,vComplex::Nsimd()); std::vector mpi_layout = GridDefaultMpi(); GridCartesian Grid(latt_size,simd_layout,mpi_layout); - GridRedBlackCartesian RBGrid(latt_size,simd_layout,mpi_layout); + GridRedBlackCartesian RBGrid(&Grid); int threads = GridThread::GetThreads(); std::cout< simd_layout = GridDefaultSimd(Nd,vComplex::Nsimd()); std::vector mpi_layout = GridDefaultMpi(); GridCartesian Grid(latt_size,simd_layout,mpi_layout); - GridRedBlackCartesian RBGrid(latt_size,simd_layout,mpi_layout); + GridRedBlackCartesian RBGrid(&Grid); int threads = GridThread::GetThreads(); std::cout< friend class Lattice; GridBase(const std::vector & processor_grid) : CartesianCommunicator(processor_grid) {}; + GridBase(const std::vector & processor_grid, + const CartesianCommunicator &parent) : CartesianCommunicator(processor_grid,parent) {}; // Physics Grid information. std::vector _simd_layout;// Which dimensions get relayed out over simd lanes. 
diff --git a/lib/cartesian/Cartesian_full.h b/lib/cartesian/Cartesian_full.h index b0e47fa4..bced0791 100644 --- a/lib/cartesian/Cartesian_full.h +++ b/lib/cartesian/Cartesian_full.h @@ -61,10 +61,29 @@ public: virtual int CheckerBoardShift(int source_cb,int dim,int shift, int osite){ return shift; } + ///////////////////////////////////////////////////////////////////////// + // Constructor takes a parent grid and possibly subdivides communicator. + ///////////////////////////////////////////////////////////////////////// GridCartesian(const std::vector &dimensions, const std::vector &simd_layout, - const std::vector &processor_grid - ) : GridBase(processor_grid) + const std::vector &processor_grid, + GridCartesian &parent) : GridBase(processor_grid,parent) + { + Init(dimensions,simd_layout,processor_grid); + } + ///////////////////////////////////////////////////////////////////////// + // Construct from comm world + ///////////////////////////////////////////////////////////////////////// + GridCartesian(const std::vector &dimensions, + const std::vector &simd_layout, + const std::vector &processor_grid) : GridBase(processor_grid) + { + Init(dimensions,simd_layout,processor_grid); + } + + void Init(const std::vector &dimensions, + const std::vector &simd_layout, + const std::vector &processor_grid) { /////////////////////// // Grid information diff --git a/lib/cartesian/Cartesian_red_black.h b/lib/cartesian/Cartesian_red_black.h index 3037de00..1f831bc4 100644 --- a/lib/cartesian/Cartesian_red_black.h +++ b/lib/cartesian/Cartesian_red_black.h @@ -112,24 +112,57 @@ public: } }; - GridRedBlackCartesian(const GridBase *base) : GridRedBlackCartesian(base->_fdimensions,base->_simd_layout,base->_processors) {}; + //////////////////////////////////////////////////////////// + // Create Redblack from original grid; require full grid pointer ? 
+ //////////////////////////////////////////////////////////// + GridRedBlackCartesian(const GridBase *base) : GridBase(base->_processors,*base) + { + int dims = base->_ndimension; + std::vector checker_dim_mask(dims,1); + int checker_dim = 0; + Init(base->_fdimensions,base->_simd_layout,base->_processors,checker_dim_mask,checker_dim); + }; - GridRedBlackCartesian(const std::vector &dimensions, + //////////////////////////////////////////////////////////// + // Create redblack from original grid, with non-trivial checker dim mask + //////////////////////////////////////////////////////////// + GridRedBlackCartesian(const GridBase *base, + const std::vector &checker_dim_mask, + int checker_dim + ) : GridBase(base->_processors,*base) + { + Init(base->_fdimensions,base->_simd_layout,base->_processors,checker_dim_mask,checker_dim) ; + } +#if 0 + //////////////////////////////////////////////////////////// + // Create redblack grid ;; deprecate these. Should not + // need direct creation of redblack without a full grid to base on + //////////////////////////////////////////////////////////// + GridRedBlackCartesian(const GridBase *base, + const std::vector &dimensions, const std::vector &simd_layout, const std::vector &processor_grid, const std::vector &checker_dim_mask, int checker_dim - ) : GridBase(processor_grid) + ) : GridBase(processor_grid,*base) { Init(dimensions,simd_layout,processor_grid,checker_dim_mask,checker_dim); } - GridRedBlackCartesian(const std::vector &dimensions, + + //////////////////////////////////////////////////////////// + // Create redblack grid + //////////////////////////////////////////////////////////// + GridRedBlackCartesian(const GridBase *base, + const std::vector &dimensions, const std::vector &simd_layout, - const std::vector &processor_grid) : GridBase(processor_grid) + const std::vector &processor_grid) : GridBase(processor_grid,*base) { std::vector checker_dim_mask(dimensions.size(),1); - 
Init(dimensions,simd_layout,processor_grid,checker_dim_mask,0); + int checker_dim = 0; + Init(dimensions,simd_layout,processor_grid,checker_dim_mask,checker_dim); } +#endif + void Init(const std::vector &dimensions, const std::vector &simd_layout, const std::vector &processor_grid, diff --git a/lib/communicator/Communicator_base.cc b/lib/communicator/Communicator_base.cc index 557fef48..b042646f 100644 --- a/lib/communicator/Communicator_base.cc +++ b/lib/communicator/Communicator_base.cc @@ -60,7 +60,7 @@ void CartesianCommunicator::ShmBufferFreeAll(void) { ///////////////////////////////// // Grid information queries ///////////////////////////////// -int CartesianCommunicator::Dimensions(void) { return _ndimension; }; +int CartesianCommunicator::Dimensions(void) { return _ndimension; }; int CartesianCommunicator::IsBoss(void) { return _processor==0; }; int CartesianCommunicator::BossRank(void) { return 0; }; int CartesianCommunicator::ThisRank(void) { return _processor; }; diff --git a/lib/communicator/Communicator_base.h b/lib/communicator/Communicator_base.h index 12a8429f..ecbb2061 100644 --- a/lib/communicator/Communicator_base.h +++ b/lib/communicator/Communicator_base.h @@ -68,6 +68,7 @@ class CartesianCommunicator { static MPI_Comm communicator_world; MPI_Comm communicator; typedef MPI_Request CommsRequest_t; + #else typedef int CommsRequest_t; #endif @@ -135,11 +136,24 @@ class CartesianCommunicator { // Must call in Grid startup //////////////////////////////////////////////// static void Init(int *argc, char ***argv); - + //////////////////////////////////////////////// - // Constructor of any given grid + // Constructors to sub-divide a parent communicator + // and default to comm world //////////////////////////////////////////////// + CartesianCommunicator(const std::vector &processors,const CartesianCommunicator &parent); CartesianCommunicator(const std::vector &pdimensions_in); + + private: +#if defined (GRID_COMMS_MPI) + //|| defined 
(GRID_COMMS_MPI3) + //////////////////////////////////////////////// + // Private initialise from an MPI communicator + // Can use after an MPI_Comm_split, but hidden from user so private + //////////////////////////////////////////////// + void InitFromMPICommunicator(const std::vector &processors, MPI_Comm communicator_base); +#endif + public: //////////////////////////////////////////////////////////////////////////////////////// // Wraps MPI_Cart routines, or implements equivalent on other impls diff --git a/lib/communicator/Communicator_mpi.cc b/lib/communicator/Communicator_mpi.cc index bd2a62fb..b24b60bd 100644 --- a/lib/communicator/Communicator_mpi.cc +++ b/lib/communicator/Communicator_mpi.cc @@ -53,24 +53,80 @@ void CartesianCommunicator::Init(int *argc, char ***argv) { ShmInitGeneric(); } -CartesianCommunicator::CartesianCommunicator(const std::vector &processors) +CartesianCommunicator::CartesianCommunicator(const std::vector &processors) +{ + InitFromMPICommunicator(processors,communicator_world); + std::cout << "Passed communicator world to a new communicator" < &processors,const CartesianCommunicator &parent) { _ndimension = processors.size(); - std::vector periodic(_ndimension,1); + assert(_ndimension = parent._ndimension); + ////////////////////////////////////////////////////////////////////////////////////////////////////// + // split the communicator + ////////////////////////////////////////////////////////////////////////////////////////////////////// + std::vector ratio(_ndimension); + std::vector rcoor(_ndimension); + std::vector scoor(_ndimension); + + int Nsubcomm=1; + int Nsubrank=1; + for(int d=0;d<_ndimension;d++) { + ratio[d] = parent._processors[d] / processors[d]; + rcoor[d] = parent._processor_coor[d] / processors[d]; + scoor[d] = parent._processor_coor[d] % processors[d]; + assert(ratio[d] * processors[d] == parent._processors[d]); // must exactly subdivide + Nsubcomm *= ratio[d]; + Nsubrank *= processors[d]; + } + + int rlex, 
slex; + Lexicographic::IndexFromCoor(rcoor,rlex,ratio); + Lexicographic::IndexFromCoor(scoor,slex,processors); + + MPI_Comm comm_split; + int ierr= MPI_Comm_split(communicator_world, rlex, slex,&comm_split); + assert(ierr==0); + + ////////////////////////////////////////////////////////////////////////////////////////////////////// + // Set up from the new split communicator + ////////////////////////////////////////////////////////////////////////////////////////////////////// + InitFromMPICommunicator(processors,comm_split); + + ////////////////////////////////////////////////////////////////////////////////////////////////////// + // Declare victory + ////////////////////////////////////////////////////////////////////////////////////////////////////// + std::cout << "Divided communicator "<< parent._Nprocessors<<" into " + < &processors, MPI_Comm communicator_base) +{ + if ( communicator_base != communicator_world ) { + std::cout << "Cartesian communicator created with a non-world communicator"< periodic(_ndimension,1); + MPI_Cart_create(communicator_base, _ndimension,&_processors[0],&periodic[0],1,&communicator); + MPI_Comm_rank(communicator,&_processor); + MPI_Cart_coords(communicator,_processor,_ndimension,&_processor_coor[0]); + + int Size; MPI_Comm_size(communicator,&Size); assert(Size==_Nprocessors); diff --git a/lib/communicator/Communicator_mpi3.cc b/lib/communicator/Communicator_mpi3.cc index 632eb991..6d7f301c 100644 --- a/lib/communicator/Communicator_mpi3.cc +++ b/lib/communicator/Communicator_mpi3.cc @@ -371,6 +371,15 @@ void CartesianCommunicator::ProcessorCoorFromRank(int rank, std::vector &c assert(lr!=-1); Lexicographic::CoorFromIndex(coor,lr,_processors); } + +////////////////////////////////// +// Try to subdivide communicator +////////////////////////////////// +CartesianCommunicator::CartesianCommunicator(const std::vector &processors,CartesianCommunicator &parent) + : CartesianCommunicator(processors) +{ + std::cout << "Attempts to split 
MPI3 communicators will fail until implemented" < &processors) { int ierr; diff --git a/lib/communicator/Communicator_none.cc b/lib/communicator/Communicator_none.cc index 5319ab93..a7484782 100644 --- a/lib/communicator/Communicator_none.cc +++ b/lib/communicator/Communicator_none.cc @@ -38,6 +38,9 @@ void CartesianCommunicator::Init(int *argc, char *** arv) ShmInitGeneric(); } +CartesianCommunicator::CartesianCommunicator(const std::vector &processors,CartesianCommunicator &parent) + : CartesianCommunicator(processors) {} + CartesianCommunicator::CartesianCommunicator(const std::vector &processors) { _processors = processors; diff --git a/lib/communicator/Communicator_shmem.cc b/lib/communicator/Communicator_shmem.cc index 3c76c808..49d32697 100644 --- a/lib/communicator/Communicator_shmem.cc +++ b/lib/communicator/Communicator_shmem.cc @@ -75,6 +75,11 @@ void CartesianCommunicator::Init(int *argc, char ***argv) { ShmInitGeneric(); } +CartesianCommunicator::CartesianCommunicator(const std::vector &processors,CartesianCommunicator &parent) + : CartesianCommunicator(processors) +{ + std::cout << "Attempts to split SHMEM communicators will fail " < &processors) { _ndimension = processors.size(); diff --git a/lib/qcd/utils/SpaceTimeGrid.cc b/lib/qcd/utils/SpaceTimeGrid.cc index 3ada4a3b..cbbe0aee 100644 --- a/lib/qcd/utils/SpaceTimeGrid.cc +++ b/lib/qcd/utils/SpaceTimeGrid.cc @@ -68,18 +68,21 @@ GridRedBlackCartesian *SpaceTimeGrid::makeFiveDimRedBlackGrid(int Ls,const GridC { int N4=FourDimGrid->_ndimension; int cbd=1; - std::vector latt5(1,Ls); - std::vector simd5(1,1); - std::vector mpi5(1,1); + // std::vector latt5(1,Ls); + // std::vector simd5(1,1); + // std::vector mpi5(1,1); std::vector cb5(1,0); for(int d=0;d_fdimensions[d]); - simd5.push_back(FourDimGrid->_simd_layout[d]); - mpi5.push_back(FourDimGrid->_processors[d]); + // latt5.push_back(FourDimGrid->_fdimensions[d]); + // simd5.push_back(FourDimGrid->_simd_layout[d]); + // 
mpi5.push_back(FourDimGrid->_processors[d]); cb5.push_back( 1); - } - return new GridRedBlackCartesian(latt5,simd5,mpi5,cb5,cbd); + } + GridCartesian *tmp = makeFiveDimGrid(Ls,FourDimGrid); + GridRedBlackCartesian *ret = new GridRedBlackCartesian(tmp,cb5,cbd); + delete tmp; + return ret; } @@ -99,24 +102,30 @@ GridCartesian *SpaceTimeGrid::makeFiveDimDWFGrid(int Ls,const GridCartes } return new GridCartesian(latt5,simd5,mpi5); } - +/////////////////////////////////////////////////// +// Interface is inefficient and forces the deletion +// Pass in the non-redblack grid +/////////////////////////////////////////////////// GridRedBlackCartesian *SpaceTimeGrid::makeFiveDimDWFRedBlackGrid(int Ls,const GridCartesian *FourDimGrid) { int N4=FourDimGrid->_ndimension; - int nsimd = FourDimGrid->Nsimd(); int cbd=1; - std::vector latt5(1,Ls); - std::vector simd5(1,nsimd); - std::vector mpi5(1,1); std::vector cb5(1,0); + // int nsimd = FourDimGrid->Nsimd(); + // std::vector latt5(1,Ls); + // std::vector simd5(1,nsimd); + // std::vector mpi5(1,1); for(int d=0;d_fdimensions[d]); - simd5.push_back(1); - mpi5.push_back(FourDimGrid->_processors[d]); + // latt5.push_back(FourDimGrid->_fdimensions[d]); + // simd5.push_back(1); + // mpi5.push_back(FourDimGrid->_processors[d]); cb5.push_back(1); - } - return new GridRedBlackCartesian(latt5,simd5,mpi5,cb5,cbd); + } + GridCartesian *tmp = makeFiveDimDWFGrid(Ls,FourDimGrid); + GridRedBlackCartesian *ret = new GridRedBlackCartesian(tmp,cb5,cbd); + delete tmp; + return ret; } diff --git a/lib/util/Lexicographic.h b/lib/util/Lexicographic.h index 2d4e5df5..b922dba5 100644 --- a/lib/util/Lexicographic.h +++ b/lib/util/Lexicographic.h @@ -7,7 +7,7 @@ namespace Grid{ class Lexicographic { public: - static inline void CoorFromIndex (std::vector& coor,int index,std::vector &dims){ + static inline void CoorFromIndex (std::vector& coor,int index,const std::vector &dims){ int nd= dims.size(); coor.resize(nd); for(int d=0;d& coor,int 
&index,std::vector &dims){ + static inline void IndexFromCoor (const std::vector& coor,int &index,const std::vector &dims){ int nd=dims.size(); int stride=1; index=0; diff --git a/tests/Test_stencil.cc b/tests/Test_stencil.cc index d7bc5a6c..be8ddfd8 100644 --- a/tests/Test_stencil.cc +++ b/tests/Test_stencil.cc @@ -49,7 +49,7 @@ int main (int argc, char ** argv) double volume = latt_size[0]*latt_size[1]*latt_size[2]*latt_size[3]; GridCartesian Fine(latt_size,simd_layout,mpi_layout); - GridRedBlackCartesian rbFine(latt_size,simd_layout,mpi_layout); + GridRedBlackCartesian rbFine(&Fine); GridParallelRNG fRNG(&Fine); // fRNG.SeedFixedIntegers(std::vector({45,12,81,9}); diff --git a/tests/core/Test_cshift_red_black.cc b/tests/core/Test_cshift_red_black.cc index f9269709..c7b0c2f1 100644 --- a/tests/core/Test_cshift_red_black.cc +++ b/tests/core/Test_cshift_red_black.cc @@ -47,7 +47,7 @@ int main (int argc, char ** argv) mask[0]=0; GridCartesian Fine (latt_size,simd_layout,mpi_layout); - GridRedBlackCartesian RBFine(latt_size,simd_layout,mpi_layout,mask,1); + GridRedBlackCartesian RBFine(&Fine,mask,1); GridParallelRNG FineRNG(&Fine); FineRNG.SeedFixedIntegers(std::vector({45,12,81,9})); diff --git a/tests/core/Test_cshift_red_black_rotate.cc b/tests/core/Test_cshift_red_black_rotate.cc index 3ef1cd21..aa9e6104 100644 --- a/tests/core/Test_cshift_red_black_rotate.cc +++ b/tests/core/Test_cshift_red_black_rotate.cc @@ -47,7 +47,7 @@ int main (int argc, char ** argv) mask[0]=0; GridCartesian Fine (latt_size,simd_layout,mpi_layout); - GridRedBlackCartesian RBFine(latt_size,simd_layout,mpi_layout,mask,1); + GridRedBlackCartesian RBFine(&Fine,mask,1); GridParallelRNG FineRNG(&Fine); FineRNG.SeedFixedIntegers(std::vector({45,12,81,9})); diff --git a/tests/core/Test_fft.cc b/tests/core/Test_fft.cc index 877683f0..b2336cfa 100644 --- a/tests/core/Test_fft.cc +++ b/tests/core/Test_fft.cc @@ -47,7 +47,7 @@ int main (int argc, char ** argv) vol = vol * latt_size[d]; } 
GridCartesian GRID(latt_size,simd_layout,mpi_layout); - GridRedBlackCartesian RBGRID(latt_size,simd_layout,mpi_layout); + GridRedBlackCartesian RBGRID(&GRID); LatticeComplexD one(&GRID); LatticeComplexD zz(&GRID); diff --git a/tests/core/Test_gpwilson_even_odd.cc b/tests/core/Test_gpwilson_even_odd.cc index fc12fe75..2069eb40 100644 --- a/tests/core/Test_gpwilson_even_odd.cc +++ b/tests/core/Test_gpwilson_even_odd.cc @@ -40,7 +40,7 @@ int main (int argc, char ** argv) std::vector simd_layout = GridDefaultSimd(Nd,vComplex::Nsimd()); std::vector mpi_layout = GridDefaultMpi(); GridCartesian Grid(latt_size,simd_layout,mpi_layout); - GridRedBlackCartesian RBGrid(latt_size,simd_layout,mpi_layout); + GridRedBlackCartesian RBGrid(&Grid); int threads = GridThread::GetThreads(); std::cout< simd_layout = GridDefaultSimd(Nd,vComplex::Nsimd()); std::vector mpi_layout = GridDefaultMpi(); GridCartesian Grid(latt_size,simd_layout,mpi_layout); - GridRedBlackCartesian RBGrid(latt_size,simd_layout,mpi_layout); + GridRedBlackCartesian RBGrid(&Grid); int threads = GridThread::GetThreads(); std::cout< simd_layout = GridDefaultSimd(Nd,vComplex::Nsimd()); std::vector mpi_layout = GridDefaultMpi(); GridCartesian Grid(latt_size,simd_layout,mpi_layout); - GridRedBlackCartesian RBGrid(latt_size,simd_layout,mpi_layout); + GridRedBlackCartesian RBGrid(&Grid); int threads = GridThread::GetThreads(); std::cout< simd_layout = GridDefaultSimd(Nd,vComplex::Nsimd()); std::vector mpi_layout = GridDefaultMpi(); GridCartesian Grid(latt_size,simd_layout,mpi_layout); - GridRedBlackCartesian RBGrid(latt_size,simd_layout,mpi_layout); + GridRedBlackCartesian RBGrid(&Grid); int threads = GridThread::GetThreads(); std::cout< mpi_layout = GridDefaultMpi(); GridCartesian Grid(latt_size,simd_layout,mpi_layout); - GridRedBlackCartesian RBGrid(latt_size,simd_layout,mpi_layout); + GridRedBlackCartesian RBGrid(&Grid); int threads = GridThread::GetThreads(); std::cout< mpi_layout = GridDefaultMpi(); GridCartesian 
Grid(latt_size,simd_layout,mpi_layout); - GridRedBlackCartesian RBGrid(latt_size,simd_layout,mpi_layout); + GridRedBlackCartesian RBGrid(&Grid); int threads = GridThread::GetThreads(); std::cout< mpi_layout = GridDefaultMpi(); GridCartesian Grid(latt_size,simd_layout,mpi_layout); - GridRedBlackCartesian RBGrid(latt_size,simd_layout,mpi_layout); + GridRedBlackCartesian RBGrid(&Grid); int threads = GridThread::GetThreads(); std::cout< mpi_layout = GridDefaultMpi(); GridCartesian Grid(latt_size,simd_layout,mpi_layout); - GridRedBlackCartesian RBGrid(latt_size,simd_layout,mpi_layout); + GridRedBlackCartesian RBGrid(&Grid); int threads = GridThread::GetThreads(); std::cout< simd_layout = GridDefaultSimd(Nd, vComplex::Nsimd()); std::vector mpi_layout = GridDefaultMpi(); GridCartesian Grid(latt_size, simd_layout, mpi_layout); - GridRedBlackCartesian RBGrid(latt_size, simd_layout, mpi_layout); + GridRedBlackCartesian RBGrid(&Grid); std::vector seeds({1, 2, 3, 4, 5}); GridSerialRNG sRNG; @@ -149,4 +149,4 @@ JSON } -*/ \ No newline at end of file +*/ diff --git a/tests/solver/Test_laplacian.cc b/tests/solver/Test_laplacian.cc index ce4307ab..f163fe54 100644 --- a/tests/solver/Test_laplacian.cc +++ b/tests/solver/Test_laplacian.cc @@ -40,7 +40,7 @@ int main (int argc, char ** argv) std::vector simd_layout = GridDefaultSimd(Nd,vComplex::Nsimd()); std::vector mpi_layout = GridDefaultMpi(); GridCartesian Grid(latt_size,simd_layout,mpi_layout); - GridRedBlackCartesian RBGrid(latt_size,simd_layout,mpi_layout); + GridRedBlackCartesian RBGrid(&Grid); std::vector seeds({1,2,3,4,5}); GridParallelRNG pRNG(&Grid); pRNG.SeedFixedIntegers(seeds); diff --git a/tests/solver/Test_staggered_cg_unprec.cc b/tests/solver/Test_staggered_cg_unprec.cc index 5e0358d7..eb33c004 100644 --- a/tests/solver/Test_staggered_cg_unprec.cc +++ b/tests/solver/Test_staggered_cg_unprec.cc @@ -57,7 +57,7 @@ int main (int argc, char ** argv) std::vector simd_layout = GridDefaultSimd(Nd,vComplex::Nsimd()); 
std::vector mpi_layout = GridDefaultMpi(); GridCartesian Grid(latt_size,simd_layout,mpi_layout); - GridRedBlackCartesian RBGrid(latt_size,simd_layout,mpi_layout); + GridRedBlackCartesian RBGrid(&Grid); std::vector seeds({1,2,3,4}); GridParallelRNG pRNG(&Grid); pRNG.SeedFixedIntegers(seeds); diff --git a/tests/solver/Test_wilson_cg_prec.cc b/tests/solver/Test_wilson_cg_prec.cc index 011bc70b..99ddfceb 100644 --- a/tests/solver/Test_wilson_cg_prec.cc +++ b/tests/solver/Test_wilson_cg_prec.cc @@ -52,7 +52,7 @@ int main (int argc, char ** argv) std::vector simd_layout = GridDefaultSimd(Nd,vComplex::Nsimd()); std::vector mpi_layout = GridDefaultMpi(); GridCartesian Grid(latt_size,simd_layout,mpi_layout); - GridRedBlackCartesian RBGrid(latt_size,simd_layout,mpi_layout); + GridRedBlackCartesian RBGrid(&Grid); std::vector seeds({1,2,3,4}); GridParallelRNG pRNG(&Grid); pRNG.SeedFixedIntegers(seeds); diff --git a/tests/solver/Test_wilson_cg_schur.cc b/tests/solver/Test_wilson_cg_schur.cc index 7bbf74d3..13ac0090 100644 --- a/tests/solver/Test_wilson_cg_schur.cc +++ b/tests/solver/Test_wilson_cg_schur.cc @@ -52,7 +52,7 @@ int main (int argc, char ** argv) std::vector simd_layout = GridDefaultSimd(Nd,vComplex::Nsimd()); std::vector mpi_layout = GridDefaultMpi(); GridCartesian Grid(latt_size,simd_layout,mpi_layout); - GridRedBlackCartesian RBGrid(latt_size,simd_layout,mpi_layout); + GridRedBlackCartesian RBGrid(&Grid); std::vector seeds({1,2,3,4}); GridParallelRNG pRNG(&Grid); pRNG.SeedFixedIntegers(seeds); diff --git a/tests/solver/Test_wilson_cg_unprec.cc b/tests/solver/Test_wilson_cg_unprec.cc index 19c5f854..db227ec8 100644 --- a/tests/solver/Test_wilson_cg_unprec.cc +++ b/tests/solver/Test_wilson_cg_unprec.cc @@ -52,7 +52,7 @@ int main (int argc, char ** argv) std::vector simd_layout = GridDefaultSimd(Nd,vComplex::Nsimd()); std::vector mpi_layout = GridDefaultMpi(); GridCartesian Grid(latt_size,simd_layout,mpi_layout); - GridRedBlackCartesian 
RBGrid(latt_size,simd_layout,mpi_layout); + GridRedBlackCartesian RBGrid(&Grid); std::vector seeds({1,2,3,4}); GridParallelRNG pRNG(&Grid); pRNG.SeedFixedIntegers(seeds); diff --git a/tests/solver/Test_wilson_cr_unprec.cc b/tests/solver/Test_wilson_cr_unprec.cc index 4182c04e..eccd7e74 100644 --- a/tests/solver/Test_wilson_cr_unprec.cc +++ b/tests/solver/Test_wilson_cr_unprec.cc @@ -52,7 +52,7 @@ int main (int argc, char ** argv) std::vector simd_layout = GridDefaultSimd(Nd,vComplex::Nsimd()); std::vector mpi_layout = GridDefaultMpi(); GridCartesian Grid(latt_size,simd_layout,mpi_layout); - GridRedBlackCartesian RBGrid(latt_size,simd_layout,mpi_layout); + GridRedBlackCartesian RBGrid(&Grid); std::vector seeds({1,2,3,4}); GridParallelRNG pRNG(&Grid); pRNG.SeedFixedIntegers(seeds); From 5e4bea8f2022fcb720bd887a754ee8943b3f38cd Mon Sep 17 00:00:00 2001 From: paboyle Date: Thu, 22 Jun 2017 08:38:54 +0100 Subject: [PATCH 051/377] Benchmark DWF works --- lib/communicator/Communicator_mpi.cc | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/lib/communicator/Communicator_mpi.cc b/lib/communicator/Communicator_mpi.cc index b24b60bd..28a270a0 100644 --- a/lib/communicator/Communicator_mpi.cc +++ b/lib/communicator/Communicator_mpi.cc @@ -86,19 +86,23 @@ CartesianCommunicator::CartesianCommunicator(const std::vector &processors, Lexicographic::IndexFromCoor(scoor,slex,processors); MPI_Comm comm_split; - int ierr= MPI_Comm_split(communicator_world, rlex, slex,&comm_split); - assert(ierr==0); + if ( Nsubcomm > 1 ) { + int ierr= MPI_Comm_split(communicator_world, rlex, slex,&comm_split); + assert(ierr==0); + ////////////////////////////////////////////////////////////////////////////////////////////////////// + // Declare victory + ////////////////////////////////////////////////////////////////////////////////////////////////////// + std::cout << "Divided communicator "<< parent._Nprocessors<<" into " + < Date: Thu, 22 Jun 2017 16:02:15 +0200 
Subject: [PATCH 052/377] Moving overly-specialised code out of Grid --- tests/hadrons/Test_hadrons_rarekaon.cc | 321 ------------------------- 1 file changed, 321 deletions(-) delete mode 100644 tests/hadrons/Test_hadrons_rarekaon.cc diff --git a/tests/hadrons/Test_hadrons_rarekaon.cc b/tests/hadrons/Test_hadrons_rarekaon.cc deleted file mode 100644 index a85beead..00000000 --- a/tests/hadrons/Test_hadrons_rarekaon.cc +++ /dev/null @@ -1,321 +0,0 @@ -/******************************************************************************* - Grid physics library, www.github.com/paboyle/Grid - - Source file: tests/hadrons/Test_hadrons_rarekaon.cc - - Copyright (C) 2017 - - Author: Andrew Lawson - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License along - with this program; if not, write to the Free Software Foundation, Inc., - 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. - - See the full license in the file "LICENSE" in the top level distribution - directory. 
- *******************************************************************************/ - -#include "Test_hadrons.hpp" - -using namespace Grid; -using namespace Hadrons; - -enum quarks -{ - light = 0, - strange = 1, - charm = 2 -}; - -int main(int argc, char *argv[]) -{ - // parse command line ////////////////////////////////////////////////////// - std::string configStem; - - if (argc < 2) - { - std::cerr << "usage: " << argv[0] << " [Grid options]"; - std::cerr << std::endl; - std::exit(EXIT_FAILURE); - } - configStem = argv[1]; - - // initialization ////////////////////////////////////////////////////////// - HADRONS_DEFAULT_INIT; - - // run setup /////////////////////////////////////////////////////////////// - Application application; - std::vector mass = {.01, .04, .2}; - std::vector flavour = {"l", "s", "c"}; - std::vector solvers = {"CG_l", "CG_s", "CG_c"}; - std::string kmom = "0. 0. 0. 0."; - std::string pmom = "1. 0. 0. 0."; - std::string qmom = "-1. 0. 0. 0."; - std::string mqmom = "1. 0. 0. 0."; - std::vector tKs = {0}; - unsigned int dt_pi = 16; - std::vector tJs = {8}; - unsigned int n_noise = 1; - unsigned int nt = 32; - bool do_disconnected(false); - Gamma::Algebra gT = Gamma::Algebra::GammaT; - unsigned int Ls = 16; - double M5 = 1.8; - - // Global parameters. - HADRONS_DEFAULT_GLOBALS(application); - - // gauge field - std::string gaugeField = "gauge"; - if (configStem == "None") - { - application.createModule(gaugeField); - } - else - { - MGauge::Load::Par gaugePar; - gaugePar.file = configStem; - application.createModule(gaugeField, gaugePar); - } - - // set fermion boundary conditions to be periodic space, antiperiodic time. 
- std::string boundary = "1 1 1 -1"; - - for (unsigned int i = 0; i < flavour.size(); ++i) - { - // actions - std::string actionName = "DWF_" + flavour[i]; - makeDWFAction(application, actionName, gaugeField, mass[i], M5, Ls); - - // solvers - makeRBPrecCGSolver(application, solvers[i], actionName); - } - - // Create noise propagators for loops. - std::vector noiseSrcs; - std::vector> noiseRes; - std::vector> noiseProps; - if (n_noise > 0) - { - MSource::Z2::Par noisePar; - noisePar.tA = 0; - noisePar.tB = nt - 1; - std::string loop_stem = "loop_"; - - noiseRes.resize(flavour.size()); - noiseProps.resize(flavour.size()); - for (unsigned int nn = 0; nn < n_noise; ++nn) - { - std::string eta = INIT_INDEX("noise", nn); - application.createModule(eta, noisePar); - noiseSrcs.push_back(eta); - - for (unsigned int f = 0; f < flavour.size(); ++f) - { - std::string loop_prop = INIT_INDEX(loop_stem + flavour[f], nn); - std::string loop_res = loop_prop + "_res"; - makePropagator(application, loop_res, eta, solvers[f]); - makeLoop(application, loop_prop, eta, loop_res); - noiseRes[f].push_back(loop_res); - noiseProps[f].push_back(loop_prop); - } - } - } - - // Translate rare kaon decay across specified timeslices. - for (unsigned int i = 0; i < tKs.size(); ++i) - { - // Zero-momentum wall source propagators for kaon and pion. - unsigned int tK = tKs[i]; - unsigned int tpi = (tK + dt_pi) % nt; - std::string q_Kl_0 = INIT_INDEX("Q_l_0", tK); - std::string q_pil_0 = INIT_INDEX("Q_l_0", tpi); - MAKE_WALL_PROP(tK, q_Kl_0, solvers[light]); - MAKE_WALL_PROP(tpi, q_pil_0, solvers[light]); - - // Wall sources for kaon and pion with momentum insertion. If either - // p or k are zero, or p = k, re-use the existing name to avoid - // duplicating a propagator. - std::string q_Ks_k = INIT_INDEX("Q_Ks_k", tK); - std::string q_Ks_p = INIT_INDEX((kmom == pmom) ? "Q_Ks_k" : "Q_Ks_p", tK); - std::string q_pil_k = INIT_INDEX((kmom == ZERO_MOM) ? 
"Q_l_0" : "Q_l_k", tpi); - std::string q_pil_p = INIT_INDEX((pmom == kmom) ? q_pil_k : ((pmom == ZERO_MOM) ? "Q_l_0" : "Q_l_p"), tpi); - MAKE_3MOM_WALL_PROP(tK, kmom, q_Ks_k, solvers[strange]); - MAKE_3MOM_WALL_PROP(tK, pmom, q_Ks_p, solvers[strange]); - MAKE_3MOM_WALL_PROP(tpi, kmom, q_pil_k, solvers[light]); - MAKE_3MOM_WALL_PROP(tpi, pmom, q_pil_p, solvers[light]); - - /*********************************************************************** - * CONTRACTIONS: pi and K 2pt contractions with mom = p, k. - **********************************************************************/ - // Wall-Point - std::string PW_K_k = INIT_INDEX("PW_K_k", tK); - std::string PW_K_p = INIT_INDEX("PW_K_p", tK); - std::string PW_pi_k = INIT_INDEX("PW_pi_k", tpi); - std::string PW_pi_p = INIT_INDEX("PW_pi_p", tpi); - mesonContraction(application, 2, q_Kl_0, q_Ks_k, PW_K_k, kmom); - mesonContraction(application, 2, q_Kl_0, q_Ks_p, PW_K_p, pmom); - mesonContraction(application, 2, q_pil_k, q_pil_0, PW_pi_k, kmom); - mesonContraction(application, 2, q_pil_p, q_pil_0, PW_pi_p, pmom); - // Wall-Wall, to be done - requires modification of meson module. - - /*********************************************************************** - * CONTRACTIONS: 3pt Weak Hamiltonian, C & W (non-Eye type) classes. - **********************************************************************/ - std::string HW_CW_k = LABEL_3PT("HW_CW_k", tK, tpi); - std::string HW_CW_p = LABEL_3PT("HW_CW_p", tK, tpi); - weakContractionNonEye(application, 3, q_Kl_0, q_Ks_k, q_pil_k, q_pil_0, HW_CW_k); - weakContractionNonEye(application, 3, q_Kl_0, q_Ks_p, q_pil_p, q_pil_0, HW_CW_p); - - /*********************************************************************** - * CONTRACTIONS: 3pt sd insertion. - **********************************************************************/ - // Note: eventually will use wall sink smeared q_Kl_0 instead. 
- std::string sd_k = LABEL_3PT("sd_k", tK, tpi); - std::string sd_p = LABEL_3PT("sd_p", tK, tpi); - gamma3ptContraction(application, 3, q_Kl_0, q_Ks_k, q_pil_k, sd_k); - gamma3ptContraction(application, 3, q_Kl_0, q_Ks_p, q_pil_p, sd_p); - - for (unsigned int nn = 0; nn < n_noise; ++nn) - { - /******************************************************************* - * CONTRACTIONS: 3pt Weak Hamiltonian, S and E (Eye type) classes. - ******************************************************************/ - // Note: eventually will use wall sink smeared q_Kl_0 instead. - for (unsigned int f = 0; f < flavour.size(); ++f) - { - if ((f != strange) || do_disconnected) - { - std::string HW_SE_k = LABEL_3PT("HW_SE_k_" + flavour[f], tK, tpi); - std::string HW_SE_p = LABEL_3PT("HW_SE_p_" + flavour[f], tK, tpi); - std::string loop_q = noiseProps[f][nn]; - weakContractionEye(application, 3, q_Kl_0, q_Ks_k, q_pil_k, loop_q, HW_CW_k); - weakContractionEye(application, 3, q_Kl_0, q_Ks_p, q_pil_p, loop_q, HW_CW_p); - } - } - } - - // Perform separate contractions for each t_J position. - for (unsigned int j = 0; j < tJs.size(); ++j) - { - // Sequential sources for current insertions. Local for now, - // gamma_0 only. - unsigned int tJ = (tJs[j] + tK) % nt; - MSource::SeqGamma::Par seqPar; - std::string q_KlCl_q = LABEL_3PT("Q_KlCl_q", tK, tJ); - std::string q_KsCs_mq = LABEL_3PT("Q_KsCs_mq", tK, tJ); - std::string q_pilCl_q = LABEL_3PT("Q_pilCl_q", tpi, tJ); - std::string q_pilCl_mq = LABEL_3PT("Q_pilCl_mq", tpi, tJ); - MAKE_SEQUENTIAL_PROP(tJ, q_Kl_0, qmom, q_KlCl_q, solvers[light], gT); - MAKE_SEQUENTIAL_PROP(tJ, q_Ks_k, mqmom, q_KsCs_mq, solvers[strange], gT); - MAKE_SEQUENTIAL_PROP(tJ, q_pil_p, qmom, q_pilCl_q, solvers[light], gT); - MAKE_SEQUENTIAL_PROP(tJ, q_pil_0, mqmom, q_pilCl_mq, solvers[light], gT); - - /******************************************************************* - * CONTRACTIONS: pi and K 3pt contractions with current insertion. 
- ******************************************************************/ - // Wall-Point - std::string C_PW_Kl = LABEL_3PT("C_PW_Kl", tK, tJ); - std::string C_PW_Ksb = LABEL_3PT("C_PW_Ksb", tK, tJ); - std::string C_PW_pilb = LABEL_3PT("C_PW_pilb", tK, tJ); - std::string C_PW_pil = LABEL_3PT("C_PW_pil", tK, tJ); - mesonContraction(application, 3, q_KlCl_q, q_Ks_k, C_PW_Kl, pmom); - mesonContraction(application, 3, q_Kl_0, q_KsCs_mq, C_PW_Ksb, pmom); - mesonContraction(application, 3, q_pil_0, q_pilCl_q, C_PW_pilb, kmom); - mesonContraction(application, 3, q_pilCl_mq, q_pil_p, C_PW_pil, kmom); - // Wall-Wall, to be done. - - /******************************************************************* - * CONTRACTIONS: 4pt contractions, C & W classes. - ******************************************************************/ - std::string CW_Kl = LABEL_4PT("CW_Kl", tK, tJ, tpi); - std::string CW_Ksb = LABEL_4PT("CW_Ksb", tK, tJ, tpi); - std::string CW_pilb = LABEL_4PT("CW_pilb", tK, tJ, tpi); - std::string CW_pil = LABEL_4PT("CW_pil", tK, tJ, tpi); - weakContractionNonEye(application, 4, q_KlCl_q, q_Ks_k, q_pil_p, q_pil_0, CW_Kl); - weakContractionNonEye(application, 4, q_Kl_0, q_KsCs_mq, q_pil_p, q_pil_0, CW_Ksb); - weakContractionNonEye(application, 4, q_Kl_0, q_Ks_k, q_pilCl_q, q_pil_0, CW_pilb); - weakContractionNonEye(application, 4, q_Kl_0, q_Ks_k, q_pil_p, q_pilCl_mq, CW_pil); - - /******************************************************************* - * CONTRACTIONS: 4pt contractions, sd insertions. - ******************************************************************/ - // Note: eventually will use wall sink smeared q_Kl_0/q_KlCl_q instead. 
- std::string sd_Kl = LABEL_4PT("sd_Kl", tK, tJ, tpi); - std::string sd_Ksb = LABEL_4PT("sd_Ksb", tK, tJ, tpi); - std::string sd_pilb = LABEL_4PT("sd_pilb", tK, tJ, tpi); - gamma3ptContraction(application, 4, q_KlCl_q, q_Ks_k, q_pil_p, sd_Kl); - gamma3ptContraction(application, 4, q_Kl_0, q_KsCs_mq, q_pil_p, sd_Ksb); - gamma3ptContraction(application, 4, q_Kl_0, q_Ks_k, q_pilCl_q, sd_pilb); - - // Sequential sources for each noise propagator. - for (unsigned int nn = 0; nn < n_noise; ++nn) - { - std::string loop_stem = "loop_"; - - // Contraction required for each quark flavour - alternatively - // drop the strange loop if not performing disconnected - // contractions or neglecting H_W operators Q_3 -> Q_10. - for (unsigned int f = 0; f < flavour.size(); ++f) - { - if ((f != strange) || do_disconnected) - { - std::string eta = noiseSrcs[nn]; - std::string loop_q = noiseProps[f][nn]; - std::string loop_qCq = LABEL_3PT(loop_stem + flavour[f], tJ, nn); - std::string loop_qCq_res = loop_qCq + "_res"; - MAKE_SEQUENTIAL_PROP(tJ, noiseRes[f][nn], qmom, - loop_qCq_res, solvers[f], gT); - makeLoop(application, loop_qCq, eta, loop_qCq_res); - - /******************************************************* - * CONTRACTIONS: 4pt contractions, S & E classes. - ******************************************************/ - // Note: eventually will use wall sink smeared q_Kl_0/q_KlCl_q instead. 
- std::string SE_Kl = LABEL_4PT_NOISE("SE_Kl", tK, tJ, tpi, nn); - std::string SE_Ksb = LABEL_4PT_NOISE("SE_Ksb", tK, tJ, tpi, nn); - std::string SE_pilb = LABEL_4PT_NOISE("SE_pilb", tK, tJ, tpi, nn); - std::string SE_loop = LABEL_4PT_NOISE("SE_loop", tK, tJ, tpi, nn); - weakContractionEye(application, 4, q_KlCl_q, q_Ks_k, q_pil_p, loop_q, SE_Kl); - weakContractionEye(application, 4, q_Kl_0, q_KsCs_mq, q_pil_p, loop_q, SE_Ksb); - weakContractionEye(application, 4, q_Kl_0, q_Ks_k, q_pilCl_q, loop_q, SE_pilb); - weakContractionEye(application, 4, q_Kl_0, q_Ks_k, q_pil_p, loop_qCq, SE_loop); - - /******************************************************* - * CONTRACTIONS: 4pt contractions, pi0 disconnected - * loop. - ******************************************************/ - std::string disc0 = LABEL_4PT_NOISE("disc0", tK, tJ, tpi, nn); - disc0Contraction(application, q_Kl_0, q_Ks_k, q_pilCl_q, loop_q, disc0); - - /******************************************************* - * CONTRACTIONS: Disconnected loop. - ******************************************************/ - std::string discLoop = "disc_" + loop_qCq; - discLoopContraction(application, loop_qCq, discLoop); - } - } - } - } - } - // execution - std::string par_file_name = "rarekaon_000_100_tK0_tpi16_tJ8_noloop_mc0.2.xml"; - application.saveParameterFile(par_file_name); - application.run(); - - // epilogue - LOG(Message) << "Grid is finalizing now" << std::endl; - Grid_finalize(); - - return EXIT_SUCCESS; -} From 18211eb5b13fa2738061b4c48a7518c7368c2645 Mon Sep 17 00:00:00 2001 From: Lanny91 Date: Thu, 22 Jun 2017 16:03:59 +0200 Subject: [PATCH 053/377] Hadrons: Fixed test to use new implementation of meson module. 
--- tests/hadrons/Test_hadrons.hpp | 20 ++++++----- tests/hadrons/Test_hadrons_meson_3pt.cc | 47 ++++++++++--------------- 2 files changed, 30 insertions(+), 37 deletions(-) diff --git a/tests/hadrons/Test_hadrons.hpp b/tests/hadrons/Test_hadrons.hpp index a554425d..3492816d 100644 --- a/tests/hadrons/Test_hadrons.hpp +++ b/tests/hadrons/Test_hadrons.hpp @@ -71,6 +71,9 @@ using namespace Hadrons; #define NAME_WALL_SOURCE(t) NAME_3MOM_WALL_SOURCE(t, ZERO_MOM) #define NAME_POINT_SOURCE(pos) ("point_" + pos) +// Meson module "gammas" special values +#define ALL_GAMMAS "all" + #define MAKE_3MOM_WALL_PROP(tW, mom, propName, solver)\ {\ std::string srcName = NAME_3MOM_WALL_SOURCE(tW, mom);\ @@ -364,28 +367,27 @@ inline void makeLoop(Application &application, std::string &propName, * Name: mesonContraction * Purpose: Create meson contraction module and add to application module. * Parameters: application - main application that stores modules. - * npt - specify n-point correlator (for labelling). + * modName - unique module name. + * output - name of output files. * q1 - quark propagator 1. * q2 - quark propagator 2. - * label - unique label to construct module name. - * mom - momentum to project (default is zero) + * sink - sink smearing module. * gammas - gamma insertions at source and sink. * Returns: None. 
******************************************************************************/ -inline void mesonContraction(Application &application, unsigned int npt, +inline void mesonContraction(Application &application, + std::string &modName, std::string &output, std::string &q1, std::string &q2, - std::string &label, - std::string mom = ZERO_MOM, + std::string &sink, std::string gammas = "") { - std::string modName = std::to_string(npt) + "pt_" + label; if (!(Environment::getInstance().hasModule(modName))) { MContraction::Meson::Par mesPar; - mesPar.output = std::to_string(npt) + "pt/" + label; + mesPar.output = output; mesPar.q1 = q1; mesPar.q2 = q2; - mesPar.mom = mom; + mesPar.sink = sink; mesPar.gammas = gammas; application.createModule(modName, mesPar); } diff --git a/tests/hadrons/Test_hadrons_meson_3pt.cc b/tests/hadrons/Test_hadrons_meson_3pt.cc index 7e487153..1cbb866d 100644 --- a/tests/hadrons/Test_hadrons_meson_3pt.cc +++ b/tests/hadrons/Test_hadrons_meson_3pt.cc @@ -25,7 +25,7 @@ directory. *******************************************************************************/ -#include +#include "Test_hadrons.hpp" using namespace Grid; using namespace Hadrons; @@ -127,43 +127,34 @@ int main(int argc, char *argv[]) } } + // Point sink. + std::string sink = "sink"; + MSink::Point::Par sinkPar; + sinkPar.mom = ZERO_MOM; + application.createModule(sink, sinkPar); + // contractions MContraction::Meson::Par mesPar; for (unsigned int i = 0; i < flavour.size(); ++i) for (unsigned int j = i; j < flavour.size(); ++j) { - mesPar.output = "mesons/Z2_" + flavour[i] + flavour[j]; - mesPar.q1 = qName[i]; - mesPar.q2 = qName[j]; - mesPar.gammas = "all"; - mesPar.mom = "0. 0. 0. 
0."; - application.createModule("meson_Z2_" - + std::to_string(t) - + "_" - + flavour[i] - + flavour[j], - mesPar); + std::string modName = "meson_Z2_" + std::to_string(t) + "_" + \ + flavour[i] + flavour[j]; + std::string output = "mesons/Z2_" + flavour[i] + flavour[j]; + mesonContraction(application, modName, output, qName[i], qName[j], + sink, ALL_GAMMAS); } for (unsigned int i = 0; i < flavour.size(); ++i) for (unsigned int j = 0; j < flavour.size(); ++j) for (unsigned int mu = 0; mu < Nd; ++mu) { - MContraction::Meson::Par mesPar; - - mesPar.output = "3pt/Z2_" + flavour[i] + flavour[j] + "_" - + std::to_string(mu); - mesPar.q1 = qName[i]; - mesPar.q2 = seqName[j][mu]; - mesPar.gammas = "all"; - mesPar.mom = "0. 0. 0. 0."; - application.createModule("3pt_Z2_" - + std::to_string(t) - + "_" - + flavour[i] - + flavour[j] - + "_" - + std::to_string(mu), - mesPar); + std::string modName = "3pt_Z2_" + std::to_string(t) + "_" + \ + flavour[i] + flavour[j] + "_" + \ + std::to_string(mu); + std::string output = "3pt/Z2_" + flavour[i] + \ + flavour[j] + "_" + std::to_string(mu); + mesonContraction(application, modName, output, + qName[i], seqName[j][mu], sink, ALL_GAMMAS); } } From 7a3bd5c66c3e1f125801f17fcb0b3b4f1de5d274 Mon Sep 17 00:00:00 2001 From: Lanny91 Date: Thu, 22 Jun 2017 16:06:15 +0200 Subject: [PATCH 054/377] Hadrons: new conserved current contraction test (for regression testing) --- .../Test_hadrons_meson_conserved_3pt.cc | 115 ++++++++++++++++++ 1 file changed, 115 insertions(+) create mode 100644 tests/hadrons/Test_hadrons_meson_conserved_3pt.cc diff --git a/tests/hadrons/Test_hadrons_meson_conserved_3pt.cc b/tests/hadrons/Test_hadrons_meson_conserved_3pt.cc new file mode 100644 index 00000000..c9aeb2cc --- /dev/null +++ b/tests/hadrons/Test_hadrons_meson_conserved_3pt.cc @@ -0,0 +1,115 @@ +/******************************************************************************* + Grid physics library, www.github.com/paboyle/Grid + + Source file: 
tests/hadrons/Test_hadrons_meson_conserved_3pt.cc + + Copyright (C) 2017 + + Author: Andrew Lawson + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along + with this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + + See the full license in the file "LICENSE" in the top level distribution + directory. + *******************************************************************************/ + +#include "Test_hadrons.hpp" + +using namespace Grid; +using namespace Hadrons; + +int main(int argc, char *argv[]) +{ + // initialization ////////////////////////////////////////////////////////// + HADRONS_DEFAULT_INIT; + + // run setup /////////////////////////////////////////////////////////////// + Application application; + + // actions parameters + double mass = 0.04; + unsigned int Ls = 16; + double M5 = 1.8; + + // kinematics + unsigned int nt = GridDefaultLatt()[Tp]; + unsigned int tSrc = 0; + unsigned int tJ = nt / 4; + std::string kmom = "0. 0. 0. 0."; + std::string pmom = "1. 0. 0. 0."; + + // Global parameters. + HADRONS_DEFAULT_GLOBALS(application); + + // Unit gauge field. 
+ std::string gaugeField = "Unit gauge"; + application.createModule(gaugeField); + + // DWF action + std::string actionName = "DWF"; + makeDWFAction(application, actionName, gaugeField, mass, M5, Ls); + + // Solver + std::string solver = "CG"; + makeRBPrecCGSolver(application, solver, actionName); + + // main test body ////////////////////////////////////////////////////////// + // Point sink modules. + std::string sink_0 = "sink_0"; + std::string sink_p = "sink_p"; + MSink::Point::Par sinkPar; + sinkPar.mom = kmom; + application.createModule(sink_0, sinkPar); + sinkPar.mom = pmom; + application.createModule(sink_p, sinkPar); + + // 2pt pion contraction, zero momentum. + std::string q_0 = "Q_0"; + MAKE_WALL_PROP(tSrc, q_0, solver); + std::string modName = INIT_INDEX("2pt_pion_WP", tSrc); + std::string output = "2pt/pion_WP_0"; + mesonContraction(application, modName, output, q_0, q_0, sink_0); + + // 2pt pion contraction, with momentum p. + std::string q_p = "Q_p"; + MAKE_3MOM_WALL_PROP(tSrc, pmom, q_p, solver); + modName = INIT_INDEX("2pt_pion_WP_p", tSrc); + output = "2pt/pion_WP_p"; + mesonContraction(application, modName, output, q_0, q_p, sink_p); + + // 3pt pion(0) -> pion(p), with sequentially inserted vector current in + // time direction. + std::string qSeq = q_0 + INIT_INDEX("_seq_Vc3", tJ); + std::string q5d = LABEL_5D(q_0); // Need 5D prop for DWF conserved current. 
+ std::string srcName = qSeq + "_src"; + modName = LABEL_3PT("3pt_pion_Vc3", tSrc, tJ); + output = "3pt/pion_Vc3_p"; + makeConservedSequentialSource(application, srcName, q5d, actionName, + tJ, Current::Vector, Tp, pmom); + makePropagator(application, qSeq, srcName, solver); + mesonContraction(application, modName, output, q_0, qSeq, sink_p); + + std::string par_file_name = "conserved_3pt.xml"; + application.saveParameterFile(par_file_name); + application.run(); + + // epilogue + LOG(Message) << "Grid is finalizing now" << std::endl; + Grid_finalize(); + + return EXIT_SUCCESS; +} + + \ No newline at end of file From dc6b2d30d2dea8ede405aef3f1a753eb7f5127f1 Mon Sep 17 00:00:00 2001 From: Lanny91 Date: Thu, 22 Jun 2017 16:09:45 +0200 Subject: [PATCH 055/377] Documentation fix --- extras/Hadrons/Modules/MContraction/Meson.hpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/extras/Hadrons/Modules/MContraction/Meson.hpp b/extras/Hadrons/Modules/MContraction/Meson.hpp index 7810326a..b71f7c08 100644 --- a/extras/Hadrons/Modules/MContraction/Meson.hpp +++ b/extras/Hadrons/Modules/MContraction/Meson.hpp @@ -51,8 +51,7 @@ BEGIN_HADRONS_NAMESPACE in a sequence (e.g. ""). Special values: "all" - perform all possible contractions. - - mom: momentum insertion, space-separated float sequence (e.g ".1 .2 1. 0."), - given as multiples of (2*pi) / L. + - sink: module to compute the sink to use in contraction (string). 
*/ /****************************************************************************** From 08b0e472aa46eb0cef6ad00eaab46cce35357781 Mon Sep 17 00:00:00 2001 From: Lanny91 Date: Thu, 22 Jun 2017 16:34:33 +0200 Subject: [PATCH 056/377] Fixed hadrons tests after merge --- tests/hadrons/Test_hadrons.hpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/hadrons/Test_hadrons.hpp b/tests/hadrons/Test_hadrons.hpp index 3492816d..6ea51d72 100644 --- a/tests/hadrons/Test_hadrons.hpp +++ b/tests/hadrons/Test_hadrons.hpp @@ -329,10 +329,10 @@ inline void makePropagator(Application &application, std::string &propName, // If the propagator already exists, don't make the module again. if (!(Environment::getInstance().hasModule(propName))) { - Quark::Par quarkPar; + MFermion::GaugeProp::Par quarkPar; quarkPar.source = srcName; quarkPar.solver = solver; - application.createModule(propName, quarkPar); + application.createModule(propName, quarkPar); } } From e504260f3d8381376f9de60028b41c6685e0b93c Mon Sep 17 00:00:00 2001 From: paboyle Date: Thu, 22 Jun 2017 18:53:11 +0100 Subject: [PATCH 057/377] Able to run a test job splitting into multiple MPI subdomains. 
--- lib/algorithms/iterative/ConjugateGradient.h | 4 +- lib/cartesian/Cartesian_base.h | 3 - lib/cartesian/Cartesian_full.h | 2 +- lib/communicator/Communicator_mpi.cc | 54 +++++---- lib/parallelIO/IldgIO.h | 115 ++++++++++++++++--- lib/parallelIO/IldgIOtypes.h | 4 + lib/parallelIO/MetaData.h | 1 + lib/qcd/utils/SpaceTimeGrid.cc | 19 +-- 8 files changed, 138 insertions(+), 64 deletions(-) diff --git a/lib/algorithms/iterative/ConjugateGradient.h b/lib/algorithms/iterative/ConjugateGradient.h index ed453161..5c968e04 100644 --- a/lib/algorithms/iterative/ConjugateGradient.h +++ b/lib/algorithms/iterative/ConjugateGradient.h @@ -52,8 +52,8 @@ class ConjugateGradient : public OperatorFunction { MaxIterations(maxit), ErrorOnNoConverge(err_on_no_conv){}; - void operator()(LinearOperatorBase &Linop, const Field &src, - Field &psi) { + void operator()(LinearOperatorBase &Linop, const Field &src, Field &psi) { + psi.checkerboard = src.checkerboard; conformable(psi, src); diff --git a/lib/cartesian/Cartesian_base.h b/lib/cartesian/Cartesian_base.h index 25041d17..a7719ec4 100644 --- a/lib/cartesian/Cartesian_base.h +++ b/lib/cartesian/Cartesian_base.h @@ -211,9 +211,6 @@ public: assert(lidx & gcoor,int & gidx){ gidx=0; int mult=1; diff --git a/lib/cartesian/Cartesian_full.h b/lib/cartesian/Cartesian_full.h index bced0791..eb388f84 100644 --- a/lib/cartesian/Cartesian_full.h +++ b/lib/cartesian/Cartesian_full.h @@ -67,7 +67,7 @@ public: GridCartesian(const std::vector &dimensions, const std::vector &simd_layout, const std::vector &processor_grid, - GridCartesian &parent) : GridBase(processor_grid,parent) + const GridCartesian &parent) : GridBase(processor_grid,parent) { Init(dimensions,simd_layout,processor_grid); } diff --git a/lib/communicator/Communicator_mpi.cc b/lib/communicator/Communicator_mpi.cc index 28a270a0..7879f518 100644 --- a/lib/communicator/Communicator_mpi.cc +++ b/lib/communicator/Communicator_mpi.cc @@ -56,46 +56,52 @@ void CartesianCommunicator::Init(int 
*argc, char ***argv) { CartesianCommunicator::CartesianCommunicator(const std::vector &processors) { InitFromMPICommunicator(processors,communicator_world); - std::cout << "Passed communicator world to a new communicator" < &processors,const CartesianCommunicator &parent) { _ndimension = processors.size(); assert(_ndimension = parent._ndimension); - + ////////////////////////////////////////////////////////////////////////////////////////////////////// // split the communicator ////////////////////////////////////////////////////////////////////////////////////////////////////// - std::vector ratio(_ndimension); - std::vector rcoor(_ndimension); - std::vector scoor(_ndimension); + int Nparent; + MPI_Comm_size(parent.communicator,&Nparent); - int Nsubcomm=1; - int Nsubrank=1; + int childsize=1; for(int d=0;d<_ndimension;d++) { - ratio[d] = parent._processors[d] / processors[d]; - rcoor[d] = parent._processor_coor[d] / processors[d]; - scoor[d] = parent._processor_coor[d] % processors[d]; - assert(ratio[d] * processors[d] == parent._processors[d]); // must exactly subdivide - Nsubcomm *= ratio[d]; - Nsubrank *= processors[d]; + childsize *= processors[d]; } + int Nchild = Nparent/childsize; + assert (childsize * Nchild == Nparent); - int rlex, slex; - Lexicographic::IndexFromCoor(rcoor,rlex,ratio); - Lexicographic::IndexFromCoor(scoor,slex,processors); + int prank; MPI_Comm_rank(parent.communicator,&prank); + int crank = prank % childsize; + int ccomm = prank / childsize; MPI_Comm comm_split; - if ( Nsubcomm > 1 ) { - int ierr= MPI_Comm_split(communicator_world, rlex, slex,&comm_split); + if ( Nchild > 1 ) { + + std::cout << GridLogMessage<<"Child communicator of "<< parent.communicator< &processors, ////////////////////////////////////////////////////////////////////////////////////////////////////// void CartesianCommunicator::InitFromMPICommunicator(const std::vector &processors, MPI_Comm communicator_base) { - if ( communicator_base != communicator_world ) { - 
std::cout << "Cartesian communicator created with a non-world communicator"<(); - // std::cout << " Lorentz N/S/V/M : " << _LorentzN<<" "<<_LorentzScalar<<"/"<<_LorentzVector<<"/"<<_LorentzMatrix<_gsites; + + // std::cout << "R sizeof(sobj)= " <_gsites< munge; - BinaryIO::readLatticeObject< sobj, sobj >(field, filename, munge, offset, format,nersc_csum,scidac_csuma,scidac_csumb); + BinaryIO::readLatticeObject< vobj, sobj >(field, filename, munge, offset, format,nersc_csum,scidac_csuma,scidac_csumb); ///////////////////////////////////////////// // Insist checksum is next record ///////////////////////////////////////////// - readLimeObject(scidacChecksum_,std::string("scidacChecksum"),record_name); + readLimeObject(scidacChecksum_,std::string("scidacChecksum"),std::string(SCIDAC_CHECKSUM)); ///////////////////////////////////////////// // Verify checksums @@ -242,9 +252,14 @@ class GridLimeReader : public BinaryIO { // should this be a do while; can we miss a first record?? while ( limeReaderNextRecord(LimeR) == LIME_SUCCESS ) { + // std::cout << GridLogMessage<< " readLimeObject seeking "<< record_name <<" found record :" < xmlc(nbytes+1,'\0'); limeReaderReadData((void *)&xmlc[0], &nbytes, LimeR); XmlReader RD(&xmlc[0],""); @@ -302,14 +317,18 @@ class GridLimeWriter : public BinaryIO { write(WR,object_name,object); xmlstring = WR.XmlString(); } + // std::cout << "WriteLimeObject" << record_name <(record_name.c_str()), nbytes); + assert(h!= NULL); err=limeWriteRecordHeader(h, LimeW); assert(err>=0); err=limeWriteRecordData(&xmlstring[0], &nbytes, LimeW); assert(err>=0); err=limeWriterCloseRecord(LimeW); assert(err>=0); limeDestroyHeader(h); + // std::cout << " File offset is now"<_gsites; createLimeRecordHeader(record_name, 0, 0, PayloadSize); + + // std::cout << "W sizeof(sobj)" <_gsites<(); BinarySimpleMunger munge; BinaryIO::writeLatticeObject(field, filename, munge, offset, format,nersc_csum,scidac_csuma,scidac_csumb); @@ -354,7 +379,7 @@ class GridLimeWriter : 
public BinaryIO { checksum.suma= streama.str(); checksum.sumb= streamb.str(); std::cout << GridLogMessage<<" writing scidac checksums "< + template void writeScidacFieldRecord(Lattice &field,userRecord _userRecord) { - typedef typename vobj::scalar_object sobj; - uint64_t nbytes; GridBase * grid = field._grid; //////////////////////////////////////// @@ -397,6 +420,66 @@ class ScidacWriter : public GridLimeWriter { } }; + +class ScidacReader : public GridLimeReader { + public: + + template + void readScidacFileRecord(GridBase *grid,SerialisableUserFile &_userFile) + { + scidacFile _scidacFile(grid); + readLimeObject(_scidacFile,_scidacFile.SerialisableClassName(),std::string(SCIDAC_PRIVATE_FILE_XML)); + readLimeObject(_userFile,_userFile.SerialisableClassName(),std::string(SCIDAC_FILE_XML)); + } + //////////////////////////////////////////////// + // Write generic lattice field in scidac format + //////////////////////////////////////////////// + template + void readScidacFieldRecord(Lattice &field,userRecord &_userRecord) + { + typedef typename vobj::scalar_object sobj; + GridBase * grid = field._grid; + + //////////////////////////////////////// + // fill the Grid header + //////////////////////////////////////// + FieldMetaData header; + scidacRecord _scidacRecord; + scidacFile _scidacFile; + + ////////////////////////////////////////////// + // Fill the Lime file record by record + ////////////////////////////////////////////// + readLimeObject(header ,std::string("FieldMetaData"),std::string(GRID_FORMAT)); // Open message + readLimeObject(_userRecord,_userRecord.SerialisableClassName(),std::string(SCIDAC_RECORD_XML)); + readLimeObject(_scidacRecord,_scidacRecord.SerialisableClassName(),std::string(SCIDAC_PRIVATE_RECORD_XML)); + readLimeLatticeBinaryObject(field,std::string(ILDG_BINARY_DATA)); + } + void skipPastBinaryRecord(void) { + std::string rec_name(ILDG_BINARY_DATA); + while ( limeReaderNextRecord(LimeR) == LIME_SUCCESS ) { + if ( 
!strncmp(limeReaderType(LimeR), rec_name.c_str(),strlen(rec_name.c_str()) ) ) { + skipPastObjectRecord(std::string(SCIDAC_CHECKSUM)); + return; + } + } + } + void skipPastObjectRecord(std::string rec_name) { + while ( limeReaderNextRecord(LimeR) == LIME_SUCCESS ) { + if ( !strncmp(limeReaderType(LimeR), rec_name.c_str(),strlen(rec_name.c_str()) ) ) { + return; + } + } + } + void skipScidacFieldRecord() { + skipPastObjectRecord(std::string(GRID_FORMAT)); + skipPastObjectRecord(std::string(SCIDAC_RECORD_XML)); + skipPastObjectRecord(std::string(SCIDAC_PRIVATE_RECORD_XML)); + skipPastBinaryRecord(); + } +}; + + class IldgWriter : public ScidacWriter { public: @@ -425,8 +508,6 @@ class IldgWriter : public ScidacWriter { typedef iLorentzColourMatrix vobj; typedef typename vobj::scalar_object sobj; - uint64_t nbytes; - //////////////////////////////////////// // fill the Grid header //////////////////////////////////////// diff --git a/lib/parallelIO/IldgIOtypes.h b/lib/parallelIO/IldgIOtypes.h index c3a5321c..53664b49 100644 --- a/lib/parallelIO/IldgIOtypes.h +++ b/lib/parallelIO/IldgIOtypes.h @@ -64,6 +64,10 @@ namespace Grid { // file compatability, so should be correct to assume the undocumented but defacto file structure. 
///////////////////////////////////////////////////////////////////////////////// +struct emptyUserRecord : Serializable { + GRID_SERIALIZABLE_CLASS_MEMBERS(emptyUserRecord,int,dummy); +}; + //////////////////////// // Scidac private file xml // 1.1416 16 16 32 0 diff --git a/lib/parallelIO/MetaData.h b/lib/parallelIO/MetaData.h index 6d45d0a5..8fc0d777 100644 --- a/lib/parallelIO/MetaData.h +++ b/lib/parallelIO/MetaData.h @@ -104,6 +104,7 @@ namespace Grid { header.nd = nd; header.dimension.resize(nd); header.boundary.resize(nd); + header.data_start = 0; for(int d=0;d_fdimensions[d]; } diff --git a/lib/qcd/utils/SpaceTimeGrid.cc b/lib/qcd/utils/SpaceTimeGrid.cc index cbbe0aee..b2b5d9c8 100644 --- a/lib/qcd/utils/SpaceTimeGrid.cc +++ b/lib/qcd/utils/SpaceTimeGrid.cc @@ -60,7 +60,7 @@ GridCartesian *SpaceTimeGrid::makeFiveDimGrid(int Ls,const GridCartesian simd5.push_back(FourDimGrid->_simd_layout[d]); mpi5.push_back(FourDimGrid->_processors[d]); } - return new GridCartesian(latt5,simd5,mpi5); + return new GridCartesian(latt5,simd5,mpi5,*FourDimGrid); } @@ -68,15 +68,8 @@ GridRedBlackCartesian *SpaceTimeGrid::makeFiveDimRedBlackGrid(int Ls,const GridC { int N4=FourDimGrid->_ndimension; int cbd=1; - // std::vector latt5(1,Ls); - // std::vector simd5(1,1); - // std::vector mpi5(1,1); std::vector cb5(1,0); - for(int d=0;d_fdimensions[d]); - // simd5.push_back(FourDimGrid->_simd_layout[d]); - // mpi5.push_back(FourDimGrid->_processors[d]); cb5.push_back( 1); } GridCartesian *tmp = makeFiveDimGrid(Ls,FourDimGrid); @@ -100,7 +93,7 @@ GridCartesian *SpaceTimeGrid::makeFiveDimDWFGrid(int Ls,const GridCartes simd5.push_back(1); mpi5.push_back(FourDimGrid->_processors[d]); } - return new GridCartesian(latt5,simd5,mpi5); + return new GridCartesian(latt5,simd5,mpi5,*FourDimGrid); } /////////////////////////////////////////////////// // Interface is inefficient and forces the deletion @@ -111,15 +104,7 @@ GridRedBlackCartesian *SpaceTimeGrid::makeFiveDimDWFRedBlackGrid(int 
Ls,const Gr int N4=FourDimGrid->_ndimension; int cbd=1; std::vector cb5(1,0); - // int nsimd = FourDimGrid->Nsimd(); - // std::vector latt5(1,Ls); - // std::vector simd5(1,nsimd); - // std::vector mpi5(1,1); - for(int d=0;d_fdimensions[d]); - // simd5.push_back(1); - // mpi5.push_back(FourDimGrid->_processors[d]); cb5.push_back(1); } GridCartesian *tmp = makeFiveDimDWFGrid(Ls,FourDimGrid); From 600d7ddc2ea54c956a63d9cf0de448909130f51e Mon Sep 17 00:00:00 2001 From: paboyle Date: Thu, 22 Jun 2017 18:54:34 +0100 Subject: [PATCH 058/377] Proof of concept : Multi RHS solver, running independent solves on different ranks --- tests/solver/Test_dwf_mrhs_cg.cc | 175 +++++++++++++++++++++++++++++++ 1 file changed, 175 insertions(+) create mode 100644 tests/solver/Test_dwf_mrhs_cg.cc diff --git a/tests/solver/Test_dwf_mrhs_cg.cc b/tests/solver/Test_dwf_mrhs_cg.cc new file mode 100644 index 00000000..fa940390 --- /dev/null +++ b/tests/solver/Test_dwf_mrhs_cg.cc @@ -0,0 +1,175 @@ + /************************************************************************************* + + Grid physics library, www.github.com/paboyle/Grid + + Source file: ./tests/Test_dwf_mrhs_cg.cc + + Copyright (C) 2015 + +Author: Peter Boyle + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along + with this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+ + See the full license in the file "LICENSE" in the top level distribution directory + *************************************************************************************/ + /* END LEGAL */ +#include +#include + +using namespace std; +using namespace Grid; +using namespace Grid::QCD; + +int main (int argc, char ** argv) +{ + typedef typename DomainWallFermionR::FermionField FermionField; + typedef typename DomainWallFermionR::ComplexField ComplexField; + typename DomainWallFermionR::ImplParams params; + + const int Ls=8; + + Grid_init(&argc,&argv); + + std::vector latt_size = GridDefaultLatt(); + std::vector simd_layout = GridDefaultSimd(Nd,vComplex::Nsimd()); + std::vector mpi_layout = GridDefaultMpi(); + std::vector mpi_split (mpi_layout.size(),1); + + std::cout << "UGrid (world root)"<RankCount() ; + + ///////////////////////////////////////////// + // Split into 1^4 mpi communicators + ///////////////////////////////////////////// + std::cout << "SGrid (world root)"< seeds({1,2,3,4}); + + GridParallelRNG pRNG(UGrid ); pRNG.SeedFixedIntegers(seeds); + GridParallelRNG pRNG5(FGrid); pRNG5.SeedFixedIntegers(seeds); + std::vector src(nrhs,FGrid); + std::vector result(nrhs,FGrid); + + for(int s=0;sThisRank(); + LatticeGaugeField s_Umu(SGrid); + FermionField s_src(SFGrid); + FermionField s_res(SFGrid); + { + ScidacReader _ScidacReader; + _ScidacReader.open(file); + std::cout << GridLogMessage << " Opened file "< HermOp(Ddwf); + ConjugateGradient CG((1.0e-8/(me+1)),10000); + s_res = zero; + CG(HermOp,s_src,s_res); + + /////////////////////////////////////// + // Share the information + /////////////////////////////////////// + std::vector iterations(nrhs,0); + iterations[me] = CG.IterationsToComplete; + + for(int n=0;nGlobalSum(iterations[n]); + } + + ///////////////////////////////////////////////////////////// + // Report how long they all took + ///////////////////////////////////////////////////////////// + for(int r=0;r Date: Thu, 22 Jun 2017 19:32:41 +0100 
Subject: [PATCH 059/377] const fixes --- lib/communicator/Communicator_mpi3.cc | 2 +- lib/communicator/Communicator_none.cc | 2 +- lib/communicator/Communicator_shmem.cc | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/lib/communicator/Communicator_mpi3.cc b/lib/communicator/Communicator_mpi3.cc index 6d7f301c..22445f60 100644 --- a/lib/communicator/Communicator_mpi3.cc +++ b/lib/communicator/Communicator_mpi3.cc @@ -375,7 +375,7 @@ void CartesianCommunicator::ProcessorCoorFromRank(int rank, std::vector &c ////////////////////////////////// // Try to subdivide communicator ////////////////////////////////// -CartesianCommunicator::CartesianCommunicator(const std::vector &processors,CartesianCommunicator &parent) +CartesianCommunicator::CartesianCommunicator(const std::vector &processors,const CartesianCommunicator &parent) : CartesianCommunicator(processors) { std::cout << "Attempts to split MPI3 communicators will fail until implemented" < &processors,CartesianCommunicator &parent) +CartesianCommunicator::CartesianCommunicator(const std::vector &processors,const CartesianCommunicator &parent) : CartesianCommunicator(processors) {} CartesianCommunicator::CartesianCommunicator(const std::vector &processors) diff --git a/lib/communicator/Communicator_shmem.cc b/lib/communicator/Communicator_shmem.cc index 49d32697..ed49285d 100644 --- a/lib/communicator/Communicator_shmem.cc +++ b/lib/communicator/Communicator_shmem.cc @@ -75,7 +75,7 @@ void CartesianCommunicator::Init(int *argc, char ***argv) { ShmInitGeneric(); } -CartesianCommunicator::CartesianCommunicator(const std::vector &processors,CartesianCommunicator &parent) +CartesianCommunicator::CartesianCommunicator(const std::vector &processors,const CartesianCommunicator &parent) : CartesianCommunicator(processors) { std::cout << "Attempts to split SHMEM communicators will fail " < Date: Fri, 23 Jun 2017 09:42:21 +0100 Subject: [PATCH 060/377] Ticking off lots on the TODO list --- TODO | 22 
++++++++++++++-------- 1 file changed, 14 insertions(+), 8 deletions(-) diff --git a/TODO b/TODO index 001c6c0c..f0c022ca 100644 --- a/TODO +++ b/TODO @@ -2,18 +2,24 @@ TODO: --------------- Large item work list: -1)- MultiRHS with spread out extra dim -- Go through filesystem with SciDAC I/O +1)- BG/Q port and check 2)- Christoph's local basis expansion Lanczos -3)- BG/Q port and check -4)- Precision conversion and sort out localConvert <-- partial - - Consistent linear solver flop count/rate -- PARTIAL, time but no flop/s yet -5)- Physical propagator interface -6)- Conserved currents -7)- Multigrid Wilson and DWF, compare to other Multigrid implementations -8)- HDCR resume +-- +3a)- RNG I/O in ILDG/SciDAC (minor) +3b)- Precision conversion and sort out localConvert <-- partial/easy +3c)- Consistent linear solver flop count/rate -- PARTIAL, time but no flop/s yet + +4)- Physical propagator interface + +5)- Conserved currents + +6)- Multigrid Wilson and DWF, compare to other Multigrid implementations + +7)- HDCR resume Recent DONE +-- MultiRHS with spread out extra dim -- Go through filesystem with SciDAC I/O ; <-- DONE ; bmark cori -- Lanczos Remove DenseVector, DenseMatrix; Use Eigen instead. 
<-- DONE -- GaugeFix into central location <-- DONE -- Scidac and Ildg metadata handling <-- DONE From 349d75e48379c353c886966cebcb0eeb944bb624 Mon Sep 17 00:00:00 2001 From: paboyle Date: Fri, 23 Jun 2017 02:57:59 -0700 Subject: [PATCH 061/377] Precision fix --- lib/lattice/Lattice_reduction.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/lattice/Lattice_reduction.h b/lib/lattice/Lattice_reduction.h index c5b20f3c..fc1ccadb 100644 --- a/lib/lattice/Lattice_reduction.h +++ b/lib/lattice/Lattice_reduction.h @@ -540,7 +540,7 @@ static void sliceInnerProductMatrix( Eigen::MatrixXcd &mat, const Lattice for(int i=0;i Date: Fri, 23 Jun 2017 23:10:24 +0100 Subject: [PATCH 062/377] Added an update to TODO list --- TODO | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/TODO b/TODO index f0c022ca..a778f476 100644 --- a/TODO +++ b/TODO @@ -3,19 +3,17 @@ TODO: Large item work list: -1)- BG/Q port and check +1)- BG/Q port and check ; Andrew says ok. 2)- Christoph's local basis expansion Lanczos -- 3a)- RNG I/O in ILDG/SciDAC (minor) 3b)- Precision conversion and sort out localConvert <-- partial/easy 3c)- Consistent linear solver flop count/rate -- PARTIAL, time but no flop/s yet - +-- 4)- Physical propagator interface - 5)- Conserved currents - +-- 6)- Multigrid Wilson and DWF, compare to other Multigrid implementations - 7)- HDCR resume Recent DONE From 57002924bc3232308cd1add6610b632a3b055fe9 Mon Sep 17 00:00:00 2001 From: paboyle Date: Sun, 2 Jul 2017 14:58:30 -0700 Subject: [PATCH 063/377] NERSC shakeout of this --- lib/communicator/Communicator_mpi.cc | 8 ++-- tests/solver/Test_dwf_mrhs_cg.cc | 65 ++++++++++++++-------------- 2 files changed, 37 insertions(+), 36 deletions(-) diff --git a/lib/communicator/Communicator_mpi.cc b/lib/communicator/Communicator_mpi.cc index 7879f518..1a7a0c05 100644 --- a/lib/communicator/Communicator_mpi.cc +++ b/lib/communicator/Communicator_mpi.cc @@ -70,7 +70,7 @@ 
CartesianCommunicator::CartesianCommunicator(const std::vector &processors, MPI_Comm_size(parent.communicator,&Nparent); int childsize=1; - for(int d=0;d<_ndimension;d++) { + for(int d=0;d &processors, MPI_Comm comm_split; if ( Nchild > 1 ) { - std::cout << GridLogMessage<<"Child communicator of "<< parent.communicator<ThisRank(); LatticeGaugeField s_Umu(SGrid); FermionField s_src(SFGrid); FermionField s_res(SFGrid); + { + ScidacWriter _ScidacWriter; + _ScidacWriter.open(file); + std::cout << GridLogMessage << " Writing out gauge field "<Barrier(); + } + } + + /////////////////////////////////////////////////////////////// // Set up N-solvers as trivially parallel /////////////////////////////////////////////////////////////// From dee68fc7287a2af2d2acba8f72f96760722d275a Mon Sep 17 00:00:00 2001 From: paboyle Date: Sun, 2 Jul 2017 23:33:48 +0100 Subject: [PATCH 064/377] IO working multiple nodes again. Strategy of all nodes writing metadata is unsafe. Only one rank should do this. must identify this rank. Means pass communicator to the Objects. --- lib/parallelIO/IldgIO.h | 8 ++++++-- lib/parallelIO/IldgIOtypes.h | 1 + lib/parallelIO/MetaData.h | 3 +++ tests/solver/Test_dwf_mrhs_cg.cc | 21 ++++++++++++++++++--- 4 files changed, 28 insertions(+), 5 deletions(-) diff --git a/lib/parallelIO/IldgIO.h b/lib/parallelIO/IldgIO.h index 542f80ef..7031f110 100644 --- a/lib/parallelIO/IldgIO.h +++ b/lib/parallelIO/IldgIO.h @@ -258,10 +258,13 @@ class GridLimeReader : public BinaryIO { if ( !strncmp(limeReaderType(LimeR), record_name.c_str(),strlen(record_name.c_str()) ) ) { - // std::cout << GridLogMessage<< " readLimeObject matches ! 
" < xmlc(nbytes+1,'\0'); limeReaderReadData((void *)&xmlc[0], &nbytes, LimeR); + + std::cout << GridLogMessage<< " readLimeObject matches XML " << &xmlc[0] <ThisRank(); LatticeGaugeField s_Umu(SGrid); FermionField s_src(SFGrid); FermionField s_res(SFGrid); { + FGrid->Barrier(); ScidacWriter _ScidacWriter; _ScidacWriter.open(file); + std::cout << GridLogMessage << "****************************************************************** "<Barrier(); + std::cout << GridLogMessage << "****************************************************************** "<Barrier(); + std::cout << GridLogMessage << "****************************************************************** "<Barrier(); + std::cout << GridLogMessage << "****************************************************************** "<Barrier(); + std::cout << GridLogMessage << "****************************************************************** "< Date: Sun, 2 Jul 2017 16:47:42 -0700 Subject: [PATCH 065/377] Working on Cori --- lib/parallelIO/IldgIO.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/lib/parallelIO/IldgIO.h b/lib/parallelIO/IldgIO.h index 7031f110..ba71153d 100644 --- a/lib/parallelIO/IldgIO.h +++ b/lib/parallelIO/IldgIO.h @@ -178,7 +178,7 @@ class GridLimeReader : public BinaryIO { ///////////////////////////////////////////// // Open the file ///////////////////////////////////////////// - void open(std::string &_filename) + void open(const std::string &_filename) { filename= _filename; File = fopen(filename.c_str(), "r"); @@ -258,12 +258,12 @@ class GridLimeReader : public BinaryIO { if ( !strncmp(limeReaderType(LimeR), record_name.c_str(),strlen(record_name.c_str()) ) ) { - std::cout << GridLogMessage<< " readLimeObject matches ! 
" << record_name < xmlc(nbytes+1,'\0'); limeReaderReadData((void *)&xmlc[0], &nbytes, LimeR); - std::cout << GridLogMessage<< " readLimeObject matches XML " << &xmlc[0] < Date: Sun, 2 Jul 2017 16:47:58 -0700 Subject: [PATCH 066/377] Works on Cori --- tests/solver/Test_dwf_mrhs_cg.cc | 25 ++++++++++++++++--------- 1 file changed, 16 insertions(+), 9 deletions(-) diff --git a/tests/solver/Test_dwf_mrhs_cg.cc b/tests/solver/Test_dwf_mrhs_cg.cc index 72d8d8f4..b4880148 100644 --- a/tests/solver/Test_dwf_mrhs_cg.cc +++ b/tests/solver/Test_dwf_mrhs_cg.cc @@ -128,29 +128,36 @@ int main (int argc, char ** argv) { for(int n=0;nBarrier(); + std::stringstream filefn; + filefn << filef << "."<< n; + std::cout << GridLogMessage << "****************************************************************** "<Barrier(); + FGrid->Barrier(); - std::cout << GridLogMessage << "****************************************************************** "<Barrier(); } + FGrid->Barrier(); } @@ -185,7 +192,7 @@ int main (int argc, char ** argv) // Report how long they all took ///////////////////////////////////////////////////////////// for(int r=0;r Date: Tue, 11 Jul 2017 14:31:57 +0100 Subject: [PATCH 067/377] Fixed the JSON parsing error --- lib/json/json.hpp | 9862 ++++++++++++++++++-------------- lib/serialisation/BaseIO.h | 2 +- lib/serialisation/JSON_IO.cc | 2 + lib/serialisation/JSON_IO.h | 29 +- tests/IO/Test_serialisation.cc | 131 +- 5 files changed, 5815 insertions(+), 4211 deletions(-) diff --git a/lib/json/json.hpp b/lib/json/json.hpp index 6dfc1831..e53fbcf2 100644 --- a/lib/json/json.hpp +++ b/lib/json/json.hpp @@ -32,17 +32,17 @@ SOFTWARE. 
#include // all_of, copy, fill, find, for_each, none_of, remove, reverse, transform #include // array #include // assert -#include // isdigit #include // and, not, or +#include // lconv, localeconv #include // isfinite, labs, ldexp, signbit #include // nullptr_t, ptrdiff_t, size_t #include // int64_t, uint64_t #include // abort, strtod, strtof, strtold, strtoul, strtoll, strtoull -#include // strlen +#include // memcpy, strlen #include // forward_list #include // function, hash, less #include // initializer_list -#include // setw +#include // hex #include // istream, ostream #include // advance, begin, back_inserter, bidirectional_iterator_tag, distance, end, inserter, iterator, iterator_traits, next, random_access_iterator_tag, reverse_iterator #include // numeric_limits @@ -51,7 +51,6 @@ SOFTWARE. #include // addressof, allocator, allocator_traits, unique_ptr #include // accumulate #include // stringstream -#include // domain_error, invalid_argument, out_of_range #include // getline, stoi, string, to_string #include // add_pointer, conditional, decay, enable_if, false_type, integral_constant, is_arithmetic, is_base_of, is_const, is_constructible, is_convertible, is_default_constructible, is_enum, is_floating_point, is_integral, is_nothrow_move_assignable, is_nothrow_move_constructible, is_pointer, is_reference, is_same, is_scalar, is_signed, remove_const, remove_cv, remove_pointer, remove_reference, true_type, underlying_type #include // declval, forward, make_pair, move, pair, swap @@ -90,7 +89,7 @@ SOFTWARE. #endif // allow to disable exceptions -#if not defined(JSON_NOEXCEPTION) || defined(__EXCEPTIONS) +#if (defined(__cpp_exceptions) || defined(__EXCEPTIONS) || defined(_CPPUNWIND)) && not defined(JSON_NOEXCEPTION) #define JSON_THROW(exception) throw exception #define JSON_TRY try #define JSON_CATCH(exception) catch(exception) @@ -100,6 +99,15 @@ SOFTWARE. 
#define JSON_CATCH(exception) if(false) #endif +// manual branch prediction +#if defined(__clang__) || defined(__GNUC__) || defined(__GNUG__) + #define JSON_LIKELY(x) __builtin_expect(!!(x), 1) + #define JSON_UNLIKELY(x) __builtin_expect(!!(x), 0) +#else + #define JSON_LIKELY(x) x + #define JSON_UNLIKELY(x) x +#endif + /*! @brief namespace for Niels Lohmann @see https://github.com/nlohmann @@ -118,6 +126,259 @@ This namespace collects some functions that could not be defined inside the */ namespace detail { +//////////////// +// exceptions // +//////////////// + +/*! +@brief general exception of the @ref basic_json class + +Extension of std::exception objects with a member @a id for exception ids. + +@note To have nothrow-copy-constructible exceptions, we internally use + std::runtime_error which can cope with arbitrary-length error messages. + Intermediate strings are built with static functions and then passed to + the actual constructor. + +@since version 3.0.0 +*/ +class exception : public std::exception +{ + public: + /// returns the explanatory string + virtual const char* what() const noexcept override + { + return m.what(); + } + + /// the id of the exception + const int id; + + protected: + exception(int id_, const char* what_arg) + : id(id_), m(what_arg) + {} + + static std::string name(const std::string& ename, int id) + { + return "[json.exception." + ename + "." + std::to_string(id) + "] "; + } + + private: + /// an exception object as storage for error messages + std::runtime_error m; +}; + +/*! +@brief exception indicating a parse error + +This exception is thrown by the library when a parse error occurs. Parse +errors can occur during the deserialization of JSON text as well as when +using JSON Patch. + +Member @a byte holds the byte index of the last read character in the input +file. + +@note For an input with n bytes, 1 is the index of the first character + and n+1 is the index of the terminating null byte or the end of + file.
This also holds true when reading a byte vector (CBOR or + MessagePack). + +Exceptions have ids 1xx. + +name / id | example message | description +------------------------------ | --------------- | ------------------------- +json.exception.parse_error.101 | parse error at 2: unexpected end of input; expected string literal | This error indicates a syntax error while deserializing a JSON text. The error message describes that an unexpected token (character) was encountered, and the member @a byte indicates the error position. +json.exception.parse_error.102 | parse error at 14: missing or wrong low surrogate | JSON uses the `\uxxxx` format to describe Unicode characters. Code points above 0xFFFF are split into two `\uxxxx` entries ("surrogate pairs"). This error indicates that the surrogate pair is incomplete or contains an invalid code point. +json.exception.parse_error.103 | parse error: code points above 0x10FFFF are invalid | Unicode supports code points up to 0x10FFFF. Code points above 0x10FFFF are invalid. +json.exception.parse_error.104 | parse error: JSON patch must be an array of objects | [RFC 6902](https://tools.ietf.org/html/rfc6902) requires a JSON Patch document to be a JSON document that represents an array of objects. +json.exception.parse_error.105 | parse error: operation must have string member 'op' | An operation of a JSON Patch document must contain exactly one "op" member, whose value indicates the operation to perform. Its value must be one of "add", "remove", "replace", "move", "copy", or "test"; other values are errors. +json.exception.parse_error.106 | parse error: array index '01' must not begin with '0' | An array index in a JSON Pointer ([RFC 6901](https://tools.ietf.org/html/rfc6901)) may be `0` or any number without a leading `0`.
+json.exception.parse_error.107 | parse error: JSON pointer must be empty or begin with '/' - was: 'foo' | A JSON Pointer must be a Unicode string containing a sequence of zero or more reference tokens, each prefixed by a `/` character. +json.exception.parse_error.108 | parse error: escape character '~' must be followed with '0' or '1' | In a JSON Pointer, only `~0` and `~1` are valid escape sequences. +json.exception.parse_error.109 | parse error: array index 'one' is not a number | A JSON Pointer array index must be a number. +json.exception.parse_error.110 | parse error at 1: cannot read 2 bytes from vector | When parsing CBOR or MessagePack, the byte vector ends before the complete value has been read. +json.exception.parse_error.111 | parse error: bad input stream | Parsing CBOR or MessagePack from an input stream where the [`badbit` or `failbit`](http://en.cppreference.com/w/cpp/io/ios_base/iostate) is set. +json.exception.parse_error.112 | parse error at 1: error reading CBOR; last byte: 0xf8 | Not all types of CBOR or MessagePack are supported. This exception occurs if an unsupported byte was read. +json.exception.parse_error.113 | parse error at 2: expected a CBOR string; last byte: 0x98 | While parsing a map key, a value that is not a string has been read. + +@since version 3.0.0 +*/ +class parse_error : public exception +{ + public: + /*! + @brief create a parse error exception + @param[in] id the id of the exception + @param[in] byte_ the byte index where the error occurred (or 0 if + the position cannot be determined) + @param[in] what_arg the explanatory string + @return parse_error object + */ + static parse_error create(int id, size_t byte_, const std::string& what_arg) + { + std::string w = exception::name("parse_error", id) + "parse error" + + (byte_ != 0 ? (" at " + std::to_string(byte_)) : "") + + ": " + what_arg; + return parse_error(id, byte_, w.c_str()); + } + + /*!
+ @brief byte index of the parse error + + The byte index of the last read character in the input file. + + @note For an input with n bytes, 1 is the index of the first character + and n+1 is the index of the terminating null byte or the end of + file. This also holds true when reading a byte vector (CBOR or + MessagePack). + */ + const size_t byte; + + private: + parse_error(int id_, size_t byte_, const char* what_arg) + : exception(id_, what_arg), byte(byte_) + {} +}; + +/*! +@brief exception indicating errors with iterators + +Exceptions have ids 2xx. + +name / id | example message | description +----------------------------------- | --------------- | ------------------------- +json.exception.invalid_iterator.201 | iterators are not compatible | The iterators passed to constructor @ref basic_json(InputIT first, InputIT last) are not compatible, meaning they do not belong to the same container. Therefore, the range (@a first, @a last) is invalid. +json.exception.invalid_iterator.202 | iterator does not fit current value | In an erase or insert function, the passed iterator @a pos does not belong to the JSON value for which the function was called. It hence does not define a valid position for the deletion/insertion. +json.exception.invalid_iterator.203 | iterators do not fit current value | Either iterator passed to function @ref erase(IteratorType first, IteratorType last) does not belong to the JSON value from which values shall be erased. It hence does not define a valid range to delete values from. +json.exception.invalid_iterator.204 | iterators out of range | When an iterator range for a primitive type (number, boolean, or string) is passed to a constructor or an erase function, this range has to be exactly (@ref begin(), @ref end()), because this is the only way the single stored value is expressed. All other ranges are invalid.
+json.exception.invalid_iterator.205 | iterator out of range | When an iterator for a primitive type (number, boolean, or string) is passed to an erase function, the iterator has to be the @ref begin() iterator, because it is the only way to address the stored value. All other iterators are invalid. +json.exception.invalid_iterator.206 | cannot construct with iterators from null | The iterators passed to constructor @ref basic_json(InputIT first, InputIT last) belong to a JSON null value and hence do not define a valid range. +json.exception.invalid_iterator.207 | cannot use key() for non-object iterators | The key() member function can only be used on iterators belonging to a JSON object, because other types do not have a concept of a key. +json.exception.invalid_iterator.208 | cannot use operator[] for object iterators | The operator[] to specify a concrete offset cannot be used on iterators belonging to a JSON object, because JSON objects are unordered. +json.exception.invalid_iterator.209 | cannot use offsets with object iterators | The offset operators (+, -, +=, -=) cannot be used on iterators belonging to a JSON object, because JSON objects are unordered. +json.exception.invalid_iterator.210 | iterators do not fit | The iterators passed to the insert function are not compatible, meaning they do not belong to the same container. Therefore, the range (@a first, @a last) is invalid. +json.exception.invalid_iterator.211 | passed iterators may not belong to container | The iterator range passed to the insert function must not be a subrange of the container to insert to. +json.exception.invalid_iterator.212 | cannot compare iterators of different containers | When two iterators are compared, they must belong to the same container. +json.exception.invalid_iterator.213 | cannot compare order of object iterators | The order of object iterators cannot be compared, because JSON objects are unordered.
+json.exception.invalid_iterator.214 | cannot get value | Cannot get value for iterator: Either the iterator belongs to a null value or it is an iterator to a primitive type (number, boolean, or string), but the iterator is different to @ref begin(). + +@since version 3.0.0 +*/ +class invalid_iterator : public exception +{ + public: + static invalid_iterator create(int id, const std::string& what_arg) + { + std::string w = exception::name("invalid_iterator", id) + what_arg; + return invalid_iterator(id, w.c_str()); + } + + private: + invalid_iterator(int id_, const char* what_arg) + : exception(id_, what_arg) + {} +}; + +/*! +@brief exception indicating executing a member function with a wrong type + +Exceptions have ids 3xx. + +name / id | example message | description +----------------------------- | --------------- | ------------------------- +json.exception.type_error.301 | cannot create object from initializer list | To create an object from an initializer list, the initializer list must consist only of a list of pairs whose first element is a string. When this constraint is violated, an array is created instead. +json.exception.type_error.302 | type must be object, but is array | During implicit or explicit value conversion, the JSON type must be compatible to the target type. For instance, a JSON string can only be converted into string types, but not into numbers or boolean types. +json.exception.type_error.303 | incompatible ReferenceType for get_ref, actual type is object | To retrieve a reference to a value stored in a @ref basic_json object with @ref get_ref, the type of the reference must match the value type. For instance, for a JSON array, the @a ReferenceType must be @ref array_t&. +json.exception.type_error.304 | cannot use at() with string | The @ref at() member functions can only be executed for certain JSON types. 
+json.exception.type_error.305 | cannot use operator[] with string | The @ref operator[] member functions can only be executed for certain JSON types. +json.exception.type_error.306 | cannot use value() with string | The @ref value() member functions can only be executed for certain JSON types. +json.exception.type_error.307 | cannot use erase() with string | The @ref erase() member functions can only be executed for certain JSON types. +json.exception.type_error.308 | cannot use push_back() with string | The @ref push_back() and @ref operator+= member functions can only be executed for certain JSON types. +json.exception.type_error.309 | cannot use insert() with | The @ref insert() member functions can only be executed for certain JSON types. +json.exception.type_error.310 | cannot use swap() with number | The @ref swap() member functions can only be executed for certain JSON types. +json.exception.type_error.311 | cannot use emplace_back() with string | The @ref emplace_back() member function can only be executed for certain JSON types. +json.exception.type_error.313 | invalid value to unflatten | The @ref unflatten function converts an object whose keys are JSON Pointers back into an arbitrary nested JSON value. The JSON Pointers must not overlap, because then the resulting value would not be well defined. +json.exception.type_error.314 | only objects can be unflattened | The @ref unflatten function only works for an object whose keys are JSON Pointers. +json.exception.type_error.315 | values in object must be primitive | The @ref unflatten function only works for an object whose keys are JSON Pointers and whose values are primitive. 
+ +@since version 3.0.0 +*/ +class type_error : public exception +{ + public: + static type_error create(int id, const std::string& what_arg) + { + std::string w = exception::name("type_error", id) + what_arg; + return type_error(id, w.c_str()); + } + + private: + type_error(int id_, const char* what_arg) + : exception(id_, what_arg) + {} +}; + +/*! +@brief exception indicating access out of the defined range + +Exceptions have ids 4xx. + +name / id | example message | description +------------------------------- | --------------- | ------------------------- +json.exception.out_of_range.401 | array index 3 is out of range | The provided array index @a i is larger than @a size-1. +json.exception.out_of_range.402 | array index '-' (3) is out of range | The special array index `-` in a JSON Pointer never describes a valid element of the array, but the index past the end. That is, it can only be used to add elements at this position, but not to read it. +json.exception.out_of_range.403 | key 'foo' not found | The provided key was not found in the JSON object. +json.exception.out_of_range.404 | unresolved reference token 'foo' | A reference token in a JSON Pointer could not be resolved. +json.exception.out_of_range.405 | JSON pointer has no parent | The JSON Patch operations 'remove' and 'add' can not be applied to the root element of the JSON value. +json.exception.out_of_range.406 | number overflow parsing '10E1000' | A parsed number could not be stored without changing it to NaN or INF. + +@since version 3.0.0 +*/ +class out_of_range : public exception +{ + public: + static out_of_range create(int id, const std::string& what_arg) + { + std::string w = exception::name("out_of_range", id) + what_arg; + return out_of_range(id, w.c_str()); + } + + private: + out_of_range(int id_, const char* what_arg) + : exception(id_, what_arg) + {} +}; + +/*! +@brief exception indicating other errors + +Exceptions have ids 5xx.
+ +name / id | example message | description +------------------------------ | --------------- | ------------------------- +json.exception.other_error.501 | unsuccessful: {"op":"test","path":"/baz", "value":"bar"} | A JSON Patch operation 'test' failed. The unsuccessful operation is also printed. +json.exception.other_error.502 | invalid object size for conversion | Some conversions to user-defined types impose constraints on the object size (e.g. std::pair) + +@since version 3.0.0 +*/ +class other_error : public exception +{ + public: + static other_error create(int id, const std::string& what_arg) + { + std::string w = exception::name("other_error", id) + what_arg; + return other_error(id, w.c_str()); + } + + private: + other_error(int id_, const char* what_arg) + : exception(id_, what_arg) + {} +}; + + + /////////////////////////// // JSON type enumeration // /////////////////////////// @@ -204,11 +465,38 @@ using enable_if_t = typename std::enable_if::type; template using uncvref_t = typename std::remove_cv::type>::type; -// taken from http://stackoverflow.com/a/26936864/266378 -template -using is_unscoped_enum = - std::integral_constant::value and - std::is_enum::value>; +// implementation of C++14 index_sequence and affiliates +// source: https://stackoverflow.com/a/32223343 +template +struct index_sequence +{ + using type = index_sequence; + using value_type = std::size_t; + static constexpr std::size_t size() noexcept + { + return sizeof...(Ints); + } +}; + +template +struct merge_and_renumber; + +template +struct merge_and_renumber, index_sequence> + : index_sequence < I1..., (sizeof...(I1) + I2)... 
> + { }; + +template +struct make_index_sequence + : merge_and_renumber < typename make_index_sequence < N / 2 >::type, + typename make_index_sequence < N - N / 2 >::type > +{ }; + +template<> struct make_index_sequence<0> : index_sequence<> { }; +template<> struct make_index_sequence<1> : index_sequence<0> { }; + +template +using index_sequence_for = make_index_sequence; /* Implementation of two C++17 constructs: conjunction, negation. This is needed @@ -271,16 +559,8 @@ struct external_constructor template static void construct(BasicJsonType& j, typename BasicJsonType::number_float_t val) noexcept { - // replace infinity and NAN by null - if (not std::isfinite(val)) - { - j = BasicJsonType{}; - } - else - { - j.m_type = value_t::number_float; - j.m_value = val; - } + j.m_type = value_t::number_float; + j.m_value = val; j.assert_invariant(); } }; @@ -332,6 +612,19 @@ struct external_constructor j.m_value.array = j.template create(begin(arr), end(arr)); j.assert_invariant(); } + + template + static void construct(BasicJsonType& j, const std::vector& arr) + { + j.m_type = value_t::array; + j.m_value = value_t::array; + j.m_value.array->reserve(arr.size()); + for (bool x : arr) + { + j.m_value.array->push_back(x); + } + j.assert_invariant(); + } }; template<> @@ -563,11 +856,18 @@ void to_json(BasicJsonType& j, CompatibleNumberIntegerType val) noexcept external_constructor::construct(j, static_cast(val)); } -template::value, int> = 0> -void to_json(BasicJsonType& j, UnscopedEnumType e) noexcept +template::value, int> = 0> +void to_json(BasicJsonType& j, EnumType e) noexcept { - external_constructor::construct(j, e); + using underlying_type = typename std::underlying_type::type; + external_constructor::construct(j, static_cast(e)); +} + +template +void to_json(BasicJsonType& j, const std::vector& e) +{ + external_constructor::construct(j, e); } template < @@ -590,6 +890,32 @@ void to_json(BasicJsonType& j, const CompatibleObjectType& arr) 
external_constructor::construct(j, arr); } +template ::value, + int> = 0> +void to_json(BasicJsonType& j, T (&arr)[N]) +{ + external_constructor::construct(j, arr); +} + +template +void to_json(BasicJsonType& j, const std::pair& p) +{ + j = {p.first, p.second}; +} + +template +void to_json_tuple_impl(BasicJsonType& j, const Tuple& t, index_sequence) +{ + j = {std::get(t)...}; +} + +template +void to_json(BasicJsonType& j, const std::tuple& t) +{ + to_json_tuple_impl(j, t, index_sequence_for {}); +} /////////////// // from_json // @@ -625,8 +951,7 @@ void get_arithmetic_value(const BasicJsonType& j, ArithmeticType& val) } default: { - JSON_THROW( - std::domain_error("type must be number, but is " + j.type_name())); + JSON_THROW(type_error::create(302, "type must be number, but is " + j.type_name())); } } } @@ -636,7 +961,7 @@ void from_json(const BasicJsonType& j, typename BasicJsonType::boolean_t& b) { if (not j.is_boolean()) { - JSON_THROW(std::domain_error("type must be boolean, but is " + j.type_name())); + JSON_THROW(type_error::create(302, "type must be boolean, but is " + j.type_name())); } b = *j.template get_ptr(); } @@ -646,7 +971,7 @@ void from_json(const BasicJsonType& j, typename BasicJsonType::string_t& s) { if (not j.is_string()) { - JSON_THROW(std::domain_error("type must be string, but is " + j.type_name())); + JSON_THROW(type_error::create(302, "type must be string, but is " + j.type_name())); } s = *j.template get_ptr(); } @@ -669,13 +994,13 @@ void from_json(const BasicJsonType& j, typename BasicJsonType::number_integer_t& get_arithmetic_value(j, val); } -template::value, int> = 0> -void from_json(const BasicJsonType& j, UnscopedEnumType& e) +template::value, int> = 0> +void from_json(const BasicJsonType& j, EnumType& e) { - typename std::underlying_type::type val; + typename std::underlying_type::type val; get_arithmetic_value(j, val); - e = static_cast(val); + e = static_cast(val); } template @@ -683,28 +1008,21 @@ void from_json(const 
BasicJsonType& j, typename BasicJsonType::array_t& arr) { if (not j.is_array()) { - JSON_THROW(std::domain_error("type must be array, but is " + j.type_name())); + JSON_THROW(type_error::create(302, "type must be array, but is " + j.type_name())); } arr = *j.template get_ptr(); } // forward_list doesn't have an insert method -template +template::value, int> = 0> void from_json(const BasicJsonType& j, std::forward_list& l) { - // do not perform the check when user wants to retrieve jsons - // (except when it's null.. ?) - if (j.is_null()) + if (not j.is_array()) { - JSON_THROW(std::domain_error("type must be array, but is " + j.type_name())); - } - if (not std::is_same::value) - { - if (not j.is_array()) - { - JSON_THROW(std::domain_error("type must be array, but is " + j.type_name())); - } + JSON_THROW(type_error::create(302, "type must be array, but is " + j.type_name())); } + for (auto it = j.rbegin(), end = j.rend(); it != end; ++it) { l.push_front(it->template get()); @@ -736,8 +1054,8 @@ auto from_json_array_impl(const BasicJsonType& j, CompatibleArrayType& arr, prio using std::end; arr.reserve(j.size()); - std::transform( - j.begin(), j.end(), std::inserter(arr, end(arr)), [](const BasicJsonType & i) + std::transform(j.begin(), j.end(), + std::inserter(arr, end(arr)), [](const BasicJsonType & i) { // get() returns *this, this won't call a from_json // method when value_type is BasicJsonType @@ -745,25 +1063,27 @@ auto from_json_array_impl(const BasicJsonType& j, CompatibleArrayType& arr, prio }); } +template +void from_json_array_impl(const BasicJsonType& j, std::array& arr, priority_tag<2>) +{ + for (std::size_t i = 0; i < N; ++i) + { + arr[i] = j.at(i).template get(); + } +} + template::value and + std::is_convertible::value and not std::is_same::value, int> = 0> void from_json(const BasicJsonType& j, CompatibleArrayType& arr) { - if (j.is_null()) + if (not j.is_array()) { - JSON_THROW(std::domain_error("type must be array, but is " + j.type_name())); + 
JSON_THROW(type_error::create(302, "type must be array, but is " + j.type_name())); } - // when T == BasicJsonType, do not check if value_t is correct - if (not std::is_same::value) - { - if (not j.is_array()) - { - JSON_THROW(std::domain_error("type must be array, but is " + j.type_name())); - } - } - from_json_array_impl(j, arr, priority_tag<1> {}); + from_json_array_impl(j, arr, priority_tag<2> {}); } template(); using std::begin; using std::end; - // we could avoid the assignment, but this might require a for loop, which - // might be less efficient than the container constructor for some - // containers (would it?) - obj = CompatibleObjectType(begin(*inner_object), end(*inner_object)); + using value_type = typename CompatibleObjectType::value_type; + std::transform( + inner_object->begin(), inner_object->end(), + std::inserter(obj, obj.begin()), + [](typename BasicJsonType::object_t::value_type const & p) + { + return value_type( + p.first, + p.second + .template get()); + }); } // overload for arithmetic types, not chosen for basic_json template arguments @@ -822,11 +1149,29 @@ void from_json(const BasicJsonType& j, ArithmeticType& val) } default: { - JSON_THROW(std::domain_error("type must be number, but is " + j.type_name())); + JSON_THROW(type_error::create(302, "type must be number, but is " + j.type_name())); } } } +template +void from_json(const BasicJsonType& j, std::pair& p) +{ + p = {j.at(0), j.at(1)}; +} + +template +void from_json_tuple_impl(const BasicJsonType& j, Tuple& t, index_sequence) +{ + t = std::make_tuple(j.at(Idx)...); +} + +template +void from_json(const BasicJsonType& j, std::tuple& t) +{ + from_json_tuple_impl(j, t, index_sequence_for {}); +} + struct to_json_fn { private: @@ -1054,6 +1399,31 @@ class basic_json template using json_serializer = JSONSerializer; + + //////////////// + // exceptions // + //////////////// + + /// @name exceptions + /// Classes to implement user-defined exceptions. 
+ /// @{ + + /// @copydoc detail::exception + using exception = detail::exception; + /// @copydoc detail::parse_error + using parse_error = detail::parse_error; + /// @copydoc detail::invalid_iterator + using invalid_iterator = detail::invalid_iterator; + /// @copydoc detail::type_error + using type_error = detail::type_error; + /// @copydoc detail::out_of_range + using out_of_range = detail::out_of_range; + /// @copydoc detail::other_error + using other_error = detail::other_error; + + /// @} + + ///////////////////// // container types // ///////////////////// @@ -1136,10 +1506,7 @@ class basic_json result["url"] = "https://github.com/nlohmann/json"; result["version"] = { - {"string", "2.1.1"}, - {"major", 2}, - {"minor", 1}, - {"patch", 1} + {"string", "2.1.1"}, {"major", 2}, {"minor", 1}, {"patch", 1} }; #ifdef _WIN32 @@ -1745,9 +2112,9 @@ class basic_json default: { - if (t == value_t::null) + if (JSON_UNLIKELY(t == value_t::null)) { - JSON_THROW(std::domain_error("961c151d2e87f2686a955a9be24d316f1362bf21 2.1.1")); // LCOV_EXCL_LINE + JSON_THROW(other_error::create(500, "961c151d2e87f2686a955a9be24d316f1362bf21 2.1.1")); // LCOV_EXCL_LINE } break; } @@ -1905,9 +2272,6 @@ class basic_json @complexity Constant. 
- @throw std::bad_alloc if allocation for object, array, or string value - fails - @liveexample{The following code shows the constructor for different @ref value_t values,basic_json__value_t} @@ -2062,10 +2426,12 @@ class basic_json value_t::array and @ref value_t::object are valid); when @a type_deduction is set to `true`, this parameter has no effect - @throw std::domain_error if @a type_deduction is `false`, @a manual_type - is `value_t::object`, but @a init contains an element which is not a pair - whose first element is a string; example: `"cannot create object from - initializer list"` + @throw type_error.301 if @a type_deduction is `false`, @a manual_type is + `value_t::object`, but @a init contains an element which is not a pair + whose first element is a string. In this case, the constructor could not + create an object. If @a type_deduction would have be `true`, an array + would have been created. See @ref object(std::initializer_list) + for an example. @complexity Linear in the size of the initializer list @a init. @@ -2103,7 +2469,7 @@ class basic_json // if object is wanted but impossible, throw an exception if (manual_type == value_t::object and not is_an_object) { - JSON_THROW(std::domain_error("cannot create object from initializer list")); + JSON_THROW(type_error::create(301, "cannot create object from initializer list")); } } @@ -2179,16 +2545,17 @@ class basic_json related function @ref array(std::initializer_list), there are no cases which can only be expressed by this function. That is, any initializer list @a init can also be passed to the initializer list - constructor @ref basic_json(std::initializer_list, bool, - value_t). + constructor @ref basic_json(std::initializer_list, bool, value_t). 
@param[in] init initializer list to create an object from (optional) @return JSON object value - @throw std::domain_error if @a init is not a pair whose first elements are - strings; thrown by - @ref basic_json(std::initializer_list, bool, value_t) + @throw type_error.301 if @a init is not a list of pairs whose first + elements are strings. In this case, no object can be created. When such a + value is passed to @ref basic_json(std::initializer_list, bool, value_t), + an array would have been created from the passed initializer list @a init. + See example below. @complexity Linear in the size of @a init. @@ -2240,10 +2607,10 @@ class basic_json The semantics depends on the different types a JSON value can have: - In case of primitive types (number, boolean, or string), @a first must be `begin()` and @a last must be `end()`. In this case, the value is - copied. Otherwise, std::out_of_range is thrown. + copied. Otherwise, invalid_iterator.204 is thrown. - In case of structured types (array, object), the constructor behaves as similar versions for `std::vector`. - - In case of a null type, std::domain_error is thrown. + - In case of a null type, invalid_iterator.206 is thrown. @tparam InputIT an input iterator type (@ref iterator or @ref const_iterator) @@ -2254,14 +2621,19 @@ class basic_json @pre Iterators @a first and @a last must be initialized. **This precondition is enforced with an assertion.** - @throw std::domain_error if iterators are not compatible; that is, do not - belong to the same JSON value; example: `"iterators are not compatible"` - @throw std::out_of_range if iterators are for a primitive type (number, - boolean, or string) where an out of range error can be detected easily; - example: `"iterators out of range"` - @throw std::bad_alloc if allocation for object, array, or string fails - @throw std::domain_error if called with a null value; example: `"cannot - use construct with iterators from null"` + @pre Range `[first, last)` is valid. 
Usually, this precondition cannot be + checked efficiently. Only certain edge cases are detected; see the + description of the exceptions below. + + @throw invalid_iterator.201 if iterators @a first and @a last are not + compatible (i.e., do not belong to the same JSON value). In this case, + the range `[first, last)` is undefined. + @throw invalid_iterator.204 if iterators @a first and @a last belong to a + primitive type (number, boolean, or string), but @a first does not point + to the first element any more. In this case, the range `[first, last)` is + undefined. See example code below. + @throw invalid_iterator.206 if iterators @a first and @a last belong to a + null value. In this case, the range `[first, last)` is undefined. @complexity Linear in distance between @a first and @a last. @@ -2281,7 +2653,7 @@ class basic_json // make sure iterator fits the current value if (first.m_object != last.m_object) { - JSON_THROW(std::domain_error("iterators are not compatible")); + JSON_THROW(invalid_iterator::create(201, "iterators are not compatible")); } // copy type from first iterator @@ -2298,7 +2670,7 @@ class basic_json { if (not first.m_it.primitive_iterator.is_begin() or not last.m_it.primitive_iterator.is_end()) { - JSON_THROW(std::out_of_range("iterators out of range")); + JSON_THROW(invalid_iterator::create(204, "iterators out of range")); } break; } @@ -2357,47 +2729,14 @@ class basic_json default: { - JSON_THROW(std::domain_error("cannot use construct with iterators from " + first.m_object->type_name())); + JSON_THROW(invalid_iterator::create(206, "cannot construct with iterators from " + + first.m_object->type_name())); } } assert_invariant(); } - /*! 
- @brief construct a JSON value given an input stream - - @param[in,out] i stream to read a serialized JSON value from - @param[in] cb a parser callback function of type @ref parser_callback_t - which is used to control the deserialization by filtering unwanted values - (optional) - - @complexity Linear in the length of the input. The parser is a predictive - LL(1) parser. The complexity can be higher if the parser callback function - @a cb has a super-linear complexity. - - @note A UTF-8 byte order mark is silently ignored. - - @deprecated This constructor is deprecated and will be removed in version - 3.0.0 to unify the interface of the library. Deserialization will be - done by stream operators or by calling one of the `parse` functions, - e.g. @ref parse(std::istream&, const parser_callback_t). That is, calls - like `json j(i);` for an input stream @a i need to be replaced by - `json j = json::parse(i);`. See the example below. - - @liveexample{The example below demonstrates constructing a JSON value from - a `std::stringstream` with and without callback - function.,basic_json__istream} - - @since version 2.0.0, deprecated in version 2.0.3, to be removed in - version 3.0.0 - */ - JSON_DEPRECATED - explicit basic_json(std::istream& i, const parser_callback_t cb = nullptr) - { - *this = parser(i, cb).parse(); - assert_invariant(); - } /////////////////////////////////////// // other constructors and destructor // @@ -2418,8 +2757,6 @@ class basic_json - The complexity is linear. - As postcondition, it holds: `other == basic_json(other)`. - @throw std::bad_alloc if allocation for object, array, or string fails. - @liveexample{The following code shows an example for the copy constructor.,basic_json__basic_json} @@ -2632,6 +2969,8 @@ class basic_json members will be pretty-printed with that indent level. An indent level of `0` will only insert newlines. `-1` (the default) selects the most compact representation. 
+ @param[in] indent_char The character to use for indentation if @a indent is + greater than `0`. The default is ` ` (space). @return string containing the serialization of the JSON value @@ -2642,22 +2981,23 @@ class basic_json @see https://docs.python.org/2/library/json.html#json.dump - @since version 1.0.0 + @since version 1.0.0; indentation character added in version 3.0.0 */ - string_t dump(const int indent = -1) const + string_t dump(const int indent = -1, const char indent_char = ' ') const { - std::stringstream ss; + string_t result; + serializer s(output_adapter::create(result), indent_char); if (indent >= 0) { - dump(ss, true, static_cast(indent)); + s.dump(*this, true, static_cast(indent)); } else { - dump(ss, false, 0); + s.dump(*this, false, 0); } - return ss.str(); + return result; } /*! @@ -3030,7 +3370,7 @@ class basic_json return m_value.boolean; } - JSON_THROW(std::domain_error("type must be boolean, but is " + type_name())); + JSON_THROW(type_error::create(302, "type must be boolean, but is " + type_name())); } /// get a pointer to the value (object) @@ -3125,7 +3465,7 @@ class basic_json @tparam ThisType will be deduced as `basic_json` or `const basic_json` - @throw std::domain_error if ReferenceType does not match underlying value + @throw type_error.303 if ReferenceType does not match underlying value type of the current JSON */ template @@ -3142,8 +3482,7 @@ class basic_json return *ptr; } - JSON_THROW(std::domain_error("incompatible ReferenceType for get_ref, actual type is " + - obj.type_name())); + JSON_THROW(type_error::create(303, "incompatible ReferenceType for get_ref, actual type is " + obj.type_name())); } public: @@ -3194,9 +3533,9 @@ class basic_json This overloads is chosen if: - @a ValueType is not @ref basic_json, - @ref json_serializer has a `from_json()` method of the form - `void from_json(const @ref basic_json&, ValueType&)`, and + `void from_json(const basic_json&, ValueType&)`, and - @ref json_serializer does not have a 
`from_json()` method of - the form `ValueType from_json(const @ref basic_json&)` + the form `ValueType from_json(const basic_json&)` @tparam ValueTypeCV the provided value type @tparam ValueType the returned value type @@ -3255,7 +3594,7 @@ class basic_json This overloads is chosen if: - @a ValueType is not @ref basic_json and - @ref json_serializer has a `from_json()` method of the form - `ValueType from_json(const @ref basic_json&)` + `ValueType from_json(const basic_json&)` @note If @ref json_serializer has both overloads of `from_json()`, this one is chosen. @@ -3422,10 +3761,10 @@ class basic_json @return reference to the internally stored JSON value if the requested reference type @a ReferenceType fits to the JSON value; throws - std::domain_error otherwise + type_error.303 otherwise - @throw std::domain_error in case passed type @a ReferenceType is - incompatible with the stored JSON value + @throw type_error.303 in case passed type @a ReferenceType is incompatible + with the stored JSON value; see example below @complexity Constant. @@ -3468,8 +3807,9 @@ class basic_json @return copy of the JSON value, converted to type @a ValueType - @throw std::domain_error in case passed type @a ValueType is incompatible - to JSON, thrown by @ref get() const + @throw type_error.302 in case passed type @a ValueType is incompatible + to the JSON value type (e.g., the JSON value is of type boolean, but a + string is requested); see example below @complexity Linear in the size of the JSON value. 
@@ -3487,6 +3827,9 @@ class basic_json not std::is_same::value #ifndef _MSC_VER // fix for issue #167 operator<< ambiguity under VS2015 and not std::is_same>::value +#endif +#if (defined(__cplusplus) && __cplusplus >= 201703L) || (defined(_MSC_VER) && _MSC_VER >1900 && defined(_HAS_CXX17) && _HAS_CXX17 == 1) // fix for issue #464 + and not std::is_same::value #endif , int >::type = 0 > operator ValueType() const @@ -3516,17 +3859,21 @@ class basic_json @return reference to the element at index @a idx - @throw std::domain_error if the JSON value is not an array; example: - `"cannot use at() with string"` - @throw std::out_of_range if the index @a idx is out of range of the array; - that is, `idx >= size()`; example: `"array index 7 is out of range"` + @throw type_error.304 if the JSON value is not an array; in this case, + calling `at` with an index makes no sense. See example below. + @throw out_of_range.401 if the index @a idx is out of range of the array; + that is, `idx >= size()`. See example below. + + @exceptionsafety Strong guarantee: if an exception is thrown, there are no + changes in the JSON value. @complexity Constant. - @liveexample{The example below shows how array elements can be read and - written using `at()`.,at__size_type} - @since version 1.0.0 + + @liveexample{The example below shows how array elements can be read and + written using `at()`. 
It also demonstrates the different exceptions that + can be thrown.,at__size_type} */ reference at(size_type idx) { @@ -3540,12 +3887,12 @@ class basic_json JSON_CATCH (std::out_of_range&) { // create better exception explanation - JSON_THROW(std::out_of_range("array index " + std::to_string(idx) + " is out of range")); + JSON_THROW(out_of_range::create(401, "array index " + std::to_string(idx) + " is out of range")); } } else { - JSON_THROW(std::domain_error("cannot use at() with " + type_name())); + JSON_THROW(type_error::create(304, "cannot use at() with " + type_name())); } } @@ -3559,17 +3906,21 @@ class basic_json @return const reference to the element at index @a idx - @throw std::domain_error if the JSON value is not an array; example: - `"cannot use at() with string"` - @throw std::out_of_range if the index @a idx is out of range of the array; - that is, `idx >= size()`; example: `"array index 7 is out of range"` + @throw type_error.304 if the JSON value is not an array; in this case, + calling `at` with an index makes no sense. See example below. + @throw out_of_range.401 if the index @a idx is out of range of the array; + that is, `idx >= size()`. See example below. + + @exceptionsafety Strong guarantee: if an exception is thrown, there are no + changes in the JSON value. @complexity Constant. - @liveexample{The example below shows how array elements can be read using - `at()`.,at__size_type_const} - @since version 1.0.0 + + @liveexample{The example below shows how array elements can be read using + `at()`. 
It also demonstrates the different exceptions that can be thrown., + at__size_type_const} */ const_reference at(size_type idx) const { @@ -3583,12 +3934,12 @@ class basic_json JSON_CATCH (std::out_of_range&) { // create better exception explanation - JSON_THROW(std::out_of_range("array index " + std::to_string(idx) + " is out of range")); + JSON_THROW(out_of_range::create(401, "array index " + std::to_string(idx) + " is out of range")); } } else { - JSON_THROW(std::domain_error("cannot use at() with " + type_name())); + JSON_THROW(type_error::create(304, "cannot use at() with " + type_name())); } } @@ -3602,21 +3953,25 @@ class basic_json @return reference to the element at key @a key - @throw std::domain_error if the JSON value is not an object; example: - `"cannot use at() with boolean"` - @throw std::out_of_range if the key @a key is is not stored in the object; - that is, `find(key) == end()`; example: `"key "the fast" not found"` + @throw type_error.304 if the JSON value is not an object; in this case, + calling `at` with a key makes no sense. See example below. + @throw out_of_range.403 if the key @a key is is not stored in the object; + that is, `find(key) == end()`. See example below. + + @exceptionsafety Strong guarantee: if an exception is thrown, there are no + changes in the JSON value. @complexity Logarithmic in the size of the container. - @liveexample{The example below shows how object elements can be read and - written using `at()`.,at__object_t_key_type} - @sa @ref operator[](const typename object_t::key_type&) for unchecked access by reference @sa @ref value() for access by value with a default value @since version 1.0.0 + + @liveexample{The example below shows how object elements can be read and + written using `at()`. 
It also demonstrates the different exceptions that + can be thrown.,at__object_t_key_type} */ reference at(const typename object_t::key_type& key) { @@ -3630,12 +3985,12 @@ class basic_json JSON_CATCH (std::out_of_range&) { // create better exception explanation - JSON_THROW(std::out_of_range("key '" + key + "' not found")); + JSON_THROW(out_of_range::create(403, "key '" + key + "' not found")); } } else { - JSON_THROW(std::domain_error("cannot use at() with " + type_name())); + JSON_THROW(type_error::create(304, "cannot use at() with " + type_name())); } } @@ -3649,21 +4004,25 @@ class basic_json @return const reference to the element at key @a key - @throw std::domain_error if the JSON value is not an object; example: - `"cannot use at() with boolean"` - @throw std::out_of_range if the key @a key is is not stored in the object; - that is, `find(key) == end()`; example: `"key "the fast" not found"` + @throw type_error.304 if the JSON value is not an object; in this case, + calling `at` with a key makes no sense. See example below. + @throw out_of_range.403 if the key @a key is is not stored in the object; + that is, `find(key) == end()`. See example below. + + @exceptionsafety Strong guarantee: if an exception is thrown, there are no + changes in the JSON value. @complexity Logarithmic in the size of the container. - @liveexample{The example below shows how object elements can be read using - `at()`.,at__object_t_key_type_const} - @sa @ref operator[](const typename object_t::key_type&) for unchecked access by reference @sa @ref value() for access by value with a default value @since version 1.0.0 + + @liveexample{The example below shows how object elements can be read using + `at()`. 
It also demonstrates the different exceptions that can be thrown., + at__object_t_key_type_const} */ const_reference at(const typename object_t::key_type& key) const { @@ -3677,12 +4036,12 @@ class basic_json JSON_CATCH (std::out_of_range&) { // create better exception explanation - JSON_THROW(std::out_of_range("key '" + key + "' not found")); + JSON_THROW(out_of_range::create(403, "key '" + key + "' not found")); } } else { - JSON_THROW(std::domain_error("cannot use at() with " + type_name())); + JSON_THROW(type_error::create(304, "cannot use at() with " + type_name())); } } @@ -3699,8 +4058,8 @@ class basic_json @return reference to the element at index @a idx - @throw std::domain_error if JSON is not an array or null; example: - `"cannot use operator[] with string"` + @throw type_error.305 if the JSON value is not an array or null; in that + cases, using the [] operator with an index makes no sense. @complexity Constant if @a idx is in the range of the array. Otherwise linear in `idx - size()`. @@ -3735,7 +4094,7 @@ class basic_json return m_value.array->operator[](idx); } - JSON_THROW(std::domain_error("cannot use operator[] with " + type_name())); + JSON_THROW(type_error::create(305, "cannot use operator[] with " + type_name())); } /*! @@ -3747,8 +4106,8 @@ class basic_json @return const reference to the element at index @a idx - @throw std::domain_error if JSON is not an array; example: `"cannot use - operator[] with null"` + @throw type_error.305 if the JSON value is not an array; in that cases, + using the [] operator with an index makes no sense. @complexity Constant. @@ -3765,7 +4124,7 @@ class basic_json return m_value.array->operator[](idx); } - JSON_THROW(std::domain_error("cannot use operator[] with " + type_name())); + JSON_THROW(type_error::create(305, "cannot use operator[] with " + type_name())); } /*! 
@@ -3781,8 +4140,8 @@ class basic_json @return reference to the element at key @a key - @throw std::domain_error if JSON is not an object or null; example: - `"cannot use operator[] with string"` + @throw type_error.305 if the JSON value is not an object or null; in that + cases, using the [] operator with a key makes no sense. @complexity Logarithmic in the size of the container. @@ -3811,7 +4170,7 @@ class basic_json return m_value.object->operator[](key); } - JSON_THROW(std::domain_error("cannot use operator[] with " + type_name())); + JSON_THROW(type_error::create(305, "cannot use operator[] with " + type_name())); } /*! @@ -3830,8 +4189,8 @@ class basic_json @pre The element with key @a key must exist. **This precondition is enforced with an assertion.** - @throw std::domain_error if JSON is not an object; example: `"cannot use - operator[] with null"` + @throw type_error.305 if the JSON value is not an object; in that cases, + using the [] operator with a key makes no sense. @complexity Logarithmic in the size of the container. @@ -3853,7 +4212,7 @@ class basic_json return m_value.object->find(key)->second; } - JSON_THROW(std::domain_error("cannot use operator[] with " + type_name())); + JSON_THROW(type_error::create(305, "cannot use operator[] with " + type_name())); } /*! @@ -3869,8 +4228,8 @@ class basic_json @return reference to the element at key @a key - @throw std::domain_error if JSON is not an object or null; example: - `"cannot use operator[] with string"` + @throw type_error.305 if the JSON value is not an object or null; in that + cases, using the [] operator with a key makes no sense. @complexity Logarithmic in the size of the container. 
@@ -3904,8 +4263,8 @@ class basic_json @return const reference to the element at key @a key - @throw std::domain_error if JSON is not an object; example: `"cannot use - operator[] with null"` + @throw type_error.305 if the JSON value is not an object; in that cases, + using the [] operator with a key makes no sense. @complexity Logarithmic in the size of the container. @@ -3937,8 +4296,8 @@ class basic_json @return reference to the element at key @a key - @throw std::domain_error if JSON is not an object or null; example: - `"cannot use operator[] with string"` + @throw type_error.305 if the JSON value is not an object or null; in that + cases, using the [] operator with a key makes no sense. @complexity Logarithmic in the size of the container. @@ -3968,7 +4327,7 @@ class basic_json return m_value.object->operator[](key); } - JSON_THROW(std::domain_error("cannot use operator[] with " + type_name())); + JSON_THROW(type_error::create(305, "cannot use operator[] with " + type_name())); } /*! @@ -3987,8 +4346,8 @@ class basic_json @pre The element with key @a key must exist. **This precondition is enforced with an assertion.** - @throw std::domain_error if JSON is not an object; example: `"cannot use - operator[] with null"` + @throw type_error.305 if the JSON value is not an object; in that cases, + using the [] operator with a key makes no sense. @complexity Logarithmic in the size of the container. @@ -4011,7 +4370,7 @@ class basic_json return m_value.object->find(key)->second; } - JSON_THROW(std::domain_error("cannot use operator[] with " + type_name())); + JSON_THROW(type_error::create(305, "cannot use operator[] with " + type_name())); } /*! 
@@ -4024,7 +4383,7 @@ class basic_json @code {.cpp} try { return at(key); - } catch(std::out_of_range) { + } catch(out_of_range) { return default_value; } @endcode @@ -4047,8 +4406,8 @@ class basic_json @return copy of the element at key @a key or @a default_value if @a key is not found - @throw std::domain_error if JSON is not an object; example: `"cannot use - value() with null"` + @throw type_error.306 if the JSON value is not an object; in that case, + using `value()` with a key makes no sense. @complexity Logarithmic in the size of the container. @@ -4080,7 +4439,7 @@ class basic_json } else { - JSON_THROW(std::domain_error("cannot use value() with " + type_name())); + JSON_THROW(type_error::create(306, "cannot use value() with " + type_name())); } } @@ -4103,7 +4462,7 @@ class basic_json @code {.cpp} try { return at(ptr); - } catch(std::out_of_range) { + } catch(out_of_range) { return default_value; } @endcode @@ -4122,8 +4481,8 @@ class basic_json @return copy of the element at key @a key or @a default_value if @a key is not found - @throw std::domain_error if JSON is not an object; example: `"cannot use - value() with null"` + @throw type_error.306 if the JSON value is not an object; in that case, + using `value()` with a key makes no sense. @complexity Logarithmic in the size of the container. @@ -4146,13 +4505,13 @@ class basic_json { return ptr.get_checked(this); } - JSON_CATCH (std::out_of_range&) + JSON_CATCH (out_of_range&) { return default_value; } } - JSON_THROW(std::domain_error("cannot use value() with " + type_name())); + JSON_THROW(type_error::create(306, "cannot use value() with " + type_name())); } /*! @@ -4181,7 +4540,7 @@ class basic_json assertions**). @post The JSON value remains unchanged. - @throw std::out_of_range when called on `null` value + @throw invalid_iterator.214 when called on `null` value @liveexample{The following code shows an example for `front()`.,front} @@ -4224,7 +4583,8 @@ class basic_json assertions**). 
@post The JSON value remains unchanged. - @throw std::out_of_range when called on `null` value. + @throw invalid_iterator.214 when called on a `null` value. See example + below. @liveexample{The following code shows an example for `back()`.,back} @@ -4268,11 +4628,12 @@ class basic_json @post Invalidates iterators and references at or after the point of the erase, including the `end()` iterator. - @throw std::domain_error if called on a `null` value; example: `"cannot - use erase() with null"` - @throw std::domain_error if called on an iterator which does not belong to - the current JSON value; example: `"iterator does not fit current value"` - @throw std::out_of_range if called on a primitive type with invalid + @throw type_error.307 if called on a `null` value; example: `"cannot use + erase() with null"` + @throw invalid_iterator.202 if called on an iterator which does not belong + to the current JSON value; example: `"iterator does not fit current + value"` + @throw invalid_iterator.205 if called on a primitive type with invalid iterator (i.e., any iterator which is not `begin()`); example: `"iterator out of range"` @@ -4303,7 +4664,7 @@ class basic_json // make sure iterator fits the current value if (this != pos.m_object) { - JSON_THROW(std::domain_error("iterator does not fit current value")); + JSON_THROW(invalid_iterator::create(202, "iterator does not fit current value")); } IteratorType result = end(); @@ -4318,7 +4679,7 @@ class basic_json { if (not pos.m_it.primitive_iterator.is_begin()) { - JSON_THROW(std::out_of_range("iterator out of range")); + JSON_THROW(invalid_iterator::create(205, "iterator out of range")); } if (is_string()) @@ -4348,7 +4709,7 @@ class basic_json default: { - JSON_THROW(std::domain_error("cannot use erase() with " + type_name())); + JSON_THROW(type_error::create(307, "cannot use erase() with " + type_name())); } } @@ -4375,11 +4736,11 @@ class basic_json @post Invalidates iterators and references at or after the point of the 
erase, including the `end()` iterator. - @throw std::domain_error if called on a `null` value; example: `"cannot - use erase() with null"` - @throw std::domain_error if called on iterators which does not belong to - the current JSON value; example: `"iterators do not fit current value"` - @throw std::out_of_range if called on a primitive type with invalid + @throw type_error.307 if called on a `null` value; example: `"cannot use + erase() with null"` + @throw invalid_iterator.203 if called on iterators which does not belong + to the current JSON value; example: `"iterators do not fit current value"` + @throw invalid_iterator.204 if called on a primitive type with invalid iterators (i.e., if `first != begin()` and `last != end()`); example: `"iterators out of range"` @@ -4410,7 +4771,7 @@ class basic_json // make sure iterator fits the current value if (this != first.m_object or this != last.m_object) { - JSON_THROW(std::domain_error("iterators do not fit current value")); + JSON_THROW(invalid_iterator::create(203, "iterators do not fit current value")); } IteratorType result = end(); @@ -4425,7 +4786,7 @@ class basic_json { if (not first.m_it.primitive_iterator.is_begin() or not last.m_it.primitive_iterator.is_end()) { - JSON_THROW(std::out_of_range("iterators out of range")); + JSON_THROW(invalid_iterator::create(204, "iterators out of range")); } if (is_string()) @@ -4457,7 +4818,7 @@ class basic_json default: { - JSON_THROW(std::domain_error("cannot use erase() with " + type_name())); + JSON_THROW(type_error::create(307, "cannot use erase() with " + type_name())); } } @@ -4478,7 +4839,7 @@ class basic_json @post References and iterators to the erased elements are invalidated. Other references and iterators are not affected. 
- @throw std::domain_error when called on a type other than JSON object; + @throw type_error.307 when called on a type other than JSON object; example: `"cannot use erase() with null"` @complexity `log(size()) + count(key)` @@ -4501,7 +4862,7 @@ class basic_json return m_value.object->erase(key); } - JSON_THROW(std::domain_error("cannot use erase() with " + type_name())); + JSON_THROW(type_error::create(307, "cannot use erase() with " + type_name())); } /*! @@ -4511,9 +4872,9 @@ class basic_json @param[in] idx index of the element to remove - @throw std::domain_error when called on a type other than JSON array; + @throw type_error.307 when called on a type other than JSON array; example: `"cannot use erase() with null"` - @throw std::out_of_range when `idx >= size()`; example: `"array index 17 + @throw out_of_range.401 when `idx >= size()`; example: `"array index 17 is out of range"` @complexity Linear in distance between @a idx and the end of the container. @@ -4535,14 +4896,14 @@ class basic_json { if (idx >= size()) { - JSON_THROW(std::out_of_range("array index " + std::to_string(idx) + " is out of range")); + JSON_THROW(out_of_range::create(401, "array index " + std::to_string(idx) + " is out of range")); } m_value.array->erase(m_value.array->begin() + static_cast(idx)); } else { - JSON_THROW(std::domain_error("cannot use erase() with " + type_name())); + JSON_THROW(type_error::create(307, "cannot use erase() with " + type_name())); } } @@ -4929,6 +5290,8 @@ class basic_json reference to the JSON values is returned, so there is no access to the underlying iterator. + @liveexample{The following code shows how the wrapper is used,iterator_wrapper} + @note The name of this function is not yet final and may change in the future. 
*/ @@ -5244,7 +5607,7 @@ class basic_json @param[in] val the value to add to the JSON array - @throw std::domain_error when called on a type other than JSON array or + @throw type_error.308 when called on a type other than JSON array or null; example: `"cannot use push_back() with number"` @complexity Amortized constant. @@ -5260,7 +5623,7 @@ class basic_json // push_back only works for null objects or arrays if (not(is_null() or is_array())) { - JSON_THROW(std::domain_error("cannot use push_back() with " + type_name())); + JSON_THROW(type_error::create(308, "cannot use push_back() with " + type_name())); } // transform null object into an array @@ -5296,7 +5659,7 @@ class basic_json // push_back only works for null objects or arrays if (not(is_null() or is_array())) { - JSON_THROW(std::domain_error("cannot use push_back() with " + type_name())); + JSON_THROW(type_error::create(308, "cannot use push_back() with " + type_name())); } // transform null object into an array @@ -5330,7 +5693,7 @@ class basic_json @param[in] val the value to add to the JSON object - @throw std::domain_error when called on a type other than JSON object or + @throw type_error.308 when called on a type other than JSON object or null; example: `"cannot use push_back() with number"` @complexity Logarithmic in the size of the container, O(log(`size()`)). @@ -5346,7 +5709,7 @@ class basic_json // push_back only works for null objects or objects if (not(is_null() or is_object())) { - JSON_THROW(std::domain_error("cannot use push_back() with " + type_name())); + JSON_THROW(type_error::create(308, "cannot use push_back() with " + type_name())); } // transform null object into an object @@ -5384,7 +5747,7 @@ class basic_json @ref push_back(const typename object_t::value_type&). Otherwise, @a init is converted to a JSON value and added using @ref push_back(basic_json&&). 
- @param init an initializer list + @param[in] init an initializer list @complexity Linear in the size of the initializer list @a init. @@ -5429,7 +5792,7 @@ class basic_json @param[in] args arguments to forward to a constructor of @ref basic_json @tparam Args compatible types to create a @ref basic_json object - @throw std::domain_error when called on a type other than JSON array or + @throw type_error.311 when called on a type other than JSON array or null; example: `"cannot use emplace_back() with number"` @complexity Amortized constant. @@ -5446,7 +5809,7 @@ class basic_json // emplace_back only works for null objects or arrays if (not(is_null() or is_array())) { - JSON_THROW(std::domain_error("cannot use emplace_back() with " + type_name())); + JSON_THROW(type_error::create(311, "cannot use emplace_back() with " + type_name())); } // transform null object into an array @@ -5476,7 +5839,7 @@ class basic_json already-existing element if no insertion happened, and a bool denoting whether the insertion took place. - @throw std::domain_error when called on a type other than JSON object or + @throw type_error.311 when called on a type other than JSON object or null; example: `"cannot use emplace() with number"` @complexity Logarithmic in the size of the container, O(log(`size()`)). @@ -5494,7 +5857,7 @@ class basic_json // emplace only works for null objects or arrays if (not(is_null() or is_object())) { - JSON_THROW(std::domain_error("cannot use emplace() with " + type_name())); + JSON_THROW(type_error::create(311, "cannot use emplace() with " + type_name())); } // transform null object into an object @@ -5525,10 +5888,10 @@ class basic_json @param[in] val element to insert @return iterator pointing to the inserted @a val. 
- @throw std::domain_error if called on JSON values other than arrays; + @throw type_error.309 if called on JSON values other than arrays; example: `"cannot use insert() with string"` - @throw std::domain_error if @a pos is not an iterator of *this; example: - `"iterator does not fit current value"` + @throw invalid_iterator.202 if @a pos is not an iterator of *this; + example: `"iterator does not fit current value"` @complexity Constant plus linear in the distance between @a pos and end of the container. @@ -5545,7 +5908,7 @@ class basic_json // check if iterator pos fits to this JSON value if (pos.m_object != this) { - JSON_THROW(std::domain_error("iterator does not fit current value")); + JSON_THROW(invalid_iterator::create(202, "iterator does not fit current value")); } // insert to array and return iterator @@ -5554,7 +5917,7 @@ class basic_json return result; } - JSON_THROW(std::domain_error("cannot use insert() with " + type_name())); + JSON_THROW(type_error::create(309, "cannot use insert() with " + type_name())); } /*! @@ -5578,10 +5941,10 @@ class basic_json @return iterator pointing to the first element inserted, or @a pos if `cnt==0` - @throw std::domain_error if called on JSON values other than arrays; - example: `"cannot use insert() with string"` - @throw std::domain_error if @a pos is not an iterator of *this; example: - `"iterator does not fit current value"` + @throw type_error.309 if called on JSON values other than arrays; example: + `"cannot use insert() with string"` + @throw invalid_iterator.202 if @a pos is not an iterator of *this; + example: `"iterator does not fit current value"` @complexity Linear in @a cnt plus linear in the distance between @a pos and end of the container. 
@@ -5598,7 +5961,7 @@ class basic_json // check if iterator pos fits to this JSON value if (pos.m_object != this) { - JSON_THROW(std::domain_error("iterator does not fit current value")); + JSON_THROW(invalid_iterator::create(202, "iterator does not fit current value")); } // insert to array and return iterator @@ -5607,7 +5970,7 @@ class basic_json return result; } - JSON_THROW(std::domain_error("cannot use insert() with " + type_name())); + JSON_THROW(type_error::create(309, "cannot use insert() with " + type_name())); } /*! @@ -5620,13 +5983,13 @@ class basic_json @param[in] first begin of the range of elements to insert @param[in] last end of the range of elements to insert - @throw std::domain_error if called on JSON values other than arrays; - example: `"cannot use insert() with string"` - @throw std::domain_error if @a pos is not an iterator of *this; example: - `"iterator does not fit current value"` - @throw std::domain_error if @a first and @a last do not belong to the same - JSON value; example: `"iterators do not fit"` - @throw std::domain_error if @a first or @a last are iterators into + @throw type_error.309 if called on JSON values other than arrays; example: + `"cannot use insert() with string"` + @throw invalid_iterator.202 if @a pos is not an iterator of *this; + example: `"iterator does not fit current value"` + @throw invalid_iterator.210 if @a first and @a last do not belong to the + same JSON value; example: `"iterators do not fit"` + @throw invalid_iterator.211 if @a first or @a last are iterators into container for which insert is called; example: `"passed iterators may not belong to container"` @@ -5645,24 +6008,24 @@ class basic_json // insert only works for arrays if (not is_array()) { - JSON_THROW(std::domain_error("cannot use insert() with " + type_name())); + JSON_THROW(type_error::create(309, "cannot use insert() with " + type_name())); } // check if iterator pos fits to this JSON value if (pos.m_object != this) { - 
JSON_THROW(std::domain_error("iterator does not fit current value")); + JSON_THROW(invalid_iterator::create(202, "iterator does not fit current value")); } // check if range iterators belong to the same JSON object if (first.m_object != last.m_object) { - JSON_THROW(std::domain_error("iterators do not fit")); + JSON_THROW(invalid_iterator::create(210, "iterators do not fit")); } if (first.m_object == this or last.m_object == this) { - JSON_THROW(std::domain_error("passed iterators may not belong to container")); + JSON_THROW(invalid_iterator::create(211, "passed iterators may not belong to container")); } // insert to array and return iterator @@ -5683,10 +6046,10 @@ class basic_json the end() iterator @param[in] ilist initializer list to insert the values from - @throw std::domain_error if called on JSON values other than arrays; - example: `"cannot use insert() with string"` - @throw std::domain_error if @a pos is not an iterator of *this; example: - `"iterator does not fit current value"` + @throw type_error.309 if called on JSON values other than arrays; example: + `"cannot use insert() with string"` + @throw invalid_iterator.202 if @a pos is not an iterator of *this; + example: `"iterator does not fit current value"` @return iterator pointing to the first element inserted, or @a pos if `ilist` is empty @@ -5703,13 +6066,13 @@ class basic_json // insert only works for arrays if (not is_array()) { - JSON_THROW(std::domain_error("cannot use insert() with " + type_name())); + JSON_THROW(type_error::create(309, "cannot use insert() with " + type_name())); } // check if iterator pos fits to this JSON value if (pos.m_object != this) { - JSON_THROW(std::domain_error("iterator does not fit current value")); + JSON_THROW(invalid_iterator::create(202, "iterator does not fit current value")); } // insert to array and return iterator @@ -5718,6 +6081,52 @@ class basic_json return result; } + /*! + @brief inserts elements + + Inserts elements from range `[first, last)`. 
+ + @param[in] first begin of the range of elements to insert + @param[in] last end of the range of elements to insert + + @throw type_error.309 if called on JSON values other than objects; example: + `"cannot use insert() with string"` + @throw invalid_iterator.202 if iterator @a first or @a last does not + point to an object; example: `"iterators first and last must point to + objects"` + @throw invalid_iterator.210 if @a first and @a last do not belong to the + same JSON value; example: `"iterators do not fit"` + + @complexity Logarithmic: `O(N*log(size() + N))`, where `N` is the number + of elements to insert. + + @liveexample{The example shows how `insert()` is used.,insert__range_object} + + @since version 3.0.0 + */ + void insert(const_iterator first, const_iterator last) + { + // insert only works for objects + if (not is_object()) + { + JSON_THROW(type_error::create(309, "cannot use insert() with " + type_name())); + } + + // check if range iterators belong to the same JSON object + if (first.m_object != last.m_object) + { + JSON_THROW(invalid_iterator::create(210, "iterators do not fit")); + } + + // passed iterators must belong to objects + if (not first.m_object->is_object() or not first.m_object->is_object()) + { + JSON_THROW(invalid_iterator::create(202, "iterators first and last must point to objects")); + } + + m_value.object->insert(first.m_it.object_iterator, last.m_it.object_iterator); + } + /*! @brief exchanges the values @@ -5757,8 +6166,8 @@ class basic_json @param[in,out] other array to exchange the contents with - @throw std::domain_error when JSON value is not an array; example: - `"cannot use swap() with string"` + @throw type_error.310 when JSON value is not an array; example: `"cannot + use swap() with string"` @complexity Constant. 
@@ -5776,7 +6185,7 @@ class basic_json } else { - JSON_THROW(std::domain_error("cannot use swap() with " + type_name())); + JSON_THROW(type_error::create(310, "cannot use swap() with " + type_name())); } } @@ -5790,7 +6199,7 @@ class basic_json @param[in,out] other object to exchange the contents with - @throw std::domain_error when JSON value is not an object; example: + @throw type_error.310 when JSON value is not an object; example: `"cannot use swap() with string"` @complexity Constant. @@ -5809,7 +6218,7 @@ class basic_json } else { - JSON_THROW(std::domain_error("cannot use swap() with " + type_name())); + JSON_THROW(type_error::create(310, "cannot use swap() with " + type_name())); } } @@ -5823,7 +6232,7 @@ class basic_json @param[in,out] other string to exchange the contents with - @throw std::domain_error when JSON value is not a string; example: `"cannot + @throw type_error.310 when JSON value is not a string; example: `"cannot use swap() with boolean"` @complexity Constant. @@ -5842,7 +6251,7 @@ class basic_json } else { - JSON_THROW(std::domain_error("cannot use swap() with " + type_name())); + JSON_THROW(type_error::create(310, "cannot use swap() with " + type_name())); } } @@ -5861,13 +6270,17 @@ class basic_json Compares two JSON values for equality according to the following rules: - Two JSON values are equal if (1) they are from the same type and (2) - their stored values are the same. + their stored values are the same according to their respective + `operator==`. - Integer and floating-point numbers are automatically converted before comparison. Floating-point numbers are compared indirectly: two floating-point numbers `f1` and `f2` are considered equal if neither - `f1 > f2` nor `f2 > f1` holds. + `f1 > f2` nor `f2 > f1` holds. Note than two NaN values are always + treated as unequal. - Two JSON null values are equal. + @note NaN values never compare equal to themselves or to other NaN values. 
+ @param[in] lhs first JSON value to consider @param[in] rhs second JSON value to consider @return whether the values @a lhs and @a rhs are equal @@ -6054,7 +6467,7 @@ class basic_json { case value_t::array: { - return *lhs.m_value.array < *rhs.m_value.array; + return (*lhs.m_value.array) < (*rhs.m_value.array); } case value_t::object: { @@ -6121,6 +6534,28 @@ class basic_json return operator<(lhs_type, rhs_type); } + /*! + @brief comparison: less than + @copydoc operator<(const_reference, const_reference) + */ + template::value, int>::type = 0> + friend bool operator<(const_reference lhs, const ScalarType rhs) noexcept + { + return (lhs < basic_json(rhs)); + } + + /*! + @brief comparison: less than + @copydoc operator<(const_reference, const_reference) + */ + template::value, int>::type = 0> + friend bool operator<(const ScalarType lhs, const_reference rhs) noexcept + { + return (basic_json(lhs) < rhs); + } + /*! @brief comparison: less than or equal @@ -6143,6 +6578,28 @@ class basic_json return not (rhs < lhs); } + /*! + @brief comparison: less than or equal + @copydoc operator<=(const_reference, const_reference) + */ + template::value, int>::type = 0> + friend bool operator<=(const_reference lhs, const ScalarType rhs) noexcept + { + return (lhs <= basic_json(rhs)); + } + + /*! + @brief comparison: less than or equal + @copydoc operator<=(const_reference, const_reference) + */ + template::value, int>::type = 0> + friend bool operator<=(const ScalarType lhs, const_reference rhs) noexcept + { + return (basic_json(lhs) <= rhs); + } + /*! @brief comparison: greater than @@ -6165,6 +6622,28 @@ class basic_json return not (lhs <= rhs); } + /*! + @brief comparison: greater than + @copydoc operator>(const_reference, const_reference) + */ + template::value, int>::type = 0> + friend bool operator>(const_reference lhs, const ScalarType rhs) noexcept + { + return (lhs > basic_json(rhs)); + } + + /*! 
+ @brief comparison: greater than + @copydoc operator>(const_reference, const_reference) + */ + template::value, int>::type = 0> + friend bool operator>(const ScalarType lhs, const_reference rhs) noexcept + { + return (basic_json(lhs) > rhs); + } + /*! @brief comparison: greater than or equal @@ -6187,8 +6666,133 @@ class basic_json return not (lhs < rhs); } + /*! + @brief comparison: greater than or equal + @copydoc operator>=(const_reference, const_reference) + */ + template::value, int>::type = 0> + friend bool operator>=(const_reference lhs, const ScalarType rhs) noexcept + { + return (lhs >= basic_json(rhs)); + } + + /*! + @brief comparison: greater than or equal + @copydoc operator>=(const_reference, const_reference) + */ + template::value, int>::type = 0> + friend bool operator>=(const ScalarType lhs, const_reference rhs) noexcept + { + return (basic_json(lhs) >= rhs); + } + /// @} + private: + ///////////////////// + // output adapters // + ///////////////////// + + /// abstract output adapter interface + template + class output_adapter + { + public: + virtual void write_character(CharType c) = 0; + virtual void write_characters(const CharType* s, size_t length) = 0; + virtual ~output_adapter() {} + + static std::shared_ptr> create(std::vector& vec) + { + return std::shared_ptr(new output_vector_adapter(vec)); + } + + static std::shared_ptr> create(std::ostream& s) + { + return std::shared_ptr(new output_stream_adapter(s)); + } + + static std::shared_ptr> create(std::string& s) + { + return std::shared_ptr(new output_string_adapter(s)); + } + }; + + /// a type to simplify interfaces + template + using output_adapter_t = std::shared_ptr>; + + /// output adapter for byte vectors + template + class output_vector_adapter : public output_adapter + { + public: + output_vector_adapter(std::vector& vec) + : v(vec) + {} + + void write_character(CharType c) override + { + v.push_back(c); + } + + void write_characters(const CharType* s, size_t length) override + { + 
std::copy(s, s + length, std::back_inserter(v)); + } + + private: + std::vector& v; + }; + + /// putput adatpter for output streams + template + class output_stream_adapter : public output_adapter + { + public: + output_stream_adapter(std::basic_ostream& s) + : stream(s) + {} + + void write_character(CharType c) override + { + stream.put(c); + } + + void write_characters(const CharType* s, size_t length) override + { + stream.write(s, static_cast(length)); + } + + private: + std::basic_ostream& stream; + }; + + /// output adapter for basic_string + template + class output_string_adapter : public output_adapter + { + public: + output_string_adapter(std::string& s) + : str(s) + {} + + void write_character(CharType c) override + { + str.push_back(c); + } + + void write_characters(const CharType* s, size_t length) override + { + str.append(s, length); + } + + private: + std::basic_string& str; + }; + /////////////////// // serialization // @@ -6197,15 +6801,613 @@ class basic_json /// @name serialization /// @{ + private: + /*! + @brief wrapper around the serialization functions + */ + class serializer + { + public: + /*! + @param[in] s output stream to serialize to + @param[in] ichar indentation character to use + */ + serializer(output_adapter_t s, const char ichar) + : o(s), loc(std::localeconv()), + thousands_sep(!loc->thousands_sep ? '\0' : loc->thousands_sep[0]), + decimal_point(!loc->decimal_point ? '\0' : loc->decimal_point[0]), + indent_char(ichar), indent_string(512, indent_char) + {} + + // delete because of pointer members + serializer(const serializer&) = delete; + serializer& operator=(const serializer&) = delete; + + /*! + @brief internal implementation of the serialization function + + This function is called by the public member function dump and + organizes the serialization internally. The indentation level is + propagated as additional parameter. In case of arrays and objects, the + function is called recursively. 
+ + - strings and object keys are escaped using `escape_string()` + - integer numbers are converted implicitly via `operator<<` + - floating-point numbers are converted to a string using `"%g"` format + + @param[in] val value to serialize + @param[in] pretty_print whether the output shall be pretty-printed + @param[in] indent_step the indent level + @param[in] current_indent the current indent level (only used internally) + */ + void dump(const basic_json& val, + const bool pretty_print, + const unsigned int indent_step, + const unsigned int current_indent = 0) + { + switch (val.m_type) + { + case value_t::object: + { + if (val.m_value.object->empty()) + { + o->write_characters("{}", 2); + return; + } + + if (pretty_print) + { + o->write_characters("{\n", 2); + + // variable to hold indentation for recursive calls + const auto new_indent = current_indent + indent_step; + if (indent_string.size() < new_indent) + { + indent_string.resize(new_indent, ' '); + } + + // first n-1 elements + auto i = val.m_value.object->cbegin(); + for (size_t cnt = 0; cnt < val.m_value.object->size() - 1; ++cnt, ++i) + { + o->write_characters(indent_string.c_str(), new_indent); + o->write_character('\"'); + dump_escaped(i->first); + o->write_characters("\": ", 3); + dump(i->second, true, indent_step, new_indent); + o->write_characters(",\n", 2); + } + + // last element + assert(i != val.m_value.object->cend()); + o->write_characters(indent_string.c_str(), new_indent); + o->write_character('\"'); + dump_escaped(i->first); + o->write_characters("\": ", 3); + dump(i->second, true, indent_step, new_indent); + + o->write_character('\n'); + o->write_characters(indent_string.c_str(), current_indent); + o->write_character('}'); + } + else + { + o->write_character('{'); + + // first n-1 elements + auto i = val.m_value.object->cbegin(); + for (size_t cnt = 0; cnt < val.m_value.object->size() - 1; ++cnt, ++i) + { + o->write_character('\"'); + dump_escaped(i->first); + o->write_characters("\":", 2); 
+ dump(i->second, false, indent_step, current_indent); + o->write_character(','); + } + + // last element + assert(i != val.m_value.object->cend()); + o->write_character('\"'); + dump_escaped(i->first); + o->write_characters("\":", 2); + dump(i->second, false, indent_step, current_indent); + + o->write_character('}'); + } + + return; + } + + case value_t::array: + { + if (val.m_value.array->empty()) + { + o->write_characters("[]", 2); + return; + } + + if (pretty_print) + { + o->write_characters("[\n", 2); + + // variable to hold indentation for recursive calls + const auto new_indent = current_indent + indent_step; + if (indent_string.size() < new_indent) + { + indent_string.resize(new_indent, ' '); + } + + // first n-1 elements + for (auto i = val.m_value.array->cbegin(); i != val.m_value.array->cend() - 1; ++i) + { + o->write_characters(indent_string.c_str(), new_indent); + dump(*i, true, indent_step, new_indent); + o->write_characters(",\n", 2); + } + + // last element + assert(not val.m_value.array->empty()); + o->write_characters(indent_string.c_str(), new_indent); + dump(val.m_value.array->back(), true, indent_step, new_indent); + + o->write_character('\n'); + o->write_characters(indent_string.c_str(), current_indent); + o->write_character(']'); + } + else + { + o->write_character('['); + + // first n-1 elements + for (auto i = val.m_value.array->cbegin(); i != val.m_value.array->cend() - 1; ++i) + { + dump(*i, false, indent_step, current_indent); + o->write_character(','); + } + + // last element + assert(not val.m_value.array->empty()); + dump(val.m_value.array->back(), false, indent_step, current_indent); + + o->write_character(']'); + } + + return; + } + + case value_t::string: + { + o->write_character('\"'); + dump_escaped(*val.m_value.string); + o->write_character('\"'); + return; + } + + case value_t::boolean: + { + if (val.m_value.boolean) + { + o->write_characters("true", 4); + } + else + { + o->write_characters("false", 5); + } + return; + } + + 
case value_t::number_integer: + { + dump_integer(val.m_value.number_integer); + return; + } + + case value_t::number_unsigned: + { + dump_integer(val.m_value.number_unsigned); + return; + } + + case value_t::number_float: + { + dump_float(val.m_value.number_float); + return; + } + + case value_t::discarded: + { + o->write_characters("", 11); + return; + } + + case value_t::null: + { + o->write_characters("null", 4); + return; + } + } + } + + private: + /*! + @brief calculates the extra space to escape a JSON string + + @param[in] s the string to escape + @return the number of characters required to escape string @a s + + @complexity Linear in the length of string @a s. + */ + static std::size_t extra_space(const string_t& s) noexcept + { + return std::accumulate(s.begin(), s.end(), size_t{}, + [](size_t res, typename string_t::value_type c) + { + switch (c) + { + case '"': + case '\\': + case '\b': + case '\f': + case '\n': + case '\r': + case '\t': + { + // from c (1 byte) to \x (2 bytes) + return res + 1; + } + + case 0x00: + case 0x01: + case 0x02: + case 0x03: + case 0x04: + case 0x05: + case 0x06: + case 0x07: + case 0x0b: + case 0x0e: + case 0x0f: + case 0x10: + case 0x11: + case 0x12: + case 0x13: + case 0x14: + case 0x15: + case 0x16: + case 0x17: + case 0x18: + case 0x19: + case 0x1a: + case 0x1b: + case 0x1c: + case 0x1d: + case 0x1e: + case 0x1f: + { + // from c (1 byte) to \uxxxx (6 bytes) + return res + 5; + } + + default: + { + return res; + } + } + }); + } + + /*! + @brief dump escaped string + + Escape a string by replacing certain special characters by a sequence + of an escape character (backslash) and another character and other + control characters by a sequence of "\u" followed by a four-digit hex + representation. The escaped string is written to output stream @a o. + + @param[in] s the string to escape + + @complexity Linear in the length of string @a s. 
+ */ + void dump_escaped(const string_t& s) const + { + const auto space = extra_space(s); + if (space == 0) + { + o->write_characters(s.c_str(), s.size()); + return; + } + + // create a result string of necessary size + string_t result(s.size() + space, '\\'); + std::size_t pos = 0; + + for (const auto& c : s) + { + switch (c) + { + // quotation mark (0x22) + case '"': + { + result[pos + 1] = '"'; + pos += 2; + break; + } + + // reverse solidus (0x5c) + case '\\': + { + // nothing to change + pos += 2; + break; + } + + // backspace (0x08) + case '\b': + { + result[pos + 1] = 'b'; + pos += 2; + break; + } + + // formfeed (0x0c) + case '\f': + { + result[pos + 1] = 'f'; + pos += 2; + break; + } + + // newline (0x0a) + case '\n': + { + result[pos + 1] = 'n'; + pos += 2; + break; + } + + // carriage return (0x0d) + case '\r': + { + result[pos + 1] = 'r'; + pos += 2; + break; + } + + // horizontal tab (0x09) + case '\t': + { + result[pos + 1] = 't'; + pos += 2; + break; + } + + case 0x00: + case 0x01: + case 0x02: + case 0x03: + case 0x04: + case 0x05: + case 0x06: + case 0x07: + case 0x0b: + case 0x0e: + case 0x0f: + case 0x10: + case 0x11: + case 0x12: + case 0x13: + case 0x14: + case 0x15: + case 0x16: + case 0x17: + case 0x18: + case 0x19: + case 0x1a: + case 0x1b: + case 0x1c: + case 0x1d: + case 0x1e: + case 0x1f: + { + // convert a number 0..15 to its hex representation + // (0..f) + static const char hexify[16] = + { + '0', '1', '2', '3', '4', '5', '6', '7', + '8', '9', 'a', 'b', 'c', 'd', 'e', 'f' + }; + + // print character c as \uxxxx + for (const char m : + { 'u', '0', '0', hexify[c >> 4], hexify[c & 0x0f] + }) + { + result[++pos] = m; + } + + ++pos; + break; + } + + default: + { + // all other characters are added as-is + result[pos++] = c; + break; + } + } + } + + assert(pos == s.size() + space); + o->write_characters(result.c_str(), result.size()); + } + + /*! + @brief dump an integer + + Dump a given integer to output stream @a o. 
Works internally with + @a number_buffer. + + @param[in] x integer number (signed or unsigned) to dump + @tparam NumberType either @a number_integer_t or @a number_unsigned_t + */ + template::value or + std::is_same::value, int> = 0> + void dump_integer(NumberType x) + { + // special case for "0" + if (x == 0) + { + o->write_character('0'); + return; + } + + const bool is_negative = x < 0; + size_t i = 0; + + // spare 1 byte for '\0' + while (x != 0 and i < number_buffer.size() - 1) + { + const auto digit = std::labs(static_cast(x % 10)); + number_buffer[i++] = static_cast('0' + digit); + x /= 10; + } + + // make sure the number has been processed completely + assert(x == 0); + + if (is_negative) + { + // make sure there is capacity for the '-' + assert(i < number_buffer.size() - 2); + number_buffer[i++] = '-'; + } + + std::reverse(number_buffer.begin(), number_buffer.begin() + i); + o->write_characters(number_buffer.data(), i); + } + + /*! + @brief dump a floating-point number + + Dump a given floating-point number to output stream @a o. Works + internally with @a number_buffer. 
+ + @param[in] x floating-point number to dump + */ + void dump_float(number_float_t x) + { + // NaN / inf + if (not std::isfinite(x) or std::isnan(x)) + { + o->write_characters("null", 4); + return; + } + + // special case for 0.0 and -0.0 + if (x == 0) + { + if (std::signbit(x)) + { + o->write_characters("-0.0", 4); + } + else + { + o->write_characters("0.0", 3); + } + return; + } + + // get number of digits for a text -> float -> text round-trip + static constexpr auto d = std::numeric_limits::digits10; + + // the actual conversion + std::ptrdiff_t len = snprintf(number_buffer.data(), number_buffer.size(), + "%.*g", d, x); + + // negative value indicates an error + assert(len > 0); + // check if buffer was large enough + assert(static_cast(len) < number_buffer.size()); + + // erase thousands separator + if (thousands_sep != '\0') + { + const auto end = std::remove(number_buffer.begin(), + number_buffer.begin() + len, + thousands_sep); + std::fill(end, number_buffer.end(), '\0'); + assert((end - number_buffer.begin()) <= len); + len = (end - number_buffer.begin()); + } + + // convert decimal point to '.' + if (decimal_point != '\0' and decimal_point != '.') + { + for (auto& c : number_buffer) + { + if (c == decimal_point) + { + c = '.'; + break; + } + } + } + + o->write_characters(number_buffer.data(), static_cast(len)); + + // determine if need to append ".0" + const bool value_is_int_like = std::none_of(number_buffer.begin(), + number_buffer.begin() + len + 1, + [](char c) + { + return c == '.' 
or c == 'e'; + }); + + if (value_is_int_like) + { + o->write_characters(".0", 2); + } + } + + private: + /// the output of the serializer + output_adapter_t o = nullptr; + + /// a (hopefully) large enough character buffer + std::array number_buffer{{}}; + + /// the locale + const std::lconv* loc = nullptr; + /// the locale's thousand separator character + const char thousands_sep = '\0'; + /// the locale's decimal point character + const char decimal_point = '\0'; + + /// the indentation character + const char indent_char; + + /// the indentation string + string_t indent_string; + }; + + public: /*! @brief serialize to stream Serialize the given JSON value @a j to the output stream @a o. The JSON - value will be serialized using the @ref dump member function. The - indentation of the output can be controlled with the member variable - `width` of the output stream @a o. For instance, using the manipulator - `std::setw(4)` on @a o sets the indentation level to `4` and the - serialization result is the same as calling `dump(4)`. + value will be serialized using the @ref dump member function. + + - The indentation of the output can be controlled with the member variable + `width` of the output stream @a o. For instance, using the manipulator + `std::setw(4)` on @a o sets the indentation level to `4` and the + serialization result is the same as calling `dump(4)`. + + - The indentation characrer can be controlled with the member variable + `fill` of the output stream @a o. For instance, the manipulator + `std::setfill('\\t')` sets indentation to use a tab character rather than + the default space character. 
@param[in,out] o stream to serialize to @param[in] j JSON value to serialize @@ -6217,7 +7419,7 @@ class basic_json @liveexample{The example below shows the serialization with different parameters to `width` to adjust the indentation level.,operator_serialize} - @since version 1.0.0 + @since version 1.0.0; indentaction character added in version 3.0.0 */ friend std::ostream& operator<<(std::ostream& o, const basic_json& j) { @@ -6229,15 +7431,19 @@ class basic_json o.width(0); // do the actual serialization - j.dump(o, pretty_print, static_cast(indentation)); - + serializer s(output_adapter::create(o), o.fill()); + s.dump(j, pretty_print, static_cast(indentation)); return o; } /*! @brief serialize to stream - @copydoc operator<<(std::ostream&, const basic_json&) + @deprecated This stream operator is deprecated and will be removed in a + future version of the library. Please use + @ref std::ostream& operator<<(std::ostream&, const basic_json&) + instead; that is, replace calls like `j >> o;` with `o << j;`. */ + JSON_DEPRECATED friend std::ostream& operator>>(const basic_json& j, std::ostream& o) { return o << j; @@ -6269,6 +7475,11 @@ class basic_json @return result of the deserialization + @throw parse_error.101 if a parse error occurs; example: `""unexpected end + of input; expected string literal""` + @throw parse_error.102 if to_unicode fails or surrogate error + @throw parse_error.103 if to_unicode fails + @complexity Linear in the length of the input. The parser is a predictive LL(1) parser. The complexity can be higher if the parser callback function @a cb has a super-linear complexity. @@ -6288,6 +7499,13 @@ class basic_json return parse(std::begin(array), std::end(array), cb); } + template + static bool accept(T (&array)[N]) + { + // delegate the call to the iterator-range accept overload + return accept(std::begin(array), std::end(array)); + } + /*! 
@brief deserialize from string literal @@ -6299,6 +7517,10 @@ class basic_json @return result of the deserialization + @throw parse_error.101 in case of an unexpected token + @throw parse_error.102 if to_unicode fails or surrogate error + @throw parse_error.103 if to_unicode fails + @complexity Linear in the length of the input. The parser is a predictive LL(1) parser. The complexity can be higher if the parser callback function @a cb has a super-linear complexity. @@ -6322,7 +7544,16 @@ class basic_json static basic_json parse(const CharT s, const parser_callback_t cb = nullptr) { - return parser(reinterpret_cast(s), cb).parse(); + return parser(input_adapter::create(s), cb).parse(true); + } + + template::value and + std::is_integral::type>::value and + sizeof(typename std::remove_pointer::type) == 1, int>::type = 0> + static bool accept(const CharT s) + { + return parser(input_adapter::create(s)).accept(true); } /*! @@ -6335,6 +7566,11 @@ class basic_json @return result of the deserialization + @throw parse_error.101 in case of an unexpected token + @throw parse_error.102 if to_unicode fails or surrogate error + @throw parse_error.103 if to_unicode fails + @throw parse_error.111 if input stream is in a bad state + @complexity Linear in the length of the input. The parser is a predictive LL(1) parser. The complexity can be higher if the parser callback function @a cb has a super-linear complexity. @@ -6352,7 +7588,12 @@ class basic_json static basic_json parse(std::istream& i, const parser_callback_t cb = nullptr) { - return parser(i, cb).parse(); + return parser(input_adapter::create(i), cb).parse(true); + } + + static bool accept(std::istream& i) + { + return parser(input_adapter::create(i)).accept(true); } /*! 
@@ -6361,7 +7602,12 @@ class basic_json static basic_json parse(std::istream&& i, const parser_callback_t cb = nullptr) { - return parser(i, cb).parse(); + return parser(input_adapter::create(i), cb).parse(true); + } + + static bool accept(std::istream&& i) + { + return parser(input_adapter::create(i)).accept(true); } /*! @@ -6394,6 +7640,10 @@ class basic_json @return result of the deserialization + @throw parse_error.101 in case of an unexpected token + @throw parse_error.102 if to_unicode fails or surrogate error + @throw parse_error.103 if to_unicode fails + @complexity Linear in the length of the input. The parser is a predictive LL(1) parser. The complexity can be higher if the parser callback function @a cb has a super-linear complexity. @@ -6412,27 +7662,16 @@ class basic_json static basic_json parse(IteratorType first, IteratorType last, const parser_callback_t cb = nullptr) { - // assertion to check that the iterator range is indeed contiguous, - // see http://stackoverflow.com/a/35008842/266378 for more discussion - assert(std::accumulate(first, last, std::pair(true, 0), - [&first](std::pair res, decltype(*first) val) - { - res.first &= (val == *(std::next(std::addressof(*first), res.second++))); - return res; - }).first); + return parser(input_adapter::create(first, last), cb).parse(true); + } - // assertion to check that each element is 1 byte long - static_assert(sizeof(typename std::iterator_traits::value_type) == 1, - "each element in the iterator range must have the size of 1 byte"); - - // if iterator range is empty, create a parser with an empty string - // to generate "unexpected EOF" error message - if (std::distance(first, last) <= 0) - { - return parser("").parse(); - } - - return parser(first, last, cb).parse(); + template::iterator_category>::value, int>::type = 0> + static bool accept(IteratorType first, IteratorType last) + { + return parser(input_adapter::create(first, last)).accept(true); } /*! 
@@ -6464,6 +7703,10 @@ class basic_json @return result of the deserialization + @throw parse_error.101 in case of an unexpected token + @throw parse_error.102 if to_unicode fails or surrogate error + @throw parse_error.103 if to_unicode fails + @complexity Linear in the length of the input. The parser is a predictive LL(1) parser. The complexity can be higher if the parser callback function @a cb has a super-linear complexity. @@ -6488,6 +7731,32 @@ class basic_json return parse(std::begin(c), std::end(c), cb); } + template::value and + std::is_base_of< + std::random_access_iterator_tag, + typename std::iterator_traits()))>::iterator_category>::value + , int>::type = 0> + static bool accept(const ContiguousContainer& c) + { + // delegate the call to the iterator-range accept overload + return accept(std::begin(c), std::end(c)); + } + + /*! + @brief deserialize from stream + @deprecated This stream operator is deprecated and will be removed in a + future version of the library. Please use + @ref std::istream& operator>>(std::istream&, basic_json&) + instead; that is, replace calls like `j << i;` with `i >> j;`. + */ + JSON_DEPRECATED + friend std::istream& operator<<(basic_json& j, std::istream& i) + { + j = parser(input_adapter::create(i)).parse(false); + return i; + } + /*! @brief deserialize from stream @@ -6496,7 +7765,10 @@ class basic_json @param[in,out] i input stream to read a serialized JSON value from @param[in,out] j JSON value to write the deserialized input to - @throw std::invalid_argument in case of parse errors + @throw parse_error.101 in case of an unexpected token + @throw parse_error.102 if to_unicode fails or surrogate error + @throw parse_error.103 if to_unicode fails + @throw parse_error.111 if input stream is in a bad state @complexity Linear in the length of the input. The parser is a predictive LL(1) parser. 
@@ -6511,1538 +7783,14 @@ class basic_json @since version 1.0.0 */ - friend std::istream& operator<<(basic_json& j, std::istream& i) - { - j = parser(i).parse(); - return i; - } - - /*! - @brief deserialize from stream - @copydoc operator<<(basic_json&, std::istream&) - */ friend std::istream& operator>>(std::istream& i, basic_json& j) { - j = parser(i).parse(); + j = parser(input_adapter::create(i)).parse(false); return i; } /// @} - ////////////////////////////////////////// - // binary serialization/deserialization // - ////////////////////////////////////////// - - /// @name binary serialization/deserialization support - /// @{ - - private: - /*! - @note Some code in the switch cases has been copied, because otherwise - copilers would complain about implicit fallthrough and there is no - portable attribute to mute such warnings. - */ - template - static void add_to_vector(std::vector& vec, size_t bytes, const T number) - { - assert(bytes == 1 or bytes == 2 or bytes == 4 or bytes == 8); - - switch (bytes) - { - case 8: - { - vec.push_back(static_cast((static_cast(number) >> 070) & 0xff)); - vec.push_back(static_cast((static_cast(number) >> 060) & 0xff)); - vec.push_back(static_cast((static_cast(number) >> 050) & 0xff)); - vec.push_back(static_cast((static_cast(number) >> 040) & 0xff)); - vec.push_back(static_cast((number >> 030) & 0xff)); - vec.push_back(static_cast((number >> 020) & 0xff)); - vec.push_back(static_cast((number >> 010) & 0xff)); - vec.push_back(static_cast(number & 0xff)); - break; - } - - case 4: - { - vec.push_back(static_cast((number >> 030) & 0xff)); - vec.push_back(static_cast((number >> 020) & 0xff)); - vec.push_back(static_cast((number >> 010) & 0xff)); - vec.push_back(static_cast(number & 0xff)); - break; - } - - case 2: - { - vec.push_back(static_cast((number >> 010) & 0xff)); - vec.push_back(static_cast(number & 0xff)); - break; - } - - case 1: - { - vec.push_back(static_cast(number & 0xff)); - break; - } - } - } - - /*! 
- @brief take sufficient bytes from a vector to fill an integer variable - - In the context of binary serialization formats, we need to read several - bytes from a byte vector and combine them to multi-byte integral data - types. - - @param[in] vec byte vector to read from - @param[in] current_index the position in the vector after which to read - - @return the next sizeof(T) bytes from @a vec, in reverse order as T - - @tparam T the integral return type - - @throw std::out_of_range if there are less than sizeof(T)+1 bytes in the - vector @a vec to read - - In the for loop, the bytes from the vector are copied in reverse order into - the return value. In the figures below, let sizeof(T)=4 and `i` be the loop - variable. - - Precondition: - - vec: | | | a | b | c | d | T: | | | | | - ^ ^ ^ ^ - current_index i ptr sizeof(T) - - Postcondition: - - vec: | | | a | b | c | d | T: | d | c | b | a | - ^ ^ ^ - | i ptr - current_index - - @sa Code adapted from . - */ - template - static T get_from_vector(const std::vector& vec, const size_t current_index) - { - if (current_index + sizeof(T) + 1 > vec.size()) - { - JSON_THROW(std::out_of_range("cannot read " + std::to_string(sizeof(T)) + " bytes from vector")); - } - - T result; - auto* ptr = reinterpret_cast(&result); - for (size_t i = 0; i < sizeof(T); ++i) - { - *ptr++ = vec[current_index + sizeof(T) - i]; - } - return result; - } - - /*! - @brief create a MessagePack serialization of a given JSON value - - This is a straightforward implementation of the MessagePack specification. - - @param[in] j JSON value to serialize - @param[in,out] v byte vector to write the serialization to - - @sa https://github.com/msgpack/msgpack/blob/master/spec.md - */ - static void to_msgpack_internal(const basic_json& j, std::vector& v) - { - switch (j.type()) - { - case value_t::null: - { - // nil - v.push_back(0xc0); - break; - } - - case value_t::boolean: - { - // true and false - v.push_back(j.m_value.boolean ? 
0xc3 : 0xc2); - break; - } - - case value_t::number_integer: - { - if (j.m_value.number_integer >= 0) - { - // MessagePack does not differentiate between positive - // signed integers and unsigned integers. Therefore, we - // used the code from the value_t::number_unsigned case - // here. - if (j.m_value.number_unsigned < 128) - { - // positive fixnum - add_to_vector(v, 1, j.m_value.number_unsigned); - } - else if (j.m_value.number_unsigned <= std::numeric_limits::max()) - { - // uint 8 - v.push_back(0xcc); - add_to_vector(v, 1, j.m_value.number_unsigned); - } - else if (j.m_value.number_unsigned <= std::numeric_limits::max()) - { - // uint 16 - v.push_back(0xcd); - add_to_vector(v, 2, j.m_value.number_unsigned); - } - else if (j.m_value.number_unsigned <= std::numeric_limits::max()) - { - // uint 32 - v.push_back(0xce); - add_to_vector(v, 4, j.m_value.number_unsigned); - } - else if (j.m_value.number_unsigned <= std::numeric_limits::max()) - { - // uint 64 - v.push_back(0xcf); - add_to_vector(v, 8, j.m_value.number_unsigned); - } - } - else - { - if (j.m_value.number_integer >= -32) - { - // negative fixnum - add_to_vector(v, 1, j.m_value.number_integer); - } - else if (j.m_value.number_integer >= std::numeric_limits::min() and j.m_value.number_integer <= std::numeric_limits::max()) - { - // int 8 - v.push_back(0xd0); - add_to_vector(v, 1, j.m_value.number_integer); - } - else if (j.m_value.number_integer >= std::numeric_limits::min() and j.m_value.number_integer <= std::numeric_limits::max()) - { - // int 16 - v.push_back(0xd1); - add_to_vector(v, 2, j.m_value.number_integer); - } - else if (j.m_value.number_integer >= std::numeric_limits::min() and j.m_value.number_integer <= std::numeric_limits::max()) - { - // int 32 - v.push_back(0xd2); - add_to_vector(v, 4, j.m_value.number_integer); - } - else if (j.m_value.number_integer >= std::numeric_limits::min() and j.m_value.number_integer <= std::numeric_limits::max()) - { - // int 64 - v.push_back(0xd3); - 
add_to_vector(v, 8, j.m_value.number_integer); - } - } - break; - } - - case value_t::number_unsigned: - { - if (j.m_value.number_unsigned < 128) - { - // positive fixnum - add_to_vector(v, 1, j.m_value.number_unsigned); - } - else if (j.m_value.number_unsigned <= std::numeric_limits::max()) - { - // uint 8 - v.push_back(0xcc); - add_to_vector(v, 1, j.m_value.number_unsigned); - } - else if (j.m_value.number_unsigned <= std::numeric_limits::max()) - { - // uint 16 - v.push_back(0xcd); - add_to_vector(v, 2, j.m_value.number_unsigned); - } - else if (j.m_value.number_unsigned <= std::numeric_limits::max()) - { - // uint 32 - v.push_back(0xce); - add_to_vector(v, 4, j.m_value.number_unsigned); - } - else if (j.m_value.number_unsigned <= std::numeric_limits::max()) - { - // uint 64 - v.push_back(0xcf); - add_to_vector(v, 8, j.m_value.number_unsigned); - } - break; - } - - case value_t::number_float: - { - // float 64 - v.push_back(0xcb); - const auto* helper = reinterpret_cast(&(j.m_value.number_float)); - for (size_t i = 0; i < 8; ++i) - { - v.push_back(helper[7 - i]); - } - break; - } - - case value_t::string: - { - const auto N = j.m_value.string->size(); - if (N <= 31) - { - // fixstr - v.push_back(static_cast(0xa0 | N)); - } - else if (N <= 255) - { - // str 8 - v.push_back(0xd9); - add_to_vector(v, 1, N); - } - else if (N <= 65535) - { - // str 16 - v.push_back(0xda); - add_to_vector(v, 2, N); - } - else if (N <= 4294967295) - { - // str 32 - v.push_back(0xdb); - add_to_vector(v, 4, N); - } - - // append string - std::copy(j.m_value.string->begin(), j.m_value.string->end(), - std::back_inserter(v)); - break; - } - - case value_t::array: - { - const auto N = j.m_value.array->size(); - if (N <= 15) - { - // fixarray - v.push_back(static_cast(0x90 | N)); - } - else if (N <= 0xffff) - { - // array 16 - v.push_back(0xdc); - add_to_vector(v, 2, N); - } - else if (N <= 0xffffffff) - { - // array 32 - v.push_back(0xdd); - add_to_vector(v, 4, N); - } - - // append each 
element - for (const auto& el : *j.m_value.array) - { - to_msgpack_internal(el, v); - } - break; - } - - case value_t::object: - { - const auto N = j.m_value.object->size(); - if (N <= 15) - { - // fixmap - v.push_back(static_cast(0x80 | (N & 0xf))); - } - else if (N <= 65535) - { - // map 16 - v.push_back(0xde); - add_to_vector(v, 2, N); - } - else if (N <= 4294967295) - { - // map 32 - v.push_back(0xdf); - add_to_vector(v, 4, N); - } - - // append each element - for (const auto& el : *j.m_value.object) - { - to_msgpack_internal(el.first, v); - to_msgpack_internal(el.second, v); - } - break; - } - - default: - { - break; - } - } - } - - /*! - @brief create a CBOR serialization of a given JSON value - - This is a straightforward implementation of the CBOR specification. - - @param[in] j JSON value to serialize - @param[in,out] v byte vector to write the serialization to - - @sa https://tools.ietf.org/html/rfc7049 - */ - static void to_cbor_internal(const basic_json& j, std::vector& v) - { - switch (j.type()) - { - case value_t::null: - { - v.push_back(0xf6); - break; - } - - case value_t::boolean: - { - v.push_back(j.m_value.boolean ? 0xf5 : 0xf4); - break; - } - - case value_t::number_integer: - { - if (j.m_value.number_integer >= 0) - { - // CBOR does not differentiate between positive signed - // integers and unsigned integers. Therefore, we used the - // code from the value_t::number_unsigned case here. 
- if (j.m_value.number_integer <= 0x17) - { - add_to_vector(v, 1, j.m_value.number_integer); - } - else if (j.m_value.number_integer <= std::numeric_limits::max()) - { - v.push_back(0x18); - // one-byte uint8_t - add_to_vector(v, 1, j.m_value.number_integer); - } - else if (j.m_value.number_integer <= std::numeric_limits::max()) - { - v.push_back(0x19); - // two-byte uint16_t - add_to_vector(v, 2, j.m_value.number_integer); - } - else if (j.m_value.number_integer <= std::numeric_limits::max()) - { - v.push_back(0x1a); - // four-byte uint32_t - add_to_vector(v, 4, j.m_value.number_integer); - } - else - { - v.push_back(0x1b); - // eight-byte uint64_t - add_to_vector(v, 8, j.m_value.number_integer); - } - } - else - { - // The conversions below encode the sign in the first - // byte, and the value is converted to a positive number. - const auto positive_number = -1 - j.m_value.number_integer; - if (j.m_value.number_integer >= -24) - { - v.push_back(static_cast(0x20 + positive_number)); - } - else if (positive_number <= std::numeric_limits::max()) - { - // int 8 - v.push_back(0x38); - add_to_vector(v, 1, positive_number); - } - else if (positive_number <= std::numeric_limits::max()) - { - // int 16 - v.push_back(0x39); - add_to_vector(v, 2, positive_number); - } - else if (positive_number <= std::numeric_limits::max()) - { - // int 32 - v.push_back(0x3a); - add_to_vector(v, 4, positive_number); - } - else - { - // int 64 - v.push_back(0x3b); - add_to_vector(v, 8, positive_number); - } - } - break; - } - - case value_t::number_unsigned: - { - if (j.m_value.number_unsigned <= 0x17) - { - v.push_back(static_cast(j.m_value.number_unsigned)); - } - else if (j.m_value.number_unsigned <= 0xff) - { - v.push_back(0x18); - // one-byte uint8_t - add_to_vector(v, 1, j.m_value.number_unsigned); - } - else if (j.m_value.number_unsigned <= 0xffff) - { - v.push_back(0x19); - // two-byte uint16_t - add_to_vector(v, 2, j.m_value.number_unsigned); - } - else if 
(j.m_value.number_unsigned <= 0xffffffff) - { - v.push_back(0x1a); - // four-byte uint32_t - add_to_vector(v, 4, j.m_value.number_unsigned); - } - else if (j.m_value.number_unsigned <= 0xffffffffffffffff) - { - v.push_back(0x1b); - // eight-byte uint64_t - add_to_vector(v, 8, j.m_value.number_unsigned); - } - break; - } - - case value_t::number_float: - { - // Double-Precision Float - v.push_back(0xfb); - const auto* helper = reinterpret_cast(&(j.m_value.number_float)); - for (size_t i = 0; i < 8; ++i) - { - v.push_back(helper[7 - i]); - } - break; - } - - case value_t::string: - { - const auto N = j.m_value.string->size(); - if (N <= 0x17) - { - v.push_back(0x60 + static_cast(N)); // 1 byte for string + size - } - else if (N <= 0xff) - { - v.push_back(0x78); // one-byte uint8_t for N - add_to_vector(v, 1, N); - } - else if (N <= 0xffff) - { - v.push_back(0x79); // two-byte uint16_t for N - add_to_vector(v, 2, N); - } - else if (N <= 0xffffffff) - { - v.push_back(0x7a); // four-byte uint32_t for N - add_to_vector(v, 4, N); - } - // LCOV_EXCL_START - else if (N <= 0xffffffffffffffff) - { - v.push_back(0x7b); // eight-byte uint64_t for N - add_to_vector(v, 8, N); - } - // LCOV_EXCL_STOP - - // append string - std::copy(j.m_value.string->begin(), j.m_value.string->end(), - std::back_inserter(v)); - break; - } - - case value_t::array: - { - const auto N = j.m_value.array->size(); - if (N <= 0x17) - { - v.push_back(0x80 + static_cast(N)); // 1 byte for array + size - } - else if (N <= 0xff) - { - v.push_back(0x98); // one-byte uint8_t for N - add_to_vector(v, 1, N); - } - else if (N <= 0xffff) - { - v.push_back(0x99); // two-byte uint16_t for N - add_to_vector(v, 2, N); - } - else if (N <= 0xffffffff) - { - v.push_back(0x9a); // four-byte uint32_t for N - add_to_vector(v, 4, N); - } - // LCOV_EXCL_START - else if (N <= 0xffffffffffffffff) - { - v.push_back(0x9b); // eight-byte uint64_t for N - add_to_vector(v, 8, N); - } - // LCOV_EXCL_STOP - - // append each element - 
for (const auto& el : *j.m_value.array) - { - to_cbor_internal(el, v); - } - break; - } - - case value_t::object: - { - const auto N = j.m_value.object->size(); - if (N <= 0x17) - { - v.push_back(0xa0 + static_cast(N)); // 1 byte for object + size - } - else if (N <= 0xff) - { - v.push_back(0xb8); - add_to_vector(v, 1, N); // one-byte uint8_t for N - } - else if (N <= 0xffff) - { - v.push_back(0xb9); - add_to_vector(v, 2, N); // two-byte uint16_t for N - } - else if (N <= 0xffffffff) - { - v.push_back(0xba); - add_to_vector(v, 4, N); // four-byte uint32_t for N - } - // LCOV_EXCL_START - else if (N <= 0xffffffffffffffff) - { - v.push_back(0xbb); - add_to_vector(v, 8, N); // eight-byte uint64_t for N - } - // LCOV_EXCL_STOP - - // append each element - for (const auto& el : *j.m_value.object) - { - to_cbor_internal(el.first, v); - to_cbor_internal(el.second, v); - } - break; - } - - default: - { - break; - } - } - } - - - /* - @brief checks if given lengths do not exceed the size of a given vector - - To secure the access to the byte vector during CBOR/MessagePack - deserialization, bytes are copied from the vector into buffers. This - function checks if the number of bytes to copy (@a len) does not exceed - the size @s size of the vector. Additionally, an @a offset is given from - where to start reading the bytes. 
- - This function checks whether reading the bytes is safe; that is, offset is - a valid index in the vector, offset+len - - @param[in] size size of the byte vector - @param[in] len number of bytes to read - @param[in] offset offset where to start reading - - vec: x x x x x X X X X X - ^ ^ ^ - 0 offset len - - @throws out_of_range if `len > v.size()` - */ - static void check_length(const size_t size, const size_t len, const size_t offset) - { - // simple case: requested length is greater than the vector's length - if (len > size or offset > size) - { - JSON_THROW(std::out_of_range("len out of range")); - } - - // second case: adding offset would result in overflow - if ((size > (std::numeric_limits::max() - offset))) - { - JSON_THROW(std::out_of_range("len+offset out of range")); - } - - // last case: reading past the end of the vector - if (len + offset > size) - { - JSON_THROW(std::out_of_range("len+offset out of range")); - } - } - - /*! - @brief create a JSON value from a given MessagePack vector - - @param[in] v MessagePack serialization - @param[in] idx byte index to start reading from @a v - - @return deserialized JSON value - - @throw std::invalid_argument if unsupported features from MessagePack were - used in the given vector @a v or if the input is not valid MessagePack - @throw std::out_of_range if the given vector ends prematurely - - @sa https://github.com/msgpack/msgpack/blob/master/spec.md - */ - static basic_json from_msgpack_internal(const std::vector& v, size_t& idx) - { - // make sure reading 1 byte is safe - check_length(v.size(), 1, idx); - - // store and increment index - const size_t current_idx = idx++; - - if (v[current_idx] <= 0xbf) - { - if (v[current_idx] <= 0x7f) // positive fixint - { - return v[current_idx]; - } - if (v[current_idx] <= 0x8f) // fixmap - { - basic_json result = value_t::object; - const size_t len = v[current_idx] & 0x0f; - for (size_t i = 0; i < len; ++i) - { - std::string key = from_msgpack_internal(v, idx); - 
result[key] = from_msgpack_internal(v, idx); - } - return result; - } - else if (v[current_idx] <= 0x9f) // fixarray - { - basic_json result = value_t::array; - const size_t len = v[current_idx] & 0x0f; - for (size_t i = 0; i < len; ++i) - { - result.push_back(from_msgpack_internal(v, idx)); - } - return result; - } - else // fixstr - { - const size_t len = v[current_idx] & 0x1f; - const size_t offset = current_idx + 1; - idx += len; // skip content bytes - check_length(v.size(), len, offset); - return std::string(reinterpret_cast(v.data()) + offset, len); - } - } - else if (v[current_idx] >= 0xe0) // negative fixint - { - return static_cast(v[current_idx]); - } - else - { - switch (v[current_idx]) - { - case 0xc0: // nil - { - return value_t::null; - } - - case 0xc2: // false - { - return false; - } - - case 0xc3: // true - { - return true; - } - - case 0xca: // float 32 - { - // copy bytes in reverse order into the double variable - float res; - for (size_t byte = 0; byte < sizeof(float); ++byte) - { - reinterpret_cast(&res)[sizeof(float) - byte - 1] = v.at(current_idx + 1 + byte); - } - idx += sizeof(float); // skip content bytes - return res; - } - - case 0xcb: // float 64 - { - // copy bytes in reverse order into the double variable - double res; - for (size_t byte = 0; byte < sizeof(double); ++byte) - { - reinterpret_cast(&res)[sizeof(double) - byte - 1] = v.at(current_idx + 1 + byte); - } - idx += sizeof(double); // skip content bytes - return res; - } - - case 0xcc: // uint 8 - { - idx += 1; // skip content byte - return get_from_vector(v, current_idx); - } - - case 0xcd: // uint 16 - { - idx += 2; // skip 2 content bytes - return get_from_vector(v, current_idx); - } - - case 0xce: // uint 32 - { - idx += 4; // skip 4 content bytes - return get_from_vector(v, current_idx); - } - - case 0xcf: // uint 64 - { - idx += 8; // skip 8 content bytes - return get_from_vector(v, current_idx); - } - - case 0xd0: // int 8 - { - idx += 1; // skip content byte - return 
get_from_vector(v, current_idx); - } - - case 0xd1: // int 16 - { - idx += 2; // skip 2 content bytes - return get_from_vector(v, current_idx); - } - - case 0xd2: // int 32 - { - idx += 4; // skip 4 content bytes - return get_from_vector(v, current_idx); - } - - case 0xd3: // int 64 - { - idx += 8; // skip 8 content bytes - return get_from_vector(v, current_idx); - } - - case 0xd9: // str 8 - { - const auto len = static_cast(get_from_vector(v, current_idx)); - const size_t offset = current_idx + 2; - idx += len + 1; // skip size byte + content bytes - check_length(v.size(), len, offset); - return std::string(reinterpret_cast(v.data()) + offset, len); - } - - case 0xda: // str 16 - { - const auto len = static_cast(get_from_vector(v, current_idx)); - const size_t offset = current_idx + 3; - idx += len + 2; // skip 2 size bytes + content bytes - check_length(v.size(), len, offset); - return std::string(reinterpret_cast(v.data()) + offset, len); - } - - case 0xdb: // str 32 - { - const auto len = static_cast(get_from_vector(v, current_idx)); - const size_t offset = current_idx + 5; - idx += len + 4; // skip 4 size bytes + content bytes - check_length(v.size(), len, offset); - return std::string(reinterpret_cast(v.data()) + offset, len); - } - - case 0xdc: // array 16 - { - basic_json result = value_t::array; - const auto len = static_cast(get_from_vector(v, current_idx)); - idx += 2; // skip 2 size bytes - for (size_t i = 0; i < len; ++i) - { - result.push_back(from_msgpack_internal(v, idx)); - } - return result; - } - - case 0xdd: // array 32 - { - basic_json result = value_t::array; - const auto len = static_cast(get_from_vector(v, current_idx)); - idx += 4; // skip 4 size bytes - for (size_t i = 0; i < len; ++i) - { - result.push_back(from_msgpack_internal(v, idx)); - } - return result; - } - - case 0xde: // map 16 - { - basic_json result = value_t::object; - const auto len = static_cast(get_from_vector(v, current_idx)); - idx += 2; // skip 2 size bytes - for 
(size_t i = 0; i < len; ++i) - { - std::string key = from_msgpack_internal(v, idx); - result[key] = from_msgpack_internal(v, idx); - } - return result; - } - - case 0xdf: // map 32 - { - basic_json result = value_t::object; - const auto len = static_cast(get_from_vector(v, current_idx)); - idx += 4; // skip 4 size bytes - for (size_t i = 0; i < len; ++i) - { - std::string key = from_msgpack_internal(v, idx); - result[key] = from_msgpack_internal(v, idx); - } - return result; - } - - default: - { - JSON_THROW(std::invalid_argument("error parsing a msgpack @ " + std::to_string(current_idx) + ": " + std::to_string(static_cast(v[current_idx])))); - } - } - } - } - - /*! - @brief create a JSON value from a given CBOR vector - - @param[in] v CBOR serialization - @param[in] idx byte index to start reading from @a v - - @return deserialized JSON value - - @throw std::invalid_argument if unsupported features from CBOR were used in - the given vector @a v or if the input is not valid CBOR - @throw std::out_of_range if the given vector ends prematurely - - @sa https://tools.ietf.org/html/rfc7049 - */ - static basic_json from_cbor_internal(const std::vector& v, size_t& idx) - { - // store and increment index - const size_t current_idx = idx++; - - switch (v.at(current_idx)) - { - // Integer 0x00..0x17 (0..23) - case 0x00: - case 0x01: - case 0x02: - case 0x03: - case 0x04: - case 0x05: - case 0x06: - case 0x07: - case 0x08: - case 0x09: - case 0x0a: - case 0x0b: - case 0x0c: - case 0x0d: - case 0x0e: - case 0x0f: - case 0x10: - case 0x11: - case 0x12: - case 0x13: - case 0x14: - case 0x15: - case 0x16: - case 0x17: - { - return v[current_idx]; - } - - case 0x18: // Unsigned integer (one-byte uint8_t follows) - { - idx += 1; // skip content byte - return get_from_vector(v, current_idx); - } - - case 0x19: // Unsigned integer (two-byte uint16_t follows) - { - idx += 2; // skip 2 content bytes - return get_from_vector(v, current_idx); - } - - case 0x1a: // Unsigned integer 
(four-byte uint32_t follows) - { - idx += 4; // skip 4 content bytes - return get_from_vector(v, current_idx); - } - - case 0x1b: // Unsigned integer (eight-byte uint64_t follows) - { - idx += 8; // skip 8 content bytes - return get_from_vector(v, current_idx); - } - - // Negative integer -1-0x00..-1-0x17 (-1..-24) - case 0x20: - case 0x21: - case 0x22: - case 0x23: - case 0x24: - case 0x25: - case 0x26: - case 0x27: - case 0x28: - case 0x29: - case 0x2a: - case 0x2b: - case 0x2c: - case 0x2d: - case 0x2e: - case 0x2f: - case 0x30: - case 0x31: - case 0x32: - case 0x33: - case 0x34: - case 0x35: - case 0x36: - case 0x37: - { - return static_cast(0x20 - 1 - v[current_idx]); - } - - case 0x38: // Negative integer (one-byte uint8_t follows) - { - idx += 1; // skip content byte - // must be uint8_t ! - return static_cast(-1) - get_from_vector(v, current_idx); - } - - case 0x39: // Negative integer -1-n (two-byte uint16_t follows) - { - idx += 2; // skip 2 content bytes - return static_cast(-1) - get_from_vector(v, current_idx); - } - - case 0x3a: // Negative integer -1-n (four-byte uint32_t follows) - { - idx += 4; // skip 4 content bytes - return static_cast(-1) - get_from_vector(v, current_idx); - } - - case 0x3b: // Negative integer -1-n (eight-byte uint64_t follows) - { - idx += 8; // skip 8 content bytes - return static_cast(-1) - static_cast(get_from_vector(v, current_idx)); - } - - // UTF-8 string (0x00..0x17 bytes follow) - case 0x60: - case 0x61: - case 0x62: - case 0x63: - case 0x64: - case 0x65: - case 0x66: - case 0x67: - case 0x68: - case 0x69: - case 0x6a: - case 0x6b: - case 0x6c: - case 0x6d: - case 0x6e: - case 0x6f: - case 0x70: - case 0x71: - case 0x72: - case 0x73: - case 0x74: - case 0x75: - case 0x76: - case 0x77: - { - const auto len = static_cast(v[current_idx] - 0x60); - const size_t offset = current_idx + 1; - idx += len; // skip content bytes - check_length(v.size(), len, offset); - return std::string(reinterpret_cast(v.data()) + offset, 
len); - } - - case 0x78: // UTF-8 string (one-byte uint8_t for n follows) - { - const auto len = static_cast(get_from_vector(v, current_idx)); - const size_t offset = current_idx + 2; - idx += len + 1; // skip size byte + content bytes - check_length(v.size(), len, offset); - return std::string(reinterpret_cast(v.data()) + offset, len); - } - - case 0x79: // UTF-8 string (two-byte uint16_t for n follow) - { - const auto len = static_cast(get_from_vector(v, current_idx)); - const size_t offset = current_idx + 3; - idx += len + 2; // skip 2 size bytes + content bytes - check_length(v.size(), len, offset); - return std::string(reinterpret_cast(v.data()) + offset, len); - } - - case 0x7a: // UTF-8 string (four-byte uint32_t for n follow) - { - const auto len = static_cast(get_from_vector(v, current_idx)); - const size_t offset = current_idx + 5; - idx += len + 4; // skip 4 size bytes + content bytes - check_length(v.size(), len, offset); - return std::string(reinterpret_cast(v.data()) + offset, len); - } - - case 0x7b: // UTF-8 string (eight-byte uint64_t for n follow) - { - const auto len = static_cast(get_from_vector(v, current_idx)); - const size_t offset = current_idx + 9; - idx += len + 8; // skip 8 size bytes + content bytes - check_length(v.size(), len, offset); - return std::string(reinterpret_cast(v.data()) + offset, len); - } - - case 0x7f: // UTF-8 string (indefinite length) - { - std::string result; - while (v.at(idx) != 0xff) - { - string_t s = from_cbor_internal(v, idx); - result += s; - } - // skip break byte (0xFF) - idx += 1; - return result; - } - - // array (0x00..0x17 data items follow) - case 0x80: - case 0x81: - case 0x82: - case 0x83: - case 0x84: - case 0x85: - case 0x86: - case 0x87: - case 0x88: - case 0x89: - case 0x8a: - case 0x8b: - case 0x8c: - case 0x8d: - case 0x8e: - case 0x8f: - case 0x90: - case 0x91: - case 0x92: - case 0x93: - case 0x94: - case 0x95: - case 0x96: - case 0x97: - { - basic_json result = value_t::array; - const auto 
len = static_cast(v[current_idx] - 0x80); - for (size_t i = 0; i < len; ++i) - { - result.push_back(from_cbor_internal(v, idx)); - } - return result; - } - - case 0x98: // array (one-byte uint8_t for n follows) - { - basic_json result = value_t::array; - const auto len = static_cast(get_from_vector(v, current_idx)); - idx += 1; // skip 1 size byte - for (size_t i = 0; i < len; ++i) - { - result.push_back(from_cbor_internal(v, idx)); - } - return result; - } - - case 0x99: // array (two-byte uint16_t for n follow) - { - basic_json result = value_t::array; - const auto len = static_cast(get_from_vector(v, current_idx)); - idx += 2; // skip 4 size bytes - for (size_t i = 0; i < len; ++i) - { - result.push_back(from_cbor_internal(v, idx)); - } - return result; - } - - case 0x9a: // array (four-byte uint32_t for n follow) - { - basic_json result = value_t::array; - const auto len = static_cast(get_from_vector(v, current_idx)); - idx += 4; // skip 4 size bytes - for (size_t i = 0; i < len; ++i) - { - result.push_back(from_cbor_internal(v, idx)); - } - return result; - } - - case 0x9b: // array (eight-byte uint64_t for n follow) - { - basic_json result = value_t::array; - const auto len = static_cast(get_from_vector(v, current_idx)); - idx += 8; // skip 8 size bytes - for (size_t i = 0; i < len; ++i) - { - result.push_back(from_cbor_internal(v, idx)); - } - return result; - } - - case 0x9f: // array (indefinite length) - { - basic_json result = value_t::array; - while (v.at(idx) != 0xff) - { - result.push_back(from_cbor_internal(v, idx)); - } - // skip break byte (0xFF) - idx += 1; - return result; - } - - // map (0x00..0x17 pairs of data items follow) - case 0xa0: - case 0xa1: - case 0xa2: - case 0xa3: - case 0xa4: - case 0xa5: - case 0xa6: - case 0xa7: - case 0xa8: - case 0xa9: - case 0xaa: - case 0xab: - case 0xac: - case 0xad: - case 0xae: - case 0xaf: - case 0xb0: - case 0xb1: - case 0xb2: - case 0xb3: - case 0xb4: - case 0xb5: - case 0xb6: - case 0xb7: - { - 
basic_json result = value_t::object; - const auto len = static_cast(v[current_idx] - 0xa0); - for (size_t i = 0; i < len; ++i) - { - std::string key = from_cbor_internal(v, idx); - result[key] = from_cbor_internal(v, idx); - } - return result; - } - - case 0xb8: // map (one-byte uint8_t for n follows) - { - basic_json result = value_t::object; - const auto len = static_cast(get_from_vector(v, current_idx)); - idx += 1; // skip 1 size byte - for (size_t i = 0; i < len; ++i) - { - std::string key = from_cbor_internal(v, idx); - result[key] = from_cbor_internal(v, idx); - } - return result; - } - - case 0xb9: // map (two-byte uint16_t for n follow) - { - basic_json result = value_t::object; - const auto len = static_cast(get_from_vector(v, current_idx)); - idx += 2; // skip 2 size bytes - for (size_t i = 0; i < len; ++i) - { - std::string key = from_cbor_internal(v, idx); - result[key] = from_cbor_internal(v, idx); - } - return result; - } - - case 0xba: // map (four-byte uint32_t for n follow) - { - basic_json result = value_t::object; - const auto len = static_cast(get_from_vector(v, current_idx)); - idx += 4; // skip 4 size bytes - for (size_t i = 0; i < len; ++i) - { - std::string key = from_cbor_internal(v, idx); - result[key] = from_cbor_internal(v, idx); - } - return result; - } - - case 0xbb: // map (eight-byte uint64_t for n follow) - { - basic_json result = value_t::object; - const auto len = static_cast(get_from_vector(v, current_idx)); - idx += 8; // skip 8 size bytes - for (size_t i = 0; i < len; ++i) - { - std::string key = from_cbor_internal(v, idx); - result[key] = from_cbor_internal(v, idx); - } - return result; - } - - case 0xbf: // map (indefinite length) - { - basic_json result = value_t::object; - while (v.at(idx) != 0xff) - { - std::string key = from_cbor_internal(v, idx); - result[key] = from_cbor_internal(v, idx); - } - // skip break byte (0xFF) - idx += 1; - return result; - } - - case 0xf4: // false - { - return false; - } - - case 0xf5: // 
true - { - return true; - } - - case 0xf6: // null - { - return value_t::null; - } - - case 0xf9: // Half-Precision Float (two-byte IEEE 754) - { - idx += 2; // skip two content bytes - - // code from RFC 7049, Appendix D, Figure 3: - // As half-precision floating-point numbers were only added to - // IEEE 754 in 2008, today's programming platforms often still - // only have limited support for them. It is very easy to - // include at least decoding support for them even without such - // support. An example of a small decoder for half-precision - // floating-point numbers in the C language is shown in Fig. 3. - const int half = (v.at(current_idx + 1) << 8) + v.at(current_idx + 2); - const int exp = (half >> 10) & 0x1f; - const int mant = half & 0x3ff; - double val; - if (exp == 0) - { - val = std::ldexp(mant, -24); - } - else if (exp != 31) - { - val = std::ldexp(mant + 1024, exp - 25); - } - else - { - val = mant == 0 - ? std::numeric_limits::infinity() - : std::numeric_limits::quiet_NaN(); - } - return (half & 0x8000) != 0 ? 
-val : val; - } - - case 0xfa: // Single-Precision Float (four-byte IEEE 754) - { - // copy bytes in reverse order into the float variable - float res; - for (size_t byte = 0; byte < sizeof(float); ++byte) - { - reinterpret_cast(&res)[sizeof(float) - byte - 1] = v.at(current_idx + 1 + byte); - } - idx += sizeof(float); // skip content bytes - return res; - } - - case 0xfb: // Double-Precision Float (eight-byte IEEE 754) - { - // copy bytes in reverse order into the double variable - double res; - for (size_t byte = 0; byte < sizeof(double); ++byte) - { - reinterpret_cast(&res)[sizeof(double) - byte - 1] = v.at(current_idx + 1 + byte); - } - idx += sizeof(double); // skip content bytes - return res; - } - - default: // anything else (0xFF is handled inside the other types) - { - JSON_THROW(std::invalid_argument("error parsing a CBOR @ " + std::to_string(current_idx) + ": " + std::to_string(static_cast(v[current_idx])))); - } - } - } - - public: - /*! - @brief create a MessagePack serialization of a given JSON value - - Serializes a given JSON value @a j to a byte vector using the MessagePack - serialization format. MessagePack is a binary serialization format which - aims to be more compact than JSON itself, yet more efficient to parse. - - @param[in] j JSON value to serialize - @return MessagePack serialization as byte vector - - @complexity Linear in the size of the JSON value @a j. - - @liveexample{The example shows the serialization of a JSON value to a byte - vector in MessagePack format.,to_msgpack} - - @sa http://msgpack.org - @sa @ref from_msgpack(const std::vector&, const size_t) for the - analogous deserialization - @sa @ref to_cbor(const basic_json& for the related CBOR format - - @since version 2.0.9 - */ - static std::vector to_msgpack(const basic_json& j) - { - std::vector result; - to_msgpack_internal(j, result); - return result; - } - - /*! 
- @brief create a JSON value from a byte vector in MessagePack format - - Deserializes a given byte vector @a v to a JSON value using the MessagePack - serialization format. - - @param[in] v a byte vector in MessagePack format - @param[in] start_index the index to start reading from @a v (0 by default) - @return deserialized JSON value - - @throw std::invalid_argument if unsupported features from MessagePack were - used in the given vector @a v or if the input is not valid MessagePack - @throw std::out_of_range if the given vector ends prematurely - - @complexity Linear in the size of the byte vector @a v. - - @liveexample{The example shows the deserialization of a byte vector in - MessagePack format to a JSON value.,from_msgpack} - - @sa http://msgpack.org - @sa @ref to_msgpack(const basic_json&) for the analogous serialization - @sa @ref from_cbor(const std::vector&, const size_t) for the - related CBOR format - - @since version 2.0.9, parameter @a start_index since 2.1.1 - */ - static basic_json from_msgpack(const std::vector& v, - const size_t start_index = 0) - { - size_t i = start_index; - return from_msgpack_internal(v, i); - } - - /*! - @brief create a MessagePack serialization of a given JSON value - - Serializes a given JSON value @a j to a byte vector using the CBOR (Concise - Binary Object Representation) serialization format. CBOR is a binary - serialization format which aims to be more compact than JSON itself, yet - more efficient to parse. - - @param[in] j JSON value to serialize - @return MessagePack serialization as byte vector - - @complexity Linear in the size of the JSON value @a j. 
- - @liveexample{The example shows the serialization of a JSON value to a byte - vector in CBOR format.,to_cbor} - - @sa http://cbor.io - @sa @ref from_cbor(const std::vector&, const size_t) for the - analogous deserialization - @sa @ref to_msgpack(const basic_json& for the related MessagePack format - - @since version 2.0.9 - */ - static std::vector to_cbor(const basic_json& j) - { - std::vector result; - to_cbor_internal(j, result); - return result; - } - - /*! - @brief create a JSON value from a byte vector in CBOR format - - Deserializes a given byte vector @a v to a JSON value using the CBOR - (Concise Binary Object Representation) serialization format. - - @param[in] v a byte vector in CBOR format - @param[in] start_index the index to start reading from @a v (0 by default) - @return deserialized JSON value - - @throw std::invalid_argument if unsupported features from CBOR were used in - the given vector @a v or if the input is not valid MessagePack - @throw std::out_of_range if the given vector ends prematurely - - @complexity Linear in the size of the byte vector @a v. - - @liveexample{The example shows the deserialization of a byte vector in CBOR - format to a JSON value.,from_cbor} - - @sa http://cbor.io - @sa @ref to_cbor(const basic_json&) for the analogous serialization - @sa @ref from_msgpack(const std::vector&, const size_t) for the - related MessagePack format - - @since version 2.0.9, parameter @a start_index since 2.1.1 - */ - static basic_json from_cbor(const std::vector& v, - const size_t start_index = 0) - { - size_t i = start_index; - return from_cbor_internal(v, i); - } - - /// @} - /////////////////////////// // convenience functions // /////////////////////////// @@ -8085,464 +7833,6 @@ class basic_json } } - private: - /*! - @brief calculates the extra space to escape a JSON string - - @param[in] s the string to escape - @return the number of characters required to escape string @a s - - @complexity Linear in the length of string @a s. 
- */ - static std::size_t extra_space(const string_t& s) noexcept - { - return std::accumulate(s.begin(), s.end(), size_t{}, - [](size_t res, typename string_t::value_type c) - { - switch (c) - { - case '"': - case '\\': - case '\b': - case '\f': - case '\n': - case '\r': - case '\t': - { - // from c (1 byte) to \x (2 bytes) - return res + 1; - } - - default: - { - if (c >= 0x00 and c <= 0x1f) - { - // from c (1 byte) to \uxxxx (6 bytes) - return res + 5; - } - - return res; - } - } - }); - } - - /*! - @brief escape a string - - Escape a string by replacing certain special characters by a sequence of - an escape character (backslash) and another character and other control - characters by a sequence of "\u" followed by a four-digit hex - representation. - - @param[in] s the string to escape - @return the escaped string - - @complexity Linear in the length of string @a s. - */ - static string_t escape_string(const string_t& s) - { - const auto space = extra_space(s); - if (space == 0) - { - return s; - } - - // create a result string of necessary size - string_t result(s.size() + space, '\\'); - std::size_t pos = 0; - - for (const auto& c : s) - { - switch (c) - { - // quotation mark (0x22) - case '"': - { - result[pos + 1] = '"'; - pos += 2; - break; - } - - // reverse solidus (0x5c) - case '\\': - { - // nothing to change - pos += 2; - break; - } - - // backspace (0x08) - case '\b': - { - result[pos + 1] = 'b'; - pos += 2; - break; - } - - // formfeed (0x0c) - case '\f': - { - result[pos + 1] = 'f'; - pos += 2; - break; - } - - // newline (0x0a) - case '\n': - { - result[pos + 1] = 'n'; - pos += 2; - break; - } - - // carriage return (0x0d) - case '\r': - { - result[pos + 1] = 'r'; - pos += 2; - break; - } - - // horizontal tab (0x09) - case '\t': - { - result[pos + 1] = 't'; - pos += 2; - break; - } - - default: - { - if (c >= 0x00 and c <= 0x1f) - { - // convert a number 0..15 to its hex representation - // (0..f) - static const char hexify[16] = - { - '0', '1', 
'2', '3', '4', '5', '6', '7', - '8', '9', 'a', 'b', 'c', 'd', 'e', 'f' - }; - - // print character c as \uxxxx - for (const char m : - { 'u', '0', '0', hexify[c >> 4], hexify[c & 0x0f] - }) - { - result[++pos] = m; - } - - ++pos; - } - else - { - // all other characters are added as-is - result[pos++] = c; - } - break; - } - } - } - - return result; - } - - - /*! - @brief locale-independent serialization for built-in arithmetic types - */ - struct numtostr - { - public: - template - numtostr(NumberType value) - { - x_write(value, std::is_integral()); - } - - const char* c_str() const - { - return m_buf.data(); - } - - private: - /// a (hopefully) large enough character buffer - std::array < char, 64 > m_buf{{}}; - - template - void x_write(NumberType x, /*is_integral=*/std::true_type) - { - // special case for "0" - if (x == 0) - { - m_buf[0] = '0'; - return; - } - - const bool is_negative = x < 0; - size_t i = 0; - - // spare 1 byte for '\0' - while (x != 0 and i < m_buf.size() - 1) - { - const auto digit = std::labs(static_cast(x % 10)); - m_buf[i++] = static_cast('0' + digit); - x /= 10; - } - - // make sure the number has been processed completely - assert(x == 0); - - if (is_negative) - { - // make sure there is capacity for the '-' - assert(i < m_buf.size() - 2); - m_buf[i++] = '-'; - } - - std::reverse(m_buf.begin(), m_buf.begin() + i); - } - - template - void x_write(NumberType x, /*is_integral=*/std::false_type) - { - // special case for 0.0 and -0.0 - if (x == 0) - { - size_t i = 0; - if (std::signbit(x)) - { - m_buf[i++] = '-'; - } - m_buf[i++] = '0'; - m_buf[i++] = '.'; - m_buf[i] = '0'; - return; - } - - // get number of digits for a text -> float -> text round-trip - static constexpr auto d = std::numeric_limits::digits10; - - // the actual conversion - const auto written_bytes = snprintf(m_buf.data(), m_buf.size(), "%.*g", d, x); - - // negative value indicates an error - assert(written_bytes > 0); - // check if buffer was large enough - 
assert(static_cast(written_bytes) < m_buf.size()); - - // read information from locale - const auto loc = localeconv(); - assert(loc != nullptr); - const char thousands_sep = !loc->thousands_sep ? '\0' - : loc->thousands_sep[0]; - - const char decimal_point = !loc->decimal_point ? '\0' - : loc->decimal_point[0]; - - // erase thousands separator - if (thousands_sep != '\0') - { - const auto end = std::remove(m_buf.begin(), m_buf.begin() + written_bytes, thousands_sep); - std::fill(end, m_buf.end(), '\0'); - } - - // convert decimal point to '.' - if (decimal_point != '\0' and decimal_point != '.') - { - for (auto& c : m_buf) - { - if (c == decimal_point) - { - c = '.'; - break; - } - } - } - - // determine if need to append ".0" - size_t i = 0; - bool value_is_int_like = true; - for (i = 0; i < m_buf.size(); ++i) - { - // break when end of number is reached - if (m_buf[i] == '\0') - { - break; - } - - // check if we find non-int character - value_is_int_like = value_is_int_like and m_buf[i] != '.' and - m_buf[i] != 'e' and m_buf[i] != 'E'; - } - - if (value_is_int_like) - { - // there must be 2 bytes left for ".0" - assert((i + 2) < m_buf.size()); - // we write to the end of the number - assert(m_buf[i] == '\0'); - assert(m_buf[i - 1] != '\0'); - - // add ".0" - m_buf[i] = '.'; - m_buf[i + 1] = '0'; - - // the resulting string is properly terminated - assert(m_buf[i + 2] == '\0'); - } - } - }; - - - /*! - @brief internal implementation of the serialization function - - This function is called by the public member function dump and organizes - the serialization internally. The indentation level is propagated as - additional parameter. In case of arrays and objects, the function is - called recursively. 
Note that - - - strings and object keys are escaped using `escape_string()` - - integer numbers are converted implicitly via `operator<<` - - floating-point numbers are converted to a string using `"%g"` format - - @param[out] o stream to write to - @param[in] pretty_print whether the output shall be pretty-printed - @param[in] indent_step the indent level - @param[in] current_indent the current indent level (only used internally) - */ - void dump(std::ostream& o, - const bool pretty_print, - const unsigned int indent_step, - const unsigned int current_indent = 0) const - { - // variable to hold indentation for recursive calls - unsigned int new_indent = current_indent; - - switch (m_type) - { - case value_t::object: - { - if (m_value.object->empty()) - { - o << "{}"; - return; - } - - o << "{"; - - // increase indentation - if (pretty_print) - { - new_indent += indent_step; - o << "\n"; - } - - for (auto i = m_value.object->cbegin(); i != m_value.object->cend(); ++i) - { - if (i != m_value.object->cbegin()) - { - o << (pretty_print ? ",\n" : ","); - } - o << string_t(new_indent, ' ') << "\"" - << escape_string(i->first) << "\":" - << (pretty_print ? " " : ""); - i->second.dump(o, pretty_print, indent_step, new_indent); - } - - // decrease indentation - if (pretty_print) - { - new_indent -= indent_step; - o << "\n"; - } - - o << string_t(new_indent, ' ') + "}"; - return; - } - - case value_t::array: - { - if (m_value.array->empty()) - { - o << "[]"; - return; - } - - o << "["; - - // increase indentation - if (pretty_print) - { - new_indent += indent_step; - o << "\n"; - } - - for (auto i = m_value.array->cbegin(); i != m_value.array->cend(); ++i) - { - if (i != m_value.array->cbegin()) - { - o << (pretty_print ? 
",\n" : ","); - } - o << string_t(new_indent, ' '); - i->dump(o, pretty_print, indent_step, new_indent); - } - - // decrease indentation - if (pretty_print) - { - new_indent -= indent_step; - o << "\n"; - } - - o << string_t(new_indent, ' ') << "]"; - return; - } - - case value_t::string: - { - o << string_t("\"") << escape_string(*m_value.string) << "\""; - return; - } - - case value_t::boolean: - { - o << (m_value.boolean ? "true" : "false"); - return; - } - - case value_t::number_integer: - { - o << numtostr(m_value.number_integer).c_str(); - return; - } - - case value_t::number_unsigned: - { - o << numtostr(m_value.number_unsigned).c_str(); - return; - } - - case value_t::number_float: - { - o << numtostr(m_value.number_float).c_str(); - return; - } - - case value_t::discarded: - { - o << ""; - return; - } - - case value_t::null: - { - o << "null"; - return; - } - } - } private: ////////////////////// @@ -8896,50 +8186,35 @@ class basic_json } } - /* - Use operator `const_iterator` instead of `const_iterator(const iterator& - other) noexcept` to avoid two class definitions for @ref iterator and - @ref const_iterator. - - This function is only called if this class is an @ref iterator. If this - class is a @ref const_iterator this function is not called. + /*! + @note The conventional copy constructor and copy assignment are + implicitly defined. + Combined with the following converting constructor and assigment, + they support: copy from iterator to iterator, + copy from const iterator to const iterator, + and conversion from iterator to const iterator. + However conversion from const iterator to iterator is not defined. */ - operator const_iterator() const - { - const_iterator ret; - - if (m_object) - { - ret.m_object = m_object; - ret.m_it = m_it; - } - - return ret; - } /*! 
- @brief copy constructor - @param[in] other iterator to copy from + @brief converting constructor + @param[in] other non-const iterator to copy from @note It is not checked whether @a other is initialized. */ - iter_impl(const iter_impl& other) noexcept + iter_impl(const iter_impl& other) noexcept : m_object(other.m_object), m_it(other.m_it) {} /*! - @brief copy assignment - @param[in,out] other iterator to copy from + @brief converting assignment + @param[in,out] other non-const iterator to copy from + @return const/non-const iterator @note It is not checked whether @a other is initialized. */ - iter_impl& operator=(iter_impl other) noexcept( - std::is_nothrow_move_constructible::value and - std::is_nothrow_move_assignable::value and - std::is_nothrow_move_constructible::value and - std::is_nothrow_move_assignable::value - ) + iter_impl& operator=(const iter_impl& other) noexcept { - std::swap(m_object, other.m_object); - std::swap(m_it, other.m_it); + m_object = other.m_object; + m_it = other.m_it; return *this; } @@ -9036,7 +8311,7 @@ class basic_json case basic_json::value_t::null: { - JSON_THROW(std::out_of_range("cannot get value")); + JSON_THROW(invalid_iterator::create(214, "cannot get value")); } default: @@ -9046,7 +8321,7 @@ class basic_json return *m_object; } - JSON_THROW(std::out_of_range("cannot get value")); + JSON_THROW(invalid_iterator::create(214, "cannot get value")); } } } @@ -9080,7 +8355,7 @@ class basic_json return m_object; } - JSON_THROW(std::out_of_range("cannot get value")); + JSON_THROW(invalid_iterator::create(214, "cannot get value")); } } } @@ -9180,7 +8455,7 @@ class basic_json // if objects are not the same, the comparison is undefined if (m_object != other.m_object) { - JSON_THROW(std::domain_error("cannot compare iterators of different containers")); + JSON_THROW(invalid_iterator::create(212, "cannot compare iterators of different containers")); } assert(m_object != nullptr); @@ -9222,7 +8497,7 @@ class basic_json // if objects 
are not the same, the comparison is undefined if (m_object != other.m_object) { - JSON_THROW(std::domain_error("cannot compare iterators of different containers")); + JSON_THROW(invalid_iterator::create(212, "cannot compare iterators of different containers")); } assert(m_object != nullptr); @@ -9231,7 +8506,7 @@ class basic_json { case basic_json::value_t::object: { - JSON_THROW(std::domain_error("cannot compare order of object iterators")); + JSON_THROW(invalid_iterator::create(213, "cannot compare order of object iterators")); } case basic_json::value_t::array: @@ -9285,7 +8560,7 @@ class basic_json { case basic_json::value_t::object: { - JSON_THROW(std::domain_error("cannot use offsets with object iterators")); + JSON_THROW(invalid_iterator::create(209, "cannot use offsets with object iterators")); } case basic_json::value_t::array: @@ -9317,18 +8592,29 @@ class basic_json @brief add to iterator @pre The iterator is initialized; i.e. `m_object != nullptr`. */ - iter_impl operator+(difference_type i) + iter_impl operator+(difference_type i) const { auto result = *this; result += i; return result; } + /*! + @brief addition of distance and iterator + @pre The iterator is initialized; i.e. `m_object != nullptr`. + */ + friend iter_impl operator+(difference_type i, const iter_impl& it) + { + auto result = it; + result += i; + return result; + } + /*! @brief subtract from iterator @pre The iterator is initialized; i.e. `m_object != nullptr`. 
*/ - iter_impl operator-(difference_type i) + iter_impl operator-(difference_type i) const { auto result = *this; result -= i; @@ -9347,7 +8633,7 @@ class basic_json { case basic_json::value_t::object: { - JSON_THROW(std::domain_error("cannot use offsets with object iterators")); + JSON_THROW(invalid_iterator::create(209, "cannot use offsets with object iterators")); } case basic_json::value_t::array: @@ -9374,7 +8660,7 @@ class basic_json { case basic_json::value_t::object: { - JSON_THROW(std::domain_error("cannot use operator[] for object iterators")); + JSON_THROW(invalid_iterator::create(208, "cannot use operator[] for object iterators")); } case basic_json::value_t::array: @@ -9384,7 +8670,7 @@ class basic_json case basic_json::value_t::null: { - JSON_THROW(std::out_of_range("cannot get value")); + JSON_THROW(invalid_iterator::create(214, "cannot get value")); } default: @@ -9394,7 +8680,7 @@ class basic_json return *m_object; } - JSON_THROW(std::out_of_range("cannot get value")); + JSON_THROW(invalid_iterator::create(214, "cannot get value")); } } } @@ -9412,7 +8698,7 @@ class basic_json return m_it.object_iterator->first; } - JSON_THROW(std::domain_error("cannot use key() for non-object iterators")); + JSON_THROW(invalid_iterator::create(207, "cannot use key() for non-object iterators")); } /*! @@ -9428,7 +8714,7 @@ class basic_json /// associated JSON instance pointer m_object = nullptr; /// the actual iterator of the associated instance - internal_iterator m_it = internal_iterator(); + struct internal_iterator m_it = internal_iterator(); }; /*! 
@@ -9470,56 +8756,49 @@ class basic_json /// post-increment (it++) json_reverse_iterator operator++(int) { - return base_iterator::operator++(1); + return static_cast(base_iterator::operator++(1)); } /// pre-increment (++it) json_reverse_iterator& operator++() { - base_iterator::operator++(); - return *this; + return static_cast(base_iterator::operator++()); } /// post-decrement (it--) json_reverse_iterator operator--(int) { - return base_iterator::operator--(1); + return static_cast(base_iterator::operator--(1)); } /// pre-decrement (--it) json_reverse_iterator& operator--() { - base_iterator::operator--(); - return *this; + return static_cast(base_iterator::operator--()); } /// add to iterator json_reverse_iterator& operator+=(difference_type i) { - base_iterator::operator+=(i); - return *this; + return static_cast(base_iterator::operator+=(i)); } /// add to iterator json_reverse_iterator operator+(difference_type i) const { - auto result = *this; - result += i; - return result; + return static_cast(base_iterator::operator+(i)); } /// subtract from iterator json_reverse_iterator operator-(difference_type i) const { - auto result = *this; - result -= i; - return result; + return static_cast(base_iterator::operator-(i)); } /// return difference difference_type operator-(const json_reverse_iterator& other) const { - return this->base() - other.base(); + return base_iterator(*this) - base_iterator(other); } /// access to successor @@ -9545,16 +8824,2292 @@ class basic_json private: + //////////////////// + // input adapters // + //////////////////// + + /// abstract input adapter interface + class input_adapter + { + public: + virtual int get_character() = 0; + virtual std::string read(size_t offset, size_t length) = 0; + virtual ~input_adapter() {} + + // native support + + /// input adapter for input stream + static std::shared_ptr create(std::istream& i) + { + return std::shared_ptr(new cached_input_stream_adapter<16384>(i)); + } + + /// input adapter for input 
stream + static std::shared_ptr create(std::istream&& i) + { + return std::shared_ptr(new cached_input_stream_adapter<16384>(i)); + } + + /// input adapter for buffer + static std::shared_ptr create(const char* b, size_t l) + { + return std::shared_ptr(new input_buffer_adapter(b, l)); + } + + // derived support + + /// input adapter for string literal + template::value and + std::is_integral::type>::value and + sizeof(typename std::remove_pointer::type) == 1, int>::type = 0> + static std::shared_ptr create(CharT b) + { + return create(reinterpret_cast(b), + std::strlen(reinterpret_cast(b))); + } + + /// input adapter for iterator range with contiguous storage + template::iterator_category, std::random_access_iterator_tag>::value + , int>::type + = 0> + static std::shared_ptr create(IteratorType first, IteratorType last) + { + // assertion to check that the iterator range is indeed contiguous, + // see http://stackoverflow.com/a/35008842/266378 for more discussion + assert(std::accumulate(first, last, std::pair(true, 0), + [&first](std::pair res, decltype(*first) val) + { + res.first &= (val == *(std::next(std::addressof(*first), res.second++))); + return res; + }).first); + + // assertion to check that each element is 1 byte long + static_assert(sizeof(typename std::iterator_traits::value_type) == 1, + "each element in the iterator range must have the size of 1 byte"); + + return create(reinterpret_cast(&(*first)), + static_cast(std::distance(first, last))); + } + + /// input adapter for array + template + static std::shared_ptr create(T (&array)[N]) + { + // delegate the call to the iterator-range overload + return create(std::begin(array), std::end(array)); + } + + /// input adapter for contiguous container + template::value and + std::is_base_of< + std::random_access_iterator_tag, + typename std::iterator_traits()))>::iterator_category>::value + , int>::type = 0> + static std::shared_ptr create(const ContiguousContainer& c) + { + // delegate the call to the 
iterator-range overload + return create(std::begin(c), std::end(c)); + } + }; + + /// a type to simplify interfaces + using input_adapter_t = std::shared_ptr; + + /// input adapter for cached stream input + template + class cached_input_stream_adapter : public input_adapter + { + public: + cached_input_stream_adapter(std::istream& i) + : is(i), start_position(is.tellg()) + { + // immediately abort if stream is erroneous + if (JSON_UNLIKELY(i.fail())) + { + JSON_THROW(parse_error::create(111, 0, "bad input stream")); + } + + fill_buffer(); + + // skip byte order mark + if (fill_size >= 3 and buffer[0] == '\xEF' and buffer[1] == '\xBB' and buffer[2] == '\xBF') + { + buffer_pos += 3; + processed_chars += 3; + } + } + + ~cached_input_stream_adapter() override + { + // clear stream flags + is.clear(); + // We initially read a lot of characters into the buffer, and we + // may not have processed all of them. Therefore, we need to + // "rewind" the stream after the last processed char. + is.seekg(start_position); + is.ignore(static_cast(processed_chars)); + // clear stream flags + is.clear(); + } + + int get_character() override + { + // check if refilling is necessary and possible + if (buffer_pos == fill_size and not eof) + { + fill_buffer(); + + // check and remember that filling did not yield new input + if (fill_size == 0) + { + eof = true; + return std::char_traits::eof(); + } + + // the buffer is ready + buffer_pos = 0; + } + + ++processed_chars; + assert(buffer_pos < buffer.size()); + return buffer[buffer_pos++] & 0xFF; + } + + std::string read(size_t offset, size_t length) override + { + // create buffer + std::string result(length, '\0'); + + // save stream position + const auto current_pos = is.tellg(); + // save stream flags + const auto flags = is.rdstate(); + + // clear stream flags + is.clear(); + // set stream position + is.seekg(static_cast(offset)); + // read bytes + is.read(&result[0], static_cast(length)); + + // reset stream position + 
is.seekg(current_pos); + // reset stream flags + is.setstate(flags); + + return result; + } + + private: + void fill_buffer() + { + // fill + is.read(buffer.data(), static_cast(buffer.size())); + // store number of bytes in the buffer + fill_size = static_cast(is.gcount()); + } + + /// the associated input stream + std::istream& is; + + /// chars returned via get_character() + size_t processed_chars = 0; + /// chars processed in the current buffer + size_t buffer_pos = 0; + + /// whether stream reached eof + bool eof = false; + /// how many chars have been copied to the buffer by last (re)fill + size_t fill_size = 0; + + /// position of the stream when we started + const std::streampos start_position; + + /// internal buffer + std::array buffer{{}}; + }; + + /// input adapter for buffer input + class input_buffer_adapter : public input_adapter + { + public: + input_buffer_adapter(const char* b, size_t l) + : input_adapter(), cursor(b), limit(b + l), start(b) + { + // skip byte order mark + if (l >= 3 and b[0] == '\xEF' and b[1] == '\xBB' and b[2] == '\xBF') + { + cursor += 3; + } + } + + // delete because of pointer members + input_buffer_adapter(const input_buffer_adapter&) = delete; + input_buffer_adapter& operator=(input_buffer_adapter&) = delete; + + int get_character() noexcept override + { + if (JSON_LIKELY(cursor < limit)) + { + return *(cursor++) & 0xFF; + } + else + { + return std::char_traits::eof(); + } + } + + std::string read(size_t offset, size_t length) override + { + // avoid reading too many characters + const size_t max_length = static_cast(limit - start); + return std::string(start + offset, (std::min)(length, max_length - offset)); + } + + private: + /// pointer to the current character + const char* cursor; + /// pointer past the last character + const char* limit; + /// pointer to the first character + const char* start; + }; + + ////////////////////////////////////////// + // binary serialization/deserialization // + 
////////////////////////////////////////// + + /// @name binary serialization/deserialization support + /// @{ + + private: + /*! + @brief deserialization of CBOR and MessagePack values + */ + class binary_reader + { + public: + /*! + @brief create a binary reader + + @param[in] adapter input adapter to read from + */ + explicit binary_reader(input_adapter_t adapter) + : ia(adapter), is_little_endian(little_endianess()) + { + assert(ia); + } + + /*! + @brief create a JSON value from CBOR input + + @param[in] get_char whether a new character should be retrieved from + the input (true, default) or whether the last + read character should be considered instead + + @return JSON value created from CBOR input + + @throw parse_error.110 if input ended unexpectedly + @throw parse_error.112 if unsupported byte was read + */ + basic_json parse_cbor(const bool get_char = true) + { + switch (get_char ? get() : current) + { + // EOF + case std::char_traits::eof(): + { + JSON_THROW(parse_error::create(110, chars_read, "unexpected end of input")); + } + + // Integer 0x00..0x17 (0..23) + case 0x00: + case 0x01: + case 0x02: + case 0x03: + case 0x04: + case 0x05: + case 0x06: + case 0x07: + case 0x08: + case 0x09: + case 0x0a: + case 0x0b: + case 0x0c: + case 0x0d: + case 0x0e: + case 0x0f: + case 0x10: + case 0x11: + case 0x12: + case 0x13: + case 0x14: + case 0x15: + case 0x16: + case 0x17: + { + return static_cast(current); + } + + case 0x18: // Unsigned integer (one-byte uint8_t follows) + { + return get_number(); + } + + case 0x19: // Unsigned integer (two-byte uint16_t follows) + { + return get_number(); + } + + case 0x1a: // Unsigned integer (four-byte uint32_t follows) + { + return get_number(); + } + + case 0x1b: // Unsigned integer (eight-byte uint64_t follows) + { + return get_number(); + } + + // Negative integer -1-0x00..-1-0x17 (-1..-24) + case 0x20: + case 0x21: + case 0x22: + case 0x23: + case 0x24: + case 0x25: + case 0x26: + case 0x27: + case 0x28: + case 0x29: + 
case 0x2a: + case 0x2b: + case 0x2c: + case 0x2d: + case 0x2e: + case 0x2f: + case 0x30: + case 0x31: + case 0x32: + case 0x33: + case 0x34: + case 0x35: + case 0x36: + case 0x37: + { + return static_cast(0x20 - 1 - current); + } + + case 0x38: // Negative integer (one-byte uint8_t follows) + { + // must be uint8_t ! + return static_cast(-1) - get_number(); + } + + case 0x39: // Negative integer -1-n (two-byte uint16_t follows) + { + return static_cast(-1) - get_number(); + } + + case 0x3a: // Negative integer -1-n (four-byte uint32_t follows) + { + return static_cast(-1) - get_number(); + } + + case 0x3b: // Negative integer -1-n (eight-byte uint64_t follows) + { + return static_cast(-1) - static_cast(get_number()); + } + + // UTF-8 string (0x00..0x17 bytes follow) + case 0x60: + case 0x61: + case 0x62: + case 0x63: + case 0x64: + case 0x65: + case 0x66: + case 0x67: + case 0x68: + case 0x69: + case 0x6a: + case 0x6b: + case 0x6c: + case 0x6d: + case 0x6e: + case 0x6f: + case 0x70: + case 0x71: + case 0x72: + case 0x73: + case 0x74: + case 0x75: + case 0x76: + case 0x77: + case 0x78: // UTF-8 string (one-byte uint8_t for n follows) + case 0x79: // UTF-8 string (two-byte uint16_t for n follow) + case 0x7a: // UTF-8 string (four-byte uint32_t for n follow) + case 0x7b: // UTF-8 string (eight-byte uint64_t for n follow) + case 0x7f: // UTF-8 string (indefinite length) + { + return get_cbor_string(); + } + + // array (0x00..0x17 data items follow) + case 0x80: + case 0x81: + case 0x82: + case 0x83: + case 0x84: + case 0x85: + case 0x86: + case 0x87: + case 0x88: + case 0x89: + case 0x8a: + case 0x8b: + case 0x8c: + case 0x8d: + case 0x8e: + case 0x8f: + case 0x90: + case 0x91: + case 0x92: + case 0x93: + case 0x94: + case 0x95: + case 0x96: + case 0x97: + { + basic_json result = value_t::array; + const auto len = static_cast(current & 0x1f); + for (size_t i = 0; i < len; ++i) + { + result.push_back(parse_cbor()); + } + return result; + } + + case 0x98: // array 
(one-byte uint8_t for n follows) + { + basic_json result = value_t::array; + const auto len = static_cast(get_number()); + for (size_t i = 0; i < len; ++i) + { + result.push_back(parse_cbor()); + } + return result; + } + + case 0x99: // array (two-byte uint16_t for n follow) + { + basic_json result = value_t::array; + const auto len = static_cast(get_number()); + for (size_t i = 0; i < len; ++i) + { + result.push_back(parse_cbor()); + } + return result; + } + + case 0x9a: // array (four-byte uint32_t for n follow) + { + basic_json result = value_t::array; + const auto len = static_cast(get_number()); + for (size_t i = 0; i < len; ++i) + { + result.push_back(parse_cbor()); + } + return result; + } + + case 0x9b: // array (eight-byte uint64_t for n follow) + { + basic_json result = value_t::array; + const auto len = static_cast(get_number()); + for (size_t i = 0; i < len; ++i) + { + result.push_back(parse_cbor()); + } + return result; + } + + case 0x9f: // array (indefinite length) + { + basic_json result = value_t::array; + while (get() != 0xff) + { + result.push_back(parse_cbor(false)); + } + return result; + } + + // map (0x00..0x17 pairs of data items follow) + case 0xa0: + case 0xa1: + case 0xa2: + case 0xa3: + case 0xa4: + case 0xa5: + case 0xa6: + case 0xa7: + case 0xa8: + case 0xa9: + case 0xaa: + case 0xab: + case 0xac: + case 0xad: + case 0xae: + case 0xaf: + case 0xb0: + case 0xb1: + case 0xb2: + case 0xb3: + case 0xb4: + case 0xb5: + case 0xb6: + case 0xb7: + { + basic_json result = value_t::object; + const auto len = static_cast(current & 0x1f); + for (size_t i = 0; i < len; ++i) + { + get(); + auto key = get_cbor_string(); + result[key] = parse_cbor(); + } + return result; + } + + case 0xb8: // map (one-byte uint8_t for n follows) + { + basic_json result = value_t::object; + const auto len = static_cast(get_number()); + for (size_t i = 0; i < len; ++i) + { + get(); + auto key = get_cbor_string(); + result[key] = parse_cbor(); + } + return result; + } + 
+ case 0xb9: // map (two-byte uint16_t for n follow) + { + basic_json result = value_t::object; + const auto len = static_cast(get_number()); + for (size_t i = 0; i < len; ++i) + { + get(); + auto key = get_cbor_string(); + result[key] = parse_cbor(); + } + return result; + } + + case 0xba: // map (four-byte uint32_t for n follow) + { + basic_json result = value_t::object; + const auto len = static_cast(get_number()); + for (size_t i = 0; i < len; ++i) + { + get(); + auto key = get_cbor_string(); + result[key] = parse_cbor(); + } + return result; + } + + case 0xbb: // map (eight-byte uint64_t for n follow) + { + basic_json result = value_t::object; + const auto len = static_cast(get_number()); + for (size_t i = 0; i < len; ++i) + { + get(); + auto key = get_cbor_string(); + result[key] = parse_cbor(); + } + return result; + } + + case 0xbf: // map (indefinite length) + { + basic_json result = value_t::object; + while (get() != 0xff) + { + auto key = get_cbor_string(); + result[key] = parse_cbor(); + } + return result; + } + + case 0xf4: // false + { + return false; + } + + case 0xf5: // true + { + return true; + } + + case 0xf6: // null + { + return value_t::null; + } + + case 0xf9: // Half-Precision Float (two-byte IEEE 754) + { + const int byte1 = get(); + check_eof(); + const int byte2 = get(); + check_eof(); + + // code from RFC 7049, Appendix D, Figure 3: + // As half-precision floating-point numbers were only added + // to IEEE 754 in 2008, today's programming platforms often + // still only have limited support for them. It is very + // easy to include at least decoding support for them even + // without such support. An example of a small decoder for + // half-precision floating-point numbers in the C language + // is shown in Fig. 3. 
+ const int half = (byte1 << 8) + byte2; + const int exp = (half >> 10) & 0x1f; + const int mant = half & 0x3ff; + double val; + if (exp == 0) + { + val = std::ldexp(mant, -24); + } + else if (exp != 31) + { + val = std::ldexp(mant + 1024, exp - 25); + } + else + { + val = mant == 0 + ? std::numeric_limits::infinity() + : std::numeric_limits::quiet_NaN(); + } + return (half & 0x8000) != 0 ? -val : val; + } + + case 0xfa: // Single-Precision Float (four-byte IEEE 754) + { + return get_number(); + } + + case 0xfb: // Double-Precision Float (eight-byte IEEE 754) + { + return get_number(); + } + + default: // anything else (0xFF is handled inside the other types) + { + std::stringstream ss; + ss << std::setw(2) << std::setfill('0') << std::hex << current; + JSON_THROW(parse_error::create(112, chars_read, "error reading CBOR; last byte: 0x" + ss.str())); + } + } + } + + /*! + @brief create a JSON value from MessagePack input + + @return JSON value created from MessagePack input + + @throw parse_error.110 if input ended unexpectedly + @throw parse_error.112 if unsupported byte was read + */ + basic_json parse_msgpack() + { + switch (get()) + { + // EOF + case std::char_traits::eof(): + { + JSON_THROW(parse_error::create(110, chars_read, "unexpected end of input")); + } + + // positive fixint + case 0x00: + case 0x01: + case 0x02: + case 0x03: + case 0x04: + case 0x05: + case 0x06: + case 0x07: + case 0x08: + case 0x09: + case 0x0a: + case 0x0b: + case 0x0c: + case 0x0d: + case 0x0e: + case 0x0f: + case 0x10: + case 0x11: + case 0x12: + case 0x13: + case 0x14: + case 0x15: + case 0x16: + case 0x17: + case 0x18: + case 0x19: + case 0x1a: + case 0x1b: + case 0x1c: + case 0x1d: + case 0x1e: + case 0x1f: + case 0x20: + case 0x21: + case 0x22: + case 0x23: + case 0x24: + case 0x25: + case 0x26: + case 0x27: + case 0x28: + case 0x29: + case 0x2a: + case 0x2b: + case 0x2c: + case 0x2d: + case 0x2e: + case 0x2f: + case 0x30: + case 0x31: + case 0x32: + case 0x33: + case 0x34: + 
case 0x35: + case 0x36: + case 0x37: + case 0x38: + case 0x39: + case 0x3a: + case 0x3b: + case 0x3c: + case 0x3d: + case 0x3e: + case 0x3f: + case 0x40: + case 0x41: + case 0x42: + case 0x43: + case 0x44: + case 0x45: + case 0x46: + case 0x47: + case 0x48: + case 0x49: + case 0x4a: + case 0x4b: + case 0x4c: + case 0x4d: + case 0x4e: + case 0x4f: + case 0x50: + case 0x51: + case 0x52: + case 0x53: + case 0x54: + case 0x55: + case 0x56: + case 0x57: + case 0x58: + case 0x59: + case 0x5a: + case 0x5b: + case 0x5c: + case 0x5d: + case 0x5e: + case 0x5f: + case 0x60: + case 0x61: + case 0x62: + case 0x63: + case 0x64: + case 0x65: + case 0x66: + case 0x67: + case 0x68: + case 0x69: + case 0x6a: + case 0x6b: + case 0x6c: + case 0x6d: + case 0x6e: + case 0x6f: + case 0x70: + case 0x71: + case 0x72: + case 0x73: + case 0x74: + case 0x75: + case 0x76: + case 0x77: + case 0x78: + case 0x79: + case 0x7a: + case 0x7b: + case 0x7c: + case 0x7d: + case 0x7e: + case 0x7f: + { + return static_cast(current); + } + + // fixmap + case 0x80: + case 0x81: + case 0x82: + case 0x83: + case 0x84: + case 0x85: + case 0x86: + case 0x87: + case 0x88: + case 0x89: + case 0x8a: + case 0x8b: + case 0x8c: + case 0x8d: + case 0x8e: + case 0x8f: + { + basic_json result = value_t::object; + const auto len = static_cast(current & 0x0f); + for (size_t i = 0; i < len; ++i) + { + get(); + auto key = get_msgpack_string(); + result[key] = parse_msgpack(); + } + return result; + } + + // fixarray + case 0x90: + case 0x91: + case 0x92: + case 0x93: + case 0x94: + case 0x95: + case 0x96: + case 0x97: + case 0x98: + case 0x99: + case 0x9a: + case 0x9b: + case 0x9c: + case 0x9d: + case 0x9e: + case 0x9f: + { + basic_json result = value_t::array; + const auto len = static_cast(current & 0x0f); + for (size_t i = 0; i < len; ++i) + { + result.push_back(parse_msgpack()); + } + return result; + } + + // fixstr + case 0xa0: + case 0xa1: + case 0xa2: + case 0xa3: + case 0xa4: + case 0xa5: + case 0xa6: + case 0xa7: 
+ case 0xa8: + case 0xa9: + case 0xaa: + case 0xab: + case 0xac: + case 0xad: + case 0xae: + case 0xaf: + case 0xb0: + case 0xb1: + case 0xb2: + case 0xb3: + case 0xb4: + case 0xb5: + case 0xb6: + case 0xb7: + case 0xb8: + case 0xb9: + case 0xba: + case 0xbb: + case 0xbc: + case 0xbd: + case 0xbe: + case 0xbf: + { + return get_msgpack_string(); + } + + case 0xc0: // nil + { + return value_t::null; + } + + case 0xc2: // false + { + return false; + } + + case 0xc3: // true + { + return true; + } + + case 0xca: // float 32 + { + return get_number(); + } + + case 0xcb: // float 64 + { + return get_number(); + } + + case 0xcc: // uint 8 + { + return get_number(); + } + + case 0xcd: // uint 16 + { + return get_number(); + } + + case 0xce: // uint 32 + { + return get_number(); + } + + case 0xcf: // uint 64 + { + return get_number(); + } + + case 0xd0: // int 8 + { + return get_number(); + } + + case 0xd1: // int 16 + { + return get_number(); + } + + case 0xd2: // int 32 + { + return get_number(); + } + + case 0xd3: // int 64 + { + return get_number(); + } + + case 0xd9: // str 8 + case 0xda: // str 16 + case 0xdb: // str 32 + { + return get_msgpack_string(); + } + + case 0xdc: // array 16 + { + basic_json result = value_t::array; + const auto len = static_cast(get_number()); + for (size_t i = 0; i < len; ++i) + { + result.push_back(parse_msgpack()); + } + return result; + } + + case 0xdd: // array 32 + { + basic_json result = value_t::array; + const auto len = static_cast(get_number()); + for (size_t i = 0; i < len; ++i) + { + result.push_back(parse_msgpack()); + } + return result; + } + + case 0xde: // map 16 + { + basic_json result = value_t::object; + const auto len = static_cast(get_number()); + for (size_t i = 0; i < len; ++i) + { + get(); + auto key = get_msgpack_string(); + result[key] = parse_msgpack(); + } + return result; + } + + case 0xdf: // map 32 + { + basic_json result = value_t::object; + const auto len = static_cast(get_number()); + for (size_t i = 0; i < 
len; ++i) + { + get(); + auto key = get_msgpack_string(); + result[key] = parse_msgpack(); + } + return result; + } + + // negative fixint + case 0xe0: + case 0xe1: + case 0xe2: + case 0xe3: + case 0xe4: + case 0xe5: + case 0xe6: + case 0xe7: + case 0xe8: + case 0xe9: + case 0xea: + case 0xeb: + case 0xec: + case 0xed: + case 0xee: + case 0xef: + case 0xf0: + case 0xf1: + case 0xf2: + case 0xf3: + case 0xf4: + case 0xf5: + case 0xf6: + case 0xf7: + case 0xf8: + case 0xf9: + case 0xfa: + case 0xfb: + case 0xfc: + case 0xfd: + case 0xfe: + case 0xff: + { + return static_cast(current); + } + + default: // anything else + { + std::stringstream ss; + ss << std::setw(2) << std::setfill('0') << std::hex << current; + JSON_THROW(parse_error::create(112, chars_read, "error reading MessagePack; last byte: 0x" + ss.str())); + } + } + } + + /*! + @brief determine system byte order + + @return true iff system's byte order is little endian + + @note from http://stackoverflow.com/a/1001328/266378 + */ + static bool little_endianess() noexcept + { + int num = 1; + return (*reinterpret_cast(&num) == 1); + } + + private: + /*! + @brief get next character from the input + + This function provides the interface to the used input adapter. It does + not throw in case the input reached EOF, but returns + `std::char_traits::eof()` in that case. + + @return character read from the input + */ + int get() + { + ++chars_read; + return (current = ia->get_character()); + } + + /* + @brief read a number from the input + + @tparam T the type of the number + + @return number of type @a T + + @note This function needs to respect the system's endianness, because + bytes in CBOR and MessagePack are stored in network order (big + endian) and therefore need reordering on little endian systems.
+ + @throw parse_error.110 if input has less than `sizeof(T)` bytes + */ + template + T get_number() + { + // step 1: read input into array with system's byte order + std::array vec; + for (size_t i = 0; i < sizeof(T); ++i) + { + get(); + check_eof(); + + // reverse byte order prior to conversion if necessary + if (is_little_endian) + { + vec[sizeof(T) - i - 1] = static_cast(current); + } + else + { + vec[i] = static_cast(current); // LCOV_EXCL_LINE + } + } + + // step 2: convert array into number of type T and return + T result; + std::memcpy(&result, vec.data(), sizeof(T)); + return result; + } + + /*! + @brief create a string by reading characters from the input + + @param[in] len number of bytes to read + + @return string created by reading @a len bytes + + @throw parse_error.110 if input has less than @a len bytes + */ + std::string get_string(const size_t len) + { + std::string result; + for (size_t i = 0; i < len; ++i) + { + get(); + check_eof(); + result.append(1, static_cast(current)); + } + return result; + } + + /*! + @brief reads a CBOR string + + This function first reads starting bytes to determine the expected + string length and then copies this number of bytes into a string. + Additionally, CBOR's strings with indefinite lengths are supported. 
+ + @return string + + @throw parse_error.110 if input ended + @throw parse_error.113 if an unexpected byte is read + */ + std::string get_cbor_string() + { + check_eof(); + + switch (current) + { + // UTF-8 string (0x00..0x17 bytes follow) + case 0x60: + case 0x61: + case 0x62: + case 0x63: + case 0x64: + case 0x65: + case 0x66: + case 0x67: + case 0x68: + case 0x69: + case 0x6a: + case 0x6b: + case 0x6c: + case 0x6d: + case 0x6e: + case 0x6f: + case 0x70: + case 0x71: + case 0x72: + case 0x73: + case 0x74: + case 0x75: + case 0x76: + case 0x77: + { + const auto len = static_cast(current & 0x1f); + return get_string(len); + } + + case 0x78: // UTF-8 string (one-byte uint8_t for n follows) + { + const auto len = static_cast(get_number()); + return get_string(len); + } + + case 0x79: // UTF-8 string (two-byte uint16_t for n follow) + { + const auto len = static_cast(get_number()); + return get_string(len); + } + + case 0x7a: // UTF-8 string (four-byte uint32_t for n follow) + { + const auto len = static_cast(get_number()); + return get_string(len); + } + + case 0x7b: // UTF-8 string (eight-byte uint64_t for n follow) + { + const auto len = static_cast(get_number()); + return get_string(len); + } + + case 0x7f: // UTF-8 string (indefinite length) + { + std::string result; + while (get() != 0xff) + { + check_eof(); + result.append(1, static_cast(current)); + } + return result; + } + + default: + { + std::stringstream ss; + ss << std::setw(2) << std::setfill('0') << std::hex << current; + JSON_THROW(parse_error::create(113, chars_read, "expected a CBOR string; last byte: 0x" + ss.str())); + } + } + } + + /*! + @brief reads a MessagePack string + + This function first reads starting bytes to determine the expected + string length and then copies this number of bytes into a string.
+ + @return string + + @throw parse_error.110 if input ended + @throw parse_error.113 if an unexpected byte is read + */ + std::string get_msgpack_string() + { + check_eof(); + + switch (current) + { + // fixstr + case 0xa0: + case 0xa1: + case 0xa2: + case 0xa3: + case 0xa4: + case 0xa5: + case 0xa6: + case 0xa7: + case 0xa8: + case 0xa9: + case 0xaa: + case 0xab: + case 0xac: + case 0xad: + case 0xae: + case 0xaf: + case 0xb0: + case 0xb1: + case 0xb2: + case 0xb3: + case 0xb4: + case 0xb5: + case 0xb6: + case 0xb7: + case 0xb8: + case 0xb9: + case 0xba: + case 0xbb: + case 0xbc: + case 0xbd: + case 0xbe: + case 0xbf: + { + const auto len = static_cast(current & 0x1f); + return get_string(len); + } + + case 0xd9: // str 8 + { + const auto len = static_cast(get_number()); + return get_string(len); + } + + case 0xda: // str 16 + { + const auto len = static_cast(get_number()); + return get_string(len); + } + + case 0xdb: // str 32 + { + const auto len = static_cast(get_number()); + return get_string(len); + } + + default: + { + std::stringstream ss; + ss << std::setw(2) << std::setfill('0') << std::hex << current; + JSON_THROW(parse_error::create(113, chars_read, "expected a MessagePack string; last byte: 0x" + ss.str())); + } + } + } + + /*! + @brief check if input ended + @throw parse_error.110 if input ended + */ + void check_eof() const + { + if (JSON_UNLIKELY(current == std::char_traits::eof())) + { + JSON_THROW(parse_error::create(110, chars_read, "unexpected end of input")); + } + } + + private: + /// input adapter + input_adapter_t ia = nullptr; + + /// the current character + int current = std::char_traits::eof(); + + /// the number of characters read + size_t chars_read = 0; + + /// whether we can assume little endianness + const bool is_little_endian = true; + }; + + /*! + @brief serialization to CBOR and MessagePack values + */ + class binary_writer + { + public: + /*!
+ @brief create a binary writer + + @param[in] adapter output adapter to write to + */ + explicit binary_writer(output_adapter_t adapter) + : is_little_endian(binary_reader::little_endianess()), oa(adapter) + { + assert(oa); + } + + /*! + @brief[in] j JSON value to serialize + */ + void write_cbor(const basic_json& j) + { + switch (j.type()) + { + case value_t::null: + { + oa->write_character(0xf6); + break; + } + + case value_t::boolean: + { + oa->write_character(j.m_value.boolean ? 0xf5 : 0xf4); + break; + } + + case value_t::number_integer: + { + if (j.m_value.number_integer >= 0) + { + // CBOR does not differentiate between positive signed + // integers and unsigned integers. Therefore, we used the + // code from the value_t::number_unsigned case here. + if (j.m_value.number_integer <= 0x17) + { + write_number(static_cast(j.m_value.number_integer)); + } + else if (j.m_value.number_integer <= (std::numeric_limits::max)()) + { + oa->write_character(0x18); + write_number(static_cast(j.m_value.number_integer)); + } + else if (j.m_value.number_integer <= (std::numeric_limits::max)()) + { + oa->write_character(0x19); + write_number(static_cast(j.m_value.number_integer)); + } + else if (j.m_value.number_integer <= (std::numeric_limits::max)()) + { + oa->write_character(0x1a); + write_number(static_cast(j.m_value.number_integer)); + } + else + { + oa->write_character(0x1b); + write_number(static_cast(j.m_value.number_integer)); + } + } + else + { + // The conversions below encode the sign in the first + // byte, and the value is converted to a positive number. 
+ const auto positive_number = -1 - j.m_value.number_integer; + if (j.m_value.number_integer >= -24) + { + write_number(static_cast(0x20 + positive_number)); + } + else if (positive_number <= (std::numeric_limits::max)()) + { + oa->write_character(0x38); + write_number(static_cast(positive_number)); + } + else if (positive_number <= (std::numeric_limits::max)()) + { + oa->write_character(0x39); + write_number(static_cast(positive_number)); + } + else if (positive_number <= (std::numeric_limits::max)()) + { + oa->write_character(0x3a); + write_number(static_cast(positive_number)); + } + else + { + oa->write_character(0x3b); + write_number(static_cast(positive_number)); + } + } + break; + } + + case value_t::number_unsigned: + { + if (j.m_value.number_unsigned <= 0x17) + { + write_number(static_cast(j.m_value.number_unsigned)); + } + else if (j.m_value.number_unsigned <= (std::numeric_limits::max)()) + { + oa->write_character(0x18); + write_number(static_cast(j.m_value.number_unsigned)); + } + else if (j.m_value.number_unsigned <= (std::numeric_limits::max)()) + { + oa->write_character(0x19); + write_number(static_cast(j.m_value.number_unsigned)); + } + else if (j.m_value.number_unsigned <= (std::numeric_limits::max)()) + { + oa->write_character(0x1a); + write_number(static_cast(j.m_value.number_unsigned)); + } + else + { + oa->write_character(0x1b); + write_number(static_cast(j.m_value.number_unsigned)); + } + break; + } + + case value_t::number_float: + { + // Double-Precision Float + oa->write_character(0xfb); + write_number(j.m_value.number_float); + break; + } + + case value_t::string: + { + // step 1: write control byte and the string length + const auto N = j.m_value.string->size(); + if (N <= 0x17) + { + write_number(static_cast(0x60 + N)); + } + else if (N <= 0xff) + { + oa->write_character(0x78); + write_number(static_cast(N)); + } + else if (N <= 0xffff) + { + oa->write_character(0x79); + write_number(static_cast(N)); + } + else if (N <= 0xffffffff) + { + 
oa->write_character(0x7a); + write_number(static_cast(N)); + } + // LCOV_EXCL_START + else if (N <= 0xffffffffffffffff) + { + oa->write_character(0x7b); + write_number(static_cast(N)); + } + // LCOV_EXCL_STOP + + // step 2: write the string + oa->write_characters(reinterpret_cast(j.m_value.string->c_str()), + j.m_value.string->size()); + break; + } + + case value_t::array: + { + // step 1: write control byte and the array size + const auto N = j.m_value.array->size(); + if (N <= 0x17) + { + write_number(static_cast(0x80 + N)); + } + else if (N <= 0xff) + { + oa->write_character(0x98); + write_number(static_cast(N)); + } + else if (N <= 0xffff) + { + oa->write_character(0x99); + write_number(static_cast(N)); + } + else if (N <= 0xffffffff) + { + oa->write_character(0x9a); + write_number(static_cast(N)); + } + // LCOV_EXCL_START + else if (N <= 0xffffffffffffffff) + { + oa->write_character(0x9b); + write_number(static_cast(N)); + } + // LCOV_EXCL_STOP + + // step 2: write each element + for (const auto& el : *j.m_value.array) + { + write_cbor(el); + } + break; + } + + case value_t::object: + { + // step 1: write control byte and the object size + const auto N = j.m_value.object->size(); + if (N <= 0x17) + { + write_number(static_cast(0xa0 + N)); + } + else if (N <= 0xff) + { + oa->write_character(0xb8); + write_number(static_cast(N)); + } + else if (N <= 0xffff) + { + oa->write_character(0xb9); + write_number(static_cast(N)); + } + else if (N <= 0xffffffff) + { + oa->write_character(0xba); + write_number(static_cast(N)); + } + // LCOV_EXCL_START + else if (N <= 0xffffffffffffffff) + { + oa->write_character(0xbb); + write_number(static_cast(N)); + } + // LCOV_EXCL_STOP + + // step 2: write each element + for (const auto& el : *j.m_value.object) + { + write_cbor(el.first); + write_cbor(el.second); + } + break; + } + + default: + { + break; + } + } + } + + /*! 
+ @brief[in] j JSON value to serialize + */ + void write_msgpack(const basic_json& j) + { + switch (j.type()) + { + case value_t::null: + { + // nil + oa->write_character(0xc0); + break; + } + + case value_t::boolean: + { + // true and false + oa->write_character(j.m_value.boolean ? 0xc3 : 0xc2); + break; + } + + case value_t::number_integer: + { + if (j.m_value.number_integer >= 0) + { + // MessagePack does not differentiate between positive + // signed integers and unsigned integers. Therefore, we + // used the code from the value_t::number_unsigned case + // here. + if (j.m_value.number_unsigned < 128) + { + // positive fixnum + write_number(static_cast(j.m_value.number_integer)); + } + else if (j.m_value.number_unsigned <= (std::numeric_limits::max)()) + { + // uint 8 + oa->write_character(0xcc); + write_number(static_cast(j.m_value.number_integer)); + } + else if (j.m_value.number_unsigned <= (std::numeric_limits::max)()) + { + // uint 16 + oa->write_character(0xcd); + write_number(static_cast(j.m_value.number_integer)); + } + else if (j.m_value.number_unsigned <= (std::numeric_limits::max)()) + { + // uint 32 + oa->write_character(0xce); + write_number(static_cast(j.m_value.number_integer)); + } + else if (j.m_value.number_unsigned <= (std::numeric_limits::max)()) + { + // uint 64 + oa->write_character(0xcf); + write_number(static_cast(j.m_value.number_integer)); + } + } + else + { + if (j.m_value.number_integer >= -32) + { + // negative fixnum + write_number(static_cast(j.m_value.number_integer)); + } + else if (j.m_value.number_integer >= (std::numeric_limits::min)() and j.m_value.number_integer <= (std::numeric_limits::max)()) + { + // int 8 + oa->write_character(0xd0); + write_number(static_cast(j.m_value.number_integer)); + } + else if (j.m_value.number_integer >= (std::numeric_limits::min)() and j.m_value.number_integer <= (std::numeric_limits::max)()) + { + // int 16 + oa->write_character(0xd1); + write_number(static_cast(j.m_value.number_integer)); + 
} + else if (j.m_value.number_integer >= (std::numeric_limits::min)() and j.m_value.number_integer <= (std::numeric_limits::max)()) + { + // int 32 + oa->write_character(0xd2); + write_number(static_cast(j.m_value.number_integer)); + } + else if (j.m_value.number_integer >= (std::numeric_limits::min)() and j.m_value.number_integer <= (std::numeric_limits::max)()) + { + // int 64 + oa->write_character(0xd3); + write_number(static_cast(j.m_value.number_integer)); + } + } + break; + } + + case value_t::number_unsigned: + { + if (j.m_value.number_unsigned < 128) + { + // positive fixnum + write_number(static_cast(j.m_value.number_integer)); + } + else if (j.m_value.number_unsigned <= (std::numeric_limits::max)()) + { + // uint 8 + oa->write_character(0xcc); + write_number(static_cast(j.m_value.number_integer)); + } + else if (j.m_value.number_unsigned <= (std::numeric_limits::max)()) + { + // uint 16 + oa->write_character(0xcd); + write_number(static_cast(j.m_value.number_integer)); + } + else if (j.m_value.number_unsigned <= (std::numeric_limits::max)()) + { + // uint 32 + oa->write_character(0xce); + write_number(static_cast(j.m_value.number_integer)); + } + else if (j.m_value.number_unsigned <= (std::numeric_limits::max)()) + { + // uint 64 + oa->write_character(0xcf); + write_number(static_cast(j.m_value.number_integer)); + } + break; + } + + case value_t::number_float: + { + // float 64 + oa->write_character(0xcb); + write_number(j.m_value.number_float); + break; + } + + case value_t::string: + { + // step 1: write control byte and the string length + const auto N = j.m_value.string->size(); + if (N <= 31) + { + // fixstr + write_number(static_cast(0xa0 | N)); + } + else if (N <= 255) + { + // str 8 + oa->write_character(0xd9); + write_number(static_cast(N)); + } + else if (N <= 65535) + { + // str 16 + oa->write_character(0xda); + write_number(static_cast(N)); + } + else if (N <= 4294967295) + { + // str 32 + oa->write_character(0xdb); + 
write_number(static_cast(N)); + } + + // step 2: write the string + oa->write_characters(reinterpret_cast(j.m_value.string->c_str()), + j.m_value.string->size()); + break; + } + + case value_t::array: + { + // step 1: write control byte and the array size + const auto N = j.m_value.array->size(); + if (N <= 15) + { + // fixarray + write_number(static_cast(0x90 | N)); + } + else if (N <= 0xffff) + { + // array 16 + oa->write_character(0xdc); + write_number(static_cast(N)); + } + else if (N <= 0xffffffff) + { + // array 32 + oa->write_character(0xdd); + write_number(static_cast(N)); + } + + // step 2: write each element + for (const auto& el : *j.m_value.array) + { + write_msgpack(el); + } + break; + } + + case value_t::object: + { + // step 1: write control byte and the object size + const auto N = j.m_value.object->size(); + if (N <= 15) + { + // fixmap + write_number(static_cast(0x80 | (N & 0xf))); + } + else if (N <= 65535) + { + // map 16 + oa->write_character(0xde); + write_number(static_cast(N)); + } + else if (N <= 4294967295) + { + // map 32 + oa->write_character(0xdf); + write_number(static_cast(N)); + } + + // step 2: write each element + for (const auto& el : *j.m_value.object) + { + write_msgpack(el.first); + write_msgpack(el.second); + } + break; + } + + default: + { + break; + } + } + } + + private: + /* + @brief write a number to output input + + @param[in] n number of type @a T + @tparam T the type of the number + + @note This function needs to respect the system's endianess, because + bytes in CBOR and MessagePack are stored in network order (big + endian) and therefore need reordering on little endian systems. 
+ */ + template + void write_number(T n) + { + // step 1: write number to array of length T + std::array vec; + std::memcpy(vec.data(), &n, sizeof(T)); + + // step 2: write array to output (with possible reordering) + for (size_t i = 0; i < sizeof(T); ++i) + { + // reverse byte order prior to conversion if necessary + if (is_little_endian) + { + oa->write_character(vec[sizeof(T) - i - 1]); + } + else + { + oa->write_character(vec[i]); // LCOV_EXCL_LINE + } + } + } + + private: + /// whether we can assume little endianess + const bool is_little_endian = true; + + /// the output + output_adapter_t oa = nullptr; + }; + + public: + /*! + @brief create a CBOR serialization of a given JSON value + + Serializes a given JSON value @a j to a byte vector using the CBOR (Concise + Binary Object Representation) serialization format. CBOR is a binary + serialization format which aims to be more compact than JSON itself, yet + more efficient to parse. + + The library uses the following mapping from JSON values types to + CBOR types according to the CBOR specification (RFC 7049): + + JSON value type | value/range | CBOR type | first byte + --------------- | ------------------------------------------ | ---------------------------------- | --------------- + null | `null` | Null | 0xf6 + boolean | `true` | True | 0xf5 + boolean | `false` | False | 0xf4 + number_integer | -9223372036854775808..-2147483649 | Negative integer (8 bytes follow) | 0x3b + number_integer | -2147483648..-32769 | Negative integer (4 bytes follow) | 0x3a + number_integer | -32768..-129 | Negative integer (2 bytes follow) | 0x39 + number_integer | -128..-25 | Negative integer (1 byte follow) | 0x38 + number_integer | -24..-1 | Negative integer | 0x20..0x37 + number_integer | 0..23 | Integer | 0x00..0x17 + number_integer | 24..255 | Unsigned integer (1 byte follow) | 0x18 + number_integer | 256..65535 | Unsigned integer (2 bytes follow) | 0x19 + number_integer | 65536..4294967295 | Unsigned integer (4 bytes 
follow) | 0x1a + number_integer | 4294967296..18446744073709551615 | Unsigned integer (8 bytes follow) | 0x1b + number_unsigned | 0..23 | Integer | 0x00..0x17 + number_unsigned | 24..255 | Unsigned integer (1 byte follow) | 0x18 + number_unsigned | 256..65535 | Unsigned integer (2 bytes follow) | 0x19 + number_unsigned | 65536..4294967295 | Unsigned integer (4 bytes follow) | 0x1a + number_unsigned | 4294967296..18446744073709551615 | Unsigned integer (8 bytes follow) | 0x1b + number_float | *any value* | Double-Precision Float | 0xfb + string | *length*: 0..23 | UTF-8 string | 0x60..0x77 + string | *length*: 24..255 | UTF-8 string (1 byte follow) | 0x78 + string | *length*: 256..65535 | UTF-8 string (2 bytes follow) | 0x79 + string | *length*: 65536..4294967295 | UTF-8 string (4 bytes follow) | 0x7a + string | *length*: 4294967296..18446744073709551615 | UTF-8 string (8 bytes follow) | 0x7b + array | *size*: 0..23 | array | 0x80..0x97 + array | *size*: 24..255 | array (1 byte follow) | 0x98 + array | *size*: 256..65535 | array (2 bytes follow) | 0x99 + array | *size*: 65536..4294967295 | array (4 bytes follow) | 0x9a + array | *size*: 4294967296..18446744073709551615 | array (8 bytes follow) | 0x9b + object | *size*: 0..23 | map | 0xa0..0xb7 + object | *size*: 24..255 | map (1 byte follow) | 0xb8 + object | *size*: 256..65535 | map (2 bytes follow) | 0xb9 + object | *size*: 65536..4294967295 | map (4 bytes follow) | 0xba + object | *size*: 4294967296..18446744073709551615 | map (8 bytes follow) | 0xbb + + @note The mapping is **complete** in the sense that any JSON value type + can be converted to a CBOR value.
+ + @note The following CBOR types are not used in the conversion: + - byte strings (0x40..0x5f) + - UTF-8 strings terminated by "break" (0x7f) + - arrays terminated by "break" (0x9f) + - maps terminated by "break" (0xbf) + - date/time (0xc0..0xc1) + - bignum (0xc2..0xc3) + - decimal fraction (0xc4) + - bigfloat (0xc5) + - tagged items (0xc6..0xd4, 0xd8..0xdb) + - expected conversions (0xd5..0xd7) + - simple values (0xe0..0xf3, 0xf8) + - undefined (0xf7) + - half and single-precision floats (0xf9-0xfa) + - break (0xff) + + @param[in] j JSON value to serialize + @return CBOR serialization as byte vector + + @complexity Linear in the size of the JSON value @a j. + + @liveexample{The example shows the serialization of a JSON value to a byte + vector in CBOR format.,to_cbor} + + @sa http://cbor.io + @sa @ref from_cbor(const std::vector&, const size_t) for the + analogous deserialization + @sa @ref to_msgpack(const basic_json&) for the related MessagePack format + + @since version 2.0.9 + */ + static std::vector to_cbor(const basic_json& j) + { + std::vector result; + binary_writer bw(output_adapter::create(result)); + bw.write_cbor(j); + return result; + } + + /*! + @brief create a MessagePack serialization of a given JSON value + + Serializes a given JSON value @a j to a byte vector using the MessagePack + serialization format. MessagePack is a binary serialization format which + aims to be more compact than JSON itself, yet more efficient to parse.
+ + The library uses the following mapping from JSON values types to + MessagePack types according to the MessagePack specification: + + JSON value type | value/range | MessagePack type | first byte + --------------- | --------------------------------- | ---------------- | ---------- + null | `null` | nil | 0xc0 + boolean | `true` | true | 0xc3 + boolean | `false` | false | 0xc2 + number_integer | -9223372036854775808..-2147483649 | int64 | 0xd3 + number_integer | -2147483648..-32769 | int32 | 0xd2 + number_integer | -32768..-129 | int16 | 0xd1 + number_integer | -128..-33 | int8 | 0xd0 + number_integer | -32..-1 | negative fixint | 0xe0..0xff + number_integer | 0..127 | positive fixint | 0x00..0x7f + number_integer | 128..255 | uint 8 | 0xcc + number_integer | 256..65535 | uint 16 | 0xcd + number_integer | 65536..4294967295 | uint 32 | 0xce + number_integer | 4294967296..18446744073709551615 | uint 64 | 0xcf + number_unsigned | 0..127 | positive fixint | 0x00..0x7f + number_unsigned | 128..255 | uint 8 | 0xcc + number_unsigned | 256..65535 | uint 16 | 0xcd + number_unsigned | 65536..4294967295 | uint 32 | 0xce + number_unsigned | 4294967296..18446744073709551615 | uint 64 | 0xcf + number_float | *any value* | float 64 | 0xcb + string | *length*: 0..31 | fixstr | 0xa0..0xbf + string | *length*: 32..255 | str 8 | 0xd9 + string | *length*: 256..65535 | str 16 | 0xda + string | *length*: 65536..4294967295 | str 32 | 0xdb + array | *size*: 0..15 | fixarray | 0x90..0x9f + array | *size*: 16..65535 | array 16 | 0xdc + array | *size*: 65536..4294967295 | array 32 | 0xdd + object | *size*: 0..15 | fix map | 0x80..0x8f + object | *size*: 16..65535 | map 16 | 0xde + object | *size*: 65536..4294967295 | map 32 | 0xdf + + @note The mapping is **complete** in the sense that any JSON value type + can be converted to a MessagePack value. 
+ + @note The following values can **not** be converted to a MessagePack value: + - strings with more than 4294967295 bytes + - arrays with more than 4294967295 elements + - objects with more than 4294967295 elements + + @note The following MessagePack types are not used in the conversion: + - bin 8 - bin 32 (0xc4..0xc6) + - ext 8 - ext 32 (0xc7..0xc9) + - float 32 (0xca) + - fixext 1 - fixext 16 (0xd4..0xd8) + + @note Any MessagePack output created by @ref to_msgpack can be successfully + parsed by @ref from_msgpack. + + @param[in] j JSON value to serialize + @return MessagePack serialization as byte vector + + @complexity Linear in the size of the JSON value @a j. + + @liveexample{The example shows the serialization of a JSON value to a byte + vector in MessagePack format.,to_msgpack} + + @sa http://msgpack.org + @sa @ref from_msgpack(const std::vector&, const size_t) for the + analogous deserialization + @sa @ref to_cbor(const basic_json&) for the related CBOR format + + @since version 2.0.9 + */ + static std::vector to_msgpack(const basic_json& j) + { + std::vector result; + binary_writer bw(output_adapter::create(result)); + bw.write_msgpack(j); + return result; + } + + /*! + @brief create a JSON value from a byte vector in CBOR format + + Deserializes a given byte vector @a v to a JSON value using the CBOR + (Concise Binary Object Representation) serialization format.
+ + The library maps CBOR types to JSON value types as follows: + + CBOR type | JSON value type | first byte + ---------------------- | --------------- | ---------- + Integer | number_unsigned | 0x00..0x17 + Unsigned integer | number_unsigned | 0x18 + Unsigned integer | number_unsigned | 0x19 + Unsigned integer | number_unsigned | 0x1a + Unsigned integer | number_unsigned | 0x1b + Negative integer | number_integer | 0x20..0x37 + Negative integer | number_integer | 0x38 + Negative integer | number_integer | 0x39 + Negative integer | number_integer | 0x3a + Negative integer | number_integer | 0x3b + Negative integer | number_integer | 0x40..0x57 + UTF-8 string | string | 0x60..0x77 + UTF-8 string | string | 0x78 + UTF-8 string | string | 0x79 + UTF-8 string | string | 0x7a + UTF-8 string | string | 0x7b + UTF-8 string | string | 0x7f + array | array | 0x80..0x97 + array | array | 0x98 + array | array | 0x99 + array | array | 0x9a + array | array | 0x9b + array | array | 0x9f + map | object | 0xa0..0xb7 + map | object | 0xb8 + map | object | 0xb9 + map | object | 0xba + map | object | 0xbb + map | object | 0xbf + False | `false` | 0xf4 + True | `true` | 0xf5 + Nill | `null` | 0xf6 + Half-Precision Float | number_float | 0xf9 + Single-Precision Float | number_float | 0xfa + Double-Precision Float | number_float | 0xfb + + @warning The mapping is **incomplete** in the sense that not all CBOR + types can be converted to a JSON value. The following CBOR types + are not supported and will yield parse errors (parse_error.112): + - byte strings (0x40..0x5f) + - date/time (0xc0..0xc1) + - bignum (0xc2..0xc3) + - decimal fraction (0xc4) + - bigfloat (0xc5) + - tagged items (0xc6..0xd4, 0xd8..0xdb) + - expected conversions (0xd5..0xd7) + - simple values (0xe0..0xf3, 0xf8) + - undefined (0xf7) + + @warning CBOR allows map keys of any type, whereas JSON only allows + strings as keys in object values. 
Therefore, CBOR maps with keys + other than UTF-8 strings are rejected (parse_error.113). + + @note Any CBOR output created @ref to_cbor can be successfully parsed by + @ref from_cbor. + + @param[in] v a byte vector in CBOR format + @param[in] start_index the index to start reading from @a v (0 by default) + @return deserialized JSON value + + @throw parse_error.110 if the given vector ends prematurely + @throw parse_error.112 if unsupported features from CBOR were + used in the given vector @a v or if the input is not valid CBOR + @throw parse_error.113 if a string was expected as map key, but not found + + @complexity Linear in the size of the byte vector @a v. + + @liveexample{The example shows the deserialization of a byte vector in CBOR + format to a JSON value.,from_cbor} + + @sa http://cbor.io + @sa @ref to_cbor(const basic_json&) for the analogous serialization + @sa @ref from_msgpack(const std::vector&, const size_t) for the + related MessagePack format + + @since version 2.0.9, parameter @a start_index since 2.1.1 + */ + static basic_json from_cbor(const std::vector& v, + const size_t start_index = 0) + { + binary_reader br(input_adapter::create(v.begin() + static_cast(start_index), v.end())); + return br.parse_cbor(); + } + + + /*! + @brief create a JSON value from a byte vector in MessagePack format + + Deserializes a given byte vector @a v to a JSON value using the MessagePack + serialization format. 
+ + The library maps MessagePack types to JSON value types as follows: + + MessagePack type | JSON value type | first byte + ---------------- | --------------- | ---------- + positive fixint | number_unsigned | 0x00..0x7f + fixmap | object | 0x80..0x8f + fixarray | array | 0x90..0x9f + fixstr | string | 0xa0..0xbf + nil | `null` | 0xc0 + false | `false` | 0xc2 + true | `true` | 0xc3 + float 32 | number_float | 0xca + float 64 | number_float | 0xcb + uint 8 | number_unsigned | 0xcc + uint 16 | number_unsigned | 0xcd + uint 32 | number_unsigned | 0xce + uint 64 | number_unsigned | 0xcf + int 8 | number_integer | 0xd0 + int 16 | number_integer | 0xd1 + int 32 | number_integer | 0xd2 + int 64 | number_integer | 0xd3 + str 8 | string | 0xd9 + str 16 | string | 0xda + str 32 | string | 0xdb + array 16 | array | 0xdc + array 32 | array | 0xdd + map 16 | object | 0xde + map 32 | object | 0xdf + negative fixint | number_integer | 0xe0-0xff + + @warning The mapping is **incomplete** in the sense that not all + MessagePack types can be converted to a JSON value. The following + MessagePack types are not supported and will yield parse errors: + - bin 8 - bin 32 (0xc4..0xc6) + - ext 8 - ext 32 (0xc7..0xc9) + - fixext 1 - fixext 16 (0xd4..0xd8) + + @note Any MessagePack output created @ref to_msgpack can be successfully + parsed by @ref from_msgpack. + + @param[in] v a byte vector in MessagePack format + @param[in] start_index the index to start reading from @a v (0 by default) + @return deserialized JSON value + + @throw parse_error.110 if the given vector ends prematurely + @throw parse_error.112 if unsupported features from MessagePack were + used in the given vector @a v or if the input is not valid MessagePack + @throw parse_error.113 if a string was expected as map key, but not found + + @complexity Linear in the size of the byte vector @a v. 
+ + @liveexample{The example shows the deserialization of a byte vector in + MessagePack format to a JSON value.,from_msgpack} + + @sa http://msgpack.org + @sa @ref to_msgpack(const basic_json&) for the analogous serialization + @sa @ref from_cbor(const std::vector&, const size_t) for the + related CBOR format + + @since version 2.0.9, parameter @a start_index since 2.1.1 + */ + static basic_json from_msgpack(const std::vector& v, + const size_t start_index = 0) + { + binary_reader br(input_adapter::create(v.begin() + static_cast(start_index), v.end())); + return br.parse_msgpack(); + } + + /// @} + ////////////////////// // lexer and parser // ////////////////////// + private: /*! @brief lexical analysis - This class organizes the lexical analysis during JSON deserialization. The - core of it is a scanner generated by [re2c](http://re2c.org) that - processes a buffer and recognizes tokens according to RFC 7159. + This class organizes the lexical analysis during JSON deserialization. */ class lexer { @@ -9567,9 +11122,9 @@ class basic_json literal_false, ///< the `false` literal literal_null, ///< the `null` literal value_string, ///< a string -- use get_string() for actual value - value_unsigned, ///< an unsigned integer -- use get_number() for actual value - value_integer, ///< a signed integer -- use get_number() for actual value - value_float, ///< an floating point number -- use get_number() for actual value + value_unsigned, ///< an unsigned integer -- use get_number_unsigned() for actual value + value_integer, ///< a signed integer -- use get_number_integer() for actual value + value_float, ///< an floating point number -- use get_number_float() for actual value begin_array, ///< the character for array begin `[` begin_object, ///< the character for object begin `{` end_array, ///< the character for array end `]` @@ -9577,137 +11132,12 @@ class basic_json name_separator, ///< the name separator `:` value_separator, ///< the value separator `,` parse_error, 
///< indicating a parse error - end_of_input ///< indicating the end of the input buffer + end_of_input, ///< indicating the end of the input buffer + literal_or_value ///< a literal or the begin of a value (only for diagnostics) }; - /// the char type to use in the lexer - using lexer_char_t = unsigned char; - - /// a lexer from a buffer with given length - lexer(const lexer_char_t* buff, const size_t len) noexcept - : m_content(buff) - { - assert(m_content != nullptr); - m_start = m_cursor = m_content; - m_limit = m_content + len; - } - - /// a lexer from an input stream - explicit lexer(std::istream& s) - : m_stream(&s), m_line_buffer() - { - // immediately abort if stream is erroneous - if (s.fail()) - { - JSON_THROW(std::invalid_argument("stream error")); - } - - // fill buffer - fill_line_buffer(); - - // skip UTF-8 byte-order mark - if (m_line_buffer.size() >= 3 and m_line_buffer.substr(0, 3) == "\xEF\xBB\xBF") - { - m_line_buffer[0] = ' '; - m_line_buffer[1] = ' '; - m_line_buffer[2] = ' '; - } - } - - // switch off unwanted functions (due to pointer members) - lexer() = delete; - lexer(const lexer&) = delete; - lexer operator=(const lexer&) = delete; - - /*! - @brief create a string from one or two Unicode code points - - There are two cases: (1) @a codepoint1 is in the Basic Multilingual - Plane (U+0000 through U+FFFF) and @a codepoint2 is 0, or (2) - @a codepoint1 and @a codepoint2 are a UTF-16 surrogate pair to - represent a code point above U+FFFF. - - @param[in] codepoint1 the code point (can be high surrogate) - @param[in] codepoint2 the code point (can be low surrogate or 0) - - @return string representation of the code point; the length of the - result string is between 1 and 4 characters. - - @throw std::out_of_range if code point is > 0x10ffff; example: `"code - points above 0x10FFFF are invalid"` - @throw std::invalid_argument if the low surrogate is invalid; example: - `""missing or wrong low surrogate""` - - @complexity Constant. 
- - @see - */ - static string_t to_unicode(const std::size_t codepoint1, - const std::size_t codepoint2 = 0) - { - // calculate the code point from the given code points - std::size_t codepoint = codepoint1; - - // check if codepoint1 is a high surrogate - if (codepoint1 >= 0xD800 and codepoint1 <= 0xDBFF) - { - // check if codepoint2 is a low surrogate - if (codepoint2 >= 0xDC00 and codepoint2 <= 0xDFFF) - { - codepoint = - // high surrogate occupies the most significant 22 bits - (codepoint1 << 10) - // low surrogate occupies the least significant 15 bits - + codepoint2 - // there is still the 0xD800, 0xDC00 and 0x10000 noise - // in the result so we have to subtract with: - // (0xD800 << 10) + DC00 - 0x10000 = 0x35FDC00 - - 0x35FDC00; - } - else - { - JSON_THROW(std::invalid_argument("missing or wrong low surrogate")); - } - } - - string_t result; - - if (codepoint < 0x80) - { - // 1-byte characters: 0xxxxxxx (ASCII) - result.append(1, static_cast(codepoint)); - } - else if (codepoint <= 0x7ff) - { - // 2-byte characters: 110xxxxx 10xxxxxx - result.append(1, static_cast(0xC0 | ((codepoint >> 6) & 0x1F))); - result.append(1, static_cast(0x80 | (codepoint & 0x3F))); - } - else if (codepoint <= 0xffff) - { - // 3-byte characters: 1110xxxx 10xxxxxx 10xxxxxx - result.append(1, static_cast(0xE0 | ((codepoint >> 12) & 0x0F))); - result.append(1, static_cast(0x80 | ((codepoint >> 6) & 0x3F))); - result.append(1, static_cast(0x80 | (codepoint & 0x3F))); - } - else if (codepoint <= 0x10ffff) - { - // 4-byte characters: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx - result.append(1, static_cast(0xF0 | ((codepoint >> 18) & 0x07))); - result.append(1, static_cast(0x80 | ((codepoint >> 12) & 0x3F))); - result.append(1, static_cast(0x80 | ((codepoint >> 6) & 0x3F))); - result.append(1, static_cast(0x80 | (codepoint & 0x3F))); - } - else - { - JSON_THROW(std::out_of_range("code points above 0x10FFFF are invalid")); - } - - return result; - } - /// return name of values of type 
token_type (only used for errors) - static std::string token_type_name(const token_type t) + static const char* token_type_name(const token_type t) noexcept { switch (t) { @@ -9741,6 +11171,8 @@ class basic_json return ""; case token_type::end_of_input: return "end of input"; + case token_type::literal_or_value: + return "'[', '{', or a literal"; default: { // catch non-enum values @@ -9749,1600 +11181,1438 @@ class basic_json } } - /*! - This function implements a scanner for JSON. It is specified using - regular expressions that try to follow RFC 7159 as close as possible. - These regular expressions are then translated into a minimized - deterministic finite automaton (DFA) by the tool - [re2c](http://re2c.org). As a result, the translated code for this - function consists of a large block of code with `goto` jumps. + explicit lexer(input_adapter_t adapter) + : ia(adapter), decimal_point_char(get_decimal_point()) + {} - @return the class of the next token read from the buffer + // delete because of pointer members + lexer(const lexer&) = delete; + lexer& operator=(lexer&) = delete; - @complexity Linear in the length of the input.\n + private: + ///////////////////// + // locales + ///////////////////// - Proposition: The loop below will always terminate for finite input.\n - - Proof (by contradiction): Assume a finite input. To loop forever, the - loop must never hit code with a `break` statement. The only code - snippets without a `break` statement are the continue statements for - whitespace and byte-order-marks. To loop forever, the input must be an - infinite sequence of whitespace or byte-order-marks. This contradicts - the assumption of finite input, q.e.d. - */ - token_type scan() + /// return the locale-dependent decimal point + static char get_decimal_point() noexcept { + const auto loc = localeconv(); + assert(loc != nullptr); + return (loc->decimal_point == nullptr) ? '.' 
: loc->decimal_point[0]; + } + + ///////////////////// + // scan functions + ///////////////////// + + /*! + @brief get codepoint from 4 hex characters following `\u` + + @return codepoint or -1 in case of an error (e.g. EOF or non-hex + character) + */ + int get_codepoint() + { + // this function only makes sense after reading `\u` + assert(current == 'u'); + int codepoint = 0; + + // byte 1: \uXxxx + switch (get()) + { + case '0': + break; + case '1': + codepoint += 0x1000; + break; + case '2': + codepoint += 0x2000; + break; + case '3': + codepoint += 0x3000; + break; + case '4': + codepoint += 0x4000; + break; + case '5': + codepoint += 0x5000; + break; + case '6': + codepoint += 0x6000; + break; + case '7': + codepoint += 0x7000; + break; + case '8': + codepoint += 0x8000; + break; + case '9': + codepoint += 0x9000; + break; + case 'A': + case 'a': + codepoint += 0xa000; + break; + case 'B': + case 'b': + codepoint += 0xb000; + break; + case 'C': + case 'c': + codepoint += 0xc000; + break; + case 'D': + case 'd': + codepoint += 0xd000; + break; + case 'E': + case 'e': + codepoint += 0xe000; + break; + case 'F': + case 'f': + codepoint += 0xf000; + break; + default: + return -1; + } + + // byte 2: \uxXxx + switch (get()) + { + case '0': + break; + case '1': + codepoint += 0x0100; + break; + case '2': + codepoint += 0x0200; + break; + case '3': + codepoint += 0x0300; + break; + case '4': + codepoint += 0x0400; + break; + case '5': + codepoint += 0x0500; + break; + case '6': + codepoint += 0x0600; + break; + case '7': + codepoint += 0x0700; + break; + case '8': + codepoint += 0x0800; + break; + case '9': + codepoint += 0x0900; + break; + case 'A': + case 'a': + codepoint += 0x0a00; + break; + case 'B': + case 'b': + codepoint += 0x0b00; + break; + case 'C': + case 'c': + codepoint += 0x0c00; + break; + case 'D': + case 'd': + codepoint += 0x0d00; + break; + case 'E': + case 'e': + codepoint += 0x0e00; + break; + case 'F': + case 'f': + codepoint += 0x0f00; + 
break; + default: + return -1; + } + + // byte 3: \uxxXx + switch (get()) + { + case '0': + break; + case '1': + codepoint += 0x0010; + break; + case '2': + codepoint += 0x0020; + break; + case '3': + codepoint += 0x0030; + break; + case '4': + codepoint += 0x0040; + break; + case '5': + codepoint += 0x0050; + break; + case '6': + codepoint += 0x0060; + break; + case '7': + codepoint += 0x0070; + break; + case '8': + codepoint += 0x0080; + break; + case '9': + codepoint += 0x0090; + break; + case 'A': + case 'a': + codepoint += 0x00a0; + break; + case 'B': + case 'b': + codepoint += 0x00b0; + break; + case 'C': + case 'c': + codepoint += 0x00c0; + break; + case 'D': + case 'd': + codepoint += 0x00d0; + break; + case 'E': + case 'e': + codepoint += 0x00e0; + break; + case 'F': + case 'f': + codepoint += 0x00f0; + break; + default: + return -1; + } + + // byte 4: \uxxxX + switch (get()) + { + case '0': + break; + case '1': + codepoint += 0x0001; + break; + case '2': + codepoint += 0x0002; + break; + case '3': + codepoint += 0x0003; + break; + case '4': + codepoint += 0x0004; + break; + case '5': + codepoint += 0x0005; + break; + case '6': + codepoint += 0x0006; + break; + case '7': + codepoint += 0x0007; + break; + case '8': + codepoint += 0x0008; + break; + case '9': + codepoint += 0x0009; + break; + case 'A': + case 'a': + codepoint += 0x000a; + break; + case 'B': + case 'b': + codepoint += 0x000b; + break; + case 'C': + case 'c': + codepoint += 0x000c; + break; + case 'D': + case 'd': + codepoint += 0x000d; + break; + case 'E': + case 'e': + codepoint += 0x000e; + break; + case 'F': + case 'f': + codepoint += 0x000f; + break; + default: + return -1; + } + + return codepoint; + } + + /*! + @brief scan a string literal + + This function scans a string according to Sect. 7 of RFC 7159. While + scanning, bytes are escaped and copied into buffer yytext. 
Then the + function returns successfully, yytext is null-terminated and yylen + contains the number of bytes in the string. + + @return token_type::value_string if string could be successfully + scanned, token_type::parse_error otherwise + + @note In case of errors, variable error_message contains a textual + description. + */ + token_type scan_string() + { + // reset yytext (ignore opening quote) + reset(); + + // we entered the function by reading an open quote + assert(current == '\"'); + while (true) { - // pointer for backtracking information - m_marker = nullptr; - - // remember the begin of the token - m_start = m_cursor; - assert(m_start != nullptr); - - + // get next character + switch (get()) { - lexer_char_t yych; - unsigned int yyaccept = 0; - static const unsigned char yybm[] = + // end of file while parsing string + case std::char_traits::eof(): { - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 32, 32, 0, 0, 32, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 160, 128, 0, 128, 128, 128, 128, 128, - 128, 128, 128, 128, 128, 128, 128, 128, - 192, 192, 192, 192, 192, 192, 192, 192, - 192, 192, 128, 128, 128, 128, 128, 128, - 128, 128, 128, 128, 128, 128, 128, 128, - 128, 128, 128, 128, 128, 128, 128, 128, - 128, 128, 128, 128, 128, 128, 128, 128, - 128, 128, 128, 128, 0, 128, 128, 128, - 128, 128, 128, 128, 128, 128, 128, 128, - 128, 128, 128, 128, 128, 128, 128, 128, - 128, 128, 128, 128, 128, 128, 128, 128, - 128, 128, 128, 128, 128, 128, 128, 128, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - }; - if ((m_limit - m_cursor) < 5) - { - fill_line_buffer(5); // LCOV_EXCL_LINE + error_message = 
"invalid string: missing closing quote"; + return token_type::parse_error; } - yych = *m_cursor; - if (yybm[0 + yych] & 32) + + // closing quote + case '\"': { - goto basic_json_parser_6; + // terminate yytext + add('\0'); + --yylen; + return token_type::value_string; } - if (yych <= '[') + + // escapes + case '\\': { - if (yych <= '-') + switch (get()) { - if (yych <= '"') + // quotation mark + case '\"': + add('\"'); + break; + // reverse solidus + case '\\': + add('\\'); + break; + // solidus + case '/': + add('/'); + break; + // backspace + case 'b': + add('\b'); + break; + // form feed + case 'f': + add('\f'); + break; + // line feed + case 'n': + add('\n'); + break; + // carriage return + case 'r': + add('\r'); + break; + // tab + case 't': + add('\t'); + break; + + // unicode escapes + case 'u': { - if (yych <= 0x00) + int codepoint; + int codepoint1 = get_codepoint(); + + if (JSON_UNLIKELY(codepoint1 == -1)) { - goto basic_json_parser_2; + error_message = "invalid string: '\\u' must be followed by 4 hex digits"; + return token_type::parse_error; } - if (yych <= '!') + + // check if code point is a high surrogate + if (0xD800 <= codepoint1 and codepoint1 <= 0xDBFF) { - goto basic_json_parser_4; + // expect next \uxxxx entry + if (JSON_LIKELY(get() == '\\' and get() == 'u')) + { + const int codepoint2 = get_codepoint(); + + if (JSON_UNLIKELY(codepoint2 == -1)) + { + error_message = "invalid string: '\\u' must be followed by 4 hex digits"; + return token_type::parse_error; + } + + // check if codepoint2 is a low surrogate + if (JSON_LIKELY(0xDC00 <= codepoint2 and codepoint2 <= 0xDFFF)) + { + codepoint = + // high surrogate occupies the most significant 22 bits + (codepoint1 << 10) + // low surrogate occupies the least significant 15 bits + + codepoint2 + // there is still the 0xD800, 0xDC00 and 0x10000 noise + // in the result so we have to subtract with: + // (0xD800 << 10) + DC00 - 0x10000 = 0x35FDC00 + - 0x35FDC00; + } + else + { + error_message = "invalid 
string: surrogate U+DC00..U+DFFF must be followed by U+DC00..U+DFFF"; + return token_type::parse_error; + } + } + else + { + error_message = "invalid string: surrogate U+DC00..U+DFFF must be followed by U+DC00..U+DFFF"; + return token_type::parse_error; + } } - goto basic_json_parser_9; - } - else - { - if (yych <= '+') - { - goto basic_json_parser_4; - } - if (yych <= ',') - { - goto basic_json_parser_10; - } - goto basic_json_parser_12; + else + { + if (JSON_UNLIKELY(0xDC00 <= codepoint1 and codepoint1 <= 0xDFFF)) + { + error_message = "invalid string: surrogate U+DC00..U+DFFF must follow U+D800..U+DBFF"; + return token_type::parse_error; + } + + // only work with first code point + codepoint = codepoint1; + } + + // result of the above calculation yields a proper codepoint + assert(0x00 <= codepoint and codepoint <= 0x10FFFF); + + // translate code point to bytes + if (codepoint < 0x80) + { + // 1-byte characters: 0xxxxxxx (ASCII) + add(codepoint); + } + else if (codepoint <= 0x7ff) + { + // 2-byte characters: 110xxxxx 10xxxxxx + add(0xC0 | (codepoint >> 6)); + add(0x80 | (codepoint & 0x3F)); + } + else if (codepoint <= 0xffff) + { + // 3-byte characters: 1110xxxx 10xxxxxx 10xxxxxx + add(0xE0 | (codepoint >> 12)); + add(0x80 | ((codepoint >> 6) & 0x3F)); + add(0x80 | (codepoint & 0x3F)); + } + else + { + // 4-byte characters: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx + add(0xF0 | (codepoint >> 18)); + add(0x80 | ((codepoint >> 12) & 0x3F)); + add(0x80 | ((codepoint >> 6) & 0x3F)); + add(0x80 | (codepoint & 0x3F)); + } + + break; } + + // other characters after escape + default: + error_message = "invalid string: forbidden character after backslash"; + return token_type::parse_error; } - else - { - if (yych <= '9') - { - if (yych <= '/') - { - goto basic_json_parser_4; - } - if (yych <= '0') - { - goto basic_json_parser_13; - } - goto basic_json_parser_15; - } - else - { - if (yych <= ':') - { - goto basic_json_parser_17; - } - if (yych <= 'Z') - { - goto 
basic_json_parser_4; - } - goto basic_json_parser_19; - } - } - } - else - { - if (yych <= 'n') - { - if (yych <= 'e') - { - if (yych == ']') - { - goto basic_json_parser_21; - } - goto basic_json_parser_4; - } - else - { - if (yych <= 'f') - { - goto basic_json_parser_23; - } - if (yych <= 'm') - { - goto basic_json_parser_4; - } - goto basic_json_parser_24; - } - } - else - { - if (yych <= 'z') - { - if (yych == 't') - { - goto basic_json_parser_25; - } - goto basic_json_parser_4; - } - else - { - if (yych <= '{') - { - goto basic_json_parser_26; - } - if (yych == '}') - { - goto basic_json_parser_28; - } - goto basic_json_parser_4; - } - } - } -basic_json_parser_2: - ++m_cursor; - { - last_token_type = token_type::end_of_input; + break; } -basic_json_parser_4: - ++m_cursor; -basic_json_parser_5: + + // invalid control characters + case 0x00: + case 0x01: + case 0x02: + case 0x03: + case 0x04: + case 0x05: + case 0x06: + case 0x07: + case 0x08: + case 0x09: + case 0x0a: + case 0x0b: + case 0x0c: + case 0x0d: + case 0x0e: + case 0x0f: + case 0x10: + case 0x11: + case 0x12: + case 0x13: + case 0x14: + case 0x15: + case 0x16: + case 0x17: + case 0x18: + case 0x19: + case 0x1a: + case 0x1b: + case 0x1c: + case 0x1d: + case 0x1e: + case 0x1f: { - last_token_type = token_type::parse_error; + error_message = "invalid string: control character must be escaped"; + return token_type::parse_error; + } + + // U+0020..U+007F (except U+0022 (quote) and U+005C (backspace)) + case 0x20: + case 0x21: + case 0x23: + case 0x24: + case 0x25: + case 0x26: + case 0x27: + case 0x28: + case 0x29: + case 0x2a: + case 0x2b: + case 0x2c: + case 0x2d: + case 0x2e: + case 0x2f: + case 0x30: + case 0x31: + case 0x32: + case 0x33: + case 0x34: + case 0x35: + case 0x36: + case 0x37: + case 0x38: + case 0x39: + case 0x3a: + case 0x3b: + case 0x3c: + case 0x3d: + case 0x3e: + case 0x3f: + case 0x40: + case 0x41: + case 0x42: + case 0x43: + case 0x44: + case 0x45: + case 0x46: + case 0x47: + case 
0x48: + case 0x49: + case 0x4a: + case 0x4b: + case 0x4c: + case 0x4d: + case 0x4e: + case 0x4f: + case 0x50: + case 0x51: + case 0x52: + case 0x53: + case 0x54: + case 0x55: + case 0x56: + case 0x57: + case 0x58: + case 0x59: + case 0x5a: + case 0x5b: + case 0x5d: + case 0x5e: + case 0x5f: + case 0x60: + case 0x61: + case 0x62: + case 0x63: + case 0x64: + case 0x65: + case 0x66: + case 0x67: + case 0x68: + case 0x69: + case 0x6a: + case 0x6b: + case 0x6c: + case 0x6d: + case 0x6e: + case 0x6f: + case 0x70: + case 0x71: + case 0x72: + case 0x73: + case 0x74: + case 0x75: + case 0x76: + case 0x77: + case 0x78: + case 0x79: + case 0x7a: + case 0x7b: + case 0x7c: + case 0x7d: + case 0x7e: + case 0x7f: + { + add(current); break; } -basic_json_parser_6: - ++m_cursor; - if (m_limit <= m_cursor) + + // U+0080..U+07FF: bytes C2..DF 80..BF + case 0xc2: + case 0xc3: + case 0xc4: + case 0xc5: + case 0xc6: + case 0xc7: + case 0xc8: + case 0xc9: + case 0xca: + case 0xcb: + case 0xcc: + case 0xcd: + case 0xce: + case 0xcf: + case 0xd0: + case 0xd1: + case 0xd2: + case 0xd3: + case 0xd4: + case 0xd5: + case 0xd6: + case 0xd7: + case 0xd8: + case 0xd9: + case 0xda: + case 0xdb: + case 0xdc: + case 0xdd: + case 0xde: + case 0xdf: { - fill_line_buffer(1); // LCOV_EXCL_LINE - } - yych = *m_cursor; - if (yybm[0 + yych] & 32) - { - goto basic_json_parser_6; - } - { - continue; - } -basic_json_parser_9: - yyaccept = 0; - yych = *(m_marker = ++m_cursor); - if (yych <= 0x1F) - { - goto basic_json_parser_5; - } - if (yych <= 0x7F) - { - goto basic_json_parser_31; - } - if (yych <= 0xC1) - { - goto basic_json_parser_5; - } - if (yych <= 0xF4) - { - goto basic_json_parser_31; - } - goto basic_json_parser_5; -basic_json_parser_10: - ++m_cursor; - { - last_token_type = token_type::value_separator; - break; - } -basic_json_parser_12: - yych = *++m_cursor; - if (yych <= '/') - { - goto basic_json_parser_5; - } - if (yych <= '0') - { - goto basic_json_parser_43; - } - if (yych <= '9') - { - goto 
basic_json_parser_45; - } - goto basic_json_parser_5; -basic_json_parser_13: - yyaccept = 1; - yych = *(m_marker = ++m_cursor); - if (yych <= '9') - { - if (yych == '.') + add(current); + get(); + if (JSON_LIKELY(0x80 <= current and current <= 0xbf)) { - goto basic_json_parser_47; - } - if (yych >= '0') - { - goto basic_json_parser_48; + add(current); + continue; } + + error_message = "invalid string: ill-formed UTF-8 byte"; + return token_type::parse_error; } - else + + // U+0800..U+0FFF: bytes E0 A0..BF 80..BF + case 0xe0: { - if (yych <= 'E') + add(current); + get(); + if (JSON_LIKELY(0xa0 <= current and current <= 0xbf)) { - if (yych >= 'E') + add(current); + get(); + if (JSON_LIKELY(0x80 <= current and current <= 0xbf)) { - goto basic_json_parser_51; + add(current); + continue; } } - else + + error_message = "invalid string: ill-formed UTF-8 byte"; + return token_type::parse_error; + } + + // U+1000..U+CFFF: bytes E1..EC 80..BF 80..BF + // U+E000..U+FFFF: bytes EE..EF 80..BF 80..BF + case 0xe1: + case 0xe2: + case 0xe3: + case 0xe4: + case 0xe5: + case 0xe6: + case 0xe7: + case 0xe8: + case 0xe9: + case 0xea: + case 0xeb: + case 0xec: + case 0xee: + case 0xef: + { + add(current); + get(); + if (JSON_LIKELY(0x80 <= current and current <= 0xbf)) { - if (yych == 'e') + add(current); + get(); + if (JSON_LIKELY(0x80 <= current and current <= 0xbf)) { - goto basic_json_parser_51; + add(current); + continue; } } + + error_message = "invalid string: ill-formed UTF-8 byte"; + return token_type::parse_error; } -basic_json_parser_14: + + // U+D000..U+D7FF: bytes ED 80..9F 80..BF + case 0xed: { - last_token_type = token_type::value_unsigned; - break; - } -basic_json_parser_15: - yyaccept = 1; - m_marker = ++m_cursor; - if ((m_limit - m_cursor) < 3) - { - fill_line_buffer(3); // LCOV_EXCL_LINE - } - yych = *m_cursor; - if (yybm[0 + yych] & 64) - { - goto basic_json_parser_15; - } - if (yych <= 'D') - { - if (yych == '.') + add(current); + get(); + if (JSON_LIKELY(0x80 <= 
current and current <= 0x9f)) { - goto basic_json_parser_47; - } - goto basic_json_parser_14; - } - else - { - if (yych <= 'E') - { - goto basic_json_parser_51; - } - if (yych == 'e') - { - goto basic_json_parser_51; - } - goto basic_json_parser_14; - } -basic_json_parser_17: - ++m_cursor; - { - last_token_type = token_type::name_separator; - break; - } -basic_json_parser_19: - ++m_cursor; - { - last_token_type = token_type::begin_array; - break; - } -basic_json_parser_21: - ++m_cursor; - { - last_token_type = token_type::end_array; - break; - } -basic_json_parser_23: - yyaccept = 0; - yych = *(m_marker = ++m_cursor); - if (yych == 'a') - { - goto basic_json_parser_52; - } - goto basic_json_parser_5; -basic_json_parser_24: - yyaccept = 0; - yych = *(m_marker = ++m_cursor); - if (yych == 'u') - { - goto basic_json_parser_53; - } - goto basic_json_parser_5; -basic_json_parser_25: - yyaccept = 0; - yych = *(m_marker = ++m_cursor); - if (yych == 'r') - { - goto basic_json_parser_54; - } - goto basic_json_parser_5; -basic_json_parser_26: - ++m_cursor; - { - last_token_type = token_type::begin_object; - break; - } -basic_json_parser_28: - ++m_cursor; - { - last_token_type = token_type::end_object; - break; - } -basic_json_parser_30: - ++m_cursor; - if (m_limit <= m_cursor) - { - fill_line_buffer(1); // LCOV_EXCL_LINE - } - yych = *m_cursor; -basic_json_parser_31: - if (yybm[0 + yych] & 128) - { - goto basic_json_parser_30; - } - if (yych <= 0xE0) - { - if (yych <= '\\') - { - if (yych <= 0x1F) + add(current); + get(); + if (JSON_LIKELY(0x80 <= current and current <= 0xbf)) { - goto basic_json_parser_32; - } - if (yych <= '"') - { - goto basic_json_parser_33; - } - goto basic_json_parser_35; - } - else - { - if (yych <= 0xC1) - { - goto basic_json_parser_32; - } - if (yych <= 0xDF) - { - goto basic_json_parser_36; - } - goto basic_json_parser_37; - } - } - else - { - if (yych <= 0xEF) - { - if (yych == 0xED) - { - goto basic_json_parser_39; - } - goto 
basic_json_parser_38; - } - else - { - if (yych <= 0xF0) - { - goto basic_json_parser_40; - } - if (yych <= 0xF3) - { - goto basic_json_parser_41; - } - if (yych <= 0xF4) - { - goto basic_json_parser_42; + add(current); + continue; } } + + error_message = "invalid string: ill-formed UTF-8 byte"; + return token_type::parse_error; } -basic_json_parser_32: - m_cursor = m_marker; - if (yyaccept <= 1) + + // U+10000..U+3FFFF F0 90..BF 80..BF 80..BF + case 0xf0: { - if (yyaccept == 0) + add(current); + get(); + if (JSON_LIKELY(0x90 <= current and current <= 0xbf)) { - goto basic_json_parser_5; - } - else - { - goto basic_json_parser_14; - } - } - else - { - if (yyaccept == 2) - { - goto basic_json_parser_44; - } - else - { - goto basic_json_parser_58; - } - } -basic_json_parser_33: - ++m_cursor; - { - last_token_type = token_type::value_string; - break; - } -basic_json_parser_35: - ++m_cursor; - if (m_limit <= m_cursor) - { - fill_line_buffer(1); // LCOV_EXCL_LINE - } - yych = *m_cursor; - if (yych <= 'e') - { - if (yych <= '/') - { - if (yych == '"') + add(current); + get(); + if (JSON_LIKELY(0x80 <= current and current <= 0xbf)) { - goto basic_json_parser_30; - } - if (yych <= '.') - { - goto basic_json_parser_32; - } - goto basic_json_parser_30; - } - else - { - if (yych <= '\\') - { - if (yych <= '[') + add(current); + get(); + if (JSON_LIKELY(0x80 <= current and current <= 0xbf)) { - goto basic_json_parser_32; + add(current); + continue; } - goto basic_json_parser_30; } - else + } + + error_message = "invalid string: ill-formed UTF-8 byte"; + return token_type::parse_error; + } + + // U+40000..U+FFFFF F1..F3 80..BF 80..BF 80..BF + case 0xf1: + case 0xf2: + case 0xf3: + { + add(current); + get(); + if (JSON_LIKELY(0x80 <= current and current <= 0xbf)) + { + add(current); + get(); + if (JSON_LIKELY(0x80 <= current and current <= 0xbf)) { - if (yych == 'b') + add(current); + get(); + if (JSON_LIKELY(0x80 <= current and current <= 0xbf)) { - goto basic_json_parser_30; + 
add(current); + continue; } - goto basic_json_parser_32; } } + + error_message = "invalid string: ill-formed UTF-8 byte"; + return token_type::parse_error; } - else + + // U+100000..U+10FFFF F4 80..8F 80..BF 80..BF + case 0xf4: { - if (yych <= 'q') + add(current); + get(); + if (JSON_LIKELY(0x80 <= current and current <= 0x8f)) { - if (yych <= 'f') + add(current); + get(); + if (JSON_LIKELY(0x80 <= current and current <= 0xbf)) { - goto basic_json_parser_30; - } - if (yych == 'n') - { - goto basic_json_parser_30; - } - goto basic_json_parser_32; - } - else - { - if (yych <= 's') - { - if (yych <= 'r') + add(current); + get(); + if (JSON_LIKELY(0x80 <= current and current <= 0xbf)) { - goto basic_json_parser_30; + add(current); + continue; } - goto basic_json_parser_32; - } - else - { - if (yych <= 't') - { - goto basic_json_parser_30; - } - if (yych <= 'u') - { - goto basic_json_parser_55; - } - goto basic_json_parser_32; } } + + error_message = "invalid string: ill-formed UTF-8 byte"; + return token_type::parse_error; } -basic_json_parser_36: - ++m_cursor; - if (m_limit <= m_cursor) + + // remaining bytes (80..C1 and F5..FF) are ill-formed + default: { - fill_line_buffer(1); // LCOV_EXCL_LINE - } - yych = *m_cursor; - if (yych <= 0x7F) - { - goto basic_json_parser_32; - } - if (yych <= 0xBF) - { - goto basic_json_parser_30; - } - goto basic_json_parser_32; -basic_json_parser_37: - ++m_cursor; - if (m_limit <= m_cursor) - { - fill_line_buffer(1); // LCOV_EXCL_LINE - } - yych = *m_cursor; - if (yych <= 0x9F) - { - goto basic_json_parser_32; - } - if (yych <= 0xBF) - { - goto basic_json_parser_36; - } - goto basic_json_parser_32; -basic_json_parser_38: - ++m_cursor; - if (m_limit <= m_cursor) - { - fill_line_buffer(1); // LCOV_EXCL_LINE - } - yych = *m_cursor; - if (yych <= 0x7F) - { - goto basic_json_parser_32; - } - if (yych <= 0xBF) - { - goto basic_json_parser_36; - } - goto basic_json_parser_32; -basic_json_parser_39: - ++m_cursor; - if (m_limit <= m_cursor) - { 
- fill_line_buffer(1); // LCOV_EXCL_LINE - } - yych = *m_cursor; - if (yych <= 0x7F) - { - goto basic_json_parser_32; - } - if (yych <= 0x9F) - { - goto basic_json_parser_36; - } - goto basic_json_parser_32; -basic_json_parser_40: - ++m_cursor; - if (m_limit <= m_cursor) - { - fill_line_buffer(1); // LCOV_EXCL_LINE - } - yych = *m_cursor; - if (yych <= 0x8F) - { - goto basic_json_parser_32; - } - if (yych <= 0xBF) - { - goto basic_json_parser_38; - } - goto basic_json_parser_32; -basic_json_parser_41: - ++m_cursor; - if (m_limit <= m_cursor) - { - fill_line_buffer(1); // LCOV_EXCL_LINE - } - yych = *m_cursor; - if (yych <= 0x7F) - { - goto basic_json_parser_32; - } - if (yych <= 0xBF) - { - goto basic_json_parser_38; - } - goto basic_json_parser_32; -basic_json_parser_42: - ++m_cursor; - if (m_limit <= m_cursor) - { - fill_line_buffer(1); // LCOV_EXCL_LINE - } - yych = *m_cursor; - if (yych <= 0x7F) - { - goto basic_json_parser_32; - } - if (yych <= 0x8F) - { - goto basic_json_parser_38; - } - goto basic_json_parser_32; -basic_json_parser_43: - yyaccept = 2; - yych = *(m_marker = ++m_cursor); - if (yych <= '9') - { - if (yych == '.') - { - goto basic_json_parser_47; - } - if (yych >= '0') - { - goto basic_json_parser_48; - } - } - else - { - if (yych <= 'E') - { - if (yych >= 'E') - { - goto basic_json_parser_51; - } - } - else - { - if (yych == 'e') - { - goto basic_json_parser_51; - } - } - } -basic_json_parser_44: - { - last_token_type = token_type::value_integer; - break; - } -basic_json_parser_45: - yyaccept = 2; - m_marker = ++m_cursor; - if ((m_limit - m_cursor) < 3) - { - fill_line_buffer(3); // LCOV_EXCL_LINE - } - yych = *m_cursor; - if (yych <= '9') - { - if (yych == '.') - { - goto basic_json_parser_47; - } - if (yych <= '/') - { - goto basic_json_parser_44; - } - goto basic_json_parser_45; - } - else - { - if (yych <= 'E') - { - if (yych <= 'D') - { - goto basic_json_parser_44; - } - goto basic_json_parser_51; - } - else - { - if (yych == 'e') - { - 
goto basic_json_parser_51; - } - goto basic_json_parser_44; - } - } -basic_json_parser_47: - yych = *++m_cursor; - if (yych <= '/') - { - goto basic_json_parser_32; - } - if (yych <= '9') - { - goto basic_json_parser_56; - } - goto basic_json_parser_32; -basic_json_parser_48: - ++m_cursor; - if (m_limit <= m_cursor) - { - fill_line_buffer(1); // LCOV_EXCL_LINE - } - yych = *m_cursor; - if (yych <= '/') - { - goto basic_json_parser_50; - } - if (yych <= '9') - { - goto basic_json_parser_48; - } -basic_json_parser_50: - { - last_token_type = token_type::parse_error; - break; - } -basic_json_parser_51: - yych = *++m_cursor; - if (yych <= ',') - { - if (yych == '+') - { - goto basic_json_parser_59; - } - goto basic_json_parser_32; - } - else - { - if (yych <= '-') - { - goto basic_json_parser_59; - } - if (yych <= '/') - { - goto basic_json_parser_32; - } - if (yych <= '9') - { - goto basic_json_parser_60; - } - goto basic_json_parser_32; - } -basic_json_parser_52: - yych = *++m_cursor; - if (yych == 'l') - { - goto basic_json_parser_62; - } - goto basic_json_parser_32; -basic_json_parser_53: - yych = *++m_cursor; - if (yych == 'l') - { - goto basic_json_parser_63; - } - goto basic_json_parser_32; -basic_json_parser_54: - yych = *++m_cursor; - if (yych == 'u') - { - goto basic_json_parser_64; - } - goto basic_json_parser_32; -basic_json_parser_55: - ++m_cursor; - if (m_limit <= m_cursor) - { - fill_line_buffer(1); // LCOV_EXCL_LINE - } - yych = *m_cursor; - if (yych <= '@') - { - if (yych <= '/') - { - goto basic_json_parser_32; - } - if (yych <= '9') - { - goto basic_json_parser_65; - } - goto basic_json_parser_32; - } - else - { - if (yych <= 'F') - { - goto basic_json_parser_65; - } - if (yych <= '`') - { - goto basic_json_parser_32; - } - if (yych <= 'f') - { - goto basic_json_parser_65; - } - goto basic_json_parser_32; - } -basic_json_parser_56: - yyaccept = 3; - m_marker = ++m_cursor; - if ((m_limit - m_cursor) < 3) - { - fill_line_buffer(3); // LCOV_EXCL_LINE - 
} - yych = *m_cursor; - if (yych <= 'D') - { - if (yych <= '/') - { - goto basic_json_parser_58; - } - if (yych <= '9') - { - goto basic_json_parser_56; - } - } - else - { - if (yych <= 'E') - { - goto basic_json_parser_51; - } - if (yych == 'e') - { - goto basic_json_parser_51; - } - } -basic_json_parser_58: - { - last_token_type = token_type::value_float; - break; - } -basic_json_parser_59: - yych = *++m_cursor; - if (yych <= '/') - { - goto basic_json_parser_32; - } - if (yych >= ':') - { - goto basic_json_parser_32; - } -basic_json_parser_60: - ++m_cursor; - if (m_limit <= m_cursor) - { - fill_line_buffer(1); // LCOV_EXCL_LINE - } - yych = *m_cursor; - if (yych <= '/') - { - goto basic_json_parser_58; - } - if (yych <= '9') - { - goto basic_json_parser_60; - } - goto basic_json_parser_58; -basic_json_parser_62: - yych = *++m_cursor; - if (yych == 's') - { - goto basic_json_parser_66; - } - goto basic_json_parser_32; -basic_json_parser_63: - yych = *++m_cursor; - if (yych == 'l') - { - goto basic_json_parser_67; - } - goto basic_json_parser_32; -basic_json_parser_64: - yych = *++m_cursor; - if (yych == 'e') - { - goto basic_json_parser_69; - } - goto basic_json_parser_32; -basic_json_parser_65: - ++m_cursor; - if (m_limit <= m_cursor) - { - fill_line_buffer(1); // LCOV_EXCL_LINE - } - yych = *m_cursor; - if (yych <= '@') - { - if (yych <= '/') - { - goto basic_json_parser_32; - } - if (yych <= '9') - { - goto basic_json_parser_71; - } - goto basic_json_parser_32; - } - else - { - if (yych <= 'F') - { - goto basic_json_parser_71; - } - if (yych <= '`') - { - goto basic_json_parser_32; - } - if (yych <= 'f') - { - goto basic_json_parser_71; - } - goto basic_json_parser_32; - } -basic_json_parser_66: - yych = *++m_cursor; - if (yych == 'e') - { - goto basic_json_parser_72; - } - goto basic_json_parser_32; -basic_json_parser_67: - ++m_cursor; - { - last_token_type = token_type::literal_null; - break; - } -basic_json_parser_69: - ++m_cursor; - { - last_token_type = 
token_type::literal_true; - break; - } -basic_json_parser_71: - ++m_cursor; - if (m_limit <= m_cursor) - { - fill_line_buffer(1); // LCOV_EXCL_LINE - } - yych = *m_cursor; - if (yych <= '@') - { - if (yych <= '/') - { - goto basic_json_parser_32; - } - if (yych <= '9') - { - goto basic_json_parser_74; - } - goto basic_json_parser_32; - } - else - { - if (yych <= 'F') - { - goto basic_json_parser_74; - } - if (yych <= '`') - { - goto basic_json_parser_32; - } - if (yych <= 'f') - { - goto basic_json_parser_74; - } - goto basic_json_parser_32; - } -basic_json_parser_72: - ++m_cursor; - { - last_token_type = token_type::literal_false; - break; - } -basic_json_parser_74: - ++m_cursor; - if (m_limit <= m_cursor) - { - fill_line_buffer(1); // LCOV_EXCL_LINE - } - yych = *m_cursor; - if (yych <= '@') - { - if (yych <= '/') - { - goto basic_json_parser_32; - } - if (yych <= '9') - { - goto basic_json_parser_30; - } - goto basic_json_parser_32; - } - else - { - if (yych <= 'F') - { - goto basic_json_parser_30; - } - if (yych <= '`') - { - goto basic_json_parser_32; - } - if (yych <= 'f') - { - goto basic_json_parser_30; - } - goto basic_json_parser_32; + error_message = "invalid string: ill-formed UTF-8 byte"; + return token_type::parse_error; } } - } + } - return last_token_type; + static void strtof(float& f, const char* str, char** endptr) noexcept + { + f = std::strtof(str, endptr); + } + + static void strtof(double& f, const char* str, char** endptr) noexcept + { + f = std::strtod(str, endptr); + } + + static void strtof(long double& f, const char* str, char** endptr) noexcept + { + f = std::strtold(str, endptr); } /*! - @brief append data from the stream to the line buffer + @brief scan a number literal - This function is called by the scan() function when the end of the - buffer (`m_limit`) is reached and the `m_cursor` pointer cannot be - incremented without leaving the limits of the line buffer. Note re2c - decides when to call this function. 
+ This function scans a string according to Sect. 6 of RFC 7159. - If the lexer reads from contiguous storage, there is no trailing null - byte. Therefore, this function must make sure to add these padding - null bytes. + The function is realized with a deterministic finite state machine + derived from the grammar described in RFC 7159. Starting in state + "init", the input is read and used to determined the next state. Only + state "done" accepts the number. State "error" is a trap state to model + errors. In the table below, "anything" means any character but the ones + listed before. - If the lexer reads from an input stream, this function reads the next - line of the input. + state | 0 | 1-9 | e E | + | - | . | anything + ---------|----------|----------|----------|---------|---------|----------|----------- + init | zero | any1 | [error] | [error] | minus | [error] | [error] + minus | zero | any1 | [error] | [error] | [error] | [error] | [error] + zero | done | done | exponent | done | done | decimal1 | done + any1 | any1 | any1 | exponent | done | done | decimal1 | done + decimal1 | decimal2 | [error] | [error] | [error] | [error] | [error] | [error] + decimal2 | decimal2 | decimal2 | exponent | done | done | done | done + exponent | any2 | any2 | [error] | sign | sign | [error] | [error] + sign | any2 | any2 | [error] | [error] | [error] | [error] | [error] + any2 | any2 | any2 | done | done | done | done | done - @pre - p p p p p p u u u u u x . . . . . . - ^ ^ ^ ^ - m_content m_start | m_limit - m_cursor + The state machine is realized with one label per state (prefixed with + "scan_number_") and `goto` statements between them. The state machine + contains cycles, but any cycle can be left when EOF is read. Therefore, + the function is guaranteed to terminate. - @post - u u u u u x x x x x x x . . . . . . - ^ ^ ^ - | m_cursor m_limit - m_start - m_content + During scanning, the read bytes are stored in yytext. 
This string is + then converted to a signed integer, an unsigned integer, or a + floating-point number. + + @return token_type::value_unsigned, token_type::value_integer, or + token_type::value_float if number could be successfully scanned, + token_type::parse_error otherwise + + @note The scanner is independent of the current locale. Internally, the + locale's decimal point is used instead of `.` to work with the + locale-dependent converters. */ - void fill_line_buffer(size_t n = 0) + token_type scan_number() { - // if line buffer is used, m_content points to its data - assert(m_line_buffer.empty() - or m_content == reinterpret_cast(m_line_buffer.data())); + // reset yytext to store the number's bytes + reset(); - // if line buffer is used, m_limit is set past the end of its data - assert(m_line_buffer.empty() - or m_limit == m_content + m_line_buffer.size()); + // the type of the parsed number; initially set to unsigned; will be + // changed if minus sign, decimal point or exponent is read + token_type number_type = token_type::value_unsigned; - // pointer relationships - assert(m_content <= m_start); - assert(m_start <= m_cursor); - assert(m_cursor <= m_limit); - assert(m_marker == nullptr or m_marker <= m_limit); - - // number of processed characters (p) - const auto num_processed_chars = static_cast(m_start - m_content); - // offset for m_marker wrt. to m_start - const auto offset_marker = (m_marker == nullptr) ? 0 : m_marker - m_start; - // number of unprocessed characters (u) - const auto offset_cursor = m_cursor - m_start; - - // no stream is used or end of file is reached - if (m_stream == nullptr or m_stream->eof()) + // state (init): we just found out we need to scan a number + switch (current) { - // m_start may or may not be pointing into m_line_buffer at - // this point. We trust the standard library to do the right - // thing. 
See http://stackoverflow.com/q/28142011/266378 - m_line_buffer.assign(m_start, m_limit); - - // append n characters to make sure that there is sufficient - // space between m_cursor and m_limit - m_line_buffer.append(1, '\x00'); - if (n > 0) + case '-': { - m_line_buffer.append(n - 1, '\x01'); + add(current); + goto scan_number_minus; + } + + case '0': + { + add(current); + goto scan_number_zero; + } + + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': + { + add(current); + goto scan_number_any1; + } + + default: + { + // all other characters are rejected outside scan_number() + assert(false); // LCOV_EXCL_LINE } } - else - { - // delete processed characters from line buffer - m_line_buffer.erase(0, num_processed_chars); - // read next line from input stream - m_line_buffer_tmp.clear(); - std::getline(*m_stream, m_line_buffer_tmp, '\n'); - // add line with newline symbol to the line buffer - m_line_buffer += m_line_buffer_tmp; - m_line_buffer.push_back('\n'); +scan_number_minus: + // state: we just parsed a leading minus sign + number_type = token_type::value_integer; + switch (get()) + { + case '0': + { + add(current); + goto scan_number_zero; + } + + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': + { + add(current); + goto scan_number_any1; + } + + default: + { + error_message = "invalid number; expected digit after '-'"; + return token_type::parse_error; + } } - // set pointers - m_content = reinterpret_cast(m_line_buffer.data()); - assert(m_content != nullptr); - m_start = m_content; - m_marker = m_start + offset_marker; - m_cursor = m_start + offset_cursor; - m_limit = m_start + m_line_buffer.size(); - } +scan_number_zero: + // state: we just parse a zero (maybe with a leading minus sign) + switch (get()) + { + case '.': + { + add(decimal_point_char); + goto scan_number_decimal1; + } - /// return string representation of last read token - 
string_t get_token_string() const - { - assert(m_start != nullptr); - return string_t(reinterpret_cast(m_start), - static_cast(m_cursor - m_start)); + case 'e': + case 'E': + { + add(current); + goto scan_number_exponent; + } + + default: + { + goto scan_number_done; + } + } + +scan_number_any1: + // state: we just parsed a number 0-9 (maybe with a leading minus sign) + switch (get()) + { + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': + { + add(current); + goto scan_number_any1; + } + + case '.': + { + add(decimal_point_char); + goto scan_number_decimal1; + } + + case 'e': + case 'E': + { + add(current); + goto scan_number_exponent; + } + + default: + { + goto scan_number_done; + } + } + +scan_number_decimal1: + // state: we just parsed a decimal point + number_type = token_type::value_float; + switch (get()) + { + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': + { + add(current); + goto scan_number_decimal2; + } + + default: + { + error_message = "invalid number; expected digit after '.'"; + return token_type::parse_error; + } + } + +scan_number_decimal2: + // we just parsed at least one number after a decimal point + switch (get()) + { + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': + { + add(current); + goto scan_number_decimal2; + } + + case 'e': + case 'E': + { + add(current); + goto scan_number_exponent; + } + + default: + { + goto scan_number_done; + } + } + +scan_number_exponent: + // we just parsed an exponent + number_type = token_type::value_float; + switch (get()) + { + case '+': + case '-': + { + add(current); + goto scan_number_sign; + } + + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': + { + add(current); + goto scan_number_any2; + } + + default: + { + error_message = 
"invalid number; expected '+', '-', or digit after exponent"; + return token_type::parse_error; + } + } + +scan_number_sign: + // we just parsed an exponent sign + switch (get()) + { + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': + { + add(current); + goto scan_number_any2; + } + + default: + { + error_message = "invalid number; expected digit after exponent sign"; + return token_type::parse_error; + } + } + +scan_number_any2: + // we just parsed a number after the exponent or exponent sign + switch (get()) + { + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': + { + add(current); + goto scan_number_any2; + } + + default: + { + goto scan_number_done; + } + } + +scan_number_done: + // unget the character after the number (we only read it to know + // that we are done scanning a number) + --chars_read; + next_unget = true; + + // terminate token + add('\0'); + --yylen; + + // try to parse integers first and fall back to floats + if (number_type == token_type::value_unsigned) + { + char* endptr = nullptr; + errno = 0; + const auto x = std::strtoull(yytext.data(), &endptr, 10); + + // we checked the number format before + assert(endptr == yytext.data() + yylen); + + if (errno == 0) + { + value_unsigned = static_cast(x); + if (value_unsigned == x) + { + return token_type::value_unsigned; + } + } + } + else if (number_type == token_type::value_integer) + { + char* endptr = nullptr; + errno = 0; + const auto x = std::strtoll(yytext.data(), &endptr, 10); + + // we checked the number format before + assert(endptr == yytext.data() + yylen); + + if (errno == 0) + { + value_integer = static_cast(x); + if (value_integer == x) + { + return token_type::value_integer; + } + } + } + + // this code is reached if we parse a floating-point number or if + // an integer conversion above failed + strtof(value_float, yytext.data(), nullptr); + 
return token_type::value_float; } /*! - @brief return string value for string tokens - - The function iterates the characters between the opening and closing - quotes of the string value. The complete string is the range - [m_start,m_cursor). Consequently, we iterate from m_start+1 to - m_cursor-1. - - We differentiate two cases: - - 1. Escaped characters. In this case, a new character is constructed - according to the nature of the escape. Some escapes create new - characters (e.g., `"\\n"` is replaced by `"\n"`), some are copied - as is (e.g., `"\\\\"`). Furthermore, Unicode escapes of the shape - `"\\uxxxx"` need special care. In this case, to_unicode takes care - of the construction of the values. - 2. Unescaped characters are copied as is. - - @pre `m_cursor - m_start >= 2`, meaning the length of the last token - is at least 2 bytes which is trivially true for any string (which - consists of at least two quotes). - - " c1 c2 c3 ... " - ^ ^ - m_start m_cursor - - @complexity Linear in the length of the string.\n - - Lemma: The loop body will always terminate.\n - - Proof (by contradiction): Assume the loop body does not terminate. As - the loop body does not contain another loop, one of the called - functions must never return. The called functions are `std::strtoul` - and to_unicode. Neither function can loop forever, so the loop body - will never loop forever which contradicts the assumption that the loop - body does not terminate, q.e.d.\n - - Lemma: The loop condition for the for loop is eventually false.\n - - Proof (by contradiction): Assume the loop does not terminate. Due to - the above lemma, this can only be due to a tautological loop - condition; that is, the loop condition i < m_cursor - 1 must always be - true. Let x be the change of i for any loop iteration. Then - m_start + 1 + x < m_cursor - 1 must hold to loop indefinitely. This - can be rephrased to m_cursor - m_start - 2 > x. 
With the - precondition, we x <= 0, meaning that the loop condition holds - indefinitely if i is always decreased. However, observe that the value - of i is strictly increasing with each iteration, as it is incremented - by 1 in the iteration expression and never decremented inside the loop - body. Hence, the loop condition will eventually be false which - contradicts the assumption that the loop condition is a tautology, - q.e.d. - - @return string value of current token without opening and closing - quotes - @throw std::out_of_range if to_unicode fails + @param[in] literal_text the literal text to expect + @param[in] length the length of the passed literal text + @param[in] return_type the token type to return on success */ - string_t get_string() const + token_type scan_literal(const char* literal_text, const size_t length, + token_type return_type) { - assert(m_cursor - m_start >= 2); - - string_t result; - result.reserve(static_cast(m_cursor - m_start - 2)); - - // iterate the result between the quotes - for (const lexer_char_t* i = m_start + 1; i < m_cursor - 1; ++i) + assert(current == literal_text[0]); + for (size_t i = 1; i < length; ++i) { - // find next escape character - auto e = std::find(i, m_cursor - 1, '\\'); - if (e != i) + if (JSON_UNLIKELY(get() != literal_text[i])) { - // see https://github.com/nlohmann/json/issues/365#issuecomment-262874705 - for (auto k = i; k < e; k++) - { - result.push_back(static_cast(*k)); - } - i = e - 1; // -1 because of ++i + error_message = "invalid literal"; + return token_type::parse_error; + } + } + return return_type; + } + + ///////////////////// + // input management + ///////////////////// + + /// reset yytext + void reset() noexcept + { + yylen = 0; + start_pos = chars_read - 1; + } + + /// get a character from the input + int get() + { + ++chars_read; + return next_unget + ? 
(next_unget = false, current) + : (current = ia->get_character()); + } + + /// add a character to yytext + void add(int c) + { + // resize yytext if necessary; this condition is deemed unlikely, + // because we start with a 1024-byte buffer + if (JSON_UNLIKELY((yylen + 1 > yytext.capacity()))) + { + yytext.resize(2 * yytext.capacity(), '\0'); + } + assert(yylen < yytext.size()); + yytext[yylen++] = static_cast(c); + } + + public: + ///////////////////// + // value getters + ///////////////////// + + /// return integer value + constexpr number_integer_t get_number_integer() const noexcept + { + return value_integer; + } + + /// return unsigned integer value + constexpr number_unsigned_t get_number_unsigned() const noexcept + { + return value_unsigned; + } + + /// return floating-point value + constexpr number_float_t get_number_float() const noexcept + { + return value_float; + } + + /// return string value + const std::string get_string() + { + // yytext cannot be returned as char*, because it may contain a + // null byte (parsed as "\u0000") + return std::string(yytext.data(), yylen); + } + + ///////////////////// + // diagnostics + ///////////////////// + + /// return position of last read token + constexpr size_t get_position() const noexcept + { + return chars_read; + } + + /// return the last read token (for errors only) + std::string get_token_string() const + { + // get the raw byte sequence of the last token + std::string s = ia->read(start_pos, chars_read - start_pos); + + // escape control characters + std::string result; + for (auto c : s) + { + if (c == '\0' or c == std::char_traits::eof()) + { + // ignore EOF + continue; + } + else if ('\x00' <= c and c <= '\x1f') + { + // escape control characters + std::stringstream ss; + ss << "(c) << ">"; + result += ss.str(); } else { - // processing escaped character - // read next character - ++i; - - switch (*i) - { - // the default escapes - case 't': - { - result += "\t"; - break; - } - case 'b': - { - result 
+= "\b"; - break; - } - case 'f': - { - result += "\f"; - break; - } - case 'n': - { - result += "\n"; - break; - } - case 'r': - { - result += "\r"; - break; - } - case '\\': - { - result += "\\"; - break; - } - case '/': - { - result += "/"; - break; - } - case '"': - { - result += "\""; - break; - } - - // unicode - case 'u': - { - // get code xxxx from uxxxx - auto codepoint = std::strtoul(std::string(reinterpret_cast(i + 1), - 4).c_str(), nullptr, 16); - - // check if codepoint is a high surrogate - if (codepoint >= 0xD800 and codepoint <= 0xDBFF) - { - // make sure there is a subsequent unicode - if ((i + 6 >= m_limit) or * (i + 5) != '\\' or * (i + 6) != 'u') - { - JSON_THROW(std::invalid_argument("missing low surrogate")); - } - - // get code yyyy from uxxxx\uyyyy - auto codepoint2 = std::strtoul(std::string(reinterpret_cast - (i + 7), 4).c_str(), nullptr, 16); - result += to_unicode(codepoint, codepoint2); - // skip the next 10 characters (xxxx\uyyyy) - i += 10; - } - else if (codepoint >= 0xDC00 and codepoint <= 0xDFFF) - { - // we found a lone low surrogate - JSON_THROW(std::invalid_argument("missing high surrogate")); - } - else - { - // add unicode character(s) - result += to_unicode(codepoint); - // skip the next four characters (xxxx) - i += 4; - } - break; - } - } + // add character as is + result.append(1, c); } } return result; } - - /*! - @brief parse string into a built-in arithmetic type as if the current - locale is POSIX. - - @note in floating-point case strtod may parse past the token's end - - this is not an error - - @note any leading blanks are not handled - */ - struct strtonum + /// return syntax error message + constexpr const char* get_error_message() const noexcept { - public: - strtonum(const char* start, const char* end) - : m_start(start), m_end(end) - {} + return error_message; + } - /*! 
- @return true iff parsed successfully as number of type T + ///////////////////// + // actual scanner + ///////////////////// - @param[in,out] val shall contain parsed value, or undefined value - if could not parse - */ - template::value>::type> - bool to(T& val) const - { - return parse(val, std::is_integral()); - } - - private: - const char* const m_start = nullptr; - const char* const m_end = nullptr; - - // floating-point conversion - - // overloaded wrappers for strtod/strtof/strtold - // that will be called from parse - static void strtof(float& f, const char* str, char** endptr) - { - f = std::strtof(str, endptr); - } - - static void strtof(double& f, const char* str, char** endptr) - { - f = std::strtod(str, endptr); - } - - static void strtof(long double& f, const char* str, char** endptr) - { - f = std::strtold(str, endptr); - } - - template - bool parse(T& value, /*is_integral=*/std::false_type) const - { - // replace decimal separator with locale-specific version, - // when necessary; data will point to either the original - // string, or buf, or tempstr containing the fixed string. - std::string tempstr; - std::array buf; - const size_t len = static_cast(m_end - m_start); - - // lexer will reject empty numbers - assert(len > 0); - - // since dealing with strtod family of functions, we're - // getting the decimal point char from the C locale facilities - // instead of C++'s numpunct facet of the current std::locale - const auto loc = localeconv(); - assert(loc != nullptr); - const char decimal_point_char = (loc->decimal_point == nullptr) ? '.' 
: loc->decimal_point[0]; - - const char* data = m_start; - - if (decimal_point_char != '.') - { - const size_t ds_pos = static_cast(std::find(m_start, m_end, '.') - m_start); - - if (ds_pos != len) - { - // copy the data into the local buffer or tempstr, if - // buffer is too small; replace decimal separator, and - // update data to point to the modified bytes - if ((len + 1) < buf.size()) - { - std::copy(m_start, m_end, buf.begin()); - buf[len] = 0; - buf[ds_pos] = decimal_point_char; - data = buf.data(); - } - else - { - tempstr.assign(m_start, m_end); - tempstr[ds_pos] = decimal_point_char; - data = tempstr.c_str(); - } - } - } - - char* endptr = nullptr; - value = 0; - // this calls appropriate overload depending on T - strtof(value, data, &endptr); - - // parsing was successful iff strtof parsed exactly the number - // of characters determined by the lexer (len) - const bool ok = (endptr == (data + len)); - - if (ok and (value == static_cast(0.0)) and (*data == '-')) - { - // some implementations forget to negate the zero - value = -0.0; - } - - return ok; - } - - // integral conversion - - signed long long parse_integral(char** endptr, /*is_signed*/std::true_type) const - { - return std::strtoll(m_start, endptr, 10); - } - - unsigned long long parse_integral(char** endptr, /*is_signed*/std::false_type) const - { - return std::strtoull(m_start, endptr, 10); - } - - template - bool parse(T& value, /*is_integral=*/std::true_type) const - { - char* endptr = nullptr; - errno = 0; // these are thread-local - const auto x = parse_integral(&endptr, std::is_signed()); - - // called right overload? 
- static_assert(std::is_signed() == std::is_signed(), ""); - - value = static_cast(x); - - return (x == static_cast(value)) // x fits into destination T - and (x < 0) == (value < 0) // preserved sign - //and ((x != 0) or is_integral()) // strto[u]ll did nto fail - and (errno == 0) // strto[u]ll did not overflow - and (m_start < m_end) // token was not empty - and (endptr == m_end); // parsed entire token exactly - } - }; - - /*! - @brief return number value for number tokens - - This function translates the last token into the most appropriate - number type (either integer, unsigned integer or floating point), - which is passed back to the caller via the result parameter. - - integral numbers that don't fit into the the range of the respective - type are parsed as number_float_t - - floating-point values do not satisfy std::isfinite predicate - are converted to value_t::null - - throws if the entire string [m_start .. m_cursor) cannot be - interpreted as a number - - @param[out] result @ref basic_json object to receive the number. 
- @param[in] token the type of the number token - */ - bool get_number(basic_json& result, const token_type token) const + token_type scan() { - assert(m_start != nullptr); - assert(m_start < m_cursor); - assert((token == token_type::value_unsigned) or - (token == token_type::value_integer) or - (token == token_type::value_float)); - - strtonum num_converter(reinterpret_cast(m_start), - reinterpret_cast(m_cursor)); - - switch (token) + // read next character and ignore whitespace + do { - case lexer::token_type::value_unsigned: - { - number_unsigned_t val; - if (num_converter.to(val)) - { - // parsing successful - result.m_type = value_t::number_unsigned; - result.m_value = val; - return true; - } - break; - } + get(); + } + while (current == ' ' or current == '\t' or current == '\n' or current == '\r'); - case lexer::token_type::value_integer: - { - number_integer_t val; - if (num_converter.to(val)) - { - // parsing successful - result.m_type = value_t::number_integer; - result.m_value = val; - return true; - } - break; - } + switch (current) + { + // structural characters + case '[': + return token_type::begin_array; + case ']': + return token_type::end_array; + case '{': + return token_type::begin_object; + case '}': + return token_type::end_object; + case ':': + return token_type::name_separator; + case ',': + return token_type::value_separator; + // literals + case 't': + return scan_literal("true", 4, token_type::literal_true); + case 'f': + return scan_literal("false", 5, token_type::literal_false); + case 'n': + return scan_literal("null", 4, token_type::literal_null); + + // string + case '\"': + return scan_string(); + + // number + case '-': + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': + return scan_number(); + + // end of input (the null byte is needed when parsing from + // string literals) + case '\0': + case std::char_traits::eof(): + return token_type::end_of_input; + + // 
error default: - { - break; - } + error_message = "invalid literal"; + return token_type::parse_error; } - - // parse float (either explicitly or because a previous conversion - // failed) - number_float_t val; - if (num_converter.to(val)) - { - // parsing successful - result.m_type = value_t::number_float; - result.m_value = val; - - // replace infinity and NAN by null - if (not std::isfinite(result.m_value.number_float)) - { - result.m_type = value_t::null; - result.m_value = basic_json::json_value(); - } - - return true; - } - - // couldn't parse number in any format - return false; } private: - /// optional input stream - std::istream* m_stream = nullptr; - /// line buffer buffer for m_stream - string_t m_line_buffer {}; - /// used for filling m_line_buffer - string_t m_line_buffer_tmp {}; - /// the buffer pointer - const lexer_char_t* m_content = nullptr; - /// pointer to the beginning of the current symbol - const lexer_char_t* m_start = nullptr; - /// pointer for backtracking information - const lexer_char_t* m_marker = nullptr; - /// pointer to the current symbol - const lexer_char_t* m_cursor = nullptr; - /// pointer to the end of the buffer - const lexer_char_t* m_limit = nullptr; - /// the last token type - token_type last_token_type = token_type::end_of_input; + /// input adapter + input_adapter_t ia = nullptr; + + /// the current character + int current = std::char_traits::eof(); + + /// whether get() should return the last character again + bool next_unget = false; + + /// the number of characters read + size_t chars_read = 0; + /// the start position of the current token + size_t start_pos = 0; + + /// buffer for variable-length tokens (numbers, strings) + std::vector yytext = std::vector(1024, '\0'); + /// current index in yytext + size_t yylen = 0; + + /// a description of occurred lexer errors + const char* error_message = ""; + + // number values + number_integer_t value_integer = 0; + number_unsigned_t value_unsigned = 0; + number_float_t 
value_float = 0; + + /// the decimal point + const char decimal_point_char = '.'; }; /*! @@ -11353,30 +12623,23 @@ basic_json_parser_74: class parser { public: - /// a parser reading from a string literal - parser(const char* buff, const parser_callback_t cb = nullptr) - : callback(cb), - m_lexer(reinterpret_cast(buff), std::strlen(buff)) + /// a parser reading from an input adapter + explicit parser(input_adapter_t adapter, + const parser_callback_t cb = nullptr) + : callback(cb), m_lexer(adapter) {} - /// a parser reading from an input stream - parser(std::istream& is, const parser_callback_t cb = nullptr) - : callback(cb), m_lexer(is) - {} + /*! + @brief public parser interface - /// a parser reading from an iterator range with contiguous storage - template::iterator_category, std::random_access_iterator_tag>::value - , int>::type - = 0> - parser(IteratorType first, IteratorType last, const parser_callback_t cb = nullptr) - : callback(cb), - m_lexer(reinterpret_cast(&(*first)), - static_cast(std::distance(first, last))) - {} + @param[in] strict whether to expect the last token to be EOF + @return parsed JSON value - /// public parser interface - basic_json parse() + @throw parse_error.101 in case of an unexpected token + @throw parse_error.102 if to_unicode fails or surrogate error + @throw parse_error.103 if to_unicode fails + */ + basic_json parse(const bool strict = true) { // read first token get_token(); @@ -11384,15 +12647,48 @@ basic_json_parser_74: basic_json result = parse_internal(true); result.assert_invariant(); - expect(lexer::token_type::end_of_input); + if (strict) + { + get_token(); + expect(lexer::token_type::end_of_input); + } // return parser result and replace it with null in case the // top-level value was discarded by the callback function return result.is_discarded() ? basic_json() : std::move(result); } + /*! 
+ @brief public accept interface + + @param[in] strict whether to expect the last token to be EOF + @return whether the input is a proper JSON text + */ + bool accept(const bool strict = true) + { + // read first token + get_token(); + + if (not accept_internal()) + { + return false; + } + + if (strict and get_token() != lexer::token_type::end_of_input) + { + return false; + } + + return true; + } + private: - /// the actual parser + /*! + @brief the actual parser + @throw parse_error.101 in case of an unexpected token + @throw parse_error.102 if to_unicode fails or surrogate error + @throw parse_error.103 if to_unicode fails + */ basic_json parse_internal(bool keep) { auto result = basic_json(value_t::discarded); @@ -11415,7 +12711,6 @@ basic_json_parser_74: // closing } -> we are done if (last_token == lexer::token_type::end_object) { - get_token(); if (keep and callback and not callback(--depth, parse_event_t::object_end, result)) { result = basic_json(value_t::discarded); @@ -11423,18 +12718,9 @@ basic_json_parser_74: return result; } - // no comma is expected here - unexpect(lexer::token_type::value_separator); - - // otherwise: parse key-value pairs - do + // parse values + while (true) { - // ugly, but could be fixed with loop reorganization - if (last_token == lexer::token_type::value_separator) - { - get_token(); - } - // store key expect(lexer::token_type::value_string); const auto key = m_lexer.get_string(); @@ -11464,12 +12750,20 @@ basic_json_parser_74: { result[key] = std::move(value); } - } - while (last_token == lexer::token_type::value_separator); - // closing } - expect(lexer::token_type::end_object); - get_token(); + // comma -> next value + get_token(); + if (last_token == lexer::token_type::value_separator) + { + get_token(); + continue; + } + + // closing } + expect(lexer::token_type::end_object); + break; + } + if (keep and callback and not callback(--depth, parse_event_t::object_end, result)) { result = basic_json(value_t::discarded); @@ 
-11494,7 +12788,6 @@ basic_json_parser_74: // closing ] -> we are done if (last_token == lexer::token_type::end_array) { - get_token(); if (callback and not callback(--depth, parse_event_t::array_end, result)) { result = basic_json(value_t::discarded); @@ -11502,30 +12795,29 @@ basic_json_parser_74: return result; } - // no comma is expected here - unexpect(lexer::token_type::value_separator); - - // otherwise: parse values - do + // parse values + while (true) { - // ugly, but could be fixed with loop reorganization - if (last_token == lexer::token_type::value_separator) - { - get_token(); - } - // parse value auto value = parse_internal(keep); if (keep and not value.is_discarded()) { result.push_back(std::move(value)); } - } - while (last_token == lexer::token_type::value_separator); - // closing ] - expect(lexer::token_type::end_array); - get_token(); + // comma -> next value + get_token(); + if (last_token == lexer::token_type::value_separator) + { + get_token(); + continue; + } + + // closing ] + expect(lexer::token_type::end_array); + break; + } + if (keep and callback and not callback(--depth, parse_event_t::array_end, result)) { result = basic_json(value_t::discarded); @@ -11536,22 +12828,18 @@ basic_json_parser_74: case lexer::token_type::literal_null: { - get_token(); result.m_type = value_t::null; break; } case lexer::token_type::value_string: { - const auto s = m_lexer.get_string(); - get_token(); - result = basic_json(s); + result = basic_json(m_lexer.get_string()); break; } case lexer::token_type::literal_true: { - get_token(); result.m_type = value_t::boolean; result.m_value = true; break; @@ -11559,25 +12847,49 @@ basic_json_parser_74: case lexer::token_type::literal_false: { - get_token(); result.m_type = value_t::boolean; result.m_value = false; break; } case lexer::token_type::value_unsigned: + { + result.m_type = value_t::number_unsigned; + result.m_value = m_lexer.get_number_unsigned(); + break; + } + case lexer::token_type::value_integer: + { 
+ result.m_type = value_t::number_integer; + result.m_value = m_lexer.get_number_integer(); + break; + } + case lexer::token_type::value_float: { - m_lexer.get_number(result, last_token); - get_token(); + result.m_type = value_t::number_float; + result.m_value = m_lexer.get_number_float(); + + // throw in case of infinity or NAN + if (JSON_UNLIKELY(not std::isfinite(result.m_value.number_float))) + { + JSON_THROW(out_of_range::create(406, "number overflow parsing '" + m_lexer.get_token_string() + "'")); + } + break; } + case lexer::token_type::parse_error: + { + // using "uninitialized" to avoid "expected" message + expect(lexer::token_type::uninitialized); + } + default: { - // the last token was unexpected - unexpect(last_token); + // the last token was unexpected; we expected a value + expect(lexer::token_type::literal_or_value); } } @@ -11588,36 +12900,167 @@ basic_json_parser_74: return result; } + /*! + @brief the acutal acceptor + + @invariant 1. The last token is not yet processed. Therefore, the + caller of this function must make sure a token has + been read. + 2. When this function returns, the last token is processed. + That is, the last read character was already considered. + + This invariant makes sure that no token needs to be "unput". 
+ */ + bool accept_internal() + { + switch (last_token) + { + case lexer::token_type::begin_object: + { + // read next token + get_token(); + + // closing } -> we are done + if (last_token == lexer::token_type::end_object) + { + return true; + } + + // parse values + while (true) + { + // parse key + if (last_token != lexer::token_type::value_string) + { + return false; + } + + // parse separator (:) + get_token(); + if (last_token != lexer::token_type::name_separator) + { + return false; + } + + // parse value + get_token(); + if (not accept_internal()) + { + return false; + } + + // comma -> next value + get_token(); + if (last_token == lexer::token_type::value_separator) + { + get_token(); + continue; + } + + // closing } + if (last_token != lexer::token_type::end_object) + { + return false; + } + + return true; + } + } + + case lexer::token_type::begin_array: + { + // read next token + get_token(); + + // closing ] -> we are done + if (last_token == lexer::token_type::end_array) + { + return true; + } + + // parse values + while (true) + { + // parse value + if (not accept_internal()) + { + return false; + } + + // comma -> next value + get_token(); + if (last_token == lexer::token_type::value_separator) + { + get_token(); + continue; + } + + // closing ] + if (last_token != lexer::token_type::end_array) + { + return false; + } + + return true; + } + } + + case lexer::token_type::literal_false: + case lexer::token_type::literal_null: + case lexer::token_type::literal_true: + case lexer::token_type::value_float: + case lexer::token_type::value_integer: + case lexer::token_type::value_string: + case lexer::token_type::value_unsigned: + { + return true; + } + + default: + { + // the last token was unexpected + return false; + } + } + } + /// get next token from lexer typename lexer::token_type get_token() { - last_token = m_lexer.scan(); - return last_token; + return (last_token = m_lexer.scan()); } - void expect(typename lexer::token_type t) const + /*! 
+ @throw parse_error.101 if expected token did not occur + */ + void expect(typename lexer::token_type t) { - if (t != last_token) + if (JSON_UNLIKELY(t != last_token)) { - std::string error_msg = "parse error - unexpected "; - error_msg += (last_token == lexer::token_type::parse_error ? ("'" + m_lexer.get_token_string() + - "'") : - lexer::token_type_name(last_token)); - error_msg += "; expected " + lexer::token_type_name(t); - JSON_THROW(std::invalid_argument(error_msg)); + errored = true; + expected = t; + throw_exception(); } } - void unexpect(typename lexer::token_type t) const + [[noreturn]] void throw_exception() const { - if (t == last_token) + std::string error_msg = "syntax error - "; + if (last_token == lexer::token_type::parse_error) { - std::string error_msg = "parse error - unexpected "; - error_msg += (last_token == lexer::token_type::parse_error ? ("'" + m_lexer.get_token_string() + - "'") : - lexer::token_type_name(last_token)); - JSON_THROW(std::invalid_argument(error_msg)); + error_msg += std::string(m_lexer.get_error_message()) + "; last read: '" + m_lexer.get_token_string() + "'"; } + else + { + error_msg += "unexpected " + std::string(lexer::token_type_name(last_token)); + } + + if (expected != lexer::token_type::uninitialized) + { + error_msg += "; expected " + std::string(lexer::token_type_name(expected)); + } + + JSON_THROW(parse_error::create(101, m_lexer.get_position(), error_msg)); } private: @@ -11629,6 +13072,10 @@ basic_json_parser_74: typename lexer::token_type last_token = lexer::token_type::uninitialized; /// the lexer lexer m_lexer; + /// whether a syntax error occurred + bool errored = false; + /// possible reason for the syntax error + typename lexer::token_type expected = lexer::token_type::uninitialized; }; public: @@ -11659,12 +13106,12 @@ basic_json_parser_74: empty string is assumed which references the whole JSON value - @throw std::domain_error if reference token is nonempty and does not - begin with a slash (`/`); 
example: `"JSON pointer must be empty or - begin with /"` - @throw std::domain_error if a tilde (`~`) is not followed by `0` - (representing `~`) or `1` (representing `/`); example: `"escape error: - ~ must be followed with 0 or 1"` + @throw parse_error.107 if the given JSON pointer @a s is nonempty and + does not begin with a slash (`/`); see example below + + @throw parse_error.108 if a tilde (`~`) in the given JSON pointer @a s + is not followed by `0` (representing `~`) or `1` (representing `/`); + see example below @liveexample{The example shows the construction several valid JSON pointers as well as the exceptional behavior.,json_pointer} @@ -11707,12 +13154,15 @@ basic_json_parser_74: } private: - /// remove and return last reference pointer + /*! + @brief remove and return last reference pointer + @throw out_of_range.405 if JSON pointer has no parent + */ std::string pop_back() { if (is_root()) { - JSON_THROW(std::domain_error("JSON pointer has no parent")); + JSON_THROW(out_of_range::create(405, "JSON pointer has no parent")); } auto last = reference_tokens.back(); @@ -11730,7 +13180,7 @@ basic_json_parser_74: { if (is_root()) { - JSON_THROW(std::domain_error("JSON pointer has no parent")); + JSON_THROW(out_of_range::create(405, "JSON pointer has no parent")); } json_pointer result = *this; @@ -11742,6 +13192,9 @@ basic_json_parser_74: @brief create and return a reference to the pointed to value @complexity Linear in the number of reference tokens. 
+ + @throw parse_error.109 if array index is not a number + @throw type_error.313 if value cannot be unflattened */ reference get_and_create(reference j) const { @@ -11778,7 +13231,14 @@ basic_json_parser_74: case value_t::array: { // create an entry in the array - result = &result->operator[](static_cast(std::stoi(reference_token))); + JSON_TRY + { + result = &result->operator[](static_cast(std::stoi(reference_token))); + } + JSON_CATCH (std::invalid_argument&) + { + JSON_THROW(parse_error::create(109, 0, "array index '" + reference_token + "' is not a number")); + } break; } @@ -11791,7 +13251,7 @@ basic_json_parser_74: */ default: { - JSON_THROW(std::domain_error("invalid value to unflatten")); + JSON_THROW(type_error::create(313, "invalid value to unflatten")); } } } @@ -11814,9 +13274,9 @@ basic_json_parser_74: @complexity Linear in the length of the JSON pointer. - @throw std::out_of_range if the JSON pointer can not be resolved - @throw std::domain_error if an array index begins with '0' - @throw std::invalid_argument if an array index was not a number + @throw parse_error.106 if an array index begins with '0' + @throw parse_error.109 if an array index was not a number + @throw out_of_range.404 if the JSON pointer can not be resolved */ reference get_unchecked(pointer ptr) const { @@ -11830,7 +13290,7 @@ basic_json_parser_74: reference_token.end(), [](const char x) { - return std::isdigit(x); + return (x >= '0' and x <= '9'); }); // change value to array for numbers or "-" or to object @@ -11859,7 +13319,7 @@ basic_json_parser_74: // error condition (cf. RFC 6901, Sect. 
4) if (reference_token.size() > 1 and reference_token[0] == '0') { - JSON_THROW(std::domain_error("array index must not begin with '0'")); + JSON_THROW(parse_error::create(106, 0, "array index '" + reference_token + "' must not begin with '0'")); } if (reference_token == "-") @@ -11870,14 +13330,21 @@ basic_json_parser_74: else { // convert array index to number; unchecked access - ptr = &ptr->operator[](static_cast(std::stoi(reference_token))); + JSON_TRY + { + ptr = &ptr->operator[](static_cast(std::stoi(reference_token))); + } + JSON_CATCH (std::invalid_argument&) + { + JSON_THROW(parse_error::create(109, 0, "array index '" + reference_token + "' is not a number")); + } } break; } default: { - JSON_THROW(std::out_of_range("unresolved reference token '" + reference_token + "'")); + JSON_THROW(out_of_range::create(404, "unresolved reference token '" + reference_token + "'")); } } } @@ -11885,6 +13352,12 @@ basic_json_parser_74: return *ptr; } + /*! + @throw parse_error.106 if an array index begins with '0' + @throw parse_error.109 if an array index was not a number + @throw out_of_range.402 if the array index '-' is used + @throw out_of_range.404 if the JSON pointer can not be resolved + */ reference get_checked(pointer ptr) const { for (const auto& reference_token : reference_tokens) @@ -11903,25 +13376,32 @@ basic_json_parser_74: if (reference_token == "-") { // "-" always fails the range check - JSON_THROW(std::out_of_range("array index '-' (" + - std::to_string(ptr->m_value.array->size()) + - ") is out of range")); + JSON_THROW(out_of_range::create(402, "array index '-' (" + + std::to_string(ptr->m_value.array->size()) + + ") is out of range")); } // error condition (cf. RFC 6901, Sect. 
4) if (reference_token.size() > 1 and reference_token[0] == '0') { - JSON_THROW(std::domain_error("array index must not begin with '0'")); + JSON_THROW(parse_error::create(106, 0, "array index '" + reference_token + "' must not begin with '0'")); } // note: at performs range check - ptr = &ptr->at(static_cast(std::stoi(reference_token))); + JSON_TRY + { + ptr = &ptr->at(static_cast(std::stoi(reference_token))); + } + JSON_CATCH (std::invalid_argument&) + { + JSON_THROW(parse_error::create(109, 0, "array index '" + reference_token + "' is not a number")); + } break; } default: { - JSON_THROW(std::out_of_range("unresolved reference token '" + reference_token + "'")); + JSON_THROW(out_of_range::create(404, "unresolved reference token '" + reference_token + "'")); } } } @@ -11936,6 +13416,11 @@ basic_json_parser_74: @return const reference to the JSON value pointed to by the JSON pointer + + @throw parse_error.106 if an array index begins with '0' + @throw parse_error.109 if an array index was not a number + @throw out_of_range.402 if the array index '-' is used + @throw out_of_range.404 if the JSON pointer can not be resolved */ const_reference get_unchecked(const_pointer ptr) const { @@ -11955,25 +13440,32 @@ basic_json_parser_74: if (reference_token == "-") { // "-" cannot be used for const access - JSON_THROW(std::out_of_range("array index '-' (" + - std::to_string(ptr->m_value.array->size()) + - ") is out of range")); + JSON_THROW(out_of_range::create(402, "array index '-' (" + + std::to_string(ptr->m_value.array->size()) + + ") is out of range")); } // error condition (cf. RFC 6901, Sect. 
4) if (reference_token.size() > 1 and reference_token[0] == '0') { - JSON_THROW(std::domain_error("array index must not begin with '0'")); + JSON_THROW(parse_error::create(106, 0, "array index '" + reference_token + "' must not begin with '0'")); } // use unchecked array access - ptr = &ptr->operator[](static_cast(std::stoi(reference_token))); + JSON_TRY + { + ptr = &ptr->operator[](static_cast(std::stoi(reference_token))); + } + JSON_CATCH (std::invalid_argument&) + { + JSON_THROW(parse_error::create(109, 0, "array index '" + reference_token + "' is not a number")); + } break; } default: { - JSON_THROW(std::out_of_range("unresolved reference token '" + reference_token + "'")); + JSON_THROW(out_of_range::create(404, "unresolved reference token '" + reference_token + "'")); } } } @@ -11981,6 +13473,12 @@ basic_json_parser_74: return *ptr; } + /*! + @throw parse_error.106 if an array index begins with '0' + @throw parse_error.109 if an array index was not a number + @throw out_of_range.402 if the array index '-' is used + @throw out_of_range.404 if the JSON pointer can not be resolved + */ const_reference get_checked(const_pointer ptr) const { for (const auto& reference_token : reference_tokens) @@ -11999,25 +13497,32 @@ basic_json_parser_74: if (reference_token == "-") { // "-" always fails the range check - JSON_THROW(std::out_of_range("array index '-' (" + - std::to_string(ptr->m_value.array->size()) + - ") is out of range")); + JSON_THROW(out_of_range::create(402, "array index '-' (" + + std::to_string(ptr->m_value.array->size()) + + ") is out of range")); } // error condition (cf. RFC 6901, Sect. 
4) if (reference_token.size() > 1 and reference_token[0] == '0') { - JSON_THROW(std::domain_error("array index must not begin with '0'")); + JSON_THROW(parse_error::create(106, 0, "array index '" + reference_token + "' must not begin with '0'")); } // note: at performs range check - ptr = &ptr->at(static_cast(std::stoi(reference_token))); + JSON_TRY + { + ptr = &ptr->at(static_cast(std::stoi(reference_token))); + } + JSON_CATCH (std::invalid_argument&) + { + JSON_THROW(parse_error::create(109, 0, "array index '" + reference_token + "' is not a number")); + } break; } default: { - JSON_THROW(std::out_of_range("unresolved reference token '" + reference_token + "'")); + JSON_THROW(out_of_range::create(404, "unresolved reference token '" + reference_token + "'")); } } } @@ -12025,7 +13530,15 @@ basic_json_parser_74: return *ptr; } - /// split the string input to reference tokens + /*! + @brief split the string input to reference tokens + + @note This function is only called by the json_pointer constructor. + All exceptions below are documented there. 
+ + @throw parse_error.107 if the pointer is not empty or begins with '/' + @throw parse_error.108 if character '~' is not followed by '0' or '1' + */ static std::vector split(const std::string& reference_string) { std::vector result; @@ -12039,7 +13552,7 @@ basic_json_parser_74: // check if nonempty reference string begins with slash if (reference_string[0] != '/') { - JSON_THROW(std::domain_error("JSON pointer must be empty or begin with '/'")); + JSON_THROW(parse_error::create(107, 1, "JSON pointer must be empty or begin with '/' - was: '" + reference_string + "'")); } // extract the reference tokens: @@ -12074,7 +13587,7 @@ basic_json_parser_74: (reference_token[pos + 1] != '0' and reference_token[pos + 1] != '1')) { - JSON_THROW(std::domain_error("escape error: '~' must be followed with '0' or '1'")); + JSON_THROW(parse_error::create(108, 0, "escape character '~' must be followed with '0' or '1'")); } } @@ -12086,7 +13599,6 @@ basic_json_parser_74: return result; } - private: /*! @brief replace all occurrences of a substring by another string @@ -12095,7 +13607,8 @@ basic_json_parser_74: @param[in] f the substring to replace with @a t @param[in] t the string to replace @a f - @pre The search string @a f must not be empty. + @pre The search string @a f must not be empty. 
**This precondition is + enforced with an assertion.** @since version 2.0.0 */ @@ -12195,12 +13708,17 @@ basic_json_parser_74: @param[in] value flattened JSON @return unflattened JSON + + @throw parse_error.109 if array index is not a number + @throw type_error.314 if value is not an object + @throw type_error.315 if object values are not primitive + @throw type_error.313 if value cannot be unflattened */ static basic_json unflatten(const basic_json& value) { if (not value.is_object()) { - JSON_THROW(std::domain_error("only objects can be unflattened")); + JSON_THROW(type_error::create(314, "only objects can be unflattened")); } basic_json result; @@ -12210,7 +13728,7 @@ basic_json_parser_74: { if (not element.second.is_primitive()) { - JSON_THROW(std::domain_error("values in object must be primitive")); + JSON_THROW(type_error::create(315, "values in object must be primitive")); } // assign value to reference pointed to by JSON pointer; Note @@ -12224,7 +13742,6 @@ basic_json_parser_74: return result; } - private: friend bool operator==(json_pointer const& lhs, json_pointer const& rhs) noexcept { @@ -12273,9 +13790,9 @@ basic_json_parser_74: @complexity Constant. - @throw std::out_of_range if the JSON pointer can not be resolved - @throw std::domain_error if an array index begins with '0' - @throw std::invalid_argument if an array index was not a number + @throw parse_error.106 if an array index begins with '0' + @throw parse_error.109 if an array index was not a number + @throw out_of_range.404 if the JSON pointer can not be resolved @liveexample{The behavior is shown in the example.,operatorjson_pointer} @@ -12300,9 +13817,10 @@ basic_json_parser_74: @complexity Constant. 
- @throw std::out_of_range if the JSON pointer can not be resolved - @throw std::domain_error if an array index begins with '0' - @throw std::invalid_argument if an array index was not a number + @throw parse_error.106 if an array index begins with '0' + @throw parse_error.109 if an array index was not a number + @throw out_of_range.402 if the array index '-' is used + @throw out_of_range.404 if the JSON pointer can not be resolved @liveexample{The behavior is shown in the example.,operatorjson_pointer_const} @@ -12323,15 +13841,30 @@ basic_json_parser_74: @return reference to the element pointed to by @a ptr + @throw parse_error.106 if an array index in the passed JSON pointer @a ptr + begins with '0'. See example below. + + @throw parse_error.109 if an array index in the passed JSON pointer @a ptr + is not a number. See example below. + + @throw out_of_range.401 if an array index in the passed JSON pointer @a ptr + is out of range. See example below. + + @throw out_of_range.402 if the array index '-' is used in the passed JSON + pointer @a ptr. As `at` provides checked access (and no elements are + implicitly inserted), the index '-' is always invalid. See example below. + + @throw out_of_range.404 if the JSON pointer @a ptr can not be resolved. + See example below. + + @exceptionsafety Strong guarantee: if an exception is thrown, there are no + changes in the JSON value. + @complexity Constant. - @throw std::out_of_range if the JSON pointer can not be resolved - @throw std::domain_error if an array index begins with '0' - @throw std::invalid_argument if an array index was not a number + @since version 2.0.0 @liveexample{The behavior is shown in the example.,at_json_pointer} - - @since version 2.0.0 */ reference at(const json_pointer& ptr) { @@ -12348,15 +13881,30 @@ basic_json_parser_74: @return reference to the element pointed to by @a ptr + @throw parse_error.106 if an array index in the passed JSON pointer @a ptr + begins with '0'. See example below. 
+ + @throw parse_error.109 if an array index in the passed JSON pointer @a ptr + is not a number. See example below. + + @throw out_of_range.401 if an array index in the passed JSON pointer @a ptr + is out of range. See example below. + + @throw out_of_range.402 if the array index '-' is used in the passed JSON + pointer @a ptr. As `at` provides checked access (and no elements are + implicitly inserted), the index '-' is always invalid. See example below. + + @throw out_of_range.404 if the JSON pointer @a ptr can not be resolved. + See example below. + + @exceptionsafety Strong guarantee: if an exception is thrown, there are no + changes in the JSON value. + @complexity Constant. - @throw std::out_of_range if the JSON pointer can not be resolved - @throw std::domain_error if an array index begins with '0' - @throw std::invalid_argument if an array index was not a number + @since version 2.0.0 @liveexample{The behavior is shown in the example.,at_json_pointer_const} - - @since version 2.0.0 */ const_reference at(const json_pointer& ptr) const { @@ -12412,6 +13960,9 @@ basic_json_parser_74: @complexity Linear in the size the JSON value. + @throw type_error.314 if value is not an object + @throw type_error.315 if object values are not primitve + @liveexample{The following code shows how a flattened JSON object is unflattened into the original nested JSON object.,unflatten} @@ -12449,12 +14000,23 @@ basic_json_parser_74: any case, the original value is not changed: the patch is applied to a copy of the value. 
- @throw std::out_of_range if a JSON pointer inside the patch could not - be resolved successfully in the current JSON value; example: `"key baz - not found"` - @throw invalid_argument if the JSON patch is malformed (e.g., mandatory + @throw parse_error.104 if the JSON patch does not consist of an array of + objects + + @throw parse_error.105 if the JSON patch is malformed (e.g., mandatory attributes are missing); example: `"operation add must have member path"` + @throw out_of_range.401 if an array index is out of range. + + @throw out_of_range.403 if a JSON pointer inside the patch could not be + resolved successfully in the current JSON value; example: `"key baz not + found"` + + @throw out_of_range.405 if JSON pointer has no parent ("add", "remove", + "move") + + @throw other_error.501 if "test" operation was unsuccessful + @complexity Linear in the size of the JSON value and the length of the JSON patch. As usually only a fraction of the JSON value is affected by the patch, the complexity can usually be neglected. 
@@ -12477,7 +14039,7 @@ basic_json_parser_74: // the valid JSON Patch operations enum class patch_operations {add, remove, replace, move, copy, test, invalid}; - const auto get_op = [](const std::string op) + const auto get_op = [](const std::string & op) { if (op == "add") { @@ -12551,7 +14113,7 @@ basic_json_parser_74: if (static_cast(idx) > parent.size()) { // avoid undefined behavior - JSON_THROW(std::out_of_range("array index " + std::to_string(idx) + " is out of range")); + JSON_THROW(out_of_range::create(401, "array index " + std::to_string(idx) + " is out of range")); } else { @@ -12589,7 +14151,7 @@ basic_json_parser_74: } else { - JSON_THROW(std::out_of_range("key '" + last_path + "' not found")); + JSON_THROW(out_of_range::create(403, "key '" + last_path + "' not found")); } } else if (parent.is_array()) @@ -12599,11 +14161,10 @@ basic_json_parser_74: } }; - // type check + // type check: top level value must be an array if (not json_patch.is_array()) { - // a JSON patch must be an array of objects - JSON_THROW(std::invalid_argument("JSON patch must be an array of objects")); + JSON_THROW(parse_error::create(104, 0, "JSON patch must be an array of objects")); } // iterate and apply the operations @@ -12623,23 +14184,23 @@ basic_json_parser_74: // check if desired value is present if (it == val.m_value.object->end()) { - JSON_THROW(std::invalid_argument(error_msg + " must have member '" + member + "'")); + JSON_THROW(parse_error::create(105, 0, error_msg + " must have member '" + member + "'")); } // check if result is of type string if (string_type and not it->second.is_string()) { - JSON_THROW(std::invalid_argument(error_msg + " must have string member '" + member + "'")); + JSON_THROW(parse_error::create(105, 0, error_msg + " must have string member '" + member + "'")); } // no error: return value return it->second; }; - // type check + // type check: every element of the array must be an object if (not val.is_object()) { - 
JSON_THROW(std::invalid_argument("JSON patch must be an array of objects")); + JSON_THROW(parse_error::create(104, 0, "JSON patch must be an array of objects")); } // collect mandatory members @@ -12687,7 +14248,7 @@ basic_json_parser_74: case patch_operations::copy: { - const std::string from_path = get_value("copy", "from", true);; + const std::string from_path = get_value("copy", "from", true); const json_pointer from_ptr(from_path); // the "from" location must exist - use at() @@ -12704,7 +14265,7 @@ basic_json_parser_74: // the "path" location must exist - use at() success = (result.at(ptr) == get_value("test", "value", false)); } - JSON_CATCH (std::out_of_range&) + JSON_CATCH (out_of_range&) { // ignore out of range errors: success remains false } @@ -12712,7 +14273,7 @@ basic_json_parser_74: // throw an exception if test fails if (not success) { - JSON_THROW(std::domain_error("unsuccessful: " + val.dump())); + JSON_THROW(other_error::create(501, "unsuccessful: " + val.dump())); } break; @@ -12722,7 +14283,7 @@ basic_json_parser_74: { // op must be "add", "remove", "replace", "move", "copy", or // "test" - JSON_THROW(std::invalid_argument("operation value '" + op + "' is invalid")); + JSON_THROW(parse_error::create(105, 0, "operation value '" + op + "' is invalid")); } } } @@ -12951,6 +14512,22 @@ struct hash return h(j.dump()); } }; + +/// specialization for std::less +template <> +struct less<::nlohmann::detail::value_t> +{ + /*! + @brief compare two value_t enum values + @since version 3.0.0 + */ + bool operator()(nlohmann::detail::value_t lhs, + nlohmann::detail::value_t rhs) const noexcept + { + return nlohmann::detail::operator<(lhs, rhs); + } +}; + } // namespace std /*! 
@@ -12993,11 +14570,16 @@ inline nlohmann::json::json_pointer operator "" _json_pointer(const char* s, std #if defined(__clang__) || defined(__GNUC__) || defined(__GNUG__) #pragma GCC diagnostic pop #endif +#if defined(__clang__) + #pragma GCC diagnostic pop +#endif // clean up #undef JSON_CATCH -#undef JSON_DEPRECATED #undef JSON_THROW #undef JSON_TRY +#undef JSON_LIKELY +#undef JSON_UNLIKELY +#undef JSON_DEPRECATED #endif diff --git a/lib/serialisation/BaseIO.h b/lib/serialisation/BaseIO.h index ec3eefa0..24e1cec7 100644 --- a/lib/serialisation/BaseIO.h +++ b/lib/serialisation/BaseIO.h @@ -86,7 +86,7 @@ namespace Grid { or element::is_number; }; - // Vector flatening utility class //////////////////////////////////////////// + // Vector flattening utility class //////////////////////////////////////////// // Class to flatten a multidimensional std::vector template class Flatten diff --git a/lib/serialisation/JSON_IO.cc b/lib/serialisation/JSON_IO.cc index 0a3968e9..ae37a4e5 100644 --- a/lib/serialisation/JSON_IO.cc +++ b/lib/serialisation/JSON_IO.cc @@ -42,6 +42,7 @@ JSONWriter::~JSONWriter(void) // write prettified JSON to file std::ofstream os(fileName_); + std::cout << "JSONWriter::~JSONWriter" << std::endl; os << std::setw(2) << json::parse(ss_.str()) << std::endl; } @@ -56,6 +57,7 @@ void JSONWriter::push(const string &s) void JSONWriter::pop(void) { + std::cout << "JSONWriter::pop" << std::endl; delete_comma(); ss_ << "},"; } diff --git a/lib/serialisation/JSON_IO.h b/lib/serialisation/JSON_IO.h index fc5e9631..9c247d27 100644 --- a/lib/serialisation/JSON_IO.h +++ b/lib/serialisation/JSON_IO.h @@ -106,7 +106,7 @@ namespace Grid template void JSONWriter::writeDefault(const std::string &s, const U &x) { - //std::cout << "JSONReader::writeDefault(U) : " << s << std::endl; + //std::cout << "JSONWriter::writeDefault(U) : " << s << std::endl; std::ostringstream os; os << std::boolalpha << x; if (s.size()) @@ -115,10 +115,25 @@ namespace Grid ss_ << os.str() << " 
," ; } + // specialize for string + template <> + void JSONWriter::writeDefault(const std::string &s, const std::string &x) + { + //std::cout << "JSONWriter::writeDefault(U) : " << s << std::endl; + std::ostringstream os; + os << std::boolalpha << x; + if (s.size()) + ss_ << "\""<< s << "\" : \"" << os.str() << "\" ," ; + else + ss_ << os.str() << " ," ; + } + + + template void JSONWriter::writeDefault(const std::string &s, const std::complex &x) { - //std::cout << "JSONReader::writeDefault(complex) : " << s << std::endl; + //std::cout << "JSONWriter::writeDefault(complex) : " << s << " " << x << std::endl; std::ostringstream os; os << "["<< std::boolalpha << x.real() << ", " << x.imag() << "]"; if (s.size()) @@ -130,7 +145,7 @@ namespace Grid template void JSONWriter::writeDefault(const std::string &s, const std::vector &x) { - //std::cout << "JSONReader::writeDefault(vec U) : " << s << std::endl; + //std::cout << "JSONWriter::writeDefault(vec U) : " << s << std::endl; if (s.size()) ss_ << " \""< void JSONWriter::writeDefault(const std::string &s, const char(&x)[N]){ - //std::cout << "JSONReader::writeDefault(char U) : " << s << std::endl; + //std::cout << "JSONWriter::writeDefault(char U) : " << s << " " << x << std::endl; if (s.size()) - ss_ << "\""<< s << "\" : \"" << x << "\" ," ; + ss_ << "\""<< s << "\" : \"" << x << "\" ," ; else - ss_ << "\"" << x << "\" ," ; + ss_ << "\"" << x << "\" ," ; } // Reader template implementation //////////////////////////////////////////// @@ -177,7 +192,7 @@ namespace Grid void JSONReader::readDefault(const std::string &s, std::complex &output) { U tmp1, tmp2; - //std::cout << "JSONReader::readDefault( complex U) : " << s << " : "<< jcur_ << std::endl; + //std::cout << "JSONReader::readDefault(complex U) : " << s << " : "<< jcur_ << std::endl; json j = jcur_; json::iterator it = j.begin(); jcur_ = *it; diff --git a/tests/IO/Test_serialisation.cc b/tests/IO/Test_serialisation.cc index 3988784c..8462fc5d 100644 --- 
a/tests/IO/Test_serialisation.cc +++ b/tests/IO/Test_serialisation.cc @@ -1,6 +1,6 @@ /************************************************************************************* - Grid physics library, www.github.com/paboyle/Grid + Grid physics library, www.github.com/paboyle/Grid Source file: ./tests/Test_serialisation.cc @@ -34,7 +34,7 @@ using namespace Grid; using namespace Grid::QCD; GRID_SERIALIZABLE_ENUM(myenum, undef, red, 1, blue, 2, green, 3); - + class myclass: Serializable { public: GRID_SERIALIZABLE_CLASS_MEMBERS(myclass, @@ -79,14 +79,14 @@ void ioTest(const std::string &filename, const O &object, const std::string &nam // writer needs to be destroyed so that writing physically happens { W writer(filename); - + write(writer, "testobject", object); } - + R reader(filename); O buf; bool good; - + read(reader, "testobject", buf); good = (object == buf); std::cout << name << " IO test: " << (good ? "success" : "failure"); @@ -98,7 +98,7 @@ int main(int argc,char **argv) { std::cout << "==== basic IO" << std::endl; XmlWriter WR("bother.xml"); - + // test basic type writing std::cout << "-- basic writing to 'bother.xml'..." << std::endl; push(WR,"BasicTypes"); @@ -112,12 +112,12 @@ int main(int argc,char **argv) write(WR,"d",d); write(WR,"b",b); pop(WR); - + // test serializable class writing myclass obj(1234); // non-trivial constructor std::vector vec; std::pair pair; - + std::cout << "-- serialisable class writing to 'bother.xml'..." << std::endl; write(WR,"obj",obj); WR.write("obj2", obj); @@ -132,11 +132,11 @@ int main(int argc,char **argv) std::cout << "-- serialisable class comparison:" << std::endl; std::cout << "vec[0] == obj: " << ((vec[0] == obj) ? "true" : "false") << std::endl; std::cout << "vec[1] == obj: " << ((vec[1] == obj) ? 
"true" : "false") << std::endl; - + write(WR, "objpair", pair); std::cout << "-- pair writing to std::cout:" << std::endl; std::cout << pair << std::endl; - + // read tests std::cout << "\n==== IO self-consistency tests" << std::endl; //// XML @@ -154,7 +154,7 @@ int main(int argc,char **argv) //// text ioTest("iotest.json", obj, "JSON (object) "); ioTest("iotest.json", vec, "JSON (vector of objects)"); - ioTest("iotest.json", pair, "JSON (pair of objects)"); + //ioTest("iotest.json", pair, "JSON (pair of objects)"); //// HDF5 #undef HAVE_HDF5 @@ -163,13 +163,13 @@ int main(int argc,char **argv) ioTest("iotest.h5", vec, "HDF5 (vector of objects)"); ioTest("iotest.h5", pair, "HDF5 (pair of objects)"); #endif - + std::cout << "\n==== vector flattening/reconstruction" << std::endl; typedef std::vector>> vec3d; - + vec3d dv, buf; double d = 0.; - + dv.resize(4); for (auto &v1: dv) { @@ -185,66 +185,71 @@ int main(int argc,char **argv) } std::cout << "original 3D vector:" << std::endl; std::cout << dv << std::endl; - + Flatten flatdv(dv); - + std::cout << "\ndimensions:" << std::endl; std::cout << flatdv.getDim() << std::endl; std::cout << "\nflattened vector:" << std::endl; std::cout << flatdv.getFlatVector() << std::endl; - + Reconstruct rec(flatdv.getFlatVector(), flatdv.getDim()); std::cout << "\nreconstructed vector:" << std::endl; std::cout << flatdv.getVector() << std::endl; std::cout << std::endl; -// std::cout << ".:::::: Testing JSON classes "<< std::endl; -// -// -// { -// JSONWriter JW("bother.json"); -// -// // test basic type writing -// push(JW,"BasicTypes"); -// write(JW,std::string("i16"),i16); -// write(JW,"u16",u16); -// write(JW,"i32",i32); -// write(JW,"u32",u32); -// write(JW,"i64",i64); -// write(JW,"u64",u64); -// write(JW,"f",f); -// write(JW,"d",d); -// write(JW,"b",b); -// pop(JW); -// -// // test serializable class writing -// myclass obj(1234); // non-trivial constructor -// std::cout << "-- serialisable class writing to 'bother.json'..." 
<< std::endl; -// write(JW,"obj",obj); -// JW.write("obj2", obj); -// -// std::cout << obj << std::endl; -// -// std::vector vec; -// vec.push_back(myclass(1234)); -// vec.push_back(myclass(5678)); -// vec.push_back(myclass(3838)); -// write(JW, "objvec", vec); -// -// } -// -// { -// JSONReader RD("bother.json"); -// myclass jcopy1; -// std::vector jveccopy1; -// read(RD,"obj",jcopy1); -// read(RD,"objvec", jveccopy1); -// std::cout << "Loaded (JSON) -----------------" << std::endl; -// std::cout << jcopy1 << std::endl << jveccopy1 << std::endl; -// } - -/* + std::cout << ".:::::: Testing JSON classes "<< std::endl; + + + { + JSONWriter JW("bother.json"); + + // test basic type writing + myenum a = myenum::red; + push(JW,"BasicTypes"); + write(JW,std::string("i16"),i16); + write(JW,"myenum",a); + write(JW,"u16",u16); + write(JW,"i32",i32); + write(JW,"u32",u32); + write(JW,"i64",i64); + write(JW,"u64",u64); + write(JW,"f",f); + write(JW,"d",d); + write(JW,"b",b); + pop(JW); + + + // test serializable class writing + myclass obj(1234); // non-trivial constructor + std::cout << obj << std::endl; + std::cout << "-- serialisable class writing to 'bother.json'..." 
<< std::endl; + write(JW,"obj",obj); + JW.write("obj2", obj); + + + std::vector vec; + vec.push_back(myclass(1234)); + vec.push_back(myclass(5678)); + vec.push_back(myclass(3838)); + write(JW, "objvec", vec); + + } + + + { + JSONReader RD("bother.json"); + myclass jcopy1; + std::vector jveccopy1; + read(RD,"obj",jcopy1); + read(RD,"objvec", jveccopy1); + std::cout << "Loaded (JSON) -----------------" << std::endl; + std::cout << jcopy1 << std::endl << jveccopy1 << std::endl; + } + + +/* // This is still work in progress { // Testing the next element function From 184af5bd05267bd54e3322a04ec431a84164b708 Mon Sep 17 00:00:00 2001 From: Guido Cossu Date: Wed, 12 Jul 2017 14:44:53 +0100 Subject: [PATCH 068/377] Added support for std::pair in the JSON serialiser --- lib/serialisation/JSON_IO.cc | 11 +++++---- lib/serialisation/JSON_IO.h | 45 ++++++++++++++++++++++++++++++++-- tests/IO/Test_serialisation.cc | 7 +++--- 3 files changed, 52 insertions(+), 11 deletions(-) diff --git a/lib/serialisation/JSON_IO.cc b/lib/serialisation/JSON_IO.cc index ae37a4e5..303dfcb7 100644 --- a/lib/serialisation/JSON_IO.cc +++ b/lib/serialisation/JSON_IO.cc @@ -42,7 +42,7 @@ JSONWriter::~JSONWriter(void) // write prettified JSON to file std::ofstream os(fileName_); - std::cout << "JSONWriter::~JSONWriter" << std::endl; + //std::cout << "JSONWriter::~JSONWriter" << std::endl; os << std::setw(2) << json::parse(ss_.str()) << std::endl; } @@ -57,7 +57,7 @@ void JSONWriter::push(const string &s) void JSONWriter::pop(void) { - std::cout << "JSONWriter::pop" << std::endl; + //std::cout << "JSONWriter::pop" << std::endl; delete_comma(); ss_ << "},"; } @@ -69,6 +69,7 @@ void JSONWriter::delete_comma() ss_.str(dlast); } +/* // here we are hitting a g++ bug (Bug 56480) // compiles fine with clang // have to wrap in the Grid namespace @@ -76,8 +77,7 @@ void JSONWriter::delete_comma() namespace Grid { template<> - void JSONWriter::writeDefault(const std::string &s, - const std::string &x) + void 
JSONWriter::writeDefault(const std::string &s, const std::string &x) { if (s.size()) ss_ << "\""<< s << "\" : \"" << x << "\" ," ; @@ -85,7 +85,7 @@ namespace Grid ss_ << "\"" << x << "\" ," ; } }// namespace Grid - +*/ // Reader implementation /////////////////////////////////////////////////////// JSONReader::JSONReader(const string &fileName) @@ -140,6 +140,7 @@ void JSONReader::pop(void) bool JSONReader::nextElement(const std::string &s) { + // Work in progress // JSON dictionaries do not support multiple names // Same name objects must be packed in vectors ++it_; diff --git a/lib/serialisation/JSON_IO.h b/lib/serialisation/JSON_IO.h index 9c247d27..a6d54fb3 100644 --- a/lib/serialisation/JSON_IO.h +++ b/lib/serialisation/JSON_IO.h @@ -58,10 +58,13 @@ namespace Grid void writeDefault(const std::string &s, const std::complex &x); template void writeDefault(const std::string &s, const std::vector &x); + template + void writeDefault(const std::string &s, const std::pair &x); template void writeDefault(const std::string &s, const char(&x)[N]); + private: void delete_comma(); std::string fileName_; @@ -82,6 +85,8 @@ namespace Grid void readDefault(const std::string &s, std::complex &output); template void readDefault(const std::string &s, std::vector &output); + template + void readDefault(const std::string &s, std::pair &output); private: json jobject_; // main object json jcur_; // current json object @@ -106,7 +111,7 @@ namespace Grid template void JSONWriter::writeDefault(const std::string &s, const U &x) { - //std::cout << "JSONWriter::writeDefault(U) : " << s << std::endl; + //std::cout << "JSONWriter::writeDefault(U) : " << s << " " << x < void JSONWriter::writeDefault(const std::string &s, const std::string &x) { - //std::cout << "JSONWriter::writeDefault(U) : " << s << std::endl; + //std::cout << "JSONWriter::writeDefault(string) : " << s << std::endl; std::ostringstream os; os << std::boolalpha << x; if (s.size()) @@ -142,6 +147,18 @@ namespace Grid ss_ << 
os.str() << " ," ; } + template + void JSONWriter::writeDefault(const std::string &s, const std::pair &x) + { + //std::cout << "JSONWriter::writeDefault(pair) : " << s << " " << x << std::endl; + std::ostringstream os; + os << "["<< std::boolalpha << "\""<< x.first << "\" , \"" << x.second << "\" ]"; + if (s.size()) + ss_ << "\""<< s << "\" : " << os.str() << " ," ; + else + ss_ << os.str() << " ," ; + } + template void JSONWriter::writeDefault(const std::string &s, const std::vector &x) { @@ -188,6 +205,30 @@ namespace Grid } + // Reader template implementation //////////////////////////////////////////// + template + void JSONReader::readDefault(const std::string &s, std::pair &output) + { + U first; + P second; + json j; + if (s.size()){ + //std::cout << "JSONReader::readDefault(pair) : " << s << " | "<< jcur_[s] << std::endl; + j = jcur_[s]; + } else { + j = jcur_; + } + json::iterator it = j.begin(); + jcur_ = *it; + read("", first); + it++; + jcur_ = *it; + read("", second); + output = std::pair(first,second); + } + + + template void JSONReader::readDefault(const std::string &s, std::complex &output) { diff --git a/tests/IO/Test_serialisation.cc b/tests/IO/Test_serialisation.cc index 8462fc5d..82638ad9 100644 --- a/tests/IO/Test_serialisation.cc +++ b/tests/IO/Test_serialisation.cc @@ -29,7 +29,6 @@ Author: Peter Boyle /* END LEGAL */ #include - using namespace Grid; using namespace Grid::QCD; @@ -152,9 +151,9 @@ int main(int argc,char **argv) ioTest("iotest.dat", vec, "text (vector of objects)"); ioTest("iotest.dat", pair, "text (pair of objects)"); //// text - ioTest("iotest.json", obj, "JSON (object) "); - ioTest("iotest.json", vec, "JSON (vector of objects)"); - //ioTest("iotest.json", pair, "JSON (pair of objects)"); + ioTest("iotest.json", obj, "JSON (object) "); + ioTest("iotest.json", vec, "JSON (vector of objects)"); + ioTest("iotest.json", pair, "JSON (pair of objects)"); //// HDF5 #undef HAVE_HDF5 From f64fb7bd77a775e4f9cde55a19cd6a8446af5d89 Mon 
Sep 17 00:00:00 2001 From: Guido Cossu Date: Wed, 12 Jul 2017 14:55:42 +0100 Subject: [PATCH 069/377] Fix gcc error on JSON compilation --- lib/serialisation/JSON_IO.cc | 11 +++++++---- lib/serialisation/JSON_IO.h | 3 ++- 2 files changed, 9 insertions(+), 5 deletions(-) diff --git a/lib/serialisation/JSON_IO.cc b/lib/serialisation/JSON_IO.cc index 303dfcb7..23a78b3e 100644 --- a/lib/serialisation/JSON_IO.cc +++ b/lib/serialisation/JSON_IO.cc @@ -69,7 +69,7 @@ void JSONWriter::delete_comma() ss_.str(dlast); } -/* + // here we are hitting a g++ bug (Bug 56480) // compiles fine with clang // have to wrap in the Grid namespace @@ -79,13 +79,16 @@ namespace Grid template<> void JSONWriter::writeDefault(const std::string &s, const std::string &x) { + //std::cout << "JSONWriter::writeDefault(string) : " << s << std::endl; + std::ostringstream os; + os << std::boolalpha << x; if (s.size()) - ss_ << "\""<< s << "\" : \"" << x << "\" ," ; + ss_ << "\""<< s << "\" : \"" << os.str() << "\" ," ; else - ss_ << "\"" << x << "\" ," ; + ss_ << os.str() << " ," ; } }// namespace Grid -*/ + // Reader implementation /////////////////////////////////////////////////////// JSONReader::JSONReader(const string &fileName) diff --git a/lib/serialisation/JSON_IO.h b/lib/serialisation/JSON_IO.h index a6d54fb3..c82648fc 100644 --- a/lib/serialisation/JSON_IO.h +++ b/lib/serialisation/JSON_IO.h @@ -120,6 +120,7 @@ namespace Grid ss_ << os.str() << " ," ; } +/* // specialize for string template <> void JSONWriter::writeDefault(const std::string &s, const std::string &x) @@ -132,7 +133,7 @@ namespace Grid else ss_ << os.str() << " ," ; } - +*/ template From 852ade029a64c6376d391205e607ae655c6d1c80 Mon Sep 17 00:00:00 2001 From: Lanny91 Date: Sun, 16 Jul 2017 13:41:47 +0100 Subject: [PATCH 070/377] Hadrons: Added module to sink a propagator --- extras/Hadrons/Modules.hpp | 1 + extras/Hadrons/Modules/MSink/Smear.hpp | 99 ++++++++++++++++++++++++++ extras/Hadrons/modules.inc | 1 + 3 files changed, 
101 insertions(+) create mode 100644 extras/Hadrons/Modules/MSink/Smear.hpp diff --git a/extras/Hadrons/Modules.hpp b/extras/Hadrons/Modules.hpp index 6e1b8823..d0d0d80d 100644 --- a/extras/Hadrons/Modules.hpp +++ b/extras/Hadrons/Modules.hpp @@ -19,6 +19,7 @@ #include #include #include +#include #include #include #include diff --git a/extras/Hadrons/Modules/MSink/Smear.hpp b/extras/Hadrons/Modules/MSink/Smear.hpp new file mode 100644 index 00000000..9327001f --- /dev/null +++ b/extras/Hadrons/Modules/MSink/Smear.hpp @@ -0,0 +1,99 @@ +#ifndef Hadrons_MSink_Smear_hpp_ +#define Hadrons_MSink_Smear_hpp_ + +#include +#include +#include + +BEGIN_HADRONS_NAMESPACE + +/****************************************************************************** + * Smear * + ******************************************************************************/ +BEGIN_MODULE_NAMESPACE(MSink) + +class SmearPar: Serializable +{ +public: + GRID_SERIALIZABLE_CLASS_MEMBERS(SmearPar, + std::string, q, + std::string, sink); +}; + +template +class TSmear: public Module +{ +public: + FERM_TYPE_ALIASES(FImpl,); + SINK_TYPE_ALIASES(); +public: + // constructor + TSmear(const std::string name); + // destructor + virtual ~TSmear(void) = default; + // dependency relation + virtual std::vector getInput(void); + virtual std::vector getOutput(void); + // setup + virtual void setup(void); + // execution + virtual void execute(void); +}; + +MODULE_REGISTER_NS(Smear, TSmear, MSink); + +/****************************************************************************** + * TSmear implementation * + ******************************************************************************/ +// constructor ///////////////////////////////////////////////////////////////// +template +TSmear::TSmear(const std::string name) +: Module(name) +{} + +// dependencies/products /////////////////////////////////////////////////////// +template +std::vector TSmear::getInput(void) +{ + std::vector in = {par().q, par().sink}; + + return in; +} + 
+template +std::vector TSmear::getOutput(void) +{ + std::vector out = {getName()}; + + return out; +} + +// setup /////////////////////////////////////////////////////////////////////// +template +void TSmear::setup(void) +{ + unsigned int nt = env().getDim(Tp); + unsigned int size = nt * sizeof(SitePropagator); + env().registerObject(getName(), size); +} + +// execution /////////////////////////////////////////////////////////////////// +template +void TSmear::execute(void) +{ + LOG(Message) << "Sink smearing propagator '" << par().q + << "' using sink function '" << par().sink << "'." + << std::endl; + + SinkFn &sink = *env().template getObject(par().sink); + PropagatorField &q = *env().template getObject(par().q); + SlicedPropagator *out = new SlicedPropagator(env().getDim(Tp)); + *out = sink(q); + env().setObject(getName(), out); +} + +END_MODULE_NAMESPACE + +END_HADRONS_NAMESPACE + +#endif // Hadrons_MSink_Smear_hpp_ diff --git a/extras/Hadrons/modules.inc b/extras/Hadrons/modules.inc index 91d0bbe1..fbbb2eb9 100644 --- a/extras/Hadrons/modules.inc +++ b/extras/Hadrons/modules.inc @@ -31,6 +31,7 @@ modules_hpp =\ Modules/MScalar/FreeProp.hpp \ Modules/MScalar/Scalar.hpp \ Modules/MSink/Point.hpp \ + Modules/MSink/Smear.hpp \ Modules/MSolver/RBPrecCG.hpp \ Modules/MSource/Point.hpp \ Modules/MSource/SeqConserved.hpp \ From 6293d438cd6ff2201300298ba29b985962991202 Mon Sep 17 00:00:00 2001 From: Lanny91 Date: Sun, 16 Jul 2017 13:43:25 +0100 Subject: [PATCH 071/377] Hadrons: sink smearing compatibility for 3pt contraction modules. 
--- .../Hadrons/Modules/MContraction/Gamma3pt.hpp | 22 +++++++++++++--- .../Modules/MContraction/WeakHamiltonian.hpp | 1 + .../MContraction/WeakHamiltonianEye.cc | 25 +++++++++++-------- 3 files changed, 34 insertions(+), 14 deletions(-) diff --git a/extras/Hadrons/Modules/MContraction/Gamma3pt.hpp b/extras/Hadrons/Modules/MContraction/Gamma3pt.hpp index 7f643d49..162ab786 100644 --- a/extras/Hadrons/Modules/MContraction/Gamma3pt.hpp +++ b/extras/Hadrons/Modules/MContraction/Gamma3pt.hpp @@ -51,6 +51,14 @@ BEGIN_HADRONS_NAMESPACE * q1 * * trace(g5*q1*adj(q2)*g5*gamma*q3) + * + * options: + * - q1: sink smeared propagator, source at i + * - q2: propagator, source at i + * - q3: propagator, source at f + * - gamma: gamma matrix to insert + * - tSnk: sink position for propagator q1. + * */ /****************************************************************************** @@ -66,6 +74,7 @@ public: std::string, q2, std::string, q3, Gamma::Algebra, gamma, + unsigned int, tSnk, std::string, output); }; @@ -140,17 +149,22 @@ void TGamma3pt::execute(void) << par().q3 << "', with " << par().gamma << " insertion." << std::endl; + // Initialise variables. q2 and q3 are normal propagators, q1 may be + // sink smeared. CorrWriter writer(par().output); - PropagatorField1 &q1 = *env().template getObject(par().q1); + SlicedPropagator1 &q1 = *env().template getObject(par().q1); PropagatorField2 &q2 = *env().template getObject(par().q2); - PropagatorField3 &q3 = *env().template getObject(par().q3); + PropagatorField3 &q3 = *env().template getObject(par().q3); LatticeComplex c(env().getGrid()); Gamma g5(Gamma::Algebra::Gamma5); Gamma gamma(par().gamma); std::vector buf; Result result; - - c = trace(g5*q1*adj(q2)*(g5*gamma)*q3); + + // Extract relevant timeslice of sinked propagator q1, then contract & + // sum over all spacial positions of gamma insertion. 
+ SitePropagator1 q1Snk = q1[par().tSnk]; + c = trace(g5*q1Snk*adj(q2)*(g5*gamma)*q3); sliceSum(c, buf, Tp); result.gamma = par().gamma; diff --git a/extras/Hadrons/Modules/MContraction/WeakHamiltonian.hpp b/extras/Hadrons/Modules/MContraction/WeakHamiltonian.hpp index 0a3c2e31..302b207e 100644 --- a/extras/Hadrons/Modules/MContraction/WeakHamiltonian.hpp +++ b/extras/Hadrons/Modules/MContraction/WeakHamiltonian.hpp @@ -76,6 +76,7 @@ public: std::string, q2, std::string, q3, std::string, q4, + unsigned int, tSnk, std::string, output); }; diff --git a/extras/Hadrons/Modules/MContraction/WeakHamiltonianEye.cc b/extras/Hadrons/Modules/MContraction/WeakHamiltonianEye.cc index a44c2534..314b080a 100644 --- a/extras/Hadrons/Modules/MContraction/WeakHamiltonianEye.cc +++ b/extras/Hadrons/Modules/MContraction/WeakHamiltonianEye.cc @@ -54,6 +54,8 @@ using namespace MContraction; * * S: trace(q3*g5*q1*adj(q2)*g5*gL[mu][p_1]*q4*gL[mu][p_2]) * E: trace(q3*g5*q1*adj(q2)*g5*gL[mu][p_1])*trace(q4*gL[mu][p_2]) + * + * Note q1 must be sink smeared. */ /****************************************************************************** @@ -94,15 +96,15 @@ void TWeakHamiltonianEye::execute(void) << "'." 
<< std::endl; CorrWriter writer(par().output); - PropagatorField &q1 = *env().template getObject(par().q1); - PropagatorField &q2 = *env().template getObject(par().q2); - PropagatorField &q3 = *env().template getObject(par().q3); - PropagatorField &q4 = *env().template getObject(par().q4); - Gamma g5 = Gamma(Gamma::Algebra::Gamma5); - LatticeComplex expbuf(env().getGrid()); - std::vector corrbuf; - std::vector result(n_eye_diag); - unsigned int ndim = env().getNd(); + SlicedPropagator &q1 = *env().template getObject(par().q1); + PropagatorField &q2 = *env().template getObject(par().q2); + PropagatorField &q3 = *env().template getObject(par().q3); + PropagatorField &q4 = *env().template getObject(par().q4); + Gamma g5 = Gamma(Gamma::Algebra::Gamma5); + LatticeComplex expbuf(env().getGrid()); + std::vector corrbuf; + std::vector result(n_eye_diag); + unsigned int ndim = env().getNd(); PropagatorField tmp1(env().getGrid()); LatticeComplex tmp2(env().getGrid()); @@ -111,10 +113,13 @@ void TWeakHamiltonianEye::execute(void) std::vector E_body(ndim, tmp2); std::vector E_loop(ndim, tmp2); + // Get sink timeslice of q1. + SitePropagator q1Snk = q1[par().tSnk]; + // Setup for S-type contractions. for (int mu = 0; mu < ndim; ++mu) { - S_body[mu] = MAKE_SE_BODY(q1, q2, q3, GammaL(Gamma::gmu[mu])); + S_body[mu] = MAKE_SE_BODY(q1Snk, q2, q3, GammaL(Gamma::gmu[mu])); S_loop[mu] = MAKE_SE_LOOP(q4, GammaL(Gamma::gmu[mu])); } From 0366288b1c8a42ff15eacb0f5e23eee2e89fb50f Mon Sep 17 00:00:00 2001 From: Lanny91 Date: Sun, 16 Jul 2017 13:45:55 +0100 Subject: [PATCH 072/377] Hadrons: added tests for 3pt contractions. 
--- tests/hadrons/Test_hadrons.hpp | 75 ++++++++--- .../hadrons/Test_hadrons_3pt_contractions.cc | 122 ++++++++++++++++++ 2 files changed, 182 insertions(+), 15 deletions(-) create mode 100644 tests/hadrons/Test_hadrons_3pt_contractions.cc diff --git a/tests/hadrons/Test_hadrons.hpp b/tests/hadrons/Test_hadrons.hpp index 6ea51d72..9bd3ee0a 100644 --- a/tests/hadrons/Test_hadrons.hpp +++ b/tests/hadrons/Test_hadrons.hpp @@ -269,6 +269,26 @@ inline void makeConservedSequentialSource(Application &application, } } +/******************************************************************************* + * Name: makeNoiseSource + * Parameters: application - main application that stores modules. + * srcName - name of source module to create. + * tA - lower source timeslice limit. + * tB - upper source timeslice limit. + * Returns: None. + ******************************************************************************/ +inline void makeNoiseSource(Application &application, std::string &srcName, + unsigned int tA, unsigned int tB) +{ + if (!(Environment::getInstance().hasModule(srcName))) + { + MSource::Z2::Par noisePar; + noisePar.tA = tA; + noisePar.tB = tB; + application.createModule(srcName, noisePar); + } + } + /******************************************************************************* * Name: makeWallSource * Purpose: Construct wall source and add to application module. @@ -292,26 +312,46 @@ inline void makeWallSource(Application &application, std::string &srcName, } /******************************************************************************* - * Name: makeWallSink - * Purpose: Wall sink smearing of a propagator. + * Name: makePointSink + * Purpose: Create function for point sink smearing of a propagator. * Parameters: application - main application that stores modules. * propName - name of input propagator. - * wallName - name of smeared propagator. + * sinkFnct - name of output sink smearing module. * mom - momentum insertion (default is zero). * Returns: None. 
******************************************************************************/ -inline void makeWallSink(Application &application, std::string &propName, - std::string &wallName, std::string mom = ZERO_MOM) +inline void makePointSink(Application &application, std::string &sinkFnct, + std::string mom = ZERO_MOM) +{ + // If the sink function already exists, don't make it again. + if (!(Environment::getInstance().hasModule(sinkFnct))) + { + MSink::Point::Par pointPar; + pointPar.mom = mom; + application.createModule(sinkFnct, pointPar); + } +} + +/******************************************************************************* + * Name: sinkSmear + * Purpose: Perform sink smearing of a propagator. + * Parameters: application - main application that stores modules. + * sinkFnct - sink smearing module. + * propName - propagator to smear. + * smearedProp - name of output smeared propagator. + * Returns: None. + ******************************************************************************/ +inline void sinkSmear(Application &application, std::string &sinkFnct, + std::string &propName, std::string &smearedProp) { // If the propagator has already been smeared, don't smear it again. - // Temporarily removed, strategy for sink smearing likely to change. - /*if (!(Environment::getInstance().hasModule(wallName))) + if (!(Environment::getInstance().hasModule(smearedProp))) { - MSink::Wall::Par wallPar; - wallPar.q = propName; - wallPar.mom = mom; - application.createModule(wallName, wallPar); - }*/ + MSink::Smear::Par smearPar; + smearPar.q = propName; + smearPar.sink = sinkFnct; + application.createModule(smearedProp, smearPar); + } } /******************************************************************************* @@ -398,16 +438,18 @@ inline void mesonContraction(Application &application, * Purpose: Create gamma3pt contraction module and add to application module. * Parameters: application - main application that stores modules. 
* npt - specify n-point correlator (for labelling). - * q1 - quark propagator 1. + * q1 - quark propagator 1, sink smeared. * q2 - quark propagator 2. * q3 - quark propagator 3. * label - unique label to construct module name. + * tSnk - sink position of sink for q1. * gamma - gamma insertions between q2 and q3. * Returns: None. ******************************************************************************/ inline void gamma3ptContraction(Application &application, unsigned int npt, std::string &q1, std::string &q2, - std::string &q3, std::string &label, + std::string &q3, std::string &label, + unsigned int tSnk = 0, Gamma::Algebra gamma = Gamma::Algebra::Identity) { std::string modName = std::to_string(npt) + "pt_" + label; @@ -418,6 +460,7 @@ inline void gamma3ptContraction(Application &application, unsigned int npt, gamma3ptPar.q1 = q1; gamma3ptPar.q2 = q2; gamma3ptPar.q3 = q3; + gamma3ptPar.tSnk = tSnk; gamma3ptPar.gamma = gamma; application.createModule(modName, gamma3ptPar); } @@ -434,13 +477,14 @@ inline void gamma3ptContraction(Application &application, unsigned int npt, * q3 - quark propagator 3. * q4 - quark propagator 4. * label - unique label to construct module name. + * tSnk - time position of sink (for sink smearing). * Returns: None. 
******************************************************************************/ #define HW_CONTRACTION(top) \ inline void weakContraction##top(Application &application, unsigned int npt,\ std::string &q1, std::string &q2, \ std::string &q3, std::string &q4, \ - std::string &label)\ + std::string &label, unsigned int tSnk = 0)\ {\ std::string modName = std::to_string(npt) + "pt_" + label;\ if (!(Environment::getInstance().hasModule(modName)))\ @@ -451,6 +495,7 @@ inline void weakContraction##top(Application &application, unsigned int npt,\ weakPar.q2 = q2;\ weakPar.q3 = q3;\ weakPar.q4 = q4;\ + weakPar.tSnk = tSnk;\ application.createModule(modName, weakPar);\ }\ } diff --git a/tests/hadrons/Test_hadrons_3pt_contractions.cc b/tests/hadrons/Test_hadrons_3pt_contractions.cc new file mode 100644 index 00000000..452fc34d --- /dev/null +++ b/tests/hadrons/Test_hadrons_3pt_contractions.cc @@ -0,0 +1,122 @@ +/******************************************************************************* + Grid physics library, www.github.com/paboyle/Grid + + Source file: tests/hadrons/Test_hadrons_3pt_contractions.cc + + Copyright (C) 2017 + + Author: Andrew Lawson + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along + with this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + + See the full license in the file "LICENSE" in the top level distribution + directory. 
+ *******************************************************************************/ + +#include "Test_hadrons.hpp" + +using namespace Grid; +using namespace Hadrons; + +int main(int argc, char *argv[]) +{ + // initialization ////////////////////////////////////////////////////////// + HADRONS_DEFAULT_INIT; + + // run setup /////////////////////////////////////////////////////////////// + Application application; + double mass = 0.04; + double M5 = 1.8; + unsigned int Ls = 12; + unsigned int nt = GridDefaultLatt()[Tp]; + unsigned int t_i = 0; + unsigned int t_f = nt / 2; + std::string mom = "1. 0. 0. 0."; + + // global parameters + HADRONS_DEFAULT_GLOBALS(application); + + // gauge field + std::string gaugeField = "gauge"; + application.createModule(gaugeField); + + // Action & solver setup. + std::string action = "DWF"; + std::string solver = "CG"; + makeDWFAction(application, action, gaugeField, mass, M5, Ls); + makeRBPrecCGSolver(application, solver, action); + + /*************************************************************************** + * Weak Contraction test: Non-Eye class. + **************************************************************************/ + // Make wall source propagators for each leg of 4-quark vertex. + std::string q_i_0 = "q_i_0"; + std::string q_i_p = "q_i_p"; + std::string q_f_0 = "q_f_0"; + std::string q_f_p = "q_f_p"; + MAKE_WALL_PROP(t_i, q_i_0, solver); + MAKE_WALL_PROP(t_f, q_f_0, solver); + MAKE_3MOM_WALL_PROP(t_i, mom, q_i_p, solver); + MAKE_3MOM_WALL_PROP(t_f, mom, q_f_p, solver); + + // Perform contractions, zero and non-zero momentum. + std::string HW_CW_0 = LABEL_3PT("HW_CW_0", t_i, t_f); + std::string HW_CW_p = LABEL_3PT("HW_CW_p", t_i, t_f); + weakContractionNonEye(application, 3, q_i_0, q_i_0, q_f_0, q_f_0, HW_CW_0); + weakContractionNonEye(application, 3, q_i_0, q_i_p, q_f_p, q_f_0, HW_CW_p); + + /*************************************************************************** + * Weak Contraction test: Eye-class. 
+ **************************************************************************/ + // Create random propagator for loop. + std::string eta = "noise_source"; + makeNoiseSource(application, eta, 0, nt - 1); + std::string loopProp = "loop"; + std::string loopRes = loopProp + "_res"; + makePropagator(application, loopRes, eta, solver); + makeLoop(application, loopProp, eta, loopRes); + + // Wall sink smear the propagator directly connecting the source & sink. + // (i.e. make point sink but smear before the contraction) + std::string wallSink = "wall_sink"; + std::string qWall = "q_wall"; + makePointSink(application, wallSink); + sinkSmear(application, wallSink, q_i_0, qWall); + + // Perform contractions, zero and non-zero momentum. + std::string HW_SE_0 = LABEL_3PT("HW_SE_0", t_i, t_f); + std::string HW_SE_p = LABEL_3PT("HW_SE_p", t_i, t_f); + weakContractionEye(application, 3, qWall, q_i_0, q_f_p, loopProp, HW_SE_0, t_f); + weakContractionEye(application, 3, qWall, q_i_p, q_f_p, loopProp, HW_SE_p, t_f); + + /*************************************************************************** + * Gamma insertion test. + **************************************************************************/ + Gamma::Algebra gamma = Gamma::Algebra::GammaT; + std::string sd_0 = LABEL_3PT("sd_0", t_i, t_f); + std::string sd_p = LABEL_3PT("sd_p", t_i, t_f); + gamma3ptContraction(application, 3, qWall, q_i_0, q_f_0, sd_0, t_f, gamma); + gamma3ptContraction(application, 3, qWall, q_i_p, q_f_p, sd_p, t_f, gamma); + + // execution + application.saveParameterFile("ContractionTest3pt.xml"); + application.run(); + + // epilogue + LOG(Message) << "Grid is finalizing now" << std::endl; + Grid_finalize(); + + return EXIT_SUCCESS; +} \ No newline at end of file From 875e1a841f24166084cc26e16aea363c1200070c Mon Sep 17 00:00:00 2001 From: Lanny91 Date: Sun, 16 Jul 2017 13:47:00 +0100 Subject: [PATCH 073/377] Hadrons: updated Quark -> MFermion/GaugeProp module name in test. 
--- tests/hadrons/Test_hadrons_quark.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/hadrons/Test_hadrons_quark.cc b/tests/hadrons/Test_hadrons_quark.cc index 5b9d0ce1..eac065e9 100644 --- a/tests/hadrons/Test_hadrons_quark.cc +++ b/tests/hadrons/Test_hadrons_quark.cc @@ -26,7 +26,7 @@ *******************************************************************************/ #include "Test_hadrons.hpp" -#include +#include using namespace Grid; using namespace QCD; From 7a53dc3715fbf2606aa1eb8ce943be35367e8ebb Mon Sep 17 00:00:00 2001 From: Nils Meyer Date: Mon, 24 Jul 2017 11:12:59 +0200 Subject: [PATCH 074/377] Added integer reduce functionality --- lib/simd/Grid_neon.h | 53 +++++++++++++++++--------------------------- 1 file changed, 20 insertions(+), 33 deletions(-) diff --git a/lib/simd/Grid_neon.h b/lib/simd/Grid_neon.h index 38815389..cadb4df8 100644 --- a/lib/simd/Grid_neon.h +++ b/lib/simd/Grid_neon.h @@ -6,9 +6,9 @@ Copyright (C) 2015 -Author: Nils Meyer -Author: Peter Boyle -Author: neo + Author: Nils Meyer + Author: Peter Boyle + Author: neo This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -27,7 +27,7 @@ Author: neo See the full license in the file "LICENSE" in the top level distribution directory *************************************************************************************/ /* END LEGAL */ -//---------------------------------------------------------------------- + /* ARMv8 NEON intrinsics layer by @@ -37,9 +37,6 @@ Author: neo SFB/TRR55 */ -//---------------------------------------------------------------------- -//#ifndef ARM_NEON -//#define ARM_NEON #ifndef GEN_SIMD_WIDTH #define GEN_SIMD_WIDTH 16u @@ -85,11 +82,11 @@ namespace Optimization { double tmp[2]={a,b}; return vld1q_f64(tmp); } - //Real double // N:tbc + //Real double inline float64x2_t operator()(double a){ return vdupq_n_f64(a); } - //Integer // N:tbc + //Integer inline 
uint32x4_t operator()(Integer a){ return vdupq_n_u32(a); } @@ -127,33 +124,32 @@ namespace Optimization { // Nils: Vset untested; not used currently in Grid at all; // git commit 4a8c4ccfba1d05159348d21a9698028ea847e77b struct Vset{ - // Complex float // N:ok + // Complex float inline float32x4_t operator()(Grid::ComplexF *a){ float tmp[4]={a[1].imag(),a[1].real(),a[0].imag(),a[0].real()}; return vld1q_f32(tmp); } - // Complex double // N:ok + // Complex double inline float64x2_t operator()(Grid::ComplexD *a){ double tmp[2]={a[0].imag(),a[0].real()}; return vld1q_f64(tmp); } - // Real float // N:ok + // Real float inline float32x4_t operator()(float *a){ float tmp[4]={a[3],a[2],a[1],a[0]}; return vld1q_f32(tmp); } - // Real double // N:ok + // Real double inline float64x2_t operator()(double *a){ double tmp[2]={a[1],a[0]}; return vld1q_f64(tmp); } - // Integer // N:ok + // Integer inline uint32x4_t operator()(Integer *a){ return vld1q_dup_u32(a); } }; - // N:leaving as is template struct Reduce{ //Need templated class to overload output type @@ -252,9 +248,9 @@ namespace Optimization { return vfmaq_f32(r4, r0, a); // ar*br-ai*bi ai*br+ar*bi ... 
// no fma, use mul and add - //float32x4_t r5; - //r5 = vmulq_f32(r0, a); - //return vaddq_f32(r4, r5); + // float32x4_t r5; + // r5 = vmulq_f32(r0, a); + // return vaddq_f32(r4, r5); } // Complex double inline float64x2_t operator()(float64x2_t a, float64x2_t b){ @@ -275,9 +271,9 @@ namespace Optimization { return vfmaq_f64(r4, r0, a); // ar*br-ai*bi ai*br+ar*bi // no fma, use mul and add - //float64x2_t r5; - //r5 = vmulq_f64(r0, a); - //return vaddq_f64(r4, r5); + // float64x2_t r5; + // r5 = vmulq_f64(r0, a); + // return vaddq_f64(r4, r5); } }; @@ -424,11 +420,6 @@ namespace Optimization { } } -// working, but no restriction on n -// template static inline float32x4_t tRotate(float32x4_t in){ return vextq_f32(in,in,n); }; -// template static inline float64x2_t tRotate(float64x2_t in){ return vextq_f64(in,in,n); }; - -// restriction on n template static inline float32x4_t tRotate(float32x4_t in){ return vextq_f32(in,in,n%4); }; template static inline float64x2_t tRotate(float64x2_t in){ return vextq_f64(in,in,n%2); }; @@ -444,7 +435,7 @@ namespace Optimization { sb = vcvt_high_f32_f16(h); // there is no direct conversion from lower float32x4_t to float64x2_t // vextq_f16 not supported by clang 3.8 / 4.0 / arm clang - //float16x8_t h1 = vextq_f16(h, h, 4); // correct, but not supported by clang + // float16x8_t h1 = vextq_f16(h, h, 4); // correct, but not supported by clang // workaround for clang uint32x4_t h1u = reinterpret_cast(h); float16x8_t h1 = reinterpret_cast(vextq_u32(h1u, h1u, 2)); @@ -550,7 +541,7 @@ namespace Optimization { //Complex double Reduce - template<> // N:by Boyle + template<> inline Grid::ComplexD Reduce::operator()(float64x2_t in){ u128d conv; conv.v = in; return Grid::ComplexD(conv.f[0],conv.f[1]); @@ -565,9 +556,7 @@ namespace Optimization { //Integer Reduce template<> inline Integer Reduce::operator()(uint32x4_t in){ - // FIXME unimplemented - printf("Reduce : Missing integer implementation -> FIX\n"); - assert(0); + return 
vaddvq_u32(in); } } @@ -607,5 +596,3 @@ namespace Optimization { typedef Optimization::TimesI TimesISIMD; } - -//#endif // ARM_NEON From 9f280b82c4e56d8b32034dfbb83187e15add41e9 Mon Sep 17 00:00:00 2001 From: Christopher Kelly Date: Tue, 25 Jul 2017 11:30:41 -0400 Subject: [PATCH 075/377] Added mixed-precision CG with reliable updates --- lib/algorithms/Algorithms.h | 1 + .../ConjugateGradientReliableUpdate.h | 231 ++++++++++++++++++ tests/Test_dwf_mixedcg_prec_halfcomms.cc | 40 ++- 3 files changed, 260 insertions(+), 12 deletions(-) create mode 100644 lib/algorithms/iterative/ConjugateGradientReliableUpdate.h diff --git a/lib/algorithms/Algorithms.h b/lib/algorithms/Algorithms.h index 5123c7a1..f8dc2dc2 100644 --- a/lib/algorithms/Algorithms.h +++ b/lib/algorithms/Algorithms.h @@ -44,6 +44,7 @@ Author: Peter Boyle #include #include #include +#include // Lanczos support //#include diff --git a/lib/algorithms/iterative/ConjugateGradientReliableUpdate.h b/lib/algorithms/iterative/ConjugateGradientReliableUpdate.h new file mode 100644 index 00000000..1aab064d --- /dev/null +++ b/lib/algorithms/iterative/ConjugateGradientReliableUpdate.h @@ -0,0 +1,231 @@ + /************************************************************************************* + + Grid physics library, www.github.com/paboyle/Grid + + Source file: ./lib/algorithms/iterative/ConjugateGradientReliableUpdate.h + + Copyright (C) 2015 + +Author: Christopher Kelly + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + You should have received a copy of the GNU General Public License along + with this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + + See the full license in the file "LICENSE" in the top level distribution directory + *************************************************************************************/ + /* END LEGAL */ +#ifndef GRID_CONJUGATE_GRADIENT_RELIABLE_UPDATE_H +#define GRID_CONJUGATE_GRADIENT_RELIABLE_UPDATE_H + +namespace Grid { + + template::value == 2, int>::type = 0,typename std::enable_if< getPrecision::value == 1, int>::type = 0> + class ConjugateGradientReliableUpdate : public LinearFunction { + public: + bool ErrorOnNoConverge; // throw an assert when the CG fails to converge. + // Defaults true. + RealD Tolerance; + Integer MaxIterations; + Integer IterationsToComplete; //Number of iterations the CG took to finish. Filled in upon completion + Integer ReliableUpdatesPerformed; + + bool DoFinalCleanup; //Final DP cleanup, defaults to true + Integer IterationsToCleanup; //Final DP cleanup step iterations + + LinearOperatorBase &Linop_f; + LinearOperatorBase &Linop_d; + GridBase* SinglePrecGrid; + RealD Delta; //reliable update parameter + + ConjugateGradientReliableUpdate(RealD tol, Integer maxit, RealD _delta, GridBase* _sp_grid, LinearOperatorBase &_Linop_f, LinearOperatorBase &_Linop_d, bool err_on_no_conv = true) + : Tolerance(tol), + MaxIterations(maxit), + Delta(_delta), + Linop_f(_Linop_f), + Linop_d(_Linop_d), + SinglePrecGrid(_sp_grid), + ErrorOnNoConverge(err_on_no_conv), + DoFinalCleanup(true) + {}; + + void operator()(const FieldD &src, FieldD &psi) { + psi.checkerboard = src.checkerboard; + conformable(psi, src); + + RealD cp, c, a, d, b, ssq, qq, b_pred; + + FieldD p(src); + FieldD mmp(src); + FieldD r(src); + + // Initial residual computation & set up + RealD guess = norm2(psi); + assert(std::isnan(guess) == 0); + + Linop_d.HermOpAndNorm(psi, mmp, d, 
b); + + r = src - mmp; + p = r; + + a = norm2(p); + cp = a; + ssq = norm2(src); + + std::cout << GridLogIterative << std::setprecision(4) << "ConjugateGradientReliableUpdate: guess " << guess << std::endl; + std::cout << GridLogIterative << std::setprecision(4) << "ConjugateGradientReliableUpdate: src " << ssq << std::endl; + std::cout << GridLogIterative << std::setprecision(4) << "ConjugateGradientReliableUpdate: mp " << d << std::endl; + std::cout << GridLogIterative << std::setprecision(4) << "ConjugateGradientReliableUpdate: mmp " << b << std::endl; + std::cout << GridLogIterative << std::setprecision(4) << "ConjugateGradientReliableUpdate: cp,r " << cp << std::endl; + std::cout << GridLogIterative << std::setprecision(4) << "ConjugateGradientReliableUpdate: p " << a << std::endl; + + RealD rsq = Tolerance * Tolerance * ssq; + + // Check if guess is really REALLY good :) + if (cp <= rsq) { + return; + } + + //Single prec initialization + FieldF r_f(SinglePrecGrid); + r_f.checkerboard = r.checkerboard; + precisionChange(r_f, r); + + FieldF psi_f(r_f); + psi_f = zero; + + FieldF p_f(r_f); + FieldF mmp_f(r_f); + + RealD MaxResidSinceLastRelUp = cp; //initial residual + + std::cout << GridLogIterative << std::setprecision(4) + << "ConjugateGradient: k=0 residual " << cp << " target " << rsq << std::endl; + + GridStopWatch LinalgTimer; + GridStopWatch MatrixTimer; + GridStopWatch SolverTimer; + + SolverTimer.Start(); + int k = 0; + int l = 0; + + for (k = 1; k <= MaxIterations; k++) { + c = cp; + + MatrixTimer.Start(); + Linop_f.HermOpAndNorm(p_f, mmp_f, d, qq); + MatrixTimer.Stop(); + + LinalgTimer.Start(); + + a = c / d; + b_pred = a * (a * qq - d) / c; + + cp = axpy_norm(r_f, -a, mmp_f, r_f); + b = cp / c; + + // Fuse these loops ; should be really easy + psi_f = a * p_f + psi_f; + //p_f = p_f * b + r_f; + + LinalgTimer.Stop(); + + std::cout << GridLogIterative << "ConjugateGradientReliableUpdate: Iteration " << k + << " residual " << cp << " target " << rsq << 
std::endl; + std::cout << GridLogDebug << "a = "<< a << " b_pred = "<< b_pred << " b = "<< b << std::endl; + std::cout << GridLogDebug << "qq = "<< qq << " d = "<< d << " c = "<< c << std::endl; + + if(cp > MaxResidSinceLastRelUp){ + std::cout << GridLogIterative << "ConjugateGradientReliableUpdate: updating MaxResidSinceLastRelUp : " << MaxResidSinceLastRelUp << " -> " << cp << std::endl; + MaxResidSinceLastRelUp = cp; + } + + // Stopping condition + if (cp <= rsq) { + //Although not written in the paper, I assume that I have to add on the final solution + precisionChange(mmp, psi_f); + psi = psi + mmp; + + + SolverTimer.Stop(); + Linop_d.HermOpAndNorm(psi, mmp, d, qq); + p = mmp - src; + + RealD srcnorm = sqrt(norm2(src)); + RealD resnorm = sqrt(norm2(p)); + RealD true_residual = resnorm / srcnorm; + + std::cout << GridLogMessage << "ConjugateGradientReliableUpdate Converged on iteration " << k << " after " << l << " reliable updates" << std::endl; + std::cout << GridLogMessage << "\tComputed residual " << sqrt(cp / ssq)< CG(Tolerance,MaxIterations); + CG.ErrorOnNoConverge = ErrorOnNoConverge; + CG(Linop_d,src,psi); + IterationsToCleanup = CG.IterationsToComplete; + } + else if (ErrorOnNoConverge) assert(true_residual / Tolerance < 10000.0); + + std::cout << GridLogMessage << "ConjugateGradientReliableUpdate complete.\n"; + return; + } + else if(cp < Delta * MaxResidSinceLastRelUp) { //reliable update + std::cout << GridLogMessage << "ConjugateGradientReliableUpdate " + << cp << "(residual) < " << Delta << "(Delta) * " << MaxResidSinceLastRelUp << "(MaxResidSinceLastRelUp) on iteration " << k << " : performing reliable update\n"; + precisionChange(mmp, psi_f); + psi = psi + mmp; + + Linop_d.HermOpAndNorm(psi, mmp, d, qq); + r = src - mmp; + + psi_f = zero; + precisionChange(r_f, r); + cp = norm2(r); + MaxResidSinceLastRelUp = cp; + + std::cout << GridLogMessage << "ConjugateGradientReliableUpdate new residual " << cp << std::endl; + + l = l+1; + } + + p_f = p_f * 
b + r_f; //update search vector after reliable update appears to help convergence + + } + std::cout << GridLogMessage << "ConjugateGradientReliableUpdate did NOT converge" + << std::endl; + + if (ErrorOnNoConverge) assert(0); + IterationsToComplete = k; + ReliableUpdatesPerformed = l; + } + }; + + +}; + + + +#endif diff --git a/tests/Test_dwf_mixedcg_prec_halfcomms.cc b/tests/Test_dwf_mixedcg_prec_halfcomms.cc index 9cc935d9..d6aaa21e 100644 --- a/tests/Test_dwf_mixedcg_prec_halfcomms.cc +++ b/tests/Test_dwf_mixedcg_prec_halfcomms.cc @@ -80,31 +80,47 @@ int main (int argc, char ** argv) LatticeFermionD src_o(FrbGrid); - LatticeFermionD result_o(FrbGrid); - LatticeFermionD result_o_2(FrbGrid); + LatticeFermionD result_cg(FrbGrid); pickCheckerboard(Odd,src_o,src); - result_o.checkerboard = Odd; - result_o = zero; - result_o_2.checkerboard = Odd; - result_o_2 = zero; + result_cg.checkerboard = Odd; + result_cg = zero; + LatticeFermionD result_mcg(result_cg); + LatticeFermionD result_rlcg(result_cg); SchurDiagMooeeOperator HermOpEO(Ddwf); SchurDiagMooeeOperator HermOpEO_f(Ddwf_f); + //#define DO_MIXED_CG +#define DO_RLUP_CG + +#ifdef DO_MIXED_CG std::cout << "Starting mixed CG" << std::endl; MixedPrecisionConjugateGradient mCG(1.0e-8, 10000, 50, FrbGrid_f, HermOpEO_f, HermOpEO); mCG.InnerTolerance = 3.0e-5; - mCG(src_o,result_o); + mCG(src_o,result_mcg); +#endif +#ifdef DO_RLUP_CG + std::cout << "Starting reliable update CG" << std::endl; + ConjugateGradientReliableUpdate rlCG(1.e-8, 10000, 0.1, FrbGrid_f, HermOpEO_f, HermOpEO); + rlCG(src_o,result_rlcg); +#endif + std::cout << "Starting regular CG" << std::endl; ConjugateGradient CG(1.0e-8,10000); - CG(HermOpEO,src_o,result_o_2); + CG(HermOpEO,src_o,result_cg); - LatticeFermionD diff_o(FrbGrid); - RealD diff = axpy_norm(diff_o, -1.0, result_o, result_o_2); - - std::cout << "Diff between mixed and regular CG: " << diff << std::endl; +#ifdef DO_MIXED_CG + LatticeFermionD diff_mcg(FrbGrid); + RealD vdiff_mcg = 
axpy_norm(diff_mcg, -1.0, result_cg, result_mcg); + std::cout << "Diff between mixed and regular CG: " << vdiff_mcg << std::endl; +#endif +#ifdef DO_RLUP_CG + LatticeFermionD diff_rlcg(FrbGrid); + RealD vdiff_rlcg = axpy_norm(diff_rlcg, -1.0, result_cg, result_rlcg); + std::cout << "Diff between reliable update and regular CG: " << vdiff_rlcg << std::endl; +#endif Grid_finalize(); } From 67b34e5789aec1b39d34c2cdbedb156ff9509e11 Mon Sep 17 00:00:00 2001 From: Lanny91 Date: Mon, 31 Jul 2017 11:35:01 +0100 Subject: [PATCH 076/377] Modified conserved current 5th dimension loop for compatibility with 5D vectorisation. --- lib/qcd/action/fermion/WilsonFermion5D.cc | 61 +++++++++++++++++------ 1 file changed, 46 insertions(+), 15 deletions(-) diff --git a/lib/qcd/action/fermion/WilsonFermion5D.cc b/lib/qcd/action/fermion/WilsonFermion5D.cc index 5daed3de..5ddfde9a 100644 --- a/lib/qcd/action/fermion/WilsonFermion5D.cc +++ b/lib/qcd/action/fermion/WilsonFermion5D.cc @@ -12,6 +12,7 @@ Author: Peter Boyle Author: Peter Boyle Author: paboyle Author: Guido Cossu +Author: Andrew Lawson This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -676,6 +677,21 @@ void WilsonFermion5D::MomentumSpacePropagatorHw(FermionField &out,const Fe * to make a conserved current sink or inserting the conserved current * sequentially. ******************************************************************************/ + +// Helper macro to reverse Simd vector. Fixme: slow, generic implementation. 
+#define REVERSE_LS(qSite, qSiteRev, Nsimd) \ +{ \ + std::vector qSiteVec(Nsimd); \ + extract(qSite, qSiteVec); \ + for (int i = 0; i < Nsimd / 2; ++i) \ + { \ + typename SitePropagator::scalar_object tmp = qSiteVec[i]; \ + qSiteVec[i] = qSiteVec[Nsimd - i - 1]; \ + qSiteVec[Nsimd - i - 1] = tmp; \ + } \ + merge(qSiteRev, qSiteVec); \ +} + template void WilsonFermion5D::ContractConservedCurrent(PropagatorField &q_in_1, PropagatorField &q_in_2, @@ -687,6 +703,7 @@ void WilsonFermion5D::ContractConservedCurrent(PropagatorField &q_in_1, conformable(q_in_1._grid, q_in_2._grid); conformable(_FourDimGrid, q_out._grid); PropagatorField tmp1(FermionGrid()), tmp2(FermionGrid()); + unsigned int LLs = q_in_1._grid->_rdimensions[0]; q_out = zero; // Forward, need q1(x + mu, s), q2(x, Ls - 1 - s). Backward, need q1(x, s), @@ -695,18 +712,33 @@ void WilsonFermion5D::ContractConservedCurrent(PropagatorField &q_in_1, tmp2 = Cshift(q_in_2, mu + 1, 1); parallel_for (unsigned int sU = 0; sU < Umu._grid->oSites(); ++sU) { - unsigned int sF1 = sU * Ls; - unsigned int sF2 = (sU + 1) * Ls - 1; - for (int s = 0; s < Ls; ++s) + unsigned int sF1 = sU * LLs; + unsigned int sF2 = (sU + 1) * LLs - 1; + + for (unsigned int s = 0; s < LLs; ++s) { - bool axial_sign = ((curr_type == Current::Axial) && (s < (Ls / 2))) ? \ - true : false; + bool axial_sign = ((curr_type == Current::Axial) && \ + (s < (LLs / 2))); + SitePropagator qSite2, qmuSite2; + + // If vectorised in 5th dimension, reverse q2 vector to match up + // sites correctly. 
+ if (Impl::LsVectorised) + { + REVERSE_LS(q_in_2._odata[sF2], qSite2, Ls / LLs); + REVERSE_LS(tmp2._odata[sF2], qmuSite2, Ls / LLs); + } + else + { + qSite2 = q_in_2._odata[sF2]; + qmuSite2 = tmp2._odata[sF2]; + } Kernels::ContractConservedCurrentSiteFwd(tmp1._odata[sF1], - q_in_2._odata[sF2], + qSite2, q_out._odata[sU], Umu, sU, mu, axial_sign); Kernels::ContractConservedCurrentSiteBwd(q_in_1._odata[sF1], - tmp2._odata[sF2], + qmuSite2, q_out._odata[sU], Umu, sU, mu, axial_sign); sF1++; @@ -732,6 +764,7 @@ void WilsonFermion5D::SeqConservedCurrent(PropagatorField &q_in, tmp(FermionGrid()); Complex i(0.0, 1.0); int tshift = (mu == Tp) ? 1 : 0; + unsigned int LLs = q_in._grid->_rdimensions[0]; // Momentum projection. ph = zero; @@ -764,11 +797,10 @@ void WilsonFermion5D::SeqConservedCurrent(PropagatorField &q_in, if (timeSlices > 0) { - unsigned int sF = sU * Ls; - for (unsigned int s = 0; s < Ls; ++s) + unsigned int sF = sU * LLs; + for (unsigned int s = 0; s < LLs; ++s) { - bool axial_sign = ((curr_type == Current::Axial) && (s < (Ls / 2))) ? \ - true : false; + bool axial_sign = ((curr_type == Current::Axial) && (s < (LLs / 2))); Kernels::SeqConservedCurrentSiteFwd(tmpFwd._odata[sF], q_out._odata[sF], Umu, sU, mu, t_mask, axial_sign); @@ -783,11 +815,10 @@ void WilsonFermion5D::SeqConservedCurrent(PropagatorField &q_in, if (timeSlices > 0) { - unsigned int sF = sU * Ls; - for (unsigned int s = 0; s < Ls; ++s) + unsigned int sF = sU * LLs; + for (unsigned int s = 0; s < LLs; ++s) { - bool axial_sign = ((curr_type == Current::Axial) && (s < (Ls / 2))) ? 
\ - true : false; + bool axial_sign = ((curr_type == Current::Axial) && (s < (LLs / 2))); Kernels::SeqConservedCurrentSiteBwd(tmpBwd._odata[sF], q_out._odata[sF], Umu, sU, mu, t_mask, axial_sign); From 323e9c439ab0889d69c60b4736e1bd07d7724c06 Mon Sep 17 00:00:00 2001 From: Lanny91 Date: Mon, 31 Jul 2017 12:26:34 +0100 Subject: [PATCH 077/377] Hadrons: Legal banner fixes --- extras/Hadrons/Modules.hpp | 30 ++++++++++++++++++ extras/Hadrons/Modules/MFermion/GaugeProp.hpp | 31 +++++++++++++++++++ extras/Hadrons/Modules/MSink/Point.hpp | 28 +++++++++++++++++ extras/Hadrons/Modules/MSink/Smear.hpp | 28 +++++++++++++++++ .../Modules/MUtilities/TestSeqGamma.hpp | 28 +++++++++++++++++ 5 files changed, 145 insertions(+) diff --git a/extras/Hadrons/Modules.hpp b/extras/Hadrons/Modules.hpp index d0d0d80d..e1f06f32 100644 --- a/extras/Hadrons/Modules.hpp +++ b/extras/Hadrons/Modules.hpp @@ -1,3 +1,33 @@ +/************************************************************************************* + +Grid physics library, www.github.com/paboyle/Grid + +Source file: extras/Hadrons/Modules.hpp + +Copyright (C) 2015 +Copyright (C) 2016 +Copyright (C) 2017 + +Author: Antonin Portelli + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along +with this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+ +See the full license in the file "LICENSE" in the top level distribution directory +*************************************************************************************/ +/* END LEGAL */ + #include #include #include diff --git a/extras/Hadrons/Modules/MFermion/GaugeProp.hpp b/extras/Hadrons/Modules/MFermion/GaugeProp.hpp index 050f8381..8add9a00 100644 --- a/extras/Hadrons/Modules/MFermion/GaugeProp.hpp +++ b/extras/Hadrons/Modules/MFermion/GaugeProp.hpp @@ -1,3 +1,34 @@ +/************************************************************************************* + +Grid physics library, www.github.com/paboyle/Grid + +Source file: extras/Hadrons/Modules/MFermion/GaugeProp.hpp + +Copyright (C) 2015 +Copyright (C) 2016 +Copyright (C) 2017 + +Author: Antonin Portelli + Andrew Lawson + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along +with this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+ +See the full license in the file "LICENSE" in the top level distribution directory +*************************************************************************************/ +/* END LEGAL */ + #ifndef Hadrons_MFermion_GaugeProp_hpp_ #define Hadrons_MFermion_GaugeProp_hpp_ diff --git a/extras/Hadrons/Modules/MSink/Point.hpp b/extras/Hadrons/Modules/MSink/Point.hpp index 7b3aa9de..0761c4c4 100644 --- a/extras/Hadrons/Modules/MSink/Point.hpp +++ b/extras/Hadrons/Modules/MSink/Point.hpp @@ -1,3 +1,31 @@ +/************************************************************************************* + +Grid physics library, www.github.com/paboyle/Grid + +Source file: extras/Hadrons/Modules/MSink/Point.hpp + +Copyright (C) 2017 + +Author: Antonin Portelli + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along +with this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+ +See the full license in the file "LICENSE" in the top level distribution directory +*************************************************************************************/ +/* END LEGAL */ + #ifndef Hadrons_MSink_Point_hpp_ #define Hadrons_MSink_Point_hpp_ diff --git a/extras/Hadrons/Modules/MSink/Smear.hpp b/extras/Hadrons/Modules/MSink/Smear.hpp index 9327001f..c3973d2b 100644 --- a/extras/Hadrons/Modules/MSink/Smear.hpp +++ b/extras/Hadrons/Modules/MSink/Smear.hpp @@ -1,3 +1,31 @@ +/************************************************************************************* + +Grid physics library, www.github.com/paboyle/Grid + +Source file: extras/Hadrons/Modules/MSink/Smear.hpp + +Copyright (C) 2017 + +Author: Andrew Lawson + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along +with this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+ +See the full license in the file "LICENSE" in the top level distribution directory +*************************************************************************************/ +/* END LEGAL */ + #ifndef Hadrons_MSink_Smear_hpp_ #define Hadrons_MSink_Smear_hpp_ diff --git a/extras/Hadrons/Modules/MUtilities/TestSeqGamma.hpp b/extras/Hadrons/Modules/MUtilities/TestSeqGamma.hpp index 3dbd7d63..2799e5d0 100644 --- a/extras/Hadrons/Modules/MUtilities/TestSeqGamma.hpp +++ b/extras/Hadrons/Modules/MUtilities/TestSeqGamma.hpp @@ -1,3 +1,31 @@ +/************************************************************************************* + +Grid physics library, www.github.com/paboyle/Grid + +Source file: extras/Hadrons/Modules/MUtilities/TestSeqGamma.hpp + +Copyright (C) 2017 + +Author: Andrew Lawson + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along +with this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+ +See the full license in the file "LICENSE" in the top level distribution directory +*************************************************************************************/ +/* END LEGAL */ + #ifndef Hadrons_MUtilities_TestSeqGamma_hpp_ #define Hadrons_MUtilities_TestSeqGamma_hpp_ From 9939b267d26f13cf5c152705741ea7655badece9 Mon Sep 17 00:00:00 2001 From: Christopher Kelly Date: Mon, 31 Jul 2017 13:39:44 -0400 Subject: [PATCH 078/377] Added switching to fallback linear operator in reliable update CG, and added recalculation of b parameter on update. --- .../ConjugateGradientReliableUpdate.h | 29 +++++++++++++++++-- 1 file changed, 27 insertions(+), 2 deletions(-) diff --git a/lib/algorithms/iterative/ConjugateGradientReliableUpdate.h b/lib/algorithms/iterative/ConjugateGradientReliableUpdate.h index 1aab064d..13666f97 100644 --- a/lib/algorithms/iterative/ConjugateGradientReliableUpdate.h +++ b/lib/algorithms/iterative/ConjugateGradientReliableUpdate.h @@ -47,6 +47,11 @@ namespace Grid { LinearOperatorBase &Linop_d; GridBase* SinglePrecGrid; RealD Delta; //reliable update parameter + + //Optional ability to switch to a different linear operator once the tolerance reaches a certain point. 
Useful for single/half -> single/single + LinearOperatorBase *Linop_fallback; + RealD fallback_transition_tol; + ConjugateGradientReliableUpdate(RealD tol, Integer maxit, RealD _delta, GridBase* _sp_grid, LinearOperatorBase &_Linop_f, LinearOperatorBase &_Linop_d, bool err_on_no_conv = true) : Tolerance(tol), @@ -56,10 +61,19 @@ namespace Grid { Linop_d(_Linop_d), SinglePrecGrid(_sp_grid), ErrorOnNoConverge(err_on_no_conv), - DoFinalCleanup(true) + DoFinalCleanup(true), + Linop_fallback(NULL) {}; + void setFallbackLinop(LinearOperatorBase &_Linop_fallback, const RealD _fallback_transition_tol){ + Linop_fallback = &_Linop_fallback; + fallback_transition_tol = _fallback_transition_tol; + } + void operator()(const FieldD &src, FieldD &psi) { + LinearOperatorBase *Linop_f_use = &Linop_f; + bool using_fallback = false; + psi.checkerboard = src.checkerboard; conformable(psi, src); @@ -93,6 +107,8 @@ namespace Grid { // Check if guess is really REALLY good :) if (cp <= rsq) { + std::cout << GridLogMessage << "ConjugateGradientReliableUpdate guess was REALLY good\n"; + std::cout << GridLogMessage << "\tComputed residual " << sqrt(cp / ssq)<HermOpAndNorm(p_f, mmp_f, d, qq); MatrixTimer.Stop(); LinalgTimer.Start(); @@ -206,12 +222,21 @@ namespace Grid { cp = norm2(r); MaxResidSinceLastRelUp = cp; + b = cp/c; + std::cout << GridLogMessage << "ConjugateGradientReliableUpdate new residual " << cp << std::endl; l = l+1; } p_f = p_f * b + r_f; //update search vector after reliable update appears to help convergence + + if(!using_fallback && Linop_fallback != NULL && cp < fallback_transition_tol){ + std::cout << GridLogMessage << "ConjugateGradientReliableUpdate switching to fallback linear operator on iteration " << k << " at residual " << cp << std::endl; + Linop_f_use = Linop_fallback; + using_fallback = true; + } + } std::cout << GridLogMessage << "ConjugateGradientReliableUpdate did NOT converge" From 75ee6cfc86e2365baf0a50cd4bf317e7c6fb097a Mon Sep 17 00:00:00 2001 From: 
Guido Cossu Date: Fri, 4 Aug 2017 16:08:07 +0100 Subject: [PATCH 079/377] Debugging the Clover term --- .gitignore | 2 + .vscode/settings.json | 11 ++- lib/cartesian/Cartesian_base.h | 2 + lib/cartesian/Cartesian_full.h | 5 +- lib/cartesian/Cartesian_red_black.h | 1 + lib/qcd/action/fermion/WilsonCloverFermion.cc | 78 +++++++-------- lib/qcd/action/fermion/WilsonCloverFermion.h | 99 +++++++++++++++++-- tests/core/Test_wilson_clover.cc | 3 +- 8 files changed, 149 insertions(+), 52 deletions(-) diff --git a/.gitignore b/.gitignore index d743ee06..6b92b1a4 100644 --- a/.gitignore +++ b/.gitignore @@ -123,3 +123,5 @@ make-bin-BUCK.sh lib/qcd/spin/gamma-gen/*.h lib/qcd/spin/gamma-gen/*.cc +.vscode/settings.json +settings.json diff --git a/.vscode/settings.json b/.vscode/settings.json index dd8f0473..3e49029b 100644 --- a/.vscode/settings.json +++ b/.vscode/settings.json @@ -36,6 +36,15 @@ "tuple": "cpp", "type_traits": "cpp", "typeinfo": "cpp", - "utility": "cpp" + "utility": "cpp", + "iostream": "cpp", + "strstream": "cpp", + "complex": "cpp", + "fstream": "cpp", + "iomanip": "cpp", + "istream": "cpp", + "ostream": "cpp", + "sstream": "cpp", + "streambuf": "cpp" } } \ No newline at end of file diff --git a/lib/cartesian/Cartesian_base.h b/lib/cartesian/Cartesian_base.h index 0db6ce0d..e435bbba 100644 --- a/lib/cartesian/Cartesian_base.h +++ b/lib/cartesian/Cartesian_base.h @@ -69,6 +69,8 @@ public: std::vector _lstart; // local start of array in gcoors _processor_coor[d]*_ldimensions[d] std::vector _lend ; // local end of array in gcoors _processor_coor[d]*_ldimensions[d]+_ldimensions_[d]-1 + bool _isCheckerBoarded; + public: //////////////////////////////////////////////////////////////// diff --git a/lib/cartesian/Cartesian_full.h b/lib/cartesian/Cartesian_full.h index b0e47fa4..3be3e8cd 100644 --- a/lib/cartesian/Cartesian_full.h +++ b/lib/cartesian/Cartesian_full.h @@ -69,6 +69,7 @@ public: /////////////////////// // Grid information /////////////////////// + 
_isCheckerBoarded = false; _ndimension = dimensions.size(); _fdimensions.resize(_ndimension); @@ -76,8 +77,8 @@ public: _ldimensions.resize(_ndimension); _rdimensions.resize(_ndimension); _simd_layout.resize(_ndimension); - _lstart.resize(_ndimension); - _lend.resize(_ndimension); + _lstart.resize(_ndimension); + _lend.resize(_ndimension); _ostride.resize(_ndimension); _istride.resize(_ndimension); diff --git a/lib/cartesian/Cartesian_red_black.h b/lib/cartesian/Cartesian_red_black.h index 3037de00..a440538a 100644 --- a/lib/cartesian/Cartesian_red_black.h +++ b/lib/cartesian/Cartesian_red_black.h @@ -139,6 +139,7 @@ public: /////////////////////// // Grid information /////////////////////// + _isCheckerBoarded = true; _checker_dim = checker_dim; assert(checker_dim_mask[checker_dim]==1); _ndimension = dimensions.size(); diff --git a/lib/qcd/action/fermion/WilsonCloverFermion.cc b/lib/qcd/action/fermion/WilsonCloverFermion.cc index e1900830..f8b62ba4 100644 --- a/lib/qcd/action/fermion/WilsonCloverFermion.cc +++ b/lib/qcd/action/fermion/WilsonCloverFermion.cc @@ -35,27 +35,6 @@ namespace Grid namespace QCD { -//WilsonLoop::CloverPlaquette -///////////////////////////////////////////////////// -//// Clover plaquette combination in mu,nu plane with Double Stored U -//////////////////////////////////////////////////// -//static void CloverPlaquette(GaugeMat &Q, const std::vector &U, -// const int mu, const int nu){ -// Q = zero; -// Q += Gimpl::CovShiftBackward( -// U[mu], mu, Gimpl::CovShiftBackward( -// U[nu], nu, Gimpl::CovShiftForward(U[mu], mu, U[nu] ))); -// Q += Gimpl::CovShiftForward( -// U[mu], mu, Gimpl::CovShiftForward( -// U[nu], nu, Gimpl::CovShiftBackward(U[mu], mu, U[nu+Nd] ))); -// Q += Gimpl::CovShiftBackward( -// U[nu], nu, Gimpl::CovShiftForward( -// U[mu], mu, Gimpl::CovShiftForward(U[nu], nu, U[mu+Nd] ))); -// Q += Gimpl::CovShiftForward( -// U[mu], mu, Gimpl::CovShiftBackward( -// U[nu], nu, Gimpl::CovShiftBackward(U[mu], mu, U[nu] ))); -// } - // 
*NOT* EO template RealD WilsonCloverFermion::M(const FermionField &in, FermionField &out) @@ -89,10 +68,10 @@ RealD WilsonCloverFermion::Mdag(const FermionField &in, FermionField &out) template void WilsonCloverFermion::ImportGauge(const GaugeField &_Umu) { - this->ImportGauge(_Umu); + WilsonFermion::ImportGauge(_Umu); GridBase *grid = _Umu._grid; typename Impl::GaugeLinkField Bx(grid), By(grid), Bz(grid), Ex(grid), Ey(grid), Ez(grid); - + // Compute the field strength terms WilsonLoops::FieldStrength(Bx, _Umu, Ydir, Zdir); WilsonLoops::FieldStrength(By, _Umu, Zdir, Xdir); @@ -102,12 +81,12 @@ void WilsonCloverFermion::ImportGauge(const GaugeField &_Umu) WilsonLoops::FieldStrength(Ez, _Umu, Tdir, Zdir); // Compute the Clover Operator acting on Colour and Spin - CloverTerm = fillClover(Bx) * (Gamma(Gamma::Algebra::SigmaYZ)); - CloverTerm += fillClover(By) * (Gamma(Gamma::Algebra::MinusSigmaXZ)); - CloverTerm += fillClover(Bz) * (Gamma(Gamma::Algebra::SigmaXY)); - CloverTerm += fillClover(Ex) * (Gamma(Gamma::Algebra::MinusSigmaXT)); - CloverTerm += fillClover(Ey) * (Gamma(Gamma::Algebra::MinusSigmaYT)); - CloverTerm += fillClover(Ez) * (Gamma(Gamma::Algebra::MinusSigmaZT)); + CloverTerm = fillCloverYZ(Bx); + CloverTerm += fillCloverXZ(By); + CloverTerm += fillCloverXY(Bz); + CloverTerm += fillCloverXT(Ex); + CloverTerm += fillCloverYT(Ey); + CloverTerm += fillCloverZT(Ez) ; CloverTerm *= csw; int lvol = _Umu._grid->lSites(); @@ -130,8 +109,11 @@ void WilsonCloverFermion::ImportGauge(const GaugeField &_Umu) for (int a = 0; a < DimRep; a++) for (int b = 0; b < DimRep; b++) EigenCloverOp(a + j * DimRep, b + k * DimRep) = Qx()(j, k)(a, b); + //std::cout << EigenCloverOp << std::endl; + EigenInvCloverOp = EigenCloverOp.inverse(); + //std::cout << EigenInvCloverOp << std::endl; for (int j = 0; j < Ns; j++) for (int k = 0; k < Ns; k++) for (int a = 0; a < DimRep; a++) @@ -139,17 +121,21 @@ void WilsonCloverFermion::ImportGauge(const GaugeField &_Umu) Qxinv()(j, k)(a, b) = 
EigenInvCloverOp(a + j * DimRep, b + k * DimRep); pokeLocalSite(Qxinv, CloverTermInv, lcoor); - // Separate the even and odd parts. - pickCheckerboard(Even, CloverTermEven, CloverTerm); - pickCheckerboard( Odd, CloverTermOdd, CloverTerm); - pickCheckerboard(Even, CloverTermInvEven, CloverTermInv); - pickCheckerboard( Odd, CloverTermInvOdd, CloverTermInv); } + + // Separate the even and odd parts. + pickCheckerboard(Even, CloverTermEven, CloverTerm); + pickCheckerboard( Odd, CloverTermOdd, CloverTerm); + + pickCheckerboard(Even, CloverTermInvEven, CloverTermInv); + pickCheckerboard( Odd, CloverTermInvOdd, CloverTermInv); + } template void WilsonCloverFermion::Mooee(const FermionField &in, FermionField &out) { + conformable(in,out); this->MooeeInternal(in, out, DaggerNo, InverseNo); } @@ -176,15 +162,27 @@ void WilsonCloverFermion::MooeeInternal(const FermionField &in, FermionFie { out.checkerboard = in.checkerboard; CloverFieldType *Clover; - if (in.checkerboard == Odd){ - std::cout << "Calling clover term Odd" << std::endl; - Clover = (inv) ? &CloverTermInvOdd : &CloverTermOdd; + assert(in.checkerboard == Odd || in.checkerboard == Even); + + if (in._grid->_isCheckerBoarded) + { + if (in.checkerboard == Odd) + { + std::cout << "Calling clover term Odd" << std::endl; + Clover = (inv) ? &CloverTermInvOdd : &CloverTermOdd; + } + else + { + std::cout << "Calling clover term Even" << std::endl; + Clover = (inv) ? &CloverTermInvEven : &CloverTermEven; + } } - if (in.checkerboard == Even){ - std::cout << "Calling clover term Even" << std::endl; - Clover = (inv) ? &CloverTermInvEven : &CloverTermEven; + else + { + Clover = (inv) ? 
&CloverTermInv : &CloverTerm; } + std::cout << GridLogMessage << "*Clover.checkerboard " << (*Clover).checkerboard << std::endl; if (dag){ out = adj(*Clover) * in;} else { out = *Clover * in;} } // MooeeInternal diff --git a/lib/qcd/action/fermion/WilsonCloverFermion.h b/lib/qcd/action/fermion/WilsonCloverFermion.h index c9e7be39..fd9d1f60 100644 --- a/lib/qcd/action/fermion/WilsonCloverFermion.h +++ b/lib/qcd/action/fermion/WilsonCloverFermion.h @@ -94,17 +94,102 @@ private: // eventually these two can be compressed into 6x6 blocks instead of the 12x12 // using the DeGrand-Rossi basis for the gamma matrices - CloverFieldType fillClover(const GaugeLinkField& F){ + CloverFieldType fillCloverYZ(const GaugeLinkField &F) + { CloverFieldType T(F._grid); + T = zero; PARALLEL_FOR_LOOP - for (int i = 0; i < CloverTerm._grid->oSites(); i++){ - for (int s1 = 0; s1 < Nc; s1++) - for (int s2 = 0; s2 < Nc; s2++) - T._odata[i]()(s1,s2) = F._odata[i]()(); + for (int i = 0; i < CloverTerm._grid->oSites(); i++) + { + T._odata[i]()(0, 1) = timesMinusI(F._odata[i]()()); + T._odata[i]()(1, 0) = timesMinusI(F._odata[i]()()); + T._odata[i]()(2, 3) = timesMinusI(F._odata[i]()()); + T._odata[i]()(3, 2) = timesMinusI(F._odata[i]()()); } - return T; - } + return T; +} + + CloverFieldType fillCloverXZ(const GaugeLinkField &F) + { + CloverFieldType T(F._grid); + T = zero; + PARALLEL_FOR_LOOP + for (int i = 0; i < CloverTerm._grid->oSites(); i++) + { + T._odata[i]()(0, 1) = -F._odata[i]()(); + T._odata[i]()(1, 0) = F._odata[i]()(); + T._odata[i]()(2, 3) = -F._odata[i]()(); + T._odata[i]()(3, 2) = F._odata[i]()(); + } + + return T; +} + + CloverFieldType fillCloverXY(const GaugeLinkField &F) + { + CloverFieldType T(F._grid); + T = zero; + PARALLEL_FOR_LOOP + for (int i = 0; i < CloverTerm._grid->oSites(); i++) + { + T._odata[i]()(0, 0) = timesMinusI(F._odata[i]()()); + T._odata[i]()(1, 1) = timesI(F._odata[i]()()); + T._odata[i]()(2, 2) = timesMinusI(F._odata[i]()()); + T._odata[i]()(3, 3) = 
timesI(F._odata[i]()()); + } + + return T; +} + + CloverFieldType fillCloverXT(const GaugeLinkField &F) + { + CloverFieldType T(F._grid); + T = zero; + PARALLEL_FOR_LOOP + for (int i = 0; i < CloverTerm._grid->oSites(); i++) + { + T._odata[i]()(0, 1) = timesMinusI(F._odata[i]()()); + T._odata[i]()(1, 0) = timesMinusI(F._odata[i]()()); + T._odata[i]()(2, 3) = timesI(F._odata[i]()()); + T._odata[i]()(3, 2) = timesI(F._odata[i]()()); + } + + return T; +} + + CloverFieldType fillCloverYT(const GaugeLinkField &F) + { + CloverFieldType T(F._grid); + T = zero; + PARALLEL_FOR_LOOP + for (int i = 0; i < CloverTerm._grid->oSites(); i++) + { + T._odata[i]()(0, 1) = (F._odata[i]()()); + T._odata[i]()(1, 0) = -(F._odata[i]()()); + T._odata[i]()(2, 3) = -(F._odata[i]()()); + T._odata[i]()(3, 2) = (F._odata[i]()()); + } + + return T; +} + + CloverFieldType fillCloverZT(const GaugeLinkField &F) + { + CloverFieldType T(F._grid); + T = zero; + PARALLEL_FOR_LOOP + for (int i = 0; i < CloverTerm._grid->oSites(); i++) + { + T._odata[i]()(0, 0) = timesMinusI(F._odata[i]()()); + T._odata[i]()(1, 1) = timesI(F._odata[i]()()); + T._odata[i]()(2, 2) = timesI(F._odata[i]()()); + T._odata[i]()(3, 3) = timesMinusI(F._odata[i]()()); + } + + return T; +} + }; } } diff --git a/tests/core/Test_wilson_clover.cc b/tests/core/Test_wilson_clover.cc index 3df69e3b..1b208e2d 100644 --- a/tests/core/Test_wilson_clover.cc +++ b/tests/core/Test_wilson_clover.cc @@ -91,7 +91,7 @@ int main (int argc, char ** argv) } WilsonCloverFermionR Dwc(Umu,Grid,RBGrid,mass,csw,params); - + Dwc.ImportGauge(Umu); std::cout< Date: Tue, 15 Aug 2017 10:50:44 +0100 Subject: [PATCH 080/377] Correction of the dagger version of the Clover --- lib/qcd/action/fermion/WilsonCloverFermion.cc | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/lib/qcd/action/fermion/WilsonCloverFermion.cc b/lib/qcd/action/fermion/WilsonCloverFermion.cc index f8b62ba4..ce120846 100644 --- 
a/lib/qcd/action/fermion/WilsonCloverFermion.cc +++ b/lib/qcd/action/fermion/WilsonCloverFermion.cc @@ -142,19 +142,22 @@ void WilsonCloverFermion::Mooee(const FermionField &in, FermionField &out) template void WilsonCloverFermion::MooeeDag(const FermionField &in, FermionField &out) { - this->MooeeInternal(in, out, DaggerNo, InverseYes); + conformable(in,out); + this->MooeeInternal(in, out, DaggerYes, InverseYes); } template void WilsonCloverFermion::MooeeInv(const FermionField &in, FermionField &out) { + conformable(in,out); this->MooeeInternal(in, out, DaggerNo, InverseYes); } template void WilsonCloverFermion::MooeeInvDag(const FermionField &in, FermionField &out) { - this->MooeeInternal(in, out, DaggerNo, InverseYes); + conformable(in,out); + this->MooeeInternal(in, out, DaggerYes, InverseYes); } template @@ -183,7 +186,10 @@ void WilsonCloverFermion::MooeeInternal(const FermionField &in, FermionFie } std::cout << GridLogMessage << "*Clover.checkerboard " << (*Clover).checkerboard << std::endl; - if (dag){ out = adj(*Clover) * in;} else { out = *Clover * in;} + std::cout << GridLogMessage << "in.checkerboard " << in.checkerboard << std::endl; + std::cout << GridLogMessage << "out.checkerboard " << out.checkerboard << std::endl; + out = *Clover * in; + //if (dag){ out = adj(*Clover) * in;} else { out = *Clover * in;} } // MooeeInternal // Derivative parts From 202a7fe900461bf4f65947abc637b6c679839c15 Mon Sep 17 00:00:00 2001 From: David Murphy Date: Tue, 15 Aug 2017 13:36:08 -0400 Subject: [PATCH 081/377] Re-import DWF and abstract base EOFA fermion classes and tests --- lib/qcd/action/fermion/AbstractEOFAFermion.h | 100 +++ .../action/fermion/DomainWallEOFAFermion.cc | 440 +++++++++++++ .../action/fermion/DomainWallEOFAFermion.h | 115 ++++ .../fermion/DomainWallEOFAFermioncache.cc | 248 +++++++ .../fermion/DomainWallEOFAFermiondense.cc | 159 +++++ .../fermion/DomainWallEOFAFermionssp.cc | 168 +++++ .../fermion/DomainWallEOFAFermionvec.cc | 605 
++++++++++++++++++ lib/qcd/action/fermion/Fermion.h | 26 +- lib/qcd/modules/FermionOperatorModules.h | 49 +- tests/core/Test_dwf_eofa_even_odd.cc | 239 +++++++ tests/debug/Test_reweight_dwf_eofa.cc | 206 ++++++ 11 files changed, 2346 insertions(+), 9 deletions(-) create mode 100644 lib/qcd/action/fermion/AbstractEOFAFermion.h create mode 100644 lib/qcd/action/fermion/DomainWallEOFAFermion.cc create mode 100644 lib/qcd/action/fermion/DomainWallEOFAFermion.h create mode 100644 lib/qcd/action/fermion/DomainWallEOFAFermioncache.cc create mode 100644 lib/qcd/action/fermion/DomainWallEOFAFermiondense.cc create mode 100644 lib/qcd/action/fermion/DomainWallEOFAFermionssp.cc create mode 100644 lib/qcd/action/fermion/DomainWallEOFAFermionvec.cc create mode 100644 tests/core/Test_dwf_eofa_even_odd.cc create mode 100644 tests/debug/Test_reweight_dwf_eofa.cc diff --git a/lib/qcd/action/fermion/AbstractEOFAFermion.h b/lib/qcd/action/fermion/AbstractEOFAFermion.h new file mode 100644 index 00000000..abe06b8c --- /dev/null +++ b/lib/qcd/action/fermion/AbstractEOFAFermion.h @@ -0,0 +1,100 @@ +/************************************************************************************* + +Grid physics library, www.github.com/paboyle/Grid + +Source file: ./lib/qcd/action/fermion/AbstractEOFAFermion.h + +Copyright (C) 2017 + +Author: Peter Boyle +Author: Peter Boyle +Author: David Murphy + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. 
+ +You should have received a copy of the GNU General Public License along +with this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + +See the full license in the file "LICENSE" in the top level distribution directory +*************************************************************************************/ +/* END LEGAL */ +#ifndef GRID_QCD_ABSTRACT_EOFA_FERMION_H +#define GRID_QCD_ABSTRACT_EOFA_FERMION_H + +#include + +namespace Grid { +namespace QCD { + + // DJM: Abstract base class for EOFA fermion types. + // Defines layout of additional EOFA-specific parameters and operators. + // Use to construct EOFA pseudofermion actions that are agnostic to Shamir / Mobius / etc., + // and ensure that no one can construct EOFA pseudofermion action with non-EOFA fermion type. + template + class AbstractEOFAFermion : public CayleyFermion5D { + public: + INHERIT_IMPL_TYPES(Impl); + + public: + // Fermion operator: D(mq1) + shift*\gamma_{5}*R_{5}*\Delta_{\pm}(mq2,mq3)*P_{\pm} + RealD mq1; + RealD mq2; + RealD mq3; + RealD shift; + int pm; + + RealD alpha; // Mobius scale + RealD k; // EOFA normalization constant + + virtual void Instantiatable(void) = 0; + + // EOFA-specific operations + // Force user to implement in derived classes + virtual void Omega (const FermionField& in, FermionField& out, int sign, int dag) = 0; + virtual void Dtilde (const FermionField& in, FermionField& out) = 0; + virtual void DtildeInv(const FermionField& in, FermionField& out) = 0; + + // Implement derivatives in base clcass: for EOFA both DWF and Mobius just need d(Dw)/dU + virtual void MDeriv(GaugeField& mat, const FermionField& U, const FermionField& V, int dag){ + this->DhopDeriv(mat, U, V, dag); + }; + virtual void MoeDeriv(GaugeField& mat, const FermionField& U, const FermionField& V, int dag){ + this->DhopDerivOE(mat, U, V, dag); + }; + virtual void MeoDeriv(GaugeField& mat, const FermionField& U, const FermionField& 
V, int dag){ + this->DhopDerivEO(mat, U, V, dag); + }; + + // Recompute 5D coefficients for different value of shift constant (needed for heatbath loop over poles) + virtual void RefreshShiftCoefficients(RealD new_shift) = 0; + + // Constructors + AbstractEOFAFermion(GaugeField& _Umu, + GridCartesian& FiveDimGrid, + GridRedBlackCartesian& FiveDimRedBlackGrid, + GridCartesian& FourDimGrid, + GridRedBlackCartesian& FourDimRedBlackGrid, + RealD _mq1, RealD _mq2, RealD _mq3, RealD _shift, int _pm, + RealD _M5, RealD _b, RealD _c, const ImplParams &p= ImplParams()) : + CayleyFermion5D(_Umu, FiveDimGrid, FiveDimRedBlackGrid, FourDimGrid, FourDimRedBlackGrid, _mq1, _M5, p), + mq1(_mq1), mq2(_mq2), mq3(_mq3), shift(_shift), pm(_pm) + { + int Ls = this->Ls; + this->alpha = _b + _c; + this->k = this->alpha * (_mq3 - _mq2) * std::pow(this->alpha+1.0,2*Ls) / + ( std::pow(alpha+1.0,Ls) + _mq2*std::pow(alpha-1.0,Ls) ) / + ( std::pow(alpha+1.0,Ls) + _mq3*std::pow(alpha-1.0,Ls) ); + } + }; +}} + +#endif diff --git a/lib/qcd/action/fermion/DomainWallEOFAFermion.cc b/lib/qcd/action/fermion/DomainWallEOFAFermion.cc new file mode 100644 index 00000000..dc4f6504 --- /dev/null +++ b/lib/qcd/action/fermion/DomainWallEOFAFermion.cc @@ -0,0 +1,440 @@ +/************************************************************************************* + +Grid physics library, www.github.com/paboyle/Grid + +Source file: ./lib/qcd/action/fermion/DomainWallEOFAFermion.cc + +Copyright (C) 2017 + +Author: Peter Boyle +Author: Peter Boyle +Author: Peter Boyle +Author: paboyle +Author: David Murphy + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. 
+ +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along +with this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + +See the full license in the file "LICENSE" in the top level distribution directory +*************************************************************************************/ +/* END LEGAL */ + +#include +#include +#include + +namespace Grid { +namespace QCD { + + template + DomainWallEOFAFermion::DomainWallEOFAFermion( + GaugeField &_Umu, + GridCartesian &FiveDimGrid, + GridRedBlackCartesian &FiveDimRedBlackGrid, + GridCartesian &FourDimGrid, + GridRedBlackCartesian &FourDimRedBlackGrid, + RealD _mq1, RealD _mq2, RealD _mq3, + RealD _shift, int _pm, RealD _M5, const ImplParams &p) : + AbstractEOFAFermion(_Umu, FiveDimGrid, FiveDimRedBlackGrid, + FourDimGrid, FourDimRedBlackGrid, _mq1, _mq2, _mq3, + _shift, _pm, _M5, 1.0, 0.0, p) + { + RealD eps = 1.0; + Approx::zolotarev_data *zdata = Approx::higham(eps,this->Ls); + assert(zdata->n == this->Ls); + + std::cout << GridLogMessage << "DomainWallEOFAFermion with Ls=" << this->Ls << std::endl; + this->SetCoefficientsTanh(zdata, 1.0, 0.0); + + Approx::zolotarev_free(zdata); + } + + /*************************************************************** + /* Additional EOFA operators only called outside the inverter. + /* Since speed is not essential, simple axpby-style + /* implementations should be fine. 
+ /***************************************************************/ + template + void DomainWallEOFAFermion::Omega(const FermionField& psi, FermionField& Din, int sign, int dag) + { + int Ls = this->Ls; + + Din = zero; + if((sign == 1) && (dag == 0)){ axpby_ssp(Din, 0.0, psi, 1.0, psi, Ls-1, 0); } + else if((sign == -1) && (dag == 0)){ axpby_ssp(Din, 0.0, psi, 1.0, psi, 0, 0); } + else if((sign == 1 ) && (dag == 1)){ axpby_ssp(Din, 0.0, psi, 1.0, psi, 0, Ls-1); } + else if((sign == -1) && (dag == 1)){ axpby_ssp(Din, 0.0, psi, 1.0, psi, 0, 0); } + } + + // This is just the identity for DWF + template + void DomainWallEOFAFermion::Dtilde(const FermionField& psi, FermionField& chi){ chi = psi; } + + // This is just the identity for DWF + template + void DomainWallEOFAFermion::DtildeInv(const FermionField& psi, FermionField& chi){ chi = psi; } + + /*****************************************************************************************************/ + + template + RealD DomainWallEOFAFermion::M(const FermionField& psi, FermionField& chi) + { + int Ls = this->Ls; + + FermionField Din(psi._grid); + + this->Meooe5D(psi, Din); + this->DW(Din, chi, DaggerNo); + axpby(chi, 1.0, 1.0, chi, psi); + this->M5D(psi, chi); + return(norm2(chi)); + } + + template + RealD DomainWallEOFAFermion::Mdag(const FermionField& psi, FermionField& chi) + { + int Ls = this->Ls; + + FermionField Din(psi._grid); + + this->DW(psi, Din, DaggerYes); + this->MeooeDag5D(Din, chi); + this->M5Ddag(psi, chi); + axpby(chi, 1.0, 1.0, chi, psi); + return(norm2(chi)); + } + + /******************************************************************** + /* Performance critical fermion operators called inside the inverter + /********************************************************************/ + + template + void DomainWallEOFAFermion::M5D(const FermionField& psi, FermionField& chi) + { + int Ls = this->Ls; + int pm = this->pm; + RealD shift = this->shift; + RealD mq1 = this->mq1; + RealD mq2 = this->mq2; + RealD mq3 
= this->mq3; + + // coefficients for shift operator ( = shift*\gamma_{5}*R_{5}*\Delta_{\pm}(mq2,mq3)*P_{\pm} ) + Coeff_t shiftp(0.0), shiftm(0.0); + if(shift != 0.0){ + if(pm == 1){ shiftp = shift*(mq3-mq2); } + else{ shiftm = -shift*(mq3-mq2); } + } + + std::vector diag(Ls,1.0); + std::vector upper(Ls,-1.0); upper[Ls-1] = mq1 + shiftm; + std::vector lower(Ls,-1.0); lower[0] = mq1 + shiftp; + + #if(0) + std::cout << GridLogMessage << "DomainWallEOFAFermion::M5D(FF&,FF&):" << std::endl; + for(int i=0; i::iscomplex()) { + sp[l] = PplusMat (l*istride+s1*ostride,s2); + sm[l] = PminusMat(l*istride+s1*ostride,s2); + } else { + // if real + scalar_type tmp; + tmp = PplusMat (l*istride+s1*ostride,s2); + sp[l] = scalar_type(tmp.real(),tmp.real()); + tmp = PminusMat(l*istride+s1*ostride,s2); + sm[l] = scalar_type(tmp.real(),tmp.real()); + } + } + Matp[LLs*s2+s1] = Vp; + Matm[LLs*s2+s1] = Vm; + }} + } + + FermOpTemplateInstantiate(DomainWallEOFAFermion); + GparityFermOpTemplateInstantiate(DomainWallEOFAFermion); + +}} diff --git a/lib/qcd/action/fermion/DomainWallEOFAFermion.h b/lib/qcd/action/fermion/DomainWallEOFAFermion.h new file mode 100644 index 00000000..179736ba --- /dev/null +++ b/lib/qcd/action/fermion/DomainWallEOFAFermion.h @@ -0,0 +1,115 @@ +/************************************************************************************* + +Grid physics library, www.github.com/paboyle/Grid + +Source file: ./lib/qcd/action/fermion/DomainWallEOFAFermion.h + +Copyright (C) 2017 + +Author: Peter Boyle +Author: Peter Boyle +Author: David Murphy + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along +with this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + +See the full license in the file "LICENSE" in the top level distribution directory +*************************************************************************************/ +/* END LEGAL */ +#ifndef GRID_QCD_DOMAIN_WALL_EOFA_FERMION_H +#define GRID_QCD_DOMAIN_WALL_EOFA_FERMION_H + +#include + +namespace Grid { +namespace QCD { + + // DJM: EOFA with (Shamir) domain wall fermions. + // We overload and re-implement only the routines which have a different operator + // structure than the CayleyFermion5D base class. + template + class DomainWallEOFAFermion : public AbstractEOFAFermion + { + public: + INHERIT_IMPL_TYPES(Impl); + + public: + // Modified (0,Ls-1) and (Ls-1,0) elements of Mooee for red-black preconditioned Shamir EOFA + Coeff_t dm; + Coeff_t dp; + + virtual void Instantiatable(void) {}; + + // EOFA specific operators + virtual void Omega (const FermionField& in, FermionField &out, int sign, int dag); + virtual void Dtilde (const FermionField& in, FermionField &out); + virtual void DtildeInv (const FermionField& in, FermionField &out); + + // override multiply + virtual RealD M (const FermionField& in, FermionField& out); + virtual RealD Mdag (const FermionField& in, FermionField& out); + + // half checkerboard operations + virtual void Mooee (const FermionField &in, FermionField &out); + virtual void MooeeDag (const FermionField &in, FermionField &out); + virtual void MooeeInv (const FermionField &in, FermionField &out); + virtual void MooeeInvDag(const FermionField &in, FermionField &out); + + virtual void M5D (const FermionField &psi, FermionField &chi); + virtual void M5Ddag (const FermionField &psi, FermionField &chi); + + ///////////////////////////////////////////////////// + // Instantiate 
different versions depending on Impl + ///////////////////////////////////////////////////// + void M5D (const FermionField &psi, const FermionField &phi, FermionField &chi, + std::vector &lower, std::vector &diag, std::vector &upper); + void M5Ddag (const FermionField &psi, const FermionField &phi, FermionField &chi, + std::vector &lower, std::vector &diag, std::vector &upper); + void MooeeInternal (const FermionField &in, FermionField &out, int dag, int inv); + void MooeeInternalCompute(int dag, int inv, Vector >& Matp, Vector >& Matm); + void MooeeInternalAsm (const FermionField &in, FermionField &out, int LLs, int site, + Vector >& Matp, Vector >& Matm); + void MooeeInternalZAsm (const FermionField &in, FermionField &out, int LLs, int site, + Vector >& Matp, Vector >& Matm); + + virtual void RefreshShiftCoefficients(RealD new_shift); + + // Constructors + DomainWallEOFAFermion(GaugeField& _Umu, + GridCartesian& FiveDimGrid, + GridRedBlackCartesian& FiveDimRedBlackGrid, + GridCartesian& FourDimGrid, + GridRedBlackCartesian& FourDimRedBlackGrid, + RealD _mq1, RealD _mq2, RealD _mq3, RealD _shift, + int pm, RealD _M5, const ImplParams& p=ImplParams()); + + protected: + virtual void SetCoefficientsInternal(RealD zolo_hi, std::vector &gamma, RealD b, RealD c); + }; +}} + +#define INSTANTIATE_DPERP_DWF_EOFA(A)\ +template void DomainWallEOFAFermion::M5D(const FermionField& psi, const FermionField& phi, FermionField& chi,\ + std::vector& lower, std::vector& diag, std::vector& upper); \ +template void DomainWallEOFAFermion::M5Ddag(const FermionField& psi, const FermionField& phi, FermionField& chi,\ + std::vector& lower, std::vector& diag, std::vector& upper); \ +template void DomainWallEOFAFermion::MooeeInv(const FermionField& psi, FermionField& chi); \ +template void DomainWallEOFAFermion::MooeeInvDag(const FermionField& psi, FermionField& chi); + +#undef DOMAIN_WALL_EOFA_DPERP_DENSE +#define DOMAIN_WALL_EOFA_DPERP_CACHE +#undef DOMAIN_WALL_EOFA_DPERP_LINALG +#define 
DOMAIN_WALL_EOFA_DPERP_VEC + +#endif diff --git a/lib/qcd/action/fermion/DomainWallEOFAFermioncache.cc b/lib/qcd/action/fermion/DomainWallEOFAFermioncache.cc new file mode 100644 index 00000000..0b214d31 --- /dev/null +++ b/lib/qcd/action/fermion/DomainWallEOFAFermioncache.cc @@ -0,0 +1,248 @@ +/************************************************************************************* + +Grid physics library, www.github.com/paboyle/Grid + +Source file: ./lib/qcd/action/fermion/DomainWallEOFAFermioncache.cc + +Copyright (C) 2017 + +Author: Peter Boyle +Author: Peter Boyle +Author: Peter Boyle +Author: paboyle +Author: David Murphy + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along +with this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + +See the full license in the file "LICENSE" in the top level distribution directory +*************************************************************************************/ +/* END LEGAL */ + +#include +#include + +namespace Grid { +namespace QCD { + + // FIXME -- make a version of these routines with site loop outermost for cache reuse. + + // Pminus fowards + // Pplus backwards.. 
+ template + void DomainWallEOFAFermion::M5D(const FermionField& psi, const FermionField& phi, + FermionField& chi, std::vector& lower, std::vector& diag, std::vector& upper) + { + int Ls = this->Ls; + GridBase* grid = psi._grid; + + assert(phi.checkerboard == psi.checkerboard); + chi.checkerboard = psi.checkerboard; + // Flops = 6.0*(Nc*Ns) *Ls*vol + this->M5Dcalls++; + this->M5Dtime -= usecond(); + + parallel_for(int ss=0; ssoSites(); ss+=Ls){ // adds Ls + for(int s=0; sM5Dtime += usecond(); + } + + template + void DomainWallEOFAFermion::M5Ddag(const FermionField& psi, const FermionField& phi, + FermionField& chi, std::vector& lower, std::vector& diag, std::vector& upper) + { + int Ls = this->Ls; + GridBase* grid = psi._grid; + assert(phi.checkerboard == psi.checkerboard); + chi.checkerboard=psi.checkerboard; + + // Flops = 6.0*(Nc*Ns) *Ls*vol + this->M5Dcalls++; + this->M5Dtime -= usecond(); + + parallel_for(int ss=0; ssoSites(); ss+=Ls){ // adds Ls + auto tmp = psi._odata[0]; + for(int s=0; sM5Dtime += usecond(); + } + + template + void DomainWallEOFAFermion::MooeeInv(const FermionField& psi, FermionField& chi) + { + GridBase* grid = psi._grid; + int Ls = this->Ls; + + chi.checkerboard = psi.checkerboard; + + this->MooeeInvCalls++; + this->MooeeInvTime -= usecond(); + + parallel_for(int ss=0; ssoSites(); ss+=Ls){ // adds Ls + + auto tmp1 = psi._odata[0]; + auto tmp2 = psi._odata[0]; + + // flops = 12*2*Ls + 12*2*Ls + 3*12*Ls + 12*2*Ls = 12*Ls * (9) = 108*Ls flops + // Apply (L^{\prime})^{-1} + chi[ss] = psi[ss]; // chi[0]=psi[0] + for(int s=1; slee[s-1]*tmp1; + } + + // L_m^{-1} + for(int s=0; sleem[s]*tmp1; + } + + // U_m^{-1} D^{-1} + for(int s=0; sdee[s])*chi[ss+s] - (this->ueem[s]/this->dee[Ls])*tmp1; + } + spProj5m(tmp2, chi[ss+Ls-1]); + chi[ss+Ls-1] = (1.0/this->dee[Ls])*tmp1 + (1.0/this->dee[Ls-1])*tmp2; + + // Apply U^{-1} + for(int s=Ls-2; s>=0; s--){ + spProj5m(tmp1, chi[ss+s+1]); + chi[ss+s] = chi[ss+s] - this->uee[s]*tmp1; + } + } + + 
this->MooeeInvTime += usecond(); + } + + template + void DomainWallEOFAFermion::MooeeInvDag(const FermionField& psi, FermionField& chi) + { + GridBase* grid = psi._grid; + int Ls = this->Ls; + + assert(psi.checkerboard == psi.checkerboard); + chi.checkerboard = psi.checkerboard; + + std::vector ueec(Ls); + std::vector deec(Ls+1); + std::vector leec(Ls); + std::vector ueemc(Ls); + std::vector leemc(Ls); + + for(int s=0; suee[s]); + deec[s] = conjugate(this->dee[s]); + leec[s] = conjugate(this->lee[s]); + ueemc[s] = conjugate(this->ueem[s]); + leemc[s] = conjugate(this->leem[s]); + } + deec[Ls] = conjugate(this->dee[Ls]); + + this->MooeeInvCalls++; + this->MooeeInvTime -= usecond(); + + parallel_for(int ss=0; ssoSites(); ss+=Ls){ // adds Ls + + auto tmp1 = psi._odata[0]; + auto tmp2 = psi._odata[0]; + + // Apply (U^{\prime})^{-dagger} + chi[ss] = psi[ss]; + for(int s=1; s=0; s--){ + spProj5p(tmp1, chi[ss+s+1]); + chi[ss+s] = chi[ss+s] - leec[s]*tmp1; + } + } + + this->MooeeInvTime += usecond(); + } + + #ifdef DOMAIN_WALL_EOFA_DPERP_CACHE + + INSTANTIATE_DPERP_DWF_EOFA(WilsonImplF); + INSTANTIATE_DPERP_DWF_EOFA(WilsonImplD); + INSTANTIATE_DPERP_DWF_EOFA(GparityWilsonImplF); + INSTANTIATE_DPERP_DWF_EOFA(GparityWilsonImplD); + INSTANTIATE_DPERP_DWF_EOFA(ZWilsonImplF); + INSTANTIATE_DPERP_DWF_EOFA(ZWilsonImplD); + + INSTANTIATE_DPERP_DWF_EOFA(WilsonImplFH); + INSTANTIATE_DPERP_DWF_EOFA(WilsonImplDF); + INSTANTIATE_DPERP_DWF_EOFA(GparityWilsonImplFH); + INSTANTIATE_DPERP_DWF_EOFA(GparityWilsonImplDF); + INSTANTIATE_DPERP_DWF_EOFA(ZWilsonImplFH); + INSTANTIATE_DPERP_DWF_EOFA(ZWilsonImplDF); + + #endif + +}} diff --git a/lib/qcd/action/fermion/DomainWallEOFAFermiondense.cc b/lib/qcd/action/fermion/DomainWallEOFAFermiondense.cc new file mode 100644 index 00000000..c27074d9 --- /dev/null +++ b/lib/qcd/action/fermion/DomainWallEOFAFermiondense.cc @@ -0,0 +1,159 @@ +/************************************************************************************* + +Grid physics library, 
www.github.com/paboyle/Grid + +Source file: ./lib/qcd/action/fermion/DomainWallEOFAFermiondense.cc + +Copyright (C) 2017 + +Author: Peter Boyle +Author: Peter Boyle +Author: Peter Boyle +Author: paboyle +Author: David Murphy + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along +with this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + +See the full license in the file "LICENSE" in the top level distribution directory +*************************************************************************************/ +/* END LEGAL */ + +#include +#include +#include + +namespace Grid { +namespace QCD { + + /* + * Dense matrix versions of routines + */ + template + void DomainWallEOFAFermion::MooeeInvDag(const FermionField& psi, FermionField& chi) + { + this->MooeeInternal(psi, chi, DaggerYes, InverseYes); + } + + template + void DomainWallEOFAFermion::MooeeInv(const FermionField& psi, FermionField& chi) + { + this->MooeeInternal(psi, chi, DaggerNo, InverseYes); + } + + template + void DomainWallEOFAFermion::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv) + { + int Ls = this->Ls; + int LLs = psi._grid->_rdimensions[0]; + int vol = psi._grid->oSites()/LLs; + + chi.checkerboard = psi.checkerboard; + + assert(Ls==LLs); + + Eigen::MatrixXd Pplus = Eigen::MatrixXd::Zero(Ls,Ls); + Eigen::MatrixXd Pminus = Eigen::MatrixXd::Zero(Ls,Ls); + + for(int s=0;sbee[s]; + Pminus(s,s) = 
this->bee[s]; + } + + for(int s=0; scee[s]; + } + + for(int s=0; scee[s+1]; + } + + Pplus (0,Ls-1) = this->dp; + Pminus(Ls-1,0) = this->dm; + + Eigen::MatrixXd PplusMat ; + Eigen::MatrixXd PminusMat; + + if(inv) { + PplusMat = Pplus.inverse(); + PminusMat = Pminus.inverse(); + } else { + PplusMat = Pplus; + PminusMat = Pminus; + } + + if(dag){ + PplusMat.adjointInPlace(); + PminusMat.adjointInPlace(); + } + + // For the non-vectorised s-direction this is simple + + for(auto site=0; site::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv); + template void DomainWallEOFAFermion::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv); + template void DomainWallEOFAFermion::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv); + template void DomainWallEOFAFermion::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv); + template void DomainWallEOFAFermion::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv); + template void DomainWallEOFAFermion::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv); + + INSTANTIATE_DPERP_DWF_EOFA(GparityWilsonImplFH); + INSTANTIATE_DPERP_DWF_EOFA(GparityWilsonImplDF); + INSTANTIATE_DPERP_DWF_EOFA(WilsonImplFH); + INSTANTIATE_DPERP_DWF_EOFA(WilsonImplDF); + INSTANTIATE_DPERP_DWF_EOFA(ZWilsonImplFH); + INSTANTIATE_DPERP_DWF_EOFA(ZWilsonImplDF); + + template void DomainWallEOFAFermion::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv); + template void DomainWallEOFAFermion::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv); + template void DomainWallEOFAFermion::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv); + template void DomainWallEOFAFermion::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv); + template void DomainWallEOFAFermion::MooeeInternal(const FermionField& psi, FermionField& chi, int 
dag, int inv); + template void DomainWallEOFAFermion::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv); + + #endif + +}} diff --git a/lib/qcd/action/fermion/DomainWallEOFAFermionssp.cc b/lib/qcd/action/fermion/DomainWallEOFAFermionssp.cc new file mode 100644 index 00000000..80a4bf09 --- /dev/null +++ b/lib/qcd/action/fermion/DomainWallEOFAFermionssp.cc @@ -0,0 +1,168 @@ +/************************************************************************************* + +Grid physics library, www.github.com/paboyle/Grid + +Source file: ./lib/qcd/action/fermion/DomainWallEOFAFermionssp.cc + +Copyright (C) 2017 + +Author: Peter Boyle +Author: Peter Boyle +Author: Peter Boyle +Author: paboyle +Author: David Murphy + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along +with this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + +See the full license in the file "LICENSE" in the top level distribution directory +*************************************************************************************/ +/* END LEGAL */ + +#include +#include + +namespace Grid { +namespace QCD { + + // FIXME -- make a version of these routines with site loop outermost for cache reuse. 
+ // Pminus fowards + // Pplus backwards + template + void DomainWallEOFAFermion::M5D(const FermionField& psi, const FermionField& phi, + FermionField& chi, std::vector& lower, std::vector& diag, std::vector& upper) + { + Coeff_t one(1.0); + int Ls = this->Ls; + for(int s=0; s + void DomainWallEOFAFermion::M5Ddag(const FermionField& psi, const FermionField& phi, + FermionField& chi, std::vector& lower, std::vector& diag, std::vector& upper) + { + Coeff_t one(1.0); + int Ls = this->Ls; + for(int s=0; s + void DomainWallEOFAFermion::MooeeInv(const FermionField& psi, FermionField& chi) + { + Coeff_t one(1.0); + Coeff_t czero(0.0); + chi.checkerboard = psi.checkerboard; + int Ls = this->Ls; + + FermionField tmp(psi._grid); + + // Apply (L^{\prime})^{-1} + axpby_ssp(chi, one, psi, czero, psi, 0, 0); // chi[0]=psi[0] + for(int s=1; slee[s-1], chi, s, s-1);// recursion Psi[s] -lee P_+ chi[s-1] + } + + // L_m^{-1} + for(int s=0; sleem[s], chi, Ls-1, s); + } + + // U_m^{-1} D^{-1} + for(int s=0; sdee[s], chi, -this->ueem[s]/this->dee[Ls], chi, s, Ls-1); + } + axpby_ssp_pminus(tmp, czero, chi, one/this->dee[Ls-1], chi, Ls-1, Ls-1); + axpby_ssp_pplus(chi, one, tmp, one/this->dee[Ls], chi, Ls-1, Ls-1); + + // Apply U^{-1} + for(int s=Ls-2; s>=0; s--){ + axpby_ssp_pminus(chi, one, chi, -this->uee[s], chi, s, s+1); // chi[Ls] + } + } + + template + void DomainWallEOFAFermion::MooeeInvDag(const FermionField& psi, FermionField& chi) + { + Coeff_t one(1.0); + Coeff_t czero(0.0); + chi.checkerboard = psi.checkerboard; + int Ls = this->Ls; + + FermionField tmp(psi._grid); + + // Apply (U^{\prime})^{-dagger} + axpby_ssp(chi, one, psi, czero, psi, 0, 0); // chi[0]=psi[0] + for(int s=1; suee[s-1]), chi, s, s-1); + } + + // U_m^{-\dagger} + for(int s=0; sueem[s]), chi, Ls-1, s); + } + + // L_m^{-\dagger} D^{-dagger} + for(int s=0; sdee[s]), chi, -conjugate(this->leem[s]/this->dee[Ls-1]), chi, s, Ls-1); + } + axpby_ssp_pminus(tmp, czero, chi, one/conjugate(this->dee[Ls-1]), chi, Ls-1, 
Ls-1); + axpby_ssp_pplus(chi, one, tmp, one/conjugate(this->dee[Ls]), chi, Ls-1, Ls-1); + + // Apply L^{-dagger} + for(int s=Ls-2; s>=0; s--){ + axpby_ssp_pplus(chi, one, chi, -conjugate(this->lee[s]), chi, s, s+1); // chi[Ls] + } + } + + #ifdef DOMAIN_WALL_EOFA_DPERP_LINALG + + INSTANTIATE_DPERP_DWF_EOFA(WilsonImplF); + INSTANTIATE_DPERP_DWF_EOFA(WilsonImplD); + INSTANTIATE_DPERP_DWF_EOFA(GparityWilsonImplF); + INSTANTIATE_DPERP_DWF_EOFA(GparityWilsonImplD); + INSTANTIATE_DPERP_DWF_EOFA(ZWilsonImplF); + INSTANTIATE_DPERP_DWF_EOFA(ZWilsonImplD); + + INSTANTIATE_DPERP_DWF_EOFA(WilsonImplFH); + INSTANTIATE_DPERP_DWF_EOFA(WilsonImplDF); + INSTANTIATE_DPERP_DWF_EOFA(GparityWilsonImplFH); + INSTANTIATE_DPERP_DWF_EOFA(GparityWilsonImplDF); + INSTANTIATE_DPERP_DWF_EOFA(ZWilsonImplFH); + INSTANTIATE_DPERP_DWF_EOFA(ZWilsonImplDF); + + #endif + +}} diff --git a/lib/qcd/action/fermion/DomainWallEOFAFermionvec.cc b/lib/qcd/action/fermion/DomainWallEOFAFermionvec.cc new file mode 100644 index 00000000..81ce448c --- /dev/null +++ b/lib/qcd/action/fermion/DomainWallEOFAFermionvec.cc @@ -0,0 +1,605 @@ +/************************************************************************************* + +Grid physics library, www.github.com/paboyle/Grid + +Source file: ./lib/qcd/action/fermion/DomainWallEOFAFermionvec.cc + +Copyright (C) 2017 + +Author: Peter Boyle +Author: Peter Boyle +Author: Peter Boyle +Author: paboyle +Author: David Murphy + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. 
+ +You should have received a copy of the GNU General Public License along +with this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + +See the full license in the file "LICENSE" in the top level distribution directory +*************************************************************************************/ +/* END LEGAL */ + +#include +#include + +namespace Grid { +namespace QCD { + + /* + * Dense matrix versions of routines + */ + template + void DomainWallEOFAFermion::MooeeInvDag(const FermionField& psi, FermionField& chi) + { + this->MooeeInternal(psi, chi, DaggerYes, InverseYes); + } + + template + void DomainWallEOFAFermion::MooeeInv(const FermionField& psi, FermionField& chi) + { + this->MooeeInternal(psi, chi, DaggerNo, InverseYes); + } + + template + void DomainWallEOFAFermion::M5D(const FermionField& psi, const FermionField& phi, + FermionField& chi, std::vector& lower, std::vector& diag, std::vector& upper) + { + GridBase* grid = psi._grid; + int Ls = this->Ls; + int LLs = grid->_rdimensions[0]; + const int nsimd = Simd::Nsimd(); + + Vector > u(LLs); + Vector > l(LLs); + Vector > d(LLs); + + assert(Ls/LLs == nsimd); + assert(phi.checkerboard == psi.checkerboard); + + chi.checkerboard = psi.checkerboard; + + // just directly address via type pun + typedef typename Simd::scalar_type scalar_type; + scalar_type* u_p = (scalar_type*) &u[0]; + scalar_type* l_p = (scalar_type*) &l[0]; + scalar_type* d_p = (scalar_type*) &d[0]; + + for(int o=0;oM5Dcalls++; + this->M5Dtime -= usecond(); + + assert(Nc == 3); + + parallel_for(int ss=0; ssoSites(); ss+=LLs){ // adds LLs + + #if 0 + + alignas(64) SiteHalfSpinor hp; + alignas(64) SiteHalfSpinor hm; + alignas(64) SiteSpinor fp; + alignas(64) SiteSpinor fm; + + for(int v=0; v= v){ rotate(hm, hm, nsimd-1); } + + hp = 0.5*hp; + hm = 0.5*hm; + + spRecon5m(fp, hp); + spRecon5p(fm, hm); + + chi[ss+v] = d[v]*phi[ss+v]; + chi[ss+v] = chi[ss+v] + u[v]*fp; 
+ chi[ss+v] = chi[ss+v] + l[v]*fm; + + } + + #else + + for(int v=0; v(hp_00.v); + hp_01.v = Optimization::Rotate::tRotate<2>(hp_01.v); + hp_02.v = Optimization::Rotate::tRotate<2>(hp_02.v); + hp_10.v = Optimization::Rotate::tRotate<2>(hp_10.v); + hp_11.v = Optimization::Rotate::tRotate<2>(hp_11.v); + hp_12.v = Optimization::Rotate::tRotate<2>(hp_12.v); + } + + if(vm >= v){ + hm_00.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hm_00.v); + hm_01.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hm_01.v); + hm_02.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hm_02.v); + hm_10.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hm_10.v); + hm_11.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hm_11.v); + hm_12.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hm_12.v); + } + + // Can force these to real arithmetic and save 2x. + Simd p_00 = switcheroo::mult(d[v]()()(), phi[ss+v]()(0)(0)) + switcheroo::mult(l[v]()()(), hm_00); + Simd p_01 = switcheroo::mult(d[v]()()(), phi[ss+v]()(0)(1)) + switcheroo::mult(l[v]()()(), hm_01); + Simd p_02 = switcheroo::mult(d[v]()()(), phi[ss+v]()(0)(2)) + switcheroo::mult(l[v]()()(), hm_02); + Simd p_10 = switcheroo::mult(d[v]()()(), phi[ss+v]()(1)(0)) + switcheroo::mult(l[v]()()(), hm_10); + Simd p_11 = switcheroo::mult(d[v]()()(), phi[ss+v]()(1)(1)) + switcheroo::mult(l[v]()()(), hm_11); + Simd p_12 = switcheroo::mult(d[v]()()(), phi[ss+v]()(1)(2)) + switcheroo::mult(l[v]()()(), hm_12); + Simd p_20 = switcheroo::mult(d[v]()()(), phi[ss+v]()(2)(0)) + switcheroo::mult(u[v]()()(), hp_00); + Simd p_21 = switcheroo::mult(d[v]()()(), phi[ss+v]()(2)(1)) + switcheroo::mult(u[v]()()(), hp_01); + Simd p_22 = switcheroo::mult(d[v]()()(), phi[ss+v]()(2)(2)) + switcheroo::mult(u[v]()()(), hp_02); + Simd p_30 = switcheroo::mult(d[v]()()(), phi[ss+v]()(3)(0)) + switcheroo::mult(u[v]()()(), hp_10); + Simd p_31 = switcheroo::mult(d[v]()()(), phi[ss+v]()(3)(1)) + switcheroo::mult(u[v]()()(), hp_11); + Simd p_32 = 
switcheroo::mult(d[v]()()(), phi[ss+v]()(3)(2)) + switcheroo::mult(u[v]()()(), hp_12); + + vstream(chi[ss+v]()(0)(0), p_00); + vstream(chi[ss+v]()(0)(1), p_01); + vstream(chi[ss+v]()(0)(2), p_02); + vstream(chi[ss+v]()(1)(0), p_10); + vstream(chi[ss+v]()(1)(1), p_11); + vstream(chi[ss+v]()(1)(2), p_12); + vstream(chi[ss+v]()(2)(0), p_20); + vstream(chi[ss+v]()(2)(1), p_21); + vstream(chi[ss+v]()(2)(2), p_22); + vstream(chi[ss+v]()(3)(0), p_30); + vstream(chi[ss+v]()(3)(1), p_31); + vstream(chi[ss+v]()(3)(2), p_32); + } + + #endif + } + + this->M5Dtime += usecond(); + } + + template + void DomainWallEOFAFermion::M5Ddag(const FermionField& psi, const FermionField& phi, + FermionField& chi, std::vector& lower, std::vector& diag, std::vector& upper) + { + GridBase* grid = psi._grid; + int Ls = this->Ls; + int LLs = grid->_rdimensions[0]; + int nsimd = Simd::Nsimd(); + + Vector > u(LLs); + Vector > l(LLs); + Vector > d(LLs); + + assert(Ls/LLs == nsimd); + assert(phi.checkerboard == psi.checkerboard); + + chi.checkerboard = psi.checkerboard; + + // just directly address via type pun + typedef typename Simd::scalar_type scalar_type; + scalar_type* u_p = (scalar_type*) &u[0]; + scalar_type* l_p = (scalar_type*) &l[0]; + scalar_type* d_p = (scalar_type*) &d[0]; + + for(int o=0; oM5Dcalls++; + this->M5Dtime -= usecond(); + + parallel_for(int ss=0; ssoSites(); ss+=LLs){ // adds LLs + + #if 0 + + alignas(64) SiteHalfSpinor hp; + alignas(64) SiteHalfSpinor hm; + alignas(64) SiteSpinor fp; + alignas(64) SiteSpinor fm; + + for(int v=0; v= v){ rotate(hm, hm, nsimd-1); } + + hp = hp*0.5; + hm = hm*0.5; + spRecon5p(fp, hp); + spRecon5m(fm, hm); + + chi[ss+v] = d[v]*phi[ss+v]+u[v]*fp; + chi[ss+v] = chi[ss+v] +l[v]*fm; + } + + #else + + for(int v=0; v(hp_00.v); + hp_01.v = Optimization::Rotate::tRotate<2>(hp_01.v); + hp_02.v = Optimization::Rotate::tRotate<2>(hp_02.v); + hp_10.v = Optimization::Rotate::tRotate<2>(hp_10.v); + hp_11.v = Optimization::Rotate::tRotate<2>(hp_11.v); + 
hp_12.v = Optimization::Rotate::tRotate<2>(hp_12.v); + } + + if(vm >= v){ + hm_00.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hm_00.v); + hm_01.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hm_01.v); + hm_02.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hm_02.v); + hm_10.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hm_10.v); + hm_11.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hm_11.v); + hm_12.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hm_12.v); + } + + Simd p_00 = switcheroo::mult(d[v]()()(), phi[ss+v]()(0)(0)) + switcheroo::mult(u[v]()()(), hp_00); + Simd p_01 = switcheroo::mult(d[v]()()(), phi[ss+v]()(0)(1)) + switcheroo::mult(u[v]()()(), hp_01); + Simd p_02 = switcheroo::mult(d[v]()()(), phi[ss+v]()(0)(2)) + switcheroo::mult(u[v]()()(), hp_02); + Simd p_10 = switcheroo::mult(d[v]()()(), phi[ss+v]()(1)(0)) + switcheroo::mult(u[v]()()(), hp_10); + Simd p_11 = switcheroo::mult(d[v]()()(), phi[ss+v]()(1)(1)) + switcheroo::mult(u[v]()()(), hp_11); + Simd p_12 = switcheroo::mult(d[v]()()(), phi[ss+v]()(1)(2)) + switcheroo::mult(u[v]()()(), hp_12); + Simd p_20 = switcheroo::mult(d[v]()()(), phi[ss+v]()(2)(0)) + switcheroo::mult(l[v]()()(), hm_00); + Simd p_21 = switcheroo::mult(d[v]()()(), phi[ss+v]()(2)(1)) + switcheroo::mult(l[v]()()(), hm_01); + Simd p_22 = switcheroo::mult(d[v]()()(), phi[ss+v]()(2)(2)) + switcheroo::mult(l[v]()()(), hm_02); + Simd p_30 = switcheroo::mult(d[v]()()(), phi[ss+v]()(3)(0)) + switcheroo::mult(l[v]()()(), hm_10); + Simd p_31 = switcheroo::mult(d[v]()()(), phi[ss+v]()(3)(1)) + switcheroo::mult(l[v]()()(), hm_11); + Simd p_32 = switcheroo::mult(d[v]()()(), phi[ss+v]()(3)(2)) + switcheroo::mult(l[v]()()(), hm_12); + + vstream(chi[ss+v]()(0)(0), p_00); + vstream(chi[ss+v]()(0)(1), p_01); + vstream(chi[ss+v]()(0)(2), p_02); + vstream(chi[ss+v]()(1)(0), p_10); + vstream(chi[ss+v]()(1)(1), p_11); + vstream(chi[ss+v]()(1)(2), p_12); + vstream(chi[ss+v]()(2)(0), p_20); + 
vstream(chi[ss+v]()(2)(1), p_21); + vstream(chi[ss+v]()(2)(2), p_22); + vstream(chi[ss+v]()(3)(0), p_30); + vstream(chi[ss+v]()(3)(1), p_31); + vstream(chi[ss+v]()(3)(2), p_32); + } + #endif + + } + + this->M5Dtime += usecond(); + } + + #ifdef AVX512 + #include + #include + #include + #endif + + template + void DomainWallEOFAFermion::MooeeInternalAsm(const FermionField& psi, FermionField& chi, + int LLs, int site, Vector >& Matp, Vector >& Matm) + { + #ifndef AVX512 + { + SiteHalfSpinor BcastP; + SiteHalfSpinor BcastM; + SiteHalfSpinor SiteChiP; + SiteHalfSpinor SiteChiM; + + // Ls*Ls * 2 * 12 * vol flops + for(int s1=0; s1); + for(int s1=0; s1 + void DomainWallEOFAFermion::MooeeInternalZAsm(const FermionField& psi, FermionField& chi, + int LLs, int site, Vector >& Matp, Vector >& Matm) + { + std::cout << "Error: zMobius not implemented for EOFA" << std::endl; + exit(-1); + }; + + template + void DomainWallEOFAFermion::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv) + { + int Ls = this->Ls; + int LLs = psi._grid->_rdimensions[0]; + int vol = psi._grid->oSites()/LLs; + + chi.checkerboard = psi.checkerboard; + + Vector > Matp; + Vector > Matm; + Vector > *_Matp; + Vector > *_Matm; + + // MooeeInternalCompute(dag,inv,Matp,Matm); + if(inv && dag){ + _Matp = &this->MatpInvDag; + _Matm = &this->MatmInvDag; + } + + if(inv && (!dag)){ + _Matp = &this->MatpInv; + _Matm = &this->MatmInv; + } + + if(!inv){ + MooeeInternalCompute(dag, inv, Matp, Matm); + _Matp = &Matp; + _Matm = &Matm; + } + + assert(_Matp->size() == Ls*LLs); + + this->MooeeInvCalls++; + this->MooeeInvTime -= usecond(); + + if(switcheroo::iscomplex()){ + parallel_for(auto site=0; siteMooeeInvTime += usecond(); + } + + #ifdef DOMAIN_WALL_EOFA_DPERP_VEC + + INSTANTIATE_DPERP_DWF_EOFA(DomainWallVec5dImplD); + INSTANTIATE_DPERP_DWF_EOFA(DomainWallVec5dImplF); + INSTANTIATE_DPERP_DWF_EOFA(ZDomainWallVec5dImplD); + INSTANTIATE_DPERP_DWF_EOFA(ZDomainWallVec5dImplF); + + 
INSTANTIATE_DPERP_DWF_EOFA(DomainWallVec5dImplDF); + INSTANTIATE_DPERP_DWF_EOFA(DomainWallVec5dImplFH); + INSTANTIATE_DPERP_DWF_EOFA(ZDomainWallVec5dImplDF); + INSTANTIATE_DPERP_DWF_EOFA(ZDomainWallVec5dImplFH); + + template void DomainWallEOFAFermion::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv); + template void DomainWallEOFAFermion::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv); + template void DomainWallEOFAFermion::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv); + template void DomainWallEOFAFermion::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv); + + template void DomainWallEOFAFermion::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv); + template void DomainWallEOFAFermion::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv); + template void DomainWallEOFAFermion::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv); + template void DomainWallEOFAFermion::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv); + + #endif + +}} diff --git a/lib/qcd/action/fermion/Fermion.h b/lib/qcd/action/fermion/Fermion.h index 0f803f44..ac2a94b2 100644 --- a/lib/qcd/action/fermion/Fermion.h +++ b/lib/qcd/action/fermion/Fermion.h @@ -55,7 +55,7 @@ Author: Peter Boyle #include #include // Cayley types #include -#include +#include #include #include #include @@ -113,6 +113,14 @@ typedef DomainWallFermion DomainWallFermionRL; typedef DomainWallFermion DomainWallFermionFH; typedef DomainWallFermion DomainWallFermionDF; +typedef DomainWallEOFAFermion DomainWallEOFAFermionR; +typedef DomainWallEOFAFermion DomainWallEOFAFermionF; +typedef DomainWallEOFAFermion DomainWallEOFAFermionD; + +typedef DomainWallEOFAFermion DomainWallEOFAFermionRL; +typedef DomainWallEOFAFermion DomainWallEOFAFermionFH; +typedef DomainWallEOFAFermion DomainWallEOFAFermionDF; + typedef 
MobiusFermion MobiusFermionR; typedef MobiusFermion MobiusFermionF; typedef MobiusFermion MobiusFermionD; @@ -138,6 +146,14 @@ typedef DomainWallFermion DomainWallFermionVec5dRL; typedef DomainWallFermion DomainWallFermionVec5dFH; typedef DomainWallFermion DomainWallFermionVec5dDF; +typedef DomainWallEOFAFermion DomainWallEOFAFermionVec5dR; +typedef DomainWallEOFAFermion DomainWallEOFAFermionVec5dF; +typedef DomainWallEOFAFermion DomainWallEOFAFermionVec5dD; + +typedef DomainWallEOFAFermion DomainWallEOFAFermionVec5dRL; +typedef DomainWallEOFAFermion DomainWallEOFAFermionVec5dFH; +typedef DomainWallEOFAFermion DomainWallEOFAFermionVec5dDF; + typedef MobiusFermion MobiusFermionVec5dR; typedef MobiusFermion MobiusFermionVec5dF; typedef MobiusFermion MobiusFermionVec5dD; @@ -206,6 +222,14 @@ typedef DomainWallFermion GparityDomainWallFermionRL; typedef DomainWallFermion GparityDomainWallFermionFH; typedef DomainWallFermion GparityDomainWallFermionDF; +typedef DomainWallEOFAFermion GparityDomainWallEOFAFermionR; +typedef DomainWallEOFAFermion GparityDomainWallEOFAFermionF; +typedef DomainWallEOFAFermion GparityDomainWallEOFAFermionD; + +typedef DomainWallEOFAFermion GparityDomainWallEOFAFermionRL; +typedef DomainWallEOFAFermion GparityDomainWallEOFAFermionFH; +typedef DomainWallEOFAFermion GparityDomainWallEOFAFermionDF; + typedef WilsonTMFermion GparityWilsonTMFermionR; typedef WilsonTMFermion GparityWilsonTMFermionF; typedef WilsonTMFermion GparityWilsonTMFermionD; diff --git a/lib/qcd/modules/FermionOperatorModules.h b/lib/qcd/modules/FermionOperatorModules.h index c66842c6..fc9d96a7 100644 --- a/lib/qcd/modules/FermionOperatorModules.h +++ b/lib/qcd/modules/FermionOperatorModules.h @@ -72,7 +72,7 @@ protected: } virtual unsigned int Ls(){ - return 0; + return 0; } virtual void print_parameters(){ @@ -97,7 +97,7 @@ class HMC_FermionOperatorModuleFactory : public Factory < FermionOperatorModuleBase > , Reader > { public: // use SINGLETON FUNCTOR MACRO HERE - typedef 
Reader TheReader; + typedef Reader TheReader; HMC_FermionOperatorModuleFactory(const HMC_FermionOperatorModuleFactory& e) = delete; void operator=(const HMC_FermionOperatorModuleFactory& e) = delete; @@ -122,7 +122,7 @@ namespace QCD{ // Modules class WilsonFermionParameters : Serializable { public: - GRID_SERIALIZABLE_CLASS_MEMBERS(WilsonFermionParameters, + GRID_SERIALIZABLE_CLASS_MEMBERS(WilsonFermionParameters, RealD, mass); }; @@ -144,7 +144,7 @@ class WilsonFermionModule: public FermionOperatorModuleGridRefs[0]; auto GridMod5d = this->GridRefs[1]; typename FermionImpl::GaugeField U(GridMod->get_full()); - this->FOPtr.reset(new MobiusFermion( U, *(GridMod->get_full()), *(GridMod->get_rb()), + this->FOPtr.reset(new MobiusFermion( U, *(GridMod->get_full()), *(GridMod->get_rb()), *(GridMod5d->get_full()), *(GridMod5d->get_rb()), this->Par_.mass, this->Par_.M5, this->Par_.b, this->Par_.c)); } @@ -175,7 +175,7 @@ class MobiusFermionModule: public FermionOperatorModuleGridRefs[0]; auto GridMod5d = this->GridRefs[1]; typename FermionImpl::GaugeField U(GridMod->get_full()); - this->FOPtr.reset(new DomainWallFermion( U, *(GridMod->get_full()), *(GridMod->get_rb()), + this->FOPtr.reset(new DomainWallFermion( U, *(GridMod->get_full()), *(GridMod->get_rb()), *(GridMod5d->get_full()), *(GridMod5d->get_rb()), this->Par_.mass, this->Par_.M5)); } }; +class DomainWallEOFAFermionParameters : Serializable { + public: + GRID_SERIALIZABLE_CLASS_MEMBERS(DomainWallEOFAFermionParameters, + RealD, mq1, + RealD, mq2, + RealD, mq3, + RealD, shift, + int, pm, + RealD, M5, + unsigned int, Ls); +}; + +template +class DomainWallEOFAFermionModule: public FermionOperatorModule { + typedef FermionOperatorModule FermBase; + using FermBase::FermBase; // for constructors + + virtual unsigned int Ls(){ + return this->Par_.Ls; + } + + // acquire resource + virtual void initialize(){ + auto GridMod = this->GridRefs[0]; + auto GridMod5d = this->GridRefs[1]; + typename FermionImpl::GaugeField 
U(GridMod->get_full()); + this->FOPtr.reset(new DomainWallEOFAFermion( U, *(GridMod->get_full()), *(GridMod->get_rb()), + *(GridMod5d->get_full()), *(GridMod5d->get_rb()), + this->Par_.mq1, this->Par_.mq2, this->Par_.mq3, + this->Par_.shift, this->Par_.pm, this->Par_.M5)); + } +}; + } // QCD } // Grid -#endif //FERMIONOPERATOR_MODULES_H \ No newline at end of file +#endif //FERMIONOPERATOR_MODULES_H diff --git a/tests/core/Test_dwf_eofa_even_odd.cc b/tests/core/Test_dwf_eofa_even_odd.cc new file mode 100644 index 00000000..5fe0f653 --- /dev/null +++ b/tests/core/Test_dwf_eofa_even_odd.cc @@ -0,0 +1,239 @@ +/************************************************************************************* + +Grid physics library, www.github.com/paboyle/Grid + +Source file: ./tests/core/Test_dwf_eofa_even_odd.cc + +Copyright (C) 2017 + +Author: Peter Boyle +Author: paboyle +Author: David Murphy + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along +with this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+ +See the full license in the file "LICENSE" in the top level distribution directory +*************************************************************************************/ +/* END LEGAL */ + +#include + +using namespace std; +using namespace Grid; +using namespace Grid::QCD; + +template +struct scal { + d internal; +}; + +Gamma::Algebra Gmu [] = { + Gamma::Algebra::GammaX, + Gamma::Algebra::GammaY, + Gamma::Algebra::GammaZ, + Gamma::Algebra::GammaT +}; + +int main (int argc, char ** argv) +{ + Grid_init(&argc, &argv); + + int threads = GridThread::GetThreads(); + std::cout << GridLogMessage << "Grid is setup to use " << threads << " threads" << std::endl; + + const int Ls = 8; + // GridCartesian* UGrid = SpaceTimeGrid::makeFourDimGrid(GridDefaultLatt(), GridDefaultSimd(Nd,vComplex::Nsimd()), GridDefaultMpi()); + GridCartesian* UGrid = SpaceTimeGrid::makeFourDimGrid(GridDefaultLatt(), GridDefaultSimd(Nd,vComplex::Nsimd()), GridDefaultMpi()); + GridCartesian* FGrid = SpaceTimeGrid::makeFiveDimGrid(Ls, UGrid); + GridRedBlackCartesian* UrbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(UGrid); + GridRedBlackCartesian* FrbGrid = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls, UGrid); + + std::vector seeds4({1,2,3,4}); + std::vector seeds5({5,6,7,8}); + + GridParallelRNG RNG4(UGrid); RNG4.SeedFixedIntegers(seeds4); + GridParallelRNG RNG5(FGrid); RNG5.SeedFixedIntegers(seeds5); + + LatticeFermion src (FGrid); random(RNG5, src); + LatticeFermion phi (FGrid); random(RNG5, phi); + LatticeFermion chi (FGrid); random(RNG5, chi); + LatticeFermion result(FGrid); result = zero; + LatticeFermion ref (FGrid); ref = zero; + LatticeFermion tmp (FGrid); tmp = zero; + LatticeFermion err (FGrid); err = zero; + LatticeGaugeField Umu (UGrid); SU3::HotConfiguration(RNG4, Umu); + std::vector U(4,UGrid); + + // Only one non-zero (y) + Umu = zero; + for(int nn=0; nn0){ U[nn] = zero; } + PokeIndex(Umu, U[nn], nn); + } + + RealD mq1 = 0.1; + RealD mq2 = 0.5; + RealD mq3 = 1.0; + RealD shift = 
0.1234; + RealD M5 = 1.8; + int pm = 1; + DomainWallEOFAFermionR Ddwf(Umu, *FGrid, *FrbGrid, *UGrid, *UrbGrid, mq1, mq2, mq3, shift, pm, M5); + + LatticeFermion src_e (FrbGrid); + LatticeFermion src_o (FrbGrid); + LatticeFermion r_e (FrbGrid); + LatticeFermion r_o (FrbGrid); + LatticeFermion r_eo (FGrid); + LatticeFermion r_eeoo(FGrid); + + std::cout << GridLogMessage << "==========================================================" << std::endl; + std::cout << GridLogMessage << "= Testing that Meo + Moe + Moo + Mee = Munprec " << std::endl; + std::cout << GridLogMessage << "==========================================================" << std::endl; + + pickCheckerboard(Even, src_e, src); + pickCheckerboard(Odd, src_o, src); + + Ddwf.Meooe(src_e, r_o); std::cout << GridLogMessage << "Applied Meo" << std::endl; + Ddwf.Meooe(src_o, r_e); std::cout << GridLogMessage << "Applied Moe" << std::endl; + setCheckerboard(r_eo, r_o); + setCheckerboard(r_eo, r_e); + + Ddwf.Mooee(src_e, r_e); std::cout << GridLogMessage << "Applied Mee" << std::endl; + Ddwf.Mooee(src_o, r_o); std::cout << GridLogMessage << "Applied Moo" << std::endl; + setCheckerboard(r_eeoo, r_e); + setCheckerboard(r_eeoo, r_o); + + r_eo = r_eo + r_eeoo; + Ddwf.M(src, ref); + + // std::cout << GridLogMessage << r_eo << std::endl; + // std::cout << GridLogMessage << ref << std::endl; + + err = ref - r_eo; + std::cout << GridLogMessage << "EO norm diff " << norm2(err) << " " << norm2(ref) << " " << norm2(r_eo) << std::endl; + + LatticeComplex cerr(FGrid); + cerr = localInnerProduct(err,err); + // std::cout << GridLogMessage << cerr << std::endl; + + std::cout << GridLogMessage << "==============================================================" << std::endl; + std::cout << GridLogMessage << "= Test Ddagger is the dagger of D by requiring " << std::endl; + std::cout << GridLogMessage << "= < phi | Deo | chi > * = < chi | Deo^dag| phi> " << std::endl; + std::cout << GridLogMessage << 
"==============================================================" << std::endl; + + LatticeFermion chi_e (FrbGrid); + LatticeFermion chi_o (FrbGrid); + + LatticeFermion dchi_e(FrbGrid); + LatticeFermion dchi_o(FrbGrid); + + LatticeFermion phi_e (FrbGrid); + LatticeFermion phi_o (FrbGrid); + + LatticeFermion dphi_e(FrbGrid); + LatticeFermion dphi_o(FrbGrid); + + pickCheckerboard(Even, chi_e, chi); + pickCheckerboard(Odd , chi_o, chi); + pickCheckerboard(Even, phi_e, phi); + pickCheckerboard(Odd , phi_o, phi); + + Ddwf.Meooe (chi_e, dchi_o); + Ddwf.Meooe (chi_o, dchi_e); + Ddwf.MeooeDag(phi_e, dphi_o); + Ddwf.MeooeDag(phi_o, dphi_e); + + ComplexD pDce = innerProduct(phi_e, dchi_e); + ComplexD pDco = innerProduct(phi_o, dchi_o); + ComplexD cDpe = innerProduct(chi_e, dphi_e); + ComplexD cDpo = innerProduct(chi_o, dphi_o); + + std::cout << GridLogMessage << "e " << pDce << " " << cDpe << std::endl; + std::cout << GridLogMessage << "o " << pDco << " " << cDpo << std::endl; + + std::cout << GridLogMessage << "pDce - conj(cDpo) " << pDce-conj(cDpo) << std::endl; + std::cout << GridLogMessage << "pDco - conj(cDpe) " << pDco-conj(cDpe) << std::endl; + + std::cout << GridLogMessage << "==============================================================" << std::endl; + std::cout << GridLogMessage << "= Test MeeInv Mee = 1 " << std::endl; + std::cout << GridLogMessage << "==============================================================" << std::endl; + + pickCheckerboard(Even, chi_e, chi); + pickCheckerboard(Odd , chi_o, chi); + + Ddwf.Mooee (chi_e, src_e); + Ddwf.MooeeInv(src_e, phi_e); + + Ddwf.Mooee (chi_o, src_o); + Ddwf.MooeeInv(src_o, phi_o); + + setCheckerboard(phi, phi_e); + setCheckerboard(phi, phi_o); + + err = phi - chi; + std::cout << GridLogMessage << "norm diff " << norm2(err) << std::endl; + + std::cout << GridLogMessage << "==============================================================" << std::endl; + std::cout << GridLogMessage << "= Test MeeInvDag MeeDag = 1 " << 
std::endl; + std::cout << GridLogMessage << "==============================================================" << std::endl; + + pickCheckerboard(Even, chi_e, chi); + pickCheckerboard(Odd , chi_o, chi); + + Ddwf.MooeeDag (chi_e, src_e); + Ddwf.MooeeInvDag(src_e, phi_e); + + Ddwf.MooeeDag (chi_o, src_o); + Ddwf.MooeeInvDag(src_o, phi_o); + + setCheckerboard(phi, phi_e); + setCheckerboard(phi, phi_o); + + err = phi - chi; + std::cout << GridLogMessage << "norm diff " << norm2(err) << std::endl; + + std::cout << GridLogMessage << "==============================================================" << std::endl; + std::cout << GridLogMessage << "= Test MpcDagMpc is Hermitian " << std::endl; + std::cout << GridLogMessage << "==============================================================" << std::endl; + + random(RNG5, phi); + random(RNG5, chi); + pickCheckerboard(Even, chi_e, chi); + pickCheckerboard(Odd , chi_o, chi); + pickCheckerboard(Even, phi_e, phi); + pickCheckerboard(Odd , phi_o, phi); + RealD t1,t2; + + SchurDiagMooeeOperator HermOpEO(Ddwf); + HermOpEO.MpcDagMpc(chi_e, dchi_e, t1, t2); + HermOpEO.MpcDagMpc(chi_o, dchi_o, t1, t2); + + HermOpEO.MpcDagMpc(phi_e, dphi_e, t1, t2); + HermOpEO.MpcDagMpc(phi_o, dphi_o, t1, t2); + + pDce = innerProduct(phi_e, dchi_e); + pDco = innerProduct(phi_o, dchi_o); + cDpe = innerProduct(chi_e, dphi_e); + cDpo = innerProduct(chi_o, dphi_o); + + std::cout << GridLogMessage << "e " << pDce << " " << cDpe << std::endl; + std::cout << GridLogMessage << "o " << pDco << " " << cDpo << std::endl; + + std::cout << GridLogMessage << "pDco - conj(cDpo) " << pDco-conj(cDpo) << std::endl; + std::cout << GridLogMessage << "pDce - conj(cDpe) " << pDce-conj(cDpe) << std::endl; + + Grid_finalize(); +} diff --git a/tests/debug/Test_reweight_dwf_eofa.cc b/tests/debug/Test_reweight_dwf_eofa.cc new file mode 100644 index 00000000..98a17e2f --- /dev/null +++ b/tests/debug/Test_reweight_dwf_eofa.cc @@ -0,0 +1,206 @@ 
+/************************************************************************************* + +Grid physics library, www.github.com/paboyle/Grid + +Source file: ./tests/debug/Test_reweight_dwf_eofa.cc + +Copyright (C) 2017 + +Author: Peter Boyle +Author: paboyle +Author: David Murphy + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along +with this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + +See the full license in the file "LICENSE" in the top level distribution directory +*************************************************************************************/ +/* END LEGAL */ +#include + +using namespace std; +using namespace Grid; +using namespace Grid::QCD; + +// parameters for test +const std::vector grid_dim = { 8, 8, 8, 8 }; +const int Ls = 8; +const int Nhits = 25; +const int max_iter = 5000; +const RealD mf = 0.1; +const RealD mb = 0.11; +const RealD M5 = 1.8; +const RealD stop_tol = 1.0e-12; + +RealD mean(const std::vector& data) +{ + int N = data.size(); + RealD mean(0.0); + for(int i=0; i& data, int sample) +{ + int N = data.size(); + RealD mean(0.0); + for(int i=0; i& jacks, RealD mean) +{ + int N = jacks.size(); + RealD std(0.0); + for(int i=0; i jack_stats(const std::vector& data) +{ + int N = data.size(); + std::vector jack_samples(N); + std::vector jack_stats(2); + + jack_stats[0] = mean(data); + for(int i=0; i seeds4({1, 2, 3, 4}); + std::vector seeds5({5, 6, 7, 8}); + 
GridParallelRNG RNG5(FGrid); + RNG5.SeedFixedIntegers(seeds5); + GridParallelRNG RNG4(UGrid); + RNG4.SeedFixedIntegers(seeds4); + + // Random gauge field + LatticeGaugeField Umu(UGrid); + SU3::HotConfiguration(RNG4, Umu); + + // Initialize RHMC fermion operators + DomainWallFermionR Ddwf_f(Umu, *FGrid, *FrbGrid, *UGrid, *UrbGrid, mf, M5); + DomainWallFermionR Ddwf_b(Umu, *FGrid, *FrbGrid, *UGrid, *UrbGrid, mb, M5); + SchurDiagMooeeOperator MdagM(Ddwf_f); + SchurDiagMooeeOperator VdagV(Ddwf_b); + + // Degree 12 rational approximations to x^(1/4) and x^(-1/4) + double lo = 0.0001; + double hi = 95.0; + int precision = 64; + int degree = 12; + AlgRemez remez(lo, hi, precision); + std::cout << GridLogMessage << "Generating degree " << degree << " for x^(1/4)" << std::endl; + remez.generateApprox(degree, 1, 4); + MultiShiftFunction PowerQuarter(remez, stop_tol, false); + MultiShiftFunction PowerNegQuarter(remez, stop_tol, true); + + // Stochastically estimate reweighting factor via RHMC + RealD scale = std::sqrt(0.5); + std::vector rw_rhmc(Nhits); + ConjugateGradientMultiShift msCG_V(max_iter, PowerQuarter); + ConjugateGradientMultiShift msCG_M(max_iter, PowerNegQuarter); + std::cout.precision(12); + + for(int hit=0; hit tmp(2, Ddwf_f.FermionRedBlackGrid()); + gaussian(RNG5, Phi); + Phi = Phi*scale; + + pickCheckerboard(Odd, PhiOdd, Phi); + + // evaluate -log(rw) + msCG_V(VdagV, PhiOdd, tmp[0]); + msCG_M(MdagM, tmp[0], tmp[1]); + rw_rhmc[hit] = norm2(tmp[1]) - norm2(PhiOdd); + std::cout << std::endl << "==================================================" << std::endl; + std::cout << " --- RHMC: Hit " << hit << ": rw = " << rw_rhmc[hit]; + std::cout << std::endl << "==================================================" << std::endl << std::endl; + + } + + // Initialize EOFA fermion operators + RealD shift_L = 0.0; + RealD shift_R = -1.0; + int pm = 1; + DomainWallEOFAFermionR Deofa_L(Umu, *FGrid, *FrbGrid, *UGrid, *UrbGrid, mf, mf, mb, shift_L, pm, M5); + 
DomainWallEOFAFermionR Deofa_R(Umu, *FGrid, *FrbGrid, *UGrid, *UrbGrid, mb, mf, mb, shift_R, pm, M5); + MdagMLinearOperator LdagL(Deofa_L); + MdagMLinearOperator RdagR(Deofa_R); + + // Stochastically estimate reweighting factor via EOFA + RealD k = Deofa_L.k; + std::vector rw_eofa(Nhits); + ConjugateGradient CG(stop_tol, max_iter); + SchurRedBlackDiagMooeeSolve SchurSolver(CG); + + for(int hit=0; hit tmp(2, Deofa_L.FermionGrid()); + gaussian(RNG5, Phi); + Phi = Phi*scale; + + // evaluate -log(rw) + // LH term + for(int s=0; s rhmc_result = jack_stats(rw_rhmc); + std::vector eofa_result = jack_stats(rw_eofa); + std::cout << std::endl << "RHMC: rw = " << rhmc_result[0] << " +/- " << rhmc_result[1] << std::endl; + std::cout << std::endl << "EOFA: rw = " << eofa_result[0] << " +/- " << eofa_result[1] << std::endl; + + Grid_finalize(); +} From b7f93aeb4de96ecadcc58bbf6afcb61decfd6a35 Mon Sep 17 00:00:00 2001 From: David Murphy Date: Tue, 15 Aug 2017 14:18:51 -0400 Subject: [PATCH 082/377] Change CayleyFermion5D::SetCoefficientsInternal to virtual to allow overriding in derived EOFA classes --- lib/qcd/action/fermion/CayleyFermion5D.h | 56 ++++++++++++------------ 1 file changed, 28 insertions(+), 28 deletions(-) diff --git a/lib/qcd/action/fermion/CayleyFermion5D.h b/lib/qcd/action/fermion/CayleyFermion5D.h index cce13e12..ef75235a 100644 --- a/lib/qcd/action/fermion/CayleyFermion5D.h +++ b/lib/qcd/action/fermion/CayleyFermion5D.h @@ -1,6 +1,6 @@ /************************************************************************************* - Grid physics library, www.github.com/paboyle/Grid + Grid physics library, www.github.com/paboyle/Grid Source file: ./lib/qcd/action/fermion/CayleyFermion5D.h @@ -35,24 +35,24 @@ namespace Grid { namespace QCD { - template struct switcheroo { - static inline int iscomplex() { return 0; } + template struct switcheroo { + static inline int iscomplex() { return 0; } template static inline vec mult(vec a, vec b) { return real_mult(a,b); } }; - 
template<> struct switcheroo { - static inline int iscomplex() { return 1; } + template<> struct switcheroo { + static inline int iscomplex() { return 1; } template static inline vec mult(vec a, vec b) { return a*b; } }; - template<> struct switcheroo { - static inline int iscomplex() { return 1; } + template<> struct switcheroo { + static inline int iscomplex() { return 1; } template static inline vec mult(vec a, vec b) { return a*b; @@ -90,14 +90,14 @@ namespace Grid { // Instantiate different versions depending on Impl ///////////////////////////////////////////////////// void M5D(const FermionField &psi, - const FermionField &phi, + const FermionField &phi, FermionField &chi, std::vector &lower, std::vector &diag, std::vector &upper); void M5Ddag(const FermionField &psi, - const FermionField &phi, + const FermionField &phi, FermionField &chi, std::vector &lower, std::vector &diag, @@ -125,7 +125,7 @@ namespace Grid { // Efficient support for multigrid coarsening virtual void Mdir (const FermionField &in, FermionField &out,int dir,int disp); - + void Meooe5D (const FermionField &in, FermionField &out); void MeooeDag5D (const FermionField &in, FermionField &out); @@ -133,23 +133,23 @@ namespace Grid { RealD mass; // Cayley form Moebius (tanh and zolotarev) - std::vector omega; + std::vector omega; std::vector bs; // S dependent coeffs - std::vector cs; - std::vector as; + std::vector cs; + std::vector as; // For preconditioning Cayley form - std::vector bee; - std::vector cee; - std::vector aee; - std::vector beo; - std::vector ceo; - std::vector aeo; + std::vector bee; + std::vector cee; + std::vector aee; + std::vector beo; + std::vector ceo; + std::vector aeo; // LDU factorisation of the eeoo matrix - std::vector lee; - std::vector leem; - std::vector uee; - std::vector ueem; - std::vector dee; + std::vector lee; + std::vector leem; + std::vector uee; + std::vector ueem; + std::vector dee; // Matrices of 5d ee inverse params Vector > MatpInv; @@ -165,7 +165,7 
@@ namespace Grid { GridRedBlackCartesian &FourDimRedBlackGrid, RealD _mass,RealD _M5,const ImplParams &p= ImplParams()); - + void CayleyReport(void); void CayleyZeroCounters(void); @@ -179,9 +179,9 @@ namespace Grid { double MooeeInvTime; protected: - void SetCoefficientsZolotarev(RealD zolohi,Approx::zolotarev_data *zdata,RealD b,RealD c); - void SetCoefficientsTanh(Approx::zolotarev_data *zdata,RealD b,RealD c); - void SetCoefficientsInternal(RealD zolo_hi,std::vector & gamma,RealD b,RealD c); + virtual void SetCoefficientsZolotarev(RealD zolohi,Approx::zolotarev_data *zdata,RealD b,RealD c); + virtual void SetCoefficientsTanh(Approx::zolotarev_data *zdata,RealD b,RealD c); + virtual void SetCoefficientsInternal(RealD zolo_hi,std::vector & gamma,RealD b,RealD c); }; } From 6d0786ff9d388d0ee24d2d4087f05db38655b32e Mon Sep 17 00:00:00 2001 From: David Murphy Date: Tue, 15 Aug 2017 22:47:00 -0400 Subject: [PATCH 083/377] Typo fixes and check-in of G-parity action test for DWF --- lib/qcd/action/fermion/AbstractEOFAFermion.h | 106 ++++----- .../action/fermion/DomainWallEOFAFermion.h | 130 ++++++----- tests/debug/Test_reweight_dwf_eofa_gparity.cc | 209 ++++++++++++++++++ 3 files changed, 336 insertions(+), 109 deletions(-) create mode 100644 tests/debug/Test_reweight_dwf_eofa_gparity.cc diff --git a/lib/qcd/action/fermion/AbstractEOFAFermion.h b/lib/qcd/action/fermion/AbstractEOFAFermion.h index abe06b8c..15faa401 100644 --- a/lib/qcd/action/fermion/AbstractEOFAFermion.h +++ b/lib/qcd/action/fermion/AbstractEOFAFermion.h @@ -35,66 +35,66 @@ See the full license in the file "LICENSE" in the top level distribution directo namespace Grid { namespace QCD { - // DJM: Abstract base class for EOFA fermion types. - // Defines layout of additional EOFA-specific parameters and operators. - // Use to construct EOFA pseudofermion actions that are agnostic to Shamir / Mobius / etc., - // and ensure that no one can construct EOFA pseudofermion action with non-EOFA fermion type. 
- template - class AbstractEOFAFermion : public CayleyFermion5D { - public: - INHERIT_IMPL_TYPES(Impl); + // DJM: Abstract base class for EOFA fermion types. + // Defines layout of additional EOFA-specific parameters and operators. + // Use to construct EOFA pseudofermion actions that are agnostic to + // Shamir / Mobius / etc., and ensure that no one can construct EOFA + // pseudofermion action with non-EOFA fermion type. + template + class AbstractEOFAFermion : public CayleyFermion5D { + public: + INHERIT_IMPL_TYPES(Impl); - public: - // Fermion operator: D(mq1) + shift*\gamma_{5}*R_{5}*\Delta_{\pm}(mq2,mq3)*P_{\pm} - RealD mq1; - RealD mq2; - RealD mq3; - RealD shift; - int pm; + public: + // Fermion operator: D(mq1) + shift*\gamma_{5}*R_{5}*\Delta_{\pm}(mq2,mq3)*P_{\pm} + RealD mq1; + RealD mq2; + RealD mq3; + RealD shift; + int pm; - RealD alpha; // Mobius scale - RealD k; // EOFA normalization constant + RealD alpha; // Mobius scale + RealD k; // EOFA normalization constant - virtual void Instantiatable(void) = 0; + virtual void Instantiatable(void) = 0; - // EOFA-specific operations - // Force user to implement in derived classes - virtual void Omega (const FermionField& in, FermionField& out, int sign, int dag) = 0; - virtual void Dtilde (const FermionField& in, FermionField& out) = 0; - virtual void DtildeInv(const FermionField& in, FermionField& out) = 0; + // EOFA-specific operations + // Force user to implement in derived classes + virtual void Omega (const FermionField& in, FermionField& out, int sign, int dag) = 0; + virtual void Dtilde (const FermionField& in, FermionField& out) = 0; + virtual void DtildeInv(const FermionField& in, FermionField& out) = 0; - // Implement derivatives in base clcass: for EOFA both DWF and Mobius just need d(Dw)/dU - virtual void MDeriv(GaugeField& mat, const FermionField& U, const FermionField& V, int dag){ - this->DhopDeriv(mat, U, V, dag); - }; - virtual void MoeDeriv(GaugeField& mat, const FermionField& U, const 
FermionField& V, int dag){ - this->DhopDerivOE(mat, U, V, dag); - }; - virtual void MeoDeriv(GaugeField& mat, const FermionField& U, const FermionField& V, int dag){ - this->DhopDerivEO(mat, U, V, dag); - }; + // Implement derivatives in base class: + // for EOFA both DWF and Mobius just need d(Dw)/dU + virtual void MDeriv(GaugeField& mat, const FermionField& U, const FermionField& V, int dag){ + this->DhopDeriv(mat, U, V, dag); + }; + virtual void MoeDeriv(GaugeField& mat, const FermionField& U, const FermionField& V, int dag){ + this->DhopDerivOE(mat, U, V, dag); + }; + virtual void MeoDeriv(GaugeField& mat, const FermionField& U, const FermionField& V, int dag){ + this->DhopDerivEO(mat, U, V, dag); + }; - // Recompute 5D coefficients for different value of shift constant (needed for heatbath loop over poles) - virtual void RefreshShiftCoefficients(RealD new_shift) = 0; + // Recompute 5D coefficients for different value of shift constant + // (needed for heatbath loop over poles) + virtual void RefreshShiftCoefficients(RealD new_shift) = 0; - // Constructors - AbstractEOFAFermion(GaugeField& _Umu, - GridCartesian& FiveDimGrid, - GridRedBlackCartesian& FiveDimRedBlackGrid, - GridCartesian& FourDimGrid, - GridRedBlackCartesian& FourDimRedBlackGrid, - RealD _mq1, RealD _mq2, RealD _mq3, RealD _shift, int _pm, - RealD _M5, RealD _b, RealD _c, const ImplParams &p= ImplParams()) : - CayleyFermion5D(_Umu, FiveDimGrid, FiveDimRedBlackGrid, FourDimGrid, FourDimRedBlackGrid, _mq1, _M5, p), - mq1(_mq1), mq2(_mq2), mq3(_mq3), shift(_shift), pm(_pm) - { - int Ls = this->Ls; - this->alpha = _b + _c; - this->k = this->alpha * (_mq3 - _mq2) * std::pow(this->alpha+1.0,2*Ls) / - ( std::pow(alpha+1.0,Ls) + _mq2*std::pow(alpha-1.0,Ls) ) / - ( std::pow(alpha+1.0,Ls) + _mq3*std::pow(alpha-1.0,Ls) ); - } - }; + // Constructors + AbstractEOFAFermion(GaugeField& _Umu, GridCartesian& FiveDimGrid, GridRedBlackCartesian& FiveDimRedBlackGrid, + GridCartesian& FourDimGrid, 
GridRedBlackCartesian& FourDimRedBlackGrid, + RealD _mq1, RealD _mq2, RealD _mq3, RealD _shift, int _pm, + RealD _M5, RealD _b, RealD _c, const ImplParams& p=ImplParams()) + : CayleyFermion5D(_Umu, FiveDimGrid, FiveDimRedBlackGrid, FourDimGrid, FourDimRedBlackGrid, + _mq1, _M5, p), mq1(_mq1), mq2(_mq2), mq3(_mq3), shift(_shift), pm(_pm) + { + int Ls = this->Ls; + this->alpha = _b + _c; + this->k = this->alpha * (_mq3-_mq2) * std::pow(this->alpha+1.0,2*Ls) / + ( std::pow(this->alpha+1.0,Ls) + _mq2*std::pow(this->alpha-1.0,Ls) ) / + ( std::pow(this->alpha+1.0,Ls) + _mq3*std::pow(this->alpha-1.0,Ls) ); + }; + }; }} #endif diff --git a/lib/qcd/action/fermion/DomainWallEOFAFermion.h b/lib/qcd/action/fermion/DomainWallEOFAFermion.h index 179736ba..d48e3b8f 100644 --- a/lib/qcd/action/fermion/DomainWallEOFAFermion.h +++ b/lib/qcd/action/fermion/DomainWallEOFAFermion.h @@ -35,75 +35,93 @@ See the full license in the file "LICENSE" in the top level distribution directo namespace Grid { namespace QCD { - // DJM: EOFA with (Shamir) domain wall fermions. - // We overload and re-implement only the routines which have a different operator - // structure than the CayleyFermion5D base class. 
- template - class DomainWallEOFAFermion : public AbstractEOFAFermion - { - public: - INHERIT_IMPL_TYPES(Impl); + /*template struct switcheroo { + static inline int iscomplex() { return 0; } + template + static inline vec mult(vec a, vec b){ return real_mult(a,b); } + }; - public: - // Modified (0,Ls-1) and (Ls-1,0) elements of Mooee for red-black preconditioned Shamir EOFA - Coeff_t dm; - Coeff_t dp; + template<> struct switcheroo { + static inline int iscomplex() { return 1; } + template + static inline vec mult(vec a, vec b){ return a*b; } + }; - virtual void Instantiatable(void) {}; + template<> struct switcheroo { + static inline int iscomplex() { return 1; } + template + static inline vec mult(vec a, vec b) { return a*b; } + };*/ - // EOFA specific operators - virtual void Omega (const FermionField& in, FermionField &out, int sign, int dag); - virtual void Dtilde (const FermionField& in, FermionField &out); - virtual void DtildeInv (const FermionField& in, FermionField &out); + template + class DomainWallEOFAFermion : public AbstractEOFAFermion + { + public: + INHERIT_IMPL_TYPES(Impl); - // override multiply - virtual RealD M (const FermionField& in, FermionField& out); - virtual RealD Mdag (const FermionField& in, FermionField& out); + public: + // Modified (0,Ls-1) and (Ls-1,0) elements of Mooee + // for red-black preconditioned Shamir EOFA + Coeff_t dm; + Coeff_t dp; - // half checkerboard operations - virtual void Mooee (const FermionField &in, FermionField &out); - virtual void MooeeDag (const FermionField &in, FermionField &out); - virtual void MooeeInv (const FermionField &in, FermionField &out); - virtual void MooeeInvDag(const FermionField &in, FermionField &out); + virtual void Instantiatable(void) {}; - virtual void M5D (const FermionField &psi, FermionField &chi); - virtual void M5Ddag (const FermionField &psi, FermionField &chi); + // EOFA-specific operations + virtual void Omega (const FermionField& in, FermionField& out, int sign, int dag); + 
virtual void Dtilde (const FermionField& in, FermionField& out); + virtual void DtildeInv (const FermionField& in, FermionField& out); - ///////////////////////////////////////////////////// - // Instantiate different versions depending on Impl - ///////////////////////////////////////////////////// - void M5D (const FermionField &psi, const FermionField &phi, FermionField &chi, - std::vector &lower, std::vector &diag, std::vector &upper); - void M5Ddag (const FermionField &psi, const FermionField &phi, FermionField &chi, - std::vector &lower, std::vector &diag, std::vector &upper); - void MooeeInternal (const FermionField &in, FermionField &out, int dag, int inv); - void MooeeInternalCompute(int dag, int inv, Vector >& Matp, Vector >& Matm); - void MooeeInternalAsm (const FermionField &in, FermionField &out, int LLs, int site, - Vector >& Matp, Vector >& Matm); - void MooeeInternalZAsm (const FermionField &in, FermionField &out, int LLs, int site, - Vector >& Matp, Vector >& Matm); + // override multiply + virtual RealD M (const FermionField& in, FermionField& out); + virtual RealD Mdag (const FermionField& in, FermionField& out); - virtual void RefreshShiftCoefficients(RealD new_shift); + // half checkerboard operations + virtual void Mooee (const FermionField& in, FermionField& out); + virtual void MooeeDag (const FermionField& in, FermionField& out); + virtual void MooeeInv (const FermionField& in, FermionField& out); + virtual void MooeeInvDag(const FermionField& in, FermionField& out); - // Constructors - DomainWallEOFAFermion(GaugeField& _Umu, - GridCartesian& FiveDimGrid, - GridRedBlackCartesian& FiveDimRedBlackGrid, - GridCartesian& FourDimGrid, - GridRedBlackCartesian& FourDimRedBlackGrid, - RealD _mq1, RealD _mq2, RealD _mq3, RealD _shift, - int pm, RealD _M5, const ImplParams& p=ImplParams()); + virtual void M5D (const FermionField& psi, FermionField& chi); + virtual void M5Ddag (const FermionField& psi, FermionField& chi); - protected: - virtual void 
SetCoefficientsInternal(RealD zolo_hi, std::vector &gamma, RealD b, RealD c); - }; + ///////////////////////////////////////////////////// + // Instantiate different versions depending on Impl + ///////////////////////////////////////////////////// + void M5D(const FermionField& psi, const FermionField& phi, FermionField& chi, + std::vector& lower, std::vector& diag, std::vector& upper); + + void M5Ddag(const FermionField& psi, const FermionField& phi, FermionField& chi, + std::vector& lower, std::vector& diag, std::vector& upper); + + void MooeeInternal(const FermionField& in, FermionField& out, int dag, int inv); + + void MooeeInternalCompute(int dag, int inv, Vector>& Matp, Vector>& Matm); + + void MooeeInternalAsm(const FermionField& in, FermionField& out, int LLs, int site, + Vector>& Matp, Vector>& Matm); + + void MooeeInternalZAsm(const FermionField& in, FermionField& out, int LLs, int site, + Vector>& Matp, Vector>& Matm); + + virtual void RefreshShiftCoefficients(RealD new_shift); + + // Constructors + DomainWallEOFAFermion(GaugeField& _Umu, GridCartesian& FiveDimGrid, GridRedBlackCartesian& FiveDimRedBlackGrid, + GridCartesian& FourDimGrid, GridRedBlackCartesian& FourDimRedBlackGrid, + RealD _mq1, RealD _mq2, RealD _mq3, RealD _shift, int pm, + RealD _M5, const ImplParams& p=ImplParams()); + + protected: + void SetCoefficientsInternal(RealD zolo_hi, std::vector& gamma, RealD b, RealD c); + }; }} #define INSTANTIATE_DPERP_DWF_EOFA(A)\ -template void DomainWallEOFAFermion::M5D(const FermionField& psi, const FermionField& phi, FermionField& chi,\ - std::vector& lower, std::vector& diag, std::vector& upper); \ -template void DomainWallEOFAFermion::M5Ddag(const FermionField& psi, const FermionField& phi, FermionField& chi,\ - std::vector& lower, std::vector& diag, std::vector& upper); \ +template void DomainWallEOFAFermion::M5D(const FermionField& psi, const FermionField& phi, FermionField& chi, \ + std::vector& lower, std::vector& diag, std::vector& upper); \ 
+template void DomainWallEOFAFermion::M5Ddag(const FermionField& psi, const FermionField& phi, FermionField& chi, \ + std::vector& lower, std::vector& diag, std::vector& upper); \ template void DomainWallEOFAFermion::MooeeInv(const FermionField& psi, FermionField& chi); \ template void DomainWallEOFAFermion::MooeeInvDag(const FermionField& psi, FermionField& chi); diff --git a/tests/debug/Test_reweight_dwf_eofa_gparity.cc b/tests/debug/Test_reweight_dwf_eofa_gparity.cc new file mode 100644 index 00000000..b77ec33e --- /dev/null +++ b/tests/debug/Test_reweight_dwf_eofa_gparity.cc @@ -0,0 +1,209 @@ +/************************************************************************************* + +Grid physics library, www.github.com/paboyle/Grid + +Source file: ./tests/debug/Test_reweight_dwf_eofa_gparity.cc + +Copyright (C) 2017 + +Author: Peter Boyle +Author: paboyle +Author: David Murphy + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along +with this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+ +See the full license in the file "LICENSE" in the top level distribution directory +*************************************************************************************/ +/* END LEGAL */ +#include + +using namespace std; +using namespace Grid; +using namespace Grid::QCD; + +typedef typename GparityDomainWallFermionR::FermionField FermionField; + +// parameters for test +const std::vector grid_dim = { 8, 8, 8, 8 }; +const int Ls = 8; +const int Nhits = 10; +const int max_iter = 5000; +const RealD mf = 0.1; +const RealD mb = 0.11; +const RealD M5 = 1.8; +const RealD stop_tol = 1.0e-12; + +RealD mean(const std::vector& data) +{ + int N = data.size(); + RealD mean(0.0); + for(int i=0; i& data, int sample) +{ + int N = data.size(); + RealD mean(0.0); + for(int i=0; i& jacks, RealD mean) +{ + int N = jacks.size(); + RealD std(0.0); + for(int i=0; i jack_stats(const std::vector& data) +{ + int N = data.size(); + std::vector jack_samples(N); + std::vector jack_stats(2); + + jack_stats[0] = mean(data); + for(int i=0; i seeds4({1, 2, 3, 4}); + std::vector seeds5({5, 6, 7, 8}); + GridParallelRNG RNG5(FGrid); + RNG5.SeedFixedIntegers(seeds5); + GridParallelRNG RNG4(UGrid); + RNG4.SeedFixedIntegers(seeds4); + + // Random gauge field + LatticeGaugeField Umu(UGrid); + SU3::HotConfiguration(RNG4, Umu); + + // Initialize RHMC fermion operators + GparityDomainWallFermionR::ImplParams params; + GparityDomainWallFermionR Ddwf_f(Umu, *FGrid, *FrbGrid, *UGrid, *UrbGrid, mf, M5, params); + GparityDomainWallFermionR Ddwf_b(Umu, *FGrid, *FrbGrid, *UGrid, *UrbGrid, mb, M5, params); + SchurDiagMooeeOperator MdagM(Ddwf_f); + SchurDiagMooeeOperator VdagV(Ddwf_b); + + // Degree 12 rational approximations to x^(1/4) and x^(-1/4) + double lo = 0.0001; + double hi = 95.0; + int precision = 64; + int degree = 12; + AlgRemez remez(lo, hi, precision); + std::cout << GridLogMessage << "Generating degree " << degree << " for x^(1/4)" << std::endl; + remez.generateApprox(degree, 1, 4); + 
MultiShiftFunction PowerQuarter(remez, stop_tol, false); + MultiShiftFunction PowerNegQuarter(remez, stop_tol, true); + + // Stochastically estimate reweighting factor via RHMC + RealD scale = std::sqrt(0.5); + std::vector rw_rhmc(Nhits); + ConjugateGradientMultiShift msCG_V(max_iter, PowerQuarter); + ConjugateGradientMultiShift msCG_M(max_iter, PowerNegQuarter); + std::cout.precision(12); + + for(int hit=0; hit tmp(2, Ddwf_f.FermionRedBlackGrid()); + gaussian(RNG5, Phi); + Phi = Phi*scale; + + pickCheckerboard(Odd, PhiOdd, Phi); + + // evaluate -log(rw) + msCG_V(VdagV, PhiOdd, tmp[0]); + msCG_M(MdagM, tmp[0], tmp[1]); + rw_rhmc[hit] = norm2(tmp[1]) - norm2(PhiOdd); + std::cout << std::endl << "==================================================" << std::endl; + std::cout << " --- RHMC: Hit " << hit << ": rw = " << rw_rhmc[hit]; + std::cout << std::endl << "==================================================" << std::endl << std::endl; + + } + + // Initialize EOFA fermion operators + RealD shift_L = 0.0; + RealD shift_R = -1.0; + int pm = 1; + GparityDomainWallEOFAFermionR Deofa_L(Umu, *FGrid, *FrbGrid, *UGrid, *UrbGrid, mf, mf, mb, shift_L, pm, M5, params); + GparityDomainWallEOFAFermionR Deofa_R(Umu, *FGrid, *FrbGrid, *UGrid, *UrbGrid, mb, mf, mb, shift_R, pm, M5, params); + MdagMLinearOperator LdagL(Deofa_L); + MdagMLinearOperator RdagR(Deofa_R); + + // Stochastically estimate reweighting factor via EOFA + RealD k = Deofa_L.k; + std::vector rw_eofa(Nhits); + ConjugateGradient CG(stop_tol, max_iter); + SchurRedBlackDiagMooeeSolve SchurSolver(CG); + + for(int hit=0; hit tmp(2, Deofa_L.FermionGrid()); + gaussian(RNG5, Phi); + Phi = Phi*scale; + + // evaluate -log(rw) + // LH term + for(int s=0; s rhmc_result = jack_stats(rw_rhmc); + std::vector eofa_result = jack_stats(rw_eofa); + std::cout << std::endl << "RHMC: rw = " << rhmc_result[0] << " +/- " << rhmc_result[1] << std::endl; + std::cout << std::endl << "EOFA: rw = " << eofa_result[0] << " +/- " << eofa_result[1] 
<< std::endl; + + Grid_finalize(); +} From 41f73ec0836fd5ec3093edba4174b466815cf799 Mon Sep 17 00:00:00 2001 From: David Murphy Date: Wed, 16 Aug 2017 12:37:38 -0400 Subject: [PATCH 084/377] Add ChronoForecast class for forecasting solutions across poles in the EOFA heatbath --- lib/algorithms/Algorithms.h | 3 +- lib/algorithms/approx/Forecast.h | 152 +++++++++++++++++++++++++++++++ 2 files changed, 154 insertions(+), 1 deletion(-) create mode 100644 lib/algorithms/approx/Forecast.h diff --git a/lib/algorithms/Algorithms.h b/lib/algorithms/Algorithms.h index 5123c7a1..361ccd9c 100644 --- a/lib/algorithms/Algorithms.h +++ b/lib/algorithms/Algorithms.h @@ -1,6 +1,6 @@ /************************************************************************************* - Grid physics library, www.github.com/paboyle/Grid + Grid physics library, www.github.com/paboyle/Grid Source file: ./lib/Algorithms.h @@ -37,6 +37,7 @@ Author: Peter Boyle #include #include #include +#include #include #include diff --git a/lib/algorithms/approx/Forecast.h b/lib/algorithms/approx/Forecast.h new file mode 100644 index 00000000..87eb84a6 --- /dev/null +++ b/lib/algorithms/approx/Forecast.h @@ -0,0 +1,152 @@ +/************************************************************************************* + +Grid physics library, www.github.com/paboyle/Grid + +Source file: ./lib/algorithms/approx/Forecast.h + +Copyright (C) 2015 + +Author: Peter Boyle +Author: paboyle +Author: David Murphy + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. 
+ +You should have received a copy of the GNU General Public License along +with this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + +See the full license in the file "LICENSE" in the top level distribution directory +*************************************************************************************/ +/* END LEGAL */ + +#ifndef INCLUDED_FORECAST_H +#define INCLUDED_FORECAST_H + +namespace Grid { + + // Abstract base class. + // Takes a matrix (Mat), a source (phi), and a vector of Fields (chi) + // and returns a forecasted solution to the system D*psi = phi (psi). + template + class Forecast + { + public: + virtual Field operator()(Matrix &Mat, const Field& phi, const std::vector& chi) = 0; + }; + + // Implementation of Brower et al.'s chronological inverter (arXiv:hep-lat/9509012), + // used to forecast solutions across poles of the EOFA heatbath. + // + // Modified from CPS (cps_pp/src/util/dirac_op/d_op_base/comsrc/minresext.C) + template + class ChronoForecast : public Forecast + { + public: + Field operator()(Matrix &Mat, const Field& phi, const std::vector& prev_solns) + { + int degree = prev_solns.size(); + Field chi(phi); // forecasted solution + + // Trivial cases + if(degree == 0){ chi = zero; return chi; } + else if(degree == 1){ return prev_solns[0]; } + + RealD dot; + ComplexD xp; + Field r(phi); // residual + Field Mv(phi); + std::vector v(prev_solns); // orthonormalized previous solutions + std::vector MdagMv(degree,phi); + + // Array to hold the matrix elements + std::vector> G(degree, std::vector(degree)); + + // Solution and source vectors + std::vector a(degree); + std::vector b(degree); + + // Orthonormalize the vector basis + for(int i=0; i std::abs(G[k][k])){ k = j; } } + if(k != i){ + xp = b[k]; + b[k] = b[i]; + b[i] = xp; + for(int j=0; j=0; i--){ + a[i] = 0.0; + for(int j=i+1; j + class ExactOneFlavourRatioPseudoFermionAction : public Action + { + public: + 
INHERIT_IMPL_TYPES(Impl); + typedef OneFlavourRationalParams Params; + Params param; + MultiShiftFunction PowerNegHalf; + + private: + bool use_heatbath_forecasting; + AbstractEOFAFermion& Lop; // the basic LH operator + AbstractEOFAFermion& Rop; // the basic RH operator + SchurRedBlackDiagMooeeSolve Solver; + FermionField Phi; // the pseudofermion field for this trajectory + + public: + ExactOneFlavourRatioPseudoFermionAction(AbstractEOFAFermion& _Lop, AbstractEOFAFermion& _Rop, + OperatorFunction& S, Params& p, bool use_fc=false) : Lop(_Lop), Rop(_Rop), Solver(S), + Phi(_Lop.FermionGrid()), param(p), use_heatbath_forecasting(use_fc) + { + AlgRemez remez(param.lo, param.hi, param.precision); + + // MdagM^(+- 1/2) + std::cout << GridLogMessage << "Generating degree " << param.degree << " for x^(-1/2)" << std::endl; + remez.generateApprox(param.degree, 1, 2); + PowerNegHalf.Init(remez, param.tolerance, true); + }; + + virtual std::string action_name() { return "ExactOneFlavourRatioPseudoFermionAction"; } + + virtual std::string LogParameters() { + std::stringstream sstream; + sstream << GridLogMessage << "[" << action_name() << "] Low :" << param.lo << std::endl; + sstream << GridLogMessage << "[" << action_name() << "] High :" << param.hi << std::endl; + sstream << GridLogMessage << "[" << action_name() << "] Max iterations :" << param.MaxIter << std::endl; + sstream << GridLogMessage << "[" << action_name() << "] Tolerance :" << param.tolerance << std::endl; + sstream << GridLogMessage << "[" << action_name() << "] Degree :" << param.degree << std::endl; + sstream << GridLogMessage << "[" << action_name() << "] Precision :" << param.precision << std::endl; + return sstream.str(); + } + + // Spin projection + void spProj(const FermionField& in, FermionField& out, int sign, int Ls) + { + if(sign == 1){ for(int s=0; s tmp(2, Lop.FermionGrid()); + + // Use chronological inverter to forecast solutions across poles + std::vector prev_solns; + 
if(use_heatbath_forecasting){ prev_solns.reserve(param.degree); } + ChronoForecast, FermionField> Forecast; + + // Seed with Gaussian noise vector (var = 0.5) + RealD scale = std::sqrt(0.5); + gaussian(pRNG,eta); + eta = eta * scale; + printf("Heatbath source vector: <\eta|\eta> = %1.15e\n", norm2(eta)); + + // \Phi = ( \alpha_{0} + \sum_{k=1}^{N_{p}} \alpha_{l} * \gamma_{l} ) * \eta + RealD N(PowerNegHalf.norm); + for(int k=0; k tmp(2, Lop.FermionGrid()); + + // S = <\Phi|\Phi> + RealD action(norm2(Phi)); + + // LH term: S = S - k <\Phi| P_{-} \Omega_{-}^{\dagger} H(mf)^{-1} \Omega_{-} P_{-} |\Phi> + spProj(Phi, spProj_Phi, -1, Lop.Ls); + Lop.Omega(spProj_Phi, tmp[0], -1, 0); + G5R5(tmp[1], tmp[0]); + tmp[0] = zero; + Solver(Lop, tmp[1], tmp[0]); + Lop.Dtilde(tmp[0], tmp[1]); // We actually solved Cayley preconditioned system: transform back + Lop.Omega(tmp[1], tmp[0], -1, 1); + action -= Lop.k * innerProduct(spProj_Phi, tmp[0]).real(); + + // RH term: S = S + k <\Phi| P_{+} \Omega_{+}^{\dagger} ( H(mb) + // - \Delta_{+}(mf,mb) P_{+} )^{-1} \Omega_{+} P_{+} |\Phi> + spProj(Phi, spProj_Phi, 1, Rop.Ls); + Rop.Omega(spProj_Phi, tmp[0], 1, 0); + G5R5(tmp[1], tmp[0]); + tmp[0] = zero; + Solver(Rop, tmp[1], tmp[0]); + Rop.Dtilde(tmp[0], tmp[1]); + Rop.Omega(tmp[1], tmp[0], 1, 1); + action += Rop.k * innerProduct(spProj_Phi, tmp[0]).real(); + + return action; + }; + + // EOFA pseudofermion force: see Eqns. 
(34)-(36) of arXiv:1706.05843 + virtual void deriv(const GaugeField& U, GaugeField& dSdU) + { + }; + }; +}} + +#endif diff --git a/lib/qcd/action/pseudofermion/PseudoFermion.h b/lib/qcd/action/pseudofermion/PseudoFermion.h index bccca3d4..133ebb7d 100644 --- a/lib/qcd/action/pseudofermion/PseudoFermion.h +++ b/lib/qcd/action/pseudofermion/PseudoFermion.h @@ -38,5 +38,6 @@ directory #include #include #include +#include #endif diff --git a/tests/debug/Test_heatbath_dwf_eofa.cc b/tests/debug/Test_heatbath_dwf_eofa.cc new file mode 100644 index 00000000..b77bc982 --- /dev/null +++ b/tests/debug/Test_heatbath_dwf_eofa.cc @@ -0,0 +1,102 @@ + /************************************************************************************* + + Grid physics library, www.github.com/paboyle/Grid + + Source file: ./tests/debug/Test_heatbath_dwf_eofa.cc + + Copyright (C) 2017 + +Author: Peter Boyle +Author: paboyle +Author: David Murphy + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along + with this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+ + See the full license in the file "LICENSE" in the top level distribution directory + *************************************************************************************/ + /* END LEGAL */ + +////////////////////////////////////////////////////////////////////////////////////////// +// This program sets up the initial pseudofermion field |Phi> = Meofa^{-1/2}*|eta>, and +// then uses this Phi to compute the action <Phi|Meofa|Phi>. +// If all is working, one should find that <eta|eta> = <Phi|Meofa|Phi>. +////////////////////////////////////////////////////////////////////////////////////////// + +#include + +using namespace std; +using namespace Grid; +using namespace Grid::QCD; + +// Parameters for test +const std::vector grid_dim = { 8, 8, 8, 8 }; +const int Ls = 8; +const int Npoles = 12; +const RealD mf = 0.01; +const RealD mpv = 1.0; +const RealD M5 = 1.8; + +int main(int argc, char** argv) +{ + Grid_init(&argc, &argv); + + int threads = GridThread::GetThreads(); + std::cout << GridLogMessage << "Grid is set up to use " << threads << " threads" << std::endl; + + // Initialize spacetime grid + std::cout << GridLogMessage << "Lattice dimensions: " << grid_dim << " Ls: " << Ls << std::endl; + GridCartesian* UGrid = SpaceTimeGrid::makeFourDimGrid(grid_dim, + GridDefaultSimd(Nd,vComplex::Nsimd()), GridDefaultMpi()); + GridRedBlackCartesian* UrbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(UGrid); + GridCartesian* FGrid = SpaceTimeGrid::makeFiveDimGrid(Ls, UGrid); + GridRedBlackCartesian* FrbGrid = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls, UGrid); + + // Set up RNGs + std::vector seeds4({1, 2, 3, 4}); + std::vector seeds5({5, 6, 7, 8}); + GridParallelRNG RNG5(FGrid); + RNG5.SeedFixedIntegers(seeds5); + GridParallelRNG RNG4(UGrid); + RNG4.SeedFixedIntegers(seeds4); + + // Random gauge field + LatticeGaugeField Umu(UGrid); + SU3::HotConfiguration(RNG4, Umu); + + DomainWallEOFAFermionR Lop(Umu, *FGrid, *FrbGrid, *UGrid, *UrbGrid, mf, mf, mpv, 0.0, -1, M5); + DomainWallEOFAFermionR Rop(Umu, *FGrid, 
*FrbGrid, *UGrid, *UrbGrid, mpv, mf, mpv, -1.0, 1, M5); + + // Construct the action and test the heatbath (zero initial guess) + { + OneFlavourRationalParams Params(0.95, 100.0, 5000, 1.0e-12, Npoles); + ConjugateGradient CG(1.0e-12, 5000); + ExactOneFlavourRatioPseudoFermionAction Meofa(Lop, Rop, CG, Params, false); + + Meofa.refresh(Umu, RNG5); + printf(" = %1.15e\n", Meofa.S(Umu)); + } + + // Construct the action and test the heatbath (forecasted initial guesses) + { + OneFlavourRationalParams Params(0.95, 100.0, 5000, 1.0e-12, Npoles); + ConjugateGradient CG(1.0e-12, 5000); + ExactOneFlavourRatioPseudoFermionAction Meofa(Lop, Rop, CG, Params, true); + + Meofa.refresh(Umu, RNG5); + printf(" = %1.15e\n", Meofa.S(Umu)); + } + + return 0; +} diff --git a/tests/debug/Test_heatbath_dwf_eofa_gparity.cc b/tests/debug/Test_heatbath_dwf_eofa_gparity.cc new file mode 100644 index 00000000..5c9d4923 --- /dev/null +++ b/tests/debug/Test_heatbath_dwf_eofa_gparity.cc @@ -0,0 +1,108 @@ + /************************************************************************************* + + Grid physics library, www.github.com/paboyle/Grid + + Source file: ./tests/debug/Test_heatbath_dwf_eofa.cc + + Copyright (C) 2017 + +Author: Peter Boyle +Author: paboyle +Author: David Murphy + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along + with this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+ + See the full license in the file "LICENSE" in the top level distribution directory + *************************************************************************************/ + /* END LEGAL */ + +////////////////////////////////////////////////////////////////////////////////////////// +// This program sets up the initial pseudofermion field |Phi> = Meofa^{-1/2}*|eta>, and +// then uses this Phi to compute the action <Phi|Meofa|Phi>. +// If all is working, one should find that <eta|eta> = <Phi|Meofa|Phi>. +////////////////////////////////////////////////////////////////////////////////////////// + +#include + +using namespace std; +using namespace Grid; +using namespace Grid::QCD; + +typedef GparityWilsonImplR FermionImplPolicy; +typedef GparityDomainWallEOFAFermionR FermionAction; +typedef typename FermionAction::FermionField FermionField; + +// Parameters for test +const std::vector grid_dim = { 8, 8, 8, 8 }; +const int Ls = 8; +const int Npoles = 12; +const RealD mf = 0.01; +const RealD mpv = 1.0; +const RealD M5 = 1.8; + +int main(int argc, char** argv) +{ + Grid_init(&argc, &argv); + + int threads = GridThread::GetThreads(); + std::cout << GridLogMessage << "Grid is set up to use " << threads << " threads" << std::endl; + + // Initialize spacetime grid + std::cout << GridLogMessage << "Lattice dimensions: " << grid_dim << " Ls: " << Ls << std::endl; + GridCartesian* UGrid = SpaceTimeGrid::makeFourDimGrid(grid_dim, + GridDefaultSimd(Nd,vComplex::Nsimd()), GridDefaultMpi()); + GridRedBlackCartesian* UrbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(UGrid); + GridCartesian* FGrid = SpaceTimeGrid::makeFiveDimGrid(Ls, UGrid); + GridRedBlackCartesian* FrbGrid = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls, UGrid); + + // Set up RNGs + std::vector seeds4({1, 2, 3, 4}); + std::vector seeds5({5, 6, 7, 8}); + GridParallelRNG RNG5(FGrid); + RNG5.SeedFixedIntegers(seeds5); + GridParallelRNG RNG4(UGrid); + RNG4.SeedFixedIntegers(seeds4); + + // Random gauge field + LatticeGaugeField Umu(UGrid); + 
SU3::HotConfiguration(RNG4, Umu); + + // GparityDomainWallFermionR::ImplParams params; + FermionAction::ImplParams params; + FermionAction Lop(Umu, *FGrid, *FrbGrid, *UGrid, *UrbGrid, mf, mf, mpv, 0.0, -1, M5, params); + FermionAction Rop(Umu, *FGrid, *FrbGrid, *UGrid, *UrbGrid, mpv, mf, mpv, -1.0, 1, M5, params); + + // Construct the action and test the heatbath (zero initial guess) + { + OneFlavourRationalParams Params(0.95, 100.0, 5000, 1.0e-12, Npoles); + ConjugateGradient CG(1.0e-12, 5000); + ExactOneFlavourRatioPseudoFermionAction Meofa(Lop, Rop, CG, Params, false); + + Meofa.refresh(Umu, RNG5); + printf(" = %1.15e\n", Meofa.S(Umu)); + } + + // Construct the action and test the heatbath (forecasted initial guesses) + { + OneFlavourRationalParams Params(0.95, 100.0, 5000, 1.0e-12, Npoles); + ConjugateGradient CG(1.0e-12, 5000); + ExactOneFlavourRatioPseudoFermionAction Meofa(Lop, Rop, CG, Params, true); + + Meofa.refresh(Umu, RNG5); + printf(" = %1.15e\n", Meofa.S(Umu)); + } + + return 0; +} From 47a12ec7b560c60d3dde74392598517c98d8b83d Mon Sep 17 00:00:00 2001 From: David Murphy Date: Wed, 16 Aug 2017 19:50:08 -0400 Subject: [PATCH 086/377] Implement EOFA pseudofermion force and Shamir tests for G-parity and non G-parity cases --- .../pseudofermion/ExactOneFlavourRatio.h | 36 +++- tests/forces/Test_dwf_force_eofa.cc | 164 +++++++++++++++++ tests/forces/Test_dwf_gpforce_eofa.cc | 169 ++++++++++++++++++ 3 files changed, 368 insertions(+), 1 deletion(-) create mode 100644 tests/forces/Test_dwf_force_eofa.cc create mode 100644 tests/forces/Test_dwf_gpforce_eofa.cc diff --git a/lib/qcd/action/pseudofermion/ExactOneFlavourRatio.h b/lib/qcd/action/pseudofermion/ExactOneFlavourRatio.h index 38c7380c..9c1e2921 100644 --- a/lib/qcd/action/pseudofermion/ExactOneFlavourRatio.h +++ b/lib/qcd/action/pseudofermion/ExactOneFlavourRatio.h @@ -118,7 +118,7 @@ namespace QCD{ RealD scale = std::sqrt(0.5); gaussian(pRNG,eta); eta = eta * scale; - printf("Heatbath source vector: 
<\eta|\eta> = %1.15e\n", norm2(eta)); + printf("Heatbath source vector: <\\eta|\\eta> = %1.15e\n", norm2(eta)); // \Phi = ( \alpha_{0} + \sum_{k=1}^{N_{p}} \alpha_{l} * \gamma_{l} ) * \eta RealD N(PowerNegHalf.norm); @@ -223,6 +223,40 @@ namespace QCD{ // EOFA pseudofermion force: see Eqns. (34)-(36) of arXiv:1706.05843 virtual void deriv(const GaugeField& U, GaugeField& dSdU) { + Lop.ImportGauge(U); + Rop.ImportGauge(U); + + FermionField spProj_Phi (Lop.FermionGrid()); + FermionField Omega_spProj_Phi(Lop.FermionGrid()); + FermionField CG_src (Lop.FermionGrid()); + FermionField Chi (Lop.FermionGrid()); + FermionField g5_R5_Chi (Lop.FermionGrid()); + + GaugeField force(Lop.GaugeGrid()); + + // LH: dSdU = k \chi_{L}^{\dagger} \gamma_{5} R_{5} ( \partial_{x,\mu} D_{w} ) \chi_{L} + // \chi_{L} = H(mf)^{-1} \Omega_{-} P_{-} \Phi + spProj(Phi, spProj_Phi, -1, Lop.Ls); + Lop.Omega(spProj_Phi, Omega_spProj_Phi, -1, 0); + G5R5(CG_src, Omega_spProj_Phi); + spProj_Phi = zero; + Solver(Lop, CG_src, spProj_Phi); + Lop.Dtilde(spProj_Phi, Chi); + G5R5(g5_R5_Chi, Chi); + Lop.MDeriv(force, g5_R5_Chi, Chi, DaggerNo); + dSdU = Lop.k * force; + + // RH: dSdU = dSdU - k \chi_{R}^{\dagger} \gamma_{5} R_{5} ( \partial_{x,\mu} D_{w} ) \chi_{R} + // \chi_{R} = ( H(mb) - \Delta_{+}(mf,mb) P_{+} )^{-1} \Omega_{+} P_{+} \Phi + spProj(Phi, spProj_Phi, 1, Rop.Ls); + Rop.Omega(spProj_Phi, Omega_spProj_Phi, 1, 0); + G5R5(CG_src, Omega_spProj_Phi); + spProj_Phi = zero; + Solver(Rop, CG_src, spProj_Phi); + Rop.Dtilde(spProj_Phi, Chi); + G5R5(g5_R5_Chi, Chi); + Lop.MDeriv(force, g5_R5_Chi, Chi, DaggerNo); + dSdU = dSdU - Rop.k * force; }; }; }} diff --git a/tests/forces/Test_dwf_force_eofa.cc b/tests/forces/Test_dwf_force_eofa.cc new file mode 100644 index 00000000..f17579ae --- /dev/null +++ b/tests/forces/Test_dwf_force_eofa.cc @@ -0,0 +1,164 @@ +/************************************************************************************* + +Grid physics library, www.github.com/paboyle/Grid + +Source 
file: ./tests/forces/Test_dwf_force_eofa.cc + +Copyright (C) 2017 + +Author: Peter Boyle +Author: David Murphy + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along +with this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + +See the full license in the file "LICENSE" in the top level distribution directory +*************************************************************************************/ +/* END LEGAL */ + +#include + +using namespace std; +using namespace Grid; +using namespace Grid::QCD; + +int main (int argc, char** argv) +{ + Grid_init(&argc, &argv); + + std::vector latt_size = GridDefaultLatt(); + std::vector simd_layout = GridDefaultSimd(Nd,vComplex::Nsimd()); + std::vector mpi_layout = GridDefaultMpi(); + + const int Ls = 8; + + GridCartesian *UGrid = SpaceTimeGrid::makeFourDimGrid(GridDefaultLatt(), GridDefaultSimd(Nd,vComplex::Nsimd()), GridDefaultMpi()); + GridRedBlackCartesian *UrbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(UGrid); + GridCartesian *FGrid = SpaceTimeGrid::makeFiveDimGrid(Ls, UGrid); + GridRedBlackCartesian *FrbGrid = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls, UGrid); + + // Want a different conf at every run + // First create an instance of an engine. + std::random_device rnd_device; + // Specify the engine and distribution. 
+ std::mt19937 mersenne_engine(rnd_device()); + std::uniform_int_distribution dist(1, 100); + + auto gen = std::bind(dist, mersenne_engine); + std::vector seeds4(4); + generate(begin(seeds4), end(seeds4), gen); + + //std::vector seeds4({1,2,3,5}); + std::vector seeds5({5,6,7,8}); + GridParallelRNG RNG5(FGrid); RNG5.SeedFixedIntegers(seeds5); + GridParallelRNG RNG4(UGrid); RNG4.SeedFixedIntegers(seeds4); + + int threads = GridThread::GetThreads(); + std::cout << GridLogMessage << "Grid is setup to use " << threads << " threads" << std::endl; + + LatticeFermion phi (FGrid); gaussian(RNG5, phi); + LatticeFermion Mphi (FGrid); + LatticeFermion MphiPrime (FGrid); + + LatticeGaugeField U(UGrid); + SU3::HotConfiguration(RNG4,U); + + //////////////////////////////////// + // Unmodified matrix element + //////////////////////////////////// + RealD mf = 0.01; + RealD mb = 1.0; + RealD M5 = 1.8; + DomainWallEOFAFermionR Lop(U, *FGrid, *FrbGrid, *UGrid, *UrbGrid, mf, mf, mb, 0.0, -1, M5); + DomainWallEOFAFermionR Rop(U, *FGrid, *FrbGrid, *UGrid, *UrbGrid, mb, mf, mb, -1.0, 1, M5); + OneFlavourRationalParams Params(0.95, 100.0, 5000, 1.0e-12, 12); + ConjugateGradient CG(1.0e-12, 5000); + ExactOneFlavourRatioPseudoFermionAction Meofa(Lop, Rop, CG, Params, true); + + Meofa.refresh(U, RNG5); + RealD S = Meofa.S(U); // pdag M p + + // get the deriv of phidag M phi with respect to "U" + LatticeGaugeField UdSdU(UGrid); + Meofa.deriv(U, UdSdU); + + //////////////////////////////////// + // Modify the gauge field a little + //////////////////////////////////// + RealD dt = 0.0001; + + LatticeColourMatrix mommu(UGrid); + LatticeColourMatrix forcemu(UGrid); + LatticeGaugeField mom(UGrid); + LatticeGaugeField Uprime(UGrid); + + for(int mu=0; mu(mom, mommu, mu); + + // fourth order exponential approx + parallel_for(auto i=mom.begin(); i(UdSdU, mu); + mommu = Ta(mommu)*2.0; + PokeIndex(UdSdU, mommu, mu); + } + + for(int mu=0; mu(UdSdU, mu); + mommu = PeekIndex(mom, mu); + + // Update PF 
action density + dS = dS + trace(mommu*forcemu)*dt; + } + + ComplexD dSpred = sum(dS); + + /*std::cout << GridLogMessage << " S " << S << std::endl; + std::cout << GridLogMessage << " Sprime " << Sprime << std::endl; + std::cout << GridLogMessage << "dS " << Sprime-S << std::endl; + std::cout << GridLogMessage << "predict dS " << dSpred << std::endl;*/ + printf("\nS = %1.15e\n", S); + printf("Sprime = %1.15e\n", Sprime); + printf("dS = %1.15e\n", Sprime - S); + printf("real(dS_predict) = %1.15e\n", dSpred.real()); + printf("imag(dS_predict) = %1.15e\n\n", dSpred.imag()); + + assert( fabs(real(Sprime-S-dSpred)) < 1.0 ) ; + + std::cout << GridLogMessage << "Done" << std::endl; + Grid_finalize(); +} diff --git a/tests/forces/Test_dwf_gpforce_eofa.cc b/tests/forces/Test_dwf_gpforce_eofa.cc new file mode 100644 index 00000000..50789c89 --- /dev/null +++ b/tests/forces/Test_dwf_gpforce_eofa.cc @@ -0,0 +1,169 @@ +/************************************************************************************* + +Grid physics library, www.github.com/paboyle/Grid + +Source file: ./tests/forces/Test_dwf_force_eofa.cc + +Copyright (C) 2017 + +Author: Peter Boyle +Author: David Murphy + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along +with this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+ +See the full license in the file "LICENSE" in the top level distribution directory +*************************************************************************************/ +/* END LEGAL */ + +#include + +using namespace std; +using namespace Grid; +using namespace Grid::QCD; + +typedef GparityWilsonImplR FermionImplPolicy; +typedef GparityDomainWallEOFAFermionR FermionAction; +typedef typename FermionAction::FermionField FermionField; + +int main (int argc, char** argv) +{ + Grid_init(&argc, &argv); + + std::vector latt_size = GridDefaultLatt(); + std::vector simd_layout = GridDefaultSimd(Nd,vComplex::Nsimd()); + std::vector mpi_layout = GridDefaultMpi(); + + const int Ls = 8; + + GridCartesian *UGrid = SpaceTimeGrid::makeFourDimGrid(GridDefaultLatt(), GridDefaultSimd(Nd,vComplex::Nsimd()), GridDefaultMpi()); + GridRedBlackCartesian *UrbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(UGrid); + GridCartesian *FGrid = SpaceTimeGrid::makeFiveDimGrid(Ls, UGrid); + GridRedBlackCartesian *FrbGrid = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls, UGrid); + + // Want a different conf at every run + // First create an instance of an engine. + std::random_device rnd_device; + // Specify the engine and distribution. 
+ std::mt19937 mersenne_engine(rnd_device()); + std::uniform_int_distribution dist(1, 100); + + auto gen = std::bind(dist, mersenne_engine); + std::vector seeds4(4); + generate(begin(seeds4), end(seeds4), gen); + + //std::vector seeds4({1,2,3,5}); + std::vector seeds5({5,6,7,8}); + GridParallelRNG RNG5(FGrid); RNG5.SeedFixedIntegers(seeds5); + GridParallelRNG RNG4(UGrid); RNG4.SeedFixedIntegers(seeds4); + + int threads = GridThread::GetThreads(); + std::cout << GridLogMessage << "Grid is setup to use " << threads << " threads" << std::endl; + + LatticeFermion phi (FGrid); gaussian(RNG5, phi); + LatticeFermion Mphi (FGrid); + LatticeFermion MphiPrime (FGrid); + + LatticeGaugeField U(UGrid); + SU3::HotConfiguration(RNG4,U); + + //////////////////////////////////// + // Unmodified matrix element + //////////////////////////////////// + RealD mf = 0.01; + RealD mb = 1.0; + RealD M5 = 1.8; + FermionAction::ImplParams params; + FermionAction Lop(U, *FGrid, *FrbGrid, *UGrid, *UrbGrid, mf, mf, mb, 0.0, -1, M5, params); + FermionAction Rop(U, *FGrid, *FrbGrid, *UGrid, *UrbGrid, mb, mf, mb, -1.0, 1, M5, params); + OneFlavourRationalParams Params(0.95, 100.0, 5000, 1.0e-12, 12); + ConjugateGradient CG(1.0e-12, 5000); + ExactOneFlavourRatioPseudoFermionAction Meofa(Lop, Rop, CG, Params, true); + + Meofa.refresh(U, RNG5); + RealD S = Meofa.S(U); // pdag M p + + // get the deriv of phidag M phi with respect to "U" + LatticeGaugeField UdSdU(UGrid); + Meofa.deriv(U, UdSdU); + + //////////////////////////////////// + // Modify the gauge field a little + //////////////////////////////////// + RealD dt = 0.0001; + + LatticeColourMatrix mommu(UGrid); + LatticeColourMatrix forcemu(UGrid); + LatticeGaugeField mom(UGrid); + LatticeGaugeField Uprime(UGrid); + + for(int mu=0; mu(mom, mommu, mu); + + // fourth order exponential approx + parallel_for(auto i=mom.begin(); i(UdSdU, mu); + mommu = Ta(mommu)*2.0; + PokeIndex(UdSdU, mommu, mu); + } + + for(int mu=0; mu(UdSdU, mu); + mommu = 
PeekIndex(mom, mu); + + // Update PF action density + dS = dS + trace(mommu*forcemu)*dt; + } + + ComplexD dSpred = sum(dS); + + /*std::cout << GridLogMessage << " S " << S << std::endl; + std::cout << GridLogMessage << " Sprime " << Sprime << std::endl; + std::cout << GridLogMessage << "dS " << Sprime-S << std::endl; + std::cout << GridLogMessage << "predict dS " << dSpred << std::endl;*/ + printf("\nS = %1.15e\n", S); + printf("Sprime = %1.15e\n", Sprime); + printf("dS = %1.15e\n", Sprime - S); + printf("real(dS_predict) = %1.15e\n", dSpred.real()); + printf("imag(dS_predict) = %1.15e\n\n", dSpred.imag()); + + assert( fabs(real(Sprime-S-dSpred)) < 1.0 ) ; + + std::cout << GridLogMessage << "Done" << std::endl; + Grid_finalize(); +} From d9d3d30cc7334bdd7c046f6073b43e339dbabd21 Mon Sep 17 00:00:00 2001 From: David Murphy Date: Wed, 16 Aug 2017 20:57:51 -0400 Subject: [PATCH 087/377] Minor clean-up --- lib/qcd/action/fermion/DomainWallEOFAFermion.h | 18 ------------------ 1 file changed, 18 deletions(-) diff --git a/lib/qcd/action/fermion/DomainWallEOFAFermion.h b/lib/qcd/action/fermion/DomainWallEOFAFermion.h index d48e3b8f..5362cda8 100644 --- a/lib/qcd/action/fermion/DomainWallEOFAFermion.h +++ b/lib/qcd/action/fermion/DomainWallEOFAFermion.h @@ -35,24 +35,6 @@ See the full license in the file "LICENSE" in the top level distribution directo namespace Grid { namespace QCD { - /*template struct switcheroo { - static inline int iscomplex() { return 0; } - template - static inline vec mult(vec a, vec b){ return real_mult(a,b); } - }; - - template<> struct switcheroo { - static inline int iscomplex() { return 1; } - template - static inline vec mult(vec a, vec b){ return a*b; } - }; - - template<> struct switcheroo { - static inline int iscomplex() { return 1; } - template - static inline vec mult(vec a, vec b) { return a*b; } - };*/ - template class DomainWallEOFAFermion : public AbstractEOFAFermion { From e140b3f802549b63809fcf0ad88657f19497a75c Mon Sep 17 00:00:00 
2001 From: David Murphy Date: Wed, 16 Aug 2017 23:36:23 -0400 Subject: [PATCH 088/377] Beginning to re-import Mobius EOFA --- lib/qcd/action/fermion/Fermion.h | 31 +- lib/qcd/action/fermion/MobiusEOFAFermion.cc | 469 ++++++++++++++++++++ lib/qcd/action/fermion/MobiusEOFAFermion.h | 133 ++++++ 3 files changed, 631 insertions(+), 2 deletions(-) create mode 100644 lib/qcd/action/fermion/MobiusEOFAFermion.cc create mode 100644 lib/qcd/action/fermion/MobiusEOFAFermion.h diff --git a/lib/qcd/action/fermion/Fermion.h b/lib/qcd/action/fermion/Fermion.h index ac2a94b2..99d97895 100644 --- a/lib/qcd/action/fermion/Fermion.h +++ b/lib/qcd/action/fermion/Fermion.h @@ -1,6 +1,6 @@ /************************************************************************************* - Grid physics library, www.github.com/paboyle/Grid + Grid physics library, www.github.com/paboyle/Grid Source file: ./lib/qcd/action/fermion/Fermion_base_aggregate.h @@ -38,6 +38,8 @@ Author: Peter Boyle // - ContinuedFractionFermion5D.cc // - WilsonFermion.cc // - WilsonKernels.cc +// - DomainWallEOFAFermion.cc +// - MobiusEOFAFermion.cc // // The explicit instantiation is only avoidable if we move this source to headers and end up with include/parse/recompile // for EVERY .cc file. 
This define centralises the list and restores global push of impl cases @@ -57,6 +59,7 @@ Author: Peter Boyle #include #include #include +#include #include #include #include @@ -129,6 +132,14 @@ typedef MobiusFermion MobiusFermionRL; typedef MobiusFermion MobiusFermionFH; typedef MobiusFermion MobiusFermionDF; +typedef MobiusEOFAFermion MobiusEOFAFermionR; +typedef MobiusEOFAFermion MobiusEOFAFermionF; +typedef MobiusEOFAFermion MobiusEOFAFermionD; + +typedef MobiusEOFAFermion MobiusEOFAFermionRL; +typedef MobiusEOFAFermion MobiusEOFAFermionFH; +typedef MobiusEOFAFermion MobiusEOFAFermionDF; + typedef ZMobiusFermion ZMobiusFermionR; typedef ZMobiusFermion ZMobiusFermionF; typedef ZMobiusFermion ZMobiusFermionD; @@ -137,7 +148,7 @@ typedef ZMobiusFermion ZMobiusFermionRL; typedef ZMobiusFermion ZMobiusFermionFH; typedef ZMobiusFermion ZMobiusFermionDF; -// Ls vectorised +// Ls vectorised typedef DomainWallFermion DomainWallFermionVec5dR; typedef DomainWallFermion DomainWallFermionVec5dF; typedef DomainWallFermion DomainWallFermionVec5dD; @@ -162,6 +173,14 @@ typedef MobiusFermion MobiusFermionVec5dRL; typedef MobiusFermion MobiusFermionVec5dFH; typedef MobiusFermion MobiusFermionVec5dDF; +typedef MobiusEOFAFermion MobiusEOFAFermionVec5dR; +typedef MobiusEOFAFermion MobiusEOFAFermionVec5dF; +typedef MobiusEOFAFermion MobiusEOFAFermionVec5dD; + +typedef MobiusEOFAFermion MobiusEOFAFermionVec5dRL; +typedef MobiusEOFAFermion MobiusEOFAFermionVec5dFH; +typedef MobiusEOFAFermion MobiusEOFAFermionVec5dDF; + typedef ZMobiusFermion ZMobiusFermionVec5dR; typedef ZMobiusFermion ZMobiusFermionVec5dF; typedef ZMobiusFermion ZMobiusFermionVec5dD; @@ -246,6 +265,14 @@ typedef MobiusFermion GparityMobiusFermionRL; typedef MobiusFermion GparityMobiusFermionFH; typedef MobiusFermion GparityMobiusFermionDF; +typedef MobiusEOFAFermion GparityMobiusFermionR; +typedef MobiusEOFAFermion GparityMobiusFermionF; +typedef MobiusEOFAFermion GparityMobiusFermionD; + +typedef MobiusEOFAFermion 
GparityMobiusEOFAFermionRL; +typedef MobiusEOFAFermion GparityMobiusEOFAFermionFH; +typedef MobiusEOFAFermion GparityMobiusEOFAFermionDF; + typedef ImprovedStaggeredFermion ImprovedStaggeredFermionR; typedef ImprovedStaggeredFermion ImprovedStaggeredFermionF; typedef ImprovedStaggeredFermion ImprovedStaggeredFermionD; diff --git a/lib/qcd/action/fermion/MobiusEOFAFermion.cc b/lib/qcd/action/fermion/MobiusEOFAFermion.cc new file mode 100644 index 00000000..9d0d8b11 --- /dev/null +++ b/lib/qcd/action/fermion/MobiusEOFAFermion.cc @@ -0,0 +1,469 @@ +/************************************************************************************* + +Grid physics library, www.github.com/paboyle/Grid + +Source file: ./lib/qcd/action/fermion/MobiusEOFAFermion.cc + +Copyright (C) 2017 + +Author: Peter Boyle +Author: Peter Boyle +Author: Peter Boyle +Author: paboyle +Author: David Murphy + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along +with this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+ +See the full license in the file "LICENSE" in the top level distribution directory +*************************************************************************************/ +/* END LEGAL */ + +#include +#include +#include + +namespace Grid { +namespace QCD { + + template + MobiusEOFAFermion::MobiusEOFAFermion( + GaugeField &_Umu, + GridCartesian &FiveDimGrid, + GridRedBlackCartesian &FiveDimRedBlackGrid, + GridCartesian &FourDimGrid, + GridRedBlackCartesian &FourDimRedBlackGrid, + RealD _mq1, RealD _mq2, RealD _mq3, + RealD _shift, int _pm, RealD _M5, + RealD _b, RealD _c, const ImplParams &p) : + AbstractEOFAFermion(_Umu, FiveDimGrid, FiveDimRedBlackGrid, + FourDimGrid, FourDimRedBlackGrid, _mq1, _mq2, _mq3, + _shift, _pm, _M5, _b, _c, p) + { + int Ls = this->Ls; + + RealD eps = 1.0; + Approx::zolotarev_data *zdata = Approx::higham(eps, this->Ls); + assert(zdata->n == this->Ls); + + std::cout << GridLogMessage << "MobiusEOFAFermion (b=" << _b << + ",c=" << _c << ") with Ls=" << Ls << std::endl; + this->SetCoefficientsTanh(zdata, _b, _c); + std::cout << GridLogMessage << "EOFA parameters: (mq1=" << _mq1 << + ",mq2=" << _mq2 << ",mq3=" << _mq3 << ",shift=" << _shift << + ",pm=" << _pm << ")" << std::endl; + + Approx::zolotarev_free(zdata); + + if(_shift != 0.0){ + SetCoefficientsPrecondShiftOps(); + } else { + Mooee_shift.resize(Ls, 0.0); + MooeeInv_shift_lc.resize(Ls, 0.0); + MooeeInv_shift_norm.resize(Ls, 0.0); + MooeeInvDag_shift_lc.resize(Ls, 0.0); + MooeeInvDag_shift_norm.resize(Ls, 0.0); + } + } + + /*************************************************************** + /* Additional EOFA operators only called outside the inverter. + /* Since speed is not essential, simple axpby-style + /* implementations should be fine. 
+ /***************************************************************/ + template + void MobiusEOFAFermion::Omega(const FermionField& psi, FermionField& Din, int sign, int dag) + { + int Ls = this->Ls; + RealD alpha = this->alpha; + + Din = zero; + if((sign == 1) && (dag == 0)) { // \Omega_{+} + for(int s=0; s + void MobiusEOFAFermion::Dtilde(const FermionField& psi, FermionField& chi) + { + int Ls = this->Ls; + RealD b = 0.5 * ( 1.0 + this->alpha ); + RealD c = 0.5 * ( 1.0 - this->alpha ); + RealD mq1 = this->mq1; + + for(int s=0; s + void MobiusEOFAFermion::DtildeInv(const FermionField& psi, FermionField& chi){ } + + /*****************************************************************************************************/ + + template + RealD MobiusEOFAFermion::M(const FermionField& psi, FermionField& chi) + { + int Ls = this->Ls; + + FermionField Din(psi._grid); + + this->Meooe5D(psi, Din); + this->DW(Din, chi, DaggerNo); + axpby(chi, 1.0, 1.0, chi, psi); + this->M5D(psi, chi); + return(norm2(chi)); + } + + template + RealD MobiusEOFAFermion::Mdag(const FermionField& psi, FermionField& chi) + { + int Ls = this->Ls; + + FermionField Din(psi._grid); + + this->DW(psi, Din, DaggerYes); + this->MeooeDag5D(Din, chi); + this->M5Ddag(psi, chi); + axpby(chi, 1.0, 1.0, chi, psi); + return(norm2(chi)); + } + + /******************************************************************** + /* Performance critical fermion operators called inside the inverter + /********************************************************************/ + + template + void MobiusEOFAFermion::M5D(const FermionField& psi, FermionField& chi) + { + int Ls = this->Ls; + + std::vector diag(Ls,1.0); + std::vector upper(Ls,-1.0); upper[Ls-1] = this->mq1; + std::vector lower(Ls,-1.0); lower[0] = this->mq1; + + // no shift term + if(this->shift == 0.0){ this->M5D(psi, chi, chi, lower, diag, upper); } + + // fused M + shift operation + else{ this->M5D_shift(psi, chi, chi, lower, diag, upper, Mooee_shift); } + } + + 
template + void MobiusEOFAFermion::M5Ddag(const FermionField& psi, FermionField& chi) + { + int Ls = this->Ls; + int pm = this->pm; + RealD shift = this->shift; + RealD mq1 = this->mq1; + RealD mq2 = this->mq2; + RealD mq3 = this->mq3; + + // coefficients for shift operator ( = shift*\gamma_{5}*R_{5}*\Delta_{\pm}(mq2,mq3)*P_{\pm} ) + Coeff_t shiftp(0.0), shiftm(0.0); + if(shift != 0.0){ + if(pm == 1){ shiftp = shift*(mq3-mq2); } + else{ shiftm = -shift*(mq3-mq2); } + } + + std::vector diag(Ls,1.0); + std::vector upper(Ls,-1.0); upper[Ls-1] = mq1 + shiftp; + std::vector lower(Ls,-1.0); lower[0] = mq1 + shiftm; + + #if(0) + std::cout << GridLogMessage << "MobiusEOFAFermion::M5Ddag(FF&,FF&):" << std::endl; + for(int i=0; i::iscomplex()) { + sp[l] = PplusMat (l*istride+s1*ostride,s2); + sm[l] = PminusMat(l*istride+s1*ostride,s2); + } else { + // if real + scalar_type tmp; + tmp = PplusMat (l*istride+s1*ostride,s2); + sp[l] = scalar_type(tmp.real(),tmp.real()); + tmp = PminusMat(l*istride+s1*ostride,s2); + sm[l] = scalar_type(tmp.real(),tmp.real()); + } + } + Matp[LLs*s2+s1] = Vp; + Matm[LLs*s2+s1] = Vm; + }} + } + + FermOpTemplateInstantiate(MobiusEOFAFermion); + GparityFermOpTemplateInstantiate(MobiusEOFAFermion); + +}} diff --git a/lib/qcd/action/fermion/MobiusEOFAFermion.h b/lib/qcd/action/fermion/MobiusEOFAFermion.h new file mode 100644 index 00000000..0a8d1788 --- /dev/null +++ b/lib/qcd/action/fermion/MobiusEOFAFermion.h @@ -0,0 +1,133 @@ +/************************************************************************************* + +Grid physics library, www.github.com/paboyle/Grid + +Source file: ./lib/qcd/action/fermion/MobiusEOFAFermion.h + +Copyright (C) 2017 + +Author: Peter Boyle +Author: Peter Boyle +Author: David Murphy + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later 
version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along +with this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + +See the full license in the file "LICENSE" in the top level distribution directory +*************************************************************************************/ +/* END LEGAL */ +#ifndef GRID_QCD_MOBIUS_EOFA_FERMION_H +#define GRID_QCD_MOBIUS_EOFA_FERMION_H + +#include + +namespace Grid { +namespace QCD { + + template + class MobiusEOFAFermion : public AbstractEOFAFermion + { + public: + INHERIT_IMPL_TYPES(Impl); + + public: + // Shift operator coefficients for red-black preconditioned Mobius EOFA + std::vector Mooee_shift; + std::vector MooeeInv_shift_lc; + std::vector MooeeInv_shift_norm; + std::vector MooeeInvDag_shift_lc; + std::vector MooeeInvDag_shift_norm; + + virtual void Instantiatable(void) {}; + + // EOFA-specific operations + virtual void Omega (const FermionField& in, FermionField& out, int sign, int dag); + virtual void Dtilde (const FermionField& in, FermionField& out); + virtual void DtildeInv (const FermionField& in, FermionField& out); + + // override multiply + virtual RealD M (const FermionField& in, FermionField& out); + virtual RealD Mdag (const FermionField& in, FermionField& out); + + // half checkerboard operations + virtual void Mooee (const FermionField& in, FermionField& out); + virtual void MooeeDag (const FermionField& in, FermionField& out); + virtual void MooeeInv (const FermionField& in, FermionField& out); + virtual void MooeeInv_shift (const FermionField& in, FermionField& out); + virtual void MooeeInvDag (const FermionField& in, FermionField& out); + virtual void 
MooeeInvDag_shift(const FermionField& in, FermionField& out); + + virtual void M5D (const FermionField& psi, FermionField& chi); + virtual void M5Ddag (const FermionField& psi, FermionField& chi); + + ///////////////////////////////////////////////////// + // Instantiate different versions depending on Impl + ///////////////////////////////////////////////////// + void M5D(const FermionField& psi, const FermionField& phi, FermionField& chi, + std::vector& lower, std::vector& diag, std::vector& upper); + + void M5D_shift(const FermionField& psi, const FermionField& phi, FermionField& chi, + std::vector& lower, std::vector& diag, std::vector& upper, + std::vector& shift_coeffs); + + void M5Ddag(const FermionField& psi, const FermionField& phi, FermionField& chi, + std::vector& lower, std::vector& diag, std::vector& upper); + + void M5Ddag_shift(const FermionField& psi, const FermionField& phi, FermionField& chi, + std::vector& lower, std::vector& diag, std::vector& upper, + std::vector& shift_coeffs); + + void MooeeInternal(const FermionField& in, FermionField& out, int dag, int inv); + + void MooeeInternalCompute(int dag, int inv, Vector>& Matp, Vector>& Matm); + + void MooeeInternalAsm(const FermionField& in, FermionField& out, int LLs, int site, + Vector>& Matp, Vector>& Matm); + + void MooeeInternalZAsm(const FermionField& in, FermionField& out, int LLs, int site, + Vector>& Matp, Vector>& Matm); + + virtual void RefreshShiftCoefficients(RealD new_shift); + + // Constructors + MobiusEOFAFermion(GaugeField& _Umu, GridCartesian& FiveDimGrid, GridRedBlackCartesian& FiveDimRedBlackGrid, + GridCartesian& FourDimGrid, GridRedBlackCartesian& FourDimRedBlackGrid, + RealD _mq1, RealD _mq2, RealD _mq3, RealD _shift, int pm, + RealD _M5, RealD _b, RealD _c, const ImplParams& p=ImplParams()); + + protected: + void SetCoefficientsPrecondShiftOps(void); + }; +}} + +#define INSTANTIATE_DPERP_DWF_EOFA(A)\ +template void MobiusEOFAFermion::M5D(const FermionField& psi, const 
FermionField& phi, FermionField& chi, \ + std::vector& lower, std::vector& diag, std::vector& upper); \ +template void MobiusEOFAFermion::M5D_shift(const FermionField& psi, const FermionField& phi, FermionField& chi, \ + std::vector& lower, std::vector& diag, std::vector& upper, std::vector& shift_coeffs); \ +template void MobiusEOFAFermion::M5Ddag(const FermionField& psi, const FermionField& phi, FermionField& chi, \ + std::vector& lower, std::vector& diag, std::vector& upper); \ +template void MobiusEOFAFermion::M5Ddag_shift(const FermionField& psi, const FermionField& phi, FermionField& chi, \ + std::vector& lower, std::vector& diag, std::vector& upper, std::vector& shift_coeffs); \ +template void MobiusEOFAFermion::MooeeInv(const FermionField& psi, FermionField& chi); \ +template void MobiusEOFAFermion::MooeeInv_shift(const FermionField& psi, FermionField& chi); \ +template void MobiusEOFAFermion::MooeeInvDag(const FermionField& psi, FermionField& chi); +template void MobiusEOFAFermion::MooeeInvDag_shift(const FermionField& psi, FermionField& chi); + +#undef MOBIUS_EOFA_DPERP_DENSE +#define MOBIUS_EOFA_DPERP_CACHE +#undef MOBIUS_EOFA_DPERP_LINALG +#define MOBIUS_EOFA_DPERP_VEC + +#endif From ac9e6b63c092dedde5af757a7b0fcd7b182f1523 Mon Sep 17 00:00:00 2001 From: David Murphy Date: Thu, 17 Aug 2017 19:28:53 -0400 Subject: [PATCH 089/377] More re-import of Mobius EOFA --- .../action/fermion/DomainWallEOFAFermion.cc | 4 +- lib/qcd/action/fermion/MobiusEOFAFermion.cc | 486 ++++++++++-------- 2 files changed, 261 insertions(+), 229 deletions(-) diff --git a/lib/qcd/action/fermion/DomainWallEOFAFermion.cc b/lib/qcd/action/fermion/DomainWallEOFAFermion.cc index dc4f6504..dd8a500d 100644 --- a/lib/qcd/action/fermion/DomainWallEOFAFermion.cc +++ b/lib/qcd/action/fermion/DomainWallEOFAFermion.cc @@ -347,9 +347,7 @@ namespace QCD { GridBase* grid = this->FermionRedBlackGrid(); int LLs = grid->_rdimensions[0]; - if(LLs == Ls){ - return; // Not vectorised in 5th direction - 
} + if(LLs == Ls){ return; } // Not vectorised in 5th direction Eigen::MatrixXcd Pplus = Eigen::MatrixXcd::Zero(Ls,Ls); Eigen::MatrixXcd Pminus = Eigen::MatrixXcd::Zero(Ls,Ls); diff --git a/lib/qcd/action/fermion/MobiusEOFAFermion.cc b/lib/qcd/action/fermion/MobiusEOFAFermion.cc index 9d0d8b11..c0837d78 100644 --- a/lib/qcd/action/fermion/MobiusEOFAFermion.cc +++ b/lib/qcd/action/fermion/MobiusEOFAFermion.cc @@ -133,7 +133,36 @@ namespace QCD { } template - void MobiusEOFAFermion::DtildeInv(const FermionField& psi, FermionField& chi){ } + void MobiusEOFAFermion::DtildeInv(const FermionField& psi, FermionField& chi) + { + int Ls = this->Ls; + RealD m = this->mq1; + RealD c = 0.5 * this->alpha; + RealD d = 0.5; + + RealD DtInv_p(0.0), DtInv_m(0.0); + RealD N = std::pow(c+d,Ls) + m*std::pow(c-d,Ls); + FermionField tmp = zero; + + for(int s=0; s sp) ? 0.0 : std::pow(-1.0,sp-s) * std::pow(c-d,sp-s) / std::pow(c+d,sp-s+1); + + if(dag){ + RealD tmp(DtInv_p); + DtInv_p = DtInv_m; + DtInv_m = tmp; + } + + axpby_ssp_pplus (tmp, 1.0, tmp, DtInv_p, psi, s, sp); + axpby_ssp_pminus(tmp, 1.0, tmp, DtInv_m, psi, s, sp); + + }} + } /*****************************************************************************************************/ @@ -172,298 +201,303 @@ namespace QCD { template void MobiusEOFAFermion::M5D(const FermionField& psi, FermionField& chi) { - int Ls = this->Ls; + int Ls = this->Ls; - std::vector diag(Ls,1.0); - std::vector upper(Ls,-1.0); upper[Ls-1] = this->mq1; - std::vector lower(Ls,-1.0); lower[0] = this->mq1; + std::vector diag(Ls,1.0); + std::vector upper(Ls,-1.0); upper[Ls-1] = this->mq1; + std::vector lower(Ls,-1.0); lower[0] = this->mq1; - // no shift term - if(this->shift == 0.0){ this->M5D(psi, chi, chi, lower, diag, upper); } + // no shift term + if(this->shift == 0.0){ this->M5D(psi, chi, chi, lower, diag, upper); } - // fused M + shift operation - else{ this->M5D_shift(psi, chi, chi, lower, diag, upper, Mooee_shift); } + // fused M + shift operation + 
else{ this->M5D_shift(psi, chi, chi, lower, diag, upper, Mooee_shift); } } template void MobiusEOFAFermion::M5Ddag(const FermionField& psi, FermionField& chi) { - int Ls = this->Ls; - int pm = this->pm; - RealD shift = this->shift; - RealD mq1 = this->mq1; - RealD mq2 = this->mq2; - RealD mq3 = this->mq3; + int Ls = this->Ls; - // coefficients for shift operator ( = shift*\gamma_{5}*R_{5}*\Delta_{\pm}(mq2,mq3)*P_{\pm} ) - Coeff_t shiftp(0.0), shiftm(0.0); - if(shift != 0.0){ - if(pm == 1){ shiftp = shift*(mq3-mq2); } - else{ shiftm = -shift*(mq3-mq2); } - } + std::vector diag(Ls,1.0); + std::vector upper(Ls,-1.0); upper[Ls-1] = this->mq1 + shiftp; + std::vector lower(Ls,-1.0); lower[0] = this->mq1 + shiftm; - std::vector diag(Ls,1.0); - std::vector upper(Ls,-1.0); upper[Ls-1] = mq1 + shiftp; - std::vector lower(Ls,-1.0); lower[0] = mq1 + shiftm; + // no shift term + if(this->shift == 0.0){ this->M5Ddag(psi, chi, chi, lower, diag, upper); } - #if(0) - std::cout << GridLogMessage << "MobiusEOFAFermion::M5Ddag(FF&,FF&):" << std::endl; - for(int i=0; ishift != 0.0){ + RealD c = 0.5 * this->alpha; + RealD d = 0.5; + RealD N = this->shift * this->k * ( std::pow(c+d,Ls) + this->mq1*std::pow(c-d,Ls) ); + if(this->pm == 1) { + for(int s=0; s::iscomplex()) { + sp[l] = PplusMat (l*istride+s1*ostride,s2); + sm[l] = PminusMat(l*istride+s1*ostride,s2); + } else { + // if real + scalar_type tmp; + tmp = PplusMat (l*istride+s1*ostride,s2); + sp[l] = scalar_type(tmp.real(),tmp.real()); + tmp = PminusMat(l*istride+s1*ostride,s2); + sm[l] = scalar_type(tmp.real(),tmp.real()); + } } + Matp[LLs*s2+s1] = Vp; + Matm[LLs*s2+s1] = Vm; + }} + } - typedef typename SiteHalfSpinor::scalar_type scalar_type; - const int Nsimd = Simd::Nsimd(); - Matp.resize(Ls*LLs); - Matm.resize(Ls*LLs); - - for(int s2=0; s2::iscomplex()) { - sp[l] = PplusMat (l*istride+s1*ostride,s2); - sm[l] = PminusMat(l*istride+s1*ostride,s2); - } else { - // if real - scalar_type tmp; - tmp = PplusMat 
(l*istride+s1*ostride,s2); - sp[l] = scalar_type(tmp.real(),tmp.real()); - tmp = PminusMat(l*istride+s1*ostride,s2); - sm[l] = scalar_type(tmp.real(),tmp.real()); - } - } - Matp[LLs*s2+s1] = Vp; - Matm[LLs*s2+s1] = Vm; - }} - } - - FermOpTemplateInstantiate(MobiusEOFAFermion); - GparityFermOpTemplateInstantiate(MobiusEOFAFermion); + FermOpTemplateInstantiate(MobiusEOFAFermion); + GparityFermOpTemplateInstantiate(MobiusEOFAFermion); }} From 9d45fca8bc11caf55cbbaebb290c8786cc6d34b8 Mon Sep 17 00:00:00 2001 From: David Murphy Date: Thu, 17 Aug 2017 23:45:36 -0400 Subject: [PATCH 090/377] Implement MobiusEOFAFermioncache.cc --- .../action/fermion/MobiusEOFAFermioncache.cc | 429 ++++++++++++++++++ 1 file changed, 429 insertions(+) create mode 100644 lib/qcd/action/fermion/MobiusEOFAFermioncache.cc diff --git a/lib/qcd/action/fermion/MobiusEOFAFermioncache.cc b/lib/qcd/action/fermion/MobiusEOFAFermioncache.cc new file mode 100644 index 00000000..d184ebe2 --- /dev/null +++ b/lib/qcd/action/fermion/MobiusEOFAFermioncache.cc @@ -0,0 +1,429 @@ +/************************************************************************************* + +Grid physics library, www.github.com/paboyle/Grid + +Source file: ./lib/qcd/action/fermion/MobiusEOFAFermioncache.cc + +Copyright (C) 2017 + +Author: Peter Boyle +Author: Peter Boyle +Author: Peter Boyle +Author: paboyle +Author: David Murphy + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. 
+ +You should have received a copy of the GNU General Public License along +with this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + +See the full license in the file "LICENSE" in the top level distribution directory +*************************************************************************************/ +/* END LEGAL */ + +#include +#include + +namespace Grid { +namespace QCD { + + // FIXME -- make a version of these routines with site loop outermost for cache reuse. + + template + void MobiusEOFAFermion::M5D(const FermionField &psi, const FermionField &phi, FermionField &chi, + std::vector &lower, std::vector &diag, std::vector &upper) + { + int Ls = this->Ls; + GridBase *grid = psi._grid; + + assert(phi.checkerboard == psi.checkerboard); + chi.checkerboard = psi.checkerboard; + + // Flops = 6.0*(Nc*Ns) *Ls*vol + this->M5Dcalls++; + this->M5Dtime -= usecond(); + + parallel_for(int ss=0; ssoSites(); ss+=Ls){ + for(int s=0; sM5Dtime += usecond(); + } + + template + void MobiusEOFAFermion::M5D_shift(const FermionField &psi, const FermionField &phi, FermionField &chi, + std::vector &lower, std::vector &diag, std::vector &upper, + std::vector &shift_coeffs) + { + int Ls = this->Ls; + int shift_s = (this->pm == 1) ? 
(Ls-1) : 0; // s-component modified by shift operator + GridBase *grid = psi._grid; + + assert(phi.checkerboard == psi.checkerboard); + chi.checkerboard = psi.checkerboard; + + // Flops = 6.0*(Nc*Ns) *Ls*vol + this->M5Dcalls++; + this->M5Dtime -= usecond(); + + parallel_for(int ss=0; ssoSites(); ss+=Ls){ + for(int s=0; spm == 1){ spProj5p(tmp, psi._odata[ss+shift_s]); } + else{ spProj5m(tmp, psi._odata[ss+shift_s]); } + chi[ss+s] = chi[ss+s] + shift_coeffs[s]*tmp; + } + } + + this->M5Dtime += usecond(); + } + + template + void MobiusEOFAFermion::M5Ddag(const FermionField &psi, const FermionField &phi, FermionField &chi, + std::vector &lower, std::vector &diag, std::vector &upper) + { + int Ls = this->Ls; + GridBase *grid = psi._grid; + + assert(phi.checkerboard == psi.checkerboard); + chi.checkerboard = psi.checkerboard; + + // Flops = 6.0*(Nc*Ns) *Ls*vol + this->M5Dcalls++; + this->M5Dtime -= usecond(); + + parallel_for(int ss=0; ssoSites(); ss+=Ls){ + auto tmp = psi._odata[0]; + for(int s=0; sM5Dtime += usecond(); + } + + template + void MobiusEOFAFermion::M5Ddag_shift(const FermionField &psi, const FermionField &phi, FermionField &chi, + std::vector &lower, std::vector &diag, std::vector &upper, + std::vector &shift_coeffs) + { + int Ls = this->Ls; + int shift_s = (this->pm == 1) ? 
(Ls-1) : 0; // s-component modified by shift operator + GridBase *grid = psi._grid; + + assert(phi.checkerboard == psi.checkerboard); + chi.checkerboard = psi.checkerboard; + + // Flops = 6.0*(Nc*Ns) *Ls*vol + this->M5Dcalls++; + this->M5Dtime -= usecond(); + + parallel_for(int ss=0; ssoSites(); ss+=Ls){ + chi[ss+Ls-1] = zero; + auto tmp = psi._odata[0]; + for(int s=0; spm == 1){ spProj5p(tmp, psi._odata[ss+s]); } + else{ spProj5m(tmp, psi._odata[ss+s]); } + chi[ss+shift_s] = chi[ss+shift_s] + shift_coeffs[s]*tmp; + } + } + + this->M5Dtime += usecond(); + } + + template + void MobiusEOFAFermion::MooeeInv(const FermionField &psi, FermionField &chi) + { + if(this->shift != 0.0){ MooeeInv_shift(psi,chi); return; } + + GridBase *grid = psi._grid; + int Ls = this->Ls; + + chi.checkerboard = psi.checkerboard; + + this->MooeeInvCalls++; + this->MooeeInvTime -= usecond(); + + parallel_for(int ss=0; ssoSites(); ss+=Ls){ + + auto tmp = psi._odata[0]; + + // Apply (L^{\prime})^{-1} + chi[ss] = psi[ss]; // chi[0]=psi[0] + for(int s=1; slee[s-1]*tmp; + } + + // L_m^{-1} + for(int s=0; sleem[s]*tmp; + } + + // U_m^{-1} D^{-1} + for(int s=0; sdee[s])*chi[ss+s] - (this->ueem[s]/this->dee[Ls-1])*tmp; + } + chi[ss+Ls-1] = (1.0/this->dee[Ls-1])*chi[ss+Ls-1]; + + // Apply U^{-1} + for(int s=Ls-2; s>=0; s--){ + spProj5m(tmp, chi[ss+s+1]); + chi[ss+s] = chi[ss+s] - this->uee[s]*tmp; + } + } + + this->MooeeInvTime += usecond(); + } + + template + void MobiusEOFAFermion::MooeeInv_shift(const FermionField &psi, FermionField &chi) + { + GridBase *grid = psi._grid; + int Ls = this->Ls; + + chi.checkerboard = psi.checkerboard; + + this->MooeeInvCalls++; + this->MooeeInvTime -= usecond(); + + parallel_for(int ss=0; ssoSites(); ss+=Ls){ + + auto tmp1 = psi._odata[0]; + auto tmp2 = psi._odata[0]; + auto tmp2_spProj = psi._odata[0]; + + // Apply (L^{\prime})^{-1} and accumulate MooeeInv_shift_lc[j]*psi[j] in tmp2 + chi[ss] = psi[ss]; // chi[0]=psi[0] + tmp2 = MooeeInv_shift_lc[0]*psi[ss]; + 
for(int s=1; slee[s-1]*tmp1; + tmp2 = tmp2 + MooeeInv_shift_lc[s]*psi[ss+s]; + } + if(this->pm == 1){ spProj5p(tmp2_spProj, tmp2);} + else{ spProj5m(tmp2_spProj, tmp2); } + + // L_m^{-1} + for(int s=0; sleem[s]*tmp1; + } + + // U_m^{-1} D^{-1} + for(int s=0; sdee[s])*chi[ss+s] - (this->ueem[s]/this->dee[Ls-1])*tmp1; + } + // chi[ss+Ls-1] = (1.0/this->dee[Ls-1])*chi[ss+Ls-1] + MooeeInv_shift_norm[Ls-1]*tmp2_spProj; + chi[ss+Ls-1] = (1.0/this->dee[Ls-1])*chi[ss+Ls-1]; + spProj5m(tmp1, chi[ss+Ls-1]); + chi[ss+Ls-1] = chi[ss+Ls-1] + MooeeInv_shift_norm[Ls-1]*tmp2_spProj; + + // Apply U^{-1} and add shift term + for(int s=Ls-2; s>=0; s--){ + chi[ss+s] = chi[ss+s] - this->uee[s]*tmp1; + spProj5m(tmp1, chi[ss+s]); + chi[ss+s] = chi[ss+s] + MooeeInv_shift_norm[s]*tmp2_spProj; + } + } + + this->MooeeInvTime += usecond(); + } + + template + void MobiusEOFAFermion::MooeeInvDag(const FermionField &psi, FermionField &chi) + { + if(this->shift != 0.0){ MooeeInvDag_shift(psi,chi); return; } + + GridBase *grid = psi._grid; + int Ls = this->Ls; + + chi.checkerboard = psi.checkerboard; + + this->MooeeInvCalls++; + this->MooeeInvTime -= usecond(); + + parallel_for(int ss=0; ssoSites(); ss+=Ls){ + + auto tmp = psi._odata[0]; + + // Apply (U^{\prime})^{-dag} + chi[ss] = psi[ss]; + for(int s=1; suee[s-1]*tmp; + } + + // U_m^{-\dag} + for(int s=0; sueem[s]*tmp; + } + + // L_m^{-\dag} D^{-dag} + for(int s=0; sdee[s])*chi[ss+s] - (this->leem[s]/this->dee[Ls-1])*tmp; + } + chi[ss+Ls-1] = (1.0/this->dee[Ls-1])*chi[ss+Ls-1]; + + // Apply L^{-dag} + for(int s=Ls-2; s>=0; s--){ + spProj5p(tmp, chi[ss+s+1]); + chi[ss+s] = chi[ss+s] - this->lee[s]*tmp; + } + } + + this->MooeeInvTime += usecond(); + } + + template + void MobiusEOFAFermion::MooeeInvDag_shift(const FermionField &psi, FermionField &chi) + { + GridBase *grid = psi._grid; + int Ls = this->Ls; + + chi.checkerboard = psi.checkerboard; + + this->MooeeInvCalls++; + this->MooeeInvTime -= usecond(); + + parallel_for(int ss=0; ssoSites(); 
ss+=Ls){ + + auto tmp1 = psi._odata[0]; + auto tmp2 = psi._odata[0]; + auto tmp2_spProj = psi._odata[0]; + + // Apply (U^{\prime})^{-dag} and accumulate MooeeInvDag_shift_lc[j]*psi[j] in tmp2 + chi[ss] = psi[ss]; + tmp2 = MooeeInvDag_shift_lc[0]*psi[ss]; + for(int s=1; suee[s-1]*tmp1; + tmp2 = tmp2 + MooeeInvDag_shift_lc[s]*psi[ss+s]; + } + if(this->pm == 1){ spProj5p(tmp2_spProj, tmp2);} + else{ spProj5m(tmp2_spProj, tmp2); } + + // U_m^{-\dag} + for(int s=0; sueem[s]*tmp1; + } + + // L_m^{-\dag} D^{-dag} + for(int s=0; sdee[s])*chi[ss+s] - (this->leem[s]/this->dee[Ls-1])*tmp1; + } + chi[ss+Ls-1] = (1.0/this->dee[Ls-1])*chi[ss+Ls-1]; + spProj5p(tmp1, chi[ss+Ls-1]); + chi[ss+Ls-1] = chi[ss+Ls-1] + MooeeInvDag_shift_norm[Ls-1]*tmp2_spProj; + + // Apply L^{-dag} + for(int s=Ls-2; s>=0; s--){ + chi[ss+s] = chi[ss+s] - this->lee[s]*tmp1; + spProj5p(tmp1, chi[ss+s]); + chi[ss+s] = chi[ss+s] + MooeeInvDag_shift_norm[s]*tmp2_spProj; + } + } + + this->MooeeInvTime += usecond(); + } + + #ifdef MOBIUS_EOFA_DPERP_CACHE + + INSTANTIATE_DPERP_MOBIUS_EOFA(WilsonImplF); + INSTANTIATE_DPERP_MOBIUS_EOFA(WilsonImplD); + INSTANTIATE_DPERP_MOBIUS_EOFA(GparityWilsonImplF); + INSTANTIATE_DPERP_MOBIUS_EOFA(GparityWilsonImplD); + INSTANTIATE_DPERP_MOBIUS_EOFA(ZWilsonImplF); + INSTANTIATE_DPERP_MOBIUS_EOFA(ZWilsonImplD); + + INSTANTIATE_DPERP_MOBIUS_EOFA(WilsonImplFH); + INSTANTIATE_DPERP_MOBIUS_EOFA(WilsonImplDF); + INSTANTIATE_DPERP_MOBIUS_EOFA(GparityWilsonImplFH); + INSTANTIATE_DPERP_MOBIUS_EOFA(GparityWilsonImplDF); + INSTANTIATE_DPERP_MOBIUS_EOFA(ZWilsonImplFH); + INSTANTIATE_DPERP_MOBIUS_EOFA(ZWilsonImplDF); + + #endif + +}} From ab50145001d7a2f27c9f724440e0bfd88a6373e2 Mon Sep 17 00:00:00 2001 From: Christopher Kelly Date: Tue, 22 Aug 2017 17:12:25 -0400 Subject: [PATCH 091/377] Implemented first, unoptimized version of hand-unrolled G-parity kernels Improved Test_gparity --- lib/qcd/action/fermion/FermionOperatorImpl.h | 81 +++- lib/qcd/action/fermion/WilsonKernelsHand.cc | 456 
+++++++++++++------ tests/core/Test_gparity.cc | 148 +++--- 3 files changed, 485 insertions(+), 200 deletions(-) diff --git a/lib/qcd/action/fermion/FermionOperatorImpl.h b/lib/qcd/action/fermion/FermionOperatorImpl.h index 524179f5..1e344521 100644 --- a/lib/qcd/action/fermion/FermionOperatorImpl.h +++ b/lib/qcd/action/fermion/FermionOperatorImpl.h @@ -425,6 +425,22 @@ class DomainWallVec5dImpl : public PeriodicGaugeImpl< GaugeImplTypes< S,Nrepres //////////////////////////////////////////////////////////////////////////////////////// // Flavour doubled spinors; is Gparity the only? what about C*? //////////////////////////////////////////////////////////////////////////////////////// +namespace GparityWilsonImpl_helper{ + template + struct getAB; + + template + struct getAB{ + static inline A & ref(A &a, B &b){ return a; } + }; + template + struct getAB{ + static inline B & ref(A &a, B &b){ return b; } + }; +}; + + + template class GparityWilsonImpl : public ConjugateGaugeImpl > { public: @@ -462,7 +478,10 @@ class GparityWilsonImpl : public ConjugateGaugeImpl > tmp_full; + std::vector > tmp_half; + + GparityWilsonImpl(const ImplParams &p = ImplParams()) : Params(p), tmp_full(GridThread::GetThreads()), tmp_half(GridThread::GetThreads()){}; bool overlapCommsCompute(void) { return Params.overlapCommsCompute; }; @@ -538,6 +557,66 @@ class GparityWilsonImpl : public ConjugateGaugeImpl + inline void loadLinkElement(Simd ®, ref &memory) { + reg = memory; + } + + template + void GparityTwistPermute(SiteSpinorType &into, const SiteSpinorType &from, const int direction, const int distance, const int perm, GridBase* grid){ + typedef typename SiteSpinorType::scalar_object sobj; + sobj stmp; + std::vector vals(grid->Nsimd()); + extract(from,vals); + std::vector icoor; + for(int s=0;sNsimd();s++){ + grid->iCoorFromIindex(icoor,s); + assert((icoor[direction]==0)||(icoor[direction]==1)); + + int permute_lane; + if ( distance == 1) { + permute_lane = icoor[direction]?1:0; + } 
else { + permute_lane = icoor[direction]?0:1; + } + if(perm) permute_lane = !permute_lane; + + if ( permute_lane ) { + stmp(0) = vals[s](1); + stmp(1) = vals[s](0); + vals[s] = stmp; + } + } + merge(into,vals); + } + + + template + const SiteSpinorType & GparityGetChi(int &g, SiteSpinorType const* in, const int dir, const int f, StencilEntry *SE, StencilImpl &st){ + const int mmu = dir % 4; + const int direction = st._directions[dir]; + const int sl = st._grid->_simd_layout[direction]; + const int perm = SE->_permute; + g = f; + + if(SE->_around_the_world && Params.twists[mmu]){ + if(sl == 1){ //not SIMD vectorized in G-parity direction so just change the flavor index accessed to implement the twist + g = (f+1) % 2; + return in[SE->_offset]; + }else{ //SIMD vectorized in Gparity direction + const int me = omp_get_thread_num(); + const int distance = st._distances[dir]; + assert(distance == -1 || distance == 1); + SiteSpinorType &tmp = GparityWilsonImpl_helper::getAB::ref(tmp_full[me], tmp_half[me]); + GparityTwistPermute(tmp, in[SE->_offset], direction, distance, perm, st._grid); + return tmp; + } + }else return in[SE->_offset]; + } + + + inline void DoubleStore(GridBase *GaugeGrid,DoubledGaugeField &Uds,const GaugeField &Umu) { conformable(Uds._grid,GaugeGrid); diff --git a/lib/qcd/action/fermion/WilsonKernelsHand.cc b/lib/qcd/action/fermion/WilsonKernelsHand.cc index 96b8ab0a..866e30d2 100644 --- a/lib/qcd/action/fermion/WilsonKernelsHand.cc +++ b/lib/qcd/action/fermion/WilsonKernelsHand.cc @@ -30,60 +30,77 @@ Author: paboyle #define REGISTER -#define LOAD_CHIMU \ - {const SiteSpinor & ref (in._odata[offset]); \ - Chimu_00=ref()(0)(0);\ - Chimu_01=ref()(0)(1);\ - Chimu_02=ref()(0)(2);\ - Chimu_10=ref()(1)(0);\ - Chimu_11=ref()(1)(1);\ - Chimu_12=ref()(1)(2);\ - Chimu_20=ref()(2)(0);\ - Chimu_21=ref()(2)(1);\ - Chimu_22=ref()(2)(2);\ - Chimu_30=ref()(3)(0);\ - Chimu_31=ref()(3)(1);\ - Chimu_32=ref()(3)(2);} +#define LOAD_CHIMU_BODY(F) \ + Chimu_00=ref(F)(0)(0); \ 
+ Chimu_01=ref(F)(0)(1); \ + Chimu_02=ref(F)(0)(2); \ + Chimu_10=ref(F)(1)(0); \ + Chimu_11=ref(F)(1)(1); \ + Chimu_12=ref(F)(1)(2); \ + Chimu_20=ref(F)(2)(0); \ + Chimu_21=ref(F)(2)(1); \ + Chimu_22=ref(F)(2)(2); \ + Chimu_30=ref(F)(3)(0); \ + Chimu_31=ref(F)(3)(1); \ + Chimu_32=ref(F)(3)(2) + +#define LOAD_CHIMU(DIR,F) \ + { const SiteSpinor & ref (in._odata[offset]); LOAD_CHIMU_BODY(F); } + +#define LOAD_CHIMU_GPARITY(DIR,F) \ + { int g; const SiteSpinor & ref = GparityGetChi(g,in._odata.data(),DIR,F,SE,st); LOAD_CHIMU_BODY(g); } + +#define LOAD_CHI_BODY(F) \ + Chi_00 = ref(F)(0)(0);\ + Chi_01 = ref(F)(0)(1);\ + Chi_02 = ref(F)(0)(2);\ + Chi_10 = ref(F)(1)(0);\ + Chi_11 = ref(F)(1)(1);\ + Chi_12 = ref(F)(1)(2) + +#define LOAD_CHI(DIR,F) \ + {const SiteHalfSpinor &ref(buf[offset]); LOAD_CHI_BODY(F); } + +#define LOAD_CHI_GPARITY(DIR,F) \ + { int g; const SiteHalfSpinor &ref = GparityGetChi(g,buf,DIR,F,SE,st); LOAD_CHI_BODY(g); } -#define LOAD_CHI\ - {const SiteHalfSpinor &ref(buf[offset]); \ - Chi_00 = ref()(0)(0);\ - Chi_01 = ref()(0)(1);\ - Chi_02 = ref()(0)(2);\ - Chi_10 = ref()(1)(0);\ - Chi_11 = ref()(1)(1);\ - Chi_12 = ref()(1)(2);} // To splat or not to splat depends on the implementation -#define MULT_2SPIN(A)\ - {auto & ref(U._odata[sU](A)); \ - Impl::loadLinkElement(U_00,ref()(0,0)); \ - Impl::loadLinkElement(U_10,ref()(1,0)); \ - Impl::loadLinkElement(U_20,ref()(2,0)); \ - Impl::loadLinkElement(U_01,ref()(0,1)); \ - Impl::loadLinkElement(U_11,ref()(1,1)); \ - Impl::loadLinkElement(U_21,ref()(2,1)); \ - UChi_00 = U_00*Chi_00;\ - UChi_10 = U_00*Chi_10;\ - UChi_01 = U_10*Chi_00;\ - UChi_11 = U_10*Chi_10;\ - UChi_02 = U_20*Chi_00;\ - UChi_12 = U_20*Chi_10;\ - UChi_00+= U_01*Chi_01;\ - UChi_10+= U_01*Chi_11;\ - UChi_01+= U_11*Chi_01;\ - UChi_11+= U_11*Chi_11;\ - UChi_02+= U_21*Chi_01;\ - UChi_12+= U_21*Chi_11;\ - Impl::loadLinkElement(U_00,ref()(0,2)); \ - Impl::loadLinkElement(U_10,ref()(1,2)); \ - Impl::loadLinkElement(U_20,ref()(2,2)); \ - UChi_00+= 
U_00*Chi_02;\ - UChi_10+= U_00*Chi_12;\ - UChi_01+= U_10*Chi_02;\ - UChi_11+= U_10*Chi_12;\ - UChi_02+= U_20*Chi_02;\ - UChi_12+= U_20*Chi_12;} +#define MULT_2SPIN_BODY \ + Impl::loadLinkElement(U_00,ref()(0,0)); \ + Impl::loadLinkElement(U_10,ref()(1,0)); \ + Impl::loadLinkElement(U_20,ref()(2,0)); \ + Impl::loadLinkElement(U_01,ref()(0,1)); \ + Impl::loadLinkElement(U_11,ref()(1,1)); \ + Impl::loadLinkElement(U_21,ref()(2,1)); \ + UChi_00 = U_00*Chi_00; \ + UChi_10 = U_00*Chi_10; \ + UChi_01 = U_10*Chi_00; \ + UChi_11 = U_10*Chi_10; \ + UChi_02 = U_20*Chi_00; \ + UChi_12 = U_20*Chi_10; \ + UChi_00+= U_01*Chi_01; \ + UChi_10+= U_01*Chi_11; \ + UChi_01+= U_11*Chi_01; \ + UChi_11+= U_11*Chi_11; \ + UChi_02+= U_21*Chi_01; \ + UChi_12+= U_21*Chi_11; \ + Impl::loadLinkElement(U_00,ref()(0,2)); \ + Impl::loadLinkElement(U_10,ref()(1,2)); \ + Impl::loadLinkElement(U_20,ref()(2,2)); \ + UChi_00+= U_00*Chi_02; \ + UChi_10+= U_00*Chi_12; \ + UChi_01+= U_10*Chi_02; \ + UChi_11+= U_10*Chi_12; \ + UChi_02+= U_20*Chi_02; \ + UChi_12+= U_20*Chi_12 + + +#define MULT_2SPIN(A,F) \ + {auto & ref(U._odata[sU](A)); MULT_2SPIN_BODY; } + +#define MULT_2SPIN_GPARITY(A,F) \ + {auto & ref(U._odata[sU](F)(A)); MULT_2SPIN_BODY; } #define PERMUTE_DIR(dir) \ @@ -307,84 +324,85 @@ Author: paboyle result_31-= UChi_11; \ result_32-= UChi_12; -#define HAND_STENCIL_LEG(PROJ,PERM,DIR,RECON) \ +#define HAND_STENCIL_LEG(PROJ,PERM,DIR,RECON,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL) \ SE=st.GetEntry(ptype,DIR,ss); \ offset = SE->_offset; \ local = SE->_is_local; \ perm = SE->_permute; \ if ( local ) { \ - LOAD_CHIMU; \ + LOAD_CHIMU_IMPL(DIR,F); \ PROJ; \ if ( perm) { \ PERMUTE_DIR(PERM); \ } \ } else { \ - LOAD_CHI; \ + LOAD_CHI_IMPL(DIR,F); \ } \ - MULT_2SPIN(DIR); \ + MULT_2SPIN_IMPL(DIR,F); \ RECON; -#define HAND_STENCIL_LEG_INT(PROJ,PERM,DIR,RECON) \ + +#define HAND_STENCIL_LEG_INT(PROJ,PERM,DIR,RECON,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL) \ SE=st.GetEntry(ptype,DIR,ss); \ offset = 
SE->_offset; \ local = SE->_is_local; \ perm = SE->_permute; \ if ( local ) { \ - LOAD_CHIMU; \ + LOAD_CHIMU_IMPL(DIR,F); \ PROJ; \ if ( perm) { \ PERMUTE_DIR(PERM); \ } \ } else if ( st.same_node[DIR] ) { \ - LOAD_CHI; \ + LOAD_CHI_IMPL(DIR,F); \ } \ if (local || st.same_node[DIR] ) { \ - MULT_2SPIN(DIR); \ + MULT_2SPIN_IMPL(DIR,F); \ RECON; \ } -#define HAND_STENCIL_LEG_EXT(PROJ,PERM,DIR,RECON) \ +#define HAND_STENCIL_LEG_EXT(PROJ,PERM,DIR,RECON,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL) \ SE=st.GetEntry(ptype,DIR,ss); \ offset = SE->_offset; \ if((!SE->_is_local)&&(!st.same_node[DIR]) ) { \ - LOAD_CHI; \ - MULT_2SPIN(DIR); \ + LOAD_CHI_IMPL(DIR,F); \ + MULT_2SPIN_IMPL(DIR,F); \ RECON; \ nmu++; \ } -#define HAND_RESULT(ss) \ +#define HAND_RESULT(ss,F) \ { \ SiteSpinor & ref (out._odata[ss]); \ - vstream(ref()(0)(0),result_00); \ - vstream(ref()(0)(1),result_01); \ - vstream(ref()(0)(2),result_02); \ - vstream(ref()(1)(0),result_10); \ - vstream(ref()(1)(1),result_11); \ - vstream(ref()(1)(2),result_12); \ - vstream(ref()(2)(0),result_20); \ - vstream(ref()(2)(1),result_21); \ - vstream(ref()(2)(2),result_22); \ - vstream(ref()(3)(0),result_30); \ - vstream(ref()(3)(1),result_31); \ - vstream(ref()(3)(2),result_32); \ + vstream(ref(F)(0)(0),result_00); \ + vstream(ref(F)(0)(1),result_01); \ + vstream(ref(F)(0)(2),result_02); \ + vstream(ref(F)(1)(0),result_10); \ + vstream(ref(F)(1)(1),result_11); \ + vstream(ref(F)(1)(2),result_12); \ + vstream(ref(F)(2)(0),result_20); \ + vstream(ref(F)(2)(1),result_21); \ + vstream(ref(F)(2)(2),result_22); \ + vstream(ref(F)(3)(0),result_30); \ + vstream(ref(F)(3)(1),result_31); \ + vstream(ref(F)(3)(2),result_32); \ } -#define HAND_RESULT_EXT(ss) \ +#define HAND_RESULT_EXT(ss,F) \ if (nmu){ \ SiteSpinor & ref (out._odata[ss]); \ - ref()(0)(0)+=result_00; \ - ref()(0)(1)+=result_01; \ - ref()(0)(2)+=result_02; \ - ref()(1)(0)+=result_10; \ - ref()(1)(1)+=result_11; \ - ref()(1)(2)+=result_12; \ - ref()(2)(0)+=result_20; \ 
- ref()(2)(1)+=result_21; \ - ref()(2)(2)+=result_22; \ - ref()(3)(0)+=result_30; \ - ref()(3)(1)+=result_31; \ - ref()(3)(2)+=result_32; \ + ref(F)(0)(0)+=result_00; \ + ref(F)(0)(1)+=result_01; \ + ref(F)(0)(2)+=result_02; \ + ref(F)(1)(0)+=result_10; \ + ref(F)(1)(1)+=result_11; \ + ref(F)(1)(2)+=result_12; \ + ref(F)(2)(0)+=result_20; \ + ref(F)(2)(1)+=result_21; \ + ref(F)(2)(2)+=result_22; \ + ref(F)(3)(0)+=result_30; \ + ref(F)(3)(1)+=result_31; \ + ref(F)(3)(2)+=result_32; \ } @@ -463,15 +481,18 @@ WilsonKernels::HandDhopSite(StencilImpl &st,LebesgueOrder &lo,DoubledGauge int offset,local,perm, ptype; StencilEntry *SE; - HAND_STENCIL_LEG(XM_PROJ,3,Xp,XM_RECON); - HAND_STENCIL_LEG(YM_PROJ,2,Yp,YM_RECON_ACCUM); - HAND_STENCIL_LEG(ZM_PROJ,1,Zp,ZM_RECON_ACCUM); - HAND_STENCIL_LEG(TM_PROJ,0,Tp,TM_RECON_ACCUM); - HAND_STENCIL_LEG(XP_PROJ,3,Xm,XP_RECON_ACCUM); - HAND_STENCIL_LEG(YP_PROJ,2,Ym,YP_RECON_ACCUM); - HAND_STENCIL_LEG(ZP_PROJ,1,Zm,ZP_RECON_ACCUM); - HAND_STENCIL_LEG(TP_PROJ,0,Tm,TP_RECON_ACCUM); - HAND_RESULT(ss); +#define HAND_DOP_SITE(F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL) \ + HAND_STENCIL_LEG(XM_PROJ,3,Xp,XM_RECON,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \ + HAND_STENCIL_LEG(YM_PROJ,2,Yp,YM_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \ + HAND_STENCIL_LEG(ZM_PROJ,1,Zp,ZM_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \ + HAND_STENCIL_LEG(TM_PROJ,0,Tp,TM_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \ + HAND_STENCIL_LEG(XP_PROJ,3,Xm,XP_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \ + HAND_STENCIL_LEG(YP_PROJ,2,Ym,YP_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \ + HAND_STENCIL_LEG(ZP_PROJ,1,Zm,ZP_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \ + HAND_STENCIL_LEG(TP_PROJ,0,Tm,TP_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \ + HAND_RESULT(ss,F) + + HAND_DOP_SITE(, LOAD_CHI,LOAD_CHIMU,MULT_2SPIN); } template @@ -485,16 
+506,19 @@ void WilsonKernels::HandDhopSiteDag(StencilImpl &st,LebesgueOrder &lo,Doub StencilEntry *SE; int offset,local,perm, ptype; - - HAND_STENCIL_LEG(XP_PROJ,3,Xp,XP_RECON); - HAND_STENCIL_LEG(YP_PROJ,2,Yp,YP_RECON_ACCUM); - HAND_STENCIL_LEG(ZP_PROJ,1,Zp,ZP_RECON_ACCUM); - HAND_STENCIL_LEG(TP_PROJ,0,Tp,TP_RECON_ACCUM); - HAND_STENCIL_LEG(XM_PROJ,3,Xm,XM_RECON_ACCUM); - HAND_STENCIL_LEG(YM_PROJ,2,Ym,YM_RECON_ACCUM); - HAND_STENCIL_LEG(ZM_PROJ,1,Zm,ZM_RECON_ACCUM); - HAND_STENCIL_LEG(TM_PROJ,0,Tm,TM_RECON_ACCUM); - HAND_RESULT(ss); + +#define HAND_DOP_SITE_DAG(F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL) \ + HAND_STENCIL_LEG(XP_PROJ,3,Xp,XP_RECON,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \ + HAND_STENCIL_LEG(YP_PROJ,2,Yp,YP_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \ + HAND_STENCIL_LEG(ZP_PROJ,1,Zp,ZP_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \ + HAND_STENCIL_LEG(TP_PROJ,0,Tp,TP_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \ + HAND_STENCIL_LEG(XM_PROJ,3,Xm,XM_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \ + HAND_STENCIL_LEG(YM_PROJ,2,Ym,YM_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \ + HAND_STENCIL_LEG(ZM_PROJ,1,Zm,ZM_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \ + HAND_STENCIL_LEG(TM_PROJ,0,Tm,TM_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \ + HAND_RESULT(ss,F) + + HAND_DOP_SITE_DAG(, LOAD_CHI,LOAD_CHIMU,MULT_2SPIN); } template void @@ -509,16 +533,20 @@ WilsonKernels::HandDhopSiteInt(StencilImpl &st,LebesgueOrder &lo,DoubledGa int offset,local,perm, ptype; StencilEntry *SE; - ZERO_RESULT; - HAND_STENCIL_LEG_INT(XM_PROJ,3,Xp,XM_RECON_ACCUM); - HAND_STENCIL_LEG_INT(YM_PROJ,2,Yp,YM_RECON_ACCUM); - HAND_STENCIL_LEG_INT(ZM_PROJ,1,Zp,ZM_RECON_ACCUM); - HAND_STENCIL_LEG_INT(TM_PROJ,0,Tp,TM_RECON_ACCUM); - HAND_STENCIL_LEG_INT(XP_PROJ,3,Xm,XP_RECON_ACCUM); - HAND_STENCIL_LEG_INT(YP_PROJ,2,Ym,YP_RECON_ACCUM); - 
HAND_STENCIL_LEG_INT(ZP_PROJ,1,Zm,ZP_RECON_ACCUM); - HAND_STENCIL_LEG_INT(TP_PROJ,0,Tm,TP_RECON_ACCUM); - HAND_RESULT(ss); + +#define HAND_DOP_SITE_INT(F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL) \ + ZERO_RESULT; \ + HAND_STENCIL_LEG_INT(XM_PROJ,3,Xp,XM_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \ + HAND_STENCIL_LEG_INT(YM_PROJ,2,Yp,YM_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \ + HAND_STENCIL_LEG_INT(ZM_PROJ,1,Zp,ZM_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \ + HAND_STENCIL_LEG_INT(TM_PROJ,0,Tp,TM_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \ + HAND_STENCIL_LEG_INT(XP_PROJ,3,Xm,XP_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \ + HAND_STENCIL_LEG_INT(YP_PROJ,2,Ym,YP_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \ + HAND_STENCIL_LEG_INT(ZP_PROJ,1,Zm,ZP_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \ + HAND_STENCIL_LEG_INT(TP_PROJ,0,Tm,TP_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \ + HAND_RESULT(ss,F) + + HAND_DOP_SITE_INT(, LOAD_CHI,LOAD_CHIMU,MULT_2SPIN); } template @@ -532,16 +560,20 @@ void WilsonKernels::HandDhopSiteDagInt(StencilImpl &st,LebesgueOrder &lo,D StencilEntry *SE; int offset,local,perm, ptype; - ZERO_RESULT; - HAND_STENCIL_LEG_INT(XP_PROJ,3,Xp,XP_RECON_ACCUM); - HAND_STENCIL_LEG_INT(YP_PROJ,2,Yp,YP_RECON_ACCUM); - HAND_STENCIL_LEG_INT(ZP_PROJ,1,Zp,ZP_RECON_ACCUM); - HAND_STENCIL_LEG_INT(TP_PROJ,0,Tp,TP_RECON_ACCUM); - HAND_STENCIL_LEG_INT(XM_PROJ,3,Xm,XM_RECON_ACCUM); - HAND_STENCIL_LEG_INT(YM_PROJ,2,Ym,YM_RECON_ACCUM); - HAND_STENCIL_LEG_INT(ZM_PROJ,1,Zm,ZM_RECON_ACCUM); - HAND_STENCIL_LEG_INT(TM_PROJ,0,Tm,TM_RECON_ACCUM); - HAND_RESULT(ss); + +#define HAND_DOP_SITE_DAG_INT(F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL) \ + ZERO_RESULT; \ + HAND_STENCIL_LEG_INT(XP_PROJ,3,Xp,XP_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \ + 
HAND_STENCIL_LEG_INT(YP_PROJ,2,Yp,YP_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \ + HAND_STENCIL_LEG_INT(ZP_PROJ,1,Zp,ZP_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \ + HAND_STENCIL_LEG_INT(TP_PROJ,0,Tp,TP_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \ + HAND_STENCIL_LEG_INT(XM_PROJ,3,Xm,XM_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \ + HAND_STENCIL_LEG_INT(YM_PROJ,2,Ym,YM_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \ + HAND_STENCIL_LEG_INT(ZM_PROJ,1,Zm,ZM_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \ + HAND_STENCIL_LEG_INT(TM_PROJ,0,Tm,TM_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \ + HAND_RESULT(ss,F) + + HAND_DOP_SITE_DAG_INT(, LOAD_CHI,LOAD_CHIMU,MULT_2SPIN); } template void @@ -557,16 +589,20 @@ WilsonKernels::HandDhopSiteExt(StencilImpl &st,LebesgueOrder &lo,DoubledGa int offset,local,perm, ptype; StencilEntry *SE; int nmu=0; - ZERO_RESULT; - HAND_STENCIL_LEG_EXT(XM_PROJ,3,Xp,XM_RECON_ACCUM); - HAND_STENCIL_LEG_EXT(YM_PROJ,2,Yp,YM_RECON_ACCUM); - HAND_STENCIL_LEG_EXT(ZM_PROJ,1,Zp,ZM_RECON_ACCUM); - HAND_STENCIL_LEG_EXT(TM_PROJ,0,Tp,TM_RECON_ACCUM); - HAND_STENCIL_LEG_EXT(XP_PROJ,3,Xm,XP_RECON_ACCUM); - HAND_STENCIL_LEG_EXT(YP_PROJ,2,Ym,YP_RECON_ACCUM); - HAND_STENCIL_LEG_EXT(ZP_PROJ,1,Zm,ZP_RECON_ACCUM); - HAND_STENCIL_LEG_EXT(TP_PROJ,0,Tm,TP_RECON_ACCUM); - HAND_RESULT_EXT(ss); + +#define HAND_DOP_SITE_EXT(F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL) \ + ZERO_RESULT; \ + HAND_STENCIL_LEG_EXT(XM_PROJ,3,Xp,XM_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \ + HAND_STENCIL_LEG_EXT(YM_PROJ,2,Yp,YM_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \ + HAND_STENCIL_LEG_EXT(ZM_PROJ,1,Zp,ZM_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \ + HAND_STENCIL_LEG_EXT(TM_PROJ,0,Tp,TM_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \ + 
HAND_STENCIL_LEG_EXT(XP_PROJ,3,Xm,XP_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \ + HAND_STENCIL_LEG_EXT(YP_PROJ,2,Ym,YP_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \ + HAND_STENCIL_LEG_EXT(ZP_PROJ,1,Zm,ZP_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \ + HAND_STENCIL_LEG_EXT(TP_PROJ,0,Tm,TP_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \ + HAND_RESULT_EXT(ss,F) + + HAND_DOP_SITE_EXT(, LOAD_CHI,LOAD_CHIMU,MULT_2SPIN); } template @@ -581,16 +617,20 @@ void WilsonKernels::HandDhopSiteDagExt(StencilImpl &st,LebesgueOrder &lo,D StencilEntry *SE; int offset,local,perm, ptype; int nmu=0; - ZERO_RESULT; - HAND_STENCIL_LEG_EXT(XP_PROJ,3,Xp,XP_RECON_ACCUM); - HAND_STENCIL_LEG_EXT(YP_PROJ,2,Yp,YP_RECON_ACCUM); - HAND_STENCIL_LEG_EXT(ZP_PROJ,1,Zp,ZP_RECON_ACCUM); - HAND_STENCIL_LEG_EXT(TP_PROJ,0,Tp,TP_RECON_ACCUM); - HAND_STENCIL_LEG_EXT(XM_PROJ,3,Xm,XM_RECON_ACCUM); - HAND_STENCIL_LEG_EXT(YM_PROJ,2,Ym,YM_RECON_ACCUM); - HAND_STENCIL_LEG_EXT(ZM_PROJ,1,Zm,ZM_RECON_ACCUM); - HAND_STENCIL_LEG_EXT(TM_PROJ,0,Tm,TM_RECON_ACCUM); - HAND_RESULT_EXT(ss); + +#define HAND_DOP_SITE_DAG_EXT(F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL) \ + ZERO_RESULT; \ + HAND_STENCIL_LEG_EXT(XP_PROJ,3,Xp,XP_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \ + HAND_STENCIL_LEG_EXT(YP_PROJ,2,Yp,YP_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \ + HAND_STENCIL_LEG_EXT(ZP_PROJ,1,Zp,ZP_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \ + HAND_STENCIL_LEG_EXT(TP_PROJ,0,Tp,TP_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \ + HAND_STENCIL_LEG_EXT(XM_PROJ,3,Xm,XM_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \ + HAND_STENCIL_LEG_EXT(YM_PROJ,2,Ym,YM_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \ + HAND_STENCIL_LEG_EXT(ZM_PROJ,1,Zm,ZM_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \ + 
HAND_STENCIL_LEG_EXT(TM_PROJ,0,Tm,TM_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \ + HAND_RESULT_EXT(ss,F) + + HAND_DOP_SITE_DAG_EXT(, LOAD_CHI,LOAD_CHIMU,MULT_2SPIN); } //////////////////////////////////////////////// @@ -647,10 +687,130 @@ void WilsonKernels::HandDhopSiteDagExt(StencilImpl &st,LebesgueOrder &lo,D FermionField &out){ assert(0); } \ HAND_SPECIALISE_EMPTY(GparityWilsonImplF); - HAND_SPECIALISE_EMPTY(GparityWilsonImplD); + //HAND_SPECIALISE_EMPTY(GparityWilsonImplD); HAND_SPECIALISE_EMPTY(GparityWilsonImplFH); HAND_SPECIALISE_EMPTY(GparityWilsonImplDF); + + + + +template<> void +WilsonKernels::HandDhopSite(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,SiteHalfSpinor *buf, + int ss,int sU,const FermionField &in, FermionField &out) +{ +// T==0, Z==1, Y==2, Z==3 expect 1,2,2,2 simd layout etc... + typedef GparityWilsonImplD Impl; + typedef typename Simd::scalar_type S; + typedef typename Simd::vector_type V; + + HAND_DECLARATIONS(ignore); + + int offset,local,perm, ptype; + StencilEntry *SE; + HAND_DOP_SITE(0, LOAD_CHI_GPARITY,LOAD_CHIMU_GPARITY,MULT_2SPIN_GPARITY); + HAND_DOP_SITE(1, LOAD_CHI_GPARITY,LOAD_CHIMU_GPARITY,MULT_2SPIN_GPARITY); +} + +template<> +void WilsonKernels::HandDhopSiteDag(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,SiteHalfSpinor *buf, + int ss,int sU,const FermionField &in, FermionField &out) +{ + typedef GparityWilsonImplD Impl; + typedef typename Simd::scalar_type S; + typedef typename Simd::vector_type V; + + HAND_DECLARATIONS(ignore); + + StencilEntry *SE; + int offset,local,perm, ptype; + HAND_DOP_SITE_DAG(0, LOAD_CHI_GPARITY,LOAD_CHIMU_GPARITY,MULT_2SPIN_GPARITY); + HAND_DOP_SITE_DAG(1, LOAD_CHI_GPARITY,LOAD_CHIMU_GPARITY,MULT_2SPIN_GPARITY); +} + +template<> void +WilsonKernels::HandDhopSiteInt(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,SiteHalfSpinor *buf, + int ss,int sU,const FermionField &in, FermionField &out) +{ +// T==0, Z==1, Y==2, Z==3 expect 1,2,2,2 simd layout 
etc... + typedef GparityWilsonImplD Impl; + typedef typename Simd::scalar_type S; + typedef typename Simd::vector_type V; + + HAND_DECLARATIONS(ignore); + + int offset,local,perm, ptype; + StencilEntry *SE; + HAND_DOP_SITE_INT(0, LOAD_CHI_GPARITY,LOAD_CHIMU_GPARITY,MULT_2SPIN_GPARITY); + HAND_DOP_SITE_INT(1, LOAD_CHI_GPARITY,LOAD_CHIMU_GPARITY,MULT_2SPIN_GPARITY); +} + +template<> +void WilsonKernels::HandDhopSiteDagInt(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,SiteHalfSpinor *buf, + int ss,int sU,const FermionField &in, FermionField &out) +{ + typedef GparityWilsonImplD Impl; + typedef typename Simd::scalar_type S; + typedef typename Simd::vector_type V; + + HAND_DECLARATIONS(ignore); + + StencilEntry *SE; + int offset,local,perm, ptype; + HAND_DOP_SITE_DAG_INT(0, LOAD_CHI_GPARITY,LOAD_CHIMU_GPARITY,MULT_2SPIN_GPARITY); + HAND_DOP_SITE_DAG_INT(1, LOAD_CHI_GPARITY,LOAD_CHIMU_GPARITY,MULT_2SPIN_GPARITY); +} + +template<> void +WilsonKernels::HandDhopSiteExt(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,SiteHalfSpinor *buf, + int ss,int sU,const FermionField &in, FermionField &out) +{ +// T==0, Z==1, Y==2, Z==3 expect 1,2,2,2 simd layout etc... 
+ typedef GparityWilsonImplD Impl; + typedef typename Simd::scalar_type S; + typedef typename Simd::vector_type V; + + HAND_DECLARATIONS(ignore); + + int offset,local,perm, ptype; + StencilEntry *SE; + int nmu=0; + HAND_DOP_SITE_EXT(0, LOAD_CHI_GPARITY,LOAD_CHIMU_GPARITY,MULT_2SPIN_GPARITY); + nmu = 0; + HAND_DOP_SITE_EXT(1, LOAD_CHI_GPARITY,LOAD_CHIMU_GPARITY,MULT_2SPIN_GPARITY); +} + +template<> +void WilsonKernels::HandDhopSiteDagExt(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,SiteHalfSpinor *buf, + int ss,int sU,const FermionField &in, FermionField &out) +{ + typedef GparityWilsonImplD Impl; + typedef typename Simd::scalar_type S; + typedef typename Simd::vector_type V; + + HAND_DECLARATIONS(ignore); + + StencilEntry *SE; + int offset,local,perm, ptype; + int nmu=0; + HAND_DOP_SITE_DAG_EXT(0, LOAD_CHI_GPARITY,LOAD_CHIMU_GPARITY,MULT_2SPIN_GPARITY); + nmu = 0; + HAND_DOP_SITE_DAG_EXT(1, LOAD_CHI_GPARITY,LOAD_CHIMU_GPARITY,MULT_2SPIN_GPARITY); +} + + + + + + + + + + + + + + ////////////// Wilson ; uses this implementation ///////////////////// #define INSTANTIATE_THEM(A) \ diff --git a/tests/core/Test_gparity.cc b/tests/core/Test_gparity.cc index cfb5d2c3..81091e9e 100644 --- a/tests/core/Test_gparity.cc +++ b/tests/core/Test_gparity.cc @@ -33,22 +33,68 @@ using namespace std; using namespace Grid; using namespace Grid::QCD; -typedef typename GparityDomainWallFermionR::FermionField FermionField; +//typedef GparityDomainWallFermionD GparityDiracOp; +//typedef DomainWallFermionD StandardDiracOp; +//#define DOP_PARAMS +typedef GparityMobiusFermionD GparityDiracOp; +typedef MobiusFermionD StandardDiracOp; +#define DOP_PARAMS ,1.5, 0.5 + + +typedef typename GparityDiracOp::FermionField GparityFermionField; +typedef typename GparityDiracOp::GaugeField GparityGaugeField; +typedef typename GparityFermionField::vector_type vComplexType; + +typedef typename StandardDiracOp::FermionField StandardFermionField; +typedef typename StandardDiracOp::GaugeField 
StandardGaugeField; + +enum{ same_vComplex = std::is_same::value }; +static_assert(same_vComplex == 1, "Dirac Operators must have same underlying SIMD complex type"); int main (int argc, char ** argv) { - const int nu = 3; + int nu = 0; Grid_init(&argc,&argv); + for(int i=1;i> nu; + std::cout << GridLogMessage << "Set Gparity direction to " << nu << std::endl; + } + } + + std::cout << GridLogMessage<< "*****************************************************************" < latt_2f(Nd,L); - std::vector latt_1f(Nd,L); latt_1f[nu] = 2*L; + //const int L =4; + //std::vector latt_2f(Nd,L); - std::vector simd_layout = GridDefaultSimd(Nd,vComplex::Nsimd()); + std::vector latt_2f = GridDefaultLatt(); + std::vector latt_1f(latt_2f); latt_1f[nu] = 2*latt_2f[nu]; + int L = latt_2f[nu]; + + + std::vector simd_layout = GridDefaultSimd(Nd,vComplexType::Nsimd()); + + std::cout << GridLogMessage << "SIMD layout: "; + for(int i=0;i mpi_layout = GridDefaultMpi(); //node layout GridCartesian * UGrid_1f = SpaceTimeGrid::makeFourDimGrid(latt_1f, simd_layout, mpi_layout); @@ -67,13 +113,13 @@ int main (int argc, char ** argv) GridParallelRNG RNG5_2f(FGrid_2f); RNG5_2f.SeedFixedIntegers(seeds5); GridParallelRNG RNG4_2f(UGrid_2f); RNG4_2f.SeedFixedIntegers(seeds4); - LatticeGaugeField Umu_2f(UGrid_2f); + GparityGaugeField Umu_2f(UGrid_2f); SU3::HotConfiguration(RNG4_2f,Umu_2f); - LatticeFermion src (FGrid_2f); - LatticeFermion tmpsrc(FGrid_2f); - FermionField src_2f(FGrid_2f); - LatticeFermion src_1f(FGrid_1f); + StandardFermionField src (FGrid_2f); + StandardFermionField tmpsrc(FGrid_2f); + GparityFermionField src_2f(FGrid_2f); + StandardFermionField src_1f(FGrid_1f); // Replicate fermion source random(RNG5_2f,src); @@ -81,8 +127,8 @@ int main (int argc, char ** argv) tmpsrc=src*2.0; PokeIndex<0>(src_2f,tmpsrc,1); - LatticeFermion result_1f(FGrid_1f); result_1f=zero; - LatticeGaugeField Umu_1f(UGrid_1f); + StandardFermionField result_1f(FGrid_1f); result_1f=zero; + StandardGaugeField 
Umu_1f(UGrid_1f); Replicate(Umu_2f,Umu_1f); //Coordinate grid for reference @@ -92,7 +138,7 @@ int main (int argc, char ** argv) //Copy-conjugate the gauge field //First C-shift the lattice by Lx/2 { - LatticeGaugeField Umu_shift = conjugate( Cshift(Umu_1f,nu,L) ); + StandardGaugeField Umu_shift = conjugate( Cshift(Umu_1f,nu,L) ); Umu_1f = where( xcoor_1f >= Integer(L), Umu_shift, Umu_1f ); // hack test to check the same @@ -101,7 +147,7 @@ int main (int argc, char ** argv) cout << GridLogMessage << "Umu diff " << norm2(Umu_shift)<(Umu_1f,nu)) Unu(UGrid_1f); Unu = PeekIndex(Umu_1f,nu); Unu = where(xcoor_1f == Integer(2*L-1), -Unu, Unu); PokeIndex(Umu_1f,Unu,nu); @@ -115,33 +161,33 @@ int main (int argc, char ** argv) RealD mass=0.0; RealD M5=1.8; - DomainWallFermionR Ddwf(Umu_1f,*FGrid_1f,*FrbGrid_1f,*UGrid_1f,*UrbGrid_1f,mass,M5); + StandardDiracOp Ddwf(Umu_1f,*FGrid_1f,*FrbGrid_1f,*UGrid_1f,*UrbGrid_1f,mass,M5 DOP_PARAMS); - LatticeFermion src_o_1f(FrbGrid_1f); - LatticeFermion result_o_1f(FrbGrid_1f); + StandardFermionField src_o_1f(FrbGrid_1f); + StandardFermionField result_o_1f(FrbGrid_1f); pickCheckerboard(Odd,src_o_1f,src_1f); result_o_1f=zero; - SchurDiagMooeeOperator HermOpEO(Ddwf); - ConjugateGradient CG(1.0e-8,10000); + SchurDiagMooeeOperator HermOpEO(Ddwf); + ConjugateGradient CG(1.0e-8,10000); CG(HermOpEO,src_o_1f,result_o_1f); // const int nu = 3; std::vector twists(Nd,0); twists[nu] = 1; - GparityDomainWallFermionR::ImplParams params; + GparityDiracOp::ImplParams params; params.twists = twists; - GparityDomainWallFermionR GPDdwf(Umu_2f,*FGrid_2f,*FrbGrid_2f,*UGrid_2f,*UrbGrid_2f,mass,M5,params); + GparityDiracOp GPDdwf(Umu_2f,*FGrid_2f,*FrbGrid_2f,*UGrid_2f,*UrbGrid_2f,mass,M5 DOP_PARAMS,params); for(int disp=-1;disp<=1;disp+=2) for(int mu=0;mu<5;mu++) { - FermionField Dsrc_2f(FGrid_2f); + GparityFermionField Dsrc_2f(FGrid_2f); - LatticeFermion Dsrc_1f(FGrid_1f); - LatticeFermion Dsrc_2freplica(FGrid_1f); - LatticeFermion Dsrc_2freplica0(FGrid_1f); - 
LatticeFermion Dsrc_2freplica1(FGrid_1f); + StandardFermionField Dsrc_1f(FGrid_1f); + StandardFermionField Dsrc_2freplica(FGrid_1f); + StandardFermionField Dsrc_2freplica0(FGrid_1f); + StandardFermionField Dsrc_2freplica1(FGrid_1f); if ( mu ==0 ) { std::cout << GridLogMessage<< " Cross checking entire hopping term"<(Dsrc_2f,0); - LatticeFermion Dsrc_2f1(FGrid_2f); Dsrc_2f1 = PeekIndex<0>(Dsrc_2f,1); + StandardFermionField Dsrc_2f0(FGrid_2f); Dsrc_2f0 = PeekIndex<0>(Dsrc_2f,0); + StandardFermionField Dsrc_2f1(FGrid_2f); Dsrc_2f1 = PeekIndex<0>(Dsrc_2f,1); // Dsrc_2f1 = Dsrc_2f1 - Dsrc_2f0; // std::cout << GridLogMessage << " Cross check two halves " < CG2f(1.0e-8,10000); - SchurDiagMooeeOperator HermOpEO2f(GPDdwf); + ConjugateGradient CG2f(1.0e-8,10000); + SchurDiagMooeeOperator HermOpEO2f(GPDdwf); CG2f(HermOpEO2f,src_o_2f,result_o_2f); std::cout << "2f cb "< Date: Tue, 22 Aug 2017 18:12:12 -0400 Subject: [PATCH 092/377] Replaced slow unpack-repack in G-parity BC twist with intrinsics version --- lib/qcd/action/fermion/FermionOperatorImpl.h | 26 ++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/lib/qcd/action/fermion/FermionOperatorImpl.h b/lib/qcd/action/fermion/FermionOperatorImpl.h index 1e344521..5300063b 100644 --- a/lib/qcd/action/fermion/FermionOperatorImpl.h +++ b/lib/qcd/action/fermion/FermionOperatorImpl.h @@ -565,6 +565,7 @@ class GparityWilsonImpl : public ConjugateGaugeImpl void GparityTwistPermute(SiteSpinorType &into, const SiteSpinorType &from, const int direction, const int distance, const int perm, GridBase* grid){ +#if 0 typedef typename SiteSpinorType::scalar_object sobj; sobj stmp; std::vector vals(grid->Nsimd()); @@ -589,6 +590,31 @@ class GparityWilsonImpl : public ConjugateGaugeImplPermuteType(direction); + typedef typename SiteSpinorType::vector_type vtype; + vtype tmp1, tmp2, tmp3, tmp4; + + for(int s=0;s 1h 1l + exchange(tmp2,tmp3, from(0)(s)(c), tmp1, permute_type); // 0l 0h , 1h 1l -> 0l 1h 0h,1l + permute(tmp4, tmp3, 
permute_type); //0h,1l -> 1l,0h + + if( (distance == 1 && !perm) || (distance == -1 && perm) ){ + //Pulled fermion through forwards face, GPBC on upper component + //Need 0= 0l 1h 1= 1l 0h + into(0)(s)(c) = tmp2; + into(1)(s)(c) = tmp4; + }else if( (distance == -1 && !perm) || (distance == 1 && perm) ){ + //Pulled fermion through backwards face, GPBC on lower component + //Need 0= 1l 0h 1= 0l 1h + into(0)(s)(c) = tmp4; + into(1)(s)(c) = tmp2; + }else assert(0); + } + } +#endif } From 459f70e8d4a578ee9d176c5c068dfcb924b34866 Mon Sep 17 00:00:00 2001 From: David Murphy Date: Tue, 22 Aug 2017 22:38:30 -0400 Subject: [PATCH 093/377] Check-in of working Mobius EOFA class and tests --- lib/qcd/action/fermion/Fermion.h | 6 +- lib/qcd/action/fermion/MobiusEOFAFermion.cc | 21 +- lib/qcd/action/fermion/MobiusEOFAFermion.h | 4 +- .../action/fermion/MobiusEOFAFermioncache.cc | 24 +- .../action/fermion/MobiusEOFAFermiondense.cc | 184 +++++ .../action/fermion/MobiusEOFAFermionssp.cc | 290 ++++++++ .../action/fermion/MobiusEOFAFermionvec.cc | 654 ++++++++++++++++++ tests/core/Test_mobius_eofa_even_odd.cc | 241 +++++++ tests/debug/Test_heatbath_mobius_eofa.cc | 104 +++ .../Test_heatbath_mobius_eofa_gparity.cc | 109 +++ tests/debug/Test_reweight_dwf_eofa_gparity.cc | 2 +- tests/debug/Test_reweight_mobius_eofa.cc | 215 ++++++ .../Test_reweight_mobius_eofa_gparity.cc | 218 ++++++ tests/forces/Test_dwf_gpforce_eofa.cc | 6 +- tests/forces/Test_mobius_force_eofa.cc | 166 +++++ tests/forces/Test_mobius_gpforce_eofa.cc | 171 +++++ 16 files changed, 2383 insertions(+), 32 deletions(-) create mode 100644 lib/qcd/action/fermion/MobiusEOFAFermiondense.cc create mode 100644 lib/qcd/action/fermion/MobiusEOFAFermionssp.cc create mode 100644 lib/qcd/action/fermion/MobiusEOFAFermionvec.cc create mode 100644 tests/core/Test_mobius_eofa_even_odd.cc create mode 100644 tests/debug/Test_heatbath_mobius_eofa.cc create mode 100644 tests/debug/Test_heatbath_mobius_eofa_gparity.cc create mode 100644 
tests/debug/Test_reweight_mobius_eofa.cc create mode 100644 tests/debug/Test_reweight_mobius_eofa_gparity.cc create mode 100644 tests/forces/Test_mobius_force_eofa.cc create mode 100644 tests/forces/Test_mobius_gpforce_eofa.cc diff --git a/lib/qcd/action/fermion/Fermion.h b/lib/qcd/action/fermion/Fermion.h index 99d97895..ad2f383d 100644 --- a/lib/qcd/action/fermion/Fermion.h +++ b/lib/qcd/action/fermion/Fermion.h @@ -265,9 +265,9 @@ typedef MobiusFermion GparityMobiusFermionRL; typedef MobiusFermion GparityMobiusFermionFH; typedef MobiusFermion GparityMobiusFermionDF; -typedef MobiusEOFAFermion GparityMobiusFermionR; -typedef MobiusEOFAFermion GparityMobiusFermionF; -typedef MobiusEOFAFermion GparityMobiusFermionD; +typedef MobiusEOFAFermion GparityMobiusEOFAFermionR; +typedef MobiusEOFAFermion GparityMobiusEOFAFermionF; +typedef MobiusEOFAFermion GparityMobiusEOFAFermionD; typedef MobiusEOFAFermion GparityMobiusEOFAFermionRL; typedef MobiusEOFAFermion GparityMobiusEOFAFermionFH; diff --git a/lib/qcd/action/fermion/MobiusEOFAFermion.cc b/lib/qcd/action/fermion/MobiusEOFAFermion.cc index c0837d78..085fa988 100644 --- a/lib/qcd/action/fermion/MobiusEOFAFermion.cc +++ b/lib/qcd/action/fermion/MobiusEOFAFermion.cc @@ -142,7 +142,7 @@ namespace QCD { RealD DtInv_p(0.0), DtInv_m(0.0); RealD N = std::pow(c+d,Ls) + m*std::pow(c-d,Ls); - FermionField tmp = zero; + FermionField tmp(this->FermionGrid()); for(int s=0; s sp) ? 
0.0 : std::pow(-1.0,sp-s) * std::pow(c-d,sp-s) / std::pow(c+d,sp-s+1); - if(dag){ - RealD tmp(DtInv_p); - DtInv_p = DtInv_m; - DtInv_m = tmp; + if(sp == 0){ + axpby_ssp_pplus (tmp, 0.0, tmp, DtInv_p, psi, s, sp); + axpby_ssp_pminus(tmp, 0.0, tmp, DtInv_m, psi, s, sp); + } else { + axpby_ssp_pplus (tmp, 1.0, tmp, DtInv_p, psi, s, sp); + axpby_ssp_pminus(tmp, 1.0, tmp, DtInv_m, psi, s, sp); } - axpby_ssp_pplus (tmp, 1.0, tmp, DtInv_p, psi, s, sp); - axpby_ssp_pminus(tmp, 1.0, tmp, DtInv_m, psi, s, sp); - }} } @@ -217,11 +216,11 @@ namespace QCD { template void MobiusEOFAFermion::M5Ddag(const FermionField& psi, FermionField& chi) { - int Ls = this->Ls; + int Ls = this->Ls; std::vector diag(Ls,1.0); - std::vector upper(Ls,-1.0); upper[Ls-1] = this->mq1 + shiftp; - std::vector lower(Ls,-1.0); lower[0] = this->mq1 + shiftm; + std::vector upper(Ls,-1.0); upper[Ls-1] = this->mq1; + std::vector lower(Ls,-1.0); lower[0] = this->mq1; // no shift term if(this->shift == 0.0){ this->M5Ddag(psi, chi, chi, lower, diag, upper); } diff --git a/lib/qcd/action/fermion/MobiusEOFAFermion.h b/lib/qcd/action/fermion/MobiusEOFAFermion.h index 0a8d1788..519b49e7 100644 --- a/lib/qcd/action/fermion/MobiusEOFAFermion.h +++ b/lib/qcd/action/fermion/MobiusEOFAFermion.h @@ -111,7 +111,7 @@ namespace QCD { }; }} -#define INSTANTIATE_DPERP_DWF_EOFA(A)\ +#define INSTANTIATE_DPERP_MOBIUS_EOFA(A)\ template void MobiusEOFAFermion::M5D(const FermionField& psi, const FermionField& phi, FermionField& chi, \ std::vector& lower, std::vector& diag, std::vector& upper); \ template void MobiusEOFAFermion::M5D_shift(const FermionField& psi, const FermionField& phi, FermionField& chi, \ @@ -122,7 +122,7 @@ template void MobiusEOFAFermion::M5Ddag_shift(const FermionField& psi, const std::vector& lower, std::vector& diag, std::vector& upper, std::vector& shift_coeffs); \ template void MobiusEOFAFermion::MooeeInv(const FermionField& psi, FermionField& chi); \ template void MobiusEOFAFermion::MooeeInv_shift(const 
FermionField& psi, FermionField& chi); \ -template void MobiusEOFAFermion::MooeeInvDag(const FermionField& psi, FermionField& chi); +template void MobiusEOFAFermion::MooeeInvDag(const FermionField& psi, FermionField& chi); \ template void MobiusEOFAFermion::MooeeInvDag_shift(const FermionField& psi, FermionField& chi); #undef MOBIUS_EOFA_DPERP_DENSE diff --git a/lib/qcd/action/fermion/MobiusEOFAFermioncache.cc b/lib/qcd/action/fermion/MobiusEOFAFermioncache.cc index d184ebe2..420f6390 100644 --- a/lib/qcd/action/fermion/MobiusEOFAFermioncache.cc +++ b/lib/qcd/action/fermion/MobiusEOFAFermioncache.cc @@ -410,19 +410,19 @@ namespace QCD { #ifdef MOBIUS_EOFA_DPERP_CACHE - INSTANTIATE_DPERP_MOBIUS_EOFA(WilsonImplF); - INSTANTIATE_DPERP_MOBIUS_EOFA(WilsonImplD); - INSTANTIATE_DPERP_MOBIUS_EOFA(GparityWilsonImplF); - INSTANTIATE_DPERP_MOBIUS_EOFA(GparityWilsonImplD); - INSTANTIATE_DPERP_MOBIUS_EOFA(ZWilsonImplF); - INSTANTIATE_DPERP_MOBIUS_EOFA(ZWilsonImplD); + INSTANTIATE_DPERP_MOBIUS_EOFA(WilsonImplF); + INSTANTIATE_DPERP_MOBIUS_EOFA(WilsonImplD); + INSTANTIATE_DPERP_MOBIUS_EOFA(GparityWilsonImplF); + INSTANTIATE_DPERP_MOBIUS_EOFA(GparityWilsonImplD); + INSTANTIATE_DPERP_MOBIUS_EOFA(ZWilsonImplF); + INSTANTIATE_DPERP_MOBIUS_EOFA(ZWilsonImplD); - INSTANTIATE_DPERP_MOBIUS_EOFA(WilsonImplFH); - INSTANTIATE_DPERP_MOBIUS_EOFA(WilsonImplDF); - INSTANTIATE_DPERP_MOBIUS_EOFA(GparityWilsonImplFH); - INSTANTIATE_DPERP_MOBIUS_EOFA(GparityWilsonImplDF); - INSTANTIATE_DPERP_MOBIUS_EOFA(ZWilsonImplFH); - INSTANTIATE_DPERP_MOBIUS_EOFA(ZWilsonImplDF); + INSTANTIATE_DPERP_MOBIUS_EOFA(WilsonImplFH); + INSTANTIATE_DPERP_MOBIUS_EOFA(WilsonImplDF); + INSTANTIATE_DPERP_MOBIUS_EOFA(GparityWilsonImplFH); + INSTANTIATE_DPERP_MOBIUS_EOFA(GparityWilsonImplDF); + INSTANTIATE_DPERP_MOBIUS_EOFA(ZWilsonImplFH); + INSTANTIATE_DPERP_MOBIUS_EOFA(ZWilsonImplDF); #endif diff --git a/lib/qcd/action/fermion/MobiusEOFAFermiondense.cc b/lib/qcd/action/fermion/MobiusEOFAFermiondense.cc new file mode 100644 
index 00000000..d66b8cd9 --- /dev/null +++ b/lib/qcd/action/fermion/MobiusEOFAFermiondense.cc @@ -0,0 +1,184 @@ +/************************************************************************************* + +Grid physics library, www.github.com/paboyle/Grid + +Source file: ./lib/qcd/action/fermion/MobiusEOFAFermiondense.cc + +Copyright (C) 2017 + +Author: Peter Boyle +Author: Peter Boyle +Author: Peter Boyle +Author: paboyle +Author: David Murphy + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along +with this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+ +See the full license in the file "LICENSE" in the top level distribution directory +*************************************************************************************/ +/* END LEGAL */ + +#include +#include +#include + +namespace Grid { +namespace QCD { + + /* + * Dense matrix versions of routines + */ + template + void MobiusEOFAFermion::MooeeInv(const FermionField& psi, FermionField& chi) + { + this->MooeeInternal(psi, chi, DaggerNo, InverseYes); + } + + template + void MobiusEOFAFermion::MooeeInv_shift(const FermionField& psi, FermionField& chi) + { + this->MooeeInternal(psi, chi, DaggerNo, InverseYes); + } + + template + void MobiusEOFAFermion::MooeeInvDag(const FermionField& psi, FermionField& chi) + { + this->MooeeInternal(psi, chi, DaggerYes, InverseYes); + } + + template + void MobiusEOFAFermion::MooeeInvDag_shift(const FermionField& psi, FermionField& chi) + { + this->MooeeInternal(psi, chi, DaggerYes, InverseYes); + } + + template + void MobiusEOFAFermion::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv) + { + int Ls = this->Ls; + int LLs = psi._grid->_rdimensions[0]; + int vol = psi._grid->oSites()/LLs; + + int pm = this->pm; + RealD shift = this->shift; + RealD alpha = this->alpha; + RealD k = this->k; + RealD mq1 = this->mq1; + + chi.checkerboard = psi.checkerboard; + + assert(Ls==LLs); + + Eigen::MatrixXd Pplus = Eigen::MatrixXd::Zero(Ls,Ls); + Eigen::MatrixXd Pminus = Eigen::MatrixXd::Zero(Ls,Ls); + + for(int s=0;sbee[s]; + Pminus(s,s) = this->bee[s]; + } + + for(int s=0; scee[s]; + } + + for(int s=0; scee[s+1]; + } + Pplus (0,Ls-1) = mq1*this->cee[0]; + Pminus(Ls-1,0) = mq1*this->cee[Ls-1]; + + if(shift != 0.0){ + Coeff_t N = 2.0 * ( std::pow(alpha+1.0,Ls) + mq1*std::pow(alpha-1.0,Ls) ); + for(int s=0; s::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv); + template void MobiusEOFAFermion::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv); + template void 
MobiusEOFAFermion::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv); + template void MobiusEOFAFermion::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv); + template void MobiusEOFAFermion::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv); + template void MobiusEOFAFermion::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv); + + INSTANTIATE_DPERP_MOBIUS_EOFA(GparityWilsonImplFH); + INSTANTIATE_DPERP_MOBIUS_EOFA(GparityWilsonImplDF); + INSTANTIATE_DPERP_MOBIUS_EOFA(WilsonImplFH); + INSTANTIATE_DPERP_MOBIUS_EOFA(WilsonImplDF); + INSTANTIATE_DPERP_MOBIUS_EOFA(ZWilsonImplFH); + INSTANTIATE_DPERP_MOBIUS_EOFA(ZWilsonImplDF); + + template void MobiusEOFAFermion::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv); + template void MobiusEOFAFermion::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv); + template void MobiusEOFAFermion::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv); + template void MobiusEOFAFermion::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv); + template void MobiusEOFAFermion::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv); + template void MobiusEOFAFermion::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv); + + #endif + +}} diff --git a/lib/qcd/action/fermion/MobiusEOFAFermionssp.cc b/lib/qcd/action/fermion/MobiusEOFAFermionssp.cc new file mode 100644 index 00000000..c86bb995 --- /dev/null +++ b/lib/qcd/action/fermion/MobiusEOFAFermionssp.cc @@ -0,0 +1,290 @@ +/************************************************************************************* + +Grid physics library, www.github.com/paboyle/Grid + +Source file: ./lib/qcd/action/fermion/MobiusEOFAFermionssp.cc + +Copyright (C) 2017 + +Author: Peter Boyle +Author: Peter Boyle +Author: Peter Boyle +Author: paboyle +Author: David Murphy + 
+This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along +with this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + +See the full license in the file "LICENSE" in the top level distribution directory +*************************************************************************************/ +/* END LEGAL */ + +#include +#include + +namespace Grid { +namespace QCD { + + // FIXME -- make a version of these routines with site loop outermost for cache reuse. 
+ // Pminus fowards + // Pplus backwards + template + void MobiusEOFAFermion::M5D(const FermionField& psi, const FermionField& phi, + FermionField& chi, std::vector& lower, std::vector& diag, std::vector& upper) + { + Coeff_t one(1.0); + int Ls = this->Ls; + for(int s=0; s + void MobiusEOFAFermion::M5D_shift(const FermionField& psi, const FermionField& phi, + FermionField& chi, std::vector& lower, std::vector& diag, std::vector& upper, + std::vector& shift_coeffs) + { + Coeff_t one(1.0); + int Ls = this->Ls; + for(int s=0; spm == 1){ axpby_ssp_pplus(chi, one, chi, shift_coeffs[s], psi, s, Ls-1); } + else{ axpby_ssp_pminus(chi, one, chi, shift_coeffs[s], psi, s, 0); } + } + } + + template + void MobiusEOFAFermion::M5Ddag(const FermionField& psi, const FermionField& phi, + FermionField& chi, std::vector& lower, std::vector& diag, std::vector& upper) + { + Coeff_t one(1.0); + int Ls = this->Ls; + for(int s=0; s + void MobiusEOFAFermion::M5Ddag_shift(const FermionField& psi, const FermionField& phi, + FermionField& chi, std::vector& lower, std::vector& diag, std::vector& upper, + std::vector& shift_coeffs) + { + Coeff_t one(1.0); + int Ls = this->Ls; + for(int s=0; spm == 1){ axpby_ssp_pplus(chi, one, chi, shift_coeffs[s], psi, Ls-1, s); } + else{ axpby_ssp_pminus(chi, one, chi, shift_coeffs[s], psi, 0, s); } + } + } + + template + void MobiusEOFAFermion::MooeeInv(const FermionField& psi, FermionField& chi) + { + if(this->shift != 0.0){ MooeeInv_shift(psi,chi); return; } + + Coeff_t one(1.0); + Coeff_t czero(0.0); + chi.checkerboard = psi.checkerboard; + int Ls = this->Ls; + + // Apply (L^{\prime})^{-1} + axpby_ssp(chi, one, psi, czero, psi, 0, 0); // chi[0]=psi[0] + for(int s=1; slee[s-1], chi, s, s-1);// recursion Psi[s] -lee P_+ chi[s-1] + } + + // L_m^{-1} + for(int s=0; sleem[s], chi, Ls-1, s); + } + + // U_m^{-1} D^{-1} + for(int s=0; sdee[s], chi, -this->ueem[s]/this->dee[Ls-1], chi, s, Ls-1); + } + axpby_ssp(chi, one/this->dee[Ls-1], chi, czero, chi, Ls-1, 
Ls-1); + + // Apply U^{-1} + for(int s=Ls-2; s>=0; s--){ + axpby_ssp_pminus(chi, one, chi, -this->uee[s], chi, s, s+1); // chi[Ls] + } + } + + template + void MobiusEOFAFermion::MooeeInv_shift(const FermionField& psi, FermionField& chi) + { + Coeff_t one(1.0); + Coeff_t czero(0.0); + chi.checkerboard = psi.checkerboard; + int Ls = this->Ls; + + FermionField tmp(psi._grid); + + // Apply (L^{\prime})^{-1} + axpby_ssp(chi, one, psi, czero, psi, 0, 0); // chi[0]=psi[0] + axpby_ssp(tmp, czero, tmp, this->MooeeInv_shift_lc[0], psi, 0, 0); + for(int s=1; slee[s-1], chi, s, s-1);// recursion Psi[s] -lee P_+ chi[s-1] + axpby_ssp(tmp, one, tmp, this->MooeeInv_shift_lc[s], psi, 0, s); + } + + // L_m^{-1} + for(int s=0; sleem[s], chi, Ls-1, s); + } + + // U_m^{-1} D^{-1} + for(int s=0; sdee[s], chi, -this->ueem[s]/this->dee[Ls-1], chi, s, Ls-1); + } + axpby_ssp(chi, one/this->dee[Ls-1], chi, czero, chi, Ls-1, Ls-1); + + // Apply U^{-1} and add shift term + if(this->pm == 1){ axpby_ssp_pplus(chi, one, chi, this->MooeeInv_shift_norm[Ls-1], tmp, Ls-1, 0); } + else{ axpby_ssp_pminus(chi, one, chi, this->MooeeInv_shift_norm[Ls-1], tmp, Ls-1, 0); } + for(int s=Ls-2; s>=0; s--){ + axpby_ssp_pminus(chi, one, chi, -this->uee[s], chi, s, s+1); // chi[Ls] + if(this->pm == 1){ axpby_ssp_pplus(chi, one, chi, this->MooeeInv_shift_norm[s], tmp, s, 0); } + else{ axpby_ssp_pminus(chi, one, chi, this->MooeeInv_shift_norm[s], tmp, s, 0); } + } + } + + template + void MobiusEOFAFermion::MooeeInvDag(const FermionField& psi, FermionField& chi) + { + if(this->shift != 0.0){ MooeeInvDag_shift(psi,chi); return; } + + Coeff_t one(1.0); + Coeff_t czero(0.0); + chi.checkerboard = psi.checkerboard; + int Ls = this->Ls; + + // Apply (U^{\prime})^{-dagger} + axpby_ssp(chi, one, psi, czero, psi, 0, 0); // chi[0]=psi[0] + for(int s=1; suee[s-1]), chi, s, s-1); + } + + // U_m^{-\dagger} + for(int s=0; sueem[s]), chi, Ls-1, s); + } + + // L_m^{-\dagger} D^{-dagger} + for(int s=0; sdee[s]), chi, 
-conjugate(this->leem[s]/this->dee[Ls-1]), chi, s, Ls-1); + } + axpby_ssp(chi, one/conjugate(this->dee[Ls-1]), chi, czero, chi, Ls-1, Ls-1); + + // Apply L^{-dagger} + for(int s=Ls-2; s>=0; s--){ + axpby_ssp_pplus(chi, one, chi, -conjugate(this->lee[s]), chi, s, s+1); // chi[Ls] + } + } + + template + void MobiusEOFAFermion::MooeeInvDag_shift(const FermionField& psi, FermionField& chi) + { + Coeff_t one(1.0); + Coeff_t czero(0.0); + chi.checkerboard = psi.checkerboard; + int Ls = this->Ls; + + FermionField tmp(psi._grid); + + // Apply (U^{\prime})^{-dagger} and accumulate (MooeeInvDag_shift_lc)_{j} \psi_{j} in tmp[0] + axpby_ssp(chi, one, psi, czero, psi, 0, 0); // chi[0]=psi[0] + axpby_ssp(tmp, czero, tmp, this->MooeeInvDag_shift_lc[0], psi, 0, 0); + for(int s=1; suee[s-1]), chi, s, s-1); + axpby_ssp(tmp, one, tmp, this->MooeeInvDag_shift_lc[s], psi, 0, s); + } + + // U_m^{-\dagger} + for(int s=0; sueem[s]), chi, Ls-1, s); + } + + // L_m^{-\dagger} D^{-dagger} + for(int s=0; sdee[s]), chi, -conjugate(this->leem[s]/this->dee[Ls-1]), chi, s, Ls-1); + } + axpby_ssp(chi, one/conjugate(this->dee[Ls-1]), chi, czero, chi, Ls-1, Ls-1); + + // Apply L^{-dagger} and add shift + if(this->pm == 1){ axpby_ssp_pplus(chi, one, chi, this->MooeeInvDag_shift_norm[Ls-1], tmp, Ls-1, 0); } + else{ axpby_ssp_pminus(chi, one, chi, this->MooeeInvDag_shift_norm[Ls-1], tmp, Ls-1, 0); } + for(int s=Ls-2; s>=0; s--){ + axpby_ssp_pplus(chi, one, chi, -conjugate(this->lee[s]), chi, s, s+1); // chi[Ls] + if(this->pm == 1){ axpby_ssp_pplus(chi, one, chi, this->MooeeInvDag_shift_norm[s], tmp, s, 0); } + else{ axpby_ssp_pminus(chi, one, chi, this->MooeeInvDag_shift_norm[s], tmp, s, 0); } + } + } + + #ifdef MOBIUS_EOFA_DPERP_LINALG + + INSTANTIATE_DPERP_MOBIUS_EOFA(WilsonImplF); + INSTANTIATE_DPERP_MOBIUS_EOFA(WilsonImplD); + INSTANTIATE_DPERP_MOBIUS_EOFA(GparityWilsonImplF); + INSTANTIATE_DPERP_MOBIUS_EOFA(GparityWilsonImplD); + INSTANTIATE_DPERP_MOBIUS_EOFA(ZWilsonImplF); + 
INSTANTIATE_DPERP_MOBIUS_EOFA(ZWilsonImplD); + + INSTANTIATE_DPERP_MOBIUS_EOFA(WilsonImplFH); + INSTANTIATE_DPERP_MOBIUS_EOFA(WilsonImplDF); + INSTANTIATE_DPERP_MOBIUS_EOFA(GparityWilsonImplFH); + INSTANTIATE_DPERP_MOBIUS_EOFA(GparityWilsonImplDF); + INSTANTIATE_DPERP_MOBIUS_EOFA(ZWilsonImplFH); + INSTANTIATE_DPERP_MOBIUS_EOFA(ZWilsonImplDF); + + #endif + +}} diff --git a/lib/qcd/action/fermion/MobiusEOFAFermionvec.cc b/lib/qcd/action/fermion/MobiusEOFAFermionvec.cc new file mode 100644 index 00000000..59544e5a --- /dev/null +++ b/lib/qcd/action/fermion/MobiusEOFAFermionvec.cc @@ -0,0 +1,654 @@ +/************************************************************************************* + +Grid physics library, www.github.com/paboyle/Grid + +Source file: ./lib/qcd/action/fermion/MobiusEOFAFermionvec.cc + +Copyright (C) 2017 + +Author: Peter Boyle +Author: Peter Boyle +Author: Peter Boyle +Author: paboyle +Author: David Murphy + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along +with this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+ +See the full license in the file "LICENSE" in the top level distribution directory +*************************************************************************************/ +/* END LEGAL */ + +#include +#include + +namespace Grid { +namespace QCD { + + /* + * Dense matrix versions of routines + */ + template + void MobiusEOFAFermion::MooeeInv(const FermionField& psi, FermionField& chi) + { + this->MooeeInternal(psi, chi, DaggerNo, InverseYes); + } + + template + void MobiusEOFAFermion::MooeeInv_shift(const FermionField& psi, FermionField& chi) + { + this->MooeeInternal(psi, chi, DaggerNo, InverseYes); + } + + template + void MobiusEOFAFermion::MooeeInvDag(const FermionField& psi, FermionField& chi) + { + this->MooeeInternal(psi, chi, DaggerYes, InverseYes); + } + + template + void MobiusEOFAFermion::MooeeInvDag_shift(const FermionField& psi, FermionField& chi) + { + this->MooeeInternal(psi, chi, DaggerYes, InverseYes); + } + + template + void MobiusEOFAFermion::M5D(const FermionField& psi, const FermionField& phi, + FermionField& chi, std::vector& lower, std::vector& diag, std::vector& upper) + { + GridBase* grid = psi._grid; + int Ls = this->Ls; + int LLs = grid->_rdimensions[0]; + const int nsimd = Simd::Nsimd(); + + Vector> u(LLs); + Vector> l(LLs); + Vector> d(LLs); + + assert(Ls/LLs == nsimd); + assert(phi.checkerboard == psi.checkerboard); + + chi.checkerboard = psi.checkerboard; + + // just directly address via type pun + typedef typename Simd::scalar_type scalar_type; + scalar_type* u_p = (scalar_type*) &u[0]; + scalar_type* l_p = (scalar_type*) &l[0]; + scalar_type* d_p = (scalar_type*) &d[0]; + + for(int o=0; oM5Dcalls++; + this->M5Dtime -= usecond(); + + assert(Nc == 3); + + parallel_for(int ss=0; ssoSites(); ss+=LLs){ // adds LLs + + #if 0 + + alignas(64) SiteHalfSpinor hp; + alignas(64) SiteHalfSpinor hm; + alignas(64) SiteSpinor fp; + alignas(64) SiteSpinor fm; + + for(int v=0; v= v){ rotate(hm, hm, nsimd-1); } + + hp = 0.5*hp; + hm = 0.5*hm; + + 
spRecon5m(fp, hp); + spRecon5p(fm, hm); + + chi[ss+v] = d[v]*phi[ss+v]; + chi[ss+v] = chi[ss+v] + u[v]*fp; + chi[ss+v] = chi[ss+v] + l[v]*fm; + + } + + #else + + for(int v=0; v(hp_00.v); + hp_01.v = Optimization::Rotate::tRotate<2>(hp_01.v); + hp_02.v = Optimization::Rotate::tRotate<2>(hp_02.v); + hp_10.v = Optimization::Rotate::tRotate<2>(hp_10.v); + hp_11.v = Optimization::Rotate::tRotate<2>(hp_11.v); + hp_12.v = Optimization::Rotate::tRotate<2>(hp_12.v); + } + + if(vm >= v){ + hm_00.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hm_00.v); + hm_01.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hm_01.v); + hm_02.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hm_02.v); + hm_10.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hm_10.v); + hm_11.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hm_11.v); + hm_12.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hm_12.v); + } + + // Can force these to real arithmetic and save 2x. + Simd p_00 = switcheroo::mult(d[v]()()(), phi[ss+v]()(0)(0)) + switcheroo::mult(l[v]()()(), hm_00); + Simd p_01 = switcheroo::mult(d[v]()()(), phi[ss+v]()(0)(1)) + switcheroo::mult(l[v]()()(), hm_01); + Simd p_02 = switcheroo::mult(d[v]()()(), phi[ss+v]()(0)(2)) + switcheroo::mult(l[v]()()(), hm_02); + Simd p_10 = switcheroo::mult(d[v]()()(), phi[ss+v]()(1)(0)) + switcheroo::mult(l[v]()()(), hm_10); + Simd p_11 = switcheroo::mult(d[v]()()(), phi[ss+v]()(1)(1)) + switcheroo::mult(l[v]()()(), hm_11); + Simd p_12 = switcheroo::mult(d[v]()()(), phi[ss+v]()(1)(2)) + switcheroo::mult(l[v]()()(), hm_12); + Simd p_20 = switcheroo::mult(d[v]()()(), phi[ss+v]()(2)(0)) + switcheroo::mult(u[v]()()(), hp_00); + Simd p_21 = switcheroo::mult(d[v]()()(), phi[ss+v]()(2)(1)) + switcheroo::mult(u[v]()()(), hp_01); + Simd p_22 = switcheroo::mult(d[v]()()(), phi[ss+v]()(2)(2)) + switcheroo::mult(u[v]()()(), hp_02); + Simd p_30 = switcheroo::mult(d[v]()()(), phi[ss+v]()(3)(0)) + switcheroo::mult(u[v]()()(), hp_10); + Simd p_31 = 
switcheroo::mult(d[v]()()(), phi[ss+v]()(3)(1)) + switcheroo::mult(u[v]()()(), hp_11); + Simd p_32 = switcheroo::mult(d[v]()()(), phi[ss+v]()(3)(2)) + switcheroo::mult(u[v]()()(), hp_12); + + vstream(chi[ss+v]()(0)(0), p_00); + vstream(chi[ss+v]()(0)(1), p_01); + vstream(chi[ss+v]()(0)(2), p_02); + vstream(chi[ss+v]()(1)(0), p_10); + vstream(chi[ss+v]()(1)(1), p_11); + vstream(chi[ss+v]()(1)(2), p_12); + vstream(chi[ss+v]()(2)(0), p_20); + vstream(chi[ss+v]()(2)(1), p_21); + vstream(chi[ss+v]()(2)(2), p_22); + vstream(chi[ss+v]()(3)(0), p_30); + vstream(chi[ss+v]()(3)(1), p_31); + vstream(chi[ss+v]()(3)(2), p_32); + } + + #endif + } + + this->M5Dtime += usecond(); + } + + template + void MobiusEOFAFermion::M5D_shift(const FermionField& psi, const FermionField& phi, + FermionField& chi, std::vector& lower, std::vector& diag, std::vector& upper, + std::vector& shift_coeffs) + { + this->M5D(psi, phi, chi, lower, diag, upper); + + // FIXME: possible gain from vectorizing shift operation as well? 
+ Coeff_t one(1.0); + int Ls = this->Ls; + for(int s=0; spm == 1){ axpby_ssp_pplus(chi, one, chi, shift_coeffs[s], psi, s, Ls-1); } + else{ axpby_ssp_pminus(chi, one, chi, shift_coeffs[s], psi, s, 0); } + } + } + + template + void MobiusEOFAFermion::M5Ddag(const FermionField& psi, const FermionField& phi, + FermionField& chi, std::vector& lower, std::vector& diag, std::vector& upper) + { + GridBase* grid = psi._grid; + int Ls = this->Ls; + int LLs = grid->_rdimensions[0]; + int nsimd = Simd::Nsimd(); + + Vector > u(LLs); + Vector > l(LLs); + Vector > d(LLs); + + assert(Ls/LLs == nsimd); + assert(phi.checkerboard == psi.checkerboard); + + chi.checkerboard = psi.checkerboard; + + // just directly address via type pun + typedef typename Simd::scalar_type scalar_type; + scalar_type* u_p = (scalar_type*) &u[0]; + scalar_type* l_p = (scalar_type*) &l[0]; + scalar_type* d_p = (scalar_type*) &d[0]; + + for(int o=0; oM5Dcalls++; + this->M5Dtime -= usecond(); + + parallel_for(int ss=0; ssoSites(); ss+=LLs){ // adds LLs + + #if 0 + + alignas(64) SiteHalfSpinor hp; + alignas(64) SiteHalfSpinor hm; + alignas(64) SiteSpinor fp; + alignas(64) SiteSpinor fm; + + for(int v=0; v= v){ rotate(hm, hm, nsimd-1); } + + hp = hp*0.5; + hm = hm*0.5; + spRecon5p(fp, hp); + spRecon5m(fm, hm); + + chi[ss+v] = d[v]*phi[ss+v]+u[v]*fp; + chi[ss+v] = chi[ss+v] +l[v]*fm; + + } + + #else + + for(int v=0; v(hp_00.v); + hp_01.v = Optimization::Rotate::tRotate<2>(hp_01.v); + hp_02.v = Optimization::Rotate::tRotate<2>(hp_02.v); + hp_10.v = Optimization::Rotate::tRotate<2>(hp_10.v); + hp_11.v = Optimization::Rotate::tRotate<2>(hp_11.v); + hp_12.v = Optimization::Rotate::tRotate<2>(hp_12.v); + } + + if(vm >= v){ + hm_00.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hm_00.v); + hm_01.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hm_01.v); + hm_02.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hm_02.v); + hm_10.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hm_10.v); + hm_11.v = 
Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hm_11.v); + hm_12.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hm_12.v); + } + + Simd p_00 = switcheroo::mult(d[v]()()(), phi[ss+v]()(0)(0)) + switcheroo::mult(u[v]()()(), hp_00); + Simd p_01 = switcheroo::mult(d[v]()()(), phi[ss+v]()(0)(1)) + switcheroo::mult(u[v]()()(), hp_01); + Simd p_02 = switcheroo::mult(d[v]()()(), phi[ss+v]()(0)(2)) + switcheroo::mult(u[v]()()(), hp_02); + Simd p_10 = switcheroo::mult(d[v]()()(), phi[ss+v]()(1)(0)) + switcheroo::mult(u[v]()()(), hp_10); + Simd p_11 = switcheroo::mult(d[v]()()(), phi[ss+v]()(1)(1)) + switcheroo::mult(u[v]()()(), hp_11); + Simd p_12 = switcheroo::mult(d[v]()()(), phi[ss+v]()(1)(2)) + switcheroo::mult(u[v]()()(), hp_12); + Simd p_20 = switcheroo::mult(d[v]()()(), phi[ss+v]()(2)(0)) + switcheroo::mult(l[v]()()(), hm_00); + Simd p_21 = switcheroo::mult(d[v]()()(), phi[ss+v]()(2)(1)) + switcheroo::mult(l[v]()()(), hm_01); + Simd p_22 = switcheroo::mult(d[v]()()(), phi[ss+v]()(2)(2)) + switcheroo::mult(l[v]()()(), hm_02); + Simd p_30 = switcheroo::mult(d[v]()()(), phi[ss+v]()(3)(0)) + switcheroo::mult(l[v]()()(), hm_10); + Simd p_31 = switcheroo::mult(d[v]()()(), phi[ss+v]()(3)(1)) + switcheroo::mult(l[v]()()(), hm_11); + Simd p_32 = switcheroo::mult(d[v]()()(), phi[ss+v]()(3)(2)) + switcheroo::mult(l[v]()()(), hm_12); + + vstream(chi[ss+v]()(0)(0), p_00); + vstream(chi[ss+v]()(0)(1), p_01); + vstream(chi[ss+v]()(0)(2), p_02); + vstream(chi[ss+v]()(1)(0), p_10); + vstream(chi[ss+v]()(1)(1), p_11); + vstream(chi[ss+v]()(1)(2), p_12); + vstream(chi[ss+v]()(2)(0), p_20); + vstream(chi[ss+v]()(2)(1), p_21); + vstream(chi[ss+v]()(2)(2), p_22); + vstream(chi[ss+v]()(3)(0), p_30); + vstream(chi[ss+v]()(3)(1), p_31); + vstream(chi[ss+v]()(3)(2), p_32); + + } + + #endif + + } + + this->M5Dtime += usecond(); + } + + template + void MobiusEOFAFermion::M5Ddag_shift(const FermionField& psi, const FermionField& phi, + FermionField& chi, std::vector& lower, std::vector& 
diag, std::vector& upper, + std::vector& shift_coeffs) + { + this->M5Ddag(psi, phi, chi, lower, diag, upper); + + // FIXME: possible gain from vectorizing shift operation as well? + Coeff_t one(1.0); + int Ls = this->Ls; + for(int s=0; spm == 1){ axpby_ssp_pplus(chi, one, chi, shift_coeffs[s], psi, Ls-1, s); } + else{ axpby_ssp_pminus(chi, one, chi, shift_coeffs[s], psi, 0, s); } + } + } + + #ifdef AVX512 + #include + #include + #include + #endif + + template + void MobiusEOFAFermion::MooeeInternalAsm(const FermionField& psi, FermionField& chi, + int LLs, int site, Vector >& Matp, Vector >& Matm) + { + #ifndef AVX512 + { + SiteHalfSpinor BcastP; + SiteHalfSpinor BcastM; + SiteHalfSpinor SiteChiP; + SiteHalfSpinor SiteChiM; + + // Ls*Ls * 2 * 12 * vol flops + for(int s1=0; s1); + + for(int s1=0; s1 + void MobiusEOFAFermion::MooeeInternalZAsm(const FermionField& psi, FermionField& chi, + int LLs, int site, Vector >& Matp, Vector >& Matm) + { + std::cout << "Error: zMobius not implemented for EOFA" << std::endl; + exit(-1); + }; + + template + void MobiusEOFAFermion::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv) + { + int Ls = this->Ls; + int LLs = psi._grid->_rdimensions[0]; + int vol = psi._grid->oSites()/LLs; + + chi.checkerboard = psi.checkerboard; + + Vector> Matp; + Vector> Matm; + Vector>* _Matp; + Vector>* _Matm; + + // MooeeInternalCompute(dag,inv,Matp,Matm); + if(inv && dag){ + _Matp = &this->MatpInvDag; + _Matm = &this->MatmInvDag; + } + + if(inv && (!dag)){ + _Matp = &this->MatpInv; + _Matm = &this->MatmInv; + } + + if(!inv){ + MooeeInternalCompute(dag, inv, Matp, Matm); + _Matp = &Matp; + _Matm = &Matm; + } + + assert(_Matp->size() == Ls*LLs); + + this->MooeeInvCalls++; + this->MooeeInvTime -= usecond(); + + if(switcheroo::iscomplex()){ + parallel_for(auto site=0; siteMooeeInvTime += usecond(); + } + + #ifdef MOBIUS_EOFA_DPERP_VEC + + INSTANTIATE_DPERP_MOBIUS_EOFA(DomainWallVec5dImplD); + 
INSTANTIATE_DPERP_MOBIUS_EOFA(DomainWallVec5dImplF); + INSTANTIATE_DPERP_MOBIUS_EOFA(ZDomainWallVec5dImplD); + INSTANTIATE_DPERP_MOBIUS_EOFA(ZDomainWallVec5dImplF); + + INSTANTIATE_DPERP_MOBIUS_EOFA(DomainWallVec5dImplDF); + INSTANTIATE_DPERP_MOBIUS_EOFA(DomainWallVec5dImplFH); + INSTANTIATE_DPERP_MOBIUS_EOFA(ZDomainWallVec5dImplDF); + INSTANTIATE_DPERP_MOBIUS_EOFA(ZDomainWallVec5dImplFH); + + template void MobiusEOFAFermion::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv); + template void MobiusEOFAFermion::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv); + template void MobiusEOFAFermion::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv); + template void MobiusEOFAFermion::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv); + + template void MobiusEOFAFermion::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv); + template void MobiusEOFAFermion::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv); + template void MobiusEOFAFermion::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv); + template void MobiusEOFAFermion::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv); + + #endif + +}} diff --git a/tests/core/Test_mobius_eofa_even_odd.cc b/tests/core/Test_mobius_eofa_even_odd.cc new file mode 100644 index 00000000..fceb58f6 --- /dev/null +++ b/tests/core/Test_mobius_eofa_even_odd.cc @@ -0,0 +1,241 @@ +/************************************************************************************* + +Grid physics library, www.github.com/paboyle/Grid + +Source file: ./tests/core/Test_dwf_eofa_even_odd.cc + +Copyright (C) 2017 + +Author: Peter Boyle +Author: paboyle +Author: David Murphy + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 
of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along +with this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + +See the full license in the file "LICENSE" in the top level distribution directory +*************************************************************************************/ +/* END LEGAL */ + +#include + +using namespace std; +using namespace Grid; +using namespace Grid::QCD; + +template +struct scal { + d internal; +}; + +Gamma::Algebra Gmu [] = { + Gamma::Algebra::GammaX, + Gamma::Algebra::GammaY, + Gamma::Algebra::GammaZ, + Gamma::Algebra::GammaT +}; + +int main (int argc, char ** argv) +{ + Grid_init(&argc, &argv); + + int threads = GridThread::GetThreads(); + std::cout << GridLogMessage << "Grid is setup to use " << threads << " threads" << std::endl; + + const int Ls = 8; + // GridCartesian* UGrid = SpaceTimeGrid::makeFourDimGrid(GridDefaultLatt(), GridDefaultSimd(Nd,vComplex::Nsimd()), GridDefaultMpi()); + GridCartesian* UGrid = SpaceTimeGrid::makeFourDimGrid(GridDefaultLatt(), GridDefaultSimd(Nd,vComplex::Nsimd()), GridDefaultMpi()); + GridCartesian* FGrid = SpaceTimeGrid::makeFiveDimGrid(Ls, UGrid); + GridRedBlackCartesian* UrbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(UGrid); + GridRedBlackCartesian* FrbGrid = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls, UGrid); + + std::vector seeds4({1,2,3,4}); + std::vector seeds5({5,6,7,8}); + + GridParallelRNG RNG4(UGrid); RNG4.SeedFixedIntegers(seeds4); + GridParallelRNG RNG5(FGrid); RNG5.SeedFixedIntegers(seeds5); + + LatticeFermion src (FGrid); random(RNG5, src); + LatticeFermion phi (FGrid); random(RNG5, phi); + 
LatticeFermion chi (FGrid); random(RNG5, chi); + LatticeFermion result(FGrid); result = zero; + LatticeFermion ref (FGrid); ref = zero; + LatticeFermion tmp (FGrid); tmp = zero; + LatticeFermion err (FGrid); err = zero; + LatticeGaugeField Umu (UGrid); SU3::HotConfiguration(RNG4, Umu); + std::vector U(4,UGrid); + + // Only one non-zero (y) + Umu = zero; + for(int nn=0; nn0){ U[nn] = zero; } + PokeIndex(Umu, U[nn], nn); + } + + RealD b = 2.5; + RealD c = 1.5; + RealD mq1 = 0.1; + RealD mq2 = 0.5; + RealD mq3 = 1.0; + RealD shift = 0.1234; + RealD M5 = 1.8; + int pm = 1; + MobiusEOFAFermionR Ddwf(Umu, *FGrid, *FrbGrid, *UGrid, *UrbGrid, mq1, mq2, mq3, shift, pm, M5, b, c); + + LatticeFermion src_e (FrbGrid); + LatticeFermion src_o (FrbGrid); + LatticeFermion r_e (FrbGrid); + LatticeFermion r_o (FrbGrid); + LatticeFermion r_eo (FGrid); + LatticeFermion r_eeoo(FGrid); + + std::cout << GridLogMessage << "==========================================================" << std::endl; + std::cout << GridLogMessage << "= Testing that Meo + Moe + Moo + Mee = Munprec " << std::endl; + std::cout << GridLogMessage << "==========================================================" << std::endl; + + pickCheckerboard(Even, src_e, src); + pickCheckerboard(Odd, src_o, src); + + Ddwf.Meooe(src_e, r_o); std::cout << GridLogMessage << "Applied Meo" << std::endl; + Ddwf.Meooe(src_o, r_e); std::cout << GridLogMessage << "Applied Moe" << std::endl; + setCheckerboard(r_eo, r_o); + setCheckerboard(r_eo, r_e); + + Ddwf.Mooee(src_e, r_e); std::cout << GridLogMessage << "Applied Mee" << std::endl; + Ddwf.Mooee(src_o, r_o); std::cout << GridLogMessage << "Applied Moo" << std::endl; + setCheckerboard(r_eeoo, r_e); + setCheckerboard(r_eeoo, r_o); + + r_eo = r_eo + r_eeoo; + Ddwf.M(src, ref); + + // std::cout << GridLogMessage << r_eo << std::endl; + // std::cout << GridLogMessage << ref << std::endl; + + err = ref - r_eo; + std::cout << GridLogMessage << "EO norm diff " << norm2(err) << " " << norm2(ref) 
<< " " << norm2(r_eo) << std::endl; + + LatticeComplex cerr(FGrid); + cerr = localInnerProduct(err,err); + // std::cout << GridLogMessage << cerr << std::endl; + + std::cout << GridLogMessage << "==============================================================" << std::endl; + std::cout << GridLogMessage << "= Test Ddagger is the dagger of D by requiring " << std::endl; + std::cout << GridLogMessage << "= < phi | Deo | chi > * = < chi | Deo^dag| phi> " << std::endl; + std::cout << GridLogMessage << "==============================================================" << std::endl; + + LatticeFermion chi_e (FrbGrid); + LatticeFermion chi_o (FrbGrid); + + LatticeFermion dchi_e(FrbGrid); + LatticeFermion dchi_o(FrbGrid); + + LatticeFermion phi_e (FrbGrid); + LatticeFermion phi_o (FrbGrid); + + LatticeFermion dphi_e(FrbGrid); + LatticeFermion dphi_o(FrbGrid); + + pickCheckerboard(Even, chi_e, chi); + pickCheckerboard(Odd , chi_o, chi); + pickCheckerboard(Even, phi_e, phi); + pickCheckerboard(Odd , phi_o, phi); + + Ddwf.Meooe (chi_e, dchi_o); + Ddwf.Meooe (chi_o, dchi_e); + Ddwf.MeooeDag(phi_e, dphi_o); + Ddwf.MeooeDag(phi_o, dphi_e); + + ComplexD pDce = innerProduct(phi_e, dchi_e); + ComplexD pDco = innerProduct(phi_o, dchi_o); + ComplexD cDpe = innerProduct(chi_e, dphi_e); + ComplexD cDpo = innerProduct(chi_o, dphi_o); + + std::cout << GridLogMessage << "e " << pDce << " " << cDpe << std::endl; + std::cout << GridLogMessage << "o " << pDco << " " << cDpo << std::endl; + + std::cout << GridLogMessage << "pDce - conj(cDpo) " << pDce-conj(cDpo) << std::endl; + std::cout << GridLogMessage << "pDco - conj(cDpe) " << pDco-conj(cDpe) << std::endl; + + std::cout << GridLogMessage << "==============================================================" << std::endl; + std::cout << GridLogMessage << "= Test MeeInv Mee = 1 " << std::endl; + std::cout << GridLogMessage << "==============================================================" << std::endl; + + pickCheckerboard(Even, chi_e, chi); + 
pickCheckerboard(Odd , chi_o, chi); + + Ddwf.Mooee (chi_e, src_e); + Ddwf.MooeeInv(src_e, phi_e); + + Ddwf.Mooee (chi_o, src_o); + Ddwf.MooeeInv(src_o, phi_o); + + setCheckerboard(phi, phi_e); + setCheckerboard(phi, phi_o); + + err = phi - chi; + std::cout << GridLogMessage << "norm diff " << norm2(err) << std::endl; + + std::cout << GridLogMessage << "==============================================================" << std::endl; + std::cout << GridLogMessage << "= Test MeeInvDag MeeDag = 1 " << std::endl; + std::cout << GridLogMessage << "==============================================================" << std::endl; + + pickCheckerboard(Even, chi_e, chi); + pickCheckerboard(Odd , chi_o, chi); + + Ddwf.MooeeDag (chi_e, src_e); + Ddwf.MooeeInvDag(src_e, phi_e); + + Ddwf.MooeeDag (chi_o, src_o); + Ddwf.MooeeInvDag(src_o, phi_o); + + setCheckerboard(phi, phi_e); + setCheckerboard(phi, phi_o); + + err = phi - chi; + std::cout << GridLogMessage << "norm diff " << norm2(err) << std::endl; + + std::cout << GridLogMessage << "==============================================================" << std::endl; + std::cout << GridLogMessage << "= Test MpcDagMpc is Hermitian " << std::endl; + std::cout << GridLogMessage << "==============================================================" << std::endl; + + random(RNG5, phi); + random(RNG5, chi); + pickCheckerboard(Even, chi_e, chi); + pickCheckerboard(Odd , chi_o, chi); + pickCheckerboard(Even, phi_e, phi); + pickCheckerboard(Odd , phi_o, phi); + RealD t1,t2; + + SchurDiagMooeeOperator HermOpEO(Ddwf); + HermOpEO.MpcDagMpc(chi_e, dchi_e, t1, t2); + HermOpEO.MpcDagMpc(chi_o, dchi_o, t1, t2); + + HermOpEO.MpcDagMpc(phi_e, dphi_e, t1, t2); + HermOpEO.MpcDagMpc(phi_o, dphi_o, t1, t2); + + pDce = innerProduct(phi_e, dchi_e); + pDco = innerProduct(phi_o, dchi_o); + cDpe = innerProduct(chi_e, dphi_e); + cDpo = innerProduct(chi_o, dphi_o); + + std::cout << GridLogMessage << "e " << pDce << " " << cDpe << std::endl; + std::cout << GridLogMessage 
<< "o " << pDco << " " << cDpo << std::endl; + + std::cout << GridLogMessage << "pDce - conj(cDpo) " << pDco-conj(cDpo) << std::endl; + std::cout << GridLogMessage << "pDco - conj(cDpe) " << pDce-conj(cDpe) << std::endl; + + Grid_finalize(); +} diff --git a/tests/debug/Test_heatbath_mobius_eofa.cc b/tests/debug/Test_heatbath_mobius_eofa.cc new file mode 100644 index 00000000..a952873d --- /dev/null +++ b/tests/debug/Test_heatbath_mobius_eofa.cc @@ -0,0 +1,104 @@ +/************************************************************************************* + +Grid physics library, www.github.com/paboyle/Grid + +Source file: ./tests/debug/Test_heatbath_mobius_eofa.cc + +Copyright (C) 2017 + +Author: Peter Boyle +Author: paboyle +Author: David Murphy + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along +with this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + +See the full license in the file "LICENSE" in the top level distribution directory +*************************************************************************************/ +/* END LEGAL */ + +////////////////////////////////////////////////////////////////////////////////////////// +// This program sets up the initial pseudofermion field |Phi> = Meofa^{-1/2}*|eta>, and +// then uses this Phi to compute the action <Phi|Meofa|Phi>. +// If all is working, one should find that <eta|eta> = <Phi|Meofa|Phi>. 
+////////////////////////////////////////////////////////////////////////////////////////// + +#include + +using namespace std; +using namespace Grid; +using namespace Grid::QCD; + +// Parameters for test +const std::vector grid_dim = { 8, 8, 8, 8 }; +const int Ls = 8; +const int Npoles = 12; +const RealD b = 2.5; +const RealD c = 1.5; +const RealD mf = 0.01; +const RealD mpv = 1.0; +const RealD M5 = 1.8; + +int main(int argc, char** argv) +{ + Grid_init(&argc, &argv); + + int threads = GridThread::GetThreads(); + std::cout << GridLogMessage << "Grid is set up to use " << threads << " threads" << std::endl; + + // Initialize spacetime grid + std::cout << GridLogMessage << "Lattice dimensions: " << grid_dim << " Ls: " << Ls << std::endl; + GridCartesian* UGrid = SpaceTimeGrid::makeFourDimGrid(grid_dim, + GridDefaultSimd(Nd,vComplex::Nsimd()), GridDefaultMpi()); + GridRedBlackCartesian* UrbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(UGrid); + GridCartesian* FGrid = SpaceTimeGrid::makeFiveDimGrid(Ls, UGrid); + GridRedBlackCartesian* FrbGrid = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls, UGrid); + + // Set up RNGs + std::vector seeds4({1, 2, 3, 4}); + std::vector seeds5({5, 6, 7, 8}); + GridParallelRNG RNG5(FGrid); + RNG5.SeedFixedIntegers(seeds5); + GridParallelRNG RNG4(UGrid); + RNG4.SeedFixedIntegers(seeds4); + + // Random gauge field + LatticeGaugeField Umu(UGrid); + SU3::HotConfiguration(RNG4, Umu); + + MobiusEOFAFermionR Lop(Umu, *FGrid, *FrbGrid, *UGrid, *UrbGrid, mf, mf, mpv, 0.0, -1, M5, b, c); + MobiusEOFAFermionR Rop(Umu, *FGrid, *FrbGrid, *UGrid, *UrbGrid, mpv, mf, mpv, -1.0, 1, M5, b, c); + + // Construct the action and test the heatbath (zero initial guess) + { + OneFlavourRationalParams Params(0.95, 100.0, 5000, 1.0e-12, Npoles); + ConjugateGradient CG(1.0e-12, 5000); + ExactOneFlavourRatioPseudoFermionAction Meofa(Lop, Rop, CG, Params, false); + + Meofa.refresh(Umu, RNG5); + printf(" = %1.15e\n", Meofa.S(Umu)); + } + + // Construct the action and test 
the heatbath (forecasted initial guesses) + { + OneFlavourRationalParams Params(0.95, 100.0, 5000, 1.0e-12, Npoles); + ConjugateGradient CG(1.0e-12, 5000); + ExactOneFlavourRatioPseudoFermionAction Meofa(Lop, Rop, CG, Params, true); + + Meofa.refresh(Umu, RNG5); + printf(" = %1.15e\n", Meofa.S(Umu)); + } + + return 0; +} diff --git a/tests/debug/Test_heatbath_mobius_eofa_gparity.cc b/tests/debug/Test_heatbath_mobius_eofa_gparity.cc new file mode 100644 index 00000000..08c6d566 --- /dev/null +++ b/tests/debug/Test_heatbath_mobius_eofa_gparity.cc @@ -0,0 +1,109 @@ +/************************************************************************************* + +Grid physics library, www.github.com/paboyle/Grid + +Source file: ./tests/debug/Test_heatbath_mobius_eofa_gparity.cc + +Copyright (C) 2017 + +Author: Peter Boyle +Author: paboyle +Author: David Murphy + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along +with this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + +See the full license in the file "LICENSE" in the top level distribution directory +*************************************************************************************/ +/* END LEGAL */ + +////////////////////////////////////////////////////////////////////////////////////////// +// This program sets up the initial pseudofermion field |Phi> = Meofa^{-1/2}*|eta>, and +// then uses this Phi to compute the action <Phi|Meofa|Phi>. 
+// If all is working, one should find that <eta|eta> = <Phi|Meofa|Phi>. +////////////////////////////////////////////////////////////////////////////////////////// + +#include + +using namespace std; +using namespace Grid; +using namespace Grid::QCD; + +typedef GparityWilsonImplR FermionImplPolicy; +typedef GparityMobiusEOFAFermionR FermionAction; +typedef typename FermionAction::FermionField FermionField; + +// Parameters for test +const std::vector grid_dim = { 8, 8, 8, 8 }; +const int Ls = 8; +const int Npoles = 12; +const RealD b = 2.5; +const RealD c = 1.5; +const RealD mf = 0.01; +const RealD mpv = 1.0; +const RealD M5 = 1.8; + +int main(int argc, char** argv) +{ + Grid_init(&argc, &argv); + + int threads = GridThread::GetThreads(); + std::cout << GridLogMessage << "Grid is set up to use " << threads << " threads" << std::endl; + + // Initialize spacetime grid + std::cout << GridLogMessage << "Lattice dimensions: " << grid_dim << " Ls: " << Ls << std::endl; + GridCartesian* UGrid = SpaceTimeGrid::makeFourDimGrid(grid_dim, + GridDefaultSimd(Nd,vComplex::Nsimd()), GridDefaultMpi()); + GridRedBlackCartesian* UrbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(UGrid); + GridCartesian* FGrid = SpaceTimeGrid::makeFiveDimGrid(Ls, UGrid); + GridRedBlackCartesian* FrbGrid = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls, UGrid); + + // Set up RNGs + std::vector seeds4({1, 2, 3, 4}); + std::vector seeds5({5, 6, 7, 8}); + GridParallelRNG RNG5(FGrid); + RNG5.SeedFixedIntegers(seeds5); + GridParallelRNG RNG4(UGrid); + RNG4.SeedFixedIntegers(seeds4); + + // Random gauge field + LatticeGaugeField Umu(UGrid); + SU3::HotConfiguration(RNG4, Umu); + + FermionAction::ImplParams params; + FermionAction Lop(Umu, *FGrid, *FrbGrid, *UGrid, *UrbGrid, mf, mf, mpv, 0.0, -1, M5, b, c, params); + FermionAction Rop(Umu, *FGrid, *FrbGrid, *UGrid, *UrbGrid, mpv, mf, mpv, -1.0, 1, M5, b, c, params); + + // Construct the action and test the heatbath (zero initial guess) + { + OneFlavourRationalParams Params(0.95, 100.0, 
5000, 1.0e-12, Npoles); + ConjugateGradient CG(1.0e-12, 5000); + ExactOneFlavourRatioPseudoFermionAction Meofa(Lop, Rop, CG, Params, false); + + Meofa.refresh(Umu, RNG5); + printf(" = %1.15e\n", Meofa.S(Umu)); + } + + // Construct the action and test the heatbath (forecasted initial guesses) + { + OneFlavourRationalParams Params(0.95, 100.0, 5000, 1.0e-12, Npoles); + ConjugateGradient CG(1.0e-12, 5000); + ExactOneFlavourRatioPseudoFermionAction Meofa(Lop, Rop, CG, Params, true); + + Meofa.refresh(Umu, RNG5); + printf(" = %1.15e\n", Meofa.S(Umu)); + } + + return 0; +} diff --git a/tests/debug/Test_reweight_dwf_eofa_gparity.cc b/tests/debug/Test_reweight_dwf_eofa_gparity.cc index b77ec33e..bb0fd98e 100644 --- a/tests/debug/Test_reweight_dwf_eofa_gparity.cc +++ b/tests/debug/Test_reweight_dwf_eofa_gparity.cc @@ -2,7 +2,7 @@ Grid physics library, www.github.com/paboyle/Grid -Source file: ./tests/debug/Test_reweight_dwf_eofa.cc +Source file: ./tests/debug/Test_reweight_dwf_eofa_gparity.cc Copyright (C) 2017 diff --git a/tests/debug/Test_reweight_mobius_eofa.cc b/tests/debug/Test_reweight_mobius_eofa.cc new file mode 100644 index 00000000..c4fa78d0 --- /dev/null +++ b/tests/debug/Test_reweight_mobius_eofa.cc @@ -0,0 +1,215 @@ +/************************************************************************************* + +Grid physics library, www.github.com/paboyle/Grid + +Source file: ./tests/debug/Test_reweight_mobius_eofa.cc + +Copyright (C) 2017 + +Author: Peter Boyle +Author: paboyle +Author: David Murphy + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along +with this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + +See the full license in the file "LICENSE" in the top level distribution directory +*************************************************************************************/ +/* END LEGAL */ +#include + +using namespace std; +using namespace Grid; +using namespace Grid::QCD; + +// parameters for test +const std::vector grid_dim = { 8, 8, 8, 8 }; +const int Ls = 8; +const int Nhits = 10; +const int max_iter = 5000; +const RealD b = 2.5; +const RealD c = 1.5; +const RealD mf = 0.1; +const RealD mb = 0.11; +const RealD M5 = 1.8; +const RealD stop_tol = 1.0e-12; + +RealD mean(const std::vector& data) +{ + int N = data.size(); + RealD mean(0.0); + for(int i=0; i& data, int sample) +{ + int N = data.size(); + RealD mean(0.0); + for(int i=0; i& jacks, RealD mean) +{ + int N = jacks.size(); + RealD std(0.0); + for(int i=0; i jack_stats(const std::vector& data) +{ + int N = data.size(); + std::vector jack_samples(N); + std::vector jack_stats(2); + + jack_stats[0] = mean(data); + for(int i=0; i seeds4({1, 2, 3, 4}); + std::vector seeds5({5, 6, 7, 8}); + GridParallelRNG RNG5(FGrid); + RNG5.SeedFixedIntegers(seeds5); + GridParallelRNG RNG4(UGrid); + RNG4.SeedFixedIntegers(seeds4); + + // Random gauge field + LatticeGaugeField Umu(UGrid); + SU3::HotConfiguration(RNG4, Umu); + + // Initialize RHMC fermion operators + MobiusFermionR Ddwf_f(Umu, *FGrid, *FrbGrid, *UGrid, *UrbGrid, mf, M5, b, c); + MobiusFermionR Ddwf_b(Umu, *FGrid, *FrbGrid, *UGrid, *UrbGrid, mb, M5, b, c); + SchurDiagMooeeOperator MdagM(Ddwf_f); + SchurDiagMooeeOperator VdagV(Ddwf_b); + + // Degree 12 rational approximations to x^(1/4) and x^(-1/4) + double lo = 0.0001; + double hi = 95.0; + int precision = 64; + int degree = 12; + AlgRemez remez(lo, hi, 
precision); + std::cout << GridLogMessage << "Generating degree " << degree << " for x^(1/4)" << std::endl; + remez.generateApprox(degree, 1, 4); + MultiShiftFunction PowerQuarter(remez, stop_tol, false); + MultiShiftFunction PowerNegQuarter(remez, stop_tol, true); + + // Stochastically estimate reweighting factor via RHMC + RealD scale = std::sqrt(0.5); + std::vector rw_rhmc(Nhits); + ConjugateGradientMultiShift msCG_V(max_iter, PowerQuarter); + ConjugateGradientMultiShift msCG_M(max_iter, PowerNegQuarter); + std::cout.precision(12); + + for(int hit=0; hit tmp(2, Ddwf_f.FermionRedBlackGrid()); + gaussian(RNG5, Phi); + Phi = Phi*scale; + + pickCheckerboard(Odd, PhiOdd, Phi); + + // evaluate -log(rw) + msCG_V(VdagV, PhiOdd, tmp[0]); + msCG_M(MdagM, tmp[0], tmp[1]); + rw_rhmc[hit] = norm2(tmp[1]) - norm2(PhiOdd); + std::cout << std::endl << "==================================================" << std::endl; + std::cout << " --- RHMC: Hit " << hit << ": rw = " << rw_rhmc[hit]; + std::cout << std::endl << "==================================================" << std::endl << std::endl; + + } + + // Initialize EOFA fermion operators + RealD shift_L = 0.0; + RealD shift_R = -1.0; + int pm = 1; + MobiusEOFAFermionR Deofa_L(Umu, *FGrid, *FrbGrid, *UGrid, *UrbGrid, mf, mf, mb, shift_L, pm, M5, b, c); + MobiusEOFAFermionR Deofa_R(Umu, *FGrid, *FrbGrid, *UGrid, *UrbGrid, mb, mf, mb, shift_R, pm, M5, b, c); + MdagMLinearOperator LdagL(Deofa_L); + MdagMLinearOperator RdagR(Deofa_R); + + // Stochastically estimate reweighting factor via EOFA + RealD k = Deofa_L.k; + std::vector rw_eofa(Nhits); + ConjugateGradient CG(stop_tol, max_iter); + SchurRedBlackDiagMooeeSolve SchurSolver(CG); + + // Compute -log(Z), where: ( RHMC det ratio ) = Z * ( EOFA det ratio ) + RealD Z = std::pow(b+c+1.0,Ls) + mf*std::pow(b+c-1.0,Ls); + Z /= std::pow(b+c+1.0,Ls) + mb*std::pow(b+c-1.0,Ls); + Z = -12.0*grid_dim[0]*grid_dim[1]*grid_dim[2]*grid_dim[3]*std::log(Z); + + for(int hit=0; hit tmp(2, 
Deofa_L.FermionGrid()); + gaussian(RNG5, Phi); + Phi = Phi*scale; + + // evaluate -log(rw) + // LH term + for(int s=0; s rhmc_result = jack_stats(rw_rhmc); + std::vector eofa_result = jack_stats(rw_eofa); + std::cout << std::endl << "RHMC: rw = " << rhmc_result[0] << " +/- " << rhmc_result[1] << std::endl; + std::cout << std::endl << "EOFA: rw = " << eofa_result[0] << " +/- " << eofa_result[1] << std::endl; + + Grid_finalize(); +} diff --git a/tests/debug/Test_reweight_mobius_eofa_gparity.cc b/tests/debug/Test_reweight_mobius_eofa_gparity.cc new file mode 100644 index 00000000..11a242d2 --- /dev/null +++ b/tests/debug/Test_reweight_mobius_eofa_gparity.cc @@ -0,0 +1,218 @@ +/************************************************************************************* + +Grid physics library, www.github.com/paboyle/Grid + +Source file: ./tests/debug/Test_reweight_mobius_eofa_gparity.cc + +Copyright (C) 2017 + +Author: Peter Boyle +Author: paboyle +Author: David Murphy + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along +with this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+ +See the full license in the file "LICENSE" in the top level distribution directory +*************************************************************************************/ +/* END LEGAL */ +#include + +using namespace std; +using namespace Grid; +using namespace Grid::QCD; + +typedef typename GparityDomainWallFermionR::FermionField FermionField; + +// parameters for test +const std::vector grid_dim = { 8, 8, 8, 8 }; +const int Ls = 8; +const int Nhits = 10; +const int max_iter = 5000; +const RealD b = 2.5; +const RealD c = 1.5; +const RealD mf = 0.1; +const RealD mb = 0.11; +const RealD M5 = 1.8; +const RealD stop_tol = 1.0e-12; + +RealD mean(const std::vector& data) +{ + int N = data.size(); + RealD mean(0.0); + for(int i=0; i& data, int sample) +{ + int N = data.size(); + RealD mean(0.0); + for(int i=0; i& jacks, RealD mean) +{ + int N = jacks.size(); + RealD std(0.0); + for(int i=0; i jack_stats(const std::vector& data) +{ + int N = data.size(); + std::vector jack_samples(N); + std::vector jack_stats(2); + + jack_stats[0] = mean(data); + for(int i=0; i seeds4({1, 2, 3, 4}); + std::vector seeds5({5, 6, 7, 8}); + GridParallelRNG RNG5(FGrid); + RNG5.SeedFixedIntegers(seeds5); + GridParallelRNG RNG4(UGrid); + RNG4.SeedFixedIntegers(seeds4); + + // Random gauge field + LatticeGaugeField Umu(UGrid); + SU3::HotConfiguration(RNG4, Umu); + + // Initialize RHMC fermion operators + GparityDomainWallFermionR::ImplParams params; + GparityMobiusFermionR Ddwf_f(Umu, *FGrid, *FrbGrid, *UGrid, *UrbGrid, mf, M5, b, c, params); + GparityMobiusFermionR Ddwf_b(Umu, *FGrid, *FrbGrid, *UGrid, *UrbGrid, mb, M5, b, c, params); + SchurDiagMooeeOperator MdagM(Ddwf_f); + SchurDiagMooeeOperator VdagV(Ddwf_b); + + // Degree 12 rational approximations to x^(1/4) and x^(-1/4) + double lo = 0.0001; + double hi = 95.0; + int precision = 64; + int degree = 12; + AlgRemez remez(lo, hi, precision); + std::cout << GridLogMessage << "Generating degree " << degree << " for x^(1/4)" << std::endl; + 
remez.generateApprox(degree, 1, 4); + MultiShiftFunction PowerQuarter(remez, stop_tol, false); + MultiShiftFunction PowerNegQuarter(remez, stop_tol, true); + + // Stochastically estimate reweighting factor via RHMC + RealD scale = std::sqrt(0.5); + std::vector rw_rhmc(Nhits); + ConjugateGradientMultiShift msCG_V(max_iter, PowerQuarter); + ConjugateGradientMultiShift msCG_M(max_iter, PowerNegQuarter); + std::cout.precision(12); + + for(int hit=0; hit tmp(2, Ddwf_f.FermionRedBlackGrid()); + gaussian(RNG5, Phi); + Phi = Phi*scale; + + pickCheckerboard(Odd, PhiOdd, Phi); + + // evaluate -log(rw) + msCG_V(VdagV, PhiOdd, tmp[0]); + msCG_M(MdagM, tmp[0], tmp[1]); + rw_rhmc[hit] = norm2(tmp[1]) - norm2(PhiOdd); + std::cout << std::endl << "==================================================" << std::endl; + std::cout << " --- RHMC: Hit " << hit << ": rw = " << rw_rhmc[hit]; + std::cout << std::endl << "==================================================" << std::endl << std::endl; + + } + + // Initialize EOFA fermion operators + RealD shift_L = 0.0; + RealD shift_R = -1.0; + int pm = 1; + GparityMobiusEOFAFermionR Deofa_L(Umu, *FGrid, *FrbGrid, *UGrid, *UrbGrid, mf, mf, mb, shift_L, pm, M5, b, c, params); + GparityMobiusEOFAFermionR Deofa_R(Umu, *FGrid, *FrbGrid, *UGrid, *UrbGrid, mb, mf, mb, shift_R, pm, M5, b, c, params); + MdagMLinearOperator LdagL(Deofa_L); + MdagMLinearOperator RdagR(Deofa_R); + + // Stochastically estimate reweighting factor via EOFA + RealD k = Deofa_L.k; + std::vector rw_eofa(Nhits); + ConjugateGradient CG(stop_tol, max_iter); + SchurRedBlackDiagMooeeSolve SchurSolver(CG); + + // Compute -log(Z), where: ( RHMC det ratio ) = Z * ( EOFA det ratio ) + RealD Z = std::pow(b+c+1.0,Ls) + mf*std::pow(b+c-1.0,Ls); + Z /= std::pow(b+c+1.0,Ls) + mb*std::pow(b+c-1.0,Ls); + Z = -12.0*grid_dim[0]*grid_dim[1]*grid_dim[2]*grid_dim[3]*std::log(Z); + + for(int hit=0; hit tmp(2, Deofa_L.FermionGrid()); + gaussian(RNG5, Phi); + Phi = Phi*scale; + + // evaluate -log(rw) 
+ // LH term + for(int s=0; s rhmc_result = jack_stats(rw_rhmc); + std::vector eofa_result = jack_stats(rw_eofa); + std::cout << std::endl << "RHMC: rw = " << rhmc_result[0] << " +/- " << rhmc_result[1] << std::endl; + std::cout << std::endl << "EOFA: rw = " << eofa_result[0] << " +/- " << eofa_result[1] << std::endl; + + Grid_finalize(); +} diff --git a/tests/forces/Test_dwf_gpforce_eofa.cc b/tests/forces/Test_dwf_gpforce_eofa.cc index 50789c89..3afeaa43 100644 --- a/tests/forces/Test_dwf_gpforce_eofa.cc +++ b/tests/forces/Test_dwf_gpforce_eofa.cc @@ -71,9 +71,9 @@ int main (int argc, char** argv) int threads = GridThread::GetThreads(); std::cout << GridLogMessage << "Grid is setup to use " << threads << " threads" << std::endl; - LatticeFermion phi (FGrid); gaussian(RNG5, phi); - LatticeFermion Mphi (FGrid); - LatticeFermion MphiPrime (FGrid); + FermionField phi (FGrid); gaussian(RNG5, phi); + FermionField Mphi (FGrid); + FermionField MphiPrime (FGrid); LatticeGaugeField U(UGrid); SU3::HotConfiguration(RNG4,U); diff --git a/tests/forces/Test_mobius_force_eofa.cc b/tests/forces/Test_mobius_force_eofa.cc new file mode 100644 index 00000000..2a5a7d04 --- /dev/null +++ b/tests/forces/Test_mobius_force_eofa.cc @@ -0,0 +1,166 @@ +/************************************************************************************* + +Grid physics library, www.github.com/paboyle/Grid + +Source file: ./tests/forces/Test_mobius_force_eofa.cc + +Copyright (C) 2017 + +Author: Peter Boyle +Author: David Murphy + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. 
+ +You should have received a copy of the GNU General Public License along +with this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + +See the full license in the file "LICENSE" in the top level distribution directory +*************************************************************************************/ +/* END LEGAL */ + +#include + +using namespace std; +using namespace Grid; +using namespace Grid::QCD; + +int main (int argc, char** argv) +{ + Grid_init(&argc, &argv); + + std::vector latt_size = GridDefaultLatt(); + std::vector simd_layout = GridDefaultSimd(Nd,vComplex::Nsimd()); + std::vector mpi_layout = GridDefaultMpi(); + + const int Ls = 8; + + GridCartesian *UGrid = SpaceTimeGrid::makeFourDimGrid(GridDefaultLatt(), GridDefaultSimd(Nd,vComplex::Nsimd()), GridDefaultMpi()); + GridRedBlackCartesian *UrbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(UGrid); + GridCartesian *FGrid = SpaceTimeGrid::makeFiveDimGrid(Ls, UGrid); + GridRedBlackCartesian *FrbGrid = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls, UGrid); + + // Want a different conf at every run + // First create an instance of an engine. + std::random_device rnd_device; + // Specify the engine and distribution. 
+ std::mt19937 mersenne_engine(rnd_device()); + std::uniform_int_distribution dist(1, 100); + + auto gen = std::bind(dist, mersenne_engine); + std::vector seeds4(4); + generate(begin(seeds4), end(seeds4), gen); + + //std::vector seeds4({1,2,3,5}); + std::vector seeds5({5,6,7,8}); + GridParallelRNG RNG5(FGrid); RNG5.SeedFixedIntegers(seeds5); + GridParallelRNG RNG4(UGrid); RNG4.SeedFixedIntegers(seeds4); + + int threads = GridThread::GetThreads(); + std::cout << GridLogMessage << "Grid is setup to use " << threads << " threads" << std::endl; + + LatticeFermion phi (FGrid); gaussian(RNG5, phi); + LatticeFermion Mphi (FGrid); + LatticeFermion MphiPrime (FGrid); + + LatticeGaugeField U(UGrid); + SU3::HotConfiguration(RNG4,U); + + //////////////////////////////////// + // Unmodified matrix element + //////////////////////////////////// + RealD b = 2.5; + RealD c = 1.5; + RealD mf = 0.01; + RealD mb = 1.0; + RealD M5 = 1.8; + MobiusEOFAFermionR Lop(U, *FGrid, *FrbGrid, *UGrid, *UrbGrid, mf, mf, mb, 0.0, -1, M5, b, c); + MobiusEOFAFermionR Rop(U, *FGrid, *FrbGrid, *UGrid, *UrbGrid, mb, mf, mb, -1.0, 1, M5, b, c); + OneFlavourRationalParams Params(0.95, 100.0, 5000, 1.0e-12, 12); + ConjugateGradient CG(1.0e-12, 5000); + ExactOneFlavourRatioPseudoFermionAction Meofa(Lop, Rop, CG, Params, false); + + Meofa.refresh(U, RNG5); + RealD S = Meofa.S(U); // pdag M p + + // get the deriv of phidag M phi with respect to "U" + LatticeGaugeField UdSdU(UGrid); + Meofa.deriv(U, UdSdU); + + //////////////////////////////////// + // Modify the gauge field a little + //////////////////////////////////// + RealD dt = 0.0001; + + LatticeColourMatrix mommu(UGrid); + LatticeColourMatrix forcemu(UGrid); + LatticeGaugeField mom(UGrid); + LatticeGaugeField Uprime(UGrid); + + for(int mu=0; mu(mom, mommu, mu); + + // fourth order exponential approx + parallel_for(auto i=mom.begin(); i(UdSdU, mu); + mommu = Ta(mommu)*2.0; + PokeIndex(UdSdU, mommu, mu); + } + + for(int mu=0; mu(UdSdU, mu); + mommu = 
PeekIndex(mom, mu); + + // Update PF action density + dS = dS + trace(mommu*forcemu)*dt; + } + + ComplexD dSpred = sum(dS); + + /*std::cout << GridLogMessage << " S " << S << std::endl; + std::cout << GridLogMessage << " Sprime " << Sprime << std::endl; + std::cout << GridLogMessage << "dS " << Sprime-S << std::endl; + std::cout << GridLogMessage << "predict dS " << dSpred << std::endl;*/ + printf("\nS = %1.15e\n", S); + printf("Sprime = %1.15e\n", Sprime); + printf("dS = %1.15e\n", Sprime - S); + printf("real(dS_predict) = %1.15e\n", dSpred.real()); + printf("imag(dS_predict) = %1.15e\n\n", dSpred.imag()); + + assert( fabs(real(Sprime-S-dSpred)) < 1.0 ) ; + + std::cout << GridLogMessage << "Done" << std::endl; + Grid_finalize(); +} diff --git a/tests/forces/Test_mobius_gpforce_eofa.cc b/tests/forces/Test_mobius_gpforce_eofa.cc new file mode 100644 index 00000000..72f1dee2 --- /dev/null +++ b/tests/forces/Test_mobius_gpforce_eofa.cc @@ -0,0 +1,171 @@ +/************************************************************************************* + +Grid physics library, www.github.com/paboyle/Grid + +Source file: ./tests/forces/Test_mobius_gpforce_eofa.cc + +Copyright (C) 2017 + +Author: Peter Boyle +Author: David Murphy + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along +with this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+ +See the full license in the file "LICENSE" in the top level distribution directory +*************************************************************************************/ +/* END LEGAL */ + +#include + +using namespace std; +using namespace Grid; +using namespace Grid::QCD; + +typedef GparityWilsonImplR FermionImplPolicy; +typedef GparityMobiusEOFAFermionR FermionAction; +typedef typename FermionAction::FermionField FermionField; + +int main (int argc, char** argv) +{ + Grid_init(&argc, &argv); + + std::vector latt_size = GridDefaultLatt(); + std::vector simd_layout = GridDefaultSimd(Nd,vComplex::Nsimd()); + std::vector mpi_layout = GridDefaultMpi(); + + const int Ls = 8; + + GridCartesian *UGrid = SpaceTimeGrid::makeFourDimGrid(GridDefaultLatt(), GridDefaultSimd(Nd,vComplex::Nsimd()), GridDefaultMpi()); + GridRedBlackCartesian *UrbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(UGrid); + GridCartesian *FGrid = SpaceTimeGrid::makeFiveDimGrid(Ls, UGrid); + GridRedBlackCartesian *FrbGrid = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls, UGrid); + + // Want a different conf at every run + // First create an instance of an engine. + std::random_device rnd_device; + // Specify the engine and distribution. 
+ std::mt19937 mersenne_engine(rnd_device()); + std::uniform_int_distribution dist(1, 100); + + auto gen = std::bind(dist, mersenne_engine); + std::vector seeds4(4); + generate(begin(seeds4), end(seeds4), gen); + + //std::vector seeds4({1,2,3,5}); + std::vector seeds5({5,6,7,8}); + GridParallelRNG RNG5(FGrid); RNG5.SeedFixedIntegers(seeds5); + GridParallelRNG RNG4(UGrid); RNG4.SeedFixedIntegers(seeds4); + + int threads = GridThread::GetThreads(); + std::cout << GridLogMessage << "Grid is setup to use " << threads << " threads" << std::endl; + + FermionField phi (FGrid); gaussian(RNG5, phi); + FermionField Mphi (FGrid); + FermionField MphiPrime (FGrid); + + LatticeGaugeField U(UGrid); + SU3::HotConfiguration(RNG4,U); + + //////////////////////////////////// + // Unmodified matrix element + //////////////////////////////////// + RealD b = 2.5; + RealD c = 1.5; + RealD mf = 0.01; + RealD mb = 1.0; + RealD M5 = 1.8; + FermionAction::ImplParams params; + FermionAction Lop(U, *FGrid, *FrbGrid, *UGrid, *UrbGrid, mf, mf, mb, 0.0, -1, M5, b, c, params); + FermionAction Rop(U, *FGrid, *FrbGrid, *UGrid, *UrbGrid, mb, mf, mb, -1.0, 1, M5, b, c, params); + OneFlavourRationalParams Params(0.95, 100.0, 5000, 1.0e-12, 12); + ConjugateGradient CG(1.0e-12, 5000); + ExactOneFlavourRatioPseudoFermionAction Meofa(Lop, Rop, CG, Params, false); + + Meofa.refresh(U, RNG5); + RealD S = Meofa.S(U); // pdag M p + + // get the deriv of phidag M phi with respect to "U" + LatticeGaugeField UdSdU(UGrid); + Meofa.deriv(U, UdSdU); + + //////////////////////////////////// + // Modify the gauge field a little + //////////////////////////////////// + RealD dt = 0.0001; + + LatticeColourMatrix mommu(UGrid); + LatticeColourMatrix forcemu(UGrid); + LatticeGaugeField mom(UGrid); + LatticeGaugeField Uprime(UGrid); + + for(int mu=0; mu(mom, mommu, mu); + + // fourth order exponential approx + parallel_for(auto i=mom.begin(); i(UdSdU, mu); + mommu = Ta(mommu)*2.0; + PokeIndex(UdSdU, mommu, mu); + } + + 
for(int mu=0; mu(UdSdU, mu); + mommu = PeekIndex(mom, mu); + + // Update PF action density + dS = dS + trace(mommu*forcemu)*dt; + } + + ComplexD dSpred = sum(dS); + + /*std::cout << GridLogMessage << " S " << S << std::endl; + std::cout << GridLogMessage << " Sprime " << Sprime << std::endl; + std::cout << GridLogMessage << "dS " << Sprime-S << std::endl; + std::cout << GridLogMessage << "predict dS " << dSpred << std::endl;*/ + printf("\nS = %1.15e\n", S); + printf("Sprime = %1.15e\n", Sprime); + printf("dS = %1.15e\n", Sprime - S); + printf("real(dS_predict) = %1.15e\n", dSpred.real()); + printf("imag(dS_predict) = %1.15e\n\n", dSpred.imag()); + + assert( fabs(real(Sprime-S-dSpred)) < 1.0 ) ; + + std::cout << GridLogMessage << "Done" << std::endl; + Grid_finalize(); +} From b61835c1a53ab6077840aabdb3ef8e77ea161008 Mon Sep 17 00:00:00 2001 From: Christopher Kelly Date: Wed, 23 Aug 2017 12:33:48 -0400 Subject: [PATCH 094/377] Added inplace version of intrinsic G-parity twist to hand-unrolled kernel --- lib/qcd/action/fermion/WilsonKernelsHand.cc | 90 ++++++++++++++++++++- 1 file changed, 86 insertions(+), 4 deletions(-) diff --git a/lib/qcd/action/fermion/WilsonKernelsHand.cc b/lib/qcd/action/fermion/WilsonKernelsHand.cc index 866e30d2..045a2cda 100644 --- a/lib/qcd/action/fermion/WilsonKernelsHand.cc +++ b/lib/qcd/action/fermion/WilsonKernelsHand.cc @@ -47,9 +47,6 @@ Author: paboyle #define LOAD_CHIMU(DIR,F) \ { const SiteSpinor & ref (in._odata[offset]); LOAD_CHIMU_BODY(F); } -#define LOAD_CHIMU_GPARITY(DIR,F) \ - { int g; const SiteSpinor & ref = GparityGetChi(g,in._odata.data(),DIR,F,SE,st); LOAD_CHIMU_BODY(g); } - #define LOAD_CHI_BODY(F) \ Chi_00 = ref(F)(0)(0);\ Chi_01 = ref(F)(0)(1);\ @@ -61,10 +58,95 @@ Author: paboyle #define LOAD_CHI(DIR,F) \ {const SiteHalfSpinor &ref(buf[offset]); LOAD_CHI_BODY(F); } -#define LOAD_CHI_GPARITY(DIR,F) \ + +//G-parity implementations using implementation method +#define LOAD_CHIMU_GPARITY_IMPL(DIR,F) \ + { int g; const 
SiteSpinor & ref = GparityGetChi(g,in._odata.data(),DIR,F,SE,st); LOAD_CHIMU_BODY(g); } + +#define LOAD_CHI_GPARITY_IMPL(DIR,F) \ { int g; const SiteHalfSpinor &ref = GparityGetChi(g,buf,DIR,F,SE,st); LOAD_CHI_BODY(g); } +//G-parity implementations using in-place intrinsic ops + +//1l 1h -> 1h 1l +//0l 0h , 1h 1l -> 0l 1h 0h,1l +//0h,1l -> 1l,0h +//if( (distance == 1 && !perm_will_occur) || (distance == -1 && perm_will_occur) ) +//Pulled fermion through forwards face, GPBC on upper component +//Need 0= 0l 1h 1= 1l 0h +//else if( (distance == -1 && !perm) || (distance == 1 && perm) ) +//Pulled fermion through backwards face, GPBC on lower component +//Need 0= 1l 0h 1= 0l 1h +#define DO_TWIST(INTO,S,C,F, tmp1, tmp2, tmp3, tmp4) \ + permute(tmp1, ref(1)(S)(C), permute_type); \ + exchange(tmp2,tmp3, ref(0)(S)(C), tmp1, permute_type); \ + permute(tmp4, tmp3, permute_type); \ + if( (distance == 1 && !perm) || (distance == -1 && perm) ){ \ + INTO = F == 0 ? tmp2 : tmp4; \ + }else if( (distance == -1 && !perm) || (distance == 1 && perm) ){ \ + INTO = F == 0 ? 
tmp4 : tmp2; \ + } + +#define LOAD_CHI_SETUP(DIR,F) \ + int g = F; \ + const int direction = st._directions[DIR]; \ + const int distance = st._distances[DIR]; \ + const int sl = st._grid->_simd_layout[direction]; \ + int inplace_twist = 0; \ + if(SE->_around_the_world && this->Params.twists[DIR % 4]){ \ + if(sl == 1){ \ + g = (F+1) % 2; \ + }else{ \ + inplace_twist = 1; \ + } \ + } + +#define LOAD_CHIMU_GPARITY_INPLACE_TWIST(DIR,F) \ + { const SiteSpinor &ref(in._odata[offset]); \ + LOAD_CHI_SETUP(DIR,F); \ + if(!inplace_twist){ \ + LOAD_CHIMU_BODY(g); \ + }else{ \ + const int permute_type = st._grid->PermuteType(direction); \ + DO_TWIST(Chimu_00,0,0,F, U_00,U_01,U_10,U_11); \ + DO_TWIST(Chimu_01,0,1,F, U_20,U_21,U_00,U_01); \ + DO_TWIST(Chimu_02,0,2,F, U_10,U_11,U_20,U_21); \ + DO_TWIST(Chimu_10,1,0,F, U_00,U_01,U_10,U_11); \ + DO_TWIST(Chimu_11,1,1,F, U_20,U_21,U_00,U_01); \ + DO_TWIST(Chimu_12,1,2,F, U_10,U_11,U_20,U_21); \ + DO_TWIST(Chimu_20,2,0,F, U_00,U_01,U_10,U_11); \ + DO_TWIST(Chimu_21,2,1,F, U_20,U_21,U_00,U_01); \ + DO_TWIST(Chimu_22,2,2,F, U_10,U_11,U_20,U_21); \ + DO_TWIST(Chimu_30,3,0,F, U_00,U_01,U_10,U_11); \ + DO_TWIST(Chimu_31,3,1,F, U_20,U_21,U_00,U_01); \ + DO_TWIST(Chimu_32,3,2,F, U_10,U_11,U_20,U_21); \ + } \ + } + + +#define LOAD_CHI_GPARITY_INPLACE_TWIST(DIR,F) \ + { const SiteHalfSpinor &ref(buf[offset]); \ + LOAD_CHI_SETUP(DIR,F); \ + if(!inplace_twist){ \ + LOAD_CHI_BODY(g); \ + }else{ \ + const int permute_type = st._grid->PermuteType(direction); \ + DO_TWIST(Chi_00,0,0,F, U_00,U_01,U_10,U_11); \ + DO_TWIST(Chi_01,0,1,F, U_20,U_21,UChi_00,UChi_01); \ + DO_TWIST(Chi_02,0,2,F, UChi_02,UChi_10,UChi_11,UChi_12); \ + DO_TWIST(Chi_10,1,0,F, U_00,U_01,U_10,U_11); \ + DO_TWIST(Chi_11,1,1,F, U_20,U_21,UChi_00,UChi_01); \ + DO_TWIST(Chi_12,1,2,F, UChi_02,UChi_10,UChi_11,UChi_12); \ + } \ + } + +//#define LOAD_CHI_GPARITY(DIR,F) LOAD_CHI_GPARITY_IMPL(DIR,F) +#define LOAD_CHI_GPARITY(DIR,F) LOAD_CHI_GPARITY_INPLACE_TWIST(DIR,F) + +//#define 
LOAD_CHIMU_GPARITY(DIR,F) LOAD_CHIMU_GPARITY_IMPL(DIR,F) +#define LOAD_CHIMU_GPARITY(DIR,F) LOAD_CHIMU_GPARITY_INPLACE_TWIST(DIR,F) + // To splat or not to splat depends on the implementation #define MULT_2SPIN_BODY \ Impl::loadLinkElement(U_00,ref()(0,0)); \ From dd8f1ea189b3c9998b9c94bbe595ce8b89efc2ae Mon Sep 17 00:00:00 2001 From: David Murphy Date: Wed, 23 Aug 2017 13:17:26 -0400 Subject: [PATCH 095/377] Vectorized Mobius EOFA Dperp + shift operation --- .../action/fermion/MobiusEOFAFermionvec.cc | 367 +++++++++++++++++- 1 file changed, 348 insertions(+), 19 deletions(-) diff --git a/lib/qcd/action/fermion/MobiusEOFAFermionvec.cc b/lib/qcd/action/fermion/MobiusEOFAFermionvec.cc index 59544e5a..c4eaf0f3 100644 --- a/lib/qcd/action/fermion/MobiusEOFAFermionvec.cc +++ b/lib/qcd/action/fermion/MobiusEOFAFermionvec.cc @@ -213,15 +213,180 @@ namespace QCD { FermionField& chi, std::vector& lower, std::vector& diag, std::vector& upper, std::vector& shift_coeffs) { - this->M5D(psi, phi, chi, lower, diag, upper); + #if 0 - // FIXME: possible gain from vectorizing shift operation as well? - Coeff_t one(1.0); - int Ls = this->Ls; - for(int s=0; spm == 1){ axpby_ssp_pplus(chi, one, chi, shift_coeffs[s], psi, s, Ls-1); } - else{ axpby_ssp_pminus(chi, one, chi, shift_coeffs[s], psi, s, 0); } - } + this->M5D(psi, phi, chi, lower, diag, upper); + + // FIXME: possible gain from vectorizing shift operation as well? 
+ Coeff_t one(1.0); + int Ls = this->Ls; + for(int s=0; spm == 1){ axpby_ssp_pplus(chi, one, chi, shift_coeffs[s], psi, s, Ls-1); } + else{ axpby_ssp_pminus(chi, one, chi, shift_coeffs[s], psi, s, 0); } + } + + #else + + GridBase* grid = psi._grid; + int Ls = this->Ls; + int LLs = grid->_rdimensions[0]; + const int nsimd = Simd::Nsimd(); + + Vector> u(LLs); + Vector> l(LLs); + Vector> d(LLs); + Vector> s(LLs); + + assert(Ls/LLs == nsimd); + assert(phi.checkerboard == psi.checkerboard); + + chi.checkerboard = psi.checkerboard; + + // just directly address via type pun + typedef typename Simd::scalar_type scalar_type; + scalar_type* u_p = (scalar_type*) &u[0]; + scalar_type* l_p = (scalar_type*) &l[0]; + scalar_type* d_p = (scalar_type*) &d[0]; + scalar_type* s_p = (scalar_type*) &s[0]; + + for(int o=0; oM5Dcalls++; + this->M5Dtime -= usecond(); + + assert(Nc == 3); + + parallel_for(int ss=0; ssoSites(); ss+=LLs){ // adds LLs + + int vs = (this->pm == 1) ? LLs-1 : 0; + Simd hs_00 = (this->pm == 1) ? psi[ss+vs]()(2)(0) : psi[ss+vs]()(0)(0); + Simd hs_01 = (this->pm == 1) ? psi[ss+vs]()(2)(1) : psi[ss+vs]()(0)(1); + Simd hs_02 = (this->pm == 1) ? psi[ss+vs]()(2)(2) : psi[ss+vs]()(0)(2); + Simd hs_10 = (this->pm == 1) ? psi[ss+vs]()(3)(0) : psi[ss+vs]()(1)(0); + Simd hs_11 = (this->pm == 1) ? psi[ss+vs]()(3)(1) : psi[ss+vs]()(1)(1); + Simd hs_12 = (this->pm == 1) ? 
psi[ss+vs]()(3)(2) : psi[ss+vs]()(1)(2); + + for(int v=0; v(hp_00.v); + hp_01.v = Optimization::Rotate::tRotate<2>(hp_01.v); + hp_02.v = Optimization::Rotate::tRotate<2>(hp_02.v); + hp_10.v = Optimization::Rotate::tRotate<2>(hp_10.v); + hp_11.v = Optimization::Rotate::tRotate<2>(hp_11.v); + hp_12.v = Optimization::Rotate::tRotate<2>(hp_12.v); + } + + if(this->pm == 1 && vs <= v){ + hs_00.v = Optimization::Rotate::tRotate<2>(hs_00.v); + hs_01.v = Optimization::Rotate::tRotate<2>(hs_01.v); + hs_02.v = Optimization::Rotate::tRotate<2>(hs_02.v); + hs_10.v = Optimization::Rotate::tRotate<2>(hs_10.v); + hs_11.v = Optimization::Rotate::tRotate<2>(hs_11.v); + hs_12.v = Optimization::Rotate::tRotate<2>(hs_12.v); + } + + if(vm >= v){ + hm_00.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hm_00.v); + hm_01.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hm_01.v); + hm_02.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hm_02.v); + hm_10.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hm_10.v); + hm_11.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hm_11.v); + hm_12.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hm_12.v); + } + + if(this->pm == -1 && vs >= v){ + hs_00.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hs_00.v); + hs_01.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hs_01.v); + hs_02.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hs_02.v); + hs_10.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hs_10.v); + hs_11.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hs_11.v); + hs_12.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hs_12.v); + } + + // Can force these to real arithmetic and save 2x. + Simd p_00 = (this->pm == 1) ? switcheroo::mult(d[v]()()(), phi[ss+v]()(0)(0)) + switcheroo::mult(l[v]()()(), hm_00) + : switcheroo::mult(d[v]()()(), phi[ss+v]()(0)(0)) + switcheroo::mult(l[v]()()(), hm_00) + + switcheroo::mult(s[v]()()(), hs_00); + Simd p_01 = (this->pm == 1) ? 
switcheroo::mult(d[v]()()(), phi[ss+v]()(0)(1)) + switcheroo::mult(l[v]()()(), hm_01) + : switcheroo::mult(d[v]()()(), phi[ss+v]()(0)(1)) + switcheroo::mult(l[v]()()(), hm_01) + + switcheroo::mult(s[v]()()(), hs_01); + Simd p_02 = (this->pm == 1) ? switcheroo::mult(d[v]()()(), phi[ss+v]()(0)(2)) + switcheroo::mult(l[v]()()(), hm_02) + : switcheroo::mult(d[v]()()(), phi[ss+v]()(0)(2)) + switcheroo::mult(l[v]()()(), hm_02) + + switcheroo::mult(s[v]()()(), hs_02); + Simd p_10 = (this->pm == 1) ? switcheroo::mult(d[v]()()(), phi[ss+v]()(1)(0)) + switcheroo::mult(l[v]()()(), hm_10) + : switcheroo::mult(d[v]()()(), phi[ss+v]()(1)(0)) + switcheroo::mult(l[v]()()(), hm_10) + + switcheroo::mult(s[v]()()(), hs_10); + Simd p_11 = (this->pm == 1) ? switcheroo::mult(d[v]()()(), phi[ss+v]()(1)(1)) + switcheroo::mult(l[v]()()(), hm_11) + : switcheroo::mult(d[v]()()(), phi[ss+v]()(1)(1)) + switcheroo::mult(l[v]()()(), hm_11) + + switcheroo::mult(s[v]()()(), hs_11); + Simd p_12 = (this->pm == 1) ? switcheroo::mult(d[v]()()(), phi[ss+v]()(1)(2)) + switcheroo::mult(l[v]()()(), hm_12) + : switcheroo::mult(d[v]()()(), phi[ss+v]()(1)(2)) + switcheroo::mult(l[v]()()(), hm_12) + + switcheroo::mult(s[v]()()(), hs_12); + Simd p_20 = (this->pm == 1) ? switcheroo::mult(d[v]()()(), phi[ss+v]()(2)(0)) + switcheroo::mult(u[v]()()(), hp_00) + + switcheroo::mult(s[v]()()(), hs_00) + : switcheroo::mult(d[v]()()(), phi[ss+v]()(2)(0)) + switcheroo::mult(u[v]()()(), hp_00); + Simd p_21 = (this->pm == 1) ? switcheroo::mult(d[v]()()(), phi[ss+v]()(2)(1)) + switcheroo::mult(u[v]()()(), hp_01) + + switcheroo::mult(s[v]()()(), hs_01) + : switcheroo::mult(d[v]()()(), phi[ss+v]()(2)(1)) + switcheroo::mult(u[v]()()(), hp_01); + Simd p_22 = (this->pm == 1) ? switcheroo::mult(d[v]()()(), phi[ss+v]()(2)(2)) + switcheroo::mult(u[v]()()(), hp_02) + + switcheroo::mult(s[v]()()(), hs_02) + : switcheroo::mult(d[v]()()(), phi[ss+v]()(2)(2)) + switcheroo::mult(u[v]()()(), hp_02); + Simd p_30 = (this->pm == 1) ? 
switcheroo::mult(d[v]()()(), phi[ss+v]()(3)(0)) + switcheroo::mult(u[v]()()(), hp_10) + + switcheroo::mult(s[v]()()(), hs_10) + : switcheroo::mult(d[v]()()(), phi[ss+v]()(3)(0)) + switcheroo::mult(u[v]()()(), hp_10); + Simd p_31 = (this->pm == 1) ? switcheroo::mult(d[v]()()(), phi[ss+v]()(3)(1)) + switcheroo::mult(u[v]()()(), hp_11) + + switcheroo::mult(s[v]()()(), hs_11) + : switcheroo::mult(d[v]()()(), phi[ss+v]()(3)(1)) + switcheroo::mult(u[v]()()(), hp_11); + Simd p_32 = (this->pm == 1) ? switcheroo::mult(d[v]()()(), phi[ss+v]()(3)(2)) + switcheroo::mult(u[v]()()(), hp_12) + + switcheroo::mult(s[v]()()(), hs_12) + : switcheroo::mult(d[v]()()(), phi[ss+v]()(3)(2)) + switcheroo::mult(u[v]()()(), hp_12); + + vstream(chi[ss+v]()(0)(0), p_00); + vstream(chi[ss+v]()(0)(1), p_01); + vstream(chi[ss+v]()(0)(2), p_02); + vstream(chi[ss+v]()(1)(0), p_10); + vstream(chi[ss+v]()(1)(1), p_11); + vstream(chi[ss+v]()(1)(2), p_12); + vstream(chi[ss+v]()(2)(0), p_20); + vstream(chi[ss+v]()(2)(1), p_21); + vstream(chi[ss+v]()(2)(2), p_22); + vstream(chi[ss+v]()(3)(0), p_30); + vstream(chi[ss+v]()(3)(1), p_31); + vstream(chi[ss+v]()(3)(2), p_32); + } + } + + this->M5Dtime += usecond(); + + #endif } template @@ -233,9 +398,9 @@ namespace QCD { int LLs = grid->_rdimensions[0]; int nsimd = Simd::Nsimd(); - Vector > u(LLs); - Vector > l(LLs); - Vector > d(LLs); + Vector> u(LLs); + Vector> l(LLs); + Vector> d(LLs); assert(Ls/LLs == nsimd); assert(phi.checkerboard == psi.checkerboard); @@ -371,15 +536,179 @@ namespace QCD { FermionField& chi, std::vector& lower, std::vector& diag, std::vector& upper, std::vector& shift_coeffs) { - this->M5Ddag(psi, phi, chi, lower, diag, upper); + #if 0 - // FIXME: possible gain from vectorizing shift operation as well? 
- Coeff_t one(1.0); - int Ls = this->Ls; - for(int s=0; spm == 1){ axpby_ssp_pplus(chi, one, chi, shift_coeffs[s], psi, Ls-1, s); } - else{ axpby_ssp_pminus(chi, one, chi, shift_coeffs[s], psi, 0, s); } - } + this->M5Ddag(psi, phi, chi, lower, diag, upper); + + // FIXME: possible gain from vectorizing shift operation as well? + Coeff_t one(1.0); + int Ls = this->Ls; + for(int s=0; spm == 1){ axpby_ssp_pplus(chi, one, chi, shift_coeffs[s], psi, Ls-1, s); } + else{ axpby_ssp_pminus(chi, one, chi, shift_coeffs[s], psi, 0, s); } + } + + #else + + GridBase* grid = psi._grid; + int Ls = this->Ls; + int LLs = grid->_rdimensions[0]; + int nsimd = Simd::Nsimd(); + + Vector> u(LLs); + Vector> l(LLs); + Vector> d(LLs); + Vector> s(LLs); + + assert(Ls/LLs == nsimd); + assert(phi.checkerboard == psi.checkerboard); + + chi.checkerboard = psi.checkerboard; + + // just directly address via type pun + typedef typename Simd::scalar_type scalar_type; + scalar_type* u_p = (scalar_type*) &u[0]; + scalar_type* l_p = (scalar_type*) &l[0]; + scalar_type* d_p = (scalar_type*) &d[0]; + scalar_type* s_p = (scalar_type*) &s[0]; + + for(int o=0; oM5Dcalls++; + this->M5Dtime -= usecond(); + + parallel_for(int ss=0; ssoSites(); ss+=LLs){ // adds LLs + + int vs = (this->pm == 1) ? LLs-1 : 0; + Simd hs_00 = (this->pm == 1) ? psi[ss+vs]()(0)(0) : psi[ss+vs]()(2)(0); + Simd hs_01 = (this->pm == 1) ? psi[ss+vs]()(0)(1) : psi[ss+vs]()(2)(1); + Simd hs_02 = (this->pm == 1) ? psi[ss+vs]()(0)(2) : psi[ss+vs]()(2)(2); + Simd hs_10 = (this->pm == 1) ? psi[ss+vs]()(1)(0) : psi[ss+vs]()(3)(0); + Simd hs_11 = (this->pm == 1) ? psi[ss+vs]()(1)(1) : psi[ss+vs]()(3)(1); + Simd hs_12 = (this->pm == 1) ? 
psi[ss+vs]()(1)(2) : psi[ss+vs]()(3)(2); + + for(int v=0; v(hp_00.v); + hp_01.v = Optimization::Rotate::tRotate<2>(hp_01.v); + hp_02.v = Optimization::Rotate::tRotate<2>(hp_02.v); + hp_10.v = Optimization::Rotate::tRotate<2>(hp_10.v); + hp_11.v = Optimization::Rotate::tRotate<2>(hp_11.v); + hp_12.v = Optimization::Rotate::tRotate<2>(hp_12.v); + } + + if(this->pm == 1 && vs <= v){ + hs_00.v = Optimization::Rotate::tRotate<2>(hs_00.v); + hs_01.v = Optimization::Rotate::tRotate<2>(hs_01.v); + hs_02.v = Optimization::Rotate::tRotate<2>(hs_02.v); + hs_10.v = Optimization::Rotate::tRotate<2>(hs_10.v); + hs_11.v = Optimization::Rotate::tRotate<2>(hs_11.v); + hs_12.v = Optimization::Rotate::tRotate<2>(hs_12.v); + } + + if(vm >= v){ + hm_00.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hm_00.v); + hm_01.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hm_01.v); + hm_02.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hm_02.v); + hm_10.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hm_10.v); + hm_11.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hm_11.v); + hm_12.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hm_12.v); + } + + if(this->pm == -1 && vs >= v){ + hs_00.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hs_00.v); + hs_01.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hs_01.v); + hs_02.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hs_02.v); + hs_10.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hs_10.v); + hs_11.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hs_11.v); + hs_12.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hs_12.v); + } + + Simd p_00 = (this->pm == 1) ? switcheroo::mult(d[v]()()(), phi[ss+v]()(0)(0)) + switcheroo::mult(u[v]()()(), hp_00) + + switcheroo::mult(s[v]()()(), hs_00) + : switcheroo::mult(d[v]()()(), phi[ss+v]()(0)(0)) + switcheroo::mult(u[v]()()(), hp_00); + Simd p_01 = (this->pm == 1) ? 
switcheroo::mult(d[v]()()(), phi[ss+v]()(0)(1)) + switcheroo::mult(u[v]()()(), hp_01) + + switcheroo::mult(s[v]()()(), hs_01) + : switcheroo::mult(d[v]()()(), phi[ss+v]()(0)(1)) + switcheroo::mult(u[v]()()(), hp_01); + Simd p_02 = (this->pm == 1) ? switcheroo::mult(d[v]()()(), phi[ss+v]()(0)(2)) + switcheroo::mult(u[v]()()(), hp_02) + + switcheroo::mult(s[v]()()(), hs_02) + : switcheroo::mult(d[v]()()(), phi[ss+v]()(0)(2)) + switcheroo::mult(u[v]()()(), hp_02); + Simd p_10 = (this->pm == 1) ? switcheroo::mult(d[v]()()(), phi[ss+v]()(1)(0)) + switcheroo::mult(u[v]()()(), hp_10) + + switcheroo::mult(s[v]()()(), hs_10) + : switcheroo::mult(d[v]()()(), phi[ss+v]()(1)(0)) + switcheroo::mult(u[v]()()(), hp_10); + Simd p_11 = (this->pm == 1) ? switcheroo::mult(d[v]()()(), phi[ss+v]()(1)(1)) + switcheroo::mult(u[v]()()(), hp_11) + + switcheroo::mult(s[v]()()(), hs_11) + : switcheroo::mult(d[v]()()(), phi[ss+v]()(1)(1)) + switcheroo::mult(u[v]()()(), hp_11); + Simd p_12 = (this->pm == 1) ? switcheroo::mult(d[v]()()(), phi[ss+v]()(1)(2)) + switcheroo::mult(u[v]()()(), hp_12) + + switcheroo::mult(s[v]()()(), hs_12) + : switcheroo::mult(d[v]()()(), phi[ss+v]()(1)(2)) + switcheroo::mult(u[v]()()(), hp_12); + Simd p_20 = (this->pm == 1) ? switcheroo::mult(d[v]()()(), phi[ss+v]()(2)(0)) + switcheroo::mult(l[v]()()(), hm_00) + : switcheroo::mult(d[v]()()(), phi[ss+v]()(2)(0)) + switcheroo::mult(l[v]()()(), hm_00) + + switcheroo::mult(s[v]()()(), hs_00); + Simd p_21 = (this->pm == 1) ? switcheroo::mult(d[v]()()(), phi[ss+v]()(2)(1)) + switcheroo::mult(l[v]()()(), hm_01) + : switcheroo::mult(d[v]()()(), phi[ss+v]()(2)(1)) + switcheroo::mult(l[v]()()(), hm_01) + + switcheroo::mult(s[v]()()(), hs_01); + Simd p_22 = (this->pm == 1) ? switcheroo::mult(d[v]()()(), phi[ss+v]()(2)(2)) + switcheroo::mult(l[v]()()(), hm_02) + : switcheroo::mult(d[v]()()(), phi[ss+v]()(2)(2)) + switcheroo::mult(l[v]()()(), hm_02) + + switcheroo::mult(s[v]()()(), hs_02); + Simd p_30 = (this->pm == 1) ? 
switcheroo::mult(d[v]()()(), phi[ss+v]()(3)(0)) + switcheroo::mult(l[v]()()(), hm_10) + : switcheroo::mult(d[v]()()(), phi[ss+v]()(3)(0)) + switcheroo::mult(l[v]()()(), hm_10) + + switcheroo::mult(s[v]()()(), hs_10); + Simd p_31 = (this->pm == 1) ? switcheroo::mult(d[v]()()(), phi[ss+v]()(3)(1)) + switcheroo::mult(l[v]()()(), hm_11) + : switcheroo::mult(d[v]()()(), phi[ss+v]()(3)(1)) + switcheroo::mult(l[v]()()(), hm_11) + + switcheroo::mult(s[v]()()(), hs_11); + Simd p_32 = (this->pm == 1) ? switcheroo::mult(d[v]()()(), phi[ss+v]()(3)(2)) + switcheroo::mult(l[v]()()(), hm_12) + : switcheroo::mult(d[v]()()(), phi[ss+v]()(3)(2)) + switcheroo::mult(l[v]()()(), hm_12) + + switcheroo::mult(s[v]()()(), hs_12); + + vstream(chi[ss+v]()(0)(0), p_00); + vstream(chi[ss+v]()(0)(1), p_01); + vstream(chi[ss+v]()(0)(2), p_02); + vstream(chi[ss+v]()(1)(0), p_10); + vstream(chi[ss+v]()(1)(1), p_11); + vstream(chi[ss+v]()(1)(2), p_12); + vstream(chi[ss+v]()(2)(0), p_20); + vstream(chi[ss+v]()(2)(1), p_21); + vstream(chi[ss+v]()(2)(2), p_22); + vstream(chi[ss+v]()(3)(0), p_30); + vstream(chi[ss+v]()(3)(1), p_31); + vstream(chi[ss+v]()(3)(2), p_32); + + } + + } + + this->M5Dtime += usecond(); + + #endif } #ifdef AVX512 From 46f88e6d726c238b84a9b05894f7157cb6f5c2a3 Mon Sep 17 00:00:00 2001 From: Christopher Kelly Date: Wed, 23 Aug 2017 13:21:10 -0400 Subject: [PATCH 096/377] G-parity hand-unrolled intrinsics twist now uses one less permute and one less temporary --- lib/qcd/action/fermion/WilsonKernelsHand.cc | 66 +++++++++++++-------- 1 file changed, 42 insertions(+), 24 deletions(-) diff --git a/lib/qcd/action/fermion/WilsonKernelsHand.cc b/lib/qcd/action/fermion/WilsonKernelsHand.cc index 045a2cda..6e03379e 100644 --- a/lib/qcd/action/fermion/WilsonKernelsHand.cc +++ b/lib/qcd/action/fermion/WilsonKernelsHand.cc @@ -78,16 +78,30 @@ Author: paboyle //else if( (distance == -1 && !perm) || (distance == 1 && perm) ) //Pulled fermion through backwards face, GPBC on lower component 
//Need 0= 1l 0h 1= 0l 1h -#define DO_TWIST(INTO,S,C,F, tmp1, tmp2, tmp3, tmp4) \ + +//1l 1h -> 1h 1l +//0l 0h , 1h 1l -> 0l 1h 0h,1l +#define DO_TWIST_0L_1H(INTO,S,C,F, tmp1, tmp2, tmp3) \ permute(tmp1, ref(1)(S)(C), permute_type); \ exchange(tmp2,tmp3, ref(0)(S)(C), tmp1, permute_type); \ - permute(tmp4, tmp3, permute_type); \ - if( (distance == 1 && !perm) || (distance == -1 && perm) ){ \ - INTO = F == 0 ? tmp2 : tmp4; \ - }else if( (distance == -1 && !perm) || (distance == 1 && perm) ){ \ - INTO = F == 0 ? tmp4 : tmp2; \ + INTO = tmp2; + +//0l 0h -> 0h 0l +//1l 1h, 0h 0l -> 1l 0h, 1h 0l +#define DO_TWIST_1L_0H(INTO,S,C,F, tmp1, tmp2, tmp3) \ + permute(tmp1, ref(0)(S)(C), permute_type); \ + exchange(tmp2,tmp3, ref(1)(S)(C), tmp1, permute_type); \ + INTO = tmp2; + +#define DO_TWIST(INTO,S,C,F, tmp1, tmp2, tmp3) \ + if( ( F==0 && ((distance == 1 && !perm) || (distance == -1 && perm)) ) || \ + ( F==1 && ((distance == -1 && !perm) || (distance == 1 && perm)) ) ){ \ + DO_TWIST_0L_1H(INTO,S,C,F,tmp1,tmp2,tmp3); \ + }else{ \ + DO_TWIST_1L_0H(INTO,S,C,F,tmp1,tmp2,tmp3); \ } + #define LOAD_CHI_SETUP(DIR,F) \ int g = F; \ const int direction = st._directions[DIR]; \ @@ -109,18 +123,18 @@ Author: paboyle LOAD_CHIMU_BODY(g); \ }else{ \ const int permute_type = st._grid->PermuteType(direction); \ - DO_TWIST(Chimu_00,0,0,F, U_00,U_01,U_10,U_11); \ - DO_TWIST(Chimu_01,0,1,F, U_20,U_21,U_00,U_01); \ - DO_TWIST(Chimu_02,0,2,F, U_10,U_11,U_20,U_21); \ - DO_TWIST(Chimu_10,1,0,F, U_00,U_01,U_10,U_11); \ - DO_TWIST(Chimu_11,1,1,F, U_20,U_21,U_00,U_01); \ - DO_TWIST(Chimu_12,1,2,F, U_10,U_11,U_20,U_21); \ - DO_TWIST(Chimu_20,2,0,F, U_00,U_01,U_10,U_11); \ - DO_TWIST(Chimu_21,2,1,F, U_20,U_21,U_00,U_01); \ - DO_TWIST(Chimu_22,2,2,F, U_10,U_11,U_20,U_21); \ - DO_TWIST(Chimu_30,3,0,F, U_00,U_01,U_10,U_11); \ - DO_TWIST(Chimu_31,3,1,F, U_20,U_21,U_00,U_01); \ - DO_TWIST(Chimu_32,3,2,F, U_10,U_11,U_20,U_21); \ + DO_TWIST(Chimu_00,0,0,F, U_00,U_01,U_10); \ + DO_TWIST(Chimu_01,0,1,F, 
U_11,U_20,U_21); \ + DO_TWIST(Chimu_02,0,2,F, U_00,U_01,U_10); \ + DO_TWIST(Chimu_10,1,0,F, U_11,U_20,U_21); \ + DO_TWIST(Chimu_11,1,1,F, U_00,U_01,U_10); \ + DO_TWIST(Chimu_12,1,2,F, U_11,U_20,U_21); \ + DO_TWIST(Chimu_20,2,0,F, U_00,U_01,U_10); \ + DO_TWIST(Chimu_21,2,1,F, U_11,U_20,U_21); \ + DO_TWIST(Chimu_22,2,2,F, U_00,U_01,U_10); \ + DO_TWIST(Chimu_30,3,0,F, U_11,U_20,U_21); \ + DO_TWIST(Chimu_31,3,1,F, U_00,U_01,U_10); \ + DO_TWIST(Chimu_32,3,2,F, U_11,U_20,U_21); \ } \ } @@ -132,15 +146,19 @@ Author: paboyle LOAD_CHI_BODY(g); \ }else{ \ const int permute_type = st._grid->PermuteType(direction); \ - DO_TWIST(Chi_00,0,0,F, U_00,U_01,U_10,U_11); \ - DO_TWIST(Chi_01,0,1,F, U_20,U_21,UChi_00,UChi_01); \ - DO_TWIST(Chi_02,0,2,F, UChi_02,UChi_10,UChi_11,UChi_12); \ - DO_TWIST(Chi_10,1,0,F, U_00,U_01,U_10,U_11); \ - DO_TWIST(Chi_11,1,1,F, U_20,U_21,UChi_00,UChi_01); \ - DO_TWIST(Chi_12,1,2,F, UChi_02,UChi_10,UChi_11,UChi_12); \ + DO_TWIST(Chi_00,0,0,F, U_00,U_01,U_10); \ + DO_TWIST(Chi_01,0,1,F, U_11,U_20,U_21); \ + DO_TWIST(Chi_02,0,2,F, UChi_00,UChi_01,UChi_02); \ + DO_TWIST(Chi_10,1,0,F, UChi_10,UChi_11,UChi_12); \ + DO_TWIST(Chi_11,1,1,F, U_00,U_01,U_10); \ + DO_TWIST(Chi_12,1,2,F, U_11,U_20,U_21); \ } \ } + + + + //#define LOAD_CHI_GPARITY(DIR,F) LOAD_CHI_GPARITY_IMPL(DIR,F) #define LOAD_CHI_GPARITY(DIR,F) LOAD_CHI_GPARITY_INPLACE_TWIST(DIR,F) From a0bb8e5b4660634db2b4a7cf994b2665926b24e9 Mon Sep 17 00:00:00 2001 From: Christopher Kelly Date: Wed, 23 Aug 2017 14:44:40 -0400 Subject: [PATCH 097/377] Added hand-unrolled kernel implementations of all the other dslash precision / comms precision combinations with G-parity --- lib/qcd/action/fermion/WilsonKernelsHand.cc | 213 ++++++++++---------- 1 file changed, 103 insertions(+), 110 deletions(-) diff --git a/lib/qcd/action/fermion/WilsonKernelsHand.cc b/lib/qcd/action/fermion/WilsonKernelsHand.cc index 6e03379e..ea04845e 100644 --- a/lib/qcd/action/fermion/WilsonKernelsHand.cc +++ 
b/lib/qcd/action/fermion/WilsonKernelsHand.cc @@ -786,120 +786,113 @@ void WilsonKernels::HandDhopSiteDagExt(StencilImpl &st,LebesgueOrder &lo,D const FermionField &in, \ FermionField &out){ assert(0); } \ - HAND_SPECIALISE_EMPTY(GparityWilsonImplF); - //HAND_SPECIALISE_EMPTY(GparityWilsonImplD); - HAND_SPECIALISE_EMPTY(GparityWilsonImplFH); - HAND_SPECIALISE_EMPTY(GparityWilsonImplDF); +#define HAND_SPECIALISE_GPARITY(IMPL) \ + template<> void \ + WilsonKernels::HandDhopSite(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,SiteHalfSpinor *buf, \ + int ss,int sU,const FermionField &in, FermionField &out) \ + { \ + typedef IMPL Impl; \ + typedef typename Simd::scalar_type S; \ + typedef typename Simd::vector_type V; \ + \ + HAND_DECLARATIONS(ignore); \ + \ + int offset,local,perm, ptype; \ + StencilEntry *SE; \ + HAND_DOP_SITE(0, LOAD_CHI_GPARITY,LOAD_CHIMU_GPARITY,MULT_2SPIN_GPARITY); \ + HAND_DOP_SITE(1, LOAD_CHI_GPARITY,LOAD_CHIMU_GPARITY,MULT_2SPIN_GPARITY); \ + } \ + \ + template<> \ + void WilsonKernels::HandDhopSiteDag(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,SiteHalfSpinor *buf, \ + int ss,int sU,const FermionField &in, FermionField &out) \ + { \ + typedef IMPL Impl; \ + typedef typename Simd::scalar_type S; \ + typedef typename Simd::vector_type V; \ + \ + HAND_DECLARATIONS(ignore); \ + \ + StencilEntry *SE; \ + int offset,local,perm, ptype; \ + HAND_DOP_SITE_DAG(0, LOAD_CHI_GPARITY,LOAD_CHIMU_GPARITY,MULT_2SPIN_GPARITY); \ + HAND_DOP_SITE_DAG(1, LOAD_CHI_GPARITY,LOAD_CHIMU_GPARITY,MULT_2SPIN_GPARITY); \ + } \ + \ + template<> void \ + WilsonKernels::HandDhopSiteInt(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,SiteHalfSpinor *buf, \ + int ss,int sU,const FermionField &in, FermionField &out) \ + { \ + typedef IMPL Impl; \ + typedef typename Simd::scalar_type S; \ + typedef typename Simd::vector_type V; \ + \ + HAND_DECLARATIONS(ignore); \ + \ + int offset,local,perm, ptype; \ + StencilEntry *SE; \ + HAND_DOP_SITE_INT(0, 
LOAD_CHI_GPARITY,LOAD_CHIMU_GPARITY,MULT_2SPIN_GPARITY); \ + HAND_DOP_SITE_INT(1, LOAD_CHI_GPARITY,LOAD_CHIMU_GPARITY,MULT_2SPIN_GPARITY); \ + } \ + \ + template<> \ + void WilsonKernels::HandDhopSiteDagInt(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,SiteHalfSpinor *buf, \ + int ss,int sU,const FermionField &in, FermionField &out) \ + { \ + typedef IMPL Impl; \ + typedef typename Simd::scalar_type S; \ + typedef typename Simd::vector_type V; \ + \ + HAND_DECLARATIONS(ignore); \ + \ + StencilEntry *SE; \ + int offset,local,perm, ptype; \ + HAND_DOP_SITE_DAG_INT(0, LOAD_CHI_GPARITY,LOAD_CHIMU_GPARITY,MULT_2SPIN_GPARITY); \ + HAND_DOP_SITE_DAG_INT(1, LOAD_CHI_GPARITY,LOAD_CHIMU_GPARITY,MULT_2SPIN_GPARITY); \ + } \ + \ + template<> void \ + WilsonKernels::HandDhopSiteExt(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,SiteHalfSpinor *buf, \ + int ss,int sU,const FermionField &in, FermionField &out) \ + { \ + typedef IMPL Impl; \ + typedef typename Simd::scalar_type S; \ + typedef typename Simd::vector_type V; \ + \ + HAND_DECLARATIONS(ignore); \ + \ + int offset,local,perm, ptype; \ + StencilEntry *SE; \ + int nmu=0; \ + HAND_DOP_SITE_EXT(0, LOAD_CHI_GPARITY,LOAD_CHIMU_GPARITY,MULT_2SPIN_GPARITY); \ + nmu = 0; \ + HAND_DOP_SITE_EXT(1, LOAD_CHI_GPARITY,LOAD_CHIMU_GPARITY,MULT_2SPIN_GPARITY); \ + } \ + template<> \ + void WilsonKernels::HandDhopSiteDagExt(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,SiteHalfSpinor *buf, \ + int ss,int sU,const FermionField &in, FermionField &out) \ + { \ + typedef IMPL Impl; \ + typedef typename Simd::scalar_type S; \ + typedef typename Simd::vector_type V; \ + \ + HAND_DECLARATIONS(ignore); \ + \ + StencilEntry *SE; \ + int offset,local,perm, ptype; \ + int nmu=0; \ + HAND_DOP_SITE_DAG_EXT(0, LOAD_CHI_GPARITY,LOAD_CHIMU_GPARITY,MULT_2SPIN_GPARITY); \ + nmu = 0; \ + HAND_DOP_SITE_DAG_EXT(1, LOAD_CHI_GPARITY,LOAD_CHIMU_GPARITY,MULT_2SPIN_GPARITY); \ + } - -template<> void 
-WilsonKernels::HandDhopSite(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,SiteHalfSpinor *buf, - int ss,int sU,const FermionField &in, FermionField &out) -{ -// T==0, Z==1, Y==2, Z==3 expect 1,2,2,2 simd layout etc... - typedef GparityWilsonImplD Impl; - typedef typename Simd::scalar_type S; - typedef typename Simd::vector_type V; - - HAND_DECLARATIONS(ignore); - - int offset,local,perm, ptype; - StencilEntry *SE; - HAND_DOP_SITE(0, LOAD_CHI_GPARITY,LOAD_CHIMU_GPARITY,MULT_2SPIN_GPARITY); - HAND_DOP_SITE(1, LOAD_CHI_GPARITY,LOAD_CHIMU_GPARITY,MULT_2SPIN_GPARITY); -} - -template<> -void WilsonKernels::HandDhopSiteDag(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,SiteHalfSpinor *buf, - int ss,int sU,const FermionField &in, FermionField &out) -{ - typedef GparityWilsonImplD Impl; - typedef typename Simd::scalar_type S; - typedef typename Simd::vector_type V; - - HAND_DECLARATIONS(ignore); - - StencilEntry *SE; - int offset,local,perm, ptype; - HAND_DOP_SITE_DAG(0, LOAD_CHI_GPARITY,LOAD_CHIMU_GPARITY,MULT_2SPIN_GPARITY); - HAND_DOP_SITE_DAG(1, LOAD_CHI_GPARITY,LOAD_CHIMU_GPARITY,MULT_2SPIN_GPARITY); -} - -template<> void -WilsonKernels::HandDhopSiteInt(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,SiteHalfSpinor *buf, - int ss,int sU,const FermionField &in, FermionField &out) -{ -// T==0, Z==1, Y==2, Z==3 expect 1,2,2,2 simd layout etc... 
- typedef GparityWilsonImplD Impl; - typedef typename Simd::scalar_type S; - typedef typename Simd::vector_type V; - - HAND_DECLARATIONS(ignore); - - int offset,local,perm, ptype; - StencilEntry *SE; - HAND_DOP_SITE_INT(0, LOAD_CHI_GPARITY,LOAD_CHIMU_GPARITY,MULT_2SPIN_GPARITY); - HAND_DOP_SITE_INT(1, LOAD_CHI_GPARITY,LOAD_CHIMU_GPARITY,MULT_2SPIN_GPARITY); -} - -template<> -void WilsonKernels::HandDhopSiteDagInt(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,SiteHalfSpinor *buf, - int ss,int sU,const FermionField &in, FermionField &out) -{ - typedef GparityWilsonImplD Impl; - typedef typename Simd::scalar_type S; - typedef typename Simd::vector_type V; - - HAND_DECLARATIONS(ignore); - - StencilEntry *SE; - int offset,local,perm, ptype; - HAND_DOP_SITE_DAG_INT(0, LOAD_CHI_GPARITY,LOAD_CHIMU_GPARITY,MULT_2SPIN_GPARITY); - HAND_DOP_SITE_DAG_INT(1, LOAD_CHI_GPARITY,LOAD_CHIMU_GPARITY,MULT_2SPIN_GPARITY); -} - -template<> void -WilsonKernels::HandDhopSiteExt(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,SiteHalfSpinor *buf, - int ss,int sU,const FermionField &in, FermionField &out) -{ -// T==0, Z==1, Y==2, Z==3 expect 1,2,2,2 simd layout etc... 
- typedef GparityWilsonImplD Impl; - typedef typename Simd::scalar_type S; - typedef typename Simd::vector_type V; - - HAND_DECLARATIONS(ignore); - - int offset,local,perm, ptype; - StencilEntry *SE; - int nmu=0; - HAND_DOP_SITE_EXT(0, LOAD_CHI_GPARITY,LOAD_CHIMU_GPARITY,MULT_2SPIN_GPARITY); - nmu = 0; - HAND_DOP_SITE_EXT(1, LOAD_CHI_GPARITY,LOAD_CHIMU_GPARITY,MULT_2SPIN_GPARITY); -} - -template<> -void WilsonKernels::HandDhopSiteDagExt(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,SiteHalfSpinor *buf, - int ss,int sU,const FermionField &in, FermionField &out) -{ - typedef GparityWilsonImplD Impl; - typedef typename Simd::scalar_type S; - typedef typename Simd::vector_type V; - - HAND_DECLARATIONS(ignore); - - StencilEntry *SE; - int offset,local,perm, ptype; - int nmu=0; - HAND_DOP_SITE_DAG_EXT(0, LOAD_CHI_GPARITY,LOAD_CHIMU_GPARITY,MULT_2SPIN_GPARITY); - nmu = 0; - HAND_DOP_SITE_DAG_EXT(1, LOAD_CHI_GPARITY,LOAD_CHIMU_GPARITY,MULT_2SPIN_GPARITY); -} - - - +HAND_SPECIALISE_GPARITY(GparityWilsonImplF); +HAND_SPECIALISE_GPARITY(GparityWilsonImplD); +HAND_SPECIALISE_GPARITY(GparityWilsonImplFH); +HAND_SPECIALISE_GPARITY(GparityWilsonImplDF); From ce5df177eeccee214f3f394bbdec385f7c19caeb Mon Sep 17 00:00:00 2001 From: Christopher Kelly Date: Wed, 23 Aug 2017 15:05:22 -0400 Subject: [PATCH 098/377] Removed superfluous implementation of G-parity twist for hand-unrolled kernel from GparityWilsonImpl --- lib/qcd/action/fermion/FermionOperatorImpl.h | 85 +------------------- lib/qcd/action/fermion/WilsonKernelsHand.cc | 14 ---- 2 files changed, 1 insertion(+), 98 deletions(-) diff --git a/lib/qcd/action/fermion/FermionOperatorImpl.h b/lib/qcd/action/fermion/FermionOperatorImpl.h index 5300063b..ffb82989 100644 --- a/lib/qcd/action/fermion/FermionOperatorImpl.h +++ b/lib/qcd/action/fermion/FermionOperatorImpl.h @@ -478,10 +478,7 @@ class GparityWilsonImpl : public ConjugateGaugeImpl > tmp_full; - std::vector > tmp_half; - - GparityWilsonImpl(const ImplParams &p = 
ImplParams()) : Params(p), tmp_full(GridThread::GetThreads()), tmp_half(GridThread::GetThreads()){}; + GparityWilsonImpl(const ImplParams &p = ImplParams()) : Params(p){}; bool overlapCommsCompute(void) { return Params.overlapCommsCompute; }; @@ -563,86 +560,6 @@ class GparityWilsonImpl : public ConjugateGaugeImpl - void GparityTwistPermute(SiteSpinorType &into, const SiteSpinorType &from, const int direction, const int distance, const int perm, GridBase* grid){ -#if 0 - typedef typename SiteSpinorType::scalar_object sobj; - sobj stmp; - std::vector vals(grid->Nsimd()); - extract(from,vals); - std::vector icoor; - for(int s=0;sNsimd();s++){ - grid->iCoorFromIindex(icoor,s); - assert((icoor[direction]==0)||(icoor[direction]==1)); - - int permute_lane; - if ( distance == 1) { - permute_lane = icoor[direction]?1:0; - } else { - permute_lane = icoor[direction]?0:1; - } - if(perm) permute_lane = !permute_lane; - - if ( permute_lane ) { - stmp(0) = vals[s](1); - stmp(1) = vals[s](0); - vals[s] = stmp; - } - } - merge(into,vals); -#else - int permute_type = grid->PermuteType(direction); - typedef typename SiteSpinorType::vector_type vtype; - vtype tmp1, tmp2, tmp3, tmp4; - - for(int s=0;s 1h 1l - exchange(tmp2,tmp3, from(0)(s)(c), tmp1, permute_type); // 0l 0h , 1h 1l -> 0l 1h 0h,1l - permute(tmp4, tmp3, permute_type); //0h,1l -> 1l,0h - - if( (distance == 1 && !perm) || (distance == -1 && perm) ){ - //Pulled fermion through forwards face, GPBC on upper component - //Need 0= 0l 1h 1= 1l 0h - into(0)(s)(c) = tmp2; - into(1)(s)(c) = tmp4; - }else if( (distance == -1 && !perm) || (distance == 1 && perm) ){ - //Pulled fermion through backwards face, GPBC on lower component - //Need 0= 1l 0h 1= 0l 1h - into(0)(s)(c) = tmp4; - into(1)(s)(c) = tmp2; - }else assert(0); - } - } -#endif - } - - - template - const SiteSpinorType & GparityGetChi(int &g, SiteSpinorType const* in, const int dir, const int f, StencilEntry *SE, StencilImpl &st){ - const int mmu = dir % 4; - const int 
direction = st._directions[dir]; - const int sl = st._grid->_simd_layout[direction]; - const int perm = SE->_permute; - g = f; - - if(SE->_around_the_world && Params.twists[mmu]){ - if(sl == 1){ //not SIMD vectorized in G-parity direction so just change the flavor index accessed to implement the twist - g = (f+1) % 2; - return in[SE->_offset]; - }else{ //SIMD vectorized in Gparity direction - const int me = omp_get_thread_num(); - const int distance = st._distances[dir]; - assert(distance == -1 || distance == 1); - SiteSpinorType &tmp = GparityWilsonImpl_helper::getAB::ref(tmp_full[me], tmp_half[me]); - GparityTwistPermute(tmp, in[SE->_offset], direction, distance, perm, st._grid); - return tmp; - } - }else return in[SE->_offset]; - } - - - inline void DoubleStore(GridBase *GaugeGrid,DoubledGaugeField &Uds,const GaugeField &Umu) { conformable(Uds._grid,GaugeGrid); diff --git a/lib/qcd/action/fermion/WilsonKernelsHand.cc b/lib/qcd/action/fermion/WilsonKernelsHand.cc index ea04845e..e1243304 100644 --- a/lib/qcd/action/fermion/WilsonKernelsHand.cc +++ b/lib/qcd/action/fermion/WilsonKernelsHand.cc @@ -59,14 +59,6 @@ Author: paboyle {const SiteHalfSpinor &ref(buf[offset]); LOAD_CHI_BODY(F); } -//G-parity implementations using implementation method -#define LOAD_CHIMU_GPARITY_IMPL(DIR,F) \ - { int g; const SiteSpinor & ref = GparityGetChi(g,in._odata.data(),DIR,F,SE,st); LOAD_CHIMU_BODY(g); } - -#define LOAD_CHI_GPARITY_IMPL(DIR,F) \ - { int g; const SiteHalfSpinor &ref = GparityGetChi(g,buf,DIR,F,SE,st); LOAD_CHI_BODY(g); } - - //G-parity implementations using in-place intrinsic ops //1l 1h -> 1h 1l @@ -156,13 +148,7 @@ Author: paboyle } - - - -//#define LOAD_CHI_GPARITY(DIR,F) LOAD_CHI_GPARITY_IMPL(DIR,F) #define LOAD_CHI_GPARITY(DIR,F) LOAD_CHI_GPARITY_INPLACE_TWIST(DIR,F) - -//#define LOAD_CHIMU_GPARITY(DIR,F) LOAD_CHIMU_GPARITY_IMPL(DIR,F) #define LOAD_CHIMU_GPARITY(DIR,F) LOAD_CHIMU_GPARITY_INPLACE_TWIST(DIR,F) // To splat or not to splat depends on the 
implementation From edabb3577ff13df048fccb8e9fb17080531e32df Mon Sep 17 00:00:00 2001 From: Christopher Kelly Date: Wed, 23 Aug 2017 16:54:06 -0400 Subject: [PATCH 099/377] Imported Benchmark_gparity --- benchmarks/Benchmark_gparity.cc | 190 ++++++++++++++++++++++++++++++++ 1 file changed, 190 insertions(+) create mode 100644 benchmarks/Benchmark_gparity.cc diff --git a/benchmarks/Benchmark_gparity.cc b/benchmarks/Benchmark_gparity.cc new file mode 100644 index 00000000..f6036aa8 --- /dev/null +++ b/benchmarks/Benchmark_gparity.cc @@ -0,0 +1,190 @@ +#include +#include +using namespace std; +using namespace Grid; +using namespace Grid::QCD; + +template +struct scal { + d internal; +}; + + Gamma::Algebra Gmu [] = { + Gamma::Algebra::GammaX, + Gamma::Algebra::GammaY, + Gamma::Algebra::GammaZ, + Gamma::Algebra::GammaT + }; + +typedef typename GparityDomainWallFermionF::FermionField GparityLatticeFermionF; +typedef typename GparityDomainWallFermionD::FermionField GparityLatticeFermionD; + + + +int main (int argc, char ** argv) +{ + Grid_init(&argc,&argv); + + int Ls=16; + for(int i=0;i> Ls; + } + + + int threads = GridThread::GetThreads(); + std::cout<_Nprocessors; + RealD NN = UGrid->NodeCount(); + + std::cout << GridLogMessage<< "*****************************************************************" <Barrier(); + Dw.ZeroCounters(); + Dw.Dhop(src,result,0); + std::cout<Barrier(); + + double volume=Ls; for(int mu=0;muBarrier(); + DwH.ZeroCounters(); + DwH.Dhop(src,result,0); + double t0=usecond(); + for(int i=0;iBarrier(); + + double volume=Ls; for(int mu=0;muBarrier(); + DwD.ZeroCounters(); + DwD.Dhop(src_d,result_d,0); + std::cout<Barrier(); + + double volume=Ls; for(int mu=0;mu= 0); + const int page_size = 4096; + uint64_t virt_pfn = (uint64_t)Buf / page_size; + off_t offset = sizeof(uint64_t) * virt_pfn; + uint64_t npages = (BYTES + page_size-1) / page_size; + uint64_t pagedata[npages]; + uint64_t ret = lseek(fd, offset, SEEK_SET); + assert(ret == offset); + ret = 
::read(fd, pagedata, sizeof(uint64_t)*npages); + assert(ret == sizeof(uint64_t) * npages); + int nhugepages = npages / 512; + int n4ktotal, nnothuge; + n4ktotal = 0; + nnothuge = 0; + for (int i = 0; i < nhugepages; ++i) { + uint64_t baseaddr = (pagedata[i*512] & 0x7fffffffffffffULL) * page_size; + for (int j = 0; j < 512; ++j) { + uint64_t pageaddr = (pagedata[i*512+j] & 0x7fffffffffffffULL) * page_size; + ++n4ktotal; + if (pageaddr != baseaddr + j * page_size) + ++nnothuge; + } + } + int rank = CartesianCommunicator::RankWorld(); + printf("rank %d Allocated %d 4k pages, %d not in huge pages\n", rank, n4ktotal, nnothuge); +#endif +} + } diff --git a/lib/allocator/AlignedAllocator.h b/lib/allocator/AlignedAllocator.h index c5ad0883..e64a5949 100644 --- a/lib/allocator/AlignedAllocator.h +++ b/lib/allocator/AlignedAllocator.h @@ -64,6 +64,8 @@ namespace Grid { }; + void check_huge_pages(void *Buf,uint64_t BYTES); + //////////////////////////////////////////////////////////////////// // A lattice of something, but assume the something is SIMDized. 
//////////////////////////////////////////////////////////////////// From 4b4c2a715b319bcc7060ef9ae8aa983c49471167 Mon Sep 17 00:00:00 2001 From: paboyle Date: Sat, 26 Aug 2017 11:38:04 +0100 Subject: [PATCH 103/377] fcntl.h needed --- lib/allocator/AlignedAllocator.cc | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/lib/allocator/AlignedAllocator.cc b/lib/allocator/AlignedAllocator.cc index 764bd732..967b2571 100644 --- a/lib/allocator/AlignedAllocator.cc +++ b/lib/allocator/AlignedAllocator.cc @@ -1,7 +1,5 @@ - - - #include +#include namespace Grid { From d36d2fb40d1e2871db5f7be4d79e58811037ad5d Mon Sep 17 00:00:00 2001 From: Christopher Kelly Date: Mon, 28 Aug 2017 06:53:56 -0700 Subject: [PATCH 104/377] Added ability to override default Ls in Benchmark_dwf --- benchmarks/Benchmark_dwf.cc | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/benchmarks/Benchmark_dwf.cc b/benchmarks/Benchmark_dwf.cc index 7814ec7d..1f9c7624 100644 --- a/benchmarks/Benchmark_dwf.cc +++ b/benchmarks/Benchmark_dwf.cc @@ -51,7 +51,13 @@ int main (int argc, char ** argv) std::cout< latt4 = GridDefaultLatt(); - const int Ls=16; + int Ls=16; + for(int i=0;i> Ls; + } + + GridCartesian * UGrid = SpaceTimeGrid::makeFourDimGrid(GridDefaultLatt(), GridDefaultSimd(Nd,vComplex::Nsimd()),GridDefaultMpi()); GridRedBlackCartesian * UrbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(UGrid); GridCartesian * FGrid = SpaceTimeGrid::makeFiveDimGrid(Ls,UGrid); From 74af885d4eda81453b1eb83b062a11810011d5f5 Mon Sep 17 00:00:00 2001 From: Christopher Kelly Date: Tue, 29 Aug 2017 09:50:37 -0400 Subject: [PATCH 105/377] Removed some no-longer-needed associated with G-parity hand unrolled kernel --- lib/qcd/action/fermion/FermionOperatorImpl.h | 16 ---------------- 1 file changed, 16 deletions(-) diff --git a/lib/qcd/action/fermion/FermionOperatorImpl.h b/lib/qcd/action/fermion/FermionOperatorImpl.h index ffb82989..9d24deb2 100644 --- 
a/lib/qcd/action/fermion/FermionOperatorImpl.h +++ b/lib/qcd/action/fermion/FermionOperatorImpl.h @@ -425,22 +425,6 @@ class DomainWallVec5dImpl : public PeriodicGaugeImpl< GaugeImplTypes< S,Nrepres //////////////////////////////////////////////////////////////////////////////////////// // Flavour doubled spinors; is Gparity the only? what about C*? //////////////////////////////////////////////////////////////////////////////////////// -namespace GparityWilsonImpl_helper{ - template - struct getAB; - - template - struct getAB{ - static inline A & ref(A &a, B &b){ return a; } - }; - template - struct getAB{ - static inline B & ref(A &a, B &b){ return b; } - }; -}; - - - template class GparityWilsonImpl : public ConjugateGaugeImpl > { public: From 59bd1fe21b3fc81b8c50e9caa53f2adc65b3d7b5 Mon Sep 17 00:00:00 2001 From: Christopher Kelly Date: Tue, 29 Aug 2017 13:07:37 -0700 Subject: [PATCH 106/377] Fix for 'perm' and 'local' not being set for hand-unrolled external-site Dslash, which caused incorrect behavior of G-parity kernel --- lib/qcd/action/fermion/WilsonKernelsHand.cc | 2 ++ 1 file changed, 2 insertions(+) diff --git a/lib/qcd/action/fermion/WilsonKernelsHand.cc b/lib/qcd/action/fermion/WilsonKernelsHand.cc index a0f5ffec..80b81714 100644 --- a/lib/qcd/action/fermion/WilsonKernelsHand.cc +++ b/lib/qcd/action/fermion/WilsonKernelsHand.cc @@ -468,6 +468,8 @@ Author: paboyle #define HAND_STENCIL_LEG_EXT(PROJ,PERM,DIR,RECON,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL) \ SE=st.GetEntry(ptype,DIR,ss); \ offset = SE->_offset; \ + local = SE->_is_local; \ + perm = SE->_permute; \ if((!SE->_is_local)&&(!st.same_node[DIR]) ) { \ LOAD_CHI_IMPL(DIR,F,PERM); \ MULT_2SPIN_IMPL(DIR,F); \ From b83b2b1415e8e4276486bff22230c56b9866e419 Mon Sep 17 00:00:00 2001 From: Azusa Yamaguchi Date: Mon, 4 Sep 2017 14:09:47 +0100 Subject: [PATCH 107/377] Stability improvement to BCG. Force m_rr hermitian beyond rounding. 
--- .../iterative/BlockConjugateGradient.h | 20 ++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) diff --git a/lib/algorithms/iterative/BlockConjugateGradient.h b/lib/algorithms/iterative/BlockConjugateGradient.h index d7817c05..e0eeddcb 100644 --- a/lib/algorithms/iterative/BlockConjugateGradient.h +++ b/lib/algorithms/iterative/BlockConjugateGradient.h @@ -87,15 +87,22 @@ void ThinQRfact (Eigen::MatrixXcd &m_rr, //////////////////////////////////////////////////////////////////////////////////////////////////// sliceInnerProductMatrix(m_rr,R,R,Orthog); - //////////////////////////////////////////////////////////////////////////////////////////////////// - // Cholesky from Eigen - // There exists a ldlt that is documented as more stable - //////////////////////////////////////////////////////////////////////////////////////////////////// - Eigen::MatrixXcd L = m_rr.llt().matrixL(); + // Force manifest hermitian to avoid rounding related + m_rr = 0.5*(m_rr+m_rr.adjoint()); +#if 0 + std::cout << " Calling Cholesky ldlt on m_rr " << m_rr < Date: Fri, 8 Sep 2017 13:41:53 +0100 Subject: [PATCH 108/377] Fixing Intel compiler error for the JSON parser --- lib/serialisation/JSON_IO.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/serialisation/JSON_IO.h b/lib/serialisation/JSON_IO.h index c82648fc..23b9a836 100644 --- a/lib/serialisation/JSON_IO.h +++ b/lib/serialisation/JSON_IO.h @@ -120,7 +120,7 @@ namespace Grid ss_ << os.str() << " ," ; } -/* + // specialize for string template <> void JSONWriter::writeDefault(const std::string &s, const std::string &x) @@ -133,7 +133,7 @@ namespace Grid else ss_ << os.str() << " ," ; } -*/ + template From 1950ac92940b85aa32dbd4ce066e68f4e7220d45 Mon Sep 17 00:00:00 2001 From: Guido Cossu Date: Fri, 8 Sep 2017 15:18:59 +0100 Subject: [PATCH 109/377] Fixed the Intel compiler problem with the JSON classes --- lib/json/json.hpp | 14281 +++++++++++++++++---------------- 
lib/serialisation/JSON_IO.cc | 3 +- lib/serialisation/JSON_IO.h | 18 +- 3 files changed, 7226 insertions(+), 7076 deletions(-) diff --git a/lib/json/json.hpp b/lib/json/json.hpp index e53fbcf2..e7c42920 100644 --- a/lib/json/json.hpp +++ b/lib/json/json.hpp @@ -29,7 +29,7 @@ SOFTWARE. #ifndef NLOHMANN_JSON_HPP #define NLOHMANN_JSON_HPP -#include // all_of, copy, fill, find, for_each, none_of, remove, reverse, transform +#include // all_of, copy, fill, find, for_each, generate_n, none_of, remove, reverse, transform #include // array #include // assert #include // and, not, or @@ -43,7 +43,7 @@ SOFTWARE. #include // function, hash, less #include // initializer_list #include // hex -#include // istream, ostream +#include // istream, ostream #include // advance, begin, back_inserter, bidirectional_iterator_tag, distance, end, inserter, iterator, iterator_traits, next, random_access_iterator_tag, reverse_iterator #include // numeric_limits #include // locale @@ -54,6 +54,7 @@ SOFTWARE. #include // getline, stoi, string, to_string #include // add_pointer, conditional, decay, enable_if, false_type, integral_constant, is_arithmetic, is_base_of, is_const, is_constructible, is_convertible, is_default_constructible, is_enum, is_floating_point, is_integral, is_nothrow_move_assignable, is_nothrow_move_constructible, is_pointer, is_reference, is_same, is_scalar, is_signed, remove_const, remove_cv, remove_pointer, remove_reference, true_type, underlying_type #include // declval, forward, make_pair, move, pair, swap +#include // valarray #include // vector // exclude unsupported compilers @@ -115,6 +116,38 @@ SOFTWARE. 
*/ namespace nlohmann { +template +struct adl_serializer; + +// forward declaration of basic_json (required to split the class) +template class ObjectType = + std::map, + template class ArrayType = std::vector, + class StringType = std::string, class BooleanType = bool, + class NumberIntegerType = std::int64_t, + class NumberUnsignedType = std::uint64_t, + class NumberFloatType = double, + template class AllocatorType = std::allocator, + template class JSONSerializer = + adl_serializer> +class basic_json; + +// Ugly macros to avoid uglier copy-paste when specializing basic_json +// This is only temporary and will be removed in 3.0 + +#define NLOHMANN_BASIC_JSON_TPL_DECLARATION \ + template class ObjectType, \ + template class ArrayType, \ + class StringType, class BooleanType, class NumberIntegerType, \ + class NumberUnsignedType, class NumberFloatType, \ + template class AllocatorType, \ + template class JSONSerializer> + +#define NLOHMANN_BASIC_JSON_TPL \ + basic_json + /*! @brief unnamed namespace with internal helper functions @@ -133,12 +166,28 @@ namespace detail /*! @brief general exception of the @ref basic_json class -Extension of std::exception objects with a member @a id for exception ids. +This class is an extension of `std::exception` objects with a member @a id for +exception ids. It is used as the base class for all exceptions thrown by the +@ref basic_json class. This class can hence be used as "wildcard" to catch +exceptions. 
+Subclasses: +- @ref parse_error for exceptions indicating a parse error +- @ref invalid_iterator for exceptions indicating errors with iterators +- @ref type_error for exceptions indicating executing a member function with + a wrong type +- @ref out_of_range for exceptions indicating access out of the defined range +- @ref other_error for exceptions indicating other library errors + +@internal @note To have nothrow-copy-constructible exceptions, we internally use - std::runtime_error which can cope with arbitrary-length error messages. + `std::runtime_error` which can cope with arbitrary-length error messages. Intermediate strings are built with static functions and then passed to the actual constructor. +@endinternal + +@liveexample{The following code shows how arbitrary library exceptions can be +caught.,exception} @since version 3.0.0 */ @@ -146,7 +195,7 @@ class exception : public std::exception { public: /// returns the explanatory string - virtual const char* what() const noexcept override + const char* what() const noexcept override { return m.what(); } @@ -155,9 +204,7 @@ class exception : public std::exception const int id; protected: - exception(int id_, const char* what_arg) - : id(id_), m(what_arg) - {} + exception(int id_, const char* what_arg) : id(id_), m(what_arg) {} static std::string name(const std::string& ename, int id) { @@ -172,21 +219,16 @@ class exception : public std::exception /*! @brief exception indicating a parse error -This excpetion is thrown by the library when a parse error occurs. Parse -errors can occur during the deserialization of JSON text as well as when -using JSON Patch. +This excpetion is thrown by the library when a parse error occurs. Parse errors +can occur during the deserialization of JSON text, CBOR, MessagePack, as well +as when using JSON Patch. Member @a byte holds the byte index of the last read character in the input file. 
-@note For an input with n bytes, 1 is the index of the first character - and n+1 is the index of the terminating null byte or the end of - file. This also holds true when reading a byte vector (CBOR or - MessagePack). - Exceptions have ids 1xx. -name / id | example massage | description +name / id | example message | description ------------------------------ | --------------- | ------------------------- json.exception.parse_error.101 | parse error at 2: unexpected end of input; expected string literal | This error indicates a syntax error while deserializing a JSON text. The error message describes that an unexpected token (character) was encountered, and the member @a byte indicates the error position. json.exception.parse_error.102 | parse error at 14: missing or wrong low surrogate | JSON uses the `\uxxxx` format to describe Unicode characters. Code points above above 0xFFFF are split into two `\uxxxx` entries ("surrogate pairs"). This error indicates that the surrogate pair is incomplete or contains an invalid code point. @@ -198,10 +240,23 @@ json.exception.parse_error.107 | parse error: JSON pointer must be empty or begi json.exception.parse_error.108 | parse error: escape character '~' must be followed with '0' or '1' | In a JSON Pointer, only `~0` and `~1` are valid escape sequences. json.exception.parse_error.109 | parse error: array index 'one' is not a number | A JSON Pointer array index must be a number. json.exception.parse_error.110 | parse error at 1: cannot read 2 bytes from vector | When parsing CBOR or MessagePack, the byte vector ends before the complete value has been read. -json.exception.parse_error.111 | parse error: bad input stream | Parsing CBOR or MessagePack from an input stream where the [`badbit` or `failbit`](http://en.cppreference.com/w/cpp/io/ios_base/iostate) is set. json.exception.parse_error.112 | parse error at 1: error reading CBOR; last byte: 0xf8 | Not all types of CBOR or MessagePack are supported. 
This exception occurs if an unsupported byte was read. json.exception.parse_error.113 | parse error at 2: expected a CBOR string; last byte: 0x98 | While parsing a map key, a value that is not a string has been read. +@note For an input with n bytes, 1 is the index of the first character and n+1 + is the index of the terminating null byte or the end of file. This also + holds true when reading a byte vector (CBOR or MessagePack). + +@liveexample{The following code shows how a `parse_error` exception can be +caught.,parse_error} + +@sa @ref exception for the base class of the library exceptions +@sa @ref invalid_iterator for exceptions indicating errors with iterators +@sa @ref type_error for exceptions indicating executing a member function with + a wrong type +@sa @ref out_of_range for exceptions indicating access out of the defined range +@sa @ref other_error for exceptions indicating other library errors + @since version 3.0.0 */ class parse_error : public exception @@ -209,13 +264,13 @@ class parse_error : public exception public: /*! @brief create a parse error exception - @param[in] id the id of the exception - @param[in] byte_ the byte index where the error occured (or 0 if - the position cannot be determined) - @param[in] what_arg the explanatory string + @param[in] id the id of the exception + @param[in] byte_ the byte index where the error occurred (or 0 if the + position cannot be determined) + @param[in] what_arg the explanatory string @return parse_error object */ - static parse_error create(int id, size_t byte_, const std::string& what_arg) + static parse_error create(int id, std::size_t byte_, const std::string& what_arg) { std::string w = exception::name("parse_error", id) + "parse error" + (byte_ != 0 ? (" at " + std::to_string(byte_)) : "") + @@ -228,25 +283,26 @@ class parse_error : public exception The byte index of the last read character in the input file. 
- @note For an input with n bytes, 1 is the index of the first character - and n+1 is the index of the terminating null byte or the end of - file. This also holds true when reading a byte vector (CBOR or - MessagePack). + @note For an input with n bytes, 1 is the index of the first character and + n+1 is the index of the terminating null byte or the end of file. + This also holds true when reading a byte vector (CBOR or MessagePack). */ - const size_t byte; + const std::size_t byte; private: - parse_error(int id_, size_t byte_, const char* what_arg) - : exception(id_, what_arg), byte(byte_) - {} + parse_error(int id_, std::size_t byte_, const char* what_arg) + : exception(id_, what_arg), byte(byte_) {} }; /*! @brief exception indicating errors with iterators +This exception is thrown if iterators passed to a library function do not match +the expected semantics. + Exceptions have ids 2xx. -name / id | example massage | description +name / id | example message | description ----------------------------------- | --------------- | ------------------------- json.exception.invalid_iterator.201 | iterators are not compatible | The iterators passed to constructor @ref basic_json(InputIT first, InputIT last) are not compatible, meaning they do not belong to the same container. Therefore, the range (@a first, @a last) is invalid. json.exception.invalid_iterator.202 | iterator does not fit current value | In an erase or insert function, the passed iterator @a pos does not belong to the JSON value for which the function was called. It hence does not define a valid position for the deletion/insertion. @@ -260,9 +316,19 @@ json.exception.invalid_iterator.209 | cannot use offsets with object iterators | json.exception.invalid_iterator.210 | iterators do not fit | The iterator range passed to the insert function are not compatible, meaning they do not belong to the same container. Therefore, the range (@a first, @a last) is invalid. 
json.exception.invalid_iterator.211 | passed iterators may not belong to container | The iterator range passed to the insert function must not be a subrange of the container to insert to. json.exception.invalid_iterator.212 | cannot compare iterators of different containers | When two iterators are compared, they must belong to the same container. -json.exception.invalid_iterator.213 | cannot compare order of object iterators | The order of object iterators cannot be compated, because JSON objects are unordered. +json.exception.invalid_iterator.213 | cannot compare order of object iterators | The order of object iterators cannot be compared, because JSON objects are unordered. json.exception.invalid_iterator.214 | cannot get value | Cannot get value for iterator: Either the iterator belongs to a null value or it is an iterator to a primitive type (number, boolean, or string), but the iterator is different to @ref begin(). +@liveexample{The following code shows how an `invalid_iterator` exception can be +caught.,invalid_iterator} + +@sa @ref exception for the base class of the library exceptions +@sa @ref parse_error for exceptions indicating a parse error +@sa @ref type_error for exceptions indicating executing a member function with + a wrong type +@sa @ref out_of_range for exceptions indicating access out of the defined range +@sa @ref other_error for exceptions indicating other library errors + @since version 3.0.0 */ class invalid_iterator : public exception @@ -276,13 +342,15 @@ class invalid_iterator : public exception private: invalid_iterator(int id_, const char* what_arg) - : exception(id_, what_arg) - {} + : exception(id_, what_arg) {} }; /*! @brief exception indicating executing a member function with a wrong type +This exception is thrown in case of a type error; that is, a library function is +executed on a JSON value whose type does not match the expected semantics. + Exceptions have ids 3xx. 
name / id | example message | description @@ -298,10 +366,20 @@ json.exception.type_error.308 | cannot use push_back() with string | The @ref pu json.exception.type_error.309 | cannot use insert() with | The @ref insert() member functions can only be executed for certain JSON types. json.exception.type_error.310 | cannot use swap() with number | The @ref swap() member functions can only be executed for certain JSON types. json.exception.type_error.311 | cannot use emplace_back() with string | The @ref emplace_back() member function can only be executed for certain JSON types. +json.exception.type_error.312 | cannot use update() with string | The @ref update() member functions can only be executed for certain JSON types. json.exception.type_error.313 | invalid value to unflatten | The @ref unflatten function converts an object whose keys are JSON Pointers back into an arbitrary nested JSON value. The JSON Pointers must not overlap, because then the resulting value would not be well defined. json.exception.type_error.314 | only objects can be unflattened | The @ref unflatten function only works for an object whose keys are JSON Pointers. json.exception.type_error.315 | values in object must be primitive | The @ref unflatten function only works for an object whose keys are JSON Pointers and whose values are primitive. 
+@liveexample{The following code shows how a `type_error` exception can be +caught.,type_error} + +@sa @ref exception for the base class of the library exceptions +@sa @ref parse_error for exceptions indicating a parse error +@sa @ref invalid_iterator for exceptions indicating errors with iterators +@sa @ref out_of_range for exceptions indicating access out of the defined range +@sa @ref other_error for exceptions indicating other library errors + @since version 3.0.0 */ class type_error : public exception @@ -314,14 +392,16 @@ class type_error : public exception } private: - type_error(int id_, const char* what_arg) - : exception(id_, what_arg) - {} + type_error(int id_, const char* what_arg) : exception(id_, what_arg) {} }; /*! @brief exception indicating access out of the defined range +This exception is thrown in case a library function is called on an input +parameter that exceeds the expected range, for instance in case of array +indices or nonexisting object keys. + Exceptions have ids 4xx. name / id | example message | description @@ -333,6 +413,16 @@ json.exception.out_of_range.404 | unresolved reference token 'foo' | A reference json.exception.out_of_range.405 | JSON pointer has no parent | The JSON Patch operations 'remove' and 'add' can not be applied to the root element of the JSON value. json.exception.out_of_range.406 | number overflow parsing '10E1000' | A parsed number could not be stored as without changing it to NaN or INF. 
+@liveexample{The following code shows how an `out_of_range` exception can be +caught.,out_of_range} + +@sa @ref exception for the base class of the library exceptions +@sa @ref parse_error for exceptions indicating a parse error +@sa @ref invalid_iterator for exceptions indicating errors with iterators +@sa @ref type_error for exceptions indicating executing a member function with + a wrong type +@sa @ref other_error for exceptions indicating other library errors + @since version 3.0.0 */ class out_of_range : public exception @@ -345,13 +435,14 @@ class out_of_range : public exception } private: - out_of_range(int id_, const char* what_arg) - : exception(id_, what_arg) - {} + out_of_range(int id_, const char* what_arg) : exception(id_, what_arg) {} }; /*! -@brief exception indicating other errors +@brief exception indicating other library errors + +This exception is thrown in case of errors that cannot be classified with the +other exception types. Exceptions have ids 5xx. @@ -360,6 +451,16 @@ name / id | example message | description json.exception.other_error.501 | unsuccessful: {"op":"test","path":"/baz", "value":"bar"} | A JSON Patch operation 'test' failed. The unsuccessful operation is also printed. json.exception.other_error.502 | invalid object size for conversion | Some conversions to user-defined types impose constraints on the object size (e.g. 
std::pair) +@sa @ref exception for the base class of the library exceptions +@sa @ref parse_error for exceptions indicating a parse error +@sa @ref invalid_iterator for exceptions indicating errors with iterators +@sa @ref type_error for exceptions indicating executing a member function with + a wrong type +@sa @ref out_of_range for exceptions indicating access out of the defined range + +@liveexample{The following code shows how an `other_error` exception can be +caught.,other_error} + @since version 3.0.0 */ class other_error : public exception @@ -372,9 +473,7 @@ class other_error : public exception } private: - other_error(int id_, const char* what_arg) - : exception(id_, what_arg) - {} + other_error(int id_, const char* what_arg) : exception(id_, what_arg) {} }; @@ -409,15 +508,15 @@ value with the default value for a given type */ enum class value_t : uint8_t { - null, ///< null value - object, ///< object (unordered set of name/value pairs) - array, ///< array (ordered collection of values) - string, ///< string value - boolean, ///< boolean value - number_integer, ///< number value (signed integer) - number_unsigned, ///< number value (unsigned integer) - number_float, ///< number value (floating-point) - discarded ///< discarded by the the parser callback function + null, ///< null value + object, ///< object (unordered set of name/value pairs) + array, ///< array (ordered collection of values) + string, ///< string value + boolean, ///< boolean value + number_integer, ///< number value (signed integer) + number_unsigned, ///< number value (unsigned integer) + number_float, ///< number value (floating-point) + discarded ///< discarded by the the parser callback function }; /*! 
@@ -444,13 +543,8 @@ inline bool operator<(const value_t lhs, const value_t rhs) noexcept }; // discarded values are not comparable - if (lhs == value_t::discarded or rhs == value_t::discarded) - { - return false; - } - - return order[static_cast(lhs)] < - order[static_cast(rhs)]; + return lhs != value_t::discarded and rhs != value_t::discarded and + order[static_cast(lhs)] < order[static_cast(rhs)]; } @@ -458,6 +552,11 @@ inline bool operator<(const value_t lhs, const value_t rhs) noexcept // helpers // ///////////// +template struct is_basic_json : std::false_type {}; + +NLOHMANN_BASIC_JSON_TPL_DECLARATION +struct is_basic_json : std::true_type {}; + // alias templates to reduce boilerplate template using enable_if_t = typename std::enable_if::type; @@ -467,7 +566,7 @@ using uncvref_t = typename std::remove_cv::typ // implementation of C++14 index_sequence and affiliates // source: https://stackoverflow.com/a/32223343 -template +template struct index_sequence { using type = index_sequence; @@ -478,19 +577,19 @@ struct index_sequence } }; -template +template struct merge_and_renumber; -template +template struct merge_and_renumber, index_sequence> : index_sequence < I1..., (sizeof...(I1) + I2)... 
> - { }; + {}; -template +template struct make_index_sequence : merge_and_renumber < typename make_index_sequence < N / 2 >::type, typename make_index_sequence < N - N / 2 >::type > -{ }; +{}; template<> struct make_index_sequence<0> : index_sequence<> { }; template<> struct make_index_sequence<1> : index_sequence<0> { }; @@ -551,6 +650,14 @@ struct external_constructor j.m_value = s; j.assert_invariant(); } + + template + static void construct(BasicJsonType& j, typename BasicJsonType::string_t&& s) + { + j.m_type = value_t::string; + j.m_value = std::move(s); + j.assert_invariant(); + } }; template<> @@ -600,6 +707,14 @@ struct external_constructor j.assert_invariant(); } + template + static void construct(BasicJsonType& j, typename BasicJsonType::array_t&& arr) + { + j.m_type = value_t::array; + j.m_value = std::move(arr); + j.assert_invariant(); + } + template::value, @@ -625,6 +740,18 @@ struct external_constructor } j.assert_invariant(); } + + template::value, int> = 0> + static void construct(BasicJsonType& j, const std::valarray& arr) + { + using std::begin; + using std::end; + j.m_type = value_t::array; + j.m_value = value_t::array; + j.m_value.array = j.template create(begin(arr), end(arr)); + j.assert_invariant(); + } }; template<> @@ -638,10 +765,17 @@ struct external_constructor j.assert_invariant(); } + template + static void construct(BasicJsonType& j, typename BasicJsonType::object_t&& obj) + { + j.m_type = value_t::object; + j.m_value = std::move(obj); + j.assert_invariant(); + } + template::value, - int> = 0> + typename BasicJsonType::object_t>::value, int> = 0> static void construct(BasicJsonType& j, const CompatibleObjectType& obj) { using std::begin; @@ -694,10 +828,8 @@ template struct is_compatible_object_type_impl { static constexpr auto value = - std::is_constructible::value and - std::is_constructible::value; + std::is_constructible::value and + std::is_constructible::value; }; template @@ -716,8 +848,7 @@ struct is_basic_json_nested_type 
static auto constexpr value = std::is_same::value or std::is_same::value or std::is_same::value or - std::is_same::value or - std::is_same::value; + std::is_same::value; }; template @@ -745,8 +876,7 @@ struct is_compatible_integer_type_impl; static constexpr auto value = - std::is_constructible::value and + std::is_constructible::value and CompatibleLimits::is_integer and RealLimits::is_signed == CompatibleLimits::is_signed; }; @@ -831,6 +961,12 @@ void to_json(BasicJsonType& j, const CompatibleString& s) external_constructor::construct(j, s); } +template +void to_json(BasicJsonType& j, typename BasicJsonType::string_t&& s) +{ + external_constructor::construct(j, std::move(s)); +} + template::value, int> = 0> void to_json(BasicJsonType& j, FloatType val) noexcept @@ -876,42 +1012,61 @@ template < is_compatible_array_type::value or std::is_same::value, int > = 0 > -void to_json(BasicJsonType& j, const CompatibleArrayType& arr) +void to_json(BasicJsonType& j, const CompatibleArrayType& arr) { external_constructor::construct(j, arr); } +template ::value, int> = 0> +void to_json(BasicJsonType& j, std::valarray arr) +{ + external_constructor::construct(j, std::move(arr)); +} + +template +void to_json(BasicJsonType& j, typename BasicJsonType::array_t&& arr) +{ + external_constructor::construct(j, std::move(arr)); +} + template < typename BasicJsonType, typename CompatibleObjectType, enable_if_t::value, int> = 0 > -void to_json(BasicJsonType& j, const CompatibleObjectType& arr) +void to_json(BasicJsonType& j, const CompatibleObjectType& obj) { - external_constructor::construct(j, arr); + external_constructor::construct(j, obj); } -template ::value, - int> = 0> +template +void to_json(BasicJsonType& j, typename BasicJsonType::object_t&& obj) +{ + external_constructor::construct(j, std::move(obj)); +} + +template::value, + int> = 0> void to_json(BasicJsonType& j, T (&arr)[N]) { external_constructor::construct(j, arr); } -template +template void to_json(BasicJsonType& j, 
const std::pair& p) { j = {p.first, p.second}; } -template +template void to_json_tuple_impl(BasicJsonType& j, const Tuple& t, index_sequence) { j = {std::get(t)...}; } -template +template void to_json(BasicJsonType& j, const std::tuple& t) { to_json_tuple_impl(j, t, index_sequence_for {}); @@ -933,35 +1088,31 @@ void get_arithmetic_value(const BasicJsonType& j, ArithmeticType& val) { case value_t::number_unsigned: { - val = static_cast( - *j.template get_ptr()); + val = static_cast(*j.template get_ptr()); break; } case value_t::number_integer: { - val = static_cast( - *j.template get_ptr()); + val = static_cast(*j.template get_ptr()); break; } case value_t::number_float: { - val = static_cast( - *j.template get_ptr()); + val = static_cast(*j.template get_ptr()); break; } + default: - { - JSON_THROW(type_error::create(302, "type must be number, but is " + j.type_name())); - } + JSON_THROW(type_error::create(302, "type must be number, but is " + std::string(j.type_name()))); } } template void from_json(const BasicJsonType& j, typename BasicJsonType::boolean_t& b) { - if (not j.is_boolean()) + if (JSON_UNLIKELY(not j.is_boolean())) { - JSON_THROW(type_error::create(302, "type must be boolean, but is " + j.type_name())); + JSON_THROW(type_error::create(302, "type must be boolean, but is " + std::string(j.type_name()))); } b = *j.template get_ptr(); } @@ -969,9 +1120,9 @@ void from_json(const BasicJsonType& j, typename BasicJsonType::boolean_t& b) template void from_json(const BasicJsonType& j, typename BasicJsonType::string_t& s) { - if (not j.is_string()) + if (JSON_UNLIKELY(not j.is_string())) { - JSON_THROW(type_error::create(302, "type must be string, but is " + j.type_name())); + JSON_THROW(type_error::create(302, "type must be string, but is " + std::string(j.type_name()))); } s = *j.template get_ptr(); } @@ -1006,9 +1157,9 @@ void from_json(const BasicJsonType& j, EnumType& e) template void from_json(const BasicJsonType& j, typename BasicJsonType::array_t& arr) 
{ - if (not j.is_array()) + if (JSON_UNLIKELY(not j.is_array())) { - JSON_THROW(type_error::create(302, "type must be array, but is " + j.type_name())); + JSON_THROW(type_error::create(302, "type must be array, but is " + std::string(j.type_name()))); } arr = *j.template get_ptr(); } @@ -1018,21 +1169,36 @@ template::value, int> = 0> void from_json(const BasicJsonType& j, std::forward_list& l) { - if (not j.is_array()) + if (JSON_UNLIKELY(not j.is_array())) { - JSON_THROW(type_error::create(302, "type must be array, but is " + j.type_name())); + JSON_THROW(type_error::create(302, "type must be array, but is " + std::string(j.type_name()))); } - - for (auto it = j.rbegin(), end = j.rend(); it != end; ++it) + std::transform(j.rbegin(), j.rend(), + std::front_inserter(l), [](const BasicJsonType & i) { - l.push_front(it->template get()); + return i.template get(); + }); +} + +// valarray doesn't have an insert method +template::value, int> = 0> +void from_json(const BasicJsonType& j, std::valarray& l) +{ + if (JSON_UNLIKELY(not j.is_array())) + { + JSON_THROW(type_error::create(302, "type must be array, but is " + std::string(j.type_name()))); + } + l.resize(j.size()); + for (size_t i = 0; i < j.size(); ++i) + { + l[i] = j[i]; } } template -void from_json_array_impl(const BasicJsonType& j, CompatibleArrayType& arr, priority_tag<0>) +void from_json_array_impl(const BasicJsonType& j, CompatibleArrayType& arr, priority_tag<0> /*unused*/) { - using std::begin; using std::end; std::transform(j.begin(), j.end(), @@ -1045,12 +1211,11 @@ void from_json_array_impl(const BasicJsonType& j, CompatibleArrayType& arr, prio } template -auto from_json_array_impl(const BasicJsonType& j, CompatibleArrayType& arr, priority_tag<1>) +auto from_json_array_impl(const BasicJsonType& j, CompatibleArrayType& arr, priority_tag<1> /*unused*/) -> decltype( arr.reserve(std::declval()), void()) { - using std::begin; using std::end; arr.reserve(j.size()); @@ -1063,8 +1228,8 @@ auto 
from_json_array_impl(const BasicJsonType& j, CompatibleArrayType& arr, prio }); } -template -void from_json_array_impl(const BasicJsonType& j, std::array& arr, priority_tag<2>) +template +void from_json_array_impl(const BasicJsonType& j, std::array& arr, priority_tag<2> /*unused*/) { for (std::size_t i = 0; i < N; ++i) { @@ -1078,9 +1243,9 @@ template::value, int> = 0> void from_json(const BasicJsonType& j, CompatibleArrayType& arr) { - if (not j.is_array()) + if (JSON_UNLIKELY(not j.is_array())) { - JSON_THROW(type_error::create(302, "type must be array, but is " + j.type_name())); + JSON_THROW(type_error::create(302, "type must be array, but is " + std::string(j.type_name()))); } from_json_array_impl(j, arr, priority_tag<2> {}); @@ -1090,24 +1255,19 @@ template::value, int> = 0> void from_json(const BasicJsonType& j, CompatibleObjectType& obj) { - if (not j.is_object()) + if (JSON_UNLIKELY(not j.is_object())) { - JSON_THROW(type_error::create(302, "type must be object, but is " + j.type_name())); + JSON_THROW(type_error::create(302, "type must be object, but is " + std::string(j.type_name()))); } auto inner_object = j.template get_ptr(); - using std::begin; - using std::end; using value_type = typename CompatibleObjectType::value_type; std::transform( inner_object->begin(), inner_object->end(), std::inserter(obj, obj.begin()), [](typename BasicJsonType::object_t::value_type const & p) { - return value_type( - p.first, - p.second - .template get()); + return value_type(p.first, p.second.template get()); }); } @@ -1147,26 +1307,25 @@ void from_json(const BasicJsonType& j, ArithmeticType& val) val = static_cast(*j.template get_ptr()); break; } + default: - { - JSON_THROW(type_error::create(302, "type must be number, but is " + j.type_name())); - } + JSON_THROW(type_error::create(302, "type must be number, but is " + std::string(j.type_name()))); } } -template -void from_json(const BasicJsonType& j, std::pair& p) +template +void from_json(const BasicJsonType& j, 
std::pair& p) { - p = {j.at(0), j.at(1)}; + p = {j.at(0).template get(), j.at(1).template get()}; } -template +template void from_json_tuple_impl(const BasicJsonType& j, Tuple& t, index_sequence) { - t = std::make_tuple(j.at(Idx)...); + t = std::make_tuple(j.at(Idx).template get::type>()...); } -template +template void from_json(const BasicJsonType& j, std::tuple& t) { from_json_tuple_impl(j, t, index_sequence_for {}); @@ -1176,14 +1335,14 @@ struct to_json_fn { private: template - auto call(BasicJsonType& j, T&& val, priority_tag<1>) const noexcept(noexcept(to_json(j, std::forward(val)))) + auto call(BasicJsonType& j, T&& val, priority_tag<1> /*unused*/) const noexcept(noexcept(to_json(j, std::forward(val)))) -> decltype(to_json(j, std::forward(val)), void()) { return to_json(j, std::forward(val)); } template - void call(BasicJsonType&, T&&, priority_tag<0>) const noexcept + void call(BasicJsonType& /*unused*/, T&& /*unused*/, priority_tag<0> /*unused*/) const noexcept { static_assert(sizeof(BasicJsonType) == 0, "could not find to_json() method in T's namespace"); @@ -1202,7 +1361,7 @@ struct from_json_fn { private: template - auto call(const BasicJsonType& j, T& val, priority_tag<1>) const + auto call(const BasicJsonType& j, T& val, priority_tag<1> /*unused*/) const noexcept(noexcept(from_json(j, val))) -> decltype(from_json(j, val), void()) { @@ -1210,7 +1369,7 @@ struct from_json_fn } template - void call(const BasicJsonType&, T&, priority_tag<0>) const noexcept + void call(const BasicJsonType& /*unused*/, T& /*unused*/, priority_tag<0> /*unused*/) const noexcept { static_assert(sizeof(BasicJsonType) == 0, "could not find from_json() method in T's namespace"); @@ -1234,8 +1393,5478 @@ struct static_const template constexpr T static_const::value; -} // namespace detail +//////////////////// +// input adapters // +//////////////////// + +/// abstract input adapter interface +struct input_adapter_protocol +{ + virtual int get_character() = 0; + virtual std::string 
read(std::size_t offset, std::size_t length) = 0; + virtual ~input_adapter_protocol() = default; +}; + +/// a type to simplify interfaces +using input_adapter_t = std::shared_ptr; + +/// input adapter for cached stream input +template +class cached_input_stream_adapter : public input_adapter_protocol +{ + public: + explicit cached_input_stream_adapter(std::istream& i) + : is(i), start_position(is.tellg()) + { + fill_buffer(); + + // skip byte order mark + if (fill_size >= 3 and buffer[0] == '\xEF' and buffer[1] == '\xBB' and buffer[2] == '\xBF') + { + buffer_pos += 3; + processed_chars += 3; + } + } + + ~cached_input_stream_adapter() override + { + // clear stream flags + is.clear(); + // We initially read a lot of characters into the buffer, and we may + // not have processed all of them. Therefore, we need to "rewind" the + // stream after the last processed char. + is.seekg(start_position); + is.ignore(static_cast(processed_chars)); + // clear stream flags + is.clear(); + } + + int get_character() override + { + // check if refilling is necessary and possible + if (buffer_pos == fill_size and not eof) + { + fill_buffer(); + + // check and remember that filling did not yield new input + if (fill_size == 0) + { + eof = true; + return std::char_traits::eof(); + } + + // the buffer is ready + buffer_pos = 0; + } + + ++processed_chars; + assert(buffer_pos < buffer.size()); + return buffer[buffer_pos++] & 0xFF; + } + + std::string read(std::size_t offset, std::size_t length) override + { + // create buffer + std::string result(length, '\0'); + + // save stream position + const auto current_pos = is.tellg(); + // save stream flags + const auto flags = is.rdstate(); + + // clear stream flags + is.clear(); + // set stream position + is.seekg(static_cast(offset)); + // read bytes + is.read(&result[0], static_cast(length)); + + // reset stream position + is.seekg(current_pos); + // reset stream flags + is.setstate(flags); + + return result; + } + + private: + void 
fill_buffer() + { + // fill + is.read(buffer.data(), static_cast(buffer.size())); + // store number of bytes in the buffer + fill_size = static_cast(is.gcount()); + } + + /// the associated input stream + std::istream& is; + + /// chars returned via get_character() + std::size_t processed_chars = 0; + /// chars processed in the current buffer + std::size_t buffer_pos = 0; + + /// whether stream reached eof + bool eof = false; + /// how many chars have been copied to the buffer by last (re)fill + std::size_t fill_size = 0; + + /// position of the stream when we started + const std::streampos start_position; + + /// internal buffer + std::array buffer{{}}; +}; + +/// input adapter for buffer input +class input_buffer_adapter : public input_adapter_protocol +{ + public: + input_buffer_adapter(const char* b, const std::size_t l) + : cursor(b), limit(b + l), start(b) + { + // skip byte order mark + if (l >= 3 and b[0] == '\xEF' and b[1] == '\xBB' and b[2] == '\xBF') + { + cursor += 3; + } + } + + // delete because of pointer members + input_buffer_adapter(const input_buffer_adapter&) = delete; + input_buffer_adapter& operator=(input_buffer_adapter&) = delete; + + int get_character() noexcept override + { + if (JSON_LIKELY(cursor < limit)) + { + return *(cursor++) & 0xFF; + } + + return std::char_traits::eof(); + } + + std::string read(std::size_t offset, std::size_t length) override + { + // avoid reading too many characters + const auto max_length = static_cast(limit - start); + return std::string(start + offset, (std::min)(length, max_length - offset)); + } + + private: + /// pointer to the current character + const char* cursor; + /// pointer past the last character + const char* limit; + /// pointer to the first character + const char* start; +}; + +class input_adapter +{ + public: + // native support + + /// input adapter for input stream + input_adapter(std::istream& i) + : ia(std::make_shared>(i)) {} + + /// input adapter for input stream + 
input_adapter(std::istream&& i) + : ia(std::make_shared>(i)) {} + + /// input adapter for buffer + template::value and + std::is_integral< + typename std::remove_pointer::type>::value and + sizeof(typename std::remove_pointer::type) == 1, + int>::type = 0> + input_adapter(CharT b, std::size_t l) + : ia(std::make_shared(reinterpret_cast(b), l)) {} + + // derived support + + /// input adapter for string literal + template::value and + std::is_integral< + typename std::remove_pointer::type>::value and + sizeof(typename std::remove_pointer::type) == 1, + int>::type = 0> + input_adapter(CharT b) + : input_adapter(reinterpret_cast(b), + std::strlen(reinterpret_cast(b))) {} + + /// input adapter for iterator range with contiguous storage + template::iterator_category, + std::random_access_iterator_tag>::value, + int>::type = 0> + input_adapter(IteratorType first, IteratorType last) + { + // assertion to check that the iterator range is indeed contiguous, + // see http://stackoverflow.com/a/35008842/266378 for more discussion + assert(std::accumulate( + first, last, std::pair(true, 0), + [&first](std::pair res, decltype(*first) val) + { + res.first &= (val == *(std::next(std::addressof(*first), res.second++))); + return res; + }).first); + + // assertion to check that each element is 1 byte long + static_assert( + sizeof(typename std::iterator_traits::value_type) == 1, + "each element in the iterator range must have the size of 1 byte"); + + const auto len = static_cast(std::distance(first, last)); + if (JSON_LIKELY(len > 0)) + { + // there is at least one element: use the address of first + ia = std::make_shared(reinterpret_cast(&(*first)), len); + } + else + { + // the address of first cannot be used: use nullptr + ia = std::make_shared(nullptr, len); + } + } + + /// input adapter for array + template + input_adapter(T (&array)[N]) + : input_adapter(std::begin(array), std::end(array)) {} + + /// input adapter for contiguous container + template < + class 
ContiguousContainer, + typename std::enable_if < + not std::is_pointer::value and + std::is_base_of()))>::iterator_category>::value, + int >::type = 0 > + input_adapter(const ContiguousContainer& c) + : input_adapter(std::begin(c), std::end(c)) {} + + operator input_adapter_t() + { + return ia; + } + + private: + /// the actual adapter + input_adapter_t ia = nullptr; +}; + +////////////////////// +// lexer and parser // +////////////////////// + +/*! +@brief lexical analysis + +This class organizes the lexical analysis during JSON deserialization. +*/ +template +class lexer +{ + using number_integer_t = typename BasicJsonType::number_integer_t; + using number_unsigned_t = typename BasicJsonType::number_unsigned_t; + using number_float_t = typename BasicJsonType::number_float_t; + + public: + /// token types for the parser + enum class token_type + { + uninitialized, ///< indicating the scanner is uninitialized + literal_true, ///< the `true` literal + literal_false, ///< the `false` literal + literal_null, ///< the `null` literal + value_string, ///< a string -- use get_string() for actual value + value_unsigned, ///< an unsigned integer -- use get_number_unsigned() for actual value + value_integer, ///< a signed integer -- use get_number_integer() for actual value + value_float, ///< an floating point number -- use get_number_float() for actual value + begin_array, ///< the character for array begin `[` + begin_object, ///< the character for object begin `{` + end_array, ///< the character for array end `]` + end_object, ///< the character for object end `}` + name_separator, ///< the name separator `:` + value_separator, ///< the value separator `,` + parse_error, ///< indicating a parse error + end_of_input, ///< indicating the end of the input buffer + literal_or_value ///< a literal or the begin of a value (only for diagnostics) + }; + + /// return name of values of type token_type (only used for errors) + static const char* token_type_name(const token_type t) 
noexcept + { + switch (t) + { + case token_type::uninitialized: + return ""; + case token_type::literal_true: + return "true literal"; + case token_type::literal_false: + return "false literal"; + case token_type::literal_null: + return "null literal"; + case token_type::value_string: + return "string literal"; + case lexer::token_type::value_unsigned: + case lexer::token_type::value_integer: + case lexer::token_type::value_float: + return "number literal"; + case token_type::begin_array: + return "'['"; + case token_type::begin_object: + return "'{'"; + case token_type::end_array: + return "']'"; + case token_type::end_object: + return "'}'"; + case token_type::name_separator: + return "':'"; + case token_type::value_separator: + return "','"; + case token_type::parse_error: + return ""; + case token_type::end_of_input: + return "end of input"; + case token_type::literal_or_value: + return "'[', '{', or a literal"; + default: // catch non-enum values + return "unknown token"; // LCOV_EXCL_LINE + } + } + + explicit lexer(detail::input_adapter_t adapter) + : ia(std::move(adapter)), decimal_point_char(get_decimal_point()) {} + + // delete because of pointer members + lexer(const lexer&) = delete; + lexer& operator=(lexer&) = delete; + + private: + ///////////////////// + // locales + ///////////////////// + + /// return the locale-dependent decimal point + static char get_decimal_point() noexcept + { + const auto loc = localeconv(); + assert(loc != nullptr); + return (loc->decimal_point == nullptr) ? '.' : loc->decimal_point[0]; + } + + ///////////////////// + // scan functions + ///////////////////// + + /*! + @brief get codepoint from 4 hex characters following `\u` + + For input "\u c1 c2 c3 c4" the codepoint is: + (c1 * 0x1000) + (c2 * 0x0100) + (c3 * 0x0010) + c4 + = (c1 << 12) + (c2 << 8) + (c3 << 4) + (c4 << 0) + + Furthermore, the possible characters '0'..'9', 'A'..'F', and 'a'..'f' + must be converted to the integers 0x0..0x9, 0xA..0xF, 0xA..0xF, resp. 
The + conversion is done by subtracting the offset (0x30, 0x37, and 0x57) + between the ASCII value of the character and the desired integer value. + + @return codepoint (0x0000..0xFFFF) or -1 in case of an error (e.g. EOF or + non-hex character) + */ + int get_codepoint() + { + // this function only makes sense after reading `\u` + assert(current == 'u'); + int codepoint = 0; + + const auto factors = { 12, 8, 4, 0 }; + for (const auto factor : factors) + { + get(); + + if (current >= '0' and current <= '9') + { + codepoint += ((current - 0x30) << factor); + } + else if (current >= 'A' and current <= 'F') + { + codepoint += ((current - 0x37) << factor); + } + else if (current >= 'a' and current <= 'f') + { + codepoint += ((current - 0x57) << factor); + } + else + { + return -1; + } + } + + assert(0x0000 <= codepoint and codepoint <= 0xFFFF); + return codepoint; + } + + /*! + @brief check if the next byte(s) are inside a given range + + Adds the current byte and, for each passed range, reads a new byte and + checks if it is inside the range. If a violation was detected, set up an + error message and return false. Otherwise, return true. + + @return true if and only if no range violation was detected + */ + bool next_byte_in_range(std::initializer_list ranges) + { + assert(ranges.size() == 2 or ranges.size() == 4 or ranges.size() == 6); + add(current); + + for (auto range = ranges.begin(); range != ranges.end(); ++range) + { + get(); + if (JSON_LIKELY(*range <= current and current <= *(++range))) + { + add(current); + } + else + { + error_message = "invalid string: ill-formed UTF-8 byte"; + return false; + } + } + + return true; + } + + /*! + @brief scan a string literal + + This function scans a string according to Sect. 7 of RFC 7159. While + scanning, bytes are escaped and copied into buffer yytext. Then the + function returns successfully, yytext is null-terminated and yylen + contains the number of bytes in the string. 
+ + @return token_type::value_string if string could be successfully scanned, + token_type::parse_error otherwise + + @note In case of errors, variable error_message contains a textual + description. + */ + token_type scan_string() + { + // reset yytext (ignore opening quote) + reset(); + + // we entered the function by reading an open quote + assert(current == '\"'); + + while (true) + { + // get next character + switch (get()) + { + // end of file while parsing string + case std::char_traits::eof(): + { + error_message = "invalid string: missing closing quote"; + return token_type::parse_error; + } + + // closing quote + case '\"': + { + // terminate yytext + add('\0'); + --yylen; + return token_type::value_string; + } + + // escapes + case '\\': + { + switch (get()) + { + // quotation mark + case '\"': + add('\"'); + break; + // reverse solidus + case '\\': + add('\\'); + break; + // solidus + case '/': + add('/'); + break; + // backspace + case 'b': + add('\b'); + break; + // form feed + case 'f': + add('\f'); + break; + // line feed + case 'n': + add('\n'); + break; + // carriage return + case 'r': + add('\r'); + break; + // tab + case 't': + add('\t'); + break; + + // unicode escapes + case 'u': + { + int codepoint; + const int codepoint1 = get_codepoint(); + + if (JSON_UNLIKELY(codepoint1 == -1)) + { + error_message = "invalid string: '\\u' must be followed by 4 hex digits"; + return token_type::parse_error; + } + + // check if code point is a high surrogate + if (0xD800 <= codepoint1 and codepoint1 <= 0xDBFF) + { + // expect next \uxxxx entry + if (JSON_LIKELY(get() == '\\' and get() == 'u')) + { + const int codepoint2 = get_codepoint(); + + if (JSON_UNLIKELY(codepoint2 == -1)) + { + error_message = "invalid string: '\\u' must be followed by 4 hex digits"; + return token_type::parse_error; + } + + // check if codepoint2 is a low surrogate + if (JSON_LIKELY(0xDC00 <= codepoint2 and codepoint2 <= 0xDFFF)) + { + codepoint = + // high surrogate occupies the 
most significant 22 bits + (codepoint1 << 10) + // low surrogate occupies the least significant 15 bits + + codepoint2 + // there is still the 0xD800, 0xDC00 and 0x10000 noise + // in the result so we have to subtract with: + // (0xD800 << 10) + DC00 - 0x10000 = 0x35FDC00 + - 0x35FDC00; + } + else + { + error_message = "invalid string: surrogate U+DC00..U+DFFF must be followed by U+DC00..U+DFFF"; + return token_type::parse_error; + } + } + else + { + error_message = "invalid string: surrogate U+DC00..U+DFFF must be followed by U+DC00..U+DFFF"; + return token_type::parse_error; + } + } + else + { + if (JSON_UNLIKELY(0xDC00 <= codepoint1 and codepoint1 <= 0xDFFF)) + { + error_message = "invalid string: surrogate U+DC00..U+DFFF must follow U+D800..U+DBFF"; + return token_type::parse_error; + } + + // only work with first code point + codepoint = codepoint1; + } + + // result of the above calculation yields a proper codepoint + assert(0x00 <= codepoint and codepoint <= 0x10FFFF); + + // translate code point to bytes + if (codepoint < 0x80) + { + // 1-byte characters: 0xxxxxxx (ASCII) + add(codepoint); + } + else if (codepoint <= 0x7ff) + { + // 2-byte characters: 110xxxxx 10xxxxxx + add(0xC0 | (codepoint >> 6)); + add(0x80 | (codepoint & 0x3F)); + } + else if (codepoint <= 0xffff) + { + // 3-byte characters: 1110xxxx 10xxxxxx 10xxxxxx + add(0xE0 | (codepoint >> 12)); + add(0x80 | ((codepoint >> 6) & 0x3F)); + add(0x80 | (codepoint & 0x3F)); + } + else + { + // 4-byte characters: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx + add(0xF0 | (codepoint >> 18)); + add(0x80 | ((codepoint >> 12) & 0x3F)); + add(0x80 | ((codepoint >> 6) & 0x3F)); + add(0x80 | (codepoint & 0x3F)); + } + + break; + } + + // other characters after escape + default: + error_message = "invalid string: forbidden character after backslash"; + return token_type::parse_error; + } + + break; + } + + // invalid control characters + case 0x00: + case 0x01: + case 0x02: + case 0x03: + case 0x04: + case 0x05: + case 
0x06: + case 0x07: + case 0x08: + case 0x09: + case 0x0a: + case 0x0b: + case 0x0c: + case 0x0d: + case 0x0e: + case 0x0f: + case 0x10: + case 0x11: + case 0x12: + case 0x13: + case 0x14: + case 0x15: + case 0x16: + case 0x17: + case 0x18: + case 0x19: + case 0x1a: + case 0x1b: + case 0x1c: + case 0x1d: + case 0x1e: + case 0x1f: + { + error_message = "invalid string: control character must be escaped"; + return token_type::parse_error; + } + + // U+0020..U+007F (except U+0022 (quote) and U+005C (backspace)) + case 0x20: + case 0x21: + case 0x23: + case 0x24: + case 0x25: + case 0x26: + case 0x27: + case 0x28: + case 0x29: + case 0x2a: + case 0x2b: + case 0x2c: + case 0x2d: + case 0x2e: + case 0x2f: + case 0x30: + case 0x31: + case 0x32: + case 0x33: + case 0x34: + case 0x35: + case 0x36: + case 0x37: + case 0x38: + case 0x39: + case 0x3a: + case 0x3b: + case 0x3c: + case 0x3d: + case 0x3e: + case 0x3f: + case 0x40: + case 0x41: + case 0x42: + case 0x43: + case 0x44: + case 0x45: + case 0x46: + case 0x47: + case 0x48: + case 0x49: + case 0x4a: + case 0x4b: + case 0x4c: + case 0x4d: + case 0x4e: + case 0x4f: + case 0x50: + case 0x51: + case 0x52: + case 0x53: + case 0x54: + case 0x55: + case 0x56: + case 0x57: + case 0x58: + case 0x59: + case 0x5a: + case 0x5b: + case 0x5d: + case 0x5e: + case 0x5f: + case 0x60: + case 0x61: + case 0x62: + case 0x63: + case 0x64: + case 0x65: + case 0x66: + case 0x67: + case 0x68: + case 0x69: + case 0x6a: + case 0x6b: + case 0x6c: + case 0x6d: + case 0x6e: + case 0x6f: + case 0x70: + case 0x71: + case 0x72: + case 0x73: + case 0x74: + case 0x75: + case 0x76: + case 0x77: + case 0x78: + case 0x79: + case 0x7a: + case 0x7b: + case 0x7c: + case 0x7d: + case 0x7e: + case 0x7f: + { + add(current); + break; + } + + // U+0080..U+07FF: bytes C2..DF 80..BF + case 0xc2: + case 0xc3: + case 0xc4: + case 0xc5: + case 0xc6: + case 0xc7: + case 0xc8: + case 0xc9: + case 0xca: + case 0xcb: + case 0xcc: + case 0xcd: + case 0xce: + case 0xcf: + case 
0xd0: + case 0xd1: + case 0xd2: + case 0xd3: + case 0xd4: + case 0xd5: + case 0xd6: + case 0xd7: + case 0xd8: + case 0xd9: + case 0xda: + case 0xdb: + case 0xdc: + case 0xdd: + case 0xde: + case 0xdf: + { + if (JSON_UNLIKELY(not next_byte_in_range({0x80, 0xBF}))) + { + return token_type::parse_error; + } + break; + } + + // U+0800..U+0FFF: bytes E0 A0..BF 80..BF + case 0xe0: + { + if (JSON_UNLIKELY(not (next_byte_in_range({0xA0, 0xBF, 0x80, 0xBF})))) + { + return token_type::parse_error; + } + break; + } + + // U+1000..U+CFFF: bytes E1..EC 80..BF 80..BF + // U+E000..U+FFFF: bytes EE..EF 80..BF 80..BF + case 0xe1: + case 0xe2: + case 0xe3: + case 0xe4: + case 0xe5: + case 0xe6: + case 0xe7: + case 0xe8: + case 0xe9: + case 0xea: + case 0xeb: + case 0xec: + case 0xee: + case 0xef: + { + if (JSON_UNLIKELY(not (next_byte_in_range({0x80, 0xBF, 0x80, 0xBF})))) + { + return token_type::parse_error; + } + break; + } + + // U+D000..U+D7FF: bytes ED 80..9F 80..BF + case 0xed: + { + if (JSON_UNLIKELY(not (next_byte_in_range({0x80, 0x9F, 0x80, 0xBF})))) + { + return token_type::parse_error; + } + break; + } + + // U+10000..U+3FFFF F0 90..BF 80..BF 80..BF + case 0xf0: + { + if (JSON_UNLIKELY(not (next_byte_in_range({0x90, 0xBF, 0x80, 0xBF, 0x80, 0xBF})))) + { + return token_type::parse_error; + } + break; + } + + // U+40000..U+FFFFF F1..F3 80..BF 80..BF 80..BF + case 0xf1: + case 0xf2: + case 0xf3: + { + if (JSON_UNLIKELY(not (next_byte_in_range({0x80, 0xBF, 0x80, 0xBF, 0x80, 0xBF})))) + { + return token_type::parse_error; + } + break; + } + + // U+100000..U+10FFFF F4 80..8F 80..BF 80..BF + case 0xf4: + { + if (JSON_UNLIKELY(not (next_byte_in_range({0x80, 0x8F, 0x80, 0xBF, 0x80, 0xBF})))) + { + return token_type::parse_error; + } + break; + } + + // remaining bytes (80..C1 and F5..FF) are ill-formed + default: + { + error_message = "invalid string: ill-formed UTF-8 byte"; + return token_type::parse_error; + } + } + } + } + + static void strtof(float& f, const char* str, char** 
endptr) noexcept + { + f = std::strtof(str, endptr); + } + + static void strtof(double& f, const char* str, char** endptr) noexcept + { + f = std::strtod(str, endptr); + } + + static void strtof(long double& f, const char* str, char** endptr) noexcept + { + f = std::strtold(str, endptr); + } + + /*! + @brief scan a number literal + + This function scans a string according to Sect. 6 of RFC 7159. + + The function is realized with a deterministic finite state machine derived + from the grammar described in RFC 7159. Starting in state "init", the + input is read and used to determined the next state. Only state "done" + accepts the number. State "error" is a trap state to model errors. In the + table below, "anything" means any character but the ones listed before. + + state | 0 | 1-9 | e E | + | - | . | anything + ---------|----------|----------|----------|---------|---------|----------|----------- + init | zero | any1 | [error] | [error] | minus | [error] | [error] + minus | zero | any1 | [error] | [error] | [error] | [error] | [error] + zero | done | done | exponent | done | done | decimal1 | done + any1 | any1 | any1 | exponent | done | done | decimal1 | done + decimal1 | decimal2 | [error] | [error] | [error] | [error] | [error] | [error] + decimal2 | decimal2 | decimal2 | exponent | done | done | done | done + exponent | any2 | any2 | [error] | sign | sign | [error] | [error] + sign | any2 | any2 | [error] | [error] | [error] | [error] | [error] + any2 | any2 | any2 | done | done | done | done | done + + The state machine is realized with one label per state (prefixed with + "scan_number_") and `goto` statements between them. The state machine + contains cycles, but any cycle can be left when EOF is read. Therefore, + the function is guaranteed to terminate. + + During scanning, the read bytes are stored in yytext. This string is + then converted to a signed integer, an unsigned integer, or a + floating-point number. 
+ + @return token_type::value_unsigned, token_type::value_integer, or + token_type::value_float if number could be successfully scanned, + token_type::parse_error otherwise + + @note The scanner is independent of the current locale. Internally, the + locale's decimal point is used instead of `.` to work with the + locale-dependent converters. + */ + token_type scan_number() + { + // reset yytext to store the number's bytes + reset(); + + // the type of the parsed number; initially set to unsigned; will be + // changed if minus sign, decimal point or exponent is read + token_type number_type = token_type::value_unsigned; + + // state (init): we just found out we need to scan a number + switch (current) + { + case '-': + { + add(current); + goto scan_number_minus; + } + + case '0': + { + add(current); + goto scan_number_zero; + } + + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': + { + add(current); + goto scan_number_any1; + } + + default: + { + // all other characters are rejected outside scan_number() + assert(false); // LCOV_EXCL_LINE + } + } + +scan_number_minus: + // state: we just parsed a leading minus sign + number_type = token_type::value_integer; + switch (get()) + { + case '0': + { + add(current); + goto scan_number_zero; + } + + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': + { + add(current); + goto scan_number_any1; + } + + default: + { + error_message = "invalid number; expected digit after '-'"; + return token_type::parse_error; + } + } + +scan_number_zero: + // state: we just parse a zero (maybe with a leading minus sign) + switch (get()) + { + case '.': + { + add(decimal_point_char); + goto scan_number_decimal1; + } + + case 'e': + case 'E': + { + add(current); + goto scan_number_exponent; + } + + default: + goto scan_number_done; + } + +scan_number_any1: + // state: we just parsed a number 0-9 (maybe with a leading minus sign) + 
switch (get()) + { + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': + { + add(current); + goto scan_number_any1; + } + + case '.': + { + add(decimal_point_char); + goto scan_number_decimal1; + } + + case 'e': + case 'E': + { + add(current); + goto scan_number_exponent; + } + + default: + goto scan_number_done; + } + +scan_number_decimal1: + // state: we just parsed a decimal point + number_type = token_type::value_float; + switch (get()) + { + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': + { + add(current); + goto scan_number_decimal2; + } + + default: + { + error_message = "invalid number; expected digit after '.'"; + return token_type::parse_error; + } + } + +scan_number_decimal2: + // we just parsed at least one number after a decimal point + switch (get()) + { + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': + { + add(current); + goto scan_number_decimal2; + } + + case 'e': + case 'E': + { + add(current); + goto scan_number_exponent; + } + + default: + goto scan_number_done; + } + +scan_number_exponent: + // we just parsed an exponent + number_type = token_type::value_float; + switch (get()) + { + case '+': + case '-': + { + add(current); + goto scan_number_sign; + } + + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': + { + add(current); + goto scan_number_any2; + } + + default: + { + error_message = + "invalid number; expected '+', '-', or digit after exponent"; + return token_type::parse_error; + } + } + +scan_number_sign: + // we just parsed an exponent sign + switch (get()) + { + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': + { + add(current); + goto scan_number_any2; + } + + default: + { + error_message = 
"invalid number; expected digit after exponent sign"; + return token_type::parse_error; + } + } + +scan_number_any2: + // we just parsed a number after the exponent or exponent sign + switch (get()) + { + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': + { + add(current); + goto scan_number_any2; + } + + default: + goto scan_number_done; + } + +scan_number_done: + // unget the character after the number (we only read it to know that + // we are done scanning a number) + --chars_read; + next_unget = true; + + // terminate token + add('\0'); + --yylen; + + char* endptr = nullptr; + errno = 0; + + // try to parse integers first and fall back to floats + if (number_type == token_type::value_unsigned) + { + const auto x = std::strtoull(yytext.data(), &endptr, 10); + + // we checked the number format before + assert(endptr == yytext.data() + yylen); + + if (errno == 0) + { + value_unsigned = static_cast(x); + if (value_unsigned == x) + { + return token_type::value_unsigned; + } + } + } + else if (number_type == token_type::value_integer) + { + const auto x = std::strtoll(yytext.data(), &endptr, 10); + + // we checked the number format before + assert(endptr == yytext.data() + yylen); + + if (errno == 0) + { + value_integer = static_cast(x); + if (value_integer == x) + { + return token_type::value_integer; + } + } + } + + // this code is reached if we parse a floating-point number or if an + // integer conversion above failed + strtof(value_float, yytext.data(), &endptr); + + // we checked the number format before + assert(endptr == yytext.data() + yylen); + + return token_type::value_float; + } + + /*! 
+ @param[in] literal_text the literal text to expect + @param[in] length the length of the passed literal text + @param[in] return_type the token type to return on success + */ + token_type scan_literal(const char* literal_text, const std::size_t length, + token_type return_type) + { + assert(current == literal_text[0]); + for (std::size_t i = 1; i < length; ++i) + { + if (JSON_UNLIKELY(get() != literal_text[i])) + { + error_message = "invalid literal"; + return token_type::parse_error; + } + } + return return_type; + } + + ///////////////////// + // input management + ///////////////////// + + /// reset yytext + void reset() noexcept + { + yylen = 0; + start_pos = chars_read - 1; + } + + /// get a character from the input + int get() + { + ++chars_read; + return next_unget ? (next_unget = false, current) + : (current = ia->get_character()); + } + + /// add a character to yytext + void add(int c) + { + // resize yytext if necessary; this condition is deemed unlikely, + // because we start with a 1024-byte buffer + if (JSON_UNLIKELY((yylen + 1 > yytext.capacity()))) + { + yytext.resize(2 * yytext.capacity(), '\0'); + } + assert(yylen < yytext.size()); + yytext[yylen++] = static_cast(c); + } + + public: + ///////////////////// + // value getters + ///////////////////// + + /// return integer value + constexpr number_integer_t get_number_integer() const noexcept + { + return value_integer; + } + + /// return unsigned integer value + constexpr number_unsigned_t get_number_unsigned() const noexcept + { + return value_unsigned; + } + + /// return floating-point value + constexpr number_float_t get_number_float() const noexcept + { + return value_float; + } + + /// return string value + const std::string get_string() + { + // yytext cannot be returned as char*, because it may contain a null + // byte (parsed as "\u0000") + return std::string(yytext.data(), yylen); + } + + ///////////////////// + // diagnostics + ///////////////////// + + /// return position of last read 
token + constexpr std::size_t get_position() const noexcept + { + return chars_read; + } + + /// return the last read token (for errors only) + std::string get_token_string() const + { + // get the raw byte sequence of the last token + std::string s = ia->read(start_pos, chars_read - start_pos); + + // escape control characters + std::string result; + for (auto c : s) + { + if (c == '\0' or c == std::char_traits::eof()) + { + // ignore EOF + continue; + } + else if ('\x00' <= c and c <= '\x1f') + { + // escape control characters + std::stringstream ss; + ss << "(c) << ">"; + result += ss.str(); + } + else + { + // add character as is + result.push_back(c); + } + } + + return result; + } + + /// return syntax error message + constexpr const char* get_error_message() const noexcept + { + return error_message; + } + + ///////////////////// + // actual scanner + ///////////////////// + + token_type scan() + { + // read next character and ignore whitespace + do + { + get(); + } + while (current == ' ' or current == '\t' or current == '\n' or current == '\r'); + + switch (current) + { + // structural characters + case '[': + return token_type::begin_array; + case ']': + return token_type::end_array; + case '{': + return token_type::begin_object; + case '}': + return token_type::end_object; + case ':': + return token_type::name_separator; + case ',': + return token_type::value_separator; + + // literals + case 't': + return scan_literal("true", 4, token_type::literal_true); + case 'f': + return scan_literal("false", 5, token_type::literal_false); + case 'n': + return scan_literal("null", 4, token_type::literal_null); + + // string + case '\"': + return scan_string(); + + // number + case '-': + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': + return scan_number(); + + // end of input (the null byte is needed when parsing from + // string literals) + case '\0': + case std::char_traits::eof(): + return 
token_type::end_of_input; + + // error + default: + error_message = "invalid literal"; + return token_type::parse_error; + } + } + + private: + /// input adapter + detail::input_adapter_t ia = nullptr; + + /// the current character + int current = std::char_traits::eof(); + + /// whether get() should return the last character again + bool next_unget = false; + + /// the number of characters read + std::size_t chars_read = 0; + /// the start position of the current token + std::size_t start_pos = 0; + + /// buffer for variable-length tokens (numbers, strings) + std::vector yytext = std::vector(1024, '\0'); + /// current index in yytext + std::size_t yylen = 0; + + /// a description of occurred lexer errors + const char* error_message = ""; + + // number values + number_integer_t value_integer = 0; + number_unsigned_t value_unsigned = 0; + number_float_t value_float = 0; + + /// the decimal point + const char decimal_point_char = '.'; +}; + +/*! +@brief syntax analysis + +This class implements a recursive decent parser. 
+*/ +template +class parser +{ + using number_integer_t = typename BasicJsonType::number_integer_t; + using number_unsigned_t = typename BasicJsonType::number_unsigned_t; + using number_float_t = typename BasicJsonType::number_float_t; + using lexer_t = lexer; + using token_type = typename lexer_t::token_type; + + public: + enum class parse_event_t : uint8_t + { + /// the parser read `{` and started to process a JSON object + object_start, + /// the parser read `}` and finished processing a JSON object + object_end, + /// the parser read `[` and started to process a JSON array + array_start, + /// the parser read `]` and finished processing a JSON array + array_end, + /// the parser read a key of a value in an object + key, + /// the parser finished reading a JSON value + value + }; + + using parser_callback_t = + std::function; + + /// a parser reading from an input adapter + explicit parser(detail::input_adapter_t adapter, + const parser_callback_t cb = nullptr, + const bool allow_exceptions_ = true) + : callback(cb), m_lexer(adapter), allow_exceptions(allow_exceptions_) + {} + + /*! + @brief public parser interface + + @param[in] strict whether to expect the last token to be EOF + @param[in,out] result parsed JSON value + + @throw parse_error.101 in case of an unexpected token + @throw parse_error.102 if to_unicode fails or surrogate error + @throw parse_error.103 if to_unicode fails + */ + void parse(const bool strict, BasicJsonType& result) + { + // read first token + get_token(); + + parse_internal(true, result); + result.assert_invariant(); + + // in strict mode, input must be completely read + if (strict) + { + get_token(); + expect(token_type::end_of_input); + } + + // in case of an error, return discarded value + if (errored) + { + result = value_t::discarded; + return; + } + + // set top-level value to null if it was discarded by the callback + // function + if (result.is_discarded()) + { + result = nullptr; + } + } + + /*! 
+ @brief public accept interface + + @param[in] strict whether to expect the last token to be EOF + @return whether the input is a proper JSON text + */ + bool accept(const bool strict = true) + { + // read first token + get_token(); + + if (not accept_internal()) + { + return false; + } + + // strict => last token must be EOF + return not strict or (get_token() == token_type::end_of_input); + } + + private: + /*! + @brief the actual parser + @throw parse_error.101 in case of an unexpected token + @throw parse_error.102 if to_unicode fails or surrogate error + @throw parse_error.103 if to_unicode fails + */ + void parse_internal(bool keep, BasicJsonType& result) + { + // never parse after a parse error was detected + assert(not errored); + + // start with a discarded value + if (not result.is_discarded()) + { + result.m_value.destroy(result.m_type); + result.m_type = value_t::discarded; + } + + switch (last_token) + { + case token_type::begin_object: + { + if (keep and (not callback or ((keep = callback(depth++, parse_event_t::object_start, result))))) + { + // explicitly set result to object to cope with {} + result.m_type = value_t::object; + result.m_value = value_t::object; + } + + // read next token + get_token(); + + // closing } -> we are done + if (last_token == token_type::end_object) + { + if (keep and callback and not callback(--depth, parse_event_t::object_end, result)) + { + result.m_value.destroy(result.m_type); + result.m_type = value_t::discarded; + } + break; + } + + // parse values + std::string key; + BasicJsonType value; + while (true) + { + // store key + if (not expect(token_type::value_string)) + { + return; + } + key = m_lexer.get_string(); + + bool keep_tag = false; + if (keep) + { + if (callback) + { + BasicJsonType k(key); + keep_tag = callback(depth, parse_event_t::key, k); + } + else + { + keep_tag = true; + } + } + + // parse separator (:) + get_token(); + if (not expect(token_type::name_separator)) + { + return; + } + + // parse and 
add value + get_token(); + value.m_value.destroy(value.m_type); + value.m_type = value_t::discarded; + parse_internal(keep, value); + + if (JSON_UNLIKELY(errored)) + { + return; + } + + if (keep and keep_tag and not value.is_discarded()) + { + result.m_value.object->emplace(std::move(key), std::move(value)); + } + + // comma -> next value + get_token(); + if (last_token == token_type::value_separator) + { + get_token(); + continue; + } + + // closing } + if (not expect(token_type::end_object)) + { + return; + } + break; + } + + if (keep and callback and not callback(--depth, parse_event_t::object_end, result)) + { + result.m_value.destroy(result.m_type); + result.m_type = value_t::discarded; + } + break; + } + + case token_type::begin_array: + { + if (keep and (not callback or ((keep = callback(depth++, parse_event_t::array_start, result))))) + { + // explicitly set result to object to cope with [] + result.m_type = value_t::array; + result.m_value = value_t::array; + } + + // read next token + get_token(); + + // closing ] -> we are done + if (last_token == token_type::end_array) + { + if (callback and not callback(--depth, parse_event_t::array_end, result)) + { + result.m_value.destroy(result.m_type); + result.m_type = value_t::discarded; + } + break; + } + + // parse values + BasicJsonType value; + while (true) + { + // parse value + value.m_value.destroy(value.m_type); + value.m_type = value_t::discarded; + parse_internal(keep, value); + + if (JSON_UNLIKELY(errored)) + { + return; + } + + if (keep and not value.is_discarded()) + { + result.m_value.array->push_back(std::move(value)); + } + + // comma -> next value + get_token(); + if (last_token == token_type::value_separator) + { + get_token(); + continue; + } + + // closing ] + if (not expect(token_type::end_array)) + { + return; + } + break; + } + + if (keep and callback and not callback(--depth, parse_event_t::array_end, result)) + { + result.m_value.destroy(result.m_type); + result.m_type = 
value_t::discarded; + } + break; + } + + case token_type::literal_null: + { + result.m_type = value_t::null; + break; + } + + case token_type::value_string: + { + result.m_type = value_t::string; + result.m_value = m_lexer.get_string(); + break; + } + + case token_type::literal_true: + { + result.m_type = value_t::boolean; + result.m_value = true; + break; + } + + case token_type::literal_false: + { + result.m_type = value_t::boolean; + result.m_value = false; + break; + } + + case token_type::value_unsigned: + { + result.m_type = value_t::number_unsigned; + result.m_value = m_lexer.get_number_unsigned(); + break; + } + + case token_type::value_integer: + { + result.m_type = value_t::number_integer; + result.m_value = m_lexer.get_number_integer(); + break; + } + + case token_type::value_float: + { + result.m_type = value_t::number_float; + result.m_value = m_lexer.get_number_float(); + + // throw in case of infinity or NAN + if (JSON_UNLIKELY(not std::isfinite(result.m_value.number_float))) + { + if (allow_exceptions) + { + JSON_THROW(out_of_range::create(406, "number overflow parsing '" + + m_lexer.get_token_string() + "'")); + } + expect(token_type::uninitialized); + } + break; + } + + case token_type::parse_error: + { + // using "uninitialized" to avoid "expected" message + if (not expect(token_type::uninitialized)) + { + return; + } + break; // LCOV_EXCL_LINE + } + + default: + { + // the last token was unexpected; we expected a value + if (not expect(token_type::literal_or_value)) + { + return; + } + break; // LCOV_EXCL_LINE + } + } + + if (keep and callback and not callback(depth, parse_event_t::value, result)) + { + result.m_type = value_t::discarded; + } + } + + /*! + @brief the acutal acceptor + + @invariant 1. The last token is not yet processed. Therefore, the caller + of this function must make sure a token has been read. + 2. When this function returns, the last token is processed. + That is, the last read character was already considered. 
+ + This invariant makes sure that no token needs to be "unput". + */ + bool accept_internal() + { + switch (last_token) + { + case token_type::begin_object: + { + // read next token + get_token(); + + // closing } -> we are done + if (last_token == token_type::end_object) + { + return true; + } + + // parse values + while (true) + { + // parse key + if (last_token != token_type::value_string) + { + return false; + } + + // parse separator (:) + get_token(); + if (last_token != token_type::name_separator) + { + return false; + } + + // parse value + get_token(); + if (not accept_internal()) + { + return false; + } + + // comma -> next value + get_token(); + if (last_token == token_type::value_separator) + { + get_token(); + continue; + } + + // closing } + return (last_token == token_type::end_object); + } + } + + case token_type::begin_array: + { + // read next token + get_token(); + + // closing ] -> we are done + if (last_token == token_type::end_array) + { + return true; + } + + // parse values + while (true) + { + // parse value + if (not accept_internal()) + { + return false; + } + + // comma -> next value + get_token(); + if (last_token == token_type::value_separator) + { + get_token(); + continue; + } + + // closing ] + return (last_token == token_type::end_array); + } + } + + case token_type::value_float: + { + // reject infinity or NAN + return std::isfinite(m_lexer.get_number_float()); + } + + case token_type::literal_false: + case token_type::literal_null: + case token_type::literal_true: + case token_type::value_integer: + case token_type::value_string: + case token_type::value_unsigned: + return true; + + default: // the last token was unexpected + return false; + } + } + + /// get next token from lexer + token_type get_token() + { + return (last_token = m_lexer.scan()); + } + + /*! 
+ @throw parse_error.101 if expected token did not occur + */ + bool expect(token_type t) + { + if (JSON_UNLIKELY(t != last_token)) + { + errored = true; + expected = t; + if (allow_exceptions) + { + throw_exception(); + } + else + { + return false; + } + } + + return true; + } + + [[noreturn]] void throw_exception() const + { + std::string error_msg = "syntax error - "; + if (last_token == token_type::parse_error) + { + error_msg += std::string(m_lexer.get_error_message()) + "; last read: '" + + m_lexer.get_token_string() + "'"; + } + else + { + error_msg += "unexpected " + std::string(lexer_t::token_type_name(last_token)); + } + + if (expected != token_type::uninitialized) + { + error_msg += "; expected " + std::string(lexer_t::token_type_name(expected)); + } + + JSON_THROW(parse_error::create(101, m_lexer.get_position(), error_msg)); + } + + private: + /// current level of recursion + int depth = 0; + /// callback function + const parser_callback_t callback = nullptr; + /// the type of the last read token + token_type last_token = token_type::uninitialized; + /// the lexer + lexer_t m_lexer; + /// whether a syntax error occurred + bool errored = false; + /// possible reason for the syntax error + token_type expected = token_type::uninitialized; + /// whether to throw exceptions in case of errors + const bool allow_exceptions = true; +}; + +/////////////// +// iterators // +/////////////// + +/*! +@brief an iterator for primitive JSON types + +This class models an iterator for primitive JSON types (boolean, number, +string). It's only purpose is to allow the iterator/const_iterator classes +to "iterate" over primitive values. Internally, the iterator is modeled by +a `difference_type` variable. Value begin_value (`0`) models the begin, +end_value (`1`) models past the end. 
+*/ +class primitive_iterator_t +{ + public: + using difference_type = std::ptrdiff_t; + + constexpr difference_type get_value() const noexcept + { + return m_it; + } + + /// set iterator to a defined beginning + void set_begin() noexcept + { + m_it = begin_value; + } + + /// set iterator to a defined past the end + void set_end() noexcept + { + m_it = end_value; + } + + /// return whether the iterator can be dereferenced + constexpr bool is_begin() const noexcept + { + return (m_it == begin_value); + } + + /// return whether the iterator is at end + constexpr bool is_end() const noexcept + { + return (m_it == end_value); + } + + friend constexpr bool operator==(primitive_iterator_t lhs, primitive_iterator_t rhs) noexcept + { + return (lhs.m_it == rhs.m_it); + } + + friend constexpr bool operator!=(primitive_iterator_t lhs, primitive_iterator_t rhs) noexcept + { + return not(lhs == rhs); + } + + friend constexpr bool operator<(primitive_iterator_t lhs, primitive_iterator_t rhs) noexcept + { + return lhs.m_it < rhs.m_it; + } + + friend constexpr bool operator<=(primitive_iterator_t lhs, primitive_iterator_t rhs) noexcept + { + return lhs.m_it <= rhs.m_it; + } + + friend constexpr bool operator>(primitive_iterator_t lhs, primitive_iterator_t rhs) noexcept + { + return lhs.m_it > rhs.m_it; + } + + friend constexpr bool operator>=(primitive_iterator_t lhs, primitive_iterator_t rhs) noexcept + { + return lhs.m_it >= rhs.m_it; + } + + primitive_iterator_t operator+(difference_type i) + { + auto result = *this; + result += i; + return result; + } + + friend constexpr difference_type operator-(primitive_iterator_t lhs, primitive_iterator_t rhs) noexcept + { + return lhs.m_it - rhs.m_it; + } + + friend std::ostream& operator<<(std::ostream& os, primitive_iterator_t it) + { + return os << it.m_it; + } + + primitive_iterator_t& operator++() + { + ++m_it; + return *this; + } + + primitive_iterator_t operator++(int) + { + auto result = *this; + m_it++; + return result; + } + + 
primitive_iterator_t& operator--() + { + --m_it; + return *this; + } + + primitive_iterator_t operator--(int) + { + auto result = *this; + m_it--; + return result; + } + + primitive_iterator_t& operator+=(difference_type n) + { + m_it += n; + return *this; + } + + primitive_iterator_t& operator-=(difference_type n) + { + m_it -= n; + return *this; + } + + private: + static constexpr difference_type begin_value = 0; + static constexpr difference_type end_value = begin_value + 1; + + /// iterator as signed integer type + difference_type m_it = std::numeric_limits::denorm_min(); +}; + +/*! +@brief an iterator value + +@note This structure could easily be a union, but MSVC currently does not allow +unions members with complex constructors, see https://github.com/nlohmann/json/pull/105. +*/ +template struct internal_iterator +{ + /// iterator for JSON objects + typename BasicJsonType::object_t::iterator object_iterator {}; + /// iterator for JSON arrays + typename BasicJsonType::array_t::iterator array_iterator {}; + /// generic iterator for all other types + primitive_iterator_t primitive_iterator {}; +}; + +template class iteration_proxy; + +/*! +@brief a template for a random access iterator for the @ref basic_json class + +This class implements a both iterators (iterator and const_iterator) for the +@ref basic_json class. + +@note An iterator is called *initialized* when a pointer to a JSON value has + been set (e.g., by a constructor or a copy assignment). If the iterator is + default-constructed, it is *uninitialized* and most methods are undefined. + **The library uses assertions to detect calls on uninitialized iterators.** + +@requirement The class satisfies the following concept requirements: +- +[RandomAccessIterator](http://en.cppreference.com/w/cpp/concept/RandomAccessIterator): + The iterator that can be moved to point (forward and backward) to any + element in constant time. 
+ +@since version 1.0.0, simplified in version 2.0.9 +*/ +template +class iter_impl : public std::iterator +{ + /// allow basic_json to access private members + friend iter_impl::value, typename std::remove_const::type, const BasicJsonType>::type>; + friend BasicJsonType; + friend iteration_proxy; + + using object_t = typename BasicJsonType::object_t; + using array_t = typename BasicJsonType::array_t; + // make sure BasicJsonType is basic_json or const basic_json + static_assert(is_basic_json::type>::value, + "iter_impl only accepts (const) basic_json"); + + public: + /// the type of the values when the iterator is dereferenced + using value_type = typename BasicJsonType::value_type; + /// a type to represent differences between iterators + using difference_type = typename BasicJsonType::difference_type; + /// defines a pointer to the type iterated over (value_type) + using pointer = typename std::conditional::value, + typename BasicJsonType::const_pointer, + typename BasicJsonType::pointer>::type; + /// defines a reference to the type iterated over (value_type) + using reference = + typename std::conditional::value, + typename BasicJsonType::const_reference, + typename BasicJsonType::reference>::type; + /// the category of the iterator + using iterator_category = std::bidirectional_iterator_tag; + + /// default constructor + iter_impl() = default; + + /*! + @brief constructor for a given JSON instance + @param[in] object pointer to a JSON object for this iterator + @pre object != nullptr + @post The iterator is initialized; i.e. `m_object != nullptr`. 
+ */ + explicit iter_impl(pointer object) noexcept : m_object(object) + { + assert(m_object != nullptr); + + switch (m_object->m_type) + { + case value_t::object: + { + m_it.object_iterator = typename object_t::iterator(); + break; + } + + case value_t::array: + { + m_it.array_iterator = typename array_t::iterator(); + break; + } + + default: + { + m_it.primitive_iterator = primitive_iterator_t(); + break; + } + } + } + + /*! + @note The conventional copy constructor and copy assignment are implicitly + defined. Combined with the following converting constructor and + assignment, they support: (1) copy from iterator to iterator, (2) + copy from const iterator to const iterator, and (3) conversion from + iterator to const iterator. However conversion from const iterator + to iterator is not defined. + */ + + /*! + @brief converting constructor + @param[in] other non-const iterator to copy from + @note It is not checked whether @a other is initialized. + */ + iter_impl(const iter_impl::type>& other) noexcept + : m_object(other.m_object), m_it(other.m_it) {} + + /*! + @brief converting assignment + @param[in,out] other non-const iterator to copy from + @return const/non-const iterator + @note It is not checked whether @a other is initialized. + */ + iter_impl& operator=(const iter_impl::type>& other) noexcept + { + m_object = other.m_object; + m_it = other.m_it; + return *this; + } + + private: + /*! + @brief set the iterator to the first value + @pre The iterator is initialized; i.e. `m_object != nullptr`. 
+ */ + void set_begin() noexcept + { + assert(m_object != nullptr); + + switch (m_object->m_type) + { + case value_t::object: + { + m_it.object_iterator = m_object->m_value.object->begin(); + break; + } + + case value_t::array: + { + m_it.array_iterator = m_object->m_value.array->begin(); + break; + } + + case value_t::null: + { + // set to end so begin()==end() is true: null is empty + m_it.primitive_iterator.set_end(); + break; + } + + default: + { + m_it.primitive_iterator.set_begin(); + break; + } + } + } + + /*! + @brief set the iterator past the last value + @pre The iterator is initialized; i.e. `m_object != nullptr`. + */ + void set_end() noexcept + { + assert(m_object != nullptr); + + switch (m_object->m_type) + { + case value_t::object: + { + m_it.object_iterator = m_object->m_value.object->end(); + break; + } + + case value_t::array: + { + m_it.array_iterator = m_object->m_value.array->end(); + break; + } + + default: + { + m_it.primitive_iterator.set_end(); + break; + } + } + } + + public: + /*! + @brief return a reference to the value pointed to by the iterator + @pre The iterator is initialized; i.e. `m_object != nullptr`. + */ + reference operator*() const + { + assert(m_object != nullptr); + + switch (m_object->m_type) + { + case value_t::object: + { + assert(m_it.object_iterator != m_object->m_value.object->end()); + return m_it.object_iterator->second; + } + + case value_t::array: + { + assert(m_it.array_iterator != m_object->m_value.array->end()); + return *m_it.array_iterator; + } + + case value_t::null: + JSON_THROW(invalid_iterator::create(214, "cannot get value")); + + default: + { + if (JSON_LIKELY(m_it.primitive_iterator.is_begin())) + { + return *m_object; + } + + JSON_THROW(invalid_iterator::create(214, "cannot get value")); + } + } + } + + /*! + @brief dereference the iterator + @pre The iterator is initialized; i.e. `m_object != nullptr`. 
+ */ + pointer operator->() const + { + assert(m_object != nullptr); + + switch (m_object->m_type) + { + case value_t::object: + { + assert(m_it.object_iterator != m_object->m_value.object->end()); + return &(m_it.object_iterator->second); + } + + case value_t::array: + { + assert(m_it.array_iterator != m_object->m_value.array->end()); + return &*m_it.array_iterator; + } + + default: + { + if (JSON_LIKELY(m_it.primitive_iterator.is_begin())) + { + return m_object; + } + + JSON_THROW(invalid_iterator::create(214, "cannot get value")); + } + } + } + + /*! + @brief post-increment (it++) + @pre The iterator is initialized; i.e. `m_object != nullptr`. + */ + iter_impl operator++(int) + { + auto result = *this; + ++(*this); + return result; + } + + /*! + @brief pre-increment (++it) + @pre The iterator is initialized; i.e. `m_object != nullptr`. + */ + iter_impl& operator++() + { + assert(m_object != nullptr); + + switch (m_object->m_type) + { + case value_t::object: + { + std::advance(m_it.object_iterator, 1); + break; + } + + case value_t::array: + { + std::advance(m_it.array_iterator, 1); + break; + } + + default: + { + ++m_it.primitive_iterator; + break; + } + } + + return *this; + } + + /*! + @brief post-decrement (it--) + @pre The iterator is initialized; i.e. `m_object != nullptr`. + */ + iter_impl operator--(int) + { + auto result = *this; + --(*this); + return result; + } + + /*! + @brief pre-decrement (--it) + @pre The iterator is initialized; i.e. `m_object != nullptr`. + */ + iter_impl& operator--() + { + assert(m_object != nullptr); + + switch (m_object->m_type) + { + case value_t::object: + { + std::advance(m_it.object_iterator, -1); + break; + } + + case value_t::array: + { + std::advance(m_it.array_iterator, -1); + break; + } + + default: + { + --m_it.primitive_iterator; + break; + } + } + + return *this; + } + + /*! + @brief comparison: equal + @pre The iterator is initialized; i.e. `m_object != nullptr`. 
+ */ + bool operator==(const iter_impl& other) const + { + // if objects are not the same, the comparison is undefined + if (JSON_UNLIKELY(m_object != other.m_object)) + { + JSON_THROW(invalid_iterator::create(212, "cannot compare iterators of different containers")); + } + + assert(m_object != nullptr); + + switch (m_object->m_type) + { + case value_t::object: + return (m_it.object_iterator == other.m_it.object_iterator); + + case value_t::array: + return (m_it.array_iterator == other.m_it.array_iterator); + + default: + return (m_it.primitive_iterator == other.m_it.primitive_iterator); + } + } + + /*! + @brief comparison: not equal + @pre The iterator is initialized; i.e. `m_object != nullptr`. + */ + bool operator!=(const iter_impl& other) const + { + return not operator==(other); + } + + /*! + @brief comparison: smaller + @pre The iterator is initialized; i.e. `m_object != nullptr`. + */ + bool operator<(const iter_impl& other) const + { + // if objects are not the same, the comparison is undefined + if (JSON_UNLIKELY(m_object != other.m_object)) + { + JSON_THROW(invalid_iterator::create(212, "cannot compare iterators of different containers")); + } + + assert(m_object != nullptr); + + switch (m_object->m_type) + { + case value_t::object: + JSON_THROW(invalid_iterator::create(213, "cannot compare order of object iterators")); + + case value_t::array: + return (m_it.array_iterator < other.m_it.array_iterator); + + default: + return (m_it.primitive_iterator < other.m_it.primitive_iterator); + } + } + + /*! + @brief comparison: less than or equal + @pre The iterator is initialized; i.e. `m_object != nullptr`. + */ + bool operator<=(const iter_impl& other) const + { + return not other.operator < (*this); + } + + /*! + @brief comparison: greater than + @pre The iterator is initialized; i.e. `m_object != nullptr`. + */ + bool operator>(const iter_impl& other) const + { + return not operator<=(other); + } + + /*! 
+ @brief comparison: greater than or equal + @pre The iterator is initialized; i.e. `m_object != nullptr`. + */ + bool operator>=(const iter_impl& other) const + { + return not operator<(other); + } + + /*! + @brief add to iterator + @pre The iterator is initialized; i.e. `m_object != nullptr`. + */ + iter_impl& operator+=(difference_type i) + { + assert(m_object != nullptr); + + switch (m_object->m_type) + { + case value_t::object: + JSON_THROW(invalid_iterator::create(209, "cannot use offsets with object iterators")); + + case value_t::array: + { + std::advance(m_it.array_iterator, i); + break; + } + + default: + { + m_it.primitive_iterator += i; + break; + } + } + + return *this; + } + + /*! + @brief subtract from iterator + @pre The iterator is initialized; i.e. `m_object != nullptr`. + */ + iter_impl& operator-=(difference_type i) + { + return operator+=(-i); + } + + /*! + @brief add to iterator + @pre The iterator is initialized; i.e. `m_object != nullptr`. + */ + iter_impl operator+(difference_type i) const + { + auto result = *this; + result += i; + return result; + } + + /*! + @brief addition of distance and iterator + @pre The iterator is initialized; i.e. `m_object != nullptr`. + */ + friend iter_impl operator+(difference_type i, const iter_impl& it) + { + auto result = it; + result += i; + return result; + } + + /*! + @brief subtract from iterator + @pre The iterator is initialized; i.e. `m_object != nullptr`. + */ + iter_impl operator-(difference_type i) const + { + auto result = *this; + result -= i; + return result; + } + + /*! + @brief return difference + @pre The iterator is initialized; i.e. `m_object != nullptr`. 
+ */ + difference_type operator-(const iter_impl& other) const + { + assert(m_object != nullptr); + + switch (m_object->m_type) + { + case value_t::object: + JSON_THROW(invalid_iterator::create(209, "cannot use offsets with object iterators")); + + case value_t::array: + return m_it.array_iterator - other.m_it.array_iterator; + + default: + return m_it.primitive_iterator - other.m_it.primitive_iterator; + } + } + + /*! + @brief access to successor + @pre The iterator is initialized; i.e. `m_object != nullptr`. + */ + reference operator[](difference_type n) const + { + assert(m_object != nullptr); + + switch (m_object->m_type) + { + case value_t::object: + JSON_THROW(invalid_iterator::create(208, "cannot use operator[] for object iterators")); + + case value_t::array: + return *std::next(m_it.array_iterator, n); + + case value_t::null: + JSON_THROW(invalid_iterator::create(214, "cannot get value")); + + default: + { + if (JSON_LIKELY(m_it.primitive_iterator.get_value() == -n)) + { + return *m_object; + } + + JSON_THROW(invalid_iterator::create(214, "cannot get value")); + } + } + } + + /*! + @brief return the key of an object iterator + @pre The iterator is initialized; i.e. `m_object != nullptr`. + */ + typename object_t::key_type key() const + { + assert(m_object != nullptr); + + if (JSON_LIKELY(m_object->is_object())) + { + return m_it.object_iterator->first; + } + + JSON_THROW(invalid_iterator::create(207, "cannot use key() for non-object iterators")); + } + + /*! + @brief return the value of an iterator + @pre The iterator is initialized; i.e. `m_object != nullptr`. 
+ */ + reference value() const + { + return operator*(); + } + + private: + /// associated JSON instance + pointer m_object = nullptr; + /// the actual iterator of the associated instance + internal_iterator::type> m_it = {}; +}; + +/// proxy class for the iterator_wrapper functions +template class iteration_proxy +{ + private: + /// helper class for iteration + class iteration_proxy_internal + { + private: + /// the iterator + IteratorType anchor; + /// an index for arrays (used to create key names) + std::size_t array_index = 0; + + public: + explicit iteration_proxy_internal(IteratorType it) noexcept : anchor(it) {} + + /// dereference operator (needed for range-based for) + iteration_proxy_internal& operator*() + { + return *this; + } + + /// increment operator (needed for range-based for) + iteration_proxy_internal& operator++() + { + ++anchor; + ++array_index; + + return *this; + } + + /// inequality operator (needed for range-based for) + bool operator!=(const iteration_proxy_internal& o) const noexcept + { + return anchor != o.anchor; + } + + /// return key of the iterator + std::string key() const + { + assert(anchor.m_object != nullptr); + + switch (anchor.m_object->type()) + { + // use integer array index as key + case value_t::array: + return std::to_string(array_index); + + // use key from the object + case value_t::object: + return anchor.key(); + + // use an empty key for all primitive types + default: + return ""; + } + } + + /// return value of the iterator + typename IteratorType::reference value() const + { + return anchor.value(); + } + }; + + /// the container to iterate + typename IteratorType::reference container; + + public: + /// construct iteration proxy from a container + explicit iteration_proxy(typename IteratorType::reference cont) + : container(cont) {} + + /// return iterator begin (needed for range-based for) + iteration_proxy_internal begin() noexcept + { + return iteration_proxy_internal(container.begin()); + } + + /// return 
iterator end (needed for range-based for) + iteration_proxy_internal end() noexcept + { + return iteration_proxy_internal(container.end()); + } +}; + +/*! +@brief a template for a reverse iterator class + +@tparam Base the base iterator type to reverse. Valid types are @ref +iterator (to create @ref reverse_iterator) and @ref const_iterator (to +create @ref const_reverse_iterator). + +@requirement The class satisfies the following concept requirements: +- +[RandomAccessIterator](http://en.cppreference.com/w/cpp/concept/RandomAccessIterator): + The iterator that can be moved to point (forward and backward) to any + element in constant time. +- [OutputIterator](http://en.cppreference.com/w/cpp/concept/OutputIterator): + It is possible to write to the pointed-to element (only if @a Base is + @ref iterator). + +@since version 1.0.0 +*/ +template +class json_reverse_iterator : public std::reverse_iterator +{ + public: + using difference_type = std::ptrdiff_t; + /// shortcut to the reverse iterator adaptor + using base_iterator = std::reverse_iterator; + /// the reference type for the pointed-to element + using reference = typename Base::reference; + + /// create reverse iterator from iterator + json_reverse_iterator(const typename base_iterator::iterator_type& it) noexcept + : base_iterator(it) {} + + /// create reverse iterator from base class + json_reverse_iterator(const base_iterator& it) noexcept : base_iterator(it) {} + + /// post-increment (it++) + json_reverse_iterator operator++(int) + { + return static_cast(base_iterator::operator++(1)); + } + + /// pre-increment (++it) + json_reverse_iterator& operator++() + { + return static_cast(base_iterator::operator++()); + } + + /// post-decrement (it--) + json_reverse_iterator operator--(int) + { + return static_cast(base_iterator::operator--(1)); + } + + /// pre-decrement (--it) + json_reverse_iterator& operator--() + { + return static_cast(base_iterator::operator--()); + } + + /// add to iterator + 
json_reverse_iterator& operator+=(difference_type i) + { + return static_cast(base_iterator::operator+=(i)); + } + + /// add to iterator + json_reverse_iterator operator+(difference_type i) const + { + return static_cast(base_iterator::operator+(i)); + } + + /// subtract from iterator + json_reverse_iterator operator-(difference_type i) const + { + return static_cast(base_iterator::operator-(i)); + } + + /// return difference + difference_type operator-(const json_reverse_iterator& other) const + { + return base_iterator(*this) - base_iterator(other); + } + + /// access to successor + reference operator[](difference_type n) const + { + return *(this->operator+(n)); + } + + /// return the key of an object iterator + auto key() const -> decltype(std::declval().key()) + { + auto it = --this->base(); + return it.key(); + } + + /// return the value of an iterator + reference value() const + { + auto it = --this->base(); + return it.operator * (); + } +}; + +///////////////////// +// output adapters // +///////////////////// + +/// abstract output adapter interface +template struct output_adapter_protocol +{ + virtual void write_character(CharType c) = 0; + virtual void write_characters(const CharType* s, std::size_t length) = 0; + virtual ~output_adapter_protocol() = default; +}; + +/// a type to simplify interfaces +template +using output_adapter_t = std::shared_ptr>; + +/// output adapter for byte vectors +template +class output_vector_adapter : public output_adapter_protocol +{ + public: + explicit output_vector_adapter(std::vector& vec) : v(vec) {} + + void write_character(CharType c) override + { + v.push_back(c); + } + + void write_characters(const CharType* s, std::size_t length) override + { + std::copy(s, s + length, std::back_inserter(v)); + } + + private: + std::vector& v; +}; + +/// output adapter for output streams +template +class output_stream_adapter : public output_adapter_protocol +{ + public: + explicit output_stream_adapter(std::basic_ostream& s) : 
stream(s) {} + + void write_character(CharType c) override + { + stream.put(c); + } + + void write_characters(const CharType* s, std::size_t length) override + { + stream.write(s, static_cast(length)); + } + + private: + std::basic_ostream& stream; +}; + +/// output adapter for basic_string +template +class output_string_adapter : public output_adapter_protocol +{ + public: + explicit output_string_adapter(std::basic_string& s) : str(s) {} + + void write_character(CharType c) override + { + str.push_back(c); + } + + void write_characters(const CharType* s, std::size_t length) override + { + str.append(s, length); + } + + private: + std::basic_string& str; +}; + +template +class output_adapter +{ + public: + output_adapter(std::vector& vec) + : oa(std::make_shared>(vec)) {} + + output_adapter(std::basic_ostream& s) + : oa(std::make_shared>(s)) {} + + output_adapter(std::basic_string& s) + : oa(std::make_shared>(s)) {} + + operator output_adapter_t() + { + return oa; + } + + private: + output_adapter_t oa = nullptr; +}; + +////////////////////////////// +// binary reader and writer // +////////////////////////////// + +/*! +@brief deserialization of CBOR and MessagePack values +*/ +template +class binary_reader +{ + using number_integer_t = typename BasicJsonType::number_integer_t; + using number_unsigned_t = typename BasicJsonType::number_unsigned_t; + + public: + /*! + @brief create a binary reader + + @param[in] adapter input adapter to read from + */ + explicit binary_reader(input_adapter_t adapter) : ia(std::move(adapter)) + { + assert(ia); + } + + /*! 
+ @brief create a JSON value from CBOR input + + @param[in] strict whether to expect the input to be consumed completed + @return JSON value created from CBOR input + + @throw parse_error.110 if input ended unexpectedly or the end of file was + not reached when @a strict was set to true + @throw parse_error.112 if unsupported byte was read + */ + BasicJsonType parse_cbor(const bool strict) + { + const auto res = parse_cbor_internal(); + if (strict) + { + get(); + check_eof(true); + } + return res; + } + + /*! + @brief create a JSON value from MessagePack input + + @param[in] strict whether to expect the input to be consumed completed + @return JSON value created from MessagePack input + + @throw parse_error.110 if input ended unexpectedly or the end of file was + not reached when @a strict was set to true + @throw parse_error.112 if unsupported byte was read + */ + BasicJsonType parse_msgpack(const bool strict) + { + const auto res = parse_msgpack_internal(); + if (strict) + { + get(); + check_eof(true); + } + return res; + } + + /*! + @brief determine system byte order + + @return true if and only if system's byte order is little endian + + @note from http://stackoverflow.com/a/1001328/266378 + */ + static constexpr bool little_endianess(int num = 1) noexcept + { + return (*reinterpret_cast(&num) == 1); + } + + private: + /*! + @param[in] get_char whether a new character should be retrieved from the + input (true, default) or whether the last read + character should be considered instead + */ + BasicJsonType parse_cbor_internal(const bool get_char = true) + { + switch (get_char ? 
get() : current) + { + // EOF + case std::char_traits::eof(): + JSON_THROW(parse_error::create(110, chars_read, "unexpected end of input")); + + // Integer 0x00..0x17 (0..23) + case 0x00: + case 0x01: + case 0x02: + case 0x03: + case 0x04: + case 0x05: + case 0x06: + case 0x07: + case 0x08: + case 0x09: + case 0x0a: + case 0x0b: + case 0x0c: + case 0x0d: + case 0x0e: + case 0x0f: + case 0x10: + case 0x11: + case 0x12: + case 0x13: + case 0x14: + case 0x15: + case 0x16: + case 0x17: + return static_cast(current); + + case 0x18: // Unsigned integer (one-byte uint8_t follows) + return get_number(); + + case 0x19: // Unsigned integer (two-byte uint16_t follows) + return get_number(); + + case 0x1a: // Unsigned integer (four-byte uint32_t follows) + return get_number(); + + case 0x1b: // Unsigned integer (eight-byte uint64_t follows) + return get_number(); + + // Negative integer -1-0x00..-1-0x17 (-1..-24) + case 0x20: + case 0x21: + case 0x22: + case 0x23: + case 0x24: + case 0x25: + case 0x26: + case 0x27: + case 0x28: + case 0x29: + case 0x2a: + case 0x2b: + case 0x2c: + case 0x2d: + case 0x2e: + case 0x2f: + case 0x30: + case 0x31: + case 0x32: + case 0x33: + case 0x34: + case 0x35: + case 0x36: + case 0x37: + return static_cast(0x20 - 1 - current); + + case 0x38: // Negative integer (one-byte uint8_t follows) + { + // must be uint8_t ! 
+ return static_cast(-1) - get_number(); + } + + case 0x39: // Negative integer -1-n (two-byte uint16_t follows) + { + return static_cast(-1) - get_number(); + } + + case 0x3a: // Negative integer -1-n (four-byte uint32_t follows) + { + return static_cast(-1) - get_number(); + } + + case 0x3b: // Negative integer -1-n (eight-byte uint64_t follows) + { + return static_cast(-1) - + static_cast(get_number()); + } + + // UTF-8 string (0x00..0x17 bytes follow) + case 0x60: + case 0x61: + case 0x62: + case 0x63: + case 0x64: + case 0x65: + case 0x66: + case 0x67: + case 0x68: + case 0x69: + case 0x6a: + case 0x6b: + case 0x6c: + case 0x6d: + case 0x6e: + case 0x6f: + case 0x70: + case 0x71: + case 0x72: + case 0x73: + case 0x74: + case 0x75: + case 0x76: + case 0x77: + case 0x78: // UTF-8 string (one-byte uint8_t for n follows) + case 0x79: // UTF-8 string (two-byte uint16_t for n follow) + case 0x7a: // UTF-8 string (four-byte uint32_t for n follow) + case 0x7b: // UTF-8 string (eight-byte uint64_t for n follow) + case 0x7f: // UTF-8 string (indefinite length) + { + return get_cbor_string(); + } + + // array (0x00..0x17 data items follow) + case 0x80: + case 0x81: + case 0x82: + case 0x83: + case 0x84: + case 0x85: + case 0x86: + case 0x87: + case 0x88: + case 0x89: + case 0x8a: + case 0x8b: + case 0x8c: + case 0x8d: + case 0x8e: + case 0x8f: + case 0x90: + case 0x91: + case 0x92: + case 0x93: + case 0x94: + case 0x95: + case 0x96: + case 0x97: + { + return get_cbor_array(current & 0x1f); + } + + case 0x98: // array (one-byte uint8_t for n follows) + { + return get_cbor_array(get_number()); + } + + case 0x99: // array (two-byte uint16_t for n follow) + { + return get_cbor_array(get_number()); + } + + case 0x9a: // array (four-byte uint32_t for n follow) + { + return get_cbor_array(get_number()); + } + + case 0x9b: // array (eight-byte uint64_t for n follow) + { + return get_cbor_array(get_number()); + } + + case 0x9f: // array (indefinite length) + { + BasicJsonType 
result = value_t::array; + while (get() != 0xff) + { + result.push_back(parse_cbor_internal(false)); + } + return result; + } + + // map (0x00..0x17 pairs of data items follow) + case 0xa0: + case 0xa1: + case 0xa2: + case 0xa3: + case 0xa4: + case 0xa5: + case 0xa6: + case 0xa7: + case 0xa8: + case 0xa9: + case 0xaa: + case 0xab: + case 0xac: + case 0xad: + case 0xae: + case 0xaf: + case 0xb0: + case 0xb1: + case 0xb2: + case 0xb3: + case 0xb4: + case 0xb5: + case 0xb6: + case 0xb7: + { + return get_cbor_object(current & 0x1f); + } + + case 0xb8: // map (one-byte uint8_t for n follows) + { + return get_cbor_object(get_number()); + } + + case 0xb9: // map (two-byte uint16_t for n follow) + { + return get_cbor_object(get_number()); + } + + case 0xba: // map (four-byte uint32_t for n follow) + { + return get_cbor_object(get_number()); + } + + case 0xbb: // map (eight-byte uint64_t for n follow) + { + return get_cbor_object(get_number()); + } + + case 0xbf: // map (indefinite length) + { + BasicJsonType result = value_t::object; + while (get() != 0xff) + { + auto key = get_cbor_string(); + result[key] = parse_cbor_internal(); + } + return result; + } + + case 0xf4: // false + { + return false; + } + + case 0xf5: // true + { + return true; + } + + case 0xf6: // null + { + return value_t::null; + } + + case 0xf9: // Half-Precision Float (two-byte IEEE 754) + { + const int byte1 = get(); + check_eof(); + const int byte2 = get(); + check_eof(); + + // code from RFC 7049, Appendix D, Figure 3: + // As half-precision floating-point numbers were only added + // to IEEE 754 in 2008, today's programming platforms often + // still only have limited support for them. It is very + // easy to include at least decoding support for them even + // without such support. An example of a small decoder for + // half-precision floating-point numbers in the C language + // is shown in Fig. 3. 
+ const int half = (byte1 << 8) + byte2; + const int exp = (half >> 10) & 0x1f; + const int mant = half & 0x3ff; + double val; + if (exp == 0) + { + val = std::ldexp(mant, -24); + } + else if (exp != 31) + { + val = std::ldexp(mant + 1024, exp - 25); + } + else + { + val = (mant == 0) ? std::numeric_limits::infinity() + : std::numeric_limits::quiet_NaN(); + } + return (half & 0x8000) != 0 ? -val : val; + } + + case 0xfa: // Single-Precision Float (four-byte IEEE 754) + { + return get_number(); + } + + case 0xfb: // Double-Precision Float (eight-byte IEEE 754) + { + return get_number(); + } + + default: // anything else (0xFF is handled inside the other types) + { + std::stringstream ss; + ss << std::setw(2) << std::setfill('0') << std::hex << current; + JSON_THROW(parse_error::create(112, chars_read, "error reading CBOR; last byte: 0x" + ss.str())); + } + } + } + + BasicJsonType parse_msgpack_internal() + { + switch (get()) + { + // EOF + case std::char_traits::eof(): + JSON_THROW(parse_error::create(110, chars_read, "unexpected end of input")); + + // positive fixint + case 0x00: + case 0x01: + case 0x02: + case 0x03: + case 0x04: + case 0x05: + case 0x06: + case 0x07: + case 0x08: + case 0x09: + case 0x0a: + case 0x0b: + case 0x0c: + case 0x0d: + case 0x0e: + case 0x0f: + case 0x10: + case 0x11: + case 0x12: + case 0x13: + case 0x14: + case 0x15: + case 0x16: + case 0x17: + case 0x18: + case 0x19: + case 0x1a: + case 0x1b: + case 0x1c: + case 0x1d: + case 0x1e: + case 0x1f: + case 0x20: + case 0x21: + case 0x22: + case 0x23: + case 0x24: + case 0x25: + case 0x26: + case 0x27: + case 0x28: + case 0x29: + case 0x2a: + case 0x2b: + case 0x2c: + case 0x2d: + case 0x2e: + case 0x2f: + case 0x30: + case 0x31: + case 0x32: + case 0x33: + case 0x34: + case 0x35: + case 0x36: + case 0x37: + case 0x38: + case 0x39: + case 0x3a: + case 0x3b: + case 0x3c: + case 0x3d: + case 0x3e: + case 0x3f: + case 0x40: + case 0x41: + case 0x42: + case 0x43: + case 0x44: + case 0x45: + 
case 0x46: + case 0x47: + case 0x48: + case 0x49: + case 0x4a: + case 0x4b: + case 0x4c: + case 0x4d: + case 0x4e: + case 0x4f: + case 0x50: + case 0x51: + case 0x52: + case 0x53: + case 0x54: + case 0x55: + case 0x56: + case 0x57: + case 0x58: + case 0x59: + case 0x5a: + case 0x5b: + case 0x5c: + case 0x5d: + case 0x5e: + case 0x5f: + case 0x60: + case 0x61: + case 0x62: + case 0x63: + case 0x64: + case 0x65: + case 0x66: + case 0x67: + case 0x68: + case 0x69: + case 0x6a: + case 0x6b: + case 0x6c: + case 0x6d: + case 0x6e: + case 0x6f: + case 0x70: + case 0x71: + case 0x72: + case 0x73: + case 0x74: + case 0x75: + case 0x76: + case 0x77: + case 0x78: + case 0x79: + case 0x7a: + case 0x7b: + case 0x7c: + case 0x7d: + case 0x7e: + case 0x7f: + return static_cast(current); + + // fixmap + case 0x80: + case 0x81: + case 0x82: + case 0x83: + case 0x84: + case 0x85: + case 0x86: + case 0x87: + case 0x88: + case 0x89: + case 0x8a: + case 0x8b: + case 0x8c: + case 0x8d: + case 0x8e: + case 0x8f: + { + return get_msgpack_object(current & 0x0f); + } + + // fixarray + case 0x90: + case 0x91: + case 0x92: + case 0x93: + case 0x94: + case 0x95: + case 0x96: + case 0x97: + case 0x98: + case 0x99: + case 0x9a: + case 0x9b: + case 0x9c: + case 0x9d: + case 0x9e: + case 0x9f: + { + return get_msgpack_array(current & 0x0f); + } + + // fixstr + case 0xa0: + case 0xa1: + case 0xa2: + case 0xa3: + case 0xa4: + case 0xa5: + case 0xa6: + case 0xa7: + case 0xa8: + case 0xa9: + case 0xaa: + case 0xab: + case 0xac: + case 0xad: + case 0xae: + case 0xaf: + case 0xb0: + case 0xb1: + case 0xb2: + case 0xb3: + case 0xb4: + case 0xb5: + case 0xb6: + case 0xb7: + case 0xb8: + case 0xb9: + case 0xba: + case 0xbb: + case 0xbc: + case 0xbd: + case 0xbe: + case 0xbf: + return get_msgpack_string(); + + case 0xc0: // nil + return value_t::null; + + case 0xc2: // false + return false; + + case 0xc3: // true + return true; + + case 0xca: // float 32 + return get_number(); + + case 0xcb: // float 64 + 
return get_number(); + + case 0xcc: // uint 8 + return get_number(); + + case 0xcd: // uint 16 + return get_number(); + + case 0xce: // uint 32 + return get_number(); + + case 0xcf: // uint 64 + return get_number(); + + case 0xd0: // int 8 + return get_number(); + + case 0xd1: // int 16 + return get_number(); + + case 0xd2: // int 32 + return get_number(); + + case 0xd3: // int 64 + return get_number(); + + case 0xd9: // str 8 + case 0xda: // str 16 + case 0xdb: // str 32 + return get_msgpack_string(); + + case 0xdc: // array 16 + { + return get_msgpack_array(get_number()); + } + + case 0xdd: // array 32 + { + return get_msgpack_array(get_number()); + } + + case 0xde: // map 16 + { + return get_msgpack_object(get_number()); + } + + case 0xdf: // map 32 + { + return get_msgpack_object(get_number()); + } + + // positive fixint + case 0xe0: + case 0xe1: + case 0xe2: + case 0xe3: + case 0xe4: + case 0xe5: + case 0xe6: + case 0xe7: + case 0xe8: + case 0xe9: + case 0xea: + case 0xeb: + case 0xec: + case 0xed: + case 0xee: + case 0xef: + case 0xf0: + case 0xf1: + case 0xf2: + case 0xf3: + case 0xf4: + case 0xf5: + case 0xf6: + case 0xf7: + case 0xf8: + case 0xf9: + case 0xfa: + case 0xfb: + case 0xfc: + case 0xfd: + case 0xfe: + case 0xff: + return static_cast(current); + + default: // anything else + { + std::stringstream ss; + ss << std::setw(2) << std::setfill('0') << std::hex << current; + JSON_THROW(parse_error::create(112, chars_read, + "error reading MessagePack; last byte: 0x" + ss.str())); + } + } + } + + /*! + @brief get next character from the input + + This function provides the interface to the used input adapter. It does + not throw in case the input reached EOF, but returns + `std::char_traits::eof()` in that case. 
+ + @return character read from the input + */ + int get() + { + ++chars_read; + return (current = ia->get_character()); + } + + /* + @brief read a number from the input + + @tparam NumberType the type of the number + + @return number of type @a NumberType + + @note This function needs to respect the system's endianess, because + bytes in CBOR and MessagePack are stored in network order (big + endian) and therefore need reordering on little endian systems. + + @throw parse_error.110 if input has less than `sizeof(NumberType)` bytes + */ + template NumberType get_number() + { + // step 1: read input into array with system's byte order + std::array vec; + for (std::size_t i = 0; i < sizeof(NumberType); ++i) + { + get(); + check_eof(); + + // reverse byte order prior to conversion if necessary + if (is_little_endian) + { + vec[sizeof(NumberType) - i - 1] = static_cast(current); + } + else + { + vec[i] = static_cast(current); // LCOV_EXCL_LINE + } + } + + // step 2: convert array into number of type T and return + NumberType result; + std::memcpy(&result, vec.data(), sizeof(NumberType)); + return result; + } + + /*! + @brief create a string by reading characters from the input + + @param[in] len number of bytes to read + + @note We can not reserve @a len bytes for the result, because @a len + may be too large. Usually, @ref check_eof() detects the end of + the input before we run out of string memory. + + @return string created by reading @a len bytes + + @throw parse_error.110 if input has less than @a len bytes + */ + template + std::string get_string(const NumberType len) + { + std::string result; + std::generate_n(std::back_inserter(result), len, [this]() + { + get(); + check_eof(); + return current; + }); + return result; + } + + /*! + @brief reads a CBOR string + + This function first reads starting bytes to determine the expected + string length and then copies this number of bytes into a string. 
+ Additionally, CBOR's strings with indefinite lengths are supported. + + @return string + + @throw parse_error.110 if input ended + @throw parse_error.113 if an unexpected byte is read + */ + std::string get_cbor_string() + { + check_eof(); + + switch (current) + { + // UTF-8 string (0x00..0x17 bytes follow) + case 0x60: + case 0x61: + case 0x62: + case 0x63: + case 0x64: + case 0x65: + case 0x66: + case 0x67: + case 0x68: + case 0x69: + case 0x6a: + case 0x6b: + case 0x6c: + case 0x6d: + case 0x6e: + case 0x6f: + case 0x70: + case 0x71: + case 0x72: + case 0x73: + case 0x74: + case 0x75: + case 0x76: + case 0x77: + { + return get_string(current & 0x1f); + } + + case 0x78: // UTF-8 string (one-byte uint8_t for n follows) + { + return get_string(get_number()); + } + + case 0x79: // UTF-8 string (two-byte uint16_t for n follow) + { + return get_string(get_number()); + } + + case 0x7a: // UTF-8 string (four-byte uint32_t for n follow) + { + return get_string(get_number()); + } + + case 0x7b: // UTF-8 string (eight-byte uint64_t for n follow) + { + return get_string(get_number()); + } + + case 0x7f: // UTF-8 string (indefinite length) + { + std::string result; + while (get() != 0xff) + { + check_eof(); + result.push_back(static_cast(current)); + } + return result; + } + + default: + { + std::stringstream ss; + ss << std::setw(2) << std::setfill('0') << std::hex << current; + JSON_THROW(parse_error::create(113, chars_read, "expected a CBOR string; last byte: 0x" + ss.str())); + } + } + } + + template + BasicJsonType get_cbor_array(const NumberType len) + { + BasicJsonType result = value_t::array; + std::generate_n(std::back_inserter(*result.m_value.array), len, [this]() + { + return parse_cbor_internal(); + }); + return result; + } + + template + BasicJsonType get_cbor_object(const NumberType len) + { + BasicJsonType result = value_t::object; + std::generate_n(std::inserter(*result.m_value.object, + result.m_value.object->end()), + len, [this]() + { + get(); + auto key 
= get_cbor_string(); + auto val = parse_cbor_internal(); + return std::make_pair(std::move(key), std::move(val)); + }); + return result; + } + + /*! + @brief reads a MessagePack string + + This function first reads starting bytes to determine the expected + string length and then copies this number of bytes into a string. + + @return string + + @throw parse_error.110 if input ended + @throw parse_error.113 if an unexpected byte is read + */ + std::string get_msgpack_string() + { + check_eof(); + + switch (current) + { + // fixstr + case 0xa0: + case 0xa1: + case 0xa2: + case 0xa3: + case 0xa4: + case 0xa5: + case 0xa6: + case 0xa7: + case 0xa8: + case 0xa9: + case 0xaa: + case 0xab: + case 0xac: + case 0xad: + case 0xae: + case 0xaf: + case 0xb0: + case 0xb1: + case 0xb2: + case 0xb3: + case 0xb4: + case 0xb5: + case 0xb6: + case 0xb7: + case 0xb8: + case 0xb9: + case 0xba: + case 0xbb: + case 0xbc: + case 0xbd: + case 0xbe: + case 0xbf: + { + return get_string(current & 0x1f); + } + + case 0xd9: // str 8 + { + return get_string(get_number()); + } + + case 0xda: // str 16 + { + return get_string(get_number()); + } + + case 0xdb: // str 32 + { + return get_string(get_number()); + } + + default: + { + std::stringstream ss; + ss << std::setw(2) << std::setfill('0') << std::hex << current; + JSON_THROW(parse_error::create(113, chars_read, + "expected a MessagePack string; last byte: 0x" + ss.str())); + } + } + } + + template + BasicJsonType get_msgpack_array(const NumberType len) + { + BasicJsonType result = value_t::array; + std::generate_n(std::back_inserter(*result.m_value.array), len, [this]() + { + return parse_msgpack_internal(); + }); + return result; + } + + template + BasicJsonType get_msgpack_object(const NumberType len) + { + BasicJsonType result = value_t::object; + std::generate_n(std::inserter(*result.m_value.object, + result.m_value.object->end()), + len, [this]() + { + get(); + auto key = get_msgpack_string(); + auto val = parse_msgpack_internal(); + 
return std::make_pair(std::move(key), std::move(val)); + }); + return result; + } + + /*! + @brief check if input ended + @throw parse_error.110 if input ended + */ + void check_eof(const bool expect_eof = false) const + { + if (expect_eof) + { + if (JSON_UNLIKELY(current != std::char_traits::eof())) + { + JSON_THROW(parse_error::create(110, chars_read, "expected end of input")); + } + } + else + { + if (JSON_UNLIKELY(current == std::char_traits::eof())) + { + JSON_THROW(parse_error::create(110, chars_read, "unexpected end of input")); + } + } + } + + private: + /// input adapter + input_adapter_t ia = nullptr; + + /// the current character + int current = std::char_traits::eof(); + + /// the number of characters read + std::size_t chars_read = 0; + + /// whether we can assume little endianess + const bool is_little_endian = little_endianess(); +}; + +/*! +@brief serialization to CBOR and MessagePack values +*/ +template +class binary_writer +{ + public: + /*! + @brief create a binary writer + + @param[in] adapter output adapter to write to + */ + explicit binary_writer(output_adapter_t adapter) : oa(adapter) + { + assert(oa); + } + + /*! + @brief[in] j JSON value to serialize + */ + void write_cbor(const BasicJsonType& j) + { + switch (j.type()) + { + case value_t::null: + { + oa->write_character(static_cast(0xf6)); + break; + } + + case value_t::boolean: + { + oa->write_character(j.m_value.boolean + ? static_cast(0xf5) + : static_cast(0xf4)); + break; + } + + case value_t::number_integer: + { + if (j.m_value.number_integer >= 0) + { + // CBOR does not differentiate between positive signed + // integers and unsigned integers. Therefore, we used the + // code from the value_t::number_unsigned case here. 
+ if (j.m_value.number_integer <= 0x17) + { + write_number(static_cast(j.m_value.number_integer)); + } + else if (j.m_value.number_integer <= (std::numeric_limits::max)()) + { + oa->write_character(static_cast(0x18)); + write_number(static_cast(j.m_value.number_integer)); + } + else if (j.m_value.number_integer <= (std::numeric_limits::max)()) + { + oa->write_character(static_cast(0x19)); + write_number(static_cast(j.m_value.number_integer)); + } + else if (j.m_value.number_integer <= (std::numeric_limits::max)()) + { + oa->write_character(static_cast(0x1a)); + write_number(static_cast(j.m_value.number_integer)); + } + else + { + oa->write_character(static_cast(0x1b)); + write_number(static_cast(j.m_value.number_integer)); + } + } + else + { + // The conversions below encode the sign in the first + // byte, and the value is converted to a positive number. + const auto positive_number = -1 - j.m_value.number_integer; + if (j.m_value.number_integer >= -24) + { + write_number(static_cast(0x20 + positive_number)); + } + else if (positive_number <= (std::numeric_limits::max)()) + { + oa->write_character(static_cast(0x38)); + write_number(static_cast(positive_number)); + } + else if (positive_number <= (std::numeric_limits::max)()) + { + oa->write_character(static_cast(0x39)); + write_number(static_cast(positive_number)); + } + else if (positive_number <= (std::numeric_limits::max)()) + { + oa->write_character(static_cast(0x3a)); + write_number(static_cast(positive_number)); + } + else + { + oa->write_character(static_cast(0x3b)); + write_number(static_cast(positive_number)); + } + } + break; + } + + case value_t::number_unsigned: + { + if (j.m_value.number_unsigned <= 0x17) + { + write_number(static_cast(j.m_value.number_unsigned)); + } + else if (j.m_value.number_unsigned <= (std::numeric_limits::max)()) + { + oa->write_character(static_cast(0x18)); + write_number(static_cast(j.m_value.number_unsigned)); + } + else if (j.m_value.number_unsigned <= 
(std::numeric_limits::max)()) + { + oa->write_character(static_cast(0x19)); + write_number(static_cast(j.m_value.number_unsigned)); + } + else if (j.m_value.number_unsigned <= (std::numeric_limits::max)()) + { + oa->write_character(static_cast(0x1a)); + write_number(static_cast(j.m_value.number_unsigned)); + } + else + { + oa->write_character(static_cast(0x1b)); + write_number(static_cast(j.m_value.number_unsigned)); + } + break; + } + + case value_t::number_float: // Double-Precision Float + { + oa->write_character(static_cast(0xfb)); + write_number(j.m_value.number_float); + break; + } + + case value_t::string: + { + // step 1: write control byte and the string length + const auto N = j.m_value.string->size(); + if (N <= 0x17) + { + write_number(static_cast(0x60 + N)); + } + else if (N <= 0xff) + { + oa->write_character(static_cast(0x78)); + write_number(static_cast(N)); + } + else if (N <= 0xffff) + { + oa->write_character(static_cast(0x79)); + write_number(static_cast(N)); + } + else if (N <= 0xffffffff) + { + oa->write_character(static_cast(0x7a)); + write_number(static_cast(N)); + } + // LCOV_EXCL_START + else if (N <= 0xffffffffffffffff) + { + oa->write_character(static_cast(0x7b)); + write_number(static_cast(N)); + } + // LCOV_EXCL_STOP + + // step 2: write the string + oa->write_characters( + reinterpret_cast(j.m_value.string->c_str()), + j.m_value.string->size()); + break; + } + + case value_t::array: + { + // step 1: write control byte and the array size + const auto N = j.m_value.array->size(); + if (N <= 0x17) + { + write_number(static_cast(0x80 + N)); + } + else if (N <= 0xff) + { + oa->write_character(static_cast(0x98)); + write_number(static_cast(N)); + } + else if (N <= 0xffff) + { + oa->write_character(static_cast(0x99)); + write_number(static_cast(N)); + } + else if (N <= 0xffffffff) + { + oa->write_character(static_cast(0x9a)); + write_number(static_cast(N)); + } + // LCOV_EXCL_START + else if (N <= 0xffffffffffffffff) + { + 
oa->write_character(static_cast(0x9b)); + write_number(static_cast(N)); + } + // LCOV_EXCL_STOP + + // step 2: write each element + for (const auto& el : *j.m_value.array) + { + write_cbor(el); + } + break; + } + + case value_t::object: + { + // step 1: write control byte and the object size + const auto N = j.m_value.object->size(); + if (N <= 0x17) + { + write_number(static_cast(0xa0 + N)); + } + else if (N <= 0xff) + { + oa->write_character(static_cast(0xb8)); + write_number(static_cast(N)); + } + else if (N <= 0xffff) + { + oa->write_character(static_cast(0xb9)); + write_number(static_cast(N)); + } + else if (N <= 0xffffffff) + { + oa->write_character(static_cast(0xba)); + write_number(static_cast(N)); + } + // LCOV_EXCL_START + else if (N <= 0xffffffffffffffff) + { + oa->write_character(static_cast(0xbb)); + write_number(static_cast(N)); + } + // LCOV_EXCL_STOP + + // step 2: write each element + for (const auto& el : *j.m_value.object) + { + write_cbor(el.first); + write_cbor(el.second); + } + break; + } + + default: + break; + } + } + + /*! + @brief[in] j JSON value to serialize + */ + void write_msgpack(const BasicJsonType& j) + { + switch (j.type()) + { + case value_t::null: // nil + { + oa->write_character(static_cast(0xc0)); + break; + } + + case value_t::boolean: // true and false + { + oa->write_character(j.m_value.boolean + ? static_cast(0xc3) + : static_cast(0xc2)); + break; + } + + case value_t::number_integer: + { + if (j.m_value.number_integer >= 0) + { + // MessagePack does not differentiate between positive + // signed integers and unsigned integers. Therefore, we used + // the code from the value_t::number_unsigned case here. 
+ if (j.m_value.number_unsigned < 128) + { + // positive fixnum + write_number(static_cast(j.m_value.number_integer)); + } + else if (j.m_value.number_unsigned <= (std::numeric_limits::max)()) + { + // uint 8 + oa->write_character(static_cast(0xcc)); + write_number(static_cast(j.m_value.number_integer)); + } + else if (j.m_value.number_unsigned <= (std::numeric_limits::max)()) + { + // uint 16 + oa->write_character(static_cast(0xcd)); + write_number(static_cast(j.m_value.number_integer)); + } + else if (j.m_value.number_unsigned <= (std::numeric_limits::max)()) + { + // uint 32 + oa->write_character(static_cast(0xce)); + write_number(static_cast(j.m_value.number_integer)); + } + else if (j.m_value.number_unsigned <= (std::numeric_limits::max)()) + { + // uint 64 + oa->write_character(static_cast(0xcf)); + write_number(static_cast(j.m_value.number_integer)); + } + } + else + { + if (j.m_value.number_integer >= -32) + { + // negative fixnum + write_number(static_cast(j.m_value.number_integer)); + } + else if (j.m_value.number_integer >= (std::numeric_limits::min)() and + j.m_value.number_integer <= (std::numeric_limits::max)()) + { + // int 8 + oa->write_character(static_cast(0xd0)); + write_number(static_cast(j.m_value.number_integer)); + } + else if (j.m_value.number_integer >= (std::numeric_limits::min)() and + j.m_value.number_integer <= (std::numeric_limits::max)()) + { + // int 16 + oa->write_character(static_cast(0xd1)); + write_number(static_cast(j.m_value.number_integer)); + } + else if (j.m_value.number_integer >= (std::numeric_limits::min)() and + j.m_value.number_integer <= (std::numeric_limits::max)()) + { + // int 32 + oa->write_character(static_cast(0xd2)); + write_number(static_cast(j.m_value.number_integer)); + } + else if (j.m_value.number_integer >= (std::numeric_limits::min)() and + j.m_value.number_integer <= (std::numeric_limits::max)()) + { + // int 64 + oa->write_character(static_cast(0xd3)); + 
write_number(static_cast(j.m_value.number_integer)); + } + } + break; + } + + case value_t::number_unsigned: + { + if (j.m_value.number_unsigned < 128) + { + // positive fixnum + write_number(static_cast(j.m_value.number_integer)); + } + else if (j.m_value.number_unsigned <= (std::numeric_limits::max)()) + { + // uint 8 + oa->write_character(static_cast(0xcc)); + write_number(static_cast(j.m_value.number_integer)); + } + else if (j.m_value.number_unsigned <= (std::numeric_limits::max)()) + { + // uint 16 + oa->write_character(static_cast(0xcd)); + write_number(static_cast(j.m_value.number_integer)); + } + else if (j.m_value.number_unsigned <= (std::numeric_limits::max)()) + { + // uint 32 + oa->write_character(static_cast(0xce)); + write_number(static_cast(j.m_value.number_integer)); + } + else if (j.m_value.number_unsigned <= (std::numeric_limits::max)()) + { + // uint 64 + oa->write_character(static_cast(0xcf)); + write_number(static_cast(j.m_value.number_integer)); + } + break; + } + + case value_t::number_float: // float 64 + { + oa->write_character(static_cast(0xcb)); + write_number(j.m_value.number_float); + break; + } + + case value_t::string: + { + // step 1: write control byte and the string length + const auto N = j.m_value.string->size(); + if (N <= 31) + { + // fixstr + write_number(static_cast(0xa0 | N)); + } + else if (N <= 255) + { + // str 8 + oa->write_character(static_cast(0xd9)); + write_number(static_cast(N)); + } + else if (N <= 65535) + { + // str 16 + oa->write_character(static_cast(0xda)); + write_number(static_cast(N)); + } + else if (N <= 4294967295) + { + // str 32 + oa->write_character(static_cast(0xdb)); + write_number(static_cast(N)); + } + + // step 2: write the string + oa->write_characters( + reinterpret_cast(j.m_value.string->c_str()), + j.m_value.string->size()); + break; + } + + case value_t::array: + { + // step 1: write control byte and the array size + const auto N = j.m_value.array->size(); + if (N <= 15) + { + // fixarray + 
write_number(static_cast(0x90 | N)); + } + else if (N <= 0xffff) + { + // array 16 + oa->write_character(static_cast(0xdc)); + write_number(static_cast(N)); + } + else if (N <= 0xffffffff) + { + // array 32 + oa->write_character(static_cast(0xdd)); + write_number(static_cast(N)); + } + + // step 2: write each element + for (const auto& el : *j.m_value.array) + { + write_msgpack(el); + } + break; + } + + case value_t::object: + { + // step 1: write control byte and the object size + const auto N = j.m_value.object->size(); + if (N <= 15) + { + // fixmap + write_number(static_cast(0x80 | (N & 0xf))); + } + else if (N <= 65535) + { + // map 16 + oa->write_character(static_cast(0xde)); + write_number(static_cast(N)); + } + else if (N <= 4294967295) + { + // map 32 + oa->write_character(static_cast(0xdf)); + write_number(static_cast(N)); + } + + // step 2: write each element + for (const auto& el : *j.m_value.object) + { + write_msgpack(el.first); + write_msgpack(el.second); + } + break; + } + + default: + break; + } + } + + private: + /* + @brief write a number to output input + + @param[in] n number of type @a NumberType + @tparam NumberType the type of the number + + @note This function needs to respect the system's endianess, because bytes + in CBOR and MessagePack are stored in network order (big endian) and + therefore need reordering on little endian systems. 
+ */ + template void write_number(NumberType n) + { + // step 1: write number to array of length NumberType + std::array vec; + std::memcpy(vec.data(), &n, sizeof(NumberType)); + + // step 2: write array to output (with possible reordering) + if (is_little_endian) + { + // reverse byte order prior to conversion if necessary + std::reverse(vec.begin(), vec.end()); + } + + oa->write_characters(vec.data(), sizeof(NumberType)); + } + + private: + /// whether we can assume little endianess + const bool is_little_endian = binary_reader::little_endianess(); + + /// the output + output_adapter_t oa = nullptr; +}; + +/////////////////// +// serialization // +/////////////////// + +template +class serializer +{ + using string_t = typename BasicJsonType::string_t; + using number_float_t = typename BasicJsonType::number_float_t; + using number_integer_t = typename BasicJsonType::number_integer_t; + using number_unsigned_t = typename BasicJsonType::number_unsigned_t; + public: + /*! + @param[in] s output stream to serialize to + @param[in] ichar indentation character to use + */ + serializer(output_adapter_t s, const char ichar) + : o(std::move(s)), loc(std::localeconv()), + thousands_sep(loc->thousands_sep == nullptr ? '\0' : loc->thousands_sep[0]), + decimal_point(loc->decimal_point == nullptr ? '\0' : loc->decimal_point[0]), + indent_char(ichar), indent_string(512, indent_char) {} + + // delete because of pointer members + serializer(const serializer&) = delete; + serializer& operator=(const serializer&) = delete; + + /*! + @brief internal implementation of the serialization function + + This function is called by the public member function dump and organizes + the serialization internally. The indentation level is propagated as + additional parameter. In case of arrays and objects, the function is + called recursively. 
+ + - strings and object keys are escaped using `escape_string()` + - integer numbers are converted implicitly via `operator<<` + - floating-point numbers are converted to a string using `"%g"` format + + @param[in] val value to serialize + @param[in] pretty_print whether the output shall be pretty-printed + @param[in] indent_step the indent level + @param[in] current_indent the current indent level (only used internally) + */ + void dump(const BasicJsonType& val, const bool pretty_print, + const bool ensure_ascii, + const unsigned int indent_step, + const unsigned int current_indent = 0) + { + switch (val.m_type) + { + case value_t::object: + { + if (val.m_value.object->empty()) + { + o->write_characters("{}", 2); + return; + } + + if (pretty_print) + { + o->write_characters("{\n", 2); + + // variable to hold indentation for recursive calls + const auto new_indent = current_indent + indent_step; + if (JSON_UNLIKELY(indent_string.size() < new_indent)) + { + indent_string.resize(indent_string.size() * 2, ' '); + } + + // first n-1 elements + auto i = val.m_value.object->cbegin(); + for (std::size_t cnt = 0; cnt < val.m_value.object->size() - 1; ++cnt, ++i) + { + o->write_characters(indent_string.c_str(), new_indent); + o->write_character('\"'); + dump_escaped(i->first, ensure_ascii); + o->write_characters("\": ", 3); + dump(i->second, true, ensure_ascii, indent_step, new_indent); + o->write_characters(",\n", 2); + } + + // last element + assert(i != val.m_value.object->cend()); + assert(std::next(i) == val.m_value.object->cend()); + o->write_characters(indent_string.c_str(), new_indent); + o->write_character('\"'); + dump_escaped(i->first, ensure_ascii); + o->write_characters("\": ", 3); + dump(i->second, true, ensure_ascii, indent_step, new_indent); + + o->write_character('\n'); + o->write_characters(indent_string.c_str(), current_indent); + o->write_character('}'); + } + else + { + o->write_character('{'); + + // first n-1 elements + auto i = 
val.m_value.object->cbegin(); + for (std::size_t cnt = 0; cnt < val.m_value.object->size() - 1; ++cnt, ++i) + { + o->write_character('\"'); + dump_escaped(i->first, ensure_ascii); + o->write_characters("\":", 2); + dump(i->second, false, ensure_ascii, indent_step, current_indent); + o->write_character(','); + } + + // last element + assert(i != val.m_value.object->cend()); + assert(std::next(i) == val.m_value.object->cend()); + o->write_character('\"'); + dump_escaped(i->first, ensure_ascii); + o->write_characters("\":", 2); + dump(i->second, false, ensure_ascii, indent_step, current_indent); + + o->write_character('}'); + } + + return; + } + + case value_t::array: + { + if (val.m_value.array->empty()) + { + o->write_characters("[]", 2); + return; + } + + if (pretty_print) + { + o->write_characters("[\n", 2); + + // variable to hold indentation for recursive calls + const auto new_indent = current_indent + indent_step; + if (JSON_UNLIKELY(indent_string.size() < new_indent)) + { + indent_string.resize(indent_string.size() * 2, ' '); + } + + // first n-1 elements + for (auto i = val.m_value.array->cbegin(); + i != val.m_value.array->cend() - 1; ++i) + { + o->write_characters(indent_string.c_str(), new_indent); + dump(*i, true, ensure_ascii, indent_step, new_indent); + o->write_characters(",\n", 2); + } + + // last element + assert(not val.m_value.array->empty()); + o->write_characters(indent_string.c_str(), new_indent); + dump(val.m_value.array->back(), true, ensure_ascii, indent_step, new_indent); + + o->write_character('\n'); + o->write_characters(indent_string.c_str(), current_indent); + o->write_character(']'); + } + else + { + o->write_character('['); + + // first n-1 elements + for (auto i = val.m_value.array->cbegin(); + i != val.m_value.array->cend() - 1; ++i) + { + dump(*i, false, ensure_ascii, indent_step, current_indent); + o->write_character(','); + } + + // last element + assert(not val.m_value.array->empty()); + dump(val.m_value.array->back(), false, 
ensure_ascii, indent_step, current_indent); + + o->write_character(']'); + } + + return; + } + + case value_t::string: + { + o->write_character('\"'); + dump_escaped(*val.m_value.string, ensure_ascii); + o->write_character('\"'); + return; + } + + case value_t::boolean: + { + if (val.m_value.boolean) + { + o->write_characters("true", 4); + } + else + { + o->write_characters("false", 5); + } + return; + } + + case value_t::number_integer: + { + dump_integer(val.m_value.number_integer); + return; + } + + case value_t::number_unsigned: + { + dump_integer(val.m_value.number_unsigned); + return; + } + + case value_t::number_float: + { + dump_float(val.m_value.number_float); + return; + } + + case value_t::discarded: + { + o->write_characters("", 11); + return; + } + + case value_t::null: + { + o->write_characters("null", 4); + return; + } + } + } + + private: + /*! + @brief returns the number of expected bytes following in UTF-8 string + + @param[in] u the first byte of a UTF-8 string + @return the number of expected bytes following + */ + static constexpr std::size_t bytes_following(const uint8_t u) + { + return ((0 <= u and u <= 127) ? 0 + : ((192 <= u and u <= 223) ? 1 + : ((224 <= u and u <= 239) ? 2 + : ((240 <= u and u <= 247) ? 3 : std::string::npos)))); + } + + /*! + @brief calculates the extra space to escape a JSON string + + @param[in] s the string to escape + @param[in] ensure_ascii whether to escape non-ASCII characters with + \uXXXX sequences + @return the number of characters required to escape string @a s + + @complexity Linear in the length of string @a s. 
+ */ + static std::size_t extra_space(const string_t& s, + const bool ensure_ascii) noexcept + { + std::size_t res = 0; + + for (std::size_t i = 0; i < s.size(); ++i) + { + switch (s[i]) + { + // control characters that can be escaped with a backslash + case '"': + case '\\': + case '\b': + case '\f': + case '\n': + case '\r': + case '\t': + { + // from c (1 byte) to \x (2 bytes) + res += 1; + break; + } + + // control characters that need \uxxxx escaping + case 0x00: + case 0x01: + case 0x02: + case 0x03: + case 0x04: + case 0x05: + case 0x06: + case 0x07: + case 0x0b: + case 0x0e: + case 0x0f: + case 0x10: + case 0x11: + case 0x12: + case 0x13: + case 0x14: + case 0x15: + case 0x16: + case 0x17: + case 0x18: + case 0x19: + case 0x1a: + case 0x1b: + case 0x1c: + case 0x1d: + case 0x1e: + case 0x1f: + { + // from c (1 byte) to \uxxxx (6 bytes) + res += 5; + break; + } + + default: + { + if (ensure_ascii and (s[i] & 0x80 or s[i] == 0x7F)) + { + const auto bytes = bytes_following(static_cast(s[i])); + if (bytes == std::string::npos) + { + // invalid characters are treated as is, so no + // additional space will be used + break; + } + + if (bytes == 3) + { + // codepoints that need 4 bytes (i.e., 3 additional + // bytes) in UTF-8 need a surrogate pair when \u + // escaping is used: from 4 bytes to \uxxxx\uxxxx + // (12 bytes) + res += (12 - bytes - 1); + } + else + { + // from x bytes to \uxxxx (6 bytes) + res += (6 - bytes - 1); + } + + // skip the additional bytes + i += bytes; + } + break; + } + } + } + + return res; + } + + static void escape_codepoint(int codepoint, string_t& result, std::size_t& pos) + { + // expecting a proper codepoint + assert(0x00 <= codepoint and codepoint <= 0x10FFFF); + + // the last written character was the backslash before the 'u' + assert(result[pos] == '\\'); + + // write the 'u' + result[++pos] = 'u'; + + // convert a number 0..15 to its hex representation (0..f) + static const std::array hexify = + { + { + '0', '1', '2', '3', '4', 
'5', '6', '7', + '8', '9', 'a', 'b', 'c', 'd', 'e', 'f' + } + }; + + if (codepoint < 0x10000) + { + // codepoints U+0000..U+FFFF can be represented as \uxxxx. + result[++pos] = hexify[(codepoint >> 12) & 0x0F]; + result[++pos] = hexify[(codepoint >> 8) & 0x0F]; + result[++pos] = hexify[(codepoint >> 4) & 0x0F]; + result[++pos] = hexify[codepoint & 0x0F]; + } + else + { + // codepoints U+10000..U+10FFFF need a surrogate pair to be + // represented as \uxxxx\uxxxx. + // http://www.unicode.org/faq/utf_bom.html#utf16-4 + codepoint -= 0x10000; + const int high_surrogate = 0xD800 | ((codepoint >> 10) & 0x3FF); + const int low_surrogate = 0xDC00 | (codepoint & 0x3FF); + result[++pos] = hexify[(high_surrogate >> 12) & 0x0F]; + result[++pos] = hexify[(high_surrogate >> 8) & 0x0F]; + result[++pos] = hexify[(high_surrogate >> 4) & 0x0F]; + result[++pos] = hexify[high_surrogate & 0x0F]; + ++pos; // backslash is already in output + result[++pos] = 'u'; + result[++pos] = hexify[(low_surrogate >> 12) & 0x0F]; + result[++pos] = hexify[(low_surrogate >> 8) & 0x0F]; + result[++pos] = hexify[(low_surrogate >> 4) & 0x0F]; + result[++pos] = hexify[low_surrogate & 0x0F]; + } + + ++pos; + } + + /*! + @brief dump escaped string + + Escape a string by replacing certain special characters by a sequence of an + escape character (backslash) and another character and other control + characters by a sequence of "\u" followed by a four-digit hex + representation. The escaped string is written to output stream @a o. + + @param[in] s the string to escape + @param[in] ensure_ascii whether to escape non-ASCII characters with + \uXXXX sequences + + @complexity Linear in the length of string @a s. 
+ */ + void dump_escaped(const string_t& s, const bool ensure_ascii) const + { + const auto space = extra_space(s, ensure_ascii); + if (space == 0) + { + o->write_characters(s.c_str(), s.size()); + return; + } + + // create a result string of necessary size + string_t result(s.size() + space, '\\'); + std::size_t pos = 0; + + for (std::size_t i = 0; i < s.size(); ++i) + { + switch (s[i]) + { + case '"': // quotation mark (0x22) + { + result[pos + 1] = '"'; + pos += 2; + break; + } + + case '\\': // reverse solidus (0x5c) + { + // nothing to change + pos += 2; + break; + } + + case '\b': // backspace (0x08) + { + result[pos + 1] = 'b'; + pos += 2; + break; + } + + case '\f': // formfeed (0x0c) + { + result[pos + 1] = 'f'; + pos += 2; + break; + } + + case '\n': // newline (0x0a) + { + result[pos + 1] = 'n'; + pos += 2; + break; + } + + case '\r': // carriage return (0x0d) + { + result[pos + 1] = 'r'; + pos += 2; + break; + } + + case '\t': // horizontal tab (0x09) + { + result[pos + 1] = 't'; + pos += 2; + break; + } + + default: + { + // escape control characters (0x00..0x1F) or, if + // ensure_ascii parameter is used, non-ASCII characters + if ((0x00 <= s[i] and s[i] <= 0x1F) or + (ensure_ascii and (s[i] & 0x80 or s[i] == 0x7F))) + { + const auto bytes = bytes_following(static_cast(s[i])); + if (bytes == std::string::npos) + { + // copy invalid character as is + result[pos++] = s[i]; + break; + } + + // check that the additional bytes are present + assert(i + bytes < s.size()); + + // to use \uxxxx escaping, we first need to caluclate + // the codepoint from the UTF-8 bytes + int codepoint = 0; + + assert(0 <= bytes and bytes <= 3); + switch (bytes) + { + case 0: + { + codepoint = s[i] & 0xFF; + break; + } + + case 1: + { + codepoint = ((s[i] & 0x3F) << 6) + + (s[i + 1] & 0x7F); + break; + } + + case 2: + { + codepoint = ((s[i] & 0x1F) << 12) + + ((s[i + 1] & 0x7F) << 6) + + (s[i + 2] & 0x7F); + break; + } + + case 3: + { + codepoint = ((s[i] & 0xF) << 18) + + 
((s[i + 1] & 0x7F) << 12) + + ((s[i + 2] & 0x7F) << 6) + + (s[i + 3] & 0x7F); + break; + } + + default: + break; // LCOV_EXCL_LINE + } + + escape_codepoint(codepoint, result, pos); + i += bytes; + } + else + { + // all other characters are added as-is + result[pos++] = s[i]; + } + break; + } + } + } + + assert(pos == result.size()); + o->write_characters(result.c_str(), result.size()); + } + + /*! + @brief dump an integer + + Dump a given integer to output stream @a o. Works internally with + @a number_buffer. + + @param[in] x integer number (signed or unsigned) to dump + @tparam NumberType either @a number_integer_t or @a number_unsigned_t + */ + template < + typename NumberType, + detail::enable_if_t::value or + std::is_same::value, + int> = 0 > + void dump_integer(NumberType x) + { + // special case for "0" + if (x == 0) + { + o->write_character('0'); + return; + } + + const bool is_negative = x < 0; + std::size_t i = 0; + + // spare 1 byte for '\0' + while (x != 0 and i < number_buffer.size() - 1) + { + const auto digit = std::labs(static_cast(x % 10)); + number_buffer[i++] = static_cast('0' + digit); + x /= 10; + } + + // make sure the number has been processed completely + assert(x == 0); + + if (is_negative) + { + // make sure there is capacity for the '-' + assert(i < number_buffer.size() - 2); + number_buffer[i++] = '-'; + } + + std::reverse(number_buffer.begin(), number_buffer.begin() + i); + o->write_characters(number_buffer.data(), i); + } + + /*! + @brief dump a floating-point number + + Dump a given floating-point number to output stream @a o. Works internally + with @a number_buffer. 
+ + @param[in] x floating-point number to dump + */ + void dump_float(number_float_t x) + { + // NaN / inf + if (not std::isfinite(x) or std::isnan(x)) + { + o->write_characters("null", 4); + return; + } + + // special case for 0.0 and -0.0 + if (x == 0) + { + if (std::signbit(x)) + { + o->write_characters("-0.0", 4); + } + else + { + o->write_characters("0.0", 3); + } + return; + } + + // get number of digits for a text -> float -> text round-trip + static constexpr auto d = std::numeric_limits::digits10; + + // the actual conversion + std::ptrdiff_t len = snprintf(number_buffer.data(), number_buffer.size(), "%.*g", d, x); + + // negative value indicates an error + assert(len > 0); + // check if buffer was large enough + assert(static_cast(len) < number_buffer.size()); + + // erase thousands separator + if (thousands_sep != '\0') + { + const auto end = std::remove(number_buffer.begin(), + number_buffer.begin() + len, thousands_sep); + std::fill(end, number_buffer.end(), '\0'); + assert((end - number_buffer.begin()) <= len); + len = (end - number_buffer.begin()); + } + + // convert decimal point to '.' + if (decimal_point != '\0' and decimal_point != '.') + { + const auto dec_pos = std::find(number_buffer.begin(), number_buffer.end(), decimal_point); + if (dec_pos != number_buffer.end()) + { + *dec_pos = '.'; + } + } + + o->write_characters(number_buffer.data(), static_cast(len)); + + // determine if need to append ".0" + const bool value_is_int_like = + std::none_of(number_buffer.begin(), number_buffer.begin() + len + 1, + [](char c) + { + return (c == '.' 
or c == 'e'); + }); + + if (value_is_int_like) + { + o->write_characters(".0", 2); + } + } + + private: + /// the output of the serializer + output_adapter_t o = nullptr; + + /// a (hopefully) large enough character buffer + std::array number_buffer{{}}; + + /// the locale + const std::lconv* loc = nullptr; + /// the locale's thousand separator character + const char thousands_sep = '\0'; + /// the locale's decimal point character + const char decimal_point = '\0'; + + /// the indentation character + const char indent_char; + + /// the indentation string + string_t indent_string; +}; + +template +class json_ref +{ + public: + using value_type = BasicJsonType; + + json_ref(value_type&& value) + : owned_value(std::move(value)), + value_ref(&owned_value), + is_rvalue(true) + {} + + json_ref(const value_type& value) + : value_ref(const_cast(&value)), + is_rvalue(false) + {} + + json_ref(std::initializer_list init) + : owned_value(init), + value_ref(&owned_value), + is_rvalue(true) + {} + + template + json_ref(Args... args) + : owned_value(std::forward(args)...), + value_ref(&owned_value), + is_rvalue(true) + {} + + // class should be movable only + json_ref(json_ref&&) = default; + json_ref(const json_ref&) = delete; + json_ref& operator=(const json_ref&) = delete; + + value_type moved_or_copied() const + { + if (is_rvalue) + { + return std::move(*value_ref); + } + return *value_ref; + } + + value_type const& operator*() const + { + return *static_cast(value_ref); + } + + value_type const* operator->() const + { + return static_cast(value_ref); + } + + private: + mutable value_type owned_value = nullptr; + value_type* value_ref = nullptr; + const bool is_rvalue; +}; + +} // namespace detail /// namespace to hold default `to_json` / `from_json` functions namespace @@ -1252,7 +6881,7 @@ This serializer ignores the template arguments and uses ADL ([argument-dependent lookup](http://en.cppreference.com/w/cpp/language/adl)) for serialization. 
*/ -template +template struct adl_serializer { /*! @@ -1288,6 +6917,326 @@ struct adl_serializer } }; +/*! +@brief JSON Pointer + +A JSON pointer defines a string syntax for identifying a specific value +within a JSON document. It can be used with functions `at` and +`operator[]`. Furthermore, JSON pointers are the base for JSON patches. + +@sa [RFC 6901](https://tools.ietf.org/html/rfc6901) + +@since version 2.0.0 +*/ +class json_pointer +{ + /// allow basic_json to access private members + NLOHMANN_BASIC_JSON_TPL_DECLARATION + friend class basic_json; + + public: + /*! + @brief create JSON pointer + + Create a JSON pointer according to the syntax described in + [Section 3 of RFC6901](https://tools.ietf.org/html/rfc6901#section-3). + + @param[in] s string representing the JSON pointer; if omitted, the empty + string is assumed which references the whole JSON value + + @throw parse_error.107 if the given JSON pointer @a s is nonempty and + does not begin with a slash (`/`); see example below + + @throw parse_error.108 if a tilde (`~`) in the given JSON pointer @a s + is not followed by `0` (representing `~`) or `1` (representing `/`); + see example below + + @liveexample{The example shows the construction several valid JSON + pointers as well as the exceptional behavior.,json_pointer} + + @since version 2.0.0 + */ + explicit json_pointer(const std::string& s = "") : reference_tokens(split(s)) {} + + /*! 
+ @brief return a string representation of the JSON pointer + + @invariant For each JSON pointer `ptr`, it holds: + @code {.cpp} + ptr == json_pointer(ptr.to_string()); + @endcode + + @return a string representation of the JSON pointer + + @liveexample{The example shows the result of `to_string`., + json_pointer__to_string} + + @since version 2.0.0 + */ + std::string to_string() const noexcept + { + return std::accumulate(reference_tokens.begin(), reference_tokens.end(), + std::string{}, + [](const std::string & a, const std::string & b) + { + return a + "/" + escape(b); + }); + } + + /// @copydoc to_string() + operator std::string() const + { + return to_string(); + } + + private: + /*! + @brief remove and return last reference pointer + @throw out_of_range.405 if JSON pointer has no parent + */ + std::string pop_back() + { + if (JSON_UNLIKELY(is_root())) + { + JSON_THROW(detail::out_of_range::create(405, "JSON pointer has no parent")); + } + + auto last = reference_tokens.back(); + reference_tokens.pop_back(); + return last; + } + + /// return whether pointer points to the root document + bool is_root() const + { + return reference_tokens.empty(); + } + + json_pointer top() const + { + if (JSON_UNLIKELY(is_root())) + { + JSON_THROW(detail::out_of_range::create(405, "JSON pointer has no parent")); + } + + json_pointer result = *this; + result.reference_tokens = {reference_tokens[0]}; + return result; + } + + + /*! + @brief create and return a reference to the pointed to value + + @complexity Linear in the number of reference tokens. + + @throw parse_error.109 if array index is not a number + @throw type_error.313 if value cannot be unflattened + */ + NLOHMANN_BASIC_JSON_TPL_DECLARATION + NLOHMANN_BASIC_JSON_TPL& get_and_create(NLOHMANN_BASIC_JSON_TPL& j) const; + + /*! + @brief return a reference to the pointed to value + + @note This version does not throw if a value is not present, but tries to + create nested values instead. 
For instance, calling this function + with pointer `"/this/that"` on a null value is equivalent to calling + `operator[]("this").operator[]("that")` on that value, effectively + changing the null value to an object. + + @param[in] ptr a JSON value + + @return reference to the JSON value pointed to by the JSON pointer + + @complexity Linear in the length of the JSON pointer. + + @throw parse_error.106 if an array index begins with '0' + @throw parse_error.109 if an array index was not a number + @throw out_of_range.404 if the JSON pointer can not be resolved + */ + NLOHMANN_BASIC_JSON_TPL_DECLARATION + NLOHMANN_BASIC_JSON_TPL& get_unchecked(NLOHMANN_BASIC_JSON_TPL* ptr) const; + + /*! + @throw parse_error.106 if an array index begins with '0' + @throw parse_error.109 if an array index was not a number + @throw out_of_range.402 if the array index '-' is used + @throw out_of_range.404 if the JSON pointer can not be resolved + */ + NLOHMANN_BASIC_JSON_TPL_DECLARATION + NLOHMANN_BASIC_JSON_TPL& get_checked(NLOHMANN_BASIC_JSON_TPL* ptr) const; + + /*! + @brief return a const reference to the pointed to value + + @param[in] ptr a JSON value + + @return const reference to the JSON value pointed to by the JSON + pointer + + @throw parse_error.106 if an array index begins with '0' + @throw parse_error.109 if an array index was not a number + @throw out_of_range.402 if the array index '-' is used + @throw out_of_range.404 if the JSON pointer can not be resolved + */ + NLOHMANN_BASIC_JSON_TPL_DECLARATION + const NLOHMANN_BASIC_JSON_TPL& get_unchecked(const NLOHMANN_BASIC_JSON_TPL* ptr) const; + + /*! + @throw parse_error.106 if an array index begins with '0' + @throw parse_error.109 if an array index was not a number + @throw out_of_range.402 if the array index '-' is used + @throw out_of_range.404 if the JSON pointer can not be resolved + */ + NLOHMANN_BASIC_JSON_TPL_DECLARATION + const NLOHMANN_BASIC_JSON_TPL& get_checked(const NLOHMANN_BASIC_JSON_TPL* ptr) const; + + /*! 
+ @brief split the string input to reference tokens + + @note This function is only called by the json_pointer constructor. + All exceptions below are documented there. + + @throw parse_error.107 if the pointer is not empty or begins with '/' + @throw parse_error.108 if character '~' is not followed by '0' or '1' + */ + static std::vector split(const std::string& reference_string) + { + std::vector result; + + // special case: empty reference string -> no reference tokens + if (reference_string.empty()) + { + return result; + } + + // check if nonempty reference string begins with slash + if (JSON_UNLIKELY(reference_string[0] != '/')) + { + JSON_THROW(detail::parse_error::create(107, 1, + "JSON pointer must be empty or begin with '/' - was: '" + + reference_string + "'")); + } + + // extract the reference tokens: + // - slash: position of the last read slash (or end of string) + // - start: position after the previous slash + for ( + // search for the first slash after the first character + std::size_t slash = reference_string.find_first_of('/', 1), + // set the beginning of the first reference token + start = 1; + // we can stop if start == string::npos+1 = 0 + start != 0; + // set the beginning of the next reference token + // (will eventually be 0 if slash == std::string::npos) + start = slash + 1, + // find next slash + slash = reference_string.find_first_of('/', start)) + { + // use the text between the beginning of the reference token + // (start) and the last slash (slash). 
+ auto reference_token = reference_string.substr(start, slash - start); + + // check reference tokens are properly escaped + for (std::size_t pos = reference_token.find_first_of('~'); + pos != std::string::npos; + pos = reference_token.find_first_of('~', pos + 1)) + { + assert(reference_token[pos] == '~'); + + // ~ must be followed by 0 or 1 + if (JSON_UNLIKELY(pos == reference_token.size() - 1 or + (reference_token[pos + 1] != '0' and + reference_token[pos + 1] != '1'))) + { + JSON_THROW(detail::parse_error::create(108, 0, "escape character '~' must be followed with '0' or '1'")); + } + } + + // finally, store the reference token + unescape(reference_token); + result.push_back(reference_token); + } + + return result; + } + + /*! + @brief replace all occurrences of a substring by another string + + @param[in,out] s the string to manipulate; changed so that all + occurrences of @a f are replaced with @a t + @param[in] f the substring to replace with @a t + @param[in] t the string to replace @a f + + @pre The search string @a f must not be empty. **This precondition is + enforced with an assertion.** + + @since version 2.0.0 + */ + static void replace_substring(std::string& s, const std::string& f, + const std::string& t) + { + assert(not f.empty()); + for (auto pos = s.find(f); // find first occurrence of f + pos != std::string::npos; // make sure f was found + s.replace(pos, f.size(), t), // replace with t, and + pos = s.find(f, pos + t.size())) // find next occurrence of f + {} + } + + /// escape "~"" to "~0" and "/" to "~1" + static std::string escape(std::string s) + { + replace_substring(s, "~", "~0"); + replace_substring(s, "/", "~1"); + return s; + } + + /// unescape "~1" to tilde and "~0" to slash (order is important!) + static void unescape(std::string& s) + { + replace_substring(s, "~1", "/"); + replace_substring(s, "~0", "~"); + } + + /*! 
+ @param[in] reference_string the reference string to the current value + @param[in] value the value to consider + @param[in,out] result the result object to insert values to + + @note Empty objects or arrays are flattened to `null`. + */ + NLOHMANN_BASIC_JSON_TPL_DECLARATION + static void flatten(const std::string& reference_string, + const NLOHMANN_BASIC_JSON_TPL& value, + NLOHMANN_BASIC_JSON_TPL& result); + + /*! + @param[in] value flattened JSON + + @return unflattened JSON + + @throw parse_error.109 if array index is not a number + @throw type_error.314 if value is not an object + @throw type_error.315 if object values are not primitive + @throw type_error.313 if value cannot be unflattened + */ + NLOHMANN_BASIC_JSON_TPL_DECLARATION + static NLOHMANN_BASIC_JSON_TPL + unflatten(const NLOHMANN_BASIC_JSON_TPL& value); + + friend bool operator==(json_pointer const& lhs, + json_pointer const& rhs) noexcept; + + friend bool operator!=(json_pointer const& lhs, + json_pointer const& rhs) noexcept; + + /// the reference tokens + std::vector reference_tokens; +}; /*! 
@brief a class to store JSON values @@ -1370,35 +7319,53 @@ Format](http://rfc7159.net/rfc7159) @nosubgrouping */ -template < - template class ObjectType = std::map, - template class ArrayType = std::vector, - class StringType = std::string, - class BooleanType = bool, - class NumberIntegerType = std::int64_t, - class NumberUnsignedType = std::uint64_t, - class NumberFloatType = double, - template class AllocatorType = std::allocator, - template class JSONSerializer = adl_serializer - > +NLOHMANN_BASIC_JSON_TPL_DECLARATION class basic_json { private: template friend struct detail::external_constructor; + friend ::nlohmann::json_pointer; + friend ::nlohmann::detail::parser; + friend ::nlohmann::detail::serializer; + template + friend class ::nlohmann::detail::iter_impl; + template + friend class ::nlohmann::detail::binary_writer; + template + friend class ::nlohmann::detail::binary_reader; + /// workaround type for MSVC - using basic_json_t = basic_json; + using basic_json_t = NLOHMANN_BASIC_JSON_TPL; + + // convenience aliases for types residing in namespace detail; + using lexer = ::nlohmann::detail::lexer; + using parser = ::nlohmann::detail::parser; + + using primitive_iterator_t = ::nlohmann::detail::primitive_iterator_t; + template + using internal_iterator = ::nlohmann::detail::internal_iterator; + template + using iter_impl = ::nlohmann::detail::iter_impl; + template + using iteration_proxy = ::nlohmann::detail::iteration_proxy; + template using json_reverse_iterator = ::nlohmann::detail::json_reverse_iterator; + + template + using output_adapter_t = ::nlohmann::detail::output_adapter_t; + + using binary_reader = ::nlohmann::detail::binary_reader; + template using binary_writer = ::nlohmann::detail::binary_writer; + + using serializer = ::nlohmann::detail::serializer; public: using value_t = detail::value_t; // forward declarations - template class iter_impl; - template class json_reverse_iterator; - class json_pointer; + using json_pointer = 
::nlohmann::json_pointer; template using json_serializer = JSONSerializer; + using initializer_list_t = std::initializer_list>; //////////////// // exceptions // @@ -1493,6 +7460,9 @@ class basic_json @liveexample{The following code shows an example output of the `meta()` function.,meta} + @exceptionsafety Strong guarantee: if an exception is thrown, there are no + changes to any JSON value. + @complexity Constant. @since 2.1.0 @@ -1521,10 +7491,10 @@ class basic_json result["platform"] = "unknown"; #endif -#if defined(__clang__) - result["compiler"] = {{"family", "clang"}, {"version", __clang_version__}}; -#elif defined(__ICC) || defined(__INTEL_COMPILER) +#if defined(__ICC) || defined(__INTEL_COMPILER) result["compiler"] = {{"family", "icc"}, {"version", __INTEL_COMPILER}}; +#elif defined(__clang__) + result["compiler"] = {{"family", "clang"}, {"version", __clang_version__}}; #elif defined(__GNUC__) || defined(__GNUG__) result["compiler"] = {{"family", "gcc"}, {"version", std::to_string(__GNUC__) + "." + std::to_string(__GNUC_MINOR__) + "." + std::to_string(__GNUC_PATCHLEVEL__)}}; #elif defined(__HP_cc) || defined(__HP_aCC) @@ -1619,7 +7589,7 @@ class basic_json [RFC 7159](http://rfc7159.net/rfc7159) specifies: > An implementation may set limits on the maximum depth of nesting. - In this class, the object's limit of nesting is not constraint explicitly. + In this class, the object's limit of nesting is not explicitly constrained. However, a maximum depth of nesting may be introduced by the compiler or runtime environment. A theoretical limit can be queried by calling the @ref max_size function of a JSON object. @@ -1678,7 +7648,7 @@ class basic_json [RFC 7159](http://rfc7159.net/rfc7159) specifies: > An implementation may set limits on the maximum depth of nesting. - In this class, the array's limit of nesting is not constraint explicitly. + In this class, the array's limit of nesting is not explicitly constrained. 
However, a maximum depth of nesting may be introduced by the compiler or runtime environment. A theoretical limit can be queried by calling the @ref max_size function of a JSON array. @@ -2127,17 +8097,70 @@ class basic_json string = create(value); } + /// constructor for rvalue strings + json_value(string_t&& value) + { + string = create(std::move(value)); + } + /// constructor for objects json_value(const object_t& value) { object = create(value); } + /// constructor for rvalue objects + json_value(object_t&& value) + { + object = create(std::move(value)); + } + /// constructor for arrays json_value(const array_t& value) { array = create(value); } + + /// constructor for rvalue arrays + json_value(array_t&& value) + { + array = create(std::move(value)); + } + + void destroy(value_t t) + { + switch (t) + { + case value_t::object: + { + AllocatorType alloc; + alloc.destroy(object); + alloc.deallocate(object, 1); + break; + } + + case value_t::array: + { + AllocatorType alloc; + alloc.destroy(array); + alloc.deallocate(array, 1); + break; + } + + case value_t::string: + { + AllocatorType alloc; + alloc.destroy(string); + alloc.deallocate(string, 1); + break; + } + + default: + { + break; + } + } + } }; /*! @@ -2161,31 +8184,7 @@ class basic_json // JSON parser callback // ////////////////////////// - /*! - @brief JSON callback events - - This enumeration lists the parser events that can trigger calling a - callback function of type @ref parser_callback_t during parsing. 
- - @image html callback_events.png "Example when certain parse events are triggered" - - @since version 1.0.0 - */ - enum class parse_event_t : uint8_t - { - /// the parser read `{` and started to process a JSON object - object_start, - /// the parser read `}` and finished processing a JSON object - object_end, - /// the parser read `[` and started to process a JSON array - array_start, - /// the parser read `]` and finished processing a JSON array - array_end, - /// the parser read a key of a value in an object - key, - /// the parser finished reading a JSON value - value - }; + using parse_event_t = typename parser::parse_event_t; /*! @brief per-element parser callback type @@ -2239,9 +8238,7 @@ class basic_json @since version 1.0.0 */ - using parser_callback_t = std::function; + using parser_callback_t = typename parser::parser_callback_t; ////////////////// @@ -2268,17 +8265,22 @@ class basic_json object | `{}` array | `[]` - @param[in] value_type the type of the value to create + @param[in] v the type of the value to create @complexity Constant. + @exceptionsafety Strong guarantee: if an exception is thrown, there are no + changes to any JSON value. + @liveexample{The following code shows the constructor for different @ref value_t values,basic_json__value_t} + @sa @ref clear() -- restores the postcondition of this constructor + @since version 1.0.0 */ - basic_json(const value_t value_type) - : m_type(value_type), m_value(value_type) + basic_json(const value_t v) + : m_type(v), m_value(v) { assert_invariant(); } @@ -2319,9 +8321,9 @@ class basic_json following types: - **arrays**: @ref array_t and all kinds of compatible containers such as `std::vector`, `std::deque`, `std::list`, `std::forward_list`, - `std::array`, `std::set`, `std::unordered_set`, `std::multiset`, and - `unordered_multiset` with a `value_type` from which a @ref basic_json - value can be constructed. 
+ `std::array`, `std::valarray`, `std::set`, `std::unordered_set`, + `std::multiset`, and `std::unordered_multiset` with a `value_type` from + which a @ref basic_json value can be constructed. - **objects**: @ref object_t and all kinds of compatible associative containers such as `std::map`, `std::unordered_map`, `std::multimap`, and `std::unordered_multimap` with a `key_type` compatible to @@ -2347,13 +8349,16 @@ class basic_json @tparam U = `uncvref_t` - @param[in] val the value to be forwarded + @param[in] val the value to be forwarded to the respective constructor @complexity Usually linear in the size of the passed @a val, also depending on the implementation of the called `to_json()` method. - @throw what `json_serializer::to_json()` throws + @exceptionsafety Depends on the called constructor. For types directly + supported by the library (i.e., all types for which no `to_json()` function + was provided), strong guarantee holds: if an exception is thrown, there are + no changes to any JSON value. @liveexample{The following code shows the constructor with several compatible types.,basic_json__CompatibleType} @@ -2393,7 +8398,7 @@ class basic_json 1. The empty initializer list is written as `{}` which is exactly an empty JSON object. - 2. C++ has now way of describing mapped types other than to list a list of + 2. C++ has no way of describing mapped types other than to list a list of pairs. As JSON requires that keys must be of type string, rule 2 is the weakest constraint one can pose on initializer lists to interpret them as an object. 
@@ -2403,10 +8408,10 @@ class basic_json With the rules described above, the following JSON values cannot be expressed by an initializer list: - - the empty array (`[]`): use @ref array(std::initializer_list) + - the empty array (`[]`): use @ref array(initializer_list_t) with an empty initializer list in this case - arrays whose elements satisfy rule 2: use @ref - array(std::initializer_list) with the same initializer list + array(initializer_list_t) with the same initializer list in this case @note When used without parentheses around an empty initializer list, @ref @@ -2418,8 +8423,8 @@ class basic_json @param[in] type_deduction internal parameter; when set to `true`, the type of the JSON value is deducted from the initializer list @a init; when set to `false`, the type provided via @a manual_type is forced. This mode is - used by the functions @ref array(std::initializer_list) and - @ref object(std::initializer_list). + used by the functions @ref array(initializer_list_t) and + @ref object(initializer_list_t). @param[in] manual_type internal parameter; when @a type_deduction is set to `false`, the created JSON value will use the provided type (only @ref @@ -2430,31 +8435,34 @@ class basic_json `value_t::object`, but @a init contains an element which is not a pair whose first element is a string. In this case, the constructor could not create an object. If @a type_deduction would have be `true`, an array - would have been created. See @ref object(std::initializer_list) + would have been created. See @ref object(initializer_list_t) for an example. @complexity Linear in the size of the initializer list @a init. + @exceptionsafety Strong guarantee: if an exception is thrown, there are no + changes to any JSON value. 
+ @liveexample{The example below shows how JSON values are created from initializer lists.,basic_json__list_init_t} - @sa @ref array(std::initializer_list) -- create a JSON array + @sa @ref array(initializer_list_t) -- create a JSON array value from an initializer list - @sa @ref object(std::initializer_list) -- create a JSON object + @sa @ref object(initializer_list_t) -- create a JSON object value from an initializer list @since version 1.0.0 */ - basic_json(std::initializer_list init, + basic_json(initializer_list_t init, bool type_deduction = true, value_t manual_type = value_t::array) { // check if each element is an array with two elements whose first // element is a string bool is_an_object = std::all_of(init.begin(), init.end(), - [](const basic_json & element) + [](const detail::json_ref& element_ref) { - return element.is_array() and element.size() == 2 and element[0].is_string(); + return (element_ref->is_array() and element_ref->size() == 2 and (*element_ref)[0].is_string()); }); // adjust type if type deduction is not wanted @@ -2467,7 +8475,7 @@ class basic_json } // if object is wanted but impossible, throw an exception - if (manual_type == value_t::object and not is_an_object) + if (JSON_UNLIKELY(manual_type == value_t::object and not is_an_object)) { JSON_THROW(type_error::create(301, "cannot create object from initializer list")); } @@ -2479,16 +8487,19 @@ class basic_json m_type = value_t::object; m_value = value_t::object; - std::for_each(init.begin(), init.end(), [this](const basic_json & element) + std::for_each(init.begin(), init.end(), [this](const detail::json_ref& element_ref) { - m_value.object->emplace(*(element[0].m_value.string), element[1]); + auto element = element_ref.moved_or_copied(); + m_value.object->emplace( + std::move(*((*element.m_value.array)[0].m_value.string)), + std::move((*element.m_value.array)[1])); }); } else { // the initializer list describes an array -> create array m_type = value_t::array; - m_value.array = 
create(init); + m_value.array = create(init.begin(), init.end()); } assert_invariant(); @@ -2503,7 +8514,7 @@ class basic_json @note This function is only needed to express two edge cases that cannot be realized with the initializer list constructor (@ref - basic_json(std::initializer_list, bool, value_t)). These cases + basic_json(initializer_list_t, bool, value_t)). These cases are: 1. creating an array whose elements are all pairs whose first element is a string -- in this case, the initializer list constructor would create an @@ -2518,18 +8529,20 @@ class basic_json @complexity Linear in the size of @a init. + @exceptionsafety Strong guarantee: if an exception is thrown, there are no + changes to any JSON value. + @liveexample{The following code shows an example for the `array` function.,array} - @sa @ref basic_json(std::initializer_list, bool, value_t) -- + @sa @ref basic_json(initializer_list_t, bool, value_t) -- create a JSON value from an initializer list - @sa @ref object(std::initializer_list) -- create a JSON object + @sa @ref object(initializer_list_t) -- create a JSON object value from an initializer list @since version 1.0.0 */ - static basic_json array(std::initializer_list init = - std::initializer_list()) + static basic_json array(initializer_list_t init = {}) { return basic_json(init, false, value_t::array); } @@ -2542,10 +8555,10 @@ class basic_json the initializer list is empty, the empty object `{}` is created. @note This function is only added for symmetry reasons. In contrast to the - related function @ref array(std::initializer_list), there are + related function @ref array(initializer_list_t), there are no cases which can only be expressed by this function. That is, any initializer list @a init can also be passed to the initializer list - constructor @ref basic_json(std::initializer_list, bool, value_t). + constructor @ref basic_json(initializer_list_t, bool, value_t). 
@param[in] init initializer list to create an object from (optional) @@ -2553,24 +8566,26 @@ class basic_json @throw type_error.301 if @a init is not a list of pairs whose first elements are strings. In this case, no object can be created. When such a - value is passed to @ref basic_json(std::initializer_list, bool, value_t), + value is passed to @ref basic_json(initializer_list_t, bool, value_t), an array would have been created from the passed initializer list @a init. See example below. @complexity Linear in the size of @a init. + @exceptionsafety Strong guarantee: if an exception is thrown, there are no + changes to any JSON value. + @liveexample{The following code shows an example for the `object` function.,object} - @sa @ref basic_json(std::initializer_list, bool, value_t) -- + @sa @ref basic_json(initializer_list_t, bool, value_t) -- create a JSON value from an initializer list - @sa @ref array(std::initializer_list) -- create a JSON array + @sa @ref array(initializer_list_t) -- create a JSON array value from an initializer list @since version 1.0.0 */ - static basic_json object(std::initializer_list init = - std::initializer_list()) + static basic_json object(initializer_list_t init = {}) { return basic_json(init, false, value_t::object); } @@ -2579,14 +8594,18 @@ class basic_json @brief construct an array with count copies of given value Constructs a JSON array value by creating @a cnt copies of a passed value. - In case @a cnt is `0`, an empty array is created. As postcondition, - `std::distance(begin(),end()) == cnt` holds. + In case @a cnt is `0`, an empty array is created. @param[in] cnt the number of JSON copies of @a val to create @param[in] val the JSON value to copy + @post `std::distance(begin(),end()) == cnt` holds. + @complexity Linear in @a cnt. + @exceptionsafety Strong guarantee: if an exception is thrown, there are no + changes to any JSON value. 
+ @liveexample{The following code shows examples for the @ref basic_json(size_type\, const basic_json&) constructor.,basic_json__size_type_basic_json} @@ -2605,12 +8624,13 @@ class basic_json Constructs the JSON value with the contents of the range `[first, last)`. The semantics depends on the different types a JSON value can have: - - In case of primitive types (number, boolean, or string), @a first must - be `begin()` and @a last must be `end()`. In this case, the value is + - In case of a null type, invalid_iterator.206 is thrown. + - In case of other primitive types (number, boolean, or string), @a first + must be `begin()` and @a last must be `end()`. In this case, the value is copied. Otherwise, invalid_iterator.204 is thrown. - In case of structured types (array, object), the constructor behaves as - similar versions for `std::vector`. - - In case of a null type, invalid_iterator.206 is thrown. + similar versions for `std::vector` or `std::map`; that is, a JSON array + or object is constructed from the values in the range. @tparam InputIT an input iterator type (@ref iterator or @ref const_iterator) @@ -2619,11 +8639,20 @@ class basic_json @param[in] last end of the range to copy from (excluded) @pre Iterators @a first and @a last must be initialized. **This - precondition is enforced with an assertion.** + precondition is enforced with an assertion (see warning).** If + assertions are switched off, a violation of this precondition yields + undefined behavior. @pre Range `[first, last)` is valid. Usually, this precondition cannot be checked efficiently. Only certain edge cases are detected; see the - description of the exceptions below. + description of the exceptions below. A violation of this precondition + yields undefined behavior. + + @warning A precondition is enforced with a runtime assertion that will + result in calling `std::abort` if this precondition is not met. + Assertions can be disabled by defining `NDEBUG` at compile time. 
+ See http://en.cppreference.com/w/cpp/error/assert for more + information. @throw invalid_iterator.201 if iterators @a first and @a last are not compatible (i.e., do not belong to the same JSON value). In this case, @@ -2637,6 +8666,9 @@ class basic_json @complexity Linear in distance between @a first and @a last. + @exceptionsafety Strong guarantee: if an exception is thrown, there are no + changes to any JSON value. + @liveexample{The example below shows several ways to create JSON values by specifying a subrange with iterators.,basic_json__InputIt_InputIt} @@ -2651,7 +8683,7 @@ class basic_json assert(last.m_object != nullptr); // make sure iterator fits the current value - if (first.m_object != last.m_object) + if (JSON_UNLIKELY(first.m_object != last.m_object)) { JSON_THROW(invalid_iterator::create(201, "iterators are not compatible")); } @@ -2668,7 +8700,8 @@ class basic_json case value_t::number_unsigned: case value_t::string: { - if (not first.m_it.primitive_iterator.is_begin() or not last.m_it.primitive_iterator.is_end()) + if (JSON_UNLIKELY(not first.m_it.primitive_iterator.is_begin() + or not last.m_it.primitive_iterator.is_end())) { JSON_THROW(invalid_iterator::create(204, "iterators out of range")); } @@ -2676,9 +8709,7 @@ class basic_json } default: - { break; - } } switch (m_type) @@ -2728,10 +8759,8 @@ class basic_json } default: - { JSON_THROW(invalid_iterator::create(206, "cannot construct with iterators from " + - first.m_object->type_name())); - } + std::string(first.m_object->type_name()))); } assert_invariant(); @@ -2742,6 +8771,11 @@ class basic_json // other constructors and destructor // /////////////////////////////////////// + /// @private + basic_json(const detail::json_ref& ref) + : basic_json(ref.moved_or_copied()) + {} + /*! @brief copy constructor @@ -2749,8 +8783,13 @@ class basic_json @param[in] other the JSON value to copy + @post `*this == other` + @complexity Linear in the size of @a other. 
+ @exceptionsafety Strong guarantee: if an exception is thrown, there are no + changes to any JSON value. + @requirement This function helps `basic_json` satisfying the [Container](http://en.cppreference.com/w/cpp/concept/Container) requirements: @@ -2813,9 +8852,7 @@ class basic_json } default: - { break; - } } assert_invariant(); @@ -2830,10 +8867,18 @@ class basic_json @param[in,out] other value to move to this object - @post @a other is a JSON null value + @post `*this` has the same value as @a other before the call. + @post @a other is a JSON null value. @complexity Constant. + @exceptionsafety No-throw guarantee: this constructor never throws + exceptions. + + @requirement This function helps `basic_json` satisfying the + [MoveConstructible](http://en.cppreference.com/w/cpp/concept/MoveConstructible) + requirements. + @liveexample{The code below shows the move constructor explicitly called via std::move.,basic_json__moveconstructor} @@ -2858,7 +8903,7 @@ class basic_json Copy assignment operator. Copies a JSON value via the "copy and swap" strategy: It is expressed in terms of the copy constructor, destructor, - and the swap() member function. + and the `swap()` member function. @param[in] other value to copy from @@ -2912,39 +8957,7 @@ class basic_json ~basic_json() { assert_invariant(); - - switch (m_type) - { - case value_t::object: - { - AllocatorType alloc; - alloc.destroy(m_value.object); - alloc.deallocate(m_value.object, 1); - break; - } - - case value_t::array: - { - AllocatorType alloc; - alloc.destroy(m_value.array); - alloc.deallocate(m_value.array, 1); - break; - } - - case value_t::string: - { - AllocatorType alloc; - alloc.destroy(m_value.string); - alloc.deallocate(m_value.string, 1); - break; - } - - default: - { - // all other types need no specific destructor - break; - } - } + m_value.destroy(m_type); } /// @} @@ -2963,38 +8976,47 @@ class basic_json Serialization function for JSON values. 
The function tries to mimic Python's `json.dumps()` function, and currently supports its @a indent - parameter. + and @a ensure_ascii parameters. @param[in] indent If indent is nonnegative, then array elements and object members will be pretty-printed with that indent level. An indent level of `0` will only insert newlines. `-1` (the default) selects the most compact representation. - @param[in] indent_char The character to use for indentation of @a indent is - greate than `0`. The default is ` ` (space). + @param[in] indent_char The character to use for indentation if @a indent is + greater than `0`. The default is ` ` (space). + @param[in] ensure_ascii If @a ensure_ascii is true, all non-ASCII characters + in the output are escaped with \uXXXX sequences, and the result consists + of ASCII characters only. @return string containing the serialization of the JSON value @complexity Linear. - @liveexample{The following example shows the effect of different @a indent - parameters to the result of the serialization.,dump} + @exceptionsafety Strong guarantee: if an exception is thrown, there are no + changes in the JSON value. 
+ + @liveexample{The following example shows the effect of different @a indent\, + @a indent_char\, and @a ensure_ascii parameters to the result of the + serialization.,dump} @see https://docs.python.org/2/library/json.html#json.dump - @since version 1.0.0; indentaction character added in version 3.0.0 + @since version 1.0.0; indentation character @a indent_char and option + @a ensure_ascii added in version 3.0.0 */ - string_t dump(const int indent = -1, const char indent_char = ' ') const + string_t dump(const int indent = -1, const char indent_char = ' ', + const bool ensure_ascii = false) const { string_t result; - serializer s(output_adapter::create(result), indent_char); + serializer s(detail::output_adapter(result), indent_char); if (indent >= 0) { - s.dump(*this, true, static_cast(indent)); + s.dump(*this, true, ensure_ascii, static_cast(indent)); } else { - s.dump(*this, false, 0); + s.dump(*this, false, ensure_ascii, 0); } return result; @@ -3007,6 +9029,17 @@ class basic_json enumeration. @return the type of the JSON value + Value type | return value + ------------------------- | ------------------------- + null | value_t::null + boolean | value_t::boolean + string | value_t::string + number (integer) | value_t::number_integer + number (unsigned integer) | value_t::number_unsigned + number (foating-point) | value_t::number_float + object | value_t::object + array | value_t::array + discarded | value_t::discarded @complexity Constant. @@ -3016,6 +9049,9 @@ class basic_json @liveexample{The following code exemplifies `type()` for all JSON types.,type} + @sa @ref operator value_t() -- return the type of the JSON value (implicit) + @sa @ref type_name() -- return the type as string + @since version 1.0.0 */ constexpr value_t type() const noexcept @@ -3026,8 +9062,8 @@ class basic_json /*! @brief return whether type is primitive - This function returns true iff the JSON type is primitive (string, number, - boolean, or null). 
+ This function returns true if and only if the JSON type is primitive + (string, number, boolean, or null). @return `true` if type is primitive (string, number, boolean, or null), `false` otherwise. @@ -3056,8 +9092,8 @@ class basic_json /*! @brief return whether type is structured - This function returns true iff the JSON type is structured (array or - object). + This function returns true if and only if the JSON type is structured + (array or object). @return `true` if type is structured (array or object), `false` otherwise. @@ -3083,7 +9119,7 @@ class basic_json /*! @brief return whether value is null - This function returns true iff the JSON value is null. + This function returns true if and only if the JSON value is null. @return `true` if type is null, `false` otherwise. @@ -3099,13 +9135,13 @@ class basic_json */ constexpr bool is_null() const noexcept { - return m_type == value_t::null; + return (m_type == value_t::null); } /*! @brief return whether value is a boolean - This function returns true iff the JSON value is a boolean. + This function returns true if and only if the JSON value is a boolean. @return `true` if type is boolean, `false` otherwise. @@ -3121,14 +9157,14 @@ class basic_json */ constexpr bool is_boolean() const noexcept { - return m_type == value_t::boolean; + return (m_type == value_t::boolean); } /*! @brief return whether value is a number - This function returns true iff the JSON value is a number. This includes - both integer and floating-point values. + This function returns true if and only if the JSON value is a number. This + includes both integer (signed and unsigned) and floating-point values. @return `true` if type is number (regardless whether integer, unsigned integer or floating-type), `false` otherwise. @@ -3157,8 +9193,8 @@ class basic_json /*! @brief return whether value is an integer number - This function returns true iff the JSON value is an integer or unsigned - integer number. This excludes floating-point values. 
+ This function returns true if and only if the JSON value is a signed or + unsigned integer number. This excludes floating-point values. @return `true` if type is an integer or unsigned integer number, `false` otherwise. @@ -3180,14 +9216,14 @@ class basic_json */ constexpr bool is_number_integer() const noexcept { - return m_type == value_t::number_integer or m_type == value_t::number_unsigned; + return (m_type == value_t::number_integer or m_type == value_t::number_unsigned); } /*! @brief return whether value is an unsigned integer number - This function returns true iff the JSON value is an unsigned integer - number. This excludes floating-point and (signed) integer values. + This function returns true if and only if the JSON value is an unsigned + integer number. This excludes floating-point and signed integer values. @return `true` if type is an unsigned integer number, `false` otherwise. @@ -3208,14 +9244,14 @@ class basic_json */ constexpr bool is_number_unsigned() const noexcept { - return m_type == value_t::number_unsigned; + return (m_type == value_t::number_unsigned); } /*! @brief return whether value is a floating-point number - This function returns true iff the JSON value is a floating-point number. - This excludes integer and unsigned integer values. + This function returns true if and only if the JSON value is a + floating-point number. This excludes signed and unsigned integer values. @return `true` if type is a floating-point number, `false` otherwise. @@ -3236,13 +9272,13 @@ class basic_json */ constexpr bool is_number_float() const noexcept { - return m_type == value_t::number_float; + return (m_type == value_t::number_float); } /*! @brief return whether value is an object - This function returns true iff the JSON value is an object. + This function returns true if and only if the JSON value is an object. @return `true` if type is object, `false` otherwise. 
@@ -3258,13 +9294,13 @@ class basic_json */ constexpr bool is_object() const noexcept { - return m_type == value_t::object; + return (m_type == value_t::object); } /*! @brief return whether value is an array - This function returns true iff the JSON value is an array. + This function returns true if and only if the JSON value is an array. @return `true` if type is array, `false` otherwise. @@ -3280,13 +9316,13 @@ class basic_json */ constexpr bool is_array() const noexcept { - return m_type == value_t::array; + return (m_type == value_t::array); } /*! @brief return whether value is a string - This function returns true iff the JSON value is a string. + This function returns true if and only if the JSON value is a string. @return `true` if type is string, `false` otherwise. @@ -3302,14 +9338,14 @@ class basic_json */ constexpr bool is_string() const noexcept { - return m_type == value_t::string; + return (m_type == value_t::string); } /*! @brief return whether value is discarded - This function returns true iff the JSON value was discarded during parsing - with a callback function (see @ref parser_callback_t). + This function returns true if and only if the JSON value was discarded + during parsing with a callback function (see @ref parser_callback_t). @note This function will always be `false` for JSON values after parsing. That is, discarded values can only occur during parsing, but will be @@ -3329,7 +9365,7 @@ class basic_json */ constexpr bool is_discarded() const noexcept { - return m_type == value_t::discarded; + return (m_type == value_t::discarded); } /*! 
@@ -3348,6 +9384,9 @@ class basic_json @liveexample{The following code exemplifies the @ref value_t operator for all JSON types.,operator__value_t} + @sa @ref type() -- return the type of the JSON value (explicit) + @sa @ref type_name() -- return the type as string + @since version 1.0.0 */ constexpr operator value_t() const noexcept @@ -3365,12 +9404,12 @@ class basic_json /// get a boolean (explicit) boolean_t get_impl(boolean_t* /*unused*/) const { - if (is_boolean()) + if (JSON_LIKELY(is_boolean())) { return m_value.boolean; } - JSON_THROW(type_error::create(302, "type must be boolean, but is " + type_name())); + JSON_THROW(type_error::create(302, "type must be boolean, but is " + std::string(type_name()))); } /// get a pointer to the value (object) @@ -3460,7 +9499,7 @@ class basic_json /*! @brief helper function to implement get_ref() - This funcion helps to implement get_ref() without code duplication for + This function helps to implement get_ref() without code duplication for const and non-const overloads @tparam ThisType will be deduced as `basic_json` or `const basic_json` @@ -3471,18 +9510,15 @@ class basic_json template static ReferenceType get_ref_impl(ThisType& obj) { - // helper type - using PointerType = typename std::add_pointer::type; - // delegate the call to get_ptr<>() - auto ptr = obj.template get_ptr(); + auto ptr = obj.template get_ptr::type>(); - if (ptr != nullptr) + if (JSON_LIKELY(ptr != nullptr)) { return *ptr; } - JSON_THROW(type_error::create(303, "incompatible ReferenceType for get_ref, actual type is " + obj.type_name())); + JSON_THROW(type_error::create(303, "incompatible ReferenceType for get_ref, actual type is " + std::string(obj.type_name()))); } public: @@ -3824,6 +9860,7 @@ class basic_json */ template < typename ValueType, typename std::enable_if < not std::is_pointer::value and + not std::is_same>::value and not std::is_same::value #ifndef _MSC_VER // fix for issue #167 operator<< ambiguity under VS2015 and not 
std::is_same>::value @@ -3878,7 +9915,7 @@ class basic_json reference at(size_type idx) { // at only works for arrays - if (is_array()) + if (JSON_LIKELY(is_array())) { JSON_TRY { @@ -3892,7 +9929,7 @@ class basic_json } else { - JSON_THROW(type_error::create(304, "cannot use at() with " + type_name())); + JSON_THROW(type_error::create(304, "cannot use at() with " + std::string(type_name()))); } } @@ -3925,7 +9962,7 @@ class basic_json const_reference at(size_type idx) const { // at only works for arrays - if (is_array()) + if (JSON_LIKELY(is_array())) { JSON_TRY { @@ -3939,7 +9976,7 @@ class basic_json } else { - JSON_THROW(type_error::create(304, "cannot use at() with " + type_name())); + JSON_THROW(type_error::create(304, "cannot use at() with " + std::string(type_name()))); } } @@ -3976,7 +10013,7 @@ class basic_json reference at(const typename object_t::key_type& key) { // at only works for objects - if (is_object()) + if (JSON_LIKELY(is_object())) { JSON_TRY { @@ -3990,7 +10027,7 @@ class basic_json } else { - JSON_THROW(type_error::create(304, "cannot use at() with " + type_name())); + JSON_THROW(type_error::create(304, "cannot use at() with " + std::string(type_name()))); } } @@ -4027,7 +10064,7 @@ class basic_json const_reference at(const typename object_t::key_type& key) const { // at only works for objects - if (is_object()) + if (JSON_LIKELY(is_object())) { JSON_TRY { @@ -4041,7 +10078,7 @@ class basic_json } else { - JSON_THROW(type_error::create(304, "cannot use at() with " + type_name())); + JSON_THROW(type_error::create(304, "cannot use at() with " + std::string(type_name()))); } } @@ -4081,7 +10118,7 @@ class basic_json } // operator[] only works for arrays - if (is_array()) + if (JSON_LIKELY(is_array())) { // fill up array with null values if given idx is outside range if (idx >= m_value.array->size()) @@ -4094,7 +10131,7 @@ class basic_json return m_value.array->operator[](idx); } - JSON_THROW(type_error::create(305, "cannot use operator[] with " 
+ type_name())); + JSON_THROW(type_error::create(305, "cannot use operator[] with " + std::string(type_name()))); } /*! @@ -4119,12 +10156,12 @@ class basic_json const_reference operator[](size_type idx) const { // const operator[] only works for arrays - if (is_array()) + if (JSON_LIKELY(is_array())) { return m_value.array->operator[](idx); } - JSON_THROW(type_error::create(305, "cannot use operator[] with " + type_name())); + JSON_THROW(type_error::create(305, "cannot use operator[] with " + std::string(type_name()))); } /*! @@ -4165,12 +10202,12 @@ class basic_json } // operator[] only works for objects - if (is_object()) + if (JSON_LIKELY(is_object())) { return m_value.object->operator[](key); } - JSON_THROW(type_error::create(305, "cannot use operator[] with " + type_name())); + JSON_THROW(type_error::create(305, "cannot use operator[] with " + std::string(type_name()))); } /*! @@ -4206,81 +10243,13 @@ class basic_json const_reference operator[](const typename object_t::key_type& key) const { // const operator[] only works for objects - if (is_object()) + if (JSON_LIKELY(is_object())) { assert(m_value.object->find(key) != m_value.object->end()); return m_value.object->find(key)->second; } - JSON_THROW(type_error::create(305, "cannot use operator[] with " + type_name())); - } - - /*! - @brief access specified object element - - Returns a reference to the element at with specified key @a key. - - @note If @a key is not found in the object, then it is silently added to - the object and filled with a `null` value to make `key` a valid reference. - In case the value was `null` before, it is converted to an object. - - @param[in] key key of the element to access - - @return reference to the element at key @a key - - @throw type_error.305 if the JSON value is not an object or null; in that - cases, using the [] operator with a key makes no sense. - - @complexity Logarithmic in the size of the container. 
- - @liveexample{The example below shows how object elements can be read and - written using the `[]` operator.,operatorarray__key_type} - - @sa @ref at(const typename object_t::key_type&) for access by reference - with range checking - @sa @ref value() for access by value with a default value - - @since version 1.0.0 - */ - template - reference operator[](T * (&key)[n]) - { - return operator[](static_cast(key)); - } - - /*! - @brief read-only access specified object element - - Returns a const reference to the element at with specified key @a key. No - bounds checking is performed. - - @warning If the element with key @a key does not exist, the behavior is - undefined. - - @note This function is required for compatibility reasons with Clang. - - @param[in] key key of the element to access - - @return const reference to the element at key @a key - - @throw type_error.305 if the JSON value is not an object; in that cases, - using the [] operator with a key makes no sense. - - @complexity Logarithmic in the size of the container. - - @liveexample{The example below shows how object elements can be read using - the `[]` operator.,operatorarray__key_type_const} - - @sa @ref at(const typename object_t::key_type&) for access by reference - with range checking - @sa @ref value() for access by value with a default value - - @since version 1.0.0 - */ - template - const_reference operator[](T * (&key)[n]) const - { - return operator[](static_cast(key)); + JSON_THROW(type_error::create(305, "cannot use operator[] with " + std::string(type_name()))); } /*! @@ -4322,12 +10291,12 @@ class basic_json } // at only works for objects - if (is_object()) + if (JSON_LIKELY(is_object())) { return m_value.object->operator[](key); } - JSON_THROW(type_error::create(305, "cannot use operator[] with " + type_name())); + JSON_THROW(type_error::create(305, "cannot use operator[] with " + std::string(type_name()))); } /*! 
@@ -4364,13 +10333,13 @@ class basic_json const_reference operator[](T* key) const { // at only works for objects - if (is_object()) + if (JSON_LIKELY(is_object())) { assert(m_value.object->find(key) != m_value.object->end()); return m_value.object->find(key)->second; } - JSON_THROW(type_error::create(305, "cannot use operator[] with " + type_name())); + JSON_THROW(type_error::create(305, "cannot use operator[] with " + std::string(type_name()))); } /*! @@ -4423,10 +10392,10 @@ class basic_json */ template::value, int>::type = 0> - ValueType value(const typename object_t::key_type& key, ValueType default_value) const + ValueType value(const typename object_t::key_type& key, const ValueType& default_value) const { // at only works for objects - if (is_object()) + if (JSON_LIKELY(is_object())) { // if key is found, return value and given default value otherwise const auto it = find(key); @@ -4437,10 +10406,8 @@ class basic_json return default_value; } - else - { - JSON_THROW(type_error::create(306, "cannot use value() with " + type_name())); - } + + JSON_THROW(type_error::create(306, "cannot use value() with " + std::string(type_name()))); } /*! @@ -4495,10 +10462,10 @@ class basic_json */ template::value, int>::type = 0> - ValueType value(const json_pointer& ptr, ValueType default_value) const + ValueType value(const json_pointer& ptr, const ValueType& default_value) const { // at only works for objects - if (is_object()) + if (JSON_LIKELY(is_object())) { // if pointer resolves a value, return it or use default value JSON_TRY @@ -4511,7 +10478,7 @@ class basic_json } } - JSON_THROW(type_error::create(306, "cannot use value() with " + type_name())); + JSON_THROW(type_error::create(306, "cannot use value() with " + std::string(type_name()))); } /*! 
@@ -4662,7 +10629,7 @@ class basic_json IteratorType erase(IteratorType pos) { // make sure iterator fits the current value - if (this != pos.m_object) + if (JSON_UNLIKELY(this != pos.m_object)) { JSON_THROW(invalid_iterator::create(202, "iterator does not fit current value")); } @@ -4677,7 +10644,7 @@ class basic_json case value_t::number_unsigned: case value_t::string: { - if (not pos.m_it.primitive_iterator.is_begin()) + if (JSON_UNLIKELY(not pos.m_it.primitive_iterator.is_begin())) { JSON_THROW(invalid_iterator::create(205, "iterator out of range")); } @@ -4708,9 +10675,7 @@ class basic_json } default: - { - JSON_THROW(type_error::create(307, "cannot use erase() with " + type_name())); - } + JSON_THROW(type_error::create(307, "cannot use erase() with " + std::string(type_name()))); } return result; @@ -4769,7 +10734,7 @@ class basic_json IteratorType erase(IteratorType first, IteratorType last) { // make sure iterator fits the current value - if (this != first.m_object or this != last.m_object) + if (JSON_UNLIKELY(this != first.m_object or this != last.m_object)) { JSON_THROW(invalid_iterator::create(203, "iterators do not fit current value")); } @@ -4784,7 +10749,8 @@ class basic_json case value_t::number_unsigned: case value_t::string: { - if (not first.m_it.primitive_iterator.is_begin() or not last.m_it.primitive_iterator.is_end()) + if (JSON_UNLIKELY(not first.m_it.primitive_iterator.is_begin() + or not last.m_it.primitive_iterator.is_end())) { JSON_THROW(invalid_iterator::create(204, "iterators out of range")); } @@ -4817,9 +10783,7 @@ class basic_json } default: - { - JSON_THROW(type_error::create(307, "cannot use erase() with " + type_name())); - } + JSON_THROW(type_error::create(307, "cannot use erase() with " + std::string(type_name()))); } return result; @@ -4857,12 +10821,12 @@ class basic_json size_type erase(const typename object_t::key_type& key) { // this erase only works for objects - if (is_object()) + if (JSON_LIKELY(is_object())) { return
m_value.object->erase(key); } - JSON_THROW(type_error::create(307, "cannot use erase() with " + type_name())); + JSON_THROW(type_error::create(307, "cannot use erase() with " + std::string(type_name()))); } /*! @@ -4892,9 +10856,9 @@ class basic_json void erase(const size_type idx) { // this erase only works for arrays - if (is_array()) + if (JSON_LIKELY(is_array())) { - if (idx >= size()) + if (JSON_UNLIKELY(idx >= size())) { JSON_THROW(out_of_range::create(401, "array index " + std::to_string(idx) + " is out of range")); } @@ -4903,7 +10867,7 @@ class basic_json } else { - JSON_THROW(type_error::create(307, "cannot use erase() with " + type_name())); + JSON_THROW(type_error::create(307, "cannot use erase() with " + std::string(type_name()))); } } @@ -5277,10 +11241,6 @@ class basic_json return const_reverse_iterator(cbegin()); } - private: - // forward declaration - template class iteration_proxy; - public: /*! @brief wrapper to access iterator member functions in range-based for @@ -5319,9 +11279,9 @@ class basic_json /// @{ /*! - @brief checks whether the container is empty + @brief checks whether the container is empty. - Checks if a JSON value has no elements. + Checks if a JSON value has no elements (i.e. whether its @ref size is `0`). @return The return value depends on the different types and is defined as follows: @@ -5334,23 +11294,27 @@ class basic_json object | result of function `object_t::empty()` array | result of function `array_t::empty()` - @note This function does not return whether a string stored as JSON value - is empty - it returns whether the JSON container itself is empty which is - false in the case of a string. + @liveexample{The following code uses `empty()` to check if a JSON + object contains any elements.,empty} @complexity Constant, as long as @ref array_t and @ref object_t satisfy the Container concept; that is, their `empty()` functions have constant complexity. + @iterators No changes. 
+ + @exceptionsafety No-throw guarantee: this function never throws exceptions. + + @note This function does not return whether a string stored as JSON value + is empty - it returns whether the JSON container itself is empty which is + false in the case of a string. + @requirement This function helps `basic_json` satisfying the [Container](http://en.cppreference.com/w/cpp/concept/Container) requirements: - The complexity is constant. - Has the semantics of `begin() == end()`. - @liveexample{The following code uses `empty()` to check if a JSON - object contains any elements.,empty} - @sa @ref size() -- returns the number of elements @since version 1.0.0 @@ -5401,23 +11365,27 @@ class basic_json object | result of function object_t::size() array | result of function array_t::size() - @note This function does not return the length of a string stored as JSON - value - it returns the number of elements in the JSON value which is 1 in - the case of a string. + @liveexample{The following code calls `size()` on the different value + types.,size} @complexity Constant, as long as @ref array_t and @ref object_t satisfy the Container concept; that is, their size() functions have constant complexity. + @iterators No changes. + + @exceptionsafety No-throw guarantee: this function never throws exceptions. + + @note This function does not return the length of a string stored as JSON + value - it returns the number of elements in the JSON value which is 1 in + the case of a string. + @requirement This function helps `basic_json` satisfying the [Container](http://en.cppreference.com/w/cpp/concept/Container) requirements: - The complexity is constant. - Has the semantics of `std::distance(begin(), end())`. 
- @liveexample{The following code calls `size()` on the different value - types.,size} - @sa @ref empty() -- checks whether the container is empty @sa @ref max_size() -- returns the maximal number of elements @@ -5471,10 +11439,17 @@ class basic_json object | result of function `object_t::max_size()` array | result of function `array_t::max_size()` + @liveexample{The following code calls `max_size()` on the different value + types. Note the output is implementation specific.,max_size} + @complexity Constant, as long as @ref array_t and @ref object_t satisfy the Container concept; that is, their `max_size()` functions have constant complexity. + @iterators No changes. + + @exceptionsafety No-throw guarantee: this function never throws exceptions. + @requirement This function helps `basic_json` satisfying the [Container](http://en.cppreference.com/w/cpp/concept/Container) requirements: @@ -5482,9 +11457,6 @@ class basic_json - Has the semantics of returning `b.size()` where `b` is the largest possible JSON value. - @liveexample{The following code calls `max_size()` on the different value - types. Note the output is implementation specific.,max_size} - @sa @ref size() -- returns the number of elements @since version 1.0.0 @@ -5527,7 +11499,8 @@ class basic_json @brief clears the contents Clears the content of a JSON value and resets it to the default value as - if @ref basic_json(value_t) would have been called: + if @ref basic_json(value_t) would have been called with the current value + type from @ref type(): Value type | initial value ----------- | ------------- @@ -5538,11 +11511,24 @@ class basic_json object | `{}` array | `[]` - @complexity Linear in the size of the JSON value. + @post Has the same effect as calling + @code {.cpp} + *this = basic_json(type()); + @endcode @liveexample{The example below shows the effect of `clear()` to different JSON types.,clear} + @complexity Linear in the size of the JSON value. 
+ + @iterators All iterators, pointers and references related to this container + are invalidated. + + @exceptionsafety No-throw guarantee: this function never throws exceptions. + + @sa @ref basic_json(value_t) -- constructor that creates an object with the + same value as calling `clear()` + @since version 1.0.0 */ void clear() noexcept @@ -5592,9 +11578,7 @@ class basic_json } default: - { break; - } } } @@ -5621,9 +11605,9 @@ class basic_json void push_back(basic_json&& val) { // push_back only works for null objects or arrays - if (not(is_null() or is_array())) + if (JSON_UNLIKELY(not(is_null() or is_array()))) { - JSON_THROW(type_error::create(308, "cannot use push_back() with " + type_name())); + JSON_THROW(type_error::create(308, "cannot use push_back() with " + std::string(type_name()))); } // transform null object into an array @@ -5657,9 +11641,9 @@ class basic_json void push_back(const basic_json& val) { // push_back only works for null objects or arrays - if (not(is_null() or is_array())) + if (JSON_UNLIKELY(not(is_null() or is_array()))) { - JSON_THROW(type_error::create(308, "cannot use push_back() with " + type_name())); + JSON_THROW(type_error::create(308, "cannot use push_back() with " + std::string(type_name()))); } // transform null object into an array @@ -5707,9 +11691,9 @@ class basic_json void push_back(const typename object_t::value_type& val) { // push_back only works for null objects or objects - if (not(is_null() or is_object())) + if (JSON_UNLIKELY(not(is_null() or is_object()))) { - JSON_THROW(type_error::create(308, "cannot use push_back() with " + type_name())); + JSON_THROW(type_error::create(308, "cannot use push_back() with " + std::string(type_name()))); } // transform null object into an object @@ -5759,12 +11743,13 @@ class basic_json @liveexample{The example shows how initializer lists are treated as objects when possible.,push_back__initializer_list} */ - void push_back(std::initializer_list init) + void
push_back(initializer_list_t init) { - if (is_object() and init.size() == 2 and init.begin()->is_string()) + if (is_object() and init.size() == 2 and (*init.begin())->is_string()) { - const string_t key = *init.begin(); - push_back(typename object_t::value_type(key, *(init.begin() + 1))); + basic_json&& key = init.begin()->moved_or_copied(); + push_back(typename object_t::value_type( + std::move(key.get_ref()), (init.begin() + 1)->moved_or_copied())); } else { @@ -5774,9 +11759,9 @@ class basic_json /*! @brief add an object to an object - @copydoc push_back(std::initializer_list) + @copydoc push_back(initializer_list_t) */ - reference operator+=(std::initializer_list init) + reference operator+=(initializer_list_t init) { push_back(init); return *this; @@ -5807,9 +11792,9 @@ class basic_json void emplace_back(Args&& ... args) { // emplace_back only works for null objects or arrays - if (not(is_null() or is_array())) + if (JSON_UNLIKELY(not(is_null() or is_array()))) { - JSON_THROW(type_error::create(311, "cannot use emplace_back() with " + type_name())); + JSON_THROW(type_error::create(311, "cannot use emplace_back() with " + std::string(type_name()))); } // transform null object into an array @@ -5855,9 +11840,9 @@ class basic_json std::pair emplace(Args&& ... 
args) { // emplace only works for null objects or arrays - if (not(is_null() or is_object())) + if (JSON_UNLIKELY(not(is_null() or is_object()))) { - JSON_THROW(type_error::create(311, "cannot use emplace() with " + type_name())); + JSON_THROW(type_error::create(311, "cannot use emplace() with " + std::string(type_name()))); } // transform null object into an object @@ -5903,10 +11888,10 @@ class basic_json iterator insert(const_iterator pos, const basic_json& val) { // insert only works for arrays - if (is_array()) + if (JSON_LIKELY(is_array())) { // check if iterator pos fits to this JSON value - if (pos.m_object != this) + if (JSON_UNLIKELY(pos.m_object != this)) { JSON_THROW(invalid_iterator::create(202, "iterator does not fit current value")); } @@ -5917,7 +11902,7 @@ class basic_json return result; } - JSON_THROW(type_error::create(309, "cannot use insert() with " + type_name())); + JSON_THROW(type_error::create(309, "cannot use insert() with " + std::string(type_name()))); } /*! @@ -5956,10 +11941,10 @@ class basic_json iterator insert(const_iterator pos, size_type cnt, const basic_json& val) { // insert only works for arrays - if (is_array()) + if (JSON_LIKELY(is_array())) { // check if iterator pos fits to this JSON value - if (pos.m_object != this) + if (JSON_UNLIKELY(pos.m_object != this)) { JSON_THROW(invalid_iterator::create(202, "iterator does not fit current value")); } @@ -5970,7 +11955,7 @@ class basic_json return result; } - JSON_THROW(type_error::create(309, "cannot use insert() with " + type_name())); + JSON_THROW(type_error::create(309, "cannot use insert() with " + std::string(type_name()))); } /*! 
@@ -6006,24 +11991,24 @@ class basic_json iterator insert(const_iterator pos, const_iterator first, const_iterator last) { // insert only works for arrays - if (not is_array()) + if (JSON_UNLIKELY(not is_array())) { - JSON_THROW(type_error::create(309, "cannot use insert() with " + type_name())); + JSON_THROW(type_error::create(309, "cannot use insert() with " + std::string(type_name()))); } // check if iterator pos fits to this JSON value - if (pos.m_object != this) + if (JSON_UNLIKELY(pos.m_object != this)) { JSON_THROW(invalid_iterator::create(202, "iterator does not fit current value")); } // check if range iterators belong to the same JSON object - if (first.m_object != last.m_object) + if (JSON_UNLIKELY(first.m_object != last.m_object)) { JSON_THROW(invalid_iterator::create(210, "iterators do not fit")); } - if (first.m_object == this or last.m_object == this) + if (JSON_UNLIKELY(first.m_object == this or last.m_object == this)) { JSON_THROW(invalid_iterator::create(211, "passed iterators may not belong to container")); } @@ -6061,23 +12046,23 @@ class basic_json @since version 1.0.0 */ - iterator insert(const_iterator pos, std::initializer_list ilist) + iterator insert(const_iterator pos, initializer_list_t ilist) { // insert only works for arrays - if (not is_array()) + if (JSON_UNLIKELY(not is_array())) { - JSON_THROW(type_error::create(309, "cannot use insert() with " + type_name())); + JSON_THROW(type_error::create(309, "cannot use insert() with " + std::string(type_name()))); } // check if iterator pos fits to this JSON value - if (pos.m_object != this) + if (JSON_UNLIKELY(pos.m_object != this)) { JSON_THROW(invalid_iterator::create(202, "iterator does not fit current value")); } // insert to array and return iterator iterator result(this); - result.m_it.array_iterator = m_value.array->insert(pos.m_it.array_iterator, ilist); + result.m_it.array_iterator = m_value.array->insert(pos.m_it.array_iterator, ilist.begin(), ilist.end()); return result; } @@ 
-6107,19 +12092,20 @@ class basic_json void insert(const_iterator first, const_iterator last) { // insert only works for objects - if (not is_object()) + if (JSON_UNLIKELY(not is_object())) { - JSON_THROW(type_error::create(309, "cannot use insert() with " + type_name())); + JSON_THROW(type_error::create(309, "cannot use insert() with " + std::string(type_name()))); } // check if range iterators belong to the same JSON object - if (first.m_object != last.m_object) + if (JSON_UNLIKELY(first.m_object != last.m_object)) { JSON_THROW(invalid_iterator::create(210, "iterators do not fit")); } // passed iterators must belong to objects - if (not first.m_object->is_object() or not first.m_object->is_object()) + if (JSON_UNLIKELY(not first.m_object->is_object() + or not last.m_object->is_object())) { JSON_THROW(invalid_iterator::create(202, "iterators first and last must point to objects")); } @@ -6127,6 +12113,110 @@ class basic_json m_value.object->insert(first.m_it.object_iterator, last.m_it.object_iterator); } + /*! + @brief updates a JSON object from another object, overwriting existing keys + + Inserts all values from JSON object @a j and overwrites existing keys. + + @param[in] j JSON object to read values from + + @throw type_error.312 if called on JSON values other than objects; example: + `"cannot use update() with string"` + + @complexity O(N*log(size() + N)), where N is the number of elements to + insert. 
+ + @liveexample{The example shows how `update()` is used.,update} + + @sa https://docs.python.org/3.6/library/stdtypes.html#dict.update + + @since version 3.0.0 + */ + void update(const_reference j) + { + // implicitly convert null value to an empty object + if (is_null()) + { + m_type = value_t::object; + m_value.object = create(); + assert_invariant(); + } + + if (JSON_UNLIKELY(not is_object())) + { + JSON_THROW(type_error::create(312, "cannot use update() with " + std::string(type_name()))); + } + if (JSON_UNLIKELY(not j.is_object())) + { + JSON_THROW(type_error::create(312, "cannot use update() with " + std::string(j.type_name()))); + } + + for (auto it = j.begin(); it != j.end(); ++it) + { + m_value.object->operator[](it.key()) = it.value(); + } + } + + /*! + @brief updates a JSON object from another object, overwriting existing keys + + Inserts all values from range `[first, last)` and overwrites existing + keys. + + @param[in] first begin of the range of elements to insert + @param[in] last end of the range of elements to insert + + @throw type_error.312 if called on JSON values other than objects; example: + `"cannot use update() with string"` + @throw invalid_iterator.202 if iterator @a first or @a last does not + point to an object; example: `"iterators first and last must point to + objects"` + @throw invalid_iterator.210 if @a first and @a last do not belong to the + same JSON value; example: `"iterators do not fit"` + + @complexity O(N*log(size() + N)), where N is the number of elements to + insert.
+ + @liveexample{The example shows how `update()` is used.,update__range} + + @sa https://docs.python.org/3.6/library/stdtypes.html#dict.update + + @since version 3.0.0 + */ + void update(const_iterator first, const_iterator last) + { + // implicitly convert null value to an empty object + if (is_null()) + { + m_type = value_t::object; + m_value.object = create(); + assert_invariant(); + } + + if (JSON_UNLIKELY(not is_object())) + { + JSON_THROW(type_error::create(312, "cannot use update() with " + std::string(type_name()))); + } + + // check if range iterators belong to the same JSON object + if (JSON_UNLIKELY(first.m_object != last.m_object)) + { + JSON_THROW(invalid_iterator::create(210, "iterators do not fit")); + } + + // passed iterators must belong to objects + if (JSON_UNLIKELY(not first.m_object->is_object() + or not last.m_object->is_object())) + { + JSON_THROW(invalid_iterator::create(202, "iterators first and last must point to objects")); + } + + for (auto it = first; it != last; ++it) + { + m_value.object->operator[](it.key()) = it.value(); + } + } + /*!
@brief exchanges the values @@ -6179,13 +12269,13 @@ class basic_json void swap(array_t& other) { // swap only works for arrays - if (is_array()) + if (JSON_LIKELY(is_array())) { std::swap(*(m_value.array), other); } else { - JSON_THROW(type_error::create(310, "cannot use swap() with " + type_name())); + JSON_THROW(type_error::create(310, "cannot use swap() with " + std::string(type_name()))); } } @@ -6212,13 +12302,13 @@ class basic_json void swap(object_t& other) { // swap only works for objects - if (is_object()) + if (JSON_LIKELY(is_object())) { std::swap(*(m_value.object), other); } else { - JSON_THROW(type_error::create(310, "cannot use swap() with " + type_name())); + JSON_THROW(type_error::create(310, "cannot use swap() with " + std::string(type_name()))); } } @@ -6245,13 +12335,13 @@ class basic_json void swap(string_t& other) { // swap only works for strings - if (is_string()) + if (JSON_LIKELY(is_string())) { std::swap(*(m_value.string), other); } else { - JSON_THROW(type_error::create(310, "cannot use swap() with " + type_name())); + JSON_THROW(type_error::create(310, "cannot use swap() with " + std::string(type_name()))); } } @@ -6273,18 +12363,30 @@ class basic_json their stored values are the same according to their respective `operator==`. - Integer and floating-point numbers are automatically converted before - comparison. Floating-point numbers are compared indirectly: two - floating-point numbers `f1` and `f2` are considered equal if neither - `f1 > f2` nor `f2 > f1` holds. Note than two NaN values are always - treated as unequal. + comparison. Note that two NaN values are always treated as unequal. - Two JSON null values are equal. + @note Floating-point numbers inside JSON values are compared with + `json::number_float_t::operator==` which is `double::operator==` by + default.
To compare floating-point numbers while respecting an epsilon, an alternative + [comparison function](https://github.com/mariokonrad/marnav/blob/master/src/marnav/math/floatingpoint.hpp#L34-#L39) + could be used, for instance + @code {.cpp} + template <typename T, typename = typename std::enable_if<std::is_floating_point<T>::value, T>::type> + inline bool is_same(T a, T b, T epsilon = std::numeric_limits<T>::epsilon()) noexcept + { + return std::abs(a - b) <= epsilon; + } + @endcode + @note NaN values never compare equal to themselves or to other NaN values. @param[in] lhs first JSON value to consider @param[in] rhs second JSON value to consider @return whether the values @a lhs and @a rhs are equal + @exceptionsafety No-throw guarantee: this function never throws exceptions. + @complexity Linear. @liveexample{The example demonstrates comparing several JSON @@ -6302,66 +12404,56 @@ class basic_json switch (lhs_type) { case value_t::array: - { - return *lhs.m_value.array == *rhs.m_value.array; - } + return (*lhs.m_value.array == *rhs.m_value.array); + case value_t::object: - { - return *lhs.m_value.object == *rhs.m_value.object; - } + return (*lhs.m_value.object == *rhs.m_value.object); + case value_t::null: - { return true; - } + case value_t::string: - { - return *lhs.m_value.string == *rhs.m_value.string; - } + return (*lhs.m_value.string == *rhs.m_value.string); + case value_t::boolean: - { - return lhs.m_value.boolean == rhs.m_value.boolean; - } + return (lhs.m_value.boolean == rhs.m_value.boolean); + case value_t::number_integer: - { - return lhs.m_value.number_integer == rhs.m_value.number_integer; - } + return (lhs.m_value.number_integer == rhs.m_value.number_integer); + case value_t::number_unsigned: - { - return lhs.m_value.number_unsigned == rhs.m_value.number_unsigned; - } + return (lhs.m_value.number_unsigned == rhs.m_value.number_unsigned); + case value_t::number_float: - { - return lhs.m_value.number_float == rhs.m_value.number_float; - } + return (lhs.m_value.number_float == rhs.m_value.number_float); + default: - { return false; -
} } } else if (lhs_type == value_t::number_integer and rhs_type == value_t::number_float) { - return static_cast(lhs.m_value.number_integer) == rhs.m_value.number_float; + return (static_cast(lhs.m_value.number_integer) == rhs.m_value.number_float); } else if (lhs_type == value_t::number_float and rhs_type == value_t::number_integer) { - return lhs.m_value.number_float == static_cast(rhs.m_value.number_integer); + return (lhs.m_value.number_float == static_cast(rhs.m_value.number_integer)); } else if (lhs_type == value_t::number_unsigned and rhs_type == value_t::number_float) { - return static_cast(lhs.m_value.number_unsigned) == rhs.m_value.number_float; + return (static_cast(lhs.m_value.number_unsigned) == rhs.m_value.number_float); } else if (lhs_type == value_t::number_float and rhs_type == value_t::number_unsigned) { - return lhs.m_value.number_float == static_cast(rhs.m_value.number_unsigned); + return (lhs.m_value.number_float == static_cast(rhs.m_value.number_unsigned)); } else if (lhs_type == value_t::number_unsigned and rhs_type == value_t::number_integer) { - return static_cast(lhs.m_value.number_unsigned) == rhs.m_value.number_integer; + return (static_cast(lhs.m_value.number_unsigned) == rhs.m_value.number_integer); } else if (lhs_type == value_t::number_integer and rhs_type == value_t::number_unsigned) { - return lhs.m_value.number_integer == static_cast(rhs.m_value.number_unsigned); + return (lhs.m_value.number_integer == static_cast(rhs.m_value.number_unsigned)); } return false; @@ -6400,6 +12492,8 @@ class basic_json @complexity Linear. + @exceptionsafety No-throw guarantee: this function never throws exceptions. + @liveexample{The example demonstrates comparing several JSON types.,operator__notequal} @@ -6451,6 +12545,8 @@ class basic_json @complexity Linear. + @exceptionsafety No-throw guarantee: this function never throws exceptions. 
+ @liveexample{The example demonstrates comparing several JSON types.,operator__less} @@ -6466,41 +12562,31 @@ class basic_json switch (lhs_type) { case value_t::array: - { return (*lhs.m_value.array) < (*rhs.m_value.array); - } + case value_t::object: - { return *lhs.m_value.object < *rhs.m_value.object; - } + case value_t::null: - { return false; - } + case value_t::string: - { return *lhs.m_value.string < *rhs.m_value.string; - } + case value_t::boolean: - { return lhs.m_value.boolean < rhs.m_value.boolean; - } + case value_t::number_integer: - { return lhs.m_value.number_integer < rhs.m_value.number_integer; - } + case value_t::number_unsigned: - { return lhs.m_value.number_unsigned < rhs.m_value.number_unsigned; - } + case value_t::number_float: - { return lhs.m_value.number_float < rhs.m_value.number_float; - } + default: - { return false; - } } } else if (lhs_type == value_t::number_integer and rhs_type == value_t::number_float) @@ -6568,6 +12654,8 @@ class basic_json @complexity Linear. + @exceptionsafety No-throw guarantee: this function never throws exceptions. + @liveexample{The example demonstrates comparing several JSON types.,operator__greater} @@ -6612,6 +12700,8 @@ class basic_json @complexity Linear. + @exceptionsafety No-throw guarantee: this function never throws exceptions. + @liveexample{The example demonstrates comparing several JSON types.,operator__lessequal} @@ -6656,6 +12746,8 @@ class basic_json @complexity Linear. + @exceptionsafety No-throw guarantee: this function never throws exceptions. 
+ @liveexample{The example demonstrates comparing several JSON types.,operator__greaterequal} @@ -6690,110 +12782,6 @@ class basic_json /// @} - private: - ///////////////////// - // output adapters // - ///////////////////// - - /// abstract output adapter interface - template - class output_adapter - { - public: - virtual void write_character(CharType c) = 0; - virtual void write_characters(const CharType* s, size_t length) = 0; - virtual ~output_adapter() {} - - static std::shared_ptr> create(std::vector& vec) - { - return std::shared_ptr(new output_vector_adapter(vec)); - } - - static std::shared_ptr> create(std::ostream& s) - { - return std::shared_ptr(new output_stream_adapter(s)); - } - - static std::shared_ptr> create(std::string& s) - { - return std::shared_ptr(new output_string_adapter(s)); - } - }; - - /// a type to simplify interfaces - template - using output_adapter_t = std::shared_ptr>; - - /// output adapter for byte vectors - template - class output_vector_adapter : public output_adapter - { - public: - output_vector_adapter(std::vector& vec) - : v(vec) - {} - - void write_character(CharType c) override - { - v.push_back(c); - } - - void write_characters(const CharType* s, size_t length) override - { - std::copy(s, s + length, std::back_inserter(v)); - } - - private: - std::vector& v; - }; - - /// putput adatpter for output streams - template - class output_stream_adapter : public output_adapter - { - public: - output_stream_adapter(std::basic_ostream& s) - : stream(s) - {} - - void write_character(CharType c) override - { - stream.put(c); - } - - void write_characters(const CharType* s, size_t length) override - { - stream.write(s, static_cast(length)); - } - - private: - std::basic_ostream& stream; - }; - - /// output adapter for basic_string - template - class output_string_adapter : public output_adapter - { - public: - output_string_adapter(std::string& s) - : str(s) - {} - - void write_character(CharType c) override - { - str.push_back(c); - 
} - - void write_characters(const CharType* s, size_t length) override - { - str.append(s, length); - } - - private: - std::basic_string& str; - }; - - /////////////////// // serialization // /////////////////// @@ -6801,598 +12789,6 @@ class basic_json /// @name serialization /// @{ - private: - /*! - @brief wrapper around the serialization functions - */ - class serializer - { - public: - /*! - @param[in] s output stream to serialize to - @param[in] ichar indentation character to use - */ - serializer(output_adapter_t s, const char ichar) - : o(s), loc(std::localeconv()), - thousands_sep(!loc->thousands_sep ? '\0' : loc->thousands_sep[0]), - decimal_point(!loc->decimal_point ? '\0' : loc->decimal_point[0]), - indent_char(ichar), indent_string(512, indent_char) - {} - - // delete because of pointer members - serializer(const serializer&) = delete; - serializer& operator=(const serializer&) = delete; - - /*! - @brief internal implementation of the serialization function - - This function is called by the public member function dump and - organizes the serialization internally. The indentation level is - propagated as additional parameter. In case of arrays and objects, the - function is called recursively. 
- - - strings and object keys are escaped using `escape_string()` - - integer numbers are converted implicitly via `operator<<` - - floating-point numbers are converted to a string using `"%g"` format - - @param[in] val value to serialize - @param[in] pretty_print whether the output shall be pretty-printed - @param[in] indent_step the indent level - @param[in] current_indent the current indent level (only used internally) - */ - void dump(const basic_json& val, - const bool pretty_print, - const unsigned int indent_step, - const unsigned int current_indent = 0) - { - switch (val.m_type) - { - case value_t::object: - { - if (val.m_value.object->empty()) - { - o->write_characters("{}", 2); - return; - } - - if (pretty_print) - { - o->write_characters("{\n", 2); - - // variable to hold indentation for recursive calls - const auto new_indent = current_indent + indent_step; - if (indent_string.size() < new_indent) - { - indent_string.resize(new_indent, ' '); - } - - // first n-1 elements - auto i = val.m_value.object->cbegin(); - for (size_t cnt = 0; cnt < val.m_value.object->size() - 1; ++cnt, ++i) - { - o->write_characters(indent_string.c_str(), new_indent); - o->write_character('\"'); - dump_escaped(i->first); - o->write_characters("\": ", 3); - dump(i->second, true, indent_step, new_indent); - o->write_characters(",\n", 2); - } - - // last element - assert(i != val.m_value.object->cend()); - o->write_characters(indent_string.c_str(), new_indent); - o->write_character('\"'); - dump_escaped(i->first); - o->write_characters("\": ", 3); - dump(i->second, true, indent_step, new_indent); - - o->write_character('\n'); - o->write_characters(indent_string.c_str(), current_indent); - o->write_character('}'); - } - else - { - o->write_character('{'); - - // first n-1 elements - auto i = val.m_value.object->cbegin(); - for (size_t cnt = 0; cnt < val.m_value.object->size() - 1; ++cnt, ++i) - { - o->write_character('\"'); - dump_escaped(i->first); - o->write_characters("\":", 2); 
- dump(i->second, false, indent_step, current_indent); - o->write_character(','); - } - - // last element - assert(i != val.m_value.object->cend()); - o->write_character('\"'); - dump_escaped(i->first); - o->write_characters("\":", 2); - dump(i->second, false, indent_step, current_indent); - - o->write_character('}'); - } - - return; - } - - case value_t::array: - { - if (val.m_value.array->empty()) - { - o->write_characters("[]", 2); - return; - } - - if (pretty_print) - { - o->write_characters("[\n", 2); - - // variable to hold indentation for recursive calls - const auto new_indent = current_indent + indent_step; - if (indent_string.size() < new_indent) - { - indent_string.resize(new_indent, ' '); - } - - // first n-1 elements - for (auto i = val.m_value.array->cbegin(); i != val.m_value.array->cend() - 1; ++i) - { - o->write_characters(indent_string.c_str(), new_indent); - dump(*i, true, indent_step, new_indent); - o->write_characters(",\n", 2); - } - - // last element - assert(not val.m_value.array->empty()); - o->write_characters(indent_string.c_str(), new_indent); - dump(val.m_value.array->back(), true, indent_step, new_indent); - - o->write_character('\n'); - o->write_characters(indent_string.c_str(), current_indent); - o->write_character(']'); - } - else - { - o->write_character('['); - - // first n-1 elements - for (auto i = val.m_value.array->cbegin(); i != val.m_value.array->cend() - 1; ++i) - { - dump(*i, false, indent_step, current_indent); - o->write_character(','); - } - - // last element - assert(not val.m_value.array->empty()); - dump(val.m_value.array->back(), false, indent_step, current_indent); - - o->write_character(']'); - } - - return; - } - - case value_t::string: - { - o->write_character('\"'); - dump_escaped(*val.m_value.string); - o->write_character('\"'); - return; - } - - case value_t::boolean: - { - if (val.m_value.boolean) - { - o->write_characters("true", 4); - } - else - { - o->write_characters("false", 5); - } - return; - } - - 
case value_t::number_integer: - { - dump_integer(val.m_value.number_integer); - return; - } - - case value_t::number_unsigned: - { - dump_integer(val.m_value.number_unsigned); - return; - } - - case value_t::number_float: - { - dump_float(val.m_value.number_float); - return; - } - - case value_t::discarded: - { - o->write_characters("", 11); - return; - } - - case value_t::null: - { - o->write_characters("null", 4); - return; - } - } - } - - private: - /*! - @brief calculates the extra space to escape a JSON string - - @param[in] s the string to escape - @return the number of characters required to escape string @a s - - @complexity Linear in the length of string @a s. - */ - static std::size_t extra_space(const string_t& s) noexcept - { - return std::accumulate(s.begin(), s.end(), size_t{}, - [](size_t res, typename string_t::value_type c) - { - switch (c) - { - case '"': - case '\\': - case '\b': - case '\f': - case '\n': - case '\r': - case '\t': - { - // from c (1 byte) to \x (2 bytes) - return res + 1; - } - - case 0x00: - case 0x01: - case 0x02: - case 0x03: - case 0x04: - case 0x05: - case 0x06: - case 0x07: - case 0x0b: - case 0x0e: - case 0x0f: - case 0x10: - case 0x11: - case 0x12: - case 0x13: - case 0x14: - case 0x15: - case 0x16: - case 0x17: - case 0x18: - case 0x19: - case 0x1a: - case 0x1b: - case 0x1c: - case 0x1d: - case 0x1e: - case 0x1f: - { - // from c (1 byte) to \uxxxx (6 bytes) - return res + 5; - } - - default: - { - return res; - } - } - }); - } - - /*! - @brief dump escaped string - - Escape a string by replacing certain special characters by a sequence - of an escape character (backslash) and another character and other - control characters by a sequence of "\u" followed by a four-digit hex - representation. The escaped string is written to output stream @a o. - - @param[in] s the string to escape - - @complexity Linear in the length of string @a s. 
- */ - void dump_escaped(const string_t& s) const - { - const auto space = extra_space(s); - if (space == 0) - { - o->write_characters(s.c_str(), s.size()); - return; - } - - // create a result string of necessary size - string_t result(s.size() + space, '\\'); - std::size_t pos = 0; - - for (const auto& c : s) - { - switch (c) - { - // quotation mark (0x22) - case '"': - { - result[pos + 1] = '"'; - pos += 2; - break; - } - - // reverse solidus (0x5c) - case '\\': - { - // nothing to change - pos += 2; - break; - } - - // backspace (0x08) - case '\b': - { - result[pos + 1] = 'b'; - pos += 2; - break; - } - - // formfeed (0x0c) - case '\f': - { - result[pos + 1] = 'f'; - pos += 2; - break; - } - - // newline (0x0a) - case '\n': - { - result[pos + 1] = 'n'; - pos += 2; - break; - } - - // carriage return (0x0d) - case '\r': - { - result[pos + 1] = 'r'; - pos += 2; - break; - } - - // horizontal tab (0x09) - case '\t': - { - result[pos + 1] = 't'; - pos += 2; - break; - } - - case 0x00: - case 0x01: - case 0x02: - case 0x03: - case 0x04: - case 0x05: - case 0x06: - case 0x07: - case 0x0b: - case 0x0e: - case 0x0f: - case 0x10: - case 0x11: - case 0x12: - case 0x13: - case 0x14: - case 0x15: - case 0x16: - case 0x17: - case 0x18: - case 0x19: - case 0x1a: - case 0x1b: - case 0x1c: - case 0x1d: - case 0x1e: - case 0x1f: - { - // convert a number 0..15 to its hex representation - // (0..f) - static const char hexify[16] = - { - '0', '1', '2', '3', '4', '5', '6', '7', - '8', '9', 'a', 'b', 'c', 'd', 'e', 'f' - }; - - // print character c as \uxxxx - for (const char m : - { 'u', '0', '0', hexify[c >> 4], hexify[c & 0x0f] - }) - { - result[++pos] = m; - } - - ++pos; - break; - } - - default: - { - // all other characters are added as-is - result[pos++] = c; - break; - } - } - } - - assert(pos == s.size() + space); - o->write_characters(result.c_str(), result.size()); - } - - /*! - @brief dump an integer - - Dump a given integer to output stream @a o. 
Works internally with - @a number_buffer. - - @param[in] x integer number (signed or unsigned) to dump - @tparam NumberType either @a number_integer_t or @a number_unsigned_t - */ - template::value or - std::is_same::value, int> = 0> - void dump_integer(NumberType x) - { - // special case for "0" - if (x == 0) - { - o->write_character('0'); - return; - } - - const bool is_negative = x < 0; - size_t i = 0; - - // spare 1 byte for '\0' - while (x != 0 and i < number_buffer.size() - 1) - { - const auto digit = std::labs(static_cast(x % 10)); - number_buffer[i++] = static_cast('0' + digit); - x /= 10; - } - - // make sure the number has been processed completely - assert(x == 0); - - if (is_negative) - { - // make sure there is capacity for the '-' - assert(i < number_buffer.size() - 2); - number_buffer[i++] = '-'; - } - - std::reverse(number_buffer.begin(), number_buffer.begin() + i); - o->write_characters(number_buffer.data(), i); - } - - /*! - @brief dump a floating-point number - - Dump a given floating-point number to output stream @a o. Works - internally with @a number_buffer. 
- - @param[in] x floating-point number to dump - */ - void dump_float(number_float_t x) - { - // NaN / inf - if (not std::isfinite(x) or std::isnan(x)) - { - o->write_characters("null", 4); - return; - } - - // special case for 0.0 and -0.0 - if (x == 0) - { - if (std::signbit(x)) - { - o->write_characters("-0.0", 4); - } - else - { - o->write_characters("0.0", 3); - } - return; - } - - // get number of digits for a text -> float -> text round-trip - static constexpr auto d = std::numeric_limits::digits10; - - // the actual conversion - std::ptrdiff_t len = snprintf(number_buffer.data(), number_buffer.size(), - "%.*g", d, x); - - // negative value indicates an error - assert(len > 0); - // check if buffer was large enough - assert(static_cast(len) < number_buffer.size()); - - // erase thousands separator - if (thousands_sep != '\0') - { - const auto end = std::remove(number_buffer.begin(), - number_buffer.begin() + len, - thousands_sep); - std::fill(end, number_buffer.end(), '\0'); - assert((end - number_buffer.begin()) <= len); - len = (end - number_buffer.begin()); - } - - // convert decimal point to '.' - if (decimal_point != '\0' and decimal_point != '.') - { - for (auto& c : number_buffer) - { - if (c == decimal_point) - { - c = '.'; - break; - } - } - } - - o->write_characters(number_buffer.data(), static_cast(len)); - - // determine if need to append ".0" - const bool value_is_int_like = std::none_of(number_buffer.begin(), - number_buffer.begin() + len + 1, - [](char c) - { - return c == '.' 
or c == 'e'; - }); - - if (value_is_int_like) - { - o->write_characters(".0", 2); - } - } - - private: - /// the output of the serializer - output_adapter_t o = nullptr; - - /// a (hopefully) large enough character buffer - std::array number_buffer{{}}; - - /// the locale - const std::lconv* loc = nullptr; - /// the locale's thousand separator character - const char thousands_sep = '\0'; - /// the locale's decimal point character - const char decimal_point = '\0'; - - /// the indentation character - const char indent_char; - - /// the indentation string - string_t indent_string; - }; - - public: /*! @brief serialize to stream @@ -7431,8 +12827,8 @@ class basic_json o.width(0); // do the actual serialization - serializer s(output_adapter::create(o), o.fill()); - s.dump(j, pretty_print, static_cast(indentation)); + serializer s(detail::output_adapter(o), o.fill()); + s.dump(j, pretty_print, false, static_cast(indentation)); return o; } @@ -7440,7 +12836,7 @@ class basic_json @brief serialize to stream @deprecated This stream operator is deprecated and will be removed in a future version of the library. Please use - @ref std::ostream& operator<<(std::ostream&, const basic_json&) + @ref operator<<(std::ostream&, const basic_json&) instead; that is, replace calls like `j >> o;` with `o << j;`. */ JSON_DEPRECATED @@ -7460,15 +12856,36 @@ class basic_json /// @{ /*! - @brief deserialize from an array + @brief deserialize from a compatible input - This function reads from an array of 1-byte values. + This function reads from a compatible input. Examples are: + - an array of 1-byte values + - strings with character/literal type with size of 1 byte + - input streams + - container with contiguous storage of 1-byte values. Compatible container + types include `std::vector`, `std::string`, `std::array`, + `std::valarray`, and `std::initializer_list`. Furthermore, C-style + arrays can be used with `std::begin()`/`std::end()`. 
User-defined + containers can be used as long as they implement random-access iterators + and a contiguous storage. @pre Each element of the container has a size of 1 byte. Violating this precondition yields undefined behavior. **This precondition is enforced with a static assertion.** - @param[in] array array to read from + @pre The container storage is contiguous. Violating this precondition + yields undefined behavior. **This precondition is enforced with an + assertion.** + @pre Each element of the container has a size of 1 byte. Violating this + precondition yields undefined behavior. **This precondition is enforced + with a static assertion.** + + @warning There is no way to enforce all preconditions at compile-time. If + the function is called with a noncompliant container and with + assertions switched off, the behavior is undefined and will most + likely yield segmentation violation. + + @param[in] i input to read from @param[in] cb a parser callback function of type @ref parser_callback_t which is used to control the deserialization by filtering unwanted values (optional) @@ -7489,125 +12906,46 @@ class basic_json @liveexample{The example below demonstrates the `parse()` function reading from an array.,parse__array__parser_callback_t} - @since version 2.0.3 - */ - template - static basic_json parse(T (&array)[N], - const parser_callback_t cb = nullptr) - { - // delegate the call to the iterator-range parse overload - return parse(std::begin(array), std::end(array), cb); - } - - template - static bool accept(T (&array)[N]) - { - // delegate the call to the iterator-range accept overload - return accept(std::begin(array), std::end(array)); - } - - /*! 
- @brief deserialize from string literal - - @tparam CharT character/literal type with size of 1 byte - @param[in] s string literal to read a serialized JSON value from - @param[in] cb a parser callback function of type @ref parser_callback_t - which is used to control the deserialization by filtering unwanted values - (optional) - - @return result of the deserialization - - @throw parse_error.101 in case of an unexpected token - @throw parse_error.102 if to_unicode fails or surrogate error - @throw parse_error.103 if to_unicode fails - - @complexity Linear in the length of the input. The parser is a predictive - LL(1) parser. The complexity can be higher if the parser callback function - @a cb has a super-linear complexity. - - @note A UTF-8 byte order mark is silently ignored. - @note String containers like `std::string` or @ref string_t can be parsed - with @ref parse(const ContiguousContainer&, const parser_callback_t) - @liveexample{The example below demonstrates the `parse()` function with and without callback function.,parse__string__parser_callback_t} - @sa @ref parse(std::istream&, const parser_callback_t) for a version that - reads from an input stream - - @since version 1.0.0 (originally for @ref string_t) - */ - template::value and - std::is_integral::type>::value and - sizeof(typename std::remove_pointer::type) == 1, int>::type = 0> - static basic_json parse(const CharT s, - const parser_callback_t cb = nullptr) - { - return parser(input_adapter::create(s), cb).parse(true); - } - - template::value and - std::is_integral::type>::value and - sizeof(typename std::remove_pointer::type) == 1, int>::type = 0> - static bool accept(const CharT s) - { - return parser(input_adapter::create(s)).accept(true); - } - - /*! 
- @brief deserialize from stream - - @param[in,out] i stream to read a serialized JSON value from - @param[in] cb a parser callback function of type @ref parser_callback_t - which is used to control the deserialization by filtering unwanted values - (optional) - - @return result of the deserialization - - @throw parse_error.101 in case of an unexpected token - @throw parse_error.102 if to_unicode fails or surrogate error - @throw parse_error.103 if to_unicode fails - @throw parse_error.111 if input stream is in a bad state - - @complexity Linear in the length of the input. The parser is a predictive - LL(1) parser. The complexity can be higher if the parser callback function - @a cb has a super-linear complexity. - - @note A UTF-8 byte order mark is silently ignored. - @liveexample{The example below demonstrates the `parse()` function with and without callback function.,parse__istream__parser_callback_t} - @sa @ref parse(const CharT, const parser_callback_t) for a version - that reads from a string + @liveexample{The example below demonstrates the `parse()` function reading + from a contiguous container.,parse__contiguouscontainer__parser_callback_t} - @since version 1.0.0 + @since version 2.0.3 (contiguous containers) */ - static basic_json parse(std::istream& i, - const parser_callback_t cb = nullptr) + static basic_json parse(detail::input_adapter i, + const parser_callback_t cb = nullptr, + const bool allow_exceptions = true) { - return parser(input_adapter::create(i), cb).parse(true); - } - - static bool accept(std::istream& i) - { - return parser(input_adapter::create(i)).accept(true); + basic_json result; + parser(i, cb, allow_exceptions).parse(true, result); + return result; } /*! 
- @copydoc parse(std::istream&, const parser_callback_t) + @copydoc basic_json parse(detail::input_adapter, const parser_callback_t) */ - static basic_json parse(std::istream&& i, - const parser_callback_t cb = nullptr) + static basic_json parse(detail::input_adapter& i, + const parser_callback_t cb = nullptr, + const bool allow_exceptions = true) { - return parser(input_adapter::create(i), cb).parse(true); + basic_json result; + parser(i, cb, allow_exceptions).parse(true, result); + return result; } - static bool accept(std::istream&& i) + static bool accept(detail::input_adapter i) { - return parser(input_adapter::create(i)).accept(true); + return parser(i).accept(true); + } + + static bool accept(detail::input_adapter& i) + { + return parser(i).accept(true); } /*! @@ -7660,9 +12998,12 @@ class basic_json std::random_access_iterator_tag, typename std::iterator_traits::iterator_category>::value, int>::type = 0> static basic_json parse(IteratorType first, IteratorType last, - const parser_callback_t cb = nullptr) + const parser_callback_t cb = nullptr, + const bool allow_exceptions = true) { - return parser(input_adapter::create(first, last), cb).parse(true); + basic_json result; + parser(detail::input_adapter(first, last), cb, allow_exceptions).parse(true, result); + return result; } template::iterator_category>::value, int>::type = 0> static bool accept(IteratorType first, IteratorType last) { - return parser(input_adapter::create(first, last)).accept(true); - } - - /*! - @brief deserialize from a container with contiguous storage - - This function reads from a container with contiguous storage of 1-byte - values. Compatible container types include `std::vector`, `std::string`, - `std::array`, and `std::initializer_list`. User-defined containers can be - used as long as they implement random-access iterators and a contiguous - storage. - - @pre The container storage is contiguous. Violating this precondition - yields undefined behavior. 
**This precondition is enforced with an - assertion.** - @pre Each element of the container has a size of 1 byte. Violating this - precondition yields undefined behavior. **This precondition is enforced - with a static assertion.** - - @warning There is no way to enforce all preconditions at compile-time. If - the function is called with a noncompliant container and with - assertions switched off, the behavior is undefined and will most - likely yield segmentation violation. - - @tparam ContiguousContainer container type with contiguous storage - @param[in] c container to read from - @param[in] cb a parser callback function of type @ref parser_callback_t - which is used to control the deserialization by filtering unwanted values - (optional) - - @return result of the deserialization - - @throw parse_error.101 in case of an unexpected token - @throw parse_error.102 if to_unicode fails or surrogate error - @throw parse_error.103 if to_unicode fails - - @complexity Linear in the length of the input. The parser is a predictive - LL(1) parser. The complexity can be higher if the parser callback function - @a cb has a super-linear complexity. - - @note A UTF-8 byte order mark is silently ignored. 
- - @liveexample{The example below demonstrates the `parse()` function reading - from a contiguous container.,parse__contiguouscontainer__parser_callback_t} - - @since version 2.0.3 - */ - template::value and - std::is_base_of< - std::random_access_iterator_tag, - typename std::iterator_traits()))>::iterator_category>::value - , int>::type = 0> - static basic_json parse(const ContiguousContainer& c, - const parser_callback_t cb = nullptr) - { - // delegate the call to the iterator-range parse overload - return parse(std::begin(c), std::end(c), cb); - } - - template::value and - std::is_base_of< - std::random_access_iterator_tag, - typename std::iterator_traits()))>::iterator_category>::value - , int>::type = 0> - static bool accept(const ContiguousContainer& c) - { - // delegate the call to the iterator-range accept overload - return accept(std::begin(c), std::end(c)); + return parser(detail::input_adapter(first, last)).accept(true); } /*! @brief deserialize from stream @deprecated This stream operator is deprecated and will be removed in a future version of the library. Please use - @ref std::istream& operator>>(std::istream&, basic_json&) + @ref operator>>(std::istream&, basic_json&) instead; that is, replace calls like `j << i;` with `i >> j;`. */ JSON_DEPRECATED friend std::istream& operator<<(basic_json& j, std::istream& i) { - j = parser(input_adapter::create(i)).parse(false); - return i; + return operator>>(i, j); } /*! @@ -7768,7 +13039,6 @@ class basic_json @throw parse_error.101 in case of an unexpected token @throw parse_error.102 if to_unicode fails or surrogate error @throw parse_error.103 if to_unicode fails - @throw parse_error.111 if input stream is in a bad state @complexity Linear in the length of the input. The parser is a predictive LL(1) parser. 
@@ -7785,7 +13055,7 @@ class basic_json */ friend std::istream& operator>>(std::istream& i, basic_json& j) { - j = parser(input_adapter::create(i)).parse(false); + parser(detail::input_adapter(i)).parse(false, j); return i; } @@ -7801,16 +13071,31 @@ class basic_json Returns the type name as string to be used in error messages - usually to indicate that a function was called on a wrong JSON type. - @return basically a string representation of a the @a m_type member + @return a string representation of a the @a m_type member: + Value type | return value + ----------- | ------------- + null | `"null"` + boolean | `"boolean"` + string | `"string"` + number | `"number"` (for all number types) + object | `"object"` + array | `"array"` + discarded | `"discarded"` + + @exceptionsafety No-throw guarantee: this function never throws exceptions. @complexity Constant. @liveexample{The following code exemplifies `type_name()` for all JSON types.,type_name} - @since version 1.0.0, public since 2.1.0 + @sa @ref type() -- return the type of the JSON value + @sa @ref operator value_t() -- return the type of the JSON value (implicit) + + @since version 1.0.0, public since 2.1.0, `const char*` and `noexcept` + since 3.0.0 */ - std::string type_name() const + const char* type_name() const noexcept { { switch (m_type) @@ -7845,1239 +13130,6 @@ class basic_json /// the value of the current element json_value m_value = {}; - - private: - /////////////// - // iterators // - /////////////// - - /*! - @brief an iterator for primitive JSON types - - This class models an iterator for primitive JSON types (boolean, number, - string). It's only purpose is to allow the iterator/const_iterator classes - to "iterate" over primitive values. Internally, the iterator is modeled by - a `difference_type` variable. Value begin_value (`0`) models the begin, - end_value (`1`) models past the end. 
- */ - class primitive_iterator_t - { - public: - - difference_type get_value() const noexcept - { - return m_it; - } - /// set iterator to a defined beginning - void set_begin() noexcept - { - m_it = begin_value; - } - - /// set iterator to a defined past the end - void set_end() noexcept - { - m_it = end_value; - } - - /// return whether the iterator can be dereferenced - constexpr bool is_begin() const noexcept - { - return (m_it == begin_value); - } - - /// return whether the iterator is at end - constexpr bool is_end() const noexcept - { - return (m_it == end_value); - } - - friend constexpr bool operator==(primitive_iterator_t lhs, primitive_iterator_t rhs) noexcept - { - return lhs.m_it == rhs.m_it; - } - - friend constexpr bool operator!=(primitive_iterator_t lhs, primitive_iterator_t rhs) noexcept - { - return !(lhs == rhs); - } - - friend constexpr bool operator<(primitive_iterator_t lhs, primitive_iterator_t rhs) noexcept - { - return lhs.m_it < rhs.m_it; - } - - friend constexpr bool operator<=(primitive_iterator_t lhs, primitive_iterator_t rhs) noexcept - { - return lhs.m_it <= rhs.m_it; - } - - friend constexpr bool operator>(primitive_iterator_t lhs, primitive_iterator_t rhs) noexcept - { - return lhs.m_it > rhs.m_it; - } - - friend constexpr bool operator>=(primitive_iterator_t lhs, primitive_iterator_t rhs) noexcept - { - return lhs.m_it >= rhs.m_it; - } - - primitive_iterator_t operator+(difference_type i) - { - auto result = *this; - result += i; - return result; - } - - friend constexpr difference_type operator-(primitive_iterator_t lhs, primitive_iterator_t rhs) noexcept - { - return lhs.m_it - rhs.m_it; - } - - friend std::ostream& operator<<(std::ostream& os, primitive_iterator_t it) - { - return os << it.m_it; - } - - primitive_iterator_t& operator++() - { - ++m_it; - return *this; - } - - primitive_iterator_t operator++(int) - { - auto result = *this; - m_it++; - return result; - } - - primitive_iterator_t& operator--() - { - --m_it; - 
return *this; - } - - primitive_iterator_t operator--(int) - { - auto result = *this; - m_it--; - return result; - } - - primitive_iterator_t& operator+=(difference_type n) - { - m_it += n; - return *this; - } - - primitive_iterator_t& operator-=(difference_type n) - { - m_it -= n; - return *this; - } - - private: - static constexpr difference_type begin_value = 0; - static constexpr difference_type end_value = begin_value + 1; - - /// iterator as signed integer type - difference_type m_it = std::numeric_limits::denorm_min(); - }; - - /*! - @brief an iterator value - - @note This structure could easily be a union, but MSVC currently does not - allow unions members with complex constructors, see - https://github.com/nlohmann/json/pull/105. - */ - struct internal_iterator - { - /// iterator for JSON objects - typename object_t::iterator object_iterator; - /// iterator for JSON arrays - typename array_t::iterator array_iterator; - /// generic iterator for all other types - primitive_iterator_t primitive_iterator; - - /// create an uninitialized internal_iterator - internal_iterator() noexcept - : object_iterator(), array_iterator(), primitive_iterator() - {} - }; - - /// proxy class for the iterator_wrapper functions - template - class iteration_proxy - { - private: - /// helper class for iteration - class iteration_proxy_internal - { - private: - /// the iterator - IteratorType anchor; - /// an index for arrays (used to create key names) - size_t array_index = 0; - - public: - explicit iteration_proxy_internal(IteratorType it) noexcept - : anchor(it) - {} - - /// dereference operator (needed for range-based for) - iteration_proxy_internal& operator*() - { - return *this; - } - - /// increment operator (needed for range-based for) - iteration_proxy_internal& operator++() - { - ++anchor; - ++array_index; - - return *this; - } - - /// inequality operator (needed for range-based for) - bool operator!= (const iteration_proxy_internal& o) const - { - return anchor != 
o.anchor; - } - - /// return key of the iterator - typename basic_json::string_t key() const - { - assert(anchor.m_object != nullptr); - - switch (anchor.m_object->type()) - { - // use integer array index as key - case value_t::array: - { - return std::to_string(array_index); - } - - // use key from the object - case value_t::object: - { - return anchor.key(); - } - - // use an empty key for all primitive types - default: - { - return ""; - } - } - } - - /// return value of the iterator - typename IteratorType::reference value() const - { - return anchor.value(); - } - }; - - /// the container to iterate - typename IteratorType::reference container; - - public: - /// construct iteration proxy from a container - explicit iteration_proxy(typename IteratorType::reference cont) - : container(cont) - {} - - /// return iterator begin (needed for range-based for) - iteration_proxy_internal begin() noexcept - { - return iteration_proxy_internal(container.begin()); - } - - /// return iterator end (needed for range-based for) - iteration_proxy_internal end() noexcept - { - return iteration_proxy_internal(container.end()); - } - }; - - public: - /*! - @brief a template for a random access iterator for the @ref basic_json class - - This class implements a both iterators (iterator and const_iterator) for the - @ref basic_json class. - - @note An iterator is called *initialized* when a pointer to a JSON value - has been set (e.g., by a constructor or a copy assignment). If the - iterator is default-constructed, it is *uninitialized* and most - methods are undefined. **The library uses assertions to detect calls - on uninitialized iterators.** - - @requirement The class satisfies the following concept requirements: - - [RandomAccessIterator](http://en.cppreference.com/w/cpp/concept/RandomAccessIterator): - The iterator that can be moved to point (forward and backward) to any - element in constant time. 
- - @since version 1.0.0, simplified in version 2.0.9 - */ - template - class iter_impl : public std::iterator - { - /// allow basic_json to access private members - friend class basic_json; - - // make sure U is basic_json or const basic_json - static_assert(std::is_same::value - or std::is_same::value, - "iter_impl only accepts (const) basic_json"); - - public: - /// the type of the values when the iterator is dereferenced - using value_type = typename basic_json::value_type; - /// a type to represent differences between iterators - using difference_type = typename basic_json::difference_type; - /// defines a pointer to the type iterated over (value_type) - using pointer = typename std::conditional::value, - typename basic_json::const_pointer, - typename basic_json::pointer>::type; - /// defines a reference to the type iterated over (value_type) - using reference = typename std::conditional::value, - typename basic_json::const_reference, - typename basic_json::reference>::type; - /// the category of the iterator - using iterator_category = std::bidirectional_iterator_tag; - - /// default constructor - iter_impl() = default; - - /*! - @brief constructor for a given JSON instance - @param[in] object pointer to a JSON object for this iterator - @pre object != nullptr - @post The iterator is initialized; i.e. `m_object != nullptr`. - */ - explicit iter_impl(pointer object) noexcept - : m_object(object) - { - assert(m_object != nullptr); - - switch (m_object->m_type) - { - case basic_json::value_t::object: - { - m_it.object_iterator = typename object_t::iterator(); - break; - } - - case basic_json::value_t::array: - { - m_it.array_iterator = typename array_t::iterator(); - break; - } - - default: - { - m_it.primitive_iterator = primitive_iterator_t(); - break; - } - } - } - - /*! - @note The conventional copy constructor and copy assignment are - implicitly defined. 
- Combined with the following converting constructor and assigment, - they support: copy from iterator to iterator, - copy from const iterator to const iterator, - and conversion from iterator to const iterator. - However conversion from const iterator to iterator is not defined. - */ - - /*! - @brief converting constructor - @param[in] other non-const iterator to copy from - @note It is not checked whether @a other is initialized. - */ - iter_impl(const iter_impl& other) noexcept - : m_object(other.m_object), m_it(other.m_it) - {} - - /*! - @brief converting assignment - @param[in,out] other non-const iterator to copy from - @return const/non-const iterator - @note It is not checked whether @a other is initialized. - */ - iter_impl& operator=(const iter_impl& other) noexcept - { - m_object = other.m_object; - m_it = other.m_it; - return *this; - } - - private: - /*! - @brief set the iterator to the first value - @pre The iterator is initialized; i.e. `m_object != nullptr`. - */ - void set_begin() noexcept - { - assert(m_object != nullptr); - - switch (m_object->m_type) - { - case basic_json::value_t::object: - { - m_it.object_iterator = m_object->m_value.object->begin(); - break; - } - - case basic_json::value_t::array: - { - m_it.array_iterator = m_object->m_value.array->begin(); - break; - } - - case basic_json::value_t::null: - { - // set to end so begin()==end() is true: null is empty - m_it.primitive_iterator.set_end(); - break; - } - - default: - { - m_it.primitive_iterator.set_begin(); - break; - } - } - } - - /*! - @brief set the iterator past the last value - @pre The iterator is initialized; i.e. `m_object != nullptr`. 
- */ - void set_end() noexcept - { - assert(m_object != nullptr); - - switch (m_object->m_type) - { - case basic_json::value_t::object: - { - m_it.object_iterator = m_object->m_value.object->end(); - break; - } - - case basic_json::value_t::array: - { - m_it.array_iterator = m_object->m_value.array->end(); - break; - } - - default: - { - m_it.primitive_iterator.set_end(); - break; - } - } - } - - public: - /*! - @brief return a reference to the value pointed to by the iterator - @pre The iterator is initialized; i.e. `m_object != nullptr`. - */ - reference operator*() const - { - assert(m_object != nullptr); - - switch (m_object->m_type) - { - case basic_json::value_t::object: - { - assert(m_it.object_iterator != m_object->m_value.object->end()); - return m_it.object_iterator->second; - } - - case basic_json::value_t::array: - { - assert(m_it.array_iterator != m_object->m_value.array->end()); - return *m_it.array_iterator; - } - - case basic_json::value_t::null: - { - JSON_THROW(invalid_iterator::create(214, "cannot get value")); - } - - default: - { - if (m_it.primitive_iterator.is_begin()) - { - return *m_object; - } - - JSON_THROW(invalid_iterator::create(214, "cannot get value")); - } - } - } - - /*! - @brief dereference the iterator - @pre The iterator is initialized; i.e. `m_object != nullptr`. - */ - pointer operator->() const - { - assert(m_object != nullptr); - - switch (m_object->m_type) - { - case basic_json::value_t::object: - { - assert(m_it.object_iterator != m_object->m_value.object->end()); - return &(m_it.object_iterator->second); - } - - case basic_json::value_t::array: - { - assert(m_it.array_iterator != m_object->m_value.array->end()); - return &*m_it.array_iterator; - } - - default: - { - if (m_it.primitive_iterator.is_begin()) - { - return m_object; - } - - JSON_THROW(invalid_iterator::create(214, "cannot get value")); - } - } - } - - /*! - @brief post-increment (it++) - @pre The iterator is initialized; i.e. `m_object != nullptr`. 
- */ - iter_impl operator++(int) - { - auto result = *this; - ++(*this); - return result; - } - - /*! - @brief pre-increment (++it) - @pre The iterator is initialized; i.e. `m_object != nullptr`. - */ - iter_impl& operator++() - { - assert(m_object != nullptr); - - switch (m_object->m_type) - { - case basic_json::value_t::object: - { - std::advance(m_it.object_iterator, 1); - break; - } - - case basic_json::value_t::array: - { - std::advance(m_it.array_iterator, 1); - break; - } - - default: - { - ++m_it.primitive_iterator; - break; - } - } - - return *this; - } - - /*! - @brief post-decrement (it--) - @pre The iterator is initialized; i.e. `m_object != nullptr`. - */ - iter_impl operator--(int) - { - auto result = *this; - --(*this); - return result; - } - - /*! - @brief pre-decrement (--it) - @pre The iterator is initialized; i.e. `m_object != nullptr`. - */ - iter_impl& operator--() - { - assert(m_object != nullptr); - - switch (m_object->m_type) - { - case basic_json::value_t::object: - { - std::advance(m_it.object_iterator, -1); - break; - } - - case basic_json::value_t::array: - { - std::advance(m_it.array_iterator, -1); - break; - } - - default: - { - --m_it.primitive_iterator; - break; - } - } - - return *this; - } - - /*! - @brief comparison: equal - @pre The iterator is initialized; i.e. `m_object != nullptr`. - */ - bool operator==(const iter_impl& other) const - { - // if objects are not the same, the comparison is undefined - if (m_object != other.m_object) - { - JSON_THROW(invalid_iterator::create(212, "cannot compare iterators of different containers")); - } - - assert(m_object != nullptr); - - switch (m_object->m_type) - { - case basic_json::value_t::object: - { - return (m_it.object_iterator == other.m_it.object_iterator); - } - - case basic_json::value_t::array: - { - return (m_it.array_iterator == other.m_it.array_iterator); - } - - default: - { - return (m_it.primitive_iterator == other.m_it.primitive_iterator); - } - } - } - - /*! 
- @brief comparison: not equal - @pre The iterator is initialized; i.e. `m_object != nullptr`. - */ - bool operator!=(const iter_impl& other) const - { - return not operator==(other); - } - - /*! - @brief comparison: smaller - @pre The iterator is initialized; i.e. `m_object != nullptr`. - */ - bool operator<(const iter_impl& other) const - { - // if objects are not the same, the comparison is undefined - if (m_object != other.m_object) - { - JSON_THROW(invalid_iterator::create(212, "cannot compare iterators of different containers")); - } - - assert(m_object != nullptr); - - switch (m_object->m_type) - { - case basic_json::value_t::object: - { - JSON_THROW(invalid_iterator::create(213, "cannot compare order of object iterators")); - } - - case basic_json::value_t::array: - { - return (m_it.array_iterator < other.m_it.array_iterator); - } - - default: - { - return (m_it.primitive_iterator < other.m_it.primitive_iterator); - } - } - } - - /*! - @brief comparison: less than or equal - @pre The iterator is initialized; i.e. `m_object != nullptr`. - */ - bool operator<=(const iter_impl& other) const - { - return not other.operator < (*this); - } - - /*! - @brief comparison: greater than - @pre The iterator is initialized; i.e. `m_object != nullptr`. - */ - bool operator>(const iter_impl& other) const - { - return not operator<=(other); - } - - /*! - @brief comparison: greater than or equal - @pre The iterator is initialized; i.e. `m_object != nullptr`. - */ - bool operator>=(const iter_impl& other) const - { - return not operator<(other); - } - - /*! - @brief add to iterator - @pre The iterator is initialized; i.e. `m_object != nullptr`. 
- */ - iter_impl& operator+=(difference_type i) - { - assert(m_object != nullptr); - - switch (m_object->m_type) - { - case basic_json::value_t::object: - { - JSON_THROW(invalid_iterator::create(209, "cannot use offsets with object iterators")); - } - - case basic_json::value_t::array: - { - std::advance(m_it.array_iterator, i); - break; - } - - default: - { - m_it.primitive_iterator += i; - break; - } - } - - return *this; - } - - /*! - @brief subtract from iterator - @pre The iterator is initialized; i.e. `m_object != nullptr`. - */ - iter_impl& operator-=(difference_type i) - { - return operator+=(-i); - } - - /*! - @brief add to iterator - @pre The iterator is initialized; i.e. `m_object != nullptr`. - */ - iter_impl operator+(difference_type i) const - { - auto result = *this; - result += i; - return result; - } - - /*! - @brief addition of distance and iterator - @pre The iterator is initialized; i.e. `m_object != nullptr`. - */ - friend iter_impl operator+(difference_type i, const iter_impl& it) - { - auto result = it; - result += i; - return result; - } - - /*! - @brief subtract from iterator - @pre The iterator is initialized; i.e. `m_object != nullptr`. - */ - iter_impl operator-(difference_type i) const - { - auto result = *this; - result -= i; - return result; - } - - /*! - @brief return difference - @pre The iterator is initialized; i.e. `m_object != nullptr`. - */ - difference_type operator-(const iter_impl& other) const - { - assert(m_object != nullptr); - - switch (m_object->m_type) - { - case basic_json::value_t::object: - { - JSON_THROW(invalid_iterator::create(209, "cannot use offsets with object iterators")); - } - - case basic_json::value_t::array: - { - return m_it.array_iterator - other.m_it.array_iterator; - } - - default: - { - return m_it.primitive_iterator - other.m_it.primitive_iterator; - } - } - } - - /*! - @brief access to successor - @pre The iterator is initialized; i.e. `m_object != nullptr`. 
- */ - reference operator[](difference_type n) const - { - assert(m_object != nullptr); - - switch (m_object->m_type) - { - case basic_json::value_t::object: - { - JSON_THROW(invalid_iterator::create(208, "cannot use operator[] for object iterators")); - } - - case basic_json::value_t::array: - { - return *std::next(m_it.array_iterator, n); - } - - case basic_json::value_t::null: - { - JSON_THROW(invalid_iterator::create(214, "cannot get value")); - } - - default: - { - if (m_it.primitive_iterator.get_value() == -n) - { - return *m_object; - } - - JSON_THROW(invalid_iterator::create(214, "cannot get value")); - } - } - } - - /*! - @brief return the key of an object iterator - @pre The iterator is initialized; i.e. `m_object != nullptr`. - */ - typename object_t::key_type key() const - { - assert(m_object != nullptr); - - if (m_object->is_object()) - { - return m_it.object_iterator->first; - } - - JSON_THROW(invalid_iterator::create(207, "cannot use key() for non-object iterators")); - } - - /*! - @brief return the value of an iterator - @pre The iterator is initialized; i.e. `m_object != nullptr`. - */ - reference value() const - { - return operator*(); - } - - private: - /// associated JSON instance - pointer m_object = nullptr; - /// the actual iterator of the associated instance - struct internal_iterator m_it = internal_iterator(); - }; - - /*! - @brief a template for a reverse iterator class - - @tparam Base the base iterator type to reverse. Valid types are @ref - iterator (to create @ref reverse_iterator) and @ref const_iterator (to - create @ref const_reverse_iterator). - - @requirement The class satisfies the following concept requirements: - - [RandomAccessIterator](http://en.cppreference.com/w/cpp/concept/RandomAccessIterator): - The iterator that can be moved to point (forward and backward) to any - element in constant time. 
- - [OutputIterator](http://en.cppreference.com/w/cpp/concept/OutputIterator): - It is possible to write to the pointed-to element (only if @a Base is - @ref iterator). - - @since version 1.0.0 - */ - template - class json_reverse_iterator : public std::reverse_iterator - { - public: - /// shortcut to the reverse iterator adaptor - using base_iterator = std::reverse_iterator; - /// the reference type for the pointed-to element - using reference = typename Base::reference; - - /// create reverse iterator from iterator - json_reverse_iterator(const typename base_iterator::iterator_type& it) noexcept - : base_iterator(it) - {} - - /// create reverse iterator from base class - json_reverse_iterator(const base_iterator& it) noexcept - : base_iterator(it) - {} - - /// post-increment (it++) - json_reverse_iterator operator++(int) - { - return static_cast(base_iterator::operator++(1)); - } - - /// pre-increment (++it) - json_reverse_iterator& operator++() - { - return static_cast(base_iterator::operator++()); - } - - /// post-decrement (it--) - json_reverse_iterator operator--(int) - { - return static_cast(base_iterator::operator--(1)); - } - - /// pre-decrement (--it) - json_reverse_iterator& operator--() - { - return static_cast(base_iterator::operator--()); - } - - /// add to iterator - json_reverse_iterator& operator+=(difference_type i) - { - return static_cast(base_iterator::operator+=(i)); - } - - /// add to iterator - json_reverse_iterator operator+(difference_type i) const - { - return static_cast(base_iterator::operator+(i)); - } - - /// subtract from iterator - json_reverse_iterator operator-(difference_type i) const - { - return static_cast(base_iterator::operator-(i)); - } - - /// return difference - difference_type operator-(const json_reverse_iterator& other) const - { - return base_iterator(*this) - base_iterator(other); - } - - /// access to successor - reference operator[](difference_type n) const - { - return *(this->operator+(n)); - } - - /// return the 
key of an object iterator - typename object_t::key_type key() const - { - auto it = --this->base(); - return it.key(); - } - - /// return the value of an iterator - reference value() const - { - auto it = --this->base(); - return it.operator * (); - } - }; - - - private: - //////////////////// - // input adapters // - //////////////////// - - /// abstract input adapter interface - class input_adapter - { - public: - virtual int get_character() = 0; - virtual std::string read(size_t offset, size_t length) = 0; - virtual ~input_adapter() {} - - // native support - - /// input adapter for input stream - static std::shared_ptr create(std::istream& i) - { - return std::shared_ptr(new cached_input_stream_adapter<16384>(i)); - } - - /// input adapter for input stream - static std::shared_ptr create(std::istream&& i) - { - return std::shared_ptr(new cached_input_stream_adapter<16384>(i)); - } - - /// input adapter for buffer - static std::shared_ptr create(const char* b, size_t l) - { - return std::shared_ptr(new input_buffer_adapter(b, l)); - } - - // derived support - - /// input adapter for string literal - template::value and - std::is_integral::type>::value and - sizeof(typename std::remove_pointer::type) == 1, int>::type = 0> - static std::shared_ptr create(CharT b) - { - return create(reinterpret_cast(b), - std::strlen(reinterpret_cast(b))); - } - - /// input adapter for iterator range with contiguous storage - template::iterator_category, std::random_access_iterator_tag>::value - , int>::type - = 0> - static std::shared_ptr create(IteratorType first, IteratorType last) - { - // assertion to check that the iterator range is indeed contiguous, - // see http://stackoverflow.com/a/35008842/266378 for more discussion - assert(std::accumulate(first, last, std::pair(true, 0), - [&first](std::pair res, decltype(*first) val) - { - res.first &= (val == *(std::next(std::addressof(*first), res.second++))); - return res; - }).first); - - // assertion to check that each element 
is 1 byte long - static_assert(sizeof(typename std::iterator_traits::value_type) == 1, - "each element in the iterator range must have the size of 1 byte"); - - return create(reinterpret_cast(&(*first)), - static_cast(std::distance(first, last))); - } - - /// input adapter for array - template - static std::shared_ptr create(T (&array)[N]) - { - // delegate the call to the iterator-range overload - return create(std::begin(array), std::end(array)); - } - - /// input adapter for contiguous container - template::value and - std::is_base_of< - std::random_access_iterator_tag, - typename std::iterator_traits()))>::iterator_category>::value - , int>::type = 0> - static std::shared_ptr create(const ContiguousContainer& c) - { - // delegate the call to the iterator-range overload - return create(std::begin(c), std::end(c)); - } - }; - - /// a type to simplify interfaces - using input_adapter_t = std::shared_ptr; - - /// input adapter for cached stream input - template - class cached_input_stream_adapter : public input_adapter - { - public: - cached_input_stream_adapter(std::istream& i) - : is(i), start_position(is.tellg()) - { - // immediately abort if stream is erroneous - if (JSON_UNLIKELY(i.fail())) - { - JSON_THROW(parse_error::create(111, 0, "bad input stream")); - } - - fill_buffer(); - - // skip byte order mark - if (fill_size >= 3 and buffer[0] == '\xEF' and buffer[1] == '\xBB' and buffer[2] == '\xBF') - { - buffer_pos += 3; - processed_chars += 3; - } - } - - ~cached_input_stream_adapter() override - { - // clear stream flags - is.clear(); - // We initially read a lot of characters into the buffer, and we - // may not have processed all of them. Therefore, we need to - // "rewind" the stream after the last processed char. 
- is.seekg(start_position); - is.ignore(static_cast(processed_chars)); - // clear stream flags - is.clear(); - } - - int get_character() override - { - // check if refilling is necessary and possible - if (buffer_pos == fill_size and not eof) - { - fill_buffer(); - - // check and remember that filling did not yield new input - if (fill_size == 0) - { - eof = true; - return std::char_traits::eof(); - } - - // the buffer is ready - buffer_pos = 0; - } - - ++processed_chars; - assert(buffer_pos < buffer.size()); - return buffer[buffer_pos++] & 0xFF; - } - - std::string read(size_t offset, size_t length) override - { - // create buffer - std::string result(length, '\0'); - - // save stream position - const auto current_pos = is.tellg(); - // save stream flags - const auto flags = is.rdstate(); - - // clear stream flags - is.clear(); - // set stream position - is.seekg(static_cast(offset)); - // read bytes - is.read(&result[0], static_cast(length)); - - // reset stream position - is.seekg(current_pos); - // reset stream flags - is.setstate(flags); - - return result; - } - - private: - void fill_buffer() - { - // fill - is.read(buffer.data(), static_cast(buffer.size())); - // store number of bytes in the buffer - fill_size = static_cast(is.gcount()); - } - - /// the associated input stream - std::istream& is; - - /// chars returned via get_character() - size_t processed_chars = 0; - /// chars processed in the current buffer - size_t buffer_pos = 0; - - /// whether stream reached eof - bool eof = false; - /// how many chars have been copied to the buffer by last (re)fill - size_t fill_size = 0; - - /// position of the stream when we started - const std::streampos start_position; - - /// internal buffer - std::array buffer{{}}; - }; - - /// input adapter for buffer input - class input_buffer_adapter : public input_adapter - { - public: - input_buffer_adapter(const char* b, size_t l) - : input_adapter(), cursor(b), limit(b + l), start(b) - { - // skip byte order mark - if 
(l >= 3 and b[0] == '\xEF' and b[1] == '\xBB' and b[2] == '\xBF') - { - cursor += 3; - } - } - - // delete because of pointer members - input_buffer_adapter(const input_buffer_adapter&) = delete; - input_buffer_adapter& operator=(input_buffer_adapter&) = delete; - - int get_character() noexcept override - { - if (JSON_LIKELY(cursor < limit)) - { - return *(cursor++) & 0xFF; - } - else - { - return std::char_traits::eof(); - } - } - - std::string read(size_t offset, size_t length) override - { - // avoid reading too many characters - const size_t max_length = static_cast(limit - start); - return std::string(start + offset, (std::min)(length, max_length - offset)); - } - - private: - /// pointer to the current character - const char* cursor; - /// pointer past the last character - const char* limit; - /// pointer to the first character - const char* start; - }; - ////////////////////////////////////////// // binary serialization/deserialization // ////////////////////////////////////////// @@ -9085,1678 +13137,6 @@ class basic_json /// @name binary serialization/deserialization support /// @{ - private: - /*! - @brief deserialization of CBOR and MessagePack values - */ - class binary_reader - { - public: - /*! - @brief create a binary reader - - @param[in] adapter input adapter to read from - */ - explicit binary_reader(input_adapter_t adapter) - : ia(adapter), is_little_endian(little_endianess()) - { - assert(ia); - } - - /*! - @brief create a JSON value from CBOR input - - @param[in] get_char whether a new character should be retrieved from - the input (true, default) or whether the last - read character should be considered instead - - @return JSON value created from CBOR input - - @throw parse_error.110 if input ended unexpectedly - @throw parse_error.112 if unsupported byte was read - */ - basic_json parse_cbor(const bool get_char = true) - { - switch (get_char ? 
get() : current) - { - // EOF - case std::char_traits::eof(): - { - JSON_THROW(parse_error::create(110, chars_read, "unexpected end of input")); - } - - // Integer 0x00..0x17 (0..23) - case 0x00: - case 0x01: - case 0x02: - case 0x03: - case 0x04: - case 0x05: - case 0x06: - case 0x07: - case 0x08: - case 0x09: - case 0x0a: - case 0x0b: - case 0x0c: - case 0x0d: - case 0x0e: - case 0x0f: - case 0x10: - case 0x11: - case 0x12: - case 0x13: - case 0x14: - case 0x15: - case 0x16: - case 0x17: - { - return static_cast(current); - } - - case 0x18: // Unsigned integer (one-byte uint8_t follows) - { - return get_number(); - } - - case 0x19: // Unsigned integer (two-byte uint16_t follows) - { - return get_number(); - } - - case 0x1a: // Unsigned integer (four-byte uint32_t follows) - { - return get_number(); - } - - case 0x1b: // Unsigned integer (eight-byte uint64_t follows) - { - return get_number(); - } - - // Negative integer -1-0x00..-1-0x17 (-1..-24) - case 0x20: - case 0x21: - case 0x22: - case 0x23: - case 0x24: - case 0x25: - case 0x26: - case 0x27: - case 0x28: - case 0x29: - case 0x2a: - case 0x2b: - case 0x2c: - case 0x2d: - case 0x2e: - case 0x2f: - case 0x30: - case 0x31: - case 0x32: - case 0x33: - case 0x34: - case 0x35: - case 0x36: - case 0x37: - { - return static_cast(0x20 - 1 - current); - } - - case 0x38: // Negative integer (one-byte uint8_t follows) - { - // must be uint8_t ! 
- return static_cast(-1) - get_number(); - } - - case 0x39: // Negative integer -1-n (two-byte uint16_t follows) - { - return static_cast(-1) - get_number(); - } - - case 0x3a: // Negative integer -1-n (four-byte uint32_t follows) - { - return static_cast(-1) - get_number(); - } - - case 0x3b: // Negative integer -1-n (eight-byte uint64_t follows) - { - return static_cast(-1) - static_cast(get_number()); - } - - // UTF-8 string (0x00..0x17 bytes follow) - case 0x60: - case 0x61: - case 0x62: - case 0x63: - case 0x64: - case 0x65: - case 0x66: - case 0x67: - case 0x68: - case 0x69: - case 0x6a: - case 0x6b: - case 0x6c: - case 0x6d: - case 0x6e: - case 0x6f: - case 0x70: - case 0x71: - case 0x72: - case 0x73: - case 0x74: - case 0x75: - case 0x76: - case 0x77: - case 0x78: // UTF-8 string (one-byte uint8_t for n follows) - case 0x79: // UTF-8 string (two-byte uint16_t for n follow) - case 0x7a: // UTF-8 string (four-byte uint32_t for n follow) - case 0x7b: // UTF-8 string (eight-byte uint64_t for n follow) - case 0x7f: // UTF-8 string (indefinite length) - { - return get_cbor_string(); - } - - // array (0x00..0x17 data items follow) - case 0x80: - case 0x81: - case 0x82: - case 0x83: - case 0x84: - case 0x85: - case 0x86: - case 0x87: - case 0x88: - case 0x89: - case 0x8a: - case 0x8b: - case 0x8c: - case 0x8d: - case 0x8e: - case 0x8f: - case 0x90: - case 0x91: - case 0x92: - case 0x93: - case 0x94: - case 0x95: - case 0x96: - case 0x97: - { - basic_json result = value_t::array; - const auto len = static_cast(current & 0x1f); - for (size_t i = 0; i < len; ++i) - { - result.push_back(parse_cbor()); - } - return result; - } - - case 0x98: // array (one-byte uint8_t for n follows) - { - basic_json result = value_t::array; - const auto len = static_cast(get_number()); - for (size_t i = 0; i < len; ++i) - { - result.push_back(parse_cbor()); - } - return result; - } - - case 0x99: // array (two-byte uint16_t for n follow) - { - basic_json result = value_t::array; - const 
auto len = static_cast(get_number()); - for (size_t i = 0; i < len; ++i) - { - result.push_back(parse_cbor()); - } - return result; - } - - case 0x9a: // array (four-byte uint32_t for n follow) - { - basic_json result = value_t::array; - const auto len = static_cast(get_number()); - for (size_t i = 0; i < len; ++i) - { - result.push_back(parse_cbor()); - } - return result; - } - - case 0x9b: // array (eight-byte uint64_t for n follow) - { - basic_json result = value_t::array; - const auto len = static_cast(get_number()); - for (size_t i = 0; i < len; ++i) - { - result.push_back(parse_cbor()); - } - return result; - } - - case 0x9f: // array (indefinite length) - { - basic_json result = value_t::array; - while (get() != 0xff) - { - result.push_back(parse_cbor(false)); - } - return result; - } - - // map (0x00..0x17 pairs of data items follow) - case 0xa0: - case 0xa1: - case 0xa2: - case 0xa3: - case 0xa4: - case 0xa5: - case 0xa6: - case 0xa7: - case 0xa8: - case 0xa9: - case 0xaa: - case 0xab: - case 0xac: - case 0xad: - case 0xae: - case 0xaf: - case 0xb0: - case 0xb1: - case 0xb2: - case 0xb3: - case 0xb4: - case 0xb5: - case 0xb6: - case 0xb7: - { - basic_json result = value_t::object; - const auto len = static_cast(current & 0x1f); - for (size_t i = 0; i < len; ++i) - { - get(); - auto key = get_cbor_string(); - result[key] = parse_cbor(); - } - return result; - } - - case 0xb8: // map (one-byte uint8_t for n follows) - { - basic_json result = value_t::object; - const auto len = static_cast(get_number()); - for (size_t i = 0; i < len; ++i) - { - get(); - auto key = get_cbor_string(); - result[key] = parse_cbor(); - } - return result; - } - - case 0xb9: // map (two-byte uint16_t for n follow) - { - basic_json result = value_t::object; - const auto len = static_cast(get_number()); - for (size_t i = 0; i < len; ++i) - { - get(); - auto key = get_cbor_string(); - result[key] = parse_cbor(); - } - return result; - } - - case 0xba: // map (four-byte uint32_t for n 
follow) - { - basic_json result = value_t::object; - const auto len = static_cast(get_number()); - for (size_t i = 0; i < len; ++i) - { - get(); - auto key = get_cbor_string(); - result[key] = parse_cbor(); - } - return result; - } - - case 0xbb: // map (eight-byte uint64_t for n follow) - { - basic_json result = value_t::object; - const auto len = static_cast(get_number()); - for (size_t i = 0; i < len; ++i) - { - get(); - auto key = get_cbor_string(); - result[key] = parse_cbor(); - } - return result; - } - - case 0xbf: // map (indefinite length) - { - basic_json result = value_t::object; - while (get() != 0xff) - { - auto key = get_cbor_string(); - result[key] = parse_cbor(); - } - return result; - } - - case 0xf4: // false - { - return false; - } - - case 0xf5: // true - { - return true; - } - - case 0xf6: // null - { - return value_t::null; - } - - case 0xf9: // Half-Precision Float (two-byte IEEE 754) - { - const int byte1 = get(); - check_eof(); - const int byte2 = get(); - check_eof(); - - // code from RFC 7049, Appendix D, Figure 3: - // As half-precision floating-point numbers were only added - // to IEEE 754 in 2008, today's programming platforms often - // still only have limited support for them. It is very - // easy to include at least decoding support for them even - // without such support. An example of a small decoder for - // half-precision floating-point numbers in the C language - // is shown in Fig. 3. - const int half = (byte1 << 8) + byte2; - const int exp = (half >> 10) & 0x1f; - const int mant = half & 0x3ff; - double val; - if (exp == 0) - { - val = std::ldexp(mant, -24); - } - else if (exp != 31) - { - val = std::ldexp(mant + 1024, exp - 25); - } - else - { - val = mant == 0 - ? std::numeric_limits::infinity() - : std::numeric_limits::quiet_NaN(); - } - return (half & 0x8000) != 0 ? 
-val : val; - } - - case 0xfa: // Single-Precision Float (four-byte IEEE 754) - { - return get_number(); - } - - case 0xfb: // Double-Precision Float (eight-byte IEEE 754) - { - return get_number(); - } - - default: // anything else (0xFF is handled inside the other types) - { - std::stringstream ss; - ss << std::setw(2) << std::setfill('0') << std::hex << current; - JSON_THROW(parse_error::create(112, chars_read, "error reading CBOR; last byte: 0x" + ss.str())); - } - } - } - - /*! - @brief create a JSON value from MessagePack input - - @return JSON value created from MessagePack input - - @throw parse_error.110 if input ended unexpectedly - @throw parse_error.112 if unsupported byte was read - */ - basic_json parse_msgpack() - { - switch (get()) - { - // EOF - case std::char_traits::eof(): - { - JSON_THROW(parse_error::create(110, chars_read, "unexpected end of input")); - } - - // positive fixint - case 0x00: - case 0x01: - case 0x02: - case 0x03: - case 0x04: - case 0x05: - case 0x06: - case 0x07: - case 0x08: - case 0x09: - case 0x0a: - case 0x0b: - case 0x0c: - case 0x0d: - case 0x0e: - case 0x0f: - case 0x10: - case 0x11: - case 0x12: - case 0x13: - case 0x14: - case 0x15: - case 0x16: - case 0x17: - case 0x18: - case 0x19: - case 0x1a: - case 0x1b: - case 0x1c: - case 0x1d: - case 0x1e: - case 0x1f: - case 0x20: - case 0x21: - case 0x22: - case 0x23: - case 0x24: - case 0x25: - case 0x26: - case 0x27: - case 0x28: - case 0x29: - case 0x2a: - case 0x2b: - case 0x2c: - case 0x2d: - case 0x2e: - case 0x2f: - case 0x30: - case 0x31: - case 0x32: - case 0x33: - case 0x34: - case 0x35: - case 0x36: - case 0x37: - case 0x38: - case 0x39: - case 0x3a: - case 0x3b: - case 0x3c: - case 0x3d: - case 0x3e: - case 0x3f: - case 0x40: - case 0x41: - case 0x42: - case 0x43: - case 0x44: - case 0x45: - case 0x46: - case 0x47: - case 0x48: - case 0x49: - case 0x4a: - case 0x4b: - case 0x4c: - case 0x4d: - case 0x4e: - case 0x4f: - case 0x50: - case 0x51: - case 0x52: - case 
0x53: - case 0x54: - case 0x55: - case 0x56: - case 0x57: - case 0x58: - case 0x59: - case 0x5a: - case 0x5b: - case 0x5c: - case 0x5d: - case 0x5e: - case 0x5f: - case 0x60: - case 0x61: - case 0x62: - case 0x63: - case 0x64: - case 0x65: - case 0x66: - case 0x67: - case 0x68: - case 0x69: - case 0x6a: - case 0x6b: - case 0x6c: - case 0x6d: - case 0x6e: - case 0x6f: - case 0x70: - case 0x71: - case 0x72: - case 0x73: - case 0x74: - case 0x75: - case 0x76: - case 0x77: - case 0x78: - case 0x79: - case 0x7a: - case 0x7b: - case 0x7c: - case 0x7d: - case 0x7e: - case 0x7f: - { - return static_cast(current); - } - - // fixmap - case 0x80: - case 0x81: - case 0x82: - case 0x83: - case 0x84: - case 0x85: - case 0x86: - case 0x87: - case 0x88: - case 0x89: - case 0x8a: - case 0x8b: - case 0x8c: - case 0x8d: - case 0x8e: - case 0x8f: - { - basic_json result = value_t::object; - const auto len = static_cast(current & 0x0f); - for (size_t i = 0; i < len; ++i) - { - get(); - auto key = get_msgpack_string(); - result[key] = parse_msgpack(); - } - return result; - } - - // fixarray - case 0x90: - case 0x91: - case 0x92: - case 0x93: - case 0x94: - case 0x95: - case 0x96: - case 0x97: - case 0x98: - case 0x99: - case 0x9a: - case 0x9b: - case 0x9c: - case 0x9d: - case 0x9e: - case 0x9f: - { - basic_json result = value_t::array; - const auto len = static_cast(current & 0x0f); - for (size_t i = 0; i < len; ++i) - { - result.push_back(parse_msgpack()); - } - return result; - } - - // fixstr - case 0xa0: - case 0xa1: - case 0xa2: - case 0xa3: - case 0xa4: - case 0xa5: - case 0xa6: - case 0xa7: - case 0xa8: - case 0xa9: - case 0xaa: - case 0xab: - case 0xac: - case 0xad: - case 0xae: - case 0xaf: - case 0xb0: - case 0xb1: - case 0xb2: - case 0xb3: - case 0xb4: - case 0xb5: - case 0xb6: - case 0xb7: - case 0xb8: - case 0xb9: - case 0xba: - case 0xbb: - case 0xbc: - case 0xbd: - case 0xbe: - case 0xbf: - { - return get_msgpack_string(); - } - - case 0xc0: // nil - { - return 
value_t::null; - } - - case 0xc2: // false - { - return false; - } - - case 0xc3: // true - { - return true; - } - - case 0xca: // float 32 - { - return get_number(); - } - - case 0xcb: // float 64 - { - return get_number(); - } - - case 0xcc: // uint 8 - { - return get_number(); - } - - case 0xcd: // uint 16 - { - return get_number(); - } - - case 0xce: // uint 32 - { - return get_number(); - } - - case 0xcf: // uint 64 - { - return get_number(); - } - - case 0xd0: // int 8 - { - return get_number(); - } - - case 0xd1: // int 16 - { - return get_number(); - } - - case 0xd2: // int 32 - { - return get_number(); - } - - case 0xd3: // int 64 - { - return get_number(); - } - - case 0xd9: // str 8 - case 0xda: // str 16 - case 0xdb: // str 32 - { - return get_msgpack_string(); - } - - case 0xdc: // array 16 - { - basic_json result = value_t::array; - const auto len = static_cast(get_number()); - for (size_t i = 0; i < len; ++i) - { - result.push_back(parse_msgpack()); - } - return result; - } - - case 0xdd: // array 32 - { - basic_json result = value_t::array; - const auto len = static_cast(get_number()); - for (size_t i = 0; i < len; ++i) - { - result.push_back(parse_msgpack()); - } - return result; - } - - case 0xde: // map 16 - { - basic_json result = value_t::object; - const auto len = static_cast(get_number()); - for (size_t i = 0; i < len; ++i) - { - get(); - auto key = get_msgpack_string(); - result[key] = parse_msgpack(); - } - return result; - } - - case 0xdf: // map 32 - { - basic_json result = value_t::object; - const auto len = static_cast(get_number()); - for (size_t i = 0; i < len; ++i) - { - get(); - auto key = get_msgpack_string(); - result[key] = parse_msgpack(); - } - return result; - } - - // positive fixint - case 0xe0: - case 0xe1: - case 0xe2: - case 0xe3: - case 0xe4: - case 0xe5: - case 0xe6: - case 0xe7: - case 0xe8: - case 0xe9: - case 0xea: - case 0xeb: - case 0xec: - case 0xed: - case 0xee: - case 0xef: - case 0xf0: - case 0xf1: - case 0xf2: 
- case 0xf3: - case 0xf4: - case 0xf5: - case 0xf6: - case 0xf7: - case 0xf8: - case 0xf9: - case 0xfa: - case 0xfb: - case 0xfc: - case 0xfd: - case 0xfe: - case 0xff: - { - return static_cast(current); - } - - default: // anything else - { - std::stringstream ss; - ss << std::setw(2) << std::setfill('0') << std::hex << current; - JSON_THROW(parse_error::create(112, chars_read, "error reading MessagePack; last byte: 0x" + ss.str())); - } - } - } - - /*! - @brief determine system byte order - - @return true iff system's byte order is little endian - - @note from http://stackoverflow.com/a/1001328/266378 - */ - static bool little_endianess() noexcept - { - int num = 1; - return (*reinterpret_cast(&num) == 1); - } - - private: - /*! - @brief get next character from the input - - This function provides the interface to the used input adapter. It does - not throw in case the input reached EOF, but returns - `std::char_traits::eof()` in that case. - - @return character read from the input - */ - int get() - { - ++chars_read; - return (current = ia->get_character()); - } - - /* - @brief read a number from the input - - @tparam T the type of the number - - @return number of type @a T - - @note This function needs to respect the system's endianess, because - bytes in CBOR and MessagePack are stored in network order (big - endian) and therefore need reordering on little endian systems. 
- - @throw parse_error.110 if input has less than `sizeof(T)` bytes - */ - template - T get_number() - { - // step 1: read input into array with system's byte order - std::array vec; - for (size_t i = 0; i < sizeof(T); ++i) - { - get(); - check_eof(); - - // reverse byte order prior to conversion if necessary - if (is_little_endian) - { - vec[sizeof(T) - i - 1] = static_cast(current); - } - else - { - vec[i] = static_cast(current); // LCOV_EXCL_LINE - } - } - - // step 2: convert array into number of type T and return - T result; - std::memcpy(&result, vec.data(), sizeof(T)); - return result; - } - - /*! - @brief create a string by reading characters from the input - - @param[in] len number of bytes to read - - @return string created by reading @a len bytes - - @throw parse_error.110 if input has less than @a len bytes - */ - std::string get_string(const size_t len) - { - std::string result; - for (size_t i = 0; i < len; ++i) - { - get(); - check_eof(); - result.append(1, static_cast(current)); - } - return result; - } - - /*! - @brief reads a CBOR string - - This function first reads starting bytes to determine the expected - string length and then copies this number of bytes into a string. - Additionally, CBOR's strings with indefinite lengths are supported. 
- - @return string - - @throw parse_error.110 if input ended - @throw parse_error.113 if an unexpexted byte is read - */ - std::string get_cbor_string() - { - check_eof(); - - switch (current) - { - // UTF-8 string (0x00..0x17 bytes follow) - case 0x60: - case 0x61: - case 0x62: - case 0x63: - case 0x64: - case 0x65: - case 0x66: - case 0x67: - case 0x68: - case 0x69: - case 0x6a: - case 0x6b: - case 0x6c: - case 0x6d: - case 0x6e: - case 0x6f: - case 0x70: - case 0x71: - case 0x72: - case 0x73: - case 0x74: - case 0x75: - case 0x76: - case 0x77: - { - const auto len = static_cast(current & 0x1f); - return get_string(len); - } - - case 0x78: // UTF-8 string (one-byte uint8_t for n follows) - { - const auto len = static_cast(get_number()); - return get_string(len); - } - - case 0x79: // UTF-8 string (two-byte uint16_t for n follow) - { - const auto len = static_cast(get_number()); - return get_string(len); - } - - case 0x7a: // UTF-8 string (four-byte uint32_t for n follow) - { - const auto len = static_cast(get_number()); - return get_string(len); - } - - case 0x7b: // UTF-8 string (eight-byte uint64_t for n follow) - { - const auto len = static_cast(get_number()); - return get_string(len); - } - - case 0x7f: // UTF-8 string (indefinite length) - { - std::string result; - while (get() != 0xff) - { - check_eof(); - result.append(1, static_cast(current)); - } - return result; - } - - default: - { - std::stringstream ss; - ss << std::setw(2) << std::setfill('0') << std::hex << current; - JSON_THROW(parse_error::create(113, chars_read, "expected a CBOR string; last byte: 0x" + ss.str())); - } - } - } - - /*! - @brief reads a MessagePack string - - This function first reads starting bytes to determine the expected - string length and then copies this number of bytes into a string. 
- - @return string - - @throw parse_error.110 if input ended - @throw parse_error.113 if an unexpexted byte is read - */ - std::string get_msgpack_string() - { - check_eof(); - - switch (current) - { - // fixstr - case 0xa0: - case 0xa1: - case 0xa2: - case 0xa3: - case 0xa4: - case 0xa5: - case 0xa6: - case 0xa7: - case 0xa8: - case 0xa9: - case 0xaa: - case 0xab: - case 0xac: - case 0xad: - case 0xae: - case 0xaf: - case 0xb0: - case 0xb1: - case 0xb2: - case 0xb3: - case 0xb4: - case 0xb5: - case 0xb6: - case 0xb7: - case 0xb8: - case 0xb9: - case 0xba: - case 0xbb: - case 0xbc: - case 0xbd: - case 0xbe: - case 0xbf: - { - const auto len = static_cast(current & 0x1f); - return get_string(len); - } - - case 0xd9: // str 8 - { - const auto len = static_cast(get_number()); - return get_string(len); - } - - case 0xda: // str 16 - { - const auto len = static_cast(get_number()); - return get_string(len); - } - - case 0xdb: // str 32 - { - const auto len = static_cast(get_number()); - return get_string(len); - } - - default: - { - std::stringstream ss; - ss << std::setw(2) << std::setfill('0') << std::hex << current; - JSON_THROW(parse_error::create(113, chars_read, "expected a MessagePack string; last byte: 0x" + ss.str())); - } - } - } - - /*! - @brief check if input ended - @throw parse_error.110 if input ended - */ - void check_eof() const - { - if (JSON_UNLIKELY(current == std::char_traits::eof())) - { - JSON_THROW(parse_error::create(110, chars_read, "unexpected end of input")); - } - } - - private: - /// input adapter - input_adapter_t ia = nullptr; - - /// the current character - int current = std::char_traits::eof(); - - /// the number of characters read - size_t chars_read = 0; - - /// whether we can assume little endianess - const bool is_little_endian = true; - }; - - /*! - @brief serialization to CBOR and MessagePack values - */ - class binary_writer - { - public: - /*! 
- @brief create a binary writer - - @param[in] adapter output adapter to write to - */ - explicit binary_writer(output_adapter_t adapter) - : is_little_endian(binary_reader::little_endianess()), oa(adapter) - { - assert(oa); - } - - /*! - @brief[in] j JSON value to serialize - */ - void write_cbor(const basic_json& j) - { - switch (j.type()) - { - case value_t::null: - { - oa->write_character(0xf6); - break; - } - - case value_t::boolean: - { - oa->write_character(j.m_value.boolean ? 0xf5 : 0xf4); - break; - } - - case value_t::number_integer: - { - if (j.m_value.number_integer >= 0) - { - // CBOR does not differentiate between positive signed - // integers and unsigned integers. Therefore, we used the - // code from the value_t::number_unsigned case here. - if (j.m_value.number_integer <= 0x17) - { - write_number(static_cast(j.m_value.number_integer)); - } - else if (j.m_value.number_integer <= (std::numeric_limits::max)()) - { - oa->write_character(0x18); - write_number(static_cast(j.m_value.number_integer)); - } - else if (j.m_value.number_integer <= (std::numeric_limits::max)()) - { - oa->write_character(0x19); - write_number(static_cast(j.m_value.number_integer)); - } - else if (j.m_value.number_integer <= (std::numeric_limits::max)()) - { - oa->write_character(0x1a); - write_number(static_cast(j.m_value.number_integer)); - } - else - { - oa->write_character(0x1b); - write_number(static_cast(j.m_value.number_integer)); - } - } - else - { - // The conversions below encode the sign in the first - // byte, and the value is converted to a positive number. 
- const auto positive_number = -1 - j.m_value.number_integer; - if (j.m_value.number_integer >= -24) - { - write_number(static_cast(0x20 + positive_number)); - } - else if (positive_number <= (std::numeric_limits::max)()) - { - oa->write_character(0x38); - write_number(static_cast(positive_number)); - } - else if (positive_number <= (std::numeric_limits::max)()) - { - oa->write_character(0x39); - write_number(static_cast(positive_number)); - } - else if (positive_number <= (std::numeric_limits::max)()) - { - oa->write_character(0x3a); - write_number(static_cast(positive_number)); - } - else - { - oa->write_character(0x3b); - write_number(static_cast(positive_number)); - } - } - break; - } - - case value_t::number_unsigned: - { - if (j.m_value.number_unsigned <= 0x17) - { - write_number(static_cast(j.m_value.number_unsigned)); - } - else if (j.m_value.number_unsigned <= (std::numeric_limits::max)()) - { - oa->write_character(0x18); - write_number(static_cast(j.m_value.number_unsigned)); - } - else if (j.m_value.number_unsigned <= (std::numeric_limits::max)()) - { - oa->write_character(0x19); - write_number(static_cast(j.m_value.number_unsigned)); - } - else if (j.m_value.number_unsigned <= (std::numeric_limits::max)()) - { - oa->write_character(0x1a); - write_number(static_cast(j.m_value.number_unsigned)); - } - else - { - oa->write_character(0x1b); - write_number(static_cast(j.m_value.number_unsigned)); - } - break; - } - - case value_t::number_float: - { - // Double-Precision Float - oa->write_character(0xfb); - write_number(j.m_value.number_float); - break; - } - - case value_t::string: - { - // step 1: write control byte and the string length - const auto N = j.m_value.string->size(); - if (N <= 0x17) - { - write_number(static_cast(0x60 + N)); - } - else if (N <= 0xff) - { - oa->write_character(0x78); - write_number(static_cast(N)); - } - else if (N <= 0xffff) - { - oa->write_character(0x79); - write_number(static_cast(N)); - } - else if (N <= 0xffffffff) - { - 
oa->write_character(0x7a); - write_number(static_cast(N)); - } - // LCOV_EXCL_START - else if (N <= 0xffffffffffffffff) - { - oa->write_character(0x7b); - write_number(static_cast(N)); - } - // LCOV_EXCL_STOP - - // step 2: write the string - oa->write_characters(reinterpret_cast(j.m_value.string->c_str()), - j.m_value.string->size()); - break; - } - - case value_t::array: - { - // step 1: write control byte and the array size - const auto N = j.m_value.array->size(); - if (N <= 0x17) - { - write_number(static_cast(0x80 + N)); - } - else if (N <= 0xff) - { - oa->write_character(0x98); - write_number(static_cast(N)); - } - else if (N <= 0xffff) - { - oa->write_character(0x99); - write_number(static_cast(N)); - } - else if (N <= 0xffffffff) - { - oa->write_character(0x9a); - write_number(static_cast(N)); - } - // LCOV_EXCL_START - else if (N <= 0xffffffffffffffff) - { - oa->write_character(0x9b); - write_number(static_cast(N)); - } - // LCOV_EXCL_STOP - - // step 2: write each element - for (const auto& el : *j.m_value.array) - { - write_cbor(el); - } - break; - } - - case value_t::object: - { - // step 1: write control byte and the object size - const auto N = j.m_value.object->size(); - if (N <= 0x17) - { - write_number(static_cast(0xa0 + N)); - } - else if (N <= 0xff) - { - oa->write_character(0xb8); - write_number(static_cast(N)); - } - else if (N <= 0xffff) - { - oa->write_character(0xb9); - write_number(static_cast(N)); - } - else if (N <= 0xffffffff) - { - oa->write_character(0xba); - write_number(static_cast(N)); - } - // LCOV_EXCL_START - else if (N <= 0xffffffffffffffff) - { - oa->write_character(0xbb); - write_number(static_cast(N)); - } - // LCOV_EXCL_STOP - - // step 2: write each element - for (const auto& el : *j.m_value.object) - { - write_cbor(el.first); - write_cbor(el.second); - } - break; - } - - default: - { - break; - } - } - } - - /*! 
- @brief[in] j JSON value to serialize - */ - void write_msgpack(const basic_json& j) - { - switch (j.type()) - { - case value_t::null: - { - // nil - oa->write_character(0xc0); - break; - } - - case value_t::boolean: - { - // true and false - oa->write_character(j.m_value.boolean ? 0xc3 : 0xc2); - break; - } - - case value_t::number_integer: - { - if (j.m_value.number_integer >= 0) - { - // MessagePack does not differentiate between positive - // signed integers and unsigned integers. Therefore, we - // used the code from the value_t::number_unsigned case - // here. - if (j.m_value.number_unsigned < 128) - { - // positive fixnum - write_number(static_cast(j.m_value.number_integer)); - } - else if (j.m_value.number_unsigned <= (std::numeric_limits::max)()) - { - // uint 8 - oa->write_character(0xcc); - write_number(static_cast(j.m_value.number_integer)); - } - else if (j.m_value.number_unsigned <= (std::numeric_limits::max)()) - { - // uint 16 - oa->write_character(0xcd); - write_number(static_cast(j.m_value.number_integer)); - } - else if (j.m_value.number_unsigned <= (std::numeric_limits::max)()) - { - // uint 32 - oa->write_character(0xce); - write_number(static_cast(j.m_value.number_integer)); - } - else if (j.m_value.number_unsigned <= (std::numeric_limits::max)()) - { - // uint 64 - oa->write_character(0xcf); - write_number(static_cast(j.m_value.number_integer)); - } - } - else - { - if (j.m_value.number_integer >= -32) - { - // negative fixnum - write_number(static_cast(j.m_value.number_integer)); - } - else if (j.m_value.number_integer >= (std::numeric_limits::min)() and j.m_value.number_integer <= (std::numeric_limits::max)()) - { - // int 8 - oa->write_character(0xd0); - write_number(static_cast(j.m_value.number_integer)); - } - else if (j.m_value.number_integer >= (std::numeric_limits::min)() and j.m_value.number_integer <= (std::numeric_limits::max)()) - { - // int 16 - oa->write_character(0xd1); - write_number(static_cast(j.m_value.number_integer)); - 
} - else if (j.m_value.number_integer >= (std::numeric_limits::min)() and j.m_value.number_integer <= (std::numeric_limits::max)()) - { - // int 32 - oa->write_character(0xd2); - write_number(static_cast(j.m_value.number_integer)); - } - else if (j.m_value.number_integer >= (std::numeric_limits::min)() and j.m_value.number_integer <= (std::numeric_limits::max)()) - { - // int 64 - oa->write_character(0xd3); - write_number(static_cast(j.m_value.number_integer)); - } - } - break; - } - - case value_t::number_unsigned: - { - if (j.m_value.number_unsigned < 128) - { - // positive fixnum - write_number(static_cast(j.m_value.number_integer)); - } - else if (j.m_value.number_unsigned <= (std::numeric_limits::max)()) - { - // uint 8 - oa->write_character(0xcc); - write_number(static_cast(j.m_value.number_integer)); - } - else if (j.m_value.number_unsigned <= (std::numeric_limits::max)()) - { - // uint 16 - oa->write_character(0xcd); - write_number(static_cast(j.m_value.number_integer)); - } - else if (j.m_value.number_unsigned <= (std::numeric_limits::max)()) - { - // uint 32 - oa->write_character(0xce); - write_number(static_cast(j.m_value.number_integer)); - } - else if (j.m_value.number_unsigned <= (std::numeric_limits::max)()) - { - // uint 64 - oa->write_character(0xcf); - write_number(static_cast(j.m_value.number_integer)); - } - break; - } - - case value_t::number_float: - { - // float 64 - oa->write_character(0xcb); - write_number(j.m_value.number_float); - break; - } - - case value_t::string: - { - // step 1: write control byte and the string length - const auto N = j.m_value.string->size(); - if (N <= 31) - { - // fixstr - write_number(static_cast(0xa0 | N)); - } - else if (N <= 255) - { - // str 8 - oa->write_character(0xd9); - write_number(static_cast(N)); - } - else if (N <= 65535) - { - // str 16 - oa->write_character(0xda); - write_number(static_cast(N)); - } - else if (N <= 4294967295) - { - // str 32 - oa->write_character(0xdb); - 
write_number(static_cast(N)); - } - - // step 2: write the string - oa->write_characters(reinterpret_cast(j.m_value.string->c_str()), - j.m_value.string->size()); - break; - } - - case value_t::array: - { - // step 1: write control byte and the array size - const auto N = j.m_value.array->size(); - if (N <= 15) - { - // fixarray - write_number(static_cast(0x90 | N)); - } - else if (N <= 0xffff) - { - // array 16 - oa->write_character(0xdc); - write_number(static_cast(N)); - } - else if (N <= 0xffffffff) - { - // array 32 - oa->write_character(0xdd); - write_number(static_cast(N)); - } - - // step 2: write each element - for (const auto& el : *j.m_value.array) - { - write_msgpack(el); - } - break; - } - - case value_t::object: - { - // step 1: write control byte and the object size - const auto N = j.m_value.object->size(); - if (N <= 15) - { - // fixmap - write_number(static_cast(0x80 | (N & 0xf))); - } - else if (N <= 65535) - { - // map 16 - oa->write_character(0xde); - write_number(static_cast(N)); - } - else if (N <= 4294967295) - { - // map 32 - oa->write_character(0xdf); - write_number(static_cast(N)); - } - - // step 2: write each element - for (const auto& el : *j.m_value.object) - { - write_msgpack(el.first); - write_msgpack(el.second); - } - break; - } - - default: - { - break; - } - } - } - - private: - /* - @brief write a number to output input - - @param[in] n number of type @a T - @tparam T the type of the number - - @note This function needs to respect the system's endianess, because - bytes in CBOR and MessagePack are stored in network order (big - endian) and therefore need reordering on little endian systems. 
- */ - template - void write_number(T n) - { - // step 1: write number to array of length T - std::array vec; - std::memcpy(vec.data(), &n, sizeof(T)); - - // step 2: write array to output (with possible reordering) - for (size_t i = 0; i < sizeof(T); ++i) - { - // reverse byte order prior to conversion if necessary - if (is_little_endian) - { - oa->write_character(vec[sizeof(T) - i - 1]); - } - else - { - oa->write_character(vec[i]); // LCOV_EXCL_LINE - } - } - } - - private: - /// whether we can assume little endianess - const bool is_little_endian = true; - - /// the output - output_adapter_t oa = nullptr; - }; - public: /*! @brief create a CBOR serialization of a given JSON value @@ -10809,6 +13189,10 @@ class basic_json @note The mapping is **complete** in the sense that any JSON value type can be converted to a CBOR value. + @note If NaN or Infinity are stored inside a JSON number, they are + serialized properly. This behavior differs from the @ref dump() + function which serializes NaN or Infinity to `null`. + @note The following CBOR types are not used in the conversion: - byte strings (0x40..0x5f) - UTF-8 strings terminated by "break" (0x7f) @@ -10836,18 +13220,27 @@ class basic_json @sa http://cbor.io @sa @ref from_cbor(const std::vector&, const size_t) for the analogous deserialization - @sa @ref to_msgpack(const basic_json& for the related MessagePack format + @sa @ref to_msgpack(const basic_json&) for the related MessagePack format @since version 2.0.9 */ static std::vector to_cbor(const basic_json& j) { std::vector result; - binary_writer bw(output_adapter::create(result)); - bw.write_cbor(j); + to_cbor(j, result); return result; } + static void to_cbor(const basic_json& j, detail::output_adapter o) + { + binary_writer(o).write_cbor(j); + } + + static void to_cbor(const basic_json& j, detail::output_adapter o) + { + binary_writer(o).write_cbor(j); + } + /*! 
@brief create a MessagePack serialization of a given JSON value @@ -10907,6 +13300,10 @@ class basic_json @note Any MessagePack output created @ref to_msgpack can be successfully parsed by @ref from_msgpack. + @note If NaN or Infinity are stored inside a JSON number, they are + serialized properly. This behavior differs from the @ref dump() + function which serializes NaN or Infinity to `null`. + @param[in] j JSON value to serialize @return MessagePack serialization as byte vector @@ -10925,16 +13322,25 @@ class basic_json static std::vector to_msgpack(const basic_json& j) { std::vector result; - binary_writer bw(output_adapter::create(result)); - bw.write_msgpack(j); + to_msgpack(j, result); return result; } - /*! - @brief create a JSON value from a byte vector in CBOR format + static void to_msgpack(const basic_json& j, detail::output_adapter o) + { + binary_writer(o).write_msgpack(j); + } - Deserializes a given byte vector @a v to a JSON value using the CBOR - (Concise Binary Object Representation) serialization format. + static void to_msgpack(const basic_json& j, detail::output_adapter o) + { + binary_writer(o).write_msgpack(j); + } + + /*! + @brief create a JSON value from an input in CBOR format + + Deserializes a given input @a i to a JSON value using the CBOR (Concise + Binary Object Representation) serialization format. The library maps CBOR types to JSON value types as follows: @@ -10996,39 +13402,51 @@ class basic_json @note Any CBOR output created @ref to_cbor can be successfully parsed by @ref from_cbor. 
- @param[in] v a byte vector in CBOR format - @param[in] start_index the index to start reading from @a v (0 by default) + @param[in] i an input in CBOR format convertible to an input adapter + @param[in] strict whether to expect the input to be consumed until EOF + (true by default) @return deserialized JSON value - @throw parse_error.110 if the given vector ends prematurely + @throw parse_error.110 if the given input ends prematurely or the end of + file was not reached when @a strict was set to true @throw parse_error.112 if unsupported features from CBOR were - used in the given vector @a v or if the input is not valid CBOR + used in the given input @a v or if the input is not valid CBOR @throw parse_error.113 if a string was expected as map key, but not found - @complexity Linear in the size of the byte vector @a v. + @complexity Linear in the size of the input @a i. @liveexample{The example shows the deserialization of a byte vector in CBOR format to a JSON value.,from_cbor} @sa http://cbor.io @sa @ref to_cbor(const basic_json&) for the analogous serialization - @sa @ref from_msgpack(const std::vector&, const size_t) for the + @sa @ref from_msgpack(detail::input_adapter, const bool) for the related MessagePack format - @since version 2.0.9, parameter @a start_index since 2.1.1 + @since version 2.0.9; parameter @a start_index since 2.1.1; changed to + consume input adapters, removed start_index parameter, and added + @a strict parameter since 3.0.0 */ - static basic_json from_cbor(const std::vector& v, - const size_t start_index = 0) + static basic_json from_cbor(detail::input_adapter i, + const bool strict = true) { - binary_reader br(input_adapter::create(v.begin() + static_cast(start_index), v.end())); - return br.parse_cbor(); + return binary_reader(i).parse_cbor(strict); } + /*! 
+ @copydoc from_cbor(detail::input_adapter, const bool) + */ + template::value, int> = 0> + static basic_json from_cbor(A1 && a1, A2 && a2, const bool strict = true) + { + return binary_reader(detail::input_adapter(std::forward(a1), std::forward(a2))).parse_cbor(strict); + } /*! - @brief create a JSON value from a byte vector in MessagePack format + @brief create a JSON value from an input in MessagePack format - Deserializes a given byte vector @a v to a JSON value using the MessagePack + Deserializes a given input @a i to a JSON value using the MessagePack serialization format. The library maps MessagePack types to JSON value types as follows: @@ -11071,2693 +13489,49 @@ class basic_json @note Any MessagePack output created @ref to_msgpack can be successfully parsed by @ref from_msgpack. - @param[in] v a byte vector in MessagePack format - @param[in] start_index the index to start reading from @a v (0 by default) - @return deserialized JSON value + @param[in] i an input in MessagePack format convertible to an input + adapter + @param[in] strict whether to expect the input to be consumed until EOF + (true by default) - @throw parse_error.110 if the given vector ends prematurely + @throw parse_error.110 if the given input ends prematurely or the end of + file was not reached when @a strict was set to true @throw parse_error.112 if unsupported features from MessagePack were - used in the given vector @a v or if the input is not valid MessagePack + used in the given input @a i or if the input is not valid MessagePack @throw parse_error.113 if a string was expected as map key, but not found - @complexity Linear in the size of the byte vector @a v. + @complexity Linear in the size of the input @a i. 
@liveexample{The example shows the deserialization of a byte vector in MessagePack format to a JSON value.,from_msgpack} @sa http://msgpack.org @sa @ref to_msgpack(const basic_json&) for the analogous serialization - @sa @ref from_cbor(const std::vector&, const size_t) for the - related CBOR format + @sa @ref from_cbor(detail::input_adapter, const bool) for the related CBOR + format - @since version 2.0.9, parameter @a start_index since 2.1.1 + @since version 2.0.9; parameter @a start_index since 2.1.1; changed to + consume input adapters, removed start_index parameter, and added + @a strict parameter since 3.0.0 */ - static basic_json from_msgpack(const std::vector& v, - const size_t start_index = 0) + static basic_json from_msgpack(detail::input_adapter i, + const bool strict = true) { - binary_reader br(input_adapter::create(v.begin() + static_cast(start_index), v.end())); - return br.parse_msgpack(); + return binary_reader(i).parse_msgpack(strict); + } + + /*! + @copydoc from_msgpack(detail::input_adapter, const bool) + */ + template::value, int> = 0> + static basic_json from_msgpack(A1 && a1, A2 && a2, const bool strict = true) + { + return binary_reader(detail::input_adapter(std::forward(a1), std::forward(a2))).parse_msgpack(strict); } /// @} - ////////////////////// - // lexer and parser // - ////////////////////// - - private: - /*! - @brief lexical analysis - - This class organizes the lexical analysis during JSON deserialization. 
- */ - class lexer - { - public: - /// token types for the parser - enum class token_type - { - uninitialized, ///< indicating the scanner is uninitialized - literal_true, ///< the `true` literal - literal_false, ///< the `false` literal - literal_null, ///< the `null` literal - value_string, ///< a string -- use get_string() for actual value - value_unsigned, ///< an unsigned integer -- use get_number_unsigned() for actual value - value_integer, ///< a signed integer -- use get_number_integer() for actual value - value_float, ///< an floating point number -- use get_number_float() for actual value - begin_array, ///< the character for array begin `[` - begin_object, ///< the character for object begin `{` - end_array, ///< the character for array end `]` - end_object, ///< the character for object end `}` - name_separator, ///< the name separator `:` - value_separator, ///< the value separator `,` - parse_error, ///< indicating a parse error - end_of_input, ///< indicating the end of the input buffer - literal_or_value ///< a literal or the begin of a value (only for diagnostics) - }; - - /// return name of values of type token_type (only used for errors) - static const char* token_type_name(const token_type t) noexcept - { - switch (t) - { - case token_type::uninitialized: - return ""; - case token_type::literal_true: - return "true literal"; - case token_type::literal_false: - return "false literal"; - case token_type::literal_null: - return "null literal"; - case token_type::value_string: - return "string literal"; - case lexer::token_type::value_unsigned: - case lexer::token_type::value_integer: - case lexer::token_type::value_float: - return "number literal"; - case token_type::begin_array: - return "'['"; - case token_type::begin_object: - return "'{'"; - case token_type::end_array: - return "']'"; - case token_type::end_object: - return "'}'"; - case token_type::name_separator: - return "':'"; - case token_type::value_separator: - return "','"; - case 
token_type::parse_error: - return ""; - case token_type::end_of_input: - return "end of input"; - case token_type::literal_or_value: - return "'[', '{', or a literal"; - default: - { - // catch non-enum values - return "unknown token"; // LCOV_EXCL_LINE - } - } - } - - explicit lexer(input_adapter_t adapter) - : ia(adapter), decimal_point_char(get_decimal_point()) - {} - - // delete because of pointer members - lexer(const lexer&) = delete; - lexer& operator=(lexer&) = delete; - - private: - ///////////////////// - // locales - ///////////////////// - - /// return the locale-dependent decimal point - static char get_decimal_point() noexcept - { - const auto loc = localeconv(); - assert(loc != nullptr); - return (loc->decimal_point == nullptr) ? '.' : loc->decimal_point[0]; - } - - ///////////////////// - // scan functions - ///////////////////// - - /*! - @brief get codepoint from 4 hex characters following `\u` - - @return codepoint or -1 in case of an error (e.g. EOF or non-hex - character) - */ - int get_codepoint() - { - // this function only makes sense after reading `\u` - assert(current == 'u'); - int codepoint = 0; - - // byte 1: \uXxxx - switch (get()) - { - case '0': - break; - case '1': - codepoint += 0x1000; - break; - case '2': - codepoint += 0x2000; - break; - case '3': - codepoint += 0x3000; - break; - case '4': - codepoint += 0x4000; - break; - case '5': - codepoint += 0x5000; - break; - case '6': - codepoint += 0x6000; - break; - case '7': - codepoint += 0x7000; - break; - case '8': - codepoint += 0x8000; - break; - case '9': - codepoint += 0x9000; - break; - case 'A': - case 'a': - codepoint += 0xa000; - break; - case 'B': - case 'b': - codepoint += 0xb000; - break; - case 'C': - case 'c': - codepoint += 0xc000; - break; - case 'D': - case 'd': - codepoint += 0xd000; - break; - case 'E': - case 'e': - codepoint += 0xe000; - break; - case 'F': - case 'f': - codepoint += 0xf000; - break; - default: - return -1; - } - - // byte 2: \uxXxx - switch 
(get()) - { - case '0': - break; - case '1': - codepoint += 0x0100; - break; - case '2': - codepoint += 0x0200; - break; - case '3': - codepoint += 0x0300; - break; - case '4': - codepoint += 0x0400; - break; - case '5': - codepoint += 0x0500; - break; - case '6': - codepoint += 0x0600; - break; - case '7': - codepoint += 0x0700; - break; - case '8': - codepoint += 0x0800; - break; - case '9': - codepoint += 0x0900; - break; - case 'A': - case 'a': - codepoint += 0x0a00; - break; - case 'B': - case 'b': - codepoint += 0x0b00; - break; - case 'C': - case 'c': - codepoint += 0x0c00; - break; - case 'D': - case 'd': - codepoint += 0x0d00; - break; - case 'E': - case 'e': - codepoint += 0x0e00; - break; - case 'F': - case 'f': - codepoint += 0x0f00; - break; - default: - return -1; - } - - // byte 3: \uxxXx - switch (get()) - { - case '0': - break; - case '1': - codepoint += 0x0010; - break; - case '2': - codepoint += 0x0020; - break; - case '3': - codepoint += 0x0030; - break; - case '4': - codepoint += 0x0040; - break; - case '5': - codepoint += 0x0050; - break; - case '6': - codepoint += 0x0060; - break; - case '7': - codepoint += 0x0070; - break; - case '8': - codepoint += 0x0080; - break; - case '9': - codepoint += 0x0090; - break; - case 'A': - case 'a': - codepoint += 0x00a0; - break; - case 'B': - case 'b': - codepoint += 0x00b0; - break; - case 'C': - case 'c': - codepoint += 0x00c0; - break; - case 'D': - case 'd': - codepoint += 0x00d0; - break; - case 'E': - case 'e': - codepoint += 0x00e0; - break; - case 'F': - case 'f': - codepoint += 0x00f0; - break; - default: - return -1; - } - - // byte 4: \uxxxX - switch (get()) - { - case '0': - break; - case '1': - codepoint += 0x0001; - break; - case '2': - codepoint += 0x0002; - break; - case '3': - codepoint += 0x0003; - break; - case '4': - codepoint += 0x0004; - break; - case '5': - codepoint += 0x0005; - break; - case '6': - codepoint += 0x0006; - break; - case '7': - codepoint += 0x0007; - break; - case 
'8': - codepoint += 0x0008; - break; - case '9': - codepoint += 0x0009; - break; - case 'A': - case 'a': - codepoint += 0x000a; - break; - case 'B': - case 'b': - codepoint += 0x000b; - break; - case 'C': - case 'c': - codepoint += 0x000c; - break; - case 'D': - case 'd': - codepoint += 0x000d; - break; - case 'E': - case 'e': - codepoint += 0x000e; - break; - case 'F': - case 'f': - codepoint += 0x000f; - break; - default: - return -1; - } - - return codepoint; - } - - /*! - @brief scan a string literal - - This function scans a string according to Sect. 7 of RFC 7159. While - scanning, bytes are escaped and copied into buffer yytext. Then the - function returns successfully, yytext is null-terminated and yylen - contains the number of bytes in the string. - - @return token_type::value_string if string could be successfully - scanned, token_type::parse_error otherwise - - @note In case of errors, variable error_message contains a textual - description. - */ - token_type scan_string() - { - // reset yytext (ignore opening quote) - reset(); - - // we entered the function by reading an open quote - assert(current == '\"'); - - while (true) - { - // get next character - switch (get()) - { - // end of file while parsing string - case std::char_traits::eof(): - { - error_message = "invalid string: missing closing quote"; - return token_type::parse_error; - } - - // closing quote - case '\"': - { - // terminate yytext - add('\0'); - --yylen; - return token_type::value_string; - } - - // escapes - case '\\': - { - switch (get()) - { - // quotation mark - case '\"': - add('\"'); - break; - // reverse solidus - case '\\': - add('\\'); - break; - // solidus - case '/': - add('/'); - break; - // backspace - case 'b': - add('\b'); - break; - // form feed - case 'f': - add('\f'); - break; - // line feed - case 'n': - add('\n'); - break; - // carriage return - case 'r': - add('\r'); - break; - // tab - case 't': - add('\t'); - break; - - // unicode escapes - case 'u': - { - int 
codepoint; - int codepoint1 = get_codepoint(); - - if (JSON_UNLIKELY(codepoint1 == -1)) - { - error_message = "invalid string: '\\u' must be followed by 4 hex digits"; - return token_type::parse_error; - } - - // check if code point is a high surrogate - if (0xD800 <= codepoint1 and codepoint1 <= 0xDBFF) - { - // expect next \uxxxx entry - if (JSON_LIKELY(get() == '\\' and get() == 'u')) - { - const int codepoint2 = get_codepoint(); - - if (JSON_UNLIKELY(codepoint2 == -1)) - { - error_message = "invalid string: '\\u' must be followed by 4 hex digits"; - return token_type::parse_error; - } - - // check if codepoint2 is a low surrogate - if (JSON_LIKELY(0xDC00 <= codepoint2 and codepoint2 <= 0xDFFF)) - { - codepoint = - // high surrogate occupies the most significant 22 bits - (codepoint1 << 10) - // low surrogate occupies the least significant 15 bits - + codepoint2 - // there is still the 0xD800, 0xDC00 and 0x10000 noise - // in the result so we have to subtract with: - // (0xD800 << 10) + DC00 - 0x10000 = 0x35FDC00 - - 0x35FDC00; - } - else - { - error_message = "invalid string: surrogate U+DC00..U+DFFF must be followed by U+DC00..U+DFFF"; - return token_type::parse_error; - } - } - else - { - error_message = "invalid string: surrogate U+DC00..U+DFFF must be followed by U+DC00..U+DFFF"; - return token_type::parse_error; - } - } - else - { - if (JSON_UNLIKELY(0xDC00 <= codepoint1 and codepoint1 <= 0xDFFF)) - { - error_message = "invalid string: surrogate U+DC00..U+DFFF must follow U+D800..U+DBFF"; - return token_type::parse_error; - } - - // only work with first code point - codepoint = codepoint1; - } - - // result of the above calculation yields a proper codepoint - assert(0x00 <= codepoint and codepoint <= 0x10FFFF); - - // translate code point to bytes - if (codepoint < 0x80) - { - // 1-byte characters: 0xxxxxxx (ASCII) - add(codepoint); - } - else if (codepoint <= 0x7ff) - { - // 2-byte characters: 110xxxxx 10xxxxxx - add(0xC0 | (codepoint >> 6)); - add(0x80 | 
(codepoint & 0x3F)); - } - else if (codepoint <= 0xffff) - { - // 3-byte characters: 1110xxxx 10xxxxxx 10xxxxxx - add(0xE0 | (codepoint >> 12)); - add(0x80 | ((codepoint >> 6) & 0x3F)); - add(0x80 | (codepoint & 0x3F)); - } - else - { - // 4-byte characters: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx - add(0xF0 | (codepoint >> 18)); - add(0x80 | ((codepoint >> 12) & 0x3F)); - add(0x80 | ((codepoint >> 6) & 0x3F)); - add(0x80 | (codepoint & 0x3F)); - } - - break; - } - - // other characters after escape - default: - error_message = "invalid string: forbidden character after backslash"; - return token_type::parse_error; - } - - break; - } - - // invalid control characters - case 0x00: - case 0x01: - case 0x02: - case 0x03: - case 0x04: - case 0x05: - case 0x06: - case 0x07: - case 0x08: - case 0x09: - case 0x0a: - case 0x0b: - case 0x0c: - case 0x0d: - case 0x0e: - case 0x0f: - case 0x10: - case 0x11: - case 0x12: - case 0x13: - case 0x14: - case 0x15: - case 0x16: - case 0x17: - case 0x18: - case 0x19: - case 0x1a: - case 0x1b: - case 0x1c: - case 0x1d: - case 0x1e: - case 0x1f: - { - error_message = "invalid string: control character must be escaped"; - return token_type::parse_error; - } - - // U+0020..U+007F (except U+0022 (quote) and U+005C (backspace)) - case 0x20: - case 0x21: - case 0x23: - case 0x24: - case 0x25: - case 0x26: - case 0x27: - case 0x28: - case 0x29: - case 0x2a: - case 0x2b: - case 0x2c: - case 0x2d: - case 0x2e: - case 0x2f: - case 0x30: - case 0x31: - case 0x32: - case 0x33: - case 0x34: - case 0x35: - case 0x36: - case 0x37: - case 0x38: - case 0x39: - case 0x3a: - case 0x3b: - case 0x3c: - case 0x3d: - case 0x3e: - case 0x3f: - case 0x40: - case 0x41: - case 0x42: - case 0x43: - case 0x44: - case 0x45: - case 0x46: - case 0x47: - case 0x48: - case 0x49: - case 0x4a: - case 0x4b: - case 0x4c: - case 0x4d: - case 0x4e: - case 0x4f: - case 0x50: - case 0x51: - case 0x52: - case 0x53: - case 0x54: - case 0x55: - case 0x56: - case 0x57: - case 0x58: - 
case 0x59: - case 0x5a: - case 0x5b: - case 0x5d: - case 0x5e: - case 0x5f: - case 0x60: - case 0x61: - case 0x62: - case 0x63: - case 0x64: - case 0x65: - case 0x66: - case 0x67: - case 0x68: - case 0x69: - case 0x6a: - case 0x6b: - case 0x6c: - case 0x6d: - case 0x6e: - case 0x6f: - case 0x70: - case 0x71: - case 0x72: - case 0x73: - case 0x74: - case 0x75: - case 0x76: - case 0x77: - case 0x78: - case 0x79: - case 0x7a: - case 0x7b: - case 0x7c: - case 0x7d: - case 0x7e: - case 0x7f: - { - add(current); - break; - } - - // U+0080..U+07FF: bytes C2..DF 80..BF - case 0xc2: - case 0xc3: - case 0xc4: - case 0xc5: - case 0xc6: - case 0xc7: - case 0xc8: - case 0xc9: - case 0xca: - case 0xcb: - case 0xcc: - case 0xcd: - case 0xce: - case 0xcf: - case 0xd0: - case 0xd1: - case 0xd2: - case 0xd3: - case 0xd4: - case 0xd5: - case 0xd6: - case 0xd7: - case 0xd8: - case 0xd9: - case 0xda: - case 0xdb: - case 0xdc: - case 0xdd: - case 0xde: - case 0xdf: - { - add(current); - get(); - if (JSON_LIKELY(0x80 <= current and current <= 0xbf)) - { - add(current); - continue; - } - - error_message = "invalid string: ill-formed UTF-8 byte"; - return token_type::parse_error; - } - - // U+0800..U+0FFF: bytes E0 A0..BF 80..BF - case 0xe0: - { - add(current); - get(); - if (JSON_LIKELY(0xa0 <= current and current <= 0xbf)) - { - add(current); - get(); - if (JSON_LIKELY(0x80 <= current and current <= 0xbf)) - { - add(current); - continue; - } - } - - error_message = "invalid string: ill-formed UTF-8 byte"; - return token_type::parse_error; - } - - // U+1000..U+CFFF: bytes E1..EC 80..BF 80..BF - // U+E000..U+FFFF: bytes EE..EF 80..BF 80..BF - case 0xe1: - case 0xe2: - case 0xe3: - case 0xe4: - case 0xe5: - case 0xe6: - case 0xe7: - case 0xe8: - case 0xe9: - case 0xea: - case 0xeb: - case 0xec: - case 0xee: - case 0xef: - { - add(current); - get(); - if (JSON_LIKELY(0x80 <= current and current <= 0xbf)) - { - add(current); - get(); - if (JSON_LIKELY(0x80 <= current and current <= 0xbf)) - { 
- add(current); - continue; - } - } - - error_message = "invalid string: ill-formed UTF-8 byte"; - return token_type::parse_error; - } - - // U+D000..U+D7FF: bytes ED 80..9F 80..BF - case 0xed: - { - add(current); - get(); - if (JSON_LIKELY(0x80 <= current and current <= 0x9f)) - { - add(current); - get(); - if (JSON_LIKELY(0x80 <= current and current <= 0xbf)) - { - add(current); - continue; - } - } - - error_message = "invalid string: ill-formed UTF-8 byte"; - return token_type::parse_error; - } - - // U+10000..U+3FFFF F0 90..BF 80..BF 80..BF - case 0xf0: - { - add(current); - get(); - if (JSON_LIKELY(0x90 <= current and current <= 0xbf)) - { - add(current); - get(); - if (JSON_LIKELY(0x80 <= current and current <= 0xbf)) - { - add(current); - get(); - if (JSON_LIKELY(0x80 <= current and current <= 0xbf)) - { - add(current); - continue; - } - } - } - - error_message = "invalid string: ill-formed UTF-8 byte"; - return token_type::parse_error; - } - - // U+40000..U+FFFFF F1..F3 80..BF 80..BF 80..BF - case 0xf1: - case 0xf2: - case 0xf3: - { - add(current); - get(); - if (JSON_LIKELY(0x80 <= current and current <= 0xbf)) - { - add(current); - get(); - if (JSON_LIKELY(0x80 <= current and current <= 0xbf)) - { - add(current); - get(); - if (JSON_LIKELY(0x80 <= current and current <= 0xbf)) - { - add(current); - continue; - } - } - } - - error_message = "invalid string: ill-formed UTF-8 byte"; - return token_type::parse_error; - } - - // U+100000..U+10FFFF F4 80..8F 80..BF 80..BF - case 0xf4: - { - add(current); - get(); - if (JSON_LIKELY(0x80 <= current and current <= 0x8f)) - { - add(current); - get(); - if (JSON_LIKELY(0x80 <= current and current <= 0xbf)) - { - add(current); - get(); - if (JSON_LIKELY(0x80 <= current and current <= 0xbf)) - { - add(current); - continue; - } - } - } - - error_message = "invalid string: ill-formed UTF-8 byte"; - return token_type::parse_error; - } - - // remaining bytes (80..C1 and F5..FF) are ill-formed - default: - { - 
error_message = "invalid string: ill-formed UTF-8 byte"; - return token_type::parse_error; - } - } - } - } - - static void strtof(float& f, const char* str, char** endptr) noexcept - { - f = std::strtof(str, endptr); - } - - static void strtof(double& f, const char* str, char** endptr) noexcept - { - f = std::strtod(str, endptr); - } - - static void strtof(long double& f, const char* str, char** endptr) noexcept - { - f = std::strtold(str, endptr); - } - - /*! - @brief scan a number literal - - This function scans a string according to Sect. 6 of RFC 7159. - - The function is realized with a deterministic finite state machine - derived from the grammar described in RFC 7159. Starting in state - "init", the input is read and used to determined the next state. Only - state "done" accepts the number. State "error" is a trap state to model - errors. In the table below, "anything" means any character but the ones - listed before. - - state | 0 | 1-9 | e E | + | - | . | anything - ---------|----------|----------|----------|---------|---------|----------|----------- - init | zero | any1 | [error] | [error] | minus | [error] | [error] - minus | zero | any1 | [error] | [error] | [error] | [error] | [error] - zero | done | done | exponent | done | done | decimal1 | done - any1 | any1 | any1 | exponent | done | done | decimal1 | done - decimal1 | decimal2 | [error] | [error] | [error] | [error] | [error] | [error] - decimal2 | decimal2 | decimal2 | exponent | done | done | done | done - exponent | any2 | any2 | [error] | sign | sign | [error] | [error] - sign | any2 | any2 | [error] | [error] | [error] | [error] | [error] - any2 | any2 | any2 | done | done | done | done | done - - The state machine is realized with one label per state (prefixed with - "scan_number_") and `goto` statements between them. The state machine - contains cycles, but any cycle can be left when EOF is read. Therefore, - the function is guaranteed to terminate. 
- - During scanning, the read bytes are stored in yytext. This string is - then converted to a signed integer, an unsigned integer, or a - floating-point number. - - @return token_type::value_unsigned, token_type::value_integer, or - token_type::value_float if number could be successfully scanned, - token_type::parse_error otherwise - - @note The scanner is independent of the current locale. Internally, the - locale's decimal point is used instead of `.` to work with the - locale-dependent converters. - */ - token_type scan_number() - { - // reset yytext to store the number's bytes - reset(); - - // the type of the parsed number; initially set to unsigned; will be - // changed if minus sign, decimal point or exponent is read - token_type number_type = token_type::value_unsigned; - - // state (init): we just found out we need to scan a number - switch (current) - { - case '-': - { - add(current); - goto scan_number_minus; - } - - case '0': - { - add(current); - goto scan_number_zero; - } - - case '1': - case '2': - case '3': - case '4': - case '5': - case '6': - case '7': - case '8': - case '9': - { - add(current); - goto scan_number_any1; - } - - default: - { - // all other characters are rejected outside scan_number() - assert(false); // LCOV_EXCL_LINE - } - } - -scan_number_minus: - // state: we just parsed a leading minus sign - number_type = token_type::value_integer; - switch (get()) - { - case '0': - { - add(current); - goto scan_number_zero; - } - - case '1': - case '2': - case '3': - case '4': - case '5': - case '6': - case '7': - case '8': - case '9': - { - add(current); - goto scan_number_any1; - } - - default: - { - error_message = "invalid number; expected digit after '-'"; - return token_type::parse_error; - } - } - -scan_number_zero: - // state: we just parse a zero (maybe with a leading minus sign) - switch (get()) - { - case '.': - { - add(decimal_point_char); - goto scan_number_decimal1; - } - - case 'e': - case 'E': - { - add(current); - goto 
scan_number_exponent; - } - - default: - { - goto scan_number_done; - } - } - -scan_number_any1: - // state: we just parsed a number 0-9 (maybe with a leading minus sign) - switch (get()) - { - case '0': - case '1': - case '2': - case '3': - case '4': - case '5': - case '6': - case '7': - case '8': - case '9': - { - add(current); - goto scan_number_any1; - } - - case '.': - { - add(decimal_point_char); - goto scan_number_decimal1; - } - - case 'e': - case 'E': - { - add(current); - goto scan_number_exponent; - } - - default: - { - goto scan_number_done; - } - } - -scan_number_decimal1: - // state: we just parsed a decimal point - number_type = token_type::value_float; - switch (get()) - { - case '0': - case '1': - case '2': - case '3': - case '4': - case '5': - case '6': - case '7': - case '8': - case '9': - { - add(current); - goto scan_number_decimal2; - } - - default: - { - error_message = "invalid number; expected digit after '.'"; - return token_type::parse_error; - } - } - -scan_number_decimal2: - // we just parsed at least one number after a decimal point - switch (get()) - { - case '0': - case '1': - case '2': - case '3': - case '4': - case '5': - case '6': - case '7': - case '8': - case '9': - { - add(current); - goto scan_number_decimal2; - } - - case 'e': - case 'E': - { - add(current); - goto scan_number_exponent; - } - - default: - { - goto scan_number_done; - } - } - -scan_number_exponent: - // we just parsed an exponent - number_type = token_type::value_float; - switch (get()) - { - case '+': - case '-': - { - add(current); - goto scan_number_sign; - } - - case '0': - case '1': - case '2': - case '3': - case '4': - case '5': - case '6': - case '7': - case '8': - case '9': - { - add(current); - goto scan_number_any2; - } - - default: - { - error_message = "invalid number; expected '+', '-', or digit after exponent"; - return token_type::parse_error; - } - } - -scan_number_sign: - // we just parsed an exponent sign - switch (get()) - { - case '0': - 
case '1': - case '2': - case '3': - case '4': - case '5': - case '6': - case '7': - case '8': - case '9': - { - add(current); - goto scan_number_any2; - } - - default: - { - error_message = "invalid number; expected digit after exponent sign"; - return token_type::parse_error; - } - } - -scan_number_any2: - // we just parsed a number after the exponent or exponent sign - switch (get()) - { - case '0': - case '1': - case '2': - case '3': - case '4': - case '5': - case '6': - case '7': - case '8': - case '9': - { - add(current); - goto scan_number_any2; - } - - default: - { - goto scan_number_done; - } - } - -scan_number_done: - // unget the character after the number (we only read it to know - // that we are done scanning a number) - --chars_read; - next_unget = true; - - // terminate token - add('\0'); - --yylen; - - // try to parse integers first and fall back to floats - if (number_type == token_type::value_unsigned) - { - char* endptr = nullptr; - errno = 0; - const auto x = std::strtoull(yytext.data(), &endptr, 10); - - // we checked the number format before - assert(endptr == yytext.data() + yylen); - - if (errno == 0) - { - value_unsigned = static_cast(x); - if (value_unsigned == x) - { - return token_type::value_unsigned; - } - } - } - else if (number_type == token_type::value_integer) - { - char* endptr = nullptr; - errno = 0; - const auto x = std::strtoll(yytext.data(), &endptr, 10); - - // we checked the number format before - assert(endptr == yytext.data() + yylen); - - if (errno == 0) - { - value_integer = static_cast(x); - if (value_integer == x) - { - return token_type::value_integer; - } - } - } - - // this code is reached if we parse a floating-point number or if - // an integer conversion above failed - strtof(value_float, yytext.data(), nullptr); - return token_type::value_float; - } - - /*! 
- @param[in] literal_text the literal text to expect - @param[in] length the length of the passed literal text - @param[in] return_type the token type to return on success - */ - token_type scan_literal(const char* literal_text, const size_t length, - token_type return_type) - { - assert(current == literal_text[0]); - for (size_t i = 1; i < length; ++i) - { - if (JSON_UNLIKELY(get() != literal_text[i])) - { - error_message = "invalid literal"; - return token_type::parse_error; - } - } - return return_type; - } - - ///////////////////// - // input management - ///////////////////// - - /// reset yytext - void reset() noexcept - { - yylen = 0; - start_pos = chars_read - 1; - } - - /// get a character from the input - int get() - { - ++chars_read; - return next_unget - ? (next_unget = false, current) - : (current = ia->get_character()); - } - - /// add a character to yytext - void add(int c) - { - // resize yytext if necessary; this condition is deemed unlikely, - // because we start with a 1024-byte buffer - if (JSON_UNLIKELY((yylen + 1 > yytext.capacity()))) - { - yytext.resize(2 * yytext.capacity(), '\0'); - } - assert(yylen < yytext.size()); - yytext[yylen++] = static_cast(c); - } - - public: - ///////////////////// - // value getters - ///////////////////// - - /// return integer value - constexpr number_integer_t get_number_integer() const noexcept - { - return value_integer; - } - - /// return unsigned integer value - constexpr number_unsigned_t get_number_unsigned() const noexcept - { - return value_unsigned; - } - - /// return floating-point value - constexpr number_float_t get_number_float() const noexcept - { - return value_float; - } - - /// return string value - const std::string get_string() - { - // yytext cannot be returned as char*, because it may contain a - // null byte (parsed as "\u0000") - return std::string(yytext.data(), yylen); - } - - ///////////////////// - // diagnostics - ///////////////////// - - /// return position of last read token - 
constexpr size_t get_position() const noexcept - { - return chars_read; - } - - /// return the last read token (for errors only) - std::string get_token_string() const - { - // get the raw byte sequence of the last token - std::string s = ia->read(start_pos, chars_read - start_pos); - - // escape control characters - std::string result; - for (auto c : s) - { - if (c == '\0' or c == std::char_traits::eof()) - { - // ignore EOF - continue; - } - else if ('\x00' <= c and c <= '\x1f') - { - // escape control characters - std::stringstream ss; - ss << "(c) << ">"; - result += ss.str(); - } - else - { - // add character as is - result.append(1, c); - } - } - - return result; - } - - /// return syntax error message - constexpr const char* get_error_message() const noexcept - { - return error_message; - } - - ///////////////////// - // actual scanner - ///////////////////// - - token_type scan() - { - // read next character and ignore whitespace - do - { - get(); - } - while (current == ' ' or current == '\t' or current == '\n' or current == '\r'); - - switch (current) - { - // structural characters - case '[': - return token_type::begin_array; - case ']': - return token_type::end_array; - case '{': - return token_type::begin_object; - case '}': - return token_type::end_object; - case ':': - return token_type::name_separator; - case ',': - return token_type::value_separator; - - // literals - case 't': - return scan_literal("true", 4, token_type::literal_true); - case 'f': - return scan_literal("false", 5, token_type::literal_false); - case 'n': - return scan_literal("null", 4, token_type::literal_null); - - // string - case '\"': - return scan_string(); - - // number - case '-': - case '0': - case '1': - case '2': - case '3': - case '4': - case '5': - case '6': - case '7': - case '8': - case '9': - return scan_number(); - - // end of input (the null byte is needed when parsing from - // string literals) - case '\0': - case std::char_traits::eof(): - return 
token_type::end_of_input; - - // error - default: - error_message = "invalid literal"; - return token_type::parse_error; - } - } - - private: - /// input adapter - input_adapter_t ia = nullptr; - - /// the current character - int current = std::char_traits::eof(); - - /// whether get() should return the last character again - bool next_unget = false; - - /// the number of characters read - size_t chars_read = 0; - /// the start position of the current token - size_t start_pos = 0; - - /// buffer for variable-length tokens (numbers, strings) - std::vector yytext = std::vector(1024, '\0'); - /// current index in yytext - size_t yylen = 0; - - /// a description of occurred lexer errors - const char* error_message = ""; - - // number values - number_integer_t value_integer = 0; - number_unsigned_t value_unsigned = 0; - number_float_t value_float = 0; - - /// the decimal point - const char decimal_point_char = '.'; - }; - - /*! - @brief syntax analysis - - This class implements a recursive decent parser. - */ - class parser - { - public: - /// a parser reading from an input adapter - explicit parser(input_adapter_t adapter, - const parser_callback_t cb = nullptr) - : callback(cb), m_lexer(adapter) - {} - - /*! - @brief public parser interface - - @param[in] strict whether to expect the last token to be EOF - @return parsed JSON value - - @throw parse_error.101 in case of an unexpected token - @throw parse_error.102 if to_unicode fails or surrogate error - @throw parse_error.103 if to_unicode fails - */ - basic_json parse(const bool strict = true) - { - // read first token - get_token(); - - basic_json result = parse_internal(true); - result.assert_invariant(); - - if (strict) - { - get_token(); - expect(lexer::token_type::end_of_input); - } - - // return parser result and replace it with null in case the - // top-level value was discarded by the callback function - return result.is_discarded() ? basic_json() : std::move(result); - } - - /*! 
- @brief public accept interface - - @param[in] strict whether to expect the last token to be EOF - @return whether the input is a proper JSON text - */ - bool accept(const bool strict = true) - { - // read first token - get_token(); - - if (not accept_internal()) - { - return false; - } - - if (strict and get_token() != lexer::token_type::end_of_input) - { - return false; - } - - return true; - } - - private: - /*! - @brief the actual parser - @throw parse_error.101 in case of an unexpected token - @throw parse_error.102 if to_unicode fails or surrogate error - @throw parse_error.103 if to_unicode fails - */ - basic_json parse_internal(bool keep) - { - auto result = basic_json(value_t::discarded); - - switch (last_token) - { - case lexer::token_type::begin_object: - { - if (keep and (not callback - or ((keep = callback(depth++, parse_event_t::object_start, result)) != 0))) - { - // explicitly set result to object to cope with {} - result.m_type = value_t::object; - result.m_value = value_t::object; - } - - // read next token - get_token(); - - // closing } -> we are done - if (last_token == lexer::token_type::end_object) - { - if (keep and callback and not callback(--depth, parse_event_t::object_end, result)) - { - result = basic_json(value_t::discarded); - } - return result; - } - - // parse values - while (true) - { - // store key - expect(lexer::token_type::value_string); - const auto key = m_lexer.get_string(); - - bool keep_tag = false; - if (keep) - { - if (callback) - { - basic_json k(key); - keep_tag = callback(depth, parse_event_t::key, k); - } - else - { - keep_tag = true; - } - } - - // parse separator (:) - get_token(); - expect(lexer::token_type::name_separator); - - // parse and add value - get_token(); - auto value = parse_internal(keep); - if (keep and keep_tag and not value.is_discarded()) - { - result[key] = std::move(value); - } - - // comma -> next value - get_token(); - if (last_token == lexer::token_type::value_separator) - { - get_token(); - 
continue; - } - - // closing } - expect(lexer::token_type::end_object); - break; - } - - if (keep and callback and not callback(--depth, parse_event_t::object_end, result)) - { - result = basic_json(value_t::discarded); - } - - return result; - } - - case lexer::token_type::begin_array: - { - if (keep and (not callback - or ((keep = callback(depth++, parse_event_t::array_start, result)) != 0))) - { - // explicitly set result to object to cope with [] - result.m_type = value_t::array; - result.m_value = value_t::array; - } - - // read next token - get_token(); - - // closing ] -> we are done - if (last_token == lexer::token_type::end_array) - { - if (callback and not callback(--depth, parse_event_t::array_end, result)) - { - result = basic_json(value_t::discarded); - } - return result; - } - - // parse values - while (true) - { - // parse value - auto value = parse_internal(keep); - if (keep and not value.is_discarded()) - { - result.push_back(std::move(value)); - } - - // comma -> next value - get_token(); - if (last_token == lexer::token_type::value_separator) - { - get_token(); - continue; - } - - // closing ] - expect(lexer::token_type::end_array); - break; - } - - if (keep and callback and not callback(--depth, parse_event_t::array_end, result)) - { - result = basic_json(value_t::discarded); - } - - return result; - } - - case lexer::token_type::literal_null: - { - result.m_type = value_t::null; - break; - } - - case lexer::token_type::value_string: - { - result = basic_json(m_lexer.get_string()); - break; - } - - case lexer::token_type::literal_true: - { - result.m_type = value_t::boolean; - result.m_value = true; - break; - } - - case lexer::token_type::literal_false: - { - result.m_type = value_t::boolean; - result.m_value = false; - break; - } - - case lexer::token_type::value_unsigned: - { - result.m_type = value_t::number_unsigned; - result.m_value = m_lexer.get_number_unsigned(); - break; - } - - case lexer::token_type::value_integer: - { - result.m_type 
= value_t::number_integer; - result.m_value = m_lexer.get_number_integer(); - break; - } - - case lexer::token_type::value_float: - { - result.m_type = value_t::number_float; - result.m_value = m_lexer.get_number_float(); - - // throw in case of infinity or NAN - if (JSON_UNLIKELY(not std::isfinite(result.m_value.number_float))) - { - JSON_THROW(out_of_range::create(406, "number overflow parsing '" + m_lexer.get_token_string() + "'")); - } - - break; - } - - case lexer::token_type::parse_error: - { - // using "uninitialized" to avoid "expected" message - expect(lexer::token_type::uninitialized); - } - - default: - { - // the last token was unexpected; we expected a value - expect(lexer::token_type::literal_or_value); - } - } - - if (keep and callback and not callback(depth, parse_event_t::value, result)) - { - result = basic_json(value_t::discarded); - } - return result; - } - - /*! - @brief the acutal acceptor - - @invariant 1. The last token is not yet processed. Therefore, the - caller of this function must make sure a token has - been read. - 2. When this function returns, the last token is processed. - That is, the last read character was already considered. - - This invariant makes sure that no token needs to be "unput". 
- */ - bool accept_internal() - { - switch (last_token) - { - case lexer::token_type::begin_object: - { - // read next token - get_token(); - - // closing } -> we are done - if (last_token == lexer::token_type::end_object) - { - return true; - } - - // parse values - while (true) - { - // parse key - if (last_token != lexer::token_type::value_string) - { - return false; - } - - // parse separator (:) - get_token(); - if (last_token != lexer::token_type::name_separator) - { - return false; - } - - // parse value - get_token(); - if (not accept_internal()) - { - return false; - } - - // comma -> next value - get_token(); - if (last_token == lexer::token_type::value_separator) - { - get_token(); - continue; - } - - // closing } - if (last_token != lexer::token_type::end_object) - { - return false; - } - - return true; - } - } - - case lexer::token_type::begin_array: - { - // read next token - get_token(); - - // closing ] -> we are done - if (last_token == lexer::token_type::end_array) - { - return true; - } - - // parse values - while (true) - { - // parse value - if (not accept_internal()) - { - return false; - } - - // comma -> next value - get_token(); - if (last_token == lexer::token_type::value_separator) - { - get_token(); - continue; - } - - // closing ] - if (last_token != lexer::token_type::end_array) - { - return false; - } - - return true; - } - } - - case lexer::token_type::literal_false: - case lexer::token_type::literal_null: - case lexer::token_type::literal_true: - case lexer::token_type::value_float: - case lexer::token_type::value_integer: - case lexer::token_type::value_string: - case lexer::token_type::value_unsigned: - { - return true; - } - - default: - { - // the last token was unexpected - return false; - } - } - } - - /// get next token from lexer - typename lexer::token_type get_token() - { - return (last_token = m_lexer.scan()); - } - - /*! 
- @throw parse_error.101 if expected token did not occur - */ - void expect(typename lexer::token_type t) - { - if (JSON_UNLIKELY(t != last_token)) - { - errored = true; - expected = t; - throw_exception(); - } - } - - [[noreturn]] void throw_exception() const - { - std::string error_msg = "syntax error - "; - if (last_token == lexer::token_type::parse_error) - { - error_msg += std::string(m_lexer.get_error_message()) + "; last read: '" + m_lexer.get_token_string() + "'"; - } - else - { - error_msg += "unexpected " + std::string(lexer::token_type_name(last_token)); - } - - if (expected != lexer::token_type::uninitialized) - { - error_msg += "; expected " + std::string(lexer::token_type_name(expected)); - } - - JSON_THROW(parse_error::create(101, m_lexer.get_position(), error_msg)); - } - - private: - /// current level of recursion - int depth = 0; - /// callback function - const parser_callback_t callback = nullptr; - /// the type of the last read token - typename lexer::token_type last_token = lexer::token_type::uninitialized; - /// the lexer - lexer m_lexer; - /// whether a syntax error occurred - bool errored = false; - /// possible reason for the syntax error - typename lexer::token_type expected = lexer::token_type::uninitialized; - }; - - public: - /*! - @brief JSON Pointer - - A JSON pointer defines a string syntax for identifying a specific value - within a JSON document. It can be used with functions `at` and - `operator[]`. Furthermore, JSON pointers are the base for JSON patches. - - @sa [RFC 6901](https://tools.ietf.org/html/rfc6901) - - @since version 2.0.0 - */ - class json_pointer - { - /// allow basic_json to access private members - friend class basic_json; - - public: - /*! - @brief create JSON pointer - - Create a JSON pointer according to the syntax described in - [Section 3 of RFC6901](https://tools.ietf.org/html/rfc6901#section-3). 
- - @param[in] s string representing the JSON pointer; if omitted, the - empty string is assumed which references the whole JSON - value - - @throw parse_error.107 if the given JSON pointer @a s is nonempty and - does not begin with a slash (`/`); see example below - - @throw parse_error.108 if a tilde (`~`) in the given JSON pointer @a s - is not followed by `0` (representing `~`) or `1` (representing `/`); - see example below - - @liveexample{The example shows the construction several valid JSON - pointers as well as the exceptional behavior.,json_pointer} - - @since version 2.0.0 - */ - explicit json_pointer(const std::string& s = "") - : reference_tokens(split(s)) - {} - - /*! - @brief return a string representation of the JSON pointer - - @invariant For each JSON pointer `ptr`, it holds: - @code {.cpp} - ptr == json_pointer(ptr.to_string()); - @endcode - - @return a string representation of the JSON pointer - - @liveexample{The example shows the result of `to_string`., - json_pointer__to_string} - - @since version 2.0.0 - */ - std::string to_string() const noexcept - { - return std::accumulate(reference_tokens.begin(), - reference_tokens.end(), std::string{}, - [](const std::string & a, const std::string & b) - { - return a + "/" + escape(b); - }); - } - - /// @copydoc to_string() - operator std::string() const - { - return to_string(); - } - - private: - /*! 
- @brief remove and return last reference pointer - @throw out_of_range.405 if JSON pointer has no parent - */ - std::string pop_back() - { - if (is_root()) - { - JSON_THROW(out_of_range::create(405, "JSON pointer has no parent")); - } - - auto last = reference_tokens.back(); - reference_tokens.pop_back(); - return last; - } - - /// return whether pointer points to the root document - bool is_root() const - { - return reference_tokens.empty(); - } - - json_pointer top() const - { - if (is_root()) - { - JSON_THROW(out_of_range::create(405, "JSON pointer has no parent")); - } - - json_pointer result = *this; - result.reference_tokens = {reference_tokens[0]}; - return result; - } - - /*! - @brief create and return a reference to the pointed to value - - @complexity Linear in the number of reference tokens. - - @throw parse_error.109 if array index is not a number - @throw type_error.313 if value cannot be unflattened - */ - reference get_and_create(reference j) const - { - pointer result = &j; - - // in case no reference tokens exist, return a reference to the - // JSON value j which will be overwritten by a primitive value - for (const auto& reference_token : reference_tokens) - { - switch (result->m_type) - { - case value_t::null: - { - if (reference_token == "0") - { - // start a new array if reference token is 0 - result = &result->operator[](0); - } - else - { - // start a new object otherwise - result = &result->operator[](reference_token); - } - break; - } - - case value_t::object: - { - // create an entry in the object - result = &result->operator[](reference_token); - break; - } - - case value_t::array: - { - // create an entry in the array - JSON_TRY - { - result = &result->operator[](static_cast(std::stoi(reference_token))); - } - JSON_CATCH (std::invalid_argument&) - { - JSON_THROW(parse_error::create(109, 0, "array index '" + reference_token + "' is not a number")); - } - break; - } - - /* - The following code is only reached if there exists a - reference 
token _and_ the current value is primitive. In - this case, we have an error situation, because primitive - values may only occur as single value; that is, with an - empty list of reference tokens. - */ - default: - { - JSON_THROW(type_error::create(313, "invalid value to unflatten")); - } - } - } - - return *result; - } - - /*! - @brief return a reference to the pointed to value - - @note This version does not throw if a value is not present, but tries - to create nested values instead. For instance, calling this function - with pointer `"/this/that"` on a null value is equivalent to calling - `operator[]("this").operator[]("that")` on that value, effectively - changing the null value to an object. - - @param[in] ptr a JSON value - - @return reference to the JSON value pointed to by the JSON pointer - - @complexity Linear in the length of the JSON pointer. - - @throw parse_error.106 if an array index begins with '0' - @throw parse_error.109 if an array index was not a number - @throw out_of_range.404 if the JSON pointer can not be resolved - */ - reference get_unchecked(pointer ptr) const - { - for (const auto& reference_token : reference_tokens) - { - // convert null values to arrays or objects before continuing - if (ptr->m_type == value_t::null) - { - // check if reference token is a number - const bool nums = std::all_of(reference_token.begin(), - reference_token.end(), - [](const char x) - { - return (x >= '0' and x <= '9'); - }); - - // change value to array for numbers or "-" or to object - // otherwise - if (nums or reference_token == "-") - { - *ptr = value_t::array; - } - else - { - *ptr = value_t::object; - } - } - - switch (ptr->m_type) - { - case value_t::object: - { - // use unchecked object access - ptr = &ptr->operator[](reference_token); - break; - } - - case value_t::array: - { - // error condition (cf. RFC 6901, Sect. 
4) - if (reference_token.size() > 1 and reference_token[0] == '0') - { - JSON_THROW(parse_error::create(106, 0, "array index '" + reference_token + "' must not begin with '0'")); - } - - if (reference_token == "-") - { - // explicitly treat "-" as index beyond the end - ptr = &ptr->operator[](ptr->m_value.array->size()); - } - else - { - // convert array index to number; unchecked access - JSON_TRY - { - ptr = &ptr->operator[](static_cast(std::stoi(reference_token))); - } - JSON_CATCH (std::invalid_argument&) - { - JSON_THROW(parse_error::create(109, 0, "array index '" + reference_token + "' is not a number")); - } - } - break; - } - - default: - { - JSON_THROW(out_of_range::create(404, "unresolved reference token '" + reference_token + "'")); - } - } - } - - return *ptr; - } - - /*! - @throw parse_error.106 if an array index begins with '0' - @throw parse_error.109 if an array index was not a number - @throw out_of_range.402 if the array index '-' is used - @throw out_of_range.404 if the JSON pointer can not be resolved - */ - reference get_checked(pointer ptr) const - { - for (const auto& reference_token : reference_tokens) - { - switch (ptr->m_type) - { - case value_t::object: - { - // note: at performs range check - ptr = &ptr->at(reference_token); - break; - } - - case value_t::array: - { - if (reference_token == "-") - { - // "-" always fails the range check - JSON_THROW(out_of_range::create(402, "array index '-' (" + - std::to_string(ptr->m_value.array->size()) + - ") is out of range")); - } - - // error condition (cf. RFC 6901, Sect. 
4) - if (reference_token.size() > 1 and reference_token[0] == '0') - { - JSON_THROW(parse_error::create(106, 0, "array index '" + reference_token + "' must not begin with '0'")); - } - - // note: at performs range check - JSON_TRY - { - ptr = &ptr->at(static_cast(std::stoi(reference_token))); - } - JSON_CATCH (std::invalid_argument&) - { - JSON_THROW(parse_error::create(109, 0, "array index '" + reference_token + "' is not a number")); - } - break; - } - - default: - { - JSON_THROW(out_of_range::create(404, "unresolved reference token '" + reference_token + "'")); - } - } - } - - return *ptr; - } - - /*! - @brief return a const reference to the pointed to value - - @param[in] ptr a JSON value - - @return const reference to the JSON value pointed to by the JSON - pointer - - @throw parse_error.106 if an array index begins with '0' - @throw parse_error.109 if an array index was not a number - @throw out_of_range.402 if the array index '-' is used - @throw out_of_range.404 if the JSON pointer can not be resolved - */ - const_reference get_unchecked(const_pointer ptr) const - { - for (const auto& reference_token : reference_tokens) - { - switch (ptr->m_type) - { - case value_t::object: - { - // use unchecked object access - ptr = &ptr->operator[](reference_token); - break; - } - - case value_t::array: - { - if (reference_token == "-") - { - // "-" cannot be used for const access - JSON_THROW(out_of_range::create(402, "array index '-' (" + - std::to_string(ptr->m_value.array->size()) + - ") is out of range")); - } - - // error condition (cf. RFC 6901, Sect. 
4) - if (reference_token.size() > 1 and reference_token[0] == '0') - { - JSON_THROW(parse_error::create(106, 0, "array index '" + reference_token + "' must not begin with '0'")); - } - - // use unchecked array access - JSON_TRY - { - ptr = &ptr->operator[](static_cast(std::stoi(reference_token))); - } - JSON_CATCH (std::invalid_argument&) - { - JSON_THROW(parse_error::create(109, 0, "array index '" + reference_token + "' is not a number")); - } - break; - } - - default: - { - JSON_THROW(out_of_range::create(404, "unresolved reference token '" + reference_token + "'")); - } - } - } - - return *ptr; - } - - /*! - @throw parse_error.106 if an array index begins with '0' - @throw parse_error.109 if an array index was not a number - @throw out_of_range.402 if the array index '-' is used - @throw out_of_range.404 if the JSON pointer can not be resolved - */ - const_reference get_checked(const_pointer ptr) const - { - for (const auto& reference_token : reference_tokens) - { - switch (ptr->m_type) - { - case value_t::object: - { - // note: at performs range check - ptr = &ptr->at(reference_token); - break; - } - - case value_t::array: - { - if (reference_token == "-") - { - // "-" always fails the range check - JSON_THROW(out_of_range::create(402, "array index '-' (" + - std::to_string(ptr->m_value.array->size()) + - ") is out of range")); - } - - // error condition (cf. RFC 6901, Sect. 
4) - if (reference_token.size() > 1 and reference_token[0] == '0') - { - JSON_THROW(parse_error::create(106, 0, "array index '" + reference_token + "' must not begin with '0'")); - } - - // note: at performs range check - JSON_TRY - { - ptr = &ptr->at(static_cast(std::stoi(reference_token))); - } - JSON_CATCH (std::invalid_argument&) - { - JSON_THROW(parse_error::create(109, 0, "array index '" + reference_token + "' is not a number")); - } - break; - } - - default: - { - JSON_THROW(out_of_range::create(404, "unresolved reference token '" + reference_token + "'")); - } - } - } - - return *ptr; - } - - /*! - @brief split the string input to reference tokens - - @note This function is only called by the json_pointer constructor. - All exceptions below are documented there. - - @throw parse_error.107 if the pointer is not empty or begins with '/' - @throw parse_error.108 if character '~' is not followed by '0' or '1' - */ - static std::vector split(const std::string& reference_string) - { - std::vector result; - - // special case: empty reference string -> no reference tokens - if (reference_string.empty()) - { - return result; - } - - // check if nonempty reference string begins with slash - if (reference_string[0] != '/') - { - JSON_THROW(parse_error::create(107, 1, "JSON pointer must be empty or begin with '/' - was: '" + reference_string + "'")); - } - - // extract the reference tokens: - // - slash: position of the last read slash (or end of string) - // - start: position after the previous slash - for ( - // search for the first slash after the first character - size_t slash = reference_string.find_first_of('/', 1), - // set the beginning of the first reference token - start = 1; - // we can stop if start == string::npos+1 = 0 - start != 0; - // set the beginning of the next reference token - // (will eventually be 0 if slash == std::string::npos) - start = slash + 1, - // find next slash - slash = reference_string.find_first_of('/', start)) - { - // use the text 
between the beginning of the reference token - // (start) and the last slash (slash). - auto reference_token = reference_string.substr(start, slash - start); - - // check reference tokens are properly escaped - for (size_t pos = reference_token.find_first_of('~'); - pos != std::string::npos; - pos = reference_token.find_first_of('~', pos + 1)) - { - assert(reference_token[pos] == '~'); - - // ~ must be followed by 0 or 1 - if (pos == reference_token.size() - 1 or - (reference_token[pos + 1] != '0' and - reference_token[pos + 1] != '1')) - { - JSON_THROW(parse_error::create(108, 0, "escape character '~' must be followed with '0' or '1'")); - } - } - - // finally, store the reference token - unescape(reference_token); - result.push_back(reference_token); - } - - return result; - } - - /*! - @brief replace all occurrences of a substring by another string - - @param[in,out] s the string to manipulate; changed so that all - occurrences of @a f are replaced with @a t - @param[in] f the substring to replace with @a t - @param[in] t the string to replace @a f - - @pre The search string @a f must not be empty. 
**This precondition is - enforced with an assertion.** - - @since version 2.0.0 - */ - static void replace_substring(std::string& s, - const std::string& f, - const std::string& t) - { - assert(not f.empty()); - - for ( - size_t pos = s.find(f); // find first occurrence of f - pos != std::string::npos; // make sure f was found - s.replace(pos, f.size(), t), // replace with t - pos = s.find(f, pos + t.size()) // find next occurrence of f - ); - } - - /// escape tilde and slash - static std::string escape(std::string s) - { - // escape "~"" to "~0" and "/" to "~1" - replace_substring(s, "~", "~0"); - replace_substring(s, "/", "~1"); - return s; - } - - /// unescape tilde and slash - static void unescape(std::string& s) - { - // first transform any occurrence of the sequence '~1' to '/' - replace_substring(s, "~1", "/"); - // then transform any occurrence of the sequence '~0' to '~' - replace_substring(s, "~0", "~"); - } - - /*! - @param[in] reference_string the reference string to the current value - @param[in] value the value to consider - @param[in,out] result the result object to insert values to - - @note Empty objects or arrays are flattened to `null`. 
- */ - static void flatten(const std::string& reference_string, - const basic_json& value, - basic_json& result) - { - switch (value.m_type) - { - case value_t::array: - { - if (value.m_value.array->empty()) - { - // flatten empty array as null - result[reference_string] = nullptr; - } - else - { - // iterate array and use index as reference string - for (size_t i = 0; i < value.m_value.array->size(); ++i) - { - flatten(reference_string + "/" + std::to_string(i), - value.m_value.array->operator[](i), result); - } - } - break; - } - - case value_t::object: - { - if (value.m_value.object->empty()) - { - // flatten empty object as null - result[reference_string] = nullptr; - } - else - { - // iterate object and use keys as reference string - for (const auto& element : *value.m_value.object) - { - flatten(reference_string + "/" + escape(element.first), - element.second, result); - } - } - break; - } - - default: - { - // add primitive value with its reference string - result[reference_string] = value; - break; - } - } - } - - /*! - @param[in] value flattened JSON - - @return unflattened JSON - - @throw parse_error.109 if array index is not a number - @throw type_error.314 if value is not an object - @throw type_error.315 if object values are not primitive - @throw type_error.313 if value cannot be unflattened - */ - static basic_json unflatten(const basic_json& value) - { - if (not value.is_object()) - { - JSON_THROW(type_error::create(314, "only objects can be unflattened")); - } - - basic_json result; - - // iterate the JSON object values - for (const auto& element : *value.m_value.object) - { - if (not element.second.is_primitive()) - { - JSON_THROW(type_error::create(315, "values in object must be primitive")); - } - - // assign value to reference pointed to by JSON pointer; Note - // that if the JSON pointer is "" (i.e., points to the whole - // value), function get_and_create returns a reference to - // result itself. 
An assignment will then create a primitive - // value. - json_pointer(element.first).get_and_create(result) = element.second; - } - - return result; - } - - friend bool operator==(json_pointer const& lhs, - json_pointer const& rhs) noexcept - { - return lhs.reference_tokens == rhs.reference_tokens; - } - - friend bool operator!=(json_pointer const& lhs, - json_pointer const& rhs) noexcept - { - return !(lhs == rhs); - } - - /// the reference tokens - std::vector reference_tokens {}; - }; - ////////////////////////// // JSON Pointer support // ////////////////////////// @@ -13961,7 +13735,7 @@ scan_number_done: @complexity Linear in the size the JSON value. @throw type_error.314 if value is not an object - @throw type_error.315 if object values are not primitve + @throw type_error.315 if object values are not primitive @liveexample{The following code shows how a flattened JSON object is unflattened into the original nested JSON object.,unflatten} @@ -14110,7 +13884,7 @@ scan_number_done: else { const auto idx = std::stoi(last_path); - if (static_cast(idx) > parent.size()) + if (JSON_UNLIKELY(static_cast(idx) > parent.size())) { // avoid undefined behavior JSON_THROW(out_of_range::create(401, "array index " + std::to_string(idx) + " is out of range")); @@ -14145,7 +13919,7 @@ scan_number_done: { // perform range check auto it = parent.find(last_path); - if (it != parent.end()) + if (JSON_LIKELY(it != parent.end())) { parent.erase(it); } @@ -14162,7 +13936,7 @@ scan_number_done: }; // type check: top level value must be an array - if (not json_patch.is_array()) + if (JSON_UNLIKELY(not json_patch.is_array())) { JSON_THROW(parse_error::create(104, 0, "JSON patch must be an array of objects")); } @@ -14182,13 +13956,13 @@ scan_number_done: const auto error_msg = (op == "op") ? 
"operation" : "operation '" + op + "'"; // check if desired value is present - if (it == val.m_value.object->end()) + if (JSON_UNLIKELY(it == val.m_value.object->end())) { JSON_THROW(parse_error::create(105, 0, error_msg + " must have member '" + member + "'")); } // check if result is of type string - if (string_type and not it->second.is_string()) + if (JSON_UNLIKELY(string_type and not it->second.is_string())) { JSON_THROW(parse_error::create(105, 0, error_msg + " must have string member '" + member + "'")); } @@ -14198,7 +13972,7 @@ scan_number_done: }; // type check: every element of the array must be an object - if (not val.is_object()) + if (JSON_UNLIKELY(not val.is_object())) { JSON_THROW(parse_error::create(104, 0, "JSON patch must be an array of objects")); } @@ -14271,7 +14045,7 @@ scan_number_done: } // throw an exception if test fails - if (not success) + if (JSON_UNLIKELY(not success)) { JSON_THROW(other_error::create(501, "unsuccessful: " + val.dump())); } @@ -14323,8 +14097,7 @@ scan_number_done: @since version 2.0.0 */ - static basic_json diff(const basic_json& source, - const basic_json& target, + static basic_json diff(const basic_json& source, const basic_json& target, const std::string& path = "") { // the patch @@ -14341,9 +14114,7 @@ scan_number_done: // different types: replace value result.push_back( { - {"op", "replace"}, - {"path", path}, - {"value", target} + {"op", "replace"}, {"path", path}, {"value", target} }); } else @@ -14353,7 +14124,7 @@ scan_number_done: case value_t::array: { // first pass: traverse common elements - size_t i = 0; + std::size_t i = 0; while (i < source.size() and i < target.size()) { // recursive call to compare array values at index i @@ -14413,8 +14184,7 @@ scan_number_done: // found a key that is not in o -> remove it result.push_back(object( { - {"op", "remove"}, - {"path", path + "/" + key} + {"op", "remove"}, {"path", path + "/" + key} })); } } @@ -14428,8 +14198,7 @@ scan_number_done: const auto key = 
json_pointer::escape(it.key()); result.push_back( { - {"op", "add"}, - {"path", path + "/" + key}, + {"op", "add"}, {"path", path + "/" + key}, {"value", it.value()} }); } @@ -14443,9 +14212,7 @@ scan_number_done: // both primitive type: replace value result.push_back( { - {"op", "replace"}, - {"path", path}, - {"value", target} + {"op", "replace"}, {"path", path}, {"value", target} }); break; } @@ -14471,6 +14238,400 @@ uses the standard template types. @since version 1.0.0 */ using json = basic_json<>; + +////////////////// +// json_pointer // +////////////////// + +NLOHMANN_BASIC_JSON_TPL_DECLARATION +NLOHMANN_BASIC_JSON_TPL& +json_pointer::get_and_create(NLOHMANN_BASIC_JSON_TPL& j) const +{ + using size_type = typename NLOHMANN_BASIC_JSON_TPL::size_type; + auto result = &j; + + // in case no reference tokens exist, return a reference to the JSON value + // j which will be overwritten by a primitive value + for (const auto& reference_token : reference_tokens) + { + switch (result->m_type) + { + case detail::value_t::null: + { + if (reference_token == "0") + { + // start a new array if reference token is 0 + result = &result->operator[](0); + } + else + { + // start a new object otherwise + result = &result->operator[](reference_token); + } + break; + } + + case detail::value_t::object: + { + // create an entry in the object + result = &result->operator[](reference_token); + break; + } + + case detail::value_t::array: + { + // create an entry in the array + JSON_TRY + { + result = &result->operator[](static_cast(std::stoi(reference_token))); + } + JSON_CATCH(std::invalid_argument&) + { + JSON_THROW(detail::parse_error::create(109, 0, "array index '" + reference_token + "' is not a number")); + } + break; + } + + /* + The following code is only reached if there exists a reference + token _and_ the current value is primitive. 
In this case, we have + an error situation, because primitive values may only occur as + single value; that is, with an empty list of reference tokens. + */ + default: + JSON_THROW(detail::type_error::create(313, "invalid value to unflatten")); + } + } + + return *result; +} + +NLOHMANN_BASIC_JSON_TPL_DECLARATION +NLOHMANN_BASIC_JSON_TPL& +json_pointer::get_unchecked(NLOHMANN_BASIC_JSON_TPL* ptr) const +{ + using size_type = typename NLOHMANN_BASIC_JSON_TPL::size_type; + for (const auto& reference_token : reference_tokens) + { + // convert null values to arrays or objects before continuing + if (ptr->m_type == detail::value_t::null) + { + // check if reference token is a number + const bool nums = + std::all_of(reference_token.begin(), reference_token.end(), + [](const char x) + { + return (x >= '0' and x <= '9'); + }); + + // change value to array for numbers or "-" or to object otherwise + *ptr = (nums or reference_token == "-") + ? detail::value_t::array + : detail::value_t::object; + } + + switch (ptr->m_type) + { + case detail::value_t::object: + { + // use unchecked object access + ptr = &ptr->operator[](reference_token); + break; + } + + case detail::value_t::array: + { + // error condition (cf. RFC 6901, Sect. 
4) + if (JSON_UNLIKELY(reference_token.size() > 1 and reference_token[0] == '0')) + { + JSON_THROW(detail::parse_error::create(106, 0, + "array index '" + reference_token + + "' must not begin with '0'")); + } + + if (reference_token == "-") + { + // explicitly treat "-" as index beyond the end + ptr = &ptr->operator[](ptr->m_value.array->size()); + } + else + { + // convert array index to number; unchecked access + JSON_TRY + { + ptr = &ptr->operator[]( + static_cast(std::stoi(reference_token))); + } + JSON_CATCH(std::invalid_argument&) + { + JSON_THROW(detail::parse_error::create(109, 0, "array index '" + reference_token + "' is not a number")); + } + } + break; + } + + default: + JSON_THROW(detail::out_of_range::create(404, "unresolved reference token '" + reference_token + "'")); + } + } + + return *ptr; +} + +NLOHMANN_BASIC_JSON_TPL_DECLARATION +NLOHMANN_BASIC_JSON_TPL& +json_pointer::get_checked(NLOHMANN_BASIC_JSON_TPL* ptr) const +{ + using size_type = typename NLOHMANN_BASIC_JSON_TPL::size_type; + for (const auto& reference_token : reference_tokens) + { + switch (ptr->m_type) + { + case detail::value_t::object: + { + // note: at performs range check + ptr = &ptr->at(reference_token); + break; + } + + case detail::value_t::array: + { + if (JSON_UNLIKELY(reference_token == "-")) + { + // "-" always fails the range check + JSON_THROW(detail::out_of_range::create(402, + "array index '-' (" + std::to_string(ptr->m_value.array->size()) + + ") is out of range")); + } + + // error condition (cf. RFC 6901, Sect. 
4) + if (JSON_UNLIKELY(reference_token.size() > 1 and reference_token[0] == '0')) + { + JSON_THROW(detail::parse_error::create(106, 0, + "array index '" + reference_token + + "' must not begin with '0'")); + } + + // note: at performs range check + JSON_TRY + { + ptr = &ptr->at(static_cast(std::stoi(reference_token))); + } + JSON_CATCH(std::invalid_argument&) + { + JSON_THROW(detail::parse_error::create(109, 0, "array index '" + reference_token + "' is not a number")); + } + break; + } + + default: + JSON_THROW(detail::out_of_range::create(404, "unresolved reference token '" + reference_token + "'")); + } + } + + return *ptr; +} + +NLOHMANN_BASIC_JSON_TPL_DECLARATION +const NLOHMANN_BASIC_JSON_TPL& +json_pointer::get_unchecked(const NLOHMANN_BASIC_JSON_TPL* ptr) const +{ + using size_type = typename NLOHMANN_BASIC_JSON_TPL::size_type; + for (const auto& reference_token : reference_tokens) + { + switch (ptr->m_type) + { + case detail::value_t::object: + { + // use unchecked object access + ptr = &ptr->operator[](reference_token); + break; + } + + case detail::value_t::array: + { + if (JSON_UNLIKELY(reference_token == "-")) + { + // "-" cannot be used for const access + JSON_THROW(detail::out_of_range::create(402, + "array index '-' (" + std::to_string(ptr->m_value.array->size()) + + ") is out of range")); + } + + // error condition (cf. RFC 6901, Sect. 
4) + if (JSON_UNLIKELY(reference_token.size() > 1 and reference_token[0] == '0')) + { + JSON_THROW(detail::parse_error::create(106, 0, + "array index '" + reference_token + + "' must not begin with '0'")); + } + + // use unchecked array access + JSON_TRY + { + ptr = &ptr->operator[]( + static_cast(std::stoi(reference_token))); + } + JSON_CATCH(std::invalid_argument&) + { + JSON_THROW(detail::parse_error::create(109, 0, "array index '" + reference_token + "' is not a number")); + } + break; + } + + default: + JSON_THROW(detail::out_of_range::create(404, "unresolved reference token '" + reference_token + "'")); + } + } + + return *ptr; +} + +NLOHMANN_BASIC_JSON_TPL_DECLARATION +const NLOHMANN_BASIC_JSON_TPL& +json_pointer::get_checked(const NLOHMANN_BASIC_JSON_TPL* ptr) const +{ + using size_type = typename NLOHMANN_BASIC_JSON_TPL::size_type; + for (const auto& reference_token : reference_tokens) + { + switch (ptr->m_type) + { + case detail::value_t::object: + { + // note: at performs range check + ptr = &ptr->at(reference_token); + break; + } + + case detail::value_t::array: + { + if (JSON_UNLIKELY(reference_token == "-")) + { + // "-" always fails the range check + JSON_THROW(detail::out_of_range::create(402, + "array index '-' (" + std::to_string(ptr->m_value.array->size()) + + ") is out of range")); + } + + // error condition (cf. RFC 6901, Sect. 
4) + if (JSON_UNLIKELY(reference_token.size() > 1 and reference_token[0] == '0')) + { + JSON_THROW(detail::parse_error::create(106, 0, + "array index '" + reference_token + + "' must not begin with '0'")); + } + + // note: at performs range check + JSON_TRY + { + ptr = &ptr->at(static_cast(std::stoi(reference_token))); + } + JSON_CATCH(std::invalid_argument&) + { + JSON_THROW(detail::parse_error::create(109, 0, "array index '" + reference_token + "' is not a number")); + } + break; + } + + default: + JSON_THROW(detail::out_of_range::create(404, "unresolved reference token '" + reference_token + "'")); + } + } + + return *ptr; +} + +NLOHMANN_BASIC_JSON_TPL_DECLARATION +void json_pointer::flatten(const std::string& reference_string, + const NLOHMANN_BASIC_JSON_TPL& value, + NLOHMANN_BASIC_JSON_TPL& result) +{ + switch (value.m_type) + { + case detail::value_t::array: + { + if (value.m_value.array->empty()) + { + // flatten empty array as null + result[reference_string] = nullptr; + } + else + { + // iterate array and use index as reference string + for (std::size_t i = 0; i < value.m_value.array->size(); ++i) + { + flatten(reference_string + "/" + std::to_string(i), + value.m_value.array->operator[](i), result); + } + } + break; + } + + case detail::value_t::object: + { + if (value.m_value.object->empty()) + { + // flatten empty object as null + result[reference_string] = nullptr; + } + else + { + // iterate object and use keys as reference string + for (const auto& element : *value.m_value.object) + { + flatten(reference_string + "/" + escape(element.first), element.second, result); + } + } + break; + } + + default: + { + // add primitive value with its reference string + result[reference_string] = value; + break; + } + } +} + +NLOHMANN_BASIC_JSON_TPL_DECLARATION +NLOHMANN_BASIC_JSON_TPL +json_pointer::unflatten(const NLOHMANN_BASIC_JSON_TPL& value) +{ + if (JSON_UNLIKELY(not value.is_object())) + { + JSON_THROW(detail::type_error::create(314, "only objects can be 
unflattened")); + } + + NLOHMANN_BASIC_JSON_TPL result; + + // iterate the JSON object values + for (const auto& element : *value.m_value.object) + { + if (JSON_UNLIKELY(not element.second.is_primitive())) + { + JSON_THROW(detail::type_error::create(315, "values in object must be primitive")); + } + + // assign value to reference pointed to by JSON pointer; Note that if + // the JSON pointer is "" (i.e., points to the whole value), function + // get_and_create returns a reference to result itself. An assignment + // will then create a primitive value. + json_pointer(element.first).get_and_create(result) = element.second; + } + + return result; +} + +inline bool operator==(json_pointer const& lhs, json_pointer const& rhs) noexcept +{ + return (lhs.reference_tokens == rhs.reference_tokens); +} + +inline bool operator!=(json_pointer const& lhs, json_pointer const& rhs) noexcept +{ + return not (lhs == rhs); +} } // namespace nlohmann @@ -14514,8 +14675,10 @@ struct hash }; /// specialization for std::less -template <> -struct less<::nlohmann::detail::value_t> +/// @note: do not remove the space after '<', +/// see https://github.com/nlohmann/json/pull/679 +template<> +struct less< ::nlohmann::detail::value_t> { /*! 
@brief compare two value_t enum values @@ -14581,5 +14744,7 @@ inline nlohmann::json::json_pointer operator "" _json_pointer(const char* s, std #undef JSON_LIKELY #undef JSON_UNLIKELY #undef JSON_DEPRECATED +#undef NLOHMANN_BASIC_JSON_TPL_DECLARATION +#undef NLOHMANN_BASIC_JSON_TPL #endif diff --git a/lib/serialisation/JSON_IO.cc b/lib/serialisation/JSON_IO.cc index 23a78b3e..99a9cdd6 100644 --- a/lib/serialisation/JSON_IO.cc +++ b/lib/serialisation/JSON_IO.cc @@ -76,10 +76,9 @@ void JSONWriter::delete_comma() // annoying, but necessary for TravisCI namespace Grid { - template<> void JSONWriter::writeDefault(const std::string &s, const std::string &x) { - //std::cout << "JSONWriter::writeDefault(string) : " << s << std::endl; + //std::cout << "JSONWriter::writeDefault(string) : " << s << std::endl; std::ostringstream os; os << std::boolalpha << x; if (s.size()) diff --git a/lib/serialisation/JSON_IO.h b/lib/serialisation/JSON_IO.h index 23b9a836..00287447 100644 --- a/lib/serialisation/JSON_IO.h +++ b/lib/serialisation/JSON_IO.h @@ -64,6 +64,8 @@ namespace Grid template void writeDefault(const std::string &s, const char(&x)[N]); + void writeDefault(const std::string &s, const std::string &x); + private: void delete_comma(); @@ -120,22 +122,6 @@ namespace Grid ss_ << os.str() << " ," ; } - - // specialize for string - template <> - void JSONWriter::writeDefault(const std::string &s, const std::string &x) - { - //std::cout << "JSONWriter::writeDefault(string) : " << s << std::endl; - std::ostringstream os; - os << std::boolalpha << x; - if (s.size()) - ss_ << "\""<< s << "\" : \"" << os.str() << "\" ," ; - else - ss_ << os.str() << " ," ; - } - - - template void JSONWriter::writeDefault(const std::string &s, const std::complex &x) { From 91eaace19de25f45db52420a10350b36c548ad5e Mon Sep 17 00:00:00 2001 From: Guido Cossu Date: Fri, 15 Sep 2017 11:33:45 +0100 Subject: [PATCH 110/377] Added support for FFT accelerated updates --- lib/qcd/action/scalar/ScalarImpl.h | 129 
+++++++++++++++++++++++++---- 1 file changed, 114 insertions(+), 15 deletions(-) diff --git a/lib/qcd/action/scalar/ScalarImpl.h b/lib/qcd/action/scalar/ScalarImpl.h index f85ab840..3755d0ee 100644 --- a/lib/qcd/action/scalar/ScalarImpl.h +++ b/lib/qcd/action/scalar/ScalarImpl.h @@ -16,12 +16,12 @@ class ScalarImplTypes { typedef iImplField SiteField; typedef SiteField SitePropagator; typedef SiteField SiteComplex; - + typedef Lattice Field; typedef Field ComplexField; typedef Field FermionField; typedef Field PropagatorField; - + static inline void generate_momenta(Field& P, GridParallelRNG& pRNG){ gaussian(pRNG, P); } @@ -47,54 +47,58 @@ class ScalarImplTypes { static inline void ColdConfiguration(GridParallelRNG &pRNG, Field &U) { U = 1.0; } - + static void MomentumSpacePropagator(Field &out, RealD m) { GridBase *grid = out._grid; Field kmu(grid), one(grid); const unsigned int nd = grid->_ndimension; std::vector &l = grid->_fdimensions; - + one = Complex(1.0,0.0); out = m*m; for(int mu = 0; mu < nd; mu++) { Real twoPiL = M_PI*2./l[mu]; - + LatticeCoordinate(kmu,mu); kmu = 2.*sin(.5*twoPiL*kmu); out = out + kmu*kmu; } out = one/out; } - + static void FreePropagator(const Field &in, Field &out, const Field &momKernel) { FFT fft((GridCartesian *)in._grid); Field inFT(in._grid); - + fft.FFT_all_dim(inFT, in, FFT::forward); inFT = inFT*momKernel; fft.FFT_all_dim(out, inFT, FFT::backward); } - + static void FreePropagator(const Field &in, Field &out, RealD m) { Field momKernel(in._grid); - + MomentumSpacePropagator(momKernel, m); FreePropagator(in, out, momKernel); } - + }; + + #define USE_FFT_ACCELERATION + + template class ScalarAdjMatrixImplTypes { public: typedef S Simd; typedef QCD::SU Group; - + template using iImplField = iScalar>>; template @@ -103,24 +107,119 @@ class ScalarImplTypes { typedef iImplField SiteField; typedef SiteField SitePropagator; typedef iImplComplex SiteComplex; - + typedef Lattice Field; typedef Lattice ComplexField; typedef Field 
FermionField; typedef Field PropagatorField; + + static void MomentaSquare(ComplexField& out){ + GridBase *grid = out._grid; + const std::vector &l = grid->FullDimensions(); + ComplexField kmu(grid); + + for(int mu = 0; mu < grid->Nd(); mu++) + { + Real twoPiL = M_PI*2.0/l[mu]; + LatticeCoordinate(kmu,mu); + kmu = 2.0*sin(0.5*twoPiL*kmu); + out += kmu*kmu; + } + } + + static void MomentumSpacePropagator(ComplexField &out, RealD m) + { + GridBase *grid = out._grid; + ComplexField one(grid); one = Complex(1.0,0.0); + out = m*m; + MomentaSquare(out); + out = one/out; + } + + static inline void generate_momenta(Field& P, GridParallelRNG& pRNG) { + #ifndef USE_FFT_ACCELERATION Group::GaussianFundamentalLieAlgebraMatrix(pRNG, P); + #else + + Field Ptmp(P._grid), Pp(P._grid); + Group::GaussianFundamentalLieAlgebraMatrix(pRNG, Ptmp); + // if we change the mass I need a renormalization here + // transform and multiply by (M*M+p*p)^-1 + GridCartesian *Grid = dynamic_cast(P._grid); + FFT theFFT(Grid); + ComplexField p2(Grid); + RealD M = 1.0; + p2= zero; + + theFFT.FFT_all_dim(Pp,Ptmp,FFT::forward); + MomentaSquare(p2); + p2 += M*M; + p2 = sqrt(p2); + Pp *= p2; + theFFT.FFT_all_dim(P,Pp,FFT::backward); + + #endif //USE_FFT_ACCELERATION } static inline Field projectForce(Field& P) {return P;} static inline void update_field(Field& P, Field& U, double ep) { + #ifndef USE_FFT_ACCELERATION U += P*ep; + #else + // Here we can eventually add the Fourier acceleration + // FFT transform P(x) -> P(p) + // divide by (M^2+p^2) M external parameter (how to pass?) 
+ // P'(p) = P(p)/(M^2+p^2) + // Transform back -> P'(x) + // U += P'(x)*ep + + // the dynamic cast is safe + GridCartesian *Grid = dynamic_cast(U._grid); + FFT theFFT(Grid); + Field Pp(Grid), Pnew(Grid); + std::vector full_dim = Grid->FullDimensions(); + + theFFT.FFT_all_dim(Pp,P,FFT::forward); + RealD M = 1.0; + static bool first_call = true; + static ComplexField p2(Grid); + if (first_call){ + MomentumSpacePropagator(p2,M); + first_call = false; + } + Pp *= p2; + theFFT.FFT_all_dim(Pnew,Pp,FFT::backward); + U += Pnew * ep; + + #endif //USE_FFT_ACCELERATION } - static inline RealD FieldSquareNorm(Field& U) { + static inline RealD FieldSquareNorm(Field &U) + { + #ifndef USE_FFT_ACCELERATION return (TensorRemove(sum(trace(U*U))).real()); + #else + // In case of Fourier acceleration we have to: + // compute U(p)*U(p)/(M^2+p^2)) Parseval theorem + // 1 FFT needed U(x) -> U(p) + // M to be passed + + GridCartesian *Grid = dynamic_cast(U._grid); + FFT theFFT(Grid); + Field Up(Grid), Utilde(Grid); + std::vector full_dim = Grid->FullDimensions(); + + theFFT.FFT_all_dim(Up, U, FFT::forward); + RealD M = 1.0; + ComplexField p2(Grid); + MomentumSpacePropagator(p2,M); + Field Up2 = Up*p2; + // from the definition of the DFT we need to divide by the volume + return (-TensorRemove(sum(trace(adj(Up)*Up2))).real()/U._grid->gSites()); + #endif //USE_FFT_ACCELERATION } static inline void HotConfiguration(GridParallelRNG &pRNG, Field &U) { @@ -146,7 +245,7 @@ class ScalarImplTypes { typedef ScalarImplTypes ScalarImplCR; typedef ScalarImplTypes ScalarImplCF; typedef ScalarImplTypes ScalarImplCD; - + // Hardcoding here the size of the matrices typedef ScalarAdjMatrixImplTypes ScalarAdjImplR; typedef ScalarAdjMatrixImplTypes ScalarAdjImplF; @@ -155,7 +254,7 @@ class ScalarImplTypes { template using ScalarNxNAdjImplR = ScalarAdjMatrixImplTypes; template using ScalarNxNAdjImplF = ScalarAdjMatrixImplTypes; template using ScalarNxNAdjImplD = ScalarAdjMatrixImplTypes; - + //} } From 
b542d349b8784fdd47339977e94575a7fdef5a58 Mon Sep 17 00:00:00 2001 From: Guido Cossu Date: Fri, 15 Sep 2017 11:48:36 +0100 Subject: [PATCH 111/377] Minor cosmetic changes --- lib/qcd/action/scalar/ScalarImpl.h | 137 ++++++++++++++--------------- 1 file changed, 68 insertions(+), 69 deletions(-) diff --git a/lib/qcd/action/scalar/ScalarImpl.h b/lib/qcd/action/scalar/ScalarImpl.h index 3755d0ee..3dd3cc70 100644 --- a/lib/qcd/action/scalar/ScalarImpl.h +++ b/lib/qcd/action/scalar/ScalarImpl.h @@ -91,6 +91,9 @@ class ScalarImplTypes { #define USE_FFT_ACCELERATION + #ifdef USE_FFT_ACCELERATION + #define FFT_MASS 0.707 + #endif template @@ -113,113 +116,109 @@ class ScalarImplTypes { typedef Field FermionField; typedef Field PropagatorField; + static void MomentaSquare(ComplexField &out) + { + GridBase *grid = out._grid; + const std::vector &l = grid->FullDimensions(); + ComplexField kmu(grid); - static void MomentaSquare(ComplexField& out){ - GridBase *grid = out._grid; - const std::vector &l = grid->FullDimensions(); - ComplexField kmu(grid); - - for(int mu = 0; mu < grid->Nd(); mu++) + for (int mu = 0; mu < grid->Nd(); mu++) { - Real twoPiL = M_PI*2.0/l[mu]; - LatticeCoordinate(kmu,mu); - kmu = 2.0*sin(0.5*twoPiL*kmu); - out += kmu*kmu; + Real twoPiL = M_PI * 2.0 / l[mu]; + LatticeCoordinate(kmu, mu); + kmu = 2.0 * sin(0.5 * twoPiL * kmu); + out += kmu * kmu; } } static void MomentumSpacePropagator(ComplexField &out, RealD m) { - GridBase *grid = out._grid; - ComplexField one(grid); one = Complex(1.0,0.0); - out = m*m; + GridBase *grid = out._grid; + ComplexField one(grid); + one = Complex(1.0, 0.0); + out = m * m; MomentaSquare(out); - out = one/out; + out = one / out; } - - static inline void generate_momenta(Field& P, GridParallelRNG& pRNG) { - #ifndef USE_FFT_ACCELERATION + static inline void generate_momenta(Field &P, GridParallelRNG &pRNG) + { +#ifndef USE_FFT_ACCELERATION Group::GaussianFundamentalLieAlgebraMatrix(pRNG, P); - #else - - Field Ptmp(P._grid), 
Pp(P._grid); - Group::GaussianFundamentalLieAlgebraMatrix(pRNG, Ptmp); - // if we change the mass I need a renormalization here - // transform and multiply by (M*M+p*p)^-1 - GridCartesian *Grid = dynamic_cast(P._grid); - FFT theFFT(Grid); - ComplexField p2(Grid); - RealD M = 1.0; - p2= zero; +#else - theFFT.FFT_all_dim(Pp,Ptmp,FFT::forward); + Field Pgaussian(P._grid), Pp(P._grid); + ComplexField p2(P._grid); p2 = zero; + RealD M = FFT_MASS; + + Group::GaussianFundamentalLieAlgebraMatrix(pRNG, Pgaussian); + + FFT theFFT((GridCartesian*)P._grid); + theFFT.FFT_all_dim(Pp, Pgaussian, FFT::forward); MomentaSquare(p2); - p2 += M*M; + p2 += M * M; p2 = sqrt(p2); Pp *= p2; - theFFT.FFT_all_dim(P,Pp,FFT::backward); - - #endif //USE_FFT_ACCELERATION + theFFT.FFT_all_dim(P, Pp, FFT::backward); + +#endif //USE_FFT_ACCELERATION } static inline Field projectForce(Field& P) {return P;} - static inline void update_field(Field& P, Field& U, double ep) { - #ifndef USE_FFT_ACCELERATION - U += P*ep; - #else - // Here we can eventually add the Fourier acceleration + static inline void update_field(Field &P, Field &U, double ep) + { +#ifndef USE_FFT_ACCELERATION + U += P * ep; +#else // FFT transform P(x) -> P(p) // divide by (M^2+p^2) M external parameter (how to pass?) 
// P'(p) = P(p)/(M^2+p^2) // Transform back -> P'(x) // U += P'(x)*ep - - // the dynamic cast is safe - GridCartesian *Grid = dynamic_cast(U._grid); - FFT theFFT(Grid); - Field Pp(Grid), Pnew(Grid); - std::vector full_dim = Grid->FullDimensions(); - theFFT.FFT_all_dim(Pp,P,FFT::forward); - RealD M = 1.0; + Field Pp(U._grid), P_FFT(U._grid); + static ComplexField p2(U._grid); + RealD M = FFT_MASS; + + FFT theFFT((GridCartesian*)U._grid); + theFFT.FFT_all_dim(Pp, P, FFT::forward); + static bool first_call = true; - static ComplexField p2(Grid); - if (first_call){ - MomentumSpacePropagator(p2,M); - first_call = false; + if (first_call) + { + // avoid recomputing + MomentumSpacePropagator(p2, M); + first_call = false; } Pp *= p2; - theFFT.FFT_all_dim(Pnew,Pp,FFT::backward); - U += Pnew * ep; - - #endif //USE_FFT_ACCELERATION + theFFT.FFT_all_dim(P_FFT, Pp, FFT::backward); + U += P_FFT * ep; + +#endif //USE_FFT_ACCELERATION } static inline RealD FieldSquareNorm(Field &U) { - #ifndef USE_FFT_ACCELERATION - return (TensorRemove(sum(trace(U*U))).real()); - #else +#ifndef USE_FFT_ACCELERATION + return (TensorRemove(sum(trace(U * U))).real()); +#else // In case of Fourier acceleration we have to: // compute U(p)*U(p)/(M^2+p^2)) Parseval theorem // 1 FFT needed U(x) -> U(p) // M to be passed - - GridCartesian *Grid = dynamic_cast(U._grid); - FFT theFFT(Grid); - Field Up(Grid), Utilde(Grid); - std::vector full_dim = Grid->FullDimensions(); - + + FFT theFFT((GridCartesian*)U._grid); + Field Up(U._grid); + theFFT.FFT_all_dim(Up, U, FFT::forward); - RealD M = 1.0; - ComplexField p2(Grid); - MomentumSpacePropagator(p2,M); - Field Up2 = Up*p2; + RealD M = FFT_MASS; + ComplexField p2(U._grid); + MomentumSpacePropagator(p2, M); + Field Up2 = Up * p2; // from the definition of the DFT we need to divide by the volume - return (-TensorRemove(sum(trace(adj(Up)*Up2))).real()/U._grid->gSites()); - #endif //USE_FFT_ACCELERATION + return (-TensorRemove(sum(trace(adj(Up) * Up2))).real() / 
U._grid->gSites()); +#endif //USE_FFT_ACCELERATION } static inline void HotConfiguration(GridParallelRNG &pRNG, Field &U) { From 5918769f9719491d357c29f4ac18bf5be0b1e3d1 Mon Sep 17 00:00:00 2001 From: paboyle Date: Sat, 16 Sep 2017 12:51:26 +0100 Subject: [PATCH 112/377] Subtle Naik term bug updated in Stencil; less on logical && with a function call on right --- lib/stencil/Stencil.h | 16 +++-- tests/solver/Test_staggered_cg_prec.cc | 87 ++++++++++++++++++++++++++ 2 files changed, 97 insertions(+), 6 deletions(-) create mode 100644 tests/solver/Test_staggered_cg_prec.cc diff --git a/lib/stencil/Stencil.h b/lib/stencil/Stencil.h index cd0792d5..887d8a7c 100644 --- a/lib/stencil/Stencil.h +++ b/lib/stencil/Stencil.h @@ -400,11 +400,13 @@ class CartesianStencil { // Stencil runs along coordinate axes only; NO diagonal if ( sshift[0] == sshift[1] ) { if (splice_dim) { splicetime-=usecond(); - same_node = same_node && GatherSimd(source,dimension,shift,0x3,compress,face_idx); + auto tmp = GatherSimd(source,dimension,shift,0x3,compress,face_idx); + same_node = same_node && tmp; splicetime+=usecond(); } else { nosplicetime-=usecond(); - same_node = same_node && Gather(source,dimension,shift,0x3,compress,face_idx); + auto tmp = Gather(source,dimension,shift,0x3,compress,face_idx); + same_node = same_node && tmp; nosplicetime+=usecond(); } } else { @@ -412,13 +414,15 @@ class CartesianStencil { // Stencil runs along coordinate axes only; NO diagonal splicetime-=usecond(); // if checkerboard is unfavourable take two passes // both with block stride loop iteration - same_node = same_node && GatherSimd(source,dimension,shift,0x1,compress,face_idx); - same_node = same_node && GatherSimd(source,dimension,shift,0x2,compress,face_idx); + auto tmp1 = GatherSimd(source,dimension,shift,0x1,compress,face_idx); + auto tmp2 = GatherSimd(source,dimension,shift,0x2,compress,face_idx); + same_node = same_node && tmp1 && tmp2; splicetime+=usecond(); } else { nosplicetime-=usecond(); - 
same_node = same_node && Gather(source,dimension,shift,0x1,compress,face_idx); - same_node = same_node && Gather(source,dimension,shift,0x2,compress,face_idx); + auto tmp1 = Gather(source,dimension,shift,0x1,compress,face_idx); + auto tmp2 = Gather(source,dimension,shift,0x2,compress,face_idx); + same_node = same_node && tmp1 && tmp2; nosplicetime+=usecond(); } } diff --git a/tests/solver/Test_staggered_cg_prec.cc b/tests/solver/Test_staggered_cg_prec.cc new file mode 100644 index 00000000..66f11d3d --- /dev/null +++ b/tests/solver/Test_staggered_cg_prec.cc @@ -0,0 +1,87 @@ + /************************************************************************************* + + Grid physics library, www.github.com/paboyle/Grid + + Source file: ./tests/Test_wilson_cg_unprec.cc + + Copyright (C) 2015 + +Author: Azusa Yamaguchi +Author: Peter Boyle + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along + with this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+ + See the full license in the file "LICENSE" in the top level distribution directory + *************************************************************************************/ + /* END LEGAL */ +#include +#include + +using namespace std; +using namespace Grid; +using namespace Grid::QCD; + +template +struct scal { + d internal; +}; + + Gamma::Algebra Gmu [] = { + Gamma::Algebra::GammaX, + Gamma::Algebra::GammaY, + Gamma::Algebra::GammaZ, + Gamma::Algebra::GammaT + }; + +int main (int argc, char ** argv) +{ + typedef typename ImprovedStaggeredFermionR::FermionField FermionField; + typedef typename ImprovedStaggeredFermionR::ComplexField ComplexField; + typename ImprovedStaggeredFermionR::ImplParams params; + + Grid_init(&argc,&argv); + + std::vector latt_size = GridDefaultLatt(); + std::vector simd_layout = GridDefaultSimd(Nd,vComplex::Nsimd()); + std::vector mpi_layout = GridDefaultMpi(); + GridCartesian Grid(latt_size,simd_layout,mpi_layout); + GridRedBlackCartesian RBGrid(latt_size,simd_layout,mpi_layout); + + std::vector seeds({1,2,3,4}); + GridParallelRNG pRNG(&Grid); pRNG.SeedFixedIntegers(seeds); + + FermionField src(&Grid); random(pRNG,src); + RealD nrm = norm2(src); + LatticeGaugeField Umu(&Grid); SU3::HotConfiguration(pRNG,Umu); + + double volume=1; + for(int mu=0;mu HermOpEO(Ds); + ConjugateGradient CG(1.0e-8,10000); + CG(HermOpEO,src_o,res_o); + + Grid_finalize(); +} From 999c62359046674117c0e0e1348072e002622c15 Mon Sep 17 00:00:00 2001 From: Guido Cossu Date: Mon, 18 Sep 2017 14:39:04 +0100 Subject: [PATCH 113/377] Solving a memory leak in Communicator_mpi --- lib/cartesian/Cartesian_base.h | 3 + lib/cartesian/Cartesian_full.h | 3 + lib/communicator/Communicator_base.h | 1 + lib/communicator/Communicator_mpi.cc | 7 + .../action/scalar/ScalarInteractionAction.h | 220 ++++++++++-------- 5 files changed, 135 insertions(+), 99 deletions(-) diff --git a/lib/cartesian/Cartesian_base.h b/lib/cartesian/Cartesian_base.h index f4f9a269..0c67e951 100644 --- 
a/lib/cartesian/Cartesian_base.h +++ b/lib/cartesian/Cartesian_base.h @@ -50,6 +50,9 @@ public: GridBase(const std::vector & processor_grid) : CartesianCommunicator(processor_grid) {}; + virtual ~GridBase() = default; + + // Physics Grid information. std::vector _simd_layout;// Which dimensions get relayed out over simd lanes. std::vector _fdimensions;// (full) Global dimensions of array prior to cb removal diff --git a/lib/cartesian/Cartesian_full.h b/lib/cartesian/Cartesian_full.h index 815e3b22..62481bb8 100644 --- a/lib/cartesian/Cartesian_full.h +++ b/lib/cartesian/Cartesian_full.h @@ -93,6 +93,7 @@ public: // Use a reduced simd grid _ldimensions[d] = _gdimensions[d] / _processors[d]; //local dimensions + //std::cout << _ldimensions[d] << " " << _gdimensions[d] << " " << _processors[d] << std::endl; assert(_ldimensions[d] * _processors[d] == _gdimensions[d]); _rdimensions[d] = _ldimensions[d] / _simd_layout[d]; //overdecomposition @@ -137,6 +138,8 @@ public: block = block * _rdimensions[d]; } }; + + virtual ~GridCartesian() = default; }; } #endif diff --git a/lib/communicator/Communicator_base.h b/lib/communicator/Communicator_base.h index ac866ced..ada017b0 100644 --- a/lib/communicator/Communicator_base.h +++ b/lib/communicator/Communicator_base.h @@ -152,6 +152,7 @@ class CartesianCommunicator { // Constructor of any given grid //////////////////////////////////////////////// CartesianCommunicator(const std::vector &pdimensions_in); + virtual ~CartesianCommunicator(); //////////////////////////////////////////////////////////////////////////////////////// // Wraps MPI_Cart routines, or implements equivalent on other impls diff --git a/lib/communicator/Communicator_mpi.cc b/lib/communicator/Communicator_mpi.cc index bd2a62fb..a3427b00 100644 --- a/lib/communicator/Communicator_mpi.cc +++ b/lib/communicator/Communicator_mpi.cc @@ -75,6 +75,13 @@ CartesianCommunicator::CartesianCommunicator(const std::vector &processors) assert(Size==_Nprocessors); } + 
+CartesianCommunicator::~CartesianCommunicator(){ + if (communicator && !MPI::Is_finalized()) + MPI_Comm_free(&communicator); +} + + void CartesianCommunicator::GlobalSum(uint32_t &u){ int ierr=MPI_Allreduce(MPI_IN_PLACE,&u,1,MPI_UINT32_T,MPI_SUM,communicator); assert(ierr==0); diff --git a/lib/qcd/action/scalar/ScalarInteractionAction.h b/lib/qcd/action/scalar/ScalarInteractionAction.h index 4d189352..a681b62c 100644 --- a/lib/qcd/action/scalar/ScalarInteractionAction.h +++ b/lib/qcd/action/scalar/ScalarInteractionAction.h @@ -30,119 +30,141 @@ directory #ifndef SCALAR_INT_ACTION_H #define SCALAR_INT_ACTION_H - // Note: this action can completely absorb the ScalarAction for real float fields // use the scalarObjs to generalise the structure -namespace Grid { - // FIXME drop the QCD namespace everywhere here +namespace Grid +{ +// FIXME drop the QCD namespace everywhere here - template - class ScalarInteractionAction : public QCD::Action { - public: - INHERIT_FIELD_TYPES(Impl); - private: - RealD mass_square; - RealD lambda; +template +class ScalarInteractionAction : public QCD::Action +{ +public: + INHERIT_FIELD_TYPES(Impl); +private: + RealD mass_square; + RealD lambda; - typedef typename Field::vector_object vobj; - typedef CartesianStencil Stencil; + typedef typename Field::vector_object vobj; + typedef CartesianStencil Stencil; - SimpleCompressor compressor; - int npoint = 2*Ndim; - std::vector directions;// = {0,1,2,3,0,1,2,3}; // forcing 4 dimensions - std::vector displacements;// = {1,1,1,1, -1,-1,-1,-1}; + SimpleCompressor compressor; + int npoint = 2 * Ndim; + std::vector directions; // = {0,1,2,3,0,1,2,3}; // forcing 4 dimensions + std::vector displacements; // = {1,1,1,1, -1,-1,-1,-1}; - - public: - - ScalarInteractionAction(RealD ms, RealD l) : mass_square(ms), lambda(l), displacements(2*Ndim,0), directions(2*Ndim,0){ - for (int mu = 0 ; mu < Ndim; mu++){ - directions[mu] = mu; directions[mu+Ndim] = mu; - displacements[mu] = 1; displacements[mu+Ndim] = 
-1; - } +public: + ScalarInteractionAction(RealD ms, RealD l) : mass_square(ms), lambda(l), displacements(2 * Ndim, 0), directions(2 * Ndim, 0) + { + for (int mu = 0; mu < Ndim; mu++) + { + directions[mu] = mu; + directions[mu + Ndim] = mu; + displacements[mu] = 1; + displacements[mu + Ndim] = -1; } + } - virtual std::string LogParameters() { - std::stringstream sstream; - sstream << GridLogMessage << "[ScalarAction] lambda : " << lambda << std::endl; - sstream << GridLogMessage << "[ScalarAction] mass_square : " << mass_square << std::endl; - return sstream.str(); - } + virtual std::string LogParameters() + { + std::stringstream sstream; + sstream << GridLogMessage << "[ScalarAction] lambda : " << lambda << std::endl; + sstream << GridLogMessage << "[ScalarAction] mass_square : " << mass_square << std::endl; + return sstream.str(); + } - virtual std::string action_name() {return "ScalarAction";} + virtual std::string action_name() { return "ScalarAction"; } - virtual void refresh(const Field &U, GridParallelRNG &pRNG) {} + virtual void refresh(const Field &U, GridParallelRNG &pRNG) {} - virtual RealD S(const Field &p) { - assert(p._grid->Nd() == Ndim); - static Stencil phiStencil(p._grid, npoint, 0, directions, displacements); - phiStencil.HaloExchange(p, compressor); - Field action(p._grid), pshift(p._grid), phisquared(p._grid); - phisquared = p*p; - action = (2.0*Ndim + mass_square)*phisquared - lambda/24.*phisquared*phisquared; - for (int mu = 0; mu < Ndim; mu++) { - // pshift = Cshift(p, mu, +1); // not efficient, implement with stencils - parallel_for (int i = 0; i < p._grid->oSites(); i++) { - int permute_type; - StencilEntry *SE; - vobj temp2; - const vobj *temp, *t_p; - - SE = phiStencil.GetEntry(permute_type, mu, i); - t_p = &p._odata[i]; - if ( SE->_is_local ) { - temp = &p._odata[SE->_offset]; - if ( SE->_permute ) { - permute(temp2, *temp, permute_type); - action._odata[i] -= temp2*(*t_p) + (*t_p)*temp2; - } else { - action._odata[i] -= (*temp)*(*t_p) 
+ (*t_p)*(*temp); - } - } else { - action._odata[i] -= phiStencil.CommBuf()[SE->_offset]*(*t_p) + (*t_p)*phiStencil.CommBuf()[SE->_offset]; - } - } - // action -= pshift*p + p*pshift; - } - // NB the trace in the algebra is normalised to 1/2 - // minus sign coming from the antihermitian fields - return -(TensorRemove(sum(trace(action)))).real(); - }; - - virtual void deriv(const Field &p, Field &force) { - assert(p._grid->Nd() == Ndim); - force = (2.0*Ndim + mass_square)*p - lambda/12.*p*p*p; - // move this outside - static Stencil phiStencil(p._grid, npoint, 0, directions, displacements); - phiStencil.HaloExchange(p, compressor); - - //for (int mu = 0; mu < QCD::Nd; mu++) force -= Cshift(p, mu, -1) + Cshift(p, mu, 1); - for (int point = 0; point < npoint; point++) { - parallel_for (int i = 0; i < p._grid->oSites(); i++) { - const vobj *temp; - vobj temp2; - int permute_type; - StencilEntry *SE; - SE = phiStencil.GetEntry(permute_type, point, i); - - if ( SE->_is_local ) { - temp = &p._odata[SE->_offset]; - if ( SE->_permute ) { - permute(temp2, *temp, permute_type); - force._odata[i] -= temp2; - } else { - force._odata[i] -= *temp; - } - } else { - force._odata[i] -= phiStencil.CommBuf()[SE->_offset]; - } - } + virtual RealD S(const Field &p) + { + assert(p._grid->Nd() == Ndim); + static Stencil phiStencil(p._grid, npoint, 0, directions, displacements); + phiStencil.HaloExchange(p, compressor); + Field action(p._grid), pshift(p._grid), phisquared(p._grid); + phisquared = p * p; + action = (2.0 * Ndim + mass_square) * phisquared - lambda / 24. 
* phisquared * phisquared; + for (int mu = 0; mu < Ndim; mu++) + { + // pshift = Cshift(p, mu, +1); // not efficient, implement with stencils + parallel_for(int i = 0; i < p._grid->oSites(); i++) + { + int permute_type; + StencilEntry *SE; + vobj temp2; + const vobj *temp, *t_p; + + SE = phiStencil.GetEntry(permute_type, mu, i); + t_p = &p._odata[i]; + if (SE->_is_local) + { + temp = &p._odata[SE->_offset]; + if (SE->_permute) + { + permute(temp2, *temp, permute_type); + action._odata[i] -= temp2 * (*t_p) + (*t_p) * temp2; + } + else + { + action._odata[i] -= (*temp) * (*t_p) + (*t_p) * (*temp); + } + } + else + { + action._odata[i] -= phiStencil.CommBuf()[SE->_offset] * (*t_p) + (*t_p) * phiStencil.CommBuf()[SE->_offset]; + } } + // action -= pshift*p + p*pshift; } + // NB the trace in the algebra is normalised to 1/2 + // minus sign coming from the antihermitian fields + return -(TensorRemove(sum(trace(action)))).real(); }; - -} // namespace Grid -#endif // SCALAR_INT_ACTION_H + virtual void deriv(const Field &p, Field &force) + { + assert(p._grid->Nd() == Ndim); + force = (2.0 * Ndim + mass_square) * p - lambda / 12. 
* p * p * p; + // move this outside + static Stencil phiStencil(p._grid, npoint, 0, directions, displacements); + phiStencil.HaloExchange(p, compressor); + + //for (int mu = 0; mu < QCD::Nd; mu++) force -= Cshift(p, mu, -1) + Cshift(p, mu, 1); + for (int point = 0; point < npoint; point++) + { + parallel_for(int i = 0; i < p._grid->oSites(); i++) + { + const vobj *temp; + vobj temp2; + int permute_type; + StencilEntry *SE; + SE = phiStencil.GetEntry(permute_type, point, i); + + if (SE->_is_local) + { + temp = &p._odata[SE->_offset]; + if (SE->_permute) + { + permute(temp2, *temp, permute_type); + force._odata[i] -= temp2; + } + else + { + force._odata[i] -= *temp; + } + } + else + { + force._odata[i] -= phiStencil.CommBuf()[SE->_offset]; + } + } + } + } +}; + +} // namespace Grid + +#endif // SCALAR_INT_ACTION_H From 9a827d0242f7164a4bc02c5b8cefe606878fcb84 Mon Sep 17 00:00:00 2001 From: Guido Cossu Date: Mon, 18 Sep 2017 14:55:51 +0100 Subject: [PATCH 114/377] Fixing a compilation error --- lib/communicator/Communicator_mpi3.cc | 3 +++ lib/communicator/Communicator_mpi3_leader.cc | 3 +++ lib/communicator/Communicator_mpit.cc | 3 +++ lib/communicator/Communicator_none.cc | 2 ++ lib/communicator/Communicator_shmem.cc | 3 +++ 5 files changed, 14 insertions(+) diff --git a/lib/communicator/Communicator_mpi3.cc b/lib/communicator/Communicator_mpi3.cc index 44aa1024..bb256e79 100644 --- a/lib/communicator/Communicator_mpi3.cc +++ b/lib/communicator/Communicator_mpi3.cc @@ -587,6 +587,9 @@ CartesianCommunicator::CartesianCommunicator(const std::vector &processors) } } }; + +CartesianCommunicator::~CartesianCommunicator() = default; + void CartesianCommunicator::GlobalSum(uint32_t &u){ int ierr=MPI_Allreduce(MPI_IN_PLACE,&u,1,MPI_UINT32_T,MPI_SUM,communicator); assert(ierr==0); diff --git a/lib/communicator/Communicator_mpi3_leader.cc b/lib/communicator/Communicator_mpi3_leader.cc index 6e26bd3e..da863508 100644 --- a/lib/communicator/Communicator_mpi3_leader.cc +++ 
b/lib/communicator/Communicator_mpi3_leader.cc @@ -830,6 +830,9 @@ CartesianCommunicator::CartesianCommunicator(const std::vector &processors) MPI_Cart_coords(communicator,_processor,_ndimension,&_processor_coor[0]); }; +CartesianCommunicator::~CartesianCommunicator() = default; + + void CartesianCommunicator::GlobalSum(uint32_t &u){ int ierr=MPI_Allreduce(MPI_IN_PLACE,&u,1,MPI_UINT32_T,MPI_SUM,communicator); assert(ierr==0); diff --git a/lib/communicator/Communicator_mpit.cc b/lib/communicator/Communicator_mpit.cc index eb6ef87d..7b7ec14c 100644 --- a/lib/communicator/Communicator_mpit.cc +++ b/lib/communicator/Communicator_mpit.cc @@ -80,6 +80,9 @@ CartesianCommunicator::CartesianCommunicator(const std::vector &processors) assert(Size==_Nprocessors); } + +CartesianCommunicator::~CartesianCommunicator() = default; + void CartesianCommunicator::GlobalSum(uint32_t &u){ int ierr=MPI_Allreduce(MPI_IN_PLACE,&u,1,MPI_UINT32_T,MPI_SUM,communicator); assert(ierr==0); diff --git a/lib/communicator/Communicator_none.cc b/lib/communicator/Communicator_none.cc index 5319ab93..a4e6cf54 100644 --- a/lib/communicator/Communicator_none.cc +++ b/lib/communicator/Communicator_none.cc @@ -53,6 +53,8 @@ CartesianCommunicator::CartesianCommunicator(const std::vector &processors) } } +CartesianCommunicator::~CartesianCommunicator() = default; + void CartesianCommunicator::GlobalSum(float &){} void CartesianCommunicator::GlobalSumVector(float *,int N){} void CartesianCommunicator::GlobalSum(double &){} diff --git a/lib/communicator/Communicator_shmem.cc b/lib/communicator/Communicator_shmem.cc index 3c76c808..826471c4 100644 --- a/lib/communicator/Communicator_shmem.cc +++ b/lib/communicator/Communicator_shmem.cc @@ -98,6 +98,9 @@ CartesianCommunicator::CartesianCommunicator(const std::vector &processors) assert(Size==_Nprocessors); } +CartesianCommunicator::~CartesianCommunicator() = default; + + void CartesianCommunicator::GlobalSum(uint32_t &u){ static long long source ; static long 
long dest ; From a9ec5cf564aafd737a9fbf224ae6ebf43b2ef780 Mon Sep 17 00:00:00 2001 From: paboyle Date: Thu, 21 Sep 2017 10:32:41 +0100 Subject: [PATCH 115/377] Christoph bug report integrate --- lib/tensors/Tensor_index.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/tensors/Tensor_index.h b/lib/tensors/Tensor_index.h index f114baf8..500f6c8d 100644 --- a/lib/tensors/Tensor_index.h +++ b/lib/tensors/Tensor_index.h @@ -175,7 +175,7 @@ class TensorIndexRecursion { } } template inline static - void pokeIndex(iVector &ret, const iVector::peekIndex(ret._internal[0],0)),N> &arg, int i,int j) + void pokeIndex(iVector &ret, const iVector::peekIndex(ret._internal[0],0,0)),N> &arg, int i,int j) { for(int ii=0;ii::pokeIndex(ret._internal[ii],arg._internal[ii],i,j); @@ -191,7 +191,7 @@ class TensorIndexRecursion { }} } template inline static - void pokeIndex(iMatrix &ret, const iMatrix::peekIndex(ret._internal[0][0],0)),N> &arg, int i,int j) + void pokeIndex(iMatrix &ret, const iMatrix::peekIndex(ret._internal[0][0],0,0)),N> &arg, int i,int j) { for(int ii=0;ii Date: Thu, 21 Sep 2017 11:10:08 +0100 Subject: [PATCH 116/377] Bug fix with spreadout FFT --- lib/algorithms/FFT.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/lib/algorithms/FFT.h b/lib/algorithms/FFT.h index 240f338b..ec558ad9 100644 --- a/lib/algorithms/FFT.h +++ b/lib/algorithms/FFT.h @@ -230,6 +230,7 @@ namespace Grid { // Barrel shift and collect global pencil std::vector lcoor(Nd), gcoor(Nd); result = source; + int pc = processor_coor[dim]; for(int p=0;plSites();idx++) { sgrid->LocalIndexToLocalCoor(idx,cbuf); peekLocalSite(s,result,cbuf); - cbuf[dim]+=p*L; + cbuf[dim]+=((pc+p) % processors[dim])*L; + // cbuf[dim]+=p*L; pokeLocalSite(s,pgbuf,cbuf); } } @@ -278,7 +280,6 @@ namespace Grid { flops+= flops_call*NN; // writing out result - int pc = processor_coor[dim]; PARALLEL_REGION { std::vector clbuf(Nd), cgbuf(Nd); From df21668f2c6d25b2c8c79e353514956517ed7682 
Mon Sep 17 00:00:00 2001 From: Antonin Portelli Date: Fri, 22 Sep 2017 14:21:18 +0100 Subject: [PATCH 117/377] memory profiler update --- lib/allocator/AlignedAllocator.cc | 4 +- lib/allocator/AlignedAllocator.h | 72 +++++++++++++++++++++++++++---- 2 files changed, 66 insertions(+), 10 deletions(-) diff --git a/lib/allocator/AlignedAllocator.cc b/lib/allocator/AlignedAllocator.cc index 967b2571..944e287f 100644 --- a/lib/allocator/AlignedAllocator.cc +++ b/lib/allocator/AlignedAllocator.cc @@ -3,9 +3,11 @@ namespace Grid { +MemoryStats *MemoryProfiler::stats = nullptr; + int PointerCache::victim; - PointerCache::PointerCacheEntry PointerCache::Entries[PointerCache::Ncache]; +PointerCache::PointerCacheEntry PointerCache::Entries[PointerCache::Ncache]; void *PointerCache::Insert(void *ptr,size_t bytes) { diff --git a/lib/allocator/AlignedAllocator.h b/lib/allocator/AlignedAllocator.h index e64a5949..68fad0d2 100644 --- a/lib/allocator/AlignedAllocator.h +++ b/lib/allocator/AlignedAllocator.h @@ -63,6 +63,18 @@ namespace Grid { static void *Lookup(size_t bytes) ; }; + + struct MemoryStats + { + size_t totalAllocated{0}, maxAllocated{0}, + currentlyAllocated{0}, totalFreed{0}; + }; + + class MemoryProfiler + { + public: + static MemoryStats *stats; + }; void check_huge_pages(void *Buf,uint64_t BYTES); @@ -93,6 +105,13 @@ public: { size_type bytes = __n*sizeof(_Tp); + if (auto s = MemoryProfiler::stats) + { + s->totalAllocated += bytes; + s->currentlyAllocated += bytes; + s->maxAllocated = std::max(s->maxAllocated, s->currentlyAllocated); + } + _Tp *ptr = (_Tp *) PointerCache::Lookup(bytes); // if ( ptr != NULL ) // std::cout << "alignedAllocator "<<__n << " cache hit "<< std::hex << ptr <totalFreed += bytes; + s->currentlyAllocated -= bytes; + } + pointer __freeme = (pointer)PointerCache::Insert((void *)__p,bytes); #ifdef HAVE_MM_MALLOC_H @@ -172,10 +197,18 @@ public: #ifdef GRID_COMMS_SHMEM pointer allocate(size_type __n, const void* _p= 0) { + size_type bytes = 
__n*sizeof(_Tp); + + if (auto s = MemoryProfiler::stats) + { + s->totalAllocated += bytes; + s->currentlyAllocated += bytes; + s->maxAllocated = std::max(s->maxAllocated, s->currentlyAllocated); + } #ifdef CRAY - _Tp *ptr = (_Tp *) shmem_align(__n*sizeof(_Tp),64); + _Tp *ptr = (_Tp *) shmem_align(bytes,64); #else - _Tp *ptr = (_Tp *) shmem_align(64,__n*sizeof(_Tp)); + _Tp *ptr = (_Tp *) shmem_align(64,bytes); #endif #ifdef PARANOID_SYMMETRIC_HEAP static void * bcast; @@ -193,18 +226,32 @@ public: #endif return ptr; } - void deallocate(pointer __p, size_type) { + void deallocate(pointer __p, size_type __n) { + size_type bytes = __n*sizeof(_Tp); + + if (auto s = MemoryProfiler::stats) + { + s->totalFreed += bytes; + s->currentlyAllocated -= bytes; + } shmem_free((void *)__p); } #else pointer allocate(size_type __n, const void* _p= 0) { -#ifdef HAVE_MM_MALLOC_H - _Tp * ptr = (_Tp *) _mm_malloc(__n*sizeof(_Tp),GRID_ALLOC_ALIGN); -#else - _Tp * ptr = (_Tp *) memalign(GRID_ALLOC_ALIGN,__n*sizeof(_Tp)); -#endif size_type bytes = __n*sizeof(_Tp); + + if (auto s = MemoryProfiler::stats) + { + s->totalAllocated += bytes; + s->currentlyAllocated += bytes; + s->maxAllocated = std::max(s->maxAllocated, s->currentlyAllocated); + } +#ifdef HAVE_MM_MALLOC_H + _Tp * ptr = (_Tp *) _mm_malloc(bytes, GRID_ALLOC_ALIGN); +#else + _Tp * ptr = (_Tp *) memalign(GRID_ALLOC_ALIGN, bytes); +#endif uint8_t *cp = (uint8_t *)ptr; // One touch per 4k page, static OMP loop to catch same loop order #pragma omp parallel for schedule(static) @@ -213,7 +260,14 @@ public: } return ptr; } - void deallocate(pointer __p, size_type) { + void deallocate(pointer __p, size_type __n) { + size_type bytes = __n*sizeof(_Tp); + + if (auto s = MemoryProfiler::stats) + { + s->totalFreed += bytes; + s->currentlyAllocated -= bytes; + } #ifdef HAVE_MM_MALLOC_H _mm_free((void *)__p); #else From 56478d63a5589e0ece63d7394b3c5167d5ef11bd Mon Sep 17 00:00:00 2001 From: David Preti Date: Sun, 24 Sep 2017 19:32:15 +0200 
Subject: [PATCH 118/377] clover + test (valence) --- lib/qcd/action/fermion/WilsonCloverFermion.cc | 126 +++++++++++++++--- lib/qcd/action/fermion/WilsonCloverFermion.h | 35 +++-- tests/core/Test_wilson_clover.cc | 44 +++--- 3 files changed, 142 insertions(+), 63 deletions(-) diff --git a/lib/qcd/action/fermion/WilsonCloverFermion.cc b/lib/qcd/action/fermion/WilsonCloverFermion.cc index f8b62ba4..54e72353 100644 --- a/lib/qcd/action/fermion/WilsonCloverFermion.cc +++ b/lib/qcd/action/fermion/WilsonCloverFermion.cc @@ -86,8 +86,8 @@ void WilsonCloverFermion::ImportGauge(const GaugeField &_Umu) CloverTerm += fillCloverXY(Bz); CloverTerm += fillCloverXT(Ex); CloverTerm += fillCloverYT(Ey); - CloverTerm += fillCloverZT(Ez) ; - CloverTerm *= csw; + CloverTerm += fillCloverZT(Ez); + CloverTerm *= 0.5 * csw; // FieldStrength normalization? should be ( -i/8 ). Is it the anti-symmetric combination? int lvol = _Umu._grid->lSites(); int DimRep = Impl::Dimension; @@ -109,7 +109,7 @@ void WilsonCloverFermion::ImportGauge(const GaugeField &_Umu) for (int a = 0; a < DimRep; a++) for (int b = 0; b < DimRep; b++) EigenCloverOp(a + j * DimRep, b + k * DimRep) = Qx()(j, k)(a, b); - //std::cout << EigenCloverOp << std::endl; + // if (site==0) std::cout << "site =" << site << "\n" << EigenCloverOp << std::endl; EigenInvCloverOp = EigenCloverOp.inverse(); @@ -119,6 +119,7 @@ void WilsonCloverFermion::ImportGauge(const GaugeField &_Umu) for (int a = 0; a < DimRep; a++) for (int b = 0; b < DimRep; b++) Qxinv()(j, k)(a, b) = EigenInvCloverOp(a + j * DimRep, b + k * DimRep); + // if (site==0) std::cout << "site =" << site << "\n" << EigenInvCloverOp << std::endl; pokeLocalSite(Qxinv, CloverTermInv, lcoor); } @@ -127,8 +128,17 @@ void WilsonCloverFermion::ImportGauge(const GaugeField &_Umu) pickCheckerboard(Even, CloverTermEven, CloverTerm); pickCheckerboard( Odd, CloverTermOdd, CloverTerm); + + pickCheckerboard(Even, CloverTermDagEven, adj(CloverTerm)); + pickCheckerboard( Odd, 
CloverTermDagOdd, adj(CloverTerm)); + + pickCheckerboard(Even, CloverTermInvEven, CloverTermInv); pickCheckerboard( Odd, CloverTermInvOdd, CloverTermInv); + + + pickCheckerboard(Even, CloverTermInvDagEven, adj(CloverTermInv)); + pickCheckerboard( Odd, CloverTermInvDagOdd, adj(CloverTermInv)); } @@ -142,7 +152,7 @@ void WilsonCloverFermion::Mooee(const FermionField &in, FermionField &out) template void WilsonCloverFermion::MooeeDag(const FermionField &in, FermionField &out) { - this->MooeeInternal(in, out, DaggerNo, InverseYes); + this->MooeeInternal(in, out, DaggerYes, InverseNo); } template @@ -154,7 +164,7 @@ void WilsonCloverFermion::MooeeInv(const FermionField &in, FermionField &o template void WilsonCloverFermion::MooeeInvDag(const FermionField &in, FermionField &out) { - this->MooeeInternal(in, out, DaggerNo, InverseYes); + this->MooeeInternal(in, out, DaggerYes, InverseYes); } template @@ -164,26 +174,98 @@ void WilsonCloverFermion::MooeeInternal(const FermionField &in, FermionFie CloverFieldType *Clover; assert(in.checkerboard == Odd || in.checkerboard == Even); - if (in._grid->_isCheckerBoarded) - { - if (in.checkerboard == Odd) - { - std::cout << "Calling clover term Odd" << std::endl; - Clover = (inv) ? &CloverTermInvOdd : &CloverTermOdd; + + + + if (dag){ + if (in._grid->_isCheckerBoarded){ + if (in.checkerboard == Odd){ + std::cout << "Calling clover term adj Odd" << std::endl; + Clover = (inv) ? 
&CloverTermInvDagOdd : &CloverTermDagOdd; + +/* test + int DimRep = Impl::Dimension; + Eigen::MatrixXcd A = Eigen::MatrixXcd::Zero(Ns * DimRep, Ns * DimRep); + std::vector lcoor; + typename SiteCloverType::scalar_object Qx2 = zero; + GridBase *grid = in._grid; + int site = 0 ; + grid->LocalIndexToLocalCoor(site, lcoor); + peekLocalSite(Qx2, *Clover, lcoor); + for (int j = 0; j < Ns; j++) + for (int k = 0; k < Ns; k++) + for (int a = 0; a < DimRep; a++) + for (int b = 0; b < DimRep; b++) + A(a + j * DimRep, b + k * DimRep) = Qx2()(j, k)(a, b); + std::cout << "adj Odd =" << site << "\n" << A << std::endl; + end test */ + + + + } else { + std::cout << "Calling clover term adj Even" << std::endl; + Clover = (inv) ? &CloverTermInvDagEven : &CloverTermDagEven; + +/* test + int DimRep = Impl::Dimension; + Eigen::MatrixXcd A = Eigen::MatrixXcd::Zero(Ns * DimRep, Ns * DimRep); + std::vector lcoor; + typename SiteCloverType::scalar_object Qx2 = zero; + GridBase *grid = in._grid; + int site = 0 ; + grid->LocalIndexToLocalCoor(site, lcoor); + peekLocalSite(Qx2, *Clover, lcoor); + for (int j = 0; j < Ns; j++) + for (int k = 0; k < Ns; k++) + for (int a = 0; a < DimRep; a++) + for (int b = 0; b < DimRep; b++) + A(a + j * DimRep, b + k * DimRep) = Qx2()(j, k)(a, b); + std::cout << "adj Odd =" << site << "\n" << A << std::endl; + end test */ + + } - else - { - std::cout << "Calling clover term Even" << std::endl; - Clover = (inv) ? &CloverTermInvEven : &CloverTermEven; - } - } - else - { - Clover = (inv) ? &CloverTermInv : &CloverTerm; + std::cout << GridLogMessage << "*Clover.checkerboard " << (*Clover).checkerboard << std::endl; + out = *Clover * in; + } else { + Clover = (inv) ? 
&CloverTermInv : &CloverTerm; + out = adj(*Clover) * in; } - std::cout << GridLogMessage << "*Clover.checkerboard " << (*Clover).checkerboard << std::endl; - if (dag){ out = adj(*Clover) * in;} else { out = *Clover * in;} + + + + } else { + if (in._grid->_isCheckerBoarded){ + + if (in.checkerboard == Odd){ + std::cout << "Calling clover term Odd" << std::endl; + Clover = (inv) ? &CloverTermInvOdd : &CloverTermOdd; + } else { + std::cout << "Calling clover term Even" << std::endl; + Clover = (inv) ? &CloverTermInvEven : &CloverTermEven; + } + out = *Clover * in; + std::cout << GridLogMessage << "*Clover.checkerboard " << (*Clover).checkerboard << std::endl; + } else { + Clover = (inv) ? &CloverTermInv : &CloverTerm; + out = *Clover * in; + } + } + + + + + + + +/* + } else { + out = *Clover * in; + } + */ + + } // MooeeInternal // Derivative parts diff --git a/lib/qcd/action/fermion/WilsonCloverFermion.h b/lib/qcd/action/fermion/WilsonCloverFermion.h index fd9d1f60..e8654513 100644 --- a/lib/qcd/action/fermion/WilsonCloverFermion.h +++ b/lib/qcd/action/fermion/WilsonCloverFermion.h @@ -63,7 +63,11 @@ public: CloverTermEven(&Hgrid), CloverTermOdd(&Hgrid), CloverTermInvEven(&Hgrid), - CloverTermInvOdd(&Hgrid) + CloverTermInvOdd(&Hgrid), + CloverTermDagEven(&Hgrid), //test + CloverTermDagOdd(&Hgrid), //test + CloverTermInvDagEven(&Hgrid), //test + CloverTermInvDagOdd(&Hgrid) //test { csw = _csw; assert(Nd == 4); // require 4 dimensions @@ -91,6 +95,11 @@ private: CloverFieldType CloverTerm, CloverTermInv; // Clover term CloverFieldType CloverTermEven, CloverTermOdd; CloverFieldType CloverTermInvEven, CloverTermInvOdd; // Clover term + + CloverFieldType CloverTermInvDagEven, CloverTermInvDagOdd; //test + CloverFieldType CloverTermDagEven, CloverTermDagOdd; //test + + // eventually these two can be compressed into 6x6 blocks instead of the 12x12 // using the DeGrand-Rossi basis for the gamma matrices @@ -149,10 +158,10 @@ private: PARALLEL_FOR_LOOP for (int i = 0; i < 
CloverTerm._grid->oSites(); i++) { - T._odata[i]()(0, 1) = timesMinusI(F._odata[i]()()); - T._odata[i]()(1, 0) = timesMinusI(F._odata[i]()()); - T._odata[i]()(2, 3) = timesI(F._odata[i]()()); - T._odata[i]()(3, 2) = timesI(F._odata[i]()()); + T._odata[i]()(0, 1) = timesI(F._odata[i]()()); //fixed + T._odata[i]()(1, 0) = timesI(F._odata[i]()()); //fixed + T._odata[i]()(2, 3) = timesMinusI(F._odata[i]()()); //fixed + T._odata[i]()(3, 2) = timesMinusI(F._odata[i]()()); //fixed } return T; @@ -165,10 +174,10 @@ private: PARALLEL_FOR_LOOP for (int i = 0; i < CloverTerm._grid->oSites(); i++) { - T._odata[i]()(0, 1) = (F._odata[i]()()); - T._odata[i]()(1, 0) = -(F._odata[i]()()); - T._odata[i]()(2, 3) = -(F._odata[i]()()); - T._odata[i]()(3, 2) = (F._odata[i]()()); + T._odata[i]()(0, 1) = -(F._odata[i]()()); //fixed + T._odata[i]()(1, 0) = (F._odata[i]()()); //fixed + T._odata[i]()(2, 3) = (F._odata[i]()()); //fixed + T._odata[i]()(3, 2) = -(F._odata[i]()()); //fixed } return T; @@ -181,10 +190,10 @@ private: PARALLEL_FOR_LOOP for (int i = 0; i < CloverTerm._grid->oSites(); i++) { - T._odata[i]()(0, 0) = timesMinusI(F._odata[i]()()); - T._odata[i]()(1, 1) = timesI(F._odata[i]()()); - T._odata[i]()(2, 2) = timesI(F._odata[i]()()); - T._odata[i]()(3, 3) = timesMinusI(F._odata[i]()()); + T._odata[i]()(0, 0) = timesI(F._odata[i]()()); //fixed + T._odata[i]()(1, 1) = timesMinusI(F._odata[i]()()); //fixed + T._odata[i]()(2, 2) = timesMinusI(F._odata[i]()()); //fixed + T._odata[i]()(3, 3) = timesI(F._odata[i]()()); //fixed } return T; diff --git a/tests/core/Test_wilson_clover.cc b/tests/core/Test_wilson_clover.cc index 1b208e2d..9e5b246e 100644 --- a/tests/core/Test_wilson_clover.cc +++ b/tests/core/Test_wilson_clover.cc @@ -172,11 +172,6 @@ int main (int argc, char ** argv) std::cout< HermOpEO(Dwc); - HermOpEO.MpcDagMpc(chi_e,dchi_e,t1,t2); - HermOpEO.MpcDagMpc(chi_o,dchi_o,t1,t2); + Dwc.MooeeDag(chi_e,src_e); + Dwc.MooeeInv(src_e,phi_e); - 
HermOpEO.MpcDagMpc(phi_e,dphi_e,t1,t2); - HermOpEO.MpcDagMpc(phi_o,dphi_o,t1,t2); + Dwc.MooeeDag(chi_o,src_o); + Dwc.MooeeInv(src_o,phi_o); - pDce = innerProduct(phi_e,dchi_e); - pDco = innerProduct(phi_o,dchi_o); - cDpe = innerProduct(chi_e,dphi_e); - cDpo = innerProduct(chi_o,dphi_o); - - std::cout< Date: Fri, 29 Sep 2017 16:09:34 +0100 Subject: [PATCH 119/377] Scalar: SU(N) action change to t'Hooft scaling --- lib/qcd/action/scalar/ScalarInteractionAction.h | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/lib/qcd/action/scalar/ScalarInteractionAction.h b/lib/qcd/action/scalar/ScalarInteractionAction.h index a681b62c..9d855137 100644 --- a/lib/qcd/action/scalar/ScalarInteractionAction.h +++ b/lib/qcd/action/scalar/ScalarInteractionAction.h @@ -44,8 +44,9 @@ public: INHERIT_FIELD_TYPES(Impl); private: - RealD mass_square; - RealD lambda; + RealD mass_square; + RealD lambda; + const unsigned int N = Impl::Group::Dimension; typedef typename Field::vector_object vobj; typedef CartesianStencil Stencil; @@ -85,8 +86,8 @@ public: static Stencil phiStencil(p._grid, npoint, 0, directions, displacements); phiStencil.HaloExchange(p, compressor); Field action(p._grid), pshift(p._grid), phisquared(p._grid); - phisquared = p * p; - action = (2.0 * Ndim + mass_square) * phisquared - lambda / 24. * phisquared * phisquared; + phisquared = p*p; + action = (2.*Ndim + mass_square) * phisquared - phisquared * phisquared; for (int mu = 0; mu < Ndim; mu++) { // pshift = Cshift(p, mu, +1); // not efficient, implement with stencils @@ -121,13 +122,13 @@ public: } // NB the trace in the algebra is normalised to 1/2 // minus sign coming from the antihermitian fields - return -(TensorRemove(sum(trace(action)))).real(); + return -(TensorRemove(sum(trace(action)))).real()*N/lambda; }; virtual void deriv(const Field &p, Field &force) { assert(p._grid->Nd() == Ndim); - force = (2.0 * Ndim + mass_square) * p - lambda / 12. 
* p * p * p; + force = (2.0 * Ndim + mass_square) * p - 2. * p * p * p; // move this outside static Stencil phiStencil(p._grid, npoint, 0, directions, displacements); phiStencil.HaloExchange(p, compressor); @@ -162,6 +163,7 @@ public: } } } + force *= N/lambda; } }; From f7072d1ac2b5552998bf49b01a47128d67d7b682 Mon Sep 17 00:00:00 2001 From: Guido Cossu Date: Mon, 2 Oct 2017 07:13:40 +0100 Subject: [PATCH 120/377] Solving an annoying compilation error in json --- lib/json/json.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/json/json.hpp b/lib/json/json.hpp index e7c42920..9d589120 100644 --- a/lib/json/json.hpp +++ b/lib/json/json.hpp @@ -63,7 +63,7 @@ SOFTWARE. #error "unsupported Clang version - see https://github.com/nlohmann/json#supported-compilers" #endif #elif defined(__GNUC__) - #if (__GNUC__ * 10000 + __GNUC_MINOR__ * 100 + __GNUC_PATCHLEVEL__) < 40900 + #if (__GNUC__ * 10000 + __GNUC_MINOR__ * 100 + __GNUC_PATCHLEVEL__) < 40805 #error "unsupported GCC version - see https://github.com/nlohmann/json#supported-compilers" #endif #endif From 073525c5b3a32e6ab4567246893c08d8da609ac9 Mon Sep 17 00:00:00 2001 From: paboyle Date: Mon, 2 Oct 2017 03:38:21 -0700 Subject: [PATCH 121/377] Small patch from cori --- tests/solver/Test_dwf_mrhs_cg.cc | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/tests/solver/Test_dwf_mrhs_cg.cc b/tests/solver/Test_dwf_mrhs_cg.cc index b4880148..2d2cfcb1 100644 --- a/tests/solver/Test_dwf_mrhs_cg.cc +++ b/tests/solver/Test_dwf_mrhs_cg.cc @@ -128,12 +128,11 @@ int main (int argc, char ** argv) { for(int n=0;n Date: Mon, 2 Oct 2017 12:25:52 +0100 Subject: [PATCH 122/377] RB constructor change --- tests/solver/Test_staggered_cg_prec.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/solver/Test_staggered_cg_prec.cc b/tests/solver/Test_staggered_cg_prec.cc index 66f11d3d..0e359c2d 100644 --- a/tests/solver/Test_staggered_cg_prec.cc +++ 
b/tests/solver/Test_staggered_cg_prec.cc @@ -57,7 +57,7 @@ int main (int argc, char ** argv) std::vector simd_layout = GridDefaultSimd(Nd,vComplex::Nsimd()); std::vector mpi_layout = GridDefaultMpi(); GridCartesian Grid(latt_size,simd_layout,mpi_layout); - GridRedBlackCartesian RBGrid(latt_size,simd_layout,mpi_layout); + GridRedBlackCartesian RBGrid(&Grid); std::vector seeds({1,2,3,4}); GridParallelRNG pRNG(&Grid); pRNG.SeedFixedIntegers(seeds); From 1edcf902b73b66240b293f6fae94b991117da1b7 Mon Sep 17 00:00:00 2001 From: paboyle Date: Mon, 2 Oct 2017 12:41:02 +0100 Subject: [PATCH 123/377] Macos ANON --- lib/communicator/Communicator_base.cc | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/lib/communicator/Communicator_base.cc b/lib/communicator/Communicator_base.cc index c60f6c6d..836bc01e 100644 --- a/lib/communicator/Communicator_base.cc +++ b/lib/communicator/Communicator_base.cc @@ -147,8 +147,13 @@ void *CartesianCommunicator::ShmBufferTranslate(int rank,void * local_p) { } void CartesianCommunicator::ShmInitGeneric(void){ #if 1 - - int mmap_flag = MAP_SHARED | MAP_ANONYMOUS; + int mmap_flag =0; +#ifdef MAP_ANONYMOUS + mmap_flag = mmap_flag| MAP_SHARED | MAP_ANONYMOUS; +#endif +#ifdef MAP_ANON + mmap_flag = mmap_flag| MAP_SHARED | MAP_ANON; +#endif #ifdef MAP_HUGETLB if ( Hugepages ) mmap_flag |= MAP_HUGETLB; #endif From d54807b8c0cd1a7658ff8563bb00d1137b987e3e Mon Sep 17 00:00:00 2001 From: paboyle Date: Mon, 2 Oct 2017 23:14:56 +0100 Subject: [PATCH 124/377] MPIT works with split grid now --- lib/communicator/Communicator_base.cc | 99 +++++++++++++++++++++++ lib/communicator/Communicator_base.h | 3 +- lib/communicator/Communicator_mpi.cc | 89 -------------------- lib/communicator/Communicator_mpit.cc | 27 ------- lib/qcd/action/fermion/CayleyFermion5D.cc | 2 - 5 files changed, 100 insertions(+), 120 deletions(-) diff --git a/lib/communicator/Communicator_base.cc b/lib/communicator/Communicator_base.cc index 836bc01e..bcf429ab 
100644 --- a/lib/communicator/Communicator_base.cc +++ b/lib/communicator/Communicator_base.cc @@ -96,6 +96,105 @@ void CartesianCommunicator::GlobalSumVector(ComplexD *c,int N) GlobalSumVector((double *)c,2*N); } + +#if defined( GRID_COMMS_MPI) || defined (GRID_COMMS_MPIT) + +CartesianCommunicator::CartesianCommunicator(const std::vector &processors,const CartesianCommunicator &parent) +{ + _ndimension = processors.size(); + assert(_ndimension = parent._ndimension); + + ////////////////////////////////////////////////////////////////////////////////////////////////////// + // split the communicator + ////////////////////////////////////////////////////////////////////////////////////////////////////// + int Nparent; + MPI_Comm_size(parent.communicator,&Nparent); + + int childsize=1; + for(int d=0;d 1 ) { + + std::cout << GridLogMessage<<"Child communicator of "<< std::hex << parent.communicator << std::dec< &processors, MPI_Comm communicator_base) +{ + // if ( communicator_base != communicator_world ) { + // std::cout << "Cartesian communicator created with a non-world communicator"< periodic(_ndimension,1); + MPI_Cart_create(communicator_base, _ndimension,&_processors[0],&periodic[0],1,&communicator); + MPI_Comm_rank(communicator,&_processor); + MPI_Cart_coords(communicator,_processor,_ndimension,&_processor_coor[0]); + + int Size; + MPI_Comm_size(communicator,&Size); + +#ifdef GRID_COMMS_MPIT + communicator_halo.resize (2*_ndimension); + for(int i=0;i<_ndimension*2;i++){ + MPI_Comm_dup(communicator,&communicator_halo[i]); + } +#endif + + assert(Size==_Nprocessors); +} + +CartesianCommunicator::CartesianCommunicator(const std::vector &processors) +{ + InitFromMPICommunicator(processors,communicator_world); +} + +#endif + #if !defined( GRID_COMMS_MPI3) int CartesianCommunicator::NodeCount(void) { return ProcessorCount();}; diff --git a/lib/communicator/Communicator_base.h b/lib/communicator/Communicator_base.h index e8791cd8..bfdb0da1 100644 --- 
a/lib/communicator/Communicator_base.h +++ b/lib/communicator/Communicator_base.h @@ -157,8 +157,7 @@ class CartesianCommunicator { CartesianCommunicator(const std::vector &pdimensions_in); private: -#if defined (GRID_COMMS_MPI) - //|| defined (GRID_COMMS_MPI3) +#if defined (GRID_COMMS_MPI) || defined (GRID_COMMS_MPIT) //////////////////////////////////////////////// // Private initialise from an MPI communicator // Can use after an MPI_Comm_split, but hidden from user so private diff --git a/lib/communicator/Communicator_mpi.cc b/lib/communicator/Communicator_mpi.cc index 1a7a0c05..a55c0164 100644 --- a/lib/communicator/Communicator_mpi.cc +++ b/lib/communicator/Communicator_mpi.cc @@ -52,95 +52,6 @@ void CartesianCommunicator::Init(int *argc, char ***argv) { MPI_Comm_dup (MPI_COMM_WORLD,&communicator_world); ShmInitGeneric(); } - -CartesianCommunicator::CartesianCommunicator(const std::vector &processors) -{ - InitFromMPICommunicator(processors,communicator_world); - // std::cout << "Passed communicator world to a new communicator" < &processors,const CartesianCommunicator &parent) -{ - _ndimension = processors.size(); - assert(_ndimension = parent._ndimension); - - ////////////////////////////////////////////////////////////////////////////////////////////////////// - // split the communicator - ////////////////////////////////////////////////////////////////////////////////////////////////////// - int Nparent; - MPI_Comm_size(parent.communicator,&Nparent); - - int childsize=1; - for(int d=0;d 1 ) { - - std::cout << GridLogMessage<<"Child communicator of "<< std::hex << parent.communicator << std::dec< &processors, MPI_Comm communicator_base) -{ - // if ( communicator_base != communicator_world ) { - // std::cout << "Cartesian communicator created with a non-world communicator"< periodic(_ndimension,1); - MPI_Cart_create(communicator_base, _ndimension,&_processors[0],&periodic[0],1,&communicator); - MPI_Comm_rank(communicator,&_processor); - 
MPI_Cart_coords(communicator,_processor,_ndimension,&_processor_coor[0]); - - int Size; - MPI_Comm_size(communicator,&Size); - - assert(Size==_Nprocessors); -} void CartesianCommunicator::GlobalSum(uint32_t &u){ int ierr=MPI_Allreduce(MPI_IN_PLACE,&u,1,MPI_UINT32_T,MPI_SUM,communicator); assert(ierr==0); diff --git a/lib/communicator/Communicator_mpit.cc b/lib/communicator/Communicator_mpit.cc index eb6ef87d..5137c27b 100644 --- a/lib/communicator/Communicator_mpit.cc +++ b/lib/communicator/Communicator_mpit.cc @@ -53,33 +53,6 @@ void CartesianCommunicator::Init(int *argc, char ***argv) { ShmInitGeneric(); } -CartesianCommunicator::CartesianCommunicator(const std::vector &processors) -{ - _ndimension = processors.size(); - std::vector periodic(_ndimension,1); - - _Nprocessors=1; - _processors = processors; - _processor_coor.resize(_ndimension); - - MPI_Cart_create(communicator_world, _ndimension,&_processors[0],&periodic[0],1,&communicator); - MPI_Comm_rank(communicator,&_processor); - MPI_Cart_coords(communicator,_processor,_ndimension,&_processor_coor[0]); - - for(int i=0;i<_ndimension;i++){ - _Nprocessors*=_processors[i]; - } - - communicator_halo.resize (2*_ndimension); - for(int i=0;i<_ndimension*2;i++){ - MPI_Comm_dup(communicator,&communicator_halo[i]); - } - - int Size; - MPI_Comm_size(communicator,&Size); - - assert(Size==_Nprocessors); -} void CartesianCommunicator::GlobalSum(uint32_t &u){ int ierr=MPI_Allreduce(MPI_IN_PLACE,&u,1,MPI_UINT32_T,MPI_SUM,communicator); assert(ierr==0); diff --git a/lib/qcd/action/fermion/CayleyFermion5D.cc b/lib/qcd/action/fermion/CayleyFermion5D.cc index 838b1c3d..eace6484 100644 --- a/lib/qcd/action/fermion/CayleyFermion5D.cc +++ b/lib/qcd/action/fermion/CayleyFermion5D.cc @@ -77,7 +77,6 @@ void CayleyFermion5D::DminusDag(const FermionField &psi, FermionField &chi } } - template void CayleyFermion5D::CayleyReport(void) { this->Report(); @@ -119,7 +118,6 @@ template void CayleyFermion5D::CayleyZeroCounters(void) 
MooeeInvTime=0; } - template void CayleyFermion5D::M5D (const FermionField &psi, FermionField &chi) { From 05c1c88440a9b00c4a35e8487ab92a27afb48aea Mon Sep 17 00:00:00 2001 From: Antonin Portelli Date: Tue, 3 Oct 2017 14:26:20 +0100 Subject: [PATCH 125/377] Scalar: more action generalisation --- lib/qcd/action/scalar/ScalarInteractionAction.h | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/lib/qcd/action/scalar/ScalarInteractionAction.h b/lib/qcd/action/scalar/ScalarInteractionAction.h index 9d855137..3848751d 100644 --- a/lib/qcd/action/scalar/ScalarInteractionAction.h +++ b/lib/qcd/action/scalar/ScalarInteractionAction.h @@ -46,6 +46,7 @@ public: private: RealD mass_square; RealD lambda; + RealD g; const unsigned int N = Impl::Group::Dimension; typedef typename Field::vector_object vobj; @@ -57,7 +58,7 @@ private: std::vector displacements; // = {1,1,1,1, -1,-1,-1,-1}; public: - ScalarInteractionAction(RealD ms, RealD l) : mass_square(ms), lambda(l), displacements(2 * Ndim, 0), directions(2 * Ndim, 0) + ScalarInteractionAction(RealD ms, RealD l, RealD gval) : mass_square(ms), lambda(l), g(gval), displacements(2 * Ndim, 0), directions(2 * Ndim, 0) { for (int mu = 0; mu < Ndim; mu++) { @@ -73,6 +74,7 @@ public: std::stringstream sstream; sstream << GridLogMessage << "[ScalarAction] lambda : " << lambda << std::endl; sstream << GridLogMessage << "[ScalarAction] mass_square : " << mass_square << std::endl; + sstream << GridLogMessage << "[ScalarAction] g : " << g << std::endl; return sstream.str(); } @@ -86,8 +88,8 @@ public: static Stencil phiStencil(p._grid, npoint, 0, directions, displacements); phiStencil.HaloExchange(p, compressor); Field action(p._grid), pshift(p._grid), phisquared(p._grid); - phisquared = p*p; - action = (2.*Ndim + mass_square) * phisquared - phisquared * phisquared; + phisquared = p * p; + action = (2.0 * Ndim + mass_square) * phisquared - lambda * phisquared * phisquared; for (int mu = 0; mu < Ndim; mu++) { 
// pshift = Cshift(p, mu, +1); // not efficient, implement with stencils @@ -122,13 +124,13 @@ public: } // NB the trace in the algebra is normalised to 1/2 // minus sign coming from the antihermitian fields - return -(TensorRemove(sum(trace(action)))).real()*N/lambda; + return -(TensorRemove(sum(trace(action)))).real()*N/g; }; virtual void deriv(const Field &p, Field &force) { assert(p._grid->Nd() == Ndim); - force = (2.0 * Ndim + mass_square) * p - 2. * p * p * p; + force = (2. * Ndim + mass_square) * p - 2. * lambda * p * p * p; // move this outside static Stencil phiStencil(p._grid, npoint, 0, directions, displacements); phiStencil.HaloExchange(p, compressor); @@ -163,7 +165,7 @@ public: } } } - force *= N/lambda; + force *= N/g; } }; From 8784f2a88d780c7134574cf452d7c5550bda5769 Mon Sep 17 00:00:00 2001 From: Antonin Portelli Date: Tue, 3 Oct 2017 14:38:10 +0100 Subject: [PATCH 126/377] post-merge fix --- lib/communicator/Communicator_none.cc | 2 -- 1 file changed, 2 deletions(-) diff --git a/lib/communicator/Communicator_none.cc b/lib/communicator/Communicator_none.cc index c97a181b..40feefec 100644 --- a/lib/communicator/Communicator_none.cc +++ b/lib/communicator/Communicator_none.cc @@ -56,8 +56,6 @@ CartesianCommunicator::CartesianCommunicator(const std::vector &processors) } } -CartesianCommunicator::~CartesianCommunicator() = default; - void CartesianCommunicator::GlobalSum(float &){} void CartesianCommunicator::GlobalSumVector(float *,int N){} void CartesianCommunicator::GlobalSum(double &){} From d38cee73bf1a9cc14bfa0e1f8aefcb2b99bdbb8d Mon Sep 17 00:00:00 2001 From: Antonin Portelli Date: Tue, 3 Oct 2017 17:29:34 +0100 Subject: [PATCH 127/377] Scalar: easier Fourier acceleration parametrisation through -D flags --- lib/qcd/action/scalar/ScalarImpl.h | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/lib/qcd/action/scalar/ScalarImpl.h b/lib/qcd/action/scalar/ScalarImpl.h index 3dd3cc70..650f4d17 100644 --- 
a/lib/qcd/action/scalar/ScalarImpl.h +++ b/lib/qcd/action/scalar/ScalarImpl.h @@ -89,13 +89,12 @@ class ScalarImplTypes { }; - - #define USE_FFT_ACCELERATION - #ifdef USE_FFT_ACCELERATION - #define FFT_MASS 0.707 + #ifdef USE_FFT_ACCELERATION + #ifndef FFT_MASS + #error "USE_FFT_ACCELERATION is defined but not FFT_MASS" #endif - - + #endif + template class ScalarAdjMatrixImplTypes { public: From 1e54882f7145bd38db8ac1681cb7d4f9bceb2297 Mon Sep 17 00:00:00 2001 From: Azusa Yamaguchi Date: Wed, 4 Oct 2017 10:51:06 +0100 Subject: [PATCH 128/377] Stagger --- tests/solver/Test_staggered_cg_prec.cc | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/tests/solver/Test_staggered_cg_prec.cc b/tests/solver/Test_staggered_cg_prec.cc index 66f11d3d..0a803c21 100644 --- a/tests/solver/Test_staggered_cg_prec.cc +++ b/tests/solver/Test_staggered_cg_prec.cc @@ -83,5 +83,14 @@ int main (int argc, char ** argv) ConjugateGradient CG(1.0e-8,10000); CG(HermOpEO,src_o,res_o); + FermionField tmp(&RBGrid); + + HermOpEO.Mpc(res_o,tmp); + std::cout << "check Mpc resid " << axpy_norm(tmp,-1.0,src_o,tmp)/norm2(src_o) << "\n"; + + RealD n1,n2; + HermOpEO.MpcDagMpc(res_o,tmp,n1,n2); + std::cout << "check MpcDagMpc resid " << axpy_norm(tmp,-1.0,src_o,tmp)/norm2(src_o) << "\n"; + Grid_finalize(); } From d810e8c8fb765236fadcc23d76daf9a2eb0f3144 Mon Sep 17 00:00:00 2001 From: David Preti Date: Thu, 5 Oct 2017 10:13:53 +0200 Subject: [PATCH 129/377] first attempt to write C terms in clover derivative. 
Some shifts to be fixed --- lib/perfmon/PerfCount.h | 3 +- lib/perfmon/Stat.cc | 30 ++++---- lib/qcd/action/fermion/WilsonCloverFermion.cc | 75 +++++++++++++++---- 3 files changed, 77 insertions(+), 31 deletions(-) diff --git a/lib/perfmon/PerfCount.h b/lib/perfmon/PerfCount.h index 73d2c70f..cb27c283 100644 --- a/lib/perfmon/PerfCount.h +++ b/lib/perfmon/PerfCount.h @@ -90,9 +90,10 @@ inline uint64_t cyclecount(void){ } #elif defined __x86_64__ inline uint64_t cyclecount(void){ - return __rdtsc(); + //return __rdtsc(); // unsigned int dummy; // return __rdtscp(&dummy); +return 0; // <- remove this; } #else diff --git a/lib/perfmon/Stat.cc b/lib/perfmon/Stat.cc index 3f47fd83..75679412 100644 --- a/lib/perfmon/Stat.cc +++ b/lib/perfmon/Stat.cc @@ -57,35 +57,37 @@ void PmuStat::start(void) pmu_start(); ++count; xmemctrs(&mrstart, &mwstart); - tstart = __rdtsc(); + //tstart = __rdtsc(); +tstart=0; #endif } void PmuStat::enter(int t) { #ifdef __x86_64__ - counters[0][t] = __rdpmc(0); - counters[1][t] = __rdpmc(1); - counters[2][t] = __rdpmc((1<<30)|0); - counters[3][t] = __rdpmc((1<<30)|1); - counters[4][t] = __rdpmc((1<<30)|2); - counters[5][t] = __rdtsc(); + counters[0][t] = 0;//__rdpmc(0); + counters[1][t] = 0;//__rdpmc(1); + counters[2][t] = 0;//__rdpmc((1<<30)|0); + counters[3][t] = 0;//__rdpmc((1<<30)|1); + counters[4][t] = 0;//__rdpmc((1<<30)|2); + counters[5][t] = 0;//__rdtsc(); #endif } void PmuStat::exit(int t) { #ifdef __x86_64__ - counters[0][t] = __rdpmc(0) - counters[0][t]; - counters[1][t] = __rdpmc(1) - counters[1][t]; - counters[2][t] = __rdpmc((1<<30)|0) - counters[2][t]; - counters[3][t] = __rdpmc((1<<30)|1) - counters[3][t]; - counters[4][t] = __rdpmc((1<<30)|2) - counters[4][t]; - counters[5][t] = __rdtsc() - counters[5][t]; + counters[0][t] = 0;//__rdpmc(0) - counters[0][t]; + counters[1][t] = 0;// __rdpmc(1) - counters[1][t]; + counters[2][t] = 0;// __rdpmc((1<<30)|0) - counters[2][t]; + counters[3][t] = 0;// __rdpmc((1<<30)|1) - counters[3][t]; 
+ counters[4][t] = 0;// __rdpmc((1<<30)|2) - counters[4][t]; + counters[5][t] = 0;// __rdtsc() - counters[5][t]; #endif } void PmuStat::accum(int nthreads) { #ifdef __x86_64__ - tend = __rdtsc(); + // tend = __rdtsc(); + tend =0 ; xmemctrs(&mrend, &mwend); pmu_stop(); for (int t = 0; t < nthreads; ++t) { diff --git a/lib/qcd/action/fermion/WilsonCloverFermion.cc b/lib/qcd/action/fermion/WilsonCloverFermion.cc index 54e72353..bc996ccb 100644 --- a/lib/qcd/action/fermion/WilsonCloverFermion.cc +++ b/lib/qcd/action/fermion/WilsonCloverFermion.cc @@ -253,25 +253,14 @@ void WilsonCloverFermion::MooeeInternal(const FermionField &in, FermionFie } } - - - - - - -/* - } else { - out = *Clover * in; - } - */ - - } // MooeeInternal // Derivative parts template void WilsonCloverFermion::MDeriv(GaugeField &mat, const FermionField &U, const FermionField &V, int dag) { + + GaugeField tmp(mat._grid); conformable(U._grid, V._grid); @@ -287,10 +276,64 @@ void WilsonCloverFermion::MDeriv(GaugeField &mat, const FermionField &U, c // Derivative parts template -void WilsonCloverFermion::MooDeriv(GaugeField &mat, const FermionField &U, const FermionField &V, int dag) +void WilsonCloverFermion::MooDeriv(GaugeField &mat, const FermionField &X, const FermionField &Y, int dag) { - // Compute the 8 terms of the derivative - assert(0); // not implemented yet + +GridBase *grid = mat._grid; + +GaugeLinkField Lambda(grid), tmp(grid); +Lambda = zero; //Y*dag(X)+X*dag(Y); // I have to peek spin and decide the color structure + +conformable(mat._grid, X._grid); +conformable(Y._grid, X._grid); + +std::vector C1p(Nd,grid), C2p(Nd,grid), C3p(Nd,grid), C4p(Nd,grid); +std::vector C1m(Nd,grid), C2m(Nd,grid), C3m(Nd,grid), C4m(Nd,grid); +std::vector U(Nd, mat._grid); + +for (int mu = 0; mu < Nd; mu++) { + U[mu] = PeekIndex(mat, mu); + C1p[mu]=zero; C2p[mu]=zero; C3p[mu]=zero; C4p[mu]=zero; + C1m[mu]=zero; C2m[mu]=zero; C3m[mu]=zero; C4m[mu]=zero; +} + + +for (int mu=0;mu<4;mu++){ + for (int 
nu=0;nu<4;nu++){ +// insertion in upper staple + tmp = Impl::CovShiftIdentityBackward(Lambda, nu) * U[nu]; + C1p[mu]+= Cshift(Impl::CovShiftForward(tmp, nu, Impl::CovShiftBackward(U[mu], mu, Cshift(U[nu], nu, -1))), mu, 1); + + tmp = Impl::CovShiftIdentityForward(Lambda, mu) * U[mu]; + C2p[mu]+= Cshift(Impl::CovShiftForward(U[nu], nu, Impl::CovShiftBackward(tmp, mu, Cshift(U[nu], nu, -1))), mu, 1); + + tmp = Impl::CovShiftIdentityForward(Lambda, nu) * U[nu]; + C3p[mu]+= Cshift(Impl::CovShiftForward(U[nu], nu, Impl::CovShiftBackward(U[mu], mu, Cshift(tmp, nu, -1))), mu, 1); + + tmp = Lambda; + C4p[mu]+= Cshift(Impl::CovShiftForward(U[nu], nu, Impl::CovShiftBackward(U[mu], mu, Cshift(U[nu], nu, -1))),mu,1) * tmp; + +// insertion in lower staple + tmp = Impl::CovShiftIdentityForward(Lambda, nu) * U[nu]; + C1m[mu]+= Cshift(Impl::CovShiftBackward(tmp, nu, Impl::CovShiftBackward(U[mu], mu, U[nu])), mu, 1); + + tmp = Cshift(Cshift(Lambda, nu, 2),mu, 1) * U[mu]; + C2m[mu]+= Cshift(Impl::CovShiftBackward(U[nu], nu, Impl::CovShiftBackward(tmp, mu, U[nu])), mu ,1); + + tmp = Cshift(Lambda, nu, 2) * U[nu]; + C3m[mu]+= Cshift(Impl::CovShiftBackward(U[nu], nu, Impl::CovShiftBackward(U[mu], mu, tmp)), mu, 1); + + tmp = Lambda; + C4m[mu]+= Cshift(Impl::CovShiftBackward(U[nu], nu, Impl::CovShiftBackward(U[mu], mu, U[nu])), mu, 1)* tmp; + } +} + + +//Still implementing. 
Have to be tested, and understood how to project EO + + + + } // Derivative parts From 15d690e9b9bd79e3ee9b6dae1a12753f131c024f Mon Sep 17 00:00:00 2001 From: Guido Cossu Date: Mon, 9 Oct 2017 09:59:58 +0100 Subject: [PATCH 130/377] Adding the cartesian communicator destructor --- lib/communicator/Communicator_none.cc | 3 +++ 1 file changed, 3 insertions(+) diff --git a/lib/communicator/Communicator_none.cc b/lib/communicator/Communicator_none.cc index 40feefec..4b9029d6 100644 --- a/lib/communicator/Communicator_none.cc +++ b/lib/communicator/Communicator_none.cc @@ -56,6 +56,9 @@ CartesianCommunicator::CartesianCommunicator(const std::vector &processors) } } +CartesianCommunicator::~CartesianCommunicator(){} + + void CartesianCommunicator::GlobalSum(float &){} void CartesianCommunicator::GlobalSumVector(float *,int N){} void CartesianCommunicator::GlobalSum(double &){} From 07009c569a206b9e633e5ab01bdef386f10050c5 Mon Sep 17 00:00:00 2001 From: paboyle Date: Mon, 9 Oct 2017 23:16:51 +0100 Subject: [PATCH 131/377] Comms splitting improvements --- lib/communicator/Communicator_base.cc | 36 ++++++++++++++++----------- lib/communicator/Communicator_base.h | 17 +++++++++++++ lib/communicator/Communicator_mpi.cc | 17 +++++++++++++ lib/communicator/Communicator_none.cc | 4 +++ 4 files changed, 60 insertions(+), 14 deletions(-) diff --git a/lib/communicator/Communicator_base.cc b/lib/communicator/Communicator_base.cc index bcf429ab..ce9a3cf0 100644 --- a/lib/communicator/Communicator_base.cc +++ b/lib/communicator/Communicator_base.cc @@ -117,32 +117,40 @@ CartesianCommunicator::CartesianCommunicator(const std::vector &processors, int Nchild = Nparent/childsize; assert (childsize * Nchild == Nparent); - int prank; MPI_Comm_rank(parent.communicator,&prank); - int crank = prank % childsize; - int ccomm = prank / childsize; + std::vector ccoor(_ndimension); // coor within subcommunicator + std::vector scoor(_ndimension); // coor of split within parent + std::vector 
ssize(_ndimension); // coor of split within parent + + for(int d=0;d<_ndimension;d++){ + ccoor[d] = parent._processor_coor[d] % processors[d]; + scoor[d] = parent._processor_coor[d] / processors[d]; + ssize[d] = parent._processors[d]/ processors[d]; + } + int crank,srank; // rank within subcomm ; rank of subcomm within blocks of subcomms + Lexicographic::IndexFromCoor(ccoor,crank,processors); + Lexicographic::IndexFromCoor(scoor,srank,ssize); MPI_Comm comm_split; if ( Nchild > 1 ) { - std::cout << GridLogMessage<<"Child communicator of "<< std::hex << parent.communicator << std::dec< void AllToAll(int dim,std::vector &in, std::vector &out){ + assert(dim>=0); + assert(dim<_ndimension); + int numnode = _processors[dim]; + // std::cerr << " AllToAll in.size() "< void Broadcast(int root,obj &data) { diff --git a/lib/communicator/Communicator_mpi.cc b/lib/communicator/Communicator_mpi.cc index a55c0164..678e4517 100644 --- a/lib/communicator/Communicator_mpi.cc +++ b/lib/communicator/Communicator_mpi.cc @@ -187,6 +187,21 @@ void CartesianCommunicator::Broadcast(int root,void* data, int bytes) root, communicator); assert(ierr==0); +} +void CartesianCommunicator::AllToAll(int dim,void *in,void *out,int bytes) +{ + std::vector row(_ndimension,1); + assert(dim>=0 && dim<_ndimension); + + // Split the communicator + row[dim] = _processors[dim]; + + CartesianCommunicator Comm(row,*this); + Comm.AllToAll(in,out,bytes); +} +void CartesianCommunicator::AllToAll(void *in,void *out,int bytes) +{ + MPI_Alltoall(in ,bytes,MPI_BYTE,out,bytes,MPI_BYTE,communicator); } /////////////////////////////////////////////////////// // Should only be used prior to Grid Init finished. 
@@ -207,5 +222,7 @@ void CartesianCommunicator::BroadcastWorld(int root,void* data, int bytes) assert(ierr==0); } + + } diff --git a/lib/communicator/Communicator_none.cc b/lib/communicator/Communicator_none.cc index 40feefec..e9d71a15 100644 --- a/lib/communicator/Communicator_none.cc +++ b/lib/communicator/Communicator_none.cc @@ -98,6 +98,10 @@ void CartesianCommunicator::SendToRecvFromComplete(std::vector & { assert(0); } +void CartesianCommunicator::AllToAll(int dim,void *in,void *out,int bytes) +{ + bcopy(in,out,bytes); +} int CartesianCommunicator::RankWorld(void){return 0;} void CartesianCommunicator::Barrier(void){} From f7cbf82c0487be5a7be37fd6b7be148b74029884 Mon Sep 17 00:00:00 2001 From: paboyle Date: Mon, 9 Oct 2017 23:18:48 +0100 Subject: [PATCH 132/377] Better stdout/err debug --- lib/util/Init.cc | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/lib/util/Init.cc b/lib/util/Init.cc index 3232d32f..1266d34d 100644 --- a/lib/util/Init.cc +++ b/lib/util/Init.cc @@ -243,6 +243,12 @@ void Grid_init(int *argc,char ***argv) fname< Date: Mon, 9 Oct 2017 23:19:45 +0100 Subject: [PATCH 133/377] Split grid communication --- lib/lattice/Lattice_transfer.h | 301 +++++++++++++++++++++++++++++++++ 1 file changed, 301 insertions(+) diff --git a/lib/lattice/Lattice_transfer.h b/lib/lattice/Lattice_transfer.h index cbf31f86..713a8788 100644 --- a/lib/lattice/Lattice_transfer.h +++ b/lib/lattice/Lattice_transfer.h @@ -684,6 +684,307 @@ void precisionChange(Lattice &out, const Lattice &in){ merge(out._odata[out_oidx], ptrs, 0); } } + +//////////////////////////////////////////////////////////////////////////////// +// Communicate between grids +//////////////////////////////////////////////////////////////////////////////// +// +// All to all plan +// +// Subvolume on fine grid is v. 
Vectors a,b,c,d +// +/////////////////////////////////////////////////////////////////////////////////////////////////////////// +// SIMPLEST CASE: +/////////////////////////////////////////////////////////////////////////////////////////////////////////// +// Mesh of nodes (2) ; subdivide to 1 subdivisions +// +// Lex ord: +// N0 va0 vb0 N1 va1 vb1 +// +// For each dimension do an all to all +// +// full AllToAll(0) +// N0 va0 va1 N1 vb0 vb1 +// +// REARRANGE +// N0 va01 N1 vb01 +// +// Must also rearrange data to get into the NEW lex order of grid at each stage. Some kind of "insert/extract". +// NB: Easiest to programme if keep in lex order. +// +/////////////////////////////////////////////////////////////////////////////////////////////////////////// +// SIMPLE CASE: +/////////////////////////////////////////////////////////////////////////////////////////////////////////// +// +// Mesh of nodes (2x2) ; subdivide to 1x1 subdivisions +// +// Lex ord: +// N0 va0 vb0 vc0 vd0 N1 va1 vb1 vc1 vd1 +// N2 va2 vb2 vc2 vd2 N3 va3 vb3 vc3 vd3 +// +// Ratio = full[dim] / split[dim] +// +// For each dimension do an all to all; get Nvec -> Nvec / ratio +// Ldim -> Ldim * ratio +// LocalVol -> LocalVol * ratio +// full AllToAll(0) +// N0 va0 vb0 va1 vb1 N1 vc0 vd0 vc1 vd1 +// N2 va2 vb2 va3 vb3 N3 vc2 vd2 vc3 vd3 +// +// REARRANGE +// N0 va01 vb01 N1 vc01 vd01 +// N2 va23 vb23 N3 vc23 vd23 +// +// full AllToAll(1) // Not what is wanted. FIXME +// N0 va01 va23 N1 vc01 vc23 +// N2 vb01 vb23 N3 vd01 vd23 +// +// REARRANGE +// N0 va0123 N1 vc0123 +// N2 vb0123 N3 vd0123 +// +// Must also rearrange data to get into the NEW lex order of grid at each stage. Some kind of "insert/extract". +// NB: Easiest to programme if keep in lex order. 
+// +///////////////////////////////////////////////////////// +template +void Grid_split(std::vector > & full,Lattice & split) +{ + typedef typename Vobj::scalar_object Sobj; + + int full_vecs = full.size(); + + assert(full_vecs>=1); + + GridBase * full_grid = full[0]._grid; + GridBase *split_grid = split._grid; + + int ndim = full_grid->_ndimension; + int full_nproc = full_grid->_Nprocessors; + int split_nproc =split_grid->_Nprocessors; + + //////////////////////////////// + // Checkerboard management + //////////////////////////////// + int cb = full[0].checkerboard; + split.checkerboard = cb; + + ////////////////////////////// + // Checks + ////////////////////////////// + assert(full_grid->_ndimension==split_grid->_ndimension); + for(int n=0;n_gdimensions[d]==split._grid->_gdimensions[d]); + assert(full[n]._grid->_fdimensions[d]==split._grid->_fdimensions[d]); + } + } + + int nvector =full_nproc/split_nproc; + assert(nvector*split_nproc==full_nproc); + assert(nvector == full_vecs); + + std::vector ratio(ndim); + for(int d=0;d_processors[d]/ split_grid->_processors[d]; + } + + int lsites = full_grid->lSites(); + Integer sz = lsites * nvector; + std::vector tmpdata(sz); + std::vector alldata(sz); + std::vector scalardata(lsites); + for(int v=0;v ldims = full_grid->_ldimensions; + std::vector lcoor(ndim); + + for(int d=0;dAllToAll(d,alldata,tmpdata); + + ////////////////////////////////////////// + //Local volume for this dimension is expanded by ratio of processor extents + // Number of vectors is decreased by same factor + // Rearrange to lexico for bigger volume + ////////////////////////////////////////// + nvec /= ratio[d]; + auto rdims = ldims; rdims[d] *= ratio[d]; + auto rsites= lsites*ratio[d]; + for(int v=0;v_processors[d] > 1 ) { + tmpdata = alldata; + split_grid->AllToAll(d,tmpdata,alldata); + } + } + } + + vectorizeFromLexOrdArray(alldata,split); +} + +template +void Grid_split(Lattice &full,Lattice & split) +{ + int nvector = 
full._grid->_Nprocessors / split._grid->_Nprocessors; + std::vector > full_v(nvector,full._grid); + for(int n=0;n +void Grid_unsplit(std::vector > & full,Lattice & split) +{ + typedef typename Vobj::scalar_object Sobj; + + int full_vecs = full.size(); + + assert(full_vecs>=1); + + GridBase * full_grid = full[0]._grid; + GridBase *split_grid = split._grid; + + int ndim = full_grid->_ndimension; + int full_nproc = full_grid->_Nprocessors; + int split_nproc =split_grid->_Nprocessors; + + //////////////////////////////// + // Checkerboard management + //////////////////////////////// + int cb = full[0].checkerboard; + split.checkerboard = cb; + + ////////////////////////////// + // Checks + ////////////////////////////// + assert(full_grid->_ndimension==split_grid->_ndimension); + for(int n=0;n_gdimensions[d]==split._grid->_gdimensions[d]); + assert(full[n]._grid->_fdimensions[d]==split._grid->_fdimensions[d]); + } + } + + int nvector =full_nproc/split_nproc; + assert(nvector*split_nproc==full_nproc); + assert(nvector == full_vecs); + + std::vector ratio(ndim); + for(int d=0;d_processors[d]/ split_grid->_processors[d]; + } + + int lsites = full_grid->lSites(); + Integer sz = lsites * nvector; + std::vector tmpdata(sz); + std::vector alldata(sz); + std::vector scalardata(lsites); + + unvectorizeToLexOrdArray(alldata,split); + + ///////////////////////////////////////////////////////////////// + // Start from split grid and work towards full grid + ///////////////////////////////////////////////////////////////// + std::vector lcoor(ndim); + std::vector rcoor(ndim); + + int nvec = 1; + lsites = split_grid->lSites(); + std::vector ldims = split_grid->_ldimensions; + + for(int d=ndim-1;d>=0;d--){ + + if ( ratio[d] != 1 ) { + + if ( split_grid->_processors[d] > 1 ) { + tmpdata = alldata; + split_grid->AllToAll(d,tmpdata,alldata); + } + + ////////////////////////////////////////// + //Local volume for this dimension is expanded by ratio of processor extents + // Number of 
vectors is decreased by same factor + // Rearrange to lexico for bigger volume + ////////////////////////////////////////// + auto rsites= lsites/ratio[d]; + auto rdims = ldims; rdims[d]/=ratio[d]; + + for(int v=0;v smaller local volume + // lsite, lcoor --> bigger original (single node?) volume + // For loop over each site within smaller subvol + for(int rsite=0;rsiteAllToAll(d,tmpdata,alldata); + } + } + + lsites = full_grid->lSites(); + for(int v=0;v Date: Mon, 9 Oct 2017 23:20:58 +0100 Subject: [PATCH 134/377] Split CG testing --- tests/solver/Test_dwf_mrhs_cg.cc | 64 +++++++--- tests/solver/Test_dwf_mrhs_cg_mpi.cc | 144 ++++++++++++++++++++++ tests/solver/Test_dwf_mrhs_cg_mpieo.cc | 163 +++++++++++++++++++++++++ 3 files changed, 356 insertions(+), 15 deletions(-) create mode 100644 tests/solver/Test_dwf_mrhs_cg_mpi.cc create mode 100644 tests/solver/Test_dwf_mrhs_cg_mpieo.cc diff --git a/tests/solver/Test_dwf_mrhs_cg.cc b/tests/solver/Test_dwf_mrhs_cg.cc index 2d2cfcb1..d9215db2 100644 --- a/tests/solver/Test_dwf_mrhs_cg.cc +++ b/tests/solver/Test_dwf_mrhs_cg.cc @@ -38,7 +38,7 @@ int main (int argc, char ** argv) typedef typename DomainWallFermionR::ComplexField ComplexField; typename DomainWallFermionR::ImplParams params; - const int Ls=8; + const int Ls=4; Grid_init(&argc,&argv); @@ -47,29 +47,24 @@ int main (int argc, char ** argv) std::vector mpi_layout = GridDefaultMpi(); std::vector mpi_split (mpi_layout.size(),1); - std::cout << "UGrid (world root)"<RankCount() ; ///////////////////////////////////////////// // Split into 1^4 mpi communicators ///////////////////////////////////////////// - std::cout << "SGrid (world root)"< src(nrhs,FGrid); + std::vector src_chk(nrhs,FGrid); std::vector result(nrhs,FGrid); + FermionField tmp(FGrid); for(int s=0;sThisRank(); LatticeGaugeField s_Umu(SGrid); FermionField s_src(SFGrid); + FermionField s_src_split(SFGrid); + FermionField s_tmp(SFGrid); FermionField s_res(SFGrid); { @@ -157,6 +156,24 @@ int main (int argc, 
char ** argv) FGrid->Barrier(); } + /////////////////////////////////////////////////////////////// + // split the source out using MPI instead of I/O + /////////////////////////////////////////////////////////////// + std::cout << GridLogMessage << " Splitting the grid data "<Barrier(); + if ( n==me ) { + std::cerr << GridLogMessage<<"Split "<< me << " " << norm2(s_src_split) << " " << norm2(s_src)<< " diff " << norm2(s_tmp)<Barrier(); + } + /////////////////////////////////////////////////////////////// // Set up N-solvers as trivially parallel @@ -164,6 +181,7 @@ int main (int argc, char ** argv) RealD mass=0.01; RealD M5=1.8; + DomainWallFermionR Dchk(Umu,*FGrid,*FrbGrid,*UGrid,*rbGrid,mass,M5); DomainWallFermionR Ddwf(s_Umu,*SFGrid,*SFrbGrid,*SGrid,*SrbGrid,mass,M5); std::cout << GridLogMessage << "****************************************************************** "< HermOp(Ddwf); + MdagMLinearOperator HermOpCk(Dchk); ConjugateGradient CG((1.0e-8/(me+1)),10000); s_res = zero; CG(HermOp,s_src,s_res); - /////////////////////////////////////// - // Share the information - /////////////////////////////////////// + ///////////////////////////////////////////////////////////// + // Report how long they all took + ///////////////////////////////////////////////////////////// std::vector iterations(nrhs,0); iterations[me] = CG.IterationsToComplete; for(int n=0;nGlobalSum(iterations[n]); + std::cout << GridLogMessage<<" Rank "< + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + You should have received a copy of the GNU General Public License along + with this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + + See the full license in the file "LICENSE" in the top level distribution directory + *************************************************************************************/ + /* END LEGAL */ +#include +#include + +using namespace std; +using namespace Grid; +using namespace Grid::QCD; + +int main (int argc, char ** argv) +{ + typedef typename DomainWallFermionR::FermionField FermionField; + typedef typename DomainWallFermionR::ComplexField ComplexField; + typename DomainWallFermionR::ImplParams params; + + const int Ls=4; + + Grid_init(&argc,&argv); + + std::vector latt_size = GridDefaultLatt(); + std::vector simd_layout = GridDefaultSimd(Nd,vComplex::Nsimd()); + std::vector mpi_layout = GridDefaultMpi(); + std::vector mpi_split (mpi_layout.size(),1); + + GridCartesian * UGrid = SpaceTimeGrid::makeFourDimGrid(GridDefaultLatt(), GridDefaultSimd(Nd,vComplex::Nsimd()),GridDefaultMpi()); + GridCartesian * FGrid = SpaceTimeGrid::makeFiveDimGrid(Ls,UGrid); + GridRedBlackCartesian * rbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(UGrid); + GridRedBlackCartesian * FrbGrid = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls,UGrid); + + int nrhs = UGrid->RankCount() ; + + ///////////////////////////////////////////// + // Split into 1^4 mpi communicators + ///////////////////////////////////////////// + GridCartesian * SGrid = new GridCartesian(GridDefaultLatt(), + GridDefaultSimd(Nd,vComplex::Nsimd()), + mpi_split, + *UGrid); + + GridCartesian * SFGrid = SpaceTimeGrid::makeFiveDimGrid(Ls,SGrid); + GridRedBlackCartesian * SrbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(SGrid); + GridRedBlackCartesian * SFrbGrid = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls,SGrid); + + /////////////////////////////////////////////// + // Set up the problem as a 4d spreadout job + 
/////////////////////////////////////////////// + std::vector seeds({1,2,3,4}); + + GridParallelRNG pRNG(UGrid ); pRNG.SeedFixedIntegers(seeds); + GridParallelRNG pRNG5(FGrid); pRNG5.SeedFixedIntegers(seeds); + std::vector src(nrhs,FGrid); + std::vector src_chk(nrhs,FGrid); + std::vector result(nrhs,FGrid); + FermionField tmp(FGrid); + + for(int s=0;sThisRank(); + + LatticeGaugeField s_Umu(SGrid); + FermionField s_src(SFGrid); + FermionField s_tmp(SFGrid); + FermionField s_res(SFGrid); + + /////////////////////////////////////////////////////////////// + // split the source out using MPI instead of I/O + /////////////////////////////////////////////////////////////// + Grid_split (Umu,s_Umu); + Grid_split (src,s_src); + + /////////////////////////////////////////////////////////////// + // Set up N-solvers as trivially parallel + /////////////////////////////////////////////////////////////// + RealD mass=0.01; + RealD M5=1.8; + DomainWallFermionR Dchk(Umu,*FGrid,*FrbGrid,*UGrid,*rbGrid,mass,M5); + DomainWallFermionR Ddwf(s_Umu,*SFGrid,*SFrbGrid,*SGrid,*SrbGrid,mass,M5); + + std::cout << GridLogMessage << "****************************************************************** "< HermOp(Ddwf); + MdagMLinearOperator HermOpCk(Dchk); + ConjugateGradient CG((1.0e-8/(me+1)),10000); + s_res = zero; + CG(HermOp,s_src,s_res); + + ///////////////////////////////////////////////////////////// + // Report how long they all took + ///////////////////////////////////////////////////////////// + std::vector iterations(nrhs,0); + iterations[me] = CG.IterationsToComplete; + + for(int n=0;nGlobalSum(iterations[n]); + std::cout << GridLogMessage<<" Rank "< + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. 
+ + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along + with this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + + See the full license in the file "LICENSE" in the top level distribution directory + *************************************************************************************/ + /* END LEGAL */ +#include +#include + +using namespace std; +using namespace Grid; +using namespace Grid::QCD; + +int main (int argc, char ** argv) +{ + typedef typename DomainWallFermionR::FermionField FermionField; + typedef typename DomainWallFermionR::ComplexField ComplexField; + typename DomainWallFermionR::ImplParams params; + + const int Ls=4; + + Grid_init(&argc,&argv); + + std::vector latt_size = GridDefaultLatt(); + std::vector simd_layout = GridDefaultSimd(Nd,vComplex::Nsimd()); + std::vector mpi_layout = GridDefaultMpi(); + std::vector mpi_split (mpi_layout.size(),1); + + GridCartesian * UGrid = SpaceTimeGrid::makeFourDimGrid(GridDefaultLatt(), GridDefaultSimd(Nd,vComplex::Nsimd()),GridDefaultMpi()); + GridCartesian * FGrid = SpaceTimeGrid::makeFiveDimGrid(Ls,UGrid); + GridRedBlackCartesian * rbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(UGrid); + GridRedBlackCartesian * FrbGrid = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls,UGrid); + + int nrhs = UGrid->RankCount() ; + + ///////////////////////////////////////////// + // Split into 1^4 mpi communicators + ///////////////////////////////////////////// + GridCartesian * SGrid = new GridCartesian(GridDefaultLatt(), + GridDefaultSimd(Nd,vComplex::Nsimd()), + mpi_split, + *UGrid); + + GridCartesian * SFGrid = SpaceTimeGrid::makeFiveDimGrid(Ls,SGrid); + GridRedBlackCartesian * SrbGrid = 
SpaceTimeGrid::makeFourDimRedBlackGrid(SGrid); + GridRedBlackCartesian * SFrbGrid = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls,SGrid); + + /////////////////////////////////////////////// + // Set up the problem as a 4d spreadout job + /////////////////////////////////////////////// + std::vector seeds({1,2,3,4}); + + GridParallelRNG pRNG(UGrid ); pRNG.SeedFixedIntegers(seeds); + GridParallelRNG pRNG5(FGrid); pRNG5.SeedFixedIntegers(seeds); + std::vector src(nrhs,FGrid); + std::vector src_chk(nrhs,FGrid); + std::vector result(nrhs,FGrid); + FermionField tmp(FGrid); + + std::vector src_e(nrhs,FrbGrid); + std::vector src_o(nrhs,FrbGrid); + + for(int s=0;sThisRank(); + + LatticeGaugeField s_Umu(SGrid); + FermionField s_src(SFGrid); + FermionField s_src_e(SFrbGrid); + FermionField s_src_o(SFrbGrid); + FermionField s_tmp(SFGrid); + FermionField s_res(SFGrid); + + /////////////////////////////////////////////////////////////// + // split the source out using MPI instead of I/O + /////////////////////////////////////////////////////////////// + Grid_split (Umu,s_Umu); + Grid_split (src,s_src); + + /////////////////////////////////////////////////////////////// + // Check even odd cases + /////////////////////////////////////////////////////////////// + for(int s=0;s HermOp(Ddwf); + MdagMLinearOperator HermOpCk(Dchk); + ConjugateGradient CG((1.0e-8/(me+1)),10000); + s_res = zero; + CG(HermOp,s_src,s_res); + + ///////////////////////////////////////////////////////////// + // Report how long they all took + ///////////////////////////////////////////////////////////// + std::vector iterations(nrhs,0); + iterations[me] = CG.IterationsToComplete; + + for(int n=0;nGlobalSum(iterations[n]); + std::cout << GridLogMessage<<" Rank "< Date: Tue, 10 Oct 2017 10:00:43 +0100 Subject: [PATCH 135/377] Schur staggered --- lib/algorithms/LinearOperator.h | 104 +++++++++- lib/algorithms/iterative/SchurRedBlack.h | 240 +++++++++++++++++++++++ 2 files changed, 342 insertions(+), 2 deletions(-) 
diff --git a/lib/algorithms/LinearOperator.h b/lib/algorithms/LinearOperator.h index 6cb77296..6e4da248 100644 --- a/lib/algorithms/LinearOperator.h +++ b/lib/algorithms/LinearOperator.h @@ -192,10 +192,10 @@ namespace Grid { ni=Mpc(in,tmp); no=MpcDag(tmp,out); } - void HermOpAndNorm(const Field &in, Field &out,RealD &n1,RealD &n2){ + virtual void HermOpAndNorm(const Field &in, Field &out,RealD &n1,RealD &n2){ MpcDagMpc(in,out,n1,n2); } - void HermOp(const Field &in, Field &out){ + virtual void HermOp(const Field &in, Field &out){ RealD n1,n2; HermOpAndNorm(in,out,n1,n2); } @@ -300,6 +300,106 @@ namespace Grid { } }; + // + template + class SchurStaggeredOperator : public SchurOperatorBase { + protected: + Matrix &_Mat; + public: + SchurStaggeredOperator (Matrix &Mat): _Mat(Mat){}; + virtual void HermOpAndNorm(const Field &in, Field &out,RealD &n1,RealD &n2){ + ComplexD dot; + n2=Mpc(in,out); + dot= innerProduct(in,out); + n1= real(dot); + } + virtual void HermOp(const Field &in, Field &out){ + Mpc(in,out); + } + virtual RealD Mpc (const Field &in, Field &out) { + Field tmp(in._grid); + _Mat.Meooe(in,tmp); + _Mat.MooeeInv(tmp,out); + _Mat.MeooeDag(out,tmp); + _Mat.Mooee(in,out); + return axpy_norm(out,-1.0,tmp,out); + } + virtual RealD MpcDag (const Field &in, Field &out){ + return Mpc(in,out); + } + virtual void MpcDagMpc(const Field &in, Field &out,RealD &ni,RealD &no) { + assert(0);// Never need with staggered + } + }; + template using SchurStagOperator = SchurStaggeredOperator; + + // This is specific to (Z)mobius fermions + template + class KappaSimilarityTransform { + public: + + typedef typename Matrix::Coeff_t Coeff_t; + std::vector kappa, kappaDag, kappaInv, kappaInvDag; + + KappaSimilarityTransform (Matrix &zmob) { + for (int i=0;i<(int)zmob.bs.size();i++) { + Coeff_t k = 1.0 / ( 2.0 * (zmob.bs[i] *(4 - zmob.M5) + 1.0) ); + kappa.push_back( k ); + kappaDag.push_back( conj(k) ); + kappaInv.push_back( 1.0 / k ); + kappaInvDag.push_back( 1.0 / conj(k) ); + } 
+ } + + template + void sscale(const Lattice& in, Lattice& out, Coeff_t* s) { + GridBase *grid=out._grid; + out.checkerboard = in.checkerboard; + assert(grid->_simd_layout[0] == 1); // should be fine for ZMobius for now + int Ls = grid->_rdimensions[0]; + parallel_for(int ss=0;ssoSites();ss++){ + vobj tmp = s[ss % Ls]*in._odata[ss]; + vstream(out._odata[ss],tmp); + } + } + + RealD sscale_norm(const Field& in, Field& out, Coeff_t* s) { + sscale(in,out,s); + return norm2(out); + } + + virtual RealD M (const Field& in, Field& out) { return sscale_norm(in,out,&kappa[0]); } + virtual RealD MDag (const Field& in, Field& out) { return sscale_norm(in,out,&kappaDag[0]);} + virtual RealD MInv (const Field& in, Field& out) { return sscale_norm(in,out,&kappaInv[0]);} + virtual RealD MInvDag (const Field& in, Field& out) { return sscale_norm(in,out,&kappaInvDag[0]);} + + }; + + template + class SchurDiagTwoKappaOperator : public SchurOperatorBase { + public: + KappaSimilarityTransform _S; + SchurDiagTwoOperator _Mat; + + SchurDiagTwoKappaOperator (Matrix &Mat): _S(Mat), _Mat(Mat) {}; + + virtual RealD Mpc (const Field &in, Field &out) { + Field tmp(in._grid); + + _S.MInv(in,out); + _Mat.Mpc(out,tmp); + return _S.M(tmp,out); + + } + virtual RealD MpcDag (const Field &in, Field &out){ + Field tmp(in._grid); + + _S.MDag(in,out); + _Mat.MpcDag(out,tmp); + return _S.MInvDag(tmp,out); + } + }; + ///////////////////////////////////////////////////////////// // Base classes for functions of operators diff --git a/lib/algorithms/iterative/SchurRedBlack.h b/lib/algorithms/iterative/SchurRedBlack.h index 5caabb4b..b6eab762 100644 --- a/lib/algorithms/iterative/SchurRedBlack.h +++ b/lib/algorithms/iterative/SchurRedBlack.h @@ -63,6 +63,85 @@ Author: Peter Boyle */ namespace Grid { + /////////////////////////////////////////////////////////////////////////////////////////////////////// + // Take a matrix and form a Red Black solver calling a Herm solver + // Use of RB info prevents making 
SchurRedBlackSolve conform to standard interface + /////////////////////////////////////////////////////////////////////////////////////////////////////// + + template class SchurRedBlackStaggeredSolve { + private: + OperatorFunction & _HermitianRBSolver; + int CBfactorise; + public: + + ///////////////////////////////////////////////////// + // Wrap the usual normal equations Schur trick + ///////////////////////////////////////////////////// + SchurRedBlackStaggeredSolve(OperatorFunction &HermitianRBSolver) : + _HermitianRBSolver(HermitianRBSolver) + { + CBfactorise=0; + }; + + template + void operator() (Matrix & _Matrix,const Field &in, Field &out){ + + // FIXME CGdiagonalMee not implemented virtual function + // FIXME use CBfactorise to control schur decomp + GridBase *grid = _Matrix.RedBlackGrid(); + GridBase *fgrid= _Matrix.Grid(); + + SchurStaggeredOperator _HermOpEO(_Matrix); + + Field src_e(grid); + Field src_o(grid); + Field sol_e(grid); + Field sol_o(grid); + Field tmp(grid); + Field Mtmp(grid); + Field resid(fgrid); + + pickCheckerboard(Even,src_e,in); + pickCheckerboard(Odd ,src_o,in); + pickCheckerboard(Even,sol_e,out); + pickCheckerboard(Odd ,sol_o,out); + + ///////////////////////////////////////////////////// + // src_o = Mdag * (source_o - Moe MeeInv source_e) + ///////////////////////////////////////////////////// + _Matrix.MooeeInv(src_e,tmp); assert( tmp.checkerboard ==Even); + _Matrix.Meooe (tmp,Mtmp); assert( Mtmp.checkerboard ==Odd); + tmp=src_o-Mtmp; assert( tmp.checkerboard ==Odd); + + _Matrix.Mooee(tmp,src_o); assert(src_o.checkerboard ==Odd); + + ////////////////////////////////////////////////////////////// + // Call the red-black solver + ////////////////////////////////////////////////////////////// + std::cout< using SchurRedBlackStagSolve = SchurRedBlackStaggeredSolve; + /////////////////////////////////////////////////////////////////////////////////////////////////////// // Take a matrix and form a Red Black solver calling a Herm 
solver // Use of RB info prevents making SchurRedBlackSolve conform to standard interface @@ -141,5 +220,166 @@ namespace Grid { } }; + + /////////////////////////////////////////////////////////////////////////////////////////////////////// + // Take a matrix and form a Red Black solver calling a Herm solver + // Use of RB info prevents making SchurRedBlackSolve conform to standard interface + /////////////////////////////////////////////////////////////////////////////////////////////////////// + template class SchurRedBlackDiagTwoSolve { + private: + OperatorFunction & _HermitianRBSolver; + int CBfactorise; + public: + + ///////////////////////////////////////////////////// + // Wrap the usual normal equations Schur trick + ///////////////////////////////////////////////////// + SchurRedBlackDiagTwoSolve(OperatorFunction &HermitianRBSolver) : + _HermitianRBSolver(HermitianRBSolver) + { + CBfactorise=0; + }; + + template + void operator() (Matrix & _Matrix,const Field &in, Field &out){ + + // FIXME CGdiagonalMee not implemented virtual function + // FIXME use CBfactorise to control schur decomp + GridBase *grid = _Matrix.RedBlackGrid(); + GridBase *fgrid= _Matrix.Grid(); + + SchurDiagTwoOperator _HermOpEO(_Matrix); + + Field src_e(grid); + Field src_o(grid); + Field sol_e(grid); + Field sol_o(grid); + Field tmp(grid); + Field Mtmp(grid); + Field resid(fgrid); + + pickCheckerboard(Even,src_e,in); + pickCheckerboard(Odd ,src_o,in); + pickCheckerboard(Even,sol_e,out); + pickCheckerboard(Odd ,sol_o,out); + + ///////////////////////////////////////////////////// + // src_o = Mdag * (source_o - Moe MeeInv source_e) + ///////////////////////////////////////////////////// + _Matrix.MooeeInv(src_e,tmp); assert( tmp.checkerboard ==Even); + _Matrix.Meooe (tmp,Mtmp); assert( Mtmp.checkerboard ==Odd); + tmp=src_o-Mtmp; assert( tmp.checkerboard ==Odd); + + // get the right MpcDag + _HermOpEO.MpcDag(tmp,src_o); assert(src_o.checkerboard ==Odd); + + 
////////////////////////////////////////////////////////////// + // Call the red-black solver + ////////////////////////////////////////////////////////////// + std::cout< class SchurRedBlackDiagTwoMixed { + private: + LinearFunction & _HermitianRBSolver; + int CBfactorise; + public: + + ///////////////////////////////////////////////////// + // Wrap the usual normal equations Schur trick + ///////////////////////////////////////////////////// + SchurRedBlackDiagTwoMixed(LinearFunction &HermitianRBSolver) : + _HermitianRBSolver(HermitianRBSolver) + { + CBfactorise=0; + }; + + template + void operator() (Matrix & _Matrix,const Field &in, Field &out){ + + // FIXME CGdiagonalMee not implemented virtual function + // FIXME use CBfactorise to control schur decomp + GridBase *grid = _Matrix.RedBlackGrid(); + GridBase *fgrid= _Matrix.Grid(); + + SchurDiagTwoOperator _HermOpEO(_Matrix); + + Field src_e(grid); + Field src_o(grid); + Field sol_e(grid); + Field sol_o(grid); + Field tmp(grid); + Field Mtmp(grid); + Field resid(fgrid); + + pickCheckerboard(Even,src_e,in); + pickCheckerboard(Odd ,src_o,in); + pickCheckerboard(Even,sol_e,out); + pickCheckerboard(Odd ,sol_o,out); + + ///////////////////////////////////////////////////// + // src_o = Mdag * (source_o - Moe MeeInv source_e) + ///////////////////////////////////////////////////// + _Matrix.MooeeInv(src_e,tmp); assert( tmp.checkerboard ==Even); + _Matrix.Meooe (tmp,Mtmp); assert( Mtmp.checkerboard ==Odd); + tmp=src_o-Mtmp; assert( tmp.checkerboard ==Odd); + + // get the right MpcDag + _HermOpEO.MpcDag(tmp,src_o); assert(src_o.checkerboard ==Odd); + + ////////////////////////////////////////////////////////////// + // Call the red-black solver + ////////////////////////////////////////////////////////////// + std::cout< Date: Tue, 10 Oct 2017 12:02:18 +0100 Subject: [PATCH 136/377] Schur for staggered --- lib/algorithms/LinearOperator.h | 80 +----------- tests/solver/Test_staggered_block_cg_prec.cc | 130 
+++++++++++++++++++ tests/solver/Test_staggered_cg_prec.cc | 6 +- 3 files changed, 135 insertions(+), 81 deletions(-) create mode 100644 tests/solver/Test_staggered_block_cg_prec.cc diff --git a/lib/algorithms/LinearOperator.h b/lib/algorithms/LinearOperator.h index 6e4da248..d402c5b7 100644 --- a/lib/algorithms/LinearOperator.h +++ b/lib/algorithms/LinearOperator.h @@ -162,15 +162,10 @@ namespace Grid { _Mat.M(in,out); } void HermOpAndNorm(const Field &in, Field &out,RealD &n1,RealD &n2){ - ComplexD dot; - _Mat.M(in,out); - dot= innerProduct(in,out); - n1=real(dot); - - dot = innerProduct(out,out); - n2=real(dot); + ComplexD dot= innerProduct(in,out); n1=real(dot); + n2=norm2(out); } void HermOp(const Field &in, Field &out){ _Mat.M(in,out); @@ -309,9 +304,9 @@ namespace Grid { SchurStaggeredOperator (Matrix &Mat): _Mat(Mat){}; virtual void HermOpAndNorm(const Field &in, Field &out,RealD &n1,RealD &n2){ ComplexD dot; - n2=Mpc(in,out); + n2 = Mpc(in,out); dot= innerProduct(in,out); - n1= real(dot); + n1 = real(dot); } virtual void HermOp(const Field &in, Field &out){ Mpc(in,out); @@ -333,73 +328,6 @@ namespace Grid { }; template using SchurStagOperator = SchurStaggeredOperator; - // This is specific to (Z)mobius fermions - template - class KappaSimilarityTransform { - public: - - typedef typename Matrix::Coeff_t Coeff_t; - std::vector kappa, kappaDag, kappaInv, kappaInvDag; - - KappaSimilarityTransform (Matrix &zmob) { - for (int i=0;i<(int)zmob.bs.size();i++) { - Coeff_t k = 1.0 / ( 2.0 * (zmob.bs[i] *(4 - zmob.M5) + 1.0) ); - kappa.push_back( k ); - kappaDag.push_back( conj(k) ); - kappaInv.push_back( 1.0 / k ); - kappaInvDag.push_back( 1.0 / conj(k) ); - } - } - - template - void sscale(const Lattice& in, Lattice& out, Coeff_t* s) { - GridBase *grid=out._grid; - out.checkerboard = in.checkerboard; - assert(grid->_simd_layout[0] == 1); // should be fine for ZMobius for now - int Ls = grid->_rdimensions[0]; - parallel_for(int ss=0;ssoSites();ss++){ - vobj tmp = 
s[ss % Ls]*in._odata[ss]; - vstream(out._odata[ss],tmp); - } - } - - RealD sscale_norm(const Field& in, Field& out, Coeff_t* s) { - sscale(in,out,s); - return norm2(out); - } - - virtual RealD M (const Field& in, Field& out) { return sscale_norm(in,out,&kappa[0]); } - virtual RealD MDag (const Field& in, Field& out) { return sscale_norm(in,out,&kappaDag[0]);} - virtual RealD MInv (const Field& in, Field& out) { return sscale_norm(in,out,&kappaInv[0]);} - virtual RealD MInvDag (const Field& in, Field& out) { return sscale_norm(in,out,&kappaInvDag[0]);} - - }; - - template - class SchurDiagTwoKappaOperator : public SchurOperatorBase { - public: - KappaSimilarityTransform _S; - SchurDiagTwoOperator _Mat; - - SchurDiagTwoKappaOperator (Matrix &Mat): _S(Mat), _Mat(Mat) {}; - - virtual RealD Mpc (const Field &in, Field &out) { - Field tmp(in._grid); - - _S.MInv(in,out); - _Mat.Mpc(out,tmp); - return _S.M(tmp,out); - - } - virtual RealD MpcDag (const Field &in, Field &out){ - Field tmp(in._grid); - - _S.MDag(in,out); - _Mat.MpcDag(out,tmp); - return _S.MInvDag(tmp,out); - } - }; - ///////////////////////////////////////////////////////////// // Base classes for functions of operators diff --git a/tests/solver/Test_staggered_block_cg_prec.cc b/tests/solver/Test_staggered_block_cg_prec.cc new file mode 100644 index 00000000..1d0117e0 --- /dev/null +++ b/tests/solver/Test_staggered_block_cg_prec.cc @@ -0,0 +1,130 @@ + /************************************************************************************* + + Grid physics library, www.github.com/paboyle/Grid + + Source file: ./tests/Test_wilson_cg_unprec.cc + + Copyright (C) 2015 + +Author: Azusa Yamaguchi +Author: Peter Boyle + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. 
+ + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along + with this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + + See the full license in the file "LICENSE" in the top level distribution directory + *************************************************************************************/ + /* END LEGAL */ +#include + +using namespace std; +using namespace Grid; +using namespace Grid::QCD; + +template +struct scal { + d internal; +}; + + Gamma::Algebra Gmu [] = { + Gamma::Algebra::GammaX, + Gamma::Algebra::GammaY, + Gamma::Algebra::GammaZ, + Gamma::Algebra::GammaT + }; + +int main (int argc, char ** argv) +{ + typedef typename ImprovedStaggeredFermion5DR::FermionField FermionField; + typedef typename ImprovedStaggeredFermion5DR::ComplexField ComplexField; + typename ImprovedStaggeredFermion5DR::ImplParams params; + + const int Ls=8; + + Grid_init(&argc,&argv); + + std::vector latt_size = GridDefaultLatt(); + std::vector simd_layout = GridDefaultSimd(Nd,vComplex::Nsimd()); + std::vector mpi_layout = GridDefaultMpi(); + + GridCartesian * UGrid = SpaceTimeGrid::makeFourDimGrid(GridDefaultLatt(), GridDefaultSimd(Nd,vComplex::Nsimd()),GridDefaultMpi()); + GridRedBlackCartesian * UrbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(UGrid); + GridCartesian * FGrid = SpaceTimeGrid::makeFiveDimGrid(Ls,UGrid); + GridRedBlackCartesian * FrbGrid = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls,UGrid); + + std::vector seeds({1,2,3,4}); + GridParallelRNG pRNG(UGrid ); pRNG.SeedFixedIntegers(seeds); + GridParallelRNG pRNG5(FGrid); pRNG5.SeedFixedIntegers(seeds); + + FermionField src(FGrid); random(pRNG5,src); + FermionField src_o(FrbGrid); 
pickCheckerboard(Odd,src_o,src); + FermionField result_o(FrbGrid); result_o=zero; + RealD nrm = norm2(src); + + LatticeGaugeField Umu(UGrid); SU3::HotConfiguration(pRNG,Umu); + + RealD mass=0.003; + ImprovedStaggeredFermion5DR Ds(Umu,Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass); + SchurDiagMooeeOperator HermOp(Ds); + + ConjugateGradient CG(1.0e-8,10000); + int blockDim = 0; + BlockConjugateGradient BCGrQ(BlockCGrQ,blockDim,1.0e-8,10000); + BlockConjugateGradient BCG (BlockCG,blockDim,1.0e-8,10000); + BlockConjugateGradient mCG (CGmultiRHS,blockDim,1.0e-8,10000); + + std::cout << GridLogMessage << "****************************************************************** "< HermOp4d(Ds4d); + FermionField src4d(UGrid); random(pRNG,src4d); + FermionField src4d_o(UrbGrid); pickCheckerboard(Odd,src4d_o,src4d); + FermionField result4d_o(UrbGrid); + + result4d_o=zero; + CG(HermOp4d,src4d_o,result4d_o); + std::cout << GridLogMessage << "************************************************************************ "< Date: Tue, 10 Oct 2017 13:48:51 +0100 Subject: [PATCH 137/377] Christop mods --- lib/algorithms/approx/Chebyshev.h | 42 +++++++++++++++++++++++++++++++ 1 file changed, 42 insertions(+) diff --git a/lib/algorithms/approx/Chebyshev.h b/lib/algorithms/approx/Chebyshev.h index 2793f138..f8c21a05 100644 --- a/lib/algorithms/approx/Chebyshev.h +++ b/lib/algorithms/approx/Chebyshev.h @@ -8,6 +8,7 @@ Author: Peter Boyle Author: paboyle +Author: Christoph Lehner This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -193,6 +194,47 @@ namespace Grid { return sum; }; + RealD approxD(RealD x) + { + RealD Un; + RealD Unm; + RealD Unp; + + RealD y=( x-0.5*(hi+lo))/(0.5*(hi-lo)); + + RealD U0=1; + RealD U1=2*y; + + RealD sum; + sum = Coeffs[1]*U0; + sum+= Coeffs[2]*U1*2.0; + + Un =U1; + Unm=U0; + for(int i=2;i::quiet_NaN(); + } + // Implement the required interface void operator() (LinearOperatorBase 
&Linop, const Field &in, Field &out) { From a1d80282eca8df8c1c7eb521c48c3aa78ccdb389 Mon Sep 17 00:00:00 2001 From: paboyle Date: Tue, 10 Oct 2017 13:49:31 +0100 Subject: [PATCH 138/377] cb factorise --- lib/algorithms/iterative/SchurRedBlack.h | 27 ++++++++++++++++++------ 1 file changed, 20 insertions(+), 7 deletions(-) diff --git a/lib/algorithms/iterative/SchurRedBlack.h b/lib/algorithms/iterative/SchurRedBlack.h index b6eab762..a309386b 100644 --- a/lib/algorithms/iterative/SchurRedBlack.h +++ b/lib/algorithms/iterative/SchurRedBlack.h @@ -53,13 +53,28 @@ Author: Peter Boyle * M psi = eta *********************** *Odd - * i) (D_oo)^{\dag} D_oo psi_o = (D_oo)^dag L^{-1} eta_o + * i) D_oo psi_o = L^{-1} eta_o * eta_o' = (D_oo)^dag (eta_o - Moe Mee^{-1} eta_e) + * (D_oo)^{\dag} D_oo psi_o = (D_oo)^dag L^{-1} eta_o *Even * ii) Mee psi_e + Meo psi_o = src_e * * => sol_e = M_ee^-1 * ( src_e - Meo sol_o )... * + * + * TODO: Other options: + * + * a) change checkerboards for Schur e<->o + * + * Left precon by Moo^-1 + * b) Doo^{dag} M_oo^-dag Moo^-1 Doo psi_0 = (D_oo)^dag M_oo^-dag Moo^-1 L^{-1} eta_o + * eta_o' = (D_oo)^dag M_oo^-dag Moo^-1 (eta_o - Moe Mee^{-1} eta_e) + * + * Right precon by Moo^-1 + * c) M_oo^-dag Doo^{dag} Doo Moo^-1 phi_0 = M_oo^-dag (D_oo)^dag L^{-1} eta_o + * eta_o' = M_oo^-dag (D_oo)^dag (eta_o - Moe Mee^{-1} eta_e) + * psi_o = M_oo^-1 phi_o + * TODO: Deflation */ namespace Grid { @@ -155,12 +170,10 @@ namespace Grid { ///////////////////////////////////////////////////// // Wrap the usual normal equations Schur trick ///////////////////////////////////////////////////// - SchurRedBlackDiagMooeeSolve(OperatorFunction &HermitianRBSolver) : - _HermitianRBSolver(HermitianRBSolver) - { - CBfactorise=0; - }; - + SchurRedBlackDiagMooeeSolve(OperatorFunction &HermitianRBSolver,int cb=0) : _HermitianRBSolver(HermitianRBSolver) + { + CBfactorise=cb; + }; template void operator() (Matrix & _Matrix,const Field &in, Field &out){ From 
1374c943d4cbb493a6a909a54b7c55471b677a32 Mon Sep 17 00:00:00 2001 From: Azusa Yamaguchi Date: Tue, 10 Oct 2017 13:59:50 +0100 Subject: [PATCH 139/377] Correct Schur operator called --- tests/solver/Test_staggered_block_cg_prec.cc | 4 ++-- tests/solver/Test_staggered_cg_prec.cc | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/solver/Test_staggered_block_cg_prec.cc b/tests/solver/Test_staggered_block_cg_prec.cc index 1d0117e0..0076e5a0 100644 --- a/tests/solver/Test_staggered_block_cg_prec.cc +++ b/tests/solver/Test_staggered_block_cg_prec.cc @@ -76,7 +76,7 @@ int main (int argc, char ** argv) RealD mass=0.003; ImprovedStaggeredFermion5DR Ds(Umu,Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass); - SchurDiagMooeeOperator HermOp(Ds); + SchurStaggeredOperator HermOp(Ds); ConjugateGradient CG(1.0e-8,10000); int blockDim = 0; @@ -88,7 +88,7 @@ int main (int argc, char ** argv) std::cout << GridLogMessage << " Calling 4d CG "< HermOp4d(Ds4d); + SchurStaggeredOperator HermOp4d(Ds4d); FermionField src4d(UGrid); random(pRNG,src4d); FermionField src4d_o(UrbGrid); pickCheckerboard(Odd,src4d_o,src4d); FermionField result4d_o(UrbGrid); diff --git a/tests/solver/Test_staggered_cg_prec.cc b/tests/solver/Test_staggered_cg_prec.cc index 97251435..9a458f1f 100644 --- a/tests/solver/Test_staggered_cg_prec.cc +++ b/tests/solver/Test_staggered_cg_prec.cc @@ -79,7 +79,7 @@ int main (int argc, char ** argv) pickCheckerboard(Odd,src_o,src); res_o=zero; - SchurDiagMooeeOperator HermOpEO(Ds); + SchurStaggeredOperator HermOpEO(Ds); ConjugateGradient CG(1.0e-8,10000); CG(HermOpEO,src_o,res_o); From bf58557fb1ec710c766e19c9a8809b0a352de239 Mon Sep 17 00:00:00 2001 From: paboyle Date: Tue, 10 Oct 2017 14:15:11 +0100 Subject: [PATCH 140/377] Block compressed Lanczos --- lib/algorithms/LinearOperator.h | 16 +- .../BlockImplicitlyRestartedLanczos.h | 754 ++++++++++++ .../BlockProjector.h | 143 +++ .../BlockedGrid.h | 401 ++++++ .../FieldBasisVector.h | 163 +++ .../FieldVectorIO.h | 
1085 +++++++++++++++++ .../action/fermion/DomainWallEOFAFermion.cc | 12 +- lib/qcd/action/fermion/MobiusEOFAFermion.cc | 14 +- tests/solver/Params.h | 136 +++ tests/solver/Test_dwf_compressed_lanczos.cc | 727 +++++++++++ 10 files changed, 3432 insertions(+), 19 deletions(-) create mode 100644 lib/algorithms/iterative/BlockImplicitlyRestartedLanczos/BlockImplicitlyRestartedLanczos.h create mode 100644 lib/algorithms/iterative/BlockImplicitlyRestartedLanczos/BlockProjector.h create mode 100644 lib/algorithms/iterative/BlockImplicitlyRestartedLanczos/BlockedGrid.h create mode 100644 lib/algorithms/iterative/BlockImplicitlyRestartedLanczos/FieldBasisVector.h create mode 100644 lib/algorithms/iterative/BlockImplicitlyRestartedLanczos/FieldVectorIO.h create mode 100644 tests/solver/Params.h create mode 100644 tests/solver/Test_dwf_compressed_lanczos.cc diff --git a/lib/algorithms/LinearOperator.h b/lib/algorithms/LinearOperator.h index d402c5b7..f1b8820e 100644 --- a/lib/algorithms/LinearOperator.h +++ b/lib/algorithms/LinearOperator.h @@ -207,7 +207,6 @@ namespace Grid { void OpDir (const Field &in, Field &out,int dir,int disp) { assert(0); } - }; template class SchurDiagMooeeOperator : public SchurOperatorBase { @@ -265,7 +264,6 @@ namespace Grid { return axpy_norm(out,-1.0,tmp,in); } }; - template class SchurDiagTwoOperator : public SchurOperatorBase { protected: @@ -294,8 +292,15 @@ namespace Grid { return axpy_norm(out,-1.0,tmp,in); } }; - - // + /////////////////////////////////////////////////////////////////////////////////////////////////// + // Left handed Moo^-1 ; (Moo - Moe Mee^-1 Meo) psi = eta --> ( 1 - Moo^-1 Moe Mee^-1 Meo ) psi = Moo^-1 eta + // Right handed Moo^-1 ; (Moo - Moe Mee^-1 Meo) Moo^-1 Moo psi = eta --> ( 1 - Moe Mee^-1 Meo ) Moo^-1 phi=eta ; psi = Moo^-1 phi + /////////////////////////////////////////////////////////////////////////////////////////////////// + template using SchurDiagOneRH = SchurDiagTwoOperator ; + template using 
SchurDiagOneLH = SchurDiagOneOperator ; + /////////////////////////////////////////////////////////////////////////////////////////////////// + // Staggered use + /////////////////////////////////////////////////////////////////////////////////////////////////// template class SchurStaggeredOperator : public SchurOperatorBase { protected: @@ -303,9 +308,8 @@ namespace Grid { public: SchurStaggeredOperator (Matrix &Mat): _Mat(Mat){}; virtual void HermOpAndNorm(const Field &in, Field &out,RealD &n1,RealD &n2){ - ComplexD dot; n2 = Mpc(in,out); - dot= innerProduct(in,out); + ComplexD dot= innerProduct(in,out); n1 = real(dot); } virtual void HermOp(const Field &in, Field &out){ diff --git a/lib/algorithms/iterative/BlockImplicitlyRestartedLanczos/BlockImplicitlyRestartedLanczos.h b/lib/algorithms/iterative/BlockImplicitlyRestartedLanczos/BlockImplicitlyRestartedLanczos.h new file mode 100644 index 00000000..82a00efa --- /dev/null +++ b/lib/algorithms/iterative/BlockImplicitlyRestartedLanczos/BlockImplicitlyRestartedLanczos.h @@ -0,0 +1,754 @@ + /************************************************************************************* + + Grid physics library, www.github.com/paboyle/Grid + + Source file: ./lib/algorithms/iterative/ImplicitlyRestartedLanczos.h + + Copyright (C) 2015 + +Author: Peter Boyle +Author: paboyle +Author: Chulwoo Jung +Author: Christoph Lehner + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + You should have received a copy of the GNU General Public License along + with this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + + See the full license in the file "LICENSE" in the top level distribution directory + *************************************************************************************/ + /* END LEGAL */ +#ifndef GRID_BIRL_H +#define GRID_BIRL_H + +#include //memset + +#include +#include + +#include +#include +#include +#include + +namespace Grid { + +///////////////////////////////////////////////////////////// +// Implicitly restarted lanczos +///////////////////////////////////////////////////////////// + + template + class BlockImplicitlyRestartedLanczos { + + const RealD small = 1.0e-16; +public: + int lock; + int get; + int Niter; + int converged; + + int Nminres; // Minimum number of restarts; only check for convergence after + int Nstop; // Number of evecs checked for convergence + int Nk; // Number of converged sought + int Np; // Np -- Number of spare vecs in kryloc space + int Nm; // Nm -- total number of vectors + + int orth_period; + + RealD OrthoTime; + + RealD eresid, betastp; + SortEigen _sort; + LinearFunction &_HermOp; + LinearFunction &_HermOpTest; + ///////////////////////// + // Constructor + ///////////////////////// + + BlockImplicitlyRestartedLanczos( + LinearFunction & HermOp, + LinearFunction & HermOpTest, + int _Nstop, // sought vecs + int _Nk, // sought vecs + int _Nm, // spare vecs + RealD _eresid, // resid in lmdue deficit + RealD _betastp, // if beta(k) < betastp: converged + int _Niter, // Max iterations + int _Nminres, int _orth_period = 1) : + _HermOp(HermOp), + _HermOpTest(HermOpTest), + Nstop(_Nstop), + Nk(_Nk), + Nm(_Nm), + eresid(_eresid), + betastp(_betastp), + Niter(_Niter), + Nminres(_Nminres), + orth_period(_orth_period) + { + Np = Nm-Nk; assert(Np>0); + }; + + BlockImplicitlyRestartedLanczos( + LinearFunction & HermOp, + 
LinearFunction & HermOpTest, + int _Nk, // sought vecs + int _Nm, // spare vecs + RealD _eresid, // resid in lmdue deficit + RealD _betastp, // if beta(k) < betastp: converged + int _Niter, // Max iterations + int _Nminres, + int _orth_period = 1) : + _HermOp(HermOp), + _HermOpTest(HermOpTest), + Nstop(_Nk), + Nk(_Nk), + Nm(_Nm), + eresid(_eresid), + betastp(_betastp), + Niter(_Niter), + Nminres(_Nminres), + orth_period(_orth_period) + { + Np = Nm-Nk; assert(Np>0); + }; + + +/* Saad PP. 195 +1. Choose an initial vector v1 of 2-norm unity. Set β1 ≡ 0, v0 ≡ 0 +2. For k = 1,2,...,m Do: +3. wk:=Avk−βkv_{k−1} +4. αk:=(wk,vk) // +5. wk:=wk−αkvk // wk orthog vk +6. βk+1 := ∥wk∥2. If βk+1 = 0 then Stop +7. vk+1 := wk/βk+1 +8. EndDo + */ + void step(std::vector& lmd, + std::vector& lme, + BasisFieldVector& evec, + Field& w,int Nm,int k) + { + assert( k< Nm ); + + GridStopWatch gsw_op,gsw_o; + + Field& evec_k = evec[k]; + + gsw_op.Start(); + _HermOp(evec_k,w); + gsw_op.Stop(); + + if(k>0){ + w -= lme[k-1] * evec[k-1]; + } + + ComplexD zalph = innerProduct(evec_k,w); // 4. αk:=(wk,vk) + RealD alph = real(zalph); + + w = w - alph * evec_k;// 5. wk:=wk−αkvk + + RealD beta = normalise(w); // 6. βk+1 := ∥wk∥2. If βk+1 = 0 then Stop + // 7. 
vk+1 := wk/βk+1 + + std::cout<0 && k % orth_period == 0) { + orthogonalize(w,evec,k); // orthonormalise + } + gsw_o.Stop(); + + if(k < Nm-1) { + evec[k+1] = w; + } + + std::cout << GridLogMessage << "Timing: operator=" << gsw_op.Elapsed() << + " orth=" << gsw_o.Elapsed() << std::endl; + + } + + void qr_decomp(std::vector& lmd, + std::vector& lme, + int Nk, + int Nm, + std::vector& Qt, + RealD Dsh, + int kmin, + int kmax) + { + int k = kmin-1; + RealD x; + + RealD Fden = 1.0/hypot(lmd[k]-Dsh,lme[k]); + RealD c = ( lmd[k] -Dsh) *Fden; + RealD s = -lme[k] *Fden; + + RealD tmpa1 = lmd[k]; + RealD tmpa2 = lmd[k+1]; + RealD tmpb = lme[k]; + + lmd[k] = c*c*tmpa1 +s*s*tmpa2 -2.0*c*s*tmpb; + lmd[k+1] = s*s*tmpa1 +c*c*tmpa2 +2.0*c*s*tmpb; + lme[k] = c*s*(tmpa1-tmpa2) +(c*c-s*s)*tmpb; + x =-s*lme[k+1]; + lme[k+1] = c*lme[k+1]; + + for(int i=0; i& lmd, + std::vector& lme, + int N1, + int N2, + std::vector& Qt, + GridBase *grid){ + + std::cout << GridLogMessage << "diagonalize_lapack start\n"; + GridStopWatch gsw; + + const int size = Nm; + // tevals.resize(size); + // tevecs.resize(size); + LAPACK_INT NN = N1; + std::vector evals_tmp(NN); + std::vector evec_tmp(NN*NN); + memset(&evec_tmp[0],0,sizeof(double)*NN*NN); + // double AA[NN][NN]; + std::vector DD(NN); + std::vector EE(NN); + for (int i = 0; i< NN; i++) + for (int j = i - 1; j <= i + 1; j++) + if ( j < NN && j >= 0 ) { + if (i==j) DD[i] = lmd[i]; + if (i==j) evals_tmp[i] = lmd[i]; + if (j==(i-1)) EE[j] = lme[j]; + } + LAPACK_INT evals_found; + LAPACK_INT lwork = ( (18*NN) > (1+4*NN+NN*NN)? 
(18*NN):(1+4*NN+NN*NN)) ; + LAPACK_INT liwork = 3+NN*10 ; + std::vector iwork(liwork); + std::vector work(lwork); + std::vector isuppz(2*NN); + char jobz = 'V'; // calculate evals & evecs + char range = 'I'; // calculate all evals + // char range = 'A'; // calculate all evals + char uplo = 'U'; // refer to upper half of original matrix + char compz = 'I'; // Compute eigenvectors of tridiagonal matrix + std::vector ifail(NN); + LAPACK_INT info; + // int total = QMP_get_number_of_nodes(); + // int node = QMP_get_node_number(); + // GridBase *grid = evec[0]._grid; + int total = grid->_Nprocessors; + int node = grid->_processor; + int interval = (NN/total)+1; + double vl = 0.0, vu = 0.0; + LAPACK_INT il = interval*node+1 , iu = interval*(node+1); + if (iu > NN) iu=NN; + double tol = 0.0; + if (1) { + memset(&evals_tmp[0],0,sizeof(double)*NN); + if ( il <= NN){ + std::cout << GridLogMessage << "dstegr started" << std::endl; + gsw.Start(); + dstegr(&jobz, &range, &NN, + (double*)&DD[0], (double*)&EE[0], + &vl, &vu, &il, &iu, // these four are ignored if second parameteris 'A' + &tol, // tolerance + &evals_found, &evals_tmp[0], (double*)&evec_tmp[0], &NN, + &isuppz[0], + &work[0], &lwork, &iwork[0], &liwork, + &info); + gsw.Stop(); + std::cout << GridLogMessage << "dstegr completed in " << gsw.Elapsed() << std::endl; + for (int i = iu-1; i>= il-1; i--){ + evals_tmp[i] = evals_tmp[i - (il-1)]; + if (il>1) evals_tmp[i-(il-1)]=0.; + for (int j = 0; j< NN; j++){ + evec_tmp[i*NN + j] = evec_tmp[(i - (il-1)) * NN + j]; + if (il>1) evec_tmp[(i-(il-1)) * NN + j]=0.; + } + } + } + { + // QMP_sum_double_array(evals_tmp,NN); + // QMP_sum_double_array((double *)evec_tmp,NN*NN); + grid->GlobalSumVector(&evals_tmp[0],NN); + grid->GlobalSumVector(&evec_tmp[0],NN*NN); + } + } + // cheating a bit. It is better to sort instead of just reversing it, but the document of the routine says evals are sorted in increasing order. qr gives evals in decreasing order. 
+ for(int i=0;i& lmd, + std::vector& lme, + int N2, + int N1, + std::vector& Qt, + GridBase *grid) + { + +#ifdef USE_LAPACK_IRL + const int check_lapack=0; // just use lapack if 0, check against lapack if 1 + + if(!check_lapack) + return diagonalize_lapack(lmd,lme,N2,N1,Qt,grid); + + std::vector lmd2(N1); + std::vector lme2(N1); + std::vector Qt2(N1*N1); + for(int k=0; k= kmin; --j){ + RealD dds = fabs(lmd[j-1])+fabs(lmd[j]); + if(fabs(lme[j-1])+dds > dds){ + kmax = j+1; + goto continued; + } + } + Niter = iter; +#ifdef USE_LAPACK_IRL + if(check_lapack){ + const double SMALL=1e-8; + diagonalize_lapack(lmd2,lme2,N2,N1,Qt2,grid); + std::vector lmd3(N2); + for(int k=0; kSMALL) std::cout<SMALL) std::cout<SMALL) std::cout< dds){ + kmin = j+1; + break; + } + } + } + std::cout< + static RealD normalise(T& v) + { + RealD nn = norm2(v); + nn = sqrt(nn); + v = v * (1.0/nn); + return nn; + } + + void orthogonalize(Field& w, + BasisFieldVector& evec, + int k) + { + double t0=-usecond()/1e6; + + evec.orthogonalize(w,k); + + normalise(w); + t0+=usecond()/1e6; + OrthoTime +=t0; + } + + void setUnit_Qt(int Nm, std::vector &Qt) { + for(int i=0; i K P = M − K † +Compute the factorization AVM = VM HM + fM eM +repeat + Q=I + for i = 1,...,P do + QiRi =HM −θiI Q = QQi + H M = Q †i H M Q i + end for + βK =HM(K+1,K) σK =Q(M,K) + r=vK+1βK +rσK + VK =VM(1:M)Q(1:M,1:K) + HK =HM(1:K,1:K) + →AVK =VKHK +fKe†K † Extend to an M = K + P step factorization AVM = VMHM + fMeM +until convergence +*/ + + void calc(std::vector& eval, + BasisFieldVector& evec, + const Field& src, + int& Nconv, + bool reverse, + int SkipTest) + { + + GridBase *grid = evec._v[0]._grid;//evec.get(0 + evec_offset)._grid; + assert(grid == src._grid); + + std::cout< lme(Nm); + std::vector lme2(Nm); + std::vector eval2(Nm); + std::vector eval2_copy(Nm); + std::vector Qt(Nm*Nm); + + + Field f(grid); + Field v(grid); + + int k1 = 1; + int k2 = Nk; + + Nconv = 0; + + RealD beta_k; + + // Set initial vector + evec[0] = src; + 
normalise(evec[0]); + std:: cout<0); + evec.rotate(Qt,k1-1,k2+1,0,Nm,Nm); + + t1=usecond()/1e6; + std::cout<= Nminres) { + std::cout << GridLogMessage << "Rotation to test convergence " << std::endl; + + Field ev0_orig(grid); + ev0_orig = evec[0]; + + evec.rotate(Qt,0,Nk,0,Nk,Nm); + + { + std::cout << GridLogMessage << "Test convergence" << std::endl; + Field B(grid); + + for(int j = 0; j=Nstop || beta_k < betastp){ + goto converged; + } + + std::cout << GridLogMessage << "Rotate back" << std::endl; + //B[j] +=Qt[k+_Nm*j] * _v[k]._odata[ss]; + { + Eigen::MatrixXd qm = Eigen::MatrixXd::Zero(Nk,Nk); + for (int k=0;k QtI(Nm*Nm); + for (int k=0;k +class BlockProjector { +public: + + BasisFieldVector& _evec; + BlockedGrid& _bgrid; + + BlockProjector(BasisFieldVector& evec, BlockedGrid& bgrid) : _evec(evec), _bgrid(bgrid) { + } + + void createOrthonormalBasis(RealD thres = 0.0) { + + GridStopWatch sw; + sw.Start(); + + int cnt = 0; + +#pragma omp parallel shared(cnt) + { + int lcnt = 0; + +#pragma omp for + for (int b=0;b<_bgrid._o_blocks;b++) { + + for (int i=0;i<_evec._Nm;i++) { + + auto nrm0 = _bgrid.block_sp(b,_evec._v[i],_evec._v[i]); + + // |i> -= |j> + for (int j=0;j + void coarseToFine(const CoarseField& in, Field& out) { + + out = zero; + out.checkerboard = _evec._v[0].checkerboard; + + int Nbasis = sizeof(in._odata[0]._internal._internal) / sizeof(in._odata[0]._internal._internal[0]); + assert(Nbasis == _evec._Nm); + +#pragma omp parallel for + for (int b=0;b<_bgrid._o_blocks;b++) { + for (int j=0;j<_evec._Nm;j++) { + _bgrid.block_caxpy(b,out,in._odata[b]._internal._internal[j],_evec._v[j],out); + } + } + + } + + template + void fineToCoarse(const Field& in, CoarseField& out) { + + out = zero; + + int Nbasis = sizeof(out._odata[0]._internal._internal) / sizeof(out._odata[0]._internal._internal[0]); + assert(Nbasis == _evec._Nm); + + + Field tmp(_bgrid._grid); + tmp = in; + +#pragma omp parallel for + for (int b=0;b<_bgrid._o_blocks;b++) { + for (int 
j=0;j<_evec._Nm;j++) { + // |rhs> -= |j> + auto c = _bgrid.block_sp(b,_evec._v[j],tmp); + _bgrid.block_caxpy(b,tmp,-c,_evec._v[j],tmp); // may make this more numerically stable + out._odata[b]._internal._internal[j] = c; + } + } + + } + + template + void deflateFine(BasisFieldVector& _coef,const std::vector& eval,int N,const Field& src_orig,Field& result) { + result = zero; + for (int i=0;i + void deflateCoarse(BasisFieldVector& _coef,const std::vector& eval,int N,const Field& src_orig,Field& result) { + CoarseField src_coarse(_coef._v[0]._grid); + CoarseField result_coarse = src_coarse; + result_coarse = zero; + fineToCoarse(src_orig,src_coarse); + for (int i=0;i + void deflate(BasisFieldVector& _coef,const std::vector& eval,int N,const Field& src_orig,Field& result) { + // Deflation on coarse Grid is much faster, so use it by default. Deflation on fine Grid is kept for legacy reasons for now. + deflateCoarse(_coef,eval,N,src_orig,result); + } + +}; +} diff --git a/lib/algorithms/iterative/BlockImplicitlyRestartedLanczos/BlockedGrid.h b/lib/algorithms/iterative/BlockImplicitlyRestartedLanczos/BlockedGrid.h new file mode 100644 index 00000000..821272de --- /dev/null +++ b/lib/algorithms/iterative/BlockImplicitlyRestartedLanczos/BlockedGrid.h @@ -0,0 +1,401 @@ +namespace Grid { + +template +class BlockedGrid { +public: + GridBase* _grid; + typedef typename Field::scalar_type Coeff_t; + typedef typename Field::vector_type vCoeff_t; + + std::vector _bs; // block size + std::vector _nb; // number of blocks + std::vector _l; // local dimensions irrespective of cb + std::vector _l_cb; // local dimensions of checkerboarded vector + std::vector _l_cb_o; // local dimensions of inner checkerboarded vector + std::vector _bs_cb; // block size in checkerboarded vector + std::vector _nb_o; // number of blocks of simd o-sites + + int _nd, _blocks, _cf_size, _cf_block_size, _cf_o_block_size, _o_blocks, _block_sites; + + BlockedGrid(GridBase* grid, const std::vector& block_size) : 
+ _grid(grid), _bs(block_size), _nd((int)_bs.size()), + _nb(block_size), _l(block_size), _l_cb(block_size), _nb_o(block_size), + _l_cb_o(block_size), _bs_cb(block_size) { + + _blocks = 1; + _o_blocks = 1; + _l = grid->FullDimensions(); + _l_cb = grid->LocalDimensions(); + _l_cb_o = grid->_rdimensions; + + _cf_size = 1; + _block_sites = 1; + for (int i=0;i<_nd;i++) { + _l[i] /= grid->_processors[i]; + + assert(!(_l[i] % _bs[i])); // lattice must accommodate choice of blocksize + + int r = _l[i] / _l_cb[i]; + assert(!(_bs[i] % r)); // checkerboarding must accommodate choice of blocksize + _bs_cb[i] = _bs[i] / r; + _block_sites *= _bs_cb[i]; + _nb[i] = _l[i] / _bs[i]; + _nb_o[i] = _nb[i] / _grid->_simd_layout[i]; + if (_nb[i] % _grid->_simd_layout[i]) { // simd must accommodate choice of blocksize + std::cout << GridLogMessage << "Problem: _nb[" << i << "] = " << _nb[i] << " _grid->_simd_layout[" << i << "] = " << _grid->_simd_layout[i] << std::endl; + assert(0); + } + _blocks *= _nb[i]; + _o_blocks *= _nb_o[i]; + _cf_size *= _l[i]; + } + + _cf_size *= 12 / 2; + _cf_block_size = _cf_size / _blocks; + _cf_o_block_size = _cf_size / _o_blocks; + + std::cout << GridLogMessage << "BlockedGrid:" << std::endl; + std::cout << GridLogMessage << " _l = " << _l << std::endl; + std::cout << GridLogMessage << " _l_cb = " << _l_cb << std::endl; + std::cout << GridLogMessage << " _l_cb_o = " << _l_cb_o << std::endl; + std::cout << GridLogMessage << " _bs = " << _bs << std::endl; + std::cout << GridLogMessage << " _bs_cb = " << _bs_cb << std::endl; + + std::cout << GridLogMessage << " _nb = " << _nb << std::endl; + std::cout << GridLogMessage << " _nb_o = " << _nb_o << std::endl; + std::cout << GridLogMessage << " _blocks = " << _blocks << std::endl; + std::cout << GridLogMessage << " _o_blocks = " << _o_blocks << std::endl; + std::cout << GridLogMessage << " sizeof(vCoeff_t) = " << sizeof(vCoeff_t) << std::endl; + std::cout << GridLogMessage << " _cf_size = " << _cf_size << 
std::endl; + std::cout << GridLogMessage << " _cf_block_size = " << _cf_block_size << std::endl; + std::cout << GridLogMessage << " _block_sites = " << _block_sites << std::endl; + std::cout << GridLogMessage << " _grid->oSites() = " << _grid->oSites() << std::endl; + + // _grid->Barrier(); + //abort(); + } + + void block_to_coor(int b, std::vector& x0) { + + std::vector bcoor; + bcoor.resize(_nd); + x0.resize(_nd); + assert(b < _o_blocks); + Lexicographic::CoorFromIndex(bcoor,b,_nb_o); + int i; + + for (i=0;i<_nd;i++) { + x0[i] = bcoor[i]*_bs_cb[i]; + } + + //std::cout << GridLogMessage << "Map block b -> " << x0 << std::endl; + + } + + void block_site_to_o_coor(const std::vector& x0, std::vector& coor, int i) { + Lexicographic::CoorFromIndex(coor,i,_bs_cb); + for (int j=0;j<_nd;j++) + coor[j] += x0[j]; + } + + int block_site_to_o_site(const std::vector& x0, int i) { + std::vector coor; coor.resize(_nd); + block_site_to_o_coor(x0,coor,i); + Lexicographic::IndexFromCoor(coor,i,_l_cb_o); + return i; + } + + vCoeff_t block_sp(int b, const Field& x, const Field& y) { + + std::vector x0; + block_to_coor(b,x0); + + vCoeff_t ret = 0.0; + for (int i=0;i<_block_sites;i++) { // only odd sites + int ss = block_site_to_o_site(x0,i); + ret += TensorRemove(innerProduct(x._odata[ss],y._odata[ss])); + } + + return ret; + + } + + vCoeff_t block_sp(int b, const Field& x, const std::vector< ComplexD >& y) { + + std::vector x0; + block_to_coor(b,x0); + + constexpr int nsimd = sizeof(vCoeff_t) / sizeof(Coeff_t); + int lsize = _cf_o_block_size / _block_sites; + + std::vector< ComplexD > ret(nsimd); + for (int i=0;i + void vcaxpy(iScalar& r,const vCoeff_t& a,const iScalar& x,const iScalar& y) { + vcaxpy(r._internal,a,x._internal,y._internal); + } + + template + void vcaxpy(iVector& r,const vCoeff_t& a,const iVector& x,const iVector& y) { + for (int i=0;i x0; + block_to_coor(b,x0); + + for (int i=0;i<_block_sites;i++) { // only odd sites + int ss = block_site_to_o_site(x0,i); + 
vcaxpy(ret._odata[ss],a,x._odata[ss],y._odata[ss]); + } + + } + + void block_caxpy(int b, std::vector< ComplexD >& ret, const vCoeff_t& a, const Field& x, const std::vector< ComplexD >& y) { + std::vector x0; + block_to_coor(b,x0); + + constexpr int nsimd = sizeof(vCoeff_t) / sizeof(Coeff_t); + int lsize = _cf_o_block_size / _block_sites; + + for (int i=0;i<_block_sites;i++) { // only odd sites + int ss = block_site_to_o_site(x0,i); + + int n = lsize / nsimd; + for (int l=0;l& x) { + std::vector x0; + block_to_coor(b,x0); + + int lsize = _cf_o_block_size / _block_sites; + + for (int i=0;i<_block_sites;i++) { // only odd sites + int ss = block_site_to_o_site(x0,i); + + for (int l=0;l& x) { + std::vector x0; + block_to_coor(b,x0); + + int lsize = _cf_o_block_size / _block_sites; + + for (int i=0;i<_block_sites;i++) { // only odd sites + int ss = block_site_to_o_site(x0,i); + + for (int l=0;l + void vcscale(iScalar& r,const vCoeff_t& a,const iScalar& x) { + vcscale(r._internal,a,x._internal); + } + + template + void vcscale(iVector& r,const vCoeff_t& a,const iVector& x) { + for (int i=0;i x0; + block_to_coor(b,x0); + + for (int i=0;i<_block_sites;i++) { // only odd sites + int ss = block_site_to_o_site(x0,i); + vcscale(ret._odata[ss],a,ret._odata[ss]); + } + } + + void getCanonicalBlockOffset(int cb, std::vector& x0) { + const int ndim = 5; + assert(_nb.size() == ndim); + std::vector _nbc = { _nb[1], _nb[2], _nb[3], _nb[4], _nb[0] }; + std::vector _bsc = { _bs[1], _bs[2], _bs[3], _bs[4], _bs[0] }; + x0.resize(ndim); + + assert(cb >= 0); + assert(cb < _nbc[0]*_nbc[1]*_nbc[2]*_nbc[3]*_nbc[4]); + + Lexicographic::CoorFromIndex(x0,cb,_nbc); + int i; + + for (i=0;i& buf) { + std::vector _bsc = { _bs[1], _bs[2], _bs[3], _bs[4], _bs[0] }; + std::vector ldim = v._grid->LocalDimensions(); + std::vector cldim = { ldim[1], ldim[2], ldim[3], ldim[4], ldim[0] }; + const int _nbsc = _bs_cb[0]*_bs_cb[1]*_bs_cb[2]*_bs_cb[3]*_bs_cb[4]; + // take canonical block cb of v and put it in 
canonical ordering in buf + std::vector cx0; + getCanonicalBlockOffset(cb,cx0); + +#pragma omp parallel + { + std::vector co0,cl0; + co0=cx0; cl0=cx0; + +#pragma omp for + for (int i=0;i<_nbsc;i++) { + Lexicographic::CoorFromIndex(co0,2*i,_bsc); // 2* for eo + for (int j=0;j<(int)_bsc.size();j++) + cl0[j] = cx0[j] + co0[j]; + + std::vector l0 = { cl0[4], cl0[0], cl0[1], cl0[2], cl0[3] }; + int oi = v._grid->oIndex(l0); + int ii = v._grid->iIndex(l0); + int lti = i; + + //if (cb < 2 && i<2) + // std::cout << GridLogMessage << "Map: " << cb << ", " << i << " To: " << cl0 << ", " << cx0 << ", " << oi << ", " << ii << std::endl; + + for (int s=0;s<4;s++) + for (int c=0;c<3;c++) { + Coeff_t& ld = ((Coeff_t*)&v._odata[oi]._internal._internal[s]._internal[c])[ii]; + int ti = 12*lti + 3*s + c; + ld = Coeff_t(buf[2*ti+0], buf[2*ti+1]); + } + } + } + } + + void peekBlockOfVectorCanonical(int cb,const Field& v,std::vector& buf) { + std::vector _bsc = { _bs[1], _bs[2], _bs[3], _bs[4], _bs[0] }; + std::vector ldim = v._grid->LocalDimensions(); + std::vector cldim = { ldim[1], ldim[2], ldim[3], ldim[4], ldim[0] }; + const int _nbsc = _bs_cb[0]*_bs_cb[1]*_bs_cb[2]*_bs_cb[3]*_bs_cb[4]; + // take canonical block cb of v and put it in canonical ordering in buf + std::vector cx0; + getCanonicalBlockOffset(cb,cx0); + + buf.resize(_cf_block_size * 2); + +#pragma omp parallel + { + std::vector co0,cl0; + co0=cx0; cl0=cx0; + +#pragma omp for + for (int i=0;i<_nbsc;i++) { + Lexicographic::CoorFromIndex(co0,2*i,_bsc); // 2* for eo + for (int j=0;j<(int)_bsc.size();j++) + cl0[j] = cx0[j] + co0[j]; + + std::vector l0 = { cl0[4], cl0[0], cl0[1], cl0[2], cl0[3] }; + int oi = v._grid->oIndex(l0); + int ii = v._grid->iIndex(l0); + int lti = i; + + //if (cb < 2 && i<2) + // std::cout << GridLogMessage << "Map: " << cb << ", " << i << " To: " << cl0 << ", " << cx0 << ", " << oi << ", " << ii << std::endl; + + for (int s=0;s<4;s++) + for (int c=0;c<3;c++) { + Coeff_t& ld = 
((Coeff_t*)&v._odata[oi]._internal._internal[s]._internal[c])[ii]; + int ti = 12*lti + 3*s + c; + buf[2*ti+0] = ld.real(); + buf[2*ti+1] = ld.imag(); + } + } + } + } + + int globalToLocalCanonicalBlock(int slot,const std::vector& src_nodes,int nb) { + // processor coordinate + int _nd = (int)src_nodes.size(); + std::vector _src_nodes = src_nodes; + std::vector pco(_nd); + Lexicographic::CoorFromIndex(pco,slot,_src_nodes); + std::vector cpco = { pco[1], pco[2], pco[3], pco[4], pco[0] }; + + // get local block + std::vector _nbc = { _nb[1], _nb[2], _nb[3], _nb[4], _nb[0] }; + assert(_nd == 5); + std::vector c_src_local_blocks(_nd); + for (int i=0;i<_nd;i++) { + assert(_grid->_fdimensions[i] % (src_nodes[i] * _bs[i]) == 0); + c_src_local_blocks[(i+4) % 5] = _grid->_fdimensions[i] / src_nodes[i] / _bs[i]; + } + std::vector cbcoor(_nd); // coordinate of block in slot in canonical form + Lexicographic::CoorFromIndex(cbcoor,nb,c_src_local_blocks); + + // cpco, cbcoor + std::vector clbcoor(_nd); + for (int i=0;i<_nd;i++) { + int cgcoor = cpco[i] * c_src_local_blocks[i] + cbcoor[i]; // global block coordinate + int pcoor = cgcoor / _nbc[i]; // processor coordinate in my Grid + int tpcoor = _grid->_processor_coor[(i+1)%5]; + if (pcoor != tpcoor) + return -1; + clbcoor[i] = cgcoor - tpcoor * _nbc[i]; // canonical local block coordinate for canonical dimension i + } + + int lnb; + Lexicographic::IndexFromCoor(clbcoor,lnb,_nbc); + //std::cout << "Mapped slot = " << slot << " nb = " << nb << " to " << lnb << std::endl; + return lnb; + } + + + }; + +} diff --git a/lib/algorithms/iterative/BlockImplicitlyRestartedLanczos/FieldBasisVector.h b/lib/algorithms/iterative/BlockImplicitlyRestartedLanczos/FieldBasisVector.h new file mode 100644 index 00000000..e715fc25 --- /dev/null +++ b/lib/algorithms/iterative/BlockImplicitlyRestartedLanczos/FieldBasisVector.h @@ -0,0 +1,163 @@ +namespace Grid { + +template +class BasisFieldVector { + public: + int _Nm; + + typedef typename 
Field::scalar_type Coeff_t; + typedef typename Field::vector_type vCoeff_t; + typedef typename Field::vector_object vobj; + typedef typename vobj::scalar_object sobj; + + std::vector _v; // _Nfull vectors + + void report(int n,GridBase* value) { + + std::cout << GridLogMessage << "BasisFieldVector allocated:\n"; + std::cout << GridLogMessage << " Delta N = " << n << "\n"; + std::cout << GridLogMessage << " Size of full vectors (size) = " << + ((double)n*sizeof(vobj)*value->oSites() / 1024./1024./1024.) << " GB\n"; + std::cout << GridLogMessage << " Size = " << _v.size() << " Capacity = " << _v.capacity() << std::endl; + + value->Barrier(); + + if (value->IsBoss()) { + system("cat /proc/meminfo"); + } + + value->Barrier(); + + } + + BasisFieldVector(int Nm,GridBase* value) : _Nm(Nm), _v(Nm,value) { + report(Nm,value); + } + + ~BasisFieldVector() { + } + + Field& operator[](int i) { + return _v[i]; + } + + void orthogonalize(Field& w, int k) { + for(int j=0; j& Qt,int j0, int j1, int k0,int k1,int Nm) { + + GridBase* grid = _v[0]._grid; + +#pragma omp parallel + { + std::vector < vobj > B(Nm); + +#pragma omp for + for(int ss=0;ss < grid->oSites();ss++){ + for(int j=j0; j _Nm) + _v.reserve(n); + + _v.resize(n,_v[0]._grid); + + if (n < _Nm) + _v.shrink_to_fit(); + + report(n - _Nm,_v[0]._grid); + + _Nm = n; + } + + std::vector getIndex(std::vector& sort_vals) { + + std::vector idx(sort_vals.size()); + iota(idx.begin(), idx.end(), 0); + + // sort indexes based on comparing values in v + sort(idx.begin(), idx.end(), + [&sort_vals](int i1, int i2) {return ::fabs(sort_vals[i1]) < ::fabs(sort_vals[i2]);}); + + return idx; + } + + void reorderInPlace(std::vector& sort_vals, std::vector& idx) { + GridStopWatch gsw; + gsw.Start(); + + int nswaps = 0; + for (size_t i=0;i& sort_vals, bool reverse) { + + std::vector idx = getIndex(sort_vals); + if (reverse) + std::reverse(idx.begin(), idx.end()); + + reorderInPlace(sort_vals,idx); + + } + + void deflate(const std::vector& 
eval,const Field& src_orig,Field& result) { + result = zero; + int N = (int)_v.size(); + for (int i=0;i step) { + crc = crc32(crc,&data[blk],step); + blk += step; + len -= step; + } + + crc = crc32(crc,&data[blk],len); + return crc; + + } + + static int get_bfm_index( int* pos, int co, int* s ) { + + int ls = s[0]; + int NtHalf = s[4] / 2; + int simd_coor = pos[4] / NtHalf; + int regu_coor = (pos[1] + s[1] * (pos[2] + s[2] * ( pos[3] + s[3] * (pos[4] % NtHalf) ) )) / 2; + + return regu_coor * ls * 48 + pos[0] * 48 + co * 4 + simd_coor * 2; + } + + static void get_read_geometry(const GridBase* _grid,const std::vector& cnodes, + std::map >& slots, + std::vector& slot_lvol, + std::vector& lvol, + int64_t& slot_lsites,int& ntotal) { + + int _nd = (int)cnodes.size(); + std::vector nodes = cnodes; + + slots.clear(); + slot_lvol.clear(); + lvol.clear(); + + int i; + ntotal = 1; + int64_t lsites = 1; + slot_lsites = 1; + for (i=0;i<_nd;i++) { + assert(_grid->_fdimensions[i] % nodes[i] == 0); + slot_lvol.push_back(_grid->_fdimensions[i] / nodes[i]); + lvol.push_back(_grid->_fdimensions[i] / _grid->_processors[i]); + lsites *= lvol.back(); + slot_lsites *= slot_lvol.back(); + ntotal *= nodes[i]; + } + + std::vector lcoor, gcoor, scoor; + lcoor.resize(_nd); gcoor.resize(_nd); scoor.resize(_nd); + + // create mapping of indices to slots + for (int lidx = 0; lidx < lsites; lidx++) { + Lexicographic::CoorFromIndex(lcoor,lidx,lvol); + for (int i=0;i<_nd;i++) { + gcoor[i] = lcoor[i] + _grid->_processor_coor[i]*lvol[i]; + scoor[i] = gcoor[i] / slot_lvol[i]; + } + int slot; + Lexicographic::IndexFromCoor(scoor,slot,nodes); + auto sl = slots.find(slot); + if (sl == slots.end()) + slots[slot] = std::vector(); + slots[slot].push_back(lidx); + } + } + + static void canonical_block_to_coarse_coordinates(GridBase* _coarsegrid,int nb,int& ii,int& oi) { + // canonical nb needs to be mapped in a coordinate on my coarsegrid (ii,io) + std::vector _l = _coarsegrid->LocalDimensions(); + 
std::vector _cl = { _l[1], _l[2], _l[3], _l[4], _l[0] }; + std::vector _cc(_l.size()); + Lexicographic::CoorFromIndex(_cc,nb,_cl); + std::vector _c = { _cc[4], _cc[0], _cc[1], _cc[2], _cc[3] }; + ii = _coarsegrid->iIndex(_c); + oi = _coarsegrid->oIndex(_c); + } + + template + static bool read_argonne(BasisFieldVector& ret,const char* dir, const std::vector& cnodes) { + + GridBase* _grid = ret._v[0]._grid; + + std::map > slots; + std::vector slot_lvol, lvol; + int64_t slot_lsites; + int ntotal; + get_read_geometry(_grid,cnodes, + slots,slot_lvol,lvol,slot_lsites, + ntotal); + int _nd = (int)lvol.size(); + + // this is slow code to read the argonne file format for debugging purposes + int nperdir = ntotal / 32; + if (nperdir < 1) + nperdir=1; + std::cout << GridLogMessage << " Read " << dir << " nodes = " << cnodes << std::endl; + std::cout << GridLogMessage << " lvol = " << lvol << std::endl; + + // for error messages + char hostname[1024]; + gethostname(hostname, 1024); + + // now load one slot at a time and fill the vector + for (auto sl=slots.begin();sl!=slots.end();sl++) { + std::vector& idx = sl->second; + int slot = sl->first; + std::vector rdata; + + char buf[4096]; + + sprintf(buf,"%s/checksums.txt",dir); printf("read_argonne: Reading from %s\n",buf); + FILE* f = fopen(buf,"rt"); + if (!f) { + fprintf(stderr,"Node %s cannot read %s\n",hostname,buf); fflush(stderr); + return false; + } + + for (int l=0;l<3+slot;l++) + fgets(buf,sizeof(buf),f); + uint32_t crc_exp = strtol(buf, NULL, 16); + fclose(f); + + // load one slot vector + sprintf(buf,"%s/%2.2d/%10.10d",dir,slot/nperdir,slot); + f = fopen(buf,"rb"); + if (!f) { + fprintf(stderr,"Node %s cannot read %s\n",hostname,buf); fflush(stderr); + return false; + } + + fseeko(f,0,SEEK_END); + off_t total_size = ftello(f); + fseeko(f,0,SEEK_SET); + + int64_t size = slot_lsites / 2 * 24*4; + rdata.resize(size); + + assert(total_size % size == 0); + + int _Nfull = total_size / size; + ret._v.resize(_Nfull,ret._v[0]); 
+ ret._Nm = _Nfull; + + uint32_t crc = 0x0; + GridStopWatch gsw,gsw2; + for (int nev = 0;nev < _Nfull;nev++) { + + gsw.Start(); + assert(fread(&rdata[0],size,1,f) == 1); + gsw.Stop(); + + gsw2.Start(); + crc = crc32_threaded((unsigned char*)&rdata[0],size,crc); + gsw2.Stop(); + + for (int i=0;i lcoor, gcoor, scoor, slcoor; + lcoor.resize(_nd); gcoor.resize(_nd); + slcoor.resize(_nd); scoor.resize(_nd); + +#pragma omp for + for (int64_t lidx = 0; lidx < idx.size(); lidx++) { + int llidx = idx[lidx]; + Lexicographic::CoorFromIndex(lcoor,llidx,lvol); + for (int i=0;i<_nd;i++) { + gcoor[i] = lcoor[i] + _grid->_processor_coor[i]*lvol[i]; + scoor[i] = gcoor[i] / slot_lvol[i]; + slcoor[i] = gcoor[i] - scoor[i]*slot_lvol[i]; + } + + if ((lcoor[1]+lcoor[2]+lcoor[3]+lcoor[4]) % 2 == 1) { + // poke + iScalar, 4> > sc; + for (int s=0;s<4;s++) + for (int c=0;c<3;c++) + sc()(s)(c) = *(std::complex*)&rdata[get_bfm_index(&slcoor[0],c+s*3, &slot_lvol[0] )]; + + pokeLocalSite(sc,ret._v[nev],lcoor); + } + + } + } + } + + fclose(f); + std::cout << GridLogMessage << "Loading slot " << slot << " with " << idx.size() << " points and " + << _Nfull << " vectors in " + << gsw.Elapsed() << " at " + << ( (double)size * _Nfull / 1024./1024./1024. / gsw.useconds()*1000.*1000. ) + << " GB/s " << " crc32 = " << std::hex << crc << " crc32_expected = " << crc_exp << std::dec + << " computed at " + << ( (double)size * _Nfull / 1024./1024./1024. / gsw2.useconds()*1000.*1000. 
) + << " GB/s " + << std::endl; + + assert(crc == crc_exp); + } + + _grid->Barrier(); + std::cout << GridLogMessage << "Loading complete" << std::endl; + + return true; + } + + template + static bool read_argonne(BasisFieldVector& ret,const char* dir) { + + + GridBase* _grid = ret._v[0]._grid; + + char buf[4096]; + sprintf(buf,"%s/nodes.txt",dir); + FILE* f = fopen(buf,"rt"); + if (!f) { + if (_grid->IsBoss()) { + fprintf(stderr,"Attempting to load eigenvectors without secifying node layout failed due to absence of nodes.txt\n"); + fflush(stderr); + } + return false; + } + + + std::vector nodes((int)_grid->_processors.size()); + for (int i =0;i<(int)_grid->_processors.size();i++) + assert(fscanf(f,"%d\n",&nodes[i])==1); + fclose(f); + + return read_argonne(ret,dir,nodes); + } + + static void flush_bytes(FILE* f, std::vector& fbuf) { + if (fbuf.size()) { + + if (fwrite(&fbuf[0],fbuf.size(),1,f) != 1) { + fprintf(stderr,"Write failed of %g GB!\n",(double)fbuf.size() / 1024./1024./1024.); + exit(2); + } + + fbuf.resize(0); + + } + } + + static void write_bytes(void* buf, int64_t s, FILE* f, std::vector& fbuf, uint32_t& crc) { + static double data_counter = 0.0; + static GridStopWatch gsw_crc, gsw_flush1,gsw_flush2,gsw_write,gsw_memcpy; + if (s == 0) + return; + + // checksum + gsw_crc.Start(); + crc = crc32_threaded((unsigned char*)buf,s,crc); + gsw_crc.Stop(); + + if (s > fbuf.capacity()) { + // cannot buffer this, so first flush current buffer contents and then write this directly to file + gsw_flush1.Start(); + flush_bytes(f,fbuf); + gsw_flush1.Stop(); + + gsw_write.Start(); + if (fwrite(buf,s,1,f) != 1) { + fprintf(stderr,"Write failed of %g GB!\n",(double)s / 1024./1024./1024.); + exit(2); + } + gsw_write.Stop(); + + } + + // no room left in buffer, flush to disk + if (fbuf.size() + s > fbuf.capacity()) { + gsw_flush2.Start(); + flush_bytes(f,fbuf); + gsw_flush2.Stop(); + } + + // then fill buffer again + { + gsw_memcpy.Start(); + size_t t = fbuf.size(); + 
fbuf.resize(t + s); + memcpy(&fbuf[t],buf,s); + gsw_memcpy.Stop(); + } + + data_counter += (double)s; + if (data_counter > 1024.*1024.*20.) { + std::cout << GridLogMessage << "Writing " << ((double)data_counter / 1024./1024./1024.) << " GB at" + " crc = " << gsw_crc.Elapsed() << " flush1 = " << gsw_flush1.Elapsed() << " flush2 = " << gsw_flush2.Elapsed() << + " write = " << gsw_write.Elapsed() << " memcpy = " << gsw_memcpy.Elapsed() << std::endl; + data_counter = 0.0; + gsw_crc.Reset(); + gsw_write.Reset(); + gsw_memcpy.Reset(); + gsw_flush1.Reset(); + gsw_flush2.Reset(); + } + } + + static void write_floats(FILE* f, std::vector& fbuf, uint32_t& crc, float* buf, int64_t n) { + write_bytes(buf,n*sizeof(float),f,fbuf,crc); + } + + static void read_floats(char* & ptr, float* out, int64_t n) { + float* in = (float*)ptr; + ptr += 4*n; + + for (int64_t i=0;i 0, [0,6] -> 1; reconstruct 0 -> -3, 1-> 3 + // + // N=2 + // [-6,-2] -> 0, [-2,2] -> 1, [2,6] -> 2; reconstruct 0 -> -4, 1->0, 2->4 + int ret = (int) ( (float)(N+1) * ( (in - min) / (max - min) ) ); + if (ret == N+1) { + ret = N; + } + return ret; + } + + static float fp_unmap(int val, float min, float max, int N) { + return min + (float)(val + 0.5) * (max - min) / (float)( N + 1 ); + } + +#define SHRT_UMAX 65535 +#define FP16_BASE 1.4142135623730950488 +#define FP16_COEF_EXP_SHARE_FLOATS 10 + static float unmap_fp16_exp(unsigned short e) { + float de = (float)((int)e - SHRT_UMAX / 2); + return ::pow( FP16_BASE, de ); + } + + // can assume that v >=0 and need to guarantee that unmap_fp16_exp(map_fp16_exp(v)) >= v + static unsigned short map_fp16_exp(float v) { + // float has exponents 10^{-44.85} .. 
10^{38.53} + int exp = (int)ceil(::log(v) / ::log(FP16_BASE)) + SHRT_UMAX / 2; + if (exp < 0 || exp > SHRT_UMAX) { + fprintf(stderr,"Error in map_fp16_exp(%g,%d)\n",v,exp); + exit(3); + } + + return (unsigned short)exp; + } + + template + static void read_floats_fp16(char* & ptr, OPT* out, int64_t n, int nsc) { + + int64_t nsites = n / nsc; + if (n % nsc) { + fprintf(stderr,"Invalid size in write_floats_fp16\n"); + exit(4); + } + + unsigned short* in = (unsigned short*)ptr; + ptr += 2*(n+nsites); + + // do for each site + for (int64_t site = 0;site + static void write_floats_fp16(FILE* f, std::vector& fbuf, uint32_t& crc, OPT* in, int64_t n, int nsc) { + + int64_t nsites = n / nsc; + if (n % nsc) { + fprintf(stderr,"Invalid size in write_floats_fp16\n"); + exit(4); + } + + unsigned short* buf = (unsigned short*)malloc( sizeof(short) * (n + nsites) ); + if (!buf) { + fprintf(stderr,"Out of mem\n"); + exit(1); + } + + // do for each site +#pragma omp parallel for + for (int64_t site = 0;site max) + max = fabs(ev[i]); + } + + unsigned short exp = map_fp16_exp(max); + max = unmap_fp16_exp(exp); + min = -max; + + *bptr++ = exp; + + for (int i=0;i SHRT_UMAX) { + fprintf(stderr,"Assert failed: val = %d (%d), ev[i] = %.15g, max = %.15g, exp = %d\n",val,SHRT_UMAX,ev[i],max,(int)exp); + exit(48); + } + *bptr++ = (unsigned short)val; + } + + } + + write_bytes(buf,sizeof(short)*(n + nsites),f,fbuf,crc); + + free(buf); + } + + template + static bool read_compressed_vectors(const char* dir,BlockProjector& pr,BasisFieldVector& coef, int ngroups = 1) { + + const BasisFieldVector& basis = pr._evec; + GridBase* _grid = basis._v[0]._grid; + + // for error messages + char hostname[1024]; + gethostname(hostname, 1024); + + std::cout << GridLogMessage << "Ready on host " << hostname << " with " << ngroups << " reader groups" << std::endl; + + // first read metadata + char buf[4096]; + sprintf(buf,"%s/metadata.txt",dir); + + std::vector s,b,nb,nn,crc32; + s.resize(5); b.resize(5); 
nb.resize(5); nn.resize(5); + uint32_t neig, nkeep, nkeep_single, blocks, _FP16_COEF_EXP_SHARE_FLOATS; + uint32_t nprocessors = 1; + + FILE* f = 0; + uint32_t status = 0; + if (_grid->IsBoss()) { + f = fopen(buf,"rb"); + status=f ? 1 : 0; + } + _grid->GlobalSum(status); + std::cout << GridLogMessage << "Read params status " << status << std::endl; + + if (!status) { + return false; + } + +#define _IRL_READ_INT(buf,p) if (f) { assert(fscanf(f,buf,p)==1); } else { *(p) = 0; } _grid->GlobalSum(*(p)); + + for (int i=0;i<5;i++) { + sprintf(buf,"s[%d] = %%d\n",i); + _IRL_READ_INT(buf,&s[(i+1)%5]); + } + for (int i=0;i<5;i++) { + sprintf(buf,"b[%d] = %%d\n",i); + _IRL_READ_INT(buf,&b[(i+1)%5]); + } + for (int i=0;i<5;i++) { + sprintf(buf,"nb[%d] = %%d\n",i); + _IRL_READ_INT(buf,&nb[(i+1)%5]); + } + _IRL_READ_INT("neig = %d\n",&neig); + _IRL_READ_INT("nkeep = %d\n",&nkeep); + _IRL_READ_INT("nkeep_single = %d\n",&nkeep_single); + _IRL_READ_INT("blocks = %d\n",&blocks); + _IRL_READ_INT("FP16_COEF_EXP_SHARE_FLOATS = %d\n",&_FP16_COEF_EXP_SHARE_FLOATS); + + for (int i=0;i<5;i++) { + assert(_grid->FullDimensions()[i] % s[i] == 0); + nn[i] = _grid->FullDimensions()[i] / s[i]; + nprocessors *= nn[i]; + } + + std::cout << GridLogMessage << "Reading data that was generated on node-layout " << nn << std::endl; + + crc32.resize(nprocessors); + for (int i =0;i > slots; + std::vector slot_lvol, lvol; + int64_t slot_lsites; + int ntotal; + std::vector _nn(nn.begin(),nn.end()); + get_read_geometry(_grid,_nn, + slots,slot_lvol,lvol,slot_lsites, + ntotal); + int _nd = (int)lvol.size(); + + // types + typedef typename Field::scalar_type Coeff_t; + typedef typename CoarseField::scalar_type CoeffCoarse_t; + + // slot layout + int nperdir = ntotal / 32; + if (nperdir < 1) + nperdir=1; + + // add read groups + for (int ngroup=0;ngroupThisRank() % ngroups == ngroup; + + std::cout << GridLogMessage << "Reading in group " << ngroup << " / " << ngroups << std::endl; + + // load all necessary slots 
and store them appropriately + for (auto sl=slots.begin();sl!=slots.end();sl++) { + + std::vector& idx = sl->second; + int slot = sl->first; + std::vector rdata; + + char buf[4096]; + + if (action) { + // load one slot vector + sprintf(buf,"%s/%2.2d/%10.10d.compressed",dir,slot/nperdir,slot); + f = fopen(buf,"rb"); + if (!f) { + fprintf(stderr,"Node %s cannot read %s\n",hostname,buf); fflush(stderr); + return false; + } + } + + uint32_t crc = 0x0; + off_t size; + + GridStopWatch gsw; + _grid->Barrier(); + gsw.Start(); + + std::vector raw_in(0); + if (action) { + fseeko(f,0,SEEK_END); + size = ftello(f); + fseeko(f,0,SEEK_SET); + + raw_in.resize(size); + assert(fread(&raw_in[0],size,1,f) == 1); + } + + _grid->Barrier(); + gsw.Stop(); + + RealD totalGB = (RealD)size / 1024./1024./1024 * _grid->_Nprocessors; + RealD seconds = gsw.useconds() / 1e6; + + if (action) { + std::cout << GridLogMessage << "[" << slot << "] Read " << totalGB << " GB of compressed data at " << totalGB/seconds << " GB/s" << std::endl; + + uint32_t crc_comp = crc32_threaded((unsigned char*)&raw_in[0],size,0); + + if (crc_comp != crc32[slot]) { + std::cout << "Node " << hostname << " found crc mismatch for file " << buf << " (" << std::hex << crc_comp << " vs " << crc32[slot] << std::dec << ")" << std::endl; + std::cout << "Byte size: " << size << std::endl; + } + + assert(crc_comp == crc32[slot]); + } + + _grid->Barrier(); + + if (action) { + fclose(f); + } + + char* ptr = &raw_in[0]; + + GridStopWatch gsw2; + gsw2.Start(); + if (action) { + int nsingleCap = nkeep_single; + if (pr._evec.size() < nsingleCap) + nsingleCap = pr._evec.size(); + + int _cf_block_size = slot_lsites * 12 / 2 / blocks; + +#define FP_16_SIZE(a,b) (( (a) + (a/b) )*2) + + // first read single precision basis vectors +#pragma omp parallel + { + std::vector buf(_cf_block_size * 2); +#pragma omp for + for (int nb=0;nb buf(_cf_block_size * 2); +#pragma omp for + for (int nb=0;nb buf1(nkeep_single*2); + std::vector buf2((nkeep - 
nkeep_single)*2); + +#pragma omp for + for (int j=0;j<(int)coef.size();j++) + for (int nb=0;nb + static void write_compressed_vectors(const char* dir,const BlockProjector& pr, + const BasisFieldVector& coef, + int nsingle,int writer_nodes = 0) { + + GridStopWatch gsw; + + const BasisFieldVector& basis = pr._evec; + GridBase* _grid = basis._v[0]._grid; + std::vector _l = _grid->FullDimensions(); + for (int i=0;i<(int)_l.size();i++) + _l[i] /= _grid->_processors[i]; + + _grid->Barrier(); + gsw.Start(); + + char buf[4096]; + + // Making the directories is somewhat tricky. + // If we run on a joint filesystem we would just + // have the boss create the directories and then + // have a barrier. We also want to be able to run + // on local /scratch, so potentially all nodes need + // to create their own directories. So do the following + // for now. + for (int j=0;j<_grid->_Nprocessors;j++) { + if (j == _grid->ThisRank()) { + conditionalMkDir(dir); + for (int i=0;i<32;i++) { + sprintf(buf,"%s/%2.2d",dir,i); + conditionalMkDir(buf); + } + _grid->Barrier(); // make sure directories are ready + } + } + + + typedef typename Field::scalar_type Coeff_t; + typedef typename CoarseField::scalar_type CoeffCoarse_t; + + int nperdir = _grid->_Nprocessors / 32; + if (nperdir < 1) + nperdir=1; + + int slot; + Lexicographic::IndexFromCoor(_grid->_processor_coor,slot,_grid->_processors); + + int64_t off = 0x0; + uint32_t crc = 0x0; + if (writer_nodes < 1) + writer_nodes = _grid->_Nprocessors; + int groups = _grid->_Nprocessors / writer_nodes; + if (groups<1) + groups = 1; + + std::cout << GridLogMessage << " Write " << dir << " nodes = " << writer_nodes << std::endl; + + for (int group=0;groupBarrier(); + if (_grid->ThisRank() % groups == group) { + + sprintf(buf,"%s/%2.2d/%10.10d.compressed",dir,slot/nperdir,slot); + FILE* f = fopen(buf,"wb"); + assert(f); + + //buffer does not seem to help + //assert(!setvbuf ( f , NULL , _IOFBF , 1024*1024*2 )); + + int nsingleCap = nsingle; + if 
(pr._evec.size() < nsingleCap) + nsingleCap = pr._evec.size(); + + GridStopWatch gsw1,gsw2,gsw3,gsw4,gsw5; + + gsw1.Start(); + + std::vector fbuf; + fbuf.reserve( 1024 * 1024 * 8 ); + + // first write single precision basis vectors + for (int nb=0;nb buf; + pr._bgrid.peekBlockOfVectorCanonical(nb,pr._evec._v[i],buf); + +#if 0 + { + RealD nrm = 0.0; + for (int j=0;j<(int)buf.size();j++) + nrm += buf[j]*buf[j]; + std::cout << GridLogMessage << "Norm: " << nrm << std::endl; + } +#endif + write_floats(f,fbuf,crc, &buf[0], buf.size() ); + } + } + + gsw1.Stop(); + gsw2.Start(); + + // then write fixed precision basis vectors + for (int nb=0;nb buf; + pr._bgrid.peekBlockOfVectorCanonical(nb,pr._evec._v[i],buf); + write_floats_fp16(f,fbuf,crc, &buf[0], buf.size(), 24); + } + } + + gsw2.Stop(); + assert(coef._v[0]._grid->_isites*coef._v[0]._grid->_osites == pr._bgrid._blocks); + + gsw3.Start(); + for (int j=0;j<(int)coef.size();j++) { + + int64_t size1 = nsingleCap*2; + int64_t size2 = 2*(pr._evec.size()-nsingleCap); + int64_t size = size1; + if (size2>size) + size=size2; + std::vector buf(size); + + //RealD nrmTest = 0.0; + for (int nb=0;nbGlobalSum(nrmTest); + //std::cout << GridLogMessage << "Test norm: " << nrmTest << std::endl; + } + gsw3.Stop(); + + flush_bytes(f,fbuf); + + off = ftello(f); + fclose(f); + + std::cout<Barrier(); + gsw.Stop(); + + RealD totalGB = (RealD)off / 1024./1024./1024 * _grid->_Nprocessors; + RealD seconds = gsw.useconds() / 1e6; + std::cout << GridLogMessage << "Write " << totalGB << " GB of compressed data at " << totalGB/seconds << " GB/s in " << seconds << " s" << std::endl; + + // gather crcs + std::vector crcs(_grid->_Nprocessors); + for (int i=0;i<_grid->_Nprocessors;i++) { + crcs[i] = 0x0; + } + crcs[slot] = crc; + for (int i=0;i<_grid->_Nprocessors;i++) { + _grid->GlobalSum(crcs[i]); + } + + if (_grid->IsBoss()) { + sprintf(buf,"%s/metadata.txt",dir); + FILE* f = fopen(buf,"wb"); + assert(f); + for (int i=0;i<5;i++) + fprintf(f,"s[%d] = 
%d\n",i,_grid->FullDimensions()[(i+1)%5] / _grid->_processors[(i+1)%5]); + for (int i=0;i<5;i++) + fprintf(f,"b[%d] = %d\n",i,pr._bgrid._bs[(i+1)%5]); + for (int i=0;i<5;i++) + fprintf(f,"nb[%d] = %d\n",i,pr._bgrid._nb[(i+1)%5]); + fprintf(f,"neig = %d\n",(int)coef.size()); + fprintf(f,"nkeep = %d\n",(int)pr._evec.size()); + fprintf(f,"nkeep_single = %d\n",nsingle); + fprintf(f,"blocks = %d\n",pr._bgrid._blocks); + fprintf(f,"FP16_COEF_EXP_SHARE_FLOATS = %d\n",FP16_COEF_EXP_SHARE_FLOATS); + for (int i =0;i<_grid->_Nprocessors;i++) + fprintf(f,"crc32[%d] = %X\n",i,crcs[i]); + fclose(f); + } + + } + + template + static void write_argonne(const BasisFieldVector& ret,const char* dir) { + + GridBase* _grid = ret._v[0]._grid; + std::vector _l = _grid->FullDimensions(); + for (int i=0;i<(int)_l.size();i++) + _l[i] /= _grid->_processors[i]; + + char buf[4096]; + + if (_grid->IsBoss()) { + mkdir(dir,ACCESSPERMS); + + for (int i=0;i<32;i++) { + sprintf(buf,"%s/%2.2d",dir,i); + mkdir(buf,ACCESSPERMS); + } + } + + _grid->Barrier(); // make sure directories are ready + + + int nperdir = _grid->_Nprocessors / 32; + if (nperdir < 1) + nperdir=1; + std::cout << GridLogMessage << " Write " << dir << " nodes = " << _grid->_Nprocessors << std::endl; + + int slot; + Lexicographic::IndexFromCoor(_grid->_processor_coor,slot,_grid->_processors); + //printf("Slot: %d <> %d\n",slot, _grid->ThisRank()); + + sprintf(buf,"%s/%2.2d/%10.10d",dir,slot/nperdir,slot); + FILE* f = fopen(buf,"wb"); + assert(f); + + int N = (int)ret._v.size(); + uint32_t crc = 0x0; + int64_t cf_size = _grid->oSites()*_grid->iSites()*12; + std::vector< float > rdata(cf_size*2); + + GridStopWatch gsw1,gsw2; + + for (int i=0;i coor(_l.size()); + for (coor[1] = 0;coor[1]<_l[1];coor[1]++) { + for (coor[2] = 0;coor[2]<_l[2];coor[2]++) { + for (coor[3] = 0;coor[3]<_l[3];coor[3]++) { + for (coor[4] = 0;coor[4]<_l[4];coor[4]++) { + for (coor[0] = 0;coor[0]<_l[0];coor[0]++) { + + if ((coor[1]+coor[2]+coor[3]+coor[4]) % 2 == 1) 
{ + // peek + iScalar, 4> > sc; + peekLocalSite(sc,ret._v[i],coor); + for (int s=0;s<4;s++) + for (int c=0;c<3;c++) + *(std::complex*)&rdata[get_bfm_index(&coor[0],c+s*3, &_l[0] )] = sc()(s)(c); + } + } + } + } + } + } + + // endian flip + for (int i=0;i crcs(_grid->_Nprocessors); + for (int i=0;i<_grid->_Nprocessors;i++) { + crcs[i] = 0x0; + } + crcs[slot] = crc; + for (int i=0;i<_grid->_Nprocessors;i++) { + _grid->GlobalSum(crcs[i]); + } + + if (_grid->IsBoss()) { + sprintf(buf,"%s/checksums.txt",dir); + FILE* f = fopen(buf,"wt"); + assert(f); + fprintf(f,"00000000\n\n"); + for (int i =0;i<_grid->_Nprocessors;i++) + fprintf(f,"%X\n",crcs[i]); + fclose(f); + + sprintf(buf,"%s/nodes.txt",dir); + f = fopen(buf,"wt"); + assert(f); + for (int i =0;i<(int)_grid->_processors.size();i++) + fprintf(f,"%d\n",_grid->_processors[i]); + fclose(f); + } + + + std::cout << GridLogMessage << "Writing slot " << slot << " with " + << N << " vectors in " + << gsw2.Elapsed() << " at " + << ( (double)cf_size*2*4 * N / 1024./1024./1024. / gsw2.useconds()*1000.*1000. ) + << " GB/s with crc computed at " + << ( (double)cf_size*2*4 * N / 1024./1024./1024. / gsw1.useconds()*1000.*1000. ) + << " GB/s " + << std::endl; + + _grid->Barrier(); + std::cout << GridLogMessage << "Writing complete" << std::endl; + + } + } + +} diff --git a/lib/qcd/action/fermion/DomainWallEOFAFermion.cc b/lib/qcd/action/fermion/DomainWallEOFAFermion.cc index dd8a500d..37ab5fa6 100644 --- a/lib/qcd/action/fermion/DomainWallEOFAFermion.cc +++ b/lib/qcd/action/fermion/DomainWallEOFAFermion.cc @@ -61,10 +61,10 @@ namespace QCD { } /*************************************************************** - /* Additional EOFA operators only called outside the inverter. - /* Since speed is not essential, simple axpby-style - /* implementations should be fine. - /***************************************************************/ + * Additional EOFA operators only called outside the inverter. 
+ * Since speed is not essential, simple axpby-style + * implementations should be fine. + ***************************************************************/ template void DomainWallEOFAFermion::Omega(const FermionField& psi, FermionField& Din, int sign, int dag) { @@ -116,8 +116,8 @@ namespace QCD { } /******************************************************************** - /* Performance critical fermion operators called inside the inverter - /********************************************************************/ + * Performance critical fermion operators called inside the inverter + ********************************************************************/ template void DomainWallEOFAFermion::M5D(const FermionField& psi, FermionField& chi) diff --git a/lib/qcd/action/fermion/MobiusEOFAFermion.cc b/lib/qcd/action/fermion/MobiusEOFAFermion.cc index 085fa988..0344afbf 100644 --- a/lib/qcd/action/fermion/MobiusEOFAFermion.cc +++ b/lib/qcd/action/fermion/MobiusEOFAFermion.cc @@ -77,11 +77,11 @@ namespace QCD { } } - /*************************************************************** - /* Additional EOFA operators only called outside the inverter. - /* Since speed is not essential, simple axpby-style - /* implementations should be fine. - /***************************************************************/ + /**************************************************************** + * Additional EOFA operators only called outside the inverter. + * Since speed is not essential, simple axpby-style + * implementations should be fine. 
+ ***************************************************************/ template void MobiusEOFAFermion::Omega(const FermionField& psi, FermionField& Din, int sign, int dag) { @@ -194,8 +194,8 @@ namespace QCD { } /******************************************************************** - /* Performance critical fermion operators called inside the inverter - /********************************************************************/ + * Performance critical fermion operators called inside the inverter + ********************************************************************/ template void MobiusEOFAFermion::M5D(const FermionField& psi, FermionField& chi) diff --git a/tests/solver/Params.h b/tests/solver/Params.h new file mode 100644 index 00000000..d9a6d3b3 --- /dev/null +++ b/tests/solver/Params.h @@ -0,0 +1,136 @@ +/* + Params IO + + Author: Christoph Lehner + Date: 2017 +*/ + +#define PADD(p,X) p.get(#X,X); + +class Params { + protected: + + std::string trim(const std::string& sc) { + std::string s = sc; + s.erase(s.begin(), std::find_if(s.begin(), s.end(), + std::not1(std::ptr_fun(std::isspace)))); + s.erase(std::find_if(s.rbegin(), s.rend(), + std::not1(std::ptr_fun(std::isspace))).base(), s.end()); + return s; + } + + public: + + std::map< std::string, std::string > lines; + std::string _fn; + + Params(const char* fn) : _fn(fn) { + FILE* f = fopen(fn,"rt"); + assert(f); + while (!feof(f)) { + char buf[4096]; + if (fgets(buf,sizeof(buf),f)) { + if (buf[0] != '#' && buf[0] != '\r' && buf[0] != '\n') { + char* sep = strchr(buf,'='); + assert(sep); + *sep = '\0'; + lines[trim(buf)] = trim(sep+1); + } + } + } + fclose(f); + } + + ~Params() { + } + + std::string loghead() { + return _fn + ": "; + } + + bool has(const char* name) { + auto f = lines.find(name); + return (f != lines.end()); + } + + const std::string& get(const char* name) { + auto f = lines.find(name); + if (f == lines.end()) { + std::cout << Grid::GridLogMessage << loghead() << "Could not find value for " << name << 
std::endl; + abort(); + } + return f->second; + } + + void parse(std::string& s, const std::string& cval) { + std::stringstream trimmer; + trimmer << cval; + s.clear(); + trimmer >> s; + } + + void parse(int& i, const std::string& cval) { + assert(sscanf(cval.c_str(),"%d",&i)==1); + } + + void parse(long long& i, const std::string& cval) { + assert(sscanf(cval.c_str(),"%lld",&i)==1); + } + + void parse(double& f, const std::string& cval) { + assert(sscanf(cval.c_str(),"%lf",&f)==1); + } + + void parse(float& f, const std::string& cval) { + assert(sscanf(cval.c_str(),"%f",&f)==1); + } + + void parse(bool& b, const std::string& cval) { + std::string lcval = cval; + std::transform(lcval.begin(), lcval.end(), lcval.begin(), ::tolower); + if (lcval == "true" || lcval == "yes") { + b = true; + } else if (lcval == "false" || lcval == "no") { + b = false; + } else { + std::cout << "Invalid value for boolean: " << b << std::endl; + assert(0); + } + } + + void parse(std::complex& f, const std::string& cval) { + double r,i; + assert(sscanf(cval.c_str(),"%lf %lf",&r,&i)==2); + f = std::complex(r,i); + } + + void parse(std::complex& f, const std::string& cval) { + float r,i; + assert(sscanf(cval.c_str(),"%f %f",&r,&i)==2); + f = std::complex(r,i); + } + + template + void get(const char* name, std::vector& v) { + int i = 0; + v.resize(0); + while (true) { + char buf[4096]; + sprintf(buf,"%s[%d]",name,i++); + if (!has(buf)) + break; + T val; + parse(val,get(buf)); + std::cout << Grid::GridLogMessage << loghead() << "Set " << buf << " to " << val << std::endl; + v.push_back(val); + } + } + + template + void get(const char* name, T& f) { + parse(f,get(name)); + std::cout << Grid::GridLogMessage << loghead() << "Set " << name << " to " << f << std::endl; + } + + +}; diff --git a/tests/solver/Test_dwf_compressed_lanczos.cc b/tests/solver/Test_dwf_compressed_lanczos.cc new file mode 100644 index 00000000..b42a2d55 --- /dev/null +++ b/tests/solver/Test_dwf_compressed_lanczos.cc @@ -0,0 
+1,727 @@ +/* + Authors: Christoph Lehner + Date: 2017 + + Multigrid Lanczos + + + + TODO: + + High priority: + - Explore filtering of starting vector again, should really work: If cheby has 4 for low mode region and 1 for high mode, applying 15 iterations has 1e9 suppression + of high modes, which should create the desired invariant subspace already? Missing something here??? Maybe dynamic range dangerous, i.e., could also kill interesting + eigenrange if not careful. + + Better: Use all Cheby up to order N in order to approximate a step function; try this! Problem: width of step function. Can kill eigenspace > 1e-3 and have < 1e-5 equal + to 1 + + Low priority: + - Given that I seem to need many restarts and high degree poly to create the base and this takes about 1 day, seriously consider a simple method to create a basis + (ortho krylov low poly); and then fix up lowest say 200 eigenvalues by 1 run with high-degree poly (600 could be enough) +*/ +#include +#include "Params.h" + +#include + +using namespace std; +using namespace Grid; +using namespace Grid::QCD; + +bool read_evals(GridBase* _grid, char* fn, std::vector& evals) { + + FILE* f = 0; + uint32_t status = 0; + if (_grid->IsBoss()) { + f = fopen(fn,"rt"); + status = f ? 
1 : 0; + } + _grid->GlobalSum(status); + + if (!status) + return false; + + uint32_t N; + if (f) + assert(fscanf(f,"%d\n",&N)==1); + else + N = 0; + _grid->GlobalSum(N); + + std::cout << "Reading " << N << " eigenvalues" << std::endl; + + evals.resize(N); + + for (int i=0;iGlobalSumVector(&evals[0],evals.size()); + + if (f) + fclose(f); + return true; +} + +void write_evals(char* fn, std::vector& evals) { + FILE* f = fopen(fn,"wt"); + assert(f); + + int N = (int)evals.size(); + fprintf(f,"%d\n",N); + + for (int i=0;i& hist) { + FILE* f = fopen(fn,"wt"); + assert(f); + + int N = (int)hist.size(); + for (int i=0;i +class FunctionHermOp : public LinearFunction { +public: + OperatorFunction & _poly; + LinearOperatorBase &_Linop; + + FunctionHermOp(OperatorFunction & poly,LinearOperatorBase& linop) : _poly(poly), _Linop(linop) { + } + + void operator()(const Field& in, Field& out) { + _poly(_Linop,in,out); + } +}; + +template +class CheckpointedLinearFunction : public LinearFunction { +public: + LinearFunction& _op; + std::string _dir; + int _max_apply; + int _apply, _apply_actual; + GridBase* _grid; + FILE* _f; + + CheckpointedLinearFunction(GridBase* grid, LinearFunction& op, const char* dir,int max_apply) : _op(op), _dir(dir), _grid(grid), _f(0), + _max_apply(max_apply), _apply(0), _apply_actual(0) { + + FieldVectorIO::conditionalMkDir(dir); + + char fn[4096]; + sprintf(fn,"%s/ckpt_op.%4.4d",_dir.c_str(),_grid->ThisRank()); + printf("CheckpointLinearFunction:: file %s\n",fn); + _f = fopen(fn,"r+b"); + if (!_f) + _f = fopen(fn,"w+b"); + assert(_f); + fseek(_f,0,SEEK_CUR); + + } + + ~CheckpointedLinearFunction() { + if (_f) { + fclose(_f); + _f = 0; + } + } + + bool load_ckpt(const Field& in, Field& out) { + + off_t cur = ftello(_f); + fseeko(_f,0,SEEK_END); + if (cur == ftello(_f)) + return false; + fseeko(_f,cur,SEEK_SET); + + size_t sz = sizeof(out._odata[0]) * out._odata.size(); + + GridStopWatch gsw; + gsw.Start(); + uint32_t crc_exp; + 
assert(fread(&crc_exp,4,1,_f)==1); + assert(fread(&out._odata[0],sz,1,_f)==1); + assert(FieldVectorIO::crc32_threaded((unsigned char*)&out._odata[0],sz,0x0)==crc_exp); + gsw.Stop(); + + printf("CheckpointLinearFunction:: reading %lld\n",(long long)sz); + std::cout << GridLogMessage << "Loading " << ((RealD)sz/1024./1024./1024.) << " GB in " << gsw.Elapsed() << std::endl; + return true; + } + + void save_ckpt(const Field& in, Field& out) { + + fseek(_f,0,SEEK_CUR); // switch to write + + size_t sz = sizeof(out._odata[0]) * out._odata.size(); + + GridStopWatch gsw; + gsw.Start(); + uint32_t crc = FieldVectorIO::crc32_threaded((unsigned char*)&out._odata[0],sz,0x0); + assert(fwrite(&crc,4,1,_f)==1); + assert(fwrite(&out._odata[0],sz,1,_f)==1); + fflush(_f); // try this on the GPFS to suppress OPA usage for disk during dslash; this is not needed at Lustre/JLAB + gsw.Stop(); + + printf("CheckpointLinearFunction:: writing %lld\n",(long long)sz); + std::cout << GridLogMessage << "Saving " << ((RealD)sz/1024./1024./1024.) 
<< " GB in " << gsw.Elapsed() << std::endl; + } + + void operator()(const Field& in, Field& out) { + + _apply++; + + if (load_ckpt(in,out)) + return; + + _op(in,out); + + save_ckpt(in,out); + + if (_apply_actual++ >= _max_apply) { + std::cout << GridLogMessage << "Maximum application of operator reached, checkpoint and finish in future job" << std::endl; + if (_f) { fclose(_f); _f=0; } + in._grid->Barrier(); + Grid_finalize(); + exit(3); + } + } +}; + +template +class ProjectedFunctionHermOp : public LinearFunction { +public: + OperatorFunction & _poly; + LinearOperatorBase &_Linop; + BlockProjector& _pr; + + ProjectedFunctionHermOp(BlockProjector& pr,OperatorFunction & poly,LinearOperatorBase& linop) : _poly(poly), _Linop(linop), _pr(pr) { + } + + void operator()(const CoarseField& in, CoarseField& out) { + assert(_pr._bgrid._o_blocks == in._grid->oSites()); + + Field fin(_pr._bgrid._grid); + Field fout(_pr._bgrid._grid); + + GridStopWatch gsw1,gsw2,gsw3; + // fill fin + gsw1.Start(); + _pr.coarseToFine(in,fin); + gsw1.Stop(); + + // apply poly + gsw2.Start(); + _poly(_Linop,fin,fout); + gsw2.Stop(); + + // fill out + gsw3.Start(); + _pr.fineToCoarse(fout,out); + gsw3.Stop(); + + auto eps = innerProduct(in,out); + std::cout << GridLogMessage << "Operator timing details: c2f = " << gsw1.Elapsed() << " poly = " << gsw2.Elapsed() << " f2c = " << gsw3.Elapsed() << + " Complimentary Hermiticity check: " << eps.imag() / std::abs(eps) << std::endl; + + } +}; + +template +class ProjectedHermOp : public LinearFunction { +public: + LinearOperatorBase &_Linop; + BlockProjector& _pr; + + ProjectedHermOp(BlockProjector& pr,LinearOperatorBase& linop) : _Linop(linop), _pr(pr) { + } + + void operator()(const CoarseField& in, CoarseField& out) { + assert(_pr._bgrid._o_blocks == in._grid->oSites()); + Field fin(_pr._bgrid._grid); + Field fout(_pr._bgrid._grid); + _pr.coarseToFine(in,fin); + _Linop.HermOp(fin,fout); + _pr.fineToCoarse(fout,out); + + } +}; + +template +class 
PlainHermOp : public LinearFunction { +public: + LinearOperatorBase &_Linop; + + PlainHermOp(LinearOperatorBase& linop) : _Linop(linop) { + } + + void operator()(const Field& in, Field& out) { + _Linop.HermOp(in,out); + } +}; + +template using CoarseSiteFieldGeneral = iScalar< iVector >; +template using CoarseSiteFieldD = CoarseSiteFieldGeneral< vComplexD, N >; +template using CoarseSiteFieldF = CoarseSiteFieldGeneral< vComplexF, N >; +template using CoarseSiteField = CoarseSiteFieldGeneral< vComplex, N >; +template using CoarseLatticeFermion = Lattice< CoarseSiteField >; +template using CoarseLatticeFermionD = Lattice< CoarseSiteFieldD >; + +template +void CoarseGridLanczos(BlockProjector& pr,RealD alpha2,RealD beta,int Npoly2, + int Nstop2,int Nk2,int Nm2,RealD resid2,RealD betastp2,int MaxIt,int MinRes2, + LinearOperatorBase& HermOp, std::vector& eval1, bool cg_test_enabled, + int cg_test_maxiter,int nsingle,int SkipTest2, int MaxApply2,bool smoothed_eval_enabled, + int smoothed_eval_inner,int smoothed_eval_outer,int smoothed_eval_begin, + int smoothed_eval_end,RealD smoothed_eval_inner_resid) { + + BlockedGrid& bgrid = pr._bgrid; + BasisFieldVector& basis = pr._evec; + + + std::vector coarseFourDimLatt; + for (int i=0;i<4;i++) + coarseFourDimLatt.push_back(bgrid._nb[1+i] * bgrid._grid->_processors[1+i]); + assert(bgrid._grid->_processors[0] == 1); + + std::cout << GridLogMessage << "CoarseGrid = " << coarseFourDimLatt << " with basis = " << Nstop1 << std::endl; + GridCartesian * UCoarseGrid = SpaceTimeGrid::makeFourDimGrid(coarseFourDimLatt, GridDefaultSimd(Nd,vComplex::Nsimd()),GridDefaultMpi()); + GridCartesian * FCoarseGrid = SpaceTimeGrid::makeFiveDimGrid(bgrid._nb[0],UCoarseGrid); + + Chebyshev Cheb2(alpha2,beta,Npoly2); + CoarseLatticeFermion src_coarse(FCoarseGrid); + + // Second round of Lanczos in blocked space + std::vector eval2(Nm2); + std::vector eval3(Nm2); + BasisFieldVector > coef(Nm2,FCoarseGrid); + + ProjectedFunctionHermOp,LatticeFermion> 
Op2plain(pr,Cheb2,HermOp); + CheckpointedLinearFunction > Op2ckpt(src_coarse._grid,Op2plain,"checkpoint",MaxApply2); + LinearFunction< CoarseLatticeFermion >* Op2; + if (MaxApply2) { + Op2 = &Op2ckpt; + } else { + Op2 = &Op2plain; + } + ProjectedHermOp,LatticeFermion> Op2nopoly(pr,HermOp); + BlockImplicitlyRestartedLanczos > IRL2(*Op2,*Op2,Nstop2,Nk2,Nm2,resid2,betastp2,MaxIt,MinRes2); + + + src_coarse = 1.0; + + // Precision test + { + Field tmp(bgrid._grid); + CoarseLatticeFermion tmp2(FCoarseGrid); + CoarseLatticeFermion tmp3(FCoarseGrid); + tmp2 = 1.0; + tmp3 = 1.0; + + pr.coarseToFine(tmp2,tmp); + pr.fineToCoarse(tmp,tmp2); + + tmp2 -= tmp3; + std::cout << GridLogMessage << "Precision Test c->f->c: " << norm2(tmp2) / norm2(tmp3) << std::endl; + + //bgrid._grid->Barrier(); + //return; + } + + int Nconv; + if (!FieldVectorIO::read_compressed_vectors("lanczos.output",pr,coef) || + !read_evals(UCoarseGrid,(char *)"lanczos.output/eigen-values.txt",eval3) || + !read_evals(UCoarseGrid,(char *)"lanczos.output/eigen-values.txt.linear",eval1) || + !read_evals(UCoarseGrid,(char *)"lanczos.output/eigen-values.txt.poly",eval2) + ) { + + + IRL2.calc(eval2,coef,src_coarse,Nconv,true,SkipTest2); + + coef.resize(Nstop2); + eval2.resize(Nstop2); + eval3.resize(Nstop2); + + std::vector step3_cache; + + // reconstruct eigenvalues of original operator + for (int i=0;iIsBoss()) { + write_evals((char *)"lanczos.output/eigen-values.txt",eval3); + write_evals((char *)"lanczos.output/eigen-values.txt.linear",eval1); + write_evals((char *)"lanczos.output/eigen-values.txt.poly",eval2); + } + + } + + // fix up eigenvalues + if (!read_evals(UCoarseGrid,(char *)"lanczos.output/eigen-values.txt.smoothed",eval3) && smoothed_eval_enabled) { + + ConjugateGradient CG(smoothed_eval_inner_resid, smoothed_eval_inner, false); + + LatticeFermion v_i(basis[0]._grid); + auto tmp = v_i; + auto tmp2 = v_i; + + for (int i=smoothed_eval_begin;iIsBoss()) { + write_evals((char 
*)"lanczos.output/eigen-values.txt.smoothed",eval3); + write_evals((char *)"lanczos.output/eigen-values.txt",eval3); // also reset this to the best ones we have available + } + } + + // do CG test with and without deflation + if (cg_test_enabled) { + ConjugateGradient CG(1.0e-8, cg_test_maxiter, false); + LatticeFermion src_orig(bgrid._grid); + src_orig.checkerboard = Odd; + src_orig = 1.0; + src_orig = src_orig * (1.0 / ::sqrt(norm2(src_orig)) ); + auto result = src_orig; + + // undeflated solve + result = zero; + CG(HermOp, src_orig, result); + // if (UCoarseGrid->IsBoss()) + // write_history("cg_test.undefl",CG.ResHistory); + // CG.ResHistory.clear(); + + // deflated solve with all eigenvectors + result = zero; + pr.deflate(coef,eval2,Nstop2,src_orig,result); + CG(HermOp, src_orig, result); + // if (UCoarseGrid->IsBoss()) + // write_history("cg_test.defl_all",CG.ResHistory); + // CG.ResHistory.clear(); + + // deflated solve with non-blocked eigenvectors + result = zero; + pr.deflate(coef,eval1,Nstop1,src_orig,result); + CG(HermOp, src_orig, result); + // if (UCoarseGrid->IsBoss()) + // write_history("cg_test.defl_full",CG.ResHistory); + // CG.ResHistory.clear(); + + // deflated solve with all eigenvectors and original eigenvalues from proj + result = zero; + pr.deflate(coef,eval3,Nstop2,src_orig,result); + CG(HermOp, src_orig, result); + // if (UCoarseGrid->IsBoss()) + // write_history("cg_test.defl_all_ev3",CG.ResHistory); + // CG.ResHistory.clear(); + + } + +} + + +template +void quick_krylov_basis(BasisFieldVector& evec,Field& src,LinearFunction& Op,int Nstop) { + Field tmp = src; + Field tmp2 = tmp; + + for (int i=0;i HermOp(Ddwf); + + // Eigenvector storage + const int Nm1 = Np1 + Nk1; + const int Nm2 = Np2 + Nk2; // maximum number of vectors we need to keep + std::cout << GridLogMessage << "Keep " << Nm1 << " full vectors" << std::endl; + std::cout << GridLogMessage << "Keep " << Nm2 << " total vectors" << std::endl; + assert(Nm2 >= Nm1); + 
BasisFieldVector evec(Nm1,FrbGrid); // start off with keeping full vectors + + // First and second cheby + Chebyshev Cheb1(alpha1,beta,Npoly1); + FunctionHermOp Op1(Cheb1,HermOp); + PlainHermOp Op1test(HermOp); + + // Eigenvalue storage + std::vector eval1(evec.size()); + + // Construct source vector + LatticeFermion src(FrbGrid); + { + src=1.0; + src.checkerboard = Odd; + + // normalize + RealD nn = norm2(src); + nn = Grid::sqrt(nn); + src = src * (1.0/nn); + } + + // Do a benchmark and a quick exit if performance is too little (ugly but needed due to performance fluctuations) + if (max_cheb_time_ms) { + // one round of warmup + auto tmp = src; + GridStopWatch gsw1,gsw2; + gsw1.Start(); + Cheb1(HermOp,src,tmp); + gsw1.Stop(); + Ddwf.ZeroCounters(); + gsw2.Start(); + Cheb1(HermOp,src,tmp); + gsw2.Stop(); + Ddwf.Report(); + std::cout << GridLogMessage << "Performance check; warmup = " << gsw1.Elapsed() << " test = " << gsw2.Elapsed() << std::endl; + int ms = (int)(gsw2.useconds()/1e3); + if (ms > max_cheb_time_ms) { + std::cout << GridLogMessage << "Performance too poor: " << ms << " ms, cutoff = " << max_cheb_time_ms << " ms" << std::endl; + Grid_finalize(); + return 2; + } + + } + + // First round of Lanczos to get low mode basis + BlockImplicitlyRestartedLanczos IRL1(Op1,Op1test,Nstop1,Nk1,Nm1,resid1,betastp1,MaxIt,MinRes1); + int Nconv; + + char tag[1024]; + if (!FieldVectorIO::read_argonne(evec,(char *)"checkpoint") || !read_evals(UGrid,(char *)"checkpoint/eigen-values.txt",eval1)) { + + if (simple_krylov_basis) { + quick_krylov_basis(evec,src,Op1,Nstop1); + } else { + IRL1.calc(eval1,evec,src,Nconv,false,1); + } + evec.resize(Nstop1); // and throw away superfluous + eval1.resize(Nstop1); + if (checkpoint_basis) + FieldVectorIO::write_argonne(evec,(char *)"checkpoint"); + if (UGrid->IsBoss() && checkpoint_basis) + write_evals((char *)"checkpoint/eigen-values.txt",eval1); + + Ddwf.Report(); + + if (exit_after_basis_calculation) { + Grid_finalize(); + return 0; + 
} + } + + // now test eigenvectors + if (!simple_krylov_basis) { + for (int i=0;i Date: Wed, 11 Oct 2017 10:12:07 +0100 Subject: [PATCH 141/377] Starting reorg of Blocked lanczos --- configure.ac | 1 + .../BlockImplicitlyRestartedLanczos.h | 1 - tests/Makefile.am | 2 +- .../lanczos}/FieldVectorIO.h | 0 tests/lanczos/Makefile.am | 1 + tests/{solver => lanczos}/Params.h | 0 tests/{solver => lanczos}/Test_dwf_compressed_lanczos.cc | 4 ++-- tests/{solver => lanczos}/Test_dwf_lanczos.cc | 0 tests/{debug => lanczos}/Test_synthetic_lanczos.cc | 0 tests/{solver => lanczos}/Test_wilson_lanczos.cc | 0 10 files changed, 5 insertions(+), 4 deletions(-) rename {lib/algorithms/iterative/BlockImplicitlyRestartedLanczos => tests/lanczos}/FieldVectorIO.h (100%) create mode 100644 tests/lanczos/Makefile.am rename tests/{solver => lanczos}/Params.h (100%) rename tests/{solver => lanczos}/Test_dwf_compressed_lanczos.cc (99%) rename tests/{solver => lanczos}/Test_dwf_lanczos.cc (100%) rename tests/{debug => lanczos}/Test_synthetic_lanczos.cc (100%) rename tests/{solver => lanczos}/Test_wilson_lanczos.cc (100%) diff --git a/configure.ac b/configure.ac index b11d6b42..496f7fd7 100644 --- a/configure.ac +++ b/configure.ac @@ -550,6 +550,7 @@ AC_CONFIG_FILES(tests/forces/Makefile) AC_CONFIG_FILES(tests/hadrons/Makefile) AC_CONFIG_FILES(tests/hmc/Makefile) AC_CONFIG_FILES(tests/solver/Makefile) +AC_CONFIG_FILES(tests/lanczos/Makefile) AC_CONFIG_FILES(tests/smearing/Makefile) AC_CONFIG_FILES(tests/qdpxx/Makefile) AC_CONFIG_FILES(tests/testu01/Makefile) diff --git a/lib/algorithms/iterative/BlockImplicitlyRestartedLanczos/BlockImplicitlyRestartedLanczos.h b/lib/algorithms/iterative/BlockImplicitlyRestartedLanczos/BlockImplicitlyRestartedLanczos.h index 82a00efa..55a85552 100644 --- a/lib/algorithms/iterative/BlockImplicitlyRestartedLanczos/BlockImplicitlyRestartedLanczos.h +++ b/lib/algorithms/iterative/BlockImplicitlyRestartedLanczos/BlockImplicitlyRestartedLanczos.h @@ -39,7 +39,6 @@ 
Author: Christoph Lehner #include #include #include -#include namespace Grid { diff --git a/tests/Makefile.am b/tests/Makefile.am index a8935268..7928a7fe 100644 --- a/tests/Makefile.am +++ b/tests/Makefile.am @@ -1,4 +1,4 @@ -SUBDIRS = . core forces hmc solver debug smearing IO +SUBDIRS = . core forces hmc solver debug smearing IO lanczos if BUILD_CHROMA_REGRESSION SUBDIRS+= qdpxx diff --git a/lib/algorithms/iterative/BlockImplicitlyRestartedLanczos/FieldVectorIO.h b/tests/lanczos/FieldVectorIO.h similarity index 100% rename from lib/algorithms/iterative/BlockImplicitlyRestartedLanczos/FieldVectorIO.h rename to tests/lanczos/FieldVectorIO.h diff --git a/tests/lanczos/Makefile.am b/tests/lanczos/Makefile.am new file mode 100644 index 00000000..60b82dd7 --- /dev/null +++ b/tests/lanczos/Makefile.am @@ -0,0 +1 @@ +include Make.inc diff --git a/tests/solver/Params.h b/tests/lanczos/Params.h similarity index 100% rename from tests/solver/Params.h rename to tests/lanczos/Params.h diff --git a/tests/solver/Test_dwf_compressed_lanczos.cc b/tests/lanczos/Test_dwf_compressed_lanczos.cc similarity index 99% rename from tests/solver/Test_dwf_compressed_lanczos.cc rename to tests/lanczos/Test_dwf_compressed_lanczos.cc index b42a2d55..7fe37387 100644 --- a/tests/solver/Test_dwf_compressed_lanczos.cc +++ b/tests/lanczos/Test_dwf_compressed_lanczos.cc @@ -21,9 +21,9 @@ (ortho krylov low poly); and then fix up lowest say 200 eigenvalues by 1 run with high-degree poly (600 could be enough) */ #include -#include "Params.h" - #include +#include "FieldVectorIO.h" +#include "Params.h" using namespace std; using namespace Grid; diff --git a/tests/solver/Test_dwf_lanczos.cc b/tests/lanczos/Test_dwf_lanczos.cc similarity index 100% rename from tests/solver/Test_dwf_lanczos.cc rename to tests/lanczos/Test_dwf_lanczos.cc diff --git a/tests/debug/Test_synthetic_lanczos.cc b/tests/lanczos/Test_synthetic_lanczos.cc similarity index 100% rename from tests/debug/Test_synthetic_lanczos.cc rename 
to tests/lanczos/Test_synthetic_lanczos.cc diff --git a/tests/solver/Test_wilson_lanczos.cc b/tests/lanczos/Test_wilson_lanczos.cc similarity index 100% rename from tests/solver/Test_wilson_lanczos.cc rename to tests/lanczos/Test_wilson_lanczos.cc From cb9ff20249d90f528ba1b2609f4cbe3e62b1f437 Mon Sep 17 00:00:00 2001 From: paboyle Date: Fri, 13 Oct 2017 11:30:50 +0100 Subject: [PATCH 142/377] Approx tests and lanczos improvement --- lib/algorithms/approx/Chebyshev.h | 6 +- .../BlockImplicitlyRestartedLanczos.h | 1399 +++++++++-------- .../FieldBasisVector.h | 5 +- .../iterative/ImplicitlyRestartedLanczos.h | 3 +- lib/log/Log.cc | 10 +- lib/log/Log.h | 13 +- lib/threads/Threads.h | 2 + tests/debug/Test_cheby.cc | 36 +- tests/hmc/Test_remez.cc | 61 +- 9 files changed, 823 insertions(+), 712 deletions(-) diff --git a/lib/algorithms/approx/Chebyshev.h b/lib/algorithms/approx/Chebyshev.h index f8c21a05..5088c51b 100644 --- a/lib/algorithms/approx/Chebyshev.h +++ b/lib/algorithms/approx/Chebyshev.h @@ -83,8 +83,10 @@ namespace Grid { public: void csv(std::ostream &out){ - RealD diff = hi-lo; - for (RealD x=lo-0.2*diff; x #define GRID_BIRL_H #include //memset - -#include +//#include #include #include @@ -42,420 +41,185 @@ Author: Christoph Lehner namespace Grid { +template +void basisOrthogonalize(std::vector &basis,Field &w,int k) +{ + for(int j=0; j +void basisRotate(std::vector &basis,Eigen::MatrixXd& Qt,int j0, int j1, int k0,int k1,int Nm) +{ + typedef typename Field::vector_object vobj; + GridBase* grid = basis[0]._grid; + + parallel_region + { + std::vector < vobj > B(Nm); // Thread private + + parallel_for_internal(int ss=0;ss < grid->oSites();ss++){ + for(int j=j0; j +void basisReorderInPlace(std::vector &_v,std::vector& sort_vals, std::vector& idx) +{ + int vlen = idx.size(); + + assert(vlen>=1); + assert(vlen<=sort_vals.size()); + assert(vlen<=_v.size()); + + for (size_t i=0;i i); + ////////////////////////////////////// + // idx[i] is a table of desired 
sources giving a permutation. + // + // Swap v[i] with v[idx[i]]. + // + // Find j>i for which _vnew[j] = _vold[i], + // track the move idx[j] => idx[i] + // track the move idx[i] => i + ////////////////////////////////////// + size_t j; + for (j=i;j basisSortGetIndex(std::vector& sort_vals) +{ + std::vector idx(sort_vals.size()); + std::iota(idx.begin(), idx.end(), 0); + + // sort indexes based on comparing values in v + std::sort(idx.begin(), idx.end(), [&sort_vals](int i1, int i2) { + return ::fabs(sort_vals[i1]) < ::fabs(sort_vals[i2]); + }); + return idx; +} + +template +void basisSortInPlace(std::vector & _v,std::vector& sort_vals, bool reverse) +{ + std::vector idx = basisSortGetIndex(sort_vals); + if (reverse) + std::reverse(idx.begin(), idx.end()); + + basisReorderInPlace(_v,sort_vals,idx); +} + +// PAB: faster to compute the inner products first then fuse loops. +// If performance critical can improve. +template +void basisDeflate(const std::vector &_v,const std::vector& eval,const Field& src_orig,Field& result) { + result = zero; + assert(_v.size()==eval.size()); + int N = (int)_v.size(); + for (int i=0;i - class BlockImplicitlyRestartedLanczos { - - const RealD small = 1.0e-16; +template +class BlockImplicitlyRestartedLanczos { + private: + const RealD small = 1.0e-8; + int MaxIter; + int MinRestart; // Minimum number of restarts; only check for convergence after + int Nstop; // Number of evecs checked for convergence + int Nk; // Number of converged sought + // int Np; // Np -- Number of spare vecs in krylov space // == Nm - Nk + int Nm; // Nm -- total number of vectors + IRLdiagonalisation diagonalisation; + int orth_period; + + RealD OrthoTime; + RealD eresid, betastp; + //////////////////////////////// + // Embedded objects + //////////////////////////////// + SortEigen _sort; + LinearFunction &_HermOp; + LinearFunction &_HermOpTest; + ///////////////////////// + // Constructor + ///////////////////////// public: - int lock; - int get; - int Niter; 
- int converged; + BlockImplicitlyRestartedLanczos(LinearFunction & HermOp, + LinearFunction & HermOpTest, + int _Nstop, // sought vecs + int _Nk, // sought vecs + int _Nm, // spare vecs + RealD _eresid, // resid in lmdue deficit + RealD _betastp, // if beta(k) < betastp: converged + int _MaxIter, // Max iterations + int _MinRestart, int _orth_period = 1, + IRLdiagonalisation _diagonalisation= IRLdiagonaliseWithEigen) : + _HermOp(HermOp), _HermOpTest(HermOpTest), + Nstop(_Nstop) , Nk(_Nk), Nm(_Nm), + eresid(_eresid), betastp(_betastp), + MaxIter(_MaxIter) , MinRestart(_MinRestart), + orth_period(_orth_period), diagonalisation(_diagonalisation) { }; - int Nminres; // Minimum number of restarts; only check for convergence after - int Nstop; // Number of evecs checked for convergence - int Nk; // Number of converged sought - int Np; // Np -- Number of spare vecs in kryloc space - int Nm; // Nm -- total number of vectors + //////////////////////////////// + // Helpers + //////////////////////////////// + template static RealD normalise(T& v) + { + RealD nn = norm2(v); + nn = sqrt(nn); + v = v * (1.0/nn); + return nn; + } - int orth_period; - - RealD OrthoTime; - - RealD eresid, betastp; - SortEigen _sort; - LinearFunction &_HermOp; - LinearFunction &_HermOpTest; - ///////////////////////// - // Constructor - ///////////////////////// - - BlockImplicitlyRestartedLanczos( - LinearFunction & HermOp, - LinearFunction & HermOpTest, - int _Nstop, // sought vecs - int _Nk, // sought vecs - int _Nm, // spare vecs - RealD _eresid, // resid in lmdue deficit - RealD _betastp, // if beta(k) < betastp: converged - int _Niter, // Max iterations - int _Nminres, int _orth_period = 1) : - _HermOp(HermOp), - _HermOpTest(HermOpTest), - Nstop(_Nstop), - Nk(_Nk), - Nm(_Nm), - eresid(_eresid), - betastp(_betastp), - Niter(_Niter), - Nminres(_Nminres), - orth_period(_orth_period) - { - Np = Nm-Nk; assert(Np>0); - }; - - BlockImplicitlyRestartedLanczos( - LinearFunction & HermOp, - 
LinearFunction & HermOpTest, - int _Nk, // sought vecs - int _Nm, // spare vecs - RealD _eresid, // resid in lmdue deficit - RealD _betastp, // if beta(k) < betastp: converged - int _Niter, // Max iterations - int _Nminres, - int _orth_period = 1) : - _HermOp(HermOp), - _HermOpTest(HermOpTest), - Nstop(_Nk), - Nk(_Nk), - Nm(_Nm), - eresid(_eresid), - betastp(_betastp), - Niter(_Niter), - Nminres(_Nminres), - orth_period(_orth_period) - { - Np = Nm-Nk; assert(Np>0); - }; - - -/* Saad PP. 195 -1. Choose an initial vector v1 of 2-norm unity. Set β1 ≡ 0, v0 ≡ 0 -2. For k = 1,2,...,m Do: -3. wk:=Avk−βkv_{k−1} -4. αk:=(wk,vk) // -5. wk:=wk−αkvk // wk orthog vk -6. βk+1 := ∥wk∥2. If βk+1 = 0 then Stop -7. vk+1 := wk/βk+1 -8. EndDo - */ - void step(std::vector& lmd, - std::vector& lme, - BasisFieldVector& evec, - Field& w,int Nm,int k) - { - assert( k< Nm ); - - GridStopWatch gsw_op,gsw_o; - - Field& evec_k = evec[k]; - - gsw_op.Start(); - _HermOp(evec_k,w); - gsw_op.Stop(); - - if(k>0){ - w -= lme[k-1] * evec[k-1]; - } - - ComplexD zalph = innerProduct(evec_k,w); // 4. αk:=(wk,vk) - RealD alph = real(zalph); - - w = w - alph * evec_k;// 5. wk:=wk−αkvk - - RealD beta = normalise(w); // 6. βk+1 := ∥wk∥2. If βk+1 = 0 then Stop - // 7. 
vk+1 := wk/βk+1 - - std::cout<0 && k % orth_period == 0) { - orthogonalize(w,evec,k); // orthonormalise - } - gsw_o.Stop(); - - if(k < Nm-1) { - evec[k+1] = w; - } - - std::cout << GridLogMessage << "Timing: operator=" << gsw_op.Elapsed() << - " orth=" << gsw_o.Elapsed() << std::endl; - - } - - void qr_decomp(std::vector& lmd, - std::vector& lme, - int Nk, - int Nm, - std::vector& Qt, - RealD Dsh, - int kmin, - int kmax) - { - int k = kmin-1; - RealD x; - - RealD Fden = 1.0/hypot(lmd[k]-Dsh,lme[k]); - RealD c = ( lmd[k] -Dsh) *Fden; - RealD s = -lme[k] *Fden; - - RealD tmpa1 = lmd[k]; - RealD tmpa2 = lmd[k+1]; - RealD tmpb = lme[k]; - - lmd[k] = c*c*tmpa1 +s*s*tmpa2 -2.0*c*s*tmpb; - lmd[k+1] = s*s*tmpa1 +c*c*tmpa2 +2.0*c*s*tmpb; - lme[k] = c*s*(tmpa1-tmpa2) +(c*c-s*s)*tmpb; - x =-s*lme[k+1]; - lme[k+1] = c*lme[k+1]; - - for(int i=0; i& lmd, - std::vector& lme, - int N1, - int N2, - std::vector& Qt, - GridBase *grid){ - - std::cout << GridLogMessage << "diagonalize_lapack start\n"; - GridStopWatch gsw; - - const int size = Nm; - // tevals.resize(size); - // tevecs.resize(size); - LAPACK_INT NN = N1; - std::vector evals_tmp(NN); - std::vector evec_tmp(NN*NN); - memset(&evec_tmp[0],0,sizeof(double)*NN*NN); - // double AA[NN][NN]; - std::vector DD(NN); - std::vector EE(NN); - for (int i = 0; i< NN; i++) - for (int j = i - 1; j <= i + 1; j++) - if ( j < NN && j >= 0 ) { - if (i==j) DD[i] = lmd[i]; - if (i==j) evals_tmp[i] = lmd[i]; - if (j==(i-1)) EE[j] = lme[j]; - } - LAPACK_INT evals_found; - LAPACK_INT lwork = ( (18*NN) > (1+4*NN+NN*NN)? 
(18*NN):(1+4*NN+NN*NN)) ; - LAPACK_INT liwork = 3+NN*10 ; - std::vector iwork(liwork); - std::vector work(lwork); - std::vector isuppz(2*NN); - char jobz = 'V'; // calculate evals & evecs - char range = 'I'; // calculate all evals - // char range = 'A'; // calculate all evals - char uplo = 'U'; // refer to upper half of original matrix - char compz = 'I'; // Compute eigenvectors of tridiagonal matrix - std::vector ifail(NN); - LAPACK_INT info; - // int total = QMP_get_number_of_nodes(); - // int node = QMP_get_node_number(); - // GridBase *grid = evec[0]._grid; - int total = grid->_Nprocessors; - int node = grid->_processor; - int interval = (NN/total)+1; - double vl = 0.0, vu = 0.0; - LAPACK_INT il = interval*node+1 , iu = interval*(node+1); - if (iu > NN) iu=NN; - double tol = 0.0; - if (1) { - memset(&evals_tmp[0],0,sizeof(double)*NN); - if ( il <= NN){ - std::cout << GridLogMessage << "dstegr started" << std::endl; - gsw.Start(); - dstegr(&jobz, &range, &NN, - (double*)&DD[0], (double*)&EE[0], - &vl, &vu, &il, &iu, // these four are ignored if second parameteris 'A' - &tol, // tolerance - &evals_found, &evals_tmp[0], (double*)&evec_tmp[0], &NN, - &isuppz[0], - &work[0], &lwork, &iwork[0], &liwork, - &info); - gsw.Stop(); - std::cout << GridLogMessage << "dstegr completed in " << gsw.Elapsed() << std::endl; - for (int i = iu-1; i>= il-1; i--){ - evals_tmp[i] = evals_tmp[i - (il-1)]; - if (il>1) evals_tmp[i-(il-1)]=0.; - for (int j = 0; j< NN; j++){ - evec_tmp[i*NN + j] = evec_tmp[(i - (il-1)) * NN + j]; - if (il>1) evec_tmp[(i-(il-1)) * NN + j]=0.; - } - } - } - { - // QMP_sum_double_array(evals_tmp,NN); - // QMP_sum_double_array((double *)evec_tmp,NN*NN); - grid->GlobalSumVector(&evals_tmp[0],NN); - grid->GlobalSumVector(&evec_tmp[0],NN*NN); - } - } - // cheating a bit. It is better to sort instead of just reversing it, but the document of the routine says evals are sorted in increasing order. qr gives evals in decreasing order. 
- for(int i=0;i& lmd, - std::vector& lme, - int N2, - int N1, - std::vector& Qt, - GridBase *grid) - { - -#ifdef USE_LAPACK_IRL - const int check_lapack=0; // just use lapack if 0, check against lapack if 1 - - if(!check_lapack) - return diagonalize_lapack(lmd,lme,N2,N1,Qt,grid); - - std::vector lmd2(N1); - std::vector lme2(N1); - std::vector Qt2(N1*N1); - for(int k=0; k= kmin; --j){ - RealD dds = fabs(lmd[j-1])+fabs(lmd[j]); - if(fabs(lme[j-1])+dds > dds){ - kmax = j+1; - goto continued; - } - } - Niter = iter; -#ifdef USE_LAPACK_IRL - if(check_lapack){ - const double SMALL=1e-8; - diagonalize_lapack(lmd2,lme2,N2,N1,Qt2,grid); - std::vector lmd3(N2); - for(int k=0; kSMALL) std::cout<SMALL) std::cout<SMALL) std::cout< dds){ - kmin = j+1; - break; - } - } - } - std::cout< - static RealD normalise(T& v) - { - RealD nn = norm2(v); - nn = sqrt(nn); - v = v * (1.0/nn); - return nn; - } - - void orthogonalize(Field& w, - BasisFieldVector& evec, - int k) - { - double t0=-usecond()/1e6; - - evec.orthogonalize(w,k); - - normalise(w); - t0+=usecond()/1e6; - OrthoTime +=t0; - } - - void setUnit_Qt(int Nm, std::vector &Qt) { - for(int i=0; i& evec,int k) + { + OrthoTime-=usecond()/1e6; + //evec.orthogonalize(w,k); + basisOrthogonalize(evec._v,w,k); + normalise(w); + OrthoTime+=usecond()/1e6; + } /* Rudy Arthur's thesis pp.137 ------------------------ @@ -474,280 +238,555 @@ repeat →AVK =VKHK +fKe†K † Extend to an M = K + P step factorization AVM = VMHM + fMeM until convergence */ - - void calc(std::vector& eval, - BasisFieldVector& evec, - const Field& src, - int& Nconv, - bool reverse, - int SkipTest) - { - - GridBase *grid = evec._v[0]._grid;//evec.get(0 + evec_offset)._grid; - assert(grid == src._grid); - - std::cout< lme(Nm); - std::vector lme2(Nm); - std::vector eval2(Nm); - std::vector eval2_copy(Nm); - std::vector Qt(Nm*Nm); - - - Field f(grid); - Field v(grid); - - int k1 = 1; - int k2 = Nk; - - Nconv = 0; - - RealD beta_k; - - // Set initial vector - evec[0] = src; - 
normalise(evec[0]); - std:: cout<0); - evec.rotate(Qt,k1-1,k2+1,0,Nm,Nm); - - t1=usecond()/1e6; - std::cout<= Nminres) { - std::cout << GridLogMessage << "Rotation to test convergence " << std::endl; - - Field ev0_orig(grid); - ev0_orig = evec[0]; - - evec.rotate(Qt,0,Nk,0,Nk,Nm); - - { - std::cout << GridLogMessage << "Test convergence" << std::endl; - Field B(grid); - - for(int j = 0; j=Nstop || beta_k < betastp){ - goto converged; - } - - std::cout << GridLogMessage << "Rotate back" << std::endl; - //B[j] +=Qt[k+_Nm*j] * _v[k]._odata[ss]; - { - Eigen::MatrixXd qm = Eigen::MatrixXd::Zero(Nk,Nk); - for (int k=0;k QtI(Nm*Nm); - for (int k=0;k lme(Nm); + std::vector lme2(Nm); + std::vector eval2(Nm); + std::vector eval2_copy(Nm); + Eigen::MatrixXd Qt = Eigen::MatrixXd::Zero(Nm,Nm); + + Field f(grid); + Field v(grid); + int k1 = 1; + int k2 = Nk; + RealD beta_k; + + Nconv = 0; + + // Set initial vector + evec[0] = src; + normalise(evec[0]); + + // Initial Nk steps + OrthoTime=0.; + for(int k=0; k0); + // evec.rotate(Qt,k1-1,k2+1,0,Nm,Nm); /// big constraint on the basis + basisRotate(evec._v,Qt,k1-1,k2+1,0,Nm,Nm); /// big constraint on the basis + + std::cout<= MinRestart) { + std::cout << GridLogIRL << "Rotation to test convergence " << std::endl; + + Field ev0_orig(grid); + ev0_orig = evec[0]; + + // evec.rotate(Qt,0,Nk,0,Nk,Nm); + basisRotate(evec._v,Qt,0,Nk,0,Nk,Nm); + + { + std::cout << GridLogIRL << "Test convergence" << std::endl; + Field B(grid); + + for(int j = 0; j=Nstop || beta_k < betastp){ + goto converged; + } + + std::cout << GridLogIRL << "Convergence testing: Rotating back" << std::endl; + //B[j] +=Qt[k+_Nm*j] * _v[k]._odata[ss]; + { + Eigen::MatrixXd qm = Eigen::MatrixXd::Zero(Nk,Nk); // Restrict Qt to Nk x Nk + for (int k=0;k& lmd, + std::vector& lme, + BasisFieldVector& evec, + Field& w,int Nm,int k) + { + const RealD tiny = 1.0e-20; + assert( k< Nm ); + + GridStopWatch gsw_op,gsw_o; + + Field& evec_k = evec[k]; + + _HermOp(evec_k,w); + 
std::cout<0) w -= lme[k-1] * evec[k-1]; + + ComplexD zalph = innerProduct(evec_k,w); // 4. αk:=(wk,vk) + RealD alph = real(zalph); + + w = w - alph * evec_k;// 5. wk:=wk−αkvk + + RealD beta = normalise(w); // 6. βk+1 := ∥wk∥2. If βk+1 = 0 then Stop + // 7. vk+1 := wk/βk+1 + + lmd[k] = alph; + lme[k] = beta; + + std::cout<0 && k % orth_period == 0) { + orthogonalize(w,evec,k); // orthonormalise + std::cout<& lmd, std::vector& lme, + int Nk, int Nm, + Eigen::MatrixXd & Qt, // Nm x Nm + GridBase *grid) + { + Eigen::MatrixXd TriDiag = Eigen::MatrixXd::Zero(Nk,Nk); + + for(int i=0;i eigensolver(TriDiag); + + for (int i = 0; i < Nk; i++) { + lmd[Nk-1-i] = eigensolver.eigenvalues()(i); + } + for (int i = 0; i < Nk; i++) { + for (int j = 0; j < Nk; j++) { + Qt(Nk-1-i,j) = eigensolver.eigenvectors()(j,i); + } + } + } + + /////////////////////////////////////////////////////////////////////////// + // File could end here if settle on Eigen ??? + /////////////////////////////////////////////////////////////////////////// + + void QR_decomp(std::vector& lmd, // Nm + std::vector& lme, // Nm + int Nk, int Nm, // Nk, Nm + Eigen::MatrixXd& Qt, // Nm x Nm matrix + RealD Dsh, int kmin, int kmax) + { + int k = kmin-1; + RealD x; + + RealD Fden = 1.0/hypot(lmd[k]-Dsh,lme[k]); + RealD c = ( lmd[k] -Dsh) *Fden; + RealD s = -lme[k] *Fden; + + RealD tmpa1 = lmd[k]; + RealD tmpa2 = lmd[k+1]; + RealD tmpb = lme[k]; + + lmd[k] = c*c*tmpa1 +s*s*tmpa2 -2.0*c*s*tmpb; + lmd[k+1] = s*s*tmpa1 +c*c*tmpa2 +2.0*c*s*tmpb; + lme[k] = c*s*(tmpa1-tmpa2) +(c*c-s*s)*tmpb; + x =-s*lme[k+1]; + lme[k+1] = c*lme[k+1]; + + for(int i=0; i& lmd, std::vector& lme, + int Nk, int Nm, + Eigen::MatrixXd & Qt, + GridBase *grid) + { + Qt = Eigen::MatrixXd::Identity(Nm,Nm); + if ( diagonalisation == IRLdiagonaliseWithDSTEGR ) { + diagonalize_lapack(lmd,lme,Nk,Nm,Qt,grid); + } else if ( diagonalisation == IRLdiagonaliseWithQR ) { + diagonalize_QR(lmd,lme,Nk,Nm,Qt,grid); + } else if ( diagonalisation == 
IRLdiagonaliseWithEigen ) { + diagonalize_Eigen(lmd,lme,Nk,Nm,Qt,grid); + } else { + assert(0); + } + } + +#ifdef USE_LAPACK +void LAPACK_dstegr(char *jobz, char *range, int *n, double *d, double *e, + double *vl, double *vu, int *il, int *iu, double *abstol, + int *m, double *w, double *z, int *ldz, int *isuppz, + double *work, int *lwork, int *iwork, int *liwork, + int *info); #endif - }; - +void diagonalize_lapack(std::vector& lmd, + std::vector& lme, + int Nk, int Nm, + Eigen::MatrixXd& Qt, + GridBase *grid) +{ +#ifdef USE_LAPACK + const int size = Nm; + int NN = Nk; + double evals_tmp[NN]; + double evec_tmp[NN][NN]; + memset(evec_tmp[0],0,sizeof(double)*NN*NN); + double DD[NN]; + double EE[NN]; + for (int i = 0; i< NN; i++) { + for (int j = i - 1; j <= i + 1; j++) { + if ( j < NN && j >= 0 ) { + if (i==j) DD[i] = lmd[i]; + if (i==j) evals_tmp[i] = lmd[i]; + if (j==(i-1)) EE[j] = lme[j]; + } + } + } + int evals_found; + int lwork = ( (18*NN) > (1+4*NN+NN*NN)? (18*NN):(1+4*NN+NN*NN)) ; + int liwork = 3+NN*10 ; + int iwork[liwork]; + double work[lwork]; + int isuppz[2*NN]; + char jobz = 'V'; // calculate evals & evecs + char range = 'I'; // calculate all evals + // char range = 'A'; // calculate all evals + char uplo = 'U'; // refer to upper half of original matrix + char compz = 'I'; // Compute eigenvectors of tridiagonal matrix + int ifail[NN]; + int info; + int total = grid->_Nprocessors; + int node = grid->_processor; + int interval = (NN/total)+1; + double vl = 0.0, vu = 0.0; + int il = interval*node+1 , iu = interval*(node+1); + if (iu > NN) iu=NN; + double tol = 0.0; + if (1) { + memset(evals_tmp,0,sizeof(double)*NN); + if ( il <= NN){ + LAPACK_dstegr(&jobz, &range, &NN, + (double*)DD, (double*)EE, + &vl, &vu, &il, &iu, // these four are ignored if second parameteris 'A' + &tol, // tolerance + &evals_found, evals_tmp, (double*)evec_tmp, &NN, + isuppz, + work, &lwork, iwork, &liwork, + &info); + for (int i = iu-1; i>= il-1; i--){ + evals_tmp[i] = 
evals_tmp[i - (il-1)]; + if (il>1) evals_tmp[i-(il-1)]=0.; + for (int j = 0; j< NN; j++){ + evec_tmp[i][j] = evec_tmp[i - (il-1)][j]; + if (il>1) evec_tmp[i-(il-1)][j]=0.; + } + } + } + { + grid->GlobalSumVector(evals_tmp,NN); + grid->GlobalSumVector((double*)evec_tmp,NN*NN); + } + } + // Safer to sort instead of just reversing it, + // but the document of the routine says evals are sorted in increasing order. + // qr gives evals in decreasing order. + for(int i=0;i& lmd, std::vector& lme, + int Nk, int Nm, + Eigen::MatrixXd & Qt, + GridBase *grid) + { + int QRiter = 100*Nm; + int kmin = 1; + int kmax = Nk; + + // (this should be more sophisticated) + for(int iter=0; iter= kmin; --j){ + RealD dds = fabs(lmd[j-1])+fabs(lmd[j]); + if(fabs(lme[j-1])+dds > dds){ + kmax = j+1; + goto continued; + } + } + QRiter = iter; + return; + + continued: + for(int j=0; j dds){ + kmin = j+1; + break; + } + } + } + std::cout << GridLogError << "[QL method] Error - Too many iteration: "<& Qt,int j0, int j1, int k0,int k1,int Nm) { + void rotate(Eigen::MatrixXd& Qt,int j0, int j1, int k0,int k1,int Nm) { GridBase* grid = _v[0]._grid; @@ -62,7 +62,7 @@ class BasisFieldVector { for(int j=j0; j &logstreams) { GridLogError.Active(0); diff --git a/lib/log/Log.h b/lib/log/Log.h index 74d080bb..8db83266 100644 --- a/lib/log/Log.h +++ b/lib/log/Log.h @@ -85,6 +85,7 @@ class Logger { protected: Colours &Painter; int active; + int timing_mode; static int timestamp; std::string name, topName; std::string COLOUR; @@ -101,20 +102,24 @@ public: name(nm), topName(topNm), Painter(col_class), + timing_mode(0), COLOUR(col) {} ; void Active(int on) {active = on;}; int isActive(void) {return active;}; static void Timestamp(int on) {timestamp = on;}; - + void Reset(void) { StopWatch.Reset(); } + void TimingMode(int on) { timing_mode = on; if(on) Reset(); } + friend std::ostream& operator<< (std::ostream& stream, Logger& log){ if ( log.active ) { - stream << log.background()<< std::setw(8) << std::left << 
log.topName << log.background()<< " : "; - stream << log.colour() << std::setw(10) << std::left << log.name << log.background() << " : "; + stream << log.background()<< std::left << log.topName << log.background()<< " : "; + stream << log.colour() << std::left << log.name << log.background() << " : "; if ( log.timestamp ) { StopWatch.Stop(); GridTime now = StopWatch.Elapsed(); + if ( log.timing_mode==1 ) StopWatch.Reset(); StopWatch.Start(); stream << log.evidence()<< now << log.background() << " : " ; } @@ -135,6 +140,8 @@ public: void GridLogConfigure(std::vector &logstreams); +extern GridLogger GridLogIRL; +extern GridLogger GridLogSolver; extern GridLogger GridLogError; extern GridLogger GridLogWarning; extern GridLogger GridLogMessage; diff --git a/lib/threads/Threads.h b/lib/threads/Threads.h index d15f15ce..36daf2af 100644 --- a/lib/threads/Threads.h +++ b/lib/threads/Threads.h @@ -51,7 +51,9 @@ Author: paboyle #define PARALLEL_CRITICAL #endif +#define parallel_region PARALLEL_REGION #define parallel_for PARALLEL_FOR_LOOP for +#define parallel_for_internal PARALLEL_FOR_LOOP_INTERN for #define parallel_for_nest2 PARALLEL_NESTED_LOOP2 for namespace Grid { diff --git a/tests/debug/Test_cheby.cc b/tests/debug/Test_cheby.cc index 40544c56..72d07885 100644 --- a/tests/debug/Test_cheby.cc +++ b/tests/debug/Test_cheby.cc @@ -37,8 +37,15 @@ RealD InverseApproximation(RealD x){ RealD SqrtApproximation(RealD x){ return std::sqrt(x); } +RealD Approximation32(RealD x){ + return std::pow(x,-1.0/32.0); +} +RealD Approximation2(RealD x){ + return std::pow(x,-1.0/2.0); +} + RealD StepFunction(RealD x){ - if ( x<0.1 ) return 1.0; + if ( x<10.0 ) return 1.0; else return 0.0; } @@ -56,7 +63,6 @@ int main (int argc, char ** argv) Chebyshev ChebyInv(lo,hi,2000,InverseApproximation); - { std::ofstream of("chebyinv"); ChebyInv.csv(of); @@ -78,7 +84,6 @@ int main (int argc, char ** argv) ChebyStep.JacksonSmooth(); - { std::ofstream of("chebystepjack"); ChebyStep.csv(of); @@ -100,5 
+105,30 @@ int main (int argc, char ** argv) ChebyNE.csv(of); } + lo=0.0; + hi=4.0; + Chebyshev Cheby32(lo,hi,2000,Approximation32); + { + std::ofstream of("cheby32"); + Cheby32.csv(of); + } + Cheby32.JacksonSmooth(); + { + std::ofstream of("cheby32jack"); + Cheby32.csv(of); + } + + Chebyshev ChebySqrt(lo,hi,2000,Approximation2); + { + std::ofstream of("chebysqrt"); + ChebySqrt.csv(of); + } + ChebySqrt.JacksonSmooth(); + { + std::ofstream of("chebysqrtjack"); + ChebySqrt.csv(of); + } + + Grid_finalize(); } diff --git a/tests/hmc/Test_remez.cc b/tests/hmc/Test_remez.cc index bc851173..5f4b0a25 100644 --- a/tests/hmc/Test_remez.cc +++ b/tests/hmc/Test_remez.cc @@ -38,11 +38,11 @@ int main (int argc, char ** argv) std::cout< Date: Fri, 13 Oct 2017 13:22:26 +0100 Subject: [PATCH 143/377] Final version prior to reunification --- .../BlockImplicitlyRestartedLanczos.h | 45 +++++++++---------- 1 file changed, 21 insertions(+), 24 deletions(-) diff --git a/lib/algorithms/iterative/BlockImplicitlyRestartedLanczos/BlockImplicitlyRestartedLanczos.h b/lib/algorithms/iterative/BlockImplicitlyRestartedLanczos/BlockImplicitlyRestartedLanczos.h index 90d45193..de3f1790 100644 --- a/lib/algorithms/iterative/BlockImplicitlyRestartedLanczos/BlockImplicitlyRestartedLanczos.h +++ b/lib/algorithms/iterative/BlockImplicitlyRestartedLanczos/BlockImplicitlyRestartedLanczos.h @@ -35,9 +35,6 @@ Author: Christoph Lehner //#include #include -#include -#include -#include namespace Grid { @@ -178,7 +175,7 @@ class BlockImplicitlyRestartedLanczos { //////////////////////////////// // Embedded objects //////////////////////////////// - SortEigen _sort; + // SortEigen _sort; LinearFunction &_HermOp; LinearFunction &_HermOpTest; ///////////////////////// @@ -212,11 +209,10 @@ public: return nn; } - void orthogonalize(Field& w, BasisFieldVector& evec,int k) + void orthogonalize(Field& w, std::vector& evec,int k) { OrthoTime-=usecond()/1e6; - //evec.orthogonalize(w,k); - 
basisOrthogonalize(evec._v,w,k); + basisOrthogonalize(evec,w,k); normalise(w); OrthoTime+=usecond()/1e6; } @@ -238,7 +234,7 @@ repeat →AVK =VKHK +fKe†K † Extend to an M = K + P step factorization AVM = VMHM + fMeM until convergence */ - void calc(std::vector& eval, BasisFieldVector& evec, const Field& src, int& Nconv, bool reverse, int SkipTest) + void calc(std::vector& eval, std::vector& evec, const Field& src, int& Nconv, bool reverse, int SkipTest) { GridBase *grid = src._grid; assert(grid == evec[0]._grid); @@ -341,7 +337,8 @@ until convergence ////////////////////////////////// eval2_copy = eval2; - _sort.push(eval2,Nm); + // _sort.push(eval2,Nm); + std::partial_sort(eval2.begin(),eval2.begin()+Nm,eval2.end()); std::cout<0); - // evec.rotate(Qt,k1-1,k2+1,0,Nm,Nm); /// big constraint on the basis - basisRotate(evec._v,Qt,k1-1,k2+1,0,Nm,Nm); /// big constraint on the basis + basisRotate(evec,Qt,k1-1,k2+1,0,Nm,Nm); /// big constraint on the basis std::cout< //memset +//#include +#include -namespace Grid { +namespace Grid { - enum IRLdiagonalisation { - IRLdiagonaliseWithDSTEGR, - IRLdiagonaliseWithQR, - IRLdiagonaliseWithEigen - }; - -//////////////////////////////////////////////////////////////////////////////// -// Helper class for sorting the evalues AND evectors by Field -// Use pointer swizzle on vectors -//////////////////////////////////////////////////////////////////////////////// template -class SortEigen { - private: - static bool less_lmd(RealD left,RealD right){ - return left > right; - } - static bool less_pair(std::pair& left, - std::pair& right){ - return left.first > (right.first); - } - - public: - void push(std::vector& lmd,std::vector& evec,int N) { - - //////////////////////////////////////////////////////////////////////// - // PAB: FIXME: VERY VERY VERY wasteful: takes a copy of the entire vector set. 
- // : The vector reorder should be done by pointer swizzle somehow - //////////////////////////////////////////////////////////////////////// - std::vector cpy(lmd.size(),evec[0]._grid); - for(int i=0;i > emod(lmd.size()); +void basisOrthogonalize(std::vector &basis,Field &w,int k) +{ + for(int j=0; j(lmd[i],&cpy[i]); - - partial_sort(emod.begin(),emod.begin()+N,emod.end(),less_pair); - - typename std::vector >::iterator it = emod.begin(); - for(int i=0;ifirst; - evec[i]=*(it->second); - ++it; +template +void basisRotate(std::vector &basis,Eigen::MatrixXd& Qt,int j0, int j1, int k0,int k1,int Nm) +{ + typedef typename Field::vector_object vobj; + GridBase* grid = basis[0]._grid; + + parallel_region + { + std::vector < vobj > B(Nm); // Thread private + + parallel_for_internal(int ss=0;ss < grid->oSites();ss++){ + for(int j=j0; j& lmd,int N) { - std::partial_sort(lmd.begin(),lmd.begin()+N,lmd.end(),less_lmd); +} + +template +void basisReorderInPlace(std::vector &_v,std::vector& sort_vals, std::vector& idx) +{ + int vlen = idx.size(); + + assert(vlen>=1); + assert(vlen<=sort_vals.size()); + assert(vlen<=_v.size()); + + for (size_t i=0;i i); + ////////////////////////////////////// + // idx[i] is a table of desired sources giving a permutation. + // + // Swap v[i] with v[idx[i]]. 
+ // + // Find j>i for which _vnew[j] = _vold[i], + // track the move idx[j] => idx[i] + // track the move idx[i] => i + ////////////////////////////////////// + size_t j; + for (j=i;j fabs(thrs); +} + +inline std::vector basisSortGetIndex(std::vector& sort_vals) +{ + std::vector idx(sort_vals.size()); + std::iota(idx.begin(), idx.end(), 0); + + // sort indexes based on comparing values in v + std::sort(idx.begin(), idx.end(), [&sort_vals](int i1, int i2) { + return ::fabs(sort_vals[i1]) < ::fabs(sort_vals[i2]); + }); + return idx; +} + +template +void basisSortInPlace(std::vector & _v,std::vector& sort_vals, bool reverse) +{ + std::vector idx = basisSortGetIndex(sort_vals); + if (reverse) + std::reverse(idx.begin(), idx.end()); + + basisReorderInPlace(_v,sort_vals,idx); +} + +// PAB: faster to compute the inner products first then fuse loops. +// If performance critical can improve. +template +void basisDeflate(const std::vector &_v,const std::vector& eval,const Field& src_orig,Field& result) { + result = zero; + assert(_v.size()==eval.size()); + int N = (int)_v.size(); + for (int i=0;i class ImplicitlyRestartedLanczos { - -private: - - int MaxIter; // Max iterations - int Nstop; // Number of evecs checked for convergence - int Nk; // Number of converged sought - int Nm; // Nm -- total number of vectors - RealD eresid; + private: + const RealD small = 1.0e-8; + int MaxIter; + int MinRestart; // Minimum number of restarts; only check for convergence after + int Nstop; // Number of evecs checked for convergence + int Nk; // Number of converged sought + // int Np; // Np -- Number of spare vecs in krylov space // == Nm - Nk + int Nm; // Nm -- total number of vectors IRLdiagonalisation diagonalisation; - //////////////////////////////////// + int orth_period; + + RealD OrthoTime; + RealD eresid, betastp; + //////////////////////////////// // Embedded objects - //////////////////////////////////// - SortEigen _sort; - LinearOperatorBase &_Linop; - OperatorFunction 
&_poly; - + //////////////////////////////// + LinearFunction &_HermOp; + LinearFunction &_HermOpTest; ///////////////////////// // Constructor ///////////////////////// public: - ImplicitlyRestartedLanczos(LinearOperatorBase &Linop, // op - OperatorFunction & poly, // polynomial - int _Nstop, // really sought vecs - int _Nk, // sought vecs - int _Nm, // total vecs - RealD _eresid, // resid in lmd deficit - int _MaxIter, // Max iterations - IRLdiagonalisation _diagonalisation= IRLdiagonaliseWithEigen ) : - _Linop(Linop), _poly(poly), - Nstop(_Nstop), Nk(_Nk), Nm(_Nm), - eresid(_eresid), MaxIter(_MaxIter), - diagonalisation(_diagonalisation) - { }; + ImplicitlyRestartedLanczos(LinearFunction & HermOp, + LinearFunction & HermOpTest, + int _Nstop, // sought vecs + int _Nk, // sought vecs + int _Nm, // spare vecs + RealD _eresid, // resid in lmdue deficit + RealD _betastp, // if beta(k) < betastp: converged + int _MaxIter, // Max iterations + int _MinRestart, int _orth_period = 1, + IRLdiagonalisation _diagonalisation= IRLdiagonaliseWithEigen) : + _HermOp(HermOp), _HermOpTest(HermOpTest), + Nstop(_Nstop) , Nk(_Nk), Nm(_Nm), + eresid(_eresid), betastp(_betastp), + MaxIter(_MaxIter) , MinRestart(_MinRestart), + orth_period(_orth_period), diagonalisation(_diagonalisation) { }; //////////////////////////////// // Helpers //////////////////////////////// - static RealD normalise(Field& v) + template static RealD normalise(T& v) { RealD nn = norm2(v); nn = sqrt(nn); v = v * (1.0/nn); return nn; } - - void orthogonalize(Field& w, std::vector& evec, int k) + + void orthogonalize(Field& w, std::vector& evec,int k) { - typedef typename Field::scalar_type MyComplex; - MyComplex ip; - - for(int j=0; j& eval, std::vector& evec, const Field& src, int& Nconv) + void calc(std::vector& eval, std::vector& evec, const Field& src, int& Nconv, bool reverse, int SkipTest) { + GridBase *grid = src._grid; + assert(grid == evec[0]._grid); - GridBase *grid = evec[0]._grid; - assert(grid == 
src._grid); - - std::cout << GridLogMessage <<"**************************************************************************"<< std::endl; - std::cout << GridLogMessage <<" ImplicitlyRestartedLanczos::calc() starting iteration 0 / "<< MaxIter<< std::endl; - std::cout << GridLogMessage <<"**************************************************************************"<< std::endl; - std::cout << GridLogMessage <<" -- seek Nk = " << Nk <<" vectors"<< std::endl; - std::cout << GridLogMessage <<" -- accept Nstop = " << Nstop <<" vectors"<< std::endl; - std::cout << GridLogMessage <<" -- total Nm = " << Nm <<" vectors"<< std::endl; - std::cout << GridLogMessage <<" -- size of eval = " << eval.size() << std::endl; - std::cout << GridLogMessage <<" -- size of evec = " << evec.size() << std::endl; + GridLogIRL.TimingMode(1); + std::cout << GridLogIRL <<"**************************************************************************"<< std::endl; + std::cout << GridLogIRL <<" ImplicitlyRestartedLanczos::calc() starting iteration 0 / "<< MaxIter<< std::endl; + std::cout << GridLogIRL <<"**************************************************************************"<< std::endl; + std::cout << GridLogIRL <<" -- seek Nk = " << Nk <<" vectors"<< std::endl; + std::cout << GridLogIRL <<" -- accept Nstop = " << Nstop <<" vectors"<< std::endl; + std::cout << GridLogIRL <<" -- total Nm = " << Nm <<" vectors"<< std::endl; + std::cout << GridLogIRL <<" -- size of eval = " << eval.size() << std::endl; + std::cout << GridLogIRL <<" -- size of evec = " << evec.size() << std::endl; if ( diagonalisation == IRLdiagonaliseWithDSTEGR ) { - std::cout << GridLogMessage << "Diagonalisation is DSTEGR "< lme(Nm); std::vector lme2(Nm); std::vector eval2(Nm); + std::vector eval2_copy(Nm); + Eigen::MatrixXd Qt = Eigen::MatrixXd::Zero(Nm,Nm); - Eigen::MatrixXd Qt = Eigen::MatrixXd::Zero(Nm,Nm); - - std::vector Iconv(Nm); - std::vector B(Nm,grid); // waste of space replicating - Field f(grid); Field v(grid); - int k1 = 
1; int k2 = Nk; - - Nconv = 0; - RealD beta_k; + + Nconv = 0; // Set initial vector evec[0] = src; - std::cout << GridLogMessage <<"norm2(src)= " << norm2(src)<0); + basisRotate(evec,Qt,k1-1,k2+1,0,Nm,Nm); /// big constraint on the basis + + std::cout<= MinRestart) { + std::cout << GridLogIRL << "Rotation to test convergence " << std::endl; - _Linop.HermOp(B[i],v); + Field ev0_orig(grid); + ev0_orig = evec[0]; - RealD vnum = real(innerProduct(B[i],v)); // HermOp. - RealD vden = norm2(B[i]); - eval2[i] = vnum/vden; - v -= eval2[i]*B[i]; - RealD vv = norm2(v); - - std::cout.precision(13); - std::cout << GridLogMessage << "[" << std::setw(3)<< std::setiosflags(std::ios_base::right) <=Nstop ){ - goto converged; - } - } // end of iter loop - - std::cout << GridLogMessage <<"**************************************************************************"<< std::endl; - std::cout << GridLogError <<" ImplicitlyRestartedLanczos::calc() NOT converged."; - std::cout << GridLogMessage <<"**************************************************************************"<< std::endl; + { + std::cout << GridLogIRL << "Test convergence" << std::endl; + Field B(grid); + + for(int j = 0; j=Nstop || beta_k < betastp){ + goto converged; + } + + //B[j] +=Qt[k+_Nm*j] * _v[k]._odata[ss]; + { + Eigen::MatrixXd qm = Eigen::MatrixXd::Zero(Nk,Nk); // Restrict Qt to Nk x Nk + for (int k=0;k0) w -= lme[k-1] * evec[k-1]; - - ComplexD zalph = innerProduct(evec[k],w); // 4. αk:=(wk,vk) + + ComplexD zalph = innerProduct(evec_k,w); // 4. αk:=(wk,vk) RealD alph = real(zalph); - - w = w - alph * evec[k];// 5. wk:=wk−αkvk - + + w = w - alph * evec_k;// 5. wk:=wk−αkvk + RealD beta = normalise(w); // 6. βk+1 := ∥wk∥2. If βk+1 = 0 then Stop // 7. 
vk+1 := wk/βk+1 - + lmd[k] = alph; lme[k] = beta; - - if ( k > 0 ) orthogonalize(w,evec,k); // orthonormalise - if ( k < Nm-1) evec[k+1] = w; - - if ( beta < tiny ) std::cout << GridLogMessage << " beta is tiny "<0 && k % orth_period == 0) { + orthogonalize(w,evec,k); // orthonormalise + std::cout<& lmd, std::vector& lme, int Nk, int Nm, Eigen::MatrixXd & Qt, // Nm x Nm @@ -405,11 +565,12 @@ private: } } } + /////////////////////////////////////////////////////////////////////////// // File could end here if settle on Eigen ??? /////////////////////////////////////////////////////////////////////////// - void qr_decomp(std::vector& lmd, // Nm + void QR_decomp(std::vector& lmd, // Nm std::vector& lme, // Nm int Nk, int Nm, // Nk, Nm Eigen::MatrixXd& Qt, // Nm x Nm matrix @@ -576,51 +737,50 @@ void diagonalize_lapack(std::vector& lmd, #endif } - void diagonalize_QR(std::vector& lmd, std::vector& lme, - int Nk, int Nm, - Eigen::MatrixXd & Qt, - GridBase *grid) - { - int Niter = 100*Nm; - int kmin = 1; - int kmax = Nk; - - // (this should be more sophisticated) - for(int iter=0; iter= kmin; --j){ - RealD dds = fabs(lmd[j-1])+fabs(lmd[j]); - if(fabs(lme[j-1])+dds > dds){ - kmax = j+1; - goto continued; - } - } - Niter = iter; - return; - - continued: - for(int j=0; j dds){ - kmin = j+1; - break; - } +void diagonalize_QR(std::vector& lmd, std::vector& lme, + int Nk, int Nm, + Eigen::MatrixXd & Qt, + GridBase *grid) +{ + int QRiter = 100*Nm; + int kmin = 1; + int kmax = Nk; + + // (this should be more sophisticated) + for(int iter=0; iter= kmin; --j){ + RealD dds = fabs(lmd[j-1])+fabs(lmd[j]); + if(fabs(lme[j-1])+dds > dds){ + kmax = j+1; + goto continued; + } + } + QRiter = iter; + return; + + continued: + for(int j=0; j dds){ + kmin = j+1; + break; } } - std::cout << GridLogError << "[QL method] Error - Too many iteration: "< Date: Fri, 13 Oct 2017 13:23:07 +0100 Subject: [PATCH 145/377] Logging improvement; reunified the Lanczos codes --- 
.../BlockImplicitlyRestartedLanczos.h | 789 ------------------ lib/log/Log.cc | 2 +- lib/log/Log.h | 30 +- lib/util/Init.cc | 2 +- tests/lanczos/Test_dwf_compressed_lanczos.cc | 17 +- 5 files changed, 36 insertions(+), 804 deletions(-) delete mode 100644 lib/algorithms/iterative/BlockImplicitlyRestartedLanczos/BlockImplicitlyRestartedLanczos.h diff --git a/lib/algorithms/iterative/BlockImplicitlyRestartedLanczos/BlockImplicitlyRestartedLanczos.h b/lib/algorithms/iterative/BlockImplicitlyRestartedLanczos/BlockImplicitlyRestartedLanczos.h deleted file mode 100644 index de3f1790..00000000 --- a/lib/algorithms/iterative/BlockImplicitlyRestartedLanczos/BlockImplicitlyRestartedLanczos.h +++ /dev/null @@ -1,789 +0,0 @@ - /************************************************************************************* - - Grid physics library, www.github.com/paboyle/Grid - - Source file: ./lib/algorithms/iterative/ImplicitlyRestartedLanczos.h - - Copyright (C) 2015 - -Author: Peter Boyle -Author: paboyle -Author: Chulwoo Jung -Author: Christoph Lehner - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License along - with this program; if not, write to the Free Software Foundation, Inc., - 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
- - See the full license in the file "LICENSE" in the top level distribution directory - *************************************************************************************/ - /* END LEGAL */ -#ifndef GRID_BIRL_H -#define GRID_BIRL_H - -#include //memset -//#include -#include - - -namespace Grid { - -template -void basisOrthogonalize(std::vector &basis,Field &w,int k) -{ - for(int j=0; j -void basisRotate(std::vector &basis,Eigen::MatrixXd& Qt,int j0, int j1, int k0,int k1,int Nm) -{ - typedef typename Field::vector_object vobj; - GridBase* grid = basis[0]._grid; - - parallel_region - { - std::vector < vobj > B(Nm); // Thread private - - parallel_for_internal(int ss=0;ss < grid->oSites();ss++){ - for(int j=j0; j -void basisReorderInPlace(std::vector &_v,std::vector& sort_vals, std::vector& idx) -{ - int vlen = idx.size(); - - assert(vlen>=1); - assert(vlen<=sort_vals.size()); - assert(vlen<=_v.size()); - - for (size_t i=0;i i); - ////////////////////////////////////// - // idx[i] is a table of desired sources giving a permutation. - // - // Swap v[i] with v[idx[i]]. - // - // Find j>i for which _vnew[j] = _vold[i], - // track the move idx[j] => idx[i] - // track the move idx[i] => i - ////////////////////////////////////// - size_t j; - for (j=i;j basisSortGetIndex(std::vector& sort_vals) -{ - std::vector idx(sort_vals.size()); - std::iota(idx.begin(), idx.end(), 0); - - // sort indexes based on comparing values in v - std::sort(idx.begin(), idx.end(), [&sort_vals](int i1, int i2) { - return ::fabs(sort_vals[i1]) < ::fabs(sort_vals[i2]); - }); - return idx; -} - -template -void basisSortInPlace(std::vector & _v,std::vector& sort_vals, bool reverse) -{ - std::vector idx = basisSortGetIndex(sort_vals); - if (reverse) - std::reverse(idx.begin(), idx.end()); - - basisReorderInPlace(_v,sort_vals,idx); -} - -// PAB: faster to compute the inner products first then fuse loops. -// If performance critical can improve. 
-template -void basisDeflate(const std::vector &_v,const std::vector& eval,const Field& src_orig,Field& result) { - result = zero; - assert(_v.size()==eval.size()); - int N = (int)_v.size(); - for (int i=0;i -class BlockImplicitlyRestartedLanczos { - private: - const RealD small = 1.0e-8; - int MaxIter; - int MinRestart; // Minimum number of restarts; only check for convergence after - int Nstop; // Number of evecs checked for convergence - int Nk; // Number of converged sought - // int Np; // Np -- Number of spare vecs in krylov space // == Nm - Nk - int Nm; // Nm -- total number of vectors - IRLdiagonalisation diagonalisation; - int orth_period; - - RealD OrthoTime; - RealD eresid, betastp; - //////////////////////////////// - // Embedded objects - //////////////////////////////// - // SortEigen _sort; - LinearFunction &_HermOp; - LinearFunction &_HermOpTest; - ///////////////////////// - // Constructor - ///////////////////////// -public: - BlockImplicitlyRestartedLanczos(LinearFunction & HermOp, - LinearFunction & HermOpTest, - int _Nstop, // sought vecs - int _Nk, // sought vecs - int _Nm, // spare vecs - RealD _eresid, // resid in lmdue deficit - RealD _betastp, // if beta(k) < betastp: converged - int _MaxIter, // Max iterations - int _MinRestart, int _orth_period = 1, - IRLdiagonalisation _diagonalisation= IRLdiagonaliseWithEigen) : - _HermOp(HermOp), _HermOpTest(HermOpTest), - Nstop(_Nstop) , Nk(_Nk), Nm(_Nm), - eresid(_eresid), betastp(_betastp), - MaxIter(_MaxIter) , MinRestart(_MinRestart), - orth_period(_orth_period), diagonalisation(_diagonalisation) { }; - - //////////////////////////////// - // Helpers - //////////////////////////////// - template static RealD normalise(T& v) - { - RealD nn = norm2(v); - nn = sqrt(nn); - v = v * (1.0/nn); - return nn; - } - - void orthogonalize(Field& w, std::vector& evec,int k) - { - OrthoTime-=usecond()/1e6; - basisOrthogonalize(evec,w,k); - normalise(w); - OrthoTime+=usecond()/1e6; - } - -/* Rudy Arthur's thesis 
pp.137 ------------------------- -Require: M > K P = M − K † -Compute the factorization AVM = VM HM + fM eM -repeat - Q=I - for i = 1,...,P do - QiRi =HM −θiI Q = QQi - H M = Q †i H M Q i - end for - βK =HM(K+1,K) σK =Q(M,K) - r=vK+1βK +rσK - VK =VM(1:M)Q(1:M,1:K) - HK =HM(1:K,1:K) - →AVK =VKHK +fKe†K † Extend to an M = K + P step factorization AVM = VMHM + fMeM -until convergence -*/ - void calc(std::vector& eval, std::vector& evec, const Field& src, int& Nconv, bool reverse, int SkipTest) - { - GridBase *grid = src._grid; - assert(grid == evec[0]._grid); - - GridLogIRL.TimingMode(1); - std::cout << GridLogIRL <<"**************************************************************************"<< std::endl; - std::cout << GridLogIRL <<" ImplicitlyRestartedLanczos::calc() starting iteration 0 / "<< MaxIter<< std::endl; - std::cout << GridLogIRL <<"**************************************************************************"<< std::endl; - std::cout << GridLogIRL <<" -- seek Nk = " << Nk <<" vectors"<< std::endl; - std::cout << GridLogIRL <<" -- accept Nstop = " << Nstop <<" vectors"<< std::endl; - std::cout << GridLogIRL <<" -- total Nm = " << Nm <<" vectors"<< std::endl; - std::cout << GridLogIRL <<" -- size of eval = " << eval.size() << std::endl; - std::cout << GridLogIRL <<" -- size of evec = " << evec.size() << std::endl; - if ( diagonalisation == IRLdiagonaliseWithDSTEGR ) { - std::cout << GridLogIRL << "Diagonalisation is DSTEGR "< lme(Nm); - std::vector lme2(Nm); - std::vector eval2(Nm); - std::vector eval2_copy(Nm); - Eigen::MatrixXd Qt = Eigen::MatrixXd::Zero(Nm,Nm); - - Field f(grid); - Field v(grid); - int k1 = 1; - int k2 = Nk; - RealD beta_k; - - Nconv = 0; - - // Set initial vector - evec[0] = src; - normalise(evec[0]); - - // Initial Nk steps - OrthoTime=0.; - for(int k=0; k0); - basisRotate(evec,Qt,k1-1,k2+1,0,Nm,Nm); /// big constraint on the basis - - std::cout<= MinRestart) { - std::cout << GridLogIRL << "Rotation to test convergence " << std::endl; - - 
Field ev0_orig(grid); - ev0_orig = evec[0]; - - basisRotate(evec,Qt,0,Nk,0,Nk,Nm); - - { - std::cout << GridLogIRL << "Test convergence" << std::endl; - Field B(grid); - - for(int j = 0; j=Nstop || beta_k < betastp){ - goto converged; - } - - //B[j] +=Qt[k+_Nm*j] * _v[k]._odata[ss]; - { - Eigen::MatrixXd qm = Eigen::MatrixXd::Zero(Nk,Nk); // Restrict Qt to Nk x Nk - for (int k=0;k& lmd, - std::vector& lme, - std::vector& evec, - Field& w,int Nm,int k) - { - const RealD tiny = 1.0e-20; - assert( k< Nm ); - - GridStopWatch gsw_op,gsw_o; - - Field& evec_k = evec[k]; - - _HermOp(evec_k,w); - std::cout<0) w -= lme[k-1] * evec[k-1]; - - ComplexD zalph = innerProduct(evec_k,w); // 4. αk:=(wk,vk) - RealD alph = real(zalph); - - w = w - alph * evec_k;// 5. wk:=wk−αkvk - - RealD beta = normalise(w); // 6. βk+1 := ∥wk∥2. If βk+1 = 0 then Stop - // 7. vk+1 := wk/βk+1 - - lmd[k] = alph; - lme[k] = beta; - - std::cout<0 && k % orth_period == 0) { - orthogonalize(w,evec,k); // orthonormalise - std::cout<& lmd, std::vector& lme, - int Nk, int Nm, - Eigen::MatrixXd & Qt, // Nm x Nm - GridBase *grid) - { - Eigen::MatrixXd TriDiag = Eigen::MatrixXd::Zero(Nk,Nk); - - for(int i=0;i eigensolver(TriDiag); - - for (int i = 0; i < Nk; i++) { - lmd[Nk-1-i] = eigensolver.eigenvalues()(i); - } - for (int i = 0; i < Nk; i++) { - for (int j = 0; j < Nk; j++) { - Qt(Nk-1-i,j) = eigensolver.eigenvectors()(j,i); - } - } - } - - /////////////////////////////////////////////////////////////////////////// - // File could end here if settle on Eigen ??? 
- /////////////////////////////////////////////////////////////////////////// - - void QR_decomp(std::vector& lmd, // Nm - std::vector& lme, // Nm - int Nk, int Nm, // Nk, Nm - Eigen::MatrixXd& Qt, // Nm x Nm matrix - RealD Dsh, int kmin, int kmax) - { - int k = kmin-1; - RealD x; - - RealD Fden = 1.0/hypot(lmd[k]-Dsh,lme[k]); - RealD c = ( lmd[k] -Dsh) *Fden; - RealD s = -lme[k] *Fden; - - RealD tmpa1 = lmd[k]; - RealD tmpa2 = lmd[k+1]; - RealD tmpb = lme[k]; - - lmd[k] = c*c*tmpa1 +s*s*tmpa2 -2.0*c*s*tmpb; - lmd[k+1] = s*s*tmpa1 +c*c*tmpa2 +2.0*c*s*tmpb; - lme[k] = c*s*(tmpa1-tmpa2) +(c*c-s*s)*tmpb; - x =-s*lme[k+1]; - lme[k+1] = c*lme[k+1]; - - for(int i=0; i& lmd, std::vector& lme, - int Nk, int Nm, - Eigen::MatrixXd & Qt, - GridBase *grid) - { - Qt = Eigen::MatrixXd::Identity(Nm,Nm); - if ( diagonalisation == IRLdiagonaliseWithDSTEGR ) { - diagonalize_lapack(lmd,lme,Nk,Nm,Qt,grid); - } else if ( diagonalisation == IRLdiagonaliseWithQR ) { - diagonalize_QR(lmd,lme,Nk,Nm,Qt,grid); - } else if ( diagonalisation == IRLdiagonaliseWithEigen ) { - diagonalize_Eigen(lmd,lme,Nk,Nm,Qt,grid); - } else { - assert(0); - } - } - -#ifdef USE_LAPACK -void LAPACK_dstegr(char *jobz, char *range, int *n, double *d, double *e, - double *vl, double *vu, int *il, int *iu, double *abstol, - int *m, double *w, double *z, int *ldz, int *isuppz, - double *work, int *lwork, int *iwork, int *liwork, - int *info); -#endif - -void diagonalize_lapack(std::vector& lmd, - std::vector& lme, - int Nk, int Nm, - Eigen::MatrixXd& Qt, - GridBase *grid) -{ -#ifdef USE_LAPACK - const int size = Nm; - int NN = Nk; - double evals_tmp[NN]; - double evec_tmp[NN][NN]; - memset(evec_tmp[0],0,sizeof(double)*NN*NN); - double DD[NN]; - double EE[NN]; - for (int i = 0; i< NN; i++) { - for (int j = i - 1; j <= i + 1; j++) { - if ( j < NN && j >= 0 ) { - if (i==j) DD[i] = lmd[i]; - if (i==j) evals_tmp[i] = lmd[i]; - if (j==(i-1)) EE[j] = lme[j]; - } - } - } - int evals_found; - int lwork = ( (18*NN) > 
(1+4*NN+NN*NN)? (18*NN):(1+4*NN+NN*NN)) ; - int liwork = 3+NN*10 ; - int iwork[liwork]; - double work[lwork]; - int isuppz[2*NN]; - char jobz = 'V'; // calculate evals & evecs - char range = 'I'; // calculate all evals - // char range = 'A'; // calculate all evals - char uplo = 'U'; // refer to upper half of original matrix - char compz = 'I'; // Compute eigenvectors of tridiagonal matrix - int ifail[NN]; - int info; - int total = grid->_Nprocessors; - int node = grid->_processor; - int interval = (NN/total)+1; - double vl = 0.0, vu = 0.0; - int il = interval*node+1 , iu = interval*(node+1); - if (iu > NN) iu=NN; - double tol = 0.0; - if (1) { - memset(evals_tmp,0,sizeof(double)*NN); - if ( il <= NN){ - LAPACK_dstegr(&jobz, &range, &NN, - (double*)DD, (double*)EE, - &vl, &vu, &il, &iu, // these four are ignored if second parameteris 'A' - &tol, // tolerance - &evals_found, evals_tmp, (double*)evec_tmp, &NN, - isuppz, - work, &lwork, iwork, &liwork, - &info); - for (int i = iu-1; i>= il-1; i--){ - evals_tmp[i] = evals_tmp[i - (il-1)]; - if (il>1) evals_tmp[i-(il-1)]=0.; - for (int j = 0; j< NN; j++){ - evec_tmp[i][j] = evec_tmp[i - (il-1)][j]; - if (il>1) evec_tmp[i-(il-1)][j]=0.; - } - } - } - { - grid->GlobalSumVector(evals_tmp,NN); - grid->GlobalSumVector((double*)evec_tmp,NN*NN); - } - } - // Safer to sort instead of just reversing it, - // but the document of the routine says evals are sorted in increasing order. - // qr gives evals in decreasing order. 
- for(int i=0;i& lmd, std::vector& lme, - int Nk, int Nm, - Eigen::MatrixXd & Qt, - GridBase *grid) - { - int QRiter = 100*Nm; - int kmin = 1; - int kmax = Nk; - - // (this should be more sophisticated) - for(int iter=0; iter= kmin; --j){ - RealD dds = fabs(lmd[j-1])+fabs(lmd[j]); - if(fabs(lme[j-1])+dds > dds){ - kmax = j+1; - goto continued; - } - } - QRiter = iter; - return; - - continued: - for(int j=0; j dds){ - kmin = j+1; - break; - } - } - } - std::cout << GridLogError << "[QL method] Error - Too many iteration: "<Reset(); + StopWatch->Start(); + } + void TimingMode(int on) { + timing_mode = on; + if(on) { + StopWatch = &LocalStopWatch; + Reset(); + } + } friend std::ostream& operator<< (std::ostream& stream, Logger& log){ @@ -117,10 +131,10 @@ public: stream << log.background()<< std::left << log.topName << log.background()<< " : "; stream << log.colour() << std::left << log.name << log.background() << " : "; if ( log.timestamp ) { - StopWatch.Stop(); - GridTime now = StopWatch.Elapsed(); - if ( log.timing_mode==1 ) StopWatch.Reset(); - StopWatch.Start(); + log.StopWatch->Stop(); + GridTime now = log.StopWatch->Elapsed(); + if ( log.timing_mode==1 ) log.StopWatch->Reset(); + log.StopWatch->Start(); stream << log.evidence()<< now << log.background() << " : " ; } stream << log.colour(); diff --git a/lib/util/Init.cc b/lib/util/Init.cc index 1266d34d..031f8f5a 100644 --- a/lib/util/Init.cc +++ b/lib/util/Init.cc @@ -208,7 +208,7 @@ static int Grid_is_initialised = 0; void Grid_init(int *argc,char ***argv) { - GridLogger::StopWatch.Start(); + GridLogger::GlobalStopWatch.Start(); std::string arg; diff --git a/tests/lanczos/Test_dwf_compressed_lanczos.cc b/tests/lanczos/Test_dwf_compressed_lanczos.cc index 7fe37387..544d0358 100644 --- a/tests/lanczos/Test_dwf_compressed_lanczos.cc +++ b/tests/lanczos/Test_dwf_compressed_lanczos.cc @@ -21,7 +21,14 @@ (ortho krylov low poly); and then fix up lowest say 200 eigenvalues by 1 run with high-degree poly (600 could be 
enough) */ #include -#include +#include +///////////////////////////////////////////////////////////////////////////// +// The following are now decoupled from the Lanczos and deal with grids. +// Safe to replace functionality +///////////////////////////////////////////////////////////////////////////// +#include +#include +#include #include "FieldVectorIO.h" #include "Params.h" @@ -319,7 +326,7 @@ void CoarseGridLanczos(BlockProjector& pr,RealD alpha2,RealD beta,int Npo Op2 = &Op2plain; } ProjectedHermOp,LatticeFermion> Op2nopoly(pr,HermOp); - BlockImplicitlyRestartedLanczos > IRL2(*Op2,*Op2,Nstop2,Nk2,Nm2,resid2,betastp2,MaxIt,MinRes2); + ImplicitlyRestartedLanczos > IRL2(*Op2,*Op2,Nstop2,Nk2,Nm2,resid2,betastp2,MaxIt,MinRes2); src_coarse = 1.0; @@ -350,7 +357,7 @@ void CoarseGridLanczos(BlockProjector& pr,RealD alpha2,RealD beta,int Npo ) { - IRL2.calc(eval2,coef,src_coarse,Nconv,true,SkipTest2); + IRL2.calc(eval2,coef._v,src_coarse,Nconv,true,SkipTest2); coef.resize(Nstop2); eval2.resize(Nstop2); @@ -641,7 +648,7 @@ int main (int argc, char ** argv) { } // First round of Lanczos to get low mode basis - BlockImplicitlyRestartedLanczos IRL1(Op1,Op1test,Nstop1,Nk1,Nm1,resid1,betastp1,MaxIt,MinRes1); + ImplicitlyRestartedLanczos IRL1(Op1,Op1test,Nstop1,Nk1,Nm1,resid1,betastp1,MaxIt,MinRes1); int Nconv; char tag[1024]; @@ -650,7 +657,7 @@ int main (int argc, char ** argv) { if (simple_krylov_basis) { quick_krylov_basis(evec,src,Op1,Nstop1); } else { - IRL1.calc(eval1,evec,src,Nconv,false,1); + IRL1.calc(eval1,evec._v,src,Nconv,false,1); } evec.resize(Nstop1); // and throw away superfluous eval1.resize(Nstop1); From e325929851aa0e26055875a22b39aee39ed186cd Mon Sep 17 00:00:00 2001 From: paboyle Date: Fri, 13 Oct 2017 14:02:43 +0100 Subject: [PATCH 146/377] ALl codes compile against the new Lanczos call signature --- lib/algorithms/LinearOperator.h | 59 +++++++++++++++++++ lib/algorithms/approx/Chebyshev.h | 35 ----------- .../iterative/ImplicitlyRestartedLanczos.h | 
6 +- tests/lanczos/Test_dwf_compressed_lanczos.cc | 30 +--------- tests/lanczos/Test_dwf_lanczos.cc | 11 ++-- tests/lanczos/Test_synthetic_lanczos.cc | 10 ++-- tests/lanczos/Test_wilson_lanczos.cc | 9 ++- 7 files changed, 82 insertions(+), 78 deletions(-) diff --git a/lib/algorithms/LinearOperator.h b/lib/algorithms/LinearOperator.h index f1b8820e..0d32cc15 100644 --- a/lib/algorithms/LinearOperator.h +++ b/lib/algorithms/LinearOperator.h @@ -346,6 +346,7 @@ namespace Grid { virtual void operator() (const Field &in, Field &out) = 0; }; + ///////////////////////////////////////////////////////////// // Base classes for Multishift solvers for operators ///////////////////////////////////////////////////////////// @@ -368,6 +369,64 @@ namespace Grid { }; */ + //////////////////////////////////////////////////////////////////////////////////////////// + // Hermitian operator Linear function and operator function + //////////////////////////////////////////////////////////////////////////////////////////// + template + class HermOpOperatorFunction : public OperatorFunction { + void operator() (LinearOperatorBase &Linop, const Field &in, Field &out) { + Linop.HermOp(in,out); + }; + }; + + template + class PlainHermOp : public LinearFunction { + public: + LinearOperatorBase &_Linop; + + PlainHermOp(LinearOperatorBase& linop) : _Linop(linop) + {} + + void operator()(const Field& in, Field& out) { + _Linop.HermOp(in,out); + } + }; + + template + class FunctionHermOp : public LinearFunction { + public: + OperatorFunction & _poly; + LinearOperatorBase &_Linop; + + FunctionHermOp(OperatorFunction & poly,LinearOperatorBase& linop) + : _poly(poly), _Linop(linop) {}; + + void operator()(const Field& in, Field& out) { + _poly(_Linop,in,out); + } + }; + + template + class Polynomial : public OperatorFunction { + private: + std::vector Coeffs; + public: + Polynomial(std::vector &_Coeffs) : Coeffs(_Coeffs) { }; + + // Implement the required interface + void operator() 
(LinearOperatorBase &Linop, const Field &in, Field &out) { + + Field AtoN(in._grid); + Field Mtmp(in._grid); + AtoN = in; + out = AtoN*Coeffs[0]; + for(int n=1;n namespace Grid { - //////////////////////////////////////////////////////////////////////////////////////////// - // Simple general polynomial with user supplied coefficients - //////////////////////////////////////////////////////////////////////////////////////////// - template - class HermOpOperatorFunction : public OperatorFunction { - void operator() (LinearOperatorBase &Linop, const Field &in, Field &out) { - Linop.HermOp(in,out); - }; - }; - - template - class Polynomial : public OperatorFunction { - private: - std::vector Coeffs; - public: - Polynomial(std::vector &_Coeffs) : Coeffs(_Coeffs) { }; - - // Implement the required interface - void operator() (LinearOperatorBase &Linop, const Field &in, Field &out) { - - Field AtoN(in._grid); - Field Mtmp(in._grid); - AtoN = in; - out = AtoN*Coeffs[0]; -// std::cout <<"Poly in " <& eval, std::vector& evec, const Field& src, int& Nconv, bool reverse, int SkipTest) + void calc(std::vector& eval, std::vector& evec, const Field& src, int& Nconv, bool reverse=true, int SkipTest=0) { GridBase *grid = src._grid; assert(grid == evec[0]._grid); diff --git a/tests/lanczos/Test_dwf_compressed_lanczos.cc b/tests/lanczos/Test_dwf_compressed_lanczos.cc index 544d0358..10d6c3ae 100644 --- a/tests/lanczos/Test_dwf_compressed_lanczos.cc +++ b/tests/lanczos/Test_dwf_compressed_lanczos.cc @@ -100,19 +100,6 @@ void write_history(char* fn, std::vector& hist) { fclose(f); } -template -class FunctionHermOp : public LinearFunction { -public: - OperatorFunction & _poly; - LinearOperatorBase &_Linop; - - FunctionHermOp(OperatorFunction & poly,LinearOperatorBase& linop) : _poly(poly), _Linop(linop) { - } - - void operator()(const Field& in, Field& out) { - _poly(_Linop,in,out); - } -}; template class CheckpointedLinearFunction : public LinearFunction { @@ -268,19 +255,6 @@ public: 
} }; -template -class PlainHermOp : public LinearFunction { -public: - LinearOperatorBase &_Linop; - - PlainHermOp(LinearOperatorBase& linop) : _Linop(linop) { - } - - void operator()(const Field& in, Field& out) { - _Linop.HermOp(in,out); - } -}; - template using CoarseSiteFieldGeneral = iScalar< iVector >; template using CoarseSiteFieldD = CoarseSiteFieldGeneral< vComplexD, N >; template using CoarseSiteFieldF = CoarseSiteFieldGeneral< vComplexF, N >; @@ -326,7 +300,7 @@ void CoarseGridLanczos(BlockProjector& pr,RealD alpha2,RealD beta,int Npo Op2 = &Op2plain; } ProjectedHermOp,LatticeFermion> Op2nopoly(pr,HermOp); - ImplicitlyRestartedLanczos > IRL2(*Op2,*Op2,Nstop2,Nk2,Nm2,resid2,betastp2,MaxIt,MinRes2); + ImplicitlyRestartedLanczos > IRL2(*Op2,*Op2,Nstop2,Nk2,Nm2,resid2,MaxIt,betastp2,MinRes2); src_coarse = 1.0; @@ -648,7 +622,7 @@ int main (int argc, char ** argv) { } // First round of Lanczos to get low mode basis - ImplicitlyRestartedLanczos IRL1(Op1,Op1test,Nstop1,Nk1,Nm1,resid1,betastp1,MaxIt,MinRes1); + ImplicitlyRestartedLanczos IRL1(Op1,Op1test,Nstop1,Nk1,Nm1,resid1,MaxIt,betastp1,MinRes1); int Nconv; char tag[1024]; diff --git a/tests/lanczos/Test_dwf_lanczos.cc b/tests/lanczos/Test_dwf_lanczos.cc index 1dd5dae3..b1e205cf 100644 --- a/tests/lanczos/Test_dwf_lanczos.cc +++ b/tests/lanczos/Test_dwf_lanczos.cc @@ -84,11 +84,12 @@ int main (int argc, char ** argv) std::vector Coeffs { 0.,-1.}; Polynomial PolyX(Coeffs); - Chebyshev Cheb(0.2,5.,11); -// ChebyshevLanczos Cheb(9.,1.,0.,20); -// Cheb.csv(std::cout); -// exit(-24); - ImplicitlyRestartedLanczos IRL(HermOp,Cheb,Nstop,Nk,Nm,resid,MaxIt); + Chebyshev Cheby(0.2,5.,11); + + FunctionHermOp OpCheby(Cheby,HermOp); + PlainHermOp Op (HermOp); + + ImplicitlyRestartedLanczos IRL(OpCheby,Op,Nstop,Nk,Nm,resid,MaxIt); std::vector eval(Nm); diff --git a/tests/lanczos/Test_synthetic_lanczos.cc b/tests/lanczos/Test_synthetic_lanczos.cc index 32fd6f32..4be9ca31 100644 --- a/tests/lanczos/Test_synthetic_lanczos.cc 
+++ b/tests/lanczos/Test_synthetic_lanczos.cc @@ -119,12 +119,13 @@ int main (int argc, char ** argv) RealD beta = 0.1; RealD mu = 0.0; int order = 11; - ChebyshevLanczos Cheby(alpha,beta,mu,order); + Chebyshev Cheby(alpha,beta,order); std::ofstream file("cheby.dat"); Cheby.csv(file); - HermOpOperatorFunction X; DumbOperator HermOp(grid); + FunctionHermOp OpCheby(Cheby,HermOp); + PlainHermOp Op(HermOp); const int Nk = 40; const int Nm = 80; @@ -133,8 +134,9 @@ int main (int argc, char ** argv) int Nconv; RealD eresid = 1.0e-6; - ImplicitlyRestartedLanczos IRL(HermOp,X,Nk,Nk,Nm,eresid,Nit); - ImplicitlyRestartedLanczos ChebyIRL(HermOp,Cheby,Nk,Nk,Nm,eresid,Nit); + + ImplicitlyRestartedLanczos IRL(Op,Op,Nk,Nk,Nm,eresid,Nit); + ImplicitlyRestartedLanczos ChebyIRL(OpCheby,Op,Nk,Nk,Nm,eresid,Nit); LatticeComplex src(grid); gaussian(RNG,src); { diff --git a/tests/lanczos/Test_wilson_lanczos.cc b/tests/lanczos/Test_wilson_lanczos.cc index e8549234..eabc86d7 100644 --- a/tests/lanczos/Test_wilson_lanczos.cc +++ b/tests/lanczos/Test_wilson_lanczos.cc @@ -86,9 +86,12 @@ int main(int argc, char** argv) { std::vector Coeffs{0, 1.}; Polynomial PolyX(Coeffs); - Chebyshev Cheb(0.0, 10., 12); - ImplicitlyRestartedLanczos IRL(HermOp, PolyX, Nstop, Nk, Nm, - resid, MaxIt); + Chebyshev Cheby(0.0, 10., 12); + + FunctionHermOp OpCheby(Cheby,HermOp); + PlainHermOp Op (HermOp); + + ImplicitlyRestartedLanczos IRL(OpCheby, Op, Nstop, Nk, Nm, resid, MaxIt); std::vector eval(Nm); FermionField src(FGrid); From 317ddfedee2104de5bfb96d9174dc31542af9797 Mon Sep 17 00:00:00 2001 From: pretidav Date: Mon, 16 Oct 2017 02:47:33 +0200 Subject: [PATCH 147/377] updated test clover + first attempt derivative clove term (still missing spin part) --- lib/qcd/action/fermion/WilsonCloverFermion.cc | 79 +++++---- lib/qcd/action/fermion/WilsonCloverFermion.h | 16 +- tests/core/Test_wilson_clover.cc | 156 ++++++++++++++---- 3 files changed, 180 insertions(+), 71 deletions(-) diff --git 
a/lib/qcd/action/fermion/WilsonCloverFermion.cc b/lib/qcd/action/fermion/WilsonCloverFermion.cc index bc996ccb..7f58f277 100644 --- a/lib/qcd/action/fermion/WilsonCloverFermion.cc +++ b/lib/qcd/action/fermion/WilsonCloverFermion.cc @@ -46,7 +46,6 @@ RealD WilsonCloverFermion::M(const FermionField &in, FermionField &out) // apply the sigma and Fmunu FermionField temp(out._grid); Mooee(in, temp); - // overall factor out += temp; return axpy_norm(out, 4 + this->mass, in, out); } @@ -89,6 +88,7 @@ void WilsonCloverFermion::ImportGauge(const GaugeField &_Umu) CloverTerm += fillCloverZT(Ez); CloverTerm *= 0.5 * csw; // FieldStrength normalization? should be ( -i/8 ). Is it the anti-symmetric combination? + int lvol = _Umu._grid->lSites(); int DimRep = Impl::Dimension; @@ -98,20 +98,21 @@ void WilsonCloverFermion::ImportGauge(const GaugeField &_Umu) std::vector lcoor; typename SiteCloverType::scalar_object Qx = zero, Qxinv = zero; + for (int site = 0; site < lvol; site++) { grid->LocalIndexToLocalCoor(site, lcoor); EigenCloverOp = Eigen::MatrixXcd::Zero(Ns * DimRep, Ns * DimRep); peekLocalSite(Qx, CloverTerm, lcoor); Qxinv = zero; +//if (csw!=0){ for (int j = 0; j < Ns; j++) for (int k = 0; k < Ns; k++) for (int a = 0; a < DimRep; a++) for (int b = 0; b < DimRep; b++) EigenCloverOp(a + j * DimRep, b + k * DimRep) = Qx()(j, k)(a, b); - // if (site==0) std::cout << "site =" << site << "\n" << EigenCloverOp << std::endl; - - + // if (site==0) std::cout << "site =" << site << "\n" << EigenCloverOp << std::endl; + EigenInvCloverOp = EigenCloverOp.inverse(); //std::cout << EigenInvCloverOp << std::endl; for (int j = 0; j < Ns; j++) @@ -120,9 +121,11 @@ void WilsonCloverFermion::ImportGauge(const GaugeField &_Umu) for (int b = 0; b < DimRep; b++) Qxinv()(j, k)(a, b) = EigenInvCloverOp(a + j * DimRep, b + k * DimRep); // if (site==0) std::cout << "site =" << site << "\n" << EigenInvCloverOp << std::endl; - +// } pokeLocalSite(Qxinv, CloverTermInv, lcoor); - } + } + + // Separate 
the even and odd parts. pickCheckerboard(Even, CloverTermEven, CloverTerm); @@ -180,7 +183,7 @@ void WilsonCloverFermion::MooeeInternal(const FermionField &in, FermionFie if (dag){ if (in._grid->_isCheckerBoarded){ if (in.checkerboard == Odd){ - std::cout << "Calling clover term adj Odd" << std::endl; +// std::cout << "Calling clover term adj Odd" << std::endl; Clover = (inv) ? &CloverTermInvDagOdd : &CloverTermDagOdd; /* test @@ -203,7 +206,7 @@ void WilsonCloverFermion::MooeeInternal(const FermionField &in, FermionFie } else { - std::cout << "Calling clover term adj Even" << std::endl; +// std::cout << "Calling clover term adj Even" << std::endl; Clover = (inv) ? &CloverTermInvDagEven : &CloverTermDagEven; /* test @@ -225,7 +228,7 @@ void WilsonCloverFermion::MooeeInternal(const FermionField &in, FermionFie } - std::cout << GridLogMessage << "*Clover.checkerboard " << (*Clover).checkerboard << std::endl; + // std::cout << GridLogMessage << "*Clover.checkerboard " << (*Clover).checkerboard << std::endl; out = *Clover * in; } else { Clover = (inv) ? &CloverTermInv : &CloverTerm; @@ -239,14 +242,14 @@ void WilsonCloverFermion::MooeeInternal(const FermionField &in, FermionFie if (in._grid->_isCheckerBoarded){ if (in.checkerboard == Odd){ - std::cout << "Calling clover term Odd" << std::endl; + // std::cout << "Calling clover term Odd" << std::endl; Clover = (inv) ? &CloverTermInvOdd : &CloverTermOdd; } else { - std::cout << "Calling clover term Even" << std::endl; + // std::cout << "Calling clover term Even" << std::endl; Clover = (inv) ? &CloverTermInvEven : &CloverTermEven; } out = *Clover * in; - std::cout << GridLogMessage << "*Clover.checkerboard " << (*Clover).checkerboard << std::endl; + // std::cout << GridLogMessage << "*Clover.checkerboard " << (*Clover).checkerboard << std::endl; } else { Clover = (inv) ? 
&CloverTermInv : &CloverTerm; out = *Clover * in; @@ -281,8 +284,12 @@ void WilsonCloverFermion::MooDeriv(GaugeField &mat, const FermionField &X, GridBase *grid = mat._grid; +//GaugeLinkField Lambdaodd(grid), Lambdaeven(grid), tmp(grid); +//Lambdaodd = zero; //Yodd*dag(Xodd)+Xodd*dag(Yodd); // I have to peek spin and decide the color structure +//Lambdaeven = zero; //Teven*dag(Xeven)+Xeven*dag(Yeven) + 2*(Dee^-1) + GaugeLinkField Lambda(grid), tmp(grid); -Lambda = zero; //Y*dag(X)+X*dag(Y); // I have to peek spin and decide the color structure +Lambda=zero; conformable(mat._grid, X._grid); conformable(Y._grid, X._grid); @@ -297,37 +304,53 @@ for (int mu = 0; mu < Nd; mu++) { C1m[mu]=zero; C2m[mu]=zero; C3m[mu]=zero; C4m[mu]=zero; } +/* + PARALLEL_FOR_LOOP + for (int i = 0; i < CloverTerm._grid->oSites(); i++) + { + T._odata[i]()(0, 1) = timesMinusI(F._odata[i]()()); + T._odata[i]()(1, 0) = timesMinusI(F._odata[i]()()); + T._odata[i]()(2, 3) = timesMinusI(F._odata[i]()()); + T._odata[i]()(3, 2) = timesMinusI(F._odata[i]()()); + } +*/ + +for (int i=0;i<4;i++){ //spin + for(int j=0;j<4;j++){ //spin + +for (int mu=0;mu<4;mu++){ //color + for (int nu=0;nu<4;nu++){ //color -for (int mu=0;mu<4;mu++){ - for (int nu=0;nu<4;nu++){ // insertion in upper staple - tmp = Impl::CovShiftIdentityBackward(Lambda, nu) * U[nu]; - C1p[mu]+= Cshift(Impl::CovShiftForward(tmp, nu, Impl::CovShiftBackward(U[mu], mu, Cshift(U[nu], nu, -1))), mu, 1); + tmp = Lambda * U[nu]; + C1p[mu]+=Impl::ShiftStaple(Impl::CovShiftForward(tmp, nu, Impl::CovShiftBackward(U[mu], mu, Impl::CovShiftIdentityBackward(U[nu], nu))), mu); - tmp = Impl::CovShiftIdentityForward(Lambda, mu) * U[mu]; - C2p[mu]+= Cshift(Impl::CovShiftForward(U[nu], nu, Impl::CovShiftBackward(tmp, mu, Cshift(U[nu], nu, -1))), mu, 1); + tmp = Lambda * U[mu]; + C2p[mu]+= Impl::ShiftStaple(Impl::CovShiftForward(U[nu], nu, Impl::CovShiftBackward(tmp, mu, Impl::CovShiftIdentityBackward(U[nu], nu))), mu); tmp = 
Impl::CovShiftIdentityForward(Lambda, nu) * U[nu]; - C3p[mu]+= Cshift(Impl::CovShiftForward(U[nu], nu, Impl::CovShiftBackward(U[mu], mu, Cshift(tmp, nu, -1))), mu, 1); + C3p[mu]+= Impl::ShiftStaple(Impl::CovShiftForward(U[nu], nu, Impl::CovShiftBackward(U[mu], mu, Impl::CovShiftIdentityBackward(tmp, nu))), mu); tmp = Lambda; - C4p[mu]+= Cshift(Impl::CovShiftForward(U[nu], nu, Impl::CovShiftBackward(U[mu], mu, Cshift(U[nu], nu, -1))),mu,1) * tmp; + C4p[mu]+= Impl::ShiftStaple(Impl::CovShiftForward(U[nu], nu, Impl::CovShiftBackward(U[mu], mu, Impl::CovShiftIdentityBackward(U[nu], nu))),mu) * tmp; // insertion in lower staple - tmp = Impl::CovShiftIdentityForward(Lambda, nu) * U[nu]; - C1m[mu]+= Cshift(Impl::CovShiftBackward(tmp, nu, Impl::CovShiftBackward(U[mu], mu, U[nu])), mu, 1); + tmp = Lambda * U[nu]; + C1m[mu]+= Impl::ShiftStaple(Impl::CovShiftBackward(tmp, nu, Impl::CovShiftBackward(U[mu], mu, U[nu])), mu); - tmp = Cshift(Cshift(Lambda, nu, 2),mu, 1) * U[mu]; - C2m[mu]+= Cshift(Impl::CovShiftBackward(U[nu], nu, Impl::CovShiftBackward(tmp, mu, U[nu])), mu ,1); + tmp = Lambda * U[mu]; + C2m[mu]+= Impl::ShiftStaple(Impl::CovShiftBackward(U[nu], nu, Impl::CovShiftBackward(tmp, mu, U[nu])), mu); - tmp = Cshift(Lambda, nu, 2) * U[nu]; - C3m[mu]+= Cshift(Impl::CovShiftBackward(U[nu], nu, Impl::CovShiftBackward(U[mu], mu, tmp)), mu, 1); + tmp = Lambda * U[nu]; + C3m[mu]+= Impl::ShiftStaple(Impl::CovShiftBackward(U[nu], nu, Impl::CovShiftBackward(U[mu], mu, tmp)), mu); tmp = Lambda; - C4m[mu]+= Cshift(Impl::CovShiftBackward(U[nu], nu, Impl::CovShiftBackward(U[mu], mu, U[nu])), mu, 1)* tmp; + C4m[mu]+= Impl::ShiftStaple(Impl::CovShiftBackward(U[nu], nu, Impl::CovShiftBackward(U[mu], mu, U[nu])), mu)* tmp; } } +} +} //Still implementing. 
Have to be tested, and understood how to project EO diff --git a/lib/qcd/action/fermion/WilsonCloverFermion.h b/lib/qcd/action/fermion/WilsonCloverFermion.h index e8654513..7840af90 100644 --- a/lib/qcd/action/fermion/WilsonCloverFermion.h +++ b/lib/qcd/action/fermion/WilsonCloverFermion.h @@ -44,7 +44,7 @@ public: INHERIT_IMPL_TYPES(Impl); template using iImplClover = iScalar, Ns> >; typedef iImplClover SiteCloverType; - typedef Lattice CloverFieldType; + typedef Lattice CloverFieldType; public: typedef WilsonFermion WilsonBase; @@ -91,14 +91,12 @@ public: private: // here fixing the 4 dimensions, make it more general? - RealD csw; // Clover coefficient - CloverFieldType CloverTerm, CloverTermInv; // Clover term - CloverFieldType CloverTermEven, CloverTermOdd; - CloverFieldType CloverTermInvEven, CloverTermInvOdd; // Clover term - - CloverFieldType CloverTermInvDagEven, CloverTermInvDagOdd; //test - CloverFieldType CloverTermDagEven, CloverTermDagOdd; //test - + RealD csw; // Clover coefficient + CloverFieldType CloverTerm=zero, CloverTermInv=zero; // Clover term + CloverFieldType CloverTermEven=zero, CloverTermOdd=zero; // Clover term EO + CloverFieldType CloverTermInvEven=zero, CloverTermInvOdd=zero; // Clover term Inv EO + CloverFieldType CloverTermDagEven=zero, CloverTermDagOdd=zero; // Clover term Dag EO + CloverFieldType CloverTermInvDagEven=zero, CloverTermInvDagOdd=zero; // Clover term Inv Dag EO // eventually these two can be compressed into 6x6 blocks instead of the 12x12 // using the DeGrand-Rossi basis for the gamma matrices diff --git a/tests/core/Test_wilson_clover.cc b/tests/core/Test_wilson_clover.cc index 9e5b246e..9a5fffe2 100644 --- a/tests/core/Test_wilson_clover.cc +++ b/tests/core/Test_wilson_clover.cc @@ -55,13 +55,15 @@ int main (int argc, char ** argv) typedef typename WilsonCloverFermionR::FermionField FermionField; typename WilsonCloverFermionR::ImplParams params; - FermionField src (&Grid); random(pRNG,src); - FermionField 
result(&Grid); result=zero; - FermionField ref(&Grid); ref=zero; - FermionField tmp(&Grid); tmp=zero; - FermionField err(&Grid); tmp=zero; - FermionField phi (&Grid); random(pRNG,phi); - FermionField chi (&Grid); random(pRNG,chi); + FermionField src (&Grid); random(pRNG,src); + FermionField result(&Grid); result=zero; + FermionField result2(&Grid); result2=zero; + FermionField ref(&Grid); ref=zero; + FermionField tmp(&Grid); tmp=zero; + FermionField err(&Grid); err=zero; + FermionField err2(&Grid); err2=zero; + FermionField phi (&Grid); random(pRNG,phi); + FermionField chi (&Grid); random(pRNG,chi); LatticeGaugeField Umu(&Grid); SU3::HotConfiguration(pRNG,Umu); std::vector U(4,&Grid); @@ -71,24 +73,9 @@ int main (int argc, char ** argv) volume=volume*latt_size[mu]; } - // Only one non-zero (y) - for(int mu=0;mu(Umu,mu); - /* Debug force unit - U[mu] = 1.0; - PokeIndex(Umu,U[mu],mu); - */ - } - - ref = zero; - - RealD mass=0.1; + RealD mass= 0.1; RealD csw = 1.0; - { // Simple clover implementation - - // ref = ref + mass * src; - } WilsonCloverFermionR Dwc(Umu,Grid,RBGrid,mass,csw,params); Dwc.ImportGauge(Umu); @@ -176,27 +163,26 @@ int main (int argc, char ** argv) std::cout< seeds2({5,6,7,8}); + GridParallelRNG pRNG2(&Grid); pRNG2.SeedFixedIntegers(seeds2); + LatticeColourMatrix Omega(&Grid); + LatticeColourMatrix ShiftedOmega(&Grid); + LatticeGaugeField U_prime(&Grid); U_prime=zero; + LatticeColourMatrix U_prime_mu(&Grid); U_prime_mu=zero; + SU::LieRandomize(pRNG2, Omega, 1.0); + for (int mu=0;mu Date: Wed, 18 Oct 2017 13:08:09 +0100 Subject: [PATCH 148/377] Putting the FG verbosity in the Integrator level --- lib/qcd/hmc/integrators/Integrator_algorithm.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/qcd/hmc/integrators/Integrator_algorithm.h b/lib/qcd/hmc/integrators/Integrator_algorithm.h index ecc125ef..13a37aeb 100644 --- a/lib/qcd/hmc/integrators/Integrator_algorithm.h +++ b/lib/qcd/hmc/integrators/Integrator_algorithm.h @@ -231,7 
+231,7 @@ class ForceGradient : public Integrator Date: Mon, 23 Oct 2017 13:32:26 +0100 Subject: [PATCH 149/377] Changes in the Makefile to compile against Chroma on Linux --- tests/qdpxx/Makefile.am | 3 ++- tests/qdpxx/Makefile.am.qdpxx | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/tests/qdpxx/Makefile.am b/tests/qdpxx/Makefile.am index f7f30b85..bbcd0412 100644 --- a/tests/qdpxx/Makefile.am +++ b/tests/qdpxx/Makefile.am @@ -1,4 +1,5 @@ AM_CXXFLAGS += `chroma-config --cxxflags` -AM_LDFLAGS += `chroma-config --ldflags` `chroma-config --libs` +AM_LDFLAGS += `chroma-config --ldflags` +LIBS += `chroma-config --libs` include Make.inc diff --git a/tests/qdpxx/Makefile.am.qdpxx b/tests/qdpxx/Makefile.am.qdpxx index 3ccfa4b8..f212413f 100644 --- a/tests/qdpxx/Makefile.am.qdpxx +++ b/tests/qdpxx/Makefile.am.qdpxx @@ -1,6 +1,7 @@ # additional include paths necessary to compile the C++ library AM_CXXFLAGS = -I$(top_srcdir)/include `chroma-config --cxxflags` -AM_LDFLAGS = -L$(top_builddir)/lib `chroma-config --ldflags` `chroma-config --libs` +AM_LDFLAGS = -L$(top_builddir)/lib `chroma-config --ldflags` +AM_LIBS = `chroma-config --libs` include Make.inc From 6391b2a1d0ae8128dafa25349be754a198b978d3 Mon Sep 17 00:00:00 2001 From: Guido Cossu Date: Mon, 23 Oct 2017 14:42:35 +0100 Subject: [PATCH 150/377] Added test for Wilson and Clover fermions --- .vscode/settings.json | 3 +- tests/qdpxx/Test_qdpxx_wilson.cc | 463 +++++++++++++++++++++++++++++++ 2 files changed, 465 insertions(+), 1 deletion(-) create mode 100644 tests/qdpxx/Test_qdpxx_wilson.cc diff --git a/.vscode/settings.json b/.vscode/settings.json index 3e49029b..f13d503b 100644 --- a/.vscode/settings.json +++ b/.vscode/settings.json @@ -45,6 +45,7 @@ "istream": "cpp", "ostream": "cpp", "sstream": "cpp", - "streambuf": "cpp" + "streambuf": "cpp", + "algorithm": "cpp" } } \ No newline at end of file diff --git a/tests/qdpxx/Test_qdpxx_wilson.cc b/tests/qdpxx/Test_qdpxx_wilson.cc new file mode 
100644 index 00000000..70a8b1bf --- /dev/null +++ b/tests/qdpxx/Test_qdpxx_wilson.cc @@ -0,0 +1,463 @@ +/************************************************************************************* + + Grid physics library, www.github.com/paboyle/Grid + + Source file: ./tests/qdpxx/Test_qdpxx_wilson.cc + + Copyright (C) 2017 + + Author: Guido Cossu + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along + with this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+ + See the full license in the file "LICENSE" in the top level distribution directory + *************************************************************************************/ +/* END LEGAL */ + +#include + +// Mass +double mq = 0.1; + +// Define Wilson Types +typedef Grid::QCD::WilsonImplR::FermionField FermionField; +typedef Grid::QCD::LatticeGaugeField GaugeField; + +#include +#include +#include + +enum ChromaAction +{ + Wilson, // Wilson + WilsonClover // CloverFermions +}; + +void make_gauge(GaugeField &lat, FermionField &src); +void calc_grid(ChromaAction CA, GaugeField &lat, FermionField &src, FermionField &res, int dag); +void calc_chroma(ChromaAction CA, GaugeField &lat, FermionField &src, FermionField &res, int dag); + +namespace Chroma +{ + +class ChromaWrapper +{ +public: + typedef multi1d U; + typedef LatticeFermion T4; + + static void ImportGauge(GaugeField &gr, + QDP::multi1d &ch) + { + Grid::QCD::LorentzColourMatrix LCM; + Grid::Complex cc; + QDP::ColorMatrix cm; + QDP::Complex c; + + std::vector x(4); + QDP::multi1d cx(4); + std::vector gd = gr._grid->GlobalDimensions(); + + for (x[0] = 0; x[0] < gd[0]; x[0]++) + { + for (x[1] = 0; x[1] < gd[1]; x[1]++) + { + for (x[2] = 0; x[2] < gd[2]; x[2]++) + { + for (x[3] = 0; x[3] < gd[3]; x[3]++) + { + cx[0] = x[0]; + cx[1] = x[1]; + cx[2] = x[2]; + cx[3] = x[3]; + Grid::peekSite(LCM, gr, x); + + for (int mu = 0; mu < 4; mu++) + { + for (int i = 0; i < 3; i++) + { + for (int j = 0; j < 3; j++) + { + cc = LCM(mu)()(i, j); + c = QDP::cmplx(QDP::Real(real(cc)), QDP::Real(imag(cc))); + QDP::pokeColor(cm, c, i, j); + } + } + QDP::pokeSite(ch[mu], cm, cx); + } + } + } + } + } + } + + static void ExportGauge(GaugeField &gr, + QDP::multi1d &ch) + { + Grid::QCD::LorentzColourMatrix LCM; + Grid::Complex cc; + QDP::ColorMatrix cm; + QDP::Complex c; + + std::vector x(4); + QDP::multi1d cx(4); + std::vector gd = gr._grid->GlobalDimensions(); + + for (x[0] = 0; x[0] < gd[0]; x[0]++) + { + for (x[1] = 0; x[1] < gd[1]; 
x[1]++) + { + for (x[2] = 0; x[2] < gd[2]; x[2]++) + { + for (x[3] = 0; x[3] < gd[3]; x[3]++) + { + cx[0] = x[0]; + cx[1] = x[1]; + cx[2] = x[2]; + cx[3] = x[3]; + + for (int mu = 0; mu < 4; mu++) + { + for (int i = 0; i < 3; i++) + { + for (int j = 0; j < 3; j++) + { + cm = QDP::peekSite(ch[mu], cx); + c = QDP::peekColor(cm, i, j); + cc = Grid::Complex(toDouble(real(c)), toDouble(imag(c))); + LCM(mu) + ()(i, j) = cc; + } + } + } + Grid::pokeSite(LCM, gr, x); + } + } + } + } + } + + // Specific for Wilson Fermions + static void ImportFermion(Grid::QCD::LatticeFermion &gr, + QDP::LatticeFermion &ch) + { + Grid::QCD::SpinColourVector F; + Grid::Complex c; + + QDP::Fermion cF; + QDP::SpinVector cS; + QDP::Complex cc; + + std::vector x(4); // explicit 4d fermions in Grid + QDP::multi1d cx(4); + std::vector gd = gr._grid->GlobalDimensions(); + + for (x[0] = 0; x[0] < gd[0]; x[0]++) + { + for (x[1] = 0; x[1] < gd[1]; x[1]++) + { + for (x[2] = 0; x[2] < gd[2]; x[2]++) + { + for (x[3] = 0; x[3] < gd[3]; x[3]++) + { + cx[0] = x[0]; + cx[1] = x[1]; + cx[2] = x[2]; + cx[3] = x[3]; + + Grid::peekSite(F, gr, x); + + for (int j = 0; j < 3; j++) + { + for (int sp = 0; sp < 4; sp++) + { + + c = F()(sp)(j); + + cc = QDP::cmplx(QDP::Real(real(c)), QDP::Real(imag(c))); + + QDP::pokeSpin(cS, cc, sp); + } + QDP::pokeColor(cF, cS, j); + } + QDP::pokeSite(ch, cF, cx); + } + } + } + } + } + + // Specific for 4d Wilson fermions + static void ExportFermion(Grid::QCD::LatticeFermion &gr, + QDP::LatticeFermion &ch) + { + Grid::QCD::SpinColourVector F; + Grid::Complex c; + + QDP::Fermion cF; + QDP::SpinVector cS; + QDP::Complex cc; + + std::vector x(4); // 4d fermions + QDP::multi1d cx(4); + std::vector gd = gr._grid->GlobalDimensions(); + + for (x[0] = 0; x[0] < gd[0]; x[0]++) + { + for (x[1] = 0; x[1] < gd[1]; x[1]++) + { + for (x[2] = 0; x[2] < gd[2]; x[2]++) + { + for (x[3] = 0; x[3] < gd[3]; x[3]++) + { + cx[0] = x[0]; + cx[1] = x[1]; + cx[2] = x[2]; + cx[3] = x[3]; + + cF = 
QDP::peekSite(ch, cx); + for (int sp = 0; sp < 4; sp++) + { + for (int j = 0; j < 3; j++) + { + cS = QDP::peekColor(cF, j); + cc = QDP::peekSpin(cS, sp); + c = Grid::Complex(QDP::toDouble(QDP::real(cc)), + QDP::toDouble(QDP::imag(cc))); + F() + (sp)(j) = c; + } + } + Grid::pokeSite(F, gr, x); + } + } + } + } + } + + static Handle> GetLinOp(U &u, ChromaAction params) + { + QDP::Real _mq(mq); + QDP::multi1d bcs(QDP::Nd); + + // Boundary conditions + bcs[0] = bcs[1] = bcs[2] = bcs[3] = 1; + + if (params == Wilson) + { + + Chroma::WilsonFermActParams p; + p.Mass = _mq; + + Chroma::Handle> fbc(new Chroma::SimpleFermBC(bcs)); + Chroma::Handle> cfs(new Chroma::CreateSimpleFermState(fbc)); + Chroma::UnprecWilsonFermAct S_f(cfs, p); + Chroma::Handle> ffs(S_f.createState(u)); + return S_f.linOp(ffs); + } + + if (params == WilsonClover) + { + Chroma::CloverFermActParams p; + p.Mass = _mq; + p.clovCoeffR = QDP::Real(1.0); + p.clovCoeffT = QDP::Real(1.0); + Real u0 = QDP::Real(0.0); + + + Chroma::Handle> fbc(new Chroma::SimpleFermBC(bcs)); + Chroma::Handle> cfs(new Chroma::CreateSimpleFermState(fbc)); + Chroma::UnprecCloverFermAct S_f(cfs, p); + Chroma::Handle> ffs(S_f.createState(u)); + return S_f.linOp(ffs); + } + } +}; +} // namespace Chroma + +int main(int argc, char **argv) +{ + + /******************************************************** + * Setup QDP + *********************************************************/ + Chroma::initialize(&argc, &argv); + Chroma::WilsonTypeFermActs4DEnv::registerAll(); + + /******************************************************** + * Setup Grid + *********************************************************/ + Grid::Grid_init(&argc, &argv); + Grid::GridCartesian *UGrid = Grid::QCD::SpaceTimeGrid::makeFourDimGrid(Grid::GridDefaultLatt(), + Grid::GridDefaultSimd(Grid::QCD::Nd, Grid::vComplex::Nsimd()), + Grid::GridDefaultMpi()); + + std::vector gd = UGrid->GlobalDimensions(); + QDP::multi1d nrow(QDP::Nd); + for (int mu = 0; mu < 4; mu++) + nrow[mu] = 
gd[mu]; + + QDP::Layout::setLattSize(nrow); + QDP::Layout::create(); + + GaugeField Ug(UGrid); + FermionField src(UGrid); + FermionField res_chroma(UGrid); + FermionField res_grid(UGrid); + + std::vector ActionList({Wilson, WilsonClover}); + std::vector ActionName({"Wilson", "WilsonClover"}); + + { + + for (int i = 0; i < ActionList.size(); i++) + { + std::cout << "*****************************" << std::endl; + std::cout << "Action " << ActionName[i] << std::endl; + std::cout << "*****************************" << std::endl; + make_gauge(Ug, src); // fills the gauge field and the fermion field with random numbers + + for (int dag = 0; dag < 2; dag++) + { + + { + + std::cout << "Dag = " << dag << std::endl; + + calc_chroma(ActionList[i], Ug, src, res_chroma, dag); + + // Remove the normalisation of Chroma Gauge links ???????? + std::cout << "Norm of Chroma " << ActionName[i] << " multiply " << Grid::norm2(res_chroma) << std::endl; + calc_grid(ActionList[i], Ug, src, res_grid, dag); + + std::cout << "Norm of gauge " << Grid::norm2(Ug) << std::endl; + + std::cout << "Norm of Grid " << ActionName[i] << " multiply " << Grid::norm2(res_grid) << std::endl; + + res_chroma = res_chroma - res_grid; + std::cout << "Norm of difference " << Grid::norm2(res_chroma) << std::endl; + } + } + + std::cout << "Finished test " << std::endl; + + Chroma::finalize(); + } + } +} + + +void calc_chroma(ChromaAction action, GaugeField &lat, FermionField &src, FermionField &res, int dag) +{ + QDP::multi1d u(4); + Chroma::ChromaWrapper::ImportGauge(lat, u); + + QDP::LatticeFermion check; + QDP::LatticeFermion result; + QDP::LatticeFermion psi; + + Chroma::ChromaWrapper::ImportFermion(src, psi); + + for (int mu = 0; mu < 4; mu++) + { + std::cout << "Imported Gauge norm [" << mu << "] " << QDP::norm2(u[mu]) << std::endl; + } + std::cout << "Imported Fermion norm " << QDP::norm2(psi) << std::endl; + + typedef QDP::LatticeFermion T; + typedef QDP::multi1d U; + + auto linop = 
Chroma::ChromaWrapper::GetLinOp(u, action); + + printf("Calling Chroma Linop\n"); + fflush(stdout); + + if (dag) + (*linop)(check, psi, Chroma::MINUS); + else + (*linop)(check, psi, Chroma::PLUS); + + printf("Called Chroma Linop\n"); + fflush(stdout); + + // std::cout << "Calling Chroma Linop " << std::endl; + // linop->evenEvenLinOp(tmp, psi, isign); + // check[rb[0]] = tmp; + // linop->oddOddLinOp(tmp, psi, isign); + // check[rb[1]] = tmp; + // linop->evenOddLinOp(tmp, psi, isign); + // check[rb[0]] += tmp; + // linop->oddEvenLinOp(tmp, psi, isign); + // check[rb[1]] += tmp; + + Chroma::ChromaWrapper::ExportFermion(res, check); +} + +void make_gauge(GaugeField &Umu, FermionField &src) +{ + using namespace Grid; + using namespace Grid::QCD; + + std::vector seeds4({1, 2, 3, 4}); + + Grid::GridCartesian *UGrid = (Grid::GridCartesian *)Umu._grid; + Grid::GridParallelRNG RNG4(UGrid); + RNG4.SeedFixedIntegers(seeds4); + Grid::QCD::SU3::HotConfiguration(RNG4, Umu); + Grid::gaussian(RNG4, src); +} + +void calc_grid(ChromaAction action, Grid::QCD::LatticeGaugeField &Umu, Grid::QCD::LatticeFermion &src, Grid::QCD::LatticeFermion &res, int dag) +{ + using namespace Grid; + using namespace Grid::QCD; + + Grid::GridCartesian *UGrid = (Grid::GridCartesian *)Umu._grid; + Grid::GridRedBlackCartesian *UrbGrid = Grid::QCD::SpaceTimeGrid::makeFourDimRedBlackGrid(UGrid); + + Grid::RealD _mass = mq; + + if (action == Wilson) + { + + Grid::QCD::WilsonFermionR Wf(Umu, *UGrid, *UrbGrid, _mass); + + std::cout << Grid::GridLogMessage << " Calling Grid Wilson Fermion multiply " << std::endl; + + if (dag) + Wf.Mdag(src, res); + else + Wf.M(src, res); + return; + } + + if (action == WilsonClover) + { + Grid::RealD _csw = 1.0; + + Grid::QCD::WilsonCloverFermionR Wf(Umu, *UGrid, *UrbGrid, _mass, _csw); + Wf.ImportGauge(Umu); + + std::cout << Grid::GridLogMessage << " Calling Grid Wilson Clover Fermion multiply " << std::endl; + + if (dag) + Wf.Mdag(src, res); + else + Wf.M(src, res); + return; 
+ } + + assert(0); +} From 031c94e02e0644701f4eb2337a2dd684615f29c0 Mon Sep 17 00:00:00 2001 From: Guido Cossu Date: Mon, 23 Oct 2017 18:27:34 +0100 Subject: [PATCH 151/377] Debugging process for the clover term --- lib/qcd/action/fermion/WilsonCloverFermion.cc | 11 ++-- lib/qcd/action/fermion/WilsonCloverFermion.h | 24 ++++----- tests/qdpxx/Test_qdpxx_wilson.cc | 52 +++++++++++++++++-- 3 files changed, 65 insertions(+), 22 deletions(-) diff --git a/lib/qcd/action/fermion/WilsonCloverFermion.cc b/lib/qcd/action/fermion/WilsonCloverFermion.cc index 7f58f277..5e7e0034 100644 --- a/lib/qcd/action/fermion/WilsonCloverFermion.cc +++ b/lib/qcd/action/fermion/WilsonCloverFermion.cc @@ -71,10 +71,10 @@ void WilsonCloverFermion::ImportGauge(const GaugeField &_Umu) GridBase *grid = _Umu._grid; typename Impl::GaugeLinkField Bx(grid), By(grid), Bz(grid), Ex(grid), Ey(grid), Ez(grid); - // Compute the field strength terms - WilsonLoops::FieldStrength(Bx, _Umu, Ydir, Zdir); + // Compute the field strength terms mu>nu + WilsonLoops::FieldStrength(Bx, _Umu, Zdir, Ydir); WilsonLoops::FieldStrength(By, _Umu, Zdir, Xdir); - WilsonLoops::FieldStrength(Bz, _Umu, Xdir, Ydir); + WilsonLoops::FieldStrength(Bz, _Umu, Ydir, Xdir); WilsonLoops::FieldStrength(Ex, _Umu, Tdir, Xdir); WilsonLoops::FieldStrength(Ey, _Umu, Tdir, Ydir); WilsonLoops::FieldStrength(Ez, _Umu, Tdir, Zdir); @@ -86,7 +86,7 @@ void WilsonCloverFermion::ImportGauge(const GaugeField &_Umu) CloverTerm += fillCloverXT(Ex); CloverTerm += fillCloverYT(Ey); CloverTerm += fillCloverZT(Ez); - CloverTerm *= 0.5 * csw; // FieldStrength normalization? should be ( -i/8 ). Is it the anti-symmetric combination? + CloverTerm *= (0.5) * csw; int lvol = _Umu._grid->lSites(); @@ -232,7 +232,8 @@ void WilsonCloverFermion::MooeeInternal(const FermionField &in, FermionFie out = *Clover * in; } else { Clover = (inv) ? 
&CloverTermInv : &CloverTerm; - out = adj(*Clover) * in; + //out = adj(*Clover) * in; + out = adj(CloverTerm) * in; } diff --git a/lib/qcd/action/fermion/WilsonCloverFermion.h b/lib/qcd/action/fermion/WilsonCloverFermion.h index 7840af90..18386485 100644 --- a/lib/qcd/action/fermion/WilsonCloverFermion.h +++ b/lib/qcd/action/fermion/WilsonCloverFermion.h @@ -156,10 +156,10 @@ private: PARALLEL_FOR_LOOP for (int i = 0; i < CloverTerm._grid->oSites(); i++) { - T._odata[i]()(0, 1) = timesI(F._odata[i]()()); //fixed - T._odata[i]()(1, 0) = timesI(F._odata[i]()()); //fixed - T._odata[i]()(2, 3) = timesMinusI(F._odata[i]()()); //fixed - T._odata[i]()(3, 2) = timesMinusI(F._odata[i]()()); //fixed + T._odata[i]()(0, 1) = timesI(F._odata[i]()()); + T._odata[i]()(1, 0) = timesI(F._odata[i]()()); + T._odata[i]()(2, 3) = timesMinusI(F._odata[i]()()); + T._odata[i]()(3, 2) = timesMinusI(F._odata[i]()()); } return T; @@ -172,10 +172,10 @@ private: PARALLEL_FOR_LOOP for (int i = 0; i < CloverTerm._grid->oSites(); i++) { - T._odata[i]()(0, 1) = -(F._odata[i]()()); //fixed - T._odata[i]()(1, 0) = (F._odata[i]()()); //fixed - T._odata[i]()(2, 3) = (F._odata[i]()()); //fixed - T._odata[i]()(3, 2) = -(F._odata[i]()()); //fixed + T._odata[i]()(0, 1) = -(F._odata[i]()()); + T._odata[i]()(1, 0) = (F._odata[i]()()); + T._odata[i]()(2, 3) = (F._odata[i]()()); + T._odata[i]()(3, 2) = -(F._odata[i]()()); } return T; @@ -188,10 +188,10 @@ private: PARALLEL_FOR_LOOP for (int i = 0; i < CloverTerm._grid->oSites(); i++) { - T._odata[i]()(0, 0) = timesI(F._odata[i]()()); //fixed - T._odata[i]()(1, 1) = timesMinusI(F._odata[i]()()); //fixed - T._odata[i]()(2, 2) = timesMinusI(F._odata[i]()()); //fixed - T._odata[i]()(3, 3) = timesI(F._odata[i]()()); //fixed + T._odata[i]()(0, 0) = timesI(F._odata[i]()()); + T._odata[i]()(1, 1) = timesMinusI(F._odata[i]()()); + T._odata[i]()(2, 2) = timesMinusI(F._odata[i]()()); + T._odata[i]()(3, 3) = timesI(F._odata[i]()()); } return T; diff --git 
a/tests/qdpxx/Test_qdpxx_wilson.cc b/tests/qdpxx/Test_qdpxx_wilson.cc index 70a8b1bf..1e1f6a23 100644 --- a/tests/qdpxx/Test_qdpxx_wilson.cc +++ b/tests/qdpxx/Test_qdpxx_wilson.cc @@ -29,7 +29,7 @@ #include // Mass -double mq = 0.1; +double mq = 0.0; // Define Wilson Types typedef Grid::QCD::WilsonImplR::FermionField FermionField; @@ -274,7 +274,7 @@ public: p.Mass = _mq; p.clovCoeffR = QDP::Real(1.0); p.clovCoeffT = QDP::Real(1.0); - Real u0 = QDP::Real(0.0); + Real u0 = QDP::Real(1.0); Chroma::Handle> fbc(new Chroma::SimpleFermBC(bcs)); @@ -316,6 +316,8 @@ int main(int argc, char **argv) FermionField src(UGrid); FermionField res_chroma(UGrid); FermionField res_grid(UGrid); + FermionField only_wilson(UGrid); + FermionField difference(UGrid); std::vector ActionList({Wilson, WilsonClover}); std::vector ActionName({"Wilson", "WilsonClover"}); @@ -346,8 +348,19 @@ int main(int argc, char **argv) std::cout << "Norm of Grid " << ActionName[i] << " multiply " << Grid::norm2(res_grid) << std::endl; - res_chroma = res_chroma - res_grid; - std::cout << "Norm of difference " << Grid::norm2(res_chroma) << std::endl; + difference = res_chroma - res_grid; + std::cout << "Norm of difference " << Grid::norm2(difference) << std::endl; + + // Isolate Clover term + calc_grid(Wilson, Ug, src, only_wilson, dag);// Wilson term + res_grid -= only_wilson; + res_chroma -= only_wilson; + + std::cout << "Chroma:" << res_chroma << std::endl; + std::cout << "Grid :" << res_grid << std::endl; + + + } } @@ -416,7 +429,36 @@ void make_gauge(GaugeField &Umu, FermionField &src) Grid::GridParallelRNG RNG4(UGrid); RNG4.SeedFixedIntegers(seeds4); Grid::QCD::SU3::HotConfiguration(RNG4, Umu); - Grid::gaussian(RNG4, src); + + // Fermion field + //Grid::gaussian(RNG4, src); + Grid::QCD::SpinColourVector F; + Grid::Complex c; + + std::vector x(4); // 4d fermions + std::vector gd = src._grid->GlobalDimensions(); + + for (x[0] = 0; x[0] < gd[0]; x[0]++) + { + for (x[1] = 0; x[1] < gd[1]; x[1]++) + { + for 
(x[2] = 0; x[2] < gd[2]; x[2]++) + { + for (x[3] = 0; x[3] < gd[3]; x[3]++) + { + for (int sp = 0; sp < 1; sp++) + { + for (int j = 1; j < 2; j++)// colours + { + c = Grid::Complex(1.0, 0.0); + F()(sp)(j) = c; + } + } + Grid::pokeSite(F, src, x); + } + } + } + } } void calc_grid(ChromaAction action, Grid::QCD::LatticeGaugeField &Umu, Grid::QCD::LatticeFermion &src, Grid::QCD::LatticeFermion &res, int dag) From 6579dd30ff6fd12378c0386d2ca317980586faff Mon Sep 17 00:00:00 2001 From: Guido Cossu Date: Mon, 23 Oct 2017 18:47:00 +0100 Subject: [PATCH 152/377] More debug test --- tests/qdpxx/Test_qdpxx_wilson.cc | 30 +++++++++++++++++------------- 1 file changed, 17 insertions(+), 13 deletions(-) diff --git a/tests/qdpxx/Test_qdpxx_wilson.cc b/tests/qdpxx/Test_qdpxx_wilson.cc index 1e1f6a23..8046c00d 100644 --- a/tests/qdpxx/Test_qdpxx_wilson.cc +++ b/tests/qdpxx/Test_qdpxx_wilson.cc @@ -276,7 +276,6 @@ public: p.clovCoeffT = QDP::Real(1.0); Real u0 = QDP::Real(1.0); - Chroma::Handle> fbc(new Chroma::SimpleFermBC(bcs)); Chroma::Handle> cfs(new Chroma::CreateSimpleFermState(fbc)); Chroma::UnprecCloverFermAct S_f(cfs, p); @@ -350,17 +349,16 @@ int main(int argc, char **argv) difference = res_chroma - res_grid; std::cout << "Norm of difference " << Grid::norm2(difference) << std::endl; - + // Isolate Clover term - calc_grid(Wilson, Ug, src, only_wilson, dag);// Wilson term + calc_grid(Wilson, Ug, src, only_wilson, dag); // Wilson term res_grid -= only_wilson; res_chroma -= only_wilson; - + std::cout << "Chroma:" << res_chroma << std::endl; std::cout << "Grid :" << res_grid << std::endl; - - - + difference = (res_grid-res_chroma); + std::cout << "Difference :" << difference << std::endl; } } @@ -371,7 +369,6 @@ int main(int argc, char **argv) } } - void calc_chroma(ChromaAction action, GaugeField &lat, FermionField &src, FermionField &res, int dag) { QDP::multi1d u(4); @@ -429,12 +426,14 @@ void make_gauge(GaugeField &Umu, FermionField &src) Grid::GridParallelRNG 
RNG4(UGrid); RNG4.SeedFixedIntegers(seeds4); Grid::QCD::SU3::HotConfiguration(RNG4, Umu); - + // Fermion field //Grid::gaussian(RNG4, src); Grid::QCD::SpinColourVector F; Grid::Complex c; + + std::vector x(4); // 4d fermions std::vector gd = src._grid->GlobalDimensions(); @@ -446,15 +445,20 @@ void make_gauge(GaugeField &Umu, FermionField &src) { for (x[3] = 0; x[3] < gd[3]; x[3]++) { - for (int sp = 0; sp < 1; sp++) + for (int sp = 0; sp < 4; sp++) { - for (int j = 1; j < 2; j++)// colours + for (int j = 0; j < 3; j++) // colours { - c = Grid::Complex(1.0, 0.0); - F()(sp)(j) = c; + F()(sp)(j) = Grid::Complex(0.0,0.0); + if (((sp == 0)|| (sp==3)) && (j==0)) + { + c = Grid::Complex(1.0, 0.0); + F()(sp)(j) = c; + } } } Grid::pokeSite(F, src, x); + } } } From cbda4f66e0cdfdcd1405cf6b1c20a8ea55e976ad Mon Sep 17 00:00:00 2001 From: Guido Cossu Date: Tue, 24 Oct 2017 10:20:13 +0100 Subject: [PATCH 153/377] Debug of the field strength --- lib/qcd/utils/WilsonLoops.h | 4 +++- tests/qdpxx/Test_qdpxx_wilson.cc | 9 +++++++-- 2 files changed, 10 insertions(+), 3 deletions(-) diff --git a/lib/qcd/utils/WilsonLoops.h b/lib/qcd/utils/WilsonLoops.h index ff49cf4e..fe813298 100644 --- a/lib/qcd/utils/WilsonLoops.h +++ b/lib/qcd/utils/WilsonLoops.h @@ -337,7 +337,9 @@ static void StapleMult(GaugeMat &staple, const GaugeLorentz &Umu, int mu) { GaugeMat v = Vup - Vdn; GaugeMat u = PeekIndex(Umu, mu); // some redundant copies GaugeMat vu = v*u; - FS = 0.25*Ta(u*v + Cshift(vu, mu, -1)); + //FS = 0.25*Ta(u*v + Cshift(vu, mu, -1)); + FS = (u*v + Cshift(vu, mu, -1)); + FS = 0.125*(FS - adj(FS)); } static Real TopologicalCharge(GaugeLorentz &U){ diff --git a/tests/qdpxx/Test_qdpxx_wilson.cc b/tests/qdpxx/Test_qdpxx_wilson.cc index 8046c00d..7e2b3f7e 100644 --- a/tests/qdpxx/Test_qdpxx_wilson.cc +++ b/tests/qdpxx/Test_qdpxx_wilson.cc @@ -351,6 +351,7 @@ int main(int argc, char **argv) std::cout << "Norm of difference " << Grid::norm2(difference) << std::endl; // Isolate Clover term + /* 
calc_grid(Wilson, Ug, src, only_wilson, dag); // Wilson term res_grid -= only_wilson; res_chroma -= only_wilson; @@ -359,6 +360,7 @@ int main(int argc, char **argv) std::cout << "Grid :" << res_grid << std::endl; difference = (res_grid-res_chroma); std::cout << "Difference :" << difference << std::endl; + */ } } @@ -428,7 +430,8 @@ void make_gauge(GaugeField &Umu, FermionField &src) Grid::QCD::SU3::HotConfiguration(RNG4, Umu); // Fermion field - //Grid::gaussian(RNG4, src); + Grid::gaussian(RNG4, src); + /* Grid::QCD::SpinColourVector F; Grid::Complex c; @@ -450,7 +453,7 @@ void make_gauge(GaugeField &Umu, FermionField &src) for (int j = 0; j < 3; j++) // colours { F()(sp)(j) = Grid::Complex(0.0,0.0); - if (((sp == 0)|| (sp==3)) && (j==0)) + if (((sp == 0)|| (sp==3)) && (j==2)) { c = Grid::Complex(1.0, 0.0); F()(sp)(j) = c; @@ -463,6 +466,8 @@ void make_gauge(GaugeField &Umu, FermionField &src) } } } + */ + } void calc_grid(ChromaAction action, Grid::QCD::LatticeGaugeField &Umu, Grid::QCD::LatticeFermion &src, Grid::QCD::LatticeFermion &res, int dag) From ec8cd11c1f7fce1c3deee79977745ba4f6c9776c Mon Sep 17 00:00:00 2001 From: Guido Cossu Date: Tue, 24 Oct 2017 13:21:17 +0100 Subject: [PATCH 154/377] Cleanup and prepare for pull request --- lib/qcd/action/fermion/WilsonCloverFermion.cc | 279 ++++++++---------- lib/qcd/action/fermion/WilsonCloverFermion.h | 104 +++---- tests/core/Test_wilson_clover.cc | 10 +- tests/qdpxx/Test_qdpxx_wilson.cc | 172 +++++------ 4 files changed, 258 insertions(+), 307 deletions(-) diff --git a/lib/qcd/action/fermion/WilsonCloverFermion.cc b/lib/qcd/action/fermion/WilsonCloverFermion.cc index 5e7e0034..73e2bf69 100644 --- a/lib/qcd/action/fermion/WilsonCloverFermion.cc +++ b/lib/qcd/action/fermion/WilsonCloverFermion.cc @@ -39,29 +39,33 @@ namespace QCD template RealD WilsonCloverFermion::M(const FermionField &in, FermionField &out) { + FermionField temp(out._grid); + // Wilson term out.checkerboard = in.checkerboard; this->Dhop(in, out, 
DaggerNo); + // Clover term - // apply the sigma and Fmunu - FermionField temp(out._grid); Mooee(in, temp); + out += temp; - return axpy_norm(out, 4 + this->mass, in, out); + return norm2(out); } template RealD WilsonCloverFermion::Mdag(const FermionField &in, FermionField &out) { + FermionField temp(out._grid); + // Wilson term out.checkerboard = in.checkerboard; this->Dhop(in, out, DaggerYes); + // Clover term - // apply the sigma and Fmunu - FermionField temp(out._grid); MooeeDag(in, temp); - out+=temp; - return axpy_norm(out, 4 + this->mass, in, out); + + out += temp; + return norm2(out); } template @@ -80,14 +84,14 @@ void WilsonCloverFermion::ImportGauge(const GaugeField &_Umu) WilsonLoops::FieldStrength(Ez, _Umu, Tdir, Zdir); // Compute the Clover Operator acting on Colour and Spin - CloverTerm = fillCloverYZ(Bx); + CloverTerm = fillCloverYZ(Bx); CloverTerm += fillCloverXZ(By); CloverTerm += fillCloverXY(Bz); CloverTerm += fillCloverXT(Ex); CloverTerm += fillCloverYT(Ey); CloverTerm += fillCloverZT(Ez); - CloverTerm *= (0.5) * csw; - + CloverTerm *= (0.5) * csw; + CloverTerm += (4.0 + this->mass); int lvol = _Umu._grid->lSites(); int DimRep = Impl::Dimension; @@ -98,21 +102,20 @@ void WilsonCloverFermion::ImportGauge(const GaugeField &_Umu) std::vector lcoor; typename SiteCloverType::scalar_object Qx = zero, Qxinv = zero; - for (int site = 0; site < lvol; site++) { grid->LocalIndexToLocalCoor(site, lcoor); EigenCloverOp = Eigen::MatrixXcd::Zero(Ns * DimRep, Ns * DimRep); peekLocalSite(Qx, CloverTerm, lcoor); Qxinv = zero; -//if (csw!=0){ + //if (csw!=0){ for (int j = 0; j < Ns; j++) for (int k = 0; k < Ns; k++) for (int a = 0; a < DimRep; a++) for (int b = 0; b < DimRep; b++) EigenCloverOp(a + j * DimRep, b + k * DimRep) = Qx()(j, k)(a, b); - // if (site==0) std::cout << "site =" << site << "\n" << EigenCloverOp << std::endl; - + // if (site==0) std::cout << "site =" << site << "\n" << EigenCloverOp << std::endl; + EigenInvCloverOp = EigenCloverOp.inverse(); 
//std::cout << EigenInvCloverOp << std::endl; for (int j = 0; j < Ns; j++) @@ -120,35 +123,29 @@ void WilsonCloverFermion::ImportGauge(const GaugeField &_Umu) for (int a = 0; a < DimRep; a++) for (int b = 0; b < DimRep; b++) Qxinv()(j, k)(a, b) = EigenInvCloverOp(a + j * DimRep, b + k * DimRep); - // if (site==0) std::cout << "site =" << site << "\n" << EigenInvCloverOp << std::endl; -// } + // if (site==0) std::cout << "site =" << site << "\n" << EigenInvCloverOp << std::endl; + // } pokeLocalSite(Qxinv, CloverTermInv, lcoor); - } - + } - - // Separate the even and odd parts. + // Separate the even and odd parts pickCheckerboard(Even, CloverTermEven, CloverTerm); - pickCheckerboard( Odd, CloverTermOdd, CloverTerm); - + pickCheckerboard(Odd, CloverTermOdd, CloverTerm); pickCheckerboard(Even, CloverTermDagEven, adj(CloverTerm)); - pickCheckerboard( Odd, CloverTermDagOdd, adj(CloverTerm)); - + pickCheckerboard(Odd, CloverTermDagOdd, adj(CloverTerm)); pickCheckerboard(Even, CloverTermInvEven, CloverTermInv); - pickCheckerboard( Odd, CloverTermInvOdd, CloverTermInv); - + pickCheckerboard(Odd, CloverTermInvOdd, CloverTermInv); pickCheckerboard(Even, CloverTermInvDagEven, adj(CloverTermInv)); - pickCheckerboard( Odd, CloverTermInvDagOdd, adj(CloverTermInv)); - + pickCheckerboard(Odd, CloverTermInvDagOdd, adj(CloverTermInv)); } template void WilsonCloverFermion::Mooee(const FermionField &in, FermionField &out) { - conformable(in,out); + conformable(in, out); this->MooeeInternal(in, out, DaggerNo, InverseNo); } @@ -177,85 +174,50 @@ void WilsonCloverFermion::MooeeInternal(const FermionField &in, FermionFie CloverFieldType *Clover; assert(in.checkerboard == Odd || in.checkerboard == Even); - - - - if (dag){ - if (in._grid->_isCheckerBoarded){ - if (in.checkerboard == Odd){ -// std::cout << "Calling clover term adj Odd" << std::endl; - Clover = (inv) ? 
&CloverTermInvDagOdd : &CloverTermDagOdd; - -/* test - int DimRep = Impl::Dimension; - Eigen::MatrixXcd A = Eigen::MatrixXcd::Zero(Ns * DimRep, Ns * DimRep); - std::vector lcoor; - typename SiteCloverType::scalar_object Qx2 = zero; - GridBase *grid = in._grid; - int site = 0 ; - grid->LocalIndexToLocalCoor(site, lcoor); - peekLocalSite(Qx2, *Clover, lcoor); - for (int j = 0; j < Ns; j++) - for (int k = 0; k < Ns; k++) - for (int a = 0; a < DimRep; a++) - for (int b = 0; b < DimRep; b++) - A(a + j * DimRep, b + k * DimRep) = Qx2()(j, k)(a, b); - std::cout << "adj Odd =" << site << "\n" << A << std::endl; - end test */ - - - - } else { -// std::cout << "Calling clover term adj Even" << std::endl; - Clover = (inv) ? &CloverTermInvDagEven : &CloverTermDagEven; - -/* test - int DimRep = Impl::Dimension; - Eigen::MatrixXcd A = Eigen::MatrixXcd::Zero(Ns * DimRep, Ns * DimRep); - std::vector lcoor; - typename SiteCloverType::scalar_object Qx2 = zero; - GridBase *grid = in._grid; - int site = 0 ; - grid->LocalIndexToLocalCoor(site, lcoor); - peekLocalSite(Qx2, *Clover, lcoor); - for (int j = 0; j < Ns; j++) - for (int k = 0; k < Ns; k++) - for (int a = 0; a < DimRep; a++) - for (int b = 0; b < DimRep; b++) - A(a + j * DimRep, b + k * DimRep) = Qx2()(j, k)(a, b); - std::cout << "adj Odd =" << site << "\n" << A << std::endl; - end test */ - - + if (dag) + { + if (in._grid->_isCheckerBoarded) + { + if (in.checkerboard == Odd) + { + Clover = (inv) ? &CloverTermInvDagOdd : &CloverTermDagOdd; + } + else + { + Clover = (inv) ? &CloverTermInvDagEven : &CloverTermDagEven; + } + out = *Clover * in; + } + else + { + Clover = (inv) ? &CloverTermInv : &CloverTerm; + out = adj(*Clover) * in; } - // std::cout << GridLogMessage << "*Clover.checkerboard " << (*Clover).checkerboard << std::endl; - out = *Clover * in; - } else { - Clover = (inv) ? 
&CloverTermInv : &CloverTerm; - //out = adj(*Clover) * in; - out = adj(CloverTerm) * in; } + else + { + if (in._grid->_isCheckerBoarded) + { - - - - } else { - if (in._grid->_isCheckerBoarded){ - - if (in.checkerboard == Odd){ - // std::cout << "Calling clover term Odd" << std::endl; - Clover = (inv) ? &CloverTermInvOdd : &CloverTermOdd; - } else { - // std::cout << "Calling clover term Even" << std::endl; - Clover = (inv) ? &CloverTermInvEven : &CloverTermEven; - } - out = *Clover * in; - // std::cout << GridLogMessage << "*Clover.checkerboard " << (*Clover).checkerboard << std::endl; - } else { - Clover = (inv) ? &CloverTermInv : &CloverTerm; - out = *Clover * in; + if (in.checkerboard == Odd) + { + // std::cout << "Calling clover term Odd" << std::endl; + Clover = (inv) ? &CloverTermInvOdd : &CloverTermOdd; + } + else + { + // std::cout << "Calling clover term Even" << std::endl; + Clover = (inv) ? &CloverTermInvEven : &CloverTermEven; + } + out = *Clover * in; + // std::cout << GridLogMessage << "*Clover.checkerboard " << (*Clover).checkerboard << std::endl; + } + else + { + Clover = (inv) ? 
&CloverTermInv : &CloverTerm; + out = *Clover * in; + } } - } } // MooeeInternal @@ -264,7 +226,6 @@ template void WilsonCloverFermion::MDeriv(GaugeField &mat, const FermionField &U, const FermionField &V, int dag) { - GaugeField tmp(mat._grid); conformable(U._grid, V._grid); @@ -282,30 +243,37 @@ void WilsonCloverFermion::MDeriv(GaugeField &mat, const FermionField &U, c template void WilsonCloverFermion::MooDeriv(GaugeField &mat, const FermionField &X, const FermionField &Y, int dag) { - -GridBase *grid = mat._grid; -//GaugeLinkField Lambdaodd(grid), Lambdaeven(grid), tmp(grid); -//Lambdaodd = zero; //Yodd*dag(Xodd)+Xodd*dag(Yodd); // I have to peek spin and decide the color structure -//Lambdaeven = zero; //Teven*dag(Xeven)+Xeven*dag(Yeven) + 2*(Dee^-1) + GridBase *grid = mat._grid; -GaugeLinkField Lambda(grid), tmp(grid); -Lambda=zero; + //GaugeLinkField Lambdaodd(grid), Lambdaeven(grid), tmp(grid); + //Lambdaodd = zero; //Yodd*dag(Xodd)+Xodd*dag(Yodd); // I have to peek spin and decide the color structure + //Lambdaeven = zero; //Teven*dag(Xeven)+Xeven*dag(Yeven) + 2*(Dee^-1) -conformable(mat._grid, X._grid); -conformable(Y._grid, X._grid); + GaugeLinkField Lambda(grid), tmp(grid); + Lambda = zero; -std::vector C1p(Nd,grid), C2p(Nd,grid), C3p(Nd,grid), C4p(Nd,grid); -std::vector C1m(Nd,grid), C2m(Nd,grid), C3m(Nd,grid), C4m(Nd,grid); -std::vector U(Nd, mat._grid); + conformable(mat._grid, X._grid); + conformable(Y._grid, X._grid); -for (int mu = 0; mu < Nd; mu++) { - U[mu] = PeekIndex(mat, mu); - C1p[mu]=zero; C2p[mu]=zero; C3p[mu]=zero; C4p[mu]=zero; - C1m[mu]=zero; C2m[mu]=zero; C3m[mu]=zero; C4m[mu]=zero; -} + std::vector C1p(Nd, grid), C2p(Nd, grid), C3p(Nd, grid), C4p(Nd, grid); + std::vector C1m(Nd, grid), C2m(Nd, grid), C3m(Nd, grid), C4m(Nd, grid); + std::vector U(Nd, mat._grid); -/* + for (int mu = 0; mu < Nd; mu++) + { + U[mu] = PeekIndex(mat, mu); + C1p[mu] = zero; + C2p[mu] = zero; + C3p[mu] = zero; + C4p[mu] = zero; + C1m[mu] = zero; + C2m[mu] = 
zero; + C3m[mu] = zero; + C4m[mu] = zero; + } + + /* PARALLEL_FOR_LOOP for (int i = 0; i < CloverTerm._grid->oSites(); i++) { @@ -314,50 +282,49 @@ for (int mu = 0; mu < Nd; mu++) { T._odata[i]()(2, 3) = timesMinusI(F._odata[i]()()); T._odata[i]()(3, 2) = timesMinusI(F._odata[i]()()); } -*/ +*/ -for (int i=0;i<4;i++){ //spin - for(int j=0;j<4;j++){ //spin + for (int i = 0; i < 4; i++) + { //spin + for (int j = 0; j < 4; j++) + { //spin -for (int mu=0;mu<4;mu++){ //color - for (int nu=0;nu<4;nu++){ //color + for (int mu = 0; mu < 4; mu++) + { //color + for (int nu = 0; nu < 4; nu++) + { //color -// insertion in upper staple - tmp = Lambda * U[nu]; - C1p[mu]+=Impl::ShiftStaple(Impl::CovShiftForward(tmp, nu, Impl::CovShiftBackward(U[mu], mu, Impl::CovShiftIdentityBackward(U[nu], nu))), mu); + // insertion in upper staple + tmp = Lambda * U[nu]; + C1p[mu] += Impl::ShiftStaple(Impl::CovShiftForward(tmp, nu, Impl::CovShiftBackward(U[mu], mu, Impl::CovShiftIdentityBackward(U[nu], nu))), mu); - tmp = Lambda * U[mu]; - C2p[mu]+= Impl::ShiftStaple(Impl::CovShiftForward(U[nu], nu, Impl::CovShiftBackward(tmp, mu, Impl::CovShiftIdentityBackward(U[nu], nu))), mu); + tmp = Lambda * U[mu]; + C2p[mu] += Impl::ShiftStaple(Impl::CovShiftForward(U[nu], nu, Impl::CovShiftBackward(tmp, mu, Impl::CovShiftIdentityBackward(U[nu], nu))), mu); - tmp = Impl::CovShiftIdentityForward(Lambda, nu) * U[nu]; - C3p[mu]+= Impl::ShiftStaple(Impl::CovShiftForward(U[nu], nu, Impl::CovShiftBackward(U[mu], mu, Impl::CovShiftIdentityBackward(tmp, nu))), mu); - - tmp = Lambda; - C4p[mu]+= Impl::ShiftStaple(Impl::CovShiftForward(U[nu], nu, Impl::CovShiftBackward(U[mu], mu, Impl::CovShiftIdentityBackward(U[nu], nu))),mu) * tmp; + tmp = Impl::CovShiftIdentityForward(Lambda, nu) * U[nu]; + C3p[mu] += Impl::ShiftStaple(Impl::CovShiftForward(U[nu], nu, Impl::CovShiftBackward(U[mu], mu, Impl::CovShiftIdentityBackward(tmp, nu))), mu); -// insertion in lower staple - tmp = Lambda * U[nu]; - C1m[mu]+= 
Impl::ShiftStaple(Impl::CovShiftBackward(tmp, nu, Impl::CovShiftBackward(U[mu], mu, U[nu])), mu); + tmp = Lambda; + C4p[mu] += Impl::ShiftStaple(Impl::CovShiftForward(U[nu], nu, Impl::CovShiftBackward(U[mu], mu, Impl::CovShiftIdentityBackward(U[nu], nu))), mu) * tmp; - tmp = Lambda * U[mu]; - C2m[mu]+= Impl::ShiftStaple(Impl::CovShiftBackward(U[nu], nu, Impl::CovShiftBackward(tmp, mu, U[nu])), mu); + // insertion in lower staple + tmp = Lambda * U[nu]; + C1m[mu] += Impl::ShiftStaple(Impl::CovShiftBackward(tmp, nu, Impl::CovShiftBackward(U[mu], mu, U[nu])), mu); - tmp = Lambda * U[nu]; - C3m[mu]+= Impl::ShiftStaple(Impl::CovShiftBackward(U[nu], nu, Impl::CovShiftBackward(U[mu], mu, tmp)), mu); + tmp = Lambda * U[mu]; + C2m[mu] += Impl::ShiftStaple(Impl::CovShiftBackward(U[nu], nu, Impl::CovShiftBackward(tmp, mu, U[nu])), mu); - tmp = Lambda; - C4m[mu]+= Impl::ShiftStaple(Impl::CovShiftBackward(U[nu], nu, Impl::CovShiftBackward(U[mu], mu, U[nu])), mu)* tmp; + tmp = Lambda * U[nu]; + C3m[mu] += Impl::ShiftStaple(Impl::CovShiftBackward(U[nu], nu, Impl::CovShiftBackward(U[mu], mu, tmp)), mu); + + tmp = Lambda; + C4m[mu] += Impl::ShiftStaple(Impl::CovShiftBackward(U[nu], nu, Impl::CovShiftBackward(U[mu], mu, U[nu])), mu) * tmp; + } + } + } } -} - -} -} - -//Still implementing. Have to be tested, and understood how to project EO - - - + //Still implementing. 
Have to be tested, and understood how to project EO } // Derivative parts diff --git a/lib/qcd/action/fermion/WilsonCloverFermion.h b/lib/qcd/action/fermion/WilsonCloverFermion.h index 18386485..34482941 100644 --- a/lib/qcd/action/fermion/WilsonCloverFermion.h +++ b/lib/qcd/action/fermion/WilsonCloverFermion.h @@ -26,6 +26,7 @@ See the full license in the file "LICENSE" in the top level distribution directory *************************************************************************************/ /* END LEGAL */ + #ifndef GRID_QCD_WILSON_CLOVER_FERMION_H #define GRID_QCD_WILSON_CLOVER_FERMION_H @@ -42,9 +43,11 @@ class WilsonCloverFermion : public WilsonFermion public: // Types definitions INHERIT_IMPL_TYPES(Impl); - template using iImplClover = iScalar, Ns> >; - typedef iImplClover SiteCloverType; - typedef Lattice CloverFieldType; + template + using iImplClover = iScalar, Ns>>; + typedef iImplClover SiteCloverType; + typedef Lattice CloverFieldType; + public: typedef WilsonFermion WilsonBase; @@ -58,19 +61,21 @@ public: Fgrid, Hgrid, _mass, p), - CloverTerm(&Fgrid), - CloverTermInv(&Fgrid), - CloverTermEven(&Hgrid), - CloverTermOdd(&Hgrid), - CloverTermInvEven(&Hgrid), - CloverTermInvOdd(&Hgrid), - CloverTermDagEven(&Hgrid), //test - CloverTermDagOdd(&Hgrid), //test - CloverTermInvDagEven(&Hgrid), //test - CloverTermInvDagOdd(&Hgrid) //test + CloverTerm(&Fgrid), + CloverTermInv(&Fgrid), + CloverTermEven(&Hgrid), + CloverTermOdd(&Hgrid), + CloverTermInvEven(&Hgrid), + CloverTermInvOdd(&Hgrid), + CloverTermDagEven(&Hgrid), + CloverTermDagOdd(&Hgrid), + CloverTermInvDagEven(&Hgrid), + CloverTermInvDagOdd(&Hgrid) { csw = _csw; assert(Nd == 4); // require 4 dimensions + + if (csw == 0) std::cout << GridLogWarning << "Initializing WilsonCloverFermion with csw = 0" << std::endl; } virtual RealD M(const FermionField &in, FermionField &out); @@ -91,12 +96,12 @@ public: private: // here fixing the 4 dimensions, make it more general? 
- RealD csw; // Clover coefficient - CloverFieldType CloverTerm=zero, CloverTermInv=zero; // Clover term - CloverFieldType CloverTermEven=zero, CloverTermOdd=zero; // Clover term EO - CloverFieldType CloverTermInvEven=zero, CloverTermInvOdd=zero; // Clover term Inv EO - CloverFieldType CloverTermDagEven=zero, CloverTermDagOdd=zero; // Clover term Dag EO - CloverFieldType CloverTermInvDagEven=zero, CloverTermInvDagOdd=zero; // Clover term Inv Dag EO + RealD csw; // Clover coefficient + CloverFieldType CloverTerm, CloverTermInv; // Clover term + CloverFieldType CloverTermEven, CloverTermOdd; // Clover term EO + CloverFieldType CloverTermInvEven, CloverTermInvOdd; // Clover term Inv EO + CloverFieldType CloverTermDagEven, CloverTermDagOdd; // Clover term Dag EO + CloverFieldType CloverTermInvDagEven, CloverTermInvDagOdd; // Clover term Inv Dag EO // eventually these two can be compressed into 6x6 blocks instead of the 12x12 // using the DeGrand-Rossi basis for the gamma matrices @@ -113,9 +118,9 @@ private: T._odata[i]()(2, 3) = timesMinusI(F._odata[i]()()); T._odata[i]()(3, 2) = timesMinusI(F._odata[i]()()); } - - return T; -} + + return T; + } CloverFieldType fillCloverXZ(const GaugeLinkField &F) { @@ -129,9 +134,9 @@ private: T._odata[i]()(2, 3) = -F._odata[i]()(); T._odata[i]()(3, 2) = F._odata[i]()(); } - - return T; -} + + return T; + } CloverFieldType fillCloverXY(const GaugeLinkField &F) { @@ -145,9 +150,9 @@ private: T._odata[i]()(2, 2) = timesMinusI(F._odata[i]()()); T._odata[i]()(3, 3) = timesI(F._odata[i]()()); } - - return T; -} + + return T; + } CloverFieldType fillCloverXT(const GaugeLinkField &F) { @@ -156,14 +161,14 @@ private: PARALLEL_FOR_LOOP for (int i = 0; i < CloverTerm._grid->oSites(); i++) { - T._odata[i]()(0, 1) = timesI(F._odata[i]()()); - T._odata[i]()(1, 0) = timesI(F._odata[i]()()); - T._odata[i]()(2, 3) = timesMinusI(F._odata[i]()()); - T._odata[i]()(3, 2) = timesMinusI(F._odata[i]()()); + T._odata[i]()(0, 1) = timesI(F._odata[i]()()); + 
T._odata[i]()(1, 0) = timesI(F._odata[i]()()); + T._odata[i]()(2, 3) = timesMinusI(F._odata[i]()()); + T._odata[i]()(3, 2) = timesMinusI(F._odata[i]()()); } - - return T; -} + + return T; + } CloverFieldType fillCloverYT(const GaugeLinkField &F) { @@ -172,14 +177,14 @@ private: PARALLEL_FOR_LOOP for (int i = 0; i < CloverTerm._grid->oSites(); i++) { - T._odata[i]()(0, 1) = -(F._odata[i]()()); - T._odata[i]()(1, 0) = (F._odata[i]()()); - T._odata[i]()(2, 3) = (F._odata[i]()()); - T._odata[i]()(3, 2) = -(F._odata[i]()()); + T._odata[i]()(0, 1) = -(F._odata[i]()()); + T._odata[i]()(1, 0) = (F._odata[i]()()); + T._odata[i]()(2, 3) = (F._odata[i]()()); + T._odata[i]()(3, 2) = -(F._odata[i]()()); } - - return T; -} + + return T; + } CloverFieldType fillCloverZT(const GaugeLinkField &F) { @@ -188,17 +193,16 @@ private: PARALLEL_FOR_LOOP for (int i = 0; i < CloverTerm._grid->oSites(); i++) { - T._odata[i]()(0, 0) = timesI(F._odata[i]()()); - T._odata[i]()(1, 1) = timesMinusI(F._odata[i]()()); - T._odata[i]()(2, 2) = timesMinusI(F._odata[i]()()); - T._odata[i]()(3, 3) = timesI(F._odata[i]()()); + T._odata[i]()(0, 0) = timesI(F._odata[i]()()); + T._odata[i]()(1, 1) = timesMinusI(F._odata[i]()()); + T._odata[i]()(2, 2) = timesMinusI(F._odata[i]()()); + T._odata[i]()(3, 3) = timesI(F._odata[i]()()); } - - return T; -} + return T; + } }; } } -#endif // GRID_QCD_WILSON_CLOVER_FERMION_H +#endif // GRID_QCD_WILSON_CLOVER_FERMION_H diff --git a/tests/core/Test_wilson_clover.cc b/tests/core/Test_wilson_clover.cc index 9a5fffe2..9a55f6b2 100644 --- a/tests/core/Test_wilson_clover.cc +++ b/tests/core/Test_wilson_clover.cc @@ -237,7 +237,7 @@ int main (int argc, char ** argv) setCheckerboard(src,src_o); - //Gauge Transformation + ////////////////////// Gauge Transformation std::vector seeds2({5,6,7,8}); GridParallelRNG pRNG2(&Grid); pRNG2.SeedFixedIntegers(seeds2); LatticeColourMatrix Omega(&Grid); @@ -251,7 +251,7 @@ int main (int argc, char ** argv) 
U_prime_mu=Omega*U[mu]*adj(ShiftedOmega); pokeLorentz(U_prime,U_prime_mu,mu); } - + ///////////////// WilsonCloverFermionR Dwc_prime(U_prime,Grid,RBGrid,mass,csw,params); Dwc_prime.ImportGauge(U_prime); @@ -298,7 +298,7 @@ int main (int argc, char ** argv) std::cout< +#include +#include +#include // Mass -double mq = 0.0; +double mq = 0.01; // Define Wilson Types typedef Grid::QCD::WilsonImplR::FermionField FermionField; typedef Grid::QCD::LatticeGaugeField GaugeField; -#include -#include -#include - enum ChromaAction { Wilson, // Wilson WilsonClover // CloverFermions }; -void make_gauge(GaugeField &lat, FermionField &src); -void calc_grid(ChromaAction CA, GaugeField &lat, FermionField &src, FermionField &res, int dag); -void calc_chroma(ChromaAction CA, GaugeField &lat, FermionField &src, FermionField &res, int dag); - namespace Chroma { @@ -286,91 +281,6 @@ public: }; } // namespace Chroma -int main(int argc, char **argv) -{ - - /******************************************************** - * Setup QDP - *********************************************************/ - Chroma::initialize(&argc, &argv); - Chroma::WilsonTypeFermActs4DEnv::registerAll(); - - /******************************************************** - * Setup Grid - *********************************************************/ - Grid::Grid_init(&argc, &argv); - Grid::GridCartesian *UGrid = Grid::QCD::SpaceTimeGrid::makeFourDimGrid(Grid::GridDefaultLatt(), - Grid::GridDefaultSimd(Grid::QCD::Nd, Grid::vComplex::Nsimd()), - Grid::GridDefaultMpi()); - - std::vector gd = UGrid->GlobalDimensions(); - QDP::multi1d nrow(QDP::Nd); - for (int mu = 0; mu < 4; mu++) - nrow[mu] = gd[mu]; - - QDP::Layout::setLattSize(nrow); - QDP::Layout::create(); - - GaugeField Ug(UGrid); - FermionField src(UGrid); - FermionField res_chroma(UGrid); - FermionField res_grid(UGrid); - FermionField only_wilson(UGrid); - FermionField difference(UGrid); - - std::vector ActionList({Wilson, WilsonClover}); - std::vector ActionName({"Wilson", 
"WilsonClover"}); - - { - - for (int i = 0; i < ActionList.size(); i++) - { - std::cout << "*****************************" << std::endl; - std::cout << "Action " << ActionName[i] << std::endl; - std::cout << "*****************************" << std::endl; - make_gauge(Ug, src); // fills the gauge field and the fermion field with random numbers - - for (int dag = 0; dag < 2; dag++) - { - - { - - std::cout << "Dag = " << dag << std::endl; - - calc_chroma(ActionList[i], Ug, src, res_chroma, dag); - - // Remove the normalisation of Chroma Gauge links ???????? - std::cout << "Norm of Chroma " << ActionName[i] << " multiply " << Grid::norm2(res_chroma) << std::endl; - calc_grid(ActionList[i], Ug, src, res_grid, dag); - - std::cout << "Norm of gauge " << Grid::norm2(Ug) << std::endl; - - std::cout << "Norm of Grid " << ActionName[i] << " multiply " << Grid::norm2(res_grid) << std::endl; - - difference = res_chroma - res_grid; - std::cout << "Norm of difference " << Grid::norm2(difference) << std::endl; - - // Isolate Clover term - /* - calc_grid(Wilson, Ug, src, only_wilson, dag); // Wilson term - res_grid -= only_wilson; - res_chroma -= only_wilson; - - std::cout << "Chroma:" << res_chroma << std::endl; - std::cout << "Grid :" << res_grid << std::endl; - difference = (res_grid-res_chroma); - std::cout << "Difference :" << difference << std::endl; - */ - } - } - - std::cout << "Finished test " << std::endl; - - Chroma::finalize(); - } - } -} - void calc_chroma(ChromaAction action, GaugeField &lat, FermionField &src, FermionField &res, int dag) { QDP::multi1d u(4); @@ -467,7 +377,6 @@ void make_gauge(GaugeField &Umu, FermionField &src) } } */ - } void calc_grid(ChromaAction action, Grid::QCD::LatticeGaugeField &Umu, Grid::QCD::LatticeFermion &src, Grid::QCD::LatticeFermion &res, int dag) @@ -512,3 +421,76 @@ void calc_grid(ChromaAction action, Grid::QCD::LatticeGaugeField &Umu, Grid::QCD assert(0); } + +int main(int argc, char **argv) +{ + + 
/******************************************************** + * Setup QDP + *********************************************************/ + Chroma::initialize(&argc, &argv); + Chroma::WilsonTypeFermActs4DEnv::registerAll(); + + /******************************************************** + * Setup Grid + *********************************************************/ + Grid::Grid_init(&argc, &argv); + Grid::GridCartesian *UGrid = Grid::QCD::SpaceTimeGrid::makeFourDimGrid(Grid::GridDefaultLatt(), + Grid::GridDefaultSimd(Grid::QCD::Nd, Grid::vComplex::Nsimd()), + Grid::GridDefaultMpi()); + + std::vector gd = UGrid->GlobalDimensions(); + QDP::multi1d nrow(QDP::Nd); + for (int mu = 0; mu < 4; mu++) + nrow[mu] = gd[mu]; + + QDP::Layout::setLattSize(nrow); + QDP::Layout::create(); + + GaugeField Ug(UGrid); + FermionField src(UGrid); + FermionField res_chroma(UGrid); + FermionField res_grid(UGrid); + FermionField only_wilson(UGrid); + FermionField difference(UGrid); + + std::vector ActionList({Wilson, WilsonClover}); + std::vector ActionName({"Wilson", "WilsonClover"}); + + { + + for (int i = 0; i < ActionList.size(); i++) + { + std::cout << "*****************************" << std::endl; + std::cout << "Action " << ActionName[i] << std::endl; + std::cout << "*****************************" << std::endl; + make_gauge(Ug, src); // fills the gauge field and the fermion field with random numbers + + for (int dag = 0; dag < 2; dag++) + { + + { + + std::cout << "Dag = " << dag << std::endl; + + calc_chroma(ActionList[i], Ug, src, res_chroma, dag); + + // Remove the normalisation of Chroma Gauge links ???????? 
+ std::cout << "Norm of Chroma " << ActionName[i] << " multiply " << Grid::norm2(res_chroma) << std::endl; + calc_grid(ActionList[i], Ug, src, res_grid, dag); + + std::cout << "Norm of gauge " << Grid::norm2(Ug) << std::endl; + + std::cout << "Norm of Grid " << ActionName[i] << " multiply " << Grid::norm2(res_grid) << std::endl; + + difference = res_chroma - res_grid; + std::cout << "Norm of difference " << Grid::norm2(difference) << std::endl; + } + } + + std::cout << "Finished test " << std::endl; + + Chroma::finalize(); + } + } +} From 657779374be6b8e02ce8eb6452316acc310fdf26 Mon Sep 17 00:00:00 2001 From: Guido Cossu Date: Tue, 24 Oct 2017 13:27:17 +0100 Subject: [PATCH 155/377] Adding vscode to gitignore --- .gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitignore b/.gitignore index 6b92b1a4..8eccbc7d 100644 --- a/.gitignore +++ b/.gitignore @@ -123,5 +123,6 @@ make-bin-BUCK.sh lib/qcd/spin/gamma-gen/*.h lib/qcd/spin/gamma-gen/*.cc +.vscode/ .vscode/settings.json settings.json From 2986aa76f80f2857d846c0e48f508de8299052a3 Mon Sep 17 00:00:00 2001 From: Guido Cossu Date: Tue, 24 Oct 2017 13:32:02 +0100 Subject: [PATCH 156/377] Restoring Perfcounts --- lib/perfmon/PerfCount.h | 3 +-- lib/perfmon/Stat.cc | 30 ++++++++++++++---------------- 2 files changed, 15 insertions(+), 18 deletions(-) diff --git a/lib/perfmon/PerfCount.h b/lib/perfmon/PerfCount.h index cb27c283..73d2c70f 100644 --- a/lib/perfmon/PerfCount.h +++ b/lib/perfmon/PerfCount.h @@ -90,10 +90,9 @@ inline uint64_t cyclecount(void){ } #elif defined __x86_64__ inline uint64_t cyclecount(void){ - //return __rdtsc(); + return __rdtsc(); // unsigned int dummy; // return __rdtscp(&dummy); -return 0; // <- remove this; } #else diff --git a/lib/perfmon/Stat.cc b/lib/perfmon/Stat.cc index 75679412..3f47fd83 100644 --- a/lib/perfmon/Stat.cc +++ b/lib/perfmon/Stat.cc @@ -57,37 +57,35 @@ void PmuStat::start(void) pmu_start(); ++count; xmemctrs(&mrstart, &mwstart); - //tstart = __rdtsc(); -tstart=0; 
+ tstart = __rdtsc(); #endif } void PmuStat::enter(int t) { #ifdef __x86_64__ - counters[0][t] = 0;//__rdpmc(0); - counters[1][t] = 0;//__rdpmc(1); - counters[2][t] = 0;//__rdpmc((1<<30)|0); - counters[3][t] = 0;//__rdpmc((1<<30)|1); - counters[4][t] = 0;//__rdpmc((1<<30)|2); - counters[5][t] = 0;//__rdtsc(); + counters[0][t] = __rdpmc(0); + counters[1][t] = __rdpmc(1); + counters[2][t] = __rdpmc((1<<30)|0); + counters[3][t] = __rdpmc((1<<30)|1); + counters[4][t] = __rdpmc((1<<30)|2); + counters[5][t] = __rdtsc(); #endif } void PmuStat::exit(int t) { #ifdef __x86_64__ - counters[0][t] = 0;//__rdpmc(0) - counters[0][t]; - counters[1][t] = 0;// __rdpmc(1) - counters[1][t]; - counters[2][t] = 0;// __rdpmc((1<<30)|0) - counters[2][t]; - counters[3][t] = 0;// __rdpmc((1<<30)|1) - counters[3][t]; - counters[4][t] = 0;// __rdpmc((1<<30)|2) - counters[4][t]; - counters[5][t] = 0;// __rdtsc() - counters[5][t]; + counters[0][t] = __rdpmc(0) - counters[0][t]; + counters[1][t] = __rdpmc(1) - counters[1][t]; + counters[2][t] = __rdpmc((1<<30)|0) - counters[2][t]; + counters[3][t] = __rdpmc((1<<30)|1) - counters[3][t]; + counters[4][t] = __rdpmc((1<<30)|2) - counters[4][t]; + counters[5][t] = __rdtsc() - counters[5][t]; #endif } void PmuStat::accum(int nthreads) { #ifdef __x86_64__ - // tend = __rdtsc(); - tend =0 ; + tend = __rdtsc(); xmemctrs(&mrend, &mwend); pmu_stop(); for (int t = 0; t < nthreads; ++t) { From 8309f2364bbe57004478435d57eca84662dc2c56 Mon Sep 17 00:00:00 2001 From: Guido Cossu Date: Wed, 25 Oct 2017 10:24:14 +0100 Subject: [PATCH 157/377] Solving again the MPI comm bug with FFTs --- lib/cartesian/Cartesian_base.h | 2 ++ lib/cartesian/Cartesian_full.h | 2 ++ lib/cartesian/Cartesian_red_black.h | 2 ++ lib/communicator/Communicator_base.h | 1 + lib/communicator/Communicator_mpi.cc | 6 ++++++ lib/communicator/Communicator_mpit.cc | 6 ++++++ lib/communicator/Communicator_none.cc | 2 ++ 7 files changed, 21 insertions(+) diff --git a/lib/cartesian/Cartesian_base.h 
b/lib/cartesian/Cartesian_base.h index 324772c5..6aa0e3c7 100644 --- a/lib/cartesian/Cartesian_base.h +++ b/lib/cartesian/Cartesian_base.h @@ -52,6 +52,8 @@ public: GridBase(const std::vector & processor_grid, const CartesianCommunicator &parent) : CartesianCommunicator(processor_grid,parent) {}; + virtual ~GridBase() = default; + // Physics Grid information. std::vector _simd_layout;// Which dimensions get relayed out over simd lanes. std::vector _fdimensions;// (full) Global dimensions of array prior to cb removal diff --git a/lib/cartesian/Cartesian_full.h b/lib/cartesian/Cartesian_full.h index a6a85ab7..c7ea68c9 100644 --- a/lib/cartesian/Cartesian_full.h +++ b/lib/cartesian/Cartesian_full.h @@ -81,6 +81,8 @@ public: Init(dimensions,simd_layout,processor_grid); } + virtual ~GridCartesian() = default; + void Init(const std::vector &dimensions, const std::vector &simd_layout, const std::vector &processor_grid) diff --git a/lib/cartesian/Cartesian_red_black.h b/lib/cartesian/Cartesian_red_black.h index f89cacc5..166c8491 100644 --- a/lib/cartesian/Cartesian_red_black.h +++ b/lib/cartesian/Cartesian_red_black.h @@ -133,6 +133,8 @@ public: { Init(base->_fdimensions,base->_simd_layout,base->_processors,checker_dim_mask,checker_dim) ; } + + virtual ~GridRedBlackCartesian() = default; #if 0 //////////////////////////////////////////////////////////// // Create redblack grid ;; deprecate these. 
Should not diff --git a/lib/communicator/Communicator_base.h b/lib/communicator/Communicator_base.h index 8ff22dbd..22c9e4d0 100644 --- a/lib/communicator/Communicator_base.h +++ b/lib/communicator/Communicator_base.h @@ -155,6 +155,7 @@ class CartesianCommunicator { //////////////////////////////////////////////// CartesianCommunicator(const std::vector &processors,const CartesianCommunicator &parent); CartesianCommunicator(const std::vector &pdimensions_in); + virtual ~CartesianCommunicator(); private: #if defined (GRID_COMMS_MPI) || defined (GRID_COMMS_MPIT) diff --git a/lib/communicator/Communicator_mpi.cc b/lib/communicator/Communicator_mpi.cc index 678e4517..f1dad1e9 100644 --- a/lib/communicator/Communicator_mpi.cc +++ b/lib/communicator/Communicator_mpi.cc @@ -52,6 +52,12 @@ void CartesianCommunicator::Init(int *argc, char ***argv) { MPI_Comm_dup (MPI_COMM_WORLD,&communicator_world); ShmInitGeneric(); } + +CartesianCommunicator::~CartesianCommunicator(){ +  if (communicator && !MPI::Is_finalized()) +  MPI_Comm_free(&communicator); +} + void CartesianCommunicator::GlobalSum(uint32_t &u){ int ierr=MPI_Allreduce(MPI_IN_PLACE,&u,1,MPI_UINT32_T,MPI_SUM,communicator); assert(ierr==0); diff --git a/lib/communicator/Communicator_mpit.cc b/lib/communicator/Communicator_mpit.cc index 5137c27b..2d257a44 100644 --- a/lib/communicator/Communicator_mpit.cc +++ b/lib/communicator/Communicator_mpit.cc @@ -53,6 +53,12 @@ void CartesianCommunicator::Init(int *argc, char ***argv) { ShmInitGeneric(); } +CartesianCommunicator::~CartesianCommunicator(){ +  if (communicator && !MPI::Is_finalized()) +  MPI_Comm_free(&communicator); +} + + void CartesianCommunicator::GlobalSum(uint32_t &u){ int ierr=MPI_Allreduce(MPI_IN_PLACE,&u,1,MPI_UINT32_T,MPI_SUM,communicator); assert(ierr==0); diff --git a/lib/communicator/Communicator_none.cc b/lib/communicator/Communicator_none.cc index e9d71a15..629a3e4a 100644 --- a/lib/communicator/Communicator_none.cc +++ 
b/lib/communicator/Communicator_none.cc @@ -56,6 +56,8 @@ CartesianCommunicator::CartesianCommunicator(const std::vector &processors) } } +CartesianCommunicator::~CartesianCommunicator(){} + void CartesianCommunicator::GlobalSum(float &){} void CartesianCommunicator::GlobalSumVector(float *,int N){} void CartesianCommunicator::GlobalSum(double &){} From 8a3aae98f6ffba03dcb85e1be23cd387a510e35d Mon Sep 17 00:00:00 2001 From: Guido Cossu Date: Wed, 25 Oct 2017 10:34:49 +0100 Subject: [PATCH 158/377] Solving minor bug in compilation --- lib/communicator/Communicator_mpi.cc | 7 ++++--- lib/communicator/Communicator_mpit.cc | 7 ++++--- 2 files changed, 8 insertions(+), 6 deletions(-) diff --git a/lib/communicator/Communicator_mpi.cc b/lib/communicator/Communicator_mpi.cc index f1dad1e9..5a2dc4d0 100644 --- a/lib/communicator/Communicator_mpi.cc +++ b/lib/communicator/Communicator_mpi.cc @@ -53,9 +53,10 @@ void CartesianCommunicator::Init(int *argc, char ***argv) { ShmInitGeneric(); } -CartesianCommunicator::~CartesianCommunicator(){ -  if (communicator && !MPI::Is_finalized()) -  MPI_Comm_free(&communicator); +CartesianCommunicator::~CartesianCommunicator() +{ + if (communicator && !MPI::Is_finalized()) + MPI_Comm_free(&communicator); } void CartesianCommunicator::GlobalSum(uint32_t &u){ diff --git a/lib/communicator/Communicator_mpit.cc b/lib/communicator/Communicator_mpit.cc index 2d257a44..15ee13fd 100644 --- a/lib/communicator/Communicator_mpit.cc +++ b/lib/communicator/Communicator_mpit.cc @@ -53,9 +53,10 @@ void CartesianCommunicator::Init(int *argc, char ***argv) { ShmInitGeneric(); } -CartesianCommunicator::~CartesianCommunicator(){ -  if (communicator && !MPI::Is_finalized()) -  MPI_Comm_free(&communicator); +CartesianCommunicator::~CartesianCommunicator() +{ + if (communicator && !MPI::Is_finalized()) + MPI_Comm_free(&communicator); } From 28ba8a0f481f0451b5dc22691fe0ad35963af55a Mon Sep 17 00:00:00 2001 From: paboyle Date: Wed, 25 Oct 2017 23:45:57 +0100 
Subject: [PATCH 159/377] Force spacing more nicely --- lib/log/Log.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/log/Log.h b/lib/log/Log.h index 1b4732ab..ddff4c1d 100644 --- a/lib/log/Log.h +++ b/lib/log/Log.h @@ -135,7 +135,7 @@ public: GridTime now = log.StopWatch->Elapsed(); if ( log.timing_mode==1 ) log.StopWatch->Reset(); log.StopWatch->Start(); - stream << log.evidence()<< now << log.background() << " : " ; + stream << log.evidence()<< std::setw(6)< Date: Wed, 25 Oct 2017 23:46:33 +0100 Subject: [PATCH 160/377] Improvements for coarse grid compressed lanczos --- lib/algorithms/CoarsenedMatrix.h | 23 ++++++++++++++--------- 1 file changed, 14 insertions(+), 9 deletions(-) diff --git a/lib/algorithms/CoarsenedMatrix.h b/lib/algorithms/CoarsenedMatrix.h index c2910151..8af8d7ac 100644 --- a/lib/algorithms/CoarsenedMatrix.h +++ b/lib/algorithms/CoarsenedMatrix.h @@ -103,29 +103,32 @@ namespace Grid { GridBase *CoarseGrid; GridBase *FineGrid; std::vector > subspace; + int checkerboard; - Aggregation(GridBase *_CoarseGrid,GridBase *_FineGrid) : - CoarseGrid(_CoarseGrid), + Aggregation(GridBase *_CoarseGrid,GridBase *_FineGrid,int _checkerboard) : + CoarseGrid(_CoarseGrid), FineGrid(_FineGrid), - subspace(nbasis,_FineGrid) + subspace(nbasis,_FineGrid), + checkerboard(_checkerboard) { }; void Orthogonalise(void){ CoarseScalar InnerProd(CoarseGrid); + std::cout << GridLogMessage <<" Gramm-Schmidt pass 1"< pokey(CoarseGrid); - - for(int i=0;ioSites();ss++){ + parallel_for(int ss=0;ssoSites();ss++){ eProj._odata[ss](i)=CComplex(1.0); } eProj=eProj - iProj; @@ -137,6 +140,7 @@ namespace Grid { blockProject(CoarseVec,FineVec,subspace); } void PromoteFromSubspace(const CoarseVector &CoarseVec,FineField &FineVec){ + FineVec.checkerboard = subspace[0].checkerboard; blockPromote(CoarseVec,FineVec,subspace); } void CreateSubspaceRandom(GridParallelRNG &RNG){ @@ -147,6 +151,7 @@ namespace Grid { Orthogonalise(); } + /* virtual void 
CreateSubspaceLanczos(GridParallelRNG &RNG,LinearOperatorBase &hermop,int nn=nbasis) { // Run a Lanczos with sloppy convergence @@ -195,7 +200,7 @@ namespace Grid { std::cout << GridLogMessage <<"subspace["< &hermop,int nn=nbasis) { RealD scale; From d83868fdbbc6a3e9f67c966a190d517a2fb7f9f7 Mon Sep 17 00:00:00 2001 From: paboyle Date: Wed, 25 Oct 2017 23:47:10 +0100 Subject: [PATCH 161/377] Identity linear op added -- useful in circumstances where a linear op may or may not be needed. Supply a trivial one if not needed --- lib/algorithms/LinearOperator.h | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/lib/algorithms/LinearOperator.h b/lib/algorithms/LinearOperator.h index 0d32cc15..2a68a7b9 100644 --- a/lib/algorithms/LinearOperator.h +++ b/lib/algorithms/LinearOperator.h @@ -346,6 +346,13 @@ namespace Grid { virtual void operator() (const Field &in, Field &out) = 0; }; + template class IdentityLinearFunction : public LinearFunction { + public: + void operator() (const Field &in, Field &out){ + out = in; + }; + }; + ///////////////////////////////////////////////////////////// // Base classes for Multishift solvers for operators From f6c3f6bf2d6ff210e25844b64f0d09fe5d074212 Mon Sep 17 00:00:00 2001 From: paboyle Date: Wed, 25 Oct 2017 23:47:59 +0100 Subject: [PATCH 162/377] XML serialisation of parms and initialise from parms object --- lib/algorithms/approx/Chebyshev.h | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/lib/algorithms/approx/Chebyshev.h b/lib/algorithms/approx/Chebyshev.h index 7a6e9a9b..b34fac7f 100644 --- a/lib/algorithms/approx/Chebyshev.h +++ b/lib/algorithms/approx/Chebyshev.h @@ -34,6 +34,12 @@ Author: Christoph Lehner namespace Grid { +struct ChebyParams : Serializable { + GRID_SERIALIZABLE_CLASS_MEMBERS(ChebyParams, + RealD, alpha, + RealD, beta, + int, Npoly); +}; //////////////////////////////////////////////////////////////////////////////////////////// // Generic Chebyshev approximations @@ -67,6 +73,7 @@ namespace Grid { 
}; Chebyshev(){}; + Chebyshev(ChebyParams p){ Init(p.alpha,p.beta,p.Npoly);}; Chebyshev(RealD _lo,RealD _hi,int _order, RealD (* func)(RealD) ) {Init(_lo,_hi,_order,func);}; Chebyshev(RealD _lo,RealD _hi,int _order) {Init(_lo,_hi,_order);}; From a479325349d5eed9351abe5adf267311d8b6d34c Mon Sep 17 00:00:00 2001 From: paboyle Date: Wed, 25 Oct 2017 23:48:47 +0100 Subject: [PATCH 163/377] Rewrite of local coherence lanczos --- .../Test_dwf_compressed_lanczos_reorg.cc | 518 ++++++++++++++++++ 1 file changed, 518 insertions(+) create mode 100644 tests/lanczos/Test_dwf_compressed_lanczos_reorg.cc diff --git a/tests/lanczos/Test_dwf_compressed_lanczos_reorg.cc b/tests/lanczos/Test_dwf_compressed_lanczos_reorg.cc new file mode 100644 index 00000000..a0691116 --- /dev/null +++ b/tests/lanczos/Test_dwf_compressed_lanczos_reorg.cc @@ -0,0 +1,518 @@ + /************************************************************************************* + + Grid physics library, www.github.com/paboyle/Grid + + Source file: ./tests/Test_dwf_compressed_lanczos_reorg.cc + + Copyright (C) 2017 + +Author: Leans heavily on Christoph Lehner's code +Author: Peter Boyle + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along + with this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+ + See the full license in the file "LICENSE" in the top level distribution directory + *************************************************************************************/ + /* END LEGAL */ +/* + * Reimplement the badly named "multigrid" lanczos as compressed Lanczos using the features + * in Grid that were intended to be used to support blocked Aggregates, from + */ +#include +#include + +using namespace std; +using namespace Grid; +using namespace Grid::QCD; + +struct LanczosParams : Serializable { + public: + GRID_SERIALIZABLE_CLASS_MEMBERS(LanczosParams, + ChebyParams, Cheby,/*Chebyshev*/ + int, Nstop, /*Vecs in Lanczos must converge Nstop < Nk < Nm*/ + int, Nk, /*Vecs in Lanczos seek converge*/ + int, Nm, /*Total vecs in Lanczos include restart*/ + RealD, resid, /*residual*/ + int, MaxIt, + RealD, betastp, /* ? */ + int, MinRes); // Must restart +}; + +struct CompressedLanczosParams : Serializable { + public: + GRID_SERIALIZABLE_CLASS_MEMBERS(CompressedLanczosParams, + LanczosParams, FineParams, + LanczosParams, CoarseParams, + ChebyParams, Smoother, + std::vector, blockSize, + std::string, config, + std::vector < std::complex >, omega, + RealD, mass, + RealD, M5 + ); +}; + +// Duplicate functionality; ProjectedFunctionHermOp could be used with the trivial function +template +class ProjectedHermOp : public LinearFunction > > { +public: + typedef iVector CoarseSiteVector; + typedef Lattice CoarseField; + typedef Lattice CoarseScalar; // used for inner products on fine field + typedef Lattice FineField; + + LinearOperatorBase &_Linop; + Aggregation &_Aggregate; + + ProjectedHermOp(LinearOperatorBase& linop, Aggregation &aggregate) : + _Linop(linop), + _Aggregate(aggregate) { }; + + void operator()(const CoarseField& in, CoarseField& out) { + + GridBase *FineGrid = _Aggregate.FineGrid; + FineField fin(FineGrid); + FineField fout(FineGrid); + + _Aggregate.PromoteFromSubspace(in,fin); std::cout< +class ProjectedFunctionHermOp : public LinearFunction > > { 
+public: + typedef iVector CoarseSiteVector; + typedef Lattice CoarseField; + typedef Lattice CoarseScalar; // used for inner products on fine field + typedef Lattice FineField; + + + OperatorFunction & _poly; + LinearOperatorBase &_Linop; + Aggregation &_Aggregate; + + ProjectedFunctionHermOp(OperatorFunction & poly,LinearOperatorBase& linop, + Aggregation &aggregate) : + _poly(poly), + _Linop(linop), + _Aggregate(aggregate) { }; + + void operator()(const CoarseField& in, CoarseField& out) { + + GridBase *FineGrid = _Aggregate.FineGrid; + + FineField fin(FineGrid) ;fin.checkerboard =_Aggregate.checkerboard; + FineField fout(FineGrid);fout.checkerboard =_Aggregate.checkerboard; + + _Aggregate.PromoteFromSubspace(in,fin); std::cout< +class ImplicitlyRestartedLanczosSmoothedTester : public ImplicitlyRestartedLanczosTester > > +{ + public: + typedef iVector CoarseSiteVector; + typedef Lattice CoarseField; + typedef Lattice CoarseScalar; // used for inner products on fine field + typedef Lattice FineField; + + LinearFunction & _Poly; + OperatorFunction & _smoother; + LinearOperatorBase &_Linop; + Aggregation &_Aggregate; + + ImplicitlyRestartedLanczosSmoothedTester(LinearFunction &Poly, + OperatorFunction &smoother, + LinearOperatorBase &Linop, + Aggregation &Aggregate) + : _smoother(smoother), _Linop(Linop),_Aggregate(Aggregate), _Poly(Poly) { }; + + int TestConvergence(int j,RealD eresid,CoarseField &B, RealD &eval,RealD evalMaxApprox) + { + CoarseField v(B); + RealD eval_poly = eval; + // Apply operator + _Poly(B,v); + + RealD vnum = real(innerProduct(B,v)); // HermOp. 
+ RealD vden = norm2(B); + RealD vv0 = norm2(v); + eval = vnum/vden; + v -= eval*B; + + RealD vv = norm2(v) / ::pow(evalMaxApprox,2.0); + + std::cout.precision(13); + std::cout< +class CoarseFineIRL +{ +public: + typedef iVector CoarseSiteVector; + typedef Lattice CoarseScalar; // used for inner products on fine field + typedef Lattice CoarseField; + typedef Lattice FineField; + +private: + GridBase *_CoarseGrid; + GridBase *_FineGrid; + int _checkerboard; + LinearOperatorBase & _FineOp; + + // FIXME replace Aggregation with vector of fine; the code reuse is too small for + // the hassle and complexity of cross coupling. + Aggregation _Aggregate; + std::vector evals_fine; + std::vector evals_coarse; + std::vector evec_coarse; +public: + CoarseFineIRL(GridBase *FineGrid, + GridBase *CoarseGrid, + LinearOperatorBase &FineOp, + int checkerboard) : + _CoarseGrid(CoarseGrid), + _FineGrid(FineGrid), + _Aggregate(CoarseGrid,FineGrid,checkerboard), + _FineOp(FineOp), + _checkerboard(checkerboard) + { + evals_fine.resize(0); + evals_coarse.resize(0); + }; + void Orthogonalise(void ) { _Aggregate.Orthogonalise(); } + + template static RealD normalise(T& v) + { + RealD nn = norm2(v); + nn = ::sqrt(nn); + v = v * (1.0/nn); + return nn; + } + + void testFine(void) + { + int Nk = nbasis; + _Aggregate.subspace.resize(Nk,_FineGrid); + _Aggregate.subspace[0]=1.0; + _Aggregate.subspace[0].checkerboard=_checkerboard; + normalise(_Aggregate.subspace[0]); + PlainHermOp Op(_FineOp); + for(int k=1;k Cheby(cheby_parms); + FunctionHermOp ChebyOp(Cheby,_FineOp); + PlainHermOp Op(_FineOp); + + evals_fine.resize(Nm); + _Aggregate.subspace.resize(Nm,_FineGrid); + + ImplicitlyRestartedLanczos IRL(ChebyOp,Op,Nstop,Nk,Nm,resid,MaxIt,betastp,MinRes); + + FineField src(_FineGrid); src=1.0; src.checkerboard = _checkerboard; + + int Nconv; + IRL.calc(evals_fine,_Aggregate.subspace,src,Nconv,false,0); + + // Shrink down to number saved + assert(Nstop>=nbasis); + assert(Nconv>=nbasis); + 
evals_fine.resize(nbasis); + _Aggregate.subspace.resize(nbasis,_FineGrid); + } + void calcCoarse(ChebyParams cheby_op,ChebyParams cheby_smooth, + int Nstop, int Nk, int Nm,RealD resid, + RealD MaxIt, RealD betastp, int MinRes) + { + Chebyshev Cheby(cheby_op); + ProjectedHermOp Op(_FineOp,_Aggregate); + ProjectedFunctionHermOp ChebyOp (Cheby,_FineOp,_Aggregate); + ////////////////////////////////////////////////////////////////////////////////////////////////// + // create a smoother and see if we can get a cheap convergence test and smooth inside the IRL + ////////////////////////////////////////////////////////////////////////////////////////////////// + + Chebyshev ChebySmooth(cheby_smooth); + ImplicitlyRestartedLanczosSmoothedTester ChebySmoothTester(ChebyOp,ChebySmooth,_FineOp,_Aggregate); + + + evals_coarse.resize(Nm); + evec_coarse.resize(Nm,_CoarseGrid); + + CoarseField src(_CoarseGrid); src=1.0; + + ImplicitlyRestartedLanczos IRL(ChebyOp,ChebyOp,ChebySmoothTester,Nstop,Nk,Nm,resid,MaxIt,betastp,MinRes); + int Nconv=0; + IRL.calc(evals_coarse,evec_coarse,src,Nconv,false,1); + assert(Nconv>=Nstop); + + for (int i=0;i blockSize = Params.blockSize; + + // Grids + GridCartesian * UGrid = SpaceTimeGrid::makeFourDimGrid(GridDefaultLatt(), GridDefaultSimd(Nd,vComplex::Nsimd()),GridDefaultMpi()); + GridRedBlackCartesian * UrbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(UGrid); + GridCartesian * FGrid = SpaceTimeGrid::makeFiveDimGrid(Ls,UGrid); + GridRedBlackCartesian * FrbGrid = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls,UGrid); + + std::vector fineLatt = GridDefaultLatt(); + int dims=fineLatt.size(); + assert(blockSize.size()==dims+1); + std::vector coarseLatt(dims); + std::vector coarseLatt5d ; + + for (int d=0;d HermOp(Ddwf); + + // Eigenvector storage + LanczosParams fine =Params.FineParams; + LanczosParams coarse=Params.CoarseParams; + + const int Ns1 = fine.Nstop; const int Ns2 = coarse.Nstop; + const int Nk1 = fine.Nk; const int Nk2 = coarse.Nk; + const int 
Nm1 = fine.Nm; const int Nm2 = coarse.Nm; + + std::cout << GridLogMessage << "Keep " << fine.Nstop << " fine vectors" << std::endl; + std::cout << GridLogMessage << "Keep " << coarse.Nstop << " coarse vectors" << std::endl; + assert(Nm2 >= Nm1); + + const int nbasis= 60; + assert(nbasis==Ns1); + CoarseFineIRL IRL(FrbGrid,CoarseGrid5rb,HermOp,Odd); + std::cout << GridLogMessage << "Constructed CoarseFine IRL" << std::endl; + + int do_fine = 1; + int do_coarse = 0; + int do_smooth = 0; + if ( do_fine ) { + std::cout << GridLogMessage << "Performing fine grid IRL Nstop "<< Ns1 << " Nk "< Date: Wed, 25 Oct 2017 23:49:23 +0100 Subject: [PATCH 164/377] 64 bit safe offsets --- lib/parallelIO/BinaryIO.h | 94 +++++++++++++++++++++------------------ 1 file changed, 51 insertions(+), 43 deletions(-) diff --git a/lib/parallelIO/BinaryIO.h b/lib/parallelIO/BinaryIO.h index d14f3fe2..a2abc9be 100644 --- a/lib/parallelIO/BinaryIO.h +++ b/lib/parallelIO/BinaryIO.h @@ -261,7 +261,7 @@ class BinaryIO { GridBase *grid, std::vector &iodata, std::string file, - int offset, + Integer offset, const std::string &format, int control, uint32_t &nersc_csum, uint32_t &scidac_csuma, @@ -367,7 +367,7 @@ class BinaryIO { assert(0); #endif } else { - std::cout << GridLogMessage << "C++ read I/O " << file << " : " + std::cout << GridLogMessage << "C++ read I/O " << file << " : " << iodata.size() * sizeof(fobj) << " bytes" << std::endl; std::ifstream fin; fin.open(file, std::ios::binary | std::ios::in); @@ -444,48 +444,56 @@ class BinaryIO { assert(0); #endif } else { + + std::cout << GridLogMessage << "C++ write I/O " << file << " : " + << iodata.size() * sizeof(fobj) << " bytes" << std::endl; std::ofstream fout; - fout.exceptions ( std::fstream::failbit | std::fstream::badbit ); - try { - fout.open(file,std::ios::binary|std::ios::out|std::ios::in); - } catch (const std::fstream::failure& exc) { - std::cout << GridLogError << "Error in opening the file " << file << " for output" < &Umu, 
std::string file, munger munge, - int offset, + Integer offset, const std::string &format, uint32_t &nersc_csum, uint32_t &scidac_csuma, @@ -552,7 +560,7 @@ class BinaryIO { static inline void writeLatticeObject(Lattice &Umu, std::string file, munger munge, - int offset, + Integer offset, const std::string &format, uint32_t &nersc_csum, uint32_t &scidac_csuma, @@ -589,7 +597,7 @@ class BinaryIO { static inline void readRNG(GridSerialRNG &serial, GridParallelRNG ¶llel, std::string file, - int offset, + Integer offset, uint32_t &nersc_csum, uint32_t &scidac_csuma, uint32_t &scidac_csumb) @@ -651,7 +659,7 @@ class BinaryIO { static inline void writeRNG(GridSerialRNG &serial, GridParallelRNG ¶llel, std::string file, - int offset, + Integer offset, uint32_t &nersc_csum, uint32_t &scidac_csuma, uint32_t &scidac_csumb) From 66295b99aada692f68c6547ce7d435e8d7df9e66 Mon Sep 17 00:00:00 2001 From: paboyle Date: Wed, 25 Oct 2017 23:50:05 +0100 Subject: [PATCH 165/377] Bit less verbose SciDAC IO --- lib/parallelIO/IldgIO.h | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/lib/parallelIO/IldgIO.h b/lib/parallelIO/IldgIO.h index ba71153d..1f2b7c90 100644 --- a/lib/parallelIO/IldgIO.h +++ b/lib/parallelIO/IldgIO.h @@ -147,7 +147,7 @@ namespace QCD { _scidacRecord = sr; - std::cout << GridLogMessage << "Build SciDAC datatype " <_gsites; createLimeRecordHeader(record_name, 0, 0, PayloadSize); - // std::cout << "W sizeof(sobj)" <_gsites< xmlc(nbytes+1,'\0'); limeReaderReadData((void *)&xmlc[0], &nbytes, LimeR); - std::cout << GridLogMessage<< "Non binary record :" < Date: Wed, 25 Oct 2017 23:50:37 +0100 Subject: [PATCH 166/377] Better error messaging --- lib/serialisation/XmlIO.cc | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/lib/serialisation/XmlIO.cc b/lib/serialisation/XmlIO.cc index a132a2f0..c0c45adc 100644 --- a/lib/serialisation/XmlIO.cc +++ b/lib/serialisation/XmlIO.cc @@ -68,10 +68,10 @@ std::string 
XmlWriter::XmlString(void) XmlReader::XmlReader(const char *xmlstring,string toplev) : fileName_("") { pugi::xml_parse_result result; - result = doc_.load_string(xmlstring); + result = doc_.load_file(xmlstring); if ( !result ) { - cerr << "XML error description: " << result.description() << "\n"; - cerr << "XML error offset : " << result.offset << "\n"; + cerr << "XML error description: char * " << result.description() << " "<< xmlstring << "\n"; + cerr << "XML error offset : char * " << result.offset << " "< Date: Wed, 25 Oct 2017 23:51:18 +0100 Subject: [PATCH 167/377] Red black friendly coarsening --- lib/lattice/Lattice_transfer.h | 54 ++++++++++++++++++++-------------- 1 file changed, 32 insertions(+), 22 deletions(-) diff --git a/lib/lattice/Lattice_transfer.h b/lib/lattice/Lattice_transfer.h index 713a8788..48688e43 100644 --- a/lib/lattice/Lattice_transfer.h +++ b/lib/lattice/Lattice_transfer.h @@ -109,8 +109,8 @@ inline void blockProject(Lattice > &coarseData, coarseData=zero; - // Loop with a cache friendly loop ordering - for(int sf=0;sfoSites();sf++){ + // Loop over coars parallel, and then loop over fine associated with coarse. + parallel_for(int sf=0;sfoSites();sf++){ int sc; std::vector coor_c(_ndimension); @@ -119,8 +119,9 @@ inline void blockProject(Lattice > &coarseData, for(int d=0;d<_ndimension;d++) coor_c[d]=coor_f[d]/block_r[d]; Lexicographic::IndexFromCoor(coor_c,sc,coarse->_rdimensions); +PARALLEL_CRITICAL for(int i=0;i &fineZ, GridBase * coarse= coarseA._grid; fineZ.checkerboard=fineX.checkerboard; + assert(fineX.checkerboard==fineY.checkerboard); subdivides(coarse,fine); // require they map conformable(fineX,fineY); conformable(fineX,fineZ); @@ -180,9 +182,10 @@ template GridBase *coarse(CoarseInner._grid); GridBase *fine (fineX._grid); - Lattice fine_inner(fine); + Lattice fine_inner(fine); fine_inner.checkerboard = fineX.checkerboard; Lattice coarse_inner(coarse); + // Precision promotion? 
fine_inner = localInnerProduct(fineX,fineY); blockSum(coarse_inner,fine_inner); parallel_for(int ss=0;ssoSites();ss++){ @@ -193,7 +196,7 @@ template inline void blockNormalise(Lattice &ip,Lattice &fineX) { GridBase *coarse = ip._grid; - Lattice zz(fineX._grid); zz=zero; + Lattice zz(fineX._grid); zz=zero; zz.checkerboard=fineX.checkerboard; blockInnerProduct(ip,fineX,fineX); ip = pow(ip,-0.5); blockZAXPY(fineX,ip,fineX,zz); @@ -216,19 +219,25 @@ inline void blockSum(Lattice &coarseData,const Lattice &fineData) block_r[d] = fine->_rdimensions[d] / coarse->_rdimensions[d]; } + // Turn this around to loop threaded over sc and interior loop + // over sf would thread better coarseData=zero; - for(int sf=0;sfoSites();sf++){ - + parallel_region { + int sc; std::vector coor_c(_ndimension); std::vector coor_f(_ndimension); - Lexicographic::CoorFromIndex(coor_f,sf,fine->_rdimensions); - for(int d=0;d<_ndimension;d++) coor_c[d]=coor_f[d]/block_r[d]; - Lexicographic::IndexFromCoor(coor_c,sc,coarse->_rdimensions); - - coarseData._odata[sc]=coarseData._odata[sc]+fineData._odata[sf]; + parallel_for_internal(int sf=0;sfoSites();sf++){ + + Lexicographic::CoorFromIndex(coor_f,sf,fine->_rdimensions); + for(int d=0;d<_ndimension;d++) coor_c[d]=coor_f[d]/block_r[d]; + Lexicographic::IndexFromCoor(coor_c,sc,coarse->_rdimensions); + +PARALLEL_CRITICAL + coarseData._odata[sc]=coarseData._odata[sc]+fineData._odata[sf]; + } } return; } @@ -238,7 +247,7 @@ inline void blockPick(GridBase *coarse,const Lattice &unpicked,Lattice zz(fine); + Lattice zz(fine); zz.checkerboard = unpicked.checkerboard; Lattice > fcoor(fine); zz = zero; @@ -303,20 +312,21 @@ inline void blockPromote(const Lattice > &coarseData, } // Loop with a cache friendly loop ordering - for(int sf=0;sfoSites();sf++){ - + parallel_region { int sc; std::vector coor_c(_ndimension); std::vector coor_f(_ndimension); - Lexicographic::CoorFromIndex(coor_f,sf,fine->_rdimensions); - for(int d=0;d<_ndimension;d++) 
coor_c[d]=coor_f[d]/block_r[d]; - Lexicographic::IndexFromCoor(coor_c,sc,coarse->_rdimensions); - - for(int i=0;ioSites();sf++){ + Lexicographic::CoorFromIndex(coor_f,sf,fine->_rdimensions); + for(int d=0;d<_ndimension;d++) coor_c[d]=coor_f[d]/block_r[d]; + Lexicographic::IndexFromCoor(coor_c,sc,coarse->_rdimensions); + + for(int i=0;i Date: Wed, 25 Oct 2017 23:52:47 +0100 Subject: [PATCH 168/377] Use existing functionality where possible --- tests/lanczos/FieldBasisVector.h | 81 ++++++++++++++++++++++++++++++++ 1 file changed, 81 insertions(+) create mode 100644 tests/lanczos/FieldBasisVector.h diff --git a/tests/lanczos/FieldBasisVector.h b/tests/lanczos/FieldBasisVector.h new file mode 100644 index 00000000..9a21aa46 --- /dev/null +++ b/tests/lanczos/FieldBasisVector.h @@ -0,0 +1,81 @@ +namespace Grid { + +template +class BasisFieldVector { + public: + int _Nm; + + typedef typename Field::scalar_type Coeff_t; + typedef typename Field::vector_type vCoeff_t; + typedef typename Field::vector_object vobj; + typedef typename vobj::scalar_object sobj; + + std::vector _v; // _Nfull vectors + + void report(int n,GridBase* value) { + + std::cout << GridLogMessage << "BasisFieldVector allocated:\n"; + std::cout << GridLogMessage << " Delta N = " << n << "\n"; + std::cout << GridLogMessage << " Size of full vectors (size) = " << + ((double)n*sizeof(vobj)*value->oSites() / 1024./1024./1024.) 
<< " GB\n"; + std::cout << GridLogMessage << " Size = " << _v.size() << " Capacity = " << _v.capacity() << std::endl; + + value->Barrier(); + +#ifdef __linux + if (value->IsBoss()) { + system("cat /proc/meminfo"); + } +#endif + + value->Barrier(); + + } + + BasisFieldVector(int Nm,GridBase* value) : _Nm(Nm), _v(Nm,value) { + report(Nm,value); + } + + ~BasisFieldVector() { + } + + Field& operator[](int i) { + return _v[i]; + } + + void orthogonalize(Field& w, int k) { + basisOrthogonalize(_v,w,k); + } + + void rotate(Eigen::MatrixXd& Qt,int j0, int j1, int k0,int k1,int Nm) { + basisRotate(_v,Qt,j0,j1,k0,k1,Nm); + } + + size_t size() const { + return _Nm; + } + + void resize(int n) { + if (n > _Nm) + _v.reserve(n); + + _v.resize(n,_v[0]._grid); + + if (n < _Nm) + _v.shrink_to_fit(); + + report(n - _Nm,_v[0]._grid); + + _Nm = n; + } + + void sortInPlace(std::vector& sort_vals, bool reverse) { + basisSortInPlace(_v,sort_vals,reverse); + } + + void deflate(const std::vector& eval,const Field& src_orig,Field& result) { + basisDeflate(_v,eval,src_orig,result); + } + + }; +} From e4d461cb03ee3b039345c3c4ec29704dec5c8d94 Mon Sep 17 00:00:00 2001 From: paboyle Date: Wed, 25 Oct 2017 23:53:19 +0100 Subject: [PATCH 169/377] Messagign --- tests/lanczos/Test_dwf_compressed_lanczos.cc | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/tests/lanczos/Test_dwf_compressed_lanczos.cc b/tests/lanczos/Test_dwf_compressed_lanczos.cc index 10d6c3ae..a6eb95e9 100644 --- a/tests/lanczos/Test_dwf_compressed_lanczos.cc +++ b/tests/lanczos/Test_dwf_compressed_lanczos.cc @@ -26,9 +26,9 @@ // The following are now decoupled from the Lanczos and deal with grids. 
// Safe to replace functionality ///////////////////////////////////////////////////////////////////////////// -#include -#include -#include +#include "BlockedGrid.h" +#include "FieldBasisVector.h" +#include "BlockProjector.h" #include "FieldVectorIO.h" #include "Params.h" @@ -431,6 +431,7 @@ void CoarseGridLanczos(BlockProjector& pr,RealD alpha2,RealD beta,int Npo auto result = src_orig; // undeflated solve + std::cout << GridLogMessage << " Undeflated solve "<IsBoss()) @@ -438,6 +439,7 @@ void CoarseGridLanczos(BlockProjector& pr,RealD alpha2,RealD beta,int Npo // CG.ResHistory.clear(); // deflated solve with all eigenvectors + std::cout << GridLogMessage << " Deflated solve with all evectors"<& pr,RealD alpha2,RealD beta,int Npo // CG.ResHistory.clear(); // deflated solve with non-blocked eigenvectors + std::cout << GridLogMessage << " Deflated solve with non-blocked evectors"<& pr,RealD alpha2,RealD beta,int Npo // CG.ResHistory.clear(); // deflated solve with all eigenvectors and original eigenvalues from proj + std::cout << GridLogMessage << " Deflated solve with all eigenvectors and original eigenvalues from proj"< Date: Wed, 25 Oct 2017 23:53:44 +0100 Subject: [PATCH 170/377] Faster converge time --- tests/solver/Test_dwf_mrhs_cg.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/solver/Test_dwf_mrhs_cg.cc b/tests/solver/Test_dwf_mrhs_cg.cc index d9215db2..079fa85a 100644 --- a/tests/solver/Test_dwf_mrhs_cg.cc +++ b/tests/solver/Test_dwf_mrhs_cg.cc @@ -190,7 +190,7 @@ int main (int argc, char ** argv) MdagMLinearOperator HermOp(Ddwf); MdagMLinearOperator HermOpCk(Dchk); - ConjugateGradient CG((1.0e-8/(me+1)),10000); + ConjugateGradient CG((1.0e-5/(me+1)),10000); s_res = zero; CG(HermOp,s_src,s_res); From d577211cc376303d88355df5bb101ff8aaf6f9ab Mon Sep 17 00:00:00 2001 From: paboyle Date: Wed, 25 Oct 2017 23:57:54 +0100 Subject: [PATCH 171/377] Relax stoppign condition --- tests/solver/Test_dwf_mrhs_cg_mpi.cc | 2 +- 1 file changed, 1 
insertion(+), 1 deletion(-) diff --git a/tests/solver/Test_dwf_mrhs_cg_mpi.cc b/tests/solver/Test_dwf_mrhs_cg_mpi.cc index 90969b85..fbc6dd32 100644 --- a/tests/solver/Test_dwf_mrhs_cg_mpi.cc +++ b/tests/solver/Test_dwf_mrhs_cg_mpi.cc @@ -113,7 +113,7 @@ int main (int argc, char ** argv) MdagMLinearOperator HermOp(Ddwf); MdagMLinearOperator HermOpCk(Dchk); - ConjugateGradient CG((1.0e-8/(me+1)),10000); + ConjugateGradient CG((1.0e-5/(me+1)),10000); s_res = zero; CG(HermOp,s_src,s_res); From e9be293444039051630aca103ae861b51cf242a5 Mon Sep 17 00:00:00 2001 From: paboyle Date: Thu, 26 Oct 2017 01:59:30 +0100 Subject: [PATCH 172/377] Better messaging --- lib/parallelIO/BinaryIO.h | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/lib/parallelIO/BinaryIO.h b/lib/parallelIO/BinaryIO.h index a2abc9be..b40a75af 100644 --- a/lib/parallelIO/BinaryIO.h +++ b/lib/parallelIO/BinaryIO.h @@ -356,7 +356,7 @@ class BinaryIO { if ( (control & BINARYIO_LEXICOGRAPHIC) && (nrank > 1) ) { #ifdef USE_MPI_IO - std::cout<< GridLogMessage<< "MPI read I/O "<< file<< std::endl; + std::cout<< GridLogMessage<<"IOobject: MPI read I/O "<< file<< std::endl; ierr=MPI_File_open(grid->communicator,(char *) file.c_str(), MPI_MODE_RDONLY, MPI_INFO_NULL, &fh); assert(ierr==0); ierr=MPI_File_set_view(fh, disp, mpiObject, fileArray, "native", MPI_INFO_NULL); assert(ierr==0); ierr=MPI_File_read_all(fh, &iodata[0], 1, localArray, &status); assert(ierr==0); @@ -367,7 +367,7 @@ class BinaryIO { assert(0); #endif } else { - std::cout << GridLogMessage << "C++ read I/O " << file << " : " + std::cout << GridLogMessage <<"IOobject: C++ read I/O " << file << " : " << iodata.size() * sizeof(fobj) << " bytes" << std::endl; std::ifstream fin; fin.open(file, std::ios::binary | std::ios::in); @@ -413,9 +413,9 @@ class BinaryIO { timer.Start(); if ( (control & BINARYIO_LEXICOGRAPHIC) && (nrank > 1) ) { #ifdef USE_MPI_IO - std::cout << GridLogMessage << "MPI write I/O " << file << std::endl; + 
std::cout << GridLogMessage <<"IOobject: MPI write I/O " << file << std::endl; ierr = MPI_File_open(grid->communicator, (char *)file.c_str(), MPI_MODE_RDWR | MPI_MODE_CREATE, MPI_INFO_NULL, &fh); - std::cout << GridLogMessage << "Checking for errors" << std::endl; + // std::cout << GridLogMessage << "Checking for errors" << std::endl; if (ierr != MPI_SUCCESS) { char error_string[BUFSIZ]; @@ -445,7 +445,7 @@ class BinaryIO { #endif } else { - std::cout << GridLogMessage << "C++ write I/O " << file << " : " + std::cout << GridLogMessage << "IOobject: C++ write I/O " << file << " : " << iodata.size() * sizeof(fobj) << " bytes" << std::endl; std::ofstream fout; From ccd20df8276fa1951f7d6489bce95c3a65de57eb Mon Sep 17 00:00:00 2001 From: paboyle Date: Thu, 26 Oct 2017 01:59:59 +0100 Subject: [PATCH 173/377] Better IRL interface --- tests/lanczos/BlockProjector.h | 143 +++++++ tests/lanczos/BlockedGrid.h | 401 ++++++++++++++++++ tests/lanczos/Test_dwf_compressed_lanczos.cc | 4 +- .../Test_dwf_compressed_lanczos_reorg.cc | 21 +- 4 files changed, 557 insertions(+), 12 deletions(-) create mode 100644 tests/lanczos/BlockProjector.h create mode 100644 tests/lanczos/BlockedGrid.h diff --git a/tests/lanczos/BlockProjector.h b/tests/lanczos/BlockProjector.h new file mode 100644 index 00000000..6becaa66 --- /dev/null +++ b/tests/lanczos/BlockProjector.h @@ -0,0 +1,143 @@ +namespace Grid { + +/* + BlockProjector + + If _HP_BLOCK_PROJECTORS_ is defined, we assume that _evec is a basis that is not + fully orthonormalized (to the precision of the coarse field) and we allow for higher-precision + coarse field than basis field. 
+ +*/ +//#define _HP_BLOCK_PROJECTORS_ + +template +class BlockProjector { +public: + + BasisFieldVector& _evec; + BlockedGrid& _bgrid; + + BlockProjector(BasisFieldVector& evec, BlockedGrid& bgrid) : _evec(evec), _bgrid(bgrid) { + } + + void createOrthonormalBasis(RealD thres = 0.0) { + + GridStopWatch sw; + sw.Start(); + + int cnt = 0; + +#pragma omp parallel shared(cnt) + { + int lcnt = 0; + +#pragma omp for + for (int b=0;b<_bgrid._o_blocks;b++) { + + for (int i=0;i<_evec._Nm;i++) { + + auto nrm0 = _bgrid.block_sp(b,_evec._v[i],_evec._v[i]); + + // |i> -= |j> + for (int j=0;j + void coarseToFine(const CoarseField& in, Field& out) { + + out = zero; + out.checkerboard = _evec._v[0].checkerboard; + + int Nbasis = sizeof(in._odata[0]._internal._internal) / sizeof(in._odata[0]._internal._internal[0]); + assert(Nbasis == _evec._Nm); + +#pragma omp parallel for + for (int b=0;b<_bgrid._o_blocks;b++) { + for (int j=0;j<_evec._Nm;j++) { + _bgrid.block_caxpy(b,out,in._odata[b]._internal._internal[j],_evec._v[j],out); + } + } + + } + + template + void fineToCoarse(const Field& in, CoarseField& out) { + + out = zero; + + int Nbasis = sizeof(out._odata[0]._internal._internal) / sizeof(out._odata[0]._internal._internal[0]); + assert(Nbasis == _evec._Nm); + + + Field tmp(_bgrid._grid); + tmp = in; + +#pragma omp parallel for + for (int b=0;b<_bgrid._o_blocks;b++) { + for (int j=0;j<_evec._Nm;j++) { + // |rhs> -= |j> + auto c = _bgrid.block_sp(b,_evec._v[j],tmp); + _bgrid.block_caxpy(b,tmp,-c,_evec._v[j],tmp); // may make this more numerically stable + out._odata[b]._internal._internal[j] = c; + } + } + + } + + template + void deflateFine(BasisFieldVector& _coef,const std::vector& eval,int N,const Field& src_orig,Field& result) { + result = zero; + for (int i=0;i + void deflateCoarse(BasisFieldVector& _coef,const std::vector& eval,int N,const Field& src_orig,Field& result) { + CoarseField src_coarse(_coef._v[0]._grid); + CoarseField result_coarse = src_coarse; + result_coarse 
= zero; + fineToCoarse(src_orig,src_coarse); + for (int i=0;i + void deflate(BasisFieldVector& _coef,const std::vector& eval,int N,const Field& src_orig,Field& result) { + // Deflation on coarse Grid is much faster, so use it by default. Deflation on fine Grid is kept for legacy reasons for now. + deflateCoarse(_coef,eval,N,src_orig,result); + } + +}; +} diff --git a/tests/lanczos/BlockedGrid.h b/tests/lanczos/BlockedGrid.h new file mode 100644 index 00000000..821272de --- /dev/null +++ b/tests/lanczos/BlockedGrid.h @@ -0,0 +1,401 @@ +namespace Grid { + +template +class BlockedGrid { +public: + GridBase* _grid; + typedef typename Field::scalar_type Coeff_t; + typedef typename Field::vector_type vCoeff_t; + + std::vector _bs; // block size + std::vector _nb; // number of blocks + std::vector _l; // local dimensions irrespective of cb + std::vector _l_cb; // local dimensions of checkerboarded vector + std::vector _l_cb_o; // local dimensions of inner checkerboarded vector + std::vector _bs_cb; // block size in checkerboarded vector + std::vector _nb_o; // number of blocks of simd o-sites + + int _nd, _blocks, _cf_size, _cf_block_size, _cf_o_block_size, _o_blocks, _block_sites; + + BlockedGrid(GridBase* grid, const std::vector& block_size) : + _grid(grid), _bs(block_size), _nd((int)_bs.size()), + _nb(block_size), _l(block_size), _l_cb(block_size), _nb_o(block_size), + _l_cb_o(block_size), _bs_cb(block_size) { + + _blocks = 1; + _o_blocks = 1; + _l = grid->FullDimensions(); + _l_cb = grid->LocalDimensions(); + _l_cb_o = grid->_rdimensions; + + _cf_size = 1; + _block_sites = 1; + for (int i=0;i<_nd;i++) { + _l[i] /= grid->_processors[i]; + + assert(!(_l[i] % _bs[i])); // lattice must accommodate choice of blocksize + + int r = _l[i] / _l_cb[i]; + assert(!(_bs[i] % r)); // checkerboarding must accommodate choice of blocksize + _bs_cb[i] = _bs[i] / r; + _block_sites *= _bs_cb[i]; + _nb[i] = _l[i] / _bs[i]; + _nb_o[i] = _nb[i] / _grid->_simd_layout[i]; + if (_nb[i] % 
_grid->_simd_layout[i]) { // simd must accommodate choice of blocksize + std::cout << GridLogMessage << "Problem: _nb[" << i << "] = " << _nb[i] << " _grid->_simd_layout[" << i << "] = " << _grid->_simd_layout[i] << std::endl; + assert(0); + } + _blocks *= _nb[i]; + _o_blocks *= _nb_o[i]; + _cf_size *= _l[i]; + } + + _cf_size *= 12 / 2; + _cf_block_size = _cf_size / _blocks; + _cf_o_block_size = _cf_size / _o_blocks; + + std::cout << GridLogMessage << "BlockedGrid:" << std::endl; + std::cout << GridLogMessage << " _l = " << _l << std::endl; + std::cout << GridLogMessage << " _l_cb = " << _l_cb << std::endl; + std::cout << GridLogMessage << " _l_cb_o = " << _l_cb_o << std::endl; + std::cout << GridLogMessage << " _bs = " << _bs << std::endl; + std::cout << GridLogMessage << " _bs_cb = " << _bs_cb << std::endl; + + std::cout << GridLogMessage << " _nb = " << _nb << std::endl; + std::cout << GridLogMessage << " _nb_o = " << _nb_o << std::endl; + std::cout << GridLogMessage << " _blocks = " << _blocks << std::endl; + std::cout << GridLogMessage << " _o_blocks = " << _o_blocks << std::endl; + std::cout << GridLogMessage << " sizeof(vCoeff_t) = " << sizeof(vCoeff_t) << std::endl; + std::cout << GridLogMessage << " _cf_size = " << _cf_size << std::endl; + std::cout << GridLogMessage << " _cf_block_size = " << _cf_block_size << std::endl; + std::cout << GridLogMessage << " _block_sites = " << _block_sites << std::endl; + std::cout << GridLogMessage << " _grid->oSites() = " << _grid->oSites() << std::endl; + + // _grid->Barrier(); + //abort(); + } + + void block_to_coor(int b, std::vector& x0) { + + std::vector bcoor; + bcoor.resize(_nd); + x0.resize(_nd); + assert(b < _o_blocks); + Lexicographic::CoorFromIndex(bcoor,b,_nb_o); + int i; + + for (i=0;i<_nd;i++) { + x0[i] = bcoor[i]*_bs_cb[i]; + } + + //std::cout << GridLogMessage << "Map block b -> " << x0 << std::endl; + + } + + void block_site_to_o_coor(const std::vector& x0, std::vector& coor, int i) { + 
Lexicographic::CoorFromIndex(coor,i,_bs_cb); + for (int j=0;j<_nd;j++) + coor[j] += x0[j]; + } + + int block_site_to_o_site(const std::vector& x0, int i) { + std::vector coor; coor.resize(_nd); + block_site_to_o_coor(x0,coor,i); + Lexicographic::IndexFromCoor(coor,i,_l_cb_o); + return i; + } + + vCoeff_t block_sp(int b, const Field& x, const Field& y) { + + std::vector x0; + block_to_coor(b,x0); + + vCoeff_t ret = 0.0; + for (int i=0;i<_block_sites;i++) { // only odd sites + int ss = block_site_to_o_site(x0,i); + ret += TensorRemove(innerProduct(x._odata[ss],y._odata[ss])); + } + + return ret; + + } + + vCoeff_t block_sp(int b, const Field& x, const std::vector< ComplexD >& y) { + + std::vector x0; + block_to_coor(b,x0); + + constexpr int nsimd = sizeof(vCoeff_t) / sizeof(Coeff_t); + int lsize = _cf_o_block_size / _block_sites; + + std::vector< ComplexD > ret(nsimd); + for (int i=0;i + void vcaxpy(iScalar& r,const vCoeff_t& a,const iScalar& x,const iScalar& y) { + vcaxpy(r._internal,a,x._internal,y._internal); + } + + template + void vcaxpy(iVector& r,const vCoeff_t& a,const iVector& x,const iVector& y) { + for (int i=0;i x0; + block_to_coor(b,x0); + + for (int i=0;i<_block_sites;i++) { // only odd sites + int ss = block_site_to_o_site(x0,i); + vcaxpy(ret._odata[ss],a,x._odata[ss],y._odata[ss]); + } + + } + + void block_caxpy(int b, std::vector< ComplexD >& ret, const vCoeff_t& a, const Field& x, const std::vector< ComplexD >& y) { + std::vector x0; + block_to_coor(b,x0); + + constexpr int nsimd = sizeof(vCoeff_t) / sizeof(Coeff_t); + int lsize = _cf_o_block_size / _block_sites; + + for (int i=0;i<_block_sites;i++) { // only odd sites + int ss = block_site_to_o_site(x0,i); + + int n = lsize / nsimd; + for (int l=0;l& x) { + std::vector x0; + block_to_coor(b,x0); + + int lsize = _cf_o_block_size / _block_sites; + + for (int i=0;i<_block_sites;i++) { // only odd sites + int ss = block_site_to_o_site(x0,i); + + for (int l=0;l& x) { + std::vector x0; + 
block_to_coor(b,x0); + + int lsize = _cf_o_block_size / _block_sites; + + for (int i=0;i<_block_sites;i++) { // only odd sites + int ss = block_site_to_o_site(x0,i); + + for (int l=0;l + void vcscale(iScalar& r,const vCoeff_t& a,const iScalar& x) { + vcscale(r._internal,a,x._internal); + } + + template + void vcscale(iVector& r,const vCoeff_t& a,const iVector& x) { + for (int i=0;i x0; + block_to_coor(b,x0); + + for (int i=0;i<_block_sites;i++) { // only odd sites + int ss = block_site_to_o_site(x0,i); + vcscale(ret._odata[ss],a,ret._odata[ss]); + } + } + + void getCanonicalBlockOffset(int cb, std::vector& x0) { + const int ndim = 5; + assert(_nb.size() == ndim); + std::vector _nbc = { _nb[1], _nb[2], _nb[3], _nb[4], _nb[0] }; + std::vector _bsc = { _bs[1], _bs[2], _bs[3], _bs[4], _bs[0] }; + x0.resize(ndim); + + assert(cb >= 0); + assert(cb < _nbc[0]*_nbc[1]*_nbc[2]*_nbc[3]*_nbc[4]); + + Lexicographic::CoorFromIndex(x0,cb,_nbc); + int i; + + for (i=0;i& buf) { + std::vector _bsc = { _bs[1], _bs[2], _bs[3], _bs[4], _bs[0] }; + std::vector ldim = v._grid->LocalDimensions(); + std::vector cldim = { ldim[1], ldim[2], ldim[3], ldim[4], ldim[0] }; + const int _nbsc = _bs_cb[0]*_bs_cb[1]*_bs_cb[2]*_bs_cb[3]*_bs_cb[4]; + // take canonical block cb of v and put it in canonical ordering in buf + std::vector cx0; + getCanonicalBlockOffset(cb,cx0); + +#pragma omp parallel + { + std::vector co0,cl0; + co0=cx0; cl0=cx0; + +#pragma omp for + for (int i=0;i<_nbsc;i++) { + Lexicographic::CoorFromIndex(co0,2*i,_bsc); // 2* for eo + for (int j=0;j<(int)_bsc.size();j++) + cl0[j] = cx0[j] + co0[j]; + + std::vector l0 = { cl0[4], cl0[0], cl0[1], cl0[2], cl0[3] }; + int oi = v._grid->oIndex(l0); + int ii = v._grid->iIndex(l0); + int lti = i; + + //if (cb < 2 && i<2) + // std::cout << GridLogMessage << "Map: " << cb << ", " << i << " To: " << cl0 << ", " << cx0 << ", " << oi << ", " << ii << std::endl; + + for (int s=0;s<4;s++) + for (int c=0;c<3;c++) { + Coeff_t& ld = 
((Coeff_t*)&v._odata[oi]._internal._internal[s]._internal[c])[ii]; + int ti = 12*lti + 3*s + c; + ld = Coeff_t(buf[2*ti+0], buf[2*ti+1]); + } + } + } + } + + void peekBlockOfVectorCanonical(int cb,const Field& v,std::vector& buf) { + std::vector _bsc = { _bs[1], _bs[2], _bs[3], _bs[4], _bs[0] }; + std::vector ldim = v._grid->LocalDimensions(); + std::vector cldim = { ldim[1], ldim[2], ldim[3], ldim[4], ldim[0] }; + const int _nbsc = _bs_cb[0]*_bs_cb[1]*_bs_cb[2]*_bs_cb[3]*_bs_cb[4]; + // take canonical block cb of v and put it in canonical ordering in buf + std::vector cx0; + getCanonicalBlockOffset(cb,cx0); + + buf.resize(_cf_block_size * 2); + +#pragma omp parallel + { + std::vector co0,cl0; + co0=cx0; cl0=cx0; + +#pragma omp for + for (int i=0;i<_nbsc;i++) { + Lexicographic::CoorFromIndex(co0,2*i,_bsc); // 2* for eo + for (int j=0;j<(int)_bsc.size();j++) + cl0[j] = cx0[j] + co0[j]; + + std::vector l0 = { cl0[4], cl0[0], cl0[1], cl0[2], cl0[3] }; + int oi = v._grid->oIndex(l0); + int ii = v._grid->iIndex(l0); + int lti = i; + + //if (cb < 2 && i<2) + // std::cout << GridLogMessage << "Map: " << cb << ", " << i << " To: " << cl0 << ", " << cx0 << ", " << oi << ", " << ii << std::endl; + + for (int s=0;s<4;s++) + for (int c=0;c<3;c++) { + Coeff_t& ld = ((Coeff_t*)&v._odata[oi]._internal._internal[s]._internal[c])[ii]; + int ti = 12*lti + 3*s + c; + buf[2*ti+0] = ld.real(); + buf[2*ti+1] = ld.imag(); + } + } + } + } + + int globalToLocalCanonicalBlock(int slot,const std::vector& src_nodes,int nb) { + // processor coordinate + int _nd = (int)src_nodes.size(); + std::vector _src_nodes = src_nodes; + std::vector pco(_nd); + Lexicographic::CoorFromIndex(pco,slot,_src_nodes); + std::vector cpco = { pco[1], pco[2], pco[3], pco[4], pco[0] }; + + // get local block + std::vector _nbc = { _nb[1], _nb[2], _nb[3], _nb[4], _nb[0] }; + assert(_nd == 5); + std::vector c_src_local_blocks(_nd); + for (int i=0;i<_nd;i++) { + assert(_grid->_fdimensions[i] % (src_nodes[i] * _bs[i]) == 
0); + c_src_local_blocks[(i+4) % 5] = _grid->_fdimensions[i] / src_nodes[i] / _bs[i]; + } + std::vector cbcoor(_nd); // coordinate of block in slot in canonical form + Lexicographic::CoorFromIndex(cbcoor,nb,c_src_local_blocks); + + // cpco, cbcoor + std::vector clbcoor(_nd); + for (int i=0;i<_nd;i++) { + int cgcoor = cpco[i] * c_src_local_blocks[i] + cbcoor[i]; // global block coordinate + int pcoor = cgcoor / _nbc[i]; // processor coordinate in my Grid + int tpcoor = _grid->_processor_coor[(i+1)%5]; + if (pcoor != tpcoor) + return -1; + clbcoor[i] = cgcoor - tpcoor * _nbc[i]; // canonical local block coordinate for canonical dimension i + } + + int lnb; + Lexicographic::IndexFromCoor(clbcoor,lnb,_nbc); + //std::cout << "Mapped slot = " << slot << " nb = " << nb << " to " << lnb << std::endl; + return lnb; + } + + + }; + +} diff --git a/tests/lanczos/Test_dwf_compressed_lanczos.cc b/tests/lanczos/Test_dwf_compressed_lanczos.cc index a6eb95e9..45690f05 100644 --- a/tests/lanczos/Test_dwf_compressed_lanczos.cc +++ b/tests/lanczos/Test_dwf_compressed_lanczos.cc @@ -331,7 +331,7 @@ void CoarseGridLanczos(BlockProjector& pr,RealD alpha2,RealD beta,int Npo ) { - IRL2.calc(eval2,coef._v,src_coarse,Nconv,true,SkipTest2); + IRL2.calc(eval2,coef._v,src_coarse,Nconv,true); coef.resize(Nstop2); eval2.resize(Nstop2); @@ -635,7 +635,7 @@ int main (int argc, char ** argv) { if (simple_krylov_basis) { quick_krylov_basis(evec,src,Op1,Nstop1); } else { - IRL1.calc(eval1,evec._v,src,Nconv,false,1); + IRL1.calc(eval1,evec._v,src,Nconv,false); } evec.resize(Nstop1); // and throw away superfluous eval1.resize(Nstop1); diff --git a/tests/lanczos/Test_dwf_compressed_lanczos_reorg.cc b/tests/lanczos/Test_dwf_compressed_lanczos_reorg.cc index a0691116..8fbbacbc 100644 --- a/tests/lanczos/Test_dwf_compressed_lanczos_reorg.cc +++ b/tests/lanczos/Test_dwf_compressed_lanczos_reorg.cc @@ -56,6 +56,7 @@ struct CompressedLanczosParams : Serializable { LanczosParams, FineParams, LanczosParams, 
CoarseParams, ChebyParams, Smoother, + RealD , coarse_relax_tol, std::vector, blockSize, std::string, config, std::vector < std::complex >, omega, @@ -137,12 +138,13 @@ class ImplicitlyRestartedLanczosSmoothedTester : public ImplicitlyRestartedLanc OperatorFunction & _smoother; LinearOperatorBase &_Linop; Aggregation &_Aggregate; - + RealD _coarse_relax_tol; ImplicitlyRestartedLanczosSmoothedTester(LinearFunction &Poly, OperatorFunction &smoother, LinearOperatorBase &Linop, - Aggregation &Aggregate) - : _smoother(smoother), _Linop(Linop),_Aggregate(Aggregate), _Poly(Poly) { }; + Aggregation &Aggregate, + RealD coarse_relax_tol=5.0e3) + : _smoother(smoother), _Linop(Linop),_Aggregate(Aggregate), _Poly(Poly), _coarse_relax_tol(coarse_relax_tol) { }; int TestConvergence(int j,RealD eresid,CoarseField &B, RealD &eval,RealD evalMaxApprox) { @@ -196,7 +198,7 @@ class ImplicitlyRestartedLanczosSmoothedTester : public ImplicitlyRestartedLanc <<"eval = "< nbasis ) eresid = eresid*_coarse_relax_tol; if( (vv=nbasis); @@ -345,7 +347,7 @@ public: evals_fine.resize(nbasis); _Aggregate.subspace.resize(nbasis,_FineGrid); } - void calcCoarse(ChebyParams cheby_op,ChebyParams cheby_smooth, + void calcCoarse(ChebyParams cheby_op,ChebyParams cheby_smooth,RealD relax, int Nstop, int Nk, int Nm,RealD resid, RealD MaxIt, RealD betastp, int MinRes) { @@ -357,8 +359,7 @@ public: ////////////////////////////////////////////////////////////////////////////////////////////////// Chebyshev ChebySmooth(cheby_smooth); - ImplicitlyRestartedLanczosSmoothedTester ChebySmoothTester(ChebyOp,ChebySmooth,_FineOp,_Aggregate); - + ImplicitlyRestartedLanczosSmoothedTester ChebySmoothTester(ChebyOp,ChebySmooth,_FineOp,_Aggregate,relax); evals_coarse.resize(Nm); evec_coarse.resize(Nm,_CoarseGrid); @@ -367,7 +368,7 @@ public: ImplicitlyRestartedLanczos IRL(ChebyOp,ChebyOp,ChebySmoothTester,Nstop,Nk,Nm,resid,MaxIt,betastp,MinRes); int Nconv=0; - IRL.calc(evals_coarse,evec_coarse,src,Nconv,false,1); + 
IRL.calc(evals_coarse,evec_coarse,src,Nconv,false); assert(Nconv>=Nstop); for (int i=0;i Date: Thu, 26 Oct 2017 07:45:56 +0100 Subject: [PATCH 174/377] Update to IRL; getting close to the structure I would like. --- .../iterative/ImplicitlyRestartedLanczos.h | 234 +++++++++++------- 1 file changed, 142 insertions(+), 92 deletions(-) diff --git a/lib/algorithms/iterative/ImplicitlyRestartedLanczos.h b/lib/algorithms/iterative/ImplicitlyRestartedLanczos.h index 6d3e0755..4be2715a 100644 --- a/lib/algorithms/iterative/ImplicitlyRestartedLanczos.h +++ b/lib/algorithms/iterative/ImplicitlyRestartedLanczos.h @@ -71,6 +71,23 @@ void basisRotate(std::vector &basis,Eigen::MatrixXd& Qt,int j0, int j1, i } } +// Extract a single rotated vector +template +void basisRotateJ(Field &result,std::vector &basis,Eigen::MatrixXd& Qt,int j, int k0,int k1,int Nm) +{ + typedef typename Field::vector_object vobj; + GridBase* grid = basis[0]._grid; + + result.checkerboard = basis[0].checkerboard; + parallel_for(int ss=0;ss < grid->oSites();ss++){ + vobj B = zero; + for(int k=k0; k void basisReorderInPlace(std::vector &_v,std::vector& sort_vals, std::vector& idx) { @@ -87,9 +104,7 @@ void basisReorderInPlace(std::vector &_v,std::vector& sort_vals, s assert(idx[i] > i); ////////////////////////////////////// // idx[i] is a table of desired sources giving a permutation. - // // Swap v[i] with v[idx[i]]. 
- // // Find j>i for which _vnew[j] = _vold[i], // track the move idx[j] => idx[i] // track the move idx[i] => i @@ -155,6 +170,49 @@ enum IRLdiagonalisation { ///////////////////////////////////////////////////////////// // Implicitly restarted lanczos ///////////////////////////////////////////////////////////// +template class ImplicitlyRestartedLanczosTester +{ + public: + virtual int TestConvergence(int j,RealD resid,Field &evec, RealD &eval,RealD evalMaxApprox); + virtual int ReconstructEval(int j,RealD resid,Field &evec, RealD &eval,RealD evalMaxApprox); +}; + +template class ImplicitlyRestartedLanczosHermOpTester : public ImplicitlyRestartedLanczosTester +{ + public: + LinearFunction &_HermOpTest; + ImplicitlyRestartedLanczosHermOpTester(LinearFunction &HermOpTest) : _HermOpTest(HermOpTest) { }; + int ReconstructEval(int j,RealD resid,Field &B, RealD &eval,RealD evalMaxApprox) + { + return TestConvergence(j,resid,B,eval,evalMaxApprox); + } + int TestConvergence(int j,RealD eresid,Field &B, RealD &eval,RealD evalMaxApprox) + { + Field v(B); + RealD eval_poly = eval; + // Apply operator + _HermOpTest(B,v); + + RealD vnum = real(innerProduct(B,v)); // HermOp. 
+ RealD vden = norm2(B); + RealD vv0 = norm2(v); + eval = vnum/vden; + v -= eval*B; + + RealD vv = norm2(v) / ::pow(evalMaxApprox,2.0); + + std::cout.precision(13); + std::cout< class ImplicitlyRestartedLanczos { @@ -174,14 +232,19 @@ class ImplicitlyRestartedLanczos { //////////////////////////////// // Embedded objects //////////////////////////////// - LinearFunction &_HermOp; - LinearFunction &_HermOpTest; + LinearFunction &_HermOp; + LinearFunction &_HermOpTest; + ImplicitlyRestartedLanczosTester &_Tester; + // Default tester provided (we need a ref to something in default case) + ImplicitlyRestartedLanczosHermOpTester SimpleTester; ///////////////////////// // Constructor ///////////////////////// + public: ImplicitlyRestartedLanczos(LinearFunction & HermOp, LinearFunction & HermOpTest, + ImplicitlyRestartedLanczosTester & Tester, int _Nstop, // sought vecs int _Nk, // sought vecs int _Nm, // spare vecs @@ -190,7 +253,23 @@ public: RealD _betastp=0.0, // if beta(k) < betastp: converged int _MinRestart=1, int _orth_period = 1, IRLdiagonalisation _diagonalisation= IRLdiagonaliseWithEigen) : - _HermOp(HermOp), _HermOpTest(HermOpTest), + SimpleTester(HermOpTest), _HermOp(HermOp), _HermOpTest(HermOpTest), _Tester(Tester), + Nstop(_Nstop) , Nk(_Nk), Nm(_Nm), + eresid(_eresid), betastp(_betastp), + MaxIter(_MaxIter) , MinRestart(_MinRestart), + orth_period(_orth_period), diagonalisation(_diagonalisation) { }; + + ImplicitlyRestartedLanczos(LinearFunction & HermOp, + LinearFunction & HermOpTest, + int _Nstop, // sought vecs + int _Nk, // sought vecs + int _Nm, // spare vecs + RealD _eresid, // resid in lmdue deficit + int _MaxIter, // Max iterations + RealD _betastp=0.0, // if beta(k) < betastp: converged + int _MinRestart=1, int _orth_period = 1, + IRLdiagonalisation _diagonalisation= IRLdiagonaliseWithEigen) : + SimpleTester(HermOpTest), _HermOp(HermOp), _HermOpTest(HermOpTest), _Tester(SimpleTester), Nstop(_Nstop) , Nk(_Nk), Nm(_Nm), eresid(_eresid), 
betastp(_betastp), MaxIter(_MaxIter) , MinRestart(_MinRestart), @@ -232,7 +311,7 @@ repeat →AVK =VKHK +fKe†K † Extend to an M = K + P step factorization AVM = VMHM + fMeM until convergence */ - void calc(std::vector& eval, std::vector& evec, const Field& src, int& Nconv, bool reverse=true, int SkipTest=0) + void calc(std::vector& eval, std::vector& evec, const Field& src, int& Nconv, bool reverse=true) { GridBase *grid = src._grid; assert(grid == evec[0]._grid); @@ -335,11 +414,18 @@ until convergence ////////////////////////////////// eval2_copy = eval2; - // _sort.push(eval2,Nm); - std::partial_sort(eval2.begin(),eval2.begin()+Nm,eval2.end()); + std::partial_sort(eval2.begin(),eval2.begin()+Nm,eval2.end(),std::greater()); std::cout<0); - basisRotate(evec,Qt,k1-1,k2+1,0,Nm,Nm); /// big constraint on the basis + assert(k20); + basisRotate(evec,Qt,k1-1,k2+1,0,Nm,Nm); /// big constraint on the basis std::cout<= MinRestart) { - std::cout << GridLogIRL << "Rotation to test convergence " << std::endl; - - Field ev0_orig(grid); - ev0_orig = evec[0]; - - basisRotate(evec,Qt,0,Nk,0,Nk,Nm); - { - std::cout << GridLogIRL << "Test convergence" << std::endl; - Field B(grid); - - for(int j = 0; j Nconv ) { + Nconv=j+1; + jj=Nstop; // Terminate the scan } } - - // test if we converged, if so, terminate - std::cout<=Nstop || beta_k < betastp){ - goto converged; - } - - //B[j] +=Qt[k+_Nm*j] * _v[k]._odata[ss]; - { - Eigen::MatrixXd qm = Eigen::MatrixXd::Zero(Nk,Nk); // Restrict Qt to Nk x Nk - for (int k=0;k= "<=Nstop || beta_k < betastp){ + if( Nconv>=Nstop){ + goto converged; + } + } else { std::cout << GridLogIRL << "iter < MinRestart: do not yet test for convergence\n"; } // end of iter loop @@ -461,24 +510,28 @@ until convergence converged: - if (SkipTest == 1) { - eval = eval2; - } else { - ////////////////////////////////////////////// - // test quickly - // PAB -- what precisely does this test? Don't like this eval2, eval2_copy etc... 
- ////////////////////////////////////////////// - for (int j=0;j0) w -= lme[k-1] * evec[k-1]; @@ -529,8 +581,6 @@ until convergence lmd[k] = alph; lme[k] = beta; - std::cout<0 && k % orth_period == 0) { orthogonalize(w,evec,k); // orthonormalise std::cout< Date: Thu, 26 Oct 2017 07:47:42 +0100 Subject: [PATCH 175/377] Moving these out of algorithms --- .../BlockProjector.h | 143 ------- .../BlockedGrid.h | 401 ------------------ .../FieldBasisVector.h | 162 ------- 3 files changed, 706 deletions(-) delete mode 100644 lib/algorithms/iterative/BlockImplicitlyRestartedLanczos/BlockProjector.h delete mode 100644 lib/algorithms/iterative/BlockImplicitlyRestartedLanczos/BlockedGrid.h delete mode 100644 lib/algorithms/iterative/BlockImplicitlyRestartedLanczos/FieldBasisVector.h diff --git a/lib/algorithms/iterative/BlockImplicitlyRestartedLanczos/BlockProjector.h b/lib/algorithms/iterative/BlockImplicitlyRestartedLanczos/BlockProjector.h deleted file mode 100644 index 6becaa66..00000000 --- a/lib/algorithms/iterative/BlockImplicitlyRestartedLanczos/BlockProjector.h +++ /dev/null @@ -1,143 +0,0 @@ -namespace Grid { - -/* - BlockProjector - - If _HP_BLOCK_PROJECTORS_ is defined, we assume that _evec is a basis that is not - fully orthonormalized (to the precision of the coarse field) and we allow for higher-precision - coarse field than basis field. 
- -*/ -//#define _HP_BLOCK_PROJECTORS_ - -template -class BlockProjector { -public: - - BasisFieldVector& _evec; - BlockedGrid& _bgrid; - - BlockProjector(BasisFieldVector& evec, BlockedGrid& bgrid) : _evec(evec), _bgrid(bgrid) { - } - - void createOrthonormalBasis(RealD thres = 0.0) { - - GridStopWatch sw; - sw.Start(); - - int cnt = 0; - -#pragma omp parallel shared(cnt) - { - int lcnt = 0; - -#pragma omp for - for (int b=0;b<_bgrid._o_blocks;b++) { - - for (int i=0;i<_evec._Nm;i++) { - - auto nrm0 = _bgrid.block_sp(b,_evec._v[i],_evec._v[i]); - - // |i> -= |j> - for (int j=0;j - void coarseToFine(const CoarseField& in, Field& out) { - - out = zero; - out.checkerboard = _evec._v[0].checkerboard; - - int Nbasis = sizeof(in._odata[0]._internal._internal) / sizeof(in._odata[0]._internal._internal[0]); - assert(Nbasis == _evec._Nm); - -#pragma omp parallel for - for (int b=0;b<_bgrid._o_blocks;b++) { - for (int j=0;j<_evec._Nm;j++) { - _bgrid.block_caxpy(b,out,in._odata[b]._internal._internal[j],_evec._v[j],out); - } - } - - } - - template - void fineToCoarse(const Field& in, CoarseField& out) { - - out = zero; - - int Nbasis = sizeof(out._odata[0]._internal._internal) / sizeof(out._odata[0]._internal._internal[0]); - assert(Nbasis == _evec._Nm); - - - Field tmp(_bgrid._grid); - tmp = in; - -#pragma omp parallel for - for (int b=0;b<_bgrid._o_blocks;b++) { - for (int j=0;j<_evec._Nm;j++) { - // |rhs> -= |j> - auto c = _bgrid.block_sp(b,_evec._v[j],tmp); - _bgrid.block_caxpy(b,tmp,-c,_evec._v[j],tmp); // may make this more numerically stable - out._odata[b]._internal._internal[j] = c; - } - } - - } - - template - void deflateFine(BasisFieldVector& _coef,const std::vector& eval,int N,const Field& src_orig,Field& result) { - result = zero; - for (int i=0;i - void deflateCoarse(BasisFieldVector& _coef,const std::vector& eval,int N,const Field& src_orig,Field& result) { - CoarseField src_coarse(_coef._v[0]._grid); - CoarseField result_coarse = src_coarse; - result_coarse 
= zero; - fineToCoarse(src_orig,src_coarse); - for (int i=0;i - void deflate(BasisFieldVector& _coef,const std::vector& eval,int N,const Field& src_orig,Field& result) { - // Deflation on coarse Grid is much faster, so use it by default. Deflation on fine Grid is kept for legacy reasons for now. - deflateCoarse(_coef,eval,N,src_orig,result); - } - -}; -} diff --git a/lib/algorithms/iterative/BlockImplicitlyRestartedLanczos/BlockedGrid.h b/lib/algorithms/iterative/BlockImplicitlyRestartedLanczos/BlockedGrid.h deleted file mode 100644 index 821272de..00000000 --- a/lib/algorithms/iterative/BlockImplicitlyRestartedLanczos/BlockedGrid.h +++ /dev/null @@ -1,401 +0,0 @@ -namespace Grid { - -template -class BlockedGrid { -public: - GridBase* _grid; - typedef typename Field::scalar_type Coeff_t; - typedef typename Field::vector_type vCoeff_t; - - std::vector _bs; // block size - std::vector _nb; // number of blocks - std::vector _l; // local dimensions irrespective of cb - std::vector _l_cb; // local dimensions of checkerboarded vector - std::vector _l_cb_o; // local dimensions of inner checkerboarded vector - std::vector _bs_cb; // block size in checkerboarded vector - std::vector _nb_o; // number of blocks of simd o-sites - - int _nd, _blocks, _cf_size, _cf_block_size, _cf_o_block_size, _o_blocks, _block_sites; - - BlockedGrid(GridBase* grid, const std::vector& block_size) : - _grid(grid), _bs(block_size), _nd((int)_bs.size()), - _nb(block_size), _l(block_size), _l_cb(block_size), _nb_o(block_size), - _l_cb_o(block_size), _bs_cb(block_size) { - - _blocks = 1; - _o_blocks = 1; - _l = grid->FullDimensions(); - _l_cb = grid->LocalDimensions(); - _l_cb_o = grid->_rdimensions; - - _cf_size = 1; - _block_sites = 1; - for (int i=0;i<_nd;i++) { - _l[i] /= grid->_processors[i]; - - assert(!(_l[i] % _bs[i])); // lattice must accommodate choice of blocksize - - int r = _l[i] / _l_cb[i]; - assert(!(_bs[i] % r)); // checkerboarding must accommodate choice of blocksize - _bs_cb[i] = 
_bs[i] / r; - _block_sites *= _bs_cb[i]; - _nb[i] = _l[i] / _bs[i]; - _nb_o[i] = _nb[i] / _grid->_simd_layout[i]; - if (_nb[i] % _grid->_simd_layout[i]) { // simd must accommodate choice of blocksize - std::cout << GridLogMessage << "Problem: _nb[" << i << "] = " << _nb[i] << " _grid->_simd_layout[" << i << "] = " << _grid->_simd_layout[i] << std::endl; - assert(0); - } - _blocks *= _nb[i]; - _o_blocks *= _nb_o[i]; - _cf_size *= _l[i]; - } - - _cf_size *= 12 / 2; - _cf_block_size = _cf_size / _blocks; - _cf_o_block_size = _cf_size / _o_blocks; - - std::cout << GridLogMessage << "BlockedGrid:" << std::endl; - std::cout << GridLogMessage << " _l = " << _l << std::endl; - std::cout << GridLogMessage << " _l_cb = " << _l_cb << std::endl; - std::cout << GridLogMessage << " _l_cb_o = " << _l_cb_o << std::endl; - std::cout << GridLogMessage << " _bs = " << _bs << std::endl; - std::cout << GridLogMessage << " _bs_cb = " << _bs_cb << std::endl; - - std::cout << GridLogMessage << " _nb = " << _nb << std::endl; - std::cout << GridLogMessage << " _nb_o = " << _nb_o << std::endl; - std::cout << GridLogMessage << " _blocks = " << _blocks << std::endl; - std::cout << GridLogMessage << " _o_blocks = " << _o_blocks << std::endl; - std::cout << GridLogMessage << " sizeof(vCoeff_t) = " << sizeof(vCoeff_t) << std::endl; - std::cout << GridLogMessage << " _cf_size = " << _cf_size << std::endl; - std::cout << GridLogMessage << " _cf_block_size = " << _cf_block_size << std::endl; - std::cout << GridLogMessage << " _block_sites = " << _block_sites << std::endl; - std::cout << GridLogMessage << " _grid->oSites() = " << _grid->oSites() << std::endl; - - // _grid->Barrier(); - //abort(); - } - - void block_to_coor(int b, std::vector& x0) { - - std::vector bcoor; - bcoor.resize(_nd); - x0.resize(_nd); - assert(b < _o_blocks); - Lexicographic::CoorFromIndex(bcoor,b,_nb_o); - int i; - - for (i=0;i<_nd;i++) { - x0[i] = bcoor[i]*_bs_cb[i]; - } - - //std::cout << GridLogMessage << "Map block b -> 
" << x0 << std::endl; - - } - - void block_site_to_o_coor(const std::vector& x0, std::vector& coor, int i) { - Lexicographic::CoorFromIndex(coor,i,_bs_cb); - for (int j=0;j<_nd;j++) - coor[j] += x0[j]; - } - - int block_site_to_o_site(const std::vector& x0, int i) { - std::vector coor; coor.resize(_nd); - block_site_to_o_coor(x0,coor,i); - Lexicographic::IndexFromCoor(coor,i,_l_cb_o); - return i; - } - - vCoeff_t block_sp(int b, const Field& x, const Field& y) { - - std::vector x0; - block_to_coor(b,x0); - - vCoeff_t ret = 0.0; - for (int i=0;i<_block_sites;i++) { // only odd sites - int ss = block_site_to_o_site(x0,i); - ret += TensorRemove(innerProduct(x._odata[ss],y._odata[ss])); - } - - return ret; - - } - - vCoeff_t block_sp(int b, const Field& x, const std::vector< ComplexD >& y) { - - std::vector x0; - block_to_coor(b,x0); - - constexpr int nsimd = sizeof(vCoeff_t) / sizeof(Coeff_t); - int lsize = _cf_o_block_size / _block_sites; - - std::vector< ComplexD > ret(nsimd); - for (int i=0;i - void vcaxpy(iScalar& r,const vCoeff_t& a,const iScalar& x,const iScalar& y) { - vcaxpy(r._internal,a,x._internal,y._internal); - } - - template - void vcaxpy(iVector& r,const vCoeff_t& a,const iVector& x,const iVector& y) { - for (int i=0;i x0; - block_to_coor(b,x0); - - for (int i=0;i<_block_sites;i++) { // only odd sites - int ss = block_site_to_o_site(x0,i); - vcaxpy(ret._odata[ss],a,x._odata[ss],y._odata[ss]); - } - - } - - void block_caxpy(int b, std::vector< ComplexD >& ret, const vCoeff_t& a, const Field& x, const std::vector< ComplexD >& y) { - std::vector x0; - block_to_coor(b,x0); - - constexpr int nsimd = sizeof(vCoeff_t) / sizeof(Coeff_t); - int lsize = _cf_o_block_size / _block_sites; - - for (int i=0;i<_block_sites;i++) { // only odd sites - int ss = block_site_to_o_site(x0,i); - - int n = lsize / nsimd; - for (int l=0;l& x) { - std::vector x0; - block_to_coor(b,x0); - - int lsize = _cf_o_block_size / _block_sites; - - for (int i=0;i<_block_sites;i++) { // only 
odd sites - int ss = block_site_to_o_site(x0,i); - - for (int l=0;l& x) { - std::vector x0; - block_to_coor(b,x0); - - int lsize = _cf_o_block_size / _block_sites; - - for (int i=0;i<_block_sites;i++) { // only odd sites - int ss = block_site_to_o_site(x0,i); - - for (int l=0;l - void vcscale(iScalar& r,const vCoeff_t& a,const iScalar& x) { - vcscale(r._internal,a,x._internal); - } - - template - void vcscale(iVector& r,const vCoeff_t& a,const iVector& x) { - for (int i=0;i x0; - block_to_coor(b,x0); - - for (int i=0;i<_block_sites;i++) { // only odd sites - int ss = block_site_to_o_site(x0,i); - vcscale(ret._odata[ss],a,ret._odata[ss]); - } - } - - void getCanonicalBlockOffset(int cb, std::vector& x0) { - const int ndim = 5; - assert(_nb.size() == ndim); - std::vector _nbc = { _nb[1], _nb[2], _nb[3], _nb[4], _nb[0] }; - std::vector _bsc = { _bs[1], _bs[2], _bs[3], _bs[4], _bs[0] }; - x0.resize(ndim); - - assert(cb >= 0); - assert(cb < _nbc[0]*_nbc[1]*_nbc[2]*_nbc[3]*_nbc[4]); - - Lexicographic::CoorFromIndex(x0,cb,_nbc); - int i; - - for (i=0;i& buf) { - std::vector _bsc = { _bs[1], _bs[2], _bs[3], _bs[4], _bs[0] }; - std::vector ldim = v._grid->LocalDimensions(); - std::vector cldim = { ldim[1], ldim[2], ldim[3], ldim[4], ldim[0] }; - const int _nbsc = _bs_cb[0]*_bs_cb[1]*_bs_cb[2]*_bs_cb[3]*_bs_cb[4]; - // take canonical block cb of v and put it in canonical ordering in buf - std::vector cx0; - getCanonicalBlockOffset(cb,cx0); - -#pragma omp parallel - { - std::vector co0,cl0; - co0=cx0; cl0=cx0; - -#pragma omp for - for (int i=0;i<_nbsc;i++) { - Lexicographic::CoorFromIndex(co0,2*i,_bsc); // 2* for eo - for (int j=0;j<(int)_bsc.size();j++) - cl0[j] = cx0[j] + co0[j]; - - std::vector l0 = { cl0[4], cl0[0], cl0[1], cl0[2], cl0[3] }; - int oi = v._grid->oIndex(l0); - int ii = v._grid->iIndex(l0); - int lti = i; - - //if (cb < 2 && i<2) - // std::cout << GridLogMessage << "Map: " << cb << ", " << i << " To: " << cl0 << ", " << cx0 << ", " << oi << ", " << ii << 
std::endl; - - for (int s=0;s<4;s++) - for (int c=0;c<3;c++) { - Coeff_t& ld = ((Coeff_t*)&v._odata[oi]._internal._internal[s]._internal[c])[ii]; - int ti = 12*lti + 3*s + c; - ld = Coeff_t(buf[2*ti+0], buf[2*ti+1]); - } - } - } - } - - void peekBlockOfVectorCanonical(int cb,const Field& v,std::vector& buf) { - std::vector _bsc = { _bs[1], _bs[2], _bs[3], _bs[4], _bs[0] }; - std::vector ldim = v._grid->LocalDimensions(); - std::vector cldim = { ldim[1], ldim[2], ldim[3], ldim[4], ldim[0] }; - const int _nbsc = _bs_cb[0]*_bs_cb[1]*_bs_cb[2]*_bs_cb[3]*_bs_cb[4]; - // take canonical block cb of v and put it in canonical ordering in buf - std::vector cx0; - getCanonicalBlockOffset(cb,cx0); - - buf.resize(_cf_block_size * 2); - -#pragma omp parallel - { - std::vector co0,cl0; - co0=cx0; cl0=cx0; - -#pragma omp for - for (int i=0;i<_nbsc;i++) { - Lexicographic::CoorFromIndex(co0,2*i,_bsc); // 2* for eo - for (int j=0;j<(int)_bsc.size();j++) - cl0[j] = cx0[j] + co0[j]; - - std::vector l0 = { cl0[4], cl0[0], cl0[1], cl0[2], cl0[3] }; - int oi = v._grid->oIndex(l0); - int ii = v._grid->iIndex(l0); - int lti = i; - - //if (cb < 2 && i<2) - // std::cout << GridLogMessage << "Map: " << cb << ", " << i << " To: " << cl0 << ", " << cx0 << ", " << oi << ", " << ii << std::endl; - - for (int s=0;s<4;s++) - for (int c=0;c<3;c++) { - Coeff_t& ld = ((Coeff_t*)&v._odata[oi]._internal._internal[s]._internal[c])[ii]; - int ti = 12*lti + 3*s + c; - buf[2*ti+0] = ld.real(); - buf[2*ti+1] = ld.imag(); - } - } - } - } - - int globalToLocalCanonicalBlock(int slot,const std::vector& src_nodes,int nb) { - // processor coordinate - int _nd = (int)src_nodes.size(); - std::vector _src_nodes = src_nodes; - std::vector pco(_nd); - Lexicographic::CoorFromIndex(pco,slot,_src_nodes); - std::vector cpco = { pco[1], pco[2], pco[3], pco[4], pco[0] }; - - // get local block - std::vector _nbc = { _nb[1], _nb[2], _nb[3], _nb[4], _nb[0] }; - assert(_nd == 5); - std::vector c_src_local_blocks(_nd); - for 
(int i=0;i<_nd;i++) { - assert(_grid->_fdimensions[i] % (src_nodes[i] * _bs[i]) == 0); - c_src_local_blocks[(i+4) % 5] = _grid->_fdimensions[i] / src_nodes[i] / _bs[i]; - } - std::vector cbcoor(_nd); // coordinate of block in slot in canonical form - Lexicographic::CoorFromIndex(cbcoor,nb,c_src_local_blocks); - - // cpco, cbcoor - std::vector clbcoor(_nd); - for (int i=0;i<_nd;i++) { - int cgcoor = cpco[i] * c_src_local_blocks[i] + cbcoor[i]; // global block coordinate - int pcoor = cgcoor / _nbc[i]; // processor coordinate in my Grid - int tpcoor = _grid->_processor_coor[(i+1)%5]; - if (pcoor != tpcoor) - return -1; - clbcoor[i] = cgcoor - tpcoor * _nbc[i]; // canonical local block coordinate for canonical dimension i - } - - int lnb; - Lexicographic::IndexFromCoor(clbcoor,lnb,_nbc); - //std::cout << "Mapped slot = " << slot << " nb = " << nb << " to " << lnb << std::endl; - return lnb; - } - - - }; - -} diff --git a/lib/algorithms/iterative/BlockImplicitlyRestartedLanczos/FieldBasisVector.h b/lib/algorithms/iterative/BlockImplicitlyRestartedLanczos/FieldBasisVector.h deleted file mode 100644 index 3ad516ef..00000000 --- a/lib/algorithms/iterative/BlockImplicitlyRestartedLanczos/FieldBasisVector.h +++ /dev/null @@ -1,162 +0,0 @@ -namespace Grid { - -template -class BasisFieldVector { - public: - int _Nm; - - typedef typename Field::scalar_type Coeff_t; - typedef typename Field::vector_type vCoeff_t; - typedef typename Field::vector_object vobj; - typedef typename vobj::scalar_object sobj; - - std::vector _v; // _Nfull vectors - - void report(int n,GridBase* value) { - - std::cout << GridLogMessage << "BasisFieldVector allocated:\n"; - std::cout << GridLogMessage << " Delta N = " << n << "\n"; - std::cout << GridLogMessage << " Size of full vectors (size) = " << - ((double)n*sizeof(vobj)*value->oSites() / 1024./1024./1024.) 
<< " GB\n"; - std::cout << GridLogMessage << " Size = " << _v.size() << " Capacity = " << _v.capacity() << std::endl; - - value->Barrier(); - - if (value->IsBoss()) { - system("cat /proc/meminfo"); - } - - value->Barrier(); - - } - - BasisFieldVector(int Nm,GridBase* value) : _Nm(Nm), _v(Nm,value) { - report(Nm,value); - } - - ~BasisFieldVector() { - } - - Field& operator[](int i) { - return _v[i]; - } - - void orthogonalize(Field& w, int k) { - for(int j=0; j B(Nm); - -#pragma omp for - for(int ss=0;ss < grid->oSites();ss++){ - for(int j=j0; j _Nm) - _v.reserve(n); - - _v.resize(n,_v[0]._grid); - - if (n < _Nm) - _v.shrink_to_fit(); - - report(n - _Nm,_v[0]._grid); - - _Nm = n; - } - - std::vector getIndex(std::vector& sort_vals) { - - std::vector idx(sort_vals.size()); - iota(idx.begin(), idx.end(), 0); - - // sort indexes based on comparing values in v - sort(idx.begin(), idx.end(), - [&sort_vals](int i1, int i2) {return ::fabs(sort_vals[i1]) < ::fabs(sort_vals[i2]);}); - - return idx; - } - - void reorderInPlace(std::vector& sort_vals, std::vector& idx) { - GridStopWatch gsw; - gsw.Start(); - - int nswaps = 0; - for (size_t i=0;i& sort_vals, bool reverse) { - - std::vector idx = getIndex(sort_vals); - if (reverse) - std::reverse(idx.begin(), idx.end()); - - reorderInPlace(sort_vals,idx); - - } - - void deflate(const std::vector& eval,const Field& src_orig,Field& result) { - result = zero; - int N = (int)_v.size(); - for (int i=0;i Date: Thu, 26 Oct 2017 07:48:03 +0100 Subject: [PATCH 176/377] Test for split/unsplit in isolation --- tests/solver/Test_split_grid.cc | 144 ++++++++++++++++++++++++++++++++ 1 file changed, 144 insertions(+) create mode 100644 tests/solver/Test_split_grid.cc diff --git a/tests/solver/Test_split_grid.cc b/tests/solver/Test_split_grid.cc new file mode 100644 index 00000000..90969b85 --- /dev/null +++ b/tests/solver/Test_split_grid.cc @@ -0,0 +1,144 @@ + 
/************************************************************************************* + + Grid physics library, www.github.com/paboyle/Grid + + Source file: ./tests/Test_dwf_mrhs_cg.cc + + Copyright (C) 2015 + +Author: Peter Boyle + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along + with this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + + See the full license in the file "LICENSE" in the top level distribution directory + *************************************************************************************/ + /* END LEGAL */ +#include +#include + +using namespace std; +using namespace Grid; +using namespace Grid::QCD; + +int main (int argc, char ** argv) +{ + typedef typename DomainWallFermionR::FermionField FermionField; + typedef typename DomainWallFermionR::ComplexField ComplexField; + typename DomainWallFermionR::ImplParams params; + + const int Ls=4; + + Grid_init(&argc,&argv); + + std::vector latt_size = GridDefaultLatt(); + std::vector simd_layout = GridDefaultSimd(Nd,vComplex::Nsimd()); + std::vector mpi_layout = GridDefaultMpi(); + std::vector mpi_split (mpi_layout.size(),1); + + GridCartesian * UGrid = SpaceTimeGrid::makeFourDimGrid(GridDefaultLatt(), GridDefaultSimd(Nd,vComplex::Nsimd()),GridDefaultMpi()); + GridCartesian * FGrid = SpaceTimeGrid::makeFiveDimGrid(Ls,UGrid); + GridRedBlackCartesian * rbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(UGrid); + 
GridRedBlackCartesian * FrbGrid = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls,UGrid); + + int nrhs = UGrid->RankCount() ; + + ///////////////////////////////////////////// + // Split into 1^4 mpi communicators + ///////////////////////////////////////////// + GridCartesian * SGrid = new GridCartesian(GridDefaultLatt(), + GridDefaultSimd(Nd,vComplex::Nsimd()), + mpi_split, + *UGrid); + + GridCartesian * SFGrid = SpaceTimeGrid::makeFiveDimGrid(Ls,SGrid); + GridRedBlackCartesian * SrbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(SGrid); + GridRedBlackCartesian * SFrbGrid = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls,SGrid); + + /////////////////////////////////////////////// + // Set up the problem as a 4d spreadout job + /////////////////////////////////////////////// + std::vector seeds({1,2,3,4}); + + GridParallelRNG pRNG(UGrid ); pRNG.SeedFixedIntegers(seeds); + GridParallelRNG pRNG5(FGrid); pRNG5.SeedFixedIntegers(seeds); + std::vector src(nrhs,FGrid); + std::vector src_chk(nrhs,FGrid); + std::vector result(nrhs,FGrid); + FermionField tmp(FGrid); + + for(int s=0;sThisRank(); + + LatticeGaugeField s_Umu(SGrid); + FermionField s_src(SFGrid); + FermionField s_tmp(SFGrid); + FermionField s_res(SFGrid); + + /////////////////////////////////////////////////////////////// + // split the source out using MPI instead of I/O + /////////////////////////////////////////////////////////////// + Grid_split (Umu,s_Umu); + Grid_split (src,s_src); + + /////////////////////////////////////////////////////////////// + // Set up N-solvers as trivially parallel + /////////////////////////////////////////////////////////////// + RealD mass=0.01; + RealD M5=1.8; + DomainWallFermionR Dchk(Umu,*FGrid,*FrbGrid,*UGrid,*rbGrid,mass,M5); + DomainWallFermionR Ddwf(s_Umu,*SFGrid,*SFrbGrid,*SGrid,*SrbGrid,mass,M5); + + std::cout << GridLogMessage << "****************************************************************** "< HermOp(Ddwf); + MdagMLinearOperator HermOpCk(Dchk); + ConjugateGradient 
CG((1.0e-8/(me+1)),10000); + s_res = zero; + CG(HermOp,s_src,s_res); + + ///////////////////////////////////////////////////////////// + // Report how long they all took + ///////////////////////////////////////////////////////////// + std::vector iterations(nrhs,0); + iterations[me] = CG.IterationsToComplete; + + for(int n=0;nGlobalSum(iterations[n]); + std::cout << GridLogMessage<<" Rank "< Date: Thu, 26 Oct 2017 16:25:01 +0100 Subject: [PATCH 177/377] Final? candidate for push back on the lanczos reorg feature --- .../Test_dwf_compressed_lanczos_reorg.cc | 33 ++----------------- 1 file changed, 3 insertions(+), 30 deletions(-) diff --git a/tests/lanczos/Test_dwf_compressed_lanczos_reorg.cc b/tests/lanczos/Test_dwf_compressed_lanczos_reorg.cc index 8fbbacbc..ad1aaa47 100644 --- a/tests/lanczos/Test_dwf_compressed_lanczos_reorg.cc +++ b/tests/lanczos/Test_dwf_compressed_lanczos_reorg.cc @@ -374,20 +374,6 @@ public: for (int i=0;i Date: Thu, 26 Oct 2017 18:23:55 +0100 Subject: [PATCH 178/377] Debugging force term --- lib/qcd/action/fermion/FermionOperatorImpl.h | 52 ++++- lib/qcd/action/fermion/WilsonCloverFermion.cc | 30 +-- lib/qcd/action/fermion/WilsonCloverFermion.h | 128 ++++++++++- tests/forces/Test_wilson_force.cc | 7 +- tests/forces/Test_wilsonclover_force.cc | 209 ++++++++++++++++++ 5 files changed, 395 insertions(+), 31 deletions(-) create mode 100644 tests/forces/Test_wilsonclover_force.cc diff --git a/lib/qcd/action/fermion/FermionOperatorImpl.h b/lib/qcd/action/fermion/FermionOperatorImpl.h index 9d24deb2..89bd9a15 100644 --- a/lib/qcd/action/fermion/FermionOperatorImpl.h +++ b/lib/qcd/action/fermion/FermionOperatorImpl.h @@ -254,8 +254,22 @@ namespace QCD { GaugeLinkField link(mat._grid); link = TraceIndex(outerProduct(Btilde,A)); PokeIndex(mat,link,mu); - } + } + + inline void outerProductImpl(PropagatorField &mat, const FermionField &B, const FermionField &A){ + mat = outerProduct(B,A); + } + + inline void TraceSpinImpl(GaugeLinkField &mat, 
PropagatorField&P) { + mat = TraceIndex(P); + } + inline void extractLinkField(std::vector &mat, DoubledGaugeField &Uds){ + for (int mu = 0; mu < Nd; mu++) + mat[mu] = PeekIndex(Uds, mu); + } + + inline void InsertForce5D(GaugeField &mat, FermionField &Btilde, FermionField Ã,int mu){ int Ls=Btilde._grid->_fdimensions[0]; @@ -373,6 +387,19 @@ class DomainWallVec5dImpl : public PeriodicGaugeImpl< GaugeImplTypes< S,Nrepres assert(0); } + inline void outerProductImpl(PropagatorField &mat, const FermionField &Btilde, const FermionField &A){ + assert(0); + } + + inline void TraceSpinImpl(GaugeLinkField &mat, PropagatorField&P) { + assert(0); + } + + inline void extractLinkField(std::vector &mat, DoubledGaugeField &Uds){ + assert(0); + } + + inline void InsertForce5D(GaugeField &mat, FermionField &Btilde, FermionField Ã, int mu) { assert(0); @@ -611,6 +638,25 @@ class GparityWilsonImpl : public ConjugateGaugeImpl(P); + parallel_for(auto ss = tmp.begin(); ss < tmp.end(); ss++) { + mat[ss]() = tmp[ss](0, 0) + conjugate(tmp[ss](1, 1)); + } + */ + } + + inline void extractLinkField(std::vector &mat, DoubledGaugeField &Uds){ + assert(0); + } + inline void InsertForce5D(GaugeField &mat, FermionField &Btilde, FermionField Ã, int mu) { int Ls = Btilde._grid->_fdimensions[0]; @@ -751,8 +797,8 @@ class StaggeredImpl : public PeriodicGaugeImpl(outerProduct(Btilde,A)); PokeIndex(mat,link,mu); - } - + } + inline void InsertForce5D(GaugeField &mat, FermionField &Btilde, FermionField Ã,int mu){ assert (0); // Must never hit diff --git a/lib/qcd/action/fermion/WilsonCloverFermion.cc b/lib/qcd/action/fermion/WilsonCloverFermion.cc index 1dd12f52..2159fffc 100644 --- a/lib/qcd/action/fermion/WilsonCloverFermion.cc +++ b/lib/qcd/action/fermion/WilsonCloverFermion.cc @@ -43,11 +43,15 @@ RealD WilsonCloverFermion::M(const FermionField &in, FermionField &out) // Wilson term out.checkerboard = in.checkerboard; - this->Dhop(in, out, DaggerNo); + //this->Dhop(in, out, DaggerNo); // Clover term 
Mooee(in, temp); + //hack + out = zero; + + out += temp; return norm2(out); } @@ -59,11 +63,14 @@ RealD WilsonCloverFermion::Mdag(const FermionField &in, FermionField &out) // Wilson term out.checkerboard = in.checkerboard; - this->Dhop(in, out, DaggerYes); + //this->Dhop(in, out, DaggerYes); // Clover term MooeeDag(in, temp); + //hack + out = zero; + out += temp; return norm2(out); } @@ -84,7 +91,7 @@ void WilsonCloverFermion::ImportGauge(const GaugeField &_Umu) WilsonLoops::FieldStrength(Ez, _Umu, Tdir, Zdir); // Compute the Clover Operator acting on Colour and Spin - CloverTerm = fillCloverYZ(Bx); + CloverTerm = fillCloverYZ(Bx); CloverTerm += fillCloverXZ(By); CloverTerm += fillCloverXY(Bz); CloverTerm += fillCloverXT(Ex); @@ -223,23 +230,6 @@ void WilsonCloverFermion::MooeeInternal(const FermionField &in, FermionFie } // MooeeInternal -// Derivative parts -template -void WilsonCloverFermion::MDeriv(GaugeField &mat, const FermionField &U, const FermionField &V, int dag) -{ - - GaugeField tmp(mat._grid); - - conformable(U._grid, V._grid); - conformable(U._grid, mat._grid); - - mat.checkerboard = U.checkerboard; - tmp.checkerboard = U.checkerboard; - - this->DhopDeriv(mat, U, V, dag); - MooDeriv(tmp, U, V, dag); - mat += tmp; -} // Derivative parts template diff --git a/lib/qcd/action/fermion/WilsonCloverFermion.h b/lib/qcd/action/fermion/WilsonCloverFermion.h index 34482941..d8a42129 100644 --- a/lib/qcd/action/fermion/WilsonCloverFermion.h +++ b/lib/qcd/action/fermion/WilsonCloverFermion.h @@ -67,15 +67,18 @@ public: CloverTermOdd(&Hgrid), CloverTermInvEven(&Hgrid), CloverTermInvOdd(&Hgrid), - CloverTermDagEven(&Hgrid), - CloverTermDagOdd(&Hgrid), - CloverTermInvDagEven(&Hgrid), - CloverTermInvDagOdd(&Hgrid) + CloverTermDagEven(&Hgrid), + CloverTermDagOdd(&Hgrid), + CloverTermInvDagEven(&Hgrid), + CloverTermInvDagOdd(&Hgrid) { csw = _csw; assert(Nd == 4); // require 4 dimensions - if (csw == 0) std::cout << GridLogWarning << "Initializing WilsonCloverFermion 
with csw = 0" << std::endl; + if (csw == 0) + std::cout << GridLogWarning << "Initializing WilsonCloverFermion with csw = 0" << std::endl; + + ImportGauge(_Umu); } virtual RealD M(const FermionField &in, FermionField &out); @@ -87,16 +90,127 @@ public: virtual void MooeeInvDag(const FermionField &in, FermionField &out); virtual void MooeeInternal(const FermionField &in, FermionField &out, int dag, int inv); - virtual void MDeriv(GaugeField &mat, const FermionField &U, const FermionField &V, int dag); + //virtual void MDeriv(GaugeField &mat, const FermionField &U, const FermionField &V, int dag); virtual void MooDeriv(GaugeField &mat, const FermionField &U, const FermionField &V, int dag); virtual void MeeDeriv(GaugeField &mat, const FermionField &U, const FermionField &V, int dag); void ImportGauge(const GaugeField &_Umu); + // Derivative parts unpreconditioned pseudofermions + void MDeriv(GaugeField &force, const FermionField &X, const FermionField &Y, int dag) + { + conformable(X._grid, Y._grid); + conformable(X._grid, force._grid); + GaugeLinkField force_mu(force._grid), lambda(force._grid); + GaugeField clover_force(force._grid); + PropagatorField Lambda(force._grid); + + // Here we are hitting some performance issues: + // need to extract the components of the DoubledGaugeField + // for each call + // Possible solution + // Create a vector object to store them? 
(cons: wasting space) + std::vector U(Nd, this->Umu._grid); + + Impl::extractLinkField(U, this->Umu); + + force = zero; + // Derivative of the Wilson hopping term + //this->DhopDeriv(force, X, Y, dag); + + /////////////////////////////////////////////////////////// + // Clover term derivative + /////////////////////////////////////////////////////////// + Impl::outerProductImpl(Lambda, X, Y); + + Gamma::Algebra sigma[] = { + Gamma::Algebra::SigmaXY, + Gamma::Algebra::SigmaXZ, + Gamma::Algebra::SigmaXT, + Gamma::Algebra::MinusSigmaXY, + Gamma::Algebra::SigmaYZ, + Gamma::Algebra::SigmaYT, + Gamma::Algebra::MinusSigmaXZ, + Gamma::Algebra::MinusSigmaYZ, + Gamma::Algebra::SigmaZT, + Gamma::Algebra::MinusSigmaXT, + Gamma::Algebra::MinusSigmaYT, + Gamma::Algebra::MinusSigmaZT}; + + /* + sigma_{\mu \nu}= + | 0 sigma[0] sigma[1] sigma[2] | + | sigma[3] 0 sigma[4] sigma[5] | + | sigma[6] sigma[7] 0 sigma[8] | + | sigma[9] sigma[10] sigma[11] 0 | + */ + + int count = 0; + clover_force = zero; + for (int mu = 0; mu < 4; mu++) + { + force_mu = zero; + for (int nu = 0; nu < 4; nu++) + { + if (mu == nu) continue; + PropagatorField Slambda = Gamma(sigma[count]) * Lambda; + Impl::TraceSpinImpl(lambda, Slambda); //traceSpin + force_mu += Cmunu(U, lambda, mu, nu); + count++; + } + + pokeLorentz(clover_force, U[mu] * force_mu, mu); + } + clover_force *= csw / 8.; + force += clover_force; + } + + // Computing C_{\mu \nu}(x) as in Eq.(B.39) in Zbigniew Sroczynski's PhD thesis + GaugeLinkField Cmunu(std::vector &U, GaugeLinkField &lambda, int mu, int nu) + { + conformable(lambda._grid, U[0]._grid); + GaugeLinkField out(lambda._grid), tmp(lambda._grid); + + // insertion in upper staple + // please check redundancy of shift operations + + // C1+ + tmp = lambda * U[nu]; + out = Impl::ShiftStaple(Impl::CovShiftForward(tmp, nu, Impl::CovShiftBackward(U[mu], mu, Impl::CovShiftIdentityBackward(U[nu], nu))), mu); + + // C2+ + tmp = U[mu] * Impl::CovShiftIdentityForward(adj(lambda), mu); + out += 
Impl::ShiftStaple(Impl::CovShiftForward(U[nu], nu, Impl::CovShiftBackward(tmp, mu, Impl::CovShiftIdentityBackward(U[nu], nu))), mu); + + // C3+ + tmp = U[nu] * Impl::CovShiftIdentityForward(adj(lambda), nu); + out += Impl::ShiftStaple(Impl::CovShiftForward(U[nu], nu, Impl::CovShiftBackward(U[mu], mu, Impl::CovShiftIdentityBackward(tmp, nu))), mu); + + // C4+ + out += Impl::ShiftStaple(Impl::CovShiftForward(U[nu], nu, Impl::CovShiftBackward(U[mu], mu, Impl::CovShiftIdentityBackward(U[nu], nu))), mu) * lambda; + + // insertion in lower staple + // C1- + out -= Impl::ShiftStaple(lambda, mu) * Impl::ShiftStaple(Impl::CovShiftBackward(U[nu], nu, Impl::CovShiftBackward(U[mu], mu, U[nu])), mu); + + // C2- + tmp = adj(lambda) * U[nu]; + out -= Impl::ShiftStaple(Impl::CovShiftBackward(tmp, nu, Impl::CovShiftBackward(U[mu], mu, U[nu])), mu); + + // C3- + tmp = lambda * U[nu]; + out -= Impl::ShiftStaple(Impl::CovShiftBackward(U[nu], nu, Impl::CovShiftBackward(U[mu], mu, tmp)), mu); + + // C4- + out -= Impl::ShiftStaple(Impl::CovShiftBackward(U[nu], nu, Impl::CovShiftBackward(U[mu], mu, U[nu])), mu) * lambda; + + return out; + } + private: // here fixing the 4 dimensions, make it more general? 
- RealD csw; // Clover coefficient + RealD csw; // Clover coefficient CloverFieldType CloverTerm, CloverTermInv; // Clover term CloverFieldType CloverTermEven, CloverTermOdd; // Clover term EO CloverFieldType CloverTermInvEven, CloverTermInvOdd; // Clover term Inv EO diff --git a/tests/forces/Test_wilson_force.cc b/tests/forces/Test_wilson_force.cc index 1f34a48a..f834726b 100644 --- a/tests/forces/Test_wilson_force.cc +++ b/tests/forces/Test_wilson_force.cc @@ -50,7 +50,12 @@ int main (int argc, char ** argv) std::vector seeds({1,2,3,4}); GridParallelRNG pRNG(&Grid); - pRNG.SeedFixedIntegers(std::vector({45,12,81,9})); + std::vector vrand(4); + std::srand(std::time(0)); + std::generate(vrand.begin(), vrand.end(), std::rand); + std::cout << GridLogMessage << vrand << std::endl; + pRNG.SeedFixedIntegers(vrand); + //pRNG.SeedFixedIntegers(std::vector({45,12,81,9})); LatticeFermion phi (&Grid); gaussian(pRNG,phi); LatticeFermion Mphi (&Grid); diff --git a/tests/forces/Test_wilsonclover_force.cc b/tests/forces/Test_wilsonclover_force.cc new file mode 100644 index 00000000..c99cfa98 --- /dev/null +++ b/tests/forces/Test_wilsonclover_force.cc @@ -0,0 +1,209 @@ +/************************************************************************************* + + Grid physics library, www.github.com/paboyle/Grid + + Source file: ./tests/Test_wilson_force.cc + + Copyright (C) 2015 + +Author: Peter Boyle + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + You should have received a copy of the GNU General Public License along + with this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + + See the full license in the file "LICENSE" in the top level distribution directory + *************************************************************************************/ +/* END LEGAL */ +#include + +using namespace std; +using namespace Grid; +using namespace Grid::QCD; + +int main(int argc, char **argv) +{ + Grid_init(&argc, &argv); + + std::vector latt_size = GridDefaultLatt(); + std::vector simd_layout = GridDefaultSimd(Nd, vComplex::Nsimd()); + std::vector mpi_layout = GridDefaultMpi(); + + GridCartesian Grid(latt_size, simd_layout, mpi_layout); + GridRedBlackCartesian RBGrid(&Grid); + + int threads = GridThread::GetThreads(); + std::cout << GridLogMessage << "Grid is setup to use " << threads << " threads" << std::endl; + + std::vector seeds({1, 2, 3, 4}); + + GridParallelRNG pRNG(&Grid); + std::vector vrand(4); + std::srand(std::time(0)); + std::generate(vrand.begin(), vrand.end(), std::rand); + std::cout << GridLogMessage << vrand << std::endl; + pRNG.SeedFixedIntegers(vrand); + + LatticeFermion phi(&Grid); + gaussian(pRNG, phi); + LatticeFermion Mphi(&Grid); + LatticeFermion MphiPrime(&Grid); + + LatticeGaugeField U(&Grid); + + SU3::HotConfiguration(pRNG,U); + //SU3::ColdConfiguration(pRNG, U); + + //////////////////////////////////// + // Unmodified matrix element + //////////////////////////////////// + RealD mass = -4.0; //kills the diagonal term + Real csw = 1.0; + WilsonCloverFermionR Dw(U, Grid, RBGrid, mass, csw); + + Dw.M(phi, Mphi); + ComplexD S = innerProduct(Mphi, Mphi); // Action : pdag MdagM p + + // get the deriv of phidag MdagM phi with respect to "U" + LatticeGaugeField UdSdU(&Grid); + LatticeGaugeField tmp(&Grid); + + Dw.MDeriv(tmp, Mphi, phi, DaggerNo); UdSdU = tmp; + Dw.MDeriv(tmp, phi, Mphi, DaggerYes); UdSdU += tmp; + // 
Take the traceless antihermitian component + UdSdU = Ta(UdSdU); + + //////////////////////////////////// + // Modify the gauge field a little + //////////////////////////////////// + RealD dt = 0.0001; + RealD Hmom = 0.0; + RealD Hmomprime = 0.0; + RealD Hmompp = 0.0; + LatticeColourMatrix mommu(&Grid); + LatticeColourMatrix forcemu(&Grid); + LatticeGaugeField mom(&Grid); + LatticeGaugeField Uprime(&Grid); + + for (int mu = 0; mu < Nd; mu++) { + // Traceless antihermitian momentum; gaussian in lie alg + SU3::GaussianFundamentalLieAlgebraMatrix(pRNG, mommu); + Hmom -= real(sum(trace(mommu * mommu))); + PokeIndex(mom, mommu, mu); + } + + parallel_for(int ss=0;ssoSites();ss++){ + for (int mu = 0; mu < Nd; mu++) + Uprime[ss]._internal[mu] = ProjectOnGroup(Exponentiate(mom[ss]._internal[mu], dt, 12) * U[ss]._internal[mu]); + } + + std::cout << GridLogMessage << "Initial mom hamiltonian is " << Hmom << std::endl; + + // New action + Dw.ImportGauge(Uprime); + Dw.M(phi, MphiPrime); + + ComplexD Sprime = innerProduct(MphiPrime, MphiPrime); + + ////////////////////////////////////////////// + // Use derivative to estimate dS + ////////////////////////////////////////////// + + /////////////////////////////////////////////////////// + std::cout << GridLogMessage << "Antihermiticity tests - 1 " << std::endl; + for (int mu = 0; mu < Nd; mu++) + { + mommu = PeekIndex(mom, mu); + std::cout << GridLogMessage << " Mommu " << norm2(mommu) << std::endl; + mommu = mommu + adj(mommu); + std::cout << GridLogMessage << " Test: Mommu + Mommudag " << norm2(mommu) << std::endl; + mommu = PeekIndex(UdSdU, mu); + std::cout << GridLogMessage << " dsdumu " << norm2(mommu) << std::endl; + mommu = mommu + adj(mommu); + std::cout << GridLogMessage << " Test: dsdumu + dag " << norm2(mommu) << std::endl; + std::cout << "" << std::endl; + } + //////////////////////////////////////////////////////// + + LatticeComplex dS(&Grid); + dS = zero; + LatticeComplex dSmom(&Grid); + dSmom = zero; + 
LatticeComplex dSmom2(&Grid); + dSmom2 = zero; + + + // need for this??? + // ultimately it is just a 2.0 factor in UdSdU + for (int mu = 0; mu < Nd; mu++) + { + mommu = PeekIndex(UdSdU, mu); // P_mu = + mommu = Ta(mommu) * 2.0; // Mom = (P_mu - P_mu^dag) - trace(P_mu - P_mu^dag) + PokeIndex(UdSdU, mommu, mu); // UdSdU_mu = Mom + } + + std::cout << GridLogMessage<< "Antihermiticity tests - 2 " << std::endl; + for (int mu = 0; mu < Nd; mu++) + { + mommu = PeekIndex(mom, mu); + std::cout << GridLogMessage << " Mommu " << norm2(mommu) << std::endl; + mommu = mommu + adj(mommu); + std::cout << GridLogMessage << " Mommu + Mommudag " << norm2(mommu) << std::endl; + mommu = PeekIndex(UdSdU, mu); + std::cout << GridLogMessage << " dsdumu " << norm2(mommu) << std::endl; + mommu = mommu + adj(mommu); + std::cout << GridLogMessage << " dsdumu + dag " << norm2(mommu) << std::endl; + std::cout << "" << std::endl; + } + ///////////////////////////////////////////////////// + + + for (int mu = 0; mu < Nd; mu++) + { + forcemu = PeekIndex(UdSdU, mu); + mommu = PeekIndex(mom, mu); + + // Update PF action density + dS = dS + trace(mommu * forcemu) * dt; + + dSmom = dSmom - trace(mommu * forcemu) * dt; + dSmom2 = dSmom2 - trace(forcemu * forcemu) * (0.25 * dt * dt); + + // Update mom action density + mommu = mommu + forcemu * (dt * 0.5); + + Hmomprime -= real(sum(trace(mommu * mommu))); + } + + ComplexD dSpred = sum(dS); + ComplexD dSm = sum(dSmom); + ComplexD dSm2 = sum(dSmom2); + + std::cout << GridLogMessage << "Initial mom hamiltonian is " << Hmom << std::endl; + std::cout << GridLogMessage << "Final mom hamiltonian is " << Hmomprime << std::endl; + std::cout << GridLogMessage << "Delta mom hamiltonian is " << Hmomprime - Hmom << std::endl; + + std::cout << GridLogMessage << " S " << S << std::endl; + std::cout << GridLogMessage << " Sprime " << Sprime << std::endl; + std::cout << GridLogMessage << "dS " << Sprime - S << std::endl; + std::cout << GridLogMessage << "predict dS " << 
dSpred << std::endl; + std::cout << GridLogMessage << "dSm " << dSm << std::endl; + std::cout << GridLogMessage << "dSm2" << dSm2 << std::endl; + + std::cout << GridLogMessage << "Total dS " << Hmomprime - Hmom + Sprime - S << std::endl; + + assert(fabs(real(Sprime - S - dSpred)) < 1.0); + + std::cout << GridLogMessage << "Done" << std::endl; + Grid_finalize(); +} From 76bcf6cd8c767b0f3a254b3407b951cdf0782d6d Mon Sep 17 00:00:00 2001 From: Guido Cossu Date: Thu, 26 Oct 2017 18:45:41 +0100 Subject: [PATCH 179/377] Deleting vscode settings file --- .vscode/settings.json | 51 ------------------------------------------- 1 file changed, 51 deletions(-) delete mode 100644 .vscode/settings.json diff --git a/.vscode/settings.json b/.vscode/settings.json deleted file mode 100644 index f13d503b..00000000 --- a/.vscode/settings.json +++ /dev/null @@ -1,51 +0,0 @@ -// Place your settings in this file to overwrite default and user settings. -{ - "files.associations": { - "cctype": "cpp", - "clocale": "cpp", - "cmath": "cpp", - "csignal": "cpp", - "cstdarg": "cpp", - "cstddef": "cpp", - "cstdio": "cpp", - "cstdlib": "cpp", - "cstring": "cpp", - "ctime": "cpp", - "cwchar": "cpp", - "cwctype": "cpp", - "array": "cpp", - "atomic": "cpp", - "*.tcc": "cpp", - "chrono": "cpp", - "codecvt": "cpp", - "condition_variable": "cpp", - "cstdint": "cpp", - "exception": "cpp", - "slist": "cpp", - "functional": "cpp", - "initializer_list": "cpp", - "iosfwd": "cpp", - "limits": "cpp", - "memory": "cpp", - "mutex": "cpp", - "new": "cpp", - "ratio": "cpp", - "stdexcept": "cpp", - "system_error": "cpp", - "thread": "cpp", - "tuple": "cpp", - "type_traits": "cpp", - "typeinfo": "cpp", - "utility": "cpp", - "iostream": "cpp", - "strstream": "cpp", - "complex": "cpp", - "fstream": "cpp", - "iomanip": "cpp", - "istream": "cpp", - "ostream": "cpp", - "sstream": "cpp", - "streambuf": "cpp", - "algorithm": "cpp" - } -} \ No newline at end of file From 034de160bf7433480d2e176f3501180fbbf1c043 Mon Sep 17 
00:00:00 2001 From: Azusa Yamaguchi Date: Thu, 26 Oct 2017 20:58:46 +0100 Subject: [PATCH 180/377] Staggered updates : Schur fixed and added a unit test for Test_staggered_cg_schur.cc giving stronger check --- lib/algorithms/LinearOperator.h | 2 +- lib/algorithms/iterative/SchurRedBlack.h | 15 ++++- lib/communicator/Communicator_mpi3.cc | 7 ++- lib/communicator/Communicator_mpit.cc | 18 +++--- tests/solver/Test_staggered_cg_prec.cc | 1 - tests/solver/Test_staggered_cg_schur.cc | 76 ++++++++++++++++++++++++ 6 files changed, 103 insertions(+), 16 deletions(-) create mode 100644 tests/solver/Test_staggered_cg_schur.cc diff --git a/lib/algorithms/LinearOperator.h b/lib/algorithms/LinearOperator.h index f1b8820e..2a757352 100644 --- a/lib/algorithms/LinearOperator.h +++ b/lib/algorithms/LinearOperator.h @@ -319,7 +319,7 @@ namespace Grid { Field tmp(in._grid); _Mat.Meooe(in,tmp); _Mat.MooeeInv(tmp,out); - _Mat.MeooeDag(out,tmp); + _Mat.Meooe(out,tmp); _Mat.Mooee(in,out); return axpy_norm(out,-1.0,tmp,out); } diff --git a/lib/algorithms/iterative/SchurRedBlack.h b/lib/algorithms/iterative/SchurRedBlack.h index a309386b..a0fd86a6 100644 --- a/lib/algorithms/iterative/SchurRedBlack.h +++ b/lib/algorithms/iterative/SchurRedBlack.h @@ -55,7 +55,15 @@ Author: Peter Boyle *Odd * i) D_oo psi_o = L^{-1} eta_o * eta_o' = (D_oo)^dag (eta_o - Moe Mee^{-1} eta_e) + * + * Wilson: * (D_oo)^{\dag} D_oo psi_o = (D_oo)^dag L^{-1} eta_o + * Stag: + * D_oo psi_o = L^{-1} eta = (eta_o - Moe Mee^{-1} eta_e) + * + * L^-1 eta_o= (1 0 ) (e + * (-MoeMee^{-1} 1 ) + * *Even * ii) Mee psi_e + Meo psi_o = src_e * @@ -122,18 +130,19 @@ namespace Grid { pickCheckerboard(Odd ,sol_o,out); ///////////////////////////////////////////////////// - // src_o = Mdag * (source_o - Moe MeeInv source_e) + // src_o = (source_o - Moe MeeInv source_e) ///////////////////////////////////////////////////// _Matrix.MooeeInv(src_e,tmp); assert( tmp.checkerboard ==Even); _Matrix.Meooe (tmp,Mtmp); assert( 
Mtmp.checkerboard ==Odd); tmp=src_o-Mtmp; assert( tmp.checkerboard ==Odd); - _Matrix.Mooee(tmp,src_o); assert(src_o.checkerboard ==Odd); + src_o = tmp; assert(src_o.checkerboard ==Odd); + // _Matrix.Mooee(tmp,src_o); // Extra factor of "m" in source ////////////////////////////////////////////////////////////// // Call the red-black solver ////////////////////////////////////////////////////////////// - std::cout< + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along + with this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+ + See the full license in the file "LICENSE" in the top level distribution directory + *************************************************************************************/ + /* END LEGAL */ +#include + +using namespace std; +using namespace Grid; +using namespace Grid::QCD; + +template +struct scal { + d internal; +}; + + Gamma::Algebra Gmu [] = { + Gamma::Algebra::GammaX, + Gamma::Algebra::GammaY, + Gamma::Algebra::GammaZ, + Gamma::Algebra::GammaT + }; + +int main (int argc, char ** argv) +{ + typedef typename ImprovedStaggeredFermionR::FermionField FermionField; + typename ImprovedStaggeredFermionR::ImplParams params; + Grid_init(&argc,&argv); + + std::vector latt_size = GridDefaultLatt(); + std::vector simd_layout = GridDefaultSimd(Nd,vComplex::Nsimd()); + std::vector mpi_layout = GridDefaultMpi(); + GridCartesian Grid(latt_size,simd_layout,mpi_layout); + GridRedBlackCartesian RBGrid(&Grid); + + std::vector seeds({1,2,3,4}); + GridParallelRNG pRNG(&Grid); pRNG.SeedFixedIntegers(seeds); + + LatticeGaugeField Umu(&Grid); SU3::HotConfiguration(pRNG,Umu); + + FermionField src(&Grid); random(pRNG,src); + FermionField result(&Grid); result=zero; + FermionField resid(&Grid); + + RealD mass=0.1; + ImprovedStaggeredFermionR Ds(Umu,Umu,Grid,RBGrid,mass); + + ConjugateGradient CG(1.0e-8,10000); + SchurRedBlackStaggeredSolve SchurSolver(CG); + + SchurSolver(Ds,src,result); + + Grid_finalize(); +} From 0f3e9ae57d4a0cc6f7f8ec1d0fa8e922335aab72 Mon Sep 17 00:00:00 2001 From: paboyle Date: Thu, 26 Oct 2017 23:29:59 +0100 Subject: [PATCH 181/377] Gsites error. 
Only appeared (so far) in I/O code for even odd fields --- lib/cartesian/Cartesian_red_black.h | 1 + 1 file changed, 1 insertion(+) diff --git a/lib/cartesian/Cartesian_red_black.h b/lib/cartesian/Cartesian_red_black.h index f89cacc5..5c50f062 100644 --- a/lib/cartesian/Cartesian_red_black.h +++ b/lib/cartesian/Cartesian_red_black.h @@ -205,6 +205,7 @@ public: { assert((_gdimensions[d] & 0x1) == 0); _gdimensions[d] = _gdimensions[d] / 2; // Remove a checkerboard + _gsites /= 2; } _ldimensions[d] = _gdimensions[d] / _processors[d]; assert(_ldimensions[d] * _processors[d] == _gdimensions[d]); From 00ebc150ad6a6db27000829c6830ea8b855bacfe Mon Sep 17 00:00:00 2001 From: paboyle Date: Thu, 26 Oct 2017 23:30:37 +0100 Subject: [PATCH 182/377] Mistake in string parse; interface is ambiguous and must fix. Is char * a file, or a XML buffer ? --- lib/serialisation/XmlIO.cc | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/lib/serialisation/XmlIO.cc b/lib/serialisation/XmlIO.cc index c0c45adc..260611a5 100644 --- a/lib/serialisation/XmlIO.cc +++ b/lib/serialisation/XmlIO.cc @@ -68,10 +68,10 @@ std::string XmlWriter::XmlString(void) XmlReader::XmlReader(const char *xmlstring,string toplev) : fileName_("") { pugi::xml_parse_result result; - result = doc_.load_file(xmlstring); + result = doc_.load_string(xmlstring); if ( !result ) { - cerr << "XML error description: char * " << result.description() << " "<< xmlstring << "\n"; - cerr << "XML error offset : char * " << result.offset << " "< Date: Thu, 26 Oct 2017 23:31:46 +0100 Subject: [PATCH 183/377] Cleaning up --- .../iterative/ImplicitlyRestartedLanczos.h | 48 +++++++++++-------- 1 file changed, 27 insertions(+), 21 deletions(-) diff --git a/lib/algorithms/iterative/ImplicitlyRestartedLanczos.h b/lib/algorithms/iterative/ImplicitlyRestartedLanczos.h index 4be2715a..089e7ff3 100644 --- a/lib/algorithms/iterative/ImplicitlyRestartedLanczos.h +++ b/lib/algorithms/iterative/ImplicitlyRestartedLanczos.h @@ 
-37,6 +37,9 @@ Author: Christoph Lehner namespace Grid { + //////////////////////////////////////////////////////// + // Move following 100 LOC to lattice/Lattice_basis.h + //////////////////////////////////////////////////////// template void basisOrthogonalize(std::vector &basis,Field &w,int k) { @@ -101,7 +104,6 @@ void basisReorderInPlace(std::vector &_v,std::vector& sort_vals, s if (idx[i] != i) { - assert(idx[i] > i); ////////////////////////////////////// // idx[i] is a table of desired sources giving a permutation. // Swap v[i] with v[idx[i]]. @@ -114,8 +116,7 @@ void basisReorderInPlace(std::vector &_v,std::vector& sort_vals, s if (idx[j]==i) break; - assert(j!=idx.size()); - assert(idx[j]==i); + assert(idx[i] > i); assert(j!=idx.size()); assert(idx[j]==i); std::swap(_v[i]._odata,_v[idx[i]]._odata); // should use vector move constructor, no data copy std::swap(sort_vals[i],sort_vals[idx[i]]); @@ -161,12 +162,6 @@ void basisDeflate(const std::vector &_v,const std::vector& eval,co } } -enum IRLdiagonalisation { - IRLdiagonaliseWithDSTEGR, - IRLdiagonaliseWithQR, - IRLdiagonaliseWithEigen -}; - ///////////////////////////////////////////////////////////// // Implicitly restarted lanczos ///////////////////////////////////////////////////////////// @@ -177,6 +172,12 @@ template class ImplicitlyRestartedLanczosTester virtual int ReconstructEval(int j,RealD resid,Field &evec, RealD &eval,RealD evalMaxApprox); }; +enum IRLdiagonalisation { + IRLdiagonaliseWithDSTEGR, + IRLdiagonaliseWithQR, + IRLdiagonaliseWithEigen +}; + template class ImplicitlyRestartedLanczosHermOpTester : public ImplicitlyRestartedLanczosTester { public: @@ -242,6 +243,17 @@ class ImplicitlyRestartedLanczos { ///////////////////////// public: + ////////////////////////////////////////////////////////////////// + // PAB: + ////////////////////////////////////////////////////////////////// + // Too many options & knobs. 
Do we really need orth_period + // What is the theoretical basis & guarantees of betastp ? + // Nstop=Nk viable? + // MinRestart avoidable with new convergence test? + // Could cut to HermOp, HermOpTest, Tester, Nk, Nm, resid, maxiter (+diagonalisation) + // HermOpTest could be eliminated if we dropped the Power method for max eval. + // -- also: The eval, eval2, eval2_copy stuff is still unnecessarily unclear + ////////////////////////////////////////////////////////////////// ImplicitlyRestartedLanczos(LinearFunction & HermOp, LinearFunction & HermOpTest, ImplicitlyRestartedLanczosTester & Tester, @@ -413,16 +425,14 @@ until convergence // sorting ////////////////////////////////// eval2_copy = eval2; - std::partial_sort(eval2.begin(),eval2.begin()+Nm,eval2.end(),std::greater()); - std::cout<0); basisRotate(evec,Qt,k1-1,k2+1,0,Nm,Nm); /// big constraint on the basis - std::cout<& lmd, // Nm std::vector& lme, // Nm int Nk, int Nm, // Nk, Nm From 9ec9850bdb49548238b1cb253c82bfeee3823683 Mon Sep 17 00:00:00 2001 From: paboyle Date: Thu, 26 Oct 2017 23:34:31 +0100 Subject: [PATCH 184/377] 64bit ftello update --- lib/parallelIO/IldgIO.h | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) diff --git a/lib/parallelIO/IldgIO.h b/lib/parallelIO/IldgIO.h index 1f2b7c90..36ecbd1b 100644 --- a/lib/parallelIO/IldgIO.h +++ b/lib/parallelIO/IldgIO.h @@ -224,7 +224,7 @@ class GridLimeReader : public BinaryIO { assert(PayloadSize == file_bytes);// Must match or user error - off_t offset= ftell(File); + uint64_t offset= ftello(File); // std::cout << " ReadLatticeObject from offset "< munge; BinaryIO::readLatticeObject< vobj, sobj >(field, filename, munge, offset, format,nersc_csum,scidac_csuma,scidac_csumb); @@ -253,16 +253,13 @@ class GridLimeReader : public BinaryIO { while ( limeReaderNextRecord(LimeR) == LIME_SUCCESS ) { // std::cout << GridLogMessage<< " readLimeObject seeking "<< record_name <<" found record :" < xmlc(nbytes+1,'\0'); 
limeReaderReadData((void *)&xmlc[0], &nbytes, LimeR); - // std::cout << GridLogMessage<< " readLimeObject matches XML " << &xmlc[0] <=0); err=limeWriterCloseRecord(LimeW); assert(err>=0); limeDestroyHeader(h); - // std::cout << " File offset is now"<(); BinarySimpleMunger munge; BinaryIO::writeLatticeObject(field, filename, munge, offset, format,nersc_csum,scidac_csuma,scidac_csumb); + // fseek(File,0,SEEK_END); offset = ftello(File);std::cout << " offset now "<=0); + //////////////////////////////////////// // Write checksum element, propagaing forward from the BinaryIO // Always pair a checksum with a binary object, and close message @@ -703,8 +702,7 @@ class IldgReader : public GridLimeReader { // Binary data ///////////////////////////////// std::cout << GridLogMessage << "ILDG Binary record found : " ILDG_BINARY_DATA << std::endl; - off_t offset= ftell(File); - + uint64_t offset= ftello(File); if ( format == std::string("IEEE64BIG") ) { GaugeSimpleMunger munge; BinaryIO::readLatticeObject< vobj, dobj >(Umu, filename, munge, offset, format,nersc_csum,scidac_csuma,scidac_csumb); From 7fab183c0eebfd82e006eca2130d809131a36074 Mon Sep 17 00:00:00 2001 From: paboyle Date: Fri, 27 Oct 2017 08:17:49 +0100 Subject: [PATCH 185/377] Better read test --- lib/parallelIO/IldgIO.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/parallelIO/IldgIO.h b/lib/parallelIO/IldgIO.h index 36ecbd1b..b86e250f 100644 --- a/lib/parallelIO/IldgIO.h +++ b/lib/parallelIO/IldgIO.h @@ -159,7 +159,7 @@ namespace QCD { uint32_t scidac_checksumb = stoull(scidacChecksum_.sumb,0,16); if ( scidac_csuma !=scidac_checksuma) return 0; if ( scidac_csumb !=scidac_checksumb) return 0; - return 1; + return 1; } //////////////////////////////////////////////////////////////////////////////////// @@ -237,7 +237,7 @@ class GridLimeReader : public BinaryIO { ///////////////////////////////////////////// // Verify checksums ///////////////////////////////////////////// - 
scidacChecksumVerify(scidacChecksum_,scidac_csuma,scidac_csumb); + assert(scidacChecksumVerify(scidacChecksum_,scidac_csuma,scidac_csumb)==1); return; } } From fa04b6d3c233d6057fb5133c8e5627bc2d941aba Mon Sep 17 00:00:00 2001 From: paboyle Date: Fri, 27 Oct 2017 08:18:29 +0100 Subject: [PATCH 186/377] Finished ? Verifying coarse evec restore --- .../Test_dwf_compressed_lanczos_reorg.cc | 145 +++++++++++++----- 1 file changed, 109 insertions(+), 36 deletions(-) diff --git a/tests/lanczos/Test_dwf_compressed_lanczos_reorg.cc b/tests/lanczos/Test_dwf_compressed_lanczos_reorg.cc index ad1aaa47..42814e2f 100644 --- a/tests/lanczos/Test_dwf_compressed_lanczos_reorg.cc +++ b/tests/lanczos/Test_dwf_compressed_lanczos_reorg.cc @@ -50,9 +50,13 @@ struct LanczosParams : Serializable { int, MinRes); // Must restart }; -struct CompressedLanczosParams : Serializable { +struct LocalCoherenceLanczosParams : Serializable { public: - GRID_SERIALIZABLE_CLASS_MEMBERS(CompressedLanczosParams, + GRID_SERIALIZABLE_CLASS_MEMBERS(bool, doFine, + bool, doFineRead, + bool, doCoarse, + bool, doCoarseRead, + LocalCoherenceLanczosParams, LanczosParams, FineParams, LanczosParams, CoarseParams, ChebyParams, Smoother, @@ -61,8 +65,7 @@ struct CompressedLanczosParams : Serializable { std::string, config, std::vector < std::complex >, omega, RealD, mass, - RealD, M5 - ); + RealD, M5); }; // Duplicate functionality; ProjectedFunctionHermOp could be used with the trivial function @@ -209,7 +212,7 @@ class ImplicitlyRestartedLanczosSmoothedTester : public ImplicitlyRestartedLanc // Make serializable Lanczos params //////////////////////////////////////////// template -class CoarseFineIRL +class LocalCoherenceLanczos { public: typedef iVector CoarseSiteVector; @@ -230,7 +233,7 @@ private: std::vector evals_coarse; std::vector evec_coarse; public: - CoarseFineIRL(GridBase *FineGrid, + LocalCoherenceLanczos(GridBase *FineGrid, GridBase *CoarseGrid, LinearOperatorBase &FineOp, int checkerboard) : @@ -253,7 
+256,7 @@ public: return nn; } - void testFine(void) + void fakeFine(void) { int Nk = nbasis; _Aggregate.subspace.resize(Nk,_FineGrid); @@ -286,6 +289,42 @@ public: write(WR,"evals",evals_fine); } } + + void checkpointFineRestore(std::string evecs_file,std::string evals_file) + { + evals_fine.resize(nbasis); + _Aggregate.subspace.resize(nbasis,_FineGrid); + { + std::cout << GridLogIRL<< "checkpointFineRestore: Reading evals from "< Op(_FineOp); + ImplicitlyRestartedLanczosHermOpTester SimpleTester(Op); + for(int k=0;k ChebySmooth(cheby_smooth); + ProjectedFunctionHermOp ChebyOp (ChebySmooth,_FineOp,_Aggregate); + ImplicitlyRestartedLanczosSmoothedTester ChebySmoothTester(ChebyOp,ChebySmooth,_FineOp,_Aggregate,relax); + + for(int k=0;k=Nstop); - + evals_coarse.resize(Nstop); + evec_coarse.resize (Nstop,_CoarseGrid); for (int i=0;i IRL(FrbGrid,CoarseGrid5rb,HermOp,Odd); - std::cout << GridLogMessage << "Constructed CoarseFine IRL" << std::endl; + LocalCoherenceLanczos _LocalCoherenceLanczos(FrbGrid,CoarseGrid5rb,HermOp,Odd); + std::cout << GridLogMessage << "Constructed LocalCoherenceLanczos" << std::endl; - int do_fine = 1; - int do_coarse = 0; - int do_smooth = 0; - if ( do_fine ) { + if ( Params.doCoarse ) { + assert( (Params.doFine)||(Params.doFineRead)); + } + + if ( Params.doFine ) { std::cout << GridLogMessage << "Performing fine grid IRL Nstop "<< Ns1 << " Nk "< Date: Fri, 27 Oct 2017 09:04:31 +0100 Subject: [PATCH 187/377] Move the local coherence lanczos into algorithms. Keep the I/O in the tester. Other people can copy this method to write other I/O formats. 
--- .../iterative/LocalCoherenceLanczos.h | 348 ++++++++++++++ .../Test_dwf_compressed_lanczos_reorg.cc | 436 +++--------------- 2 files changed, 410 insertions(+), 374 deletions(-) create mode 100644 lib/algorithms/iterative/LocalCoherenceLanczos.h diff --git a/lib/algorithms/iterative/LocalCoherenceLanczos.h b/lib/algorithms/iterative/LocalCoherenceLanczos.h new file mode 100644 index 00000000..6b8fe62c --- /dev/null +++ b/lib/algorithms/iterative/LocalCoherenceLanczos.h @@ -0,0 +1,348 @@ + /************************************************************************************* + + Grid physics library, www.github.com/paboyle/Grid + + Source file: ./lib/algorithms/iterative/LocalCoherenceLanczos.h + + Copyright (C) 2015 + +Author: Christoph Lehner +Author: paboyle + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along + with this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+ + See the full license in the file "LICENSE" in the top level distribution directory + *************************************************************************************/ + /* END LEGAL */ +#ifndef GRID_LOCAL_COHERENCE_IRL_H +#define GRID_LOCAL_COHERENCE_IRL_H +namespace Grid { +struct LanczosParams : Serializable { + public: + GRID_SERIALIZABLE_CLASS_MEMBERS(LanczosParams, + ChebyParams, Cheby,/*Chebyshev*/ + int, Nstop, /*Vecs in Lanczos must converge Nstop < Nk < Nm*/ + int, Nk, /*Vecs in Lanczos seek converge*/ + int, Nm, /*Total vecs in Lanczos include restart*/ + RealD, resid, /*residual*/ + int, MaxIt, + RealD, betastp, /* ? */ + int, MinRes); // Must restart +}; + +struct LocalCoherenceLanczosParams : Serializable { + public: + GRID_SERIALIZABLE_CLASS_MEMBERS(LocalCoherenceLanczosParams, + bool, doFine, + bool, doFineRead, + bool, doCoarse, + bool, doCoarseRead, + LanczosParams, FineParams, + LanczosParams, CoarseParams, + ChebyParams, Smoother, + RealD , coarse_relax_tol, + std::vector, blockSize, + std::string, config, + std::vector < std::complex >, omega, + RealD, mass, + RealD, M5); +}; + +// Duplicate functionality; ProjectedFunctionHermOp could be used with the trivial function +template +class ProjectedHermOp : public LinearFunction > > { +public: + typedef iVector CoarseSiteVector; + typedef Lattice CoarseField; + typedef Lattice CoarseScalar; // used for inner products on fine field + typedef Lattice FineField; + + LinearOperatorBase &_Linop; + Aggregation &_Aggregate; + + ProjectedHermOp(LinearOperatorBase& linop, Aggregation &aggregate) : + _Linop(linop), + _Aggregate(aggregate) { }; + + void operator()(const CoarseField& in, CoarseField& out) { + + GridBase *FineGrid = _Aggregate.FineGrid; + FineField fin(FineGrid); + FineField fout(FineGrid); + + _Aggregate.PromoteFromSubspace(in,fin); std::cout< +class ProjectedFunctionHermOp : public LinearFunction > > { +public: + typedef iVector CoarseSiteVector; + typedef Lattice CoarseField; + 
typedef Lattice CoarseScalar; // used for inner products on fine field + typedef Lattice FineField; + + + OperatorFunction & _poly; + LinearOperatorBase &_Linop; + Aggregation &_Aggregate; + + ProjectedFunctionHermOp(OperatorFunction & poly,LinearOperatorBase& linop, + Aggregation &aggregate) : + _poly(poly), + _Linop(linop), + _Aggregate(aggregate) { }; + + void operator()(const CoarseField& in, CoarseField& out) { + + GridBase *FineGrid = _Aggregate.FineGrid; + + FineField fin(FineGrid) ;fin.checkerboard =_Aggregate.checkerboard; + FineField fout(FineGrid);fout.checkerboard =_Aggregate.checkerboard; + + _Aggregate.PromoteFromSubspace(in,fin); std::cout< +class ImplicitlyRestartedLanczosSmoothedTester : public ImplicitlyRestartedLanczosTester > > +{ + public: + typedef iVector CoarseSiteVector; + typedef Lattice CoarseField; + typedef Lattice CoarseScalar; // used for inner products on fine field + typedef Lattice FineField; + + LinearFunction & _Poly; + OperatorFunction & _smoother; + LinearOperatorBase &_Linop; + Aggregation &_Aggregate; + RealD _coarse_relax_tol; + ImplicitlyRestartedLanczosSmoothedTester(LinearFunction &Poly, + OperatorFunction &smoother, + LinearOperatorBase &Linop, + Aggregation &Aggregate, + RealD coarse_relax_tol=5.0e3) + : _smoother(smoother), _Linop(Linop),_Aggregate(Aggregate), _Poly(Poly), _coarse_relax_tol(coarse_relax_tol) { }; + + int TestConvergence(int j,RealD eresid,CoarseField &B, RealD &eval,RealD evalMaxApprox) + { + CoarseField v(B); + RealD eval_poly = eval; + // Apply operator + _Poly(B,v); + + RealD vnum = real(innerProduct(B,v)); // HermOp. 
+ RealD vden = norm2(B); + RealD vv0 = norm2(v); + eval = vnum/vden; + v -= eval*B; + + RealD vv = norm2(v) / ::pow(evalMaxApprox,2.0); + + std::cout.precision(13); + std::cout< nbasis ) eresid = eresid*_coarse_relax_tol; + if( (vv +class LocalCoherenceLanczos +{ +public: + typedef iVector CoarseSiteVector; + typedef Lattice CoarseScalar; // used for inner products on fine field + typedef Lattice CoarseField; + typedef Lattice FineField; + +protected: + GridBase *_CoarseGrid; + GridBase *_FineGrid; + int _checkerboard; + LinearOperatorBase & _FineOp; + + // FIXME replace Aggregation with vector of fine; the code reuse is too small for + // the hassle and complexity of cross coupling. + Aggregation _Aggregate; + std::vector evals_fine; + std::vector evals_coarse; + std::vector evec_coarse; +public: + LocalCoherenceLanczos(GridBase *FineGrid, + GridBase *CoarseGrid, + LinearOperatorBase &FineOp, + int checkerboard) : + _CoarseGrid(CoarseGrid), + _FineGrid(FineGrid), + _Aggregate(CoarseGrid,FineGrid,checkerboard), + _FineOp(FineOp), + _checkerboard(checkerboard) + { + evals_fine.resize(0); + evals_coarse.resize(0); + }; + void Orthogonalise(void ) { _Aggregate.Orthogonalise(); } + + template static RealD normalise(T& v) + { + RealD nn = norm2(v); + nn = ::sqrt(nn); + v = v * (1.0/nn); + return nn; + } + + void fakeFine(void) + { + int Nk = nbasis; + _Aggregate.subspace.resize(Nk,_FineGrid); + _Aggregate.subspace[0]=1.0; + _Aggregate.subspace[0].checkerboard=_checkerboard; + normalise(_Aggregate.subspace[0]); + PlainHermOp Op(_FineOp); + for(int k=1;k Op(_FineOp); + ImplicitlyRestartedLanczosHermOpTester SimpleTester(Op); + for(int k=0;k ChebySmooth(cheby_smooth); + ProjectedFunctionHermOp ChebyOp (ChebySmooth,_FineOp,_Aggregate); + ImplicitlyRestartedLanczosSmoothedTester ChebySmoothTester(ChebyOp,ChebySmooth,_FineOp,_Aggregate,relax); + + for(int k=0;k Cheby(cheby_parms); + FunctionHermOp ChebyOp(Cheby,_FineOp); + PlainHermOp Op(_FineOp); + + evals_fine.resize(Nm); + 
_Aggregate.subspace.resize(Nm,_FineGrid); + + ImplicitlyRestartedLanczos IRL(ChebyOp,Op,Nstop,Nk,Nm,resid,MaxIt,betastp,MinRes); + + FineField src(_FineGrid); src=1.0; src.checkerboard = _checkerboard; + + int Nconv; + IRL.calc(evals_fine,_Aggregate.subspace,src,Nconv,false); + + // Shrink down to number saved + assert(Nstop>=nbasis); + assert(Nconv>=nbasis); + evals_fine.resize(nbasis); + _Aggregate.subspace.resize(nbasis,_FineGrid); + } + void calcCoarse(ChebyParams cheby_op,ChebyParams cheby_smooth,RealD relax, + int Nstop, int Nk, int Nm,RealD resid, + RealD MaxIt, RealD betastp, int MinRes) + { + Chebyshev Cheby(cheby_op); + ProjectedHermOp Op(_FineOp,_Aggregate); + ProjectedFunctionHermOp ChebyOp (Cheby,_FineOp,_Aggregate); + ////////////////////////////////////////////////////////////////////////////////////////////////// + // create a smoother and see if we can get a cheap convergence test and smooth inside the IRL + ////////////////////////////////////////////////////////////////////////////////////////////////// + + Chebyshev ChebySmooth(cheby_smooth); + ImplicitlyRestartedLanczosSmoothedTester ChebySmoothTester(ChebyOp,ChebySmooth,_FineOp,_Aggregate,relax); + + evals_coarse.resize(Nm); + evec_coarse.resize(Nm,_CoarseGrid); + + CoarseField src(_CoarseGrid); src=1.0; + + ImplicitlyRestartedLanczos IRL(ChebyOp,ChebyOp,ChebySmoothTester,Nstop,Nk,Nm,resid,MaxIt,betastp,MinRes); + int Nconv=0; + IRL.calc(evals_coarse,evec_coarse,src,Nconv,false); + assert(Nconv>=Nstop); + evals_coarse.resize(Nstop); + evec_coarse.resize (Nstop,_CoarseGrid); + for (int i=0;i -class ProjectedFunctionHermOp : public LinearFunction > > { -public: - typedef iVector CoarseSiteVector; - typedef Lattice CoarseField; - typedef Lattice CoarseScalar; // used for inner products on fine field - typedef Lattice FineField; - - - OperatorFunction & _poly; - LinearOperatorBase &_Linop; - Aggregation &_Aggregate; - - ProjectedFunctionHermOp(OperatorFunction & poly,LinearOperatorBase& linop, - 
Aggregation &aggregate) : - _poly(poly), - _Linop(linop), - _Aggregate(aggregate) { }; - - void operator()(const CoarseField& in, CoarseField& out) { - - GridBase *FineGrid = _Aggregate.FineGrid; - - FineField fin(FineGrid) ;fin.checkerboard =_Aggregate.checkerboard; - FineField fout(FineGrid);fout.checkerboard =_Aggregate.checkerboard; - - _Aggregate.PromoteFromSubspace(in,fin); std::cout< -class ImplicitlyRestartedLanczosSmoothedTester : public ImplicitlyRestartedLanczosTester > > -{ - public: - typedef iVector CoarseSiteVector; - typedef Lattice CoarseField; - typedef Lattice CoarseScalar; // used for inner products on fine field - typedef Lattice FineField; - - LinearFunction & _Poly; - OperatorFunction & _smoother; - LinearOperatorBase &_Linop; - Aggregation &_Aggregate; - RealD _coarse_relax_tol; - ImplicitlyRestartedLanczosSmoothedTester(LinearFunction &Poly, - OperatorFunction &smoother, - LinearOperatorBase &Linop, - Aggregation &Aggregate, - RealD coarse_relax_tol=5.0e3) - : _smoother(smoother), _Linop(Linop),_Aggregate(Aggregate), _Poly(Poly), _coarse_relax_tol(coarse_relax_tol) { }; - - int TestConvergence(int j,RealD eresid,CoarseField &B, RealD &eval,RealD evalMaxApprox) - { - CoarseField v(B); - RealD eval_poly = eval; - // Apply operator - _Poly(B,v); - - RealD vnum = real(innerProduct(B,v)); // HermOp. 
- RealD vden = norm2(B); - RealD vv0 = norm2(v); - eval = vnum/vden; - v -= eval*B; - - RealD vv = norm2(v) / ::pow(evalMaxApprox,2.0); - - std::cout.precision(13); - std::cout< nbasis ) eresid = eresid*_coarse_relax_tol; - if( (vv -class LocalCoherenceLanczos -{ -public: - typedef iVector CoarseSiteVector; - typedef Lattice CoarseScalar; // used for inner products on fine field - typedef Lattice CoarseField; - typedef Lattice FineField; - -private: - GridBase *_CoarseGrid; - GridBase *_FineGrid; - int _checkerboard; - LinearOperatorBase & _FineOp; - - // FIXME replace Aggregation with vector of fine; the code reuse is too small for - // the hassle and complexity of cross coupling. - Aggregation _Aggregate; - std::vector evals_fine; - std::vector evals_coarse; - std::vector evec_coarse; -public: - LocalCoherenceLanczos(GridBase *FineGrid, - GridBase *CoarseGrid, - LinearOperatorBase &FineOp, - int checkerboard) : - _CoarseGrid(CoarseGrid), - _FineGrid(FineGrid), - _Aggregate(CoarseGrid,FineGrid,checkerboard), - _FineOp(FineOp), - _checkerboard(checkerboard) - { - evals_fine.resize(0); - evals_coarse.resize(0); - }; - void Orthogonalise(void ) { _Aggregate.Orthogonalise(); } - - template static RealD normalise(T& v) - { - RealD nn = norm2(v); - nn = ::sqrt(nn); - v = v * (1.0/nn); - return nn; - } - - void fakeFine(void) - { - int Nk = nbasis; - _Aggregate.subspace.resize(Nk,_FineGrid); - _Aggregate.subspace[0]=1.0; - _Aggregate.subspace[0].checkerboard=_checkerboard; - normalise(_Aggregate.subspace[0]); - PlainHermOp Op(_FineOp); - for(int k=1;k &FineOp, + int checkerboard) + // Base constructor + : LocalCoherenceLanczos(FineGrid,CoarseGrid,FineOp,checkerboard) + {}; void checkpointFine(std::string evecs_file,std::string evals_file) { - assert(_Aggregate.subspace.size()==nbasis); + assert(this->_Aggregate.subspace.size()==nbasis); emptyUserRecord record; - { - ScidacWriter WR; - WR.open(evecs_file); - for(int k=0;k_Aggregate.subspace[k],record); } + WR.close(); + + 
XmlWriter WRx(evals_file); + write(WRx,"evals",this->evals_fine); } void checkpointFineRestore(std::string evecs_file,std::string evals_file) { - evals_fine.resize(nbasis); - _Aggregate.subspace.resize(nbasis,_FineGrid); - { - std::cout << GridLogIRL<< "checkpointFineRestore: Reading evals from "<evals_fine.resize(nbasis); + this->_Aggregate.subspace.resize(nbasis,this->_FineGrid); + + std::cout << GridLogIRL<< "checkpointFineRestore: Reading evals from "<evals_fine); + + assert(this->evals_fine.size()==nbasis); + + std::cout << GridLogIRL<< "checkpointFineRestore: Reading evecs from "< Op(_FineOp); - ImplicitlyRestartedLanczosHermOpTester SimpleTester(Op); - for(int k=0;k_Aggregate.subspace[k].checkerboard=this->_checkerboard; + RD.readScidacFieldRecord(this->_Aggregate.subspace[k],record); + } + RD.close(); } void checkpointCoarse(std::string evecs_file,std::string evals_file) { - int n = evec_coarse.size(); + int n = this->evec_coarse.size(); emptyUserRecord record; - { - ScidacWriter WR; - WR.open(evecs_file); - for(int k=0;kevec_coarse[k],record); } + WR.close(); + + XmlWriter WRx(evals_file); + write(WRx,"evals",this->evals_coarse); } + void checkpointCoarseRestore(std::string evecs_file,std::string evals_file,int nvec) { std::cout << " resizing to " << nvec<< std::endl; - evals_coarse.resize(nvec); - evec_coarse.resize(nvec,_CoarseGrid); - { - std::cout << GridLogIRL<< "checkpointCoarseRestore: Reading evals from "<evals_coarse.resize(nvec); + this->evec_coarse.resize(nvec,this->_CoarseGrid); + std::cout << GridLogIRL<< "checkpointCoarseRestore: Reading evals from "<evals_coarse); + assert(this->evals_coarse.size()==nvec); emptyUserRecord record; - { - std::cout << GridLogIRL<< "checkpointCoarseRestore: Reading evecs from "< ChebySmooth(cheby_smooth); - ProjectedFunctionHermOp ChebyOp (ChebySmooth,_FineOp,_Aggregate); - ImplicitlyRestartedLanczosSmoothedTester ChebySmoothTester(ChebyOp,ChebySmooth,_FineOp,_Aggregate,relax); - - for(int k=0;k 
Cheby(cheby_parms); - FunctionHermOp ChebyOp(Cheby,_FineOp); - PlainHermOp Op(_FineOp); - - evals_fine.resize(Nm); - _Aggregate.subspace.resize(Nm,_FineGrid); - - ImplicitlyRestartedLanczos IRL(ChebyOp,Op,Nstop,Nk,Nm,resid,MaxIt,betastp,MinRes); - - FineField src(_FineGrid); src=1.0; src.checkerboard = _checkerboard; - - int Nconv; - IRL.calc(evals_fine,_Aggregate.subspace,src,Nconv,false); - - // Shrink down to number saved - assert(Nstop>=nbasis); - assert(Nconv>=nbasis); - evals_fine.resize(nbasis); - _Aggregate.subspace.resize(nbasis,_FineGrid); - } - void calcCoarse(ChebyParams cheby_op,ChebyParams cheby_smooth,RealD relax, - int Nstop, int Nk, int Nm,RealD resid, - RealD MaxIt, RealD betastp, int MinRes) - { - Chebyshev Cheby(cheby_op); - ProjectedHermOp Op(_FineOp,_Aggregate); - ProjectedFunctionHermOp ChebyOp (Cheby,_FineOp,_Aggregate); - ////////////////////////////////////////////////////////////////////////////////////////////////// - // create a smoother and see if we can get a cheap convergence test and smooth inside the IRL - ////////////////////////////////////////////////////////////////////////////////////////////////// - - Chebyshev ChebySmooth(cheby_smooth); - ImplicitlyRestartedLanczosSmoothedTester ChebySmoothTester(ChebyOp,ChebySmooth,_FineOp,_Aggregate,relax); - - evals_coarse.resize(Nm); - evec_coarse.resize(Nm,_CoarseGrid); - - CoarseField src(_CoarseGrid); src=1.0; - - ImplicitlyRestartedLanczos IRL(ChebyOp,ChebyOp,ChebySmoothTester,Nstop,Nk,Nm,resid,MaxIt,betastp,MinRes); - int Nconv=0; - IRL.calc(evals_coarse,evec_coarse,src,Nconv,false); - assert(Nconv>=Nstop); - evals_coarse.resize(Nstop); - evec_coarse.resize (Nstop,_CoarseGrid); - for (int i=0;ievec_coarse[k],record); } + RD.close(); } }; - int main (int argc, char ** argv) { Grid_init(&argc,&argv); @@ -465,7 +153,9 @@ int main (int argc, char ** argv) { std::vector blockSize = Params.blockSize; // Grids - GridCartesian * UGrid = SpaceTimeGrid::makeFourDimGrid(GridDefaultLatt(), 
GridDefaultSimd(Nd,vComplex::Nsimd()),GridDefaultMpi()); + GridCartesian * UGrid = SpaceTimeGrid::makeFourDimGrid(GridDefaultLatt(), + GridDefaultSimd(Nd,vComplex::Nsimd()), + GridDefaultMpi()); GridRedBlackCartesian * UrbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(UGrid); GridCartesian * FGrid = SpaceTimeGrid::makeFiveDimGrid(Ls,UGrid); GridRedBlackCartesian * FrbGrid = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls,UGrid); @@ -516,12 +206,10 @@ int main (int argc, char ** argv) { const int nbasis= 60; assert(nbasis==Ns1); - LocalCoherenceLanczos _LocalCoherenceLanczos(FrbGrid,CoarseGrid5rb,HermOp,Odd); + LocalCoherenceLanczosScidac _LocalCoherenceLanczos(FrbGrid,CoarseGrid5rb,HermOp,Odd); std::cout << GridLogMessage << "Constructed LocalCoherenceLanczos" << std::endl; - if ( Params.doCoarse ) { - assert( (Params.doFine)||(Params.doFineRead)); - } + assert( (Params.doFine)||(Params.doFineRead)); if ( Params.doFine ) { std::cout << GridLogMessage << "Performing fine grid IRL Nstop "<< Ns1 << " Nk "< Date: Fri, 27 Oct 2017 09:43:22 +0100 Subject: [PATCH 188/377] Passes reload of coarse basis --- lib/algorithms/iterative/LocalCoherenceLanczos.h | 6 +++++- tests/lanczos/Test_dwf_compressed_lanczos_reorg.cc | 3 ++- 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/lib/algorithms/iterative/LocalCoherenceLanczos.h b/lib/algorithms/iterative/LocalCoherenceLanczos.h index 6b8fe62c..d5d1bbc2 100644 --- a/lib/algorithms/iterative/LocalCoherenceLanczos.h +++ b/lib/algorithms/iterative/LocalCoherenceLanczos.h @@ -285,7 +285,11 @@ public: ImplicitlyRestartedLanczosSmoothedTester ChebySmoothTester(ChebyOp,ChebySmooth,_FineOp,_Aggregate,relax); for(int k=0;k Date: Fri, 27 Oct 2017 10:29:34 +0100 Subject: [PATCH 189/377] Bug fix in the coarse restore... 
Think this is nearly there --- tests/lanczos/Test_dwf_compressed_lanczos_reorg.cc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/lanczos/Test_dwf_compressed_lanczos_reorg.cc b/tests/lanczos/Test_dwf_compressed_lanczos_reorg.cc index 0824cfa4..4c702a33 100644 --- a/tests/lanczos/Test_dwf_compressed_lanczos_reorg.cc +++ b/tests/lanczos/Test_dwf_compressed_lanczos_reorg.cc @@ -109,7 +109,7 @@ public: void checkpointCoarseRestore(std::string evecs_file,std::string evals_file,int nvec) { - std::cout << " resizing to " << nvec<< std::endl; + std::cout << "resizing coarse vecs to " << nvec<< std::endl; this->evals_coarse.resize(nvec); this->evec_coarse.resize(nvec,this->_CoarseGrid); std::cout << GridLogIRL<< "checkpointCoarseRestore: Reading evals from "<evec_coarse[k],record); } RD.close(); From 1ef424b1392038df12130b1ce2f855c8b1cc1dbd Mon Sep 17 00:00:00 2001 From: paboyle Date: Fri, 27 Oct 2017 14:20:35 +0100 Subject: [PATCH 190/377] Split grid Y2K bug fix attempt --- lib/communicator/Communicator_base.h | 14 +++++++++----- lib/communicator/Communicator_mpi.cc | 26 +++++++++++++++++++++----- lib/communicator/Communicator_none.cc | 8 ++++++-- lib/lattice/Lattice_transfer.h | 8 ++++---- 4 files changed, 40 insertions(+), 16 deletions(-) diff --git a/lib/communicator/Communicator_base.h b/lib/communicator/Communicator_base.h index 22c9e4d0..ff054497 100644 --- a/lib/communicator/Communicator_base.h +++ b/lib/communicator/Communicator_base.h @@ -274,12 +274,16 @@ class CartesianCommunicator { // std::cerr << " AllToAll in.size() "< void Broadcast(int root,obj &data) { diff --git a/lib/communicator/Communicator_mpi.cc b/lib/communicator/Communicator_mpi.cc index 5a2dc4d0..ef612f98 100644 --- a/lib/communicator/Communicator_mpi.cc +++ b/lib/communicator/Communicator_mpi.cc @@ -55,7 +55,9 @@ void CartesianCommunicator::Init(int *argc, char ***argv) { CartesianCommunicator::~CartesianCommunicator() { - if (communicator && !MPI::Is_finalized()) + 
int MPI_is_finalised; + MPI_Finalized(&MPI_is_finalised); + if (communicator && MPI_is_finalised) MPI_Comm_free(&communicator); } @@ -195,7 +197,7 @@ void CartesianCommunicator::Broadcast(int root,void* data, int bytes) communicator); assert(ierr==0); } -void CartesianCommunicator::AllToAll(int dim,void *in,void *out,int bytes) +void CartesianCommunicator::AllToAll(int dim,void *in,void *out,uint64_t words,uint64_t bytes) { std::vector row(_ndimension,1); assert(dim>=0 && dim<_ndimension); @@ -204,11 +206,25 @@ void CartesianCommunicator::AllToAll(int dim,void *in,void *out,int bytes) row[dim] = _processors[dim]; CartesianCommunicator Comm(row,*this); - Comm.AllToAll(in,out,bytes); + Comm.AllToAll(in,out,words,bytes); } -void CartesianCommunicator::AllToAll(void *in,void *out,int bytes) +void CartesianCommunicator::AllToAll(void *in,void *out,uint64_t words,uint64_t bytes) { - MPI_Alltoall(in ,bytes,MPI_BYTE,out,bytes,MPI_BYTE,communicator); + // MPI is a pain and uses "int" arguments + // 64*64*64*128*16 == 500Million elements of data. + // When 24*4 bytes multiples get 50x 10^9 >>> 2x10^9 Y2K bug. + // (Turns up on 32^3 x 64 Gparity too) + MPI_Datatype object; + int iwords; + int ibytes; + iwords = words; + ibytes = bytes; + assert(words == iwords); // safe to cast to int ? + assert(bytes == ibytes); // safe to cast to int ? + MPI_Type_contiguous(ibytes,MPI_BYTE,&object); + MPI_Type_commit(&object); + MPI_Alltoall(in,iwords,object,out,iwords,object,communicator); + MPI_Type_free(&object); } /////////////////////////////////////////////////////// // Should only be used prior to Grid Init finished. 
diff --git a/lib/communicator/Communicator_none.cc b/lib/communicator/Communicator_none.cc index 629a3e4a..a862d52a 100644 --- a/lib/communicator/Communicator_none.cc +++ b/lib/communicator/Communicator_none.cc @@ -100,9 +100,13 @@ void CartesianCommunicator::SendToRecvFromComplete(std::vector & { assert(0); } -void CartesianCommunicator::AllToAll(int dim,void *in,void *out,int bytes) +void CartesianCommunicator::AllToAll(int dim,void *in,void *out,uint64_t words,uint64_t bytes) { - bcopy(in,out,bytes); + bcopy(in,out,bytes*words); +} +void CartesianCommunicator::AllToAll(void *in,void *out,uint64_t words,uint64_t bytes) +{ + bcopy(in,out,bytes*words); } int CartesianCommunicator::RankWorld(void){return 0;} diff --git a/lib/lattice/Lattice_transfer.h b/lib/lattice/Lattice_transfer.h index 713a8788..bc59e9eb 100644 --- a/lib/lattice/Lattice_transfer.h +++ b/lib/lattice/Lattice_transfer.h @@ -790,8 +790,8 @@ void Grid_split(std::vector > & full,Lattice & split) ratio[d] = full_grid->_processors[d]/ split_grid->_processors[d]; } - int lsites = full_grid->lSites(); - Integer sz = lsites * nvector; + uint64_t lsites = full_grid->lSites(); + uint64_t sz = lsites * nvector; std::vector tmpdata(sz); std::vector alldata(sz); std::vector scalardata(lsites); @@ -908,8 +908,8 @@ void Grid_unsplit(std::vector > & full,Lattice & split) ratio[d] = full_grid->_processors[d]/ split_grid->_processors[d]; } - int lsites = full_grid->lSites(); - Integer sz = lsites * nvector; + uint64_t lsites = full_grid->lSites(); + uint64_t sz = lsites * nvector; std::vector tmpdata(sz); std::vector alldata(sz); std::vector scalardata(lsites); From f941c4ee180aa20b3f3a24a939018357457e5bbf Mon Sep 17 00:00:00 2001 From: Guido Cossu Date: Sun, 29 Oct 2017 11:43:33 +0000 Subject: [PATCH 191/377] Clover term force ok --- lib/qcd/action/fermion/WilsonCloverFermion.cc | 9 +- lib/qcd/action/fermion/WilsonCloverFermion.h | 24 ++-- lib/qcd/utils/WilsonLoops.h | 2 +- tests/forces/Test_wilsonclover_force.cc | 
118 +++++++++++++++--- 4 files changed, 118 insertions(+), 35 deletions(-) diff --git a/lib/qcd/action/fermion/WilsonCloverFermion.cc b/lib/qcd/action/fermion/WilsonCloverFermion.cc index 2159fffc..e678835a 100644 --- a/lib/qcd/action/fermion/WilsonCloverFermion.cc +++ b/lib/qcd/action/fermion/WilsonCloverFermion.cc @@ -48,10 +48,7 @@ RealD WilsonCloverFermion::M(const FermionField &in, FermionField &out) // Clover term Mooee(in, temp); - //hack - out = zero; - - + out= zero; out += temp; return norm2(out); } @@ -68,9 +65,7 @@ RealD WilsonCloverFermion::Mdag(const FermionField &in, FermionField &out) // Clover term MooeeDag(in, temp); - //hack - out = zero; - + out=zero; out += temp; return norm2(out); } diff --git a/lib/qcd/action/fermion/WilsonCloverFermion.h b/lib/qcd/action/fermion/WilsonCloverFermion.h index d8a42129..402a9a7e 100644 --- a/lib/qcd/action/fermion/WilsonCloverFermion.h +++ b/lib/qcd/action/fermion/WilsonCloverFermion.h @@ -122,6 +122,8 @@ public: // Clover term derivative /////////////////////////////////////////////////////////// Impl::outerProductImpl(Lambda, X, Y); + //std::cout << "Lambda:" << Lambda << std::endl; + Gamma::Algebra sigma[] = { Gamma::Algebra::SigmaXY, @@ -153,16 +155,18 @@ public: for (int nu = 0; nu < 4; nu++) { if (mu == nu) continue; - PropagatorField Slambda = Gamma(sigma[count]) * Lambda; - Impl::TraceSpinImpl(lambda, Slambda); //traceSpin - force_mu += Cmunu(U, lambda, mu, nu); + PropagatorField Slambda = Gamma(sigma[count]) * Lambda; // sigma checked + Impl::TraceSpinImpl(lambda, Slambda); // traceSpin ok + force_mu -= Cmunu(U, lambda, mu, nu); // checked count++; } pokeLorentz(clover_force, U[mu] * force_mu, mu); } - clover_force *= csw / 8.; + clover_force *= csw; force += clover_force; + + } // Computing C_{\mu \nu}(x) as in Eq.(B.39) in Zbigniew Sroczynski's PhD thesis @@ -170,20 +174,19 @@ public: { conformable(lambda._grid, U[0]._grid); GaugeLinkField out(lambda._grid), tmp(lambda._grid); - // insertion in upper 
staple // please check redundancy of shift operations - + // C1+ tmp = lambda * U[nu]; out = Impl::ShiftStaple(Impl::CovShiftForward(tmp, nu, Impl::CovShiftBackward(U[mu], mu, Impl::CovShiftIdentityBackward(U[nu], nu))), mu); - + // C2+ - tmp = U[mu] * Impl::CovShiftIdentityForward(adj(lambda), mu); + tmp = U[mu] * Impl::ShiftStaple(adj(lambda), mu); out += Impl::ShiftStaple(Impl::CovShiftForward(U[nu], nu, Impl::CovShiftBackward(tmp, mu, Impl::CovShiftIdentityBackward(U[nu], nu))), mu); - + // C3+ - tmp = U[nu] * Impl::CovShiftIdentityForward(adj(lambda), nu); + tmp = U[nu] * Impl::ShiftStaple(adj(lambda), nu); out += Impl::ShiftStaple(Impl::CovShiftForward(U[nu], nu, Impl::CovShiftBackward(U[mu], mu, Impl::CovShiftIdentityBackward(tmp, nu))), mu); // C4+ @@ -259,6 +262,7 @@ private: PARALLEL_FOR_LOOP for (int i = 0; i < CloverTerm._grid->oSites(); i++) { + T._odata[i]()(0, 0) = timesMinusI(F._odata[i]()()); T._odata[i]()(1, 1) = timesI(F._odata[i]()()); T._odata[i]()(2, 2) = timesMinusI(F._odata[i]()()); diff --git a/lib/qcd/utils/WilsonLoops.h b/lib/qcd/utils/WilsonLoops.h index fe813298..86609ffc 100644 --- a/lib/qcd/utils/WilsonLoops.h +++ b/lib/qcd/utils/WilsonLoops.h @@ -327,7 +327,7 @@ static void StapleMult(GaugeMat &staple, const GaugeLorentz &Umu, int mu) { static void FieldStrength(GaugeMat &FS, const GaugeLorentz &Umu, int mu, int nu){ // Fmn +--<--+ Ut +--<--+ // | | | | - // (x)+-->--+ +-->--+(x) + // (x)+-->--+ +-->--+(x) - h.c. 
// | | | | // +--<--+ +--<--+ diff --git a/tests/forces/Test_wilsonclover_force.cc b/tests/forces/Test_wilsonclover_force.cc index c99cfa98..82adb8ab 100644 --- a/tests/forces/Test_wilsonclover_force.cc +++ b/tests/forces/Test_wilsonclover_force.cc @@ -1,6 +1,6 @@ /************************************************************************************* - Grid physics library, www.github.com/paboyle/Grid + Grid physics library, www.github.com/paboyle/Grid Source file: ./tests/Test_wilson_force.cc @@ -45,14 +45,17 @@ int main(int argc, char **argv) int threads = GridThread::GetThreads(); std::cout << GridLogMessage << "Grid is setup to use " << threads << " threads" << std::endl; - std::vector seeds({1, 2, 3, 4}); + std::vector seeds({1, 2, 30, 50}); GridParallelRNG pRNG(&Grid); + std::vector vrand(4); std::srand(std::time(0)); std::generate(vrand.begin(), vrand.end(), std::rand); std::cout << GridLogMessage << vrand << std::endl; pRNG.SeedFixedIntegers(vrand); + + //pRNG.SeedFixedIntegers(seeds); LatticeFermion phi(&Grid); gaussian(pRNG, phi); @@ -61,16 +64,53 @@ int main(int argc, char **argv) LatticeGaugeField U(&Grid); - SU3::HotConfiguration(pRNG,U); +/* + std::vector x(4); // 4d fermions + std::vector gd = Grid.GlobalDimensions(); + Grid::QCD::SpinColourVector F; + Grid::Complex c; + + phi = zero; + for (x[0] = 0; x[0] < 1; x[0]++) + { + for (x[1] = 0; x[1] < 1; x[1]++) + { + for (x[2] = 0; x[2] < 1; x[2]++) + { + for (x[3] = 0; x[3] < 1; x[3]++) + { + for (int sp = 0; sp < 4; sp++) + { + for (int j = 0; j < 3; j++) // colours + { + F()(sp)(j) = Grid::Complex(0.0,0.0); + if (((sp == 0) && (j==0))) + { + c = Grid::Complex(1.0, 0.0); + F()(sp)(j) = c; + } + } + } + Grid::pokeSite(F, phi, x); + + } + } + } + } +*/ + + std::vector site = {0, 0, 0, 0}; + SU3::HotConfiguration(pRNG, U); //SU3::ColdConfiguration(pRNG, U); + //////////////////////////////////// // Unmodified matrix element //////////////////////////////////// RealD mass = -4.0; //kills the diagonal term 
Real csw = 1.0; WilsonCloverFermionR Dw(U, Grid, RBGrid, mass, csw); - + Dw.ImportGauge(U); Dw.M(phi, Mphi); ComplexD S = innerProduct(Mphi, Mphi); // Action : pdag MdagM p @@ -78,11 +118,23 @@ int main(int argc, char **argv) LatticeGaugeField UdSdU(&Grid); LatticeGaugeField tmp(&Grid); - Dw.MDeriv(tmp, Mphi, phi, DaggerNo); UdSdU = tmp; - Dw.MDeriv(tmp, phi, Mphi, DaggerYes); UdSdU += tmp; + //////////////////////////////////////////// + Dw.MDeriv(tmp, Mphi, phi, DaggerNo); + UdSdU = tmp; + Dw.MDeriv(tmp, phi, Mphi, DaggerYes); + UdSdU += tmp; + ///////////////////////////////////////////// + // Take the traceless antihermitian component - UdSdU = Ta(UdSdU); - + //UdSdU = Ta(UdSdU); + + //std::cout << UdSdU << std::endl; + //SU3::LatticeAlgebraVector hforce(&Grid); + LatticeColourMatrix mommu(&Grid); + //mommu = PeekIndex(UdSdU, 0); + //SU3::projectOnAlgebra(hforce, mommu); + //std::cout << hforce << std::endl; + //////////////////////////////////// // Modify the gauge field a little //////////////////////////////////// @@ -90,28 +142,63 @@ int main(int argc, char **argv) RealD Hmom = 0.0; RealD Hmomprime = 0.0; RealD Hmompp = 0.0; - LatticeColourMatrix mommu(&Grid); LatticeColourMatrix forcemu(&Grid); LatticeGaugeField mom(&Grid); LatticeGaugeField Uprime(&Grid); + for (int mu = 0; mu < Nd; mu++) { // Traceless antihermitian momentum; gaussian in lie alg SU3::GaussianFundamentalLieAlgebraMatrix(pRNG, mommu); Hmom -= real(sum(trace(mommu * mommu))); PokeIndex(mom, mommu, mu); } + /* + SU3::AlgebraVector h; + SU3::LatticeAlgebraVector hl(&Grid); + h()()(0) = 1.0; + hl = zero; + pokeSite(h, hl, site); + SU3::FundamentalLieAlgebraMatrix(hl, mommu); + mom = zero; + PokeIndex(mom, mommu, 0); + Hmom -= real(sum(trace(mommu * mommu))); + */ + /* parallel_for(int ss=0;ssoSites();ss++){ - for (int mu = 0; mu < Nd; mu++) + for (int mu = 0; mu < Nd; mu++) Uprime[ss]._internal[mu] = ProjectOnGroup(Exponentiate(mom[ss]._internal[mu], dt, 12) * U[ss]._internal[mu]); - } + } +*/ 
+ + for (int mu = 0; mu < Nd; mu++) + { + parallel_for(auto i = mom.begin(); i < mom.end(); i++) + { + Uprime[i](mu) = U[i](mu); + Uprime[i](mu) += mom[i](mu) * U[i](mu) * dt; + Uprime[i](mu) += mom[i](mu) * mom[i](mu) * U[i](mu) * (dt * dt / 2.0); + Uprime[i](mu) += mom[i](mu) * mom[i](mu) * mom[i](mu) * U[i](mu) * (dt * dt * dt / 6.0); + Uprime[i](mu) += mom[i](mu) * mom[i](mu) * mom[i](mu) * mom[i](mu) * U[i](mu) * (dt * dt * dt * dt / 24.0); + Uprime[i](mu) += mom[i](mu) * mom[i](mu) * mom[i](mu) * mom[i](mu) * mom[i](mu) * U[i](mu) * (dt * dt * dt * dt * dt / 120.0); + Uprime[i](mu) += mom[i](mu) * mom[i](mu) * mom[i](mu) * mom[i](mu) * mom[i](mu) * mom[i](mu) * U[i](mu) * (dt * dt * dt * dt * dt * dt / 720.0); + } + } std::cout << GridLogMessage << "Initial mom hamiltonian is " << Hmom << std::endl; - + // New action + LatticeGaugeField diff(&Grid); + diff = Uprime - U; + //std::cout << "Diff:" << diff << std::endl; Dw.ImportGauge(Uprime); Dw.M(phi, MphiPrime); + LatticeFermion DiffFermion(&Grid); + DiffFermion = MphiPrime - Mphi; + //std::cout << "DiffFermion:" << DiffFermion << std::endl; + //std::cout << "Mphi:" << Mphi << std::endl; + //std::cout << "MphiPrime:" << MphiPrime << std::endl; ComplexD Sprime = innerProduct(MphiPrime, MphiPrime); @@ -143,16 +230,14 @@ int main(int argc, char **argv) dSmom2 = zero; - // need for this??? 
- // ultimately it is just a 2.0 factor in UdSdU for (int mu = 0; mu < Nd; mu++) { - mommu = PeekIndex(UdSdU, mu); // P_mu = + mommu = PeekIndex(UdSdU, mu); // P_mu = mommu = Ta(mommu) * 2.0; // Mom = (P_mu - P_mu^dag) - trace(P_mu - P_mu^dag) PokeIndex(UdSdU, mommu, mu); // UdSdU_mu = Mom } - std::cout << GridLogMessage<< "Antihermiticity tests - 2 " << std::endl; + std::cout << GridLogMessage << "Antihermiticity tests - 2 " << std::endl; for (int mu = 0; mu < Nd; mu++) { mommu = PeekIndex(mom, mu); @@ -167,7 +252,6 @@ int main(int argc, char **argv) } ///////////////////////////////////////////////////// - for (int mu = 0; mu < Nd; mu++) { forcemu = PeekIndex(UdSdU, mu); From 749189fd7209181f7923cb77ba8075c0efaf420b Mon Sep 17 00:00:00 2001 From: Guido Cossu Date: Sun, 29 Oct 2017 12:03:08 +0000 Subject: [PATCH 192/377] Full clover force correct --- lib/qcd/action/fermion/WilsonCloverFermion.cc | 6 +- lib/qcd/action/fermion/WilsonCloverFermion.h | 2 +- tests/forces/Test_wilsonclover_force.cc | 123 ++---------------- 3 files changed, 15 insertions(+), 116 deletions(-) diff --git a/lib/qcd/action/fermion/WilsonCloverFermion.cc b/lib/qcd/action/fermion/WilsonCloverFermion.cc index e678835a..fff970a2 100644 --- a/lib/qcd/action/fermion/WilsonCloverFermion.cc +++ b/lib/qcd/action/fermion/WilsonCloverFermion.cc @@ -43,12 +43,11 @@ RealD WilsonCloverFermion::M(const FermionField &in, FermionField &out) // Wilson term out.checkerboard = in.checkerboard; - //this->Dhop(in, out, DaggerNo); + this->Dhop(in, out, DaggerNo); // Clover term Mooee(in, temp); - out= zero; out += temp; return norm2(out); } @@ -60,12 +59,11 @@ RealD WilsonCloverFermion::Mdag(const FermionField &in, FermionField &out) // Wilson term out.checkerboard = in.checkerboard; - //this->Dhop(in, out, DaggerYes); + this->Dhop(in, out, DaggerYes); // Clover term MooeeDag(in, temp); - out=zero; out += temp; return norm2(out); } diff --git a/lib/qcd/action/fermion/WilsonCloverFermion.h 
b/lib/qcd/action/fermion/WilsonCloverFermion.h index 402a9a7e..cd13b225 100644 --- a/lib/qcd/action/fermion/WilsonCloverFermion.h +++ b/lib/qcd/action/fermion/WilsonCloverFermion.h @@ -116,7 +116,7 @@ public: force = zero; // Derivative of the Wilson hopping term - //this->DhopDeriv(force, X, Y, dag); + this->DhopDeriv(force, X, Y, dag); /////////////////////////////////////////////////////////// // Clover term derivative diff --git a/tests/forces/Test_wilsonclover_force.cc b/tests/forces/Test_wilsonclover_force.cc index 82adb8ab..bcf67be4 100644 --- a/tests/forces/Test_wilsonclover_force.cc +++ b/tests/forces/Test_wilsonclover_force.cc @@ -48,13 +48,12 @@ int main(int argc, char **argv) std::vector seeds({1, 2, 30, 50}); GridParallelRNG pRNG(&Grid); - + std::vector vrand(4); std::srand(std::time(0)); std::generate(vrand.begin(), vrand.end(), std::rand); std::cout << GridLogMessage << vrand << std::endl; pRNG.SeedFixedIntegers(vrand); - //pRNG.SeedFixedIntegers(seeds); LatticeFermion phi(&Grid); @@ -64,50 +63,14 @@ int main(int argc, char **argv) LatticeGaugeField U(&Grid); -/* - std::vector x(4); // 4d fermions - std::vector gd = Grid.GlobalDimensions(); - Grid::QCD::SpinColourVector F; - Grid::Complex c; - - phi = zero; - for (x[0] = 0; x[0] < 1; x[0]++) - { - for (x[1] = 0; x[1] < 1; x[1]++) - { - for (x[2] = 0; x[2] < 1; x[2]++) - { - for (x[3] = 0; x[3] < 1; x[3]++) - { - for (int sp = 0; sp < 4; sp++) - { - for (int j = 0; j < 3; j++) // colours - { - F()(sp)(j) = Grid::Complex(0.0,0.0); - if (((sp == 0) && (j==0))) - { - c = Grid::Complex(1.0, 0.0); - F()(sp)(j) = c; - } - } - } - Grid::pokeSite(F, phi, x); - - } - } - } - } -*/ - std::vector site = {0, 0, 0, 0}; SU3::HotConfiguration(pRNG, U); - //SU3::ColdConfiguration(pRNG, U); - + //SU3::ColdConfiguration(pRNG, U);// Clover term zero //////////////////////////////////// // Unmodified matrix element //////////////////////////////////// - RealD mass = -4.0; //kills the diagonal term + RealD mass = 0.1; 
Real csw = 1.0; WilsonCloverFermionR Dw(U, Grid, RBGrid, mass, csw); Dw.ImportGauge(U); @@ -125,103 +88,42 @@ int main(int argc, char **argv) UdSdU += tmp; ///////////////////////////////////////////// - // Take the traceless antihermitian component - //UdSdU = Ta(UdSdU); - - //std::cout << UdSdU << std::endl; - //SU3::LatticeAlgebraVector hforce(&Grid); - LatticeColourMatrix mommu(&Grid); - //mommu = PeekIndex(UdSdU, 0); - //SU3::projectOnAlgebra(hforce, mommu); - //std::cout << hforce << std::endl; - //////////////////////////////////// // Modify the gauge field a little //////////////////////////////////// - RealD dt = 0.0001; + RealD dt = 0.00005; RealD Hmom = 0.0; RealD Hmomprime = 0.0; RealD Hmompp = 0.0; + LatticeColourMatrix mommu(&Grid); LatticeColourMatrix forcemu(&Grid); LatticeGaugeField mom(&Grid); LatticeGaugeField Uprime(&Grid); - - for (int mu = 0; mu < Nd; mu++) { + for (int mu = 0; mu < Nd; mu++) + { // Traceless antihermitian momentum; gaussian in lie alg SU3::GaussianFundamentalLieAlgebraMatrix(pRNG, mommu); Hmom -= real(sum(trace(mommu * mommu))); PokeIndex(mom, mommu, mu); - } - /* - SU3::AlgebraVector h; - SU3::LatticeAlgebraVector hl(&Grid); - h()()(0) = 1.0; - hl = zero; - pokeSite(h, hl, site); - SU3::FundamentalLieAlgebraMatrix(hl, mommu); - mom = zero; - PokeIndex(mom, mommu, 0); - Hmom -= real(sum(trace(mommu * mommu))); - */ - /* - parallel_for(int ss=0;ssoSites();ss++){ - for (int mu = 0; mu < Nd; mu++) - Uprime[ss]._internal[mu] = ProjectOnGroup(Exponentiate(mom[ss]._internal[mu], dt, 12) * U[ss]._internal[mu]); - } -*/ - - for (int mu = 0; mu < Nd; mu++) - { - parallel_for(auto i = mom.begin(); i < mom.end(); i++) + parallel_for(int ss = 0; ss < mom._grid->oSites(); ss++) { - Uprime[i](mu) = U[i](mu); - Uprime[i](mu) += mom[i](mu) * U[i](mu) * dt; - Uprime[i](mu) += mom[i](mu) * mom[i](mu) * U[i](mu) * (dt * dt / 2.0); - Uprime[i](mu) += mom[i](mu) * mom[i](mu) * mom[i](mu) * U[i](mu) * (dt * dt * dt / 6.0); - Uprime[i](mu) += 
mom[i](mu) * mom[i](mu) * mom[i](mu) * mom[i](mu) * U[i](mu) * (dt * dt * dt * dt / 24.0); - Uprime[i](mu) += mom[i](mu) * mom[i](mu) * mom[i](mu) * mom[i](mu) * mom[i](mu) * U[i](mu) * (dt * dt * dt * dt * dt / 120.0); - Uprime[i](mu) += mom[i](mu) * mom[i](mu) * mom[i](mu) * mom[i](mu) * mom[i](mu) * mom[i](mu) * U[i](mu) * (dt * dt * dt * dt * dt * dt / 720.0); + Uprime[ss]._internal[mu] = ProjectOnGroup(Exponentiate(mom[ss]._internal[mu], dt, 12) * U[ss]._internal[mu]); } } std::cout << GridLogMessage << "Initial mom hamiltonian is " << Hmom << std::endl; // New action - LatticeGaugeField diff(&Grid); - diff = Uprime - U; - //std::cout << "Diff:" << diff << std::endl; Dw.ImportGauge(Uprime); Dw.M(phi, MphiPrime); - LatticeFermion DiffFermion(&Grid); - DiffFermion = MphiPrime - Mphi; - //std::cout << "DiffFermion:" << DiffFermion << std::endl; - //std::cout << "Mphi:" << Mphi << std::endl; - //std::cout << "MphiPrime:" << MphiPrime << std::endl; - ComplexD Sprime = innerProduct(MphiPrime, MphiPrime); ////////////////////////////////////////////// // Use derivative to estimate dS ////////////////////////////////////////////// - /////////////////////////////////////////////////////// - std::cout << GridLogMessage << "Antihermiticity tests - 1 " << std::endl; - for (int mu = 0; mu < Nd; mu++) - { - mommu = PeekIndex(mom, mu); - std::cout << GridLogMessage << " Mommu " << norm2(mommu) << std::endl; - mommu = mommu + adj(mommu); - std::cout << GridLogMessage << " Test: Mommu + Mommudag " << norm2(mommu) << std::endl; - mommu = PeekIndex(UdSdU, mu); - std::cout << GridLogMessage << " dsdumu " << norm2(mommu) << std::endl; - mommu = mommu + adj(mommu); - std::cout << GridLogMessage << " Test: dsdumu + dag " << norm2(mommu) << std::endl; - std::cout << "" << std::endl; - } - //////////////////////////////////////////////////////// - LatticeComplex dS(&Grid); dS = zero; LatticeComplex dSmom(&Grid); @@ -229,7 +131,6 @@ int main(int argc, char **argv) LatticeComplex 
dSmom2(&Grid); dSmom2 = zero; - for (int mu = 0; mu < Nd; mu++) { mommu = PeekIndex(UdSdU, mu); // P_mu = @@ -237,7 +138,7 @@ int main(int argc, char **argv) PokeIndex(UdSdU, mommu, mu); // UdSdU_mu = Mom } - std::cout << GridLogMessage << "Antihermiticity tests - 2 " << std::endl; + std::cout << GridLogMessage << "Antihermiticity tests" << std::endl; for (int mu = 0; mu < Nd; mu++) { mommu = PeekIndex(mom, mu); @@ -279,8 +180,8 @@ int main(int argc, char **argv) std::cout << GridLogMessage << " S " << S << std::endl; std::cout << GridLogMessage << " Sprime " << Sprime << std::endl; - std::cout << GridLogMessage << "dS " << Sprime - S << std::endl; - std::cout << GridLogMessage << "predict dS " << dSpred << std::endl; + std::cout << GridLogMessage << "dS (S' - S) :" << Sprime - S << std::endl; + std::cout << GridLogMessage << "predict dS (force) :" << dSpred << std::endl; std::cout << GridLogMessage << "dSm " << dSm << std::endl; std::cout << GridLogMessage << "dSm2" << dSm2 << std::endl; From 689323f4eec85b159d82fe4b2b7097ff4312c70c Mon Sep 17 00:00:00 2001 From: paboyle Date: Mon, 30 Oct 2017 00:03:15 +0000 Subject: [PATCH 193/377] Reverse dim ordering lexico support --- lib/util/Lexicographic.h | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/lib/util/Lexicographic.h b/lib/util/Lexicographic.h index b922dba5..f5c55b74 100644 --- a/lib/util/Lexicographic.h +++ b/lib/util/Lexicographic.h @@ -26,6 +26,25 @@ namespace Grid{ } } + static inline void IndexFromCoorReversed (const std::vector& coor,int &index,const std::vector &dims){ + int nd=dims.size(); + int stride=1; + index=0; + for(int d=nd-1;d>=0;d--){ + index = index+stride*coor[d]; + stride=stride*dims[d]; + } + } + static inline void CoorFromIndexReversed (std::vector& coor,int index,const std::vector &dims){ + int nd= dims.size(); + coor.resize(nd); + for(int d=nd-1;d>=0;d--){ + coor[d] = index % dims[d]; + index = index / dims[d]; + } + } + + }; } From 
4a699b4da340280d0502fcaab6d31b598e924f93 Mon Sep 17 00:00:00 2001 From: paboyle Date: Mon, 30 Oct 2017 00:04:14 +0000 Subject: [PATCH 194/377] New rank can be found out --- lib/cartesian/Cartesian_base.h | 9 +++++++-- lib/cartesian/Cartesian_full.h | 11 +++++++++-- 2 files changed, 16 insertions(+), 4 deletions(-) diff --git a/lib/cartesian/Cartesian_base.h b/lib/cartesian/Cartesian_base.h index 6aa0e3c7..acc870de 100644 --- a/lib/cartesian/Cartesian_base.h +++ b/lib/cartesian/Cartesian_base.h @@ -44,13 +44,18 @@ namespace Grid{ class GridBase : public CartesianCommunicator , public GridThread { public: - + int dummy; // Give Lattice access template friend class Lattice; GridBase(const std::vector & processor_grid) : CartesianCommunicator(processor_grid) {}; GridBase(const std::vector & processor_grid, - const CartesianCommunicator &parent) : CartesianCommunicator(processor_grid,parent) {}; + const CartesianCommunicator &parent, + int &split_rank) + : CartesianCommunicator(processor_grid,parent,split_rank) {}; + GridBase(const std::vector & processor_grid, + const CartesianCommunicator &parent) + : CartesianCommunicator(processor_grid,parent,dummy) {}; virtual ~GridBase() = default; diff --git a/lib/cartesian/Cartesian_full.h b/lib/cartesian/Cartesian_full.h index c7ea68c9..9273abf3 100644 --- a/lib/cartesian/Cartesian_full.h +++ b/lib/cartesian/Cartesian_full.h @@ -38,7 +38,7 @@ namespace Grid{ class GridCartesian: public GridBase { public: - + int dummy; virtual int CheckerBoardFromOindexTable (int Oindex) { return 0; } @@ -67,7 +67,14 @@ public: GridCartesian(const std::vector &dimensions, const std::vector &simd_layout, const std::vector &processor_grid, - const GridCartesian &parent) : GridBase(processor_grid,parent) + const GridCartesian &parent) : GridBase(processor_grid,parent,dummy) + { + Init(dimensions,simd_layout,processor_grid); + } + GridCartesian(const std::vector &dimensions, + const std::vector &simd_layout, + const std::vector &processor_grid, + 
const GridCartesian &parent,int &split_rank) : GridBase(processor_grid,parent,split_rank) { Init(dimensions,simd_layout,processor_grid); } From fe4d9b003ca9c38ff6ec15e7445c22b0f4a72ade Mon Sep 17 00:00:00 2001 From: paboyle Date: Mon, 30 Oct 2017 00:04:47 +0000 Subject: [PATCH 195/377] More digits --- lib/algorithms/iterative/ConjugateGradient.h | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/lib/algorithms/iterative/ConjugateGradient.h b/lib/algorithms/iterative/ConjugateGradient.h index 5c968e04..0d4e51c7 100644 --- a/lib/algorithms/iterative/ConjugateGradient.h +++ b/lib/algorithms/iterative/ConjugateGradient.h @@ -78,12 +78,12 @@ class ConjugateGradient : public OperatorFunction { cp = a; ssq = norm2(src); - std::cout << GridLogIterative << std::setprecision(4) << "ConjugateGradient: guess " << guess << std::endl; - std::cout << GridLogIterative << std::setprecision(4) << "ConjugateGradient: src " << ssq << std::endl; - std::cout << GridLogIterative << std::setprecision(4) << "ConjugateGradient: mp " << d << std::endl; - std::cout << GridLogIterative << std::setprecision(4) << "ConjugateGradient: mmp " << b << std::endl; - std::cout << GridLogIterative << std::setprecision(4) << "ConjugateGradient: cp,r " << cp << std::endl; - std::cout << GridLogIterative << std::setprecision(4) << "ConjugateGradient: p " << a << std::endl; + std::cout << GridLogIterative << std::setprecision(8) << "ConjugateGradient: guess " << guess << std::endl; + std::cout << GridLogIterative << std::setprecision(8) << "ConjugateGradient: src " << ssq << std::endl; + std::cout << GridLogIterative << std::setprecision(8) << "ConjugateGradient: mp " << d << std::endl; + std::cout << GridLogIterative << std::setprecision(8) << "ConjugateGradient: mmp " << b << std::endl; + std::cout << GridLogIterative << std::setprecision(8) << "ConjugateGradient: cp,r " << cp << std::endl; + std::cout << GridLogIterative << std::setprecision(8) << "ConjugateGradient: p " << 
a << std::endl; RealD rsq = Tolerance * Tolerance * ssq; @@ -92,7 +92,7 @@ class ConjugateGradient : public OperatorFunction { return; } - std::cout << GridLogIterative << std::setprecision(4) + std::cout << GridLogIterative << std::setprecision(8) << "ConjugateGradient: k=0 residual " << cp << " target " << rsq << std::endl; GridStopWatch LinalgTimer; From 5bf42e1e150cb0e9116e427653955cb4398b1326 Mon Sep 17 00:00:00 2001 From: paboyle Date: Mon, 30 Oct 2017 00:05:21 +0000 Subject: [PATCH 196/377] Update --- tests/solver/Test_dwf_hdcr.cc | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/solver/Test_dwf_hdcr.cc b/tests/solver/Test_dwf_hdcr.cc index c553ba0a..b3373238 100644 --- a/tests/solver/Test_dwf_hdcr.cc +++ b/tests/solver/Test_dwf_hdcr.cc @@ -555,13 +555,13 @@ int main (int argc, char ** argv) std::cout< HermDefOp(Ddwf); - Subspace Aggregates(Coarse5d,FGrid); + Subspace Aggregates(Coarse5d,FGrid,0); // Aggregates.CreateSubspace(RNG5,HermDefOp,nbasis); assert ( (nbasis & 0x1)==0); int nb=nbasis/2; std::cout< Date: Mon, 30 Oct 2017 00:16:12 +0000 Subject: [PATCH 197/377] Communicator updates for split grid --- lib/communicator/Communicator_base.cc | 63 +++++++++++++++++++-------- lib/communicator/Communicator_base.h | 2 +- lib/communicator/Communicator_mpi.cc | 3 +- 3 files changed, 47 insertions(+), 21 deletions(-) diff --git a/lib/communicator/Communicator_base.cc b/lib/communicator/Communicator_base.cc index ce9a3cf0..a72c75fe 100644 --- a/lib/communicator/Communicator_base.cc +++ b/lib/communicator/Communicator_base.cc @@ -97,9 +97,9 @@ void CartesianCommunicator::GlobalSumVector(ComplexD *c,int N) } -#if defined( GRID_COMMS_MPI) || defined (GRID_COMMS_MPIT) +#if defined( GRID_COMMS_MPI) || defined (GRID_COMMS_MPIT) || defined (GRID_COMMS_MPI3) -CartesianCommunicator::CartesianCommunicator(const std::vector &processors,const CartesianCommunicator &parent) +CartesianCommunicator::CartesianCommunicator(const std::vector 
&processors,const CartesianCommunicator &parent,int &srank) { _ndimension = processors.size(); assert(_ndimension = parent._ndimension); @@ -124,33 +124,51 @@ CartesianCommunicator::CartesianCommunicator(const std::vector &processors, for(int d=0;d<_ndimension;d++){ ccoor[d] = parent._processor_coor[d] % processors[d]; scoor[d] = parent._processor_coor[d] / processors[d]; - ssize[d] = parent._processors[d]/ processors[d]; + ssize[d] = parent._processors[d] / processors[d]; } - int crank,srank; // rank within subcomm ; rank of subcomm within blocks of subcomms - Lexicographic::IndexFromCoor(ccoor,crank,processors); - Lexicographic::IndexFromCoor(scoor,srank,ssize); + int crank; // rank within subcomm ; srank is rank of subcomm within blocks of subcomms + // Mpi uses the reverse Lexico convention to us + Lexicographic::IndexFromCoorReversed(ccoor,crank,processors); + Lexicographic::IndexFromCoorReversed(scoor,srank,ssize); MPI_Comm comm_split; if ( Nchild > 1 ) { - // std::cout << GridLogMessage<<"Child communicator of "<< std::hex << parent.communicator << std::dec< &processors, ////////////////////////////////////////////////////////////////////////////////////////////////////// void CartesianCommunicator::InitFromMPICommunicator(const std::vector &processors, MPI_Comm communicator_base) { - // if ( communicator_base != communicator_world ) { - // std::cout << "Cartesian communicator created with a non-world communicator"< &proc } std::vector periodic(_ndimension,1); - MPI_Cart_create(communicator_base, _ndimension,&_processors[0],&periodic[0],1,&communicator); + MPI_Cart_create(communicator_base, _ndimension,&_processors[0],&periodic[0],0,&communicator); MPI_Comm_rank(communicator,&_processor); MPI_Cart_coords(communicator,_processor,_ndimension,&_processor_coor[0]); + if ( communicator_base != communicator_world ) { + std::cout << "Cartesian communicator created with a non-world communicator"< &processors,const CartesianCommunicator &parent); + 
CartesianCommunicator(const std::vector &processors,const CartesianCommunicator &parent,int &srank); CartesianCommunicator(const std::vector &pdimensions_in); virtual ~CartesianCommunicator(); diff --git a/lib/communicator/Communicator_mpi.cc b/lib/communicator/Communicator_mpi.cc index ef612f98..5593aa8b 100644 --- a/lib/communicator/Communicator_mpi.cc +++ b/lib/communicator/Communicator_mpi.cc @@ -205,7 +205,8 @@ void CartesianCommunicator::AllToAll(int dim,void *in,void *out,uint64_t words, // Split the communicator row[dim] = _processors[dim]; - CartesianCommunicator Comm(row,*this); + int me; + CartesianCommunicator Comm(row,*this,me); Comm.AllToAll(in,out,words,bytes); } void CartesianCommunicator::AllToAll(void *in,void *out,uint64_t words,uint64_t bytes) From a7f72eb9946d782e48fe315be066ca95b5c097b6 Mon Sep 17 00:00:00 2001 From: paboyle Date: Mon, 30 Oct 2017 00:22:06 +0000 Subject: [PATCH 198/377] SHaking out --- lib/lattice/Lattice_transfer.h | 24 ++++++++++++++++-------- 1 file changed, 16 insertions(+), 8 deletions(-) diff --git a/lib/lattice/Lattice_transfer.h b/lib/lattice/Lattice_transfer.h index 962cdeb1..1b09217b 100644 --- a/lib/lattice/Lattice_transfer.h +++ b/lib/lattice/Lattice_transfer.h @@ -757,6 +757,7 @@ void precisionChange(Lattice &out, const Lattice &in){ // NB: Easiest to programme if keep in lex order. 
// ///////////////////////////////////////////////////////// + template void Grid_split(std::vector > & full,Lattice & split) { @@ -805,6 +806,7 @@ void Grid_split(std::vector > & full,Lattice & split) std::vector tmpdata(sz); std::vector alldata(sz); std::vector scalardata(lsites); + for(int v=0;v > & full,Lattice & split) std::vector ldims = full_grid->_ldimensions; std::vector lcoor(ndim); - for(int d=0;d=0;d--){ if ( ratio[d] != 1 ) { full_grid ->AllToAll(d,alldata,tmpdata); - + // std::cout << GridLogMessage << "Grid_split: dim " <_processors[d]< > & full,Lattice & split) int rmul=nvec*lsites; int vmul= lsites; alldata[rsite] = tmpdata[lsite+r*rmul+v*vmul]; - + // if ( lsite==0 ) { + // std::cout << "Grid_split: grow alldata["< > & full,Lattice & split) } } } - vectorizeFromLexOrdArray(alldata,split); } @@ -936,10 +944,12 @@ void Grid_unsplit(std::vector > & full,Lattice & split) lsites = split_grid->lSites(); std::vector ldims = split_grid->_ldimensions; - for(int d=ndim-1;d>=0;d--){ + // for(int d=ndim-1;d>=0;d--){ + for(int d=0;d_processors[d] > 1 ) { tmpdata = alldata; split_grid->AllToAll(d,tmpdata,alldata); @@ -985,13 +995,11 @@ void Grid_unsplit(std::vector > & full,Lattice & split) lsites = full_grid->lSites(); for(int v=0;v Date: Mon, 30 Oct 2017 00:22:52 +0000 Subject: [PATCH 199/377] : --- tests/solver/Test_dwf_mrhs_cg.cc | 25 ++++++++++++++++++------- 1 file changed, 18 insertions(+), 7 deletions(-) diff --git a/tests/solver/Test_dwf_mrhs_cg.cc b/tests/solver/Test_dwf_mrhs_cg.cc index 079fa85a..207e1331 100644 --- a/tests/solver/Test_dwf_mrhs_cg.cc +++ b/tests/solver/Test_dwf_mrhs_cg.cc @@ -52,15 +52,28 @@ int main (int argc, char ** argv) GridRedBlackCartesian * rbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(UGrid); GridRedBlackCartesian * FrbGrid = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls,UGrid); - int nrhs = UGrid->RankCount() ; - ///////////////////////////////////////////// // Split into 1^4 mpi communicators 
///////////////////////////////////////////// + for(int i=0;i> mpi_split[k]; + } + break; + } + } + + int nrhs = 1; + int me; + for(int i=0;i seeds({1,2,3,4}); - GridParallelRNG pRNG(UGrid ); pRNG.SeedFixedIntegers(seeds); GridParallelRNG pRNG5(FGrid); pRNG5.SeedFixedIntegers(seeds); std::vector src(nrhs,FGrid); @@ -93,7 +105,7 @@ int main (int argc, char ** argv) emptyUserRecord record; std::string file("./scratch.scidac"); std::string filef("./scratch.scidac.ferm"); - int me = UGrid->ThisRank(); + LatticeGaugeField s_Umu(SGrid); FermionField s_src(SFGrid); FermionField s_src_split(SFGrid); @@ -169,7 +181,7 @@ int main (int argc, char ** argv) for(int n=0;nBarrier(); if ( n==me ) { - std::cerr << GridLogMessage<<"Split "<< me << " " << norm2(s_src_split) << " " << norm2(s_src)<< " diff " << norm2(s_tmp)<Barrier(); } @@ -218,7 +230,6 @@ int main (int argc, char ** argv) std::cout << " diff " < Date: Mon, 30 Oct 2017 00:23:34 +0000 Subject: [PATCH 200/377] Extended sub comm supported --- tests/solver/Test_split_grid.cc | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) diff --git a/tests/solver/Test_split_grid.cc b/tests/solver/Test_split_grid.cc index 90969b85..2b6a4bf7 100644 --- a/tests/solver/Test_split_grid.cc +++ b/tests/solver/Test_split_grid.cc @@ -52,11 +52,24 @@ int main (int argc, char ** argv) GridRedBlackCartesian * rbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(UGrid); GridRedBlackCartesian * FrbGrid = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls,UGrid); - int nrhs = UGrid->RankCount() ; - ///////////////////////////////////////////// // Split into 1^4 mpi communicators ///////////////////////////////////////////// + + for(int i=0;i> mpi_split[k]; + } + break; + } + } + + int nrhs = 1; + for(int i=0;i Date: Mon, 30 Oct 2017 00:24:11 +0000 Subject: [PATCH 201/377] Get subrank info from communicator constructor --- tests/solver/Test_dwf_mrhs_cg_mpieo.cc | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git 
a/tests/solver/Test_dwf_mrhs_cg_mpieo.cc b/tests/solver/Test_dwf_mrhs_cg_mpieo.cc index 14115b59..a6dfcd57 100644 --- a/tests/solver/Test_dwf_mrhs_cg_mpieo.cc +++ b/tests/solver/Test_dwf_mrhs_cg_mpieo.cc @@ -47,7 +47,9 @@ int main (int argc, char ** argv) std::vector mpi_layout = GridDefaultMpi(); std::vector mpi_split (mpi_layout.size(),1); - GridCartesian * UGrid = SpaceTimeGrid::makeFourDimGrid(GridDefaultLatt(), GridDefaultSimd(Nd,vComplex::Nsimd()),GridDefaultMpi()); + GridCartesian * UGrid = SpaceTimeGrid::makeFourDimGrid(GridDefaultLatt(), + GridDefaultSimd(Nd,vComplex::Nsimd()), + GridDefaultMpi()); GridCartesian * FGrid = SpaceTimeGrid::makeFiveDimGrid(Ls,UGrid); GridRedBlackCartesian * rbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(UGrid); GridRedBlackCartesian * FrbGrid = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls,UGrid); @@ -57,10 +59,11 @@ int main (int argc, char ** argv) ///////////////////////////////////////////// // Split into 1^4 mpi communicators ///////////////////////////////////////////// + int me; GridCartesian * SGrid = new GridCartesian(GridDefaultLatt(), GridDefaultSimd(Nd,vComplex::Nsimd()), mpi_split, - *UGrid); + *UGrid,me); GridCartesian * SFGrid = SpaceTimeGrid::makeFiveDimGrid(Ls,SGrid); GridRedBlackCartesian * SrbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(SGrid); @@ -89,8 +92,6 @@ int main (int argc, char ** argv) ///////////////// // MPI only sends ///////////////// - int me = UGrid->ThisRank(); - LatticeGaugeField s_Umu(SGrid); FermionField s_src(SFGrid); FermionField s_src_e(SFrbGrid); From 78e8704eacb41fae706e50c24ae0baa6b17b9481 Mon Sep 17 00:00:00 2001 From: paboyle Date: Mon, 30 Oct 2017 00:25:31 +0000 Subject: [PATCH 202/377] Shaking out --- tests/solver/Test_dwf_mrhs_cg_mpi.cc | 99 +++++++++++++++++++++++++--- 1 file changed, 89 insertions(+), 10 deletions(-) diff --git a/tests/solver/Test_dwf_mrhs_cg_mpi.cc b/tests/solver/Test_dwf_mrhs_cg_mpi.cc index fbc6dd32..f640edff 100644 --- a/tests/solver/Test_dwf_mrhs_cg_mpi.cc 
+++ b/tests/solver/Test_dwf_mrhs_cg_mpi.cc @@ -1,4 +1,4 @@ - /************************************************************************************* + /************************************************************************************* Grid physics library, www.github.com/paboyle/Grid @@ -47,20 +47,36 @@ int main (int argc, char ** argv) std::vector mpi_layout = GridDefaultMpi(); std::vector mpi_split (mpi_layout.size(),1); - GridCartesian * UGrid = SpaceTimeGrid::makeFourDimGrid(GridDefaultLatt(), GridDefaultSimd(Nd,vComplex::Nsimd()),GridDefaultMpi()); + GridCartesian * UGrid = SpaceTimeGrid::makeFourDimGrid(GridDefaultLatt(), + GridDefaultSimd(Nd,vComplex::Nsimd()), + GridDefaultMpi()); GridCartesian * FGrid = SpaceTimeGrid::makeFiveDimGrid(Ls,UGrid); GridRedBlackCartesian * rbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(UGrid); GridRedBlackCartesian * FrbGrid = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls,UGrid); - int nrhs = UGrid->RankCount() ; - ///////////////////////////////////////////// // Split into 1^4 mpi communicators ///////////////////////////////////////////// + + for(int i=0;i> mpi_split[k]; + } + break; + } + } + + int nrhs = 1; + int me; + for(int i=0;i result(nrhs,FGrid); FermionField tmp(FGrid); - for(int s=0;sThisRank(); - LatticeGaugeField s_Umu(SGrid); FermionField s_src(SFGrid); FermionField s_tmp(SFGrid); @@ -98,6 +144,36 @@ int main (int argc, char ** argv) /////////////////////////////////////////////////////////////// Grid_split (Umu,s_Umu); Grid_split (src,s_src); + std::cout << " split rank " < HermOp(Ddwf); MdagMLinearOperator HermOpCk(Dchk); - ConjugateGradient CG((1.0e-5/(me+1)),10000); + ConjugateGradient CG((1.0e-5),10000); s_res = zero; CG(HermOp,s_src,s_res); + std::cout << " s_res norm "< Date: Mon, 30 Oct 2017 01:14:11 +0000 Subject: [PATCH 203/377] No compile on comms == none fix --- lib/communicator/Communicator_none.cc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git 
a/lib/communicator/Communicator_none.cc b/lib/communicator/Communicator_none.cc index a862d52a..26b330a7 100644 --- a/lib/communicator/Communicator_none.cc +++ b/lib/communicator/Communicator_none.cc @@ -38,8 +38,8 @@ void CartesianCommunicator::Init(int *argc, char *** arv) ShmInitGeneric(); } -CartesianCommunicator::CartesianCommunicator(const std::vector &processors,const CartesianCommunicator &parent) - : CartesianCommunicator(processors) {} +CartesianCommunicator::CartesianCommunicator(const std::vector &processors,const CartesianCommunicator &parent,int &srank) + : CartesianCommunicator(processors) { srank=0;} CartesianCommunicator::CartesianCommunicator(const std::vector &processors) { From fa5e4add47bfdb2d0c54486ebb99236b9db11326 Mon Sep 17 00:00:00 2001 From: Guido Cossu Date: Tue, 31 Oct 2017 18:20:38 +0000 Subject: [PATCH 204/377] Added support for anisotropy to the WilsonFermion class --- lib/qcd/action/fermion/WilsonFermion.cc | 42 ++++++++++++++++++++----- lib/qcd/action/fermion/WilsonFermion.h | 21 +++++++++++-- tests/qdpxx/Test_qdpxx_wilson.cc | 28 ++++++++++++++--- 3 files changed, 76 insertions(+), 15 deletions(-) diff --git a/lib/qcd/action/fermion/WilsonFermion.cc b/lib/qcd/action/fermion/WilsonFermion.cc index 19f9674d..55ef5a51 100644 --- a/lib/qcd/action/fermion/WilsonFermion.cc +++ b/lib/qcd/action/fermion/WilsonFermion.cc @@ -47,7 +47,8 @@ int WilsonFermionStatic::HandOptDslash; template WilsonFermion::WilsonFermion(GaugeField &_Umu, GridCartesian &Fgrid, GridRedBlackCartesian &Hgrid, RealD _mass, - const ImplParams &p) + const ImplParams &p, + const WilsonAnisotropyCoefficients &anis) : Kernels(p), _grid(&Fgrid), _cbgrid(&Hgrid), @@ -60,16 +61,41 @@ WilsonFermion::WilsonFermion(GaugeField &_Umu, GridCartesian &Fgrid, Umu(&Fgrid), UmuEven(&Hgrid), UmuOdd(&Hgrid), - _tmp(&Hgrid) + _tmp(&Hgrid), + anisotropyCoeff(anis) { // Allocate the required comms buffer ImportGauge(_Umu); + if (anisotropyCoeff.isAnisotropic){ + diag_mass = mass + 1.0 + 
(Nd-1)*(anisotropyCoeff.nu / anisotropyCoeff.xi_0); + } else { + diag_mass = 4.0 + mass; + } + + } template void WilsonFermion::ImportGauge(const GaugeField &_Umu) { GaugeField HUmu(_Umu._grid); - HUmu = _Umu * (-0.5); + + //Here multiply the anisotropy coefficients + if (anisotropyCoeff.isAnisotropic) + { + + for (int mu = 0; mu < Nd; mu++) + { + GaugeLinkField U_dir = (-0.5)*PeekIndex(_Umu, mu); + if (mu != anisotropyCoeff.t_direction) + U_dir *= (anisotropyCoeff.nu / anisotropyCoeff.xi_0); + + PokeIndex(HUmu, U_dir, mu); + } + } + else + { + HUmu = _Umu * (-0.5); + } Impl::DoubleStore(GaugeGrid(), Umu, HUmu); pickCheckerboard(Even, UmuEven, Umu); pickCheckerboard(Odd, UmuOdd, Umu); @@ -83,14 +109,14 @@ template RealD WilsonFermion::M(const FermionField &in, FermionField &out) { out.checkerboard = in.checkerboard; Dhop(in, out, DaggerNo); - return axpy_norm(out, 4 + mass, in, out); + return axpy_norm(out, diag_mass, in, out); } template RealD WilsonFermion::Mdag(const FermionField &in, FermionField &out) { out.checkerboard = in.checkerboard; Dhop(in, out, DaggerYes); - return axpy_norm(out, 4 + mass, in, out); + return axpy_norm(out, diag_mass, in, out); } template @@ -114,7 +140,7 @@ void WilsonFermion::MeooeDag(const FermionField &in, FermionField &out) { template void WilsonFermion::Mooee(const FermionField &in, FermionField &out) { out.checkerboard = in.checkerboard; - typename FermionField::scalar_type scal(4.0 + mass); + typename FermionField::scalar_type scal(diag_mass); out = scal * in; } @@ -127,7 +153,7 @@ void WilsonFermion::MooeeDag(const FermionField &in, FermionField &out) { template void WilsonFermion::MooeeInv(const FermionField &in, FermionField &out) { out.checkerboard = in.checkerboard; - out = (1.0/(4.0+mass))*in; + out = (1.0/(diag_mass))*in; } template @@ -204,7 +230,7 @@ void WilsonFermion::DerivInternal(StencilImpl &st, DoubledGaugeField &U, FermionField Btilde(B._grid); FermionField Atilde(B._grid); - Atilde = A; + Atilde = A;//redundant 
st.HaloExchange(B, compressor); diff --git a/lib/qcd/action/fermion/WilsonFermion.h b/lib/qcd/action/fermion/WilsonFermion.h index 50f4f884..7b2b5206 100644 --- a/lib/qcd/action/fermion/WilsonFermion.h +++ b/lib/qcd/action/fermion/WilsonFermion.h @@ -44,6 +44,19 @@ class WilsonFermionStatic { static const int npoint = 8; }; +struct WilsonAnisotropyCoefficients{ + bool isAnisotropic; + int t_direction; + double xi_0; + double nu; + + WilsonAnisotropyCoefficients(): + isAnisotropic(false), + t_direction(Nd-1), + xi_0(1.0), + nu(1.0){} +}; + template class WilsonFermion : public WilsonKernels, public WilsonFermionStatic { public: @@ -117,8 +130,9 @@ class WilsonFermion : public WilsonKernels, public WilsonFermionStatic { // Constructor WilsonFermion(GaugeField &_Umu, GridCartesian &Fgrid, - GridRedBlackCartesian &Hgrid, RealD _mass, - const ImplParams &p = ImplParams()); + GridRedBlackCartesian &Hgrid, RealD _mass, + const ImplParams &p = ImplParams(), + const WilsonAnisotropyCoefficients &anis = WilsonAnisotropyCoefficients() ); // DoubleStore impl dependent void ImportGauge(const GaugeField &_Umu); @@ -130,6 +144,7 @@ class WilsonFermion : public WilsonKernels, public WilsonFermionStatic { // protected: public: RealD mass; + RealD diag_mass; GridBase *_grid; GridBase *_cbgrid; @@ -146,6 +161,8 @@ class WilsonFermion : public WilsonKernels, public WilsonFermionStatic { LebesgueOrder Lebesgue; LebesgueOrder LebesgueEvenOdd; + + WilsonAnisotropyCoefficients anisotropyCoeff; }; typedef WilsonFermion WilsonFermionF; diff --git a/tests/qdpxx/Test_qdpxx_wilson.cc b/tests/qdpxx/Test_qdpxx_wilson.cc index 6e6eb213..a084ebad 100644 --- a/tests/qdpxx/Test_qdpxx_wilson.cc +++ b/tests/qdpxx/Test_qdpxx_wilson.cc @@ -32,7 +32,7 @@ #include // Mass -double mq = 0.02; +double mq = 0.1; // Define Wilson Types typedef Grid::QCD::WilsonImplR::FermionField FermionField; @@ -255,6 +255,12 @@ public: Chroma::WilsonFermActParams p; p.Mass = _mq; + AnisoParam_t _apar; + _apar.anisoP = true; 
+ _apar.t_dir = 3; // in 4d + _apar.xi_0 = 2.0; + _apar.nu = 1.0; + p.anisoParam = _apar; Chroma::Handle> fbc(new Chroma::SimpleFermBC(bcs)); Chroma::Handle> cfs(new Chroma::CreateSimpleFermState(fbc)); @@ -269,7 +275,13 @@ public: p.Mass = _mq; p.clovCoeffR = QDP::Real(1.0); p.clovCoeffT = QDP::Real(1.0); - Real u0 = QDP::Real(1.0); + p.u0 = QDP::Real(1.0); + AnisoParam_t _apar; + _apar.anisoP = false; + _apar.t_dir = 3; // in 4d + _apar.xi_0 = 2.0; + _apar.nu = 1.0; + p.anisoParam = _apar; Chroma::Handle> fbc(new Chroma::SimpleFermBC(bcs)); Chroma::Handle> cfs(new Chroma::CreateSimpleFermState(fbc)); @@ -391,8 +403,13 @@ void calc_grid(ChromaAction action, Grid::QCD::LatticeGaugeField &Umu, Grid::QCD if (action == Wilson) { - - Grid::QCD::WilsonFermionR Wf(Umu, *UGrid, *UrbGrid, _mass); + WilsonAnisotropyCoefficients anis; + anis.isAnisotropic = true; + anis.t_direction = 3; + anis.xi_0 = 2.0; + anis.nu = 1.0; + WilsonImplParams iParam; + Grid::QCD::WilsonFermionR Wf(Umu, *UGrid, *UrbGrid, _mass, iParam, anis); std::cout << Grid::GridLogMessage << " Calling Grid Wilson Fermion multiply " << std::endl; @@ -406,7 +423,8 @@ void calc_grid(ChromaAction action, Grid::QCD::LatticeGaugeField &Umu, Grid::QCD if (action == WilsonClover) { Grid::RealD _csw = 1.0; - + WilsonAnisotropyCoefficients anis; + WilsonImplParams implParam; Grid::QCD::WilsonCloverFermionR Wf(Umu, *UGrid, *UrbGrid, _mass, _csw); Wf.ImportGauge(Umu); From 8e057721a91c3d58bb3c076f111504aacb515906 Mon Sep 17 00:00:00 2001 From: Guido Cossu Date: Wed, 1 Nov 2017 12:50:54 +0000 Subject: [PATCH 205/377] Anisotropic Clover term written and tested --- lib/algorithms/LinearOperator.h | 10 +- lib/qcd/action/fermion/WilsonCloverFermion.cc | 102 +--- lib/qcd/action/fermion/WilsonCloverFermion.h | 116 +++-- lib/qcd/action/fermion/WilsonFermion.h | 9 +- tests/core/Test_wilson_clover.cc | 492 ++++++++++-------- tests/forces/Test_wilsonclover_force.cc | 2 +- .../Test_hmc_EOWilsonCloverFermionGauge.cc | 139 +++++ 
.../hmc/Test_hmc_WilsonCloverFermionGauge.cc | 126 +++++ tests/qdpxx/Test_qdpxx_wilson.cc | 15 +- 9 files changed, 637 insertions(+), 374 deletions(-) create mode 100644 tests/hmc/Test_hmc_EOWilsonCloverFermionGauge.cc create mode 100644 tests/hmc/Test_hmc_WilsonCloverFermionGauge.cc diff --git a/lib/algorithms/LinearOperator.h b/lib/algorithms/LinearOperator.h index f1b8820e..ee97e96d 100644 --- a/lib/algorithms/LinearOperator.h +++ b/lib/algorithms/LinearOperator.h @@ -183,11 +183,13 @@ namespace Grid { virtual RealD Mpc (const Field &in, Field &out) =0; virtual RealD MpcDag (const Field &in, Field &out) =0; virtual void MpcDagMpc(const Field &in, Field &out,RealD &ni,RealD &no) { - Field tmp(in._grid); + Field tmp(in._grid); + tmp.checkerboard = in.checkerboard; ni=Mpc(in,tmp); no=MpcDag(tmp,out); } virtual void HermOpAndNorm(const Field &in, Field &out,RealD &n1,RealD &n2){ + out.checkerboard = in.checkerboard; MpcDagMpc(in,out,n1,n2); } virtual void HermOp(const Field &in, Field &out){ @@ -215,13 +217,15 @@ namespace Grid { public: SchurDiagMooeeOperator (Matrix &Mat): _Mat(Mat){}; virtual RealD Mpc (const Field &in, Field &out) { - Field tmp(in._grid); -// std::cout <<"grid pointers: in._grid="<< in._grid << " out._grid=" << out._grid << " _Mat.Grid=" << _Mat.Grid() << " _Mat.RedBlackGrid=" << _Mat.RedBlackGrid() << std::endl; + Field tmp(in._grid); + tmp.checkerboard = !in.checkerboard; + //std::cout <<"grid pointers: in._grid="<< in._grid << " out._grid=" << out._grid << " _Mat.Grid=" << _Mat.Grid() << " _Mat.RedBlackGrid=" << _Mat.RedBlackGrid() << std::endl; _Mat.Meooe(in,tmp); _Mat.MooeeInv(tmp,out); _Mat.Meooe(out,tmp); + //std::cout << "cb in " << in.checkerboard << " cb out " << out.checkerboard << std::endl; _Mat.Mooee(in,out); return axpy_norm(out,-1.0,tmp,out); } diff --git a/lib/qcd/action/fermion/WilsonCloverFermion.cc b/lib/qcd/action/fermion/WilsonCloverFermion.cc index fff970a2..3ec90e06 100644 --- 
a/lib/qcd/action/fermion/WilsonCloverFermion.cc +++ b/lib/qcd/action/fermion/WilsonCloverFermion.cc @@ -84,14 +84,14 @@ void WilsonCloverFermion::ImportGauge(const GaugeField &_Umu) WilsonLoops::FieldStrength(Ez, _Umu, Tdir, Zdir); // Compute the Clover Operator acting on Colour and Spin - CloverTerm = fillCloverYZ(Bx); - CloverTerm += fillCloverXZ(By); - CloverTerm += fillCloverXY(Bz); - CloverTerm += fillCloverXT(Ex); - CloverTerm += fillCloverYT(Ey); - CloverTerm += fillCloverZT(Ez); - CloverTerm *= (0.5) * csw; - CloverTerm += (4.0 + this->mass); + // multiply here by the clover coefficients for the anisotropy + CloverTerm = fillCloverYZ(Bx) * csw_r; + CloverTerm += fillCloverXZ(By) * csw_r; + CloverTerm += fillCloverXY(Bz) * csw_r; + CloverTerm += fillCloverXT(Ex) * csw_t; + CloverTerm += fillCloverYT(Ey) * csw_t; + CloverTerm += fillCloverZT(Ez) * csw_t; + CloverTerm += diag_mass; int lvol = _Umu._grid->lSites(); int DimRep = Impl::Dimension; @@ -145,7 +145,6 @@ void WilsonCloverFermion::ImportGauge(const GaugeField &_Umu) template void WilsonCloverFermion::Mooee(const FermionField &in, FermionField &out) { - conformable(in, out); this->MooeeInternal(in, out, DaggerNo, InverseNo); } @@ -158,14 +157,12 @@ void WilsonCloverFermion::MooeeDag(const FermionField &in, FermionField &o template void WilsonCloverFermion::MooeeInv(const FermionField &in, FermionField &out) { - conformable(in,out); this->MooeeInternal(in, out, DaggerNo, InverseYes); } template void WilsonCloverFermion::MooeeInvDag(const FermionField &in, FermionField &out) { - conformable(in,out); this->MooeeInternal(in, out, DaggerYes, InverseYes); } @@ -228,88 +225,7 @@ void WilsonCloverFermion::MooeeInternal(const FermionField &in, FermionFie template void WilsonCloverFermion::MooDeriv(GaugeField &mat, const FermionField &X, const FermionField &Y, int dag) { - - GridBase *grid = mat._grid; - - //GaugeLinkField Lambdaodd(grid), Lambdaeven(grid), tmp(grid); - //Lambdaodd = zero; 
//Yodd*dag(Xodd)+Xodd*dag(Yodd); // I have to peek spin and decide the color structure - //Lambdaeven = zero; //Teven*dag(Xeven)+Xeven*dag(Yeven) + 2*(Dee^-1) - - GaugeLinkField Lambda(grid), tmp(grid); - Lambda = zero; - - conformable(mat._grid, X._grid); - conformable(Y._grid, X._grid); - - std::vector C1p(Nd, grid), C2p(Nd, grid), C3p(Nd, grid), C4p(Nd, grid); - std::vector C1m(Nd, grid), C2m(Nd, grid), C3m(Nd, grid), C4m(Nd, grid); - std::vector U(Nd, mat._grid); - - for (int mu = 0; mu < Nd; mu++) - { - U[mu] = PeekIndex(mat, mu); - C1p[mu] = zero; - C2p[mu] = zero; - C3p[mu] = zero; - C4p[mu] = zero; - C1m[mu] = zero; - C2m[mu] = zero; - C3m[mu] = zero; - C4m[mu] = zero; - } - - /* - PARALLEL_FOR_LOOP - for (int i = 0; i < CloverTerm._grid->oSites(); i++) - { - T._odata[i]()(0, 1) = timesMinusI(F._odata[i]()()); - T._odata[i]()(1, 0) = timesMinusI(F._odata[i]()()); - T._odata[i]()(2, 3) = timesMinusI(F._odata[i]()()); - T._odata[i]()(3, 2) = timesMinusI(F._odata[i]()()); - } -*/ - - for (int i = 0; i < 4; i++) - { //spin - for (int j = 0; j < 4; j++) - { //spin - - for (int mu = 0; mu < 4; mu++) - { //color - for (int nu = 0; nu < 4; nu++) - { //color - - // insertion in upper staple - tmp = Lambda * U[nu]; - C1p[mu] += Impl::ShiftStaple(Impl::CovShiftForward(tmp, nu, Impl::CovShiftBackward(U[mu], mu, Impl::CovShiftIdentityBackward(U[nu], nu))), mu); - - tmp = Lambda * U[mu]; - C2p[mu] += Impl::ShiftStaple(Impl::CovShiftForward(U[nu], nu, Impl::CovShiftBackward(tmp, mu, Impl::CovShiftIdentityBackward(U[nu], nu))), mu); - - tmp = Impl::CovShiftIdentityForward(Lambda, nu) * U[nu]; - C3p[mu] += Impl::ShiftStaple(Impl::CovShiftForward(U[nu], nu, Impl::CovShiftBackward(U[mu], mu, Impl::CovShiftIdentityBackward(tmp, nu))), mu); - - tmp = Lambda; - C4p[mu] += Impl::ShiftStaple(Impl::CovShiftForward(U[nu], nu, Impl::CovShiftBackward(U[mu], mu, Impl::CovShiftIdentityBackward(U[nu], nu))), mu) * tmp; - - // insertion in lower staple - tmp = Lambda * U[nu]; - C1m[mu] += 
Impl::ShiftStaple(Impl::CovShiftBackward(tmp, nu, Impl::CovShiftBackward(U[mu], mu, U[nu])), mu); - - tmp = Lambda * U[mu]; - C2m[mu] += Impl::ShiftStaple(Impl::CovShiftBackward(U[nu], nu, Impl::CovShiftBackward(tmp, mu, U[nu])), mu); - - tmp = Lambda * U[nu]; - C3m[mu] += Impl::ShiftStaple(Impl::CovShiftBackward(U[nu], nu, Impl::CovShiftBackward(U[mu], mu, tmp)), mu); - - tmp = Lambda; - C4m[mu] += Impl::ShiftStaple(Impl::CovShiftBackward(U[nu], nu, Impl::CovShiftBackward(U[mu], mu, U[nu])), mu) * tmp; - } - } - } - } - - //Still implementing. Have to be tested, and understood how to project EO + assert(0); } // Derivative parts diff --git a/lib/qcd/action/fermion/WilsonCloverFermion.h b/lib/qcd/action/fermion/WilsonCloverFermion.h index cd13b225..268564c0 100644 --- a/lib/qcd/action/fermion/WilsonCloverFermion.h +++ b/lib/qcd/action/fermion/WilsonCloverFermion.h @@ -6,8 +6,8 @@ Copyright (C) 2017 - Author: paboyle Author: Guido Cossu + Author: David Preti <> This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -37,6 +37,22 @@ namespace Grid namespace QCD { +/////////////////////////////////////////////////////////////////// +// Wilson Clover +// +// Operator ( with anisotropy coefficients): +// +// Q = 1 + (Nd-1)/xi_0 + m +// + W_t + (nu/xi_0) * W_s +// - 1/2*[ csw_t * sum_s (sigma_ts F_ts) + (csw_s/xi_0) * sum_ss (sigma_ss F_ss) ] +// +// s spatial, t temporal directions. 
+// where W_t and W_s are the temporal and spatial components of the +// Wilson Dirac operator +// +// csw_r = csw_t to recover the isotropic version +////////////////////////////////////////////////////////////////// + template class WilsonCloverFermion : public WilsonFermion { @@ -55,28 +71,43 @@ public: // Constructors WilsonCloverFermion(GaugeField &_Umu, GridCartesian &Fgrid, GridRedBlackCartesian &Hgrid, - RealD _mass, - RealD _csw, - const ImplParams &p = ImplParams()) : WilsonFermion(_Umu, - Fgrid, - Hgrid, - _mass, p), - CloverTerm(&Fgrid), - CloverTermInv(&Fgrid), - CloverTermEven(&Hgrid), - CloverTermOdd(&Hgrid), - CloverTermInvEven(&Hgrid), - CloverTermInvOdd(&Hgrid), - CloverTermDagEven(&Hgrid), - CloverTermDagOdd(&Hgrid), - CloverTermInvDagEven(&Hgrid), - CloverTermInvDagOdd(&Hgrid) + const RealD _mass, + const RealD _csw_r = 0.0, + const RealD _csw_t = 0.0, + const WilsonAnisotropyCoefficients &clover_anisotropy = WilsonAnisotropyCoefficients(), + const ImplParams &impl_p = ImplParams()) : WilsonFermion(_Umu, + Fgrid, + Hgrid, + _mass, impl_p, clover_anisotropy), + CloverTerm(&Fgrid), + CloverTermInv(&Fgrid), + CloverTermEven(&Hgrid), + CloverTermOdd(&Hgrid), + CloverTermInvEven(&Hgrid), + CloverTermInvOdd(&Hgrid), + CloverTermDagEven(&Hgrid), + CloverTermDagOdd(&Hgrid), + CloverTermInvDagEven(&Hgrid), + CloverTermInvDagOdd(&Hgrid) { - csw = _csw; assert(Nd == 4); // require 4 dimensions - if (csw == 0) - std::cout << GridLogWarning << "Initializing WilsonCloverFermion with csw = 0" << std::endl; + if (clover_anisotropy.isAnisotropic) + { + csw_r = _csw_r * 0.5 / clover_anisotropy.xi_0; + diag_mass = _mass + 1.0 + (Nd - 1) * (clover_anisotropy.nu / clover_anisotropy.xi_0); + } + else + { + csw_r = _csw_r * 0.5; + diag_mass = 4.0 + _mass; + } + csw_t = _csw_t * 0.5; + + if (csw_r == 0) + std::cout << GridLogWarning << "Initializing WilsonCloverFermion with csw_r = 0" << std::endl; + if (csw_t == 0) + std::cout << GridLogWarning << "Initializing 
WilsonCloverFermion with csw_t = 0" << std::endl; ImportGauge(_Umu); } @@ -105,15 +136,15 @@ public: GaugeField clover_force(force._grid); PropagatorField Lambda(force._grid); - // Here we are hitting some performance issues: + // Guido: Here we are hitting some performance issues: // need to extract the components of the DoubledGaugeField // for each call // Possible solution // Create a vector object to store them? (cons: wasting space) std::vector U(Nd, this->Umu._grid); - + Impl::extractLinkField(U, this->Umu); - + force = zero; // Derivative of the Wilson hopping term this->DhopDeriv(force, X, Y, dag); @@ -121,10 +152,9 @@ public: /////////////////////////////////////////////////////////// // Clover term derivative /////////////////////////////////////////////////////////// - Impl::outerProductImpl(Lambda, X, Y); + Impl::outerProductImpl(Lambda, X, Y); //std::cout << "Lambda:" << Lambda << std::endl; - Gamma::Algebra sigma[] = { Gamma::Algebra::SigmaXY, Gamma::Algebra::SigmaXZ, @@ -148,25 +178,34 @@ public: */ int count = 0; - clover_force = zero; + clover_force = zero; for (int mu = 0; mu < 4; mu++) { force_mu = zero; for (int nu = 0; nu < 4; nu++) { - if (mu == nu) continue; + if (mu == nu) + continue; + + RealD factor; + if (nu == 4 || mu == 4) + { + factor = 2.0 * csw_t; + } + else + { + factor = 2.0 * csw_r; + } PropagatorField Slambda = Gamma(sigma[count]) * Lambda; // sigma checked - Impl::TraceSpinImpl(lambda, Slambda); // traceSpin ok - force_mu -= Cmunu(U, lambda, mu, nu); // checked + Impl::TraceSpinImpl(lambda, Slambda); // traceSpin ok + force_mu -= factor*Cmunu(U, lambda, mu, nu); // checked count++; } pokeLorentz(clover_force, U[mu] * force_mu, mu); } - clover_force *= csw; + //clover_force *= csw; force += clover_force; - - } // Computing C_{\mu \nu}(x) as in Eq.(B.39) in Zbigniew Sroczynski's PhD thesis @@ -176,15 +215,15 @@ public: GaugeLinkField out(lambda._grid), tmp(lambda._grid); // insertion in upper staple // please check redundancy of 
shift operations - + // C1+ tmp = lambda * U[nu]; out = Impl::ShiftStaple(Impl::CovShiftForward(tmp, nu, Impl::CovShiftBackward(U[mu], mu, Impl::CovShiftIdentityBackward(U[nu], nu))), mu); - + // C2+ tmp = U[mu] * Impl::ShiftStaple(adj(lambda), mu); out += Impl::ShiftStaple(Impl::CovShiftForward(U[nu], nu, Impl::CovShiftBackward(tmp, mu, Impl::CovShiftIdentityBackward(U[nu], nu))), mu); - + // C3+ tmp = U[nu] * Impl::ShiftStaple(adj(lambda), nu); out += Impl::ShiftStaple(Impl::CovShiftForward(U[nu], nu, Impl::CovShiftBackward(U[mu], mu, Impl::CovShiftIdentityBackward(tmp, nu))), mu); @@ -213,16 +252,17 @@ public: private: // here fixing the 4 dimensions, make it more general? - RealD csw; // Clover coefficient + RealD csw_r; // Clover coefficient - spatial + RealD csw_t; // Clover coefficient - temporal + RealD diag_mass; // Mass term CloverFieldType CloverTerm, CloverTermInv; // Clover term CloverFieldType CloverTermEven, CloverTermOdd; // Clover term EO CloverFieldType CloverTermInvEven, CloverTermInvOdd; // Clover term Inv EO CloverFieldType CloverTermDagEven, CloverTermDagOdd; // Clover term Dag EO CloverFieldType CloverTermInvDagEven, CloverTermInvDagOdd; // Clover term Inv Dag EO - // eventually these two can be compressed into 6x6 blocks instead of the 12x12 + // eventually these can be compressed into 6x6 blocks instead of the 12x12 // using the DeGrand-Rossi basis for the gamma matrices - CloverFieldType fillCloverYZ(const GaugeLinkField &F) { CloverFieldType T(F._grid); diff --git a/lib/qcd/action/fermion/WilsonFermion.h b/lib/qcd/action/fermion/WilsonFermion.h index 7b2b5206..ca5eba8b 100644 --- a/lib/qcd/action/fermion/WilsonFermion.h +++ b/lib/qcd/action/fermion/WilsonFermion.h @@ -45,10 +45,11 @@ class WilsonFermionStatic { }; struct WilsonAnisotropyCoefficients{ - bool isAnisotropic; - int t_direction; - double xi_0; - double nu; + GRID_SERIALIZABLE_CLASS_MEMBERS(WilsonAnisotropyCoefficients, + bool, isAnisotropic, + int, t_direction, + double, xi_0, 
+ double, nu); WilsonAnisotropyCoefficients(): isAnisotropic(false), diff --git a/tests/core/Test_wilson_clover.cc b/tests/core/Test_wilson_clover.cc index 9a55f6b2..9281e298 100644 --- a/tests/core/Test_wilson_clover.cc +++ b/tests/core/Test_wilson_clover.cc @@ -1,4 +1,4 @@ - /************************************************************************************* +/************************************************************************************* Grid physics library, www.github.com/paboyle/Grid @@ -24,302 +24,334 @@ See the full license in the file "LICENSE" in the top level distribution directory *************************************************************************************/ - /* END LEGAL */ +/* END LEGAL */ #include using namespace std; using namespace Grid; using namespace Grid::QCD; -int main (int argc, char ** argv) +int main(int argc, char **argv) { - Grid_init(&argc,&argv); + Grid_init(&argc, &argv); - std::vector latt_size = GridDefaultLatt(); - std::vector simd_layout = GridDefaultSimd(Nd,vComplex::Nsimd()); - std::vector mpi_layout = GridDefaultMpi(); - GridCartesian Grid(latt_size,simd_layout,mpi_layout); - GridRedBlackCartesian RBGrid(latt_size,simd_layout,mpi_layout); + std::vector latt_size = GridDefaultLatt(); + std::vector simd_layout = GridDefaultSimd(Nd, vComplex::Nsimd()); + std::vector mpi_layout = GridDefaultMpi(); + GridCartesian Grid(latt_size, simd_layout, mpi_layout); + GridRedBlackCartesian RBGrid(&Grid); int threads = GridThread::GetThreads(); - std::cout< seeds({1,2,3,4}); - GridParallelRNG pRNG(&Grid); + std::vector seeds({1, 2, 3, 4}); + GridParallelRNG pRNG(&Grid); pRNG.SeedFixedIntegers(seeds); // pRNG.SeedFixedIntegers(std::vector({45,12,81,9}); - typedef typename WilsonCloverFermionR::FermionField FermionField; - typename WilsonCloverFermionR::ImplParams params; + typedef typename WilsonCloverFermionR::FermionField FermionField; + typename WilsonCloverFermionR::ImplParams params; + WilsonAnisotropyCoefficients anis; - 
FermionField src (&Grid); random(pRNG,src); - FermionField result(&Grid); result=zero; - FermionField result2(&Grid); result2=zero; - FermionField ref(&Grid); ref=zero; - FermionField tmp(&Grid); tmp=zero; - FermionField err(&Grid); err=zero; - FermionField err2(&Grid); err2=zero; - FermionField phi (&Grid); random(pRNG,phi); - FermionField chi (&Grid); random(pRNG,chi); - LatticeGaugeField Umu(&Grid); SU3::HotConfiguration(pRNG,Umu); - std::vector U(4,&Grid); + FermionField src(&Grid); + random(pRNG, src); + FermionField result(&Grid); + result = zero; + FermionField result2(&Grid); + result2 = zero; + FermionField ref(&Grid); + ref = zero; + FermionField tmp(&Grid); + tmp = zero; + FermionField err(&Grid); + err = zero; + FermionField err2(&Grid); + err2 = zero; + FermionField phi(&Grid); + random(pRNG, phi); + FermionField chi(&Grid); + random(pRNG, chi); + LatticeGaugeField Umu(&Grid); + SU3::HotConfiguration(pRNG, Umu); + std::vector U(4, &Grid); - - double volume=1; - for(int mu=0;mu * = < chi | Deo^dag| phi> "< * = < chi | Deo^dag| phi> " << std::endl; + std::cout << GridLogMessage << "==============================================================" << std::endl; - FermionField dchi_e (&RBGrid); - FermionField dchi_o (&RBGrid); + FermionField chi_e(&RBGrid); + FermionField chi_o(&RBGrid); - FermionField phi_e (&RBGrid); - FermionField phi_o (&RBGrid); + FermionField dchi_e(&RBGrid); + FermionField dchi_o(&RBGrid); - FermionField dphi_e (&RBGrid); - FermionField dphi_o (&RBGrid); + FermionField phi_e(&RBGrid); + FermionField phi_o(&RBGrid); - pickCheckerboard(Even,chi_e,chi); - pickCheckerboard(Odd ,chi_o,chi); - pickCheckerboard(Even,phi_e,phi); - pickCheckerboard(Odd ,phi_o,phi); + FermionField dphi_e(&RBGrid); + FermionField dphi_o(&RBGrid); - Dwc.Meooe(chi_e,dchi_o); - Dwc.Meooe(chi_o,dchi_e); - Dwc.MeooeDag(phi_e,dphi_o); - Dwc.MeooeDag(phi_o,dphi_e); + pickCheckerboard(Even, chi_e, chi); + pickCheckerboard(Odd, chi_o, chi); + pickCheckerboard(Even, 
phi_e, phi); + pickCheckerboard(Odd, phi_o, phi); - ComplexD pDce = innerProduct(phi_e,dchi_e); - ComplexD pDco = innerProduct(phi_o,dchi_o); - ComplexD cDpe = innerProduct(chi_e,dphi_e); - ComplexD cDpo = innerProduct(chi_o,dphi_o); + Dwc.Meooe(chi_e, dchi_o); + Dwc.Meooe(chi_o, dchi_e); + Dwc.MeooeDag(phi_e, dphi_o); + Dwc.MeooeDag(phi_o, dphi_e); - std::cout< seeds2({5,6,7,8}); - GridParallelRNG pRNG2(&Grid); pRNG2.SeedFixedIntegers(seeds2); + std::vector seeds2({5, 6, 7, 8}); + GridParallelRNG pRNG2(&Grid); + pRNG2.SeedFixedIntegers(seeds2); LatticeColourMatrix Omega(&Grid); LatticeColourMatrix ShiftedOmega(&Grid); - LatticeGaugeField U_prime(&Grid); U_prime=zero; - LatticeColourMatrix U_prime_mu(&Grid); U_prime_mu=zero; + LatticeGaugeField U_prime(&Grid); + U_prime = zero; + LatticeColourMatrix U_prime_mu(&Grid); + U_prime_mu = zero; SU::LieRandomize(pRNG2, Omega, 1.0); - for (int mu=0;mu + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along +with this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+ +See the full license in the file "LICENSE" in the top level distribution +directory +*************************************************************************************/ +/* END LEGAL */ +#include + +int main(int argc, char **argv) { + using namespace Grid; + using namespace Grid::QCD; + + Grid_init(&argc, &argv); + int threads = GridThread::GetThreads(); + // here make a routine to print all the relevant information on the run + std::cout << GridLogMessage << "Grid is setup to use " << threads << " threads" << std::endl; + + // Typedefs to simplify notation + typedef GenericHMCRunner HMCWrapper; // Uses the default minimum norm + typedef WilsonImplR FermionImplPolicy; + typedef WilsonCloverFermionR FermionAction; + typedef typename FermionAction::FermionField FermionField; + + + //:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: + HMCWrapper TheHMC; + + // Grid from the command line + TheHMC.Resources.AddFourDimGrid("gauge"); + // Possibile to create the module by hand + // hardcoding parameters or using a Reader + + + // Checkpointer definition + CheckpointerParameters CPparams; + CPparams.config_prefix = "ckpoint_lat"; + CPparams.rng_prefix = "ckpoint_rng"; + CPparams.saveInterval = 5; + CPparams.format = "IEEE64BIG"; + + TheHMC.Resources.LoadNerscCheckpointer(CPparams); + + RNGModuleParameters RNGpar; + RNGpar.serial_seeds = "1 2 3 4 5"; + RNGpar.parallel_seeds = "6 7 8 9 10"; + TheHMC.Resources.SetRNGSeeds(RNGpar); + + // Construct observables + // here there is too much indirection + typedef PlaquetteMod PlaqObs; + TheHMC.Resources.AddObservable(); + ////////////////////////////////////////////// + + ///////////////////////////////////////////////////////////// + // Collect actions, here use more encapsulation + // need wrappers of the fermionic classes + // that have a complex construction + // standard + RealD beta = 5.6 ; + WilsonGaugeActionR Waction(beta); + + // temporarily need a gauge field + auto GridPtr = 
TheHMC.Resources.GetCartesian(); + auto GridRBPtr = TheHMC.Resources.GetRBCartesian(); + + LatticeGaugeField U(GridPtr); + + Real mass = 0.01; + Real csw = 1.0; + + FermionAction FermOp(U, *GridPtr, *GridRBPtr, mass, csw); + + ConjugateGradient CG(1.0e-8, 2000); + + TwoFlavourEvenOddPseudoFermionAction Nf2(FermOp, CG, CG); + + // Set smearing (true/false), default: false + Nf2.is_smeared = false; + + + // Collect actions + ActionLevel Level1(1); + Level1.push_back(&Nf2); + + ActionLevel Level2(4); + Level2.push_back(&Waction); + + TheHMC.TheAction.push_back(Level1); + TheHMC.TheAction.push_back(Level2); + ///////////////////////////////////////////////////////////// + + /* + double rho = 0.1; // smearing parameter + int Nsmear = 2; // number of smearing levels + Smear_Stout Stout(rho); + SmearedConfiguration SmearingPolicy( + UGrid, Nsmear, Stout); + */ + + // HMC parameters are serialisable + TheHMC.Parameters.MD.MDsteps = 20; + TheHMC.Parameters.MD.trajL = 1.0; + + TheHMC.ReadCommandLine(argc, argv); // these can be parameters from file + TheHMC.Run(); // no smearing + // TheHMC.Run(SmearingPolicy); // for smearing + + Grid_finalize(); + +} // main + + + + + + + diff --git a/tests/hmc/Test_hmc_WilsonCloverFermionGauge.cc b/tests/hmc/Test_hmc_WilsonCloverFermionGauge.cc new file mode 100644 index 00000000..322bb304 --- /dev/null +++ b/tests/hmc/Test_hmc_WilsonCloverFermionGauge.cc @@ -0,0 +1,126 @@ +/************************************************************************************* + +Grid physics library, www.github.com/paboyle/Grid + +Source file: ./tests/Test_hmc_WilsonFermionGauge.cc + +Copyright (C) 2017 + +Author: Guido Cossu + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. 
+ +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along +with this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + +See the full license in the file "LICENSE" in the top level distribution +directory +*************************************************************************************/ +/* END LEGAL */ +#include + +int main(int argc, char **argv) +{ + using namespace Grid; + using namespace Grid::QCD; + + Grid_init(&argc, &argv); + int threads = GridThread::GetThreads(); + // here make a routine to print all the relevant information on the run + std::cout << GridLogMessage << "Grid is setup to use " << threads << " threads" << std::endl; + + // Typedefs to simplify notation + typedef GenericHMCRunner HMCWrapper; // Uses the default minimum norm + typedef WilsonImplR FermionImplPolicy; + typedef WilsonCloverFermionR FermionAction; + typedef typename FermionAction::FermionField FermionField; + + //:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: + HMCWrapper TheHMC; + + // Grid from the command line + TheHMC.Resources.AddFourDimGrid("gauge"); + + // Checkpointer definition + CheckpointerParameters CPparams; + CPparams.config_prefix = "ckpoint_lat"; + CPparams.rng_prefix = "ckpoint_rng"; + CPparams.saveInterval = 5; + CPparams.format = "IEEE64BIG"; + + TheHMC.Resources.LoadNerscCheckpointer(CPparams); + + RNGModuleParameters RNGpar; + RNGpar.serial_seeds = "1 2 3 4 5"; + RNGpar.parallel_seeds = "6 7 8 9 10"; + TheHMC.Resources.SetRNGSeeds(RNGpar); + + // Construct observables + typedef PlaquetteMod PlaqObs; + TheHMC.Resources.AddObservable(); + ////////////////////////////////////////////// + + 
///////////////////////////////////////////////////////////// + // Collect actions, here use more encapsulation + // need wrappers of the fermionic classes + // that have a complex construction + // standard + RealD beta = 5.6; + WilsonGaugeActionR Waction(beta); + + auto GridPtr = TheHMC.Resources.GetCartesian(); + auto GridRBPtr = TheHMC.Resources.GetRBCartesian(); + + // temporarily need a gauge field + LatticeGaugeField U(GridPtr); + + Real mass = 0.01; + Real csw = 1.0; + + FermionAction FermOp(U, *GridPtr, *GridRBPtr, mass, csw); + ConjugateGradient CG(1.0e-8, 5000); + + TwoFlavourPseudoFermionAction Nf2(FermOp, CG, CG); + + // Set smearing (true/false), default: false + Nf2.is_smeared = false; + + // Collect actions + ActionLevel Level1(1); + Level1.push_back(&Nf2); + + ActionLevel Level2(4); + Level2.push_back(&Waction); + + TheHMC.TheAction.push_back(Level1); + TheHMC.TheAction.push_back(Level2); + ///////////////////////////////////////////////////////////// + + /* + double rho = 0.1; // smearing parameter + int Nsmear = 2; // number of smearing levels + Smear_Stout Stout(rho); + SmearedConfiguration SmearingPolicy( + UGrid, Nsmear, Stout); + */ + + // HMC parameters are serialisable + TheHMC.Parameters.MD.MDsteps = 20; + TheHMC.Parameters.MD.trajL = 1.0; + + TheHMC.ReadCommandLine(argc, argv); // these can be parameters from file + TheHMC.Run(); // no smearing + // TheHMC.Run(SmearingPolicy); // for smearing + + Grid_finalize(); + +} // main diff --git a/tests/qdpxx/Test_qdpxx_wilson.cc b/tests/qdpxx/Test_qdpxx_wilson.cc index a084ebad..29e9c9ce 100644 --- a/tests/qdpxx/Test_qdpxx_wilson.cc +++ b/tests/qdpxx/Test_qdpxx_wilson.cc @@ -274,10 +274,10 @@ public: Chroma::CloverFermActParams p; p.Mass = _mq; p.clovCoeffR = QDP::Real(1.0); - p.clovCoeffT = QDP::Real(1.0); + p.clovCoeffT = QDP::Real(2.0); p.u0 = QDP::Real(1.0); AnisoParam_t _apar; - _apar.anisoP = false; + _apar.anisoP = true; _apar.t_dir = 3; // in 4d _apar.xi_0 = 2.0; _apar.nu = 1.0; @@ 
-422,10 +422,15 @@ void calc_grid(ChromaAction action, Grid::QCD::LatticeGaugeField &Umu, Grid::QCD if (action == WilsonClover) { - Grid::RealD _csw = 1.0; + Grid::RealD _csw_r = 1.0; + Grid::RealD _csw_t = 2.0; WilsonAnisotropyCoefficients anis; - WilsonImplParams implParam; - Grid::QCD::WilsonCloverFermionR Wf(Umu, *UGrid, *UrbGrid, _mass, _csw); + anis.isAnisotropic = true; + anis.t_direction = 3; + anis.xi_0 = 2.0; + anis.nu = 1.0; + WilsonImplParams CloverImplParam; + Grid::QCD::WilsonCloverFermionR Wf(Umu, *UGrid, *UrbGrid, _mass, _csw_r, _csw_t, anis, CloverImplParam); Wf.ImportGauge(Umu); std::cout << Grid::GridLogMessage << " Calling Grid Wilson Clover Fermion multiply " << std::endl; From 7b42ac9982da4b9f6c9ae0934ccef62506672425 Mon Sep 17 00:00:00 2001 From: pretidav Date: Thu, 2 Nov 2017 21:58:16 +0100 Subject: [PATCH 206/377] added polyakov loop observable to the hmc --- lib/qcd/modules/ObservableModules.h | 13 ++++ lib/qcd/observables/hmc_observable.h | 2 + lib/qcd/observables/polyakov_loop.h | 68 +++++++++++++++++++ lib/qcd/utils/WilsonLoops.h | 23 +++++++ .../hmc/Test_hmc_WilsonCloverFermionGauge.cc | 3 + 5 files changed, 109 insertions(+) create mode 100644 lib/qcd/observables/polyakov_loop.h diff --git a/lib/qcd/modules/ObservableModules.h b/lib/qcd/modules/ObservableModules.h index 24511617..fbffc236 100644 --- a/lib/qcd/modules/ObservableModules.h +++ b/lib/qcd/modules/ObservableModules.h @@ -92,6 +92,19 @@ class PlaquetteMod: public ObservableModule, NoParameters> PlaquetteMod(): ObsBase(NoParameters()){} }; +template < class Impl > +class PolyakovMod: public ObservableModule, NoParameters>{ + typedef ObservableModule, NoParameters> ObsBase; + using ObsBase::ObsBase; // for constructors + + // acquire resource + virtual void initialize(){ + this->ObservablePtr.reset(new PolyakovLogger()); + } + public: + PolyakovMod(): ObsBase(NoParameters()){} +}; + template < class Impl > class TopologicalChargeMod: public ObservableModule, 
TopologyObsParameters>{ diff --git a/lib/qcd/observables/hmc_observable.h b/lib/qcd/observables/hmc_observable.h index db629ce7..fcf11774 100644 --- a/lib/qcd/observables/hmc_observable.h +++ b/lib/qcd/observables/hmc_observable.h @@ -45,5 +45,7 @@ class HmcObservable { #include "plaquette.h" #include "topological_charge.h" +#include "polyakov_loop.h" + #endif // HMC_OBSERVABLE_H diff --git a/lib/qcd/observables/polyakov_loop.h b/lib/qcd/observables/polyakov_loop.h new file mode 100644 index 00000000..d708b474 --- /dev/null +++ b/lib/qcd/observables/polyakov_loop.h @@ -0,0 +1,68 @@ +/************************************************************************************* + +Grid physics library, www.github.com/paboyle/Grid + +Source file: ./lib/qcd/modules/polyakov_line.h + +Copyright (C) 2017 + +Author: David Preti + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along +with this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+ +See the full license in the file "LICENSE" in the top level distribution +directory +*************************************************************************************/ +/* END LEGAL */ + +#ifndef HMC_POLYAKOV_H +#define HMC_POLYAKOV_H + +namespace Grid { +namespace QCD { + +// this is only defined for a gauge theory +template +class PolyakovLogger : public HmcObservable { + public: + // here forces the Impl to be of gauge fields + // if not the compiler will complain + INHERIT_GIMPL_TYPES(Impl); + + // necessary for HmcObservable compatibility + typedef typename Impl::Field Field; + + void TrajectoryComplete(int traj, + Field &U, + GridSerialRNG &sRNG, + GridParallelRNG &pRNG) { + + ComplexD polyakov = WilsonLoops::avgPolyakovLoop(U); + + int def_prec = std::cout.precision(); + + std::cout << GridLogMessage + << std::setprecision(std::numeric_limits::digits10 + 1) + << "Polyakov Loop: [ " << traj << " ] "<< polyakov << std::endl; + + std::cout.precision(def_prec); + + } +}; + +} // namespace QCD +} // namespace Grid + +#endif // HMC_POLYAKOV_H diff --git a/lib/qcd/utils/WilsonLoops.h b/lib/qcd/utils/WilsonLoops.h index 86609ffc..1e57015c 100644 --- a/lib/qcd/utils/WilsonLoops.h +++ b/lib/qcd/utils/WilsonLoops.h @@ -123,6 +123,29 @@ public: return sumplaq / vol / faces / Nc; // Nd , Nc dependent... 
FIXME } + + ////////////////////////////////////////////////// + // average over all x,y,z the temporal loop + ////////////////////////////////////////////////// + static ComplexD avgPolyakovLoop(const GaugeField &Umu) { //assume Nd=4 + GaugeMat Ut(Umu._grid), P(Umu._grid); + ComplexD out; + int T = Umu._grid->GlobalDimensions()[3]; + int X = Umu._grid->GlobalDimensions()[0]; + int Y = Umu._grid->GlobalDimensions()[1]; + int Z = Umu._grid->GlobalDimensions()[2]; + + std::cout << X << Y << Z << T << std::endl; + Ut = peekLorentz(Umu,3); //Select temporal direction + P = Ut; + for (int t=1;t PlaqObs; TheHMC.Resources.AddObservable(); + + typedef PolyakovMod PolyakovObs; + TheHMC.Resources.AddObservable(); ////////////////////////////////////////////// ///////////////////////////////////////////////////////////// From 360efd0088847727e2fabe034dc5f18a09430cff Mon Sep 17 00:00:00 2001 From: paboyle Date: Thu, 2 Nov 2017 22:05:31 +0000 Subject: [PATCH 207/377] Improved treatment of reverse asked for by chris. Truncate the basis. 
Power method renormalises --- .../iterative/ImplicitlyRestartedLanczos.h | 44 ++++++++++++------- 1 file changed, 27 insertions(+), 17 deletions(-) diff --git a/lib/algorithms/iterative/ImplicitlyRestartedLanczos.h b/lib/algorithms/iterative/ImplicitlyRestartedLanczos.h index 089e7ff3..7a0760c9 100644 --- a/lib/algorithms/iterative/ImplicitlyRestartedLanczos.h +++ b/lib/algorithms/iterative/ImplicitlyRestartedLanczos.h @@ -181,8 +181,8 @@ enum IRLdiagonalisation { template class ImplicitlyRestartedLanczosHermOpTester : public ImplicitlyRestartedLanczosTester { public: - LinearFunction &_HermOpTest; - ImplicitlyRestartedLanczosHermOpTester(LinearFunction &HermOpTest) : _HermOpTest(HermOpTest) { }; + LinearFunction &_HermOp; + ImplicitlyRestartedLanczosHermOpTester(LinearFunction &HermOp) : _HermOp(HermOp) { }; int ReconstructEval(int j,RealD resid,Field &B, RealD &eval,RealD evalMaxApprox) { return TestConvergence(j,resid,B,eval,evalMaxApprox); @@ -192,7 +192,7 @@ template class ImplicitlyRestartedLanczosHermOpTester : public Imp Field v(B); RealD eval_poly = eval; // Apply operator - _HermOpTest(B,v); + _HermOp(B,v); RealD vnum = real(innerProduct(B,v)); // HermOp. RealD vden = norm2(B); @@ -233,8 +233,8 @@ class ImplicitlyRestartedLanczos { //////////////////////////////// // Embedded objects //////////////////////////////// + LinearFunction &_PolyOp; LinearFunction &_HermOp; - LinearFunction &_HermOpTest; ImplicitlyRestartedLanczosTester &_Tester; // Default tester provided (we need a ref to something in default case) ImplicitlyRestartedLanczosHermOpTester SimpleTester; @@ -246,16 +246,22 @@ public: ////////////////////////////////////////////////////////////////// // PAB: ////////////////////////////////////////////////////////////////// - // Too many options & knobs. Do we really need orth_period + // Too many options & knobs. 
+ // Eliminate: + // orth_period + // betastp + // MinRestart + // + // Do we really need orth_period // What is the theoretical basis & guarantees of betastp ? // Nstop=Nk viable? // MinRestart avoidable with new convergence test? - // Could cut to HermOp, HermOpTest, Tester, Nk, Nm, resid, maxiter (+diagonalisation) - // HermOpTest could be eliminated if we dropped the Power method for max eval. + // Could cut to PolyOp, HermOp, Tester, Nk, Nm, resid, maxiter (+diagonalisation) + // HermOp could be eliminated if we dropped the Power method for max eval. // -- also: The eval, eval2, eval2_copy stuff is still unnecessarily unclear ////////////////////////////////////////////////////////////////// - ImplicitlyRestartedLanczos(LinearFunction & HermOp, - LinearFunction & HermOpTest, + ImplicitlyRestartedLanczos(LinearFunction & PolyOp, + LinearFunction & HermOp, ImplicitlyRestartedLanczosTester & Tester, int _Nstop, // sought vecs int _Nk, // sought vecs @@ -265,14 +271,14 @@ public: RealD _betastp=0.0, // if beta(k) < betastp: converged int _MinRestart=1, int _orth_period = 1, IRLdiagonalisation _diagonalisation= IRLdiagonaliseWithEigen) : - SimpleTester(HermOpTest), _HermOp(HermOp), _HermOpTest(HermOpTest), _Tester(Tester), + SimpleTester(HermOp), _PolyOp(PolyOp), _HermOp(HermOp), _Tester(Tester), Nstop(_Nstop) , Nk(_Nk), Nm(_Nm), eresid(_eresid), betastp(_betastp), MaxIter(_MaxIter) , MinRestart(_MinRestart), orth_period(_orth_period), diagonalisation(_diagonalisation) { }; - ImplicitlyRestartedLanczos(LinearFunction & HermOp, - LinearFunction & HermOpTest, + ImplicitlyRestartedLanczos(LinearFunction & PolyOp, + LinearFunction & HermOp, int _Nstop, // sought vecs int _Nk, // sought vecs int _Nm, // spare vecs @@ -281,7 +287,7 @@ public: RealD _betastp=0.0, // if beta(k) < betastp: converged int _MinRestart=1, int _orth_period = 1, IRLdiagonalisation _diagonalisation= IRLdiagonaliseWithEigen) : - SimpleTester(HermOpTest), _HermOp(HermOp), _HermOpTest(HermOpTest), 
_Tester(SimpleTester), + SimpleTester(HermOp), _PolyOp(PolyOp), _HermOp(HermOp), _Tester(SimpleTester), Nstop(_Nstop) , Nk(_Nk), Nm(_Nm), eresid(_eresid), betastp(_betastp), MaxIter(_MaxIter) , MinRestart(_MinRestart), @@ -323,7 +329,7 @@ repeat →AVK =VKHK +fKe†K † Extend to an M = K + P step factorization AVM = VMHM + fMeM until convergence */ - void calc(std::vector& eval, std::vector& evec, const Field& src, int& Nconv, bool reverse=true) + void calc(std::vector& eval, std::vector& evec, const Field& src, int& Nconv, bool reverse=false) { GridBase *grid = src._grid; assert(grid == evec[0]._grid); @@ -355,7 +361,8 @@ until convergence auto tmp = src; const int _MAX_ITER_IRL_MEVAPP_ = 50; for (int i=0;i<_MAX_ITER_IRL_MEVAPP_;i++) { - _HermOpTest(src_n,tmp); + normalise(src_n); + _HermOp(src_n,tmp); RealD vnum = real(innerProduct(src_n,tmp)); // HermOp. RealD vden = norm2(src_n); RealD na = vnum/vden; @@ -536,7 +543,10 @@ until convergence std::cout << GridLogIRL << "Nconv ("<0) w -= lme[k-1] * evec[k-1]; From c79606a5dc4acfc4ba1453262b344031bee8a07b Mon Sep 17 00:00:00 2001 From: pretidav Date: Fri, 3 Nov 2017 22:46:32 +0100 Subject: [PATCH 208/377] Test production code wilson clover. Still missing QObs measurement on-the-fly. 
--- lib/qcd/utils/WilsonLoops.h | 1 - tests/hmc/Test_hmc_WCFG_Production.cc | 210 ++++++++++++++++++++++++++ 2 files changed, 210 insertions(+), 1 deletion(-) create mode 100644 tests/hmc/Test_hmc_WCFG_Production.cc diff --git a/lib/qcd/utils/WilsonLoops.h b/lib/qcd/utils/WilsonLoops.h index 1e57015c..cdd76ecc 100644 --- a/lib/qcd/utils/WilsonLoops.h +++ b/lib/qcd/utils/WilsonLoops.h @@ -135,7 +135,6 @@ public: int Y = Umu._grid->GlobalDimensions()[1]; int Z = Umu._grid->GlobalDimensions()[2]; - std::cout << X << Y << Z << T << std::endl; Ut = peekLorentz(Umu,3); //Select temporal direction P = Ut; for (int t=1;t + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along +with this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+ +See the full license in the file "LICENSE" in the top level distribution +directory +*************************************************************************************/ +/* END LEGAL */ +#include + + +namespace Grid{ + struct FermionParameters: Serializable { + GRID_SERIALIZABLE_CLASS_MEMBERS(FermionParameters, + double, mass, + double, csw, + double, StoppingCondition, + int, MaxCGIterations, + bool, ApplySmearing); + }; + + + struct WilsonCloverHMCParameters: Serializable { + GRID_SERIALIZABLE_CLASS_MEMBERS(WilsonCloverHMCParameters, + double, gauge_beta, + FermionParameters, WilsonClover) + + template + WilsonCloverHMCParameters(Reader& Reader){ + read(Reader, "Action", *this); + } + }; + + struct SmearingParameters: Serializable { + GRID_SERIALIZABLE_CLASS_MEMBERS(SmearingParameters, + double, rho, + Integer, Nsmear) + + template + SmearingParameters(Reader& Reader){ + read(Reader, "StoutSmearing", *this); + } + + }; + + +} + +int main(int argc, char **argv) +{ + using namespace Grid; + using namespace Grid::QCD; + + Grid_init(&argc, &argv); + int threads = GridThread::GetThreads(); + // here make a routine to print all the relevant information on the run + std::cout << GridLogMessage << "Grid is setup to use " << threads << " threads" << std::endl; + + // Typedefs to simplify notation + typedef GenericHMCRunner HMCWrapper; // Uses the default minimum norm + typedef WilsonImplR FermionImplPolicy; + typedef WilsonCloverFermionR FermionAction; + typedef typename FermionAction::FermionField FermionField; + typedef Grid::JSONReader Serialiser; + + //:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: + HMCWrapper TheHMC; + + // Grid from the command line + TheHMC.ReadCommandLine(argc, argv); + if (TheHMC.ParameterFile.empty()){ + std::cout << "Input file not specified." 
+ << "Use --ParameterFile option in the command line.\nAborting" + << std::endl; + exit(1); + } + Serialiser Reader(TheHMC.ParameterFile); + WilsonCloverHMCParameters MyParams(Reader); + + // Apply smearing to the fermionic action + bool ApplySmearing = MyParams.WilsonClover.ApplySmearing; + + TheHMC.Resources.AddFourDimGrid("gauge"); + + // Checkpointer definition + CheckpointerParameters CPparams(Reader); + + /* + CPparams.config_prefix = "ckpoint_lat"; + CPparams.rng_prefix = "ckpoint_rng"; + CPparams.saveInterval = 5; + CPparams.format = "IEEE64BIG"; + */ + + TheHMC.Resources.LoadNerscCheckpointer(CPparams); + + RNGModuleParameters RNGpar(Reader); + /* + RNGpar.serial_seeds = "1 2 3 4 5"; + RNGpar.parallel_seeds = "6 7 8 9 10"; + TheHMC.Resources.SetRNGSeeds(RNGpar); + */ + TheHMC.Resources.SetRNGSeeds(RNGpar); + + // Construct observables + typedef PlaquetteMod PlaqObs; + TheHMC.Resources.AddObservable(); + + typedef PolyakovMod PolyakovObs; + TheHMC.Resources.AddObservable(); + + //typedef TopologicalChargeMod QObs; + //TopologyObsParameters TopParams(Reader); + //TheHMC.Resources.AddObservable(TopParams); + ////////////////////////////////////////////// + + ///////////////////////////////////////////////////////////// + // Collect actions, here use more encapsulation + // need wrappers of the fermionic classes + // that have a complex construction + // standard + + //RealD beta = 5.6; + WilsonGaugeActionR Waction(MyParams.gauge_beta); + + auto GridPtr = TheHMC.Resources.GetCartesian(); + auto GridRBPtr = TheHMC.Resources.GetRBCartesian(); + + // temporarily need a gauge field + LatticeGaugeField U(GridPtr); + + //Real mass = 0.01; + //Real csw = 1.0; + + Real mass = MyParams.WilsonClover.mass; + Real csw = MyParams.WilsonClover.csw; + + std::cout << "mass and csw" << mass << " and " << csw << std::endl; + + FermionAction FermOp(U, *GridPtr, *GridRBPtr, mass, csw, csw); + ConjugateGradient CG(MyParams.WilsonClover.StoppingCondition, 
MyParams.WilsonClover.MaxCGIterations); + TwoFlavourPseudoFermionAction Nf2(FermOp, CG, CG); + + // Set smearing (true/false), default: false + Nf2.is_smeared = ApplySmearing; + + // Collect actions + ActionLevel Level1(1); + Level1.push_back(&Nf2); + + ActionLevel Level2(4); + Level2.push_back(&Waction); + + TheHMC.TheAction.push_back(Level1); + TheHMC.TheAction.push_back(Level2); + ///////////////////////////////////////////////////////////// + + + /* + double rho = 0.1; // smearing parameter + int Nsmear = 2; // number of smearing levels + Smear_Stout Stout(rho); + SmearedConfiguration SmearingPolicy( + UGrid, Nsmear, Stout); + */ + + // HMC parameters are serialisable + + TheHMC.Parameters.initialize(Reader); + //TheHMC.Parameters.MD.MDsteps = 20; + //TheHMC.Parameters.MD.trajL = 1.0; + + if (ApplySmearing){ + SmearingParameters SmPar(Reader); + //double rho = 0.1; // smearing parameter + //int Nsmear = 3; // number of smearing levels + Smear_Stout Stout(SmPar.rho); + SmearedConfiguration SmearingPolicy(GridPtr, SmPar.Nsmear, Stout); + TheHMC.Run(SmearingPolicy); // for smearing + } else { + TheHMC.Run(); // no smearing + } + + //TheHMC.ReadCommandLine(argc, argv); // these can be parameters from file + //TheHMC.Run(); // no smearing + // TheHMC.Run(SmearingPolicy); // for smearing + + Grid_finalize(); + +} // main From 915f610da0c986c43ee706f8716ebbb453b0342c Mon Sep 17 00:00:00 2001 From: pretidav Date: Sat, 4 Nov 2017 01:17:06 +0100 Subject: [PATCH 209/377] clover 2indexSymm hmc production test created. clover 2indexAsymm and clover mixed to be filled. 
--- lib/qcd/action/fermion/Fermion.h | 17 ++ lib/qcd/action/fermion/FermionOperatorImpl.h | 4 + lib/qcd/action/fermion/WilsonCloverFermion.cc | 6 +- tests/hmc/Test_hmc_WC2ASFG_Production.cc | 129 +++++++++++ tests/hmc/Test_hmc_WC2SFG_Production.cc | 212 ++++++++++++++++++ tests/hmc/Test_hmc_WCMixedRepFG_Production.cc | 139 ++++++++++++ 6 files changed, 504 insertions(+), 3 deletions(-) create mode 100644 tests/hmc/Test_hmc_WC2ASFG_Production.cc create mode 100644 tests/hmc/Test_hmc_WC2SFG_Production.cc create mode 100644 tests/hmc/Test_hmc_WCMixedRepFG_Production.cc diff --git a/lib/qcd/action/fermion/Fermion.h b/lib/qcd/action/fermion/Fermion.h index bc8397ba..2a008cb7 100644 --- a/lib/qcd/action/fermion/Fermion.h +++ b/lib/qcd/action/fermion/Fermion.h @@ -106,6 +106,10 @@ typedef WilsonFermion WilsonTwoIndexSymmetricFermi typedef WilsonFermion WilsonTwoIndexSymmetricFermionF; typedef WilsonFermion WilsonTwoIndexSymmetricFermionD; +typedef WilsonFermion WilsonTwoIndexAntiSymmetricFermionR; +typedef WilsonFermion WilsonTwoIndexAntiSymmetricFermionF; +typedef WilsonFermion WilsonTwoIndexAntiSymmetricFermionD; + // Twisted mass fermion typedef WilsonTMFermion WilsonTMFermionR; typedef WilsonTMFermion WilsonTMFermionF; @@ -116,6 +120,19 @@ typedef WilsonCloverFermion WilsonCloverFermionR; typedef WilsonCloverFermion WilsonCloverFermionF; typedef WilsonCloverFermion WilsonCloverFermionD; +typedef WilsonCloverFermion WilsonCloverAdjFermionR; +typedef WilsonCloverFermion WilsonCloverAdjFermionF; +typedef WilsonCloverFermion WilsonCloverAdjFermionD; + +typedef WilsonCloverFermion WilsonCloverTwoIndexSymmetricFermionR; +typedef WilsonCloverFermion WilsonCloverTwoIndexSymmetricFermionF; +typedef WilsonCloverFermion WilsonCloverTwoIndexSymmetricFermionD; + +typedef WilsonCloverFermion WilsonCloverTwoIndexAntiSymmetricFermionR; +typedef WilsonCloverFermion WilsonCloverTwoIndexAntiSymmetricFermionF; +typedef WilsonCloverFermion WilsonCloverTwoIndexAntiSymmetricFermionD; + +// 
Domain Wall fermions typedef DomainWallFermion DomainWallFermionR; typedef DomainWallFermion DomainWallFermionF; typedef DomainWallFermion DomainWallFermionD; diff --git a/lib/qcd/action/fermion/FermionOperatorImpl.h b/lib/qcd/action/fermion/FermionOperatorImpl.h index 89bd9a15..85d6ffea 100644 --- a/lib/qcd/action/fermion/FermionOperatorImpl.h +++ b/lib/qcd/action/fermion/FermionOperatorImpl.h @@ -1004,6 +1004,10 @@ typedef WilsonImpl Wilso typedef WilsonImpl WilsonTwoIndexSymmetricImplF; // Float typedef WilsonImpl WilsonTwoIndexSymmetricImplD; // Double +typedef WilsonImpl WilsonTwoIndexAntiSymmetricImplR; // Real.. whichever prec +typedef WilsonImpl WilsonTwoIndexAntiSymmetricImplF; // Float +typedef WilsonImpl WilsonTwoIndexAntiSymmetricImplD; // Double + typedef DomainWallVec5dImpl DomainWallVec5dImplR; // Real.. whichever prec typedef DomainWallVec5dImpl DomainWallVec5dImplF; // Float typedef DomainWallVec5dImpl DomainWallVec5dImplD; // Double diff --git a/lib/qcd/action/fermion/WilsonCloverFermion.cc b/lib/qcd/action/fermion/WilsonCloverFermion.cc index 3ec90e06..3c082446 100644 --- a/lib/qcd/action/fermion/WilsonCloverFermion.cc +++ b/lib/qcd/action/fermion/WilsonCloverFermion.cc @@ -235,9 +235,9 @@ void WilsonCloverFermion::MeeDeriv(GaugeField &mat, const FermionField &U, assert(0); // not implemented yet } -FermOpTemplateInstantiate(WilsonCloverFermion); // now only for the fundamental representation -//AdjointFermOpTemplateInstantiate(WilsonCloverFermion); -//TwoIndexFermOpTemplateInstantiate(WilsonCloverFermion); +FermOpTemplateInstantiate(WilsonCloverFermion); +AdjointFermOpTemplateInstantiate(WilsonCloverFermion); +TwoIndexFermOpTemplateInstantiate(WilsonCloverFermion); //GparityFermOpTemplateInstantiate(WilsonCloverFermion); } } diff --git a/tests/hmc/Test_hmc_WC2ASFG_Production.cc b/tests/hmc/Test_hmc_WC2ASFG_Production.cc new file mode 100644 index 00000000..b0d1d3a4 --- /dev/null +++ b/tests/hmc/Test_hmc_WC2ASFG_Production.cc @@ -0,0 +1,129 @@ 
+/************************************************************************************* + +Grid physics library, www.github.com/paboyle/Grid + +Source file: ./tests/Test_hmc_WilsonAdjointFermionGauge.cc + +Copyright (C) 2015 + +Author: Peter Boyle +Author: neo + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along +with this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + +See the full license in the file "LICENSE" in the top level distribution +directory +*************************************************************************************/ +/* END LEGAL */ +#include "Grid/Grid.h" + +int main(int argc, char **argv) { + using namespace Grid; + using namespace Grid::QCD; + + // Here change the allowed (higher) representations + typedef Representations< FundamentalRepresentation, TwoIndexAntiSymmetricRepresentation > TheRepresentations; + + Grid_init(&argc, &argv); + int threads = GridThread::GetThreads(); + // here make a routine to print all the relevant information on the run + std::cout << GridLogMessage << "Grid is setup to use " << threads << " threads" << std::endl; + + // Typedefs to simplify notation + typedef GenericHMCRunnerHirep HMCWrapper; + + typedef WilsonTwoIndexAntiSymmetricImplR FermionImplPolicy; // gauge field implemetation for the pseudofermions + typedef WilsonTwoIndexAntiSymmetricFermionR FermionAction; // type of lattice fermions (Wilson, DW, ...) 
+ typedef typename FermionAction::FermionField FermionField; + + //:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: + HMCWrapper TheHMC; + + // Grid from the command line + TheHMC.Resources.AddFourDimGrid("gauge"); + // Possibile to create the module by hand + // hardcoding parameters or using a Reader + + + // Checkpointer definition + CheckpointerParameters CPparams; + CPparams.config_prefix = "ckpoint_lat"; + CPparams.rng_prefix = "ckpoint_rng"; + CPparams.saveInterval = 5; + CPparams.format = "IEEE64BIG"; + + TheHMC.Resources.LoadNerscCheckpointer(CPparams); + + RNGModuleParameters RNGpar; + RNGpar.serial_seeds = "1 2 3 4 5"; + RNGpar.parallel_seeds = "6 7 8 9 10"; + TheHMC.Resources.SetRNGSeeds(RNGpar); + + // Construct observables + typedef PlaquetteMod PlaqObs; + TheHMC.Resources.AddObservable(); + ////////////////////////////////////////////// + + ///////////////////////////////////////////////////////////// + // Collect actions, here use more encapsulation + // need wrappers of the fermionic classes + // that have a complex construction + // standard + RealD beta = 2.25 ; + WilsonGaugeActionR Waction(beta); + + auto GridPtr = TheHMC.Resources.GetCartesian(); + auto GridRBPtr = TheHMC.Resources.GetRBCartesian(); + + // temporarily need a gauge field + TwoIndexSymmetricRepresentation::LatticeField U(GridPtr); + + Real mass = -0.95; + + // Can we define an overloaded operator that does not need U and initialises + // it with zeroes? 
+ FermionAction FermOp(U, *GridPtr, *GridRBPtr, mass); + + ConjugateGradient CG(1.0e-8, 2000, false); + + TwoFlavourPseudoFermionAction Nf2(FermOp, CG, CG); + + // Set smearing (true/false), default: false + Nf2.is_smeared = false; + + + // Collect actions + ActionLevel Level1(1); + Level1.push_back(&Nf2); + + ActionLevel Level2(4); + Level2.push_back(&Waction); + + TheHMC.TheAction.push_back(Level1); + TheHMC.TheAction.push_back(Level2); + + // HMC parameters are serialisable + TheHMC.Parameters.MD.MDsteps = 20; + TheHMC.Parameters.MD.trajL = 1.0; + + TheHMC.ReadCommandLine(argc, argv); // these can be parameters from file + TheHMC.Run(); // no smearing + // TheHMC.Run(SmearingPolicy); // for smearing + + Grid_finalize(); + +} // main + + diff --git a/tests/hmc/Test_hmc_WC2SFG_Production.cc b/tests/hmc/Test_hmc_WC2SFG_Production.cc new file mode 100644 index 00000000..8d5fc458 --- /dev/null +++ b/tests/hmc/Test_hmc_WC2SFG_Production.cc @@ -0,0 +1,212 @@ +/************************************************************************************* + +Grid physics library, www.github.com/paboyle/Grid + +Source file: ./tests/Test_hmc_WilsonFermionGauge.cc + +Copyright (C) 2017 + +Author: Guido Cossu + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along +with this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+ +See the full license in the file "LICENSE" in the top level distribution +directory +*************************************************************************************/ +/* END LEGAL */ +#include + + +namespace Grid{ + struct FermionParameters: Serializable { + GRID_SERIALIZABLE_CLASS_MEMBERS(FermionParameters, + double, mass, + double, csw, + double, StoppingCondition, + int, MaxCGIterations, + bool, ApplySmearing); + }; + + + struct WilsonCloverHMCParameters: Serializable { + GRID_SERIALIZABLE_CLASS_MEMBERS(WilsonCloverHMCParameters, + double, gauge_beta, + FermionParameters, WilsonClover) + + template + WilsonCloverHMCParameters(Reader& Reader){ + read(Reader, "Action", *this); + } + }; + + struct SmearingParameters: Serializable { + GRID_SERIALIZABLE_CLASS_MEMBERS(SmearingParameters, + double, rho, + Integer, Nsmear) + + template + SmearingParameters(Reader& Reader){ + read(Reader, "StoutSmearing", *this); + } + + }; + + +} + +int main(int argc, char **argv) +{ + using namespace Grid; + using namespace Grid::QCD; + + typedef Representations< FundamentalRepresentation, TwoIndexSymmetricRepresentation > TheRepresentations; + + Grid_init(&argc, &argv); + int threads = GridThread::GetThreads(); + // here make a routine to print all the relevant information on the run + std::cout << GridLogMessage << "Grid is setup to use " << threads << " threads" << std::endl; + + // Typedefs to simplify notation + typedef GenericHMCRunnerHirep HMCWrapper; // Uses the default minimum norm + typedef WilsonTwoIndexSymmetricImplR FermionImplPolicy; // gauge field implemetation for the pseudofermions + typedef WilsonCloverTwoIndexSymmetricFermionR FermionAction; // type of lattice fermions (Wilson, DW, ...) 
+ typedef typename FermionAction::FermionField FermionField; + typedef Grid::JSONReader Serialiser; + + //:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: + HMCWrapper TheHMC; + + // Grid from the command line + TheHMC.ReadCommandLine(argc, argv); + if (TheHMC.ParameterFile.empty()){ + std::cout << "Input file not specified." + << "Use --ParameterFile option in the command line.\nAborting" + << std::endl; + exit(1); + } + Serialiser Reader(TheHMC.ParameterFile); + WilsonCloverHMCParameters MyParams(Reader); + + // Apply smearing to the fermionic action + bool ApplySmearing = MyParams.WilsonClover.ApplySmearing; + + TheHMC.Resources.AddFourDimGrid("gauge"); + + // Checkpointer definition + CheckpointerParameters CPparams(Reader); + + /* + CPparams.config_prefix = "ckpoint_lat"; + CPparams.rng_prefix = "ckpoint_rng"; + CPparams.saveInterval = 5; + CPparams.format = "IEEE64BIG"; + */ + + TheHMC.Resources.LoadNerscCheckpointer(CPparams); + + RNGModuleParameters RNGpar(Reader); + /* + RNGpar.serial_seeds = "1 2 3 4 5"; + RNGpar.parallel_seeds = "6 7 8 9 10"; + TheHMC.Resources.SetRNGSeeds(RNGpar); + */ + TheHMC.Resources.SetRNGSeeds(RNGpar); + + // Construct observables + typedef PlaquetteMod PlaqObs; + TheHMC.Resources.AddObservable(); + + typedef PolyakovMod PolyakovObs; + TheHMC.Resources.AddObservable(); + + //typedef TopologicalChargeMod QObs; + //TopologyObsParameters TopParams(Reader); + //TheHMC.Resources.AddObservable(TopParams); + ////////////////////////////////////////////// + + ///////////////////////////////////////////////////////////// + // Collect actions, here use more encapsulation + // need wrappers of the fermionic classes + // that have a complex construction + // standard + + //RealD beta = 5.6; + WilsonGaugeActionR Waction(MyParams.gauge_beta); + + auto GridPtr = TheHMC.Resources.GetCartesian(); + auto GridRBPtr = TheHMC.Resources.GetRBCartesian(); + + // temporarily need a gauge field + 
TwoIndexSymmetricRepresentation::LatticeField U(GridPtr); + + //Real mass = 0.01; + //Real csw = 1.0; + + Real mass = MyParams.WilsonClover.mass; + Real csw = MyParams.WilsonClover.csw; + + std::cout << "mass and csw" << mass << " and " << csw << std::endl; + + FermionAction FermOp(U, *GridPtr, *GridRBPtr, mass, csw, csw); + ConjugateGradient CG(MyParams.WilsonClover.StoppingCondition, MyParams.WilsonClover.MaxCGIterations); + TwoFlavourPseudoFermionAction Nf2(FermOp, CG, CG); + + // Set smearing (true/false), default: false + Nf2.is_smeared = ApplySmearing; + + // Collect actions + ActionLevel Level1(1); + Level1.push_back(&Nf2); + + ActionLevel Level2(4); + Level2.push_back(&Waction); + + TheHMC.TheAction.push_back(Level1); + TheHMC.TheAction.push_back(Level2); + ///////////////////////////////////////////////////////////// + + + /* + double rho = 0.1; // smearing parameter + int Nsmear = 2; // number of smearing levels + Smear_Stout Stout(rho); + SmearedConfiguration SmearingPolicy( + UGrid, Nsmear, Stout); + */ + + // HMC parameters are serialisable + + TheHMC.Parameters.initialize(Reader); + //TheHMC.Parameters.MD.MDsteps = 20; + //TheHMC.Parameters.MD.trajL = 1.0; + + if (ApplySmearing){ + SmearingParameters SmPar(Reader); + //double rho = 0.1; // smearing parameter + //int Nsmear = 3; // number of smearing levels + Smear_Stout Stout(SmPar.rho); + SmearedConfiguration SmearingPolicy(GridPtr, SmPar.Nsmear, Stout); + TheHMC.Run(SmearingPolicy); // for smearing + } else { + TheHMC.Run(); // no smearing + } + + //TheHMC.ReadCommandLine(argc, argv); // these can be parameters from file + //TheHMC.Run(); // no smearing + // TheHMC.Run(SmearingPolicy); // for smearing + + Grid_finalize(); + +} // main diff --git a/tests/hmc/Test_hmc_WCMixedRepFG_Production.cc b/tests/hmc/Test_hmc_WCMixedRepFG_Production.cc new file mode 100644 index 00000000..b54345cf --- /dev/null +++ b/tests/hmc/Test_hmc_WCMixedRepFG_Production.cc @@ -0,0 +1,139 @@ 
+/************************************************************************************* + +Grid physics library, www.github.com/paboyle/Grid + +Source file: ./tests/Test_hmc_WilsonAdjointFermionGauge.cc + +Copyright (C) 2015 + +Author: Peter Boyle +Author: Peter Boyle +Author: neo +Author: paboyle + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along +with this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + +See the full license in the file "LICENSE" in the top level distribution +directory +*************************************************************************************/ +/* END LEGAL */ +#include "Grid/Grid.h" + + + + +int main(int argc, char **argv) { + using namespace Grid; + using namespace Grid::QCD; + + // Here change the allowed (higher) representations + typedef Representations< FundamentalRepresentation, AdjointRepresentation , TwoIndexSymmetricRepresentation> TheRepresentations; + + Grid_init(&argc, &argv); + int threads = GridThread::GetThreads(); + // here make a routine to print all the relevant information on the run + std::cout << GridLogMessage << "Grid is setup to use " << threads << " threads" << std::endl; + + // Typedefs to simplify notation + typedef GenericHMCRunnerHirep HMCWrapper; + + typedef WilsonAdjImplR AdjImplPolicy; // gauge field implemetation for the pseudofermions + typedef WilsonAdjFermionR AdjFermionAction; // type of lattice fermions (Wilson, DW, ...) 
+ typedef WilsonTwoIndexSymmetricImplR SymmImplPolicy; + typedef WilsonTwoIndexSymmetricFermionR SymmFermionAction; + + + typedef typename AdjFermionAction::FermionField AdjFermionField; + typedef typename SymmFermionAction::FermionField SymmFermionField; + + //:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: + HMCWrapper TheHMC; + + // Grid from the command line + TheHMC.Resources.AddFourDimGrid("gauge"); + // Possibile to create the module by hand + // hardcoding parameters or using a Reader + + + // Checkpointer definition + CheckpointerParameters CPparams; + CPparams.config_prefix = "ckpoint_lat"; + CPparams.rng_prefix = "ckpoint_rng"; + CPparams.saveInterval = 5; + CPparams.format = "IEEE64BIG"; + + TheHMC.Resources.LoadNerscCheckpointer(CPparams); + + RNGModuleParameters RNGpar; + RNGpar.serial_seeds = "1 2 3 4 5"; + RNGpar.parallel_seeds = "6 7 8 9 10"; + TheHMC.Resources.SetRNGSeeds(RNGpar); + + // Construct observables + typedef PlaquetteMod PlaqObs; + TheHMC.Resources.AddObservable(); + ////////////////////////////////////////////// + + ///////////////////////////////////////////////////////////// + // Collect actions, here use more encapsulation + // need wrappers of the fermionic classes + // that have a complex construction + // standard + RealD beta = 2.25 ; + WilsonGaugeActionR Waction(beta); + + auto GridPtr = TheHMC.Resources.GetCartesian(); + auto GridRBPtr = TheHMC.Resources.GetRBCartesian(); + + // temporarily need a gauge field + AdjointRepresentation::LatticeField UA(GridPtr); + TwoIndexSymmetricRepresentation::LatticeField US(GridPtr); + + Real adjoint_mass = -0.1; + Real symm_mass = -0.5; + AdjFermionAction AdjFermOp(UA, *GridPtr, *GridRBPtr, adjoint_mass); + SymmFermionAction SymmFermOp(US, *GridPtr, *GridRBPtr, symm_mass); + + ConjugateGradient CG_adj(1.0e-8, 10000, false); + ConjugateGradient CG_symm(1.0e-8, 10000, false); + + // Pass two solvers: one for the force computation and one for the action + 
TwoFlavourPseudoFermionAction Nf2_Adj(AdjFermOp, CG_adj, CG_adj); + TwoFlavourPseudoFermionAction Nf2_Symm(SymmFermOp, CG_symm, CG_symm); + + // Collect actions + ActionLevel Level1(1); + Level1.push_back(&Nf2_Adj); + Level1.push_back(&Nf2_Symm); + + + ActionLevel Level2(4); + Level2.push_back(&Waction); + + TheHMC.TheAction.push_back(Level1); + TheHMC.TheAction.push_back(Level2); + + // HMC parameters are serialisable + TheHMC.Parameters.MD.MDsteps = 20; + TheHMC.Parameters.MD.trajL = 1.0; + + TheHMC.ReadCommandLine(argc, argv); // these can be parameters from file + TheHMC.Run(); // no smearing + + Grid_finalize(); + +} // main + + From a4934292182ffa1e96a5ecad2062f3ce06727f1c Mon Sep 17 00:00:00 2001 From: pretidav Date: Sat, 4 Nov 2017 18:16:54 +0100 Subject: [PATCH 210/377] added Production tests for MixedRep, Adj, 2S, 2AS. Still missing QObs. The HMC is not printing correctly all the actions and forces. --- lib/qcd/action/fermion/FermionCore.h | 4 +- lib/qcd/action/fermion/WilsonKernelsHand.cc | 3 +- tests/hmc/Test_hmc_WC2ASFG_Production.cc | 162 +++++++++---- tests/hmc/Test_hmc_WCMixedRepFG_Production.cc | 215 ++++++++++++------ tests/hmc/Test_hmc_WCadjFG_Production.cc | 213 +++++++++++++++++ 5 files changed, 491 insertions(+), 106 deletions(-) create mode 100644 tests/hmc/Test_hmc_WCadjFG_Production.cc diff --git a/lib/qcd/action/fermion/FermionCore.h b/lib/qcd/action/fermion/FermionCore.h index 17006961..60632c3a 100644 --- a/lib/qcd/action/fermion/FermionCore.h +++ b/lib/qcd/action/fermion/FermionCore.h @@ -70,7 +70,9 @@ Author: Peter Boyle #define TwoIndexFermOpTemplateInstantiate(A) \ template class A; \ - template class A; + template class A; \ + template class A; \ + template class A; #define FermOp5dVecTemplateInstantiate(A) \ template class A; \ diff --git a/lib/qcd/action/fermion/WilsonKernelsHand.cc b/lib/qcd/action/fermion/WilsonKernelsHand.cc index 80b81714..aa6b5f6b 100644 --- a/lib/qcd/action/fermion/WilsonKernelsHand.cc +++ 
b/lib/qcd/action/fermion/WilsonKernelsHand.cc @@ -946,5 +946,6 @@ INSTANTIATE_THEM(DomainWallVec5dImplFH); INSTANTIATE_THEM(DomainWallVec5dImplDF); INSTANTIATE_THEM(ZDomainWallVec5dImplFH); INSTANTIATE_THEM(ZDomainWallVec5dImplDF); - +INSTANTIATE_THEM(WilsonTwoIndexAntiSymmetricImplF); +INSTANTIATE_THEM(WilsonTwoIndexAntiSymmetricImplD); }} diff --git a/tests/hmc/Test_hmc_WC2ASFG_Production.cc b/tests/hmc/Test_hmc_WC2ASFG_Production.cc index b0d1d3a4..d255ab5d 100644 --- a/tests/hmc/Test_hmc_WC2ASFG_Production.cc +++ b/tests/hmc/Test_hmc_WC2ASFG_Production.cc @@ -2,12 +2,11 @@ Grid physics library, www.github.com/paboyle/Grid -Source file: ./tests/Test_hmc_WilsonAdjointFermionGauge.cc +Source file: ./tests/Test_hmc_WilsonFermionGauge.cc -Copyright (C) 2015 +Copyright (C) 2017 -Author: Peter Boyle -Author: neo +Author: Guido Cossu This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -27,103 +26,188 @@ See the full license in the file "LICENSE" in the top level distribution directory *************************************************************************************/ /* END LEGAL */ -#include "Grid/Grid.h" +#include -int main(int argc, char **argv) { + +namespace Grid{ + struct FermionParameters: Serializable { + GRID_SERIALIZABLE_CLASS_MEMBERS(FermionParameters, + double, mass, + double, csw, + double, StoppingCondition, + int, MaxCGIterations, + bool, ApplySmearing); + }; + + + struct WilsonCloverHMCParameters: Serializable { + GRID_SERIALIZABLE_CLASS_MEMBERS(WilsonCloverHMCParameters, + double, gauge_beta, + FermionParameters, WilsonClover) + + template + WilsonCloverHMCParameters(Reader& Reader){ + read(Reader, "Action", *this); + } + }; + + struct SmearingParameters: Serializable { + GRID_SERIALIZABLE_CLASS_MEMBERS(SmearingParameters, + double, rho, + Integer, Nsmear) + + template + SmearingParameters(Reader& Reader){ + read(Reader, "StoutSmearing", *this); + } + + }; + + 
+} + +int main(int argc, char **argv) +{ using namespace Grid; using namespace Grid::QCD; - // Here change the allowed (higher) representations - typedef Representations< FundamentalRepresentation, TwoIndexAntiSymmetricRepresentation > TheRepresentations; + typedef Representations< FundamentalRepresentation, TwoIndexAntiSymmetricRepresentation > TheRepresentations; Grid_init(&argc, &argv); int threads = GridThread::GetThreads(); // here make a routine to print all the relevant information on the run std::cout << GridLogMessage << "Grid is setup to use " << threads << " threads" << std::endl; - // Typedefs to simplify notation - typedef GenericHMCRunnerHirep HMCWrapper; - + // Typedefs to simplify notation + typedef GenericHMCRunnerHirep HMCWrapper; // Uses the default minimum norm typedef WilsonTwoIndexAntiSymmetricImplR FermionImplPolicy; // gauge field implemetation for the pseudofermions - typedef WilsonTwoIndexAntiSymmetricFermionR FermionAction; // type of lattice fermions (Wilson, DW, ...) + typedef WilsonCloverTwoIndexAntiSymmetricFermionR FermionAction; // type of lattice fermions (Wilson, DW, ...) typedef typename FermionAction::FermionField FermionField; + typedef Grid::JSONReader Serialiser; //:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: HMCWrapper TheHMC; // Grid from the command line - TheHMC.Resources.AddFourDimGrid("gauge"); - // Possibile to create the module by hand - // hardcoding parameters or using a Reader + TheHMC.ReadCommandLine(argc, argv); + if (TheHMC.ParameterFile.empty()){ + std::cout << "Input file not specified." 
+ << "Use --ParameterFile option in the command line.\nAborting" + << std::endl; + exit(1); + } + Serialiser Reader(TheHMC.ParameterFile); + WilsonCloverHMCParameters MyParams(Reader); + // Apply smearing to the fermionic action + bool ApplySmearing = MyParams.WilsonClover.ApplySmearing; + + TheHMC.Resources.AddFourDimGrid("gauge"); // Checkpointer definition - CheckpointerParameters CPparams; + CheckpointerParameters CPparams(Reader); + + /* CPparams.config_prefix = "ckpoint_lat"; CPparams.rng_prefix = "ckpoint_rng"; CPparams.saveInterval = 5; CPparams.format = "IEEE64BIG"; + */ TheHMC.Resources.LoadNerscCheckpointer(CPparams); - RNGModuleParameters RNGpar; + RNGModuleParameters RNGpar(Reader); + /* RNGpar.serial_seeds = "1 2 3 4 5"; RNGpar.parallel_seeds = "6 7 8 9 10"; TheHMC.Resources.SetRNGSeeds(RNGpar); + */ + TheHMC.Resources.SetRNGSeeds(RNGpar); // Construct observables typedef PlaquetteMod PlaqObs; TheHMC.Resources.AddObservable(); + + typedef PolyakovMod PolyakovObs; + TheHMC.Resources.AddObservable(); + + //typedef TopologicalChargeMod QObs; + //TopologyObsParameters TopParams(Reader); + //TheHMC.Resources.AddObservable(TopParams); ////////////////////////////////////////////// ///////////////////////////////////////////////////////////// // Collect actions, here use more encapsulation - // need wrappers of the fermionic classes + // need wrappers of the fermionic classes // that have a complex construction // standard - RealD beta = 2.25 ; - WilsonGaugeActionR Waction(beta); - - auto GridPtr = TheHMC.Resources.GetCartesian(); + + //RealD beta = 5.6; + WilsonGaugeActionR Waction(MyParams.gauge_beta); + + auto GridPtr = TheHMC.Resources.GetCartesian(); auto GridRBPtr = TheHMC.Resources.GetRBCartesian(); // temporarily need a gauge field - TwoIndexSymmetricRepresentation::LatticeField U(GridPtr); + TwoIndexAntiSymmetricRepresentation::LatticeField U(GridPtr); - Real mass = -0.95; + //Real mass = 0.01; + //Real csw = 1.0; - // Can we define an overloaded 
operator that does not need U and initialises - // it with zeroes? - FermionAction FermOp(U, *GridPtr, *GridRBPtr, mass); + Real mass = MyParams.WilsonClover.mass; + Real csw = MyParams.WilsonClover.csw; - ConjugateGradient CG(1.0e-8, 2000, false); + std::cout << "mass and csw" << mass << " and " << csw << std::endl; + FermionAction FermOp(U, *GridPtr, *GridRBPtr, mass, csw, csw); + ConjugateGradient CG(MyParams.WilsonClover.StoppingCondition, MyParams.WilsonClover.MaxCGIterations); TwoFlavourPseudoFermionAction Nf2(FermOp, CG, CG); // Set smearing (true/false), default: false - Nf2.is_smeared = false; + Nf2.is_smeared = ApplySmearing; - - // Collect actions - ActionLevel Level1(1); + // Collect actions + ActionLevel Level1(1); Level1.push_back(&Nf2); - ActionLevel Level2(4); + ActionLevel Level2(4); Level2.push_back(&Waction); TheHMC.TheAction.push_back(Level1); TheHMC.TheAction.push_back(Level2); + ///////////////////////////////////////////////////////////// - // HMC parameters are serialisable - TheHMC.Parameters.MD.MDsteps = 20; - TheHMC.Parameters.MD.trajL = 1.0; - TheHMC.ReadCommandLine(argc, argv); // these can be parameters from file - TheHMC.Run(); // no smearing + /* + double rho = 0.1; // smearing parameter + int Nsmear = 2; // number of smearing levels + Smear_Stout Stout(rho); + SmearedConfiguration SmearingPolicy( + UGrid, Nsmear, Stout); + */ + + // HMC parameters are serialisable + + TheHMC.Parameters.initialize(Reader); + //TheHMC.Parameters.MD.MDsteps = 20; + //TheHMC.Parameters.MD.trajL = 1.0; + + if (ApplySmearing){ + SmearingParameters SmPar(Reader); + //double rho = 0.1; // smearing parameter + //int Nsmear = 3; // number of smearing levels + Smear_Stout Stout(SmPar.rho); + SmearedConfiguration SmearingPolicy(GridPtr, SmPar.Nsmear, Stout); + TheHMC.Run(SmearingPolicy); // for smearing + } else { + TheHMC.Run(); // no smearing + } + + //TheHMC.ReadCommandLine(argc, argv); // these can be parameters from file + //TheHMC.Run(); // no smearing // 
TheHMC.Run(SmearingPolicy); // for smearing Grid_finalize(); } // main - diff --git a/tests/hmc/Test_hmc_WCMixedRepFG_Production.cc b/tests/hmc/Test_hmc_WCMixedRepFG_Production.cc index b54345cf..a79452f4 100644 --- a/tests/hmc/Test_hmc_WCMixedRepFG_Production.cc +++ b/tests/hmc/Test_hmc_WCMixedRepFG_Production.cc @@ -32,6 +32,40 @@ directory #include "Grid/Grid.h" +namespace Grid{ + struct FermionParameters: Serializable { + GRID_SERIALIZABLE_CLASS_MEMBERS(FermionParameters, + double, mass, + double, csw, + double, StoppingCondition, + int, MaxCGIterations, + bool, ApplySmearing); + }; + + struct WilsonCloverHMCParameters: Serializable { + GRID_SERIALIZABLE_CLASS_MEMBERS(WilsonCloverHMCParameters, + double, gauge_beta, + FermionParameters, WilsonCloverFund, + FermionParameters, WilsonCloverAS) + + template + WilsonCloverHMCParameters(Reader& Reader){ + read(Reader, "Action", *this); + } + }; + + struct SmearingParameters: Serializable { + GRID_SERIALIZABLE_CLASS_MEMBERS(SmearingParameters, + double, rho, + Integer, Nsmear) + + template + SmearingParameters(Reader& Reader){ + read(Reader, "StoutSmearing", *this); + } + + }; +} int main(int argc, char **argv) { @@ -39,7 +73,7 @@ int main(int argc, char **argv) { using namespace Grid::QCD; // Here change the allowed (higher) representations - typedef Representations< FundamentalRepresentation, AdjointRepresentation , TwoIndexSymmetricRepresentation> TheRepresentations; + typedef Representations< FundamentalRepresentation, TwoIndexAntiSymmetricRepresentation> TheRepresentations; Grid_init(&argc, &argv); int threads = GridThread::GetThreads(); @@ -49,91 +83,142 @@ int main(int argc, char **argv) { // Typedefs to simplify notation typedef GenericHMCRunnerHirep HMCWrapper; - typedef WilsonAdjImplR AdjImplPolicy; // gauge field implemetation for the pseudofermions - typedef WilsonAdjFermionR AdjFermionAction; // type of lattice fermions (Wilson, DW, ...) 
- typedef WilsonTwoIndexSymmetricImplR SymmImplPolicy; - typedef WilsonTwoIndexSymmetricFermionR SymmFermionAction; + typedef WilsonImplR FundImplPolicy; + typedef WilsonCloverFermionR FundFermionAction; + typedef typename FundFermionAction::FermionField FundFermionField; + typedef WilsonTwoIndexAntiSymmetricImplR ASymmImplPolicy; + typedef WilsonCloverTwoIndexAntiSymmetricFermionR ASymmFermionAction; + typedef typename ASymmFermionAction::FermionField ASymmFermionField; - typedef typename AdjFermionAction::FermionField AdjFermionField; - typedef typename SymmFermionAction::FermionField SymmFermionField; - + typedef Grid::JSONReader Serialiser; //:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: HMCWrapper TheHMC; - - // Grid from the command line - TheHMC.Resources.AddFourDimGrid("gauge"); - // Possibile to create the module by hand - // hardcoding parameters or using a Reader - - - // Checkpointer definition - CheckpointerParameters CPparams; - CPparams.config_prefix = "ckpoint_lat"; - CPparams.rng_prefix = "ckpoint_rng"; - CPparams.saveInterval = 5; - CPparams.format = "IEEE64BIG"; - TheHMC.Resources.LoadNerscCheckpointer(CPparams); - - RNGModuleParameters RNGpar; - RNGpar.serial_seeds = "1 2 3 4 5"; - RNGpar.parallel_seeds = "6 7 8 9 10"; - TheHMC.Resources.SetRNGSeeds(RNGpar); - - // Construct observables - typedef PlaquetteMod PlaqObs; - TheHMC.Resources.AddObservable(); - ////////////////////////////////////////////// - - ///////////////////////////////////////////////////////////// - // Collect actions, here use more encapsulation - // need wrappers of the fermionic classes - // that have a complex construction - // standard - RealD beta = 2.25 ; - WilsonGaugeActionR Waction(beta); + // Grid from the command line + TheHMC.ReadCommandLine(argc, argv); + if (TheHMC.ParameterFile.empty()){ + std::cout << "Input file not specified." 
+ << "Use --ParameterFile option in the command line.\nAborting" + << std::endl; + exit(1); + } + Serialiser Reader(TheHMC.ParameterFile); + WilsonCloverHMCParameters MyParams(Reader); + + // Apply smearing to the fermionic action + bool ApplySmearingFund = MyParams.WilsonCloverFund.ApplySmearing; + bool ApplySmearingAS = MyParams.WilsonCloverAS.ApplySmearing; - auto GridPtr = TheHMC.Resources.GetCartesian(); - auto GridRBPtr = TheHMC.Resources.GetRBCartesian(); - // temporarily need a gauge field - AdjointRepresentation::LatticeField UA(GridPtr); - TwoIndexSymmetricRepresentation::LatticeField US(GridPtr); + TheHMC.Resources.AddFourDimGrid("gauge"); + + // Checkpointer definition + CheckpointerParameters CPparams(Reader); + + /* + CPparams.config_prefix = "ckpoint_lat"; + CPparams.rng_prefix = "ckpoint_rng"; + CPparams.saveInterval = 5; + CPparams.format = "IEEE64BIG"; + */ + + TheHMC.Resources.LoadNerscCheckpointer(CPparams); + + RNGModuleParameters RNGpar(Reader); + /* + RNGpar.serial_seeds = "1 2 3 4 5"; + RNGpar.parallel_seeds = "6 7 8 9 10"; + TheHMC.Resources.SetRNGSeeds(RNGpar); + */ + TheHMC.Resources.SetRNGSeeds(RNGpar); + + // Construct observables + typedef PlaquetteMod PlaqObs; + TheHMC.Resources.AddObservable(); + + typedef PolyakovMod PolyakovObs; + TheHMC.Resources.AddObservable(); + + //typedef TopologicalChargeMod QObs; + //TopologyObsParameters TopParams(Reader); + //TheHMC.Resources.AddObservable(TopParams); + ////////////////////////////////////////////// + + ///////////////////////////////////////////////////////////// + // Collect actions, here use more encapsulation + // need wrappers of the fermionic classes + // that have a complex construction + // standard + + //RealD beta = 5.6; + WilsonGaugeActionR Waction(MyParams.gauge_beta); + + auto GridPtr = TheHMC.Resources.GetCartesian(); + auto GridRBPtr = TheHMC.Resources.GetRBCartesian(); + + // temporarily need a gauge field + FundamentalRepresentation::LatticeField UF(GridPtr); + 
TwoIndexAntiSymmetricRepresentation::LatticeField UAS(GridPtr); - Real adjoint_mass = -0.1; - Real symm_mass = -0.5; - AdjFermionAction AdjFermOp(UA, *GridPtr, *GridRBPtr, adjoint_mass); - SymmFermionAction SymmFermOp(US, *GridPtr, *GridRBPtr, symm_mass); - ConjugateGradient CG_adj(1.0e-8, 10000, false); - ConjugateGradient CG_symm(1.0e-8, 10000, false); + Real Fundmass = MyParams.WilsonCloverFund.mass; + Real Fundcsw = MyParams.WilsonCloverFund.csw; + Real ASmass = MyParams.WilsonCloverAS.mass; + Real AScsw = MyParams.WilsonCloverAS.csw; - // Pass two solvers: one for the force computation and one for the action - TwoFlavourPseudoFermionAction Nf2_Adj(AdjFermOp, CG_adj, CG_adj); - TwoFlavourPseudoFermionAction Nf2_Symm(SymmFermOp, CG_symm, CG_symm); + + + std::cout << "Fund: mass and csw" << Fundmass << " and " << Fundcsw << std::endl; + std::cout << "AS : mass and csw" << ASmass << " and " << AScsw << std::endl; + + + FundFermionAction FundFermOp(UF, *GridPtr, *GridRBPtr, Fundmass, Fundcsw, Fundcsw); + ConjugateGradient CG_Fund(MyParams.WilsonCloverFund.StoppingCondition, MyParams.WilsonCloverFund.MaxCGIterations); + TwoFlavourPseudoFermionAction Nf2_Fund(FundFermOp, CG_Fund, CG_Fund); + + ASymmFermionAction ASFermOp(UAS, *GridPtr, *GridRBPtr, ASmass, AScsw, AScsw); + ConjugateGradient CG_AS(MyParams.WilsonCloverAS.StoppingCondition, MyParams.WilsonCloverAS.MaxCGIterations); + TwoFlavourPseudoFermionAction Nf2_AS(ASFermOp, CG_AS, CG_AS); + + Nf2_Fund.is_smeared = ApplySmearingFund; + Nf2_AS.is_smeared = ApplySmearingAS; + // Collect actions - ActionLevel Level1(1); - Level1.push_back(&Nf2_Adj); - Level1.push_back(&Nf2_Symm); + ActionLevel Level1(1); + Level1.push_back(&Nf2_Fund); + Level1.push_back(&Nf2_AS); - ActionLevel Level2(4); + ActionLevel Level2(4); Level2.push_back(&Waction); TheHMC.TheAction.push_back(Level1); TheHMC.TheAction.push_back(Level2); - // HMC parameters are serialisable - TheHMC.Parameters.MD.MDsteps = 20; - TheHMC.Parameters.MD.trajL = 1.0; 
+ TheHMC.Parameters.initialize(Reader); + //TheHMC.Parameters.MD.MDsteps = 20; + //TheHMC.Parameters.MD.trajL = 1.0; +/* + if (ApplySmearingFund || ApplySmearingAS){ + SmearingParameters SmPar(Reader); + //double rho = 0.1; // smearing parameter + //int Nsmear = 3; // number of smearing levels + Smear_Stout Stout(SmPar.rho); + SmearedConfiguration SmearingPolicy(GridPtr, SmPar.Nsmear, Stout); + TheHMC.Run(SmearingPolicy); // for smearing + } else { + TheHMC.Run(); // no smearing + } +*/ + TheHMC.Run(); - TheHMC.ReadCommandLine(argc, argv); // these can be parameters from file - TheHMC.Run(); // no smearing + + //TheHMC.ReadCommandLine(argc, argv); // these can be parameters from file + //TheHMC.Run(); // no smearing + // TheHMC.Run(SmearingPolicy); // for smearing Grid_finalize(); } // main - - diff --git a/tests/hmc/Test_hmc_WCadjFG_Production.cc b/tests/hmc/Test_hmc_WCadjFG_Production.cc new file mode 100644 index 00000000..b99c1189 --- /dev/null +++ b/tests/hmc/Test_hmc_WCadjFG_Production.cc @@ -0,0 +1,213 @@ +/************************************************************************************* + +Grid physics library, www.github.com/paboyle/Grid + +Source file: ./tests/Test_hmc_WilsonFermionGauge.cc + +Copyright (C) 2017 + +Author: Guido Cossu + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along +with this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+ +See the full license in the file "LICENSE" in the top level distribution +directory +*************************************************************************************/ +/* END LEGAL */ +#include + + +namespace Grid{ + struct FermionParameters: Serializable { + GRID_SERIALIZABLE_CLASS_MEMBERS(FermionParameters, + double, mass, + double, csw, + double, StoppingCondition, + int, MaxCGIterations, + bool, ApplySmearing); + }; + + + struct WilsonCloverHMCParameters: Serializable { + GRID_SERIALIZABLE_CLASS_MEMBERS(WilsonCloverHMCParameters, + double, gauge_beta, + FermionParameters, WilsonClover) + + template + WilsonCloverHMCParameters(Reader& Reader){ + read(Reader, "Action", *this); + } + }; + + struct SmearingParameters: Serializable { + GRID_SERIALIZABLE_CLASS_MEMBERS(SmearingParameters, + double, rho, + Integer, Nsmear) + + template + SmearingParameters(Reader& Reader){ + read(Reader, "StoutSmearing", *this); + } + + }; + + +} + +int main(int argc, char **argv) +{ + using namespace Grid; + using namespace Grid::QCD; + + typedef Representations< FundamentalRepresentation, AdjointRepresentation > TheRepresentations; + + Grid_init(&argc, &argv); + int threads = GridThread::GetThreads(); + // here make a routine to print all the relevant information on the run + std::cout << GridLogMessage << "Grid is setup to use " << threads << " threads" << std::endl; + + // Typedefs to simplify notation + typedef GenericHMCRunnerHirep HMCWrapper; // Uses the default minimum norm + typedef WilsonAdjImplR FermionImplPolicy; // gauge field implemetation for the pseudofermions + typedef WilsonCloverAdjFermionR FermionAction; // type of lattice fermions (Wilson, DW, ...) 
+ typedef typename FermionAction::FermionField FermionField; + typedef Grid::JSONReader Serialiser; + + //:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: + HMCWrapper TheHMC; + + // Grid from the command line + TheHMC.ReadCommandLine(argc, argv); + if (TheHMC.ParameterFile.empty()){ + std::cout << "Input file not specified." + << "Use --ParameterFile option in the command line.\nAborting" + << std::endl; + exit(1); + } + Serialiser Reader(TheHMC.ParameterFile); + WilsonCloverHMCParameters MyParams(Reader); + + // Apply smearing to the fermionic action + bool ApplySmearing = MyParams.WilsonClover.ApplySmearing; + + TheHMC.Resources.AddFourDimGrid("gauge"); + + // Checkpointer definition + CheckpointerParameters CPparams(Reader); + + /* + CPparams.config_prefix = "ckpoint_lat"; + CPparams.rng_prefix = "ckpoint_rng"; + CPparams.saveInterval = 5; + CPparams.format = "IEEE64BIG"; + */ + + TheHMC.Resources.LoadNerscCheckpointer(CPparams); + + RNGModuleParameters RNGpar(Reader); + /* + RNGpar.serial_seeds = "1 2 3 4 5"; + RNGpar.parallel_seeds = "6 7 8 9 10"; + TheHMC.Resources.SetRNGSeeds(RNGpar); + */ + TheHMC.Resources.SetRNGSeeds(RNGpar); + + // Construct observables + typedef PlaquetteMod PlaqObs; + TheHMC.Resources.AddObservable(); + + typedef PolyakovMod PolyakovObs; + TheHMC.Resources.AddObservable(); + + //typedef TopologicalChargeMod QObs; + //TopologyObsParameters TopParams(Reader); + //TheHMC.Resources.AddObservable(TopParams); + ////////////////////////////////////////////// + + ///////////////////////////////////////////////////////////// + // Collect actions, here use more encapsulation + // need wrappers of the fermionic classes + // that have a complex construction + // standard + + //RealD beta = 5.6; + WilsonGaugeActionR Waction(MyParams.gauge_beta); + + auto GridPtr = TheHMC.Resources.GetCartesian(); + auto GridRBPtr = TheHMC.Resources.GetRBCartesian(); + + // temporarily need a gauge field + AdjointRepresentation::LatticeField 
U(GridPtr); + + //Real mass = 0.01; + //Real csw = 1.0; + + Real mass = MyParams.WilsonClover.mass; + Real csw = MyParams.WilsonClover.csw; + + std::cout << "mass and csw" << mass << " and " << csw << std::endl; + + FermionAction FermOp(U, *GridPtr, *GridRBPtr, mass, csw, csw); + ConjugateGradient CG(MyParams.WilsonClover.StoppingCondition, MyParams.WilsonClover.MaxCGIterations); + TwoFlavourPseudoFermionAction Nf2(FermOp, CG, CG); + + // Set smearing (true/false), default: false + Nf2.is_smeared = ApplySmearing; + + // Collect actions + ActionLevel Level1(1); + Level1.push_back(&Nf2); + + ActionLevel Level2(4); + Level2.push_back(&Waction); + + TheHMC.TheAction.push_back(Level1); + TheHMC.TheAction.push_back(Level2); + ///////////////////////////////////////////////////////////// + + + /* + double rho = 0.1; // smearing parameter + int Nsmear = 2; // number of smearing levels + Smear_Stout Stout(rho); + SmearedConfiguration SmearingPolicy( + UGrid, Nsmear, Stout); + */ + + // HMC parameters are serialisable + + TheHMC.Parameters.initialize(Reader); + //TheHMC.Parameters.MD.MDsteps = 20; + //TheHMC.Parameters.MD.trajL = 1.0; + + if (ApplySmearing){ + SmearingParameters SmPar(Reader); + //double rho = 0.1; // smearing parameter + //int Nsmear = 3; // number of smearing levels + Smear_Stout Stout(SmPar.rho); + SmearedConfiguration SmearingPolicy(GridPtr, SmPar.Nsmear, Stout); + TheHMC.Run(SmearingPolicy); // for smearing + } else { + TheHMC.Run(); // no smearing + } + + //TheHMC.ReadCommandLine(argc, argv); // these can be parameters from file + //TheHMC.Run(); // no smearing + // TheHMC.Run(SmearingPolicy); // for smearing + + Grid_finalize(); + +} // main + From 69929f20bbea5bcd125d9d296fd395e04e0e4dd2 Mon Sep 17 00:00:00 2001 From: paboyle Date: Mon, 6 Nov 2017 23:45:00 +0000 Subject: [PATCH 211/377] Destructor fix. Split Grid and MPI3 will not yet work without more effort from me. 
--- lib/communicator/Communicator_base.cc | 41 +++++++++++++++++++++++++-- lib/communicator/Communicator_base.h | 2 +- lib/communicator/Communicator_mpi.cc | 30 -------------------- lib/communicator/Communicator_mpi3.cc | 17 ++++++++++- lib/communicator/Communicator_mpit.cc | 8 +++++- 5 files changed, 62 insertions(+), 36 deletions(-) diff --git a/lib/communicator/Communicator_base.cc b/lib/communicator/Communicator_base.cc index a72c75fe..531dd358 100644 --- a/lib/communicator/Communicator_base.cc +++ b/lib/communicator/Communicator_base.cc @@ -98,7 +98,39 @@ void CartesianCommunicator::GlobalSumVector(ComplexD *c,int N) #if defined( GRID_COMMS_MPI) || defined (GRID_COMMS_MPIT) || defined (GRID_COMMS_MPI3) +void CartesianCommunicator::AllToAll(int dim,void *in,void *out,uint64_t words,uint64_t bytes) +{ + std::vector row(_ndimension,1); + assert(dim>=0 && dim<_ndimension); + // Split the communicator + row[dim] = _processors[dim]; + + int me; + CartesianCommunicator Comm(row,*this,me); + Comm.AllToAll(in,out,words,bytes); +} +void CartesianCommunicator::AllToAll(void *in,void *out,uint64_t words,uint64_t bytes) +{ + // MPI is a pain and uses "int" arguments + // 64*64*64*128*16 == 500Million elements of data. + // When 24*4 bytes multiples get 50x 10^9 >>> 2x10^9 Y2K bug. + // (Turns up on 32^3 x 64 Gparity too) + MPI_Datatype object; + int iwords; + int ibytes; + iwords = words; + ibytes = bytes; + assert(words == iwords); // safe to cast to int ? + assert(bytes == ibytes); // safe to cast to int ? 
+ MPI_Type_contiguous(ibytes,MPI_BYTE,&object); + MPI_Type_commit(&object); + MPI_Alltoall(in,iwords,object,out,iwords,object,communicator); + MPI_Type_free(&object); +} +#endif + +#if defined( GRID_COMMS_MPI) || defined (GRID_COMMS_MPIT) CartesianCommunicator::CartesianCommunicator(const std::vector &processors,const CartesianCommunicator &parent,int &srank) { _ndimension = processors.size(); @@ -176,6 +208,7 @@ CartesianCommunicator::CartesianCommunicator(const std::vector &processors, ////////////////////////////////////////////////////////////////////////////////////////////////////// InitFromMPICommunicator(processors,comm_split); } + ////////////////////////////////////////////////////////////////////////////////////////////////////// // Take an MPI_Comm and self assemble ////////////////////////////////////////////////////////////////////////////////////////////////////// @@ -199,7 +232,7 @@ void CartesianCommunicator::InitFromMPICommunicator(const std::vector &proc MPI_Cart_coords(communicator,_processor,_ndimension,&_processor_coor[0]); if ( communicator_base != communicator_world ) { - std::cout << "Cartesian communicator created with a non-world communicator"< &proc int Size; MPI_Comm_size(communicator,&Size); -#ifdef GRID_COMMS_MPIT +#if defined(GRID_COMMS_MPIT) || defined (GRID_COMMS_MPI3) communicator_halo.resize (2*_ndimension); for(int i=0;i<_ndimension*2;i++){ MPI_Comm_dup(communicator,&communicator_halo[i]); @@ -220,7 +253,9 @@ void CartesianCommunicator::InitFromMPICommunicator(const std::vector &proc assert(Size==_Nprocessors); } +#endif +#if defined( GRID_COMMS_MPI) || defined (GRID_COMMS_MPIT) CartesianCommunicator::CartesianCommunicator(const std::vector &processors) { InitFromMPICommunicator(processors,communicator_world); @@ -229,10 +264,10 @@ CartesianCommunicator::CartesianCommunicator(const std::vector &processors) #endif #if !defined( GRID_COMMS_MPI3) - int CartesianCommunicator::NodeCount(void) { return ProcessorCount();}; int 
CartesianCommunicator::RankCount(void) { return ProcessorCount();}; #endif + #if !defined( GRID_COMMS_MPI3) && !defined (GRID_COMMS_MPIT) double CartesianCommunicator::StencilSendToRecvFrom( void *xmit, int xmit_to_rank, diff --git a/lib/communicator/Communicator_base.h b/lib/communicator/Communicator_base.h index 4374ac93..73ea6165 100644 --- a/lib/communicator/Communicator_base.h +++ b/lib/communicator/Communicator_base.h @@ -158,7 +158,7 @@ class CartesianCommunicator { virtual ~CartesianCommunicator(); private: -#if defined (GRID_COMMS_MPI) || defined (GRID_COMMS_MPIT) +#if defined (GRID_COMMS_MPI) || defined (GRID_COMMS_MPIT) || defined (GRID_COMMS_MPI3) //////////////////////////////////////////////// // Private initialise from an MPI communicator // Can use after an MPI_Comm_split, but hidden from user so private diff --git a/lib/communicator/Communicator_mpi.cc b/lib/communicator/Communicator_mpi.cc index 5593aa8b..f7b2a460 100644 --- a/lib/communicator/Communicator_mpi.cc +++ b/lib/communicator/Communicator_mpi.cc @@ -196,36 +196,6 @@ void CartesianCommunicator::Broadcast(int root,void* data, int bytes) root, communicator); assert(ierr==0); -} -void CartesianCommunicator::AllToAll(int dim,void *in,void *out,uint64_t words,uint64_t bytes) -{ - std::vector row(_ndimension,1); - assert(dim>=0 && dim<_ndimension); - - // Split the communicator - row[dim] = _processors[dim]; - - int me; - CartesianCommunicator Comm(row,*this,me); - Comm.AllToAll(in,out,words,bytes); -} -void CartesianCommunicator::AllToAll(void *in,void *out,uint64_t words,uint64_t bytes) -{ - // MPI is a pain and uses "int" arguments - // 64*64*64*128*16 == 500Million elements of data. - // When 24*4 bytes multiples get 50x 10^9 >>> 2x10^9 Y2K bug. - // (Turns up on 32^3 x 64 Gparity too) - MPI_Datatype object; - int iwords; - int ibytes; - iwords = words; - ibytes = bytes; - assert(words == iwords); // safe to cast to int ? - assert(bytes == ibytes); // safe to cast to int ? 
- MPI_Type_contiguous(ibytes,MPI_BYTE,&object); - MPI_Type_commit(&object); - MPI_Alltoall(in,iwords,object,out,iwords,object,communicator); - MPI_Type_free(&object); } /////////////////////////////////////////////////////// // Should only be used prior to Grid Init finished. diff --git a/lib/communicator/Communicator_mpi3.cc b/lib/communicator/Communicator_mpi3.cc index 3cac726c..9e023fef 100644 --- a/lib/communicator/Communicator_mpi3.cc +++ b/lib/communicator/Communicator_mpi3.cc @@ -454,11 +454,15 @@ void CartesianCommunicator::ProcessorCoorFromRank(int rank, std::vector &c ////////////////////////////////// // Try to subdivide communicator ////////////////////////////////// -CartesianCommunicator::CartesianCommunicator(const std::vector &processors,const CartesianCommunicator &parent) +/* + * Use default in MPI compile + */ +CartesianCommunicator::CartesianCommunicator(const std::vector &processors,const CartesianCommunicator &parent,int &srank) : CartesianCommunicator(processors) { std::cout << "Attempts to split MPI3 communicators will fail until implemented" < &processors) { int ierr; @@ -596,6 +600,17 @@ CartesianCommunicator::CartesianCommunicator(const std::vector &processors) } } }; +CartesianCommunicator::~CartesianCommunicator() +{ + int MPI_is_finalised; + MPI_Finalized(&MPI_is_finalised); + if (communicator && MPI_is_finalised) { + MPI_Comm_free(&communicator); + for(int i=0;i< communicator_halo.size();i++){ + MPI_Comm_free(&communicator_halo[i]); + } + } +} void CartesianCommunicator::GlobalSum(uint32_t &u){ int ierr=MPI_Allreduce(MPI_IN_PLACE,&u,1,MPI_UINT32_T,MPI_SUM,communicator); assert(ierr==0); diff --git a/lib/communicator/Communicator_mpit.cc b/lib/communicator/Communicator_mpit.cc index 56f96c20..31f786ac 100644 --- a/lib/communicator/Communicator_mpit.cc +++ b/lib/communicator/Communicator_mpit.cc @@ -55,8 +55,14 @@ void CartesianCommunicator::Init(int *argc, char ***argv) { CartesianCommunicator::~CartesianCommunicator() { - if 
(communicator && !MPI::Is_finalized()) + int MPI_is_finalised; + MPI_Finalized(&MPI_is_finalised); + if (communicator && MPI_is_finalised) { MPI_Comm_free(&communicator); + for(int i=0;i< communicator_halo.size();i++){ + MPI_Comm_free(&communicator_halo[i]); + } + } } From c519aab19dcfb559ab21585d9f9221b1fc193a60 Mon Sep 17 00:00:00 2001 From: Guido Cossu Date: Tue, 7 Nov 2017 13:55:37 +0000 Subject: [PATCH 212/377] Fixing the MPI memory leak in the communicators --- lib/communicator/Communicator_mpi.cc | 2 +- lib/communicator/Communicator_mpi3.cc | 8 ++++++++ lib/communicator/Communicator_mpit.cc | 5 +++-- lib/communicator/Communicator_shmem.cc | 2 ++ 4 files changed, 14 insertions(+), 3 deletions(-) diff --git a/lib/communicator/Communicator_mpi.cc b/lib/communicator/Communicator_mpi.cc index 5593aa8b..1c1ae6c5 100644 --- a/lib/communicator/Communicator_mpi.cc +++ b/lib/communicator/Communicator_mpi.cc @@ -57,7 +57,7 @@ CartesianCommunicator::~CartesianCommunicator() { int MPI_is_finalised; MPI_Finalized(&MPI_is_finalised); - if (communicator && MPI_is_finalised) + if (communicator && !MPI_is_finalised) MPI_Comm_free(&communicator); } diff --git a/lib/communicator/Communicator_mpi3.cc b/lib/communicator/Communicator_mpi3.cc index 3cac726c..52f65c34 100644 --- a/lib/communicator/Communicator_mpi3.cc +++ b/lib/communicator/Communicator_mpi3.cc @@ -596,6 +596,14 @@ CartesianCommunicator::CartesianCommunicator(const std::vector &processors) } } }; +CartesianCommunicator::~CartesianCommunicator() +{ + int MPI_is_finalised; + MPI_Finalized(&MPI_is_finalised); + if (communicator && !MPI_is_finalised) + MPI_Comm_free(&communicator); +} + void CartesianCommunicator::GlobalSum(uint32_t &u){ int ierr=MPI_Allreduce(MPI_IN_PLACE,&u,1,MPI_UINT32_T,MPI_SUM,communicator); assert(ierr==0); diff --git a/lib/communicator/Communicator_mpit.cc b/lib/communicator/Communicator_mpit.cc index 56f96c20..1c24433b 100644 --- a/lib/communicator/Communicator_mpit.cc +++ 
b/lib/communicator/Communicator_mpit.cc @@ -55,11 +55,12 @@ void CartesianCommunicator::Init(int *argc, char ***argv) { CartesianCommunicator::~CartesianCommunicator() { - if (communicator && !MPI::Is_finalized()) + int MPI_is_finalised; + MPI_Finalized(&MPI_is_finalised); + if (communicator && !MPI_is_finalised) MPI_Comm_free(&communicator); } - void CartesianCommunicator::GlobalSum(uint32_t &u){ int ierr=MPI_Allreduce(MPI_IN_PLACE,&u,1,MPI_UINT32_T,MPI_SUM,communicator); assert(ierr==0); diff --git a/lib/communicator/Communicator_shmem.cc b/lib/communicator/Communicator_shmem.cc index ed49285d..03e3173e 100644 --- a/lib/communicator/Communicator_shmem.cc +++ b/lib/communicator/Communicator_shmem.cc @@ -75,6 +75,8 @@ void CartesianCommunicator::Init(int *argc, char ***argv) { ShmInitGeneric(); } +CartesianCommunicator::~CartesianCommunicator(){} + CartesianCommunicator::CartesianCommunicator(const std::vector &processors,const CartesianCommunicator &parent) : CartesianCommunicator(processors) { From 9b8d1cc3da4769f250665cc8e05fb305c794bc5d Mon Sep 17 00:00:00 2001 From: Azusa Yamaguchi Date: Tue, 7 Nov 2017 14:48:45 +0000 Subject: [PATCH 213/377] Staggered Schur decomposed matrix norm changed to not be the Schur anymore :( Carleton wanted this for multimass / multishift --- lib/algorithms/LinearOperator.h | 12 ++++++++++++ lib/algorithms/iterative/SchurRedBlack.h | 6 +++--- 2 files changed, 15 insertions(+), 3 deletions(-) diff --git a/lib/algorithms/LinearOperator.h b/lib/algorithms/LinearOperator.h index 9b0e4942..0fa039c8 100644 --- a/lib/algorithms/LinearOperator.h +++ b/lib/algorithms/LinearOperator.h @@ -317,11 +317,23 @@ namespace Grid { } virtual RealD Mpc (const Field &in, Field &out) { Field tmp(in._grid); + Field tmp2(in._grid); + + _Mat.Mooee(in,out); + _Mat.Mooee(out,tmp); + + _Mat.Meooe(in,out); + _Mat.Meooe(out,tmp2); + + return axpy_norm(out,-1.0,tmp2,tmp); +#if 0 + //... 
much prefer conventional Schur norm _Mat.Meooe(in,tmp); _Mat.MooeeInv(tmp,out); _Mat.Meooe(out,tmp); _Mat.Mooee(in,out); return axpy_norm(out,-1.0,tmp,out); +#endif } virtual RealD MpcDag (const Field &in, Field &out){ return Mpc(in,out); diff --git a/lib/algorithms/iterative/SchurRedBlack.h b/lib/algorithms/iterative/SchurRedBlack.h index a0fd86a6..b9767aa8 100644 --- a/lib/algorithms/iterative/SchurRedBlack.h +++ b/lib/algorithms/iterative/SchurRedBlack.h @@ -90,7 +90,7 @@ namespace Grid { // Take a matrix and form a Red Black solver calling a Herm solver // Use of RB info prevents making SchurRedBlackSolve conform to standard interface /////////////////////////////////////////////////////////////////////////////////////////////////////// - + // Now make the norm reflect extra factor of Mee template class SchurRedBlackStaggeredSolve { private: OperatorFunction & _HermitianRBSolver; @@ -136,8 +136,8 @@ namespace Grid { _Matrix.Meooe (tmp,Mtmp); assert( Mtmp.checkerboard ==Odd); tmp=src_o-Mtmp; assert( tmp.checkerboard ==Odd); - src_o = tmp; assert(src_o.checkerboard ==Odd); - // _Matrix.Mooee(tmp,src_o); // Extra factor of "m" in source + //src_o = tmp; assert(src_o.checkerboard ==Odd); + _Matrix.Mooee(tmp,src_o); // Extra factor of "m" in source from dumb choice of matrix norm. 
////////////////////////////////////////////////////////////// // Call the red-black solver From 1860b1698c4b204a332e2fad4dea86c97ffb0abc Mon Sep 17 00:00:00 2001 From: Azusa Yamaguchi Date: Wed, 8 Nov 2017 09:03:01 +0000 Subject: [PATCH 214/377] Fixed the bug on MPI_T at Cam --- lib/communicator/Communicator_mpit.cc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/communicator/Communicator_mpit.cc b/lib/communicator/Communicator_mpit.cc index b2fb71a6..bceea0d8 100644 --- a/lib/communicator/Communicator_mpit.cc +++ b/lib/communicator/Communicator_mpit.cc @@ -57,7 +57,7 @@ CartesianCommunicator::~CartesianCommunicator() { int MPI_is_finalised; MPI_Finalized(&MPI_is_finalised); - if (communicator && !MPI_is_finalised) + if (communicator && !MPI_is_finalised){ MPI_Comm_free(&communicator); for(int i=0;i< communicator_halo.size();i++){ MPI_Comm_free(&communicator_halo[i]); @@ -246,7 +246,7 @@ void CartesianCommunicator::StencilSendToRecvFromComplete(std::vector Date: Wed, 8 Nov 2017 22:02:32 +0100 Subject: [PATCH 215/377] restored WilsonKernelsHand.cc and added Qtop to production codes --- lib/qcd/action/fermion/WilsonKernelsHand.cc | 2 -- tests/hmc/Test_hmc_WCMixedRepFG_Production.cc | 6 +++--- tests/hmc/Test_hmc_WCadjFG_Production.cc | 6 +++--- 3 files changed, 6 insertions(+), 8 deletions(-) diff --git a/lib/qcd/action/fermion/WilsonKernelsHand.cc b/lib/qcd/action/fermion/WilsonKernelsHand.cc index aa6b5f6b..90e6cb9b 100644 --- a/lib/qcd/action/fermion/WilsonKernelsHand.cc +++ b/lib/qcd/action/fermion/WilsonKernelsHand.cc @@ -946,6 +946,4 @@ INSTANTIATE_THEM(DomainWallVec5dImplFH); INSTANTIATE_THEM(DomainWallVec5dImplDF); INSTANTIATE_THEM(ZDomainWallVec5dImplFH); INSTANTIATE_THEM(ZDomainWallVec5dImplDF); -INSTANTIATE_THEM(WilsonTwoIndexAntiSymmetricImplF); -INSTANTIATE_THEM(WilsonTwoIndexAntiSymmetricImplD); }} diff --git a/tests/hmc/Test_hmc_WCMixedRepFG_Production.cc b/tests/hmc/Test_hmc_WCMixedRepFG_Production.cc index a79452f4..aa5cce85
100644 --- a/tests/hmc/Test_hmc_WCMixedRepFG_Production.cc +++ b/tests/hmc/Test_hmc_WCMixedRepFG_Production.cc @@ -140,9 +140,9 @@ int main(int argc, char **argv) { typedef PolyakovMod PolyakovObs; TheHMC.Resources.AddObservable(); - //typedef TopologicalChargeMod QObs; - //TopologyObsParameters TopParams(Reader); - //TheHMC.Resources.AddObservable(TopParams); + typedef TopologicalChargeMod QObs; + TopologyObsParameters TopParams(Reader); + TheHMC.Resources.AddObservable(TopParams); ////////////////////////////////////////////// ///////////////////////////////////////////////////////////// diff --git a/tests/hmc/Test_hmc_WCadjFG_Production.cc b/tests/hmc/Test_hmc_WCadjFG_Production.cc index b99c1189..48cea756 100644 --- a/tests/hmc/Test_hmc_WCadjFG_Production.cc +++ b/tests/hmc/Test_hmc_WCadjFG_Production.cc @@ -131,9 +131,9 @@ int main(int argc, char **argv) typedef PolyakovMod PolyakovObs; TheHMC.Resources.AddObservable(); - //typedef TopologicalChargeMod QObs; - //TopologyObsParameters TopParams(Reader); - //TheHMC.Resources.AddObservable(TopParams); + typedef TopologicalChargeMod QObs; + TopologyObsParameters TopParams(Reader); + TheHMC.Resources.AddObservable(TopParams); ////////////////////////////////////////////// ///////////////////////////////////////////////////////////// From 1d7ccc6b2c28a44a9405757bfc3fc0e7fd63584d Mon Sep 17 00:00:00 2001 From: fionnoh Date: Thu, 9 Nov 2017 19:46:57 +0000 Subject: [PATCH 216/377] Declaring virtual functions as pure virtual functions. 
--- lib/algorithms/iterative/ImplicitlyRestartedLanczos.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/algorithms/iterative/ImplicitlyRestartedLanczos.h b/lib/algorithms/iterative/ImplicitlyRestartedLanczos.h index 7a0760c9..7b85c095 100644 --- a/lib/algorithms/iterative/ImplicitlyRestartedLanczos.h +++ b/lib/algorithms/iterative/ImplicitlyRestartedLanczos.h @@ -168,8 +168,8 @@ void basisDeflate(const std::vector &_v,const std::vector& eval,co template class ImplicitlyRestartedLanczosTester { public: - virtual int TestConvergence(int j,RealD resid,Field &evec, RealD &eval,RealD evalMaxApprox); - virtual int ReconstructEval(int j,RealD resid,Field &evec, RealD &eval,RealD evalMaxApprox); + virtual int TestConvergence(int j,RealD resid,Field &evec, RealD &eval,RealD evalMaxApprox)=0; + virtual int ReconstructEval(int j,RealD resid,Field &evec, RealD &eval,RealD evalMaxApprox)=0; }; enum IRLdiagonalisation { From 6a15e2e8ef25e54c966d44169c7f2ab4d6d1c1d0 Mon Sep 17 00:00:00 2001 From: pretidav Date: Sun, 12 Nov 2017 14:16:19 +0100 Subject: [PATCH 217/377] Added WilsonTwoIndexAntiSymmImpl instantiation in WilsonKernelsHand.cc (should not be necessary) --- lib/qcd/action/fermion/WilsonKernelsHand.cc | 2 ++ 1 file changed, 2 insertions(+) diff --git a/lib/qcd/action/fermion/WilsonKernelsHand.cc b/lib/qcd/action/fermion/WilsonKernelsHand.cc index 90e6cb9b..aa6b5f6b 100644 --- a/lib/qcd/action/fermion/WilsonKernelsHand.cc +++ b/lib/qcd/action/fermion/WilsonKernelsHand.cc @@ -946,4 +946,6 @@ INSTANTIATE_THEM(DomainWallVec5dImplFH); INSTANTIATE_THEM(DomainWallVec5dImplDF); INSTANTIATE_THEM(ZDomainWallVec5dImplFH); INSTANTIATE_THEM(ZDomainWallVec5dImplDF); +INSTANTIATE_THEM(WilsonTwoIndexAntiSymmetricImplF); +INSTANTIATE_THEM(WilsonTwoIndexAntiSymmetricImplD); }} From 1f1d77b01a308a7716d429b09bb0ffa01cd7f835 Mon Sep 17 00:00:00 2001 From: Guido Cossu Date: Tue, 14 Nov 2017 10:01:48 +0000 Subject: [PATCH 218/377] Performance metrics for the Scalar
Action force term --- lib/qcd/action/scalar/ScalarImpl.h | 4 ++ .../action/scalar/ScalarInteractionAction.h | 62 ++++++++++++++----- lib/qcd/hmc/GenericHMCrunner.h | 2 +- 3 files changed, 53 insertions(+), 15 deletions(-) diff --git a/lib/qcd/action/scalar/ScalarImpl.h b/lib/qcd/action/scalar/ScalarImpl.h index 650f4d17..55f5049d 100644 --- a/lib/qcd/action/scalar/ScalarImpl.h +++ b/lib/qcd/action/scalar/ScalarImpl.h @@ -168,7 +168,11 @@ class ScalarImplTypes { static inline void update_field(Field &P, Field &U, double ep) { #ifndef USE_FFT_ACCELERATION + double t0=usecond(); U += P * ep; + double t1=usecond(); + double total_time = (t1-t0)/1e6; + std::cout << GridLogIntegrator << "Total time for updating field (s) : " << total_time << std::endl; #else // FFT transform P(x) -> P(p) // divide by (M^2+p^2) M external parameter (how to pass?) diff --git a/lib/qcd/action/scalar/ScalarInteractionAction.h b/lib/qcd/action/scalar/ScalarInteractionAction.h index 3848751d..8738b647 100644 --- a/lib/qcd/action/scalar/ScalarInteractionAction.h +++ b/lib/qcd/action/scalar/ScalarInteractionAction.h @@ -44,18 +44,18 @@ public: INHERIT_FIELD_TYPES(Impl); private: - RealD mass_square; - RealD lambda; - RealD g; - const unsigned int N = Impl::Group::Dimension; + RealD mass_square; + RealD lambda; + RealD g; + const unsigned int N = Impl::Group::Dimension; typedef typename Field::vector_object vobj; typedef CartesianStencil Stencil; SimpleCompressor compressor; int npoint = 2 * Ndim; - std::vector directions; // = {0,1,2,3,0,1,2,3}; // forcing 4 dimensions - std::vector displacements; // = {1,1,1,1, -1,-1,-1,-1}; + std::vector directions; // + std::vector displacements; // public: ScalarInteractionAction(RealD ms, RealD l, RealD gval) : mass_square(ms), lambda(l), g(gval), displacements(2 * Ndim, 0), directions(2 * Ndim, 0) @@ -124,39 +124,55 @@ public: } // NB the trace in the algebra is normalised to 1/2 // minus sign coming from the antihermitian fields - return 
-(TensorRemove(sum(trace(action)))).real()*N/g; + return -(TensorRemove(sum(trace(action)))).real() * N / g; }; virtual void deriv(const Field &p, Field &force) { + double t0 = usecond(); assert(p._grid->Nd() == Ndim); force = (2. * Ndim + mass_square) * p - 2. * lambda * p * p * p; + double interm_t = usecond(); + // move this outside static Stencil phiStencil(p._grid, npoint, 0, directions, displacements); - phiStencil.HaloExchange(p, compressor); + phiStencil.HaloExchange(p, compressor); + double halo_t = usecond(); + int chunk = 128; //for (int mu = 0; mu < QCD::Nd; mu++) force -= Cshift(p, mu, -1) + Cshift(p, mu, 1); + + // inverting the order of the loops slows down the code(! g++ 7) + // cannot try to reduce the number of force writes by factor npoint... + // use cache blocking for (int point = 0; point < npoint; point++) { - parallel_for(int i = 0; i < p._grid->oSites(); i++) - { - const vobj *temp; - vobj temp2; + +#pragma omp parallel +{ int permute_type; StencilEntry *SE; + const vobj *temp; + +#pragma omp for schedule(static, chunk) + for (int i = 0; i < p._grid->oSites(); i++) + { SE = phiStencil.GetEntry(permute_type, point, i); + // prefetch next p? if (SE->_is_local) { temp = &p._odata[SE->_offset]; + if (SE->_permute) { + vobj temp2; permute(temp2, *temp, permute_type); force._odata[i] -= temp2; } else { - force._odata[i] -= *temp; + force._odata[i] -= *temp; // slow part. 
Dominated by this read/write (BW) } } else @@ -164,9 +180,27 @@ public: force._odata[i] -= phiStencil.CommBuf()[SE->_offset]; } } + } - force *= N/g; } + force *= N / g; + + double t1 = usecond(); + double total_time = (t1 - t0) / 1e6; + double interm_time = (interm_t - t0) / 1e6; + double halo_time = (halo_t - interm_t) / 1e6; + double stencil_time = (t1 - halo_t) / 1e6; + std::cout << GridLogIntegrator << "Total time for force computation (s) : " << total_time << std::endl; + std::cout << GridLogIntegrator << "Intermediate time for force computation (s): " << interm_time << std::endl; + std::cout << GridLogIntegrator << "Halo time in force computation (s) : " << halo_time << std::endl; + std::cout << GridLogIntegrator << "Stencil time in force computation (s) : " << stencil_time << std::endl; + double flops = p._grid->gSites() * (14 * N * N * N + 18 * N * N + 2); + double flops_no_stencil = p._grid->gSites() * (14 * N * N * N + 6 * N * N + 2); + double Gflops = flops / (total_time * 1e9); + double Gflops_no_stencil = flops_no_stencil / (interm_time * 1e9); + std::cout << GridLogIntegrator << "Flops: " << flops << " - Gflop/s : " << Gflops << std::endl; + std::cout << GridLogIntegrator << "Flops NS: " << flops_no_stencil << " - Gflop/s NS: " << Gflops_no_stencil << std::endl; +} }; } // namespace Grid diff --git a/lib/qcd/hmc/GenericHMCrunner.h b/lib/qcd/hmc/GenericHMCrunner.h index 4f6c1af0..26fec3d5 100644 --- a/lib/qcd/hmc/GenericHMCrunner.h +++ b/lib/qcd/hmc/GenericHMCrunner.h @@ -211,7 +211,7 @@ typedef HMCWrapperTemplate ScalarAdjGenericHMCRunner; template -using ScalarNxNAdjGenericHMCRunner = HMCWrapperTemplate < ScalarNxNAdjImplR, MinimumNorm2, ScalarNxNMatrixFields >; +using ScalarNxNAdjGenericHMCRunner = HMCWrapperTemplate < ScalarNxNAdjImplR, ForceGradient, ScalarNxNMatrixFields >; } // namespace QCD } // namespace Grid From 94b8fb56862289c0663453c0e2d82fa6da310f38 Mon Sep 17 00:00:00 2001 From: paboyle Date: Sun, 19 Nov 2017 01:39:04 +0000 Subject: 
[PATCH 219/377] Debug in progress --- lib/communicator/Communicator_base.cc | 47 ++++++++++++------- lib/communicator/Communicator_mpi3.cc | 2 +- lib/lattice/Lattice_transfer.h | 67 +++++++++++++++++++++++++++ tests/solver/Test_dwf_mrhs_cg_mpi.cc | 6 +-- 4 files changed, 102 insertions(+), 20 deletions(-) diff --git a/lib/communicator/Communicator_base.cc b/lib/communicator/Communicator_base.cc index 531dd358..223b07fd 100644 --- a/lib/communicator/Communicator_base.cc +++ b/lib/communicator/Communicator_base.cc @@ -134,8 +134,18 @@ void CartesianCommunicator::AllToAll(void *in,void *out,uint64_t words,uint64_t CartesianCommunicator::CartesianCommunicator(const std::vector &processors,const CartesianCommunicator &parent,int &srank) { _ndimension = processors.size(); - assert(_ndimension = parent._ndimension); - + + int parent_ndimension = parent._ndimension; assert(_ndimension >= parent._ndimension); + std::vector parent_processor_coor(_ndimension,0); + std::vector parent_processors (_ndimension,1); + + // Can make 5d grid from 4d etc... 
+ int pad = _ndimension-parent_ndimension; + for(int d=0;d &processors, std::vector ssize(_ndimension); // coor of split within parent for(int d=0;d<_ndimension;d++){ - ccoor[d] = parent._processor_coor[d] % processors[d]; - scoor[d] = parent._processor_coor[d] / processors[d]; - ssize[d] = parent._processors[d] / processors[d]; + ccoor[d] = parent_processor_coor[d] % processors[d]; + scoor[d] = parent_processor_coor[d] / processors[d]; + ssize[d] = parent_processors[d] / processors[d]; } int crank; // rank within subcomm ; srank is rank of subcomm within blocks of subcomms // Mpi uses the reverse Lexico convention to us @@ -166,38 +176,34 @@ CartesianCommunicator::CartesianCommunicator(const std::vector &processors, MPI_Comm comm_split; if ( Nchild > 1 ) { - /* std::cout << GridLogMessage<<"Child communicator of "<< std::hex << parent.communicator << std::dec< &processors, // Set up from the new split communicator ////////////////////////////////////////////////////////////////////////////////////////////////////// InitFromMPICommunicator(processors,comm_split); + + std::cout << " ndim " <<_ndimension<<" " << parent._ndimension << std::endl; + for(int d=0;d &out, const Lattice &in){ // NB: Easiest to programme if keep in lex order. 
// ///////////////////////////////////////////////////////// +/* +[0,0,0,0,0] S {V<4>{V<3>{(0,0),(0,0),(0,0)},V<3>{(0,0),(0,0),(0,0)},V<3>{(0,0),(0,0),(0,0)},V<3>{(0,0),(0,0),(0,0)}}} +[0,0,0,0,1] S {V<4>{V<3>{(1,0),(1,0),(1,0)},V<3>{(1,0),(1,0),(1,0)},V<3>{(1,0),(1,0),(1,0)},V<3>{(1,0),(1,0),(1,0)}}} +[0,0,0,0,2] S {V<4>{V<3>{(4,0),(4,0),(4,0)},V<3>{(4,0),(4,0),(4,0)},V<3>{(4,0),(4,0),(4,0)},V<3>{(4,0),(4,0),(4,0)}}} +[0,0,0,0,3] S {V<4>{V<3>{(5,0),(5,0),(5,0)},V<3>{(5,0),(5,0),(5,0)},V<3>{(5,0),(5,0),(5,0)},V<3>{(5,0),(5,0),(5,0)}}} +[0,0,0,0,4] S {V<4>{V<3>{(2,0),(2,0),(2,0)},V<3>{(2,0),(2,0),(2,0)},V<3>{(2,0),(2,0),(2,0)},V<3>{(2,0),(2,0),(2,0)}}} +[0,0,0,0,5] S {V<4>{V<3>{(3,0),(3,0),(3,0)},V<3>{(3,0),(3,0),(3,0)},V<3>{(3,0),(3,0),(3,0)},V<3>{(3,0),(3,0),(3,0)}}} +[0,0,0,0,6] S {V<4>{V<3>{(6,0),(6,0),(6,0)},V<3>{(6,0),(6,0),(6,0)},V<3>{(6,0),(6,0),(6,0)},V<3>{(6,0),(6,0),(6,0)}}} +[0,0,0,0,7] S {V<4>{V<3>{(7,0),(7,0),(7,0)},V<3>{(7,0),(7,0),(7,0)},V<3>{(7,0),(7,0),(7,0)},V<3>{(7,0),(7,0),(7,0)}}} +[0,0,0,0,8] S {V<4>{V<3>{(8,0),(8,0),(8,0)},V<3>{(8,0),(8,0),(8,0)},V<3>{(8,0),(8,0),(8,0)},V<3>{(8,0),(8,0),(8,0)}}} +[0,0,0,0,9] S {V<4>{V<3>{(9,0),(9,0),(9,0)},V<3>{(9,0),(9,0),(9,0)},V<3>{(9,0),(9,0),(9,0)},V<3>{(9,0),(9,0),(9,0)}}} +[0,0,0,0,10] S {V<4>{V<3>{(12,0),(12,0),(12,0)},V<3>{(12,0),(12,0),(12,0)},V<3>{(12,0),(12,0),(12,0)},V<3>{(12,0),(12,0),(12,0)}}} +[0,0,0,0,11] S {V<4>{V<3>{(13,0),(13,0),(13,0)},V<3>{(13,0),(13,0),(13,0)},V<3>{(13,0),(13,0),(13,0)},V<3>{(13,0),(13,0),(13,0)}}} +[0,0,0,0,12] S {V<4>{V<3>{(10,0),(10,0),(10,0)},V<3>{(10,0),(10,0),(10,0)},V<3>{(10,0),(10,0),(10,0)},V<3>{(10,0),(10,0),(10,0)}}} +[0,0,0,0,13] S {V<4>{V<3>{(11,0),(11,0),(11,0)},V<3>{(11,0),(11,0),(11,0)},V<3>{(11,0),(11,0),(11,0)},V<3>{(11,0),(11,0),(11,0)}}} +[0,0,0,0,14] S {V<4>{V<3>{(14,0),(14,0),(14,0)},V<3>{(14,0),(14,0),(14,0)},V<3>{(14,0),(14,0),(14,0)},V<3>{(14,0),(14,0),(14,0)}}} +[0,0,0,0,15] S 
{V<4>{V<3>{(15,0),(15,0),(15,0)},V<3>{(15,0),(15,0),(15,0)},V<3>{(15,0),(15,0),(15,0)},V<3>{(15,0),(15,0),(15,0)}}} + + +Process decomp +[A(0 1) A(2 3) B(0 1) B(2 3)] [ A(4 5) A(6 7) B(4 5) B(6 7)] [ A(8 9) A(10 11) B(8 9) B(10 11)] [A(12 13) A(14 15) B(12 13) B(14 15)] + +A2A(Full) + -- divides M*fL into fP segments of size M*fL/fP = fL/sP + -- total is fP * fL/sP = M * fL + A(0 1) A(4 5) A(8 9) A(12 13) + A(2 3) A(6 7) A(10 11) A(14 15) + B(0 1) B(4 5) B(8 9) B(12 13) + B(2 3) B(6 7) B(10 11) B(14 15) + + +A2A(Split) + A(0 1) A(4 5) A(2 3) A(6 7) + A(8 9) A(12 13) A(10 11) A(14 15) + B(0 1) B(2 3) B(4 5) B(6 7) + B(8 9) B(10 11) B(12 13) B(14 15) + +-------------------- +-- General case +-------------------- +G global lattice +fP - procs +sP - Procs in split grid +M - subdivisions/vectors - M*sP = fP ** constraint 1 +fL = G/fP per node (full) +sL = G/sP per node split + +[ G * M ] total = G*fP/sP. +[ Subdivide fL*M by fP => fL *M / fP = fL/fP *fP/sP = fL/sP ] +-------------------- +-- 1st A2A chunk is fL*M/fP = G/fP *fP/sP /fP = fL/sP +-- Let cL = fL/sP chunk. ( Divide into fP/sP = M chunks ) + +-- node 0 1st cL of node 0,1,... fP-1 ; vector 0 +-- node 1 2nd cL of node 0,1,... fP-1 +-- node 2 3nd cL of node 0,1,... fP-1 +-- node 3 4th cL of node 0,1,... fP-1 +... when node > sP get vector 1 etc... + +-- 2nd A2A (over sP nodes; subdivide the fP into sP chunks of M) +-- node 0 1st cL of node 0M..(1M-1); 2nd cL of node 0M..(1M-1)).. +-- node 1 1st cL of node 1M..(2M-1); 2nd cL of node 1M..(2M-1).. +-- node 2 1st cL of node 2M..(3M-1); 2nd cL of node 2M..(3M-1).. +-- node 3 1st cL of node 3M..(3M-1); 2nd cL of node 2M..(3M-1).. 
+-- +-- Insert correctly + */ template void Grid_split(std::vector > & full,Lattice & split) { diff --git a/tests/solver/Test_dwf_mrhs_cg_mpi.cc b/tests/solver/Test_dwf_mrhs_cg_mpi.cc index f640edff..d380f91e 100644 --- a/tests/solver/Test_dwf_mrhs_cg_mpi.cc +++ b/tests/solver/Test_dwf_mrhs_cg_mpi.cc @@ -95,7 +95,7 @@ int main (int argc, char ** argv) FermionField tmp(FGrid); for(int s=0;s HermOp(Ddwf); MdagMLinearOperator HermOpCk(Dchk); - ConjugateGradient CG((1.0e-5),10000); + ConjugateGradient CG((1.0e-2),10000); s_res = zero; CG(HermOp,s_src,s_res); From f403ab01336b6ec2cdc4260f698a0f5001bb0a3d Mon Sep 17 00:00:00 2001 From: Antonin Portelli Date: Wed, 22 Nov 2017 17:13:09 +0000 Subject: [PATCH 220/377] gitignore update --- .gitignore | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.gitignore b/.gitignore index d743ee06..399f2f6b 100644 --- a/.gitignore +++ b/.gitignore @@ -93,6 +93,7 @@ build*/* *.xcodeproj/* build.sh .vscode +*.code-workspace # Eigen source # ################ @@ -122,4 +123,3 @@ make-bin-BUCK.sh ##################### lib/qcd/spin/gamma-gen/*.h lib/qcd/spin/gamma-gen/*.cc - From a3fe874a5ba649fecec0a273e4f5a0dd52995a03 Mon Sep 17 00:00:00 2001 From: Antonin Portelli Date: Wed, 22 Nov 2017 23:27:19 +0000 Subject: [PATCH 221/377] Hadrons: everything is broken, repairing while implementing the new memory model --- extras/Hadrons/Application.cc | 14 +++ extras/Hadrons/Application.hpp | 2 + extras/Hadrons/Environment.cc | 106 ++++-------------- extras/Hadrons/Environment.hpp | 118 ++++++-------------- extras/Hadrons/Module.hpp | 6 + extras/Hadrons/Modules.hpp | 52 ++++----- extras/Hadrons/Modules/MAction/DWF.hpp | 25 ++--- extras/Hadrons/Modules/MAction/Wilson.hpp | 30 ++--- extras/Hadrons/Modules/MGauge/Unit.cc | 4 +- extras/Hadrons/Modules/MSolver/RBPrecCG.hpp | 27 ++--- extras/Hadrons/modules.inc | 74 ++++++------ 11 files changed, 174 insertions(+), 284 deletions(-) diff --git a/extras/Hadrons/Application.cc 
b/extras/Hadrons/Application.cc index 90ebcfd7..a94b617c 100644 --- a/extras/Hadrons/Application.cc +++ b/extras/Hadrons/Application.cc @@ -316,3 +316,17 @@ void Application::configLoop(void) LOG(Message) << BIG_SEP << " End of measurement " << BIG_SEP << std::endl; env().freeAll(); } + +// memory profile ////////////////////////////////////////////////////////////// +void Application::memoryProfile(void) +{ + auto graph = env().makeModuleGraph(); + auto program = graph.topoSort(); + bool msg; + + msg = HadronsLogMessage.isActive(); + HadronsLogMessage.Active(false); + + HadronsLogMessage.Active(msg); +} + diff --git a/extras/Hadrons/Application.hpp b/extras/Hadrons/Application.hpp index fce9b6eb..8b11b0c7 100644 --- a/extras/Hadrons/Application.hpp +++ b/extras/Hadrons/Application.hpp @@ -101,6 +101,8 @@ public: private: // environment shortcut Environment & env(void) const; + // memory profile + void memoryProfile(void); private: long unsigned int locVol_; std::string parameterFileName_{""}; diff --git a/extras/Hadrons/Environment.cc b/extras/Hadrons/Environment.cc index 0e7a4326..eb0a6f70 100644 --- a/extras/Hadrons/Environment.cc +++ b/extras/Hadrons/Environment.cc @@ -67,6 +67,16 @@ bool Environment::isDryRun(void) const return dryRun_; } +void Environment::memoryProfile(const bool doMemoryProfile) +{ + memoryProfile_ = doMemoryProfile; +} + +bool Environment::doMemoryProfile(void) const +{ + return memoryProfile_; +} + // trajectory number /////////////////////////////////////////////////////////// void Environment::setTrajectory(const unsigned int traj) { @@ -349,10 +359,10 @@ Environment::executeProgram(const std::vector &p) auto it = std::find_if(p.rbegin(), p.rend(), pred); if (it != p.rend()) { - freeProg[p.rend() - it - 1].insert(i); + freeProg[std::distance(p.rend(), it) - 1].insert(i); } } - + // program execution for (unsigned int i = 0; i < p.size(); ++i) { @@ -448,6 +458,7 @@ void Environment::addObject(const std::string name, const int 
moduleAddress) info.name = name; info.module = moduleAddress; + info.data = nullptr; object_.push_back(std::move(info)); objectAddress_[name] = static_cast(object_.size() - 1); } @@ -457,39 +468,6 @@ void Environment::addObject(const std::string name, const int moduleAddress) } } -void Environment::registerObject(const unsigned int address, - const unsigned int size, const unsigned int Ls) -{ - if (!hasRegisteredObject(address)) - { - if (hasObject(address)) - { - object_[address].size = size; - object_[address].Ls = Ls; - object_[address].isRegistered = true; - } - else - { - HADRON_ERROR("no object with address " + std::to_string(address)); - } - } - else - { - HADRON_ERROR("object with address " + std::to_string(address) - + " already registered"); - } -} - -void Environment::registerObject(const std::string name, - const unsigned int size, const unsigned int Ls) -{ - if (!hasObject(name)) - { - addObject(name); - } - registerObject(getObjectAddress(name), size, Ls); -} - unsigned int Environment::getObjectAddress(const std::string name) const { if (hasObject(name)) @@ -516,7 +494,7 @@ std::string Environment::getObjectName(const unsigned int address) const std::string Environment::getObjectType(const unsigned int address) const { - if (hasRegisteredObject(address)) + if (hasObject(address)) { if (object_[address].type) { @@ -527,11 +505,6 @@ std::string Environment::getObjectType(const unsigned int address) const return ""; } } - else if (hasObject(address)) - { - HADRON_ERROR("object with address " + std::to_string(address) - + " exists but is not registered"); - } else { HADRON_ERROR("no object with address " + std::to_string(address)); @@ -545,15 +518,10 @@ std::string Environment::getObjectType(const std::string name) const Environment::Size Environment::getObjectSize(const unsigned int address) const { - if (hasRegisteredObject(address)) + if (hasObject(address)) { return object_[address].size; } - else if (hasObject(address)) - { - HADRON_ERROR("object 
with address " + std::to_string(address) - + " exists but is not registered"); - } else { HADRON_ERROR("no object with address " + std::to_string(address)); @@ -584,15 +552,10 @@ unsigned int Environment::getObjectModule(const std::string name) const unsigned int Environment::getObjectLs(const unsigned int address) const { - if (hasRegisteredObject(address)) + if (hasObject(address)) { return object_[address].Ls; } - else if (hasObject(address)) - { - HADRON_ERROR("object with address " + std::to_string(address) - + " exists but is not registered"); - } else { HADRON_ERROR("no object with address " + std::to_string(address)); @@ -616,30 +579,6 @@ bool Environment::hasObject(const std::string name) const return ((it != objectAddress_.end()) and hasObject(it->second)); } -bool Environment::hasRegisteredObject(const unsigned int address) const -{ - if (hasObject(address)) - { - return object_[address].isRegistered; - } - else - { - return false; - } -} - -bool Environment::hasRegisteredObject(const std::string name) const -{ - if (hasObject(name)) - { - return hasRegisteredObject(getObjectAddress(name)); - } - else - { - return false; - } -} - bool Environment::hasCreatedObject(const unsigned int address) const { if (hasObject(address)) @@ -680,10 +619,7 @@ Environment::Size Environment::getTotalSize(void) const for (auto &o: object_) { - if (o.isRegistered) - { - size += o.size; - } + size += o.size; } return size; @@ -738,7 +674,7 @@ bool Environment::freeObject(const unsigned int address) { if (!hasOwners(address)) { - if (!isDryRun() and object_[address].isRegistered) + if (!isDryRun()) { LOG(Message) << "Destroying object '" << object_[address].name << "'" << std::endl; @@ -747,10 +683,8 @@ bool Environment::freeObject(const unsigned int address) { object_[p].owners.erase(address); } - object_[address].size = 0; - object_[address].Ls = 0; - object_[address].isRegistered = false; - object_[address].type = nullptr; + object_[address].size = 0; + 
object_[address].type = nullptr; object_[address].owners.clear(); object_[address].properties.clear(); object_[address].data.reset(nullptr); diff --git a/extras/Hadrons/Environment.hpp b/extras/Hadrons/Environment.hpp index 13264bd5..b426fb27 100644 --- a/extras/Hadrons/Environment.hpp +++ b/extras/Hadrons/Environment.hpp @@ -83,12 +83,12 @@ private: std::string name; ModPt data{nullptr}; std::vector input; + size_t maxAllocated; }; struct ObjInfo { Size size{0}; unsigned int Ls{0}; - bool isRegistered{false}; const std::type_info *type{nullptr}; std::string name; int module{-1}; @@ -99,6 +99,8 @@ public: // dry run void dryRun(const bool isDry); bool isDryRun(void) const; + void memoryProfile(const bool doMemoryProfile); + bool doMemoryProfile(void) const; // trajectory number void setTrajectory(const unsigned int traj); unsigned int getTrajectory(void) const; @@ -143,32 +145,17 @@ public: // general memory management void addObject(const std::string name, const int moduleAddress = -1); - void registerObject(const unsigned int address, - const unsigned int size, - const unsigned int Ls = 1); - void registerObject(const std::string name, - const unsigned int size, - const unsigned int Ls = 1); + template + void createObject(const std::string name, + const unsigned int Ls, + Ts ... 
args); template - unsigned int lattice4dSize(void) const; - template - void registerLattice(const unsigned int address, - const unsigned int Ls = 1); - template - void registerLattice(const std::string name, - const unsigned int Ls = 1); - template - void setObject(const unsigned int address, T *object); - template - void setObject(const std::string name, T *object); + void createLattice(const std::string name, + const unsigned int Ls = 1); template T * getObject(const unsigned int address) const; template T * getObject(const std::string name) const; - template - T * createLattice(const unsigned int address); - template - T * createLattice(const std::string name); unsigned int getObjectAddress(const std::string name) const; std::string getObjectName(const unsigned int address) const; std::string getObjectType(const unsigned int address) const; @@ -181,8 +168,6 @@ public: unsigned int getObjectLs(const std::string name) const; bool hasObject(const unsigned int address) const; bool hasObject(const std::string name) const; - bool hasRegisteredObject(const unsigned int address) const; - bool hasRegisteredObject(const std::string name) const; bool hasCreatedObject(const unsigned int address) const; bool hasCreatedObject(const std::string name) const; bool isObject5d(const unsigned int address) const; @@ -204,7 +189,7 @@ public: void printContent(void); private: // general - bool dryRun_{false}; + bool dryRun_{false}, memoryProfile_{false}; unsigned int traj_, locVol_; // grids std::vector dim_; @@ -296,56 +281,45 @@ M * Environment::getModule(const std::string name) const return getModule(getModuleAddress(name)); } -template -unsigned int Environment::lattice4dSize(void) const +template +void Environment::createObject(const std::string name, const unsigned int Ls, + Ts ... 
args) { - return sizeof(typename T::vector_object)/getGrid()->Nsimd(); -} - -template -void Environment::registerLattice(const unsigned int address, - const unsigned int Ls) -{ - createGrid(Ls); - registerObject(address, Ls*lattice4dSize(), Ls); -} - -template -void Environment::registerLattice(const std::string name, const unsigned int Ls) -{ - createGrid(Ls); - registerObject(name, Ls*lattice4dSize(), Ls); -} - -template -void Environment::setObject(const unsigned int address, T *object) -{ - if (hasRegisteredObject(address)) + if (!hasObject(name)) { - object_[address].data.reset(new Holder(object)); - object_[address].type = &typeid(T); + addObject(name); } - else if (hasObject(address)) + + unsigned int address = getObjectAddress(name); + + if (!object_[address].data) { - HADRON_ERROR("object with address " + std::to_string(address) + - " exists but is not registered"); + MemoryStats memStats; + + MemoryProfiler::stats = &memStats; + object_[address].Ls = Ls; + object_[address].data.reset(new Holder(new T(args...))); + object_[address].size = memStats.totalAllocated; + object_[address].type = &typeid(T); } else { - HADRON_ERROR("no object with address " + std::to_string(address)); + HADRON_ERROR("object '" + name + "' already allocated"); } } template -void Environment::setObject(const std::string name, T *object) +void Environment::createLattice(const std::string name, const unsigned int Ls) { - setObject(getObjectAddress(name), object); + GridCartesian *g = getGrid(Ls); + + createObject(name, Ls, g); } template T * Environment::getObject(const unsigned int address) const { - if (hasRegisteredObject(address)) + if (hasObject(address)) { if (auto h = dynamic_cast *>(object_[address].data.get())) { @@ -358,11 +332,6 @@ T * Environment::getObject(const unsigned int address) const "' (has type '" + getObjectType(address) + "')"); } } - else if (hasObject(address)) - { - HADRON_ERROR("object with address " + std::to_string(address) + - " exists but is not 
registered"); - } else { HADRON_ERROR("no object with address " + std::to_string(address)); @@ -375,26 +344,10 @@ T * Environment::getObject(const std::string name) const return getObject(getObjectAddress(name)); } -template -T * Environment::createLattice(const unsigned int address) -{ - GridCartesian *g = getGrid(getObjectLs(address)); - - setObject(address, new T(g)); - - return getObject(address); -} - -template -T * Environment::createLattice(const std::string name) -{ - return createLattice(getObjectAddress(name)); -} - template bool Environment::isObjectOfType(const unsigned int address) const { - if (hasRegisteredObject(address)) + if (hasObject(address)) { if (auto h = dynamic_cast *>(object_[address].data.get())) { @@ -405,11 +358,6 @@ bool Environment::isObjectOfType(const unsigned int address) const return false; } } - else if (hasObject(address)) - { - HADRON_ERROR("object with address " + std::to_string(address) + - " exists but is not registered"); - } else { HADRON_ERROR("no object with address " + std::to_string(address)); diff --git a/extras/Hadrons/Module.hpp b/extras/Hadrons/Module.hpp index 071e254a..5500bf36 100644 --- a/extras/Hadrons/Module.hpp +++ b/extras/Hadrons/Module.hpp @@ -88,6 +88,12 @@ static ns##mod##ModuleRegistrar ns##mod##ModuleRegistrarInstance; #define ARG(...) 
__VA_ARGS__ +#define mCreateObj(type, name, Ls, ...)\ +env().template createObject(name, Ls, __VA_ARGS__) + +#define mGetObj(type, name)\ +*env().template getObject(name) + /****************************************************************************** * Module class * ******************************************************************************/ diff --git a/extras/Hadrons/Modules.hpp b/extras/Hadrons/Modules.hpp index e1f06f32..08678671 100644 --- a/extras/Hadrons/Modules.hpp +++ b/extras/Hadrons/Modules.hpp @@ -30,31 +30,31 @@ See the full license in the file "LICENSE" in the top level distribution directo #include #include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include +// #include +// #include +// #include +// #include +// #include +// #include +// #include +// #include +// #include +// #include +// #include +// #include +// #include #include -#include -#include -#include -#include -#include -#include +// #include +// #include +// #include +// #include +// #include +// #include #include -#include -#include -#include -#include -#include -#include -#include +// #include +// #include +// #include +// #include +// #include +// #include +// #include diff --git a/extras/Hadrons/Modules/MAction/DWF.hpp b/extras/Hadrons/Modules/MAction/DWF.hpp index 78e0916c..a2ed063b 100644 --- a/extras/Hadrons/Modules/MAction/DWF.hpp +++ b/extras/Hadrons/Modules/MAction/DWF.hpp @@ -102,37 +102,30 @@ std::vector TDWF::getOutput(void) // setup /////////////////////////////////////////////////////////////////////// template void TDWF::setup(void) -{ - unsigned int size; - - size = 2*env().template lattice4dSize(); - env().registerObject(getName(), size, par().Ls); -} - -// execution /////////////////////////////////////////////////////////////////// -template -void TDWF::execute(void) { LOG(Message) << "Setting up domain wall fermion matrix with m= " << par().mass << ", M5= " << par().M5 << " and Ls= " 
<< par().Ls << " using gauge field '" << par().gauge << "'" << std::endl; - LOG(Message) << "Fermion boundary conditions: " << par().boundary + LOG(Message) << "Fermion boundary conditions: " << par().boundary << std::endl; env().createGrid(par().Ls); - auto &U = *env().template getObject(par().gauge); + auto &U = mGetObj(LatticeGaugeField, par().gauge); auto &g4 = *env().getGrid(); auto &grb4 = *env().getRbGrid(); auto &g5 = *env().getGrid(par().Ls); auto &grb5 = *env().getRbGrid(par().Ls); std::vector boundary = strToVec(par().boundary); typename DomainWallFermion::ImplParams implParams(boundary); - FMat *fMatPt = new DomainWallFermion(U, g5, grb5, g4, grb4, - par().mass, par().M5, - implParams); - env().setObject(getName(), fMatPt); + mCreateObj(DomainWallFermion, getName(), par().Ls, + U, g5, grb5, g4, grb4, par().mass, par().M5, implParams); } +// execution /////////////////////////////////////////////////////////////////// +template +void TDWF::execute(void) +{} + END_MODULE_NAMESPACE END_HADRONS_NAMESPACE diff --git a/extras/Hadrons/Modules/MAction/Wilson.hpp b/extras/Hadrons/Modules/MAction/Wilson.hpp index aab54245..bc892daf 100644 --- a/extras/Hadrons/Modules/MAction/Wilson.hpp +++ b/extras/Hadrons/Modules/MAction/Wilson.hpp @@ -101,29 +101,23 @@ std::vector TWilson::getOutput(void) template void TWilson::setup(void) { - unsigned int size; - - size = 2*env().template lattice4dSize(); - env().registerObject(getName(), size); + LOG(Message) << "Setting up TWilson fermion matrix with m= " << par().mass + << " using gauge field '" << par().gauge << "'" << std::endl; + LOG(Message) << "Fermion boundary conditions: " << par().boundary + << std::endl; + auto &U = mGetObj(LatticeGaugeField, par().gauge); + auto &grid = *env().getGrid(); + auto &gridRb = *env().getRbGrid(); + std::vector boundary = strToVec(par().boundary); + typename WilsonFermion::ImplParams implParams(boundary); + mCreateObj(WilsonFermion, getName(), 1, U, grid, gridRb, par().mass, + 
implParams); } // execution /////////////////////////////////////////////////////////////////// template void TWilson::execute() -{ - LOG(Message) << "Setting up TWilson fermion matrix with m= " << par().mass - << " using gauge field '" << par().gauge << "'" << std::endl; - LOG(Message) << "Fermion boundary conditions: " << par().boundary - << std::endl; - auto &U = *env().template getObject(par().gauge); - auto &grid = *env().getGrid(); - auto &gridRb = *env().getRbGrid(); - std::vector boundary = strToVec(par().boundary); - typename WilsonFermion::ImplParams implParams(boundary); - FMat *fMatPt = new WilsonFermion(U, grid, gridRb, par().mass, - implParams); - env().setObject(getName(), fMatPt); -} +{} END_MODULE_NAMESPACE diff --git a/extras/Hadrons/Modules/MGauge/Unit.cc b/extras/Hadrons/Modules/MGauge/Unit.cc index 18d75c59..b259b7d5 100644 --- a/extras/Hadrons/Modules/MGauge/Unit.cc +++ b/extras/Hadrons/Modules/MGauge/Unit.cc @@ -57,13 +57,13 @@ std::vector TUnit::getOutput(void) // setup /////////////////////////////////////////////////////////////////////// void TUnit::setup(void) { - env().registerLattice(getName()); + mCreateObj(LatticeGaugeField, getName(), 1, env().getGrid()); } // execution /////////////////////////////////////////////////////////////////// void TUnit::execute(void) { LOG(Message) << "Creating unit gauge configuration" << std::endl; - LatticeGaugeField &U = *env().createLattice(getName()); + auto &U = mGetObj(LatticeGaugeField, getName()); SU3::ColdConfiguration(*env().get4dRng(), U); } diff --git a/extras/Hadrons/Modules/MSolver/RBPrecCG.hpp b/extras/Hadrons/Modules/MSolver/RBPrecCG.hpp index b1f63a5d..fe6992fc 100644 --- a/extras/Hadrons/Modules/MSolver/RBPrecCG.hpp +++ b/extras/Hadrons/Modules/MSolver/RBPrecCG.hpp @@ -100,17 +100,12 @@ std::vector TRBPrecCG::getOutput(void) template void TRBPrecCG::setup(void) { - auto Ls = env().getObjectLs(par().action); - - env().registerObject(getName(), 0, Ls); - env().addOwnership(getName(), 
par().action); -} + LOG(Message) << "setting up Schur red-black preconditioned CG for" + << " action '" << par().action << "' with residual " + << par().residual << std::endl; -// execution /////////////////////////////////////////////////////////////////// -template -void TRBPrecCG::execute(void) -{ - auto &mat = *(env().template getObject(par().action)); + auto Ls = env().getObjectLs(par().action); + auto &mat = mGetObj(FMat, par().action); auto solver = [&mat, this](FermionField &sol, const FermionField &source) { ConjugateGradient cg(par().residual, 10000); @@ -118,13 +113,15 @@ void TRBPrecCG::execute(void) schurSolver(mat, source, sol); }; - - LOG(Message) << "setting up Schur red-black preconditioned CG for" - << " action '" << par().action << "' with residual " - << par().residual << std::endl; - env().setObject(getName(), new SolverFn(solver)); + mCreateObj(SolverFn, getName(), Ls, solver); + env().addOwnership(getName(), par().action); } +// execution /////////////////////////////////////////////////////////////////// +template +void TRBPrecCG::execute(void) +{} + END_MODULE_NAMESPACE END_HADRONS_NAMESPACE diff --git a/extras/Hadrons/modules.inc b/extras/Hadrons/modules.inc index fbbb2eb9..63745baf 100644 --- a/extras/Hadrons/modules.inc +++ b/extras/Hadrons/modules.inc @@ -1,42 +1,44 @@ modules_cc =\ - Modules/MContraction/WeakHamiltonianEye.cc \ - Modules/MContraction/WeakHamiltonianNonEye.cc \ - Modules/MContraction/WeakNeutral4ptDisc.cc \ - Modules/MGauge/Load.cc \ - Modules/MGauge/Random.cc \ - Modules/MGauge/StochEm.cc \ - Modules/MGauge/Unit.cc \ - Modules/MScalar/ChargedProp.cc \ - Modules/MScalar/FreeProp.cc + Modules/MGauge/Unit.cc + # Modules/MContraction/WeakHamiltonianEye.cc \ + # Modules/MContraction/WeakHamiltonianNonEye.cc \ + # Modules/MContraction/WeakNeutral4ptDisc.cc \ + # Modules/MGauge/Load.cc \ + # Modules/MGauge/Random.cc \ + # Modules/MGauge/StochEm.cc \ + # Modules/MScalar/ChargedProp.cc \ + # Modules/MScalar/FreeProp.cc 
modules_hpp =\ Modules/MAction/DWF.hpp \ Modules/MAction/Wilson.hpp \ - Modules/MContraction/Baryon.hpp \ - Modules/MContraction/DiscLoop.hpp \ - Modules/MContraction/Gamma3pt.hpp \ - Modules/MContraction/Meson.hpp \ - Modules/MContraction/WardIdentity.hpp \ - Modules/MContraction/WeakHamiltonian.hpp \ - Modules/MContraction/WeakHamiltonianEye.hpp \ - Modules/MContraction/WeakHamiltonianNonEye.hpp \ - Modules/MContraction/WeakNeutral4ptDisc.hpp \ - Modules/MFermion/GaugeProp.hpp \ - Modules/MGauge/Load.hpp \ - Modules/MGauge/Random.hpp \ - Modules/MGauge/StochEm.hpp \ Modules/MGauge/Unit.hpp \ - Modules/MLoop/NoiseLoop.hpp \ - Modules/MScalar/ChargedProp.hpp \ - Modules/MScalar/FreeProp.hpp \ - Modules/MScalar/Scalar.hpp \ - Modules/MSink/Point.hpp \ - Modules/MSink/Smear.hpp \ - Modules/MSolver/RBPrecCG.hpp \ - Modules/MSource/Point.hpp \ - Modules/MSource/SeqConserved.hpp \ - Modules/MSource/SeqGamma.hpp \ - Modules/MSource/Wall.hpp \ - Modules/MSource/Z2.hpp \ - Modules/MUtilities/TestSeqConserved.hpp \ - Modules/MUtilities/TestSeqGamma.hpp + Modules/MSolver/RBPrecCG.hpp + + # Modules/MContraction/Baryon.hpp \ + # Modules/MContraction/DiscLoop.hpp \ + # Modules/MContraction/Gamma3pt.hpp \ + # Modules/MContraction/Meson.hpp \ + # Modules/MContraction/WardIdentity.hpp \ + # Modules/MContraction/WeakHamiltonian.hpp \ + # Modules/MContraction/WeakHamiltonianEye.hpp \ + # Modules/MContraction/WeakHamiltonianNonEye.hpp \ + # Modules/MContraction/WeakNeutral4ptDisc.hpp \ + # Modules/MFermion/GaugeProp.hpp \ + # Modules/MGauge/Load.hpp \ + # Modules/MGauge/Random.hpp \ + # Modules/MGauge/StochEm.hpp \ + # Modules/MLoop/NoiseLoop.hpp \ + # Modules/MScalar/ChargedProp.hpp \ + # Modules/MScalar/FreeProp.hpp \ + # Modules/MScalar/Scalar.hpp \ + # Modules/MSink/Point.hpp \ + # Modules/MSink/Smear.hpp \ + # Modules/MSolver/RBPrecCG.hpp \ + # Modules/MSource/Point.hpp \ + # Modules/MSource/SeqConserved.hpp \ + # Modules/MSource/SeqGamma.hpp \ + # Modules/MSource/Wall.hpp \ + # 
Modules/MSource/Z2.hpp \ + # Modules/MUtilities/TestSeqConserved.hpp \ + # Modules/MUtilities/TestSeqGamma.hpp From e55397bc134ead26dbac8e2ef244406a8b9d6a3b Mon Sep 17 00:00:00 2001 From: azusayamaguchi Date: Fri, 24 Nov 2017 14:18:30 +0000 Subject: [PATCH 222/377] Staggerd cg --- lib/algorithms/LinearOperator.h | 20 +++++++++++--------- lib/algorithms/iterative/SchurRedBlack.h | 9 +++++++-- lib/lattice/Lattice_transfer.h | 16 ++++++---------- tests/solver/Test_staggered_cg_schur.cc | 14 ++++++++++++++ 4 files changed, 38 insertions(+), 21 deletions(-) diff --git a/lib/algorithms/LinearOperator.h b/lib/algorithms/LinearOperator.h index 0fa039c8..26746e6e 100644 --- a/lib/algorithms/LinearOperator.h +++ b/lib/algorithms/LinearOperator.h @@ -308,32 +308,34 @@ namespace Grid { public: SchurStaggeredOperator (Matrix &Mat): _Mat(Mat){}; virtual void HermOpAndNorm(const Field &in, Field &out,RealD &n1,RealD &n2){ + GridLogIterative.TimingMode(1); + std::cout << GridLogIterative << " HermOpAndNorm "< inline void pickCheckerboard(int cb,Lattice &half,const Lattice &full){ half.checkerboard = cb; - int ssh=0; - //parallel_for - for(int ss=0;ssoSites();ss++){ - std::vector coor; + + parallel_for(int ss=0;ssoSites();ss++){ int cbos; - + std::vector coor; full._grid->oCoorFromOindex(coor,ss); cbos=half._grid->CheckerBoard(coor); if (cbos==cb) { + int ssh=half._grid->oIndex(coor); half._odata[ssh] = full._odata[ss]; - ssh++; } } } template inline void setCheckerboard(Lattice &full,const Lattice &half){ int cb = half.checkerboard; - int ssh=0; - //parallel_for - for(int ss=0;ssoSites();ss++){ + parallel_for(int ss=0;ssoSites();ss++){ std::vector coor; int cbos; @@ -77,8 +73,8 @@ inline void subdivides(GridBase *coarse,GridBase *fine) cbos=half._grid->CheckerBoard(coor); if (cbos==cb) { + int ssh=half._grid->oIndex(coor); full._odata[ss]=half._odata[ssh]; - ssh++; } } } diff --git a/tests/solver/Test_staggered_cg_schur.cc b/tests/solver/Test_staggered_cg_schur.cc index 
09044995..a5c25b85 100644 --- a/tests/solver/Test_staggered_cg_schur.cc +++ b/tests/solver/Test_staggered_cg_schur.cc @@ -70,7 +70,21 @@ int main (int argc, char ** argv) ConjugateGradient CG(1.0e-8,10000); SchurRedBlackStaggeredSolve SchurSolver(CG); + double volume=1.0; + for(int mu=0;mu volume * 1146 + double ncall=CG.IterationsToComplete; + double flops=(16*(3*(6+8+8)) + 15*3*2)*volume*ncall; // == 66*16 + == 1146 + + std::cout< &processors, ////////////////////////////////////////////////////////////////////////////////////////////////////// InitFromMPICommunicator(processors,comm_split); - std::cout << " ndim " <<_ndimension<<" " << parent._ndimension << std::endl; - for(int d=0;d &proc MPI_Comm_rank(communicator,&_processor); MPI_Cart_coords(communicator,_processor,_ndimension,&_processor_coor[0]); - if ( communicator_base != communicator_world ) { + if ( 0 && (communicator_base != communicator_world) ) { std::cout << "InitFromMPICommunicator Cartesian communicator created with a non-world communicator"< Date: Mon, 27 Nov 2017 12:33:08 +0000 Subject: [PATCH 224/377] Believe split/unsplit works, but need to make pretty --- lib/lattice/Lattice_transfer.h | 201 ++++++++++++++++++++++----------- 1 file changed, 133 insertions(+), 68 deletions(-) diff --git a/lib/lattice/Lattice_transfer.h b/lib/lattice/Lattice_transfer.h index dd03fb4f..3d9289d6 100644 --- a/lib/lattice/Lattice_transfer.h +++ b/lib/lattice/Lattice_transfer.h @@ -890,50 +890,85 @@ void Grid_split(std::vector > & full,Lattice & split) if ( ratio[d] != 1 ) { full_grid ->AllToAll(d,alldata,tmpdata); - // std::cout << GridLogMessage << "Grid_split: dim " <_processors[d]<_processors[d] > 1 ) { + alldata=tmpdata; + split_grid->AllToAll(d,alldata,tmpdata); + } - auto rdims = ldims; rdims[d] *= ratio[d]; - auto rsites= lsites*ratio[d]; - for(int v=0;v_processors[d]; + int fP = full_grid->_processors[d]; - for(int r=0;r_processors[d] > 1 ) { - tmpdata = alldata; - split_grid->AllToAll(d,tmpdata,alldata); 
- } } } vectorizeFromLexOrdArray(alldata,split); @@ -1008,55 +1043,84 @@ void Grid_unsplit(std::vector > & full,Lattice & split) std::vector rcoor(ndim); int nvec = 1; - lsites = split_grid->lSites(); - std::vector ldims = split_grid->_ldimensions; + uint64_t rsites = split_grid->lSites(); + std::vector rdims = split_grid->_ldimensions; - // for(int d=ndim-1;d>=0;d--){ for(int d=0;d_processors[d]; + int fP = full_grid->_processors[d]; - if ( split_grid->_processors[d] > 1 ) { - tmpdata = alldata; - split_grid->AllToAll(d,tmpdata,alldata); - } + int M = ratio[d]; + auto ldims = rdims; ldims[d] /= M; // Decrease local dims by same factor + auto lsites= rsites/M; // Decreases rsites by M - ////////////////////////////////////////// - //Local volume for this dimension is expanded by ratio of processor extents - // Number of vectors is decreased by same factor - // Rearrange to lexico for bigger volume - ////////////////////////////////////////// - auto rsites= lsites/ratio[d]; - auto rdims = ldims; rdims[d]/=ratio[d]; + int fvol = lsites; + int svol = rsites; + int chunk = (nvec*fvol)/sP; + int cL = (nvec*ldims[d])/sP; + + for(int c=0;c= tmpdata.size() ) { - // rsite, rcoor --> smaller local volume - // lsite, lcoor --> bigger original (single node?) 
volume - // For loop over each site within smaller subvol - for(int rsite=0;rsiteAllToAll(d,tmpdata,alldata); + if ( split_grid->_processors[d] > 1 ) { + split_grid->AllToAll(d,tmpdata,alldata); + tmpdata=alldata; + } + full_grid ->AllToAll(d,tmpdata,alldata); + + rdims[d]/= M; + rsites /= M; + nvec *= M; // Increase nvec by subdivision factor + } } } @@ -1064,12 +1128,13 @@ void Grid_unsplit(std::vector > & full,Lattice & split) for(int v=0;v Date: Mon, 27 Nov 2017 12:34:25 +0000 Subject: [PATCH 225/377] Clean on multinode target after split 1 1 2 4 -> 1 1 2 2 --- tests/solver/Test_dwf_mrhs_cg_mpi.cc | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/tests/solver/Test_dwf_mrhs_cg_mpi.cc b/tests/solver/Test_dwf_mrhs_cg_mpi.cc index d380f91e..b3611e01 100644 --- a/tests/solver/Test_dwf_mrhs_cg_mpi.cc +++ b/tests/solver/Test_dwf_mrhs_cg_mpi.cc @@ -121,12 +121,12 @@ int main (int argc, char ** argv) random(pRNG5,src[s]); tmp = 100.0*s; src[s] = (src[s] * 0.1) + tmp; - std::cout << " src ["< Date: Mon, 27 Nov 2017 15:10:22 +0000 Subject: [PATCH 226/377] Debug --- tests/solver/Test_dwf_mrhs_cg_mpi.cc | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/solver/Test_dwf_mrhs_cg_mpi.cc b/tests/solver/Test_dwf_mrhs_cg_mpi.cc index b3611e01..06df58c6 100644 --- a/tests/solver/Test_dwf_mrhs_cg_mpi.cc +++ b/tests/solver/Test_dwf_mrhs_cg_mpi.cc @@ -173,6 +173,7 @@ int main (int argc, char ** argv) // std::cout << " s_src \n" << s_src << std::endl; // std::cout << " s_src_tmp \n" << s_src_tmp << std::endl; // std::cout << " s_src_diff \n" << s_src_diff << std::endl; + // exit(0); #endif /////////////////////////////////////////////////////////////// From 28ceacec45e052578cf6b4fa1f394c87f417d1d2 Mon Sep 17 00:00:00 2001 From: paboyle Date: Mon, 27 Nov 2017 15:13:29 +0000 Subject: [PATCH 227/377] Split/Unsplit working --- lib/lattice/Lattice_transfer.h | 275 ++++++++------------------------- 1 file changed, 65 insertions(+), 210 
deletions(-) diff --git a/lib/lattice/Lattice_transfer.h b/lib/lattice/Lattice_transfer.h index 78b80ba4..c7e2a507 100644 --- a/lib/lattice/Lattice_transfer.h +++ b/lib/lattice/Lattice_transfer.h @@ -694,30 +694,6 @@ void precisionChange(Lattice &out, const Lattice &in){ //////////////////////////////////////////////////////////////////////////////// // Communicate between grids //////////////////////////////////////////////////////////////////////////////// -// -// All to all plan -// -// Subvolume on fine grid is v. Vectors a,b,c,d -// -/////////////////////////////////////////////////////////////////////////////////////////////////////////// -// SIMPLEST CASE: -/////////////////////////////////////////////////////////////////////////////////////////////////////////// -// Mesh of nodes (2) ; subdivide to 1 subdivisions -// -// Lex ord: -// N0 va0 vb0 N1 va1 vb1 -// -// For each dimension do an all to all -// -// full AllToAll(0) -// N0 va0 va1 N1 vb0 vb1 -// -// REARRANGE -// N0 va01 N1 vb01 -// -// Must also rearrange data to get into the NEW lex order of grid at each stage. Some kind of "insert/extract". -// NB: Easiest to programme if keep in lex order. -// /////////////////////////////////////////////////////////////////////////////////////////////////////////// // SIMPLE CASE: /////////////////////////////////////////////////////////////////////////////////////////////////////////// @@ -751,75 +727,16 @@ void precisionChange(Lattice &out, const Lattice &in){ // // Must also rearrange data to get into the NEW lex order of grid at each stage. Some kind of "insert/extract". // NB: Easiest to programme if keep in lex order. 
-// -///////////////////////////////////////////////////////// /* - -[0,0,0,0,0] S {V<4>{V<3>{(0,0),(0,0),(0,0)},V<3>{(0,0),(0,0),(0,0)},V<3>{(0,0),(0,0),(0,0)},V<3>{(0,0),(0,0),(0,0)}}} -[0,0,0,0,1] S {V<4>{V<3>{(1,0),(1,0),(1,0)},V<3>{(1,0),(1,0),(1,0)},V<3>{(1,0),(1,0),(1,0)},V<3>{(1,0),(1,0),(1,0)}}} -[0,0,0,0,2] S {V<4>{V<3>{(4,0),(4,0),(4,0)},V<3>{(4,0),(4,0),(4,0)},V<3>{(4,0),(4,0),(4,0)},V<3>{(4,0),(4,0),(4,0)}}} -[0,0,0,0,3] S {V<4>{V<3>{(5,0),(5,0),(5,0)},V<3>{(5,0),(5,0),(5,0)},V<3>{(5,0),(5,0),(5,0)},V<3>{(5,0),(5,0),(5,0)}}} -[0,0,0,0,4] S {V<4>{V<3>{(2,0),(2,0),(2,0)},V<3>{(2,0),(2,0),(2,0)},V<3>{(2,0),(2,0),(2,0)},V<3>{(2,0),(2,0),(2,0)}}} -[0,0,0,0,5] S {V<4>{V<3>{(3,0),(3,0),(3,0)},V<3>{(3,0),(3,0),(3,0)},V<3>{(3,0),(3,0),(3,0)},V<3>{(3,0),(3,0),(3,0)}}} -[0,0,0,0,6] S {V<4>{V<3>{(6,0),(6,0),(6,0)},V<3>{(6,0),(6,0),(6,0)},V<3>{(6,0),(6,0),(6,0)},V<3>{(6,0),(6,0),(6,0)}}} -[0,0,0,0,7] S {V<4>{V<3>{(7,0),(7,0),(7,0)},V<3>{(7,0),(7,0),(7,0)},V<3>{(7,0),(7,0),(7,0)},V<3>{(7,0),(7,0),(7,0)}}} -[0,0,0,0,8] S {V<4>{V<3>{(8,0),(8,0),(8,0)},V<3>{(8,0),(8,0),(8,0)},V<3>{(8,0),(8,0),(8,0)},V<3>{(8,0),(8,0),(8,0)}}} -[0,0,0,0,9] S {V<4>{V<3>{(9,0),(9,0),(9,0)},V<3>{(9,0),(9,0),(9,0)},V<3>{(9,0),(9,0),(9,0)},V<3>{(9,0),(9,0),(9,0)}}} -[0,0,0,0,10] S {V<4>{V<3>{(12,0),(12,0),(12,0)},V<3>{(12,0),(12,0),(12,0)},V<3>{(12,0),(12,0),(12,0)},V<3>{(12,0),(12,0),(12,0)}}} -[0,0,0,0,11] S {V<4>{V<3>{(13,0),(13,0),(13,0)},V<3>{(13,0),(13,0),(13,0)},V<3>{(13,0),(13,0),(13,0)},V<3>{(13,0),(13,0),(13,0)}}} -[0,0,0,0,12] S {V<4>{V<3>{(10,0),(10,0),(10,0)},V<3>{(10,0),(10,0),(10,0)},V<3>{(10,0),(10,0),(10,0)},V<3>{(10,0),(10,0),(10,0)}}} -[0,0,0,0,13] S {V<4>{V<3>{(11,0),(11,0),(11,0)},V<3>{(11,0),(11,0),(11,0)},V<3>{(11,0),(11,0),(11,0)},V<3>{(11,0),(11,0),(11,0)}}} -[0,0,0,0,14] S {V<4>{V<3>{(14,0),(14,0),(14,0)},V<3>{(14,0),(14,0),(14,0)},V<3>{(14,0),(14,0),(14,0)},V<3>{(14,0),(14,0),(14,0)}}} -[0,0,0,0,15] S 
{V<4>{V<3>{(15,0),(15,0),(15,0)},V<3>{(15,0),(15,0),(15,0)},V<3>{(15,0),(15,0),(15,0)},V<3>{(15,0),(15,0),(15,0)}}} - - -Process decomp -[A(0 1) A(2 3) B(0 1) B(2 3)] [ A(4 5) A(6 7) B(4 5) B(6 7)] [ A(8 9) A(10 11) B(8 9) B(10 11)] [A(12 13) A(14 15) B(12 13) B(14 15)] - -A2A(Full) - -- divides M*fL into fP segments of size M*fL/fP = fL/sP - -- total is fP * fL/sP = M * fL - A(0 1) A(4 5) A(8 9) A(12 13) - A(2 3) A(6 7) A(10 11) A(14 15) - B(0 1) B(4 5) B(8 9) B(12 13) - B(2 3) B(6 7) B(10 11) B(14 15) - - -A2A(Split) - A(0 1) A(4 5) A(2 3) A(6 7) - A(8 9) A(12 13) A(10 11) A(14 15) - B(0 1) B(2 3) B(4 5) B(6 7) - B(8 9) B(10 11) B(12 13) B(14 15) - --------------------- --- General case --------------------- -G global lattice -fP - procs -sP - Procs in split grid -M - subdivisions/vectors - M*sP = fP ** constraint 1 -fL = G/fP per node (full) -sL = G/sP per node split - -[ G * M ] total = G*fP/sP. -[ Subdivide fL*M by fP => fL *M / fP = fL/fP *fP/sP = fL/sP ] --------------------- --- 1st A2A chunk is fL*M/fP = G/fP *fP/sP /fP = fL/sP --- Let cL = fL/sP chunk. ( Divide into fP/sP = M chunks ) - --- node 0 1st cL of node 0,1,... fP-1 ; vector 0 --- node 1 2nd cL of node 0,1,... fP-1 --- node 2 3nd cL of node 0,1,... fP-1 --- node 3 4th cL of node 0,1,... fP-1 -... when node > sP get vector 1 etc... - --- 2nd A2A (over sP nodes; subdivide the fP into sP chunks of M) --- node 0 1st cL of node 0M..(1M-1); 2nd cL of node 0M..(1M-1)).. --- node 1 1st cL of node 1M..(2M-1); 2nd cL of node 1M..(2M-1).. --- node 2 1st cL of node 2M..(3M-1); 2nd cL of node 2M..(3M-1).. --- node 3 1st cL of node 3M..(3M-1); 2nd cL of node 2M..(3M-1).. --- --- Insert correctly + * Let chunk = (fvol*nvec)/sP be size of a chunk. ( Divide lexico vol * nvec into fP/sP = M chunks ) + * + * 2nd A2A (over sP nodes; subdivide the fP into sP chunks of M) + * + * node 0 1st chunk of node 0M..(1M-1); 2nd chunk of node 0M..(1M-1).. 
data chunk x M x sP = fL / sP * M * sP = fL * M growth + * node 1 1st chunk of node 1M..(2M-1); 2nd chunk of node 1M..(2M-1).. + * node 2 1st chunk of node 2M..(3M-1); 2nd chunk of node 2M..(3M-1).. + * node 3 1st chunk of node 3M..(3M-1); 2nd chunk of node 2M..(3M-1).. + * etc... */ template void Grid_split(std::vector > & full,Lattice & split) @@ -879,7 +796,6 @@ void Grid_split(std::vector > & full,Lattice & split) int nvec = nvector; // Counts down to 1 as we collapse dims std::vector ldims = full_grid->_ldimensions; - std::vector lcoor(ndim); for(int d=ndim-1;d>=0;d--){ @@ -891,73 +807,40 @@ void Grid_split(std::vector > & full,Lattice & split) split_grid->AllToAll(d,alldata,tmpdata); } - /* --- Let chunk = (fL*nvec)/sP chunk. ( Divide into fP/sP = M chunks ) --- --- 2nd A2A (over sP nodes; subdivide the fP into sP chunks of M) --- --- node 0 1st chunk of node 0M..(1M-1); 2nd chunk of node 0M..(1M-1).. data chunk x M x sP = fL / sP * M * sP = fL * M growth --- node 1 1st chunk of node 1M..(2M-1); 2nd chunk of node 1M..(2M-1).. --- node 2 1st chunk of node 2M..(3M-1); 2nd chunk of node 2M..(3M-1).. --- node 3 1st chunk of node 3M..(3M-1); 2nd chunk of node 2M..(3M-1).. 
--- --- Loop over c = 0..chunk-1 --- Loop over n = 0..M --- Loop over j = 0..sP --- total chunk*M*sP = fL/sP*fP/sP*sP = G/sP = sL --- csite = (c+m*chunk)% --- split into m*chunk+o = lsite*nvec/fP --- Must turn to vec, rsite, - */ - auto rdims = ldims; - int M = ratio[d]; - nvec /= M; // Reduce nvec by subdivision factor - rdims[d] *= M; // increase local dims by same factor + auto M = ratio[d]; auto rsites= lsites*M;// increases rsites by M + nvec /= M; // Reduce nvec by subdivision factor + rdims[d] *= M; // increase local dim by same factor int sP = split_grid->_processors[d]; int fP = full_grid->_processors[d]; int fvol = lsites; - int svol = rsites; - int chunk = (nvec*fvol)/sP; - int cL = (nvec*ldims[d])/sP; - - for(int c=0;c coor(ndim); + Lexicographic::CoorFromIndex(coor, lex_fvol, ldims); + coor[d] += m*ldims[d]; + Lexicographic::IndexFromCoor(coor, lex_r, rdims); + lex_r += lex_vec * rsites; - alldata[rsite] = tmpdata[c+chunk*m+chunk*M*s]; - - if ( 0 - &&(lcoor[0]==0) - &&(lcoor[1]==0) - &&(lcoor[2]==0) - &&(lcoor[3]==0) ) { - - std::cout << GridLogMessage << " SPLIT rcoor[d] = "< > & full,Lattice & split) ///////////////////////////////////////////////////////////////// // Start from split grid and work towards full grid ///////////////////////////////////////////////////////////////// - std::vector lcoor(ndim); - std::vector rcoor(ndim); int nvec = 1; uint64_t rsites = split_grid->lSites(); @@ -1046,77 +927,52 @@ void Grid_unsplit(std::vector > & full,Lattice & split) if ( ratio[d] != 1 ) { - { - int sP = split_grid->_processors[d]; - int fP = full_grid->_processors[d]; + auto M = ratio[d]; - int M = ratio[d]; - auto ldims = rdims; ldims[d] /= M; // Decrease local dims by same factor - auto lsites= rsites/M; // Decreases rsites by M - - int fvol = lsites; - int svol = rsites; - int chunk = (nvec*fvol)/sP; - int cL = (nvec*ldims[d])/sP; + int sP = split_grid->_processors[d]; + int fP = full_grid->_processors[d]; + + auto ldims = rdims; ldims[d] /= M; // 
Decrease local dims by same factor + auto lsites= rsites/M; // Decreases rsites by M + + int fvol = lsites; + int chunk = (nvec*fvol)/sP; assert(chunk*sP == nvec*fvol); + { + // Loop over reordered data post A2A for(int c=0;c= tmpdata.size() ) { - - std::cout << "c "< coor(ndim); + Lexicographic::CoorFromIndex(coor, lex_fvol, ldims); + coor[d] += m*ldims[d]; + Lexicographic::IndexFromCoor(coor, lex_r, rdims); + lex_r += lex_vec * rsites; + // LexicoFind coordinate & vector number within split lattice + tmpdata[lex_c] = alldata[lex_r]; } } } - - if ( split_grid->_processors[d] > 1 ) { - split_grid->AllToAll(d,tmpdata,alldata); - tmpdata=alldata; - } - full_grid ->AllToAll(d,tmpdata,alldata); - - rdims[d]/= M; - rsites /= M; - nvec *= M; // Increase nvec by subdivision factor } + + if ( split_grid->_processors[d] > 1 ) { + split_grid->AllToAll(d,tmpdata,alldata); + tmpdata=alldata; + } + full_grid ->AllToAll(d,tmpdata,alldata); + rdims[d]/= M; + rsites /= M; + nvec *= M; // Increase nvec by subdivision factor } } @@ -1129,7 +985,6 @@ void Grid_unsplit(std::vector > & full,Lattice & split) } vectorizeFromLexOrdArray(scalardata,full[v]); } - } } From 514993ed17671607a33d4b23873fd9f136b776e1 Mon Sep 17 00:00:00 2001 From: Antonin Portelli Date: Fri, 1 Dec 2017 19:38:23 +0000 Subject: [PATCH 228/377] Hadrons: progress on the interface, genetic algorithm freezing --- extras/Hadrons/Application.cc | 15 ++++-- extras/Hadrons/Environment.cc | 27 +++++++--- extras/Hadrons/Environment.hpp | 40 +++++++-------- extras/Hadrons/Module.hpp | 49 +++++++++++++++++-- extras/Hadrons/Modules.hpp | 8 +-- extras/Hadrons/Modules/MAction/DWF.hpp | 7 +-- extras/Hadrons/Modules/MAction/Wilson.hpp | 7 +-- extras/Hadrons/Modules/MContraction/Meson.hpp | 18 +++---- extras/Hadrons/Modules/MFermion/GaugeProp.hpp | 29 ++++++----- extras/Hadrons/Modules/MGauge/Unit.cc | 4 +- extras/Hadrons/Modules/MSink/Point.hpp | 36 +++++++++----- extras/Hadrons/Modules/MSolver/RBPrecCG.hpp | 4 +- 
extras/Hadrons/Modules/MSource/Point.hpp | 4 +- extras/Hadrons/modules.inc | 9 ++-- 14 files changed, 164 insertions(+), 93 deletions(-) diff --git a/extras/Hadrons/Application.cc b/extras/Hadrons/Application.cc index a94b617c..0a7d0290 100644 --- a/extras/Hadrons/Application.cc +++ b/extras/Hadrons/Application.cc @@ -98,6 +98,8 @@ void Application::run(void) { parseParameterFile(parameterFileName_); } + env().checkGraph(); + env().printContent(); if (!scheduled_) { schedule(); @@ -124,8 +126,14 @@ void Application::parseParameterFile(const std::string parameterFileName) LOG(Message) << "Building application from '" << parameterFileName << "'..." << std::endl; read(reader, "parameters", par); setPar(par); - push(reader, "modules"); - push(reader, "module"); + if (!push(reader, "modules")) + { + HADRON_ERROR("Cannot open node 'modules' in parameter file '" + parameterFileName + "'"); + } + if (!push(reader, "module")) + { + HADRON_ERROR("Cannot open node 'modules/module' in parameter file '" + parameterFileName + "'"); + } do { read(reader, "id", id); @@ -186,6 +194,8 @@ void Application::schedule(void) // build module dependency graph LOG(Message) << "Building module graph..." 
<< std::endl; auto graph = env().makeModuleGraph(); + LOG(Debug) << "Module graph:" << std::endl; + LOG(Debug) << graph << std::endl; auto con = graph.getConnectedComponents(); // constrained topological sort using a genetic algorithm @@ -329,4 +339,3 @@ void Application::memoryProfile(void) HadronsLogMessage.Active(msg); } - diff --git a/extras/Hadrons/Environment.cc b/extras/Hadrons/Environment.cc index eb0a6f70..a6855862 100644 --- a/extras/Hadrons/Environment.cc +++ b/extras/Hadrons/Environment.cc @@ -333,6 +333,17 @@ Graph Environment::makeModuleGraph(void) const return moduleGraph; } +void Environment::checkGraph(void) const +{ + for (auto &o: object_) + { + if (o.module < 0) + { + HADRON_ERROR("object '" + o.name + "' does not have a creator"); + } + } +} + #define BIG_SEP "===============" #define SEP "---------------" #define MEM_MSG(size)\ @@ -346,6 +357,7 @@ Environment::executeProgram(const std::vector &p) bool continueCollect, nothingFreed; // build garbage collection schedule + LOG(Debug) << "Building garbage collection schedule..." << std::endl; freeProg.resize(p.size()); for (unsigned int i = 0; i < object_.size(); ++i) { @@ -359,11 +371,12 @@ Environment::executeProgram(const std::vector &p) auto it = std::find_if(p.rbegin(), p.rend(), pred); if (it != p.rend()) { - freeProg[std::distance(p.rend(), it) - 1].insert(i); + freeProg[std::distance(it, p.rend()) - 1].insert(i); } } // program execution + LOG(Debug) << "Executing program..." 
<< std::endl; for (unsigned int i = 0; i < p.size(); ++i) { // execute module @@ -712,16 +725,16 @@ void Environment::freeAll(void) void Environment::printContent(void) { - LOG(Message) << "Modules: " << std::endl; + LOG(Debug) << "Modules: " << std::endl; for (unsigned int i = 0; i < module_.size(); ++i) { - LOG(Message) << std::setw(4) << i << ": " - << getModuleName(i) << std::endl; + LOG(Debug) << std::setw(4) << i << ": " + << getModuleName(i) << std::endl; } - LOG(Message) << "Objects: " << std::endl; + LOG(Debug) << "Objects: " << std::endl; for (unsigned int i = 0; i < object_.size(); ++i) { - LOG(Message) << std::setw(4) << i << ": " - << getObjectName(i) << std::endl; + LOG(Debug) << std::setw(4) << i << ": " + << getObjectName(i) << std::endl; } } diff --git a/extras/Hadrons/Environment.hpp b/extras/Hadrons/Environment.hpp index b426fb27..58e035ac 100644 --- a/extras/Hadrons/Environment.hpp +++ b/extras/Hadrons/Environment.hpp @@ -76,6 +76,7 @@ public: typedef std::unique_ptr GridRbPt; typedef std::unique_ptr RngPt; typedef std::unique_ptr LatticePt; + enum class Storage {object, cache, temporary}; private: struct ModuleInfo { @@ -88,6 +89,7 @@ private: struct ObjInfo { Size size{0}; + Storage storage{Storage::object}; unsigned int Ls{0}; const std::type_info *type{nullptr}; std::string name; @@ -140,18 +142,17 @@ public: bool hasModule(const unsigned int address) const; bool hasModule(const std::string name) const; Graph makeModuleGraph(void) const; + void checkGraph(void) const; Size executeProgram(const std::vector &p); Size executeProgram(const std::vector &p); // general memory management void addObject(const std::string name, const int moduleAddress = -1); - template + template void createObject(const std::string name, + const Storage storage, const unsigned int Ls, - Ts ... 
args); - template - void createLattice(const std::string name, - const unsigned int Ls = 1); + P &&pt); template T * getObject(const unsigned int address) const; template @@ -203,6 +204,7 @@ private: // module and related maps std::vector module_; std::map moduleAddress_; + std::string currentModule_{""}; // lattice store std::map lattice_; // object store @@ -281,9 +283,11 @@ M * Environment::getModule(const std::string name) const return getModule(getModuleAddress(name)); } -template -void Environment::createObject(const std::string name, const unsigned int Ls, - Ts ... args) +template +void Environment::createObject(const std::string name, + const Environment::Storage storage, + const unsigned int Ls, + P &&pt) { if (!hasObject(name)) { @@ -296,11 +300,13 @@ void Environment::createObject(const std::string name, const unsigned int Ls, { MemoryStats memStats; - MemoryProfiler::stats = &memStats; - object_[address].Ls = Ls; - object_[address].data.reset(new Holder(new T(args...))); - object_[address].size = memStats.totalAllocated; - object_[address].type = &typeid(T); + MemoryProfiler::stats = &memStats; + object_[address].storage = storage; + object_[address].Ls = Ls; + object_[address].data.reset(new Holder(pt)); + object_[address].size = memStats.totalAllocated; + object_[address].type = &typeid(T); + MemoryProfiler::stats = nullptr; } else { @@ -308,14 +314,6 @@ void Environment::createObject(const std::string name, const unsigned int Ls, } } -template -void Environment::createLattice(const std::string name, const unsigned int Ls) -{ - GridCartesian *g = getGrid(Ls); - - createObject(name, Ls, g); -} - template T * Environment::getObject(const unsigned int address) const { diff --git a/extras/Hadrons/Module.hpp b/extras/Hadrons/Module.hpp index 5500bf36..a0b062df 100644 --- a/extras/Hadrons/Module.hpp +++ b/extras/Hadrons/Module.hpp @@ -87,13 +87,54 @@ public:\ static ns##mod##ModuleRegistrar ns##mod##ModuleRegistrarInstance; #define ARG(...) 
__VA_ARGS__ +#define MACRO_REDIRECT(arg1, arg2, arg3, macro, ...) macro -#define mCreateObj(type, name, Ls, ...)\ -env().template createObject(name, Ls, __VA_ARGS__) - -#define mGetObj(type, name)\ +#define envGet(type, name)\ *env().template getObject(name) +#define envGetTmp(type, name)\ +*env().template getObject(getName() + "_tmp_" + name) + +#define envIsType(type, name)\ +env().template getObject(name) + +#define envCreate(type, name, Ls, pt)\ +env().template createObject(name, Environment::Storage::object, Ls, pt) + +#define envCreateLat4(type, name)\ +envCreate(type, name, 1, new type(env().getGrid())) + +#define envCreateLat5(type, name, Ls)\ +envCreate(type, name, Ls, new type(env().getGrid(Ls))) + +#define envCreateLat(...)\ +MACRO_REDIRECT(__VA_ARGS__, envCreateLat5, envCreateLat4)(__VA_ARGS__) + +#define envCache(type, name, Ls, pt)\ +env().template createObject(name, Environment::Storage::cache, Ls, pt) + +#define envCacheLat4(type, name)\ +envCache(type, name, 1, new type(env().getGrid())) + +#define envCacheLat5(type, name, Ls)\ +envCache(type, name, Ls, new type(env().getGrid(Ls))) + +#define envCacheLat(...)\ +MACRO_REDIRECT(__VA_ARGS__, envCacheLat5, envCacheLat4)(__VA_ARGS__) + +#define envTmp(type, name, Ls, pt)\ +env().template createObject(getName() + "_tmp_" + name, \ + Environment::Storage::temporary, Ls, pt) + +#define envTmpLat4(type, name)\ +envTmp(type, name, 1, new type(env().getGrid())) + +#define envTmpLat5(type, name, Ls)\ +envTmp(type, name, Ls, new type(env().getGrid(Ls))) + +#define envTmpLat(...)\ +MACRO_REDIRECT(__VA_ARGS__, envTmpLat5, envTmpLat4)(__VA_ARGS__) + /****************************************************************************** * Module class * ******************************************************************************/ diff --git a/extras/Hadrons/Modules.hpp b/extras/Hadrons/Modules.hpp index 08678671..bb574a14 100644 --- a/extras/Hadrons/Modules.hpp +++ b/extras/Hadrons/Modules.hpp @@ -33,13 +33,13 @@ See the 
full license in the file "LICENSE" in the top level distribution directo // #include // #include // #include -// #include +#include // #include // #include // #include // #include // #include -// #include +#include // #include // #include // #include @@ -48,10 +48,10 @@ See the full license in the file "LICENSE" in the top level distribution directo // #include // #include // #include -// #include +#include // #include #include -// #include +#include // #include // #include // #include diff --git a/extras/Hadrons/Modules/MAction/DWF.hpp b/extras/Hadrons/Modules/MAction/DWF.hpp index a2ed063b..7c82fe8b 100644 --- a/extras/Hadrons/Modules/MAction/DWF.hpp +++ b/extras/Hadrons/Modules/MAction/DWF.hpp @@ -110,15 +110,16 @@ void TDWF::setup(void) LOG(Message) << "Fermion boundary conditions: " << par().boundary << std::endl; env().createGrid(par().Ls); - auto &U = mGetObj(LatticeGaugeField, par().gauge); + auto &U = envGet(LatticeGaugeField, par().gauge); auto &g4 = *env().getGrid(); auto &grb4 = *env().getRbGrid(); auto &g5 = *env().getGrid(par().Ls); auto &grb5 = *env().getRbGrid(par().Ls); std::vector boundary = strToVec(par().boundary); typename DomainWallFermion::ImplParams implParams(boundary); - mCreateObj(DomainWallFermion, getName(), par().Ls, - U, g5, grb5, g4, grb4, par().mass, par().M5, implParams); + envCreate(FMat, getName(), par().Ls, + new DomainWallFermion(U, g5, grb5, g4, grb4, par().mass, + par().M5, implParams)); } // execution /////////////////////////////////////////////////////////////////// diff --git a/extras/Hadrons/Modules/MAction/Wilson.hpp b/extras/Hadrons/Modules/MAction/Wilson.hpp index bc892daf..5c334f8d 100644 --- a/extras/Hadrons/Modules/MAction/Wilson.hpp +++ b/extras/Hadrons/Modules/MAction/Wilson.hpp @@ -105,13 +105,14 @@ void TWilson::setup(void) << " using gauge field '" << par().gauge << "'" << std::endl; LOG(Message) << "Fermion boundary conditions: " << par().boundary << std::endl; - auto &U = mGetObj(LatticeGaugeField, 
par().gauge); + auto &U = envGet(LatticeGaugeField, par().gauge); auto &grid = *env().getGrid(); auto &gridRb = *env().getRbGrid(); std::vector boundary = strToVec(par().boundary); typename WilsonFermion::ImplParams implParams(boundary); - mCreateObj(WilsonFermion, getName(), 1, U, grid, gridRb, par().mass, - implParams); + envCreate(FMat, getName(), 1, new WilsonFermion(U, grid, gridRb, + par().mass, + implParams)); } // execution /////////////////////////////////////////////////////////////////// diff --git a/extras/Hadrons/Modules/MContraction/Meson.hpp b/extras/Hadrons/Modules/MContraction/Meson.hpp index b71f7c08..ccc6dc55 100644 --- a/extras/Hadrons/Modules/MContraction/Meson.hpp +++ b/extras/Hadrons/Modules/MContraction/Meson.hpp @@ -153,7 +153,6 @@ void TMeson::parseGammaString(std::vector &gammaList) } } - // execution /////////////////////////////////////////////////////////////////// #define mesonConnected(q1, q2, gSnk, gSrc) \ (g5*(gSnk))*(q1)*(adj(gSrc)*g5)*adj(q2) @@ -180,11 +179,11 @@ void TMeson::execute(void) result[i].gamma_src = gammaList[i].second; result[i].corr.resize(nt); } - if (env().template isObjectOfType(par().q1) and - env().template isObjectOfType(par().q2)) + if (envIsType(SlicedPropagator1, par().q1) and + envIsType(SlicedPropagator2, par().q2)) { - SlicedPropagator1 &q1 = *env().template getObject(par().q1); - SlicedPropagator2 &q2 = *env().template getObject(par().q2); + SlicedPropagator1 &q1 = envGet(SlicedPropagator1, par().q1); + SlicedPropagator2 &q2 = envGet(SlicedPropagator2, par().q2); LOG(Message) << "(propagator already sinked)" << std::endl; for (unsigned int i = 0; i < result.size(); ++i) @@ -200,8 +199,8 @@ void TMeson::execute(void) } else { - PropagatorField1 &q1 = *env().template getObject(par().q1); - PropagatorField2 &q2 = *env().template getObject(par().q2); + PropagatorField1 &q1 = envGet(PropagatorField1, par().q1); + PropagatorField2 &q2 = envGet(PropagatorField2, par().q2); LatticeComplex c(env().getGrid()); 
LOG(Message) << "(using sink '" << par().sink << "')" << std::endl; @@ -214,15 +213,14 @@ void TMeson::execute(void) ns = env().getModuleNamespace(env().getObjectModule(par().sink)); if (ns == "MSource") { - PropagatorField1 &sink = - *env().template getObject(par().sink); + PropagatorField1 &sink = envGet(PropagatorField1, par().sink); c = trace(mesonConnected(q1, q2, gSnk, gSrc)*sink); sliceSum(c, buf, Tp); } else if (ns == "MSink") { - SinkFnScalar &sink = *env().template getObject(par().sink); + SinkFnScalar &sink = envGet(SinkFnScalar, par().sink); c = trace(mesonConnected(q1, q2, gSnk, gSrc)); buf = sink(c); diff --git a/extras/Hadrons/Modules/MFermion/GaugeProp.hpp b/extras/Hadrons/Modules/MFermion/GaugeProp.hpp index 8add9a00..59994d0d 100644 --- a/extras/Hadrons/Modules/MFermion/GaugeProp.hpp +++ b/extras/Hadrons/Modules/MFermion/GaugeProp.hpp @@ -127,10 +127,13 @@ template void TGaugeProp::setup(void) { Ls_ = env().getObjectLs(par().solver); - env().template registerLattice(getName()); + envCreateLat(PropagatorField, getName()); + envTmpLat(FermionField, "source", Ls_); + envTmpLat(FermionField, "sol", Ls_); + envTmpLat(FermionField, "tmp"); if (Ls_ > 1) { - env().template registerLattice(getName() + "_5d", Ls_); + envCreateLat(PropagatorField, getName() + "_5d", Ls_); } } @@ -139,21 +142,18 @@ template void TGaugeProp::execute(void) { LOG(Message) << "Computing quark propagator '" << getName() << "'" - << std::endl; + << std::endl; - FermionField source(env().getGrid(Ls_)), sol(env().getGrid(Ls_)), - tmp(env().getGrid()); + FermionField &source = envGetTmp(FermionField, "source"); + FermionField &sol = envGetTmp(FermionField, "sol"); + FermionField &tmp = envGetTmp(FermionField, "tmp"); std::string propName = (Ls_ == 1) ? 
getName() : (getName() + "_5d"); - PropagatorField &prop = *env().template createLattice(propName); - PropagatorField &fullSrc = *env().template getObject(par().source); - SolverFn &solver = *env().template getObject(par().solver); - if (Ls_ > 1) - { - env().template createLattice(getName()); - } + PropagatorField &prop = envGet(PropagatorField, propName); + PropagatorField &fullSrc = envGet(PropagatorField, par().source); + SolverFn &solver = envGet(SolverFn, par().solver); LOG(Message) << "Inverting using solver '" << par().solver - << "' on source '" << par().source << "'" << std::endl; + << "' on source '" << par().source << "'" << std::endl; for (unsigned int s = 0; s < Ns; ++s) for (unsigned int c = 0; c < Nc; ++c) { @@ -190,8 +190,7 @@ void TGaugeProp::execute(void) // create 4D propagators from 5D one if necessary if (Ls_ > 1) { - PropagatorField &p4d = - *env().template getObject(getName()); + PropagatorField &p4d = envGet(PropagatorField, getName()); make_4D(sol, tmp, Ls_); FermToProp(p4d, tmp, s, c); } diff --git a/extras/Hadrons/Modules/MGauge/Unit.cc b/extras/Hadrons/Modules/MGauge/Unit.cc index b259b7d5..b3a7d634 100644 --- a/extras/Hadrons/Modules/MGauge/Unit.cc +++ b/extras/Hadrons/Modules/MGauge/Unit.cc @@ -57,13 +57,13 @@ std::vector TUnit::getOutput(void) // setup /////////////////////////////////////////////////////////////////////// void TUnit::setup(void) { - mCreateObj(LatticeGaugeField, getName(), 1, env().getGrid()); + envCreateLat(LatticeGaugeField, getName()); } // execution /////////////////////////////////////////////////////////////////// void TUnit::execute(void) { LOG(Message) << "Creating unit gauge configuration" << std::endl; - auto &U = mGetObj(LatticeGaugeField, getName()); + auto &U = envGet(LatticeGaugeField, getName()); SU3::ColdConfiguration(*env().get4dRng(), U); } diff --git a/extras/Hadrons/Modules/MSink/Point.hpp b/extras/Hadrons/Modules/MSink/Point.hpp index 0761c4c4..b124e2e5 100644 --- 
a/extras/Hadrons/Modules/MSink/Point.hpp +++ b/extras/Hadrons/Modules/MSink/Point.hpp @@ -65,6 +65,9 @@ public: virtual void setup(void); // execution virtual void execute(void); +private: + bool hasPhase_{false}; + std::string momphName_; }; MODULE_REGISTER_NS(Point, TPoint, MSink); @@ -77,6 +80,7 @@ MODULE_REGISTER_NS(ScalarPoint, TPoint, MSink); template TPoint::TPoint(const std::string name) : Module(name) +, momphName_ (name + "_momph") {} // dependencies/products /////////////////////////////////////////////////////// @@ -100,30 +104,36 @@ std::vector TPoint::getOutput(void) template void TPoint::setup(void) { - unsigned int size; - - size = env().template lattice4dSize(); - env().registerObject(getName(), size); + envTmpLat(LatticeComplex, "coor"); + envCacheLat(LatticeComplex, momphName_); + envCreate(SinkFn, getName(), 1, nullptr); } // execution /////////////////////////////////////////////////////////////////// template void TPoint::execute(void) { - std::vector p = strToVec(par().mom); - LatticeComplex ph(env().getGrid()), coor(env().getGrid()); + std::vector p = strToVec(par().mom); + LatticeComplex &ph = envGet(LatticeComplex, momphName_); Complex i(0.0,1.0); LOG(Message) << "Setting up point sink function for momentum [" << par().mom << "]" << std::endl; - ph = zero; - for(unsigned int mu = 0; mu < env().getNd(); mu++) + + if (!hasPhase_) { - LatticeCoordinate(coor, mu); - ph = ph + (p[mu]/env().getGrid()->_fdimensions[mu])*coor; + LatticeComplex &coor = envGetTmp(LatticeComplex, "coor"); + + ph = zero; + for(unsigned int mu = 0; mu < env().getNd(); mu++) + { + LatticeCoordinate(coor, mu); + ph = ph + (p[mu]/env().getGrid()->_fdimensions[mu])*coor; + } + ph = exp((Real)(2*M_PI)*i*ph); + hasPhase_ = true; } - ph = exp((Real)(2*M_PI)*i*ph); - auto sink = [ph](const PropagatorField &field) + auto sink = [&ph](const PropagatorField &field) { SlicedPropagator res; PropagatorField tmp = ph*field; @@ -132,7 +142,7 @@ void TPoint::execute(void) return res; 
}; - env().setObject(getName(), new SinkFn(sink)); + envGet(SinkFn, getName()) = sink; } END_MODULE_NAMESPACE diff --git a/extras/Hadrons/Modules/MSolver/RBPrecCG.hpp b/extras/Hadrons/Modules/MSolver/RBPrecCG.hpp index fe6992fc..8063d939 100644 --- a/extras/Hadrons/Modules/MSolver/RBPrecCG.hpp +++ b/extras/Hadrons/Modules/MSolver/RBPrecCG.hpp @@ -105,7 +105,7 @@ void TRBPrecCG::setup(void) << par().residual << std::endl; auto Ls = env().getObjectLs(par().action); - auto &mat = mGetObj(FMat, par().action); + auto &mat = envGet(FMat, par().action); auto solver = [&mat, this](FermionField &sol, const FermionField &source) { ConjugateGradient cg(par().residual, 10000); @@ -113,7 +113,7 @@ void TRBPrecCG::setup(void) schurSolver(mat, source, sol); }; - mCreateObj(SolverFn, getName(), Ls, solver); + envCreate(SolverFn, getName(), Ls, new SolverFn(solver)); env().addOwnership(getName(), par().action); } diff --git a/extras/Hadrons/Modules/MSource/Point.hpp b/extras/Hadrons/Modules/MSource/Point.hpp index 7815e5c1..5e16149e 100644 --- a/extras/Hadrons/Modules/MSource/Point.hpp +++ b/extras/Hadrons/Modules/MSource/Point.hpp @@ -111,7 +111,7 @@ std::vector TPoint::getOutput(void) template void TPoint::setup(void) { - env().template registerLattice(getName()); + envCreateLat(PropagatorField, getName()); } // execution /////////////////////////////////////////////////////////////////// @@ -123,7 +123,7 @@ void TPoint::execute(void) LOG(Message) << "Creating point source at position [" << par().position << "]" << std::endl; - PropagatorField &src = *env().template createLattice(getName()); + PropagatorField &src = envGet(PropagatorField, getName()); id = 1.; src = zero; pokeSite(id, src, position); diff --git a/extras/Hadrons/modules.inc b/extras/Hadrons/modules.inc index 63745baf..5ce2435f 100644 --- a/extras/Hadrons/modules.inc +++ b/extras/Hadrons/modules.inc @@ -12,13 +12,16 @@ modules_cc =\ modules_hpp =\ Modules/MAction/DWF.hpp \ Modules/MAction/Wilson.hpp \ + 
Modules/MSink/Point.hpp \ + Modules/MSource/Point.hpp \ Modules/MGauge/Unit.hpp \ - Modules/MSolver/RBPrecCG.hpp + Modules/MSolver/RBPrecCG.hpp \ + Modules/MFermion/GaugeProp.hpp \ + Modules/MContraction/Meson.hpp # Modules/MContraction/Baryon.hpp \ # Modules/MContraction/DiscLoop.hpp \ # Modules/MContraction/Gamma3pt.hpp \ - # Modules/MContraction/Meson.hpp \ # Modules/MContraction/WardIdentity.hpp \ # Modules/MContraction/WeakHamiltonian.hpp \ # Modules/MContraction/WeakHamiltonianEye.hpp \ @@ -32,10 +35,8 @@ modules_hpp =\ # Modules/MScalar/ChargedProp.hpp \ # Modules/MScalar/FreeProp.hpp \ # Modules/MScalar/Scalar.hpp \ - # Modules/MSink/Point.hpp \ # Modules/MSink/Smear.hpp \ # Modules/MSolver/RBPrecCG.hpp \ - # Modules/MSource/Point.hpp \ # Modules/MSource/SeqConserved.hpp \ # Modules/MSource/SeqGamma.hpp \ # Modules/MSource/Wall.hpp \ From 2427a21428b6704a08119a24c60f0e77830c55c7 Mon Sep 17 00:00:00 2001 From: Antonin Portelli Date: Fri, 1 Dec 2017 19:44:07 +0000 Subject: [PATCH 229/377] minor serial IO fixes, XML now issues warning when trying to read absent nodes, these becomes --- lib/serialisation/JSON_IO.cc | 2 +- lib/serialisation/MacroMagic.h | 6 ++++- lib/serialisation/XmlIO.cc | 41 ++++++++++++++++++++++------------ lib/serialisation/XmlIO.h | 10 +++++++-- 4 files changed, 41 insertions(+), 18 deletions(-) diff --git a/lib/serialisation/JSON_IO.cc b/lib/serialisation/JSON_IO.cc index 99a9cdd6..6a01aa84 100644 --- a/lib/serialisation/JSON_IO.cc +++ b/lib/serialisation/JSON_IO.cc @@ -25,7 +25,7 @@ See the full license in the file "LICENSE" in the top level distribution directory *************************************************************************************/ /* END LEGAL */ -#include +#include using namespace Grid; using namespace std; diff --git a/lib/serialisation/MacroMagic.h b/lib/serialisation/MacroMagic.h index 774c947f..5df2c780 100644 --- a/lib/serialisation/MacroMagic.h +++ b/lib/serialisation/MacroMagic.h @@ -125,7 +125,11 @@ static 
inline void write(Writer &WR,const std::string &s, const cname &obj){ }\ template \ static inline void read(Reader &RD,const std::string &s, cname &obj){ \ - push(RD,s);\ + if (!push(RD,s))\ + {\ + std::cout << Grid::GridLogWarning << "IO: Cannot open node '" << s << "'" << std::endl;\ + return;\ + };\ GRID_MACRO_EVAL(GRID_MACRO_MAP(GRID_MACRO_READ_MEMBER,__VA_ARGS__)) \ pop(RD);\ }\ diff --git a/lib/serialisation/XmlIO.cc b/lib/serialisation/XmlIO.cc index 260611a5..8ac7422c 100644 --- a/lib/serialisation/XmlIO.cc +++ b/lib/serialisation/XmlIO.cc @@ -100,13 +100,16 @@ XmlReader::XmlReader(const string &fileName,string toplev) : fileName_(fileName) bool XmlReader::push(const string &s) { + if (node_.child(s.c_str())) + { + node_ = node_.child(s.c_str()); - if (node_.child(s.c_str()) == NULL ) + return true; + } + else + { return false; - - node_ = node_.child(s.c_str()); - return true; - + } } void XmlReader::pop(void) @@ -117,20 +120,30 @@ void XmlReader::pop(void) bool XmlReader::nextElement(const std::string &s) { if (node_.next_sibling(s.c_str())) - { - node_ = node_.next_sibling(s.c_str()); - - return true; - } + { + node_ = node_.next_sibling(s.c_str()); + + return true; + } else - { - return false; - } + { + return false; + } } template <> void XmlReader::readDefault(const string &s, string &output) { - output = node_.child(s.c_str()).first_child().value(); + if (node_.child(s.c_str())) + { + output = node_.child(s.c_str()).first_child().value(); + } + else + { + std::cout << GridLogWarning << "XML: cannot open node '" << s << "'"; + std::cout << std::endl; + + output = ""; + } } diff --git a/lib/serialisation/XmlIO.h b/lib/serialisation/XmlIO.h index fcdbf1e4..e37eb8d9 100644 --- a/lib/serialisation/XmlIO.h +++ b/lib/serialisation/XmlIO.h @@ -39,6 +39,7 @@ Author: paboyle #include #include +#include namespace Grid { @@ -119,7 +120,6 @@ namespace Grid std::string buf; readDefault(s, buf); - // std::cout << s << " " << buf << std::endl; fromString(output, 
buf); } @@ -132,7 +132,13 @@ namespace Grid std::string buf; unsigned int i = 0; - push(s); + if (!push(s)) + { + std::cout << GridLogWarning << "XML: cannot open node '" << s << "'"; + std::cout << std::endl; + + return; + } while (node_.child("elem")) { output.resize(i + 1); From 2a9ebddad59116151e6db2a0bc8cdbf53dd5741c Mon Sep 17 00:00:00 2001 From: Antonin Portelli Date: Sun, 3 Dec 2017 19:45:15 +0100 Subject: [PATCH 230/377] Hadrons: scheduler offline, minimal code working again --- extras/Hadrons/Application.cc | 93 ++++++++++--------- extras/Hadrons/Module.hpp | 4 +- extras/Hadrons/Modules/MContraction/Meson.hpp | 4 +- 3 files changed, 51 insertions(+), 50 deletions(-) diff --git a/extras/Hadrons/Application.cc b/extras/Hadrons/Application.cc index 0a7d0290..0860437b 100644 --- a/extras/Hadrons/Application.cc +++ b/extras/Hadrons/Application.cc @@ -182,7 +182,7 @@ GeneticScheduler::ObjFunc memPeak = \ memPeak = env().executeProgram(program);\ env().dryRun(false);\ env().freeAll();\ - HadronsLogMessage.Active(true);\ + HadronsLogMessage.Active(msg);\ \ return memPeak;\ } @@ -199,58 +199,59 @@ void Application::schedule(void) auto con = graph.getConnectedComponents(); // constrained topological sort using a genetic algorithm - LOG(Message) << "Scheduling computation..." << std::endl; - LOG(Message) << " #module= " << graph.size() << std::endl; - LOG(Message) << " population size= " << par_.genetic.popSize << std::endl; - LOG(Message) << " max. generation= " << par_.genetic.maxGen << std::endl; - LOG(Message) << " max. cst. generation= " << par_.genetic.maxCstGen << std::endl; - LOG(Message) << " mutation rate= " << par_.genetic.mutationRate << std::endl; + // LOG(Message) << "Scheduling computation..." << std::endl; + // LOG(Message) << " #module= " << graph.size() << std::endl; + // LOG(Message) << " population size= " << par_.genetic.popSize << std::endl; + // LOG(Message) << " max. 
generation= " << par_.genetic.maxGen << std::endl; + // LOG(Message) << " max. cst. generation= " << par_.genetic.maxCstGen << std::endl; + // LOG(Message) << " mutation rate= " << par_.genetic.mutationRate << std::endl; - unsigned int k = 0, gen, prevPeak, nCstPeak = 0; - std::random_device rd; - GeneticScheduler::Parameters par; + // unsigned int k = 0, gen, prevPeak, nCstPeak = 0; + // std::random_device rd; + // GeneticScheduler::Parameters par; - par.popSize = par_.genetic.popSize; - par.mutationRate = par_.genetic.mutationRate; - par.seed = rd(); - memPeak_ = 0; - CartesianCommunicator::BroadcastWorld(0, &(par.seed), sizeof(par.seed)); + // par.popSize = par_.genetic.popSize; + // par.mutationRate = par_.genetic.mutationRate; + // par.seed = rd(); + // memPeak_ = 0; + // CartesianCommunicator::BroadcastWorld(0, &(par.seed), sizeof(par.seed)); for (unsigned int i = 0; i < con.size(); ++i) { - GeneticScheduler scheduler(con[i], memPeak, par); + // GeneticScheduler scheduler(con[i], memPeak, par); - gen = 0; - do - { - LOG(Debug) << "Generation " << gen << ":" << std::endl; - scheduler.nextGeneration(); - if (gen != 0) - { - if (prevPeak == scheduler.getMinValue()) - { - nCstPeak++; - } - else - { - nCstPeak = 0; - } - } + // gen = 0; + // do + // { + // LOG(Debug) << "Generation " << gen << ":" << std::endl; + // scheduler.nextGeneration(); + // if (gen != 0) + // { + // if (prevPeak == scheduler.getMinValue()) + // { + // nCstPeak++; + // } + // else + // { + // nCstPeak = 0; + // } + // } - prevPeak = scheduler.getMinValue(); - if (gen % 10 == 0) - { - LOG(Iterative) << "Generation " << gen << ": " - << MEM_MSG(scheduler.getMinValue()) << std::endl; - } + // prevPeak = scheduler.getMinValue(); + // if (gen % 10 == 0) + // { + // LOG(Iterative) << "Generation " << gen << ": " + // << MEM_MSG(scheduler.getMinValue()) << std::endl; + // } - gen++; - } while ((gen < par_.genetic.maxGen) - and (nCstPeak < par_.genetic.maxCstGen)); - auto &t = 
scheduler.getMinSchedule(); - if (scheduler.getMinValue() > memPeak_) - { - memPeak_ = scheduler.getMinValue(); - } + // gen++; + // } while ((gen < par_.genetic.maxGen) + // and (nCstPeak < par_.genetic.maxCstGen)); + // auto &t = scheduler.getMinSchedule(); + // if (scheduler.getMinValue() > memPeak_) + // { + // memPeak_ = scheduler.getMinValue(); + // } + auto t = con[i].topoSort(); for (unsigned int j = 0; j < t.size(); ++j) { program_.push_back(t[j]); diff --git a/extras/Hadrons/Module.hpp b/extras/Hadrons/Module.hpp index a0b062df..a9525029 100644 --- a/extras/Hadrons/Module.hpp +++ b/extras/Hadrons/Module.hpp @@ -95,8 +95,8 @@ static ns##mod##ModuleRegistrar ns##mod##ModuleRegistrarInstance; #define envGetTmp(type, name)\ *env().template getObject(getName() + "_tmp_" + name) -#define envIsType(type, name)\ -env().template getObject(name) +#define envHasType(type, name)\ +env().template isObjectOfType(name) #define envCreate(type, name, Ls, pt)\ env().template createObject(name, Environment::Storage::object, Ls, pt) diff --git a/extras/Hadrons/Modules/MContraction/Meson.hpp b/extras/Hadrons/Modules/MContraction/Meson.hpp index ccc6dc55..34127da3 100644 --- a/extras/Hadrons/Modules/MContraction/Meson.hpp +++ b/extras/Hadrons/Modules/MContraction/Meson.hpp @@ -179,8 +179,8 @@ void TMeson::execute(void) result[i].gamma_src = gammaList[i].second; result[i].corr.resize(nt); } - if (envIsType(SlicedPropagator1, par().q1) and - envIsType(SlicedPropagator2, par().q2)) + if (envHasType(SlicedPropagator1, par().q1) and + envHasType(SlicedPropagator2, par().q2)) { SlicedPropagator1 &q1 = envGet(SlicedPropagator1, par().q1); SlicedPropagator2 &q2 = envGet(SlicedPropagator2, par().q2); From 624246409cc769715c74665d876a4cb4038a9693 Mon Sep 17 00:00:00 2001 From: Antonin Portelli Date: Sun, 3 Dec 2017 19:46:18 +0100 Subject: [PATCH 231/377] Hadrons: module setup/execute protected to forbid user to bypass execution control --- extras/Hadrons/Module.hpp | 5 +++-- 
extras/Hadrons/Modules/MAction/DWF.hpp | 1 + extras/Hadrons/Modules/MAction/Wilson.hpp | 1 + extras/Hadrons/Modules/MContraction/Baryon.hpp | 1 + extras/Hadrons/Modules/MContraction/DiscLoop.hpp | 1 + extras/Hadrons/Modules/MContraction/Gamma3pt.hpp | 1 + extras/Hadrons/Modules/MContraction/Meson.hpp | 1 + extras/Hadrons/Modules/MContraction/WardIdentity.hpp | 1 + extras/Hadrons/Modules/MContraction/WeakHamiltonian.hpp | 4 +++- extras/Hadrons/Modules/MFermion/GaugeProp.hpp | 1 + extras/Hadrons/Modules/MGauge/Load.hpp | 1 + extras/Hadrons/Modules/MGauge/Random.hpp | 1 + extras/Hadrons/Modules/MGauge/StochEm.hpp | 1 + extras/Hadrons/Modules/MGauge/Unit.hpp | 1 + extras/Hadrons/Modules/MLoop/NoiseLoop.hpp | 1 + extras/Hadrons/Modules/MScalar/ChargedProp.hpp | 1 + extras/Hadrons/Modules/MScalar/FreeProp.hpp | 1 + extras/Hadrons/Modules/MSink/Point.hpp | 1 + extras/Hadrons/Modules/MSink/Smear.hpp | 1 + extras/Hadrons/Modules/MSolver/RBPrecCG.hpp | 1 + extras/Hadrons/Modules/MSource/Point.hpp | 1 + extras/Hadrons/Modules/MSource/SeqConserved.hpp | 1 + extras/Hadrons/Modules/MSource/SeqGamma.hpp | 1 + extras/Hadrons/Modules/MSource/Wall.hpp | 1 + extras/Hadrons/Modules/MSource/Z2.hpp | 1 + extras/Hadrons/Modules/MUtilities/TestSeqConserved.hpp | 1 + extras/Hadrons/Modules/MUtilities/TestSeqGamma.hpp | 1 + 27 files changed, 31 insertions(+), 3 deletions(-) diff --git a/extras/Hadrons/Module.hpp b/extras/Hadrons/Module.hpp index a9525029..017a9172 100644 --- a/extras/Hadrons/Module.hpp +++ b/extras/Hadrons/Module.hpp @@ -157,10 +157,11 @@ public: // parse parameters virtual void parseParameters(XmlReader &reader, const std::string name) = 0; virtual void saveParameters(XmlWriter &writer, const std::string name) = 0; - // setup - virtual void setup(void) {}; // execution void operator()(void); +protected: + // setup + virtual void setup(void) {}; virtual void execute(void) = 0; private: std::string name_; diff --git a/extras/Hadrons/Modules/MAction/DWF.hpp 
b/extras/Hadrons/Modules/MAction/DWF.hpp index 7c82fe8b..36c70073 100644 --- a/extras/Hadrons/Modules/MAction/DWF.hpp +++ b/extras/Hadrons/Modules/MAction/DWF.hpp @@ -65,6 +65,7 @@ public: // dependency relation virtual std::vector getInput(void); virtual std::vector getOutput(void); +protected: // setup virtual void setup(void); // execution diff --git a/extras/Hadrons/Modules/MAction/Wilson.hpp b/extras/Hadrons/Modules/MAction/Wilson.hpp index 5c334f8d..7fe1f44e 100644 --- a/extras/Hadrons/Modules/MAction/Wilson.hpp +++ b/extras/Hadrons/Modules/MAction/Wilson.hpp @@ -63,6 +63,7 @@ public: // dependencies/products virtual std::vector getInput(void); virtual std::vector getOutput(void); +protected: // setup virtual void setup(void); // execution diff --git a/extras/Hadrons/Modules/MContraction/Baryon.hpp b/extras/Hadrons/Modules/MContraction/Baryon.hpp index 78bde5a2..da927391 100644 --- a/extras/Hadrons/Modules/MContraction/Baryon.hpp +++ b/extras/Hadrons/Modules/MContraction/Baryon.hpp @@ -72,6 +72,7 @@ public: // dependency relation virtual std::vector getInput(void); virtual std::vector getOutput(void); +protected: // execution virtual void execute(void); }; diff --git a/extras/Hadrons/Modules/MContraction/DiscLoop.hpp b/extras/Hadrons/Modules/MContraction/DiscLoop.hpp index 4f782cd3..f8da3943 100644 --- a/extras/Hadrons/Modules/MContraction/DiscLoop.hpp +++ b/extras/Hadrons/Modules/MContraction/DiscLoop.hpp @@ -68,6 +68,7 @@ public: // dependency relation virtual std::vector getInput(void); virtual std::vector getOutput(void); +protected: // setup virtual void setup(void); // execution diff --git a/extras/Hadrons/Modules/MContraction/Gamma3pt.hpp b/extras/Hadrons/Modules/MContraction/Gamma3pt.hpp index 162ab786..a8653186 100644 --- a/extras/Hadrons/Modules/MContraction/Gamma3pt.hpp +++ b/extras/Hadrons/Modules/MContraction/Gamma3pt.hpp @@ -99,6 +99,7 @@ public: // dependency relation virtual std::vector getInput(void); virtual std::vector getOutput(void); 
+protected: // setup virtual void setup(void); // execution diff --git a/extras/Hadrons/Modules/MContraction/Meson.hpp b/extras/Hadrons/Modules/MContraction/Meson.hpp index 34127da3..31640b7c 100644 --- a/extras/Hadrons/Modules/MContraction/Meson.hpp +++ b/extras/Hadrons/Modules/MContraction/Meson.hpp @@ -97,6 +97,7 @@ public: virtual std::vector getInput(void); virtual std::vector getOutput(void); virtual void parseGammaString(std::vector &gammaList); +protected: // execution virtual void execute(void); }; diff --git a/extras/Hadrons/Modules/MContraction/WardIdentity.hpp b/extras/Hadrons/Modules/MContraction/WardIdentity.hpp index 8a56e0eb..a298c1a1 100644 --- a/extras/Hadrons/Modules/MContraction/WardIdentity.hpp +++ b/extras/Hadrons/Modules/MContraction/WardIdentity.hpp @@ -74,6 +74,7 @@ public: // dependency relation virtual std::vector getInput(void); virtual std::vector getOutput(void); +protected: // setup virtual void setup(void); // execution diff --git a/extras/Hadrons/Modules/MContraction/WeakHamiltonian.hpp b/extras/Hadrons/Modules/MContraction/WeakHamiltonian.hpp index 302b207e..7df40370 100644 --- a/extras/Hadrons/Modules/MContraction/WeakHamiltonian.hpp +++ b/extras/Hadrons/Modules/MContraction/WeakHamiltonian.hpp @@ -100,11 +100,13 @@ public:\ /* dependency relation */ \ virtual std::vector getInput(void);\ virtual std::vector getOutput(void);\ +public:\ + std::vector VA_label = {"V", "A"};\ +protected:\ /* setup */ \ virtual void setup(void);\ /* execution */ \ virtual void execute(void);\ - std::vector VA_label = {"V", "A"};\ };\ MODULE_REGISTER_NS(modname, T##modname, MContraction); diff --git a/extras/Hadrons/Modules/MFermion/GaugeProp.hpp b/extras/Hadrons/Modules/MFermion/GaugeProp.hpp index 59994d0d..8529825b 100644 --- a/extras/Hadrons/Modules/MFermion/GaugeProp.hpp +++ b/extras/Hadrons/Modules/MFermion/GaugeProp.hpp @@ -85,6 +85,7 @@ public: // dependency relation virtual std::vector getInput(void); virtual std::vector getOutput(void); 
+protected: // setup virtual void setup(void); // execution diff --git a/extras/Hadrons/Modules/MGauge/Load.hpp b/extras/Hadrons/Modules/MGauge/Load.hpp index 5ff6da0f..a338af79 100644 --- a/extras/Hadrons/Modules/MGauge/Load.hpp +++ b/extras/Hadrons/Modules/MGauge/Load.hpp @@ -58,6 +58,7 @@ public: // dependency relation virtual std::vector getInput(void); virtual std::vector getOutput(void); +protected: // setup virtual void setup(void); // execution diff --git a/extras/Hadrons/Modules/MGauge/Random.hpp b/extras/Hadrons/Modules/MGauge/Random.hpp index a97d25cf..a07130e4 100644 --- a/extras/Hadrons/Modules/MGauge/Random.hpp +++ b/extras/Hadrons/Modules/MGauge/Random.hpp @@ -51,6 +51,7 @@ public: // dependency relation virtual std::vector getInput(void); virtual std::vector getOutput(void); +protected: // setup virtual void setup(void); // execution diff --git a/extras/Hadrons/Modules/MGauge/StochEm.hpp b/extras/Hadrons/Modules/MGauge/StochEm.hpp index 12ce9fdc..bacb5172 100644 --- a/extras/Hadrons/Modules/MGauge/StochEm.hpp +++ b/extras/Hadrons/Modules/MGauge/StochEm.hpp @@ -60,6 +60,7 @@ public: // dependency relation virtual std::vector getInput(void); virtual std::vector getOutput(void); +protected: // setup virtual void setup(void); // execution diff --git a/extras/Hadrons/Modules/MGauge/Unit.hpp b/extras/Hadrons/Modules/MGauge/Unit.hpp index 7cd15ef7..c1650cc7 100644 --- a/extras/Hadrons/Modules/MGauge/Unit.hpp +++ b/extras/Hadrons/Modules/MGauge/Unit.hpp @@ -51,6 +51,7 @@ public: // dependencies/products virtual std::vector getInput(void); virtual std::vector getOutput(void); +protected: // setup virtual void setup(void); // execution diff --git a/extras/Hadrons/Modules/MLoop/NoiseLoop.hpp b/extras/Hadrons/Modules/MLoop/NoiseLoop.hpp index 5d2c4a13..1f40dd48 100644 --- a/extras/Hadrons/Modules/MLoop/NoiseLoop.hpp +++ b/extras/Hadrons/Modules/MLoop/NoiseLoop.hpp @@ -74,6 +74,7 @@ public: // dependency relation virtual std::vector getInput(void); virtual 
std::vector getOutput(void); +protected: // setup virtual void setup(void); // execution diff --git a/extras/Hadrons/Modules/MScalar/ChargedProp.hpp b/extras/Hadrons/Modules/MScalar/ChargedProp.hpp index fbe75c05..ab6a0184 100644 --- a/extras/Hadrons/Modules/MScalar/ChargedProp.hpp +++ b/extras/Hadrons/Modules/MScalar/ChargedProp.hpp @@ -37,6 +37,7 @@ public: // dependency relation virtual std::vector getInput(void); virtual std::vector getOutput(void); +protected: // setup virtual void setup(void); // execution diff --git a/extras/Hadrons/Modules/MScalar/FreeProp.hpp b/extras/Hadrons/Modules/MScalar/FreeProp.hpp index 97cf288a..38372a0c 100644 --- a/extras/Hadrons/Modules/MScalar/FreeProp.hpp +++ b/extras/Hadrons/Modules/MScalar/FreeProp.hpp @@ -33,6 +33,7 @@ public: // dependency relation virtual std::vector getInput(void); virtual std::vector getOutput(void); +protected: // setup virtual void setup(void); // execution diff --git a/extras/Hadrons/Modules/MSink/Point.hpp b/extras/Hadrons/Modules/MSink/Point.hpp index b124e2e5..853a7c32 100644 --- a/extras/Hadrons/Modules/MSink/Point.hpp +++ b/extras/Hadrons/Modules/MSink/Point.hpp @@ -61,6 +61,7 @@ public: // dependency relation virtual std::vector getInput(void); virtual std::vector getOutput(void); +protected: // setup virtual void setup(void); // execution diff --git a/extras/Hadrons/Modules/MSink/Smear.hpp b/extras/Hadrons/Modules/MSink/Smear.hpp index c3973d2b..b51d2f49 100644 --- a/extras/Hadrons/Modules/MSink/Smear.hpp +++ b/extras/Hadrons/Modules/MSink/Smear.hpp @@ -62,6 +62,7 @@ public: // dependency relation virtual std::vector getInput(void); virtual std::vector getOutput(void); +protected: // setup virtual void setup(void); // execution diff --git a/extras/Hadrons/Modules/MSolver/RBPrecCG.hpp b/extras/Hadrons/Modules/MSolver/RBPrecCG.hpp index 8063d939..d8a4b95f 100644 --- a/extras/Hadrons/Modules/MSolver/RBPrecCG.hpp +++ b/extras/Hadrons/Modules/MSolver/RBPrecCG.hpp @@ -62,6 +62,7 @@ public: // 
dependencies/products virtual std::vector getInput(void); virtual std::vector getOutput(void); +protected: // setup virtual void setup(void); // execution diff --git a/extras/Hadrons/Modules/MSource/Point.hpp b/extras/Hadrons/Modules/MSource/Point.hpp index 5e16149e..b9813688 100644 --- a/extras/Hadrons/Modules/MSource/Point.hpp +++ b/extras/Hadrons/Modules/MSource/Point.hpp @@ -72,6 +72,7 @@ public: // dependency relation virtual std::vector getInput(void); virtual std::vector getOutput(void); +protected: // setup virtual void setup(void); // execution diff --git a/extras/Hadrons/Modules/MSource/SeqConserved.hpp b/extras/Hadrons/Modules/MSource/SeqConserved.hpp index 86a7dfb9..e8f91be1 100644 --- a/extras/Hadrons/Modules/MSource/SeqConserved.hpp +++ b/extras/Hadrons/Modules/MSource/SeqConserved.hpp @@ -83,6 +83,7 @@ public: // dependency relation virtual std::vector getInput(void); virtual std::vector getOutput(void); +protected: // setup virtual void setup(void); // execution diff --git a/extras/Hadrons/Modules/MSource/SeqGamma.hpp b/extras/Hadrons/Modules/MSource/SeqGamma.hpp index e2129a46..8f67f8fa 100644 --- a/extras/Hadrons/Modules/MSource/SeqGamma.hpp +++ b/extras/Hadrons/Modules/MSource/SeqGamma.hpp @@ -81,6 +81,7 @@ public: // dependency relation virtual std::vector getInput(void); virtual std::vector getOutput(void); +protected: // setup virtual void setup(void); // execution diff --git a/extras/Hadrons/Modules/MSource/Wall.hpp b/extras/Hadrons/Modules/MSource/Wall.hpp index 4de37e4d..57dee06d 100644 --- a/extras/Hadrons/Modules/MSource/Wall.hpp +++ b/extras/Hadrons/Modules/MSource/Wall.hpp @@ -73,6 +73,7 @@ public: // dependency relation virtual std::vector getInput(void); virtual std::vector getOutput(void); +protected: // setup virtual void setup(void); // execution diff --git a/extras/Hadrons/Modules/MSource/Z2.hpp b/extras/Hadrons/Modules/MSource/Z2.hpp index a7f7a3e6..e2cc4f34 100644 --- a/extras/Hadrons/Modules/MSource/Z2.hpp +++ 
b/extras/Hadrons/Modules/MSource/Z2.hpp @@ -76,6 +76,7 @@ public: // dependency relation virtual std::vector getInput(void); virtual std::vector getOutput(void); +protected: // setup virtual void setup(void); // execution diff --git a/extras/Hadrons/Modules/MUtilities/TestSeqConserved.hpp b/extras/Hadrons/Modules/MUtilities/TestSeqConserved.hpp index b085eb8c..f8714d88 100644 --- a/extras/Hadrons/Modules/MUtilities/TestSeqConserved.hpp +++ b/extras/Hadrons/Modules/MUtilities/TestSeqConserved.hpp @@ -80,6 +80,7 @@ public: // dependency relation virtual std::vector getInput(void); virtual std::vector getOutput(void); +protected: // setup virtual void setup(void); // execution diff --git a/extras/Hadrons/Modules/MUtilities/TestSeqGamma.hpp b/extras/Hadrons/Modules/MUtilities/TestSeqGamma.hpp index 2799e5d0..9736ab54 100644 --- a/extras/Hadrons/Modules/MUtilities/TestSeqGamma.hpp +++ b/extras/Hadrons/Modules/MUtilities/TestSeqGamma.hpp @@ -64,6 +64,7 @@ public: // dependency relation virtual std::vector getInput(void); virtual std::vector getOutput(void); +protected: // setup virtual void setup(void); // execution From 59aae5f5ec97133f4f9ba80f3d2f718284d9e7f7 Mon Sep 17 00:00:00 2001 From: Antonin Portelli Date: Sun, 3 Dec 2017 19:47:11 +0100 Subject: [PATCH 232/377] Hadrons: garbage collector clean temporaries --- extras/Hadrons/Environment.cc | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/extras/Hadrons/Environment.cc b/extras/Hadrons/Environment.cc index a6855862..27849bd7 100644 --- a/extras/Hadrons/Environment.cc +++ b/extras/Hadrons/Environment.cc @@ -422,6 +422,15 @@ Environment::executeProgram(const std::vector &p) } } } while (continueCollect); + // free temporaries + for (unsigned int i = 0; i < object_.size(); ++i) + { + if ((object_[i].storage == Storage::temporary) + and hasCreatedObject(i)) + { + freeObject(i); + } + } // any remaining objects in step i garbage collection schedule // is scheduled for step i + 1 if (i + 1 < 
p.size()) @@ -687,7 +696,7 @@ bool Environment::freeObject(const unsigned int address) { if (!hasOwners(address)) { - if (!isDryRun()) + if (!isDryRun() and hasCreatedObject(address)) { LOG(Message) << "Destroying object '" << object_[address].name << "'" << std::endl; From 01f00385a4460ea21e09c2beaa77bd20c7a78550 Mon Sep 17 00:00:00 2001 From: Antonin Portelli Date: Sun, 3 Dec 2017 19:47:40 +0100 Subject: [PATCH 233/377] Hadrons: genetic pair selection based on exponential probability --- extras/Hadrons/GeneticScheduler.hpp | 33 ++++++++++++----------------- 1 file changed, 14 insertions(+), 19 deletions(-) diff --git a/extras/Hadrons/GeneticScheduler.hpp b/extras/Hadrons/GeneticScheduler.hpp index d0c52596..3b0195e7 100644 --- a/extras/Hadrons/GeneticScheduler.hpp +++ b/extras/Hadrons/GeneticScheduler.hpp @@ -212,28 +212,23 @@ typename GeneticScheduler::GenePair GeneticScheduler::selectPair(void) std::vector prob; unsigned int ind; Gene *p1, *p2; + const double max = population_.rbegin()->first; + for (auto &c: population_) { - prob.push_back(1./c.first); - } - do - { - double probCpy; - - std::discrete_distribution dis1(prob.begin(), prob.end()); - auto rIt = population_.begin(); - ind = dis1(gen_); - std::advance(rIt, ind); - p1 = &(rIt->second); - probCpy = prob[ind]; - prob[ind] = 0.; - std::discrete_distribution dis2(prob.begin(), prob.end()); - rIt = population_.begin(); - std::advance(rIt, dis2(gen_)); - p2 = &(rIt->second); - prob[ind] = probCpy; - } while (p1 == p2); + prob.push_back(std::exp((c.first-1.)/max)); + } + std::discrete_distribution dis1(prob.begin(), prob.end()); + auto rIt = population_.begin(); + ind = dis1(gen_); + std::advance(rIt, ind); + p1 = &(rIt->second); + prob[ind] = 0.; + std::discrete_distribution dis2(prob.begin(), prob.end()); + rIt = population_.begin(); + std::advance(rIt, dis2(gen_)); + p2 = &(rIt->second); return std::make_pair(p1, p2); } From 3127b52c907d6055d5d8e044a1e3764cea8c9f6f Mon Sep 17 00:00:00 2001 From: Antonin 
Portelli Date: Sun, 3 Dec 2017 19:48:34 +0100 Subject: [PATCH 234/377] bootstrap script does not destroy Eigen is working offline --- bootstrap.sh | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/bootstrap.sh b/bootstrap.sh index dfb6735d..bdf748df 100755 --- a/bootstrap.sh +++ b/bootstrap.sh @@ -3,9 +3,7 @@ EIGEN_URL='http://bitbucket.org/eigen/eigen/get/3.3.3.tar.bz2' echo "-- deploying Eigen source..." -wget ${EIGEN_URL} --no-check-certificate -./scripts/update_eigen.sh `basename ${EIGEN_URL}` -rm `basename ${EIGEN_URL}` +wget ${EIGEN_URL} --no-check-certificate && ./scripts/update_eigen.sh `basename ${EIGEN_URL}` && rm `basename ${EIGEN_URL}` echo '-- generating Make.inc files...' ./scripts/filelist From ae3b7713a9b2d7c095e9e1dd8de396f506580a3f Mon Sep 17 00:00:00 2001 From: paboyle Date: Tue, 5 Dec 2017 11:36:31 +0000 Subject: [PATCH 235/377] Cold start doesnt need RNG --- lib/qcd/utils/SUn.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/lib/qcd/utils/SUn.h b/lib/qcd/utils/SUn.h index 8f0c0a7b..cdc6c961 100644 --- a/lib/qcd/utils/SUn.h +++ b/lib/qcd/utils/SUn.h @@ -746,7 +746,7 @@ template } } template - static void ColdConfiguration(GridParallelRNG &pRNG,GaugeField &out){ + static void ColdConfiguration(GaugeField &out){ typedef typename GaugeField::vector_type vector_type; typedef iSUnMatrix vMatrixType; typedef Lattice LatticeMatrixType; @@ -757,6 +757,10 @@ template PokeIndex(out,Umu,mu); } } + template + static void ColdConfiguration(GridParallelRNG &pRNG,GaugeField &out){ + ColdConfiguration(out); + } template static void taProj( const LatticeMatrixType &in, LatticeMatrixType &out){ From d93c6760ec850abb93ee3f94a3444ee0dba84c6f Mon Sep 17 00:00:00 2001 From: paboyle Date: Tue, 5 Dec 2017 11:39:26 +0000 Subject: [PATCH 236/377] Faster code for split unsplit --- lib/lattice/Lattice_transfer.h | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/lib/lattice/Lattice_transfer.h 
b/lib/lattice/Lattice_transfer.h index c7e2a507..32c15d22 100644 --- a/lib/lattice/Lattice_transfer.h +++ b/lib/lattice/Lattice_transfer.h @@ -822,6 +822,7 @@ void Grid_split(std::vector > & full,Lattice & split) // Loop over reordered data post A2A parallel_for(int c=0;c coor(ndim); for(int m=0;m > & full,Lattice & split) uint64_t lex_vec = lex_fvol_vec/fvol; // which node sets an adder to the coordinate - std::vector coor(ndim); Lexicographic::CoorFromIndex(coor, lex_fvol, ldims); coor[d] += m*ldims[d]; Lexicographic::IndexFromCoor(coor, lex_r, rdims); @@ -940,10 +940,11 @@ void Grid_unsplit(std::vector > & full,Lattice & split) { // Loop over reordered data post A2A - for(int c=0;c coor(ndim); for(int m=0;m > & full,Lattice & split) uint64_t lex_vec = lex_fvol_vec/fvol; // which node sets an adder to the coordinate - std::vector coor(ndim); Lexicographic::CoorFromIndex(coor, lex_fvol, ldims); coor[d] += m*ldims[d]; Lexicographic::IndexFromCoor(coor, lex_r, rdims); @@ -978,9 +978,9 @@ void Grid_unsplit(std::vector > & full,Lattice & split) lsites = full_grid->lSites(); for(int v=0;v Date: Tue, 5 Dec 2017 11:42:05 +0000 Subject: [PATCH 237/377] Faster RNG init --- lib/lattice/Lattice_rng.h | 35 +++++------------------------------ 1 file changed, 5 insertions(+), 30 deletions(-) diff --git a/lib/lattice/Lattice_rng.h b/lib/lattice/Lattice_rng.h index 6dc50fd2..11d8e325 100644 --- a/lib/lattice/Lattice_rng.h +++ b/lib/lattice/Lattice_rng.h @@ -77,9 +77,6 @@ namespace Grid { // merge of April 11 2017 -//<<<<<<< HEAD - - // this function is necessary for the LS vectorised field inline int RNGfillable_general(GridBase *coarse,GridBase *fine) { @@ -91,7 +88,6 @@ namespace Grid { // all further divisions are local for(int d=0;d_processors[d]==1); for(int d=0;d_processors[d] == fine->_processors[d+lowerdims]); - // then divide the number of local sites // check that the total number of sims agree, meanse the iSites are the same @@ -102,27 +98,6 @@ namespace Grid { return 
fine->lSites() / coarse->lSites(); } - - /* - // Wrap seed_seq to give common interface with random_device - class fixedSeed { - public: - typedef std::seed_seq::result_type result_type; - std::seed_seq src; - - fixedSeed(const std::vector &seeds) : src(seeds.begin(),seeds.end()) {}; - - result_type operator () (void){ - std::vector list(1); - src.generate(list.begin(),list.end()); - return list[0]; - } - - }; - -======= ->>>>>>> develop - */ // real scalars are one component template @@ -171,7 +146,7 @@ namespace Grid { // support for parallel init /////////////////////// #ifdef RNG_FAST_DISCARD - static void Skip(RngEngine &eng) + static void Skip(RngEngine &eng,uint64_t site) { ///////////////////////////////////////////////////////////////////////////////////// // Skip by 2^40 elements between successive lattice sites @@ -184,7 +159,8 @@ namespace Grid { // and margin of safety is orders of magnitude. // We could hack Sitmo to skip in the higher order words of state if necessary ///////////////////////////////////////////////////////////////////////////////////// - uint64_t skip = 0x1; skip = skip<<40; + uint64_t skip = site; + skip = skip<<40; eng.discard(skip); } #endif @@ -411,9 +387,7 @@ namespace Grid { int rank,o_idx,i_idx; // Everybody loops over global volume. - for(int gidx=0;gidx<_grid->_gsites;gidx++){ - - Skip(master_engine); // Skip to next RNG sequence + parallel_for(int gidx=0;gidx<_grid->_gsites;gidx++){ // Where is it? 
_grid->GlobalIndexToGlobalCoor(gidx,gcoor); @@ -423,6 +397,7 @@ namespace Grid { if( rank == _grid->ThisRank() ){ int l_idx=generator_idx(o_idx,i_idx); _generators[l_idx] = master_engine; + Skip(_generators[l_idx],gidx); // Skip to next RNG sequence } } From a14038051fbeed58692fe1342f746c045e093585 Mon Sep 17 00:00:00 2001 From: paboyle Date: Tue, 5 Dec 2017 11:43:25 +0000 Subject: [PATCH 238/377] Improved AllToAll asserts --- lib/communicator/Communicator_base.h | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/lib/communicator/Communicator_base.h b/lib/communicator/Communicator_base.h index 73ea6165..548515cd 100644 --- a/lib/communicator/Communicator_base.h +++ b/lib/communicator/Communicator_base.h @@ -276,10 +276,11 @@ class CartesianCommunicator { assert(in.size()==out.size()); uint64_t bytes=sizeof(T); uint64_t words=in.size()/numnode; - + // std:: cout << "AllToAll buffer size "<< in.size()*sizeof(T)< Date: Tue, 5 Dec 2017 13:01:10 +0000 Subject: [PATCH 239/377] Improved parallel RNG init --- lib/lattice/Lattice_rng.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/lib/lattice/Lattice_rng.h b/lib/lattice/Lattice_rng.h index 11d8e325..d5190e63 100644 --- a/lib/lattice/Lattice_rng.h +++ b/lib/lattice/Lattice_rng.h @@ -159,9 +159,11 @@ namespace Grid { // and margin of safety is orders of magnitude. // We could hack Sitmo to skip in the higher order words of state if necessary ///////////////////////////////////////////////////////////////////////////////////// + // uint64_t skip = site+1; // Old init Skipped then drew. 
Checked compat with faster init uint64_t skip = site; skip = skip<<40; eng.discard(skip); + // std::cout << " Engine " < Date: Tue, 5 Dec 2017 13:07:31 +0000 Subject: [PATCH 240/377] Clean up of test --- tests/solver/Test_dwf_mrhs_cg_mpi.cc | 34 ++++++++++++++++------------ 1 file changed, 19 insertions(+), 15 deletions(-) diff --git a/tests/solver/Test_dwf_mrhs_cg_mpi.cc b/tests/solver/Test_dwf_mrhs_cg_mpi.cc index 06df58c6..7e11d8d1 100644 --- a/tests/solver/Test_dwf_mrhs_cg_mpi.cc +++ b/tests/solver/Test_dwf_mrhs_cg_mpi.cc @@ -81,21 +81,20 @@ int main (int argc, char ** argv) GridCartesian * SFGrid = SpaceTimeGrid::makeFiveDimGrid(Ls,SGrid); GridRedBlackCartesian * SrbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(SGrid); GridRedBlackCartesian * SFrbGrid = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls,SGrid); - + std::cout << GridLogMessage << "Made the grids"< seeds({1,2,3,4}); - GridParallelRNG pRNG(UGrid ); pRNG.SeedFixedIntegers(seeds); - GridParallelRNG pRNG5(FGrid); pRNG5.SeedFixedIntegers(seeds); std::vector src(nrhs,FGrid); std::vector src_chk(nrhs,FGrid); std::vector result(nrhs,FGrid); FermionField tmp(FGrid); + std::cout << GridLogMessage << "Made the Fermion Fields"< Date: Tue, 5 Dec 2017 14:12:22 +0000 Subject: [PATCH 241/377] Threading improvement --- lib/lattice/Lattice_rng.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/lib/lattice/Lattice_rng.h b/lib/lattice/Lattice_rng.h index d5190e63..edf9dd23 100644 --- a/lib/lattice/Lattice_rng.h +++ b/lib/lattice/Lattice_rng.h @@ -385,13 +385,14 @@ namespace Grid { // MT implementation does not implement fast discard even though // in principle this is possible //////////////////////////////////////////////// - std::vector gcoor; - int rank,o_idx,i_idx; // Everybody loops over global volume. parallel_for(int gidx=0;gidx<_grid->_gsites;gidx++){ // Where is it? 
+ int rank,o_idx,i_idx; + std::vector gcoor; + _grid->GlobalIndexToGlobalCoor(gidx,gcoor); _grid->GlobalCoorToRankIndex(rank,o_idx,i_idx,gcoor); From 542225195903b5a54bd2b2768c8153b29fba5230 Mon Sep 17 00:00:00 2001 From: Antonin Portelli Date: Tue, 5 Dec 2017 15:31:59 +0100 Subject: [PATCH 242/377] Hadrons: execution part moved in a new virtual machine class --- extras/Hadrons/Application.cc | 41 +- extras/Hadrons/Application.hpp | 11 +- extras/Hadrons/Environment.cc | 387 ++--------------- extras/Hadrons/Environment.hpp | 124 +----- extras/Hadrons/Global.hpp | 4 + extras/Hadrons/Makefile.am | 6 +- extras/Hadrons/Module.cc | 8 +- extras/Hadrons/Module.hpp | 8 +- extras/Hadrons/Modules/MContraction/Meson.hpp | 2 +- extras/Hadrons/VirtualMachine.cc | 388 ++++++++++++++++++ extras/Hadrons/VirtualMachine.hpp | 164 ++++++++ 11 files changed, 647 insertions(+), 496 deletions(-) create mode 100644 extras/Hadrons/VirtualMachine.cc create mode 100644 extras/Hadrons/VirtualMachine.hpp diff --git a/extras/Hadrons/Application.cc b/extras/Hadrons/Application.cc index 0860437b..af67dff3 100644 --- a/extras/Hadrons/Application.cc +++ b/extras/Hadrons/Application.cc @@ -73,12 +73,6 @@ Application::Application(const std::string parameterFileName) parameterFileName_ = parameterFileName; } -// environment shortcut //////////////////////////////////////////////////////// -Environment & Application::env(void) const -{ - return Environment::getInstance(); -} - // access ////////////////////////////////////////////////////////////////////// void Application::setPar(const Application::GlobalPar &par) { @@ -94,12 +88,13 @@ const Application::GlobalPar & Application::getPar(void) // execute ///////////////////////////////////////////////////////////////////// void Application::run(void) { - if (!parameterFileName_.empty() and (env().getNModule() == 0)) + if (!parameterFileName_.empty() and (vm().getNModule() == 0)) { parseParameterFile(parameterFileName_); } - env().checkGraph(); + 
//vm().checkGraph(); env().printContent(); + vm().printContent(); if (!scheduled_) { schedule(); @@ -137,7 +132,7 @@ void Application::parseParameterFile(const std::string parameterFileName) do { read(reader, "id", id); - env().createModule(id.name, id.type, reader); + vm().createModule(id.name, id.type, reader); } while (reader.nextElement("module")); pop(reader); pop(reader); @@ -147,7 +142,7 @@ void Application::saveParameterFile(const std::string parameterFileName) { XmlWriter writer(parameterFileName); ObjectId id; - const unsigned int nMod = env().getNModule(); + const unsigned int nMod = vm().getNModule(); LOG(Message) << "Saving application to '" << parameterFileName << "'..." << std::endl; write(writer, "parameters", getPar()); @@ -155,10 +150,10 @@ void Application::saveParameterFile(const std::string parameterFileName) for (unsigned int i = 0; i < nMod; ++i) { push(writer, "module"); - id.name = env().getModuleName(i); - id.type = env().getModule(i)->getRegisteredName(); + id.name = vm().getModuleName(i); + id.type = vm().getModule(i)->getRegisteredName(); write(writer, "id", id); - env().getModule(i)->saveParameters(writer, "options"); + vm().getModule(i)->saveParameters(writer, "options"); pop(writer); } pop(writer); @@ -178,9 +173,9 @@ GeneticScheduler::ObjFunc memPeak = \ \ msg = HadronsLogMessage.isActive();\ HadronsLogMessage.Active(false);\ - env().dryRun(true);\ - memPeak = env().executeProgram(program);\ - env().dryRun(false);\ + vm().dryRun(true);\ + memPeak = vm().executeProgram(program);\ + vm().dryRun(false);\ env().freeAll();\ HadronsLogMessage.Active(msg);\ \ @@ -193,7 +188,7 @@ void Application::schedule(void) // build module dependency graph LOG(Message) << "Building module graph..." 
<< std::endl; - auto graph = env().makeModuleGraph(); + auto graph = vm().makeModuleGraph(); LOG(Debug) << "Module graph:" << std::endl; LOG(Debug) << graph << std::endl; auto con = graph.getConnectedComponents(); @@ -273,7 +268,7 @@ void Application::saveSchedule(const std::string filename) << std::endl; for (auto address: program_) { - program.push_back(env().getModuleName(address)); + program.push_back(vm().getModuleName(address)); } write(writer, "schedule", program); } @@ -291,7 +286,7 @@ void Application::loadSchedule(const std::string filename) program_.clear(); for (auto &name: program) { - program_.push_back(env().getModuleAddress(name)); + program_.push_back(vm().getModuleAddress(name)); } scheduled_ = true; memPeak_ = memPeak(program_); @@ -308,7 +303,7 @@ void Application::printSchedule(void) for (unsigned int i = 0; i < program_.size(); ++i) { LOG(Message) << std::setw(4) << i + 1 << ": " - << env().getModuleName(program_[i]) << std::endl; + << vm().getModuleName(program_[i]) << std::endl; } } @@ -321,8 +316,8 @@ void Application::configLoop(void) { LOG(Message) << BIG_SEP << " Starting measurement for trajectory " << t << " " << BIG_SEP << std::endl; - env().setTrajectory(t); - env().executeProgram(program_); + vm().setTrajectory(t); + vm().executeProgram(program_); } LOG(Message) << BIG_SEP << " End of measurement " << BIG_SEP << std::endl; env().freeAll(); @@ -331,7 +326,7 @@ void Application::configLoop(void) // memory profile ////////////////////////////////////////////////////////////// void Application::memoryProfile(void) { - auto graph = env().makeModuleGraph(); + auto graph = vm().makeModuleGraph(); auto program = graph.topoSort(); bool msg; diff --git a/extras/Hadrons/Application.hpp b/extras/Hadrons/Application.hpp index 8b11b0c7..66488206 100644 --- a/extras/Hadrons/Application.hpp +++ b/extras/Hadrons/Application.hpp @@ -31,8 +31,7 @@ See the full license in the file "LICENSE" in the top level distribution directo #define 
Hadrons_Application_hpp_ #include -#include -#include +#include #include BEGIN_HADRONS_NAMESPACE @@ -100,7 +99,9 @@ public: void configLoop(void); private: // environment shortcut - Environment & env(void) const; + DEFINE_ENV_ALIAS; + // virtual machine shortcut + DEFINE_VM_ALIAS; // memory profile void memoryProfile(void); private: @@ -119,14 +120,14 @@ private: template void Application::createModule(const std::string name) { - env().createModule(name); + vm().createModule(name); } template void Application::createModule(const std::string name, const typename M::Par &par) { - env().createModule(name, par); + vm().createModule(name, par); } END_HADRONS_NAMESPACE diff --git a/extras/Hadrons/Environment.cc b/extras/Hadrons/Environment.cc index 27849bd7..ea41f343 100644 --- a/extras/Hadrons/Environment.cc +++ b/extras/Hadrons/Environment.cc @@ -56,38 +56,6 @@ Environment::Environment(void) rng4d_.reset(new GridParallelRNG(grid4d_.get())); } -// dry run ///////////////////////////////////////////////////////////////////// -void Environment::dryRun(const bool isDry) -{ - dryRun_ = isDry; -} - -bool Environment::isDryRun(void) const -{ - return dryRun_; -} - -void Environment::memoryProfile(const bool doMemoryProfile) -{ - memoryProfile_ = doMemoryProfile; -} - -bool Environment::doMemoryProfile(void) const -{ - return memoryProfile_; -} - -// trajectory number /////////////////////////////////////////////////////////// -void Environment::setTrajectory(const unsigned int traj) -{ - traj_ = traj; -} - -unsigned int Environment::getTrajectory(void) const -{ - return traj_; -} - // grids /////////////////////////////////////////////////////////////////////// void Environment::createGrid(const unsigned int Ls) { @@ -153,6 +121,11 @@ int Environment::getDim(const unsigned int mu) const return dim_[mu]; } +unsigned long int Environment::getLocalVolume(void) const +{ + return locVol_; +} + // random number generator ///////////////////////////////////////////////////// void 
Environment::setSeed(const std::vector &seed) { @@ -164,313 +137,6 @@ GridParallelRNG * Environment::get4dRng(void) const return rng4d_.get(); } -// module management /////////////////////////////////////////////////////////// -void Environment::pushModule(Environment::ModPt &pt) -{ - std::string name = pt->getName(); - - if (!hasModule(name)) - { - std::vector inputAddress; - unsigned int address; - ModuleInfo m; - - m.data = std::move(pt); - m.type = typeIdPt(*m.data.get()); - m.name = name; - auto input = m.data->getInput(); - for (auto &in: input) - { - if (!hasObject(in)) - { - addObject(in , -1); - } - m.input.push_back(objectAddress_[in]); - } - auto output = m.data->getOutput(); - module_.push_back(std::move(m)); - address = static_cast(module_.size() - 1); - moduleAddress_[name] = address; - for (auto &out: output) - { - if (!hasObject(out)) - { - addObject(out, address); - } - else - { - if (object_[objectAddress_[out]].module < 0) - { - object_[objectAddress_[out]].module = address; - } - else - { - HADRON_ERROR("object '" + out - + "' is already produced by module '" - + module_[object_[getObjectAddress(out)].module].name - + "' (while pushing module '" + name + "')"); - } - } - } - } - else - { - HADRON_ERROR("module '" + name + "' already exists"); - } -} - -unsigned int Environment::getNModule(void) const -{ - return module_.size(); -} - -void Environment::createModule(const std::string name, const std::string type, - XmlReader &reader) -{ - auto &factory = ModuleFactory::getInstance(); - auto pt = factory.create(type, name); - - pt->parseParameters(reader, "options"); - pushModule(pt); -} - -ModuleBase * Environment::getModule(const unsigned int address) const -{ - if (hasModule(address)) - { - return module_[address].data.get(); - } - else - { - HADRON_ERROR("no module with address " + std::to_string(address)); - } -} - -ModuleBase * Environment::getModule(const std::string name) const -{ - return getModule(getModuleAddress(name)); -} - -unsigned 
int Environment::getModuleAddress(const std::string name) const -{ - if (hasModule(name)) - { - return moduleAddress_.at(name); - } - else - { - HADRON_ERROR("no module with name '" + name + "'"); - } -} - -std::string Environment::getModuleName(const unsigned int address) const -{ - if (hasModule(address)) - { - return module_[address].name; - } - else - { - HADRON_ERROR("no module with address " + std::to_string(address)); - } -} - -std::string Environment::getModuleType(const unsigned int address) const -{ - if (hasModule(address)) - { - return typeName(module_[address].type); - } - else - { - HADRON_ERROR("no module with address " + std::to_string(address)); - } -} - -std::string Environment::getModuleType(const std::string name) const -{ - return getModuleType(getModuleAddress(name)); -} - -std::string Environment::getModuleNamespace(const unsigned int address) const -{ - std::string type = getModuleType(address), ns; - - auto pos2 = type.rfind("::"); - auto pos1 = type.rfind("::", pos2 - 2); - - return type.substr(pos1 + 2, pos2 - pos1 - 2); -} - -std::string Environment::getModuleNamespace(const std::string name) const -{ - return getModuleNamespace(getModuleAddress(name)); -} - -bool Environment::hasModule(const unsigned int address) const -{ - return (address < module_.size()); -} - -bool Environment::hasModule(const std::string name) const -{ - return (moduleAddress_.find(name) != moduleAddress_.end()); -} - -Graph Environment::makeModuleGraph(void) const -{ - Graph moduleGraph; - - for (unsigned int i = 0; i < module_.size(); ++i) - { - moduleGraph.addVertex(i); - for (auto &j: module_[i].input) - { - moduleGraph.addEdge(object_[j].module, i); - } - } - - return moduleGraph; -} - -void Environment::checkGraph(void) const -{ - for (auto &o: object_) - { - if (o.module < 0) - { - HADRON_ERROR("object '" + o.name + "' does not have a creator"); - } - } -} - -#define BIG_SEP "===============" -#define SEP "---------------" -#define MEM_MSG(size)\ 
-sizeString((size)*locVol_) << " (" << sizeString(size) << "/site)" - -Environment::Size -Environment::executeProgram(const std::vector &p) -{ - Size memPeak = 0, sizeBefore, sizeAfter; - std::vector> freeProg; - bool continueCollect, nothingFreed; - - // build garbage collection schedule - LOG(Debug) << "Building garbage collection schedule..." << std::endl; - freeProg.resize(p.size()); - for (unsigned int i = 0; i < object_.size(); ++i) - { - auto pred = [i, this](const unsigned int j) - { - auto &in = module_[j].input; - auto it = std::find(in.begin(), in.end(), i); - - return (it != in.end()) or (j == object_[i].module); - }; - auto it = std::find_if(p.rbegin(), p.rend(), pred); - if (it != p.rend()) - { - freeProg[std::distance(it, p.rend()) - 1].insert(i); - } - } - - // program execution - LOG(Debug) << "Executing program..." << std::endl; - for (unsigned int i = 0; i < p.size(); ++i) - { - // execute module - if (!isDryRun()) - { - LOG(Message) << SEP << " Measurement step " << i+1 << "/" - << p.size() << " (module '" << module_[p[i]].name - << "') " << SEP << std::endl; - } - (*module_[p[i]].data)(); - sizeBefore = getTotalSize(); - // print used memory after execution - if (!isDryRun()) - { - LOG(Message) << "Allocated objects: " << MEM_MSG(sizeBefore) - << std::endl; - } - if (sizeBefore > memPeak) - { - memPeak = sizeBefore; - } - // garbage collection for step i - if (!isDryRun()) - { - LOG(Message) << "Garbage collection..." 
<< std::endl; - } - nothingFreed = true; - do - { - continueCollect = false; - auto toFree = freeProg[i]; - for (auto &j: toFree) - { - // continue garbage collection while there are still - // objects without owners - continueCollect = continueCollect or !hasOwners(j); - if(freeObject(j)) - { - // if an object has been freed, remove it from - // the garbage collection schedule - freeProg[i].erase(j); - nothingFreed = false; - } - } - } while (continueCollect); - // free temporaries - for (unsigned int i = 0; i < object_.size(); ++i) - { - if ((object_[i].storage == Storage::temporary) - and hasCreatedObject(i)) - { - freeObject(i); - } - } - // any remaining objects in step i garbage collection schedule - // is scheduled for step i + 1 - if (i + 1 < p.size()) - { - for (auto &j: freeProg[i]) - { - freeProg[i + 1].insert(j); - } - } - // print used memory after garbage collection if necessary - if (!isDryRun()) - { - sizeAfter = getTotalSize(); - if (sizeBefore != sizeAfter) - { - LOG(Message) << "Allocated objects: " << MEM_MSG(sizeAfter) - << std::endl; - } - else - { - LOG(Message) << "Nothing to free" << std::endl; - } - } - } - - return memPeak; -} - -Environment::Size Environment::executeProgram(const std::vector &p) -{ - std::vector pAddress; - - for (auto &n: p) - { - pAddress.push_back(getModuleAddress(n)); - } - - return executeProgram(pAddress); -} - // general memory management /////////////////////////////////////////////////// void Environment::addObject(const std::string name, const int moduleAddress) { @@ -490,6 +156,17 @@ void Environment::addObject(const std::string name, const int moduleAddress) } } +void Environment::setObjectModule(const unsigned int objAddress, + const int modAddress) +{ + object_[objAddress].module = modAddress; +} + +unsigned int Environment::getMaxAddress(void) const +{ + return object_.size(); +} + unsigned int Environment::getObjectAddress(const std::string name) const { if (hasObject(name)) @@ -555,7 +232,24 @@ 
Environment::Size Environment::getObjectSize(const std::string name) const return getObjectSize(getObjectAddress(name)); } -unsigned int Environment::getObjectModule(const unsigned int address) const +Environment::Storage Environment::getObjectStorage(const unsigned int address) const +{ + if (hasObject(address)) + { + return object_[address].storage; + } + else + { + HADRON_ERROR("no object with address " + std::to_string(address)); + } +} + +Environment::Storage Environment::getObjectStorage(const std::string name) const +{ + return getObjectStorage(getObjectAddress(name)); +} + +int Environment::getObjectModule(const unsigned int address) const { if (hasObject(address)) { @@ -567,7 +261,7 @@ unsigned int Environment::getObjectModule(const unsigned int address) const } } -unsigned int Environment::getObjectModule(const std::string name) const +int Environment::getObjectModule(const std::string name) const { return getObjectModule(getObjectAddress(name)); } @@ -696,7 +390,7 @@ bool Environment::freeObject(const unsigned int address) { if (!hasOwners(address)) { - if (!isDryRun() and hasCreatedObject(address)) + if (hasCreatedObject(address)) { LOG(Message) << "Destroying object '" << object_[address].name << "'" << std::endl; @@ -732,14 +426,9 @@ void Environment::freeAll(void) } } -void Environment::printContent(void) +// print environment content /////////////////////////////////////////////////// +void Environment::printContent(void) const { - LOG(Debug) << "Modules: " << std::endl; - for (unsigned int i = 0; i < module_.size(); ++i) - { - LOG(Debug) << std::setw(4) << i << ": " - << getModuleName(i) << std::endl; - } LOG(Debug) << "Objects: " << std::endl; for (unsigned int i = 0; i < object_.size(); ++i) { diff --git a/extras/Hadrons/Environment.hpp b/extras/Hadrons/Environment.hpp index 58e035ac..9d482923 100644 --- a/extras/Hadrons/Environment.hpp +++ b/extras/Hadrons/Environment.hpp @@ -31,20 +31,12 @@ See the full license in the file "LICENSE" in the top 
level distribution directo #define Hadrons_Environment_hpp_ #include -#include - -#ifndef SITE_SIZE_TYPE -#define SITE_SIZE_TYPE unsigned int -#endif BEGIN_HADRONS_NAMESPACE /****************************************************************************** * Global environment * ******************************************************************************/ -// forward declaration of Module -class ModuleBase; - class Object { public: @@ -66,26 +58,22 @@ private: std::unique_ptr objPt_{nullptr}; }; +#define DEFINE_ENV_ALIAS \ +inline Environment & env(void) const\ +{\ + return Environment::getInstance();\ +} + class Environment { SINGLETON(Environment); public: typedef SITE_SIZE_TYPE Size; - typedef std::unique_ptr ModPt; typedef std::unique_ptr GridPt; typedef std::unique_ptr GridRbPt; typedef std::unique_ptr RngPt; - typedef std::unique_ptr LatticePt; enum class Storage {object, cache, temporary}; private: - struct ModuleInfo - { - const std::type_info *type{nullptr}; - std::string name; - ModPt data{nullptr}; - std::vector input; - size_t maxAllocated; - }; struct ObjInfo { Size size{0}; @@ -98,53 +86,17 @@ private: std::unique_ptr data{nullptr}; }; public: - // dry run - void dryRun(const bool isDry); - bool isDryRun(void) const; - void memoryProfile(const bool doMemoryProfile); - bool doMemoryProfile(void) const; - // trajectory number - void setTrajectory(const unsigned int traj); - unsigned int getTrajectory(void) const; // grids void createGrid(const unsigned int Ls); GridCartesian * getGrid(const unsigned int Ls = 1) const; GridRedBlackCartesian * getRbGrid(const unsigned int Ls = 1) const; std::vector getDim(void) const; int getDim(const unsigned int mu) const; + unsigned long int getLocalVolume(void) const; unsigned int getNd(void) const; // random number generator void setSeed(const std::vector &seed); GridParallelRNG * get4dRng(void) const; - // module management - void pushModule(ModPt &pt); - template - void createModule(const std::string name); - 
template - void createModule(const std::string name, - const typename M::Par &par); - void createModule(const std::string name, - const std::string type, - XmlReader &reader); - unsigned int getNModule(void) const; - ModuleBase * getModule(const unsigned int address) const; - ModuleBase * getModule(const std::string name) const; - template - M * getModule(const unsigned int address) const; - template - M * getModule(const std::string name) const; - unsigned int getModuleAddress(const std::string name) const; - std::string getModuleName(const unsigned int address) const; - std::string getModuleType(const unsigned int address) const; - std::string getModuleType(const std::string name) const; - std::string getModuleNamespace(const unsigned int address) const; - std::string getModuleNamespace(const std::string name) const; - bool hasModule(const unsigned int address) const; - bool hasModule(const std::string name) const; - Graph makeModuleGraph(void) const; - void checkGraph(void) const; - Size executeProgram(const std::vector &p); - Size executeProgram(const std::vector &p); // general memory management void addObject(const std::string name, const int moduleAddress = -1); @@ -153,18 +105,23 @@ public: const Storage storage, const unsigned int Ls, P &&pt); + void setObjectModule(const unsigned int objAddress, + const int modAddress); template T * getObject(const unsigned int address) const; template T * getObject(const std::string name) const; + unsigned int getMaxAddress(void) const; unsigned int getObjectAddress(const std::string name) const; std::string getObjectName(const unsigned int address) const; std::string getObjectType(const unsigned int address) const; std::string getObjectType(const std::string name) const; Size getObjectSize(const unsigned int address) const; Size getObjectSize(const std::string name) const; - unsigned int getObjectModule(const unsigned int address) const; - unsigned int getObjectModule(const std::string name) const; + Storage 
getObjectStorage(const unsigned int address) const; + Storage getObjectStorage(const std::string name) const; + int getObjectModule(const unsigned int address) const; + int getObjectModule(const std::string name) const; unsigned int getObjectLs(const unsigned int address) const; unsigned int getObjectLs(const std::string name) const; bool hasObject(const unsigned int address) const; @@ -187,11 +144,11 @@ public: bool freeObject(const unsigned int address); bool freeObject(const std::string name); void freeAll(void); - void printContent(void); + // print environment content + void printContent(void) const; private: // general - bool dryRun_{false}, memoryProfile_{false}; - unsigned int traj_, locVol_; + unsigned long int locVol_; // grids std::vector dim_; GridPt grid4d_; @@ -201,12 +158,6 @@ private: unsigned int nd_; // random number generator RngPt rng4d_; - // module and related maps - std::vector module_; - std::map moduleAddress_; - std::string currentModule_{""}; - // lattice store - std::map lattice_; // object store std::vector object_; std::map objectAddress_; @@ -243,46 +194,7 @@ void Holder::reset(T *pt) /****************************************************************************** * Environment template implementation * ******************************************************************************/ -// module management /////////////////////////////////////////////////////////// -template -void Environment::createModule(const std::string name) -{ - ModPt pt(new M(name)); - - pushModule(pt); -} - -template -void Environment::createModule(const std::string name, - const typename M::Par &par) -{ - ModPt pt(new M(name)); - - static_cast(pt.get())->setPar(par); - pushModule(pt); -} - -template -M * Environment::getModule(const unsigned int address) const -{ - if (auto *pt = dynamic_cast(getModule(address))) - { - return pt; - } - else - { - HADRON_ERROR("module '" + module_[address].name - + "' does not have type " + typeid(M).name() - + "(object type: " + 
getModuleType(address) + ")"); - } -} - -template -M * Environment::getModule(const std::string name) const -{ - return getModule(getModuleAddress(name)); -} - +// general memory management /////////////////////////////////////////////////// template void Environment::createObject(const std::string name, const Environment::Storage storage, diff --git a/extras/Hadrons/Global.hpp b/extras/Hadrons/Global.hpp index 371256e8..1f0ce201 100644 --- a/extras/Hadrons/Global.hpp +++ b/extras/Hadrons/Global.hpp @@ -35,6 +35,10 @@ See the full license in the file "LICENSE" in the top level distribution directo #include #include +#ifndef SITE_SIZE_TYPE +#define SITE_SIZE_TYPE unsigned int +#endif + #define BEGIN_HADRONS_NAMESPACE \ namespace Grid {\ using namespace QCD;\ diff --git a/extras/Hadrons/Makefile.am b/extras/Hadrons/Makefile.am index 9cb23600..826cb158 100644 --- a/extras/Hadrons/Makefile.am +++ b/extras/Hadrons/Makefile.am @@ -8,7 +8,8 @@ libHadrons_a_SOURCES = \ Application.cc \ Environment.cc \ Global.cc \ - Module.cc + Module.cc \ + VirtualMachine.cc libHadrons_adir = $(pkgincludedir)/Hadrons nobase_libHadrons_a_HEADERS = \ $(modules_hpp) \ @@ -20,7 +21,8 @@ nobase_libHadrons_a_HEADERS = \ Graph.hpp \ Module.hpp \ Modules.hpp \ - ModuleFactory.hpp + ModuleFactory.hpp \ + VirtualMachine.hpp HadronsXmlRun_SOURCES = HadronsXmlRun.cc HadronsXmlRun_LDADD = libHadrons.a -lGrid diff --git a/extras/Hadrons/Module.cc b/extras/Hadrons/Module.cc index 2549a931..bf596bfc 100644 --- a/extras/Hadrons/Module.cc +++ b/extras/Hadrons/Module.cc @@ -39,7 +39,6 @@ using namespace Hadrons; // constructor ///////////////////////////////////////////////////////////////// ModuleBase::ModuleBase(const std::string name) : name_(name) -, env_(Environment::getInstance()) {} // access ////////////////////////////////////////////////////////////////////// @@ -48,11 +47,6 @@ std::string ModuleBase::getName(void) const return name_; } -Environment & ModuleBase::env(void) const -{ - return env_; 
-} - // get factory registration name if available std::string ModuleBase::getRegisteredName(void) { @@ -64,7 +58,7 @@ std::string ModuleBase::getRegisteredName(void) void ModuleBase::operator()(void) { setup(); - if (!env().isDryRun()) + if (!vm().isDryRun()) { execute(); } diff --git a/extras/Hadrons/Module.hpp b/extras/Hadrons/Module.hpp index 017a9172..d1910c9b 100644 --- a/extras/Hadrons/Module.hpp +++ b/extras/Hadrons/Module.hpp @@ -31,7 +31,7 @@ See the full license in the file "LICENSE" in the top level distribution directo #define Hadrons_Module_hpp_ #include -#include +#include BEGIN_HADRONS_NAMESPACE @@ -148,7 +148,6 @@ public: virtual ~ModuleBase(void) = default; // access std::string getName(void) const; - Environment &env(void) const; // get factory registration name if available virtual std::string getRegisteredName(void); // dependencies/products @@ -163,9 +162,12 @@ protected: // setup virtual void setup(void) {}; virtual void execute(void) = 0; + // environment shortcut + DEFINE_ENV_ALIAS; + // virtual machine shortcut + DEFINE_VM_ALIAS; private: std::string name_; - Environment &env_; }; // derived class, templating the parameter class diff --git a/extras/Hadrons/Modules/MContraction/Meson.hpp b/extras/Hadrons/Modules/MContraction/Meson.hpp index 31640b7c..7c0012d2 100644 --- a/extras/Hadrons/Modules/MContraction/Meson.hpp +++ b/extras/Hadrons/Modules/MContraction/Meson.hpp @@ -211,7 +211,7 @@ void TMeson::execute(void) Gamma gSrc(gammaList[i].second); std::string ns; - ns = env().getModuleNamespace(env().getObjectModule(par().sink)); + ns = vm().getModuleNamespace(env().getObjectModule(par().sink)); if (ns == "MSource") { PropagatorField1 &sink = envGet(PropagatorField1, par().sink); diff --git a/extras/Hadrons/VirtualMachine.cc b/extras/Hadrons/VirtualMachine.cc new file mode 100644 index 00000000..f09e2710 --- /dev/null +++ b/extras/Hadrons/VirtualMachine.cc @@ -0,0 +1,388 @@ 
+/************************************************************************************* + +Grid physics library, www.github.com/paboyle/Grid + +Source file: extras/Hadrons/VirtualMachine.cc + +Copyright (C) 2017 + +Author: Antonin Portelli + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along +with this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + +See the full license in the file "LICENSE" in the top level distribution directory +*************************************************************************************/ +/* END LEGAL */ + +#include +#include + +using namespace Grid; +using namespace QCD; +using namespace Hadrons; + +/****************************************************************************** + * VirtualMachine implementation * + ******************************************************************************/ +// dry run ///////////////////////////////////////////////////////////////////// +void VirtualMachine::dryRun(const bool isDry) +{ + dryRun_ = isDry; +} + +bool VirtualMachine::isDryRun(void) const +{ + return dryRun_; +} + +void VirtualMachine::memoryProfile(const bool doMemoryProfile) +{ + memoryProfile_ = doMemoryProfile; +} + +bool VirtualMachine::doMemoryProfile(void) const +{ + return memoryProfile_; +} + +// trajectory counter ////////////////////////////////////////////////////////// +void VirtualMachine::setTrajectory(const unsigned int traj) +{ + traj_ = traj; 
+} + +unsigned int VirtualMachine::getTrajectory(void) const +{ + return traj_; +} + +// module management /////////////////////////////////////////////////////////// +void VirtualMachine::pushModule(VirtualMachine::ModPt &pt) +{ + std::string name = pt->getName(); + + if (!hasModule(name)) + { + std::vector inputAddress; + unsigned int address; + ModuleInfo m; + + m.data = std::move(pt); + m.type = typeIdPt(*m.data.get()); + m.name = name; + auto input = m.data->getInput(); + for (auto &in: input) + { + if (!env().hasObject(in)) + { + env().addObject(in , -1); + } + m.input.push_back(env().getObjectAddress(in)); + } + auto output = m.data->getOutput(); + module_.push_back(std::move(m)); + address = static_cast(module_.size() - 1); + moduleAddress_[name] = address; + for (auto &out: output) + { + if (!env().hasObject(out)) + { + env().addObject(out, address); + } + else + { + if (env().getObjectModule(env().getObjectAddress(out)) < 0) + { + env().setObjectModule(env().getObjectAddress(out), address); + } + else + { + HADRON_ERROR("object '" + out + + "' is already produced by module '" + + module_[env().getObjectModule(out)].name + + "' (while pushing module '" + name + "')"); + } + } + } + } + else + { + HADRON_ERROR("module '" + name + "' already exists"); + } +} + +unsigned int VirtualMachine::getNModule(void) const +{ + return module_.size(); +} + +void VirtualMachine::createModule(const std::string name, const std::string type, + XmlReader &reader) +{ + auto &factory = ModuleFactory::getInstance(); + auto pt = factory.create(type, name); + + pt->parseParameters(reader, "options"); + pushModule(pt); +} + +ModuleBase * VirtualMachine::getModule(const unsigned int address) const +{ + if (hasModule(address)) + { + return module_[address].data.get(); + } + else + { + HADRON_ERROR("no module with address " + std::to_string(address)); + } +} + +ModuleBase * VirtualMachine::getModule(const std::string name) const +{ + return getModule(getModuleAddress(name)); +} + 
+unsigned int VirtualMachine::getModuleAddress(const std::string name) const +{ + if (hasModule(name)) + { + return moduleAddress_.at(name); + } + else + { + HADRON_ERROR("no module with name '" + name + "'"); + } +} + +std::string VirtualMachine::getModuleName(const unsigned int address) const +{ + if (hasModule(address)) + { + return module_[address].name; + } + else + { + HADRON_ERROR("no module with address " + std::to_string(address)); + } +} + +std::string VirtualMachine::getModuleType(const unsigned int address) const +{ + if (hasModule(address)) + { + return typeName(module_[address].type); + } + else + { + HADRON_ERROR("no module with address " + std::to_string(address)); + } +} + +std::string VirtualMachine::getModuleType(const std::string name) const +{ + return getModuleType(getModuleAddress(name)); +} + +std::string VirtualMachine::getModuleNamespace(const unsigned int address) const +{ + std::string type = getModuleType(address), ns; + + auto pos2 = type.rfind("::"); + auto pos1 = type.rfind("::", pos2 - 2); + + return type.substr(pos1 + 2, pos2 - pos1 - 2); +} + +std::string VirtualMachine::getModuleNamespace(const std::string name) const +{ + return getModuleNamespace(getModuleAddress(name)); +} + +bool VirtualMachine::hasModule(const unsigned int address) const +{ + return (address < module_.size()); +} + +bool VirtualMachine::hasModule(const std::string name) const +{ + return (moduleAddress_.find(name) != moduleAddress_.end()); +} + +Graph VirtualMachine::makeModuleGraph(void) const +{ + Graph moduleGraph; + + for (unsigned int i = 0; i < module_.size(); ++i) + { + moduleGraph.addVertex(i); + for (auto &j: module_[i].input) + { + moduleGraph.addEdge(env().getObjectModule(j), i); + } + } + + return moduleGraph; +} + +// void VirtualMachine::checkGraph(void) const +// { +// for (auto &o: object_) +// { +// if (o.module < 0) +// { +// HADRON_ERROR("object '" + o.name + "' does not have a creator"); +// } +// } +// } + +// general execution 
/////////////////////////////////////////////////////////// +#define BIG_SEP "===============" +#define SEP "---------------" +#define MEM_MSG(size)\ +sizeString((size)*env().getLocalVolume()) << " (" << sizeString(size) << "/site)" + +VirtualMachine::Size +VirtualMachine::executeProgram(const std::vector &p) +{ + Size memPeak = 0, sizeBefore, sizeAfter; + std::vector> freeProg; + bool continueCollect, nothingFreed; + + // build garbage collection schedule + LOG(Debug) << "Building garbage collection schedule..." << std::endl; + freeProg.resize(p.size()); + for (unsigned int i = 0; i < env().getMaxAddress(); ++i) + { + auto pred = [i, this](const unsigned int j) + { + auto &in = module_[j].input; + auto it = std::find(in.begin(), in.end(), i); + + return (it != in.end()) or (j == env().getObjectModule(i)); + }; + auto it = std::find_if(p.rbegin(), p.rend(), pred); + if (it != p.rend()) + { + freeProg[std::distance(it, p.rend()) - 1].insert(i); + } + } + + // program execution + LOG(Debug) << "Executing program..." << std::endl; + for (unsigned int i = 0; i < p.size(); ++i) + { + // execute module + if (!isDryRun()) + { + LOG(Message) << SEP << " Measurement step " << i+1 << "/" + << p.size() << " (module '" << module_[p[i]].name + << "') " << SEP << std::endl; + } + (*module_[p[i]].data)(); + sizeBefore = env().getTotalSize(); + // print used memory after execution + if (!isDryRun()) + { + LOG(Message) << "Allocated objects: " << MEM_MSG(sizeBefore) + << std::endl; + } + if (sizeBefore > memPeak) + { + memPeak = sizeBefore; + } + // garbage collection for step i + if (!isDryRun()) + { + LOG(Message) << "Garbage collection..." 
<< std::endl; + } + nothingFreed = true; + do + { + continueCollect = false; + auto toFree = freeProg[i]; + for (auto &j: toFree) + { + // continue garbage collection while there are still + // objects without owners + continueCollect = continueCollect or !env().hasOwners(j); + if(env().freeObject(j)) + { + // if an object has been freed, remove it from + // the garbage collection schedule + freeProg[i].erase(j); + nothingFreed = false; + } + } + } while (continueCollect); + // free temporaries + for (unsigned int i = 0; i < env().getMaxAddress(); ++i) + { + if ((env().getObjectStorage(i) == Environment::Storage::temporary) + and env().hasCreatedObject(i)) + { + env().freeObject(i); + } + } + // any remaining objects in step i garbage collection schedule + // is scheduled for step i + 1 + if (i + 1 < p.size()) + { + for (auto &j: freeProg[i]) + { + freeProg[i + 1].insert(j); + } + } + // print used memory after garbage collection if necessary + if (!isDryRun()) + { + sizeAfter = env().getTotalSize(); + if (sizeBefore != sizeAfter) + { + LOG(Message) << "Allocated objects: " << MEM_MSG(sizeAfter) + << std::endl; + } + else + { + LOG(Message) << "Nothing to free" << std::endl; + } + } + } + + return memPeak; +} + +VirtualMachine::Size VirtualMachine::executeProgram(const std::vector &p) +{ + std::vector pAddress; + + for (auto &n: p) + { + pAddress.push_back(getModuleAddress(n)); + } + + return executeProgram(pAddress); +} + +// print VM content //////////////////////////////////////////////////////////// +void VirtualMachine::printContent(void) const +{ + LOG(Debug) << "Modules: " << std::endl; + for (unsigned int i = 0; i < module_.size(); ++i) + { + LOG(Debug) << std::setw(4) << i << ": " + << getModuleName(i) << std::endl; + } +} diff --git a/extras/Hadrons/VirtualMachine.hpp b/extras/Hadrons/VirtualMachine.hpp new file mode 100644 index 00000000..357fdb5b --- /dev/null +++ b/extras/Hadrons/VirtualMachine.hpp @@ -0,0 +1,164 @@ 
+/************************************************************************************* + +Grid physics library, www.github.com/paboyle/Grid + +Source file: extras/Hadrons/VirtualMachine.hpp + +Copyright (C) 2017 + +Author: Antonin Portelli + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along +with this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + +See the full license in the file "LICENSE" in the top level distribution directory +*************************************************************************************/ +/* END LEGAL */ + +#ifndef Hadrons_VirtualMachine_hpp_ +#define Hadrons_VirtualMachine_hpp_ + +#include +#include +#include + +BEGIN_HADRONS_NAMESPACE + +#define DEFINE_VM_ALIAS \ +inline VirtualMachine & vm(void) const\ +{\ + return VirtualMachine::getInstance();\ +} + +/****************************************************************************** + * Virtual machine for module execution * + ******************************************************************************/ +// forward declaration of Module +class ModuleBase; + +class VirtualMachine +{ + SINGLETON_DEFCTOR(VirtualMachine); +public: + typedef SITE_SIZE_TYPE Size; + typedef std::unique_ptr ModPt; +private: + struct ModuleInfo + { + const std::type_info *type{nullptr}; + std::string name; + ModPt data{nullptr}; + std::vector input; + size_t maxAllocated; + }; +public: + // dry run + void dryRun(const bool isDry); + 
bool isDryRun(void) const; + void memoryProfile(const bool doMemoryProfile); + bool doMemoryProfile(void) const; + // trajectory counter + void setTrajectory(const unsigned int traj); + unsigned int getTrajectory(void) const; + // module management + void pushModule(ModPt &pt); + template + void createModule(const std::string name); + template + void createModule(const std::string name, + const typename M::Par &par); + void createModule(const std::string name, + const std::string type, + XmlReader &reader); + unsigned int getNModule(void) const; + ModuleBase * getModule(const unsigned int address) const; + ModuleBase * getModule(const std::string name) const; + template + M * getModule(const unsigned int address) const; + template + M * getModule(const std::string name) const; + unsigned int getModuleAddress(const std::string name) const; + std::string getModuleName(const unsigned int address) const; + std::string getModuleType(const unsigned int address) const; + std::string getModuleType(const std::string name) const; + std::string getModuleNamespace(const unsigned int address) const; + std::string getModuleNamespace(const std::string name) const; + bool hasModule(const unsigned int address) const; + bool hasModule(const std::string name) const; + Graph makeModuleGraph(void) const; + void checkGraph(void) const; + // print VM content + void printContent(void) const; + // general execution + Size executeProgram(const std::vector &p); + Size executeProgram(const std::vector &p); +private: + // environment shortcut + DEFINE_ENV_ALIAS; +private: + // general + bool dryRun_{false}, memoryProfile_{false}; + unsigned int traj_; + // module and related maps + std::vector module_; + std::map moduleAddress_; + std::string currentModule_{""}; +}; + +/****************************************************************************** + * VirtualMachine template implementation * + ******************************************************************************/ +// module management 
/////////////////////////////////////////////////////////// +template +void VirtualMachine::createModule(const std::string name) +{ + ModPt pt(new M(name)); + + pushModule(pt); +} + +template +void VirtualMachine::createModule(const std::string name, + const typename M::Par &par) +{ + ModPt pt(new M(name)); + + static_cast(pt.get())->setPar(par); + pushModule(pt); +} + +template +M * VirtualMachine::getModule(const unsigned int address) const +{ + if (auto *pt = dynamic_cast(getModule(address))) + { + return pt; + } + else + { + HADRON_ERROR("module '" + module_[address].name + + "' does not have type " + typeid(M).name() + + "(has type: " + getModuleType(address) + ")"); + } +} + +template +M * VirtualMachine::getModule(const std::string name) const +{ + return getModule(getModuleAddress(name)); +} + +END_HADRONS_NAMESPACE + +#endif // Hadrons_VirtualMachine_hpp_ From 62eb1f0e593042f9f7665a55c0f17fe3e196beae Mon Sep 17 00:00:00 2001 From: Antonin Portelli Date: Wed, 6 Dec 2017 16:48:17 +0100 Subject: [PATCH 243/377] FermionOperator virtual destructor needed for polymorphism --- lib/qcd/action/fermion/FermionOperator.h | 1 + 1 file changed, 1 insertion(+) diff --git a/lib/qcd/action/fermion/FermionOperator.h b/lib/qcd/action/fermion/FermionOperator.h index 676a0e83..ddd2272a 100644 --- a/lib/qcd/action/fermion/FermionOperator.h +++ b/lib/qcd/action/fermion/FermionOperator.h @@ -47,6 +47,7 @@ namespace Grid { INHERIT_IMPL_TYPES(Impl); FermionOperator(const ImplParams &p= ImplParams()) : Impl(p) {}; + virtual ~FermionOperator(void) = default; virtual FermionField &tmp(void) = 0; From e78794688a676131ecf88902ae923a7d32b7cb96 Mon Sep 17 00:00:00 2001 From: Antonin Portelli Date: Wed, 6 Dec 2017 16:50:25 +0100 Subject: [PATCH 244/377] memory profiler improvement --- lib/allocator/AlignedAllocator.cc | 3 +- lib/allocator/AlignedAllocator.h | 79 ++++++++++++++++++------------- 2 files changed, 47 insertions(+), 35 deletions(-) diff --git 
a/lib/allocator/AlignedAllocator.cc b/lib/allocator/AlignedAllocator.cc index 944e287f..dfdb1050 100644 --- a/lib/allocator/AlignedAllocator.cc +++ b/lib/allocator/AlignedAllocator.cc @@ -3,7 +3,8 @@ namespace Grid { -MemoryStats *MemoryProfiler::stats = nullptr; +MemoryStats *MemoryProfiler::stats = nullptr; +bool MemoryProfiler::debug = false; int PointerCache::victim; diff --git a/lib/allocator/AlignedAllocator.h b/lib/allocator/AlignedAllocator.h index bdccacec..85e2b240 100644 --- a/lib/allocator/AlignedAllocator.h +++ b/lib/allocator/AlignedAllocator.h @@ -74,8 +74,47 @@ namespace Grid { { public: static MemoryStats *stats; + static bool debug; }; + #define profilerDebugPrint \ + if (MemoryProfiler::stats)\ + {\ + auto s = MemoryProfiler::stats;\ + std::cout << "[Memory debug] Stats " << MemoryProfiler::stats << std::endl;\ + std::cout << "[Memory debug] Total : " << s->totalAllocated << "B" << std::endl;\ + std::cout << "[Memory debug] Max : " << s->maxAllocated << "B" << std::endl;\ + std::cout << "[Memory debug] Current: " << s->totalAllocated << "B" << std::endl;\ + std::cout << "[Memory debug] Freed : " << s->totalFreed << "B" << std::endl;\ + } + + #define profilerAllocate(bytes)\ + if (MemoryProfiler::stats)\ + {\ + auto s = MemoryProfiler::stats;\ + s->totalAllocated += (bytes);\ + s->currentlyAllocated += (bytes);\ + s->maxAllocated = std::max(s->maxAllocated, s->currentlyAllocated);\ + }\ + if (MemoryProfiler::debug)\ + {\ + std::cout << "[Memory debug] allocating " << bytes << "B" << std::endl;\ + profilerDebugPrint;\ + } + + #define profilerFree(bytes)\ + if (MemoryProfiler::stats)\ + {\ + auto s = MemoryProfiler::stats;\ + s->totalFreed += (bytes);\ + s->currentlyAllocated -= (bytes);\ + }\ + if (MemoryProfiler::debug)\ + {\ + std::cout << "[Memory debug] freeing " << bytes << "B" << std::endl;\ + profilerDebugPrint;\ + } + void check_huge_pages(void *Buf,uint64_t BYTES); //////////////////////////////////////////////////////////////////// @@ 
-104,13 +143,7 @@ public: pointer allocate(size_type __n, const void* _p= 0) { size_type bytes = __n*sizeof(_Tp); - - if (auto s = MemoryProfiler::stats) - { - s->totalAllocated += bytes; - s->currentlyAllocated += bytes; - s->maxAllocated = std::max(s->maxAllocated, s->currentlyAllocated); - } + profilerAllocate(bytes); _Tp *ptr = (_Tp *) PointerCache::Lookup(bytes); // if ( ptr != NULL ) @@ -141,11 +174,7 @@ public: void deallocate(pointer __p, size_type __n) { size_type bytes = __n * sizeof(_Tp); - if (auto s = MemoryProfiler::stats) - { - s->totalFreed += bytes; - s->currentlyAllocated -= bytes; - } + profilerFree(bytes); pointer __freeme = (pointer)PointerCache::Insert((void *)__p,bytes); @@ -199,12 +228,7 @@ public: { size_type bytes = __n*sizeof(_Tp); - if (auto s = MemoryProfiler::stats) - { - s->totalAllocated += bytes; - s->currentlyAllocated += bytes; - s->maxAllocated = std::max(s->maxAllocated, s->currentlyAllocated); - } + profilerAllocate(bytes); #ifdef CRAY _Tp *ptr = (_Tp *) shmem_align(bytes,64); #else @@ -229,11 +253,7 @@ public: void deallocate(pointer __p, size_type __n) { size_type bytes = __n*sizeof(_Tp); - if (auto s = MemoryProfiler::stats) - { - s->totalFreed += bytes; - s->currentlyAllocated -= bytes; - } + profilerFree(bytes); shmem_free((void *)__p); } #else @@ -241,12 +261,7 @@ public: { size_type bytes = __n*sizeof(_Tp); - if (auto s = MemoryProfiler::stats) - { - s->totalAllocated += bytes; - s->currentlyAllocated += bytes; - s->maxAllocated = std::max(s->maxAllocated, s->currentlyAllocated); - } + profilerAllocate(bytes); #ifdef HAVE_MM_MALLOC_H _Tp * ptr = (_Tp *) _mm_malloc(bytes, GRID_ALLOC_ALIGN); #else @@ -265,11 +280,7 @@ public: void deallocate(pointer __p, size_type __n) { size_type bytes = __n*sizeof(_Tp); - if (auto s = MemoryProfiler::stats) - { - s->totalFreed += bytes; - s->currentlyAllocated -= bytes; - } + profilerFree(bytes); #ifdef HAVE_MM_MALLOC_H _mm_free((void *)__p); #else From 
0fbf445edd90be7ac6363a77bc93c8b7325c45fe Mon Sep 17 00:00:00 2001 From: Antonin Portelli Date: Wed, 6 Dec 2017 16:51:48 +0100 Subject: [PATCH 245/377] Hadrons: object creation that get properly captured by the memory profiler --- extras/Hadrons/Environment.hpp | 32 +++++++++++++++------ extras/Hadrons/Module.hpp | 27 +++++++++-------- extras/Hadrons/Modules/MAction/DWF.hpp | 5 ++-- extras/Hadrons/Modules/MAction/Wilson.hpp | 5 ++-- extras/Hadrons/Modules/MSolver/RBPrecCG.hpp | 2 +- extras/Hadrons/VirtualMachine.cc | 3 +- 6 files changed, 44 insertions(+), 30 deletions(-) diff --git a/extras/Hadrons/Environment.hpp b/extras/Hadrons/Environment.hpp index 9d482923..5058a820 100644 --- a/extras/Hadrons/Environment.hpp +++ b/extras/Hadrons/Environment.hpp @@ -100,11 +100,16 @@ public: // general memory management void addObject(const std::string name, const int moduleAddress = -1); - template + template + void createDerivedObject(const std::string name, + const Environment::Storage storage, + const unsigned int Ls, + Ts && ... args); + template void createObject(const std::string name, - const Storage storage, + const Environment::Storage storage, const unsigned int Ls, - P &&pt); + Ts && ... args); void setObjectModule(const unsigned int objAddress, const int modAddress); template @@ -195,11 +200,11 @@ void Holder::reset(T *pt) * Environment template implementation * ******************************************************************************/ // general memory management /////////////////////////////////////////////////// -template -void Environment::createObject(const std::string name, +template +void Environment::createDerivedObject(const std::string name, const Environment::Storage storage, const unsigned int Ls, - P &&pt) + Ts && ... 
args) { if (!hasObject(name)) { @@ -210,13 +215,13 @@ void Environment::createObject(const std::string name, if (!object_[address].data) { - MemoryStats memStats; + MemoryStats memStats; MemoryProfiler::stats = &memStats; object_[address].storage = storage; object_[address].Ls = Ls; - object_[address].data.reset(new Holder(pt)); - object_[address].size = memStats.totalAllocated; + object_[address].data.reset(new Holder(new T(std::forward(args)...))); + object_[address].size = memStats.maxAllocated; object_[address].type = &typeid(T); MemoryProfiler::stats = nullptr; } @@ -226,6 +231,15 @@ void Environment::createObject(const std::string name, } } +template +void Environment::createObject(const std::string name, + const Environment::Storage storage, + const unsigned int Ls, + Ts && ... args) +{ + createDerivedObject(name, storage, Ls, std::forward(args)...); +} + template T * Environment::getObject(const unsigned int address) const { diff --git a/extras/Hadrons/Module.hpp b/extras/Hadrons/Module.hpp index d1910c9b..14d98bfb 100644 --- a/extras/Hadrons/Module.hpp +++ b/extras/Hadrons/Module.hpp @@ -98,39 +98,42 @@ static ns##mod##ModuleRegistrar ns##mod##ModuleRegistrarInstance; #define envHasType(type, name)\ env().template isObjectOfType(name) -#define envCreate(type, name, Ls, pt)\ -env().template createObject(name, Environment::Storage::object, Ls, pt) +#define envCreate(type, name, Ls, ...)\ +env().template createObject(name, Environment::Storage::object, Ls, __VA_ARGS__) + +#define envCreateDerived(base, type, name, Ls, ...)\ +env().template createDerivedObject(name, Environment::Storage::object, Ls, __VA_ARGS__) #define envCreateLat4(type, name)\ -envCreate(type, name, 1, new type(env().getGrid())) +envCreate(type, name, 1, env().getGrid()) #define envCreateLat5(type, name, Ls)\ -envCreate(type, name, Ls, new type(env().getGrid(Ls))) +envCreate(type, name, Ls, env().getGrid(Ls)) #define envCreateLat(...)\ MACRO_REDIRECT(__VA_ARGS__, envCreateLat5, 
envCreateLat4)(__VA_ARGS__) -#define envCache(type, name, Ls, pt)\ -env().template createObject(name, Environment::Storage::cache, Ls, pt) +#define envCache(type, name, Ls, ...)\ +env().template createObject(name, Environment::Storage::cache, Ls, __VA_ARGS__) #define envCacheLat4(type, name)\ -envCache(type, name, 1, new type(env().getGrid())) +envCache(type, name, 1, env().getGrid()) #define envCacheLat5(type, name, Ls)\ -envCache(type, name, Ls, new type(env().getGrid(Ls))) +envCache(type, name, Ls, env().getGrid(Ls)) #define envCacheLat(...)\ MACRO_REDIRECT(__VA_ARGS__, envCacheLat5, envCacheLat4)(__VA_ARGS__) -#define envTmp(type, name, Ls, pt)\ +#define envTmp(type, name, Ls, ...)\ env().template createObject(getName() + "_tmp_" + name, \ - Environment::Storage::temporary, Ls, pt) + Environment::Storage::temporary, Ls, __VA_ARGS__) #define envTmpLat4(type, name)\ -envTmp(type, name, 1, new type(env().getGrid())) +envTmp(type, name, 1, env().getGrid()) #define envTmpLat5(type, name, Ls)\ -envTmp(type, name, Ls, new type(env().getGrid(Ls))) +envTmp(type, name, Ls, env().getGrid(Ls)) #define envTmpLat(...)\ MACRO_REDIRECT(__VA_ARGS__, envTmpLat5, envTmpLat4)(__VA_ARGS__) diff --git a/extras/Hadrons/Modules/MAction/DWF.hpp b/extras/Hadrons/Modules/MAction/DWF.hpp index 36c70073..e7d28476 100644 --- a/extras/Hadrons/Modules/MAction/DWF.hpp +++ b/extras/Hadrons/Modules/MAction/DWF.hpp @@ -118,9 +118,8 @@ void TDWF::setup(void) auto &grb5 = *env().getRbGrid(par().Ls); std::vector boundary = strToVec(par().boundary); typename DomainWallFermion::ImplParams implParams(boundary); - envCreate(FMat, getName(), par().Ls, - new DomainWallFermion(U, g5, grb5, g4, grb4, par().mass, - par().M5, implParams)); + envCreateDerived(FMat, DomainWallFermion, getName(), par().Ls, U, g5, + grb5, g4, grb4, par().mass, par().M5, implParams); } // execution /////////////////////////////////////////////////////////////////// diff --git a/extras/Hadrons/Modules/MAction/Wilson.hpp 
b/extras/Hadrons/Modules/MAction/Wilson.hpp index 7fe1f44e..591a3fed 100644 --- a/extras/Hadrons/Modules/MAction/Wilson.hpp +++ b/extras/Hadrons/Modules/MAction/Wilson.hpp @@ -111,9 +111,8 @@ void TWilson::setup(void) auto &gridRb = *env().getRbGrid(); std::vector boundary = strToVec(par().boundary); typename WilsonFermion::ImplParams implParams(boundary); - envCreate(FMat, getName(), 1, new WilsonFermion(U, grid, gridRb, - par().mass, - implParams)); + envCreateDerived(FMat, WilsonFermion, getName(), 1, U, grid, gridRb, + par().mass, implParams); } // execution /////////////////////////////////////////////////////////////////// diff --git a/extras/Hadrons/Modules/MSolver/RBPrecCG.hpp b/extras/Hadrons/Modules/MSolver/RBPrecCG.hpp index d8a4b95f..d6c21412 100644 --- a/extras/Hadrons/Modules/MSolver/RBPrecCG.hpp +++ b/extras/Hadrons/Modules/MSolver/RBPrecCG.hpp @@ -114,7 +114,7 @@ void TRBPrecCG::setup(void) schurSolver(mat, source, sol); }; - envCreate(SolverFn, getName(), Ls, new SolverFn(solver)); + envCreate(SolverFn, getName(), Ls, solver); env().addOwnership(getName(), par().action); } diff --git a/extras/Hadrons/VirtualMachine.cc b/extras/Hadrons/VirtualMachine.cc index f09e2710..ae1d5b6b 100644 --- a/extras/Hadrons/VirtualMachine.cc +++ b/extras/Hadrons/VirtualMachine.cc @@ -251,8 +251,7 @@ Graph VirtualMachine::makeModuleGraph(void) const // general execution /////////////////////////////////////////////////////////// #define BIG_SEP "===============" #define SEP "---------------" -#define MEM_MSG(size)\ -sizeString((size)*env().getLocalVolume()) << " (" << sizeString(size) << "/site)" +#define MEM_MSG(size) sizeString(size) VirtualMachine::Size VirtualMachine::executeProgram(const std::vector &p) From f9aa39e1c458652185ea81d2cfa16b9e47119e4e Mon Sep 17 00:00:00 2001 From: Antonin Portelli Date: Thu, 7 Dec 2017 14:40:58 +0100 Subject: [PATCH 246/377] global memory debug through command line flag --- extras/Hadrons/Environment.hpp | 15 +++++++++++---- 
extras/Hadrons/Global.cc | 27 --------------------------- extras/Hadrons/Global.hpp | 3 --- lib/allocator/AlignedAllocator.cc | 25 +++++++++++++++++++++++++ lib/allocator/AlignedAllocator.h | 21 ++++++++++++++------- lib/util/Init.cc | 8 +++++++- 6 files changed, 57 insertions(+), 42 deletions(-) diff --git a/extras/Hadrons/Environment.hpp b/extras/Hadrons/Environment.hpp index 5058a820..5177b312 100644 --- a/extras/Hadrons/Environment.hpp +++ b/extras/Hadrons/Environment.hpp @@ -216,14 +216,21 @@ void Environment::createDerivedObject(const std::string name, if (!object_[address].data) { MemoryStats memStats; - - MemoryProfiler::stats = &memStats; + + if (!MemoryProfiler::stats) + { + MemoryProfiler::stats = &memStats; + } + size_t initMem = MemoryProfiler::stats->currentlyAllocated; object_[address].storage = storage; object_[address].Ls = Ls; object_[address].data.reset(new Holder(new T(std::forward(args)...))); - object_[address].size = memStats.maxAllocated; + object_[address].size = MemoryProfiler::stats->maxAllocated - initMem; object_[address].type = &typeid(T); - MemoryProfiler::stats = nullptr; + if (MemoryProfiler::stats == &memStats) + { + MemoryProfiler::stats = nullptr; + } } else { diff --git a/extras/Hadrons/Global.cc b/extras/Hadrons/Global.cc index 7b0b8fb6..130ede96 100644 --- a/extras/Hadrons/Global.cc +++ b/extras/Hadrons/Global.cc @@ -39,33 +39,6 @@ HadronsLogger Hadrons::HadronsLogMessage(1,"Message"); HadronsLogger Hadrons::HadronsLogIterative(1,"Iterative"); HadronsLogger Hadrons::HadronsLogDebug(1,"Debug"); -// pretty size formatting ////////////////////////////////////////////////////// -std::string Hadrons::sizeString(long unsigned int bytes) - -{ - constexpr unsigned int bufSize = 256; - const char *suffixes[7] = {"", "K", "M", "G", "T", "P", "E"}; - char buf[256]; - long unsigned int s = 0; - double count = bytes; - - while (count >= 1024 && s < 7) - { - s++; - count /= 1024; - } - if (count - floor(count) == 0.0) - { - snprintf(buf, 
bufSize, "%d %sB", (int)count, suffixes[s]); - } - else - { - snprintf(buf, bufSize, "%.1f %sB", count, suffixes[s]); - } - - return std::string(buf); -} - // type utilities ////////////////////////////////////////////////////////////// constexpr unsigned int maxNameSize = 1024u; diff --git a/extras/Hadrons/Global.hpp b/extras/Hadrons/Global.hpp index 1f0ce201..4c37b961 100644 --- a/extras/Hadrons/Global.hpp +++ b/extras/Hadrons/Global.hpp @@ -138,9 +138,6 @@ public:\ private:\ name(void) = default; -// pretty size formating -std::string sizeString(long unsigned int bytes); - // type utilities template const std::type_info * typeIdPt(const T &x) diff --git a/lib/allocator/AlignedAllocator.cc b/lib/allocator/AlignedAllocator.cc index dfdb1050..10b49f4b 100644 --- a/lib/allocator/AlignedAllocator.cc +++ b/lib/allocator/AlignedAllocator.cc @@ -97,4 +97,29 @@ void check_huge_pages(void *Buf,uint64_t BYTES) #endif } +std::string sizeString(const size_t bytes) +{ + constexpr unsigned int bufSize = 256; + const char *suffixes[7] = {"", "K", "M", "G", "T", "P", "E"}; + char buf[256]; + size_t s = 0; + double count = bytes; + + while (count >= 1024 && s < 7) + { + s++; + count /= 1024; + } + if (count - floor(count) == 0.0) + { + snprintf(buf, bufSize, "%d %sB", (int)count, suffixes[s]); + } + else + { + snprintf(buf, bufSize, "%.1f %sB", count, suffixes[s]); + } + + return std::string(buf); +} + } diff --git a/lib/allocator/AlignedAllocator.h b/lib/allocator/AlignedAllocator.h index 85e2b240..3b27aec9 100644 --- a/lib/allocator/AlignedAllocator.h +++ b/lib/allocator/AlignedAllocator.h @@ -64,6 +64,8 @@ namespace Grid { }; + std::string sizeString(size_t bytes); + struct MemoryStats { size_t totalAllocated{0}, maxAllocated{0}, @@ -77,15 +79,20 @@ namespace Grid { static bool debug; }; + #define memString(bytes) std::to_string(bytes) + " (" + sizeString(bytes) + ")" #define profilerDebugPrint \ if (MemoryProfiler::stats)\ {\ auto s = MemoryProfiler::stats;\ - std::cout << 
"[Memory debug] Stats " << MemoryProfiler::stats << std::endl;\ - std::cout << "[Memory debug] Total : " << s->totalAllocated << "B" << std::endl;\ - std::cout << "[Memory debug] Max : " << s->maxAllocated << "B" << std::endl;\ - std::cout << "[Memory debug] Current: " << s->totalAllocated << "B" << std::endl;\ - std::cout << "[Memory debug] Freed : " << s->totalFreed << "B" << std::endl;\ + std::cout << GridLogDebug << "[Memory debug] Stats " << MemoryProfiler::stats << std::endl;\ + std::cout << GridLogDebug << "[Memory debug] total : " << memString(s->totalAllocated) \ + << std::endl;\ + std::cout << GridLogDebug << "[Memory debug] max : " << memString(s->maxAllocated) \ + << std::endl;\ + std::cout << GridLogDebug << "[Memory debug] current: " << memString(s->currentlyAllocated) \ + << std::endl;\ + std::cout << GridLogDebug << "[Memory debug] freed : " << memString(s->totalFreed) \ + << std::endl;\ } #define profilerAllocate(bytes)\ @@ -98,7 +105,7 @@ namespace Grid { }\ if (MemoryProfiler::debug)\ {\ - std::cout << "[Memory debug] allocating " << bytes << "B" << std::endl;\ + std::cout << GridLogDebug << "[Memory debug] allocating " << memString(bytes) << std::endl;\ profilerDebugPrint;\ } @@ -111,7 +118,7 @@ namespace Grid { }\ if (MemoryProfiler::debug)\ {\ - std::cout << "[Memory debug] freeing " << bytes << "B" << std::endl;\ + std::cout << GridLogDebug << "[Memory debug] freeing " << memString(bytes) << std::endl;\ profilerDebugPrint;\ } diff --git a/lib/util/Init.cc b/lib/util/Init.cc index 031f8f5a..20367293 100644 --- a/lib/util/Init.cc +++ b/lib/util/Init.cc @@ -204,7 +204,7 @@ std::string GridCmdVectorIntToString(const std::vector & vec){ // Reinit guard ///////////////////////////////////////////////////////// static int Grid_is_initialised = 0; - +static MemoryStats dbgMemStats; void Grid_init(int *argc,char ***argv) { @@ -251,6 +251,11 @@ void Grid_init(int *argc,char ***argv) assert(fp!=(FILE *)NULL); } + if( 
GridCmdOptionExists(*argv,*argv+*argc,"--debug-mem") ){ + MemoryProfiler::debug = true; + MemoryProfiler::stats = &dbgMemStats; + } + //////////////////////////////////// // Banner //////////////////////////////////// @@ -324,6 +329,7 @@ void Grid_init(int *argc,char ***argv) std::cout< Date: Fri, 8 Dec 2017 11:13:39 +0000 Subject: [PATCH 247/377] bug fix in sequential insertion of conserved vector current --- lib/qcd/action/fermion/WilsonFermion.cc | 8 +++++++- lib/qcd/action/fermion/WilsonFermion5D.cc | 8 +++++++- 2 files changed, 14 insertions(+), 2 deletions(-) diff --git a/lib/qcd/action/fermion/WilsonFermion.cc b/lib/qcd/action/fermion/WilsonFermion.cc index b986edd7..1a020e8a 100644 --- a/lib/qcd/action/fermion/WilsonFermion.cc +++ b/lib/qcd/action/fermion/WilsonFermion.cc @@ -395,7 +395,8 @@ void WilsonFermion::SeqConservedCurrent(PropagatorField &q_in, Lattice> ph(_grid), coor(_grid); Complex i(0.0,1.0); PropagatorField tmpFwd(_grid), tmpBwd(_grid), tmp(_grid); - int tshift = (mu == Tp) ? 1 : 0; + unsigned int tshift = (mu == Tp) ? 1 : 0; + unsigned int LLt = GridDefaultLatt()[Tp]; // Momentum projection ph = zero; @@ -434,6 +435,11 @@ void WilsonFermion::SeqConservedCurrent(PropagatorField &q_in, // Repeat for backward direction. 
t_mask = ((coords._odata[sU] >= (tmin + tshift)) && (coords._odata[sU] <= (tmax + tshift))); + + //if tmax = LLt-1 (last timeslice) include timeslice 0 if the time is shifted (mu=3) + unsigned int t0 = 0; + if((tmax==LLt-1) && (tshift==1)) t_mask = (t_mask || (coords._odata[sU] == t0 )); + timeSlices = Reduce(t_mask); if (timeSlices > 0) diff --git a/lib/qcd/action/fermion/WilsonFermion5D.cc b/lib/qcd/action/fermion/WilsonFermion5D.cc index 19a37c34..393ee7f3 100644 --- a/lib/qcd/action/fermion/WilsonFermion5D.cc +++ b/lib/qcd/action/fermion/WilsonFermion5D.cc @@ -794,8 +794,9 @@ void WilsonFermion5D::SeqConservedCurrent(PropagatorField &q_in, PropagatorField tmpFwd(FermionGrid()), tmpBwd(FermionGrid()), tmp(FermionGrid()); Complex i(0.0, 1.0); - int tshift = (mu == Tp) ? 1 : 0; + unsigned int tshift = (mu == Tp) ? 1 : 0; unsigned int LLs = q_in._grid->_rdimensions[0]; + unsigned int LLt = GridDefaultLatt()[Tp]; // Momentum projection. ph = zero; @@ -842,6 +843,11 @@ void WilsonFermion5D::SeqConservedCurrent(PropagatorField &q_in, // Repeat for backward direction. 
t_mask = ((coords._odata[sU] >= (tmin + tshift)) && (coords._odata[sU] <= (tmax + tshift))); + + //if tmax = LLt-1 (last timeslice) include timeslice 0 if the time is shifted (mu=3) + unsigned int t0 = 0; + if((tmax==LLt-1) && (tshift==1)) t_mask = (t_mask || (coords._odata[sU] == t0 )); + timeSlices = Reduce(t_mask); if (timeSlices > 0) From 64161a8743ded19a767883793d7030d6e4093699 Mon Sep 17 00:00:00 2001 From: Antonin Portelli Date: Tue, 12 Dec 2017 13:08:01 +0000 Subject: [PATCH 248/377] Hadrons: much simpler reference dependency --- extras/Hadrons/Environment.cc | 78 +++---------------- extras/Hadrons/Environment.hpp | 27 +++---- extras/Hadrons/Graph.hpp | 12 +-- extras/Hadrons/Module.hpp | 1 + extras/Hadrons/Modules/MAction/DWF.hpp | 9 +++ extras/Hadrons/Modules/MAction/Wilson.hpp | 9 +++ extras/Hadrons/Modules/MContraction/Meson.hpp | 9 +++ extras/Hadrons/Modules/MFermion/GaugeProp.hpp | 9 +++ extras/Hadrons/Modules/MGauge/Unit.cc | 7 ++ extras/Hadrons/Modules/MGauge/Unit.hpp | 1 + extras/Hadrons/Modules/MSink/Point.hpp | 9 +++ extras/Hadrons/Modules/MSolver/RBPrecCG.hpp | 12 ++- extras/Hadrons/Modules/MSource/Point.hpp | 9 +++ .../Modules/templates/Module.cc.template | 8 ++ .../Modules/templates/Module.hpp.template | 1 + .../templates/Module_in_NS.cc.template | 8 ++ .../templates/Module_in_NS.hpp.template | 1 + .../Modules/templates/Module_tmp.hpp.template | 1 + .../templates/Module_tmp_in_NS.hpp.template | 9 +++ extras/Hadrons/VirtualMachine.cc | 77 +++++++++--------- 20 files changed, 171 insertions(+), 126 deletions(-) diff --git a/extras/Hadrons/Environment.cc b/extras/Hadrons/Environment.cc index ea41f343..66291966 100644 --- a/extras/Hadrons/Environment.cc +++ b/extras/Hadrons/Environment.cc @@ -341,81 +341,21 @@ Environment::Size Environment::getTotalSize(void) const return size; } -void Environment::addOwnership(const unsigned int owner, - const unsigned int property) +void Environment::freeObject(const unsigned int address) { - if 
(hasObject(property)) + if (hasCreatedObject(address)) { - object_[property].owners.insert(owner); - } - else - { - HADRON_ERROR("no object with address " + std::to_string(property)); - } - if (hasObject(owner)) - { - object_[owner].properties.insert(property); - } - else - { - HADRON_ERROR("no object with address " + std::to_string(owner)); + LOG(Message) << "Destroying object '" << object_[address].name + << "'" << std::endl; } + object_[address].size = 0; + object_[address].type = nullptr; + object_[address].data.reset(nullptr); } -void Environment::addOwnership(const std::string owner, - const std::string property) +void Environment::freeObject(const std::string name) { - addOwnership(getObjectAddress(owner), getObjectAddress(property)); -} - -bool Environment::hasOwners(const unsigned int address) const -{ - - if (hasObject(address)) - { - return (!object_[address].owners.empty()); - } - else - { - HADRON_ERROR("no object with address " + std::to_string(address)); - } -} - -bool Environment::hasOwners(const std::string name) const -{ - return hasOwners(getObjectAddress(name)); -} - -bool Environment::freeObject(const unsigned int address) -{ - if (!hasOwners(address)) - { - if (hasCreatedObject(address)) - { - LOG(Message) << "Destroying object '" << object_[address].name - << "'" << std::endl; - } - for (auto &p: object_[address].properties) - { - object_[p].owners.erase(address); - } - object_[address].size = 0; - object_[address].type = nullptr; - object_[address].owners.clear(); - object_[address].properties.clear(); - object_[address].data.reset(nullptr); - - return true; - } - else - { - return false; - } -} - -bool Environment::freeObject(const std::string name) -{ - return freeObject(getObjectAddress(name)); + freeObject(getObjectAddress(name)); } void Environment::freeAll(void) diff --git a/extras/Hadrons/Environment.hpp b/extras/Hadrons/Environment.hpp index 5177b312..811ee14e 100644 --- a/extras/Hadrons/Environment.hpp +++ 
b/extras/Hadrons/Environment.hpp @@ -82,7 +82,6 @@ private: const std::type_info *type{nullptr}; std::string name; int module{-1}; - std::set owners, properties; std::unique_ptr data{nullptr}; }; public: @@ -140,14 +139,8 @@ public: template bool isObjectOfType(const std::string name) const; Environment::Size getTotalSize(void) const; - void addOwnership(const unsigned int owner, - const unsigned int property); - void addOwnership(const std::string owner, - const std::string property); - bool hasOwners(const unsigned int address) const; - bool hasOwners(const std::string name) const; - bool freeObject(const unsigned int address); - bool freeObject(const std::string name); + void freeObject(const unsigned int address); + void freeObject(const std::string name); void freeAll(void); // print environment content void printContent(void) const; @@ -252,15 +245,23 @@ T * Environment::getObject(const unsigned int address) const { if (hasObject(address)) { - if (auto h = dynamic_cast *>(object_[address].data.get())) + if (hasCreatedObject(address)) { - return h->getPt(); + if (auto h = dynamic_cast *>(object_[address].data.get())) + { + return h->getPt(); + } + else + { + HADRON_ERROR("object with address " + std::to_string(address) + + " does not have type '" + typeName(&typeid(T)) + + "' (has type '" + getObjectType(address) + "')"); + } } else { HADRON_ERROR("object with address " + std::to_string(address) + - " does not have type '" + typeName(&typeid(T)) + - "' (has type '" + getObjectType(address) + "')"); + " is empty"); } } else diff --git a/extras/Hadrons/Graph.hpp b/extras/Hadrons/Graph.hpp index df255517..bb9ae679 100644 --- a/extras/Hadrons/Graph.hpp +++ b/extras/Hadrons/Graph.hpp @@ -430,7 +430,7 @@ std::vector Graph::getAdjacentVertices(const T &value) const { return ((e.first == value) or (e.second == value)); }; - auto eIt = find_if(edgeSet_.begin(), edgeSet_.end(), pred); + auto eIt = std::find_if(edgeSet_.begin(), edgeSet_.end(), pred); while (eIt != 
edgeSet_.end()) { @@ -442,7 +442,7 @@ std::vector Graph::getAdjacentVertices(const T &value) const { adjacentVertex.push_back((*eIt).first); } - eIt = find_if(++eIt, edgeSet_.end(), pred); + eIt = std::find_if(++eIt, edgeSet_.end(), pred); } return adjacentVertex; @@ -458,12 +458,12 @@ std::vector Graph::getChildren(const T &value) const { return (e.first == value); }; - auto eIt = find_if(edgeSet_.begin(), edgeSet_.end(), pred); + auto eIt = std::find_if(edgeSet_.begin(), edgeSet_.end(), pred); while (eIt != edgeSet_.end()) { child.push_back((*eIt).second); - eIt = find_if(++eIt, edgeSet_.end(), pred); + eIt = std::find_if(++eIt, edgeSet_.end(), pred); } return child; @@ -479,12 +479,12 @@ std::vector Graph::getParents(const T &value) const { return (e.second == value); }; - auto eIt = find_if(edgeSet_.begin(), edgeSet_.end(), pred); + auto eIt = std::find_if(edgeSet_.begin(), edgeSet_.end(), pred); while (eIt != edgeSet_.end()) { parent.push_back((*eIt).first); - eIt = find_if(++eIt, edgeSet_.end(), pred); + eIt = std::find_if(++eIt, edgeSet_.end(), pred); } return parent; diff --git a/extras/Hadrons/Module.hpp b/extras/Hadrons/Module.hpp index 14d98bfb..c6b58e9f 100644 --- a/extras/Hadrons/Module.hpp +++ b/extras/Hadrons/Module.hpp @@ -155,6 +155,7 @@ public: virtual std::string getRegisteredName(void); // dependencies/products virtual std::vector getInput(void) = 0; + virtual std::vector getReference(void) = 0; virtual std::vector getOutput(void) = 0; // parse parameters virtual void parseParameters(XmlReader &reader, const std::string name) = 0; diff --git a/extras/Hadrons/Modules/MAction/DWF.hpp b/extras/Hadrons/Modules/MAction/DWF.hpp index e7d28476..91e4ec94 100644 --- a/extras/Hadrons/Modules/MAction/DWF.hpp +++ b/extras/Hadrons/Modules/MAction/DWF.hpp @@ -64,6 +64,7 @@ public: virtual ~TDWF(void) = default; // dependency relation virtual std::vector getInput(void); + virtual std::vector getReference(void); virtual std::vector getOutput(void); protected: 
// setup @@ -92,6 +93,14 @@ std::vector TDWF::getInput(void) return in; } +template +std::vector TDWF::getReference(void) +{ + std::vector ref = {}; + + return ref; +} + template std::vector TDWF::getOutput(void) { diff --git a/extras/Hadrons/Modules/MAction/Wilson.hpp b/extras/Hadrons/Modules/MAction/Wilson.hpp index 591a3fed..1ca3bf59 100644 --- a/extras/Hadrons/Modules/MAction/Wilson.hpp +++ b/extras/Hadrons/Modules/MAction/Wilson.hpp @@ -62,6 +62,7 @@ public: virtual ~TWilson(void) = default; // dependencies/products virtual std::vector getInput(void); + virtual std::vector getReference(void); virtual std::vector getOutput(void); protected: // setup @@ -90,6 +91,14 @@ std::vector TWilson::getInput(void) return in; } +template +std::vector TWilson::getReference(void) +{ + std::vector ref = {}; + + return ref; +} + template std::vector TWilson::getOutput(void) { diff --git a/extras/Hadrons/Modules/MContraction/Meson.hpp b/extras/Hadrons/Modules/MContraction/Meson.hpp index 7c0012d2..7d19feb8 100644 --- a/extras/Hadrons/Modules/MContraction/Meson.hpp +++ b/extras/Hadrons/Modules/MContraction/Meson.hpp @@ -95,6 +95,7 @@ public: virtual ~TMeson(void) = default; // dependencies/products virtual std::vector getInput(void); + virtual std::vector getReference(void); virtual std::vector getOutput(void); virtual void parseGammaString(std::vector &gammaList); protected: @@ -122,6 +123,14 @@ std::vector TMeson::getInput(void) return input; } +template +std::vector TMeson::getReference(void) +{ + std::vector ref = {}; + + return ref; +} + template std::vector TMeson::getOutput(void) { diff --git a/extras/Hadrons/Modules/MFermion/GaugeProp.hpp b/extras/Hadrons/Modules/MFermion/GaugeProp.hpp index 8529825b..f860c403 100644 --- a/extras/Hadrons/Modules/MFermion/GaugeProp.hpp +++ b/extras/Hadrons/Modules/MFermion/GaugeProp.hpp @@ -84,6 +84,7 @@ public: virtual ~TGaugeProp(void) = default; // dependency relation virtual std::vector getInput(void); + virtual std::vector 
getReference(void); virtual std::vector getOutput(void); protected: // setup @@ -115,6 +116,14 @@ std::vector TGaugeProp::getInput(void) return in; } +template +std::vector TGaugeProp::getReference(void) +{ + std::vector ref = {}; + + return ref; +} + template std::vector TGaugeProp::getOutput(void) { diff --git a/extras/Hadrons/Modules/MGauge/Unit.cc b/extras/Hadrons/Modules/MGauge/Unit.cc index b3a7d634..bc05a785 100644 --- a/extras/Hadrons/Modules/MGauge/Unit.cc +++ b/extras/Hadrons/Modules/MGauge/Unit.cc @@ -47,6 +47,13 @@ std::vector TUnit::getInput(void) return std::vector(); } +std::vector TUnit::getReference(void) +{ + std::vector ref = {}; + + return ref; +} + std::vector TUnit::getOutput(void) { std::vector out = {getName()}; diff --git a/extras/Hadrons/Modules/MGauge/Unit.hpp b/extras/Hadrons/Modules/MGauge/Unit.hpp index c1650cc7..4b69f0ce 100644 --- a/extras/Hadrons/Modules/MGauge/Unit.hpp +++ b/extras/Hadrons/Modules/MGauge/Unit.hpp @@ -50,6 +50,7 @@ public: virtual ~TUnit(void) = default; // dependencies/products virtual std::vector getInput(void); + virtual std::vector getReference(void); virtual std::vector getOutput(void); protected: // setup diff --git a/extras/Hadrons/Modules/MSink/Point.hpp b/extras/Hadrons/Modules/MSink/Point.hpp index 853a7c32..16b89434 100644 --- a/extras/Hadrons/Modules/MSink/Point.hpp +++ b/extras/Hadrons/Modules/MSink/Point.hpp @@ -60,6 +60,7 @@ public: virtual ~TPoint(void) = default; // dependency relation virtual std::vector getInput(void); + virtual std::vector getReference(void); virtual std::vector getOutput(void); protected: // setup @@ -93,6 +94,14 @@ std::vector TPoint::getInput(void) return in; } +template +std::vector TPoint::getReference(void) +{ + std::vector ref = {}; + + return ref; +} + template std::vector TPoint::getOutput(void) { diff --git a/extras/Hadrons/Modules/MSolver/RBPrecCG.hpp b/extras/Hadrons/Modules/MSolver/RBPrecCG.hpp index d6c21412..bb4f3f62 100644 --- 
a/extras/Hadrons/Modules/MSolver/RBPrecCG.hpp +++ b/extras/Hadrons/Modules/MSolver/RBPrecCG.hpp @@ -61,6 +61,7 @@ public: virtual ~TRBPrecCG(void) = default; // dependencies/products virtual std::vector getInput(void); + virtual std::vector getReference(void); virtual std::vector getOutput(void); protected: // setup @@ -84,11 +85,19 @@ TRBPrecCG::TRBPrecCG(const std::string name) template std::vector TRBPrecCG::getInput(void) { - std::vector in = {par().action}; + std::vector in = {}; return in; } +template +std::vector TRBPrecCG::getReference(void) +{ + std::vector ref = {par().action}; + + return ref; +} + template std::vector TRBPrecCG::getOutput(void) { @@ -115,7 +124,6 @@ void TRBPrecCG::setup(void) schurSolver(mat, source, sol); }; envCreate(SolverFn, getName(), Ls, solver); - env().addOwnership(getName(), par().action); } // execution /////////////////////////////////////////////////////////////////// diff --git a/extras/Hadrons/Modules/MSource/Point.hpp b/extras/Hadrons/Modules/MSource/Point.hpp index b9813688..3fab41c0 100644 --- a/extras/Hadrons/Modules/MSource/Point.hpp +++ b/extras/Hadrons/Modules/MSource/Point.hpp @@ -71,6 +71,7 @@ public: virtual ~TPoint(void) = default; // dependency relation virtual std::vector getInput(void); + virtual std::vector getReference(void); virtual std::vector getOutput(void); protected: // setup @@ -100,6 +101,14 @@ std::vector TPoint::getInput(void) return in; } +template +std::vector TPoint::getReference(void) +{ + std::vector ref = {}; + + return ref; +} + template std::vector TPoint::getOutput(void) { diff --git a/extras/Hadrons/Modules/templates/Module.cc.template b/extras/Hadrons/Modules/templates/Module.cc.template index 0c509d6d..29edadfb 100644 --- a/extras/Hadrons/Modules/templates/Module.cc.template +++ b/extras/Hadrons/Modules/templates/Module.cc.template @@ -19,6 +19,14 @@ std::vector T___FILEBASENAME___::getInput(void) return in; } +template +std::vector T___FILEBASENAME___::getReference(void) +{ + 
std::vector in = {}; + + return in; +} + std::vector T___FILEBASENAME___::getOutput(void) { std::vector out = {getName()}; diff --git a/extras/Hadrons/Modules/templates/Module.hpp.template b/extras/Hadrons/Modules/templates/Module.hpp.template index fb43260f..b59e168f 100644 --- a/extras/Hadrons/Modules/templates/Module.hpp.template +++ b/extras/Hadrons/Modules/templates/Module.hpp.template @@ -26,6 +26,7 @@ public: virtual ~T___FILEBASENAME___(void) = default; // dependency relation virtual std::vector getInput(void); + virtual std::vector getReference(void); virtual std::vector getOutput(void); // setup virtual void setup(void); diff --git a/extras/Hadrons/Modules/templates/Module_in_NS.cc.template b/extras/Hadrons/Modules/templates/Module_in_NS.cc.template index 8b2a0ec0..880129bd 100644 --- a/extras/Hadrons/Modules/templates/Module_in_NS.cc.template +++ b/extras/Hadrons/Modules/templates/Module_in_NS.cc.template @@ -20,6 +20,14 @@ std::vector T___FILEBASENAME___::getInput(void) return in; } +template +std::vector T___FILEBASENAME___::getReference(void) +{ + std::vector in = {}; + + return in; +} + std::vector T___FILEBASENAME___::getOutput(void) { std::vector out = {getName()}; diff --git a/extras/Hadrons/Modules/templates/Module_in_NS.hpp.template b/extras/Hadrons/Modules/templates/Module_in_NS.hpp.template index ea77b12a..f90cb052 100644 --- a/extras/Hadrons/Modules/templates/Module_in_NS.hpp.template +++ b/extras/Hadrons/Modules/templates/Module_in_NS.hpp.template @@ -28,6 +28,7 @@ public: virtual ~T___FILEBASENAME___(void) = default; // dependency relation virtual std::vector getInput(void); + virtual std::vector getReference(void); virtual std::vector getOutput(void); // setup virtual void setup(void); diff --git a/extras/Hadrons/Modules/templates/Module_tmp.hpp.template b/extras/Hadrons/Modules/templates/Module_tmp.hpp.template index 2ee053a9..b4e7f87f 100644 --- a/extras/Hadrons/Modules/templates/Module_tmp.hpp.template +++ 
b/extras/Hadrons/Modules/templates/Module_tmp.hpp.template @@ -27,6 +27,7 @@ public: virtual ~T___FILEBASENAME___(void) = default; // dependency relation virtual std::vector getInput(void); + virtual std::vector getReference(void); virtual std::vector getOutput(void); // setup virtual void setup(void); diff --git a/extras/Hadrons/Modules/templates/Module_tmp_in_NS.hpp.template b/extras/Hadrons/Modules/templates/Module_tmp_in_NS.hpp.template index b79c0ad3..9aef1c92 100644 --- a/extras/Hadrons/Modules/templates/Module_tmp_in_NS.hpp.template +++ b/extras/Hadrons/Modules/templates/Module_tmp_in_NS.hpp.template @@ -29,6 +29,7 @@ public: virtual ~T___FILEBASENAME___(void) = default; // dependency relation virtual std::vector getInput(void); + virtual std::vector getReference(void); virtual std::vector getOutput(void); // setup virtual void setup(void); @@ -56,6 +57,14 @@ std::vector T___FILEBASENAME___::getInput(void) return in; } +template +std::vector T___FILEBASENAME___::getReference(void) +{ + std::vector in = {}; + + return in; +} + template std::vector T___FILEBASENAME___::getOutput(void) { diff --git a/extras/Hadrons/VirtualMachine.cc b/extras/Hadrons/VirtualMachine.cc index ae1d5b6b..7f967f66 100644 --- a/extras/Hadrons/VirtualMachine.cc +++ b/extras/Hadrons/VirtualMachine.cc @@ -82,8 +82,7 @@ void VirtualMachine::pushModule(VirtualMachine::ModPt &pt) m.data = std::move(pt); m.type = typeIdPt(*m.data.get()); m.name = name; - auto input = m.data->getInput(); - for (auto &in: input) + for (auto &in: m.data->getInput()) { if (!env().hasObject(in)) { @@ -91,11 +90,18 @@ void VirtualMachine::pushModule(VirtualMachine::ModPt &pt) } m.input.push_back(env().getObjectAddress(in)); } - auto output = m.data->getOutput(); + for (auto &ref: m.data->getReference()) + { + if (!env().hasObject(ref)) + { + env().addObject(ref , -1); + } + m.input.push_back(env().getObjectAddress(ref)); + } module_.push_back(std::move(m)); address = static_cast(module_.size() - 1); 
moduleAddress_[name] = address; - for (auto &out: output) + for (auto &out: getModule(address)->getOutput()) { if (!env().hasObject(out)) { @@ -114,6 +120,25 @@ void VirtualMachine::pushModule(VirtualMachine::ModPt &pt) + module_[env().getObjectModule(out)].name + "' (while pushing module '" + name + "')"); } + if (getModule(address)->getReference().size() > 0) + { + auto pred = [this, out](const ModuleInfo &n) + { + auto &in = n.input; + auto it = std::find(in.begin(), in.end(), env().getObjectAddress(out)); + + return (it != in.end()); + }; + auto it = std::find_if(module_.begin(), module_.end(), pred); + while (it != module_.end()) + { + for (auto &ref: getModule(address)->getReference()) + { + it->input.push_back(env().getObjectAddress(ref)); + } + it = std::find_if(++it, module_.end(), pred); + } + } } } } @@ -225,12 +250,17 @@ Graph VirtualMachine::makeModuleGraph(void) const { Graph moduleGraph; - for (unsigned int i = 0; i < module_.size(); ++i) + // create vertices + for (unsigned int m = 0; m < module_.size(); ++m) { - moduleGraph.addVertex(i); - for (auto &j: module_[i].input) + moduleGraph.addVertex(m); + } + // create edges + for (unsigned int m = 0; m < module_.size(); ++m) + { + for (auto &in: module_[m].input) { - moduleGraph.addEdge(env().getObjectModule(j), i); + moduleGraph.addEdge(env().getObjectModule(in), m); } } @@ -258,7 +288,6 @@ VirtualMachine::executeProgram(const std::vector &p) { Size memPeak = 0, sizeBefore, sizeAfter; std::vector> freeProg; - bool continueCollect, nothingFreed; // build garbage collection schedule LOG(Debug) << "Building garbage collection schedule..." << std::endl; @@ -307,25 +336,10 @@ VirtualMachine::executeProgram(const std::vector &p) { LOG(Message) << "Garbage collection..." 
<< std::endl; } - nothingFreed = true; - do + for (auto &j: freeProg[i]) { - continueCollect = false; - auto toFree = freeProg[i]; - for (auto &j: toFree) - { - // continue garbage collection while there are still - // objects without owners - continueCollect = continueCollect or !env().hasOwners(j); - if(env().freeObject(j)) - { - // if an object has been freed, remove it from - // the garbage collection schedule - freeProg[i].erase(j); - nothingFreed = false; - } - } - } while (continueCollect); + env().freeObject(j); + } // free temporaries for (unsigned int i = 0; i < env().getMaxAddress(); ++i) { @@ -335,15 +349,6 @@ VirtualMachine::executeProgram(const std::vector &p) env().freeObject(i); } } - // any remaining objects in step i garbage collection schedule - // is scheduled for step i + 1 - if (i + 1 < p.size()) - { - for (auto &j: freeProg[i]) - { - freeProg[i + 1].insert(j); - } - } // print used memory after garbage collection if necessary if (!isDryRun()) { From 26d7b829a076fa74df370789e9f723f8b793fa67 Mon Sep 17 00:00:00 2001 From: Antonin Portelli Date: Tue, 12 Dec 2017 14:04:28 +0000 Subject: [PATCH 249/377] Hadrons: error managed through expections --- extras/Hadrons/Application.cc | 8 +-- extras/Hadrons/Environment.cc | 23 +++--- extras/Hadrons/Environment.hpp | 10 +-- extras/Hadrons/Exceptions.cc | 57 +++++++++++++++ extras/Hadrons/Exceptions.hpp | 72 +++++++++++++++++++ extras/Hadrons/Factory.hpp | 2 +- extras/Hadrons/Global.hpp | 7 +- extras/Hadrons/Graph.hpp | 12 ++-- extras/Hadrons/Makefile.am | 2 + extras/Hadrons/Module.cc | 2 +- .../Modules/MContraction/WardIdentity.hpp | 2 +- extras/Hadrons/Modules/MFermion/GaugeProp.hpp | 2 +- .../Modules/MUtilities/TestSeqConserved.hpp | 2 +- extras/Hadrons/VirtualMachine.cc | 14 ++-- extras/Hadrons/VirtualMachine.hpp | 2 +- 15 files changed, 174 insertions(+), 43 deletions(-) create mode 100644 extras/Hadrons/Exceptions.cc create mode 100644 extras/Hadrons/Exceptions.hpp diff --git 
a/extras/Hadrons/Application.cc b/extras/Hadrons/Application.cc index af67dff3..aa66d36f 100644 --- a/extras/Hadrons/Application.cc +++ b/extras/Hadrons/Application.cc @@ -123,11 +123,11 @@ void Application::parseParameterFile(const std::string parameterFileName) setPar(par); if (!push(reader, "modules")) { - HADRON_ERROR("Cannot open node 'modules' in parameter file '" + parameterFileName + "'"); + HADRON_ERROR(Parsing, "Cannot open node 'modules' in parameter file '" + parameterFileName + "'"); } if (!push(reader, "module")) { - HADRON_ERROR("Cannot open node 'modules/module' in parameter file '" + parameterFileName + "'"); + HADRON_ERROR(Parsing, "Cannot open node 'modules/module' in parameter file '" + parameterFileName + "'"); } do { @@ -262,7 +262,7 @@ void Application::saveSchedule(const std::string filename) if (!scheduled_) { - HADRON_ERROR("Computation not scheduled"); + HADRON_ERROR(Definition, "Computation not scheduled"); } LOG(Message) << "Saving current schedule to '" << filename << "'..." 
<< std::endl; @@ -296,7 +296,7 @@ void Application::printSchedule(void) { if (!scheduled_) { - HADRON_ERROR("Computation not scheduled"); + HADRON_ERROR(Definition, "Computation not scheduled"); } LOG(Message) << "Schedule (memory peak: " << MEM_MSG(memPeak_) << "):" << std::endl; diff --git a/extras/Hadrons/Environment.cc b/extras/Hadrons/Environment.cc index 66291966..403476d0 100644 --- a/extras/Hadrons/Environment.cc +++ b/extras/Hadrons/Environment.cc @@ -35,6 +35,9 @@ using namespace Grid; using namespace QCD; using namespace Hadrons; +#define ERROR_NO_ADDRESS(address)\ +HADRON_ERROR(Definition, "no object with address " + std::to_string(address)); + /****************************************************************************** * Environment implementation * ******************************************************************************/ @@ -83,7 +86,7 @@ GridCartesian * Environment::getGrid(const unsigned int Ls) const } catch(std::out_of_range &) { - HADRON_ERROR("no grid with Ls= " << Ls); + HADRON_ERROR(Definition, "no grid with Ls= " + std::to_string(Ls)); } } @@ -102,7 +105,7 @@ GridRedBlackCartesian * Environment::getRbGrid(const unsigned int Ls) const } catch(std::out_of_range &) { - HADRON_ERROR("no red-black 5D grid with Ls= " << Ls); + HADRON_ERROR(Definition, "no red-black 5D grid with Ls= " + std::to_string(Ls)); } } @@ -152,7 +155,7 @@ void Environment::addObject(const std::string name, const int moduleAddress) } else { - HADRON_ERROR("object '" + name + "' already exists"); + HADRON_ERROR(Definition, "object '" + name + "' already exists"); } } @@ -175,7 +178,7 @@ unsigned int Environment::getObjectAddress(const std::string name) const } else { - HADRON_ERROR("no object with name '" + name + "'"); + HADRON_ERROR(Definition, "no object with name '" + name + "'"); } } @@ -187,7 +190,7 @@ std::string Environment::getObjectName(const unsigned int address) const } else { - HADRON_ERROR("no object with address " + std::to_string(address)); + 
ERROR_NO_ADDRESS(address); } } @@ -206,7 +209,7 @@ std::string Environment::getObjectType(const unsigned int address) const } else { - HADRON_ERROR("no object with address " + std::to_string(address)); + ERROR_NO_ADDRESS(address); } } @@ -223,7 +226,7 @@ Environment::Size Environment::getObjectSize(const unsigned int address) const } else { - HADRON_ERROR("no object with address " + std::to_string(address)); + ERROR_NO_ADDRESS(address); } } @@ -240,7 +243,7 @@ Environment::Storage Environment::getObjectStorage(const unsigned int address) c } else { - HADRON_ERROR("no object with address " + std::to_string(address)); + ERROR_NO_ADDRESS(address); } } @@ -257,7 +260,7 @@ int Environment::getObjectModule(const unsigned int address) const } else { - HADRON_ERROR("no object with address " + std::to_string(address)); + ERROR_NO_ADDRESS(address); } } @@ -274,7 +277,7 @@ unsigned int Environment::getObjectLs(const unsigned int address) const } else { - HADRON_ERROR("no object with address " + std::to_string(address)); + ERROR_NO_ADDRESS(address); } } diff --git a/extras/Hadrons/Environment.hpp b/extras/Hadrons/Environment.hpp index 811ee14e..60371c20 100644 --- a/extras/Hadrons/Environment.hpp +++ b/extras/Hadrons/Environment.hpp @@ -227,7 +227,7 @@ void Environment::createDerivedObject(const std::string name, } else { - HADRON_ERROR("object '" + name + "' already allocated"); + HADRON_ERROR(Definition, "object '" + name + "' already allocated"); } } @@ -253,20 +253,20 @@ T * Environment::getObject(const unsigned int address) const } else { - HADRON_ERROR("object with address " + std::to_string(address) + + HADRON_ERROR(Definition, "object with address " + std::to_string(address) + " does not have type '" + typeName(&typeid(T)) + "' (has type '" + getObjectType(address) + "')"); } } else { - HADRON_ERROR("object with address " + std::to_string(address) + + HADRON_ERROR(Definition, "object with address " + std::to_string(address) + " is empty"); } } else { - HADRON_ERROR("no 
object with address " + std::to_string(address)); + HADRON_ERROR(Definition, "no object with address " + std::to_string(address)); } } @@ -292,7 +292,7 @@ bool Environment::isObjectOfType(const unsigned int address) const } else { - HADRON_ERROR("no object with address " + std::to_string(address)); + HADRON_ERROR(Definition, "no object with address " + std::to_string(address)); } } diff --git a/extras/Hadrons/Exceptions.cc b/extras/Hadrons/Exceptions.cc new file mode 100644 index 00000000..bf532c21 --- /dev/null +++ b/extras/Hadrons/Exceptions.cc @@ -0,0 +1,57 @@ +/************************************************************************************* + +Grid physics library, www.github.com/paboyle/Grid + +Source file: extras/Hadrons/Exceptions.cc + +Copyright (C) 2017 + +Author: Antonin Portelli + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along +with this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+ +See the full license in the file "LICENSE" in the top level distribution directory +*************************************************************************************/ +/* END LEGAL */ + +#include + +#ifndef ERR_SUFF +#define ERR_SUFF " (" + loc + ")" +#endif + +#define CONST_EXC(name, init) \ +name::name(std::string msg, std::string loc)\ +:init\ +{} + +using namespace Grid; +using namespace Hadrons; +using namespace Exceptions; + +// logic errors +CONST_EXC(Logic, logic_error(msg + ERR_SUFF)) +CONST_EXC(Definition, Logic("definition error: " + msg, loc)) +CONST_EXC(Implementation, Logic("implementation error: " + msg, loc)) +CONST_EXC(Range, Logic("range error: " + msg, loc)) +CONST_EXC(Size, Logic("size error: " + msg, loc)) +// runtime errors +CONST_EXC(Runtime, runtime_error(msg + ERR_SUFF)) +CONST_EXC(Argument, Runtime("argument error: " + msg, loc)) +CONST_EXC(Io, Runtime("IO error: " + msg, loc)) +CONST_EXC(Memory, Runtime("memory error: " + msg, loc)) +CONST_EXC(Parsing, Runtime("parsing error: " + msg, loc)) +CONST_EXC(Program, Runtime("program error: " + msg, loc)) +CONST_EXC(System, Runtime("system error: " + msg, loc)) \ No newline at end of file diff --git a/extras/Hadrons/Exceptions.hpp b/extras/Hadrons/Exceptions.hpp new file mode 100644 index 00000000..8f04ab41 --- /dev/null +++ b/extras/Hadrons/Exceptions.hpp @@ -0,0 +1,72 @@ +/************************************************************************************* + +Grid physics library, www.github.com/paboyle/Grid + +Source file: extras/Hadrons/Exceptions.hpp + +Copyright (C) 2017 + +Author: Antonin Portelli + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. 
+ +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along +with this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + +See the full license in the file "LICENSE" in the top level distribution directory +*************************************************************************************/ +/* END LEGAL */ + +#ifndef Hadrons_Exceptions_hpp_ +#define Hadrons_Exceptions_hpp_ + +#include +#ifndef Hadrons_Global_hpp_ +#include +#endif + +#define SRC_LOC std::string(__FUNCTION__) + " at " + std::string(__FILE__) + ":"\ + + std::to_string(__LINE__) +#define HADRON_ERROR(exc, msg)\ +LOG(Error) << msg << std::endl;\ +throw(Exceptions::exc(msg, SRC_LOC)); + +#define DECL_EXC(name, base) \ +class name: public base\ +{\ +public:\ + name(std::string msg, std::string loc);\ +} + +BEGIN_HADRONS_NAMESPACE + +namespace Exceptions +{ + // logic errors + DECL_EXC(Logic, std::logic_error); + DECL_EXC(Definition, Logic); + DECL_EXC(Implementation, Logic); + DECL_EXC(Range, Logic); + DECL_EXC(Size, Logic); + // runtime errors + DECL_EXC(Runtime, std::runtime_error); + DECL_EXC(Argument, Runtime); + DECL_EXC(Io, Runtime); + DECL_EXC(Memory, Runtime); + DECL_EXC(Parsing, Runtime); + DECL_EXC(Program, Runtime); + DECL_EXC(System, Runtime); +} + +END_HADRONS_NAMESPACE + +#endif // Hadrons_Exceptions_hpp_ diff --git a/extras/Hadrons/Factory.hpp b/extras/Hadrons/Factory.hpp index da86acae..65ce03ca 100644 --- a/extras/Hadrons/Factory.hpp +++ b/extras/Hadrons/Factory.hpp @@ -95,7 +95,7 @@ std::unique_ptr Factory::create(const std::string type, } catch (std::out_of_range &) { - HADRON_ERROR("object of type '" + type + "' unknown"); + HADRON_ERROR(Argument, "object of 
type '" + type + "' unknown"); } return func(name); diff --git a/extras/Hadrons/Global.hpp b/extras/Hadrons/Global.hpp index 4c37b961..c3d60bf2 100644 --- a/extras/Hadrons/Global.hpp +++ b/extras/Hadrons/Global.hpp @@ -100,11 +100,6 @@ public: }; #define LOG(channel) std::cout << HadronsLog##channel -#define HADRON_ERROR(msg)\ -LOG(Error) << msg << " (" << __FUNCTION__ << " at " << __FILE__ << ":"\ - << __LINE__ << ")" << std::endl;\ -abort(); - #define DEBUG_VAR(var) LOG(Debug) << #var << "= " << (var) << std::endl; extern HadronsLogger HadronsLogError; @@ -176,4 +171,6 @@ typedef XmlWriter CorrWriter; END_HADRONS_NAMESPACE +#include + #endif // Hadrons_Global_hpp_ diff --git a/extras/Hadrons/Graph.hpp b/extras/Hadrons/Graph.hpp index bb9ae679..a9c240fa 100644 --- a/extras/Hadrons/Graph.hpp +++ b/extras/Hadrons/Graph.hpp @@ -185,7 +185,7 @@ void Graph::removeVertex(const T &value) } else { - HADRON_ERROR("vertex " << value << " does not exists"); + HADRON_ERROR(Range, "vertex does not exists"); } // remove all edges containing the vertex @@ -214,7 +214,7 @@ void Graph::removeEdge(const Edge &e) } else { - HADRON_ERROR("edge " << e << " does not exists"); + HADRON_ERROR(Range, "edge does not exists"); } } @@ -260,7 +260,7 @@ void Graph::mark(const T &value, const bool doMark) } else { - HADRON_ERROR("vertex " << value << " does not exists"); + HADRON_ERROR(Range, "vertex does not exists"); } } @@ -298,7 +298,7 @@ bool Graph::isMarked(const T &value) const } else { - HADRON_ERROR("vertex " << value << " does not exists"); + HADRON_ERROR(Range, "vertex does not exists"); return false; } @@ -544,7 +544,7 @@ std::vector Graph::topoSort(void) { if (tmpMarked.at(v)) { - HADRON_ERROR("cannot topologically sort a cyclic graph"); + HADRON_ERROR(Range, "cannot topologically sort a cyclic graph"); } if (!isMarked(v)) { @@ -603,7 +603,7 @@ std::vector Graph::topoSort(Gen &gen) { if (tmpMarked.at(v)) { - HADRON_ERROR("cannot topologically sort a cyclic graph"); + 
HADRON_ERROR(Range, "cannot topologically sort a cyclic graph"); } if (!isMarked(v)) { diff --git a/extras/Hadrons/Makefile.am b/extras/Hadrons/Makefile.am index 826cb158..3d07679a 100644 --- a/extras/Hadrons/Makefile.am +++ b/extras/Hadrons/Makefile.am @@ -7,6 +7,7 @@ libHadrons_a_SOURCES = \ $(modules_cc) \ Application.cc \ Environment.cc \ + Exceptions.cc \ Global.cc \ Module.cc \ VirtualMachine.cc @@ -15,6 +16,7 @@ nobase_libHadrons_a_HEADERS = \ $(modules_hpp) \ Application.hpp \ Environment.hpp \ + Exceptions.hpp \ Factory.hpp \ GeneticScheduler.hpp \ Global.hpp \ diff --git a/extras/Hadrons/Module.cc b/extras/Hadrons/Module.cc index bf596bfc..383a5c2e 100644 --- a/extras/Hadrons/Module.cc +++ b/extras/Hadrons/Module.cc @@ -50,7 +50,7 @@ std::string ModuleBase::getName(void) const // get factory registration name if available std::string ModuleBase::getRegisteredName(void) { - HADRON_ERROR("module '" + getName() + "' has a type not registered" + HADRON_ERROR(Definition, "module '" + getName() + "' has no registered type" + " in the factory"); } diff --git a/extras/Hadrons/Modules/MContraction/WardIdentity.hpp b/extras/Hadrons/Modules/MContraction/WardIdentity.hpp index a298c1a1..90922c27 100644 --- a/extras/Hadrons/Modules/MContraction/WardIdentity.hpp +++ b/extras/Hadrons/Modules/MContraction/WardIdentity.hpp @@ -118,7 +118,7 @@ void TWardIdentity::setup(void) Ls_ = env().getObjectLs(par().q); if (Ls_ != env().getObjectLs(par().action)) { - HADRON_ERROR("Ls mismatch between quark action and propagator"); + HADRON_ERROR(Size, "Ls mismatch between quark action and propagator"); } } diff --git a/extras/Hadrons/Modules/MFermion/GaugeProp.hpp b/extras/Hadrons/Modules/MFermion/GaugeProp.hpp index f860c403..4d08841d 100644 --- a/extras/Hadrons/Modules/MFermion/GaugeProp.hpp +++ b/extras/Hadrons/Modules/MFermion/GaugeProp.hpp @@ -187,7 +187,7 @@ void TGaugeProp::execute(void) { if (Ls_ != env().getObjectLs(par().source)) { - HADRON_ERROR("Ls mismatch between quark 
action and source"); + HADRON_ERROR(Size, "Ls mismatch between quark action and source"); } else { diff --git a/extras/Hadrons/Modules/MUtilities/TestSeqConserved.hpp b/extras/Hadrons/Modules/MUtilities/TestSeqConserved.hpp index f8714d88..b0f2846f 100644 --- a/extras/Hadrons/Modules/MUtilities/TestSeqConserved.hpp +++ b/extras/Hadrons/Modules/MUtilities/TestSeqConserved.hpp @@ -122,7 +122,7 @@ void TTestSeqConserved::setup(void) auto Ls = env().getObjectLs(par().q); if (Ls != env().getObjectLs(par().action)) { - HADRON_ERROR("Ls mismatch between quark action and propagator"); + HADRON_ERROR(Size, "Ls mismatch between quark action and propagator"); } } diff --git a/extras/Hadrons/VirtualMachine.cc b/extras/Hadrons/VirtualMachine.cc index 7f967f66..0c3eca20 100644 --- a/extras/Hadrons/VirtualMachine.cc +++ b/extras/Hadrons/VirtualMachine.cc @@ -115,7 +115,7 @@ void VirtualMachine::pushModule(VirtualMachine::ModPt &pt) } else { - HADRON_ERROR("object '" + out + HADRON_ERROR(Definition, "object '" + out + "' is already produced by module '" + module_[env().getObjectModule(out)].name + "' (while pushing module '" + name + "')"); @@ -144,7 +144,7 @@ void VirtualMachine::pushModule(VirtualMachine::ModPt &pt) } else { - HADRON_ERROR("module '" + name + "' already exists"); + HADRON_ERROR(Definition, "module '" + name + "' already exists"); } } @@ -171,7 +171,7 @@ ModuleBase * VirtualMachine::getModule(const unsigned int address) const } else { - HADRON_ERROR("no module with address " + std::to_string(address)); + HADRON_ERROR(Definition, "no module with address " + std::to_string(address)); } } @@ -188,7 +188,7 @@ unsigned int VirtualMachine::getModuleAddress(const std::string name) const } else { - HADRON_ERROR("no module with name '" + name + "'"); + HADRON_ERROR(Definition, "no module with name '" + name + "'"); } } @@ -200,7 +200,7 @@ std::string VirtualMachine::getModuleName(const unsigned int address) const } else { - HADRON_ERROR("no module with address " + 
std::to_string(address)); + HADRON_ERROR(Definition, "no module with address " + std::to_string(address)); } } @@ -212,7 +212,7 @@ std::string VirtualMachine::getModuleType(const unsigned int address) const } else { - HADRON_ERROR("no module with address " + std::to_string(address)); + HADRON_ERROR(Definition, "no module with address " + std::to_string(address)); } } @@ -273,7 +273,7 @@ Graph VirtualMachine::makeModuleGraph(void) const // { // if (o.module < 0) // { -// HADRON_ERROR("object '" + o.name + "' does not have a creator"); +// HADRON_ERROR(Runtime, "object '" + o.name + "' does not have a creator"); // } // } // } diff --git a/extras/Hadrons/VirtualMachine.hpp b/extras/Hadrons/VirtualMachine.hpp index 357fdb5b..c5557add 100644 --- a/extras/Hadrons/VirtualMachine.hpp +++ b/extras/Hadrons/VirtualMachine.hpp @@ -147,7 +147,7 @@ M * VirtualMachine::getModule(const unsigned int address) const } else { - HADRON_ERROR("module '" + module_[address].name + HADRON_ERROR(Definition, "module '" + module_[address].name + "' does not have type " + typeid(M).name() + "(has type: " + getModuleType(address) + ")"); } From 259d504ef0325879d19d8283a4cd97a4dabd8c1d Mon Sep 17 00:00:00 2001 From: Antonin Portelli Date: Tue, 12 Dec 2017 19:32:58 +0000 Subject: [PATCH 250/377] Hadrons: first full implementation of the module memory profiler --- extras/Hadrons/Application.cc | 5 +- extras/Hadrons/Environment.cc | 13 +- extras/Hadrons/Environment.hpp | 9 +- extras/Hadrons/Global.hpp | 2 +- extras/Hadrons/Module.hpp | 6 +- extras/Hadrons/Modules/MContraction/Meson.hpp | 2 +- extras/Hadrons/VirtualMachine.cc | 120 ++++++++++++++++++ extras/Hadrons/VirtualMachine.hpp | 22 +++- 8 files changed, 166 insertions(+), 13 deletions(-) diff --git a/extras/Hadrons/Application.cc b/extras/Hadrons/Application.cc index aa66d36f..135e4df4 100644 --- a/extras/Hadrons/Application.cc +++ b/extras/Hadrons/Application.cc @@ -92,9 +92,10 @@ void Application::run(void) { 
parseParameterFile(parameterFileName_); } - //vm().checkGraph(); - env().printContent(); vm().printContent(); + env().printContent(); + //vm().checkGraph(); + vm().memoryProfile(); if (!scheduled_) { schedule(); diff --git a/extras/Hadrons/Environment.cc b/extras/Hadrons/Environment.cc index 403476d0..6de13e86 100644 --- a/extras/Hadrons/Environment.cc +++ b/extras/Hadrons/Environment.cc @@ -369,6 +369,16 @@ void Environment::freeAll(void) } } +void Environment::protectObjects(const bool protect) +{ + protect_ = protect; +} + +bool Environment::objectsProtected(void) const +{ + return protect_; +} + // print environment content /////////////////////////////////////////////////// void Environment::printContent(void) const { @@ -376,6 +386,7 @@ void Environment::printContent(void) const for (unsigned int i = 0; i < object_.size(); ++i) { LOG(Debug) << std::setw(4) << i << ": " - << getObjectName(i) << std::endl; + << getObjectName(i) << " (" + << sizeString(getObjectSize(i)) << ")" << std::endl; } } diff --git a/extras/Hadrons/Environment.hpp b/extras/Hadrons/Environment.hpp index 60371c20..adea13ce 100644 --- a/extras/Hadrons/Environment.hpp +++ b/extras/Hadrons/Environment.hpp @@ -142,11 +142,14 @@ public: void freeObject(const unsigned int address); void freeObject(const std::string name); void freeAll(void); + void protectObjects(const bool protect); + bool objectsProtected(void) const; // print environment content void printContent(void) const; private: // general unsigned long int locVol_; + bool protect_{true}; // grids std::vector dim_; GridPt grid4d_; @@ -195,8 +198,8 @@ void Holder::reset(T *pt) // general memory management /////////////////////////////////////////////////// template void Environment::createDerivedObject(const std::string name, - const Environment::Storage storage, - const unsigned int Ls, + const Environment::Storage storage, + const unsigned int Ls, Ts && ... 
args) { if (!hasObject(name)) @@ -206,7 +209,7 @@ void Environment::createDerivedObject(const std::string name, unsigned int address = getObjectAddress(name); - if (!object_[address].data) + if (!object_[address].data or !objectsProtected()) { MemoryStats memStats; diff --git a/extras/Hadrons/Global.hpp b/extras/Hadrons/Global.hpp index c3d60bf2..ebfe94dc 100644 --- a/extras/Hadrons/Global.hpp +++ b/extras/Hadrons/Global.hpp @@ -36,7 +36,7 @@ See the full license in the file "LICENSE" in the top level distribution directo #include #ifndef SITE_SIZE_TYPE -#define SITE_SIZE_TYPE unsigned int +#define SITE_SIZE_TYPE size_t #endif #define BEGIN_HADRONS_NAMESPACE \ diff --git a/extras/Hadrons/Module.hpp b/extras/Hadrons/Module.hpp index c6b58e9f..25c0ac05 100644 --- a/extras/Hadrons/Module.hpp +++ b/extras/Hadrons/Module.hpp @@ -160,12 +160,12 @@ public: // parse parameters virtual void parseParameters(XmlReader &reader, const std::string name) = 0; virtual void saveParameters(XmlWriter &writer, const std::string name) = 0; - // execution - void operator()(void); -protected: // setup virtual void setup(void) {}; virtual void execute(void) = 0; + // execution + void operator()(void); +protected: // environment shortcut DEFINE_ENV_ALIAS; // virtual machine shortcut diff --git a/extras/Hadrons/Modules/MContraction/Meson.hpp b/extras/Hadrons/Modules/MContraction/Meson.hpp index 7d19feb8..3c179d44 100644 --- a/extras/Hadrons/Modules/MContraction/Meson.hpp +++ b/extras/Hadrons/Modules/MContraction/Meson.hpp @@ -134,7 +134,7 @@ std::vector TMeson::getReference(void) template std::vector TMeson::getOutput(void) { - std::vector output = {getName()}; + std::vector output = {}; return output; } diff --git a/extras/Hadrons/VirtualMachine.cc b/extras/Hadrons/VirtualMachine.cc index 0c3eca20..15e53dbf 100644 --- a/extras/Hadrons/VirtualMachine.cc +++ b/extras/Hadrons/VirtualMachine.cc @@ -390,3 +390,123 @@ void VirtualMachine::printContent(void) const << getModuleName(i) << 
std::endl; } } + +// memory profile ////////////////////////////////////////////////////////////// +VirtualMachine::MemoryProfile VirtualMachine::memoryProfile(void) const +{ + bool protect = env().objectsProtected(); + bool hmsg = HadronsLogMessage.isActive(); + bool gmsg = GridLogMessage.isActive(); + bool err = HadronsLogError.isActive(); + MemoryProfile profile; + auto program = makeModuleGraph().topoSort(); + + profile.module.resize(getNModule()); + env().protectObjects(false); + GridLogMessage.Active(false); + HadronsLogMessage.Active(false); + HadronsLogError.Active(false); + for (auto it = program.rbegin(); it != program.rend(); ++it) + { + auto a = *it; + + if (profile.module[a].empty()) + { + LOG(Debug) << "Profiling memory for module '" << module_[a].name << "' (" << a << ")..." << std::endl; + memoryProfile(profile, a); + env().freeAll(); + } + } + env().protectObjects(protect); + GridLogMessage.Active(gmsg); + HadronsLogMessage.Active(hmsg); + HadronsLogError.Active(err); + LOG(Debug) << "Memory profile:" << std::endl; + LOG(Debug) << "----------------" << std::endl; + for (unsigned int a = 0; a < profile.module.size(); ++a) + { + LOG(Debug) << getModuleName(a) << " (" << a << ")" << std::endl; + for (auto &o: profile.module[a]) + { + LOG(Debug) << "|__ " << env().getObjectName(o.first) << " (" + << sizeString(o.second) << ")" << std::endl; + } + LOG(Debug) << std::endl; + } + LOG(Debug) << "----------------" << std::endl; + + return profile; +} + +void VirtualMachine::resizeProfile(MemoryProfile &profile) const +{ + if (env().getMaxAddress() > profile.object.size()) + { + MemoryPrint empty; + + empty.size = 0; + empty.module = -1; + profile.object.resize(env().getMaxAddress(), empty); + } +} + +void VirtualMachine::updateProfile(MemoryProfile &profile, + const unsigned int address) const +{ + resizeProfile(profile); + for (unsigned int a = 0; a < env().getMaxAddress(); ++a) + { + if (env().hasCreatedObject(a) and (profile.object[a].module == -1)) + { 
+ profile.object[a].size = env().getObjectSize(a); + profile.object[a].module = address; + profile.module[address][a] = profile.object[a].size; + } + } +} + +void VirtualMachine::cleanEnvironment(MemoryProfile &profile) const +{ + resizeProfile(profile); + for (unsigned int a = 0; a < env().getMaxAddress(); ++a) + { + if (env().hasCreatedObject(a) and (profile.object[a].module == -1)) + { + env().freeObject(a); + } + } +} + +void VirtualMachine::memoryProfile(MemoryProfile &profile, + const unsigned int address) const +{ + auto m = getModule(address); + + LOG(Debug) << "Setting up module '" << m->getName() << "' (" << address << ")..." << std::endl; + + try + { + m->setup(); + updateProfile(profile, address); + } + catch (Exceptions::Definition &) + { + cleanEnvironment(profile); + for (auto &in: m->getInput()) + { + memoryProfile(profile, env().getObjectModule(in)); + } + for (auto &ref: m->getReference()) + { + memoryProfile(profile, env().getObjectModule(ref)); + } + m->setup(); + updateProfile(profile, address); + } +} + +void VirtualMachine::memoryProfile(MemoryProfile &profile, + const std::string name) const +{ + memoryProfile(profile, getModuleAddress(name)); +} diff --git a/extras/Hadrons/VirtualMachine.hpp b/extras/Hadrons/VirtualMachine.hpp index c5557add..56e5a8cf 100644 --- a/extras/Hadrons/VirtualMachine.hpp +++ b/extras/Hadrons/VirtualMachine.hpp @@ -51,8 +51,18 @@ class VirtualMachine { SINGLETON_DEFCTOR(VirtualMachine); public: - typedef SITE_SIZE_TYPE Size; - typedef std::unique_ptr ModPt; + typedef SITE_SIZE_TYPE Size; + typedef std::unique_ptr ModPt; + struct MemoryPrint + { + Size size; + unsigned int module; + }; + struct MemoryProfile + { + std::vector> module; + std::vector object; + }; private: struct ModuleInfo { @@ -100,12 +110,20 @@ public: void checkGraph(void) const; // print VM content void printContent(void) const; + // memory profile + MemoryProfile memoryProfile(void) const; // general execution Size executeProgram(const 
std::vector &p); Size executeProgram(const std::vector &p); private: // environment shortcut DEFINE_ENV_ALIAS; + // memory profile + void resizeProfile(MemoryProfile &profile) const; + void updateProfile(MemoryProfile &profile, const unsigned int address) const; + void cleanEnvironment(MemoryProfile &profile) const; + void memoryProfile(MemoryProfile &profile, const std::string name) const; + void memoryProfile(MemoryProfile &profile, const unsigned int address) const; private: // general bool dryRun_{false}, memoryProfile_{false}; From a9c8d7dad03f1b39acb5e081c3424d03ee035e07 Mon Sep 17 00:00:00 2001 From: Antonin Portelli Date: Wed, 13 Dec 2017 12:13:40 +0000 Subject: [PATCH 251/377] Hadrons: code cleaning --- extras/Hadrons/VirtualMachine.cc | 31 +++++++++++++++++-------------- 1 file changed, 17 insertions(+), 14 deletions(-) diff --git a/extras/Hadrons/VirtualMachine.cc b/extras/Hadrons/VirtualMachine.cc index 15e53dbf..1f4772a6 100644 --- a/extras/Hadrons/VirtualMachine.cc +++ b/extras/Hadrons/VirtualMachine.cc @@ -79,21 +79,26 @@ void VirtualMachine::pushModule(VirtualMachine::ModPt &pt) unsigned int address; ModuleInfo m; + // module registration ------------------------------------------------- m.data = std::move(pt); m.type = typeIdPt(*m.data.get()); m.name = name; + // input dependencies for (auto &in: m.data->getInput()) { if (!env().hasObject(in)) { + // if object does not exist, add it with no creator module env().addObject(in , -1); } m.input.push_back(env().getObjectAddress(in)); } + // reference dependencies for (auto &ref: m.data->getReference()) { if (!env().hasObject(ref)) { + // if object does not exist, add it with no creator module env().addObject(ref , -1); } m.input.push_back(env().getObjectAddress(ref)); @@ -101,20 +106,24 @@ void VirtualMachine::pushModule(VirtualMachine::ModPt &pt) module_.push_back(std::move(m)); address = static_cast(module_.size() - 1); moduleAddress_[name] = address; + // connecting outputs to potential inputs 
------------------------------ for (auto &out: getModule(address)->getOutput()) { if (!env().hasObject(out)) { + // output does not exists, add it env().addObject(out, address); } else { if (env().getObjectModule(env().getObjectAddress(out)) < 0) { + // output exists but without creator, correct it env().setObjectModule(env().getObjectAddress(out), address); } else { + // output already fully registered, error HADRON_ERROR(Definition, "object '" + out + "' is already produced by module '" + module_[env().getObjectModule(out)].name @@ -122,10 +131,14 @@ void VirtualMachine::pushModule(VirtualMachine::ModPt &pt) } if (getModule(address)->getReference().size() > 0) { + // module has references, dependency should be propagated + // to children modules; find module with `out` as an input + // and add references to their input auto pred = [this, out](const ModuleInfo &n) { auto &in = n.input; - auto it = std::find(in.begin(), in.end(), env().getObjectAddress(out)); + auto it = std::find(in.begin(), in.end(), + env().getObjectAddress(out)); return (it != in.end()); }; @@ -154,7 +167,7 @@ unsigned int VirtualMachine::getNModule(void) const } void VirtualMachine::createModule(const std::string name, const std::string type, - XmlReader &reader) + XmlReader &reader) { auto &factory = ModuleFactory::getInstance(); auto pt = factory.create(type, name); @@ -267,17 +280,6 @@ Graph VirtualMachine::makeModuleGraph(void) const return moduleGraph; } -// void VirtualMachine::checkGraph(void) const -// { -// for (auto &o: object_) -// { -// if (o.module < 0) -// { -// HADRON_ERROR(Runtime, "object '" + o.name + "' does not have a creator"); -// } -// } -// } - // general execution /////////////////////////////////////////////////////////// #define BIG_SEP "===============" #define SEP "---------------" @@ -412,7 +414,8 @@ VirtualMachine::MemoryProfile VirtualMachine::memoryProfile(void) const if (profile.module[a].empty()) { - LOG(Debug) << "Profiling memory for module '" << 
module_[a].name << "' (" << a << ")..." << std::endl; + LOG(Debug) << "Profiling memory for module '" << module_[a].name + << "' (" << a << ")..." << std::endl; memoryProfile(profile, a); env().freeAll(); } From 61fc50d616674e198b503d177ed86adef0e2260b Mon Sep 17 00:00:00 2001 From: Antonin Portelli Date: Wed, 13 Dec 2017 13:44:23 +0000 Subject: [PATCH 252/377] Hadrons: better organisation of the VM --- extras/Hadrons/Application.cc | 23 +- extras/Hadrons/Application.hpp | 2 - extras/Hadrons/Module.cc | 5 +- extras/Hadrons/VirtualMachine.cc | 403 +++++++++++++++--------------- extras/Hadrons/VirtualMachine.hpp | 47 ++-- 5 files changed, 236 insertions(+), 244 deletions(-) diff --git a/extras/Hadrons/Application.cc b/extras/Hadrons/Application.cc index 135e4df4..24618447 100644 --- a/extras/Hadrons/Application.cc +++ b/extras/Hadrons/Application.cc @@ -94,8 +94,6 @@ void Application::run(void) } vm().printContent(); env().printContent(); - //vm().checkGraph(); - vm().memoryProfile(); if (!scheduled_) { schedule(); @@ -185,11 +183,11 @@ GeneticScheduler::ObjFunc memPeak = \ void Application::schedule(void) { - DEFINE_MEMPEAK; + //DEFINE_MEMPEAK; // build module dependency graph LOG(Message) << "Building module graph..." 
<< std::endl; - auto graph = vm().makeModuleGraph(); + auto graph = vm().getModuleGraph(); LOG(Debug) << "Module graph:" << std::endl; LOG(Debug) << graph << std::endl; auto con = graph.getConnectedComponents(); @@ -276,7 +274,7 @@ void Application::saveSchedule(const std::string filename) void Application::loadSchedule(const std::string filename) { - DEFINE_MEMPEAK; + //DEFINE_MEMPEAK; TextReader reader(filename); std::vector program; @@ -290,7 +288,7 @@ void Application::loadSchedule(const std::string filename) program_.push_back(vm().getModuleAddress(name)); } scheduled_ = true; - memPeak_ = memPeak(program_); + //memPeak_ = memPeak(program_); } void Application::printSchedule(void) @@ -323,16 +321,3 @@ void Application::configLoop(void) LOG(Message) << BIG_SEP << " End of measurement " << BIG_SEP << std::endl; env().freeAll(); } - -// memory profile ////////////////////////////////////////////////////////////// -void Application::memoryProfile(void) -{ - auto graph = vm().makeModuleGraph(); - auto program = graph.topoSort(); - bool msg; - - msg = HadronsLogMessage.isActive(); - HadronsLogMessage.Active(false); - - HadronsLogMessage.Active(msg); -} diff --git a/extras/Hadrons/Application.hpp b/extras/Hadrons/Application.hpp index 66488206..8d2537d0 100644 --- a/extras/Hadrons/Application.hpp +++ b/extras/Hadrons/Application.hpp @@ -102,8 +102,6 @@ private: DEFINE_ENV_ALIAS; // virtual machine shortcut DEFINE_VM_ALIAS; - // memory profile - void memoryProfile(void); private: long unsigned int locVol_; std::string parameterFileName_{""}; diff --git a/extras/Hadrons/Module.cc b/extras/Hadrons/Module.cc index 383a5c2e..e5ef0fe4 100644 --- a/extras/Hadrons/Module.cc +++ b/extras/Hadrons/Module.cc @@ -58,8 +58,5 @@ std::string ModuleBase::getRegisteredName(void) void ModuleBase::operator()(void) { setup(); - if (!vm().isDryRun()) - { - execute(); - } + execute(); } diff --git a/extras/Hadrons/VirtualMachine.cc b/extras/Hadrons/VirtualMachine.cc index 
1f4772a6..8667a51c 100644 --- a/extras/Hadrons/VirtualMachine.cc +++ b/extras/Hadrons/VirtualMachine.cc @@ -36,27 +36,6 @@ using namespace Hadrons; /****************************************************************************** * VirtualMachine implementation * ******************************************************************************/ -// dry run ///////////////////////////////////////////////////////////////////// -void VirtualMachine::dryRun(const bool isDry) -{ - dryRun_ = isDry; -} - -bool VirtualMachine::isDryRun(void) const -{ - return dryRun_; -} - -void VirtualMachine::memoryProfile(const bool doMemoryProfile) -{ - memoryProfile_ = doMemoryProfile; -} - -bool VirtualMachine::doMemoryProfile(void) const -{ - return memoryProfile_; -} - // trajectory counter ////////////////////////////////////////////////////////// void VirtualMachine::setTrajectory(const unsigned int traj) { @@ -259,40 +238,192 @@ bool VirtualMachine::hasModule(const std::string name) const return (moduleAddress_.find(name) != moduleAddress_.end()); } -Graph VirtualMachine::makeModuleGraph(void) const +// print VM content //////////////////////////////////////////////////////////// +void VirtualMachine::printContent(void) const { - Graph moduleGraph; + LOG(Debug) << "Modules: " << std::endl; + for (unsigned int i = 0; i < module_.size(); ++i) + { + LOG(Debug) << std::setw(4) << i << ": " + << getModuleName(i) << std::endl; + } +} + +// module graph //////////////////////////////////////////////////////////////// +Graph VirtualMachine::getModuleGraph(void) +{ + if (graphOutdated_) + { + makeModuleGraph(); + graphOutdated_ = false; + } + + return graph_; +} + +void VirtualMachine::makeModuleGraph(void) +{ + Graph graph; // create vertices for (unsigned int m = 0; m < module_.size(); ++m) { - moduleGraph.addVertex(m); + graph.addVertex(m); } // create edges for (unsigned int m = 0; m < module_.size(); ++m) { for (auto &in: module_[m].input) { - 
moduleGraph.addEdge(env().getObjectModule(in), m); + graph.addEdge(env().getObjectModule(in), m); } } - - return moduleGraph; + graph_ = graph; } -// general execution /////////////////////////////////////////////////////////// -#define BIG_SEP "===============" -#define SEP "---------------" -#define MEM_MSG(size) sizeString(size) - -VirtualMachine::Size -VirtualMachine::executeProgram(const std::vector &p) +// memory profile ////////////////////////////////////////////////////////////// +const VirtualMachine::MemoryProfile & VirtualMachine::getMemoryProfile(void) { - Size memPeak = 0, sizeBefore, sizeAfter; - std::vector> freeProg; + if (memoryProfileOutdated_) + { + makeMemoryProfile(); + memoryProfileOutdated_ = false; + } + + return profile_; +} + +void VirtualMachine::makeMemoryProfile(void) +{ + bool protect = env().objectsProtected(); + bool hmsg = HadronsLogMessage.isActive(); + bool gmsg = GridLogMessage.isActive(); + bool err = HadronsLogError.isActive(); + auto program = getModuleGraph().topoSort(); + + resetProfile(); + profile_.module.resize(getNModule()); + env().protectObjects(false); + GridLogMessage.Active(false); + HadronsLogMessage.Active(false); + HadronsLogError.Active(false); + for (auto it = program.rbegin(); it != program.rend(); ++it) + { + auto a = *it; + + if (profile_.module[a].empty()) + { + LOG(Debug) << "Profiling memory for module '" << module_[a].name + << "' (" << a << ")..." 
<< std::endl; + memoryProfile(a); + env().freeAll(); + } + } + env().protectObjects(protect); + GridLogMessage.Active(gmsg); + HadronsLogMessage.Active(hmsg); + HadronsLogError.Active(err); + LOG(Debug) << "Memory profile:" << std::endl; + LOG(Debug) << "----------------" << std::endl; + for (unsigned int a = 0; a < profile_.module.size(); ++a) + { + LOG(Debug) << getModuleName(a) << " (" << a << ")" << std::endl; + for (auto &o: profile_.module[a]) + { + LOG(Debug) << "|__ " << env().getObjectName(o.first) << " (" + << sizeString(o.second) << ")" << std::endl; + } + LOG(Debug) << std::endl; + } + LOG(Debug) << "----------------" << std::endl; +} + +void VirtualMachine::resetProfile(void) +{ + profile_.module.clear(); + profile_.object.clear(); +} + +void VirtualMachine::resizeProfile(void) +{ + if (env().getMaxAddress() > profile_.object.size()) + { + MemoryPrint empty; + + empty.size = 0; + empty.module = -1; + profile_.object.resize(env().getMaxAddress(), empty); + } +} + +void VirtualMachine::updateProfile(const unsigned int address) +{ + resizeProfile(); + for (unsigned int a = 0; a < env().getMaxAddress(); ++a) + { + if (env().hasCreatedObject(a) and (profile_.object[a].module == -1)) + { + profile_.object[a].size = env().getObjectSize(a); + profile_.object[a].module = address; + profile_.module[address][a] = profile_.object[a].size; + if (env().getObjectModule(a) < 0) + { + env().setObjectModule(a, address); + } + } + } +} + +void VirtualMachine::cleanEnvironment(void) +{ + resizeProfile(); + for (unsigned int a = 0; a < env().getMaxAddress(); ++a) + { + if (env().hasCreatedObject(a) and (profile_.object[a].module == -1)) + { + env().freeObject(a); + } + } +} + +void VirtualMachine::memoryProfile(const unsigned int address) +{ + auto m = getModule(address); + + LOG(Debug) << "Setting up module '" << m->getName() + << "' (" << address << ")..." 
<< std::endl; + try + { + m->setup(); + updateProfile(address); + } + catch (Exceptions::Definition &) + { + cleanEnvironment(); + for (auto &in: m->getInput()) + { + memoryProfile(env().getObjectModule(in)); + } + for (auto &ref: m->getReference()) + { + memoryProfile(env().getObjectModule(ref)); + } + m->setup(); + updateProfile(address); + } +} + +void VirtualMachine::memoryProfile(const std::string name) +{ + memoryProfile(getModuleAddress(name)); +} + +// garbage collector /////////////////////////////////////////////////////////// +VirtualMachine::GarbageSchedule +VirtualMachine::makeGarbageSchedule(const std::vector &p) const +{ + GarbageSchedule freeProg; - // build garbage collection schedule - LOG(Debug) << "Building garbage collection schedule..." << std::endl; freeProg.resize(p.size()); for (unsigned int i = 0; i < env().getMaxAddress(); ++i) { @@ -310,34 +441,42 @@ VirtualMachine::executeProgram(const std::vector &p) } } + return freeProg; +} + +// general execution /////////////////////////////////////////////////////////// +#define BIG_SEP "===============" +#define SEP "---------------" +#define MEM_MSG(size) sizeString(size) + +void VirtualMachine::executeProgram(const std::vector &p) const +{ + Size memPeak = 0, sizeBefore, sizeAfter; + GarbageSchedule freeProg; + + // build garbage collection schedule + LOG(Debug) << "Building garbage collection schedule..." << std::endl; + freeProg = makeGarbageSchedule(p); + // program execution LOG(Debug) << "Executing program..." 
<< std::endl; for (unsigned int i = 0; i < p.size(); ++i) { // execute module - if (!isDryRun()) - { - LOG(Message) << SEP << " Measurement step " << i+1 << "/" - << p.size() << " (module '" << module_[p[i]].name - << "') " << SEP << std::endl; - } + LOG(Message) << SEP << " Measurement step " << i + 1 << "/" + << p.size() << " (module '" << module_[p[i]].name + << "') " << SEP << std::endl; (*module_[p[i]].data)(); sizeBefore = env().getTotalSize(); // print used memory after execution - if (!isDryRun()) - { - LOG(Message) << "Allocated objects: " << MEM_MSG(sizeBefore) - << std::endl; - } + LOG(Message) << "Allocated objects: " << MEM_MSG(sizeBefore) + << std::endl; if (sizeBefore > memPeak) { memPeak = sizeBefore; } // garbage collection for step i - if (!isDryRun()) - { - LOG(Message) << "Garbage collection..." << std::endl; - } + LOG(Message) << "Garbage collection..." << std::endl; for (auto &j: freeProg[i]) { env().freeObject(j); @@ -352,25 +491,20 @@ VirtualMachine::executeProgram(const std::vector &p) } } // print used memory after garbage collection if necessary - if (!isDryRun()) + sizeAfter = env().getTotalSize(); + if (sizeBefore != sizeAfter) { - sizeAfter = env().getTotalSize(); - if (sizeBefore != sizeAfter) - { - LOG(Message) << "Allocated objects: " << MEM_MSG(sizeAfter) - << std::endl; - } - else - { - LOG(Message) << "Nothing to free" << std::endl; - } + LOG(Message) << "Allocated objects: " << MEM_MSG(sizeAfter) + << std::endl; + } + else + { + LOG(Message) << "Nothing to free" << std::endl; } } - - return memPeak; } -VirtualMachine::Size VirtualMachine::executeProgram(const std::vector &p) +void VirtualMachine::executeProgram(const std::vector &p) const { std::vector pAddress; @@ -378,138 +512,5 @@ VirtualMachine::Size VirtualMachine::executeProgram(const std::vector profile.object.size()) - { - MemoryPrint empty; - - empty.size = 0; - empty.module = -1; - profile.object.resize(env().getMaxAddress(), empty); - } -} - -void 
VirtualMachine::updateProfile(MemoryProfile &profile, - const unsigned int address) const -{ - resizeProfile(profile); - for (unsigned int a = 0; a < env().getMaxAddress(); ++a) - { - if (env().hasCreatedObject(a) and (profile.object[a].module == -1)) - { - profile.object[a].size = env().getObjectSize(a); - profile.object[a].module = address; - profile.module[address][a] = profile.object[a].size; - } - } -} - -void VirtualMachine::cleanEnvironment(MemoryProfile &profile) const -{ - resizeProfile(profile); - for (unsigned int a = 0; a < env().getMaxAddress(); ++a) - { - if (env().hasCreatedObject(a) and (profile.object[a].module == -1)) - { - env().freeObject(a); - } - } -} - -void VirtualMachine::memoryProfile(MemoryProfile &profile, - const unsigned int address) const -{ - auto m = getModule(address); - - LOG(Debug) << "Setting up module '" << m->getName() << "' (" << address << ")..." << std::endl; - - try - { - m->setup(); - updateProfile(profile, address); - } - catch (Exceptions::Definition &) - { - cleanEnvironment(profile); - for (auto &in: m->getInput()) - { - memoryProfile(profile, env().getObjectModule(in)); - } - for (auto &ref: m->getReference()) - { - memoryProfile(profile, env().getObjectModule(ref)); - } - m->setup(); - updateProfile(profile, address); - } -} - -void VirtualMachine::memoryProfile(MemoryProfile &profile, - const std::string name) const -{ - memoryProfile(profile, getModuleAddress(name)); + executeProgram(pAddress); } diff --git a/extras/Hadrons/VirtualMachine.hpp b/extras/Hadrons/VirtualMachine.hpp index 56e5a8cf..88e70b55 100644 --- a/extras/Hadrons/VirtualMachine.hpp +++ b/extras/Hadrons/VirtualMachine.hpp @@ -51,8 +51,9 @@ class VirtualMachine { SINGLETON_DEFCTOR(VirtualMachine); public: - typedef SITE_SIZE_TYPE Size; - typedef std::unique_ptr ModPt; + typedef SITE_SIZE_TYPE Size; + typedef std::unique_ptr ModPt; + typedef std::vector> GarbageSchedule; struct MemoryPrint { Size size; @@ -73,11 +74,6 @@ private: size_t maxAllocated; 
}; public: - // dry run - void dryRun(const bool isDry); - bool isDryRun(void) const; - void memoryProfile(const bool doMemoryProfile); - bool doMemoryProfile(void) const; // trajectory counter void setTrajectory(const unsigned int traj); unsigned int getTrajectory(void) const; @@ -106,32 +102,47 @@ public: std::string getModuleNamespace(const std::string name) const; bool hasModule(const unsigned int address) const; bool hasModule(const std::string name) const; - Graph makeModuleGraph(void) const; - void checkGraph(void) const; // print VM content void printContent(void) const; + // module graph (could be a const reference if topoSort was const) + Graph getModuleGraph(void); // memory profile - MemoryProfile memoryProfile(void) const; + const MemoryProfile &getMemoryProfile(void); + // garbage collector + GarbageSchedule makeGarbageSchedule(const std::vector &p) const; + // high-water memory function + Size memoryNeeded(const std::vector &p, + const GarbageSchedule &g); + Size memoryNeeded(const std::vector &p); // general execution - Size executeProgram(const std::vector &p); - Size executeProgram(const std::vector &p); + void executeProgram(const std::vector &p) const; + void executeProgram(const std::vector &p) const; private: // environment shortcut DEFINE_ENV_ALIAS; + // module graph + void makeModuleGraph(void); // memory profile - void resizeProfile(MemoryProfile &profile) const; - void updateProfile(MemoryProfile &profile, const unsigned int address) const; - void cleanEnvironment(MemoryProfile &profile) const; - void memoryProfile(MemoryProfile &profile, const std::string name) const; - void memoryProfile(MemoryProfile &profile, const unsigned int address) const; + void makeMemoryProfile(void); + void resetProfile(void); + void resizeProfile(void); + void updateProfile(const unsigned int address); + void cleanEnvironment(void); + void memoryProfile(const std::string name); + void memoryProfile(const unsigned int address); private: // general - bool 
dryRun_{false}, memoryProfile_{false}; unsigned int traj_; // module and related maps std::vector module_; std::map moduleAddress_; std::string currentModule_{""}; + // module graph + bool graphOutdated_{true}; + Graph graph_; + // memory profile + bool memoryProfileOutdated_{true}; + MemoryProfile profile_; }; /****************************************************************************** From 0887566134b7cd7b1a4fb3af69180c0dd9dbed91 Mon Sep 17 00:00:00 2001 From: Antonin Portelli Date: Wed, 13 Dec 2017 16:36:15 +0000 Subject: [PATCH 253/377] Hadrons: scheduler back! --- extras/Hadrons/Application.cc | 110 +++-------------------- extras/Hadrons/Application.hpp | 31 +++---- extras/Hadrons/GeneticScheduler.hpp | 66 +++++++------- extras/Hadrons/VirtualMachine.cc | 134 +++++++++++++++++++++++----- extras/Hadrons/VirtualMachine.hpp | 34 ++++--- 5 files changed, 189 insertions(+), 186 deletions(-) diff --git a/extras/Hadrons/Application.cc b/extras/Hadrons/Application.cc index 24618447..9a3366d4 100644 --- a/extras/Hadrons/Application.cc +++ b/extras/Hadrons/Application.cc @@ -94,10 +94,7 @@ void Application::run(void) } vm().printContent(); env().printContent(); - if (!scheduled_) - { - schedule(); - } + schedule(); printSchedule(); configLoop(); } @@ -122,11 +119,13 @@ void Application::parseParameterFile(const std::string parameterFileName) setPar(par); if (!push(reader, "modules")) { - HADRON_ERROR(Parsing, "Cannot open node 'modules' in parameter file '" + parameterFileName + "'"); + HADRON_ERROR(Parsing, "Cannot open node 'modules' in parameter file '" + + parameterFileName + "'"); } if (!push(reader, "module")) { - HADRON_ERROR(Parsing, "Cannot open node 'modules/module' in parameter file '" + parameterFileName + "'"); + HADRON_ERROR(Parsing, "Cannot open node 'modules/module' in parameter file '" + + parameterFileName + "'"); } do { @@ -160,98 +159,13 @@ void Application::saveParameterFile(const std::string parameterFileName) } // schedule computation 
//////////////////////////////////////////////////////// -#define MEM_MSG(size)\ -sizeString((size)*locVol_) << " (" << sizeString(size) << "/site)" - -#define DEFINE_MEMPEAK \ -GeneticScheduler::ObjFunc memPeak = \ -[this](const std::vector &program)\ -{\ - unsigned int memPeak;\ - bool msg;\ - \ - msg = HadronsLogMessage.isActive();\ - HadronsLogMessage.Active(false);\ - vm().dryRun(true);\ - memPeak = vm().executeProgram(program);\ - vm().dryRun(false);\ - env().freeAll();\ - HadronsLogMessage.Active(msg);\ - \ - return memPeak;\ -} - void Application::schedule(void) { - //DEFINE_MEMPEAK; - - // build module dependency graph - LOG(Message) << "Building module graph..." << std::endl; - auto graph = vm().getModuleGraph(); - LOG(Debug) << "Module graph:" << std::endl; - LOG(Debug) << graph << std::endl; - auto con = graph.getConnectedComponents(); - - // constrained topological sort using a genetic algorithm - // LOG(Message) << "Scheduling computation..." << std::endl; - // LOG(Message) << " #module= " << graph.size() << std::endl; - // LOG(Message) << " population size= " << par_.genetic.popSize << std::endl; - // LOG(Message) << " max. generation= " << par_.genetic.maxGen << std::endl; - // LOG(Message) << " max. cst. 
generation= " << par_.genetic.maxCstGen << std::endl; - // LOG(Message) << " mutation rate= " << par_.genetic.mutationRate << std::endl; - - // unsigned int k = 0, gen, prevPeak, nCstPeak = 0; - // std::random_device rd; - // GeneticScheduler::Parameters par; - - // par.popSize = par_.genetic.popSize; - // par.mutationRate = par_.genetic.mutationRate; - // par.seed = rd(); - // memPeak_ = 0; - // CartesianCommunicator::BroadcastWorld(0, &(par.seed), sizeof(par.seed)); - for (unsigned int i = 0; i < con.size(); ++i) + if (!scheduled_ and !loadedSchedule_) { - // GeneticScheduler scheduler(con[i], memPeak, par); - - // gen = 0; - // do - // { - // LOG(Debug) << "Generation " << gen << ":" << std::endl; - // scheduler.nextGeneration(); - // if (gen != 0) - // { - // if (prevPeak == scheduler.getMinValue()) - // { - // nCstPeak++; - // } - // else - // { - // nCstPeak = 0; - // } - // } - - // prevPeak = scheduler.getMinValue(); - // if (gen % 10 == 0) - // { - // LOG(Iterative) << "Generation " << gen << ": " - // << MEM_MSG(scheduler.getMinValue()) << std::endl; - // } - - // gen++; - // } while ((gen < par_.genetic.maxGen) - // and (nCstPeak < par_.genetic.maxCstGen)); - // auto &t = scheduler.getMinSchedule(); - // if (scheduler.getMinValue() > memPeak_) - // { - // memPeak_ = scheduler.getMinValue(); - // } - auto t = con[i].topoSort(); - for (unsigned int j = 0; j < t.size(); ++j) - { - program_.push_back(t[j]); - } + program_ = vm().schedule(par_.genetic); + scheduled_ = true; } - scheduled_ = true; } void Application::saveSchedule(const std::string filename) @@ -274,8 +188,6 @@ void Application::saveSchedule(const std::string filename) void Application::loadSchedule(const std::string filename) { - //DEFINE_MEMPEAK; - TextReader reader(filename); std::vector program; @@ -287,8 +199,7 @@ void Application::loadSchedule(const std::string filename) { program_.push_back(vm().getModuleAddress(name)); } - scheduled_ = true; - //memPeak_ = memPeak(program_); + 
loadedSchedule_ = true; } void Application::printSchedule(void) @@ -297,7 +208,8 @@ void Application::printSchedule(void) { HADRON_ERROR(Definition, "Computation not scheduled"); } - LOG(Message) << "Schedule (memory peak: " << MEM_MSG(memPeak_) << "):" + auto peak = vm().memoryNeeded(program_); + LOG(Message) << "Schedule (memory needed: " << sizeString(peak) << "):" << std::endl; for (unsigned int i = 0; i < program_.size(); ++i) { diff --git a/extras/Hadrons/Application.hpp b/extras/Hadrons/Application.hpp index 8d2537d0..4b2ce77b 100644 --- a/extras/Hadrons/Application.hpp +++ b/extras/Hadrons/Application.hpp @@ -50,25 +50,13 @@ public: unsigned int, end, unsigned int, step); }; - class GeneticPar: Serializable - { - public: - GeneticPar(void): - popSize{20}, maxGen{1000}, maxCstGen{100}, mutationRate{.1} {}; - public: - GRID_SERIALIZABLE_CLASS_MEMBERS(GeneticPar, - unsigned int, popSize, - unsigned int, maxGen, - unsigned int, maxCstGen, - double , mutationRate); - }; class GlobalPar: Serializable { public: GRID_SERIALIZABLE_CLASS_MEMBERS(GlobalPar, - TrajRange, trajCounter, - GeneticPar, genetic, - std::string, seed); + TrajRange, trajCounter, + VirtualMachine::GeneticPar, genetic, + std::string, seed); }; public: // constructors @@ -103,12 +91,11 @@ private: // virtual machine shortcut DEFINE_VM_ALIAS; private: - long unsigned int locVol_; - std::string parameterFileName_{""}; - GlobalPar par_; - std::vector program_; - Environment::Size memPeak_; - bool scheduled_{false}; + long unsigned int locVol_; + std::string parameterFileName_{""}; + GlobalPar par_; + VirtualMachine::Program program_; + bool scheduled_{false}, loadedSchedule_{false}; }; /****************************************************************************** @@ -119,6 +106,7 @@ template void Application::createModule(const std::string name) { vm().createModule(name); + scheduled_ = false; } template @@ -126,6 +114,7 @@ void Application::createModule(const std::string name, const typename M::Par 
&par) { vm().createModule(name, par); + scheduled_ = false; } END_HADRONS_NAMESPACE diff --git a/extras/Hadrons/GeneticScheduler.hpp b/extras/Hadrons/GeneticScheduler.hpp index 3b0195e7..f199f1ed 100644 --- a/extras/Hadrons/GeneticScheduler.hpp +++ b/extras/Hadrons/GeneticScheduler.hpp @@ -38,13 +38,13 @@ BEGIN_HADRONS_NAMESPACE /****************************************************************************** * Scheduler based on a genetic algorithm * ******************************************************************************/ -template +template class GeneticScheduler { public: - typedef std::vector Gene; - typedef std::pair GenePair; - typedef std::function ObjFunc; + typedef std::vector Gene; + typedef std::pair GenePair; + typedef std::function ObjFunc; struct Parameters { double mutationRate; @@ -65,7 +65,7 @@ public: void benchmarkCrossover(const unsigned int nIt); // print population friend std::ostream & operator<<(std::ostream &out, - const GeneticScheduler &s) + const GeneticScheduler &s) { out << "["; for (auto &p: s.population_) @@ -87,19 +87,19 @@ private: void mutation(Gene &m, const Gene &c); private: - Graph &graph_; - const ObjFunc &func_; - const Parameters par_; - std::multimap population_; - std::mt19937 gen_; + Graph &graph_; + const ObjFunc &func_; + const Parameters par_; + std::multimap population_; + std::mt19937 gen_; }; /****************************************************************************** * template implementation * ******************************************************************************/ // constructor ///////////////////////////////////////////////////////////////// -template -GeneticScheduler::GeneticScheduler(Graph &graph, const ObjFunc &func, +template +GeneticScheduler::GeneticScheduler(Graph &graph, const ObjFunc &func, const Parameters &par) : graph_(graph) , func_(func) @@ -109,22 +109,22 @@ GeneticScheduler::GeneticScheduler(Graph &graph, const ObjFunc &func, } // access 
////////////////////////////////////////////////////////////////////// -template -const typename GeneticScheduler::Gene & -GeneticScheduler::getMinSchedule(void) +template +const typename GeneticScheduler::Gene & +GeneticScheduler::getMinSchedule(void) { return population_.begin()->second; } -template -int GeneticScheduler::getMinValue(void) +template +int GeneticScheduler::getMinValue(void) { return population_.begin()->first; } // breed a new generation ////////////////////////////////////////////////////// -template -void GeneticScheduler::nextGeneration(void) +template +void GeneticScheduler::nextGeneration(void) { // random initialization of the population if necessary if (population_.size() != par_.popSize) @@ -158,8 +158,8 @@ void GeneticScheduler::nextGeneration(void) } // evolution steps ///////////////////////////////////////////////////////////// -template -void GeneticScheduler::initPopulation(void) +template +void GeneticScheduler::initPopulation(void) { population_.clear(); for (unsigned int i = 0; i < par_.popSize; ++i) @@ -170,8 +170,8 @@ void GeneticScheduler::initPopulation(void) } } -template -void GeneticScheduler::doCrossover(void) +template +void GeneticScheduler::doCrossover(void) { auto p = selectPair(); Gene &p1 = *(p.first), &p2 = *(p.second); @@ -185,8 +185,8 @@ void GeneticScheduler::doCrossover(void) } } -template -void GeneticScheduler::doMutation(void) +template +void GeneticScheduler::doMutation(void) { std::uniform_real_distribution mdis(0., 1.); std::uniform_int_distribution pdis(0, population_.size() - 1); @@ -206,8 +206,8 @@ void GeneticScheduler::doMutation(void) } // genetic operators /////////////////////////////////////////////////////////// -template -typename GeneticScheduler::GenePair GeneticScheduler::selectPair(void) +template +typename GeneticScheduler::GenePair GeneticScheduler::selectPair(void) { std::vector prob; unsigned int ind; @@ -233,8 +233,8 @@ typename GeneticScheduler::GenePair 
GeneticScheduler::selectPair(void) return std::make_pair(p1, p2); } -template -void GeneticScheduler::crossover(Gene &c1, Gene &c2, const Gene &p1, +template +void GeneticScheduler::crossover(Gene &c1, Gene &c2, const Gene &p1, const Gene &p2) { Gene buf; @@ -268,8 +268,8 @@ void GeneticScheduler::crossover(Gene &c1, Gene &c2, const Gene &p1, } } -template -void GeneticScheduler::mutation(Gene &m, const Gene &c) +template +void GeneticScheduler::mutation(Gene &m, const Gene &c) { Gene buf; std::uniform_int_distribution dis(0, c.size() - 1); @@ -298,8 +298,8 @@ void GeneticScheduler::mutation(Gene &m, const Gene &c) } } -template -void GeneticScheduler::benchmarkCrossover(const unsigned int nIt) +template +void GeneticScheduler::benchmarkCrossover(const unsigned int nIt) { Gene p1, p2, c1, c2; double neg = 0., eq = 0., pos = 0., total; diff --git a/extras/Hadrons/VirtualMachine.cc b/extras/Hadrons/VirtualMachine.cc index 8667a51c..8a6bd149 100644 --- a/extras/Hadrons/VirtualMachine.cc +++ b/extras/Hadrons/VirtualMachine.cc @@ -27,6 +27,7 @@ See the full license in the file "LICENSE" in the top level distribution directo /* END LEGAL */ #include +#include #include using namespace Grid; @@ -133,6 +134,8 @@ void VirtualMachine::pushModule(VirtualMachine::ModPt &pt) } } } + graphOutdated_ = true; + memoryProfileOutdated_ = true; } else { @@ -364,6 +367,7 @@ void VirtualMachine::updateProfile(const unsigned int address) if (env().hasCreatedObject(a) and (profile_.object[a].module == -1)) { profile_.object[a].size = env().getObjectSize(a); + profile_.object[a].storage = env().getObjectStorage(a); profile_.object[a].module = address; profile_.module[address][a] = profile_.object[a].size; if (env().getObjectModule(a) < 0) @@ -419,37 +423,130 @@ void VirtualMachine::memoryProfile(const std::string name) } // garbage collector /////////////////////////////////////////////////////////// -VirtualMachine::GarbageSchedule -VirtualMachine::makeGarbageSchedule(const std::vector &p) 
const +VirtualMachine::GarbageSchedule +VirtualMachine::makeGarbageSchedule(const Program &p) const { GarbageSchedule freeProg; freeProg.resize(p.size()); - for (unsigned int i = 0; i < env().getMaxAddress(); ++i) + for (unsigned int a = 0; a < env().getMaxAddress(); ++a) { - auto pred = [i, this](const unsigned int j) + if (env().getObjectStorage(a) == Environment::Storage::temporary) { - auto &in = module_[j].input; - auto it = std::find(in.begin(), in.end(), i); - - return (it != in.end()) or (j == env().getObjectModule(i)); - }; - auto it = std::find_if(p.rbegin(), p.rend(), pred); - if (it != p.rend()) + auto it = std::find(p.begin(), p.end(), env().getObjectModule(a)); + + if (it != p.end()) + { + freeProg[std::distance(p.begin(), it)].insert(a); + } + } + else if (env().getObjectStorage(a) == Environment::Storage::object) { - freeProg[std::distance(it, p.rend()) - 1].insert(i); + auto pred = [a, this](const unsigned int b) + { + auto &in = module_[b].input; + auto it = std::find(in.begin(), in.end(), a); + + return (it != in.end()) or (b == env().getObjectModule(a)); + }; + auto it = std::find_if(p.rbegin(), p.rend(), pred); + if (it != p.rend()) + { + freeProg[std::distance(it, p.rend()) - 1].insert(a); + } } } return freeProg; } +// high-water memory function ////////////////////////////////////////////////// +VirtualMachine::Size VirtualMachine::memoryNeeded(const Program &p) +{ + const MemoryProfile &profile = getMemoryProfile(); + GarbageSchedule freep = makeGarbageSchedule(p); + Size current = 0, max = 0; + + for (unsigned int i = 0; i < p.size(); ++i) + { + for (auto &o: profile.module[p[i]]) + { + current += o.second; + } + max = std::max(current, max); + for (auto &o: freep[i]) + { + current -= profile.object[o].size; + } + } + + return max; +} + +// genetic scheduler /////////////////////////////////////////////////////////// +VirtualMachine::Program VirtualMachine::schedule(const GeneticPar &par) +{ + typedef GeneticScheduler Scheduler; + + auto 
graph = getModuleGraph(); + + //constrained topological sort using a genetic algorithm + LOG(Message) << "Scheduling computation..." << std::endl; + LOG(Message) << " #module= " << graph.size() << std::endl; + LOG(Message) << " population size= " << par.popSize << std::endl; + LOG(Message) << " max. generation= " << par.maxGen << std::endl; + LOG(Message) << " max. cst. generation= " << par.maxCstGen << std::endl; + LOG(Message) << " mutation rate= " << par.mutationRate << std::endl; + + unsigned int k = 0, gen, prevPeak, nCstPeak = 0; + std::random_device rd; + Scheduler::Parameters gpar; + + gpar.popSize = par.popSize; + gpar.mutationRate = par.mutationRate; + gpar.seed = rd(); + CartesianCommunicator::BroadcastWorld(0, &(gpar.seed), sizeof(gpar.seed)); + Scheduler::ObjFunc memPeak = [this](const Program &p)->Size + { + return memoryNeeded(p); + }; + Scheduler scheduler(graph, memPeak, gpar); + gen = 0; + do + { + LOG(Debug) << "Generation " << gen << ":" << std::endl; + scheduler.nextGeneration(); + if (gen != 0) + { + if (prevPeak == scheduler.getMinValue()) + { + nCstPeak++; + } + else + { + nCstPeak = 0; + } + } + + prevPeak = scheduler.getMinValue(); + if (gen % 10 == 0) + { + LOG(Iterative) << "Generation " << gen << ": " + << sizeString(scheduler.getMinValue()) << std::endl; + } + + gen++; + } while ((gen < par.maxGen) and (nCstPeak < par.maxCstGen)); + + return scheduler.getMinSchedule(); +} + // general execution /////////////////////////////////////////////////////////// #define BIG_SEP "===============" #define SEP "---------------" #define MEM_MSG(size) sizeString(size) -void VirtualMachine::executeProgram(const std::vector &p) const +void VirtualMachine::executeProgram(const Program &p) const { Size memPeak = 0, sizeBefore, sizeAfter; GarbageSchedule freeProg; @@ -481,15 +578,6 @@ void VirtualMachine::executeProgram(const std::vector &p) const { env().freeObject(j); } - // free temporaries - for (unsigned int i = 0; i < env().getMaxAddress(); ++i) - 
{ - if ((env().getObjectStorage(i) == Environment::Storage::temporary) - and env().hasCreatedObject(i)) - { - env().freeObject(i); - } - } // print used memory after garbage collection if necessary sizeAfter = env().getTotalSize(); if (sizeBefore != sizeAfter) @@ -506,7 +594,7 @@ void VirtualMachine::executeProgram(const std::vector &p) const void VirtualMachine::executeProgram(const std::vector &p) const { - std::vector pAddress; + Program pAddress; for (auto &n: p) { diff --git a/extras/Hadrons/VirtualMachine.hpp b/extras/Hadrons/VirtualMachine.hpp index 88e70b55..a411c108 100644 --- a/extras/Hadrons/VirtualMachine.hpp +++ b/extras/Hadrons/VirtualMachine.hpp @@ -51,19 +51,33 @@ class VirtualMachine { SINGLETON_DEFCTOR(VirtualMachine); public: - typedef SITE_SIZE_TYPE Size; - typedef std::unique_ptr ModPt; - typedef std::vector> GarbageSchedule; + typedef SITE_SIZE_TYPE Size; + typedef std::unique_ptr ModPt; + typedef std::vector> GarbageSchedule; + typedef std::vector Program; struct MemoryPrint { - Size size; - unsigned int module; + Size size; + Environment::Storage storage; + unsigned int module; }; struct MemoryProfile { std::vector> module; std::vector object; }; + class GeneticPar: Serializable + { + public: + GeneticPar(void): + popSize{20}, maxGen{1000}, maxCstGen{100}, mutationRate{.1} {}; + public: + GRID_SERIALIZABLE_CLASS_MEMBERS(GeneticPar, + unsigned int, popSize, + unsigned int, maxGen, + unsigned int, maxCstGen, + double , mutationRate); + }; private: struct ModuleInfo { @@ -109,13 +123,13 @@ public: // memory profile const MemoryProfile &getMemoryProfile(void); // garbage collector - GarbageSchedule makeGarbageSchedule(const std::vector &p) const; + GarbageSchedule makeGarbageSchedule(const Program &p) const; // high-water memory function - Size memoryNeeded(const std::vector &p, - const GarbageSchedule &g); - Size memoryNeeded(const std::vector &p); + Size memoryNeeded(const Program &p); + // genetic scheduler + Program schedule(const GeneticPar 
&par); // general execution - void executeProgram(const std::vector &p) const; + void executeProgram(const Program &p) const; void executeProgram(const std::vector &p) const; private: // environment shortcut From 842754bea9f8c4ba42c9295854342c6857f061bf Mon Sep 17 00:00:00 2001 From: Antonin Portelli Date: Wed, 13 Dec 2017 19:41:41 +0000 Subject: [PATCH 254/377] Hadrons: most modules ported to the new interface, compiles but untested --- extras/Hadrons/Module.hpp | 4 +- extras/Hadrons/Modules.hpp | 38 +++++------ extras/Hadrons/Modules/MAction/DWF.hpp | 11 ++-- extras/Hadrons/Modules/MAction/Wilson.hpp | 1 + .../Hadrons/Modules/MContraction/Baryon.hpp | 32 +++++++--- .../Hadrons/Modules/MContraction/DiscLoop.hpp | 17 +++-- .../Hadrons/Modules/MContraction/Gamma3pt.hpp | 21 +++++-- extras/Hadrons/Modules/MContraction/Meson.hpp | 11 ++-- .../Modules/MContraction/WardIdentity.hpp | 36 ++++++++--- .../Modules/MContraction/WeakHamiltonian.hpp | 1 + .../MContraction/WeakHamiltonianEye.cc | 41 ++++++++---- .../MContraction/WeakHamiltonianNonEye.cc | 44 ++++++++----- .../MContraction/WeakNeutral4ptDisc.cc | 39 ++++++++---- extras/Hadrons/Modules/MFermion/GaugeProp.hpp | 16 ++--- extras/Hadrons/Modules/MGauge/Load.cc | 19 ++++-- extras/Hadrons/Modules/MGauge/Load.hpp | 1 + extras/Hadrons/Modules/MGauge/Random.cc | 16 ++++- extras/Hadrons/Modules/MGauge/Random.hpp | 1 + extras/Hadrons/Modules/MGauge/StochEm.cc | 29 +++++---- extras/Hadrons/Modules/MGauge/StochEm.hpp | 1 + extras/Hadrons/Modules/MGauge/Unit.cc | 1 + extras/Hadrons/Modules/MLoop/NoiseLoop.hpp | 18 ++++-- extras/Hadrons/Modules/MSink/Point.hpp | 13 ++-- extras/Hadrons/Modules/MSink/Smear.hpp | 23 ++++--- extras/Hadrons/Modules/MSource/Point.hpp | 11 ++-- .../Hadrons/Modules/MSource/SeqConserved.hpp | 17 +++-- extras/Hadrons/Modules/MSource/SeqGamma.hpp | 53 +++++++++++----- extras/Hadrons/Modules/MSource/Wall.hpp | 49 ++++++++++----- extras/Hadrons/Modules/MSource/Z2.hpp | 38 ++++++++--- 
.../Modules/MUtilities/TestSeqConserved.hpp | 44 ++++++++----- .../Modules/MUtilities/TestSeqGamma.hpp | 28 ++++++--- extras/Hadrons/modules.inc | 63 +++++++++---------- tests/hadrons/Test_hadrons.hpp | 40 ++++++------ 33 files changed, 504 insertions(+), 273 deletions(-) diff --git a/extras/Hadrons/Module.hpp b/extras/Hadrons/Module.hpp index 25c0ac05..390573d8 100644 --- a/extras/Hadrons/Module.hpp +++ b/extras/Hadrons/Module.hpp @@ -92,8 +92,8 @@ static ns##mod##ModuleRegistrar ns##mod##ModuleRegistrarInstance; #define envGet(type, name)\ *env().template getObject(name) -#define envGetTmp(type, name)\ -*env().template getObject(getName() + "_tmp_" + name) +#define envGetTmp(type, var)\ +type &var = *env().template getObject(getName() + "_tmp_" + #var) #define envHasType(type, name)\ env().template isObjectOfType(name) diff --git a/extras/Hadrons/Modules.hpp b/extras/Hadrons/Modules.hpp index bb574a14..61a20058 100644 --- a/extras/Hadrons/Modules.hpp +++ b/extras/Hadrons/Modules.hpp @@ -30,31 +30,31 @@ See the full license in the file "LICENSE" in the top level distribution directo #include #include -// #include -// #include -// #include +#include +#include +#include #include -// #include -// #include -// #include -// #include -// #include +#include +#include +#include +#include +#include #include -// #include -// #include -// #include +#include +#include +#include #include -// #include +#include // #include // #include // #include #include -// #include +#include #include #include -// #include -// #include -// #include -// #include -// #include -// #include +#include +#include +#include +#include +#include +#include diff --git a/extras/Hadrons/Modules/MAction/DWF.hpp b/extras/Hadrons/Modules/MAction/DWF.hpp index 91e4ec94..0cb9a4cb 100644 --- a/extras/Hadrons/Modules/MAction/DWF.hpp +++ b/extras/Hadrons/Modules/MAction/DWF.hpp @@ -119,12 +119,13 @@ void TDWF::setup(void) << std::endl; LOG(Message) << "Fermion boundary conditions: " << par().boundary << std::endl; 
+ env().createGrid(par().Ls); - auto &U = envGet(LatticeGaugeField, par().gauge); - auto &g4 = *env().getGrid(); - auto &grb4 = *env().getRbGrid(); - auto &g5 = *env().getGrid(par().Ls); - auto &grb5 = *env().getRbGrid(par().Ls); + auto &U = envGet(LatticeGaugeField, par().gauge); + auto &g4 = *env().getGrid(); + auto &grb4 = *env().getRbGrid(); + auto &g5 = *env().getGrid(par().Ls); + auto &grb5 = *env().getRbGrid(par().Ls); std::vector boundary = strToVec(par().boundary); typename DomainWallFermion::ImplParams implParams(boundary); envCreateDerived(FMat, DomainWallFermion, getName(), par().Ls, U, g5, diff --git a/extras/Hadrons/Modules/MAction/Wilson.hpp b/extras/Hadrons/Modules/MAction/Wilson.hpp index 1ca3bf59..a6b3f0d6 100644 --- a/extras/Hadrons/Modules/MAction/Wilson.hpp +++ b/extras/Hadrons/Modules/MAction/Wilson.hpp @@ -115,6 +115,7 @@ void TWilson::setup(void) << " using gauge field '" << par().gauge << "'" << std::endl; LOG(Message) << "Fermion boundary conditions: " << par().boundary << std::endl; + auto &U = envGet(LatticeGaugeField, par().gauge); auto &grid = *env().getGrid(); auto &gridRb = *env().getRbGrid(); diff --git a/extras/Hadrons/Modules/MContraction/Baryon.hpp b/extras/Hadrons/Modules/MContraction/Baryon.hpp index da927391..28f6aa51 100644 --- a/extras/Hadrons/Modules/MContraction/Baryon.hpp +++ b/extras/Hadrons/Modules/MContraction/Baryon.hpp @@ -71,8 +71,11 @@ public: virtual ~TBaryon(void) = default; // dependency relation virtual std::vector getInput(void); + virtual std::vector getReference(void); virtual std::vector getOutput(void); protected: + // setup + virtual void setup(void); // execution virtual void execute(void); }; @@ -97,14 +100,29 @@ std::vector TBaryon::getInput(void) return input; } +template +std::vector TBaryon::getReference(void) +{ + std::vector ref = {}; + + return ref; +} + template std::vector TBaryon::getOutput(void) { - std::vector out = {getName()}; + std::vector out = {}; return out; } +// setup 
/////////////////////////////////////////////////////////////////////// +template +void TBaryon::setup(void) +{ + envTmpLat(LatticeComplex, "c"); +} + // execution /////////////////////////////////////////////////////////////////// template void TBaryon::execute(void) @@ -113,12 +131,12 @@ void TBaryon::execute(void) << " quarks '" << par().q1 << "', '" << par().q2 << "', and '" << par().q3 << "'" << std::endl; - CorrWriter writer(par().output); - PropagatorField1 &q1 = *env().template getObject(par().q1); - PropagatorField2 &q2 = *env().template getObject(par().q2); - PropagatorField3 &q3 = *env().template getObject(par().q2); - LatticeComplex c(env().getGrid()); - Result result; + CorrWriter writer(par().output); + auto &q1 = envGet(PropagatorField1, par().q1); + auto &q2 = envGet(PropagatorField2, par().q2); + auto &q3 = envGet(PropagatorField3, par().q2); + envGetTmp(LatticeComplex, c); + Result result; // FIXME: do contractions diff --git a/extras/Hadrons/Modules/MContraction/DiscLoop.hpp b/extras/Hadrons/Modules/MContraction/DiscLoop.hpp index f8da3943..c0fbe296 100644 --- a/extras/Hadrons/Modules/MContraction/DiscLoop.hpp +++ b/extras/Hadrons/Modules/MContraction/DiscLoop.hpp @@ -67,6 +67,7 @@ public: virtual ~TDiscLoop(void) = default; // dependency relation virtual std::vector getInput(void); + virtual std::vector getReference(void); virtual std::vector getOutput(void); protected: // setup @@ -95,10 +96,18 @@ std::vector TDiscLoop::getInput(void) return in; } +template +std::vector TDiscLoop::getReference(void) +{ + std::vector out = {}; + + return out; +} + template std::vector TDiscLoop::getOutput(void) { - std::vector out = {getName()}; + std::vector out = {}; return out; } @@ -107,7 +116,7 @@ std::vector TDiscLoop::getOutput(void) template void TDiscLoop::setup(void) { - + envTmpLat(LatticeComplex, "c"); } // execution /////////////////////////////////////////////////////////////////// @@ -119,12 +128,12 @@ void TDiscLoop::execute(void) << " 
insertion." << std::endl; CorrWriter writer(par().output); - PropagatorField &q_loop = *env().template getObject(par().q_loop); - LatticeComplex c(env().getGrid()); + auto &q_loop = envGet(PropagatorField, par().q_loop); Gamma gamma(par().gamma); std::vector buf; Result result; + envGetTmp(LatticeComplex, c); c = trace(gamma*q_loop); sliceSum(c, buf, Tp); diff --git a/extras/Hadrons/Modules/MContraction/Gamma3pt.hpp b/extras/Hadrons/Modules/MContraction/Gamma3pt.hpp index a8653186..4a6baf3e 100644 --- a/extras/Hadrons/Modules/MContraction/Gamma3pt.hpp +++ b/extras/Hadrons/Modules/MContraction/Gamma3pt.hpp @@ -98,6 +98,7 @@ public: virtual ~TGamma3pt(void) = default; // dependency relation virtual std::vector getInput(void); + virtual std::vector getReference(void); virtual std::vector getOutput(void); protected: // setup @@ -126,10 +127,18 @@ std::vector TGamma3pt::getInput(void) return in; } +template +std::vector TGamma3pt::getReference(void) +{ + std::vector ref = {}; + + return ref; +} + template std::vector TGamma3pt::getOutput(void) { - std::vector out = {getName()}; + std::vector out = {}; return out; } @@ -138,7 +147,7 @@ std::vector TGamma3pt::getOutput(void) template void TGamma3pt::setup(void) { - + envTmpLat(LatticeComplex, "c"); } // execution /////////////////////////////////////////////////////////////////// @@ -153,10 +162,9 @@ void TGamma3pt::execute(void) // Initialise variables. q2 and q3 are normal propagators, q1 may be // sink smeared. 
CorrWriter writer(par().output); - SlicedPropagator1 &q1 = *env().template getObject(par().q1); - PropagatorField2 &q2 = *env().template getObject(par().q2); - PropagatorField3 &q3 = *env().template getObject(par().q3); - LatticeComplex c(env().getGrid()); + auto &q1 = envGet(SlicedPropagator1, par().q1); + auto &q2 = envGet(PropagatorField2, par().q2); + auto &q3 = envGet(PropagatorField2, par().q3); Gamma g5(Gamma::Algebra::Gamma5); Gamma gamma(par().gamma); std::vector buf; @@ -165,6 +173,7 @@ void TGamma3pt::execute(void) // Extract relevant timeslice of sinked propagator q1, then contract & // sum over all spacial positions of gamma insertion. SitePropagator1 q1Snk = q1[par().tSnk]; + envGetTmp(LatticeComplex, c); c = trace(g5*q1Snk*adj(q2)*(g5*gamma)*q3); sliceSum(c, buf, Tp); diff --git a/extras/Hadrons/Modules/MContraction/Meson.hpp b/extras/Hadrons/Modules/MContraction/Meson.hpp index 3c179d44..1fd86d3a 100644 --- a/extras/Hadrons/Modules/MContraction/Meson.hpp +++ b/extras/Hadrons/Modules/MContraction/Meson.hpp @@ -161,6 +161,7 @@ void TMeson::parseGammaString(std::vector &gammaList) // Parse individual contractions from input string. 
gammaList = strToVec(par().gammas); } + envTmpLat(LatticeComplex, "c"); } // execution /////////////////////////////////////////////////////////////////// @@ -192,8 +193,8 @@ void TMeson::execute(void) if (envHasType(SlicedPropagator1, par().q1) and envHasType(SlicedPropagator2, par().q2)) { - SlicedPropagator1 &q1 = envGet(SlicedPropagator1, par().q1); - SlicedPropagator2 &q2 = envGet(SlicedPropagator2, par().q2); + auto &q1 = envGet(SlicedPropagator1, par().q1); + auto &q2 = envGet(SlicedPropagator2, par().q2); LOG(Message) << "(propagator already sinked)" << std::endl; for (unsigned int i = 0; i < result.size(); ++i) @@ -209,10 +210,10 @@ void TMeson::execute(void) } else { - PropagatorField1 &q1 = envGet(PropagatorField1, par().q1); - PropagatorField2 &q2 = envGet(PropagatorField2, par().q2); - LatticeComplex c(env().getGrid()); + auto &q1 = envGet(PropagatorField1, par().q1); + auto &q2 = envGet(PropagatorField2, par().q2); + envGetTmp(LatticeComplex, c); LOG(Message) << "(using sink '" << par().sink << "')" << std::endl; for (unsigned int i = 0; i < result.size(); ++i) { diff --git a/extras/Hadrons/Modules/MContraction/WardIdentity.hpp b/extras/Hadrons/Modules/MContraction/WardIdentity.hpp index 90922c27..c92c7243 100644 --- a/extras/Hadrons/Modules/MContraction/WardIdentity.hpp +++ b/extras/Hadrons/Modules/MContraction/WardIdentity.hpp @@ -73,6 +73,7 @@ public: virtual ~TWardIdentity(void) = default; // dependency relation virtual std::vector getInput(void); + virtual std::vector getReference(void); virtual std::vector getOutput(void); protected: // setup @@ -103,10 +104,18 @@ std::vector TWardIdentity::getInput(void) return in; } +template +std::vector TWardIdentity::getReference(void) +{ + std::vector ref = {}; + + return ref; +} + template std::vector TWardIdentity::getOutput(void) { - std::vector out = {getName()}; + std::vector out = {}; return out; } @@ -120,6 +129,15 @@ void TWardIdentity::setup(void) { HADRON_ERROR(Size, "Ls mismatch between quark 
action and propagator"); } + envTmpLat(PropagatorField, "tmp"); + envTmpLat(PropagatorField, "vector_WI"); + if (par().test_axial) + { + envTmpLat(PropagatorField, "psi"); + envTmpLat(LatticeComplex, "PP"); + envTmpLat(LatticeComplex, "axial_defect"); + envTmpLat(LatticeComplex, "PJ5q"); + } } // execution /////////////////////////////////////////////////////////////////// @@ -129,12 +147,13 @@ void TWardIdentity::execute(void) LOG(Message) << "Performing Ward Identity checks for quark '" << par().q << "'." << std::endl; - PropagatorField tmp(env().getGrid()), vector_WI(env().getGrid()); - PropagatorField &q = *env().template getObject(par().q); - FMat &act = *(env().template getObject(par().action)); - Gamma g5(Gamma::Algebra::Gamma5); + auto &q = envGet(PropagatorField, par().q); + auto &act = envGet(FMat, par().action); + Gamma g5(Gamma::Algebra::Gamma5); // Compute D_mu V_mu, D here is backward derivative. + envGetTmp(PropagatorField, tmp); + envGetTmp(PropagatorField, vector_WI); vector_WI = zero; for (unsigned int mu = 0; mu < Nd; ++mu) { @@ -149,9 +168,10 @@ void TWardIdentity::execute(void) if (par().test_axial) { - PropagatorField psi(env().getGrid()); - LatticeComplex PP(env().getGrid()), axial_defect(env().getGrid()), - PJ5q(env().getGrid()); + envGetTmp(PropagatorField, psi); + envGetTmp(LatticeComplex, PP); + envGetTmp(LatticeComplex, axial_defect); + envGetTmp(LatticeComplex, PJ5q); std::vector axial_buf; // Compute , D is backwards derivative. 
diff --git a/extras/Hadrons/Modules/MContraction/WeakHamiltonian.hpp b/extras/Hadrons/Modules/MContraction/WeakHamiltonian.hpp index 7df40370..2b53c87a 100644 --- a/extras/Hadrons/Modules/MContraction/WeakHamiltonian.hpp +++ b/extras/Hadrons/Modules/MContraction/WeakHamiltonian.hpp @@ -99,6 +99,7 @@ public:\ virtual ~T##modname(void) = default;\ /* dependency relation */ \ virtual std::vector getInput(void);\ + virtual std::vector getReference(void);\ virtual std::vector getOutput(void);\ public:\ std::vector VA_label = {"V", "A"};\ diff --git a/extras/Hadrons/Modules/MContraction/WeakHamiltonianEye.cc b/extras/Hadrons/Modules/MContraction/WeakHamiltonianEye.cc index 314b080a..7a73a7e3 100644 --- a/extras/Hadrons/Modules/MContraction/WeakHamiltonianEye.cc +++ b/extras/Hadrons/Modules/MContraction/WeakHamiltonianEye.cc @@ -74,9 +74,16 @@ std::vector TWeakHamiltonianEye::getInput(void) return in; } +std::vector TWeakHamiltonianEye::getReference(void) +{ + std::vector out = {}; + + return out; +} + std::vector TWeakHamiltonianEye::getOutput(void) { - std::vector out = {getName()}; + std::vector out = {}; return out; } @@ -84,7 +91,15 @@ std::vector TWeakHamiltonianEye::getOutput(void) // setup /////////////////////////////////////////////////////////////////////// void TWeakHamiltonianEye::setup(void) { + unsigned int ndim = env().getNd(); + envTmpLat(LatticeComplex, "expbuf"); + envTmpLat(PropagatorField, "tmp1"); + envTmpLat(LatticeComplex, "tmp2"); + envTmp(std::vector, "S_body", 1, ndim, PropagatorField(env().getGrid())); + envTmp(std::vector, "S_loop", 1, ndim, PropagatorField(env().getGrid())); + envTmp(std::vector, "E_body", 1, ndim, LatticeComplex(env().getGrid())); + envTmp(std::vector, "E_loop", 1, ndim, LatticeComplex(env().getGrid())); } // execution /////////////////////////////////////////////////////////////////// @@ -96,22 +111,22 @@ void TWeakHamiltonianEye::execute(void) << "'." 
<< std::endl; CorrWriter writer(par().output); - SlicedPropagator &q1 = *env().template getObject(par().q1); - PropagatorField &q2 = *env().template getObject(par().q2); - PropagatorField &q3 = *env().template getObject(par().q3); - PropagatorField &q4 = *env().template getObject(par().q4); - Gamma g5 = Gamma(Gamma::Algebra::Gamma5); - LatticeComplex expbuf(env().getGrid()); + auto &q1 = envGet(SlicedPropagator, par().q1); + auto &q2 = envGet(PropagatorField, par().q2); + auto &q3 = envGet(PropagatorField, par().q3); + auto &q4 = envGet(PropagatorField, par().q4); + Gamma g5 = Gamma(Gamma::Algebra::Gamma5); std::vector corrbuf; std::vector result(n_eye_diag); unsigned int ndim = env().getNd(); - PropagatorField tmp1(env().getGrid()); - LatticeComplex tmp2(env().getGrid()); - std::vector S_body(ndim, tmp1); - std::vector S_loop(ndim, tmp1); - std::vector E_body(ndim, tmp2); - std::vector E_loop(ndim, tmp2); + envGetTmp(LatticeComplex, expbuf); + envGetTmp(PropagatorField, tmp1); + envGetTmp(LatticeComplex, tmp2); + envGetTmp(std::vector, S_body); + envGetTmp(std::vector, S_loop); + envGetTmp(std::vector, E_body); + envGetTmp(std::vector, E_loop); // Get sink timeslice of q1. 
SitePropagator q1Snk = q1[par().tSnk]; diff --git a/extras/Hadrons/Modules/MContraction/WeakHamiltonianNonEye.cc b/extras/Hadrons/Modules/MContraction/WeakHamiltonianNonEye.cc index 2c4df68a..c333713d 100644 --- a/extras/Hadrons/Modules/MContraction/WeakHamiltonianNonEye.cc +++ b/extras/Hadrons/Modules/MContraction/WeakHamiltonianNonEye.cc @@ -74,9 +74,15 @@ std::vector TWeakHamiltonianNonEye::getInput(void) return in; } +std::vector TWeakHamiltonianNonEye::getReference(void) +{ + std::vector out = {}; + + return out; +} std::vector TWeakHamiltonianNonEye::getOutput(void) { - std::vector out = {getName()}; + std::vector out = {}; return out; } @@ -84,7 +90,15 @@ std::vector TWeakHamiltonianNonEye::getOutput(void) // setup /////////////////////////////////////////////////////////////////////// void TWeakHamiltonianNonEye::setup(void) { + unsigned int ndim = env().getNd(); + envTmpLat(LatticeComplex, "expbuf"); + envTmpLat(PropagatorField, "tmp1"); + envTmpLat(LatticeComplex, "tmp2"); + envTmp(std::vector, "C_i_side_loop", 1, ndim, PropagatorField(env().getGrid())); + envTmp(std::vector, "C_f_side_loop", 1, ndim, PropagatorField(env().getGrid())); + envTmp(std::vector, "W_i_side_loop", 1, ndim, LatticeComplex(env().getGrid())); + envTmp(std::vector, "W_f_side_loop", 1, ndim, LatticeComplex(env().getGrid())); } // execution /////////////////////////////////////////////////////////////////// @@ -95,23 +109,23 @@ void TWeakHamiltonianNonEye::execute(void) << par().q2 << ", '" << par().q3 << "' and '" << par().q4 << "'." 
<< std::endl; - CorrWriter writer(par().output); - PropagatorField &q1 = *env().template getObject(par().q1); - PropagatorField &q2 = *env().template getObject(par().q2); - PropagatorField &q3 = *env().template getObject(par().q3); - PropagatorField &q4 = *env().template getObject(par().q4); - Gamma g5 = Gamma(Gamma::Algebra::Gamma5); - LatticeComplex expbuf(env().getGrid()); + CorrWriter writer(par().output); + auto &q1 = envGet(PropagatorField, par().q1); + auto &q2 = envGet(PropagatorField, par().q2); + auto &q3 = envGet(PropagatorField, par().q3); + auto &q4 = envGet(PropagatorField, par().q4); + Gamma g5 = Gamma(Gamma::Algebra::Gamma5); std::vector corrbuf; std::vector result(n_noneye_diag); - unsigned int ndim = env().getNd(); + unsigned int ndim = env().getNd(); - PropagatorField tmp1(env().getGrid()); - LatticeComplex tmp2(env().getGrid()); - std::vector C_i_side_loop(ndim, tmp1); - std::vector C_f_side_loop(ndim, tmp1); - std::vector W_i_side_loop(ndim, tmp2); - std::vector W_f_side_loop(ndim, tmp2); + envGetTmp(LatticeComplex, expbuf); + envGetTmp(PropagatorField, tmp1); + envGetTmp(LatticeComplex, tmp2); + envGetTmp(std::vector, C_i_side_loop); + envGetTmp(std::vector, C_f_side_loop); + envGetTmp(std::vector, W_i_side_loop); + envGetTmp(std::vector, W_f_side_loop); // Setup for C-type contractions. 
for (int mu = 0; mu < ndim; ++mu) diff --git a/extras/Hadrons/Modules/MContraction/WeakNeutral4ptDisc.cc b/extras/Hadrons/Modules/MContraction/WeakNeutral4ptDisc.cc index 6685f292..e0f07f6c 100644 --- a/extras/Hadrons/Modules/MContraction/WeakNeutral4ptDisc.cc +++ b/extras/Hadrons/Modules/MContraction/WeakNeutral4ptDisc.cc @@ -76,9 +76,16 @@ std::vector TWeakNeutral4ptDisc::getInput(void) return in; } +std::vector TWeakNeutral4ptDisc::getReference(void) +{ + std::vector ref = {}; + + return ref; +} + std::vector TWeakNeutral4ptDisc::getOutput(void) { - std::vector out = {getName()}; + std::vector out = {}; return out; } @@ -86,7 +93,13 @@ std::vector TWeakNeutral4ptDisc::getOutput(void) // setup /////////////////////////////////////////////////////////////////////// void TWeakNeutral4ptDisc::setup(void) { + unsigned int ndim = env().getNd(); + envTmpLat(LatticeComplex, "expbuf"); + envTmpLat(PropagatorField, "tmp"); + envTmpLat(LatticeComplex, "curr"); + envTmp(std::vector, "meson", 1, ndim, PropagatorField(env().getGrid())); + envTmp(std::vector, "loop", 1, ndim, PropagatorField(env().getGrid())); } // execution /////////////////////////////////////////////////////////////////// @@ -97,21 +110,21 @@ void TWeakNeutral4ptDisc::execute(void) << par().q2 << ", '" << par().q3 << "' and '" << par().q4 << "'." 
<< std::endl; - CorrWriter writer(par().output); - PropagatorField &q1 = *env().template getObject(par().q1); - PropagatorField &q2 = *env().template getObject(par().q2); - PropagatorField &q3 = *env().template getObject(par().q3); - PropagatorField &q4 = *env().template getObject(par().q4); - Gamma g5 = Gamma(Gamma::Algebra::Gamma5); - LatticeComplex expbuf(env().getGrid()); + CorrWriter writer(par().output); + auto &q1 = envGet(PropagatorField, par().q1); + auto &q2 = envGet(PropagatorField, par().q2); + auto &q3 = envGet(PropagatorField, par().q3); + auto &q4 = envGet(PropagatorField, par().q4); + Gamma g5 = Gamma(Gamma::Algebra::Gamma5); std::vector corrbuf; std::vector result(n_neut_disc_diag); - unsigned int ndim = env().getNd(); + unsigned int ndim = env().getNd(); - PropagatorField tmp(env().getGrid()); - std::vector meson(ndim, tmp); - std::vector loop(ndim, tmp); - LatticeComplex curr(env().getGrid()); + envGetTmp(LatticeComplex, expbuf); + envGetTmp(PropagatorField, tmp); + envGetTmp(LatticeComplex, curr); + envGetTmp(std::vector, meson); + envGetTmp(std::vector, loop); // Setup for type 1 contractions. for (int mu = 0; mu < ndim; ++mu) diff --git a/extras/Hadrons/Modules/MFermion/GaugeProp.hpp b/extras/Hadrons/Modules/MFermion/GaugeProp.hpp index 4d08841d..e77df287 100644 --- a/extras/Hadrons/Modules/MFermion/GaugeProp.hpp +++ b/extras/Hadrons/Modules/MFermion/GaugeProp.hpp @@ -154,21 +154,21 @@ void TGaugeProp::execute(void) LOG(Message) << "Computing quark propagator '" << getName() << "'" << std::endl; - FermionField &source = envGetTmp(FermionField, "source"); - FermionField &sol = envGetTmp(FermionField, "sol"); - FermionField &tmp = envGetTmp(FermionField, "tmp"); - std::string propName = (Ls_ == 1) ? 
getName() : (getName() + "_5d"); - PropagatorField &prop = envGet(PropagatorField, propName); - PropagatorField &fullSrc = envGet(PropagatorField, par().source); - SolverFn &solver = envGet(SolverFn, par().solver); + std::string propName = (Ls_ == 1) ? getName() : (getName() + "_5d"); + auto &prop = envGet(PropagatorField, propName); + auto &fullSrc = envGet(PropagatorField, par().source); + auto &solver = envGet(SolverFn, par().solver); + envGetTmp(FermionField, source); + envGetTmp(FermionField, sol); + envGetTmp(FermionField, tmp); LOG(Message) << "Inverting using solver '" << par().solver << "' on source '" << par().source << "'" << std::endl; for (unsigned int s = 0; s < Ns; ++s) for (unsigned int c = 0; c < Nc; ++c) { LOG(Message) << "Inversion for spin= " << s << ", color= " << c - << std::endl; + << std::endl; // source conversion for 4D sources if (!env().isObject5d(par().source)) { diff --git a/extras/Hadrons/Modules/MGauge/Load.cc b/extras/Hadrons/Modules/MGauge/Load.cc index 062e7e98..c2fd49de 100644 --- a/extras/Hadrons/Modules/MGauge/Load.cc +++ b/extras/Hadrons/Modules/MGauge/Load.cc @@ -49,6 +49,13 @@ std::vector TLoad::getInput(void) return in; } +std::vector TLoad::getReference(void) +{ + std::vector ref; + + return ref; +} + std::vector TLoad::getOutput(void) { std::vector out = {getName()}; @@ -59,19 +66,19 @@ std::vector TLoad::getOutput(void) // setup /////////////////////////////////////////////////////////////////////// void TLoad::setup(void) { - env().registerLattice(getName()); + envCreateLat(LatticeGaugeField, getName()); } // execution /////////////////////////////////////////////////////////////////// void TLoad::execute(void) { - FieldMetaData header; - std::string fileName = par().file + "." - + std::to_string(env().getTrajectory()); - + FieldMetaData header; + std::string fileName = par().file + "." 
+ + std::to_string(vm().getTrajectory()); LOG(Message) << "Loading NERSC configuration from file '" << fileName << "'" << std::endl; - LatticeGaugeField &U = *env().createLattice(getName()); + + auto &U = envGet(LatticeGaugeField, getName()); NerscIO::readConfiguration(U, header, fileName); LOG(Message) << "NERSC header:" << std::endl; dump_meta_data(header, LOG(Message)); diff --git a/extras/Hadrons/Modules/MGauge/Load.hpp b/extras/Hadrons/Modules/MGauge/Load.hpp index a338af79..a967d714 100644 --- a/extras/Hadrons/Modules/MGauge/Load.hpp +++ b/extras/Hadrons/Modules/MGauge/Load.hpp @@ -57,6 +57,7 @@ public: virtual ~TLoad(void) = default; // dependency relation virtual std::vector getInput(void); + virtual std::vector getReference(void); virtual std::vector getOutput(void); protected: // setup diff --git a/extras/Hadrons/Modules/MGauge/Random.cc b/extras/Hadrons/Modules/MGauge/Random.cc index c10fdfc3..fdb0d145 100644 --- a/extras/Hadrons/Modules/MGauge/Random.cc +++ b/extras/Hadrons/Modules/MGauge/Random.cc @@ -44,7 +44,16 @@ TRandom::TRandom(const std::string name) // dependencies/products /////////////////////////////////////////////////////// std::vector TRandom::getInput(void) { - return std::vector(); + std::vector in; + + return in; +} + +std::vector TRandom::getReference(void) +{ + std::vector ref; + + return ref; } std::vector TRandom::getOutput(void) @@ -57,13 +66,14 @@ std::vector TRandom::getOutput(void) // setup /////////////////////////////////////////////////////////////////////// void TRandom::setup(void) { - env().registerLattice(getName()); + envCreateLat(LatticeGaugeField, getName()); } // execution /////////////////////////////////////////////////////////////////// void TRandom::execute(void) { LOG(Message) << "Generating random gauge configuration" << std::endl; - LatticeGaugeField &U = *env().createLattice(getName()); + + auto &U = envGet(LatticeGaugeField, getName()); SU3::HotConfiguration(*env().get4dRng(), U); } diff --git 
a/extras/Hadrons/Modules/MGauge/Random.hpp b/extras/Hadrons/Modules/MGauge/Random.hpp index a07130e4..30525113 100644 --- a/extras/Hadrons/Modules/MGauge/Random.hpp +++ b/extras/Hadrons/Modules/MGauge/Random.hpp @@ -50,6 +50,7 @@ public: virtual ~TRandom(void) = default; // dependency relation virtual std::vector getInput(void); + virtual std::vector getReference(void); virtual std::vector getOutput(void); protected: // setup diff --git a/extras/Hadrons/Modules/MGauge/StochEm.cc b/extras/Hadrons/Modules/MGauge/StochEm.cc index c7a9fc4f..a878ae2f 100644 --- a/extras/Hadrons/Modules/MGauge/StochEm.cc +++ b/extras/Hadrons/Modules/MGauge/StochEm.cc @@ -47,6 +47,13 @@ std::vector TStochEm::getInput(void) return in; } +std::vector TStochEm::getReference(void) +{ + std::vector ref = {}; + + return ref; +} + std::vector TStochEm::getOutput(void) { std::vector out = {getName()}; @@ -57,32 +64,28 @@ std::vector TStochEm::getOutput(void) // setup /////////////////////////////////////////////////////////////////////// void TStochEm::setup(void) { - if (!env().hasRegisteredObject("_" + getName() + "_weight")) + if (!env().hasCreatedObject("_" + getName() + "_weight")) { - env().registerLattice("_" + getName() + "_weight"); + envCacheLat(EmComp, "_" + getName() + "_weight"); } - env().registerLattice(getName()); + envCreateLat(EmField, getName()); } // execution /////////////////////////////////////////////////////////////////// void TStochEm::execute(void) { + LOG(Message) << "Generating stochatic EM potential..." << std::endl; + PhotonR photon(par().gauge, par().zmScheme); - EmField &a = *env().createLattice(getName()); - EmComp *w; + auto &a = envGet(EmField, getName()); + auto &w = envGet(EmComp, "_" + getName() + "_weight"); if (!env().hasCreatedObject("_" + getName() + "_weight")) { LOG(Message) << "Caching stochatic EM potential weight (gauge: " << par().gauge << ", zero-mode scheme: " << par().zmScheme << ")..." 
<< std::endl; - w = env().createLattice("_" + getName() + "_weight"); - photon.StochasticWeight(*w); + photon.StochasticWeight(w); } - else - { - w = env().getObject("_" + getName() + "_weight"); - } - LOG(Message) << "Generating stochatic EM potential..." << std::endl; - photon.StochasticField(a, *env().get4dRng(), *w); + photon.StochasticField(a, *env().get4dRng(), w); } diff --git a/extras/Hadrons/Modules/MGauge/StochEm.hpp b/extras/Hadrons/Modules/MGauge/StochEm.hpp index bacb5172..efc2e39b 100644 --- a/extras/Hadrons/Modules/MGauge/StochEm.hpp +++ b/extras/Hadrons/Modules/MGauge/StochEm.hpp @@ -59,6 +59,7 @@ public: virtual ~TStochEm(void) = default; // dependency relation virtual std::vector getInput(void); + virtual std::vector getReference(void); virtual std::vector getOutput(void); protected: // setup diff --git a/extras/Hadrons/Modules/MGauge/Unit.cc b/extras/Hadrons/Modules/MGauge/Unit.cc index bc05a785..af31f124 100644 --- a/extras/Hadrons/Modules/MGauge/Unit.cc +++ b/extras/Hadrons/Modules/MGauge/Unit.cc @@ -71,6 +71,7 @@ void TUnit::setup(void) void TUnit::execute(void) { LOG(Message) << "Creating unit gauge configuration" << std::endl; + auto &U = envGet(LatticeGaugeField, getName()); SU3::ColdConfiguration(*env().get4dRng(), U); } diff --git a/extras/Hadrons/Modules/MLoop/NoiseLoop.hpp b/extras/Hadrons/Modules/MLoop/NoiseLoop.hpp index 1f40dd48..0feb5efb 100644 --- a/extras/Hadrons/Modules/MLoop/NoiseLoop.hpp +++ b/extras/Hadrons/Modules/MLoop/NoiseLoop.hpp @@ -73,6 +73,7 @@ public: virtual ~TNoiseLoop(void) = default; // dependency relation virtual std::vector getInput(void); + virtual std::vector getReference(void); virtual std::vector getOutput(void); protected: // setup @@ -101,6 +102,15 @@ std::vector TNoiseLoop::getInput(void) return in; } + +template +std::vector TNoiseLoop::getReference(void) +{ + std::vector out = {}; + + return out; +} + template std::vector TNoiseLoop::getOutput(void) { @@ -113,16 +123,16 @@ std::vector 
TNoiseLoop::getOutput(void) template void TNoiseLoop::setup(void) { - env().template registerLattice(getName()); + envCreateLat(PropagatorField, getName()); } // execution /////////////////////////////////////////////////////////////////// template void TNoiseLoop::execute(void) { - PropagatorField &loop = *env().template createLattice(getName()); - PropagatorField &q = *env().template getObject(par().q); - PropagatorField &eta = *env().template getObject(par().eta); + auto &loop = envGet(PropagatorField, getName()); + auto &q = envGet(PropagatorField, par().q); + auto &eta = envGet(PropagatorField, par().eta); loop = q*adj(eta); } diff --git a/extras/Hadrons/Modules/MSink/Point.hpp b/extras/Hadrons/Modules/MSink/Point.hpp index 16b89434..42cae4f6 100644 --- a/extras/Hadrons/Modules/MSink/Point.hpp +++ b/extras/Hadrons/Modules/MSink/Point.hpp @@ -122,18 +122,19 @@ void TPoint::setup(void) // execution /////////////////////////////////////////////////////////////////// template void TPoint::execute(void) -{ - std::vector p = strToVec(par().mom); - LatticeComplex &ph = envGet(LatticeComplex, momphName_); - Complex i(0.0,1.0); - +{ LOG(Message) << "Setting up point sink function for momentum [" << par().mom << "]" << std::endl; + auto &ph = envGet(LatticeComplex, momphName_); + if (!hasPhase_) { - LatticeComplex &coor = envGetTmp(LatticeComplex, "coor"); + Complex i(0.0,1.0); + std::vector p; + envGetTmp(LatticeComplex, coor); + p = strToVec(par().mom); ph = zero; for(unsigned int mu = 0; mu < env().getNd(); mu++) { diff --git a/extras/Hadrons/Modules/MSink/Smear.hpp b/extras/Hadrons/Modules/MSink/Smear.hpp index b51d2f49..03cc861a 100644 --- a/extras/Hadrons/Modules/MSink/Smear.hpp +++ b/extras/Hadrons/Modules/MSink/Smear.hpp @@ -61,6 +61,7 @@ public: virtual ~TSmear(void) = default; // dependency relation virtual std::vector getInput(void); + virtual std::vector getReference(void); virtual std::vector getOutput(void); protected: // setup @@ -89,6 +90,14 @@ 
std::vector TSmear::getInput(void) return in; } +template +std::vector TSmear::getReference(void) +{ + std::vector ref = {}; + + return ref; +} + template std::vector TSmear::getOutput(void) { @@ -101,9 +110,7 @@ std::vector TSmear::getOutput(void) template void TSmear::setup(void) { - unsigned int nt = env().getDim(Tp); - unsigned int size = nt * sizeof(SitePropagator); - env().registerObject(getName(), size); + envCreate(SlicedPropagator, getName(), 1, env().getDim(Tp)); } // execution /////////////////////////////////////////////////////////////////// @@ -114,11 +121,11 @@ void TSmear::execute(void) << "' using sink function '" << par().sink << "'." << std::endl; - SinkFn &sink = *env().template getObject(par().sink); - PropagatorField &q = *env().template getObject(par().q); - SlicedPropagator *out = new SlicedPropagator(env().getDim(Tp)); - *out = sink(q); - env().setObject(getName(), out); + auto &sink = envGet(SinkFn, par().sink); + auto &q = envGet(PropagatorField, par().q); + auto &out = envGet(SlicedPropagator, getName()); + + out = sink(q); } END_MODULE_NAMESPACE diff --git a/extras/Hadrons/Modules/MSource/Point.hpp b/extras/Hadrons/Modules/MSource/Point.hpp index 3fab41c0..6470c77f 100644 --- a/extras/Hadrons/Modules/MSource/Point.hpp +++ b/extras/Hadrons/Modules/MSource/Point.hpp @@ -128,12 +128,13 @@ void TPoint::setup(void) template void TPoint::execute(void) { - std::vector position = strToVec(par().position); - SitePropagator id; - LOG(Message) << "Creating point source at position [" << par().position - << "]" << std::endl; - PropagatorField &src = envGet(PropagatorField, getName()); + << "]" << std::endl; + + std::vector position = strToVec(par().position); + auto &src = envGet(PropagatorField, getName()); + SitePropagator id; + id = 1.; src = zero; pokeSite(id, src, position); diff --git a/extras/Hadrons/Modules/MSource/SeqConserved.hpp b/extras/Hadrons/Modules/MSource/SeqConserved.hpp index e8f91be1..9ccbee1b 100644 --- 
a/extras/Hadrons/Modules/MSource/SeqConserved.hpp +++ b/extras/Hadrons/Modules/MSource/SeqConserved.hpp @@ -82,6 +82,7 @@ public: virtual ~TSeqConserved(void) = default; // dependency relation virtual std::vector getInput(void); + virtual std::vector getReference(void); virtual std::vector getOutput(void); protected: // setup @@ -110,6 +111,14 @@ std::vector TSeqConserved::getInput(void) return in; } +template +std::vector TSeqConserved::getReference(void) +{ + std::vector ref = {}; + + return ref; +} + template std::vector TSeqConserved::getOutput(void) { @@ -123,7 +132,7 @@ template void TSeqConserved::setup(void) { auto Ls_ = env().getObjectLs(par().action); - env().template registerLattice(getName(), Ls_); + envCreateLat(PropagatorField, getName(), Ls_); } // execution /////////////////////////////////////////////////////////////////// @@ -143,9 +152,9 @@ void TSeqConserved::execute(void) << par().mu << ") for " << par().tA << " <= t <= " << par().tB << std::endl; } - PropagatorField &src = *env().template createLattice(getName()); - PropagatorField &q = *env().template getObject(par().q); - FMat &mat = *(env().template getObject(par().action)); + auto &src = envGet(PropagatorField, getName()); + auto &q = envGet(PropagatorField, par().q); + auto &mat = envGet(FMat, par().action); std::vector mom = strToVec(par().mom); mat.SeqConservedCurrent(q, src, par().curr_type, par().mu, diff --git a/extras/Hadrons/Modules/MSource/SeqGamma.hpp b/extras/Hadrons/Modules/MSource/SeqGamma.hpp index 8f67f8fa..d2b3c958 100644 --- a/extras/Hadrons/Modules/MSource/SeqGamma.hpp +++ b/extras/Hadrons/Modules/MSource/SeqGamma.hpp @@ -80,12 +80,16 @@ public: virtual ~TSeqGamma(void) = default; // dependency relation virtual std::vector getInput(void); + virtual std::vector getReference(void); virtual std::vector getOutput(void); protected: // setup virtual void setup(void); // execution virtual void execute(void); +private: + bool hasPhase_{false}; + std::string momphName_, tName_; }; 
MODULE_REGISTER_NS(SeqGamma, TSeqGamma, MSource); @@ -97,6 +101,8 @@ MODULE_REGISTER_NS(SeqGamma, TSeqGamma, MSource); template TSeqGamma::TSeqGamma(const std::string name) : Module(name) +, momphName_ (name + "_momph") +, tName_ (name + "_t") {} // dependencies/products /////////////////////////////////////////////////////// @@ -108,6 +114,14 @@ std::vector TSeqGamma::getInput(void) return in; } +template +std::vector TSeqGamma::getReference(void) +{ + std::vector ref = {}; + + return ref; +} + template std::vector TSeqGamma::getOutput(void) { @@ -120,7 +134,10 @@ std::vector TSeqGamma::getOutput(void) template void TSeqGamma::setup(void) { - env().template registerLattice(getName()); + envCreateLat(PropagatorField, getName()); + envCacheLat(Lattice>, tName_); + envCacheLat(LatticeComplex, momphName_); + envTmpLat(LatticeComplex, "coor"); } // execution /////////////////////////////////////////////////////////////////// @@ -138,23 +155,29 @@ void TSeqGamma::execute(void) << " sequential source for " << par().tA << " <= t <= " << par().tB << std::endl; } - PropagatorField &src = *env().template createLattice(getName()); - PropagatorField &q = *env().template getObject(par().q); - Lattice> t(env().getGrid()); - LatticeComplex ph(env().getGrid()), coor(env().getGrid()); - Gamma g(par().gamma); - std::vector p; - Complex i(0.0,1.0); + auto &src = envGet(PropagatorField, getName()); + auto &q = envGet(PropagatorField, par().q); + auto &ph = envGet(LatticeComplex, momphName_); + auto &t = envGet(Lattice>, tName_); + Gamma g(par().gamma); - p = strToVec(par().mom); - ph = zero; - for(unsigned int mu = 0; mu < env().getNd(); mu++) + if (!hasPhase_) { - LatticeCoordinate(coor, mu); - ph = ph + p[mu]*coor*((1./(env().getGrid()->_fdimensions[mu]))); + Complex i(0.0,1.0); + std::vector p; + + envGetTmp(LatticeComplex, coor); + p = strToVec(par().mom); + ph = zero; + for(unsigned int mu = 0; mu < env().getNd(); mu++) + { + LatticeCoordinate(coor, mu); + ph = ph + 
(p[mu]/env().getGrid()->_fdimensions[mu])*coor; + } + ph = exp((Real)(2*M_PI)*i*ph); + LatticeCoordinate(t, Tp); + hasPhase_ = true; } - ph = exp((Real)(2*M_PI)*i*ph); - LatticeCoordinate(t, Tp); src = where((t >= par().tA) and (t <= par().tB), ph*(g*q), 0.*q); } diff --git a/extras/Hadrons/Modules/MSource/Wall.hpp b/extras/Hadrons/Modules/MSource/Wall.hpp index 57dee06d..d9814d9e 100644 --- a/extras/Hadrons/Modules/MSource/Wall.hpp +++ b/extras/Hadrons/Modules/MSource/Wall.hpp @@ -72,12 +72,16 @@ public: virtual ~TWall(void) = default; // dependency relation virtual std::vector getInput(void); + virtual std::vector getReference(void); virtual std::vector getOutput(void); protected: // setup virtual void setup(void); // execution virtual void execute(void); +private: + bool hasPhase_{false}; + std::string momphName_, tName_; }; MODULE_REGISTER_NS(Wall, TWall, MSource); @@ -89,17 +93,27 @@ MODULE_REGISTER_NS(Wall, TWall, MSource); template TWall::TWall(const std::string name) : Module(name) +, momphName_ (name + "_momph") +, tName_ (name + "_t") {} // dependencies/products /////////////////////////////////////////////////////// template std::vector TWall::getInput(void) { - std::vector in; + std::vector in = {}; return in; } +template +std::vector TWall::getReference(void) +{ + std::vector ref = {}; + + return ref; +} + template std::vector TWall::getOutput(void) { @@ -112,7 +126,7 @@ std::vector TWall::getOutput(void) template void TWall::setup(void) { - env().template registerLattice(getName()); + envCreateLat(PropagatorField, getName()); } // execution /////////////////////////////////////////////////////////////////// @@ -122,21 +136,28 @@ void TWall::execute(void) LOG(Message) << "Generating wall source at t = " << par().tW << " with momentum " << par().mom << std::endl; - PropagatorField &src = *env().template createLattice(getName()); - Lattice> t(env().getGrid()); - LatticeComplex ph(env().getGrid()), coor(env().getGrid()); - std::vector p; - Complex 
i(0.0,1.0); + auto &src = envGet(PropagatorField, getName()); + auto &ph = envGet(LatticeComplex, momphName_); + auto &t = envGet(Lattice>, tName_); - p = strToVec(par().mom); - ph = zero; - for(unsigned int mu = 0; mu < Nd; mu++) + if (!hasPhase_) { - LatticeCoordinate(coor, mu); - ph = ph + p[mu]*coor*((1./(env().getGrid()->_fdimensions[mu]))); + Complex i(0.0,1.0); + std::vector p; + + envGetTmp(LatticeComplex, coor); + p = strToVec(par().mom); + ph = zero; + for(unsigned int mu = 0; mu < env().getNd(); mu++) + { + LatticeCoordinate(coor, mu); + ph = ph + (p[mu]/env().getGrid()->_fdimensions[mu])*coor; + } + ph = exp((Real)(2*M_PI)*i*ph); + LatticeCoordinate(t, Tp); + hasPhase_ = true; } - ph = exp((Real)(2*M_PI)*i*ph); - LatticeCoordinate(t, Tp); + src = 1.; src = where((t == par().tW), src*ph, 0.*src); } diff --git a/extras/Hadrons/Modules/MSource/Z2.hpp b/extras/Hadrons/Modules/MSource/Z2.hpp index e2cc4f34..2e864ff0 100644 --- a/extras/Hadrons/Modules/MSource/Z2.hpp +++ b/extras/Hadrons/Modules/MSource/Z2.hpp @@ -75,12 +75,16 @@ public: virtual ~TZ2(void) = default; // dependency relation virtual std::vector getInput(void); + virtual std::vector getReference(void); virtual std::vector getOutput(void); protected: // setup virtual void setup(void); // execution virtual void execute(void); +private: + bool hasT_{false}; + std::string tName_; }; MODULE_REGISTER_NS(Z2, TZ2, MSource); @@ -93,6 +97,7 @@ MODULE_REGISTER_NS(ScalarZ2, TZ2, MSource); template TZ2::TZ2(const std::string name) : Module(name) +, tName_ (name + "_t") {} // dependencies/products /////////////////////////////////////////////////////// @@ -104,6 +109,14 @@ std::vector TZ2::getInput(void) return in; } +template +std::vector TZ2::getReference(void) +{ + std::vector ref = {}; + + return ref; +} + template std::vector TZ2::getOutput(void) { @@ -116,29 +129,36 @@ std::vector TZ2::getOutput(void) template void TZ2::setup(void) { - env().template registerLattice(getName()); + 
envCreateLat(PropagatorField, getName()); + envCacheLat(Lattice>, tName_); + envTmpLat(LatticeComplex, "eta"); } // execution /////////////////////////////////////////////////////////////////// template void TZ2::execute(void) { - Lattice> t(env().getGrid()); - LatticeComplex eta(env().getGrid()); - Complex shift(1., 1.); - if (par().tA == par().tB) { LOG(Message) << "Generating Z_2 wall source at t= " << par().tA - << std::endl; + << std::endl; } else { LOG(Message) << "Generating Z_2 band for " << par().tA << " <= t <= " - << par().tB << std::endl; + << par().tB << std::endl; } - PropagatorField &src = *env().template createLattice(getName()); - LatticeCoordinate(t, Tp); + + auto &src = envGet(PropagatorField, getName()); + auto &t = envGet(Lattice>, getName()); + Complex shift(1., 1.); + + if (!hasT_) + { + LatticeCoordinate(t, Tp); + hasT_ = true; + } + envGetTmp(LatticeComplex, eta); bernoulli(*env().get4dRng(), eta); eta = (2.*eta - shift)*(1./::sqrt(2.)); eta = where((t >= par().tA) and (t <= par().tB), eta, 0.*eta); diff --git a/extras/Hadrons/Modules/MUtilities/TestSeqConserved.hpp b/extras/Hadrons/Modules/MUtilities/TestSeqConserved.hpp index b0f2846f..081d2911 100644 --- a/extras/Hadrons/Modules/MUtilities/TestSeqConserved.hpp +++ b/extras/Hadrons/Modules/MUtilities/TestSeqConserved.hpp @@ -79,6 +79,7 @@ public: virtual ~TTestSeqConserved(void) = default; // dependency relation virtual std::vector getInput(void); + virtual std::vector getReference(void); virtual std::vector getOutput(void); protected: // setup @@ -107,6 +108,14 @@ std::vector TTestSeqConserved::getInput(void) return in; } +template +std::vector TTestSeqConserved::getReference(void) +{ + std::vector ref = {}; + + return ref; +} + template std::vector TTestSeqConserved::getOutput(void) { @@ -124,36 +133,37 @@ void TTestSeqConserved::setup(void) { HADRON_ERROR(Size, "Ls mismatch between quark action and propagator"); } + envTmpLat(PropagatorField, "tmp"); + envTmpLat(LatticeComplex, "c"); } 
// execution /////////////////////////////////////////////////////////////////// template void TTestSeqConserved::execute(void) { - PropagatorField tmp(env().getGrid()); - PropagatorField &q = *env().template getObject(par().q); - PropagatorField &qSeq = *env().template getObject(par().qSeq); - FMat &act = *(env().template getObject(par().action)); - Gamma g5(Gamma::Algebra::Gamma5); - Gamma::Algebra gA = (par().curr == Current::Axial) ? - Gamma::Algebra::Gamma5 : - Gamma::Algebra::Identity; - Gamma g(gA); - SitePropagator qSite; - Complex test_S, test_V, check_S, check_V; - std::vector check_buf; - LatticeComplex c(env().getGrid()); - // Check sequential insertion of current gives same result as conserved // current sink upon contraction. Assume q uses a point source. - std::vector siteCoord; + + auto &q = envGet(PropagatorField, par().q); + auto &qSeq = envGet(PropagatorField, par().qSeq); + auto &act = envGet(FMat, par().action); + Gamma g5(Gamma::Algebra::Gamma5); + Gamma::Algebra gA = (par().curr == Current::Axial) ? 
+ Gamma::Algebra::Gamma5 : + Gamma::Algebra::Identity; + Gamma g(gA); + SitePropagator qSite; + Complex test_S, test_V, check_S, check_V; + std::vector check_buf; + std::vector siteCoord; + + envGetTmp(PropagatorField, tmp); + envGetTmp(LatticeComplex, c); siteCoord = strToVec(par().origin); peekSite(qSite, qSeq, siteCoord); test_S = trace(qSite*g); test_V = trace(qSite*g*Gamma::gmu[par().mu]); - act.ContractConservedCurrent(q, q, tmp, par().curr, par().mu); - c = trace(tmp*g); sliceSum(c, check_buf, Tp); check_S = TensorRemove(check_buf[par().t_J]); diff --git a/extras/Hadrons/Modules/MUtilities/TestSeqGamma.hpp b/extras/Hadrons/Modules/MUtilities/TestSeqGamma.hpp index 9736ab54..30bd4b69 100644 --- a/extras/Hadrons/Modules/MUtilities/TestSeqGamma.hpp +++ b/extras/Hadrons/Modules/MUtilities/TestSeqGamma.hpp @@ -63,6 +63,7 @@ public: virtual ~TTestSeqGamma(void) = default; // dependency relation virtual std::vector getInput(void); + virtual std::vector getReference(void); virtual std::vector getOutput(void); protected: // setup @@ -91,6 +92,14 @@ std::vector TTestSeqGamma::getInput(void) return in; } +template +std::vector TTestSeqGamma::getReference(void) +{ + std::vector ref = {}; + + return ref; +} + template std::vector TTestSeqGamma::getOutput(void) { @@ -103,26 +112,27 @@ std::vector TTestSeqGamma::getOutput(void) template void TTestSeqGamma::setup(void) { - + envTmpLat(LatticeComplex, "c"); } // execution /////////////////////////////////////////////////////////////////// template void TTestSeqGamma::execute(void) { - PropagatorField &q = *env().template getObject(par().q); - PropagatorField &qSeq = *env().template getObject(par().qSeq); - LatticeComplex c(env().getGrid()); - Gamma g5(Gamma::Algebra::Gamma5); - Gamma g(par().gamma); - SitePropagator qSite; - Complex test, check; + auto &q = envGet(PropagatorField, par().q); + auto &qSeq = envGet(PropagatorField, par().qSeq); + Gamma g5(Gamma::Algebra::Gamma5); + Gamma g(par().gamma); + SitePropagator qSite; 
+ Complex test, check; std::vector check_buf; + std::vector siteCoord; // Check sequential insertion of gamma matrix gives same result as // insertion of gamma at sink upon contraction. Assume q uses a point // source. - std::vector siteCoord; + + envGetTmp(LatticeComplex, c); siteCoord = strToVec(par().origin); peekSite(qSite, qSeq, siteCoord); test = trace(g*qSite); diff --git a/extras/Hadrons/modules.inc b/extras/Hadrons/modules.inc index 5ce2435f..2f4d183e 100644 --- a/extras/Hadrons/modules.inc +++ b/extras/Hadrons/modules.inc @@ -1,45 +1,38 @@ modules_cc =\ - Modules/MGauge/Unit.cc - # Modules/MContraction/WeakHamiltonianEye.cc \ - # Modules/MContraction/WeakHamiltonianNonEye.cc \ - # Modules/MContraction/WeakNeutral4ptDisc.cc \ - # Modules/MGauge/Load.cc \ - # Modules/MGauge/Random.cc \ - # Modules/MGauge/StochEm.cc \ - # Modules/MScalar/ChargedProp.cc \ - # Modules/MScalar/FreeProp.cc + Modules/MGauge/Unit.cc \ + Modules/MContraction/WeakHamiltonianEye.cc \ + Modules/MContraction/WeakHamiltonianNonEye.cc \ + Modules/MContraction/WeakNeutral4ptDisc.cc \ + Modules/MGauge/Load.cc \ + Modules/MGauge/Random.cc \ + Modules/MGauge/StochEm.cc modules_hpp =\ Modules/MAction/DWF.hpp \ Modules/MAction/Wilson.hpp \ Modules/MSink/Point.hpp \ Modules/MSource/Point.hpp \ + Modules/MGauge/Load.hpp \ + Modules/MGauge/Random.hpp \ + Modules/MGauge/StochEm.hpp \ Modules/MGauge/Unit.hpp \ Modules/MSolver/RBPrecCG.hpp \ Modules/MFermion/GaugeProp.hpp \ - Modules/MContraction/Meson.hpp - - # Modules/MContraction/Baryon.hpp \ - # Modules/MContraction/DiscLoop.hpp \ - # Modules/MContraction/Gamma3pt.hpp \ - # Modules/MContraction/WardIdentity.hpp \ - # Modules/MContraction/WeakHamiltonian.hpp \ - # Modules/MContraction/WeakHamiltonianEye.hpp \ - # Modules/MContraction/WeakHamiltonianNonEye.hpp \ - # Modules/MContraction/WeakNeutral4ptDisc.hpp \ - # Modules/MFermion/GaugeProp.hpp \ - # Modules/MGauge/Load.hpp \ - # Modules/MGauge/Random.hpp \ - # Modules/MGauge/StochEm.hpp \ - # 
Modules/MLoop/NoiseLoop.hpp \ - # Modules/MScalar/ChargedProp.hpp \ - # Modules/MScalar/FreeProp.hpp \ - # Modules/MScalar/Scalar.hpp \ - # Modules/MSink/Smear.hpp \ - # Modules/MSolver/RBPrecCG.hpp \ - # Modules/MSource/SeqConserved.hpp \ - # Modules/MSource/SeqGamma.hpp \ - # Modules/MSource/Wall.hpp \ - # Modules/MSource/Z2.hpp \ - # Modules/MUtilities/TestSeqConserved.hpp \ - # Modules/MUtilities/TestSeqGamma.hpp + Modules/MContraction/Baryon.hpp \ + Modules/MContraction/DiscLoop.hpp \ + Modules/MContraction/Gamma3pt.hpp \ + Modules/MContraction/Meson.hpp \ + Modules/MContraction/WardIdentity.hpp \ + Modules/MContraction/WeakHamiltonian.hpp \ + Modules/MContraction/WeakHamiltonianEye.hpp \ + Modules/MContraction/WeakHamiltonianNonEye.hpp \ + Modules/MContraction/WeakNeutral4ptDisc.hpp \ + Modules/MLoop/NoiseLoop.hpp \ + Modules/MSink/Smear.hpp \ + Modules/MSolver/RBPrecCG.hpp \ + Modules/MSource/SeqConserved.hpp \ + Modules/MSource/SeqGamma.hpp \ + Modules/MSource/Wall.hpp \ + Modules/MSource/Z2.hpp \ + Modules/MUtilities/TestSeqConserved.hpp \ + Modules/MUtilities/TestSeqGamma.hpp diff --git a/tests/hadrons/Test_hadrons.hpp b/tests/hadrons/Test_hadrons.hpp index 9bd3ee0a..0265f5a6 100644 --- a/tests/hadrons/Test_hadrons.hpp +++ b/tests/hadrons/Test_hadrons.hpp @@ -118,7 +118,7 @@ inline void makeWilsonAction(Application &application, std::string actionName, std::string &gaugeField, double mass, std::string boundary = "1 1 1 -1") { - if (!(Environment::getInstance().hasModule(actionName))) + if (!(VirtualMachine::getInstance().hasModule(actionName))) { MAction::Wilson::Par actionPar; actionPar.gauge = gaugeField; @@ -144,7 +144,7 @@ inline void makeDWFAction(Application &application, std::string actionName, std::string &gaugeField, double mass, double M5, unsigned int Ls, std::string boundary = "1 1 1 -1") { - if (!(Environment::getInstance().hasModule(actionName))) + if (!(VirtualMachine::getInstance().hasModule(actionName))) { MAction::DWF::Par actionPar; 
actionPar.gauge = gaugeField; @@ -173,7 +173,7 @@ inline void makeDWFAction(Application &application, std::string actionName, inline void makeRBPrecCGSolver(Application &application, std::string &solverName, std::string &actionName, double residual = 1e-8) { - if (!(Environment::getInstance().hasModule(solverName))) + if (!(VirtualMachine::getInstance().hasModule(solverName))) { MSolver::RBPrecCG::Par solverPar; solverPar.action = actionName; @@ -195,7 +195,7 @@ inline void makePointSource(Application &application, std::string srcName, std::string pos) { // If the source already exists, don't make the module again. - if (!(Environment::getInstance().hasModule(srcName))) + if (!(VirtualMachine::getInstance().hasModule(srcName))) { MSource::Point::Par pointPar; pointPar.position = pos; @@ -219,7 +219,7 @@ inline void makeSequentialSource(Application &application, std::string srcName, std::string mom = ZERO_MOM) { // If the source already exists, don't make the module again. - if (!(Environment::getInstance().hasModule(srcName))) + if (!(VirtualMachine::getInstance().hasModule(srcName))) { MSource::SeqGamma::Par seqPar; seqPar.q = qSrc; @@ -255,7 +255,7 @@ inline void makeConservedSequentialSource(Application &application, std::string mom = ZERO_MOM) { // If the source already exists, don't make the module again. 
- if (!(Environment::getInstance().hasModule(srcName))) + if (!(VirtualMachine::getInstance().hasModule(srcName))) { MSource::SeqConserved::Par seqPar; seqPar.q = qSrc; @@ -280,7 +280,7 @@ inline void makeConservedSequentialSource(Application &application, inline void makeNoiseSource(Application &application, std::string &srcName, unsigned int tA, unsigned int tB) { - if (!(Environment::getInstance().hasModule(srcName))) + if (!(VirtualMachine::getInstance().hasModule(srcName))) { MSource::Z2::Par noisePar; noisePar.tA = tA; @@ -302,7 +302,7 @@ inline void makeWallSource(Application &application, std::string &srcName, unsigned int tW, std::string mom = ZERO_MOM) { // If the source already exists, don't make the module again. - if (!(Environment::getInstance().hasModule(srcName))) + if (!(VirtualMachine::getInstance().hasModule(srcName))) { MSource::Wall::Par wallPar; wallPar.tW = tW; @@ -324,7 +324,7 @@ inline void makePointSink(Application &application, std::string &sinkFnct, std::string mom = ZERO_MOM) { // If the sink function already exists, don't make it again. - if (!(Environment::getInstance().hasModule(sinkFnct))) + if (!(VirtualMachine::getInstance().hasModule(sinkFnct))) { MSink::Point::Par pointPar; pointPar.mom = mom; @@ -345,7 +345,7 @@ inline void sinkSmear(Application &application, std::string &sinkFnct, std::string &propName, std::string &smearedProp) { // If the propagator has already been smeared, don't smear it again. - if (!(Environment::getInstance().hasModule(smearedProp))) + if (!(VirtualMachine::getInstance().hasModule(smearedProp))) { MSink::Smear::Par smearPar; smearPar.q = propName; @@ -367,7 +367,7 @@ inline void makePropagator(Application &application, std::string &propName, std::string &srcName, std::string &solver) { // If the propagator already exists, don't make the module again. 
- if (!(Environment::getInstance().hasModule(propName))) + if (!(VirtualMachine::getInstance().hasModule(propName))) { MFermion::GaugeProp::Par quarkPar; quarkPar.source = srcName; @@ -390,7 +390,7 @@ inline void makeLoop(Application &application, std::string &propName, std::string &srcName, std::string &resName) { // If the loop propagator already exists, don't make the module again. - if (!(Environment::getInstance().hasModule(propName))) + if (!(VirtualMachine::getInstance().hasModule(propName))) { MLoop::NoiseLoop::Par loopPar; loopPar.q = resName; @@ -421,7 +421,7 @@ inline void mesonContraction(Application &application, std::string &sink, std::string gammas = "") { - if (!(Environment::getInstance().hasModule(modName))) + if (!(VirtualMachine::getInstance().hasModule(modName))) { MContraction::Meson::Par mesPar; mesPar.output = output; @@ -453,7 +453,7 @@ inline void gamma3ptContraction(Application &application, unsigned int npt, Gamma::Algebra gamma = Gamma::Algebra::Identity) { std::string modName = std::to_string(npt) + "pt_" + label; - if (!(Environment::getInstance().hasModule(modName))) + if (!(VirtualMachine::getInstance().hasModule(modName))) { MContraction::Gamma3pt::Par gamma3ptPar; gamma3ptPar.output = std::to_string(npt) + "pt/" + label; @@ -487,7 +487,7 @@ inline void weakContraction##top(Application &application, unsigned int npt,\ std::string &label, unsigned int tSnk = 0)\ {\ std::string modName = std::to_string(npt) + "pt_" + label;\ - if (!(Environment::getInstance().hasModule(modName)))\ + if (!(VirtualMachine::getInstance().hasModule(modName)))\ {\ MContraction::WeakHamiltonian##top::Par weakPar;\ weakPar.output = std::to_string(npt) + "pt/" + label;\ @@ -521,7 +521,7 @@ inline void disc0Contraction(Application &application, std::string &label) { std::string modName = "4pt_" + label; - if (!(Environment::getInstance().hasModule(modName))) + if (!(VirtualMachine::getInstance().hasModule(modName))) { MContraction::WeakNeutral4ptDisc::Par 
disc0Par; disc0Par.output = "4pt/" + label; @@ -547,7 +547,7 @@ inline void discLoopContraction(Application &application, std::string &q_loop, std::string &modName, Gamma::Algebra gamma = Gamma::Algebra::Identity) { - if (!(Environment::getInstance().hasModule(modName))) + if (!(VirtualMachine::getInstance().hasModule(modName))) { MContraction::DiscLoop::Par discPar; discPar.output = "disc/" + modName; @@ -574,7 +574,7 @@ inline void makeWITest(Application &application, std::string &modName, std::string &propName, std::string &actionName, double mass, unsigned int Ls = 1, bool test_axial = false) { - if (!(Environment::getInstance().hasModule(modName))) + if (!(VirtualMachine::getInstance().hasModule(modName))) { MContraction::WardIdentity::Par wiPar; if (Ls > 1) @@ -613,7 +613,7 @@ inline void makeSeqCurrComparison(Application &application, std::string &modName std::string &actionName, std::string &origin, unsigned int t_J, unsigned int mu, Current curr) { - if (!(Environment::getInstance().hasModule(modName))) + if (!(VirtualMachine::getInstance().hasModule(modName))) { MUtilities::TestSeqConserved::Par seqPar; seqPar.q = propName; @@ -646,7 +646,7 @@ inline void makeSeqGamComparison(Application &application, std::string &modName, std::string &origin, Gamma::Algebra gamma, unsigned int t_g) { - if (!(Environment::getInstance().hasModule(modName))) + if (!(VirtualMachine::getInstance().hasModule(modName))) { MUtilities::TestSeqGamma::Par seqPar; seqPar.q = propName; From 591a38c487acc0f0abc7dd099e09f26f661f913c Mon Sep 17 00:00:00 2001 From: Antonin Portelli Date: Thu, 14 Dec 2017 19:42:16 +0000 Subject: [PATCH 255/377] Hadrons: VM fixes --- extras/Hadrons/Modules/MSource/Z2.hpp | 2 +- extras/Hadrons/VirtualMachine.cc | 18 ++++++++++++++++++ 2 files changed, 19 insertions(+), 1 deletion(-) diff --git a/extras/Hadrons/Modules/MSource/Z2.hpp b/extras/Hadrons/Modules/MSource/Z2.hpp index 2e864ff0..39840319 100644 --- a/extras/Hadrons/Modules/MSource/Z2.hpp +++ 
b/extras/Hadrons/Modules/MSource/Z2.hpp @@ -150,7 +150,7 @@ void TZ2::execute(void) } auto &src = envGet(PropagatorField, getName()); - auto &t = envGet(Lattice>, getName()); + auto &t = envGet(Lattice>, tName_); Complex shift(1., 1.); if (!hasT_) diff --git a/extras/Hadrons/VirtualMachine.cc b/extras/Hadrons/VirtualMachine.cc index 8a6bd149..8b2ea516 100644 --- a/extras/Hadrons/VirtualMachine.cc +++ b/extras/Hadrons/VirtualMachine.cc @@ -83,6 +83,24 @@ void VirtualMachine::pushModule(VirtualMachine::ModPt &pt) } m.input.push_back(env().getObjectAddress(ref)); } + auto inCopy = m.input; + // if module has inputs with references, they need to be added as + // an input + for (auto &in: inCopy) + { + int inm = env().getObjectModule(in); + + if (inm > 0) + { + if (getModule(inm)->getReference().size() > 0) + { + for (auto &rin: getModule(inm)->getReference()) + { + m.input.push_back(env().getObjectAddress(rin)); + } + } + } + } module_.push_back(std::move(m)); address = static_cast(module_.size() - 1); moduleAddress_[name] = address; From bcf6f3890c38420eba6449f5190fc688b006fbf7 Mon Sep 17 00:00:00 2001 From: Antonin Portelli Date: Thu, 14 Dec 2017 21:14:10 +0000 Subject: [PATCH 256/377] Hadrons: more fixes after test --- extras/Hadrons/Modules/MContraction/Meson.hpp | 12 ++++++++++-- extras/Hadrons/VirtualMachine.cc | 4 ++-- extras/Hadrons/VirtualMachine.hpp | 2 +- 3 files changed, 13 insertions(+), 5 deletions(-) diff --git a/extras/Hadrons/Modules/MContraction/Meson.hpp b/extras/Hadrons/Modules/MContraction/Meson.hpp index 1fd86d3a..3b250a3b 100644 --- a/extras/Hadrons/Modules/MContraction/Meson.hpp +++ b/extras/Hadrons/Modules/MContraction/Meson.hpp @@ -99,6 +99,8 @@ public: virtual std::vector getOutput(void); virtual void parseGammaString(std::vector &gammaList); protected: + // execution + virtual void setup(void); // execution virtual void execute(void); }; @@ -160,8 +162,14 @@ void TMeson::parseGammaString(std::vector &gammaList) { // Parse individual 
contractions from input string. gammaList = strToVec(par().gammas); - } - envTmpLat(LatticeComplex, "c"); + } +} + +// execution /////////////////////////////////////////////////////////////////// +template +void TMeson::setup(void) +{ + envTmpLat(LatticeComplex, "c"); } // execution /////////////////////////////////////////////////////////////////// diff --git a/extras/Hadrons/VirtualMachine.cc b/extras/Hadrons/VirtualMachine.cc index 8b2ea516..e0035bc1 100644 --- a/extras/Hadrons/VirtualMachine.cc +++ b/extras/Hadrons/VirtualMachine.cc @@ -579,8 +579,8 @@ void VirtualMachine::executeProgram(const Program &p) const { // execute module LOG(Message) << SEP << " Measurement step " << i + 1 << "/" - << p.size() << " (module '" << module_[p[i]].name - << "') " << SEP << std::endl; + << p.size() << " (module '" << module_[p[i]].name + << "') " << SEP << std::endl; (*module_[p[i]].data)(); sizeBefore = env().getTotalSize(); // print used memory after execution diff --git a/extras/Hadrons/VirtualMachine.hpp b/extras/Hadrons/VirtualMachine.hpp index a411c108..3af7d914 100644 --- a/extras/Hadrons/VirtualMachine.hpp +++ b/extras/Hadrons/VirtualMachine.hpp @@ -59,7 +59,7 @@ public: { Size size; Environment::Storage storage; - unsigned int module; + int module; }; struct MemoryProfile { From e2fe97277bc0dcc65700645f16c547fe3d6b429e Mon Sep 17 00:00:00 2001 From: Antonin Portelli Date: Tue, 19 Dec 2017 20:28:04 +0000 Subject: [PATCH 257/377] Hadrons: getReference use is rare, empty by default --- extras/Hadrons/Module.hpp | 5 ++++- extras/Hadrons/Modules/MAction/DWF.hpp | 9 --------- extras/Hadrons/Modules/MAction/Wilson.hpp | 9 --------- extras/Hadrons/Modules/MContraction/Baryon.hpp | 9 --------- extras/Hadrons/Modules/MContraction/DiscLoop.hpp | 9 --------- extras/Hadrons/Modules/MContraction/Gamma3pt.hpp | 9 --------- extras/Hadrons/Modules/MContraction/Meson.hpp | 9 --------- extras/Hadrons/Modules/MContraction/WardIdentity.hpp | 9 --------- 
.../Hadrons/Modules/MContraction/WeakHamiltonian.hpp | 1 - .../Hadrons/Modules/MContraction/WeakHamiltonianEye.cc | 7 ------- .../Modules/MContraction/WeakHamiltonianNonEye.cc | 6 ------ .../Hadrons/Modules/MContraction/WeakNeutral4ptDisc.cc | 7 ------- extras/Hadrons/Modules/MFermion/GaugeProp.hpp | 9 --------- extras/Hadrons/Modules/MGauge/Load.cc | 7 ------- extras/Hadrons/Modules/MGauge/Load.hpp | 1 - extras/Hadrons/Modules/MGauge/Random.cc | 7 ------- extras/Hadrons/Modules/MGauge/Random.hpp | 1 - extras/Hadrons/Modules/MGauge/StochEm.cc | 7 ------- extras/Hadrons/Modules/MGauge/StochEm.hpp | 1 - extras/Hadrons/Modules/MGauge/Unit.cc | 7 ------- extras/Hadrons/Modules/MGauge/Unit.hpp | 1 - extras/Hadrons/Modules/MLoop/NoiseLoop.hpp | 10 ---------- extras/Hadrons/Modules/MSink/Point.hpp | 9 --------- extras/Hadrons/Modules/MSink/Smear.hpp | 9 --------- extras/Hadrons/Modules/MSource/Point.hpp | 9 --------- extras/Hadrons/Modules/MSource/SeqConserved.hpp | 9 --------- extras/Hadrons/Modules/MSource/SeqGamma.hpp | 9 --------- extras/Hadrons/Modules/MSource/Wall.hpp | 9 --------- extras/Hadrons/Modules/MSource/Z2.hpp | 9 --------- extras/Hadrons/Modules/MUtilities/TestSeqConserved.hpp | 9 --------- extras/Hadrons/Modules/MUtilities/TestSeqGamma.hpp | 9 --------- extras/Hadrons/Modules/templates/Module.cc.template | 8 -------- extras/Hadrons/Modules/templates/Module.hpp.template | 1 - .../Hadrons/Modules/templates/Module_in_NS.cc.template | 8 -------- .../Modules/templates/Module_in_NS.hpp.template | 1 - .../Hadrons/Modules/templates/Module_tmp.hpp.template | 1 - .../Modules/templates/Module_tmp_in_NS.hpp.template | 9 --------- 37 files changed, 4 insertions(+), 245 deletions(-) diff --git a/extras/Hadrons/Module.hpp b/extras/Hadrons/Module.hpp index 390573d8..b71f779d 100644 --- a/extras/Hadrons/Module.hpp +++ b/extras/Hadrons/Module.hpp @@ -155,7 +155,10 @@ public: virtual std::string getRegisteredName(void); // dependencies/products virtual std::vector 
getInput(void) = 0; - virtual std::vector getReference(void) = 0; + virtual std::vector getReference(void) + { + return std::vector(0); + }; virtual std::vector getOutput(void) = 0; // parse parameters virtual void parseParameters(XmlReader &reader, const std::string name) = 0; diff --git a/extras/Hadrons/Modules/MAction/DWF.hpp b/extras/Hadrons/Modules/MAction/DWF.hpp index 0cb9a4cb..d99f1165 100644 --- a/extras/Hadrons/Modules/MAction/DWF.hpp +++ b/extras/Hadrons/Modules/MAction/DWF.hpp @@ -64,7 +64,6 @@ public: virtual ~TDWF(void) = default; // dependency relation virtual std::vector getInput(void); - virtual std::vector getReference(void); virtual std::vector getOutput(void); protected: // setup @@ -93,14 +92,6 @@ std::vector TDWF::getInput(void) return in; } -template -std::vector TDWF::getReference(void) -{ - std::vector ref = {}; - - return ref; -} - template std::vector TDWF::getOutput(void) { diff --git a/extras/Hadrons/Modules/MAction/Wilson.hpp b/extras/Hadrons/Modules/MAction/Wilson.hpp index a6b3f0d6..8ef755bb 100644 --- a/extras/Hadrons/Modules/MAction/Wilson.hpp +++ b/extras/Hadrons/Modules/MAction/Wilson.hpp @@ -62,7 +62,6 @@ public: virtual ~TWilson(void) = default; // dependencies/products virtual std::vector getInput(void); - virtual std::vector getReference(void); virtual std::vector getOutput(void); protected: // setup @@ -91,14 +90,6 @@ std::vector TWilson::getInput(void) return in; } -template -std::vector TWilson::getReference(void) -{ - std::vector ref = {}; - - return ref; -} - template std::vector TWilson::getOutput(void) { diff --git a/extras/Hadrons/Modules/MContraction/Baryon.hpp b/extras/Hadrons/Modules/MContraction/Baryon.hpp index 28f6aa51..1ef2e257 100644 --- a/extras/Hadrons/Modules/MContraction/Baryon.hpp +++ b/extras/Hadrons/Modules/MContraction/Baryon.hpp @@ -71,7 +71,6 @@ public: virtual ~TBaryon(void) = default; // dependency relation virtual std::vector getInput(void); - virtual std::vector getReference(void); virtual 
std::vector getOutput(void); protected: // setup @@ -100,14 +99,6 @@ std::vector TBaryon::getInput(void) return input; } -template -std::vector TBaryon::getReference(void) -{ - std::vector ref = {}; - - return ref; -} - template std::vector TBaryon::getOutput(void) { diff --git a/extras/Hadrons/Modules/MContraction/DiscLoop.hpp b/extras/Hadrons/Modules/MContraction/DiscLoop.hpp index c0fbe296..ef50061c 100644 --- a/extras/Hadrons/Modules/MContraction/DiscLoop.hpp +++ b/extras/Hadrons/Modules/MContraction/DiscLoop.hpp @@ -67,7 +67,6 @@ public: virtual ~TDiscLoop(void) = default; // dependency relation virtual std::vector getInput(void); - virtual std::vector getReference(void); virtual std::vector getOutput(void); protected: // setup @@ -96,14 +95,6 @@ std::vector TDiscLoop::getInput(void) return in; } -template -std::vector TDiscLoop::getReference(void) -{ - std::vector out = {}; - - return out; -} - template std::vector TDiscLoop::getOutput(void) { diff --git a/extras/Hadrons/Modules/MContraction/Gamma3pt.hpp b/extras/Hadrons/Modules/MContraction/Gamma3pt.hpp index 4a6baf3e..fb9a9d4b 100644 --- a/extras/Hadrons/Modules/MContraction/Gamma3pt.hpp +++ b/extras/Hadrons/Modules/MContraction/Gamma3pt.hpp @@ -98,7 +98,6 @@ public: virtual ~TGamma3pt(void) = default; // dependency relation virtual std::vector getInput(void); - virtual std::vector getReference(void); virtual std::vector getOutput(void); protected: // setup @@ -127,14 +126,6 @@ std::vector TGamma3pt::getInput(void) return in; } -template -std::vector TGamma3pt::getReference(void) -{ - std::vector ref = {}; - - return ref; -} - template std::vector TGamma3pt::getOutput(void) { diff --git a/extras/Hadrons/Modules/MContraction/Meson.hpp b/extras/Hadrons/Modules/MContraction/Meson.hpp index 3b250a3b..46bbdb2e 100644 --- a/extras/Hadrons/Modules/MContraction/Meson.hpp +++ b/extras/Hadrons/Modules/MContraction/Meson.hpp @@ -95,7 +95,6 @@ public: virtual ~TMeson(void) = default; // dependencies/products virtual 
std::vector getInput(void); - virtual std::vector getReference(void); virtual std::vector getOutput(void); virtual void parseGammaString(std::vector &gammaList); protected: @@ -125,14 +124,6 @@ std::vector TMeson::getInput(void) return input; } -template -std::vector TMeson::getReference(void) -{ - std::vector ref = {}; - - return ref; -} - template std::vector TMeson::getOutput(void) { diff --git a/extras/Hadrons/Modules/MContraction/WardIdentity.hpp b/extras/Hadrons/Modules/MContraction/WardIdentity.hpp index c92c7243..556450a8 100644 --- a/extras/Hadrons/Modules/MContraction/WardIdentity.hpp +++ b/extras/Hadrons/Modules/MContraction/WardIdentity.hpp @@ -73,7 +73,6 @@ public: virtual ~TWardIdentity(void) = default; // dependency relation virtual std::vector getInput(void); - virtual std::vector getReference(void); virtual std::vector getOutput(void); protected: // setup @@ -104,14 +103,6 @@ std::vector TWardIdentity::getInput(void) return in; } -template -std::vector TWardIdentity::getReference(void) -{ - std::vector ref = {}; - - return ref; -} - template std::vector TWardIdentity::getOutput(void) { diff --git a/extras/Hadrons/Modules/MContraction/WeakHamiltonian.hpp b/extras/Hadrons/Modules/MContraction/WeakHamiltonian.hpp index 2b53c87a..7df40370 100644 --- a/extras/Hadrons/Modules/MContraction/WeakHamiltonian.hpp +++ b/extras/Hadrons/Modules/MContraction/WeakHamiltonian.hpp @@ -99,7 +99,6 @@ public:\ virtual ~T##modname(void) = default;\ /* dependency relation */ \ virtual std::vector getInput(void);\ - virtual std::vector getReference(void);\ virtual std::vector getOutput(void);\ public:\ std::vector VA_label = {"V", "A"};\ diff --git a/extras/Hadrons/Modules/MContraction/WeakHamiltonianEye.cc b/extras/Hadrons/Modules/MContraction/WeakHamiltonianEye.cc index 7a73a7e3..43dfa609 100644 --- a/extras/Hadrons/Modules/MContraction/WeakHamiltonianEye.cc +++ b/extras/Hadrons/Modules/MContraction/WeakHamiltonianEye.cc @@ -74,13 +74,6 @@ std::vector 
TWeakHamiltonianEye::getInput(void) return in; } -std::vector TWeakHamiltonianEye::getReference(void) -{ - std::vector out = {}; - - return out; -} - std::vector TWeakHamiltonianEye::getOutput(void) { std::vector out = {}; diff --git a/extras/Hadrons/Modules/MContraction/WeakHamiltonianNonEye.cc b/extras/Hadrons/Modules/MContraction/WeakHamiltonianNonEye.cc index c333713d..8a7113e3 100644 --- a/extras/Hadrons/Modules/MContraction/WeakHamiltonianNonEye.cc +++ b/extras/Hadrons/Modules/MContraction/WeakHamiltonianNonEye.cc @@ -74,12 +74,6 @@ std::vector TWeakHamiltonianNonEye::getInput(void) return in; } -std::vector TWeakHamiltonianNonEye::getReference(void) -{ - std::vector out = {}; - - return out; -} std::vector TWeakHamiltonianNonEye::getOutput(void) { std::vector out = {}; diff --git a/extras/Hadrons/Modules/MContraction/WeakNeutral4ptDisc.cc b/extras/Hadrons/Modules/MContraction/WeakNeutral4ptDisc.cc index e0f07f6c..18423f3e 100644 --- a/extras/Hadrons/Modules/MContraction/WeakNeutral4ptDisc.cc +++ b/extras/Hadrons/Modules/MContraction/WeakNeutral4ptDisc.cc @@ -76,13 +76,6 @@ std::vector TWeakNeutral4ptDisc::getInput(void) return in; } -std::vector TWeakNeutral4ptDisc::getReference(void) -{ - std::vector ref = {}; - - return ref; -} - std::vector TWeakNeutral4ptDisc::getOutput(void) { std::vector out = {}; diff --git a/extras/Hadrons/Modules/MFermion/GaugeProp.hpp b/extras/Hadrons/Modules/MFermion/GaugeProp.hpp index e77df287..05b3d17a 100644 --- a/extras/Hadrons/Modules/MFermion/GaugeProp.hpp +++ b/extras/Hadrons/Modules/MFermion/GaugeProp.hpp @@ -84,7 +84,6 @@ public: virtual ~TGaugeProp(void) = default; // dependency relation virtual std::vector getInput(void); - virtual std::vector getReference(void); virtual std::vector getOutput(void); protected: // setup @@ -116,14 +115,6 @@ std::vector TGaugeProp::getInput(void) return in; } -template -std::vector TGaugeProp::getReference(void) -{ - std::vector ref = {}; - - return ref; -} - template std::vector 
TGaugeProp::getOutput(void) { diff --git a/extras/Hadrons/Modules/MGauge/Load.cc b/extras/Hadrons/Modules/MGauge/Load.cc index c2fd49de..b168a010 100644 --- a/extras/Hadrons/Modules/MGauge/Load.cc +++ b/extras/Hadrons/Modules/MGauge/Load.cc @@ -49,13 +49,6 @@ std::vector TLoad::getInput(void) return in; } -std::vector TLoad::getReference(void) -{ - std::vector ref; - - return ref; -} - std::vector TLoad::getOutput(void) { std::vector out = {getName()}; diff --git a/extras/Hadrons/Modules/MGauge/Load.hpp b/extras/Hadrons/Modules/MGauge/Load.hpp index a967d714..a338af79 100644 --- a/extras/Hadrons/Modules/MGauge/Load.hpp +++ b/extras/Hadrons/Modules/MGauge/Load.hpp @@ -57,7 +57,6 @@ public: virtual ~TLoad(void) = default; // dependency relation virtual std::vector getInput(void); - virtual std::vector getReference(void); virtual std::vector getOutput(void); protected: // setup diff --git a/extras/Hadrons/Modules/MGauge/Random.cc b/extras/Hadrons/Modules/MGauge/Random.cc index fdb0d145..97afd338 100644 --- a/extras/Hadrons/Modules/MGauge/Random.cc +++ b/extras/Hadrons/Modules/MGauge/Random.cc @@ -49,13 +49,6 @@ std::vector TRandom::getInput(void) return in; } -std::vector TRandom::getReference(void) -{ - std::vector ref; - - return ref; -} - std::vector TRandom::getOutput(void) { std::vector out = {getName()}; diff --git a/extras/Hadrons/Modules/MGauge/Random.hpp b/extras/Hadrons/Modules/MGauge/Random.hpp index 30525113..a07130e4 100644 --- a/extras/Hadrons/Modules/MGauge/Random.hpp +++ b/extras/Hadrons/Modules/MGauge/Random.hpp @@ -50,7 +50,6 @@ public: virtual ~TRandom(void) = default; // dependency relation virtual std::vector getInput(void); - virtual std::vector getReference(void); virtual std::vector getOutput(void); protected: // setup diff --git a/extras/Hadrons/Modules/MGauge/StochEm.cc b/extras/Hadrons/Modules/MGauge/StochEm.cc index a878ae2f..c5318573 100644 --- a/extras/Hadrons/Modules/MGauge/StochEm.cc +++ b/extras/Hadrons/Modules/MGauge/StochEm.cc @@ 
-47,13 +47,6 @@ std::vector TStochEm::getInput(void) return in; } -std::vector TStochEm::getReference(void) -{ - std::vector ref = {}; - - return ref; -} - std::vector TStochEm::getOutput(void) { std::vector out = {getName()}; diff --git a/extras/Hadrons/Modules/MGauge/StochEm.hpp b/extras/Hadrons/Modules/MGauge/StochEm.hpp index efc2e39b..bacb5172 100644 --- a/extras/Hadrons/Modules/MGauge/StochEm.hpp +++ b/extras/Hadrons/Modules/MGauge/StochEm.hpp @@ -59,7 +59,6 @@ public: virtual ~TStochEm(void) = default; // dependency relation virtual std::vector getInput(void); - virtual std::vector getReference(void); virtual std::vector getOutput(void); protected: // setup diff --git a/extras/Hadrons/Modules/MGauge/Unit.cc b/extras/Hadrons/Modules/MGauge/Unit.cc index af31f124..8bee1ecc 100644 --- a/extras/Hadrons/Modules/MGauge/Unit.cc +++ b/extras/Hadrons/Modules/MGauge/Unit.cc @@ -47,13 +47,6 @@ std::vector TUnit::getInput(void) return std::vector(); } -std::vector TUnit::getReference(void) -{ - std::vector ref = {}; - - return ref; -} - std::vector TUnit::getOutput(void) { std::vector out = {getName()}; diff --git a/extras/Hadrons/Modules/MGauge/Unit.hpp b/extras/Hadrons/Modules/MGauge/Unit.hpp index 4b69f0ce..c1650cc7 100644 --- a/extras/Hadrons/Modules/MGauge/Unit.hpp +++ b/extras/Hadrons/Modules/MGauge/Unit.hpp @@ -50,7 +50,6 @@ public: virtual ~TUnit(void) = default; // dependencies/products virtual std::vector getInput(void); - virtual std::vector getReference(void); virtual std::vector getOutput(void); protected: // setup diff --git a/extras/Hadrons/Modules/MLoop/NoiseLoop.hpp b/extras/Hadrons/Modules/MLoop/NoiseLoop.hpp index 0feb5efb..512c731a 100644 --- a/extras/Hadrons/Modules/MLoop/NoiseLoop.hpp +++ b/extras/Hadrons/Modules/MLoop/NoiseLoop.hpp @@ -73,7 +73,6 @@ public: virtual ~TNoiseLoop(void) = default; // dependency relation virtual std::vector getInput(void); - virtual std::vector getReference(void); virtual std::vector getOutput(void); protected: // 
setup @@ -102,15 +101,6 @@ std::vector TNoiseLoop::getInput(void) return in; } - -template -std::vector TNoiseLoop::getReference(void) -{ - std::vector out = {}; - - return out; -} - template std::vector TNoiseLoop::getOutput(void) { diff --git a/extras/Hadrons/Modules/MSink/Point.hpp b/extras/Hadrons/Modules/MSink/Point.hpp index 42cae4f6..43be3009 100644 --- a/extras/Hadrons/Modules/MSink/Point.hpp +++ b/extras/Hadrons/Modules/MSink/Point.hpp @@ -60,7 +60,6 @@ public: virtual ~TPoint(void) = default; // dependency relation virtual std::vector getInput(void); - virtual std::vector getReference(void); virtual std::vector getOutput(void); protected: // setup @@ -94,14 +93,6 @@ std::vector TPoint::getInput(void) return in; } -template -std::vector TPoint::getReference(void) -{ - std::vector ref = {}; - - return ref; -} - template std::vector TPoint::getOutput(void) { diff --git a/extras/Hadrons/Modules/MSink/Smear.hpp b/extras/Hadrons/Modules/MSink/Smear.hpp index 03cc861a..e85ab263 100644 --- a/extras/Hadrons/Modules/MSink/Smear.hpp +++ b/extras/Hadrons/Modules/MSink/Smear.hpp @@ -61,7 +61,6 @@ public: virtual ~TSmear(void) = default; // dependency relation virtual std::vector getInput(void); - virtual std::vector getReference(void); virtual std::vector getOutput(void); protected: // setup @@ -90,14 +89,6 @@ std::vector TSmear::getInput(void) return in; } -template -std::vector TSmear::getReference(void) -{ - std::vector ref = {}; - - return ref; -} - template std::vector TSmear::getOutput(void) { diff --git a/extras/Hadrons/Modules/MSource/Point.hpp b/extras/Hadrons/Modules/MSource/Point.hpp index 6470c77f..1d8241cf 100644 --- a/extras/Hadrons/Modules/MSource/Point.hpp +++ b/extras/Hadrons/Modules/MSource/Point.hpp @@ -71,7 +71,6 @@ public: virtual ~TPoint(void) = default; // dependency relation virtual std::vector getInput(void); - virtual std::vector getReference(void); virtual std::vector getOutput(void); protected: // setup @@ -101,14 +100,6 @@ std::vector 
TPoint::getInput(void) return in; } -template -std::vector TPoint::getReference(void) -{ - std::vector ref = {}; - - return ref; -} - template std::vector TPoint::getOutput(void) { diff --git a/extras/Hadrons/Modules/MSource/SeqConserved.hpp b/extras/Hadrons/Modules/MSource/SeqConserved.hpp index 9ccbee1b..3e8ef457 100644 --- a/extras/Hadrons/Modules/MSource/SeqConserved.hpp +++ b/extras/Hadrons/Modules/MSource/SeqConserved.hpp @@ -82,7 +82,6 @@ public: virtual ~TSeqConserved(void) = default; // dependency relation virtual std::vector getInput(void); - virtual std::vector getReference(void); virtual std::vector getOutput(void); protected: // setup @@ -111,14 +110,6 @@ std::vector TSeqConserved::getInput(void) return in; } -template -std::vector TSeqConserved::getReference(void) -{ - std::vector ref = {}; - - return ref; -} - template std::vector TSeqConserved::getOutput(void) { diff --git a/extras/Hadrons/Modules/MSource/SeqGamma.hpp b/extras/Hadrons/Modules/MSource/SeqGamma.hpp index d2b3c958..abad5ace 100644 --- a/extras/Hadrons/Modules/MSource/SeqGamma.hpp +++ b/extras/Hadrons/Modules/MSource/SeqGamma.hpp @@ -80,7 +80,6 @@ public: virtual ~TSeqGamma(void) = default; // dependency relation virtual std::vector getInput(void); - virtual std::vector getReference(void); virtual std::vector getOutput(void); protected: // setup @@ -114,14 +113,6 @@ std::vector TSeqGamma::getInput(void) return in; } -template -std::vector TSeqGamma::getReference(void) -{ - std::vector ref = {}; - - return ref; -} - template std::vector TSeqGamma::getOutput(void) { diff --git a/extras/Hadrons/Modules/MSource/Wall.hpp b/extras/Hadrons/Modules/MSource/Wall.hpp index d9814d9e..9d5f1f46 100644 --- a/extras/Hadrons/Modules/MSource/Wall.hpp +++ b/extras/Hadrons/Modules/MSource/Wall.hpp @@ -72,7 +72,6 @@ public: virtual ~TWall(void) = default; // dependency relation virtual std::vector getInput(void); - virtual std::vector getReference(void); virtual std::vector getOutput(void); protected: // 
setup @@ -106,14 +105,6 @@ std::vector TWall::getInput(void) return in; } -template -std::vector TWall::getReference(void) -{ - std::vector ref = {}; - - return ref; -} - template std::vector TWall::getOutput(void) { diff --git a/extras/Hadrons/Modules/MSource/Z2.hpp b/extras/Hadrons/Modules/MSource/Z2.hpp index 39840319..3593cb34 100644 --- a/extras/Hadrons/Modules/MSource/Z2.hpp +++ b/extras/Hadrons/Modules/MSource/Z2.hpp @@ -75,7 +75,6 @@ public: virtual ~TZ2(void) = default; // dependency relation virtual std::vector getInput(void); - virtual std::vector getReference(void); virtual std::vector getOutput(void); protected: // setup @@ -109,14 +108,6 @@ std::vector TZ2::getInput(void) return in; } -template -std::vector TZ2::getReference(void) -{ - std::vector ref = {}; - - return ref; -} - template std::vector TZ2::getOutput(void) { diff --git a/extras/Hadrons/Modules/MUtilities/TestSeqConserved.hpp b/extras/Hadrons/Modules/MUtilities/TestSeqConserved.hpp index 081d2911..0647884c 100644 --- a/extras/Hadrons/Modules/MUtilities/TestSeqConserved.hpp +++ b/extras/Hadrons/Modules/MUtilities/TestSeqConserved.hpp @@ -79,7 +79,6 @@ public: virtual ~TTestSeqConserved(void) = default; // dependency relation virtual std::vector getInput(void); - virtual std::vector getReference(void); virtual std::vector getOutput(void); protected: // setup @@ -108,14 +107,6 @@ std::vector TTestSeqConserved::getInput(void) return in; } -template -std::vector TTestSeqConserved::getReference(void) -{ - std::vector ref = {}; - - return ref; -} - template std::vector TTestSeqConserved::getOutput(void) { diff --git a/extras/Hadrons/Modules/MUtilities/TestSeqGamma.hpp b/extras/Hadrons/Modules/MUtilities/TestSeqGamma.hpp index 30bd4b69..fd53eab8 100644 --- a/extras/Hadrons/Modules/MUtilities/TestSeqGamma.hpp +++ b/extras/Hadrons/Modules/MUtilities/TestSeqGamma.hpp @@ -63,7 +63,6 @@ public: virtual ~TTestSeqGamma(void) = default; // dependency relation virtual std::vector getInput(void); - virtual 
std::vector getReference(void); virtual std::vector getOutput(void); protected: // setup @@ -92,14 +91,6 @@ std::vector TTestSeqGamma::getInput(void) return in; } -template -std::vector TTestSeqGamma::getReference(void) -{ - std::vector ref = {}; - - return ref; -} - template std::vector TTestSeqGamma::getOutput(void) { diff --git a/extras/Hadrons/Modules/templates/Module.cc.template b/extras/Hadrons/Modules/templates/Module.cc.template index 29edadfb..0c509d6d 100644 --- a/extras/Hadrons/Modules/templates/Module.cc.template +++ b/extras/Hadrons/Modules/templates/Module.cc.template @@ -19,14 +19,6 @@ std::vector T___FILEBASENAME___::getInput(void) return in; } -template -std::vector T___FILEBASENAME___::getReference(void) -{ - std::vector in = {}; - - return in; -} - std::vector T___FILEBASENAME___::getOutput(void) { std::vector out = {getName()}; diff --git a/extras/Hadrons/Modules/templates/Module.hpp.template b/extras/Hadrons/Modules/templates/Module.hpp.template index b59e168f..fb43260f 100644 --- a/extras/Hadrons/Modules/templates/Module.hpp.template +++ b/extras/Hadrons/Modules/templates/Module.hpp.template @@ -26,7 +26,6 @@ public: virtual ~T___FILEBASENAME___(void) = default; // dependency relation virtual std::vector getInput(void); - virtual std::vector getReference(void); virtual std::vector getOutput(void); // setup virtual void setup(void); diff --git a/extras/Hadrons/Modules/templates/Module_in_NS.cc.template b/extras/Hadrons/Modules/templates/Module_in_NS.cc.template index 880129bd..8b2a0ec0 100644 --- a/extras/Hadrons/Modules/templates/Module_in_NS.cc.template +++ b/extras/Hadrons/Modules/templates/Module_in_NS.cc.template @@ -20,14 +20,6 @@ std::vector T___FILEBASENAME___::getInput(void) return in; } -template -std::vector T___FILEBASENAME___::getReference(void) -{ - std::vector in = {}; - - return in; -} - std::vector T___FILEBASENAME___::getOutput(void) { std::vector out = {getName()}; diff --git 
a/extras/Hadrons/Modules/templates/Module_in_NS.hpp.template b/extras/Hadrons/Modules/templates/Module_in_NS.hpp.template index f90cb052..ea77b12a 100644 --- a/extras/Hadrons/Modules/templates/Module_in_NS.hpp.template +++ b/extras/Hadrons/Modules/templates/Module_in_NS.hpp.template @@ -28,7 +28,6 @@ public: virtual ~T___FILEBASENAME___(void) = default; // dependency relation virtual std::vector getInput(void); - virtual std::vector getReference(void); virtual std::vector getOutput(void); // setup virtual void setup(void); diff --git a/extras/Hadrons/Modules/templates/Module_tmp.hpp.template b/extras/Hadrons/Modules/templates/Module_tmp.hpp.template index b4e7f87f..2ee053a9 100644 --- a/extras/Hadrons/Modules/templates/Module_tmp.hpp.template +++ b/extras/Hadrons/Modules/templates/Module_tmp.hpp.template @@ -27,7 +27,6 @@ public: virtual ~T___FILEBASENAME___(void) = default; // dependency relation virtual std::vector getInput(void); - virtual std::vector getReference(void); virtual std::vector getOutput(void); // setup virtual void setup(void); diff --git a/extras/Hadrons/Modules/templates/Module_tmp_in_NS.hpp.template b/extras/Hadrons/Modules/templates/Module_tmp_in_NS.hpp.template index 9aef1c92..b79c0ad3 100644 --- a/extras/Hadrons/Modules/templates/Module_tmp_in_NS.hpp.template +++ b/extras/Hadrons/Modules/templates/Module_tmp_in_NS.hpp.template @@ -29,7 +29,6 @@ public: virtual ~T___FILEBASENAME___(void) = default; // dependency relation virtual std::vector getInput(void); - virtual std::vector getReference(void); virtual std::vector getOutput(void); // setup virtual void setup(void); @@ -57,14 +56,6 @@ std::vector T___FILEBASENAME___::getInput(void) return in; } -template -std::vector T___FILEBASENAME___::getReference(void) -{ - std::vector in = {}; - - return in; -} - template std::vector T___FILEBASENAME___::getOutput(void) { From 65d4f17976ec7920aeccb880a50ad852e7fe7290 Mon Sep 17 00:00:00 2001 From: Antonin Portelli Date: Tue, 19 Dec 2017 20:28:32 +0000 
Subject: [PATCH 258/377] Hadrons: no errors when trying to recreate a cache --- extras/Hadrons/Environment.hpp | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/extras/Hadrons/Environment.hpp b/extras/Hadrons/Environment.hpp index adea13ce..7f1bc26d 100644 --- a/extras/Hadrons/Environment.hpp +++ b/extras/Hadrons/Environment.hpp @@ -228,7 +228,11 @@ void Environment::createDerivedObject(const std::string name, MemoryProfiler::stats = nullptr; } } - else + // object already exists, no error if it is a cache, error otherwise + else if ((object_[address].storage != Storage::cache) or + (object_[address].storage != storage) or + (object_[address].name != name) or + (object_[address].type != &typeid(T))) { HADRON_ERROR(Definition, "object '" + name + "' already allocated"); } From 67c3fa0f5f2adda473b7543121a7ad6041547259 Mon Sep 17 00:00:00 2001 From: Antonin Portelli Date: Thu, 21 Dec 2017 11:39:07 +0000 Subject: [PATCH 259/377] Hadrons: all modules are now ported, more tests need to be done --- extras/Hadrons/Modules.hpp | 40 ++--- extras/Hadrons/Modules/MScalar/ChargedProp.cc | 153 +++++++++--------- .../Hadrons/Modules/MScalar/ChargedProp.hpp | 6 +- extras/Hadrons/Modules/MScalar/FreeProp.cc | 27 ++-- extras/Hadrons/Modules/MScalar/FreeProp.hpp | 1 + extras/Hadrons/make_module_list.sh | 30 ++++ extras/Hadrons/modules.inc | 49 +++--- 7 files changed, 165 insertions(+), 141 deletions(-) diff --git a/extras/Hadrons/Modules.hpp b/extras/Hadrons/Modules.hpp index 61a20058..cf381d0f 100644 --- a/extras/Hadrons/Modules.hpp +++ b/extras/Hadrons/Modules.hpp @@ -28,33 +28,33 @@ See the full license in the file "LICENSE" in the top level distribution directo *************************************************************************************/ /* END LEGAL */ -#include -#include #include -#include -#include #include -#include #include -#include #include +#include #include +#include +#include +#include #include -#include -#include -#include -#include 
-#include -// #include -// #include -// #include -#include -#include -#include -#include -#include #include +#include #include #include -#include +#include +#include +#include +#include +#include +#include +#include +#include #include +#include +#include +#include +#include +#include +#include +#include diff --git a/extras/Hadrons/Modules/MScalar/ChargedProp.cc b/extras/Hadrons/Modules/MScalar/ChargedProp.cc index cd8dc244..6cb75a28 100644 --- a/extras/Hadrons/Modules/MScalar/ChargedProp.cc +++ b/extras/Hadrons/Modules/MScalar/ChargedProp.cc @@ -37,90 +37,44 @@ void TChargedProp::setup(void) { phaseName_.push_back("_shiftphase_" + std::to_string(mu)); } - GFSrcName_ = "_" + getName() + "_DinvSrc"; - if (!env().hasRegisteredObject(freeMomPropName_)) + GFSrcName_ = getName() + "_DinvSrc"; + fftName_ = getName() + "_fft"; + + freeMomPropDone_ = env().hasCreatedObject(freeMomPropName_); + GFSrcDone_ = env().hasCreatedObject(GFSrcName_); + phasesDone_ = env().hasCreatedObject(phaseName_[0]); + envCacheLat(ScalarField, freeMomPropName_); + for (unsigned int mu = 0; mu < env().getNd(); ++mu) { - env().registerLattice(freeMomPropName_); + envCacheLat(ScalarField, phaseName_[mu]); } - if (!env().hasRegisteredObject(phaseName_[0])) - { - for (unsigned int mu = 0; mu < env().getNd(); ++mu) - { - env().registerLattice(phaseName_[mu]); - } - } - if (!env().hasRegisteredObject(GFSrcName_)) - { - env().registerLattice(GFSrcName_); - } - env().registerLattice(getName()); + envCacheLat(ScalarField, GFSrcName_); + envCreateLat(ScalarField, getName()); + envTmpLat(ScalarField, "buf"); + envTmpLat(ScalarField, "result"); + envTmpLat(ScalarField, "Amu"); + envCache(FFT, fftName_, 1, env().getGrid()); } // execution /////////////////////////////////////////////////////////////////// void TChargedProp::execute(void) { // CACHING ANALYTIC EXPRESSIONS - ScalarField &source = *env().getObject(par().source); - Complex ci(0.0,1.0); - FFT fft(env().getGrid()); - - // cache free scalar 
propagator - if (!env().hasCreatedObject(freeMomPropName_)) - { - LOG(Message) << "Caching momentum space free scalar propagator" - << " (mass= " << par().mass << ")..." << std::endl; - freeMomProp_ = env().createLattice(freeMomPropName_); - SIMPL::MomentumSpacePropagator(*freeMomProp_, par().mass); - } - else - { - freeMomProp_ = env().getObject(freeMomPropName_); - } - // cache G*F*src - if (!env().hasCreatedObject(GFSrcName_)) - - { - GFSrc_ = env().createLattice(GFSrcName_); - fft.FFT_all_dim(*GFSrc_, source, FFT::forward); - *GFSrc_ = (*freeMomProp_)*(*GFSrc_); - } - else - { - GFSrc_ = env().getObject(GFSrcName_); - } - // cache phases - if (!env().hasCreatedObject(phaseName_[0])) - { - std::vector &l = env().getGrid()->_fdimensions; - - LOG(Message) << "Caching shift phases..." << std::endl; - for (unsigned int mu = 0; mu < env().getNd(); ++mu) - { - Real twoPiL = M_PI*2./l[mu]; - - phase_.push_back(env().createLattice(phaseName_[mu])); - LatticeCoordinate(*(phase_[mu]), mu); - *(phase_[mu]) = exp(ci*twoPiL*(*(phase_[mu]))); - } - } - else - { - for (unsigned int mu = 0; mu < env().getNd(); ++mu) - { - phase_.push_back(env().getObject(phaseName_[mu])); - } - } + makeCaches(); // PROPAGATOR CALCULATION LOG(Message) << "Computing charged scalar propagator" << " (mass= " << par().mass << ", charge= " << par().charge << ")..." << std::endl; - ScalarField &prop = *env().createLattice(getName()); - ScalarField buf(env().getGrid()); - ScalarField &GFSrc = *GFSrc_, &G = *freeMomProp_; - double q = par().charge; - + auto &prop = envGet(ScalarField, getName()); + auto &GFSrc = envGet(ScalarField, GFSrcName_); + auto &G = envGet(ScalarField, freeMomPropName_); + auto &fft = envGet(FFT, fftName_); + double q = par().charge; + envGetTmp(ScalarField, result); + envGetTmp(ScalarField, buf); + // G*F*Src prop = GFSrc; @@ -146,7 +100,7 @@ void TChargedProp::execute(void) if (!par().output.empty()) { std::string filename = par().output + "." 
+ - std::to_string(env().getTrajectory()); + std::to_string(vm().getTrajectory()); LOG(Message) << "Saving zero-momentum projection to '" << filename << "'..." << std::endl; @@ -166,15 +120,55 @@ void TChargedProp::execute(void) } } +void TChargedProp::makeCaches(void) +{ + auto &freeMomProp = envGet(ScalarField, freeMomPropName_); + auto &GFSrc = envGet(ScalarField, GFSrcName_); + auto &fft = envGet(FFT, fftName_); + + if (!freeMomPropDone_) + { + LOG(Message) << "Caching momentum space free scalar propagator" + << " (mass= " << par().mass << ")..." << std::endl; + SIMPL::MomentumSpacePropagator(freeMomProp, par().mass); + } + if (!GFSrcDone_) + { + FFT fft(env().getGrid()); + auto &source = envGet(ScalarField, par().source); + + LOG(Message) << "Caching G*F*src..." << std::endl; + fft.FFT_all_dim(GFSrc, source, FFT::forward); + GFSrc = freeMomProp*GFSrc; + } + if (!phasesDone_) + { + std::vector &l = env().getGrid()->_fdimensions; + Complex ci(0.0,1.0); + + LOG(Message) << "Caching shift phases..." 
<< std::endl; + for (unsigned int mu = 0; mu < env().getNd(); ++mu) + { + Real twoPiL = M_PI*2./l[mu]; + auto &phmu = envGet(ScalarField, phaseName_[mu]); + + LatticeCoordinate(phmu, mu); + phmu = exp(ci*twoPiL*phmu); + phase_.push_back(&phmu); + } + } +} + void TChargedProp::momD1(ScalarField &s, FFT &fft) { - EmField &A = *env().getObject(par().emField); - ScalarField buf(env().getGrid()), result(env().getGrid()), - Amu(env().getGrid()); + auto &A = envGet(EmField, par().emField); Complex ci(0.0,1.0); - result = zero; + envGetTmp(ScalarField, buf); + envGetTmp(ScalarField, result); + envGetTmp(ScalarField, Amu); + result = zero; for (unsigned int mu = 0; mu < env().getNd(); ++mu) { Amu = peekLorentz(A, mu); @@ -198,12 +192,13 @@ void TChargedProp::momD1(ScalarField &s, FFT &fft) void TChargedProp::momD2(ScalarField &s, FFT &fft) { - EmField &A = *env().getObject(par().emField); - ScalarField buf(env().getGrid()), result(env().getGrid()), - Amu(env().getGrid()); + auto &A = envGet(EmField, par().emField); + + envGetTmp(ScalarField, buf); + envGetTmp(ScalarField, result); + envGetTmp(ScalarField, Amu); result = zero; - for (unsigned int mu = 0; mu < env().getNd(); ++mu) { Amu = peekLorentz(A, mu); diff --git a/extras/Hadrons/Modules/MScalar/ChargedProp.hpp b/extras/Hadrons/Modules/MScalar/ChargedProp.hpp index ab6a0184..cfcce28e 100644 --- a/extras/Hadrons/Modules/MScalar/ChargedProp.hpp +++ b/extras/Hadrons/Modules/MScalar/ChargedProp.hpp @@ -43,14 +43,14 @@ protected: // execution virtual void execute(void); private: + void makeCaches(void); void momD1(ScalarField &s, FFT &fft); void momD2(ScalarField &s, FFT &fft); private: - std::string freeMomPropName_, GFSrcName_; + bool freeMomPropDone_, GFSrcDone_, phasesDone_; + std::string freeMomPropName_, GFSrcName_, fftName_; std::vector phaseName_; - ScalarField *freeMomProp_, *GFSrc_; std::vector phase_; - EmField *A; }; MODULE_REGISTER_NS(ChargedProp, TChargedProp, MScalar); diff --git 
a/extras/Hadrons/Modules/MScalar/FreeProp.cc b/extras/Hadrons/Modules/MScalar/FreeProp.cc index 674867e3..924db288 100644 --- a/extras/Hadrons/Modules/MScalar/FreeProp.cc +++ b/extras/Hadrons/Modules/MScalar/FreeProp.cc @@ -33,38 +33,31 @@ void TFreeProp::setup(void) { freeMomPropName_ = FREEMOMPROP(par().mass); - if (!env().hasRegisteredObject(freeMomPropName_)) - { - env().registerLattice(freeMomPropName_); - } - env().registerLattice(getName()); + freePropDone_ = env().hasCreatedObject(freeMomPropName_); + envCacheLat(ScalarField, freeMomPropName_); + envCreateLat(ScalarField, getName()); } // execution /////////////////////////////////////////////////////////////////// void TFreeProp::execute(void) { - ScalarField &prop = *env().createLattice(getName()); - ScalarField &source = *env().getObject(par().source); - ScalarField *freeMomProp; + auto &freeMomProp = envGet(ScalarField, freeMomPropName_); + auto &prop = envGet(ScalarField, getName()); + auto &source = envGet(ScalarField, par().source); - if (!env().hasCreatedObject(freeMomPropName_)) + if (!freePropDone_) { LOG(Message) << "Caching momentum space free scalar propagator" << " (mass= " << par().mass << ")..." << std::endl; - freeMomProp = env().createLattice(freeMomPropName_); - SIMPL::MomentumSpacePropagator(*freeMomProp, par().mass); - } - else - { - freeMomProp = env().getObject(freeMomPropName_); + SIMPL::MomentumSpacePropagator(freeMomProp, par().mass); } LOG(Message) << "Computing free scalar propagator..." << std::endl; - SIMPL::FreePropagator(source, prop, *freeMomProp); + SIMPL::FreePropagator(source, prop, freeMomProp); if (!par().output.empty()) { TextWriter writer(par().output + "." 
+ - std::to_string(env().getTrajectory())); + std::to_string(vm().getTrajectory())); std::vector buf; std::vector result; diff --git a/extras/Hadrons/Modules/MScalar/FreeProp.hpp b/extras/Hadrons/Modules/MScalar/FreeProp.hpp index 38372a0c..6b956134 100644 --- a/extras/Hadrons/Modules/MScalar/FreeProp.hpp +++ b/extras/Hadrons/Modules/MScalar/FreeProp.hpp @@ -40,6 +40,7 @@ protected: virtual void execute(void); private: std::string freeMomPropName_; + bool freePropDone_; }; MODULE_REGISTER_NS(FreeProp, TFreeProp, MScalar); diff --git a/extras/Hadrons/make_module_list.sh b/extras/Hadrons/make_module_list.sh index ddc56ff6..8c6fa4da 100755 --- a/extras/Hadrons/make_module_list.sh +++ b/extras/Hadrons/make_module_list.sh @@ -7,6 +7,36 @@ echo 'modules_hpp =\' >> modules.inc find Modules -name '*.hpp' -type f -print | sed 's/^/ /;$q;s/$/ \\/' >> modules.inc echo '' >> modules.inc rm -f Modules.hpp +echo "/************************************************************************************* + +Grid physics library, www.github.com/paboyle/Grid + +Source file: extras/Hadrons/Modules.hpp + +Copyright (C) 2015 +Copyright (C) 2016 +Copyright (C) 2017 + +Author: Antonin Portelli + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along +with this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+ +See the full license in the file \"LICENSE\" in the top level distribution directory +*************************************************************************************/ +/* END LEGAL */ +" > Modules.hpp for f in `find Modules -name '*.hpp'`; do echo "#include " >> Modules.hpp done diff --git a/extras/Hadrons/modules.inc b/extras/Hadrons/modules.inc index 2f4d183e..199bb5cd 100644 --- a/extras/Hadrons/modules.inc +++ b/extras/Hadrons/modules.inc @@ -1,38 +1,43 @@ modules_cc =\ - Modules/MGauge/Unit.cc \ Modules/MContraction/WeakHamiltonianEye.cc \ - Modules/MContraction/WeakHamiltonianNonEye.cc \ Modules/MContraction/WeakNeutral4ptDisc.cc \ + Modules/MContraction/WeakHamiltonianNonEye.cc \ Modules/MGauge/Load.cc \ + Modules/MGauge/Unit.cc \ + Modules/MGauge/StochEm.cc \ Modules/MGauge/Random.cc \ - Modules/MGauge/StochEm.cc + Modules/MScalar/FreeProp.cc \ + Modules/MScalar/ChargedProp.cc modules_hpp =\ - Modules/MAction/DWF.hpp \ - Modules/MAction/Wilson.hpp \ - Modules/MSink/Point.hpp \ - Modules/MSource/Point.hpp \ - Modules/MGauge/Load.hpp \ - Modules/MGauge/Random.hpp \ - Modules/MGauge/StochEm.hpp \ - Modules/MGauge/Unit.hpp \ - Modules/MSolver/RBPrecCG.hpp \ - Modules/MFermion/GaugeProp.hpp \ Modules/MContraction/Baryon.hpp \ - Modules/MContraction/DiscLoop.hpp \ - Modules/MContraction/Gamma3pt.hpp \ Modules/MContraction/Meson.hpp \ - Modules/MContraction/WardIdentity.hpp \ Modules/MContraction/WeakHamiltonian.hpp \ - Modules/MContraction/WeakHamiltonianEye.hpp \ Modules/MContraction/WeakHamiltonianNonEye.hpp \ + Modules/MContraction/DiscLoop.hpp \ Modules/MContraction/WeakNeutral4ptDisc.hpp \ - Modules/MLoop/NoiseLoop.hpp \ - Modules/MSink/Smear.hpp \ - Modules/MSolver/RBPrecCG.hpp \ - Modules/MSource/SeqConserved.hpp \ + Modules/MContraction/Gamma3pt.hpp \ + Modules/MContraction/WardIdentity.hpp \ + Modules/MContraction/WeakHamiltonianEye.hpp \ + Modules/MFermion/GaugeProp.hpp \ Modules/MSource/SeqGamma.hpp \ + Modules/MSource/Point.hpp \ 
Modules/MSource/Wall.hpp \ Modules/MSource/Z2.hpp \ + Modules/MSource/SeqConserved.hpp \ + Modules/MSink/Smear.hpp \ + Modules/MSink/Point.hpp \ + Modules/MSolver/RBPrecCG.hpp \ + Modules/MGauge/Load.hpp \ + Modules/MGauge/Unit.hpp \ + Modules/MGauge/Random.hpp \ + Modules/MGauge/StochEm.hpp \ + Modules/MUtilities/TestSeqGamma.hpp \ Modules/MUtilities/TestSeqConserved.hpp \ - Modules/MUtilities/TestSeqGamma.hpp + Modules/MLoop/NoiseLoop.hpp \ + Modules/MScalar/FreeProp.hpp \ + Modules/MScalar/Scalar.hpp \ + Modules/MScalar/ChargedProp.hpp \ + Modules/MAction/DWF.hpp \ + Modules/MAction/Wilson.hpp + From 4ce63af7d5945b89043797b4de1613f0fbaf4798 Mon Sep 17 00:00:00 2001 From: pretidav Date: Fri, 22 Dec 2017 19:02:07 +0100 Subject: [PATCH 260/377] Working on Hadrons with Hirep. (QCD is set for SU4) --- extras/Hadrons/Modules.hpp | 42 ++--- .../Hadrons/Modules/MAction/WilsonClover.hpp | 142 +++++++++++++++ extras/Hadrons/Modules/MFermion/GaugeProp.hpp | 13 +- extras/Hadrons/Modules/MGauge/FundtoHirep.cc | 75 ++++++++ extras/Hadrons/Modules/MGauge/FundtoHirep.hpp | 77 ++++++++ extras/Hadrons/modules.inc | 51 +++--- lib/qcd/QCD.h | 18 +- lib/qcd/action/fermion/WilsonFermion.h | 3 +- tests/hadrons/Test_hadrons_2AS_spectrum.cc | 168 ++++++++++++++++++ tests/lanczos/Test_WCMultiRep_lanczos.cc | 108 +++++++++++ 10 files changed, 637 insertions(+), 60 deletions(-) create mode 100644 extras/Hadrons/Modules/MAction/WilsonClover.hpp create mode 100644 extras/Hadrons/Modules/MGauge/FundtoHirep.cc create mode 100644 extras/Hadrons/Modules/MGauge/FundtoHirep.hpp create mode 100644 tests/hadrons/Test_hadrons_2AS_spectrum.cc create mode 100644 tests/lanczos/Test_WCMultiRep_lanczos.cc diff --git a/extras/Hadrons/Modules.hpp b/extras/Hadrons/Modules.hpp index c27254aa..262795e8 100644 --- a/extras/Hadrons/Modules.hpp +++ b/extras/Hadrons/Modules.hpp @@ -1,25 +1,27 @@ -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include 
-#include -#include -#include #include #include -#include #include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include #include #include -#include -#include -#include -#include +#include +#include +#include +#include +#include +#include +#include +#include diff --git a/extras/Hadrons/Modules/MAction/WilsonClover.hpp b/extras/Hadrons/Modules/MAction/WilsonClover.hpp new file mode 100644 index 00000000..44b1f0b7 --- /dev/null +++ b/extras/Hadrons/Modules/MAction/WilsonClover.hpp @@ -0,0 +1,142 @@ +/************************************************************************************* + +Grid physics library, www.github.com/paboyle/Grid + +Source file: extras/Hadrons/Modules/MAction/Wilson.hpp + +Copyright (C) 2015 +Copyright (C) 2016 + +Author: Antonin Portelli + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along +with this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+ +See the full license in the file "LICENSE" in the top level distribution directory +*************************************************************************************/ +/* END LEGAL */ + +#ifndef Hadrons_MAction_WilsonClover_hpp_ +#define Hadrons_MAction_WilsonClover_hpp_ + +#include +#include +#include + +BEGIN_HADRONS_NAMESPACE + +/****************************************************************************** + * TWilson quark action * + ******************************************************************************/ +BEGIN_MODULE_NAMESPACE(MAction) + +class WilsonCloverPar: Serializable +{ +public: + GRID_SERIALIZABLE_CLASS_MEMBERS(WilsonCloverPar, + std::string, gauge, + double , mass, + double , csw_r, + double , csw_t, + WilsonAnisotropyCoefficients ,clover_anisotropy, + std::string, boundary + ); +}; + +template +class TWilsonClover: public Module +{ +public: + FGS_TYPE_ALIASES(FImpl,); +public: + // constructor + TWilsonClover(const std::string name); + // destructor + virtual ~TWilsonClover(void) = default; + // dependencies/products + virtual std::vector getInput(void); + virtual std::vector getOutput(void); + // setup + virtual void setup(void); + // execution + virtual void execute(void); +}; + +MODULE_REGISTER_NS(WilsonClover, TWilsonClover, MAction); + +/****************************************************************************** + * TWilsonClover template implementation * + ******************************************************************************/ +// constructor ///////////////////////////////////////////////////////////////// +template +TWilsonClover::TWilsonClover(const std::string name) +: Module(name) +{} + +// dependencies/products /////////////////////////////////////////////////////// +template +std::vector TWilsonClover::getInput(void) +{ + std::vector in = {par().gauge}; + + return in; +} + +template +std::vector TWilsonClover::getOutput(void) +{ + std::vector out = {getName()}; + + return out; +} + +// setup 
/////////////////////////////////////////////////////////////////////// +template +void TWilsonClover::setup(void) +{ + unsigned int size; + + size = 2*env().template lattice4dSize(); + env().registerObject(getName(), size); +} + +// execution /////////////////////////////////////////////////////////////////// +template +void TWilsonClover::execute() +{ + LOG(Message) << "Setting up TWilsonClover fermion matrix with m= " << par().mass + << " using gauge field '" << par().gauge << "'" << std::endl; + LOG(Message) << "Fermion boundary conditions: " << par().boundary + << std::endl; + LOG(Message) << "clover term csw_r= " << par().csw_r + << " csw_t= " << par().csw_t + << std::endl; + auto &U = *env().template getObject(par().gauge); + auto &grid = *env().getGrid(); + auto &gridRb = *env().getRbGrid(); + std::vector boundary = strToVec(par().boundary); + typename WilsonCloverFermion::ImplParams implParams(boundary); + FMat *fMatPt = new WilsonCloverFermion(U, grid, gridRb, par().mass, + par().csw_r, + par().csw_t, + par().clover_anisotropy, + implParams); + env().setObject(getName(), fMatPt); +} + +END_MODULE_NAMESPACE + +END_HADRONS_NAMESPACE + +#endif // Hadrons_WilsonClover_hpp_ diff --git a/extras/Hadrons/Modules/MFermion/GaugeProp.hpp b/extras/Hadrons/Modules/MFermion/GaugeProp.hpp index b4f9edcc..4e802710 100644 --- a/extras/Hadrons/Modules/MFermion/GaugeProp.hpp +++ b/extras/Hadrons/Modules/MFermion/GaugeProp.hpp @@ -43,7 +43,6 @@ private: }; MODULE_REGISTER_NS(GaugeProp, TGaugeProp, MFermion); - /****************************************************************************** * TGaugeProp implementation * ******************************************************************************/ @@ -103,7 +102,7 @@ void TGaugeProp::execute(void) LOG(Message) << "Inverting using solver '" << par().solver << "' on source '" << par().source << "'" << std::endl; for (unsigned int s = 0; s < Ns; ++s) - for (unsigned int c = 0; c < Nc; ++c) + for (unsigned int c = 0; c < 
FImpl::Dimension; ++c) { LOG(Message) << "Inversion for spin= " << s << ", color= " << c << std::endl; @@ -112,12 +111,12 @@ void TGaugeProp::execute(void) { if (Ls_ == 1) { - PropToFerm(source, fullSrc, s, c); + PropToFerm(source, fullSrc, s, c); } else { source = zero; - PropToFerm(tmp, fullSrc, s, c); + PropToFerm(tmp, fullSrc, s, c); InsertSlice(tmp, source, 0, 0); InsertSlice(tmp, source, Ls_-1, 0); axpby_ssp_pplus(source, 0., source, 1., source, 0, 0); @@ -133,12 +132,12 @@ void TGaugeProp::execute(void) } else { - PropToFerm(source, fullSrc, s, c); + PropToFerm(source, fullSrc, s, c); } } sol = zero; solver(sol, source); - FermToProp(prop, sol, s, c); + FermToProp(prop, sol, s, c); // create 4D propagators from 5D one if necessary if (Ls_ > 1) { @@ -148,7 +147,7 @@ void TGaugeProp::execute(void) axpby_ssp_pminus(sol, 0., sol, 1., sol, 0, 0); axpby_ssp_pplus(sol, 1., sol, 1., sol, 0, Ls_-1); ExtractSlice(tmp, sol, 0, 0); - FermToProp(p4d, tmp, s, c); + FermToProp(p4d, tmp, s, c); } } } diff --git a/extras/Hadrons/Modules/MGauge/FundtoHirep.cc b/extras/Hadrons/Modules/MGauge/FundtoHirep.cc new file mode 100644 index 00000000..f15a3b7c --- /dev/null +++ b/extras/Hadrons/Modules/MGauge/FundtoHirep.cc @@ -0,0 +1,75 @@ +/************************************************************************************* + +Grid physics library, www.github.com/paboyle/Grid + +Source file: extras/Hadrons/Modules/MGauge/FundtoHirep.cc + +Copyright (C) 2015 +Copyright (C) 2016 + + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. 
+ +You should have received a copy of the GNU General Public License along +with this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + +See the full license in the file "LICENSE" in the top level distribution directory +*************************************************************************************/ +/* END LEGAL */ + +#include + +using namespace Grid; +using namespace Hadrons; +using namespace MGauge; + +// constructor ///////////////////////////////////////////////////////////////// +template +TFundtoHirep::TFundtoHirep(const std::string name) +: Module(name) +{} + +// dependencies/products /////////////////////////////////////////////////////// +template +std::vector TFundtoHirep::getInput(void) +{ + std::vector in; + return in; +} + +template +std::vector TFundtoHirep::getOutput(void) +{ + std::vector out = {getName()}; + return out; +} + +// setup /////////////////////////////////////////////////////////////////////// +template +void TFundtoHirep::setup(void) +{ + env().template registerLattice(getName()); +} + +// execution /////////////////////////////////////////////////////////////////// +template +void TFundtoHirep::execute(void) +{ + auto &U = *env().template getObject(par().gaugeconf); + LOG(Message) << "Transforming Representation" << std::endl; + + Rep TargetRepresentation(U._grid); + TargetRepresentation.update_representation(U); + + typename Rep::LatticeField &URep = *env().template createLattice(getName()); + URep = TargetRepresentation.U; +} diff --git a/extras/Hadrons/Modules/MGauge/FundtoHirep.hpp b/extras/Hadrons/Modules/MGauge/FundtoHirep.hpp new file mode 100644 index 00000000..6f072783 --- /dev/null +++ b/extras/Hadrons/Modules/MGauge/FundtoHirep.hpp @@ -0,0 +1,77 @@ +/************************************************************************************* + +Grid physics library, www.github.com/paboyle/Grid + +Source file: 
extras/Hadrons/Modules/MGauge/FundtoHirep.hpp + +Copyright (C) 2015 +Copyright (C) 2016 + +Author: David Preti + Guido Cossu + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along +with this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + +See the full license in the file "LICENSE" in the top level distribution directory +*************************************************************************************/ +/* END LEGAL */ + +#ifndef Hadrons_MGauge_FundtoHirep_hpp_ +#define Hadrons_MGauge_FundtoHirep_hpp_ + +#include +#include +#include + +BEGIN_HADRONS_NAMESPACE + +/****************************************************************************** + * Load a NERSC configuration * + ******************************************************************************/ +BEGIN_MODULE_NAMESPACE(MGauge) + +class FundtoHirepPar: Serializable +{ +public: + GRID_SERIALIZABLE_CLASS_MEMBERS(FundtoHirepPar, + std::string, gaugeconf); +}; + +template +class TFundtoHirep: public Module +{ +public: + // constructor + TFundtoHirep(const std::string name); + // destructor + virtual ~TFundtoHirep(void) = default; + // dependency relation + virtual std::vector getInput(void); + virtual std::vector getOutput(void); + // setup + void setup(void); + // execution + void execute(void); +}; + +//MODULE_REGISTER_NS(FundtoAdjoint, TFundtoHirep, MGauge); +//MODULE_REGISTER_NS(FundtoTwoIndexSym, TFundtoHirep, MGauge); 
+//MODULE_REGISTER_NS(FundtoTwoIndexAsym, TFundtoHirep, MGauge); + +END_MODULE_NAMESPACE + +END_HADRONS_NAMESPACE + +#endif // Hadrons_MGauge_FundtoHirep_hpp_ diff --git a/extras/Hadrons/modules.inc b/extras/Hadrons/modules.inc index 669b08ba..089341c1 100644 --- a/extras/Hadrons/modules.inc +++ b/extras/Hadrons/modules.inc @@ -1,38 +1,41 @@ modules_cc =\ + Modules/MScalar/ChargedProp.cc \ + Modules/MScalar/FreeProp.cc \ Modules/MContraction/WeakHamiltonianEye.cc \ Modules/MContraction/WeakHamiltonianNonEye.cc \ Modules/MContraction/WeakNeutral4ptDisc.cc \ - Modules/MGauge/Load.cc \ - Modules/MGauge/Random.cc \ Modules/MGauge/StochEm.cc \ Modules/MGauge/Unit.cc \ - Modules/MScalar/ChargedProp.cc \ - Modules/MScalar/FreeProp.cc + Modules/MGauge/Load.cc \ + Modules/MGauge/FundtoHirep.cc \ + Modules/MGauge/Random.cc modules_hpp =\ - Modules/MAction/DWF.hpp \ - Modules/MAction/Wilson.hpp \ - Modules/MContraction/Baryon.hpp \ - Modules/MContraction/DiscLoop.hpp \ - Modules/MContraction/Gamma3pt.hpp \ - Modules/MContraction/Meson.hpp \ - Modules/MContraction/WeakHamiltonian.hpp \ - Modules/MContraction/WeakHamiltonianEye.hpp \ - Modules/MContraction/WeakHamiltonianNonEye.hpp \ - Modules/MContraction/WeakNeutral4ptDisc.hpp \ - Modules/MFermion/GaugeProp.hpp \ - Modules/MGauge/Load.hpp \ - Modules/MGauge/Random.hpp \ - Modules/MGauge/StochEm.hpp \ - Modules/MGauge/Unit.hpp \ Modules/MLoop/NoiseLoop.hpp \ Modules/MScalar/ChargedProp.hpp \ - Modules/MScalar/FreeProp.hpp \ Modules/MScalar/Scalar.hpp \ + Modules/MScalar/FreeProp.hpp \ + Modules/MSource/Wall.hpp \ + Modules/MSource/SeqGamma.hpp \ + Modules/MSource/Point.hpp \ + Modules/MSource/Z2.hpp \ + Modules/MFermion/GaugeProp.hpp \ + Modules/MContraction/Meson.hpp \ + Modules/MContraction/WeakHamiltonianNonEye.hpp \ + Modules/MContraction/WeakHamiltonianEye.hpp \ + Modules/MContraction/DiscLoop.hpp \ + Modules/MContraction/Baryon.hpp \ + Modules/MContraction/Gamma3pt.hpp \ + Modules/MContraction/WeakNeutral4ptDisc.hpp \ + 
Modules/MContraction/WeakHamiltonian.hpp \ Modules/MSink/Point.hpp \ Modules/MSolver/RBPrecCG.hpp \ - Modules/MSource/Point.hpp \ - Modules/MSource/SeqGamma.hpp \ - Modules/MSource/Wall.hpp \ - Modules/MSource/Z2.hpp + Modules/MGauge/StochEm.hpp \ + Modules/MGauge/FundtoHirep.hpp \ + Modules/MGauge/Unit.hpp \ + Modules/MGauge/Load.hpp \ + Modules/MGauge/Random.hpp \ + Modules/MAction/WilsonClover.hpp \ + Modules/MAction/DWF.hpp \ + Modules/MAction/Wilson.hpp diff --git a/lib/qcd/QCD.h b/lib/qcd/QCD.h index 9913a071..2caea7e9 100644 --- a/lib/qcd/QCD.h +++ b/lib/qcd/QCD.h @@ -49,7 +49,7 @@ namespace QCD { static const int Zm = 6; static const int Tm = 7; - static const int Nc=3; + static const int Nc=4; static const int Ns=4; static const int Nd=4; static const int Nhs=2; // half spinor @@ -421,15 +421,16 @@ namespace QCD { ////////////////////////////////////////////// // Fermion <-> propagator assignements ////////////////////////////////////////////// - template - void FermToProp(Prop &p, const Ferm &f, const int s, const int c) + //template + template + void FermToProp(typename Fimpl::PropagatorField &p, const typename Fimpl::FermionField &f, const int s, const int c) { - for(int j = 0; j < Ns; ++j) + for(int j = 0; j < Ns; ++j) { auto pjs = peekSpin(p, j, s); auto fj = peekSpin(f, j); - for(int i = 0; i < Nc; ++i) + for(int i = 0; i < Fimpl::Dimension; ++i) { pokeColour(pjs, peekColour(fj, i), i, c); } @@ -437,15 +438,16 @@ namespace QCD { } } - template - void PropToFerm(Ferm &f, const Prop &p, const int s, const int c) + //template + template + void PropToFerm(typename Fimpl::FermionField &f, const typename Fimpl::PropagatorField &p, const int s, const int c) { for(int j = 0; j < Ns; ++j) { auto pjs = peekSpin(p, j, s); auto fj = peekSpin(f, j); - for(int i = 0; i < Nc; ++i) + for(int i = 0; i < Fimpl::Dimension; ++i) { pokeColour(fj, peekColour(pjs, i, c), i); } diff --git a/lib/qcd/action/fermion/WilsonFermion.h b/lib/qcd/action/fermion/WilsonFermion.h 
index ca5eba8b..0aea4b68 100644 --- a/lib/qcd/action/fermion/WilsonFermion.h +++ b/lib/qcd/action/fermion/WilsonFermion.h @@ -44,7 +44,8 @@ class WilsonFermionStatic { static const int npoint = 8; }; -struct WilsonAnisotropyCoefficients{ + struct WilsonAnisotropyCoefficients: Serializable + { GRID_SERIALIZABLE_CLASS_MEMBERS(WilsonAnisotropyCoefficients, bool, isAnisotropic, int, t_direction, diff --git a/tests/hadrons/Test_hadrons_2AS_spectrum.cc b/tests/hadrons/Test_hadrons_2AS_spectrum.cc new file mode 100644 index 00000000..2f519834 --- /dev/null +++ b/tests/hadrons/Test_hadrons_2AS_spectrum.cc @@ -0,0 +1,168 @@ +/******************************************************************************* + Grid physics library, www.github.com/paboyle/Grid + + Source file: tests/hadrons/Test_hadrons_spectrum.cc + + Copyright (C) 2015 + + Author: Antonin Portelli + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along + with this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + + See the full license in the file "LICENSE" in the top level distribution + directory. 
+ *******************************************************************************/ + +#include + +using namespace Grid; +using namespace Hadrons; + + + BEGIN_HADRONS_NAMESPACE + BEGIN_MODULE_NAMESPACE(MFermion) + MODULE_REGISTER_NS(GaugeProp2AS, TGaugeProp, MFermion); + END_MODULE_NAMESPACE + BEGIN_MODULE_NAMESPACE(MSource) + MODULE_REGISTER_NS(Point2AS, TPoint, MSource); + END_MODULE_NAMESPACE + BEGIN_MODULE_NAMESPACE(MContraction) + MODULE_REGISTER_NS(Meson2AS, ARG(TMeson), MContraction); +// MODULE_REGISTER_NS(BaryonMultirep, ARG(TBaryon), MContraction); + END_MODULE_NAMESPACE + BEGIN_MODULE_NAMESPACE(MSink) + MODULE_REGISTER_NS(ScalarPoint2AS, TPoint, MSink); + END_MODULE_NAMESPACE + BEGIN_MODULE_NAMESPACE(MSolver) + MODULE_REGISTER_NS(RBPrecCG2AS, TRBPrecCG, MSolver); + END_MODULE_NAMESPACE + BEGIN_MODULE_NAMESPACE(MAction) + MODULE_REGISTER_NS(WilsonClover2AS, TWilsonClover, MAction); + END_MODULE_NAMESPACE + END_HADRONS_NAMESPACE + + +int main(int argc, char *argv[]) +{ + // initialization ////////////////////////////////////////////////////////// + Grid_init(&argc, &argv); + HadronsLogError.Active(GridLogError.isActive()); + HadronsLogWarning.Active(GridLogWarning.isActive()); + HadronsLogMessage.Active(GridLogMessage.isActive()); + HadronsLogIterative.Active(GridLogIterative.isActive()); + HadronsLogDebug.Active(GridLogDebug.isActive()); + LOG(Message) << "Grid initialized" << std::endl; + // run setup /////////////////////////////////////////////////////////////// + Application application; + std::vector flavour = {"l", "s"}; + std::vector mass = {-0.01, -0.04}; + double csw = 1.0; + // global parameters + Application::GlobalPar globalPar; + globalPar.trajCounter.start = 1500; + globalPar.trajCounter.end = 1520; + globalPar.trajCounter.step = 20; + globalPar.seed = "1 2 3 4"; + application.setPar(globalPar); + // gauge field + application.createModule("gauge"); + MSource::Point2AS::Par ptPar; + ptPar.position = "0 0 0 0"; + application.createModule("pt", 
ptPar); + // sink + MSink::ScalarPoint2AS::Par sinkPar; + sinkPar.mom = "0 0 0"; + application.createModule("sink", sinkPar); + + // set fermion boundary conditions to be periodic space, antiperiodic time. + std::string boundary = "1 1 1 -1"; + + for (unsigned int i = 0; i < flavour.size(); ++i) + { + // actions + MAction::WilsonClover2AS::Par actionPar; + actionPar.gauge = "gauge"; + actionPar.mass = mass[i]; + actionPar.csw_r = csw; + actionPar.csw_t = csw; + actionPar.clover_anisotropy.isAnisotropic= false; + actionPar.clover_anisotropy.t_direction = Nd-1 ; + actionPar.clover_anisotropy.xi_0 = 1.0 ; + actionPar.clover_anisotropy.nu = 1.0 ; + actionPar.boundary = boundary; + application.createModule("WilsonClover2AS_" + flavour[i], actionPar); + + // solvers + MSolver::RBPrecCG2AS::Par solverPar; + solverPar.action = "WilsonClover2AS_" + flavour[i]; + solverPar.residual = 1.0e-8; + application.createModule("CG_" + flavour[i], + solverPar); + + // propagators + MFermion::GaugeProp2AS::Par quarkPar; + quarkPar.solver = "CG_" + flavour[i]; + quarkPar.source = "pt"; + application.createModule("Qpt_" + flavour[i], quarkPar); + quarkPar.source = "z2"; + application.createModule("QZ2_" + flavour[i], quarkPar); + } + for (unsigned int i = 0; i < flavour.size(); ++i) + for (unsigned int j = i; j < flavour.size(); ++j) + { + MContraction::Meson2AS::Par mesPar; + + mesPar.output = "mesons2AS/pt_" + flavour[i] + flavour[j]; + mesPar.q1 = "Qpt_" + flavour[i]; + mesPar.q2 = "Qpt_" + flavour[j]; + mesPar.gammas = "all"; + mesPar.sink = "sink"; + application.createModule("meson_pt_" + + flavour[i] + flavour[j], + mesPar); + + // mesPar.output = "mesons2AS/Z2_" + flavour[i] + flavour[j]; + // mesPar.q1 = "QZ2_" + flavour[i]; + // mesPar.q2 = "QZ2_" + flavour[j]; + // mesPar.gammas = "all"; + // mesPar.sink = "sink"; + // application.createModule("meson_Z2_" + // + flavour[i] + flavour[j], + // mesPar); + } + for (unsigned int i = 0; i < flavour.size(); ++i) + for (unsigned int j 
= i; j < flavour.size(); ++j) + for (unsigned int k = j; k < flavour.size(); ++k) + { + MContraction::Baryon::Par barPar; + + barPar.output = "baryons/pt_" + flavour[i] + flavour[j] + flavour[k]; + barPar.q1 = "Qpt_" + flavour[i]; + barPar.q2 = "Qpt_" + flavour[j]; + barPar.q3 = "Qpt_" + flavour[k]; + application.createModule( + "baryon_pt_" + flavour[i] + flavour[j] + flavour[k], barPar); + } + + // execution + application.saveParameterFile("spectrum.xml"); + application.run(); + + // epilogue + LOG(Message) << "Grid is finalizing now" << std::endl; + Grid_finalize(); + + return EXIT_SUCCESS; +} diff --git a/tests/lanczos/Test_WCMultiRep_lanczos.cc b/tests/lanczos/Test_WCMultiRep_lanczos.cc new file mode 100644 index 00000000..e8549234 --- /dev/null +++ b/tests/lanczos/Test_WCMultiRep_lanczos.cc @@ -0,0 +1,108 @@ +/************************************************************************************* + +Grid physics library, www.github.com/paboyle/Grid + +Source file: ./tests/Test_dwf_lanczos.cc + +Copyright (C) 2015 + +Author: Peter Boyle + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along +with this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+ +See the full license in the file "LICENSE" in the top level distribution +directory +*************************************************************************************/ +/* END LEGAL */ +#include + +using namespace std; +using namespace Grid; +using namespace Grid::QCD; + +typedef WilsonFermionR FermionOp; +typedef typename WilsonFermionR::FermionField FermionField; + + +RealD AllZero(RealD x) { return 0.; } + +int main(int argc, char** argv) { + Grid_init(&argc, &argv); + + GridCartesian* UGrid = SpaceTimeGrid::makeFourDimGrid( + GridDefaultLatt(), GridDefaultSimd(Nd, vComplex::Nsimd()), + GridDefaultMpi()); + GridRedBlackCartesian* UrbGrid = + SpaceTimeGrid::makeFourDimRedBlackGrid(UGrid); + GridCartesian* FGrid = UGrid; + GridRedBlackCartesian* FrbGrid = UrbGrid; + printf("UGrid=%p UrbGrid=%p FGrid=%p FrbGrid=%p\n", UGrid, UrbGrid, FGrid, + FrbGrid); + + std::vector seeds4({1, 2, 3, 4}); + std::vector seeds5({5, 6, 7, 8}); + GridParallelRNG RNG5(FGrid); + RNG5.SeedFixedIntegers(seeds5); + GridParallelRNG RNG4(UGrid); + RNG4.SeedFixedIntegers(seeds4); + GridParallelRNG RNG5rb(FrbGrid); + RNG5.SeedFixedIntegers(seeds5); + + LatticeGaugeField Umu(UGrid); + SU3::HotConfiguration(RNG4, Umu); + +/* + std::vector U(4, UGrid); + for (int mu = 0; mu < Nd; mu++) { + U[mu] = PeekIndex(Umu, mu); + } +*/ + + RealD mass = -0.1; + RealD M5 = 1.8; + RealD mob_b = 1.5; + FermionOp WilsonOperator(Umu,*FGrid,*FrbGrid,mass); + MdagMLinearOperator HermOp(WilsonOperator); /// <----- + //SchurDiagTwoOperator HermOp(WilsonOperator); + + const int Nstop = 20; + const int Nk = 60; + const int Np = 60; + const int Nm = Nk + Np; + const int MaxIt = 10000; + RealD resid = 1.0e-6; + + std::vector Coeffs{0, 1.}; + Polynomial PolyX(Coeffs); + Chebyshev Cheb(0.0, 10., 12); + ImplicitlyRestartedLanczos IRL(HermOp, PolyX, Nstop, Nk, Nm, + resid, MaxIt); + + std::vector eval(Nm); + FermionField src(FGrid); + gaussian(RNG5, src); + std::vector evec(Nm, FGrid); + for (int i = 0; i < 1; i++) { 
+ std::cout << i << " / " << Nm << " grid pointer " << evec[i]._grid + << std::endl; + }; + + int Nconv; + IRL.calc(eval, evec, src, Nconv); + + std::cout << eval << std::endl; + + Grid_finalize(); +} From 185da83454961773a4666d4fff45724abb426f5b Mon Sep 17 00:00:00 2001 From: Antonin Portelli Date: Tue, 26 Dec 2017 14:05:17 +0100 Subject: [PATCH 261/377] Hadrons: new MIO module namespace, NERSC loader moved there --- extras/Hadrons/Modules.hpp | 9 +++--- .../{MGauge/Load.cc => MIO/LoadNersc.cc} | 25 +++++++--------- .../{MGauge/Load.hpp => MIO/LoadNersc.hpp} | 30 ++++++++----------- extras/Hadrons/modules.inc | 8 ++--- 4 files changed, 32 insertions(+), 40 deletions(-) rename extras/Hadrons/Modules/{MGauge/Load.cc => MIO/LoadNersc.cc} (81%) rename extras/Hadrons/Modules/{MGauge/Load.hpp => MIO/LoadNersc.hpp} (75%) diff --git a/extras/Hadrons/Modules.hpp b/extras/Hadrons/Modules.hpp index cf381d0f..3ae2f9a7 100644 --- a/extras/Hadrons/Modules.hpp +++ b/extras/Hadrons/Modules.hpp @@ -2,13 +2,12 @@ Grid physics library, www.github.com/paboyle/Grid -Source file: extras/Hadrons/Modules.hpp +Source file: Modules.hpp -Copyright (C) 2015 -Copyright (C) 2016 -Copyright (C) 2017 +Copyright (C) 2015-2018 Author: Antonin Portelli +Author: Lanny91 This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -46,7 +45,6 @@ See the full license in the file "LICENSE" in the top level distribution directo #include #include #include -#include #include #include #include @@ -58,3 +56,4 @@ See the full license in the file "LICENSE" in the top level distribution directo #include #include #include +#include diff --git a/extras/Hadrons/Modules/MGauge/Load.cc b/extras/Hadrons/Modules/MIO/LoadNersc.cc similarity index 81% rename from extras/Hadrons/Modules/MGauge/Load.cc rename to extras/Hadrons/Modules/MIO/LoadNersc.cc index b168a010..2c35d2e1 100644 --- a/extras/Hadrons/Modules/MGauge/Load.cc +++ 
b/extras/Hadrons/Modules/MIO/LoadNersc.cc @@ -2,12 +2,10 @@ Grid physics library, www.github.com/paboyle/Grid -Source file: extras/Hadrons/Modules/MGauge/Load.cc +Source file: LoadNersc.cc -Copyright (C) 2015 -Copyright (C) 2016 +Copyright (C) 2015-2018 -Author: Antonin Portelli This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -26,30 +24,29 @@ with this program; if not, write to the Free Software Foundation, Inc., See the full license in the file "LICENSE" in the top level distribution directory *************************************************************************************/ /* END LEGAL */ - -#include +#include using namespace Grid; using namespace Hadrons; -using namespace MGauge; +using namespace MIO; /****************************************************************************** -* TLoad implementation * +* TLoadNersc implementation * ******************************************************************************/ // constructor ///////////////////////////////////////////////////////////////// -TLoad::TLoad(const std::string name) -: Module(name) +TLoadNersc::TLoadNersc(const std::string name) +: Module(name) {} // dependencies/products /////////////////////////////////////////////////////// -std::vector TLoad::getInput(void) +std::vector TLoadNersc::getInput(void) { std::vector in; return in; } -std::vector TLoad::getOutput(void) +std::vector TLoadNersc::getOutput(void) { std::vector out = {getName()}; @@ -57,13 +54,13 @@ std::vector TLoad::getOutput(void) } // setup /////////////////////////////////////////////////////////////////////// -void TLoad::setup(void) +void TLoadNersc::setup(void) { envCreateLat(LatticeGaugeField, getName()); } // execution /////////////////////////////////////////////////////////////////// -void TLoad::execute(void) +void TLoadNersc::execute(void) { FieldMetaData header; std::string fileName = par().file + "." 
diff --git a/extras/Hadrons/Modules/MGauge/Load.hpp b/extras/Hadrons/Modules/MIO/LoadNersc.hpp similarity index 75% rename from extras/Hadrons/Modules/MGauge/Load.hpp rename to extras/Hadrons/Modules/MIO/LoadNersc.hpp index a338af79..5bd251c3 100644 --- a/extras/Hadrons/Modules/MGauge/Load.hpp +++ b/extras/Hadrons/Modules/MIO/LoadNersc.hpp @@ -2,12 +2,10 @@ Grid physics library, www.github.com/paboyle/Grid -Source file: extras/Hadrons/Modules/MGauge/Load.hpp +Source file: LoadNersc.hpp -Copyright (C) 2015 -Copyright (C) 2016 +Copyright (C) 2015-2018 -Author: Antonin Portelli This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -26,9 +24,8 @@ with this program; if not, write to the Free Software Foundation, Inc., See the full license in the file "LICENSE" in the top level distribution directory *************************************************************************************/ /* END LEGAL */ - -#ifndef Hadrons_MGauge_Load_hpp_ -#define Hadrons_MGauge_Load_hpp_ +#ifndef Hadrons_MIO_LoadNersc_hpp_ +#define Hadrons_MIO_LoadNersc_hpp_ #include #include @@ -37,38 +34,37 @@ See the full license in the file "LICENSE" in the top level distribution directo BEGIN_HADRONS_NAMESPACE /****************************************************************************** - * Load a NERSC configuration * + * Load a NERSC configuration * ******************************************************************************/ -BEGIN_MODULE_NAMESPACE(MGauge) +BEGIN_MODULE_NAMESPACE(MIO) -class LoadPar: Serializable +class LoadNerscPar: Serializable { public: - GRID_SERIALIZABLE_CLASS_MEMBERS(LoadPar, + GRID_SERIALIZABLE_CLASS_MEMBERS(LoadNerscPar, std::string, file); }; -class TLoad: public Module +class TLoadNersc: public Module { public: // constructor - TLoad(const std::string name); + TLoadNersc(const std::string name); // destructor - virtual ~TLoad(void) = default; + virtual ~TLoadNersc(void) = default; 
// dependency relation virtual std::vector getInput(void); virtual std::vector getOutput(void); -protected: // setup virtual void setup(void); // execution virtual void execute(void); }; -MODULE_REGISTER_NS(Load, TLoad, MGauge); +MODULE_REGISTER_NS(LoadNersc, TLoadNersc, MIO); END_MODULE_NAMESPACE END_HADRONS_NAMESPACE -#endif // Hadrons_MGauge_Load_hpp_ +#endif // Hadrons_MIO_LoadNersc_hpp_ diff --git a/extras/Hadrons/modules.inc b/extras/Hadrons/modules.inc index 199bb5cd..85fa0971 100644 --- a/extras/Hadrons/modules.inc +++ b/extras/Hadrons/modules.inc @@ -2,12 +2,12 @@ modules_cc =\ Modules/MContraction/WeakHamiltonianEye.cc \ Modules/MContraction/WeakNeutral4ptDisc.cc \ Modules/MContraction/WeakHamiltonianNonEye.cc \ - Modules/MGauge/Load.cc \ Modules/MGauge/Unit.cc \ Modules/MGauge/StochEm.cc \ Modules/MGauge/Random.cc \ Modules/MScalar/FreeProp.cc \ - Modules/MScalar/ChargedProp.cc + Modules/MScalar/ChargedProp.cc \ + Modules/MIO/LoadNersc.cc modules_hpp =\ Modules/MContraction/Baryon.hpp \ @@ -28,7 +28,6 @@ modules_hpp =\ Modules/MSink/Smear.hpp \ Modules/MSink/Point.hpp \ Modules/MSolver/RBPrecCG.hpp \ - Modules/MGauge/Load.hpp \ Modules/MGauge/Unit.hpp \ Modules/MGauge/Random.hpp \ Modules/MGauge/StochEm.hpp \ @@ -39,5 +38,6 @@ modules_hpp =\ Modules/MScalar/Scalar.hpp \ Modules/MScalar/ChargedProp.hpp \ Modules/MAction/DWF.hpp \ - Modules/MAction/Wilson.hpp + Modules/MAction/Wilson.hpp \ + Modules/MIO/LoadNersc.hpp From 8b30c5956c7e78250303a1a80e6fd0cd79a7682a Mon Sep 17 00:00:00 2001 From: Antonin Portelli Date: Tue, 26 Dec 2017 14:16:47 +0100 Subject: [PATCH 262/377] Hadrons: copyright update --- extras/Hadrons/Application.cc | 3 +- extras/Hadrons/Application.hpp | 3 +- extras/Hadrons/Environment.cc | 3 +- extras/Hadrons/Environment.hpp | 3 +- extras/Hadrons/Exceptions.cc | 4 +-- extras/Hadrons/Exceptions.hpp | 2 +- extras/Hadrons/Factory.hpp | 3 +- extras/Hadrons/GeneticScheduler.hpp | 3 +- extras/Hadrons/Global.cc | 3 +- extras/Hadrons/Global.hpp | 4 
+-- extras/Hadrons/Graph.hpp | 3 +- extras/Hadrons/HadronsXmlRun.cc | 3 +- extras/Hadrons/HadronsXmlSchedule.cc | 3 +- extras/Hadrons/Module.cc | 3 +- extras/Hadrons/Module.hpp | 3 +- extras/Hadrons/ModuleFactory.hpp | 3 +- extras/Hadrons/Modules.hpp | 2 +- extras/Hadrons/Modules/MAction/DWF.hpp | 4 +-- extras/Hadrons/Modules/MAction/Wilson.hpp | 4 +-- .../Hadrons/Modules/MContraction/Baryon.hpp | 4 +-- .../Hadrons/Modules/MContraction/DiscLoop.hpp | 5 ++-- .../Hadrons/Modules/MContraction/Gamma3pt.hpp | 5 ++-- extras/Hadrons/Modules/MContraction/Meson.hpp | 6 ++-- .../Modules/MContraction/WardIdentity.hpp | 5 ++-- .../Modules/MContraction/WeakHamiltonian.hpp | 5 ++-- .../MContraction/WeakHamiltonianEye.cc | 5 ++-- .../MContraction/WeakHamiltonianEye.hpp | 5 ++-- .../MContraction/WeakHamiltonianNonEye.cc | 5 ++-- .../MContraction/WeakHamiltonianNonEye.hpp | 5 ++-- .../MContraction/WeakNeutral4ptDisc.cc | 5 ++-- .../MContraction/WeakNeutral4ptDisc.hpp | 5 ++-- extras/Hadrons/Modules/MFermion/GaugeProp.hpp | 6 ++-- extras/Hadrons/Modules/MGauge/Random.cc | 3 +- extras/Hadrons/Modules/MGauge/Random.hpp | 3 +- extras/Hadrons/Modules/MGauge/StochEm.cc | 4 +-- extras/Hadrons/Modules/MGauge/StochEm.hpp | 4 +-- extras/Hadrons/Modules/MGauge/Unit.cc | 3 +- extras/Hadrons/Modules/MGauge/Unit.hpp | 3 +- extras/Hadrons/Modules/MIO/LoadNersc.cc | 3 +- extras/Hadrons/Modules/MIO/LoadNersc.hpp | 3 +- extras/Hadrons/Modules/MLoop/NoiseLoop.hpp | 5 ++-- extras/Hadrons/Modules/MScalar/ChargedProp.cc | 28 +++++++++++++++++++ .../Hadrons/Modules/MScalar/ChargedProp.hpp | 27 ++++++++++++++++++ extras/Hadrons/Modules/MScalar/FreeProp.cc | 27 ++++++++++++++++++ extras/Hadrons/Modules/MScalar/FreeProp.hpp | 27 ++++++++++++++++++ extras/Hadrons/Modules/MScalar/Scalar.hpp | 27 ++++++++++++++++++ extras/Hadrons/Modules/MSink/Point.hpp | 3 +- extras/Hadrons/Modules/MSink/Smear.hpp | 5 ++-- extras/Hadrons/Modules/MSolver/RBPrecCG.hpp | 3 +- extras/Hadrons/Modules/MSource/Point.hpp | 4 +-- 
.../Hadrons/Modules/MSource/SeqConserved.hpp | 7 +++-- extras/Hadrons/Modules/MSource/SeqGamma.hpp | 5 ++-- extras/Hadrons/Modules/MSource/Wall.hpp | 5 ++-- extras/Hadrons/Modules/MSource/Z2.hpp | 3 +- .../Modules/MUtilities/TestSeqConserved.hpp | 5 ++-- .../Modules/MUtilities/TestSeqGamma.hpp | 5 ++-- extras/Hadrons/VirtualMachine.cc | 2 +- extras/Hadrons/VirtualMachine.hpp | 2 +- scripts/copyright | 5 ++-- 59 files changed, 238 insertions(+), 108 deletions(-) diff --git a/extras/Hadrons/Application.cc b/extras/Hadrons/Application.cc index 9a3366d4..6d5d6776 100644 --- a/extras/Hadrons/Application.cc +++ b/extras/Hadrons/Application.cc @@ -4,8 +4,7 @@ Grid physics library, www.github.com/paboyle/Grid Source file: extras/Hadrons/Application.cc -Copyright (C) 2015 -Copyright (C) 2016 +Copyright (C) 2015-2018 Author: Antonin Portelli diff --git a/extras/Hadrons/Application.hpp b/extras/Hadrons/Application.hpp index 4b2ce77b..8cd15433 100644 --- a/extras/Hadrons/Application.hpp +++ b/extras/Hadrons/Application.hpp @@ -4,8 +4,7 @@ Grid physics library, www.github.com/paboyle/Grid Source file: extras/Hadrons/Application.hpp -Copyright (C) 2015 -Copyright (C) 2016 +Copyright (C) 2015-2018 Author: Antonin Portelli diff --git a/extras/Hadrons/Environment.cc b/extras/Hadrons/Environment.cc index 6de13e86..82b0dda1 100644 --- a/extras/Hadrons/Environment.cc +++ b/extras/Hadrons/Environment.cc @@ -4,8 +4,7 @@ Grid physics library, www.github.com/paboyle/Grid Source file: extras/Hadrons/Environment.cc -Copyright (C) 2015 -Copyright (C) 2016 +Copyright (C) 2015-2018 Author: Antonin Portelli diff --git a/extras/Hadrons/Environment.hpp b/extras/Hadrons/Environment.hpp index 7f1bc26d..e9bfffe1 100644 --- a/extras/Hadrons/Environment.hpp +++ b/extras/Hadrons/Environment.hpp @@ -4,8 +4,7 @@ Grid physics library, www.github.com/paboyle/Grid Source file: extras/Hadrons/Environment.hpp -Copyright (C) 2015 -Copyright (C) 2016 +Copyright (C) 2015-2018 Author: Antonin Portelli diff --git 
a/extras/Hadrons/Exceptions.cc b/extras/Hadrons/Exceptions.cc index bf532c21..eedc03b1 100644 --- a/extras/Hadrons/Exceptions.cc +++ b/extras/Hadrons/Exceptions.cc @@ -4,7 +4,7 @@ Grid physics library, www.github.com/paboyle/Grid Source file: extras/Hadrons/Exceptions.cc -Copyright (C) 2017 +Copyright (C) 2015-2018 Author: Antonin Portelli @@ -54,4 +54,4 @@ CONST_EXC(Io, Runtime("IO error: " + msg, loc)) CONST_EXC(Memory, Runtime("memory error: " + msg, loc)) CONST_EXC(Parsing, Runtime("parsing error: " + msg, loc)) CONST_EXC(Program, Runtime("program error: " + msg, loc)) -CONST_EXC(System, Runtime("system error: " + msg, loc)) \ No newline at end of file +CONST_EXC(System, Runtime("system error: " + msg, loc)) diff --git a/extras/Hadrons/Exceptions.hpp b/extras/Hadrons/Exceptions.hpp index 8f04ab41..ab588e5e 100644 --- a/extras/Hadrons/Exceptions.hpp +++ b/extras/Hadrons/Exceptions.hpp @@ -4,7 +4,7 @@ Grid physics library, www.github.com/paboyle/Grid Source file: extras/Hadrons/Exceptions.hpp -Copyright (C) 2017 +Copyright (C) 2015-2018 Author: Antonin Portelli diff --git a/extras/Hadrons/Factory.hpp b/extras/Hadrons/Factory.hpp index 65ce03ca..705a639e 100644 --- a/extras/Hadrons/Factory.hpp +++ b/extras/Hadrons/Factory.hpp @@ -4,8 +4,7 @@ Grid physics library, www.github.com/paboyle/Grid Source file: extras/Hadrons/Factory.hpp -Copyright (C) 2015 -Copyright (C) 2016 +Copyright (C) 2015-2018 Author: Antonin Portelli diff --git a/extras/Hadrons/GeneticScheduler.hpp b/extras/Hadrons/GeneticScheduler.hpp index f199f1ed..9a6476c3 100644 --- a/extras/Hadrons/GeneticScheduler.hpp +++ b/extras/Hadrons/GeneticScheduler.hpp @@ -4,8 +4,7 @@ Grid physics library, www.github.com/paboyle/Grid Source file: extras/Hadrons/GeneticScheduler.hpp -Copyright (C) 2015 -Copyright (C) 2016 +Copyright (C) 2015-2018 Author: Antonin Portelli diff --git a/extras/Hadrons/Global.cc b/extras/Hadrons/Global.cc index 130ede96..fc41424c 100644 --- a/extras/Hadrons/Global.cc +++ 
b/extras/Hadrons/Global.cc @@ -4,8 +4,7 @@ Grid physics library, www.github.com/paboyle/Grid Source file: extras/Hadrons/Global.cc -Copyright (C) 2015 -Copyright (C) 2016 +Copyright (C) 2015-2018 Author: Antonin Portelli diff --git a/extras/Hadrons/Global.hpp b/extras/Hadrons/Global.hpp index ebfe94dc..1b4e5f9a 100644 --- a/extras/Hadrons/Global.hpp +++ b/extras/Hadrons/Global.hpp @@ -4,10 +4,10 @@ Grid physics library, www.github.com/paboyle/Grid Source file: extras/Hadrons/Global.hpp -Copyright (C) 2015 -Copyright (C) 2016 +Copyright (C) 2015-2018 Author: Antonin Portelli +Author: Lanny91 This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by diff --git a/extras/Hadrons/Graph.hpp b/extras/Hadrons/Graph.hpp index a9c240fa..67694aa8 100644 --- a/extras/Hadrons/Graph.hpp +++ b/extras/Hadrons/Graph.hpp @@ -4,8 +4,7 @@ Grid physics library, www.github.com/paboyle/Grid Source file: extras/Hadrons/Graph.hpp -Copyright (C) 2015 -Copyright (C) 2016 +Copyright (C) 2015-2018 Author: Antonin Portelli diff --git a/extras/Hadrons/HadronsXmlRun.cc b/extras/Hadrons/HadronsXmlRun.cc index 0dff8f9a..07eb096e 100644 --- a/extras/Hadrons/HadronsXmlRun.cc +++ b/extras/Hadrons/HadronsXmlRun.cc @@ -4,8 +4,7 @@ Grid physics library, www.github.com/paboyle/Grid Source file: extras/Hadrons/HadronsXmlRun.cc -Copyright (C) 2015 -Copyright (C) 2016 +Copyright (C) 2015-2018 Author: Antonin Portelli diff --git a/extras/Hadrons/HadronsXmlSchedule.cc b/extras/Hadrons/HadronsXmlSchedule.cc index a8ca9a63..6b167690 100644 --- a/extras/Hadrons/HadronsXmlSchedule.cc +++ b/extras/Hadrons/HadronsXmlSchedule.cc @@ -4,8 +4,7 @@ Grid physics library, www.github.com/paboyle/Grid Source file: extras/Hadrons/HadronsXmlSchedule.cc -Copyright (C) 2015 -Copyright (C) 2016 +Copyright (C) 2015-2018 Author: Antonin Portelli diff --git a/extras/Hadrons/Module.cc b/extras/Hadrons/Module.cc index e5ef0fe4..54978f93 100644 --- 
a/extras/Hadrons/Module.cc +++ b/extras/Hadrons/Module.cc @@ -4,8 +4,7 @@ Grid physics library, www.github.com/paboyle/Grid Source file: extras/Hadrons/Module.cc -Copyright (C) 2015 -Copyright (C) 2016 +Copyright (C) 2015-2018 Author: Antonin Portelli diff --git a/extras/Hadrons/Module.hpp b/extras/Hadrons/Module.hpp index b71f779d..2ba425e4 100644 --- a/extras/Hadrons/Module.hpp +++ b/extras/Hadrons/Module.hpp @@ -4,8 +4,7 @@ Grid physics library, www.github.com/paboyle/Grid Source file: extras/Hadrons/Module.hpp -Copyright (C) 2015 -Copyright (C) 2016 +Copyright (C) 2015-2018 Author: Antonin Portelli diff --git a/extras/Hadrons/ModuleFactory.hpp b/extras/Hadrons/ModuleFactory.hpp index 48ab305c..d5c703fa 100644 --- a/extras/Hadrons/ModuleFactory.hpp +++ b/extras/Hadrons/ModuleFactory.hpp @@ -4,8 +4,7 @@ Grid physics library, www.github.com/paboyle/Grid Source file: extras/Hadrons/ModuleFactory.hpp -Copyright (C) 2015 -Copyright (C) 2016 +Copyright (C) 2015-2018 Author: Antonin Portelli diff --git a/extras/Hadrons/Modules.hpp b/extras/Hadrons/Modules.hpp index 3ae2f9a7..7f7c5dc7 100644 --- a/extras/Hadrons/Modules.hpp +++ b/extras/Hadrons/Modules.hpp @@ -2,7 +2,7 @@ Grid physics library, www.github.com/paboyle/Grid -Source file: Modules.hpp +Source file: extras/Hadrons/Modules.hpp Copyright (C) 2015-2018 diff --git a/extras/Hadrons/Modules/MAction/DWF.hpp b/extras/Hadrons/Modules/MAction/DWF.hpp index d99f1165..4dfd06cf 100644 --- a/extras/Hadrons/Modules/MAction/DWF.hpp +++ b/extras/Hadrons/Modules/MAction/DWF.hpp @@ -4,10 +4,10 @@ Grid physics library, www.github.com/paboyle/Grid Source file: extras/Hadrons/Modules/MAction/DWF.hpp -Copyright (C) 2015 -Copyright (C) 2016 +Copyright (C) 2015-2018 Author: Antonin Portelli +Author: Lanny91 This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by diff --git a/extras/Hadrons/Modules/MAction/Wilson.hpp 
b/extras/Hadrons/Modules/MAction/Wilson.hpp index 8ef755bb..6467b3ee 100644 --- a/extras/Hadrons/Modules/MAction/Wilson.hpp +++ b/extras/Hadrons/Modules/MAction/Wilson.hpp @@ -4,10 +4,10 @@ Grid physics library, www.github.com/paboyle/Grid Source file: extras/Hadrons/Modules/MAction/Wilson.hpp -Copyright (C) 2015 -Copyright (C) 2016 +Copyright (C) 2015-2018 Author: Antonin Portelli +Author: Lanny91 This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by diff --git a/extras/Hadrons/Modules/MContraction/Baryon.hpp b/extras/Hadrons/Modules/MContraction/Baryon.hpp index 1ef2e257..625c7108 100644 --- a/extras/Hadrons/Modules/MContraction/Baryon.hpp +++ b/extras/Hadrons/Modules/MContraction/Baryon.hpp @@ -4,10 +4,10 @@ Grid physics library, www.github.com/paboyle/Grid Source file: extras/Hadrons/Modules/MContraction/Baryon.hpp -Copyright (C) 2015 -Copyright (C) 2016 +Copyright (C) 2015-2018 Author: Antonin Portelli +Author: Lanny91 This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by diff --git a/extras/Hadrons/Modules/MContraction/DiscLoop.hpp b/extras/Hadrons/Modules/MContraction/DiscLoop.hpp index ef50061c..3d08f0eb 100644 --- a/extras/Hadrons/Modules/MContraction/DiscLoop.hpp +++ b/extras/Hadrons/Modules/MContraction/DiscLoop.hpp @@ -4,9 +4,10 @@ Grid physics library, www.github.com/paboyle/Grid Source file: extras/Hadrons/Modules/MContraction/DiscLoop.hpp -Copyright (C) 2017 +Copyright (C) 2015-2018 -Author: Andrew Lawson +Author: Antonin Portelli +Author: Lanny91 This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by diff --git a/extras/Hadrons/Modules/MContraction/Gamma3pt.hpp b/extras/Hadrons/Modules/MContraction/Gamma3pt.hpp index fb9a9d4b..68701aeb 100644 --- a/extras/Hadrons/Modules/MContraction/Gamma3pt.hpp +++ 
b/extras/Hadrons/Modules/MContraction/Gamma3pt.hpp @@ -4,9 +4,10 @@ Grid physics library, www.github.com/paboyle/Grid Source file: extras/Hadrons/Modules/MContraction/Gamma3pt.hpp -Copyright (C) 2017 +Copyright (C) 2015-2018 -Author: Andrew Lawson +Author: Antonin Portelli +Author: Lanny91 This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by diff --git a/extras/Hadrons/Modules/MContraction/Meson.hpp b/extras/Hadrons/Modules/MContraction/Meson.hpp index 46bbdb2e..5cf504e3 100644 --- a/extras/Hadrons/Modules/MContraction/Meson.hpp +++ b/extras/Hadrons/Modules/MContraction/Meson.hpp @@ -4,12 +4,10 @@ Grid physics library, www.github.com/paboyle/Grid Source file: extras/Hadrons/Modules/MContraction/Meson.hpp -Copyright (C) 2015 -Copyright (C) 2016 -Copyright (C) 2017 +Copyright (C) 2015-2018 Author: Antonin Portelli - Andrew Lawson +Author: Lanny91 This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by diff --git a/extras/Hadrons/Modules/MContraction/WardIdentity.hpp b/extras/Hadrons/Modules/MContraction/WardIdentity.hpp index 556450a8..2801d88c 100644 --- a/extras/Hadrons/Modules/MContraction/WardIdentity.hpp +++ b/extras/Hadrons/Modules/MContraction/WardIdentity.hpp @@ -4,9 +4,10 @@ Grid physics library, www.github.com/paboyle/Grid Source file: extras/Hadrons/Modules/MContraction/WardIdentity.hpp -Copyright (C) 2017 +Copyright (C) 2015-2018 -Author: Andrew Lawson +Author: Antonin Portelli +Author: Lanny91 This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by diff --git a/extras/Hadrons/Modules/MContraction/WeakHamiltonian.hpp b/extras/Hadrons/Modules/MContraction/WeakHamiltonian.hpp index 7df40370..9d8ada98 100644 --- a/extras/Hadrons/Modules/MContraction/WeakHamiltonian.hpp +++ 
b/extras/Hadrons/Modules/MContraction/WeakHamiltonian.hpp @@ -4,9 +4,10 @@ Grid physics library, www.github.com/paboyle/Grid Source file: extras/Hadrons/Modules/MContraction/WeakHamiltonian.hpp -Copyright (C) 2017 +Copyright (C) 2015-2018 -Author: Andrew Lawson +Author: Antonin Portelli +Author: Lanny91 This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by diff --git a/extras/Hadrons/Modules/MContraction/WeakHamiltonianEye.cc b/extras/Hadrons/Modules/MContraction/WeakHamiltonianEye.cc index 43dfa609..b79c09e7 100644 --- a/extras/Hadrons/Modules/MContraction/WeakHamiltonianEye.cc +++ b/extras/Hadrons/Modules/MContraction/WeakHamiltonianEye.cc @@ -4,9 +4,10 @@ Grid physics library, www.github.com/paboyle/Grid Source file: extras/Hadrons/Modules/MContraction/WeakHamiltonianEye.cc -Copyright (C) 2017 +Copyright (C) 2015-2018 -Author: Andrew Lawson +Author: Antonin Portelli +Author: Lanny91 This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by diff --git a/extras/Hadrons/Modules/MContraction/WeakHamiltonianEye.hpp b/extras/Hadrons/Modules/MContraction/WeakHamiltonianEye.hpp index 3a2b9309..24f39f6c 100644 --- a/extras/Hadrons/Modules/MContraction/WeakHamiltonianEye.hpp +++ b/extras/Hadrons/Modules/MContraction/WeakHamiltonianEye.hpp @@ -4,9 +4,10 @@ Grid physics library, www.github.com/paboyle/Grid Source file: extras/Hadrons/Modules/MContraction/WeakHamiltonianEye.hpp -Copyright (C) 2017 +Copyright (C) 2015-2018 -Author: Andrew Lawson +Author: Antonin Portelli +Author: Lanny91 This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by diff --git a/extras/Hadrons/Modules/MContraction/WeakHamiltonianNonEye.cc b/extras/Hadrons/Modules/MContraction/WeakHamiltonianNonEye.cc index 8a7113e3..e66b6ee7 100644 --- 
a/extras/Hadrons/Modules/MContraction/WeakHamiltonianNonEye.cc +++ b/extras/Hadrons/Modules/MContraction/WeakHamiltonianNonEye.cc @@ -4,9 +4,10 @@ Grid physics library, www.github.com/paboyle/Grid Source file: extras/Hadrons/Modules/MContraction/WeakHamiltonianNonEye.cc -Copyright (C) 2017 +Copyright (C) 2015-2018 -Author: Andrew Lawson +Author: Antonin Portelli +Author: Lanny91 This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by diff --git a/extras/Hadrons/Modules/MContraction/WeakHamiltonianNonEye.hpp b/extras/Hadrons/Modules/MContraction/WeakHamiltonianNonEye.hpp index eb5abe3c..c4cd66f1 100644 --- a/extras/Hadrons/Modules/MContraction/WeakHamiltonianNonEye.hpp +++ b/extras/Hadrons/Modules/MContraction/WeakHamiltonianNonEye.hpp @@ -4,9 +4,10 @@ Grid physics library, www.github.com/paboyle/Grid Source file: extras/Hadrons/Modules/MContraction/WeakHamiltonianNonEye.hpp -Copyright (C) 2017 +Copyright (C) 2015-2018 -Author: Andrew Lawson +Author: Antonin Portelli +Author: Lanny91 This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by diff --git a/extras/Hadrons/Modules/MContraction/WeakNeutral4ptDisc.cc b/extras/Hadrons/Modules/MContraction/WeakNeutral4ptDisc.cc index 18423f3e..e0a00472 100644 --- a/extras/Hadrons/Modules/MContraction/WeakNeutral4ptDisc.cc +++ b/extras/Hadrons/Modules/MContraction/WeakNeutral4ptDisc.cc @@ -4,9 +4,10 @@ Grid physics library, www.github.com/paboyle/Grid Source file: extras/Hadrons/Modules/MContraction/WeakNeutral4ptDisc.cc -Copyright (C) 2017 +Copyright (C) 2015-2018 -Author: Andrew Lawson +Author: Antonin Portelli +Author: Lanny91 This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by diff --git a/extras/Hadrons/Modules/MContraction/WeakNeutral4ptDisc.hpp 
b/extras/Hadrons/Modules/MContraction/WeakNeutral4ptDisc.hpp index f26d4636..5de2a751 100644 --- a/extras/Hadrons/Modules/MContraction/WeakNeutral4ptDisc.hpp +++ b/extras/Hadrons/Modules/MContraction/WeakNeutral4ptDisc.hpp @@ -4,9 +4,10 @@ Grid physics library, www.github.com/paboyle/Grid Source file: extras/Hadrons/Modules/MContraction/WeakNeutral4ptDisc.hpp -Copyright (C) 2017 +Copyright (C) 2015-2018 -Author: Andrew Lawson +Author: Antonin Portelli +Author: Lanny91 This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by diff --git a/extras/Hadrons/Modules/MFermion/GaugeProp.hpp b/extras/Hadrons/Modules/MFermion/GaugeProp.hpp index 05b3d17a..33787a0b 100644 --- a/extras/Hadrons/Modules/MFermion/GaugeProp.hpp +++ b/extras/Hadrons/Modules/MFermion/GaugeProp.hpp @@ -4,12 +4,10 @@ Grid physics library, www.github.com/paboyle/Grid Source file: extras/Hadrons/Modules/MFermion/GaugeProp.hpp -Copyright (C) 2015 -Copyright (C) 2016 -Copyright (C) 2017 +Copyright (C) 2015-2018 Author: Antonin Portelli - Andrew Lawson +Author: Lanny91 This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by diff --git a/extras/Hadrons/Modules/MGauge/Random.cc b/extras/Hadrons/Modules/MGauge/Random.cc index 97afd338..962fc243 100644 --- a/extras/Hadrons/Modules/MGauge/Random.cc +++ b/extras/Hadrons/Modules/MGauge/Random.cc @@ -4,8 +4,7 @@ Grid physics library, www.github.com/paboyle/Grid Source file: extras/Hadrons/Modules/MGauge/Random.cc -Copyright (C) 2015 -Copyright (C) 2016 +Copyright (C) 2015-2018 Author: Antonin Portelli diff --git a/extras/Hadrons/Modules/MGauge/Random.hpp b/extras/Hadrons/Modules/MGauge/Random.hpp index a07130e4..51a08dbb 100644 --- a/extras/Hadrons/Modules/MGauge/Random.hpp +++ b/extras/Hadrons/Modules/MGauge/Random.hpp @@ -4,8 +4,7 @@ Grid physics library, www.github.com/paboyle/Grid Source file: 
extras/Hadrons/Modules/MGauge/Random.hpp -Copyright (C) 2015 -Copyright (C) 2016 +Copyright (C) 2015-2018 Author: Antonin Portelli diff --git a/extras/Hadrons/Modules/MGauge/StochEm.cc b/extras/Hadrons/Modules/MGauge/StochEm.cc index c5318573..21b7f626 100644 --- a/extras/Hadrons/Modules/MGauge/StochEm.cc +++ b/extras/Hadrons/Modules/MGauge/StochEm.cc @@ -4,9 +4,9 @@ Grid physics library, www.github.com/paboyle/Grid Source file: extras/Hadrons/Modules/MGauge/StochEm.cc -Copyright (C) 2015 -Copyright (C) 2016 +Copyright (C) 2015-2018 +Author: Antonin Portelli This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by diff --git a/extras/Hadrons/Modules/MGauge/StochEm.hpp b/extras/Hadrons/Modules/MGauge/StochEm.hpp index bacb5172..87b70880 100644 --- a/extras/Hadrons/Modules/MGauge/StochEm.hpp +++ b/extras/Hadrons/Modules/MGauge/StochEm.hpp @@ -4,9 +4,9 @@ Grid physics library, www.github.com/paboyle/Grid Source file: extras/Hadrons/Modules/MGauge/StochEm.hpp -Copyright (C) 2015 -Copyright (C) 2016 +Copyright (C) 2015-2018 +Author: Antonin Portelli This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by diff --git a/extras/Hadrons/Modules/MGauge/Unit.cc b/extras/Hadrons/Modules/MGauge/Unit.cc index 8bee1ecc..38b5f3aa 100644 --- a/extras/Hadrons/Modules/MGauge/Unit.cc +++ b/extras/Hadrons/Modules/MGauge/Unit.cc @@ -4,8 +4,7 @@ Grid physics library, www.github.com/paboyle/Grid Source file: extras/Hadrons/Modules/MGauge/Unit.cc -Copyright (C) 2015 -Copyright (C) 2016 +Copyright (C) 2015-2018 Author: Antonin Portelli diff --git a/extras/Hadrons/Modules/MGauge/Unit.hpp b/extras/Hadrons/Modules/MGauge/Unit.hpp index c1650cc7..d6ce5a6b 100644 --- a/extras/Hadrons/Modules/MGauge/Unit.hpp +++ b/extras/Hadrons/Modules/MGauge/Unit.hpp @@ -4,8 +4,7 @@ Grid physics library, www.github.com/paboyle/Grid Source file: 
extras/Hadrons/Modules/MGauge/Unit.hpp -Copyright (C) 2015 -Copyright (C) 2016 +Copyright (C) 2015-2018 Author: Antonin Portelli diff --git a/extras/Hadrons/Modules/MIO/LoadNersc.cc b/extras/Hadrons/Modules/MIO/LoadNersc.cc index 2c35d2e1..f20606fc 100644 --- a/extras/Hadrons/Modules/MIO/LoadNersc.cc +++ b/extras/Hadrons/Modules/MIO/LoadNersc.cc @@ -2,10 +2,11 @@ Grid physics library, www.github.com/paboyle/Grid -Source file: LoadNersc.cc +Source file: extras/Hadrons/Modules/MIO/LoadNersc.cc Copyright (C) 2015-2018 +Author: Antonin Portelli This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by diff --git a/extras/Hadrons/Modules/MIO/LoadNersc.hpp b/extras/Hadrons/Modules/MIO/LoadNersc.hpp index 5bd251c3..d6742e1e 100644 --- a/extras/Hadrons/Modules/MIO/LoadNersc.hpp +++ b/extras/Hadrons/Modules/MIO/LoadNersc.hpp @@ -2,10 +2,11 @@ Grid physics library, www.github.com/paboyle/Grid -Source file: LoadNersc.hpp +Source file: extras/Hadrons/Modules/MIO/LoadNersc.hpp Copyright (C) 2015-2018 +Author: Antonin Portelli This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by diff --git a/extras/Hadrons/Modules/MLoop/NoiseLoop.hpp b/extras/Hadrons/Modules/MLoop/NoiseLoop.hpp index 512c731a..e61bf163 100644 --- a/extras/Hadrons/Modules/MLoop/NoiseLoop.hpp +++ b/extras/Hadrons/Modules/MLoop/NoiseLoop.hpp @@ -4,9 +4,10 @@ Grid physics library, www.github.com/paboyle/Grid Source file: extras/Hadrons/Modules/MLoop/NoiseLoop.hpp -Copyright (C) 2016 +Copyright (C) 2015-2018 -Author: Andrew Lawson +Author: Antonin Portelli +Author: Lanny91 This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by diff --git a/extras/Hadrons/Modules/MScalar/ChargedProp.cc b/extras/Hadrons/Modules/MScalar/ChargedProp.cc index 6cb75a28..da82617f 100644 --- 
a/extras/Hadrons/Modules/MScalar/ChargedProp.cc +++ b/extras/Hadrons/Modules/MScalar/ChargedProp.cc @@ -1,3 +1,31 @@ +/************************************************************************************* + +Grid physics library, www.github.com/paboyle/Grid + +Source file: extras/Hadrons/Modules/MScalar/ChargedProp.cc + +Copyright (C) 2015-2018 + +Author: Antonin Portelli +Author: James Harrison + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along +with this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+ +See the full license in the file "LICENSE" in the top level distribution directory +*************************************************************************************/ +/* END LEGAL */ #include #include diff --git a/extras/Hadrons/Modules/MScalar/ChargedProp.hpp b/extras/Hadrons/Modules/MScalar/ChargedProp.hpp index cfcce28e..4d43aec2 100644 --- a/extras/Hadrons/Modules/MScalar/ChargedProp.hpp +++ b/extras/Hadrons/Modules/MScalar/ChargedProp.hpp @@ -1,3 +1,30 @@ +/************************************************************************************* + +Grid physics library, www.github.com/paboyle/Grid + +Source file: extras/Hadrons/Modules/MScalar/ChargedProp.hpp + +Copyright (C) 2015-2018 + +Author: Antonin Portelli + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along +with this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+ +See the full license in the file "LICENSE" in the top level distribution directory +*************************************************************************************/ +/* END LEGAL */ #ifndef Hadrons_MScalar_ChargedProp_hpp_ #define Hadrons_MScalar_ChargedProp_hpp_ diff --git a/extras/Hadrons/Modules/MScalar/FreeProp.cc b/extras/Hadrons/Modules/MScalar/FreeProp.cc index 924db288..ee86b9db 100644 --- a/extras/Hadrons/Modules/MScalar/FreeProp.cc +++ b/extras/Hadrons/Modules/MScalar/FreeProp.cc @@ -1,3 +1,30 @@ +/************************************************************************************* + +Grid physics library, www.github.com/paboyle/Grid + +Source file: extras/Hadrons/Modules/MScalar/FreeProp.cc + +Copyright (C) 2015-2018 + +Author: Antonin Portelli + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along +with this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+ +See the full license in the file "LICENSE" in the top level distribution directory +*************************************************************************************/ +/* END LEGAL */ #include #include diff --git a/extras/Hadrons/Modules/MScalar/FreeProp.hpp b/extras/Hadrons/Modules/MScalar/FreeProp.hpp index 6b956134..df17f44e 100644 --- a/extras/Hadrons/Modules/MScalar/FreeProp.hpp +++ b/extras/Hadrons/Modules/MScalar/FreeProp.hpp @@ -1,3 +1,30 @@ +/************************************************************************************* + +Grid physics library, www.github.com/paboyle/Grid + +Source file: extras/Hadrons/Modules/MScalar/FreeProp.hpp + +Copyright (C) 2015-2018 + +Author: Antonin Portelli + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along +with this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+ +See the full license in the file "LICENSE" in the top level distribution directory +*************************************************************************************/ +/* END LEGAL */ #ifndef Hadrons_MScalar_FreeProp_hpp_ #define Hadrons_MScalar_FreeProp_hpp_ diff --git a/extras/Hadrons/Modules/MScalar/Scalar.hpp b/extras/Hadrons/Modules/MScalar/Scalar.hpp index db702ff2..7272f1b3 100644 --- a/extras/Hadrons/Modules/MScalar/Scalar.hpp +++ b/extras/Hadrons/Modules/MScalar/Scalar.hpp @@ -1,3 +1,30 @@ +/************************************************************************************* + +Grid physics library, www.github.com/paboyle/Grid + +Source file: extras/Hadrons/Modules/MScalar/Scalar.hpp + +Copyright (C) 2015-2018 + +Author: Antonin Portelli + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along +with this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+ +See the full license in the file "LICENSE" in the top level distribution directory +*************************************************************************************/ +/* END LEGAL */ #ifndef Hadrons_Scalar_hpp_ #define Hadrons_Scalar_hpp_ diff --git a/extras/Hadrons/Modules/MSink/Point.hpp b/extras/Hadrons/Modules/MSink/Point.hpp index 43be3009..c5f6eff0 100644 --- a/extras/Hadrons/Modules/MSink/Point.hpp +++ b/extras/Hadrons/Modules/MSink/Point.hpp @@ -4,9 +4,10 @@ Grid physics library, www.github.com/paboyle/Grid Source file: extras/Hadrons/Modules/MSink/Point.hpp -Copyright (C) 2017 +Copyright (C) 2015-2018 Author: Antonin Portelli +Author: Lanny91 This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by diff --git a/extras/Hadrons/Modules/MSink/Smear.hpp b/extras/Hadrons/Modules/MSink/Smear.hpp index e85ab263..e72dece0 100644 --- a/extras/Hadrons/Modules/MSink/Smear.hpp +++ b/extras/Hadrons/Modules/MSink/Smear.hpp @@ -4,9 +4,10 @@ Grid physics library, www.github.com/paboyle/Grid Source file: extras/Hadrons/Modules/MSink/Smear.hpp -Copyright (C) 2017 +Copyright (C) 2015-2018 -Author: Andrew Lawson +Author: Antonin Portelli +Author: Lanny91 This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by diff --git a/extras/Hadrons/Modules/MSolver/RBPrecCG.hpp b/extras/Hadrons/Modules/MSolver/RBPrecCG.hpp index bb4f3f62..54c0f2d8 100644 --- a/extras/Hadrons/Modules/MSolver/RBPrecCG.hpp +++ b/extras/Hadrons/Modules/MSolver/RBPrecCG.hpp @@ -4,8 +4,7 @@ Grid physics library, www.github.com/paboyle/Grid Source file: extras/Hadrons/Modules/MSolver/RBPrecCG.hpp -Copyright (C) 2015 -Copyright (C) 2016 +Copyright (C) 2015-2018 Author: Antonin Portelli diff --git a/extras/Hadrons/Modules/MSource/Point.hpp b/extras/Hadrons/Modules/MSource/Point.hpp index 1d8241cf..ac6df252 100644 --- 
a/extras/Hadrons/Modules/MSource/Point.hpp +++ b/extras/Hadrons/Modules/MSource/Point.hpp @@ -4,10 +4,10 @@ Grid physics library, www.github.com/paboyle/Grid Source file: extras/Hadrons/Modules/MSource/Point.hpp -Copyright (C) 2015 -Copyright (C) 2016 +Copyright (C) 2015-2018 Author: Antonin Portelli +Author: Lanny91 This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by diff --git a/extras/Hadrons/Modules/MSource/SeqConserved.hpp b/extras/Hadrons/Modules/MSource/SeqConserved.hpp index 3e8ef457..ee8d8d56 100644 --- a/extras/Hadrons/Modules/MSource/SeqConserved.hpp +++ b/extras/Hadrons/Modules/MSource/SeqConserved.hpp @@ -2,11 +2,12 @@ Grid physics library, www.github.com/paboyle/Grid -Source file: extras/Hadrons/Modules/MContraction/SeqConserved.hpp +Source file: extras/Hadrons/Modules/MSource/SeqConserved.hpp -Copyright (C) 2017 +Copyright (C) 2015-2018 -Author: Andrew Lawson +Author: Antonin Portelli +Author: Lanny91 This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by diff --git a/extras/Hadrons/Modules/MSource/SeqGamma.hpp b/extras/Hadrons/Modules/MSource/SeqGamma.hpp index abad5ace..40eda29f 100644 --- a/extras/Hadrons/Modules/MSource/SeqGamma.hpp +++ b/extras/Hadrons/Modules/MSource/SeqGamma.hpp @@ -4,11 +4,10 @@ Grid physics library, www.github.com/paboyle/Grid Source file: extras/Hadrons/Modules/MSource/SeqGamma.hpp -Copyright (C) 2015 -Copyright (C) 2016 -Copyright (C) 2017 +Copyright (C) 2015-2018 Author: Antonin Portelli +Author: Lanny91 This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by diff --git a/extras/Hadrons/Modules/MSource/Wall.hpp b/extras/Hadrons/Modules/MSource/Wall.hpp index 9d5f1f46..5853b11a 100644 --- a/extras/Hadrons/Modules/MSource/Wall.hpp +++ b/extras/Hadrons/Modules/MSource/Wall.hpp 
@@ -4,9 +4,10 @@ Grid physics library, www.github.com/paboyle/Grid Source file: extras/Hadrons/Modules/MSource/Wall.hpp -Copyright (C) 2017 +Copyright (C) 2015-2018 -Author: Andrew Lawson +Author: Antonin Portelli +Author: Lanny91 This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by diff --git a/extras/Hadrons/Modules/MSource/Z2.hpp b/extras/Hadrons/Modules/MSource/Z2.hpp index 3593cb34..4414e37f 100644 --- a/extras/Hadrons/Modules/MSource/Z2.hpp +++ b/extras/Hadrons/Modules/MSource/Z2.hpp @@ -4,8 +4,7 @@ Grid physics library, www.github.com/paboyle/Grid Source file: extras/Hadrons/Modules/MSource/Z2.hpp -Copyright (C) 2015 -Copyright (C) 2016 +Copyright (C) 2015-2018 Author: Antonin Portelli diff --git a/extras/Hadrons/Modules/MUtilities/TestSeqConserved.hpp b/extras/Hadrons/Modules/MUtilities/TestSeqConserved.hpp index 0647884c..6ee1e3c2 100644 --- a/extras/Hadrons/Modules/MUtilities/TestSeqConserved.hpp +++ b/extras/Hadrons/Modules/MUtilities/TestSeqConserved.hpp @@ -4,9 +4,10 @@ Grid physics library, www.github.com/paboyle/Grid Source file: extras/Hadrons/Modules/MUtilities/TestSeqConserved.hpp -Copyright (C) 2017 +Copyright (C) 2015-2018 -Author: Andrew Lawson +Author: Antonin Portelli +Author: Lanny91 This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by diff --git a/extras/Hadrons/Modules/MUtilities/TestSeqGamma.hpp b/extras/Hadrons/Modules/MUtilities/TestSeqGamma.hpp index fd53eab8..df35d887 100644 --- a/extras/Hadrons/Modules/MUtilities/TestSeqGamma.hpp +++ b/extras/Hadrons/Modules/MUtilities/TestSeqGamma.hpp @@ -4,9 +4,10 @@ Grid physics library, www.github.com/paboyle/Grid Source file: extras/Hadrons/Modules/MUtilities/TestSeqGamma.hpp -Copyright (C) 2017 +Copyright (C) 2015-2018 -Author: Andrew Lawson +Author: Antonin Portelli +Author: Lanny91 This program is free software; you can 
redistribute it and/or modify it under the terms of the GNU General Public License as published by diff --git a/extras/Hadrons/VirtualMachine.cc b/extras/Hadrons/VirtualMachine.cc index e0035bc1..d47bafb7 100644 --- a/extras/Hadrons/VirtualMachine.cc +++ b/extras/Hadrons/VirtualMachine.cc @@ -4,7 +4,7 @@ Grid physics library, www.github.com/paboyle/Grid Source file: extras/Hadrons/VirtualMachine.cc -Copyright (C) 2017 +Copyright (C) 2015-2018 Author: Antonin Portelli diff --git a/extras/Hadrons/VirtualMachine.hpp b/extras/Hadrons/VirtualMachine.hpp index 3af7d914..19a74f94 100644 --- a/extras/Hadrons/VirtualMachine.hpp +++ b/extras/Hadrons/VirtualMachine.hpp @@ -4,7 +4,7 @@ Grid physics library, www.github.com/paboyle/Grid Source file: extras/Hadrons/VirtualMachine.hpp -Copyright (C) 2017 +Copyright (C) 2015-2018 Author: Antonin Portelli diff --git a/scripts/copyright b/scripts/copyright index cc9ed6e5..a461b54c 100755 --- a/scripts/copyright +++ b/scripts/copyright @@ -11,8 +11,7 @@ Grid physics library, www.github.com/paboyle/Grid Source file: $1 -Copyright (C) 2015 -Copyright (C) 2016 +Copyright (C) 2015-2018 EOF @@ -60,4 +59,4 @@ shift done - +rm message tmp.fil From e8ac75055c0566fc56663240e07d2d8340ad5799 Mon Sep 17 00:00:00 2001 From: Antonin Portelli Date: Wed, 27 Dec 2017 14:24:29 +0100 Subject: [PATCH 263/377] Hadrons: binary configuration loader --- extras/Hadrons/Global.hpp | 6 +- extras/Hadrons/Modules.hpp | 1 + extras/Hadrons/Modules/MIO/LoadBinary.hpp | 140 ++++++++++++++++++++++ extras/Hadrons/modules.inc | 3 +- 4 files changed, 148 insertions(+), 2 deletions(-) create mode 100644 extras/Hadrons/Modules/MIO/LoadBinary.hpp diff --git a/extras/Hadrons/Global.hpp b/extras/Hadrons/Global.hpp index 1b4e5f9a..c68edafd 100644 --- a/extras/Hadrons/Global.hpp +++ b/extras/Hadrons/Global.hpp @@ -61,6 +61,9 @@ using Grid::operator<<; #ifndef SIMPL #define SIMPL ScalarImplCR #endif +#ifndef GIMPL +#define GIMPL GimplTypesR +#endif BEGIN_HADRONS_NAMESPACE @@ 
-84,7 +87,8 @@ typedef std::function SolverFn##suffix; #define SINK_TYPE_ALIASES(suffix)\ -typedef std::function SinkFn##suffix; +typedef std::function SinkFn##suffix; #define FGS_TYPE_ALIASES(FImpl, suffix)\ FERM_TYPE_ALIASES(FImpl, suffix)\ diff --git a/extras/Hadrons/Modules.hpp b/extras/Hadrons/Modules.hpp index 7f7c5dc7..e50d2b0b 100644 --- a/extras/Hadrons/Modules.hpp +++ b/extras/Hadrons/Modules.hpp @@ -57,3 +57,4 @@ See the full license in the file "LICENSE" in the top level distribution directo #include #include #include +#include diff --git a/extras/Hadrons/Modules/MIO/LoadBinary.hpp b/extras/Hadrons/Modules/MIO/LoadBinary.hpp new file mode 100644 index 00000000..5e45dfd8 --- /dev/null +++ b/extras/Hadrons/Modules/MIO/LoadBinary.hpp @@ -0,0 +1,140 @@ +/************************************************************************************* + +Grid physics library, www.github.com/paboyle/Grid + +Source file: extras/Hadrons/Modules/MIO/LoadBinary.hpp + +Copyright (C) 2015-2018 + +Author: Antonin Portelli + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along +with this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+ +See the full license in the file "LICENSE" in the top level distribution directory +*************************************************************************************/ +/* END LEGAL */ +#ifndef Hadrons_MIO_LoadBinary_hpp_ +#define Hadrons_MIO_LoadBinary_hpp_ + +#include +#include +#include + +BEGIN_HADRONS_NAMESPACE + +/****************************************************************************** + * Load a binary configurations * + ******************************************************************************/ +BEGIN_MODULE_NAMESPACE(MIO) + +class LoadBinaryPar: Serializable +{ +public: + GRID_SERIALIZABLE_CLASS_MEMBERS(LoadBinaryPar, + std::string, file, + std::string, format); +}; + +template +class TLoadBinary: public Module +{ +public: + typedef typename Impl::Field Field; + typedef typename Impl::Simd Simd; + typedef typename Field::vector_object vobj; + typedef typename vobj::scalar_object sobj; + typedef typename sobj::DoublePrecision sobj_double; + typedef BinarySimpleMunger Munger; +public: + // constructor + TLoadBinary(const std::string name); + // destructor + virtual ~TLoadBinary(void) = default; + // dependency relation + virtual std::vector getInput(void); + virtual std::vector getOutput(void); + // setup + virtual void setup(void); + // execution + virtual void execute(void); +}; + +MODULE_REGISTER_NS(LoadBinary, TLoadBinary, MIO); +MODULE_REGISTER_NS(LoadBinaryScalarSU2, TLoadBinary>, MIO); +MODULE_REGISTER_NS(LoadBinaryScalarSU3, TLoadBinary>, MIO); +MODULE_REGISTER_NS(LoadBinaryScalarSU4, TLoadBinary>, MIO); +MODULE_REGISTER_NS(LoadBinaryScalarSU5, TLoadBinary>, MIO); +MODULE_REGISTER_NS(LoadBinaryScalarSU6, TLoadBinary>, MIO); + +/****************************************************************************** + * TLoadBinary implementation * + ******************************************************************************/ +// constructor ///////////////////////////////////////////////////////////////// +template 
+TLoadBinary::TLoadBinary(const std::string name) +: Module(name) +{} + +// dependencies/products /////////////////////////////////////////////////////// +template +std::vector TLoadBinary::getInput(void) +{ + std::vector in; + + return in; +} + +template +std::vector TLoadBinary::getOutput(void) +{ + std::vector out = {getName()}; + + return out; +} + +// setup /////////////////////////////////////////////////////////////////////// +template +void TLoadBinary::setup(void) +{ + envCreateLat(Field, getName()); +} + +// execution /////////////////////////////////////////////////////////////////// +template +void TLoadBinary::execute(void) +{ + Munger munge; + uint32_t nersc_csum, scidac_csuma, scidac_csumb; + auto &U = envGet(Field, getName()); + std::string filename = par().file + "." + + std::to_string(vm().getTrajectory()); + + LOG(Message) << "Loading " << par().format + << "binary configuration from file '" << filename + << "'" << std::endl; + BinaryIO::readLatticeObject(U, filename, munge, 0, + par().format, nersc_csum, + scidac_csuma, scidac_csumb); + LOG(Message) << "Checksums:" << std::endl; + LOG(Message) << " NERSC " << nersc_csum << std::endl; + LOG(Message) << " SciDAC A " << scidac_csuma << std::endl; + LOG(Message) << " SciDAC B " << scidac_csumb << std::endl; +} + +END_MODULE_NAMESPACE + +END_HADRONS_NAMESPACE + +#endif // Hadrons_MIO_LoadBinary_hpp_ diff --git a/extras/Hadrons/modules.inc b/extras/Hadrons/modules.inc index 85fa0971..6e1ef6dc 100644 --- a/extras/Hadrons/modules.inc +++ b/extras/Hadrons/modules.inc @@ -39,5 +39,6 @@ modules_hpp =\ Modules/MScalar/ChargedProp.hpp \ Modules/MAction/DWF.hpp \ Modules/MAction/Wilson.hpp \ - Modules/MIO/LoadNersc.hpp + Modules/MIO/LoadNersc.hpp \ + Modules/MIO/LoadBinary.hpp From 0d612039ed4c16c5dc0234bc243a631f6e7173b8 Mon Sep 17 00:00:00 2001 From: Antonin Portelli Date: Fri, 29 Dec 2017 16:58:23 +0100 Subject: [PATCH 264/377] Hadrons: prettier Grid logging (non-intrusive) --- 
extras/Hadrons/Application.cc | 1 + extras/Hadrons/Global.cc | 15 +++++++++++++++ extras/Hadrons/Global.hpp | 2 ++ extras/Hadrons/HadronsXmlRun.cc | 6 ------ extras/Hadrons/HadronsXmlSchedule.cc | 6 ------ lib/log/Log.h | 9 ++++++++- 6 files changed, 26 insertions(+), 13 deletions(-) diff --git a/extras/Hadrons/Application.cc b/extras/Hadrons/Application.cc index 6d5d6776..7ba98ade 100644 --- a/extras/Hadrons/Application.cc +++ b/extras/Hadrons/Application.cc @@ -42,6 +42,7 @@ using namespace Hadrons; // constructors //////////////////////////////////////////////////////////////// Application::Application(void) { + initLogger(); LOG(Message) << "Modules available:" << std::endl; auto list = ModuleFactory::getInstance().getBuilderList(); for (auto &m: list) diff --git a/extras/Hadrons/Global.cc b/extras/Hadrons/Global.cc index fc41424c..942a4243 100644 --- a/extras/Hadrons/Global.cc +++ b/extras/Hadrons/Global.cc @@ -38,6 +38,21 @@ HadronsLogger Hadrons::HadronsLogMessage(1,"Message"); HadronsLogger Hadrons::HadronsLogIterative(1,"Iterative"); HadronsLogger Hadrons::HadronsLogDebug(1,"Debug"); +void Hadrons::initLogger(void) +{ + auto w = std::string("Hadrons").length(); + GridLogError.setTopWidth(w); + GridLogWarning.setTopWidth(w); + GridLogMessage.setTopWidth(w); + GridLogIterative.setTopWidth(w); + GridLogDebug.setTopWidth(w); + HadronsLogError.Active(GridLogError.isActive()); + HadronsLogWarning.Active(GridLogWarning.isActive()); + HadronsLogMessage.Active(GridLogMessage.isActive()); + HadronsLogIterative.Active(GridLogIterative.isActive()); + HadronsLogDebug.Active(GridLogDebug.isActive()); +} + // type utilities ////////////////////////////////////////////////////////////// constexpr unsigned int maxNameSize = 1024u; diff --git a/extras/Hadrons/Global.hpp b/extras/Hadrons/Global.hpp index c68edafd..274e1934 100644 --- a/extras/Hadrons/Global.hpp +++ b/extras/Hadrons/Global.hpp @@ -112,6 +112,8 @@ extern HadronsLogger HadronsLogMessage; extern HadronsLogger 
HadronsLogIterative; extern HadronsLogger HadronsLogDebug; +void initLogger(void); + // singleton pattern #define SINGLETON(name)\ public:\ diff --git a/extras/Hadrons/HadronsXmlRun.cc b/extras/Hadrons/HadronsXmlRun.cc index 07eb096e..680f234b 100644 --- a/extras/Hadrons/HadronsXmlRun.cc +++ b/extras/Hadrons/HadronsXmlRun.cc @@ -54,12 +54,6 @@ int main(int argc, char *argv[]) // initialization Grid_init(&argc, &argv); - HadronsLogError.Active(GridLogError.isActive()); - HadronsLogWarning.Active(GridLogWarning.isActive()); - HadronsLogMessage.Active(GridLogMessage.isActive()); - HadronsLogIterative.Active(GridLogIterative.isActive()); - HadronsLogDebug.Active(GridLogDebug.isActive()); - LOG(Message) << "Grid initialized" << std::endl; // execution Application application(parameterFileName); diff --git a/extras/Hadrons/HadronsXmlSchedule.cc b/extras/Hadrons/HadronsXmlSchedule.cc index 6b167690..55f3b231 100644 --- a/extras/Hadrons/HadronsXmlSchedule.cc +++ b/extras/Hadrons/HadronsXmlSchedule.cc @@ -48,12 +48,6 @@ int main(int argc, char *argv[]) // initialization Grid_init(&argc, &argv); - HadronsLogError.Active(GridLogError.isActive()); - HadronsLogWarning.Active(GridLogWarning.isActive()); - HadronsLogMessage.Active(GridLogMessage.isActive()); - HadronsLogIterative.Active(GridLogIterative.isActive()); - HadronsLogDebug.Active(GridLogDebug.isActive()); - LOG(Message) << "Grid initialized" << std::endl; // execution Application application; diff --git a/lib/log/Log.h b/lib/log/Log.h index ddff4c1d..011a7250 100644 --- a/lib/log/Log.h +++ b/lib/log/Log.h @@ -86,6 +86,7 @@ protected: Colours &Painter; int active; int timing_mode; + int topWidth{-1}; static int timestamp; std::string name, topName; std::string COLOUR; @@ -124,11 +125,17 @@ public: Reset(); } } + void setTopWidth(const int w) {topWidth = w;} friend std::ostream& operator<< (std::ostream& stream, Logger& log){ if ( log.active ) { - stream << log.background()<< std::left << log.topName << 
log.background()<< " : "; + stream << log.background()<< std::left; + if (log.topWidth > 0) + { + stream << std::setw(log.topWidth); + } + stream << log.topName << log.background()<< " : "; stream << log.colour() << std::left << log.name << log.background() << " : "; if ( log.timestamp ) { log.StopWatch->Stop(); From dd62f2f371cce3236f4e38dfd8502b0a68fa01ba Mon Sep 17 00:00:00 2001 From: Antonin Portelli Date: Fri, 29 Dec 2017 16:58:44 +0100 Subject: [PATCH 265/377] Hadrons: log message fix --- extras/Hadrons/Modules/MIO/LoadBinary.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/extras/Hadrons/Modules/MIO/LoadBinary.hpp b/extras/Hadrons/Modules/MIO/LoadBinary.hpp index 5e45dfd8..d9a8b5f8 100644 --- a/extras/Hadrons/Modules/MIO/LoadBinary.hpp +++ b/extras/Hadrons/Modules/MIO/LoadBinary.hpp @@ -122,7 +122,7 @@ void TLoadBinary::execute(void) + std::to_string(vm().getTrajectory()); LOG(Message) << "Loading " << par().format - << "binary configuration from file '" << filename + << " binary configuration from file '" << filename << "'" << std::endl; BinaryIO::readLatticeObject(U, filename, munge, 0, par().format, nersc_csum, From 9028e278e4ededffb45b4d2e510c79860d536584 Mon Sep 17 00:00:00 2001 From: David Preti Date: Sat, 6 Jan 2018 15:57:38 +0100 Subject: [PATCH 266/377] Trying to fix a bug with SU4 mesons (still under investigation) --- extras/Hadrons/Modules/MGauge/Load.cc | 3 +- extras/Hadrons/Modules/MGauge/Random.cc | 2 +- extras/Hadrons/Modules/MGauge/Unit.cc | 2 +- tests/hadrons/Test_hadrons_wilsonFund.cc | 160 +++++++++++++++++++++++ 4 files changed, 164 insertions(+), 3 deletions(-) create mode 100644 tests/hadrons/Test_hadrons_wilsonFund.cc diff --git a/extras/Hadrons/Modules/MGauge/Load.cc b/extras/Hadrons/Modules/MGauge/Load.cc index 062e7e98..97be9539 100644 --- a/extras/Hadrons/Modules/MGauge/Load.cc +++ b/extras/Hadrons/Modules/MGauge/Load.cc @@ -66,7 +66,8 @@ void TLoad::setup(void) void TLoad::execute(void) { FieldMetaData 
header; - std::string fileName = par().file + "." + + std::string fileName = par().file + "ckpoint_lat." + std::to_string(env().getTrajectory()); LOG(Message) << "Loading NERSC configuration from file '" << fileName diff --git a/extras/Hadrons/Modules/MGauge/Random.cc b/extras/Hadrons/Modules/MGauge/Random.cc index c10fdfc3..5c063361 100644 --- a/extras/Hadrons/Modules/MGauge/Random.cc +++ b/extras/Hadrons/Modules/MGauge/Random.cc @@ -65,5 +65,5 @@ void TRandom::execute(void) { LOG(Message) << "Generating random gauge configuration" << std::endl; LatticeGaugeField &U = *env().createLattice(getName()); - SU3::HotConfiguration(*env().get4dRng(), U); + SU4::HotConfiguration(*env().get4dRng(), U); } diff --git a/extras/Hadrons/Modules/MGauge/Unit.cc b/extras/Hadrons/Modules/MGauge/Unit.cc index 18d75c59..12696ee9 100644 --- a/extras/Hadrons/Modules/MGauge/Unit.cc +++ b/extras/Hadrons/Modules/MGauge/Unit.cc @@ -65,5 +65,5 @@ void TUnit::execute(void) { LOG(Message) << "Creating unit gauge configuration" << std::endl; LatticeGaugeField &U = *env().createLattice(getName()); - SU3::ColdConfiguration(*env().get4dRng(), U); + SU4::ColdConfiguration(*env().get4dRng(), U); } diff --git a/tests/hadrons/Test_hadrons_wilsonFund.cc b/tests/hadrons/Test_hadrons_wilsonFund.cc new file mode 100644 index 00000000..aff8a670 --- /dev/null +++ b/tests/hadrons/Test_hadrons_wilsonFund.cc @@ -0,0 +1,160 @@ +/******************************************************************************* + Grid physics library, www.github.com/paboyle/Grid + + Source file: tests/hadrons/Test_hadrons_spectrum.cc + + Copyright (C) 2015 + + Author: Antonin Portelli + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. 
+ + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along + with this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + + See the full license in the file "LICENSE" in the top level distribution + directory. + *******************************************************************************/ + +#include + +using namespace Grid; +using namespace Hadrons; + +int main(int argc, char *argv[]) +{ + // initialization ////////////////////////////////////////////////////////// + Grid_init(&argc, &argv); + HadronsLogError.Active(GridLogError.isActive()); + HadronsLogWarning.Active(GridLogWarning.isActive()); + HadronsLogMessage.Active(GridLogMessage.isActive()); + HadronsLogIterative.Active(GridLogIterative.isActive()); + HadronsLogDebug.Active(GridLogDebug.isActive()); + LOG(Message) << "Grid initialized" << std::endl; + + // run setup /////////////////////////////////////////////////////////////// + Application application; + std::vector flavour = {"l"}; + std::vector mass = {-0.1}; + double csw = 0.0; + + // global parameters + Application::GlobalPar globalPar; + + globalPar.trajCounter.start = 1; + globalPar.trajCounter.end = 2; + globalPar.trajCounter.step = 1; + + globalPar.trajCounter.start = 309; + globalPar.trajCounter.end = 310; + globalPar.trajCounter.step = 1; + globalPar.seed = "1 2 3 4"; + application.setPar(globalPar); + // gauge field + application.createModule("gauge"); + //application.createModule("gauge"); + + // sources + //MSource::Z2::Par z2Par; + //z2Par.tA = 0; + //z2Par.tB = 0; + //application.createModule("z2", z2Par); + MSource::Point::Par ptPar; + ptPar.position = "0 0 0 0"; + application.createModule("pt", ptPar); + 
// sink + MSink::Point::Par sinkPar; + sinkPar.mom = "0 0 0"; + application.createModule("sink", sinkPar); + + // set fermion boundary conditions to be periodic space, antiperiodic time. + std::string boundary = "1 1 1 -1"; + + for (unsigned int i = 0; i < flavour.size(); ++i) + { + // actions + MAction::WilsonClover::Par actionPar; + actionPar.gauge = "gauge"; + actionPar.mass = mass[i]; + actionPar.boundary = boundary; + actionPar.csw_r = csw; + actionPar.csw_t = csw; + + // !!!!! Check if Anisotropy works !!!!! + actionPar.clover_anisotropy.isAnisotropic= false; + actionPar.clover_anisotropy.t_direction = 3 ; // Explicit for D=4 + actionPar.clover_anisotropy.xi_0 = 1.0 ; + actionPar.clover_anisotropy.nu = 1.0 ; + + application.createModule("WilsonClover_" + flavour[i], actionPar); + + // solvers + MSolver::RBPrecCG::Par solverPar; + solverPar.action = "WilsonClover_" + flavour[i]; + solverPar.residual = 1.0e-8; + application.createModule("CG_" + flavour[i], + solverPar); + + // propagators + MFermion::GaugeProp::Par quarkPar; + quarkPar.solver = "CG_" + flavour[i]; + quarkPar.source = "pt"; + application.createModule("Qpt_" + flavour[i], quarkPar); + // quarkPar.source = "z2"; + // application.createModule("QZ2_" + flavour[i], quarkPar); + } + for (unsigned int i = 0; i < flavour.size(); ++i) + for (unsigned int j = i; j < flavour.size(); ++j) + { + MContraction::Meson::Par mesPar; + + mesPar.output = "Fund_mesons/pt_" + flavour[i] + flavour[j]; + mesPar.q1 = "Qpt_" + flavour[i]; + mesPar.q2 = "Qpt_" + flavour[j]; + mesPar.gammas = "all"; + mesPar.sink = "sink"; + application.createModule("meson_pt_" + + flavour[i] + flavour[j], + mesPar); + // mesPar.output = "mesons/Z2_" + flavour[i] + flavour[j]; + // mesPar.q1 = "QZ2_" + flavour[i]; + // mesPar.q2 = "QZ2_" + flavour[j]; + // mesPar.gammas = "all"; + // mesPar.sink = "sink"; + // application.createModule("meson_Z2_" + // + flavour[i] + flavour[j], + // mesPar); + } + for (unsigned int i = 0; i < 
flavour.size(); ++i) + for (unsigned int j = i; j < flavour.size(); ++j) + for (unsigned int k = j; k < flavour.size(); ++k) + { + MContraction::Baryon::Par barPar; + + barPar.output = "Fund_baryons/pt_" + flavour[i] + flavour[j] + flavour[k]; + barPar.q1 = "Qpt_" + flavour[i]; + barPar.q2 = "Qpt_" + flavour[j]; + barPar.q3 = "Qpt_" + flavour[k]; + application.createModule( + "baryon_pt_" + flavour[i] + flavour[j] + flavour[k], barPar); + } + + // execution + application.saveParameterFile("WilsonClover_spectrum.xml"); + application.run(); + + // epilogue + LOG(Message) << "Grid is finalizing now" << std::endl; + Grid_finalize(); + + return EXIT_SUCCESS; +} From 1a0163f45c43a304271bb9e8a99052c012b4b736 Mon Sep 17 00:00:00 2001 From: paboyle Date: Mon, 8 Jan 2018 11:26:11 +0000 Subject: [PATCH 267/377] Updated to do list --- TODO | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/TODO b/TODO index 83bfda5e..95ccf1df 100644 --- a/TODO +++ b/TODO @@ -4,17 +4,17 @@ TODO: Large item work list: 1)- BG/Q port and check ; Andrew says ok. -2)- Christoph's local basis expansion Lanczos --- 3a)- RNG I/O in ILDG/SciDAC (minor) -3b)- Precision conversion and sort out localConvert <-- partial/easy 3c)- Consistent linear solver flop count/rate -- PARTIAL, time but no flop/s yet 4)- Physical propagator interface -5)- Conserved currents 6)- Multigrid Wilson and DWF, compare to other Multigrid implementations 7)- HDCR resume - +---------------------------- Recent DONE +-- Precision conversion and sort out localConvert <-- partial/easy +-- Conserved currents (Andrew) +-- Split grid +-- Christoph's local basis expansion Lanczos -- MultiRHS with spread out extra dim -- Go through filesystem with SciDAC I/O ; <-- DONE ; bmark cori -- Lanczos Remove DenseVector, DenseMatrix; Use Eigen instead. 
<-- DONE -- GaugeFix into central location <-- DONE From 7b3ed160aa22c11cfb8e5acbd7e4414a4d718305 Mon Sep 17 00:00:00 2001 From: paboyle Date: Mon, 8 Jan 2018 11:26:48 +0000 Subject: [PATCH 268/377] Rationalise MPI options --- configure.ac | 22 +++------------------- 1 file changed, 3 insertions(+), 19 deletions(-) diff --git a/configure.ac b/configure.ac index 496f7fd7..468d9d5f 100644 --- a/configure.ac +++ b/configure.ac @@ -337,15 +337,11 @@ case ${ac_PRECISION} in esac ###################### Shared memory allocation technique under MPI3 -AC_ARG_ENABLE([shm],[AC_HELP_STRING([--enable-shm=shmget|shmopen|hugetlbfs], +AC_ARG_ENABLE([shm],[AC_HELP_STRING([--enable-shm=shmopen|hugetlbfs], [Select SHM allocation technique])],[ac_SHM=${enable_shm}],[ac_SHM=shmopen]) case ${ac_SHM} in - shmget) - AC_DEFINE([GRID_MPI3_SHMGET],[1],[GRID_MPI3_SHMGET] ) - ;; - shmopen) AC_DEFINE([GRID_MPI3_SHMOPEN],[1],[GRID_MPI3_SHMOPEN] ) ;; @@ -367,7 +363,7 @@ AC_ARG_ENABLE([shmpath],[AC_HELP_STRING([--enable-shmpath=path], AC_DEFINE_UNQUOTED([GRID_SHM_PATH],["$ac_SHMPATH"],[Path to a hugetlbfs filesystem for MMAPing]) ############### communication type selection -AC_ARG_ENABLE([comms],[AC_HELP_STRING([--enable-comms=none|mpi|mpi-auto|mpi3|mpi3-auto|shmem], +AC_ARG_ENABLE([comms],[AC_HELP_STRING([--enable-comms=none|mpi|mpi-auto], [Select communications])],[ac_COMMS=${enable_comms}],[ac_COMMS=none]) case ${ac_COMMS} in @@ -375,22 +371,10 @@ case ${ac_COMMS} in AC_DEFINE([GRID_COMMS_NONE],[1],[GRID_COMMS_NONE] ) comms_type='none' ;; - mpi3*) + mpi*) AC_DEFINE([GRID_COMMS_MPI3],[1],[GRID_COMMS_MPI3] ) comms_type='mpi3' ;; - mpit) - AC_DEFINE([GRID_COMMS_MPIT],[1],[GRID_COMMS_MPIT] ) - comms_type='mpit' - ;; - mpi*) - AC_DEFINE([GRID_COMMS_MPI],[1],[GRID_COMMS_MPI] ) - comms_type='mpi' - ;; - shmem) - AC_DEFINE([GRID_COMMS_SHMEM],[1],[GRID_COMMS_SHMEM] ) - comms_type='shmem' - ;; *) AC_MSG_ERROR([${ac_COMMS} unsupported --enable-comms option]); ;; From 
9b32d51cd1a7ec710239ed280a94a3d836117e7a Mon Sep 17 00:00:00 2001 From: paboyle Date: Mon, 8 Jan 2018 11:27:14 +0000 Subject: [PATCH 269/377] Simplify comms layer proliferatoin --- benchmarks/Benchmark_comms.cc | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/benchmarks/Benchmark_comms.cc b/benchmarks/Benchmark_comms.cc index a270e3fa..29ccf96c 100644 --- a/benchmarks/Benchmark_comms.cc +++ b/benchmarks/Benchmark_comms.cc @@ -106,7 +106,7 @@ int main (int argc, char ** argv) for(int i=0;i requests; + std::vector requests; ncomm=0; for(int mu=0;mu<4;mu++){ @@ -202,7 +202,7 @@ int main (int argc, char ** argv) int recv_from_rank; { - std::vector requests; + std::vector requests; Grid.ShiftedRanks(mu,comm_proc,xmit_to_rank,recv_from_rank); Grid.SendToRecvFromBegin(requests, (void *)&xbuf[mu][0], @@ -215,7 +215,7 @@ int main (int argc, char ** argv) comm_proc = mpi_layout[mu]-1; { - std::vector requests; + std::vector requests; Grid.ShiftedRanks(mu,comm_proc,xmit_to_rank,recv_from_rank); Grid.SendToRecvFromBegin(requests, (void *)&xbuf[mu+4][0], @@ -290,7 +290,7 @@ int main (int argc, char ** argv) dbytes=0; ncomm=0; - std::vector requests; + std::vector requests; for(int mu=0;mu<4;mu++){ @@ -383,7 +383,7 @@ int main (int argc, char ** argv) for(int i=0;i requests; + std::vector requests; dbytes=0; ncomm=0; for(int mu=0;mu<4;mu++){ @@ -481,7 +481,7 @@ int main (int argc, char ** argv) for(int i=0;i requests; + std::vector requests; dbytes=0; ncomm=0; From 7eeab7f995332ae2a2ce60c318beb77c449fe0db Mon Sep 17 00:00:00 2001 From: paboyle Date: Mon, 8 Jan 2018 11:27:43 +0000 Subject: [PATCH 270/377] Simplify comms layers --- lib/communicator/Communicator.h | 1 + 1 file changed, 1 insertion(+) diff --git a/lib/communicator/Communicator.h b/lib/communicator/Communicator.h index 09ce50dc..d4ec5a13 100644 --- a/lib/communicator/Communicator.h +++ b/lib/communicator/Communicator.h @@ -28,6 +28,7 @@ Author: Peter Boyle #ifndef GRID_COMMUNICATOR_H 
#define GRID_COMMUNICATOR_H +#include #include #endif From 6ecf2807237f7b476c495cc97d42cd2c9a1c5c72 Mon Sep 17 00:00:00 2001 From: paboyle Date: Mon, 8 Jan 2018 11:28:04 +0000 Subject: [PATCH 271/377] Simplify comms layer proliferation --- lib/qcd/action/fermion/WilsonCompressor.h | 1 - 1 file changed, 1 deletion(-) diff --git a/lib/qcd/action/fermion/WilsonCompressor.h b/lib/qcd/action/fermion/WilsonCompressor.h index cc5c3c63..b47700ac 100644 --- a/lib/qcd/action/fermion/WilsonCompressor.h +++ b/lib/qcd/action/fermion/WilsonCompressor.h @@ -265,7 +265,6 @@ public: if ( timer3 ) std::cout << GridLogMessage << " timer3 (commsMergeShm) " < same_node; std::vector surface_list; From 0a68470f9a2042af2c5bc443bc3fdb33bfc58e77 Mon Sep 17 00:00:00 2001 From: paboyle Date: Mon, 8 Jan 2018 11:28:30 +0000 Subject: [PATCH 272/377] Simplify comms layers --- lib/stencil/Stencil.h | 1 - 1 file changed, 1 deletion(-) diff --git a/lib/stencil/Stencil.h b/lib/stencil/Stencil.h index 887d8a7c..69c010f4 100644 --- a/lib/stencil/Stencil.h +++ b/lib/stencil/Stencil.h @@ -105,7 +105,6 @@ template class CartesianStencil { // Stencil runs along coordinate axes only; NO diagonal fill in. 
public: - typedef CartesianCommunicator::CommsRequest_t CommsRequest_t; typedef typename cobj::vector_type vector_type; typedef typename cobj::scalar_type scalar_type; typedef typename cobj::scalar_object scalar_object; From b91282ad46630f006c9c678b33ec0d9448cec8d6 Mon Sep 17 00:00:00 2001 From: paboyle Date: Mon, 8 Jan 2018 11:28:52 +0000 Subject: [PATCH 273/377] Simplify comms layer proliferation --- lib/util/Init.cc | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/lib/util/Init.cc b/lib/util/Init.cc index 031f8f5a..4f99e491 100644 --- a/lib/util/Init.cc +++ b/lib/util/Init.cc @@ -220,11 +220,11 @@ void Grid_init(int *argc,char ***argv) arg= GridCmdOptionPayload(*argv,*argv+*argc,"--shm"); GridCmdOptionInt(arg,MB); uint64_t MB64 = MB; - CartesianCommunicator::MAX_MPI_SHM_BYTES = MB64*1024LL*1024LL; + GlobalSharedMemory::MAX_MPI_SHM_BYTES = MB64*1024LL*1024LL; } if( GridCmdOptionExists(*argv,*argv+*argc,"--shm-hugepages") ){ - CartesianCommunicator::Hugepages = 1; + GlobalSharedMemory::Hugepages = 1; } @@ -392,8 +392,8 @@ void Grid_init(int *argc,char ***argv) Grid_default_latt, Grid_default_mpi); - std::cout << GridLogMessage << "Requesting "<< CartesianCommunicator::MAX_MPI_SHM_BYTES <<" byte stencil comms buffers "< Date: Mon, 8 Jan 2018 11:29:20 +0000 Subject: [PATCH 274/377] Simplify proliferation of comms layers --- scripts/filelist | 2 +- tests/solver/Test_dwf_mrhs_cg_mpi.cc | 9 ++++++--- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/scripts/filelist b/scripts/filelist index 8d4b8e1a..74f8e334 100755 --- a/scripts/filelist +++ b/scripts/filelist @@ -6,7 +6,7 @@ home=`pwd` cd $home/lib HFILES=`find . -type f -name '*.h' -not -name '*Hdf5*' -not -path '*/gamma-gen/*' -not -path '*/Old/*' -not -path '*/Eigen/*'` HFILES="$HFILES" -CCFILES=`find . -type f -name '*.cc' -not -path '*/gamma-gen/*' -not -name '*Communicator*.cc' -not -name '*Hdf5*'` +CCFILES=`find . 
-type f -name '*.cc' -not -path '*/gamma-gen/*' -not -name '*Communicator*.cc' -not -name '*SharedMemory*.cc' -not -name '*Hdf5*'` HPPFILES=`find . -type f -name '*.hpp'` echo HFILES=$HFILES $HPPFILES > Make.inc echo >> Make.inc diff --git a/tests/solver/Test_dwf_mrhs_cg_mpi.cc b/tests/solver/Test_dwf_mrhs_cg_mpi.cc index 7e11d8d1..aa36ebbc 100644 --- a/tests/solver/Test_dwf_mrhs_cg_mpi.cc +++ b/tests/solver/Test_dwf_mrhs_cg_mpi.cc @@ -72,14 +72,17 @@ int main (int argc, char ** argv) int nrhs = 1; int me; for(int i=0;i Date: Mon, 8 Jan 2018 11:30:22 +0000 Subject: [PATCH 275/377] Simplify comms layer proliferation --- lib/Makefile.am | 18 ++++-------------- 1 file changed, 4 insertions(+), 14 deletions(-) diff --git a/lib/Makefile.am b/lib/Makefile.am index 6dd7899e..dc33e7cf 100644 --- a/lib/Makefile.am +++ b/lib/Makefile.am @@ -1,28 +1,18 @@ extra_sources= extra_headers= -if BUILD_COMMS_MPI - extra_sources+=communicator/Communicator_mpi.cc - extra_sources+=communicator/Communicator_base.cc -endif if BUILD_COMMS_MPI3 extra_sources+=communicator/Communicator_mpi3.cc extra_sources+=communicator/Communicator_base.cc -endif - -if BUILD_COMMS_MPIT - extra_sources+=communicator/Communicator_mpit.cc - extra_sources+=communicator/Communicator_base.cc -endif - -if BUILD_COMMS_SHMEM - extra_sources+=communicator/Communicator_shmem.cc - extra_sources+=communicator/Communicator_base.cc + extra_sources+=communicator/SharedMemoryMPI.cc + extra_sources+=communicator/SharedMemory.cc endif if BUILD_COMMS_NONE extra_sources+=communicator/Communicator_none.cc extra_sources+=communicator/Communicator_base.cc + extra_sources+=communicator/SharedMemoryNone.cc + extra_sources+=communicator/SharedMemory.cc endif if BUILD_HDF5 From 0091eec23a08fd94ed5711d887019d2359e3503a Mon Sep 17 00:00:00 2001 From: paboyle Date: Mon, 8 Jan 2018 11:31:32 +0000 Subject: [PATCH 276/377] Simplify communicator cases --- lib/communicator/Communicator_base.h | 112 ++++----------------------- 1 file changed, 
13 insertions(+), 99 deletions(-) diff --git a/lib/communicator/Communicator_base.h b/lib/communicator/Communicator_base.h index 548515cd..a9b99c17 100644 --- a/lib/communicator/Communicator_base.h +++ b/lib/communicator/Communicator_base.h @@ -32,117 +32,33 @@ Author: Peter Boyle /////////////////////////////////// // Processor layout information /////////////////////////////////// -#ifdef GRID_COMMS_MPI -#include -#endif -#ifdef GRID_COMMS_MPI3 -#include -#endif -#ifdef GRID_COMMS_MPIT -#include -#endif -#ifdef GRID_COMMS_SHMEM -#include -#endif +#include namespace Grid { -class CartesianCommunicator { - public: +class CartesianCommunicator : public SharedMemory { +public: //////////////////////////////////////////// - // Isend/Irecv/Wait, or Sendrecv blocking + // Policies //////////////////////////////////////////// enum CommunicatorPolicy_t { CommunicatorPolicyConcurrent, CommunicatorPolicySequential }; static CommunicatorPolicy_t CommunicatorPolicy; static void SetCommunicatorPolicy(CommunicatorPolicy_t policy ) { CommunicatorPolicy = policy; } - - /////////////////////////////////////////// - // Up to 65536 ranks per node adequate for now - // 128MB shared memory for comms enought for 48^4 local vol comms - // Give external control (command line override?) of this - /////////////////////////////////////////// - static const int MAXLOG2RANKSPERNODE = 16; - static uint64_t MAX_MPI_SHM_BYTES; static int nCommThreads; - // use explicit huge pages - static int Hugepages; + //////////////////////////////////////////// // Communicator should know nothing of the physics grid, only processor grid. + //////////////////////////////////////////// int _Nprocessors; // How many in all std::vector _processors; // Which dimensions get relayed out over processors lanes. 
int _processor; // linear processor rank std::vector _processor_coor; // linear processor coordinate - unsigned long _ndimension; - -#if defined (GRID_COMMS_MPI) || defined (GRID_COMMS_MPI3) || defined (GRID_COMMS_MPIT) - static MPI_Comm communicator_world; - - MPI_Comm communicator; - std::vector communicator_halo; - - typedef MPI_Request CommsRequest_t; - -#else - typedef int CommsRequest_t; -#endif - - - //////////////////////////////////////////////////////////////////// - // Helper functionality for SHM Windows common to all other impls - //////////////////////////////////////////////////////////////////// - // Longer term; drop this in favour of a master / slave model with - // cartesian communicator on a subset of ranks, slave ranks controlled - // by group leader with data xfer via shared memory - //////////////////////////////////////////////////////////////////// -#ifdef GRID_COMMS_MPI3 - - static int ShmRank; - static int ShmSize; - static int GroupRank; - static int GroupSize; - static int WorldRank; - static int WorldSize; - - std::vector WorldDims; - std::vector GroupDims; - std::vector ShmDims; - - std::vector GroupCoor; - std::vector ShmCoor; - std::vector WorldCoor; - - static std::vector GroupRanks; - static std::vector MyGroup; - static int ShmSetup; - static MPI_Win ShmWindow; - static MPI_Comm ShmComm; - - std::vector LexicographicToWorldRank; - - static std::vector ShmCommBufs; - -#else - static void ShmInitGeneric(void); - static commVector ShmBufStorageVector; -#endif - - ///////////////////////////////// - // Grid information and queries - // Implemented in Communicator_base.C - ///////////////////////////////// - static void * ShmCommBuf; - - - size_t heap_top; - size_t heap_bytes; - - void *ShmBufferSelf(void); - void *ShmBuffer(int rank); - void *ShmBufferTranslate(int rank,void * local_p); - void *ShmBufferMalloc(size_t bytes); - void ShmBufferFreeAll(void) ; + unsigned long _ndimension; + static Grid_MPI_Comm communicator_world; + 
Grid_MPI_Comm communicator; + std::vector communicator_halo; //////////////////////////////////////////////// // Must call in Grid startup @@ -158,13 +74,13 @@ class CartesianCommunicator { virtual ~CartesianCommunicator(); private: -#if defined (GRID_COMMS_MPI) || defined (GRID_COMMS_MPIT) || defined (GRID_COMMS_MPI3) + //////////////////////////////////////////////// // Private initialise from an MPI communicator // Can use after an MPI_Comm_split, but hidden from user so private //////////////////////////////////////////////// - void InitFromMPICommunicator(const std::vector &processors, MPI_Comm communicator_base); -#endif + void InitFromMPICommunicator(const std::vector &processors, Grid_MPI_Comm communicator_base); + public: //////////////////////////////////////////////////////////////////////////////////////// @@ -181,8 +97,6 @@ class CartesianCommunicator { const std::vector & ThisProcessorCoor(void) ; const std::vector & ProcessorGrid(void) ; int ProcessorCount(void) ; - int NodeCount(void) ; - int RankCount(void) ; //////////////////////////////////////////////////////////////////////////////// // very VERY rarely (Log, serial RNG) we need world without a grid From 357badce5ed7efac2df4c3f5bc5cf71815334c3a Mon Sep 17 00:00:00 2001 From: paboyle Date: Mon, 8 Jan 2018 11:32:16 +0000 Subject: [PATCH 277/377] Simplify communicator case proliferation --- lib/communicator/Communicator_base.cc | 288 -------------------------- 1 file changed, 288 deletions(-) diff --git a/lib/communicator/Communicator_base.cc b/lib/communicator/Communicator_base.cc index 3e561405..edbf26af 100644 --- a/lib/communicator/Communicator_base.cc +++ b/lib/communicator/Communicator_base.cc @@ -36,33 +36,9 @@ namespace Grid { /////////////////////////////////////////////////////////////// // Info that is setup once and indept of cartesian layout /////////////////////////////////////////////////////////////// -void * CartesianCommunicator::ShmCommBuf; -uint64_t 
CartesianCommunicator::MAX_MPI_SHM_BYTES = 1024LL*1024LL*1024LL; CartesianCommunicator::CommunicatorPolicy_t CartesianCommunicator::CommunicatorPolicy= CartesianCommunicator::CommunicatorPolicyConcurrent; int CartesianCommunicator::nCommThreads = -1; -int CartesianCommunicator::Hugepages = 0; - -///////////////////////////////// -// Alloc, free shmem region -///////////////////////////////// -void *CartesianCommunicator::ShmBufferMalloc(size_t bytes){ - // bytes = (bytes+sizeof(vRealD))&(~(sizeof(vRealD)-1));// align up bytes - void *ptr = (void *)heap_top; - heap_top += bytes; - heap_bytes+= bytes; - if (heap_bytes >= MAX_MPI_SHM_BYTES) { - std::cout<< " ShmBufferMalloc exceeded shared heap size -- try increasing with --shm flag" < row(_ndimension,1); - assert(dim>=0 && dim<_ndimension); - - // Split the communicator - row[dim] = _processors[dim]; - - int me; - CartesianCommunicator Comm(row,*this,me); - Comm.AllToAll(in,out,words,bytes); -} -void CartesianCommunicator::AllToAll(void *in,void *out,uint64_t words,uint64_t bytes) -{ - // MPI is a pain and uses "int" arguments - // 64*64*64*128*16 == 500Million elements of data. - // When 24*4 bytes multiples get 50x 10^9 >>> 2x10^9 Y2K bug. - // (Turns up on 32^3 x 64 Gparity too) - MPI_Datatype object; - int iwords; - int ibytes; - iwords = words; - ibytes = bytes; - assert(words == iwords); // safe to cast to int ? - assert(bytes == ibytes); // safe to cast to int ? 
- MPI_Type_contiguous(ibytes,MPI_BYTE,&object); - MPI_Type_commit(&object); - MPI_Alltoall(in,iwords,object,out,iwords,object,communicator); - MPI_Type_free(&object); -} -#endif - -#if defined( GRID_COMMS_MPI) || defined (GRID_COMMS_MPIT) -CartesianCommunicator::CartesianCommunicator(const std::vector &processors,const CartesianCommunicator &parent,int &srank) -{ - _ndimension = processors.size(); - - int parent_ndimension = parent._ndimension; assert(_ndimension >= parent._ndimension); - std::vector parent_processor_coor(_ndimension,0); - std::vector parent_processors (_ndimension,1); - - // Can make 5d grid from 4d etc... - int pad = _ndimension-parent_ndimension; - for(int d=0;d ccoor(_ndimension); // coor within subcommunicator - std::vector scoor(_ndimension); // coor of split within parent - std::vector ssize(_ndimension); // coor of split within parent - - for(int d=0;d<_ndimension;d++){ - ccoor[d] = parent_processor_coor[d] % processors[d]; - scoor[d] = parent_processor_coor[d] / processors[d]; - ssize[d] = parent_processors[d] / processors[d]; - } - int crank; // rank within subcomm ; srank is rank of subcomm within blocks of subcomms - // Mpi uses the reverse Lexico convention to us - Lexicographic::IndexFromCoorReversed(ccoor,crank,processors); - Lexicographic::IndexFromCoorReversed(scoor,srank,ssize); - - MPI_Comm comm_split; - if ( Nchild > 1 ) { - - if(0){ - std::cout << GridLogMessage<<"Child communicator of "<< std::hex << parent.communicator << std::dec< &processors, MPI_Comm communicator_base) -{ - _ndimension = processors.size(); - _processor_coor.resize(_ndimension); - - ///////////////////////////////// - // Count the requested nodes - ///////////////////////////////// - _Nprocessors=1; - _processors = processors; - for(int i=0;i<_ndimension;i++){ - _Nprocessors*=_processors[i]; - } - - std::vector periodic(_ndimension,1); - MPI_Cart_create(communicator_base, _ndimension,&_processors[0],&periodic[0],0,&communicator); - 
MPI_Comm_rank(communicator,&_processor); - MPI_Cart_coords(communicator,_processor,_ndimension,&_processor_coor[0]); - - if ( 0 && (communicator_base != communicator_world) ) { - std::cout << "InitFromMPICommunicator Cartesian communicator created with a non-world communicator"< &processors) -{ - InitFromMPICommunicator(processors,communicator_world); -} - -#endif - -#if !defined( GRID_COMMS_MPI3) -int CartesianCommunicator::NodeCount(void) { return ProcessorCount();}; -int CartesianCommunicator::RankCount(void) { return ProcessorCount();}; -#endif - -#if !defined( GRID_COMMS_MPI3) && !defined (GRID_COMMS_MPIT) -double CartesianCommunicator::StencilSendToRecvFrom( void *xmit, - int xmit_to_rank, - void *recv, - int recv_from_rank, - int bytes, int dir) -{ - std::vector list; - // Discard the "dir" - SendToRecvFromBegin (list,xmit,xmit_to_rank,recv,recv_from_rank,bytes); - SendToRecvFromComplete(list); - return 2.0*bytes; -} -double CartesianCommunicator::StencilSendToRecvFromBegin(std::vector &list, - void *xmit, - int xmit_to_rank, - void *recv, - int recv_from_rank, - int bytes, int dir) -{ - // Discard the "dir" - SendToRecvFromBegin(list,xmit,xmit_to_rank,recv,recv_from_rank,bytes); - return 2.0*bytes; -} -void CartesianCommunicator::StencilSendToRecvFromComplete(std::vector &waitall,int dir) -{ - SendToRecvFromComplete(waitall); -} -#endif - -#if !defined( GRID_COMMS_MPI3) - -void CartesianCommunicator::StencilBarrier(void){}; - -commVector CartesianCommunicator::ShmBufStorageVector; - -void *CartesianCommunicator::ShmBufferSelf(void) { return ShmCommBuf; } - -void *CartesianCommunicator::ShmBuffer(int rank) { - return NULL; -} -void *CartesianCommunicator::ShmBufferTranslate(int rank,void * local_p) { - return NULL; -} -void CartesianCommunicator::ShmInitGeneric(void){ -#if 1 - int mmap_flag =0; -#ifdef MAP_ANONYMOUS - mmap_flag = mmap_flag| MAP_SHARED | MAP_ANONYMOUS; -#endif -#ifdef MAP_ANON - mmap_flag = mmap_flag| MAP_SHARED | MAP_ANON; -#endif -#ifdef 
MAP_HUGETLB - if ( Hugepages ) mmap_flag |= MAP_HUGETLB; -#endif - ShmCommBuf =(void *) mmap(NULL, MAX_MPI_SHM_BYTES, PROT_READ | PROT_WRITE, mmap_flag, -1, 0); - if (ShmCommBuf == (void *)MAP_FAILED) { - perror("mmap failed "); - exit(EXIT_FAILURE); - } -#ifdef MADV_HUGEPAGE - if (!Hugepages ) madvise(ShmCommBuf,MAX_MPI_SHM_BYTES,MADV_HUGEPAGE); -#endif -#else - ShmBufStorageVector.resize(MAX_MPI_SHM_BYTES); - ShmCommBuf=(void *)&ShmBufStorageVector[0]; -#endif - bzero(ShmCommBuf,MAX_MPI_SHM_BYTES); -} - -#endif } From 9947cfbf14de0bc323c0a791f21267aadf9488ab Mon Sep 17 00:00:00 2001 From: paboyle Date: Mon, 8 Jan 2018 11:33:01 +0000 Subject: [PATCH 278/377] Simplify number of communicator cases --- lib/communicator/Communicator_mpi3.cc | 751 ++++++++------------------ 1 file changed, 213 insertions(+), 538 deletions(-) diff --git a/lib/communicator/Communicator_mpi3.cc b/lib/communicator/Communicator_mpi3.cc index e41749d4..ef47d617 100644 --- a/lib/communicator/Communicator_mpi3.cc +++ b/lib/communicator/Communicator_mpi3.cc @@ -26,89 +26,20 @@ Author: Peter Boyle *************************************************************************************/ /* END LEGAL */ #include - -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#ifdef HAVE_NUMAIF_H -#include -#endif - +#include namespace Grid { -/////////////////////////////////////////////////////////////////////////////////////////////////// -// Info that is setup once and indept of cartesian layout -/////////////////////////////////////////////////////////////////////////////////////////////////// -int CartesianCommunicator::ShmSetup = 0; +Grid_MPI_Comm CartesianCommunicator::communicator_world; -int CartesianCommunicator::ShmRank; -int CartesianCommunicator::ShmSize; -int CartesianCommunicator::GroupRank; -int CartesianCommunicator::GroupSize; -int CartesianCommunicator::WorldRank; -int CartesianCommunicator::WorldSize; - -MPI_Comm 
CartesianCommunicator::communicator_world; -MPI_Comm CartesianCommunicator::ShmComm; -MPI_Win CartesianCommunicator::ShmWindow; - -std::vector CartesianCommunicator::GroupRanks; -std::vector CartesianCommunicator::MyGroup; -std::vector CartesianCommunicator::ShmCommBufs; - -int CartesianCommunicator::NodeCount(void) { return GroupSize;}; -int CartesianCommunicator::RankCount(void) { return WorldSize;}; - - -#undef FORCE_COMMS -void *CartesianCommunicator::ShmBufferSelf(void) +//////////////////////////////////////////// +// First initialise of comms system +//////////////////////////////////////////// +void CartesianCommunicator::Init(int *argc, char ***argv) { - return ShmCommBufs[ShmRank]; -} -void *CartesianCommunicator::ShmBuffer(int rank) -{ - int gpeer = GroupRanks[rank]; -#ifdef FORCE_COMMS - return NULL; -#endif - if (gpeer == MPI_UNDEFINED){ - return NULL; - } else { - return ShmCommBufs[gpeer]; - } -} -void *CartesianCommunicator::ShmBufferTranslate(int rank,void * local_p) -{ - static int count =0; - int gpeer = GroupRanks[rank]; - assert(gpeer!=ShmRank); // never send to self - assert(rank!=WorldRank);// never send to self -#ifdef FORCE_COMMS - return NULL; -#endif - if (gpeer == MPI_UNDEFINED){ - return NULL; - } else { - uint64_t offset = (uint64_t)local_p - (uint64_t)ShmCommBufs[ShmRank]; - uint64_t remote = (uint64_t)ShmCommBufs[gpeer]+offset; - return (void *) remote; - } -} - -void CartesianCommunicator::Init(int *argc, char ***argv) { int flag; int provided; - // mtrace(); MPI_Initialized(&flag); // needed to coexist with other libs apparently if ( !flag ) { @@ -119,487 +50,202 @@ void CartesianCommunicator::Init(int *argc, char ***argv) { Grid_quiesce_nodes(); MPI_Comm_dup (MPI_COMM_WORLD,&communicator_world); - MPI_Comm_rank(communicator_world,&WorldRank); - MPI_Comm_size(communicator_world,&WorldSize); - if ( WorldRank == 0 ) { - std::cout << GridLogMessage<< "Initialising MPI "<< WorldRank <<"/"< world_ranks(WorldSize); - 
GroupRanks.resize(WorldSize); - for(int r=0;r()); - int myleader = MyGroup[0]; - - std::vector leaders_1hot(WorldSize,0); - std::vector leaders_group(GroupSize,0); - leaders_1hot [ myleader ] = 1; - - /////////////////////////////////////////////////////////////////// - // global sum leaders over comm world - /////////////////////////////////////////////////////////////////// - int ierr=MPI_Allreduce(MPI_IN_PLACE,&leaders_1hot[0],WorldSize,MPI_INT,MPI_SUM,communicator_world); - assert(ierr==0); - /////////////////////////////////////////////////////////////////// - // find the group leaders world rank - /////////////////////////////////////////////////////////////////// - int group=0; - for(int l=0;l shmids(ShmSize); - - if ( ShmRank == 0 ) { - for(int r=0;r coor = _processor_coor; // my coord - assert(std::abs(shift) <_processors[dim]); - - coor[dim] = (_processor_coor[dim] + shift + _processors[dim])%_processors[dim]; - Lexicographic::IndexFromCoor(coor,source,_processors); - source = LexicographicToWorldRank[source]; - - coor[dim] = (_processor_coor[dim] - shift + _processors[dim])%_processors[dim]; - Lexicographic::IndexFromCoor(coor,dest,_processors); - dest = LexicographicToWorldRank[dest]; - -}// rank is world rank. 
- + int ierr=MPI_Cart_shift(communicator,dim,shift,&source,&dest); + assert(ierr==0); +} int CartesianCommunicator::RankFromProcessorCoor(std::vector &coor) { int rank; - Lexicographic::IndexFromCoor(coor,rank,_processors); - rank = LexicographicToWorldRank[rank]; + int ierr=MPI_Cart_rank (communicator, &coor[0], &rank); + assert(ierr==0); return rank; -}// rank is world rank - +} void CartesianCommunicator::ProcessorCoorFromRank(int rank, std::vector &coor) { - int lr=-1; - for(int r=0;r &processors) +{ + MPI_Comm optimal_comm; + GlobalSharedMemory::OptimalCommunicator (processors,optimal_comm); // Remap using the shared memory optimising routine + InitFromMPICommunicator(processors,optimal_comm); + SetCommunicator(optimal_comm); } ////////////////////////////////// // Try to subdivide communicator ////////////////////////////////// -/* - * Use default in MPI compile - */ -CartesianCommunicator::CartesianCommunicator(const std::vector &processors,const CartesianCommunicator &parent,int &srank) - : CartesianCommunicator(processors) +CartesianCommunicator::CartesianCommunicator(const std::vector &processors,const CartesianCommunicator &parent,int &srank) { - std::cout << "Attempts to split MPI3 communicators will fail until implemented" <= parent._ndimension); + std::vector parent_processor_coor(_ndimension,0); + std::vector parent_processors (_ndimension,1); + + // Can make 5d grid from 4d etc... 
+ int pad = _ndimension-parent_ndimension; + for(int d=0;d ccoor(_ndimension); // coor within subcommunicator + std::vector scoor(_ndimension); // coor of split within parent + std::vector ssize(_ndimension); // coor of split within parent + + for(int d=0;d<_ndimension;d++){ + ccoor[d] = parent_processor_coor[d] % processors[d]; + scoor[d] = parent_processor_coor[d] / processors[d]; + ssize[d] = parent_processors[d] / processors[d]; + } + + // rank within subcomm ; srank is rank of subcomm within blocks of subcomms + int crank; + // Mpi uses the reverse Lexico convention to us; so reversed routines called + Lexicographic::IndexFromCoorReversed(ccoor,crank,processors); // processors is the split grid dimensions + Lexicographic::IndexFromCoorReversed(scoor,srank,ssize); // ssize is the number of split grids + + MPI_Comm comm_split; + if ( Nchild > 1 ) { + + if(0){ + std::cout << GridLogMessage<<"Child communicator of "<< std::hex << parent.communicator << std::dec< &processors) -{ - int ierr; - communicator=communicator_world; - +void CartesianCommunicator::InitFromMPICommunicator(const std::vector &processors, MPI_Comm communicator_base) +{ _ndimension = processors.size(); + _processor_coor.resize(_ndimension); + + ///////////////////////////////// + // Count the requested nodes + ///////////////////////////////// + _Nprocessors=1; + _processors = processors; + for(int i=0;i<_ndimension;i++){ + _Nprocessors*=_processors[i]; + } + + std::vector periodic(_ndimension,1); + MPI_Cart_create(communicator_base, _ndimension,&_processors[0],&periodic[0],0,&communicator); + MPI_Comm_rank(communicator,&_processor); + MPI_Cart_coords(communicator,_processor,_ndimension,&_processor_coor[0]); + + if ( 0 && (communicator_base != communicator_world) ) { + std::cout << "InitFromMPICommunicator Cartesian communicator created with a non-world communicator"< WorldDims = processors; - - ShmDims.resize (_ndimension,1); - GroupDims.resize(_ndimension); - ShmCoor.resize (_ndimension); - 
GroupCoor.resize(_ndimension); - WorldCoor.resize(_ndimension); - - int dim = 0; - for(int l2=0;l2 coor(_ndimension); - ProcessorCoorFromRank(wr,coor); // from world rank - int ck = RankFromProcessorCoor(coor); - assert(ck==wr); - - if ( wr == WorldRank ) { - for(int j=0;j mcoor = coor; - this->Broadcast(0,(void *)&mcoor[0],mcoor.size()*sizeof(int)); - for(int d = 0 ; d< _ndimension; d++) { - assert(coor[d] == mcoor[d]); - } - } -}; CartesianCommunicator::~CartesianCommunicator() { int MPI_is_finalised; @@ -734,19 +380,15 @@ double CartesianCommunicator::StencilSendToRecvFromBegin(std::vector row(_ndimension,1); + assert(dim>=0 && dim<_ndimension); + + // Split the communicator + row[dim] = _processors[dim]; + + int me; + CartesianCommunicator Comm(row,*this,me); + Comm.AllToAll(in,out,words,bytes); +} +void CartesianCommunicator::AllToAll(void *in,void *out,uint64_t words,uint64_t bytes) +{ + // MPI is a pain and uses "int" arguments + // 64*64*64*128*16 == 500Million elements of data. + // When 24*4 bytes multiples get 50x 10^9 >>> 2x10^9 Y2K bug. + // (Turns up on 32^3 x 64 Gparity too) + MPI_Datatype object; + int iwords; + int ibytes; + iwords = words; + ibytes = bytes; + assert(words == iwords); // safe to cast to int ? + assert(bytes == ibytes); // safe to cast to int ? + MPI_Type_contiguous(ibytes,MPI_BYTE,&object); + MPI_Type_commit(&object); + MPI_Alltoall(in,iwords,object,out,iwords,object,communicator); + MPI_Type_free(&object); +} + + + } From 0b85f1bfc8d6ceec46150ae1c75dc048f20629a3 Mon Sep 17 00:00:00 2001 From: paboyle Date: Mon, 8 Jan 2018 11:33:47 +0000 Subject: [PATCH 279/377] Simplify the communicator proliferation: mpi and none. 
--- lib/communicator/Communicator_none.cc | 43 +++++++++++++++++++++++++-- 1 file changed, 41 insertions(+), 2 deletions(-) diff --git a/lib/communicator/Communicator_none.cc b/lib/communicator/Communicator_none.cc index 26b330a7..c3763d53 100644 --- a/lib/communicator/Communicator_none.cc +++ b/lib/communicator/Communicator_none.cc @@ -32,14 +32,22 @@ namespace Grid { /////////////////////////////////////////////////////////////////////////////////////////////////// // Info that is setup once and indept of cartesian layout /////////////////////////////////////////////////////////////////////////////////////////////////// +Grid_MPI_Comm CartesianCommunicator::communicator_world; void CartesianCommunicator::Init(int *argc, char *** arv) { - ShmInitGeneric(); + GlobalSharedMemory::Init(communicator_world); + GlobalSharedMemory::SharedMemoryAllocate( + GlobalSharedMemory::MAX_MPI_SHM_BYTES, + GlobalSharedMemory::Hugepages); } CartesianCommunicator::CartesianCommunicator(const std::vector &processors,const CartesianCommunicator &parent,int &srank) - : CartesianCommunicator(processors) { srank=0;} + : CartesianCommunicator(processors) +{ + srank=0; + SetCommunicator(communicator_world); +} CartesianCommunicator::CartesianCommunicator(const std::vector &processors) { @@ -54,6 +62,7 @@ CartesianCommunicator::CartesianCommunicator(const std::vector &processors) assert(_processors[d]==1); _processor_coor[d] = 0; } + SetCommunicator(communicator_world); } CartesianCommunicator::~CartesianCommunicator(){} @@ -121,6 +130,36 @@ void CartesianCommunicator::ShiftedRanks(int dim,int shift,int &source,int &dest dest=0; } +double CartesianCommunicator::StencilSendToRecvFrom( void *xmit, + int xmit_to_rank, + void *recv, + int recv_from_rank, + int bytes, int dir) +{ + std::vector list; + // Discard the "dir" + SendToRecvFromBegin (list,xmit,xmit_to_rank,recv,recv_from_rank,bytes); + SendToRecvFromComplete(list); + return 2.0*bytes; +} +double 
CartesianCommunicator::StencilSendToRecvFromBegin(std::vector &list, + void *xmit, + int xmit_to_rank, + void *recv, + int recv_from_rank, + int bytes, int dir) +{ + // Discard the "dir" + SendToRecvFromBegin(list,xmit,xmit_to_rank,recv,recv_from_rank,bytes); + return 2.0*bytes; +} +void CartesianCommunicator::StencilSendToRecvFromComplete(std::vector &waitall,int dir) +{ + SendToRecvFromComplete(waitall); +} + +void CartesianCommunicator::StencilBarrier(void){}; + } From 44f65526e01369c193a8754e97ec959ed8d0a1d4 Mon Sep 17 00:00:00 2001 From: paboyle Date: Mon, 8 Jan 2018 11:35:43 +0000 Subject: [PATCH 280/377] Simplify communicators --- lib/communicator/Communicator_mpi.cc | 222 ----- lib/communicator/Communicator_mpi3_leader.cc | 988 ------------------- lib/communicator/Communicator_mpit.cc | 273 ----- lib/communicator/Communicator_shmem.cc | 357 ------- lib/communicator/SharedMemory.cc | 54 + lib/communicator/SharedMemory.h | 158 +++ lib/communicator/SharedMemoryMPI.cc | 415 ++++++++ lib/communicator/SharedMemoryNone.cc | 150 +++ 8 files changed, 777 insertions(+), 1840 deletions(-) delete mode 100644 lib/communicator/Communicator_mpi.cc delete mode 100644 lib/communicator/Communicator_mpi3_leader.cc delete mode 100644 lib/communicator/Communicator_mpit.cc delete mode 100644 lib/communicator/Communicator_shmem.cc create mode 100644 lib/communicator/SharedMemory.cc create mode 100644 lib/communicator/SharedMemory.h create mode 100644 lib/communicator/SharedMemoryMPI.cc create mode 100644 lib/communicator/SharedMemoryNone.cc diff --git a/lib/communicator/Communicator_mpi.cc b/lib/communicator/Communicator_mpi.cc deleted file mode 100644 index 2075e4bf..00000000 --- a/lib/communicator/Communicator_mpi.cc +++ /dev/null @@ -1,222 +0,0 @@ - /************************************************************************************* - - Grid physics library, www.github.com/paboyle/Grid - - Source file: ./lib/communicator/Communicator_mpi.cc - - Copyright (C) 2015 - -Author: 
Peter Boyle - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License along - with this program; if not, write to the Free Software Foundation, Inc., - 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. - - See the full license in the file "LICENSE" in the top level distribution directory - *************************************************************************************/ - /* END LEGAL */ -#include -#include -#include -#include - -namespace Grid { - - -/////////////////////////////////////////////////////////////////////////////////////////////////// -// Info that is setup once and indept of cartesian layout -/////////////////////////////////////////////////////////////////////////////////////////////////// -MPI_Comm CartesianCommunicator::communicator_world; - -// Should error check all MPI calls. 
-void CartesianCommunicator::Init(int *argc, char ***argv) { - int flag; - int provided; - MPI_Initialized(&flag); // needed to coexist with other libs apparently - if ( !flag ) { - MPI_Init_thread(argc,argv,MPI_THREAD_MULTIPLE,&provided); - if ( provided != MPI_THREAD_MULTIPLE ) { - QCD::WilsonKernelsStatic::Comms = QCD::WilsonKernelsStatic::CommsThenCompute; - } - } - MPI_Comm_dup (MPI_COMM_WORLD,&communicator_world); - ShmInitGeneric(); -} - -CartesianCommunicator::~CartesianCommunicator() -{ - int MPI_is_finalised; - MPI_Finalized(&MPI_is_finalised); - if (communicator && !MPI_is_finalised) - MPI_Comm_free(&communicator); -} - -void CartesianCommunicator::GlobalSum(uint32_t &u){ - int ierr=MPI_Allreduce(MPI_IN_PLACE,&u,1,MPI_UINT32_T,MPI_SUM,communicator); - assert(ierr==0); -} -void CartesianCommunicator::GlobalSum(uint64_t &u){ - int ierr=MPI_Allreduce(MPI_IN_PLACE,&u,1,MPI_UINT64_T,MPI_SUM,communicator); - assert(ierr==0); -} -void CartesianCommunicator::GlobalXOR(uint32_t &u){ - int ierr=MPI_Allreduce(MPI_IN_PLACE,&u,1,MPI_UINT32_T,MPI_BXOR,communicator); - assert(ierr==0); -} -void CartesianCommunicator::GlobalXOR(uint64_t &u){ - int ierr=MPI_Allreduce(MPI_IN_PLACE,&u,1,MPI_UINT64_T,MPI_BXOR,communicator); - assert(ierr==0); -} -void CartesianCommunicator::GlobalSum(float &f){ - int ierr=MPI_Allreduce(MPI_IN_PLACE,&f,1,MPI_FLOAT,MPI_SUM,communicator); - assert(ierr==0); -} -void CartesianCommunicator::GlobalSumVector(float *f,int N) -{ - int ierr=MPI_Allreduce(MPI_IN_PLACE,f,N,MPI_FLOAT,MPI_SUM,communicator); - assert(ierr==0); -} -void CartesianCommunicator::GlobalSum(double &d) -{ - int ierr = MPI_Allreduce(MPI_IN_PLACE,&d,1,MPI_DOUBLE,MPI_SUM,communicator); - assert(ierr==0); -} -void CartesianCommunicator::GlobalSumVector(double *d,int N) -{ - int ierr = MPI_Allreduce(MPI_IN_PLACE,d,N,MPI_DOUBLE,MPI_SUM,communicator); - assert(ierr==0); -} -void CartesianCommunicator::ShiftedRanks(int dim,int shift,int &source,int &dest) -{ - int 
ierr=MPI_Cart_shift(communicator,dim,shift,&source,&dest); - assert(ierr==0); -} -int CartesianCommunicator::RankFromProcessorCoor(std::vector &coor) -{ - int rank; - int ierr=MPI_Cart_rank (communicator, &coor[0], &rank); - assert(ierr==0); - return rank; -} -void CartesianCommunicator::ProcessorCoorFromRank(int rank, std::vector &coor) -{ - coor.resize(_ndimension); - int ierr=MPI_Cart_coords (communicator, rank, _ndimension,&coor[0]); - assert(ierr==0); -} - -// Basic Halo comms primitive -void CartesianCommunicator::SendToRecvFrom(void *xmit, - int dest, - void *recv, - int from, - int bytes) -{ - std::vector reqs(0); - SendToRecvFromBegin(reqs,xmit,dest,recv,from,bytes); - SendToRecvFromComplete(reqs); -} - -void CartesianCommunicator::SendRecvPacket(void *xmit, - void *recv, - int sender, - int receiver, - int bytes) -{ - MPI_Status stat; - assert(sender != receiver); - int tag = sender; - if ( _processor == sender ) { - MPI_Send(xmit, bytes, MPI_CHAR,receiver,tag,communicator); - } - if ( _processor == receiver ) { - MPI_Recv(recv, bytes, MPI_CHAR,sender,tag,communicator,&stat); - } -} - -// Basic Halo comms primitive -void CartesianCommunicator::SendToRecvFromBegin(std::vector &list, - void *xmit, - int dest, - void *recv, - int from, - int bytes) -{ - int myrank = _processor; - int ierr; - if ( CommunicatorPolicy == CommunicatorPolicyConcurrent ) { - MPI_Request xrq; - MPI_Request rrq; - - ierr =MPI_Irecv(recv, bytes, MPI_CHAR,from,from,communicator,&rrq); - ierr|=MPI_Isend(xmit, bytes, MPI_CHAR,dest,_processor,communicator,&xrq); - - assert(ierr==0); - list.push_back(xrq); - list.push_back(rrq); - } else { - // Give the CPU to MPI immediately; can use threads to overlap optionally - ierr=MPI_Sendrecv(xmit,bytes,MPI_CHAR,dest,myrank, - recv,bytes,MPI_CHAR,from, from, - communicator,MPI_STATUS_IGNORE); - assert(ierr==0); - } -} -void CartesianCommunicator::SendToRecvFromComplete(std::vector &list) -{ - if ( CommunicatorPolicy == CommunicatorPolicyConcurrent 
) { - int nreq=list.size(); - std::vector status(nreq); - int ierr = MPI_Waitall(nreq,&list[0],&status[0]); - assert(ierr==0); - } -} - -void CartesianCommunicator::Barrier(void) -{ - int ierr = MPI_Barrier(communicator); - assert(ierr==0); -} - -void CartesianCommunicator::Broadcast(int root,void* data, int bytes) -{ - int ierr=MPI_Bcast(data, - bytes, - MPI_BYTE, - root, - communicator); - assert(ierr==0); -} - /////////////////////////////////////////////////////// - // Should only be used prior to Grid Init finished. - // Check for this? - /////////////////////////////////////////////////////// -int CartesianCommunicator::RankWorld(void){ - int r; - MPI_Comm_rank(communicator_world,&r); - return r; -} -void CartesianCommunicator::BroadcastWorld(int root,void* data, int bytes) -{ - int ierr= MPI_Bcast(data, - bytes, - MPI_BYTE, - root, - communicator_world); - assert(ierr==0); -} - - - -} - diff --git a/lib/communicator/Communicator_mpi3_leader.cc b/lib/communicator/Communicator_mpi3_leader.cc deleted file mode 100644 index 6e26bd3e..00000000 --- a/lib/communicator/Communicator_mpi3_leader.cc +++ /dev/null @@ -1,988 +0,0 @@ - /************************************************************************************* - - Grid physics library, www.github.com/paboyle/Grid - - Source file: ./lib/communicator/Communicator_mpi.cc - - Copyright (C) 2015 - -Author: Peter Boyle - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. 
- - You should have received a copy of the GNU General Public License along - with this program; if not, write to the Free Software Foundation, Inc., - 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. - - See the full license in the file "LICENSE" in the top level distribution directory - *************************************************************************************/ - /* END LEGAL */ -#include "Grid.h" -#include -//#include - -//////////////////////////////////////////////////////////////////////////////////////////////////////////////// -/// Workarounds: -/// i) bloody mac os doesn't implement unnamed semaphores since it is "optional" posix. -/// darwin dispatch semaphores don't seem to be multiprocess. -/// -/// ii) openmpi under --mca shmem posix works with two squadrons per node; -/// openmpi under default mca settings (I think --mca shmem mmap) on MacOS makes two squadrons map the SAME -/// memory as each other, despite their living on different communicators. This appears to be a bug in OpenMPI. 
-/// -//////////////////////////////////////////////////////////////////////////////////////////////////////////////// -#include -#include -#include -#include -typedef sem_t *Grid_semaphore; - - -#error /*THis is deprecated*/ - -#if 0 -#define SEM_INIT(S) S = sem_open(sem_name,0,0600,0); assert ( S != SEM_FAILED ); -#define SEM_INIT_EXCL(S) sem_unlink(sem_name); S = sem_open(sem_name,O_CREAT|O_EXCL,0600,0); assert ( S != SEM_FAILED ); -#define SEM_POST(S) assert ( sem_post(S) == 0 ); -#define SEM_WAIT(S) assert ( sem_wait(S) == 0 ); -#else -#define SEM_INIT(S) ; -#define SEM_INIT_EXCL(S) ; -#define SEM_POST(S) ; -#define SEM_WAIT(S) ; -#endif -#include - -namespace Grid { - -enum { COMMAND_ISEND, COMMAND_IRECV, COMMAND_WAITALL, COMMAND_SENDRECV }; - -struct Descriptor { - uint64_t buf; - size_t bytes; - int rank; - int tag; - int command; - uint64_t xbuf; - uint64_t rbuf; - int xtag; - int rtag; - int src; - int dest; - MPI_Request request; -}; - -const int pool = 48; - -class SlaveState { -public: - volatile int head; - volatile int start; - volatile int tail; - volatile Descriptor Descrs[pool]; -}; - -class Slave { -public: - Grid_semaphore sem_head; - Grid_semaphore sem_tail; - SlaveState *state; - MPI_Comm squadron; - uint64_t base; - int universe_rank; - int vertical_rank; - char sem_name [NAME_MAX]; - //////////////////////////////////////////////////////////// - // Descriptor circular pointers - //////////////////////////////////////////////////////////// - Slave() {}; - - void Init(SlaveState * _state,MPI_Comm _squadron,int _universe_rank,int _vertical_rank); - - void SemInit(void) { - sprintf(sem_name,"/Grid_mpi3_sem_head_%d",universe_rank); - SEM_INIT(sem_head); - sprintf(sem_name,"/Grid_mpi3_sem_tail_%d",universe_rank); - SEM_INIT(sem_tail); - } - void SemInitExcl(void) { - sprintf(sem_name,"/Grid_mpi3_sem_head_%d",universe_rank); - SEM_INIT_EXCL(sem_head); - sprintf(sem_name,"/Grid_mpi3_sem_tail_%d",universe_rank); - SEM_INIT_EXCL(sem_tail); - } - void 
WakeUpDMA(void) { - SEM_POST(sem_head); - }; - void WakeUpCompute(void) { - SEM_POST(sem_tail); - }; - void WaitForCommand(void) { - SEM_WAIT(sem_head); - }; - void WaitForComplete(void) { - SEM_WAIT(sem_tail); - }; - void EventLoop (void) { - // std::cout<< " Entering event loop "<head,0,0); - int s=state->start; - if ( s != state->head ) { - _mm_mwait(0,0); - } -#endif - Event(); - } - } - - int Event (void) ; - - uint64_t QueueCommand(int command,void *buf, int bytes, int hashtag, MPI_Comm comm,int u_rank) ; - void QueueSendRecv(void *xbuf, void *rbuf, int bytes, int xtag, int rtag, MPI_Comm comm,int dest,int src) ; - - void WaitAll() { - // std::cout << "Queueing WAIT command "<tail != state->head ); - } -}; - -//////////////////////////////////////////////////////////////////////// -// One instance of a data mover. -// Master and Slave must agree on location in shared memory -//////////////////////////////////////////////////////////////////////// - -class MPIoffloadEngine { -public: - - static std::vector Slaves; - - static int ShmSetup; - - static int UniverseRank; - static int UniverseSize; - - static MPI_Comm communicator_universe; - static MPI_Comm communicator_cached; - - static MPI_Comm HorizontalComm; - static int HorizontalRank; - static int HorizontalSize; - - static MPI_Comm VerticalComm; - static MPI_Win VerticalWindow; - static int VerticalSize; - static int VerticalRank; - - static std::vector VerticalShmBufs; - static std::vector > UniverseRanks; - static std::vector UserCommunicatorToWorldRanks; - - static MPI_Group WorldGroup, CachedGroup; - - static void CommunicatorInit (MPI_Comm &communicator_world, - MPI_Comm &ShmComm, - void * &ShmCommBuf); - - static void MapCommRankToWorldRank(int &hashtag, int & comm_world_peer,int tag, MPI_Comm comm,int commrank); - - ///////////////////////////////////////////////////////// - // routines for master proc must handle any communicator - ///////////////////////////////////////////////////////// - - 
static void QueueSend(int slave,void *buf, int bytes, int tag, MPI_Comm comm,int rank) { - // std::cout<< " Queueing send "<< bytes<< " slave "<< slave << " to comm "<= units ) { - mywork = myoff = 0; - } else { - mywork = (nwork+me)/units; - myoff = basework * me; - if ( me > backfill ) - myoff+= (me-backfill); - } - return; - }; - - static void QueueRoundRobinSendRecv(void *xbuf, void *rbuf, int bytes, int xtag, int rtag, MPI_Comm comm,int dest,int src) { - uint8_t * cxbuf = (uint8_t *) xbuf; - uint8_t * crbuf = (uint8_t *) rbuf; - static int rrp=0; - int procs = VerticalSize-1; - int myoff=0; - int mywork=bytes; - QueueSendRecv(rrp+1,&cxbuf[myoff],&crbuf[myoff],mywork,xtag,rtag,comm,dest,src); - rrp = rrp+1; - if ( rrp == (VerticalSize-1) ) rrp = 0; - } - - static void QueueMultiplexedSendRecv(void *xbuf, void *rbuf, int bytes, int xtag, int rtag, MPI_Comm comm,int dest,int src) { - uint8_t * cxbuf = (uint8_t *) xbuf; - uint8_t * crbuf = (uint8_t *) rbuf; - int mywork, myoff, procs; - procs = VerticalSize-1; - for(int s=0;s MPIoffloadEngine::Slaves; - -int MPIoffloadEngine::UniverseRank; -int MPIoffloadEngine::UniverseSize; - -MPI_Comm MPIoffloadEngine::communicator_universe; -MPI_Comm MPIoffloadEngine::communicator_cached; -MPI_Group MPIoffloadEngine::WorldGroup; -MPI_Group MPIoffloadEngine::CachedGroup; - -MPI_Comm MPIoffloadEngine::HorizontalComm; -int MPIoffloadEngine::HorizontalRank; -int MPIoffloadEngine::HorizontalSize; - -MPI_Comm MPIoffloadEngine::VerticalComm; -int MPIoffloadEngine::VerticalSize; -int MPIoffloadEngine::VerticalRank; -MPI_Win MPIoffloadEngine::VerticalWindow; -std::vector MPIoffloadEngine::VerticalShmBufs; -std::vector > MPIoffloadEngine::UniverseRanks; -std::vector MPIoffloadEngine::UserCommunicatorToWorldRanks; - -int CartesianCommunicator::NodeCount(void) { return HorizontalSize;}; -int MPIoffloadEngine::ShmSetup = 0; - -void MPIoffloadEngine::CommunicatorInit (MPI_Comm &communicator_world, - MPI_Comm &ShmComm, - void * &ShmCommBuf) 
-{ - int flag; - assert(ShmSetup==0); - - ////////////////////////////////////////////////////////////////////// - // Universe is all nodes prior to squadron grouping - ////////////////////////////////////////////////////////////////////// - MPI_Comm_dup (MPI_COMM_WORLD,&communicator_universe); - MPI_Comm_rank(communicator_universe,&UniverseRank); - MPI_Comm_size(communicator_universe,&UniverseSize); - - ///////////////////////////////////////////////////////////////////// - // Split into groups that can share memory (Verticals) - ///////////////////////////////////////////////////////////////////// -#undef MPI_SHARED_MEM_DEBUG -#ifdef MPI_SHARED_MEM_DEBUG - MPI_Comm_split(communicator_universe,(UniverseRank/4),UniverseRank,&VerticalComm); -#else - MPI_Comm_split_type(communicator_universe, MPI_COMM_TYPE_SHARED, 0, MPI_INFO_NULL,&VerticalComm); -#endif - MPI_Comm_rank(VerticalComm ,&VerticalRank); - MPI_Comm_size(VerticalComm ,&VerticalSize); - - ////////////////////////////////////////////////////////////////////// - // Split into horizontal groups by rank in squadron - ////////////////////////////////////////////////////////////////////// - MPI_Comm_split(communicator_universe,VerticalRank,UniverseRank,&HorizontalComm); - MPI_Comm_rank(HorizontalComm,&HorizontalRank); - MPI_Comm_size(HorizontalComm,&HorizontalSize); - assert(HorizontalSize*VerticalSize==UniverseSize); - - //////////////////////////////////////////////////////////////////////////////// - // What is my place in the world - //////////////////////////////////////////////////////////////////////////////// - int WorldRank=0; - if(VerticalRank==0) WorldRank = HorizontalRank; - int ierr=MPI_Allreduce(MPI_IN_PLACE,&WorldRank,1,MPI_INT,MPI_SUM,VerticalComm); - assert(ierr==0); - - //////////////////////////////////////////////////////////////////////////////// - // Where is the world in the universe? 
- //////////////////////////////////////////////////////////////////////////////// - UniverseRanks = std::vector >(HorizontalSize,std::vector(VerticalSize,0)); - UniverseRanks[WorldRank][VerticalRank] = UniverseRank; - for(int w=0;w0 ) size = sizeof(SlaveState); - - sprintf(shm_name,"/Grid_mpi3_shm_%d_%d",WorldRank,r); - - shm_unlink(shm_name); - - int fd=shm_open(shm_name,O_RDWR|O_CREAT,0600); - if ( fd < 0 ) { - perror("failed shm_open"); - assert(0); - } - - ftruncate(fd, size); - - VerticalShmBufs[r] = mmap(NULL,size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0); - if ( VerticalShmBufs[r] == MAP_FAILED ) { - perror("failed mmap"); - assert(0); - } - - /* - for(uint64_t page=0;page0 ) size = sizeof(SlaveState); - - sprintf(shm_name,"/Grid_mpi3_shm_%d_%d",WorldRank,r); - - int fd=shm_open(shm_name,O_RDWR|O_CREAT,0600); - if ( fd<0 ) { - perror("failed shm_open"); - assert(0); - } - VerticalShmBufs[r] = mmap(NULL,size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0); - - uint64_t * check = (uint64_t *) VerticalShmBufs[r]; - assert(check[0]== WorldRank); - assert(check[1]== r); - // std::cerr<<"SHM "<"<"< cached_ranks(size); - - for(int r=0;r"<>0 )&0xFFFF)^((icomm>>16)&0xFFFF) - ^ ((icomm>>32)&0xFFFF)^((icomm>>48)&0xFFFF); - - // hashtag = (comm_hash<<15) | tag; - hashtag = tag; - -}; - -void Slave::Init(SlaveState * _state,MPI_Comm _squadron,int _universe_rank,int _vertical_rank) -{ - squadron=_squadron; - universe_rank=_universe_rank; - vertical_rank=_vertical_rank; - state =_state; - // std::cout << "state "<<_state<<" comm "<<_squadron<<" universe_rank"<head = state->tail = state->start = 0; - base = (uint64_t)MPIoffloadEngine::VerticalShmBufs[0]; - int rank; MPI_Comm_rank(_squadron,&rank); -} -#define PERI_PLUS(A) ( (A+1)%pool ) -int Slave::Event (void) { - - static int tail_last; - static int head_last; - static int start_last; - int ierr; - MPI_Status stat; - static int i=0; - - //////////////////////////////////////////////////// - // Try to advance the start 
pointers - //////////////////////////////////////////////////// - int s=state->start; - if ( s != state->head ) { - switch ( state->Descrs[s].command ) { - case COMMAND_ISEND: - ierr = MPI_Isend((void *)(state->Descrs[s].buf+base), - state->Descrs[s].bytes, - MPI_CHAR, - state->Descrs[s].rank, - state->Descrs[s].tag, - MPIoffloadEngine::communicator_universe, - (MPI_Request *)&state->Descrs[s].request); - assert(ierr==0); - state->start = PERI_PLUS(s); - return 1; - break; - - case COMMAND_IRECV: - ierr=MPI_Irecv((void *)(state->Descrs[s].buf+base), - state->Descrs[s].bytes, - MPI_CHAR, - state->Descrs[s].rank, - state->Descrs[s].tag, - MPIoffloadEngine::communicator_universe, - (MPI_Request *)&state->Descrs[s].request); - - // std::cout<< " Request is "<Descrs[s].request<Descrs[0].request<start = PERI_PLUS(s); - return 1; - break; - - case COMMAND_SENDRECV: - - // fprintf(stderr,"Sendrecv ->%d %d : <-%d %d \n",state->Descrs[s].dest, state->Descrs[s].xtag+i*10,state->Descrs[s].src, state->Descrs[s].rtag+i*10); - - ierr=MPI_Sendrecv((void *)(state->Descrs[s].xbuf+base), state->Descrs[s].bytes, MPI_CHAR, state->Descrs[s].dest, state->Descrs[s].xtag+i*10, - (void *)(state->Descrs[s].rbuf+base), state->Descrs[s].bytes, MPI_CHAR, state->Descrs[s].src , state->Descrs[s].rtag+i*10, - MPIoffloadEngine::communicator_universe,MPI_STATUS_IGNORE); - - assert(ierr==0); - - // fprintf(stderr,"Sendrecv done %d %d\n",ierr,i); - // MPI_Barrier(MPIoffloadEngine::HorizontalComm); - // fprintf(stderr,"Barrier\n"); - i++; - - state->start = PERI_PLUS(s); - - return 1; - break; - - case COMMAND_WAITALL: - - for(int t=state->tail;t!=s; t=PERI_PLUS(t) ){ - if ( state->Descrs[t].command != COMMAND_SENDRECV ) { - MPI_Wait((MPI_Request *)&state->Descrs[t].request,MPI_STATUS_IGNORE); - } - }; - s=PERI_PLUS(s); - state->start = s; - state->tail = s; - - WakeUpCompute(); - - return 1; - break; - - default: - assert(0); - break; - } - } - return 0; -} - 
////////////////////////////////////////////////////////////////////////////// - // External interaction with the queue - ////////////////////////////////////////////////////////////////////////////// - -void Slave::QueueSendRecv(void *xbuf, void *rbuf, int bytes, int xtag, int rtag, MPI_Comm comm,int dest,int src) -{ - int head =state->head; - int next = PERI_PLUS(head); - - // Set up descriptor - int worldrank; - int hashtag; - MPI_Comm communicator; - MPI_Request request; - uint64_t relative; - - relative = (uint64_t)xbuf - base; - state->Descrs[head].xbuf = relative; - - relative= (uint64_t)rbuf - base; - state->Descrs[head].rbuf = relative; - - state->Descrs[head].bytes = bytes; - - MPIoffloadEngine::MapCommRankToWorldRank(hashtag,worldrank,xtag,comm,dest); - state->Descrs[head].dest = MPIoffloadEngine::UniverseRanks[worldrank][vertical_rank]; - state->Descrs[head].xtag = hashtag; - - MPIoffloadEngine::MapCommRankToWorldRank(hashtag,worldrank,rtag,comm,src); - state->Descrs[head].src = MPIoffloadEngine::UniverseRanks[worldrank][vertical_rank]; - state->Descrs[head].rtag = hashtag; - - state->Descrs[head].command= COMMAND_SENDRECV; - - // Block until FIFO has space - while( state->tail==next ); - - // Msync on weak order architectures - - // Advance pointer - state->head = next; - -}; -uint64_t Slave::QueueCommand(int command,void *buf, int bytes, int tag, MPI_Comm comm,int commrank) -{ - ///////////////////////////////////////// - // Spin; if FIFO is full until not full - ///////////////////////////////////////// - int head =state->head; - int next = PERI_PLUS(head); - - // Set up descriptor - int worldrank; - int hashtag; - MPI_Comm communicator; - MPI_Request request; - - MPIoffloadEngine::MapCommRankToWorldRank(hashtag,worldrank,tag,comm,commrank); - - uint64_t relative= (uint64_t)buf - base; - state->Descrs[head].buf = relative; - state->Descrs[head].bytes = bytes; - state->Descrs[head].rank = MPIoffloadEngine::UniverseRanks[worldrank][vertical_rank]; - 
state->Descrs[head].tag = hashtag; - state->Descrs[head].command= command; - - /* - if ( command == COMMAND_ISEND ) { - std::cout << "QueueSend from "<< universe_rank <<" to commrank " << commrank - << " to worldrank " << worldrank <tail==next ); - - // Msync on weak order architectures - // Advance pointer - state->head = next; - - return 0; -} - - -/////////////////////////////////////////////////////////////////////////////////////////////////// -// Info that is setup once and indept of cartesian layout -/////////////////////////////////////////////////////////////////////////////////////////////////// - -MPI_Comm CartesianCommunicator::communicator_world; - -void CartesianCommunicator::Init(int *argc, char ***argv) -{ - int flag; - MPI_Initialized(&flag); // needed to coexist with other libs apparently - if ( !flag ) { - MPI_Init(argc,argv); - } - communicator_world = MPI_COMM_WORLD; - MPI_Comm ShmComm; - MPIoffloadEngine::CommunicatorInit (communicator_world,ShmComm,ShmCommBuf); -} -void CartesianCommunicator::ShiftedRanks(int dim,int shift,int &source,int &dest) -{ - int ierr=MPI_Cart_shift(communicator,dim,shift,&source,&dest); - assert(ierr==0); -} -int CartesianCommunicator::RankFromProcessorCoor(std::vector &coor) -{ - int rank; - int ierr=MPI_Cart_rank (communicator, &coor[0], &rank); - assert(ierr==0); - return rank; -} -void CartesianCommunicator::ProcessorCoorFromRank(int rank, std::vector &coor) -{ - coor.resize(_ndimension); - int ierr=MPI_Cart_coords (communicator, rank, _ndimension,&coor[0]); - assert(ierr==0); -} - -CartesianCommunicator::CartesianCommunicator(const std::vector &processors) -{ - _ndimension = processors.size(); - std::vector periodic(_ndimension,1); - - _Nprocessors=1; - _processors = processors; - - for(int i=0;i<_ndimension;i++){ - _Nprocessors*=_processors[i]; - } - - int Size; - MPI_Comm_size(communicator_world,&Size); - assert(Size==_Nprocessors); - - _processor_coor.resize(_ndimension); - MPI_Cart_create(communicator_world, 
_ndimension,&_processors[0],&periodic[0],1,&communicator); - MPI_Comm_rank (communicator,&_processor); - MPI_Cart_coords(communicator,_processor,_ndimension,&_processor_coor[0]); -}; - -void CartesianCommunicator::GlobalSum(uint32_t &u){ - int ierr=MPI_Allreduce(MPI_IN_PLACE,&u,1,MPI_UINT32_T,MPI_SUM,communicator); - assert(ierr==0); -} -void CartesianCommunicator::GlobalSum(uint64_t &u){ - int ierr=MPI_Allreduce(MPI_IN_PLACE,&u,1,MPI_UINT64_T,MPI_SUM,communicator); - assert(ierr==0); -} -void CartesianCommunicator::GlobalSum(float &f){ - int ierr=MPI_Allreduce(MPI_IN_PLACE,&f,1,MPI_FLOAT,MPI_SUM,communicator); - assert(ierr==0); -} -void CartesianCommunicator::GlobalSumVector(float *f,int N) -{ - int ierr=MPI_Allreduce(MPI_IN_PLACE,f,N,MPI_FLOAT,MPI_SUM,communicator); - assert(ierr==0); -} -void CartesianCommunicator::GlobalSum(double &d) -{ - int ierr = MPI_Allreduce(MPI_IN_PLACE,&d,1,MPI_DOUBLE,MPI_SUM,communicator); - assert(ierr==0); -} -void CartesianCommunicator::GlobalSumVector(double *d,int N) -{ - int ierr = MPI_Allreduce(MPI_IN_PLACE,d,N,MPI_DOUBLE,MPI_SUM,communicator); - assert(ierr==0); -} - -// Basic Halo comms primitive -void CartesianCommunicator::SendToRecvFrom(void *xmit, - int dest, - void *recv, - int from, - int bytes) -{ - std::vector reqs(0); - SendToRecvFromBegin(reqs,xmit,dest,recv,from,bytes); - SendToRecvFromComplete(reqs); -} - -void CartesianCommunicator::SendRecvPacket(void *xmit, - void *recv, - int sender, - int receiver, - int bytes) -{ - MPI_Status stat; - assert(sender != receiver); - int tag = sender; - if ( _processor == sender ) { - MPI_Send(xmit, bytes, MPI_CHAR,receiver,tag,communicator); - } - if ( _processor == receiver ) { - MPI_Recv(recv, bytes, MPI_CHAR,sender,tag,communicator,&stat); - } -} - -// Basic Halo comms primitive -void CartesianCommunicator::SendToRecvFromBegin(std::vector &list, - void *xmit, - int dest, - void *recv, - int from, - int bytes) -{ - MPI_Request xrq; - MPI_Request rrq; - int rank = _processor; 
- int ierr; - ierr =MPI_Isend(xmit, bytes, MPI_CHAR,dest,_processor,communicator,&xrq); - ierr|=MPI_Irecv(recv, bytes, MPI_CHAR,from,from,communicator,&rrq); - - assert(ierr==0); - - list.push_back(xrq); - list.push_back(rrq); -} - -void CartesianCommunicator::StencilSendToRecvFromBegin(std::vector &list, - void *xmit, - int dest, - void *recv, - int from, - int bytes) -{ - uint64_t xmit_i = (uint64_t) xmit; - uint64_t recv_i = (uint64_t) recv; - uint64_t shm = (uint64_t) ShmCommBuf; - // assert xmit and recv lie in shared memory region - assert( (xmit_i >= shm) && (xmit_i+bytes <= shm+MAX_MPI_SHM_BYTES) ); - assert( (recv_i >= shm) && (recv_i+bytes <= shm+MAX_MPI_SHM_BYTES) ); - assert(from!=_processor); - assert(dest!=_processor); - - MPIoffloadEngine::QueueMultiplexedSendRecv(xmit,recv,bytes,_processor,from,communicator,dest,from); - - //MPIoffloadEngine::QueueRoundRobinSendRecv(xmit,recv,bytes,_processor,from,communicator,dest,from); - - //MPIoffloadEngine::QueueMultiplexedSend(xmit,bytes,_processor,communicator,dest); - //MPIoffloadEngine::QueueMultiplexedRecv(recv,bytes,from,communicator,from); -} - -void CartesianCommunicator::StencilSendToRecvFromComplete(std::vector &list) -{ - MPIoffloadEngine::WaitAll(); - //this->Barrier(); -} - -void CartesianCommunicator::StencilBarrier(void) { } - -void CartesianCommunicator::SendToRecvFromComplete(std::vector &list) -{ - int nreq=list.size(); - std::vector status(nreq); - int ierr = MPI_Waitall(nreq,&list[0],&status[0]); - assert(ierr==0); -} - -void CartesianCommunicator::Barrier(void) -{ - int ierr = MPI_Barrier(communicator); - assert(ierr==0); -} - -void CartesianCommunicator::Broadcast(int root,void* data, int bytes) -{ - int ierr=MPI_Bcast(data, - bytes, - MPI_BYTE, - root, - communicator); - assert(ierr==0); -} - -void CartesianCommunicator::BroadcastWorld(int root,void* data, int bytes) -{ - int ierr= MPI_Bcast(data, - bytes, - MPI_BYTE, - root, - communicator_world); - assert(ierr==0); -} - -void 
*CartesianCommunicator::ShmBufferSelf(void) { return ShmCommBuf; } - -void *CartesianCommunicator::ShmBuffer(int rank) { - return NULL; -} -void *CartesianCommunicator::ShmBufferTranslate(int rank,void * local_p) { - return NULL; -} - - -}; - diff --git a/lib/communicator/Communicator_mpit.cc b/lib/communicator/Communicator_mpit.cc deleted file mode 100644 index bceea0d8..00000000 --- a/lib/communicator/Communicator_mpit.cc +++ /dev/null @@ -1,273 +0,0 @@ - /************************************************************************************* - - Grid physics library, www.github.com/paboyle/Grid - - Source file: ./lib/communicator/Communicator_mpi.cc - - Copyright (C) 2015 - -Author: Peter Boyle - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License along - with this program; if not, write to the Free Software Foundation, Inc., - 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
- - See the full license in the file "LICENSE" in the top level distribution directory - *************************************************************************************/ - /* END LEGAL */ -#include -#include -#include -#include - -namespace Grid { - - -/////////////////////////////////////////////////////////////////////////////////////////////////// -// Info that is setup once and indept of cartesian layout -/////////////////////////////////////////////////////////////////////////////////////////////////// -MPI_Comm CartesianCommunicator::communicator_world; - -// Should error check all MPI calls. -void CartesianCommunicator::Init(int *argc, char ***argv) { - int flag; - int provided; - MPI_Initialized(&flag); // needed to coexist with other libs apparently - if ( !flag ) { - MPI_Init_thread(argc,argv,MPI_THREAD_MULTIPLE,&provided); - if ( provided != MPI_THREAD_MULTIPLE ) { - QCD::WilsonKernelsStatic::Comms = QCD::WilsonKernelsStatic::CommsThenCompute; - } - } - MPI_Comm_dup (MPI_COMM_WORLD,&communicator_world); - ShmInitGeneric(); -} - -CartesianCommunicator::~CartesianCommunicator() -{ - int MPI_is_finalised; - MPI_Finalized(&MPI_is_finalised); - if (communicator && !MPI_is_finalised){ - MPI_Comm_free(&communicator); - for(int i=0;i< communicator_halo.size();i++){ - MPI_Comm_free(&communicator_halo[i]); - } - } -} - -void CartesianCommunicator::GlobalSum(uint32_t &u){ - int ierr=MPI_Allreduce(MPI_IN_PLACE,&u,1,MPI_UINT32_T,MPI_SUM,communicator); - assert(ierr==0); -} -void CartesianCommunicator::GlobalSum(uint64_t &u){ - int ierr=MPI_Allreduce(MPI_IN_PLACE,&u,1,MPI_UINT64_T,MPI_SUM,communicator); - assert(ierr==0); -} -void CartesianCommunicator::GlobalXOR(uint32_t &u){ - int ierr=MPI_Allreduce(MPI_IN_PLACE,&u,1,MPI_UINT32_T,MPI_BXOR,communicator); - assert(ierr==0); -} -void CartesianCommunicator::GlobalXOR(uint64_t &u){ - int ierr=MPI_Allreduce(MPI_IN_PLACE,&u,1,MPI_UINT64_T,MPI_BXOR,communicator); - assert(ierr==0); -} -void 
CartesianCommunicator::GlobalSum(float &f){ - int ierr=MPI_Allreduce(MPI_IN_PLACE,&f,1,MPI_FLOAT,MPI_SUM,communicator); - assert(ierr==0); -} -void CartesianCommunicator::GlobalSumVector(float *f,int N) -{ - int ierr=MPI_Allreduce(MPI_IN_PLACE,f,N,MPI_FLOAT,MPI_SUM,communicator); - assert(ierr==0); -} -void CartesianCommunicator::GlobalSum(double &d) -{ - int ierr = MPI_Allreduce(MPI_IN_PLACE,&d,1,MPI_DOUBLE,MPI_SUM,communicator); - assert(ierr==0); -} -void CartesianCommunicator::GlobalSumVector(double *d,int N) -{ - int ierr = MPI_Allreduce(MPI_IN_PLACE,d,N,MPI_DOUBLE,MPI_SUM,communicator); - assert(ierr==0); -} -void CartesianCommunicator::ShiftedRanks(int dim,int shift,int &source,int &dest) -{ - int ierr=MPI_Cart_shift(communicator,dim,shift,&source,&dest); - assert(ierr==0); -} -int CartesianCommunicator::RankFromProcessorCoor(std::vector &coor) -{ - int rank; - int ierr=MPI_Cart_rank (communicator, &coor[0], &rank); - assert(ierr==0); - return rank; -} -void CartesianCommunicator::ProcessorCoorFromRank(int rank, std::vector &coor) -{ - coor.resize(_ndimension); - int ierr=MPI_Cart_coords (communicator, rank, _ndimension,&coor[0]); - assert(ierr==0); -} - -// Basic Halo comms primitive -void CartesianCommunicator::SendToRecvFrom(void *xmit, - int dest, - void *recv, - int from, - int bytes) -{ - std::vector reqs(0); - SendToRecvFromBegin(reqs,xmit,dest,recv,from,bytes); - SendToRecvFromComplete(reqs); -} - -void CartesianCommunicator::SendRecvPacket(void *xmit, - void *recv, - int sender, - int receiver, - int bytes) -{ - MPI_Status stat; - assert(sender != receiver); - int tag = sender; - if ( _processor == sender ) { - MPI_Send(xmit, bytes, MPI_CHAR,receiver,tag,communicator); - } - if ( _processor == receiver ) { - MPI_Recv(recv, bytes, MPI_CHAR,sender,tag,communicator,&stat); - } -} - -// Basic Halo comms primitive -void CartesianCommunicator::SendToRecvFromBegin(std::vector &list, - void *xmit, - int dest, - void *recv, - int from, - int bytes) -{ - int 
myrank = _processor; - int ierr; - if ( CommunicatorPolicy == CommunicatorPolicyConcurrent ) { - MPI_Request xrq; - MPI_Request rrq; - - ierr =MPI_Irecv(recv, bytes, MPI_CHAR,from,from,communicator,&rrq); - ierr|=MPI_Isend(xmit, bytes, MPI_CHAR,dest,_processor,communicator,&xrq); - - assert(ierr==0); - list.push_back(xrq); - list.push_back(rrq); - } else { - // Give the CPU to MPI immediately; can use threads to overlap optionally - ierr=MPI_Sendrecv(xmit,bytes,MPI_CHAR,dest,myrank, - recv,bytes,MPI_CHAR,from, from, - communicator,MPI_STATUS_IGNORE); - assert(ierr==0); - } -} -void CartesianCommunicator::SendToRecvFromComplete(std::vector &list) -{ - if ( CommunicatorPolicy == CommunicatorPolicyConcurrent ) { - int nreq=list.size(); - std::vector status(nreq); - int ierr = MPI_Waitall(nreq,&list[0],&status[0]); - assert(ierr==0); - } -} - -void CartesianCommunicator::Barrier(void) -{ - int ierr = MPI_Barrier(communicator); - assert(ierr==0); -} - -void CartesianCommunicator::Broadcast(int root,void* data, int bytes) -{ - int ierr=MPI_Bcast(data, - bytes, - MPI_BYTE, - root, - communicator); - assert(ierr==0); -} - /////////////////////////////////////////////////////// - // Should only be used prior to Grid Init finished. - // Check for this? 
- /////////////////////////////////////////////////////// -int CartesianCommunicator::RankWorld(void){ - int r; - MPI_Comm_rank(communicator_world,&r); - return r; -} -void CartesianCommunicator::BroadcastWorld(int root,void* data, int bytes) -{ - int ierr= MPI_Bcast(data, - bytes, - MPI_BYTE, - root, - communicator_world); - assert(ierr==0); -} - -double CartesianCommunicator::StencilSendToRecvFromBegin(std::vector &list, - void *xmit, - int xmit_to_rank, - void *recv, - int recv_from_rank, - int bytes,int dir) -{ - int myrank = _processor; - int ierr; - int ncomm =communicator_halo.size(); - int commdir=dir%ncomm; - - // std::cout << " sending on communicator "< &waitall,int dir) -{ - int nreq=waitall.size(); - MPI_Waitall(nreq, &waitall[0], MPI_STATUSES_IGNORE); -} -double CartesianCommunicator::StencilSendToRecvFrom(void *xmit, - int xmit_to_rank, - void *recv, - int recv_from_rank, - int bytes,int dir) -{ - int myrank = _processor; - int ierr; - // std::cout << " sending on communicator "< - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License along - with this program; if not, write to the Free Software Foundation, Inc., - 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. - - See the full license in the file "LICENSE" in the top level distribution directory - *************************************************************************************/ - /* END LEGAL */ -#include -#include -#include - -namespace Grid { - - // Should error check all MPI calls. 
-#define SHMEM_VET(addr) - -#define SHMEM_VET_DEBUG(addr) { \ - if ( ! shmem_addr_accessible(addr,_processor) ) {\ - std::fprintf(stderr,"%d Inaccessible shmem address %lx %s %s\n",_processor,addr,__FUNCTION__,#addr); \ - BACKTRACEFILE(); \ - }\ -} - - -/////////////////////////////////////////////////////////////////////////////////////////////////// -// Info that is setup once and indept of cartesian layout -/////////////////////////////////////////////////////////////////////////////////////////////////// - -typedef struct HandShake_t { - uint64_t seq_local; - uint64_t seq_remote; -} HandShake; - -std::array make_psync_init(void) { - std::array ret; - ret.fill(SHMEM_SYNC_VALUE); - return ret; -} -static std::array psync_init = make_psync_init(); - -static Vector< HandShake > XConnections; -static Vector< HandShake > RConnections; - -void CartesianCommunicator::Init(int *argc, char ***argv) { - shmem_init(); - XConnections.resize(shmem_n_pes()); - RConnections.resize(shmem_n_pes()); - for(int pe =0 ; pe &processors,const CartesianCommunicator &parent) - : CartesianCommunicator(processors) -{ - std::cout << "Attempts to split SHMEM communicators will fail " < &processors) -{ - _ndimension = processors.size(); - std::vector periodic(_ndimension,1); - - _Nprocessors=1; - _processors = processors; - _processor_coor.resize(_ndimension); - - _processor = shmem_my_pe(); - - Lexicographic::CoorFromIndex(_processor_coor,_processor,_processors); - - for(int i=0;i<_ndimension;i++){ - _Nprocessors*=_processors[i]; - } - - int Size = shmem_n_pes(); - - - assert(Size==_Nprocessors); -} - -void CartesianCommunicator::GlobalSum(uint32_t &u){ - static long long source ; - static long long dest ; - static long long llwrk[_SHMEM_REDUCE_MIN_WRKDATA_SIZE]; - static std::array psync = psync_init; - - // int nreduce=1; - // int pestart=0; - // int logStride=0; - - source = u; - dest = 0; - shmem_longlong_sum_to_all(&dest,&source,1,0,0,_Nprocessors,llwrk,psync.data()); - 
shmem_barrier_all(); // necessary? - u = dest; -} -void CartesianCommunicator::GlobalSum(uint64_t &u){ - static long long source ; - static long long dest ; - static long long llwrk[_SHMEM_REDUCE_MIN_WRKDATA_SIZE]; - static std::array psync = psync_init; - - // int nreduce=1; - // int pestart=0; - // int logStride=0; - - source = u; - dest = 0; - shmem_longlong_sum_to_all(&dest,&source,1,0,0,_Nprocessors,llwrk,psync.data()); - shmem_barrier_all(); // necessary? - u = dest; -} -void CartesianCommunicator::GlobalSum(float &f){ - static float source ; - static float dest ; - static float llwrk[_SHMEM_REDUCE_MIN_WRKDATA_SIZE]; - static std::array psync = psync_init; - - source = f; - dest =0.0; - shmem_float_sum_to_all(&dest,&source,1,0,0,_Nprocessors,llwrk,psync.data()); - shmem_barrier_all(); - f = dest; -} -void CartesianCommunicator::GlobalSumVector(float *f,int N) -{ - static float source ; - static float dest = 0 ; - static float llwrk[_SHMEM_REDUCE_MIN_WRKDATA_SIZE]; - static std::array psync = psync_init; - - if ( shmem_addr_accessible(f,_processor) ){ - shmem_float_sum_to_all(f,f,N,0,0,_Nprocessors,llwrk,psync.data()); - shmem_barrier_all(); - return; - } - - for(int i=0;i psync = psync_init; - - source = d; - dest = 0; - shmem_double_sum_to_all(&dest,&source,1,0,0,_Nprocessors,llwrk,psync.data()); - shmem_barrier_all(); - d = dest; -} -void CartesianCommunicator::GlobalSumVector(double *d,int N) -{ - static double source ; - static double dest ; - static double llwrk[_SHMEM_REDUCE_MIN_WRKDATA_SIZE]; - static std::array psync = psync_init; - - - if ( shmem_addr_accessible(d,_processor) ){ - shmem_double_sum_to_all(d,d,N,0,0,_Nprocessors,llwrk,psync.data()); - shmem_barrier_all(); - return; - } - - for(int i=0;i coor = _processor_coor; - - assert(std::abs(shift) <_processors[dim]); - - coor[dim] = (_processor_coor[dim] + shift + _processors[dim])%_processors[dim]; - Lexicographic::IndexFromCoor(coor,source,_processors); - - coor[dim] = (_processor_coor[dim] - 
shift + _processors[dim])%_processors[dim]; - Lexicographic::IndexFromCoor(coor,dest,_processors); - -} -int CartesianCommunicator::RankFromProcessorCoor(std::vector &coor) -{ - int rank; - Lexicographic::IndexFromCoor(coor,rank,_processors); - return rank; -} -void CartesianCommunicator::ProcessorCoorFromRank(int rank, std::vector &coor) -{ - Lexicographic::CoorFromIndex(coor,rank,_processors); -} - -// Basic Halo comms primitive -void CartesianCommunicator::SendToRecvFrom(void *xmit, - int dest, - void *recv, - int from, - int bytes) -{ - SHMEM_VET(xmit); - SHMEM_VET(recv); - std::vector reqs(0); - SendToRecvFromBegin(reqs,xmit,dest,recv,from,bytes); - SendToRecvFromComplete(reqs); -} - -void CartesianCommunicator::SendRecvPacket(void *xmit, - void *recv, - int sender, - int receiver, - int bytes) -{ - static uint64_t seq; - - assert(recv!=xmit); - volatile HandShake *RecvSeq = (volatile HandShake *) & RConnections[sender]; - volatile HandShake *SendSeq = (volatile HandShake *) & XConnections[receiver]; - - if ( _processor == sender ) { - - // Check he has posted a receive - while(SendSeq->seq_remote == SendSeq->seq_local); - - // Advance our send count - seq = ++(SendSeq->seq_local); - - // Send this packet - SHMEM_VET(recv); - shmem_putmem(recv,xmit,bytes,receiver); - shmem_fence(); - - //Notify him we're done - shmem_putmem((void *)&(RecvSeq->seq_remote),&seq,sizeof(seq),receiver); - shmem_fence(); - } - if ( _processor == receiver ) { - - // Post a receive - seq = ++(RecvSeq->seq_local); - shmem_putmem((void *)&(SendSeq->seq_remote),&seq,sizeof(seq),sender); - - // Now wait until he has advanced our reception counter - while(RecvSeq->seq_remote != RecvSeq->seq_local); - - } -} - -// Basic Halo comms primitive -void CartesianCommunicator::SendToRecvFromBegin(std::vector &list, - void *xmit, - int dest, - void *recv, - int from, - int bytes) -{ - SHMEM_VET(xmit); - SHMEM_VET(recv); - // shmem_putmem_nb(recv,xmit,bytes,dest,NULL); - 
shmem_putmem(recv,xmit,bytes,dest); - - if ( CommunicatorPolicy == CommunicatorPolicySequential ) shmem_barrier_all(); -} -void CartesianCommunicator::SendToRecvFromComplete(std::vector &list) -{ - // shmem_quiet(); // I'm done - if( CommunicatorPolicy == CommunicatorPolicyConcurrent ) shmem_barrier_all();// He's done too -} -void CartesianCommunicator::Barrier(void) -{ - shmem_barrier_all(); -} -void CartesianCommunicator::Broadcast(int root,void* data, int bytes) -{ - static std::array psync = psync_init; - static uint32_t word; - uint32_t *array = (uint32_t *) data; - assert( (bytes % 4)==0); - int words = bytes/4; - - if ( shmem_addr_accessible(data,_processor) ){ - shmem_broadcast32(data,data,words,root,0,0,shmem_n_pes(),psync.data()); - return; - } - - for(int w=0;w psync = psync_init; - static uint32_t word; - uint32_t *array = (uint32_t *) data; - assert( (bytes % 4)==0); - int words = bytes/4; - - for(int w=0;w + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along + with this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+ + See the full license in the file "LICENSE" in the top level distribution directory +*************************************************************************************/ +/* END LEGAL */ + +#include + +namespace Grid { + +// static data + +uint64_t GlobalSharedMemory::MAX_MPI_SHM_BYTES = 1024LL*1024LL*1024LL; +int GlobalSharedMemory::Hugepages = 0; +int GlobalSharedMemory::ShmSetup; + +std::vector GlobalSharedMemory::WorldShmCommBufs; + +Grid_MPI_Comm GlobalSharedMemory::WorldShmComm; +int GlobalSharedMemory::WorldShmRank; +int GlobalSharedMemory::WorldShmSize; +std::vector GlobalSharedMemory::WorldShmRanks; + +Grid_MPI_Comm GlobalSharedMemory::WorldComm; +int GlobalSharedMemory::WorldSize; +int GlobalSharedMemory::WorldRank; + +int GlobalSharedMemory::WorldNodes; +int GlobalSharedMemory::WorldNode; + + +} diff --git a/lib/communicator/SharedMemory.h b/lib/communicator/SharedMemory.h new file mode 100644 index 00000000..2bb112e5 --- /dev/null +++ b/lib/communicator/SharedMemory.h @@ -0,0 +1,158 @@ +/************************************************************************************* + + Grid physics library, www.github.com/paboyle/Grid + + Source file: ./lib/communicator/SharedMemory.cc + + Copyright (C) 2015 + +Author: Peter Boyle + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along + with this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+ + See the full license in the file "LICENSE" in the top level distribution directory +*************************************************************************************/ +/* END LEGAL */ + + +// TODO +// 1) move includes into SharedMemory.cc +// +// 2) split shared memory into a) optimal communicator creation from comm world +// +// b) shared memory buffers container +// -- static globally shared; init once +// -- per instance set of buffers. +// + +#pragma once + +#include + +#if defined (GRID_COMMS_MPI3) +#include +#endif +#include +#include +#include +#include +#include +#include +#include +#include +#include +#ifdef HAVE_NUMAIF_H +#include +#endif + +namespace Grid { + +#if defined (GRID_COMMS_MPI3) + typedef MPI_Comm Grid_MPI_Comm; + typedef MPI_Request CommsRequest_t; +#else + typedef int CommsRequest_t; + typedef int Grid_MPI_Comm; +#endif + +class GlobalSharedMemory { + private: + // Init once lock on the buffer allocation + static int ShmSetup; + static const int MAXLOG2RANKSPERNODE = 16; + + public: + static uint64_t MAX_MPI_SHM_BYTES; + static int Hugepages; + + static std::vector WorldShmCommBufs; + + static Grid_MPI_Comm WorldComm; + static int WorldRank; + static int WorldSize; + + static Grid_MPI_Comm WorldShmComm; + static int WorldShmRank; + static int WorldShmSize; + + static int WorldNodes; + static int WorldNode; + + static std::vector WorldShmRanks; + + ////////////////////////////////////////////////////////////////////////////////////// + // Create an optimal reordered communicator that makes MPI_Cart_create get it right + ////////////////////////////////////////////////////////////////////////////////////// + static void Init(Grid_MPI_Comm comm); // Typically MPI_COMM_WORLD + static void OptimalCommunicator(const std::vector &processors,Grid_MPI_Comm & optimal_comm); // Turns MPI_COMM_WORLD into right layout for Cartesian + /////////////////////////////////////////////////// + // Provide shared memory facilities off comm world + 
/////////////////////////////////////////////////// + static void SharedMemoryAllocate(uint64_t bytes, int flags); + static void SharedMemoryFree(void); + +}; + +////////////////////////////// +// one per communicator +////////////////////////////// +class SharedMemory +{ + private: + static const int MAXLOG2RANKSPERNODE = 16; + + size_t heap_top; + size_t heap_bytes; + size_t heap_size; + + protected: + + Grid_MPI_Comm ShmComm; // for barriers + int ShmRank; + int ShmSize; + std::vector ShmCommBufs; + std::vector ShmRanks;// Mapping comm ranks to Shm ranks + + public: + SharedMemory() {}; + /////////////////////////////////////////////////////////////////////////////////////// + // set the buffers & sizes + /////////////////////////////////////////////////////////////////////////////////////// + void SetCommunicator(Grid_MPI_Comm comm); + + //////////////////////////////////////////////////////////////////////// + // For this instance ; disjoint buffer sets between splits if split grid + //////////////////////////////////////////////////////////////////////// + void ShmBarrier(void); + + /////////////////////////////////////////////////// + // Call on any instance + /////////////////////////////////////////////////// + void SharedMemoryTest(void); + void *ShmBufferSelf(void); + void *ShmBuffer (int rank); + void *ShmBufferTranslate(int rank,void * local_p); + void *ShmBufferMalloc(size_t bytes); + void ShmBufferFreeAll(void) ; + + ////////////////////////////////////////////////////////////////////////// + // Make info on Nodes & ranks and Shared memory available + ////////////////////////////////////////////////////////////////////////// + int NodeCount(void) { return GlobalSharedMemory::WorldNodes;}; + int RankCount(void) { return GlobalSharedMemory::WorldSize;}; + +}; + +} diff --git a/lib/communicator/SharedMemoryMPI.cc b/lib/communicator/SharedMemoryMPI.cc new file mode 100644 index 00000000..af4f9702 --- /dev/null +++ b/lib/communicator/SharedMemoryMPI.cc @@ 
-0,0 +1,415 @@ +/************************************************************************************* + + Grid physics library, www.github.com/paboyle/Grid + + Source file: ./lib/communicator/SharedMemory.cc + + Copyright (C) 2015 + +Author: Peter Boyle + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along + with this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + + See the full license in the file "LICENSE" in the top level distribution directory +*************************************************************************************/ +/* END LEGAL */ + +#include + +namespace Grid { + +/*Construct from an MPI communicator*/ +void GlobalSharedMemory::Init(Grid_MPI_Comm comm) +{ + WorldComm = comm; + MPI_Comm_rank(WorldComm,&WorldRank); + MPI_Comm_size(WorldComm,&WorldSize); + // WorldComm, WorldSize, WorldRank + + ///////////////////////////////////////////////////////////////////// + // Split into groups that can share memory + ///////////////////////////////////////////////////////////////////// + MPI_Comm_split_type(comm, MPI_COMM_TYPE_SHARED, 0, MPI_INFO_NULL,&WorldShmComm); + MPI_Comm_rank(WorldShmComm ,&WorldShmRank); + MPI_Comm_size(WorldShmComm ,&WorldShmSize); + // WorldShmComm, WorldShmSize, WorldShmRank + + // WorldNodes + WorldNodes = WorldSize/WorldShmSize; + assert( (WorldNodes * WorldShmSize) == WorldSize ); + + // FIXME: Check all WorldShmSize are the same ? 
+ + ///////////////////////////////////////////////////////////////////// + // find world ranks in our SHM group (i.e. which ranks are on our node) + ///////////////////////////////////////////////////////////////////// + MPI_Group WorldGroup, ShmGroup; + MPI_Comm_group (WorldComm, &WorldGroup); + MPI_Comm_group (WorldShmComm, &ShmGroup); + + std::vector world_ranks(WorldSize); for(int r=0;r MyGroup; + MyGroup.resize(WorldShmSize); + for(int rank=0;rank()); + int myleader = MyGroup[0]; + + std::vector leaders_1hot(WorldSize,0); + std::vector leaders_group(WorldNodes,0); + leaders_1hot [ myleader ] = 1; + + /////////////////////////////////////////////////////////////////// + // global sum leaders over comm world + /////////////////////////////////////////////////////////////////// + int ierr=MPI_Allreduce(MPI_IN_PLACE,&leaders_1hot[0],WorldSize,MPI_INT,MPI_SUM,WorldComm); + assert(ierr==0); + + /////////////////////////////////////////////////////////////////// + // find the group leaders world rank + /////////////////////////////////////////////////////////////////// + int group=0; + for(int l=0;l &processors,Grid_MPI_Comm & optimal_comm) +{ + //////////////////////////////////////////////////////////////// + // Assert power of two shm_size. 
+ //////////////////////////////////////////////////////////////// + int log2size = -1; + for(int i=0;i<=MAXLOG2RANKSPERNODE;i++){ + if ( (0x1< processor_coor(ndimension); + std::vector WorldDims = processors; std::vector ShmDims (ndimension,1); std::vector NodeDims (ndimension); + std::vector ShmCoor (ndimension); std::vector NodeCoor (ndimension); std::vector WorldCoor(ndimension); + int dim = 0; + for(int l2=0;l2 ranks(size); for(int r=0;r= heap_size) { + std::cout<< " ShmBufferMalloc exceeded shared heap size -- try increasing with --shm flag" < + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along + with this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+ + See the full license in the file "LICENSE" in the top level distribution directory +*************************************************************************************/ +/* END LEGAL */ + +#include + +namespace Grid { + +/*Construct from an MPI communicator*/ +void GlobalSharedMemory::Init(Grid_MPI_Comm comm) +{ + WorldComm = 0; + WorldRank = 0; + WorldSize = 1; + WorldShmComm = 0 ; + WorldShmRank = 0 ; + WorldShmSize = 1 ; + WorldNodes = 1 ; + WorldNode = 0 ; + WorldShmRanks.resize(WorldSize); WorldShmRanks[0] = 0; + WorldShmCommBufs.resize(1); +} + +void GlobalSharedMemory::OptimalCommunicator(const std::vector &processors,Grid_MPI_Comm & optimal_comm) +{ + optimal_comm = WorldComm; +} + +//////////////////////////////////////////////////////////////////////////////////////////// +// Hugetlbfs mapping intended, use anonymous mmap +//////////////////////////////////////////////////////////////////////////////////////////// +void GlobalSharedMemory::SharedMemoryAllocate(uint64_t bytes, int flags) +{ + void * ShmCommBuf ; + MAX_MPI_SHM_BYTES=bytes; + int mmap_flag =0; +#ifdef MAP_ANONYMOUS + mmap_flag = mmap_flag| MAP_SHARED | MAP_ANONYMOUS; +#endif +#ifdef MAP_ANON + mmap_flag = mmap_flag| MAP_SHARED | MAP_ANON; +#endif +#ifdef MAP_HUGETLB + if ( flags ) mmap_flag |= MAP_HUGETLB; +#endif + ShmCommBuf =(void *) mmap(NULL, bytes, PROT_READ | PROT_WRITE, mmap_flag, -1, 0); + if (ShmCommBuf == (void *)MAP_FAILED) { + perror("mmap failed "); + exit(EXIT_FAILURE); + } +#ifdef MADV_HUGEPAGE + if (!Hugepages ) madvise(ShmCommBuf,bytes,MADV_HUGEPAGE); +#endif + bzero(ShmCommBuf,bytes); + WorldShmCommBufs[0] = ShmCommBuf; +}; + +void GlobalSharedMemory::SharedMemoryFree(void) +{ + assert(ShmSetup); + assert(0); // unimplemented +} + + //////////////////////////////////////////////////////// + // Global shared functionality finished + // Now move to per communicator functionality + //////////////////////////////////////////////////////// +void 
SharedMemory::SetCommunicator(Grid_MPI_Comm comm) +{ + ShmRanks.resize(1); + ShmCommBufs.resize(1); + ShmRanks[0] = 0; + ShmRank = 0; + ShmSize = 1; + ////////////////////////////////////////////////////////////////////// + // Map ShmRank to WorldShmRank and use the right buffer + ////////////////////////////////////////////////////////////////////// + ShmCommBufs[0] = GlobalSharedMemory::WorldShmCommBufs[0]; + heap_size = GlobalSharedMemory::MAX_MPI_SHM_BYTES; + ShmBufferFreeAll(); + return; +} +////////////////////////////////////////////////////////////////// +// On node barrier +////////////////////////////////////////////////////////////////// +void SharedMemory::ShmBarrier(void){ return ; } + +////////////////////////////////////////////////////////////////////////////////////////////////////////// +// Test the shared memory is working +////////////////////////////////////////////////////////////////////////////////////////////////////////// +void SharedMemory::SharedMemoryTest(void) { return; } + +void *SharedMemory::ShmBufferSelf(void) +{ + return ShmCommBufs[ShmRank]; +} +void *SharedMemory::ShmBuffer(int rank) +{ + return NULL; +} +void *SharedMemory::ShmBufferTranslate(int rank,void * local_p) +{ + return NULL; +} + +///////////////////////////////// +// Alloc, free shmem region ; common to MPI and none? 
+///////////////////////////////// +void *SharedMemory::ShmBufferMalloc(size_t bytes){ + void *ptr = (void *)heap_top; + heap_top += bytes; + heap_bytes+= bytes; + if (heap_bytes >= heap_size) { + std::cout<< " ShmBufferMalloc exceeded shared heap size -- try increasing with --shm flag" < Date: Mon, 8 Jan 2018 11:36:39 +0000 Subject: [PATCH 281/377] Synthetic test of lanczos --- ..._dwf_compressed_lanczos_reorg_synthetic.cc | 330 ++++++++++++++++++ 1 file changed, 330 insertions(+) create mode 100644 tests/lanczos/Test_dwf_compressed_lanczos_reorg_synthetic.cc diff --git a/tests/lanczos/Test_dwf_compressed_lanczos_reorg_synthetic.cc b/tests/lanczos/Test_dwf_compressed_lanczos_reorg_synthetic.cc new file mode 100644 index 00000000..132dff4e --- /dev/null +++ b/tests/lanczos/Test_dwf_compressed_lanczos_reorg_synthetic.cc @@ -0,0 +1,330 @@ + /************************************************************************************* + + Grid physics library, www.github.com/paboyle/Grid + + Source file: ./tests/Test_dwf_compressed_lanczos_reorg.cc + + Copyright (C) 2017 + +Author: Leans heavily on Christoph Lehner's code +Author: Peter Boyle + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along + with this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+ + See the full license in the file "LICENSE" in the top level distribution directory + *************************************************************************************/ + /* END LEGAL */ +/* + * Reimplement the badly named "multigrid" lanczos as compressed Lanczos using the features + * in Grid that were intended to be used to support blocked Aggregates, from + */ +#include +#include + +using namespace std; +using namespace Grid; +using namespace Grid::QCD; + +template +class ProjectedHermOp : public LinearFunction > > { +public: + typedef iVector CoarseSiteVector; + typedef Lattice CoarseField; + typedef Lattice CoarseScalar; // used for inner products on fine field + typedef Lattice FineField; + + LinearOperatorBase &_Linop; + Aggregation &_Aggregate; + + ProjectedHermOp(LinearOperatorBase& linop, Aggregation &aggregate) : + _Linop(linop), + _Aggregate(aggregate) { }; + + void operator()(const CoarseField& in, CoarseField& out) { + + GridBase *FineGrid = _Aggregate.FineGrid; + FineField fin(FineGrid); + FineField fout(FineGrid); + + _Aggregate.PromoteFromSubspace(in,fin); + _Linop.HermOp(fin,fout); + _Aggregate.ProjectToSubspace(out,fout); + } +}; + +template +class ProjectedFunctionHermOp : public LinearFunction > > { +public: + typedef iVector CoarseSiteVector; + typedef Lattice CoarseField; + typedef Lattice CoarseScalar; // used for inner products on fine field + typedef Lattice FineField; + + + OperatorFunction & _poly; + LinearOperatorBase &_Linop; + Aggregation &_Aggregate; + + ProjectedFunctionHermOp(OperatorFunction & poly,LinearOperatorBase& linop, + Aggregation &aggregate) : + _poly(poly), + _Linop(linop), + _Aggregate(aggregate) { }; + + void operator()(const CoarseField& in, CoarseField& out) { + + GridBase *FineGrid = _Aggregate.FineGrid; + + FineField fin(FineGrid) ;fin.checkerboard =_Aggregate.checkerboard; + FineField fout(FineGrid);fout.checkerboard =_Aggregate.checkerboard; + + _Aggregate.PromoteFromSubspace(in,fin); + 
_poly(_Linop,fin,fout); + _Aggregate.ProjectToSubspace(out,fout); + } +}; + +// Make serializable Lanczos params + +template +class CoarseFineIRL +{ +public: + typedef iVector CoarseSiteVector; + typedef Lattice CoarseScalar; // used for inner products on fine field + typedef Lattice CoarseField; + typedef Lattice FineField; + +private: + GridBase *_CoarseGrid; + GridBase *_FineGrid; + int _checkerboard; + LinearOperatorBase & _FineOp; + Aggregation _Aggregate; + +public: + CoarseFineIRL(GridBase *FineGrid, + GridBase *CoarseGrid, + LinearOperatorBase &FineOp, + int checkerboard) : + _CoarseGrid(CoarseGrid), + _FineGrid(FineGrid), + _Aggregate(CoarseGrid,FineGrid,checkerboard), + _FineOp(FineOp), + _checkerboard(checkerboard) + {}; + + template static RealD normalise(T& v) + { + RealD nn = norm2(v); + nn = ::sqrt(nn); + v = v * (1.0/nn); + return nn; + } + + void testFine(void) + { + int Nk = nbasis; + _Aggregate.subspace.resize(Nk,_FineGrid); + _Aggregate.subspace[0]=1.0; + _Aggregate.subspace[0].checkerboard=_checkerboard; + normalise(_Aggregate.subspace[0]); + PlainHermOp Op(_FineOp); + for(int k=1;k Cheby(alpha,beta,Npoly); + FunctionHermOp ChebyOp(Cheby,_FineOp); + PlainHermOp Op(_FineOp); + + int Nk = nbasis; + + std::vector eval(Nm); + + FineField src(_FineGrid); src=1.0; src.checkerboard = _checkerboard; + + ImplicitlyRestartedLanczos IRL(ChebyOp,Op,Nk,Nk,Nm,resid,MaxIt,betastp,MinRes); + _Aggregate.subspace.resize(Nm,_FineGrid); + IRL.calc(eval,_Aggregate.subspace,src,Nk,false); + _Aggregate.subspace.resize(Nk,_FineGrid); + for(int k=0;k Cheby(alpha,beta,Npoly); + ProjectedHermOp Op(_FineOp,_Aggregate); + ProjectedFunctionHermOp ChebyOp(Cheby,_FineOp,_Aggregate); + + std::vector eval(Nm); + std::vector evec(Nm,_CoarseGrid); + + CoarseField src(_CoarseGrid); src=1.0; + + ImplicitlyRestartedLanczos IRL(ChebyOp,ChebyOp,Nk,Nk,Nm,resid,MaxIt,betastp,MinRes); + IRL.calc(eval,evec,src,Nk,false); + + // We got the evalues of the Cheby operator; + // Reconstruct 
eigenvalues of original operator via Chebyshev inverse + for (int i=0;i, blockSize, + std::string, config, + std::vector < std::complex >, omega, + RealD, mass, + RealD, M5 + ); +}; + +int main (int argc, char ** argv) { + + Grid_init(&argc,&argv); + + CompressedLanczosParams Params; + { + Params.omega.resize(10); + Params.blockSize.resize(5); + XmlWriter writer("Params_template.xml"); + write(writer,"Params",Params); + std::cout << GridLogMessage << " Written Params_template.xml" < blockSize = Params.blockSize; + + // Grids + GridCartesian * UGrid = SpaceTimeGrid::makeFourDimGrid(GridDefaultLatt(), GridDefaultSimd(Nd,vComplex::Nsimd()),GridDefaultMpi()); + GridRedBlackCartesian * UrbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(UGrid); + GridCartesian * FGrid = SpaceTimeGrid::makeFiveDimGrid(Ls,UGrid); + GridRedBlackCartesian * FrbGrid = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls,UGrid); + + std::vector fineLatt = GridDefaultLatt(); + int dims=fineLatt.size(); + assert(blockSize.size()==dims+1); + std::vector coarseLatt(dims); + std::vector coarseLatt5d ; + + for (int d=0;d seeds4({1,2,3,4}); + GridParallelRNG RNG4(UGrid); RNG4.SeedFixedIntegers(seeds4); + SU3::HotConfiguration(RNG4, Umu); + } + std::cout << GridLogMessage << "Lattice dimensions: " << GridDefaultLatt() << " Ls: " << Ls << std::endl; + + // ZMobius EO Operator + ZMobiusFermionR Ddwf(Umu, *FGrid, *FrbGrid, *UGrid, *UrbGrid, mass, M5, Params.omega,1.,0.); + SchurDiagTwoOperator HermOp(Ddwf); + + // Eigenvector storage + LanczosParams fine =Params.FineParams; + LanczosParams coarse=Params.CoarseParams; + const int Nm1 = fine.Nm; + const int Nm2 = coarse.Nm; + + std::cout << GridLogMessage << "Keep " << fine.Nk << " full vectors" << std::endl; + std::cout << GridLogMessage << "Keep " << coarse.Nk << " total vectors" << std::endl; + assert(Nm2 >= Nm1); + + const int nbasis= 70; + CoarseFineIRL IRL(FrbGrid,CoarseGrid5rb,HermOp,Odd); + + std::cout << GridLogMessage << "Constructed CoarseFine IRL" << 
std::endl; + + std::cout << GridLogMessage << "Performing fine grid IRL Nk "<< nbasis<<" Nm "< Date: Mon, 8 Jan 2018 14:06:53 +0000 Subject: [PATCH 282/377] Clean up --- lib/communicator/Communicator_base.h | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/lib/communicator/Communicator_base.h b/lib/communicator/Communicator_base.h index a9b99c17..359846c9 100644 --- a/lib/communicator/Communicator_base.h +++ b/lib/communicator/Communicator_base.h @@ -184,15 +184,10 @@ public: template void AllToAll(int dim,std::vector &in, std::vector &out){ assert(dim>=0); assert(dim<_ndimension); - int numnode = _processors[dim]; - // std::cerr << " AllToAll in.size() "< Date: Mon, 8 Jan 2018 15:20:26 +0000 Subject: [PATCH 283/377] Allow resize of the shared memory buffers --- TODO | 28 +++++++++++---- lib/communicator/SharedMemory.cc | 40 ++++++++++++++++++++- lib/communicator/SharedMemory.h | 10 ++++-- lib/communicator/SharedMemoryMPI.cc | 52 +++++++++------------------- lib/communicator/SharedMemoryNone.cc | 40 +++++---------------- 5 files changed, 93 insertions(+), 77 deletions(-) diff --git a/TODO b/TODO index 95ccf1df..746302ca 100644 --- a/TODO +++ b/TODO @@ -1,16 +1,32 @@ TODO: --------------- -Large item work list: +Code item work list + +a) namespaces & indentation + GRID_BEGIN_NAMESPACE(); + GRID_END_NAMESPACE(); +-- delete QCD namespace + +b) GPU branch +- start branch +- Increase Macro use in core library support; prepare for change +- Audit volume of "device" code +- Virtual function audit +- Start port once Nvidia box is up +- Cut down volume of code for first port? How? + +Physics item work list: 1)- BG/Q port and check ; Andrew says ok. 
-3a)- RNG I/O in ILDG/SciDAC (minor) -3c)- Consistent linear solver flop count/rate -- PARTIAL, time but no flop/s yet -4)- Physical propagator interface -6)- Multigrid Wilson and DWF, compare to other Multigrid implementations -7)- HDCR resume +2)- Consistent linear solver flop count/rate -- PARTIAL, time but no flop/s yet +3)- Physical propagator interface +4)- Multigrid Wilson and DWF, compare to other Multigrid implementations +5)- HDCR resume + ---------------------------- Recent DONE +-- RNG I/O in ILDG/SciDAC (minor) -- Precision conversion and sort out localConvert <-- partial/easy -- Conserved currents (Andrew) -- Split grid diff --git a/lib/communicator/SharedMemory.cc b/lib/communicator/SharedMemory.cc index f9d5e5bc..4682d420 100644 --- a/lib/communicator/SharedMemory.cc +++ b/lib/communicator/SharedMemory.cc @@ -34,7 +34,9 @@ namespace Grid { uint64_t GlobalSharedMemory::MAX_MPI_SHM_BYTES = 1024LL*1024LL*1024LL; int GlobalSharedMemory::Hugepages = 0; -int GlobalSharedMemory::ShmSetup; +int GlobalSharedMemory::_ShmSetup; +int GlobalSharedMemory::_ShmAlloc; +uint64_t GlobalSharedMemory::_ShmAllocBytes; std::vector GlobalSharedMemory::WorldShmCommBufs; @@ -50,5 +52,41 @@ int GlobalSharedMemory::WorldRank; int GlobalSharedMemory::WorldNodes; int GlobalSharedMemory::WorldNode; +void GlobalSharedMemory::SharedMemoryFree(void) +{ + assert(_ShmAlloc); + assert(_ShmAllocBytes>0); + for(int r=0;r= heap_size) { + std::cout<< " ShmBufferMalloc exceeded shared heap size -- try increasing with --shm flag" < &processors,Grid_MPI_Comm & optimal_comm) @@ -180,8 +182,8 @@ void GlobalSharedMemory::OptimalCommunicator(const std::vector &processors, #ifdef GRID_MPI3_SHMMMAP void GlobalSharedMemory::SharedMemoryAllocate(uint64_t bytes, int flags) { - GlobalSharedMemory::MAX_MPI_SHM_BYTES = bytes; - assert(ShmSetup==0); ShmSetup=1; + assert(_ShmSetup==1); + assert(_ShmAlloc==0); 
////////////////////////////////////////////////////////////////////////////////////////////////////////// // allocate the shared windows for our group ////////////////////////////////////////////////////////////////////////////////////////////////////////// @@ -214,8 +216,11 @@ void GlobalSharedMemory::SharedMemoryAllocate(uint64_t bytes, int flags) perror("failed mmap"); assert(0); } assert(((uint64_t)ptr&0x3F)==0); + close(fd); WorldShmCommBufs[r] =ptr; } + _ShmAlloc=1; + _ShmAllocBytes = bytes; }; #endif // MMAP @@ -227,8 +232,8 @@ void GlobalSharedMemory::SharedMemoryAllocate(uint64_t bytes, int flags) //////////////////////////////////////////////////////////////////////////////////////////// void GlobalSharedMemory::SharedMemoryAllocate(uint64_t bytes, int flags) { - GlobalSharedMemory::MAX_MPI_SHM_BYTES = bytes; - assert(ShmSetup==0); ShmSetup=1; + assert(_ShmSetup==1); + assert(_ShmAlloc==0); MPI_Barrier(WorldShmComm); WorldShmCommBufs.resize(WorldShmSize); @@ -258,6 +263,7 @@ void GlobalSharedMemory::SharedMemoryAllocate(uint64_t bytes, int flags) assert(((uint64_t)ptr&0x3F)==0); WorldShmCommBufs[r] =ptr; + close(fd); } } @@ -277,17 +283,15 @@ void GlobalSharedMemory::SharedMemoryAllocate(uint64_t bytes, int flags) if ( ptr == MAP_FAILED ) { perror("failed mmap"); assert(0); } assert(((uint64_t)ptr&0x3F)==0); WorldShmCommBufs[r] =ptr; + + close(fd); } } + _ShmAlloc=1; + _ShmAllocBytes = bytes; } #endif -void GlobalSharedMemory::SharedMemoryFree(void) -{ - assert(ShmSetup); - assert(0); // unimplemented -} - //////////////////////////////////////////////////////// // Global shared functionality finished // Now move to per communicator functionality @@ -310,7 +314,8 @@ void SharedMemory::SetCommunicator(Grid_MPI_Comm comm) ////////////////////////////////////////////////////////////////////// // Map ShmRank to WorldShmRank and use the right buffer ////////////////////////////////////////////////////////////////////// - heap_size = 
GlobalSharedMemory::MAX_MPI_SHM_BYTES; + assert (GlobalSharedMemory::ShmAlloc()==1); + heap_size = GlobalSharedMemory::ShmAllocBytes(); for(int r=0;r= heap_size) { - std::cout<< " ShmBufferMalloc exceeded shared heap size -- try increasing with --shm flag" < &processors,Grid_MPI_Comm & optimal_comm) @@ -56,7 +58,8 @@ void GlobalSharedMemory::OptimalCommunicator(const std::vector &processors, void GlobalSharedMemory::SharedMemoryAllocate(uint64_t bytes, int flags) { void * ShmCommBuf ; - MAX_MPI_SHM_BYTES=bytes; + assert(_ShmSetup==1); + assert(_ShmAlloc==0); int mmap_flag =0; #ifdef MAP_ANONYMOUS mmap_flag = mmap_flag| MAP_SHARED | MAP_ANONYMOUS; @@ -77,20 +80,17 @@ void GlobalSharedMemory::SharedMemoryAllocate(uint64_t bytes, int flags) #endif bzero(ShmCommBuf,bytes); WorldShmCommBufs[0] = ShmCommBuf; + _ShmAllocBytes=bytes; + _ShmAlloc=1; }; -void GlobalSharedMemory::SharedMemoryFree(void) -{ - assert(ShmSetup); - assert(0); // unimplemented -} - //////////////////////////////////////////////////////// // Global shared functionality finished // Now move to per communicator functionality //////////////////////////////////////////////////////// void SharedMemory::SetCommunicator(Grid_MPI_Comm comm) { + assert(GlobalSharedMemory::ShmAlloc()==1); ShmRanks.resize(1); ShmCommBufs.resize(1); ShmRanks[0] = 0; @@ -100,7 +100,7 @@ void SharedMemory::SetCommunicator(Grid_MPI_Comm comm) // Map ShmRank to WorldShmRank and use the right buffer ////////////////////////////////////////////////////////////////////// ShmCommBufs[0] = GlobalSharedMemory::WorldShmCommBufs[0]; - heap_size = GlobalSharedMemory::MAX_MPI_SHM_BYTES; + heap_size = GlobalSharedMemory::ShmAllocBytes(); ShmBufferFreeAll(); return; } @@ -114,10 +114,6 @@ void SharedMemory::ShmBarrier(void){ return ; } ////////////////////////////////////////////////////////////////////////////////////////////////////////// void SharedMemory::SharedMemoryTest(void) { return; } -void *SharedMemory::ShmBufferSelf(void) -{ - 
return ShmCommBufs[ShmRank]; -} void *SharedMemory::ShmBuffer(int rank) { return NULL; @@ -127,24 +123,4 @@ void *SharedMemory::ShmBufferTranslate(int rank,void * local_p) return NULL; } -///////////////////////////////// -// Alloc, free shmem region ; common to MPI and none? -///////////////////////////////// -void *SharedMemory::ShmBufferMalloc(size_t bytes){ - void *ptr = (void *)heap_top; - heap_top += bytes; - heap_bytes+= bytes; - if (heap_bytes >= heap_size) { - std::cout<< " ShmBufferMalloc exceeded shared heap size -- try increasing with --shm flag" < Date: Wed, 10 Jan 2018 10:59:58 +0000 Subject: [PATCH 284/377] Hadrons: result file macro with trajectory number --- extras/Hadrons/Global.cc | 7 +++++++ extras/Hadrons/Global.hpp | 13 +++++++++---- extras/Hadrons/Modules/MContraction/Baryon.hpp | 2 +- extras/Hadrons/Modules/MContraction/DiscLoop.hpp | 2 +- extras/Hadrons/Modules/MContraction/Gamma3pt.hpp | 2 +- extras/Hadrons/Modules/MContraction/Meson.hpp | 2 +- .../Modules/MContraction/WeakHamiltonianEye.cc | 2 +- .../Modules/MContraction/WeakHamiltonianNonEye.cc | 2 +- .../Modules/MContraction/WeakNeutral4ptDisc.cc | 2 +- extras/Hadrons/Modules/MScalar/ChargedProp.cc | 2 +- 10 files changed, 24 insertions(+), 12 deletions(-) diff --git a/extras/Hadrons/Global.cc b/extras/Hadrons/Global.cc index 942a4243..9a90a08c 100644 --- a/extras/Hadrons/Global.cc +++ b/extras/Hadrons/Global.cc @@ -67,3 +67,10 @@ std::string Hadrons::typeName(const std::type_info *info) return name; } + +// default writers/readers ///////////////////////////////////////////////////// +#ifdef HAVE_HDF5 +const std::string Hadrons::resultFileExt = "h5"; +#else +const std::string Hadrons::resultFileExt = "xml"; +#endif diff --git a/extras/Hadrons/Global.hpp b/extras/Hadrons/Global.hpp index 274e1934..fc069ed6 100644 --- a/extras/Hadrons/Global.hpp +++ b/extras/Hadrons/Global.hpp @@ -167,14 +167,19 @@ std::string typeName(void) } // default writers/readers +extern const std::string 
resultFileExt; + #ifdef HAVE_HDF5 -typedef Hdf5Reader CorrReader; -typedef Hdf5Writer CorrWriter; +typedef Hdf5Reader ResultReader; +typedef Hdf5Writer ResultWriter; #else -typedef XmlReader CorrReader; -typedef XmlWriter CorrWriter; +typedef XmlReader ResultReader; +typedef XmlWriter ResultWriter; #endif +#define RESULT_FILE_NAME(name) \ +name + "." + std::to_string(vm().getTrajectory()) + "." + resultFileExt + END_HADRONS_NAMESPACE #include diff --git a/extras/Hadrons/Modules/MContraction/Baryon.hpp b/extras/Hadrons/Modules/MContraction/Baryon.hpp index 625c7108..8966d95b 100644 --- a/extras/Hadrons/Modules/MContraction/Baryon.hpp +++ b/extras/Hadrons/Modules/MContraction/Baryon.hpp @@ -122,7 +122,7 @@ void TBaryon::execute(void) << " quarks '" << par().q1 << "', '" << par().q2 << "', and '" << par().q3 << "'" << std::endl; - CorrWriter writer(par().output); + ResultWriter writer(RESULT_FILE_NAME(par().output)); auto &q1 = envGet(PropagatorField1, par().q1); auto &q2 = envGet(PropagatorField2, par().q2); auto &q3 = envGet(PropagatorField3, par().q2); diff --git a/extras/Hadrons/Modules/MContraction/DiscLoop.hpp b/extras/Hadrons/Modules/MContraction/DiscLoop.hpp index 3d08f0eb..539abbbb 100644 --- a/extras/Hadrons/Modules/MContraction/DiscLoop.hpp +++ b/extras/Hadrons/Modules/MContraction/DiscLoop.hpp @@ -119,7 +119,7 @@ void TDiscLoop::execute(void) << "' using '" << par().q_loop << "' with " << par().gamma << " insertion." << std::endl; - CorrWriter writer(par().output); + ResultWriter writer(RESULT_FILE_NAME(par().output)); auto &q_loop = envGet(PropagatorField, par().q_loop); Gamma gamma(par().gamma); std::vector buf; diff --git a/extras/Hadrons/Modules/MContraction/Gamma3pt.hpp b/extras/Hadrons/Modules/MContraction/Gamma3pt.hpp index 68701aeb..b4327a13 100644 --- a/extras/Hadrons/Modules/MContraction/Gamma3pt.hpp +++ b/extras/Hadrons/Modules/MContraction/Gamma3pt.hpp @@ -153,7 +153,7 @@ void TGamma3pt::execute(void) // Initialise variables. 
q2 and q3 are normal propagators, q1 may be // sink smeared. - CorrWriter writer(par().output); + ResultWriter writer(RESULT_FILE_NAME(par().output)); auto &q1 = envGet(SlicedPropagator1, par().q1); auto &q2 = envGet(PropagatorField2, par().q2); auto &q3 = envGet(PropagatorField2, par().q3); diff --git a/extras/Hadrons/Modules/MContraction/Meson.hpp b/extras/Hadrons/Modules/MContraction/Meson.hpp index 5cf504e3..0197534d 100644 --- a/extras/Hadrons/Modules/MContraction/Meson.hpp +++ b/extras/Hadrons/Modules/MContraction/Meson.hpp @@ -172,7 +172,7 @@ void TMeson::execute(void) << " quarks '" << par().q1 << "' and '" << par().q2 << "'" << std::endl; - CorrWriter writer(par().output); + ResultWriter writer(RESULT_FILE_NAME(par().output)); std::vector buf; std::vector result; Gamma g5(Gamma::Algebra::Gamma5); diff --git a/extras/Hadrons/Modules/MContraction/WeakHamiltonianEye.cc b/extras/Hadrons/Modules/MContraction/WeakHamiltonianEye.cc index b79c09e7..1d257fc7 100644 --- a/extras/Hadrons/Modules/MContraction/WeakHamiltonianEye.cc +++ b/extras/Hadrons/Modules/MContraction/WeakHamiltonianEye.cc @@ -104,7 +104,7 @@ void TWeakHamiltonianEye::execute(void) << par().q2 << ", '" << par().q3 << "' and '" << par().q4 << "'." << std::endl; - CorrWriter writer(par().output); + ResultWriter writer(RESULT_FILE_NAME(par().output)); auto &q1 = envGet(SlicedPropagator, par().q1); auto &q2 = envGet(PropagatorField, par().q2); auto &q3 = envGet(PropagatorField, par().q3); diff --git a/extras/Hadrons/Modules/MContraction/WeakHamiltonianNonEye.cc b/extras/Hadrons/Modules/MContraction/WeakHamiltonianNonEye.cc index e66b6ee7..2ad2e7dc 100644 --- a/extras/Hadrons/Modules/MContraction/WeakHamiltonianNonEye.cc +++ b/extras/Hadrons/Modules/MContraction/WeakHamiltonianNonEye.cc @@ -104,7 +104,7 @@ void TWeakHamiltonianNonEye::execute(void) << par().q2 << ", '" << par().q3 << "' and '" << par().q4 << "'." 
<< std::endl; - CorrWriter writer(par().output); + ResultWriter writer(RESULT_FILE_NAME(par().output)); auto &q1 = envGet(PropagatorField, par().q1); auto &q2 = envGet(PropagatorField, par().q2); auto &q3 = envGet(PropagatorField, par().q3); diff --git a/extras/Hadrons/Modules/MContraction/WeakNeutral4ptDisc.cc b/extras/Hadrons/Modules/MContraction/WeakNeutral4ptDisc.cc index e0a00472..2c94b2ba 100644 --- a/extras/Hadrons/Modules/MContraction/WeakNeutral4ptDisc.cc +++ b/extras/Hadrons/Modules/MContraction/WeakNeutral4ptDisc.cc @@ -104,7 +104,7 @@ void TWeakNeutral4ptDisc::execute(void) << par().q2 << ", '" << par().q3 << "' and '" << par().q4 << "'." << std::endl; - CorrWriter writer(par().output); + ResultWriter writer(RESULT_FILE_NAME(par().output)); auto &q1 = envGet(PropagatorField, par().q1); auto &q2 = envGet(PropagatorField, par().q2); auto &q3 = envGet(PropagatorField, par().q3); diff --git a/extras/Hadrons/Modules/MScalar/ChargedProp.cc b/extras/Hadrons/Modules/MScalar/ChargedProp.cc index da82617f..1470f1ad 100644 --- a/extras/Hadrons/Modules/MScalar/ChargedProp.cc +++ b/extras/Hadrons/Modules/MScalar/ChargedProp.cc @@ -133,7 +133,7 @@ void TChargedProp::execute(void) LOG(Message) << "Saving zero-momentum projection to '" << filename << "'..." 
<< std::endl; - CorrWriter writer(filename); + ResultWriter writer(RESULT_FILE_NAME(par().output)); std::vector vecBuf; std::vector result; From 29f026c3758b6e5c1cd2fcaf6f11066f015d0284 Mon Sep 17 00:00:00 2001 From: Antonin Portelli Date: Wed, 10 Jan 2018 11:01:03 +0000 Subject: [PATCH 285/377] Hadrons: scalar SU(N) tr(phi^n) 1-pt function --- extras/Hadrons/Modules.hpp | 30 +--- extras/Hadrons/Modules/MScalarSUN/TrPhi.hpp | 155 ++++++++++++++++++++ extras/Hadrons/make_module_list.sh | 30 ---- extras/Hadrons/modules.inc | 1 + 4 files changed, 157 insertions(+), 59 deletions(-) create mode 100644 extras/Hadrons/Modules/MScalarSUN/TrPhi.hpp diff --git a/extras/Hadrons/Modules.hpp b/extras/Hadrons/Modules.hpp index e50d2b0b..523ac101 100644 --- a/extras/Hadrons/Modules.hpp +++ b/extras/Hadrons/Modules.hpp @@ -1,32 +1,3 @@ -/************************************************************************************* - -Grid physics library, www.github.com/paboyle/Grid - -Source file: extras/Hadrons/Modules.hpp - -Copyright (C) 2015-2018 - -Author: Antonin Portelli -Author: Lanny91 - -This program is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; either version 2 of the License, or -(at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License along -with this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
- -See the full license in the file "LICENSE" in the top level distribution directory -*************************************************************************************/ -/* END LEGAL */ - #include #include #include @@ -56,5 +27,6 @@ See the full license in the file "LICENSE" in the top level distribution directo #include #include #include +#include #include #include diff --git a/extras/Hadrons/Modules/MScalarSUN/TrPhi.hpp b/extras/Hadrons/Modules/MScalarSUN/TrPhi.hpp new file mode 100644 index 00000000..8c6bead7 --- /dev/null +++ b/extras/Hadrons/Modules/MScalarSUN/TrPhi.hpp @@ -0,0 +1,155 @@ +#ifndef Hadrons_MScalarSUN_TrPhi_hpp_ +#define Hadrons_MScalarSUN_TrPhi_hpp_ + +#include +#include +#include + +BEGIN_HADRONS_NAMESPACE + +/****************************************************************************** + * TrPhi * + ******************************************************************************/ +BEGIN_MODULE_NAMESPACE(MScalarSUN) + +class TrPhiPar: Serializable +{ +public: + GRID_SERIALIZABLE_CLASS_MEMBERS(TrPhiPar, + std::string, field, + unsigned int, maxPow, + std::string, output); +}; + +template +class TTrPhi: public Module +{ +public: + typedef typename SImpl::Field Field; + typedef typename SImpl::ComplexField ComplexField; + class Result: Serializable + { + public: + GRID_SERIALIZABLE_CLASS_MEMBERS(Result, + std::string, op, + Complex, value); + }; +public: + // constructor + TTrPhi(const std::string name); + // destructor + virtual ~TTrPhi(void) = default; + // dependency relation + virtual std::vector getInput(void); + virtual std::vector getOutput(void); + // setup + virtual void setup(void); + // execution + virtual void execute(void); +private: + // output name generator + std::string outName(const unsigned int n); +}; + +MODULE_REGISTER_NS(TrPhiSU2, TTrPhi>, MScalarSUN); +MODULE_REGISTER_NS(TrPhiSU3, TTrPhi>, MScalarSUN); +MODULE_REGISTER_NS(TrPhiSU4, TTrPhi>, MScalarSUN); +MODULE_REGISTER_NS(TrPhiSU5, TTrPhi>, MScalarSUN); 
+MODULE_REGISTER_NS(TrPhiSU6, TTrPhi>, MScalarSUN); + +/****************************************************************************** + * TTrPhi implementation * + ******************************************************************************/ +// constructor ///////////////////////////////////////////////////////////////// +template +TTrPhi::TTrPhi(const std::string name) +: Module(name) +{} + +// dependencies/products /////////////////////////////////////////////////////// +template +std::vector TTrPhi::getInput(void) +{ + std::vector in = {par().field}; + + return in; +} + +template +std::vector TTrPhi::getOutput(void) +{ + std::vector out; + + for (unsigned int n = 2; n <= par().maxPow; n += 2) + { + out.push_back(outName(n)); + } + + return out; +} + +// setup /////////////////////////////////////////////////////////////////////// +template +void TTrPhi::setup(void) +{ + if (par().maxPow < 2) + { + HADRON_ERROR(Size, "'maxPow' should be at least equal to 2"); + } + envTmpLat(Field, "phi2"); + envTmpLat(Field, "buf"); + for (unsigned int n = 2; n <= par().maxPow; n += 2) + { + envCreateLat(ComplexField, outName(n)); + } +} + +// execution /////////////////////////////////////////////////////////////////// +template +void TTrPhi::execute(void) +{ + LOG(Message) << "Computing tr(phi^n) for n even up to " << par().maxPow + << "..." 
<< std::endl; + + std::vector result; + auto &phi = envGet(Field, par().field); + + envGetTmp(Field, phi2); + envGetTmp(Field, buf); + buf = 1.; + phi2 = -phi*phi; + for (unsigned int n = 2; n <= par().maxPow; n += 2) + { + auto &phin = envGet(ComplexField, outName(n)); + + buf = buf*phi2; + phin = trace(buf); + if (!par().output.empty()) + { + Result r; + + r.op = "phi" + std::to_string(n); + r.value = TensorRemove(sum(phin)); + result.push_back(r); + } + } + if (result.size() > 0) + { + ResultWriter writer(RESULT_FILE_NAME(par().output)); + + write(writer, "trphi", result); + } +} + +// output name generator /////////////////////////////////////////////////////// +template +std::string TTrPhi::outName(const unsigned int n) +{ + return getName() + "_" + std::to_string(n); +} + +END_MODULE_NAMESPACE + +END_HADRONS_NAMESPACE + +#endif // Hadrons_MScalarSUN_TrPhi_hpp_ diff --git a/extras/Hadrons/make_module_list.sh b/extras/Hadrons/make_module_list.sh index 8c6fa4da..ddc56ff6 100755 --- a/extras/Hadrons/make_module_list.sh +++ b/extras/Hadrons/make_module_list.sh @@ -7,36 +7,6 @@ echo 'modules_hpp =\' >> modules.inc find Modules -name '*.hpp' -type f -print | sed 's/^/ /;$q;s/$/ \\/' >> modules.inc echo '' >> modules.inc rm -f Modules.hpp -echo "/************************************************************************************* - -Grid physics library, www.github.com/paboyle/Grid - -Source file: extras/Hadrons/Modules.hpp - -Copyright (C) 2015 -Copyright (C) 2016 -Copyright (C) 2017 - -Author: Antonin Portelli - -This program is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; either version 2 of the License, or -(at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License along -with this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. - -See the full license in the file \"LICENSE\" in the top level distribution directory -*************************************************************************************/ -/* END LEGAL */ -" > Modules.hpp for f in `find Modules -name '*.hpp'`; do echo "#include " >> Modules.hpp done diff --git a/extras/Hadrons/modules.inc b/extras/Hadrons/modules.inc index 6e1ef6dc..00ef323f 100644 --- a/extras/Hadrons/modules.inc +++ b/extras/Hadrons/modules.inc @@ -39,6 +39,7 @@ modules_hpp =\ Modules/MScalar/ChargedProp.hpp \ Modules/MAction/DWF.hpp \ Modules/MAction/Wilson.hpp \ + Modules/MScalarSUN/TrPhi.hpp \ Modules/MIO/LoadNersc.hpp \ Modules/MIO/LoadBinary.hpp From b7cd7213085c7050f17c81ba2757c1d8ae63c914 Mon Sep 17 00:00:00 2001 From: Antonin Portelli Date: Wed, 10 Jan 2018 11:25:59 +0000 Subject: [PATCH 286/377] Hadrons: scalar SU(N) tr(mag^n) --- extras/Hadrons/Modules.hpp | 1 + extras/Hadrons/Modules/MScalarSUN/TrMag.hpp | 119 ++++++++++++++++++++ extras/Hadrons/modules.inc | 1 + 3 files changed, 121 insertions(+) create mode 100644 extras/Hadrons/Modules/MScalarSUN/TrMag.hpp diff --git a/extras/Hadrons/Modules.hpp b/extras/Hadrons/Modules.hpp index 523ac101..1d059a79 100644 --- a/extras/Hadrons/Modules.hpp +++ b/extras/Hadrons/Modules.hpp @@ -27,6 +27,7 @@ #include #include #include +#include #include #include #include diff --git a/extras/Hadrons/Modules/MScalarSUN/TrMag.hpp b/extras/Hadrons/Modules/MScalarSUN/TrMag.hpp new file mode 100644 index 00000000..f33784fa --- /dev/null +++ b/extras/Hadrons/Modules/MScalarSUN/TrMag.hpp @@ -0,0 +1,119 @@ +#ifndef Hadrons_MScalarSUN_TrMag_hpp_ +#define Hadrons_MScalarSUN_TrMag_hpp_ + +#include +#include +#include + +BEGIN_HADRONS_NAMESPACE + 
+/****************************************************************************** + * TrMag * + ******************************************************************************/ +BEGIN_MODULE_NAMESPACE(MScalarSUN) + +class TrMagPar: Serializable +{ +public: + GRID_SERIALIZABLE_CLASS_MEMBERS(TrMagPar, + std::string, field, + unsigned int, maxPow, + std::string, output); +}; + +template +class TTrMag: public Module +{ +public: + typedef typename SImpl::Field Field; + typedef typename SImpl::ComplexField ComplexField; + class Result: Serializable + { + public: + GRID_SERIALIZABLE_CLASS_MEMBERS(Result, + std::string, op, + Real, value); + }; +public: + // constructor + TTrMag(const std::string name); + // destructor + virtual ~TTrMag(void) = default; + // dependency relation + virtual std::vector getInput(void); + virtual std::vector getOutput(void); + // setup + virtual void setup(void); + // execution + virtual void execute(void); +}; + +MODULE_REGISTER_NS(TrMagSU2, TTrMag>, MScalarSUN); +MODULE_REGISTER_NS(TrMagSU3, TTrMag>, MScalarSUN); +MODULE_REGISTER_NS(TrMagSU4, TTrMag>, MScalarSUN); +MODULE_REGISTER_NS(TrMagSU5, TTrMag>, MScalarSUN); +MODULE_REGISTER_NS(TrMagSU6, TTrMag>, MScalarSUN); + +/****************************************************************************** + * TTrMag implementation * + ******************************************************************************/ +// constructor ///////////////////////////////////////////////////////////////// +template +TTrMag::TTrMag(const std::string name) +: Module(name) +{} + +// dependencies/products /////////////////////////////////////////////////////// +template +std::vector TTrMag::getInput(void) +{ + std::vector in = {par().field}; + + return in; +} + +template +std::vector TTrMag::getOutput(void) +{ + std::vector out = {}; + + return out; +} + +// setup /////////////////////////////////////////////////////////////////////// +template +void TTrMag::setup(void) +{} + +// execution 
/////////////////////////////////////////////////////////////////// +template +void TTrMag::execute(void) +{ + LOG(Message) << "Computing tr(mag^n) for n even up to " << par().maxPow + << "..." << std::endl; + + std::vector result; + ResultWriter writer(RESULT_FILE_NAME(par().output)); + auto &phi = envGet(Field, par().field); + + auto m2 = sum(phi), mn = m2; + + m2 = -m2*m2; + mn = 1.; + for (unsigned int n = 2; n <= par().maxPow; n += 2) + { + Result r; + + mn = mn*m2; + r.op = "tr(mag^" + std::to_string(n) + ")"; + r.value = TensorRemove(trace(mn)).real(); + result.push_back(r); + } + write(writer, "trmag", result); +} + +END_MODULE_NAMESPACE + +END_HADRONS_NAMESPACE + +#endif // Hadrons_MScalarSUN_TrMag_hpp_ diff --git a/extras/Hadrons/modules.inc b/extras/Hadrons/modules.inc index 00ef323f..cea4dc2a 100644 --- a/extras/Hadrons/modules.inc +++ b/extras/Hadrons/modules.inc @@ -39,6 +39,7 @@ modules_hpp =\ Modules/MScalar/ChargedProp.hpp \ Modules/MAction/DWF.hpp \ Modules/MAction/Wilson.hpp \ + Modules/MScalarSUN/TrMag.hpp \ Modules/MScalarSUN/TrPhi.hpp \ Modules/MIO/LoadNersc.hpp \ Modules/MIO/LoadBinary.hpp From d9d1f43ba208fed87fae930e182e4b6cd550da6f Mon Sep 17 00:00:00 2001 From: Antonin Portelli Date: Wed, 10 Jan 2018 11:29:49 +0000 Subject: [PATCH 287/377] Hadrons: code cleaning --- extras/Hadrons/Modules.hpp | 28 ++++++++++++++++ extras/Hadrons/Modules/MScalarSUN/TrMag.hpp | 31 +++++++++++++++-- extras/Hadrons/Modules/MScalarSUN/TrPhi.hpp | 37 ++++++++++++++++++--- 3 files changed, 89 insertions(+), 7 deletions(-) diff --git a/extras/Hadrons/Modules.hpp b/extras/Hadrons/Modules.hpp index 1d059a79..eea16839 100644 --- a/extras/Hadrons/Modules.hpp +++ b/extras/Hadrons/Modules.hpp @@ -1,3 +1,31 @@ +/************************************************************************************* + +Grid physics library, www.github.com/paboyle/Grid + +Source file: extras/Hadrons/Modules.hpp + +Copyright (C) 2015-2018 + +Author: Antonin Portelli +Author: Lanny91 + +This 
program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along +with this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + +See the full license in the file "LICENSE" in the top level distribution directory +*************************************************************************************/ +/* END LEGAL */ #include #include #include diff --git a/extras/Hadrons/Modules/MScalarSUN/TrMag.hpp b/extras/Hadrons/Modules/MScalarSUN/TrMag.hpp index f33784fa..96eb794e 100644 --- a/extras/Hadrons/Modules/MScalarSUN/TrMag.hpp +++ b/extras/Hadrons/Modules/MScalarSUN/TrMag.hpp @@ -1,3 +1,30 @@ +/************************************************************************************* + +Grid physics library, www.github.com/paboyle/Grid + +Source file: extras/Hadrons/Modules/MScalarSUN/TrMag.hpp + +Copyright (C) 2015-2018 + +Author: Antonin Portelli + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. 
+ +You should have received a copy of the GNU General Public License along +with this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + +See the full license in the file "LICENSE" in the top level distribution directory +*************************************************************************************/ +/* END LEGAL */ #ifndef Hadrons_MScalarSUN_TrMag_hpp_ #define Hadrons_MScalarSUN_TrMag_hpp_ @@ -8,7 +35,7 @@ BEGIN_HADRONS_NAMESPACE /****************************************************************************** - * TrMag * + * Module to compute tr(mag^n) * ******************************************************************************/ BEGIN_MODULE_NAMESPACE(MScalarSUN) @@ -55,7 +82,7 @@ MODULE_REGISTER_NS(TrMagSU5, TTrMag>, MScalarSUN); MODULE_REGISTER_NS(TrMagSU6, TTrMag>, MScalarSUN); /****************************************************************************** - * TTrMag implementation * + * TTrMag implementation * ******************************************************************************/ // constructor ///////////////////////////////////////////////////////////////// template diff --git a/extras/Hadrons/Modules/MScalarSUN/TrPhi.hpp b/extras/Hadrons/Modules/MScalarSUN/TrPhi.hpp index 8c6bead7..4586663d 100644 --- a/extras/Hadrons/Modules/MScalarSUN/TrPhi.hpp +++ b/extras/Hadrons/Modules/MScalarSUN/TrPhi.hpp @@ -1,3 +1,30 @@ +/************************************************************************************* + +Grid physics library, www.github.com/paboyle/Grid + +Source file: extras/Hadrons/Modules/MScalarSUN/TrPhi.hpp + +Copyright (C) 2015-2018 + +Author: Antonin Portelli + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. 
+ +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along +with this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + +See the full license in the file "LICENSE" in the top level distribution directory +*************************************************************************************/ +/* END LEGAL */ #ifndef Hadrons_MScalarSUN_TrPhi_hpp_ #define Hadrons_MScalarSUN_TrPhi_hpp_ @@ -8,7 +35,7 @@ BEGIN_HADRONS_NAMESPACE /****************************************************************************** - * TrPhi * + * Module to compute tr(phi^n) * ******************************************************************************/ BEGIN_MODULE_NAMESPACE(MScalarSUN) @@ -32,7 +59,7 @@ public: public: GRID_SERIALIZABLE_CLASS_MEMBERS(Result, std::string, op, - Complex, value); + Real, value); }; public: // constructor @@ -58,7 +85,7 @@ MODULE_REGISTER_NS(TrPhiSU5, TTrPhi>, MScalarSUN); MODULE_REGISTER_NS(TrPhiSU6, TTrPhi>, MScalarSUN); /****************************************************************************** - * TTrPhi implementation * + * TTrPhi implementation * ******************************************************************************/ // constructor ///////////////////////////////////////////////////////////////// template @@ -128,8 +155,8 @@ void TTrPhi::execute(void) { Result r; - r.op = "phi" + std::to_string(n); - r.value = TensorRemove(sum(phin)); + r.op = "tr(phi^" + std::to_string(n) + ")"; + r.value = TensorRemove(sum(phin)).real(); result.push_back(r); } } From a3affac963b85fa2f8de4296c58b31eaf737279b Mon Sep 17 00:00:00 2001 From: David Preti Date: Wed, 10 Jan 2018 14:56:54 +0100 Subject: [PATCH 288/377] SU3 restored + output 
filename for mesons and baryons fixed. --- extras/Hadrons/Modules/MContraction/Baryon.hpp | 4 +++- extras/Hadrons/Modules/MContraction/Meson.hpp | 4 +++- extras/Hadrons/Modules/MGauge/Random.cc | 2 +- extras/Hadrons/Modules/MGauge/Unit.cc | 2 +- lib/qcd/QCD.h | 2 +- 5 files changed, 9 insertions(+), 5 deletions(-) diff --git a/extras/Hadrons/Modules/MContraction/Baryon.hpp b/extras/Hadrons/Modules/MContraction/Baryon.hpp index 78bde5a2..358f7025 100644 --- a/extras/Hadrons/Modules/MContraction/Baryon.hpp +++ b/extras/Hadrons/Modules/MContraction/Baryon.hpp @@ -112,7 +112,9 @@ void TBaryon::execute(void) << " quarks '" << par().q1 << "', '" << par().q2 << "', and '" << par().q3 << "'" << std::endl; - CorrWriter writer(par().output); + std::string output_name = par().output + "." + std::to_string(env().getTrajectory()); + + CorrWriter writer(output_name); PropagatorField1 &q1 = *env().template getObject(par().q1); PropagatorField2 &q2 = *env().template getObject(par().q2); PropagatorField3 &q3 = *env().template getObject(par().q2); diff --git a/extras/Hadrons/Modules/MContraction/Meson.hpp b/extras/Hadrons/Modules/MContraction/Meson.hpp index 7810326a..5355bace 100644 --- a/extras/Hadrons/Modules/MContraction/Meson.hpp +++ b/extras/Hadrons/Modules/MContraction/Meson.hpp @@ -165,8 +165,10 @@ void TMeson::execute(void) LOG(Message) << "Computing meson contractions '" << getName() << "' using" << " quarks '" << par().q1 << "' and '" << par().q2 << "'" << std::endl; + + std::string output_name = par().output + "." 
+ std::to_string(env().getTrajectory()); - CorrWriter writer(par().output); + CorrWriter writer(output_name); std::vector buf; std::vector result; Gamma g5(Gamma::Algebra::Gamma5); diff --git a/extras/Hadrons/Modules/MGauge/Random.cc b/extras/Hadrons/Modules/MGauge/Random.cc index 5c063361..c10fdfc3 100644 --- a/extras/Hadrons/Modules/MGauge/Random.cc +++ b/extras/Hadrons/Modules/MGauge/Random.cc @@ -65,5 +65,5 @@ void TRandom::execute(void) { LOG(Message) << "Generating random gauge configuration" << std::endl; LatticeGaugeField &U = *env().createLattice(getName()); - SU4::HotConfiguration(*env().get4dRng(), U); + SU3::HotConfiguration(*env().get4dRng(), U); } diff --git a/extras/Hadrons/Modules/MGauge/Unit.cc b/extras/Hadrons/Modules/MGauge/Unit.cc index 12696ee9..18d75c59 100644 --- a/extras/Hadrons/Modules/MGauge/Unit.cc +++ b/extras/Hadrons/Modules/MGauge/Unit.cc @@ -65,5 +65,5 @@ void TUnit::execute(void) { LOG(Message) << "Creating unit gauge configuration" << std::endl; LatticeGaugeField &U = *env().createLattice(getName()); - SU4::ColdConfiguration(*env().get4dRng(), U); + SU3::ColdConfiguration(*env().get4dRng(), U); } diff --git a/lib/qcd/QCD.h b/lib/qcd/QCD.h index 2caea7e9..9c6d54d4 100644 --- a/lib/qcd/QCD.h +++ b/lib/qcd/QCD.h @@ -49,7 +49,7 @@ namespace QCD { static const int Zm = 6; static const int Tm = 7; - static const int Nc=4; + static const int Nc=3; static const int Ns=4; static const int Nd=4; static const int Nhs=2; // half spinor From cf858deb16a393270d4b14c702031aae50257da3 Mon Sep 17 00:00:00 2001 From: pretidav Date: Wed, 10 Jan 2018 18:43:02 +0100 Subject: [PATCH 289/377] Lanczos with 2 reps fixed (tobe tested) --- tests/lanczos/Test_WCMultiRep_lanczos.cc | 128 +++++++++++++++++------ 1 file changed, 95 insertions(+), 33 deletions(-) diff --git a/tests/lanczos/Test_WCMultiRep_lanczos.cc b/tests/lanczos/Test_WCMultiRep_lanczos.cc index e8549234..98180db1 100644 --- a/tests/lanczos/Test_WCMultiRep_lanczos.cc +++ 
b/tests/lanczos/Test_WCMultiRep_lanczos.cc @@ -32,8 +32,17 @@ using namespace std; using namespace Grid; using namespace Grid::QCD; -typedef WilsonFermionR FermionOp; -typedef typename WilsonFermionR::FermionField FermionField; + +//typedef WilsonCloverFermionR FermionOp; +//typedef typename WilsonFermionR::FermionField FermionField; + +typedef WilsonImplR FundImplPolicy; +typedef WilsonCloverFermionR FundFermionAction; +typedef typename FundFermionAction::FermionField FundFermionField; + +typedef WilsonTwoIndexAntiSymmetricImplR ASymmImplPolicy; +typedef WilsonCloverTwoIndexAntiSymmetricFermionR ASymmFermionAction; +typedef typename ASymmFermionAction::FermionField ASymmFermionField; RealD AllZero(RealD x) { return 0.; } @@ -60,49 +69,102 @@ int main(int argc, char** argv) { GridParallelRNG RNG5rb(FrbGrid); RNG5.SeedFixedIntegers(seeds5); - LatticeGaugeField Umu(UGrid); - SU3::HotConfiguration(RNG4, Umu); + GridParallelRNG pRNG(UGrid); + GridSerialRNG sRNG; -/* - std::vector U(4, UGrid); - for (int mu = 0; mu < Nd; mu++) { - U[mu] = PeekIndex(Umu, mu); - } -*/ + FundamentalRepresentation::LatticeField Umu(UGrid); + + TwoIndexAntiSymmetricRepresentation HiRep(UGrid); + TwoIndexAntiSymmetricRepresentation::LatticeField UmuAS(UGrid); - RealD mass = -0.1; - RealD M5 = 1.8; - RealD mob_b = 1.5; - FermionOp WilsonOperator(Umu,*FGrid,*FrbGrid,mass); - MdagMLinearOperator HermOp(WilsonOperator); /// <----- - //SchurDiagTwoOperator HermOp(WilsonOperator); + + CheckpointerParameters CPparams; + + CPparams.config_prefix = "ckpoint_lat"; + CPparams.rng_prefix = "ckpoint_rng"; + CPparams.format = "IEEE64BIG"; - const int Nstop = 20; - const int Nk = 60; - const int Np = 60; +//NerscHmcCheckpointer Checkpoint(std::string("ckpoint_lat"), + // std::string("ckpoint_rng"), 1); + +NerscHmcCheckpointer Checkpoint(CPparams); + + int CNFGSTART=1; + int CNFGEND=2; + int CNFGSTEP=1; + + Real Fundmass = -0.1; + Real Fundcsw = 1.0; + Real ASmass = -0.1; + Real AScsw = 1.0; + + std::cout << 
"Fund: mass and csw" << Fundmass << " and " << Fundcsw << std::endl; + std::cout << "AS : mass and csw" << ASmass << " and " << AScsw << std::endl; + + const int Nstop = 30; + const int Nk = 40; + const int Np = 40; const int Nm = Nk + Np; const int MaxIt = 10000; - RealD resid = 1.0e-6; + RealD resid = 1.0e-8; - std::vector Coeffs{0, 1.}; - Polynomial PolyX(Coeffs); - Chebyshev Cheb(0.0, 10., 12); - ImplicitlyRestartedLanczos IRL(HermOp, PolyX, Nstop, Nk, Nm, + for (int cnfg=CNFGSTART;cnfg<=CNFGEND;cnfg+=CNFGSTEP){ + Checkpoint.CheckpointRestore(cnfg,Umu, sRNG, pRNG); + + //SU4::HotConfiguration(RNG4, Umu); // temporary, then read. + + HiRep.update_representation(Umu); + UmuAS = HiRep.U; + + FundFermionAction FundFermOp(Umu,*FGrid,*FrbGrid, Fundmass, Fundcsw, Fundcsw); + MdagMLinearOperator HermOpFund(FundFermOp); /// <----- + + ASymmFermionAction ASFermOp(UmuAS,*FGrid,*FrbGrid, ASmass, AScsw, AScsw); + MdagMLinearOperator HermOpAS(ASFermOp); /// <----- + + std::vector Coeffs{0, -1.}; + Polynomial FundPolyX(Coeffs); + Chebyshev FundCheb(0.0, 10., 12); + ImplicitlyRestartedLanczos IRL_Fund(HermOpFund, FundPolyX, Nstop, Nk, Nm, resid, MaxIt); + + Polynomial ASPolyX(Coeffs); + Chebyshev ASCheb(0.0, 10., 12); + ImplicitlyRestartedLanczos IRL_AS(HermOpAS, ASPolyX, Nstop, Nk, Nm, + resid, MaxIt); + + std::vector Fundeval(Nm); + std::vector ASeval(Nm); - std::vector eval(Nm); - FermionField src(FGrid); - gaussian(RNG5, src); - std::vector evec(Nm, FGrid); + FundFermionField Fundsrc(FGrid); + ASymmFermionField ASsrc(FGrid); + + gaussian(RNG5, Fundsrc); + gaussian(RNG5, ASsrc); + + std::vector Fundevec(Nm, FGrid); + std::vector ASevec(Nm, FGrid); + for (int i = 0; i < 1; i++) { - std::cout << i << " / " << Nm << " grid pointer " << evec[i]._grid + std::cout << i << " / " << Nm << "Fund: grid pointer " << Fundevec[i]._grid << std::endl; }; + for (int i = 0; i < 1; i++) { + std::cout << i << " / " << Nm << "AS: grid pointer " << ASevec[i]._grid + << std::endl; + }; + + int 
FundNconv, ASNconv; + IRL_Fund.calc(Fundeval, Fundevec, Fundsrc, FundNconv); + IRL_AS.calc(ASeval, ASevec, ASsrc, ASNconv); - int Nconv; - IRL.calc(eval, evec, src, Nconv); - - std::cout << eval << std::endl; + for (int i=0;i #include #include +#include #include #include #include diff --git a/extras/Hadrons/Modules/MScalarSUN/TwoPoint.hpp b/extras/Hadrons/Modules/MScalarSUN/TwoPoint.hpp new file mode 100644 index 00000000..abfbf609 --- /dev/null +++ b/extras/Hadrons/Modules/MScalarSUN/TwoPoint.hpp @@ -0,0 +1,184 @@ +/************************************************************************************* + +Grid physics library, www.github.com/paboyle/Grid + +Source file: extras/Hadrons/Modules/MScalarSUN/TwoPoint.hpp + +Copyright (C) 2015-2018 + +Author: Antonin Portelli + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along +with this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+ +See the full license in the file "LICENSE" in the top level distribution directory +*************************************************************************************/ +/* END LEGAL */ +#ifndef Hadrons_MScalarSUN_TwoPoint_hpp_ +#define Hadrons_MScalarSUN_TwoPoint_hpp_ + +#include +#include +#include + +BEGIN_HADRONS_NAMESPACE + +/****************************************************************************** + * 2-pt functions for a given set of operators * + ******************************************************************************/ +BEGIN_MODULE_NAMESPACE(MScalarSUN) + +class TwoPointPar: Serializable +{ +public: + GRID_SERIALIZABLE_CLASS_MEMBERS(TwoPointPar, + std::vector, op, + std::string, output); +}; + +template +class TTwoPoint: public Module +{ +public: + typedef typename SImpl::Field Field; + typedef typename SImpl::ComplexField ComplexField; + class Result: Serializable + { + public: + GRID_SERIALIZABLE_CLASS_MEMBERS(Result, + std::string, sink, + std::string, source, + std::vector, data); + }; +public: + // constructor + TTwoPoint(const std::string name); + // destructor + virtual ~TTwoPoint(void) = default; + // dependency relation + virtual std::vector getInput(void); + virtual std::vector getOutput(void); + // setup + virtual void setup(void); + // execution + virtual void execute(void); +private: + // make 2-pt function + template + std::vector makeTwoPoint(const std::vector &sink, + const std::vector &source); +}; + +MODULE_REGISTER_NS(TwoPointSU2, TTwoPoint>, MScalarSUN); +MODULE_REGISTER_NS(TwoPointSU3, TTwoPoint>, MScalarSUN); +MODULE_REGISTER_NS(TwoPointSU4, TTwoPoint>, MScalarSUN); +MODULE_REGISTER_NS(TwoPointSU5, TTwoPoint>, MScalarSUN); +MODULE_REGISTER_NS(TwoPointSU6, TTwoPoint>, MScalarSUN); + +/****************************************************************************** + * TTwoPoint implementation * + ******************************************************************************/ +// constructor 
///////////////////////////////////////////////////////////////// +template +TTwoPoint::TTwoPoint(const std::string name) +: Module(name) +{} + +// dependencies/products /////////////////////////////////////////////////////// +template +std::vector TTwoPoint::getInput(void) +{ + return par().op; +} + +template +std::vector TTwoPoint::getOutput(void) +{ + std::vector out = {}; + + return out; +} + +// setup /////////////////////////////////////////////////////////////////////// +template +void TTwoPoint::setup(void) +{ + const unsigned int nt = env().getDim().back(); + envTmp(std::vector>, "slicedOp", 1, par().op.size(), + std::vector(nt)); +} + +// execution /////////////////////////////////////////////////////////////////// +template +void TTwoPoint::execute(void) +{ + LOG(Message) << "Computing 2-point functions for operators:" << std::endl; + for (auto &o: par().op) + { + LOG(Message) << " '" << o << "'" << std::endl; + } + + ResultWriter writer(RESULT_FILE_NAME(par().output)); + const unsigned int nd = env().getDim().size(); + std::vector result; + + envGetTmp(std::vector>, slicedOp); + for (unsigned int i = 0; i < par().op.size(); ++i) + { + auto &op = envGet(ComplexField, par().op[i]); + + sliceSum(op, slicedOp[i], nd - 1); + } + for (unsigned int i = 0; i < par().op.size(); ++i) + for (unsigned int j = 0; j < par().op.size(); ++j) + { + Result r; + + r.sink = par().op[i]; + r.source = par().op[j]; + r.data = makeTwoPoint(slicedOp[i], slicedOp[j]); + result.push_back(r); + } + write(writer, "twopt", result); +} + +// make 2-pt function ////////////////////////////////////////////////////////// +template +template +std::vector TTwoPoint::makeTwoPoint( + const std::vector &sink, + const std::vector &source) +{ + assert(sink.size() == source.size()); + + unsigned int nt = sink.size(); + std::vector res(nt, 0.); + + for (unsigned int dt = 0; dt < nt; ++dt) + { + for (unsigned int t = 0; t < nt; ++t) + { + res[dt] += TensorRemove(trace(sink[(t+dt)%nt]*source[t])); 
+ } + res[dt] *= 1./static_cast(nt); + } + + return res; +} + +END_MODULE_NAMESPACE + +END_HADRONS_NAMESPACE + +#endif // Hadrons_MScalarSUN_TwoPoint_hpp_ diff --git a/extras/Hadrons/modules.inc b/extras/Hadrons/modules.inc index cea4dc2a..b1ccb8cc 100644 --- a/extras/Hadrons/modules.inc +++ b/extras/Hadrons/modules.inc @@ -40,6 +40,7 @@ modules_hpp =\ Modules/MAction/DWF.hpp \ Modules/MAction/Wilson.hpp \ Modules/MScalarSUN/TrMag.hpp \ + Modules/MScalarSUN/TwoPoint.hpp \ Modules/MScalarSUN/TrPhi.hpp \ Modules/MIO/LoadNersc.hpp \ Modules/MIO/LoadBinary.hpp From b7f8c5b823dbc0e0b22f7461bb7b7faf9fcc1e39 Mon Sep 17 00:00:00 2001 From: Guido Cossu Date: Fri, 12 Jan 2018 14:38:27 +0000 Subject: [PATCH 291/377] Modify test to merge with the new Lanczos interface --- tests/lanczos/Test_WCMultiRep_lanczos.cc | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/tests/lanczos/Test_WCMultiRep_lanczos.cc b/tests/lanczos/Test_WCMultiRep_lanczos.cc index 98180db1..b6d69aee 100644 --- a/tests/lanczos/Test_WCMultiRep_lanczos.cc +++ b/tests/lanczos/Test_WCMultiRep_lanczos.cc @@ -124,13 +124,21 @@ NerscHmcCheckpointer Checkpoint(CPparams); std::vector Coeffs{0, -1.}; Polynomial FundPolyX(Coeffs); - Chebyshev FundCheb(0.0, 10., 12); - ImplicitlyRestartedLanczos IRL_Fund(HermOpFund, FundPolyX, Nstop, Nk, Nm, + //Chebyshev FundCheb(0.0, 10., 12); + + FunctionHermOp FundPolyXOp(FundPolyX,HermOpFund); + PlainHermOp FundOp (HermOpFund); + + ImplicitlyRestartedLanczos IRL_Fund(FundOp, FundPolyXOp, Nstop, Nk, Nm, resid, MaxIt); Polynomial ASPolyX(Coeffs); - Chebyshev ASCheb(0.0, 10., 12); - ImplicitlyRestartedLanczos IRL_AS(HermOpAS, ASPolyX, Nstop, Nk, Nm, + //Chebyshev ASCheb(0.0, 10., 12); + + FunctionHermOp ASPolyXOp(ASPolyX,HermOpAS); + PlainHermOp ASOp (HermOpAS); + + ImplicitlyRestartedLanczos IRL_AS(ASOp, ASPolyXOp, Nstop, Nk, Nm, resid, MaxIt); std::vector Fundeval(Nm); From b00d2d2c39f8fdc976446072faaa884e8596b777 Mon Sep 17 00:00:00 2001 From: Guido 
Cossu Date: Wed, 17 Jan 2018 13:46:12 +0000 Subject: [PATCH 292/377] Correction of Representations compilation and small compilation error for Intel 17 --- lib/qcd/action/fermion/FermionOperatorImpl.h | 89 +++++++++++--------- lib/qcd/action/fermion/WilsonFermion.cc | 4 +- lib/qcd/action/fermion/WilsonFermion5D.cc | 4 +- lib/qcd/action/fermion/WilsonKernels.h | 8 +- lib/qcd/representations/adjoint.h | 1 + lib/qcd/representations/fundamental.h | 1 + lib/qcd/representations/two_index.h | 1 + 7 files changed, 58 insertions(+), 50 deletions(-) diff --git a/lib/qcd/action/fermion/FermionOperatorImpl.h b/lib/qcd/action/fermion/FermionOperatorImpl.h index 2b900569..c21a07ee 100644 --- a/lib/qcd/action/fermion/FermionOperatorImpl.h +++ b/lib/qcd/action/fermion/FermionOperatorImpl.h @@ -164,6 +164,7 @@ namespace QCD { public: static const int Dimension = Representation::Dimension; + static const bool isFundamental = Representation::isFundamental; static const bool LsVectorised=false; static const int Nhcs = Options::Nhcs; @@ -298,27 +299,28 @@ namespace QCD { //////////////////////////////////////////////////////////////////////////////////// // Single flavour four spinors with colour index, 5d redblack //////////////////////////////////////////////////////////////////////////////////// -template -class DomainWallVec5dImpl : public PeriodicGaugeImpl< GaugeImplTypes< S,Nrepresentation> > { +template +class DomainWallVec5dImpl : public PeriodicGaugeImpl< GaugeImplTypes< S,Representation::Dimension> > { public: - typedef PeriodicGaugeImpl > Gimpl; + typedef PeriodicGaugeImpl > Gimpl; INHERIT_GIMPL_TYPES(Gimpl); - static const int Dimension = Nrepresentation; + static const int Dimension = Representation::Dimension; + static const bool isFundamental = Representation::isFundamental; static const bool LsVectorised=true; static const int Nhcs = Options::Nhcs; typedef typename Options::_Coeff_t Coeff_t; typedef typename Options::template PrecisionMapper::LowerPrecVector SimdL; - 
template using iImplSpinor = iScalar, Ns> >; - template using iImplPropagator = iScalar, Ns> >; - template using iImplHalfSpinor = iScalar, Nhs> >; - template using iImplHalfCommSpinor = iScalar, Nhcs> >; - template using iImplDoubledGaugeField = iVector >, Nds>; - template using iImplGaugeField = iVector >, Nd>; - template using iImplGaugeLink = iScalar > >; + template using iImplSpinor = iScalar, Ns> >; + template using iImplPropagator = iScalar, Ns> >; + template using iImplHalfSpinor = iScalar, Nhs> >; + template using iImplHalfCommSpinor = iScalar, Nhcs> >; + template using iImplDoubledGaugeField = iVector >, Nds>; + template using iImplGaugeField = iVector >, Nd>; + template using iImplGaugeLink = iScalar > >; typedef iImplSpinor SiteSpinor; typedef iImplPropagator SitePropagator; @@ -354,8 +356,8 @@ class DomainWallVec5dImpl : public PeriodicGaugeImpl< GaugeImplTypes< S,Nrepres const SiteHalfSpinor &chi, int mu, StencilEntry *SE, StencilImpl &St) { SiteGaugeLink UU; - for (int i = 0; i < Nrepresentation; i++) { - for (int j = 0; j < Nrepresentation; j++) { + for (int i = 0; i < Dimension; i++) { + for (int j = 0; j < Dimension; j++) { vsplat(UU()()(i, j), U(mu)()(i, j)); } } @@ -367,8 +369,8 @@ class DomainWallVec5dImpl : public PeriodicGaugeImpl< GaugeImplTypes< S,Nrepres const SitePropagator &chi, int mu) { SiteGaugeLink UU; - for (int i = 0; i < Nrepresentation; i++) { - for (int j = 0; j < Nrepresentation; j++) { + for (int i = 0; i < Dimension; i++) { + for (int j = 0; j < Dimension; j++) { vsplat(UU()()(i, j), U(mu)()(i, j)); } } @@ -472,25 +474,26 @@ class DomainWallVec5dImpl : public PeriodicGaugeImpl< GaugeImplTypes< S,Nrepres //////////////////////////////////////////////////////////////////////////////////////// // Flavour doubled spinors; is Gparity the only? what about C*? 
//////////////////////////////////////////////////////////////////////////////////////// -template -class GparityWilsonImpl : public ConjugateGaugeImpl > { +template +class GparityWilsonImpl : public ConjugateGaugeImpl > { public: - static const int Dimension = Nrepresentation; + static const int Dimension = Representation::Dimension; + static const bool isFundamental = Representation::isFundamental; static const int Nhcs = Options::Nhcs; static const bool LsVectorised=false; - typedef ConjugateGaugeImpl< GaugeImplTypes > Gimpl; + typedef ConjugateGaugeImpl< GaugeImplTypes > Gimpl; INHERIT_GIMPL_TYPES(Gimpl); typedef typename Options::_Coeff_t Coeff_t; typedef typename Options::template PrecisionMapper::LowerPrecVector SimdL; - template using iImplSpinor = iVector, Ns>, Ngp>; - template using iImplPropagator = iVector, Ns>, Ngp>; - template using iImplHalfSpinor = iVector, Nhs>, Ngp>; - template using iImplHalfCommSpinor = iVector, Nhcs>, Ngp>; - template using iImplDoubledGaugeField = iVector >, Nds>, Ngp>; + template using iImplSpinor = iVector, Ns>, Ngp>; + template using iImplPropagator = iVector, Ns>, Ngp>; + template using iImplHalfSpinor = iVector, Nhs>, Ngp>; + template using iImplHalfCommSpinor = iVector, Nhcs>, Ngp>; + template using iImplDoubledGaugeField = iVector >, Nds>, Ngp>; typedef iImplSpinor SiteSpinor; typedef iImplPropagator SitePropagator; @@ -711,6 +714,7 @@ class StaggeredImpl : public PeriodicGaugeImpl > Gimpl; @@ -839,6 +843,7 @@ class StaggeredImpl : public PeriodicGaugeImpl > Gimpl; @@ -1033,29 +1038,29 @@ typedef WilsonImpl W typedef WilsonImpl WilsonTwoIndexAntiSymmetricImplF; // Float typedef WilsonImpl WilsonTwoIndexAntiSymmetricImplD; // Double -typedef DomainWallVec5dImpl DomainWallVec5dImplR; // Real.. whichever prec -typedef DomainWallVec5dImpl DomainWallVec5dImplF; // Float -typedef DomainWallVec5dImpl DomainWallVec5dImplD; // Double +typedef DomainWallVec5dImpl DomainWallVec5dImplR; // Real.. 
whichever prec +typedef DomainWallVec5dImpl DomainWallVec5dImplF; // Float +typedef DomainWallVec5dImpl DomainWallVec5dImplD; // Double -typedef DomainWallVec5dImpl DomainWallVec5dImplRL; // Real.. whichever prec -typedef DomainWallVec5dImpl DomainWallVec5dImplFH; // Float -typedef DomainWallVec5dImpl DomainWallVec5dImplDF; // Double +typedef DomainWallVec5dImpl DomainWallVec5dImplRL; // Real.. whichever prec +typedef DomainWallVec5dImpl DomainWallVec5dImplFH; // Float +typedef DomainWallVec5dImpl DomainWallVec5dImplDF; // Double -typedef DomainWallVec5dImpl ZDomainWallVec5dImplR; // Real.. whichever prec -typedef DomainWallVec5dImpl ZDomainWallVec5dImplF; // Float -typedef DomainWallVec5dImpl ZDomainWallVec5dImplD; // Double +typedef DomainWallVec5dImpl ZDomainWallVec5dImplR; // Real.. whichever prec +typedef DomainWallVec5dImpl ZDomainWallVec5dImplF; // Float +typedef DomainWallVec5dImpl ZDomainWallVec5dImplD; // Double -typedef DomainWallVec5dImpl ZDomainWallVec5dImplRL; // Real.. whichever prec -typedef DomainWallVec5dImpl ZDomainWallVec5dImplFH; // Float -typedef DomainWallVec5dImpl ZDomainWallVec5dImplDF; // Double +typedef DomainWallVec5dImpl ZDomainWallVec5dImplRL; // Real.. whichever prec +typedef DomainWallVec5dImpl ZDomainWallVec5dImplFH; // Float +typedef DomainWallVec5dImpl ZDomainWallVec5dImplDF; // Double -typedef GparityWilsonImpl GparityWilsonImplR; // Real.. whichever prec -typedef GparityWilsonImpl GparityWilsonImplF; // Float -typedef GparityWilsonImpl GparityWilsonImplD; // Double +typedef GparityWilsonImpl GparityWilsonImplR; // Real.. whichever prec +typedef GparityWilsonImpl GparityWilsonImplF; // Float +typedef GparityWilsonImpl GparityWilsonImplD; // Double -typedef GparityWilsonImpl GparityWilsonImplRL; // Real.. whichever prec -typedef GparityWilsonImpl GparityWilsonImplFH; // Float -typedef GparityWilsonImpl GparityWilsonImplDF; // Double +typedef GparityWilsonImpl GparityWilsonImplRL; // Real.. 
whichever prec +typedef GparityWilsonImpl GparityWilsonImplFH; // Float +typedef GparityWilsonImpl GparityWilsonImplDF; // Double typedef StaggeredImpl StaggeredImplR; // Real.. whichever prec typedef StaggeredImpl StaggeredImplF; // Float diff --git a/lib/qcd/action/fermion/WilsonFermion.cc b/lib/qcd/action/fermion/WilsonFermion.cc index 2336f4b6..dfaa6758 100644 --- a/lib/qcd/action/fermion/WilsonFermion.cc +++ b/lib/qcd/action/fermion/WilsonFermion.cc @@ -419,7 +419,7 @@ void WilsonFermion::SeqConservedCurrent(PropagatorField &q_in, conformable(_grid, q_in._grid); conformable(_grid, q_out._grid); Lattice> ph(_grid), coor(_grid); - Complex i(0.0,1.0); + ComplexD i(0.0,1.0); PropagatorField tmpFwd(_grid), tmpBwd(_grid), tmp(_grid); unsigned int tshift = (mu == Tp) ? 1 : 0; unsigned int LLt = GridDefaultLatt()[Tp]; @@ -431,7 +431,7 @@ void WilsonFermion::SeqConservedCurrent(PropagatorField &q_in, LatticeCoordinate(coor, mu); ph = ph + mom[mu]*coor*((1./(_grid->_fdimensions[mu]))); } - ph = exp((Real)(2*M_PI)*i*ph); + ph = exp((RealD)(2*M_PI)*i*ph); q_out = zero; LatticeInteger coords(_grid); diff --git a/lib/qcd/action/fermion/WilsonFermion5D.cc b/lib/qcd/action/fermion/WilsonFermion5D.cc index 393ee7f3..3e58fed6 100644 --- a/lib/qcd/action/fermion/WilsonFermion5D.cc +++ b/lib/qcd/action/fermion/WilsonFermion5D.cc @@ -793,7 +793,7 @@ void WilsonFermion5D::SeqConservedCurrent(PropagatorField &q_in, Lattice> ph(FermionGrid()), coor(FermionGrid()); PropagatorField tmpFwd(FermionGrid()), tmpBwd(FermionGrid()), tmp(FermionGrid()); - Complex i(0.0, 1.0); + ComplexD i(0.0, 1.0); unsigned int tshift = (mu == Tp) ? 
1 : 0; unsigned int LLs = q_in._grid->_rdimensions[0]; unsigned int LLt = GridDefaultLatt()[Tp]; @@ -806,7 +806,7 @@ void WilsonFermion5D::SeqConservedCurrent(PropagatorField &q_in, LatticeCoordinate(coor, nu + 1); ph = ph + mom[nu]*coor*((1./(_FourDimGrid->_fdimensions[nu]))); } - ph = exp((Real)(2*M_PI)*i*ph); + ph = exp((RealD)(2*M_PI)*i*ph); q_out = zero; LatticeInteger coords(_FourDimGrid); diff --git a/lib/qcd/action/fermion/WilsonKernels.h b/lib/qcd/action/fermion/WilsonKernels.h index ed8d6be9..2369c98d 100644 --- a/lib/qcd/action/fermion/WilsonKernels.h +++ b/lib/qcd/action/fermion/WilsonKernels.h @@ -55,7 +55,7 @@ template class WilsonKernels : public FermionOperator , public public: template - typename std::enable_if::type + typename std::enable_if::type DhopSite(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, SiteHalfSpinor * buf, int sF, int sU, int Ls, int Ns, const FermionField &in, FermionField &out,int interior=1,int exterior=1) { @@ -99,7 +99,7 @@ public: } template - typename std::enable_if<(Impl::Dimension != 3 || (Impl::Dimension == 3 && Nc != 3)) && EnableBool, void>::type + typename std::enable_if<(Impl::isFundamental==false || (Impl::isFundamental==true && Nc != 3)) && EnableBool, void>::type DhopSite(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, SiteHalfSpinor * buf, int sF, int sU, int Ls, int Ns, const FermionField &in, FermionField &out,int interior=1,int exterior=1 ) { // no kernel choice @@ -116,7 +116,7 @@ public: } template - typename std::enable_if::type + typename std::enable_if::type DhopSiteDag(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, SiteHalfSpinor * buf, int sF, int sU, int Ls, int Ns, const FermionField &in, FermionField &out,int interior=1,int exterior=1) { @@ -161,7 +161,7 @@ public: } template - typename std::enable_if<(Impl::Dimension != 3 || (Impl::Dimension == 3 && Nc != 3)) && EnableBool,void>::type + typename std::enable_if<(Impl::isFundamental==false || (Impl::isFundamental==true 
&& Nc != 3)) && EnableBool,void>::type DhopSiteDag(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U,SiteHalfSpinor * buf, int sF, int sU, int Ls, int Ns, const FermionField &in, FermionField &out,int interior=1,int exterior=1) { diff --git a/lib/qcd/representations/adjoint.h b/lib/qcd/representations/adjoint.h index 078d12a1..052cd7a8 100644 --- a/lib/qcd/representations/adjoint.h +++ b/lib/qcd/representations/adjoint.h @@ -23,6 +23,7 @@ class AdjointRep { typedef typename SU_Adjoint::LatticeAdjMatrix LatticeMatrix; typedef typename SU_Adjoint::LatticeAdjField LatticeField; static const int Dimension = ncolour * ncolour - 1; + static const bool isFundamental = false; LatticeField U; diff --git a/lib/qcd/representations/fundamental.h b/lib/qcd/representations/fundamental.h index db52d893..9f039a07 100644 --- a/lib/qcd/representations/fundamental.h +++ b/lib/qcd/representations/fundamental.h @@ -19,6 +19,7 @@ template class FundamentalRep { public: static const int Dimension = ncolour; + static const bool isFundamental = true; // typdef to be used by the Representations class in HMC to get the // types for the higher representation fields diff --git a/lib/qcd/representations/two_index.h b/lib/qcd/representations/two_index.h index 082a52a5..2c7e8b3a 100644 --- a/lib/qcd/representations/two_index.h +++ b/lib/qcd/representations/two_index.h @@ -29,6 +29,7 @@ class TwoIndexRep { typedef typename SU_TwoIndex::LatticeTwoIndexMatrix LatticeMatrix; typedef typename SU_TwoIndex::LatticeTwoIndexField LatticeField; static const int Dimension = ncolour * (ncolour + S) / 2; + static const bool isFundamental = false; LatticeField U; From 0e5f6262260eb6d97962d0ffa17cdf40c7ec8c45 Mon Sep 17 00:00:00 2001 From: Antonin Portelli Date: Mon, 22 Jan 2018 19:37:47 +0000 Subject: [PATCH 293/377] Hadrons: module for scalar operator divergence --- extras/Hadrons/Modules.hpp | 1 + extras/Hadrons/Modules/MScalarSUN/Div.hpp | 166 ++++++++++++++++++++++ extras/Hadrons/modules.inc | 1 + 3 
files changed, 168 insertions(+) create mode 100644 extras/Hadrons/Modules/MScalarSUN/Div.hpp diff --git a/extras/Hadrons/Modules.hpp b/extras/Hadrons/Modules.hpp index 5834fb3f..6e123660 100644 --- a/extras/Hadrons/Modules.hpp +++ b/extras/Hadrons/Modules.hpp @@ -55,6 +55,7 @@ See the full license in the file "LICENSE" in the top level distribution directo #include #include #include +#include #include #include #include diff --git a/extras/Hadrons/Modules/MScalarSUN/Div.hpp b/extras/Hadrons/Modules/MScalarSUN/Div.hpp new file mode 100644 index 00000000..6680cd79 --- /dev/null +++ b/extras/Hadrons/Modules/MScalarSUN/Div.hpp @@ -0,0 +1,166 @@ +/************************************************************************************* + +Grid physics library, www.github.com/paboyle/Grid + +Source file: extras/Hadrons/Modules/MScalarSUN/Div.hpp + +Copyright (C) 2015-2018 + +Author: Antonin Portelli + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along +with this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+ +See the full license in the file "LICENSE" in the top level distribution directory +*************************************************************************************/ +/* END LEGAL */ +#ifndef Hadrons_MScalarSUN_Div_hpp_ +#define Hadrons_MScalarSUN_Div_hpp_ + +#include +#include +#include + +BEGIN_HADRONS_NAMESPACE + +/****************************************************************************** + * Div * + ******************************************************************************/ +BEGIN_MODULE_NAMESPACE(MScalarSUN) + +class DivPar: Serializable +{ +public: + GRID_SERIALIZABLE_ENUM(DiffType, undef, forward, 1, backward, 2, central, 3); + GRID_SERIALIZABLE_CLASS_MEMBERS(DivPar, + std::vector, op, + DiffType, type, + std::string, output); +}; + +template +class TDiv: public Module +{ +public: + typedef typename SImpl::Field Field; + typedef typename SImpl::ComplexField ComplexField; + class Result: Serializable + { + public: + GRID_SERIALIZABLE_CLASS_MEMBERS(Result, + DivPar::DiffType, type, + Complex, value); + }; +public: + // constructor + TDiv(const std::string name); + // destructor + virtual ~TDiv(void) = default; + // dependency relation + virtual std::vector getInput(void); + virtual std::vector getOutput(void); + // setup + virtual void setup(void); + // execution + virtual void execute(void); +}; + +MODULE_REGISTER_NS(DivSU2, TDiv>, MScalarSUN); +MODULE_REGISTER_NS(DivSU3, TDiv>, MScalarSUN); +MODULE_REGISTER_NS(DivSU4, TDiv>, MScalarSUN); +MODULE_REGISTER_NS(DivSU5, TDiv>, MScalarSUN); +MODULE_REGISTER_NS(DivSU6, TDiv>, MScalarSUN); + +/****************************************************************************** + * TDiv implementation * + ******************************************************************************/ +// constructor ///////////////////////////////////////////////////////////////// +template +TDiv::TDiv(const std::string name) +: Module(name) +{} + +// dependencies/products 
/////////////////////////////////////////////////////// +template +std::vector TDiv::getInput(void) +{ + return par().op; +} + +template +std::vector TDiv::getOutput(void) +{ + std::vector out = {getName()}; + + return out; +} + +// setup /////////////////////////////////////////////////////////////////////// +template +void TDiv::setup(void) +{ + if (par().op.size() != env().getNd()) + { + HADRON_ERROR(Size, "the number of components differs from number of dimensions"); + } + envCreateLat(ComplexField, getName()); +} + +// execution /////////////////////////////////////////////////////////////////// +template +void TDiv::execute(void) +{ + const auto nd = env().getNd(); + + LOG(Message) << "Computing the " << par().type << " divergence of ["; + for (unsigned int mu = 0; mu < nd; ++mu) + { + std::cout << par().op[mu] << ((mu == nd - 1) ? "]" : ", "); + } + std::cout << std::endl; + + auto &div = envGet(ComplexField, getName()); + div = zero; + for (unsigned int mu = 0; mu < nd; ++mu) + { + auto &op = envGet(ComplexField, par().op[mu]); + switch(par().type) + { + case DivPar::DiffType::backward: + div += op - Cshift(op, mu, -1); + break; + case DivPar::DiffType::forward: + div += Cshift(op, mu, 1) - op; + break; + case DivPar::DiffType::central: + div += 0.5*(Cshift(op, mu, 1) - Cshift(op, mu, -1)); + break; + } + } + if (!par().output.empty()) + { + Result r; + ResultWriter writer(RESULT_FILE_NAME(par().output)); + + r.type = par().type; + r.value = TensorRemove(sum(div)); + write(writer, "div", r); + } +} + +END_MODULE_NAMESPACE + +END_HADRONS_NAMESPACE + +#endif // Hadrons_MScalarSUN_Div_hpp_ diff --git a/extras/Hadrons/modules.inc b/extras/Hadrons/modules.inc index b1ccb8cc..1c71301a 100644 --- a/extras/Hadrons/modules.inc +++ b/extras/Hadrons/modules.inc @@ -39,6 +39,7 @@ modules_hpp =\ Modules/MScalar/ChargedProp.hpp \ Modules/MAction/DWF.hpp \ Modules/MAction/Wilson.hpp \ + Modules/MScalarSUN/Div.hpp \ Modules/MScalarSUN/TrMag.hpp \ 
Modules/MScalarSUN/TwoPoint.hpp \ Modules/MScalarSUN/TrPhi.hpp \ From 6e3ce7423ef97f53987e768113e3fa4699a3e93e Mon Sep 17 00:00:00 2001 From: Antonin Portelli Date: Mon, 22 Jan 2018 20:04:05 +0000 Subject: [PATCH 294/377] Hadrons: don't display module list at startup (too long) --- extras/Hadrons/Application.cc | 6 ------ 1 file changed, 6 deletions(-) diff --git a/extras/Hadrons/Application.cc b/extras/Hadrons/Application.cc index 7ba98ade..39c898bc 100644 --- a/extras/Hadrons/Application.cc +++ b/extras/Hadrons/Application.cc @@ -43,12 +43,6 @@ using namespace Hadrons; Application::Application(void) { initLogger(); - LOG(Message) << "Modules available:" << std::endl; - auto list = ModuleFactory::getInstance().getBuilderList(); - for (auto &m: list) - { - LOG(Message) << " " << m << std::endl; - } auto dim = GridDefaultLatt(), mpi = GridDefaultMpi(), loc(dim); locVol_ = 1; for (unsigned int d = 0; d < dim.size(); ++d) From cff3bae1557f90077ec67d56c96eb72f817bb273 Mon Sep 17 00:00:00 2001 From: Guido Cossu Date: Thu, 25 Jan 2018 13:46:31 +0100 Subject: [PATCH 295/377] Adding support for general Nc in the benchmark outputs --- benchmarks/Benchmark_dwf.cc | 15 ++++++---- benchmarks/Benchmark_dwf_sweep.cc | 11 ++++--- benchmarks/Benchmark_gparity.cc | 6 ++-- benchmarks/Benchmark_wilson.cc | 13 ++++++-- benchmarks/Benchmark_wilson_sweep.cc | 45 ++++++++++++++++++++++------ 5 files changed, 66 insertions(+), 24 deletions(-) diff --git a/benchmarks/Benchmark_dwf.cc b/benchmarks/Benchmark_dwf.cc index 73621bbe..1d9de772 100644 --- a/benchmarks/Benchmark_dwf.cc +++ b/benchmarks/Benchmark_dwf.cc @@ -48,7 +48,6 @@ int main (int argc, char ** argv) int threads = GridThread::GetThreads(); - std::cout< latt4 = GridDefaultLatt(); int Ls=16; @@ -57,6 +56,10 @@ int main (int argc, char ** argv) std::stringstream ss(argv[i+1]); ss >> Ls; } + GridLogLayout(); + + long unsigned int single_site_flops = 8*QCD::Nc*(7+16*QCD::Nc); + GridCartesian * UGrid = 
SpaceTimeGrid::makeFourDimGrid(GridDefaultLatt(), GridDefaultSimd(Nd,vComplex::Nsimd()),GridDefaultMpi()); GridRedBlackCartesian * UrbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(UGrid); @@ -187,7 +190,7 @@ int main (int argc, char ** argv) FGrid->Barrier(); double volume=Ls; for(int mu=0;muBarrier(); double volume=Ls; for(int mu=0;muBarrier(); double volume=Ls; for(int mu=0;mu & latt4, int Ls, int threads,int report ) GridRedBlackCartesian * UrbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(UGrid); GridCartesian * FGrid = SpaceTimeGrid::makeFiveDimGrid(Ls,UGrid); GridRedBlackCartesian * FrbGrid = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls,UGrid); + long unsigned int single_site_flops = 8*QCD::Nc*(7+16*QCD::Nc); std::vector seeds4({1,2,3,4}); std::vector seeds5({5,6,7,8}); @@ -196,7 +198,7 @@ void benchDw(std::vector & latt4, int Ls, int threads,int report ) if ( ! report ) { double volume=Ls; for(int mu=0;mu & latt4, int Ls, int threads,int report ) if(!report){ double volume=Ls; for(int mu=0;mu & latt4, int Ls, int threads,int report ) #define CHECK_SDW void benchsDw(std::vector & latt4, int Ls, int threads, int report ) { + long unsigned int single_site_flops = 8*QCD::Nc*(7+16*QCD::Nc); GridCartesian * UGrid = SpaceTimeGrid::makeFourDimGrid(latt4, GridDefaultSimd(Nd,vComplex::Nsimd()),GridDefaultMpi()); GridRedBlackCartesian * UrbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(UGrid); @@ -321,7 +324,7 @@ void benchsDw(std::vector & latt4, int Ls, int threads, int report ) Counter.Report(); } else { double volume=Ls; for(int mu=0;mu & latt4, int Ls, int threads, int report ) CounterSdw.Report(); } else { double volume=Ls; for(int mu=0;muBarrier(); double volume=Ls; for(int mu=0;muBarrier(); double volume=Ls; for(int mu=0;mu latt_size = GridDefaultLatt(); std::vector simd_layout = GridDefaultSimd(Nd,vComplex::Nsimd()); std::vector mpi_layout = GridDefaultMpi(); @@ -61,10 +64,15 @@ int main (int argc, char ** argv) GridRedBlackCartesian RBGrid(&Grid); int threads = 
GridThread::GetThreads(); - std::cout< seeds({1,2,3,4}); GridParallelRNG pRNG(&Grid); @@ -134,9 +142,10 @@ int main (int argc, char ** argv) Dw.Dhop(src,result,0); } double t1=usecond(); - double flops=1344*volume*ncall; + double flops=single_site_flops*volume*ncall; std::cout<()); WilsonFermionR Dw(Umu,Grid,RBGrid,mass,params); - + + // Full operator + bench_wilson(src,result,Dw,volume,DaggerNo); + bench_wilson(src,result,Dw,volume,DaggerYes); + std::cout << "\t"; + // EO bench_wilson(src,result,Dw,volume,DaggerNo); bench_wilson(src,result,Dw,volume,DaggerYes); std::cout << std::endl; @@ -122,9 +132,26 @@ void bench_wilson ( int const dag ) { int ncall = 1000; + long unsigned int single_site_flops = 8*QCD::Nc*(7+16*QCD::Nc); double t0 = usecond(); for(int i=0; i Date: Sat, 27 Jan 2018 10:59:55 +0100 Subject: [PATCH 296/377] Correcting an missing semicolumn in avx512 --- lib/simd/Grid_avx512.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/simd/Grid_avx512.h b/lib/simd/Grid_avx512.h index 85d27421..cce77a58 100644 --- a/lib/simd/Grid_avx512.h +++ b/lib/simd/Grid_avx512.h @@ -556,7 +556,7 @@ namespace Optimization { v3 = _mm256_add_epi32(v1, v2); v1 = _mm256_hadd_epi32(v3, v3); v2 = _mm256_hadd_epi32(v1, v1); - u1 = _mm256_castsi256_si128(v2) // upper half + u1 = _mm256_castsi256_si128(v2); // upper half u2 = _mm256_extracti128_si256(v2, 1); // lower half ret = _mm_add_epi32(u1, u2); return _mm_cvtsi128_si32(ret); From 655a69259a76b844ab06a2e78fbe8a0441dbf774 Mon Sep 17 00:00:00 2001 From: Guido Cossu Date: Sun, 28 Jan 2018 17:02:46 +0100 Subject: [PATCH 297/377] Added support for GCC compilation for Skylake AVX512 --- configure.ac | 3 +++ lib/qcd/action/fermion/CayleyFermion5Dvec.cc | 4 ++-- lib/qcd/action/fermion/DomainWallEOFAFermionvec.cc | 2 +- lib/qcd/action/fermion/MobiusEOFAFermionvec.cc | 2 +- lib/simd/Intel512avx.h | 2 +- 5 files changed, 8 insertions(+), 5 deletions(-) diff --git a/configure.ac b/configure.ac index 
468d9d5f..3a6a2960 100644 --- a/configure.ac +++ b/configure.ac @@ -249,6 +249,9 @@ case ${ax_cv_cxx_compiler_vendor} in AVX512) AC_DEFINE([AVX512],[1],[AVX512 intrinsics]) SIMD_FLAGS='-mavx512f -mavx512pf -mavx512er -mavx512cd';; + SKL) + AC_DEFINE([AVX512],[1],[AVX512 intrinsics for SkyLake Xeon]) + SIMD_FLAGS='-march=skylake-avx512';; KNC) AC_DEFINE([IMCI],[1],[IMCI intrinsics for Knights Corner]) SIMD_FLAGS='';; diff --git a/lib/qcd/action/fermion/CayleyFermion5Dvec.cc b/lib/qcd/action/fermion/CayleyFermion5Dvec.cc index 653e6ab3..2b2eace7 100644 --- a/lib/qcd/action/fermion/CayleyFermion5Dvec.cc +++ b/lib/qcd/action/fermion/CayleyFermion5Dvec.cc @@ -469,7 +469,7 @@ void CayleyFermion5D::MooeeInternalAsm(const FermionField &psi, FermionFie } a0 = a0+incr; a1 = a1+incr; - a2 = a2+sizeof(Simd::scalar_type); + a2 = a2+sizeof(typename Simd::scalar_type); }} { int lexa = s1+LLs*site; @@ -701,7 +701,7 @@ void CayleyFermion5D::MooeeInternalZAsm(const FermionField &psi, FermionFi } a0 = a0+incr; a1 = a1+incr; - a2 = a2+sizeof(Simd::scalar_type); + a2 = a2+sizeof(typename Simd::scalar_type); }} { int lexa = s1+LLs*site; diff --git a/lib/qcd/action/fermion/DomainWallEOFAFermionvec.cc b/lib/qcd/action/fermion/DomainWallEOFAFermionvec.cc index 81ce448c..c95172a5 100644 --- a/lib/qcd/action/fermion/DomainWallEOFAFermionvec.cc +++ b/lib/qcd/action/fermion/DomainWallEOFAFermionvec.cc @@ -475,7 +475,7 @@ namespace QCD { } a0 = a0 + incr; a1 = a1 + incr; - a2 = a2 + sizeof(Simd::scalar_type); + a2 = a2 + sizeof(typename Simd::scalar_type); } } diff --git a/lib/qcd/action/fermion/MobiusEOFAFermionvec.cc b/lib/qcd/action/fermion/MobiusEOFAFermionvec.cc index c4eaf0f3..290ba158 100644 --- a/lib/qcd/action/fermion/MobiusEOFAFermionvec.cc +++ b/lib/qcd/action/fermion/MobiusEOFAFermionvec.cc @@ -853,7 +853,7 @@ namespace QCD { a0 = a0 + incr; a1 = a1 + incr; - a2 = a2 + sizeof(Simd::scalar_type); + a2 = a2 + sizeof(typename Simd::scalar_type); } } diff --git a/lib/simd/Intel512avx.h 
b/lib/simd/Intel512avx.h index 7b5964ad..def37b9b 100644 --- a/lib/simd/Intel512avx.h +++ b/lib/simd/Intel512avx.h @@ -79,7 +79,7 @@ Author: paboyle #define ZEND2f(Criir,Ciirr, tmp) "vshufps $0xb1," #Ciirr "," #Ciirr "," #tmp ";\n"\ "vsubps " #tmp "," #Ciirr "," #Criir"{%k7}" ";\n" -#define ZEND1d(Criir,Ciirr, tmp) "vshufpd $0x55," #Criir "," #Criir "," #tmp ";\n"\ +#define ZEND1d(Criir,Ciirr, tmp) "vshufpd $0x55," #Criir "," #Criir "," #tmp ";\n"\ "vaddps " #tmp "," #Criir "," #Criir"{%k6}" ";\n" #define ZEND2d(Criir,Ciirr, tmp) "vshufpd $0x55," #Ciirr "," #Ciirr "," #tmp ";\n"\ From fb24e3a7d24abb2bcdef4c85711ce0d25319a153 Mon Sep 17 00:00:00 2001 From: Guido Cossu Date: Mon, 29 Jan 2018 11:11:45 +0100 Subject: [PATCH 298/377] Adding utilities for perf profiling --- benchmarks/Benchmark_wilson.cc | 24 +++++++++++- lib/util/Profiling.h | 72 ++++++++++++++++++++++++++++++++++ 2 files changed, 95 insertions(+), 1 deletion(-) create mode 100644 lib/util/Profiling.h diff --git a/benchmarks/Benchmark_wilson.cc b/benchmarks/Benchmark_wilson.cc index d1499a76..754051f0 100644 --- a/benchmarks/Benchmark_wilson.cc +++ b/benchmarks/Benchmark_wilson.cc @@ -4,7 +4,7 @@ Source file: ./benchmarks/Benchmark_wilson.cc - Copyright (C) 2015 + Copyright (C) 2018 Author: Peter Boyle Author: paboyle @@ -32,6 +32,9 @@ using namespace std; using namespace Grid; using namespace Grid::QCD; + +#include "Grid/util/Profiling.h" + template struct scal { d internal; @@ -45,6 +48,7 @@ struct scal { }; bool overlapComms = false; +bool perfProfiling = false; int main (int argc, char ** argv) { @@ -53,6 +57,9 @@ int main (int argc, char ** argv) if( GridCmdOptionExists(argv,argv+argc,"--asynch") ){ overlapComms = true; } + if( GridCmdOptionExists(argv,argv+argc,"--perf") ){ + perfProfiling = true; + } long unsigned int single_site_flops = 8*QCD::Nc*(7+16*QCD::Nc); @@ -144,6 +151,21 @@ int main (int argc, char ** argv) double t1=usecond(); double flops=single_site_flops*volume*ncall; + if 
(perfProfiling){ + std::cout< + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along + with this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + + See the full license in the file "LICENSE" in the top level distribution directory + *************************************************************************************/ + /* END LEGAL */ + +#ifndef GRID_PERF_PROFILING_H +#define GRID_PERF_PROFILING_H + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +struct System +{ + static void profile(const std::string& name,std::function body) { + std::string filename = name.find(".data") == std::string::npos ? 
(name + ".data") : name; + + // Launch profiler + pid_t pid; + std::stringstream s; + s << getpid(); + pid = fork(); + if (pid == 0) { + auto fd=open("/dev/null",O_RDWR); + dup2(fd,1); + dup2(fd,2); + exit(execl("/usr/bin/perf","perf","record","-o",filename.c_str(),"-p",s.str().c_str(),nullptr)); + } + + // Run body + body(); + + // Kill profiler + kill(pid,SIGINT); + waitpid(pid,nullptr,0); + } + + static void profile(std::function body) { + profile("perf.data",body); + } +}; + +#endif // GRID_PERF_PROFILING_H \ No newline at end of file From cd44e851f1021db5f895a4caf409c885b35d7bd9 Mon Sep 17 00:00:00 2001 From: Guido Cossu Date: Tue, 30 Jan 2018 06:04:30 +0100 Subject: [PATCH 299/377] Fixing compilation error in FundtoHirep --- extras/Hadrons/Modules/MGauge/FundtoHirep.cc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/extras/Hadrons/Modules/MGauge/FundtoHirep.cc b/extras/Hadrons/Modules/MGauge/FundtoHirep.cc index f15a3b7c..31c5a34d 100644 --- a/extras/Hadrons/Modules/MGauge/FundtoHirep.cc +++ b/extras/Hadrons/Modules/MGauge/FundtoHirep.cc @@ -57,7 +57,7 @@ std::vector TFundtoHirep::getOutput(void) template void TFundtoHirep::setup(void) { - env().template registerLattice(getName()); + envCreateLat(typename Rep::LatticeField, getName()); } // execution /////////////////////////////////////////////////////////////////// @@ -70,6 +70,6 @@ void TFundtoHirep::execute(void) Rep TargetRepresentation(U._grid); TargetRepresentation.update_representation(U); - typename Rep::LatticeField &URep = *env().template createLattice(getName()); + auto &URep = envGet(typename Rep::LatticeField, getName()); URep = TargetRepresentation.U; } From 53bffb83d453080fe5dd16fb5601d16a94997d87 Mon Sep 17 00:00:00 2001 From: Guido Cossu Date: Tue, 30 Jan 2018 12:42:36 +0100 Subject: [PATCH 300/377] Updating README with new SKL target --- README.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 13dd6996..5a92cdec 100644 
--- a/README.md +++ b/README.md @@ -187,10 +187,11 @@ Alternatively, some CPU codenames can be directly used: | `` | Description | | ----------- | -------------------------------------- | | `KNL` | [Intel Xeon Phi codename Knights Landing](http://ark.intel.com/products/codename/48999/Knights-Landing) | +| `SKL` | [Intel Skylake with AVX512 extensions](https://ark.intel.com/products/codename/37572/Skylake#@server) | | `BGQ` | Blue Gene/Q | #### Notes: -- We currently support AVX512 only for the Intel compiler. Support for GCC and clang will appear in future versions of Grid when the AVX512 support within GCC and clang will be more advanced. +- We currently support AVX512 for the Intel compiler and GCC (SKL target). Support for clang will appear in future versions of Grid when the AVX512 support in the compiler will be more advanced. - For BG/Q only [bgclang](http://trac.alcf.anl.gov/projects/llvm-bgq) is supported. We do not presently plan to support more compilers for this platform. - BG/Q performances are currently rather poor. This is being investigated for future versions. - The vector size for the `GEN` target can be specified with the `configure` script option `--enable-gen-simd-width`. From f0fcdf75b5b7c6be03224a50b1157170e441b3b5 Mon Sep 17 00:00:00 2001 From: Guido Cossu Date: Tue, 30 Jan 2018 12:44:20 +0100 Subject: [PATCH 301/377] Update README.md --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 5a92cdec..86506f52 100644 --- a/README.md +++ b/README.md @@ -191,7 +191,7 @@ Alternatively, some CPU codenames can be directly used: | `BGQ` | Blue Gene/Q | #### Notes: -- We currently support AVX512 for the Intel compiler and GCC (SKL target). Support for clang will appear in future versions of Grid when the AVX512 support in the compiler will be more advanced. +- We currently support AVX512 for the Intel compiler and GCC (KNL and SKL target). 
Support for clang will appear in future versions of Grid when the AVX512 support in the compiler will be more advanced. - For BG/Q only [bgclang](http://trac.alcf.anl.gov/projects/llvm-bgq) is supported. We do not presently plan to support more compilers for this platform. - BG/Q performances are currently rather poor. This is being investigated for future versions. - The vector size for the `GEN` target can be specified with the `configure` script option `--enable-gen-simd-width`. From 896f3a8002b3116380e2293cf3ecca350c34ce5d Mon Sep 17 00:00:00 2001 From: Peter Boyle Date: Thu, 1 Feb 2018 18:51:51 +0000 Subject: [PATCH 302/377] Fix to MPI for Hokusai system --- lib/communicator/SharedMemoryMPI.cc | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/lib/communicator/SharedMemoryMPI.cc b/lib/communicator/SharedMemoryMPI.cc index d7bd7c65..2a62b7ac 100644 --- a/lib/communicator/SharedMemoryMPI.cc +++ b/lib/communicator/SharedMemoryMPI.cc @@ -182,6 +182,7 @@ void GlobalSharedMemory::OptimalCommunicator(const std::vector &processors, #ifdef GRID_MPI3_SHMMMAP void GlobalSharedMemory::SharedMemoryAllocate(uint64_t bytes, int flags) { + std::cout << "SharedMemoryAllocate "<< bytes<< " MMAP implementation "< &processors, ////////////////////////////////////////////////////////////////////////////////////////////////////// SetCommunicator(comm_split); + // Free the temp communicator + MPI_Comm_free(&comm_split); + if(0){ std::cout << " ndim " <<_ndimension<<" " << parent._ndimension << std::endl; for(int d=0;d &processors, void CartesianCommunicator::InitFromMPICommunicator(const std::vector &processors, MPI_Comm communicator_base) { + //////////////////////////////////////////////////// + // Creates communicator, and the communicator_halo + //////////////////////////////////////////////////// _ndimension = processors.size(); _processor_coor.resize(_ndimension); From 7b8b2731e702838e3b5696faca6746f5f8157d02 Mon Sep 17 00:00:00 2001 From: 
paboyle Date: Mon, 12 Feb 2018 16:06:31 +0000 Subject: [PATCH 304/377] Conj error for complex coeffs --- lib/qcd/action/fermion/CayleyFermion5D.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/qcd/action/fermion/CayleyFermion5D.cc b/lib/qcd/action/fermion/CayleyFermion5D.cc index eace6484..e053b98c 100644 --- a/lib/qcd/action/fermion/CayleyFermion5D.cc +++ b/lib/qcd/action/fermion/CayleyFermion5D.cc @@ -73,7 +73,7 @@ void CayleyFermion5D::DminusDag(const FermionField &psi, FermionField &chi this->DW(psi,tmp_f,DaggerYes); for(int s=0;s Date: Tue, 13 Feb 2018 02:08:49 +0000 Subject: [PATCH 305/377] INterface to suit hadrons on Lanczos --- .../iterative/ImplicitlyRestartedLanczos.h | 7 + .../iterative/LocalCoherenceLanczos.h | 187 ++++++++++++------ tests/debug/Test_cayley_coarsen_support.cc | 3 +- tests/debug/Test_cayley_ldop_cr.cc | 3 +- .../Test_dwf_compressed_lanczos_reorg.cc | 14 +- 5 files changed, 143 insertions(+), 71 deletions(-) diff --git a/lib/algorithms/iterative/ImplicitlyRestartedLanczos.h b/lib/algorithms/iterative/ImplicitlyRestartedLanczos.h index 7b85c095..b4fca33a 100644 --- a/lib/algorithms/iterative/ImplicitlyRestartedLanczos.h +++ b/lib/algorithms/iterative/ImplicitlyRestartedLanczos.h @@ -181,6 +181,13 @@ enum IRLdiagonalisation { template class ImplicitlyRestartedLanczosHermOpTester : public ImplicitlyRestartedLanczosTester { public: + + static void Deflate(const std::vector &_v, + const std::vector& eval, + const Field& src_orig,Field& result) { + basisDeflate(_v,eval,src_orig,result); + } + LinearFunction &_HermOp; ImplicitlyRestartedLanczosHermOpTester(LinearFunction &HermOp) : _HermOp(HermOp) { }; int ReconstructEval(int j,RealD resid,Field &B, RealD &eval,RealD evalMaxApprox) diff --git a/lib/algorithms/iterative/LocalCoherenceLanczos.h b/lib/algorithms/iterative/LocalCoherenceLanczos.h index d5d1bbc2..c530a572 100644 --- a/lib/algorithms/iterative/LocalCoherenceLanczos.h +++ 
b/lib/algorithms/iterative/LocalCoherenceLanczos.h @@ -70,21 +70,24 @@ public: typedef Lattice FineField; LinearOperatorBase &_Linop; - Aggregation &_Aggregate; + std::vector &subspace; - ProjectedHermOp(LinearOperatorBase& linop, Aggregation &aggregate) : - _Linop(linop), - _Aggregate(aggregate) { }; + ProjectedHermOp(LinearOperatorBase& linop, std::vector & _subspace) : + _Linop(linop), subspace(_subspace) + { + assert(subspace.size() >0); + }; void operator()(const CoarseField& in, CoarseField& out) { + GridBase *FineGrid = subspace[0]._grid; + int checkerboard = subspace[0].checkerboard; + + FineField fin (FineGrid); fin.checkerboard= checkerboard; + FineField fout(FineGrid); fout.checkerboard = checkerboard; - GridBase *FineGrid = _Aggregate.FineGrid; - FineField fin(FineGrid); - FineField fout(FineGrid); - - _Aggregate.PromoteFromSubspace(in,fin); std::cout< & _poly; LinearOperatorBase &_Linop; - Aggregation &_Aggregate; + std::vector &subspace; - ProjectedFunctionHermOp(OperatorFunction & poly,LinearOperatorBase& linop, - Aggregation &aggregate) : + ProjectedFunctionHermOp(OperatorFunction & poly, + LinearOperatorBase& linop, + std::vector & _subspace) : _poly(poly), _Linop(linop), - _Aggregate(aggregate) { }; + subspace(_subspace) + { }; void operator()(const CoarseField& in, CoarseField& out) { - - GridBase *FineGrid = _Aggregate.FineGrid; - - FineField fin(FineGrid) ;fin.checkerboard =_Aggregate.checkerboard; - FineField fout(FineGrid);fout.checkerboard =_Aggregate.checkerboard; - _Aggregate.PromoteFromSubspace(in,fin); std::cout< & _Poly; OperatorFunction & _smoother; LinearOperatorBase &_Linop; - Aggregation &_Aggregate; - RealD _coarse_relax_tol; + RealD _coarse_relax_tol; + std::vector &_subspace; + ImplicitlyRestartedLanczosSmoothedTester(LinearFunction &Poly, OperatorFunction &smoother, LinearOperatorBase &Linop, - Aggregation &Aggregate, + std::vector &subspace, RealD coarse_relax_tol=5.0e3) - : _smoother(smoother), 
_Linop(Linop),_Aggregate(Aggregate), _Poly(Poly), _coarse_relax_tol(coarse_relax_tol) { }; + : _smoother(smoother), _Linop(Linop), _Poly(Poly), _subspace(subspace), + _coarse_relax_tol(coarse_relax_tol) + { }; int TestConvergence(int j,RealD eresid,CoarseField &B, RealD &eval,RealD evalMaxApprox) { CoarseField v(B); RealD eval_poly = eval; + // Apply operator _Poly(B,v); @@ -168,14 +178,13 @@ class ImplicitlyRestartedLanczosSmoothedTester : public ImplicitlyRestartedLanc } int ReconstructEval(int j,RealD eresid,CoarseField &B, RealD &eval,RealD evalMaxApprox) { - GridBase *FineGrid = _Aggregate.FineGrid; - - int checkerboard = _Aggregate.checkerboard; - + GridBase *FineGrid = _subspace[0]._grid; + int checkerboard = _subspace[0].checkerboard; FineField fB(FineGrid);fB.checkerboard =checkerboard; FineField fv(FineGrid);fv.checkerboard =checkerboard; - _Aggregate.PromoteFromSubspace(B,fv); + blockPromote(B,fv,_subspace); + _smoother(_Linop,fv,fB); RealD eval_poly = eval; @@ -217,27 +226,80 @@ protected: int _checkerboard; LinearOperatorBase & _FineOp; - // FIXME replace Aggregation with vector of fine; the code reuse is too small for - // the hassle and complexity of cross coupling. 
- Aggregation _Aggregate; - std::vector evals_fine; - std::vector evals_coarse; - std::vector evec_coarse; + std::vector &evals_fine; + std::vector &evals_coarse; + std::vector &subspace; + std::vector &evec_coarse; + +private: + std::vector _evals_fine; + std::vector _evals_coarse; + std::vector _subspace; + std::vector _evec_coarse; + public: + static void Deflate(std::vector subspace, + std::vector evec_coarse, + std::vector eval_coarse, + const FineField& src_orig,FineField& result) + { + int N = (int)evec_coarse.size(); + CoarseField src_coarse(evec_coarse[0]._grid); + CoarseField res_coarse(evec_coarse[0]._grid); res_coarse = zero; + blockProject(src_orig,src_coarse,subspace); + for (int i=0;i &FineOp, - int checkerboard) : + GridBase *CoarseGrid, + LinearOperatorBase &FineOp, + int checkerboard) : _CoarseGrid(CoarseGrid), _FineGrid(FineGrid), - _Aggregate(CoarseGrid,FineGrid,checkerboard), _FineOp(FineOp), - _checkerboard(checkerboard) + _checkerboard(checkerboard), + evals_fine (_evals_fine), + evals_coarse(_evals_coarse), + subspace (_subspace), + evec_coarse(_evec_coarse) { evals_fine.resize(0); evals_coarse.resize(0); }; - void Orthogonalise(void ) { _Aggregate.Orthogonalise(); } + ////////////////////////////////////////////////////////////////////////// + // Alternate constructore, external storage for use by Hadrons module + ////////////////////////////////////////////////////////////////////////// + LocalCoherenceLanczos(GridBase *FineGrid, + GridBase *CoarseGrid, + LinearOperatorBase &FineOp, + int checkerboard, + std::vector &ext_subspace, + std::vector &ext_coarse, + std::vector &ext_eval_fine, + std::vector &ext_eval_coarse + ) : + _CoarseGrid(CoarseGrid), + _FineGrid(FineGrid), + _FineOp(FineOp), + _checkerboard(checkerboard), + evals_fine (ext_eval_fine), + evals_coarse(ext_eval_coarse), + subspace (ext_subspace), + evec_coarse (ext_coarse) + { + evals_fine.resize(0); + evals_coarse.resize(0); + }; + + void Orthogonalise(void ) { + CoarseScalar 
InnerProd(_CoarseGrid); + blockOrthogonalise(InnerProd,subspace);std::cout << GridLogMessage <<" Gramm-Schmidt pass 1"< static RealD normalise(T& v) { @@ -246,43 +308,44 @@ public: v = v * (1.0/nn); return nn; } - + /* void fakeFine(void) { int Nk = nbasis; - _Aggregate.subspace.resize(Nk,_FineGrid); - _Aggregate.subspace[0]=1.0; - _Aggregate.subspace[0].checkerboard=_checkerboard; - normalise(_Aggregate.subspace[0]); + subspace.resize(Nk,_FineGrid); + subspace[0]=1.0; + subspace[0].checkerboard=_checkerboard; + normalise(subspace[0]); PlainHermOp Op(_FineOp); for(int k=1;k Op(_FineOp); ImplicitlyRestartedLanczosHermOpTester SimpleTester(Op); for(int k=0;k ChebySmooth(cheby_smooth); - ProjectedFunctionHermOp ChebyOp (ChebySmooth,_FineOp,_Aggregate); - ImplicitlyRestartedLanczosSmoothedTester ChebySmoothTester(ChebyOp,ChebySmooth,_FineOp,_Aggregate,relax); + ProjectedFunctionHermOp ChebyOp (ChebySmooth,_FineOp,_subspace); + ImplicitlyRestartedLanczosSmoothedTester ChebySmoothTester(ChebyOp,ChebySmooth,_FineOp,subspace,relax); for(int k=0;k Op(_FineOp); evals_fine.resize(Nm); - _Aggregate.subspace.resize(Nm,_FineGrid); + subspace.resize(Nm,_FineGrid); ImplicitlyRestartedLanczos IRL(ChebyOp,Op,Nstop,Nk,Nm,resid,MaxIt,betastp,MinRes); FineField src(_FineGrid); src=1.0; src.checkerboard = _checkerboard; int Nconv; - IRL.calc(evals_fine,_Aggregate.subspace,src,Nconv,false); + IRL.calc(evals_fine,subspace,src,Nconv,false); // Shrink down to number saved assert(Nstop>=nbasis); assert(Nconv>=nbasis); evals_fine.resize(nbasis); - _Aggregate.subspace.resize(nbasis,_FineGrid); + subspace.resize(nbasis,_FineGrid); } void calcCoarse(ChebyParams cheby_op,ChebyParams cheby_smooth,RealD relax, int Nstop, int Nk, int Nm,RealD resid, RealD MaxIt, RealD betastp, int MinRes) { Chebyshev Cheby(cheby_op); - ProjectedHermOp Op(_FineOp,_Aggregate); - ProjectedFunctionHermOp ChebyOp (Cheby,_FineOp,_Aggregate); + ProjectedHermOp Op(_FineOp,_subspace); + ProjectedFunctionHermOp ChebyOp 
(Cheby,_FineOp,_subspace); ////////////////////////////////////////////////////////////////////////////////////////////////// // create a smoother and see if we can get a cheap convergence test and smooth inside the IRL ////////////////////////////////////////////////////////////////////////////////////////////////// Chebyshev ChebySmooth(cheby_smooth); - ImplicitlyRestartedLanczosSmoothedTester ChebySmoothTester(ChebyOp,ChebySmooth,_FineOp,_Aggregate,relax); + ImplicitlyRestartedLanczosSmoothedTester ChebySmoothTester(ChebyOp,ChebySmooth,_FineOp,_subspace,relax); evals_coarse.resize(Nm); evec_coarse.resize(Nm,_CoarseGrid); diff --git a/tests/debug/Test_cayley_coarsen_support.cc b/tests/debug/Test_cayley_coarsen_support.cc index c6532a0d..f57823e5 100644 --- a/tests/debug/Test_cayley_coarsen_support.cc +++ b/tests/debug/Test_cayley_coarsen_support.cc @@ -111,6 +111,7 @@ int main (int argc, char ** argv) std::cout< subspace(nbasis,FGrid); @@ -119,7 +120,7 @@ int main (int argc, char ** argv) MdagMLinearOperator HermDefOp(Ddwf); typedef Aggregation Subspace; - Subspace Aggregates(Coarse5d,FGrid); + Subspace Aggregates(Coarse5d,FGrid,cb); Aggregates.CreateSubspaceRandom(RNG5); subspace=Aggregates.subspace; diff --git a/tests/debug/Test_cayley_ldop_cr.cc b/tests/debug/Test_cayley_ldop_cr.cc index cbefdd46..c6005fd0 100644 --- a/tests/debug/Test_cayley_ldop_cr.cc +++ b/tests/debug/Test_cayley_ldop_cr.cc @@ -78,6 +78,7 @@ int main (int argc, char ** argv) RealD mass=0.1; RealD M5=1.5; + int cb=0; std::cout< HermDefOp(Ddwf); - Subspace Aggregates(Coarse5d,FGrid); + Subspace Aggregates(Coarse5d,FGrid,cb); Aggregates.CreateSubspace(RNG5,HermDefOp); diff --git a/tests/lanczos/Test_dwf_compressed_lanczos_reorg.cc b/tests/lanczos/Test_dwf_compressed_lanczos_reorg.cc index 4c702a33..3dff4b90 100644 --- a/tests/lanczos/Test_dwf_compressed_lanczos_reorg.cc +++ b/tests/lanczos/Test_dwf_compressed_lanczos_reorg.cc @@ -56,12 +56,12 @@ public: void checkpointFine(std::string 
evecs_file,std::string evals_file) { - assert(this->_Aggregate.subspace.size()==nbasis); + assert(this->subspace.size()==nbasis); emptyUserRecord record; Grid::QCD::ScidacWriter WR; WR.open(evecs_file); for(int k=0;k_Aggregate.subspace[k],record); + WR.writeScidacFieldRecord(this->subspace[k],record); } WR.close(); @@ -72,7 +72,7 @@ public: void checkpointFineRestore(std::string evecs_file,std::string evals_file) { this->evals_fine.resize(nbasis); - this->_Aggregate.subspace.resize(nbasis,this->_FineGrid); + this->subspace.resize(nbasis,this->_FineGrid); std::cout << GridLogIRL<< "checkpointFineRestore: Reading evals from "<_Aggregate.subspace[k].checkerboard=this->_checkerboard; - RD.readScidacFieldRecord(this->_Aggregate.subspace[k],record); + this->subspace[k].checkerboard=this->_checkerboard; + RD.readScidacFieldRecord(this->subspace[k],record); } RD.close(); @@ -221,7 +221,9 @@ int main (int argc, char ** argv) { std::cout << GridLogIRL<<"Checkpointing Fine evecs"< Date: Tue, 13 Feb 2018 02:11:37 +0000 Subject: [PATCH 306/377] Move deflate to right class --- .../iterative/ImplicitlyRestartedLanczos.h | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/lib/algorithms/iterative/ImplicitlyRestartedLanczos.h b/lib/algorithms/iterative/ImplicitlyRestartedLanczos.h index b4fca33a..7d5a1889 100644 --- a/lib/algorithms/iterative/ImplicitlyRestartedLanczos.h +++ b/lib/algorithms/iterative/ImplicitlyRestartedLanczos.h @@ -182,12 +182,6 @@ template class ImplicitlyRestartedLanczosHermOpTester : public Imp { public: - static void Deflate(const std::vector &_v, - const std::vector& eval, - const Field& src_orig,Field& result) { - basisDeflate(_v,eval,src_orig,result); - } - LinearFunction &_HermOp; ImplicitlyRestartedLanczosHermOpTester(LinearFunction &HermOp) : _HermOp(HermOp) { }; int ReconstructEval(int j,RealD resid,Field &B, RealD &eval,RealD evalMaxApprox) @@ -250,6 +244,13 @@ class ImplicitlyRestartedLanczos { ///////////////////////// 
public: + + static void Deflate(const std::vector &_v, + const std::vector& eval, + const Field& src_orig,Field& result) { + basisDeflate(_v,eval,src_orig,result); + } + ////////////////////////////////////////////////////////////////// // PAB: ////////////////////////////////////////////////////////////////// From c96483e3bd559ab4a20c12102534c37447179b4c Mon Sep 17 00:00:00 2001 From: paboyle Date: Tue, 13 Feb 2018 11:39:07 +0000 Subject: [PATCH 307/377] Whitespace only change --- lib/algorithms/iterative/LocalCoherenceLanczos.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/lib/algorithms/iterative/LocalCoherenceLanczos.h b/lib/algorithms/iterative/LocalCoherenceLanczos.h index c530a572..4c05f4c7 100644 --- a/lib/algorithms/iterative/LocalCoherenceLanczos.h +++ b/lib/algorithms/iterative/LocalCoherenceLanczos.h @@ -28,7 +28,9 @@ Author: paboyle /* END LEGAL */ #ifndef GRID_LOCAL_COHERENCE_IRL_H #define GRID_LOCAL_COHERENCE_IRL_H + namespace Grid { + struct LanczosParams : Serializable { public: GRID_SERIALIZABLE_CLASS_MEMBERS(LanczosParams, From e30a80a2340275774e464b5ce7b328f0ece84b44 Mon Sep 17 00:00:00 2001 From: Christopher Kelly Date: Thu, 15 Feb 2018 17:13:36 +0000 Subject: [PATCH 308/377] Relaxed constraints on MPI thread mode when not using multiple comms threads --- lib/communicator/Communicator_mpi3.cc | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/lib/communicator/Communicator_mpi3.cc b/lib/communicator/Communicator_mpi3.cc index 6732dcdf..eb0144f0 100644 --- a/lib/communicator/Communicator_mpi3.cc +++ b/lib/communicator/Communicator_mpi3.cc @@ -44,7 +44,10 @@ void CartesianCommunicator::Init(int *argc, char ***argv) MPI_Initialized(&flag); // needed to coexist with other libs apparently if ( !flag ) { MPI_Init_thread(argc,argv,MPI_THREAD_MULTIPLE,&provided); - assert (provided == MPI_THREAD_MULTIPLE); + //If only 1 comms thread we require any threading mode other than SINGLE, but for multiple comms threads we need MULTIPLE 
+ if( (nCommThreads == 1 && provided == MPI_THREAD_SINGLE) || + (nCommThreads > 1 && provided != MPI_THREAD_MULTIPLE) ) + assert(0); } Grid_quiesce_nodes(); From 945684c470845d826fdbb8511ddf098a90779188 Mon Sep 17 00:00:00 2001 From: paboyle Date: Tue, 20 Feb 2018 14:28:38 +0000 Subject: [PATCH 309/377] updates for deflation in the RB solver --- lib/algorithms/Algorithms.h | 1 + .../iterative/ImplicitlyRestartedLanczos.h | 19 ----------- .../iterative/LocalCoherenceLanczos.h | 16 +-------- lib/algorithms/iterative/SchurRedBlack.h | 33 ++++++++++++++++--- 4 files changed, 30 insertions(+), 39 deletions(-) diff --git a/lib/algorithms/Algorithms.h b/lib/algorithms/Algorithms.h index 070a1019..ef147c53 100644 --- a/lib/algorithms/Algorithms.h +++ b/lib/algorithms/Algorithms.h @@ -39,6 +39,7 @@ Author: Peter Boyle #include #include +#include #include #include #include diff --git a/lib/algorithms/iterative/ImplicitlyRestartedLanczos.h b/lib/algorithms/iterative/ImplicitlyRestartedLanczos.h index 7d5a1889..787cf15a 100644 --- a/lib/algorithms/iterative/ImplicitlyRestartedLanczos.h +++ b/lib/algorithms/iterative/ImplicitlyRestartedLanczos.h @@ -149,19 +149,6 @@ void basisSortInPlace(std::vector & _v,std::vector& sort_vals, boo basisReorderInPlace(_v,sort_vals,idx); } -// PAB: faster to compute the inner products first then fuse loops. -// If performance critical can improve. 
-template -void basisDeflate(const std::vector &_v,const std::vector& eval,const Field& src_orig,Field& result) { - result = zero; - assert(_v.size()==eval.size()); - int N = (int)_v.size(); - for (int i=0;i &_v, - const std::vector& eval, - const Field& src_orig,Field& result) { - basisDeflate(_v,eval,src_orig,result); - } - ////////////////////////////////////////////////////////////////// // PAB: ////////////////////////////////////////////////////////////////// diff --git a/lib/algorithms/iterative/LocalCoherenceLanczos.h b/lib/algorithms/iterative/LocalCoherenceLanczos.h index 4c05f4c7..b8348c0c 100644 --- a/lib/algorithms/iterative/LocalCoherenceLanczos.h +++ b/lib/algorithms/iterative/LocalCoherenceLanczos.h @@ -31,6 +31,7 @@ Author: paboyle namespace Grid { + struct LanczosParams : Serializable { public: GRID_SERIALIZABLE_CLASS_MEMBERS(LanczosParams, @@ -240,21 +241,6 @@ private: std::vector _evec_coarse; public: - static void Deflate(std::vector subspace, - std::vector evec_coarse, - std::vector eval_coarse, - const FineField& src_orig,FineField& result) - { - int N = (int)evec_coarse.size(); - CoarseField src_coarse(evec_coarse[0]._grid); - CoarseField res_coarse(evec_coarse[0]._grid); res_coarse = zero; - blockProject(src_orig,src_coarse,subspace); - for (int i=0;i - void operator() (Matrix & _Matrix,const Field &in, Field &out){ + void operator() (Matrix & _Matrix,const Field &in, Field &out){ + ZeroGuesser guess; + (*this)(_Matrix,in,out,guess); + } + template + void operator() (Matrix & _Matrix,const Field &in, Field &out, Guesser &guess){ // FIXME CGdiagonalMee not implemented virtual function // FIXME use CBfactorise to control schur decomp @@ -129,7 +134,6 @@ namespace Grid { pickCheckerboard(Odd ,src_o,in); pickCheckerboard(Even,sol_e,out); pickCheckerboard(Odd ,sol_o,out); - std::cout << GridLogMessage << " SchurRedBlackStaggeredSolve checkerboards picked" < - void operator() (Matrix & _Matrix,const Field &in, Field &out){ + void operator() 
(Matrix & _Matrix,const Field &in, Field &out){ + ZeroGuesser guess; + (*this)(_Matrix,in,out,guess); + } + template + void operator() (Matrix & _Matrix,const Field &in, Field &out,Guesser &guess){ // FIXME CGdiagonalMee not implemented virtual function // FIXME use CBfactorise to control schur decomp @@ -225,6 +235,7 @@ namespace Grid { // Call the red-black solver ////////////////////////////////////////////////////////////// std::cout< - void operator() (Matrix & _Matrix,const Field &in, Field &out){ + void operator() (Matrix & _Matrix,const Field &in, Field &out){ + ZeroGuesser guess; + (*this)(_Matrix,in,out,guess); + } + template + void operator() (Matrix & _Matrix,const Field &in, Field &out,Guesser &guess){ // FIXME CGdiagonalMee not implemented virtual function // FIXME use CBfactorise to control schur decomp @@ -305,6 +321,7 @@ namespace Grid { ////////////////////////////////////////////////////////////// std::cout< - void operator() (Matrix & _Matrix,const Field &in, Field &out){ + void operator() (Matrix & _Matrix,const Field &in, Field &out){ + ZeroGuesser guess; + (*this)(_Matrix,in,out,guess); + } + template + void operator() (Matrix & _Matrix,const Field &in, Field &out,Guesser &guess){ // FIXME CGdiagonalMee not implemented virtual function // FIXME use CBfactorise to control schur decomp @@ -385,6 +407,7 @@ namespace Grid { std::cout< Date: Tue, 20 Feb 2018 14:29:08 +0000 Subject: [PATCH 310/377] Deflation interface for solvers --- lib/algorithms/iterative/Deflation.h | 101 +++++++++++++++++++++++++++ 1 file changed, 101 insertions(+) create mode 100644 lib/algorithms/iterative/Deflation.h diff --git a/lib/algorithms/iterative/Deflation.h b/lib/algorithms/iterative/Deflation.h new file mode 100644 index 00000000..b6aa0d3d --- /dev/null +++ b/lib/algorithms/iterative/Deflation.h @@ -0,0 +1,101 @@ + /************************************************************************************* + + Grid physics library, www.github.com/paboyle/Grid + + Source 
file: ./lib/algorithms/iterative/ImplicitlyRestartedLanczos.h + + Copyright (C) 2015 + +Author: Peter Boyle + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along + with this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + + See the full license in the file "LICENSE" in the top level distribution directory + *************************************************************************************/ + /* END LEGAL */ +#ifndef GRID_DEFLATION_H +#define GRID_DEFLATION_H + +namespace Grid { + +struct ZeroGuesser { +public: + template + void operator()(const Field &src,Field &guess) { guess = Zero(); }; +}; +struct SourceGuesser { +public: + template + void operator()(const Field &src,Field &guess) { guess = src; }; +}; + +//////////////////////////////// +// Fine grid deflation +//////////////////////////////// +template +struct DeflatedGuesser { +private: + const std::vector &evec; + const std::vector &eval; + +public: + + DeflatedGuesser(const std::vector & _evec,const std::vector & _eval) : evec(_evec), eval(_eval) {}; + + void operator()(const Field &src,Field &guess) { + guess = zero; + assert(evec.size()==eval.size()); + auto N = evec.size(); + for (int i=0;i +class LocalCoherenceDeflatedGuesser { +private: + const std::vector &subspace; + const std::vector &evec_coarse; + const std::vector &eval_coarse; +public: + + LocalCoherenceDeflatedGuesser(const std::vector &_subspace, + const 
std::vector &_evec_coarse, + const std::vector &_eval_coarse) + : subspace(_subspace), + evec_coarse(_evec_coarse), + eval_coarse(_eval_coarse) + { + } + + void operator()(const FineField &src,FineField &guess) { + int N = (int)evec_coarse.size(); + CoarseField src_coarse(evec_coarse[0]._grid); + CoarseField guess_coarse(evec_coarse[0]._grid); guess_coarse = zero; + blockProject(src,src_coarse,subspace); + for (int i=0;i Date: Tue, 20 Feb 2018 15:12:31 +0000 Subject: [PATCH 311/377] Extra communicator free that I had missed. Hard to audit them all as this is complex --- lib/communicator/Communicator_mpi3.cc | 12 ++++++++++-- lib/communicator/SharedMemory.h | 1 + lib/communicator/SharedMemoryMPI.cc | 4 ++++ lib/communicator/SharedMemoryNone.cc | 2 ++ 4 files changed, 17 insertions(+), 2 deletions(-) diff --git a/lib/communicator/Communicator_mpi3.cc b/lib/communicator/Communicator_mpi3.cc index eb0144f0..424b7973 100644 --- a/lib/communicator/Communicator_mpi3.cc +++ b/lib/communicator/Communicator_mpi3.cc @@ -89,10 +89,16 @@ void CartesianCommunicator::ProcessorCoorFromRank(int rank, std::vector &c CartesianCommunicator::CartesianCommunicator(const std::vector &processors) { MPI_Comm optimal_comm; - GlobalSharedMemory::OptimalCommunicator (processors,optimal_comm); // Remap using the shared memory optimising routine + //////////////////////////////////////////////////// + // Remap using the shared memory optimising routine + // The remap creates a comm which must be freed + //////////////////////////////////////////////////// + GlobalSharedMemory::OptimalCommunicator (processors,optimal_comm); InitFromMPICommunicator(processors,optimal_comm); SetCommunicator(optimal_comm); + /////////////////////////////////////////////////// // Free the temp communicator + /////////////////////////////////////////////////// MPI_Comm_free(&optimal_comm); } @@ -202,8 +208,10 @@ CartesianCommunicator::CartesianCommunicator(const std::vector &processors, // Take the right SHM buffers 
////////////////////////////////////////////////////////////////////////////////////////////////////// SetCommunicator(comm_split); - + + /////////////////////////////////////////////// // Free the temp communicator + /////////////////////////////////////////////// MPI_Comm_free(&comm_split); if(0){ diff --git a/lib/communicator/SharedMemory.h b/lib/communicator/SharedMemory.h index 0f647dc6..9f6b1a25 100644 --- a/lib/communicator/SharedMemory.h +++ b/lib/communicator/SharedMemory.h @@ -133,6 +133,7 @@ class SharedMemory public: SharedMemory() {}; + ~SharedMemory(); /////////////////////////////////////////////////////////////////////////////////////// // set the buffers & sizes /////////////////////////////////////////////////////////////////////////////////////// diff --git a/lib/communicator/SharedMemoryMPI.cc b/lib/communicator/SharedMemoryMPI.cc index 2a62b7ac..9e5d8f15 100644 --- a/lib/communicator/SharedMemoryMPI.cc +++ b/lib/communicator/SharedMemoryMPI.cc @@ -399,5 +399,9 @@ void *SharedMemory::ShmBufferTranslate(int rank,void * local_p) return (void *) remote; } } +SharedMemory::~SharedMemory() +{ + MPI_Comm_free(&ShmComm); +}; } diff --git a/lib/communicator/SharedMemoryNone.cc b/lib/communicator/SharedMemoryNone.cc index 7feed7e4..a23e3c1c 100644 --- a/lib/communicator/SharedMemoryNone.cc +++ b/lib/communicator/SharedMemoryNone.cc @@ -122,5 +122,7 @@ void *SharedMemory::ShmBufferTranslate(int rank,void * local_p) { return NULL; } +SharedMemory::~SharedMemory() +{}; } From 2e88408f5ce1bc1ba4052be07c4b1e94f0a99f5a Mon Sep 17 00:00:00 2001 From: Fionn O hOgain Date: Fri, 2 Mar 2018 22:27:41 +0000 Subject: [PATCH 312/377] Some changes needed for deflation interface --- lib/algorithms/iterative/Deflation.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/lib/algorithms/iterative/Deflation.h b/lib/algorithms/iterative/Deflation.h index b6aa0d3d..b2239c55 100644 --- a/lib/algorithms/iterative/Deflation.h +++ 
b/lib/algorithms/iterative/Deflation.h @@ -59,7 +59,7 @@ public: assert(evec.size()==eval.size()); auto N = evec.size(); for (int i=0;i Date: Mon, 5 Mar 2018 12:22:18 +0000 Subject: [PATCH 313/377] Finalize protection --- lib/communicator/SharedMemoryMPI.cc | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/lib/communicator/SharedMemoryMPI.cc b/lib/communicator/SharedMemoryMPI.cc index 9e5d8f15..45edbb07 100644 --- a/lib/communicator/SharedMemoryMPI.cc +++ b/lib/communicator/SharedMemoryMPI.cc @@ -401,7 +401,10 @@ void *SharedMemory::ShmBufferTranslate(int rank,void * local_p) } SharedMemory::~SharedMemory() { - MPI_Comm_free(&ShmComm); + int MPI_is_finalised; MPI_Finalized(&MPI_is_finalised); + if ( !MPI_is_finalised ) { + MPI_Comm_free(&ShmComm); + } }; } From c399c2b44dea7e6cc4ca6ee34adcd1a86b07c338 Mon Sep 17 00:00:00 2001 From: paboyle Date: Mon, 5 Mar 2018 12:55:41 +0000 Subject: [PATCH 314/377] Guido broke the charge conjugate plaquette action with premature optimisation. This sector of the code does not matter for anything other than Guido's quenched HMC studies, and any plaq specific optimisations should be retained in a private branch instead of destroying the code simplicity. 
--- lib/qcd/action/gauge/WilsonGaugeAction.h | 12 ++++-------- lib/qcd/utils/WilsonLoops.h | 5 +++-- tests/forces/Test_gp_rect_force.cc | 4 ++-- tests/forces/Test_gpwilson_force.cc | 2 +- 4 files changed, 10 insertions(+), 13 deletions(-) diff --git a/lib/qcd/action/gauge/WilsonGaugeAction.h b/lib/qcd/action/gauge/WilsonGaugeAction.h index 1ea780b7..77c2424c 100644 --- a/lib/qcd/action/gauge/WilsonGaugeAction.h +++ b/lib/qcd/action/gauge/WilsonGaugeAction.h @@ -71,18 +71,14 @@ class WilsonGaugeAction : public Action { RealD factor = 0.5 * beta / RealD(Nc); - //GaugeLinkField Umu(U._grid); + GaugeLinkField Umu(U._grid); GaugeLinkField dSdU_mu(U._grid); for (int mu = 0; mu < Nd; mu++) { - //Umu = PeekIndex(U, mu); + Umu = PeekIndex(U, mu); // Staple in direction mu - //WilsonLoops::Staple(dSdU_mu, U, mu); - //dSdU_mu = Ta(Umu * dSdU_mu) * factor; - - - WilsonLoops::StapleMult(dSdU_mu, U, mu); - dSdU_mu = Ta(dSdU_mu) * factor; + WilsonLoops::Staple(dSdU_mu, U, mu); + dSdU_mu = Ta(Umu * dSdU_mu) * factor; PokeIndex(dSdU, dSdU_mu, mu); } diff --git a/lib/qcd/utils/WilsonLoops.h b/lib/qcd/utils/WilsonLoops.h index cdd76ecc..6cf34e0c 100644 --- a/lib/qcd/utils/WilsonLoops.h +++ b/lib/qcd/utils/WilsonLoops.h @@ -212,6 +212,7 @@ public: // For the force term +/* static void StapleMult(GaugeMat &staple, const GaugeLorentz &Umu, int mu) { GridBase *grid = Umu._grid; std::vector U(Nd, grid); @@ -225,7 +226,7 @@ static void StapleMult(GaugeMat &staple, const GaugeLorentz &Umu, int mu) { for (int nu = 0; nu < Nd; nu++) { if (nu != mu) { - // this is ~10% faster than the Staple + // this is ~10% faster than the Staple -- PAB: so what it gives the WRONG answers for other BC's! 
tmp1 = Cshift(U[nu], mu, 1); tmp2 = Cshift(U[mu], nu, 1); staple += tmp1* adj(U[nu]*tmp2); @@ -235,7 +236,7 @@ static void StapleMult(GaugeMat &staple, const GaugeLorentz &Umu, int mu) { } staple = U[mu]*staple; } - +*/ ////////////////////////////////////////////////// // the sum over all staples on each site ////////////////////////////////////////////////// diff --git a/tests/forces/Test_gp_rect_force.cc b/tests/forces/Test_gp_rect_force.cc index bb35c77a..6b3349e0 100644 --- a/tests/forces/Test_gp_rect_force.cc +++ b/tests/forces/Test_gp_rect_force.cc @@ -59,8 +59,8 @@ int main (int argc, char ** argv) double beta = 1.0; double c1 = 0.331; - //GparityPlaqPlusRectangleActionR Action(beta,c1); - ConjugateWilsonGaugeActionR Action(beta); + ConjugatePlaqPlusRectangleActionR Action(beta,c1); + // ConjugateWilsonGaugeActionR Action(beta); //WilsonGaugeActionR Action(beta); ComplexD S = Action.S(U); diff --git a/tests/forces/Test_gpwilson_force.cc b/tests/forces/Test_gpwilson_force.cc index ebde61a5..e52ed7ee 100644 --- a/tests/forces/Test_gpwilson_force.cc +++ b/tests/forces/Test_gpwilson_force.cc @@ -91,7 +91,7 @@ int main (int argc, char ** argv) //////////////////////////////////// // Modify the gauge field a little //////////////////////////////////// - RealD dt = 0.0001; + RealD dt = 0.01; LatticeColourMatrix mommu(UGrid); LatticeColourMatrix forcemu(UGrid); From 485c5db0fe28b04c867caf33c879c58f9b924d96 Mon Sep 17 00:00:00 2001 From: Antonin Portelli Date: Tue, 6 Mar 2018 19:22:03 +0000 Subject: [PATCH 315/377] conversion of Grid tensors to nested std::vector in preparation for tensor serialisation --- lib/serialisation/BaseIO.h | 35 +++++++++++++++ tests/IO/Test_serialisation.cc | 78 +++++++--------------------------- 2 files changed, 51 insertions(+), 62 deletions(-) diff --git a/lib/serialisation/BaseIO.h b/lib/serialisation/BaseIO.h index 24e1cec7..1af5acc6 100644 --- a/lib/serialisation/BaseIO.h +++ b/lib/serialisation/BaseIO.h @@ -31,6 +31,7 @@ Author: 
Guido Cossu #define GRID_SERIALISATION_ABSTRACT_READER_H #include +#include namespace Grid { // Vector IO utilities /////////////////////////////////////////////////////// @@ -69,6 +70,40 @@ namespace Grid { return os; } + // convert Grid scalar tensors to std::vectors + template + void tensorToVec(V &v, const T& t) + { + v = t; + } + + template + void tensorToVec(V &v, const iScalar& t) + { + tensorToVec(v, t._internal); + } + + template + void tensorToVec(std::vector &v, const iVector& t) + { + v.resize(N); + for (unsigned int i = 0; i < N; i++) + { + tensorToVec(v[i], t._internal[i]); + } + } + + template + void tensorToVec(std::vector> &v, const iMatrix& t) + { + v.resize(N, std::vector(N)); + for (unsigned int i = 0; i < N; i++) + for (unsigned int j = 0; j < N; j++) + { + tensorToVec(v[i][j], t._internal[i][j]); + } + } + // Vector element trait ////////////////////////////////////////////////////// template struct element diff --git a/tests/IO/Test_serialisation.cc b/tests/IO/Test_serialisation.cc index 82638ad9..cdafd5c0 100644 --- a/tests/IO/Test_serialisation.cc +++ b/tests/IO/Test_serialisation.cc @@ -197,68 +197,22 @@ int main(int argc,char **argv) std::cout << flatdv.getVector() << std::endl; std::cout << std::endl; + std::cout << "==== Grid tensor to vector test" << std::endl; - std::cout << ".:::::: Testing JSON classes "<< std::endl; - - - { - JSONWriter JW("bother.json"); - - // test basic type writing - myenum a = myenum::red; - push(JW,"BasicTypes"); - write(JW,std::string("i16"),i16); - write(JW,"myenum",a); - write(JW,"u16",u16); - write(JW,"i32",i32); - write(JW,"u32",u32); - write(JW,"i64",i64); - write(JW,"u64",u64); - write(JW,"f",f); - write(JW,"d",d); - write(JW,"b",b); - pop(JW); - - - // test serializable class writing - myclass obj(1234); // non-trivial constructor - std::cout << obj << std::endl; - std::cout << "-- serialisable class writing to 'bother.json'..." 
<< std::endl; - write(JW,"obj",obj); - JW.write("obj2", obj); - - - std::vector vec; - vec.push_back(myclass(1234)); - vec.push_back(myclass(5678)); - vec.push_back(myclass(3838)); - write(JW, "objvec", vec); - - } - - - { - JSONReader RD("bother.json"); - myclass jcopy1; - std::vector jveccopy1; - read(RD,"obj",jcopy1); - read(RD,"objvec", jveccopy1); - std::cout << "Loaded (JSON) -----------------" << std::endl; - std::cout << jcopy1 << std::endl << jveccopy1 << std::endl; - } - - -/* - // This is still work in progress - { - // Testing the next element function - JSONReader RD("test.json"); - RD.push("grid"); - RD.push("Observable"); - std::string name; - read(RD,"name", name); - } -*/ - + GridSerialRNG rng; + SpinColourMatrix scm; + SpinColourVector scv; + std::vector>>> scmv; + std::vector> scvv; + rng.SeedFixedIntegers(std::vector({42,10,81,9})); + random(rng, scm); + random(rng, scv); + std::cout << "Test spin-color matrix: " << scm << std::endl; + std::cout << "Test spin-color vector: " << scv << std::endl; + std::cout << "Converting to std::vector" << std::endl; + tensorToVec(scmv, scm); + tensorToVec(scvv, scv); + std::cout << "Spin-color matrix: " << scmv << std::endl; + std::cout << "Spin-color vector: " << scvv << std::endl; } From 8b14096990ff0fe1969ace0bad933ff3dbbac8fc Mon Sep 17 00:00:00 2001 From: Antonin Portelli Date: Wed, 7 Mar 2018 15:12:18 +0000 Subject: [PATCH 316/377] Conversion of Grid tensors to std::vector made more elegant, also pair syntax changed to (x y) to avoid issues with JSON/XML --- lib/serialisation/BaseIO.h | 128 ++++++++++++++++++++++++------------- 1 file changed, 83 insertions(+), 45 deletions(-) diff --git a/lib/serialisation/BaseIO.h b/lib/serialisation/BaseIO.h index 1af5acc6..5b37e1fb 100644 --- a/lib/serialisation/BaseIO.h +++ b/lib/serialisation/BaseIO.h @@ -34,74 +34,76 @@ Author: Guido Cossu #include namespace Grid { - // Vector IO utilities /////////////////////////////////////////////////////// - // helper 
function to read space-separated values + // Grid scalar tensors to nested std::vectors ////////////////////////////////// template - std::vector strToVec(const std::string s) + struct TensorToVec { - std::istringstream sstr(s); - T buf; - std::vector v; - - while(!sstr.eof()) - { - sstr >> buf; - v.push_back(buf); - } - - return v; - } - - // output to streams for vectors - template < class T > - inline std::ostream & operator<<(std::ostream &os, const std::vector &v) + typedef T type; + }; + + template + struct TensorToVec> { - os << "["; - for (auto &x: v) - { - os << x << " "; - } - if (v.size() > 0) - { - os << "\b"; - } - os << "]"; - - return os; - } - - // convert Grid scalar tensors to std::vectors - template - void tensorToVec(V &v, const T& t) + typedef TensorToVec::type type; + }; + + template + struct TensorToVec> + { + typedef TensorToVec::type type; + }; + + template + struct TensorToVec> + { + typedef std::vector::type> type; + }; + + template + struct TensorToVec> + { + typedef std::vector::type>> type; + }; + + template + TensorToVec::type tensorToVec(const T &t) { v = t; } template - void tensorToVec(V &v, const iScalar& t) + TensorToVec>::type tensorToVec(V &v, const iScalar& t) { - tensorToVec(v, t._internal); + return tensorToVec(t._internal); } - template - void tensorToVec(std::vector &v, const iVector& t) + template + TensorToVec>::type tensorToVec(const iVector& t) { + TensorToVec>::type v; + v.resize(N); for (unsigned int i = 0; i < N; i++) { - tensorToVec(v[i], t._internal[i]); + v[i] = tensorToVec(t._internal[i]); } + + return v; } - template - void tensorToVec(std::vector> &v, const iMatrix& t) + template + TensorToVec>::type tensorToVec(const iMatrix& t) { + TensorToVec>::type v; + v.resize(N, std::vector(N)); for (unsigned int i = 0; i < N; i++) for (unsigned int j = 0; j < N; j++) { - tensorToVec(v[i][j], t._internal[i][j]); + v[i][j] = tensorToVec(t._internal[i][j]); } + + return v; } // Vector element trait 
////////////////////////////////////////////////////// @@ -217,7 +219,43 @@ namespace Grid { template inline std::ostream & operator<<(std::ostream &os, const std::pair &p) { - os << "<" << p.first << " " << p.second << ">"; + os << "{" << p.first << " " << p.second << "}"; + return os; + } + + // Vector IO utilities /////////////////////////////////////////////////////// + // helper function to read space-separated values + template + std::vector strToVec(const std::string s) + { + std::istringstream sstr(s); + T buf; + std::vector v; + + while(!sstr.eof()) + { + sstr >> buf; + v.push_back(buf); + } + + return v; + } + + // output to streams for vectors + template < class T > + inline std::ostream & operator<<(std::ostream &os, const std::vector &v) + { + os << "["; + for (auto &x: v) + { + os << x << " "; + } + if (v.size() > 0) + { + os << "\b"; + } + os << "]"; + return os; } From 90dbe03e1764c28f3afac7f53576dd4249f07ea8 Mon Sep 17 00:00:00 2001 From: Antonin Portelli Date: Wed, 7 Mar 2018 15:12:18 +0000 Subject: [PATCH 317/377] Conversion of Grid tensors to std::vector made more elegant, also pair syntax changed to (x y) to avoid issues with JSON/XML --- lib/serialisation/BaseIO.h | 141 ++++++++++++++++++++------------- tests/IO/Test_serialisation.cc | 6 +- 2 files changed, 90 insertions(+), 57 deletions(-) diff --git a/lib/serialisation/BaseIO.h b/lib/serialisation/BaseIO.h index 1af5acc6..0a919aab 100644 --- a/lib/serialisation/BaseIO.h +++ b/lib/serialisation/BaseIO.h @@ -34,74 +34,73 @@ Author: Guido Cossu #include namespace Grid { - // Vector IO utilities /////////////////////////////////////////////////////// - // helper function to read space-separated values + // Grid scalar tensors to nested std::vectors ////////////////////////////////// template - std::vector strToVec(const std::string s) + struct TensorToVec { - std::istringstream sstr(s); - T buf; - std::vector v; - - while(!sstr.eof()) - { - sstr >> buf; - v.push_back(buf); - } - - return v; - } 
- - // output to streams for vectors - template < class T > - inline std::ostream & operator<<(std::ostream &os, const std::vector &v) + typedef T type; + }; + + template + struct TensorToVec> { - os << "["; - for (auto &x: v) - { - os << x << " "; - } - if (v.size() > 0) - { - os << "\b"; - } - os << "]"; - - return os; - } - - // convert Grid scalar tensors to std::vectors - template - void tensorToVec(V &v, const T& t) + typedef typename TensorToVec::type type; + }; + + template + struct TensorToVec> { - v = t; + typedef typename std::vector::type> type; + }; + + template + struct TensorToVec> + { + typedef typename std::vector::type>> type; + }; + + template + typename TensorToVec::type tensorToVec(const T &t) + { + return t; } - template - void tensorToVec(V &v, const iScalar& t) + template + typename TensorToVec>::type tensorToVec(const iScalar& t) { - tensorToVec(v, t._internal); + return tensorToVec(t._internal); } - template - void tensorToVec(std::vector &v, const iVector& t) + template + typename TensorToVec>::type tensorToVec(const iVector& t) { + typename TensorToVec>::type v; + v.resize(N); for (unsigned int i = 0; i < N; i++) { - tensorToVec(v[i], t._internal[i]); + v[i] = tensorToVec(t._internal[i]); } + + return v; } - template - void tensorToVec(std::vector> &v, const iMatrix& t) + template + typename TensorToVec>::type tensorToVec(const iMatrix& t) { - v.resize(N, std::vector(N)); + typename TensorToVec>::type v; + + v.resize(N); for (unsigned int i = 0; i < N; i++) - for (unsigned int j = 0; j < N; j++) { - tensorToVec(v[i][j], t._internal[i][j]); + v[i].resize(N); + for (unsigned int j = 0; j < N; j++) + { + v[i][j] = tensorToVec(t._internal[i][j]); + } } + + return v; } // Vector element trait ////////////////////////////////////////////////////// @@ -186,15 +185,15 @@ namespace Grid { do { is.get(c); - } while (c != '<' && !is.eof()); - if (c == '<') + } while (c != '(' && !is.eof()); + if (c == '(') { int start = is.tellg(); do { is.get(c); 
- } while (c != '>' && !is.eof()); - if (c == '>') + } while (c != ')' && !is.eof()); + if (c == ')') { int end = is.tellg(); int psize = end - start - 1; @@ -217,7 +216,43 @@ namespace Grid { template inline std::ostream & operator<<(std::ostream &os, const std::pair &p) { - os << "<" << p.first << " " << p.second << ">"; + os << "(" << p.first << " " << p.second << ")"; + return os; + } + + // Vector IO utilities /////////////////////////////////////////////////////// + // helper function to read space-separated values + template + std::vector strToVec(const std::string s) + { + std::istringstream sstr(s); + T buf; + std::vector v; + + while(!sstr.eof()) + { + sstr >> buf; + v.push_back(buf); + } + + return v; + } + + // output to streams for vectors + template < class T > + inline std::ostream & operator<<(std::ostream &os, const std::vector &v) + { + os << "["; + for (auto &x: v) + { + os << x << " "; + } + if (v.size() > 0) + { + os << "\b"; + } + os << "]"; + return os; } diff --git a/tests/IO/Test_serialisation.cc b/tests/IO/Test_serialisation.cc index cdafd5c0..d4b89652 100644 --- a/tests/IO/Test_serialisation.cc +++ b/tests/IO/Test_serialisation.cc @@ -202,8 +202,6 @@ int main(int argc,char **argv) GridSerialRNG rng; SpinColourMatrix scm; SpinColourVector scv; - std::vector>>> scmv; - std::vector> scvv; rng.SeedFixedIntegers(std::vector({42,10,81,9})); random(rng, scm); @@ -211,8 +209,8 @@ int main(int argc,char **argv) std::cout << "Test spin-color matrix: " << scm << std::endl; std::cout << "Test spin-color vector: " << scv << std::endl; std::cout << "Converting to std::vector" << std::endl; - tensorToVec(scmv, scm); - tensorToVec(scvv, scv); + auto scmv = tensorToVec(scm); + auto scvv = tensorToVec(scv); std::cout << "Spin-color matrix: " << scmv << std::endl; std::cout << "Spin-color vector: " << scvv << std::endl; } From 5e8af396fd2855d4cbc84931a1e0b99b86dbbb03 Mon Sep 17 00:00:00 2001 From: Dan H Date: Wed, 7 Mar 2018 13:11:51 -0500 Subject: [PATCH 
318/377] Add print of the current git hash on Grid init. --- .gitignore | 1 + Makefile.am | 4 ++++ lib/util/Init.cc | 7 +++++++ 3 files changed, 12 insertions(+) diff --git a/.gitignore b/.gitignore index dc59879f..49295fc6 100644 --- a/.gitignore +++ b/.gitignore @@ -123,6 +123,7 @@ make-bin-BUCK.sh ##################### lib/qcd/spin/gamma-gen/*.h lib/qcd/spin/gamma-gen/*.cc +lib/version.h # vs code editor files # ######################## diff --git a/Makefile.am b/Makefile.am index 3a65cf1b..d507bf08 100644 --- a/Makefile.am +++ b/Makefile.am @@ -5,6 +5,10 @@ include $(top_srcdir)/doxygen.inc bin_SCRIPTS=grid-config +BUILT_SOURCES = version.h + +version.h: + echo "`git log -n 1 --format=format:"#define GITHASH \\"%H:%d\\"%n" HEAD`" > $(srcdir)/lib/version.h .PHONY: bench check tests doxygen-run doxygen-doc $(DX_PS_GOAL) $(DX_PDF_GOAL) diff --git a/lib/util/Init.cc b/lib/util/Init.cc index fb3d7a1e..b4ac14b7 100644 --- a/lib/util/Init.cc +++ b/lib/util/Init.cc @@ -49,6 +49,7 @@ Author: paboyle #include #include +#include #include @@ -288,6 +289,12 @@ void Grid_init(int *argc,char ***argv) std::cout << "but WITHOUT ANY WARRANTY; without even the implied warranty of"< Date: Thu, 8 Mar 2018 09:50:39 +0000 Subject: [PATCH 319/377] std::vector to tensor conversion + test units --- lib/serialisation/BaseIO.h | 32 +++++++++++++++++++++++++++++++ tests/IO/Test_serialisation.cc | 35 +++++++++++++++++++++++----------- 2 files changed, 56 insertions(+), 11 deletions(-) diff --git a/lib/serialisation/BaseIO.h b/lib/serialisation/BaseIO.h index 0a919aab..d129b9e5 100644 --- a/lib/serialisation/BaseIO.h +++ b/lib/serialisation/BaseIO.h @@ -103,6 +103,38 @@ namespace Grid { return v; } + template + void vecToTensor(T &t, const typename TensorToVec::type &v) + { + t = v; + } + + + template + void vecToTensor(iScalar &t, const typename TensorToVec>::type &v) + { + vecToTensor(t._internal, v); + } + + template + void vecToTensor(iVector &t, const typename TensorToVec>::type &v) + { 
+ for (unsigned int i = 0; i < N; i++) + { + vecToTensor(t._internal[i], v[i]); + } + } + + template + void vecToTensor(iMatrix &t, const typename TensorToVec>::type &v) + { + for (unsigned int i = 0; i < N; i++) + for (unsigned int j = 0; j < N; j++) + { + vecToTensor(t._internal[i][j], v[i][j]); + } + } + // Vector element trait ////////////////////////////////////////////////////// template struct element diff --git a/tests/IO/Test_serialisation.cc b/tests/IO/Test_serialisation.cc index d4b89652..93007e44 100644 --- a/tests/IO/Test_serialisation.cc +++ b/tests/IO/Test_serialisation.cc @@ -93,6 +93,24 @@ void ioTest(const std::string &filename, const O &object, const std::string &nam if (!good) exit(EXIT_FAILURE); } +template +void tensorConvTestFn(GridSerialRNG &rng, const std::string label) +{ + T t, ft; + Real n; + bool good; + + random(rng, t); + auto tv = tensorToVec(t); + vecToTensor(ft, tv); + n = norm2(t - ft); + good = (n == 0); + std::cout << label << " norm 2 diff: " << n << " -- " + << (good ? 
"success" : "failure") << std::endl; +} + +#define tensorConvTest(rng, type) tensorConvTestFn(rng, #type) + int main(int argc,char **argv) { std::cout << "==== basic IO" << std::endl; @@ -200,17 +218,12 @@ int main(int argc,char **argv) std::cout << "==== Grid tensor to vector test" << std::endl; GridSerialRNG rng; - SpinColourMatrix scm; - SpinColourVector scv; rng.SeedFixedIntegers(std::vector({42,10,81,9})); - random(rng, scm); - random(rng, scv); - std::cout << "Test spin-color matrix: " << scm << std::endl; - std::cout << "Test spin-color vector: " << scv << std::endl; - std::cout << "Converting to std::vector" << std::endl; - auto scmv = tensorToVec(scm); - auto scvv = tensorToVec(scv); - std::cout << "Spin-color matrix: " << scmv << std::endl; - std::cout << "Spin-color vector: " << scvv << std::endl; + tensorConvTest(rng, SpinColourMatrix); + tensorConvTest(rng, SpinColourVector); + tensorConvTest(rng, ColourMatrix); + tensorConvTest(rng, ColourVector); + tensorConvTest(rng, SpinMatrix); + tensorConvTest(rng, SpinVector); } From c49be8988be95f37c05741f6e807e41707c847b1 Mon Sep 17 00:00:00 2001 From: Antonin Portelli Date: Thu, 8 Mar 2018 09:51:22 +0000 Subject: [PATCH 320/377] Grid tensor serialisation --- lib/serialisation/Hdf5IO.h | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/lib/serialisation/Hdf5IO.h b/lib/serialisation/Hdf5IO.h index 94ad9736..9140435d 100644 --- a/lib/serialisation/Hdf5IO.h +++ b/lib/serialisation/Hdf5IO.h @@ -5,6 +5,7 @@ #include #include #include +#include #include "Hdf5Type.h" #ifndef H5_NO_NAMESPACE @@ -37,6 +38,12 @@ namespace Grid template typename std::enable_if>::is_number, void>::type writeDefault(const std::string &s, const std::vector &x); + template + void writeDefault(const std::string &s, const iScalar &t); + template + void writeDefault(const std::string &s, const iVector &t); + template + void writeDefault(const std::string &s, const iMatrix &t); private: template void 
writeSingleAttribute(const U &x, const std::string &name, @@ -147,6 +154,24 @@ namespace Grid } pop(); } + + template + void Hdf5Writer::writeDefault(const std::string &s, const iScalar &t) + { + writeDefault(s, tensorToVec(t)); + } + + template + void Hdf5Writer::writeDefault(const std::string &s, const iVector &t) + { + writeDefault(s, tensorToVec(t)); + } + + template + void Hdf5Writer::writeDefault(const std::string &s, const iMatrix &t) + { + writeDefault(s, tensorToVec(t)); + } // Reader template implementation //////////////////////////////////////////// template From 360cface3349f923ef00abaabc064e4e4af94c2b Mon Sep 17 00:00:00 2001 From: Antonin Portelli Date: Thu, 8 Mar 2018 19:12:03 +0000 Subject: [PATCH 321/377] Grid tensor serialisation fully implemented and tested --- lib/lattice/Lattice_comparison_utils.h | 2 +- lib/serialisation/BaseIO.h | 538 +++++++------------------ lib/serialisation/Hdf5IO.h | 24 -- lib/serialisation/VectorUtils.h | 336 +++++++++++++++ lib/tensors/Tensor_logical.h | 33 ++ tests/IO/Test_serialisation.cc | 27 +- 6 files changed, 519 insertions(+), 441 deletions(-) create mode 100644 lib/serialisation/VectorUtils.h diff --git a/lib/lattice/Lattice_comparison_utils.h b/lib/lattice/Lattice_comparison_utils.h index 14a19383..9580d4d2 100644 --- a/lib/lattice/Lattice_comparison_utils.h +++ b/lib/lattice/Lattice_comparison_utils.h @@ -198,7 +198,7 @@ namespace Grid { typedef typename vsimd::scalar_type scalar;\ return Comparison(functor(),lhs,rhs);\ }\ - template\ + template = 0>\ inline vInteger operator op(const iScalar &lhs,const iScalar &rhs)\ { \ return lhs._internal op rhs._internal; \ diff --git a/lib/serialisation/BaseIO.h b/lib/serialisation/BaseIO.h index d129b9e5..c9b3fb9e 100644 --- a/lib/serialisation/BaseIO.h +++ b/lib/serialisation/BaseIO.h @@ -32,178 +32,9 @@ Author: Guido Cossu #include #include +#include namespace Grid { - // Grid scalar tensors to nested std::vectors ////////////////////////////////// - template - 
struct TensorToVec - { - typedef T type; - }; - - template - struct TensorToVec> - { - typedef typename TensorToVec::type type; - }; - - template - struct TensorToVec> - { - typedef typename std::vector::type> type; - }; - - template - struct TensorToVec> - { - typedef typename std::vector::type>> type; - }; - - template - typename TensorToVec::type tensorToVec(const T &t) - { - return t; - } - - template - typename TensorToVec>::type tensorToVec(const iScalar& t) - { - return tensorToVec(t._internal); - } - - template - typename TensorToVec>::type tensorToVec(const iVector& t) - { - typename TensorToVec>::type v; - - v.resize(N); - for (unsigned int i = 0; i < N; i++) - { - v[i] = tensorToVec(t._internal[i]); - } - - return v; - } - - template - typename TensorToVec>::type tensorToVec(const iMatrix& t) - { - typename TensorToVec>::type v; - - v.resize(N); - for (unsigned int i = 0; i < N; i++) - { - v[i].resize(N); - for (unsigned int j = 0; j < N; j++) - { - v[i][j] = tensorToVec(t._internal[i][j]); - } - } - - return v; - } - - template - void vecToTensor(T &t, const typename TensorToVec::type &v) - { - t = v; - } - - - template - void vecToTensor(iScalar &t, const typename TensorToVec>::type &v) - { - vecToTensor(t._internal, v); - } - - template - void vecToTensor(iVector &t, const typename TensorToVec>::type &v) - { - for (unsigned int i = 0; i < N; i++) - { - vecToTensor(t._internal[i], v[i]); - } - } - - template - void vecToTensor(iMatrix &t, const typename TensorToVec>::type &v) - { - for (unsigned int i = 0; i < N; i++) - for (unsigned int j = 0; j < N; j++) - { - vecToTensor(t._internal[i][j], v[i][j]); - } - } - - // Vector element trait ////////////////////////////////////////////////////// - template - struct element - { - typedef T type; - static constexpr bool is_number = false; - }; - - template - struct element> - { - typedef typename element::type type; - static constexpr bool is_number = std::is_arithmetic::value - or is_complex::value - or 
element::is_number; - }; - - // Vector flattening utility class //////////////////////////////////////////// - // Class to flatten a multidimensional std::vector - template - class Flatten - { - public: - typedef typename element::type Element; - public: - explicit Flatten(const V &vector); - const V & getVector(void); - const std::vector & getFlatVector(void); - const std::vector & getDim(void); - private: - void accumulate(const Element &e); - template - void accumulate(const W &v); - void accumulateDim(const Element &e); - template - void accumulateDim(const W &v); - private: - const V &vector_; - std::vector flatVector_; - std::vector dim_; - }; - - // Class to reconstruct a multidimensional std::vector - template - class Reconstruct - { - public: - typedef typename element::type Element; - public: - Reconstruct(const std::vector &flatVector, - const std::vector &dim); - const V & getVector(void); - const std::vector & getFlatVector(void); - const std::vector & getDim(void); - private: - void fill(std::vector &v); - template - void fill(W &v); - void resize(std::vector &v, const unsigned int dim); - template - void resize(W &v, const unsigned int dim); - private: - V vector_; - const std::vector &flatVector_; - std::vector dim_; - size_t ind_{0}; - unsigned int dimInd_{0}; - }; - // Pair IO utilities ///////////////////////////////////////////////////////// // helper function to parse input in the format "" template @@ -252,42 +83,6 @@ namespace Grid { return os; } - // Vector IO utilities /////////////////////////////////////////////////////// - // helper function to read space-separated values - template - std::vector strToVec(const std::string s) - { - std::istringstream sstr(s); - T buf; - std::vector v; - - while(!sstr.eof()) - { - sstr >> buf; - v.push_back(buf); - } - - return v; - } - - // output to streams for vectors - template < class T > - inline std::ostream & operator<<(std::ostream &os, const std::vector &v) - { - os << "["; - for (auto &x: v) - 
{ - os << x << " "; - } - if (v.size() > 0) - { - os << "\b"; - } - os << "]"; - - return os; - } - // Abstract writer/reader classes //////////////////////////////////////////// // static polymorphism implemented using CRTP idiom class Serializable; @@ -307,6 +102,12 @@ namespace Grid { template typename std::enable_if::value, void>::type write(const std::string& s, const U &output); + template + void write(const std::string &s, const iScalar &output); + template + void write(const std::string &s, const iVector &output); + template + void write(const std::string &s, const iMatrix &output); private: T *upcast; }; @@ -326,6 +127,12 @@ namespace Grid { template typename std::enable_if::value, void>::type read(const std::string& s, U &output); + template + void read(const std::string &s, iScalar &output); + template + void read(const std::string &s, iVector &output); + template + void read(const std::string &s, iMatrix &output); protected: template void fromString(U &output, const std::string &s); @@ -339,203 +146,9 @@ namespace Grid { }; template struct isWriter { static const bool value = false; - }; - - - - // Generic writer interface - // serializable base class - class Serializable - { - public: - template - static inline void write(Writer &WR,const std::string &s, - const Serializable &obj) - {} - - template - static inline void read(Reader &RD,const std::string &s, - Serializable &obj) - {} - - friend inline std::ostream & operator<<(std::ostream &os, - const Serializable &obj) - { - return os; - } }; - - // Flatten class template implementation ///////////////////////////////////// - template - void Flatten::accumulate(const Element &e) - { - flatVector_.push_back(e); - } - - template - template - void Flatten::accumulate(const W &v) - { - for (auto &e: v) - { - accumulate(e); - } - } - - template - void Flatten::accumulateDim(const Element &e) {}; - - template - template - void Flatten::accumulateDim(const W &v) - { - dim_.push_back(v.size()); - 
accumulateDim(v[0]); - } - - template - Flatten::Flatten(const V &vector) - : vector_(vector) - { - accumulate(vector_); - accumulateDim(vector_); - } - - template - const V & Flatten::getVector(void) - { - return vector_; - } - - template - const std::vector::Element> & - Flatten::getFlatVector(void) - { - return flatVector_; - } - - template - const std::vector & Flatten::getDim(void) - { - return dim_; - } - - // Reconstruct class template implementation ///////////////////////////////// - template - void Reconstruct::fill(std::vector &v) - { - for (auto &e: v) - { - e = flatVector_[ind_++]; - } - } - - template - template - void Reconstruct::fill(W &v) - { - for (auto &e: v) - { - fill(e); - } - } - - template - void Reconstruct::resize(std::vector &v, const unsigned int dim) - { - v.resize(dim_[dim]); - } - - template - template - void Reconstruct::resize(W &v, const unsigned int dim) - { - v.resize(dim_[dim]); - for (auto &e: v) - { - resize(e, dim + 1); - } - } - - template - Reconstruct::Reconstruct(const std::vector &flatVector, - const std::vector &dim) - : flatVector_(flatVector) - , dim_(dim) - { - resize(vector_, 0); - fill(vector_); - } - - template - const V & Reconstruct::getVector(void) - { - return vector_; - } - - template - const std::vector::Element> & - Reconstruct::getFlatVector(void) - { - return flatVector_; - } - - template - const std::vector & Reconstruct::getDim(void) - { - return dim_; - } - - // Generic writer interface ////////////////////////////////////////////////// - template - inline void push(Writer &w, const std::string &s) { - w.push(s); - } - - template - inline void push(Writer &w, const char *s) - { - w.push(std::string(s)); - } - - template - inline void pop(Writer &w) - { - w.pop(); - } - - template - inline void write(Writer &w, const std::string& s, const U &output) - { - w.write(s, output); - } - - // Generic reader interface - template - inline bool push(Reader &r, const std::string &s) - { - return r.push(s); - } - 
- template - inline bool push(Reader &r, const char *s) - { - return r.push(std::string(s)); - } - - template - inline void pop(Reader &r) - { - r.pop(); - } - - template - inline void read(Reader &r, const std::string &s, U &output) - { - r.read(s, output); - } - - // Writer template implementation //////////////////////////////////////////// + + // Writer template implementation template Writer::Writer(void) { @@ -569,6 +182,27 @@ namespace Grid { { upcast->writeDefault(s, output); } + + template + template + void Writer::write(const std::string &s, const iScalar &output) + { + upcast->writeDefault(s, tensorToVec(output)); + } + + template + template + void Writer::write(const std::string &s, const iVector &output) + { + upcast->writeDefault(s, tensorToVec(output)); + } + + template + template + void Writer::write(const std::string &s, const iMatrix &output) + { + upcast->writeDefault(s, tensorToVec(output)); + } // Reader template implementation template @@ -604,7 +238,37 @@ namespace Grid { { upcast->readDefault(s, output); } + + template + template + void Reader::read(const std::string &s, iScalar &output) + { + typename TensorToVec>::type v; + + upcast->readDefault(s, v); + vecToTensor(output, v); + } + + template + template + void Reader::read(const std::string &s, iVector &output) + { + typename TensorToVec>::type v; + + upcast->readDefault(s, v); + vecToTensor(output, v); + } + template + template + void Reader::read(const std::string &s, iMatrix &output) + { + typename TensorToVec>::type v; + + upcast->readDefault(s, v); + vecToTensor(output, v); + } + template template void Reader::fromString(U &output, const std::string &s) @@ -623,6 +287,76 @@ namespace Grid { abort(); } } + + // serializable base class /////////////////////////////////////////////////// + class Serializable + { + public: + template + static inline void write(Writer &WR,const std::string &s, + const Serializable &obj) + {} + + template + static inline void read(Reader &RD,const 
std::string &s, + Serializable &obj) + {} + + friend inline std::ostream & operator<<(std::ostream &os, + const Serializable &obj) + { + return os; + } + }; + + // Generic writer interface ////////////////////////////////////////////////// + template + inline void push(Writer &w, const std::string &s) { + w.push(s); + } + + template + inline void push(Writer &w, const char *s) + { + w.push(std::string(s)); + } + + template + inline void pop(Writer &w) + { + w.pop(); + } + + template + inline void write(Writer &w, const std::string& s, const U &output) + { + w.write(s, output); + } + + // Generic reader interface ////////////////////////////////////////////////// + template + inline bool push(Reader &r, const std::string &s) + { + return r.push(s); + } + + template + inline bool push(Reader &r, const char *s) + { + return r.push(std::string(s)); + } + + template + inline void pop(Reader &r) + { + r.pop(); + } + + template + inline void read(Reader &r, const std::string &s, U &output) + { + r.read(s, output); + } } #endif diff --git a/lib/serialisation/Hdf5IO.h b/lib/serialisation/Hdf5IO.h index 9140435d..12625ab8 100644 --- a/lib/serialisation/Hdf5IO.h +++ b/lib/serialisation/Hdf5IO.h @@ -38,12 +38,6 @@ namespace Grid template typename std::enable_if>::is_number, void>::type writeDefault(const std::string &s, const std::vector &x); - template - void writeDefault(const std::string &s, const iScalar &t); - template - void writeDefault(const std::string &s, const iVector &t); - template - void writeDefault(const std::string &s, const iMatrix &t); private: template void writeSingleAttribute(const U &x, const std::string &name, @@ -154,24 +148,6 @@ namespace Grid } pop(); } - - template - void Hdf5Writer::writeDefault(const std::string &s, const iScalar &t) - { - writeDefault(s, tensorToVec(t)); - } - - template - void Hdf5Writer::writeDefault(const std::string &s, const iVector &t) - { - writeDefault(s, tensorToVec(t)); - } - - template - void 
Hdf5Writer::writeDefault(const std::string &s, const iMatrix &t) - { - writeDefault(s, tensorToVec(t)); - } // Reader template implementation //////////////////////////////////////////// template diff --git a/lib/serialisation/VectorUtils.h b/lib/serialisation/VectorUtils.h new file mode 100644 index 00000000..f5c76b84 --- /dev/null +++ b/lib/serialisation/VectorUtils.h @@ -0,0 +1,336 @@ +#ifndef GRID_SERIALISATION_VECTORUTILS_H +#define GRID_SERIALISATION_VECTORUTILS_H + +#include +#include + +namespace Grid { + // Grid scalar tensors to nested std::vectors ////////////////////////////////// + template + struct TensorToVec + { + typedef T type; + }; + + template + struct TensorToVec> + { + typedef typename TensorToVec::type type; + }; + + template + struct TensorToVec> + { + typedef typename std::vector::type> type; + }; + + template + struct TensorToVec> + { + typedef typename std::vector::type>> type; + }; + + template + typename TensorToVec::type tensorToVec(const T &t) + { + return t; + } + + template + typename TensorToVec>::type tensorToVec(const iScalar& t) + { + return tensorToVec(t._internal); + } + + template + typename TensorToVec>::type tensorToVec(const iVector& t) + { + typename TensorToVec>::type v; + + v.resize(N); + for (unsigned int i = 0; i < N; i++) + { + v[i] = tensorToVec(t._internal[i]); + } + + return v; + } + + template + typename TensorToVec>::type tensorToVec(const iMatrix& t) + { + typename TensorToVec>::type v; + + v.resize(N); + for (unsigned int i = 0; i < N; i++) + { + v[i].resize(N); + for (unsigned int j = 0; j < N; j++) + { + v[i][j] = tensorToVec(t._internal[i][j]); + } + } + + return v; + } + + template + void vecToTensor(T &t, const typename TensorToVec::type &v) + { + t = v; + } + + + template + void vecToTensor(iScalar &t, const typename TensorToVec>::type &v) + { + vecToTensor(t._internal, v); + } + + template + void vecToTensor(iVector &t, const typename TensorToVec>::type &v) + { + for (unsigned int i = 0; i < N; i++) + { 
+ vecToTensor(t._internal[i], v[i]); + } + } + + template + void vecToTensor(iMatrix &t, const typename TensorToVec>::type &v) + { + for (unsigned int i = 0; i < N; i++) + for (unsigned int j = 0; j < N; j++) + { + vecToTensor(t._internal[i][j], v[i][j]); + } + } + + // Vector element trait ////////////////////////////////////////////////////// + template + struct element + { + typedef T type; + static constexpr bool is_number = false; + }; + + template + struct element> + { + typedef typename element::type type; + static constexpr bool is_number = std::is_arithmetic::value + or is_complex::value + or element::is_number; + }; + + // Vector flattening utility class //////////////////////////////////////////// + // Class to flatten a multidimensional std::vector + template + class Flatten + { + public: + typedef typename element::type Element; + public: + explicit Flatten(const V &vector); + const V & getVector(void); + const std::vector & getFlatVector(void); + const std::vector & getDim(void); + private: + void accumulate(const Element &e); + template + void accumulate(const W &v); + void accumulateDim(const Element &e); + template + void accumulateDim(const W &v); + private: + const V &vector_; + std::vector flatVector_; + std::vector dim_; + }; + + // Class to reconstruct a multidimensional std::vector + template + class Reconstruct + { + public: + typedef typename element::type Element; + public: + Reconstruct(const std::vector &flatVector, + const std::vector &dim); + const V & getVector(void); + const std::vector & getFlatVector(void); + const std::vector & getDim(void); + private: + void fill(std::vector &v); + template + void fill(W &v); + void resize(std::vector &v, const unsigned int dim); + template + void resize(W &v, const unsigned int dim); + private: + V vector_; + const std::vector &flatVector_; + std::vector dim_; + size_t ind_{0}; + unsigned int dimInd_{0}; + }; + + // Flatten class template implementation + template + void 
Flatten::accumulate(const Element &e) + { + flatVector_.push_back(e); + } + + template + template + void Flatten::accumulate(const W &v) + { + for (auto &e: v) + { + accumulate(e); + } + } + + template + void Flatten::accumulateDim(const Element &e) {}; + + template + template + void Flatten::accumulateDim(const W &v) + { + dim_.push_back(v.size()); + accumulateDim(v[0]); + } + + template + Flatten::Flatten(const V &vector) + : vector_(vector) + { + accumulate(vector_); + accumulateDim(vector_); + } + + template + const V & Flatten::getVector(void) + { + return vector_; + } + + template + const std::vector::Element> & + Flatten::getFlatVector(void) + { + return flatVector_; + } + + template + const std::vector & Flatten::getDim(void) + { + return dim_; + } + + // Reconstruct class template implementation + template + void Reconstruct::fill(std::vector &v) + { + for (auto &e: v) + { + e = flatVector_[ind_++]; + } + } + + template + template + void Reconstruct::fill(W &v) + { + for (auto &e: v) + { + fill(e); + } + } + + template + void Reconstruct::resize(std::vector &v, const unsigned int dim) + { + v.resize(dim_[dim]); + } + + template + template + void Reconstruct::resize(W &v, const unsigned int dim) + { + v.resize(dim_[dim]); + for (auto &e: v) + { + resize(e, dim + 1); + } + } + + template + Reconstruct::Reconstruct(const std::vector &flatVector, + const std::vector &dim) + : flatVector_(flatVector) + , dim_(dim) + { + resize(vector_, 0); + fill(vector_); + } + + template + const V & Reconstruct::getVector(void) + { + return vector_; + } + + template + const std::vector::Element> & + Reconstruct::getFlatVector(void) + { + return flatVector_; + } + + template + const std::vector & Reconstruct::getDim(void) + { + return dim_; + } + + // Vector IO utilities /////////////////////////////////////////////////////// + // helper function to read space-separated values + template + std::vector strToVec(const std::string s) + { + std::istringstream sstr(s); + T buf; + 
std::vector v; + + while(!sstr.eof()) + { + sstr >> buf; + v.push_back(buf); + } + + return v; + } + + // output to streams for vectors + template < class T > + inline std::ostream & operator<<(std::ostream &os, const std::vector &v) + { + os << "["; + for (auto &x: v) + { + os << x << " "; + } + if (v.size() > 0) + { + os << "\b"; + } + os << "]"; + + return os; + } +} + +#endif \ No newline at end of file diff --git a/lib/tensors/Tensor_logical.h b/lib/tensors/Tensor_logical.h index 7ab3668b..58b2b03b 100644 --- a/lib/tensors/Tensor_logical.h +++ b/lib/tensors/Tensor_logical.h @@ -55,5 +55,38 @@ LOGICAL_BINOP(&); LOGICAL_BINOP(||); LOGICAL_BINOP(&&); +template +strong_inline bool operator==(const iScalar &t1, const iScalar &t2) +{ + return (t1._internal == t2._internal); +} + +template +strong_inline bool operator==(const iVector &t1, const iVector &t2) +{ + bool res = true; + + for (unsigned int i = 0; i < N; ++i) + { + res = (res && (t1._internal[i] == t2._internal[i])); + } + + return res; +} + +template +strong_inline bool operator==(const iMatrix &t1, const iMatrix &t2) +{ + bool res = true; + + for (unsigned int i = 0; i < N; ++i) + for (unsigned int j = 0; j < N; ++j) + { + res = (res && (t1._internal[i][j] == t2._internal[i][j])); + } + + return res; +} + } #endif diff --git a/tests/IO/Test_serialisation.cc b/tests/IO/Test_serialisation.cc index 93007e44..bca4d01c 100644 --- a/tests/IO/Test_serialisation.cc +++ b/tests/IO/Test_serialisation.cc @@ -45,7 +45,8 @@ public: bool , b, std::vector, array, std::vector >, twodimarray, - std::vector > >, cmplx3darray + std::vector > >, cmplx3darray, + SpinColourMatrix, scm ); myclass() {} myclass(int i) @@ -59,6 +60,12 @@ public: y=2*i; b=true; name="bother said pooh"; + scm()(0, 1)(2, 1) = 2.356; + scm()(3, 0)(1, 1) = 1.323; + scm()(2, 1)(0, 1) = 5.3336; + scm()(0, 2)(1, 1) = 6.336; + scm()(2, 1)(2, 2) = 7.344; + scm()(1, 1)(2, 0) = 8.3534; } }; @@ -113,6 +120,10 @@ void tensorConvTestFn(GridSerialRNG &rng, const 
std::string label) int main(int argc,char **argv) { + GridSerialRNG rng; + + rng.SeedFixedIntegers(std::vector({42,10,81,9})); + std::cout << "==== basic IO" << std::endl; XmlWriter WR("bother.xml"); @@ -138,7 +149,7 @@ int main(int argc,char **argv) std::cout << "-- serialisable class writing to 'bother.xml'..." << std::endl; write(WR,"obj",obj); WR.write("obj2", obj); - vec.push_back(myclass(1234)); + vec.push_back(obj); vec.push_back(myclass(5678)); vec.push_back(myclass(3838)); pair = std::make_pair(myenum::red, myenum::blue); @@ -149,8 +160,6 @@ int main(int argc,char **argv) std::cout << "-- serialisable class comparison:" << std::endl; std::cout << "vec[0] == obj: " << ((vec[0] == obj) ? "true" : "false") << std::endl; std::cout << "vec[1] == obj: " << ((vec[1] == obj) ? "true" : "false") << std::endl; - - write(WR, "objpair", pair); std::cout << "-- pair writing to std::cout:" << std::endl; std::cout << pair << std::endl; @@ -159,26 +168,20 @@ int main(int argc,char **argv) //// XML ioTest("iotest.xml", obj, "XML (object) "); ioTest("iotest.xml", vec, "XML (vector of objects)"); - ioTest("iotest.xml", pair, "XML (pair of objects)"); //// binary ioTest("iotest.bin", obj, "binary (object) "); ioTest("iotest.bin", vec, "binary (vector of objects)"); - ioTest("iotest.bin", pair, "binary (pair of objects)"); //// text ioTest("iotest.dat", obj, "text (object) "); ioTest("iotest.dat", vec, "text (vector of objects)"); - ioTest("iotest.dat", pair, "text (pair of objects)"); //// text ioTest("iotest.json", obj, "JSON (object) "); ioTest("iotest.json", vec, "JSON (vector of objects)"); - ioTest("iotest.json", pair, "JSON (pair of objects)"); //// HDF5 -#undef HAVE_HDF5 #ifdef HAVE_HDF5 ioTest("iotest.h5", obj, "HDF5 (object) "); ioTest("iotest.h5", vec, "HDF5 (vector of objects)"); - ioTest("iotest.h5", pair, "HDF5 (pair of objects)"); #endif std::cout << "\n==== vector flattening/reconstruction" << std::endl; @@ -216,10 +219,6 @@ int main(int argc,char **argv) 
std::cout << std::endl; std::cout << "==== Grid tensor to vector test" << std::endl; - - GridSerialRNG rng; - - rng.SeedFixedIntegers(std::vector({42,10,81,9})); tensorConvTest(rng, SpinColourMatrix); tensorConvTest(rng, SpinColourVector); tensorConvTest(rng, ColourMatrix); From b801e1fcd637b402e381de98261386f11d0d8da4 Mon Sep 17 00:00:00 2001 From: paboyle Date: Fri, 9 Mar 2018 20:44:10 +0000 Subject: [PATCH 322/377] fclose should be called through a call to close() --- lib/parallelIO/IldgIO.h | 1 - 1 file changed, 1 deletion(-) diff --git a/lib/parallelIO/IldgIO.h b/lib/parallelIO/IldgIO.h index b86e250f..b0bd7e2c 100644 --- a/lib/parallelIO/IldgIO.h +++ b/lib/parallelIO/IldgIO.h @@ -568,7 +568,6 @@ class IldgWriter : public ScidacWriter { writeLimeIldgLFN(header.ildg_lfn); // rec writeLimeLatticeBinaryObject(Umu,std::string(ILDG_BINARY_DATA)); // Closes message with checksum // limeDestroyWriter(LimeW); - fclose(File); } }; From 0fb84fa34b11ef81dd4bef25661420e0a088dbef Mon Sep 17 00:00:00 2001 From: Dan H Date: Mon, 12 Mar 2018 17:03:48 -0400 Subject: [PATCH 323/377] Make compilation faster by moving print of git hash. 
--- lib/util/Init.cc | 7 +------ lib/util/Init.h | 1 + lib/util/version.cc | 12 ++++++++++++ 3 files changed, 14 insertions(+), 6 deletions(-) create mode 100644 lib/util/version.cc diff --git a/lib/util/Init.cc b/lib/util/Init.cc index b4ac14b7..45a37a02 100644 --- a/lib/util/Init.cc +++ b/lib/util/Init.cc @@ -289,12 +289,7 @@ void Grid_init(int *argc,char ***argv) std::cout << "but WITHOUT ANY WARRANTY; without even the implied warranty of"< &simd, std::vector &mpi); + void printHash(void); }; #endif diff --git a/lib/util/version.cc b/lib/util/version.cc new file mode 100644 index 00000000..19759274 --- /dev/null +++ b/lib/util/version.cc @@ -0,0 +1,12 @@ +#include +#include +namespace Grid { + void printHash(){ +#ifdef GITHASH + std::cout << "Current Grid git commit hash=" << GITHASH << std::endl; +#else + std::cout << "Current Grid git commit hash is undefined. Check makefile." << std::endl; +#endif +#undef GITHASH +} +} From d86936a3deb7c1670967874c21d5afb2d9ee051d Mon Sep 17 00:00:00 2001 From: Guido Cossu Date: Fri, 16 Mar 2018 12:26:39 +0000 Subject: [PATCH 324/377] Eliminating deprecated lex_sites --- lib/lattice/Lattice_coordinate.h | 18 ------------------ tests/core/Test_main.cc | 1 - 2 files changed, 19 deletions(-) diff --git a/lib/lattice/Lattice_coordinate.h b/lib/lattice/Lattice_coordinate.h index 2e20ba17..19eceba8 100644 --- a/lib/lattice/Lattice_coordinate.h +++ b/lib/lattice/Lattice_coordinate.h @@ -52,23 +52,5 @@ namespace Grid { } }; - // LatticeCoordinate(); - // FIXME for debug; deprecate this; made obscelete by - template void lex_sites(Lattice &l){ - Real *v_ptr = (Real *)&l._odata[0]; - size_t o_len = l._grid->oSites(); - size_t v_len = sizeof(vobj)/sizeof(vRealF); - size_t vec_len = vRealF::Nsimd(); - - for(int i=0;i Date: Fri, 16 Mar 2018 21:37:03 +0000 Subject: [PATCH 325/377] Extra SHM option --- configure.ac | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/configure.ac b/configure.ac index 3a6a2960..aced6a9c 
100644 --- a/configure.ac +++ b/configure.ac @@ -340,7 +340,7 @@ case ${ac_PRECISION} in esac ###################### Shared memory allocation technique under MPI3 -AC_ARG_ENABLE([shm],[AC_HELP_STRING([--enable-shm=shmopen|hugetlbfs], +AC_ARG_ENABLE([shm],[AC_HELP_STRING([--enable-shm=shmopen|hugetlbfs|shmnone], [Select SHM allocation technique])],[ac_SHM=${enable_shm}],[ac_SHM=shmopen]) case ${ac_SHM} in @@ -349,6 +349,10 @@ case ${ac_SHM} in AC_DEFINE([GRID_MPI3_SHMOPEN],[1],[GRID_MPI3_SHMOPEN] ) ;; + shmnone) + AC_DEFINE([GRID_MPI3_SHM_NONE],[1],[GRID_MPI3_SHM_NONE] ) + ;; + hugetlbfs) AC_DEFINE([GRID_MPI3_SHMMMAP],[1],[GRID_MPI3_SHMMMAP] ) ;; From 01568b0e62d94ac5d79da8c3edd1db9eea5928e3 Mon Sep 17 00:00:00 2001 From: Peter Boyle Date: Fri, 16 Mar 2018 21:54:28 +0000 Subject: [PATCH 326/377] Add a new SHM option --- lib/communicator/SharedMemoryMPI.cc | 44 ++++++++++++++++++++++++++++- 1 file changed, 43 insertions(+), 1 deletion(-) diff --git a/lib/communicator/SharedMemoryMPI.cc b/lib/communicator/SharedMemoryMPI.cc index 45edbb07..1fa84dfb 100644 --- a/lib/communicator/SharedMemoryMPI.cc +++ b/lib/communicator/SharedMemoryMPI.cc @@ -226,6 +226,48 @@ void GlobalSharedMemory::SharedMemoryAllocate(uint64_t bytes, int flags) }; #endif // MMAP +#ifdef GRID_MPI3_SHM_NONE +void GlobalSharedMemory::SharedMemoryAllocate(uint64_t bytes, int flags) +{ + std::cout << "SharedMemoryAllocate "<< bytes<< " MMAP anonymous implementation "< Date: Fri, 16 Mar 2018 21:54:56 +0000 Subject: [PATCH 327/377] 4GB clean the offsets in parallel IO for multifile records --- lib/parallelIO/BinaryIO.h | 34 +++++++++++++++------------- lib/parallelIO/IldgIO.h | 47 +++++++++++++++++++++++++-------------- lib/parallelIO/NerscIO.h | 10 ++++----- 3 files changed, 53 insertions(+), 38 deletions(-) diff --git a/lib/parallelIO/BinaryIO.h b/lib/parallelIO/BinaryIO.h index b40a75af..39acf0e0 100644 --- a/lib/parallelIO/BinaryIO.h +++ b/lib/parallelIO/BinaryIO.h @@ -91,7 +91,7 @@ class BinaryIO { 
typedef typename vobj::scalar_object sobj; GridBase *grid = lat._grid; - int lsites = grid->lSites(); + uint64_t lsites = grid->lSites(); std::vector scalardata(lsites); unvectorizeToLexOrdArray(scalardata,lat); @@ -160,7 +160,9 @@ class BinaryIO { /* * Scidac csum is rather more heavyweight + * FIXME -- 128^3 x 256 x 16 will overflow. */ + int global_site; Lexicographic::CoorFromIndex(coor,local_site,local_vol); @@ -261,7 +263,7 @@ class BinaryIO { GridBase *grid, std::vector &iodata, std::string file, - Integer offset, + uint64_t offset, const std::string &format, int control, uint32_t &nersc_csum, uint32_t &scidac_csuma, @@ -523,7 +525,7 @@ class BinaryIO { static inline void readLatticeObject(Lattice &Umu, std::string file, munger munge, - Integer offset, + uint64_t offset, const std::string &format, uint32_t &nersc_csum, uint32_t &scidac_csuma, @@ -533,7 +535,7 @@ class BinaryIO { typedef typename vobj::Realified::scalar_type word; word w=0; GridBase *grid = Umu._grid; - int lsites = grid->lSites(); + uint64_t lsites = grid->lSites(); std::vector scalardata(lsites); std::vector iodata(lsites); // Munge, checksum, byte order in here @@ -544,7 +546,7 @@ class BinaryIO { GridStopWatch timer; timer.Start(); - parallel_for(int x=0;xBarrier(); @@ -560,7 +562,7 @@ class BinaryIO { static inline void writeLatticeObject(Lattice &Umu, std::string file, munger munge, - Integer offset, + uint64_t offset, const std::string &format, uint32_t &nersc_csum, uint32_t &scidac_csuma, @@ -569,7 +571,7 @@ class BinaryIO { typedef typename vobj::scalar_object sobj; typedef typename vobj::Realified::scalar_type word; word w=0; GridBase *grid = Umu._grid; - int lsites = grid->lSites(); + uint64_t lsites = grid->lSites(); std::vector scalardata(lsites); std::vector iodata(lsites); // Munge, checksum, byte order in here @@ -580,7 +582,7 @@ class BinaryIO { GridStopWatch timer; timer.Start(); unvectorizeToLexOrdArray(scalardata,Umu); - parallel_for(int x=0;xBarrier(); timer.Stop(); @@ 
-597,7 +599,7 @@ class BinaryIO { static inline void readRNG(GridSerialRNG &serial, GridParallelRNG ¶llel, std::string file, - Integer offset, + uint64_t offset, uint32_t &nersc_csum, uint32_t &scidac_csuma, uint32_t &scidac_csumb) @@ -610,8 +612,8 @@ class BinaryIO { std::string format = "IEEE32BIG"; GridBase *grid = parallel._grid; - int gsites = grid->gSites(); - int lsites = grid->lSites(); + uint64_t gsites = grid->gSites(); + uint64_t lsites = grid->lSites(); uint32_t nersc_csum_tmp = 0; uint32_t scidac_csuma_tmp = 0; @@ -626,7 +628,7 @@ class BinaryIO { nersc_csum,scidac_csuma,scidac_csumb); timer.Start(); - parallel_for(int lidx=0;lidx tmp(RngStateCount); std::copy(iodata[lidx].begin(),iodata[lidx].end(),tmp.begin()); parallel.SetState(tmp,lidx); @@ -659,7 +661,7 @@ class BinaryIO { static inline void writeRNG(GridSerialRNG &serial, GridParallelRNG ¶llel, std::string file, - Integer offset, + uint64_t offset, uint32_t &nersc_csum, uint32_t &scidac_csuma, uint32_t &scidac_csumb) @@ -670,8 +672,8 @@ class BinaryIO { typedef std::array RNGstate; GridBase *grid = parallel._grid; - int gsites = grid->gSites(); - int lsites = grid->lSites(); + uint64_t gsites = grid->gSites(); + uint64_t lsites = grid->lSites(); uint32_t nersc_csum_tmp; uint32_t scidac_csuma_tmp; @@ -684,7 +686,7 @@ class BinaryIO { timer.Start(); std::vector iodata(lsites); - parallel_for(int lidx=0;lidx tmp(RngStateCount); parallel.GetState(tmp,lidx); std::copy(tmp.begin(),tmp.end(),iodata[lidx].begin()); diff --git a/lib/parallelIO/IldgIO.h b/lib/parallelIO/IldgIO.h index b0bd7e2c..8655b24c 100644 --- a/lib/parallelIO/IldgIO.h +++ b/lib/parallelIO/IldgIO.h @@ -337,6 +337,20 @@ class GridLimeWriter : public BinaryIO { template void writeLimeLatticeBinaryObject(Lattice &field,std::string record_name) { + //////////////////////////////////////////////////////////////////// + // NB: FILE and iostream are jointly writing disjoint sequences in the + // the same file through different file handles 
(integer units). + // + // These are both buffered, so why I think this code is right is as follows. + // + // i) write record header to FILE *File, telegraphing the size; flush + // ii) ftello reads the offset from FILE *File . + // iii) iostream / MPI Open independently seek this offset. Write sequence direct to disk. + // Closes iostream and flushes. + // iv) fseek on FILE * to end of this disjoint section. + // v) Continue writing scidac record. + //////////////////////////////////////////////////////////////////// + //////////////////////////////////////////// // Create record header //////////////////////////////////////////// @@ -350,25 +364,24 @@ class GridLimeWriter : public BinaryIO { // std::cout << "W Gsites " <_gsites<(); BinarySimpleMunger munge; - BinaryIO::writeLatticeObject(field, filename, munge, offset, format,nersc_csum,scidac_csuma,scidac_csumb); - // fseek(File,0,SEEK_END); offset = ftello(File);std::cout << " offset now "<(field, filename, munge, offset1, format,nersc_csum,scidac_csuma,scidac_csumb); + + /////////////////////////////////////////// + // Wind forward and close the record + /////////////////////////////////////////// + fseek(File,0,SEEK_END); + unt64_t offset2 = ftello(File); // std::cout << " now at offset "<=0); //////////////////////////////////////// diff --git a/lib/parallelIO/NerscIO.h b/lib/parallelIO/NerscIO.h index 786839f2..e2c2bc39 100644 --- a/lib/parallelIO/NerscIO.h +++ b/lib/parallelIO/NerscIO.h @@ -57,7 +57,7 @@ namespace Grid { // for the header-reader static inline int readHeader(std::string file,GridBase *grid, FieldMetaData &field) { - int offset=0; + uint64_t offset=0; std::map header; std::string line; @@ -139,7 +139,7 @@ namespace Grid { typedef Lattice > GaugeField; GridBase *grid = Umu._grid; - int offset = readHeader(file,Umu._grid,header); + uint64_t offset = readHeader(file,Umu._grid,header); FieldMetaData clone(header); @@ -236,7 +236,7 @@ namespace Grid { GaugeStatistics(Umu,header); 
MachineCharacteristics(header); - int offset; + uint64_t offset; truncate(file); @@ -278,7 +278,7 @@ namespace Grid { header.plaquette=0.0; MachineCharacteristics(header); - int offset; + uint64_t offset; #ifdef RNG_RANLUX header.floating_point = std::string("UINT64"); @@ -313,7 +313,7 @@ namespace Grid { GridBase *grid = parallel._grid; - int offset = readHeader(file,grid,header); + uint64_t offset = readHeader(file,grid,header); FieldMetaData clone(header); From e1dcfd35538cf0e7ebe39d0c5f55c7427b921d38 Mon Sep 17 00:00:00 2001 From: Peter Boyle Date: Fri, 16 Mar 2018 23:10:47 +0000 Subject: [PATCH 328/377] typo fix --- lib/parallelIO/IldgIO.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/parallelIO/IldgIO.h b/lib/parallelIO/IldgIO.h index 8655b24c..b81d1e43 100644 --- a/lib/parallelIO/IldgIO.h +++ b/lib/parallelIO/IldgIO.h @@ -378,7 +378,7 @@ class GridLimeWriter : public BinaryIO { // Wind forward and close the record /////////////////////////////////////////// fseek(File,0,SEEK_END); - unt64_t offset2 = ftello(File); // std::cout << " now at offset "< Date: Sat, 17 Mar 2018 09:35:01 +0000 Subject: [PATCH 329/377] Drop RB on coarse space ; that was a mistake --- tests/lanczos/Test_dwf_compressed_lanczos_reorg.cc | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/tests/lanczos/Test_dwf_compressed_lanczos_reorg.cc b/tests/lanczos/Test_dwf_compressed_lanczos_reorg.cc index 3dff4b90..b55b66d9 100644 --- a/tests/lanczos/Test_dwf_compressed_lanczos_reorg.cc +++ b/tests/lanczos/Test_dwf_compressed_lanczos_reorg.cc @@ -180,7 +180,6 @@ int main (int argc, char ** argv) { GridCartesian * CoarseGrid4 = SpaceTimeGrid::makeFourDimGrid(coarseLatt, GridDefaultSimd(Nd,vComplex::Nsimd()),GridDefaultMpi()); GridRedBlackCartesian * CoarseGrid4rb = SpaceTimeGrid::makeFourDimRedBlackGrid(CoarseGrid4); GridCartesian * CoarseGrid5 = SpaceTimeGrid::makeFiveDimGrid(cLs,CoarseGrid4); - GridRedBlackCartesian * CoarseGrid5rb = 
SpaceTimeGrid::makeFourDimRedBlackGrid(CoarseGrid5); // Gauge field LatticeGaugeField Umu(UGrid); @@ -206,7 +205,7 @@ int main (int argc, char ** argv) { const int nbasis= 60; assert(nbasis==Ns1); - LocalCoherenceLanczosScidac _LocalCoherenceLanczos(FrbGrid,CoarseGrid5rb,HermOp,Odd); + LocalCoherenceLanczosScidac _LocalCoherenceLanczos(FrbGrid,CoarseGrid5,HermOp,Odd); std::cout << GridLogMessage << "Constructed LocalCoherenceLanczos" << std::endl; assert( (Params.doFine)||(Params.doFineRead)); From b1a38bde7ac133f984ee177583a93d918b34d26c Mon Sep 17 00:00:00 2001 From: paboyle Date: Tue, 20 Mar 2018 18:01:32 +0000 Subject: [PATCH 330/377] Extra test for Gparity with plaquette action --- tests/forces/Test_gp_plaq_force.cc | 123 +++++++++++++++++++++++++++++ 1 file changed, 123 insertions(+) create mode 100644 tests/forces/Test_gp_plaq_force.cc diff --git a/tests/forces/Test_gp_plaq_force.cc b/tests/forces/Test_gp_plaq_force.cc new file mode 100644 index 00000000..e121f21b --- /dev/null +++ b/tests/forces/Test_gp_plaq_force.cc @@ -0,0 +1,123 @@ + /************************************************************************************* + + Grid physics library, www.github.com/paboyle/Grid + + Source file: ./tests/Test_gp_rect_force.cc + + Copyright (C) 2015 + +Author: paboyle + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along + with this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+ + See the full license in the file "LICENSE" in the top level distribution directory + *************************************************************************************/ + /* END LEGAL */ +#include + +using namespace std; +using namespace Grid; +using namespace Grid::QCD; + +int main (int argc, char ** argv) +{ + Grid_init(&argc,&argv); + + std::vector latt_size = GridDefaultLatt(); + std::vector simd_layout = GridDefaultSimd(Nd,vComplex::Nsimd()); + std::vector mpi_layout = GridDefaultMpi(); + + GridCartesian Grid(latt_size,simd_layout,mpi_layout); + GridRedBlackCartesian RBGrid(&Grid); + + int threads = GridThread::GetThreads(); + std::cout< seeds({1,2,3,4}); + + GridParallelRNG pRNG(&Grid); + pRNG.SeedFixedIntegers(std::vector({45,12,81,9})); + + LatticeGaugeField U(&Grid); + + SU3::HotConfiguration(pRNG,U); + + double beta = 1.0; + double c1 = 0.331; + + //ConjugatePlaqPlusRectangleActionR Action(beta,c1); + ConjugateWilsonGaugeActionR Action(beta); + //WilsonGaugeActionR Action(beta); + + ComplexD S = Action.S(U); + + // get the deriv of phidag MdagM phi with respect to "U" + LatticeGaugeField UdSdU(&Grid); + + Action.deriv(U,UdSdU); + + //////////////////////////////////// + // Modify the gauge field a little + //////////////////////////////////// + RealD dt = 0.0001; + + LatticeColourMatrix mommu(&Grid); + LatticeColourMatrix forcemu(&Grid); + LatticeGaugeField mom(&Grid); + LatticeGaugeField Uprime(&Grid); + + for(int mu=0;mu(mom,mommu,mu); + + // fourth order exponential approx + parallel_for(auto i=mom.begin();i(UdSdU,mu); + mommu = PeekIndex(mom,mu); + + // Update gauge action density + // U = exp(p dt) U + // dU/dt = p U + // so dSdt = trace( dUdt dSdU) = trace( p UdSdUmu ) + + dS = dS - trace(mommu*UdSdUmu)*dt*2.0; + + } + ComplexD dSpred = sum(dS); + + std::cout << GridLogMessage << " S "< Date: Tue, 20 Mar 2018 18:16:15 +0000 Subject: [PATCH 331/377] Put a username in the path --- lib/communicator/SharedMemoryMPI.cc | 4 +++- 1 file changed, 3 
insertions(+), 1 deletion(-) diff --git a/lib/communicator/SharedMemoryMPI.cc b/lib/communicator/SharedMemoryMPI.cc index 1fa84dfb..8eebdc0f 100644 --- a/lib/communicator/SharedMemoryMPI.cc +++ b/lib/communicator/SharedMemoryMPI.cc @@ -27,6 +27,7 @@ Author: Peter Boyle /* END LEGAL */ #include +#include namespace Grid { @@ -288,7 +289,8 @@ void GlobalSharedMemory::SharedMemoryAllocate(uint64_t bytes, int flags) size_t size = bytes; - sprintf(shm_name,"/myGrid_mpi3_shm_%d_%d",WorldNode,r); + struct passwd *pw = getpwuid (getuid()); + sprintf(shm_name,"/Grid_%s_mpi3_shm_%d_%d",pw->pw_name,WorldNode,r); shm_unlink(shm_name); int fd=shm_open(shm_name,O_RDWR|O_CREAT,0666); From 60b57706c4bd264b39a765835d2cff0c19722bf0 Mon Sep 17 00:00:00 2001 From: Guido Cossu Date: Wed, 21 Mar 2018 13:57:30 +0000 Subject: [PATCH 332/377] Small bug fix in the shm file names --- lib/communicator/SharedMemoryMPI.cc | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/lib/communicator/SharedMemoryMPI.cc b/lib/communicator/SharedMemoryMPI.cc index 8eebdc0f..d534a6d9 100644 --- a/lib/communicator/SharedMemoryMPI.cc +++ b/lib/communicator/SharedMemoryMPI.cc @@ -325,7 +325,8 @@ void GlobalSharedMemory::SharedMemoryAllocate(uint64_t bytes, int flags) size_t size = bytes ; - sprintf(shm_name,"/Grid_mpi3_shm_%d_%d",WorldNode,r); + struct passwd *pw = getpwuid (getuid()); + sprintf(shm_name,"/Grid_%s_mpi3_shm_%d_%d",pw->pw_name,WorldNode,r); int fd=shm_open(shm_name,O_RDWR,0666); if ( fd<0 ) { perror("failed shm_open"); assert(0); } From 07fe7d0cbe4de70bbfcfe3dbe60c2cedf1b8390c Mon Sep 17 00:00:00 2001 From: paboyle Date: Wed, 21 Mar 2018 14:26:04 +0000 Subject: [PATCH 333/377] Save file in current dir; print checksums --- tests/Test_dwf_mixedcg_prec.cc | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/tests/Test_dwf_mixedcg_prec.cc b/tests/Test_dwf_mixedcg_prec.cc index 2601b76c..0a8d6540 100644 --- a/tests/Test_dwf_mixedcg_prec.cc +++ 
b/tests/Test_dwf_mixedcg_prec.cc @@ -103,6 +103,27 @@ int main (int argc, char ** argv) std::cout << "Diff between mixed and regular CG: " << diff << std::endl; + std::string file1("./Propagator1"); + std::string file2("./Propagator2"); + emptyUserRecord record; + uint32_t nersc_csum; + uint32_t scidac_csuma; + uint32_t scidac_csumb; + typedef SpinColourVectorD FermionD; + typedef vSpinColourVectorD vFermionD; + + BinarySimpleMunger munge; + std::string format = getFormatString(); + BinaryIO::writeLatticeObject(result_o,file1,munge, 0, format, + nersc_csum,scidac_csuma,scidac_csumb); + + std::cout << " Mixed checksums "<(result_o_2,file1,munge, 0, format, + nersc_csum,scidac_csuma,scidac_csumb); + + std::cout << " CG checksums "< Date: Wed, 21 Mar 2018 20:38:19 -0400 Subject: [PATCH 334/377] Add dimension match check to precisionChange. --- lib/lattice/Lattice_transfer.h | 1 + 1 file changed, 1 insertion(+) diff --git a/lib/lattice/Lattice_transfer.h b/lib/lattice/Lattice_transfer.h index 32c15d22..44f0337d 100644 --- a/lib/lattice/Lattice_transfer.h +++ b/lib/lattice/Lattice_transfer.h @@ -652,6 +652,7 @@ vectorizeFromLexOrdArray( std::vector &in, Lattice &out) template void precisionChange(Lattice &out, const Lattice &in){ assert(out._grid->Nd() == in._grid->Nd()); + assert(out._grid->FullDimensions() == in._grid->FullDimensions()); out.checkerboard = in.checkerboard; GridBase *in_grid=in._grid; GridBase *out_grid = out._grid; From 68168bf72dccbf6e1eb89f9be8c7479bd0dc2a7d Mon Sep 17 00:00:00 2001 From: Dan H Date: Wed, 21 Mar 2018 20:51:38 -0400 Subject: [PATCH 335/377] Revert "Add dimension match check to precisionChange." This reverts commit 8f601d9b39d3635f9972fb5f7326a905780bda5f. 
--- lib/lattice/Lattice_transfer.h | 1 - 1 file changed, 1 deletion(-) diff --git a/lib/lattice/Lattice_transfer.h b/lib/lattice/Lattice_transfer.h index 44f0337d..32c15d22 100644 --- a/lib/lattice/Lattice_transfer.h +++ b/lib/lattice/Lattice_transfer.h @@ -652,7 +652,6 @@ vectorizeFromLexOrdArray( std::vector &in, Lattice &out) template void precisionChange(Lattice &out, const Lattice &in){ assert(out._grid->Nd() == in._grid->Nd()); - assert(out._grid->FullDimensions() == in._grid->FullDimensions()); out.checkerboard = in.checkerboard; GridBase *in_grid=in._grid; GridBase *out_grid = out._grid; From ccde8b817f7b906150ca581b860d25a515fd8a96 Mon Sep 17 00:00:00 2001 From: Dan H Date: Wed, 21 Mar 2018 20:58:04 -0400 Subject: [PATCH 336/377] Add dimension check to precisionChange. --- lib/lattice/Lattice_transfer.h | 1 + 1 file changed, 1 insertion(+) diff --git a/lib/lattice/Lattice_transfer.h b/lib/lattice/Lattice_transfer.h index 32c15d22..44f0337d 100644 --- a/lib/lattice/Lattice_transfer.h +++ b/lib/lattice/Lattice_transfer.h @@ -652,6 +652,7 @@ vectorizeFromLexOrdArray( std::vector &in, Lattice &out) template void precisionChange(Lattice &out, const Lattice &in){ assert(out._grid->Nd() == in._grid->Nd()); + assert(out._grid->FullDimensions() == in._grid->FullDimensions()); out.checkerboard = in.checkerboard; GridBase *in_grid=in._grid; GridBase *out_grid = out._grid; From 5f8225461b51ae27587a7a25685e9c823ee682f6 Mon Sep 17 00:00:00 2001 From: Guido Cossu Date: Fri, 23 Mar 2018 10:37:58 +0000 Subject: [PATCH 337/377] Fencing mixedcg test propagator write. 
LIME is still optional in Grid --- tests/Test_dwf_mixedcg_prec.cc | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tests/Test_dwf_mixedcg_prec.cc b/tests/Test_dwf_mixedcg_prec.cc index 0a8d6540..84849ff9 100644 --- a/tests/Test_dwf_mixedcg_prec.cc +++ b/tests/Test_dwf_mixedcg_prec.cc @@ -103,6 +103,7 @@ int main (int argc, char ** argv) std::cout << "Diff between mixed and regular CG: " << diff << std::endl; + #ifdef HAVE_LIME std::string file1("./Propagator1"); std::string file2("./Propagator2"); emptyUserRecord record; @@ -124,6 +125,8 @@ int main (int argc, char ** argv) nersc_csum,scidac_csuma,scidac_csumb); std::cout << " CG checksums "< Date: Fri, 23 Mar 2018 11:14:23 +0000 Subject: [PATCH 338/377] Fix to pass CI tests --- tests/Test_dwf_mixedcg_prec.cc | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tests/Test_dwf_mixedcg_prec.cc b/tests/Test_dwf_mixedcg_prec.cc index 84849ff9..a53d8921 100644 --- a/tests/Test_dwf_mixedcg_prec.cc +++ b/tests/Test_dwf_mixedcg_prec.cc @@ -104,6 +104,8 @@ int main (int argc, char ** argv) std::cout << "Diff between mixed and regular CG: " << diff << std::endl; #ifdef HAVE_LIME + if( GridCmdOptionExists(argv,argv+argc,"--checksums") ){ + std::string file1("./Propagator1"); std::string file2("./Propagator2"); emptyUserRecord record; @@ -125,6 +127,7 @@ int main (int argc, char ** argv) nersc_csum,scidac_csuma,scidac_csumb); std::cout << " CG checksums "< Date: Fri, 23 Mar 2018 11:27:56 +0000 Subject: [PATCH 339/377] Changes in messages in test dwf mixedprec --- tests/Test_dwf_mixedcg_prec.cc | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/tests/Test_dwf_mixedcg_prec.cc b/tests/Test_dwf_mixedcg_prec.cc index a53d8921..92567b6f 100644 --- a/tests/Test_dwf_mixedcg_prec.cc +++ b/tests/Test_dwf_mixedcg_prec.cc @@ -49,6 +49,8 @@ int main (int argc, char ** argv) const int Ls=8; + std::cout << GridLogMessage << "::::: NB: to enable a quick bit reproducibility check use the --checksums flag. 
" << std::endl; + GridCartesian * UGrid = SpaceTimeGrid::makeFourDimGrid(GridDefaultLatt(), GridDefaultSimd(Nd,vComplexD::Nsimd()),GridDefaultMpi()); GridRedBlackCartesian * UrbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(UGrid); GridCartesian * FGrid = SpaceTimeGrid::makeFiveDimGrid(Ls,UGrid); @@ -90,24 +92,23 @@ int main (int argc, char ** argv) SchurDiagMooeeOperator HermOpEO(Ddwf); SchurDiagMooeeOperator HermOpEO_f(Ddwf_f); - std::cout << "Starting mixed CG" << std::endl; + std::cout << GridLogMessage << "::::::::::::: Starting mixed CG" << std::endl; MixedPrecisionConjugateGradient mCG(1.0e-8, 10000, 50, FrbGrid_f, HermOpEO_f, HermOpEO); mCG(src_o,result_o); - std::cout << "Starting regular CG" << std::endl; + std::cout << GridLogMessage << "::::::::::::: Starting regular CG" << std::endl; ConjugateGradient CG(1.0e-8,10000); CG(HermOpEO,src_o,result_o_2); LatticeFermionD diff_o(FrbGrid); RealD diff = axpy_norm(diff_o, -1.0, result_o, result_o_2); - std::cout << "Diff between mixed and regular CG: " << diff << std::endl; + std::cout << GridLogMessage << "::::::::::::: Diff between mixed and regular CG: " << diff << std::endl; #ifdef HAVE_LIME if( GridCmdOptionExists(argv,argv+argc,"--checksums") ){ std::string file1("./Propagator1"); - std::string file2("./Propagator2"); emptyUserRecord record; uint32_t nersc_csum; uint32_t scidac_csuma; @@ -121,12 +122,12 @@ int main (int argc, char ** argv) BinaryIO::writeLatticeObject(result_o,file1,munge, 0, format, nersc_csum,scidac_csuma,scidac_csumb); - std::cout << " Mixed checksums "<(result_o_2,file1,munge, 0, format, nersc_csum,scidac_csuma,scidac_csumb); - std::cout << " CG checksums "< Date: Thu, 29 Mar 2018 19:57:41 +0100 Subject: [PATCH 340/377] I/O benchmark --- benchmarks/Benchmark_IO.cc | 101 +++++++++++++++++++++++++++++++++++++ 1 file changed, 101 insertions(+) create mode 100644 benchmarks/Benchmark_IO.cc diff --git a/benchmarks/Benchmark_IO.cc b/benchmarks/Benchmark_IO.cc new file mode 100644 index 
00000000..26aac7ae --- /dev/null +++ b/benchmarks/Benchmark_IO.cc @@ -0,0 +1,101 @@ +#include + +using namespace std; +using namespace Grid; +using namespace Grid::QCD; + +#define MSG cout << GridLogMessage +#define SEP \ +"=============================================================================" +#ifndef BENCH_IO_LMAX +#define BENCH_IO_LMAX 40 +#endif + +typedef function WriterFn; +typedef function ReaderFn; + +string filestem(const int l) +{ + return "iobench_l" + to_string(l); +} + +void limeWrite(const string filestem, LatticeFermion &vec) +{ + emptyUserRecord record; + ScidacWriter binWriter; + + binWriter.open(filestem + ".bin"); + binWriter.writeScidacFieldRecord(vec, record); + binWriter.close(); +} + +void limeRead(LatticeFermion &vec, const string filestem) +{ + emptyUserRecord record; + ScidacReader binReader; + + binReader.open(filestem + ".bin"); + binReader.readScidacFieldRecord(vec, record); + binReader.close(); +} + +void writeBenchmark(const int l, const WriterFn &write) +{ + auto mpi = GridDefaultMpi(); + auto simd = GridDefaultSimd(Nd, vComplex::Nsimd()); + vector latt = {l*mpi[0], l*mpi[1], l*mpi[2], l*mpi[3]}; + unique_ptr gPt(SpaceTimeGrid::makeFourDimGrid(latt, simd, mpi)); + GridCartesian *g = gPt.get(); + GridParallelRNG rng(g); + LatticeFermion vec(g); + emptyUserRecord record; + ScidacWriter binWriter; + + cout << "-- Local volume " << l << "^4" << endl; + random(rng, vec); + write(filestem(l), vec); +} + +void readBenchmark(const int l, const ReaderFn &read) +{ + auto mpi = GridDefaultMpi(); + auto simd = GridDefaultSimd(Nd, vComplex::Nsimd()); + vector latt = {l*mpi[0], l*mpi[1], l*mpi[2], l*mpi[3]}; + unique_ptr gPt(SpaceTimeGrid::makeFourDimGrid(latt, simd, mpi)); + GridCartesian *g = gPt.get(); + LatticeFermion vec(g); + emptyUserRecord record; + ScidacReader binReader; + + cout << "-- Local volume " << l << "^4" << endl; + read(vec, filestem(l)); +} + +int main (int argc, char ** argv) +{ + Grid_init(&argc,&argv); + + auto simd 
= GridDefaultSimd(Nd,vComplex::Nsimd()); + auto mpi = GridDefaultMpi(); + + int64_t threads = GridThread::GetThreads(); + MSG << "Grid is setup to use " << threads << " threads" << endl; + MSG << SEP << endl; + MSG << "Benchmark Lime write" << endl; + MSG << SEP << endl; + for (int l = 4; l <= BENCH_IO_LMAX; l += 2) + { + writeBenchmark(l, limeWrite); + } + + MSG << "Benchmark Lime read" << endl; + MSG << SEP << endl; + for (int l = 4; l <= BENCH_IO_LMAX; l += 2) + { + readBenchmark(l, limeRead); + } + + Grid_finalize(); + + return EXIT_SUCCESS; +} \ No newline at end of file From 2f5add4d5f53d449002ca100a82b7f9c9a16c572 Mon Sep 17 00:00:00 2001 From: paboyle Date: Fri, 30 Mar 2018 12:30:58 +0100 Subject: [PATCH 341/377] Creation of file --- lib/parallelIO/BinaryIO.h | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/lib/parallelIO/BinaryIO.h b/lib/parallelIO/BinaryIO.h index 39acf0e0..45fd522e 100644 --- a/lib/parallelIO/BinaryIO.h +++ b/lib/parallelIO/BinaryIO.h @@ -372,7 +372,7 @@ class BinaryIO { std::cout << GridLogMessage <<"IOobject: C++ read I/O " << file << " : " << iodata.size() * sizeof(fobj) << " bytes" << std::endl; std::ifstream fin; - fin.open(file, std::ios::binary | std::ios::in); + fin.open(file, std::ios::binary | std::ios::in); if (control & BINARYIO_MASTER_APPEND) { fin.seekg(-sizeof(fobj), fin.end); @@ -453,11 +453,15 @@ class BinaryIO { std::ofstream fout; fout.exceptions ( std::fstream::failbit | std::fstream::badbit ); try { - fout.open(file,std::ios::binary|std::ios::out|std::ios::in); + if (offset) { // Must already exist and contain data + fout.open(file,std::ios::binary|std::ios::out|std::ios::in); + } else { // Allow create + fout.open(file,std::ios::binary|std::ios::out); + } } catch (const std::fstream::failure& exc) { std::cout << GridLogError << "Error in opening the file " << file << " for output" < Date: Fri, 30 Mar 2018 12:41:30 +0100 Subject: [PATCH 342/377] Barrier required in parallel when we use 
ftell --- lib/parallelIO/IldgIO.h | 30 ++++++++++++++++++++++++------ 1 file changed, 24 insertions(+), 6 deletions(-) diff --git a/lib/parallelIO/IldgIO.h b/lib/parallelIO/IldgIO.h index b81d1e43..e29b1d39 100644 --- a/lib/parallelIO/IldgIO.h +++ b/lib/parallelIO/IldgIO.h @@ -350,26 +350,36 @@ class GridLimeWriter : public BinaryIO { // iv) fseek on FILE * to end of this disjoint section. // v) Continue writing scidac record. //////////////////////////////////////////////////////////////////// - + + GridBase *grid = field._grid; //////////////////////////////////////////// // Create record header //////////////////////////////////////////// typedef typename vobj::scalar_object sobj; int err; uint32_t nersc_csum,scidac_csuma,scidac_csumb; - uint64_t PayloadSize = sizeof(sobj) * field._grid->_gsites; + uint64_t PayloadSize = sizeof(sobj) * grid->_gsites; createLimeRecordHeader(record_name, 0, 0, PayloadSize); - + fflush(File); + // std::cout << "W sizeof(sobj)" <_gsites<Broadcast(0,(void *)&compare,sizeof(compare)); + + assert(compare == offset1 ); /////////////////////////////////////////// // Write by other means into the binary record /////////////////////////////////////////// - uint64_t offset1 = ftello(File); // std::cout << " Writing to offset "<(); BinarySimpleMunger munge; BinaryIO::writeLatticeObject(field, filename, munge, offset1, format,nersc_csum,scidac_csuma,scidac_csumb); @@ -380,7 +390,15 @@ class GridLimeWriter : public BinaryIO { fseek(File,0,SEEK_END); uint64_t offset2 = ftello(File); // std::cout << " now at offset "<Barrier(); + + ///////////////////////////////////////////////////////////// + // Check MPI-2 I/O did what we expect to file + ///////////////////////////////////////////////////////////// + assert( (offset2-offset1) == PayloadSize); err=limeWriterCloseRecord(LimeW); assert(err>=0); From ab6afd18ac2b6431544f92d147a530a7e21022b6 Mon Sep 17 00:00:00 2001 From: paboyle Date: Fri, 30 Mar 2018 13:39:20 +0100 Subject: [PATCH 343/377] 
Still compile if no LIME --- benchmarks/Benchmark_IO.cc | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/benchmarks/Benchmark_IO.cc b/benchmarks/Benchmark_IO.cc index 26aac7ae..c8f2692a 100644 --- a/benchmarks/Benchmark_IO.cc +++ b/benchmarks/Benchmark_IO.cc @@ -1,3 +1,4 @@ +#ifdef HAVE_LIME #include using namespace std; @@ -98,4 +99,11 @@ int main (int argc, char ** argv) Grid_finalize(); return EXIT_SUCCESS; -} \ No newline at end of file +} +#else +#include +int main (int argc, char ** argv) +{ + return EXIT_SUCCESS; +} +#endif From a13c1091111d348ffe261c2c63863179d73ff2d0 Mon Sep 17 00:00:00 2001 From: Antonin Portelli Date: Fri, 30 Mar 2018 16:03:01 +0100 Subject: [PATCH 344/377] deterministic initialisation of field metadata --- lib/parallelIO/IldgIOtypes.h | 5 +++-- lib/parallelIO/MetaData.h | 18 ++++++++---------- 2 files changed, 11 insertions(+), 12 deletions(-) diff --git a/lib/parallelIO/IldgIOtypes.h b/lib/parallelIO/IldgIOtypes.h index 5b397e14..281b20f2 100644 --- a/lib/parallelIO/IldgIOtypes.h +++ b/lib/parallelIO/IldgIOtypes.h @@ -136,8 +136,9 @@ struct scidacRecord : Serializable { int, typesize, int, datacount); - scidacRecord() { version =1.0; } - + scidacRecord() + : version(1.0), recordtype(0), colors(0), spins(0), typesize(0), datacount(0) + {} }; //////////////////////// diff --git a/lib/parallelIO/MetaData.h b/lib/parallelIO/MetaData.h index ccc8b18f..55254786 100644 --- a/lib/parallelIO/MetaData.h +++ b/lib/parallelIO/MetaData.h @@ -81,18 +81,16 @@ namespace Grid { std::string, creation_date, std::string, archive_date, std::string, floating_point); - FieldMetaData(void) { - nd=4; - dimension.resize(4); - boundary.resize(4); - scidac_checksuma=0; - scidac_checksumb=0; - checksum=0; - } + // WARNING: non-initialised values might lead to twisted parallel IO + // issues, std::string are fine because they initliase to size 0 + // as per C++ standard. 
+ FieldMetaData(void) + : nd(4), dimension(4,0), boundary(4, ""), data_start(0), + link_trace(0.), plaquette(0.), checksum(0), + scidac_checksuma(0), scidac_checksumb(0), sequence_number(0) + {} }; - - namespace QCD { using namespace Grid; From 276f113f288de9030468f84b8a0fef81a763ecbf Mon Sep 17 00:00:00 2001 From: paboyle Date: Fri, 30 Mar 2018 16:17:05 +0100 Subject: [PATCH 345/377] IO uses master boss node for metadata. --- benchmarks/Benchmark_IO.cc | 7 +- lib/parallelIO/IldgIO.h | 144 ++++++++++-------- lib/qcd/hmc/checkpointers/ILDGCheckpointer.h | 4 +- tests/IO/Test_ildg_io.cc | 2 +- .../Test_dwf_compressed_lanczos_reorg.cc | 4 +- tests/solver/Test_dwf_mrhs_cg.cc | 4 +- 6 files changed, 94 insertions(+), 71 deletions(-) diff --git a/benchmarks/Benchmark_IO.cc b/benchmarks/Benchmark_IO.cc index c8f2692a..479ae037 100644 --- a/benchmarks/Benchmark_IO.cc +++ b/benchmarks/Benchmark_IO.cc @@ -1,5 +1,5 @@ -#ifdef HAVE_LIME #include +#ifdef HAVE_LIME using namespace std; using namespace Grid; @@ -23,7 +23,7 @@ string filestem(const int l) void limeWrite(const string filestem, LatticeFermion &vec) { emptyUserRecord record; - ScidacWriter binWriter; + ScidacWriter binWriter(vec._grid->IsBoss()); binWriter.open(filestem + ".bin"); binWriter.writeScidacFieldRecord(vec, record); @@ -50,7 +50,7 @@ void writeBenchmark(const int l, const WriterFn &write) GridParallelRNG rng(g); LatticeFermion vec(g); emptyUserRecord record; - ScidacWriter binWriter; + ScidacWriter binWriter(g->IsBoss()); cout << "-- Local volume " << l << "^4" << endl; random(rng, vec); @@ -101,7 +101,6 @@ int main (int argc, char ** argv) return EXIT_SUCCESS; } #else -#include int main (int argc, char ** argv) { return EXIT_SUCCESS; diff --git a/lib/parallelIO/IldgIO.h b/lib/parallelIO/IldgIO.h index e29b1d39..d1a684f3 100644 --- a/lib/parallelIO/IldgIO.h +++ b/lib/parallelIO/IldgIO.h @@ -272,8 +272,10 @@ class GridLimeReader : public BinaryIO { } }; -class GridLimeWriter : public BinaryIO { +class 
GridLimeWriter : public BinaryIO +{ public: + /////////////////////////////////////////////////// // FIXME: format for RNG? Now just binary out instead // FIXME: collective calls or not ? @@ -282,17 +284,24 @@ class GridLimeWriter : public BinaryIO { FILE *File; LimeWriter *LimeW; std::string filename; - + bool boss_node; + GridLimeWriter( bool isboss = true) { + boss_node = isboss; + } void open(const std::string &_filename) { filename= _filename; - File = fopen(filename.c_str(), "w"); - LimeW = limeCreateWriter(File); assert(LimeW != NULL ); + if ( boss_node ) { + File = fopen(filename.c_str(), "w"); + LimeW = limeCreateWriter(File); assert(LimeW != NULL ); + } } ///////////////////////////////////////////// // Close the file ///////////////////////////////////////////// void close(void) { - fclose(File); + if ( boss_node ) { + fclose(File); + } // limeDestroyWriter(LimeW); } /////////////////////////////////////////////////////// @@ -300,10 +309,12 @@ class GridLimeWriter : public BinaryIO { /////////////////////////////////////////////////////// int createLimeRecordHeader(std::string message, int MB, int ME, size_t PayloadSize) { - LimeRecordHeader *h; - h = limeCreateHeader(MB, ME, const_cast(message.c_str()), PayloadSize); - assert(limeWriteRecordHeader(h, LimeW) >= 0); - limeDestroyHeader(h); + if ( boss_node ) { + LimeRecordHeader *h; + h = limeCreateHeader(MB, ME, const_cast(message.c_str()), PayloadSize); + assert(limeWriteRecordHeader(h, LimeW) >= 0); + limeDestroyHeader(h); + } return LIME_SUCCESS; } //////////////////////////////////////////// @@ -312,28 +323,31 @@ class GridLimeWriter : public BinaryIO { template void writeLimeObject(int MB,int ME,serialisable_object &object,std::string object_name,std::string record_name) { - std::string xmlstring; - { - XmlWriter WR("",""); - write(WR,object_name,object); - xmlstring = WR.XmlString(); + if ( boss_node ) { + std::string xmlstring; + { + XmlWriter WR("",""); + write(WR,object_name,object); + xmlstring 
= WR.XmlString(); + } + // std::cout << "WriteLimeObject" << record_name <(record_name.c_str()), nbytes); + assert(h!= NULL); + + err=limeWriteRecordHeader(h, LimeW); assert(err>=0); + err=limeWriteRecordData(&xmlstring[0], &nbytes, LimeW); assert(err>=0); + err=limeWriterCloseRecord(LimeW); assert(err>=0); + limeDestroyHeader(h); } - // std::cout << "WriteLimeObject" << record_name <(record_name.c_str()), nbytes); - assert(h!= NULL); - - err=limeWriteRecordHeader(h, LimeW); assert(err>=0); - err=limeWriteRecordData(&xmlstring[0], &nbytes, LimeW); assert(err>=0); - err=limeWriterCloseRecord(LimeW); assert(err>=0); - limeDestroyHeader(h); - // std::cout << " File offset is now"< void writeLimeLatticeBinaryObject(Lattice &field,std::string record_name) { @@ -352,6 +366,8 @@ class GridLimeWriter : public BinaryIO { //////////////////////////////////////////////////////////////////// GridBase *grid = field._grid; + assert(boss_node == field._grid->IsBoss() ); + //////////////////////////////////////////// // Create record header //////////////////////////////////////////// @@ -359,8 +375,10 @@ class GridLimeWriter : public BinaryIO { int err; uint32_t nersc_csum,scidac_csuma,scidac_csumb; uint64_t PayloadSize = sizeof(sobj) * grid->_gsites; - createLimeRecordHeader(record_name, 0, 0, PayloadSize); - fflush(File); + if ( boss_node ) { + createLimeRecordHeader(record_name, 0, 0, PayloadSize); + fflush(File); + } // std::cout << "W sizeof(sobj)" <_gsites<Broadcast(0,(void *)&compare,sizeof(compare)); - - assert(compare == offset1 ); + uint64_t offset1; + if ( boss_node ) { + offset1 = ftello(File); + } + grid->Broadcast(0,(void *)&offset1,sizeof(offset1)); /////////////////////////////////////////// - // Write by other means into the binary record + // The above is collective. 
Write by other means into the binary record /////////////////////////////////////////// - std::string format = getFormatString(); BinarySimpleMunger munge; BinaryIO::writeLatticeObject(field, filename, munge, offset1, format,nersc_csum,scidac_csuma,scidac_csumb); @@ -387,21 +403,19 @@ class GridLimeWriter : public BinaryIO { /////////////////////////////////////////// // Wind forward and close the record /////////////////////////////////////////// - fseek(File,0,SEEK_END); - uint64_t offset2 = ftello(File); // std::cout << " now at offset "<Barrier(); + if ( boss_node ) { + fseek(File,0,SEEK_END); + uint64_t offset2 = ftello(File); // std::cout << " now at offset "<=0); + if ( boss_node ) { + err=limeWriterCloseRecord(LimeW); assert(err>=0); + } //////////////////////////////////////// // Write checksum element, propagaing forward from the BinaryIO // Always pair a checksum with a binary object, and close message @@ -411,21 +425,26 @@ class GridLimeWriter : public BinaryIO { std::stringstream streamb; streamb << std::hex << scidac_csumb; checksum.suma= streama.str(); checksum.sumb= streamb.str(); - // std::cout << GridLogMessage<<" writing scidac checksums "< - void writeScidacFileRecord(GridBase *grid,SerialisableUserFile &_userFile) - { - scidacFile _scidacFile(grid); - writeLimeObject(1,0,_scidacFile,_scidacFile.SerialisableClassName(),std::string(SCIDAC_PRIVATE_FILE_XML)); - writeLimeObject(0,1,_userFile,_userFile.SerialisableClassName(),std::string(SCIDAC_FILE_XML)); - } + ScidacWriter(bool isboss =true ) : GridLimeWriter(isboss) { }; + + template + void writeScidacFileRecord(GridBase *grid,SerialisableUserFile &_userFile) + { + scidacFile _scidacFile(grid); + if ( this->boss_node ) { + writeLimeObject(1,0,_scidacFile,_scidacFile.SerialisableClassName(),std::string(SCIDAC_PRIVATE_FILE_XML)); + writeLimeObject(0,1,_userFile,_userFile.SerialisableClassName(),std::string(SCIDAC_FILE_XML)); + } + } //////////////////////////////////////////////// // Write generic 
lattice field in scidac format //////////////////////////////////////////////// @@ -446,9 +465,12 @@ class ScidacWriter : public GridLimeWriter { ////////////////////////////////////////////// // Fill the Lime file record by record ////////////////////////////////////////////// - writeLimeObject(1,0,header ,std::string("FieldMetaData"),std::string(GRID_FORMAT)); // Open message - writeLimeObject(0,0,_userRecord,_userRecord.SerialisableClassName(),std::string(SCIDAC_RECORD_XML)); - writeLimeObject(0,0,_scidacRecord,_scidacRecord.SerialisableClassName(),std::string(SCIDAC_PRIVATE_RECORD_XML)); + if ( this->boss_node ) { + writeLimeObject(1,0,header ,std::string("FieldMetaData"),std::string(GRID_FORMAT)); // Open message + writeLimeObject(0,0,_userRecord,_userRecord.SerialisableClassName(),std::string(SCIDAC_RECORD_XML)); + writeLimeObject(0,0,_scidacRecord,_scidacRecord.SerialisableClassName(),std::string(SCIDAC_PRIVATE_RECORD_XML)); + } + // Collective call writeLimeLatticeBinaryObject(field,std::string(ILDG_BINARY_DATA)); // Closes message with checksum } }; @@ -515,6 +537,8 @@ class ScidacReader : public GridLimeReader { class IldgWriter : public ScidacWriter { public: + + IldgWriter(bool isboss) : ScidacWriter(isboss) {}; /////////////////////////////////// // A little helper diff --git a/lib/qcd/hmc/checkpointers/ILDGCheckpointer.h b/lib/qcd/hmc/checkpointers/ILDGCheckpointer.h index 3bcdc77a..9bcc33df 100644 --- a/lib/qcd/hmc/checkpointers/ILDGCheckpointer.h +++ b/lib/qcd/hmc/checkpointers/ILDGCheckpointer.h @@ -74,10 +74,10 @@ class ILDGHmcCheckpointer : public BaseHmcCheckpointer { if ((traj % Params.saveInterval) == 0) { std::string config, rng; this->build_filenames(traj, Params, config, rng); - + GridBase *grid = U._grid; uint32_t nersc_csum,scidac_csuma,scidac_csumb; BinaryIO::writeRNG(sRNG, pRNG, rng, 0,nersc_csum,scidac_csuma,scidac_csumb); - IldgWriter _IldgWriter; + IldgWriter _IldgWriter(grid->IsBoss()); _IldgWriter.open(config); 
_IldgWriter.writeConfiguration(U, traj, config, config); _IldgWriter.close(); diff --git a/tests/IO/Test_ildg_io.cc b/tests/IO/Test_ildg_io.cc index 6aac2e38..55dd93b8 100644 --- a/tests/IO/Test_ildg_io.cc +++ b/tests/IO/Test_ildg_io.cc @@ -79,7 +79,7 @@ int main (int argc, char ** argv) std::cout <subspace.size()==nbasis); emptyUserRecord record; - Grid::QCD::ScidacWriter WR; + Grid::QCD::ScidacWriter WR(this->_FineGrid->IsBoss()); WR.open(evecs_file); for(int k=0;ksubspace[k],record); @@ -96,7 +96,7 @@ public: { int n = this->evec_coarse.size(); emptyUserRecord record; - Grid::QCD::ScidacWriter WR; + Grid::QCD::ScidacWriter WR(this->_CoarseGrid->IsBoss()); WR.open(evecs_file); for(int k=0;kevec_coarse[k],record); diff --git a/tests/solver/Test_dwf_mrhs_cg.cc b/tests/solver/Test_dwf_mrhs_cg.cc index 207e1331..72ba3d26 100644 --- a/tests/solver/Test_dwf_mrhs_cg.cc +++ b/tests/solver/Test_dwf_mrhs_cg.cc @@ -114,7 +114,7 @@ int main (int argc, char ** argv) { FGrid->Barrier(); - ScidacWriter _ScidacWriter; + ScidacWriter _ScidacWriter(FGrid->IsBoss()); _ScidacWriter.open(file); std::cout << GridLogMessage << "****************************************************************** "<IsBoss()); _ScidacWriter.open(filefn.str()); _ScidacWriter.writeScidacFieldRecord(src[n],record); _ScidacWriter.close(); From eddf023b8a9446fccfbf9bf0648defe35d6969b3 Mon Sep 17 00:00:00 2001 From: Antonin Portelli Date: Fri, 6 Apr 2018 16:17:22 +0100 Subject: [PATCH 346/377] pugixml 1.9 update --- lib/pugixml/README.md | 44 - lib/pugixml/pugiconfig.hpp | 11 +- lib/pugixml/pugixml.cc | 3189 ++++++++++++++++++++---------------- lib/pugixml/pugixml.h | 201 ++- lib/pugixml/readme.txt | 6 +- lib/serialisation/XmlIO.cc | 65 +- lib/serialisation/XmlIO.h | 9 +- 7 files changed, 1935 insertions(+), 1590 deletions(-) delete mode 100644 lib/pugixml/README.md diff --git a/lib/pugixml/README.md b/lib/pugixml/README.md deleted file mode 100644 index 9d8a935f..00000000 --- a/lib/pugixml/README.md +++ 
/dev/null @@ -1,44 +0,0 @@ -pugixml [![Build Status](https://travis-ci.org/zeux/pugixml.svg?branch=master)](https://travis-ci.org/zeux/pugixml) [![Build status](https://ci.appveyor.com/api/projects/status/9hdks1doqvq8pwe7/branch/master?svg=true)](https://ci.appveyor.com/project/zeux/pugixml) -======= - -pugixml is a C++ XML processing library, which consists of a DOM-like interface with rich traversal/modification -capabilities, an extremely fast XML parser which constructs the DOM tree from an XML file/buffer, and an XPath 1.0 -implementation for complex data-driven tree queries. Full Unicode support is also available, with Unicode interface -variants and conversions between different Unicode encodings (which happen automatically during parsing/saving). - -pugixml is used by a lot of projects, both open-source and proprietary, for performance and easy-to-use interface. - -## Documentation - -Documentation for the current release of pugixml is available on-line as two separate documents: - -* [Quick-start guide](http://pugixml.org/docs/quickstart.html), that aims to provide enough information to start using the library; -* [Complete reference manual](http://pugixml.org/docs/manual.html), that describes all features of the library in detail. - -You’re advised to start with the quick-start guide; however, many important library features are either not described in it at all or only mentioned briefly; if you require more information you should read the complete manual. 
- -## License -This library is available to anybody free of charge, under the terms of MIT License: - -Copyright (c) 2006-2015 Arseny Kapoulkine - -Permission is hereby granted, free of charge, to any person -obtaining a copy of this software and associated documentation -files (the "Software"), to deal in the Software without -restriction, including without limitation the rights to use, -copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the -Software is furnished to do so, subject to the following -conditions: - -The above copyright notice and this permission notice shall be -included in all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, -EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES -OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND -NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT -HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, -WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR -OTHER DEALINGS IN THE SOFTWARE. diff --git a/lib/pugixml/pugiconfig.hpp b/lib/pugixml/pugiconfig.hpp index 5ee5131f..f739e062 100644 --- a/lib/pugixml/pugiconfig.hpp +++ b/lib/pugixml/pugiconfig.hpp @@ -1,7 +1,7 @@ /** - * pugixml parser - version 1.6 + * pugixml parser - version 1.9 * -------------------------------------------------------- - * Copyright (C) 2006-2015, by Arseny Kapoulkine (arseny.kapoulkine@gmail.com) + * Copyright (C) 2006-2018, by Arseny Kapoulkine (arseny.kapoulkine@gmail.com) * Report bugs and download new versions at http://pugixml.org/ * * This library is distributed under the MIT License. 
See notice at the end @@ -17,6 +17,9 @@ // Uncomment this to enable wchar_t mode // #define PUGIXML_WCHAR_MODE +// Uncomment this to enable compact mode +// #define PUGIXML_COMPACT + // Uncomment this to disable XPath // #define PUGIXML_NO_XPATH @@ -46,7 +49,7 @@ #endif /** - * Copyright (c) 2006-2015 Arseny Kapoulkine + * Copyright (c) 2006-2018 Arseny Kapoulkine * * Permission is hereby granted, free of charge, to any person * obtaining a copy of this software and associated documentation @@ -59,7 +62,7 @@ * * The above copyright notice and this permission notice shall be * included in all copies or substantial portions of the Software. - * + * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND diff --git a/lib/pugixml/pugixml.cc b/lib/pugixml/pugixml.cc index a4f8fde2..dd08092c 100644 --- a/lib/pugixml/pugixml.cc +++ b/lib/pugixml/pugixml.cc @@ -1,7 +1,7 @@ /** - * pugixml parser - version 1.6 + * pugixml parser - version 1.9 * -------------------------------------------------------- - * Copyright (C) 2006-2015, by Arseny Kapoulkine (arseny.kapoulkine@gmail.com) + * Copyright (C) 2006-2018, by Arseny Kapoulkine (arseny.kapoulkine@gmail.com) * Report bugs and download new versions at http://pugixml.org/ * * This library is distributed under the MIT License. 
See notice at the end @@ -20,6 +20,7 @@ #include #include #include +#include #ifdef PUGIXML_WCHAR_MODE # include @@ -28,9 +29,6 @@ #ifndef PUGIXML_NO_XPATH # include # include -# ifdef PUGIXML_NO_EXCEPTIONS -# include -# endif #endif #ifndef PUGIXML_NO_STL @@ -46,14 +44,17 @@ # pragma warning(push) # pragma warning(disable: 4127) // conditional expression is constant # pragma warning(disable: 4324) // structure was padded due to __declspec(align()) -# pragma warning(disable: 4611) // interaction between '_setjmp' and C++ object destruction is non-portable # pragma warning(disable: 4702) // unreachable code # pragma warning(disable: 4996) // this function or variable may be unsafe -# pragma warning(disable: 4793) // function compiled as native: presence of '_setjmp' makes a function unmanaged +#endif + +#if defined(_MSC_VER) && defined(__c2__) +# pragma clang diagnostic push +# pragma clang diagnostic ignored "-Wdeprecated" // this function or variable may be unsafe #endif #ifdef __INTEL_COMPILER -# pragma warning(disable: 177) // function was declared but never referenced +# pragma warning(disable: 177) // function was declared but never referenced # pragma warning(disable: 279) // controlling expression is constant # pragma warning(disable: 1478 1786) // function was declared "deprecated" # pragma warning(disable: 1684) // conversion from pointer to same-sized integral type @@ -75,17 +76,21 @@ # pragma diag_suppress=237 // controlling expression is constant #endif +#ifdef __TI_COMPILER_VERSION__ +# pragma diag_suppress 179 // function was declared but never referenced +#endif + // Inlining controls #if defined(_MSC_VER) && _MSC_VER >= 1300 # define PUGI__NO_INLINE __declspec(noinline) #elif defined(__GNUC__) # define PUGI__NO_INLINE __attribute__((noinline)) #else -# define PUGI__NO_INLINE +# define PUGI__NO_INLINE #endif // Branch weight controls -#if defined(__GNUC__) +#if defined(__GNUC__) && !defined(__c2__) # define PUGI__UNLIKELY(cond) __builtin_expect(cond, 
0) #else # define PUGI__UNLIKELY(cond) (cond) @@ -101,10 +106,29 @@ # define PUGI__DMC_VOLATILE #endif +// Integer sanitizer workaround; we only apply this for clang since gcc8 has no_sanitize but not unsigned-integer-overflow and produces "attribute directive ignored" warnings +#if defined(__clang__) && defined(__has_attribute) +# if __has_attribute(no_sanitize) +# define PUGI__UNSIGNED_OVERFLOW __attribute__((no_sanitize("unsigned-integer-overflow"))) +# else +# define PUGI__UNSIGNED_OVERFLOW +# endif +#else +# define PUGI__UNSIGNED_OVERFLOW +#endif + // Borland C++ bug workaround for not defining ::memcpy depending on header include order (can't always use std::memcpy because some compilers don't have it at all) #if defined(__BORLANDC__) && !defined(__MEM_H_USING_LIST) using std::memcpy; using std::memmove; +using std::memset; +#endif + +// Some MinGW/GCC versions have headers that erroneously omit LLONG_MIN/LLONG_MAX/ULLONG_MAX definitions from limits.h in some configurations +#if defined(PUGIXML_HAS_LONG_LONG) && defined(__GNUC__) && !defined(LLONG_MAX) && !defined(LLONG_MIN) && !defined(ULLONG_MAX) +# define LLONG_MIN (-LLONG_MAX - 1LL) +# define LLONG_MAX __LONG_LONG_MAX__ +# define ULLONG_MAX (LLONG_MAX * 2ULL + 1ULL) #endif // In some environments MSVC is a compiler but the CRT lacks certain MSVC-specific features @@ -112,6 +136,16 @@ using std::memmove; # define PUGI__MSVC_CRT_VERSION _MSC_VER #endif +// Not all platforms have snprintf; we define a wrapper that uses snprintf if possible. This only works with buffers with a known size. +#if __cplusplus >= 201103 +# define PUGI__SNPRINTF(buf, ...) snprintf(buf, sizeof(buf), __VA_ARGS__) +#elif defined(PUGI__MSVC_CRT_VERSION) && PUGI__MSVC_CRT_VERSION >= 1400 +# define PUGI__SNPRINTF(buf, ...) 
_snprintf_s(buf, _countof(buf), _TRUNCATE, __VA_ARGS__) +#else +# define PUGI__SNPRINTF sprintf +#endif + +// We put implementation details into an anonymous namespace in source mode, but have to keep it in non-anonymous namespace in header-only mode to prevent binary bloat. #ifdef PUGIXML_HEADER_ONLY # define PUGI__NS_BEGIN namespace pugi { namespace impl { # define PUGI__NS_END } } @@ -130,9 +164,7 @@ using std::memmove; #endif // uintptr_t -#if !defined(_MSC_VER) || _MSC_VER >= 1600 -# include -#else +#if (defined(_MSC_VER) && _MSC_VER < 1600) || (defined(__BORLANDC__) && __BORLANDC__ < 0x561) namespace pugi { # ifndef _UINTPTR_T_DEFINED @@ -143,6 +175,8 @@ namespace pugi typedef unsigned __int16 uint16_t; typedef unsigned __int32 uint32_t; } +#else +# include #endif // Memory allocation @@ -204,7 +238,7 @@ PUGI__NS_BEGIN for (size_t i = 0; i < count; ++i) if (lhs[i] != rhs[i]) return false; - + return lhs[count] == 0; } @@ -221,21 +255,14 @@ PUGI__NS_BEGIN return static_cast(end - s); #endif } - -#ifdef PUGIXML_WCHAR_MODE - // Convert string to wide string, assuming all symbols are ASCII - PUGI__FN void widen_ascii(wchar_t* dest, const char* source) - { - for (const char* i = source; *i; ++i) *dest++ = *i; - *dest = 0; - } -#endif PUGI__NS_END // auto_ptr-like object for exception recovery PUGI__NS_BEGIN - template struct auto_deleter + template struct auto_deleter { + typedef void (*D)(T*); + T* data; D deleter; @@ -277,67 +304,37 @@ PUGI__NS_BEGIN } } - void** find(const void* key) + void* find(const void* key) { - assert(key); - if (_capacity == 0) return 0; - size_t hashmod = _capacity - 1; - size_t bucket = hash(key) & hashmod; + item_t* item = get_item(key); + assert(item); + assert(item->key == key || (item->key == 0 && item->value == 0)); - for (size_t probe = 0; probe <= hashmod; ++probe) - { - item_t& probe_item = _items[bucket]; - - if (probe_item.key == key) - return &probe_item.value; - - if (probe_item.key == 0) - return 0; - - // hash collision, 
quadratic probing - bucket = (bucket + probe + 1) & hashmod; - } - - assert(!"Hash table is full"); - return 0; + return item->value; } - void** insert(const void* key) + void insert(const void* key, void* value) { - assert(key); - assert(_count < _capacity * 3 / 4); + assert(_capacity != 0 && _count < _capacity - _capacity / 4); - size_t hashmod = _capacity - 1; - size_t bucket = hash(key) & hashmod; + item_t* item = get_item(key); + assert(item); - for (size_t probe = 0; probe <= hashmod; ++probe) + if (item->key == 0) { - item_t& probe_item = _items[bucket]; - - if (probe_item.key == 0) - { - probe_item.key = key; - _count++; - return &probe_item.value; - } - - if (probe_item.key == key) - return &probe_item.value; - - // hash collision, quadratic probing - bucket = (bucket + probe + 1) & hashmod; + _count++; + item->key = key; } - assert(!"Hash table is full"); - return 0; + item->value = value; } - bool reserve() + bool reserve(size_t extra = 16) { - if (_count + 16 >= _capacity - _capacity / 4) - return rehash(); + if (_count + extra >= _capacity - _capacity / 4) + return rehash(_count + extra); return true; } @@ -354,9 +351,32 @@ PUGI__NS_BEGIN size_t _count; - bool rehash(); + bool rehash(size_t count); - static unsigned int hash(const void* key) + item_t* get_item(const void* key) + { + assert(key); + assert(_capacity > 0); + + size_t hashmod = _capacity - 1; + size_t bucket = hash(key) & hashmod; + + for (size_t probe = 0; probe <= hashmod; ++probe) + { + item_t& probe_item = _items[bucket]; + + if (probe_item.key == key || probe_item.key == 0) + return &probe_item; + + // hash collision, quadratic probing + bucket = (bucket + probe + 1) & hashmod; + } + + assert(false && "Hash table is full"); // unreachable + return 0; + } + + static PUGI__UNSIGNED_OVERFLOW unsigned int hash(const void* key) { unsigned int h = static_cast(reinterpret_cast(key)); @@ -371,27 +391,33 @@ PUGI__NS_BEGIN } }; - PUGI__FN_NO_INLINE bool compact_hash_table::rehash() + 
PUGI__FN_NO_INLINE bool compact_hash_table::rehash(size_t count) { + size_t capacity = 32; + while (count >= capacity - capacity / 4) + capacity *= 2; + compact_hash_table rt; - rt._capacity = (_capacity == 0) ? 32 : _capacity * 2; - rt._items = static_cast(xml_memory::allocate(sizeof(item_t) * rt._capacity)); + rt._capacity = capacity; + rt._items = static_cast(xml_memory::allocate(sizeof(item_t) * capacity)); if (!rt._items) return false; - memset(rt._items, 0, sizeof(item_t) * rt._capacity); + memset(rt._items, 0, sizeof(item_t) * capacity); for (size_t i = 0; i < _capacity; ++i) if (_items[i].key) - *rt.insert(_items[i].key) = _items[i].value; + rt.insert(_items[i].key, _items[i].value); if (_items) xml_memory::deallocate(_items); - _capacity = rt._capacity; + _capacity = capacity; _items = rt._items; + assert(_count == rt._count); + return true; } @@ -399,43 +425,33 @@ PUGI__NS_END #endif PUGI__NS_BEGIN - static const size_t xml_memory_page_size = - #ifdef PUGIXML_MEMORY_PAGE_SIZE - PUGIXML_MEMORY_PAGE_SIZE - #else - 32768 - #endif - ; - #ifdef PUGIXML_COMPACT static const uintptr_t xml_memory_block_alignment = 4; - - static const uintptr_t xml_memory_page_alignment = sizeof(void*); #else static const uintptr_t xml_memory_block_alignment = sizeof(void*); - - static const uintptr_t xml_memory_page_alignment = 64; - static const uintptr_t xml_memory_page_pointer_mask = ~(xml_memory_page_alignment - 1); #endif // extra metadata bits - static const uintptr_t xml_memory_page_contents_shared_mask = 32; - static const uintptr_t xml_memory_page_name_allocated_mask = 16; - static const uintptr_t xml_memory_page_value_allocated_mask = 8; - static const uintptr_t xml_memory_page_type_mask = 7; + static const uintptr_t xml_memory_page_contents_shared_mask = 64; + static const uintptr_t xml_memory_page_name_allocated_mask = 32; + static const uintptr_t xml_memory_page_value_allocated_mask = 16; + static const uintptr_t xml_memory_page_type_mask = 15; // combined masks for 
string uniqueness static const uintptr_t xml_memory_page_name_allocated_or_shared_mask = xml_memory_page_name_allocated_mask | xml_memory_page_contents_shared_mask; static const uintptr_t xml_memory_page_value_allocated_or_shared_mask = xml_memory_page_value_allocated_mask | xml_memory_page_contents_shared_mask; #ifdef PUGIXML_COMPACT + #define PUGI__GETHEADER_IMPL(object, page, flags) // unused #define PUGI__GETPAGE_IMPL(header) (header).get_page() #else - #define PUGI__GETPAGE_IMPL(header) reinterpret_cast((header) & impl::xml_memory_page_pointer_mask) + #define PUGI__GETHEADER_IMPL(object, page, flags) (((reinterpret_cast(object) - reinterpret_cast(page)) << 8) | (flags)) + // this macro casts pointers through void* to avoid 'cast increases required alignment of target type' warnings + #define PUGI__GETPAGE_IMPL(header) static_cast(const_cast(static_cast(reinterpret_cast(&header) - (header >> 8)))) #endif #define PUGI__GETPAGE(n) PUGI__GETPAGE_IMPL((n)->header) - #define PUGI__NODETYPE(n) static_cast(((n)->header & impl::xml_memory_page_type_mask) + 1) + #define PUGI__NODETYPE(n) static_cast((n)->header & impl::xml_memory_page_type_mask) struct xml_allocator; @@ -475,6 +491,14 @@ PUGI__NS_BEGIN #endif }; + static const size_t xml_memory_page_size = + #ifdef PUGIXML_MEMORY_PAGE_SIZE + (PUGIXML_MEMORY_PAGE_SIZE) + #else + 32768 + #endif + - sizeof(xml_memory_page); + struct xml_memory_string_header { uint16_t page_offset; // offset from page->data @@ -495,30 +519,21 @@ PUGI__NS_BEGIN size_t size = sizeof(xml_memory_page) + data_size; // allocate block with some alignment, leaving memory for worst-case padding - void* memory = xml_memory::allocate(size + xml_memory_page_alignment); + void* memory = xml_memory::allocate(size); if (!memory) return 0; - // align to next page boundary (note: this guarantees at least 1 usable byte before the page) - char* page_memory = reinterpret_cast((reinterpret_cast(memory) + xml_memory_page_alignment) & ~(xml_memory_page_alignment 
- 1)); - // prepare page structure - xml_memory_page* page = xml_memory_page::construct(page_memory); + xml_memory_page* page = xml_memory_page::construct(memory); assert(page); page->allocator = _root->allocator; - // record the offset for freeing the memory block - assert(page_memory > memory && page_memory - static_cast(memory) <= 127); - page_memory[-1] = static_cast(page_memory - static_cast(memory)); - return page; } static void deallocate_page(xml_memory_page* page) { - char* page_memory = reinterpret_cast(page); - - xml_memory::deallocate(page_memory - page_memory[-1]); + xml_memory::deallocate(page); } void* allocate_memory_oob(size_t size, xml_memory_page*& out_page); @@ -627,7 +642,7 @@ PUGI__NS_BEGIN // allocate memory for string and header block size_t size = sizeof(xml_memory_string_header) + length * sizeof(char_t); - + // round size up to block alignment boundary size_t full_size = (size + (xml_memory_block_alignment - 1)) & ~(xml_memory_block_alignment - 1); @@ -749,12 +764,12 @@ PUGI__NS_BEGIN void operator&=(uintptr_t mod) { - _flags &= mod; + _flags &= static_cast(mod); } void operator|=(uintptr_t mod) { - _flags |= mod; + _flags |= static_cast(mod); } uintptr_t operator&(uintptr_t mod) const @@ -764,10 +779,11 @@ PUGI__NS_BEGIN xml_memory_page* get_page() const { + // round-trip through void* to silence 'cast increases required alignment of target type' warnings const char* page_marker = reinterpret_cast(this) - (_page << compact_alignment_log2); - const char* page = page_marker - *reinterpret_cast(page_marker); + const char* page = page_marker - *reinterpret_cast(static_cast(page_marker)); - return const_cast(reinterpret_cast(page)); + return const_cast(reinterpret_cast(static_cast(page))); } private: @@ -784,12 +800,12 @@ PUGI__NS_BEGIN template PUGI__FN_NO_INLINE T* compact_get_value(const void* object) { - return static_cast(*compact_get_page(object, header_offset)->allocator->_hash->find(object)); + return 
static_cast(compact_get_page(object, header_offset)->allocator->_hash->find(object)); } template PUGI__FN_NO_INLINE void compact_set_value(const void* object, T* value) { - *compact_get_page(object, header_offset)->allocator->_hash->insert(object) = value; + compact_get_page(object, header_offset)->allocator->_hash->insert(object, value); } template class compact_pointer @@ -836,7 +852,7 @@ PUGI__NS_BEGIN { uintptr_t base = reinterpret_cast(this) & ~(compact_alignment - 1); - return reinterpret_cast(base + ((_data - 1 + start) << compact_alignment_log2)); + return reinterpret_cast(base + (_data - 1 + start) * compact_alignment); } else return compact_get_value(this); @@ -847,7 +863,7 @@ PUGI__NS_BEGIN T* operator->() const { - return operator T*(); + return *this; } private: @@ -914,7 +930,7 @@ PUGI__NS_BEGIN { uintptr_t base = reinterpret_cast(this) & ~(compact_alignment - 1); - return reinterpret_cast(base + ((_data - 1 - 65533) << compact_alignment_log2)); + return reinterpret_cast(base + (_data - 1 - 65533) * compact_alignment); } else if (_data == 65534) return static_cast(compact_get_page(this, header_offset)->compact_shared_parent); @@ -927,7 +943,7 @@ PUGI__NS_BEGIN T* operator->() const { - return operator T*(); + return *this; } private: @@ -959,7 +975,8 @@ PUGI__NS_BEGIN if (static_cast(offset) < (65535 << 7)) { - uint16_t* base = reinterpret_cast(reinterpret_cast(this) - base_offset); + // round-trip through void* to silence 'cast increases required alignment of target type' warnings + uint16_t* base = reinterpret_cast(static_cast(reinterpret_cast(this) - base_offset)); if (*base == 0) { @@ -1003,7 +1020,8 @@ PUGI__NS_BEGIN { xml_memory_page* page = compact_get_page(this, header_offset); - const uint16_t* base = reinterpret_cast(reinterpret_cast(this) - base_offset); + // round-trip through void* to silence 'cast increases required alignment of target type' warnings + const uint16_t* base = reinterpret_cast(static_cast(reinterpret_cast(this) - 
base_offset)); assert(*base); ptrdiff_t offset = ((*base - 1) << 7) + (_data - 1); @@ -1048,7 +1066,7 @@ namespace pugi struct xml_node_struct { - xml_node_struct(impl::xml_memory_page* page, xml_node_type type): header(page, type - 1), namevalue_base(0) + xml_node_struct(impl::xml_memory_page* page, xml_node_type type): header(page, type), namevalue_base(0) { PUGI__STATIC_ASSERT(sizeof(xml_node_struct) == 12); } @@ -1075,8 +1093,9 @@ namespace pugi { struct xml_attribute_struct { - xml_attribute_struct(impl::xml_memory_page* page): header(reinterpret_cast(page)), name(0), value(0), prev_attribute_c(0), next_attribute(0) + xml_attribute_struct(impl::xml_memory_page* page): name(0), value(0), prev_attribute_c(0), next_attribute(0) { + header = PUGI__GETHEADER_IMPL(this, page, 0); } uintptr_t header; @@ -1090,8 +1109,9 @@ namespace pugi struct xml_node_struct { - xml_node_struct(impl::xml_memory_page* page, xml_node_type type): header(reinterpret_cast(page) | (type - 1)), name(0), value(0), parent(0), first_child(0), prev_sibling_c(0), next_sibling(0), first_attribute(0) + xml_node_struct(impl::xml_memory_page* page, xml_node_type type): name(0), value(0), parent(0), first_child(0), prev_sibling_c(0), next_sibling(0), first_attribute(0) { + header = PUGI__GETHEADER_IMPL(this, page, type); } uintptr_t header; @@ -1122,9 +1142,6 @@ PUGI__NS_BEGIN { xml_document_struct(xml_memory_page* page): xml_node_struct(page, node_document), xml_allocator(page), buffer(0), extra_buffers(0) { - #ifdef PUGIXML_COMPACT - _hash = &hash; - #endif } const char_t* buffer; @@ -1599,28 +1616,11 @@ PUGI__NS_BEGIN } }; - template struct wchar_selector; - - template <> struct wchar_selector<2> + struct utf8_decoder { - typedef uint16_t type; - typedef utf16_counter counter; - typedef utf16_writer writer; - }; + typedef uint8_t type; - template <> struct wchar_selector<4> - { - typedef uint32_t type; - typedef utf32_counter counter; - typedef utf32_writer writer; - }; - - typedef 
wchar_selector::counter wchar_counter; - typedef wchar_selector::writer wchar_writer; - - template struct utf_decoder - { - static inline typename Traits::value_type decode_utf8_block(const uint8_t* data, size_t size, typename Traits::value_type result) + template static inline typename Traits::value_type process(const uint8_t* data, size_t size, typename Traits::value_type result, Traits) { const uint8_t utf8_byte_mask = 0x3f; @@ -1681,29 +1681,34 @@ PUGI__NS_BEGIN return result; } + }; - static inline typename Traits::value_type decode_utf16_block(const uint16_t* data, size_t size, typename Traits::value_type result) + template struct utf16_decoder + { + typedef uint16_t type; + + template static inline typename Traits::value_type process(const uint16_t* data, size_t size, typename Traits::value_type result, Traits) { - const uint16_t* end = data + size; - - while (data < end) + while (size) { - unsigned int lead = opt_swap::value ? endian_swap(*data) : *data; + uint16_t lead = opt_swap::value ? endian_swap(*data) : *data; // U+0000..U+D7FF if (lead < 0xD800) { result = Traits::low(result, lead); data += 1; + size -= 1; } // U+E000..U+FFFF else if (static_cast(lead - 0xE000) < 0x2000) { result = Traits::low(result, lead); data += 1; + size -= 1; } // surrogate pair lead - else if (static_cast(lead - 0xD800) < 0x400 && data + 1 < end) + else if (static_cast(lead - 0xD800) < 0x400 && size >= 2) { uint16_t next = opt_swap::value ? 
endian_swap(data[1]) : data[1]; @@ -1711,26 +1716,32 @@ PUGI__NS_BEGIN { result = Traits::high(result, 0x10000 + ((lead & 0x3ff) << 10) + (next & 0x3ff)); data += 2; + size -= 2; } else { data += 1; + size -= 1; } } else { data += 1; + size -= 1; } } return result; } + }; - static inline typename Traits::value_type decode_utf32_block(const uint32_t* data, size_t size, typename Traits::value_type result) + template struct utf32_decoder + { + typedef uint32_t type; + + template static inline typename Traits::value_type process(const uint32_t* data, size_t size, typename Traits::value_type result, Traits) { - const uint32_t* end = data + size; - - while (data < end) + while (size) { uint32_t lead = opt_swap::value ? endian_swap(*data) : *data; @@ -1739,53 +1750,76 @@ PUGI__NS_BEGIN { result = Traits::low(result, lead); data += 1; + size -= 1; } // U+10000..U+10FFFF else { result = Traits::high(result, lead); data += 1; + size -= 1; } } return result; } + }; - static inline typename Traits::value_type decode_latin1_block(const uint8_t* data, size_t size, typename Traits::value_type result) + struct latin1_decoder + { + typedef uint8_t type; + + template static inline typename Traits::value_type process(const uint8_t* data, size_t size, typename Traits::value_type result, Traits) { - for (size_t i = 0; i < size; ++i) + while (size) { - result = Traits::low(result, data[i]); + result = Traits::low(result, *data); + data += 1; + size -= 1; } return result; } - - static inline typename Traits::value_type decode_wchar_block_impl(const uint16_t* data, size_t size, typename Traits::value_type result) - { - return decode_utf16_block(data, size, result); - } - - static inline typename Traits::value_type decode_wchar_block_impl(const uint32_t* data, size_t size, typename Traits::value_type result) - { - return decode_utf32_block(data, size, result); - } - - static inline typename Traits::value_type decode_wchar_block(const wchar_t* data, size_t size, typename Traits::value_type 
result) - { - return decode_wchar_block_impl(reinterpret_cast::type*>(data), size, result); - } }; - template PUGI__FN void convert_utf_endian_swap(T* result, const T* data, size_t length) + template struct wchar_selector; + + template <> struct wchar_selector<2> { - for (size_t i = 0; i < length; ++i) result[i] = endian_swap(data[i]); - } + typedef uint16_t type; + typedef utf16_counter counter; + typedef utf16_writer writer; + typedef utf16_decoder decoder; + }; + + template <> struct wchar_selector<4> + { + typedef uint32_t type; + typedef utf32_counter counter; + typedef utf32_writer writer; + typedef utf32_decoder decoder; + }; + + typedef wchar_selector::counter wchar_counter; + typedef wchar_selector::writer wchar_writer; + + struct wchar_decoder + { + typedef wchar_t type; + + template static inline typename Traits::value_type process(const wchar_t* data, size_t size, typename Traits::value_type result, Traits traits) + { + typedef wchar_selector::decoder decoder; + + return decoder::process(reinterpret_cast(data), size, result, traits); + } + }; #ifdef PUGIXML_WCHAR_MODE PUGI__FN void convert_wchar_endian_swap(wchar_t* result, const wchar_t* data, size_t length) { - for (size_t i = 0; i < length; ++i) result[i] = static_cast(endian_swap(static_cast::type>(data[i]))); + for (size_t i = 0; i < length; ++i) + result[i] = static_cast(endian_swap(static_cast::type>(data[i]))); } #endif PUGI__NS_END @@ -1832,7 +1866,7 @@ PUGI__NS_BEGIN ctx_digit = 8, // 0-9 ctx_symbol = 16 // Any symbol > 127, a-z, A-Z, 0-9, _, -, . }; - + static const unsigned char chartypex_table[256] = { 3, 3, 3, 3, 3, 3, 3, 3, 3, 0, 2, 3, 3, 2, 3, 3, // 0-15 @@ -1854,7 +1888,7 @@ PUGI__NS_BEGIN 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20 }; - + #ifdef PUGIXML_WCHAR_MODE #define PUGI__IS_CHARTYPE_IMPL(c, ct, table) ((static_cast(c) < 128 ? 
table[static_cast(c)] : table[128]) & (ct)) #else @@ -1877,12 +1911,71 @@ PUGI__NS_BEGIN if (sizeof(wchar_t) == 2) return is_little_endian() ? encoding_utf16_le : encoding_utf16_be; - else + else return is_little_endian() ? encoding_utf32_le : encoding_utf32_be; } - PUGI__FN xml_encoding guess_buffer_encoding(uint8_t d0, uint8_t d1, uint8_t d2, uint8_t d3) + PUGI__FN bool parse_declaration_encoding(const uint8_t* data, size_t size, const uint8_t*& out_encoding, size_t& out_length) { + #define PUGI__SCANCHAR(ch) { if (offset >= size || data[offset] != ch) return false; offset++; } + #define PUGI__SCANCHARTYPE(ct) { while (offset < size && PUGI__IS_CHARTYPE(data[offset], ct)) offset++; } + + // check if we have a non-empty XML declaration + if (size < 6 || !((data[0] == '<') & (data[1] == '?') & (data[2] == 'x') & (data[3] == 'm') & (data[4] == 'l') && PUGI__IS_CHARTYPE(data[5], ct_space))) + return false; + + // scan XML declaration until the encoding field + for (size_t i = 6; i + 1 < size; ++i) + { + // declaration can not contain ? in quoted values + if (data[i] == '?') + return false; + + if (data[i] == 'e' && data[i + 1] == 'n') + { + size_t offset = i; + + // encoding follows the version field which can't contain 'en' so this has to be the encoding if XML is well formed + PUGI__SCANCHAR('e'); PUGI__SCANCHAR('n'); PUGI__SCANCHAR('c'); PUGI__SCANCHAR('o'); + PUGI__SCANCHAR('d'); PUGI__SCANCHAR('i'); PUGI__SCANCHAR('n'); PUGI__SCANCHAR('g'); + + // S? = S? + PUGI__SCANCHARTYPE(ct_space); + PUGI__SCANCHAR('='); + PUGI__SCANCHARTYPE(ct_space); + + // the only two valid delimiters are ' and " + uint8_t delimiter = (offset < size && data[offset] == '"') ? 
'"' : '\''; + + PUGI__SCANCHAR(delimiter); + + size_t start = offset; + + out_encoding = data + offset; + + PUGI__SCANCHARTYPE(ct_symbol); + + out_length = offset - start; + + PUGI__SCANCHAR(delimiter); + + return true; + } + } + + return false; + + #undef PUGI__SCANCHAR + #undef PUGI__SCANCHARTYPE + } + + PUGI__FN xml_encoding guess_buffer_encoding(const uint8_t* data, size_t size) + { + // skip encoding autodetection if input buffer is too small + if (size < 4) return encoding_utf8; + + uint8_t d0 = data[0], d1 = data[1], d2 = data[2], d3 = data[3]; + // look for BOM in first few bytes if (d0 == 0 && d1 == 0 && d2 == 0xfe && d3 == 0xff) return encoding_utf32_be; if (d0 == 0xff && d1 == 0xfe && d2 == 0 && d3 == 0) return encoding_utf32_le; @@ -1895,13 +1988,32 @@ PUGI__NS_BEGIN if (d0 == 0x3c && d1 == 0 && d2 == 0 && d3 == 0) return encoding_utf32_le; if (d0 == 0 && d1 == 0x3c && d2 == 0 && d3 == 0x3f) return encoding_utf16_be; if (d0 == 0x3c && d1 == 0 && d2 == 0x3f && d3 == 0) return encoding_utf16_le; - if (d0 == 0x3c && d1 == 0x3f && d2 == 0x78 && d3 == 0x6d) return encoding_utf8; // look for utf16 < followed by node name (this may fail, but is better than utf8 since it's zero terminated so early) if (d0 == 0 && d1 == 0x3c) return encoding_utf16_be; if (d0 == 0x3c && d1 == 0) return encoding_utf16_le; - // no known BOM detected, assume utf8 + // no known BOM detected; parse declaration + const uint8_t* enc = 0; + size_t enc_length = 0; + + if (d0 == 0x3c && d1 == 0x3f && d2 == 0x78 && d3 == 0x6d && parse_declaration_encoding(data, size, enc, enc_length)) + { + // iso-8859-1 (case-insensitive) + if (enc_length == 10 + && (enc[0] | ' ') == 'i' && (enc[1] | ' ') == 's' && (enc[2] | ' ') == 'o' + && enc[3] == '-' && enc[4] == '8' && enc[5] == '8' && enc[6] == '5' && enc[7] == '9' + && enc[8] == '-' && enc[9] == '1') + return encoding_latin1; + + // latin1 (case-insensitive) + if (enc_length == 6 + && (enc[0] | ' ') == 'l' && (enc[1] | ' ') == 'a' && (enc[2] | ' ') 
== 't' + && (enc[3] | ' ') == 'i' && (enc[4] | ' ') == 'n' + && enc[5] == '1') + return encoding_latin1; + } + return encoding_utf8; } @@ -1919,15 +2031,10 @@ PUGI__NS_BEGIN // only do autodetection if no explicit encoding is requested if (encoding != encoding_auto) return encoding; - // skip encoding autodetection if input buffer is too small - if (size < 4) return encoding_utf8; - // try to guess encoding (based on XML specification, Appendix F.1) const uint8_t* data = static_cast(contents); - PUGI__DMC_VOLATILE uint8_t d0 = data[0], d1 = data[1], d2 = data[2], d3 = data[3]; - - return guess_buffer_encoding(d0, d1, d2, d3); + return guess_buffer_encoding(data, size); } PUGI__FN bool get_mutable_buffer(char_t*& out_buffer, size_t& out_length, const void* contents, size_t size, bool is_mutable) @@ -1994,38 +2101,13 @@ PUGI__NS_BEGIN return true; } - PUGI__FN bool convert_buffer_utf8(char_t*& out_buffer, size_t& out_length, const void* contents, size_t size) + template PUGI__FN bool convert_buffer_generic(char_t*& out_buffer, size_t& out_length, const void* contents, size_t size, D) { - const uint8_t* data = static_cast(contents); - size_t data_length = size; + const typename D::type* data = static_cast(contents); + size_t data_length = size / sizeof(typename D::type); // first pass: get length in wchar_t units - size_t length = utf_decoder::decode_utf8_block(data, data_length, 0); - - // allocate buffer of suitable length - char_t* buffer = static_cast(xml_memory::allocate((length + 1) * sizeof(char_t))); - if (!buffer) return false; - - // second pass: convert utf8 input to wchar_t - wchar_writer::value_type obegin = reinterpret_cast(buffer); - wchar_writer::value_type oend = utf_decoder::decode_utf8_block(data, data_length, obegin); - - assert(oend == obegin + length); - *oend = 0; - - out_buffer = buffer; - out_length = length + 1; - - return true; - } - - template PUGI__FN bool convert_buffer_utf16(char_t*& out_buffer, size_t& out_length, const void* contents, 
size_t size, opt_swap) - { - const uint16_t* data = static_cast(contents); - size_t data_length = size / sizeof(uint16_t); - - // first pass: get length in wchar_t units - size_t length = utf_decoder::decode_utf16_block(data, data_length, 0); + size_t length = D::process(data, data_length, 0, wchar_counter()); // allocate buffer of suitable length char_t* buffer = static_cast(xml_memory::allocate((length + 1) * sizeof(char_t))); @@ -2033,57 +2115,7 @@ PUGI__NS_BEGIN // second pass: convert utf16 input to wchar_t wchar_writer::value_type obegin = reinterpret_cast(buffer); - wchar_writer::value_type oend = utf_decoder::decode_utf16_block(data, data_length, obegin); - - assert(oend == obegin + length); - *oend = 0; - - out_buffer = buffer; - out_length = length + 1; - - return true; - } - - template PUGI__FN bool convert_buffer_utf32(char_t*& out_buffer, size_t& out_length, const void* contents, size_t size, opt_swap) - { - const uint32_t* data = static_cast(contents); - size_t data_length = size / sizeof(uint32_t); - - // first pass: get length in wchar_t units - size_t length = utf_decoder::decode_utf32_block(data, data_length, 0); - - // allocate buffer of suitable length - char_t* buffer = static_cast(xml_memory::allocate((length + 1) * sizeof(char_t))); - if (!buffer) return false; - - // second pass: convert utf32 input to wchar_t - wchar_writer::value_type obegin = reinterpret_cast(buffer); - wchar_writer::value_type oend = utf_decoder::decode_utf32_block(data, data_length, obegin); - - assert(oend == obegin + length); - *oend = 0; - - out_buffer = buffer; - out_length = length + 1; - - return true; - } - - PUGI__FN bool convert_buffer_latin1(char_t*& out_buffer, size_t& out_length, const void* contents, size_t size) - { - const uint8_t* data = static_cast(contents); - size_t data_length = size; - - // get length in wchar_t units - size_t length = data_length; - - // allocate buffer of suitable length - char_t* buffer = static_cast(xml_memory::allocate((length 
+ 1) * sizeof(char_t))); - if (!buffer) return false; - - // convert latin1 input to wchar_t - wchar_writer::value_type obegin = reinterpret_cast(buffer); - wchar_writer::value_type oend = utf_decoder::decode_latin1_block(data, data_length, obegin); + wchar_writer::value_type oend = D::process(data, data_length, obegin, wchar_writer()); assert(oend == obegin + length); *oend = 0; @@ -2100,13 +2132,16 @@ PUGI__NS_BEGIN xml_encoding wchar_encoding = get_wchar_encoding(); // fast path: no conversion required - if (encoding == wchar_encoding) return get_mutable_buffer(out_buffer, out_length, contents, size, is_mutable); + if (encoding == wchar_encoding) + return get_mutable_buffer(out_buffer, out_length, contents, size, is_mutable); // only endian-swapping is required - if (need_endian_swap_utf(encoding, wchar_encoding)) return convert_buffer_endian_swap(out_buffer, out_length, contents, size, is_mutable); + if (need_endian_swap_utf(encoding, wchar_encoding)) + return convert_buffer_endian_swap(out_buffer, out_length, contents, size, is_mutable); // source encoding is utf8 - if (encoding == encoding_utf8) return convert_buffer_utf8(out_buffer, out_length, contents, size); + if (encoding == encoding_utf8) + return convert_buffer_generic(out_buffer, out_length, contents, size, utf8_decoder()); // source encoding is utf16 if (encoding == encoding_utf16_be || encoding == encoding_utf16_le) @@ -2114,8 +2149,8 @@ PUGI__NS_BEGIN xml_encoding native_encoding = is_little_endian() ? encoding_utf16_le : encoding_utf16_be; return (native_encoding == encoding) ? 
- convert_buffer_utf16(out_buffer, out_length, contents, size, opt_false()) : - convert_buffer_utf16(out_buffer, out_length, contents, size, opt_true()); + convert_buffer_generic(out_buffer, out_length, contents, size, utf16_decoder()) : + convert_buffer_generic(out_buffer, out_length, contents, size, utf16_decoder()); } // source encoding is utf32 @@ -2124,24 +2159,25 @@ PUGI__NS_BEGIN xml_encoding native_encoding = is_little_endian() ? encoding_utf32_le : encoding_utf32_be; return (native_encoding == encoding) ? - convert_buffer_utf32(out_buffer, out_length, contents, size, opt_false()) : - convert_buffer_utf32(out_buffer, out_length, contents, size, opt_true()); + convert_buffer_generic(out_buffer, out_length, contents, size, utf32_decoder()) : + convert_buffer_generic(out_buffer, out_length, contents, size, utf32_decoder()); } // source encoding is latin1 - if (encoding == encoding_latin1) return convert_buffer_latin1(out_buffer, out_length, contents, size); + if (encoding == encoding_latin1) + return convert_buffer_generic(out_buffer, out_length, contents, size, latin1_decoder()); - assert(!"Invalid encoding"); + assert(false && "Invalid encoding"); // unreachable return false; } #else - template PUGI__FN bool convert_buffer_utf16(char_t*& out_buffer, size_t& out_length, const void* contents, size_t size, opt_swap) + template PUGI__FN bool convert_buffer_generic(char_t*& out_buffer, size_t& out_length, const void* contents, size_t size, D) { - const uint16_t* data = static_cast(contents); - size_t data_length = size / sizeof(uint16_t); + const typename D::type* data = static_cast(contents); + size_t data_length = size / sizeof(typename D::type); // first pass: get length in utf8 units - size_t length = utf_decoder::decode_utf16_block(data, data_length, 0); + size_t length = D::process(data, data_length, 0, utf8_counter()); // allocate buffer of suitable length char_t* buffer = static_cast(xml_memory::allocate((length + 1) * sizeof(char_t))); @@ -2149,32 
+2185,7 @@ PUGI__NS_BEGIN // second pass: convert utf16 input to utf8 uint8_t* obegin = reinterpret_cast(buffer); - uint8_t* oend = utf_decoder::decode_utf16_block(data, data_length, obegin); - - assert(oend == obegin + length); - *oend = 0; - - out_buffer = buffer; - out_length = length + 1; - - return true; - } - - template PUGI__FN bool convert_buffer_utf32(char_t*& out_buffer, size_t& out_length, const void* contents, size_t size, opt_swap) - { - const uint32_t* data = static_cast(contents); - size_t data_length = size / sizeof(uint32_t); - - // first pass: get length in utf8 units - size_t length = utf_decoder::decode_utf32_block(data, data_length, 0); - - // allocate buffer of suitable length - char_t* buffer = static_cast(xml_memory::allocate((length + 1) * sizeof(char_t))); - if (!buffer) return false; - - // second pass: convert utf32 input to utf8 - uint8_t* obegin = reinterpret_cast(buffer); - uint8_t* oend = utf_decoder::decode_utf32_block(data, data_length, obegin); + uint8_t* oend = D::process(data, data_length, obegin, utf8_writer()); assert(oend == obegin + length); *oend = 0; @@ -2210,7 +2221,7 @@ PUGI__NS_BEGIN if (postfix_length == 0) return get_mutable_buffer(out_buffer, out_length, contents, size, is_mutable); // first pass: get length in utf8 units - size_t length = prefix_length + utf_decoder::decode_latin1_block(postfix, postfix_length, 0); + size_t length = prefix_length + latin1_decoder::process(postfix, postfix_length, 0, utf8_counter()); // allocate buffer of suitable length char_t* buffer = static_cast(xml_memory::allocate((length + 1) * sizeof(char_t))); @@ -2220,7 +2231,7 @@ PUGI__NS_BEGIN memcpy(buffer, data, prefix_length); uint8_t* obegin = reinterpret_cast(buffer); - uint8_t* oend = utf_decoder::decode_latin1_block(postfix, postfix_length, obegin + prefix_length); + uint8_t* oend = latin1_decoder::process(postfix, postfix_length, obegin + prefix_length, utf8_writer()); assert(oend == obegin + length); *oend = 0; @@ -2234,7 +2245,8 
@@ PUGI__NS_BEGIN PUGI__FN bool convert_buffer(char_t*& out_buffer, size_t& out_length, xml_encoding encoding, const void* contents, size_t size, bool is_mutable) { // fast path: no conversion required - if (encoding == encoding_utf8) return get_mutable_buffer(out_buffer, out_length, contents, size, is_mutable); + if (encoding == encoding_utf8) + return get_mutable_buffer(out_buffer, out_length, contents, size, is_mutable); // source encoding is utf16 if (encoding == encoding_utf16_be || encoding == encoding_utf16_le) @@ -2242,8 +2254,8 @@ PUGI__NS_BEGIN xml_encoding native_encoding = is_little_endian() ? encoding_utf16_le : encoding_utf16_be; return (native_encoding == encoding) ? - convert_buffer_utf16(out_buffer, out_length, contents, size, opt_false()) : - convert_buffer_utf16(out_buffer, out_length, contents, size, opt_true()); + convert_buffer_generic(out_buffer, out_length, contents, size, utf16_decoder()) : + convert_buffer_generic(out_buffer, out_length, contents, size, utf16_decoder()); } // source encoding is utf32 @@ -2252,14 +2264,15 @@ PUGI__NS_BEGIN xml_encoding native_encoding = is_little_endian() ? encoding_utf32_le : encoding_utf32_be; return (native_encoding == encoding) ? 
- convert_buffer_utf32(out_buffer, out_length, contents, size, opt_false()) : - convert_buffer_utf32(out_buffer, out_length, contents, size, opt_true()); + convert_buffer_generic(out_buffer, out_length, contents, size, utf32_decoder()) : + convert_buffer_generic(out_buffer, out_length, contents, size, utf32_decoder()); } // source encoding is latin1 - if (encoding == encoding_latin1) return convert_buffer_latin1(out_buffer, out_length, contents, size, is_mutable); + if (encoding == encoding_latin1) + return convert_buffer_latin1(out_buffer, out_length, contents, size, is_mutable); - assert(!"Invalid encoding"); + assert(false && "Invalid encoding"); // unreachable return false; } #endif @@ -2267,20 +2280,20 @@ PUGI__NS_BEGIN PUGI__FN size_t as_utf8_begin(const wchar_t* str, size_t length) { // get length in utf8 characters - return utf_decoder::decode_wchar_block(str, length, 0); + return wchar_decoder::process(str, length, 0, utf8_counter()); } PUGI__FN void as_utf8_end(char* buffer, size_t size, const wchar_t* str, size_t length) { // convert to utf8 uint8_t* begin = reinterpret_cast(buffer); - uint8_t* end = utf_decoder::decode_wchar_block(str, length, begin); - + uint8_t* end = wchar_decoder::process(str, length, begin, utf8_writer()); + assert(begin + size == end); (void)!end; (void)!size; } - + #ifndef PUGIXML_NO_STL PUGI__FN std::string as_utf8_impl(const wchar_t* str, size_t length) { @@ -2302,7 +2315,7 @@ PUGI__NS_BEGIN const uint8_t* data = reinterpret_cast(str); // first pass: get length in wchar_t units - size_t length = utf_decoder::decode_utf8_block(data, size, 0); + size_t length = utf8_decoder::process(data, size, 0, wchar_counter()); // allocate resulting string std::basic_string result; @@ -2312,7 +2325,7 @@ PUGI__NS_BEGIN if (length > 0) { wchar_writer::value_type begin = reinterpret_cast(&result[0]); - wchar_writer::value_type end = utf_decoder::decode_utf8_block(data, size, begin); + wchar_writer::value_type end = utf8_decoder::process(data, 
size, begin, wchar_writer()); assert(begin + length == end); (void)!end; @@ -2340,17 +2353,15 @@ PUGI__NS_BEGIN } template - PUGI__FN bool strcpy_insitu(String& dest, Header& header, uintptr_t header_mask, const char_t* source) + PUGI__FN bool strcpy_insitu(String& dest, Header& header, uintptr_t header_mask, const char_t* source, size_t source_length) { - size_t source_length = strlength(source); - if (source_length == 0) { // empty string and null pointer are equivalent, so just deallocate old memory xml_allocator* alloc = PUGI__GETPAGE_IMPL(header)->allocator; if (header & header_mask) alloc->deallocate_string(dest); - + // mark the string as not allocated dest = 0; header &= ~header_mask; @@ -2360,8 +2371,9 @@ PUGI__NS_BEGIN else if (dest && strcpy_insitu_allow(source_length, header, header_mask, dest)) { // we can reuse old buffer, so just copy the new data (including zero terminator) - memcpy(dest, source, (source_length + 1) * sizeof(char_t)); - + memcpy(dest, source, source_length * sizeof(char_t)); + dest[source_length] = 0; + return true; } else @@ -2375,11 +2387,12 @@ PUGI__NS_BEGIN if (!buf) return false; // copy the string (including zero terminator) - memcpy(buf, source, (source_length + 1) * sizeof(char_t)); + memcpy(buf, source, source_length * sizeof(char_t)); + buf[source_length] = 0; // deallocate old buffer (*after* the above to protect against overlapping memory and/or allocation failures) if (header & header_mask) alloc->deallocate_string(dest); - + // the string is now allocated, so set the flag dest = buf; header |= header_mask; @@ -2392,11 +2405,11 @@ PUGI__NS_BEGIN { char_t* end; size_t size; - + gap(): end(0), size(0) { } - + // Push new gap, move s count bytes further (skipping the gap). // Collapse previous gap. 
void push(char_t*& s, size_t count) @@ -2407,14 +2420,14 @@ PUGI__NS_BEGIN assert(s >= end); memmove(end - size, end, reinterpret_cast(s) - reinterpret_cast(end)); } - + s += count; // end of current gap - + // "merge" two gaps end = s; size += count; } - + // Collapse all gaps, return past-the-end pointer char_t* flush(char_t* s) { @@ -2429,7 +2442,7 @@ PUGI__NS_BEGIN else return s; } }; - + PUGI__FN char_t* strconv_escape(char_t* s, gap& g) { char_t* stre = s + 1; @@ -2461,7 +2474,7 @@ PUGI__NS_BEGIN ch = *++stre; } - + ++stre; } else // &#... (dec code) @@ -2472,7 +2485,7 @@ PUGI__NS_BEGIN for (;;) { - if (static_cast(static_cast(ch) - '0') <= 9) + if (static_cast(ch - '0') <= 9) ucsc = 10 * ucsc + (ch - '0'); else if (ch == ';') break; @@ -2481,7 +2494,7 @@ PUGI__NS_BEGIN ch = *++stre; } - + ++stre; } @@ -2490,7 +2503,7 @@ PUGI__NS_BEGIN #else s = reinterpret_cast(utf8_writer::any(reinterpret_cast(s), ucsc)); #endif - + g.push(s, stre - s); return stre; } @@ -2505,7 +2518,7 @@ PUGI__NS_BEGIN { *s++ = '&'; ++stre; - + g.push(s, stre - s); return stre; } @@ -2530,7 +2543,7 @@ PUGI__NS_BEGIN { *s++ = '>'; ++stre; - + g.push(s, stre - s); return stre; } @@ -2543,7 +2556,7 @@ PUGI__NS_BEGIN { *s++ = '<'; ++stre; - + g.push(s, stre - s); return stre; } @@ -2556,7 +2569,7 @@ PUGI__NS_BEGIN { *s++ = '"'; ++stre; - + g.push(s, stre - s); return stre; } @@ -2566,7 +2579,7 @@ PUGI__NS_BEGIN default: break; } - + return stre; } @@ -2574,7 +2587,7 @@ PUGI__NS_BEGIN #define PUGI__ENDSWITH(c, e) ((c) == (e) || ((c) == 0 && endch == (e))) #define PUGI__SKIPWS() { while (PUGI__IS_CHARTYPE(*s, ct_space)) ++s; } #define PUGI__OPTSET(OPT) ( optmsk & (OPT) ) - #define PUGI__PUSHNODE(TYPE) { cursor = append_new_node(cursor, alloc, TYPE); if (!cursor) PUGI__THROW_ERROR(status_out_of_memory, s); } + #define PUGI__PUSHNODE(TYPE) { cursor = append_new_node(cursor, *alloc, TYPE); if (!cursor) PUGI__THROW_ERROR(status_out_of_memory, s); } #define PUGI__POPNODE() { cursor = cursor->parent; 
} #define PUGI__SCANFOR(X) { while (*s != 0 && !(X)) ++s; } #define PUGI__SCANWHILE(X) { while (X) ++s; } @@ -2586,21 +2599,21 @@ PUGI__NS_BEGIN PUGI__FN char_t* strconv_comment(char_t* s, char_t endch) { gap g; - + while (true) { PUGI__SCANWHILE_UNROLL(!PUGI__IS_CHARTYPE(ss, ct_parse_comment)); - + if (*s == '\r') // Either a single 0x0d or 0x0d 0x0a pair { *s++ = '\n'; // replace first one with 0x0a - + if (*s == '\n') g.push(s, 1); } else if (s[0] == '-' && s[1] == '-' && PUGI__ENDSWITH(s[2], '>')) // comment ends here { *g.flush(s) = 0; - + return s + (s[2] == '>' ? 3 : 2); } else if (*s == 0) @@ -2614,21 +2627,21 @@ PUGI__NS_BEGIN PUGI__FN char_t* strconv_cdata(char_t* s, char_t endch) { gap g; - + while (true) { PUGI__SCANWHILE_UNROLL(!PUGI__IS_CHARTYPE(ss, ct_parse_cdata)); - + if (*s == '\r') // Either a single 0x0d or 0x0d 0x0a pair { *s++ = '\n'; // replace first one with 0x0a - + if (*s == '\n') g.push(s, 1); } else if (s[0] == ']' && s[1] == ']' && PUGI__ENDSWITH(s[2], '>')) // CDATA ends here { *g.flush(s) = 0; - + return s + 1; } else if (*s == 0) @@ -2638,9 +2651,9 @@ PUGI__NS_BEGIN else ++s; } } - + typedef char_t* (*strconv_pcdata_t)(char_t*); - + template struct strconv_pcdata_impl { static char_t* parse(char_t* s) @@ -2662,13 +2675,13 @@ PUGI__NS_BEGIN --end; *end = 0; - + return s + 1; } else if (opt_eol::value && *s == '\r') // Either a single 0x0d or 0x0d 0x0a pair { *s++ = '\n'; // replace first one with 0x0a - + if (*s == '\n') g.push(s, 1); } else if (opt_escape::value && *s == '&') @@ -2691,7 +2704,7 @@ PUGI__NS_BEGIN } } }; - + PUGI__FN strconv_pcdata_t get_strconv_pcdata(unsigned int optmask) { PUGI__STATIC_ASSERT(parse_escapes == 0x10 && parse_eol == 0x20 && parse_trim_pcdata == 0x0800); @@ -2706,12 +2719,12 @@ PUGI__NS_BEGIN case 5: return strconv_pcdata_impl::parse; case 6: return strconv_pcdata_impl::parse; case 7: return strconv_pcdata_impl::parse; - default: assert(false); return 0; // should not get here + default: assert(false); 
return 0; // unreachable } } typedef char_t* (*strconv_attribute_t)(char_t*, char_t); - + template struct strconv_attribute_impl { static char_t* parse_wnorm(char_t* s, char_t end_quote) @@ -2722,35 +2735,35 @@ PUGI__NS_BEGIN if (PUGI__IS_CHARTYPE(*s, ct_space)) { char_t* str = s; - + do ++str; while (PUGI__IS_CHARTYPE(*str, ct_space)); - + g.push(s, str - s); } while (true) { PUGI__SCANWHILE_UNROLL(!PUGI__IS_CHARTYPE(ss, ct_parse_attr_ws | ct_space)); - + if (*s == end_quote) { char_t* str = g.flush(s); - + do *str-- = 0; while (PUGI__IS_CHARTYPE(*str, ct_space)); - + return s + 1; } else if (PUGI__IS_CHARTYPE(*s, ct_space)) { *s++ = ' '; - + if (PUGI__IS_CHARTYPE(*s, ct_space)) { char_t* str = s + 1; while (PUGI__IS_CHARTYPE(*str, ct_space)) ++str; - + g.push(s, str - s); } } @@ -2773,11 +2786,11 @@ PUGI__NS_BEGIN while (true) { PUGI__SCANWHILE_UNROLL(!PUGI__IS_CHARTYPE(ss, ct_parse_attr_ws)); - + if (*s == end_quote) { *g.flush(s) = 0; - + return s + 1; } else if (PUGI__IS_CHARTYPE(*s, ct_space)) @@ -2785,7 +2798,7 @@ PUGI__NS_BEGIN if (*s == '\r') { *s++ = ' '; - + if (*s == '\n') g.push(s, 1); } else *s++ = ' '; @@ -2809,17 +2822,17 @@ PUGI__NS_BEGIN while (true) { PUGI__SCANWHILE_UNROLL(!PUGI__IS_CHARTYPE(ss, ct_parse_attr)); - + if (*s == end_quote) { *g.flush(s) = 0; - + return s + 1; } else if (*s == '\r') { *s++ = '\n'; - + if (*s == '\n') g.push(s, 1); } else if (opt_escape::value && *s == '&') @@ -2841,11 +2854,11 @@ PUGI__NS_BEGIN while (true) { PUGI__SCANWHILE_UNROLL(!PUGI__IS_CHARTYPE(ss, ct_parse_attr)); - + if (*s == end_quote) { *g.flush(s) = 0; - + return s + 1; } else if (opt_escape::value && *s == '&') @@ -2864,7 +2877,7 @@ PUGI__NS_BEGIN PUGI__FN strconv_attribute_t get_strconv_attribute(unsigned int optmask) { PUGI__STATIC_ASSERT(parse_escapes == 0x10 && parse_eol == 0x20 && parse_wconv_attribute == 0x40 && parse_wnorm_attribute == 0x80); - + switch ((optmask >> 4) & 15) // get bitmask for flags (wconv wnorm eol escapes) { case 0: return 
strconv_attribute_impl::parse_simple; @@ -2883,7 +2896,7 @@ PUGI__NS_BEGIN case 13: return strconv_attribute_impl::parse_wnorm; case 14: return strconv_attribute_impl::parse_wnorm; case 15: return strconv_attribute_impl::parse_wnorm; - default: assert(false); return 0; // should not get here + default: assert(false); return 0; // unreachable } } @@ -2898,18 +2911,12 @@ PUGI__NS_BEGIN struct xml_parser { - xml_allocator alloc; - xml_allocator* alloc_state; + xml_allocator* alloc; char_t* error_offset; xml_parse_status error_status; - - xml_parser(xml_allocator* alloc_): alloc(*alloc_), alloc_state(alloc_), error_offset(0), error_status(status_ok) - { - } - ~xml_parser() + xml_parser(xml_allocator* alloc_): alloc(alloc_), error_offset(0), error_status(status_ok) { - *alloc_state = alloc; } // DOCTYPE consists of nested sections of the following possible types: @@ -3236,7 +3243,7 @@ PUGI__NS_BEGIN { strconv_attribute_t strconv_attribute = get_strconv_attribute(optmsk); strconv_pcdata_t strconv_pcdata = get_strconv_pcdata(optmsk); - + char_t ch = 0; xml_node_struct* cursor = root; char_t* mark = s; @@ -3267,10 +3274,10 @@ PUGI__NS_BEGIN while (true) { PUGI__SKIPWS(); // Eat any whitespace. - + if (PUGI__IS_CHARTYPE(*s, ct_start_symbol)) // <... #... { - xml_attribute_struct* a = append_new_attribute(cursor, alloc); // Make space for this attribute. + xml_attribute_struct* a = append_new_attribute(cursor, *alloc); // Make space for this attribute. if (!a) PUGI__THROW_ERROR(status_out_of_memory, s); a->name = s; // Save the offset. @@ -3285,7 +3292,7 @@ PUGI__NS_BEGIN ch = *s; ++s; } - + if (ch == '=') // '<... #=...' { PUGI__SKIPWS(); // Eat any whitespace. @@ -3297,7 +3304,7 @@ PUGI__NS_BEGIN a->value = s; // Save the offset. 
s = strconv_attribute(s, ch); - + if (!s) PUGI__THROW_ERROR(status_bad_attribute, a->value); // After this line the loop continues from the start; @@ -3312,7 +3319,7 @@ PUGI__NS_BEGIN else if (*s == '/') { ++s; - + if (*s == '>') { PUGI__POPNODE(); @@ -3353,7 +3360,7 @@ PUGI__NS_BEGIN { // we stepped over null terminator, backtrack & handle closing tag --s; - + if (endch != '>') PUGI__THROW_ERROR(status_bad_start_element, s); } else PUGI__THROW_ERROR(status_bad_start_element, s); @@ -3362,20 +3369,22 @@ PUGI__NS_BEGIN { ++s; + mark = s; + char_t* name = cursor->name; - if (!name) PUGI__THROW_ERROR(status_end_element_mismatch, s); - + if (!name) PUGI__THROW_ERROR(status_end_element_mismatch, mark); + while (PUGI__IS_CHARTYPE(*s, ct_symbol)) { - if (*s++ != *name++) PUGI__THROW_ERROR(status_end_element_mismatch, s); + if (*s++ != *name++) PUGI__THROW_ERROR(status_end_element_mismatch, mark); } if (*name) { if (*s == 0 && name[0] == endch && name[1] == 0) PUGI__THROW_ERROR(status_bad_end_element, s); - else PUGI__THROW_ERROR(status_end_element_mismatch, s); + else PUGI__THROW_ERROR(status_end_element_mismatch, mark); } - + PUGI__POPNODE(); // Pop. PUGI__SKIPWS(); @@ -3429,23 +3438,31 @@ PUGI__NS_BEGIN if (!PUGI__OPTSET(parse_trim_pcdata)) s = mark; - + if (cursor->parent || PUGI__OPTSET(parse_fragment)) { - PUGI__PUSHNODE(node_pcdata); // Append a new node on the tree. - cursor->value = s; // Save the offset. + if (PUGI__OPTSET(parse_embed_pcdata) && cursor->parent && !cursor->first_child && !cursor->value) + { + cursor->value = s; // Save the offset. + } + else + { + PUGI__PUSHNODE(node_pcdata); // Append a new node on the tree. + + cursor->value = s; // Save the offset. + + PUGI__POPNODE(); // Pop since this is a standalone. + } s = strconv_pcdata(s); - - PUGI__POPNODE(); // Pop since this is a standalone. 
- + if (!*s) break; } else { PUGI__SCANFOR(*s == '<'); // '...<' if (!*s) break; - + ++s; } @@ -3493,14 +3510,14 @@ PUGI__NS_BEGIN // get last child of the root before parsing xml_node_struct* last_root_child = root->first_child ? root->first_child->prev_sibling_c + 0 : 0; - + // create parser on stack xml_parser parser(static_cast(xmldoc)); // save last character and make buffer zero-terminated (speeds up parsing) char_t endch = buffer[length - 1]; buffer[length - 1] = 0; - + // skip BOM to make sure it does not end up as part of parse output char_t* buffer_data = parse_skip_bom(buffer); @@ -3517,7 +3534,7 @@ PUGI__NS_BEGIN return make_parse_result(status_unrecognized_tag, length - 1); // check if there are any element nodes parsed - xml_node_struct* first_root_child_parsed = last_root_child ? last_root_child->next_sibling + 0 : root->first_child; + xml_node_struct* first_root_child_parsed = last_root_child ? last_root_child->next_sibling + 0 : root->first_child+ 0; if (!PUGI__OPTSET(parse_fragment) && !has_element_node_siblings(first_root_child_parsed)) return make_parse_result(status_no_document_element, length - 1); @@ -3561,12 +3578,36 @@ PUGI__NS_BEGIN return encoding_utf8; } + template PUGI__FN size_t convert_buffer_output_generic(typename T::value_type dest, const char_t* data, size_t length, D, T) + { + PUGI__STATIC_ASSERT(sizeof(char_t) == sizeof(typename D::type)); + + typename T::value_type end = D::process(reinterpret_cast(data), length, dest, T()); + + return static_cast(end - dest) * sizeof(*dest); + } + + template PUGI__FN size_t convert_buffer_output_generic(typename T::value_type dest, const char_t* data, size_t length, D, T, bool opt_swap) + { + PUGI__STATIC_ASSERT(sizeof(char_t) == sizeof(typename D::type)); + + typename T::value_type end = D::process(reinterpret_cast(data), length, dest, T()); + + if (opt_swap) + { + for (typename T::value_type i = dest; i != end; ++i) + *i = endian_swap(*i); + } + + return static_cast(end - dest) * 
sizeof(*dest); + } + #ifdef PUGIXML_WCHAR_MODE PUGI__FN size_t get_valid_length(const char_t* data, size_t length) { if (length < 1) return 0; - // discard last character if it's the lead of a surrogate pair + // discard last character if it's the lead of a surrogate pair return (sizeof(wchar_t) == 2 && static_cast(static_cast(data[length - 1]) - 0xD800) < 0x400) ? length - 1 : length; } @@ -3579,58 +3620,32 @@ PUGI__NS_BEGIN return length * sizeof(char_t); } - + // convert to utf8 if (encoding == encoding_utf8) - { - uint8_t* dest = r_u8; - uint8_t* end = utf_decoder::decode_wchar_block(data, length, dest); - - return static_cast(end - dest); - } + return convert_buffer_output_generic(r_u8, data, length, wchar_decoder(), utf8_writer()); // convert to utf16 if (encoding == encoding_utf16_be || encoding == encoding_utf16_le) { - uint16_t* dest = r_u16; - - // convert to native utf16 - uint16_t* end = utf_decoder::decode_wchar_block(data, length, dest); - - // swap if necessary xml_encoding native_encoding = is_little_endian() ? encoding_utf16_le : encoding_utf16_be; - if (native_encoding != encoding) convert_utf_endian_swap(dest, dest, static_cast(end - dest)); - - return static_cast(end - dest) * sizeof(uint16_t); + return convert_buffer_output_generic(r_u16, data, length, wchar_decoder(), utf16_writer(), native_encoding != encoding); } // convert to utf32 if (encoding == encoding_utf32_be || encoding == encoding_utf32_le) { - uint32_t* dest = r_u32; - - // convert to native utf32 - uint32_t* end = utf_decoder::decode_wchar_block(data, length, dest); - - // swap if necessary xml_encoding native_encoding = is_little_endian() ? 
encoding_utf32_le : encoding_utf32_be; - if (native_encoding != encoding) convert_utf_endian_swap(dest, dest, static_cast(end - dest)); - - return static_cast(end - dest) * sizeof(uint32_t); + return convert_buffer_output_generic(r_u32, data, length, wchar_decoder(), utf32_writer(), native_encoding != encoding); } // convert to latin1 if (encoding == encoding_latin1) - { - uint8_t* dest = r_u8; - uint8_t* end = utf_decoder::decode_wchar_block(data, length, dest); + return convert_buffer_output_generic(r_u8, data, length, wchar_decoder(), latin1_writer()); - return static_cast(end - dest); - } - - assert(!"Invalid encoding"); + assert(false && "Invalid encoding"); // unreachable return 0; } #else @@ -3654,43 +3669,22 @@ PUGI__NS_BEGIN { if (encoding == encoding_utf16_be || encoding == encoding_utf16_le) { - uint16_t* dest = r_u16; - - // convert to native utf16 - uint16_t* end = utf_decoder::decode_utf8_block(reinterpret_cast(data), length, dest); - - // swap if necessary xml_encoding native_encoding = is_little_endian() ? encoding_utf16_le : encoding_utf16_be; - if (native_encoding != encoding) convert_utf_endian_swap(dest, dest, static_cast(end - dest)); - - return static_cast(end - dest) * sizeof(uint16_t); + return convert_buffer_output_generic(r_u16, data, length, utf8_decoder(), utf16_writer(), native_encoding != encoding); } if (encoding == encoding_utf32_be || encoding == encoding_utf32_le) { - uint32_t* dest = r_u32; - - // convert to native utf32 - uint32_t* end = utf_decoder::decode_utf8_block(reinterpret_cast(data), length, dest); - - // swap if necessary xml_encoding native_encoding = is_little_endian() ? 
encoding_utf32_le : encoding_utf32_be; - if (native_encoding != encoding) convert_utf_endian_swap(dest, dest, static_cast(end - dest)); - - return static_cast(end - dest) * sizeof(uint32_t); + return convert_buffer_output_generic(r_u32, data, length, utf8_decoder(), utf32_writer(), native_encoding != encoding); } if (encoding == encoding_latin1) - { - uint8_t* dest = r_u8; - uint8_t* end = utf_decoder::decode_utf8_block(reinterpret_cast(data), length, dest); + return convert_buffer_output_generic(r_u8, data, length, utf8_decoder(), latin1_writer()); - return static_cast(end - dest); - } - - assert(!"Invalid encoding"); + assert(false && "Invalid encoding"); // unreachable return 0; } #endif @@ -3914,10 +3908,10 @@ PUGI__NS_BEGIN while (*s) { const char_t* prev = s; - + // While *s is a usual symbol PUGI__SCANWHILE_UNROLL(!PUGI__IS_CHARTYPEX(ss, type)); - + writer.write_buffer(prev, static_cast(s - prev)); switch (*s) @@ -4083,7 +4077,7 @@ PUGI__NS_BEGIN writer.write(' '); } - writer.write_string(a->name ? a->name : default_name); + writer.write_string(a->name ? a->name + 0 : default_name); writer.write('=', '"'); if (a->value) @@ -4096,7 +4090,7 @@ PUGI__NS_BEGIN PUGI__FN bool node_output_start(xml_buffered_writer& writer, xml_node_struct* node, const char_t* indent, size_t indent_length, unsigned int flags, unsigned int depth) { const char_t* default_name = PUGIXML_TEXT(":anonymous"); - const char_t* name = node->name ? node->name : default_name; + const char_t* name = node->name ? 
node->name + 0 : default_name; writer.write('<'); writer.write_string(name); @@ -4104,24 +4098,61 @@ PUGI__NS_BEGIN if (node->first_attribute) node_output_attributes(writer, node, indent, indent_length, flags, depth); - if (!node->first_child) + // element nodes can have value if parse_embed_pcdata was used + if (!node->value) { - writer.write(' ', '/', '>'); + if (!node->first_child) + { + if (flags & format_no_empty_element_tags) + { + writer.write('>', '<', '/'); + writer.write_string(name); + writer.write('>'); - return false; + return false; + } + else + { + if ((flags & format_raw) == 0) + writer.write(' '); + + writer.write('/', '>'); + + return false; + } + } + else + { + writer.write('>'); + + return true; + } } else { writer.write('>'); - return true; + text_output(writer, node->value, ctx_special_pcdata, flags); + + if (!node->first_child) + { + writer.write('<', '/'); + writer.write_string(name); + writer.write('>'); + + return false; + } + else + { + return true; + } } } PUGI__FN void node_output_end(xml_buffered_writer& writer, xml_node_struct* node) { const char_t* default_name = PUGIXML_TEXT(":anonymous"); - const char_t* name = node->name ? node->name : default_name; + const char_t* name = node->name ? node->name + 0 : default_name; writer.write('<', '/'); writer.write_string(name); @@ -4148,7 +4179,7 @@ PUGI__NS_BEGIN case node_pi: writer.write('<', '?'); - writer.write_string(node->name ? node->name : default_name); + writer.write_string(node->name ? node->name + 0 : default_name); if (node->value) { @@ -4161,7 +4192,7 @@ PUGI__NS_BEGIN case node_declaration: writer.write('<', '?'); - writer.write_string(node->name ? node->name : default_name); + writer.write_string(node->name ? 
node->name + 0 : default_name); node_output_attributes(writer, node, PUGIXML_TEXT(""), 0, flags | format_raw, 0); writer.write('?', '>'); break; @@ -4180,7 +4211,7 @@ PUGI__NS_BEGIN break; default: - assert(!"Invalid node type"); + assert(false && "Invalid node type"); // unreachable } } @@ -4222,6 +4253,10 @@ PUGI__NS_BEGIN if (node_output_start(writer, node, indent, indent_length, flags, depth)) { + // element nodes can have value if parse_embed_pcdata was used + if (node->value) + indent_flags = 0; + node = node->first_child; depth++; continue; @@ -4355,7 +4390,7 @@ PUGI__NS_BEGIN source_header |= xml_memory_page_contents_shared_mask; } else - strcpy_insitu(dest, header, header_mask, source); + strcpy_insitu(dest, header, header_mask, source, strlength(source)); } } @@ -4388,6 +4423,7 @@ PUGI__NS_BEGIN while (sit && sit != sn) { + // when a tree is copied into one of the descendants, we need to skip that subtree to avoid an infinite loop if (sit != dn) { xml_node_struct* copy = append_new_node(dit, alloc, PUGI__NODETYPE(sit)); @@ -4438,39 +4474,96 @@ PUGI__NS_BEGIN } // get value with conversion functions - PUGI__FN int get_integer_base(const char_t* value) + template PUGI__FN PUGI__UNSIGNED_OVERFLOW U string_to_integer(const char_t* value, U minv, U maxv) { + U result = 0; const char_t* s = value; while (PUGI__IS_CHARTYPE(*s, ct_space)) s++; - if (*s == '-') - s++; + bool negative = (*s == '-'); - return (s[0] == '0' && (s[1] == 'x' || s[1] == 'X')) ? 
16 : 10; + s += (*s == '+' || *s == '-'); + + bool overflow = false; + + if (s[0] == '0' && (s[1] | ' ') == 'x') + { + s += 2; + + // since overflow detection relies on length of the sequence skip leading zeros + while (*s == '0') + s++; + + const char_t* start = s; + + for (;;) + { + if (static_cast(*s - '0') < 10) + result = result * 16 + (*s - '0'); + else if (static_cast((*s | ' ') - 'a') < 6) + result = result * 16 + ((*s | ' ') - 'a' + 10); + else + break; + + s++; + } + + size_t digits = static_cast(s - start); + + overflow = digits > sizeof(U) * 2; + } + else + { + // since overflow detection relies on length of the sequence skip leading zeros + while (*s == '0') + s++; + + const char_t* start = s; + + for (;;) + { + if (static_cast(*s - '0') < 10) + result = result * 10 + (*s - '0'); + else + break; + + s++; + } + + size_t digits = static_cast(s - start); + + PUGI__STATIC_ASSERT(sizeof(U) == 8 || sizeof(U) == 4 || sizeof(U) == 2); + + const size_t max_digits10 = sizeof(U) == 8 ? 20 : sizeof(U) == 4 ? 10 : 5; + const char_t max_lead = sizeof(U) == 8 ? '1' : sizeof(U) == 4 ? '4' : '6'; + const size_t high_bit = sizeof(U) * 8 - 1; + + overflow = digits >= max_digits10 && !(digits == max_digits10 && (*start < max_lead || (*start == max_lead && result >> high_bit))); + } + + if (negative) + { + // Workaround for crayc++ CC-3059: Expected no overflow in routine. + #ifdef _CRAYC + return (overflow || result > ~minv + 1) ? minv : ~result + 1; + #else + return (overflow || result > 0 - minv) ? minv : 0 - result; + #endif + } + else + return (overflow || result > maxv) ? 
maxv : result; } PUGI__FN int get_value_int(const char_t* value) { - int base = get_integer_base(value); - - #ifdef PUGIXML_WCHAR_MODE - return static_cast(wcstol(value, 0, base)); - #else - return static_cast(strtol(value, 0, base)); - #endif + return string_to_integer(value, static_cast(INT_MIN), INT_MAX); } PUGI__FN unsigned int get_value_uint(const char_t* value) { - int base = get_integer_base(value); - - #ifdef PUGIXML_WCHAR_MODE - return static_cast(wcstoul(value, 0, base)); - #else - return static_cast(strtoul(value, 0, base)); - #endif + return string_to_integer(value, 0, UINT_MAX); } PUGI__FN double get_value_double(const char_t* value) @@ -4503,118 +4596,117 @@ PUGI__NS_BEGIN #ifdef PUGIXML_HAS_LONG_LONG PUGI__FN long long get_value_llong(const char_t* value) { - int base = get_integer_base(value); - - #ifdef PUGIXML_WCHAR_MODE - #ifdef PUGI__MSVC_CRT_VERSION - return _wcstoi64(value, 0, base); - #else - return wcstoll(value, 0, base); - #endif - #else - #ifdef PUGI__MSVC_CRT_VERSION - return _strtoi64(value, 0, base); - #else - return strtoll(value, 0, base); - #endif - #endif + return string_to_integer(value, static_cast(LLONG_MIN), LLONG_MAX); } PUGI__FN unsigned long long get_value_ullong(const char_t* value) { - int base = get_integer_base(value); - - #ifdef PUGIXML_WCHAR_MODE - #ifdef PUGI__MSVC_CRT_VERSION - return _wcstoui64(value, 0, base); - #else - return wcstoull(value, 0, base); - #endif - #else - #ifdef PUGI__MSVC_CRT_VERSION - return _strtoui64(value, 0, base); - #else - return strtoull(value, 0, base); - #endif - #endif + return string_to_integer(value, 0, ULLONG_MAX); } #endif + template PUGI__FN PUGI__UNSIGNED_OVERFLOW char_t* integer_to_string(char_t* begin, char_t* end, U value, bool negative) + { + char_t* result = end - 1; + U rest = negative ? 
0 - value : value; + + do + { + *result-- = static_cast('0' + (rest % 10)); + rest /= 10; + } + while (rest); + + assert(result >= begin); + (void)begin; + + *result = '-'; + + return result + !negative; + } + // set value with conversion functions template - PUGI__FN bool set_value_buffer(String& dest, Header& header, uintptr_t header_mask, char (&buf)[128]) + PUGI__FN bool set_value_ascii(String& dest, Header& header, uintptr_t header_mask, char* buf) { #ifdef PUGIXML_WCHAR_MODE char_t wbuf[128]; - impl::widen_ascii(wbuf, buf); + assert(strlen(buf) < sizeof(wbuf) / sizeof(wbuf[0])); - return strcpy_insitu(dest, header, header_mask, wbuf); + size_t offset = 0; + for (; buf[offset]; ++offset) wbuf[offset] = buf[offset]; + + return strcpy_insitu(dest, header, header_mask, wbuf, offset); #else - return strcpy_insitu(dest, header, header_mask, buf); + return strcpy_insitu(dest, header, header_mask, buf, strlen(buf)); #endif } - template - PUGI__FN bool set_value_convert(String& dest, Header& header, uintptr_t header_mask, int value) + template + PUGI__FN bool set_value_integer(String& dest, Header& header, uintptr_t header_mask, U value, bool negative) { - char buf[128]; - sprintf(buf, "%d", value); - - return set_value_buffer(dest, header, header_mask, buf); - } + char_t buf[64]; + char_t* end = buf + sizeof(buf) / sizeof(buf[0]); + char_t* begin = integer_to_string(buf, end, value, negative); - template - PUGI__FN bool set_value_convert(String& dest, Header& header, uintptr_t header_mask, unsigned int value) - { - char buf[128]; - sprintf(buf, "%u", value); - - return set_value_buffer(dest, header, header_mask, buf); + return strcpy_insitu(dest, header, header_mask, begin, end - begin); } template PUGI__FN bool set_value_convert(String& dest, Header& header, uintptr_t header_mask, float value) { char buf[128]; - sprintf(buf, "%.9g", value); + PUGI__SNPRINTF(buf, "%.9g", value); - return set_value_buffer(dest, header, header_mask, buf); + return set_value_ascii(dest, 
header, header_mask, buf); } template PUGI__FN bool set_value_convert(String& dest, Header& header, uintptr_t header_mask, double value) { char buf[128]; - sprintf(buf, "%.17g", value); + PUGI__SNPRINTF(buf, "%.17g", value); - return set_value_buffer(dest, header, header_mask, buf); - } - - template - PUGI__FN bool set_value_convert(String& dest, Header& header, uintptr_t header_mask, bool value) - { - return strcpy_insitu(dest, header, header_mask, value ? PUGIXML_TEXT("true") : PUGIXML_TEXT("false")); - } - -#ifdef PUGIXML_HAS_LONG_LONG - template - PUGI__FN bool set_value_convert(String& dest, Header& header, uintptr_t header_mask, long long value) - { - char buf[128]; - sprintf(buf, "%lld", value); - - return set_value_buffer(dest, header, header_mask, buf); + return set_value_ascii(dest, header, header_mask, buf); } template - PUGI__FN bool set_value_convert(String& dest, Header& header, uintptr_t header_mask, unsigned long long value) + PUGI__FN bool set_value_bool(String& dest, Header& header, uintptr_t header_mask, bool value) { - char buf[128]; - sprintf(buf, "%llu", value); - - return set_value_buffer(dest, header, header_mask, buf); + return strcpy_insitu(dest, header, header_mask, value ? PUGIXML_TEXT("true") : PUGIXML_TEXT("false"), value ? 
4 : 5); + } + + PUGI__FN xml_parse_result load_buffer_impl(xml_document_struct* doc, xml_node_struct* root, void* contents, size_t size, unsigned int options, xml_encoding encoding, bool is_mutable, bool own, char_t** out_buffer) + { + // check input buffer + if (!contents && size) return make_parse_result(status_io_error); + + // get actual encoding + xml_encoding buffer_encoding = impl::get_buffer_encoding(encoding, contents, size); + + // get private buffer + char_t* buffer = 0; + size_t length = 0; + + if (!impl::convert_buffer(buffer, length, buffer_encoding, contents, size, is_mutable)) return impl::make_parse_result(status_out_of_memory); + + // delete original buffer if we performed a conversion + if (own && buffer != contents && contents) impl::xml_memory::deallocate(contents); + + // grab onto buffer if it's our buffer, user is responsible for deallocating contents himself + if (own || buffer != contents) *out_buffer = buffer; + + // store buffer for offset_debug + doc->buffer = buffer; + + // parse + xml_parse_result res = impl::xml_parser::parse(buffer, length, doc, root, options); + + // remember encoding + res.encoding = buffer_encoding; + + return res; } -#endif // we need to get length of entire file to load it in memory; the only (relatively) sane way to do it is via seek/tell trick PUGI__FN xml_parse_status get_file_size(FILE* file, size_t& out_result) @@ -4644,7 +4736,7 @@ PUGI__NS_BEGIN // check for I/O errors if (length < 0) return status_io_error; - + // check for overflow size_t result = static_cast(length); @@ -4657,7 +4749,7 @@ PUGI__NS_BEGIN } // This function assumes that buffer has extra sizeof(char_t) writable bytes after size - PUGI__FN size_t zero_terminate_buffer(void* buffer, size_t size, xml_encoding encoding) + PUGI__FN size_t zero_terminate_buffer(void* buffer, size_t size, xml_encoding encoding) { // We only need to zero-terminate if encoding conversion does not do it for us #ifdef PUGIXML_WCHAR_MODE @@ -4681,7 +4773,7 @@ 
PUGI__NS_BEGIN return size; } - PUGI__FN xml_parse_result load_file_impl(xml_document& doc, FILE* file, unsigned int options, xml_encoding encoding) + PUGI__FN xml_parse_result load_file_impl(xml_document_struct* doc, FILE* file, unsigned int options, xml_encoding encoding, char_t** out_buffer) { if (!file) return make_parse_result(status_file_not_found); @@ -4689,7 +4781,7 @@ PUGI__NS_BEGIN size_t size = 0; xml_parse_status size_status = get_file_size(file, size); if (size_status != status_ok) return make_parse_result(size_status); - + size_t max_suffix_size = sizeof(char_t); // allocate buffer for the whole file @@ -4706,8 +4798,13 @@ PUGI__NS_BEGIN } xml_encoding real_encoding = get_buffer_encoding(encoding, contents, size); - - return doc.load_buffer_inplace_own(contents, zero_terminate_buffer(contents, size, real_encoding), options, real_encoding); + + return load_buffer_impl(doc, doc, contents, zero_terminate_buffer(contents, size, real_encoding), options, real_encoding, true, true, out_buffer); + } + + PUGI__FN void close_file(FILE* file) + { + fclose(file); } #ifndef PUGIXML_NO_STL @@ -4717,7 +4814,7 @@ PUGI__NS_BEGIN { void* memory = xml_memory::allocate(sizeof(xml_stream_chunk)); if (!memory) return 0; - + return new (memory) xml_stream_chunk(); } @@ -4827,14 +4924,14 @@ PUGI__NS_BEGIN // return buffer size_t actual_length = static_cast(stream.gcount()); assert(actual_length <= read_length); - + *out_buffer = buffer.release(); *out_size = actual_length * sizeof(T); return status_ok; } - template PUGI__FN xml_parse_result load_stream_impl(xml_document& doc, std::basic_istream& stream, unsigned int options, xml_encoding encoding) + template PUGI__FN xml_parse_result load_stream_impl(xml_document_struct* doc, std::basic_istream& stream, unsigned int options, xml_encoding encoding, char_t** out_buffer) { void* buffer = 0; size_t size = 0; @@ -4855,8 +4952,8 @@ PUGI__NS_BEGIN if (status != status_ok) return make_parse_result(status); xml_encoding real_encoding 
= get_buffer_encoding(encoding, buffer, size); - - return doc.load_buffer_inplace_own(buffer, zero_terminate_buffer(buffer, size, real_encoding), options, real_encoding); + + return load_buffer_impl(doc, doc, buffer, zero_terminate_buffer(buffer, size, real_encoding), options, real_encoding, true, true, out_buffer); } #endif @@ -4917,37 +5014,21 @@ PUGI__NS_BEGIN return ferror(file) == 0; } - PUGI__FN xml_parse_result load_buffer_impl(xml_document_struct* doc, xml_node_struct* root, void* contents, size_t size, unsigned int options, xml_encoding encoding, bool is_mutable, bool own, char_t** out_buffer) + struct name_null_sentry { - // check input buffer - if (!contents && size) return make_parse_result(status_io_error); + xml_node_struct* node; + char_t* name; - // get actual encoding - xml_encoding buffer_encoding = impl::get_buffer_encoding(encoding, contents, size); + name_null_sentry(xml_node_struct* node_): node(node_), name(node_->name) + { + node->name = 0; + } - // get private buffer - char_t* buffer = 0; - size_t length = 0; - - if (!impl::convert_buffer(buffer, length, buffer_encoding, contents, size, is_mutable)) return impl::make_parse_result(status_out_of_memory); - - // delete original buffer if we performed a conversion - if (own && buffer != contents && contents) impl::xml_memory::deallocate(contents); - - // grab onto buffer if it's our buffer, user is responsible for deallocating contents himself - if (own || buffer != contents) *out_buffer = buffer; - - // store buffer for offset_debug - doc->buffer = buffer; - - // parse - xml_parse_result res = impl::xml_parser::parse(buffer, length, doc, root, options); - - // remember encoding - res.encoding = buffer_encoding; - - return res; - } + ~name_null_sentry() + { + node->name = name; + } + }; PUGI__NS_END namespace pugi @@ -4991,7 +5072,7 @@ namespace pugi PUGI__FN xml_tree_walker::xml_tree_walker(): _depth(0) { } - + PUGI__FN xml_tree_walker::~xml_tree_walker() { } @@ -5037,7 +5118,7 @@ namespace 
pugi { return (_attr == r._attr); } - + PUGI__FN bool xml_attribute::operator!=(const xml_attribute& r) const { return (_attr != r._attr); @@ -5047,17 +5128,17 @@ namespace pugi { return (_attr < r._attr); } - + PUGI__FN bool xml_attribute::operator>(const xml_attribute& r) const { return (_attr > r._attr); } - + PUGI__FN bool xml_attribute::operator<=(const xml_attribute& r) const { return (_attr <= r._attr); } - + PUGI__FN bool xml_attribute::operator>=(const xml_attribute& r) const { return (_attr >= r._attr); @@ -5075,7 +5156,7 @@ namespace pugi PUGI__FN const char_t* xml_attribute::as_string(const char_t* def) const { - return (_attr && _attr->value) ? _attr->value : def; + return (_attr && _attr->value) ? _attr->value + 0 : def; } PUGI__FN int xml_attribute::as_int(int def) const @@ -5145,7 +5226,7 @@ namespace pugi set_value(rhs); return *this; } - + PUGI__FN xml_attribute& xml_attribute::operator=(int rhs) { set_value(rhs); @@ -5158,12 +5239,24 @@ namespace pugi return *this; } + PUGI__FN xml_attribute& xml_attribute::operator=(long rhs) + { + set_value(rhs); + return *this; + } + + PUGI__FN xml_attribute& xml_attribute::operator=(unsigned long rhs) + { + set_value(rhs); + return *this; + } + PUGI__FN xml_attribute& xml_attribute::operator=(double rhs) { set_value(rhs); return *this; } - + PUGI__FN xml_attribute& xml_attribute::operator=(float rhs) { set_value(rhs); @@ -5193,29 +5286,43 @@ namespace pugi PUGI__FN bool xml_attribute::set_name(const char_t* rhs) { if (!_attr) return false; - - return impl::strcpy_insitu(_attr->name, _attr->header, impl::xml_memory_page_name_allocated_mask, rhs); + + return impl::strcpy_insitu(_attr->name, _attr->header, impl::xml_memory_page_name_allocated_mask, rhs, impl::strlength(rhs)); } - + PUGI__FN bool xml_attribute::set_value(const char_t* rhs) { if (!_attr) return false; - return impl::strcpy_insitu(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs); + return 
impl::strcpy_insitu(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs, impl::strlength(rhs)); } PUGI__FN bool xml_attribute::set_value(int rhs) { if (!_attr) return false; - return impl::set_value_convert(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs); + return impl::set_value_integer(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs, rhs < 0); } PUGI__FN bool xml_attribute::set_value(unsigned int rhs) { if (!_attr) return false; - return impl::set_value_convert(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs); + return impl::set_value_integer(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs, false); + } + + PUGI__FN bool xml_attribute::set_value(long rhs) + { + if (!_attr) return false; + + return impl::set_value_integer(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs, rhs < 0); + } + + PUGI__FN bool xml_attribute::set_value(unsigned long rhs) + { + if (!_attr) return false; + + return impl::set_value_integer(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs, false); } PUGI__FN bool xml_attribute::set_value(double rhs) @@ -5224,7 +5331,7 @@ namespace pugi return impl::set_value_convert(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs); } - + PUGI__FN bool xml_attribute::set_value(float rhs) { if (!_attr) return false; @@ -5236,7 +5343,7 @@ namespace pugi { if (!_attr) return false; - return impl::set_value_convert(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs); + return impl::set_value_bool(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs); } #ifdef PUGIXML_HAS_LONG_LONG @@ -5244,14 +5351,14 @@ namespace pugi { if (!_attr) return false; - return impl::set_value_convert(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs); + return 
impl::set_value_integer(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs, rhs < 0); } PUGI__FN bool xml_attribute::set_value(unsigned long long rhs) { if (!_attr) return false; - return impl::set_value_convert(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs); + return impl::set_value_integer(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs, false); } #endif @@ -5274,7 +5381,7 @@ namespace pugi PUGI__FN xml_node::xml_node(xml_node_struct* p): _root(p) { } - + PUGI__FN static void unspecified_bool_xml_node(xml_node***) { } @@ -5298,7 +5405,7 @@ namespace pugi { return iterator(0, _root); } - + PUGI__FN xml_node::attribute_iterator xml_node::attributes_begin() const { return attribute_iterator(_root ? _root->first_attribute + 0 : 0, _root); @@ -5308,7 +5415,7 @@ namespace pugi { return attribute_iterator(0, _root); } - + PUGI__FN xml_object_range xml_node::children() const { return xml_object_range(begin(), end()); @@ -5338,17 +5445,17 @@ namespace pugi { return (_root < r._root); } - + PUGI__FN bool xml_node::operator>(const xml_node& r) const { return (_root > r._root); } - + PUGI__FN bool xml_node::operator<=(const xml_node& r) const { return (_root <= r._root); } - + PUGI__FN bool xml_node::operator>=(const xml_node& r) const { return (_root >= r._root); @@ -5358,7 +5465,7 @@ namespace pugi { return !_root; } - + PUGI__FN const char_t* xml_node::name() const { return (_root && _root->name) ? _root->name + 0 : PUGIXML_TEXT(""); @@ -5368,12 +5475,12 @@ namespace pugi { return _root ? PUGI__NODETYPE(_root) : node_null; } - + PUGI__FN const char_t* xml_node::value() const { return (_root && _root->value) ? 
_root->value + 0 : PUGIXML_TEXT(""); } - + PUGI__FN xml_node xml_node::child(const char_t* name_) const { if (!_root) return xml_node(); @@ -5391,14 +5498,14 @@ namespace pugi for (xml_attribute_struct* i = _root->first_attribute; i; i = i->next_attribute) if (i->name && impl::strequal(name_, i->name)) return xml_attribute(i); - + return xml_attribute(); } - + PUGI__FN xml_node xml_node::next_sibling(const char_t* name_) const { if (!_root) return xml_node(); - + for (xml_node_struct* i = _root->next_sibling; i; i = i->next_sibling) if (i->name && impl::strequal(name_, i->name)) return xml_node(i); @@ -5413,7 +5520,7 @@ namespace pugi PUGI__FN xml_node xml_node::previous_sibling(const char_t* name_) const { if (!_root) return xml_node(); - + for (xml_node_struct* i = _root->prev_sibling_c; i->next_sibling; i = i->prev_sibling_c) if (i->name && impl::strequal(name_, i->name)) return xml_node(i); @@ -5456,7 +5563,7 @@ namespace pugi PUGI__FN xml_node xml_node::previous_sibling() const { if (!_root) return xml_node(); - + if (_root->prev_sibling_c->next_sibling) return xml_node(_root->prev_sibling_c); else return xml_node(); } @@ -5479,7 +5586,11 @@ namespace pugi PUGI__FN const char_t* xml_node::child_value() const { if (!_root) return PUGIXML_TEXT(""); - + + // element nodes can have value if parse_embed_pcdata was used + if (PUGI__NODETYPE(_root) == node_element && _root->value) + return _root->value; + for (xml_node_struct* i = _root->first_child; i; i = i->next_sibling) if (impl::is_text_node(i) && i->value) return i->value; @@ -5514,28 +5625,28 @@ namespace pugi PUGI__FN bool xml_node::set_name(const char_t* rhs) { - static const bool has_name[] = { false, false, true, false, false, false, true, true, false }; + xml_node_type type_ = _root ? 
PUGI__NODETYPE(_root) : node_null; - if (!_root || !has_name[PUGI__NODETYPE(_root)]) + if (type_ != node_element && type_ != node_pi && type_ != node_declaration) return false; - return impl::strcpy_insitu(_root->name, _root->header, impl::xml_memory_page_name_allocated_mask, rhs); + return impl::strcpy_insitu(_root->name, _root->header, impl::xml_memory_page_name_allocated_mask, rhs, impl::strlength(rhs)); } - + PUGI__FN bool xml_node::set_value(const char_t* rhs) { - static const bool has_value[] = { false, false, false, true, true, true, true, false, true }; + xml_node_type type_ = _root ? PUGI__NODETYPE(_root) : node_null; - if (!_root || !has_value[PUGI__NODETYPE(_root)]) + if (type_ != node_pcdata && type_ != node_cdata && type_ != node_comment && type_ != node_pi && type_ != node_doctype) return false; - return impl::strcpy_insitu(_root->value, _root->header, impl::xml_memory_page_value_allocated_mask, rhs); + return impl::strcpy_insitu(_root->value, _root->header, impl::xml_memory_page_value_allocated_mask, rhs, impl::strlength(rhs)); } PUGI__FN xml_attribute xml_node::append_attribute(const char_t* name_) { if (!impl::allow_insert_attribute(type())) return xml_attribute(); - + impl::xml_allocator& alloc = impl::get_allocator(_root); if (!alloc.reserve()) return xml_attribute(); @@ -5545,14 +5656,14 @@ namespace pugi impl::append_attribute(a._attr, _root); a.set_name(name_); - + return a; } PUGI__FN xml_attribute xml_node::prepend_attribute(const char_t* name_) { if (!impl::allow_insert_attribute(type())) return xml_attribute(); - + impl::xml_allocator& alloc = impl::get_allocator(_root); if (!alloc.reserve()) return xml_attribute(); @@ -5570,7 +5681,7 @@ namespace pugi { if (!impl::allow_insert_attribute(type())) return xml_attribute(); if (!attr || !impl::is_attribute_of(attr._attr, _root)) return xml_attribute(); - + impl::xml_allocator& alloc = impl::get_allocator(_root); if (!alloc.reserve()) return xml_attribute(); @@ -5588,7 +5699,7 @@ namespace pugi 
{ if (!impl::allow_insert_attribute(type())) return xml_attribute(); if (!attr || !impl::is_attribute_of(attr._attr, _root)) return xml_attribute(); - + impl::xml_allocator& alloc = impl::get_allocator(_root); if (!alloc.reserve()) return xml_attribute(); @@ -5675,7 +5786,7 @@ namespace pugi PUGI__FN xml_node xml_node::append_child(xml_node_type type_) { if (!impl::allow_insert_child(type(), type_)) return xml_node(); - + impl::xml_allocator& alloc = impl::get_allocator(_root); if (!alloc.reserve()) return xml_node(); @@ -5695,12 +5806,12 @@ namespace pugi impl::xml_allocator& alloc = impl::get_allocator(_root); if (!alloc.reserve()) return xml_node(); - + xml_node n(impl::allocate_node(alloc, type_)); if (!n) return xml_node(); impl::prepend_node(n._root, _root); - + if (type_ == node_declaration) n.set_name(PUGIXML_TEXT("xml")); return n; @@ -5713,7 +5824,7 @@ namespace pugi impl::xml_allocator& alloc = impl::get_allocator(_root); if (!alloc.reserve()) return xml_node(); - + xml_node n(impl::allocate_node(alloc, type_)); if (!n) return xml_node(); @@ -5731,7 +5842,7 @@ namespace pugi impl::xml_allocator& alloc = impl::get_allocator(_root); if (!alloc.reserve()) return xml_node(); - + xml_node n(impl::allocate_node(alloc, type_)); if (!n) return xml_node(); @@ -5963,31 +6074,27 @@ namespace pugi // disable document_buffer_order optimization since in a document with multiple buffers comparing buffer pointers does not make sense doc->header |= impl::xml_memory_page_contents_shared_mask; - + // get extra buffer element (we'll store the document fragment buffer there so that we can deallocate it later) impl::xml_memory_page* page = 0; - impl::xml_extra_buffer* extra = static_cast(doc->allocate_memory(sizeof(impl::xml_extra_buffer), page)); + impl::xml_extra_buffer* extra = static_cast(doc->allocate_memory(sizeof(impl::xml_extra_buffer) + sizeof(void*), page)); (void)page; if (!extra) return impl::make_parse_result(status_out_of_memory); + #ifdef PUGIXML_COMPACT + // 
align the memory block to a pointer boundary; this is required for compact mode where memory allocations are only 4b aligned + // note that this requires up to sizeof(void*)-1 additional memory, which the allocation above takes into account + extra = reinterpret_cast((reinterpret_cast(extra) + (sizeof(void*) - 1)) & ~(sizeof(void*) - 1)); + #endif + // add extra buffer to the list extra->buffer = 0; extra->next = doc->extra_buffers; doc->extra_buffers = extra; // name of the root has to be NULL before parsing - otherwise closing node mismatches will not be detected at the top level - struct name_sentry - { - xml_node_struct* node; - char_t* name; - - ~name_sentry() { node->name = name; } - }; - - name_sentry sentry = { _root, _root->name }; - - sentry.node->name = 0; + impl::name_null_sentry sentry(_root); return impl::load_buffer_impl(doc, _root, const_cast(contents), size, options, encoding, false, false, &extra->buffer); } @@ -5995,7 +6102,7 @@ namespace pugi PUGI__FN xml_node xml_node::find_child_by_attribute(const char_t* name_, const char_t* attr_name, const char_t* attr_value) const { if (!_root) return xml_node(); - + for (xml_node_struct* i = _root->first_child; i; i = i->next_sibling) if (i->name && impl::strequal(name_, i->name)) { @@ -6010,7 +6117,7 @@ namespace pugi PUGI__FN xml_node xml_node::find_child_by_attribute(const char_t* attr_name, const char_t* attr_value) const { if (!_root) return xml_node(); - + for (xml_node_struct* i = _root->first_child; i; i = i->next_sibling) for (xml_attribute_struct* a = i->first_attribute; a; a = a->next_attribute) if (a->name && impl::strequal(attr_name, a->name) && impl::strequal(attr_value, a->value ? a->value + 0 : PUGIXML_TEXT(""))) @@ -6040,7 +6147,7 @@ namespace pugi if (j != _root) result[--offset] = delimiter; - if (j->name && *j->name) + if (j->name) { size_t length = impl::strlength(j->name); @@ -6059,7 +6166,7 @@ namespace pugi { xml_node found = *this; // Current search context. 
- if (!_root || !path_ || !path_[0]) return found; + if (!_root || !path_[0]) return found; if (path_[0] == delimiter) { @@ -6105,48 +6212,47 @@ namespace pugi PUGI__FN bool xml_node::traverse(xml_tree_walker& walker) { walker._depth = -1; - - xml_node arg_begin = *this; + + xml_node arg_begin(_root); if (!walker.begin(arg_begin)) return false; - xml_node cur = first_child(); - + xml_node_struct* cur = _root ? _root->first_child + 0 : 0; + if (cur) { ++walker._depth; - do + do { - xml_node arg_for_each = cur; + xml_node arg_for_each(cur); if (!walker.for_each(arg_for_each)) return false; - - if (cur.first_child()) + + if (cur->first_child) { ++walker._depth; - cur = cur.first_child(); + cur = cur->first_child; } - else if (cur.next_sibling()) - cur = cur.next_sibling(); + else if (cur->next_sibling) + cur = cur->next_sibling; else { - // Borland C++ workaround - while (!cur.next_sibling() && cur != *this && !cur.parent().empty()) + while (!cur->next_sibling && cur != _root && cur->parent) { --walker._depth; - cur = cur.parent(); + cur = cur->parent; } - - if (cur != *this) - cur = cur.next_sibling(); + + if (cur != _root) + cur = cur->next_sibling; } } - while (cur && cur != *this); + while (cur && cur != _root); } assert(walker._depth == -1); - xml_node arg_end = *this; + xml_node arg_end(_root); return walker.end(arg_end); } @@ -6213,6 +6319,7 @@ namespace pugi return _root->value && (_root->header & impl::xml_memory_page_value_allocated_or_shared_mask) == 0 ? 
_root->value - doc.buffer : -1; default: + assert(false && "Invalid node type"); // unreachable return -1; } } @@ -6237,6 +6344,10 @@ namespace pugi { if (!_root || impl::is_text_node(_root)) return _root; + // element nodes can have value if parse_embed_pcdata was used + if (PUGI__NODETYPE(_root) == node_element && _root->value) + return _root; + for (xml_node_struct* node = _root->first_child; node; node = node->next_sibling) if (impl::is_text_node(node)) return node; @@ -6286,7 +6397,7 @@ namespace pugi { xml_node_struct* d = _data(); - return (d && d->value) ? d->value : def; + return (d && d->value) ? d->value + 0 : def; } PUGI__FN int xml_text::as_int(int def) const @@ -6344,21 +6455,35 @@ namespace pugi { xml_node_struct* dn = _data_new(); - return dn ? impl::strcpy_insitu(dn->value, dn->header, impl::xml_memory_page_value_allocated_mask, rhs) : false; + return dn ? impl::strcpy_insitu(dn->value, dn->header, impl::xml_memory_page_value_allocated_mask, rhs, impl::strlength(rhs)) : false; } PUGI__FN bool xml_text::set(int rhs) { xml_node_struct* dn = _data_new(); - return dn ? impl::set_value_convert(dn->value, dn->header, impl::xml_memory_page_value_allocated_mask, rhs) : false; + return dn ? impl::set_value_integer(dn->value, dn->header, impl::xml_memory_page_value_allocated_mask, rhs, rhs < 0) : false; } PUGI__FN bool xml_text::set(unsigned int rhs) { xml_node_struct* dn = _data_new(); - return dn ? impl::set_value_convert(dn->value, dn->header, impl::xml_memory_page_value_allocated_mask, rhs) : false; + return dn ? impl::set_value_integer(dn->value, dn->header, impl::xml_memory_page_value_allocated_mask, rhs, false) : false; + } + + PUGI__FN bool xml_text::set(long rhs) + { + xml_node_struct* dn = _data_new(); + + return dn ? impl::set_value_integer(dn->value, dn->header, impl::xml_memory_page_value_allocated_mask, rhs, rhs < 0) : false; + } + + PUGI__FN bool xml_text::set(unsigned long rhs) + { + xml_node_struct* dn = _data_new(); + + return dn ? 
impl::set_value_integer(dn->value, dn->header, impl::xml_memory_page_value_allocated_mask, rhs, false) : false; } PUGI__FN bool xml_text::set(float rhs) @@ -6379,7 +6504,7 @@ namespace pugi { xml_node_struct* dn = _data_new(); - return dn ? impl::set_value_convert(dn->value, dn->header, impl::xml_memory_page_value_allocated_mask, rhs) : false; + return dn ? impl::set_value_bool(dn->value, dn->header, impl::xml_memory_page_value_allocated_mask, rhs) : false; } #ifdef PUGIXML_HAS_LONG_LONG @@ -6387,14 +6512,14 @@ namespace pugi { xml_node_struct* dn = _data_new(); - return dn ? impl::set_value_convert(dn->value, dn->header, impl::xml_memory_page_value_allocated_mask, rhs) : false; + return dn ? impl::set_value_integer(dn->value, dn->header, impl::xml_memory_page_value_allocated_mask, rhs, rhs < 0) : false; } PUGI__FN bool xml_text::set(unsigned long long rhs) { xml_node_struct* dn = _data_new(); - return dn ? impl::set_value_convert(dn->value, dn->header, impl::xml_memory_page_value_allocated_mask, rhs) : false; + return dn ? 
impl::set_value_integer(dn->value, dn->header, impl::xml_memory_page_value_allocated_mask, rhs, false) : false; } #endif @@ -6416,6 +6541,18 @@ namespace pugi return *this; } + PUGI__FN xml_text& xml_text::operator=(long rhs) + { + set(rhs); + return *this; + } + + PUGI__FN xml_text& xml_text::operator=(unsigned long rhs) + { + set(rhs); + return *this; + } + PUGI__FN xml_text& xml_text::operator=(double rhs) { set(rhs); @@ -6481,7 +6618,7 @@ namespace pugi { return _wrap._root == rhs._wrap._root && _parent._root == rhs._parent._root; } - + PUGI__FN bool xml_node_iterator::operator!=(const xml_node_iterator& rhs) const { return _wrap._root != rhs._wrap._root || _parent._root != rhs._parent._root; @@ -6496,7 +6633,7 @@ namespace pugi PUGI__FN xml_node* xml_node_iterator::operator->() const { assert(_wrap._root); - return const_cast(&_wrap); // BCC32 workaround + return const_cast(&_wrap); // BCC5 workaround } PUGI__FN const xml_node_iterator& xml_node_iterator::operator++() @@ -6542,7 +6679,7 @@ namespace pugi { return _wrap._attr == rhs._wrap._attr && _parent._root == rhs._parent._root; } - + PUGI__FN bool xml_attribute_iterator::operator!=(const xml_attribute_iterator& rhs) const { return _wrap._attr != rhs._wrap._attr || _parent._root != rhs._parent._root; @@ -6557,7 +6694,7 @@ namespace pugi PUGI__FN xml_attribute* xml_attribute_iterator::operator->() const { assert(_wrap._attr); - return const_cast(&_wrap); // BCC32 workaround + return const_cast(&_wrap); // BCC5 workaround } PUGI__FN const xml_attribute_iterator& xml_attribute_iterator::operator++() @@ -6618,7 +6755,7 @@ namespace pugi PUGI__FN xml_node* xml_named_node_iterator::operator->() const { assert(_wrap._root); - return const_cast(&_wrap); // BCC32 workaround + return const_cast(&_wrap); // BCC5 workaround } PUGI__FN const xml_named_node_iterator& xml_named_node_iterator::operator++() @@ -6699,18 +6836,37 @@ namespace pugi PUGI__FN xml_document::xml_document(): _buffer(0) { - create(); + _create(); } 
PUGI__FN xml_document::~xml_document() { - destroy(); + _destroy(); } +#ifdef PUGIXML_HAS_MOVE + PUGI__FN xml_document::xml_document(xml_document&& rhs) PUGIXML_NOEXCEPT_IF_NOT_COMPACT: _buffer(0) + { + _create(); + _move(rhs); + } + + PUGI__FN xml_document& xml_document::operator=(xml_document&& rhs) PUGIXML_NOEXCEPT_IF_NOT_COMPACT + { + if (this == &rhs) return *this; + + _destroy(); + _create(); + _move(rhs); + + return *this; + } +#endif + PUGI__FN void xml_document::reset() { - destroy(); - create(); + _destroy(); + _create(); } PUGI__FN void xml_document::reset(const xml_document& proto) @@ -6721,31 +6877,30 @@ namespace pugi append_copy(cur); } - PUGI__FN void xml_document::create() + PUGI__FN void xml_document::_create() { assert(!_root); #ifdef PUGIXML_COMPACT - const size_t page_offset = sizeof(uint32_t); + // space for page marker for the first page (uint32_t), rounded up to pointer size; assumes pointers are at least 32-bit + const size_t page_offset = sizeof(void*); #else const size_t page_offset = 0; #endif // initialize sentinel page - PUGI__STATIC_ASSERT(sizeof(impl::xml_memory_page) + sizeof(impl::xml_document_struct) + impl::xml_memory_page_alignment - sizeof(void*) + page_offset <= sizeof(_memory)); - - // align upwards to page boundary - void* page_memory = reinterpret_cast((reinterpret_cast(_memory) + (impl::xml_memory_page_alignment - 1)) & ~(impl::xml_memory_page_alignment - 1)); + PUGI__STATIC_ASSERT(sizeof(impl::xml_memory_page) + sizeof(impl::xml_document_struct) + page_offset <= sizeof(_memory)); // prepare page structure - impl::xml_memory_page* page = impl::xml_memory_page::construct(page_memory); + impl::xml_memory_page* page = impl::xml_memory_page::construct(_memory); assert(page); page->busy_size = impl::xml_memory_page_size; // setup first page marker #ifdef PUGIXML_COMPACT - page->compact_page_marker = reinterpret_cast(reinterpret_cast(page) + sizeof(impl::xml_memory_page)); + // round-trip through void* to avoid 'cast increases 
required alignment of target type' warning + page->compact_page_marker = reinterpret_cast(static_cast(reinterpret_cast(page) + sizeof(impl::xml_memory_page))); *page->compact_page_marker = sizeof(impl::xml_memory_page); #endif @@ -6756,11 +6911,16 @@ namespace pugi // setup sentinel page page->allocator = static_cast(_root); + // setup hash table pointer in allocator + #ifdef PUGIXML_COMPACT + page->allocator->_hash = &static_cast(_root)->hash; + #endif + // verify the document allocation assert(reinterpret_cast(_root) + sizeof(impl::xml_document_struct) <= _memory + sizeof(_memory)); } - PUGI__FN void xml_document::destroy() + PUGI__FN void xml_document::_destroy() { assert(_root); @@ -6799,19 +6959,126 @@ namespace pugi _root = 0; } +#ifdef PUGIXML_HAS_MOVE + PUGI__FN void xml_document::_move(xml_document& rhs) PUGIXML_NOEXCEPT_IF_NOT_COMPACT + { + impl::xml_document_struct* doc = static_cast(_root); + impl::xml_document_struct* other = static_cast(rhs._root); + + // save first child pointer for later; this needs hash access + xml_node_struct* other_first_child = other->first_child; + + #ifdef PUGIXML_COMPACT + // reserve space for the hash table up front; this is the only operation that can fail + // if it does, we have no choice but to throw (if we have exceptions) + if (other_first_child) + { + size_t other_children = 0; + for (xml_node_struct* node = other_first_child; node; node = node->next_sibling) + other_children++; + + // in compact mode, each pointer assignment could result in a hash table request + // during move, we have to relocate document first_child and parents of all children + // normally there's just one child and its parent has a pointerless encoding but + // we assume the worst here + if (!other->_hash->reserve(other_children + 1)) + { + #ifdef PUGIXML_NO_EXCEPTIONS + return; + #else + throw std::bad_alloc(); + #endif + } + } + #endif + + // move allocation state + doc->_root = other->_root; + doc->_busy_size = other->_busy_size; + + // move 
buffer state + doc->buffer = other->buffer; + doc->extra_buffers = other->extra_buffers; + _buffer = rhs._buffer; + + #ifdef PUGIXML_COMPACT + // move compact hash; note that the hash table can have pointers to other but they will be "inactive", similarly to nodes removed with remove_child + doc->hash = other->hash; + doc->_hash = &doc->hash; + + // make sure we don't access other hash up until the end when we reinitialize other document + other->_hash = 0; + #endif + + // move page structure + impl::xml_memory_page* doc_page = PUGI__GETPAGE(doc); + assert(doc_page && !doc_page->prev && !doc_page->next); + + impl::xml_memory_page* other_page = PUGI__GETPAGE(other); + assert(other_page && !other_page->prev); + + // relink pages since root page is embedded into xml_document + if (impl::xml_memory_page* page = other_page->next) + { + assert(page->prev == other_page); + + page->prev = doc_page; + + doc_page->next = page; + other_page->next = 0; + } + + // make sure pages point to the correct document state + for (impl::xml_memory_page* page = doc_page->next; page; page = page->next) + { + assert(page->allocator == other); + + page->allocator = doc; + + #ifdef PUGIXML_COMPACT + // this automatically migrates most children between documents and prevents ->parent assignment from allocating + if (page->compact_shared_parent == other) + page->compact_shared_parent = doc; + #endif + } + + // move tree structure + assert(!doc->first_child); + + doc->first_child = other_first_child; + + for (xml_node_struct* node = other_first_child; node; node = node->next_sibling) + { + #ifdef PUGIXML_COMPACT + // most children will have migrated when we reassigned compact_shared_parent + assert(node->parent == other || node->parent == doc); + + node->parent = doc; + #else + assert(node->parent == other); + node->parent = doc; + #endif + } + + // reset other document + new (other) impl::xml_document_struct(PUGI__GETPAGE(other)); + rhs._buffer = 0; + } +#endif + #ifndef PUGIXML_NO_STL 
PUGI__FN xml_parse_result xml_document::load(std::basic_istream >& stream, unsigned int options, xml_encoding encoding) { reset(); - return impl::load_stream_impl(*this, stream, options, encoding); + return impl::load_stream_impl(static_cast(_root), stream, options, encoding, &_buffer); } PUGI__FN xml_parse_result xml_document::load(std::basic_istream >& stream, unsigned int options) { reset(); - return impl::load_stream_impl(*this, stream, options, encoding_wchar); + return impl::load_stream_impl(static_cast(_root), stream, options, encoding_wchar, &_buffer); } #endif @@ -6837,9 +7104,9 @@ namespace pugi reset(); using impl::auto_deleter; // MSVC7 workaround - auto_deleter file(fopen(path_, "rb"), fclose); + auto_deleter file(fopen(path_, "rb"), impl::close_file); - return impl::load_file_impl(*this, file.data, options, encoding); + return impl::load_file_impl(static_cast(_root), file.data, options, encoding, &_buffer); } PUGI__FN xml_parse_result xml_document::load_file(const wchar_t* path_, unsigned int options, xml_encoding encoding) @@ -6847,9 +7114,9 @@ namespace pugi reset(); using impl::auto_deleter; // MSVC7 workaround - auto_deleter file(impl::open_file_wide(path_, L"rb"), fclose); + auto_deleter file(impl::open_file_wide(path_, L"rb"), impl::close_file); - return impl::load_file_impl(*this, file.data, options, encoding); + return impl::load_file_impl(static_cast(_root), file.data, options, encoding, &_buffer); } PUGI__FN xml_parse_result xml_document::load_buffer(const void* contents, size_t size, unsigned int options, xml_encoding encoding) @@ -6920,7 +7187,7 @@ namespace pugi PUGI__FN bool xml_document::save_file(const char* path_, const char_t* indent, unsigned int flags, xml_encoding encoding) const { using impl::auto_deleter; // MSVC7 workaround - auto_deleter file(fopen(path_, (flags & format_save_file_text) ? "w" : "wb"), fclose); + auto_deleter file(fopen(path_, (flags & format_save_file_text) ? 
"w" : "wb"), impl::close_file); return impl::save_file_impl(*this, file.data, indent, flags, encoding); } @@ -6928,7 +7195,7 @@ namespace pugi PUGI__FN bool xml_document::save_file(const wchar_t* path_, const char_t* indent, unsigned int flags, xml_encoding encoding) const { using impl::auto_deleter; // MSVC7 workaround - auto_deleter file(impl::open_file_wide(path_, (flags & format_save_file_text) ? L"w" : L"wb"), fclose); + auto_deleter file(impl::open_file_wide(path_, (flags & format_save_file_text) ? L"w" : L"wb"), impl::close_file); return impl::save_file_impl(*this, file.data, indent, flags, encoding); } @@ -6956,14 +7223,14 @@ namespace pugi { return impl::as_utf8_impl(str.c_str(), str.size()); } - + PUGI__FN std::basic_string PUGIXML_FUNCTION as_wide(const char* str) { assert(str); return impl::as_wide_impl(str, strlen(str)); } - + PUGI__FN std::basic_string PUGIXML_FUNCTION as_wide(const std::string& str) { return impl::as_wide_impl(str.c_str(), str.size()); @@ -7095,7 +7362,7 @@ PUGI__NS_BEGIN if (begin == end) return begin; // last written element - I write = begin++; + I write = begin++; // merge unique elements while (begin != end) @@ -7110,134 +7377,76 @@ PUGI__NS_BEGIN return write + 1; } - template void copy_backwards(I begin, I end, I target) + template void insertion_sort(T* begin, T* end, const Pred& pred) { - while (begin != end) *--target = *--end; - } + if (begin == end) + return; - template void insertion_sort(I begin, I end, const Pred& pred, T*) - { - assert(begin != end); - - for (I it = begin + 1; it != end; ++it) + for (T* it = begin + 1; it != end; ++it) { T val = *it; + T* hole = it; - if (pred(val, *begin)) + // move hole backwards + while (hole > begin && pred(val, *(hole - 1))) { - // move to front - copy_backwards(begin, it, it + 1); - *begin = val; + *hole = *(hole - 1); + hole--; } + + // fill hole with element + *hole = val; + } + } + + template I median3(I first, I middle, I last, const Pred& pred) + { + if (pred(*middle, 
*first)) swap(middle, first); + if (pred(*last, *middle)) swap(last, middle); + if (pred(*middle, *first)) swap(middle, first); + + return middle; + } + + template void partition3(T* begin, T* end, T pivot, const Pred& pred, T** out_eqbeg, T** out_eqend) + { + // invariant: array is split into 4 groups: = < ? > (each variable denotes the boundary between the groups) + T* eq = begin; + T* lt = begin; + T* gt = end; + + while (lt < gt) + { + if (pred(*lt, pivot)) + lt++; + else if (*lt == pivot) + swap(*eq++, *lt++); else - { - I hole = it; - - // move hole backwards - while (pred(val, *(hole - 1))) - { - *hole = *(hole - 1); - hole--; - } - - // fill hole with element - *hole = val; - } + swap(*lt, *--gt); } - } - // std variant for elements with == - template void partition(I begin, I middle, I end, const Pred& pred, I* out_eqbeg, I* out_eqend) - { - I eqbeg = middle, eqend = middle + 1; + // we now have just 4 groups: = < >; move equal elements to the middle + T* eqbeg = gt; - // expand equal range - while (eqbeg != begin && *(eqbeg - 1) == *eqbeg) --eqbeg; - while (eqend != end && *eqend == *eqbeg) ++eqend; + for (T* it = begin; it != eq; ++it) + swap(*it, *--eqbeg); - // process outer elements - I ltend = eqbeg, gtbeg = eqend; - - for (;;) - { - // find the element from the right side that belongs to the left one - for (; gtbeg != end; ++gtbeg) - if (!pred(*eqbeg, *gtbeg)) - { - if (*gtbeg == *eqbeg) swap(*gtbeg, *eqend++); - else break; - } - - // find the element from the left side that belongs to the right one - for (; ltend != begin; --ltend) - if (!pred(*(ltend - 1), *eqbeg)) - { - if (*eqbeg == *(ltend - 1)) swap(*(ltend - 1), *--eqbeg); - else break; - } - - // scanned all elements - if (gtbeg == end && ltend == begin) - { - *out_eqbeg = eqbeg; - *out_eqend = eqend; - return; - } - - // make room for elements by moving equal area - if (gtbeg == end) - { - if (--ltend != --eqbeg) swap(*ltend, *eqbeg); - swap(*eqbeg, *--eqend); - } - else if (ltend == 
begin) - { - if (eqend != gtbeg) swap(*eqbeg, *eqend); - ++eqend; - swap(*gtbeg++, *eqbeg++); - } - else swap(*gtbeg++, *--ltend); - } - } - - template void median3(I first, I middle, I last, const Pred& pred) - { - if (pred(*middle, *first)) swap(*middle, *first); - if (pred(*last, *middle)) swap(*last, *middle); - if (pred(*middle, *first)) swap(*middle, *first); - } - - template void median(I first, I middle, I last, const Pred& pred) - { - if (last - first <= 40) - { - // median of three for small chunks - median3(first, middle, last, pred); - } - else - { - // median of nine - size_t step = (last - first + 1) / 8; - - median3(first, first + step, first + 2 * step, pred); - median3(middle - step, middle, middle + step, pred); - median3(last - 2 * step, last - step, last, pred); - median3(first + step, middle, last - step, pred); - } + *out_eqbeg = eqbeg; + *out_eqend = gt; } template void sort(I begin, I end, const Pred& pred) { // sort large chunks - while (end - begin > 32) + while (end - begin > 16) { // find median element I middle = begin + (end - begin) / 2; - median(begin, middle, end - 1, pred); + I median = median3(begin, middle, end - 1, pred); // partition in three chunks (< = >) I eqbeg, eqend; - partition(begin, middle, end, pred, &eqbeg, &eqend); + partition3(begin, end, *median, pred, &eqbeg, &eqend); // loop on larger half if (eqbeg - begin > end - eqend) @@ -7253,7 +7462,7 @@ PUGI__NS_BEGIN } // insertion sort small chunk - if (begin != end) insertion_sort(begin, end, pred, &*begin); + insertion_sort(begin, end, pred); } PUGI__NS_END @@ -7270,7 +7479,7 @@ PUGI__NS_BEGIN static const uintptr_t xpath_memory_block_alignment = sizeof(double) > sizeof(void*) ? 
sizeof(double) : sizeof(void*); struct xpath_memory_block - { + { xpath_memory_block* next; size_t capacity; @@ -7280,25 +7489,18 @@ PUGI__NS_BEGIN double alignment; }; }; - - class xpath_allocator + + struct xpath_allocator { xpath_memory_block* _root; size_t _root_size; + bool* _error; - public: - #ifdef PUGIXML_NO_EXCEPTIONS - jmp_buf* error_handler; - #endif - - xpath_allocator(xpath_memory_block* root, size_t root_size = 0): _root(root), _root_size(root_size) + xpath_allocator(xpath_memory_block* root, bool* error = 0): _root(root), _root_size(0), _error(error) { - #ifdef PUGIXML_NO_EXCEPTIONS - error_handler = 0; - #endif } - - void* allocate_nothrow(size_t size) + + void* allocate(size_t size) { // round size up to block alignment boundary size = (size + xpath_memory_block_alignment - 1) & ~(xpath_memory_block_alignment - 1); @@ -7319,35 +7521,22 @@ PUGI__NS_BEGIN size_t block_size = block_capacity + offsetof(xpath_memory_block, data); xpath_memory_block* block = static_cast(xml_memory::allocate(block_size)); - if (!block) return 0; - + if (!block) + { + if (_error) *_error = true; + return 0; + } + block->next = _root; block->capacity = block_capacity; - + _root = block; _root_size = size; - + return block->data; } } - void* allocate(size_t size) - { - void* result = allocate_nothrow(size); - - if (!result) - { - #ifdef PUGIXML_NO_EXCEPTIONS - assert(error_handler); - longjmp(*error_handler, 1); - #else - throw std::bad_alloc(); - #endif - } - - return result; - } - void* reallocate(void* ptr, size_t old_size, size_t new_size) { // round size up to block alignment boundary @@ -7357,33 +7546,35 @@ PUGI__NS_BEGIN // we can only reallocate the last object assert(ptr == 0 || static_cast(ptr) + old_size == &_root->data[0] + _root_size); - // adjust root size so that we have not allocated the object at all - bool only_object = (_root_size == old_size); + // try to reallocate the object inplace + if (ptr && _root_size - old_size + new_size <= _root->capacity) + { 
+ _root_size = _root_size - old_size + new_size; + return ptr; + } - if (ptr) _root_size -= old_size; - - // allocate a new version (this will obviously reuse the memory if possible) + // allocate a new block void* result = allocate(new_size); - assert(result); + if (!result) return 0; // we have a new block - if (result != ptr && ptr) + if (ptr) { - // copy old data + // copy old data (we only support growing) assert(new_size >= old_size); memcpy(result, ptr, old_size); // free the previous page if it had no other objects - if (only_object) - { - assert(_root->data == result); - assert(_root->next); + assert(_root->data == result); + assert(_root->next); + if (_root->next->data == ptr) + { + // deallocate the whole page, unless it was the first one xpath_memory_block* next = _root->next->next; if (next) { - // deallocate the whole page, unless it was the first one xml_memory::deallocate(_root->next); _root->next = next; } @@ -7455,22 +7646,15 @@ PUGI__NS_BEGIN xpath_allocator result; xpath_allocator temp; xpath_stack stack; + bool oom; - #ifdef PUGIXML_NO_EXCEPTIONS - jmp_buf error_handler; - #endif - - xpath_stack_data(): result(blocks + 0), temp(blocks + 1) + xpath_stack_data(): result(blocks + 0, &oom), temp(blocks + 1, &oom), oom(false) { blocks[0].next = blocks[1].next = 0; blocks[0].capacity = blocks[1].capacity = sizeof(blocks[0].data); stack.result = &result; stack.temp = &temp; - - #ifdef PUGIXML_NO_EXCEPTIONS - result.error_handler = temp.error_handler = &error_handler; - #endif } ~xpath_stack_data() @@ -7492,7 +7676,7 @@ PUGI__NS_BEGIN static char_t* duplicate_string(const char_t* string, size_t length, xpath_allocator* alloc) { char_t* result = static_cast(alloc->allocate((length + 1) * sizeof(char_t))); - assert(result); + if (!result) return 0; memcpy(result, string, length * sizeof(char_t)); result[length] = 0; @@ -7521,9 +7705,13 @@ PUGI__NS_BEGIN { assert(begin <= end); - size_t length = static_cast(end - begin); + if (begin == end) + return 
xpath_string(); - return length == 0 ? xpath_string() : xpath_string(duplicate_string(begin, length, alloc), true, length); + size_t length = static_cast(end - begin); + const char_t* data = duplicate_string(begin, length, alloc); + + return data ? xpath_string(data, true, length) : xpath_string(); } xpath_string(): _buffer(PUGIXML_TEXT("")), _uses_heap(false), _length_heap(0) @@ -7549,7 +7737,7 @@ PUGI__NS_BEGIN // allocate new buffer char_t* result = static_cast(alloc->reallocate(_uses_heap ? const_cast(_buffer) : 0, (target_length + 1) * sizeof(char_t), (result_length + 1) * sizeof(char_t))); - assert(result); + if (!result) return; // append first string to the new buffer in case there was no reallocation if (!_uses_heap) memcpy(result, _buffer, target_length * sizeof(char_t)); @@ -7574,15 +7762,18 @@ PUGI__NS_BEGIN { return _uses_heap ? _length_heap : strlength(_buffer); } - + char_t* data(xpath_allocator* alloc) { // make private heap copy if (!_uses_heap) { size_t length_ = strlength(_buffer); + const char_t* data_ = duplicate_string(_buffer, length_, alloc); - _buffer = duplicate_string(_buffer, length_, alloc); + if (!data_) return 0; + + _buffer = data_; _uses_heap = true; _length_heap = length_; } @@ -7664,14 +7855,18 @@ PUGI__NS_BEGIN case node_comment: case node_pi: return xpath_string::from_const(n.value()); - + case node_document: case node_element: { xpath_string result; + // element nodes can have value if parse_embed_pcdata was used + if (n.value()[0]) + result.append(xpath_string::from_const(n.value()), alloc); + xml_node cur = n.first_child(); - + while (cur && cur != n) { if (cur.type() == node_pcdata || cur.type() == node_cdata) @@ -7689,16 +7884,16 @@ PUGI__NS_BEGIN if (cur != n) cur = cur.next_sibling(); } } - + return result; } - + default: return xpath_string(); } } } - + PUGI__FN bool node_is_before_sibling(xml_node_struct* ln, xml_node_struct* rn) { assert(ln->parent == rn->parent); @@ -7722,7 +7917,7 @@ PUGI__NS_BEGIN // if rn sibling 
chain ended ln must be before rn return !rs; } - + PUGI__FN bool node_is_before(xml_node_struct* ln, xml_node_struct* rn) { // find common ancestor at the same depth, if any @@ -7803,7 +7998,7 @@ PUGI__NS_BEGIN return 0; } - + struct document_order_comparator { bool operator()(const xpath_node& lhs, const xpath_node& rhs) const @@ -7827,10 +8022,10 @@ PUGI__NS_BEGIN for (xml_attribute a = lhs.attribute(); a; a = a.next_attribute()) if (a == rhs.attribute()) return true; - + return false; } - + // compare attribute parents ln = lhs.parent(); rn = rhs.parent(); @@ -7839,21 +8034,21 @@ PUGI__NS_BEGIN { // attributes go after the parent element if (lhs.parent() == rhs.node()) return false; - + ln = lhs.parent(); } else if (rhs.attribute()) { // attributes go after the parent element if (rhs.parent() == lhs.node()) return true; - + rn = rhs.parent(); } if (ln == rn) return false; if (!ln || !rn) return ln < rn; - + return node_is_before(ln.internal_object(), rn.internal_object()); } }; @@ -7866,20 +8061,22 @@ PUGI__NS_BEGIN else return rhs.attribute() ? false : lhs.node() < rhs.node(); } }; - + PUGI__FN double gen_nan() { #if defined(__STDC_IEC_559__) || ((FLT_RADIX - 0 == 2) && (FLT_MAX_EXP - 0 == 128) && (FLT_MANT_DIG - 0 == 24)) - union { float f; uint32_t i; } u[sizeof(float) == sizeof(uint32_t) ? 
1 : -1]; - u[0].i = 0x7fc00000; - return u[0].f; + PUGI__STATIC_ASSERT(sizeof(float) == sizeof(uint32_t)); + typedef uint32_t UI; // BCC5 workaround + union { float f; UI i; } u; + u.i = 0x7fc00000; + return u.f; #else // fallback const volatile double zero = 0.0; return zero / zero; #endif } - + PUGI__FN bool is_nan(double value) { #if defined(PUGI__MSVC_CRT_VERSION) || defined(__BORLANDC__) @@ -7892,7 +8089,7 @@ PUGI__NS_BEGIN return v != v; #endif } - + PUGI__FN const char_t* convert_number_to_string_special(double value) { #if defined(PUGI__MSVC_CRT_VERSION) || defined(__BORLANDC__) @@ -7924,12 +8121,12 @@ PUGI__NS_BEGIN return 0; #endif } - + PUGI__FN bool convert_number_to_boolean(double value) { return (value != 0 && !is_nan(value)); } - + PUGI__FN void truncate_zeros(char* begin, char* end) { while (begin != end && end[-1] == '0') end--; @@ -7939,11 +8136,11 @@ PUGI__NS_BEGIN // gets mantissa digits in the form of 0.xxxxx with 0. implied and the exponent #if defined(PUGI__MSVC_CRT_VERSION) && PUGI__MSVC_CRT_VERSION >= 1400 && !defined(_WIN32_WCE) - PUGI__FN void convert_number_to_mantissa_exponent(double value, char* buffer, size_t buffer_size, char** out_mantissa, int* out_exponent) + PUGI__FN void convert_number_to_mantissa_exponent(double value, char (&buffer)[32], char** out_mantissa, int* out_exponent) { // get base values int sign, exponent; - _ecvt_s(buffer, buffer_size, value, DBL_DIG + 1, &exponent, &sign); + _ecvt_s(buffer, sizeof(buffer), value, DBL_DIG + 1, &exponent, &sign); // truncate redundant zeros truncate_zeros(buffer, buffer + strlen(buffer)); @@ -7953,12 +8150,10 @@ PUGI__NS_BEGIN *out_exponent = exponent; } #else - PUGI__FN void convert_number_to_mantissa_exponent(double value, char* buffer, size_t buffer_size, char** out_mantissa, int* out_exponent) + PUGI__FN void convert_number_to_mantissa_exponent(double value, char (&buffer)[32], char** out_mantissa, int* out_exponent) { // get a scientific notation value with IEEE DBL_DIG 
decimals - sprintf(buffer, "%.*e", DBL_DIG, value); - assert(strlen(buffer) < buffer_size); - (void)!buffer_size; + PUGI__SNPRINTF(buffer, "%.*e", DBL_DIG, value); // get the exponent (possibly negative) char* exponent_string = strchr(buffer, 'e'); @@ -7995,12 +8190,12 @@ PUGI__NS_BEGIN char* mantissa; int exponent; - convert_number_to_mantissa_exponent(value, mantissa_buffer, sizeof(mantissa_buffer), &mantissa, &exponent); + convert_number_to_mantissa_exponent(value, mantissa_buffer, &mantissa, &exponent); // allocate a buffer of suitable length for the number size_t result_size = strlen(mantissa_buffer) + (exponent > 0 ? exponent : -exponent) + 4; char_t* result = static_cast(alloc->allocate(sizeof(char_t) * result_size)); - assert(result); + if (!result) return xpath_string(); // make the number! char_t* s = result; @@ -8017,7 +8212,7 @@ PUGI__NS_BEGIN { while (exponent > 0) { - assert(*mantissa == 0 || static_cast(static_cast(*mantissa) - '0') <= 9); + assert(*mantissa == 0 || static_cast(*mantissa - '0') <= 9); *s++ = *mantissa ? *mantissa++ : '0'; exponent--; } @@ -8050,7 +8245,7 @@ PUGI__NS_BEGIN return xpath_string::from_heap_preallocated(result, s); } - + PUGI__FN bool check_string_to_number_format(const char_t* string) { // parse leading whitespace @@ -8117,7 +8312,7 @@ PUGI__NS_BEGIN return true; } - + PUGI__FN double round_nearest(double value) { return floor(value + 0.5); @@ -8129,17 +8324,17 @@ PUGI__NS_BEGIN // ceil is used to differentiate between +0 and -0 (we return -0 for [-0.5, -0] and +0 for +0) return (value >= -0.5 && value <= 0) ? ceil(value) : floor(value + 0.5); } - + PUGI__FN const char_t* qualified_name(const xpath_node& node) { return node.attribute() ? node.attribute().name() : node.node().name(); } - + PUGI__FN const char_t* local_name(const xpath_node& node) { const char_t* name = qualified_name(node); const char_t* p = find_char(name, ':'); - + return p ? 
p + 1 : name; } @@ -8169,39 +8364,39 @@ PUGI__NS_BEGIN PUGI__FN const char_t* namespace_uri(xml_node node) { namespace_uri_predicate pred = node.name(); - + xml_node p = node; - + while (p) { xml_attribute a = p.find_attribute(pred); - + if (a) return a.value(); - + p = p.parent(); } - + return PUGIXML_TEXT(""); } PUGI__FN const char_t* namespace_uri(xml_attribute attr, xml_node parent) { namespace_uri_predicate pred = attr.name(); - + // Default namespace does not apply to attributes if (!pred.prefix) return PUGIXML_TEXT(""); - + xml_node p = parent; - + while (p) { xml_attribute a = p.find_attribute(pred); - + if (a) return a.value(); - + p = p.parent(); } - + return PUGIXML_TEXT(""); } @@ -8284,12 +8479,10 @@ PUGI__NS_BEGIN if (!table[i]) table[i] = static_cast(i); - void* result = alloc->allocate_nothrow(sizeof(table)); + void* result = alloc->allocate(sizeof(table)); + if (!result) return 0; - if (result) - { - memcpy(result, table, sizeof(table)); - } + memcpy(result, table, sizeof(table)); return static_cast(result); } @@ -8376,7 +8569,7 @@ PUGI__NS_BEGIN static const xpath_node_set dummy_node_set; - PUGI__FN unsigned int hash_string(const char_t* str) + PUGI__FN PUGI__UNSIGNED_OVERFLOW unsigned int hash_string(const char_t* str) { // Jenkins one-at-a-time hash (http://en.wikipedia.org/wiki/Jenkins_hash_function#one-at-a-time) unsigned int result = 0; @@ -8387,11 +8580,11 @@ PUGI__NS_BEGIN result += result << 10; result ^= result >> 6; } - + result += result << 3; result ^= result >> 11; result += result << 15; - + return result; } @@ -8459,7 +8652,7 @@ PUGI__NS_BEGIN break; default: - assert(!"Invalid variable type"); + assert(false && "Invalid variable type"); // unreachable } } @@ -8480,7 +8673,7 @@ PUGI__NS_BEGIN return lhs->set(static_cast(rhs)->value); default: - assert(!"Invalid variable type"); + assert(false && "Invalid variable type"); // unreachable return false; } } @@ -8545,9 +8738,9 @@ PUGI__NS_BEGIN else type = sorted; } - + if (type != order) 
reverse(begin, end); - + return order; } @@ -8567,7 +8760,7 @@ PUGI__NS_BEGIN return *min_element(begin, end, document_order_comparator()); default: - assert(!"Invalid node set type"); + assert(false && "Invalid node set type"); // unreachable return xpath_node(); } } @@ -8632,7 +8825,7 @@ PUGI__NS_BEGIN { // reallocate the old array or allocate a new one xpath_node* data = static_cast(alloc->reallocate(_begin, capacity * sizeof(xpath_node), (size_ + count) * sizeof(xpath_node))); - assert(data); + if (!data) return; // finalize _begin = data; @@ -8660,7 +8853,7 @@ PUGI__NS_BEGIN { if (_type == xpath_node_set::type_unsorted) sort(_begin, _end, duplicate_comparator()); - + _end = unique(_begin, _end); } @@ -8684,7 +8877,7 @@ PUGI__NS_BEGIN // reallocate the old array or allocate a new one xpath_node* data = static_cast(alloc->reallocate(_begin, capacity * sizeof(xpath_node), new_capacity * sizeof(xpath_node))); - assert(data); + if (!data) return; // finalize _begin = data; @@ -8768,12 +8961,12 @@ PUGI__NS_BEGIN { next(); } - + const char_t* state() const { return _cur; } - + void next() { const char_t* cur = _cur; @@ -8788,7 +8981,7 @@ PUGI__NS_BEGIN case 0: _cur_lexeme = lex_eof; break; - + case '>': if (*(cur+1) == '=') { @@ -8832,7 +9025,7 @@ PUGI__NS_BEGIN _cur_lexeme = lex_equal; break; - + case '+': cur += 1; _cur_lexeme = lex_plus; @@ -8856,7 +9049,7 @@ PUGI__NS_BEGIN _cur_lexeme = lex_union; break; - + case '$': cur += 1; @@ -8874,7 +9067,7 @@ PUGI__NS_BEGIN } _cur_lexeme_contents.end = cur; - + _cur_lexeme = lex_var_ref; } else @@ -8895,7 +9088,7 @@ PUGI__NS_BEGIN _cur_lexeme = lex_close_brace; break; - + case '[': cur += 1; _cur_lexeme = lex_open_square_brace; @@ -8926,7 +9119,7 @@ PUGI__NS_BEGIN _cur_lexeme = lex_slash; } break; - + case '.': if (*(cur+1) == '.') { @@ -8942,7 +9135,7 @@ PUGI__NS_BEGIN while (PUGI__IS_CHARTYPEX(*cur, ctx_digit)) cur++; _cur_lexeme_contents.end = cur; - + _cur_lexeme = lex_number; } else @@ -8968,7 +9161,7 @@ 
PUGI__NS_BEGIN _cur_lexeme_contents.begin = cur; while (*cur && *cur != terminator) cur++; _cur_lexeme_contents.end = cur; - + if (!*cur) _cur_lexeme = lex_none; else @@ -8998,7 +9191,7 @@ PUGI__NS_BEGIN _cur_lexeme_contents.begin = cur; while (PUGI__IS_CHARTYPEX(*cur, ctx_digit)) cur++; - + if (*cur == '.') { cur++; @@ -9031,7 +9224,7 @@ PUGI__NS_BEGIN } _cur_lexeme_contents.end = cur; - + _cur_lexeme = lex_string; } else @@ -9142,7 +9335,7 @@ PUGI__NS_BEGIN axis_preceding_sibling, axis_self }; - + enum nodetest_t { nodetest_none, @@ -9177,7 +9370,7 @@ PUGI__NS_BEGIN }; template const axis_t axis_to_type::axis = N; - + class xpath_ast_node { private: @@ -9297,7 +9490,7 @@ PUGI__NS_BEGIN } } - assert(!"Wrong types"); + assert(false && "Wrong types"); // unreachable return false; } @@ -9372,7 +9565,7 @@ PUGI__NS_BEGIN } else { - assert(!"Wrong types"); + assert(false && "Wrong types"); // unreachable return false; } } @@ -9496,7 +9689,7 @@ PUGI__NS_BEGIN return true; } break; - + case nodetest_type_node: case nodetest_all: if (is_xpath_attribute(name)) @@ -9505,7 +9698,7 @@ PUGI__NS_BEGIN return true; } break; - + case nodetest_all_in_namespace: if (starts_with(name, _data.nodetest) && is_xpath_attribute(name)) { @@ -9513,14 +9706,14 @@ PUGI__NS_BEGIN return true; } break; - + default: ; } return false; } - + bool step_push(xpath_node_set_raw& ns, xml_node_struct* n, xpath_allocator* alloc) { assert(n); @@ -9536,11 +9729,11 @@ PUGI__NS_BEGIN return true; } break; - + case nodetest_type_node: ns.push_back(xml_node(n), alloc); return true; - + case nodetest_type_comment: if (type == node_comment) { @@ -9548,7 +9741,7 @@ PUGI__NS_BEGIN return true; } break; - + case nodetest_type_text: if (type == node_pcdata || type == node_cdata) { @@ -9556,7 +9749,7 @@ PUGI__NS_BEGIN return true; } break; - + case nodetest_type_pi: if (type == node_pi) { @@ -9564,7 +9757,7 @@ PUGI__NS_BEGIN return true; } break; - + case nodetest_pi: if (type == node_pi && n->name && 
strequal(n->name, _data.nodetest)) { @@ -9572,7 +9765,7 @@ PUGI__NS_BEGIN return true; } break; - + case nodetest_all: if (type == node_element) { @@ -9580,7 +9773,7 @@ PUGI__NS_BEGIN return true; } break; - + case nodetest_all_in_namespace: if (type == node_element && n->name && starts_with(n->name, _data.nodetest)) { @@ -9590,7 +9783,7 @@ PUGI__NS_BEGIN break; default: - assert(!"Unknown axis"); + assert(false && "Unknown axis"); // unreachable } return false; @@ -9607,33 +9800,33 @@ PUGI__NS_BEGIN for (xml_attribute_struct* a = n->first_attribute; a; a = a->next_attribute) if (step_push(ns, a, n, alloc) & once) return; - + break; } - + case axis_child: { for (xml_node_struct* c = n->first_child; c; c = c->next_sibling) if (step_push(ns, c, alloc) & once) return; - + break; } - + case axis_descendant: case axis_descendant_or_self: { if (axis == axis_descendant_or_self) if (step_push(ns, n, alloc) & once) return; - + xml_node_struct* cur = n->first_child; - + while (cur) { if (step_push(ns, cur, alloc) & once) return; - + if (cur->first_child) cur = cur->first_child; else @@ -9644,32 +9837,32 @@ PUGI__NS_BEGIN if (cur == n) return; } - + cur = cur->next_sibling; } } - + break; } - + case axis_following_sibling: { for (xml_node_struct* c = n->next_sibling; c; c = c->next_sibling) if (step_push(ns, c, alloc) & once) return; - + break; } - + case axis_preceding_sibling: { for (xml_node_struct* c = n->prev_sibling_c; c->next_sibling; c = c->prev_sibling_c) if (step_push(ns, c, alloc) & once) return; - + break; } - + case axis_following: { xml_node_struct* cur = n; @@ -9748,7 +9941,7 @@ PUGI__NS_BEGIN break; } - + case axis_ancestor: case axis_ancestor_or_self: { @@ -9757,15 +9950,15 @@ PUGI__NS_BEGIN return; xml_node_struct* cur = n->parent; - + while (cur) { if (step_push(ns, cur, alloc) & once) return; - + cur = cur->parent; } - + break; } @@ -9783,12 +9976,12 @@ PUGI__NS_BEGIN break; } - + default: - assert(!"Unimplemented axis"); + assert(false && "Unimplemented 
axis"); // unreachable } } - + template void step_fill(xpath_node_set_raw& ns, xml_attribute_struct* a, xml_node_struct* p, xpath_allocator* alloc, bool once, T v) { const axis_t axis = T::axis; @@ -9803,15 +9996,15 @@ PUGI__NS_BEGIN return; xml_node_struct* cur = p; - + while (cur) { if (step_push(ns, cur, alloc) & once) return; - + cur = cur->parent; } - + break; } @@ -9827,7 +10020,7 @@ PUGI__NS_BEGIN case axis_following: { xml_node_struct* cur = p; - + while (cur) { if (cur->first_child) @@ -9864,9 +10057,9 @@ PUGI__NS_BEGIN step_fill(ns, p, alloc, once, v); break; } - + default: - assert(!"Unimplemented axis"); + assert(false && "Unimplemented axis"); // unreachable } } @@ -9908,7 +10101,7 @@ PUGI__NS_BEGIN // in general, all axes generate elements in a particular order, but there is no order guarantee if axis is applied to two nodes if (axis != axis_self && size != 0) ns.set_type(xpath_node_set::type_unsorted); - + step_fill(ns, *it, stack.result, once, v); if (_right) apply_predicates(ns, size, stack, eval); } @@ -9926,7 +10119,7 @@ PUGI__NS_BEGIN return ns; } - + public: xpath_ast_node(ast_type_t type, xpath_value_type rettype_, const char_t* value): _type(static_cast(type)), _rettype(static_cast(rettype_)), _axis(0), _test(0), _left(0), _right(0), _next(0) @@ -9941,14 +10134,14 @@ PUGI__NS_BEGIN assert(type == ast_number_constant); _data.number = value; } - + xpath_ast_node(ast_type_t type, xpath_value_type rettype_, xpath_variable* value): _type(static_cast(type)), _rettype(static_cast(rettype_)), _axis(0), _test(0), _left(0), _right(0), _next(0) { assert(type == ast_variable); _data.variable = value; } - + xpath_ast_node(ast_type_t type, xpath_value_type rettype_, xpath_ast_node* left = 0, xpath_ast_node* right = 0): _type(static_cast(type)), _rettype(static_cast(rettype_)), _axis(0), _test(0), _left(left), _right(right), _next(0) { @@ -9983,25 +10176,25 @@ PUGI__NS_BEGIN { case ast_op_or: return _left->eval_boolean(c, stack) || _right->eval_boolean(c, 
stack); - + case ast_op_and: return _left->eval_boolean(c, stack) && _right->eval_boolean(c, stack); - + case ast_op_equal: return compare_eq(_left, _right, c, stack, equal_to()); case ast_op_not_equal: return compare_eq(_left, _right, c, stack, not_equal_to()); - + case ast_op_less: return compare_rel(_left, _right, c, stack, less()); - + case ast_op_greater: return compare_rel(_right, _left, c, stack, less()); case ast_op_less_or_equal: return compare_rel(_left, _right, c, stack, less_equal()); - + case ast_op_greater_or_equal: return compare_rel(_right, _left, c, stack, less_equal()); @@ -10027,43 +10220,43 @@ PUGI__NS_BEGIN case ast_func_boolean: return _left->eval_boolean(c, stack); - + case ast_func_not: return !_left->eval_boolean(c, stack); - + case ast_func_true: return true; - + case ast_func_false: return false; case ast_func_lang: { if (c.n.attribute()) return false; - + xpath_allocator_capture cr(stack.result); xpath_string lang = _left->eval_string(c, stack); - + for (xml_node n = c.n.node(); n; n = n.parent()) { xml_attribute a = n.attribute(PUGIXML_TEXT("xml:lang")); - + if (a) { const char_t* value = a.value(); - + // strnicmp / strncasecmp is not portable for (const char_t* lit = lang.c_str(); *lit; ++lit) { if (tolower_ascii(*lit) != tolower_ascii(*value)) return false; ++value; } - + return *value == 0 || *value == '-'; } } - + return false; } @@ -10082,25 +10275,24 @@ PUGI__NS_BEGIN if (_rettype == xpath_type_boolean) return _data.variable->get_boolean(); - - // fallthrough to type conversion } + // fallthrough default: { switch (_rettype) { case xpath_type_number: return convert_number_to_boolean(eval_number(c, stack)); - + case xpath_type_string: { xpath_allocator_capture cr(stack.result); return !eval_string(c, stack).empty(); } - - case xpath_type_node_set: + + case xpath_type_node_set: { xpath_allocator_capture cr(stack.result); @@ -10108,7 +10300,7 @@ PUGI__NS_BEGIN } default: - assert(!"Wrong expression for return type boolean"); + 
assert(false && "Wrong expression for return type boolean"); // unreachable return false; } } @@ -10121,7 +10313,7 @@ PUGI__NS_BEGIN { case ast_op_add: return _left->eval_number(c, stack) + _right->eval_number(c, stack); - + case ast_op_subtract: return _left->eval_number(c, stack) - _right->eval_number(c, stack); @@ -10142,7 +10334,7 @@ PUGI__NS_BEGIN case ast_func_last: return static_cast(c.size); - + case ast_func_position: return static_cast(c.position); @@ -10152,28 +10344,28 @@ PUGI__NS_BEGIN return static_cast(_left->eval_node_set(c, stack, nodeset_eval_all).size()); } - + case ast_func_string_length_0: { xpath_allocator_capture cr(stack.result); return static_cast(string_value(c.n, stack.result).length()); } - + case ast_func_string_length_1: { xpath_allocator_capture cr(stack.result); return static_cast(_left->eval_string(c, stack).length()); } - + case ast_func_number_0: { xpath_allocator_capture cr(stack.result); return convert_string_to_number(string_value(c.n, stack.result).c_str()); } - + case ast_func_number_1: return _left->eval_number(c, stack); @@ -10182,76 +10374,75 @@ PUGI__NS_BEGIN xpath_allocator_capture cr(stack.result); double r = 0; - + xpath_node_set_raw ns = _left->eval_node_set(c, stack, nodeset_eval_all); - + for (const xpath_node* it = ns.begin(); it != ns.end(); ++it) { xpath_allocator_capture cri(stack.result); r += convert_string_to_number(string_value(*it, stack.result).c_str()); } - + return r; } case ast_func_floor: { double r = _left->eval_number(c, stack); - + return r == r ? floor(r) : r; } case ast_func_ceiling: { double r = _left->eval_number(c, stack); - + return r == r ? 
ceil(r) : r; } case ast_func_round: return round_nearest_nzero(_left->eval_number(c, stack)); - + case ast_variable: { assert(_rettype == _data.variable->type()); if (_rettype == xpath_type_number) return _data.variable->get_number(); - - // fallthrough to type conversion } + // fallthrough default: { switch (_rettype) { case xpath_type_boolean: return eval_boolean(c, stack) ? 1 : 0; - + case xpath_type_string: { xpath_allocator_capture cr(stack.result); return convert_string_to_number(eval_string(c, stack).c_str()); } - + case xpath_type_node_set: { xpath_allocator_capture cr(stack.result); return convert_string_to_number(eval_string(c, stack).c_str()); } - + default: - assert(!"Wrong expression for return type number"); + assert(false && "Wrong expression for return type number"); // unreachable return 0; } - + } } } - + xpath_string eval_string_concat(const xpath_context& c, const xpath_stack& stack) { assert(_type == ast_func_concat); @@ -10262,16 +10453,9 @@ PUGI__NS_BEGIN size_t count = 1; for (xpath_ast_node* nc = _right; nc; nc = nc->_next) count++; - // gather all strings - xpath_string static_buffer[4]; - xpath_string* buffer = static_buffer; - - // allocate on-heap for large concats - if (count > sizeof(static_buffer) / sizeof(static_buffer[0])) - { - buffer = static_cast(stack.temp->allocate(count * sizeof(xpath_string))); - assert(buffer); - } + // allocate a buffer for temporary string objects + xpath_string* buffer = static_cast(stack.temp->allocate(count * sizeof(xpath_string))); + if (!buffer) return xpath_string(); // evaluate all strings to temporary stack xpath_stack swapped_stack = {stack.temp, stack.result}; @@ -10288,7 +10472,7 @@ PUGI__NS_BEGIN // create final string char_t* result = static_cast(stack.result->allocate((length + 1) * sizeof(char_t))); - assert(result); + if (!result) return xpath_string(); char_t* ri = result; @@ -10307,11 +10491,11 @@ PUGI__NS_BEGIN { case ast_string_constant: return xpath_string::from_const(_data.string); - 
+ case ast_func_local_name_0: { xpath_node na = c.n; - + return xpath_string::from_const(local_name(na)); } @@ -10321,14 +10505,14 @@ PUGI__NS_BEGIN xpath_node_set_raw ns = _left->eval_node_set(c, stack, nodeset_eval_first); xpath_node na = ns.first(); - + return xpath_string::from_const(local_name(na)); } case ast_func_name_0: { xpath_node na = c.n; - + return xpath_string::from_const(qualified_name(na)); } @@ -10338,14 +10522,14 @@ PUGI__NS_BEGIN xpath_node_set_raw ns = _left->eval_node_set(c, stack, nodeset_eval_first); xpath_node na = ns.first(); - + return xpath_string::from_const(qualified_name(na)); } case ast_func_namespace_uri_0: { xpath_node na = c.n; - + return xpath_string::from_const(namespace_uri(na)); } @@ -10355,7 +10539,7 @@ PUGI__NS_BEGIN xpath_node_set_raw ns = _left->eval_node_set(c, stack, nodeset_eval_first); xpath_node na = ns.first(); - + return xpath_string::from_const(namespace_uri(na)); } @@ -10378,10 +10562,10 @@ PUGI__NS_BEGIN xpath_string p = _right->eval_string(c, swapped_stack); const char_t* pos = find_substring(s.c_str(), p.c_str()); - + return pos ? xpath_string::from_heap(s.c_str(), pos, stack.result) : xpath_string(); } - + case ast_func_substring_after: { xpath_allocator_capture cr(stack.temp); @@ -10390,7 +10574,7 @@ PUGI__NS_BEGIN xpath_string s = _left->eval_string(c, swapped_stack); xpath_string p = _right->eval_string(c, swapped_stack); - + const char_t* pos = find_substring(s.c_str(), p.c_str()); if (!pos) return xpath_string(); @@ -10410,19 +10594,19 @@ PUGI__NS_BEGIN size_t s_length = s.length(); double first = round_nearest(_right->eval_number(c, stack)); - + if (is_nan(first)) return xpath_string(); // NaN else if (first >= s_length + 1) return xpath_string(); - + size_t pos = first < 1 ? 1 : static_cast(first); assert(1 <= pos && pos <= s_length + 1); const char_t* rbegin = s.c_str() + (pos - 1); const char_t* rend = s.c_str() + s.length(); - + return s.uses_heap() ? 
xpath_string::from_heap(rbegin, rend, stack.result) : xpath_string::from_const(rbegin); } - + case ast_func_substring_3: { xpath_allocator_capture cr(stack.temp); @@ -10434,12 +10618,12 @@ PUGI__NS_BEGIN double first = round_nearest(_right->eval_number(c, stack)); double last = first + round_nearest(_right->_next->eval_number(c, stack)); - + if (is_nan(first) || is_nan(last)) return xpath_string(); else if (first >= s_length + 1) return xpath_string(); else if (first >= last) return xpath_string(); else if (last < 1) return xpath_string(); - + size_t pos = first < 1 ? 1 : static_cast(first); size_t end = last >= s_length + 1 ? s_length + 1 : static_cast(last); @@ -10455,6 +10639,8 @@ PUGI__NS_BEGIN xpath_string s = string_value(c.n, stack.result); char_t* begin = s.data(stack.result); + if (!begin) return xpath_string(); + char_t* end = normalize_space(begin); return xpath_string::from_heap_preallocated(begin, end); @@ -10465,8 +10651,10 @@ PUGI__NS_BEGIN xpath_string s = _left->eval_string(c, stack); char_t* begin = s.data(stack.result); + if (!begin) return xpath_string(); + char_t* end = normalize_space(begin); - + return xpath_string::from_heap_preallocated(begin, end); } @@ -10481,6 +10669,8 @@ PUGI__NS_BEGIN xpath_string to = _right->_next->eval_string(c, swapped_stack); char_t* begin = s.data(stack.result); + if (!begin) return xpath_string(); + char_t* end = translate(begin, from.c_str(), to.c_str(), to.length()); return xpath_string::from_heap_preallocated(begin, end); @@ -10491,6 +10681,8 @@ PUGI__NS_BEGIN xpath_string s = _left->eval_string(c, stack); char_t* begin = s.data(stack.result); + if (!begin) return xpath_string(); + char_t* end = translate_table(begin, _data.table); return xpath_string::from_heap_preallocated(begin, end); @@ -10502,20 +10694,19 @@ PUGI__NS_BEGIN if (_rettype == xpath_type_string) return xpath_string::from_const(_data.variable->get_string()); - - // fallthrough to type conversion } + // fallthrough default: { switch (_rettype) 
{ case xpath_type_boolean: return xpath_string::from_const(eval_boolean(c, stack) ? PUGIXML_TEXT("true") : PUGIXML_TEXT("false")); - + case xpath_type_number: return convert_number_to_string(eval_number(c, stack), stack.result); - + case xpath_type_node_set: { xpath_allocator_capture cr(stack.temp); @@ -10525,9 +10716,9 @@ PUGI__NS_BEGIN xpath_node_set_raw ns = eval_node_set(c, swapped_stack, nodeset_eval_first); return ns.empty() ? xpath_string() : string_value(ns.first(), stack.result); } - + default: - assert(!"Wrong expression for return type string"); + assert(false && "Wrong expression for return type string"); // unreachable return xpath_string(); } } @@ -10566,20 +10757,20 @@ PUGI__NS_BEGIN bool once = eval_once(set.type(), eval); apply_predicate(set, 0, stack, once); - + return set; } - + case ast_func_id: return xpath_node_set_raw(); - + case ast_step: { switch (_axis) { case axis_ancestor: return step_do(c, stack, eval, axis_to_type()); - + case axis_ancestor_or_self: return step_do(c, stack, eval, axis_to_type()); @@ -10588,7 +10779,7 @@ PUGI__NS_BEGIN case axis_child: return step_do(c, stack, eval, axis_to_type()); - + case axis_descendant: return step_do(c, stack, eval, axis_to_type()); @@ -10597,28 +10788,28 @@ PUGI__NS_BEGIN case axis_following: return step_do(c, stack, eval, axis_to_type()); - + case axis_following_sibling: return step_do(c, stack, eval, axis_to_type()); - + case axis_namespace: // namespaced axis is not supported return xpath_node_set_raw(); - + case axis_parent: return step_do(c, stack, eval, axis_to_type()); - + case axis_preceding: return step_do(c, stack, eval, axis_to_type()); case axis_preceding_sibling: return step_do(c, stack, eval, axis_to_type()); - + case axis_self: return step_do(c, stack, eval, axis_to_type()); default: - assert(!"Unknown axis"); + assert(false && "Unknown axis"); // unreachable return xpath_node_set_raw(); } } @@ -10652,21 +10843,25 @@ PUGI__NS_BEGIN return ns; } - - // fallthrough to type conversion 
} + // fallthrough default: - assert(!"Wrong expression for return type node set"); + assert(false && "Wrong expression for return type node set"); // unreachable return xpath_node_set_raw(); } } void optimize(xpath_allocator* alloc) { - if (_left) _left->optimize(alloc); - if (_right) _right->optimize(alloc); - if (_next) _next->optimize(alloc); + if (_left) + _left->optimize(alloc); + + if (_right) + _right->optimize(alloc); + + if (_next) + _next->optimize(alloc); optimize_self(alloc); } @@ -10730,7 +10925,7 @@ PUGI__NS_BEGIN _type = ast_opt_compare_attribute; } } - + bool is_posinv_expr() const { switch (_type) @@ -10754,10 +10949,10 @@ PUGI__NS_BEGIN default: if (_left && !_left->is_posinv_expr()) return false; - + for (xpath_ast_node* n = _right; n; n = n->_next) if (!n->is_posinv_expr()) return false; - + return true; } } @@ -10795,65 +10990,77 @@ PUGI__NS_BEGIN char_t _scratch[32]; - #ifdef PUGIXML_NO_EXCEPTIONS - jmp_buf _error_handler; - #endif - - void throw_error(const char* message) + xpath_ast_node* error(const char* message) { _result->error = message; _result->offset = _lexer.current_pos() - _query; - #ifdef PUGIXML_NO_EXCEPTIONS - longjmp(_error_handler, 1); - #else - throw xpath_exception(*_result); - #endif + return 0; } - void throw_error_oom() + xpath_ast_node* error_oom() { - #ifdef PUGIXML_NO_EXCEPTIONS - throw_error("Out of memory"); - #else - throw std::bad_alloc(); - #endif + assert(_alloc->_error); + *_alloc->_error = true; + + return 0; } void* alloc_node() { - void* result = _alloc->allocate_nothrow(sizeof(xpath_ast_node)); + return _alloc->allocate(sizeof(xpath_ast_node)); + } - if (!result) throw_error_oom(); + xpath_ast_node* alloc_node(ast_type_t type, xpath_value_type rettype, const char_t* value) + { + void* memory = alloc_node(); + return memory ? 
new (memory) xpath_ast_node(type, rettype, value) : 0; + } - return result; + xpath_ast_node* alloc_node(ast_type_t type, xpath_value_type rettype, double value) + { + void* memory = alloc_node(); + return memory ? new (memory) xpath_ast_node(type, rettype, value) : 0; + } + + xpath_ast_node* alloc_node(ast_type_t type, xpath_value_type rettype, xpath_variable* value) + { + void* memory = alloc_node(); + return memory ? new (memory) xpath_ast_node(type, rettype, value) : 0; + } + + xpath_ast_node* alloc_node(ast_type_t type, xpath_value_type rettype, xpath_ast_node* left = 0, xpath_ast_node* right = 0) + { + void* memory = alloc_node(); + return memory ? new (memory) xpath_ast_node(type, rettype, left, right) : 0; + } + + xpath_ast_node* alloc_node(ast_type_t type, xpath_ast_node* left, axis_t axis, nodetest_t test, const char_t* contents) + { + void* memory = alloc_node(); + return memory ? new (memory) xpath_ast_node(type, left, axis, test, contents) : 0; + } + + xpath_ast_node* alloc_node(ast_type_t type, xpath_ast_node* left, xpath_ast_node* right, predicate_t test) + { + void* memory = alloc_node(); + return memory ? 
new (memory) xpath_ast_node(type, left, right, test) : 0; } const char_t* alloc_string(const xpath_lexer_string& value) { - if (value.begin) - { - size_t length = static_cast(value.end - value.begin); + if (!value.begin) + return PUGIXML_TEXT(""); - char_t* c = static_cast(_alloc->allocate_nothrow((length + 1) * sizeof(char_t))); - if (!c) throw_error_oom(); - assert(c); // workaround for clang static analysis + size_t length = static_cast(value.end - value.begin); - memcpy(c, value.begin, length * sizeof(char_t)); - c[length] = 0; + char_t* c = static_cast(_alloc->allocate((length + 1) * sizeof(char_t))); + if (!c) return 0; - return c; - } - else return 0; - } + memcpy(c, value.begin, length * sizeof(char_t)); + c[length] = 0; - xpath_ast_node* parse_function_helper(ast_type_t type0, ast_type_t type1, size_t argc, xpath_ast_node* args[2]) - { - assert(argc <= 1); - - if (argc == 1 && args[0]->rettype() != xpath_type_node_set) throw_error("Function has to be applied to node set"); - - return new (alloc_node()) xpath_ast_node(argc == 0 ? 
type0 : type1, xpath_type_string, args[0]); + return c; } xpath_ast_node* parse_function(const xpath_lexer_string& name, size_t argc, xpath_ast_node* args[2]) @@ -10862,111 +11069,118 @@ PUGI__NS_BEGIN { case 'b': if (name == PUGIXML_TEXT("boolean") && argc == 1) - return new (alloc_node()) xpath_ast_node(ast_func_boolean, xpath_type_boolean, args[0]); - + return alloc_node(ast_func_boolean, xpath_type_boolean, args[0]); + break; - + case 'c': if (name == PUGIXML_TEXT("count") && argc == 1) { - if (args[0]->rettype() != xpath_type_node_set) throw_error("Function has to be applied to node set"); - return new (alloc_node()) xpath_ast_node(ast_func_count, xpath_type_number, args[0]); + if (args[0]->rettype() != xpath_type_node_set) return error("Function has to be applied to node set"); + return alloc_node(ast_func_count, xpath_type_number, args[0]); } else if (name == PUGIXML_TEXT("contains") && argc == 2) - return new (alloc_node()) xpath_ast_node(ast_func_contains, xpath_type_boolean, args[0], args[1]); + return alloc_node(ast_func_contains, xpath_type_boolean, args[0], args[1]); else if (name == PUGIXML_TEXT("concat") && argc >= 2) - return new (alloc_node()) xpath_ast_node(ast_func_concat, xpath_type_string, args[0], args[1]); + return alloc_node(ast_func_concat, xpath_type_string, args[0], args[1]); else if (name == PUGIXML_TEXT("ceiling") && argc == 1) - return new (alloc_node()) xpath_ast_node(ast_func_ceiling, xpath_type_number, args[0]); - - break; - - case 'f': - if (name == PUGIXML_TEXT("false") && argc == 0) - return new (alloc_node()) xpath_ast_node(ast_func_false, xpath_type_boolean); - else if (name == PUGIXML_TEXT("floor") && argc == 1) - return new (alloc_node()) xpath_ast_node(ast_func_floor, xpath_type_number, args[0]); - - break; - - case 'i': - if (name == PUGIXML_TEXT("id") && argc == 1) - return new (alloc_node()) xpath_ast_node(ast_func_id, xpath_type_node_set, args[0]); - - break; - - case 'l': - if (name == PUGIXML_TEXT("last") && argc == 0) 
- return new (alloc_node()) xpath_ast_node(ast_func_last, xpath_type_number); - else if (name == PUGIXML_TEXT("lang") && argc == 1) - return new (alloc_node()) xpath_ast_node(ast_func_lang, xpath_type_boolean, args[0]); - else if (name == PUGIXML_TEXT("local-name") && argc <= 1) - return parse_function_helper(ast_func_local_name_0, ast_func_local_name_1, argc, args); - - break; - - case 'n': - if (name == PUGIXML_TEXT("name") && argc <= 1) - return parse_function_helper(ast_func_name_0, ast_func_name_1, argc, args); - else if (name == PUGIXML_TEXT("namespace-uri") && argc <= 1) - return parse_function_helper(ast_func_namespace_uri_0, ast_func_namespace_uri_1, argc, args); - else if (name == PUGIXML_TEXT("normalize-space") && argc <= 1) - return new (alloc_node()) xpath_ast_node(argc == 0 ? ast_func_normalize_space_0 : ast_func_normalize_space_1, xpath_type_string, args[0], args[1]); - else if (name == PUGIXML_TEXT("not") && argc == 1) - return new (alloc_node()) xpath_ast_node(ast_func_not, xpath_type_boolean, args[0]); - else if (name == PUGIXML_TEXT("number") && argc <= 1) - return new (alloc_node()) xpath_ast_node(argc == 0 ? ast_func_number_0 : ast_func_number_1, xpath_type_number, args[0]); - - break; - - case 'p': - if (name == PUGIXML_TEXT("position") && argc == 0) - return new (alloc_node()) xpath_ast_node(ast_func_position, xpath_type_number); - - break; - - case 'r': - if (name == PUGIXML_TEXT("round") && argc == 1) - return new (alloc_node()) xpath_ast_node(ast_func_round, xpath_type_number, args[0]); + return alloc_node(ast_func_ceiling, xpath_type_number, args[0]); break; - - case 's': - if (name == PUGIXML_TEXT("string") && argc <= 1) - return new (alloc_node()) xpath_ast_node(argc == 0 ? ast_func_string_0 : ast_func_string_1, xpath_type_string, args[0]); - else if (name == PUGIXML_TEXT("string-length") && argc <= 1) - return new (alloc_node()) xpath_ast_node(argc == 0 ? 
ast_func_string_length_0 : ast_func_string_length_1, xpath_type_number, args[0]); - else if (name == PUGIXML_TEXT("starts-with") && argc == 2) - return new (alloc_node()) xpath_ast_node(ast_func_starts_with, xpath_type_boolean, args[0], args[1]); - else if (name == PUGIXML_TEXT("substring-before") && argc == 2) - return new (alloc_node()) xpath_ast_node(ast_func_substring_before, xpath_type_string, args[0], args[1]); - else if (name == PUGIXML_TEXT("substring-after") && argc == 2) - return new (alloc_node()) xpath_ast_node(ast_func_substring_after, xpath_type_string, args[0], args[1]); - else if (name == PUGIXML_TEXT("substring") && (argc == 2 || argc == 3)) - return new (alloc_node()) xpath_ast_node(argc == 2 ? ast_func_substring_2 : ast_func_substring_3, xpath_type_string, args[0], args[1]); - else if (name == PUGIXML_TEXT("sum") && argc == 1) + + case 'f': + if (name == PUGIXML_TEXT("false") && argc == 0) + return alloc_node(ast_func_false, xpath_type_boolean); + else if (name == PUGIXML_TEXT("floor") && argc == 1) + return alloc_node(ast_func_floor, xpath_type_number, args[0]); + + break; + + case 'i': + if (name == PUGIXML_TEXT("id") && argc == 1) + return alloc_node(ast_func_id, xpath_type_node_set, args[0]); + + break; + + case 'l': + if (name == PUGIXML_TEXT("last") && argc == 0) + return alloc_node(ast_func_last, xpath_type_number); + else if (name == PUGIXML_TEXT("lang") && argc == 1) + return alloc_node(ast_func_lang, xpath_type_boolean, args[0]); + else if (name == PUGIXML_TEXT("local-name") && argc <= 1) { - if (args[0]->rettype() != xpath_type_node_set) throw_error("Function has to be applied to node set"); - return new (alloc_node()) xpath_ast_node(ast_func_sum, xpath_type_number, args[0]); + if (argc == 1 && args[0]->rettype() != xpath_type_node_set) return error("Function has to be applied to node set"); + return alloc_node(argc == 0 ? 
ast_func_local_name_0 : ast_func_local_name_1, xpath_type_string, args[0]); } break; - + + case 'n': + if (name == PUGIXML_TEXT("name") && argc <= 1) + { + if (argc == 1 && args[0]->rettype() != xpath_type_node_set) return error("Function has to be applied to node set"); + return alloc_node(argc == 0 ? ast_func_name_0 : ast_func_name_1, xpath_type_string, args[0]); + } + else if (name == PUGIXML_TEXT("namespace-uri") && argc <= 1) + { + if (argc == 1 && args[0]->rettype() != xpath_type_node_set) return error("Function has to be applied to node set"); + return alloc_node(argc == 0 ? ast_func_namespace_uri_0 : ast_func_namespace_uri_1, xpath_type_string, args[0]); + } + else if (name == PUGIXML_TEXT("normalize-space") && argc <= 1) + return alloc_node(argc == 0 ? ast_func_normalize_space_0 : ast_func_normalize_space_1, xpath_type_string, args[0], args[1]); + else if (name == PUGIXML_TEXT("not") && argc == 1) + return alloc_node(ast_func_not, xpath_type_boolean, args[0]); + else if (name == PUGIXML_TEXT("number") && argc <= 1) + return alloc_node(argc == 0 ? ast_func_number_0 : ast_func_number_1, xpath_type_number, args[0]); + + break; + + case 'p': + if (name == PUGIXML_TEXT("position") && argc == 0) + return alloc_node(ast_func_position, xpath_type_number); + + break; + + case 'r': + if (name == PUGIXML_TEXT("round") && argc == 1) + return alloc_node(ast_func_round, xpath_type_number, args[0]); + + break; + + case 's': + if (name == PUGIXML_TEXT("string") && argc <= 1) + return alloc_node(argc == 0 ? ast_func_string_0 : ast_func_string_1, xpath_type_string, args[0]); + else if (name == PUGIXML_TEXT("string-length") && argc <= 1) + return alloc_node(argc == 0 ? 
ast_func_string_length_0 : ast_func_string_length_1, xpath_type_number, args[0]); + else if (name == PUGIXML_TEXT("starts-with") && argc == 2) + return alloc_node(ast_func_starts_with, xpath_type_boolean, args[0], args[1]); + else if (name == PUGIXML_TEXT("substring-before") && argc == 2) + return alloc_node(ast_func_substring_before, xpath_type_string, args[0], args[1]); + else if (name == PUGIXML_TEXT("substring-after") && argc == 2) + return alloc_node(ast_func_substring_after, xpath_type_string, args[0], args[1]); + else if (name == PUGIXML_TEXT("substring") && (argc == 2 || argc == 3)) + return alloc_node(argc == 2 ? ast_func_substring_2 : ast_func_substring_3, xpath_type_string, args[0], args[1]); + else if (name == PUGIXML_TEXT("sum") && argc == 1) + { + if (args[0]->rettype() != xpath_type_node_set) return error("Function has to be applied to node set"); + return alloc_node(ast_func_sum, xpath_type_number, args[0]); + } + + break; + case 't': if (name == PUGIXML_TEXT("translate") && argc == 3) - return new (alloc_node()) xpath_ast_node(ast_func_translate, xpath_type_string, args[0], args[1]); + return alloc_node(ast_func_translate, xpath_type_string, args[0], args[1]); else if (name == PUGIXML_TEXT("true") && argc == 0) - return new (alloc_node()) xpath_ast_node(ast_func_true, xpath_type_boolean); - + return alloc_node(ast_func_true, xpath_type_boolean); + break; default: break; } - throw_error("Unrecognized function or wrong parameter count"); - - return 0; + return error("Unrecognized function or wrong parameter count"); } axis_t parse_axis_name(const xpath_lexer_string& name, bool& specified) @@ -10982,37 +11196,37 @@ PUGI__NS_BEGIN return axis_ancestor_or_self; else if (name == PUGIXML_TEXT("attribute")) return axis_attribute; - + break; - + case 'c': if (name == PUGIXML_TEXT("child")) return axis_child; - + break; - + case 'd': if (name == PUGIXML_TEXT("descendant")) return axis_descendant; else if (name == PUGIXML_TEXT("descendant-or-self")) return 
axis_descendant_or_self; - + break; - + case 'f': if (name == PUGIXML_TEXT("following")) return axis_following; else if (name == PUGIXML_TEXT("following-sibling")) return axis_following_sibling; - + break; - + case 'n': if (name == PUGIXML_TEXT("namespace")) return axis_namespace; - + break; - + case 'p': if (name == PUGIXML_TEXT("parent")) return axis_parent; @@ -11020,13 +11234,13 @@ PUGI__NS_BEGIN return axis_preceding; else if (name == PUGIXML_TEXT("preceding-sibling")) return axis_preceding_sibling; - + break; - + case 's': if (name == PUGIXML_TEXT("self")) return axis_self; - + break; default: @@ -11064,7 +11278,7 @@ PUGI__NS_BEGIN return nodetest_type_text; break; - + default: break; } @@ -11082,18 +11296,18 @@ PUGI__NS_BEGIN xpath_lexer_string name = _lexer.contents(); if (!_variables) - throw_error("Unknown variable: variable set is not provided"); + return error("Unknown variable: variable set is not provided"); xpath_variable* var = 0; if (!get_variable_scratch(_scratch, _variables, name.begin, name.end, &var)) - throw_error_oom(); + return error_oom(); if (!var) - throw_error("Unknown variable: variable set does not contain the given name"); + return error("Unknown variable: variable set does not contain the given name"); _lexer.next(); - return new (alloc_node()) xpath_ast_node(ast_variable, var->type(), var); + return alloc_node(ast_variable, var->type(), var); } case lex_open_brace: @@ -11101,9 +11315,10 @@ PUGI__NS_BEGIN _lexer.next(); xpath_ast_node* n = parse_expression(); + if (!n) return 0; if (_lexer.current() != lex_close_brace) - throw_error("Unmatched braces"); + return error("Expected ')' to match an opening '('"); _lexer.next(); @@ -11113,11 +11328,11 @@ PUGI__NS_BEGIN case lex_quoted_string: { const char_t* value = alloc_string(_lexer.contents()); + if (!value) return 0; - xpath_ast_node* n = new (alloc_node()) xpath_ast_node(ast_string_constant, xpath_type_string, value); _lexer.next(); - return n; + return 
alloc_node(ast_string_constant, xpath_type_string, value); } case lex_number: @@ -11125,84 +11340,86 @@ PUGI__NS_BEGIN double value = 0; if (!convert_string_to_number_scratch(_scratch, _lexer.contents().begin, _lexer.contents().end, &value)) - throw_error_oom(); + return error_oom(); - xpath_ast_node* n = new (alloc_node()) xpath_ast_node(ast_number_constant, xpath_type_number, value); _lexer.next(); - return n; + return alloc_node(ast_number_constant, xpath_type_number, value); } case lex_string: { xpath_ast_node* args[2] = {0}; size_t argc = 0; - + xpath_lexer_string function = _lexer.contents(); _lexer.next(); - - xpath_ast_node* last_arg = 0; - - if (_lexer.current() != lex_open_brace) - throw_error("Unrecognized function call"); - _lexer.next(); - if (_lexer.current() != lex_close_brace) - args[argc++] = parse_expression(); + xpath_ast_node* last_arg = 0; + + if (_lexer.current() != lex_open_brace) + return error("Unrecognized function call"); + _lexer.next(); while (_lexer.current() != lex_close_brace) { - if (_lexer.current() != lex_comma) - throw_error("No comma between function arguments"); - _lexer.next(); - + if (argc > 0) + { + if (_lexer.current() != lex_comma) + return error("No comma between function arguments"); + _lexer.next(); + } + xpath_ast_node* n = parse_expression(); - + if (!n) return 0; + if (argc < 2) args[argc] = n; else last_arg->set_next(n); argc++; last_arg = n; } - + _lexer.next(); return parse_function(function, argc, args); } default: - throw_error("Unrecognizable primary expression"); - - return 0; + return error("Unrecognizable primary expression"); } } - + // FilterExpr ::= PrimaryExpr | FilterExpr Predicate // Predicate ::= '[' PredicateExpr ']' // PredicateExpr ::= Expr xpath_ast_node* parse_filter_expression() { xpath_ast_node* n = parse_primary_expression(); + if (!n) return 0; while (_lexer.current() == lex_open_square_brace) { _lexer.next(); + if (n->rettype() != xpath_type_node_set) + return error("Predicate has to be 
applied to node set"); + xpath_ast_node* expr = parse_expression(); + if (!expr) return 0; - if (n->rettype() != xpath_type_node_set) throw_error("Predicate has to be applied to node set"); - - n = new (alloc_node()) xpath_ast_node(ast_filter, n, expr, predicate_default); + n = alloc_node(ast_filter, n, expr, predicate_default); + if (!n) return 0; if (_lexer.current() != lex_close_square_brace) - throw_error("Unmatched square brace"); - + return error("Expected ']' to match an opening '['"); + _lexer.next(); } - + return n; } - + // Step ::= AxisSpecifier NodeTest Predicate* | AbbreviatedStep // AxisSpecifier ::= AxisName '::' | '@'? // NodeTest ::= NameTest | NodeType '(' ')' | 'processing-instruction' '(' Literal ')' @@ -11211,7 +11428,7 @@ PUGI__NS_BEGIN xpath_ast_node* parse_step(xpath_ast_node* set) { if (set && set->rettype() != xpath_type_node_set) - throw_error("Step has to be applied to node set"); + return error("Step has to be applied to node set"); bool axis_specified = false; axis_t axis = axis_child; // implied child axis @@ -11220,25 +11437,31 @@ PUGI__NS_BEGIN { axis = axis_attribute; axis_specified = true; - + _lexer.next(); } else if (_lexer.current() == lex_dot) { _lexer.next(); - - return new (alloc_node()) xpath_ast_node(ast_step, set, axis_self, nodetest_type_node, 0); + + if (_lexer.current() == lex_open_square_brace) + return error("Predicates are not allowed after an abbreviated step"); + + return alloc_node(ast_step, set, axis_self, nodetest_type_node, 0); } else if (_lexer.current() == lex_double_dot) { _lexer.next(); - - return new (alloc_node()) xpath_ast_node(ast_step, set, axis_parent, nodetest_type_node, 0); + + if (_lexer.current() == lex_open_square_brace) + return error("Predicates are not allowed after an abbreviated step"); + + return alloc_node(ast_step, set, axis_parent, nodetest_type_node, 0); } - + nodetest_t nt_type = nodetest_none; xpath_lexer_string nt_name; - + if (_lexer.current() == lex_string) { // node name test @@ 
-11249,11 +11472,13 @@ PUGI__NS_BEGIN if (_lexer.current() == lex_double_colon) { // parse axis name - if (axis_specified) throw_error("Two axis specifiers in one step"); + if (axis_specified) + return error("Two axis specifiers in one step"); axis = parse_axis_name(nt_name, axis_specified); - if (!axis_specified) throw_error("Unknown axis"); + if (!axis_specified) + return error("Unknown axis"); // read actual node test _lexer.next(); @@ -11269,42 +11494,47 @@ PUGI__NS_BEGIN nt_name = _lexer.contents(); _lexer.next(); } - else throw_error("Unrecognized node test"); + else + { + return error("Unrecognized node test"); + } } - + if (nt_type == nodetest_none) { // node type test or processing-instruction if (_lexer.current() == lex_open_brace) { _lexer.next(); - + if (_lexer.current() == lex_close_brace) { _lexer.next(); nt_type = parse_node_test_type(nt_name); - if (nt_type == nodetest_none) throw_error("Unrecognized node type"); - + if (nt_type == nodetest_none) + return error("Unrecognized node type"); + nt_name = xpath_lexer_string(); } else if (nt_name == PUGIXML_TEXT("processing-instruction")) { if (_lexer.current() != lex_quoted_string) - throw_error("Only literals are allowed as arguments to processing-instruction()"); - + return error("Only literals are allowed as arguments to processing-instruction()"); + nt_type = nodetest_pi; nt_name = _lexer.contents(); _lexer.next(); - + if (_lexer.current() != lex_close_brace) - throw_error("Unmatched brace near processing-instruction()"); + return error("Unmatched brace near processing-instruction()"); _lexer.next(); } else - throw_error("Unmatched brace near node type test"); - + { + return error("Unmatched brace near node type test"); + } } // QName or NCName:* else @@ -11312,10 +11542,13 @@ PUGI__NS_BEGIN if (nt_name.end - nt_name.begin > 2 && nt_name.end[-2] == ':' && nt_name.end[-1] == '*') // NCName:* { nt_name.end--; // erase * - + nt_type = nodetest_all_in_namespace; } - else nt_type = nodetest_name; + else + 
{ + nt_type = nodetest_name; + } } } } @@ -11324,52 +11557,66 @@ PUGI__NS_BEGIN nt_type = nodetest_all; _lexer.next(); } - else throw_error("Unrecognized node test"); - - xpath_ast_node* n = new (alloc_node()) xpath_ast_node(ast_step, set, axis, nt_type, alloc_string(nt_name)); - + else + { + return error("Unrecognized node test"); + } + + const char_t* nt_name_copy = alloc_string(nt_name); + if (!nt_name_copy) return 0; + + xpath_ast_node* n = alloc_node(ast_step, set, axis, nt_type, nt_name_copy); + if (!n) return 0; + xpath_ast_node* last = 0; - + while (_lexer.current() == lex_open_square_brace) { _lexer.next(); - - xpath_ast_node* expr = parse_expression(); - xpath_ast_node* pred = new (alloc_node()) xpath_ast_node(ast_predicate, 0, expr, predicate_default); - + xpath_ast_node* expr = parse_expression(); + if (!expr) return 0; + + xpath_ast_node* pred = alloc_node(ast_predicate, 0, expr, predicate_default); + if (!pred) return 0; + if (_lexer.current() != lex_close_square_brace) - throw_error("Unmatched square brace"); + return error("Expected ']' to match an opening '['"); _lexer.next(); - + if (last) last->set_next(pred); else n->set_right(pred); - + last = pred; } return n; } - + // RelativeLocationPath ::= Step | RelativeLocationPath '/' Step | RelativeLocationPath '//' Step xpath_ast_node* parse_relative_location_path(xpath_ast_node* set) { xpath_ast_node* n = parse_step(set); - + if (!n) return 0; + while (_lexer.current() == lex_slash || _lexer.current() == lex_double_slash) { lexeme_t l = _lexer.current(); _lexer.next(); if (l == lex_double_slash) - n = new (alloc_node()) xpath_ast_node(ast_step, n, axis_descendant_or_self, nodetest_type_node, 0); - + { + n = alloc_node(ast_step, n, axis_descendant_or_self, nodetest_type_node, 0); + if (!n) return 0; + } + n = parse_step(n); + if (!n) return 0; } - + return n; } - + // LocationPath ::= RelativeLocationPath | AbsoluteLocationPath // AbsoluteLocationPath ::= '/' RelativeLocationPath? 
| '//' RelativeLocationPath xpath_ast_node* parse_location_path() @@ -11377,8 +11624,9 @@ PUGI__NS_BEGIN if (_lexer.current() == lex_slash) { _lexer.next(); - - xpath_ast_node* n = new (alloc_node()) xpath_ast_node(ast_step_root, xpath_type_node_set); + + xpath_ast_node* n = alloc_node(ast_step_root, xpath_type_node_set); + if (!n) return 0; // relative location path can start from axis_attribute, dot, double_dot, multiply and string lexemes; any other lexeme means standalone root path lexeme_t l = _lexer.current(); @@ -11391,17 +11639,20 @@ PUGI__NS_BEGIN else if (_lexer.current() == lex_double_slash) { _lexer.next(); - - xpath_ast_node* n = new (alloc_node()) xpath_ast_node(ast_step_root, xpath_type_node_set); - n = new (alloc_node()) xpath_ast_node(ast_step, n, axis_descendant_or_self, nodetest_type_node, 0); - + + xpath_ast_node* n = alloc_node(ast_step_root, xpath_type_node_set); + if (!n) return 0; + + n = alloc_node(ast_step, n, axis_descendant_or_self, nodetest_type_node, 0); + if (!n) return 0; + return parse_relative_location_path(n); } // else clause moved outside of if because of bogus warning 'control may reach end of non-void function being inlined' in gcc 4.0.1 return parse_relative_location_path(0); } - + // PathExpr ::= LocationPath // | FilterExpr // | FilterExpr '/' RelativeLocationPath @@ -11416,8 +11667,7 @@ PUGI__NS_BEGIN // PrimaryExpr begins with '$' in case of it being a variable reference, // '(' in case of it being an expression, string literal, number constant or // function call. 
- - if (_lexer.current() == lex_var_ref || _lexer.current() == lex_open_brace || + if (_lexer.current() == lex_var_ref || _lexer.current() == lex_open_brace || _lexer.current() == lex_quoted_string || _lexer.current() == lex_number || _lexer.current() == lex_string) { @@ -11425,29 +11675,34 @@ PUGI__NS_BEGIN { // This is either a function call, or not - if not, we shall proceed with location path const char_t* state = _lexer.state(); - + while (PUGI__IS_CHARTYPE(*state, ct_space)) ++state; - - if (*state != '(') return parse_location_path(); + + if (*state != '(') + return parse_location_path(); // This looks like a function call; however this still can be a node-test. Check it. - if (parse_node_test_type(_lexer.contents()) != nodetest_none) return parse_location_path(); + if (parse_node_test_type(_lexer.contents()) != nodetest_none) + return parse_location_path(); } - + xpath_ast_node* n = parse_filter_expression(); + if (!n) return 0; if (_lexer.current() == lex_slash || _lexer.current() == lex_double_slash) { lexeme_t l = _lexer.current(); _lexer.next(); - + if (l == lex_double_slash) { - if (n->rettype() != xpath_type_node_set) throw_error("Step has to be applied to node set"); + if (n->rettype() != xpath_type_node_set) + return error("Step has to be applied to node set"); - n = new (alloc_node()) xpath_ast_node(ast_step, n, axis_descendant_or_self, nodetest_type_node, 0); + n = alloc_node(ast_step, n, axis_descendant_or_self, nodetest_type_node, 0); + if (!n) return 0; } - + // select from location path return parse_relative_location_path(n); } @@ -11459,12 +11714,15 @@ PUGI__NS_BEGIN _lexer.next(); // precedence 7+ - only parses union expressions - xpath_ast_node* expr = parse_expression_rec(parse_path_or_unary_expression(), 7); + xpath_ast_node* n = parse_expression(7); + if (!n) return 0; - return new (alloc_node()) xpath_ast_node(ast_op_negate, xpath_type_number, expr); + return alloc_node(ast_op_negate, xpath_type_number, n); } else + { return 
parse_location_path(); + } } struct binary_op_t @@ -11542,20 +11800,23 @@ PUGI__NS_BEGIN _lexer.next(); xpath_ast_node* rhs = parse_path_or_unary_expression(); + if (!rhs) return 0; binary_op_t nextop = binary_op_t::parse(_lexer); while (nextop.asttype != ast_unknown && nextop.precedence > op.precedence) { rhs = parse_expression_rec(rhs, nextop.precedence); + if (!rhs) return 0; nextop = binary_op_t::parse(_lexer); } if (op.asttype == ast_op_union && (lhs->rettype() != xpath_type_node_set || rhs->rettype() != xpath_type_node_set)) - throw_error("Union operator has to be applied to node sets"); + return error("Union operator has to be applied to node sets"); - lhs = new (alloc_node()) xpath_ast_node(op.asttype, op.rettype, lhs, rhs); + lhs = alloc_node(op.asttype, op.rettype, lhs, rhs); + if (!lhs) return 0; op = binary_op_t::parse(_lexer); } @@ -11581,9 +11842,12 @@ PUGI__NS_BEGIN // | MultiplicativeExpr '*' UnaryExpr // | MultiplicativeExpr 'div' UnaryExpr // | MultiplicativeExpr 'mod' UnaryExpr - xpath_ast_node* parse_expression() + xpath_ast_node* parse_expression(int limit = 0) { - return parse_expression_rec(parse_path_or_unary_expression(), 0); + xpath_ast_node* n = parse_path_or_unary_expression(); + if (!n) return 0; + + return parse_expression_rec(n, limit); } xpath_parser(const char_t* query, xpath_variable_set* variables, xpath_allocator* alloc, xpath_parse_result* result): _alloc(alloc), _lexer(query), _query(query), _variables(variables), _result(result) @@ -11592,28 +11856,21 @@ PUGI__NS_BEGIN xpath_ast_node* parse() { - xpath_ast_node* result = parse_expression(); - + xpath_ast_node* n = parse_expression(); + if (!n) return 0; + + // check if there are unparsed tokens left if (_lexer.current() != lex_eof) - { - // there are still unparsed tokens left, error - throw_error("Incorrect query"); - } - - return result; + return error("Incorrect query"); + + return n; } static xpath_ast_node* parse(const char_t* query, xpath_variable_set* variables, 
xpath_allocator* alloc, xpath_parse_result* result) { xpath_parser parser(query, variables, alloc, result); - #ifdef PUGIXML_NO_EXCEPTIONS - int error = setjmp(parser._error_handler); - - return (error == 0) ? parser.parse() : 0; - #else return parser.parse(); - #endif } }; @@ -11636,7 +11893,7 @@ PUGI__NS_BEGIN xml_memory::deallocate(impl); } - xpath_query_impl(): root(0), alloc(&block) + xpath_query_impl(): root(0), alloc(&block, &oom), oom(false) { block.next = 0; block.capacity = sizeof(block.data); @@ -11645,21 +11902,9 @@ PUGI__NS_BEGIN xpath_ast_node* root; xpath_allocator alloc; xpath_memory_block block; + bool oom; }; - PUGI__FN xpath_string evaluate_string_impl(xpath_query_impl* impl, const xpath_node& n, xpath_stack_data& sd) - { - if (!impl) return xpath_string(); - - #ifdef PUGIXML_NO_EXCEPTIONS - if (setjmp(sd.error_handler)) return xpath_string(); - #endif - - xpath_context c(n, 1, 1); - - return impl->root->eval_string(c, sd.stack); - } - PUGI__FN impl::xpath_ast_node* evaluate_node_set_prepare(xpath_query_impl* impl) { if (!impl) return 0; @@ -11687,7 +11932,7 @@ namespace pugi { assert(_result.error); } - + PUGI__FN const char* xpath_exception::what() const throw() { return _result.error; @@ -11698,15 +11943,15 @@ namespace pugi return _result; } #endif - + PUGI__FN xpath_node::xpath_node() { } - + PUGI__FN xpath_node::xpath_node(const xml_node& node_): _node(node_) { } - + PUGI__FN xpath_node::xpath_node(const xml_attribute& attribute_, const xml_node& parent_): _node(attribute_ ? parent_ : xml_node()), _attribute(attribute_) { } @@ -11715,12 +11960,12 @@ namespace pugi { return _attribute ? xml_node() : _node; } - + PUGI__FN xml_attribute xpath_node::attribute() const { return _attribute; } - + PUGI__FN xml_node xpath_node::parent() const { return _attribute ? _node : _node.parent(); @@ -11734,7 +11979,7 @@ namespace pugi { return (_node || _attribute) ? 
unspecified_bool_xpath_node : 0; } - + PUGI__FN bool xpath_node::operator!() const { return !(_node || _attribute); @@ -11744,7 +11989,7 @@ namespace pugi { return _node == n._node && _attribute == n._attribute; } - + PUGI__FN bool xpath_node::operator!=(const xpath_node& n) const { return _node != n._node || _attribute != n._attribute; @@ -11795,7 +12040,7 @@ namespace pugi } memcpy(storage, begin_, size_ * sizeof(xpath_node)); - + // deallocate old buffer if (_begin != &_storage) impl::xml_memory::deallocate(_begin); @@ -11806,8 +12051,8 @@ namespace pugi } } -#if __cplusplus >= 201103 - PUGI__FN void xpath_node_set::_move(xpath_node_set& rhs) +#ifdef PUGIXML_HAS_MOVE + PUGI__FN void xpath_node_set::_move(xpath_node_set& rhs) PUGIXML_NOEXCEPT { _type = rhs._type; _storage = rhs._storage; @@ -11834,12 +12079,12 @@ namespace pugi if (_begin != &_storage) impl::xml_memory::deallocate(_begin); } - + PUGI__FN xpath_node_set::xpath_node_set(const xpath_node_set& ns): _type(type_unsorted), _begin(&_storage), _end(&_storage) { _assign(ns._begin, ns._end, ns._type); } - + PUGI__FN xpath_node_set& xpath_node_set::operator=(const xpath_node_set& ns) { if (this == &ns) return *this; @@ -11849,13 +12094,13 @@ namespace pugi return *this; } -#if __cplusplus >= 201103 - PUGI__FN xpath_node_set::xpath_node_set(xpath_node_set&& rhs): _type(type_unsorted), _begin(&_storage), _end(&_storage) +#ifdef PUGIXML_HAS_MOVE + PUGI__FN xpath_node_set::xpath_node_set(xpath_node_set&& rhs) PUGIXML_NOEXCEPT: _type(type_unsorted), _begin(&_storage), _end(&_storage) { _move(rhs); } - PUGI__FN xpath_node_set& xpath_node_set::operator=(xpath_node_set&& rhs) + PUGI__FN xpath_node_set& xpath_node_set::operator=(xpath_node_set&& rhs) PUGIXML_NOEXCEPT { if (this == &rhs) return *this; @@ -11872,17 +12117,17 @@ namespace pugi { return _type; } - + PUGI__FN size_t xpath_node_set::size() const { return _end - _begin; } - + PUGI__FN bool xpath_node_set::empty() const { return _begin == _end; } - + 
PUGI__FN const xpath_node& xpath_node_set::operator[](size_t index) const { assert(index < size()); @@ -11893,12 +12138,12 @@ namespace pugi { return _begin; } - + PUGI__FN xpath_node_set::const_iterator xpath_node_set::end() const { return _end; } - + PUGI__FN void xpath_node_set::sort(bool reverse) { _type = impl::xpath_sort(_begin, _end, _type, reverse); @@ -11944,7 +12189,7 @@ namespace pugi return static_cast(this)->name; default: - assert(!"Invalid variable type"); + assert(false && "Invalid variable type"); // unreachable return 0; } } @@ -12049,8 +12294,8 @@ namespace pugi return *this; } -#if __cplusplus >= 201103 - PUGI__FN xpath_variable_set::xpath_variable_set(xpath_variable_set&& rhs) +#ifdef PUGIXML_HAS_MOVE + PUGI__FN xpath_variable_set::xpath_variable_set(xpath_variable_set&& rhs) PUGIXML_NOEXCEPT { for (size_t i = 0; i < sizeof(_data) / sizeof(_data[0]); ++i) { @@ -12059,7 +12304,7 @@ namespace pugi } } - PUGI__FN xpath_variable_set& xpath_variable_set::operator=(xpath_variable_set&& rhs) + PUGI__FN xpath_variable_set& xpath_variable_set::operator=(xpath_variable_set&& rhs) PUGIXML_NOEXCEPT { for (size_t i = 0; i < sizeof(_data) / sizeof(_data[0]); ++i) { @@ -12230,6 +12475,15 @@ namespace pugi _impl = impl.release(); _result.error = 0; } + else + { + #ifdef PUGIXML_NO_EXCEPTIONS + if (qimpl->oom) _result.error = "Out of memory"; + #else + if (qimpl->oom) throw std::bad_alloc(); + throw xpath_exception(_result); + #endif + } } } @@ -12243,14 +12497,16 @@ namespace pugi impl::xpath_query_impl::destroy(static_cast(_impl)); } -#if __cplusplus >= 201103 - PUGI__FN xpath_query::xpath_query(xpath_query&& rhs) +#ifdef PUGIXML_HAS_MOVE + PUGI__FN xpath_query::xpath_query(xpath_query&& rhs) PUGIXML_NOEXCEPT { _impl = rhs._impl; + _result = rhs._result; rhs._impl = 0; + rhs._result = xpath_parse_result(); } - xpath_query& xpath_query::operator=(xpath_query&& rhs) + PUGI__FN xpath_query& xpath_query::operator=(xpath_query&& rhs) PUGIXML_NOEXCEPT { if (this == 
&rhs) return *this; @@ -12258,7 +12514,9 @@ namespace pugi impl::xpath_query_impl::destroy(static_cast(_impl)); _impl = rhs._impl; + _result = rhs._result; rhs._impl = 0; + rhs._result = xpath_parse_result(); return *this; } @@ -12274,37 +12532,63 @@ namespace pugi PUGI__FN bool xpath_query::evaluate_boolean(const xpath_node& n) const { if (!_impl) return false; - + impl::xpath_context c(n, 1, 1); impl::xpath_stack_data sd; - #ifdef PUGIXML_NO_EXCEPTIONS - if (setjmp(sd.error_handler)) return false; - #endif - - return static_cast(_impl)->root->eval_boolean(c, sd.stack); + bool r = static_cast(_impl)->root->eval_boolean(c, sd.stack); + + if (sd.oom) + { + #ifdef PUGIXML_NO_EXCEPTIONS + return false; + #else + throw std::bad_alloc(); + #endif + } + + return r; } - + PUGI__FN double xpath_query::evaluate_number(const xpath_node& n) const { if (!_impl) return impl::gen_nan(); - + impl::xpath_context c(n, 1, 1); impl::xpath_stack_data sd; - #ifdef PUGIXML_NO_EXCEPTIONS - if (setjmp(sd.error_handler)) return impl::gen_nan(); - #endif + double r = static_cast(_impl)->root->eval_number(c, sd.stack); - return static_cast(_impl)->root->eval_number(c, sd.stack); + if (sd.oom) + { + #ifdef PUGIXML_NO_EXCEPTIONS + return impl::gen_nan(); + #else + throw std::bad_alloc(); + #endif + } + + return r; } #ifndef PUGIXML_NO_STL PUGI__FN string_t xpath_query::evaluate_string(const xpath_node& n) const { + if (!_impl) return string_t(); + + impl::xpath_context c(n, 1, 1); impl::xpath_stack_data sd; - impl::xpath_string r = impl::evaluate_string_impl(static_cast(_impl), n, sd); + impl::xpath_string r = static_cast(_impl)->root->eval_string(c, sd.stack); + + if (sd.oom) + { + #ifdef PUGIXML_NO_EXCEPTIONS + return string_t(); + #else + throw std::bad_alloc(); + #endif + } return string_t(r.c_str(), r.length()); } @@ -12312,12 +12596,22 @@ namespace pugi PUGI__FN size_t xpath_query::evaluate_string(char_t* buffer, size_t capacity, const xpath_node& n) const { + impl::xpath_context c(n, 1, 
1); impl::xpath_stack_data sd; - impl::xpath_string r = impl::evaluate_string_impl(static_cast(_impl), n, sd); + impl::xpath_string r = _impl ? static_cast(_impl)->root->eval_string(c, sd.stack) : impl::xpath_string(); + + if (sd.oom) + { + #ifdef PUGIXML_NO_EXCEPTIONS + r = impl::xpath_string(); + #else + throw std::bad_alloc(); + #endif + } size_t full_size = r.length() + 1; - + if (capacity > 0) { size_t size = (full_size < capacity) ? full_size : capacity; @@ -12326,7 +12620,7 @@ namespace pugi memcpy(buffer, r.c_str(), (size - 1) * sizeof(char_t)); buffer[size - 1] = 0; } - + return full_size; } @@ -12338,12 +12632,17 @@ namespace pugi impl::xpath_context c(n, 1, 1); impl::xpath_stack_data sd; - #ifdef PUGIXML_NO_EXCEPTIONS - if (setjmp(sd.error_handler)) return xpath_node_set(); - #endif - impl::xpath_node_set_raw r = root->eval_node_set(c, sd.stack, impl::nodeset_eval_all); + if (sd.oom) + { + #ifdef PUGIXML_NO_EXCEPTIONS + return xpath_node_set(); + #else + throw std::bad_alloc(); + #endif + } + return xpath_node_set(r.begin(), r.end(), r.type()); } @@ -12355,12 +12654,17 @@ namespace pugi impl::xpath_context c(n, 1, 1); impl::xpath_stack_data sd; - #ifdef PUGIXML_NO_EXCEPTIONS - if (setjmp(sd.error_handler)) return xpath_node(); - #endif - impl::xpath_node_set_raw r = root->eval_node_set(c, sd.stack, impl::nodeset_eval_first); + if (sd.oom) + { + #ifdef PUGIXML_NO_EXCEPTIONS + return xpath_node(); + #else + throw std::bad_alloc(); + #endif + } + return r.first(); } @@ -12386,7 +12690,7 @@ namespace pugi PUGI__FN xpath_node xml_node::select_node(const char_t* query, xpath_variable_set* variables) const { xpath_query q(query, variables); - return select_node(q); + return q.evaluate_node(*this); } PUGI__FN xpath_node xml_node::select_node(const xpath_query& query) const @@ -12397,7 +12701,7 @@ namespace pugi PUGI__FN xpath_node_set xml_node::select_nodes(const char_t* query, xpath_variable_set* variables) const { xpath_query q(query, variables); - return 
select_nodes(q); + return q.evaluate_node_set(*this); } PUGI__FN xpath_node_set xml_node::select_nodes(const xpath_query& query) const @@ -12408,7 +12712,7 @@ namespace pugi PUGI__FN xpath_node xml_node::select_single_node(const char_t* query, xpath_variable_set* variables) const { xpath_query q(query, variables); - return select_single_node(q); + return q.evaluate_node(*this); } PUGI__FN xpath_node xml_node::select_single_node(const xpath_query& query) const @@ -12429,16 +12733,23 @@ namespace pugi # pragma warning(pop) #endif +#if defined(_MSC_VER) && defined(__c2__) +# pragma clang diagnostic pop +#endif + // Undefine all local macros (makes sure we're not leaking macros in header-only mode) #undef PUGI__NO_INLINE #undef PUGI__UNLIKELY #undef PUGI__STATIC_ASSERT #undef PUGI__DMC_VOLATILE +#undef PUGI__UNSIGNED_OVERFLOW #undef PUGI__MSVC_CRT_VERSION +#undef PUGI__SNPRINTF #undef PUGI__NS_BEGIN #undef PUGI__NS_END #undef PUGI__FN #undef PUGI__FN_NO_INLINE +#undef PUGI__GETHEADER_IMPL #undef PUGI__GETPAGE_IMPL #undef PUGI__GETPAGE #undef PUGI__NODETYPE @@ -12460,7 +12771,7 @@ namespace pugi #endif /** - * Copyright (c) 2006-2015 Arseny Kapoulkine + * Copyright (c) 2006-2018 Arseny Kapoulkine * * Permission is hereby granted, free of charge, to any person * obtaining a copy of this software and associated documentation @@ -12473,7 +12784,7 @@ namespace pugi * * The above copyright notice and this permission notice shall be * included in all copies or substantial portions of the Software. 
- * + * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND diff --git a/lib/pugixml/pugixml.h b/lib/pugixml/pugixml.h index cdd24b6d..86403be3 100644 --- a/lib/pugixml/pugixml.h +++ b/lib/pugixml/pugixml.h @@ -1,7 +1,7 @@ /** - * pugixml parser - version 1.6 + * pugixml parser - version 1.9 * -------------------------------------------------------- - * Copyright (C) 2006-2015, by Arseny Kapoulkine (arseny.kapoulkine@gmail.com) + * Copyright (C) 2006-2018, by Arseny Kapoulkine (arseny.kapoulkine@gmail.com) * Report bugs and download new versions at http://pugixml.org/ * * This library is distributed under the MIT License. See notice at the end @@ -13,7 +13,7 @@ #ifndef PUGIXML_VERSION // Define version macro; evaluates to major * 100 + minor so that it's safe to use in less-than comparisons -# define PUGIXML_VERSION 160 +# define PUGIXML_VERSION 190 #endif // Include user configuration file (this can define various configuration macros) @@ -72,6 +72,44 @@ # endif #endif +// If the platform is known to have move semantics support, compile move ctor/operator implementation +#ifndef PUGIXML_HAS_MOVE +# if __cplusplus >= 201103 +# define PUGIXML_HAS_MOVE +# elif defined(_MSC_VER) && _MSC_VER >= 1600 +# define PUGIXML_HAS_MOVE +# endif +#endif + +// If C++ is 2011 or higher, add 'noexcept' specifiers +#ifndef PUGIXML_NOEXCEPT +# if __cplusplus >= 201103 +# define PUGIXML_NOEXCEPT noexcept +# elif defined(_MSC_VER) && _MSC_VER >= 1900 +# define PUGIXML_NOEXCEPT noexcept +# else +# define PUGIXML_NOEXCEPT +# endif +#endif + +// Some functions can not be noexcept in compact mode +#ifdef PUGIXML_COMPACT +# define PUGIXML_NOEXCEPT_IF_NOT_COMPACT +#else +# define PUGIXML_NOEXCEPT_IF_NOT_COMPACT PUGIXML_NOEXCEPT +#endif + +// If C++ is 2011 or higher, add 'override' qualifiers +#ifndef PUGIXML_OVERRIDE +# if __cplusplus >= 201103 +# define 
PUGIXML_OVERRIDE override +# elif defined(_MSC_VER) && _MSC_VER >= 1700 +# define PUGIXML_OVERRIDE override +# else +# define PUGIXML_OVERRIDE +# endif +#endif + // Character interface macros #ifdef PUGIXML_WCHAR_MODE # define PUGIXML_TEXT(t) L ## t @@ -133,13 +171,13 @@ namespace pugi // This flag determines if EOL characters are normalized (converted to #xA) during parsing. This flag is on by default. const unsigned int parse_eol = 0x0020; - + // This flag determines if attribute values are normalized using CDATA normalization rules during parsing. This flag is on by default. const unsigned int parse_wconv_attribute = 0x0040; // This flag determines if attribute values are normalized using NMTOKENS normalization rules during parsing. This flag is off by default. const unsigned int parse_wnorm_attribute = 0x0080; - + // This flag determines if document declaration (node_declaration) is added to the DOM tree. This flag is off by default. const unsigned int parse_declaration = 0x0100; @@ -158,6 +196,11 @@ namespace pugi // is a valid document. This flag is off by default. const unsigned int parse_fragment = 0x1000; + // This flag determines if plain character data is be stored in the parent element's value. This significantly changes the structure of + // the document; this flag is only recommended for parsing documents with many PCDATA nodes in memory-constrained environments. + // This flag is off by default. + const unsigned int parse_embed_pcdata = 0x2000; + // The default parsing mode. // Elements, PCDATA and CDATA sections are added to the DOM tree, character/reference entities are expanded, // End-of-Line characters are normalized, attribute values are normalized using CDATA normalization rules. @@ -184,16 +227,16 @@ namespace pugi }; // Formatting flags - + // Indent the nodes that are written to output stream with as many indentation strings as deep the node is in DOM tree. This flag is on by default. 
const unsigned int format_indent = 0x01; - + // Write encoding-specific BOM to the output stream. This flag is off by default. const unsigned int format_write_bom = 0x02; // Use raw output mode (no indentation and no line breaks are written). This flag is off by default. const unsigned int format_raw = 0x04; - + // Omit default XML declaration even if there is no declaration in the document. This flag is off by default. const unsigned int format_no_declaration = 0x08; @@ -206,6 +249,9 @@ namespace pugi // Write every attribute on a new line with appropriate indentation. This flag is off by default. const unsigned int format_indent_attributes = 0x40; + // Don't output empty element tags, instead writing an explicit start and end tag even if there are no children. This flag is off by default. + const unsigned int format_no_empty_element_tags = 0x80; + // The default set of formatting flags. // Nodes are indented depending on their depth in DOM tree, a default declaration is output if document has none. const unsigned int format_default = format_indent; @@ -225,7 +271,7 @@ namespace pugi class xml_node; class xml_text; - + #ifndef PUGIXML_NO_XPATH class xpath_node; class xpath_node_set; @@ -268,7 +314,7 @@ namespace pugi // Construct writer from a FILE* object; void* is used to avoid header dependencies on stdio xml_writer_file(void* file); - virtual void write(const void* data, size_t size); + virtual void write(const void* data, size_t size) PUGIXML_OVERRIDE; private: void* file; @@ -283,7 +329,7 @@ namespace pugi xml_writer_stream(std::basic_ostream >& stream); xml_writer_stream(std::basic_ostream >& stream); - virtual void write(const void* data, size_t size); + virtual void write(const void* data, size_t size) PUGIXML_OVERRIDE; private: std::basic_ostream >* narrow_stream; @@ -299,13 +345,13 @@ namespace pugi private: xml_attribute_struct* _attr; - + typedef void (*unspecified_bool_type)(xml_attribute***); public: // Default constructor. 
Constructs an empty attribute. xml_attribute(); - + // Constructs attribute from internal pointer explicit xml_attribute(xml_attribute_struct* attr); @@ -354,6 +400,8 @@ namespace pugi // Set attribute value with type conversion (numbers are converted to strings, boolean is converted to "true"/"false") bool set_value(int rhs); bool set_value(unsigned int rhs); + bool set_value(long rhs); + bool set_value(unsigned long rhs); bool set_value(double rhs); bool set_value(float rhs); bool set_value(bool rhs); @@ -367,6 +415,8 @@ namespace pugi xml_attribute& operator=(const char_t* rhs); xml_attribute& operator=(int rhs); xml_attribute& operator=(unsigned int rhs); + xml_attribute& operator=(long rhs); + xml_attribute& operator=(unsigned long rhs); xml_attribute& operator=(double rhs); xml_attribute& operator=(float rhs); xml_attribute& operator=(bool rhs); @@ -417,7 +467,7 @@ namespace pugi // Borland C++ workaround bool operator!() const; - + // Comparison operators (compares wrapped node pointers) bool operator==(const xml_node& r) const; bool operator!=(const xml_node& r) const; @@ -438,7 +488,7 @@ namespace pugi // Get node value, or "" if node is empty or it has no value // Note: For text node.value() does not return "text"! Use child_value() or text() methods to access text inside nodes. const char_t* value() const; - + // Get attribute list xml_attribute first_attribute() const; xml_attribute last_attribute() const; @@ -450,7 +500,7 @@ namespace pugi // Get next/previous sibling in the children list of the parent node xml_node next_sibling() const; xml_node previous_sibling() const; - + // Get parent node xml_node parent() const; @@ -478,7 +528,7 @@ namespace pugi // Set node name/value (returns false if node is empty, there is not enough memory, or node can not have name/value) bool set_name(const char_t* rhs); bool set_value(const char_t* rhs); - + // Add attribute with specified name. Returns added attribute, or empty attribute on errors. 
xml_attribute append_attribute(const char_t* name); xml_attribute prepend_attribute(const char_t* name); @@ -532,11 +582,11 @@ namespace pugi template xml_attribute find_attribute(Predicate pred) const { if (!_root) return xml_attribute(); - + for (xml_attribute attrib = first_attribute(); attrib; attrib = attrib.next_attribute()) if (pred(attrib)) return attrib; - + return xml_attribute(); } @@ -544,11 +594,11 @@ namespace pugi template xml_node find_child(Predicate pred) const { if (!_root) return xml_node(); - + for (xml_node node = first_child(); node; node = node.next_sibling()) if (pred(node)) return node; - + return xml_node(); } @@ -558,7 +608,7 @@ namespace pugi if (!_root) return xml_node(); xml_node cur = first_child(); - + while (cur._root && cur._root != _root) { if (pred(cur)) return cur; @@ -590,7 +640,7 @@ namespace pugi // Recursively traverse subtree with xml_tree_walker bool traverse(xml_tree_walker& walker); - + #ifndef PUGIXML_NO_XPATH // Select single node by evaluating XPath query. Returns first node from the resulting node set. xpath_node select_node(const char_t* query, xpath_variable_set* variables = 0) const; @@ -601,11 +651,11 @@ namespace pugi xpath_node_set select_nodes(const xpath_query& query) const; // (deprecated: use select_node instead) Select single node by evaluating XPath query. 
- xpath_node select_single_node(const char_t* query, xpath_variable_set* variables = 0) const; - xpath_node select_single_node(const xpath_query& query) const; + PUGIXML_DEPRECATED xpath_node select_single_node(const char_t* query, xpath_variable_set* variables = 0) const; + PUGIXML_DEPRECATED xpath_node select_single_node(const xpath_query& query) const; #endif - + // Print subtree using a writer object void print(xml_writer& writer, const char_t* indent = PUGIXML_TEXT("\t"), unsigned int flags = format_default, xml_encoding encoding = encoding_auto, unsigned int depth = 0) const; @@ -701,6 +751,8 @@ namespace pugi // Set text with type conversion (numbers are converted to strings, boolean is converted to "true"/"false") bool set(int rhs); bool set(unsigned int rhs); + bool set(long rhs); + bool set(unsigned long rhs); bool set(double rhs); bool set(float rhs); bool set(bool rhs); @@ -714,6 +766,8 @@ namespace pugi xml_text& operator=(const char_t* rhs); xml_text& operator=(int rhs); xml_text& operator=(unsigned int rhs); + xml_text& operator=(long rhs); + xml_text& operator=(unsigned long rhs); xml_text& operator=(double rhs); xml_text& operator=(float rhs); xml_text& operator=(bool rhs); @@ -867,11 +921,11 @@ namespace pugi private: int _depth; - + protected: // Get current traversal depth int depth() const; - + public: xml_tree_walker(); virtual ~xml_tree_walker(); @@ -942,13 +996,14 @@ namespace pugi char_t* _buffer; char _memory[192]; - + // Non-copyable semantics xml_document(const xml_document&); - const xml_document& operator=(const xml_document&); + xml_document& operator=(const xml_document&); - void create(); - void destroy(); + void _create(); + void _destroy(); + void _move(xml_document& rhs) PUGIXML_NOEXCEPT_IF_NOT_COMPACT; public: // Default constructor, makes empty document @@ -957,6 +1012,12 @@ namespace pugi // Destructor, invalidates all node/attribute handles to this document ~xml_document(); + #ifdef PUGIXML_HAS_MOVE + // Move semantics 
support + xml_document(xml_document&& rhs) PUGIXML_NOEXCEPT_IF_NOT_COMPACT; + xml_document& operator=(xml_document&& rhs) PUGIXML_NOEXCEPT_IF_NOT_COMPACT; + #endif + // Removes all nodes, leaving the empty document void reset(); @@ -970,7 +1031,7 @@ namespace pugi #endif // (deprecated: use load_string instead) Load document from zero-terminated string. No encoding conversions are applied. - xml_parse_result load(const char_t* contents, unsigned int options = parse_default); + PUGIXML_DEPRECATED xml_parse_result load(const char_t* contents, unsigned int options = parse_default); // Load document from zero-terminated string. No encoding conversions are applied. xml_parse_result load_string(const char_t* contents, unsigned int options = parse_default); @@ -1051,7 +1112,7 @@ namespace pugi // Non-copyable semantics xpath_variable(const xpath_variable&); xpath_variable& operator=(const xpath_variable&); - + public: // Get variable name const char_t* name() const; @@ -1095,10 +1156,10 @@ namespace pugi xpath_variable_set(const xpath_variable_set& rhs); xpath_variable_set& operator=(const xpath_variable_set& rhs); - #if __cplusplus >= 201103 + #ifdef PUGIXML_HAS_MOVE // Move semantics support - xpath_variable_set(xpath_variable_set&& rhs); - xpath_variable_set& operator=(xpath_variable_set&& rhs); + xpath_variable_set(xpath_variable_set&& rhs) PUGIXML_NOEXCEPT; + xpath_variable_set& operator=(xpath_variable_set&& rhs) PUGIXML_NOEXCEPT; #endif // Add a new variable or get the existing one, if the types match @@ -1139,29 +1200,29 @@ namespace pugi // Destructor ~xpath_query(); - #if __cplusplus >= 201103 + #ifdef PUGIXML_HAS_MOVE // Move semantics support - xpath_query(xpath_query&& rhs); - xpath_query& operator=(xpath_query&& rhs); + xpath_query(xpath_query&& rhs) PUGIXML_NOEXCEPT; + xpath_query& operator=(xpath_query&& rhs) PUGIXML_NOEXCEPT; #endif // Get query expression return type xpath_value_type return_type() const; - + // Evaluate expression as boolean value in the 
specified context; performs type conversion if necessary. // If PUGIXML_NO_EXCEPTIONS is not defined, throws std::bad_alloc on out of memory errors. bool evaluate_boolean(const xpath_node& n) const; - + // Evaluate expression as double value in the specified context; performs type conversion if necessary. // If PUGIXML_NO_EXCEPTIONS is not defined, throws std::bad_alloc on out of memory errors. double evaluate_number(const xpath_node& n) const; - + #ifndef PUGIXML_NO_STL // Evaluate expression as string value in the specified context; performs type conversion if necessary. // If PUGIXML_NO_EXCEPTIONS is not defined, throws std::bad_alloc on out of memory errors. string_t evaluate_string(const xpath_node& n) const; #endif - + // Evaluate expression as string value in the specified context; performs type conversion if necessary. // At most capacity characters are written to the destination buffer, full result size is returned (includes terminating zero). // If PUGIXML_NO_EXCEPTIONS is not defined, throws std::bad_alloc on out of memory errors. 
@@ -1188,7 +1249,7 @@ namespace pugi // Borland C++ workaround bool operator!() const; }; - + #ifndef PUGIXML_NO_EXCEPTIONS // XPath exception class class PUGIXML_CLASS xpath_exception: public std::exception @@ -1201,26 +1262,26 @@ namespace pugi explicit xpath_exception(const xpath_parse_result& result); // Get error message - virtual const char* what() const throw(); + virtual const char* what() const throw() PUGIXML_OVERRIDE; // Get parse result const xpath_parse_result& result() const; }; #endif - + // XPath node class (either xml_node or xml_attribute) class PUGIXML_CLASS xpath_node { private: xml_node _node; xml_attribute _attribute; - + typedef void (*unspecified_bool_type)(xpath_node***); public: // Default constructor; constructs empty XPath node xpath_node(); - + // Construct XPath node from XML node/attribute xpath_node(const xml_node& node); xpath_node(const xml_attribute& attribute, const xml_node& parent); @@ -1228,13 +1289,13 @@ namespace pugi // Get node/attribute, if any xml_node node() const; xml_attribute attribute() const; - + // Get parent of contained node/attribute xml_node parent() const; // Safe bool conversion operator operator unspecified_bool_type() const; - + // Borland C++ workaround bool operator!() const; @@ -1260,13 +1321,13 @@ namespace pugi type_sorted, // Sorted by document order (ascending) type_sorted_reverse // Sorted by document order (descending) }; - + // Constant iterator type typedef const xpath_node* const_iterator; // We define non-constant iterator to be the same as constant iterator so that various generic algorithms (i.e. boost foreach) work typedef const xpath_node* iterator; - + // Default constructor. Constructs empty set. 
xpath_node_set(); @@ -1275,49 +1336,49 @@ namespace pugi // Destructor ~xpath_node_set(); - + // Copy constructor/assignment operator xpath_node_set(const xpath_node_set& ns); xpath_node_set& operator=(const xpath_node_set& ns); - #if __cplusplus >= 201103 + #ifdef PUGIXML_HAS_MOVE // Move semantics support - xpath_node_set(xpath_node_set&& rhs); - xpath_node_set& operator=(xpath_node_set&& rhs); + xpath_node_set(xpath_node_set&& rhs) PUGIXML_NOEXCEPT; + xpath_node_set& operator=(xpath_node_set&& rhs) PUGIXML_NOEXCEPT; #endif // Get collection type type_t type() const; - + // Get collection size size_t size() const; // Indexing operator const xpath_node& operator[](size_t index) const; - + // Collection iterators const_iterator begin() const; const_iterator end() const; // Sort the collection in ascending/descending order by document order void sort(bool reverse = false); - + // Get first node in the collection by document order xpath_node first() const; - + // Check if collection is empty bool empty() const; - + private: type_t _type; - + xpath_node _storage; - + xpath_node* _begin; xpath_node* _end; void _assign(const_iterator begin, const_iterator end, type_t type); - void _move(xpath_node_set& rhs); + void _move(xpath_node_set& rhs) PUGIXML_NOEXCEPT; }; #endif @@ -1325,7 +1386,7 @@ namespace pugi // Convert wide string to UTF8 std::basic_string, std::allocator > PUGIXML_FUNCTION as_utf8(const wchar_t* str); std::basic_string, std::allocator > PUGIXML_FUNCTION as_utf8(const std::basic_string, std::allocator >& str); - + // Convert UTF8 to wide string std::basic_string, std::allocator > PUGIXML_FUNCTION as_wide(const char* str); std::basic_string, std::allocator > PUGIXML_FUNCTION as_wide(const std::basic_string, std::allocator >& str); @@ -1333,13 +1394,13 @@ namespace pugi // Memory allocation function interface; returns pointer to allocated memory or NULL on failure typedef void* (*allocation_function)(size_t size); - + // Memory deallocation function 
interface typedef void (*deallocation_function)(void* ptr); // Override default memory management functions. All subsequent allocations/deallocations will be performed via supplied functions. void PUGIXML_FUNCTION set_memory_management_functions(allocation_function allocate, deallocation_function deallocate); - + // Get current memory management functions allocation_function PUGIXML_FUNCTION get_memory_allocation_function(); deallocation_function PUGIXML_FUNCTION get_memory_deallocation_function(); @@ -1375,7 +1436,7 @@ namespace std #endif /** - * Copyright (c) 2006-2015 Arseny Kapoulkine + * Copyright (c) 2006-2018 Arseny Kapoulkine * * Permission is hereby granted, free of charge, to any person * obtaining a copy of this software and associated documentation @@ -1388,7 +1449,7 @@ namespace std * * The above copyright notice and this permission notice shall be * included in all copies or substantial portions of the Software. - * + * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND diff --git a/lib/pugixml/readme.txt b/lib/pugixml/readme.txt index faa41d37..5beb08a9 100644 --- a/lib/pugixml/readme.txt +++ b/lib/pugixml/readme.txt @@ -1,6 +1,6 @@ -pugixml 1.6 - an XML processing library +pugixml 1.9 - an XML processing library -Copyright (C) 2006-2015, by Arseny Kapoulkine (arseny.kapoulkine@gmail.com) +Copyright (C) 2006-2018, by Arseny Kapoulkine (arseny.kapoulkine@gmail.com) Report bugs and download new versions at http://pugixml.org/ This is the distribution of pugixml, which is a C++ XML processing library, @@ -28,7 +28,7 @@ The distribution contains the following folders: This library is distributed under the MIT License: -Copyright (c) 2006-2015 Arseny Kapoulkine +Copyright (c) 2006-2018 Arseny Kapoulkine Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated 
documentation diff --git a/lib/serialisation/XmlIO.cc b/lib/serialisation/XmlIO.cc index 8ac7422c..291b8ad1 100644 --- a/lib/serialisation/XmlIO.cc +++ b/lib/serialisation/XmlIO.cc @@ -54,6 +54,11 @@ void XmlWriter::push(const string &s) node_ = node_.append_child(s.c_str()); } +void XmlWriter::pushXmlString(const std::string &s) +{ + +} + void XmlWriter::pop(void) { node_ = node_.parent(); @@ -65,40 +70,47 @@ std::string XmlWriter::XmlString(void) return oss.str(); } -XmlReader::XmlReader(const char *xmlstring,string toplev) : fileName_("") -{ - pugi::xml_parse_result result; - result = doc_.load_string(xmlstring); - if ( !result ) { - cerr << "XML error description (from char *): " << result.description() << "\nXML\n"<< xmlstring << "\n"; - cerr << "XML error offset (from char *) " << result.offset << "\nXML\n"<< xmlstring <<"\n"; - abort(); - } - if ( toplev == std::string("") ) { - node_ = doc_; - } else { - node_ = doc_.child(toplev.c_str()); - } -} - // Reader implementation /////////////////////////////////////////////////////// -XmlReader::XmlReader(const string &fileName,string toplev) : fileName_(fileName) +void XmlReader::initDoc(const std::string &toplev) { - pugi::xml_parse_result result; - result = doc_.load_file(fileName_.c_str()); - if ( !result ) { - cerr << "XML error description: " << result.description() <<" "<< fileName_ <<"\n"; - cerr << "XML error offset : " << result.offset <<" "<< fileName_ <<"\n"; - abort(); - } if ( toplev == std::string("") ) { - node_ = doc_; + node_ = doc_; } else { node_ = doc_.child(toplev.c_str()); } } -bool XmlReader::push(const string &s) +XmlReader::XmlReader(const char *xmlstring, const std::string toplev) +: fileName_("") +{ + auto result = doc_.load_string(xmlstring); + + if ( !result ) { + std::cerr << "XML error description (from char *): " + << result.description() << "\nXML\n"<< xmlstring << "\n"; + std::cerr << "XML error offset (from char *) " + << result.offset << "\nXML\n"<< xmlstring << std::endl; + 
abort(); + } + initDoc(toplev); +} + +XmlReader::XmlReader(const std::string &fileName, std::string toplev) +: fileName_(fileName) +{ + auto result = doc_.load_file(fileName_.c_str()); + + if ( !result ) { + std::cerr << "XML error description: " + << result.description() <<" "<< fileName_ <<"\n"; + std::cerr << "XML error offset : " + << result.offset <<" "<< fileName_ << std::endl; + abort(); + } + initDoc(toplev); +} + +bool XmlReader::push(const std::string &s) { if (node_.child(s.c_str())) { @@ -129,7 +141,6 @@ bool XmlReader::nextElement(const std::string &s) { return false; } - } template <> diff --git a/lib/serialisation/XmlIO.h b/lib/serialisation/XmlIO.h index e37eb8d9..799c5883 100644 --- a/lib/serialisation/XmlIO.h +++ b/lib/serialisation/XmlIO.h @@ -47,9 +47,10 @@ namespace Grid class XmlWriter: public Writer { public: - XmlWriter(const std::string &fileName,std::string toplev = std::string("grid") ); + XmlWriter(const std::string &fileName, std::string toplev = std::string("grid") ); virtual ~XmlWriter(void); void push(const std::string &s); + void pushXmlString(const std::string &s); void pop(void); template void writeDefault(const std::string &s, const U &x); @@ -65,8 +66,8 @@ namespace Grid class XmlReader: public Reader { public: - XmlReader(const char *xmlstring,std::string toplev = std::string("grid") ); - XmlReader(const std::string &fileName,std::string toplev = std::string("grid") ); + XmlReader(const char *xmlstring, std::string toplev = std::string("grid") ); + XmlReader(const std::string &fileName, std::string toplev = std::string("grid") ); virtual ~XmlReader(void) = default; bool push(const std::string &s); void pop(void); @@ -75,6 +76,8 @@ namespace Grid void readDefault(const std::string &s, U &output); template void readDefault(const std::string &s, std::vector &output); + private: + void initDoc(const std::string &toplev); private: pugi::xml_document doc_; pugi::xml_node node_; From 1569a374a986334775513c4882d406a610855ad8 Mon Sep 17 
00:00:00 2001 From: Antonin Portelli Date: Fri, 6 Apr 2018 18:32:14 +0100 Subject: [PATCH 347/377] XML interface polish, XML fragments can be pushed into a writer --- lib/parallelIO/IldgIO.h | 19 +++++------ lib/serialisation/XmlIO.cc | 65 ++++++++++++++++++++------------------ lib/serialisation/XmlIO.h | 7 ++-- 3 files changed, 48 insertions(+), 43 deletions(-) diff --git a/lib/parallelIO/IldgIO.h b/lib/parallelIO/IldgIO.h index d1a684f3..90c05546 100644 --- a/lib/parallelIO/IldgIO.h +++ b/lib/parallelIO/IldgIO.h @@ -248,7 +248,6 @@ class GridLimeReader : public BinaryIO { template void readLimeObject(serialisable_object &object,std::string object_name,std::string record_name) { - std::string xmlstring; // should this be a do while; can we miss a first record?? while ( limeReaderNextRecord(LimeR) == LIME_SUCCESS ) { @@ -262,7 +261,8 @@ class GridLimeReader : public BinaryIO { limeReaderReadData((void *)&xmlc[0], &nbytes, LimeR); // std::cout << GridLogMessage<< " readLimeObject matches XML " << &xmlc[0] < using namespace Grid; using namespace std; +void Grid::xmlCheckParse(const pugi::xml_parse_result &result, const std::string name) +{ + if (!result) + { + std::cerr << "XML parsing error for " << name << std::endl; + std::cerr << "XML error description: " << result.description() << std::endl; + std::cerr << "XML error offset : " << result.offset << std::endl; + abort(); + } +} + // Writer implementation /////////////////////////////////////////////////////// XmlWriter::XmlWriter(const string &fileName, string toplev) : fileName_(fileName) { @@ -56,7 +67,14 @@ void XmlWriter::push(const string &s) void XmlWriter::pushXmlString(const std::string &s) { + pugi::xml_document doc; + auto result = doc.load_buffer(s.c_str(), s.size()); + xmlCheckParse(result, "fragment\n'" + s +"'"); + for (pugi::xml_node child = doc.first_child(); child; child = child.next_sibling()) + { + node_ = node_.append_copy(child); + } } void XmlWriter::pop(void) @@ -71,8 +89,23 @@ std::string 
XmlWriter::XmlString(void) } // Reader implementation /////////////////////////////////////////////////////// -void XmlReader::initDoc(const std::string &toplev) +XmlReader::XmlReader(const std::string &s, const bool isBuffer, + std::string toplev) { + pugi::xml_parse_result result; + + if (isBuffer) + { + fileName_ = ""; + result = doc_.load_string(s.c_str()); + xmlCheckParse(result, "string\n'" + s + "'"); + } + else + { + fileName_ = s; + result = doc_.load_file(s.c_str()); + xmlCheckParse(result, "file '" + fileName_ + "'"); + } if ( toplev == std::string("") ) { node_ = doc_; } else { @@ -80,36 +113,6 @@ void XmlReader::initDoc(const std::string &toplev) } } -XmlReader::XmlReader(const char *xmlstring, const std::string toplev) -: fileName_("") -{ - auto result = doc_.load_string(xmlstring); - - if ( !result ) { - std::cerr << "XML error description (from char *): " - << result.description() << "\nXML\n"<< xmlstring << "\n"; - std::cerr << "XML error offset (from char *) " - << result.offset << "\nXML\n"<< xmlstring << std::endl; - abort(); - } - initDoc(toplev); -} - -XmlReader::XmlReader(const std::string &fileName, std::string toplev) -: fileName_(fileName) -{ - auto result = doc_.load_file(fileName_.c_str()); - - if ( !result ) { - std::cerr << "XML error description: " - << result.description() <<" "<< fileName_ <<"\n"; - std::cerr << "XML error offset : " - << result.offset <<" "<< fileName_ << std::endl; - abort(); - } - initDoc(toplev); -} - bool XmlReader::push(const std::string &s) { if (node_.child(s.c_str())) diff --git a/lib/serialisation/XmlIO.h b/lib/serialisation/XmlIO.h index 799c5883..673b2f46 100644 --- a/lib/serialisation/XmlIO.h +++ b/lib/serialisation/XmlIO.h @@ -43,6 +43,7 @@ Author: paboyle namespace Grid { + void xmlCheckParse(const pugi::xml_parse_result &result, const std::string name); class XmlWriter: public Writer { @@ -66,8 +67,8 @@ namespace Grid class XmlReader: public Reader { public: - XmlReader(const char *xmlstring, 
std::string toplev = std::string("grid") ); - XmlReader(const std::string &fileName, std::string toplev = std::string("grid") ); + XmlReader(const std::string &fileName, const bool isBuffer = false, + std::string toplev = std::string("grid") ); virtual ~XmlReader(void) = default; bool push(const std::string &s); void pop(void); @@ -77,7 +78,7 @@ namespace Grid template void readDefault(const std::string &s, std::vector &output); private: - void initDoc(const std::string &toplev); + void checkParse(const pugi::xml_parse_result &result, const std::string name); private: pugi::xml_document doc_; pugi::xml_node node_; From c8d4d184ee832ecdebe656cd4c17db13509cf455 Mon Sep 17 00:00:00 2001 From: Antonin Portelli Date: Fri, 6 Apr 2018 22:53:01 +0100 Subject: [PATCH 348/377] XML push fragment fix --- lib/serialisation/XmlIO.cc | 1 + 1 file changed, 1 insertion(+) diff --git a/lib/serialisation/XmlIO.cc b/lib/serialisation/XmlIO.cc index 050ca6ff..62f2e58c 100644 --- a/lib/serialisation/XmlIO.cc +++ b/lib/serialisation/XmlIO.cc @@ -75,6 +75,7 @@ void XmlWriter::pushXmlString(const std::string &s) { node_ = node_.append_copy(child); } + pop(); } void XmlWriter::pop(void) From ddcaa6ad299fecc457e0eefb784e75d2d994e3ee Mon Sep 17 00:00:00 2001 From: paboyle Date: Tue, 17 Apr 2018 10:48:33 +0100 Subject: [PATCH 349/377] Master does header on Nersc --- lib/parallelIO/NerscIO.h | 24 +++++++++++++++++------- 1 file changed, 17 insertions(+), 7 deletions(-) diff --git a/lib/parallelIO/NerscIO.h b/lib/parallelIO/NerscIO.h index e2c2bc39..0eb52071 100644 --- a/lib/parallelIO/NerscIO.h +++ b/lib/parallelIO/NerscIO.h @@ -237,20 +237,24 @@ namespace Grid { MachineCharacteristics(header); uint64_t offset; - - truncate(file); // Sod it -- always write 3x3 double header.floating_point = std::string("IEEE64BIG"); header.data_type = std::string("4D_SU3_GAUGE_3x3"); GaugeSimpleUnmunger munge; - offset = writeHeader(header,file); + if ( grid->IsBoss() ) { + truncate(file); + offset = 
writeHeader(header,file); + } + grid->Broadcast(0,(void *)&offset,sizeof(offset)); uint32_t nersc_csum,scidac_csuma,scidac_csumb; BinaryIO::writeLatticeObject(Umu,file,munge,offset,header.floating_point, nersc_csum,scidac_csuma,scidac_csumb); header.checksum = nersc_csum; - writeHeader(header,file); + if ( grid->IsBoss() ) { + writeHeader(header,file); + } std::cout<IsBoss() ) { + truncate(file); + offset = writeHeader(header,file); + } + grid->Broadcast(0,(void *)&offset,sizeof(offset)); + uint32_t nersc_csum,scidac_csuma,scidac_csumb; BinaryIO::writeRNG(serial,parallel,file,offset,nersc_csum,scidac_csuma,scidac_csumb); header.checksum = nersc_csum; - offset = writeHeader(header,file); + if ( grid->IsBoss() ) { + offset = writeHeader(header,file); + } std::cout< Date: Wed, 18 Apr 2018 01:43:29 +0100 Subject: [PATCH 350/377] physical fermion interface, cshift benchmark in SU3. --- benchmarks/Benchmark_su3.cc | 44 ++++++++++++++++++++--- lib/qcd/action/fermion/CayleyFermion5D.cc | 29 +++++++++++++++ lib/qcd/action/fermion/CayleyFermion5D.h | 5 +++ lib/qcd/action/fermion/FermionOperator.h | 13 +++++++ 4 files changed, 86 insertions(+), 5 deletions(-) diff --git a/benchmarks/Benchmark_su3.cc b/benchmarks/Benchmark_su3.cc index 035af2d9..628ad5bd 100644 --- a/benchmarks/Benchmark_su3.cc +++ b/benchmarks/Benchmark_su3.cc @@ -35,7 +35,8 @@ using namespace Grid::QCD; int main (int argc, char ** argv) { Grid_init(&argc,&argv); -#define LMAX (64) +#define LMAX (40) +#define LINC (4) int64_t Nloop=20; @@ -51,7 +52,7 @@ int main (int argc, char ** argv) std::cout< latt_size ({lat*mpi_layout[0],lat*mpi_layout[1],lat*mpi_layout[2],lat*mpi_layout[3]}); int64_t vol = latt_size[0]*latt_size[1]*latt_size[2]*latt_size[3]; @@ -83,7 +84,7 @@ int main (int argc, char ** argv) std::cout< latt_size ({lat*mpi_layout[0],lat*mpi_layout[1],lat*mpi_layout[2],lat*mpi_layout[3]}); int64_t vol = latt_size[0]*latt_size[1]*latt_size[2]*latt_size[3]; @@ -114,7 +115,7 @@ int main (int argc, char ** 
argv) std::cout< latt_size ({lat*mpi_layout[0],lat*mpi_layout[1],lat*mpi_layout[2],lat*mpi_layout[3]}); int64_t vol = latt_size[0]*latt_size[1]*latt_size[2]*latt_size[3]; @@ -145,7 +146,7 @@ int main (int argc, char ** argv) std::cout< latt_size ({lat*mpi_layout[0],lat*mpi_layout[1],lat*mpi_layout[2],lat*mpi_layout[3]}); int64_t vol = latt_size[0]*latt_size[1]*latt_size[2]*latt_size[3]; @@ -170,5 +171,38 @@ int main (int argc, char ** argv) } + + std::cout< latt_size ({lat*mpi_layout[0],lat*mpi_layout[1],lat*mpi_layout[2],lat*mpi_layout[3]}); + int64_t vol = latt_size[0]*latt_size[1]*latt_size[2]*latt_size[3]; + + GridCartesian Grid(latt_size,simd_layout,mpi_layout); + GridParallelRNG pRNG(&Grid); pRNG.SeedFixedIntegers(std::vector({45,12,81,9})); + + LatticeColourMatrix z(&Grid); random(pRNG,z); + LatticeColourMatrix x(&Grid); random(pRNG,x); + LatticeColourMatrix y(&Grid); random(pRNG,y); + + for(int mu=0;mu<=4;mu++){ + double start=usecond(); + for(int64_t i=0;i +void CayleyFermion5D::ExportPhysicalFermionSolution(const FermionField &solution5d,FermionField &exported4d) +{ + int Ls = this->Ls; + FermionField tmp(this->FermionGrid()); + tmp = solution5d; + conformable(solution5d._grid,this->FermionGrid()); + conformable(exported4d._grid,this->GaugeGrid()); + axpby_ssp_pminus(tmp, 0., solution5d, 1., solution5d, 0, 0); + axpby_ssp_pplus (tmp, 1., tmp , 1., solution5d, 0, Ls-1); + ExtractSlice(exported4d, tmp, 0, 0); +} +template +void CayleyFermion5D::ImportPhysicalFermionSource(const FermionField &input4d,FermionField &imported5d) +{ + int Ls = this->Ls; + FermionField tmp(this->FermionGrid()); + conformable(imported5d._grid,this->FermionGrid()); + conformable(input4d._grid ,this->GaugeGrid()); + tmp = zero; + InsertSlice(input4d, tmp, 0 , 0); + InsertSlice(input4d, tmp, Ls-1, 0); + axpby_ssp_pplus (tmp, 0., tmp, 1., tmp, 0, 0); + axpby_ssp_pminus(tmp, 0., tmp, 1., tmp, Ls-1, Ls-1); + Dminus(tmp,imported5d); +} template void CayleyFermion5D::Dminus(const 
FermionField &psi, FermionField &chi) { diff --git a/lib/qcd/action/fermion/CayleyFermion5D.h b/lib/qcd/action/fermion/CayleyFermion5D.h index ef75235a..b370b09d 100644 --- a/lib/qcd/action/fermion/CayleyFermion5D.h +++ b/lib/qcd/action/fermion/CayleyFermion5D.h @@ -83,8 +83,13 @@ namespace Grid { virtual void M5D (const FermionField &psi, FermionField &chi); virtual void M5Ddag(const FermionField &psi, FermionField &chi); + /////////////////////////////////////////////////////////////// + // Physical surface field utilities + /////////////////////////////////////////////////////////////// virtual void Dminus(const FermionField &psi, FermionField &chi); virtual void DminusDag(const FermionField &psi, FermionField &chi); + virtual void ExportPhysicalFermionSolution(const FermionField &solution5d,FermionField &exported4d); + virtual void ImportPhysicalFermionSource(const FermionField &input4d,FermionField &imported5d); ///////////////////////////////////////////////////// // Instantiate different versions depending on Impl diff --git a/lib/qcd/action/fermion/FermionOperator.h b/lib/qcd/action/fermion/FermionOperator.h index 1d395d53..5be36f13 100644 --- a/lib/qcd/action/fermion/FermionOperator.h +++ b/lib/qcd/action/fermion/FermionOperator.h @@ -128,6 +128,19 @@ namespace Grid { std::vector mom, unsigned int tmin, unsigned int tmax)=0; + /////////////////////////////////////////////// + // Physical field import/export + /////////////////////////////////////////////// + virtual void Dminus(const FermionField &psi, FermionField &chi) { chi=psi; } + virtual void DminusDag(const FermionField &psi, FermionField &chi) { chi=psi; } + virtual void ImportPhysicalFermionSource(const FermionField &input,FermionField &imported) + { + imported = input; + }; + virtual void ExportPhysicalFermionSolution(const FermionField &solution,FermionField &exported) + { + exported=solution; + }; }; } From 870b1a85aebc8bf545a162053fb04823fd937fa2 Mon Sep 17 00:00:00 2001 From: paboyle Date: 
Wed, 18 Apr 2018 14:17:49 +0100 Subject: [PATCH 351/377] Think I have the physical prop interface to CF and PF overlap right, but need a strong check/regression. Only support Hw overlap, not Ht for now. Ht needs a new Dminus implemented. --- .../fermion/ContinuedFractionFermion5D.cc | 21 +++++++++++++++++++ .../fermion/ContinuedFractionFermion5D.h | 8 +++++++ .../fermion/PartialFractionFermion5D.cc | 21 +++++++++++++++++++ .../action/fermion/PartialFractionFermion5D.h | 6 ++++++ 4 files changed, 56 insertions(+) diff --git a/lib/qcd/action/fermion/ContinuedFractionFermion5D.cc b/lib/qcd/action/fermion/ContinuedFractionFermion5D.cc index 5d39ef9b..f6857115 100644 --- a/lib/qcd/action/fermion/ContinuedFractionFermion5D.cc +++ b/lib/qcd/action/fermion/ContinuedFractionFermion5D.cc @@ -295,6 +295,27 @@ namespace Grid { assert((Ls&0x1)==1); // Odd Ls required } + template + void ContinuedFractionFermion5D::ExportPhysicalFermionSolution(const FermionField &solution5d,FermionField &exported4d) + { + int Ls = this->Ls; + conformable(solution5d._grid,this->FermionGrid()); + conformable(exported4d._grid,this->GaugeGrid()); + ExtractSlice(exported4d, solution5d, Ls-1, Ls-1); + } + template + void ContinuedFractionFermion5D::ImportPhysicalFermionSource(const FermionField &input4d,FermionField &imported5d) + { + int Ls = this->Ls; + conformable(imported5d._grid,this->FermionGrid()); + conformable(input4d._grid ,this->GaugeGrid()); + FermionField tmp(this->FermionGrid()); + tmp=zero; + InsertSlice(input4d, tmp, Ls-1, Ls-1); + tmp=Gamma(Gamma::Algebra::Gamma5)*tmp; + this->Dminus(tmp,imported5d); + } + FermOpTemplateInstantiate(ContinuedFractionFermion5D); } diff --git a/lib/qcd/action/fermion/ContinuedFractionFermion5D.h b/lib/qcd/action/fermion/ContinuedFractionFermion5D.h index e1e50aa5..b551fc28 100644 --- a/lib/qcd/action/fermion/ContinuedFractionFermion5D.h +++ b/lib/qcd/action/fermion/ContinuedFractionFermion5D.h @@ -65,6 +65,14 @@ namespace Grid { // Efficient support for 
multigrid coarsening virtual void Mdir (const FermionField &in, FermionField &out,int dir,int disp); + /////////////////////////////////////////////////////////////// + // Physical surface field utilities + /////////////////////////////////////////////////////////////// + // virtual void Dminus(const FermionField &psi, FermionField &chi); // Inherit trivial case + // virtual void DminusDag(const FermionField &psi, FermionField &chi); // Inherit trivial case + virtual void ExportPhysicalFermionSolution(const FermionField &solution5d,FermionField &exported4d); + virtual void ImportPhysicalFermionSource (const FermionField &input4d,FermionField &imported5d); + // Constructors ContinuedFractionFermion5D(GaugeField &_Umu, GridCartesian &FiveDimGrid, diff --git a/lib/qcd/action/fermion/PartialFractionFermion5D.cc b/lib/qcd/action/fermion/PartialFractionFermion5D.cc index 3a78e043..11840027 100644 --- a/lib/qcd/action/fermion/PartialFractionFermion5D.cc +++ b/lib/qcd/action/fermion/PartialFractionFermion5D.cc @@ -396,6 +396,27 @@ namespace Grid { amax=zolo_hi; } + template + void PartialFractionFermion5D::ExportPhysicalFermionSolution(const FermionField &solution5d,FermionField &exported4d) + { + int Ls = this->Ls; + conformable(solution5d._grid,this->FermionGrid()); + conformable(exported4d._grid,this->GaugeGrid()); + ExtractSlice(exported4d, solution5d, Ls-1, Ls-1); + } + template + void PartialFractionFermion5D::ImportPhysicalFermionSource(const FermionField &input4d,FermionField &imported5d) + { + int Ls = this->Ls; + conformable(imported5d._grid,this->FermionGrid()); + conformable(input4d._grid ,this->GaugeGrid()); + FermionField tmp(this->FermionGrid()); + tmp=zero; + InsertSlice(input4d, tmp, Ls-1, Ls-1); + tmp=Gamma(Gamma::Algebra::Gamma5)*tmp; + this->Dminus(tmp,imported5d); + } + // Constructors template PartialFractionFermion5D::PartialFractionFermion5D(GaugeField &_Umu, diff --git a/lib/qcd/action/fermion/PartialFractionFermion5D.h 
b/lib/qcd/action/fermion/PartialFractionFermion5D.h index 0ec72de4..91f1bd3c 100644 --- a/lib/qcd/action/fermion/PartialFractionFermion5D.h +++ b/lib/qcd/action/fermion/PartialFractionFermion5D.h @@ -70,6 +70,12 @@ namespace Grid { // Efficient support for multigrid coarsening virtual void Mdir (const FermionField &in, FermionField &out,int dir,int disp); + /////////////////////////////////////////////////////////////// + // Physical surface field utilities + /////////////////////////////////////////////////////////////// + virtual void ExportPhysicalFermionSolution(const FermionField &solution5d,FermionField &exported4d); + virtual void ImportPhysicalFermionSource (const FermionField &input4d,FermionField &imported5d); + // Constructors PartialFractionFermion5D(GaugeField &_Umu, GridCartesian &FiveDimGrid, From c11a3ca0a74096ad2eea03487105fbc7924b625a Mon Sep 17 00:00:00 2001 From: Antonin Portelli Date: Fri, 20 Apr 2018 17:13:04 +0100 Subject: [PATCH 352/377] vectorise/unvectorise in reverse order --- lib/lattice/Lattice_transfer.h | 93 ++++++++++++++++++++++++++++++++++ 1 file changed, 93 insertions(+) diff --git a/lib/lattice/Lattice_transfer.h b/lib/lattice/Lattice_transfer.h index 44f0337d..f988f310 100644 --- a/lib/lattice/Lattice_transfer.h +++ b/lib/lattice/Lattice_transfer.h @@ -599,6 +599,51 @@ unvectorizeToLexOrdArray(std::vector &out, const Lattice &in) extract1(in_vobj, out_ptrs, 0); } } + +template +typename std::enable_if::value && !isSIMDvectorized::value, void>::type +unvectorizeToRevLexOrdArray(std::vector &out, const Lattice &in) +{ + + typedef typename vobj::vector_type vtype; + + GridBase* in_grid = in._grid; + out.resize(in_grid->lSites()); + + int ndim = in_grid->Nd(); + int in_nsimd = vtype::Nsimd(); + + std::vector > in_icoor(in_nsimd); + + for(int lane=0; lane < in_nsimd; lane++){ + in_icoor[lane].resize(ndim); + in_grid->iCoorFromIindex(in_icoor[lane], lane); + } + + parallel_for(int in_oidx = 0; in_oidx < in_grid->oSites(); in_oidx++){ 
//loop over outer index + //Assemble vector of pointers to output elements + std::vector out_ptrs(in_nsimd); + + std::vector in_ocoor(ndim); + in_grid->oCoorFromOindex(in_ocoor, in_oidx); + + std::vector lcoor(in_grid->Nd()); + + for(int lane=0; lane < in_nsimd; lane++){ + for(int mu=0;mu_rdimensions[mu]*in_icoor[lane][mu]; + + int lex; + Lexicographic::IndexFromCoorReversed(lcoor, lex, in_grid->_ldimensions); + out_ptrs[lane] = &out[lex]; + } + + //Unpack into those ptrs + const vobj & in_vobj = in._odata[in_oidx]; + extract1(in_vobj, out_ptrs, 0); + } +} + //Copy SIMD-vectorized lattice to array of scalar objects in lexicographic order template typename std::enable_if::value @@ -648,6 +693,54 @@ vectorizeFromLexOrdArray( std::vector &in, Lattice &out) } } +template +typename std::enable_if::value + && !isSIMDvectorized::value, void>::type +vectorizeFromRevLexOrdArray( std::vector &in, Lattice &out) +{ + + typedef typename vobj::vector_type vtype; + + GridBase* grid = out._grid; + assert(in.size()==grid->lSites()); + + int ndim = grid->Nd(); + int nsimd = vtype::Nsimd(); + + std::vector > icoor(nsimd); + + for(int lane=0; lane < nsimd; lane++){ + icoor[lane].resize(ndim); + grid->iCoorFromIindex(icoor[lane],lane); + } + + parallel_for(uint64_t oidx = 0; oidx < grid->oSites(); oidx++){ //loop over outer index + //Assemble vector of pointers to output elements + std::vector ptrs(nsimd); + + std::vector ocoor(ndim); + grid->oCoorFromOindex(ocoor, oidx); + + std::vector lcoor(grid->Nd()); + + for(int lane=0; lane < nsimd; lane++){ + + for(int mu=0;mu_rdimensions[mu]*icoor[lane][mu]; + } + + int lex; + Lexicographic::IndexFromCoorReversed(lcoor, lex, grid->_ldimensions); + ptrs[lane] = &in[lex]; + } + + //pack from those ptrs + vobj vecobj; + merge1(vecobj, ptrs, 0); + out._odata[oidx] = vecobj; + } +} + //Convert a Lattice from one precision to another template void precisionChange(Lattice &out, const Lattice &in){ From 94edf9cf8be8eb2fe8990db39c3c07c4a38bd71b Mon Sep 
17 00:00:00 2001 From: Antonin Portelli Date: Fri, 20 Apr 2018 17:13:21 +0100 Subject: [PATCH 353/377] HDF5: direct access to group for custom operations --- lib/serialisation/Hdf5IO.cc | 10 ++++++++++ lib/serialisation/Hdf5IO.h | 2 ++ 2 files changed, 12 insertions(+) diff --git a/lib/serialisation/Hdf5IO.cc b/lib/serialisation/Hdf5IO.cc index 1fb7be0c..b915a988 100644 --- a/lib/serialisation/Hdf5IO.cc +++ b/lib/serialisation/Hdf5IO.cc @@ -55,6 +55,11 @@ void Hdf5Writer::writeDefault(const std::string &s, const char *x) writeDefault(s, sx); } +Group & Hdf5Writer::getGroup(void) +{ + return group_; +} + // Reader implementation /////////////////////////////////////////////////////// Hdf5Reader::Hdf5Reader(const std::string &fileName) : fileName_(fileName) @@ -103,3 +108,8 @@ void Hdf5Reader::readDefault(const std::string &s, std::string &x) x.resize(strType.getSize()); attribute.read(strType, &(x[0])); } + +Group & Hdf5Reader::getGroup(void) +{ + return group_; +} diff --git a/lib/serialisation/Hdf5IO.h b/lib/serialisation/Hdf5IO.h index 12625ab8..1ae2791e 100644 --- a/lib/serialisation/Hdf5IO.h +++ b/lib/serialisation/Hdf5IO.h @@ -38,6 +38,7 @@ namespace Grid template typename std::enable_if>::is_number, void>::type writeDefault(const std::string &s, const std::vector &x); + H5NS::Group & getGroup(void); private: template void writeSingleAttribute(const U &x, const std::string &name, @@ -65,6 +66,7 @@ namespace Grid template typename std::enable_if>::is_number, void>::type readDefault(const std::string &s, std::vector &x); + H5NS::Group & getGroup(void); private: template void readSingleAttribute(U &x, const std::string &name, From 141da3ae71b255d0c175016a2e2dc03ee241487a Mon Sep 17 00:00:00 2001 From: Antonin Portelli Date: Fri, 20 Apr 2018 17:13:34 +0100 Subject: [PATCH 354/377] function to get tensor dimensions --- lib/serialisation/VectorUtils.h | 42 +++++++++++++++++++++++++++++++++ 1 file changed, 42 insertions(+) diff --git a/lib/serialisation/VectorUtils.h 
b/lib/serialisation/VectorUtils.h index f5c76b84..53088998 100644 --- a/lib/serialisation/VectorUtils.h +++ b/lib/serialisation/VectorUtils.h @@ -30,6 +30,48 @@ namespace Grid { typedef typename std::vector::type>> type; }; + template + void tensorDim(std::vector &dim, const T &t, const bool wipe = true) + { + if (wipe) + { + dim.clear(); + } + } + + template + void tensorDim(std::vector &dim, const iScalar &t, const bool wipe = true) + { + if (wipe) + { + dim.clear(); + } + tensorDim(dim, t._internal, false); + } + + template + void tensorDim(std::vector &dim, const iVector &t, const bool wipe = true) + { + if (wipe) + { + dim.clear(); + } + dim.push_back(N); + tensorDim(dim, t._internal[0], false); + } + + template + void tensorDim(std::vector &dim, const iMatrix &t, const bool wipe = true) + { + if (wipe) + { + dim.clear(); + } + dim.push_back(N); + dim.push_back(N); + tensorDim(dim, t._internal[0][0], false); + } + template typename TensorToVec::type tensorToVec(const T &t) { From a1be53332956d4eb074632ef10a1449a0aac582e Mon Sep 17 00:00:00 2001 From: Guido Cossu Date: Tue, 24 Apr 2018 01:19:53 -0700 Subject: [PATCH 355/377] Corrected Flop count in Benchmark su3 and expanded the Wilson flow output --- benchmarks/Benchmark_su3.cc | 4 ++-- lib/qcd/smearing/WilsonFlow.h | 8 ++++---- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/benchmarks/Benchmark_su3.cc b/benchmarks/Benchmark_su3.cc index 628ad5bd..5f088fdc 100644 --- a/benchmarks/Benchmark_su3.cc +++ b/benchmarks/Benchmark_su3.cc @@ -166,7 +166,7 @@ int main (int argc, char ** argv) double time = (stop-start)/Nloop*1000.0; double bytes=3*vol*Nc*Nc*sizeof(Complex); - double flops=Nc*Nc*(8+8+8)*vol; + double flops=Nc*Nc*(6+8+8)*vol; std::cout<::smear(GaugeField& out, const GaugeField& in) const { std::cout << "Time to evolve " << diff.count() << " s\n"; #endif std::cout << GridLogMessage << "[WilsonFlow] Energy density (plaq) : " - << step << " " - << energyDensityPlaquette(step,out) << std::endl; 
+ << step << " " << tau(step) << " " + << energyDensityPlaquette(step,out) << std::endl; if( step % measure_interval == 0){ std::cout << GridLogMessage << "[WilsonFlow] Top. charge : " << step << " " @@ -193,8 +193,8 @@ void WilsonFlow::smear_adaptive(GaugeField& out, const GaugeField& in, Re //std::cout << GridLogMessage << "Evolution time :"<< taus << std::endl; evolve_step_adaptive(out, maxTau); std::cout << GridLogMessage << "[WilsonFlow] Energy density (plaq) : " - << step << " " - << energyDensityPlaquette(out) << std::endl; + << step << " " << taus << " " + << energyDensityPlaquette(out) << std::endl; if( step % measure_interval == 0){ std::cout << GridLogMessage << "[WilsonFlow] Top. charge : " << step << " " From c5b9147b5334be7f7996e8814df6762f0b9ae1e9 Mon Sep 17 00:00:00 2001 From: Guido Cossu Date: Tue, 24 Apr 2018 08:03:57 -0700 Subject: [PATCH 356/377] Correction of a minor bug in the su3 benchmark --- benchmarks/Benchmark_su3.cc | 52 ++++++++++++++++++------------------- lib/cshift/Cshift_mpi.h | 13 +++++++--- 2 files changed, 36 insertions(+), 29 deletions(-) diff --git a/benchmarks/Benchmark_su3.cc b/benchmarks/Benchmark_su3.cc index 5f088fdc..b31af942 100644 --- a/benchmarks/Benchmark_su3.cc +++ b/benchmarks/Benchmark_su3.cc @@ -52,7 +52,7 @@ int main (int argc, char ** argv) std::cout< latt_size ({lat*mpi_layout[0],lat*mpi_layout[1],lat*mpi_layout[2],lat*mpi_layout[3]}); int64_t vol = latt_size[0]*latt_size[1]*latt_size[2]*latt_size[3]; @@ -147,30 +147,30 @@ int main (int argc, char ** argv) std::cout< latt_size ({lat*mpi_layout[0],lat*mpi_layout[1],lat*mpi_layout[2],lat*mpi_layout[3]}); - int64_t vol = latt_size[0]*latt_size[1]*latt_size[2]*latt_size[3]; - - GridCartesian Grid(latt_size,simd_layout,mpi_layout); - GridParallelRNG pRNG(&Grid); pRNG.SeedFixedIntegers(std::vector({45,12,81,9})); - - LatticeColourMatrix z(&Grid); random(pRNG,z); - LatticeColourMatrix x(&Grid); random(pRNG,x); - LatticeColourMatrix y(&Grid); random(pRNG,y); - - double 
start=usecond(); - for(int64_t i=0;i latt_size ({lat*mpi_layout[0],lat*mpi_layout[1],lat*mpi_layout[2],lat*mpi_layout[3]}); + int64_t vol = latt_size[0]*latt_size[1]*latt_size[2]*latt_size[3]; + + GridCartesian Grid(latt_size,simd_layout,mpi_layout); + GridParallelRNG pRNG(&Grid); pRNG.SeedFixedIntegers(std::vector({45,12,81,9})); + + LatticeColourMatrix z(&Grid); random(pRNG,z); + LatticeColourMatrix x(&Grid); random(pRNG,x); + LatticeColourMatrix y(&Grid); random(pRNG,y); + + double start=usecond(); + for(int64_t i=0;i latt_size ({lat*mpi_layout[0],lat*mpi_layout[1],lat*mpi_layout[2],lat*mpi_layout[3]}); int64_t vol = latt_size[0]*latt_size[1]*latt_size[2]*latt_size[3]; @@ -190,7 +189,7 @@ int main (int argc, char ** argv) LatticeColourMatrix x(&Grid); random(pRNG,x); LatticeColourMatrix y(&Grid); random(pRNG,y); - for(int mu=0;mu<=4;mu++){ + for(int mu=0;mu<4;mu++){ double start=usecond(); for(int64_t i=0;i Lattice Cshift(const Lattice &rhs,int dimension if ( !comm_dim ) { - // std::cout << "Cshift_local" < void Cshift_comms_simd(Lattice& ret,const LatticeCheckerBoardShiftForCB(rhs.checkerboard,dimension,shift,Even); sshift[1] = rhs._grid->CheckerBoardShiftForCB(rhs.checkerboard,dimension,shift,Odd); + //std::cout << "Cshift_comms_simd dim "< void Cshift_comms_simd(Lattice &ret,const Lattice_simd_layout[dimension]; int comm_dim = grid->_processors[dimension] >1 ; + //std::cout << "Cshift_comms_simd dim "<< dimension << " fd "<=0); From 276a2353dfca4677c687effc524b36b5ccfb054b Mon Sep 17 00:00:00 2001 From: paboyle Date: Wed, 25 Apr 2018 00:11:07 +0100 Subject: [PATCH 357/377] Move constructor --- lib/lattice/Lattice_base.h | 24 +++++++++++++++++++++++- 1 file changed, 23 insertions(+), 1 deletion(-) diff --git a/lib/lattice/Lattice_base.h b/lib/lattice/Lattice_base.h index 014e443d..dcd55702 100644 --- a/lib/lattice/Lattice_base.h +++ b/lib/lattice/Lattice_base.h @@ -257,7 +257,11 @@ public: } } - + Lattice(Lattice&& r){ // move constructor + _grid = r._grid; + 
checkerboard = r.checkerboard; + _odata=std::move(r._odata); + } virtual ~Lattice(void) = default; @@ -286,6 +290,24 @@ public: } return *this; } + + strong_inline Lattice & operator = (const Lattice & r){ + _grid = r._grid; + checkerboard = r.checkerboard; + _odata.resize(_grid->oSites());// essential + + parallel_for(int ss=0;ss<_grid->oSites();ss++){ + _odata[ss]=r._odata[ss]; + } + return *this; + } + strong_inline Lattice & operator = (Lattice && r) + { + _grid = r._grid; + checkerboard = r.checkerboard; + _odata =std::move(r._odata); + return *this; + } // *=,+=,-= operators inherit behvour from correspond */+/- operation template strong_inline Lattice &operator *=(const T &r) { From 362ba0443ad73dc726fd45bcf0f7b3447ec9fb11 Mon Sep 17 00:00:00 2001 From: paboyle Date: Wed, 25 Apr 2018 00:12:11 +0100 Subject: [PATCH 358/377] Cshift updates --- benchmarks/Benchmark_su3.cc | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/benchmarks/Benchmark_su3.cc b/benchmarks/Benchmark_su3.cc index 628ad5bd..b458d48a 100644 --- a/benchmarks/Benchmark_su3.cc +++ b/benchmarks/Benchmark_su3.cc @@ -35,17 +35,18 @@ using namespace Grid::QCD; int main (int argc, char ** argv) { Grid_init(&argc,&argv); -#define LMAX (40) +#define LMAX (16) +#define LMIN (16) #define LINC (4) - int64_t Nloop=20; + int64_t Nloop=2000; std::vector simd_layout = GridDefaultSimd(Nd,vComplex::Nsimd()); std::vector mpi_layout = GridDefaultMpi(); int64_t threads = GridThread::GetThreads(); std::cout< latt_size ({lat*mpi_layout[0],lat*mpi_layout[1],lat*mpi_layout[2],lat*mpi_layout[3]}); int64_t vol = latt_size[0]*latt_size[1]*latt_size[2]*latt_size[3]; @@ -190,7 +191,7 @@ int main (int argc, char ** argv) LatticeColourMatrix x(&Grid); random(pRNG,x); LatticeColourMatrix y(&Grid); random(pRNG,y); - for(int mu=0;mu<=4;mu++){ + for(int mu=0;mu<4;mu++){ double start=usecond(); for(int64_t i=0;i Date: Thu, 26 Apr 2018 12:01:56 +0900 Subject: [PATCH 359/377] More timers in the 
integrator --- benchmarks/Benchmark_su3.cc | 22 ++--- lib/qcd/hmc/integrators/Integrator.h | 12 ++- lib/qcd/smearing/GaugeConfiguration.h | 137 ++++++++++++++++---------- 3 files changed, 104 insertions(+), 67 deletions(-) diff --git a/benchmarks/Benchmark_su3.cc b/benchmarks/Benchmark_su3.cc index d9f1341c..5f2d83d2 100644 --- a/benchmarks/Benchmark_su3.cc +++ b/benchmarks/Benchmark_su3.cc @@ -35,8 +35,8 @@ using namespace Grid::QCD; int main (int argc, char ** argv) { Grid_init(&argc,&argv); -#define LMAX (16) -#define LMIN (16) +#define LMAX (32) +#define LMIN (4) #define LINC (4) int64_t Nloop=2000; @@ -193,17 +193,17 @@ int main (int argc, char ** argv) LatticeColourMatrix y(&Grid); random(pRNG,y); for(int mu=0;mu<4;mu++){ - double start=usecond(); - for(int64_t i=0;iis_smeared); + double start_force = usecond(); as[level].actions.at(a)->deriv(Us, force); // deriv should NOT include Ta std::cout << GridLogIntegrator << "Smearing (on/off): " << as[level].actions.at(a)->is_smeared << std::endl; if (as[level].actions.at(a)->is_smeared) Smearer.smeared_force(force); force = FieldImplementation::projectForce(force); // Ta for gauge fields + double end_force = usecond(); Real force_abs = std::sqrt(norm2(force)/U._grid->gSites()); - std::cout << GridLogIntegrator << "Force average: " << force_abs << std::endl; + std::cout << GridLogIntegrator << "["< -class NoSmearing { +//trivial class for no smearing +template +class NoSmearing +{ public: INHERIT_FIELD_TYPES(Impl); - Field* ThinField; + Field *ThinField; - NoSmearing(): ThinField(NULL) {} + NoSmearing() : ThinField(NULL) {} - void set_Field(Field& U) { ThinField = &U; } + void set_Field(Field &U) { ThinField = &U; } - void smeared_force(Field&) const {} + void smeared_force(Field &) const {} - Field& get_SmearedU() { return *ThinField; } + Field &get_SmearedU() { return *ThinField; } - Field& get_U(bool smeared = false) { + Field &get_U(bool smeared = false) + { return *ThinField; } - }; /*! 
@@ -44,32 +47,36 @@ public: It stores a list of smeared configurations. */ template -class SmearedConfiguration { - public: +class SmearedConfiguration +{ +public: INHERIT_GIMPL_TYPES(Gimpl); - private: +private: const unsigned int smearingLevels; Smear_Stout StoutSmearing; std::vector SmearedSet; // Member functions //==================================================================== - void fill_smearedSet(GaugeField& U) { - ThinLinks = &U; // attach the smearing routine to the field U + void fill_smearedSet(GaugeField &U) + { + ThinLinks = &U; // attach the smearing routine to the field U // check the pointer is not null if (ThinLinks == NULL) std::cout << GridLogError << "[SmearedConfiguration] Error in ThinLinks pointer\n"; - if (smearingLevels > 0) { + if (smearingLevels > 0) + { std::cout << GridLogDebug << "[SmearedConfiguration] Filling SmearedSet\n"; GaugeField previous_u(ThinLinks->_grid); previous_u = *ThinLinks; - for (int smearLvl = 0; smearLvl < smearingLevels; ++smearLvl) { + for (int smearLvl = 0; smearLvl < smearingLevels; ++smearLvl) + { StoutSmearing.smear(SmearedSet[smearLvl], previous_u); previous_u = SmearedSet[smearLvl]; @@ -81,9 +88,10 @@ class SmearedConfiguration { } } //==================================================================== - GaugeField AnalyticSmearedForce(const GaugeField& SigmaKPrime, - const GaugeField& GaugeK) const { - GridBase* grid = GaugeK._grid; + GaugeField AnalyticSmearedForce(const GaugeField &SigmaKPrime, + const GaugeField &GaugeK) const + { + GridBase *grid = GaugeK._grid; GaugeField C(grid), SigmaK(grid), iLambda(grid); GaugeLinkField iLambda_mu(grid); GaugeLinkField iQ(grid), e_iQ(grid); @@ -94,7 +102,8 @@ class SmearedConfiguration { SigmaK = zero; iLambda = zero; - for (int mu = 0; mu < Nd; mu++) { + for (int mu = 0; mu < Nd; mu++) + { Cmu = peekLorentz(C, mu); GaugeKmu = peekLorentz(GaugeK, mu); SigmaKPrime_mu = peekLorentz(SigmaKPrime, mu); @@ -104,20 +113,22 @@ class SmearedConfiguration { 
pokeLorentz(iLambda, iLambda_mu, mu); } StoutSmearing.derivative(SigmaK, iLambda, - GaugeK); // derivative of SmearBase + GaugeK); // derivative of SmearBase return SigmaK; } /*! @brief Returns smeared configuration at level 'Level' */ - const GaugeField& get_smeared_conf(int Level) const { + const GaugeField &get_smeared_conf(int Level) const + { return SmearedSet[Level]; } //==================================================================== - void set_iLambda(GaugeLinkField& iLambda, GaugeLinkField& e_iQ, - const GaugeLinkField& iQ, const GaugeLinkField& Sigmap, - const GaugeLinkField& GaugeK) const { - GridBase* grid = iQ._grid; + void set_iLambda(GaugeLinkField &iLambda, GaugeLinkField &e_iQ, + const GaugeLinkField &iQ, const GaugeLinkField &Sigmap, + const GaugeLinkField &GaugeK) const + { + GridBase *grid = iQ._grid; GaugeLinkField iQ2(grid), iQ3(grid), B1(grid), B2(grid), USigmap(grid); GaugeLinkField unity(grid); unity = 1.0; @@ -206,15 +217,15 @@ class SmearedConfiguration { } //==================================================================== - public: - GaugeField* - ThinLinks; /*!< @brief Pointer to the thin - links configuration */ +public: + GaugeField * + ThinLinks; /* Pointer to the thin links configuration */ - /*! 
@brief Standard constructor */ - SmearedConfiguration(GridCartesian* UGrid, unsigned int Nsmear, - Smear_Stout& Stout) - : smearingLevels(Nsmear), StoutSmearing(Stout), ThinLinks(NULL) { + /* Standard constructor */ + SmearedConfiguration(GridCartesian *UGrid, unsigned int Nsmear, + Smear_Stout &Stout) + : smearingLevels(Nsmear), StoutSmearing(Stout), ThinLinks(NULL) + { for (unsigned int i = 0; i < smearingLevels; ++i) SmearedSet.push_back(*(new GaugeField(UGrid))); } @@ -223,21 +234,29 @@ class SmearedConfiguration { SmearedConfiguration() : smearingLevels(0), StoutSmearing(), SmearedSet(), ThinLinks(NULL) {} - - // attach the smeared routines to the thin links U and fill the smeared set - void set_Field(GaugeField& U) { fill_smearedSet(U); } + void set_Field(GaugeField &U) + { + double start = usecond(); + fill_smearedSet(U); + double end = usecond(); + double time = (stop - start)/ 1e3; + std::cout << GridLogMessage << "Smearing in " << time << " ms" << std::endl; + } //==================================================================== - void smeared_force(GaugeField& SigmaTilde) const { - if (smearingLevels > 0) { + void smeared_force(GaugeField &SigmaTilde) const + { + if (smearingLevels > 0) + { + double start = usecond(); GaugeField force = SigmaTilde; // actually = U*SigmaTilde GaugeLinkField tmp_mu(SigmaTilde._grid); - for (int mu = 0; mu < Nd; mu++) { + for (int mu = 0; mu < Nd; mu++) + { // to get just SigmaTilde - tmp_mu = adj(peekLorentz(SmearedSet[smearingLevels - 1], mu)) * - peekLorentz(force, mu); + tmp_mu = adj(peekLorentz(SmearedSet[smearingLevels - 1], mu)) * peekLorentz(force, mu); pokeLorentz(force, tmp_mu, mu); } @@ -246,33 +265,43 @@ class SmearedConfiguration { force = AnalyticSmearedForce(force, *ThinLinks); - for (int mu = 0; mu < Nd; mu++) { + for (int mu = 0; mu < Nd; mu++) + { tmp_mu = peekLorentz(*ThinLinks, mu) * peekLorentz(force, mu); pokeLorentz(SigmaTilde, tmp_mu, mu); } - } // if smearingLevels = 0 do nothing + double end = 
usecond(); + double time = (stop - start)/ 1e3; + std::cout << GridLogMessage << "Smearing force in " << time << " ms" << std::endl; + } // if smearingLevels = 0 do nothing } //==================================================================== - GaugeField& get_SmearedU() { return SmearedSet[smearingLevels - 1]; } + GaugeField &get_SmearedU() { return SmearedSet[smearingLevels - 1]; } - GaugeField& get_U(bool smeared = false) { + GaugeField &get_U(bool smeared = false) + { // get the config, thin links by default - if (smeared) { - if (smearingLevels) { + if (smeared) + { + if (smearingLevels) + { RealD impl_plaq = WilsonLoops::avgPlaquette(SmearedSet[smearingLevels - 1]); std::cout << GridLogDebug << "getting Usmr Plaq: " << impl_plaq << std::endl; return get_SmearedU(); - - } else { + } + else + { RealD impl_plaq = WilsonLoops::avgPlaquette(*ThinLinks); std::cout << GridLogDebug << "getting Thin Plaq: " << impl_plaq << std::endl; return *ThinLinks; } - } else { + } + else + { RealD impl_plaq = WilsonLoops::avgPlaquette(*ThinLinks); std::cout << GridLogDebug << "getting Thin Plaq: " << impl_plaq << std::endl; From 6358f35b7e93d36f1f9a4378ddd53cc2a88242be Mon Sep 17 00:00:00 2001 From: Guido Cossu Date: Thu, 26 Apr 2018 14:18:11 +0900 Subject: [PATCH 360/377] Debug of previous commit --- lib/qcd/smearing/GaugeConfiguration.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/qcd/smearing/GaugeConfiguration.h b/lib/qcd/smearing/GaugeConfiguration.h index 55d5351f..6fea875b 100644 --- a/lib/qcd/smearing/GaugeConfiguration.h +++ b/lib/qcd/smearing/GaugeConfiguration.h @@ -240,7 +240,7 @@ public: double start = usecond(); fill_smearedSet(U); double end = usecond(); - double time = (stop - start)/ 1e3; + double time = (end - start)/ 1e3; std::cout << GridLogMessage << "Smearing in " << time << " ms" << std::endl; } @@ -271,7 +271,7 @@ public: pokeLorentz(SigmaTilde, tmp_mu, mu); } double end = usecond(); - double time = (stop - start)/ 1e3; + 
double time = (end - start)/ 1e3; std::cout << GridLogMessage << "Smearing force in " << time << " ms" << std::endl; } // if smearingLevels = 0 do nothing } From 8f44c799a69d3041f00af7b7785a268a914ed6c5 Mon Sep 17 00:00:00 2001 From: Peter Boyle Date: Thu, 26 Apr 2018 14:48:03 +0100 Subject: [PATCH 361/377] Saving the benchmarking tests for Cshift --- benchmarks/Benchmark_su3.cc | 64 +++++++++++++++++++++++++++++++------ 1 file changed, 55 insertions(+), 9 deletions(-) diff --git a/benchmarks/Benchmark_su3.cc b/benchmarks/Benchmark_su3.cc index b31af942..656f816a 100644 --- a/benchmarks/Benchmark_su3.cc +++ b/benchmarks/Benchmark_su3.cc @@ -35,24 +35,25 @@ using namespace Grid::QCD; int main (int argc, char ** argv) { Grid_init(&argc,&argv); +#define LMIN (16) #define LMAX (40) -#define LINC (4) +#define LINC (8) - int64_t Nloop=20; + int64_t Nloop=200; std::vector simd_layout = GridDefaultSimd(Nd,vComplex::Nsimd()); std::vector mpi_layout = GridDefaultMpi(); int64_t threads = GridThread::GetThreads(); std::cout< latt_size ({lat*mpi_layout[0],lat*mpi_layout[1],lat*mpi_layout[2],lat*mpi_layout[3]}); int64_t vol = latt_size[0]*latt_size[1]*latt_size[2]*latt_size[3]; @@ -84,7 +85,7 @@ int main (int argc, char ** argv) std::cout< latt_size ({lat*mpi_layout[0],lat*mpi_layout[1],lat*mpi_layout[2],lat*mpi_layout[3]}); int64_t vol = latt_size[0]*latt_size[1]*latt_size[2]*latt_size[3]; @@ -115,7 +116,7 @@ int main (int argc, char ** argv) std::cout< latt_size ({lat*mpi_layout[0],lat*mpi_layout[1],lat*mpi_layout[2],lat*mpi_layout[3]}); int64_t vol = latt_size[0]*latt_size[1]*latt_size[2]*latt_size[3]; @@ -146,7 +147,7 @@ int main (int argc, char ** argv) std::cout< latt_size ({lat*mpi_layout[0],lat*mpi_layout[1],lat*mpi_layout[2],lat*mpi_layout[3]}); int64_t vol = latt_size[0]*latt_size[1]*latt_size[2]*latt_size[3]; @@ -170,7 +171,6 @@ int main (int argc, char ** argv) std::cout< latt_size ({lat*mpi_layout[0],lat*mpi_layout[1],lat*mpi_layout[2],lat*mpi_layout[3]}); int64_t 
vol = latt_size[0]*latt_size[1]*latt_size[2]*latt_size[3]; @@ -203,6 +203,52 @@ int main (int argc, char ** argv) std::cout< latt_size ({lat*mpi_layout[0],lat*mpi_layout[1],lat*mpi_layout[2],lat*mpi_layout[3]}); + int64_t vol = latt_size[0]*latt_size[1]*latt_size[2]*latt_size[3]; + + GridCartesian Grid(latt_size,simd_layout,mpi_layout); + GridParallelRNG pRNG(&Grid); pRNG.SeedFixedIntegers(std::vector({45,12,81,9})); + + LatticeColourMatrix z(&Grid); random(pRNG,z); + LatticeColourMatrix x(&Grid); random(pRNG,x); + LatticeColourMatrix y(&Grid); random(pRNG,y); + LatticeColourMatrix tmp(&Grid); + + for(int mu=0;mu<4;mu++){ + double tshift=0; + double tmult =0; + + double start=usecond(); + for(int64_t i=0;i Date: Thu, 26 Apr 2018 14:48:35 +0100 Subject: [PATCH 362/377] Improvement --- benchmarks/Benchmark_memory_bandwidth.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/benchmarks/Benchmark_memory_bandwidth.cc b/benchmarks/Benchmark_memory_bandwidth.cc index 848f271d..cc965050 100644 --- a/benchmarks/Benchmark_memory_bandwidth.cc +++ b/benchmarks/Benchmark_memory_bandwidth.cc @@ -55,7 +55,7 @@ int main (int argc, char ** argv) std::cout< Date: Thu, 26 Apr 2018 14:48:57 +0100 Subject: [PATCH 363/377] Guard bare openmp statemetn with ifdef --- lib/allocator/AlignedAllocator.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/lib/allocator/AlignedAllocator.h b/lib/allocator/AlignedAllocator.h index 3b27aec9..b0f7e206 100644 --- a/lib/allocator/AlignedAllocator.h +++ b/lib/allocator/AlignedAllocator.h @@ -277,7 +277,9 @@ public: uint8_t *cp = (uint8_t *)ptr; if ( ptr ) { // One touch per 4k page, static OMP loop to catch same loop order +#ifdef GRID_OMP #pragma omp parallel for schedule(static) +#endif for(size_type n=0;n Date: Thu, 26 Apr 2018 14:49:42 +0100 Subject: [PATCH 364/377] Force static --- lib/threads/Threads.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/threads/Threads.h b/lib/threads/Threads.h index 
36daf2af..dacaf5d8 100644 --- a/lib/threads/Threads.h +++ b/lib/threads/Threads.h @@ -40,7 +40,7 @@ Author: paboyle #define PARALLEL_FOR_LOOP _Pragma("omp parallel for schedule(static)") #define PARALLEL_FOR_LOOP_INTERN _Pragma("omp for schedule(static)") -#define PARALLEL_NESTED_LOOP2 _Pragma("omp parallel for collapse(2)") +#define PARALLEL_NESTED_LOOP2 _Pragma("omp parallel for schedule(static) collapse(2)") #define PARALLEL_REGION _Pragma("omp parallel") #define PARALLEL_CRITICAL _Pragma("omp critical") #else From 03e9832efa55892c00e028fd4601220c7378a13d Mon Sep 17 00:00:00 2001 From: Peter Boyle Date: Thu, 26 Apr 2018 14:50:02 +0100 Subject: [PATCH 365/377] Use macros for bare openmp --- lib/parallelIO/BinaryIO.h | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/lib/parallelIO/BinaryIO.h b/lib/parallelIO/BinaryIO.h index 45fd522e..ce84fc81 100644 --- a/lib/parallelIO/BinaryIO.h +++ b/lib/parallelIO/BinaryIO.h @@ -110,11 +110,11 @@ class BinaryIO { lsites = 1; } - #pragma omp parallel +PARALLEL_REGION { uint32_t nersc_csum_thr = 0; - #pragma omp for +PARALLEL_FOR_LOOP_INTERN for (uint64_t local_site = 0; local_site < lsites; local_site++) { uint32_t *site_buf = (uint32_t *)&fbuf[local_site]; @@ -124,7 +124,7 @@ class BinaryIO { } } - #pragma omp critical +PARALLEL_CRITICAL { nersc_csum += nersc_csum_thr; } @@ -146,14 +146,14 @@ class BinaryIO { std::vector local_start =grid->LocalStarts(); std::vector global_vol =grid->FullDimensions(); -#pragma omp parallel +PARALLEL_REGION { std::vector coor(nd); uint32_t scidac_csuma_thr=0; uint32_t scidac_csumb_thr=0; uint32_t site_crc=0; -#pragma omp for +PARALLEL_FOR_LOOP_INTERN for(uint64_t local_site=0;local_site>(32-gsite31); } -#pragma omp critical +PARALLEL_CRITICAL { scidac_csuma^= scidac_csuma_thr; scidac_csumb^= scidac_csumb_thr; From fa0d8feff421001740acf3a1d039ec3e86980164 Mon Sep 17 00:00:00 2001 From: Peter Boyle Date: Thu, 26 Apr 2018 17:56:27 +0100 Subject: [PATCH 366/377] 
Performance of CovariantCshift now non-embarrassing. --- benchmarks/Benchmark_su3.cc | 6 +-- lib/cshift/Cshift_common.h | 93 ++++++++++++++++++++++--------------- lib/lattice/Lattice_base.h | 3 +- 3 files changed, 60 insertions(+), 42 deletions(-) diff --git a/benchmarks/Benchmark_su3.cc b/benchmarks/Benchmark_su3.cc index 7e5436b1..7b1b2c1a 100644 --- a/benchmarks/Benchmark_su3.cc +++ b/benchmarks/Benchmark_su3.cc @@ -36,7 +36,7 @@ int main (int argc, char ** argv) { Grid_init(&argc,&argv); #define LMAX (32) -#define LMIN (4) +#define LMIN (16) #define LINC (4) int64_t Nloop=2000; @@ -204,7 +204,7 @@ int main (int argc, char ** argv) std::cout< &rhs,commVector &buffer,int dimen int so=plane*rhs._grid->_ostride[dimension]; // base offset for start of plane int e1=rhs._grid->_slice_nblock[dimension]; int e2=rhs._grid->_slice_block[dimension]; + int ent = 0; + + static std::vector > table; table.resize(e1*e2); int stride=rhs._grid->_slice_stride[dimension]; if ( cbmask == 0x3 ) { - parallel_for_nest2(int n=0;n(off+bo+b,so+o+b); } } } else { int bo=0; - std::vector > table; for(int n=0;nCheckerBoardFromOindex(o+b); if ( ocb &cbmask ) { - table.push_back(std::pair (bo++,o+b)); + table[ent++]=std::pair (off+bo++,so+o+b); } } } - parallel_for(int i=0;i void Scatter_plane_simple (Lattice &rhs,commVector_slice_nblock[dimension]; int e2=rhs._grid->_slice_block[dimension]; int stride=rhs._grid->_slice_stride[dimension]; - + + static std::vector > table; table.resize(e1*e2); + int ent =0; + if ( cbmask ==0x3 ) { - parallel_for_nest2(int n=0;n_slice_stride[dimension]; int bo =n*rhs._grid->_slice_block[dimension]; - rhs._odata[so+o+b]=buffer[bo+b]; + table[ent++] = std::pair(so+o+b,bo); } } + } else { - std::vector > table; int bo=0; for(int n=0;n_slice_stride[dimension]; int ocb=1<CheckerBoardFromOindex(o+b);// Could easily be a table lookup if ( ocb & cbmask ) { - table.push_back(std::pair (so+o+b,bo++)); + table[ent++]=std::pair (so+o+b,bo++); } } } - parallel_for(int i=0;i 
void Copy_plane(Lattice& lhs,const Lattice &rhs int e1=rhs._grid->_slice_nblock[dimension]; // clearly loop invariant for icpc int e2=rhs._grid->_slice_block[dimension]; int stride = rhs._grid->_slice_stride[dimension]; + static std::vector > table; table.resize(e1*e2); + int ent=0; + if(cbmask == 0x3 ){ - parallel_for_nest2(int n=0;n(lo+o,ro+o); } } } else { - parallel_for_nest2(int n=0;nCheckerBoardFromOindex(o); if ( ocb&cbmask ) { - //lhs._odata[lo+o]=rhs._odata[ro+o]; - vstream(lhs._odata[lo+o],rhs._odata[ro+o]); + table[ent++] = std::pair(lo+o,ro+o); } } } } - + + parallel_for(int i=0;i void Copy_plane_permute(Lattice& lhs,const Lattice &rhs, int dimension,int lplane,int rplane,int cbmask,int permute_type) @@ -269,16 +278,28 @@ template void Copy_plane_permute(Lattice& lhs,const Lattice_slice_block [dimension]; int stride = rhs._grid->_slice_stride[dimension]; - parallel_for_nest2(int n=0;n > table; table.resize(e1*e2); + int ent=0; + double t_tab,t_perm; + if ( cbmask == 0x3 ) { + for(int n=0;n(lo+o+b,ro+o+b); + }} + } else { + for(int n=0;nCheckerBoardFromOindex(o+b); - if ( ocb&cbmask ) { - permute(lhs._odata[lo+o+b],rhs._odata[ro+o+b],permute_type); - } + if ( ocb&cbmask ) table[ent++] = std::pair(lo+o+b,ro+o+b); + }} + } - }} + parallel_for(int i=0;i void Cshift_local(Lattice& ret,const Lattice &r sshift[0] = rhs._grid->CheckerBoardShiftForCB(rhs.checkerboard,dimension,shift,Even); sshift[1] = rhs._grid->CheckerBoardShiftForCB(rhs.checkerboard,dimension,shift,Odd); + double t_local; + if ( sshift[0] == sshift[1] ) { Cshift_local(ret,rhs,dimension,shift,0x3); } else { @@ -299,7 +322,7 @@ template void Cshift_local(Lattice& ret,const Lattice &r } } -template Lattice Cshift_local(Lattice &ret,const Lattice &rhs,int dimension,int shift,int cbmask) +template void Cshift_local(Lattice &ret,const Lattice &rhs,int dimension,int shift,int cbmask) { GridBase *grid = rhs._grid; int fd = grid->_fdimensions[dimension]; @@ -325,11 +348,7 @@ template Lattice 
Cshift_local(Lattice &ret,const Lattice int sshift = grid->CheckerBoardShiftForCB(rhs.checkerboard,dimension,shift,cb); int sx = (x+sshift)%rd; - - // FIXME : This must change where we have a - // Rotate slice. - // Document how this works ; why didn't I do this when I first wrote it... // wrap is whether sshift > rd. // num is sshift mod rd. // @@ -365,10 +384,8 @@ template Lattice Cshift_local(Lattice &ret,const Lattice if ( permute_slice ) Copy_plane_permute(ret,rhs,dimension,x,sx,cbmask,permute_type_dist); else Copy_plane(ret,rhs,dimension,x,sx,cbmask); - } - return ret; } } #endif diff --git a/lib/lattice/Lattice_base.h b/lib/lattice/Lattice_base.h index 98713c14..1169d18f 100644 --- a/lib/lattice/Lattice_base.h +++ b/lib/lattice/Lattice_base.h @@ -256,7 +256,7 @@ public: _odata[ss]=r._odata[ss]; } } - + Lattice(Lattice&& r){ // move constructor _grid = r._grid; checkerboard = r.checkerboard; @@ -270,6 +270,7 @@ public: _odata =std::move(r._odata); return *this; } + inline Lattice & operator = (const Lattice & r){ _grid = r._grid; checkerboard = r.checkerboard; From e9f1ac09ded186335c465b54bed60f3a44477ab9 Mon Sep 17 00:00:00 2001 From: paboyle Date: Thu, 26 Apr 2018 23:00:08 +0100 Subject: [PATCH 367/377] static --- lib/threads/Threads.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/threads/Threads.h b/lib/threads/Threads.h index 36daf2af..dacaf5d8 100644 --- a/lib/threads/Threads.h +++ b/lib/threads/Threads.h @@ -40,7 +40,7 @@ Author: paboyle #define PARALLEL_FOR_LOOP _Pragma("omp parallel for schedule(static)") #define PARALLEL_FOR_LOOP_INTERN _Pragma("omp for schedule(static)") -#define PARALLEL_NESTED_LOOP2 _Pragma("omp parallel for collapse(2)") +#define PARALLEL_NESTED_LOOP2 _Pragma("omp parallel for schedule(static) collapse(2)") #define PARALLEL_REGION _Pragma("omp parallel") #define PARALLEL_CRITICAL _Pragma("omp critical") #else From 7ecc47ac89ddb310583fe5b548fd0804dfb6e0ce Mon Sep 17 00:00:00 2001 From: paboyle Date: Thu, 26 
Apr 2018 23:00:28 +0100 Subject: [PATCH 368/377] Quenched test compile --- lib/lattice/Lattice_comparison_utils.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/lattice/Lattice_comparison_utils.h b/lib/lattice/Lattice_comparison_utils.h index 9580d4d2..579449f1 100644 --- a/lib/lattice/Lattice_comparison_utils.h +++ b/lib/lattice/Lattice_comparison_utils.h @@ -198,7 +198,7 @@ namespace Grid { typedef typename vsimd::scalar_type scalar;\ return Comparison(functor(),lhs,rhs);\ }\ - template = 0>\ + template\ inline vInteger operator op(const iScalar &lhs,const iScalar &rhs)\ { \ return lhs._internal op rhs._internal; \ @@ -212,7 +212,7 @@ namespace Grid { inline vInteger operator op(const typename vsimd::scalar_type &lhs,const iScalar &rhs) \ { \ return lhs op rhs._internal; \ - } + } \ DECLARE_RELATIONAL(<,slt); From 1be80896048e42c825f81a6bc5a26d79c537f37d Mon Sep 17 00:00:00 2001 From: paboyle Date: Thu, 26 Apr 2018 23:42:45 +0100 Subject: [PATCH 369/377] Clean compile --- lib/lattice/Lattice_comparison_utils.h | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/lib/lattice/Lattice_comparison_utils.h b/lib/lattice/Lattice_comparison_utils.h index 579449f1..cbac20ec 100644 --- a/lib/lattice/Lattice_comparison_utils.h +++ b/lib/lattice/Lattice_comparison_utils.h @@ -179,7 +179,7 @@ namespace Grid { return ret; } -#define DECLARE_RELATIONAL(op,functor) \ +#define DECLARE_RELATIONAL_EQ(op,functor) \ template = 0>\ inline vInteger operator op (const vsimd & lhs, const vsimd & rhs)\ {\ @@ -198,11 +198,6 @@ namespace Grid { typedef typename vsimd::scalar_type scalar;\ return Comparison(functor(),lhs,rhs);\ }\ - template\ - inline vInteger operator op(const iScalar &lhs,const iScalar &rhs)\ - { \ - return lhs._internal op rhs._internal; \ - } \ template\ inline vInteger operator op(const iScalar &lhs,const typename vsimd::scalar_type &rhs) \ { \ @@ -214,12 +209,19 @@ namespace Grid { return lhs op rhs._internal; \ 
} \ +#define DECLARE_RELATIONAL(op,functor) \ + DECLARE_RELATIONAL_EQ(op,functor) \ + template\ + inline vInteger operator op(const iScalar &lhs,const iScalar &rhs)\ + { \ + return lhs._internal op rhs._internal; \ + } DECLARE_RELATIONAL(<,slt); DECLARE_RELATIONAL(<=,sle); DECLARE_RELATIONAL(>,sgt); DECLARE_RELATIONAL(>=,sge); -DECLARE_RELATIONAL(==,seq); +DECLARE_RELATIONAL_EQ(==,seq); DECLARE_RELATIONAL(!=,sne); #undef DECLARE_RELATIONAL From 809b1cdd58d33ca43cab460a838589dfd0f2ce78 Mon Sep 17 00:00:00 2001 From: paboyle Date: Fri, 27 Apr 2018 05:19:10 +0100 Subject: [PATCH 370/377] Bug fix for MPI running ; introduced last night --- lib/cshift/Cshift_common.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/cshift/Cshift_common.h b/lib/cshift/Cshift_common.h index b2f87315..24e0d2ab 100644 --- a/lib/cshift/Cshift_common.h +++ b/lib/cshift/Cshift_common.h @@ -152,7 +152,7 @@ template void Scatter_plane_simple (Lattice &rhs,commVector_slice_stride[dimension]; int bo =n*rhs._grid->_slice_block[dimension]; - table[ent++] = std::pair(so+o+b,bo); + table[ent++] = std::pair(so+o+b,bo+b); } } From 0734e9ddd4a24e4d7d1ee7224568105cfd39ac5b Mon Sep 17 00:00:00 2001 From: Guido Cossu Date: Fri, 27 Apr 2018 14:39:01 +0900 Subject: [PATCH 371/377] Debugging Scatter_plane_simple --- lib/cshift/Cshift_common.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/lib/cshift/Cshift_common.h b/lib/cshift/Cshift_common.h index b2f87315..c9faf365 100644 --- a/lib/cshift/Cshift_common.h +++ b/lib/cshift/Cshift_common.h @@ -150,9 +150,9 @@ template void Scatter_plane_simple (Lattice &rhs,commVector_slice_stride[dimension]; - int bo =n*rhs._grid->_slice_block[dimension]; - table[ent++] = std::pair(so+o+b,bo); + int o =n*rhs._grid->_slice_stride[dimension]; + int bo =n*rhs._grid->_slice_block[dimension]; + table[ent++] = std::pair(so+o+b,bo+b); } } From 75e4483407fe4f9f9715f06ba6e95be4c9eef2b8 Mon Sep 17 00:00:00 2001 From: paboyle Date: Fri, 27 
Apr 2018 07:49:57 +0100 Subject: [PATCH 372/377] Stronger convergence test --- .../iterative/ImplicitlyRestartedLanczos.h | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/lib/algorithms/iterative/ImplicitlyRestartedLanczos.h b/lib/algorithms/iterative/ImplicitlyRestartedLanczos.h index 787cf15a..8011e796 100644 --- a/lib/algorithms/iterative/ImplicitlyRestartedLanczos.h +++ b/lib/algorithms/iterative/ImplicitlyRestartedLanczos.h @@ -479,15 +479,13 @@ until convergence Field B(grid); B.checkerboard = evec[0].checkerboard; // power of two search pattern; not every evalue in eval2 is assessed. + int allconv =1; for(int jj = 1; jj<=Nstop; jj*=2){ int j = Nstop-jj; RealD e = eval2_copy[j]; // Discard the evalue basisRotateJ(B,evec,Qt,j,0,Nk,Nm); - if( _Tester.TestConvergence(j,eresid,B,e,evalMaxApprox) ) { - if ( j > Nconv ) { - Nconv=j+1; - jj=Nstop; // Terminate the scan - } + if( !_Tester.TestConvergence(j,eresid,B,e,evalMaxApprox) ) { + allconv=0; } } // Do evec[0] for good measure @@ -495,8 +493,10 @@ until convergence int j=0; RealD e = eval2_copy[0]; basisRotateJ(B,evec,Qt,j,0,Nk,Nm); - _Tester.TestConvergence(j,eresid,B,e,evalMaxApprox); + if( !_Tester.TestConvergence(j,eresid,B,e,evalMaxApprox) ) allconv=0; } + if ( allconv ) Nconv = Nstop; + // test if we converged, if so, terminate std::cout<= "<=Nstop || beta_k < betastp){ From b27f0e5a539fc59214d026d04143e67d2d5a0264 Mon Sep 17 00:00:00 2001 From: paboyle Date: Fri, 27 Apr 2018 07:50:15 +0100 Subject: [PATCH 373/377] Control over IO --- lib/algorithms/iterative/LocalCoherenceLanczos.h | 1 + 1 file changed, 1 insertion(+) diff --git a/lib/algorithms/iterative/LocalCoherenceLanczos.h b/lib/algorithms/iterative/LocalCoherenceLanczos.h index b8348c0c..54e4c6c8 100644 --- a/lib/algorithms/iterative/LocalCoherenceLanczos.h +++ b/lib/algorithms/iterative/LocalCoherenceLanczos.h @@ -48,6 +48,7 @@ struct LanczosParams : Serializable { struct LocalCoherenceLanczosParams : Serializable { 
public: GRID_SERIALIZABLE_CLASS_MEMBERS(LocalCoherenceLanczosParams, + bool, saveEvecs, bool, doFine, bool, doFineRead, bool, doCoarse, From 9b0240d1016c071750aefd28dbfbd97b781a8229 Mon Sep 17 00:00:00 2001 From: paboyle Date: Fri, 27 Apr 2018 07:50:51 +0100 Subject: [PATCH 374/377] Hot start test --- tests/Test_compressed_lanczos_hot_start.cc | 243 +++++++++++++++++++++ 1 file changed, 243 insertions(+) create mode 100644 tests/Test_compressed_lanczos_hot_start.cc diff --git a/tests/Test_compressed_lanczos_hot_start.cc b/tests/Test_compressed_lanczos_hot_start.cc new file mode 100644 index 00000000..998f1b9f --- /dev/null +++ b/tests/Test_compressed_lanczos_hot_start.cc @@ -0,0 +1,243 @@ + /************************************************************************************* + + Grid physics library, www.github.com/paboyle/Grid + + Source file: ./tests/Test_dwf_compressed_lanczos_reorg.cc + + Copyright (C) 2017 + +Author: Leans heavily on Christoph Lehner's code +Author: Peter Boyle + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along + with this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+ + See the full license in the file "LICENSE" in the top level distribution directory + *************************************************************************************/ + /* END LEGAL */ +/* + * Reimplement the badly named "multigrid" lanczos as compressed Lanczos using the features + * in Grid that were intended to be used to support blocked Aggregates, from + */ +#include +#include +#include + +using namespace std; +using namespace Grid; +using namespace Grid::QCD; + +template +class LocalCoherenceLanczosScidac : public LocalCoherenceLanczos +{ +public: + typedef iVector CoarseSiteVector; + typedef Lattice CoarseField; + typedef Lattice CoarseScalar; // used for inner products on fine field + typedef Lattice FineField; + + LocalCoherenceLanczosScidac(GridBase *FineGrid,GridBase *CoarseGrid, + LinearOperatorBase &FineOp, + int checkerboard) + // Base constructor + : LocalCoherenceLanczos(FineGrid,CoarseGrid,FineOp,checkerboard) + {}; + + void checkpointFine(std::string evecs_file,std::string evals_file) + { + assert(this->subspace.size()==nbasis); + emptyUserRecord record; + Grid::QCD::ScidacWriter WR(this->_FineGrid->IsBoss()); + WR.open(evecs_file); + for(int k=0;ksubspace[k],record); + } + WR.close(); + + XmlWriter WRx(evals_file); + write(WRx,"evals",this->evals_fine); + } + + void checkpointFineRestore(std::string evecs_file,std::string evals_file) + { + this->evals_fine.resize(nbasis); + this->subspace.resize(nbasis,this->_FineGrid); + + std::cout << GridLogIRL<< "checkpointFineRestore: Reading evals from "<evals_fine); + + assert(this->evals_fine.size()==nbasis); + + std::cout << GridLogIRL<< "checkpointFineRestore: Reading evecs from "<subspace[k].checkerboard=this->_checkerboard; + RD.readScidacFieldRecord(this->subspace[k],record); + + } + RD.close(); + } + + void checkpointCoarse(std::string evecs_file,std::string evals_file) + { + int n = this->evec_coarse.size(); + emptyUserRecord record; + Grid::QCD::ScidacWriter 
WR(this->_CoarseGrid->IsBoss()); + WR.open(evecs_file); + for(int k=0;kevec_coarse[k],record); + } + WR.close(); + + XmlWriter WRx(evals_file); + write(WRx,"evals",this->evals_coarse); + } + + void checkpointCoarseRestore(std::string evecs_file,std::string evals_file,int nvec) + { + std::cout << "resizing coarse vecs to " << nvec<< std::endl; + this->evals_coarse.resize(nvec); + this->evec_coarse.resize(nvec,this->_CoarseGrid); + std::cout << GridLogIRL<< "checkpointCoarseRestore: Reading evals from "<evals_coarse); + + assert(this->evals_coarse.size()==nvec); + emptyUserRecord record; + std::cout << GridLogIRL<< "checkpointCoarseRestore: Reading evecs from "<evec_coarse[k],record); + } + RD.close(); + } +}; + +int main (int argc, char ** argv) { + + Grid_init(&argc,&argv); + GridLogIRL.TimingMode(1); + + LocalCoherenceLanczosParams Params; + { + Params.omega.resize(10); + Params.blockSize.resize(5); + XmlWriter writer("Params_template.xml"); + write(writer,"Params",Params); + std::cout << GridLogMessage << " Written Params_template.xml" < blockSize = Params.blockSize; + std::vector latt({16,16,16,16}); + uint64_t vol = Ls*latt[0]*latt[1]*latt[2]*latt[3]; + double mat_flop= 2.0*1320.0*vol; + // Grids + GridCartesian * UGrid = SpaceTimeGrid::makeFourDimGrid(latt, + GridDefaultSimd(Nd,vComplex::Nsimd()), + GridDefaultMpi()); + GridRedBlackCartesian * UrbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(UGrid); + GridCartesian * FGrid = SpaceTimeGrid::makeFiveDimGrid(Ls,UGrid); + GridRedBlackCartesian * FrbGrid = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls,UGrid); + + std::vector fineLatt = latt; + int dims=fineLatt.size(); + assert(blockSize.size()==dims+1); + std::vector coarseLatt(dims); + std::vector coarseLatt5d ; + + for (int d=0;d seeds4({1,2,3,4}); + GridParallelRNG RNG4(UGrid); RNG4.SeedFixedIntegers(seeds4); + LatticeGaugeField Umu(UGrid); + SU3::HotConfiguration(RNG4,Umu); + // FieldMetaData header; + // NerscIO::readConfiguration(Umu,header,Params.config); + + 
std::cout << GridLogMessage << "Lattice dimensions: " << latt << " Ls: " << Ls << std::endl; + + // ZMobius EO Operator + ZMobiusFermionR Ddwf(Umu, *FGrid, *FrbGrid, *UGrid, *UrbGrid, mass, M5, Params.omega,1.,0.); + SchurDiagTwoOperator HermOp(Ddwf); + + // Eigenvector storage + LanczosParams fine =Params.FineParams; + LanczosParams coarse=Params.CoarseParams; + + const int Ns1 = fine.Nstop; const int Ns2 = coarse.Nstop; + const int Nk1 = fine.Nk; const int Nk2 = coarse.Nk; + const int Nm1 = fine.Nm; const int Nm2 = coarse.Nm; + + std::cout << GridLogMessage << "Keep " << fine.Nstop << " fine vectors" << std::endl; + std::cout << GridLogMessage << "Keep " << coarse.Nstop << " coarse vectors" << std::endl; + assert(Nm2 >= Nm1); + + const int nbasis= 60; + assert(nbasis==Ns1); + LocalCoherenceLanczosScidac _LocalCoherenceLanczos(FrbGrid,CoarseGrid5,HermOp,Odd); + std::cout << GridLogMessage << "Constructed LocalCoherenceLanczos" << std::endl; + + assert( (Params.doFine)||(Params.doFineRead)); + + if ( Params.doFine ) { + std::cout << GridLogMessage << "Performing fine grid IRL Nstop "<< Ns1 << " Nk "< Date: Fri, 27 Apr 2018 07:51:12 +0100 Subject: [PATCH 375/377] Roll over version --- VERSION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/VERSION b/VERSION index bfad377d..a0211af1 100644 --- a/VERSION +++ b/VERSION @@ -1,4 +1,4 @@ -Version : 0.7.0 +Version : 0.8.0 - Clang 3.5 and above, ICPC v16 and above, GCC 6.3 and above recommended - MPI and MPI3 comms optimisations for KNL and OPA finished From e369d7306d8bc53fb1e2456152d444b7e61fda07 Mon Sep 17 00:00:00 2001 From: paboyle Date: Fri, 27 Apr 2018 07:51:44 +0100 Subject: [PATCH 376/377] Rename --- tests/lanczos/Test_compressed_lanczos.cc | 253 +++++++++++++++++++++++ 1 file changed, 253 insertions(+) create mode 100644 tests/lanczos/Test_compressed_lanczos.cc diff --git a/tests/lanczos/Test_compressed_lanczos.cc b/tests/lanczos/Test_compressed_lanczos.cc new file mode 100644 index 
00000000..8bce82bb --- /dev/null +++ b/tests/lanczos/Test_compressed_lanczos.cc @@ -0,0 +1,253 @@ + /************************************************************************************* + + Grid physics library, www.github.com/paboyle/Grid + + Source file: ./tests/Test_dwf_compressed_lanczos_reorg.cc + + Copyright (C) 2017 + +Author: Leans heavily on Christoph Lehner's code +Author: Peter Boyle + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along + with this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+ + See the full license in the file "LICENSE" in the top level distribution directory + *************************************************************************************/ + /* END LEGAL */ +/* + * Reimplement the badly named "multigrid" lanczos as compressed Lanczos using the features + * in Grid that were intended to be used to support blocked Aggregates, from + */ +#include +#include +#include + +using namespace std; +using namespace Grid; +using namespace Grid::QCD; + +template +class LocalCoherenceLanczosScidac : public LocalCoherenceLanczos +{ +public: + typedef iVector CoarseSiteVector; + typedef Lattice CoarseField; + typedef Lattice CoarseScalar; // used for inner products on fine field + typedef Lattice FineField; + + LocalCoherenceLanczosScidac(GridBase *FineGrid,GridBase *CoarseGrid, + LinearOperatorBase &FineOp, + int checkerboard) + // Base constructor + : LocalCoherenceLanczos(FineGrid,CoarseGrid,FineOp,checkerboard) + {}; + + void checkpointFine(std::string evecs_file,std::string evals_file) + { + assert(this->subspace.size()==nbasis); + emptyUserRecord record; + Grid::QCD::ScidacWriter WR(this->_FineGrid->IsBoss()); + WR.open(evecs_file); + for(int k=0;ksubspace[k],record); + } + WR.close(); + + XmlWriter WRx(evals_file); + write(WRx,"evals",this->evals_fine); + } + + void checkpointFineRestore(std::string evecs_file,std::string evals_file) + { + this->evals_fine.resize(nbasis); + this->subspace.resize(nbasis,this->_FineGrid); + + std::cout << GridLogIRL<< "checkpointFineRestore: Reading evals from "<evals_fine); + + assert(this->evals_fine.size()==nbasis); + + std::cout << GridLogIRL<< "checkpointFineRestore: Reading evecs from "<subspace[k].checkerboard=this->_checkerboard; + RD.readScidacFieldRecord(this->subspace[k],record); + + } + RD.close(); + } + + void checkpointCoarse(std::string evecs_file,std::string evals_file) + { + int n = this->evec_coarse.size(); + emptyUserRecord record; + Grid::QCD::ScidacWriter 
WR(this->_CoarseGrid->IsBoss()); + WR.open(evecs_file); + for(int k=0;kevec_coarse[k],record); + } + WR.close(); + + XmlWriter WRx(evals_file); + write(WRx,"evals",this->evals_coarse); + } + + void checkpointCoarseRestore(std::string evecs_file,std::string evals_file,int nvec) + { + std::cout << "resizing coarse vecs to " << nvec<< std::endl; + this->evals_coarse.resize(nvec); + this->evec_coarse.resize(nvec,this->_CoarseGrid); + std::cout << GridLogIRL<< "checkpointCoarseRestore: Reading evals from "<evals_coarse); + + assert(this->evals_coarse.size()==nvec); + emptyUserRecord record; + std::cout << GridLogIRL<< "checkpointCoarseRestore: Reading evecs from "<evec_coarse[k],record); + } + RD.close(); + } +}; + +int main (int argc, char ** argv) { + + Grid_init(&argc,&argv); + GridLogIRL.TimingMode(1); + + LocalCoherenceLanczosParams Params; + { + Params.omega.resize(10); + Params.blockSize.resize(5); + XmlWriter writer("Params_template.xml"); + write(writer,"Params",Params); + std::cout << GridLogMessage << " Written Params_template.xml" < blockSize = Params.blockSize; + + // Grids + GridCartesian * UGrid = SpaceTimeGrid::makeFourDimGrid(GridDefaultLatt(), + GridDefaultSimd(Nd,vComplex::Nsimd()), + GridDefaultMpi()); + GridRedBlackCartesian * UrbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(UGrid); + GridCartesian * FGrid = SpaceTimeGrid::makeFiveDimGrid(Ls,UGrid); + GridRedBlackCartesian * FrbGrid = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls,UGrid); + + std::vector fineLatt = GridDefaultLatt(); + int dims=fineLatt.size(); + assert(blockSize.size()==dims+1); + std::vector coarseLatt(dims); + std::vector coarseLatt5d ; + + for (int d=0;d HermOp(Ddwf); + + // Eigenvector storage + LanczosParams fine =Params.FineParams; + LanczosParams coarse=Params.CoarseParams; + + const int Ns1 = fine.Nstop; const int Ns2 = coarse.Nstop; + const int Nk1 = fine.Nk; const int Nk2 = coarse.Nk; + const int Nm1 = fine.Nm; const int Nm2 = coarse.Nm; + + std::cout << GridLogMessage << 
"Keep " << fine.Nstop << " fine vectors" << std::endl; + std::cout << GridLogMessage << "Keep " << coarse.Nstop << " coarse vectors" << std::endl; + assert(Nm2 >= Nm1); + + const int nbasis= 60; + assert(nbasis==Ns1); + LocalCoherenceLanczosScidac _LocalCoherenceLanczos(FrbGrid,CoarseGrid5,HermOp,Odd); + std::cout << GridLogMessage << "Constructed LocalCoherenceLanczos" << std::endl; + + assert( (Params.doFine)||(Params.doFineRead)); + + if ( Params.doFine ) { + std::cout << GridLogMessage << "Performing fine grid IRL Nstop "<< Ns1 << " Nk "< Date: Fri, 27 Apr 2018 08:57:34 +0100 Subject: [PATCH 377/377] Update with LIME library guard --- tests/Test_compressed_lanczos_hot_start.cc | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/tests/Test_compressed_lanczos_hot_start.cc b/tests/Test_compressed_lanczos_hot_start.cc index 998f1b9f..3276d0f8 100644 --- a/tests/Test_compressed_lanczos_hot_start.cc +++ b/tests/Test_compressed_lanczos_hot_start.cc @@ -56,6 +56,7 @@ public: void checkpointFine(std::string evecs_file,std::string evals_file) { +#ifdef HAVE_LIME assert(this->subspace.size()==nbasis); emptyUserRecord record; Grid::QCD::ScidacWriter WR(this->_FineGrid->IsBoss()); @@ -67,10 +68,14 @@ public: XmlWriter WRx(evals_file); write(WRx,"evals",this->evals_fine); +#else + assert(0); +#endif } void checkpointFineRestore(std::string evecs_file,std::string evals_file) { +#ifdef HAVE_LIME this->evals_fine.resize(nbasis); this->subspace.resize(nbasis,this->_FineGrid); @@ -90,10 +95,14 @@ public: } RD.close(); +#else + assert(0); +#endif } void checkpointCoarse(std::string evecs_file,std::string evals_file) { +#ifdef HAVE_LIME int n = this->evec_coarse.size(); emptyUserRecord record; Grid::QCD::ScidacWriter WR(this->_CoarseGrid->IsBoss()); @@ -105,10 +114,14 @@ public: XmlWriter WRx(evals_file); write(WRx,"evals",this->evals_coarse); +#else + assert(0); +#endif } void checkpointCoarseRestore(std::string evecs_file,std::string evals_file,int nvec) { +#ifdef 
HAVE_LIME std::cout << "resizing coarse vecs to " << nvec<< std::endl; this->evals_coarse.resize(nvec); this->evec_coarse.resize(nvec,this->_CoarseGrid); @@ -125,6 +138,9 @@ public: RD.readScidacFieldRecord(this->evec_coarse[k],record); } RD.close(); +#else + assert(0); +#endif } };