From 1c4bc7ed38c6e008972e205920d11e31880313d0 Mon Sep 17 00:00:00 2001 From: paboyle Date: Fri, 31 Mar 2017 14:41:48 +0900 Subject: [PATCH 01/13] Debugged staggered conventions --- .../fermion/ImprovedStaggeredFermion.cc | 61 ++- .../action/fermion/ImprovedStaggeredFermion.h | 9 + lib/qcd/action/fermion/StaggeredKernelsAsm.cc | 13 +- .../action/fermion/StaggeredKernelsHand.cc | 21 +- scripts/zmobius.sh | 35 ++ tests/qdpxx/Test_qdpxx_stag.cc | 364 ++++++++++++++++++ 6 files changed, 491 insertions(+), 12 deletions(-) create mode 100644 scripts/zmobius.sh create mode 100644 tests/qdpxx/Test_qdpxx_stag.cc diff --git a/lib/qcd/action/fermion/ImprovedStaggeredFermion.cc b/lib/qcd/action/fermion/ImprovedStaggeredFermion.cc index ec5811e0..2ba4f4af 100644 --- a/lib/qcd/action/fermion/ImprovedStaggeredFermion.cc +++ b/lib/qcd/action/fermion/ImprovedStaggeredFermion.cc @@ -40,10 +40,10 @@ ImprovedStaggeredFermionStatic::displacements({1, 1, 1, 1, -1, -1, -1, -1, 3, 3, // Constructor and gauge import ///////////////////////////////// + template -ImprovedStaggeredFermion::ImprovedStaggeredFermion(GaugeField &_Uthin, GaugeField &_Ufat, GridCartesian &Fgrid, - GridRedBlackCartesian &Hgrid, RealD _mass, - RealD _c1, RealD _c2,RealD _u0, +ImprovedStaggeredFermion::ImprovedStaggeredFermion(GridCartesian &Fgrid, GridRedBlackCartesian &Hgrid, + RealD _mass, const ImplParams &p) : Kernels(p), _grid(&Fgrid), @@ -52,9 +52,6 @@ ImprovedStaggeredFermion::ImprovedStaggeredFermion(GaugeField &_Uthin, Gau StencilEven(&Hgrid, npoint, Even, directions, displacements), // source is Even StencilOdd(&Hgrid, npoint, Odd, directions, displacements), // source is Odd mass(_mass), - c1(_c1), - c2(_c2), - u0(_u0), Lebesgue(_grid), LebesgueEvenOdd(_cbgrid), Umu(&Fgrid), @@ -65,9 +62,29 @@ ImprovedStaggeredFermion::ImprovedStaggeredFermion(GaugeField &_Uthin, Gau UUUmuOdd(&Hgrid) , _tmp(&Hgrid) { - // Allocate the required comms buffer +} + +template +ImprovedStaggeredFermion::ImprovedStaggeredFermion(GaugeField &_Uthin, GaugeField &_Ufat, GridCartesian &Fgrid, + GridRedBlackCartesian &Hgrid, RealD _mass, + RealD _c1, RealD _c2,RealD _u0, + const ImplParams &p) + : ImprovedStaggeredFermion(Fgrid,Hgrid,_mass,p) +{ + c1=_c1; + c2=_c2; + u0=_u0; ImportGauge(_Uthin,_Ufat); } +template +ImprovedStaggeredFermion::ImprovedStaggeredFermion(GaugeField &_Uthin,GaugeField &_Utriple, GaugeField &_Ufat, GridCartesian &Fgrid, + GridRedBlackCartesian &Hgrid, RealD _mass, + const ImplParams &p) + : ImprovedStaggeredFermion(Fgrid,Hgrid,_mass,p) +{ + ImportGaugeSimple(_Utriple,_Ufat); +} + //////////////////////////////////////////////////////////// // Momentum space propagator should be @@ -86,6 +103,34 @@ void ImprovedStaggeredFermion::ImportGauge(const GaugeField &_Uthin) ImportGauge(_Uthin,_Uthin); }; template +void ImprovedStaggeredFermion::ImportGaugeSimple(const GaugeField &_Utriple,const GaugeField &_Ufat) +{ + ///////////////////////////////////////////////////////////////// + // Trivial import; phases and fattening and such like preapplied + ///////////////////////////////////////////////////////////////// + GaugeLinkField U(GaugeGrid()); + + for (int mu = 0; mu < Nd; mu++) { + + U = PeekIndex(_Utriple, mu); + PokeIndex(UUUmu, U, mu ); + + U = adj( Cshift(U, mu, -3)); + PokeIndex(UUUmu, -U, mu+4 ); + + U = PeekIndex(_Ufat, mu); + PokeIndex(Umu, U, mu); + + U = adj( Cshift(U, mu, -1)); + PokeIndex(Umu, -U, mu+4); + + } + pickCheckerboard(Even, UmuEven, Umu); + pickCheckerboard(Odd, UmuOdd , Umu); + pickCheckerboard(Even, UUUmuEven,UUUmu); + pickCheckerboard(Odd, UUUmuOdd, UUUmu); +} +template void ImprovedStaggeredFermion::ImportGauge(const GaugeField &_Uthin,const GaugeField &_Ufat) { GaugeLinkField U(GaugeGrid()); @@ -115,6 +160,8 @@ void ImprovedStaggeredFermion::ImportGauge(const GaugeField &_Uthin,const PokeIndex(UUUmu, U*(-0.5*c2/u0/u0/u0), mu+4); } + std::cout << " Umu " << Umu._odata[0]<, public ImprovedS RealD _c1=9.0/8.0, RealD _c2=-1.0/24.0,RealD _u0=1.0, const ImplParams &p = ImplParams()); + ImprovedStaggeredFermion(GaugeField &_Uthin, GaugeField &_Utriple, GaugeField &_Ufat, GridCartesian &Fgrid, + GridRedBlackCartesian &Hgrid, RealD _mass, + const ImplParams &p = ImplParams()); + + ImprovedStaggeredFermion(GridCartesian &Fgrid, GridRedBlackCartesian &Hgrid, RealD _mass, + const ImplParams &p = ImplParams()); + + // DoubleStore impl dependent + void ImportGaugeSimple(const GaugeField &_Utriple, const GaugeField &_Ufat); void ImportGauge(const GaugeField &_Uthin, const GaugeField &_Ufat); void ImportGauge(const GaugeField &_Uthin); diff --git a/lib/qcd/action/fermion/StaggeredKernelsAsm.cc b/lib/qcd/action/fermion/StaggeredKernelsAsm.cc index bfe13f07..fd881716 100644 --- a/lib/qcd/action/fermion/StaggeredKernelsAsm.cc +++ b/lib/qcd/action/fermion/StaggeredKernelsAsm.cc @@ -587,7 +587,6 @@ void StaggeredKernels::DhopSiteAsm(StencilImpl &st, LebesgueOrder &lo, int sU, const FermionField &in, FermionField &out) { assert(0); - }; @@ -905,9 +904,17 @@ template <> void StaggeredKernels::DhopSiteAsm(StencilImpl &st, #endif } +#define KERNEL_INSTANTIATE(CLASS,FUNC,IMPL) \ + template void CLASS::FUNC(StencilImpl &st, LebesgueOrder &lo, \ + DoubledGaugeField &U, \ + DoubledGaugeField &UUU, \ + SiteSpinor *buf, int LLs, \ + int sU, const FermionField &in, FermionField &out); -FermOpStaggeredTemplateInstantiate(StaggeredKernels); -FermOpStaggeredVec5dTemplateInstantiate(StaggeredKernels); +KERNEL_INSTANTIATE(StaggeredKernels,DhopSiteAsm,StaggeredImplD); +KERNEL_INSTANTIATE(StaggeredKernels,DhopSiteAsm,StaggeredImplF); +KERNEL_INSTANTIATE(StaggeredKernels,DhopSiteAsm,StaggeredVec5dImplD); +KERNEL_INSTANTIATE(StaggeredKernels,DhopSiteAsm,StaggeredVec5dImplF); }} diff --git a/lib/qcd/action/fermion/StaggeredKernelsHand.cc b/lib/qcd/action/fermion/StaggeredKernelsHand.cc index f5806657..7de8480c 100644 --- a/lib/qcd/action/fermion/StaggeredKernelsHand.cc +++ b/lib/qcd/action/fermion/StaggeredKernelsHand.cc @@ -299,7 +299,24 @@ void StaggeredKernels::DhopSiteDepthHand(StencilImpl &st, LebesgueOrder &l } -FermOpStaggeredTemplateInstantiate(StaggeredKernels); -FermOpStaggeredVec5dTemplateInstantiate(StaggeredKernels); +#define DHOP_SITE_HAND_INSTANTIATE(IMPL) \ + template void StaggeredKernels::DhopSiteHand(StencilImpl &st, LebesgueOrder &lo, \ + DoubledGaugeField &U,DoubledGaugeField &UUU, \ + SiteSpinor *buf, int LLs, \ + int sU, const FermionField &in, FermionField &out, int dag); + +#define DHOP_SITE_DEPTH_HAND_INSTANTIATE(IMPL) \ + template void StaggeredKernels::DhopSiteDepthHand(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, \ + SiteSpinor *buf, int sF, \ + int sU, const FermionField &in, SiteSpinor &out,int threeLink) ; +DHOP_SITE_HAND_INSTANTIATE(StaggeredImplD); +DHOP_SITE_HAND_INSTANTIATE(StaggeredImplF); +DHOP_SITE_HAND_INSTANTIATE(StaggeredVec5dImplD); +DHOP_SITE_HAND_INSTANTIATE(StaggeredVec5dImplF); + +DHOP_SITE_DEPTH_HAND_INSTANTIATE(StaggeredImplD); +DHOP_SITE_DEPTH_HAND_INSTANTIATE(StaggeredImplF); +DHOP_SITE_DEPTH_HAND_INSTANTIATE(StaggeredVec5dImplD); +DHOP_SITE_DEPTH_HAND_INSTANTIATE(StaggeredVec5dImplF); }} diff --git a/scripts/zmobius.sh b/scripts/zmobius.sh new file mode 100644 index 00000000..04b223d2 --- /dev/null +++ b/scripts/zmobius.sh @@ -0,0 +1,35 @@ +#!/bin/bash +fn=$1 + +grep "double zmobius_" $fn | +awk 'BEGIN{ m["zmobius_b_coeff"]=0; m["zmobius_c_coeff"]=1; }{ val[m[substr($2,0,15)]][substr($2,17)+0]=$4; }END{ + + ls=length(val[0])/2; + + print "ls = " ls + + bmc=-111; + + for (s=0;s(%.15g,%.15g) );\n",omegar[s],omegai[s]); + } + +}' diff --git a/tests/qdpxx/Test_qdpxx_stag.cc b/tests/qdpxx/Test_qdpxx_stag.cc new file mode 100644 index 00000000..a7563924 --- /dev/null +++ b/tests/qdpxx/Test_qdpxx_stag.cc @@ -0,0 +1,364 @@ + /************************************************************************************* + + Grid physics library, www.github.com/paboyle/Grid + + Source file: ./tests/qdpxx/Test_qdpxx_munprec.cc + + Copyright (C) 2015 + +Author: Azusa Yamaguchi +Author: paboyle + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along + with this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + + See the full license in the file "LICENSE" in the top level distribution directory + *************************************************************************************/ + /* END LEGAL */ +#include + +double mq=0.1; + +typedef Grid::QCD::StaggeredImplR::FermionField FermionField; +typedef Grid::QCD::LatticeGaugeField GaugeField; + +void make_gauge (GaugeField & lat, FermionField &src); +void calc_grid (GaugeField & lat, GaugeField & uthin,GaugeField & ufat, FermionField &src, FermionField &res,int dag); +void calc_chroma (GaugeField & lat,GaugeField & uthin,GaugeField & ufat, FermionField &src, FermionField &res,int dag); + +#include +#include +#include + +namespace Chroma { + + +class ChromaWrapper { +public: + + typedef multi1d U; + typedef LatticeStaggeredFermion T4; + + static void ImportGauge(GaugeField & gr, + QDP::multi1d & ch) + { + Grid::QCD::LorentzColourMatrix LCM; + Grid::Complex cc; + QDP::ColorMatrix cm; + QDP::Complex c; + + std::vector x(4); + QDP::multi1d cx(4); + std::vector gd= gr._grid->GlobalDimensions(); + + for (x[0]=0;x[0] & ch) + { + Grid::QCD::LorentzColourMatrix LCM; + Grid::Complex cc; + QDP::ColorMatrix cm; + QDP::Complex c; + + std::vector x(4); + QDP::multi1d cx(4); + std::vector gd= gr._grid->GlobalDimensions(); + + for (x[0]=0;x[0] x(5); + QDP::multi1d cx(4); + std::vector gd= gr._grid->GlobalDimensions(); + + for (x[0]=0;x[0] x(5); + QDP::multi1d cx(4); + std::vector gd= gr._grid->GlobalDimensions(); + + for (x[0]=0;x[0] > GetLinOp (U &u,U &u_fat,U &u_triple) + { + QDP::Real _mq(mq); + QDP::multi1d bcs(QDP::Nd); + + bcs[0] = bcs[1] = bcs[2] = bcs[3] = 1; + + Chroma::AsqtadFermActParams p; + p.Mass = _mq; + p.u0 = Real(1.0); + + + Chroma::Handle > fbc(new Chroma::SimpleFermBC< T4, U, U >(bcs)); + Chroma::Handle > cfs( new Chroma::CreateSimpleFermState(fbc)); + Chroma::AsqtadFermAct S_f(cfs,p); + Chroma::Handle< Chroma::FermState > ffs( S_f.createState(u) ); + u_fat =ffs.cast()->getFatLinks(); + u_triple=ffs.cast()->getTripleLinks(); + return S_f.linOp(ffs); + } + +}; +} + +int main (int argc,char **argv ) +{ + + /******************************************************** + * Setup QDP + *********************************************************/ + Chroma::initialize(&argc,&argv); + Chroma::WilsonTypeFermActs4DEnv::registerAll(); + + /******************************************************** + * Setup Grid + *********************************************************/ + Grid::Grid_init(&argc,&argv); + Grid::GridCartesian * UGrid = Grid::QCD::SpaceTimeGrid::makeFourDimGrid(Grid::GridDefaultLatt(), + Grid::GridDefaultSimd(Grid::QCD::Nd,Grid::vComplex::Nsimd()), + Grid::GridDefaultMpi()); + + std::vector gd = UGrid->GlobalDimensions(); + QDP::multi1d nrow(QDP::Nd); + for(int mu=0;mu<4;mu++) nrow[mu] = gd[mu]; + + QDP::Layout::setLattSize(nrow); + QDP::Layout::create(); + + GaugeField uthin (UGrid); + GaugeField ufat (UGrid); + GaugeField utriple(UGrid); + FermionField src(UGrid); + FermionField res_chroma(UGrid); + FermionField res_grid (UGrid); + + + { + + std::cout << "*****************************"< U; + + U u(4); + U ut(4); + U uf(4); + + // Chroma::HotSt(u); + Chroma::ChromaWrapper::ImportGauge(lat,u) ; + + QDP::LatticeStaggeredFermion check; + QDP::LatticeStaggeredFermion result; + QDP::LatticeStaggeredFermion tmp; + QDP::LatticeStaggeredFermion psi; + + Chroma::ChromaWrapper::ImportFermion(src,psi); + + auto linop =Chroma::ChromaWrapper::GetLinOp(u,uf,ut); + + Chroma::ChromaWrapper::ExportGauge(uthin,ut) ; + Chroma::ChromaWrapper::ExportGauge(ufat ,uf) ; + + enum Chroma::PlusMinus isign; + if ( dag ) { + isign=Chroma::MINUS; + } else { + isign=Chroma::PLUS; + } + + std::cout << "Calling Chroma Linop "<< std::endl; + linop->evenEvenLinOp(tmp,psi,isign); check[rb[0]] = tmp; + linop->oddOddLinOp (tmp,psi,isign); check[rb[1]] = tmp; + linop->evenOddLinOp(tmp,psi,isign) ; check[rb[0]]+= tmp; + linop->oddEvenLinOp(tmp,psi,isign) ; check[rb[1]]+= tmp; + + Chroma::ChromaWrapper::ExportFermion(res,check) ; +} + + +void make_gauge(GaugeField & Umu,FermionField &src) +{ + using namespace Grid; + using namespace Grid::QCD; + + std::vector seeds4({1,2,3,4}); + + Grid::GridCartesian * UGrid = (Grid::GridCartesian *) Umu._grid; + Grid::GridParallelRNG RNG4(UGrid); RNG4.SeedFixedIntegers(seeds4); + Grid::QCD::SU3::HotConfiguration(RNG4,Umu); + Grid::gaussian(RNG4,src); +} + +void calc_grid(GaugeField & Uthin, GaugeField & Utriple, GaugeField & Ufat, FermionField &src, FermionField &res,int dag) +{ + using namespace Grid; + using namespace Grid::QCD; + + Grid::GridCartesian * UGrid = (Grid::GridCartesian *) Uthin._grid; + Grid::GridRedBlackCartesian * UrbGrid = Grid::QCD::SpaceTimeGrid::makeFourDimRedBlackGrid(UGrid); + + Grid::QCD::ImprovedStaggeredFermionR Dstag(Uthin,Utriple,Ufat,*UGrid,*UrbGrid,mq*2.0); + + std::cout << Grid::GridLogMessage <<" Calling Grid staggered multiply "< Date: Fri, 31 Mar 2017 17:07:43 +0900 Subject: [PATCH 02/13] Christoph's coefficients. --- tests/core/Test_zmobius_even_odd.cc | 26 ++++++++++++++++++++------ tests/solver/Test_zmobius_cg_prec.cc | 26 ++++++++++++++++++++------ 2 files changed, 40 insertions(+), 12 deletions(-) diff --git a/tests/core/Test_zmobius_even_odd.cc b/tests/core/Test_zmobius_even_odd.cc index d547f2f7..867c3359 100644 --- a/tests/core/Test_zmobius_even_odd.cc +++ b/tests/core/Test_zmobius_even_odd.cc @@ -53,7 +53,7 @@ int main (int argc, char ** argv) std::cout< > omegas; +#if 0 for(int i=0;i temp (0.25+0.01*i, imag*0.1); - omegas.push_back(temp); + double imag = 0.; + if (i==0) imag=1.; + if (i==Ls-1) imag=-1.; + std::complex temp (0.25+0.01*i, imag*0.01); + omegas.push_back(temp); } +#else + omegas.push_back( std::complex(1.45806438985048,-0) ); + omegas.push_back( std::complex(1.18231318389348,-0) ); + omegas.push_back( std::complex(0.830951166685955,-0) ); + omegas.push_back( std::complex(0.542352409156791,-0) ); + omegas.push_back( std::complex(0.341985020453729,-0) ); + omegas.push_back( std::complex(0.21137902619029,-0) ); + omegas.push_back( std::complex(0.126074299502912,-0) ); + omegas.push_back( std::complex(0.0990136651962626,-0) ); + omegas.push_back( std::complex(0.0686324988446592,0.0550658530827402) ); + omegas.push_back( std::complex(0.0686324988446592,-0.0550658530827402) ); +#endif + ZMobiusFermionR Ddwf(Umu, *FGrid, *FrbGrid, *UGrid, *UrbGrid, mass, M5, omegas,1.,0.); // DomainWallFermionR Ddwf(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5); diff --git a/tests/solver/Test_zmobius_cg_prec.cc b/tests/solver/Test_zmobius_cg_prec.cc index 4ae98d71..922ca8ae 100644 --- a/tests/solver/Test_zmobius_cg_prec.cc +++ b/tests/solver/Test_zmobius_cg_prec.cc @@ -43,7 +43,7 @@ Gamma::Algebra Gmu[] = {Gamma::Algebra::GammaX, Gamma::Algebra::GammaY, Gamma::A int main(int argc, char** argv) { Grid_init(&argc, &argv); - const int Ls = 16; + const int Ls = 10; GridCartesian* UGrid = SpaceTimeGrid::makeFourDimGrid( GridDefaultLatt(), GridDefaultSimd(Nd, vComplex::Nsimd()), @@ -80,13 +80,27 @@ int main(int argc, char** argv) { RealD mass = 0.01; RealD M5 = 1.8; std::vector < std::complex > omegas; +#if 0 for(int i=0;i temp (0.25+0.01*i, imag*0.01); - omegas.push_back(temp); + double imag = 0.; + if (i==0) imag=1.; + if (i==Ls-1) imag=-1.; + std::complex temp (0.25+0.01*i, imag*0.01); + omegas.push_back(temp); } +#else + omegas.push_back( std::complex(1.45806438985048,-0) ); + omegas.push_back( std::complex(1.18231318389348,-0) ); + omegas.push_back( std::complex(0.830951166685955,-0) ); + omegas.push_back( std::complex(0.542352409156791,-0) ); + omegas.push_back( std::complex(0.341985020453729,-0) ); + omegas.push_back( std::complex(0.21137902619029,-0) ); + omegas.push_back( std::complex(0.126074299502912,-0) ); + omegas.push_back( std::complex(0.0990136651962626,-0) ); + omegas.push_back( std::complex(0.0686324988446592,0.0550658530827402) ); + omegas.push_back( std::complex(0.0686324988446592,-0.0550658530827402) ); +#endif + ZMobiusFermionR Ddwf(Umu, *FGrid, *FrbGrid, *UGrid, *UrbGrid, mass, M5, omegas,1.,0.); LatticeFermion src_o(FrbGrid); From 7e5faa0f3486c9eba1f8ecc68bd386686a73da5b Mon Sep 17 00:00:00 2001 From: paboyle Date: Sun, 2 Apr 2017 00:25:44 +0900 Subject: [PATCH 03/13] Multiple RNGs --- lib/parallelIO/NerscIO.h | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/lib/parallelIO/NerscIO.h b/lib/parallelIO/NerscIO.h index afd35236..72b3e32d 100644 --- a/lib/parallelIO/NerscIO.h +++ b/lib/parallelIO/NerscIO.h @@ -491,10 +491,15 @@ static inline void writeRNGState(GridSerialRNG &serial,GridParallelRNG ¶llel #ifdef RNG_RANLUX header.floating_point = std::string("UINT64"); header.data_type = std::string("RANLUX48"); -#else +#endif +#ifdef RNG_MT19937 header.floating_point = std::string("UINT32"); header.data_type = std::string("MT19937"); #endif +#ifdef RNG_SITMO + header.floating_point = std::string("UINT64"); + header.data_type = std::string("SITMO"); +#endif truncate(file); offset = writeHeader(header,file); @@ -522,10 +527,15 @@ static inline void readRNGState(GridSerialRNG &serial,GridParallelRNG & parallel #ifdef RNG_RANLUX assert(format == std::string("UINT64")); assert(data_type == std::string("RANLUX48")); -#else +#endif +#ifdef RNG_MT19937 assert(format == std::string("UINT32")); assert(data_type == std::string("MT19937")); #endif +#ifdef RNG_SITMO + assert(format == std::string("UINT64")); + assert(data_type == std::string("SITMO")); +#endif // depending on datatype, set up munger; // munger is a function of From d1d63a4f2d0bc78c46930b762683c441ef6b226e Mon Sep 17 00:00:00 2001 From: paboyle Date: Sun, 2 Apr 2017 00:26:05 +0900 Subject: [PATCH 04/13] sitmo default --- configure.ac | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/configure.ac b/configure.ac index 95f573f5..3a323a5d 100644 --- a/configure.ac +++ b/configure.ac @@ -321,7 +321,7 @@ AM_CONDITIONAL(BUILD_COMMS_NONE, [ test "${comms_type}X" == "noneX" ]) ############### RNG selection AC_ARG_ENABLE([rng],[AC_HELP_STRING([--enable-rng=ranlux48|mt19937|sitmo],\ [Select Random Number Generator to be used])],\ - [ac_RNG=${enable_rng}],[ac_RNG=ranlux48]) + [ac_RNG=${enable_rng}],[ac_RNG=sitmo]) case ${ac_RNG} in ranlux48) @@ -401,6 +401,7 @@ AC_CONFIG_FILES(tests/hadrons/Makefile) AC_CONFIG_FILES(tests/hmc/Makefile) AC_CONFIG_FILES(tests/solver/Makefile) AC_CONFIG_FILES(tests/qdpxx/Makefile) +AC_CONFIG_FILES(tests/testu01/Makefile) AC_CONFIG_FILES(benchmarks/Makefile) AC_CONFIG_FILES(extras/Makefile) AC_CONFIG_FILES(extras/Hadrons/Makefile) From f18f5ed926ac435b4cb14a7cde3b2a4a9fc3887e Mon Sep 17 00:00:00 2001 From: paboyle Date: Sun, 2 Apr 2017 00:26:26 +0900 Subject: [PATCH 05/13] Drop random device --- benchmarks/Benchmark_memory_asynch.cc | 3 ++- benchmarks/Benchmark_memory_bandwidth.cc | 8 ++++---- benchmarks/Benchmark_staggered.cc | 2 +- benchmarks/Benchmark_su3.cc | 8 ++++---- benchmarks/Benchmark_wilson.cc | 2 +- 5 files changed, 12 insertions(+), 11 deletions(-) diff --git a/benchmarks/Benchmark_memory_asynch.cc b/benchmarks/Benchmark_memory_asynch.cc index 9f4f5d61..2124b137 100644 --- a/benchmarks/Benchmark_memory_asynch.cc +++ b/benchmarks/Benchmark_memory_asynch.cc @@ -66,7 +66,8 @@ int main (int argc, char ** argv) Vec tsum; tsum = zero; - GridParallelRNG pRNG(&Grid); pRNG.SeedRandomDevice(); + GridParallelRNG pRNG(&Grid); + pRNG.SeedFixedIntegers(std::vector({56,17,89,101})); std::vector stop(threads); Vector sum(threads); diff --git a/benchmarks/Benchmark_memory_bandwidth.cc b/benchmarks/Benchmark_memory_bandwidth.cc index 435af7f4..d57c4df5 100644 --- a/benchmarks/Benchmark_memory_bandwidth.cc +++ b/benchmarks/Benchmark_memory_bandwidth.cc @@ -65,7 +65,7 @@ int main (int argc, char ** argv) uint64_t Nloop=NLOOP; - // GridParallelRNG pRNG(&Grid); pRNG.SeedRandomDevice(); + // GridParallelRNG pRNG(&Grid); pRNG.SeedFixedIntegers(std::vector({45,12,81,9}); LatticeVec z(&Grid); //random(pRNG,z); LatticeVec x(&Grid); //random(pRNG,x); @@ -100,7 +100,7 @@ int main (int argc, char ** argv) int vol = latt_size[0]*latt_size[1]*latt_size[2]*latt_size[3]; GridCartesian Grid(latt_size,simd_layout,mpi_layout); - // GridParallelRNG pRNG(&Grid); pRNG.SeedRandomDevice(); + // GridParallelRNG pRNG(&Grid); pRNG.SeedFixedIntegers(std::vector({45,12,81,9}); LatticeVec z(&Grid); //random(pRNG,z); LatticeVec x(&Grid); //random(pRNG,x); @@ -138,7 +138,7 @@ int main (int argc, char ** argv) GridCartesian Grid(latt_size,simd_layout,mpi_layout); - // GridParallelRNG pRNG(&Grid); pRNG.SeedRandomDevice(); + // GridParallelRNG pRNG(&Grid); pRNG.SeedFixedIntegers(std::vector({45,12,81,9}); LatticeVec z(&Grid); //random(pRNG,z); LatticeVec x(&Grid); //random(pRNG,x); @@ -173,7 +173,7 @@ int main (int argc, char ** argv) uint64_t Nloop=NLOOP; GridCartesian Grid(latt_size,simd_layout,mpi_layout); - // GridParallelRNG pRNG(&Grid); pRNG.SeedRandomDevice(); + // GridParallelRNG pRNG(&Grid); pRNG.SeedFixedIntegers(std::vector({45,12,81,9}); LatticeVec z(&Grid); //random(pRNG,z); LatticeVec x(&Grid); //random(pRNG,x); LatticeVec y(&Grid); //random(pRNG,y); diff --git a/benchmarks/Benchmark_staggered.cc b/benchmarks/Benchmark_staggered.cc index 121dc0d5..dc2dcf91 100644 --- a/benchmarks/Benchmark_staggered.cc +++ b/benchmarks/Benchmark_staggered.cc @@ -51,7 +51,7 @@ int main (int argc, char ** argv) std::vector seeds({1,2,3,4}); GridParallelRNG pRNG(&Grid); pRNG.SeedFixedIntegers(seeds); - // pRNG.SeedRandomDevice(); + // pRNG.SeedFixedIntegers(std::vector({45,12,81,9}); typedef typename ImprovedStaggeredFermionR::FermionField FermionField; typename ImprovedStaggeredFermionR::ImplParams params; diff --git a/benchmarks/Benchmark_su3.cc b/benchmarks/Benchmark_su3.cc index b6d1d303..c234c301 100644 --- a/benchmarks/Benchmark_su3.cc +++ b/benchmarks/Benchmark_su3.cc @@ -55,7 +55,7 @@ int main (int argc, char ** argv) std::vector latt_size ({lat*mpi_layout[0],lat*mpi_layout[1],lat*mpi_layout[2],lat*mpi_layout[3]}); int vol = latt_size[0]*latt_size[1]*latt_size[2]*latt_size[3]; GridCartesian Grid(latt_size,simd_layout,mpi_layout); - // GridParallelRNG pRNG(&Grid); pRNG.SeedRandomDevice(); + // GridParallelRNG pRNG(&Grid); pRNG.SeedFixedIntegers(std::vector({45,12,81,9}); LatticeColourMatrix z(&Grid);// random(pRNG,z); LatticeColourMatrix x(&Grid);// random(pRNG,x); @@ -88,7 +88,7 @@ int main (int argc, char ** argv) int vol = latt_size[0]*latt_size[1]*latt_size[2]*latt_size[3]; GridCartesian Grid(latt_size,simd_layout,mpi_layout); - // GridParallelRNG pRNG(&Grid); pRNG.SeedRandomDevice(); + // GridParallelRNG pRNG(&Grid); pRNG.SeedFixedIntegers(std::vector({45,12,81,9}); LatticeColourMatrix z(&Grid); //random(pRNG,z); LatticeColourMatrix x(&Grid); //random(pRNG,x); @@ -119,7 +119,7 @@ int main (int argc, char ** argv) int vol = latt_size[0]*latt_size[1]*latt_size[2]*latt_size[3]; GridCartesian Grid(latt_size,simd_layout,mpi_layout); - // GridParallelRNG pRNG(&Grid); pRNG.SeedRandomDevice(); + // GridParallelRNG pRNG(&Grid); pRNG.SeedFixedIntegers(std::vector({45,12,81,9}); LatticeColourMatrix z(&Grid); //random(pRNG,z); LatticeColourMatrix x(&Grid); //random(pRNG,x); @@ -150,7 +150,7 @@ int main (int argc, char ** argv) int vol = latt_size[0]*latt_size[1]*latt_size[2]*latt_size[3]; GridCartesian Grid(latt_size,simd_layout,mpi_layout); - // GridParallelRNG pRNG(&Grid); pRNG.SeedRandomDevice(); + // GridParallelRNG pRNG(&Grid); pRNG.SeedFixedIntegers(std::vector({45,12,81,9}); LatticeColourMatrix z(&Grid); //random(pRNG,z); LatticeColourMatrix x(&Grid); //random(pRNG,x); diff --git a/benchmarks/Benchmark_wilson.cc b/benchmarks/Benchmark_wilson.cc index 11f1353f..671e7654 100644 --- a/benchmarks/Benchmark_wilson.cc +++ b/benchmarks/Benchmark_wilson.cc @@ -69,7 +69,7 @@ int main (int argc, char ** argv) std::vector seeds({1,2,3,4}); GridParallelRNG pRNG(&Grid); pRNG.SeedFixedIntegers(seeds); - // pRNG.SeedRandomDevice(); + // pRNG.SeedFixedIntegers(std::vector({45,12,81,9}); LatticeFermion src (&Grid); random(pRNG,src); LatticeFermion result(&Grid); result=zero; From 9cbcdd65d7c5717a67376d3068970c3657d4c683 Mon Sep 17 00:00:00 2001 From: paboyle Date: Sun, 2 Apr 2017 00:26:57 +0900 Subject: [PATCH 06/13] No random device seed --- lib/algorithms/CoarsenedMatrix.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/algorithms/CoarsenedMatrix.h b/lib/algorithms/CoarsenedMatrix.h index 73f6baff..c2910151 100644 --- a/lib/algorithms/CoarsenedMatrix.h +++ b/lib/algorithms/CoarsenedMatrix.h @@ -425,7 +425,7 @@ namespace Grid { A[p]=zero; } - GridParallelRNG RNG(Grid()); RNG.SeedRandomDevice(); + GridParallelRNG RNG(Grid()); RNG.SeedFixedIntegers(std::vector({55,72,19,17,34})); Lattice > val(Grid()); random(RNG,val); Complex one(1.0); From 935d82f5b10f594f40a46e1d8c0dd042d36b3352 Mon Sep 17 00:00:00 2001 From: paboyle Date: Sun, 2 Apr 2017 00:27:28 +0900 Subject: [PATCH 07/13] sanity checks --- lib/cartesian/Cartesian_base.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/lib/cartesian/Cartesian_base.h b/lib/cartesian/Cartesian_base.h index cea0f3dc..4ca8c580 100644 --- a/lib/cartesian/Cartesian_base.h +++ b/lib/cartesian/Cartesian_base.h @@ -177,9 +177,11 @@ public: // Global addressing //////////////////////////////////////////////////////////////// void GlobalIndexToGlobalCoor(int gidx,std::vector &gcoor){ + assert(gidx< gSites()); Lexicographic::CoorFromIndex(gcoor,gidx,_gdimensions); } void LocalIndexToLocalCoor(int lidx,std::vector &lcoor){ + assert(lidx & gcoor,int & gidx){ From 9dc7ca4c3be4bad81bedb7c54e34b26db58fad38 Mon Sep 17 00:00:00 2001 From: paboyle Date: Sun, 2 Apr 2017 00:28:22 +0900 Subject: [PATCH 08/13] Sitmo fast init --- lib/lattice/Lattice_rng.h | 252 ++++++++++++++++++-------------------- 1 file changed, 122 insertions(+), 130 deletions(-) diff --git a/lib/lattice/Lattice_rng.h b/lib/lattice/Lattice_rng.h index 3d653d17..31fef729 100644 --- a/lib/lattice/Lattice_rng.h +++ b/lib/lattice/Lattice_rng.h @@ -69,40 +69,6 @@ namespace Grid { return multiplicity; } - // Wrap seed_seq to give common interface with random_device - // Should rather wrap random_device and have a generate - class fixedSeed { - public: - - typedef std::seed_seq::result_type result_type; - - std::seed_seq src; - - template fixedSeed(const std::vector &seeds) : src(seeds.begin(),seeds.end()) {}; - - template< class RandomIt > void generate( RandomIt begin, RandomIt end ) { - src.generate(begin,end); - } - - }; - - - class deviceSeed { - public: - - std::random_device rd; - - typedef std::random_device::result_type result_type; - - deviceSeed(void) : rd(){}; - - template< class RandomIt > void generate( RandomIt begin, RandomIt end ) { - for(RandomIt it=begin; it!=end;it++){ - *it = rd(); - } - } - }; - // real scalars are one component template void fillScalar(scalar &s,distribution &dist,generator & gen) { @@ -118,67 +84,100 @@ namespace Grid { } class GridRNGbase { - public: - - int _seeded; // One generator per site. // Uniform and Gaussian distributions from these generators. #ifdef RNG_RANLUX - typedef uint64_t RngStateType; typedef std::ranlux48 RngEngine; + typedef uint64_t RngStateType; static const int RngStateCount = 15; -#elif RNG_MT19937 +#endif +#ifdef RNG_MT19937 typedef std::mt19937 RngEngine; typedef uint32_t RngStateType; static const int RngStateCount = std::mt19937::state_size; -#elif RNG_SITMO +#endif +#ifdef RNG_SITMO typedef sitmo::prng_engine RngEngine; typedef uint64_t RngStateType; static const int RngStateCount = 4; #endif - std::vector _generators; - std::vector> _uniform; - std::vector> _gaussian; - std::vector> _bernoulli; + /////////////////////// + // support for parallel init + /////////////////////// +#ifdef RNG_SITMO + static void Skip(RngEngine &eng) + { + uint64_t skip = 0x1; skip = skip<<40; + eng.discard(skip); + } +#endif + static RngEngine Reseed(RngEngine &eng) + { + const int reseeds=4; + std::uniform_int_distribution uid; + std::vector newseed(reseeds); + for(int i=0;i & saved,int gen) { + std::vector _generators; + std::vector > _uniform; + std::vector > _gaussian; + std::vector > _bernoulli; + std::vector > _uid; + + void GetState(std::vector & saved,RngEngine &eng) { saved.resize(RngStateCount); std::stringstream ss; - ss<<_generators[gen]; + ss<>saved[i]; } } - void SetState(std::vector & saved,int gen){ + void GetState(std::vector & saved,int gen) { + GetState(saved,_generators[gen]); + } + void SetState(std::vector & saved,RngEngine &eng){ assert(saved.size()==RngStateCount); std::stringstream ss; for(int i=0;i>_generators[gen]; + ss>>eng; } + void SetState(std::vector & saved,int gen){ + SetState(saved,_generators[gen]); + } + void SetEngine(RngEngine &Eng, int gen){ + _generators[gen]=Eng; + } + void GetEngine(RngEngine &Eng, int gen){ + Eng=_generators[gen]; + } + + template void Seed(source &src, int gen) + { + _generators[gen] = RngEngine(src); + } }; class GridSerialRNG : public GridRNGbase { public: - // FIXME ... do we require lockstep draws of randoms - // from all nodes keeping seeds consistent. - // place a barrier/broadcast in the fill routine - GridSerialRNG() : GridRNGbase() { _generators.resize(1); _uniform.resize(1,std::uniform_real_distribution{0,1}); _gaussian.resize(1,std::normal_distribution(0.0,1.0) ); _bernoulli.resize(1,std::discrete_distribution{1,1}); - _seeded=0; + _uid.resize(1,std::uniform_int_distribution() ); } - - template inline void fill(sobj &l,std::vector &dist){ typedef typename sobj::scalar_type scalar_type; @@ -195,7 +194,6 @@ namespace Grid { CartesianCommunicator::BroadcastWorld(0,(void *)&l,sizeof(l)); }; - template inline void fill(ComplexF &l,std::vector &dist){ dist[0].reset(); fillScalar(l,dist[0],_generators[0]); @@ -250,19 +248,10 @@ namespace Grid { CartesianCommunicator::BroadcastWorld(0,(void *)&l,sizeof(l)); } - template void Seed(source &src) - { - _generators[0] = RngEngine(src); - _seeded=1; - } - void SeedRandomDevice(void){ - deviceSeed src; - Seed(src); - } void SeedFixedIntegers(const std::vector &seeds){ CartesianCommunicator::BroadcastWorld(0,(void *)&seeds[0],sizeof(int)*seeds.size()); - fixedSeed src(seeds); - Seed(src); + std::seed_seq src(seeds.begin(),seeds.end()); + Seed(src,0); } }; @@ -285,15 +274,9 @@ namespace Grid { _uniform.resize(_vol,std::uniform_real_distribution{0,1}); _gaussian.resize(_vol,std::normal_distribution(0.0,1.0) ); _bernoulli.resize(_vol,std::discrete_distribution{1,1}); - _seeded=0; + _uid.resize(_vol,std::uniform_int_distribution() ); } - - - //FIXME implement generic IO and create state save/restore - //void SaveState(const std::string &file); - //void LoadState(const std::string &file); - template inline void fill(Lattice &l,std::vector &dist){ typedef typename vobj::scalar_object scalar_object; @@ -329,79 +312,88 @@ namespace Grid { } }; - // This loop could be made faster to avoid the Ahmdahl by - // i) seed generators on each timeslice, for x=y=z=0; - // ii) seed generators on each z for x=y=0 - // iii)seed generators on each y,z for x=0 - // iv) seed generators on each y,z,x - // made possible by physical indexing. - template void Seed(source &src) - { + void SeedFixedIntegers(const std::vector &seeds){ - typedef typename source::result_type seed_t; - std::uniform_int_distribution uid; + CartesianCommunicator::BroadcastWorld(0,(void *)&seeds[0],sizeof(int)*seeds.size()); - int numseed=4; - int gsites = _grid->_gsites; - std::vector site_init(numseed); + std::seed_seq source(seeds.begin(),seeds.end()); + + RngEngine master_engine(source); + +#ifdef RNG_SITMO std::vector gcoor; + for(int gidx=0;gidx<_grid->_gsites;gidx++){ - // Master RngEngine - std::vector master_init(numseed); src.generate(master_init.begin(),master_init.end()); - _grid->Broadcast(0,(void *)&master_init[0],sizeof(seed_t)*numseed); - fixedSeed master_seed(master_init); - RngEngine master_engine(master_seed); + Skip(master_engine); // advance the state; does this work? - // Per node RngEngine - std::vector node_init(numseed); - for(int r=0;r<_grid->ProcessorCount();r++) { - - std::vector rank_init(numseed); - for(int i=0;iThisRank() ) { - for(int i=0;iGlobalIndexToGlobalCoor(gidx,gcoor); _grid->GlobalCoorToRankIndex(rank,o_idx,i_idx,gcoor); if( rank == _grid->ThisRank() ){ int l_idx=generator_idx(o_idx,i_idx); - for(int i=0;iProcessorCount(); + int me= _grid->ThisRank(); + std::vector seeders(Nproc); + + for(int p=0;p seeders(Nthread); + for(int t=0;tlSites();l++) { + if ( (l%Nthread)==t ) { + _generators[l] = Reseed(master_engine); + } + } } } - _seeded=1; - } - void SeedRandomDevice(void){ - deviceSeed src; - Seed(src); +#endif } - void SeedFixedIntegers(const std::vector &seeds){ - CartesianCommunicator::BroadcastWorld(0,(void *)&seeds[0],sizeof(int)*seeds.size()); - fixedSeed src(seeds); - Seed(src); + + uint32_t GlobalU01(int gsite){ + + std::vector gcoor; + _grid->GlobalIndexToGlobalCoor(gsite,gcoor); + + int rank,o_idx,i_idx; + _grid->GlobalCoorToRankIndex(rank,o_idx,i_idx,gcoor); + + int l_idx=generator_idx(o_idx,i_idx); + + uint32_t the_number; + if( rank == _grid->ThisRank() ){ + the_number = _uid[l_idx](_generators[l_idx]); + } + + _grid->Broadcast(rank,(void *)&the_number,sizeof(the_number)); + + return the_number; } + }; template inline void random(GridParallelRNG &rng,Lattice &l){ From 0fade84ab258c454951b383ea04635c3ab95ab7f Mon Sep 17 00:00:00 2001 From: paboyle Date: Sun, 2 Apr 2017 00:29:40 +0900 Subject: [PATCH 09/13] No random device --- tests/IO/Test_nersc_io.cc | 4 +- tests/Test_cshift.cc | 2 +- tests/Test_simd.cc | 10 +- tests/Test_stencil.cc | 2 +- tests/core/Test_cshift_red_black.cc | 2 +- tests/core/Test_cshift_red_black_rotate.cc | 2 +- tests/core/Test_cshift_rotate.cc | 2 +- tests/core/Test_gamma.cc | 2 +- tests/core/Test_gpwilson_even_odd.cc | 2 +- tests/core/Test_lie_generators.cc | 2 +- tests/core/Test_main.cc | 4 +- tests/core/Test_rng.cc | 4 +- tests/core/Test_staggered.cc | 2 +- tests/core/Test_wilson_even_odd.cc | 2 +- tests/core/Test_wilson_tm_even_odd.cc | 2 +- tests/forces/Test_dwf_gpforce.cc | 4 +- tests/forces/Test_gp_rect_force.cc | 2 +- tests/forces/Test_rect_force.cc | 2 +- tests/forces/Test_wilson_force.cc | 2 +- tests/forces/Test_wilson_force_phiMdagMphi.cc | 2 +- tests/forces/Test_wilson_force_phiMphi.cc | 2 +- tests/testu01/Makefile.am | 3 + tests/testu01/Test_smallcrush.cc | 176 ++++++++++++++++++ 23 files changed, 208 insertions(+), 29 deletions(-) create mode 100644 tests/testu01/Makefile.am create mode 100644 tests/testu01/Test_smallcrush.cc diff --git a/tests/IO/Test_nersc_io.cc b/tests/IO/Test_nersc_io.cc index 0273d02a..e5ea7cec 100644 --- a/tests/IO/Test_nersc_io.cc +++ b/tests/IO/Test_nersc_io.cc @@ -54,8 +54,8 @@ int main (int argc, char ** argv) GridSerialRNG sRNGa; GridSerialRNG sRNGb; - pRNGa.SeedRandomDevice(); - sRNGa.SeedRandomDevice(); + pRNGa.SeedFixedIntegers(std::vector({45,12,81,9}); + sRNGa.SeedFixedIntegers(std::vector({45,12,81,9}); std::string rfile("./ckpoint_rng.4000"); NerscIO::writeRNGState(sRNGa,pRNGa,rfile); diff --git a/tests/Test_cshift.cc b/tests/Test_cshift.cc index e1dd0db8..f9559a83 100644 --- a/tests/Test_cshift.cc +++ b/tests/Test_cshift.cc @@ -41,7 +41,7 @@ int main (int argc, char ** argv) GridCartesian Fine(latt_size,simd_layout,mpi_layout); - GridParallelRNG FineRNG(&Fine); FineRNG.SeedRandomDevice(); + GridParallelRNG FineRNG(&Fine); FineRNG.SeedFixedIntegers(std::vector({45,12,81,9})); LatticeComplex U(&Fine); LatticeComplex ShiftU(&Fine); diff --git a/tests/Test_simd.cc b/tests/Test_simd.cc index f19ebd76..b3933ec6 100644 --- a/tests/Test_simd.cc +++ b/tests/Test_simd.cc @@ -125,7 +125,7 @@ template void Tester(const functor &func) { GridSerialRNG sRNG; - sRNG.SeedRandomDevice(); + sRNG.SeedFixedIntegers(std::vector({45,12,81,9})); int Nsimd = vec::Nsimd(); @@ -184,7 +184,7 @@ void IntTester(const functor &func) typedef Integer scal; typedef vInteger vec; GridSerialRNG sRNG; - sRNG.SeedRandomDevice(); + sRNG.SeedFixedIntegers(std::vector({45,12,81,9})); int Nsimd = vec::Nsimd(); @@ -242,7 +242,7 @@ template void ReductionTester(const functor &func) { GridSerialRNG sRNG; - sRNG.SeedRandomDevice(); + sRNG.SeedFixedIntegers(std::vector({45,12,81,9})); int Nsimd = vec::Nsimd(); @@ -343,7 +343,7 @@ template void PermTester(const functor &func) { GridSerialRNG sRNG; - sRNG.SeedRandomDevice(); + sRNG.SeedFixedIntegers(std::vector({45,12,81,9})); int Nsimd = vec::Nsimd(); @@ -409,7 +409,7 @@ template void ExchangeTester(const functor &func) { GridSerialRNG sRNG; - sRNG.SeedRandomDevice(); + sRNG.SeedFixedIntegers(std::vector({45,12,81,9})); int Nsimd = vec::Nsimd(); diff --git a/tests/Test_stencil.cc b/tests/Test_stencil.cc index 38873310..d7bc5a6c 100644 --- a/tests/Test_stencil.cc +++ b/tests/Test_stencil.cc @@ -52,7 +52,7 @@ int main (int argc, char ** argv) GridRedBlackCartesian rbFine(latt_size,simd_layout,mpi_layout); GridParallelRNG fRNG(&Fine); - // fRNG.SeedRandomDevice(); + // fRNG.SeedFixedIntegers(std::vector({45,12,81,9}); std::vector seeds({1,2,3,4}); fRNG.SeedFixedIntegers(seeds); diff --git a/tests/core/Test_cshift_red_black.cc b/tests/core/Test_cshift_red_black.cc index ae55cece..f9269709 100644 --- a/tests/core/Test_cshift_red_black.cc +++ b/tests/core/Test_cshift_red_black.cc @@ -49,7 +49,7 @@ int main (int argc, char ** argv) GridCartesian Fine (latt_size,simd_layout,mpi_layout); GridRedBlackCartesian RBFine(latt_size,simd_layout,mpi_layout,mask,1); - GridParallelRNG FineRNG(&Fine); FineRNG.SeedRandomDevice(); + GridParallelRNG FineRNG(&Fine); FineRNG.SeedFixedIntegers(std::vector({45,12,81,9})); LatticeComplex U(&Fine); LatticeComplex ShiftU(&Fine); diff --git a/tests/core/Test_cshift_red_black_rotate.cc b/tests/core/Test_cshift_red_black_rotate.cc index 4ff7067b..3ef1cd21 100644 --- a/tests/core/Test_cshift_red_black_rotate.cc +++ b/tests/core/Test_cshift_red_black_rotate.cc @@ -49,7 +49,7 @@ int main (int argc, char ** argv) GridCartesian Fine (latt_size,simd_layout,mpi_layout); GridRedBlackCartesian RBFine(latt_size,simd_layout,mpi_layout,mask,1); - GridParallelRNG FineRNG(&Fine); FineRNG.SeedRandomDevice(); + GridParallelRNG FineRNG(&Fine); FineRNG.SeedFixedIntegers(std::vector({45,12,81,9})); LatticeComplex err(&Fine); LatticeComplex U(&Fine); diff --git a/tests/core/Test_cshift_rotate.cc b/tests/core/Test_cshift_rotate.cc index a42fd22e..64c08892 100644 --- a/tests/core/Test_cshift_rotate.cc +++ b/tests/core/Test_cshift_rotate.cc @@ -41,7 +41,7 @@ int main (int argc, char ** argv) GridCartesian Fine(latt_size,simd_layout,mpi_layout); - GridParallelRNG FineRNG(&Fine); FineRNG.SeedRandomDevice(); + GridParallelRNG FineRNG(&Fine); FineRNG.SeedFixedIntegers(std::vector({45,12,81,9})); LatticeComplex U(&Fine); LatticeComplex ShiftU(&Fine); diff --git a/tests/core/Test_gamma.cc b/tests/core/Test_gamma.cc index ee31a69a..f3805bdb 100644 --- a/tests/core/Test_gamma.cc +++ b/tests/core/Test_gamma.cc @@ -245,7 +245,7 @@ int main(int argc, char *argv[]) GridCartesian Grid(latt_size,simd_layout,mpi_layout); GridSerialRNG sRNG; - sRNG.SeedRandomDevice(); + sRNG.SeedFixedIntegers(std::vector({45,12,81,9})); std::cout << GridLogMessage << "======== Test algebra" << std::endl; createTestAlgebra(); diff --git a/tests/core/Test_gpwilson_even_odd.cc b/tests/core/Test_gpwilson_even_odd.cc index b8b320d8..fc12fe75 100644 --- a/tests/core/Test_gpwilson_even_odd.cc +++ b/tests/core/Test_gpwilson_even_odd.cc @@ -50,7 +50,7 @@ int main (int argc, char ** argv) GridParallelRNG pRNG(&Grid); // std::vector seeds({1,2,3,4}); // pRNG.SeedFixedIntegers(seeds); - pRNG.SeedRandomDevice(); + pRNG.SeedFixedIntegers(std::vector({45,12,81,9})); typedef typename GparityWilsonFermionR::FermionField FermionField; diff --git a/tests/core/Test_lie_generators.cc b/tests/core/Test_lie_generators.cc index 5623b74b..114b49f7 100644 --- a/tests/core/Test_lie_generators.cc +++ b/tests/core/Test_lie_generators.cc @@ -86,7 +86,7 @@ int main(int argc, char** argv) { // Projectors GridParallelRNG gridRNG(grid); - gridRNG.SeedRandomDevice(); + gridRNG.SeedFixedIntegers(std::vector({45,12,81,9})); SU3Adjoint::LatticeAdjMatrix Gauss(grid); SU3::LatticeAlgebraVector ha(grid); SU3::LatticeAlgebraVector hb(grid); diff --git a/tests/core/Test_main.cc b/tests/core/Test_main.cc index 78c28539..1868b0fe 100644 --- a/tests/core/Test_main.cc +++ b/tests/core/Test_main.cc @@ -89,8 +89,8 @@ int main(int argc, char **argv) { GridSerialRNG SerialRNG; GridSerialRNG SerialRNG1; - FineRNG.SeedRandomDevice(); - SerialRNG.SeedRandomDevice(); + FineRNG.SeedFixedIntegers(std::vector({45,12,81,9})); + SerialRNG.SeedFixedIntegers(std::vector({45,12,81,9})); std::cout << "SerialRNG" << SerialRNG._generators[0] << std::endl; diff --git a/tests/core/Test_rng.cc b/tests/core/Test_rng.cc index 1fcb3a13..b5d27c29 100644 --- a/tests/core/Test_rng.cc +++ b/tests/core/Test_rng.cc @@ -43,10 +43,10 @@ int main (int argc, char ** argv) std::vector seeds({1,2,3,4}); - GridSerialRNG sRNG; sRNG.SeedRandomDevice(); + GridSerialRNG sRNG; sRNG.SeedFixedIntegers(std::vector({45,12,81,9})); GridSerialRNG fsRNG; fsRNG.SeedFixedIntegers(seeds); - GridParallelRNG pRNG(&Grid); pRNG.SeedRandomDevice(); + GridParallelRNG pRNG(&Grid); pRNG.SeedFixedIntegers(std::vector({45,12,81,9})); GridParallelRNG fpRNG(&Grid); fpRNG.SeedFixedIntegers(seeds); SpinMatrix rnd ; diff --git a/tests/core/Test_staggered.cc b/tests/core/Test_staggered.cc index 89055fc7..75531c83 100644 --- a/tests/core/Test_staggered.cc +++ b/tests/core/Test_staggered.cc @@ -51,7 +51,7 @@ int main (int argc, char ** argv) std::vector seeds({1,2,3,4}); GridParallelRNG pRNG(&Grid); pRNG.SeedFixedIntegers(seeds); - // pRNG.SeedRandomDevice(); + // pRNG.SeedFixedIntegers(std::vector({45,12,81,9}); typedef typename ImprovedStaggeredFermionR::FermionField FermionField; typedef typename ImprovedStaggeredFermionR::ComplexField ComplexField; diff --git a/tests/core/Test_wilson_even_odd.cc b/tests/core/Test_wilson_even_odd.cc index b33bd74d..4933c36e 100644 --- a/tests/core/Test_wilson_even_odd.cc +++ b/tests/core/Test_wilson_even_odd.cc @@ -62,7 +62,7 @@ int main (int argc, char ** argv) GridParallelRNG pRNG(&Grid); // std::vector seeds({1,2,3,4}); // pRNG.SeedFixedIntegers(seeds); - pRNG.SeedRandomDevice(); + pRNG.SeedFixedIntegers(std::vector({45,12,81,9})); LatticeFermion src (&Grid); random(pRNG,src); LatticeFermion phi (&Grid); random(pRNG,phi); diff --git a/tests/core/Test_wilson_tm_even_odd.cc b/tests/core/Test_wilson_tm_even_odd.cc index 36de83ea..a2773244 100644 --- a/tests/core/Test_wilson_tm_even_odd.cc +++ b/tests/core/Test_wilson_tm_even_odd.cc @@ -61,7 +61,7 @@ int main (int argc, char ** argv) GridParallelRNG pRNG(&Grid); // std::vector seeds({1,2,3,4}); // pRNG.SeedFixedIntegers(seeds); - pRNG.SeedRandomDevice(); + pRNG.SeedFixedIntegers(std::vector({45,12,81,9})); LatticeFermion src (&Grid); random(pRNG,src); LatticeFermion phi (&Grid); random(pRNG,phi); diff --git a/tests/forces/Test_dwf_gpforce.cc b/tests/forces/Test_dwf_gpforce.cc index 5094b8a7..96a90ff6 100644 --- a/tests/forces/Test_dwf_gpforce.cc +++ b/tests/forces/Test_dwf_gpforce.cc @@ -54,8 +54,8 @@ int main (int argc, char ** argv) std::vector seeds({1,2,3,4}); - GridParallelRNG RNG5(FGrid); RNG5.SeedRandomDevice(); - GridParallelRNG RNG4(UGrid); RNG4.SeedRandomDevice(); + GridParallelRNG RNG5(FGrid); RNG5.SeedFixedIntegers(std::vector({45,12,81,9})); + GridParallelRNG RNG4(UGrid); RNG4.SeedFixedIntegers(std::vector({45,12,81,9})); FermionField phi (FGrid); gaussian(RNG5,phi); FermionField Mphi (FGrid); diff --git a/tests/forces/Test_gp_rect_force.cc b/tests/forces/Test_gp_rect_force.cc index 551c3a20..17a7cd2b 100644 --- a/tests/forces/Test_gp_rect_force.cc +++ b/tests/forces/Test_gp_rect_force.cc @@ -50,7 +50,7 @@ int main (int argc, char ** argv) std::vector seeds({1,2,3,4}); GridParallelRNG pRNG(&Grid); - pRNG.SeedRandomDevice(); + pRNG.SeedFixedIntegers(std::vector({45,12,81,9})); LatticeGaugeField U(&Grid); diff --git a/tests/forces/Test_rect_force.cc b/tests/forces/Test_rect_force.cc index 97281854..2e2f87b2 100644 --- a/tests/forces/Test_rect_force.cc +++ b/tests/forces/Test_rect_force.cc @@ -50,7 +50,7 @@ int main (int argc, char ** argv) std::vector seeds({1,2,3,4}); GridParallelRNG pRNG(&Grid); - pRNG.SeedRandomDevice(); + pRNG.SeedFixedIntegers(std::vector({45,12,81,9})); LatticeGaugeField U(&Grid); diff --git a/tests/forces/Test_wilson_force.cc b/tests/forces/Test_wilson_force.cc index 60d31b51..52ed00b4 100644 --- a/tests/forces/Test_wilson_force.cc +++ b/tests/forces/Test_wilson_force.cc @@ -50,7 +50,7 @@ int main (int argc, char ** argv) std::vector seeds({1,2,3,4}); GridParallelRNG pRNG(&Grid); - pRNG.SeedRandomDevice(); + pRNG.SeedFixedIntegers(std::vector({45,12,81,9})); LatticeFermion phi (&Grid); gaussian(pRNG,phi); LatticeFermion Mphi (&Grid); diff --git a/tests/forces/Test_wilson_force_phiMdagMphi.cc b/tests/forces/Test_wilson_force_phiMdagMphi.cc index 7717e9bc..2a5814fe 100644 --- a/tests/forces/Test_wilson_force_phiMdagMphi.cc +++ b/tests/forces/Test_wilson_force_phiMdagMphi.cc @@ -50,7 +50,7 @@ int main (int argc, char ** argv) std::vector seeds({1,2,3,4}); GridParallelRNG pRNG(&Grid); - pRNG.SeedRandomDevice(); + pRNG.SeedFixedIntegers(std::vector({45,12,81,9})); LatticeFermion phi (&Grid); gaussian(pRNG,phi); LatticeFermion Mphi (&Grid); diff --git a/tests/forces/Test_wilson_force_phiMphi.cc b/tests/forces/Test_wilson_force_phiMphi.cc index c9e56c32..8cfb1de6 100644 --- a/tests/forces/Test_wilson_force_phiMphi.cc +++ b/tests/forces/Test_wilson_force_phiMphi.cc @@ -50,7 +50,7 @@ int main (int argc, char ** argv) std::vector seeds({1,2,3,4}); GridParallelRNG pRNG(&Grid); - pRNG.SeedRandomDevice(); + pRNG.SeedFixedIntegers(std::vector({45,12,81,9})); LatticeFermion phi (&Grid); gaussian(pRNG,phi); LatticeFermion Mphi (&Grid); diff --git a/tests/testu01/Makefile.am b/tests/testu01/Makefile.am new file mode 100644 index 00000000..eb4d1eae --- /dev/null +++ b/tests/testu01/Makefile.am @@ -0,0 +1,3 @@ +AM_LDFLAGS += -L$(LIBRARY_PATH) -ltestu01 -lprobdist -lmylib -lm +AM_CXXFLAGS += -I$(C_INCLUDE_PATH) +include Make.inc diff --git a/tests/testu01/Test_smallcrush.cc b/tests/testu01/Test_smallcrush.cc new file mode 100644 index 00000000..28974855 --- /dev/null +++ b/tests/testu01/Test_smallcrush.cc @@ -0,0 +1,176 @@ + /************************************************************************************* + + Grid physics library, www.github.com/paboyle/Grid + + Source file: ./tests/Test_smallcrush.cc + + Copyright (C) 2015 + +Author: Peter Boyle + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along + with this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + + See the full license in the file "LICENSE" in the top level distribution directory + *************************************************************************************/ + /* END LEGAL */ +#include + +using namespace std; +using namespace Grid; +using namespace Grid::QCD; + +// Wrap Grid's parallel RNG for testU01 + +extern "C" { +#include "TestU01.h" +#include "gdef.h" +#include "unif01.h" +#include "ucarry.h" +#include "bbattery.h" +} + +std::vector EngineRanlux; +std::vector EngineMT; +std::vector EngineSitmo; + +std::uniform_int_distribution uid; + +uint32_t GetU01Ranlux(void) { + return uid(EngineRanlux[0]); +}; +uint32_t GetU01MT(void) { + return uid(EngineMT[0]); +}; +uint32_t GetU01Sitmo(void) { + return uid(EngineSitmo[0]); +}; + +typedef Grid::GridRNGbase::RngEngine RngEngine; + +struct TestRNG { +public: + static GridParallelRNG *pRNG; + static GridSerialRNG *sRNG; + static GridBase *_grid; + static RngEngine Eng; + static uint64_t site; + static uint64_t gsites; + static char *name; + + static void Init(GridParallelRNG *_pRNG,GridSerialRNG *_sRNG,GridBase *grid) { + pRNG = _pRNG; + sRNG = _sRNG; + _grid= grid; + gsites= grid->_gsites; + std::cout << "Init: Global sites is " < uid; + uint32_t ret_val; +#if 0 + ret_val = sRNG->_uid[0](sRNG->_generators[0]); +#else + ret_val = pRNG->GlobalU01(site); + site=(site+1)%gsites; +#endif + // std::cout << "site "< latt_size = GridDefaultLatt(); + std::vector simd_layout = GridDefaultSimd(4,vComplex::Nsimd()); + std::vector mpi_layout = GridDefaultMpi(); + + GridCartesian Grid(latt_size,simd_layout,mpi_layout); + + std::vector seeds({1,2,3,4}); + std::seed_seq seq(seeds.begin(),seeds.end()); + + EngineRanlux.push_back(std::ranlux48(seq)); + EngineMT.push_back(std::mt19937(seq)); + EngineSitmo.push_back(sitmo::prng_engine(seq)); + + std::cout << GridLogMessage<< "Initialising Grid RNGs "<({43,12,7019,9})); + GridSerialRNG sRNG; + sRNG.SeedFixedIntegers(std::vector({102,12,99,15})); + std::cout << GridLogMessage<< "Initialised Grid RNGs "< Date: Sun, 2 Apr 2017 12:10:51 +0900 Subject: [PATCH 10/13] Big/Small crush test, and fast SITMO rng init, faster but not ideal MT and Ranlux init. --- lib/lattice/Lattice_rng.h | 175 ++++++++++++++++++------------- tests/testu01/Test_smallcrush.cc | 42 +++----- 2 files changed, 117 insertions(+), 100 deletions(-) diff --git a/lib/lattice/Lattice_rng.h b/lib/lattice/Lattice_rng.h index 31fef729..ddd2170f 100644 --- a/lib/lattice/Lattice_rng.h +++ b/lib/lattice/Lattice_rng.h @@ -30,12 +30,19 @@ Author: paboyle #define GRID_LATTICE_RNG_H #include + +#ifdef RNG_SITMO #include +#endif + +#if defined(RNG_SITMO) +#define RNG_FAST_DISCARD +#else +#undef RNG_FAST_DISCARD +#endif namespace Grid { - //http://nvlpubs.nist.gov/nistpubs/SpecialPublications/NIST.SP.800-90Ar1.pdf ? - ////////////////////////////////////////////////////////////// // Allow the RNG state to be less dense than the fine grid ////////////////////////////////////////////////////////////// @@ -65,20 +72,22 @@ namespace Grid { multiplicity = multiplicity *fine->_rdimensions[fd] / coarse->_rdimensions[d]; } - return multiplicity; } // real scalars are one component - template void fillScalar(scalar &s,distribution &dist,generator & gen) + template + void fillScalar(scalar &s,distribution &dist,generator & gen) { s=dist(gen); } - template void fillScalar(ComplexF &s,distribution &dist, generator &gen) + template + void fillScalar(ComplexF &s,distribution &dist, generator &gen) { s=ComplexF(dist(gen),dist(gen)); } - template void fillScalar(ComplexD &s,distribution &dist,generator &gen) + template + void fillScalar(ComplexD &s,distribution &dist,generator &gen) { s=ComplexD(dist(gen),dist(gen)); } @@ -102,27 +111,6 @@ namespace Grid { typedef uint64_t RngStateType; static const int RngStateCount = 4; #endif - /////////////////////// - // support for parallel init - /////////////////////// -#ifdef RNG_SITMO - static void Skip(RngEngine &eng) - { - uint64_t skip = 0x1; skip = skip<<40; - eng.discard(skip); - } -#endif - static RngEngine Reseed(RngEngine &eng) - { - const int reseeds=4; - std::uniform_int_distribution uid; - std::vector newseed(reseeds); - for(int i=0;i _generators; std::vector > _uniform; @@ -130,6 +118,46 @@ namespace Grid { std::vector > _bernoulli; std::vector > _uid; + /////////////////////// + // support for parallel init + /////////////////////// +#ifdef RNG_FAST_DISCARD + static void Skip(RngEngine &eng) + { + ///////////////////////////////////////////////////////////////////////////////////// + // Skip by 2^40 elements between successive lattice sites + // This goes by 10^12. + // Consider quenched updating; likely never exceeding rate of 1000 sweeps + // per second on any machine. This gives us of order 10^9 seconds, or 100 years + // skip ahead. + // For HMC unlikely to go at faster than a solve per second, and + // tens of seconds per trajectory so this is clean in all reasonable cases, + // and margin of safety is orders of magnitude. + // We could hack Sitmo to skip in the higher order words of state if necessary + ///////////////////////////////////////////////////////////////////////////////////// + uint64_t skip = 0x1; skip = skip<<40; + eng.discard(skip); + } +#endif + static RngEngine Reseed(RngEngine &eng) + { + std::vector newseed; + std::uniform_int_distribution uid; + return Reseed(eng,newseed,uid); + } + static RngEngine Reseed(RngEngine &eng,std::vector & newseed, + std::uniform_int_distribution &uid) + { + const int reseeds=4; + + newseed.resize(reseeds); + for(int i=0;i & saved,RngEngine &eng) { saved.resize(RngStateCount); std::stringstream ss; @@ -160,7 +188,6 @@ namespace Grid { void GetEngine(RngEngine &Eng, int gen){ Eng=_generators[gen]; } - template void Seed(source &src, int gen) { _generators[gen] = RngEngine(src); @@ -190,10 +217,11 @@ namespace Grid { for(int idx=0;idx inline void fill(ComplexF &l,std::vector &dist){ dist[0].reset(); fillScalar(l,dist[0],_generators[0]); @@ -253,14 +281,13 @@ namespace Grid { std::seed_seq src(seeds.begin(),seeds.end()); Seed(src,0); } - }; class GridParallelRNG : public GridRNGbase { public: - GridBase *_grid; int _vol; + public: int generator_idx(int os,int is){ return is*_grid->oSites()+os; @@ -289,7 +316,6 @@ namespace Grid { int osites=_grid->oSites(); int words=sizeof(scalar_object)/sizeof(scalar_type); - parallel_for(int ss=0;ss buf(Nsimd); @@ -314,23 +340,34 @@ namespace Grid { void SeedFixedIntegers(const std::vector &seeds){ + // Everyone generates the same seed_seq based on input seeds CartesianCommunicator::BroadcastWorld(0,(void *)&seeds[0],sizeof(int)*seeds.size()); std::seed_seq source(seeds.begin(),seeds.end()); RngEngine master_engine(source); -#ifdef RNG_SITMO +#ifdef RNG_FAST_DISCARD + //////////////////////////////////////////////// + // Skip ahead through a single stream. + // Applicable to SITMO and other has based/crypto RNGs + // Should be applicable to Mersenne Twister, but the C++11 + // MT implementation does not implement fast discard even though + // in principle this is possible + //////////////////////////////////////////////// std::vector gcoor; + int rank,o_idx,i_idx; + // Everybody loops over global volume. for(int gidx=0;gidx<_grid->_gsites;gidx++){ - Skip(master_engine); // advance the state; does this work? + Skip(master_engine); // Skip to next RNG sequence - int rank,o_idx,i_idx; + // Where is it? _grid->GlobalIndexToGlobalCoor(gidx,gcoor); _grid->GlobalCoorToRankIndex(rank,o_idx,i_idx,gcoor); + // If this is one of mine we take it if( rank == _grid->ThisRank() ){ int l_idx=generator_idx(o_idx,i_idx); _generators[l_idx] = master_engine; @@ -338,23 +375,24 @@ namespace Grid { } #else - // Machine and thread decomposition dependent seeding - // is efficient and maximally parallel; but not - // reproducible from machine to machine. Not ideal, but fast. - // Different seed for each node. + //////////////////////////////////////////////////////////////// + // Machine and thread decomposition dependent seeding is efficient + // and maximally parallel; but NOT reproducible from machine to machine. + // Not ideal, but fastest way to reseed all nodes. + //////////////////////////////////////////////////////////////// { + // Obtain one Reseed per processor int Nproc = _grid->ProcessorCount(); - int me= _grid->ThisRank(); std::vector seeders(Nproc); - + int me= _grid->ThisRank(); for(int p=0;p seeders(Nthread); for(int t=0;t newseeds; + std::uniform_int_distribution uid; for(int l=0;l<_grid->lSites();l++) { if ( (l%Nthread)==t ) { - _generators[l] = Reseed(master_engine); + _generators[l] = Reseed(seeders[t],newseeds,uid); } } } } #endif } - + //////////////////////////////////////////////////////////////////////// + // Support for rigorous test of RNG's + // Return uniform random uint32_t from requested site generator + //////////////////////////////////////////////////////////////////////// uint32_t GlobalU01(int gsite){ - std::vector gcoor; - _grid->GlobalIndexToGlobalCoor(gsite,gcoor); + uint32_t the_number; + // who + std::vector gcoor; int rank,o_idx,i_idx; + _grid->GlobalIndexToGlobalCoor(gsite,gcoor); _grid->GlobalCoorToRankIndex(rank,o_idx,i_idx,gcoor); + // draw int l_idx=generator_idx(o_idx,i_idx); - - uint32_t the_number; if( rank == _grid->ThisRank() ){ the_number = _uid[l_idx](_generators[l_idx]); } - + + // share & return _grid->Broadcast(rank,(void *)&the_number,sizeof(the_number)); - return the_number; } - }; - template inline void random(GridParallelRNG &rng,Lattice &l){ - rng.fill(l,rng._uniform); - } + template inline void random(GridParallelRNG &rng,Lattice &l) { rng.fill(l,rng._uniform); } + template inline void gaussian(GridParallelRNG &rng,Lattice &l) { rng.fill(l,rng._gaussian); } + template inline void bernoulli(GridParallelRNG &rng,Lattice &l){ rng.fill(l,rng._bernoulli);} - template inline void gaussian(GridParallelRNG &rng,Lattice &l){ - rng.fill(l,rng._gaussian); - } - - template inline void bernoulli(GridParallelRNG &rng,Lattice &l){ - rng.fill(l,rng._bernoulli); - } - - template inline void random(GridSerialRNG &rng,sobj &l){ - rng.fill(l,rng._uniform); - } - - template inline void gaussian(GridSerialRNG &rng,sobj &l){ - rng.fill(l,rng._gaussian); - } - - template inline void bernoulli(GridSerialRNG &rng,sobj &l){ - rng.fill(l,rng._bernoulli); - } + template inline void random(GridSerialRNG &rng,sobj &l) { rng.fill(l,rng._uniform ); } + template inline void gaussian(GridSerialRNG &rng,sobj &l) { rng.fill(l,rng._gaussian ); } + template inline void bernoulli(GridSerialRNG &rng,sobj &l){ rng.fill(l,rng._bernoulli); } } #endif diff --git a/tests/testu01/Test_smallcrush.cc b/tests/testu01/Test_smallcrush.cc index 28974855..51f0d60c 100644 --- a/tests/testu01/Test_smallcrush.cc +++ b/tests/testu01/Test_smallcrush.cc @@ -32,17 +32,17 @@ using namespace Grid; using namespace Grid::QCD; // Wrap Grid's parallel RNG for testU01 +#define BIG_CRUSH // Big crush enable (long running) +#undef TEST_RNG_STANDALONE // Test serial RNGs in isolation extern "C" { #include "TestU01.h" -#include "gdef.h" -#include "unif01.h" -#include "ucarry.h" -#include "bbattery.h" } std::vector EngineRanlux; std::vector EngineMT; + +#include std::vector EngineSitmo; std::uniform_int_distribution uid; @@ -74,21 +74,13 @@ public: sRNG = _sRNG; _grid= grid; gsites= grid->_gsites; - std::cout << "Init: Global sites is " < uid; uint32_t ret_val; -#if 0 - ret_val = sRNG->_uid[0](sRNG->_generators[0]); -#else ret_val = pRNG->GlobalU01(site); site=(site+1)%gsites; -#endif - // std::cout << "site "< Date: Sun, 2 Apr 2017 23:13:48 +0900 Subject: [PATCH 11/13] Clean up --- tests/testu01/Test_smallcrush.cc | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/tests/testu01/Test_smallcrush.cc b/tests/testu01/Test_smallcrush.cc index 51f0d60c..d09cd577 100644 --- a/tests/testu01/Test_smallcrush.cc +++ b/tests/testu01/Test_smallcrush.cc @@ -32,7 +32,9 @@ using namespace Grid; using namespace Grid::QCD; // Wrap Grid's parallel RNG for testU01 -#define BIG_CRUSH // Big crush enable (long running) +#undef BIG_CRUSH // Big crush enable (long running) +#define MIDDLE_CRUSH // Big crush enable (long running) +#undef SMALL_CRUSH // Big crush enable (long running) #undef TEST_RNG_STANDALONE // Test serial RNGs in isolation extern "C" { @@ -154,12 +156,19 @@ int main (int argc, char ** argv) std::cout << GridLogMessage<< "Testing Grid BigCrush for "<< std::string(TestRNG::name) < Date: Wed, 5 Apr 2017 02:18:15 +0900 Subject: [PATCH 12/13] UID fix --- lib/communicator/Communicator_mpi3.cc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/communicator/Communicator_mpi3.cc b/lib/communicator/Communicator_mpi3.cc index 7685768c..6e40142c 100644 --- a/lib/communicator/Communicator_mpi3.cc +++ b/lib/communicator/Communicator_mpi3.cc @@ -203,7 +203,7 @@ void CartesianCommunicator::Init(int *argc, char ***argv) { size_t size = CartesianCommunicator::MAX_MPI_SHM_BYTES; - sprintf(shm_name,"/Grid_mpi3_shm_%d_%d",GroupRank,r); + sprintf(shm_name,"/Grid_mpi3_uid%d_shm_%d_%d",getuid(),GroupRank,r); shm_unlink(shm_name); int fd=shm_open(shm_name,O_RDWR|O_CREAT,0660); @@ -224,7 +224,7 @@ void CartesianCommunicator::Init(int *argc, char ***argv) { for(int r=0;r Date: Wed, 5 Apr 2017 02:35:34 +0900 Subject: [PATCH 13/13] Creation mode better implementation --- lib/communicator/Communicator_mpi3.cc | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/lib/communicator/Communicator_mpi3.cc b/lib/communicator/Communicator_mpi3.cc index 6e40142c..a8bffc14 100644 --- a/lib/communicator/Communicator_mpi3.cc +++ b/lib/communicator/Communicator_mpi3.cc @@ -203,10 +203,10 @@ void CartesianCommunicator::Init(int *argc, char ***argv) { size_t size = CartesianCommunicator::MAX_MPI_SHM_BYTES; - sprintf(shm_name,"/Grid_mpi3_uid%d_shm_%d_%d",getuid(),GroupRank,r); + sprintf(shm_name,"/Grid_mpi3_shm_%d_%d",GroupRank,r); shm_unlink(shm_name); - int fd=shm_open(shm_name,O_RDWR|O_CREAT,0660); + int fd=shm_open(shm_name,O_RDWR|O_CREAT,0666); if ( fd < 0 ) { perror("failed shm_open"); assert(0); } ftruncate(fd, size); @@ -224,9 +224,9 @@ void CartesianCommunicator::Init(int *argc, char ***argv) { for(int r=0;r