From 6af459cae4509c19d95c9d9658140b9a4a1ad488 Mon Sep 17 00:00:00 2001 From: paboyle Date: Fri, 31 Mar 2017 17:07:43 +0900 Subject: [PATCH 01/16] Christoph's coefficients. --- tests/core/Test_zmobius_even_odd.cc | 26 ++++++++++++++++++++------ tests/solver/Test_zmobius_cg_prec.cc | 26 ++++++++++++++++++++------ 2 files changed, 40 insertions(+), 12 deletions(-) diff --git a/tests/core/Test_zmobius_even_odd.cc b/tests/core/Test_zmobius_even_odd.cc index d547f2f7..867c3359 100644 --- a/tests/core/Test_zmobius_even_odd.cc +++ b/tests/core/Test_zmobius_even_odd.cc @@ -53,7 +53,7 @@ int main (int argc, char ** argv) std::cout< > omegas; +#if 0 for(int i=0;i temp (0.25+0.01*i, imag*0.1); - omegas.push_back(temp); + double imag = 0.; + if (i==0) imag=1.; + if (i==Ls-1) imag=-1.; + std::complex temp (0.25+0.01*i, imag*0.01); + omegas.push_back(temp); } +#else + omegas.push_back( std::complex(1.45806438985048,-0) ); + omegas.push_back( std::complex(1.18231318389348,-0) ); + omegas.push_back( std::complex(0.830951166685955,-0) ); + omegas.push_back( std::complex(0.542352409156791,-0) ); + omegas.push_back( std::complex(0.341985020453729,-0) ); + omegas.push_back( std::complex(0.21137902619029,-0) ); + omegas.push_back( std::complex(0.126074299502912,-0) ); + omegas.push_back( std::complex(0.0990136651962626,-0) ); + omegas.push_back( std::complex(0.0686324988446592,0.0550658530827402) ); + omegas.push_back( std::complex(0.0686324988446592,-0.0550658530827402) ); +#endif + ZMobiusFermionR Ddwf(Umu, *FGrid, *FrbGrid, *UGrid, *UrbGrid, mass, M5, omegas,1.,0.); // DomainWallFermionR Ddwf(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5); diff --git a/tests/solver/Test_zmobius_cg_prec.cc b/tests/solver/Test_zmobius_cg_prec.cc index 4ae98d71..922ca8ae 100644 --- a/tests/solver/Test_zmobius_cg_prec.cc +++ b/tests/solver/Test_zmobius_cg_prec.cc @@ -43,7 +43,7 @@ Gamma::Algebra Gmu[] = {Gamma::Algebra::GammaX, Gamma::Algebra::GammaY, Gamma::A int main(int argc, char** argv) { Grid_init(&argc, &argv); - const int Ls = 16; + const int Ls = 10; GridCartesian* UGrid = SpaceTimeGrid::makeFourDimGrid( GridDefaultLatt(), GridDefaultSimd(Nd, vComplex::Nsimd()), @@ -80,13 +80,27 @@ int main(int argc, char** argv) { RealD mass = 0.01; RealD M5 = 1.8; std::vector < std::complex > omegas; +#if 0 for(int i=0;i temp (0.25+0.01*i, imag*0.01); - omegas.push_back(temp); + double imag = 0.; + if (i==0) imag=1.; + if (i==Ls-1) imag=-1.; + std::complex temp (0.25+0.01*i, imag*0.01); + omegas.push_back(temp); } +#else + omegas.push_back( std::complex(1.45806438985048,-0) ); + omegas.push_back( std::complex(1.18231318389348,-0) ); + omegas.push_back( std::complex(0.830951166685955,-0) ); + omegas.push_back( std::complex(0.542352409156791,-0) ); + omegas.push_back( std::complex(0.341985020453729,-0) ); + omegas.push_back( std::complex(0.21137902619029,-0) ); + omegas.push_back( std::complex(0.126074299502912,-0) ); + omegas.push_back( std::complex(0.0990136651962626,-0) ); + omegas.push_back( std::complex(0.0686324988446592,0.0550658530827402) ); + omegas.push_back( std::complex(0.0686324988446592,-0.0550658530827402) ); +#endif + ZMobiusFermionR Ddwf(Umu, *FGrid, *FrbGrid, *UGrid, *UrbGrid, mass, M5, omegas,1.,0.); LatticeFermion src_o(FrbGrid); From 7e5faa0f3486c9eba1f8ecc68bd386686a73da5b Mon Sep 17 00:00:00 2001 From: paboyle Date: Sun, 2 Apr 2017 00:25:44 +0900 Subject: [PATCH 02/16] Multiple RNGs --- lib/parallelIO/NerscIO.h | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/lib/parallelIO/NerscIO.h b/lib/parallelIO/NerscIO.h index afd35236..72b3e32d 100644 --- a/lib/parallelIO/NerscIO.h +++ b/lib/parallelIO/NerscIO.h @@ -491,10 +491,15 @@ static inline void writeRNGState(GridSerialRNG &serial,GridParallelRNG ¶llel #ifdef RNG_RANLUX header.floating_point = std::string("UINT64"); header.data_type = std::string("RANLUX48"); -#else +#endif +#ifdef RNG_MT19937 header.floating_point = std::string("UINT32"); header.data_type = std::string("MT19937"); #endif +#ifdef RNG_SITMO + header.floating_point = std::string("UINT64"); + header.data_type = std::string("SITMO"); +#endif truncate(file); offset = writeHeader(header,file); @@ -522,10 +527,15 @@ static inline void readRNGState(GridSerialRNG &serial,GridParallelRNG & parallel #ifdef RNG_RANLUX assert(format == std::string("UINT64")); assert(data_type == std::string("RANLUX48")); -#else +#endif +#ifdef RNG_MT19937 assert(format == std::string("UINT32")); assert(data_type == std::string("MT19937")); #endif +#ifdef RNG_SITMO + assert(format == std::string("UINT64")); + assert(data_type == std::string("SITMO")); +#endif // depending on datatype, set up munger; // munger is a function of From d1d63a4f2d0bc78c46930b762683c441ef6b226e Mon Sep 17 00:00:00 2001 From: paboyle Date: Sun, 2 Apr 2017 00:26:05 +0900 Subject: [PATCH 03/16] sitmo default --- configure.ac | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/configure.ac b/configure.ac index 95f573f5..3a323a5d 100644 --- a/configure.ac +++ b/configure.ac @@ -321,7 +321,7 @@ AM_CONDITIONAL(BUILD_COMMS_NONE, [ test "${comms_type}X" == "noneX" ]) ############### RNG selection AC_ARG_ENABLE([rng],[AC_HELP_STRING([--enable-rng=ranlux48|mt19937|sitmo],\ [Select Random Number Generator to be used])],\ - [ac_RNG=${enable_rng}],[ac_RNG=ranlux48]) + [ac_RNG=${enable_rng}],[ac_RNG=sitmo]) case ${ac_RNG} in ranlux48) @@ -401,6 +401,7 @@ AC_CONFIG_FILES(tests/hadrons/Makefile) AC_CONFIG_FILES(tests/hmc/Makefile) AC_CONFIG_FILES(tests/solver/Makefile) AC_CONFIG_FILES(tests/qdpxx/Makefile) +AC_CONFIG_FILES(tests/testu01/Makefile) AC_CONFIG_FILES(benchmarks/Makefile) AC_CONFIG_FILES(extras/Makefile) AC_CONFIG_FILES(extras/Hadrons/Makefile) From f18f5ed926ac435b4cb14a7cde3b2a4a9fc3887e Mon Sep 17 00:00:00 2001 From: paboyle Date: Sun, 2 Apr 2017 00:26:26 +0900 Subject: [PATCH 04/16] Drop random device --- benchmarks/Benchmark_memory_asynch.cc | 3 ++- benchmarks/Benchmark_memory_bandwidth.cc | 8 ++++---- benchmarks/Benchmark_staggered.cc | 2 +- benchmarks/Benchmark_su3.cc | 8 ++++---- benchmarks/Benchmark_wilson.cc | 2 +- 5 files changed, 12 insertions(+), 11 deletions(-) diff --git a/benchmarks/Benchmark_memory_asynch.cc b/benchmarks/Benchmark_memory_asynch.cc index 9f4f5d61..2124b137 100644 --- a/benchmarks/Benchmark_memory_asynch.cc +++ b/benchmarks/Benchmark_memory_asynch.cc @@ -66,7 +66,8 @@ int main (int argc, char ** argv) Vec tsum; tsum = zero; - GridParallelRNG pRNG(&Grid); pRNG.SeedRandomDevice(); + GridParallelRNG pRNG(&Grid); + pRNG.SeedFixedIntegers(std::vector({56,17,89,101})); std::vector stop(threads); Vector sum(threads); diff --git a/benchmarks/Benchmark_memory_bandwidth.cc b/benchmarks/Benchmark_memory_bandwidth.cc index 435af7f4..d57c4df5 100644 --- a/benchmarks/Benchmark_memory_bandwidth.cc +++ b/benchmarks/Benchmark_memory_bandwidth.cc @@ -65,7 +65,7 @@ int main (int argc, char ** argv) uint64_t Nloop=NLOOP; - // GridParallelRNG pRNG(&Grid); pRNG.SeedRandomDevice(); + // GridParallelRNG pRNG(&Grid); pRNG.SeedFixedIntegers(std::vector({45,12,81,9}); LatticeVec z(&Grid); //random(pRNG,z); LatticeVec x(&Grid); //random(pRNG,x); @@ -100,7 +100,7 @@ int main (int argc, char ** argv) int vol = latt_size[0]*latt_size[1]*latt_size[2]*latt_size[3]; GridCartesian Grid(latt_size,simd_layout,mpi_layout); - // GridParallelRNG pRNG(&Grid); pRNG.SeedRandomDevice(); + // GridParallelRNG pRNG(&Grid); pRNG.SeedFixedIntegers(std::vector({45,12,81,9}); LatticeVec z(&Grid); //random(pRNG,z); LatticeVec x(&Grid); //random(pRNG,x); @@ -138,7 +138,7 @@ int main (int argc, char ** argv) GridCartesian Grid(latt_size,simd_layout,mpi_layout); - // GridParallelRNG pRNG(&Grid); pRNG.SeedRandomDevice(); + // GridParallelRNG pRNG(&Grid); pRNG.SeedFixedIntegers(std::vector({45,12,81,9}); LatticeVec z(&Grid); //random(pRNG,z); LatticeVec x(&Grid); //random(pRNG,x); @@ -173,7 +173,7 @@ int main (int argc, char ** argv) uint64_t Nloop=NLOOP; GridCartesian Grid(latt_size,simd_layout,mpi_layout); - // GridParallelRNG pRNG(&Grid); pRNG.SeedRandomDevice(); + // GridParallelRNG pRNG(&Grid); pRNG.SeedFixedIntegers(std::vector({45,12,81,9}); LatticeVec z(&Grid); //random(pRNG,z); LatticeVec x(&Grid); //random(pRNG,x); LatticeVec y(&Grid); //random(pRNG,y); diff --git a/benchmarks/Benchmark_staggered.cc b/benchmarks/Benchmark_staggered.cc index 121dc0d5..dc2dcf91 100644 --- a/benchmarks/Benchmark_staggered.cc +++ b/benchmarks/Benchmark_staggered.cc @@ -51,7 +51,7 @@ int main (int argc, char ** argv) std::vector seeds({1,2,3,4}); GridParallelRNG pRNG(&Grid); pRNG.SeedFixedIntegers(seeds); - // pRNG.SeedRandomDevice(); + // pRNG.SeedFixedIntegers(std::vector({45,12,81,9}); typedef typename ImprovedStaggeredFermionR::FermionField FermionField; typename ImprovedStaggeredFermionR::ImplParams params; diff --git a/benchmarks/Benchmark_su3.cc b/benchmarks/Benchmark_su3.cc index b6d1d303..c234c301 100644 --- a/benchmarks/Benchmark_su3.cc +++ b/benchmarks/Benchmark_su3.cc @@ -55,7 +55,7 @@ int main (int argc, char ** argv) std::vector latt_size ({lat*mpi_layout[0],lat*mpi_layout[1],lat*mpi_layout[2],lat*mpi_layout[3]}); int vol = latt_size[0]*latt_size[1]*latt_size[2]*latt_size[3]; GridCartesian Grid(latt_size,simd_layout,mpi_layout); - // GridParallelRNG pRNG(&Grid); pRNG.SeedRandomDevice(); + // GridParallelRNG pRNG(&Grid); pRNG.SeedFixedIntegers(std::vector({45,12,81,9}); LatticeColourMatrix z(&Grid);// random(pRNG,z); LatticeColourMatrix x(&Grid);// random(pRNG,x); @@ -88,7 +88,7 @@ int main (int argc, char ** argv) int vol = latt_size[0]*latt_size[1]*latt_size[2]*latt_size[3]; GridCartesian Grid(latt_size,simd_layout,mpi_layout); - // GridParallelRNG pRNG(&Grid); pRNG.SeedRandomDevice(); + // GridParallelRNG pRNG(&Grid); pRNG.SeedFixedIntegers(std::vector({45,12,81,9}); LatticeColourMatrix z(&Grid); //random(pRNG,z); LatticeColourMatrix x(&Grid); //random(pRNG,x); @@ -119,7 +119,7 @@ int main (int argc, char ** argv) int vol = latt_size[0]*latt_size[1]*latt_size[2]*latt_size[3]; GridCartesian Grid(latt_size,simd_layout,mpi_layout); - // GridParallelRNG pRNG(&Grid); pRNG.SeedRandomDevice(); + // GridParallelRNG pRNG(&Grid); pRNG.SeedFixedIntegers(std::vector({45,12,81,9}); LatticeColourMatrix z(&Grid); //random(pRNG,z); LatticeColourMatrix x(&Grid); //random(pRNG,x); @@ -150,7 +150,7 @@ int main (int argc, char ** argv) int vol = latt_size[0]*latt_size[1]*latt_size[2]*latt_size[3]; GridCartesian Grid(latt_size,simd_layout,mpi_layout); - // GridParallelRNG pRNG(&Grid); pRNG.SeedRandomDevice(); + // GridParallelRNG pRNG(&Grid); pRNG.SeedFixedIntegers(std::vector({45,12,81,9}); LatticeColourMatrix z(&Grid); //random(pRNG,z); LatticeColourMatrix x(&Grid); //random(pRNG,x); diff --git a/benchmarks/Benchmark_wilson.cc b/benchmarks/Benchmark_wilson.cc index 11f1353f..671e7654 100644 --- a/benchmarks/Benchmark_wilson.cc +++ b/benchmarks/Benchmark_wilson.cc @@ -69,7 +69,7 @@ int main (int argc, char ** argv) std::vector seeds({1,2,3,4}); GridParallelRNG pRNG(&Grid); pRNG.SeedFixedIntegers(seeds); - // pRNG.SeedRandomDevice(); + // pRNG.SeedFixedIntegers(std::vector({45,12,81,9}); LatticeFermion src (&Grid); random(pRNG,src); LatticeFermion result(&Grid); result=zero; From 9cbcdd65d7c5717a67376d3068970c3657d4c683 Mon Sep 17 00:00:00 2001 From: paboyle Date: Sun, 2 Apr 2017 00:26:57 +0900 Subject: [PATCH 05/16] No random device seed --- lib/algorithms/CoarsenedMatrix.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/algorithms/CoarsenedMatrix.h b/lib/algorithms/CoarsenedMatrix.h index 73f6baff..c2910151 100644 --- a/lib/algorithms/CoarsenedMatrix.h +++ b/lib/algorithms/CoarsenedMatrix.h @@ -425,7 +425,7 @@ namespace Grid { A[p]=zero; } - GridParallelRNG RNG(Grid()); RNG.SeedRandomDevice(); + GridParallelRNG RNG(Grid()); RNG.SeedFixedIntegers(std::vector({55,72,19,17,34})); Lattice > val(Grid()); random(RNG,val); Complex one(1.0); From 935d82f5b10f594f40a46e1d8c0dd042d36b3352 Mon Sep 17 00:00:00 2001 From: paboyle Date: Sun, 2 Apr 2017 00:27:28 +0900 Subject: [PATCH 06/16] sanity checks --- lib/cartesian/Cartesian_base.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/lib/cartesian/Cartesian_base.h b/lib/cartesian/Cartesian_base.h index cea0f3dc..4ca8c580 100644 --- a/lib/cartesian/Cartesian_base.h +++ b/lib/cartesian/Cartesian_base.h @@ -177,9 +177,11 @@ public: // Global addressing //////////////////////////////////////////////////////////////// void GlobalIndexToGlobalCoor(int gidx,std::vector &gcoor){ + assert(gidx< gSites()); Lexicographic::CoorFromIndex(gcoor,gidx,_gdimensions); } void LocalIndexToLocalCoor(int lidx,std::vector &lcoor){ + assert(lidx & gcoor,int & gidx){ From 9dc7ca4c3be4bad81bedb7c54e34b26db58fad38 Mon Sep 17 00:00:00 2001 From: paboyle Date: Sun, 2 Apr 2017 00:28:22 +0900 Subject: [PATCH 07/16] Sitmo fast init --- lib/lattice/Lattice_rng.h | 252 ++++++++++++++++++-------------------- 1 file changed, 122 insertions(+), 130 deletions(-) diff --git a/lib/lattice/Lattice_rng.h b/lib/lattice/Lattice_rng.h index 3d653d17..31fef729 100644 --- a/lib/lattice/Lattice_rng.h +++ b/lib/lattice/Lattice_rng.h @@ -69,40 +69,6 @@ namespace Grid { return multiplicity; } - // Wrap seed_seq to give common interface with random_device - // Should rather wrap random_device and have a generate - class fixedSeed { - public: - - typedef std::seed_seq::result_type result_type; - - std::seed_seq src; - - template fixedSeed(const std::vector &seeds) : src(seeds.begin(),seeds.end()) {}; - - template< class RandomIt > void generate( RandomIt begin, RandomIt end ) { - src.generate(begin,end); - } - - }; - - - class deviceSeed { - public: - - std::random_device rd; - - typedef std::random_device::result_type result_type; - - deviceSeed(void) : rd(){}; - - template< class RandomIt > void generate( RandomIt begin, RandomIt end ) { - for(RandomIt it=begin; it!=end;it++){ - *it = rd(); - } - } - }; - // real scalars are one component template void fillScalar(scalar &s,distribution &dist,generator & gen) { @@ -118,67 +84,100 @@ namespace Grid { } class GridRNGbase { - public: - - int _seeded; // One generator per site. // Uniform and Gaussian distributions from these generators. #ifdef RNG_RANLUX - typedef uint64_t RngStateType; typedef std::ranlux48 RngEngine; + typedef uint64_t RngStateType; static const int RngStateCount = 15; -#elif RNG_MT19937 +#endif +#ifdef RNG_MT19937 typedef std::mt19937 RngEngine; typedef uint32_t RngStateType; static const int RngStateCount = std::mt19937::state_size; -#elif RNG_SITMO +#endif +#ifdef RNG_SITMO typedef sitmo::prng_engine RngEngine; typedef uint64_t RngStateType; static const int RngStateCount = 4; #endif - std::vector _generators; - std::vector> _uniform; - std::vector> _gaussian; - std::vector> _bernoulli; + /////////////////////// + // support for parallel init + /////////////////////// +#ifdef RNG_SITMO + static void Skip(RngEngine &eng) + { + uint64_t skip = 0x1; skip = skip<<40; + eng.discard(skip); + } +#endif + static RngEngine Reseed(RngEngine &eng) + { + const int reseeds=4; + std::uniform_int_distribution uid; + std::vector newseed(reseeds); + for(int i=0;i & saved,int gen) { + std::vector _generators; + std::vector > _uniform; + std::vector > _gaussian; + std::vector > _bernoulli; + std::vector > _uid; + + void GetState(std::vector & saved,RngEngine &eng) { saved.resize(RngStateCount); std::stringstream ss; - ss<<_generators[gen]; + ss<>saved[i]; } } - void SetState(std::vector & saved,int gen){ + void GetState(std::vector & saved,int gen) { + GetState(saved,_generators[gen]); + } + void SetState(std::vector & saved,RngEngine &eng){ assert(saved.size()==RngStateCount); std::stringstream ss; for(int i=0;i>_generators[gen]; + ss>>eng; } + void SetState(std::vector & saved,int gen){ + SetState(saved,_generators[gen]); + } + void SetEngine(RngEngine &Eng, int gen){ + _generators[gen]=Eng; + } + void GetEngine(RngEngine &Eng, int gen){ + Eng=_generators[gen]; + } + + template void Seed(source &src, int gen) + { + _generators[gen] = RngEngine(src); + } }; class GridSerialRNG : public GridRNGbase { public: - // FIXME ... do we require lockstep draws of randoms - // from all nodes keeping seeds consistent. - // place a barrier/broadcast in the fill routine - GridSerialRNG() : GridRNGbase() { _generators.resize(1); _uniform.resize(1,std::uniform_real_distribution{0,1}); _gaussian.resize(1,std::normal_distribution(0.0,1.0) ); _bernoulli.resize(1,std::discrete_distribution{1,1}); - _seeded=0; + _uid.resize(1,std::uniform_int_distribution() ); } - - template inline void fill(sobj &l,std::vector &dist){ typedef typename sobj::scalar_type scalar_type; @@ -195,7 +194,6 @@ namespace Grid { CartesianCommunicator::BroadcastWorld(0,(void *)&l,sizeof(l)); }; - template inline void fill(ComplexF &l,std::vector &dist){ dist[0].reset(); fillScalar(l,dist[0],_generators[0]); @@ -250,19 +248,10 @@ namespace Grid { CartesianCommunicator::BroadcastWorld(0,(void *)&l,sizeof(l)); } - template void Seed(source &src) - { - _generators[0] = RngEngine(src); - _seeded=1; - } - void SeedRandomDevice(void){ - deviceSeed src; - Seed(src); - } void SeedFixedIntegers(const std::vector &seeds){ CartesianCommunicator::BroadcastWorld(0,(void *)&seeds[0],sizeof(int)*seeds.size()); - fixedSeed src(seeds); - Seed(src); + std::seed_seq src(seeds.begin(),seeds.end()); + Seed(src,0); } }; @@ -285,15 +274,9 @@ namespace Grid { _uniform.resize(_vol,std::uniform_real_distribution{0,1}); _gaussian.resize(_vol,std::normal_distribution(0.0,1.0) ); _bernoulli.resize(_vol,std::discrete_distribution{1,1}); - _seeded=0; + _uid.resize(_vol,std::uniform_int_distribution() ); } - - - //FIXME implement generic IO and create state save/restore - //void SaveState(const std::string &file); - //void LoadState(const std::string &file); - template inline void fill(Lattice &l,std::vector &dist){ typedef typename vobj::scalar_object scalar_object; @@ -329,79 +312,88 @@ namespace Grid { } }; - // This loop could be made faster to avoid the Ahmdahl by - // i) seed generators on each timeslice, for x=y=z=0; - // ii) seed generators on each z for x=y=0 - // iii)seed generators on each y,z for x=0 - // iv) seed generators on each y,z,x - // made possible by physical indexing. - template void Seed(source &src) - { + void SeedFixedIntegers(const std::vector &seeds){ - typedef typename source::result_type seed_t; - std::uniform_int_distribution uid; + CartesianCommunicator::BroadcastWorld(0,(void *)&seeds[0],sizeof(int)*seeds.size()); - int numseed=4; - int gsites = _grid->_gsites; - std::vector site_init(numseed); + std::seed_seq source(seeds.begin(),seeds.end()); + + RngEngine master_engine(source); + +#ifdef RNG_SITMO std::vector gcoor; + for(int gidx=0;gidx<_grid->_gsites;gidx++){ - // Master RngEngine - std::vector master_init(numseed); src.generate(master_init.begin(),master_init.end()); - _grid->Broadcast(0,(void *)&master_init[0],sizeof(seed_t)*numseed); - fixedSeed master_seed(master_init); - RngEngine master_engine(master_seed); + Skip(master_engine); // advance the state; does this work? - // Per node RngEngine - std::vector node_init(numseed); - for(int r=0;r<_grid->ProcessorCount();r++) { - - std::vector rank_init(numseed); - for(int i=0;iThisRank() ) { - for(int i=0;iGlobalIndexToGlobalCoor(gidx,gcoor); _grid->GlobalCoorToRankIndex(rank,o_idx,i_idx,gcoor); if( rank == _grid->ThisRank() ){ int l_idx=generator_idx(o_idx,i_idx); - for(int i=0;iProcessorCount(); + int me= _grid->ThisRank(); + std::vector seeders(Nproc); + + for(int p=0;p seeders(Nthread); + for(int t=0;tlSites();l++) { + if ( (l%Nthread)==t ) { + _generators[l] = Reseed(master_engine); + } + } } } - _seeded=1; - } - void SeedRandomDevice(void){ - deviceSeed src; - Seed(src); +#endif } - void SeedFixedIntegers(const std::vector &seeds){ - CartesianCommunicator::BroadcastWorld(0,(void *)&seeds[0],sizeof(int)*seeds.size()); - fixedSeed src(seeds); - Seed(src); + + uint32_t GlobalU01(int gsite){ + + std::vector gcoor; + _grid->GlobalIndexToGlobalCoor(gsite,gcoor); + + int rank,o_idx,i_idx; + _grid->GlobalCoorToRankIndex(rank,o_idx,i_idx,gcoor); + + int l_idx=generator_idx(o_idx,i_idx); + + uint32_t the_number; + if( rank == _grid->ThisRank() ){ + the_number = _uid[l_idx](_generators[l_idx]); + } + + _grid->Broadcast(rank,(void *)&the_number,sizeof(the_number)); + + return the_number; } + }; template inline void random(GridParallelRNG &rng,Lattice &l){ From 0fade84ab258c454951b383ea04635c3ab95ab7f Mon Sep 17 00:00:00 2001 From: paboyle Date: Sun, 2 Apr 2017 00:29:40 +0900 Subject: [PATCH 08/16] No random device --- tests/IO/Test_nersc_io.cc | 4 +- tests/Test_cshift.cc | 2 +- tests/Test_simd.cc | 10 +- tests/Test_stencil.cc | 2 +- tests/core/Test_cshift_red_black.cc | 2 +- tests/core/Test_cshift_red_black_rotate.cc | 2 +- tests/core/Test_cshift_rotate.cc | 2 +- tests/core/Test_gamma.cc | 2 +- tests/core/Test_gpwilson_even_odd.cc | 2 +- tests/core/Test_lie_generators.cc | 2 +- tests/core/Test_main.cc | 4 +- tests/core/Test_rng.cc | 4 +- tests/core/Test_staggered.cc | 2 +- tests/core/Test_wilson_even_odd.cc | 2 +- tests/core/Test_wilson_tm_even_odd.cc | 2 +- tests/forces/Test_dwf_gpforce.cc | 4 +- tests/forces/Test_gp_rect_force.cc | 2 +- tests/forces/Test_rect_force.cc | 2 +- tests/forces/Test_wilson_force.cc | 2 +- tests/forces/Test_wilson_force_phiMdagMphi.cc | 2 +- tests/forces/Test_wilson_force_phiMphi.cc | 2 +- tests/testu01/Makefile.am | 3 + tests/testu01/Test_smallcrush.cc | 176 ++++++++++++++++++ 23 files changed, 208 insertions(+), 29 deletions(-) create mode 100644 tests/testu01/Makefile.am create mode 100644 tests/testu01/Test_smallcrush.cc diff --git a/tests/IO/Test_nersc_io.cc b/tests/IO/Test_nersc_io.cc index 0273d02a..e5ea7cec 100644 --- a/tests/IO/Test_nersc_io.cc +++ b/tests/IO/Test_nersc_io.cc @@ -54,8 +54,8 @@ int main (int argc, char ** argv) GridSerialRNG sRNGa; GridSerialRNG sRNGb; - pRNGa.SeedRandomDevice(); - sRNGa.SeedRandomDevice(); + pRNGa.SeedFixedIntegers(std::vector({45,12,81,9}); + sRNGa.SeedFixedIntegers(std::vector({45,12,81,9}); std::string rfile("./ckpoint_rng.4000"); NerscIO::writeRNGState(sRNGa,pRNGa,rfile); diff --git a/tests/Test_cshift.cc b/tests/Test_cshift.cc index e1dd0db8..f9559a83 100644 --- a/tests/Test_cshift.cc +++ b/tests/Test_cshift.cc @@ -41,7 +41,7 @@ int main (int argc, char ** argv) GridCartesian Fine(latt_size,simd_layout,mpi_layout); - GridParallelRNG FineRNG(&Fine); FineRNG.SeedRandomDevice(); + GridParallelRNG FineRNG(&Fine); FineRNG.SeedFixedIntegers(std::vector({45,12,81,9})); LatticeComplex U(&Fine); LatticeComplex ShiftU(&Fine); diff --git a/tests/Test_simd.cc b/tests/Test_simd.cc index f19ebd76..b3933ec6 100644 --- a/tests/Test_simd.cc +++ b/tests/Test_simd.cc @@ -125,7 +125,7 @@ template void Tester(const functor &func) { GridSerialRNG sRNG; - sRNG.SeedRandomDevice(); + sRNG.SeedFixedIntegers(std::vector({45,12,81,9})); int Nsimd = vec::Nsimd(); @@ -184,7 +184,7 @@ void IntTester(const functor &func) typedef Integer scal; typedef vInteger vec; GridSerialRNG sRNG; - sRNG.SeedRandomDevice(); + sRNG.SeedFixedIntegers(std::vector({45,12,81,9})); int Nsimd = vec::Nsimd(); @@ -242,7 +242,7 @@ template void ReductionTester(const functor &func) { GridSerialRNG sRNG; - sRNG.SeedRandomDevice(); + sRNG.SeedFixedIntegers(std::vector({45,12,81,9})); int Nsimd = vec::Nsimd(); @@ -343,7 +343,7 @@ template void PermTester(const functor &func) { GridSerialRNG sRNG; - sRNG.SeedRandomDevice(); + sRNG.SeedFixedIntegers(std::vector({45,12,81,9})); int Nsimd = vec::Nsimd(); @@ -409,7 +409,7 @@ template void ExchangeTester(const functor &func) { GridSerialRNG sRNG; - sRNG.SeedRandomDevice(); + sRNG.SeedFixedIntegers(std::vector({45,12,81,9})); int Nsimd = vec::Nsimd(); diff --git a/tests/Test_stencil.cc b/tests/Test_stencil.cc index 38873310..d7bc5a6c 100644 --- a/tests/Test_stencil.cc +++ b/tests/Test_stencil.cc @@ -52,7 +52,7 @@ int main (int argc, char ** argv) GridRedBlackCartesian rbFine(latt_size,simd_layout,mpi_layout); GridParallelRNG fRNG(&Fine); - // fRNG.SeedRandomDevice(); + // fRNG.SeedFixedIntegers(std::vector({45,12,81,9}); std::vector seeds({1,2,3,4}); fRNG.SeedFixedIntegers(seeds); diff --git a/tests/core/Test_cshift_red_black.cc b/tests/core/Test_cshift_red_black.cc index ae55cece..f9269709 100644 --- a/tests/core/Test_cshift_red_black.cc +++ b/tests/core/Test_cshift_red_black.cc @@ -49,7 +49,7 @@ int main (int argc, char ** argv) GridCartesian Fine (latt_size,simd_layout,mpi_layout); GridRedBlackCartesian RBFine(latt_size,simd_layout,mpi_layout,mask,1); - GridParallelRNG FineRNG(&Fine); FineRNG.SeedRandomDevice(); + GridParallelRNG FineRNG(&Fine); FineRNG.SeedFixedIntegers(std::vector({45,12,81,9})); LatticeComplex U(&Fine); LatticeComplex ShiftU(&Fine); diff --git a/tests/core/Test_cshift_red_black_rotate.cc b/tests/core/Test_cshift_red_black_rotate.cc index 4ff7067b..3ef1cd21 100644 --- a/tests/core/Test_cshift_red_black_rotate.cc +++ b/tests/core/Test_cshift_red_black_rotate.cc @@ -49,7 +49,7 @@ int main (int argc, char ** argv) GridCartesian Fine (latt_size,simd_layout,mpi_layout); GridRedBlackCartesian RBFine(latt_size,simd_layout,mpi_layout,mask,1); - GridParallelRNG FineRNG(&Fine); FineRNG.SeedRandomDevice(); + GridParallelRNG FineRNG(&Fine); FineRNG.SeedFixedIntegers(std::vector({45,12,81,9})); LatticeComplex err(&Fine); LatticeComplex U(&Fine); diff --git a/tests/core/Test_cshift_rotate.cc b/tests/core/Test_cshift_rotate.cc index a42fd22e..64c08892 100644 --- a/tests/core/Test_cshift_rotate.cc +++ b/tests/core/Test_cshift_rotate.cc @@ -41,7 +41,7 @@ int main (int argc, char ** argv) GridCartesian Fine(latt_size,simd_layout,mpi_layout); - GridParallelRNG FineRNG(&Fine); FineRNG.SeedRandomDevice(); + GridParallelRNG FineRNG(&Fine); FineRNG.SeedFixedIntegers(std::vector({45,12,81,9})); LatticeComplex U(&Fine); LatticeComplex ShiftU(&Fine); diff --git a/tests/core/Test_gamma.cc b/tests/core/Test_gamma.cc index ee31a69a..f3805bdb 100644 --- a/tests/core/Test_gamma.cc +++ b/tests/core/Test_gamma.cc @@ -245,7 +245,7 @@ int main(int argc, char *argv[]) GridCartesian Grid(latt_size,simd_layout,mpi_layout); GridSerialRNG sRNG; - sRNG.SeedRandomDevice(); + sRNG.SeedFixedIntegers(std::vector({45,12,81,9})); std::cout << GridLogMessage << "======== Test algebra" << std::endl; createTestAlgebra(); diff --git a/tests/core/Test_gpwilson_even_odd.cc b/tests/core/Test_gpwilson_even_odd.cc index b8b320d8..fc12fe75 100644 --- a/tests/core/Test_gpwilson_even_odd.cc +++ b/tests/core/Test_gpwilson_even_odd.cc @@ -50,7 +50,7 @@ int main (int argc, char ** argv) GridParallelRNG pRNG(&Grid); // std::vector seeds({1,2,3,4}); // pRNG.SeedFixedIntegers(seeds); - pRNG.SeedRandomDevice(); + pRNG.SeedFixedIntegers(std::vector({45,12,81,9})); typedef typename GparityWilsonFermionR::FermionField FermionField; diff --git a/tests/core/Test_lie_generators.cc b/tests/core/Test_lie_generators.cc index 5623b74b..114b49f7 100644 --- a/tests/core/Test_lie_generators.cc +++ b/tests/core/Test_lie_generators.cc @@ -86,7 +86,7 @@ int main(int argc, char** argv) { // Projectors GridParallelRNG gridRNG(grid); - gridRNG.SeedRandomDevice(); + gridRNG.SeedFixedIntegers(std::vector({45,12,81,9})); SU3Adjoint::LatticeAdjMatrix Gauss(grid); SU3::LatticeAlgebraVector ha(grid); SU3::LatticeAlgebraVector hb(grid); diff --git a/tests/core/Test_main.cc b/tests/core/Test_main.cc index 78c28539..1868b0fe 100644 --- a/tests/core/Test_main.cc +++ b/tests/core/Test_main.cc @@ -89,8 +89,8 @@ int main(int argc, char **argv) { GridSerialRNG SerialRNG; GridSerialRNG SerialRNG1; - FineRNG.SeedRandomDevice(); - SerialRNG.SeedRandomDevice(); + FineRNG.SeedFixedIntegers(std::vector({45,12,81,9})); + SerialRNG.SeedFixedIntegers(std::vector({45,12,81,9})); std::cout << "SerialRNG" << SerialRNG._generators[0] << std::endl; diff --git a/tests/core/Test_rng.cc b/tests/core/Test_rng.cc index 1fcb3a13..b5d27c29 100644 --- a/tests/core/Test_rng.cc +++ b/tests/core/Test_rng.cc @@ -43,10 +43,10 @@ int main (int argc, char ** argv) std::vector seeds({1,2,3,4}); - GridSerialRNG sRNG; sRNG.SeedRandomDevice(); + GridSerialRNG sRNG; sRNG.SeedFixedIntegers(std::vector({45,12,81,9})); GridSerialRNG fsRNG; fsRNG.SeedFixedIntegers(seeds); - GridParallelRNG pRNG(&Grid); pRNG.SeedRandomDevice(); + GridParallelRNG pRNG(&Grid); pRNG.SeedFixedIntegers(std::vector({45,12,81,9})); GridParallelRNG fpRNG(&Grid); fpRNG.SeedFixedIntegers(seeds); SpinMatrix rnd ; diff --git a/tests/core/Test_staggered.cc b/tests/core/Test_staggered.cc index 89055fc7..75531c83 100644 --- a/tests/core/Test_staggered.cc +++ b/tests/core/Test_staggered.cc @@ -51,7 +51,7 @@ int main (int argc, char ** argv) std::vector seeds({1,2,3,4}); GridParallelRNG pRNG(&Grid); pRNG.SeedFixedIntegers(seeds); - // pRNG.SeedRandomDevice(); + // pRNG.SeedFixedIntegers(std::vector({45,12,81,9}); typedef typename ImprovedStaggeredFermionR::FermionField FermionField; typedef typename ImprovedStaggeredFermionR::ComplexField ComplexField; diff --git a/tests/core/Test_wilson_even_odd.cc b/tests/core/Test_wilson_even_odd.cc index b33bd74d..4933c36e 100644 --- a/tests/core/Test_wilson_even_odd.cc +++ b/tests/core/Test_wilson_even_odd.cc @@ -62,7 +62,7 @@ int main (int argc, char ** argv) GridParallelRNG pRNG(&Grid); // std::vector seeds({1,2,3,4}); // pRNG.SeedFixedIntegers(seeds); - pRNG.SeedRandomDevice(); + pRNG.SeedFixedIntegers(std::vector({45,12,81,9})); LatticeFermion src (&Grid); random(pRNG,src); LatticeFermion phi (&Grid); random(pRNG,phi); diff --git a/tests/core/Test_wilson_tm_even_odd.cc b/tests/core/Test_wilson_tm_even_odd.cc index 36de83ea..a2773244 100644 --- a/tests/core/Test_wilson_tm_even_odd.cc +++ b/tests/core/Test_wilson_tm_even_odd.cc @@ -61,7 +61,7 @@ int main (int argc, char ** argv) GridParallelRNG pRNG(&Grid); // std::vector seeds({1,2,3,4}); // pRNG.SeedFixedIntegers(seeds); - pRNG.SeedRandomDevice(); + pRNG.SeedFixedIntegers(std::vector({45,12,81,9})); LatticeFermion src (&Grid); random(pRNG,src); LatticeFermion phi (&Grid); random(pRNG,phi); diff --git a/tests/forces/Test_dwf_gpforce.cc b/tests/forces/Test_dwf_gpforce.cc index 5094b8a7..96a90ff6 100644 --- a/tests/forces/Test_dwf_gpforce.cc +++ b/tests/forces/Test_dwf_gpforce.cc @@ -54,8 +54,8 @@ int main (int argc, char ** argv) std::vector seeds({1,2,3,4}); - GridParallelRNG RNG5(FGrid); RNG5.SeedRandomDevice(); - GridParallelRNG RNG4(UGrid); RNG4.SeedRandomDevice(); + GridParallelRNG RNG5(FGrid); RNG5.SeedFixedIntegers(std::vector({45,12,81,9})); + GridParallelRNG RNG4(UGrid); RNG4.SeedFixedIntegers(std::vector({45,12,81,9})); FermionField phi (FGrid); gaussian(RNG5,phi); FermionField Mphi (FGrid); diff --git a/tests/forces/Test_gp_rect_force.cc b/tests/forces/Test_gp_rect_force.cc index 551c3a20..17a7cd2b 100644 --- a/tests/forces/Test_gp_rect_force.cc +++ b/tests/forces/Test_gp_rect_force.cc @@ -50,7 +50,7 @@ int main (int argc, char ** argv) std::vector seeds({1,2,3,4}); GridParallelRNG pRNG(&Grid); - pRNG.SeedRandomDevice(); + pRNG.SeedFixedIntegers(std::vector({45,12,81,9})); LatticeGaugeField U(&Grid); diff --git a/tests/forces/Test_rect_force.cc b/tests/forces/Test_rect_force.cc index 97281854..2e2f87b2 100644 --- a/tests/forces/Test_rect_force.cc +++ b/tests/forces/Test_rect_force.cc @@ -50,7 +50,7 @@ int main (int argc, char ** argv) std::vector seeds({1,2,3,4}); GridParallelRNG pRNG(&Grid); - pRNG.SeedRandomDevice(); + pRNG.SeedFixedIntegers(std::vector({45,12,81,9})); LatticeGaugeField U(&Grid); diff --git a/tests/forces/Test_wilson_force.cc b/tests/forces/Test_wilson_force.cc index 60d31b51..52ed00b4 100644 --- a/tests/forces/Test_wilson_force.cc +++ b/tests/forces/Test_wilson_force.cc @@ -50,7 +50,7 @@ int main (int argc, char ** argv) std::vector seeds({1,2,3,4}); GridParallelRNG pRNG(&Grid); - pRNG.SeedRandomDevice(); + pRNG.SeedFixedIntegers(std::vector({45,12,81,9})); LatticeFermion phi (&Grid); gaussian(pRNG,phi); LatticeFermion Mphi (&Grid); diff --git a/tests/forces/Test_wilson_force_phiMdagMphi.cc b/tests/forces/Test_wilson_force_phiMdagMphi.cc index 7717e9bc..2a5814fe 100644 --- a/tests/forces/Test_wilson_force_phiMdagMphi.cc +++ b/tests/forces/Test_wilson_force_phiMdagMphi.cc @@ -50,7 +50,7 @@ int main (int argc, char ** argv) std::vector seeds({1,2,3,4}); GridParallelRNG pRNG(&Grid); - pRNG.SeedRandomDevice(); + pRNG.SeedFixedIntegers(std::vector({45,12,81,9})); LatticeFermion phi (&Grid); gaussian(pRNG,phi); LatticeFermion Mphi (&Grid); diff --git a/tests/forces/Test_wilson_force_phiMphi.cc b/tests/forces/Test_wilson_force_phiMphi.cc index c9e56c32..8cfb1de6 100644 --- a/tests/forces/Test_wilson_force_phiMphi.cc +++ b/tests/forces/Test_wilson_force_phiMphi.cc @@ -50,7 +50,7 @@ int main (int argc, char ** argv) std::vector seeds({1,2,3,4}); GridParallelRNG pRNG(&Grid); - pRNG.SeedRandomDevice(); + pRNG.SeedFixedIntegers(std::vector({45,12,81,9})); LatticeFermion phi (&Grid); gaussian(pRNG,phi); LatticeFermion Mphi (&Grid); diff --git a/tests/testu01/Makefile.am b/tests/testu01/Makefile.am new file mode 100644 index 00000000..eb4d1eae --- /dev/null +++ b/tests/testu01/Makefile.am @@ -0,0 +1,3 @@ +AM_LDFLAGS += -L$(LIBRARY_PATH) -ltestu01 -lprobdist -lmylib -lm +AM_CXXFLAGS += -I$(C_INCLUDE_PATH) +include Make.inc diff --git a/tests/testu01/Test_smallcrush.cc b/tests/testu01/Test_smallcrush.cc new file mode 100644 index 00000000..28974855 --- /dev/null +++ b/tests/testu01/Test_smallcrush.cc @@ -0,0 +1,176 @@ + /************************************************************************************* + + Grid physics library, www.github.com/paboyle/Grid + + Source file: ./tests/Test_smallcrush.cc + + Copyright (C) 2015 + +Author: Peter Boyle + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along + with this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + + See the full license in the file "LICENSE" in the top level distribution directory + *************************************************************************************/ + /* END LEGAL */ +#include + +using namespace std; +using namespace Grid; +using namespace Grid::QCD; + +// Wrap Grid's parallel RNG for testU01 + +extern "C" { +#include "TestU01.h" +#include "gdef.h" +#include "unif01.h" +#include "ucarry.h" +#include "bbattery.h" +} + +std::vector EngineRanlux; +std::vector EngineMT; +std::vector EngineSitmo; + +std::uniform_int_distribution uid; + +uint32_t GetU01Ranlux(void) { + return uid(EngineRanlux[0]); +}; +uint32_t GetU01MT(void) { + return uid(EngineMT[0]); +}; +uint32_t GetU01Sitmo(void) { + return uid(EngineSitmo[0]); +}; + +typedef Grid::GridRNGbase::RngEngine RngEngine; + +struct TestRNG { +public: + static GridParallelRNG *pRNG; + static GridSerialRNG *sRNG; + static GridBase *_grid; + static RngEngine Eng; + static uint64_t site; + static uint64_t gsites; + static char *name; + + static void Init(GridParallelRNG *_pRNG,GridSerialRNG *_sRNG,GridBase *grid) { + pRNG = _pRNG; + sRNG = _sRNG; + _grid= grid; + gsites= grid->_gsites; + std::cout << "Init: Global sites is " < uid; + uint32_t ret_val; +#if 0 + ret_val = sRNG->_uid[0](sRNG->_generators[0]); +#else + ret_val = pRNG->GlobalU01(site); + site=(site+1)%gsites; +#endif + // std::cout << "site "< latt_size = GridDefaultLatt(); + std::vector simd_layout = GridDefaultSimd(4,vComplex::Nsimd()); + std::vector mpi_layout = GridDefaultMpi(); + + GridCartesian Grid(latt_size,simd_layout,mpi_layout); + + std::vector seeds({1,2,3,4}); + std::seed_seq seq(seeds.begin(),seeds.end()); + + EngineRanlux.push_back(std::ranlux48(seq)); + EngineMT.push_back(std::mt19937(seq)); + EngineSitmo.push_back(sitmo::prng_engine(seq)); + + std::cout << GridLogMessage<< "Initialising Grid RNGs "<({43,12,7019,9})); + GridSerialRNG sRNG; + sRNG.SeedFixedIntegers(std::vector({102,12,99,15})); + std::cout << GridLogMessage<< "Initialised Grid RNGs "< Date: Sun, 2 Apr 2017 12:10:51 +0900 Subject: [PATCH 09/16] Big/Small crush test, and fast SITMO rng init, faster but not ideal MT and Ranlux init. --- lib/lattice/Lattice_rng.h | 175 ++++++++++++++++++------------- tests/testu01/Test_smallcrush.cc | 42 +++----- 2 files changed, 117 insertions(+), 100 deletions(-) diff --git a/lib/lattice/Lattice_rng.h b/lib/lattice/Lattice_rng.h index 31fef729..ddd2170f 100644 --- a/lib/lattice/Lattice_rng.h +++ b/lib/lattice/Lattice_rng.h @@ -30,12 +30,19 @@ Author: paboyle #define GRID_LATTICE_RNG_H #include + +#ifdef RNG_SITMO #include +#endif + +#if defined(RNG_SITMO) +#define RNG_FAST_DISCARD +#else +#undef RNG_FAST_DISCARD +#endif namespace Grid { - //http://nvlpubs.nist.gov/nistpubs/SpecialPublications/NIST.SP.800-90Ar1.pdf ? - ////////////////////////////////////////////////////////////// // Allow the RNG state to be less dense than the fine grid ////////////////////////////////////////////////////////////// @@ -65,20 +72,22 @@ namespace Grid { multiplicity = multiplicity *fine->_rdimensions[fd] / coarse->_rdimensions[d]; } - return multiplicity; } // real scalars are one component - template void fillScalar(scalar &s,distribution &dist,generator & gen) + template + void fillScalar(scalar &s,distribution &dist,generator & gen) { s=dist(gen); } - template void fillScalar(ComplexF &s,distribution &dist, generator &gen) + template + void fillScalar(ComplexF &s,distribution &dist, generator &gen) { s=ComplexF(dist(gen),dist(gen)); } - template void fillScalar(ComplexD &s,distribution &dist,generator &gen) + template + void fillScalar(ComplexD &s,distribution &dist,generator &gen) { s=ComplexD(dist(gen),dist(gen)); } @@ -102,27 +111,6 @@ namespace Grid { typedef uint64_t RngStateType; static const int RngStateCount = 4; #endif - /////////////////////// - // support for parallel init - /////////////////////// -#ifdef RNG_SITMO - static void Skip(RngEngine &eng) - { - uint64_t skip = 0x1; skip = skip<<40; - eng.discard(skip); - } -#endif - static RngEngine Reseed(RngEngine &eng) - { - const int reseeds=4; - std::uniform_int_distribution uid; - std::vector newseed(reseeds); - for(int i=0;i _generators; std::vector > _uniform; @@ -130,6 +118,46 @@ namespace Grid { std::vector > _bernoulli; std::vector > _uid; + /////////////////////// + // support for parallel init + /////////////////////// +#ifdef RNG_FAST_DISCARD + static void Skip(RngEngine &eng) + { + ///////////////////////////////////////////////////////////////////////////////////// + // Skip by 2^40 elements between successive lattice sites + // This goes by 10^12. + // Consider quenched updating; likely never exceeding rate of 1000 sweeps + // per second on any machine. This gives us of order 10^9 seconds, or 100 years + // skip ahead. + // For HMC unlikely to go at faster than a solve per second, and + // tens of seconds per trajectory so this is clean in all reasonable cases, + // and margin of safety is orders of magnitude. + // We could hack Sitmo to skip in the higher order words of state if necessary + ///////////////////////////////////////////////////////////////////////////////////// + uint64_t skip = 0x1; skip = skip<<40; + eng.discard(skip); + } +#endif + static RngEngine Reseed(RngEngine &eng) + { + std::vector newseed; + std::uniform_int_distribution uid; + return Reseed(eng,newseed,uid); + } + static RngEngine Reseed(RngEngine &eng,std::vector & newseed, + std::uniform_int_distribution &uid) + { + const int reseeds=4; + + newseed.resize(reseeds); + for(int i=0;i & saved,RngEngine &eng) { saved.resize(RngStateCount); std::stringstream ss; @@ -160,7 +188,6 @@ namespace Grid { void GetEngine(RngEngine &Eng, int gen){ Eng=_generators[gen]; } - template void Seed(source &src, int gen) { _generators[gen] = RngEngine(src); @@ -190,10 +217,11 @@ namespace Grid { for(int idx=0;idx inline void fill(ComplexF &l,std::vector &dist){ dist[0].reset(); fillScalar(l,dist[0],_generators[0]); @@ -253,14 +281,13 @@ namespace Grid { std::seed_seq src(seeds.begin(),seeds.end()); Seed(src,0); } - }; class GridParallelRNG : public GridRNGbase { public: - GridBase *_grid; int _vol; + public: int generator_idx(int os,int is){ return is*_grid->oSites()+os; @@ -289,7 +316,6 @@ namespace Grid { int osites=_grid->oSites(); int words=sizeof(scalar_object)/sizeof(scalar_type); - parallel_for(int ss=0;ss buf(Nsimd); @@ -314,23 +340,34 @@ namespace Grid { void SeedFixedIntegers(const std::vector &seeds){ + // Everyone generates the same seed_seq based on input seeds CartesianCommunicator::BroadcastWorld(0,(void *)&seeds[0],sizeof(int)*seeds.size()); std::seed_seq source(seeds.begin(),seeds.end()); RngEngine master_engine(source); -#ifdef RNG_SITMO +#ifdef RNG_FAST_DISCARD + //////////////////////////////////////////////// + // Skip ahead through a single stream. + // Applicable to SITMO and other has based/crypto RNGs + // Should be applicable to Mersenne Twister, but the C++11 + // MT implementation does not implement fast discard even though + // in principle this is possible + //////////////////////////////////////////////// std::vector gcoor; + int rank,o_idx,i_idx; + // Everybody loops over global volume. for(int gidx=0;gidx<_grid->_gsites;gidx++){ - Skip(master_engine); // advance the state; does this work? + Skip(master_engine); // Skip to next RNG sequence - int rank,o_idx,i_idx; + // Where is it? _grid->GlobalIndexToGlobalCoor(gidx,gcoor); _grid->GlobalCoorToRankIndex(rank,o_idx,i_idx,gcoor); + // If this is one of mine we take it if( rank == _grid->ThisRank() ){ int l_idx=generator_idx(o_idx,i_idx); _generators[l_idx] = master_engine; @@ -338,23 +375,24 @@ namespace Grid { } #else - // Machine and thread decomposition dependent seeding - // is efficient and maximally parallel; but not - // reproducible from machine to machine. Not ideal, but fast. - // Different seed for each node. + //////////////////////////////////////////////////////////////// + // Machine and thread decomposition dependent seeding is efficient + // and maximally parallel; but NOT reproducible from machine to machine. + // Not ideal, but fastest way to reseed all nodes. + //////////////////////////////////////////////////////////////// { + // Obtain one Reseed per processor int Nproc = _grid->ProcessorCount(); - int me= _grid->ThisRank(); std::vector seeders(Nproc); - + int me= _grid->ThisRank(); for(int p=0;p seeders(Nthread); for(int t=0;t newseeds; + std::uniform_int_distribution uid; for(int l=0;l<_grid->lSites();l++) { if ( (l%Nthread)==t ) { - _generators[l] = Reseed(master_engine); + _generators[l] = Reseed(seeders[t],newseeds,uid); } } } } #endif } - + //////////////////////////////////////////////////////////////////////// + // Support for rigorous test of RNG's + // Return uniform random uint32_t from requested site generator + //////////////////////////////////////////////////////////////////////// uint32_t GlobalU01(int gsite){ - std::vector gcoor; - _grid->GlobalIndexToGlobalCoor(gsite,gcoor); + uint32_t the_number; + // who + std::vector gcoor; int rank,o_idx,i_idx; + _grid->GlobalIndexToGlobalCoor(gsite,gcoor); _grid->GlobalCoorToRankIndex(rank,o_idx,i_idx,gcoor); + // draw int l_idx=generator_idx(o_idx,i_idx); - - uint32_t the_number; if( rank == _grid->ThisRank() ){ the_number = _uid[l_idx](_generators[l_idx]); } - + + // share & return _grid->Broadcast(rank,(void *)&the_number,sizeof(the_number)); - return the_number; } - }; - template inline void random(GridParallelRNG &rng,Lattice &l){ - rng.fill(l,rng._uniform); - } + template inline void random(GridParallelRNG &rng,Lattice &l) { rng.fill(l,rng._uniform); } + template inline void gaussian(GridParallelRNG &rng,Lattice &l) { rng.fill(l,rng._gaussian); } + template inline void bernoulli(GridParallelRNG &rng,Lattice &l){ rng.fill(l,rng._bernoulli);} - template inline void gaussian(GridParallelRNG &rng,Lattice &l){ - rng.fill(l,rng._gaussian); - } - - template inline void bernoulli(GridParallelRNG &rng,Lattice &l){ - rng.fill(l,rng._bernoulli); - } - - template inline void random(GridSerialRNG &rng,sobj &l){ - rng.fill(l,rng._uniform); - } - - template inline void gaussian(GridSerialRNG &rng,sobj &l){ - rng.fill(l,rng._gaussian); - } - - template inline void bernoulli(GridSerialRNG &rng,sobj &l){ - rng.fill(l,rng._bernoulli); - } + template inline void random(GridSerialRNG &rng,sobj &l) { rng.fill(l,rng._uniform ); } + template inline void gaussian(GridSerialRNG &rng,sobj &l) { rng.fill(l,rng._gaussian ); } + template inline void bernoulli(GridSerialRNG &rng,sobj &l){ rng.fill(l,rng._bernoulli); } } #endif diff --git a/tests/testu01/Test_smallcrush.cc b/tests/testu01/Test_smallcrush.cc index 28974855..51f0d60c 100644 --- a/tests/testu01/Test_smallcrush.cc +++ b/tests/testu01/Test_smallcrush.cc @@ -32,17 +32,17 @@ using namespace Grid; using namespace Grid::QCD; // Wrap Grid's parallel RNG for testU01 +#define BIG_CRUSH // Big crush enable (long running) +#undef TEST_RNG_STANDALONE // Test serial RNGs in isolation extern "C" { #include "TestU01.h" -#include "gdef.h" -#include "unif01.h" -#include "ucarry.h" -#include "bbattery.h" } std::vector EngineRanlux; std::vector EngineMT; + +#include std::vector EngineSitmo; std::uniform_int_distribution uid; @@ -74,21 +74,13 @@ public: sRNG = _sRNG; _grid= grid; gsites= grid->_gsites; - std::cout << "Init: Global sites is " < uid; uint32_t ret_val; -#if 0 - ret_val = sRNG->_uid[0](sRNG->_generators[0]); -#else ret_val = pRNG->GlobalU01(site); site=(site+1)%gsites; -#endif - // std::cout << "site "< Date: Sun, 2 Apr 2017 23:13:48 +0900 Subject: [PATCH 10/16] Clean up --- tests/testu01/Test_smallcrush.cc | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/tests/testu01/Test_smallcrush.cc b/tests/testu01/Test_smallcrush.cc index 51f0d60c..d09cd577 100644 --- a/tests/testu01/Test_smallcrush.cc +++ b/tests/testu01/Test_smallcrush.cc @@ -32,7 +32,9 @@ using namespace Grid; using namespace Grid::QCD; // Wrap Grid's parallel RNG for testU01 -#define BIG_CRUSH // Big crush enable (long running) +#undef BIG_CRUSH // Big crush enable (long running) +#define MIDDLE_CRUSH // Big crush enable (long running) +#undef SMALL_CRUSH // Big crush enable (long running) #undef TEST_RNG_STANDALONE // Test serial RNGs in isolation extern "C" { @@ -154,12 +156,19 @@ int main (int argc, char ** argv) std::cout << GridLogMessage<< "Testing Grid BigCrush for "<< std::string(TestRNG::name) < Date: Wed, 5 Apr 2017 02:18:15 +0900 Subject: [PATCH 11/16] UID fix --- lib/communicator/Communicator_mpi3.cc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/communicator/Communicator_mpi3.cc b/lib/communicator/Communicator_mpi3.cc index 7685768c..6e40142c 100644 --- a/lib/communicator/Communicator_mpi3.cc +++ b/lib/communicator/Communicator_mpi3.cc @@ -203,7 +203,7 @@ void CartesianCommunicator::Init(int *argc, char ***argv) { size_t size = CartesianCommunicator::MAX_MPI_SHM_BYTES; - sprintf(shm_name,"/Grid_mpi3_shm_%d_%d",GroupRank,r); + sprintf(shm_name,"/Grid_mpi3_uid%d_shm_%d_%d",getuid(),GroupRank,r); shm_unlink(shm_name); int fd=shm_open(shm_name,O_RDWR|O_CREAT,0660); @@ -224,7 +224,7 @@ void CartesianCommunicator::Init(int *argc, char ***argv) { for(int r=0;r Date: Wed, 5 Apr 2017 02:35:34 +0900 Subject: [PATCH 12/16] Creation mode better implementation --- lib/communicator/Communicator_mpi3.cc | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/lib/communicator/Communicator_mpi3.cc b/lib/communicator/Communicator_mpi3.cc index 6e40142c..a8bffc14 100644 --- a/lib/communicator/Communicator_mpi3.cc +++ b/lib/communicator/Communicator_mpi3.cc @@ -203,10 +203,10 @@ void CartesianCommunicator::Init(int *argc, char ***argv) { size_t size = CartesianCommunicator::MAX_MPI_SHM_BYTES; - sprintf(shm_name,"/Grid_mpi3_uid%d_shm_%d_%d",getuid(),GroupRank,r); + sprintf(shm_name,"/Grid_mpi3_shm_%d_%d",GroupRank,r); shm_unlink(shm_name); - int fd=shm_open(shm_name,O_RDWR|O_CREAT,0660); + int fd=shm_open(shm_name,O_RDWR|O_CREAT,0666); if ( fd < 0 ) { perror("failed shm_open"); assert(0); } ftruncate(fd, size); @@ -224,9 +224,9 @@ void CartesianCommunicator::Init(int *argc, char ***argv) { for(int r=0;r Date: Wed, 5 Apr 2017 16:24:04 +0100 Subject: [PATCH 13/16] Correcting names in tests --- tests/core/Test_wilson_even_odd.cc | 9 ++++----- ..._even_odd.cc => Test_wilson_twisted_mass_even_odd.cc} | 9 +++++---- 2 files changed, 9 insertions(+), 9 deletions(-) rename tests/core/{Test_wilson_tm_even_odd.cc => Test_wilson_twisted_mass_even_odd.cc} (97%) diff --git a/tests/core/Test_wilson_even_odd.cc b/tests/core/Test_wilson_even_odd.cc index 4933c36e..a2773244 100644 --- a/tests/core/Test_wilson_even_odd.cc +++ b/tests/core/Test_wilson_even_odd.cc @@ -2,11 +2,10 @@ Grid physics library, www.github.com/paboyle/Grid - Source file: ./tests/Test_wilson_even_odd.cc + Source file: ./tests/Test_wilson_tm_even_odd.cc Copyright (C) 2015 -Author: Peter Boyle Author: paboyle This program is free software; you can redistribute it and/or modify @@ -89,8 +88,8 @@ int main (int argc, char ** argv) } RealD mass=0.1; - RealD mu = 0.1; - WilsonTMFermionR Dw(Umu,Grid,RBGrid,mass,mu); + + WilsonFermionR Dw(Umu,Grid,RBGrid,mass); LatticeFermion src_e (&RBGrid); LatticeFermion src_o (&RBGrid); @@ -207,7 +206,7 @@ int main (int argc, char ** argv) pickCheckerboard(Odd ,phi_o,phi); RealD t1,t2; - SchurDiagMooeeOperator HermOpEO(Dw); + SchurDiagMooeeOperator HermOpEO(Dw); HermOpEO.MpcDagMpc(chi_e,dchi_e,t1,t2); HermOpEO.MpcDagMpc(chi_o,dchi_o,t1,t2); diff --git a/tests/core/Test_wilson_tm_even_odd.cc b/tests/core/Test_wilson_twisted_mass_even_odd.cc similarity index 97% rename from tests/core/Test_wilson_tm_even_odd.cc rename to tests/core/Test_wilson_twisted_mass_even_odd.cc index a2773244..4933c36e 100644 --- a/tests/core/Test_wilson_tm_even_odd.cc +++ b/tests/core/Test_wilson_twisted_mass_even_odd.cc @@ -2,10 +2,11 @@ Grid physics library, www.github.com/paboyle/Grid - Source file: ./tests/Test_wilson_tm_even_odd.cc + Source file: ./tests/Test_wilson_even_odd.cc Copyright (C) 2015 +Author: Peter Boyle Author: paboyle This program is free software; you can redistribute it and/or modify @@ -88,8 +89,8 @@ int main (int argc, char ** argv) } RealD mass=0.1; - - WilsonFermionR Dw(Umu,Grid,RBGrid,mass); + RealD mu = 0.1; + WilsonTMFermionR Dw(Umu,Grid,RBGrid,mass,mu); LatticeFermion src_e (&RBGrid); LatticeFermion src_o (&RBGrid); @@ -206,7 +207,7 @@ int main (int argc, char ** argv) pickCheckerboard(Odd ,phi_o,phi); RealD t1,t2; - SchurDiagMooeeOperator HermOpEO(Dw); + SchurDiagMooeeOperator HermOpEO(Dw); HermOpEO.MpcDagMpc(chi_e,dchi_e,t1,t2); HermOpEO.MpcDagMpc(chi_o,dchi_o,t1,t2); From 86aaa35294b6bcc1dc90441283bd89f434cfc977 Mon Sep 17 00:00:00 2001 From: paboyle Date: Fri, 7 Apr 2017 11:07:40 +0900 Subject: [PATCH 14/16] Christoph needs SchurDiagTwoKappa which is mobius specific. --- lib/qcd/action/fermion/Fermion.h | 1 + lib/qcd/action/fermion/SchurDiagTwoKappa.h | 102 +++++++++++++++++++++ 2 files changed, 103 insertions(+) create mode 100644 lib/qcd/action/fermion/SchurDiagTwoKappa.h diff --git a/lib/qcd/action/fermion/Fermion.h b/lib/qcd/action/fermion/Fermion.h index 791afeee..ac42f177 100644 --- a/lib/qcd/action/fermion/Fermion.h +++ b/lib/qcd/action/fermion/Fermion.h @@ -58,6 +58,7 @@ Author: Peter Boyle #include #include #include +#include #include #include #include diff --git a/lib/qcd/action/fermion/SchurDiagTwoKappa.h b/lib/qcd/action/fermion/SchurDiagTwoKappa.h new file mode 100644 index 00000000..8305f98a --- /dev/null +++ b/lib/qcd/action/fermion/SchurDiagTwoKappa.h @@ -0,0 +1,102 @@ + /************************************************************************************* + + Grid physics library, www.github.com/paboyle/Grid + + Source file: SchurDiagTwoKappa.h + + Copyright (C) 2017 + +Author: Christoph Lehner +Author: Peter Boyle + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along + with this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + + See the full license in the file "LICENSE" in the top level distribution directory + *************************************************************************************/ + /* END LEGAL */ +#ifndef _SCHUR_DIAG_TWO_KAPPA_H +#define _SCHUR_DIAG_TWO_KAPPA_H + +namespace Grid { + + // This is specific to (Z)mobius fermions + template + class KappaSimilarityTransform { + public: + INHERIT_IMPL_TYPES(Matrix); + std::vector kappa, kappaDag, kappaInv, kappaInvDag; + + KappaSimilarityTransform (Matrix &zmob) { + for (int i=0;i<(int)zmob.bs.size();i++) { + Coeff_t k = 1.0 / ( 2.0 * (zmob.bs[i] *(4 - zmob.M5) + 1.0) ); + kappa.push_back( k ); + kappaDag.push_back( conj(k) ); + kappaInv.push_back( 1.0 / k ); + kappaInvDag.push_back( 1.0 / conj(k) ); + } + } + + template + void sscale(const Lattice& in, Lattice& out, Coeff_t* s) { + GridBase *grid=out._grid; + out.checkerboard = in.checkerboard; + assert(grid->_simd_layout[0] == 1); // should be fine for ZMobius for now + int Ls = grid->_rdimensions[0]; + parallel_for(int ss=0;ssoSites();ss++){ + vobj tmp = s[ss % Ls]*in._odata[ss]; + vstream(out._odata[ss],tmp); + } + } + + RealD sscale_norm(const Field& in, Field& out, Coeff_t* s) { + sscale(in,out,s); + return norm2(out); + } + + virtual RealD M (const Field& in, Field& out) { return sscale_norm(in,out,&kappa[0]); } + virtual RealD MDag (const Field& in, Field& out) { return sscale_norm(in,out,&kappaDag[0]);} + virtual RealD MInv (const Field& in, Field& out) { return sscale_norm(in,out,&kappaInv[0]);} + virtual RealD MInvDag (const Field& in, Field& out) { return sscale_norm(in,out,&kappaInvDag[0]);} + + }; + + template + class SchurDiagTwoKappaOperator : public SchurOperatorBase { + public: + KappaSimilarityTransform _S; + SchurDiagTwoOperator _Mat; + + SchurDiagTwoKappaOperator (Matrix &Mat): _S(Mat), _Mat(Mat) {}; + + virtual RealD Mpc (const Field &in, Field &out) { + Field tmp(in._grid); + + _S.MInv(in,out); + _Mat.Mpc(out,tmp); + return _S.M(tmp,out); + + } + virtual RealD MpcDag (const Field &in, Field &out){ + Field tmp(in._grid); + + _S.MDag(in,out); + _Mat.MpcDag(out,tmp); + return _S.MInvDag(tmp,out); + } + }; + +} + +#endif From 98a24ebf31d071f0d13bc67f165ce904c49db7d7 Mon Sep 17 00:00:00 2001 From: Antonin Portelli Date: Mon, 10 Apr 2017 16:58:54 +0100 Subject: [PATCH 15/16] =?UTF-8?q?The=20macro=20=E2=80=9Cmagics=E2=80=9D=20?= =?UTF-8?q?is=20very=20intensive=20for=20the=20preprocessor=20in=20the=20m?= =?UTF-8?q?easurement=20code=20which=20has=20numerous=20serialisable=20cla?= =?UTF-8?q?sses.=20Reducing=20the=20number=20of=20serialisable=20fields=20?= =?UTF-8?q?to=2064=20(instead=20of=201024)=20helps=20a=20lot,=20this=20is?= =?UTF-8?q?=20enough=20for=20now=20and=20can=20be=20extended=20trivially?= =?UTF-8?q?=20if=20needed=20in=20the=20future.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- lib/serialisation/MacroMagic.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/serialisation/MacroMagic.h b/lib/serialisation/MacroMagic.h index ca789312..a864989c 100644 --- a/lib/serialisation/MacroMagic.h +++ b/lib/serialisation/MacroMagic.h @@ -54,7 +54,7 @@ THE SOFTWARE. #define GRID_MACRO_EMPTY() -#define GRID_MACRO_EVAL(...) GRID_MACRO_EVAL1024(__VA_ARGS__) +#define GRID_MACRO_EVAL(...) GRID_MACRO_EVAL64(__VA_ARGS__) #define GRID_MACRO_EVAL1024(...) GRID_MACRO_EVAL512(GRID_MACRO_EVAL512(__VA_ARGS__)) #define GRID_MACRO_EVAL512(...) GRID_MACRO_EVAL256(GRID_MACRO_EVAL256(__VA_ARGS__)) #define GRID_MACRO_EVAL256(...) GRID_MACRO_EVAL128(GRID_MACRO_EVAL128(__VA_ARGS__)) From 8ef4300412ee7a3ac028effae9b3a52a732179fe Mon Sep 17 00:00:00 2001 From: Antonin Portelli Date: Mon, 10 Apr 2017 17:00:22 +0100 Subject: [PATCH 16/16] spurious .dirstamp files removed --- lib/algorithms/approx/.dirstamp | 0 lib/communicator/.dirstamp | 0 lib/qcd/action/fermion/.dirstamp | 0 lib/qcd/spin/.dirstamp | 0 lib/qcd/utils/.dirstamp | 0 lib/stencil/.dirstamp | 0 6 files changed, 0 insertions(+), 0 deletions(-) delete mode 100644 lib/algorithms/approx/.dirstamp delete mode 100644 lib/communicator/.dirstamp delete mode 100644 lib/qcd/action/fermion/.dirstamp delete mode 100644 lib/qcd/spin/.dirstamp delete mode 100644 lib/qcd/utils/.dirstamp delete mode 100644 lib/stencil/.dirstamp diff --git a/lib/algorithms/approx/.dirstamp b/lib/algorithms/approx/.dirstamp deleted file mode 100644 index e69de29b..00000000 diff --git a/lib/communicator/.dirstamp b/lib/communicator/.dirstamp deleted file mode 100644 index e69de29b..00000000 diff --git a/lib/qcd/action/fermion/.dirstamp b/lib/qcd/action/fermion/.dirstamp deleted file mode 100644 index e69de29b..00000000 diff --git a/lib/qcd/spin/.dirstamp b/lib/qcd/spin/.dirstamp deleted file mode 100644 index e69de29b..00000000 diff --git a/lib/qcd/utils/.dirstamp b/lib/qcd/utils/.dirstamp deleted file mode 100644 index e69de29b..00000000 diff --git a/lib/stencil/.dirstamp b/lib/stencil/.dirstamp deleted file mode 100644 index e69de29b..00000000