diff --git a/lib/lattice/Lattice_rng.h b/lib/lattice/Lattice_rng.h index 3d653d17..31fef729 100644 --- a/lib/lattice/Lattice_rng.h +++ b/lib/lattice/Lattice_rng.h @@ -69,40 +69,6 @@ namespace Grid { return multiplicity; } - // Wrap seed_seq to give common interface with random_device - // Should rather wrap random_device and have a generate - class fixedSeed { - public: - - typedef std::seed_seq::result_type result_type; - - std::seed_seq src; - - template fixedSeed(const std::vector &seeds) : src(seeds.begin(),seeds.end()) {}; - - template< class RandomIt > void generate( RandomIt begin, RandomIt end ) { - src.generate(begin,end); - } - - }; - - - class deviceSeed { - public: - - std::random_device rd; - - typedef std::random_device::result_type result_type; - - deviceSeed(void) : rd(){}; - - template< class RandomIt > void generate( RandomIt begin, RandomIt end ) { - for(RandomIt it=begin; it!=end;it++){ - *it = rd(); - } - } - }; - // real scalars are one component template void fillScalar(scalar &s,distribution &dist,generator & gen) { @@ -118,67 +84,100 @@ namespace Grid { } class GridRNGbase { - public: - - int _seeded; // One generator per site. // Uniform and Gaussian distributions from these generators. #ifdef RNG_RANLUX - typedef uint64_t RngStateType; typedef std::ranlux48 RngEngine; + typedef uint64_t RngStateType; static const int RngStateCount = 15; -#elif RNG_MT19937 +#endif +#ifdef RNG_MT19937 typedef std::mt19937 RngEngine; typedef uint32_t RngStateType; static const int RngStateCount = std::mt19937::state_size; -#elif RNG_SITMO +#endif +#ifdef RNG_SITMO typedef sitmo::prng_engine RngEngine; typedef uint64_t RngStateType; static const int RngStateCount = 4; #endif - std::vector _generators; - std::vector> _uniform; - std::vector> _gaussian; - std::vector> _bernoulli; + /////////////////////// + // support for parallel init + /////////////////////// +#ifdef RNG_SITMO + static void Skip(RngEngine &eng) + { + uint64_t skip = 0x1; skip = skip<<40; + eng.discard(skip); + } +#endif + static RngEngine Reseed(RngEngine &eng) + { + const int reseeds=4; + std::uniform_int_distribution uid; + std::vector newseed(reseeds); + for(int i=0;i & saved,int gen) { + std::vector _generators; + std::vector > _uniform; + std::vector > _gaussian; + std::vector > _bernoulli; + std::vector > _uid; + + void GetState(std::vector & saved,RngEngine &eng) { saved.resize(RngStateCount); std::stringstream ss; - ss<<_generators[gen]; + ss<>saved[i]; } } - void SetState(std::vector & saved,int gen){ + void GetState(std::vector & saved,int gen) { + GetState(saved,_generators[gen]); + } + void SetState(std::vector & saved,RngEngine &eng){ assert(saved.size()==RngStateCount); std::stringstream ss; for(int i=0;i>_generators[gen]; + ss>>eng; } + void SetState(std::vector & saved,int gen){ + SetState(saved,_generators[gen]); + } + void SetEngine(RngEngine &Eng, int gen){ + _generators[gen]=Eng; + } + void GetEngine(RngEngine &Eng, int gen){ + Eng=_generators[gen]; + } + + template void Seed(source &src, int gen) + { + _generators[gen] = RngEngine(src); + } }; class GridSerialRNG : public GridRNGbase { public: - // FIXME ... do we require lockstep draws of randoms - // from all nodes keeping seeds consistent. - // place a barrier/broadcast in the fill routine - GridSerialRNG() : GridRNGbase() { _generators.resize(1); _uniform.resize(1,std::uniform_real_distribution{0,1}); _gaussian.resize(1,std::normal_distribution(0.0,1.0) ); _bernoulli.resize(1,std::discrete_distribution{1,1}); - _seeded=0; + _uid.resize(1,std::uniform_int_distribution() ); } - - template inline void fill(sobj &l,std::vector &dist){ typedef typename sobj::scalar_type scalar_type; @@ -195,7 +194,6 @@ namespace Grid { CartesianCommunicator::BroadcastWorld(0,(void *)&l,sizeof(l)); }; - template inline void fill(ComplexF &l,std::vector &dist){ dist[0].reset(); fillScalar(l,dist[0],_generators[0]); @@ -250,19 +248,10 @@ namespace Grid { CartesianCommunicator::BroadcastWorld(0,(void *)&l,sizeof(l)); } - template void Seed(source &src) - { - _generators[0] = RngEngine(src); - _seeded=1; - } - void SeedRandomDevice(void){ - deviceSeed src; - Seed(src); - } void SeedFixedIntegers(const std::vector &seeds){ CartesianCommunicator::BroadcastWorld(0,(void *)&seeds[0],sizeof(int)*seeds.size()); - fixedSeed src(seeds); - Seed(src); + std::seed_seq src(seeds.begin(),seeds.end()); + Seed(src,0); } }; @@ -285,15 +274,9 @@ namespace Grid { _uniform.resize(_vol,std::uniform_real_distribution{0,1}); _gaussian.resize(_vol,std::normal_distribution(0.0,1.0) ); _bernoulli.resize(_vol,std::discrete_distribution{1,1}); - _seeded=0; + _uid.resize(_vol,std::uniform_int_distribution() ); } - - - //FIXME implement generic IO and create state save/restore - //void SaveState(const std::string &file); - //void LoadState(const std::string &file); - template inline void fill(Lattice &l,std::vector &dist){ typedef typename vobj::scalar_object scalar_object; @@ -329,79 +312,88 @@ namespace Grid { } }; - // This loop could be made faster to avoid the Ahmdahl by - // i) seed generators on each timeslice, for x=y=z=0; - // ii) seed generators on each z for x=y=0 - // iii)seed generators on each y,z for x=0 - // iv) seed generators on each y,z,x - // made possible by physical indexing. - template void Seed(source &src) - { + void SeedFixedIntegers(const std::vector &seeds){ - typedef typename source::result_type seed_t; - std::uniform_int_distribution uid; + CartesianCommunicator::BroadcastWorld(0,(void *)&seeds[0],sizeof(int)*seeds.size()); - int numseed=4; - int gsites = _grid->_gsites; - std::vector site_init(numseed); + std::seed_seq source(seeds.begin(),seeds.end()); + + RngEngine master_engine(source); + +#ifdef RNG_SITMO std::vector gcoor; + for(int gidx=0;gidx<_grid->_gsites;gidx++){ - // Master RngEngine - std::vector master_init(numseed); src.generate(master_init.begin(),master_init.end()); - _grid->Broadcast(0,(void *)&master_init[0],sizeof(seed_t)*numseed); - fixedSeed master_seed(master_init); - RngEngine master_engine(master_seed); + Skip(master_engine); // advance the state; does this work? - // Per node RngEngine - std::vector node_init(numseed); - for(int r=0;r<_grid->ProcessorCount();r++) { - - std::vector rank_init(numseed); - for(int i=0;iThisRank() ) { - for(int i=0;iGlobalIndexToGlobalCoor(gidx,gcoor); _grid->GlobalCoorToRankIndex(rank,o_idx,i_idx,gcoor); if( rank == _grid->ThisRank() ){ int l_idx=generator_idx(o_idx,i_idx); - for(int i=0;iProcessorCount(); + int me= _grid->ThisRank(); + std::vector seeders(Nproc); + + for(int p=0;p seeders(Nthread); + for(int t=0;tlSites();l++) { + if ( (l%Nthread)==t ) { + _generators[l] = Reseed(master_engine); + } + } } } - _seeded=1; - } - void SeedRandomDevice(void){ - deviceSeed src; - Seed(src); +#endif } - void SeedFixedIntegers(const std::vector &seeds){ - CartesianCommunicator::BroadcastWorld(0,(void *)&seeds[0],sizeof(int)*seeds.size()); - fixedSeed src(seeds); - Seed(src); + + uint32_t GlobalU01(int gsite){ + + std::vector gcoor; + _grid->GlobalIndexToGlobalCoor(gsite,gcoor); + + int rank,o_idx,i_idx; + _grid->GlobalCoorToRankIndex(rank,o_idx,i_idx,gcoor); + + int l_idx=generator_idx(o_idx,i_idx); + + uint32_t the_number; + if( rank == _grid->ThisRank() ){ + the_number = _uid[l_idx](_generators[l_idx]); + } + + _grid->Broadcast(rank,(void *)&the_number,sizeof(the_number)); + + return the_number; } + }; template inline void random(GridParallelRNG &rng,Lattice &l){