/************************************************************************************* Grid physics library, www.github.com/paboyle/Grid Source file: ./lib/lattice/Lattice_rng.h Copyright (C) 2015 Author: Peter Boyle Author: Guido Cossu This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. See the full license in the file "LICENSE" in the top level distribution directory *************************************************************************************/ /* END LEGAL */ #ifndef GRID_LATTICE_RNG_H #define GRID_LATTICE_RNG_H #include namespace Grid { ////////////////////////////////////////////////////////////// // Allow the RNG state to be less dense than the fine grid ////////////////////////////////////////////////////////////// inline int RNGfillable(GridBase *coarse,GridBase *fine) { int rngdims = coarse->_ndimension; // trivially extended in higher dims, with locality guaranteeing RNG state is local to node int lowerdims = fine->_ndimension - coarse->_ndimension; assert(lowerdims >= 0); for(int d=0;d_simd_layout[d]==1); assert(fine->_processors[d]==1); } int multiplicity=1; for(int d=0;d_rdimensions[d]; } // local and global volumes subdivide cleanly after SIMDization for(int d=0;d_processors[d] == fine->_processors[fd]); assert(coarse->_simd_layout[d] == fine->_simd_layout[fd]); assert(((fine->_rdimensions[fd] / coarse->_rdimensions[d])* coarse->_rdimensions[d])==fine->_rdimensions[fd]); multiplicity = multiplicity *fine->_rdimensions[fd] / coarse->_rdimensions[d]; } return multiplicity; } inline int RNGfillable_general(GridBase *coarse,GridBase *fine) { int rngdims = coarse->_ndimension; // trivially extended in higher dims, with locality guaranteeing RNG state is local to node int lowerdims = fine->_ndimension - coarse->_ndimension; assert(lowerdims >= 0); // assumes that the higher dimensions are not using more processors // all further divisions are local for(int d=0;d_processors[d]==1); for(int d=0;d_processors[d] == fine->_processors[d+lowerdims]); // then divide the number of local sites // check that the total number of sims agree, meanse the iSites are the same assert(fine->Nsimd() == coarse->Nsimd()); // check that the two grids divide cleanly assert( (fine->lSites() / coarse->lSites() ) * coarse->lSites() == fine->lSites() ); return fine->lSites() / coarse->lSites(); } // Wrap seed_seq to give common interface with random_device class fixedSeed { public: typedef std::seed_seq::result_type result_type; std::seed_seq src; fixedSeed(const std::vector &seeds) : src(seeds.begin(),seeds.end()) {}; result_type operator () (void){ std::vector list(1); src.generate(list.begin(),list.end()); return list[0]; } }; // real scalars are one component template void fillScalar(scalar &s,distribution &dist,generator & gen) { s=dist(gen); } template void fillScalar(ComplexF &s,distribution &dist, generator &gen) { s=ComplexF(dist(gen),dist(gen)); } template void fillScalar(ComplexD &s,distribution &dist,generator &gen) { s=ComplexD(dist(gen),dist(gen)); } class GridRNGbase { public: int _seeded; // One generator per site. // Uniform and Gaussian distributions from these generators. #ifdef RNG_RANLUX typedef uint64_t RngStateType; typedef std::ranlux48 RngEngine; static const int RngStateCount = 15; #else typedef std::mt19937 RngEngine; typedef uint32_t RngStateType; static const int RngStateCount = std::mt19937::state_size; #endif std::vector _generators; std::vector> _uniform; std::vector> _gaussian; std::vector> _bernoulli; void GetState(std::vector & saved,int gen) { saved.resize(RngStateCount); std::stringstream ss; ss<<_generators[gen]; ss.seekg(0,ss.beg); for(int i=0;i>saved[i]; } } void SetState(std::vector & saved,int gen){ assert(saved.size()==RngStateCount); std::stringstream ss; for(int i=0;i>_generators[gen]; } }; class GridSerialRNG : public GridRNGbase { public: // FIXME ... do we require lockstep draws of randoms // from all nodes keeping seeds consistent. // place a barrier/broadcast in the fill routine template void Seed(source &src) { typename source::result_type init = src(); CartesianCommunicator::BroadcastWorld(0,(void *)&init,sizeof(init)); _generators[0] = RngEngine(init); _seeded=1; } GridSerialRNG() : GridRNGbase() { _generators.resize(1); _uniform.resize(1,std::uniform_real_distribution{0,1}); _gaussian.resize(1,std::normal_distribution(0.0,1.0) ); _bernoulli.resize(1,std::discrete_distribution{1,1}); _seeded=0; } template inline void fill(sobj &l,std::vector &dist){ typedef typename sobj::scalar_type scalar_type; int words = sizeof(sobj)/sizeof(scalar_type); scalar_type *buf = (scalar_type *) & l; dist[0].reset(); for(int idx=0;idx inline void fill(ComplexF &l,std::vector &dist){ dist[0].reset(); fillScalar(l,dist[0],_generators[0]); CartesianCommunicator::BroadcastWorld(0,(void *)&l,sizeof(l)); } template inline void fill(ComplexD &l,std::vector &dist){ dist[0].reset(); fillScalar(l,dist[0],_generators[0]); CartesianCommunicator::BroadcastWorld(0,(void *)&l,sizeof(l)); } template inline void fill(RealF &l,std::vector &dist){ dist[0].reset(); fillScalar(l,dist[0],_generators[0]); CartesianCommunicator::BroadcastWorld(0,(void *)&l,sizeof(l)); } template inline void fill(RealD &l,std::vector &dist){ dist[0].reset(); fillScalar(l,dist[0],_generators[0]); CartesianCommunicator::BroadcastWorld(0,(void *)&l,sizeof(l)); } // vector fill template inline void fill(vComplexF &l,std::vector &dist){ RealF *pointer=(RealF *)&l; dist[0].reset(); for(int i=0;i<2*vComplexF::Nsimd();i++){ fillScalar(pointer[i],dist[0],_generators[0]); } CartesianCommunicator::BroadcastWorld(0,(void *)&l,sizeof(l)); } template inline void fill(vComplexD &l,std::vector &dist){ RealD *pointer=(RealD *)&l; dist[0].reset(); for(int i=0;i<2*vComplexD::Nsimd();i++){ fillScalar(pointer[i],dist[0],_generators[0]); } CartesianCommunicator::BroadcastWorld(0,(void *)&l,sizeof(l)); } template inline void fill(vRealF &l,std::vector &dist){ RealF *pointer=(RealF *)&l; dist[0].reset(); for(int i=0;i inline void fill(vRealD &l,std::vector &dist){ RealD *pointer=(RealD *)&l; dist[0].reset(); for(int i=0;i &seeds){ fixedSeed src(seeds); Seed(src); } }; class GridParallelRNG : public GridRNGbase { double _time_counter; public: GridBase *_grid; unsigned int _vol; int generator_idx(int os,int is){ return is*_grid->oSites()+os; } GridParallelRNG(GridBase *grid) : GridRNGbase() { _grid = grid; _vol =_grid->iSites()*_grid->oSites(); _generators.resize(_vol); _uniform.resize(_vol,std::uniform_real_distribution{0,1}); _gaussian.resize(_vol,std::normal_distribution(0.0,1.0) ); _bernoulli.resize(_vol,std::discrete_distribution{1,1}); _seeded = 0; _time_counter = 0.0; } // This loop could be made faster to avoid the Ahmdahl by // i) seed generators on each timeslice, for x=y=z=0; // ii) seed generators on each z for x=y=0 // iii)seed generators on each y,z for x=0 // iv) seed generators on each y,z,x // made possible by physical indexing. template void Seed(source &src) { std::vector gcoor; int gsites = _grid->_gsites; typename source::result_type init = src(); RngEngine pseeder(init); std::uniform_int_distribution ui; for(int gidx=0;gidxGlobalIndexToGlobalCoor(gidx,gcoor); _grid->GlobalCoorToRankIndex(rank,o_idx,i_idx,gcoor); int l_idx=generator_idx(o_idx,i_idx); const int num_rand_seed=16; std::vector site_seeds(num_rand_seed); for(int i=0;iBroadcast(0,(void *)&site_seeds[0],sizeof(int)*site_seeds.size()); if( rank == _grid->ThisRank() ){ fixedSeed ssrc(site_seeds); typename source::result_type sinit = ssrc(); _generators[l_idx] = RngEngine(sinit); } } _seeded=1; } //FIXME implement generic IO and create state save/restore //void SaveState(const std::string &file); //void LoadState(const std::string &file); template inline void fill(Lattice &l, std::vector &dist) { typedef typename vobj::scalar_object scalar_object; typedef typename vobj::scalar_type scalar_type; typedef typename vobj::vector_type vector_type; double inner_time_counter = usecond(); int multiplicity = RNGfillable_general(_grid, l._grid); // l has finer or same grid int Nsimd = _grid->Nsimd();// guaranteed to be the same for l._grid too int osites = _grid->oSites();// guaranteed to be <= l._grid->oSites() by a factor multiplicity int words = sizeof(scalar_object) / sizeof(scalar_type); PARALLEL_FOR_LOOP for (int ss = 0; ss < osites; ss++) { std::vector buf(Nsimd); for (int m = 0; m < multiplicity; m++) { // Draw from same generator multiplicity times int sm = multiplicity * ss + m; // Maps the generator site to the fine site for (int si = 0; si < Nsimd; si++) { int gdx = generator_idx(ss, si); // index of generator state scalar_type *pointer = (scalar_type *)&buf[si]; dist[gdx].reset(); for (int idx = 0; idx < words; idx++) fillScalar(pointer[idx], dist[gdx], _generators[gdx]); } // merge into SIMD lanes, FIXME suboptimal implementation merge(l._odata[sm], buf); } } _time_counter += usecond()- inner_time_counter; }; void SeedRandomDevice(void) { std::random_device rd; Seed(rd); } void SeedFixedIntegers(const std::vector &seeds) { fixedSeed src(seeds); Seed(src); } void Report(){ std::cout << GridLogMessage << "Time spent in the fill() routine by GridParallelRNG: "<< _time_counter/1e3 << " ms" << std::endl; } }; template inline void random(GridParallelRNG &rng,Lattice &l){ rng.fill(l,rng._uniform); } template inline void gaussian(GridParallelRNG &rng,Lattice &l){ rng.fill(l,rng._gaussian); } template inline void bernoulli(GridParallelRNG &rng,Lattice &l){ rng.fill(l,rng._bernoulli); } template inline void random(GridSerialRNG &rng,sobj &l){ rng.fill(l,rng._uniform); } template inline void gaussian(GridSerialRNG &rng,sobj &l){ rng.fill(l,rng._gaussian); } template inline void bernoulli(GridSerialRNG &rng,sobj &l){ rng.fill(l,rng._bernoulli); } } #endif