From 42f167ea37e526568db9e8cbfd0a980ab17578b2 Mon Sep 17 00:00:00 2001 From: Peter Boyle Date: Sun, 19 Apr 2015 14:55:58 +0100 Subject: [PATCH] Rework of RNG to use C++11 random. Should work correctly maintaining parallel RNG across a machine. If a "fixedSeed" is used, randoms should be reproducible across different machine decomposition since the generators are physically indexed and assigned in lexico ordering. --- lib/cartesian/Grid_cartesian_base.h | 51 +++++--- lib/cartesian/Grid_cartesian_full.h | 6 +- lib/cartesian/Grid_cartesian_red_black.h | 9 +- lib/lattice/Grid_lattice_conformable.h | 4 + lib/lattice/Grid_lattice_rng.h | 144 +++++++++++++++++++---- tests/Grid_main.cc | 25 ++-- tests/test_Grid_stencil.cc | 7 +- 7 files changed, 191 insertions(+), 55 deletions(-) diff --git a/lib/cartesian/Grid_cartesian_base.h b/lib/cartesian/Grid_cartesian_base.h index 2731d9a5..9ee10293 100644 --- a/lib/cartesian/Grid_cartesian_base.h +++ b/lib/cartesian/Grid_cartesian_base.h @@ -6,22 +6,25 @@ namespace Grid{ -class GridBase : public CartesianCommunicator { + class GridRNG ; // Forward declaration; + + class GridBase : public CartesianCommunicator { public: - // Give Lattice access - template friend class Lattice; - - GridBase(std::vector & processor_grid) : CartesianCommunicator(processor_grid) {}; + // Give Lattice access + template friend class Lattice; - - //FIXME - // protected: - // Lattice wide random support. not yet fully implemented. Need seed strategy - // and one generator per site. - // std::default_random_engine generator; - // static std::mt19937 generator( 9 ); - + GridRNG *_rng; + + GridBase(std::vector & processor_grid) : CartesianCommunicator(processor_grid) {}; + + //FIXME + // protected: + // Lattice wide random support. not yet fully implemented. Need seed strategy + // and one generator per site. + // std::default_random_engine generator; + // static std::mt19937 generator( 9 ); + ////////////////////////////////////////////////////////////////////// // Commicator provides information on the processor grid ////////////////////////////////////////////////////////////////////// @@ -32,7 +35,7 @@ public: ////////////////////////////////////////////////////////////////////// // Physics Grid information. - std::vector _simd_layout; // Which dimensions get relayed out over simd lanes. + std::vector _simd_layout;// Which dimensions get relayed out over simd lanes. std::vector _fdimensions;// Global dimensions of array prior to cb removal std::vector _gdimensions;// Global dimensions of array after cb removal std::vector _ldimensions;// local dimensions of array with processor images removed @@ -41,6 +44,8 @@ public: std::vector _istride; // Inner stride i.e. within simd lane int _osites; // _isites*_osites = product(dimensions). int _isites; + int _fsites; // _isites*_osites = product(dimensions). + int _gsites; std::vector _slice_block; // subslice information std::vector _slice_stride; std::vector _slice_nblock; @@ -149,6 +154,21 @@ public: //////////////////////////////////////////////////////////////// // Global addressing //////////////////////////////////////////////////////////////// + void GlobalIndexToGlobalCoor(int gidx,std::vector &gcoor){ + gcoor.resize(_ndimension); + for(int d=0;d<_ndimension;d++){ + gcoor[d] = gidx % _gdimensions[d]; + gidx = gidx / _gdimensions[d]; + } + } + void GlobalCoorToGlobalIndex(const std::vector & gcoor,int & gidx){ + gidx=0; + int mult=1; + for(int mu=0;mu<_ndimension;mu++) { + gidx+=mult*gcoor[mu]; + mult*=_gdimensions[mu]; + } + } void RankIndexToGlobalCoor(int rank, int o_idx, int i_idx , std::vector &gcoor) { gcoor.resize(_ndimension); @@ -194,7 +214,8 @@ public: i_idx= iIndex(lcoor); o_idx= oIndex(lcoor); } - }; + + } #endif diff --git a/lib/cartesian/Grid_cartesian_full.h b/lib/cartesian/Grid_cartesian_full.h index 383bf14f..73bd08b3 100644 --- a/lib/cartesian/Grid_cartesian_full.h +++ b/lib/cartesian/Grid_cartesian_full.h @@ -43,12 +43,14 @@ public: _ostride.resize(_ndimension); _istride.resize(_ndimension); - _osites = 1; - _isites = 1; + _fsites = _gsites = _osites = _isites = 1; + for(int d=0;d<_ndimension;d++){ _fdimensions[d] = dimensions[d]; // Global dimensions _gdimensions[d] = _fdimensions[d]; // Global dimensions _simd_layout[d] = simd_layout[d]; + _fsites = _fsites * _fdimensions[d]; + _gsites = _gsites * _gdimensions[d]; //FIXME check for exact division diff --git a/lib/cartesian/Grid_cartesian_red_black.h b/lib/cartesian/Grid_cartesian_red_black.h index e85fd3f7..55bd1f20 100644 --- a/lib/cartesian/Grid_cartesian_red_black.h +++ b/lib/cartesian/Grid_cartesian_red_black.h @@ -62,14 +62,17 @@ public: _ostride.resize(_ndimension); _istride.resize(_ndimension); - _osites = 1; - _isites = 1; + _fsites = _gsites = _osites = _isites = 1; + for(int d=0;d<_ndimension;d++){ _fdimensions[d] = dimensions[d]; _gdimensions[d] = _fdimensions[d]; + _fsites = _fsites * _fdimensions[d]; + _gsites = _gsites * _gdimensions[d]; + if (d==0) _gdimensions[0] = _gdimensions[0]/2; // Remove a checkerboard _ldimensions[d] = _gdimensions[d]/_processors[d]; - + // Use a reduced simd grid _simd_layout[d] = simd_layout[d]; _rdimensions[d]= _ldimensions[d]/_simd_layout[d]; diff --git a/lib/lattice/Grid_lattice_conformable.h b/lib/lattice/Grid_lattice_conformable.h index b9af33b4..faa8c7a7 100644 --- a/lib/lattice/Grid_lattice_conformable.h +++ b/lib/lattice/Grid_lattice_conformable.h @@ -9,6 +9,10 @@ namespace Grid { assert(lhs._grid == rhs._grid); assert(lhs.checkerboard == rhs.checkerboard); } + void inline conformable(const GridBase *lhs,GridBase *rhs) + { + assert(lhs == rhs); + } } #endif diff --git a/lib/lattice/Grid_lattice_rng.h b/lib/lattice/Grid_lattice_rng.h index 636bfb72..24627c92 100644 --- a/lib/lattice/Grid_lattice_rng.h +++ b/lib/lattice/Grid_lattice_rng.h @@ -1,32 +1,136 @@ #ifndef GRID_LATTICE_RNG_H #define GRID_LATTICE_RNG_H +#include + namespace Grid { - // FIXME Randomise; deprecate this - template inline void random(Lattice &l){ - Real *v_ptr = (Real *)&l._odata[0]; - size_t v_len = l._grid->oSites()*sizeof(vobj); - size_t d_len = v_len/sizeof(Real); - - for(int i=0;i inline void gaussian(Lattice &l){ - // Zero mean, unit variance. - std::normal_distribution distribution(0.0,1.0); - Real *v_ptr = (Real *)&l._odata[0]; - size_t v_len = l._grid->oSites()*sizeof(vobj); - size_t d_len = v_len/sizeof(Real); + fixedSeed(std::vector &seeds) : src(seeds.begin(),seeds.end()) {}; - for(int i=0;i list(1); + + src.generate(list.begin(),list.end()); + + return list[0]; + + } + + }; + class GridRNG { + public: + // One generator per site. + std::vector _generators; + + // Uniform and Gaussian distributions from these generators. + std::uniform_real_distribution _uniform; + std::normal_distribution _gaussian; + + GridBase *_grid; + int _vol; + + int generator_idx(int os,int is){ + return is*_grid->oSites()+os; + } + + GridRNG(GridBase *grid) : _uniform{0,1}, _gaussian(0.0,1.0) { + _grid=grid; + _vol =_grid->iSites()*_grid->oSites(); + _generators.resize(_vol); + // SeedFixedIntegers(seeds); + // worst case we seed properly but non-deterministically + SeedRandomDevice(); + } + + // FIXME: drive seeding from node zero and transmit to all + // to get unique randoms on each node + void SeedRandomDevice(void){ + std::random_device rd; + Seed(rd); + } + void SeedFixedIntegers(std::vector &seeds){ + fixedSeed src(seeds); + Seed(src); + } + + // This loop could be made faster to avoid the Ahmdahl by + // i) seed generators on each timeslice, for x=y=z=0; + // ii) seed generators on each z for x=y=0 + // iii)seed generators on each y,z for x=0 + // iv) seed generators on each y,z,x + // made possible by physical indexing. + template void Seed(source &src) + { + std::vector gcoor; + + for(int gidx=0;gidx<_grid->_gsites;gidx++){ + int rank,o_idx,i_idx; + _grid->GlobalIndexToGlobalCoor(gidx,gcoor); + _grid->GlobalCoorToRankIndex(rank,o_idx,i_idx,gcoor); + + int l_idx=generator_idx(o_idx,i_idx); + + typename source::result_type init = src(); + + _grid->Broadcast(0,(void *)&init,sizeof(init)); + if( rank == _grid->ThisRank() ){ + _generators[l_idx] = std::ranlux48(init); + } + } + } + + //FIXME implement generic IO and create state save/restore + //void SaveState(const std::string &file); + //void LoadState(const std::string &file); + + template inline void fill(Lattice &l,distribution &dist){ + + typedef typename vobj::scalar_type scalar_type; + typedef typename vobj::vector_type vector_type; + + conformable(_grid,l._grid); + + int Nsimd =_grid->Nsimd(); + int osites=_grid->oSites(); + + int words = sizeof(vobj)/sizeof(vector_type); + std::vector > buf(Nsimd,std::vector(words)); + std::vector pointers(Nsimd); + + for(int ss=0;ss inline void random(GridRNG &rng,Lattice &l){ + rng.fill(l,rng._uniform); + } + + template inline void gaussian(GridRNG &rng,Lattice &l){ + rng.fill(l,rng._gaussian); + } } #endif diff --git a/tests/Grid_main.cc b/tests/Grid_main.cc index bd206ea1..83cacf92 100644 --- a/tests/Grid_main.cc +++ b/tests/Grid_main.cc @@ -57,7 +57,8 @@ int main (int argc, char ** argv) GridCartesian Fine(latt_size,simd_layout,mpi_layout); GridRedBlackCartesian rbFine(latt_size,simd_layout,mpi_layout); - + GridRNG FineRNG(&Fine); + LatticeColourMatrix Foo(&Fine); LatticeColourMatrix Bar(&Fine); @@ -91,17 +92,17 @@ int main (int argc, char ** argv) iSpinMatrix iGammaFive; ColourMatrix cmat; - random(Foo); - gaussian(Bar); - random(scFoo); - random(scBar); + random(FineRNG,Foo); + gaussian(FineRNG,Bar); + random(FineRNG,scFoo); + random(FineRNG,scBar); - random(cMat); - random(sMat); - random(scMat); - random(cVec); - random(sVec); - random(scVec); + random(FineRNG,cMat); + random(FineRNG,sMat); + random(FineRNG,scMat); + random(FineRNG,cVec); + random(FineRNG,sVec); + random(FineRNG,scVec); fflush(stdout); cVec = cMat * cVec; // LatticeColourVector = LatticeColourMatrix * LatticeColourVector @@ -277,7 +278,7 @@ int main (int argc, char ** argv) peekSite(bar,Bar,coor); for(int r=0;r<3;r++){ for(int c=0;c<3;c++){ - cout<<"bar "<