Rework of RNG to use C++11 random. Should work correctly maintaining parallel RNG across

a machine. If a "fixedSeed" is used, randoms should be reproducible across different machine decomposition since the generators are physically indexed and assigned in lexico ordering.
2026-05-31 06:24:18 +01:00 · 2015-04-19 14:55:58 +01:00
parent f6ab726cef
commit 42f167ea37
7 changed files with 191 additions and 55 deletions
@@ -6,22 +6,25 @@

 namespace Grid{

-class GridBase : public CartesianCommunicator {
+  class GridRNG ; // Forward declaration;
+
+  class GridBase : public CartesianCommunicator {
 public:

- // Give Lattice access
- template<class object> friend class Lattice;
-  
- GridBase(std::vector<int> & processor_grid) : CartesianCommunicator(processor_grid) {};
+    // Give Lattice access
+    template<class object> friend class Lattice;

-        
- //FIXME 
- // protected:
- // Lattice wide random support. not yet fully implemented. Need seed strategy
- // and one generator per site.
- // std::default_random_engine generator;
- //    static std::mt19937  generator( 9 );
- 
+    GridRNG *_rng;
+
+    GridBase(std::vector<int> & processor_grid) : CartesianCommunicator(processor_grid) {};
+            
+    //FIXME 
+    // protected:
+    // Lattice wide random support. not yet fully implemented. Need seed strategy
+    // and one generator per site.
+    // std::default_random_engine generator;
+    //    static std::mt19937  generator( 9 );
+    
    //////////////////////////////////////////////////////////////////////
    // Commicator provides information on the processor grid
    //////////////////////////////////////////////////////////////////////
@@ -32,7 +35,7 @@ public:
    //////////////////////////////////////////////////////////////////////

    // Physics Grid information.
-    std::vector<int> _simd_layout;     // Which dimensions get relayed out over simd lanes.
+    std::vector<int> _simd_layout;// Which dimensions get relayed out over simd lanes.
    std::vector<int> _fdimensions;// Global dimensions of array prior to cb removal
    std::vector<int> _gdimensions;// Global dimensions of array after cb removal
    std::vector<int> _ldimensions;// local dimensions of array with processor images removed
@@ -41,6 +44,8 @@ public:
    std::vector<int> _istride;    // Inner stride i.e. within simd lane
    int _osites;                  // _isites*_osites = product(dimensions).
    int _isites;
+    int _fsites;                  // _isites*_osites = product(dimensions).
+    int _gsites;
    std::vector<int> _slice_block;   // subslice information
    std::vector<int> _slice_stride;
    std::vector<int> _slice_nblock;
@@ -149,6 +154,21 @@ public:
    ////////////////////////////////////////////////////////////////
    // Global addressing
    ////////////////////////////////////////////////////////////////
+    void GlobalIndexToGlobalCoor(int gidx,std::vector<int> &gcoor){
+      gcoor.resize(_ndimension);
+      for(int d=0;d<_ndimension;d++){
+	gcoor[d] = gidx % _gdimensions[d];
+	gidx     = gidx / _gdimensions[d];
+      }
+    }
+    void GlobalCoorToGlobalIndex(const std::vector<int> & gcoor,int & gidx){
+      gidx=0;
+      int mult=1;
+      for(int mu=0;mu<_ndimension;mu++) {
+	gidx+=mult*gcoor[mu];
+	mult*=_gdimensions[mu];
+      }
+    }
    void RankIndexToGlobalCoor(int rank, int o_idx, int i_idx , std::vector<int> &gcoor)
    {
      gcoor.resize(_ndimension);
@@ -194,7 +214,8 @@ public:
      i_idx= iIndex(lcoor);
      o_idx= oIndex(lcoor);
    }
-
 };
+
+
 }
 #endif
@@ -43,12 +43,14 @@ public:
        _ostride.resize(_ndimension);
        _istride.resize(_ndimension);
            
-        _osites = 1;
-        _isites = 1;
+        _fsites = _gsites = _osites = _isites = 1;
+
        for(int d=0;d<_ndimension;d++){
 	  _fdimensions[d] = dimensions[d]; // Global dimensions
 	  _gdimensions[d] = _fdimensions[d]; // Global dimensions
 	  _simd_layout[d] = simd_layout[d];
+	  _fsites = _fsites * _fdimensions[d];
+	  _gsites = _gsites * _gdimensions[d];

 	  //FIXME check for exact division

@@ -62,14 +62,17 @@ public:
        _ostride.resize(_ndimension);
        _istride.resize(_ndimension);
        
-        _osites = 1;
-        _isites = 1;
+        _fsites = _gsites = _osites = _isites = 1;
+
        for(int d=0;d<_ndimension;d++){
            _fdimensions[d] = dimensions[d];
            _gdimensions[d] = _fdimensions[d];
+	    _fsites = _fsites * _fdimensions[d];
+	    _gsites = _gsites * _gdimensions[d];
+                
            if (d==0) _gdimensions[0] = _gdimensions[0]/2; // Remove a checkerboard
            _ldimensions[d] = _gdimensions[d]/_processors[d];
-                
+
            // Use a reduced simd grid
            _simd_layout[d] = simd_layout[d];
            _rdimensions[d]= _ldimensions[d]/_simd_layout[d];
@@ -9,6 +9,10 @@ namespace Grid {
        assert(lhs._grid == rhs._grid);
        assert(lhs.checkerboard == rhs.checkerboard);
    }
+    void inline conformable(const GridBase *lhs,GridBase *rhs)
+    {
+        assert(lhs == rhs);
+    }

 }
 #endif
@@ -1,32 +1,136 @@
 #ifndef GRID_LATTICE_RNG_H
 #define GRID_LATTICE_RNG_H

+#include <random>
+
 namespace Grid {

-    // FIXME Randomise; deprecate this
-    template <class vobj> inline void random(Lattice<vobj> &l){
-        Real *v_ptr = (Real *)&l._odata[0];
-        size_t v_len = l._grid->oSites()*sizeof(vobj);
-        size_t d_len = v_len/sizeof(Real);
-	
-        for(int i=0;i<d_len;i++){
+  // Wrap seed_seq to give common interface with random_device
+  class fixedSeed {
+  public:

-            v_ptr[i]=drand48();
-        }
-    };
+    typedef std::seed_seq::result_type result_type;
+
+    std::seed_seq src;
    
-    // FIXME Implement a consistent seed management strategy
-    template <class vobj> inline void gaussian(Lattice<vobj> &l){
-        // Zero mean, unit variance.
-        std::normal_distribution<double> distribution(0.0,1.0);
-        Real *v_ptr = (Real *)&l._odata[0];
-        size_t v_len = l._grid->oSites()*sizeof(vobj);
-        size_t d_len = v_len/sizeof(Real);
+    fixedSeed(std::vector<int> &seeds) : src(seeds.begin(),seeds.end()) {};

-        for(int i=0;i<d_len;i++){
-	  v_ptr[i]= drand48();
-        }
+    result_type operator () (void){
+
+      std::vector<result_type> list(1);
+
+      src.generate(list.begin(),list.end());
+
+      return list[0];
+
+    }
+
+  };
+  class GridRNG {
+  public:
+    // One generator per site.
+    std::vector<std::ranlux48>             _generators;
+    
+    // Uniform and Gaussian distributions from these generators.
+    std::uniform_real_distribution<double> _uniform;
+    std::normal_distribution<double>       _gaussian;
+
+    GridBase *_grid;
+    int _vol;
+
+    int generator_idx(int os,int is){
+      return is*_grid->oSites()+os;
+    }
+
+    GridRNG(GridBase *grid) : _uniform{0,1}, _gaussian(0.0,1.0) {
+      _grid=grid;
+      _vol =_grid->iSites()*_grid->oSites();
+      _generators.resize(_vol);
+      //      SeedFixedIntegers(seeds);
+      // worst case we seed properly but non-deterministically
+      SeedRandomDevice();
+    }
+
+    // FIXME: drive seeding from node zero and transmit to all
+    // to get unique randoms on each node
+    void SeedRandomDevice(void){
+      std::random_device rd;
+      Seed(rd);
+    }
+    void SeedFixedIntegers(std::vector<int> &seeds){
+      fixedSeed src(seeds);
+      Seed(src);
+    }
+
+    // This loop could be made faster to avoid the Ahmdahl by
+    // i)  seed generators on each timeslice, for x=y=z=0;
+    // ii) seed generators on each z for x=y=0
+    // iii)seed generators on each y,z for x=0
+    // iv) seed generators on each y,z,x 
+    // made possible by physical indexing.
+    template<class source> void Seed(source &src)
+    {
+      std::vector<int> gcoor;
+
+      for(int gidx=0;gidx<_grid->_gsites;gidx++){
+	int rank,o_idx,i_idx;
+	_grid->GlobalIndexToGlobalCoor(gidx,gcoor);
+	_grid->GlobalCoorToRankIndex(rank,o_idx,i_idx,gcoor);
+
+	int l_idx=generator_idx(o_idx,i_idx);
+
+	typename source::result_type init = src();
+
+	_grid->Broadcast(0,(void *)&init,sizeof(init));
+	if( rank == _grid->ThisRank() ){
+	  _generators[l_idx] = std::ranlux48(init);
+	}
+      }
+    }    
+
+    //FIXME implement generic IO and create state save/restore
+    //void SaveState(const std::string<char> &file);
+    //void LoadState(const std::string<char> &file);
+
+    template <class vobj,class distribution> inline void fill(Lattice<vobj> &l,distribution &dist){
+
+      typedef typename vobj::scalar_type scalar_type;
+      typedef typename vobj::vector_type vector_type;
+      
+      conformable(_grid,l._grid);
+
+      int     Nsimd =_grid->Nsimd();
+      int     osites=_grid->oSites();
+
+      int words = sizeof(vobj)/sizeof(vector_type);
+      std::vector<std::vector<scalar_type> > buf(Nsimd,std::vector<scalar_type>(words));
+      std::vector<scalar_type *> pointers(Nsimd);  
+      
+      for(int ss=0;ss<osites;ss++){
+
+	for(int si=0;si<Nsimd;si++){
+
+	  int gdx = generator_idx(ss,si); // index of generator state
+
+	  pointers[si] = (scalar_type *)&buf[si][0];
+	  for(int idx=0;idx<words;idx++){
+	    pointers[si][idx] = dist(_generators[gdx]);
+	  }
+
+	}
+	// merge into SIMD lanes
+	merge(l._odata[ss],pointers);
+      }
    };
+  };

+  // FIXME Implement a consistent seed management strategy
+  template <class vobj> inline void random(GridRNG &rng,Lattice<vobj> &l){
+    rng.fill(l,rng._uniform);
+  }
+
+  template <class vobj> inline void gaussian(GridRNG &rng,Lattice<vobj> &l){
+    rng.fill(l,rng._gaussian);
+  }
 }
 #endif
@@ -57,7 +57,8 @@ int main (int argc, char ** argv)
    
    GridCartesian Fine(latt_size,simd_layout,mpi_layout);
    GridRedBlackCartesian rbFine(latt_size,simd_layout,mpi_layout);
-    
+    GridRNG       FineRNG(&Fine);
+
    LatticeColourMatrix Foo(&Fine);
    LatticeColourMatrix Bar(&Fine);

@@ -91,17 +92,17 @@ int main (int argc, char ** argv)
    iSpinMatrix<vComplex> iGammaFive;
    ColourMatrix cmat;
    
-    random(Foo);
-    gaussian(Bar);
-    random(scFoo);
-    random(scBar);
+    random(FineRNG,Foo);
+    gaussian(FineRNG,Bar);
+    random(FineRNG,scFoo);
+    random(FineRNG,scBar);

-    random(cMat);
-    random(sMat);
-    random(scMat);
-    random(cVec);
-    random(sVec);
-    random(scVec);
+    random(FineRNG,cMat);
+    random(FineRNG,sMat);
+    random(FineRNG,scMat);
+    random(FineRNG,cVec);
+    random(FineRNG,sVec);
+    random(FineRNG,scVec);

    fflush(stdout);
    cVec = cMat * cVec;  // LatticeColourVector     = LatticeColourMatrix     * LatticeColourVector
@@ -277,7 +278,7 @@ int main (int argc, char ** argv)
        peekSite(bar,Bar,coor);
        for(int r=0;r<3;r++){
        for(int c=0;c<3;c++){
-	  cout<<"bar "<<coor[0]<<coor[1]<<coor[2]<<coor[3] <<" "<<bar()()(r,c)<<std::endl;
+	  //	  cout<<"bar "<<coor[0]<<coor[1]<<coor[2]<<coor[3] <<" "<<bar()()(r,c)<<std::endl;
 	}}
      }}}}
    } 
@@ -48,14 +48,15 @@ int main (int argc, char ** argv)
    
    GridCartesian Fine(latt_size,simd_layout,mpi_layout);
    GridRedBlackCartesian rbFine(latt_size,simd_layout,mpi_layout);
-    
+    GridRNG       fRNG(&Fine);
+
    LatticeColourMatrix Foo(&Fine);
    LatticeColourMatrix Bar(&Fine);
    LatticeColourMatrix Check(&Fine);
    LatticeColourMatrix Diff(&Fine);
    
-    random(Foo);
-    gaussian(Bar);
+    random(fRNG,Foo);
+    gaussian(fRNG,Bar);


    for(int dir=0;dir<4;dir++){