Merge branch 'develop' into feature/hadrons-new-memory-model

2026-03-12 23:46:12 +00:00 · 2017-12-06 16:49:21 +01:00
parent 5422251959 62eb1f0e59
commit 29e2eddea8
6 changed files with 45 additions and 57 deletions
--- a/lib/communicator/Communicator_base.h
+++ b/lib/communicator/Communicator_base.h
@@ -276,10 +276,11 @@ class CartesianCommunicator {
    assert(in.size()==out.size());
    uint64_t bytes=sizeof(T);
    uint64_t words=in.size()/numnode;
-
+    //    std:: cout << "AllToAll buffer size "<< in.size()*sizeof(T)<<std::endl;
+    //    std:: cout << "AllToAll datum bytes "<< bytes<<std::endl;
+    //    std:: cout << "AllToAll datum count "<< words<<std::endl;
    assert(numnode * words == in.size());
-    assert(words < (1ULL<<32));
-
+    assert(words < (1ULL<<31));
    AllToAll(dim,(void *)&in[0],(void *)&out[0],words,bytes);
  }
  void AllToAll(int dim  ,void *in,void *out,uint64_t words,uint64_t bytes);
--- a/lib/lattice/Lattice_rng.h
+++ b/lib/lattice/Lattice_rng.h
@@ -77,9 +77,6 @@ namespace Grid {

  
 // merge of April 11 2017
-//<<<<<<< HEAD
-
-
  // this function is necessary for the LS vectorised field
  inline int RNGfillable_general(GridBase *coarse,GridBase *fine)
  {
@@ -91,7 +88,6 @@ namespace Grid {
    // all further divisions are local
    for(int d=0;d<lowerdims;d++) assert(fine->_processors[d]==1);
    for(int d=0;d<rngdims;d++) assert(coarse->_processors[d] == fine->_processors[d+lowerdims]);
-    

    // then divide the number of local sites
    // check that the total number of sims agree, meanse the iSites are the same
@@ -102,27 +98,6 @@ namespace Grid {

    return fine->lSites() / coarse->lSites();
  }
-
-  /*
-  // Wrap seed_seq to give common interface with random_device
-  class fixedSeed {
-  public:
-    typedef std::seed_seq::result_type result_type;
-    std::seed_seq src;
-    
-    fixedSeed(const std::vector<int> &seeds) : src(seeds.begin(),seeds.end()) {};
-
-    result_type operator () (void){
-      std::vector<result_type> list(1);
-      src.generate(list.begin(),list.end());
-      return list[0];
-    }
-
-  };
-
-=======
->>>>>>> develop
-  */
  
  // real scalars are one component
  template<class scalar,class distribution,class generator> 
@@ -171,7 +146,7 @@ namespace Grid {
    // support for parallel init
    ///////////////////////
 #ifdef RNG_FAST_DISCARD
-    static void Skip(RngEngine &eng)
+    static void Skip(RngEngine &eng,uint64_t site)
    {
      /////////////////////////////////////////////////////////////////////////////////////
      // Skip by 2^40 elements between successive lattice sites
@@ -184,8 +159,11 @@ namespace Grid {
      // and margin of safety is orders of magnitude.
      // We could hack Sitmo to skip in the higher order words of state if necessary
      /////////////////////////////////////////////////////////////////////////////////////
-      uint64_t skip = 0x1; skip = skip<<40;
+      //      uint64_t skip = site+1;  //   Old init Skipped then drew.  Checked compat with faster init
+      uint64_t skip = site;
+      skip = skip<<40;
      eng.discard(skip);
+      //      std::cout << " Engine  " <<site << " state " <<eng<<std::endl;
    } 
 #endif
    static RngEngine Reseed(RngEngine &eng)
@@ -407,15 +385,14 @@ namespace Grid {
      // MT implementation does not implement fast discard even though
      // in principle this is possible
      ////////////////////////////////////////////////
-      std::vector<int> gcoor;
-      int rank,o_idx,i_idx;

      // Everybody loops over global volume.
-      for(int gidx=0;gidx<_grid->_gsites;gidx++){
-
-	Skip(master_engine); // Skip to next RNG sequence
+      parallel_for(int gidx=0;gidx<_grid->_gsites;gidx++){

 	// Where is it?
+	int rank,o_idx,i_idx;
+	std::vector<int> gcoor;
+
 	_grid->GlobalIndexToGlobalCoor(gidx,gcoor);
 	_grid->GlobalCoorToRankIndex(rank,o_idx,i_idx,gcoor);

@@ -423,6 +400,7 @@ namespace Grid {
 	if( rank == _grid->ThisRank() ){
 	  int l_idx=generator_idx(o_idx,i_idx);
 	  _generators[l_idx] = master_engine;
+	  Skip(_generators[l_idx],gidx); // Skip to next RNG sequence
 	}

      }
--- a/lib/lattice/Lattice_transfer.h
+++ b/lib/lattice/Lattice_transfer.h
@@ -822,6 +822,7 @@ void Grid_split(std::vector<Lattice<Vobj> > & full,Lattice<Vobj>   & split)

      // Loop over reordered data post A2A
      parallel_for(int c=0;c<chunk;c++){
+	std::vector<int> coor(ndim);
 	for(int m=0;m<M;m++){
 	  for(int s=0;s<sP;s++){
 	    
@@ -833,7 +834,6 @@ void Grid_split(std::vector<Lattice<Vobj> > & full,Lattice<Vobj>   & split)
 	    uint64_t lex_vec      = lex_fvol_vec/fvol;

 	    // which node sets an adder to the coordinate
-	    std::vector<int> coor(ndim);
 	    Lexicographic::CoorFromIndex(coor, lex_fvol, ldims);	  
 	    coor[d] += m*ldims[d];
 	    Lexicographic::IndexFromCoor(coor, lex_r, rdims);	  
@@ -940,10 +940,11 @@ void Grid_unsplit(std::vector<Lattice<Vobj> > & full,Lattice<Vobj>   & split)
 	
      {
 	// Loop over reordered data post A2A
-	for(int c=0;c<chunk;c++){
+	parallel_for(int c=0;c<chunk;c++){
+	  std::vector<int> coor(ndim);
 	  for(int m=0;m<M;m++){
 	    for(int s=0;s<sP;s++){
-	      
+
 	      // addressing; use lexico
 	      int lex_r;
 	      uint64_t lex_c = c+chunk*m+chunk*M*s;
@@ -952,7 +953,6 @@ void Grid_unsplit(std::vector<Lattice<Vobj> > & full,Lattice<Vobj>   & split)
 	      uint64_t lex_vec      = lex_fvol_vec/fvol;
 	      
 	      // which node sets an adder to the coordinate
-	      std::vector<int> coor(ndim);
 	      Lexicographic::CoorFromIndex(coor, lex_fvol, ldims);	  
 	      coor[d] += m*ldims[d];
 	      Lexicographic::IndexFromCoor(coor, lex_r, rdims);	  
@@ -978,9 +978,9 @@ void Grid_unsplit(std::vector<Lattice<Vobj> > & full,Lattice<Vobj>   & split)

  lsites = full_grid->lSites();
  for(int v=0;v<nvector;v++){
-    assert(v<full.size());
+    //    assert(v<full.size());
    parallel_for(int site=0;site<lsites;site++){
-      assert(v*lsites+site < alldata.size());
+      //      assert(v*lsites+site < alldata.size());
      scalardata[site] = alldata[v*lsites+site];
    }
    vectorizeFromLexOrdArray(scalardata,full[v]);    
--- a/lib/qcd/action/fermion/FermionOperator.h
+++ b/lib/qcd/action/fermion/FermionOperator.h
@@ -47,6 +47,7 @@ namespace Grid {
      INHERIT_IMPL_TYPES(Impl);

      FermionOperator(const ImplParams &p= ImplParams()) : Impl(p) {};
+      virtual ~FermionOperator(void) = default;

      virtual FermionField &tmp(void) = 0;

--- a/lib/qcd/utils/SUn.h
+++ b/lib/qcd/utils/SUn.h
@@ -746,7 +746,7 @@ template<typename GaugeField,typename GaugeMat>
    }
  }
  template<typename GaugeField>
-  static void ColdConfiguration(GridParallelRNG &pRNG,GaugeField &out){
+  static void ColdConfiguration(GaugeField &out){
    typedef typename GaugeField::vector_type vector_type;
    typedef iSUnMatrix<vector_type> vMatrixType;
    typedef Lattice<vMatrixType> LatticeMatrixType;
@@ -757,6 +757,10 @@ template<typename GaugeField,typename GaugeMat>
      PokeIndex<LorentzIndex>(out,Umu,mu);
    }
  }
+  template<typename GaugeField>
+  static void ColdConfiguration(GridParallelRNG &pRNG,GaugeField &out){
+    ColdConfiguration(out);
+  }

  template<typename LatticeMatrixType>
  static void taProj( const LatticeMatrixType &in,  LatticeMatrixType &out){