Merge branch 'develop' into feature/hadrons-new-memory-model

2026-07-22 11:33:27 +01:00 · 2017-12-06 16:49:21 +01:00
parent 5422251959 62eb1f0e59
commit 29e2eddea8
6 changed files with 45 additions and 57 deletions
@@ -276,10 +276,11 @@ class CartesianCommunicator {
    assert(in.size()==out.size());
    uint64_t bytes=sizeof(T);
    uint64_t words=in.size()/numnode;
-
+    //    std:: cout << "AllToAll buffer size "<< in.size()*sizeof(T)<<std::endl;
+    //    std:: cout << "AllToAll datum bytes "<< bytes<<std::endl;
+    //    std:: cout << "AllToAll datum count "<< words<<std::endl;
    assert(numnode * words == in.size());
-    assert(words < (1ULL<<32));
-
+    assert(words < (1ULL<<31));
    AllToAll(dim,(void *)&in[0],(void *)&out[0],words,bytes);
  }
  void AllToAll(int dim  ,void *in,void *out,uint64_t words,uint64_t bytes);
@@ -77,9 +77,6 @@ namespace Grid {

  
 // merge of April 11 2017
-//<<<<<<< HEAD
-
-
  // this function is necessary for the LS vectorised field
  inline int RNGfillable_general(GridBase *coarse,GridBase *fine)
  {
@@ -91,7 +88,6 @@ namespace Grid {
    // all further divisions are local
    for(int d=0;d<lowerdims;d++) assert(fine->_processors[d]==1);
    for(int d=0;d<rngdims;d++) assert(coarse->_processors[d] == fine->_processors[d+lowerdims]);
-    

    // then divide the number of local sites
    // check that the total number of sims agree, means the iSites are the same
@@ -102,27 +98,6 @@ namespace Grid {

    return fine->lSites() / coarse->lSites();
  }
-
-  /*
-  // Wrap seed_seq to give common interface with random_device
-  class fixedSeed {
-  public:
-    typedef std::seed_seq::result_type result_type;
-    std::seed_seq src;
-    
-    fixedSeed(const std::vector<int> &seeds) : src(seeds.begin(),seeds.end()) {};
-
-    result_type operator () (void){
-      std::vector<result_type> list(1);
-      src.generate(list.begin(),list.end());
-      return list[0];
-    }
-
-  };
-
-=======
->>>>>>> develop
-  */
  
  // real scalars are one component
  template<class scalar,class distribution,class generator> 
@@ -171,7 +146,7 @@ namespace Grid {
    // support for parallel init
    ///////////////////////
 #ifdef RNG_FAST_DISCARD
-    static void Skip(RngEngine &eng)
+    static void Skip(RngEngine &eng,uint64_t site)
    {
      /////////////////////////////////////////////////////////////////////////////////////
      // Skip by 2^40 elements between successive lattice sites
@@ -184,8 +159,11 @@ namespace Grid {
      // and margin of safety is orders of magnitude.
      // We could hack Sitmo to skip in the higher order words of state if necessary
      /////////////////////////////////////////////////////////////////////////////////////
-      uint64_t skip = 0x1; skip = skip<<40;
+      //      uint64_t skip = site+1;  //   Old init Skipped then drew.  Checked compat with faster init
+      uint64_t skip = site;
+      skip = skip<<40;
      eng.discard(skip);
+      //      std::cout << " Engine  " <<site << " state " <<eng<<std::endl;
    } 
 #endif
    static RngEngine Reseed(RngEngine &eng)
@@ -407,15 +385,14 @@ namespace Grid {
      // MT implementation does not implement fast discard even though
      // in principle this is possible
      ////////////////////////////////////////////////
-      std::vector<int> gcoor;
-      int rank,o_idx,i_idx;

      // Everybody loops over global volume.
-      for(int gidx=0;gidx<_grid->_gsites;gidx++){
-
-	Skip(master_engine); // Skip to next RNG sequence
+      parallel_for(int gidx=0;gidx<_grid->_gsites;gidx++){

 	// Where is it?
+	int rank,o_idx,i_idx;
+	std::vector<int> gcoor;
+
 	_grid->GlobalIndexToGlobalCoor(gidx,gcoor);
 	_grid->GlobalCoorToRankIndex(rank,o_idx,i_idx,gcoor);

@@ -423,6 +400,7 @@ namespace Grid {
 	if( rank == _grid->ThisRank() ){
 	  int l_idx=generator_idx(o_idx,i_idx);
 	  _generators[l_idx] = master_engine;
+	  Skip(_generators[l_idx],gidx); // Skip to next RNG sequence
 	}

      }
@@ -822,6 +822,7 @@ void Grid_split(std::vector<Lattice<Vobj> > & full,Lattice<Vobj>   & split)

      // Loop over reordered data post A2A
      parallel_for(int c=0;c<chunk;c++){
+	std::vector<int> coor(ndim);
 	for(int m=0;m<M;m++){
 	  for(int s=0;s<sP;s++){
 	    
@@ -833,7 +834,6 @@ void Grid_split(std::vector<Lattice<Vobj> > & full,Lattice<Vobj>   & split)
 	    uint64_t lex_vec      = lex_fvol_vec/fvol;

 	    // which node sets an adder to the coordinate
-	    std::vector<int> coor(ndim);
 	    Lexicographic::CoorFromIndex(coor, lex_fvol, ldims);	  
 	    coor[d] += m*ldims[d];
 	    Lexicographic::IndexFromCoor(coor, lex_r, rdims);	  
@@ -940,10 +940,11 @@ void Grid_unsplit(std::vector<Lattice<Vobj> > & full,Lattice<Vobj>   & split)
 	
      {
 	// Loop over reordered data post A2A
-	for(int c=0;c<chunk;c++){
+	parallel_for(int c=0;c<chunk;c++){
+	  std::vector<int> coor(ndim);
 	  for(int m=0;m<M;m++){
 	    for(int s=0;s<sP;s++){
-	      
+
 	      // addressing; use lexico
 	      int lex_r;
 	      uint64_t lex_c = c+chunk*m+chunk*M*s;
@@ -952,7 +953,6 @@ void Grid_unsplit(std::vector<Lattice<Vobj> > & full,Lattice<Vobj>   & split)
 	      uint64_t lex_vec      = lex_fvol_vec/fvol;
 	      
 	      // which node sets an adder to the coordinate
-	      std::vector<int> coor(ndim);
 	      Lexicographic::CoorFromIndex(coor, lex_fvol, ldims);	  
 	      coor[d] += m*ldims[d];
 	      Lexicographic::IndexFromCoor(coor, lex_r, rdims);	  
@@ -978,9 +978,9 @@ void Grid_unsplit(std::vector<Lattice<Vobj> > & full,Lattice<Vobj>   & split)

  lsites = full_grid->lSites();
  for(int v=0;v<nvector;v++){
-    assert(v<full.size());
+    //    assert(v<full.size());
    parallel_for(int site=0;site<lsites;site++){
-      assert(v*lsites+site < alldata.size());
+      //      assert(v*lsites+site < alldata.size());
      scalardata[site] = alldata[v*lsites+site];
    }
    vectorizeFromLexOrdArray(scalardata,full[v]);    
@@ -47,6 +47,7 @@ namespace Grid {
      INHERIT_IMPL_TYPES(Impl);

      FermionOperator(const ImplParams &p= ImplParams()) : Impl(p) {};
+      virtual ~FermionOperator(void) = default;

      virtual FermionField &tmp(void) = 0;

@@ -746,7 +746,7 @@ template<typename GaugeField,typename GaugeMat>
    }
  }
  template<typename GaugeField>
-  static void ColdConfiguration(GridParallelRNG &pRNG,GaugeField &out){
+  static void ColdConfiguration(GaugeField &out){
    typedef typename GaugeField::vector_type vector_type;
    typedef iSUnMatrix<vector_type> vMatrixType;
    typedef Lattice<vMatrixType> LatticeMatrixType;
@@ -757,6 +757,10 @@ template<typename GaugeField,typename GaugeMat>
      PokeIndex<LorentzIndex>(out,Umu,mu);
    }
  }
+  template<typename GaugeField>
+  static void ColdConfiguration(GridParallelRNG &pRNG,GaugeField &out){
+    ColdConfiguration(out);
+  }

  template<typename LatticeMatrixType>
  static void taProj( const LatticeMatrixType &in,  LatticeMatrixType &out){
@@ -81,21 +81,20 @@ int main (int argc, char ** argv)
  GridCartesian         * SFGrid   = SpaceTimeGrid::makeFiveDimGrid(Ls,SGrid);
  GridRedBlackCartesian * SrbGrid  = SpaceTimeGrid::makeFourDimRedBlackGrid(SGrid);
  GridRedBlackCartesian * SFrbGrid = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls,SGrid);
-
+  std::cout << GridLogMessage << "Made the grids"<<std::endl;
  ///////////////////////////////////////////////
  // Set up the problem as a 4d spreadout job
  ///////////////////////////////////////////////
  std::vector<int> seeds({1,2,3,4});

-  GridParallelRNG pRNG(UGrid );  pRNG.SeedFixedIntegers(seeds);
-  GridParallelRNG pRNG5(FGrid);  pRNG5.SeedFixedIntegers(seeds);
  std::vector<FermionField>    src(nrhs,FGrid);
  std::vector<FermionField> src_chk(nrhs,FGrid);
  std::vector<FermionField> result(nrhs,FGrid);
  FermionField tmp(FGrid);
+  std::cout << GridLogMessage << "Made the Fermion Fields"<<std::endl;

  for(int s=0;s<nrhs;s++) result[s]=zero;
-#define LEXICO_TEST
+#undef LEXICO_TEST
 #ifdef LEXICO_TEST
  {
    LatticeFermion lex(FGrid);  lex = zero;
@@ -117,6 +116,7 @@ int main (int argc, char ** argv)
    }    
  }
 #else
+  GridParallelRNG pRNG5(FGrid);  pRNG5.SeedFixedIntegers(seeds);
  for(int s=0;s<nrhs;s++) {
    random(pRNG5,src[s]);
    tmp = 100.0*s;
@@ -124,13 +124,21 @@ int main (int argc, char ** argv)
    std::cout << GridLogMessage << " src ["<<s<<"] "<<norm2(src[s])<<std::endl;
  }
 #endif
+  std::cout << GridLogMessage << "Intialised the Fermion Fields"<<std::endl;

-  for(int n =0 ; n< nrhs ; n++) { 
-    //    std::cout << " src"<<n<<"\n"<< src[n] <<std::endl;
+  LatticeGaugeField Umu(UGrid); 
+  if(1) { 
+    GridParallelRNG pRNG(UGrid );  
+    std::cout << GridLogMessage << "Intialising 4D RNG "<<std::endl;
+    pRNG.SeedFixedIntegers(seeds);
+    std::cout << GridLogMessage << "Intialised 4D RNG "<<std::endl;
+    SU3::HotConfiguration(pRNG,Umu);
+    std::cout << "Intialised the HOT Gauge Field"<<std::endl;
+    //    std::cout << " Site zero "<< Umu._odata[0]   <<std::endl;
+  } else { 
+    SU3::ColdConfiguration(Umu);
+    std::cout << GridLogMessage << "Intialised the COLD Gauge Field"<<std::endl;
  }
-
-  LatticeGaugeField Umu(UGrid); SU3::HotConfiguration(pRNG,Umu);
-
  /////////////////
  // MPI only sends
  /////////////////
@@ -139,13 +147,13 @@ int main (int argc, char ** argv)
  FermionField s_tmp(SFGrid);
  FermionField s_res(SFGrid);

+  std::cout << GridLogMessage << "Made the split grid fields"<<std::endl;
  ///////////////////////////////////////////////////////////////
  // split the source out using MPI instead of I/O
  ///////////////////////////////////////////////////////////////
  Grid_split  (Umu,s_Umu);
  Grid_split  (src,s_src);
  std::cout << GridLogMessage << " split rank  " <<me << " s_src "<<norm2(s_src)<<std::endl;
-  //  std::cout << " s_src\n "<< s_src <<std::endl;

 #ifdef LEXICO_TEST
  FermionField s_src_tmp(SFGrid);
@@ -169,16 +177,12 @@ int main (int argc, char ** argv)
  }
  s_src_diff = s_src_tmp - s_src;
  std::cout << GridLogMessage <<" LEXICO test:  s_src_diff " << norm2(s_src_diff)<<std::endl;
-
-  //  std::cout << " s_src \n" << s_src << std::endl;
-  //  std::cout << " s_src_tmp \n" << s_src_tmp << std::endl;
-  //  std::cout << " s_src_diff \n" << s_src_diff << std::endl;
-  //  exit(0);
 #endif

  ///////////////////////////////////////////////////////////////
  // Set up N-solvers as trivially parallel
  ///////////////////////////////////////////////////////////////
+  std::cout << GridLogMessage << " Building the solvers"<<std::endl;
  RealD mass=0.01;
  RealD M5=1.8;
  DomainWallFermionR Dchk(Umu,*FGrid,*FrbGrid,*UGrid,*rbGrid,mass,M5);