Merge branch 'develop' into feature/hadrons
Commit 0a038ea15a (mirror of https://github.com/paboyle/Grid.git)
@@ -276,10 +276,11 @@ class CartesianCommunicator {
 assert(in.size()==out.size());
 uint64_t bytes=sizeof(T);
 uint64_t words=in.size()/numnode;
+// std:: cout << "AllToAll buffer size "<< in.size()*sizeof(T)<<std::endl;
+// std:: cout << "AllToAll datum bytes "<< bytes<<std::endl;
+// std:: cout << "AllToAll datum count "<< words<<std::endl;
 assert(numnode * words == in.size());
-assert(words < (1ULL<<32));
+assert(words < (1ULL<<31));

 AllToAll(dim,(void *)&in[0],(void *)&out[0],words,bytes);
 }
 void AllToAll(int dim ,void *in,void *out,uint64_t words,uint64_t bytes);
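Note on the tightened bound: the element count that eventually reaches MPI is a signed 32-bit int, so the per-node word count has to stay below 2^31, not 2^32. A hedged sketch of the constraint; the real transport lives in the communicator backends, and AllToAllSketch plus the direct MPI_Alltoall call here are illustrative assumptions, not this header's implementation:

    #include <mpi.h>
    #include <cassert>
    #include <cstdint>
    #include <vector>

    // Sketch only: MPI_Alltoall takes the per-rank element count as an int, so
    // the 64-bit word count must fit in 31 bits before it is narrowed.
    void AllToAllSketch(std::vector<double> &in, std::vector<double> &out, int numnode) {
      uint64_t words = in.size() / numnode;        // elements sent to each rank
      assert(numnode * words == in.size());
      assert(words < (1ULL << 31));                // must survive the cast to int
      MPI_Alltoall(in.data(),  (int)words, MPI_DOUBLE,
                   out.data(), (int)words, MPI_DOUBLE, MPI_COMM_WORLD);
    }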
@@ -77,9 +77,6 @@ namespace Grid {


 // merge of April 11 2017
-//<<<<<<< HEAD
-
-
 // this function is necessary for the LS vectorised field
 inline int RNGfillable_general(GridBase *coarse,GridBase *fine)
 {
@@ -92,7 +89,6 @@ namespace Grid {
 for(int d=0;d<lowerdims;d++) assert(fine->_processors[d]==1);
 for(int d=0;d<rngdims;d++) assert(coarse->_processors[d] == fine->_processors[d+lowerdims]);
-

 // then divide the number of local sites
 // check that the total number of sims agree, meanse the iSites are the same
 assert(fine->Nsimd() == coarse->Nsimd());
@@ -103,27 +99,6 @@ namespace Grid {
 return fine->lSites() / coarse->lSites();
 }

-/*
-// Wrap seed_seq to give common interface with random_device
-class fixedSeed {
-public:
-typedef std::seed_seq::result_type result_type;
-std::seed_seq src;
-
-fixedSeed(const std::vector<int> &seeds) : src(seeds.begin(),seeds.end()) {};
-
-result_type operator () (void){
-std::vector<result_type> list(1);
-src.generate(list.begin(),list.end());
-return list[0];
-}
-
-};
-
-=======
->>>>>>> develop
-*/
-
 // real scalars are one component
 template<class scalar,class distribution,class generator>
 void fillScalar(scalar &s,distribution &dist,generator & gen)
@@ -171,7 +146,7 @@ namespace Grid {
 // support for parallel init
 ///////////////////////
 #ifdef RNG_FAST_DISCARD
-static void Skip(RngEngine &eng)
+static void Skip(RngEngine &eng,uint64_t site)
 {
 /////////////////////////////////////////////////////////////////////////////////////
 // Skip by 2^40 elements between successive lattice sites
@@ -184,8 +159,11 @@ namespace Grid {
 // and margin of safety is orders of magnitude.
 // We could hack Sitmo to skip in the higher order words of state if necessary
 /////////////////////////////////////////////////////////////////////////////////////
-uint64_t skip = 0x1; skip = skip<<40;
+// uint64_t skip = site+1; // Old init Skipped then drew. Checked compat with faster init
+uint64_t skip = site;
+skip = skip<<40;
 eng.discard(skip);
+// std::cout << " Engine " <<site << " state " <<eng<<std::endl;
 }
 #endif
 static RngEngine Reseed(RngEngine &eng)
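Note on the changed Skip: instead of advancing the engine by a fixed 2^40 on every call, the site index now selects the offset directly, so site n's engine is the master state discarded by n*2^40 draws. A minimal sketch of that idea; Eng and EngineForSite are stand-in names, and a constant-time discard (as Sitmo provides) is assumed:

    #include <cstdint>

    // Sketch only: each lattice site owns the disjoint subsequence that starts
    // site*2^40 draws into the master stream.
    template <class Eng>
    Eng EngineForSite(const Eng &master, uint64_t site) {
      Eng eng = master;          // copy the shared master state
      uint64_t skip = site;
      skip = skip << 40;         // 2^40 draws reserved per site
      eng.discard(skip);         // assumes an O(1) discard (e.g. Sitmo)
      return eng;
    }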
@@ -407,15 +385,14 @@ namespace Grid {
 // MT implementation does not implement fast discard even though
 // in principle this is possible
 ////////////////////////////////////////////////
-std::vector<int> gcoor;
-int rank,o_idx,i_idx;

 // Everybody loops over global volume.
-for(int gidx=0;gidx<_grid->_gsites;gidx++){
+parallel_for(int gidx=0;gidx<_grid->_gsites;gidx++){

-Skip(master_engine); // Skip to next RNG sequence
 // Where is it?
+int rank,o_idx,i_idx;
+std::vector<int> gcoor;

 _grid->GlobalIndexToGlobalCoor(gidx,gcoor);
 _grid->GlobalCoorToRankIndex(rank,o_idx,i_idx,gcoor);

@@ -423,6 +400,7 @@ namespace Grid {
 if( rank == _grid->ThisRank() ){
 int l_idx=generator_idx(o_idx,i_idx);
 _generators[l_idx] = master_engine;
+Skip(_generators[l_idx],gidx); // Skip to next RNG sequence
 }

 }
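With the per-site Skip, the initialisation loop no longer mutates master_engine, which is what makes the switch from for to parallel_for safe: each iteration only reads the master state and writes its own generator slot. A hedged sketch of the pattern, with OpenMP standing in for Grid's parallel_for; SeedAllSites is a hypothetical name and the rank/index bookkeeping of the real code is elided:

    #include <cstdint>
    #include <vector>

    // Sketch only: the master engine is copied, never advanced, so the site
    // loop parallelises trivially. Eng stands in for Grid's RngEngine.
    template <class Eng>
    void SeedAllSites(const Eng &master, std::vector<Eng> &generators) {
      #pragma omp parallel for
      for (int64_t gidx = 0; gidx < (int64_t)generators.size(); gidx++) {
        Eng eng = master;                     // independent copy per site
        uint64_t skip = (uint64_t)gidx;
        eng.discard(skip << 40);              // jump to this site's subsequence
        generators[gidx] = eng;
      }
    }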
@@ -822,6 +822,7 @@ void Grid_split(std::vector<Lattice<Vobj> > & full,Lattice<Vobj> & split)

 // Loop over reordered data post A2A
 parallel_for(int c=0;c<chunk;c++){
+std::vector<int> coor(ndim);
 for(int m=0;m<M;m++){
 for(int s=0;s<sP;s++){

@@ -833,7 +834,6 @@ void Grid_split(std::vector<Lattice<Vobj> > & full,Lattice<Vobj> & split)
 uint64_t lex_vec = lex_fvol_vec/fvol;

 // which node sets an adder to the coordinate
-std::vector<int> coor(ndim);
 Lexicographic::CoorFromIndex(coor, lex_fvol, ldims);
 coor[d] += m*ldims[d];
 Lexicographic::IndexFromCoor(coor, lex_r, rdims);
@@ -940,7 +940,8 @@ void Grid_unsplit(std::vector<Lattice<Vobj> > & full,Lattice<Vobj> & split)

 {
 // Loop over reordered data post A2A
-for(int c=0;c<chunk;c++){
+parallel_for(int c=0;c<chunk;c++){
+std::vector<int> coor(ndim);
 for(int m=0;m<M;m++){
 for(int s=0;s<sP;s++){

@@ -952,7 +953,6 @@ void Grid_unsplit(std::vector<Lattice<Vobj> > & full,Lattice<Vobj> & split)
 uint64_t lex_vec = lex_fvol_vec/fvol;

 // which node sets an adder to the coordinate
-std::vector<int> coor(ndim);
 Lexicographic::CoorFromIndex(coor, lex_fvol, ldims);
 coor[d] += m*ldims[d];
 Lexicographic::IndexFromCoor(coor, lex_r, rdims);
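In both Grid_split and Grid_unsplit the scratch vector coor(ndim) moves from the innermost loop to just inside the (now parallel) chunk loop: each thread keeps one private coordinate buffer instead of reallocating it for every (m,s), and nothing is shared across threads. The shape of the change, sketched with OpenMP standing in for parallel_for and the real index arithmetic elided:

    #include <vector>

    // Sketch only: one scratch coordinate vector per parallel iteration,
    // reused by the inner loops.
    void reorder_sketch(int chunk, int M, int sP, int ndim) {
      #pragma omp parallel for
      for (int c = 0; c < chunk; c++) {
        std::vector<int> coor(ndim);      // thread-private, allocated once per c
        for (int m = 0; m < M; m++) {
          for (int s = 0; s < sP; s++) {
            coor.assign(ndim, 0);         // placeholder for CoorFromIndex(...)
          }
        }
      }
    }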
@@ -978,9 +978,9 @@ void Grid_unsplit(std::vector<Lattice<Vobj> > & full,Lattice<Vobj> & split)

 lsites = full_grid->lSites();
 for(int v=0;v<nvector;v++){
-assert(v<full.size());
+// assert(v<full.size());
 parallel_for(int site=0;site<lsites;site++){
-assert(v*lsites+site < alldata.size());
+// assert(v*lsites+site < alldata.size());
 scalardata[site] = alldata[v*lsites+site];
 }
 vectorizeFromLexOrdArray(scalardata,full[v]);
@@ -47,6 +47,7 @@ namespace Grid {
 INHERIT_IMPL_TYPES(Impl);

 FermionOperator(const ImplParams &p= ImplParams()) : Impl(p) {};
+virtual ~FermionOperator(void) = default;

 virtual FermionField &tmp(void) = 0;

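The new virtual ~FermionOperator(void) = default matters because FermionOperator is an abstract interface: destroying a concrete operator through a base-class pointer is undefined behaviour unless the base destructor is virtual. A small illustration with made-up types:

    #include <memory>

    // Illustration only, with hypothetical types: a polymorphic base needs a
    // virtual destructor so deleting through a base pointer runs derived cleanup.
    struct OperatorBase {
      virtual ~OperatorBase() = default;   // without this the reset below is UB
      virtual void M() = 0;
    };

    struct ConcreteOperator : OperatorBase {
      void M() override {}
    };

    int main() {
      std::unique_ptr<OperatorBase> op = std::make_unique<ConcreteOperator>();
      op->M();
      op.reset();   // destroys ConcreteOperator via the OperatorBase pointer
    }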
@@ -746,7 +746,7 @@ template<typename GaugeField,typename GaugeMat>
 }
 }
 template<typename GaugeField>
-static void ColdConfiguration(GridParallelRNG &pRNG,GaugeField &out){
+static void ColdConfiguration(GaugeField &out){
 typedef typename GaugeField::vector_type vector_type;
 typedef iSUnMatrix<vector_type> vMatrixType;
 typedef Lattice<vMatrixType> LatticeMatrixType;
@@ -757,6 +757,10 @@ template<typename GaugeField,typename GaugeMat>
 PokeIndex<LorentzIndex>(out,Umu,mu);
 }
 }
+template<typename GaugeField>
+static void ColdConfiguration(GridParallelRNG &pRNG,GaugeField &out){
+ColdConfiguration(out);
+}

 template<typename LatticeMatrixType>
 static void taProj( const LatticeMatrixType &in, LatticeMatrixType &out){
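ColdConfiguration never used the RNG, so the new overload drops the argument and the old signature is kept as a thin forwarder so existing call sites keep compiling. A hedged usage sketch; the grid and RNG setup follows the usual Grid test boilerplate and is an assumption, not part of this diff:

    #include <Grid/Grid.h>

    using namespace Grid;
    using namespace Grid::QCD;

    int main(int argc, char **argv) {
      Grid_init(&argc, &argv);
      GridCartesian *UGrid = SpaceTimeGrid::makeFourDimGrid(
          GridDefaultLatt(), GridDefaultSimd(Nd, vComplex::Nsimd()), GridDefaultMpi());
      GridParallelRNG pRNG(UGrid);
      pRNG.SeedFixedIntegers(std::vector<int>({1, 2, 3, 4}));

      LatticeGaugeField Umu(UGrid);
      SU3::ColdConfiguration(Umu);        // new RNG-free overload: unit gauge links
      SU3::ColdConfiguration(pRNG, Umu);  // legacy signature, forwards to the call above

      Grid_finalize();
      return 0;
    }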
@@ -81,21 +81,20 @@ int main (int argc, char ** argv)
 GridCartesian * SFGrid = SpaceTimeGrid::makeFiveDimGrid(Ls,SGrid);
 GridRedBlackCartesian * SrbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(SGrid);
 GridRedBlackCartesian * SFrbGrid = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls,SGrid);
+std::cout << GridLogMessage << "Made the grids"<<std::endl;
 ///////////////////////////////////////////////
 // Set up the problem as a 4d spreadout job
 ///////////////////////////////////////////////
 std::vector<int> seeds({1,2,3,4});

-GridParallelRNG pRNG(UGrid ); pRNG.SeedFixedIntegers(seeds);
-GridParallelRNG pRNG5(FGrid); pRNG5.SeedFixedIntegers(seeds);
 std::vector<FermionField> src(nrhs,FGrid);
 std::vector<FermionField> src_chk(nrhs,FGrid);
 std::vector<FermionField> result(nrhs,FGrid);
 FermionField tmp(FGrid);
+std::cout << GridLogMessage << "Made the Fermion Fields"<<std::endl;

 for(int s=0;s<nrhs;s++) result[s]=zero;
-#define LEXICO_TEST
+#undef LEXICO_TEST
 #ifdef LEXICO_TEST
 {
 LatticeFermion lex(FGrid); lex = zero;
@@ -117,6 +116,7 @@ int main (int argc, char ** argv)
 }
 }
 #else
+GridParallelRNG pRNG5(FGrid); pRNG5.SeedFixedIntegers(seeds);
 for(int s=0;s<nrhs;s++) {
 random(pRNG5,src[s]);
 tmp = 100.0*s;
@@ -124,13 +124,21 @@ int main (int argc, char ** argv)
 std::cout << GridLogMessage << " src ["<<s<<"] "<<norm2(src[s])<<std::endl;
 }
 #endif
+std::cout << GridLogMessage << "Intialised the Fermion Fields"<<std::endl;

-for(int n =0 ; n< nrhs ; n++) {
-// std::cout << " src"<<n<<"\n"<< src[n] <<std::endl;
+LatticeGaugeField Umu(UGrid);
+if(1) {
+GridParallelRNG pRNG(UGrid );
+std::cout << GridLogMessage << "Intialising 4D RNG "<<std::endl;
+pRNG.SeedFixedIntegers(seeds);
+std::cout << GridLogMessage << "Intialised 4D RNG "<<std::endl;
+SU3::HotConfiguration(pRNG,Umu);
+std::cout << "Intialised the HOT Gauge Field"<<std::endl;
+// std::cout << " Site zero "<< Umu._odata[0] <<std::endl;
+} else {
+SU3::ColdConfiguration(Umu);
+std::cout << GridLogMessage << "Intialised the COLD Gauge Field"<<std::endl;
 }

-LatticeGaugeField Umu(UGrid); SU3::HotConfiguration(pRNG,Umu);

 /////////////////
 // MPI only sends
 /////////////////
@@ -139,13 +147,13 @@ int main (int argc, char ** argv)
 FermionField s_tmp(SFGrid);
 FermionField s_res(SFGrid);

+std::cout << GridLogMessage << "Made the split grid fields"<<std::endl;
 ///////////////////////////////////////////////////////////////
 // split the source out using MPI instead of I/O
 ///////////////////////////////////////////////////////////////
 Grid_split (Umu,s_Umu);
 Grid_split (src,s_src);
 std::cout << GridLogMessage << " split rank " <<me << " s_src "<<norm2(s_src)<<std::endl;
-// std::cout << " s_src\n "<< s_src <<std::endl;

 #ifdef LEXICO_TEST
 FermionField s_src_tmp(SFGrid);
@@ -169,16 +177,12 @@ int main (int argc, char ** argv)
 }
 s_src_diff = s_src_tmp - s_src;
 std::cout << GridLogMessage <<" LEXICO test: s_src_diff " << norm2(s_src_diff)<<std::endl;

-// std::cout << " s_src \n" << s_src << std::endl;
-// std::cout << " s_src_tmp \n" << s_src_tmp << std::endl;
-// std::cout << " s_src_diff \n" << s_src_diff << std::endl;
-// exit(0);
 #endif

 ///////////////////////////////////////////////////////////////
 // Set up N-solvers as trivially parallel
 ///////////////////////////////////////////////////////////////
+std::cout << GridLogMessage << " Building the solvers"<<std::endl;
 RealD mass=0.01;
 RealD M5=1.8;
 DomainWallFermionR Dchk(Umu,*FGrid,*FrbGrid,*UGrid,*rbGrid,mass,M5);