Merge branch 'develop' of https://github.com/paboyle/Grid into develop

2025-12-23 06:04:29 +00:00 · 2018-04-26 11:31:57 +09:00
parent 64ac815fd9 2baf193031
commit c897878776
3 changed files with 66 additions and 34 deletions
--- a/benchmarks/Benchmark_su3.cc
+++ b/benchmarks/Benchmark_su3.cc
@@ -35,24 +35,25 @@ using namespace Grid::QCD;
 int main (int argc, char ** argv)
 {
  Grid_init(&argc,&argv);
-#define LMAX (40)
+#define LMAX (16)
+#define LMIN (16)
 #define LINC (4)

-  int64_t Nloop=20;
+  int64_t Nloop=2000;

  std::vector<int> simd_layout = GridDefaultSimd(Nd,vComplex::Nsimd());
  std::vector<int> mpi_layout  = GridDefaultMpi();

  int64_t threads = GridThread::GetThreads();
  std::cout<<GridLogMessage << "Grid is setup to use "<<threads<<" threads"<<std::endl;
-
+#if 1
  std::cout<<GridLogMessage << "===================================================================================================="<<std::endl;
  std::cout<<GridLogMessage << "= Benchmarking SU3xSU3  x= x*y"<<std::endl;
  std::cout<<GridLogMessage << "===================================================================================================="<<std::endl;
  std::cout<<GridLogMessage << "  L  "<<"\t\t"<<"bytes"<<"\t\t\t"<<"GB/s\t\t GFlop/s"<<std::endl;
  std::cout<<GridLogMessage << "----------------------------------------------------------"<<std::endl;

-  for(int lat=2;lat<=LMAX;lat+=LINC){
+  for(int lat=4;lat<=LMAX;lat+=LINC){

      std::vector<int> latt_size  ({lat*mpi_layout[0],lat*mpi_layout[1],lat*mpi_layout[2],lat*mpi_layout[3]});
      int64_t vol = latt_size[0]*latt_size[1]*latt_size[2]*latt_size[3];
@@ -147,30 +148,31 @@ int main (int argc, char ** argv)
  std::cout<<GridLogMessage << "----------------------------------------------------------"<<std::endl;

  for(int lat=2;lat<=LMAX;lat+=LINC){
-
-      std::vector<int> latt_size  ({lat*mpi_layout[0],lat*mpi_layout[1],lat*mpi_layout[2],lat*mpi_layout[3]});
-      int64_t vol = latt_size[0]*latt_size[1]*latt_size[2]*latt_size[3];
-
-      GridCartesian     Grid(latt_size,simd_layout,mpi_layout);
-      GridParallelRNG          pRNG(&Grid);      pRNG.SeedFixedIntegers(std::vector<int>({45,12,81,9}));
-
-      LatticeColourMatrix z(&Grid); random(pRNG,z);
-      LatticeColourMatrix x(&Grid); random(pRNG,x);
-      LatticeColourMatrix y(&Grid); random(pRNG,y);
-
-      double start=usecond();
-      for(int64_t i=0;i<Nloop;i++){
-	mac(z,x,y);
-      }
-      double stop=usecond();
-      double time = (stop-start)/Nloop*1000.0;
-      
-      double bytes=3*vol*Nc*Nc*sizeof(Complex);
-      double flops=Nc*Nc*(6+8+8)*vol;
-      std::cout<<GridLogMessage<<std::setprecision(3) << lat<<"\t\t"<<bytes<<"   \t\t"<<bytes/time<<"\t\t" << flops/time<<std::endl;
-
+    
+    std::vector<int> latt_size  ({lat*mpi_layout[0],lat*mpi_layout[1],lat*mpi_layout[2],lat*mpi_layout[3]});
+    int64_t vol = latt_size[0]*latt_size[1]*latt_size[2]*latt_size[3];
+    
+    GridCartesian     Grid(latt_size,simd_layout,mpi_layout);
+    GridParallelRNG          pRNG(&Grid);      pRNG.SeedFixedIntegers(std::vector<int>({45,12,81,9}));
+    
+    LatticeColourMatrix z(&Grid); random(pRNG,z);
+    LatticeColourMatrix x(&Grid); random(pRNG,x);
+    LatticeColourMatrix y(&Grid); random(pRNG,y);
+    
+    double start=usecond();
+    for(int64_t i=0;i<Nloop;i++){
+      mac(z,x,y);
    }
-
+    double stop=usecond();
+    double time = (stop-start)/Nloop*1000.0;
+    
+    double bytes=3*vol*Nc*Nc*sizeof(Complex);
+    double flops=Nc*Nc*(6+8+8)*vol;
+    std::cout<<GridLogMessage<<std::setprecision(3) << lat<<"\t\t"<<bytes<<"   \t\t"<<bytes/time<<"\t\t" << flops/time<<std::endl;
+    
+  }
+  
+#endif

  std::cout<<GridLogMessage << "===================================================================================================="<<std::endl;
  std::cout<<GridLogMessage << "= Benchmarking SU3xSU3  CovShiftForward(z,x,y)"<<std::endl;
@@ -178,7 +180,7 @@ int main (int argc, char ** argv)
  std::cout<<GridLogMessage << "  L  "<<"\t\t"<<"bytes"<<"\t\t\t"<<"GB/s\t\t GFlop/s"<<std::endl;
  std::cout<<GridLogMessage << "----------------------------------------------------------"<<std::endl;

-  for(int lat=2;lat<=LMAX;lat+=LINC){
+  for(int lat=LMIN;lat<=LMAX;lat+=LINC){

      std::vector<int> latt_size  ({lat*mpi_layout[0],lat*mpi_layout[1],lat*mpi_layout[2],lat*mpi_layout[3]});
      int64_t vol = latt_size[0]*latt_size[1]*latt_size[2]*latt_size[3];
@@ -190,7 +192,7 @@ int main (int argc, char ** argv)
      LatticeColourMatrix x(&Grid); random(pRNG,x);
      LatticeColourMatrix y(&Grid); random(pRNG,y);

-      for(int mu=0;mu<=4;mu++){
+      for(int mu=0;mu<4;mu++){
 	double start=usecond();
 	for(int64_t i=0;i<Nloop;i++){
 	  z = PeriodicBC::CovShiftForward(x,mu,y);
@@ -198,11 +200,12 @@ int main (int argc, char ** argv)
 	double stop=usecond();
 	double time = (stop-start)/Nloop*1000.0;
 	
+	
 	double bytes=3*vol*Nc*Nc*sizeof(Complex);
 	double flops=Nc*Nc*(6+8+8)*vol;
 	std::cout<<GridLogMessage<<std::setprecision(3) << lat<<"\t\t"<<bytes<<"   \t\t"<<bytes/time<<"\t\t" << flops/time<<std::endl;
      }
-    }
+  }

  Grid_finalize();
 }
--- a/lib/cshift/Cshift_mpi.h
+++ b/lib/cshift/Cshift_mpi.h
@@ -54,13 +54,13 @@ template<class vobj> Lattice<vobj> Cshift(const Lattice<vobj> &rhs,int dimension


  if ( !comm_dim ) {
-    //    std::cout << "Cshift_local" <<std::endl;
+    //std::cout << "CSHIFT: Cshift_local" <<std::endl;
    Cshift_local(ret,rhs,dimension,shift); // Handles checkerboarding
  } else if ( splice_dim ) {
-    //    std::cout << "Cshift_comms_simd" <<std::endl;
+    //std::cout << "CSHIFT: Cshift_comms_simd call - splice_dim = " << splice_dim << " shift " << shift << " dimension = " << dimension << std::endl;
    Cshift_comms_simd(ret,rhs,dimension,shift);
  } else {
-    //    std::cout << "Cshift_comms" <<std::endl;
+    //std::cout << "CSHIFT: Cshift_comms" <<std::endl;
    Cshift_comms(ret,rhs,dimension,shift);
  }
  return ret;
@@ -91,9 +91,12 @@ template<class vobj> void Cshift_comms_simd(Lattice<vobj>& ret,const Lattice<vob
  sshift[0] = rhs._grid->CheckerBoardShiftForCB(rhs.checkerboard,dimension,shift,Even);
  sshift[1] = rhs._grid->CheckerBoardShiftForCB(rhs.checkerboard,dimension,shift,Odd);

+  //std::cout << "Cshift_comms_simd dim "<<dimension<<"cb "<<rhs.checkerboard<<"shift "<<shift<<" sshift " << sshift[0]<<" "<<sshift[1]<<std::endl;
  if ( sshift[0] == sshift[1] ) {
+    //std::cout << "Single pass Cshift_comms" <<std::endl;
    Cshift_comms_simd(ret,rhs,dimension,shift,0x3);
  } else {
+    //std::cout << "Two pass Cshift_comms" <<std::endl;
    Cshift_comms_simd(ret,rhs,dimension,shift,0x1);// if checkerboard is unfavourable take two passes
    Cshift_comms_simd(ret,rhs,dimension,shift,0x2);// both with block stride loop iteration
  }
@@ -175,6 +178,10 @@ template<class vobj> void  Cshift_comms_simd(Lattice<vobj> &ret,const Lattice<vo
  int simd_layout     = grid->_simd_layout[dimension];
  int comm_dim        = grid->_processors[dimension] >1 ;

+  //std::cout << "Cshift_comms_simd dim "<< dimension << " fd "<<fd<<" rd "<<rd
+  //    << " ld "<<ld<<" pd " << pd<<" simd_layout "<<simd_layout 
+  //    << " comm_dim " << comm_dim << " cbmask " << cbmask <<std::endl;
+
  assert(comm_dim==1);
  assert(simd_layout==2);
  assert(shift>=0);
--- a/lib/lattice/Lattice_base.h
+++ b/lib/lattice/Lattice_base.h
@@ -257,7 +257,11 @@ public:
    }  	
  }
  
-  
+  Lattice(Lattice&& r){ // move constructor: moves r's _odata across instead of running a per-site copy loop
+    this->_odata       = std::move(r._odata); // r's storage is left in a moved-from state
+    this->checkerboard = r.checkerboard;
+    this->_grid        = r._grid;             // same grid pointer as r; nothing is allocated here
+  }
  
  virtual ~Lattice(void) = default;
    
@@ -286,6 +290,24 @@ public:
    }
    return *this;
  }
+
+  strong_inline Lattice<vobj> & operator = (const Lattice<vobj> & r){ // copy assignment from a lattice of the same vobj type
+    this->checkerboard = r.checkerboard;
+    this->_grid        = r._grid;
+    this->_odata.resize(this->_grid->oSites()); // essential: size the storage for r's grid before the site loop
+
+    parallel_for(int site=0;site<this->_grid->oSites();site++){
+      this->_odata[site]=r._odata[site]; // site-by-site copy of r's data
+    }
+    return *this;
+  }
+  strong_inline Lattice<vobj> & operator = (Lattice<vobj> && r)
+  { // Move assignment: adopts r's grid pointer and checkerboard and takes over r's _odata; returns *this.
+    _grid        = r._grid;
+    checkerboard = r.checkerboard;
+    if ( this != &r ) _odata = std::move(r._odata); // guard: moving _odata onto itself would leave it in an unspecified state
+    return *this;
+  }
  
  // *=,+=,-= operators inherit behaviour from the corresponding */+/- operation
  template<class T> strong_inline Lattice<vobj> &operator *=(const T &r) {