mirror of
				https://github.com/paboyle/Grid.git
				synced 2025-11-02 21:14:32 +00:00 
			
		
		
		
	updating benchmarks for red black 4d for Ls vectorised code
This commit is contained in:
		@@ -45,9 +45,9 @@ struct scal {
 | 
			
		||||
  };
 | 
			
		||||
 | 
			
		||||
bool overlapComms = false;
 | 
			
		||||
typedef WilsonFermion5D<DomainWallRedBlack5dImplR> WilsonFermion5DR;
 | 
			
		||||
typedef WilsonFermion5D<DomainWallRedBlack5dImplF> WilsonFermion5DF;
 | 
			
		||||
typedef WilsonFermion5D<DomainWallRedBlack5dImplD> WilsonFermion5DD;
 | 
			
		||||
typedef WilsonFermion5D<DomainWallVec5dImplR> WilsonFermion5DR;
 | 
			
		||||
typedef WilsonFermion5D<DomainWallVec5dImplF> WilsonFermion5DF;
 | 
			
		||||
typedef WilsonFermion5D<DomainWallVec5dImplD> WilsonFermion5DD;
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
int main (int argc, char ** argv)
 | 
			
		||||
@@ -70,8 +70,8 @@ int main (int argc, char ** argv)
 | 
			
		||||
 | 
			
		||||
  std::cout << GridLogMessage << "Making s innermost grids"<<std::endl;
 | 
			
		||||
  GridCartesian         * sUGrid   = SpaceTimeGrid::makeFourDimDWFGrid(GridDefaultLatt(),GridDefaultMpi());
 | 
			
		||||
  GridRedBlackCartesian * sUrbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(sUGrid);
 | 
			
		||||
  GridCartesian         * sFGrid   = SpaceTimeGrid::makeFiveDimDWFGrid(Ls,UGrid);
 | 
			
		||||
  std::cout << GridLogMessage << "Making s innermost rb grids"<<std::endl;
 | 
			
		||||
  GridRedBlackCartesian * sFrbGrid = SpaceTimeGrid::makeFiveDimDWFRedBlackGrid(Ls,UGrid);
 | 
			
		||||
 | 
			
		||||
  std::vector<int> seeds4({1,2,3,4});
 | 
			
		||||
@@ -86,6 +86,16 @@ int main (int argc, char ** argv)
 | 
			
		||||
  LatticeFermion    tmp(FGrid);
 | 
			
		||||
  LatticeFermion    err(FGrid);
 | 
			
		||||
 | 
			
		||||
  /*  src=zero;
 | 
			
		||||
  std::vector<int> origin(5,0);
 | 
			
		||||
  SpinColourVector f=zero;
 | 
			
		||||
  for(int sp=0;sp<4;sp++){
 | 
			
		||||
  for(int co=0;co<3;co++){
 | 
			
		||||
    f()(sp)(co)=Complex(1.0,0.0); 
 | 
			
		||||
  }}
 | 
			
		||||
  pokeSite(f,src,origin);
 | 
			
		||||
  */
 | 
			
		||||
 | 
			
		||||
  ColourMatrix cm = Complex(1.0,0.0);
 | 
			
		||||
 | 
			
		||||
  LatticeGaugeField Umu(UGrid); 
 | 
			
		||||
@@ -126,19 +136,16 @@ int main (int argc, char ** argv)
 | 
			
		||||
  RealD mass=0.1;
 | 
			
		||||
  RealD M5  =1.8;
 | 
			
		||||
 | 
			
		||||
  typename DomainWallFermionR::ImplParams params; 
 | 
			
		||||
  params.overlapCommsCompute = overlapComms;
 | 
			
		||||
  
 | 
			
		||||
  RealD NP = UGrid->_Nprocessors;
 | 
			
		||||
 | 
			
		||||
  for(int doasm=1;doasm<2;doasm++){
 | 
			
		||||
 | 
			
		||||
    QCD::WilsonKernelsStatic::AsmOpt=doasm;
 | 
			
		||||
 | 
			
		||||
  DomainWallFermionR Dw(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5,params);
 | 
			
		||||
  DomainWallFermionR Dw(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5);
 | 
			
		||||
  
 | 
			
		||||
  std::cout<<GridLogMessage << "Calling Dw"<<std::endl;
 | 
			
		||||
  int ncall =10;
 | 
			
		||||
  int ncall =100;
 | 
			
		||||
  if (1) {
 | 
			
		||||
 | 
			
		||||
    double t0=usecond();
 | 
			
		||||
@@ -164,11 +171,12 @@ int main (int argc, char ** argv)
 | 
			
		||||
 | 
			
		||||
  if (1)
 | 
			
		||||
  {
 | 
			
		||||
    typedef WilsonFermion5D<DomainWallRedBlack5dImplR> WilsonFermion5DR;
 | 
			
		||||
    typedef WilsonFermion5D<DomainWallVec5dImplR> WilsonFermion5DR;
 | 
			
		||||
    LatticeFermion ssrc(sFGrid);
 | 
			
		||||
    LatticeFermion sref(sFGrid);
 | 
			
		||||
    LatticeFermion sresult(sFGrid);
 | 
			
		||||
    WilsonFermion5DR sDw(1,Umu,*sFGrid,*sFrbGrid,*sUGrid,M5,params);
 | 
			
		||||
 | 
			
		||||
    WilsonFermion5DR sDw(Umu,*sFGrid,*sFrbGrid,*sUGrid,*sUrbGrid,M5);
 | 
			
		||||
  
 | 
			
		||||
    for(int x=0;x<latt4[0];x++){
 | 
			
		||||
    for(int y=0;y<latt4[1];y++){
 | 
			
		||||
@@ -180,7 +188,7 @@ int main (int argc, char ** argv)
 | 
			
		||||
      peekSite(tmp,src,site);
 | 
			
		||||
      pokeSite(tmp,ssrc,site);
 | 
			
		||||
    }}}}}
 | 
			
		||||
 | 
			
		||||
    std::cout<<"src norms "<< norm2(src)<<" " <<norm2(ssrc)<<std::endl;
 | 
			
		||||
    double t0=usecond();
 | 
			
		||||
    for(int i=0;i<ncall;i++){
 | 
			
		||||
      __SSC_START;
 | 
			
		||||
@@ -207,6 +215,7 @@ int main (int argc, char ** argv)
 | 
			
		||||
      }
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    std::cout<<"res norms "<< norm2(result)<<" " <<norm2(sresult)<<std::endl;
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
    RealF sum=0;
 | 
			
		||||
@@ -220,9 +229,11 @@ int main (int argc, char ** argv)
 | 
			
		||||
      peekSite(normal,result,site);
 | 
			
		||||
      peekSite(simd,sresult,site);
 | 
			
		||||
      sum=sum+norm2(normal-simd);
 | 
			
		||||
      //      std::cout << "site "<<x<<","<<y<<","<<z<<","<<t<<","<<s<<" "<<norm2(normal-simd)<<std::endl;
 | 
			
		||||
      //      std::cout << "site "<<x<<","<<y<<","<<z<<","<<t<<","<<s<<" "<<normal<<std::endl;
 | 
			
		||||
      //      std::cout << "site "<<x<<","<<y<<","<<z<<","<<t<<","<<s<<" "<<simd<<std::endl;
 | 
			
		||||
      if (norm2(normal-simd) > 1.0e-6 ) {
 | 
			
		||||
	std::cout << "site "<<x<<","<<y<<","<<z<<","<<t<<","<<s<<" "<<norm2(normal-simd)<<std::endl;
 | 
			
		||||
	std::cout << "site "<<x<<","<<y<<","<<z<<","<<t<<","<<s<<" normal "<<normal<<std::endl;
 | 
			
		||||
	std::cout << "site "<<x<<","<<y<<","<<z<<","<<t<<","<<s<<" simd   "<<simd<<std::endl;
 | 
			
		||||
      }
 | 
			
		||||
    }}}}}
 | 
			
		||||
    std::cout<<" difference between normal and simd is "<<sum<<std::endl;
 | 
			
		||||
 | 
			
		||||
@@ -267,9 +278,9 @@ int main (int argc, char ** argv)
 | 
			
		||||
      pickCheckerboard(Even,ssrc_e,sresult);
 | 
			
		||||
      pickCheckerboard(Odd ,ssrc_o,sresult);
 | 
			
		||||
      ssrc_e = ssrc_e - sr_e;
 | 
			
		||||
      std::cout<<GridLogMessage << "sE norm diff   "<< norm2(ssrc_e)<<std::endl;
 | 
			
		||||
      std::cout<<GridLogMessage << "sE norm diff   "<< norm2(ssrc_e)<< "  vec nrm"<<norm2(sr_e) <<std::endl;
 | 
			
		||||
      ssrc_o = ssrc_o - sr_o;
 | 
			
		||||
      std::cout<<GridLogMessage << "sO norm diff   "<< norm2(ssrc_o)<<std::endl;
 | 
			
		||||
      std::cout<<GridLogMessage << "sO norm diff   "<< norm2(ssrc_o)<< "  vec nrm"<<norm2(sr_o) <<std::endl;
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
 
 | 
			
		||||
@@ -125,7 +125,6 @@ void benchDw(std::vector<int> & latt4, int Ls, int threads,int report )
 | 
			
		||||
 | 
			
		||||
  ColourMatrix cm = Complex(1.0,0.0);
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
  LatticeGaugeField Umu5d(FGrid); 
 | 
			
		||||
 | 
			
		||||
  // replicate across fifth dimension
 | 
			
		||||
@@ -144,11 +143,10 @@ void benchDw(std::vector<int> & latt4, int Ls, int threads,int report )
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
#ifdef CHECK
 | 
			
		||||
  if (1)
 | 
			
		||||
  {
 | 
			
		||||
  if (1) {
 | 
			
		||||
 | 
			
		||||
    ref = zero;
 | 
			
		||||
    for(int mu=0;mu<Nd;mu++){
 | 
			
		||||
 | 
			
		||||
      tmp = U[mu]*Cshift(src,mu+1,1);
 | 
			
		||||
      ref=ref + tmp - Gamma(Gmu[mu])*tmp;
 | 
			
		||||
 | 
			
		||||
@@ -192,20 +190,19 @@ void benchDw(std::vector<int> & latt4, int Ls, int threads,int report )
 | 
			
		||||
    Counter.Report();
 | 
			
		||||
  }
 | 
			
		||||
  
 | 
			
		||||
  if ( ! report ) 
 | 
			
		||||
    {
 | 
			
		||||
      double volume=Ls;  for(int mu=0;mu<Nd;mu++) volume=volume*latt4[mu];
 | 
			
		||||
      double flops=1344*volume*ncall;
 | 
			
		||||
      std::cout <<"\t"<<NP<< "\t"<<flops/(t1-t0)<< "\t";
 | 
			
		||||
    }
 | 
			
		||||
  if ( ! report ) {
 | 
			
		||||
    double volume=Ls;  for(int mu=0;mu<Nd;mu++) volume=volume*latt4[mu];
 | 
			
		||||
    double flops=1344*volume*ncall;
 | 
			
		||||
    std::cout <<"\t"<<NP<< "\t"<<flops/(t1-t0)<< "\t";
 | 
			
		||||
  }
 | 
			
		||||
  
 | 
			
		||||
#ifdef CHECK
 | 
			
		||||
    err = ref-result; 
 | 
			
		||||
    RealD errd = norm2(err);
 | 
			
		||||
    if ( errd> 1.0e-4 ) {
 | 
			
		||||
      std::cout<<GridLogMessage << "oops !!! norm diff   "<< norm2(err)<<std::endl;
 | 
			
		||||
      exit(-1);
 | 
			
		||||
    }
 | 
			
		||||
  err = ref-result; 
 | 
			
		||||
  RealD errd = norm2(err);
 | 
			
		||||
  if ( errd> 1.0e-4 ) {
 | 
			
		||||
    std::cout<<GridLogMessage << "oops !!! norm diff   "<< norm2(err)<<std::endl;
 | 
			
		||||
    exit(-1);
 | 
			
		||||
  }
 | 
			
		||||
#endif
 | 
			
		||||
    
 | 
			
		||||
  LatticeFermion src_e (FrbGrid);
 | 
			
		||||
@@ -231,10 +228,9 @@ void benchDw(std::vector<int> & latt4, int Ls, int threads,int report )
 | 
			
		||||
      std::cout<< flops/(t1-t0);
 | 
			
		||||
    }
 | 
			
		||||
  }
 | 
			
		||||
  
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
#undef CHECK_SDW
 | 
			
		||||
#define CHECK_SDW
 | 
			
		||||
void benchsDw(std::vector<int> & latt4, int Ls, int threads, int report )
 | 
			
		||||
{
 | 
			
		||||
 | 
			
		||||
@@ -242,7 +238,9 @@ void benchsDw(std::vector<int> & latt4, int Ls, int threads, int report )
 | 
			
		||||
  GridRedBlackCartesian * UrbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(UGrid);
 | 
			
		||||
  GridCartesian         * FGrid   = SpaceTimeGrid::makeFiveDimGrid(Ls,UGrid);
 | 
			
		||||
  GridRedBlackCartesian * FrbGrid = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls,UGrid);
 | 
			
		||||
 | 
			
		||||
  GridCartesian         * sUGrid   = SpaceTimeGrid::makeFourDimDWFGrid(latt4,GridDefaultMpi());
 | 
			
		||||
  GridRedBlackCartesian * sUrbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(sUGrid);
 | 
			
		||||
  GridCartesian         * sFGrid   = SpaceTimeGrid::makeFiveDimDWFGrid(Ls,UGrid);
 | 
			
		||||
  GridRedBlackCartesian * sFrbGrid = SpaceTimeGrid::makeFiveDimDWFRedBlackGrid(Ls,UGrid);
 | 
			
		||||
 | 
			
		||||
@@ -276,93 +274,89 @@ void benchsDw(std::vector<int> & latt4, int Ls, int threads, int report )
 | 
			
		||||
    }
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
  RealD mass=0.1;
 | 
			
		||||
  RealD M5  =1.8;
 | 
			
		||||
 | 
			
		||||
    typedef WilsonFermion5D<DomainWallRedBlack5dImplR> WilsonFermion5DR;
 | 
			
		||||
    LatticeFermion ssrc(sFGrid);
 | 
			
		||||
    LatticeFermion sref(sFGrid);
 | 
			
		||||
    LatticeFermion sresult(sFGrid);
 | 
			
		||||
    WilsonFermion5DR sDw(1,Umu,*sFGrid,*sFrbGrid,*sUGrid,M5);
 | 
			
		||||
  typedef WilsonFermion5D<DomainWallVec5dImplR> WilsonFermion5DR;
 | 
			
		||||
  LatticeFermion ssrc(sFGrid);
 | 
			
		||||
  LatticeFermion sref(sFGrid);
 | 
			
		||||
  LatticeFermion sresult(sFGrid);
 | 
			
		||||
  WilsonFermion5DR sDw(Umu,*sFGrid,*sFrbGrid,*sUGrid,*sUrbGrid,M5);
 | 
			
		||||
  
 | 
			
		||||
    for(int x=0;x<latt4[0];x++){
 | 
			
		||||
    for(int y=0;y<latt4[1];y++){
 | 
			
		||||
    for(int z=0;z<latt4[2];z++){
 | 
			
		||||
    for(int t=0;t<latt4[3];t++){
 | 
			
		||||
    for(int s=0;s<Ls;s++){
 | 
			
		||||
      std::vector<int> site({s,x,y,z,t});
 | 
			
		||||
      SpinColourVector tmp;
 | 
			
		||||
      peekSite(tmp,src,site);
 | 
			
		||||
      pokeSite(tmp,ssrc,site);
 | 
			
		||||
    }}}}}
 | 
			
		||||
  for(int x=0;x<latt4[0];x++){
 | 
			
		||||
  for(int y=0;y<latt4[1];y++){
 | 
			
		||||
  for(int z=0;z<latt4[2];z++){
 | 
			
		||||
  for(int t=0;t<latt4[3];t++){
 | 
			
		||||
  for(int s=0;s<Ls;s++){
 | 
			
		||||
    std::vector<int> site({s,x,y,z,t});
 | 
			
		||||
    SpinColourVector tmp;
 | 
			
		||||
    peekSite(tmp,src,site);
 | 
			
		||||
    pokeSite(tmp,ssrc,site);
 | 
			
		||||
  }}}}}
 | 
			
		||||
 | 
			
		||||
    double t0=usecond();
 | 
			
		||||
    sDw.Dhop(ssrc,sresult,0);
 | 
			
		||||
    double t1=usecond();
 | 
			
		||||
  double t0=usecond();
 | 
			
		||||
  sDw.Dhop(ssrc,sresult,0);
 | 
			
		||||
  double t1=usecond();
 | 
			
		||||
 | 
			
		||||
#ifdef TIMERS_OFF
 | 
			
		||||
    int ncall =10;
 | 
			
		||||
  int ncall =10;
 | 
			
		||||
#else 
 | 
			
		||||
    int ncall =1+(int) ((5.0*1000*1000)/(t1-t0));
 | 
			
		||||
  int ncall =1+(int) ((5.0*1000*1000)/(t1-t0));
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
    PerformanceCounter Counter(8);
 | 
			
		||||
    Counter.Start();
 | 
			
		||||
    t0=usecond();
 | 
			
		||||
    for(int i=0;i<ncall;i++){
 | 
			
		||||
      sDw.Dhop(ssrc,sresult,0);
 | 
			
		||||
    }
 | 
			
		||||
    t1=usecond();
 | 
			
		||||
    Counter.Stop();
 | 
			
		||||
  PerformanceCounter Counter(8);
 | 
			
		||||
  Counter.Start();
 | 
			
		||||
  t0=usecond();
 | 
			
		||||
  for(int i=0;i<ncall;i++){
 | 
			
		||||
    sDw.Dhop(ssrc,sresult,0);
 | 
			
		||||
  }
 | 
			
		||||
  t1=usecond();
 | 
			
		||||
  Counter.Stop();
 | 
			
		||||
  
 | 
			
		||||
  if ( report ) {
 | 
			
		||||
    Counter.Report();
 | 
			
		||||
  } else { 
 | 
			
		||||
    double volume=Ls;  for(int mu=0;mu<Nd;mu++) volume=volume*latt4[mu];
 | 
			
		||||
    double flops=1344*volume*ncall;
 | 
			
		||||
    std::cout<<"\t"<< flops/(t1-t0);
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
    if ( report ) {
 | 
			
		||||
      Counter.Report();
 | 
			
		||||
    } else { 
 | 
			
		||||
 | 
			
		||||
      double volume=Ls;  for(int mu=0;mu<Nd;mu++) volume=volume*latt4[mu];
 | 
			
		||||
      double flops=1344*volume*ncall;
 | 
			
		||||
      std::cout<<"\t"<< flops/(t1-t0);
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
    LatticeFermion sr_eo(sFGrid);
 | 
			
		||||
    LatticeFermion serr(sFGrid);
 | 
			
		||||
    
 | 
			
		||||
    LatticeFermion ssrc_e (sFrbGrid);
 | 
			
		||||
    LatticeFermion ssrc_o (sFrbGrid);
 | 
			
		||||
    LatticeFermion sr_e   (sFrbGrid);
 | 
			
		||||
    LatticeFermion sr_o   (sFrbGrid);
 | 
			
		||||
  LatticeFermion sr_eo(sFGrid);
 | 
			
		||||
  LatticeFermion serr(sFGrid);
 | 
			
		||||
  
 | 
			
		||||
  LatticeFermion ssrc_e (sFrbGrid);
 | 
			
		||||
  LatticeFermion ssrc_o (sFrbGrid);
 | 
			
		||||
  LatticeFermion sr_e   (sFrbGrid);
 | 
			
		||||
  LatticeFermion sr_o   (sFrbGrid);
 | 
			
		||||
      
 | 
			
		||||
    pickCheckerboard(Even,ssrc_e,ssrc);
 | 
			
		||||
    pickCheckerboard(Odd,ssrc_o,ssrc);
 | 
			
		||||
 | 
			
		||||
    setCheckerboard(sr_eo,ssrc_o);
 | 
			
		||||
    setCheckerboard(sr_eo,ssrc_e);
 | 
			
		||||
    
 | 
			
		||||
    sr_e = zero;
 | 
			
		||||
    sr_o = zero;
 | 
			
		||||
  pickCheckerboard(Even,ssrc_e,ssrc);
 | 
			
		||||
  pickCheckerboard(Odd,ssrc_o,ssrc);
 | 
			
		||||
  
 | 
			
		||||
  setCheckerboard(sr_eo,ssrc_o);
 | 
			
		||||
  setCheckerboard(sr_eo,ssrc_e);
 | 
			
		||||
    
 | 
			
		||||
  sr_e = zero;
 | 
			
		||||
  sr_o = zero;
 | 
			
		||||
  
 | 
			
		||||
  sDw.DhopEO(ssrc_o,sr_e,DaggerNo);
 | 
			
		||||
  PerformanceCounter CounterSdw(8);
 | 
			
		||||
  CounterSdw.Start();
 | 
			
		||||
  t0=usecond();
 | 
			
		||||
  for(int i=0;i<ncall;i++){
 | 
			
		||||
    __SSC_START;
 | 
			
		||||
    sDw.DhopEO(ssrc_o,sr_e,DaggerNo);
 | 
			
		||||
    PerformanceCounter CounterSdw(8);
 | 
			
		||||
    CounterSdw.Start();
 | 
			
		||||
    t0=usecond();
 | 
			
		||||
    for(int i=0;i<ncall;i++){
 | 
			
		||||
      __SSC_START;
 | 
			
		||||
      sDw.DhopEO(ssrc_o,sr_e,DaggerNo);
 | 
			
		||||
      __SSC_STOP;
 | 
			
		||||
    }
 | 
			
		||||
    t1=usecond();
 | 
			
		||||
    CounterSdw.Stop();
 | 
			
		||||
    __SSC_STOP;
 | 
			
		||||
  }
 | 
			
		||||
  t1=usecond();
 | 
			
		||||
  CounterSdw.Stop();
 | 
			
		||||
 | 
			
		||||
    if ( report ) { 
 | 
			
		||||
      CounterSdw.Report();
 | 
			
		||||
    } else {
 | 
			
		||||
 | 
			
		||||
      double volume=Ls;  for(int mu=0;mu<Nd;mu++) volume=volume*latt4[mu];
 | 
			
		||||
      double flops=(1344.0*volume*ncall)/2;
 | 
			
		||||
      std::cout<<"\t"<< flops/(t1-t0);
 | 
			
		||||
    }
 | 
			
		||||
  if ( report ) { 
 | 
			
		||||
    CounterSdw.Report();
 | 
			
		||||
  } else {
 | 
			
		||||
    double volume=Ls;  for(int mu=0;mu<Nd;mu++) volume=volume*latt4[mu];
 | 
			
		||||
    double flops=(1344.0*volume*ncall)/2;
 | 
			
		||||
    std::cout<<"\t"<< flops/(t1-t0);
 | 
			
		||||
  }
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
 
 | 
			
		||||
		Reference in New Issue
	
	Block a user