diff --git a/benchmarks/Benchmark_dwf.cc b/benchmarks/Benchmark_dwf.cc index 73621bbe..1d9de772 100644 --- a/benchmarks/Benchmark_dwf.cc +++ b/benchmarks/Benchmark_dwf.cc @@ -48,7 +48,6 @@ int main (int argc, char ** argv) int threads = GridThread::GetThreads(); - std::cout< latt4 = GridDefaultLatt(); int Ls=16; @@ -57,6 +56,10 @@ int main (int argc, char ** argv) std::stringstream ss(argv[i+1]); ss >> Ls; } + GridLogLayout(); + + long unsigned int single_site_flops = 8*QCD::Nc*(7+16*QCD::Nc); + GridCartesian * UGrid = SpaceTimeGrid::makeFourDimGrid(GridDefaultLatt(), GridDefaultSimd(Nd,vComplex::Nsimd()),GridDefaultMpi()); GridRedBlackCartesian * UrbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(UGrid); @@ -187,7 +190,7 @@ int main (int argc, char ** argv) FGrid->Barrier(); double volume=Ls; for(int mu=0;muBarrier(); double volume=Ls; for(int mu=0;muBarrier(); double volume=Ls; for(int mu=0;mu & latt4, int Ls, int threads,int report ) GridRedBlackCartesian * UrbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(UGrid); GridCartesian * FGrid = SpaceTimeGrid::makeFiveDimGrid(Ls,UGrid); GridRedBlackCartesian * FrbGrid = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls,UGrid); + long unsigned int single_site_flops = 8*QCD::Nc*(7+16*QCD::Nc); std::vector seeds4({1,2,3,4}); std::vector seeds5({5,6,7,8}); @@ -196,7 +198,7 @@ void benchDw(std::vector & latt4, int Ls, int threads,int report ) if ( ! report ) { double volume=Ls; for(int mu=0;mu & latt4, int Ls, int threads,int report ) if(!report){ double volume=Ls; for(int mu=0;mu & latt4, int Ls, int threads,int report ) #define CHECK_SDW void benchsDw(std::vector & latt4, int Ls, int threads, int report ) { + long unsigned int single_site_flops = 8*QCD::Nc*(7+16*QCD::Nc); GridCartesian * UGrid = SpaceTimeGrid::makeFourDimGrid(latt4, GridDefaultSimd(Nd,vComplex::Nsimd()),GridDefaultMpi()); GridRedBlackCartesian * UrbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(UGrid); @@ -321,7 +324,7 @@ void benchsDw(std::vector & latt4, int Ls, int threads, int report ) Counter.Report(); } else { double volume=Ls; for(int mu=0;mu & latt4, int Ls, int threads, int report ) CounterSdw.Report(); } else { double volume=Ls; for(int mu=0;muBarrier(); double volume=Ls; for(int mu=0;muBarrier(); double volume=Ls; for(int mu=0;mu latt_size = GridDefaultLatt(); std::vector simd_layout = GridDefaultSimd(Nd,vComplex::Nsimd()); std::vector mpi_layout = GridDefaultMpi(); @@ -61,10 +64,15 @@ int main (int argc, char ** argv) GridRedBlackCartesian RBGrid(&Grid); int threads = GridThread::GetThreads(); - std::cout< seeds({1,2,3,4}); GridParallelRNG pRNG(&Grid); @@ -134,9 +142,10 @@ int main (int argc, char ** argv) Dw.Dhop(src,result,0); } double t1=usecond(); - double flops=1344*volume*ncall; + double flops=single_site_flops*volume*ncall; std::cout<()); WilsonFermionR Dw(Umu,Grid,RBGrid,mass,params); - + + // Full operator + bench_wilson(src,result,Dw,volume,DaggerNo); + bench_wilson(src,result,Dw,volume,DaggerYes); + std::cout << "\t"; + // EO bench_wilson(src,result,Dw,volume,DaggerNo); bench_wilson(src,result,Dw,volume,DaggerYes); std::cout << std::endl; @@ -122,9 +132,26 @@ void bench_wilson ( int const dag ) { int ncall = 1000; + long unsigned int single_site_flops = 8*QCD::Nc*(7+16*QCD::Nc); double t0 = usecond(); for(int i=0; i