diff --git a/benchmarks/Benchmark_ITT.cc b/benchmarks/Benchmark_ITT.cc index 7ad4a147..1bb77aff 100644 --- a/benchmarks/Benchmark_ITT.cc +++ b/benchmarks/Benchmark_ITT.cc @@ -30,7 +30,6 @@ Author: paboyle using namespace Grid; - std::vector L_list; std::vector Ls_list; std::vector mflop_list; @@ -76,7 +75,6 @@ struct controls { int Opt; int CommsOverlap; Grid::CartesianCommunicator::CommunicatorPolicy_t CommsAsynch; - // int HugePages; }; class Benchmark { @@ -119,14 +117,15 @@ public: std::cout<({45,12,81,9})); - for(int lat=8;lat<=lmax;lat+=4){ + for(int lat=8;lat<=lmax;lat+=8){ Coordinate latt_size ({lat*mpi_layout[0],lat*mpi_layout[1],lat*mpi_layout[2],lat*mpi_layout[3]}); int64_t vol= latt_size[0]*latt_size[1]*latt_size[2]*latt_size[3]; + GridCartesian Grid(latt_size,simd_layout,mpi_layout); // NP= Grid.RankCount(); @@ -270,191 +265,8 @@ public: } }; -#if 0 - static double DWF5(int Ls,int L) - { - // RealD mass=0.1; - RealD M5 =1.8; - double mflops; - double mflops_best = 0; - double mflops_worst= 0; - std::vector mflops_all; - - /////////////////////////////////////////////////////// - // Set/Get the layout & grid size - /////////////////////////////////////////////////////// - int threads = GridThread::GetThreads(); - Coordinate mpi = GridDefaultMpi(); assert(mpi.size()==4); - Coordinate local({L,L,L,L}); - - GridCartesian * TmpGrid = SpaceTimeGrid::makeFourDimGrid(Coordinate({64,64,64,64}), - GridDefaultSimd(Nd,vComplex::Nsimd()),GridDefaultMpi()); - uint64_t NP = TmpGrid->RankCount(); - uint64_t NN = TmpGrid->NodeCount(); - NN_global=NN; - uint64_t SHM=NP/NN; - - Coordinate internal; - if ( SHM == 1 ) internal = Coordinate({1,1,1,1}); - else if ( SHM == 2 ) internal = Coordinate({2,1,1,1}); - else if ( SHM == 4 ) internal = Coordinate({2,2,1,1}); - else if ( SHM == 8 ) internal = Coordinate({2,2,2,1}); - else assert(0); - - Coordinate nodes({mpi[0]/internal[0],mpi[1]/internal[1],mpi[2]/internal[2],mpi[3]/internal[3]}); - Coordinate latt4({local[0]*nodes[0],local[1]*nodes[1],local[2]*nodes[2],local[3]*nodes[3]}); - - ///////// Welcome message //////////// - std::cout< seeds4({1,2,3,4}); - std::vector seeds5({5,6,7,8}); - GridParallelRNG RNG4(UGrid); RNG4.SeedFixedIntegers(seeds4); - GridParallelRNG RNG5(sFGrid); RNG5.SeedFixedIntegers(seeds5); - std::cout << GridLogMessage << "Initialised RNGs" << std::endl; - - ///////// Source preparation //////////// - LatticeFermion src (sFGrid); - LatticeFermion tmp (sFGrid); - std::cout << GridLogMessage << "allocated src and tmp" << std::endl; - random(RNG5,src); - std::cout << GridLogMessage << "intialised random source" << std::endl; - - RealD N2 = 1.0/::sqrt(norm2(src)); - src = src*N2; - - LatticeGaugeField Umu(UGrid); SU3::HotConfiguration(RNG4,Umu); - - WilsonFermion5DR sDw(Umu,*sFGrid,*sFrbGrid,*sUGrid,*sUrbGrid,M5); - LatticeFermion src_e (sFrbGrid); - LatticeFermion src_o (sFrbGrid); - LatticeFermion r_e (sFrbGrid); - LatticeFermion r_o (sFrbGrid); - LatticeFermion r_eo (sFGrid); - LatticeFermion err (sFGrid); - { - - pickCheckerboard(Even,src_e,src); - pickCheckerboard(Odd,src_o,src); - -#if defined(AVX512) - const int num_cases = 6; - std::string fmt("A/S ; A/O ; U/S ; U/O ; G/S ; G/O "); -#else - const int num_cases = 4; - std::string fmt("U/S ; U/O ; G/S ; G/O "); -#endif - controls Cases [] = { -#ifdef AVX512 - { WilsonKernelsStatic::OptInlineAsm , WilsonKernelsStatic::CommsThenCompute ,CartesianCommunicator::CommunicatorPolicySequential }, - { WilsonKernelsStatic::OptInlineAsm , WilsonKernelsStatic::CommsAndCompute ,CartesianCommunicator::CommunicatorPolicySequential }, -#endif - { WilsonKernelsStatic::OptHandUnroll, WilsonKernelsStatic::CommsThenCompute ,CartesianCommunicator::CommunicatorPolicySequential }, - { WilsonKernelsStatic::OptHandUnroll, WilsonKernelsStatic::CommsAndCompute ,CartesianCommunicator::CommunicatorPolicySequential }, - { WilsonKernelsStatic::OptGeneric , WilsonKernelsStatic::CommsThenCompute ,CartesianCommunicator::CommunicatorPolicySequential }, - { WilsonKernelsStatic::OptGeneric , WilsonKernelsStatic::CommsAndCompute ,CartesianCommunicator::CommunicatorPolicySequential } - }; - - for(int c=0;cBarrier(); - for(int i=0;iBarrier(); - double t1=usecond(); - - sDw.ZeroCounters(); - time_statistics timestat; - std::vector t_time(ncall); - for(uint64_t i=0;iBarrier(); - - double volume=Ls; for(int mu=0;mumflops_best ) mflops_best = mflops; - if ( mflopsRankCount(); uint64_t NN = TmpGrid->NodeCount(); NN_global=NN; uint64_t SHM=NP/NN; - Coordinate internal; - if ( SHM == 1 ) internal = Coordinate({1,1,1,1}); - else if ( SHM == 2 ) internal = Coordinate({2,1,1,1}); - else if ( SHM == 4 ) internal = Coordinate({2,2,1,1}); - else if ( SHM == 8 ) internal = Coordinate({2,2,2,1}); - else assert(0); - - Coordinate nodes({mpi[0]/internal[0],mpi[1]/internal[1],mpi[2]/internal[2],mpi[3]/internal[3]}); - Coordinate latt4({local[0]*nodes[0],local[1]*nodes[1],local[2]*nodes[2],local[3]*nodes[3]}); + Coordinate latt4({local[0]*mpi[0],local[1]*mpi[1],local[2]*mpi[2],local[3]*mpi[3]}); ///////// Welcome message //////////// std::cout< U(4,FGrid); - auto Umu_v = Umu.View(); - auto Umu5d_v = Umu5d.View(); - for(int ss=0;ssoSites();ss++){ - for(int s=0;s(Umu5d,mu); - } - for(int mu=0;muBarrier(); for(int i=0;iBarrier(); double t1=usecond(); - // uint64_t ncall = (uint64_t) 2.5*1000.0*1000.0*nwarm/(t1-t0); - // if (ncall < 500) ncall = 500; - uint64_t ncall = 1000; + uint64_t ncall = 50; FGrid->Broadcast(0,&ncall,sizeof(ncall)); @@ -649,24 +406,11 @@ public: std::cout< seeds4({1,2,3,4}); + GridParallelRNG RNG4(FGrid); RNG4.SeedFixedIntegers(seeds4); + std::cout << GridLogMessage << "Initialised RNGs" << std::endl; + + RealD mass=0.1; + RealD c1=9.0/8.0; + RealD c2=-1.0/24.0; + RealD u0=1.0; + + typedef ImprovedStaggeredFermionF Action; + typedef typename Action::FermionField Fermion; + typedef LatticeGaugeFieldF Gauge; + + Gauge Umu(FGrid); SU3::HotConfiguration(RNG4,Umu); + + typename Action::ImplParams params; + Action Ds(Umu,Umu,*FGrid,*FrbGrid,mass,c1,c2,u0,params); + + ///////// Source preparation //////////// + Fermion src (FGrid); random(RNG4,src); + Fermion src_e (FrbGrid); + Fermion src_o (FrbGrid); + Fermion r_e (FrbGrid); + Fermion r_o (FrbGrid); + Fermion r_eo (FGrid); + + { + + pickCheckerboard(Even,src_e,src); + pickCheckerboard(Odd,src_o,src); + + const int num_cases = 4; + std::string fmt("G/S/C ; G/O/C ; G/S/S ; G/O/S "); + + controls Cases [] = { + { StaggeredKernelsStatic::OptGeneric , StaggeredKernelsStatic::CommsThenCompute ,CartesianCommunicator::CommunicatorPolicyConcurrent }, + { StaggeredKernelsStatic::OptGeneric , StaggeredKernelsStatic::CommsAndCompute ,CartesianCommunicator::CommunicatorPolicyConcurrent }, + { StaggeredKernelsStatic::OptGeneric , StaggeredKernelsStatic::CommsThenCompute ,CartesianCommunicator::CommunicatorPolicySequential }, + { StaggeredKernelsStatic::OptGeneric , StaggeredKernelsStatic::CommsAndCompute ,CartesianCommunicator::CommunicatorPolicySequential } + }; + + for(int c=0;cBarrier(); + for(int i=0;iBarrier(); + double t1=usecond(); + uint64_t ncall = 500; + + FGrid->Broadcast(0,&ncall,sizeof(ncall)); + + // std::cout << GridLogMessage << " Estimate " << ncall << " calls per second"< t_time(ncall); + for(uint64_t i=0;iBarrier(); + + double volume=1; for(int mu=0;mumflops_best ) mflops_best = mflops; + if ( mflops L_list({16,24,32}); int selm1=sel-1; - std::vector robust_list; std::vector wilson; std::vector dwf4; - std::vector dwf5; + std::vector staggered; - if ( do_wilson ) { - int Ls=1; - std::cout<