Grid benchmark big formatting cleanup

Author: Antonin Portelli, 2023-01-27 18:49:53 +00:00
Parent: 8f043343fb
Commit: 78c464d1d8
2 changed files with 47 additions and 184 deletions
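
For reference, the pattern applied throughout the diff below is to replace the hand-written separator statements with the new grid_big_sep()/grid_small_sep() helpers. Here is a minimal self-contained sketch of a before/after call site; it is an illustration only, with GridLogMessage dropped and plain std::cout used so the snippet compiles on its own, and the function names are hypothetical.

#include <iostream>

#define GRID_BIG_SEP \
  "==============================================================================="
#define grid_big_sep() \
  { \
    std::cout << GRID_BIG_SEP << std::endl; \
  }

// Before the cleanup: every banner repeated the separator literal inline.
void print_banner_before()
{
  std::cout << "======================================================================="
               "==========="
            << std::endl;
  std::cout << "= Benchmarking a*x + y bandwidth" << std::endl;
  std::cout << "======================================================================="
               "==========="
            << std::endl;
}

// After the cleanup: the literal lives in one place and each call site is one line.
void print_banner_after()
{
  grid_big_sep();
  std::cout << "= Benchmarking a*x + y bandwidth" << std::endl;
  grid_big_sep();
}

int main()
{
  print_banner_before();
  print_banner_after();
  return 0;
}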

Changed file 1 of 2 (benchmark source)

@@ -84,16 +84,10 @@ class Benchmark
   {
     int threads = GridThread::GetThreads();
-    std::cout << GridLogMessage
-              << "======================================================================="
-                 "==========="
-              << std::endl;
+    grid_big_sep();
     std::cout << GridLogMessage << "= Grid is setup to use " << threads << " threads"
               << std::endl;
-    std::cout << GridLogMessage
-              << "======================================================================="
-                 "==========="
-              << std::endl;
+    grid_big_sep();
     std::cout << GridLogMessage << "Grid Default Decomposition patterns\n";
     std::cout << GridLogMessage << "\tOpenMP threads : " << GridThread::GetThreads()
               << std::endl;
@@ -141,16 +135,10 @@ class Benchmark
     std::vector<double> t_time(Nloop);
     time_statistics timestat;
-    std::cout << GridLogMessage
-              << "======================================================================="
-                 "============================="
-              << std::endl;
+    grid_big_sep();
     std::cout << GridLogMessage << "= Benchmarking threaded STENCIL halo exchange in "
               << nmu << " dimensions" << std::endl;
-    std::cout << GridLogMessage
-              << "======================================================================="
-                 "============================="
-              << std::endl;
+    grid_big_sep();
     grid_printf("%5s %5s %15s %15s %15s %15s %15s\n", "L", "dir", "payload (B)",
                 "time (usec)", "rate (GB/s)", "std dev", "max");
@@ -245,15 +233,9 @@ class Benchmark
     Coordinate simd_layout = GridDefaultSimd(Nd, vReal::Nsimd());
     Coordinate mpi_layout = GridDefaultMpi();
-    std::cout << GridLogMessage
-              << "======================================================================="
-                 "==========="
-              << std::endl;
+    grid_big_sep();
     std::cout << GridLogMessage << "= Benchmarking a*x + y bandwidth" << std::endl;
-    std::cout << GridLogMessage
-              << "======================================================================="
-                 "==========="
-              << std::endl;
+    grid_big_sep();
     std::cout << GridLogMessage << " L "
               << "\t\t"
               << "bytes"
@@ -263,9 +245,6 @@
               << "Gflop/s"
               << "\t\t seconds"
               << "\t\tGB/s / node" << std::endl;
-    std::cout << GridLogMessage
-              << "----------------------------------------------------------"
-              << std::endl;
     // uint64_t NP;
     uint64_t NN;
@@ -332,15 +311,9 @@ class Benchmark
     Coordinate simd_layout = GridDefaultSimd(Nd, vComplexF::Nsimd());
     Coordinate mpi_layout = GridDefaultMpi();
-    std::cout << GridLogMessage
-              << "======================================================================="
-                 "==========="
-              << std::endl;
+    grid_big_sep();
     std::cout << GridLogMessage << "= Benchmarking z = y*x SU(4) bandwidth" << std::endl;
-    std::cout << GridLogMessage
-              << "======================================================================="
-                 "==========="
-              << std::endl;
+    grid_big_sep();
     std::cout << GridLogMessage << " L "
               << "\t\t"
               << "bytes"
@@ -350,9 +323,6 @@
               << "Gflop/s"
               << "\t\t seconds"
               << "\t\tGB/s / node" << std::endl;
-    std::cout << GridLogMessage
-              << "----------------------------------------------------------"
-              << std::endl;
     uint64_t NN;
@@ -433,10 +403,7 @@ class Benchmark
     uint64_t SHM = NP / NN;
     ///////// Welcome message ////////////
-    std::cout << GridLogMessage
-              << "======================================================================="
-                 "==========="
-              << std::endl;
+    grid_big_sep();
     std::cout << GridLogMessage << "Benchmark DWF on " << L << "^4 local volume "
               << std::endl;
     std::cout << GridLogMessage << "* Nc : " << Nc << std::endl;
@@ -449,10 +416,7 @@ class Benchmark
     std::cout << GridLogMessage << "* ranks geom : " << GridCmdVectorIntToString(mpi)
               << std::endl;
     std::cout << GridLogMessage << "* Using " << threads << " threads" << std::endl;
-    std::cout << GridLogMessage
-              << "======================================================================="
-                 "==========="
-              << std::endl;
+    grid_big_sep();
     ///////// Lattice Init ////////////
     GridCartesian *UGrid = SpaceTimeGrid::makeFourDimGrid(
@@ -511,10 +475,7 @@ class Benchmark
       WilsonKernelsStatic::Opt = Cases[c].Opt;
       CartesianCommunicator::SetCommunicatorPolicy(Cases[c].CommsAsynch);
-      std::cout << GridLogMessage
-                << "==================================================================="
-                   "==============="
-                << std::endl;
+      grid_small_sep();
       if (WilsonKernelsStatic::Opt == WilsonKernelsStatic::OptGeneric)
         std::cout << GridLogMessage << "* Using GENERIC Nc WilsonKernels" << std::endl;
       if (WilsonKernelsStatic::Comms == WilsonKernelsStatic::CommsAndCompute)
@@ -522,10 +483,7 @@ class Benchmark
       if (WilsonKernelsStatic::Comms == WilsonKernelsStatic::CommsThenCompute)
         std::cout << GridLogMessage << "* Using sequential Comms/Compute" << std::endl;
       std::cout << GridLogMessage << "* SINGLE precision " << std::endl;
-      std::cout << GridLogMessage
-                << "==================================================================="
-                   "==============="
-                << std::endl;
+      grid_small_sep();
       int nwarm = 10;
       double t0 = usecond();
@@ -598,10 +556,7 @@ class Benchmark
                   << "Deo mflop/s per node " << mflops / NN << std::endl;
       }
-      std::cout << GridLogMessage
-                << "====================================================================="
-                   "============="
-                << std::endl;
+      grid_small_sep();
       std::cout << GridLogMessage << L << "^4 x " << Ls
                 << " Deo Best mflop/s = " << mflops_best << " ; "
                 << mflops_best / NN << " per node " << std::endl;
@@ -616,10 +571,6 @@ class Benchmark
         std::cout << mflops_all[i] / NN << " ; ";
       }
       std::cout << std::endl;
-      std::cout << GridLogMessage
-                << "====================================================================="
-                   "============="
-                << std::endl;
     }
     return mflops_best;
   }
@@ -649,10 +600,7 @@ class Benchmark
     uint64_t SHM = NP / NN;
     ///////// Welcome message ////////////
-    std::cout << GridLogMessage
-              << "======================================================================="
-                 "==========="
-              << std::endl;
+    grid_big_sep();
     std::cout << GridLogMessage << "Benchmark ImprovedStaggered on " << L
               << "^4 local volume " << std::endl;
     std::cout << GridLogMessage
@@ -663,10 +611,7 @@ class Benchmark
     std::cout << GridLogMessage << "* ranks geom : " << GridCmdVectorIntToString(mpi)
               << std::endl;
     std::cout << GridLogMessage << "* Using " << threads << " threads" << std::endl;
-    std::cout << GridLogMessage
-              << "======================================================================="
-                 "==========="
-              << std::endl;
+    grid_big_sep();
     ///////// Lattice Init ////////////
     GridCartesian *FGrid = SpaceTimeGrid::makeFourDimGrid(
@@ -728,10 +673,7 @@ class Benchmark
       StaggeredKernelsStatic::Opt = Cases[c].Opt;
       CartesianCommunicator::SetCommunicatorPolicy(Cases[c].CommsAsynch);
-      std::cout << GridLogMessage
-                << "==================================================================="
-                   "==============="
-                << std::endl;
+      grid_small_sep();
       if (StaggeredKernelsStatic::Opt == StaggeredKernelsStatic::OptGeneric)
         std::cout << GridLogMessage << "* Using GENERIC Nc StaggeredKernels"
                   << std::endl;
@@ -740,10 +682,7 @@ class Benchmark
       if (StaggeredKernelsStatic::Comms == StaggeredKernelsStatic::CommsThenCompute)
         std::cout << GridLogMessage << "* Using sequential Comms/Compute" << std::endl;
       std::cout << GridLogMessage << "* SINGLE precision " << std::endl;
-      std::cout << GridLogMessage
-                << "==================================================================="
-                   "==============="
-                << std::endl;
+      grid_small_sep();
       int nwarm = 10;
       double t0 = usecond();
@@ -757,9 +696,6 @@ class Benchmark
       uint64_t ncall = 500;
       FGrid->Broadcast(0, &ncall, sizeof(ncall));
-      // std::cout << GridLogMessage << " Estimate " << ncall << " calls per
-      // second"<<std::endl;
       Ds.ZeroCounters();
       time_statistics timestat;
@@ -804,10 +740,7 @@ class Benchmark
                   << "Deo mflop/s per node " << mflops / NN << std::endl;
       }
-      std::cout << GridLogMessage
-                << "====================================================================="
-                   "============="
-                << std::endl;
+      grid_small_sep();
       std::cout << GridLogMessage << L
                 << "^4 Deo Best mflop/s = " << mflops_best << " ; "
                 << mflops_best / NN << " per node " << std::endl;
@@ -823,10 +756,6 @@ class Benchmark
       }
       std::cout << std::endl;
     }
-    std::cout << GridLogMessage
-              << "======================================================================="
-                 "==========="
-              << std::endl;
     return mflops_best;
   }
 };
@@ -868,69 +797,34 @@ int main(int argc, char **argv)
   if (do_flops)
   {
     Ls = 1;
-    std::cout
-        << GridLogMessage
-        << "========================================================================="
-           "========="
-        << std::endl;
+    grid_big_sep();
     std::cout << GridLogMessage << " Wilson dslash 4D vectorised" << std::endl;
-    std::cout
-        << GridLogMessage
-        << "========================================================================="
-           "========="
-        << std::endl;
     for (int l = 0; l < L_list.size(); l++)
     {
       wilson.push_back(Benchmark::DWF(Ls, L_list[l]));
     }
     Ls = 12;
-    std::cout
-        << GridLogMessage
-        << "========================================================================="
-           "========="
-        << std::endl;
+    grid_big_sep();
     std::cout << GridLogMessage << " Domain wall dslash 4D vectorised" << std::endl;
-    std::cout
-        << GridLogMessage
-        << "========================================================================="
-           "========="
-        << std::endl;
     for (int l = 0; l < L_list.size(); l++)
     {
       double result = Benchmark::DWF(Ls, L_list[l]);
       dwf4.push_back(result);
     }
-    std::cout
-        << GridLogMessage
-        << "========================================================================="
-           "========="
-        << std::endl;
+    grid_big_sep();
     std::cout << GridLogMessage << " Improved Staggered dslash 4D vectorised"
               << std::endl;
-    std::cout
-        << GridLogMessage
-        << "========================================================================="
-           "========="
-        << std::endl;
     for (int l = 0; l < L_list.size(); l++)
     {
       double result = Benchmark::Staggered(L_list[l]);
       staggered.push_back(result);
     }
-    std::cout
-        << GridLogMessage
-        << "========================================================================="
-           "========="
-        << std::endl;
+    grid_big_sep();
     std::cout << GridLogMessage << " Summary table Ls=" << Ls << std::endl;
-    std::cout
-        << GridLogMessage
-        << "========================================================================="
-           "========="
-        << std::endl;
+    grid_big_sep();
     std::cout << GridLogMessage << "L \t\t Wilson \t\t DWF4 \t\t Staggered" << std::endl;
     for (int l = 0; l < L_list.size(); l++)
     {
@@ -943,97 +837,52 @@ int main(int argc, char **argv)
       tmp["mflops_staggered"] = staggered[l];
       json_results["flops"].push_back(tmp);
     }
-    std::cout
-        << GridLogMessage
-        << "========================================================================="
-           "========="
-        << std::endl;
   }
   int NN = NN_global;
   if (do_memory)
   {
-    std::cout << GridLogMessage
-              << "======================================================================="
-                 "==========="
-              << std::endl;
+    grid_big_sep();
     std::cout << GridLogMessage << " Memory benchmark " << std::endl;
-    std::cout << GridLogMessage
-              << "======================================================================="
-                 "==========="
-              << std::endl;
+    grid_big_sep();
     Benchmark::Memory();
   }
   if (do_su4)
   {
-    std::cout << GridLogMessage
-              << "======================================================================="
-                 "==========="
-              << std::endl;
+    grid_big_sep();
     std::cout << GridLogMessage << " SU(4) benchmark " << std::endl;
-    std::cout << GridLogMessage
-              << "======================================================================="
-                 "==========="
-              << std::endl;
+    grid_big_sep();
     Benchmark::SU4();
   }
   if (do_comms)
   {
-    std::cout << GridLogMessage
-              << "======================================================================="
-                 "==========="
-              << std::endl;
+    grid_big_sep();
     std::cout << GridLogMessage << " Communications benchmark " << std::endl;
-    std::cout << GridLogMessage
-              << "======================================================================="
-                 "==========="
-              << std::endl;
+    grid_big_sep();
     Benchmark::Comms();
   }
   if (do_flops)
   {
-    std::cout
-        << GridLogMessage
-        << "========================================================================="
-           "========="
-        << std::endl;
+    grid_big_sep();
     std::cout << GridLogMessage << " Per Node Summary table Ls=" << Ls << std::endl;
-    std::cout
-        << GridLogMessage
-        << "========================================================================="
-           "========="
-        << std::endl;
+    grid_big_sep();
     std::cout << GridLogMessage << " L \t\t Wilson\t\t DWF4\t\t Staggered " << std::endl;
     for (int l = 0; l < L_list.size(); l++)
    {
       std::cout << GridLogMessage << L_list[l] << " \t\t " << wilson[l] / NN << " \t "
                 << dwf4[l] / NN << " \t " << staggered[l] / NN << std::endl;
     }
-    std::cout
-        << GridLogMessage
-        << "========================================================================="
-           "========="
-        << std::endl;
-    std::cout
-        << GridLogMessage
-        << "========================================================================="
-           "========="
-        << std::endl;
+    grid_big_sep();
     std::cout << GridLogMessage
               << " Comparison point result: " << 0.5 * (dwf4[sel] + dwf4[selm1]) / NN
               << " Mflop/s per node" << std::endl;
     std::cout << GridLogMessage << " Comparison point is 0.5*(" << dwf4[sel] / NN << "+"
               << dwf4[selm1] / NN << ") " << std::endl;
     std::cout << std::setprecision(3);
-    std::cout
-        << GridLogMessage
-        << "========================================================================="
-           "========="
-        << std::endl;
+    grid_big_sep();
   }
   if (!json_filename.empty())

Changed file 2 of 2 (message/formatting macros header)

@@ -26,6 +26,20 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
 #define GRID_MSG_MAXSIZE 1024
 #endif
 
+#define GRID_BIG_SEP \
+  "==============================================================================="
+#define GRID_SMALL_SEP "------------------------------------------"
+
+#define grid_big_sep() \
+  { \
+    GRID_MSG << GRID_BIG_SEP << std::endl; \
+  }
+
+#define grid_small_sep() \
+  { \
+    GRID_MSG << GRID_SMALL_SEP << std::endl; \
+  }
+
 #define grid_printf(...) \
   { \
     char _buf[GRID_MSG_MAXSIZE]; \
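
As a usage note, here is a standalone sketch of how the new helpers combine with grid_printf to produce the benchmark banners and table headers. It assumes GRID_MSG simply forwards to std::cout (in the real header it is tied to Grid's logging stream), and the grid_printf body below is a simplified stand-in, since the original macro is truncated in the hunk above.

#include <cstdio>
#include <iostream>

// Assumption: GRID_MSG stands in for the Grid-logged output stream.
#define GRID_MSG std::cout
#define GRID_MSG_MAXSIZE 1024

#define GRID_BIG_SEP \
  "==============================================================================="
#define GRID_SMALL_SEP "------------------------------------------"

#define grid_big_sep() \
  { \
    GRID_MSG << GRID_BIG_SEP << std::endl; \
  }

#define grid_small_sep() \
  { \
    GRID_MSG << GRID_SMALL_SEP << std::endl; \
  }

// Simplified stand-in: format into a bounded buffer, then hand it to GRID_MSG.
#define grid_printf(...) \
  { \
    char _buf[GRID_MSG_MAXSIZE]; \
    std::snprintf(_buf, GRID_MSG_MAXSIZE, __VA_ARGS__); \
    GRID_MSG << _buf; \
  }

int main()
{
  grid_big_sep();
  GRID_MSG << "= Benchmarking threaded STENCIL halo exchange" << std::endl;
  grid_big_sep();
  grid_printf("%5s %5s %15s\n", "L", "dir", "payload (B)");
  grid_small_sep();
  return 0;
}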