diff --git a/Grid/Benchmark_Grid.cpp b/Grid/Benchmark_Grid.cpp index 3e0dd4d..841f26e 100644 --- a/Grid/Benchmark_Grid.cpp +++ b/Grid/Benchmark_Grid.cpp @@ -18,6 +18,7 @@ You should have received a copy of the GNU General Public License along with this program. If not, see . */ +#include "Common.hpp" #include using namespace Grid; @@ -119,17 +120,13 @@ class Benchmark << "bits ; " << GridCmdVectorIntToString(GridDefaultSimd(4, vComplexD::Nsimd())) << std::endl; - std::cout << GridLogMessage - << "=======================================================================" - "===========" - << std::endl; } static void Comms(void) { int Nloop = 200; int nmu = 0; - int maxlat = 32; + int maxlat = 48; Coordinate simd_layout = GridDefaultSimd(Nd, vComplexD::Nsimd()); Coordinate mpi_layout = GridDefaultMpi(); @@ -151,7 +148,8 @@ class Benchmark << "=======================================================================" "=============================" << std::endl; - comms_header(); + grid_printf("%5s %5s %15s %15s %15s %15s %15s\n", "L", "dir", "payload (B)", + "time (usec)", "rate (GB/s)", "std dev", "max"); for (int lat = 16; lat <= maxlat; lat += 8) { @@ -172,8 +170,6 @@ class Benchmark { xbuf[d] = (HalfSpinColourVectorD *)acceleratorAllocDevice(bytes); rbuf[d] = (HalfSpinColourVectorD *)acceleratorAllocDevice(bytes); - // bzero((void *)xbuf[d],lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD)); - // bzero((void *)rbuf[d],lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD)); } double dbytes; @@ -213,17 +209,12 @@ class Benchmark timestat.statistics(t_time); dbytes = dbytes * ppn; - double xbytes = dbytes * 0.5; - double bidibytes = dbytes; - - std::cout << GridLogMessage << lat << "\t" << Ls << "\t " << bytes << " \t " - << xbytes / timestat.mean << " \t " - << xbytes * timestat.err / (timestat.mean * timestat.mean) << " \t " - << xbytes / timestat.max << " " << xbytes / timestat.min << "\t\t" - << bidibytes / timestat.mean << " " - << bidibytes * timestat.err / (timestat.mean * 
timestat.mean) << " " - << bidibytes / timestat.max << " " << bidibytes / timestat.min - << std::endl; + double bidibytes = 2. * dbytes; + double rate = bidibytes / (timestat.mean / 1.e6) / 1024. / 1024. / 1024.; + double rate_err = rate * timestat.err / timestat.mean; + double rate_max = rate * timestat.mean / timestat.min; + grid_printf("%5d %5d %15d %15.2f %15.2f %15.1f %15.2f\n", lat, dir, bytes, + timestat.mean, rate, rate_err, rate_max); } } for (int d = 0; d < 8; d++) @@ -526,7 +517,7 @@ class Benchmark FGrid->Broadcast(0, &ncall, sizeof(ncall)); // std::cout << GridLogMessage << " Estimate " << ncall << " calls per - //second"<Broadcast(0, &ncall, sizeof(ncall)); // std::cout << GridLogMessage << " Estimate " << ncall << " calls per - //second"< L_list({8, 12, 16, 24, 32}); @@ -841,71 +834,84 @@ int main(int argc, char **argv) std::vector dwf4; std::vector staggered; - int Ls = 1; - std::cout << GridLogMessage - << "=========================================================================" - "=========" - << std::endl; - std::cout << GridLogMessage << " Wilson dslash 4D vectorised" << std::endl; - std::cout << GridLogMessage - << "=========================================================================" - "=========" - << std::endl; - for (int l = 0; l < L_list.size(); l++) + if (do_flops) { - wilson.push_back(Benchmark::DWF(Ls, L_list[l])); - } + Ls = 1; + std::cout + << GridLogMessage + << "=========================================================================" + "=========" + << std::endl; + std::cout << GridLogMessage << " Wilson dslash 4D vectorised" << std::endl; + std::cout + << GridLogMessage + << "=========================================================================" + "=========" + << std::endl; + for (int l = 0; l < L_list.size(); l++) + { + wilson.push_back(Benchmark::DWF(Ls, L_list[l])); + } - Ls = 12; - std::cout << GridLogMessage - << "=========================================================================" - "=========" - << 
std::endl; - std::cout << GridLogMessage << " Domain wall dslash 4D vectorised" << std::endl; - std::cout << GridLogMessage - << "=========================================================================" - "=========" - << std::endl; - for (int l = 0; l < L_list.size(); l++) - { - double result = Benchmark::DWF(Ls, L_list[l]); - dwf4.push_back(result); - } + Ls = 12; + std::cout + << GridLogMessage + << "=========================================================================" + "=========" + << std::endl; + std::cout << GridLogMessage << " Domain wall dslash 4D vectorised" << std::endl; + std::cout + << GridLogMessage + << "=========================================================================" + "=========" + << std::endl; + for (int l = 0; l < L_list.size(); l++) + { + double result = Benchmark::DWF(Ls, L_list[l]); + dwf4.push_back(result); + } - std::cout << GridLogMessage - << "=========================================================================" - "=========" - << std::endl; - std::cout << GridLogMessage << " Improved Staggered dslash 4D vectorised" << std::endl; - std::cout << GridLogMessage - << "=========================================================================" - "=========" - << std::endl; - for (int l = 0; l < L_list.size(); l++) - { - double result = Benchmark::Staggered(L_list[l]); - staggered.push_back(result); - } + std::cout + << GridLogMessage + << "=========================================================================" + "=========" + << std::endl; + std::cout << GridLogMessage << " Improved Staggered dslash 4D vectorised" + << std::endl; + std::cout + << GridLogMessage + << "=========================================================================" + "=========" + << std::endl; + for (int l = 0; l < L_list.size(); l++) + { + double result = Benchmark::Staggered(L_list[l]); + staggered.push_back(result); + } - std::cout << GridLogMessage - << "=========================================================================" - 
"=========" - << std::endl; - std::cout << GridLogMessage << " Summary table Ls=" << Ls << std::endl; - std::cout << GridLogMessage - << "=========================================================================" - "=========" - << std::endl; - std::cout << GridLogMessage << "L \t\t Wilson \t\t DWF4 \t\t Staggered" << std::endl; - for (int l = 0; l < L_list.size(); l++) - { - std::cout << GridLogMessage << L_list[l] << " \t\t " << wilson[l] << " \t\t " - << dwf4[l] << " \t\t " << staggered[l] << std::endl; + std::cout + << GridLogMessage + << "=========================================================================" + "=========" + << std::endl; + std::cout << GridLogMessage << " Summary table Ls=" << Ls << std::endl; + std::cout + << GridLogMessage + << "=========================================================================" + "=========" + << std::endl; + std::cout << GridLogMessage << "L \t\t Wilson \t\t DWF4 \t\t Staggered" << std::endl; + for (int l = 0; l < L_list.size(); l++) + { + std::cout << GridLogMessage << L_list[l] << " \t\t " << wilson[l] << " \t\t " + << dwf4[l] << " \t\t " << staggered[l] << std::endl; + } + std::cout + << GridLogMessage + << "=========================================================================" + "=========" + << std::endl; } - std::cout << GridLogMessage - << "=========================================================================" - "=========" - << std::endl; int NN = NN_global; if (do_memory) @@ -950,40 +956,48 @@ int main(int argc, char **argv) Benchmark::Comms(); } - std::cout << GridLogMessage - << "=========================================================================" - "=========" - << std::endl; - std::cout << GridLogMessage << " Per Node Summary table Ls=" << Ls << std::endl; - std::cout << GridLogMessage - << "=========================================================================" - "=========" - << std::endl; - std::cout << GridLogMessage << " L \t\t Wilson\t\t DWF4\t\t Staggered " << std::endl; - 
for (int l = 0; l < L_list.size(); l++) + if (do_flops) { - std::cout << GridLogMessage << L_list[l] << " \t\t " << wilson[l] / NN << " \t " - << dwf4[l] / NN << " \t " << staggered[l] / NN << std::endl; - } - std::cout << GridLogMessage - << "=========================================================================" - "=========" - << std::endl; + std::cout + << GridLogMessage + << "=========================================================================" + "=========" + << std::endl; + std::cout << GridLogMessage << " Per Node Summary table Ls=" << Ls << std::endl; + std::cout + << GridLogMessage + << "=========================================================================" + "=========" + << std::endl; + std::cout << GridLogMessage << " L \t\t Wilson\t\t DWF4\t\t Staggered " << std::endl; + for (int l = 0; l < L_list.size(); l++) + { + std::cout << GridLogMessage << L_list[l] << " \t\t " << wilson[l] / NN << " \t " + << dwf4[l] / NN << " \t " << staggered[l] / NN << std::endl; + } + std::cout + << GridLogMessage + << "=========================================================================" + "=========" + << std::endl; - std::cout << GridLogMessage - << "=========================================================================" - "=========" - << std::endl; - std::cout << GridLogMessage - << " Comparison point result: " << 0.5 * (dwf4[sel] + dwf4[selm1]) / NN - << " Mflop/s per node" << std::endl; - std::cout << GridLogMessage << " Comparison point is 0.5*(" << dwf4[sel] / NN << "+" - << dwf4[selm1] / NN << ") " << std::endl; - std::cout << std::setprecision(3); - std::cout << GridLogMessage - << "=========================================================================" - "=========" - << std::endl; + std::cout + << GridLogMessage + << "=========================================================================" + "=========" + << std::endl; + std::cout << GridLogMessage + << " Comparison point result: " << 0.5 * (dwf4[sel] + dwf4[selm1]) / NN + << " Mflop/s 
per node" << std::endl; + std::cout << GridLogMessage << " Comparison point is 0.5*(" << dwf4[sel] / NN << "+" + << dwf4[selm1] / NN << ") " << std::endl; + std::cout << std::setprecision(3); + std::cout + << GridLogMessage + << "=========================================================================" + "=========" + << std::endl; + } Grid_finalize(); } diff --git a/Grid/Common.hpp b/Grid/Common.hpp new file mode 100644 index 0000000..ccae02c --- /dev/null +++ b/Grid/Common.hpp @@ -0,0 +1,39 @@ +/* +Copyright © 2022 Antonin Portelli + +This program is free software; you can redistribute it and/or +modify it under the terms of the GNU General Public License +as published by the Free Software Foundation; either version 2 +of the License, or (at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program. If not, see <https://www.gnu.org/licenses/>. +*/ + +#ifndef Grid_Benchmarks_Common_hpp_ +#define Grid_Benchmarks_Common_hpp_ + +#ifndef GRID_MSG +#define GRID_MSG std::cout << GridLogMessage +#endif + +#ifndef GRID_MSG_MAXSIZE +#define GRID_MSG_MAXSIZE 1024 +#endif + +// printf-style logging through GRID_MSG; the formatted text is truncated to +// GRID_MSG_MAXSIZE - 1 characters. The do { } while (0) wrapper makes the macro +// expand to a single statement, so it is safe in unbraced if/else bodies. +#define grid_printf(...) \ + do { \ + char _buf[GRID_MSG_MAXSIZE]; \ + snprintf(_buf, GRID_MSG_MAXSIZE, __VA_ARGS__); \ + GRID_MSG << _buf; \ + } while (0) + +#endif // Grid_Benchmarks_Common_hpp_