diff --git a/Grid/Benchmark_Grid.cpp b/Grid/Benchmark_Grid.cpp
index 3e0dd4d..841f26e 100644
--- a/Grid/Benchmark_Grid.cpp
+++ b/Grid/Benchmark_Grid.cpp
@@ -18,6 +18,7 @@ You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
+#include "Common.hpp"
#include
using namespace Grid;
@@ -119,17 +120,13 @@ class Benchmark
<< "bits ; "
<< GridCmdVectorIntToString(GridDefaultSimd(4, vComplexD::Nsimd()))
<< std::endl;
- std::cout << GridLogMessage
- << "======================================================================="
- "==========="
- << std::endl;
}
static void Comms(void)
{
int Nloop = 200;
int nmu = 0;
- int maxlat = 32;
+ int maxlat = 48;
Coordinate simd_layout = GridDefaultSimd(Nd, vComplexD::Nsimd());
Coordinate mpi_layout = GridDefaultMpi();
@@ -151,7 +148,8 @@ class Benchmark
<< "======================================================================="
"============================="
<< std::endl;
- comms_header();
+ grid_printf("%5s %5s %15s %15s %15s %15s %15s\n", "L", "dir", "payload (B)",
+ "time (usec)", "rate (GB/s)", "std dev", "max");
for (int lat = 16; lat <= maxlat; lat += 8)
{
@@ -172,8 +170,6 @@ class Benchmark
{
xbuf[d] = (HalfSpinColourVectorD *)acceleratorAllocDevice(bytes);
rbuf[d] = (HalfSpinColourVectorD *)acceleratorAllocDevice(bytes);
- // bzero((void *)xbuf[d],lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD));
- // bzero((void *)rbuf[d],lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD));
}
double dbytes;
@@ -213,17 +209,12 @@ class Benchmark
timestat.statistics(t_time);
dbytes = dbytes * ppn;
- double xbytes = dbytes * 0.5;
- double bidibytes = dbytes;
-
- std::cout << GridLogMessage << lat << "\t" << Ls << "\t " << bytes << " \t "
- << xbytes / timestat.mean << " \t "
- << xbytes * timestat.err / (timestat.mean * timestat.mean) << " \t "
- << xbytes / timestat.max << " " << xbytes / timestat.min << "\t\t"
- << bidibytes / timestat.mean << " "
- << bidibytes * timestat.err / (timestat.mean * timestat.mean) << " "
- << bidibytes / timestat.max << " " << bidibytes / timestat.min
- << std::endl;
+ double bidibytes = 2. * dbytes;
+ double rate = bidibytes / (timestat.mean / 1.e6) / 1024. / 1024. / 1024.;
+ double rate_err = rate * timestat.err / timestat.mean;
+ double rate_max = rate * timestat.mean / timestat.min;
+ grid_printf("%5d %5d %15d %15.2f %15.2f %15.1f %15.2f\n", lat, dir, bytes,
+ timestat.mean, rate, rate_err, rate_max);
}
}
for (int d = 0; d < 8; d++)
@@ -526,7 +517,7 @@ class Benchmark
FGrid->Broadcast(0, &ncall, sizeof(ncall));
// std::cout << GridLogMessage << " Estimate " << ncall << " calls per
- //second"<Broadcast(0, &ncall, sizeof(ncall));
// std::cout << GridLogMessage << " Estimate " << ncall << " calls per
- //second"< L_list({8, 12, 16, 24, 32});
@@ -841,71 +834,84 @@ int main(int argc, char **argv)
std::vector dwf4;
std::vector staggered;
- int Ls = 1;
- std::cout << GridLogMessage
- << "========================================================================="
- "========="
- << std::endl;
- std::cout << GridLogMessage << " Wilson dslash 4D vectorised" << std::endl;
- std::cout << GridLogMessage
- << "========================================================================="
- "========="
- << std::endl;
- for (int l = 0; l < L_list.size(); l++)
+ if (do_flops)
{
- wilson.push_back(Benchmark::DWF(Ls, L_list[l]));
- }
+ Ls = 1;
+ std::cout
+ << GridLogMessage
+ << "========================================================================="
+ "========="
+ << std::endl;
+ std::cout << GridLogMessage << " Wilson dslash 4D vectorised" << std::endl;
+ std::cout
+ << GridLogMessage
+ << "========================================================================="
+ "========="
+ << std::endl;
+ for (int l = 0; l < L_list.size(); l++)
+ {
+ wilson.push_back(Benchmark::DWF(Ls, L_list[l]));
+ }
- Ls = 12;
- std::cout << GridLogMessage
- << "========================================================================="
- "========="
- << std::endl;
- std::cout << GridLogMessage << " Domain wall dslash 4D vectorised" << std::endl;
- std::cout << GridLogMessage
- << "========================================================================="
- "========="
- << std::endl;
- for (int l = 0; l < L_list.size(); l++)
- {
- double result = Benchmark::DWF(Ls, L_list[l]);
- dwf4.push_back(result);
- }
+ Ls = 12;
+ std::cout
+ << GridLogMessage
+ << "========================================================================="
+ "========="
+ << std::endl;
+ std::cout << GridLogMessage << " Domain wall dslash 4D vectorised" << std::endl;
+ std::cout
+ << GridLogMessage
+ << "========================================================================="
+ "========="
+ << std::endl;
+ for (int l = 0; l < L_list.size(); l++)
+ {
+ double result = Benchmark::DWF(Ls, L_list[l]);
+ dwf4.push_back(result);
+ }
- std::cout << GridLogMessage
- << "========================================================================="
- "========="
- << std::endl;
- std::cout << GridLogMessage << " Improved Staggered dslash 4D vectorised" << std::endl;
- std::cout << GridLogMessage
- << "========================================================================="
- "========="
- << std::endl;
- for (int l = 0; l < L_list.size(); l++)
- {
- double result = Benchmark::Staggered(L_list[l]);
- staggered.push_back(result);
- }
+ std::cout
+ << GridLogMessage
+ << "========================================================================="
+ "========="
+ << std::endl;
+ std::cout << GridLogMessage << " Improved Staggered dslash 4D vectorised"
+ << std::endl;
+ std::cout
+ << GridLogMessage
+ << "========================================================================="
+ "========="
+ << std::endl;
+ for (int l = 0; l < L_list.size(); l++)
+ {
+ double result = Benchmark::Staggered(L_list[l]);
+ staggered.push_back(result);
+ }
- std::cout << GridLogMessage
- << "========================================================================="
- "========="
- << std::endl;
- std::cout << GridLogMessage << " Summary table Ls=" << Ls << std::endl;
- std::cout << GridLogMessage
- << "========================================================================="
- "========="
- << std::endl;
- std::cout << GridLogMessage << "L \t\t Wilson \t\t DWF4 \t\t Staggered" << std::endl;
- for (int l = 0; l < L_list.size(); l++)
- {
- std::cout << GridLogMessage << L_list[l] << " \t\t " << wilson[l] << " \t\t "
- << dwf4[l] << " \t\t " << staggered[l] << std::endl;
+ std::cout
+ << GridLogMessage
+ << "========================================================================="
+ "========="
+ << std::endl;
+ std::cout << GridLogMessage << " Summary table Ls=" << Ls << std::endl;
+ std::cout
+ << GridLogMessage
+ << "========================================================================="
+ "========="
+ << std::endl;
+ std::cout << GridLogMessage << "L \t\t Wilson \t\t DWF4 \t\t Staggered" << std::endl;
+ for (int l = 0; l < L_list.size(); l++)
+ {
+ std::cout << GridLogMessage << L_list[l] << " \t\t " << wilson[l] << " \t\t "
+ << dwf4[l] << " \t\t " << staggered[l] << std::endl;
+ }
+ std::cout
+ << GridLogMessage
+ << "========================================================================="
+ "========="
+ << std::endl;
}
- std::cout << GridLogMessage
- << "========================================================================="
- "========="
- << std::endl;
int NN = NN_global;
if (do_memory)
@@ -950,40 +956,48 @@ int main(int argc, char **argv)
Benchmark::Comms();
}
- std::cout << GridLogMessage
- << "========================================================================="
- "========="
- << std::endl;
- std::cout << GridLogMessage << " Per Node Summary table Ls=" << Ls << std::endl;
- std::cout << GridLogMessage
- << "========================================================================="
- "========="
- << std::endl;
- std::cout << GridLogMessage << " L \t\t Wilson\t\t DWF4\t\t Staggered " << std::endl;
- for (int l = 0; l < L_list.size(); l++)
+ if (do_flops)
{
- std::cout << GridLogMessage << L_list[l] << " \t\t " << wilson[l] / NN << " \t "
- << dwf4[l] / NN << " \t " << staggered[l] / NN << std::endl;
- }
- std::cout << GridLogMessage
- << "========================================================================="
- "========="
- << std::endl;
+ std::cout
+ << GridLogMessage
+ << "========================================================================="
+ "========="
+ << std::endl;
+ std::cout << GridLogMessage << " Per Node Summary table Ls=" << Ls << std::endl;
+ std::cout
+ << GridLogMessage
+ << "========================================================================="
+ "========="
+ << std::endl;
+ std::cout << GridLogMessage << " L \t\t Wilson\t\t DWF4\t\t Staggered " << std::endl;
+ for (int l = 0; l < L_list.size(); l++)
+ {
+ std::cout << GridLogMessage << L_list[l] << " \t\t " << wilson[l] / NN << " \t "
+ << dwf4[l] / NN << " \t " << staggered[l] / NN << std::endl;
+ }
+ std::cout
+ << GridLogMessage
+ << "========================================================================="
+ "========="
+ << std::endl;
- std::cout << GridLogMessage
- << "========================================================================="
- "========="
- << std::endl;
- std::cout << GridLogMessage
- << " Comparison point result: " << 0.5 * (dwf4[sel] + dwf4[selm1]) / NN
- << " Mflop/s per node" << std::endl;
- std::cout << GridLogMessage << " Comparison point is 0.5*(" << dwf4[sel] / NN << "+"
- << dwf4[selm1] / NN << ") " << std::endl;
- std::cout << std::setprecision(3);
- std::cout << GridLogMessage
- << "========================================================================="
- "========="
- << std::endl;
+ std::cout
+ << GridLogMessage
+ << "========================================================================="
+ "========="
+ << std::endl;
+ std::cout << GridLogMessage
+ << " Comparison point result: " << 0.5 * (dwf4[sel] + dwf4[selm1]) / NN
+ << " Mflop/s per node" << std::endl;
+ std::cout << GridLogMessage << " Comparison point is 0.5*(" << dwf4[sel] / NN << "+"
+ << dwf4[selm1] / NN << ") " << std::endl;
+ std::cout << std::setprecision(3);
+ std::cout
+ << GridLogMessage
+ << "========================================================================="
+ "========="
+ << std::endl;
+ }
Grid_finalize();
}
diff --git a/Grid/Common.hpp b/Grid/Common.hpp
new file mode 100644
index 0000000..ccae02c
--- /dev/null
+++ b/Grid/Common.hpp
@@ -0,0 +1,36 @@
+/*
+Copyright © 2022 Antonin Portelli
+
+This program is free software; you can redistribute it and/or
+modify it under the terms of the GNU General Public License
+as published by the Free Software Foundation; either version 2
+of the License, or (at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program. If not, see <http://www.gnu.org/licenses/>.
+*/
+
+#ifndef Grid_Benchmarks_Common_hpp_
+#define Grid_Benchmarks_Common_hpp_
+
+#ifndef GRID_MSG
+#define GRID_MSG std::cout << GridLogMessage
+#endif
+
+#ifndef GRID_MSG_MAXSIZE
+#define GRID_MSG_MAXSIZE 1024
+#endif
+
+#define grid_printf(...) /* printf-style formatting streamed through GRID_MSG */ \
+  do { /* do/while(0): expands to ONE statement, so it is safe in if/else */ \
+    char _buf[GRID_MSG_MAXSIZE]; /* output longer than this is truncated */ \
+    snprintf(_buf, GRID_MSG_MAXSIZE, __VA_ARGS__); \
+    GRID_MSG << _buf; \
+  } while (0)
+
+#endif // Grid_Benchmarks_Common_hpp_