Merge pull request 'Add json output' (#1) from simon.buerger/lattice-benchmarks:main into main

Reviewed-on: portelli/lattice-benchmarks#1
2023-01-27 12:28:30 +00:00
parent a85f4cc169 086d58da32
commit f68930d6b5
3 changed files with 24751 additions and 3 deletions
--- a/Grid/Benchmark_Grid.cpp
+++ b/Grid/Benchmark_Grid.cpp
@@ -19,6 +19,7 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
 */

 #include "Common.hpp"
+#include "json.hpp"
 #include <Grid/Grid.h>

 using namespace Grid;
@@ -32,6 +33,8 @@ double mflop_ref_err;

 int NN_global;

+nlohmann::json json_results;
+
 struct time_statistics
 {
  double mean;
@@ -215,6 +218,13 @@ class Benchmark
          double rate_max = rate * timestat.mean / timestat.min;
          grid_printf("%5d %5d %15d %15.2f %15.2f %15.1f %15.2f\n", lat, dir, bytes,
                      timestat.mean, rate, rate_err, rate_max);
+          nlohmann::json tmp;
+          tmp["L"] = lat;
+          tmp["dir"] = dir;
+          tmp["bytes"] = bytes;
+          tmp["time"] = timestat.mean;
+          tmp["GB_per_second"] = rate;
+          json_results["comms"].push_back(tmp);
        }
      }
      for (int d = 0; d < 8; d++)
@@ -304,6 +314,13 @@ class Benchmark
                << "   \t\t" << bytes / time << "\t\t" << flops / time << "\t\t"
                << (stop - start) / 1000. / 1000. << "\t\t" << bytes / time / NN
                << std::endl;
+
+      nlohmann::json tmp;
+      tmp["L"] = lat;
+      tmp["bytes"] = bytes;
+      tmp["gflops"] = flops / time;
+      tmp["GB_per_second"] = bytes / time;
+      json_results["axpy"].push_back(tmp);
    }
  };

@@ -378,6 +395,13 @@ class Benchmark
                << "   \t\t" << bytes / time << "\t\t" << flops / time << "\t\t"
                << (stop - start) / 1000. / 1000. << "\t\t" << bytes / time / NN
                << std::endl;
+
+      nlohmann::json tmp;
+      tmp["L"] = lat;
+      tmp["bytes"] = bytes;
+      tmp["GB_per_second"] = bytes / time;
+      tmp["gflops"] = flops / time;
+      json_results["SU4"].push_back(tmp);
    }
  };

@@ -811,6 +835,13 @@ int main(int argc, char **argv)
 {
  Grid_init(&argc, &argv);

+  std::string json_filename = ""; // empty indicates no json output
+  for (int i = 0; i < argc; i++) // "--json-out <file>": ignored when no value follows
+  {
+    if (i + 1 < argc && std::string(argv[i]) == "--json-out")
+      json_filename = argv[i + 1];
+  }
+
  CartesianCommunicator::SetCommunicatorPolicy(
      CartesianCommunicator::CommunicatorPolicySequential);
 #ifdef KNL
@@ -820,10 +851,10 @@ int main(int argc, char **argv)
 #endif
  Benchmark::Decomposition();

-  int do_su4 = 0;
-  int do_memory = 0;
+  int do_su4 = 1;
+  int do_memory = 1;
  int do_comms = 1;
-  int do_flops = 0;
+  int do_flops = 1;
  int Ls = 1;

  int sel = 4;
@@ -905,6 +936,12 @@ int main(int argc, char **argv)
    {
      std::cout << GridLogMessage << L_list[l] << " \t\t " << wilson[l] << " \t\t "
                << dwf4[l] << " \t\t " << staggered[l] << std::endl;
+      nlohmann::json tmp;
+      tmp["L"] = L_list[l];
+      tmp["mflops_wilson"] = wilson[l];
+      tmp["mflops_dwf4"] = dwf4[l];
+      tmp["mflops_staggered"] = staggered[l];
+      json_results["flops"].push_back(tmp);
    }
    std::cout
        << GridLogMessage
@@ -999,5 +1036,24 @@ int main(int argc, char **argv)
        << std::endl;
  }

+  // Optional JSON export of the collected results (requested with --json-out).
+  if (!json_filename.empty())
+  {
+    std::cout << GridLogMessage << "writing benchmark results to " << json_filename
+              << std::endl;
+
+    // Only MPI rank 0 writes the file.
+    int me = 0;
+    MPI_Comm_rank(MPI_COMM_WORLD, &me);
+    if (me == 0)
+    {
+      std::ofstream json_file(json_filename);
+      json_file << std::setw(4) << json_results << std::flush;
+      // Flush, then check the stream so a failed open or write is reported.
+      if (!json_file)
+        std::cerr << "error: could not write " << json_filename << std::endl;
+    }
+  }
+
  Grid_finalize();
 }
--- a/Grid/Benchmark_dwf_fp32.cpp
+++ b/Grid/Benchmark_dwf_fp32.cpp
@@ -1,6 +1,7 @@
 /*
 Copyright © 2015 Peter Boyle <paboyle@ph.ed.ac.uk>
 Copyright © 2022 Antonin Portelli <antonin.portelli@me.com>
+Copyright © 2023 Simon Bürger <simon.buerger@rwth-aachen.de>

 This program is free software; you can redistribute it and/or
 modify it under the terms of the GNU General Public License
@@ -16,6 +17,7 @@ You should have received a copy of the GNU General Public License
 along with this program. If not, see <http://www.gnu.org/licenses/>.
 */

+#include "json.hpp"
 #include <Grid/Grid.h>
 #ifdef GRID_CUDA
 #define CUDA_PROFILE
@@ -44,26 +46,41 @@ int main(int argc, char **argv)

  Coordinate latt4 = GridDefaultLatt();
  int Ls = 16;
+  std::string json_filename = ""; // empty indicates no json output
+  nlohmann::json json;
+
+  // benchmark specific command line arguments
  for (int i = 0; i < argc; i++)
+  {
    if (std::string(argv[i]) == "-Ls")
    {
      std::stringstream ss(argv[i + 1]);
      ss >> Ls;
    }
+    if (i + 1 < argc && std::string(argv[i]) == "--json-out")
+      json_filename = argv[i + 1]; // only read the value when one follows the flag
+  }

  GridLogLayout();

  long unsigned int single_site_flops = 8 * Nc * (7 + 16 * Nc);

+  json["single_site_flops"] = single_site_flops;
+
  GridCartesian *UGrid = SpaceTimeGrid::makeFourDimGrid(
      GridDefaultLatt(), GridDefaultSimd(Nd, vComplexF::Nsimd()), GridDefaultMpi());
  GridRedBlackCartesian *UrbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(UGrid);
+
  GridCartesian *FGrid = SpaceTimeGrid::makeFiveDimGrid(Ls, UGrid);
  GridRedBlackCartesian *FrbGrid = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls, UGrid);

+  json["grid"] = FGrid->FullDimensions().toVector();
+  json["local_grid"] = FGrid->LocalDimensions().toVector();
+
  std::cout << GridLogMessage << "Making s innermost grids" << std::endl;
  GridCartesian *sUGrid =
      SpaceTimeGrid::makeFourDimDWFGrid(GridDefaultLatt(), GridDefaultMpi());
+
  GridRedBlackCartesian *sUrbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(sUGrid);
  GridCartesian *sFGrid = SpaceTimeGrid::makeFiveDimDWFGrid(Ls, UGrid);
  GridRedBlackCartesian *sFrbGrid = SpaceTimeGrid::makeFiveDimDWFRedBlackGrid(Ls, UGrid);
@@ -175,6 +192,9 @@ int main(int argc, char **argv)
  RealD NP = UGrid->_Nprocessors;
  RealD NN = UGrid->NodeCount();

+  json["ranks"] = NP;
+  json["nodes"] = NN;
+
  std::cout << GridLogMessage
            << "*****************************************************************"
            << std::endl;
@@ -193,6 +213,7 @@ int main(int argc, char **argv)
            << std::endl;
  std::cout << GridLogMessage << "* VComplexF size is " << sizeof(vComplexF) << " B"
            << std::endl;
+
  if (sizeof(RealF) == 4)
    std::cout << GridLogMessage << "* SINGLE precision " << std::endl;
  if (sizeof(RealF) == 8)
@@ -208,6 +229,7 @@ int main(int argc, char **argv)
  if (WilsonKernelsStatic::Opt == WilsonKernelsStatic::OptHandUnroll)
    std::cout << GridLogMessage << "* Using Nc=3       WilsonKernels" << std::endl;
  if (WilsonKernelsStatic::Opt == WilsonKernelsStatic::OptInlineAsm)
+
    std::cout << GridLogMessage << "* Using Asm Nc=3   WilsonKernels" << std::endl;
  std::cout << GridLogMessage
            << "*****************************************************************"
@@ -249,6 +271,14 @@ int main(int argc, char **argv)
        (volume * (2 * Nd + 1) * Nd * Nc + (volume / Ls) * 2 * Nd * Nc * Nc) * simdwidth /
        nsimd * ncall / (1024. * 1024. * 1024.);

+    json["Dw"]["calls"] = ncall;
+    json["Dw"]["time"] = t1 - t0;
+    json["Dw"]["mflops"] = flops / (t1 - t0);
+    json["Dw"]["mflops_per_rank"] = flops / (t1 - t0) / NP;
+    json["Dw"]["mflops_per_node"] = flops / (t1 - t0) / NN;
+    json["Dw"]["RF"] = 1000000. * data_rf / ((t1 - t0));
+    json["Dw"]["mem"] = 1000000. * data_mem / ((t1 - t0));
+
    std::cout << GridLogMessage << "Called Dw " << ncall << " times in " << t1 - t0
              << " us" << std::endl;
    //    std::cout<<GridLogMessage << "norm result "<< norm2(result)<<std::endl;
@@ -258,6 +288,7 @@ int main(int argc, char **argv)
              << std::endl;
    std::cout << GridLogMessage << "mflop/s per node =  " << flops / (t1 - t0) / NN
              << std::endl;
+
    std::cout << GridLogMessage
              << "RF  GiB/s (base 2) =   " << 1000000. * data_rf / ((t1 - t0))
              << std::endl;
@@ -334,6 +365,7 @@ int main(int argc, char **argv)
  }
  //  dump=1;
  Dw.Dhop(src, result, 1);
+
  std::cout << GridLogMessage
            << "Compare to naive wilson implementation Dag to verify correctness"
            << std::endl;
@@ -366,11 +398,13 @@ int main(int argc, char **argv)

  // S-direction is INNERMOST and takes no part in the parity.
  std::cout << GridLogMessage
+
            << "*********************************************************" << std::endl;
  std::cout << GridLogMessage
            << "* Benchmarking DomainWallFermionF::DhopEO                " << std::endl;
  std::cout << GridLogMessage << "* Vectorising space-time by " << vComplexF::Nsimd()
            << std::endl;
+
  if (sizeof(RealF) == 4)
    std::cout << GridLogMessage << "* SINGLE precision " << std::endl;
  if (sizeof(RealF) == 8)
@@ -389,6 +423,7 @@ int main(int argc, char **argv)
    std::cout << GridLogMessage << "* Using Asm Nc=3   WilsonKernels" << std::endl;
  std::cout << GridLogMessage
            << "*********************************************************" << std::endl;
+
  {
    Dw.ZeroCounters();
    FGrid->Barrier();
@@ -414,11 +449,18 @@ int main(int argc, char **argv)
      volume = volume * latt4[mu];
    double flops = (single_site_flops * volume * ncall) / 2.0;

+    json["Deo"]["calls"] = ncall;
+    json["Deo"]["time"] = t1 - t0;
+    json["Deo"]["mflops"] = flops / (t1 - t0);
+    json["Deo"]["mflops_per_rank"] = flops / (t1 - t0) / NP;
+    json["Deo"]["mflops_per_node"] = flops / (t1 - t0) / NN;
+
    std::cout << GridLogMessage << "Deo mflop/s =   " << flops / (t1 - t0) << std::endl;
    std::cout << GridLogMessage << "Deo mflop/s per rank   " << flops / (t1 - t0) / NP
              << std::endl;
    std::cout << GridLogMessage << "Deo mflop/s per node   " << flops / (t1 - t0) / NN
              << std::endl;
+
    Dw.Report();
  }
  Dw.DhopEO(src_o, r_e, DaggerNo);
@@ -450,6 +492,26 @@ int main(int argc, char **argv)

  assert(norm2(src_e) < 1.0e-4);
  assert(norm2(src_o) < 1.0e-4);
+
+  // Optional JSON export of the collected results (requested with --json-out).
+  if (!json_filename.empty())
+  {
+    std::cout << GridLogMessage << "writing benchmark results to " << json_filename
+              << std::endl;
+
+    // Only MPI rank 0 writes the file.
+    int me = 0;
+    MPI_Comm_rank(MPI_COMM_WORLD, &me);
+    if (me == 0)
+    {
+      std::ofstream json_file(json_filename);
+      json_file << std::setw(4) << json << std::flush;
+      // Flush, then check the stream so a failed open or write is reported.
+      if (!json_file)
+        std::cerr << "error: could not write " << json_filename << std::endl;
+    }
+  }
+
  Grid_finalize();
  exit(0);
 }
--- a/Grid/json.hpp
+++ b/Grid/json.hpp