add timestamps to benchmarks

This commit is contained in:
Simon Bürger 2023-06-19 18:22:24 +01:00
parent 0d588d065a
commit 8cd10019db

View File

@ -2,6 +2,7 @@
#include <array>
#include <blas_quda.h>
#include <cassert>
#include <chrono>
#include <color_spinor_field.h>
#include <dirac_quda.h>
#include <fstream>
@ -20,6 +21,17 @@ json json_results;
using namespace quda;
// Timestamps are expressed in seconds since program start.
// They are written to the JSON output so that they can later be matched against
// power measurements to determine energy efficiency.
// Clock used for every benchmark timestamp. steady_clock is monotonic, so
// differences between two readings are never negative.
using Clock = std::chrono::steady_clock;

// Reference point for get_timestamp(); captured when this global is initialized.
Clock::time_point program_start_time = Clock::now();

// Returns the time elapsed since program_start_time, in seconds.
// The elapsed time is first truncated to whole microseconds and then scaled,
// so the result has microsecond resolution.
double get_timestamp()
{
  using std::chrono::microseconds;

  const Clock::duration elapsed = Clock::now() - program_start_time;
  const auto elapsed_us = std::chrono::duration_cast<microseconds>(elapsed).count();
  return elapsed_us * 1.0e-6;
}
// This is the MPI grid, i.e. the layout of ranks
int nranks = -1;
std::array<int, 4> mpi_grid = {1, 1, 1, 1};
@ -197,8 +209,10 @@ void benchmark_wilson(std::vector<int> const &L_list, int niter)
dirac.Flops(); // reset flops counter
device_timer_t device_timer;
device_timer.start();
double start_time = get_timestamp();
for (int iter = 0; iter < niter; ++iter)
dirac.Dslash(res, src, QUDA_EVEN_PARITY);
double end_time = get_timestamp();
device_timer.stop();
double secs = device_timer.last() / niter;
@ -220,6 +234,8 @@ void benchmark_wilson(std::vector<int> const &L_list, int niter)
json tmp;
tmp["L"] = L;
tmp["Gflops_wilson"] = flops / secs * 1e-9;
tmp["start_time"] = start_time;
tmp["end_time"] = end_time;
json_results["flops"]["results"].push_back(tmp);
}
}
@ -265,8 +281,10 @@ void benchmark_dwf(std::vector<int> const &L_list, int niter)
dirac.Flops(); // reset flops counter
device_timer_t device_timer;
device_timer.start();
double start_time = get_timestamp();
for (int iter = 0; iter < niter; ++iter)
dirac.Dslash(res, src, QUDA_EVEN_PARITY);
double end_time = get_timestamp();
device_timer.stop();
double secs = device_timer.last() / niter;
@ -287,6 +305,8 @@ void benchmark_dwf(std::vector<int> const &L_list, int niter)
json tmp;
tmp["L"] = L;
tmp["Gflops_dwf4"] = flops / secs * 1e-9;
tmp["start_time"] = start_time;
tmp["end_time"] = end_time;
json_results["flops"]["results"].push_back(tmp);
}
}
@ -357,8 +377,10 @@ void benchmark_axpy(std::vector<int> const &L_list, int niter)
// running the actual benchmark
device_timer_t device_timer;
device_timer.start();
double start_time = get_timestamp();
for (int iter = 0; iter < niter; ++iter)
blas::axpy(1.234, fieldA, fieldB);
double end_time = get_timestamp();
device_timer.stop();
double secs = device_timer.last() / niter; // seconds per iteration
double mem_MiB = memory / 1024. / 1024.;
@ -371,6 +393,8 @@ void benchmark_axpy(std::vector<int> const &L_list, int niter)
tmp["size_MB"] = mem_MiB;
tmp["GBps"] = GBps;
tmp["GFlops"] = flops / secs * 1e-9;
tmp["start_time"] = start_time;
tmp["end_time"] = end_time;
json_results["axpy"].push_back(tmp);
}
}