From 8cd10019dbd31e94510c5257b8d051ab0afabc15 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Simon=20B=C3=BCrger?= Date: Mon, 19 Jun 2023 18:22:24 +0100 Subject: [PATCH] add timestamps to benchmarks --- Quda/Benchmark_Quda.cpp | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/Quda/Benchmark_Quda.cpp b/Quda/Benchmark_Quda.cpp index 84b8565..689cf32 100644 --- a/Quda/Benchmark_Quda.cpp +++ b/Quda/Benchmark_Quda.cpp @@ -2,6 +2,7 @@ #include #include #include +#include #include #include #include @@ -20,6 +21,17 @@ json json_results; using namespace quda; +// timestamp = seconds since program start. +// these are written to the json output with the goal of later matching them against +// power-measurements to determine energy efficiency. +using Clock = std::chrono::steady_clock; +Clock::time_point program_start_time = Clock::now(); +double get_timestamp() +{ + auto dur = Clock::now() - program_start_time; + return std::chrono::duration_cast(dur).count() * 1.0e-6; +} + // This is the MPI grid, i.e. 
the layout of ranks int nranks = -1; std::array mpi_grid = {1, 1, 1, 1}; @@ -197,8 +209,10 @@ void benchmark_wilson(std::vector const &L_list, int niter) dirac.Flops(); // reset flops counter device_timer_t device_timer; device_timer.start(); + double start_time = get_timestamp(); for (int iter = 0; iter < niter; ++iter) dirac.Dslash(res, src, QUDA_EVEN_PARITY); + double end_time = get_timestamp(); device_timer.stop(); double secs = device_timer.last() / niter; @@ -220,6 +234,8 @@ void benchmark_wilson(std::vector const &L_list, int niter) json tmp; tmp["L"] = L; tmp["Gflops_wilson"] = flops / secs * 1e-9; + tmp["start_time"] = start_time; + tmp["end_time"] = end_time; json_results["flops"]["results"].push_back(tmp); } } @@ -265,8 +281,10 @@ void benchmark_dwf(std::vector const &L_list, int niter) dirac.Flops(); // reset flops counter device_timer_t device_timer; device_timer.start(); + double start_time = get_timestamp(); for (int iter = 0; iter < niter; ++iter) dirac.Dslash(res, src, QUDA_EVEN_PARITY); + double end_time = get_timestamp(); device_timer.stop(); double secs = device_timer.last() / niter; @@ -287,6 +305,8 @@ void benchmark_dwf(std::vector const &L_list, int niter) json tmp; tmp["L"] = L; tmp["Gflops_dwf4"] = flops / secs * 1e-9; + tmp["start_time"] = start_time; + tmp["end_time"] = end_time; json_results["flops"]["results"].push_back(tmp); } } @@ -357,8 +377,10 @@ void benchmark_axpy(std::vector const &L_list, int niter) // running the actual benchmark device_timer_t device_timer; device_timer.start(); + double start_time = get_timestamp(); for (int iter = 0; iter < niter; ++iter) blas::axpy(1.234, fieldA, fieldB); + double end_time = get_timestamp(); device_timer.stop(); double secs = device_timer.last() / niter; // seconds per iteration double mem_MiB = memory / 1024. 
/ 1024.; @@ -371,6 +393,8 @@ void benchmark_axpy(std::vector const &L_list, int niter) tmp["size_MB"] = mem_MiB; tmp["GBps"] = GBps; tmp["GFlops"] = flops / secs * 1e-9; + tmp["start_time"] = start_time; + tmp["end_time"] = end_time; json_results["axpy"].push_back(tmp); } }