benchmark-quda #3
@ -2,6 +2,7 @@
|
||||
#include <array>
|
||||
#include <blas_quda.h>
|
||||
#include <cassert>
|
||||
#include <chrono>
|
||||
#include <color_spinor_field.h>
|
||||
#include <dirac_quda.h>
|
||||
#include <fstream>
|
||||
@ -20,6 +21,17 @@ json json_results;
|
||||
|
||||
using namespace quda;
|
||||
|
||||
// timestamp = seconds since program start.
|
||||
// these are written to the json output with the goal of later matching them against
|
||||
// power measurements to determine energy efficiency.
|
||||
// Monotonic clock used for every benchmark timestamp; a steady clock never
// jumps backwards, so successive timestamps are non-decreasing.
using Clock = std::chrono::steady_clock;

// Reference point for get_timestamp(); captured when this global is
// initialized, i.e. before main() starts running.
Clock::time_point program_start_time = Clock::now();

/// @brief Time elapsed since program_start_time.
/// @return Elapsed time in seconds, as a double.
double get_timestamp()
{
  // Convert straight to floating-point seconds: this keeps the clock's full
  // resolution instead of truncating to whole microseconds first, and avoids
  // a hand-written unit-conversion factor.
  const std::chrono::duration<double> elapsed = Clock::now() - program_start_time;
  return elapsed.count();
}
|
||||
|
||||
// This is the MPI grid, i.e. the layout of ranks
|
||||
int nranks = -1;
|
||||
std::array<int, 4> mpi_grid = {1, 1, 1, 1};
|
||||
@ -197,8 +209,10 @@ void benchmark_wilson(std::vector<int> const &L_list, int niter)
|
||||
dirac.Flops(); // reset flops counter
|
||||
device_timer_t device_timer;
|
||||
device_timer.start();
|
||||
double start_time = get_timestamp();
|
||||
for (int iter = 0; iter < niter; ++iter)
|
||||
dirac.Dslash(res, src, QUDA_EVEN_PARITY);
|
||||
double end_time = get_timestamp();
|
||||
device_timer.stop();
|
||||
|
||||
double secs = device_timer.last() / niter;
|
||||
@ -220,6 +234,8 @@ void benchmark_wilson(std::vector<int> const &L_list, int niter)
|
||||
json tmp;
|
||||
tmp["L"] = L;
|
||||
tmp["Gflops_wilson"] = flops / secs * 1e-9;
|
||||
tmp["start_time"] = start_time;
|
||||
tmp["end_time"] = end_time;
|
||||
json_results["flops"]["results"].push_back(tmp);
|
||||
}
|
||||
}
|
||||
@ -265,8 +281,10 @@ void benchmark_dwf(std::vector<int> const &L_list, int niter)
|
||||
dirac.Flops(); // reset flops counter
|
||||
device_timer_t device_timer;
|
||||
device_timer.start();
|
||||
double start_time = get_timestamp();
|
||||
for (int iter = 0; iter < niter; ++iter)
|
||||
dirac.Dslash(res, src, QUDA_EVEN_PARITY);
|
||||
double end_time = get_timestamp();
|
||||
device_timer.stop();
|
||||
|
||||
double secs = device_timer.last() / niter;
|
||||
@ -287,6 +305,8 @@ void benchmark_dwf(std::vector<int> const &L_list, int niter)
|
||||
json tmp;
|
||||
tmp["L"] = L;
|
||||
tmp["Gflops_dwf4"] = flops / secs * 1e-9;
|
||||
tmp["start_time"] = start_time;
|
||||
tmp["end_time"] = end_time;
|
||||
json_results["flops"]["results"].push_back(tmp);
|
||||
}
|
||||
}
|
||||
@ -357,8 +377,10 @@ void benchmark_axpy(std::vector<int> const &L_list, int niter)
|
||||
// running the actual benchmark
|
||||
device_timer_t device_timer;
|
||||
device_timer.start();
|
||||
double start_time = get_timestamp();
|
||||
for (int iter = 0; iter < niter; ++iter)
|
||||
blas::axpy(1.234, fieldA, fieldB);
|
||||
double end_time = get_timestamp();
|
||||
device_timer.stop();
|
||||
double secs = device_timer.last() / niter; // seconds per iteration
|
||||
double mem_MiB = memory / 1024. / 1024.;
|
||||
@ -371,6 +393,8 @@ void benchmark_axpy(std::vector<int> const &L_list, int niter)
|
||||
tmp["size_MB"] = mem_MiB;
|
||||
tmp["GBps"] = GBps;
|
||||
tmp["GFlops"] = flops / secs * 1e-9;
|
||||
tmp["start_time"] = start_time;
|
||||
tmp["end_time"] = end_time;
|
||||
json_results["axpy"].push_back(tmp);
|
||||
}
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user