benchmark-quda #3

Open
simon.buerger wants to merge 16 commits from simon.buerger/lattice-benchmarks:benchmark-quda into main
Showing only changes of commit 0af6b9047a - Show all commits

View File

@ -106,21 +106,29 @@ cudaGaugeField make_gauge_field(int L)
// create a random source vector (L = local size)
ColorSpinorField make_source(int L)
{
// NOTE: `param.x` directly determines the size of the (local, per rank) memory
// allocation. Thus for checkerboarding, we have to specify x=(L/2,L,L,L) to get a
// physical local volume of L^4, thus implicitly choosing a dimension for the
// checkerboarding (it should not really matter which one, of course).
ColorSpinorParam param;
param.nColor = 3;
param.nSpin = 4;
param.nVec = 1; // only a single vector
param.pad = 0;
param.siteSubset = QUDA_FULL_SITE_SUBSET;
param.siteSubset = QUDA_PARITY_SITE_SUBSET;
param.nDim = 4;
param.x[0] = L;
param.x[0] = L / 2;
param.x[1] = L;
param.x[2] = L;
param.x[3] = L;
param.x[4] = 1; // no fifth dimension
param.pc_type = QUDA_4D_PC;
param.siteOrder = QUDA_EVEN_ODD_SITE_ORDER;
param.gammaBasis = QUDA_DEGRAND_ROSSI_GAMMA_BASIS;
// somewhat surprisingly, the DiracWilson::Dslash(...) function only works with the
// UKQCD_GAMMA_BASIS
param.gammaBasis = QUDA_UKQCD_GAMMA_BASIS;
param.create = QUDA_NULL_FIELD_CREATE; // do not (zero-) initilize the field
param.setPrecision(QUDA_DOUBLE_PRECISION);
param.location = QUDA_CUDA_FIELD_LOCATION;
@ -168,14 +176,14 @@ void benchmark_wilson()
// couple iterations without timing to warm up
for (int iter = 0; iter < niter_warmup; ++iter)
dirac.M(tmp, src);
dirac.Dslash(tmp, src, QUDA_EVEN_PARITY);
// actual benchmark with timings
dirac.Flops(); // reset flops counter
device_timer_t device_timer;
device_timer.start();
for (int iter = 0; iter < niter; ++iter)
dirac.M(tmp, src);
dirac.Dslash(tmp, src, QUDA_EVEN_PARITY);
device_timer.stop();
double secs = device_timer.last() / niter;