From eaa4feee433e05ce69c5e3926b0e8d367b4ea545 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Simon=20B=C3=BCrger?= Date: Mon, 24 Apr 2023 14:58:53 +0100 Subject: [PATCH] benchmark Dslash(...) instead of full M(...) --- Quda/Benchmark_Quda.cpp | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) diff --git a/Quda/Benchmark_Quda.cpp b/Quda/Benchmark_Quda.cpp index 3a0b795..37b3dea 100644 --- a/Quda/Benchmark_Quda.cpp +++ b/Quda/Benchmark_Quda.cpp @@ -106,21 +106,29 @@ cudaGaugeField make_gauge_field(int L) // create a random source vector (L = local size) ColorSpinorField make_source(int L) { + // NOTE: `param.x` directly determines the size of the (local, per rank) memory + // allocation. Thus for checkerboarding, we have to specify x=(L/2,L,L,L) to get a + // physical local volume of L^4, thus implicitly choosing a dimension for the + // checkerboarding (shouldn't really matter of course which one). ColorSpinorParam param; param.nColor = 3; param.nSpin = 4; param.nVec = 1; // only a single vector param.pad = 0; - param.siteSubset = QUDA_FULL_SITE_SUBSET; + param.siteSubset = QUDA_PARITY_SITE_SUBSET; param.nDim = 4; - param.x[0] = L; + param.x[0] = L / 2; param.x[1] = L; param.x[2] = L; param.x[3] = L; param.x[4] = 1; // no fifth dimension param.pc_type = QUDA_4D_PC; param.siteOrder = QUDA_EVEN_ODD_SITE_ORDER; - param.gammaBasis = QUDA_DEGRAND_ROSSI_GAMMA_BASIS; + + // somewhat surprisingly, the DiracWilson::Dslash(...)
function only works with the + // UKQCD_GAMMA_BASIS + param.gammaBasis = QUDA_UKQCD_GAMMA_BASIS; + param.create = QUDA_NULL_FIELD_CREATE; // do not (zero-) initilize the field param.setPrecision(QUDA_DOUBLE_PRECISION); param.location = QUDA_CUDA_FIELD_LOCATION; @@ -168,14 +176,14 @@ void benchmark_wilson() // couple iterations without timing to warm up for (int iter = 0; iter < niter_warmup; ++iter) - dirac.M(tmp, src); + dirac.Dslash(tmp, src, QUDA_EVEN_PARITY); // actual benchmark with timings dirac.Flops(); // reset flops counter device_timer_t device_timer; device_timer.start(); for (int iter = 0; iter < niter; ++iter) - dirac.M(tmp, src); + dirac.Dslash(tmp, src, QUDA_EVEN_PARITY); device_timer.stop(); double secs = device_timer.last() / niter;