benchmark Dslash(...) instead of full M(...)
This commit is contained in:
parent
9de49f8672
commit
0af6b9047a
@ -106,21 +106,29 @@ cudaGaugeField make_gauge_field(int L)
|
||||
// create a random source vector (L = local size)
|
||||
ColorSpinorField make_source(int L)
|
||||
{
|
||||
// NOTE: `param.x` directly determines the size of the (local, per rank) memory
|
||||
// allocation. Thus for checkerboarding, we have to specify x=(L/2,L,L,L) to get a
|
||||
// physical local volume of L^4, thus implicitly choosing a dimension for the
|
||||
// checkerboarding (shouldn't really matter of course which one).
|
||||
ColorSpinorParam param;
|
||||
param.nColor = 3;
|
||||
param.nSpin = 4;
|
||||
param.nVec = 1; // only a single vector
|
||||
param.pad = 0;
|
||||
param.siteSubset = QUDA_FULL_SITE_SUBSET;
|
||||
param.siteSubset = QUDA_PARITY_SITE_SUBSET;
|
||||
param.nDim = 4;
|
||||
param.x[0] = L;
|
||||
param.x[0] = L / 2;
|
||||
param.x[1] = L;
|
||||
param.x[2] = L;
|
||||
param.x[3] = L;
|
||||
param.x[4] = 1; // no fifth dimension
|
||||
param.pc_type = QUDA_4D_PC;
|
||||
param.siteOrder = QUDA_EVEN_ODD_SITE_ORDER;
|
||||
param.gammaBasis = QUDA_DEGRAND_ROSSI_GAMMA_BASIS;
|
||||
|
||||
// somewhat surprisingly, the DiracWilson::Dslash(...) function only works with the
|
||||
// UKQCD_GAMMA_BASIS
|
||||
param.gammaBasis = QUDA_UKQCD_GAMMA_BASIS;
|
||||
|
||||
param.create = QUDA_NULL_FIELD_CREATE; // do not (zero-) initialize the field
|
||||
param.setPrecision(QUDA_DOUBLE_PRECISION);
|
||||
param.location = QUDA_CUDA_FIELD_LOCATION;
|
||||
@ -168,14 +176,14 @@ void benchmark_wilson()
|
||||
|
||||
// couple iterations without timing to warm up
|
||||
for (int iter = 0; iter < niter_warmup; ++iter)
|
||||
dirac.M(tmp, src);
|
||||
dirac.Dslash(tmp, src, QUDA_EVEN_PARITY);
|
||||
|
||||
// actual benchmark with timings
|
||||
dirac.Flops(); // reset flops counter
|
||||
device_timer_t device_timer;
|
||||
device_timer.start();
|
||||
for (int iter = 0; iter < niter; ++iter)
|
||||
dirac.M(tmp, src);
|
||||
dirac.Dslash(tmp, src, QUDA_EVEN_PARITY);
|
||||
device_timer.stop();
|
||||
|
||||
double secs = device_timer.last() / niter;
|
||||
|
Loading…
Reference in New Issue
Block a user