From bb3c1770003e16859f878c3469d2ead98cd42c78 Mon Sep 17 00:00:00 2001 From: Peter Boyle Date: Tue, 20 Aug 2024 14:31:41 +0000 Subject: [PATCH] Better benchmarking --- Grid/algorithms/blas/BatchedBlas.h | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/Grid/algorithms/blas/BatchedBlas.h b/Grid/algorithms/blas/BatchedBlas.h index e376bb18..f4092bc5 100644 --- a/Grid/algorithms/blas/BatchedBlas.h +++ b/Grid/algorithms/blas/BatchedBlas.h @@ -853,8 +853,7 @@ public: CComplex alpha(1.0); CComplex beta (1.0); RealD flops = 8.0*M*N*K*BATCH; - int ncall=10; - RealD t0 = usecond(); + int ncall=1000; deviceVector As(BATCH); deviceVector Bs(BATCH); deviceVector Cs(BATCH); @@ -865,6 +864,16 @@ public: ptr = &C[b*M*N]; acceleratorPut(Cs[b],ptr); } + // Warm up call + gemmBatched(M,N,K, + alpha, + As, // m x k + Bs, // k x n + beta, + Cs); + synchronise(); + + RealD t0 = usecond(); for(int i=0;i