1
0
mirror of https://github.com/paboyle/Grid.git synced 2024-11-09 23:45:36 +00:00

Better benchmarking

This commit is contained in:
Peter Boyle 2024-08-20 14:31:41 +00:00
parent a3322b470f
commit bb3c177000

View File

@ -853,8 +853,7 @@ public:
CComplex alpha(1.0); CComplex alpha(1.0);
CComplex beta (1.0); CComplex beta (1.0);
RealD flops = 8.0*M*N*K*BATCH; RealD flops = 8.0*M*N*K*BATCH;
int ncall=10; int ncall=1000;
RealD t0 = usecond();
deviceVector<CComplex *> As(BATCH); deviceVector<CComplex *> As(BATCH);
deviceVector<CComplex *> Bs(BATCH); deviceVector<CComplex *> Bs(BATCH);
deviceVector<CComplex *> Cs(BATCH); deviceVector<CComplex *> Cs(BATCH);
@ -865,6 +864,16 @@ public:
ptr = &C[b*M*N]; acceleratorPut(Cs[b],ptr); ptr = &C[b*M*N]; acceleratorPut(Cs[b],ptr);
} }
// Warm up call
gemmBatched(M,N,K,
alpha,
As, // m x k
Bs, // k x n
beta,
Cs);
synchronise();
RealD t0 = usecond();
for(int i=0;i<ncall;i++){ for(int i=0;i<ncall;i++){
gemmBatched(M,N,K, gemmBatched(M,N,K,
alpha, alpha,
@ -872,8 +881,8 @@ public:
Bs, // k x n Bs, // k x n
beta, beta,
Cs); Cs);
}
synchronise(); synchronise();
}
RealD t1 = usecond(); RealD t1 = usecond();
RealD bytes = 1.0*sizeof(CComplex)*(M*N*2+N*K+M*K)*BATCH; RealD bytes = 1.0*sizeof(CComplex)*(M*N*2+N*K+M*K)*BATCH;
flops = 8.0*M*N*K*BATCH*ncall; flops = 8.0*M*N*K*BATCH*ncall;