Mirror of https://github.com/paboyle/Grid.git (synced 2024-11-09 23:45:36 +00:00)
Better benchmarking
This commit is contained in:
Parent: a3322b470f
Commit: bb3c177000
@@ -853,8 +853,7 @@ public:
|
|||||||
CComplex alpha(1.0);
|
CComplex alpha(1.0);
|
||||||
CComplex beta (1.0);
|
CComplex beta (1.0);
|
||||||
RealD flops = 8.0*M*N*K*BATCH;
|
RealD flops = 8.0*M*N*K*BATCH;
|
||||||
int ncall=10;
|
int ncall=1000;
|
||||||
RealD t0 = usecond();
|
|
||||||
deviceVector<CComplex *> As(BATCH);
|
deviceVector<CComplex *> As(BATCH);
|
||||||
deviceVector<CComplex *> Bs(BATCH);
|
deviceVector<CComplex *> Bs(BATCH);
|
||||||
deviceVector<CComplex *> Cs(BATCH);
|
deviceVector<CComplex *> Cs(BATCH);
|
||||||
@@ -865,6 +864,16 @@ public:
|
|||||||
ptr = &C[b*M*N]; acceleratorPut(Cs[b],ptr);
|
ptr = &C[b*M*N]; acceleratorPut(Cs[b],ptr);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Warm up call
|
||||||
|
gemmBatched(M,N,K,
|
||||||
|
alpha,
|
||||||
|
As, // m x k
|
||||||
|
Bs, // k x n
|
||||||
|
beta,
|
||||||
|
Cs);
|
||||||
|
synchronise();
|
||||||
|
|
||||||
|
RealD t0 = usecond();
|
||||||
for(int i=0;i<ncall;i++){
|
for(int i=0;i<ncall;i++){
|
||||||
gemmBatched(M,N,K,
|
gemmBatched(M,N,K,
|
||||||
alpha,
|
alpha,
|
||||||
@@ -872,8 +881,8 @@ public:
|
|||||||
Bs, // k x n
|
Bs, // k x n
|
||||||
beta,
|
beta,
|
||||||
Cs);
|
Cs);
|
||||||
}
|
|
||||||
synchronise();
|
synchronise();
|
||||||
|
}
|
||||||
RealD t1 = usecond();
|
RealD t1 = usecond();
|
||||||
RealD bytes = 1.0*sizeof(CComplex)*(M*N*2+N*K+M*K)*BATCH;
|
RealD bytes = 1.0*sizeof(CComplex)*(M*N*2+N*K+M*K)*BATCH;
|
||||||
flops = 8.0*M*N*K*BATCH*ncall;
|
flops = 8.0*M*N*K*BATCH*ncall;
|
||||||
|
Loading…
Reference in New Issue
Block a user