From 81441e98f4feded0c3b09735713b2cb49ae18841 Mon Sep 17 00:00:00 2001 From: Peter Boyle Date: Wed, 16 Sep 2020 03:35:03 +0100 Subject: [PATCH] HIP runs sensible --- Grid/lattice/Lattice_arith.h | 6 ++--- Grid/threads/Accelerator.cc | 2 +- benchmarks/Benchmark_su3.cc | 42 +++++++++++++++++++++++++++++---- benchmarks/Benchmark_su3_gpu.cc | 3 ++- 4 files changed, 44 insertions(+), 9 deletions(-) diff --git a/Grid/lattice/Lattice_arith.h b/Grid/lattice/Lattice_arith.h index a3ae1f28..3c269c58 100644 --- a/Grid/lattice/Lattice_arith.h +++ b/Grid/lattice/Lattice_arith.h @@ -60,9 +60,9 @@ void mac(Lattice &ret,const Lattice &lhs,const Lattice &rhs){ autoView( lhs_v , lhs, AcceleratorRead); autoView( rhs_v , rhs, AcceleratorRead); accelerator_for(ss,lhs_v.size(),obj1::Nsimd(),{ - decltype(coalescedRead(obj1())) tmp; auto lhs_t=lhs_v(ss); auto rhs_t=rhs_v(ss); + auto tmp =ret_v(ss); mac(&tmp,&lhs_t,&rhs_t); coalescedWrite(ret_v[ss],tmp); }); @@ -124,7 +124,7 @@ void mac(Lattice &ret,const Lattice &lhs,const obj3 &rhs){ autoView( ret_v , ret, AcceleratorWrite); autoView( lhs_v , lhs, AcceleratorRead); accelerator_for(ss,lhs_v.size(),obj1::Nsimd(),{ - decltype(coalescedRead(obj1())) tmp; + auto tmp =ret_v(ss); auto lhs_t=lhs_v(ss); mac(&tmp,&lhs_t,&rhs); coalescedWrite(ret_v[ss],tmp); @@ -182,7 +182,7 @@ void mac(Lattice &ret,const obj2 &lhs,const Lattice &rhs){ autoView( ret_v , ret, AcceleratorWrite); autoView( rhs_v , lhs, AcceleratorRead); accelerator_for(ss,rhs_v.size(),obj1::Nsimd(),{ - decltype(coalescedRead(obj1())) tmp; + auto tmp =ret_v(ss); auto rhs_t=rhs_v(ss); mac(&tmp,&lhs,&rhs_t); coalescedWrite(ret_v[ss],tmp); diff --git a/Grid/threads/Accelerator.cc b/Grid/threads/Accelerator.cc index 35e10d31..2134d158 100644 --- a/Grid/threads/Accelerator.cc +++ b/Grid/threads/Accelerator.cc @@ -127,7 +127,7 @@ void acceleratorInit(void) printf("AcceleratorHipInit: ========================\n"); printf("AcceleratorHipInit: Device identifier: %s\n", prop.name); - GPU_PROP_FMT(totalGlobalMem,"%lld"); + GPU_PROP_FMT(totalGlobalMem,"%lu"); // GPU_PROP(managedMemory); GPU_PROP(isMultiGpuBoard); GPU_PROP(warpSize); diff --git a/benchmarks/Benchmark_su3.cc b/benchmarks/Benchmark_su3.cc index d24a3e25..d094da0c 100644 --- a/benchmarks/Benchmark_su3.cc +++ b/benchmarks/Benchmark_su3.cc @@ -36,12 +36,12 @@ int main (int argc, char ** argv) { Grid_init(&argc,&argv); -#define LMAX (48) +#define LMAX (40) #define LMIN (8) #define LADD (8) - int64_t Nwarm=50; - int64_t Nloop=500; + int64_t Nwarm=10; + int64_t Nloop=100; Coordinate simd_layout = GridDefaultSimd(Nd,vComplex::Nsimd()); Coordinate mpi_layout = GridDefaultMpi(); @@ -118,6 +118,41 @@ int main (int argc, char ** argv) } + + std::cout<({45,12,81,9})); + + LatticeColourMatrix z(&Grid); random(pRNG,z); + LatticeColourMatrix x(&Grid); random(pRNG,x); + LatticeColourMatrix y(&Grid); random(pRNG,y); + + for(int64_t i=0;i