From 622f78ebea56fa355afeb69db83ab2cfa2cfa2bc Mon Sep 17 00:00:00 2001 From: Peter Boyle Date: Wed, 4 Sep 2024 13:53:48 +0000 Subject: [PATCH] SYCL updates -- operator = giving trouble on Aurora. SYCL reduction is failing intermittently with SVM interface - returns zero, expect non-zero. Think I need to remove ALL dependence on SVM. --- Grid/lattice/Lattice_base.h | 7 ++-- Grid/lattice/Lattice_reduction_sycl.h | 46 ++++++++++++++++++++++++--- 2 files changed, 47 insertions(+), 6 deletions(-) diff --git a/Grid/lattice/Lattice_base.h b/Grid/lattice/Lattice_base.h index 9d4d3d5f..515c847f 100644 --- a/Grid/lattice/Lattice_base.h +++ b/Grid/lattice/Lattice_base.h @@ -236,10 +236,13 @@ public: template inline Lattice & operator = (const sobj & r){ vobj vtmp; vtmp = r; -#if defined(GRID_HIP) || defined(GRID_CUDA) || defined (GRID_SYCL) +#if 0 + deviceVector vvtmp(1); + acceleratorPut(vvtmp[0],vtmp); + vobj *vvtmp_p = & vvtmp[0]; auto me = View(AcceleratorWrite); accelerator_for(ss,me.size(),vobj::Nsimd(),{ - auto stmp=coalescedRead(vtmp); + auto stmp=coalescedRead(*vvtmp_p); coalescedWrite(me[ss],stmp); }); #else diff --git a/Grid/lattice/Lattice_reduction_sycl.h b/Grid/lattice/Lattice_reduction_sycl.h index b8dc5378..7dff7939 100644 --- a/Grid/lattice/Lattice_reduction_sycl.h +++ b/Grid/lattice/Lattice_reduction_sycl.h @@ -4,16 +4,36 @@ NAMESPACE_BEGIN(Grid); // Possibly promote to double and sum ///////////////////////////////////////////////////////////////////////////////////////////////////////// + template inline typename vobj::scalar_objectD sumD_gpu_tensor(const vobj *lat, Integer osites) { typedef typename vobj::scalar_object sobj; typedef typename vobj::scalar_objectD sobjD; +#if 1 + sobj identity; zeroit(identity); + sobj ret; zeroit(ret); + Integer nsimd= vobj::Nsimd(); + { + sycl::buffer abuff(&ret, {1}); + theGridAccelerator->submit([&](cl::sycl::handler &cgh) { + auto Reduction = cl::sycl::reduction(abuff,cgh,identity,std::plus<>()); + cgh.parallel_for(cl::sycl::range<1>{osites}, + Reduction, + [=] (cl::sycl::id<1> item, auto &sum) { + auto osite = item[0]; + sum +=Reduce(lat[osite]); + }); + }); + } + sobjD dret; convertType(dret,ret); + return dret; +#else static Vector mysum; mysum.resize(1); sobj *mysum_p = & mysum[0]; sobj identity; zeroit(identity); - mysum[0] = identity; + acceleratorPut(mysum[0],identity); sobj ret ; Integer nsimd= vobj::Nsimd(); @@ -33,6 +53,7 @@ inline typename vobj::scalar_objectD sumD_gpu_tensor(const vobj *lat, Integer os // free(mysum,*theGridAccelerator); sobjD dret; convertType(dret,ret); return dret; +#endif } template @@ -76,12 +97,28 @@ inline typename vobj::scalar_object sum_gpu_large(const vobj *lat, Integer osite template Word svm_xor(Word *vec,uint64_t L) { - Word xorResult; xorResult = 0; +#if 1 + Word identity; identity=0; + Word ret = 0; + { + sycl::buffer abuff(&ret, {1}); + theGridAccelerator->submit([&](cl::sycl::handler &cgh) { + auto Reduction = cl::sycl::reduction(abuff,cgh,identity,std::bit_xor<>()); + cgh.parallel_for(cl::sycl::range<1>{L}, + Reduction, + [=] (cl::sycl::id<1> index, auto &sum) { + sum ^=vec[index]; + }); + }); + } + theGridAccelerator->wait(); + return ret; +#else static Vector d_sum; d_sum.resize(1); Word *d_sum_p=&d_sum[0]; Word identity; identity=0; - d_sum[0] = identity; + acceleratorPut(d_sum[0],identity); const cl::sycl::property_list PropList ({ cl::sycl::property::reduction::initialize_to_identity() }); theGridAccelerator->submit([&](cl::sycl::handler &cgh) { auto Reduction = cl::sycl::reduction(d_sum_p,identity,std::bit_xor<>(),PropList); @@ -92,9 +129,10 @@ template Word svm_xor(Word *vec,uint64_t L) }); }); theGridAccelerator->wait(); - Word ret = d_sum[0]; + Word ret = acceleratorGet(d_sum[0]); // free(d_sum,*theGridAccelerator); return ret; +#endif } NAMESPACE_END(Grid);