From d8a9a745d8117a924a4255422b8f984c14d511fd Mon Sep 17 00:00:00 2001 From: Peter Boyle Date: Fri, 24 Mar 2023 15:40:30 -0400 Subject: [PATCH] stream synchronise --- Grid/lattice/Lattice_reduction_gpu.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/Grid/lattice/Lattice_reduction_gpu.h b/Grid/lattice/Lattice_reduction_gpu.h index 4bdcce0b..bd83a1ea 100644 --- a/Grid/lattice/Lattice_reduction_gpu.h +++ b/Grid/lattice/Lattice_reduction_gpu.h @@ -217,19 +217,19 @@ inline typename vobj::scalar_objectD sumD_gpu_small(const vobj *lat, Integer osi // which worked with earlier drivers. // Not sure which driver had first fail and this bears checking // Is awkward as must install multiple driver versions -#undef UVM_BLOCK_BUFFER +#undef UVM_BLOCK_BUFFER #ifndef UVM_BLOCK_BUFFER commVector buffer(numBlocks); sobj *buffer_v = &buffer[0]; sobj result; - reduceKernel<<< numBlocks, numThreads, smemSize >>>(lat, buffer_v, size); + reduceKernel<<< numBlocks, numThreads, smemSize, computeStream >>>(lat, buffer_v, size); accelerator_barrier(); acceleratorCopyFromDevice(buffer_v,&result,sizeof(result)); #else Vector buffer(numBlocks); sobj *buffer_v = &buffer[0]; sobj result; - reduceKernel<<< numBlocks, numThreads, smemSize >>>(lat, buffer_v, size); + reduceKernel<<< numBlocks, numThreads, smemSize, computeStream >>>(lat, buffer_v, size); accelerator_barrier(); result = *buffer_v; #endif