From af64c1c6b6dd52109e4cc87e4977ad03f6426060 Mon Sep 17 00:00:00 2001
From: Peter Boyle
Date: Thu, 30 Mar 2023 17:34:44 -0400
Subject: [PATCH] Had managed to drop the accelerator_barrier() in the Wilson Compressor gather

---
 Grid/stencil/Stencil.h     | 1 +
 Grid/threads/Accelerator.h | 4 ++--
 2 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/Grid/stencil/Stencil.h b/Grid/stencil/Stencil.h
index 1568cbf9..92c229ab 100644
--- a/Grid/stencil/Stencil.h
+++ b/Grid/stencil/Stencil.h
@@ -434,6 +434,7 @@ public:
   ////////////////////////////////////////////////////////////////////////
   void CommunicateBegin(std::vector<std::vector<CommsRequest_t> > &reqs)
   {
+    accelerator_barrier();
     for(int i=0;i<Packets.size();i++){
       _grid->StencilSendToRecvFromBegin(MpiReqs,
                                         Packets[i].send_buf,
diff --git a/Grid/threads/Accelerator.h b/Grid/threads/Accelerator.h
index 06376131..b6212929 100644
--- a/Grid/threads/Accelerator.h
+++ b/Grid/threads/Accelerator.h
@@ -458,8 +458,8 @@ inline void acceleratorCopySynchronise(void) { hipStreamSynchronize(copyStream);
 // Common on all GPU targets
 //////////////////////////////////////////////
 #if defined(GRID_SYCL) || defined(GRID_CUDA) || defined(GRID_HIP)
-#define accelerator_forNB( iter1, num1, nsimd, ... ) accelerator_for2dNB( iter1, num1, iter2, 1, nsimd, {__VA_ARGS__} ); accelerator_barrier(dummy);
-
+// FIXME -- the non-blocking nature got broken March 30 2023 by PAB
+#define accelerator_forNB( iter1, num1, nsimd, ... ) accelerator_for2dNB( iter1, num1, iter2, 1, nsimd, {__VA_ARGS__} );
 #define accelerator_for( iter, num, nsimd, ... ) \
   accelerator_forNB(iter, num, nsimd, { __VA_ARGS__ } ); \