diff --git a/Grid/stencil/Stencil.h b/Grid/stencil/Stencil.h index 1568cbf9..92c229ab 100644 --- a/Grid/stencil/Stencil.h +++ b/Grid/stencil/Stencil.h @@ -434,6 +434,7 @@ public: //////////////////////////////////////////////////////////////////////// void CommunicateBegin(std::vector > &reqs) { + accelerator_barrier(); for(int i=0;iStencilSendToRecvFromBegin(MpiReqs, Packets[i].send_buf, diff --git a/Grid/threads/Accelerator.h b/Grid/threads/Accelerator.h index 06376131..b6212929 100644 --- a/Grid/threads/Accelerator.h +++ b/Grid/threads/Accelerator.h @@ -458,8 +458,8 @@ inline void acceleratorCopySynchronise(void) { hipStreamSynchronize(copyStream); // Common on all GPU targets ////////////////////////////////////////////// #if defined(GRID_SYCL) || defined(GRID_CUDA) || defined(GRID_HIP) -#define accelerator_forNB( iter1, num1, nsimd, ... ) accelerator_for2dNB( iter1, num1, iter2, 1, nsimd, {__VA_ARGS__} ); accelerator_barrier(dummy); - +// FIXME -- the non-blocking nature got broken March 30 2023 by PAB +#define accelerator_forNB( iter1, num1, nsimd, ... ) accelerator_for2dNB( iter1, num1, iter2, 1, nsimd, {__VA_ARGS__} ); #define accelerator_for( iter, num, nsimd, ... ) \ accelerator_forNB(iter, num, nsimd, { __VA_ARGS__ } ); \