From a7a16df9d0f71a7945521b4c08c6b417ed2e5292 Mon Sep 17 00:00:00 2001 From: Peter Boyle Date: Wed, 12 Feb 2025 14:59:28 +0000 Subject: [PATCH] GET not put has kinder barrier sequence for NVLINK type access as when GET is done, I can use it without barrier. Moves a barrier to a nicer place, overlapped with DtoH DMA --- Grid/stencil/Stencil.h | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/Grid/stencil/Stencil.h b/Grid/stencil/Stencil.h index fa03183c..2a666a04 100644 --- a/Grid/stencil/Stencil.h +++ b/Grid/stencil/Stencil.h @@ -501,6 +501,9 @@ public: void HaloGather(const Lattice &source,compressor &compress) { // accelerator_barrier(); + ////////////////////////////////// + // I will overwrite my send buffers + ////////////////////////////////// _grid->StencilBarrier();// Synch shared memory on a single nodes assert(source.Grid()==_grid); @@ -514,7 +517,12 @@ public: HaloGatherDir(source,compress,point,face_idx); } accelerator_barrier(); // All my local gathers are complete - // _grid->StencilBarrier();// Synch shared memory on a single nodes +#ifdef NVLINK_GET + #warning "NVLINK_GET" + _grid->StencilBarrier(); // He can now get my local gather, I can get his + // Synch shared memory on a single node; could use an asynchronous barrier here and defer check + // Or issue barrier AFTER the DMA is running +#endif face_table_computed=1; assert(u_comm_offset==_unified_buffer_size); } @@ -553,6 +561,7 @@ public: coalescedWrite(to[j] ,coalescedRead(from [j])); }); acceleratorFenceComputeStream(); + // Also fenced in WilsonKernels } }