From 6616d5d09012cb7097e2703d19dfb039bae8a539 Mon Sep 17 00:00:00 2001 From: Peter Boyle Date: Wed, 2 Feb 2022 16:38:24 -0500 Subject: [PATCH 1/4] Commit --- Grid/stencil/Stencil.h | 107 ++++++++++++++++++++++++++++++++++------- 1 file changed, 90 insertions(+), 17 deletions(-) diff --git a/Grid/stencil/Stencil.h b/Grid/stencil/Stencil.h index c2bc8dab..269ecfe4 100644 --- a/Grid/stencil/Stencil.h +++ b/Grid/stencil/Stencil.h @@ -240,6 +240,19 @@ public: cobj * mpi_p; Integer buffer_size; }; + struct CopyReceiveBuffer { + void * from_p; + void * to_p; + Integer bytes; + }; + struct CachedTransfer { + Integer direction; + Integer OrthogPlane; + Integer DestProc; + Integer bytes; + Integer lane; + void *recv_buf; + }; protected: @@ -271,7 +284,8 @@ public: std::vector MergersSHM; std::vector Decompressions; std::vector DecompressionsSHM; - + std::vector CopyReceiveBuffers ; + std::vector CachedTransfers; /////////////////////////////////////////////////////////// // Unified Comms buffers for all directions /////////////////////////////////////////////////////////// @@ -551,8 +565,57 @@ public: Mergers.resize(0); MergersSHM.resize(0); Packets.resize(0); + CopyReceiveBuffers.resize(0); + CachedTransfers.resize(0); calls++; } + void AddCopy(void *from,void * to, Integer bytes) + { + CopyReceiveBuffer obj; + obj.from_p = from; + obj.to_p = to; + obj.bytes= bytes; + CopyReceiveBuffers.push_back(obj); + } + void CommsCopy() + { + // These are device resident MPI buffers. + for(int i=0;i void CommsMerge(decompressor decompress) { + CommsCopy(); CommsMerge(decompress,Mergers,Decompressions); } template void CommsMergeSHM(decompressor decompress) { @@ -590,8 +654,8 @@ public: } template - void CommsMerge(decompressor decompress,std::vector &mm,std::vector &dd) { - + void CommsMerge(decompressor decompress,std::vector &mm,std::vector &dd) + { mergetime-=usecond(); for(int i=0;i>1; @@ -1045,9 +1111,10 @@ public: recv_buf=this->u_recv_buf_p; } + cobj *send_buf; send_buf = this->u_send_buf_p; // Gather locally, must send - + //////////////////////////////////////////////////////// // Gather locally //////////////////////////////////////////////////////// @@ -1056,23 +1123,27 @@ public: Gather_plane_simple_table(face_table[face_idx],rhs,send_buf,compress,u_comm_offset,so); face_idx++; gathertime+=usecond(); - /////////////////////////////////////////////////////////// - // Build a list of things to do after we synchronise GPUs - // Start comms now??? - /////////////////////////////////////////////////////////// - AddPacket((void *)&send_buf[u_comm_offset], - (void *)&recv_buf[u_comm_offset], - xmit_to_rank, - recv_from_rank, - bytes); + int duplicate = CheckForDuplicate(dimension,x,comm_proc,(void *)&recv_buf[u_comm_offset],0,bytes); + if (!duplicate || 1) { // Force comms for now + /////////////////////////////////////////////////////////// + // Build a list of things to do after we synchronise GPUs + // Start comms now??? + /////////////////////////////////////////////////////////// + AddPacket((void *)&send_buf[u_comm_offset], + (void *)&recv_buf[u_comm_offset], + xmit_to_rank, + recv_from_rank, + bytes); + } + if ( compress.DecompressionStep() ) { AddDecompress(&this->u_recv_buf_p[u_comm_offset], &recv_buf[u_comm_offset], words,Decompressions); } u_comm_offset+=words; - } + } } return 0; } @@ -1181,8 +1252,10 @@ public: rpointers[i] = rp; - AddPacket((void *)sp,(void *)rp,xmit_to_rank,recv_from_rank,bytes); - + int duplicate = CheckForDuplicate(dimension,x,nbr_proc,(void *)rp,i,bytes); + if (!duplicate || 1 ) { // Force comms for now + AddPacket((void *)sp,(void *)rp,xmit_to_rank,recv_from_rank,bytes); + } } else { From 6283d11d5042ef36d306b28dfafe47ee7eab9d23 Mon Sep 17 00:00:00 2001 From: Azusa Yamaguchi Date: Tue, 8 Feb 2022 15:22:06 +0000 Subject: [PATCH 2/4] Add the comment line to tell the existance of copied data/buffer --- Grid/stencil/Stencil.h | 3 +++ 1 file changed, 3 insertions(+) diff --git a/Grid/stencil/Stencil.h b/Grid/stencil/Stencil.h index 269ecfe4..d39345f7 100644 --- a/Grid/stencil/Stencil.h +++ b/Grid/stencil/Stencil.h @@ -571,6 +571,7 @@ public: } void AddCopy(void *from,void * to, Integer bytes) { + std::cout << "Adding CopyReceiveBuffer "< Date: Tue, 15 Feb 2022 10:27:39 -0500 Subject: [PATCH 3/4] Bug fix to detection case --- Grid/stencil/Stencil.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/Grid/stencil/Stencil.h b/Grid/stencil/Stencil.h index d39345f7..5a2d5099 100644 --- a/Grid/stencil/Stencil.h +++ b/Grid/stencil/Stencil.h @@ -601,6 +601,7 @@ public: obj.recv_buf = recv_buf; obj.bytes = bytes; obj.lane = lane; + for(int i=0;i Date: Thu, 17 Feb 2022 04:51:13 +0000 Subject: [PATCH 4/4] Staggered fix finished --- Grid/stencil/Stencil.h | 25 ++++++++++++++----------- 1 file changed, 14 insertions(+), 11 deletions(-) diff --git a/Grid/stencil/Stencil.h b/Grid/stencil/Stencil.h index 5a2d5099..246bdb36 100644 --- a/Grid/stencil/Stencil.h +++ b/Grid/stencil/Stencil.h @@ -251,6 +251,7 @@ public: Integer DestProc; Integer bytes; Integer lane; + Integer cb; void *recv_buf; }; @@ -571,7 +572,7 @@ public: } void AddCopy(void *from,void * to, Integer bytes) { - std::cout << "Adding CopyReceiveBuffer "<