From a762b1fb712fa620e5662e4e7d41a94925ecff00 Mon Sep 17 00:00:00 2001
From: paboyle
Date: Fri, 21 Oct 2016 09:03:26 +0100
Subject: [PATCH] MPI3 working with a bounce through shared memory on my
 laptop. Longer term plan: make the "u_comm_buf" in Stencil point to the
 shared region and avoid the send between ranks on same node.

---
 benchmarks/Benchmark_dwf.cc           |   7 +-
 lib/AlignedAllocator.h                |   9 -
 lib/Init.cc                           |   2 +-
 lib/Log.cc                            |   2 +-
 lib/Threads.h                         |  17 ++
 lib/communicator/Communicator_base.h  |  26 ++-
 lib/communicator/Communicator_mpi.cc  |  11 +-
 lib/communicator/Communicator_mpi3.cc | 250 +++++++++++++++++---------
 8 files changed, 208 insertions(+), 116 deletions(-)

diff --git a/benchmarks/Benchmark_dwf.cc b/benchmarks/Benchmark_dwf.cc
index 6a283085..a32d7535 100644
--- a/benchmarks/Benchmark_dwf.cc
+++ b/benchmarks/Benchmark_dwf.cc
@@ -208,7 +208,6 @@ int main (int argc, char ** argv)
   std::cout< 1.0e-6 ) {
-      std::cout << "site "< &logstreams) {
 ////////////////////////////////////////////////////////////
 void Grid_quiesce_nodes(void) {
   int me = 0;
-#ifdef GRID_COMMS_MPI
+#if defined(GRID_COMMS_MPI) || defined(GRID_COMMS_MPI3)
   MPI_Comm_rank(MPI_COMM_WORLD, &me);
 #endif
 #ifdef GRID_COMMS_SHMEM
diff --git a/lib/Threads.h b/lib/Threads.h
index d502dd82..70350685 100644
--- a/lib/Threads.h
+++ b/lib/Threads.h
@@ -31,6 +31,7 @@ Author: paboyle
 #ifdef _OPENMP
 #define GRID_OMP
+#warning "OpenMP"
 #endif

 #define UNROLL  _Pragma("unroll")
@@ -127,6 +128,22 @@ class GridThread {
     ThreadBarrier();
   };

+  static void bcopy(const void *src, void *dst, size_t len) {
+#ifdef GRID_OMP
+#pragma omp parallel
+    {
+      const char *c_src =(char *) src;
+      char *c_dest=(char *) dst;
+      int me,mywork,myoff;
+      GridThread::GetWorkBarrier(len,me, mywork,myoff);
+      bcopy(&c_src[myoff],&c_dest[myoff],mywork);
+    }
+#else
+    bcopy(src,dst,len);
+#endif
+  }
+
+
 };
 }
diff --git a/lib/communicator/Communicator_base.h b/lib/communicator/Communicator_base.h
index 9b5ae8cb..6d6602be 100644
--- a/lib/communicator/Communicator_base.h
+++ b/lib/communicator/Communicator_base.h
@@ -45,7 +45,7 @@ class CartesianCommunicator {
  public:
   // Communicator should know nothing of the physics grid, only processor grid.
-
+
   int              _Nprocessors;     // How many in all
   std::vector<int> _processors;      // Which dimensions get relayed out over processors lanes.
   int              _processor;       // linear processor rank
@@ -56,10 +56,13 @@ class CartesianCommunicator {
   MPI_Comm communicator;
   typedef MPI_Request CommsRequest_t;
 #elif GRID_COMMS_MPI3
+  int shm_mode;
+
   MPI_Comm communicator;
   typedef MPI_Request CommsRequest_t;
-  const int MAXLOG2RANKSPERNODE = 16; // 65536 ranks per node adequate for now
+  const int MAXLOG2RANKSPERNODE = 16;               // 65536 ranks per node adequate for now
+  const uint64_t MAX_MPI_SHM_BYTES = 256*1024*1024; // 256MB shared memory for comms enought for 48^4 local vol comms

   std::vector<int> WorldDims;
   std::vector<int> GroupDims;
@@ -69,14 +72,23 @@ class CartesianCommunicator {
   std::vector<int> ShmCoor;
   std::vector<int> WorldCoor;
-  int GroupRank;
-  int ShmRank;
-  int WorldRank;
+  static std::vector<int> GroupRanks;
+  static std::vector<int> MyGroup;
+  static int ShmSetup;
+  static MPI_Win ShmWindow;
+  static MPI_Comm ShmComm;
-  int GroupSize;
-  int ShmSize;
+  void * ShmCommBuf;
+  std::vector<void *> ShmCommBufs;
+
+  int WorldRank;
   int WorldSize;
+  static int ShmRank;
+  static int ShmSize;
+  static int GroupSize;
+  static int GroupRank;
+
   std::vector<int> LexicographicToWorldRank;
 #else
   typedef int CommsRequest_t;
diff --git a/lib/communicator/Communicator_mpi.cc b/lib/communicator/Communicator_mpi.cc
index dff9811a..9c66202f 100644
--- a/lib/communicator/Communicator_mpi.cc
+++ b/lib/communicator/Communicator_mpi.cc
@@ -39,11 +39,11 @@ void CartesianCommunicator::Init(int *argc, char ***argv) {
   }
 }

-  int Rank(void) {
-    int pe;
-    MPI_Comm_rank(MPI_COMM_WORLD,&pe);
-    return pe;
-  }
+int Rank(void) {
+  int pe;
+  MPI_Comm_rank(MPI_COMM_WORLD,&pe);
+  return pe;
+}

 CartesianCommunicator::CartesianCommunicator(const std::vector<int> &processors)
 {
@@ -168,7 +168,6 @@ void CartesianCommunicator::SendToRecvFromComplete(std::vector<CommsRequest_t> &
   int nreq=list.size();
   std::vector<MPI_Status> status(nreq);
   int ierr = MPI_Waitall(nreq,&list[0],&status[0]);
-  assert(ierr==0);
 }
diff --git a/lib/communicator/Communicator_mpi3.cc b/lib/communicator/Communicator_mpi3.cc
index 11734fd9..1a36e610 100644
--- a/lib/communicator/Communicator_mpi3.cc
+++ b/lib/communicator/Communicator_mpi3.cc
@@ -30,6 +30,8 @@ Author: Peter Boyle
 namespace Grid {
+
+
 // Global used by Init and nowhere else. How to hide?
 int Rank(void) {
   int pe;
@@ -76,29 +78,129 @@ void CartesianCommunicator::ProcessorCoorFromRank(int rank, std::vector<int> &coor)
   rank = LexicographicToWorldRank[rank];
 }

+///////////////////////////////////////////////////////////////////////////////////////////////////
+// Info that is setup once and indept of cartesian layout
+///////////////////////////////////////////////////////////////////////////////////////////////////
+int CartesianCommunicator::ShmSetup = 0;
+int CartesianCommunicator::ShmRank;
+int CartesianCommunicator::ShmSize;
+int CartesianCommunicator::GroupRank;
+int CartesianCommunicator::GroupSize;
+MPI_Comm CartesianCommunicator::ShmComm;
+MPI_Win  CartesianCommunicator::ShmWindow;
+std::vector<int> CartesianCommunicator::GroupRanks;
+std::vector<int> CartesianCommunicator::MyGroup;
+
 CartesianCommunicator::CartesianCommunicator(const std::vector<int> &processors)
 {
+  _ndimension = processors.size();
-  std::cout << "Creating "<< _ndimension << " dim communicator "<
+  std::vector<int> world_ranks(WorldSize);
+  GroupRanks.resize(WorldSize);
+  MyGroup.resize(ShmSize);
+  for(int r=0;r());
+  int myleader = MyGroup[0];
+
+  std::vector<int> leaders_1hot(WorldSize,0);
+  std::vector<int> leaders_group(GroupSize,0);
+  leaders_1hot [ myleader ] = 1;
+
+  ///////////////////////////////////////////////////////////////////
+  // global sum leaders over comm world
+  ///////////////////////////////////////////////////////////////////
+  ierr=MPI_Allreduce(MPI_IN_PLACE,&leaders_1hot[0],WorldSize,MPI_INT,MPI_SUM,communicator);
+  assert(ierr==0);
+
+  ///////////////////////////////////////////////////////////////////
+  // find the group leaders world rank
+  ///////////////////////////////////////////////////////////////////
+  int group=0;
+  for(int l=0;l &processors)
   ShmDims.resize(_ndimension,1);
   GroupDims.resize(_ndimension);
-
+
   ShmCoor.resize(_ndimension);
   GroupCoor.resize(_ndimension);
   WorldCoor.resize(_ndimension);
@@ -129,12 +231,6 @@ CartesianCommunicator::CartesianCommunicator(const std::vector<int> &processors)
     ShmDims[dim]*=2;
     dim=(dim+1)%_ndimension;
   }
-
-  std::cout << "Shm group dims "< &processors)
   for(int d=0;d<_ndimension;d++){
     GroupDims[d] = WorldDims[d]/ShmDims[d];
   }
-  std::cout << "Group dims "<
-  std::vector<int> world_ranks(WorldSize);
-  std::vector<int> group_ranks(WorldSize);
-  std::vector<int> mygroup(GroupSize);
-  for(int r=0;r &processors)
   _processors = processors;
   _processor_coor.resize(_ndimension);
   for(int i=0;i<_ndimension;i++){
-    std::cout << " p " << _processors[i]<());
-  int myleader = mygroup[0];
-
-  std::vector<int> leaders_1hot(WorldSize,0);
-  std::vector<int> leaders_group(GroupSize,0);
-  leaders_1hot [ myleader ] = 1;
-
-  ///////////////////////////////////////////////////////////////////
-  // global sum leaders over comm world
-  ///////////////////////////////////////////////////////////////////
-  int ierr=MPI_Allreduce(MPI_IN_PLACE,&leaders_1hot[0],WorldSize,MPI_INT,MPI_SUM,communicator);
-  assert(ierr==0);
-
-  ///////////////////////////////////////////////////////////////////
-  // find the group leaders world rank
-  ///////////////////////////////////////////////////////////////////
-  int group=0;
-  for(int l=0;l &lis
 {
   MPI_Request xrq;
   MPI_Request rrq;
+  int rank = _processor;
   int ierr;
-  ierr =MPI_Isend(xmit, bytes, MPI_CHAR,dest,_processor,communicator,&xrq);
-  ierr|=MPI_Irecv(recv, bytes, MPI_CHAR,from,from,communicator,&rrq);
+
+  int tag;
+
+  int small = (bytes &list)
 {
   int nreq=list.size();
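
Illustrative sketch (not part of the patch): the "bounce through shared memory" above relies on MPI-3 shared-memory windows, i.e. an intranode communicator from MPI_Comm_split_type, a window from MPI_Win_allocate_shared, and peer base pointers from MPI_Win_shared_query, which is what the ShmComm/ShmWindow/ShmCommBuf/ShmCommBufs members added in Communicator_base.h hold. The standalone program below shows only that allocate-and-query pattern under those assumptions; names such as SHM_BYTES, my_slice and the hello-message exchange are invented for the example and are not taken from the patch.

// shm_window_sketch.cc : minimal MPI-3 shared-memory window example.
// Compile with:  mpicxx shm_window_sketch.cc -o shm_window_sketch
#include <mpi.h>
#include <cstdio>
#include <vector>

int main(int argc, char **argv)
{
  MPI_Init(&argc, &argv);

  int world_rank;
  MPI_Comm_rank(MPI_COMM_WORLD, &world_rank);

  // Communicator containing only the ranks that share this node's memory.
  MPI_Comm shm_comm;
  MPI_Comm_split_type(MPI_COMM_WORLD, MPI_COMM_TYPE_SHARED, world_rank,
                      MPI_INFO_NULL, &shm_comm);
  int shm_rank, shm_size;
  MPI_Comm_rank(shm_comm, &shm_rank);
  MPI_Comm_size(shm_comm, &shm_size);

  // Each rank contributes one slice of a node-wide shared window.
  const MPI_Aint SHM_BYTES = 1 << 20;   // placeholder slice size (1 MB)
  void   *my_slice = nullptr;
  MPI_Win shm_win;
  MPI_Win_allocate_shared(SHM_BYTES, 1, MPI_INFO_NULL, shm_comm,
                          &my_slice, &shm_win);

  // Resolve the base address of every peer's slice: an intranode "send"
  // is then just a store into the destination rank's slice.
  std::vector<void *> peer_slice(shm_size);
  for (int r = 0; r < shm_size; r++) {
    MPI_Aint sz;
    int      disp_unit;
    MPI_Win_shared_query(shm_win, r, &sz, &disp_unit, &peer_slice[r]);
  }

  // Passive-target epoch so direct loads/stores to the window are legal.
  MPI_Win_lock_all(MPI_MODE_NOCHECK, shm_win);

  // Write a message into the next node-local rank's slice ...
  int dest = (shm_rank + 1) % shm_size;
  std::snprintf(static_cast<char *>(peer_slice[dest]), SHM_BYTES,
                "hello from node-local rank %d", shm_rank);

  // ... and synchronise before reading what a peer wrote into ours.
  MPI_Win_sync(shm_win);
  MPI_Barrier(shm_comm);
  MPI_Win_sync(shm_win);
  std::printf("node-local rank %d sees: %s\n",
              shm_rank, static_cast<char *>(my_slice));

  MPI_Win_unlock_all(shm_win);
  MPI_Win_free(&shm_win);
  MPI_Comm_free(&shm_comm);
  MPI_Finalize();
  return 0;
}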