From b6a65059a21f00c2d9f2a01f10f550996d6acb02 Mon Sep 17 00:00:00 2001 From: azusayamaguchi Date: Mon, 24 Oct 2016 17:30:43 +0100 Subject: [PATCH] Update to use shared memory to contain the stencil comms buffers Tested on 2.1.1.1 1.2.1.1 4.1.1.1 1.4.1.1 2.2.1.1 subnode decompositions --- benchmarks/Benchmark_dwf.cc | 21 +- lib/Init.cc | 14 +- lib/Init.h | 2 + lib/Log.cc | 5 + lib/Log.h | 59 ++-- lib/Makefile.am | 4 + lib/Stencil.h | 73 ++--- lib/communicator/Communicator_base.cc | 132 +++++++++ lib/communicator/Communicator_base.h | 313 ++++++++++---------- lib/communicator/Communicator_mpi.cc | 41 +-- lib/communicator/Communicator_mpi3.cc | 388 ++++++++++++++----------- lib/communicator/Communicator_none.cc | 55 ++-- lib/communicator/Communicator_shmem.cc | 57 ++-- 13 files changed, 706 insertions(+), 458 deletions(-) create mode 100644 lib/communicator/Communicator_base.cc diff --git a/benchmarks/Benchmark_dwf.cc b/benchmarks/Benchmark_dwf.cc index f75f0385..c9e2fa22 100644 --- a/benchmarks/Benchmark_dwf.cc +++ b/benchmarks/Benchmark_dwf.cc @@ -156,6 +156,7 @@ int main (int argc, char ** argv) std::cout<1.0e-5) { + setCheckerboard(ssrc,ssrc_o); + setCheckerboard(ssrc,ssrc_e); + std::cout<< ssrc << std::endl; + } } @@ -306,7 +314,7 @@ int main (int argc, char ** argv) std::cout< & vec){ ///////////////////////////////////////////////////////// // ///////////////////////////////////////////////////////// +static int Grid_is_initialised = 0; + + void Grid_init(int *argc,char ***argv) { + GridLogger::StopWatch.Start(); + CartesianCommunicator::Init(argc,argv); // Parse command line args. - GridLogger::StopWatch.Start(); - std::string arg; std::vector logstreams; std::string defaultLog("Error,Warning,Message,Performance"); @@ -216,11 +219,14 @@ void Grid_init(int *argc,char ***argv) if( GridCmdOptionExists(*argv,*argv+*argc,"--lebesgue") ){ LebesgueOrder::UseLebesgueOrder=1; } - if( GridCmdOptionExists(*argv,*argv+*argc,"--cacheblocking") ){ arg= GridCmdOptionPayload(*argv,*argv+*argc,"--cacheblocking"); GridCmdOptionIntVector(arg,LebesgueOrder::Block); } + if( GridCmdOptionExists(*argv,*argv+*argc,"--timestamp") ){ + GridLogTimestamp(1); + } + GridParseLayout(*argv,*argc, Grid_default_latt, Grid_default_mpi); @@ -274,6 +280,8 @@ void Grid_init(int *argc,char ***argv) std::cout << "GNU General Public License for more details."< &GridDefaultMpi(void); const int &GridThreads(void) ; void GridSetThreads(int t) ; + void GridLogTimestamp(int); // Common parsing chores std::string GridCmdOptionPayload(char ** begin, char ** end, const std::string & option); diff --git a/lib/Log.cc b/lib/Log.cc index a55a1c9e..d4ac42ee 100644 --- a/lib/Log.cc +++ b/lib/Log.cc @@ -34,8 +34,13 @@ directory namespace Grid { GridStopWatch Logger::StopWatch; +int Logger::timestamp; std::ostream Logger::devnull(0); +void GridLogTimestamp(int on){ + Logger::Timestamp(on); +} + Colours GridLogColours(0); GridLogger GridLogError(1, "Error", GridLogColours, "RED"); GridLogger GridLogWarning(1, "Warning", GridLogColours, "YELLOW"); diff --git a/lib/Log.h b/lib/Log.h index 156f52ee..dd3fe927 100644 --- a/lib/Log.h +++ b/lib/Log.h @@ -37,10 +37,11 @@ #include #endif - namespace Grid { +namespace Grid { +////////////////////////////////////////////////////////////////////////////////////////////////// // Dress the output; use std::chrono for time stamping via the StopWatch class -int Rank(void); // used for early stage debug before library init 
+////////////////////////////////////////////////////////////////////////////////////////////////// class Colours{ @@ -55,7 +56,6 @@ public: void Active(bool activate){ is_active=activate; - if (is_active){ colour["BLACK"] ="\033[30m"; colour["RED"] ="\033[31m"; @@ -66,21 +66,18 @@ public: colour["CYAN"] ="\033[36m"; colour["WHITE"] ="\033[37m"; colour["NORMAL"] ="\033[0;39m"; - } else { - colour["BLACK"] =""; - colour["RED"] =""; - colour["GREEN"] =""; - colour["YELLOW"]=""; - colour["BLUE"] =""; - colour["PURPLE"]=""; - colour["CYAN"] =""; - colour["WHITE"] =""; - colour["NORMAL"]=""; - } - - -}; - + } else { + colour["BLACK"] =""; + colour["RED"] =""; + colour["GREEN"] =""; + colour["YELLOW"]=""; + colour["BLUE"] =""; + colour["PURPLE"]=""; + colour["CYAN"] =""; + colour["WHITE"] =""; + colour["NORMAL"]=""; + } + }; }; @@ -88,6 +85,7 @@ class Logger { protected: Colours &Painter; int active; + static int timestamp; std::string name, topName; std::string COLOUR; @@ -99,25 +97,28 @@ public: std::string evidence() {return Painter.colour["YELLOW"];} std::string colour() {return Painter.colour[COLOUR];} - Logger(std::string topNm, int on, std::string nm, Colours& col_class, std::string col) - : active(on), - name(nm), - topName(topNm), - Painter(col_class), - COLOUR(col){} ; + Logger(std::string topNm, int on, std::string nm, Colours& col_class, std::string col) : active(on), + name(nm), + topName(topNm), + Painter(col_class), + COLOUR(col) {} ; void Active(int on) {active = on;}; int isActive(void) {return active;}; + static void Timestamp(int on) {timestamp = on;}; friend std::ostream& operator<< (std::ostream& stream, Logger& log){ if ( log.active ) { - StopWatch.Stop(); - GridTime now = StopWatch.Elapsed(); - StopWatch.Start(); stream << log.background()<< log.topName << log.background()<< " : "; stream << log.colour() < > u_simd_send_buf_hide; - std::vector > u_simd_recv_buf_hide; - commVector u_send_buf; - commVector u_recv_buf_hide; + // std::vector > u_simd_send_buf_hide; + // std::vector > u_simd_recv_buf_hide; + // commVector u_send_buf_hide; + // commVector u_recv_buf_hide; + // These are used; either SHM objects or refs to the above symmetric heap vectors // depending on comms target cobj* u_recv_buf_p; @@ -439,36 +440,19 @@ PARALLEL_FOR_LOOP ///////////////////////////////////////////////////////////////////////////////// const int Nsimd = grid->Nsimd(); - uint8_t *shm_ptr = (uint8_t *)_grid->ShmBufferSelf(); + _grid->ShmBufferFreeAll(); u_simd_send_buf.resize(Nsimd); u_simd_recv_buf.resize(Nsimd); - u_send_buf.resize(_unified_buffer_size); - - if( ShmDirectCopy && shm_ptr != NULL ) { - - u_recv_buf_p=(cobj *)shm_ptr; shm_ptr+= _unified_buffer_size*sizeof(cobj); - for(int l=0;l(_unified_buffer_size)); - u_simd_recv_buf_hide.resize(Nsimd,commVector(_unified_buffer_size)); - - u_recv_buf_p=&u_recv_buf_hide[0]; - for(int l=0;lShmBufferMalloc(_unified_buffer_size*sizeof(cobj)); + u_recv_buf_p=(cobj *)_grid->ShmBufferMalloc(_unified_buffer_size*sizeof(cobj)); + for(int l=0;lShmBufferMalloc(_unified_buffer_size*sizeof(scalar_object)); + u_simd_send_buf[l] = (scalar_object *)_grid->ShmBufferMalloc(_unified_buffer_size*sizeof(scalar_object)); } PrecomputeByteOffsets(); - } void Local (int point, int dimension,int shiftpm,int cbmask) @@ -698,6 +682,7 @@ PARALLEL_FOR_LOOP calls++; Mergers.resize(0); Packets.resize(0); + _grid->StencilBarrier(); HaloGather(source,compress); this->CommunicateBegin(reqs); this->CommunicateComplete(reqs); @@ -836,19 +821,17 @@ PARALLEL_FOR_LOOP // try the 
direct copy if possible ///////////////////////////////////////////////////////// - cobj *u_send_buf_p = &u_send_buf[0]; - if (ShmDirectCopy) { - cobj *shm = (cobj *) _grid->ShmBuffer(xmit_to_rank); - if ( shm!=NULL) { - u_send_buf_p = shm; - } + + cobj *send_buf = (cobj *)_grid->ShmBufferTranslate(xmit_to_rank,u_recv_buf_p); + if ( (ShmDirectCopy==0)||send_buf==NULL ) { + cobj *send_buf = u_send_buf_p; } t_data-=usecond(); - Gather_plane_simple_table (face_table[face_idx],rhs,u_send_buf_p,compress,u_comm_offset,so); face_idx++; + Gather_plane_simple_table (face_table[face_idx],rhs,send_buf,compress,u_comm_offset,so); face_idx++; t_data+=usecond(); - AddPacket((void *)&u_send_buf_p[u_comm_offset], + AddPacket((void *)&send_buf[u_comm_offset], (void *)&u_recv_buf_p[u_comm_offset], xmit_to_rank, recv_from_rank, @@ -947,18 +930,16 @@ PARALLEL_FOR_LOOP _grid->ShiftedRanks(dimension,nbr_proc,xmit_to_rank,recv_from_rank); - - AddPacket((void *)sp,(void *)rp,xmit_to_rank,recv_from_rank,bytes); - - auto shm_or_rp = rp; - if (ShmDirectCopy) { - scalar_object *shm = (scalar_object *) _grid->ShmBufferTranslate(xmit_to_rank,sp); - if ( shm!=NULL) { - shm_or_rp = shm; - } - } + scalar_object *shm = (scalar_object *) _grid->ShmBufferTranslate(recv_from_rank,sp); + if ((ShmDirectCopy==0)||(shm==NULL)) { + shm = rp; + } - rpointers[i] = shm_or_rp; + // if Direct, StencilSendToRecvFrom will suppress copy to a peer on node + // assuming above pointer flip + AddPacket((void *)sp,(void *)rp,xmit_to_rank,recv_from_rank,bytes); + + rpointers[i] = shm; } else { diff --git a/lib/communicator/Communicator_base.cc b/lib/communicator/Communicator_base.cc new file mode 100644 index 00000000..1272b6a2 --- /dev/null +++ b/lib/communicator/Communicator_base.cc @@ -0,0 +1,132 @@ + /************************************************************************************* + + Grid physics library, www.github.com/paboyle/Grid + + Source file: ./lib/communicator/Communicator_none.cc + + Copyright (C) 2015 + +Author: Peter Boyle + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along + with this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+ + See the full license in the file "LICENSE" in the top level distribution directory + *************************************************************************************/ + /* END LEGAL */ +#include "Grid.h" +namespace Grid { + +/////////////////////////////////////////////////////////////// +// Info that is setup once and indept of cartesian layout +/////////////////////////////////////////////////////////////// +int CartesianCommunicator::ShmRank; +int CartesianCommunicator::ShmSize; +int CartesianCommunicator::GroupRank; +int CartesianCommunicator::GroupSize; +int CartesianCommunicator::WorldRank; +int CartesianCommunicator::WorldSize; +int CartesianCommunicator::Slave; +void * CartesianCommunicator::ShmCommBuf; + +///////////////////////////////// +// Alloc, free shmem region +///////////////////////////////// +void *CartesianCommunicator::ShmBufferMalloc(size_t bytes){ + // bytes = (bytes+sizeof(vRealD))&(~(sizeof(vRealD)-1));// align up bytes + void *ptr = (void *)heap_top; + heap_top += bytes; + heap_bytes+= bytes; + assert(heap_bytes < MAX_MPI_SHM_BYTES); + return ptr; +} +void *CartesianCommunicator::ShmBufferFreeAll(void) { + heap_top =(size_t)ShmBufferSelf(); + heap_bytes=0; +} + +///////////////////////////////// +// Grid information queries +///////////////////////////////// +int CartesianCommunicator::IsBoss(void) { return _processor==0; }; +int CartesianCommunicator::BossRank(void) { return 0; }; +int CartesianCommunicator::ThisRank(void) { return _processor; }; +const std::vector & CartesianCommunicator::ThisProcessorCoor(void) { return _processor_coor; }; +const std::vector & CartesianCommunicator::ProcessorGrid(void) { return _processors; }; +int CartesianCommunicator::ProcessorCount(void) { return _Nprocessors; }; + +//////////////////////////////////////////////////////////////////////////////// +// very VERY rarely (Log, serial RNG) we need world without a grid +//////////////////////////////////////////////////////////////////////////////// +int CartesianCommunicator::RankWorld(void) { return WorldRank; }; +int CartesianCommunicator::Ranks (void) { return WorldSize; }; +int CartesianCommunicator::Nodes (void) { return GroupSize; }; +int CartesianCommunicator::Cores (void) { return ShmSize; }; +int CartesianCommunicator::NodeRank (void) { return GroupRank; }; +int CartesianCommunicator::CoreRank (void) { return ShmRank; }; + +void CartesianCommunicator::GlobalSum(ComplexF &c) +{ + GlobalSumVector((float *)&c,2); +} +void CartesianCommunicator::GlobalSumVector(ComplexF *c,int N) +{ + GlobalSumVector((float *)c,2*N); +} +void CartesianCommunicator::GlobalSum(ComplexD &c) +{ + GlobalSumVector((double *)&c,2); +} +void CartesianCommunicator::GlobalSumVector(ComplexD *c,int N) +{ + GlobalSumVector((double *)c,2*N); +} + +#ifndef GRID_COMMS_MPI3 + +void CartesianCommunicator::StencilSendToRecvFromBegin(std::vector &list, + void *xmit, + int xmit_to_rank, + void *recv, + int recv_from_rank, + int bytes) +{ + SendToRecvFromBegin(list,xmit,xmit_to_rank,recv,recv_from_rank,bytes); +} +void CartesianCommunicator::StencilSendToRecvFromComplete(std::vector &waitall) +{ + SendToRecvFromComplete(waitall); +} +void StencilBarrier(void){}; + +commVector CartesianCommunicator::ShmBufStorageVector; + +void *CartesianCommunicator::ShmBufferSelf(void) { return ShmCommBuf; } +void *CartesianCommunicator::ShmBuffer(int rank) { + if (rank != ShmRank ) return NULL; + else return ShmCommBuf; +} +void *CartesianCommunicator::ShmBufferTranslate(int rank,void * local_p) { + if (rank != 
ShmRank ) return NULL; + else return local_p; +} +void CartesianCommunicator::ShmInitGeneric(void){ + ShmBufStorageVector.resize(MAX_MPI_SHM_BYTES); + ShmCommBuf=(void *)&ShmBufStorageVector[0]; +} + +#endif + +} + diff --git a/lib/communicator/Communicator_base.h b/lib/communicator/Communicator_base.h index 4139f72a..576c749e 100644 --- a/lib/communicator/Communicator_base.h +++ b/lib/communicator/Communicator_base.h @@ -40,169 +40,188 @@ Author: Peter Boyle #ifdef GRID_COMMS_SHMEM #include #endif + namespace Grid { + class CartesianCommunicator { public: + // 65536 ranks per node adequate for now + // 128MB shared memory for comms enought for 48^4 local vol comms + // Give external control (command line override?) of this + + static const int MAXLOG2RANKSPERNODE = 16; + static const uint64_t MAX_MPI_SHM_BYTES = 128*1024*1024; + // Communicator should know nothing of the physics grid, only processor grid. - - int _Nprocessors; // How many in all - std::vector _processors; // Which dimensions get relayed out over processors lanes. - int _processor; // linear processor rank - std::vector _processor_coor; // linear processor coordinate - unsigned long _ndimension; + int _Nprocessors; // How many in all + std::vector _processors; // Which dimensions get relayed out over processors lanes. + int _processor; // linear processor rank + std::vector _processor_coor; // linear processor coordinate + unsigned long _ndimension; -#ifdef GRID_COMMS_MPI - MPI_Comm communicator; - typedef MPI_Request CommsRequest_t; -#elif GRID_COMMS_MPI3 - int shm_mode; - - MPI_Comm communicator; - typedef MPI_Request CommsRequest_t; - - const int MAXLOG2RANKSPERNODE = 16; // 65536 ranks per node adequate for now - const uint64_t MAX_MPI_SHM_BYTES = 256*1024*1024; // 256MB shared memory for comms enought for 48^4 local vol comms - - std::vector WorldDims; - std::vector GroupDims; - std::vector ShmDims; - - std::vector GroupCoor; - std::vector ShmCoor; - std::vector WorldCoor; - - static std::vector GroupRanks; - static std::vector MyGroup; - static int ShmSetup; - static MPI_Win ShmWindow; - static MPI_Comm ShmComm; - - void * ShmCommBuf; - std::vector ShmCommBufs; - - int WorldRank; - int WorldSize; - - static int ShmRank; - static int ShmSize; - static int GroupSize; - static int GroupRank; - - std::vector LexicographicToWorldRank; +#if defined (GRID_COMMS_MPI) || defined (GRID_COMMS_MPI3) + MPI_Comm communicator; + static MPI_Comm communicator_world; + typedef MPI_Request CommsRequest_t; #else - typedef int CommsRequest_t; + typedef int CommsRequest_t; #endif - static void Init(int *argc, char ***argv); + //////////////////////////////////////////////////////////////////// + // Helper functionality for SHM Windows common to all other impls + //////////////////////////////////////////////////////////////////// + // Longer term; drop this in favour of a master / slave model with + // cartesian communicator on a subset of ranks, slave ranks controlled + // by group leader with data xfer via shared memory + //////////////////////////////////////////////////////////////////// +#ifdef GRID_COMMS_MPI3 + std::vector WorldDims; + std::vector GroupDims; + std::vector ShmDims; + + std::vector GroupCoor; + std::vector ShmCoor; + std::vector WorldCoor; + + static std::vector GroupRanks; + static std::vector MyGroup; + static int ShmSetup; + static MPI_Win ShmWindow; + static MPI_Comm ShmComm; + + std::vector LexicographicToWorldRank; + + static std::vector ShmCommBufs; +#else + static void ShmInitGeneric(void); + static commVector 
ShmBufStorageVector; +#endif + static void * ShmCommBuf; + size_t heap_top; + size_t heap_bytes; + void *ShmBufferSelf(void); + void *ShmBuffer(int rank); + void *ShmBufferTranslate(int rank,void * local_p); + void *ShmBufferMalloc(size_t bytes); + void *ShmBufferFreeAll(void) ; + + //////////////////////////////////////////////// + // Must call in Grid startup + //////////////////////////////////////////////// + static void Init(int *argc, char ***argv); + + //////////////////////////////////////////////// + // Constructor of any given grid + //////////////////////////////////////////////// + CartesianCommunicator(const std::vector &pdimensions_in); + + //////////////////////////////////////////////////////////////////////////////////////// + // Wraps MPI_Cart routines, or implements equivalent on other impls + //////////////////////////////////////////////////////////////////////////////////////// + void ShiftedRanks(int dim,int shift,int & source, int & dest); + int RankFromProcessorCoor(std::vector &coor); + void ProcessorCoorFromRank(int rank,std::vector &coor); + + ///////////////////////////////// + // Grid information and queries + ///////////////////////////////// + static int ShmRank; + static int ShmSize; + static int GroupSize; + static int GroupRank; + static int WorldRank; + static int WorldSize; + static int Slave; + + int IsBoss(void) ; + int BossRank(void) ; + int ThisRank(void) ; + const std::vector & ThisProcessorCoor(void) ; + const std::vector & ProcessorGrid(void) ; + int ProcessorCount(void) ; + static int Ranks (void); + static int Nodes (void); + static int Cores (void); + static int NodeRank (void); + static int CoreRank (void); - // Constructor - CartesianCommunicator(const std::vector &pdimensions_in); + //////////////////////////////////////////////////////////////////////////////// + // very VERY rarely (Log, serial RNG) we need world without a grid + //////////////////////////////////////////////////////////////////////////////// + static int RankWorld(void) ; + static void BroadcastWorld(int root,void* data, int bytes); + + //////////////////////////////////////////////////////////// + // Reduction + //////////////////////////////////////////////////////////// + void GlobalSum(RealF &); + void GlobalSumVector(RealF *,int N); + void GlobalSum(RealD &); + void GlobalSumVector(RealD *,int N); + void GlobalSum(uint32_t &); + void GlobalSum(uint64_t &); + void GlobalSum(ComplexF &c); + void GlobalSumVector(ComplexF *c,int N); + void GlobalSum(ComplexD &c); + void GlobalSumVector(ComplexD *c,int N); + + template void GlobalSum(obj &o){ + typedef typename obj::scalar_type scalar_type; + int words = sizeof(obj)/sizeof(scalar_type); + scalar_type * ptr = (scalar_type *)& o; + GlobalSumVector(ptr,words); + } + + //////////////////////////////////////////////////////////// + // Face exchange, buffer swap in translational invariant way + //////////////////////////////////////////////////////////// + void SendToRecvFrom(void *xmit, + int xmit_to_rank, + void *recv, + int recv_from_rank, + int bytes); + + void SendRecvPacket(void *xmit, + void *recv, + int xmit_to_rank, + int recv_from_rank, + int bytes); + + void SendToRecvFromBegin(std::vector &list, + void *xmit, + int xmit_to_rank, + void *recv, + int recv_from_rank, + int bytes); + + void SendToRecvFromComplete(std::vector &waitall); - // Wraps MPI_Cart routines - void ShiftedRanks(int dim,int shift,int & source, int & dest); - int RankFromProcessorCoor(std::vector &coor); - void ProcessorCoorFromRank(int 
rank,std::vector &coor); + void StencilSendToRecvFromBegin(std::vector &list, + void *xmit, + int xmit_to_rank, + void *recv, + int recv_from_rank, + int bytes); + + void StencilSendToRecvFromComplete(std::vector &waitall); + void StencilBarrier(void); - // Helper function for SHM Windows in MPI3 - void *ShmBufferSelf(void); - void *ShmBuffer(int rank); - - ///////////////////////////////// - // Grid information queries - ///////////////////////////////// - int IsBoss(void) { return _processor==0; }; - int BossRank(void) { return 0; }; - int ThisRank(void) { return _processor; }; - const std::vector & ThisProcessorCoor(void) { return _processor_coor; }; - const std::vector & ProcessorGrid(void) { return _processors; }; - int ProcessorCount(void) { return _Nprocessors; }; - - //////////////////////////////////////////////////////////// - // Reduction - //////////////////////////////////////////////////////////// - void GlobalSum(RealF &); - void GlobalSumVector(RealF *,int N); - - void GlobalSum(RealD &); - void GlobalSumVector(RealD *,int N); - - void GlobalSum(uint32_t &); - void GlobalSum(uint64_t &); - - void GlobalSum(ComplexF &c) - { - GlobalSumVector((float *)&c,2); - } - void GlobalSumVector(ComplexF *c,int N) - { - GlobalSumVector((float *)c,2*N); - } - - void GlobalSum(ComplexD &c) - { - GlobalSumVector((double *)&c,2); - } - void GlobalSumVector(ComplexD *c,int N) - { - GlobalSumVector((double *)c,2*N); - } - - template void GlobalSum(obj &o){ - typedef typename obj::scalar_type scalar_type; - int words = sizeof(obj)/sizeof(scalar_type); - scalar_type * ptr = (scalar_type *)& o; - GlobalSumVector(ptr,words); - } - //////////////////////////////////////////////////////////// - // Face exchange, buffer swap in translational invariant way - //////////////////////////////////////////////////////////// - void SendToRecvFrom(void *xmit, - int xmit_to_rank, - void *recv, - int recv_from_rank, - int bytes); - - void SendRecvPacket(void *xmit, - void *recv, - int xmit_to_rank, - int recv_from_rank, - int bytes); - - void SendToRecvFromBegin(std::vector &list, - void *xmit, - int xmit_to_rank, - void *recv, - int recv_from_rank, - int bytes); - void SendToRecvFromComplete(std::vector &waitall); - void StencilSendToRecvFromBegin(std::vector &list, - void *xmit, - int xmit_to_rank, - void *recv, - int recv_from_rank, - int bytes); - void StencilSendToRecvFromComplete(std::vector &waitall) - { - SendToRecvFromComplete(waitall); - } - - //////////////////////////////////////////////////////////// - // Barrier - //////////////////////////////////////////////////////////// - void Barrier(void); - - //////////////////////////////////////////////////////////// - // Broadcast a buffer and composite larger - //////////////////////////////////////////////////////////// - void Broadcast(int root,void* data, int bytes); - template void Broadcast(int root,obj &data) + //////////////////////////////////////////////////////////// + // Barrier + //////////////////////////////////////////////////////////// + void Barrier(void); + + //////////////////////////////////////////////////////////// + // Broadcast a buffer and composite larger + //////////////////////////////////////////////////////////// + void Broadcast(int root,void* data, int bytes); + + template void Broadcast(int root,obj &data) { Broadcast(root,(void *)&data,sizeof(data)); }; - static void BroadcastWorld(int root,void* data, int bytes); - }; } diff --git a/lib/communicator/Communicator_mpi.cc b/lib/communicator/Communicator_mpi.cc index 
4291b319..a638eebb 100644 --- a/lib/communicator/Communicator_mpi.cc +++ b/lib/communicator/Communicator_mpi.cc @@ -30,19 +30,28 @@ Author: Peter Boyle namespace Grid { - // Should error check all MPI calls. + +/////////////////////////////////////////////////////////////////////////////////////////////////// +// Info that is setup once and indept of cartesian layout +/////////////////////////////////////////////////////////////////////////////////////////////////// +MPI_Comm CartesianCommunicator::communicator_world; + +// Should error check all MPI calls. void CartesianCommunicator::Init(int *argc, char ***argv) { int flag; MPI_Initialized(&flag); // needed to coexist with other libs apparently if ( !flag ) { MPI_Init(argc,argv); } -} - -int Rank(void) { - int pe; - MPI_Comm_rank(MPI_COMM_WORLD,&pe); - return pe; + MPI_Comm_dup (MPI_COMM_WORLD,&communicator_world); + MPI_Comm_rank(communicator_world,&WorldRank); + MPI_Comm_size(communicator_world,&WorldSize); + ShmRank=0; + ShmSize=1; + GroupRank=WorldRank; + GroupSize=WorldSize; + Slave =0; + ShmInitGeneric(); } CartesianCommunicator::CartesianCommunicator(const std::vector &processors) @@ -54,7 +63,7 @@ CartesianCommunicator::CartesianCommunicator(const std::vector &processors) _processors = processors; _processor_coor.resize(_ndimension); - MPI_Cart_create(MPI_COMM_WORLD, _ndimension,&_processors[0],&periodic[0],1,&communicator); + MPI_Cart_create(communicator_world, _ndimension,&_processors[0],&periodic[0],1,&communicator); MPI_Comm_rank(communicator,&_processor); MPI_Cart_coords(communicator,_processor,_ndimension,&_processor_coor[0]); @@ -67,15 +76,6 @@ CartesianCommunicator::CartesianCommunicator(const std::vector &processors) assert(Size==_Nprocessors); } -void *CartesianCommunicator::ShmBufferSelf(void) -{ - return NULL; -} -void *CartesianCommunicator::ShmBuffer(int rank) -{ - return NULL; -} - void CartesianCommunicator::GlobalSum(uint32_t &u){ int ierr=MPI_Allreduce(MPI_IN_PLACE,&u,1,MPI_UINT32_T,MPI_SUM,communicator); assert(ierr==0); @@ -194,14 +194,17 @@ void CartesianCommunicator::Broadcast(int root,void* data, int bytes) communicator); assert(ierr==0); } - + /////////////////////////////////////////////////////// + // Should only be used prior to Grid Init finished. + // Check for this? + /////////////////////////////////////////////////////// void CartesianCommunicator::BroadcastWorld(int root,void* data, int bytes) { int ierr= MPI_Bcast(data, bytes, MPI_BYTE, root, - MPI_COMM_WORLD); + communicator_world); assert(ierr==0); } diff --git a/lib/communicator/Communicator_mpi3.cc b/lib/communicator/Communicator_mpi3.cc index f5bbdbda..00b0ca11 100644 --- a/lib/communicator/Communicator_mpi3.cc +++ b/lib/communicator/Communicator_mpi3.cc @@ -1,4 +1,3 @@ - /************************************************************************************* Grid physics library, www.github.com/paboyle/Grid @@ -33,26 +32,197 @@ Author: Peter Boyle namespace Grid { +/////////////////////////////////////////////////////////////////////////////////////////////////// +// Info that is setup once and indept of cartesian layout +/////////////////////////////////////////////////////////////////////////////////////////////////// +int CartesianCommunicator::ShmSetup = 0; -// Global used by Init and nowhere else. How to hide? 
-int Rank(void) { - int pe; - MPI_Comm_rank(MPI_COMM_WORLD,&pe); - return pe; +MPI_Comm CartesianCommunicator::communicator_world; +MPI_Comm CartesianCommunicator::ShmComm; +MPI_Win CartesianCommunicator::ShmWindow; + +std::vector CartesianCommunicator::GroupRanks; +std::vector CartesianCommunicator::MyGroup; +std::vector CartesianCommunicator::ShmCommBufs; + +void *CartesianCommunicator::ShmBufferSelf(void) +{ + return ShmCommBufs[ShmRank]; } - // Should error check all MPI calls. +void *CartesianCommunicator::ShmBuffer(int rank) +{ + int gpeer = GroupRanks[rank]; + if (gpeer == MPI_UNDEFINED){ + return NULL; + } else { + return ShmCommBufs[gpeer]; + } +} +void *CartesianCommunicator::ShmBufferTranslate(int rank,void * local_p) +{ + int gpeer = GroupRanks[rank]; + if (gpeer == MPI_UNDEFINED){ + return NULL; + } else { + uint64_t offset = (uint64_t)local_p - (uint64_t)ShmCommBufs[ShmRank]; + uint64_t remote = (uint64_t)ShmCommBufs[gpeer]+offset; + return (void *) remote; + } +} + void CartesianCommunicator::Init(int *argc, char ***argv) { int flag; MPI_Initialized(&flag); // needed to coexist with other libs apparently if ( !flag ) { MPI_Init(argc,argv); } -} - //////////////////////////////////////////////////////////////////////////////////////////////////////////// - // Want to implement some magic ... Group sub-cubes into those on same node - // - //////////////////////////////////////////////////////////////////////////////////////////////////////////// + MPI_Comm_dup (MPI_COMM_WORLD,&communicator_world); + MPI_Comm_rank(communicator_world,&WorldRank); + MPI_Comm_size(communicator_world,&WorldSize); + + ///////////////////////////////////////////////////////////////////// + // Split into groups that can share memory + ///////////////////////////////////////////////////////////////////// + MPI_Comm_split_type(communicator_world, MPI_COMM_TYPE_SHARED, 0, MPI_INFO_NULL,&ShmComm); + MPI_Comm_rank(ShmComm ,&ShmRank); + MPI_Comm_size(ShmComm ,&ShmSize); + GroupSize = WorldSize/ShmSize; + + ///////////////////////////////////////////////////////////////////// + // find world ranks in our SHM group (i.e. 
which ranks are on our node) + ///////////////////////////////////////////////////////////////////// + MPI_Group WorldGroup, ShmGroup; + MPI_Comm_group (communicator_world, &WorldGroup); + MPI_Comm_group (ShmComm, &ShmGroup); + + std::vector world_ranks(WorldSize); + GroupRanks.resize(WorldSize); + MyGroup.resize(ShmSize); + for(int r=0;r()); + int myleader = MyGroup[0]; + + std::vector leaders_1hot(WorldSize,0); + std::vector leaders_group(GroupSize,0); + leaders_1hot [ myleader ] = 1; + + /////////////////////////////////////////////////////////////////// + // global sum leaders over comm world + /////////////////////////////////////////////////////////////////// + int ierr=MPI_Allreduce(MPI_IN_PLACE,&leaders_1hot[0],WorldSize,MPI_INT,MPI_SUM,communicator_world); + assert(ierr==0); + + /////////////////////////////////////////////////////////////////// + // find the group leaders world rank + /////////////////////////////////////////////////////////////////// + int group=0; + for(int l=0;l coor = _processor_coor; @@ -80,139 +250,13 @@ void CartesianCommunicator::ProcessorCoorFromRank(int rank, std::vector &c rank = LexicographicToWorldRank[rank]; } -/////////////////////////////////////////////////////////////////////////////////////////////////// -// Info that is setup once and indept of cartesian layout -/////////////////////////////////////////////////////////////////////////////////////////////////// -int CartesianCommunicator::ShmSetup = 0; -int CartesianCommunicator::ShmRank; -int CartesianCommunicator::ShmSize; -int CartesianCommunicator::GroupRank; -int CartesianCommunicator::GroupSize; -MPI_Comm CartesianCommunicator::ShmComm; -MPI_Win CartesianCommunicator::ShmWindow; -std::vector CartesianCommunicator::GroupRanks; -std::vector CartesianCommunicator::MyGroup; - CartesianCommunicator::CartesianCommunicator(const std::vector &processors) { - - _ndimension = processors.size(); - - WorldDims = processors; - - communicator = MPI_COMM_WORLD; - MPI_Comm_rank(communicator,&WorldRank); - MPI_Comm_size(communicator,&WorldSize); - - ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// - // Plan: allocate a fixed SHM region. Scratch that is just used via some scheme during stencil comms, with no allocate free. - ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// - // Does every grid need one, or could we share across all grids via a singleton/guard? int ierr; - if ( !ShmSetup ) { + communicator=communicator_world; - MPI_Comm_split_type(communicator, MPI_COMM_TYPE_SHARED, 0, MPI_INFO_NULL,&ShmComm); - MPI_Comm_rank(ShmComm ,&ShmRank); - MPI_Comm_size(ShmComm ,&ShmSize); - GroupSize = WorldSize/ShmSize; - - ///////////////////////////////////////////////////////////////////// - // find world ranks in our SHM group (i.e. 
which ranks are on our node) - ///////////////////////////////////////////////////////////////////// - MPI_Group WorldGroup, ShmGroup; - MPI_Comm_group (communicator, &WorldGroup); - MPI_Comm_group (ShmComm, &ShmGroup); - - std::vector world_ranks(WorldSize); - GroupRanks.resize(WorldSize); - MyGroup.resize(ShmSize); - for(int r=0;r()); - int myleader = MyGroup[0]; - - std::vector leaders_1hot(WorldSize,0); - std::vector leaders_group(GroupSize,0); - leaders_1hot [ myleader ] = 1; - - /////////////////////////////////////////////////////////////////// - // global sum leaders over comm world - /////////////////////////////////////////////////////////////////// - ierr=MPI_Allreduce(MPI_IN_PLACE,&leaders_1hot[0],WorldSize,MPI_INT,MPI_SUM,communicator); - assert(ierr==0); - - /////////////////////////////////////////////////////////////////// - // find the group leaders world rank - /////////////////////////////////////////////////////////////////// - int group=0; - for(int l=0;l &processors) //////////////////////////////////////////////////////////////// int dim = 0; + std::vector WorldDims = processors; + ShmDims.resize(_ndimension,1); GroupDims.resize(_ndimension); @@ -346,21 +392,6 @@ void CartesianCommunicator::SendRecvPacket(void *xmit, } } - -void *CartesianCommunicator::ShmBufferSelf(void) -{ - return ShmCommBufs[ShmRank]; -} -void *CartesianCommunicator::ShmBuffer(int rank) -{ - int gpeer = GroupRanks[rank]; - if (gpeer == MPI_UNDEFINED){ - return NULL; - } else { - return ShmCommBufs[gpeer]; - } -} - // Basic Halo comms primitive void CartesianCommunicator::SendToRecvFromBegin(std::vector &list, void *xmit, @@ -369,6 +400,7 @@ void CartesianCommunicator::SendToRecvFromBegin(std::vector &lis int from, int bytes) { +#if 1 MPI_Request xrq; MPI_Request rrq; @@ -387,12 +419,11 @@ void CartesianCommunicator::SendToRecvFromBegin(std::vector &lis sequence++; - char *to_ptr = (char *)ShmCommBufs[gdest]; char *from_ptr = (char *)ShmCommBufs[ShmRank]; int small = (bytes &lis if ( small && (gdest !=MPI_UNDEFINED) ) { + char *to_ptr = (char *)ShmCommBufs[gdest]; + assert(gme != gdest); T *ip = (T *)xmit; T *op = (T *)to_ptr; PARALLEL_FOR_LOOP for(int w=0;w "<< gdest<<" " < &list, @@ -476,19 +528,29 @@ void CartesianCommunicator::StencilSendToRecvFromBegin(std::vector &list) +{ + SendToRecvFromComplete(list); +} + +void CartesianCommunicator::StencilBarrier(void) +{ MPI_Win_sync (ShmWindow); MPI_Barrier (ShmComm); MPI_Win_sync (ShmWindow); - } - void CartesianCommunicator::SendToRecvFromComplete(std::vector &list) { int nreq=list.size(); std::vector status(nreq); int ierr = MPI_Waitall(nreq,&list[0],&status[0]); - assert(ierr==0); } @@ -514,7 +576,7 @@ void CartesianCommunicator::BroadcastWorld(int root,void* data, int bytes) bytes, MPI_BYTE, root, - MPI_COMM_WORLD); + communicator_world); assert(ierr==0); } diff --git a/lib/communicator/Communicator_none.cc b/lib/communicator/Communicator_none.cc index 80b8fb90..d32fe4fa 100644 --- a/lib/communicator/Communicator_none.cc +++ b/lib/communicator/Communicator_none.cc @@ -28,18 +28,29 @@ Author: Peter Boyle #include "Grid.h" namespace Grid { +/////////////////////////////////////////////////////////////////////////////////////////////////// +// Info that is setup once and indept of cartesian layout +/////////////////////////////////////////////////////////////////////////////////////////////////// +int CartesianCommunicator::ShmRank; +int CartesianCommunicator::ShmSize; +int CartesianCommunicator::GroupRank; +int CartesianCommunicator::GroupSize; 
+int CartesianCommunicator::WorldRank; +int CartesianCommunicator::WorldSize; +int CartesianCommunicator::Slave; +void * CartesianCommunicator::ShmCommBuf; +commVector CartesianCommunicator::ShmBufStorageVector; + void CartesianCommunicator::Init(int *argc, char *** arv) { -} - -int Rank(void ){ return 0; }; -void *CartesianCommunicator::ShmBufferSelf(void) -{ - return NULL; -} -void *CartesianCommunicator::ShmBuffer(int rank) -{ - return NULL; + WorldRank = 0; + WorldSize = 1; + ShmRank=0; + ShmSize=1; + GroupRank=_WorldRank; + GroupSize=_WorldSize; + Slave =0; + ShmInitGeneric(); } CartesianCommunicator::CartesianCommunicator(const std::vector &processors) @@ -97,30 +108,16 @@ void CartesianCommunicator::SendToRecvFromComplete(std::vector & assert(0); } -void CartesianCommunicator::Barrier(void) -{ -} - -void CartesianCommunicator::Broadcast(int root,void* data, int bytes) -{ -} -void CartesianCommunicator::BroadcastWorld(int root,void* data, int bytes) -{ -} - - +void CartesianCommunicator::Barrier(void){} +void CartesianCommunicator::Broadcast(int root,void* data, int bytes) {} +void CartesianCommunicator::BroadcastWorld(int root,void* data, int bytes) { } +int CartesianCommunicator::RankFromProcessorCoor(std::vector &coor) { return 0;} +void CartesianCommunicator::ProcessorCoorFromRank(int rank, std::vector &coor){ assert(0);} void CartesianCommunicator::ShiftedRanks(int dim,int shift,int &source,int &dest) { source =0; dest=0; } -int CartesianCommunicator::RankFromProcessorCoor(std::vector &coor) -{ - return 0; -} -void CartesianCommunicator::ProcessorCoorFromRank(int rank, std::vector &coor) -{ -} } diff --git a/lib/communicator/Communicator_shmem.cc b/lib/communicator/Communicator_shmem.cc index 4af719b0..544b37c7 100644 --- a/lib/communicator/Communicator_shmem.cc +++ b/lib/communicator/Communicator_shmem.cc @@ -39,25 +39,19 @@ namespace Grid { BACKTRACEFILE(); \ }\ } -int Rank(void) { - return shmem_my_pe(); -} -typedef struct HandShake_t { - uint64_t seq_local; - uint64_t seq_remote; -} HandShake; -static Vector< HandShake > XConnections; -static Vector< HandShake > RConnections; -void *CartesianCommunicator::ShmBufferSelf(void) -{ - return NULL; -} -void *CartesianCommunicator::ShmBuffer(int rank) -{ - return NULL; -} +/////////////////////////////////////////////////////////////////////////////////////////////////// +// Info that is setup once and indept of cartesian layout +/////////////////////////////////////////////////////////////////////////////////////////////////// +int CartesianCommunicator::ShmRank; +int CartesianCommunicator::ShmSize; +int CartesianCommunicator::GroupRank; +int CartesianCommunicator::GroupSize; +int CartesianCommunicator::WorldRank; +int CartesianCommunicator::WorldSize; +int CartesianCommunicator::Slave; + void CartesianCommunicator::Init(int *argc, char ***argv) { shmem_init(); XConnections.resize(shmem_n_pes()); @@ -69,7 +63,36 @@ void CartesianCommunicator::Init(int *argc, char ***argv) { RConnections[pe].seq_remote= 0; } shmem_barrier_all(); + ShmInitGeneric(); } + + +// Should error check all MPI calls. 
+typedef struct HandShake_t {
+  uint64_t seq_local;
+  uint64_t seq_remote;
+} HandShake;
+
+static Vector< HandShake > XConnections;
+static Vector< HandShake > RConnections;
+
 CartesianCommunicator::CartesianCommunicator(const std::vector &processors)
 {
   _ndimension = processors.size();
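
Notes on the scheme introduced by this patch.

The MPI-3 path builds its notion of "node" once, in Init: the world communicator is split with MPI_Comm_split_type(MPI_COMM_TYPE_SHARED), and every world rank is translated into that shared-memory group so the stencil can later ask whether a destination rank is on-node. A trimmed sketch of that sequence (GroupRanks and ShmComm are the patch's own names; the free-function wrapper and the dropped error checking are not):

#include <mpi.h>
#include <vector>

// Mirrors the node-discovery step of CartesianCommunicator::Init in the
// patch; packaging it as a free function is illustrative only.
void SplitNodeLocal(MPI_Comm world, MPI_Comm &ShmComm,
                    std::vector<int> &GroupRanks)
{
  int WorldSize;
  MPI_Comm_size(world, &WorldSize);

  // Ranks that can share memory land in the same ShmComm.
  MPI_Comm_split_type(world, MPI_COMM_TYPE_SHARED, 0, MPI_INFO_NULL, &ShmComm);

  // Translate every world rank into the SHM group; ranks on other nodes
  // come back as MPI_UNDEFINED, which ShmBuffer()/ShmBufferTranslate()
  // later test against.
  MPI_Group WorldGroup, ShmGroup;
  MPI_Comm_group(world,   &WorldGroup);
  MPI_Comm_group(ShmComm, &ShmGroup);

  std::vector<int> world_ranks(WorldSize);
  for (int r = 0; r < WorldSize; r++) world_ranks[r] = r;

  GroupRanks.resize(WorldSize);
  MPI_Group_translate_ranks(WorldGroup, WorldSize, &world_ranks[0],
                            ShmGroup, &GroupRanks[0]);
}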
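
On top of the fixed shared-memory window, Communicator_base.cc adds a deliberately simple bump allocator: ShmBufferMalloc hands out consecutive slices of the segment and ShmBufferFreeAll rewinds the whole heap, which the stencil constructor calls once before carving out u_recv_buf_p and the per-lane SIMD buffers. A standalone sketch of the same scheme (member and constant names follow the patch; the wrapper struct does not, and FreeAll is written here to return void):

#include <cassert>
#include <cstddef>
#include <cstdint>

// Bump allocator over the node-shared comms segment.
struct ShmHeap {
  static const std::size_t MAX_MPI_SHM_BYTES = 128*1024*1024;

  void        *ShmCommBuf;  // base address of the shared segment
  uintptr_t    heap_top;    // next free address
  std::size_t  heap_bytes;  // bytes handed out since the last rewind

  void *ShmBufferMalloc(std::size_t bytes) {
    void *ptr   = (void *)heap_top;   // current top of the heap...
    heap_top   += bytes;              // ...bumped past this request
    heap_bytes += bytes;
    assert(heap_bytes < MAX_MPI_SHM_BYTES); // must fit inside the segment
    return ptr;
  }

  void ShmBufferFreeAll(void) {       // "free" == rewind everything
    heap_top   = (uintptr_t)ShmCommBuf;
    heap_bytes = 0;
  }
};

Because every rank lays its buffers out at the same offsets from its own base, a buffer can be located inside a neighbour's segment by offset arithmetic alone, which is what the next sketch relies on.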
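
ShmBufferTranslate is the piece that makes the direct copy possible: all segments in the node window are laid out identically, so a pointer into this rank's segment maps into a peer's segment by swapping the base address. The gather path in Stencil.h then targets either the on-node peer's receive buffer or the local send buffer. A condensed sketch; ChooseGatherTarget is a hypothetical helper name, not a routine in the patch, and simply restates the branch in the stencil gather:

#include <mpi.h>
#include <cstdint>
#include <vector>

// Filled in during Init(), as in the patch:
//   GroupRanks[r]  : SHM-group rank of world rank r, or MPI_UNDEFINED
//   ShmCommBufs[s] : base of SHM rank s's segment in our address space
extern std::vector<int>   GroupRanks;
extern std::vector<void*> ShmCommBufs;
extern int ShmRank;

void *ShmBufferTranslate(int rank, void *local_p)
{
  int gpeer = GroupRanks[rank];
  if (gpeer == MPI_UNDEFINED) return nullptr;  // peer lives on another node
  // Identical layouts => same offset, different base.
  uint64_t offset = (uint64_t)local_p - (uint64_t)ShmCommBufs[ShmRank];
  return (void *)((uint64_t)ShmCommBufs[gpeer] + offset);
}

// Send-buffer selection for the halo gather: write straight into the
// on-node peer's receive buffer if we can, otherwise fall back to our own
// send buffer and let MPI move the data.
template <class cobj>
cobj *ChooseGatherTarget(int xmit_to_rank, cobj *u_recv_buf_p,
                         cobj *u_send_buf_p, int ShmDirectCopy)
{
  cobj *send_buf = (cobj *)ShmBufferTranslate(xmit_to_rank, u_recv_buf_p);
  if ((ShmDirectCopy == 0) || (send_buf == nullptr)) send_buf = u_send_buf_p;
  return send_buf;
}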