From 1a0163f45c43a304271bb9e8a99052c012b4b736 Mon Sep 17 00:00:00 2001 From: paboyle Date: Mon, 8 Jan 2018 11:26:11 +0000 Subject: [PATCH 01/17] Updated to do list --- TODO | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/TODO b/TODO index 83bfda5e..95ccf1df 100644 --- a/TODO +++ b/TODO @@ -4,17 +4,17 @@ TODO: Large item work list: 1)- BG/Q port and check ; Andrew says ok. -2)- Christoph's local basis expansion Lanczos --- 3a)- RNG I/O in ILDG/SciDAC (minor) -3b)- Precision conversion and sort out localConvert <-- partial/easy 3c)- Consistent linear solver flop count/rate -- PARTIAL, time but no flop/s yet 4)- Physical propagator interface -5)- Conserved currents 6)- Multigrid Wilson and DWF, compare to other Multigrid implementations 7)- HDCR resume - +---------------------------- Recent DONE +-- Precision conversion and sort out localConvert <-- partial/easy +-- Conserved currents (Andrew) +-- Split grid +-- Christoph's local basis expansion Lanczos -- MultiRHS with spread out extra dim -- Go through filesystem with SciDAC I/O ; <-- DONE ; bmark cori -- Lanczos Remove DenseVector, DenseMatrix; Use Eigen instead. <-- DONE -- GaugeFix into central location <-- DONE From 7b3ed160aa22c11cfb8e5acbd7e4414a4d718305 Mon Sep 17 00:00:00 2001 From: paboyle Date: Mon, 8 Jan 2018 11:26:48 +0000 Subject: [PATCH 02/17] Rationalise MPI options --- configure.ac | 22 +++------------------- 1 file changed, 3 insertions(+), 19 deletions(-) diff --git a/configure.ac b/configure.ac index 496f7fd7..468d9d5f 100644 --- a/configure.ac +++ b/configure.ac @@ -337,15 +337,11 @@ case ${ac_PRECISION} in esac ###################### Shared memory allocation technique under MPI3 -AC_ARG_ENABLE([shm],[AC_HELP_STRING([--enable-shm=shmget|shmopen|hugetlbfs], +AC_ARG_ENABLE([shm],[AC_HELP_STRING([--enable-shm=shmopen|hugetlbfs], [Select SHM allocation technique])],[ac_SHM=${enable_shm}],[ac_SHM=shmopen]) case ${ac_SHM} in - shmget) - AC_DEFINE([GRID_MPI3_SHMGET],[1],[GRID_MPI3_SHMGET] ) - ;; - shmopen) AC_DEFINE([GRID_MPI3_SHMOPEN],[1],[GRID_MPI3_SHMOPEN] ) ;; @@ -367,7 +363,7 @@ AC_ARG_ENABLE([shmpath],[AC_HELP_STRING([--enable-shmpath=path], AC_DEFINE_UNQUOTED([GRID_SHM_PATH],["$ac_SHMPATH"],[Path to a hugetlbfs filesystem for MMAPing]) ############### communication type selection -AC_ARG_ENABLE([comms],[AC_HELP_STRING([--enable-comms=none|mpi|mpi-auto|mpi3|mpi3-auto|shmem], +AC_ARG_ENABLE([comms],[AC_HELP_STRING([--enable-comms=none|mpi|mpi-auto], [Select communications])],[ac_COMMS=${enable_comms}],[ac_COMMS=none]) case ${ac_COMMS} in @@ -375,22 +371,10 @@ case ${ac_COMMS} in AC_DEFINE([GRID_COMMS_NONE],[1],[GRID_COMMS_NONE] ) comms_type='none' ;; - mpi3*) + mpi*) AC_DEFINE([GRID_COMMS_MPI3],[1],[GRID_COMMS_MPI3] ) comms_type='mpi3' ;; - mpit) - AC_DEFINE([GRID_COMMS_MPIT],[1],[GRID_COMMS_MPIT] ) - comms_type='mpit' - ;; - mpi*) - AC_DEFINE([GRID_COMMS_MPI],[1],[GRID_COMMS_MPI] ) - comms_type='mpi' - ;; - shmem) - AC_DEFINE([GRID_COMMS_SHMEM],[1],[GRID_COMMS_SHMEM] ) - comms_type='shmem' - ;; *) AC_MSG_ERROR([${ac_COMMS} unsupported --enable-comms option]); ;; From 9b32d51cd1a7ec710239ed280a94a3d836117e7a Mon Sep 17 00:00:00 2001 From: paboyle Date: Mon, 8 Jan 2018 11:27:14 +0000 Subject: [PATCH 03/17] Simplify comms layer proliferatoin --- benchmarks/Benchmark_comms.cc | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/benchmarks/Benchmark_comms.cc b/benchmarks/Benchmark_comms.cc index a270e3fa..29ccf96c 100644 --- a/benchmarks/Benchmark_comms.cc +++ 
b/benchmarks/Benchmark_comms.cc @@ -106,7 +106,7 @@ int main (int argc, char ** argv) for(int i=0;i requests; + std::vector requests; ncomm=0; for(int mu=0;mu<4;mu++){ @@ -202,7 +202,7 @@ int main (int argc, char ** argv) int recv_from_rank; { - std::vector requests; + std::vector requests; Grid.ShiftedRanks(mu,comm_proc,xmit_to_rank,recv_from_rank); Grid.SendToRecvFromBegin(requests, (void *)&xbuf[mu][0], @@ -215,7 +215,7 @@ int main (int argc, char ** argv) comm_proc = mpi_layout[mu]-1; { - std::vector requests; + std::vector requests; Grid.ShiftedRanks(mu,comm_proc,xmit_to_rank,recv_from_rank); Grid.SendToRecvFromBegin(requests, (void *)&xbuf[mu+4][0], @@ -290,7 +290,7 @@ int main (int argc, char ** argv) dbytes=0; ncomm=0; - std::vector requests; + std::vector requests; for(int mu=0;mu<4;mu++){ @@ -383,7 +383,7 @@ int main (int argc, char ** argv) for(int i=0;i requests; + std::vector requests; dbytes=0; ncomm=0; for(int mu=0;mu<4;mu++){ @@ -481,7 +481,7 @@ int main (int argc, char ** argv) for(int i=0;i requests; + std::vector requests; dbytes=0; ncomm=0; From 7eeab7f995332ae2a2ce60c318beb77c449fe0db Mon Sep 17 00:00:00 2001 From: paboyle Date: Mon, 8 Jan 2018 11:27:43 +0000 Subject: [PATCH 04/17] Simplify comms layers --- lib/communicator/Communicator.h | 1 + 1 file changed, 1 insertion(+) diff --git a/lib/communicator/Communicator.h b/lib/communicator/Communicator.h index 09ce50dc..d4ec5a13 100644 --- a/lib/communicator/Communicator.h +++ b/lib/communicator/Communicator.h @@ -28,6 +28,7 @@ Author: Peter Boyle #ifndef GRID_COMMUNICATOR_H #define GRID_COMMUNICATOR_H +#include #include #endif From 6ecf2807237f7b476c495cc97d42cd2c9a1c5c72 Mon Sep 17 00:00:00 2001 From: paboyle Date: Mon, 8 Jan 2018 11:28:04 +0000 Subject: [PATCH 05/17] Simplify comms layer proliferation --- lib/qcd/action/fermion/WilsonCompressor.h | 1 - 1 file changed, 1 deletion(-) diff --git a/lib/qcd/action/fermion/WilsonCompressor.h b/lib/qcd/action/fermion/WilsonCompressor.h index cc5c3c63..b47700ac 100644 --- a/lib/qcd/action/fermion/WilsonCompressor.h +++ b/lib/qcd/action/fermion/WilsonCompressor.h @@ -265,7 +265,6 @@ public: if ( timer3 ) std::cout << GridLogMessage << " timer3 (commsMergeShm) " < same_node; std::vector surface_list; From 0a68470f9a2042af2c5bc443bc3fdb33bfc58e77 Mon Sep 17 00:00:00 2001 From: paboyle Date: Mon, 8 Jan 2018 11:28:30 +0000 Subject: [PATCH 06/17] Simplify comms layers --- lib/stencil/Stencil.h | 1 - 1 file changed, 1 deletion(-) diff --git a/lib/stencil/Stencil.h b/lib/stencil/Stencil.h index 887d8a7c..69c010f4 100644 --- a/lib/stencil/Stencil.h +++ b/lib/stencil/Stencil.h @@ -105,7 +105,6 @@ template class CartesianStencil { // Stencil runs along coordinate axes only; NO diagonal fill in. 
public: - typedef CartesianCommunicator::CommsRequest_t CommsRequest_t; typedef typename cobj::vector_type vector_type; typedef typename cobj::scalar_type scalar_type; typedef typename cobj::scalar_object scalar_object; From b91282ad46630f006c9c678b33ec0d9448cec8d6 Mon Sep 17 00:00:00 2001 From: paboyle Date: Mon, 8 Jan 2018 11:28:52 +0000 Subject: [PATCH 07/17] Simplify comms layer proliferation --- lib/util/Init.cc | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/lib/util/Init.cc b/lib/util/Init.cc index 031f8f5a..4f99e491 100644 --- a/lib/util/Init.cc +++ b/lib/util/Init.cc @@ -220,11 +220,11 @@ void Grid_init(int *argc,char ***argv) arg= GridCmdOptionPayload(*argv,*argv+*argc,"--shm"); GridCmdOptionInt(arg,MB); uint64_t MB64 = MB; - CartesianCommunicator::MAX_MPI_SHM_BYTES = MB64*1024LL*1024LL; + GlobalSharedMemory::MAX_MPI_SHM_BYTES = MB64*1024LL*1024LL; } if( GridCmdOptionExists(*argv,*argv+*argc,"--shm-hugepages") ){ - CartesianCommunicator::Hugepages = 1; + GlobalSharedMemory::Hugepages = 1; } @@ -392,8 +392,8 @@ void Grid_init(int *argc,char ***argv) Grid_default_latt, Grid_default_mpi); - std::cout << GridLogMessage << "Requesting "<< CartesianCommunicator::MAX_MPI_SHM_BYTES <<" byte stencil comms buffers "< Date: Mon, 8 Jan 2018 11:29:20 +0000 Subject: [PATCH 08/17] Simplify proliferation of comms layers --- scripts/filelist | 2 +- tests/solver/Test_dwf_mrhs_cg_mpi.cc | 9 ++++++--- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/scripts/filelist b/scripts/filelist index 8d4b8e1a..74f8e334 100755 --- a/scripts/filelist +++ b/scripts/filelist @@ -6,7 +6,7 @@ home=`pwd` cd $home/lib HFILES=`find . -type f -name '*.h' -not -name '*Hdf5*' -not -path '*/gamma-gen/*' -not -path '*/Old/*' -not -path '*/Eigen/*'` HFILES="$HFILES" -CCFILES=`find . -type f -name '*.cc' -not -path '*/gamma-gen/*' -not -name '*Communicator*.cc' -not -name '*Hdf5*'` +CCFILES=`find . -type f -name '*.cc' -not -path '*/gamma-gen/*' -not -name '*Communicator*.cc' -not -name '*SharedMemory*.cc' -not -name '*Hdf5*'` HPPFILES=`find . 
-type f -name '*.hpp'` echo HFILES=$HFILES $HPPFILES > Make.inc echo >> Make.inc diff --git a/tests/solver/Test_dwf_mrhs_cg_mpi.cc b/tests/solver/Test_dwf_mrhs_cg_mpi.cc index 7e11d8d1..aa36ebbc 100644 --- a/tests/solver/Test_dwf_mrhs_cg_mpi.cc +++ b/tests/solver/Test_dwf_mrhs_cg_mpi.cc @@ -72,14 +72,17 @@ int main (int argc, char ** argv) int nrhs = 1; int me; for(int i=0;i Date: Mon, 8 Jan 2018 11:30:22 +0000 Subject: [PATCH 09/17] Simplify comms layer proliferation --- lib/Makefile.am | 18 ++++-------------- 1 file changed, 4 insertions(+), 14 deletions(-) diff --git a/lib/Makefile.am b/lib/Makefile.am index 6dd7899e..dc33e7cf 100644 --- a/lib/Makefile.am +++ b/lib/Makefile.am @@ -1,28 +1,18 @@ extra_sources= extra_headers= -if BUILD_COMMS_MPI - extra_sources+=communicator/Communicator_mpi.cc - extra_sources+=communicator/Communicator_base.cc -endif if BUILD_COMMS_MPI3 extra_sources+=communicator/Communicator_mpi3.cc extra_sources+=communicator/Communicator_base.cc -endif - -if BUILD_COMMS_MPIT - extra_sources+=communicator/Communicator_mpit.cc - extra_sources+=communicator/Communicator_base.cc -endif - -if BUILD_COMMS_SHMEM - extra_sources+=communicator/Communicator_shmem.cc - extra_sources+=communicator/Communicator_base.cc + extra_sources+=communicator/SharedMemoryMPI.cc + extra_sources+=communicator/SharedMemory.cc endif if BUILD_COMMS_NONE extra_sources+=communicator/Communicator_none.cc extra_sources+=communicator/Communicator_base.cc + extra_sources+=communicator/SharedMemoryNone.cc + extra_sources+=communicator/SharedMemory.cc endif if BUILD_HDF5 From 0091eec23a08fd94ed5711d887019d2359e3503a Mon Sep 17 00:00:00 2001 From: paboyle Date: Mon, 8 Jan 2018 11:31:32 +0000 Subject: [PATCH 10/17] Simplify communicator cases --- lib/communicator/Communicator_base.h | 112 ++++----------------------- 1 file changed, 13 insertions(+), 99 deletions(-) diff --git a/lib/communicator/Communicator_base.h b/lib/communicator/Communicator_base.h index 548515cd..a9b99c17 100644 --- a/lib/communicator/Communicator_base.h +++ b/lib/communicator/Communicator_base.h @@ -32,117 +32,33 @@ Author: Peter Boyle /////////////////////////////////// // Processor layout information /////////////////////////////////// -#ifdef GRID_COMMS_MPI -#include -#endif -#ifdef GRID_COMMS_MPI3 -#include -#endif -#ifdef GRID_COMMS_MPIT -#include -#endif -#ifdef GRID_COMMS_SHMEM -#include -#endif +#include namespace Grid { -class CartesianCommunicator { - public: +class CartesianCommunicator : public SharedMemory { +public: //////////////////////////////////////////// - // Isend/Irecv/Wait, or Sendrecv blocking + // Policies //////////////////////////////////////////// enum CommunicatorPolicy_t { CommunicatorPolicyConcurrent, CommunicatorPolicySequential }; static CommunicatorPolicy_t CommunicatorPolicy; static void SetCommunicatorPolicy(CommunicatorPolicy_t policy ) { CommunicatorPolicy = policy; } - - /////////////////////////////////////////// - // Up to 65536 ranks per node adequate for now - // 128MB shared memory for comms enought for 48^4 local vol comms - // Give external control (command line override?) of this - /////////////////////////////////////////// - static const int MAXLOG2RANKSPERNODE = 16; - static uint64_t MAX_MPI_SHM_BYTES; static int nCommThreads; - // use explicit huge pages - static int Hugepages; + //////////////////////////////////////////// // Communicator should know nothing of the physics grid, only processor grid. 
+ //////////////////////////////////////////// int _Nprocessors; // How many in all std::vector _processors; // Which dimensions get relayed out over processors lanes. int _processor; // linear processor rank std::vector _processor_coor; // linear processor coordinate - unsigned long _ndimension; - -#if defined (GRID_COMMS_MPI) || defined (GRID_COMMS_MPI3) || defined (GRID_COMMS_MPIT) - static MPI_Comm communicator_world; - - MPI_Comm communicator; - std::vector communicator_halo; - - typedef MPI_Request CommsRequest_t; - -#else - typedef int CommsRequest_t; -#endif - - - //////////////////////////////////////////////////////////////////// - // Helper functionality for SHM Windows common to all other impls - //////////////////////////////////////////////////////////////////// - // Longer term; drop this in favour of a master / slave model with - // cartesian communicator on a subset of ranks, slave ranks controlled - // by group leader with data xfer via shared memory - //////////////////////////////////////////////////////////////////// -#ifdef GRID_COMMS_MPI3 - - static int ShmRank; - static int ShmSize; - static int GroupRank; - static int GroupSize; - static int WorldRank; - static int WorldSize; - - std::vector WorldDims; - std::vector GroupDims; - std::vector ShmDims; - - std::vector GroupCoor; - std::vector ShmCoor; - std::vector WorldCoor; - - static std::vector GroupRanks; - static std::vector MyGroup; - static int ShmSetup; - static MPI_Win ShmWindow; - static MPI_Comm ShmComm; - - std::vector LexicographicToWorldRank; - - static std::vector ShmCommBufs; - -#else - static void ShmInitGeneric(void); - static commVector ShmBufStorageVector; -#endif - - ///////////////////////////////// - // Grid information and queries - // Implemented in Communicator_base.C - ///////////////////////////////// - static void * ShmCommBuf; - - - size_t heap_top; - size_t heap_bytes; - - void *ShmBufferSelf(void); - void *ShmBuffer(int rank); - void *ShmBufferTranslate(int rank,void * local_p); - void *ShmBufferMalloc(size_t bytes); - void ShmBufferFreeAll(void) ; + unsigned long _ndimension; + static Grid_MPI_Comm communicator_world; + Grid_MPI_Comm communicator; + std::vector communicator_halo; //////////////////////////////////////////////// // Must call in Grid startup @@ -158,13 +74,13 @@ class CartesianCommunicator { virtual ~CartesianCommunicator(); private: -#if defined (GRID_COMMS_MPI) || defined (GRID_COMMS_MPIT) || defined (GRID_COMMS_MPI3) + //////////////////////////////////////////////// // Private initialise from an MPI communicator // Can use after an MPI_Comm_split, but hidden from user so private //////////////////////////////////////////////// - void InitFromMPICommunicator(const std::vector &processors, MPI_Comm communicator_base); -#endif + void InitFromMPICommunicator(const std::vector &processors, Grid_MPI_Comm communicator_base); + public: //////////////////////////////////////////////////////////////////////////////////////// @@ -181,8 +97,6 @@ class CartesianCommunicator { const std::vector & ThisProcessorCoor(void) ; const std::vector & ProcessorGrid(void) ; int ProcessorCount(void) ; - int NodeCount(void) ; - int RankCount(void) ; //////////////////////////////////////////////////////////////////////////////// // very VERY rarely (Log, serial RNG) we need world without a grid From 357badce5ed7efac2df4c3f5bc5cf71815334c3a Mon Sep 17 00:00:00 2001 From: paboyle Date: Mon, 8 Jan 2018 11:32:16 +0000 Subject: [PATCH 11/17] Simplify communicator case proliferation --- 
lib/communicator/Communicator_base.cc | 288 -------------------------- 1 file changed, 288 deletions(-) diff --git a/lib/communicator/Communicator_base.cc b/lib/communicator/Communicator_base.cc index 3e561405..edbf26af 100644 --- a/lib/communicator/Communicator_base.cc +++ b/lib/communicator/Communicator_base.cc @@ -36,33 +36,9 @@ namespace Grid { /////////////////////////////////////////////////////////////// // Info that is setup once and indept of cartesian layout /////////////////////////////////////////////////////////////// -void * CartesianCommunicator::ShmCommBuf; -uint64_t CartesianCommunicator::MAX_MPI_SHM_BYTES = 1024LL*1024LL*1024LL; CartesianCommunicator::CommunicatorPolicy_t CartesianCommunicator::CommunicatorPolicy= CartesianCommunicator::CommunicatorPolicyConcurrent; int CartesianCommunicator::nCommThreads = -1; -int CartesianCommunicator::Hugepages = 0; - -///////////////////////////////// -// Alloc, free shmem region -///////////////////////////////// -void *CartesianCommunicator::ShmBufferMalloc(size_t bytes){ - // bytes = (bytes+sizeof(vRealD))&(~(sizeof(vRealD)-1));// align up bytes - void *ptr = (void *)heap_top; - heap_top += bytes; - heap_bytes+= bytes; - if (heap_bytes >= MAX_MPI_SHM_BYTES) { - std::cout<< " ShmBufferMalloc exceeded shared heap size -- try increasing with --shm flag" < row(_ndimension,1); - assert(dim>=0 && dim<_ndimension); - - // Split the communicator - row[dim] = _processors[dim]; - - int me; - CartesianCommunicator Comm(row,*this,me); - Comm.AllToAll(in,out,words,bytes); -} -void CartesianCommunicator::AllToAll(void *in,void *out,uint64_t words,uint64_t bytes) -{ - // MPI is a pain and uses "int" arguments - // 64*64*64*128*16 == 500Million elements of data. - // When 24*4 bytes multiples get 50x 10^9 >>> 2x10^9 Y2K bug. - // (Turns up on 32^3 x 64 Gparity too) - MPI_Datatype object; - int iwords; - int ibytes; - iwords = words; - ibytes = bytes; - assert(words == iwords); // safe to cast to int ? - assert(bytes == ibytes); // safe to cast to int ? - MPI_Type_contiguous(ibytes,MPI_BYTE,&object); - MPI_Type_commit(&object); - MPI_Alltoall(in,iwords,object,out,iwords,object,communicator); - MPI_Type_free(&object); -} -#endif - -#if defined( GRID_COMMS_MPI) || defined (GRID_COMMS_MPIT) -CartesianCommunicator::CartesianCommunicator(const std::vector &processors,const CartesianCommunicator &parent,int &srank) -{ - _ndimension = processors.size(); - - int parent_ndimension = parent._ndimension; assert(_ndimension >= parent._ndimension); - std::vector parent_processor_coor(_ndimension,0); - std::vector parent_processors (_ndimension,1); - - // Can make 5d grid from 4d etc... 
- int pad = _ndimension-parent_ndimension; - for(int d=0;d ccoor(_ndimension); // coor within subcommunicator - std::vector scoor(_ndimension); // coor of split within parent - std::vector ssize(_ndimension); // coor of split within parent - - for(int d=0;d<_ndimension;d++){ - ccoor[d] = parent_processor_coor[d] % processors[d]; - scoor[d] = parent_processor_coor[d] / processors[d]; - ssize[d] = parent_processors[d] / processors[d]; - } - int crank; // rank within subcomm ; srank is rank of subcomm within blocks of subcomms - // Mpi uses the reverse Lexico convention to us - Lexicographic::IndexFromCoorReversed(ccoor,crank,processors); - Lexicographic::IndexFromCoorReversed(scoor,srank,ssize); - - MPI_Comm comm_split; - if ( Nchild > 1 ) { - - if(0){ - std::cout << GridLogMessage<<"Child communicator of "<< std::hex << parent.communicator << std::dec< &processors, MPI_Comm communicator_base) -{ - _ndimension = processors.size(); - _processor_coor.resize(_ndimension); - - ///////////////////////////////// - // Count the requested nodes - ///////////////////////////////// - _Nprocessors=1; - _processors = processors; - for(int i=0;i<_ndimension;i++){ - _Nprocessors*=_processors[i]; - } - - std::vector periodic(_ndimension,1); - MPI_Cart_create(communicator_base, _ndimension,&_processors[0],&periodic[0],0,&communicator); - MPI_Comm_rank(communicator,&_processor); - MPI_Cart_coords(communicator,_processor,_ndimension,&_processor_coor[0]); - - if ( 0 && (communicator_base != communicator_world) ) { - std::cout << "InitFromMPICommunicator Cartesian communicator created with a non-world communicator"< &processors) -{ - InitFromMPICommunicator(processors,communicator_world); -} - -#endif - -#if !defined( GRID_COMMS_MPI3) -int CartesianCommunicator::NodeCount(void) { return ProcessorCount();}; -int CartesianCommunicator::RankCount(void) { return ProcessorCount();}; -#endif - -#if !defined( GRID_COMMS_MPI3) && !defined (GRID_COMMS_MPIT) -double CartesianCommunicator::StencilSendToRecvFrom( void *xmit, - int xmit_to_rank, - void *recv, - int recv_from_rank, - int bytes, int dir) -{ - std::vector list; - // Discard the "dir" - SendToRecvFromBegin (list,xmit,xmit_to_rank,recv,recv_from_rank,bytes); - SendToRecvFromComplete(list); - return 2.0*bytes; -} -double CartesianCommunicator::StencilSendToRecvFromBegin(std::vector &list, - void *xmit, - int xmit_to_rank, - void *recv, - int recv_from_rank, - int bytes, int dir) -{ - // Discard the "dir" - SendToRecvFromBegin(list,xmit,xmit_to_rank,recv,recv_from_rank,bytes); - return 2.0*bytes; -} -void CartesianCommunicator::StencilSendToRecvFromComplete(std::vector &waitall,int dir) -{ - SendToRecvFromComplete(waitall); -} -#endif - -#if !defined( GRID_COMMS_MPI3) - -void CartesianCommunicator::StencilBarrier(void){}; - -commVector CartesianCommunicator::ShmBufStorageVector; - -void *CartesianCommunicator::ShmBufferSelf(void) { return ShmCommBuf; } - -void *CartesianCommunicator::ShmBuffer(int rank) { - return NULL; -} -void *CartesianCommunicator::ShmBufferTranslate(int rank,void * local_p) { - return NULL; -} -void CartesianCommunicator::ShmInitGeneric(void){ -#if 1 - int mmap_flag =0; -#ifdef MAP_ANONYMOUS - mmap_flag = mmap_flag| MAP_SHARED | MAP_ANONYMOUS; -#endif -#ifdef MAP_ANON - mmap_flag = mmap_flag| MAP_SHARED | MAP_ANON; -#endif -#ifdef MAP_HUGETLB - if ( Hugepages ) mmap_flag |= MAP_HUGETLB; -#endif - ShmCommBuf =(void *) mmap(NULL, MAX_MPI_SHM_BYTES, PROT_READ | PROT_WRITE, mmap_flag, -1, 0); - if (ShmCommBuf == (void *)MAP_FAILED) { - 
perror("mmap failed "); - exit(EXIT_FAILURE); - } -#ifdef MADV_HUGEPAGE - if (!Hugepages ) madvise(ShmCommBuf,MAX_MPI_SHM_BYTES,MADV_HUGEPAGE); -#endif -#else - ShmBufStorageVector.resize(MAX_MPI_SHM_BYTES); - ShmCommBuf=(void *)&ShmBufStorageVector[0]; -#endif - bzero(ShmCommBuf,MAX_MPI_SHM_BYTES); -} - -#endif } From 9947cfbf14de0bc323c0a791f21267aadf9488ab Mon Sep 17 00:00:00 2001 From: paboyle Date: Mon, 8 Jan 2018 11:33:01 +0000 Subject: [PATCH 12/17] Simplify number of communicator cases --- lib/communicator/Communicator_mpi3.cc | 751 ++++++++------------------ 1 file changed, 213 insertions(+), 538 deletions(-) diff --git a/lib/communicator/Communicator_mpi3.cc b/lib/communicator/Communicator_mpi3.cc index e41749d4..ef47d617 100644 --- a/lib/communicator/Communicator_mpi3.cc +++ b/lib/communicator/Communicator_mpi3.cc @@ -26,89 +26,20 @@ Author: Peter Boyle *************************************************************************************/ /* END LEGAL */ #include - -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#ifdef HAVE_NUMAIF_H -#include -#endif - +#include namespace Grid { -/////////////////////////////////////////////////////////////////////////////////////////////////// -// Info that is setup once and indept of cartesian layout -/////////////////////////////////////////////////////////////////////////////////////////////////// -int CartesianCommunicator::ShmSetup = 0; +Grid_MPI_Comm CartesianCommunicator::communicator_world; -int CartesianCommunicator::ShmRank; -int CartesianCommunicator::ShmSize; -int CartesianCommunicator::GroupRank; -int CartesianCommunicator::GroupSize; -int CartesianCommunicator::WorldRank; -int CartesianCommunicator::WorldSize; - -MPI_Comm CartesianCommunicator::communicator_world; -MPI_Comm CartesianCommunicator::ShmComm; -MPI_Win CartesianCommunicator::ShmWindow; - -std::vector CartesianCommunicator::GroupRanks; -std::vector CartesianCommunicator::MyGroup; -std::vector CartesianCommunicator::ShmCommBufs; - -int CartesianCommunicator::NodeCount(void) { return GroupSize;}; -int CartesianCommunicator::RankCount(void) { return WorldSize;}; - - -#undef FORCE_COMMS -void *CartesianCommunicator::ShmBufferSelf(void) +//////////////////////////////////////////// +// First initialise of comms system +//////////////////////////////////////////// +void CartesianCommunicator::Init(int *argc, char ***argv) { - return ShmCommBufs[ShmRank]; -} -void *CartesianCommunicator::ShmBuffer(int rank) -{ - int gpeer = GroupRanks[rank]; -#ifdef FORCE_COMMS - return NULL; -#endif - if (gpeer == MPI_UNDEFINED){ - return NULL; - } else { - return ShmCommBufs[gpeer]; - } -} -void *CartesianCommunicator::ShmBufferTranslate(int rank,void * local_p) -{ - static int count =0; - int gpeer = GroupRanks[rank]; - assert(gpeer!=ShmRank); // never send to self - assert(rank!=WorldRank);// never send to self -#ifdef FORCE_COMMS - return NULL; -#endif - if (gpeer == MPI_UNDEFINED){ - return NULL; - } else { - uint64_t offset = (uint64_t)local_p - (uint64_t)ShmCommBufs[ShmRank]; - uint64_t remote = (uint64_t)ShmCommBufs[gpeer]+offset; - return (void *) remote; - } -} - -void CartesianCommunicator::Init(int *argc, char ***argv) { int flag; int provided; - // mtrace(); MPI_Initialized(&flag); // needed to coexist with other libs apparently if ( !flag ) { @@ -119,487 +50,202 @@ void CartesianCommunicator::Init(int *argc, char ***argv) { Grid_quiesce_nodes(); MPI_Comm_dup (MPI_COMM_WORLD,&communicator_world); - 
MPI_Comm_rank(communicator_world,&WorldRank); - MPI_Comm_size(communicator_world,&WorldSize); - if ( WorldRank == 0 ) { - std::cout << GridLogMessage<< "Initialising MPI "<< WorldRank <<"/"< world_ranks(WorldSize); - GroupRanks.resize(WorldSize); - for(int r=0;r()); - int myleader = MyGroup[0]; - - std::vector leaders_1hot(WorldSize,0); - std::vector leaders_group(GroupSize,0); - leaders_1hot [ myleader ] = 1; - - /////////////////////////////////////////////////////////////////// - // global sum leaders over comm world - /////////////////////////////////////////////////////////////////// - int ierr=MPI_Allreduce(MPI_IN_PLACE,&leaders_1hot[0],WorldSize,MPI_INT,MPI_SUM,communicator_world); - assert(ierr==0); - /////////////////////////////////////////////////////////////////// - // find the group leaders world rank - /////////////////////////////////////////////////////////////////// - int group=0; - for(int l=0;l shmids(ShmSize); - - if ( ShmRank == 0 ) { - for(int r=0;r coor = _processor_coor; // my coord - assert(std::abs(shift) <_processors[dim]); - - coor[dim] = (_processor_coor[dim] + shift + _processors[dim])%_processors[dim]; - Lexicographic::IndexFromCoor(coor,source,_processors); - source = LexicographicToWorldRank[source]; - - coor[dim] = (_processor_coor[dim] - shift + _processors[dim])%_processors[dim]; - Lexicographic::IndexFromCoor(coor,dest,_processors); - dest = LexicographicToWorldRank[dest]; - -}// rank is world rank. - + int ierr=MPI_Cart_shift(communicator,dim,shift,&source,&dest); + assert(ierr==0); +} int CartesianCommunicator::RankFromProcessorCoor(std::vector &coor) { int rank; - Lexicographic::IndexFromCoor(coor,rank,_processors); - rank = LexicographicToWorldRank[rank]; + int ierr=MPI_Cart_rank (communicator, &coor[0], &rank); + assert(ierr==0); return rank; -}// rank is world rank - +} void CartesianCommunicator::ProcessorCoorFromRank(int rank, std::vector &coor) { - int lr=-1; - for(int r=0;r &processors) +{ + MPI_Comm optimal_comm; + GlobalSharedMemory::OptimalCommunicator (processors,optimal_comm); // Remap using the shared memory optimising routine + InitFromMPICommunicator(processors,optimal_comm); + SetCommunicator(optimal_comm); } ////////////////////////////////// // Try to subdivide communicator ////////////////////////////////// -/* - * Use default in MPI compile - */ -CartesianCommunicator::CartesianCommunicator(const std::vector &processors,const CartesianCommunicator &parent,int &srank) - : CartesianCommunicator(processors) +CartesianCommunicator::CartesianCommunicator(const std::vector &processors,const CartesianCommunicator &parent,int &srank) { - std::cout << "Attempts to split MPI3 communicators will fail until implemented" <= parent._ndimension); + std::vector parent_processor_coor(_ndimension,0); + std::vector parent_processors (_ndimension,1); + + // Can make 5d grid from 4d etc... 
+ int pad = _ndimension-parent_ndimension; + for(int d=0;d ccoor(_ndimension); // coor within subcommunicator + std::vector scoor(_ndimension); // coor of split within parent + std::vector ssize(_ndimension); // coor of split within parent + + for(int d=0;d<_ndimension;d++){ + ccoor[d] = parent_processor_coor[d] % processors[d]; + scoor[d] = parent_processor_coor[d] / processors[d]; + ssize[d] = parent_processors[d] / processors[d]; + } + + // rank within subcomm ; srank is rank of subcomm within blocks of subcomms + int crank; + // Mpi uses the reverse Lexico convention to us; so reversed routines called + Lexicographic::IndexFromCoorReversed(ccoor,crank,processors); // processors is the split grid dimensions + Lexicographic::IndexFromCoorReversed(scoor,srank,ssize); // ssize is the number of split grids + + MPI_Comm comm_split; + if ( Nchild > 1 ) { + + if(0){ + std::cout << GridLogMessage<<"Child communicator of "<< std::hex << parent.communicator << std::dec< &processors) -{ - int ierr; - communicator=communicator_world; - +void CartesianCommunicator::InitFromMPICommunicator(const std::vector &processors, MPI_Comm communicator_base) +{ _ndimension = processors.size(); + _processor_coor.resize(_ndimension); + + ///////////////////////////////// + // Count the requested nodes + ///////////////////////////////// + _Nprocessors=1; + _processors = processors; + for(int i=0;i<_ndimension;i++){ + _Nprocessors*=_processors[i]; + } + + std::vector periodic(_ndimension,1); + MPI_Cart_create(communicator_base, _ndimension,&_processors[0],&periodic[0],0,&communicator); + MPI_Comm_rank(communicator,&_processor); + MPI_Cart_coords(communicator,_processor,_ndimension,&_processor_coor[0]); + + if ( 0 && (communicator_base != communicator_world) ) { + std::cout << "InitFromMPICommunicator Cartesian communicator created with a non-world communicator"< WorldDims = processors; - - ShmDims.resize (_ndimension,1); - GroupDims.resize(_ndimension); - ShmCoor.resize (_ndimension); - GroupCoor.resize(_ndimension); - WorldCoor.resize(_ndimension); - - int dim = 0; - for(int l2=0;l2 coor(_ndimension); - ProcessorCoorFromRank(wr,coor); // from world rank - int ck = RankFromProcessorCoor(coor); - assert(ck==wr); - - if ( wr == WorldRank ) { - for(int j=0;j mcoor = coor; - this->Broadcast(0,(void *)&mcoor[0],mcoor.size()*sizeof(int)); - for(int d = 0 ; d< _ndimension; d++) { - assert(coor[d] == mcoor[d]); - } - } -}; CartesianCommunicator::~CartesianCommunicator() { int MPI_is_finalised; @@ -734,19 +380,15 @@ double CartesianCommunicator::StencilSendToRecvFromBegin(std::vector row(_ndimension,1); + assert(dim>=0 && dim<_ndimension); + + // Split the communicator + row[dim] = _processors[dim]; + + int me; + CartesianCommunicator Comm(row,*this,me); + Comm.AllToAll(in,out,words,bytes); +} +void CartesianCommunicator::AllToAll(void *in,void *out,uint64_t words,uint64_t bytes) +{ + // MPI is a pain and uses "int" arguments + // 64*64*64*128*16 == 500Million elements of data. + // When 24*4 bytes multiples get 50x 10^9 >>> 2x10^9 Y2K bug. + // (Turns up on 32^3 x 64 Gparity too) + MPI_Datatype object; + int iwords; + int ibytes; + iwords = words; + ibytes = bytes; + assert(words == iwords); // safe to cast to int ? + assert(bytes == ibytes); // safe to cast to int ? 
+ MPI_Type_contiguous(ibytes,MPI_BYTE,&object); + MPI_Type_commit(&object); + MPI_Alltoall(in,iwords,object,out,iwords,object,communicator); + MPI_Type_free(&object); +} + + + } From 0b85f1bfc8d6ceec46150ae1c75dc048f20629a3 Mon Sep 17 00:00:00 2001 From: paboyle Date: Mon, 8 Jan 2018 11:33:47 +0000 Subject: [PATCH 13/17] Simplify the communicator proliferation: mpi and none. --- lib/communicator/Communicator_none.cc | 43 +++++++++++++++++++++++++-- 1 file changed, 41 insertions(+), 2 deletions(-) diff --git a/lib/communicator/Communicator_none.cc b/lib/communicator/Communicator_none.cc index 26b330a7..c3763d53 100644 --- a/lib/communicator/Communicator_none.cc +++ b/lib/communicator/Communicator_none.cc @@ -32,14 +32,22 @@ namespace Grid { /////////////////////////////////////////////////////////////////////////////////////////////////// // Info that is setup once and indept of cartesian layout /////////////////////////////////////////////////////////////////////////////////////////////////// +Grid_MPI_Comm CartesianCommunicator::communicator_world; void CartesianCommunicator::Init(int *argc, char *** arv) { - ShmInitGeneric(); + GlobalSharedMemory::Init(communicator_world); + GlobalSharedMemory::SharedMemoryAllocate( + GlobalSharedMemory::MAX_MPI_SHM_BYTES, + GlobalSharedMemory::Hugepages); } CartesianCommunicator::CartesianCommunicator(const std::vector &processors,const CartesianCommunicator &parent,int &srank) - : CartesianCommunicator(processors) { srank=0;} + : CartesianCommunicator(processors) +{ + srank=0; + SetCommunicator(communicator_world); +} CartesianCommunicator::CartesianCommunicator(const std::vector &processors) { @@ -54,6 +62,7 @@ CartesianCommunicator::CartesianCommunicator(const std::vector &processors) assert(_processors[d]==1); _processor_coor[d] = 0; } + SetCommunicator(communicator_world); } CartesianCommunicator::~CartesianCommunicator(){} @@ -121,6 +130,36 @@ void CartesianCommunicator::ShiftedRanks(int dim,int shift,int &source,int &dest dest=0; } +double CartesianCommunicator::StencilSendToRecvFrom( void *xmit, + int xmit_to_rank, + void *recv, + int recv_from_rank, + int bytes, int dir) +{ + std::vector list; + // Discard the "dir" + SendToRecvFromBegin (list,xmit,xmit_to_rank,recv,recv_from_rank,bytes); + SendToRecvFromComplete(list); + return 2.0*bytes; +} +double CartesianCommunicator::StencilSendToRecvFromBegin(std::vector &list, + void *xmit, + int xmit_to_rank, + void *recv, + int recv_from_rank, + int bytes, int dir) +{ + // Discard the "dir" + SendToRecvFromBegin(list,xmit,xmit_to_rank,recv,recv_from_rank,bytes); + return 2.0*bytes; +} +void CartesianCommunicator::StencilSendToRecvFromComplete(std::vector &waitall,int dir) +{ + SendToRecvFromComplete(waitall); +} + +void CartesianCommunicator::StencilBarrier(void){}; + } From 44f65526e01369c193a8754e97ec959ed8d0a1d4 Mon Sep 17 00:00:00 2001 From: paboyle Date: Mon, 8 Jan 2018 11:35:43 +0000 Subject: [PATCH 14/17] Simplify communicators --- lib/communicator/Communicator_mpi.cc | 222 ----- lib/communicator/Communicator_mpi3_leader.cc | 988 ------------------- lib/communicator/Communicator_mpit.cc | 273 ----- lib/communicator/Communicator_shmem.cc | 357 ------- lib/communicator/SharedMemory.cc | 54 + lib/communicator/SharedMemory.h | 158 +++ lib/communicator/SharedMemoryMPI.cc | 415 ++++++++ lib/communicator/SharedMemoryNone.cc | 150 +++ 8 files changed, 777 insertions(+), 1840 deletions(-) delete mode 100644 lib/communicator/Communicator_mpi.cc delete mode 100644 
lib/communicator/Communicator_mpi3_leader.cc delete mode 100644 lib/communicator/Communicator_mpit.cc delete mode 100644 lib/communicator/Communicator_shmem.cc create mode 100644 lib/communicator/SharedMemory.cc create mode 100644 lib/communicator/SharedMemory.h create mode 100644 lib/communicator/SharedMemoryMPI.cc create mode 100644 lib/communicator/SharedMemoryNone.cc diff --git a/lib/communicator/Communicator_mpi.cc b/lib/communicator/Communicator_mpi.cc deleted file mode 100644 index 2075e4bf..00000000 --- a/lib/communicator/Communicator_mpi.cc +++ /dev/null @@ -1,222 +0,0 @@ - /************************************************************************************* - - Grid physics library, www.github.com/paboyle/Grid - - Source file: ./lib/communicator/Communicator_mpi.cc - - Copyright (C) 2015 - -Author: Peter Boyle - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License along - with this program; if not, write to the Free Software Foundation, Inc., - 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. - - See the full license in the file "LICENSE" in the top level distribution directory - *************************************************************************************/ - /* END LEGAL */ -#include -#include -#include -#include - -namespace Grid { - - -/////////////////////////////////////////////////////////////////////////////////////////////////// -// Info that is setup once and indept of cartesian layout -/////////////////////////////////////////////////////////////////////////////////////////////////// -MPI_Comm CartesianCommunicator::communicator_world; - -// Should error check all MPI calls. 
-void CartesianCommunicator::Init(int *argc, char ***argv) { - int flag; - int provided; - MPI_Initialized(&flag); // needed to coexist with other libs apparently - if ( !flag ) { - MPI_Init_thread(argc,argv,MPI_THREAD_MULTIPLE,&provided); - if ( provided != MPI_THREAD_MULTIPLE ) { - QCD::WilsonKernelsStatic::Comms = QCD::WilsonKernelsStatic::CommsThenCompute; - } - } - MPI_Comm_dup (MPI_COMM_WORLD,&communicator_world); - ShmInitGeneric(); -} - -CartesianCommunicator::~CartesianCommunicator() -{ - int MPI_is_finalised; - MPI_Finalized(&MPI_is_finalised); - if (communicator && !MPI_is_finalised) - MPI_Comm_free(&communicator); -} - -void CartesianCommunicator::GlobalSum(uint32_t &u){ - int ierr=MPI_Allreduce(MPI_IN_PLACE,&u,1,MPI_UINT32_T,MPI_SUM,communicator); - assert(ierr==0); -} -void CartesianCommunicator::GlobalSum(uint64_t &u){ - int ierr=MPI_Allreduce(MPI_IN_PLACE,&u,1,MPI_UINT64_T,MPI_SUM,communicator); - assert(ierr==0); -} -void CartesianCommunicator::GlobalXOR(uint32_t &u){ - int ierr=MPI_Allreduce(MPI_IN_PLACE,&u,1,MPI_UINT32_T,MPI_BXOR,communicator); - assert(ierr==0); -} -void CartesianCommunicator::GlobalXOR(uint64_t &u){ - int ierr=MPI_Allreduce(MPI_IN_PLACE,&u,1,MPI_UINT64_T,MPI_BXOR,communicator); - assert(ierr==0); -} -void CartesianCommunicator::GlobalSum(float &f){ - int ierr=MPI_Allreduce(MPI_IN_PLACE,&f,1,MPI_FLOAT,MPI_SUM,communicator); - assert(ierr==0); -} -void CartesianCommunicator::GlobalSumVector(float *f,int N) -{ - int ierr=MPI_Allreduce(MPI_IN_PLACE,f,N,MPI_FLOAT,MPI_SUM,communicator); - assert(ierr==0); -} -void CartesianCommunicator::GlobalSum(double &d) -{ - int ierr = MPI_Allreduce(MPI_IN_PLACE,&d,1,MPI_DOUBLE,MPI_SUM,communicator); - assert(ierr==0); -} -void CartesianCommunicator::GlobalSumVector(double *d,int N) -{ - int ierr = MPI_Allreduce(MPI_IN_PLACE,d,N,MPI_DOUBLE,MPI_SUM,communicator); - assert(ierr==0); -} -void CartesianCommunicator::ShiftedRanks(int dim,int shift,int &source,int &dest) -{ - int ierr=MPI_Cart_shift(communicator,dim,shift,&source,&dest); - assert(ierr==0); -} -int CartesianCommunicator::RankFromProcessorCoor(std::vector &coor) -{ - int rank; - int ierr=MPI_Cart_rank (communicator, &coor[0], &rank); - assert(ierr==0); - return rank; -} -void CartesianCommunicator::ProcessorCoorFromRank(int rank, std::vector &coor) -{ - coor.resize(_ndimension); - int ierr=MPI_Cart_coords (communicator, rank, _ndimension,&coor[0]); - assert(ierr==0); -} - -// Basic Halo comms primitive -void CartesianCommunicator::SendToRecvFrom(void *xmit, - int dest, - void *recv, - int from, - int bytes) -{ - std::vector reqs(0); - SendToRecvFromBegin(reqs,xmit,dest,recv,from,bytes); - SendToRecvFromComplete(reqs); -} - -void CartesianCommunicator::SendRecvPacket(void *xmit, - void *recv, - int sender, - int receiver, - int bytes) -{ - MPI_Status stat; - assert(sender != receiver); - int tag = sender; - if ( _processor == sender ) { - MPI_Send(xmit, bytes, MPI_CHAR,receiver,tag,communicator); - } - if ( _processor == receiver ) { - MPI_Recv(recv, bytes, MPI_CHAR,sender,tag,communicator,&stat); - } -} - -// Basic Halo comms primitive -void CartesianCommunicator::SendToRecvFromBegin(std::vector &list, - void *xmit, - int dest, - void *recv, - int from, - int bytes) -{ - int myrank = _processor; - int ierr; - if ( CommunicatorPolicy == CommunicatorPolicyConcurrent ) { - MPI_Request xrq; - MPI_Request rrq; - - ierr =MPI_Irecv(recv, bytes, MPI_CHAR,from,from,communicator,&rrq); - ierr|=MPI_Isend(xmit, bytes, MPI_CHAR,dest,_processor,communicator,&xrq); - - 
assert(ierr==0); - list.push_back(xrq); - list.push_back(rrq); - } else { - // Give the CPU to MPI immediately; can use threads to overlap optionally - ierr=MPI_Sendrecv(xmit,bytes,MPI_CHAR,dest,myrank, - recv,bytes,MPI_CHAR,from, from, - communicator,MPI_STATUS_IGNORE); - assert(ierr==0); - } -} -void CartesianCommunicator::SendToRecvFromComplete(std::vector &list) -{ - if ( CommunicatorPolicy == CommunicatorPolicyConcurrent ) { - int nreq=list.size(); - std::vector status(nreq); - int ierr = MPI_Waitall(nreq,&list[0],&status[0]); - assert(ierr==0); - } -} - -void CartesianCommunicator::Barrier(void) -{ - int ierr = MPI_Barrier(communicator); - assert(ierr==0); -} - -void CartesianCommunicator::Broadcast(int root,void* data, int bytes) -{ - int ierr=MPI_Bcast(data, - bytes, - MPI_BYTE, - root, - communicator); - assert(ierr==0); -} - /////////////////////////////////////////////////////// - // Should only be used prior to Grid Init finished. - // Check for this? - /////////////////////////////////////////////////////// -int CartesianCommunicator::RankWorld(void){ - int r; - MPI_Comm_rank(communicator_world,&r); - return r; -} -void CartesianCommunicator::BroadcastWorld(int root,void* data, int bytes) -{ - int ierr= MPI_Bcast(data, - bytes, - MPI_BYTE, - root, - communicator_world); - assert(ierr==0); -} - - - -} - diff --git a/lib/communicator/Communicator_mpi3_leader.cc b/lib/communicator/Communicator_mpi3_leader.cc deleted file mode 100644 index 6e26bd3e..00000000 --- a/lib/communicator/Communicator_mpi3_leader.cc +++ /dev/null @@ -1,988 +0,0 @@ - /************************************************************************************* - - Grid physics library, www.github.com/paboyle/Grid - - Source file: ./lib/communicator/Communicator_mpi.cc - - Copyright (C) 2015 - -Author: Peter Boyle - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License along - with this program; if not, write to the Free Software Foundation, Inc., - 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. - - See the full license in the file "LICENSE" in the top level distribution directory - *************************************************************************************/ - /* END LEGAL */ -#include "Grid.h" -#include -//#include - -//////////////////////////////////////////////////////////////////////////////////////////////////////////////// -/// Workarounds: -/// i) bloody mac os doesn't implement unnamed semaphores since it is "optional" posix. -/// darwin dispatch semaphores don't seem to be multiprocess. -/// -/// ii) openmpi under --mca shmem posix works with two squadrons per node; -/// openmpi under default mca settings (I think --mca shmem mmap) on MacOS makes two squadrons map the SAME -/// memory as each other, despite their living on different communicators. This appears to be a bug in OpenMPI. 
-/// -//////////////////////////////////////////////////////////////////////////////////////////////////////////////// -#include -#include -#include -#include -typedef sem_t *Grid_semaphore; - - -#error /*THis is deprecated*/ - -#if 0 -#define SEM_INIT(S) S = sem_open(sem_name,0,0600,0); assert ( S != SEM_FAILED ); -#define SEM_INIT_EXCL(S) sem_unlink(sem_name); S = sem_open(sem_name,O_CREAT|O_EXCL,0600,0); assert ( S != SEM_FAILED ); -#define SEM_POST(S) assert ( sem_post(S) == 0 ); -#define SEM_WAIT(S) assert ( sem_wait(S) == 0 ); -#else -#define SEM_INIT(S) ; -#define SEM_INIT_EXCL(S) ; -#define SEM_POST(S) ; -#define SEM_WAIT(S) ; -#endif -#include - -namespace Grid { - -enum { COMMAND_ISEND, COMMAND_IRECV, COMMAND_WAITALL, COMMAND_SENDRECV }; - -struct Descriptor { - uint64_t buf; - size_t bytes; - int rank; - int tag; - int command; - uint64_t xbuf; - uint64_t rbuf; - int xtag; - int rtag; - int src; - int dest; - MPI_Request request; -}; - -const int pool = 48; - -class SlaveState { -public: - volatile int head; - volatile int start; - volatile int tail; - volatile Descriptor Descrs[pool]; -}; - -class Slave { -public: - Grid_semaphore sem_head; - Grid_semaphore sem_tail; - SlaveState *state; - MPI_Comm squadron; - uint64_t base; - int universe_rank; - int vertical_rank; - char sem_name [NAME_MAX]; - //////////////////////////////////////////////////////////// - // Descriptor circular pointers - //////////////////////////////////////////////////////////// - Slave() {}; - - void Init(SlaveState * _state,MPI_Comm _squadron,int _universe_rank,int _vertical_rank); - - void SemInit(void) { - sprintf(sem_name,"/Grid_mpi3_sem_head_%d",universe_rank); - SEM_INIT(sem_head); - sprintf(sem_name,"/Grid_mpi3_sem_tail_%d",universe_rank); - SEM_INIT(sem_tail); - } - void SemInitExcl(void) { - sprintf(sem_name,"/Grid_mpi3_sem_head_%d",universe_rank); - SEM_INIT_EXCL(sem_head); - sprintf(sem_name,"/Grid_mpi3_sem_tail_%d",universe_rank); - SEM_INIT_EXCL(sem_tail); - } - void WakeUpDMA(void) { - SEM_POST(sem_head); - }; - void WakeUpCompute(void) { - SEM_POST(sem_tail); - }; - void WaitForCommand(void) { - SEM_WAIT(sem_head); - }; - void WaitForComplete(void) { - SEM_WAIT(sem_tail); - }; - void EventLoop (void) { - // std::cout<< " Entering event loop "<head,0,0); - int s=state->start; - if ( s != state->head ) { - _mm_mwait(0,0); - } -#endif - Event(); - } - } - - int Event (void) ; - - uint64_t QueueCommand(int command,void *buf, int bytes, int hashtag, MPI_Comm comm,int u_rank) ; - void QueueSendRecv(void *xbuf, void *rbuf, int bytes, int xtag, int rtag, MPI_Comm comm,int dest,int src) ; - - void WaitAll() { - // std::cout << "Queueing WAIT command "<tail != state->head ); - } -}; - -//////////////////////////////////////////////////////////////////////// -// One instance of a data mover. 
-// Master and Slave must agree on location in shared memory -//////////////////////////////////////////////////////////////////////// - -class MPIoffloadEngine { -public: - - static std::vector Slaves; - - static int ShmSetup; - - static int UniverseRank; - static int UniverseSize; - - static MPI_Comm communicator_universe; - static MPI_Comm communicator_cached; - - static MPI_Comm HorizontalComm; - static int HorizontalRank; - static int HorizontalSize; - - static MPI_Comm VerticalComm; - static MPI_Win VerticalWindow; - static int VerticalSize; - static int VerticalRank; - - static std::vector VerticalShmBufs; - static std::vector > UniverseRanks; - static std::vector UserCommunicatorToWorldRanks; - - static MPI_Group WorldGroup, CachedGroup; - - static void CommunicatorInit (MPI_Comm &communicator_world, - MPI_Comm &ShmComm, - void * &ShmCommBuf); - - static void MapCommRankToWorldRank(int &hashtag, int & comm_world_peer,int tag, MPI_Comm comm,int commrank); - - ///////////////////////////////////////////////////////// - // routines for master proc must handle any communicator - ///////////////////////////////////////////////////////// - - static void QueueSend(int slave,void *buf, int bytes, int tag, MPI_Comm comm,int rank) { - // std::cout<< " Queueing send "<< bytes<< " slave "<< slave << " to comm "<= units ) { - mywork = myoff = 0; - } else { - mywork = (nwork+me)/units; - myoff = basework * me; - if ( me > backfill ) - myoff+= (me-backfill); - } - return; - }; - - static void QueueRoundRobinSendRecv(void *xbuf, void *rbuf, int bytes, int xtag, int rtag, MPI_Comm comm,int dest,int src) { - uint8_t * cxbuf = (uint8_t *) xbuf; - uint8_t * crbuf = (uint8_t *) rbuf; - static int rrp=0; - int procs = VerticalSize-1; - int myoff=0; - int mywork=bytes; - QueueSendRecv(rrp+1,&cxbuf[myoff],&crbuf[myoff],mywork,xtag,rtag,comm,dest,src); - rrp = rrp+1; - if ( rrp == (VerticalSize-1) ) rrp = 0; - } - - static void QueueMultiplexedSendRecv(void *xbuf, void *rbuf, int bytes, int xtag, int rtag, MPI_Comm comm,int dest,int src) { - uint8_t * cxbuf = (uint8_t *) xbuf; - uint8_t * crbuf = (uint8_t *) rbuf; - int mywork, myoff, procs; - procs = VerticalSize-1; - for(int s=0;s MPIoffloadEngine::Slaves; - -int MPIoffloadEngine::UniverseRank; -int MPIoffloadEngine::UniverseSize; - -MPI_Comm MPIoffloadEngine::communicator_universe; -MPI_Comm MPIoffloadEngine::communicator_cached; -MPI_Group MPIoffloadEngine::WorldGroup; -MPI_Group MPIoffloadEngine::CachedGroup; - -MPI_Comm MPIoffloadEngine::HorizontalComm; -int MPIoffloadEngine::HorizontalRank; -int MPIoffloadEngine::HorizontalSize; - -MPI_Comm MPIoffloadEngine::VerticalComm; -int MPIoffloadEngine::VerticalSize; -int MPIoffloadEngine::VerticalRank; -MPI_Win MPIoffloadEngine::VerticalWindow; -std::vector MPIoffloadEngine::VerticalShmBufs; -std::vector > MPIoffloadEngine::UniverseRanks; -std::vector MPIoffloadEngine::UserCommunicatorToWorldRanks; - -int CartesianCommunicator::NodeCount(void) { return HorizontalSize;}; -int MPIoffloadEngine::ShmSetup = 0; - -void MPIoffloadEngine::CommunicatorInit (MPI_Comm &communicator_world, - MPI_Comm &ShmComm, - void * &ShmCommBuf) -{ - int flag; - assert(ShmSetup==0); - - ////////////////////////////////////////////////////////////////////// - // Universe is all nodes prior to squadron grouping - ////////////////////////////////////////////////////////////////////// - MPI_Comm_dup (MPI_COMM_WORLD,&communicator_universe); - MPI_Comm_rank(communicator_universe,&UniverseRank); - 
MPI_Comm_size(communicator_universe,&UniverseSize); - - ///////////////////////////////////////////////////////////////////// - // Split into groups that can share memory (Verticals) - ///////////////////////////////////////////////////////////////////// -#undef MPI_SHARED_MEM_DEBUG -#ifdef MPI_SHARED_MEM_DEBUG - MPI_Comm_split(communicator_universe,(UniverseRank/4),UniverseRank,&VerticalComm); -#else - MPI_Comm_split_type(communicator_universe, MPI_COMM_TYPE_SHARED, 0, MPI_INFO_NULL,&VerticalComm); -#endif - MPI_Comm_rank(VerticalComm ,&VerticalRank); - MPI_Comm_size(VerticalComm ,&VerticalSize); - - ////////////////////////////////////////////////////////////////////// - // Split into horizontal groups by rank in squadron - ////////////////////////////////////////////////////////////////////// - MPI_Comm_split(communicator_universe,VerticalRank,UniverseRank,&HorizontalComm); - MPI_Comm_rank(HorizontalComm,&HorizontalRank); - MPI_Comm_size(HorizontalComm,&HorizontalSize); - assert(HorizontalSize*VerticalSize==UniverseSize); - - //////////////////////////////////////////////////////////////////////////////// - // What is my place in the world - //////////////////////////////////////////////////////////////////////////////// - int WorldRank=0; - if(VerticalRank==0) WorldRank = HorizontalRank; - int ierr=MPI_Allreduce(MPI_IN_PLACE,&WorldRank,1,MPI_INT,MPI_SUM,VerticalComm); - assert(ierr==0); - - //////////////////////////////////////////////////////////////////////////////// - // Where is the world in the universe? - //////////////////////////////////////////////////////////////////////////////// - UniverseRanks = std::vector >(HorizontalSize,std::vector(VerticalSize,0)); - UniverseRanks[WorldRank][VerticalRank] = UniverseRank; - for(int w=0;w0 ) size = sizeof(SlaveState); - - sprintf(shm_name,"/Grid_mpi3_shm_%d_%d",WorldRank,r); - - shm_unlink(shm_name); - - int fd=shm_open(shm_name,O_RDWR|O_CREAT,0600); - if ( fd < 0 ) { - perror("failed shm_open"); - assert(0); - } - - ftruncate(fd, size); - - VerticalShmBufs[r] = mmap(NULL,size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0); - if ( VerticalShmBufs[r] == MAP_FAILED ) { - perror("failed mmap"); - assert(0); - } - - /* - for(uint64_t page=0;page0 ) size = sizeof(SlaveState); - - sprintf(shm_name,"/Grid_mpi3_shm_%d_%d",WorldRank,r); - - int fd=shm_open(shm_name,O_RDWR|O_CREAT,0600); - if ( fd<0 ) { - perror("failed shm_open"); - assert(0); - } - VerticalShmBufs[r] = mmap(NULL,size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0); - - uint64_t * check = (uint64_t *) VerticalShmBufs[r]; - assert(check[0]== WorldRank); - assert(check[1]== r); - // std::cerr<<"SHM "<"<"< cached_ranks(size); - - for(int r=0;r"<>0 )&0xFFFF)^((icomm>>16)&0xFFFF) - ^ ((icomm>>32)&0xFFFF)^((icomm>>48)&0xFFFF); - - // hashtag = (comm_hash<<15) | tag; - hashtag = tag; - -}; - -void Slave::Init(SlaveState * _state,MPI_Comm _squadron,int _universe_rank,int _vertical_rank) -{ - squadron=_squadron; - universe_rank=_universe_rank; - vertical_rank=_vertical_rank; - state =_state; - // std::cout << "state "<<_state<<" comm "<<_squadron<<" universe_rank"<head = state->tail = state->start = 0; - base = (uint64_t)MPIoffloadEngine::VerticalShmBufs[0]; - int rank; MPI_Comm_rank(_squadron,&rank); -} -#define PERI_PLUS(A) ( (A+1)%pool ) -int Slave::Event (void) { - - static int tail_last; - static int head_last; - static int start_last; - int ierr; - MPI_Status stat; - static int i=0; - - //////////////////////////////////////////////////// - // Try to advance the start pointers - 
//////////////////////////////////////////////////// - int s=state->start; - if ( s != state->head ) { - switch ( state->Descrs[s].command ) { - case COMMAND_ISEND: - ierr = MPI_Isend((void *)(state->Descrs[s].buf+base), - state->Descrs[s].bytes, - MPI_CHAR, - state->Descrs[s].rank, - state->Descrs[s].tag, - MPIoffloadEngine::communicator_universe, - (MPI_Request *)&state->Descrs[s].request); - assert(ierr==0); - state->start = PERI_PLUS(s); - return 1; - break; - - case COMMAND_IRECV: - ierr=MPI_Irecv((void *)(state->Descrs[s].buf+base), - state->Descrs[s].bytes, - MPI_CHAR, - state->Descrs[s].rank, - state->Descrs[s].tag, - MPIoffloadEngine::communicator_universe, - (MPI_Request *)&state->Descrs[s].request); - - // std::cout<< " Request is "<Descrs[s].request<Descrs[0].request<start = PERI_PLUS(s); - return 1; - break; - - case COMMAND_SENDRECV: - - // fprintf(stderr,"Sendrecv ->%d %d : <-%d %d \n",state->Descrs[s].dest, state->Descrs[s].xtag+i*10,state->Descrs[s].src, state->Descrs[s].rtag+i*10); - - ierr=MPI_Sendrecv((void *)(state->Descrs[s].xbuf+base), state->Descrs[s].bytes, MPI_CHAR, state->Descrs[s].dest, state->Descrs[s].xtag+i*10, - (void *)(state->Descrs[s].rbuf+base), state->Descrs[s].bytes, MPI_CHAR, state->Descrs[s].src , state->Descrs[s].rtag+i*10, - MPIoffloadEngine::communicator_universe,MPI_STATUS_IGNORE); - - assert(ierr==0); - - // fprintf(stderr,"Sendrecv done %d %d\n",ierr,i); - // MPI_Barrier(MPIoffloadEngine::HorizontalComm); - // fprintf(stderr,"Barrier\n"); - i++; - - state->start = PERI_PLUS(s); - - return 1; - break; - - case COMMAND_WAITALL: - - for(int t=state->tail;t!=s; t=PERI_PLUS(t) ){ - if ( state->Descrs[t].command != COMMAND_SENDRECV ) { - MPI_Wait((MPI_Request *)&state->Descrs[t].request,MPI_STATUS_IGNORE); - } - }; - s=PERI_PLUS(s); - state->start = s; - state->tail = s; - - WakeUpCompute(); - - return 1; - break; - - default: - assert(0); - break; - } - } - return 0; -} - ////////////////////////////////////////////////////////////////////////////// - // External interaction with the queue - ////////////////////////////////////////////////////////////////////////////// - -void Slave::QueueSendRecv(void *xbuf, void *rbuf, int bytes, int xtag, int rtag, MPI_Comm comm,int dest,int src) -{ - int head =state->head; - int next = PERI_PLUS(head); - - // Set up descriptor - int worldrank; - int hashtag; - MPI_Comm communicator; - MPI_Request request; - uint64_t relative; - - relative = (uint64_t)xbuf - base; - state->Descrs[head].xbuf = relative; - - relative= (uint64_t)rbuf - base; - state->Descrs[head].rbuf = relative; - - state->Descrs[head].bytes = bytes; - - MPIoffloadEngine::MapCommRankToWorldRank(hashtag,worldrank,xtag,comm,dest); - state->Descrs[head].dest = MPIoffloadEngine::UniverseRanks[worldrank][vertical_rank]; - state->Descrs[head].xtag = hashtag; - - MPIoffloadEngine::MapCommRankToWorldRank(hashtag,worldrank,rtag,comm,src); - state->Descrs[head].src = MPIoffloadEngine::UniverseRanks[worldrank][vertical_rank]; - state->Descrs[head].rtag = hashtag; - - state->Descrs[head].command= COMMAND_SENDRECV; - - // Block until FIFO has space - while( state->tail==next ); - - // Msync on weak order architectures - - // Advance pointer - state->head = next; - -}; -uint64_t Slave::QueueCommand(int command,void *buf, int bytes, int tag, MPI_Comm comm,int commrank) -{ - ///////////////////////////////////////// - // Spin; if FIFO is full until not full - ///////////////////////////////////////// - int head =state->head; - int next = PERI_PLUS(head); - - // 
Set up descriptor - int worldrank; - int hashtag; - MPI_Comm communicator; - MPI_Request request; - - MPIoffloadEngine::MapCommRankToWorldRank(hashtag,worldrank,tag,comm,commrank); - - uint64_t relative= (uint64_t)buf - base; - state->Descrs[head].buf = relative; - state->Descrs[head].bytes = bytes; - state->Descrs[head].rank = MPIoffloadEngine::UniverseRanks[worldrank][vertical_rank]; - state->Descrs[head].tag = hashtag; - state->Descrs[head].command= command; - - /* - if ( command == COMMAND_ISEND ) { - std::cout << "QueueSend from "<< universe_rank <<" to commrank " << commrank - << " to worldrank " << worldrank <tail==next ); - - // Msync on weak order architectures - // Advance pointer - state->head = next; - - return 0; -} - - -/////////////////////////////////////////////////////////////////////////////////////////////////// -// Info that is setup once and indept of cartesian layout -/////////////////////////////////////////////////////////////////////////////////////////////////// - -MPI_Comm CartesianCommunicator::communicator_world; - -void CartesianCommunicator::Init(int *argc, char ***argv) -{ - int flag; - MPI_Initialized(&flag); // needed to coexist with other libs apparently - if ( !flag ) { - MPI_Init(argc,argv); - } - communicator_world = MPI_COMM_WORLD; - MPI_Comm ShmComm; - MPIoffloadEngine::CommunicatorInit (communicator_world,ShmComm,ShmCommBuf); -} -void CartesianCommunicator::ShiftedRanks(int dim,int shift,int &source,int &dest) -{ - int ierr=MPI_Cart_shift(communicator,dim,shift,&source,&dest); - assert(ierr==0); -} -int CartesianCommunicator::RankFromProcessorCoor(std::vector &coor) -{ - int rank; - int ierr=MPI_Cart_rank (communicator, &coor[0], &rank); - assert(ierr==0); - return rank; -} -void CartesianCommunicator::ProcessorCoorFromRank(int rank, std::vector &coor) -{ - coor.resize(_ndimension); - int ierr=MPI_Cart_coords (communicator, rank, _ndimension,&coor[0]); - assert(ierr==0); -} - -CartesianCommunicator::CartesianCommunicator(const std::vector &processors) -{ - _ndimension = processors.size(); - std::vector periodic(_ndimension,1); - - _Nprocessors=1; - _processors = processors; - - for(int i=0;i<_ndimension;i++){ - _Nprocessors*=_processors[i]; - } - - int Size; - MPI_Comm_size(communicator_world,&Size); - assert(Size==_Nprocessors); - - _processor_coor.resize(_ndimension); - MPI_Cart_create(communicator_world, _ndimension,&_processors[0],&periodic[0],1,&communicator); - MPI_Comm_rank (communicator,&_processor); - MPI_Cart_coords(communicator,_processor,_ndimension,&_processor_coor[0]); -}; - -void CartesianCommunicator::GlobalSum(uint32_t &u){ - int ierr=MPI_Allreduce(MPI_IN_PLACE,&u,1,MPI_UINT32_T,MPI_SUM,communicator); - assert(ierr==0); -} -void CartesianCommunicator::GlobalSum(uint64_t &u){ - int ierr=MPI_Allreduce(MPI_IN_PLACE,&u,1,MPI_UINT64_T,MPI_SUM,communicator); - assert(ierr==0); -} -void CartesianCommunicator::GlobalSum(float &f){ - int ierr=MPI_Allreduce(MPI_IN_PLACE,&f,1,MPI_FLOAT,MPI_SUM,communicator); - assert(ierr==0); -} -void CartesianCommunicator::GlobalSumVector(float *f,int N) -{ - int ierr=MPI_Allreduce(MPI_IN_PLACE,f,N,MPI_FLOAT,MPI_SUM,communicator); - assert(ierr==0); -} -void CartesianCommunicator::GlobalSum(double &d) -{ - int ierr = MPI_Allreduce(MPI_IN_PLACE,&d,1,MPI_DOUBLE,MPI_SUM,communicator); - assert(ierr==0); -} -void CartesianCommunicator::GlobalSumVector(double *d,int N) -{ - int ierr = MPI_Allreduce(MPI_IN_PLACE,d,N,MPI_DOUBLE,MPI_SUM,communicator); - assert(ierr==0); -} - -// Basic Halo comms primitive -void 
CartesianCommunicator::SendToRecvFrom(void *xmit, - int dest, - void *recv, - int from, - int bytes) -{ - std::vector reqs(0); - SendToRecvFromBegin(reqs,xmit,dest,recv,from,bytes); - SendToRecvFromComplete(reqs); -} - -void CartesianCommunicator::SendRecvPacket(void *xmit, - void *recv, - int sender, - int receiver, - int bytes) -{ - MPI_Status stat; - assert(sender != receiver); - int tag = sender; - if ( _processor == sender ) { - MPI_Send(xmit, bytes, MPI_CHAR,receiver,tag,communicator); - } - if ( _processor == receiver ) { - MPI_Recv(recv, bytes, MPI_CHAR,sender,tag,communicator,&stat); - } -} - -// Basic Halo comms primitive -void CartesianCommunicator::SendToRecvFromBegin(std::vector &list, - void *xmit, - int dest, - void *recv, - int from, - int bytes) -{ - MPI_Request xrq; - MPI_Request rrq; - int rank = _processor; - int ierr; - ierr =MPI_Isend(xmit, bytes, MPI_CHAR,dest,_processor,communicator,&xrq); - ierr|=MPI_Irecv(recv, bytes, MPI_CHAR,from,from,communicator,&rrq); - - assert(ierr==0); - - list.push_back(xrq); - list.push_back(rrq); -} - -void CartesianCommunicator::StencilSendToRecvFromBegin(std::vector &list, - void *xmit, - int dest, - void *recv, - int from, - int bytes) -{ - uint64_t xmit_i = (uint64_t) xmit; - uint64_t recv_i = (uint64_t) recv; - uint64_t shm = (uint64_t) ShmCommBuf; - // assert xmit and recv lie in shared memory region - assert( (xmit_i >= shm) && (xmit_i+bytes <= shm+MAX_MPI_SHM_BYTES) ); - assert( (recv_i >= shm) && (recv_i+bytes <= shm+MAX_MPI_SHM_BYTES) ); - assert(from!=_processor); - assert(dest!=_processor); - - MPIoffloadEngine::QueueMultiplexedSendRecv(xmit,recv,bytes,_processor,from,communicator,dest,from); - - //MPIoffloadEngine::QueueRoundRobinSendRecv(xmit,recv,bytes,_processor,from,communicator,dest,from); - - //MPIoffloadEngine::QueueMultiplexedSend(xmit,bytes,_processor,communicator,dest); - //MPIoffloadEngine::QueueMultiplexedRecv(recv,bytes,from,communicator,from); -} - -void CartesianCommunicator::StencilSendToRecvFromComplete(std::vector &list) -{ - MPIoffloadEngine::WaitAll(); - //this->Barrier(); -} - -void CartesianCommunicator::StencilBarrier(void) { } - -void CartesianCommunicator::SendToRecvFromComplete(std::vector &list) -{ - int nreq=list.size(); - std::vector status(nreq); - int ierr = MPI_Waitall(nreq,&list[0],&status[0]); - assert(ierr==0); -} - -void CartesianCommunicator::Barrier(void) -{ - int ierr = MPI_Barrier(communicator); - assert(ierr==0); -} - -void CartesianCommunicator::Broadcast(int root,void* data, int bytes) -{ - int ierr=MPI_Bcast(data, - bytes, - MPI_BYTE, - root, - communicator); - assert(ierr==0); -} - -void CartesianCommunicator::BroadcastWorld(int root,void* data, int bytes) -{ - int ierr= MPI_Bcast(data, - bytes, - MPI_BYTE, - root, - communicator_world); - assert(ierr==0); -} - -void *CartesianCommunicator::ShmBufferSelf(void) { return ShmCommBuf; } - -void *CartesianCommunicator::ShmBuffer(int rank) { - return NULL; -} -void *CartesianCommunicator::ShmBufferTranslate(int rank,void * local_p) { - return NULL; -} - - -}; - diff --git a/lib/communicator/Communicator_mpit.cc b/lib/communicator/Communicator_mpit.cc deleted file mode 100644 index bceea0d8..00000000 --- a/lib/communicator/Communicator_mpit.cc +++ /dev/null @@ -1,273 +0,0 @@ - /************************************************************************************* - - Grid physics library, www.github.com/paboyle/Grid - - Source file: ./lib/communicator/Communicator_mpi.cc - - Copyright (C) 2015 - -Author: Peter Boyle - - This program is 
free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License along - with this program; if not, write to the Free Software Foundation, Inc., - 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. - - See the full license in the file "LICENSE" in the top level distribution directory - *************************************************************************************/ - /* END LEGAL */ -#include -#include -#include -#include - -namespace Grid { - - -/////////////////////////////////////////////////////////////////////////////////////////////////// -// Info that is setup once and indept of cartesian layout -/////////////////////////////////////////////////////////////////////////////////////////////////// -MPI_Comm CartesianCommunicator::communicator_world; - -// Should error check all MPI calls. -void CartesianCommunicator::Init(int *argc, char ***argv) { - int flag; - int provided; - MPI_Initialized(&flag); // needed to coexist with other libs apparently - if ( !flag ) { - MPI_Init_thread(argc,argv,MPI_THREAD_MULTIPLE,&provided); - if ( provided != MPI_THREAD_MULTIPLE ) { - QCD::WilsonKernelsStatic::Comms = QCD::WilsonKernelsStatic::CommsThenCompute; - } - } - MPI_Comm_dup (MPI_COMM_WORLD,&communicator_world); - ShmInitGeneric(); -} - -CartesianCommunicator::~CartesianCommunicator() -{ - int MPI_is_finalised; - MPI_Finalized(&MPI_is_finalised); - if (communicator && !MPI_is_finalised){ - MPI_Comm_free(&communicator); - for(int i=0;i< communicator_halo.size();i++){ - MPI_Comm_free(&communicator_halo[i]); - } - } -} - -void CartesianCommunicator::GlobalSum(uint32_t &u){ - int ierr=MPI_Allreduce(MPI_IN_PLACE,&u,1,MPI_UINT32_T,MPI_SUM,communicator); - assert(ierr==0); -} -void CartesianCommunicator::GlobalSum(uint64_t &u){ - int ierr=MPI_Allreduce(MPI_IN_PLACE,&u,1,MPI_UINT64_T,MPI_SUM,communicator); - assert(ierr==0); -} -void CartesianCommunicator::GlobalXOR(uint32_t &u){ - int ierr=MPI_Allreduce(MPI_IN_PLACE,&u,1,MPI_UINT32_T,MPI_BXOR,communicator); - assert(ierr==0); -} -void CartesianCommunicator::GlobalXOR(uint64_t &u){ - int ierr=MPI_Allreduce(MPI_IN_PLACE,&u,1,MPI_UINT64_T,MPI_BXOR,communicator); - assert(ierr==0); -} -void CartesianCommunicator::GlobalSum(float &f){ - int ierr=MPI_Allreduce(MPI_IN_PLACE,&f,1,MPI_FLOAT,MPI_SUM,communicator); - assert(ierr==0); -} -void CartesianCommunicator::GlobalSumVector(float *f,int N) -{ - int ierr=MPI_Allreduce(MPI_IN_PLACE,f,N,MPI_FLOAT,MPI_SUM,communicator); - assert(ierr==0); -} -void CartesianCommunicator::GlobalSum(double &d) -{ - int ierr = MPI_Allreduce(MPI_IN_PLACE,&d,1,MPI_DOUBLE,MPI_SUM,communicator); - assert(ierr==0); -} -void CartesianCommunicator::GlobalSumVector(double *d,int N) -{ - int ierr = MPI_Allreduce(MPI_IN_PLACE,d,N,MPI_DOUBLE,MPI_SUM,communicator); - assert(ierr==0); -} -void CartesianCommunicator::ShiftedRanks(int dim,int shift,int &source,int &dest) -{ - int ierr=MPI_Cart_shift(communicator,dim,shift,&source,&dest); - assert(ierr==0); -} -int CartesianCommunicator::RankFromProcessorCoor(std::vector &coor) -{ - int rank; - int 
ierr=MPI_Cart_rank (communicator, &coor[0], &rank); - assert(ierr==0); - return rank; -} -void CartesianCommunicator::ProcessorCoorFromRank(int rank, std::vector &coor) -{ - coor.resize(_ndimension); - int ierr=MPI_Cart_coords (communicator, rank, _ndimension,&coor[0]); - assert(ierr==0); -} - -// Basic Halo comms primitive -void CartesianCommunicator::SendToRecvFrom(void *xmit, - int dest, - void *recv, - int from, - int bytes) -{ - std::vector reqs(0); - SendToRecvFromBegin(reqs,xmit,dest,recv,from,bytes); - SendToRecvFromComplete(reqs); -} - -void CartesianCommunicator::SendRecvPacket(void *xmit, - void *recv, - int sender, - int receiver, - int bytes) -{ - MPI_Status stat; - assert(sender != receiver); - int tag = sender; - if ( _processor == sender ) { - MPI_Send(xmit, bytes, MPI_CHAR,receiver,tag,communicator); - } - if ( _processor == receiver ) { - MPI_Recv(recv, bytes, MPI_CHAR,sender,tag,communicator,&stat); - } -} - -// Basic Halo comms primitive -void CartesianCommunicator::SendToRecvFromBegin(std::vector &list, - void *xmit, - int dest, - void *recv, - int from, - int bytes) -{ - int myrank = _processor; - int ierr; - if ( CommunicatorPolicy == CommunicatorPolicyConcurrent ) { - MPI_Request xrq; - MPI_Request rrq; - - ierr =MPI_Irecv(recv, bytes, MPI_CHAR,from,from,communicator,&rrq); - ierr|=MPI_Isend(xmit, bytes, MPI_CHAR,dest,_processor,communicator,&xrq); - - assert(ierr==0); - list.push_back(xrq); - list.push_back(rrq); - } else { - // Give the CPU to MPI immediately; can use threads to overlap optionally - ierr=MPI_Sendrecv(xmit,bytes,MPI_CHAR,dest,myrank, - recv,bytes,MPI_CHAR,from, from, - communicator,MPI_STATUS_IGNORE); - assert(ierr==0); - } -} -void CartesianCommunicator::SendToRecvFromComplete(std::vector &list) -{ - if ( CommunicatorPolicy == CommunicatorPolicyConcurrent ) { - int nreq=list.size(); - std::vector status(nreq); - int ierr = MPI_Waitall(nreq,&list[0],&status[0]); - assert(ierr==0); - } -} - -void CartesianCommunicator::Barrier(void) -{ - int ierr = MPI_Barrier(communicator); - assert(ierr==0); -} - -void CartesianCommunicator::Broadcast(int root,void* data, int bytes) -{ - int ierr=MPI_Bcast(data, - bytes, - MPI_BYTE, - root, - communicator); - assert(ierr==0); -} - /////////////////////////////////////////////////////// - // Should only be used prior to Grid Init finished. - // Check for this? 
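The SendToRecvFromBegin being removed here switches between two strategies: under the concurrent policy it posts a non-blocking Irecv/Isend pair and defers completion to a later MPI_Waitall, otherwise it hands the CPU to MPI immediately with a blocking Sendrecv. A condensed sketch of that choice; the enum and function names are illustrative, not Grid's.

#include <mpi.h>
#include <vector>
#include <cassert>

enum HaloPolicy { Concurrent, Sequential };

void HaloExchange(MPI_Comm comm, int myrank, int dest, int from,
                  void *xmit, void *recv, int bytes,
                  HaloPolicy policy, std::vector<MPI_Request> &reqs)
{
  if (policy == Concurrent) {
    MPI_Request xrq, rrq;
    int ierr  = MPI_Irecv(recv, bytes, MPI_CHAR, from, from,   comm, &rrq);
    ierr     |= MPI_Isend(xmit, bytes, MPI_CHAR, dest, myrank, comm, &xrq);
    assert(ierr == 0);
    reqs.push_back(xrq);      // completed later by an MPI_Waitall over reqs
    reqs.push_back(rrq);
  } else {
    int ierr = MPI_Sendrecv(xmit, bytes, MPI_CHAR, dest, myrank,
                            recv, bytes, MPI_CHAR, from, from,
                            comm, MPI_STATUS_IGNORE);
    assert(ierr == 0);
  }
}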
- /////////////////////////////////////////////////////// -int CartesianCommunicator::RankWorld(void){ - int r; - MPI_Comm_rank(communicator_world,&r); - return r; -} -void CartesianCommunicator::BroadcastWorld(int root,void* data, int bytes) -{ - int ierr= MPI_Bcast(data, - bytes, - MPI_BYTE, - root, - communicator_world); - assert(ierr==0); -} - -double CartesianCommunicator::StencilSendToRecvFromBegin(std::vector &list, - void *xmit, - int xmit_to_rank, - void *recv, - int recv_from_rank, - int bytes,int dir) -{ - int myrank = _processor; - int ierr; - int ncomm =communicator_halo.size(); - int commdir=dir%ncomm; - - // std::cout << " sending on communicator "< &waitall,int dir) -{ - int nreq=waitall.size(); - MPI_Waitall(nreq, &waitall[0], MPI_STATUSES_IGNORE); -} -double CartesianCommunicator::StencilSendToRecvFrom(void *xmit, - int xmit_to_rank, - void *recv, - int recv_from_rank, - int bytes,int dir) -{ - int myrank = _processor; - int ierr; - // std::cout << " sending on communicator "< - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License along - with this program; if not, write to the Free Software Foundation, Inc., - 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. - - See the full license in the file "LICENSE" in the top level distribution directory - *************************************************************************************/ - /* END LEGAL */ -#include -#include -#include - -namespace Grid { - - // Should error check all MPI calls. -#define SHMEM_VET(addr) - -#define SHMEM_VET_DEBUG(addr) { \ - if ( ! 
shmem_addr_accessible(addr,_processor) ) {\ - std::fprintf(stderr,"%d Inaccessible shmem address %lx %s %s\n",_processor,addr,__FUNCTION__,#addr); \ - BACKTRACEFILE(); \ - }\ -} - - -/////////////////////////////////////////////////////////////////////////////////////////////////// -// Info that is setup once and indept of cartesian layout -/////////////////////////////////////////////////////////////////////////////////////////////////// - -typedef struct HandShake_t { - uint64_t seq_local; - uint64_t seq_remote; -} HandShake; - -std::array make_psync_init(void) { - std::array ret; - ret.fill(SHMEM_SYNC_VALUE); - return ret; -} -static std::array psync_init = make_psync_init(); - -static Vector< HandShake > XConnections; -static Vector< HandShake > RConnections; - -void CartesianCommunicator::Init(int *argc, char ***argv) { - shmem_init(); - XConnections.resize(shmem_n_pes()); - RConnections.resize(shmem_n_pes()); - for(int pe =0 ; pe &processors,const CartesianCommunicator &parent) - : CartesianCommunicator(processors) -{ - std::cout << "Attempts to split SHMEM communicators will fail " < &processors) -{ - _ndimension = processors.size(); - std::vector periodic(_ndimension,1); - - _Nprocessors=1; - _processors = processors; - _processor_coor.resize(_ndimension); - - _processor = shmem_my_pe(); - - Lexicographic::CoorFromIndex(_processor_coor,_processor,_processors); - - for(int i=0;i<_ndimension;i++){ - _Nprocessors*=_processors[i]; - } - - int Size = shmem_n_pes(); - - - assert(Size==_Nprocessors); -} - -void CartesianCommunicator::GlobalSum(uint32_t &u){ - static long long source ; - static long long dest ; - static long long llwrk[_SHMEM_REDUCE_MIN_WRKDATA_SIZE]; - static std::array psync = psync_init; - - // int nreduce=1; - // int pestart=0; - // int logStride=0; - - source = u; - dest = 0; - shmem_longlong_sum_to_all(&dest,&source,1,0,0,_Nprocessors,llwrk,psync.data()); - shmem_barrier_all(); // necessary? - u = dest; -} -void CartesianCommunicator::GlobalSum(uint64_t &u){ - static long long source ; - static long long dest ; - static long long llwrk[_SHMEM_REDUCE_MIN_WRKDATA_SIZE]; - static std::array psync = psync_init; - - // int nreduce=1; - // int pestart=0; - // int logStride=0; - - source = u; - dest = 0; - shmem_longlong_sum_to_all(&dest,&source,1,0,0,_Nprocessors,llwrk,psync.data()); - shmem_barrier_all(); // necessary? 
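The SHMEM communicator above computes its processor coordinate from the PE number with Lexicographic::CoorFromIndex, and the shifted-rank lookup nearby uses the inverse, IndexFromCoor. A stand-alone sketch of that mapping, assuming dimension 0 runs fastest (the convention the surrounding code appears to rely on); these are free functions written for illustration only.

#include <vector>

// rank -> coordinate, dimension 0 fastest
void CoorFromIndex(std::vector<int> &coor, int index, const std::vector<int> &dims)
{
  coor.resize(dims.size());
  for (size_t d = 0; d < dims.size(); d++) {
    coor[d] = index % dims[d];
    index   = index / dims[d];
  }
}

// coordinate -> rank, inverse of the above
void IndexFromCoor(const std::vector<int> &coor, int &index, const std::vector<int> &dims)
{
  index = 0;
  int stride = 1;
  for (size_t d = 0; d < dims.size(); d++) {
    index  += coor[d] * stride;
    stride *= dims[d];
  }
}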
- u = dest; -} -void CartesianCommunicator::GlobalSum(float &f){ - static float source ; - static float dest ; - static float llwrk[_SHMEM_REDUCE_MIN_WRKDATA_SIZE]; - static std::array psync = psync_init; - - source = f; - dest =0.0; - shmem_float_sum_to_all(&dest,&source,1,0,0,_Nprocessors,llwrk,psync.data()); - shmem_barrier_all(); - f = dest; -} -void CartesianCommunicator::GlobalSumVector(float *f,int N) -{ - static float source ; - static float dest = 0 ; - static float llwrk[_SHMEM_REDUCE_MIN_WRKDATA_SIZE]; - static std::array psync = psync_init; - - if ( shmem_addr_accessible(f,_processor) ){ - shmem_float_sum_to_all(f,f,N,0,0,_Nprocessors,llwrk,psync.data()); - shmem_barrier_all(); - return; - } - - for(int i=0;i psync = psync_init; - - source = d; - dest = 0; - shmem_double_sum_to_all(&dest,&source,1,0,0,_Nprocessors,llwrk,psync.data()); - shmem_barrier_all(); - d = dest; -} -void CartesianCommunicator::GlobalSumVector(double *d,int N) -{ - static double source ; - static double dest ; - static double llwrk[_SHMEM_REDUCE_MIN_WRKDATA_SIZE]; - static std::array psync = psync_init; - - - if ( shmem_addr_accessible(d,_processor) ){ - shmem_double_sum_to_all(d,d,N,0,0,_Nprocessors,llwrk,psync.data()); - shmem_barrier_all(); - return; - } - - for(int i=0;i coor = _processor_coor; - - assert(std::abs(shift) <_processors[dim]); - - coor[dim] = (_processor_coor[dim] + shift + _processors[dim])%_processors[dim]; - Lexicographic::IndexFromCoor(coor,source,_processors); - - coor[dim] = (_processor_coor[dim] - shift + _processors[dim])%_processors[dim]; - Lexicographic::IndexFromCoor(coor,dest,_processors); - -} -int CartesianCommunicator::RankFromProcessorCoor(std::vector &coor) -{ - int rank; - Lexicographic::IndexFromCoor(coor,rank,_processors); - return rank; -} -void CartesianCommunicator::ProcessorCoorFromRank(int rank, std::vector &coor) -{ - Lexicographic::CoorFromIndex(coor,rank,_processors); -} - -// Basic Halo comms primitive -void CartesianCommunicator::SendToRecvFrom(void *xmit, - int dest, - void *recv, - int from, - int bytes) -{ - SHMEM_VET(xmit); - SHMEM_VET(recv); - std::vector reqs(0); - SendToRecvFromBegin(reqs,xmit,dest,recv,from,bytes); - SendToRecvFromComplete(reqs); -} - -void CartesianCommunicator::SendRecvPacket(void *xmit, - void *recv, - int sender, - int receiver, - int bytes) -{ - static uint64_t seq; - - assert(recv!=xmit); - volatile HandShake *RecvSeq = (volatile HandShake *) & RConnections[sender]; - volatile HandShake *SendSeq = (volatile HandShake *) & XConnections[receiver]; - - if ( _processor == sender ) { - - // Check he has posted a receive - while(SendSeq->seq_remote == SendSeq->seq_local); - - // Advance our send count - seq = ++(SendSeq->seq_local); - - // Send this packet - SHMEM_VET(recv); - shmem_putmem(recv,xmit,bytes,receiver); - shmem_fence(); - - //Notify him we're done - shmem_putmem((void *)&(RecvSeq->seq_remote),&seq,sizeof(seq),receiver); - shmem_fence(); - } - if ( _processor == receiver ) { - - // Post a receive - seq = ++(RecvSeq->seq_local); - shmem_putmem((void *)&(SendSeq->seq_remote),&seq,sizeof(seq),sender); - - // Now wait until he has advanced our reception counter - while(RecvSeq->seq_remote != RecvSeq->seq_local); - - } -} - -// Basic Halo comms primitive -void CartesianCommunicator::SendToRecvFromBegin(std::vector &list, - void *xmit, - int dest, - void *recv, - int from, - int bytes) -{ - SHMEM_VET(xmit); - SHMEM_VET(recv); - // shmem_putmem_nb(recv,xmit,bytes,dest,NULL); - shmem_putmem(recv,xmit,bytes,dest); - - if ( 
CommunicatorPolicy == CommunicatorPolicySequential ) shmem_barrier_all(); -} -void CartesianCommunicator::SendToRecvFromComplete(std::vector &list) -{ - // shmem_quiet(); // I'm done - if( CommunicatorPolicy == CommunicatorPolicyConcurrent ) shmem_barrier_all();// He's done too -} -void CartesianCommunicator::Barrier(void) -{ - shmem_barrier_all(); -} -void CartesianCommunicator::Broadcast(int root,void* data, int bytes) -{ - static std::array psync = psync_init; - static uint32_t word; - uint32_t *array = (uint32_t *) data; - assert( (bytes % 4)==0); - int words = bytes/4; - - if ( shmem_addr_accessible(data,_processor) ){ - shmem_broadcast32(data,data,words,root,0,0,shmem_n_pes(),psync.data()); - return; - } - - for(int w=0;w psync = psync_init; - static uint32_t word; - uint32_t *array = (uint32_t *) data; - assert( (bytes % 4)==0); - int words = bytes/4; - - for(int w=0;w + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along + with this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + + See the full license in the file "LICENSE" in the top level distribution directory +*************************************************************************************/ +/* END LEGAL */ + +#include + +namespace Grid { + +// static data + +uint64_t GlobalSharedMemory::MAX_MPI_SHM_BYTES = 1024LL*1024LL*1024LL; +int GlobalSharedMemory::Hugepages = 0; +int GlobalSharedMemory::ShmSetup; + +std::vector GlobalSharedMemory::WorldShmCommBufs; + +Grid_MPI_Comm GlobalSharedMemory::WorldShmComm; +int GlobalSharedMemory::WorldShmRank; +int GlobalSharedMemory::WorldShmSize; +std::vector GlobalSharedMemory::WorldShmRanks; + +Grid_MPI_Comm GlobalSharedMemory::WorldComm; +int GlobalSharedMemory::WorldSize; +int GlobalSharedMemory::WorldRank; + +int GlobalSharedMemory::WorldNodes; +int GlobalSharedMemory::WorldNode; + + +} diff --git a/lib/communicator/SharedMemory.h b/lib/communicator/SharedMemory.h new file mode 100644 index 00000000..2bb112e5 --- /dev/null +++ b/lib/communicator/SharedMemory.h @@ -0,0 +1,158 @@ +/************************************************************************************* + + Grid physics library, www.github.com/paboyle/Grid + + Source file: ./lib/communicator/SharedMemory.cc + + Copyright (C) 2015 + +Author: Peter Boyle + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
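The statics defined above are populated once per process through the interface declared in the SharedMemory.h header that follows. A hypothetical call-site sketch of the intended order; the real call sites live inside the communicator constructors, the exact ordering may differ there, and the local variable names here are made up.

// Assumes the Grid communicator headers are included and namespace Grid is in scope.
void SetupNodeSharedMemory(void)
{
  GlobalSharedMemory::Init(MPI_COMM_WORLD);            // world + intra-node communicators, node count
  std::vector<int> processors({2,2,2,2});              // example 4-d MPI decomposition
  Grid_MPI_Comm optimal;
  GlobalSharedMemory::OptimalCommunicator(processors,optimal); // node-aware reorder for MPI_Cart_create
  GlobalSharedMemory::SharedMemoryAllocate(GlobalSharedMemory::MAX_MPI_SHM_BYTES,
                                           GlobalSharedMemory::Hugepages);

  SharedMemory shm;                                     // in Grid this sits under the Cartesian communicator
  shm.SetCommunicator(optimal);                         // bind this instance to the node-shared buffers
  void *scratch = shm.ShmBufferMalloc(1024);            // carve 1 kB from the shared heap
  (void)scratch;
  shm.ShmBufferFreeAll();                               // bump allocator: one call resets everything
}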
+ + You should have received a copy of the GNU General Public License along + with this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + + See the full license in the file "LICENSE" in the top level distribution directory +*************************************************************************************/ +/* END LEGAL */ + + +// TODO +// 1) move includes into SharedMemory.cc +// +// 2) split shared memory into a) optimal communicator creation from comm world +// +// b) shared memory buffers container +// -- static globally shared; init once +// -- per instance set of buffers. +// + +#pragma once + +#include + +#if defined (GRID_COMMS_MPI3) +#include +#endif +#include +#include +#include +#include +#include +#include +#include +#include +#include +#ifdef HAVE_NUMAIF_H +#include +#endif + +namespace Grid { + +#if defined (GRID_COMMS_MPI3) + typedef MPI_Comm Grid_MPI_Comm; + typedef MPI_Request CommsRequest_t; +#else + typedef int CommsRequest_t; + typedef int Grid_MPI_Comm; +#endif + +class GlobalSharedMemory { + private: + // Init once lock on the buffer allocation + static int ShmSetup; + static const int MAXLOG2RANKSPERNODE = 16; + + public: + static uint64_t MAX_MPI_SHM_BYTES; + static int Hugepages; + + static std::vector WorldShmCommBufs; + + static Grid_MPI_Comm WorldComm; + static int WorldRank; + static int WorldSize; + + static Grid_MPI_Comm WorldShmComm; + static int WorldShmRank; + static int WorldShmSize; + + static int WorldNodes; + static int WorldNode; + + static std::vector WorldShmRanks; + + ////////////////////////////////////////////////////////////////////////////////////// + // Create an optimal reordered communicator that makes MPI_Cart_create get it right + ////////////////////////////////////////////////////////////////////////////////////// + static void Init(Grid_MPI_Comm comm); // Typically MPI_COMM_WORLD + static void OptimalCommunicator(const std::vector &processors,Grid_MPI_Comm & optimal_comm); // Turns MPI_COMM_WORLD into right layout for Cartesian + /////////////////////////////////////////////////// + // Provide shared memory facilities off comm world + /////////////////////////////////////////////////// + static void SharedMemoryAllocate(uint64_t bytes, int flags); + static void SharedMemoryFree(void); + +}; + +////////////////////////////// +// one per communicator +////////////////////////////// +class SharedMemory +{ + private: + static const int MAXLOG2RANKSPERNODE = 16; + + size_t heap_top; + size_t heap_bytes; + size_t heap_size; + + protected: + + Grid_MPI_Comm ShmComm; // for barriers + int ShmRank; + int ShmSize; + std::vector ShmCommBufs; + std::vector ShmRanks;// Mapping comm ranks to Shm ranks + + public: + SharedMemory() {}; + /////////////////////////////////////////////////////////////////////////////////////// + // set the buffers & sizes + /////////////////////////////////////////////////////////////////////////////////////// + void SetCommunicator(Grid_MPI_Comm comm); + + //////////////////////////////////////////////////////////////////////// + // For this instance ; disjoint buffer sets between splits if split grid + //////////////////////////////////////////////////////////////////////// + void ShmBarrier(void); + + /////////////////////////////////////////////////// + // Call on any instance + /////////////////////////////////////////////////// + void SharedMemoryTest(void); + void *ShmBufferSelf(void); + void *ShmBuffer (int rank); + void 
*ShmBufferTranslate(int rank,void * local_p); + void *ShmBufferMalloc(size_t bytes); + void ShmBufferFreeAll(void) ; + + ////////////////////////////////////////////////////////////////////////// + // Make info on Nodes & ranks and Shared memory available + ////////////////////////////////////////////////////////////////////////// + int NodeCount(void) { return GlobalSharedMemory::WorldNodes;}; + int RankCount(void) { return GlobalSharedMemory::WorldSize;}; + +}; + +} diff --git a/lib/communicator/SharedMemoryMPI.cc b/lib/communicator/SharedMemoryMPI.cc new file mode 100644 index 00000000..af4f9702 --- /dev/null +++ b/lib/communicator/SharedMemoryMPI.cc @@ -0,0 +1,415 @@ +/************************************************************************************* + + Grid physics library, www.github.com/paboyle/Grid + + Source file: ./lib/communicator/SharedMemory.cc + + Copyright (C) 2015 + +Author: Peter Boyle + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along + with this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + + See the full license in the file "LICENSE" in the top level distribution directory +*************************************************************************************/ +/* END LEGAL */ + +#include + +namespace Grid { + +/*Construct from an MPI communicator*/ +void GlobalSharedMemory::Init(Grid_MPI_Comm comm) +{ + WorldComm = comm; + MPI_Comm_rank(WorldComm,&WorldRank); + MPI_Comm_size(WorldComm,&WorldSize); + // WorldComm, WorldSize, WorldRank + + ///////////////////////////////////////////////////////////////////// + // Split into groups that can share memory + ///////////////////////////////////////////////////////////////////// + MPI_Comm_split_type(comm, MPI_COMM_TYPE_SHARED, 0, MPI_INFO_NULL,&WorldShmComm); + MPI_Comm_rank(WorldShmComm ,&WorldShmRank); + MPI_Comm_size(WorldShmComm ,&WorldShmSize); + // WorldShmComm, WorldShmSize, WorldShmRank + + // WorldNodes + WorldNodes = WorldSize/WorldShmSize; + assert( (WorldNodes * WorldShmSize) == WorldSize ); + + // FIXME: Check all WorldShmSize are the same ? + + ///////////////////////////////////////////////////////////////////// + // find world ranks in our SHM group (i.e. 
which ranks are on our node) + ///////////////////////////////////////////////////////////////////// + MPI_Group WorldGroup, ShmGroup; + MPI_Comm_group (WorldComm, &WorldGroup); + MPI_Comm_group (WorldShmComm, &ShmGroup); + + std::vector world_ranks(WorldSize); for(int r=0;r MyGroup; + MyGroup.resize(WorldShmSize); + for(int rank=0;rank()); + int myleader = MyGroup[0]; + + std::vector leaders_1hot(WorldSize,0); + std::vector leaders_group(WorldNodes,0); + leaders_1hot [ myleader ] = 1; + + /////////////////////////////////////////////////////////////////// + // global sum leaders over comm world + /////////////////////////////////////////////////////////////////// + int ierr=MPI_Allreduce(MPI_IN_PLACE,&leaders_1hot[0],WorldSize,MPI_INT,MPI_SUM,WorldComm); + assert(ierr==0); + + /////////////////////////////////////////////////////////////////// + // find the group leaders world rank + /////////////////////////////////////////////////////////////////// + int group=0; + for(int l=0;l &processors,Grid_MPI_Comm & optimal_comm) +{ + //////////////////////////////////////////////////////////////// + // Assert power of two shm_size. + //////////////////////////////////////////////////////////////// + int log2size = -1; + for(int i=0;i<=MAXLOG2RANKSPERNODE;i++){ + if ( (0x1< processor_coor(ndimension); + std::vector WorldDims = processors; std::vector ShmDims (ndimension,1); std::vector NodeDims (ndimension); + std::vector ShmCoor (ndimension); std::vector NodeCoor (ndimension); std::vector WorldCoor(ndimension); + int dim = 0; + for(int l2=0;l2 ranks(size); for(int r=0;r= heap_size) { + std::cout<< " ShmBufferMalloc exceeded shared heap size -- try increasing with --shm flag" < + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along + with this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
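The SHMOPEN allocation path above has every node-local rank create one named segment and then map its peers' segments, so a rank can write directly into a neighbour's receive buffer. A distilled sketch of that POSIX pattern; the segment-name format and function name are illustrative, and a barrier between the create phase and the attach phase is assumed, as in the patch.

#include <sys/mman.h>
#include <fcntl.h>
#include <unistd.h>
#include <cassert>
#include <cstdio>

void *MapPeerSegment(int node, int peer_rank, size_t bytes, bool create)
{
  char name[64];
  std::snprintf(name, sizeof(name), "/Grid_shm_%d_%d", node, peer_rank); // illustrative naming scheme
  if (create) {
    shm_unlink(name);                                // clear a stale segment from a crashed run
    int fd = shm_open(name, O_RDWR | O_CREAT, 0600);
    assert(fd >= 0);
    int e = ftruncate(fd, bytes); assert(e == 0);    // size the segment
    void *ptr = mmap(NULL, bytes, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
    assert(ptr != MAP_FAILED);
    close(fd);                                       // the mapping outlives the descriptor
    return ptr;
  } else {
    int fd = shm_open(name, O_RDWR, 0600);           // attach to a peer's existing segment
    assert(fd >= 0);
    void *ptr = mmap(NULL, bytes, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
    assert(ptr != MAP_FAILED);
    close(fd);
    return ptr;
  }
}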
+ + See the full license in the file "LICENSE" in the top level distribution directory +*************************************************************************************/ +/* END LEGAL */ + +#include + +namespace Grid { + +/*Construct from an MPI communicator*/ +void GlobalSharedMemory::Init(Grid_MPI_Comm comm) +{ + WorldComm = 0; + WorldRank = 0; + WorldSize = 1; + WorldShmComm = 0 ; + WorldShmRank = 0 ; + WorldShmSize = 1 ; + WorldNodes = 1 ; + WorldNode = 0 ; + WorldShmRanks.resize(WorldSize); WorldShmRanks[0] = 0; + WorldShmCommBufs.resize(1); +} + +void GlobalSharedMemory::OptimalCommunicator(const std::vector &processors,Grid_MPI_Comm & optimal_comm) +{ + optimal_comm = WorldComm; +} + +//////////////////////////////////////////////////////////////////////////////////////////// +// Hugetlbfs mapping intended, use anonymous mmap +//////////////////////////////////////////////////////////////////////////////////////////// +void GlobalSharedMemory::SharedMemoryAllocate(uint64_t bytes, int flags) +{ + void * ShmCommBuf ; + MAX_MPI_SHM_BYTES=bytes; + int mmap_flag =0; +#ifdef MAP_ANONYMOUS + mmap_flag = mmap_flag| MAP_SHARED | MAP_ANONYMOUS; +#endif +#ifdef MAP_ANON + mmap_flag = mmap_flag| MAP_SHARED | MAP_ANON; +#endif +#ifdef MAP_HUGETLB + if ( flags ) mmap_flag |= MAP_HUGETLB; +#endif + ShmCommBuf =(void *) mmap(NULL, bytes, PROT_READ | PROT_WRITE, mmap_flag, -1, 0); + if (ShmCommBuf == (void *)MAP_FAILED) { + perror("mmap failed "); + exit(EXIT_FAILURE); + } +#ifdef MADV_HUGEPAGE + if (!Hugepages ) madvise(ShmCommBuf,bytes,MADV_HUGEPAGE); +#endif + bzero(ShmCommBuf,bytes); + WorldShmCommBufs[0] = ShmCommBuf; +}; + +void GlobalSharedMemory::SharedMemoryFree(void) +{ + assert(ShmSetup); + assert(0); // unimplemented +} + + //////////////////////////////////////////////////////// + // Global shared functionality finished + // Now move to per communicator functionality + //////////////////////////////////////////////////////// +void SharedMemory::SetCommunicator(Grid_MPI_Comm comm) +{ + ShmRanks.resize(1); + ShmCommBufs.resize(1); + ShmRanks[0] = 0; + ShmRank = 0; + ShmSize = 1; + ////////////////////////////////////////////////////////////////////// + // Map ShmRank to WorldShmRank and use the right buffer + ////////////////////////////////////////////////////////////////////// + ShmCommBufs[0] = GlobalSharedMemory::WorldShmCommBufs[0]; + heap_size = GlobalSharedMemory::MAX_MPI_SHM_BYTES; + ShmBufferFreeAll(); + return; +} +////////////////////////////////////////////////////////////////// +// On node barrier +////////////////////////////////////////////////////////////////// +void SharedMemory::ShmBarrier(void){ return ; } + +////////////////////////////////////////////////////////////////////////////////////////////////////////// +// Test the shared memory is working +////////////////////////////////////////////////////////////////////////////////////////////////////////// +void SharedMemory::SharedMemoryTest(void) { return; } + +void *SharedMemory::ShmBufferSelf(void) +{ + return ShmCommBufs[ShmRank]; +} +void *SharedMemory::ShmBuffer(int rank) +{ + return NULL; +} +void *SharedMemory::ShmBufferTranslate(int rank,void * local_p) +{ + return NULL; +} + +///////////////////////////////// +// Alloc, free shmem region ; common to MPI and none? 
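The non-MPI build above backs its single buffer with an anonymous mapping: explicit huge pages are requested with MAP_HUGETLB only when the flag is set, otherwise transparent huge pages are merely advised. A condensed sketch of that allocation, assuming a Linux-style mmap; the function name is illustrative.

#include <sys/mman.h>
#include <cstring>
#include <cstdio>
#include <cstdlib>

void *AllocateCommsBuffer(size_t bytes, bool explicit_hugepages)
{
  int flags = MAP_SHARED | MAP_ANONYMOUS;        // MAP_ANON on some BSD-derived systems
#ifdef MAP_HUGETLB
  if (explicit_hugepages) flags |= MAP_HUGETLB;  // needs pre-reserved hugetlbfs pages
#endif
  void *buf = mmap(NULL, bytes, PROT_READ | PROT_WRITE, flags, -1, 0);
  if (buf == MAP_FAILED) { perror("mmap failed"); exit(EXIT_FAILURE); }
#ifdef MADV_HUGEPAGE
  if (!explicit_hugepages) madvise(buf, bytes, MADV_HUGEPAGE); // hint: transparent huge pages
#endif
  memset(buf, 0, bytes);                          // touch the pages, as the bzero above does
  return buf;
}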
+///////////////////////////////// +void *SharedMemory::ShmBufferMalloc(size_t bytes){ + void *ptr = (void *)heap_top; + heap_top += bytes; + heap_bytes+= bytes; + if (heap_bytes >= heap_size) { + std::cout<< " ShmBufferMalloc exceeded shared heap size -- try increasing with --shm flag" < Date: Mon, 8 Jan 2018 11:36:39 +0000 Subject: [PATCH 15/17] Synthetic test of lanczos --- ..._dwf_compressed_lanczos_reorg_synthetic.cc | 330 ++++++++++++++++++ 1 file changed, 330 insertions(+) create mode 100644 tests/lanczos/Test_dwf_compressed_lanczos_reorg_synthetic.cc diff --git a/tests/lanczos/Test_dwf_compressed_lanczos_reorg_synthetic.cc b/tests/lanczos/Test_dwf_compressed_lanczos_reorg_synthetic.cc new file mode 100644 index 00000000..132dff4e --- /dev/null +++ b/tests/lanczos/Test_dwf_compressed_lanczos_reorg_synthetic.cc @@ -0,0 +1,330 @@ + /************************************************************************************* + + Grid physics library, www.github.com/paboyle/Grid + + Source file: ./tests/Test_dwf_compressed_lanczos_reorg.cc + + Copyright (C) 2017 + +Author: Leans heavily on Christoph Lehner's code +Author: Peter Boyle + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along + with this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
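ShmBufferMalloc/ShmBufferFreeAll above implement a simple bump allocator over the node-shared heap: allocation only advances a pointer and there is no per-buffer free. A hypothetical usage sketch; the caller and buffer names are made up, and in Grid the stencil code plays this role.

#include <vector>

// Assumes the SharedMemory class declared earlier in this series is available.
void AssembleHalo(SharedMemory &comm, size_t face_bytes, int nfaces)
{
  std::vector<void *> send(nfaces), recv(nfaces);
  for (int f = 0; f < nfaces; f++) {
    send[f] = comm.ShmBufferMalloc(face_bytes);   // bump the heap pointer; complains if the --shm heap is too small
    recv[f] = comm.ShmBufferMalloc(face_bytes);
  }
  // ... pack faces, exchange, unpack ...
  comm.ShmBufferFreeAll();                        // single reset; no per-buffer free exists
}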
+ + See the full license in the file "LICENSE" in the top level distribution directory + *************************************************************************************/ + /* END LEGAL */ +/* + * Reimplement the badly named "multigrid" lanczos as compressed Lanczos using the features + * in Grid that were intended to be used to support blocked Aggregates, from + */ +#include +#include + +using namespace std; +using namespace Grid; +using namespace Grid::QCD; + +template +class ProjectedHermOp : public LinearFunction > > { +public: + typedef iVector CoarseSiteVector; + typedef Lattice CoarseField; + typedef Lattice CoarseScalar; // used for inner products on fine field + typedef Lattice FineField; + + LinearOperatorBase &_Linop; + Aggregation &_Aggregate; + + ProjectedHermOp(LinearOperatorBase& linop, Aggregation &aggregate) : + _Linop(linop), + _Aggregate(aggregate) { }; + + void operator()(const CoarseField& in, CoarseField& out) { + + GridBase *FineGrid = _Aggregate.FineGrid; + FineField fin(FineGrid); + FineField fout(FineGrid); + + _Aggregate.PromoteFromSubspace(in,fin); + _Linop.HermOp(fin,fout); + _Aggregate.ProjectToSubspace(out,fout); + } +}; + +template +class ProjectedFunctionHermOp : public LinearFunction > > { +public: + typedef iVector CoarseSiteVector; + typedef Lattice CoarseField; + typedef Lattice CoarseScalar; // used for inner products on fine field + typedef Lattice FineField; + + + OperatorFunction & _poly; + LinearOperatorBase &_Linop; + Aggregation &_Aggregate; + + ProjectedFunctionHermOp(OperatorFunction & poly,LinearOperatorBase& linop, + Aggregation &aggregate) : + _poly(poly), + _Linop(linop), + _Aggregate(aggregate) { }; + + void operator()(const CoarseField& in, CoarseField& out) { + + GridBase *FineGrid = _Aggregate.FineGrid; + + FineField fin(FineGrid) ;fin.checkerboard =_Aggregate.checkerboard; + FineField fout(FineGrid);fout.checkerboard =_Aggregate.checkerboard; + + _Aggregate.PromoteFromSubspace(in,fin); + _poly(_Linop,fin,fout); + _Aggregate.ProjectToSubspace(out,fout); + } +}; + +// Make serializable Lanczos params + +template +class CoarseFineIRL +{ +public: + typedef iVector CoarseSiteVector; + typedef Lattice CoarseScalar; // used for inner products on fine field + typedef Lattice CoarseField; + typedef Lattice FineField; + +private: + GridBase *_CoarseGrid; + GridBase *_FineGrid; + int _checkerboard; + LinearOperatorBase & _FineOp; + Aggregation _Aggregate; + +public: + CoarseFineIRL(GridBase *FineGrid, + GridBase *CoarseGrid, + LinearOperatorBase &FineOp, + int checkerboard) : + _CoarseGrid(CoarseGrid), + _FineGrid(FineGrid), + _Aggregate(CoarseGrid,FineGrid,checkerboard), + _FineOp(FineOp), + _checkerboard(checkerboard) + {}; + + template static RealD normalise(T& v) + { + RealD nn = norm2(v); + nn = ::sqrt(nn); + v = v * (1.0/nn); + return nn; + } + + void testFine(void) + { + int Nk = nbasis; + _Aggregate.subspace.resize(Nk,_FineGrid); + _Aggregate.subspace[0]=1.0; + _Aggregate.subspace[0].checkerboard=_checkerboard; + normalise(_Aggregate.subspace[0]); + PlainHermOp Op(_FineOp); + for(int k=1;k Cheby(alpha,beta,Npoly); + FunctionHermOp ChebyOp(Cheby,_FineOp); + PlainHermOp Op(_FineOp); + + int Nk = nbasis; + + std::vector eval(Nm); + + FineField src(_FineGrid); src=1.0; src.checkerboard = _checkerboard; + + ImplicitlyRestartedLanczos IRL(ChebyOp,Op,Nk,Nk,Nm,resid,MaxIt,betastp,MinRes); + _Aggregate.subspace.resize(Nm,_FineGrid); + IRL.calc(eval,_Aggregate.subspace,src,Nk,false); + _Aggregate.subspace.resize(Nk,_FineGrid); + for(int k=0;k 
Cheby(alpha,beta,Npoly); + ProjectedHermOp Op(_FineOp,_Aggregate); + ProjectedFunctionHermOp ChebyOp(Cheby,_FineOp,_Aggregate); + + std::vector eval(Nm); + std::vector evec(Nm,_CoarseGrid); + + CoarseField src(_CoarseGrid); src=1.0; + + ImplicitlyRestartedLanczos IRL(ChebyOp,ChebyOp,Nk,Nk,Nm,resid,MaxIt,betastp,MinRes); + IRL.calc(eval,evec,src,Nk,false); + + // We got the evalues of the Cheby operator; + // Reconstruct eigenvalues of original operator via Chebyshev inverse + for (int i=0;i, blockSize, + std::string, config, + std::vector < std::complex >, omega, + RealD, mass, + RealD, M5 + ); +}; + +int main (int argc, char ** argv) { + + Grid_init(&argc,&argv); + + CompressedLanczosParams Params; + { + Params.omega.resize(10); + Params.blockSize.resize(5); + XmlWriter writer("Params_template.xml"); + write(writer,"Params",Params); + std::cout << GridLogMessage << " Written Params_template.xml" < blockSize = Params.blockSize; + + // Grids + GridCartesian * UGrid = SpaceTimeGrid::makeFourDimGrid(GridDefaultLatt(), GridDefaultSimd(Nd,vComplex::Nsimd()),GridDefaultMpi()); + GridRedBlackCartesian * UrbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(UGrid); + GridCartesian * FGrid = SpaceTimeGrid::makeFiveDimGrid(Ls,UGrid); + GridRedBlackCartesian * FrbGrid = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls,UGrid); + + std::vector fineLatt = GridDefaultLatt(); + int dims=fineLatt.size(); + assert(blockSize.size()==dims+1); + std::vector coarseLatt(dims); + std::vector coarseLatt5d ; + + for (int d=0;d seeds4({1,2,3,4}); + GridParallelRNG RNG4(UGrid); RNG4.SeedFixedIntegers(seeds4); + SU3::HotConfiguration(RNG4, Umu); + } + std::cout << GridLogMessage << "Lattice dimensions: " << GridDefaultLatt() << " Ls: " << Ls << std::endl; + + // ZMobius EO Operator + ZMobiusFermionR Ddwf(Umu, *FGrid, *FrbGrid, *UGrid, *UrbGrid, mass, M5, Params.omega,1.,0.); + SchurDiagTwoOperator HermOp(Ddwf); + + // Eigenvector storage + LanczosParams fine =Params.FineParams; + LanczosParams coarse=Params.CoarseParams; + const int Nm1 = fine.Nm; + const int Nm2 = coarse.Nm; + + std::cout << GridLogMessage << "Keep " << fine.Nk << " full vectors" << std::endl; + std::cout << GridLogMessage << "Keep " << coarse.Nk << " total vectors" << std::endl; + assert(Nm2 >= Nm1); + + const int nbasis= 70; + CoarseFineIRL IRL(FrbGrid,CoarseGrid5rb,HermOp,Odd); + + std::cout << GridLogMessage << "Constructed CoarseFine IRL" << std::endl; + + std::cout << GridLogMessage << "Performing fine grid IRL Nk "<< nbasis<<" Nm "< Date: Mon, 8 Jan 2018 14:06:53 +0000 Subject: [PATCH 16/17] Clean up --- lib/communicator/Communicator_base.h | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/lib/communicator/Communicator_base.h b/lib/communicator/Communicator_base.h index a9b99c17..359846c9 100644 --- a/lib/communicator/Communicator_base.h +++ b/lib/communicator/Communicator_base.h @@ -184,15 +184,10 @@ public: template void AllToAll(int dim,std::vector &in, std::vector &out){ assert(dim>=0); assert(dim<_ndimension); - int numnode = _processors[dim]; - // std::cerr << " AllToAll in.size() "< Date: Mon, 8 Jan 2018 15:20:26 +0000 Subject: [PATCH 17/17] Allow resize of the shared memory buffers --- TODO | 28 +++++++++++---- lib/communicator/SharedMemory.cc | 40 ++++++++++++++++++++- lib/communicator/SharedMemory.h | 10 ++++-- lib/communicator/SharedMemoryMPI.cc | 52 +++++++++------------------- lib/communicator/SharedMemoryNone.cc | 40 +++++---------------- 5 files changed, 93 insertions(+), 77 deletions(-) diff --git a/TODO b/TODO index 
95ccf1df..746302ca 100644 --- a/TODO +++ b/TODO @@ -1,16 +1,32 @@ TODO: --------------- -Large item work list: +Code item work list + +a) namespaces & indentation + GRID_BEGIN_NAMESPACE(); + GRID_END_NAMESPACE(); +-- delete QCD namespace + +b) GPU branch +- start branch +- Increase Macro use in core library support; prepare for change +- Audit volume of "device" code +- Virtual function audit +- Start port once Nvidia box is up +- Cut down volume of code for first port? How? + +Physics item work list: 1)- BG/Q port and check ; Andrew says ok. -3a)- RNG I/O in ILDG/SciDAC (minor) -3c)- Consistent linear solver flop count/rate -- PARTIAL, time but no flop/s yet -4)- Physical propagator interface -6)- Multigrid Wilson and DWF, compare to other Multigrid implementations -7)- HDCR resume +2)- Consistent linear solver flop count/rate -- PARTIAL, time but no flop/s yet +3)- Physical propagator interface +4)- Multigrid Wilson and DWF, compare to other Multigrid implementations +5)- HDCR resume + ---------------------------- Recent DONE +-- RNG I/O in ILDG/SciDAC (minor) -- Precision conversion and sort out localConvert <-- partial/easy -- Conserved currents (Andrew) -- Split grid diff --git a/lib/communicator/SharedMemory.cc b/lib/communicator/SharedMemory.cc index f9d5e5bc..4682d420 100644 --- a/lib/communicator/SharedMemory.cc +++ b/lib/communicator/SharedMemory.cc @@ -34,7 +34,9 @@ namespace Grid { uint64_t GlobalSharedMemory::MAX_MPI_SHM_BYTES = 1024LL*1024LL*1024LL; int GlobalSharedMemory::Hugepages = 0; -int GlobalSharedMemory::ShmSetup; +int GlobalSharedMemory::_ShmSetup; +int GlobalSharedMemory::_ShmAlloc; +uint64_t GlobalSharedMemory::_ShmAllocBytes; std::vector GlobalSharedMemory::WorldShmCommBufs; @@ -50,5 +52,41 @@ int GlobalSharedMemory::WorldRank; int GlobalSharedMemory::WorldNodes; int GlobalSharedMemory::WorldNode; +void GlobalSharedMemory::SharedMemoryFree(void) +{ + assert(_ShmAlloc); + assert(_ShmAllocBytes>0); + for(int r=0;r= heap_size) { + std::cout<< " ShmBufferMalloc exceeded shared heap size -- try increasing with --shm flag" < &processors,Grid_MPI_Comm & optimal_comm) @@ -180,8 +182,8 @@ void GlobalSharedMemory::OptimalCommunicator(const std::vector &processors, #ifdef GRID_MPI3_SHMMMAP void GlobalSharedMemory::SharedMemoryAllocate(uint64_t bytes, int flags) { - GlobalSharedMemory::MAX_MPI_SHM_BYTES = bytes; - assert(ShmSetup==0); ShmSetup=1; + assert(_ShmSetup==1); + assert(_ShmAlloc==0); ////////////////////////////////////////////////////////////////////////////////////////////////////////// // allocate the shared windows for our group ////////////////////////////////////////////////////////////////////////////////////////////////////////// @@ -214,8 +216,11 @@ void GlobalSharedMemory::SharedMemoryAllocate(uint64_t bytes, int flags) perror("failed mmap"); assert(0); } assert(((uint64_t)ptr&0x3F)==0); + close(fd); WorldShmCommBufs[r] =ptr; } + _ShmAlloc=1; + _ShmAllocBytes = bytes; }; #endif // MMAP @@ -227,8 +232,8 @@ void GlobalSharedMemory::SharedMemoryAllocate(uint64_t bytes, int flags) //////////////////////////////////////////////////////////////////////////////////////////// void GlobalSharedMemory::SharedMemoryAllocate(uint64_t bytes, int flags) { - GlobalSharedMemory::MAX_MPI_SHM_BYTES = bytes; - assert(ShmSetup==0); ShmSetup=1; + assert(_ShmSetup==1); + assert(_ShmAlloc==0); MPI_Barrier(WorldShmComm); WorldShmCommBufs.resize(WorldShmSize); @@ -258,6 +263,7 @@ void GlobalSharedMemory::SharedMemoryAllocate(uint64_t bytes, int flags) 
assert(((uint64_t)ptr&0x3F)==0); WorldShmCommBufs[r] =ptr; + close(fd); } } @@ -277,17 +283,15 @@ void GlobalSharedMemory::SharedMemoryAllocate(uint64_t bytes, int flags) if ( ptr == MAP_FAILED ) { perror("failed mmap"); assert(0); } assert(((uint64_t)ptr&0x3F)==0); WorldShmCommBufs[r] =ptr; + + close(fd); } } + _ShmAlloc=1; + _ShmAllocBytes = bytes; } #endif -void GlobalSharedMemory::SharedMemoryFree(void) -{ - assert(ShmSetup); - assert(0); // unimplemented -} - //////////////////////////////////////////////////////// // Global shared functionality finished // Now move to per communicator functionality @@ -310,7 +314,8 @@ void SharedMemory::SetCommunicator(Grid_MPI_Comm comm) ////////////////////////////////////////////////////////////////////// // Map ShmRank to WorldShmRank and use the right buffer ////////////////////////////////////////////////////////////////////// - heap_size = GlobalSharedMemory::MAX_MPI_SHM_BYTES; + assert (GlobalSharedMemory::ShmAlloc()==1); + heap_size = GlobalSharedMemory::ShmAllocBytes(); for(int r=0;r= heap_size) { - std::cout<< " ShmBufferMalloc exceeded shared heap size -- try increasing with --shm flag" < &processors,Grid_MPI_Comm & optimal_comm) @@ -56,7 +58,8 @@ void GlobalSharedMemory::OptimalCommunicator(const std::vector &processors, void GlobalSharedMemory::SharedMemoryAllocate(uint64_t bytes, int flags) { void * ShmCommBuf ; - MAX_MPI_SHM_BYTES=bytes; + assert(_ShmSetup==1); + assert(_ShmAlloc==0); int mmap_flag =0; #ifdef MAP_ANONYMOUS mmap_flag = mmap_flag| MAP_SHARED | MAP_ANONYMOUS; @@ -77,20 +80,17 @@ void GlobalSharedMemory::SharedMemoryAllocate(uint64_t bytes, int flags) #endif bzero(ShmCommBuf,bytes); WorldShmCommBufs[0] = ShmCommBuf; + _ShmAllocBytes=bytes; + _ShmAlloc=1; }; -void GlobalSharedMemory::SharedMemoryFree(void) -{ - assert(ShmSetup); - assert(0); // unimplemented -} - //////////////////////////////////////////////////////// // Global shared functionality finished // Now move to per communicator functionality //////////////////////////////////////////////////////// void SharedMemory::SetCommunicator(Grid_MPI_Comm comm) { + assert(GlobalSharedMemory::ShmAlloc()==1); ShmRanks.resize(1); ShmCommBufs.resize(1); ShmRanks[0] = 0; @@ -100,7 +100,7 @@ void SharedMemory::SetCommunicator(Grid_MPI_Comm comm) // Map ShmRank to WorldShmRank and use the right buffer ////////////////////////////////////////////////////////////////////// ShmCommBufs[0] = GlobalSharedMemory::WorldShmCommBufs[0]; - heap_size = GlobalSharedMemory::MAX_MPI_SHM_BYTES; + heap_size = GlobalSharedMemory::ShmAllocBytes(); ShmBufferFreeAll(); return; } @@ -114,10 +114,6 @@ void SharedMemory::ShmBarrier(void){ return ; } ////////////////////////////////////////////////////////////////////////////////////////////////////////// void SharedMemory::SharedMemoryTest(void) { return; } -void *SharedMemory::ShmBufferSelf(void) -{ - return ShmCommBufs[ShmRank]; -} void *SharedMemory::ShmBuffer(int rank) { return NULL; @@ -127,24 +123,4 @@ void *SharedMemory::ShmBufferTranslate(int rank,void * local_p) return NULL; } -///////////////////////////////// -// Alloc, free shmem region ; common to MPI and none? -///////////////////////////////// -void *SharedMemory::ShmBufferMalloc(size_t bytes){ - void *ptr = (void *)heap_top; - heap_top += bytes; - heap_bytes+= bytes; - if (heap_bytes >= heap_size) { - std::cout<< " ShmBufferMalloc exceeded shared heap size -- try increasing with --shm flag" <
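Patch 17 threads a pair of guards, _ShmSetup and _ShmAlloc, through the shared-memory code so that setup, allocation and use can only happen in order, and makes the heap size a run-time choice. A hypothetical end-to-end sketch of the lifecycle these asserts enforce; the call sites and the byte count are illustrative.

// Assumes the Grid SharedMemory headers are included and namespace Grid is in scope.
void SharedMemoryLifecycle(uint64_t bytes_from_shm_flag)
{
  GlobalSharedMemory::Init(MPI_COMM_WORLD);          // records world/node layout; sets _ShmSetup
  GlobalSharedMemory::SharedMemoryAllocate(bytes_from_shm_flag,
                                           GlobalSharedMemory::Hugepages);
                                                     // asserts _ShmSetup; sets _ShmAlloc, _ShmAllocBytes

  // ... communicators are constructed here; each SetCommunicator() call asserts
  //     GlobalSharedMemory::ShmAlloc()==1 and sizes its heap from ShmAllocBytes() ...

  GlobalSharedMemory::SharedMemoryFree();            // asserts _ShmAlloc; releases the per-rank buffers
}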