From e564d11687b6cf77e417724e9ae0e8c8e8ce6037 Mon Sep 17 00:00:00 2001 From: paboyle Date: Mon, 8 Jan 2018 15:20:26 +0000 Subject: [PATCH] Allow resize of the shared memory buffers --- TODO | 28 +++++++++++---- lib/communicator/SharedMemory.cc | 40 ++++++++++++++++++++- lib/communicator/SharedMemory.h | 10 ++++-- lib/communicator/SharedMemoryMPI.cc | 52 +++++++++------------------- lib/communicator/SharedMemoryNone.cc | 40 +++++---------------- 5 files changed, 93 insertions(+), 77 deletions(-) diff --git a/TODO b/TODO index 95ccf1df..746302ca 100644 --- a/TODO +++ b/TODO @@ -1,16 +1,32 @@ TODO: --------------- -Large item work list: +Code item work list + +a) namespaces & indentation + GRID_BEGIN_NAMESPACE(); + GRID_END_NAMESPACE(); +-- delete QCD namespace + +b) GPU branch +- start branch +- Increase Macro use in core library support; prepare for change +- Audit volume of "device" code +- Virtual function audit +- Start port once Nvidia box is up +- Cut down volume of code for first port? How? + +Physics item work list: 1)- BG/Q port and check ; Andrew says ok. -3a)- RNG I/O in ILDG/SciDAC (minor) -3c)- Consistent linear solver flop count/rate -- PARTIAL, time but no flop/s yet -4)- Physical propagator interface -6)- Multigrid Wilson and DWF, compare to other Multigrid implementations -7)- HDCR resume +2)- Consistent linear solver flop count/rate -- PARTIAL, time but no flop/s yet +3)- Physical propagator interface +4)- Multigrid Wilson and DWF, compare to other Multigrid implementations +5)- HDCR resume + ---------------------------- Recent DONE +-- RNG I/O in ILDG/SciDAC (minor) -- Precision conversion and sort out localConvert <-- partial/easy -- Conserved currents (Andrew) -- Split grid diff --git a/lib/communicator/SharedMemory.cc b/lib/communicator/SharedMemory.cc index f9d5e5bc..4682d420 100644 --- a/lib/communicator/SharedMemory.cc +++ b/lib/communicator/SharedMemory.cc @@ -34,7 +34,9 @@ namespace Grid { uint64_t GlobalSharedMemory::MAX_MPI_SHM_BYTES = 1024LL*1024LL*1024LL; int GlobalSharedMemory::Hugepages = 0; -int GlobalSharedMemory::ShmSetup; +int GlobalSharedMemory::_ShmSetup; +int GlobalSharedMemory::_ShmAlloc; +uint64_t GlobalSharedMemory::_ShmAllocBytes; std::vector GlobalSharedMemory::WorldShmCommBufs; @@ -50,5 +52,41 @@ int GlobalSharedMemory::WorldRank; int GlobalSharedMemory::WorldNodes; int GlobalSharedMemory::WorldNode; +void GlobalSharedMemory::SharedMemoryFree(void) +{ + assert(_ShmAlloc); + assert(_ShmAllocBytes>0); + for(int r=0;r= heap_size) { + std::cout<< " ShmBufferMalloc exceeded shared heap size -- try increasing with --shm flag" < &processors,Grid_MPI_Comm & optimal_comm) @@ -180,8 +182,8 @@ void GlobalSharedMemory::OptimalCommunicator(const std::vector &processors, #ifdef GRID_MPI3_SHMMMAP void GlobalSharedMemory::SharedMemoryAllocate(uint64_t bytes, int flags) { - GlobalSharedMemory::MAX_MPI_SHM_BYTES = bytes; - assert(ShmSetup==0); ShmSetup=1; + assert(_ShmSetup==1); + assert(_ShmAlloc==0); ////////////////////////////////////////////////////////////////////////////////////////////////////////// // allocate the shared windows for our group ////////////////////////////////////////////////////////////////////////////////////////////////////////// @@ -214,8 +216,11 @@ void GlobalSharedMemory::SharedMemoryAllocate(uint64_t bytes, int flags) perror("failed mmap"); assert(0); } assert(((uint64_t)ptr&0x3F)==0); + close(fd); WorldShmCommBufs[r] =ptr; } + _ShmAlloc=1; + _ShmAllocBytes = bytes; }; #endif // MMAP @@ -227,8 +232,8 @@ void GlobalSharedMemory::SharedMemoryAllocate(uint64_t bytes, int flags) //////////////////////////////////////////////////////////////////////////////////////////// void GlobalSharedMemory::SharedMemoryAllocate(uint64_t bytes, int flags) { - GlobalSharedMemory::MAX_MPI_SHM_BYTES = bytes; - assert(ShmSetup==0); ShmSetup=1; + assert(_ShmSetup==1); + assert(_ShmAlloc==0); MPI_Barrier(WorldShmComm); WorldShmCommBufs.resize(WorldShmSize); @@ -258,6 +263,7 @@ void GlobalSharedMemory::SharedMemoryAllocate(uint64_t bytes, int flags) assert(((uint64_t)ptr&0x3F)==0); WorldShmCommBufs[r] =ptr; + close(fd); } } @@ -277,17 +283,15 @@ void GlobalSharedMemory::SharedMemoryAllocate(uint64_t bytes, int flags) if ( ptr == MAP_FAILED ) { perror("failed mmap"); assert(0); } assert(((uint64_t)ptr&0x3F)==0); WorldShmCommBufs[r] =ptr; + + close(fd); } } + _ShmAlloc=1; + _ShmAllocBytes = bytes; } #endif -void GlobalSharedMemory::SharedMemoryFree(void) -{ - assert(ShmSetup); - assert(0); // unimplemented -} - //////////////////////////////////////////////////////// // Global shared functionality finished // Now move to per communicator functionality @@ -310,7 +314,8 @@ void SharedMemory::SetCommunicator(Grid_MPI_Comm comm) ////////////////////////////////////////////////////////////////////// // Map ShmRank to WorldShmRank and use the right buffer ////////////////////////////////////////////////////////////////////// - heap_size = GlobalSharedMemory::MAX_MPI_SHM_BYTES; + assert (GlobalSharedMemory::ShmAlloc()==1); + heap_size = GlobalSharedMemory::ShmAllocBytes(); for(int r=0;r= heap_size) { - std::cout<< " ShmBufferMalloc exceeded shared heap size -- try increasing with --shm flag" < &processors,Grid_MPI_Comm & optimal_comm) @@ -56,7 +58,8 @@ void GlobalSharedMemory::OptimalCommunicator(const std::vector &processors, void GlobalSharedMemory::SharedMemoryAllocate(uint64_t bytes, int flags) { void * ShmCommBuf ; - MAX_MPI_SHM_BYTES=bytes; + assert(_ShmSetup==1); + assert(_ShmAlloc==0); int mmap_flag =0; #ifdef MAP_ANONYMOUS mmap_flag = mmap_flag| MAP_SHARED | MAP_ANONYMOUS; @@ -77,20 +80,17 @@ void GlobalSharedMemory::SharedMemoryAllocate(uint64_t bytes, int flags) #endif bzero(ShmCommBuf,bytes); WorldShmCommBufs[0] = ShmCommBuf; + _ShmAllocBytes=bytes; + _ShmAlloc=1; }; -void GlobalSharedMemory::SharedMemoryFree(void) -{ - assert(ShmSetup); - assert(0); // unimplemented -} - //////////////////////////////////////////////////////// // Global shared functionality finished // Now move to per communicator functionality //////////////////////////////////////////////////////// void SharedMemory::SetCommunicator(Grid_MPI_Comm comm) { + assert(GlobalSharedMemory::ShmAlloc()==1); ShmRanks.resize(1); ShmCommBufs.resize(1); ShmRanks[0] = 0; @@ -100,7 +100,7 @@ void SharedMemory::SetCommunicator(Grid_MPI_Comm comm) // Map ShmRank to WorldShmRank and use the right buffer ////////////////////////////////////////////////////////////////////// ShmCommBufs[0] = GlobalSharedMemory::WorldShmCommBufs[0]; - heap_size = GlobalSharedMemory::MAX_MPI_SHM_BYTES; + heap_size = GlobalSharedMemory::ShmAllocBytes(); ShmBufferFreeAll(); return; } @@ -114,10 +114,6 @@ void SharedMemory::ShmBarrier(void){ return ; } ////////////////////////////////////////////////////////////////////////////////////////////////////////// void SharedMemory::SharedMemoryTest(void) { return; } -void *SharedMemory::ShmBufferSelf(void) -{ - return ShmCommBufs[ShmRank]; -} void *SharedMemory::ShmBuffer(int rank) { return NULL; @@ -127,24 +123,4 @@ void *SharedMemory::ShmBufferTranslate(int rank,void * local_p) return NULL; } -///////////////////////////////// -// Alloc, free shmem region ; common to MPI and none? -///////////////////////////////// -void *SharedMemory::ShmBufferMalloc(size_t bytes){ - void *ptr = (void *)heap_top; - heap_top += bytes; - heap_bytes+= bytes; - if (heap_bytes >= heap_size) { - std::cout<< " ShmBufferMalloc exceeded shared heap size -- try increasing with --shm flag" <