1
0
mirror of https://github.com/paboyle/Grid.git synced 2024-11-14 01:35:36 +00:00

Allow resize of the shared memory buffers

This commit is contained in:
paboyle 2018-01-08 15:20:26 +00:00
parent 0b2162f375
commit e564d11687
5 changed files with 93 additions and 77 deletions

28
TODO
View File

@ -1,16 +1,32 @@
TODO: TODO:
--------------- ---------------
Large item work list: Code item work list
a) namespaces & indentation
GRID_BEGIN_NAMESPACE();
GRID_END_NAMESPACE();
-- delete QCD namespace
b) GPU branch
- start branch
- Increase Macro use in core library support; prepare for change
- Audit volume of "device" code
- Virtual function audit
- Start port once Nvidia box is up
- Cut down volume of code for first port? How?
Physics item work list:
1)- BG/Q port and check ; Andrew says ok. 1)- BG/Q port and check ; Andrew says ok.
3a)- RNG I/O in ILDG/SciDAC (minor) 2)- Consistent linear solver flop count/rate -- PARTIAL, time but no flop/s yet
3c)- Consistent linear solver flop count/rate -- PARTIAL, time but no flop/s yet 3)- Physical propagator interface
4)- Physical propagator interface 4)- Multigrid Wilson and DWF, compare to other Multigrid implementations
6)- Multigrid Wilson and DWF, compare to other Multigrid implementations 5)- HDCR resume
7)- HDCR resume
---------------------------- ----------------------------
Recent DONE Recent DONE
-- RNG I/O in ILDG/SciDAC (minor)
-- Precision conversion and sort out localConvert <-- partial/easy -- Precision conversion and sort out localConvert <-- partial/easy
-- Conserved currents (Andrew) -- Conserved currents (Andrew)
-- Split grid -- Split grid

View File

@ -34,7 +34,9 @@ namespace Grid {
uint64_t GlobalSharedMemory::MAX_MPI_SHM_BYTES = 1024LL*1024LL*1024LL; uint64_t GlobalSharedMemory::MAX_MPI_SHM_BYTES = 1024LL*1024LL*1024LL;
int GlobalSharedMemory::Hugepages = 0; int GlobalSharedMemory::Hugepages = 0;
int GlobalSharedMemory::ShmSetup; int GlobalSharedMemory::_ShmSetup;
int GlobalSharedMemory::_ShmAlloc;
uint64_t GlobalSharedMemory::_ShmAllocBytes;
std::vector<void *> GlobalSharedMemory::WorldShmCommBufs; std::vector<void *> GlobalSharedMemory::WorldShmCommBufs;
@ -50,5 +52,41 @@ int GlobalSharedMemory::WorldRank;
int GlobalSharedMemory::WorldNodes; int GlobalSharedMemory::WorldNodes;
int GlobalSharedMemory::WorldNode; int GlobalSharedMemory::WorldNode;
/////////////////////////////////////////////////////////////////////////
// Unmap every buffer recorded in WorldShmCommBufs and clear the
// allocation flags, so that SharedMemoryAllocate (which asserts
// _ShmAlloc==0) may be called again with a different size.
//
// Preconditions (asserted): a prior allocation exists and its recorded
// size is non-zero.
/////////////////////////////////////////////////////////////////////////
void GlobalSharedMemory::SharedMemoryFree(void)
{
  assert(_ShmAlloc);
  assert(_ShmAllocBytes>0);
  for(int r=0;r<WorldShmSize;r++){
    // munmap signals failure through a non-zero return; do not record the
    // region as released if the kernel refused to unmap it.
    if ( munmap(WorldShmCommBufs[r],_ShmAllocBytes) != 0 ) {
      std::cout<< " SharedMemoryFree: munmap failed for shared buffer " << r <<std::endl;
      assert(0);
    }
    // The address is no longer valid; do not leave a dangling pointer behind.
    WorldShmCommBufs[r] = NULL;
  }
  _ShmAlloc      = 0;
  _ShmAllocBytes = 0;
}
/////////////////////////////////
// Alloc, free shmem region
/////////////////////////////////
// Hand out the next `bytes` of the shared heap and return the address at
// which that region starts. Individual regions are never released;
// ShmBufferFreeAll() rewinds the whole heap in one go.
void *SharedMemory::ShmBufferMalloc(size_t bytes){
  // Alignment round-up is currently disabled:
  // bytes = (bytes+sizeof(vRealD))&(~(sizeof(vRealD)-1));// align up bytes
  void *const region = (void *)heap_top; // start of the region being handed out

  // Cursor and usage counter are advanced before the capacity check.
  heap_bytes+= bytes;
  heap_top  += bytes;

  if (heap_bytes < heap_size) {
    return region;
  }

  // Usage has reached or passed the heap size: report it, then trip the assert.
  std::cout<< " ShmBufferMalloc exceeded shared heap size -- try increasing with --shm <MB> flag" <<std::endl;
  std::cout<< " Parameter specified in units of MB (megabytes) " <<std::endl;
  std::cout<< " Current value is " << (heap_size/(1024*1024)) <<std::endl;
  assert(heap_bytes<heap_size);
  return region;
}
// Rewind the shared heap: zero the usage counter and point the allocation
// cursor back at the address returned by ShmBufferSelf().
void SharedMemory::ShmBufferFreeAll(void) {
  heap_bytes = 0;
  void *const base = ShmBufferSelf();
  heap_top = (size_t)base;
}
// Return the ShmCommBufs entry selected by this object's ShmRank.
void *SharedMemory::ShmBufferSelf(void)
{
  void *own_buffer = ShmCommBufs[ShmRank];
  return own_buffer;
}
} }

View File

@ -69,11 +69,17 @@ namespace Grid {
class GlobalSharedMemory { class GlobalSharedMemory {
private: private:
// Init once lock on the buffer allocation
static int ShmSetup;
static const int MAXLOG2RANKSPERNODE = 16; static const int MAXLOG2RANKSPERNODE = 16;
// Init once lock on the buffer allocation
static int _ShmSetup;
static int _ShmAlloc;
static uint64_t _ShmAllocBytes;
public: public:
static int ShmSetup(void) { return _ShmSetup; }
static int ShmAlloc(void) { return _ShmAlloc; }
static uint64_t ShmAllocBytes(void) { return _ShmAllocBytes; }
static uint64_t MAX_MPI_SHM_BYTES; static uint64_t MAX_MPI_SHM_BYTES;
static int Hugepages; static int Hugepages;

View File

@ -33,6 +33,7 @@ namespace Grid {
/*Construct from an MPI communicator*/ /*Construct from an MPI communicator*/
void GlobalSharedMemory::Init(Grid_MPI_Comm comm) void GlobalSharedMemory::Init(Grid_MPI_Comm comm)
{ {
assert(_ShmSetup==0);
WorldComm = comm; WorldComm = comm;
MPI_Comm_rank(WorldComm,&WorldRank); MPI_Comm_rank(WorldComm,&WorldRank);
MPI_Comm_size(WorldComm,&WorldSize); MPI_Comm_size(WorldComm,&WorldSize);
@ -110,6 +111,7 @@ void GlobalSharedMemory::Init(Grid_MPI_Comm comm)
} }
} }
assert(WorldNode!=-1); assert(WorldNode!=-1);
_ShmSetup=1;
} }
void GlobalSharedMemory::OptimalCommunicator(const std::vector<int> &processors,Grid_MPI_Comm & optimal_comm) void GlobalSharedMemory::OptimalCommunicator(const std::vector<int> &processors,Grid_MPI_Comm & optimal_comm)
@ -180,8 +182,8 @@ void GlobalSharedMemory::OptimalCommunicator(const std::vector<int> &processors,
#ifdef GRID_MPI3_SHMMMAP #ifdef GRID_MPI3_SHMMMAP
void GlobalSharedMemory::SharedMemoryAllocate(uint64_t bytes, int flags) void GlobalSharedMemory::SharedMemoryAllocate(uint64_t bytes, int flags)
{ {
GlobalSharedMemory::MAX_MPI_SHM_BYTES = bytes; assert(_ShmSetup==1);
assert(ShmSetup==0); ShmSetup=1; assert(_ShmAlloc==0);
////////////////////////////////////////////////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////////////////////////////////////////////////
// allocate the shared windows for our group // allocate the shared windows for our group
////////////////////////////////////////////////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////////////////////////////////////////////////
@ -214,8 +216,11 @@ void GlobalSharedMemory::SharedMemoryAllocate(uint64_t bytes, int flags)
perror("failed mmap"); assert(0); perror("failed mmap"); assert(0);
} }
assert(((uint64_t)ptr&0x3F)==0); assert(((uint64_t)ptr&0x3F)==0);
close(fd);
WorldShmCommBufs[r] =ptr; WorldShmCommBufs[r] =ptr;
} }
_ShmAlloc=1;
_ShmAllocBytes = bytes;
}; };
#endif // MMAP #endif // MMAP
@ -227,8 +232,8 @@ void GlobalSharedMemory::SharedMemoryAllocate(uint64_t bytes, int flags)
//////////////////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////////////////
void GlobalSharedMemory::SharedMemoryAllocate(uint64_t bytes, int flags) void GlobalSharedMemory::SharedMemoryAllocate(uint64_t bytes, int flags)
{ {
GlobalSharedMemory::MAX_MPI_SHM_BYTES = bytes; assert(_ShmSetup==1);
assert(ShmSetup==0); ShmSetup=1; assert(_ShmAlloc==0);
MPI_Barrier(WorldShmComm); MPI_Barrier(WorldShmComm);
WorldShmCommBufs.resize(WorldShmSize); WorldShmCommBufs.resize(WorldShmSize);
@ -258,6 +263,7 @@ void GlobalSharedMemory::SharedMemoryAllocate(uint64_t bytes, int flags)
assert(((uint64_t)ptr&0x3F)==0); assert(((uint64_t)ptr&0x3F)==0);
WorldShmCommBufs[r] =ptr; WorldShmCommBufs[r] =ptr;
close(fd);
} }
} }
@ -277,17 +283,15 @@ void GlobalSharedMemory::SharedMemoryAllocate(uint64_t bytes, int flags)
if ( ptr == MAP_FAILED ) { perror("failed mmap"); assert(0); } if ( ptr == MAP_FAILED ) { perror("failed mmap"); assert(0); }
assert(((uint64_t)ptr&0x3F)==0); assert(((uint64_t)ptr&0x3F)==0);
WorldShmCommBufs[r] =ptr; WorldShmCommBufs[r] =ptr;
close(fd);
} }
} }
_ShmAlloc=1;
_ShmAllocBytes = bytes;
} }
#endif #endif
// Stub: checks the ShmSetup flag, then always fails via assert(0)
// because no release logic is implemented here.
void GlobalSharedMemory::SharedMemoryFree(void)
{
assert(ShmSetup);
assert(0); // unimplemented
}
//////////////////////////////////////////////////////// ////////////////////////////////////////////////////////
// Global shared functionality finished // Global shared functionality finished
// Now move to per communicator functionality // Now move to per communicator functionality
@ -310,7 +314,8 @@ void SharedMemory::SetCommunicator(Grid_MPI_Comm comm)
////////////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////////////
// Map ShmRank to WorldShmRank and use the right buffer // Map ShmRank to WorldShmRank and use the right buffer
////////////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////////////
heap_size = GlobalSharedMemory::MAX_MPI_SHM_BYTES; assert (GlobalSharedMemory::ShmAlloc()==1);
heap_size = GlobalSharedMemory::ShmAllocBytes();
for(int r=0;r<ShmSize;r++){ for(int r=0;r<ShmSize;r++){
uint32_t sr = (r==ShmRank) ? GlobalSharedMemory::WorldRank : 0 ; uint32_t sr = (r==ShmRank) ? GlobalSharedMemory::WorldRank : 0 ;
@ -364,10 +369,6 @@ void SharedMemory::SharedMemoryTest(void)
ShmBarrier(); ShmBarrier();
} }
// Returns the ShmCommBufs entry indexed by ShmRank.
void *SharedMemory::ShmBufferSelf(void)
{
return ShmCommBufs[ShmRank];
}
void *SharedMemory::ShmBuffer(int rank) void *SharedMemory::ShmBuffer(int rank)
{ {
int gpeer = ShmRanks[rank]; int gpeer = ShmRanks[rank];
@ -391,25 +392,4 @@ void *SharedMemory::ShmBufferTranslate(int rank,void * local_p)
} }
} }
/////////////////////////////////
// Alloc, free shmem region
/////////////////////////////////
// Returns the current heap_top as the start of a `bytes`-sized region,
// then advances heap_top and heap_bytes by `bytes`.
// If heap_bytes reaches or exceeds heap_size afterwards, prints a hint
// about the --shm flag and asserts.
void *SharedMemory::ShmBufferMalloc(size_t bytes){
// Alignment round-up below is disabled (commented out).
// bytes = (bytes+sizeof(vRealD))&(~(sizeof(vRealD)-1));// align up bytes
void *ptr = (void *)heap_top;
// Counters are advanced before the capacity check.
heap_top += bytes;
heap_bytes+= bytes;
if (heap_bytes >= heap_size) {
std::cout<< " ShmBufferMalloc exceeded shared heap size -- try increasing with --shm <MB> flag" <<std::endl;
std::cout<< " Parameter specified in units of MB (megabytes) " <<std::endl;
// heap_size is divided by 1024*1024 for display.
std::cout<< " Current value is " << (heap_size/(1024*1024)) <<std::endl;
assert(heap_bytes<heap_size);
}
return ptr;
}
// Resets the heap: heap_top goes back to the address returned by
// ShmBufferSelf() and heap_bytes is zeroed.
void SharedMemory::ShmBufferFreeAll(void) {
heap_top =(size_t)ShmBufferSelf();
heap_bytes=0;
}
} }

View File

@ -33,6 +33,7 @@ namespace Grid {
/*Construct from an MPI communicator*/ /*Construct from an MPI communicator*/
void GlobalSharedMemory::Init(Grid_MPI_Comm comm) void GlobalSharedMemory::Init(Grid_MPI_Comm comm)
{ {
assert(_ShmSetup==0);
WorldComm = 0; WorldComm = 0;
WorldRank = 0; WorldRank = 0;
WorldSize = 1; WorldSize = 1;
@ -43,6 +44,7 @@ void GlobalSharedMemory::Init(Grid_MPI_Comm comm)
WorldNode = 0 ; WorldNode = 0 ;
WorldShmRanks.resize(WorldSize); WorldShmRanks[0] = 0; WorldShmRanks.resize(WorldSize); WorldShmRanks[0] = 0;
WorldShmCommBufs.resize(1); WorldShmCommBufs.resize(1);
_ShmSetup=1;
} }
void GlobalSharedMemory::OptimalCommunicator(const std::vector<int> &processors,Grid_MPI_Comm & optimal_comm) void GlobalSharedMemory::OptimalCommunicator(const std::vector<int> &processors,Grid_MPI_Comm & optimal_comm)
@ -56,7 +58,8 @@ void GlobalSharedMemory::OptimalCommunicator(const std::vector<int> &processors,
void GlobalSharedMemory::SharedMemoryAllocate(uint64_t bytes, int flags) void GlobalSharedMemory::SharedMemoryAllocate(uint64_t bytes, int flags)
{ {
void * ShmCommBuf ; void * ShmCommBuf ;
MAX_MPI_SHM_BYTES=bytes; assert(_ShmSetup==1);
assert(_ShmAlloc==0);
int mmap_flag =0; int mmap_flag =0;
#ifdef MAP_ANONYMOUS #ifdef MAP_ANONYMOUS
mmap_flag = mmap_flag| MAP_SHARED | MAP_ANONYMOUS; mmap_flag = mmap_flag| MAP_SHARED | MAP_ANONYMOUS;
@ -77,20 +80,17 @@ void GlobalSharedMemory::SharedMemoryAllocate(uint64_t bytes, int flags)
#endif #endif
bzero(ShmCommBuf,bytes); bzero(ShmCommBuf,bytes);
WorldShmCommBufs[0] = ShmCommBuf; WorldShmCommBufs[0] = ShmCommBuf;
_ShmAllocBytes=bytes;
_ShmAlloc=1;
}; };
// Stub: checks the ShmSetup flag, then always fails via assert(0)
// because no release logic is implemented here.
void GlobalSharedMemory::SharedMemoryFree(void)
{
assert(ShmSetup);
assert(0); // unimplemented
}
//////////////////////////////////////////////////////// ////////////////////////////////////////////////////////
// Global shared functionality finished // Global shared functionality finished
// Now move to per communicator functionality // Now move to per communicator functionality
//////////////////////////////////////////////////////// ////////////////////////////////////////////////////////
void SharedMemory::SetCommunicator(Grid_MPI_Comm comm) void SharedMemory::SetCommunicator(Grid_MPI_Comm comm)
{ {
assert(GlobalSharedMemory::ShmAlloc()==1);
ShmRanks.resize(1); ShmRanks.resize(1);
ShmCommBufs.resize(1); ShmCommBufs.resize(1);
ShmRanks[0] = 0; ShmRanks[0] = 0;
@ -100,7 +100,7 @@ void SharedMemory::SetCommunicator(Grid_MPI_Comm comm)
// Map ShmRank to WorldShmRank and use the right buffer // Map ShmRank to WorldShmRank and use the right buffer
////////////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////////////
ShmCommBufs[0] = GlobalSharedMemory::WorldShmCommBufs[0]; ShmCommBufs[0] = GlobalSharedMemory::WorldShmCommBufs[0];
heap_size = GlobalSharedMemory::MAX_MPI_SHM_BYTES; heap_size = GlobalSharedMemory::ShmAllocBytes();
ShmBufferFreeAll(); ShmBufferFreeAll();
return; return;
} }
@ -114,10 +114,6 @@ void SharedMemory::ShmBarrier(void){ return ; }
////////////////////////////////////////////////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////////////////////////////////////////////////
void SharedMemory::SharedMemoryTest(void) { return; } void SharedMemory::SharedMemoryTest(void) { return; }
// Returns the ShmCommBufs entry indexed by ShmRank.
void *SharedMemory::ShmBufferSelf(void)
{
return ShmCommBufs[ShmRank];
}
void *SharedMemory::ShmBuffer(int rank) void *SharedMemory::ShmBuffer(int rank)
{ {
return NULL; return NULL;
@ -127,24 +123,4 @@ void *SharedMemory::ShmBufferTranslate(int rank,void * local_p)
return NULL; return NULL;
} }
/////////////////////////////////
// Alloc, free shmem region ; common to MPI and none?
/////////////////////////////////
// Returns the current heap_top as the start of a `bytes`-sized region,
// then advances heap_top and heap_bytes by `bytes`.
// If heap_bytes reaches or exceeds heap_size afterwards, prints a hint
// about the --shm flag and asserts.
void *SharedMemory::ShmBufferMalloc(size_t bytes){
void *ptr = (void *)heap_top;
// Counters are advanced before the capacity check.
heap_top += bytes;
heap_bytes+= bytes;
if (heap_bytes >= heap_size) {
std::cout<< " ShmBufferMalloc exceeded shared heap size -- try increasing with --shm <MB> flag" <<std::endl;
std::cout<< " Parameter specified in units of MB (megabytes) " <<std::endl;
// heap_size is divided by 1024*1024 for display.
std::cout<< " Current value is " << (heap_size/(1024*1024)) <<std::endl;
assert(heap_bytes<heap_size);
}
return ptr;
}
// Resets the heap: heap_top goes back to the address returned by
// ShmBufferSelf() and heap_bytes is zeroed.
void SharedMemory::ShmBufferFreeAll(void) {
heap_top =(size_t)ShmBufferSelf();
heap_bytes=0;
}
} }