1
0
mirror of https://github.com/paboyle/Grid.git synced 2024-11-09 23:45:36 +00:00

Allow resize of the shared memory buffers

This commit is contained in:
paboyle 2018-01-08 15:20:26 +00:00
parent 0b2162f375
commit e564d11687
5 changed files with 93 additions and 77 deletions

28
TODO
View File

@ -1,16 +1,32 @@
TODO:
---------------
Large item work list:
Code item work list
a) namespaces & indentation
GRID_BEGIN_NAMESPACE();
GRID_END_NAMESPACE();
-- delete QCD namespace
b) GPU branch
- start branch
- Increase Macro use in core library support; prepare for change
- Audit volume of "device" code
- Virtual function audit
- Start port once Nvidia box is up
- Cut down volume of code for first port? How?
Physics item work list:
1)- BG/Q port and check ; Andrew says ok.
3a)- RNG I/O in ILDG/SciDAC (minor)
3c)- Consistent linear solver flop count/rate -- PARTIAL, time but no flop/s yet
4)- Physical propagator interface
6)- Multigrid Wilson and DWF, compare to other Multigrid implementations
7)- HDCR resume
2)- Consistent linear solver flop count/rate -- PARTIAL, time but no flop/s yet
3)- Physical propagator interface
4)- Multigrid Wilson and DWF, compare to other Multigrid implementations
5)- HDCR resume
----------------------------
Recent DONE
-- RNG I/O in ILDG/SciDAC (minor)
-- Precision conversion and sort out localConvert <-- partial/easy
-- Conserved currents (Andrew)
-- Split grid

View File

@ -34,7 +34,9 @@ namespace Grid {
uint64_t GlobalSharedMemory::MAX_MPI_SHM_BYTES = 1024LL*1024LL*1024LL;
int GlobalSharedMemory::Hugepages = 0;
int GlobalSharedMemory::ShmSetup;
int GlobalSharedMemory::_ShmSetup;
int GlobalSharedMemory::_ShmAlloc;
uint64_t GlobalSharedMemory::_ShmAllocBytes;
std::vector<void *> GlobalSharedMemory::WorldShmCommBufs;
@ -50,5 +52,41 @@ int GlobalSharedMemory::WorldRank;
int GlobalSharedMemory::WorldNodes;
int GlobalSharedMemory::WorldNode;
void GlobalSharedMemory::SharedMemoryFree(void)
{
assert(_ShmAlloc);
assert(_ShmAllocBytes>0);
for(int r=0;r<WorldShmSize;r++){
munmap(WorldShmCommBufs[r],_ShmAllocBytes);
}
_ShmAlloc = 0;
_ShmAllocBytes = 0;
}
/////////////////////////////////
// Alloc, free shmem region
/////////////////////////////////
void *SharedMemory::ShmBufferMalloc(size_t bytes){
// bytes = (bytes+sizeof(vRealD))&(~(sizeof(vRealD)-1));// align up bytes
void *ptr = (void *)heap_top;
heap_top += bytes;
heap_bytes+= bytes;
if (heap_bytes >= heap_size) {
std::cout<< " ShmBufferMalloc exceeded shared heap size -- try increasing with --shm <MB> flag" <<std::endl;
std::cout<< " Parameter specified in units of MB (megabytes) " <<std::endl;
std::cout<< " Current value is " << (heap_size/(1024*1024)) <<std::endl;
assert(heap_bytes<heap_size);
}
return ptr;
}
void SharedMemory::ShmBufferFreeAll(void) {
heap_top =(size_t)ShmBufferSelf();
heap_bytes=0;
}
void *SharedMemory::ShmBufferSelf(void)
{
return ShmCommBufs[ShmRank];
}
}

View File

@ -69,11 +69,17 @@ namespace Grid {
class GlobalSharedMemory {
private:
// Init once lock on the buffer allocation
static int ShmSetup;
static const int MAXLOG2RANKSPERNODE = 16;
// Init once lock on the buffer allocation
static int _ShmSetup;
static int _ShmAlloc;
static uint64_t _ShmAllocBytes;
public:
static int ShmSetup(void) { return _ShmSetup; }
static int ShmAlloc(void) { return _ShmAlloc; }
static uint64_t ShmAllocBytes(void) { return _ShmAllocBytes; }
static uint64_t MAX_MPI_SHM_BYTES;
static int Hugepages;

View File

@ -33,6 +33,7 @@ namespace Grid {
/*Construct from an MPI communicator*/
void GlobalSharedMemory::Init(Grid_MPI_Comm comm)
{
assert(_ShmSetup==0);
WorldComm = comm;
MPI_Comm_rank(WorldComm,&WorldRank);
MPI_Comm_size(WorldComm,&WorldSize);
@ -110,6 +111,7 @@ void GlobalSharedMemory::Init(Grid_MPI_Comm comm)
}
}
assert(WorldNode!=-1);
_ShmSetup=1;
}
void GlobalSharedMemory::OptimalCommunicator(const std::vector<int> &processors,Grid_MPI_Comm & optimal_comm)
@ -180,8 +182,8 @@ void GlobalSharedMemory::OptimalCommunicator(const std::vector<int> &processors,
#ifdef GRID_MPI3_SHMMMAP
void GlobalSharedMemory::SharedMemoryAllocate(uint64_t bytes, int flags)
{
GlobalSharedMemory::MAX_MPI_SHM_BYTES = bytes;
assert(ShmSetup==0); ShmSetup=1;
assert(_ShmSetup==1);
assert(_ShmAlloc==0);
//////////////////////////////////////////////////////////////////////////////////////////////////////////
// allocate the shared windows for our group
//////////////////////////////////////////////////////////////////////////////////////////////////////////
@ -214,8 +216,11 @@ void GlobalSharedMemory::SharedMemoryAllocate(uint64_t bytes, int flags)
perror("failed mmap"); assert(0);
}
assert(((uint64_t)ptr&0x3F)==0);
close(fd);
WorldShmCommBufs[r] =ptr;
}
_ShmAlloc=1;
_ShmAllocBytes = bytes;
};
#endif // MMAP
@ -227,8 +232,8 @@ void GlobalSharedMemory::SharedMemoryAllocate(uint64_t bytes, int flags)
////////////////////////////////////////////////////////////////////////////////////////////
void GlobalSharedMemory::SharedMemoryAllocate(uint64_t bytes, int flags)
{
GlobalSharedMemory::MAX_MPI_SHM_BYTES = bytes;
assert(ShmSetup==0); ShmSetup=1;
assert(_ShmSetup==1);
assert(_ShmAlloc==0);
MPI_Barrier(WorldShmComm);
WorldShmCommBufs.resize(WorldShmSize);
@ -258,6 +263,7 @@ void GlobalSharedMemory::SharedMemoryAllocate(uint64_t bytes, int flags)
assert(((uint64_t)ptr&0x3F)==0);
WorldShmCommBufs[r] =ptr;
close(fd);
}
}
@ -277,17 +283,15 @@ void GlobalSharedMemory::SharedMemoryAllocate(uint64_t bytes, int flags)
if ( ptr == MAP_FAILED ) { perror("failed mmap"); assert(0); }
assert(((uint64_t)ptr&0x3F)==0);
WorldShmCommBufs[r] =ptr;
close(fd);
}
}
_ShmAlloc=1;
_ShmAllocBytes = bytes;
}
#endif
void GlobalSharedMemory::SharedMemoryFree(void)
{
assert(ShmSetup);
assert(0); // unimplemented
}
////////////////////////////////////////////////////////
// Global shared functionality finished
// Now move to per communicator functionality
@ -310,7 +314,8 @@ void SharedMemory::SetCommunicator(Grid_MPI_Comm comm)
//////////////////////////////////////////////////////////////////////
// Map ShmRank to WorldShmRank and use the right buffer
//////////////////////////////////////////////////////////////////////
heap_size = GlobalSharedMemory::MAX_MPI_SHM_BYTES;
assert (GlobalSharedMemory::ShmAlloc()==1);
heap_size = GlobalSharedMemory::ShmAllocBytes();
for(int r=0;r<ShmSize;r++){
uint32_t sr = (r==ShmRank) ? GlobalSharedMemory::WorldRank : 0 ;
@ -364,10 +369,6 @@ void SharedMemory::SharedMemoryTest(void)
ShmBarrier();
}
void *SharedMemory::ShmBufferSelf(void)
{
return ShmCommBufs[ShmRank];
}
void *SharedMemory::ShmBuffer(int rank)
{
int gpeer = ShmRanks[rank];
@ -391,25 +392,4 @@ void *SharedMemory::ShmBufferTranslate(int rank,void * local_p)
}
}
/////////////////////////////////
// Alloc, free shmem region
/////////////////////////////////
void *SharedMemory::ShmBufferMalloc(size_t bytes){
// bytes = (bytes+sizeof(vRealD))&(~(sizeof(vRealD)-1));// align up bytes
void *ptr = (void *)heap_top;
heap_top += bytes;
heap_bytes+= bytes;
if (heap_bytes >= heap_size) {
std::cout<< " ShmBufferMalloc exceeded shared heap size -- try increasing with --shm <MB> flag" <<std::endl;
std::cout<< " Parameter specified in units of MB (megabytes) " <<std::endl;
std::cout<< " Current value is " << (heap_size/(1024*1024)) <<std::endl;
assert(heap_bytes<heap_size);
}
return ptr;
}
void SharedMemory::ShmBufferFreeAll(void) {
heap_top =(size_t)ShmBufferSelf();
heap_bytes=0;
}
}

View File

@ -33,6 +33,7 @@ namespace Grid {
/*Construct from an MPI communicator*/
void GlobalSharedMemory::Init(Grid_MPI_Comm comm)
{
assert(_ShmSetup==0);
WorldComm = 0;
WorldRank = 0;
WorldSize = 1;
@ -43,6 +44,7 @@ void GlobalSharedMemory::Init(Grid_MPI_Comm comm)
WorldNode = 0 ;
WorldShmRanks.resize(WorldSize); WorldShmRanks[0] = 0;
WorldShmCommBufs.resize(1);
_ShmSetup=1;
}
void GlobalSharedMemory::OptimalCommunicator(const std::vector<int> &processors,Grid_MPI_Comm & optimal_comm)
@ -56,7 +58,8 @@ void GlobalSharedMemory::OptimalCommunicator(const std::vector<int> &processors,
void GlobalSharedMemory::SharedMemoryAllocate(uint64_t bytes, int flags)
{
void * ShmCommBuf ;
MAX_MPI_SHM_BYTES=bytes;
assert(_ShmSetup==1);
assert(_ShmAlloc==0);
int mmap_flag =0;
#ifdef MAP_ANONYMOUS
mmap_flag = mmap_flag| MAP_SHARED | MAP_ANONYMOUS;
@ -77,20 +80,17 @@ void GlobalSharedMemory::SharedMemoryAllocate(uint64_t bytes, int flags)
#endif
bzero(ShmCommBuf,bytes);
WorldShmCommBufs[0] = ShmCommBuf;
_ShmAllocBytes=bytes;
_ShmAlloc=1;
};
void GlobalSharedMemory::SharedMemoryFree(void)
{
assert(ShmSetup);
assert(0); // unimplemented
}
////////////////////////////////////////////////////////
// Global shared functionality finished
// Now move to per communicator functionality
////////////////////////////////////////////////////////
void SharedMemory::SetCommunicator(Grid_MPI_Comm comm)
{
assert(GlobalSharedMemory::ShmAlloc()==1);
ShmRanks.resize(1);
ShmCommBufs.resize(1);
ShmRanks[0] = 0;
@ -100,7 +100,7 @@ void SharedMemory::SetCommunicator(Grid_MPI_Comm comm)
// Map ShmRank to WorldShmRank and use the right buffer
//////////////////////////////////////////////////////////////////////
ShmCommBufs[0] = GlobalSharedMemory::WorldShmCommBufs[0];
heap_size = GlobalSharedMemory::MAX_MPI_SHM_BYTES;
heap_size = GlobalSharedMemory::ShmAllocBytes();
ShmBufferFreeAll();
return;
}
@ -114,10 +114,6 @@ void SharedMemory::ShmBarrier(void){ return ; }
//////////////////////////////////////////////////////////////////////////////////////////////////////////
void SharedMemory::SharedMemoryTest(void) { return; }
void *SharedMemory::ShmBufferSelf(void)
{
return ShmCommBufs[ShmRank];
}
void *SharedMemory::ShmBuffer(int rank)
{
return NULL;
@ -127,24 +123,4 @@ void *SharedMemory::ShmBufferTranslate(int rank,void * local_p)
return NULL;
}
/////////////////////////////////
// Alloc, free shmem region ; common to MPI and none?
/////////////////////////////////
void *SharedMemory::ShmBufferMalloc(size_t bytes){
void *ptr = (void *)heap_top;
heap_top += bytes;
heap_bytes+= bytes;
if (heap_bytes >= heap_size) {
std::cout<< " ShmBufferMalloc exceeded shared heap size -- try increasing with --shm <MB> flag" <<std::endl;
std::cout<< " Parameter specified in units of MB (megabytes) " <<std::endl;
std::cout<< " Current value is " << (heap_size/(1024*1024)) <<std::endl;
assert(heap_bytes<heap_size);
}
return ptr;
}
void SharedMemory::ShmBufferFreeAll(void) {
heap_top =(size_t)ShmBufferSelf();
heap_bytes=0;
}
}