mirror of
https://github.com/paboyle/Grid.git
synced 2024-11-14 01:35:36 +00:00
Allow resize of the shared memory buffers
This commit is contained in:
parent
0b2162f375
commit
e564d11687
28
TODO
28
TODO
@ -1,16 +1,32 @@
|
|||||||
TODO:
|
TODO:
|
||||||
---------------
|
---------------
|
||||||
|
|
||||||
Large item work list:
|
Code item work list
|
||||||
|
|
||||||
|
a) namespaces & indentation
|
||||||
|
GRID_BEGIN_NAMESPACE();
|
||||||
|
GRID_END_NAMESPACE();
|
||||||
|
-- delete QCD namespace
|
||||||
|
|
||||||
|
b) GPU branch
|
||||||
|
- start branch
|
||||||
|
- Increase Macro use in core library support; prepare for change
|
||||||
|
- Audit volume of "device" code
|
||||||
|
- Virtual function audit
|
||||||
|
- Start port once Nvidia box is up
|
||||||
|
- Cut down volume of code for first port? How?
|
||||||
|
|
||||||
|
Physics item work list:
|
||||||
|
|
||||||
1)- BG/Q port and check ; Andrew says ok.
|
1)- BG/Q port and check ; Andrew says ok.
|
||||||
3a)- RNG I/O in ILDG/SciDAC (minor)
|
2)- Consistent linear solver flop count/rate -- PARTIAL, time but no flop/s yet
|
||||||
3c)- Consistent linear solver flop count/rate -- PARTIAL, time but no flop/s yet
|
3)- Physical propagator interface
|
||||||
4)- Physical propagator interface
|
4)- Multigrid Wilson and DWF, compare to other Multigrid implementations
|
||||||
6)- Multigrid Wilson and DWF, compare to other Multigrid implementations
|
5)- HDCR resume
|
||||||
7)- HDCR resume
|
|
||||||
----------------------------
|
----------------------------
|
||||||
Recent DONE
|
Recent DONE
|
||||||
|
-- RNG I/O in ILDG/SciDAC (minor)
|
||||||
-- Precision conversion and sort out localConvert <-- partial/easy
|
-- Precision conversion and sort out localConvert <-- partial/easy
|
||||||
-- Conserved currents (Andrew)
|
-- Conserved currents (Andrew)
|
||||||
-- Split grid
|
-- Split grid
|
||||||
|
@ -34,7 +34,9 @@ namespace Grid {
|
|||||||
|
|
||||||
uint64_t GlobalSharedMemory::MAX_MPI_SHM_BYTES = 1024LL*1024LL*1024LL;
|
uint64_t GlobalSharedMemory::MAX_MPI_SHM_BYTES = 1024LL*1024LL*1024LL;
|
||||||
int GlobalSharedMemory::Hugepages = 0;
|
int GlobalSharedMemory::Hugepages = 0;
|
||||||
int GlobalSharedMemory::ShmSetup;
|
int GlobalSharedMemory::_ShmSetup;
|
||||||
|
int GlobalSharedMemory::_ShmAlloc;
|
||||||
|
uint64_t GlobalSharedMemory::_ShmAllocBytes;
|
||||||
|
|
||||||
std::vector<void *> GlobalSharedMemory::WorldShmCommBufs;
|
std::vector<void *> GlobalSharedMemory::WorldShmCommBufs;
|
||||||
|
|
||||||
@ -50,5 +52,41 @@ int GlobalSharedMemory::WorldRank;
|
|||||||
int GlobalSharedMemory::WorldNodes;
|
int GlobalSharedMemory::WorldNodes;
|
||||||
int GlobalSharedMemory::WorldNode;
|
int GlobalSharedMemory::WorldNode;
|
||||||
|
|
||||||
|
// Releases the shared-memory mappings recorded in WorldShmCommBufs.
// Preconditions (asserted): _ShmAlloc is non-zero and _ShmAllocBytes > 0.
// Calls munmap() on WorldShmCommBufs[r] for every r in [0, WorldShmSize),
// each with length _ShmAllocBytes, then resets _ShmAlloc and _ShmAllocBytes
// to 0 so that the allocation asserts elsewhere see "not allocated".
// NOTE(review): the munmap() return value is not checked, and the entries of
// WorldShmCommBufs are left holding the unmapped addresses -- confirm that no
// caller reads them before the next allocation.
void GlobalSharedMemory::SharedMemoryFree(void)
|
||||||
|
{
|
||||||
|
assert(_ShmAlloc);
|
||||||
|
assert(_ShmAllocBytes>0);
|
||||||
|
for(int r=0;r<WorldShmSize;r++){
|
||||||
|
munmap(WorldShmCommBufs[r],_ShmAllocBytes);
|
||||||
|
}
|
||||||
|
_ShmAlloc = 0; // mark the buffers as no longer allocated
|
||||||
|
_ShmAllocBytes = 0;
|
||||||
|
}
|
||||||
|
/////////////////////////////////
|
||||||
|
// Alloc, free shmem region
|
||||||
|
/////////////////////////////////
|
||||||
|
// Hands out the next `bytes` bytes of the shared heap.
// Returns the value heap_top had on entry (as a void*), then advances
// heap_top and heap_bytes by `bytes`. No alignment is applied: the align-up
// line below is commented out.
// If the running total heap_bytes reaches or exceeds heap_size, a diagnostic
// is printed to std::cout and the assert inside the branch fails.
// NOTE(review): the test is `>=`, so a request that fills the heap exactly
// (heap_bytes == heap_size) is rejected as well -- confirm this is intended.
// NOTE(review): heap_top/heap_bytes are advanced before the check; if assert
// is compiled out (NDEBUG) the function still returns ptr after printing.
void *SharedMemory::ShmBufferMalloc(size_t bytes){
|
||||||
|
// bytes = (bytes+sizeof(vRealD))&(~(sizeof(vRealD)-1));// align up bytes
|
||||||
|
void *ptr = (void *)heap_top;
|
||||||
|
heap_top += bytes;
|
||||||
|
heap_bytes+= bytes;
|
||||||
|
if (heap_bytes >= heap_size) {
|
||||||
|
std::cout<< " ShmBufferMalloc exceeded shared heap size -- try increasing with --shm <MB> flag" <<std::endl;
|
||||||
|
std::cout<< " Parameter specified in units of MB (megabytes) " <<std::endl;
|
||||||
|
std::cout<< " Current value is " << (heap_size/(1024*1024)) <<std::endl;
|
||||||
|
assert(heap_bytes<heap_size); // always false here, given the enclosing condition
|
||||||
|
}
|
||||||
|
return ptr;
|
||||||
|
}
|
||||||
|
// Resets the shared heap to empty: heap_top is set back to the address
// returned by ShmBufferSelf() and the heap_bytes usage counter is zeroed.
// Nothing is unmapped or cleared here; only the bookkeeping is rewound, so
// pointers previously returned by ShmBufferMalloc will be handed out again.
void SharedMemory::ShmBufferFreeAll(void) {
|
||||||
|
heap_top =(size_t)ShmBufferSelf();
|
||||||
|
heap_bytes=0;
|
||||||
|
}
|
||||||
|
// Returns the buffer pointer stored for this rank, i.e. ShmCommBufs[ShmRank].
// ShmBufferFreeAll() uses this address as the base of the shared heap.
void *SharedMemory::ShmBufferSelf(void)
|
||||||
|
{
|
||||||
|
return ShmCommBufs[ShmRank];
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
@ -69,11 +69,17 @@ namespace Grid {
|
|||||||
|
|
||||||
class GlobalSharedMemory {
|
class GlobalSharedMemory {
|
||||||
private:
|
private:
|
||||||
// Init once lock on the buffer allocation
|
|
||||||
static int ShmSetup;
|
|
||||||
static const int MAXLOG2RANKSPERNODE = 16;
|
static const int MAXLOG2RANKSPERNODE = 16;
|
||||||
|
|
||||||
|
// Init once lock on the buffer allocation
|
||||||
|
static int _ShmSetup;
|
||||||
|
static int _ShmAlloc;
|
||||||
|
static uint64_t _ShmAllocBytes;
|
||||||
|
|
||||||
public:
|
public:
|
||||||
|
static int ShmSetup(void) { return _ShmSetup; } // read-only accessor for the private _ShmSetup flag
|
||||||
|
static int ShmAlloc(void) { return _ShmAlloc; } // read-only accessor for the private _ShmAlloc flag
|
||||||
|
static uint64_t ShmAllocBytes(void) { return _ShmAllocBytes; } // read-only accessor for the private _ShmAllocBytes value
|
||||||
static uint64_t MAX_MPI_SHM_BYTES;
|
static uint64_t MAX_MPI_SHM_BYTES;
|
||||||
static int Hugepages;
|
static int Hugepages;
|
||||||
|
|
||||||
|
@ -33,6 +33,7 @@ namespace Grid {
|
|||||||
/*Construct from an MPI communicator*/
|
/*Construct from an MPI communicator*/
|
||||||
void GlobalSharedMemory::Init(Grid_MPI_Comm comm)
|
void GlobalSharedMemory::Init(Grid_MPI_Comm comm)
|
||||||
{
|
{
|
||||||
|
assert(_ShmSetup==0);
|
||||||
WorldComm = comm;
|
WorldComm = comm;
|
||||||
MPI_Comm_rank(WorldComm,&WorldRank);
|
MPI_Comm_rank(WorldComm,&WorldRank);
|
||||||
MPI_Comm_size(WorldComm,&WorldSize);
|
MPI_Comm_size(WorldComm,&WorldSize);
|
||||||
@ -110,6 +111,7 @@ void GlobalSharedMemory::Init(Grid_MPI_Comm comm)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
assert(WorldNode!=-1);
|
assert(WorldNode!=-1);
|
||||||
|
_ShmSetup=1;
|
||||||
}
|
}
|
||||||
|
|
||||||
void GlobalSharedMemory::OptimalCommunicator(const std::vector<int> &processors,Grid_MPI_Comm & optimal_comm)
|
void GlobalSharedMemory::OptimalCommunicator(const std::vector<int> &processors,Grid_MPI_Comm & optimal_comm)
|
||||||
@ -180,8 +182,8 @@ void GlobalSharedMemory::OptimalCommunicator(const std::vector<int> &processors,
|
|||||||
#ifdef GRID_MPI3_SHMMMAP
|
#ifdef GRID_MPI3_SHMMMAP
|
||||||
void GlobalSharedMemory::SharedMemoryAllocate(uint64_t bytes, int flags)
|
void GlobalSharedMemory::SharedMemoryAllocate(uint64_t bytes, int flags)
|
||||||
{
|
{
|
||||||
GlobalSharedMemory::MAX_MPI_SHM_BYTES = bytes;
|
assert(_ShmSetup==1);
|
||||||
assert(ShmSetup==0); ShmSetup=1;
|
assert(_ShmAlloc==0);
|
||||||
//////////////////////////////////////////////////////////////////////////////////////////////////////////
|
//////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||||
// allocate the shared windows for our group
|
// allocate the shared windows for our group
|
||||||
//////////////////////////////////////////////////////////////////////////////////////////////////////////
|
//////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||||
@ -214,8 +216,11 @@ void GlobalSharedMemory::SharedMemoryAllocate(uint64_t bytes, int flags)
|
|||||||
perror("failed mmap"); assert(0);
|
perror("failed mmap"); assert(0);
|
||||||
}
|
}
|
||||||
assert(((uint64_t)ptr&0x3F)==0);
|
assert(((uint64_t)ptr&0x3F)==0);
|
||||||
|
close(fd);
|
||||||
WorldShmCommBufs[r] =ptr;
|
WorldShmCommBufs[r] =ptr;
|
||||||
}
|
}
|
||||||
|
_ShmAlloc=1;
|
||||||
|
_ShmAllocBytes = bytes;
|
||||||
};
|
};
|
||||||
#endif // MMAP
|
#endif // MMAP
|
||||||
|
|
||||||
@ -227,8 +232,8 @@ void GlobalSharedMemory::SharedMemoryAllocate(uint64_t bytes, int flags)
|
|||||||
////////////////////////////////////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////////////////////////////////////
|
||||||
void GlobalSharedMemory::SharedMemoryAllocate(uint64_t bytes, int flags)
|
void GlobalSharedMemory::SharedMemoryAllocate(uint64_t bytes, int flags)
|
||||||
{
|
{
|
||||||
GlobalSharedMemory::MAX_MPI_SHM_BYTES = bytes;
|
assert(_ShmSetup==1);
|
||||||
assert(ShmSetup==0); ShmSetup=1;
|
assert(_ShmAlloc==0);
|
||||||
MPI_Barrier(WorldShmComm);
|
MPI_Barrier(WorldShmComm);
|
||||||
WorldShmCommBufs.resize(WorldShmSize);
|
WorldShmCommBufs.resize(WorldShmSize);
|
||||||
|
|
||||||
@ -258,6 +263,7 @@ void GlobalSharedMemory::SharedMemoryAllocate(uint64_t bytes, int flags)
|
|||||||
assert(((uint64_t)ptr&0x3F)==0);
|
assert(((uint64_t)ptr&0x3F)==0);
|
||||||
|
|
||||||
WorldShmCommBufs[r] =ptr;
|
WorldShmCommBufs[r] =ptr;
|
||||||
|
close(fd);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -277,17 +283,15 @@ void GlobalSharedMemory::SharedMemoryAllocate(uint64_t bytes, int flags)
|
|||||||
if ( ptr == MAP_FAILED ) { perror("failed mmap"); assert(0); }
|
if ( ptr == MAP_FAILED ) { perror("failed mmap"); assert(0); }
|
||||||
assert(((uint64_t)ptr&0x3F)==0);
|
assert(((uint64_t)ptr&0x3F)==0);
|
||||||
WorldShmCommBufs[r] =ptr;
|
WorldShmCommBufs[r] =ptr;
|
||||||
|
|
||||||
|
close(fd);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
_ShmAlloc=1;
|
||||||
|
_ShmAllocBytes = bytes;
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
void GlobalSharedMemory::SharedMemoryFree(void)
|
|
||||||
{
|
|
||||||
assert(ShmSetup);
|
|
||||||
assert(0); // unimplemented
|
|
||||||
}
|
|
||||||
|
|
||||||
////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////
|
||||||
// Global shared functionality finished
|
// Global shared functionality finished
|
||||||
// Now move to per communicator functionality
|
// Now move to per communicator functionality
|
||||||
@ -310,7 +314,8 @@ void SharedMemory::SetCommunicator(Grid_MPI_Comm comm)
|
|||||||
//////////////////////////////////////////////////////////////////////
|
//////////////////////////////////////////////////////////////////////
|
||||||
// Map ShmRank to WorldShmRank and use the right buffer
|
// Map ShmRank to WorldShmRank and use the right buffer
|
||||||
//////////////////////////////////////////////////////////////////////
|
//////////////////////////////////////////////////////////////////////
|
||||||
heap_size = GlobalSharedMemory::MAX_MPI_SHM_BYTES;
|
assert (GlobalSharedMemory::ShmAlloc()==1);
|
||||||
|
heap_size = GlobalSharedMemory::ShmAllocBytes();
|
||||||
for(int r=0;r<ShmSize;r++){
|
for(int r=0;r<ShmSize;r++){
|
||||||
|
|
||||||
uint32_t sr = (r==ShmRank) ? GlobalSharedMemory::WorldRank : 0 ;
|
uint32_t sr = (r==ShmRank) ? GlobalSharedMemory::WorldRank : 0 ;
|
||||||
@ -364,10 +369,6 @@ void SharedMemory::SharedMemoryTest(void)
|
|||||||
ShmBarrier();
|
ShmBarrier();
|
||||||
}
|
}
|
||||||
|
|
||||||
void *SharedMemory::ShmBufferSelf(void)
|
|
||||||
{
|
|
||||||
return ShmCommBufs[ShmRank];
|
|
||||||
}
|
|
||||||
void *SharedMemory::ShmBuffer(int rank)
|
void *SharedMemory::ShmBuffer(int rank)
|
||||||
{
|
{
|
||||||
int gpeer = ShmRanks[rank];
|
int gpeer = ShmRanks[rank];
|
||||||
@ -391,25 +392,4 @@ void *SharedMemory::ShmBufferTranslate(int rank,void * local_p)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/////////////////////////////////
|
|
||||||
// Alloc, free shmem region
|
|
||||||
/////////////////////////////////
|
|
||||||
void *SharedMemory::ShmBufferMalloc(size_t bytes){
|
|
||||||
// bytes = (bytes+sizeof(vRealD))&(~(sizeof(vRealD)-1));// align up bytes
|
|
||||||
void *ptr = (void *)heap_top;
|
|
||||||
heap_top += bytes;
|
|
||||||
heap_bytes+= bytes;
|
|
||||||
if (heap_bytes >= heap_size) {
|
|
||||||
std::cout<< " ShmBufferMalloc exceeded shared heap size -- try increasing with --shm <MB> flag" <<std::endl;
|
|
||||||
std::cout<< " Parameter specified in units of MB (megabytes) " <<std::endl;
|
|
||||||
std::cout<< " Current value is " << (heap_size/(1024*1024)) <<std::endl;
|
|
||||||
assert(heap_bytes<heap_size);
|
|
||||||
}
|
|
||||||
return ptr;
|
|
||||||
}
|
|
||||||
void SharedMemory::ShmBufferFreeAll(void) {
|
|
||||||
heap_top =(size_t)ShmBufferSelf();
|
|
||||||
heap_bytes=0;
|
|
||||||
}
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
@ -33,6 +33,7 @@ namespace Grid {
|
|||||||
/*Construct from an MPI communicator*/
|
/*Construct from an MPI communicator*/
|
||||||
void GlobalSharedMemory::Init(Grid_MPI_Comm comm)
|
void GlobalSharedMemory::Init(Grid_MPI_Comm comm)
|
||||||
{
|
{
|
||||||
|
assert(_ShmSetup==0);
|
||||||
WorldComm = 0;
|
WorldComm = 0;
|
||||||
WorldRank = 0;
|
WorldRank = 0;
|
||||||
WorldSize = 1;
|
WorldSize = 1;
|
||||||
@ -43,6 +44,7 @@ void GlobalSharedMemory::Init(Grid_MPI_Comm comm)
|
|||||||
WorldNode = 0 ;
|
WorldNode = 0 ;
|
||||||
WorldShmRanks.resize(WorldSize); WorldShmRanks[0] = 0;
|
WorldShmRanks.resize(WorldSize); WorldShmRanks[0] = 0;
|
||||||
WorldShmCommBufs.resize(1);
|
WorldShmCommBufs.resize(1);
|
||||||
|
_ShmSetup=1;
|
||||||
}
|
}
|
||||||
|
|
||||||
void GlobalSharedMemory::OptimalCommunicator(const std::vector<int> &processors,Grid_MPI_Comm & optimal_comm)
|
void GlobalSharedMemory::OptimalCommunicator(const std::vector<int> &processors,Grid_MPI_Comm & optimal_comm)
|
||||||
@ -56,7 +58,8 @@ void GlobalSharedMemory::OptimalCommunicator(const std::vector<int> &processors,
|
|||||||
void GlobalSharedMemory::SharedMemoryAllocate(uint64_t bytes, int flags)
|
void GlobalSharedMemory::SharedMemoryAllocate(uint64_t bytes, int flags)
|
||||||
{
|
{
|
||||||
void * ShmCommBuf ;
|
void * ShmCommBuf ;
|
||||||
MAX_MPI_SHM_BYTES=bytes;
|
assert(_ShmSetup==1);
|
||||||
|
assert(_ShmAlloc==0);
|
||||||
int mmap_flag =0;
|
int mmap_flag =0;
|
||||||
#ifdef MAP_ANONYMOUS
|
#ifdef MAP_ANONYMOUS
|
||||||
mmap_flag = mmap_flag| MAP_SHARED | MAP_ANONYMOUS;
|
mmap_flag = mmap_flag| MAP_SHARED | MAP_ANONYMOUS;
|
||||||
@ -77,20 +80,17 @@ void GlobalSharedMemory::SharedMemoryAllocate(uint64_t bytes, int flags)
|
|||||||
#endif
|
#endif
|
||||||
bzero(ShmCommBuf,bytes);
|
bzero(ShmCommBuf,bytes);
|
||||||
WorldShmCommBufs[0] = ShmCommBuf;
|
WorldShmCommBufs[0] = ShmCommBuf;
|
||||||
|
_ShmAllocBytes=bytes;
|
||||||
|
_ShmAlloc=1;
|
||||||
};
|
};
|
||||||
|
|
||||||
void GlobalSharedMemory::SharedMemoryFree(void)
|
|
||||||
{
|
|
||||||
assert(ShmSetup);
|
|
||||||
assert(0); // unimplemented
|
|
||||||
}
|
|
||||||
|
|
||||||
////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////
|
||||||
// Global shared functionality finished
|
// Global shared functionality finished
|
||||||
// Now move to per communicator functionality
|
// Now move to per communicator functionality
|
||||||
////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////
|
||||||
void SharedMemory::SetCommunicator(Grid_MPI_Comm comm)
|
void SharedMemory::SetCommunicator(Grid_MPI_Comm comm)
|
||||||
{
|
{
|
||||||
|
assert(GlobalSharedMemory::ShmAlloc()==1);
|
||||||
ShmRanks.resize(1);
|
ShmRanks.resize(1);
|
||||||
ShmCommBufs.resize(1);
|
ShmCommBufs.resize(1);
|
||||||
ShmRanks[0] = 0;
|
ShmRanks[0] = 0;
|
||||||
@ -100,7 +100,7 @@ void SharedMemory::SetCommunicator(Grid_MPI_Comm comm)
|
|||||||
// Map ShmRank to WorldShmRank and use the right buffer
|
// Map ShmRank to WorldShmRank and use the right buffer
|
||||||
//////////////////////////////////////////////////////////////////////
|
//////////////////////////////////////////////////////////////////////
|
||||||
ShmCommBufs[0] = GlobalSharedMemory::WorldShmCommBufs[0];
|
ShmCommBufs[0] = GlobalSharedMemory::WorldShmCommBufs[0];
|
||||||
heap_size = GlobalSharedMemory::MAX_MPI_SHM_BYTES;
|
heap_size = GlobalSharedMemory::ShmAllocBytes();
|
||||||
ShmBufferFreeAll();
|
ShmBufferFreeAll();
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
@ -114,10 +114,6 @@ void SharedMemory::ShmBarrier(void){ return ; }
|
|||||||
//////////////////////////////////////////////////////////////////////////////////////////////////////////
|
//////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||||
void SharedMemory::SharedMemoryTest(void) { return; }
|
void SharedMemory::SharedMemoryTest(void) { return; }
|
||||||
|
|
||||||
void *SharedMemory::ShmBufferSelf(void)
|
|
||||||
{
|
|
||||||
return ShmCommBufs[ShmRank];
|
|
||||||
}
|
|
||||||
void *SharedMemory::ShmBuffer(int rank)
|
void *SharedMemory::ShmBuffer(int rank)
|
||||||
{
|
{
|
||||||
return NULL;
|
return NULL;
|
||||||
@ -127,24 +123,4 @@ void *SharedMemory::ShmBufferTranslate(int rank,void * local_p)
|
|||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
/////////////////////////////////
|
|
||||||
// Alloc, free shmem region ; common to MPI and none?
|
|
||||||
/////////////////////////////////
|
|
||||||
void *SharedMemory::ShmBufferMalloc(size_t bytes){
|
|
||||||
void *ptr = (void *)heap_top;
|
|
||||||
heap_top += bytes;
|
|
||||||
heap_bytes+= bytes;
|
|
||||||
if (heap_bytes >= heap_size) {
|
|
||||||
std::cout<< " ShmBufferMalloc exceeded shared heap size -- try increasing with --shm <MB> flag" <<std::endl;
|
|
||||||
std::cout<< " Parameter specified in units of MB (megabytes) " <<std::endl;
|
|
||||||
std::cout<< " Current value is " << (heap_size/(1024*1024)) <<std::endl;
|
|
||||||
assert(heap_bytes<heap_size);
|
|
||||||
}
|
|
||||||
return ptr;
|
|
||||||
}
|
|
||||||
void SharedMemory::ShmBufferFreeAll(void) {
|
|
||||||
heap_top =(size_t)ShmBufferSelf();
|
|
||||||
heap_bytes=0;
|
|
||||||
}
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user