mirror of
https://github.com/paboyle/Grid.git
synced 2024-11-09 23:45:36 +00:00
Options to bounce through a host buffer if
--disable-accelerator-aware-mpi
This commit is contained in:
parent
434c3e7f1d
commit
1f53458af8
@ -40,6 +40,7 @@ int GlobalSharedMemory::_ShmAlloc;
|
|||||||
uint64_t GlobalSharedMemory::_ShmAllocBytes;
|
uint64_t GlobalSharedMemory::_ShmAllocBytes;
|
||||||
|
|
||||||
std::vector<void *> GlobalSharedMemory::WorldShmCommBufs;
|
std::vector<void *> GlobalSharedMemory::WorldShmCommBufs;
|
||||||
|
#ifndef ACCELERATOR_AWARE_MPI
// Storage for the static host communication buffer pointer.
// The class declares this member only when ACCELERATOR_AWARE_MPI is not
// defined, so the out-of-class definition must carry the same guard;
// otherwise builds with ACCELERATOR_AWARE_MPI defined fail to compile.
void * GlobalSharedMemory::HostCommBuf;
#endif
|
||||||
|
|
||||||
Grid_MPI_Comm GlobalSharedMemory::WorldShmComm;
|
Grid_MPI_Comm GlobalSharedMemory::WorldShmComm;
|
||||||
int GlobalSharedMemory::WorldShmRank;
|
int GlobalSharedMemory::WorldShmRank;
|
||||||
@ -66,6 +67,26 @@ void GlobalSharedMemory::SharedMemoryFree(void)
|
|||||||
/////////////////////////////////
|
/////////////////////////////////
|
||||||
// Alloc, free shmem region
|
// Alloc, free shmem region
|
||||||
/////////////////////////////////
|
/////////////////////////////////
|
||||||
|
#ifndef ACCELERATOR_AWARE_MPI
|
||||||
|
void *SharedMemory::HostBufferMalloc(size_t bytes){
|
||||||
|
void *ptr = (void *)host_heap_top;
|
||||||
|
host_heap_top += bytes;
|
||||||
|
host_heap_bytes+= bytes;
|
||||||
|
if (host_heap_bytes >= host_heap_size) {
|
||||||
|
std::cout<< " HostBufferMalloc exceeded heap size -- try increasing with --shm <MB> flag" <<std::endl;
|
||||||
|
std::cout<< " Parameter specified in units of MB (megabytes) " <<std::endl;
|
||||||
|
std::cout<< " Current alloc is " << (bytes/(1024*1024)) <<"MB"<<std::endl;
|
||||||
|
std::cout<< " Current bytes is " << (host_heap_bytes/(1024*1024)) <<"MB"<<std::endl;
|
||||||
|
std::cout<< " Current heap is " << (host_heap_size/(1024*1024)) <<"MB"<<std::endl;
|
||||||
|
assert(host_heap_bytes<host_heap_size);
|
||||||
|
}
|
||||||
|
return ptr;
|
||||||
|
}
|
||||||
|
/////////////////////////////////////////////////////////////////////////////
// Discard every outstanding host-buffer allocation in one go: the usage
// counter is zeroed and the allocation cursor is rewound to the start of
// HostCommBuf.
/////////////////////////////////////////////////////////////////////////////
void SharedMemory::HostBufferFreeAll(void) {
  host_heap_bytes = 0;
  host_heap_top   = reinterpret_cast<size_t>(HostCommBuf);
}
|
||||||
|
#endif
|
||||||
void *SharedMemory::ShmBufferMalloc(size_t bytes){
|
void *SharedMemory::ShmBufferMalloc(size_t bytes){
|
||||||
// bytes = (bytes+sizeof(vRealD))&(~(sizeof(vRealD)-1));// align up bytes
|
// bytes = (bytes+sizeof(vRealD))&(~(sizeof(vRealD)-1));// align up bytes
|
||||||
void *ptr = (void *)heap_top;
|
void *ptr = (void *)heap_top;
|
||||||
|
@ -75,7 +75,9 @@ public:
|
|||||||
static int Hugepages;
|
static int Hugepages;
|
||||||
|
|
||||||
static std::vector<void *> WorldShmCommBufs;
|
static std::vector<void *> WorldShmCommBufs;
|
||||||
|
#ifndef ACCELERATOR_AWARE_MPI
|
||||||
|
static void *HostCommBuf;
|
||||||
|
#endif
|
||||||
static Grid_MPI_Comm WorldComm;
|
static Grid_MPI_Comm WorldComm;
|
||||||
static int WorldRank;
|
static int WorldRank;
|
||||||
static int WorldSize;
|
static int WorldSize;
|
||||||
@ -120,6 +122,13 @@ private:
|
|||||||
size_t heap_bytes;
|
size_t heap_bytes;
|
||||||
size_t heap_size;
|
size_t heap_size;
|
||||||
|
|
||||||
|
#ifndef ACCELERATOR_AWARE_MPI
|
||||||
|
size_t host_heap_top; // set in free all
|
||||||
|
size_t host_heap_bytes;// set in free all
|
||||||
|
void *HostCommBuf; // set in SetCommunicator
|
||||||
|
size_t host_heap_size; // set in SetCommunicator
|
||||||
|
#endif
|
||||||
|
|
||||||
protected:
|
protected:
|
||||||
|
|
||||||
Grid_MPI_Comm ShmComm; // for barriers
|
Grid_MPI_Comm ShmComm; // for barriers
|
||||||
@ -151,7 +160,10 @@ public:
|
|||||||
void *ShmBufferTranslate(int rank,void * local_p);
|
void *ShmBufferTranslate(int rank,void * local_p);
|
||||||
void *ShmBufferMalloc(size_t bytes);
|
void *ShmBufferMalloc(size_t bytes);
|
||||||
void ShmBufferFreeAll(void) ;
|
void ShmBufferFreeAll(void) ;
|
||||||
|
#ifndef ACCELERATOR_AWARE_MPI
|
||||||
|
void *HostBufferMalloc(size_t bytes);
|
||||||
|
void HostBufferFreeAll(void);
|
||||||
|
#endif
|
||||||
//////////////////////////////////////////////////////////////////////////
|
//////////////////////////////////////////////////////////////////////////
|
||||||
// Make info on Nodes & ranks and Shared memory available
|
// Make info on Nodes & ranks and Shared memory available
|
||||||
//////////////////////////////////////////////////////////////////////////
|
//////////////////////////////////////////////////////////////////////////
|
||||||
|
@ -512,46 +512,6 @@ void GlobalSharedMemory::SharedMemoryAllocate(uint64_t bytes, int flags)
|
|||||||
// Hugetlbfs mapping intended
|
// Hugetlbfs mapping intended
|
||||||
////////////////////////////////////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////////////////////////////////////
|
||||||
#if defined(GRID_CUDA) ||defined(GRID_HIP) || defined(GRID_SYCL)
|
#if defined(GRID_CUDA) ||defined(GRID_HIP) || defined(GRID_SYCL)
|
||||||
|
|
||||||
//if defined(GRID_SYCL)
|
|
||||||
#if 0
|
|
||||||
void GlobalSharedMemory::SharedMemoryAllocate(uint64_t bytes, int flags)
|
|
||||||
{
|
|
||||||
void * ShmCommBuf ;
|
|
||||||
assert(_ShmSetup==1);
|
|
||||||
assert(_ShmAlloc==0);
|
|
||||||
|
|
||||||
//////////////////////////////////////////////////////////////////////////////////////////////////////////
|
|
||||||
// allocate the pointer array for shared windows for our group
|
|
||||||
//////////////////////////////////////////////////////////////////////////////////////////////////////////
|
|
||||||
MPI_Barrier(WorldShmComm);
|
|
||||||
WorldShmCommBufs.resize(WorldShmSize);
|
|
||||||
|
|
||||||
///////////////////////////////////////////////////////////////////////////////////////////////////////////
|
|
||||||
// Each MPI rank should allocate our own buffer
|
|
||||||
///////////////////////////////////////////////////////////////////////////////////////////////////////////
|
|
||||||
ShmCommBuf = acceleratorAllocDevice(bytes);
|
|
||||||
|
|
||||||
if (ShmCommBuf == (void *)NULL ) {
|
|
||||||
std::cerr << " SharedMemoryMPI.cc acceleratorAllocDevice failed NULL pointer for " << bytes<<" bytes " << std::endl;
|
|
||||||
exit(EXIT_FAILURE);
|
|
||||||
}
|
|
||||||
|
|
||||||
std::cout << WorldRank << Mheader " SharedMemoryMPI.cc acceleratorAllocDevice "<< bytes
|
|
||||||
<< "bytes at "<< std::hex<< ShmCommBuf <<std::dec<<" for comms buffers " <<std::endl;
|
|
||||||
|
|
||||||
SharedMemoryZero(ShmCommBuf,bytes);
|
|
||||||
|
|
||||||
assert(WorldShmSize == 1);
|
|
||||||
for(int r=0;r<WorldShmSize;r++){
|
|
||||||
WorldShmCommBufs[r] = ShmCommBuf;
|
|
||||||
}
|
|
||||||
_ShmAllocBytes=bytes;
|
|
||||||
_ShmAlloc=1;
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#if defined(GRID_CUDA) ||defined(GRID_HIP) ||defined(GRID_SYCL)
|
|
||||||
void GlobalSharedMemory::SharedMemoryAllocate(uint64_t bytes, int flags)
|
void GlobalSharedMemory::SharedMemoryAllocate(uint64_t bytes, int flags)
|
||||||
{
|
{
|
||||||
void * ShmCommBuf ;
|
void * ShmCommBuf ;
|
||||||
@ -574,6 +534,9 @@ void GlobalSharedMemory::SharedMemoryAllocate(uint64_t bytes, int flags)
|
|||||||
///////////////////////////////////////////////////////////////////////////////////////////////////////////
|
///////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||||
// Each MPI rank should allocate our own buffer
|
// Each MPI rank should allocate our own buffer
|
||||||
///////////////////////////////////////////////////////////////////////////////////////////////////////////
|
///////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||||
|
#ifndef ACCELERATOR_AWARE_MPI
|
||||||
|
HostCommBuf= malloc(bytes);
|
||||||
|
#endif
|
||||||
ShmCommBuf = acceleratorAllocDevice(bytes);
|
ShmCommBuf = acceleratorAllocDevice(bytes);
|
||||||
if (ShmCommBuf == (void *)NULL ) {
|
if (ShmCommBuf == (void *)NULL ) {
|
||||||
std::cerr << " SharedMemoryMPI.cc acceleratorAllocDevice failed NULL pointer for " << bytes<<" bytes " << std::endl;
|
std::cerr << " SharedMemoryMPI.cc acceleratorAllocDevice failed NULL pointer for " << bytes<<" bytes " << std::endl;
|
||||||
@ -738,7 +701,6 @@ void GlobalSharedMemory::SharedMemoryAllocate(uint64_t bytes, int flags)
|
|||||||
_ShmAllocBytes=bytes;
|
_ShmAllocBytes=bytes;
|
||||||
_ShmAlloc=1;
|
_ShmAlloc=1;
|
||||||
}
|
}
|
||||||
#endif
|
|
||||||
|
|
||||||
#else
|
#else
|
||||||
#ifdef GRID_MPI3_SHMMMAP
|
#ifdef GRID_MPI3_SHMMMAP
|
||||||
@ -962,6 +924,12 @@ void SharedMemory::SetCommunicator(Grid_MPI_Comm comm)
|
|||||||
}
|
}
|
||||||
ShmBufferFreeAll();
|
ShmBufferFreeAll();
|
||||||
|
|
||||||
|
#ifndef ACCELERATOR_AWARE_MPI
|
||||||
|
host_heap_size = heap_size;
|
||||||
|
HostCommBuf= GlobalSharedMemory::HostCommBuf;
|
||||||
|
HostBufferFreeAll();
|
||||||
|
#endif
|
||||||
|
|
||||||
/////////////////////////////////////////////////////////////////////
|
/////////////////////////////////////////////////////////////////////
|
||||||
// find comm ranks in our SHM group (i.e. which ranks are on our node)
|
// find comm ranks in our SHM group (i.e. which ranks are on our node)
|
||||||
/////////////////////////////////////////////////////////////////////
|
/////////////////////////////////////////////////////////////////////
|
||||||
|
Loading…
Reference in New Issue
Block a user