Mirror of https://github.com/paboyle/Grid.git (synced 2024-11-10 07:55:35 +00:00)
Some prep work for GPU shared memory. Need to be careful, as we will try GPU Direct
RDMA and inter-GPU memory sharing on Summit later.
This commit is contained in:
parent 2075b177ef
commit 94d1ae4c82
@@ -42,9 +42,14 @@ void GlobalSharedMemory::Init(Grid_MPI_Comm comm)
   /////////////////////////////////////////////////////////////////////
   // Split into groups that can share memory
   /////////////////////////////////////////////////////////////////////
+#ifdef GRID_NVCC
+  MPI_Comm_split(comm, WorldRank , 0,&WorldShmComm);
+#else
   MPI_Comm_split_type(comm, MPI_COMM_TYPE_SHARED, 0, MPI_INFO_NULL,&WorldShmComm);
+#endif
   MPI_Comm_rank(WorldShmComm ,&WorldShmRank);
   MPI_Comm_size(WorldShmComm ,&WorldShmSize);
+  std::cout << " Shared communicator of size " <<WorldShmSize << std::endl;
   // WorldShmComm, WorldShmSize, WorldShmRank
 
   // WorldNodes
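Worth noting in the hunk above: MPI_Comm_split takes a color argument, and ranks that pass the same color land in the same sub-communicator. Passing WorldRank as the color therefore gives every rank its own single-member WorldShmComm on GRID_NVCC builds, i.e. no host shared-memory grouping at all; each rank instead gets a private CUDA managed buffer (see the allocator hunk below). A minimal standalone sketch of the two split modes, not Grid code, with USE_SINGLE_RANK_GROUPS as a hypothetical stand-in for GRID_NVCC:

// Sketch of the two communicator-split modes (not Grid code).
// Color semantics: ranks passing the same color end up in the same
// sub-communicator, so color = world_rank puts every rank alone.
#include <mpi.h>
#include <cstdio>

int main(int argc, char **argv)
{
  MPI_Init(&argc, &argv);
  int world_rank;
  MPI_Comm_rank(MPI_COMM_WORLD, &world_rank);

  MPI_Comm shm_comm;
#ifdef USE_SINGLE_RANK_GROUPS   // hypothetical stand-in for GRID_NVCC
  // color = world_rank: every rank is alone in its group
  MPI_Comm_split(MPI_COMM_WORLD, world_rank, 0, &shm_comm);
#else
  // group ranks that can genuinely share host memory (same node)
  MPI_Comm_split_type(MPI_COMM_WORLD, MPI_COMM_TYPE_SHARED, 0,
                      MPI_INFO_NULL, &shm_comm);
#endif

  int shm_size;
  MPI_Comm_size(shm_comm, &shm_size);
  std::printf("world rank %d: shared group size %d\n", world_rank, shm_size);

  MPI_Comm_free(&shm_comm);
  MPI_Finalize();
  return 0;
}

Run under, say, mpirun -n 4: the first branch should report a group size of 1 on every rank, the second the number of ranks sharing each node.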
@@ -179,6 +184,34 @@ void GlobalSharedMemory::OptimalCommunicator(const Coordinate &processors,Grid_M
   ////////////////////////////////////////////////////////////////////////////////////////////
   // Hugetlbfs mapping intended
   ////////////////////////////////////////////////////////////////////////////////////////////
+#ifdef GRID_NVCC
+void GlobalSharedMemory::SharedMemoryAllocate(uint64_t bytes, int flags)
+{
+  void * ShmCommBuf ;
+  assert(_ShmSetup==1);
+  assert(_ShmAlloc==0);
+
+
+  //std::cerr << " allocating "<<bytes <<" bytes "<< std::endl;
+  if ( cudaMallocManaged(&ShmCommBuf, bytes) != cudaSuccess) {
+    perror("cudaMallocManaged failed ");
+    exit(EXIT_FAILURE);
+  }
+
+  if (ShmCommBuf == (void *)NULL ) {
+    perror("cudaMallocManaged failed ");
+    exit(EXIT_FAILURE);
+  }
+  std::cerr << " Cuda allocated managed memory at "<<std::hex<<ShmCommBuf <<" - " << ((uint64_t)ShmCommBuf + bytes)<<std::dec<< std::endl;
+  bzero(ShmCommBuf,bytes);
+
+  WorldShmCommBufs.resize(1);
+  WorldShmCommBufs[0] = ShmCommBuf;
+  _ShmAllocBytes=bytes;
+  _ShmAlloc=1;
+  //std::cerr << " cudaMallocManaged Returning ; "<< bytes <<" bytes allocated at "<<std::hex<<ShmCommBuf <<std::dec<< std::endl;
+}
+#else
 #ifdef GRID_MPI3_SHMMMAP
 void GlobalSharedMemory::SharedMemoryAllocate(uint64_t bytes, int flags)
 {
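The new GRID_NVCC allocator above replaces the node-wide shared segment with one cudaMallocManaged buffer per rank, so the "shared memory window" becomes CUDA unified memory that the driver can migrate between host and device. One caveat for anyone adapting this: cudaMallocManaged signals failure through its cudaError_t return value rather than errno, so the perror call will print an unrelated message on failure; cudaGetErrorString is the more informative choice. A minimal sketch of a checked managed allocation under that assumption (not Grid code):

// Checked managed allocation (sketch, not Grid code). CUDA errors are
// reported via the cudaError_t return value, not errno, so we format
// the message with cudaGetErrorString instead of perror.
#include <cuda_runtime.h>
#include <cstdio>
#include <cstdlib>
#include <cstring>

void *AllocManagedChecked(size_t bytes)
{
  void *buf = nullptr;
  cudaError_t err = cudaMallocManaged(&buf, bytes);
  if (err != cudaSuccess || buf == nullptr) {
    std::fprintf(stderr, "cudaMallocManaged(%zu) failed: %s\n",
                 bytes, cudaGetErrorString(err));
    std::exit(EXIT_FAILURE);
  }
  std::memset(buf, 0, bytes);   // same effect as the bzero() in the hunk
  return buf;
}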
@@ -291,7 +324,7 @@ void GlobalSharedMemory::SharedMemoryAllocate(uint64_t bytes, int flags)
 _ShmAllocBytes = bytes;
 }
 #endif
-
+#endif
 ////////////////////////////////////////////////////////
 // Global shared functionality finished
 // Now move to per communicator functionality
@@ -306,7 +339,11 @@ void SharedMemory::SetCommunicator(Grid_MPI_Comm comm)
   /////////////////////////////////////////////////////////////////////
   // Split into groups that can share memory
   /////////////////////////////////////////////////////////////////////
+#ifdef GRID_NVCC
+  MPI_Comm_split(comm, rank, 0,&ShmComm);
+#else
   MPI_Comm_split_type(comm, MPI_COMM_TYPE_SHARED, 0, MPI_INFO_NULL,&ShmComm);
+#endif
   MPI_Comm_rank(ShmComm ,&ShmRank);
   MPI_Comm_size(ShmComm ,&ShmSize);
   ShmCommBufs.resize(ShmSize);
@@ -318,11 +355,11 @@ void SharedMemory::SetCommunicator(Grid_MPI_Comm comm)
 heap_size = GlobalSharedMemory::ShmAllocBytes();
 for(int r=0;r<ShmSize;r++){
 
-  uint32_t sr = (r==ShmRank) ? GlobalSharedMemory::WorldRank : 0 ;
+  uint32_t wsr = (r==ShmRank) ? GlobalSharedMemory::WorldShmRank : 0 ;
 
-  MPI_Allreduce(MPI_IN_PLACE,&sr,1,MPI_UINT32_T,MPI_SUM,comm);
+  MPI_Allreduce(MPI_IN_PLACE,&wsr,1,MPI_UINT32_T,MPI_SUM,comm);
 
-  ShmCommBufs[r] = GlobalSharedMemory::WorldShmCommBufs[sr];
+  ShmCommBufs[r] = GlobalSharedMemory::WorldShmCommBufs[wsr];
 }
 ShmBufferFreeAll();
 
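The rename sr → wsr in this hunk is more than cosmetic: ShmCommBufs must be indexed by the rank within the node-local shared-memory world (WorldShmRank), not the global WorldRank, and the two coincide only when one node spans the whole job. The loop uses a sum-reduction as an exchange: each rank contributes a nonzero value only in the iteration where r equals its own ShmRank, so after the MPI_Allreduce every rank holds, for slot r, the WorldShmRank of the peer occupying that slot. A standalone sketch of this allgather-by-summation idiom (not Grid code; the payload values are illustrative):

// Allgather-by-summation (sketch, not Grid code). Each rank writes its
// payload only into its own slot and zero elsewhere; a SUM reduction
// then leaves slot r holding rank r's payload on every rank.
#include <mpi.h>
#include <cstdint>
#include <cstdio>
#include <vector>

int main(int argc, char **argv)
{
  MPI_Init(&argc, &argv);
  int rank, size;
  MPI_Comm_rank(MPI_COMM_WORLD, &rank);
  MPI_Comm_size(MPI_COMM_WORLD, &size);

  uint32_t payload = 100 + rank;        // stand-in for WorldShmRank

  std::vector<uint32_t> slots(size, 0);
  slots[rank] = payload;                // nonzero only in my own slot
  MPI_Allreduce(MPI_IN_PLACE, slots.data(), size,
                MPI_UINT32_T, MPI_SUM, MPI_COMM_WORLD);

  // slots[r] now holds rank r's payload everywhere, equivalent to an
  // MPI_Allgather of one uint32_t per rank.
  if (rank == 0)
    for (int r = 0; r < size; r++)
      std::printf("slot %d = %u\n", r, (unsigned)slots[r]);

  MPI_Finalize();
  return 0;
}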
@@ -380,7 +417,6 @@ void *SharedMemory::ShmBuffer(int rank)
 }
 void *SharedMemory::ShmBufferTranslate(int rank,void * local_p)
 {
-  static int count =0;
 int gpeer = ShmRanks[rank];
 assert(gpeer!=ShmRank); // never send to self
 if (gpeer == MPI_UNDEFINED){