1
0
mirror of https://github.com/paboyle/Grid.git synced 2025-06-19 08:17:05 +01:00

true shm_none implementation with GPUs that disables the use of device shared memory for the stencils

This commit is contained in:
Christoph Lehner
2020-08-14 18:37:00 +02:00
parent 12e6059a70
commit 06007db3d9
2 changed files with 15 additions and 3 deletions

View File

@ -448,7 +448,11 @@ void GlobalSharedMemory::SharedMemoryAllocate(uint64_t bytes, int flags)
///////////////////////////////////////////////////////////////////////////////////////////////////////////
// Each MPI rank should allocate our own buffer
///////////////////////////////////////////////////////////////////////////////////////////////////////////
#ifndef GRID_MPI3_SHM_NONE
auto err = cudaMalloc(&ShmCommBuf, bytes);
#else
auto err = cudaMallocManaged(&ShmCommBuf, bytes);
#endif
if ( err != cudaSuccess) {
std::cerr << " SharedMemoryMPI.cc cudaMallocManaged failed for " << bytes<<" bytes " <<cudaGetErrorString(err)<< std::endl;
exit(EXIT_FAILURE);
@ -466,7 +470,8 @@ void GlobalSharedMemory::SharedMemoryAllocate(uint64_t bytes, int flags)
// Loop over ranks/gpu's on our node
///////////////////////////////////////////////////////////////////////////////////////////////////////////
for(int r=0;r<WorldShmSize;r++){
#ifndef GRID_MPI3_SHM_NONE
//////////////////////////////////////////////////
// If it is me, pass around the IPC access key
//////////////////////////////////////////////////
@ -506,6 +511,9 @@ void GlobalSharedMemory::SharedMemoryAllocate(uint64_t bytes, int flags)
// Save a copy of the device buffers
///////////////////////////////////////////////////////////////
WorldShmCommBufs[r] = thisBuf;
#else
WorldShmCommBufs[r] = ShmCommBuf;
#endif
}
_ShmAllocBytes=bytes;