1
0
mirror of https://github.com/paboyle/Grid.git synced 2025-04-11 14:40:46 +01:00

True shm_none implementation for GPU builds: disables the use of device shared memory for the stencils

This commit is contained in:
Christoph Lehner 2020-08-14 18:37:00 +02:00
parent 12e6059a70
commit 06007db3d9
2 changed files with 15 additions and 3 deletions

View File

@ -448,7 +448,11 @@ void GlobalSharedMemory::SharedMemoryAllocate(uint64_t bytes, int flags)
/////////////////////////////////////////////////////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////////////////////////////////////////////////
// Each MPI rank should allocate our own buffer // Each MPI rank should allocate our own buffer
/////////////////////////////////////////////////////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////////////////////////////////////////////////
#ifndef GRID_MPI3_SHM_NONE
auto err = cudaMalloc(&ShmCommBuf, bytes); auto err = cudaMalloc(&ShmCommBuf, bytes);
#else
auto err = cudaMallocManaged(&ShmCommBuf, bytes);
#endif
if ( err != cudaSuccess) { if ( err != cudaSuccess) {
std::cerr << " SharedMemoryMPI.cc cudaMallocManaged failed for " << bytes<<" bytes " <<cudaGetErrorString(err)<< std::endl; std::cerr << " SharedMemoryMPI.cc cudaMallocManaged failed for " << bytes<<" bytes " <<cudaGetErrorString(err)<< std::endl;
exit(EXIT_FAILURE); exit(EXIT_FAILURE);
@ -466,7 +470,8 @@ void GlobalSharedMemory::SharedMemoryAllocate(uint64_t bytes, int flags)
// Loop over ranks/gpu's on our node // Loop over ranks/gpu's on our node
/////////////////////////////////////////////////////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////////////////////////////////////////////////
for(int r=0;r<WorldShmSize;r++){ for(int r=0;r<WorldShmSize;r++){
#ifndef GRID_MPI3_SHM_NONE
////////////////////////////////////////////////// //////////////////////////////////////////////////
// If it is me, pass around the IPC access key // If it is me, pass around the IPC access key
////////////////////////////////////////////////// //////////////////////////////////////////////////
@ -506,6 +511,9 @@ void GlobalSharedMemory::SharedMemoryAllocate(uint64_t bytes, int flags)
// Save a copy of the device buffers // Save a copy of the device buffers
/////////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////
WorldShmCommBufs[r] = thisBuf; WorldShmCommBufs[r] = thisBuf;
#else
WorldShmCommBufs[r] = ShmCommBuf;
#endif
} }
_ShmAllocBytes=bytes; _ShmAllocBytes=bytes;

View File

@ -97,11 +97,15 @@ int main(int argc, char ** argv) {
ocoor[dir]=(ocoor[dir]+disp)%Fine._rdimensions[dir]; ocoor[dir]=(ocoor[dir]+disp)%Fine._rdimensions[dir];
} }
std::cout << GridLogMessage << "A" << std::endl;
SimpleCompressor<vobj> compress; SimpleCompressor<vobj> compress;
myStencil.HaloExchange(Foo,compress); myStencil.HaloExchange(Foo,compress);
Bar = Cshift(Foo,dir,disp); Bar = Cshift(Foo,dir,disp);
std::cout << GridLogMessage << "B" << std::endl;
// Implement a stencil code that should agree with cshift! // Implement a stencil code that should agree with cshift!
for(int i=0;i<Check.Grid()->oSites();i++){ for(int i=0;i<Check.Grid()->oSites();i++){
@ -117,8 +121,8 @@ int main(int argc, char ** argv) {
check[i] = foo[SE->_offset]; check[i] = foo[SE->_offset];
else { else {
check[i] = myStencil.CommBuf()[SE->_offset]; check[i] = myStencil.CommBuf()[SE->_offset];
// std::cout << " receive "<<i<<" " << Check[i]<<std::endl; std::cout << " receive "<<i<<" " << check[i]<<std::endl;
// std::cout << " Foo "<<i<<" " << Foo[i]<<std::endl; std::cout << " Foo "<<i<<" " << foo[i]<<std::endl;
} }
} }