True shm_none implementation for GPUs: disables the use of device shared memory for the stencils

2026-01-10 11:59:34 +00:00 · 2020-08-14 18:37:00 +02:00
parent 12e6059a70
commit 06007db3d9
2 changed files with 15 additions and 3 deletions
--- a/Grid/communicator/SharedMemoryMPI.cc
+++ b/Grid/communicator/SharedMemoryMPI.cc
@@ -448,7 +448,11 @@ void GlobalSharedMemory::SharedMemoryAllocate(uint64_t bytes, int flags)
  ///////////////////////////////////////////////////////////////////////////////////////////////////////////
  // Each MPI rank should allocate our own buffer
  ///////////////////////////////////////////////////////////////////////////////////////////////////////////
 #ifndef GRID_MPI3_SHM_NONE
  auto err =  cudaMalloc(&ShmCommBuf, bytes);
 #else
  auto err =  cudaMallocManaged(&ShmCommBuf, bytes);
 #endif
  if ( err !=  cudaSuccess) {
    std::cerr << " SharedMemoryMPI.cc cudaMallocManaged failed for " << bytes<<" bytes " <<cudaGetErrorString(err)<< std::endl;
    exit(EXIT_FAILURE);  
@@ -466,7 +470,8 @@ void GlobalSharedMemory::SharedMemoryAllocate(uint64_t bytes, int flags)
  // Loop over ranks/gpu's on our node
  ///////////////////////////////////////////////////////////////////////////////////////////////////////////
  for(int r=0;r<WorldShmSize;r++){
-    
+
 #ifndef GRID_MPI3_SHM_NONE
    //////////////////////////////////////////////////
    // If it is me, pass around the IPC access key
    //////////////////////////////////////////////////
@@ -506,6 +511,9 @@ void GlobalSharedMemory::SharedMemoryAllocate(uint64_t bytes, int flags)
    // Save a copy of the device buffers
    ///////////////////////////////////////////////////////////////
    WorldShmCommBufs[r] = thisBuf;
 #else
    WorldShmCommBufs[r] = ShmCommBuf;
 #endif
  }
  _ShmAllocBytes=bytes;
--- a/tests/Test_stencil.cc
+++ b/tests/Test_stencil.cc
@@ -97,11 +97,15 @@ int main(int argc, char ** argv) {
 	  ocoor[dir]=(ocoor[dir]+disp)%Fine._rdimensions[dir];
 	}
 	std::cout << GridLogMessage << "A" << std::endl;
 	SimpleCompressor<vobj> compress;
 	myStencil.HaloExchange(Foo,compress);
 	Bar = Cshift(Foo,dir,disp);
 	std::cout << GridLogMessage << "B" << std::endl;
 	// Implement a stencil code that should agree with cshift!
 	for(int i=0;i<Check.Grid()->oSites();i++){
@@ -117,8 +121,8 @@ int main(int argc, char ** argv) {
 	    check[i] = foo[SE->_offset];
 	  else { 
 	    check[i] = myStencil.CommBuf()[SE->_offset];
-	    //	    std::cout << " receive "<<i<<" " << Check[i]<<std::endl;
+	    	    std::cout << " receive "<<i<<" " << check[i]<<std::endl;
-	    //	    std::cout << " Foo     "<<i<<" " <<   Foo[i]<<std::endl;
+	    	    std::cout << " Foo     "<<i<<" " <<   foo[i]<<std::endl;
 	  }
 	}