1
0
mirror of https://github.com/paboyle/Grid.git synced 2025-04-27 06:05:56 +01:00

Deprecate shared memory copy as direction matters on nvidia GPU

This commit is contained in:
Peter Boyle 2025-04-04 16:42:15 -04:00
parent 4ab73b36b2
commit ebbd015c5c

View File

@@ -547,7 +547,7 @@ void GlobalSharedMemory::SharedMemoryAllocate(uint64_t bytes, int flags)
 HostCommBuf= acceleratorAllocHost(bytes);
 #else
 HostCommBuf= malloc(bytes); /// CHANGE THIS TO malloc_host
-#ifdef HAVE_NUMAIF_H
+#if 0
 #warning "Moving host buffers to specific NUMA domain"
 int numa;
 char *numa_name=(char *)getenv("MPI_BUF_NUMA");
@@ -916,14 +916,14 @@ void GlobalSharedMemory::SharedMemoryZero(void *dest,size_t bytes)
 bzero(dest,bytes);
 #endif
 }
-void GlobalSharedMemory::SharedMemoryCopy(void *dest,void *src,size_t bytes)
-{
-#if defined(GRID_CUDA) || defined(GRID_HIP) || defined(GRID_SYCL)
-acceleratorCopyToDevice(src,dest,bytes);
-#else
-bcopy(src,dest,bytes);
-#endif
-}
+//void GlobalSharedMemory::SharedMemoryCopy(void *dest,void *src,size_t bytes)
+//{
+//#if defined(GRID_CUDA) || defined(GRID_HIP) || defined(GRID_SYCL)
+// acceleratorCopyToDevice(src,dest,bytes);
+//#else
+// bcopy(src,dest,bytes);
+//#endif
+//}
 ////////////////////////////////////////////////////////
 // Global shared functionality finished
 // Now move to per communicator functionality
@@ -989,7 +989,7 @@ void SharedMemory::SetCommunicator(Grid_MPI_Comm comm)
 }
 #endif
-//SharedMemoryTest();
+SharedMemoryTest();
 }
 //////////////////////////////////////////////////////////////////
 // On node barrier
@@ -1011,13 +1011,23 @@ void SharedMemory::SharedMemoryTest(void)
 check[0]=GlobalSharedMemory::WorldNode;
 check[1]=r;
 check[2]=magic;
-GlobalSharedMemory::SharedMemoryCopy( ShmCommBufs[r], check, 3*sizeof(uint64_t));
+// std::cerr << " ShmRank "<<ShmRank<<" storing "<<GlobalSharedMemory::WorldNode<<","<<r<<","<<std::hex<<magic<<" to buf "<<ShmCommBufs[r]
+// <<std::dec<<std::endl;
+acceleratorPut(ShmCommBufs[r][0],check[0]);
+acceleratorPut(ShmCommBufs[r][1],check[1]);
+acceleratorPut(ShmCommBufs[r][2],check[2]);
+// GlobalSharedMemory::SharedMemoryCopy( ShmCommBufs[r], check, 3*sizeof(uint64_t));
 }
 }
 ShmBarrier();
 for(uint64_t r=0;r<ShmSize;r++){
 ShmBarrier();
-GlobalSharedMemory::SharedMemoryCopy(check,ShmCommBufs[r], 3*sizeof(uint64_t));
+// GlobalSharedMemory::SharedMemoryCopy(check,ShmCommBufs[r], 3*sizeof(uint64_t));
+// std::cerr << " ShmRank "<<ShmRank<<" read "<<check[0]<<","<<check[1]<<","<<std::hex<<check[2]<<" from buf "<<ShmCommBufs[r]
+// <<std::dec<<std::endl;
+check[0] = acceleratorGet(ShmCommBufs[r][0]);
+check[1] = acceleratorGet(ShmCommBufs[r][1]);
+check[2] = acceleratorGet(ShmCommBufs[r][2]);
 ShmBarrier();
 assert(check[0]==GlobalSharedMemory::WorldNode);
 assert(check[1]==r);