1
0
mirror of https://github.com/paboyle/Grid.git synced 2024-11-10 07:55:35 +00:00

Device resident GPU block buffer instead of UVM as hit likely UVM

bug. Code worked on CUDA 11.4 but fails on later drivers (certainly 530.30.02, but need to
find the perlmutter driver version).
This commit is contained in:
Peter Boyle 2023-03-22 19:07:32 -04:00
parent c6621806ca
commit d0bb033ea2

View File

@ -211,13 +211,28 @@ inline typename vobj::scalar_objectD sumD_gpu_small(const vobj *lat, Integer osi
assert(ok); assert(ok);
Integer smemSize = numThreads * sizeof(sobj); Integer smemSize = numThreads * sizeof(sobj);
// UVM seems to be buggy under later CUDA drivers
Vector<sobj> buffer(numBlocks); // This fails on A100 and driver 5.30.02 / CUDA 12.1
// Fails with multiple NVCC versions back to 11.4,
// which worked with earlier drivers.
// Not sure which driver had first fail and this bears checking
// Is awkward as must install multiple driver versions
#undef UVM_BLOCK_BUFFER
#ifndef UVM_BLOCK_BUFFER
commVector<sobj> buffer(numBlocks);
sobj *buffer_v = &buffer[0]; sobj *buffer_v = &buffer[0];
sobj result;
reduceKernel<<< numBlocks, numThreads, smemSize >>>(lat, buffer_v, size); reduceKernel<<< numBlocks, numThreads, smemSize >>>(lat, buffer_v, size);
accelerator_barrier(); accelerator_barrier();
auto result = buffer_v[0]; acceleratorCopyFromDevice(buffer_v,&result,sizeof(result));
#else
Vector<sobj> buffer(numBlocks);
sobj *buffer_v = &buffer[0];
sobj result;
reduceKernel<<< numBlocks, numThreads, smemSize >>>(lat, buffer_v, size);
accelerator_barrier();
result = *buffer_v;
#endif
return result; return result;
} }