mirror of
https://github.com/paboyle/Grid.git
synced 2026-06-25 04:53:30 +01:00
Fixes to support CUDA 13 and later (CUDA >= 13). Specifically, the CUDA header <cuda.h> is no longer accidentally included inside Grid's namespace, and the breaking change in CUDA 13 that replaces cub::Sum() with ::cuda::std::plus<>{} has been worked around.
This commit is contained in:
@@ -1,7 +1,6 @@
|
|||||||
#pragma once
|
#pragma once
|
||||||
|
|
||||||
#if defined(GRID_CUDA)
|
#if defined(GRID_CUDA)
|
||||||
|
|
||||||
#include <cub/cub.cuh>
|
#include <cub/cub.cuh>
|
||||||
#define gpucub cub
|
#define gpucub cub
|
||||||
#define gpuError_t cudaError_t
|
#define gpuError_t cudaError_t
|
||||||
@@ -57,8 +56,13 @@ inline void sliceSumReduction_cub_small(const vobj *Data,
|
|||||||
//copy offsets to device
|
//copy offsets to device
|
||||||
acceleratorCopyToDeviceAsynch(&offsets[0],d_offsets,sizeof(int)*(rd+1),computeStream);
|
acceleratorCopyToDeviceAsynch(&offsets[0],d_offsets,sizeof(int)*(rd+1),computeStream);
|
||||||
|
|
||||||
|
#if defined(__CUDACC__) && (__CUDACC_VER_MAJOR__ >= 13)
|
||||||
|
#define GRID_CUB_SUM_OP ::cuda::std::plus<>{}
|
||||||
|
#else
|
||||||
|
// Fallback (anything that is not nvcc >= 13): go through the gpucub alias
// rather than naming ::cub directly, exactly as the pre-change call sites did.
// With `#define gpucub cub` this expands to the same ::cub::Sum().
// NOTE(review): presumably a non-CUDA backend remaps gpucub to another
// namespace, in which case a hard-coded ::cub would not resolve — confirm
// against the rest of the header.
#define GRID_CUB_SUM_OP ::gpucub::Sum()
|
||||||
|
#endif
|
||||||
|
|
||||||
gpuError_t gpuErr = gpucub::DeviceSegmentedReduce::Reduce(temp_storage_array, temp_storage_bytes, rb_p,d_out, rd, d_offsets, d_offsets+1, ::gpucub::Sum(), zero_init, computeStream);
|
gpuError_t gpuErr = gpucub::DeviceSegmentedReduce::Reduce(temp_storage_array, temp_storage_bytes, rb_p,d_out, rd, d_offsets, d_offsets+1, GRID_CUB_SUM_OP, zero_init, computeStream);
|
||||||
if (gpuErr!=gpuSuccess) {
|
if (gpuErr!=gpuSuccess) {
|
||||||
std::cout << GridLogError << "Lattice_slicesum_gpu.h: Encountered error during gpucub::DeviceSegmentedReduce::Reduce (setup)! Error: " << gpuErr <<std::endl;
|
std::cout << GridLogError << "Lattice_slicesum_gpu.h: Encountered error during gpucub::DeviceSegmentedReduce::Reduce (setup)! Error: " << gpuErr <<std::endl;
|
||||||
exit(EXIT_FAILURE);
|
exit(EXIT_FAILURE);
|
||||||
@@ -82,12 +86,14 @@ inline void sliceSumReduction_cub_small(const vobj *Data,
|
|||||||
});
|
});
|
||||||
|
|
||||||
//issue segmented reductions in computeStream
|
//issue segmented reductions in computeStream
|
||||||
gpuErr = gpucub::DeviceSegmentedReduce::Reduce(temp_storage_array, temp_storage_bytes, rb_p, d_out, rd, d_offsets, d_offsets+1,::gpucub::Sum(), zero_init, computeStream);
|
gpuErr = gpucub::DeviceSegmentedReduce::Reduce(temp_storage_array, temp_storage_bytes, rb_p, d_out, rd, d_offsets, d_offsets+1, GRID_CUB_SUM_OP, zero_init, computeStream);
|
||||||
if (gpuErr!=gpuSuccess) {
|
if (gpuErr!=gpuSuccess) {
|
||||||
std::cout << GridLogError << "Lattice_slicesum_gpu.h: Encountered error during gpucub::DeviceSegmentedReduce::Reduce! Error: " << gpuErr <<std::endl;
|
std::cout << GridLogError << "Lattice_slicesum_gpu.h: Encountered error during gpucub::DeviceSegmentedReduce::Reduce! Error: " << gpuErr <<std::endl;
|
||||||
exit(EXIT_FAILURE);
|
exit(EXIT_FAILURE);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#undef GRID_CUB_SUM_OP
|
||||||
|
|
||||||
acceleratorCopyFromDeviceAsynch(d_out,&lvSum[0],rd*sizeof(vobj),computeStream);
|
acceleratorCopyFromDeviceAsynch(d_out,&lvSum[0],rd*sizeof(vobj),computeStream);
|
||||||
|
|
||||||
//sync after copy
|
//sync after copy
|
||||||
|
|||||||
@@ -96,7 +96,9 @@ void acceleratorInit(void);
|
|||||||
|
|
||||||
#ifdef GRID_CUDA
|
#ifdef GRID_CUDA
|
||||||
|
|
||||||
|
NAMESPACE_END(Grid);
|
||||||
#include <cuda.h>
|
#include <cuda.h>
|
||||||
|
NAMESPACE_BEGIN(Grid);
|
||||||
|
|
||||||
#ifdef __CUDA_ARCH__
|
#ifdef __CUDA_ARCH__
|
||||||
#define GRID_SIMT
|
#define GRID_SIMT
|
||||||
|
|||||||
Reference in New Issue
Block a user