diff --git a/Grid/lattice/Lattice_reduction_gpu_cub.h b/Grid/lattice/Lattice_reduction_gpu_cub.h index e104a6f2..6a732de4 100644 --- a/Grid/lattice/Lattice_reduction_gpu_cub.h +++ b/Grid/lattice/Lattice_reduction_gpu_cub.h @@ -52,6 +52,8 @@ NAMESPACE_BEGIN(Grid); #if defined(GRID_CUDA) || defined(GRID_HIP) +#define GRID_REDUCTION_TIMING + // Direct CUB reduction on the full scalar_objectD. // Only safe when sizeof(sobjD)*64 <= device sharedMemPerBlock. // Do not call directly for large composite types (e.g. LatticePropagator).