From 3c9012676a9863ed9c33e135ebf60a3576e204be Mon Sep 17 00:00:00 2001
From: dbollweg
Date: Tue, 27 Feb 2024 12:41:45 -0500
Subject: [PATCH] CUDA cub refuses to reduce vSpinColourMatrix, breaking up into smaller parts like already done for HIP case.

---
 Grid/lattice/Lattice_slicesum_core.h | 11 +++--------
 1 file changed, 3 insertions(+), 8 deletions(-)

diff --git a/Grid/lattice/Lattice_slicesum_core.h b/Grid/lattice/Lattice_slicesum_core.h
index 7c3518cd..9c4cc051 100644
--- a/Grid/lattice/Lattice_slicesum_core.h
+++ b/Grid/lattice/Lattice_slicesum_core.h
@@ -119,18 +119,13 @@ template<class vobj> inline void sliceSumReduction_cub_large(const vobj *Data, V
 
 template<class vobj> inline void sliceSumReduction_cub(const Lattice<vobj> &Data, Vector<vobj> &lvSum, const int rd, const int e1, const int e2, const int stride, const int ostride, const int Nsimd)
 {
-  autoView(Data_v, Data, AcceleratorRead);
-  #if defined(GRID_CUDA)
-  sliceSumReduction_cub_small(&Data_v[0], lvSum, rd, e1, e2, stride, ostride, Nsimd);
-
-  #elif defined (GRID_HIP) //hipcub cannot deal with large vobjs that don't fit in shared memory, therefore separate into _small/_large.
-  if constexpr (sizeof(vobj) <= 256) {
+  autoView(Data_v, Data, AcceleratorRead); //hipcub/cub cannot deal with large vobjs so we split into small/large case.
+    if constexpr (sizeof(vobj) <= 256) {
       sliceSumReduction_cub_small(&Data_v[0], lvSum, rd, e1, e2, stride, ostride, Nsimd);
     }
     else {
       sliceSumReduction_cub_large(&Data_v[0], lvSum, rd, e1, e2, stride, ostride, Nsimd);
     }
-  #endif
 }
 #endif
 
@@ -215,4 +210,4 @@ template<class vobj> inline void sliceSumReduction(const Lattice<vobj> &Data, Ve
 }
 
 
-NAMESPACE_END(Grid);
\ No newline at end of file
+NAMESPACE_END(Grid);