mirror of
https://github.com/paboyle/Grid.git
synced 2024-11-09 23:45:36 +00:00
CUDA cub refuses to reduce vSpinColourMatrix, so break it up into smaller parts, as is already done for the HIP case.
This commit is contained in:
parent
b507fe209c
commit
3c9012676a
@ -119,18 +119,13 @@ template<class vobj> inline void sliceSumReduction_cub_large(const vobj *Data, V
|
|||||||
|
|
||||||
template<class vobj> inline void sliceSumReduction_cub(const Lattice<vobj> &Data, Vector<vobj> &lvSum, const int rd, const int e1, const int e2, const int stride, const int ostride, const int Nsimd)
|
template<class vobj> inline void sliceSumReduction_cub(const Lattice<vobj> &Data, Vector<vobj> &lvSum, const int rd, const int e1, const int e2, const int stride, const int ostride, const int Nsimd)
|
||||||
{
|
{
|
||||||
autoView(Data_v, Data, AcceleratorRead);
|
autoView(Data_v, Data, AcceleratorRead); //hipcub/cub cannot deal with large vobjs so we split into small/large case.
|
||||||
#if defined(GRID_CUDA)
|
if constexpr (sizeof(vobj) <= 256) {
|
||||||
sliceSumReduction_cub_small(&Data_v[0], lvSum, rd, e1, e2, stride, ostride, Nsimd);
|
|
||||||
|
|
||||||
#elif defined (GRID_HIP) //hipcub cannot deal with large vobjs that don't fit in shared memory, therefore separate into _small/_large.
|
|
||||||
if constexpr (sizeof(vobj) <= 256) {
|
|
||||||
sliceSumReduction_cub_small(&Data_v[0], lvSum, rd, e1, e2, stride, ostride, Nsimd);
|
sliceSumReduction_cub_small(&Data_v[0], lvSum, rd, e1, e2, stride, ostride, Nsimd);
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
sliceSumReduction_cub_large(&Data_v[0], lvSum, rd, e1, e2, stride, ostride, Nsimd);
|
sliceSumReduction_cub_large(&Data_v[0], lvSum, rd, e1, e2, stride, ostride, Nsimd);
|
||||||
}
|
}
|
||||||
#endif
|
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
@ -215,4 +210,4 @@ template<class vobj> inline void sliceSumReduction(const Lattice<vobj> &Data, Ve
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
NAMESPACE_END(Grid);
|
NAMESPACE_END(Grid);
|
||||||
|
Loading…
Reference in New Issue
Block a user