mirror of
https://github.com/paboyle/Grid.git
synced 2025-04-09 21:50:45 +01:00
commit
b615fa0f35
@ -168,6 +168,22 @@ inline typename vobj::scalar_object sum(const Lattice<vobj> &arg)
|
|||||||
return ssum;
|
return ssum;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
template<class vobj>
|
||||||
|
inline typename vobj::scalar_object sum_large(const Lattice<vobj> &arg)
|
||||||
|
{
|
||||||
|
#if defined(GRID_CUDA)||defined(GRID_HIP)
|
||||||
|
autoView( arg_v, arg, AcceleratorRead);
|
||||||
|
Integer osites = arg.Grid()->oSites();
|
||||||
|
auto ssum= sum_gpu_large(&arg_v[0],osites);
|
||||||
|
#else
|
||||||
|
autoView(arg_v, arg, CpuRead);
|
||||||
|
Integer osites = arg.Grid()->oSites();
|
||||||
|
auto ssum= sum_cpu(&arg_v[0],osites);
|
||||||
|
#endif
|
||||||
|
arg.Grid()->GlobalSum(ssum);
|
||||||
|
return ssum;
|
||||||
|
}
|
||||||
|
|
||||||
////////////////////////////////////////////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||||
// Deterministic Reduction operations
|
// Deterministic Reduction operations
|
||||||
////////////////////////////////////////////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||||
|
@ -232,10 +232,6 @@ inline typename vobj::scalar_objectD sumD_gpu_large(const vobj *lat, Integer osi
|
|||||||
|
|
||||||
const int words = sizeof(vobj)/sizeof(vector);
|
const int words = sizeof(vobj)/sizeof(vector);
|
||||||
|
|
||||||
Integer nsimd= vobj::Nsimd();
|
|
||||||
Integer size = osites*nsimd;
|
|
||||||
Integer numThreads, numBlocks;
|
|
||||||
|
|
||||||
Vector<vector> buffer(osites);
|
Vector<vector> buffer(osites);
|
||||||
vector *dat = (vector *)lat;
|
vector *dat = (vector *)lat;
|
||||||
vector *buf = &buffer[0];
|
vector *buf = &buffer[0];
|
||||||
@ -258,10 +254,7 @@ inline typename vobj::scalar_objectD sumD_gpu(const vobj *lat, Integer osites)
|
|||||||
typedef typename vobj::scalar_typeD scalarD;
|
typedef typename vobj::scalar_typeD scalarD;
|
||||||
typedef typename vobj::scalar_objectD sobj;
|
typedef typename vobj::scalar_objectD sobj;
|
||||||
sobj ret;
|
sobj ret;
|
||||||
scalarD *ret_p = (scalarD *)&ret;
|
|
||||||
|
|
||||||
const int words = sizeof(vobj)/sizeof(vector);
|
|
||||||
|
|
||||||
Integer nsimd= vobj::Nsimd();
|
Integer nsimd= vobj::Nsimd();
|
||||||
Integer size = osites*nsimd;
|
Integer size = osites*nsimd;
|
||||||
Integer numThreads, numBlocks;
|
Integer numThreads, numBlocks;
|
||||||
@ -275,7 +268,6 @@ inline typename vobj::scalar_objectD sumD_gpu(const vobj *lat, Integer osites)
|
|||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
/////////////////////////////////////////////////////////////////////////////////////////////////////////
|
/////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||||
// Return the result in the same precision as the input; the reduction itself is performed in double precision
|
// Return the result in the same precision as the input; the reduction itself is performed in double precision
|
||||||
/////////////////////////////////////////////////////////////////////////////////////////////////////////
|
/////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||||
@ -288,6 +280,13 @@ inline typename vobj::scalar_object sum_gpu(const vobj *lat, Integer osites)
|
|||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
template <class vobj>
|
||||||
|
inline typename vobj::scalar_object sum_gpu_large(const vobj *lat, Integer osites)
|
||||||
|
{
|
||||||
|
typedef typename vobj::scalar_object sobj;
|
||||||
|
sobj result;
|
||||||
|
result = sumD_gpu_large(lat,osites);
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
NAMESPACE_END(Grid);
|
NAMESPACE_END(Grid);
|
||||||
|
Loading…
x
Reference in New Issue
Block a user