mirror of
https://github.com/paboyle/Grid.git
synced 2026-06-24 20:43:29 +01:00
Lattice_reduction_sycl: fix double-precision accumulation in sumD_gpu_tensor
Accumulate in sobjD throughout rather than accumulating in sobj and converting the final sum. For float fields this matters: summing N floats then casting loses O(N*eps_float) relative precision vs accumulating in double from the start. Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -6,28 +6,27 @@ NAMESPACE_BEGIN(Grid);
|
|||||||
|
|
||||||
|
|
||||||
template <class vobj>
|
template <class vobj>
|
||||||
inline typename vobj::scalar_objectD sumD_gpu_tensor(const vobj *lat, Integer osites)
|
inline typename vobj::scalar_objectD sumD_gpu_tensor(const vobj *lat, Integer osites)
|
||||||
{
|
{
|
||||||
typedef typename vobj::scalar_object sobj;
|
typedef typename vobj::scalar_object sobj;
|
||||||
typedef typename vobj::scalar_objectD sobjD;
|
typedef typename vobj::scalar_objectD sobjD;
|
||||||
|
|
||||||
sobj identity; zeroit(identity);
|
sobjD identity; zeroit(identity);
|
||||||
sobj ret; zeroit(ret);
|
sobjD ret; zeroit(ret);
|
||||||
Integer nsimd= vobj::Nsimd();
|
{
|
||||||
{
|
sycl::buffer<sobjD, 1> abuff(&ret, {1});
|
||||||
sycl::buffer<sobj, 1> abuff(&ret, {1});
|
|
||||||
theGridAccelerator->submit([&](sycl::handler &cgh) {
|
theGridAccelerator->submit([&](sycl::handler &cgh) {
|
||||||
auto Reduction = sycl::reduction(abuff,cgh,identity,std::plus<>());
|
auto Reduction = sycl::reduction(abuff, cgh, identity, std::plus<>());
|
||||||
cgh.parallel_for(sycl::range<1>{osites},
|
cgh.parallel_for(sycl::range<1>{(size_t)osites},
|
||||||
Reduction,
|
Reduction,
|
||||||
[=] (sycl::id<1> item, auto &sum) {
|
[=](sycl::id<1> item, auto &sum) {
|
||||||
auto osite = item[0];
|
sobj s = Reduce(lat[item[0]]);
|
||||||
sum +=Reduce(lat[osite]);
|
sobjD sd; sd = s;
|
||||||
});
|
sum += sd;
|
||||||
|
});
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
sobjD dret; convertType(dret,ret);
|
return ret;
|
||||||
return dret;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
template <class vobj>
|
template <class vobj>
|
||||||
|
|||||||
Reference in New Issue
Block a user