From bf2140e74d09ab75eba96eef8209d3f9db03f1fa Mon Sep 17 00:00:00 2001
From: Peter Boyle <paboyle@ph.ed.ac.uk>
Date: Mon, 18 May 2026 21:53:40 -0400
Subject: [PATCH] Lattice_reduction_sycl: fix double-precision accumulation in
 sumD_gpu_tensor

Accumulate in sobjD throughout rather than accumulating in sobj and
converting the final sum. For float fields this matters: summing N floats
then casting loses O(N*eps_float) relative precision vs accumulating in
double from the start.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
---
 Grid/lattice/Lattice_reduction_sycl.h | 31 +++++++++++++--------------
 1 file changed, 15 insertions(+), 16 deletions(-)
diff --git a/Grid/lattice/Lattice_reduction_sycl.h b/Grid/lattice/Lattice_reduction_sycl.h
index ace7f010..84e7a09a 100644
--- a/Grid/lattice/Lattice_reduction_sycl.h
+++ b/Grid/lattice/Lattice_reduction_sycl.h
@@ -6,28 +6,27 @@ NAMESPACE_BEGIN(Grid);
 
 
 template <class vobj>
-inline typename vobj::scalar_objectD sumD_gpu_tensor(const vobj *lat, Integer osites) 
+inline typename vobj::scalar_objectD sumD_gpu_tensor(const vobj *lat, Integer osites)
 {
-  typedef typename vobj::scalar_object sobj;
+  typedef typename vobj::scalar_object  sobj;
   typedef typename vobj::scalar_objectD sobjD;
 
-  sobj identity; zeroit(identity);
-  sobj ret; zeroit(ret);
-  Integer nsimd= vobj::Nsimd();
-  { 
-    sycl::buffer<sobj, 1> abuff(&ret, {1});
+  sobjD identity; zeroit(identity);
+  sobjD ret;      zeroit(ret);
+  {
+    sycl::buffer<sobjD, 1> abuff(&ret, {1});
     theGridAccelerator->submit([&](sycl::handler &cgh) {
-      auto Reduction = sycl::reduction(abuff,cgh,identity,std::plus<>());
-      cgh.parallel_for(sycl::range<1>{osites},
-                      Reduction,
-                      [=] (sycl::id<1> item, auto &sum) {
-                        auto osite   = item[0];
-                        sum +=Reduce(lat[osite]);
-                      });
+      auto Reduction = sycl::reduction(abuff, cgh, identity, std::plus<>());
+      cgh.parallel_for(sycl::range<1>{(size_t)osites},
+                       Reduction,
+                       [=](sycl::id<1> item, auto &sum) {
+                         sobj s = Reduce(lat[item[0]]);
+                         sobjD sd; sd = s;
+                         sum += sd;
+                       });
     });
   }
-  sobjD dret; convertType(dret,ret);
-  return dret;
+  return ret;
 }
 
 template <class vobj>