mirror of https://github.com/paboyle/Grid.git (synced 2024-11-10 07:55:35 +00:00)

Reduction GPU no compile fix

commit 936c5ecf69
parent 22cfbdbbb3
@@ -62,7 +62,6 @@ inline typename vobj::scalar_object sum_cpu(const vobj *arg, Integer osites)
   for(int i=0;i<nthread;i++){
     ssum = ssum+sumarray[i];
   }
-
   return ssum;
 }
 template<class vobj>
@@ -156,7 +155,7 @@ inline ComplexD rankInnerProduct(const Lattice<vobj> &left,const Lattice<vobj> &
   const uint64_t sites = grid->oSites();
 
   // Might make all code paths go this way.
-  typedef decltype(Reduce(innerProductD(vobj(),vobj()))) inner_t;
+  typedef decltype(innerProductD(vobj(),vobj())) inner_t;
   Vector<inner_t> inner_tmp(sites);
   auto inner_tmp_v = &inner_tmp[0];
 
@@ -168,12 +167,13 @@ inline ComplexD rankInnerProduct(const Lattice<vobj> &left,const Lattice<vobj> &
     accelerator_for( ss, sites, 1,{
       auto x_l = left_v[ss];
       auto y_l = right_v[ss];
-      inner_tmp_v[ss]=Reduce(innerProductD(x_l,y_l));
+      inner_tmp_v[ss]=innerProductD(x_l,y_l);
     });
   }
 
   // This is in single precision and fails some tests
-  nrm = TensorRemove(sum(inner_tmp_v,sites));
+  auto anrm = sum(inner_tmp_v,sites);
+  nrm = anrm;
   return nrm;
 }
 
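With this change the accelerator loops store the full vectorised innerProductD result for each site and defer the cross-lane Reduce to the single final sum; the commit title suggests the per-site Reduce was what failed to compile for GPU targets. Below is a minimal stand-alone sketch of that buffer-then-reduce pattern, using toy types (LaneVec, Nlanes) rather than Grid's vComplexD and accelerator_for:

#include <array>
#include <complex>
#include <cstddef>
#include <iostream>
#include <vector>

constexpr std::size_t Nlanes = 4;                          // stand-in for a SIMD width
using LaneVec = std::array<std::complex<double>, Nlanes>;  // toy "vComplexD"

// Toy innerProductD: conj(x)*y lane by lane, no cross-lane reduction.
LaneVec innerProductD(const LaneVec &x, const LaneVec &y) {
  LaneVec r;
  for (std::size_t l = 0; l < Nlanes; l++) r[l] = std::conj(x[l]) * y[l];
  return r;
}

// Toy Reduce: collapse the lanes of one vectorised value to a scalar.
std::complex<double> Reduce(const LaneVec &v) {
  std::complex<double> s{};
  for (const auto &c : v) s += c;
  return s;
}

int main() {
  const std::size_t sites = 1024;
  std::vector<LaneVec> left(sites), right(sites), inner_tmp(sites);
  for (std::size_t ss = 0; ss < sites; ss++)
    for (std::size_t l = 0; l < Nlanes; l++) {
      left[ss][l]  = {1.0, 0.5};
      right[ss][l] = {2.0, -1.0};
    }

  // "Kernel" loop: one vectorised partial result per site, no per-site Reduce.
  for (std::size_t ss = 0; ss < sites; ss++)
    inner_tmp[ss] = innerProductD(left[ss], right[ss]);

  // Single reduction at the end: sum the buffer, then collapse the lanes once.
  LaneVec acc{};
  for (std::size_t ss = 0; ss < sites; ss++)
    for (std::size_t l = 0; l < Nlanes; l++) acc[l] += inner_tmp[ss][l];
  std::complex<double> nrm = Reduce(acc);

  std::cout << nrm << "\n";
  return 0;
}

The same reshaping is applied to axpby_norm_fast and innerProductNorm in the hunks below: only the kernel body changes, the final host-side sum stays where it was.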
@@ -219,13 +219,13 @@ axpby_norm_fast(Lattice<vobj> &z,sobj a,sobj b,const Lattice<vobj> &x,const Latt
   autoView( y_v, y, AcceleratorRead);
   autoView( z_v, z, AcceleratorWrite);
 
-  typedef decltype(Reduce(innerProductD(x_v[0],y_v[0]))) inner_t;
+  typedef decltype(innerProductD(x_v[0],y_v[0])) inner_t;
   Vector<inner_t> inner_tmp(sites);
   auto inner_tmp_v = &inner_tmp[0];
 
   accelerator_for( ss, sites, 1,{
     auto tmp = a*x_v[ss]+b*y_v[ss];
-    inner_tmp_v[ss]=Reduce(innerProductD(tmp,tmp));
+    inner_tmp_v[ss]=innerProductD(tmp,tmp);
     z_v[ss]=tmp;
   });
   nrm = real(TensorRemove(sum(inner_tmp_v,sites)));
@@ -248,8 +248,8 @@ innerProductNorm(ComplexD& ip, RealD &nrm, const Lattice<vobj> &left,const Latti
   const uint64_t sites = grid->oSites();
 
   // GPU
-  typedef decltype(Reduce(innerProductD(vobj(),vobj()))) inner_t;
-  typedef decltype(Reduce(innerProductD(vobj(),vobj()))) norm_t;
+  typedef decltype(innerProductD(vobj(),vobj())) inner_t;
+  typedef decltype(innerProductD(vobj(),vobj())) norm_t;
   Vector<inner_t> inner_tmp(sites);
   Vector<norm_t> norm_tmp(sites);
   auto inner_tmp_v = &inner_tmp[0];
@@ -259,8 +259,8 @@ innerProductNorm(ComplexD& ip, RealD &nrm, const Lattice<vobj> &left,const Latti
     autoView(right_v,right,AcceleratorRead);
     accelerator_for( ss, sites, 1,{
       auto left_tmp = left_v[ss];
-      inner_tmp_v[ss]=Reduce(innerProductD(left_tmp,right_v[ss]));
-      norm_tmp_v [ss]=Reduce(innerProductD(left_tmp,left_tmp));
+      inner_tmp_v[ss]=innerProductD(left_tmp,right_v[ss]);
+      norm_tmp_v [ss]=innerProductD(left_tmp,left_tmp);
     });
   }
 
@@ -59,6 +59,20 @@ class GridTensorBase {};
   using DoublePrecision2= typename Traits::DoublePrecision2; \
   static constexpr int TensorLevel = Traits::TensorLevel
 
+///////////////////////////////////////////////////////////
+// Allows to turn scalar<scalar<scalar<double>>>> back to double.
+///////////////////////////////////////////////////////////
+template <class T>
+accelerator_inline typename std::enable_if<!isGridTensor<T>::value, T>::type
+TensorRemove(T arg) {
+  return arg;
+}
+template <class vtype>
+accelerator_inline auto TensorRemove(iScalar<vtype> arg)
+  -> decltype(TensorRemove(arg._internal)) {
+  return TensorRemove(arg._internal);
+}
+
 template <class vtype>
 class iScalar {
 public:
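The TensorRemove helpers move ahead of the iScalar definition, presumably so the conversion operators inside iScalar can still find them when vtype is a plain arithmetic type (the nvcc-style instantiation quoted as a comment in the following hunk). The idiom itself is a SFINAE pass-through base case plus an overload that peels one scalar layer per call. A self-contained sketch with illustrative names (Scalar, isWrapped, not Grid's iScalar/isGridTensor):

#include <iostream>
#include <type_traits>

template <class T> struct Scalar { T _internal; };   // toy stand-in for iScalar

template <class T> struct isWrapped : std::false_type {};
template <class T> struct isWrapped<Scalar<T>> : std::true_type {};

// Base case: a type that is not a wrapper passes through unchanged.
template <class T>
typename std::enable_if<!isWrapped<T>::value, T>::type
TensorRemove(T arg) { return arg; }

// Recursive case: strip one Scalar<> layer and recurse on what is inside.
template <class T>
auto TensorRemove(Scalar<T> arg) -> decltype(TensorRemove(arg._internal)) {
  return TensorRemove(arg._internal);
}

int main() {
  Scalar<Scalar<Scalar<double>>> s{{{3.25}}};
  double d = TensorRemove(s);   // scalar<scalar<scalar<double>>> back to double
  std::cout << d << "\n";
  return 0;
}

Because the wrapper and the helper live in the same namespace, the recursive overload is still found by argument-dependent lookup at instantiation time; declaring the helpers before the class keeps the non-dependent uses compiling as well.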
@@ -135,9 +149,10 @@ public:
   operator ComplexD() const {
     return (TensorRemove(_internal));
   }
+  // instantiation of "Grid::iScalar<vtype>::operator Grid::RealD() const [with vtype=Grid::Real, U=Grid::Real, V=Grid::RealD, <unnamed>=0, <unnamed>=0U]"
   template <class U = vtype, class V = scalar_type, IfReal<V> = 0,IfNotSimd<U> = 0> accelerator_inline
   operator RealD() const {
-    return TensorRemove(_internal);
+    return (RealD) TensorRemove(_internal);
   }
   template <class U = vtype, class V = scalar_type, IfInteger<V> = 0, IfNotSimd<U> = 0> accelerator_inline
   operator Integer() const {
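The added (RealD) makes the float-to-double promotion explicit when TensorRemove(_internal) yields a single-precision value but the conversion target is RealD. A trivial sketch of the same shape, with a hypothetical Wrapped type rather than Grid code:

#include <iostream>

// Toy wrapper holding single precision; the conversion operator widens to
// double, with the cast written out as in the patched operator RealD().
struct Wrapped {
  float _internal;
  operator double() const { return (double) _internal; }
};

int main() {
  Wrapped w{1.5f};
  double d = w;   // implicit use of operator double()
  std::cout << d << "\n";
  return 0;
}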
@@ -169,20 +184,6 @@ public:
   strong_inline scalar_type * end() { return begin() + Traits::count; }
 };
 
-///////////////////////////////////////////////////////////
-// Allows to turn scalar<scalar<scalar<double>>>> back to double.
-///////////////////////////////////////////////////////////
-template <class T>
-accelerator_inline typename std::enable_if<!isGridTensor<T>::value, T>::type
-TensorRemove(T arg) {
-  return arg;
-}
-template <class vtype>
-accelerator_inline auto TensorRemove(iScalar<vtype> arg)
-  -> decltype(TensorRemove(arg._internal)) {
-  return TensorRemove(arg._internal);
-}
-
 template <class vtype, int N>
 class iVector {
 public: