Merge staggered fix linear operator and reduction

2025-08-04 13:47:07 +01:00 · 2018-04-26 10:33:19 +01:00
parent 5c936d88a0
commit 96272f3841
2 changed files with 11 additions and 10 deletions
--- a/lib/algorithms/LinearOperator.h
+++ b/lib/algorithms/LinearOperator.h
@@ -51,7 +51,7 @@ namespace Grid {

      virtual void Op     (const Field &in, Field &out) = 0; // Abstract base
      virtual void AdjOp  (const Field &in, Field &out) = 0; // Abstract base
-      virtual void HermOpAndNorm(const Field &in, Field &out,RealD &n1,RealD &n2);
+      virtual void HermOpAndNorm(const Field &in, Field &out,RealD &n1,RealD &n2) = 0;
      virtual void HermOp(const Field &in, Field &out)=0;
    };

--- a/lib/lattice/Lattice_reduction.h
+++ b/lib/lattice/Lattice_reduction.h
@@ -46,28 +46,29 @@ inline ComplexD innerProduct(const Lattice<vobj> &left,const Lattice<vobj> &righ
  GridBase *grid = left._grid;
  const int pad = 8;

-  scalar_type  nrm;
-  std::vector<scalar_type,alignedAllocator<vector_type> > sumarray(grid->SumArraySize()*pad);
+  ComplexD  inner;
+  Vector<ComplexD> sumarray(grid->SumArraySize()*pad);

  parallel_for(int thr=0;thr<grid->SumArraySize();thr++){
    int nwork, mywork, myoff;
    GridThread::GetWork(left._grid->oSites(),thr,mywork,myoff);
    
-    decltype(innerProductD(left._odata[0],right._odata[0])) vnrm=zero; // private to thread; sub summation
+    decltype(innerProductD(left._odata[0],right._odata[0])) vinner=zero; // private to thread; sub summation
    for(int ss=myoff;ss<mywork+myoff; ss++){
-      vnrm = vnrm + innerProductD(left._odata[ss],right._odata[ss]);
+      vinner = vinner + innerProductD(left._odata[ss],right._odata[ss]);
    }
    // All threads sum across SIMD; reduce serial work at end
    // one write per cacheline with streaming store
-    vstream(sumarray[thr*pad],Reduce(TensorRemove(vnrm))) ;
+    ComplexD tmp = Reduce(TensorRemove(vinner)) ;
+    vstream(sumarray[thr*pad],tmp);
  }
  
-  nrm=0.0;
+  inner=0.0;
  for(int i=0;i<grid->SumArraySize();i++){
-    nrm = nrm+sumarray[i*pad];
+    inner = inner+sumarray[i*pad];
  } 
-  right._grid->GlobalSum(nrm);
-  return nrm;
+  right._grid->GlobalSum(inner);
+  return inner;
 }

 /////////////////////////