Some fix for the GenericHMCrunner

2025-12-19 20:24:29 +00:00 · 2016-10-10 09:43:05 +01:00
parent 6eb873dd96
commit 26b9740d53
5 changed files with 80 additions and 60 deletions
--- a/lib/lattice/Lattice_reduction.h
+++ b/lib/lattice/Lattice_reduction.h
@@ -38,51 +38,56 @@ namespace Grid {
    ////////////////////////////////////////////////////////////////////////////////////////////////////
    // Deterministic Reduction operations
    ////////////////////////////////////////////////////////////////////////////////////////////////////
-  template<class vobj> inline RealD norm2(const Lattice<vobj> &arg){
-    ComplexD nrm = innerProduct(arg,arg);
-    return std::real(nrm); 
+template <class vobj>
+inline RealD norm2(const Lattice<vobj> &arg) {
+  ComplexD nrm = innerProduct(arg, arg);
+  return std::real(nrm);
+}
+
+template <class vobj>
+inline ComplexD innerProduct(const Lattice<vobj> &left,
+                             const Lattice<vobj> &right) {
+  typedef typename vobj::scalar_type scalar_type;
+  typedef typename vobj::vector_type vector_type;
+  scalar_type nrm;
+
+  GridBase *grid = left._grid;
+
+  std::vector<vector_type, alignedAllocator<vector_type> > sumarray(
+      grid->SumArraySize());
+  for (int i = 0; i < grid->SumArraySize(); i++) {
+    sumarray[i] = zero;
  }

-    template<class vobj>
-    inline ComplexD innerProduct(const Lattice<vobj> &left,const Lattice<vobj> &right) 
-    {
-      typedef typename vobj::scalar_type scalar_type;
-      typedef typename vobj::vector_type vector_type;
-      scalar_type  nrm;
+  PARALLEL_FOR_LOOP
+  for (int thr = 0; thr < grid->SumArraySize(); thr++) {
+    int nwork, mywork, myoff;
+    GridThread::GetWork(left._grid->oSites(), thr, mywork, myoff);

-      GridBase *grid = left._grid;
-
-      std::vector<vector_type,alignedAllocator<vector_type> > sumarray(grid->SumArraySize());
-      for(int i=0;i<grid->SumArraySize();i++){
-	sumarray[i]=zero;
-      }
-
-PARALLEL_FOR_LOOP
-      for(int thr=0;thr<grid->SumArraySize();thr++){
-	int nwork, mywork, myoff;
-	GridThread::GetWork(left._grid->oSites(),thr,mywork,myoff);
-	
-	decltype(innerProduct(left._odata[0],right._odata[0])) vnrm=zero; // private to thread; sub summation
-        for(int ss=myoff;ss<mywork+myoff; ss++){
-	  vnrm = vnrm + innerProduct(left._odata[ss],right._odata[ss]);
-	}
-	sumarray[thr]=TensorRemove(vnrm) ;
-      }
-    
-      vector_type vvnrm; vvnrm=zero;  // sum across threads
-      for(int i=0;i<grid->SumArraySize();i++){
-	vvnrm = vvnrm+sumarray[i];
-      } 
-      nrm = Reduce(vvnrm);// sum across simd
-      right._grid->GlobalSum(nrm);
-      return nrm;
+    decltype(innerProduct(left._odata[0], right._odata[0])) vnrm =
+        zero;  // private to thread; sub summation
+    for (int ss = myoff; ss < mywork + myoff; ss++) {
+      vnrm = vnrm + innerProduct(left._odata[ss], right._odata[ss]);
    }
+    sumarray[thr] = TensorRemove(vnrm);
+  }
+   

-    template<class Op,class T1>
-      inline auto sum(const LatticeUnaryExpression<Op,T1> & expr)
-      ->typename decltype(expr.first.func(eval(0,std::get<0>(expr.second))))::scalar_object
-    {
-      return sum(closure(expr));
+  vector_type vvnrm;
+  vvnrm = zero;  // sum across threads
+  for (int i = 0; i < grid->SumArraySize(); i++) {
+    vvnrm = vvnrm + sumarray[i];
+  }
+  nrm = Reduce(vvnrm);  // sum across simd
+  right._grid->GlobalSum(nrm);
+  return nrm;
+}
+
+template <class Op, class T1>
+inline auto sum(const LatticeUnaryExpression<Op, T1> &expr) ->
+    typename decltype(
+        expr.first.func(eval(0, std::get<0>(expr.second))))::scalar_object {
+  return sum(closure(expr));
    }

    template<class Op,class T1,class T2>
@@ -96,9 +101,9 @@ PARALLEL_FOR_LOOP
    template<class Op,class T1,class T2,class T3>
      inline auto sum(const LatticeTrinaryExpression<Op,T1,T2,T3> & expr)
      ->typename decltype(expr.first.func(eval(0,std::get<0>(expr.second)),
-				 eval(0,std::get<1>(expr.second)),
-				 eval(0,std::get<2>(expr.second))
-				 ))::scalar_object
+         eval(0,std::get<1>(expr.second)),
+         eval(0,std::get<2>(expr.second))
+         ))::scalar_object
    {
      return sum(closure(expr));
    }
@@ -111,24 +116,24 @@ PARALLEL_FOR_LOOP

      std::vector<vobj,alignedAllocator<vobj> > sumarray(grid->SumArraySize());
      for(int i=0;i<grid->SumArraySize();i++){
-	sumarray[i]=zero;
+  sumarray[i]=zero;
      }

 PARALLEL_FOR_LOOP
      for(int thr=0;thr<grid->SumArraySize();thr++){
-	int nwork, mywork, myoff;
-	GridThread::GetWork(grid->oSites(),thr,mywork,myoff);
+  int nwork, mywork, myoff;
+  GridThread::GetWork(grid->oSites(),thr,mywork,myoff);

-	vobj vvsum=zero;
+  vobj vvsum=zero;
        for(int ss=myoff;ss<mywork+myoff; ss++){
-	  vvsum = vvsum + arg._odata[ss];
-	}
-	sumarray[thr]=vvsum;
+    vvsum = vvsum + arg._odata[ss];
+  }
+  sumarray[thr]=vvsum;
      }

      vobj vsum=zero;  // sum across threads
      for(int i=0;i<grid->SumArraySize();i++){
-	vsum = vsum+sumarray[i];
+  vsum = vsum+sumarray[i];
      } 

      typedef typename vobj::scalar_object sobj;