Got unpreconditioned conjugate gradient to run and converge on a random (uniform random,

not even SU(3) for now) gauge field. Convergence history is correctly independent of decomposition on 1, 2, 4, 8, 16 MPI tasks. Found a couple of SIMD bugs which required fixing, and enhanced the Grid_simd.cc test suite. Implemented the Mdag, M, MdagM, Meooe, Mooee Schur-type stuff in the Wilson dop.
2025-12-23 06:04:29 +00:00 · 2015-05-19 13:57:35 +01:00
parent 05d862782f
commit 4dba8522a1
33 changed files with 566 additions and 316 deletions
--- a/lib/lattice/Grid_lattice_arith.h
+++ b/lib/lattice/Grid_lattice_arith.h
@@ -144,14 +144,44 @@ PARALLEL_FOR_LOOP
  }
  
  template<class sobj,class vobj> strong_inline
-  void axpy(Lattice<vobj> &ret,sobj a,const Lattice<vobj> &lhs,const Lattice<vobj> &rhs){
-    conformable(lhs,rhs);
+  void axpy(Lattice<vobj> &ret,sobj a,const Lattice<vobj> &x,const Lattice<vobj> &y){ // ret[ss] = a*x[ss] + y[ss]; operands renamed from lhs/rhs to x/y
+    conformable(x,y); // presumably verifies x and y are on compatible grids -- TODO confirm; ret is not passed to this check
 #pragma omp parallel for
-    for(int ss=0;ss<lhs._grid->oSites();ss++){
-      vobj tmp = a*lhs._odata[ss];
-      vstream(ret._odata[ss],tmp+rhs._odata[ss]);
+    for(int ss=0;ss<x._grid->oSites();ss++){ // ss runs over [0, x._grid->oSites())
+      vobj tmp = a*x._odata[ss]+y._odata[ss]; // the full a*x+y sum is now formed in the temporary
+      vstream(ret._odata[ss],tmp); // hand the finished value to vstream (helper defined elsewhere) to store into ret
    }
  }
+  template<class sobj,class vobj> strong_inline // axpby: ret[ss] = a*x[ss] + b*y[ss]
+  void axpby(Lattice<vobj> &ret,sobj a,sobj b,const Lattice<vobj> &x,const Lattice<vobj> &y){ // a and b share the single scalar type sobj
+    conformable(x,y); // presumably verifies x and y are on compatible grids -- TODO confirm; ret is not passed to this check
+#pragma omp parallel for
+    for(int ss=0;ss<x._grid->oSites();ss++){ // ss runs over [0, x._grid->oSites())
+      vobj tmp = a*x._odata[ss]+b*y._odata[ss]; // scale both operands and sum into a temporary
+      vstream(ret._odata[ss],tmp); // hand the value to vstream (helper defined elsewhere) to store into ret
+    }
+  }
+
+  template<class sobj,class vobj> strong_inline // axpy_norm: ret[ss] = a*x[ss] + y[ss], then return norm2(ret)
+  RealD axpy_norm(Lattice<vobj> &ret,sobj a,const Lattice<vobj> &x,const Lattice<vobj> &y){ // same loop body as axpy, plus a RealD result
+    conformable(x,y); // presumably verifies x and y are on compatible grids -- TODO confirm; ret is not passed to this check
+#pragma omp parallel for
+    for(int ss=0;ss<x._grid->oSites();ss++){ // ss runs over [0, x._grid->oSites())
+      vobj tmp = a*x._odata[ss]+y._odata[ss]; // form a*x+y in a temporary
+      vstream(ret._odata[ss],tmp); // hand the value to vstream (helper defined elsewhere) to store into ret
+    }
+    return norm2(ret); // norm2 is called on ret after the loop ends, i.e. as a separate step, not fused into the ss loop
+  }
+  template<class sobj,class vobj> strong_inline // axpby_norm: ret[ss] = a*x[ss] + b*y[ss], then return norm2(ret)
+  RealD axpby_norm(Lattice<vobj> &ret,sobj a,sobj b,const Lattice<vobj> &x,const Lattice<vobj> &y){ // a and b share the single scalar type sobj
+    conformable(x,y); // presumably verifies x and y are on compatible grids -- TODO confirm; ret is not passed to this check
+#pragma omp parallel for
+    for(int ss=0;ss<x._grid->oSites();ss++){ // ss runs over [0, x._grid->oSites())
+      vobj tmp = a*x._odata[ss]+b*y._odata[ss]; // scale both operands and sum into a temporary
+      vstream(ret._odata[ss],tmp); // hand the value to vstream (helper defined elsewhere) to store into ret
+    }
+    return norm2(ret); // FIXME implement parallel norm in ss loop (today norm2 is a separate call on ret after the loop)
+  }

 }
 #endif