From 53cfa44d7a0f9d35b12d5cbfa0858068f559e575 Mon Sep 17 00:00:00 2001
From: Daniel Richtmann <daniel.richtmann@gmail.com>
Date: Mon, 17 Jul 2017 21:02:10 +0200
Subject: [PATCH 001/130] Save current state

---
 lib/algorithms/Algorithms.h                   |   1 +
 .../iterative/GeneralisedMinimalResidual.h    | 202 ++++++++++++++++++
 tests/solver/Test_wilson_gmres_unprec.cc      |  78 +++++++
 3 files changed, 281 insertions(+)
 create mode 100644 lib/algorithms/iterative/GeneralisedMinimalResidual.h
 create mode 100644 tests/solver/Test_wilson_gmres_unprec.cc

diff --git a/lib/algorithms/Algorithms.h b/lib/algorithms/Algorithms.h
index 070a1019..af83df67 100644
--- a/lib/algorithms/Algorithms.h
+++ b/lib/algorithms/Algorithms.h
@@ -47,6 +47,7 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk>
 #include <Grid/algorithms/iterative/ConjugateGradientMixedPrec.h>
 #include <Grid/algorithms/iterative/BlockConjugateGradient.h>
 #include <Grid/algorithms/iterative/ConjugateGradientReliableUpdate.h>
+#include <Grid/algorithms/iterative/GeneralisedMinimalResidual.h>
 #include <Grid/algorithms/iterative/ImplicitlyRestartedLanczos.h>
 #include <Grid/algorithms/CoarsenedMatrix.h>
 #include <Grid/algorithms/FFT.h>
diff --git a/lib/algorithms/iterative/GeneralisedMinimalResidual.h b/lib/algorithms/iterative/GeneralisedMinimalResidual.h
new file mode 100644
index 00000000..0bdd43ad
--- /dev/null
+++ b/lib/algorithms/iterative/GeneralisedMinimalResidual.h
@@ -0,0 +1,202 @@
+/*************************************************************************************
+
+Grid physics library, www.github.com/paboyle/Grid
+
+Source file: lib/algorithms/iterative/GeneralisedMinimalResidual.h
+
+Copyright (C) 2015
+Copyright (C) 2016
+
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along
+with this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+
+See the full license in the file "LICENSE" in the top level distribution
+directory
+*************************************************************************************/
+/*  END LEGAL */
+#ifndef GRID_GENERALISED_MINIMAL_RESIDUAL_H
+#define GRID_GENERALISED_MINIMAL_RESIDUAL_H
+
+///////////////////////////////////////////////////////////////////////////////////////////////////////
+// from Y. Saad - Iterative Methods for Sparse Linear Systems, PP 172
+// Compute r0 = b − Ax0 , β := ||r0||2 , and v1 := r0 /β
+// For j = 1, 2, ..., m Do:
+//   Compute wj := Avj
+//   For i = 1, ..., j Do:
+//     hij := (wj , vi)
+//     wj := wj − hij vi
+//   EndDo
+//   hj+1,j = ||wj||2 . If hj+1,j = 0 set m := j and go to HERE
+//   vj+1 = wj /hj+1,j
+// EndDo
+// Define the (m + 1) × m Hessenberg matrix H̄m = {hij}1≤i≤m+1,1≤j≤m. [HERE]
+// Compute ym the minimizer of ||βe1 − H̄m y||2 and xm = x0 + Vm ym.
+///////////////////////////////////////////////////////////////////////////////////////////////////////
+
+// want to solve Ax = b -> A = LinOp, psi = x, b = src
+
+namespace Grid
+{
+template< class Field >
+class GeneralisedMinimalResidual : public OperatorFunction< Field >
+{
+public:
+    bool ErrorOnNoConverge; // Throw an assert when GMRES fails to converge,
+                            // defaults to True.
+    RealD   Tolerance;
+    Integer MaxIterations;
+    Integer IterationsToComplete; // Number of iterations the GMRES took to
+                                  // finish. Filled in upon completion
+
+    GeneralisedMinimalResidual( RealD   tol,
+                                Integer maxit,
+                                bool    err_on_no_conv = true )
+        : Tolerance( tol )
+        , MaxIterations( maxit )
+        , ErrorOnNoConverge( err_on_no_conv ){};
+
+    // want to solve Ax = b -> A = LinOp, psi = x, b = src
+
+    void operator()( LinearOperatorBase< Field > &LinOp,
+                     const Field &                src,
+                     Field &                      psi )
+    {
+        std::cout << GridLogMessage
+                  << "GeneralisedMinimalResidual: Start of operator()"
+                  << std::endl;
+        psi.checkerboard = src.checkerboard;
+        conformable( psi, src );
+
+        Field r( src );
+        Field mmv( src );
+
+        std::vector< Field > v( MaxIterations + 1, src );
+
+        RealD beta{};
+        RealD b{};
+        RealD d{};
+
+        Eigen::MatrixXcd H
+            = Eigen::MatrixXcd::Zero( MaxIterations + 1, MaxIterations );
+
+        // Compute r0 = b − Ax0 , β := ||r0||2 , and v1 := r0 /β
+        LinOp.Op( psi, mmv );
+
+        r      = src - mmv;
+        beta   = norm2( r );
+        V[ 0 ] = ( 1 / beta ) * r;
+
+        for( auto j = 0; j < MaxIterations; ++j )
+        {
+            LinOp.Op( V[ j ], mmv );
+
+            for( auto i = 0; i < j; ++i )
+            {
+                std::cout
+                    << GridLogMessage
+                    << "GeneralisedMinimalResidual: End of inner iteration "
+                    << i << std::endl;
+                H( i, j ) = innerProduct( mmv, v[ i ] );
+                mmv = mmv - H( i, j ) * V[ i ];
+            }
+
+            H( j + 1, j ) = norm2( mmv );
+
+            std::cout << GridLogMessage << "GeneralisedMinimalResidual: H"
+                      << j + 1 << "," << j << "= " << H( j + 1, j )
+                      << std::endl;
+            if( H( j + 1, j ) == 0. )
+            {
+                IterationsToComplete = j;
+                break;
+            }
+
+            V[ j + 1 ] = ( 1. / H( j + 1, j ) ) * mmv;
+            std::cout << GridLogMessage
+                      << "GeneralisedMinimalResidual: End of outer iteration "
+                      << j << std::endl;
+        }
+        std::cout << GridLogMessage
+                  << "GeneralisedMinimalResidual: End of operator()"
+                  << std::endl;
+    }
+};
+}
+#endif
+
+// Note: The DD-αAMG codebase turns around the Hessenberg matrix
+
+void arnoldiStep()
+{
+    w = D * V[ j ];
+
+    for( auto i = 0; i <= j; ++i )
+        H( i, j ) = innerProduct( V[ j + 1 ], w );
+    w = w - H( i, j ) * V[ i ];
+
+    H( j + 1, j ) = norm2( w );
+
+    V[ j + 1 ] = w / H( j + 1, j );
+}
+
+void qr_update_PRECISION()
+{
+    // update QR factorization
+    // apply previous Givens rotation
+    for( auto i = 0; i < j; i++ )
+    {
+        beta = -s[ i ] * H( i, j ) + c[ i ] * H( i + 1, j );
+        H( i, j ) = std::conj( c[ i ] ) * H( i, j )
+                    + std::conj( s[ i ] ) * H( i + 1, j );
+        H( i + 1, j ) = beta;
+    }
+
+    // compute current Givens rotation
+    beta   = sqrt( std::norm( H( j, j ) ) + std::norm( H( j, j + 1 ) ) );
+    s[ j ] = H( j + 1, j ) / beta;
+    c[ j ] = H( j, j ) / beta;
+
+    // update right column
+    gamma[ j + 1 ] = -s[ j ] * gamma[ j ];
+    gamma[ j ]     = std::conj( c[ j ] ) * gamma[ j ];
+
+    // apply current Givens rotation
+    H( j, j )     = beta;
+    H( j + 1, j ) = 0;
+}
+
+// check
+void compute_solution_PRECISION()
+{
+    for( auto i = j; i >= 0; i-- )
+    {
+        y[ i ] = gamma[ i ];
+        for( auto k = i + 1; k <= j; k++ )
+            y[ i ] -= H( i, k ) * y[ k ];
+        y[ i ] /= H( i, i );
+    }
+
+    if( true ) // TODO ???
+    {
+        for( i = 0; i <= j; i++ )
+            x = x + V[ i ] * y[ i ];
+    }
+    else
+    {
+        x = y[ 0 ] * V[ 0 ];
+        for( i = 1; i <= j; i++ )
+            x = x + V[ i ] * y[ i ];
+    }
+}
diff --git a/tests/solver/Test_wilson_gmres_unprec.cc b/tests/solver/Test_wilson_gmres_unprec.cc
new file mode 100644
index 00000000..7ac20840
--- /dev/null
+++ b/tests/solver/Test_wilson_gmres_unprec.cc
@@ -0,0 +1,78 @@
+/*************************************************************************************
+
+    Grid physics library, www.github.com/paboyle/Grid 
+
+    Source file: ./tests/solver/Test_wilson_gmres_unprec.cc
+
+    Copyright (C) 2015
+
+Author: Azusa Yamaguchi <ayamaguc@staffmail.ed.ac.uk>
+Author: Peter Boyle <paboyle@ph.ed.ac.uk>
+
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License along
+    with this program; if not, write to the Free Software Foundation, Inc.,
+    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+
+    See the full license in the file "LICENSE" in the top level distribution directory
+*************************************************************************************/
+/*  END LEGAL */
+#include <Grid/Grid.h>
+
+using namespace std;
+using namespace Grid;
+using namespace Grid::QCD;
+
+template<class d>
+struct scal {
+  d internal;
+};
+
+  Gamma::Algebra Gmu [] = {
+    Gamma::Algebra::GammaX,
+    Gamma::Algebra::GammaY,
+    Gamma::Algebra::GammaZ,
+    Gamma::Algebra::GammaT
+  };
+
+int main (int argc, char ** argv)
+{
+  Grid_init(&argc,&argv);
+
+  std::vector<int> latt_size   = GridDefaultLatt();
+  std::vector<int> simd_layout = GridDefaultSimd(Nd,vComplex::Nsimd());
+  std::vector<int> mpi_layout  = GridDefaultMpi();
+  GridCartesian               Grid(latt_size,simd_layout,mpi_layout);
+  GridRedBlackCartesian     RBGrid(latt_size,simd_layout,mpi_layout);
+
+  std::vector<int> seeds({1,2,3,4});
+  GridParallelRNG          pRNG(&Grid);  pRNG.SeedFixedIntegers(seeds);
+
+  LatticeFermion src(&Grid); random(pRNG,src);
+  RealD nrm = norm2(src);
+  LatticeFermion result(&Grid); result=zero;
+  LatticeGaugeField Umu(&Grid); SU3::HotConfiguration(pRNG,Umu);
+
+  double volume=1;
+  for(int mu=0;mu<Nd;mu++){
+    volume=volume*latt_size[mu];
+  }  
+  
+  RealD mass=0.5;
+  WilsonFermionR Dw(Umu,Grid,RBGrid,mass);
+
+  MdagMLinearOperator<WilsonFermionR,LatticeFermion> HermOp(Dw);
+  GeneralisedMinimalResidual<LatticeFermion> GMRES(1.0e-8,10000);
+  GMRES(HermOp,src,result);
+
+  Grid_finalize();
+}

From 789e892865817151329c3d5804f538b91320583d Mon Sep 17 00:00:00 2001
From: Daniel Richtmann <daniel.richtmann@gmail.com>
Date: Tue, 18 Jul 2017 17:57:13 +0200
Subject: [PATCH 002/130] Save current state

---
 .../iterative/GeneralisedMinimalResidual.h    | 389 ++++++++++++------
 1 file changed, 253 insertions(+), 136 deletions(-)

diff --git a/lib/algorithms/iterative/GeneralisedMinimalResidual.h b/lib/algorithms/iterative/GeneralisedMinimalResidual.h
index 0bdd43ad..453071c7 100644
--- a/lib/algorithms/iterative/GeneralisedMinimalResidual.h
+++ b/lib/algorithms/iterative/GeneralisedMinimalResidual.h
@@ -29,7 +29,6 @@ directory
 #ifndef GRID_GENERALISED_MINIMAL_RESIDUAL_H
 #define GRID_GENERALISED_MINIMAL_RESIDUAL_H
 
-///////////////////////////////////////////////////////////////////////////////////////////////////////
 // from Y. Saad - Iterative Methods for Sparse Linear Systems, PP 172
 // Compute r0 = b − Ax0 , β := ||r0||2 , and v1 := r0 /β
 // For j = 1, 2, ..., m Do:
@@ -47,156 +46,274 @@ directory
 
 // want to solve Ax = b -> A = LinOp, psi = x, b = src
 
-namespace Grid
-{
-template< class Field >
-class GeneralisedMinimalResidual : public OperatorFunction< Field >
-{
-public:
-    bool ErrorOnNoConverge; // Throw an assert when GMRES fails to converge,
-                            // defaults to True.
-    RealD   Tolerance;
-    Integer MaxIterations;
-    Integer IterationsToComplete; // Number of iterations the GMRES took to
-                                  // finish. Filled in upon completion
+namespace Grid {
 
-    GeneralisedMinimalResidual( RealD   tol,
-                                Integer maxit,
-                                bool    err_on_no_conv = true )
-        : Tolerance( tol )
-        , MaxIterations( maxit )
-        , ErrorOnNoConverge( err_on_no_conv ){};
+template<class Field>
+class GeneralisedMinimalResidual : public OperatorFunction<Field> {
+ public:
+  bool ErrorOnNoConverge; // Throw an assert when GMRES fails to converge,
+                          // defaults to True.
+  RealD   Tolerance;
+  Integer MaxIterations;
+  Integer IterationsToComplete; // Number of iterations the GMRES took to
+                                // finish. Filled in upon completion
 
-    // want to solve Ax = b -> A = LinOp, psi = x, b = src
+  GeneralisedMinimalResidual(RealD   tol,
+                             Integer maxit,
+                             bool    err_on_no_conv = true)
+    : Tolerance(tol), MaxIterations(maxit), ErrorOnNoConverge(err_on_no_conv){};
 
-    void operator()( LinearOperatorBase< Field > &LinOp,
-                     const Field &                src,
-                     Field &                      psi )
-    {
-        std::cout << GridLogMessage
-                  << "GeneralisedMinimalResidual: Start of operator()"
-                  << std::endl;
-        psi.checkerboard = src.checkerboard;
-        conformable( psi, src );
+  // want to solve Ax = b -> A = LinOp, psi = x, b = src
 
-        Field r( src );
-        Field mmv( src );
+  /* void */
+  /* operator()(LinearOperatorBase<Field> &LinOp, const Field &src, Field &psi)
+   * { */
+  /*   typedef typename Eigen::MatrixXcd MyMatrix; */
+  /*   typedef typename Eigen::VectorXcd MyVector; */
 
-        std::vector< Field > v( MaxIterations + 1, src );
+  /*   Field r(src); */
+  /*   Field w(src); */
+  /*   Field mmv(src); */
 
-        RealD beta{};
-        RealD b{};
-        RealD d{};
+  /*   std::vector<Field>                V(MaxIterations + 1, src); */
+  /*   std::vector<std::complex<double>> y(MaxIterations + 1, 0.); */
+  /*   std::vector<std::complex<double>> gamma(MaxIterations + 1, 0.); */
+  /*   std::vector<std::complex<double>> c(MaxIterations + 1, 0.); */
+  /*   std::vector<std::complex<double>> s(MaxIterations + 1, 0.); */
 
-        Eigen::MatrixXcd H
-            = Eigen::MatrixXcd::Zero( MaxIterations + 1, MaxIterations );
+  /*   int m = MaxIterations; */
 
-        // Compute r0 = b − Ax0 , β := ||r0||2 , and v1 := r0 /β
-        LinOp.Op( psi, mmv );
+  /*   RealD gamma0{}; */
 
-        r      = src - mmv;
-        beta   = norm2( r );
-        V[ 0 ] = ( 1 / beta ) * r;
+  /*   MyMatrix H = Eigen::MatrixXcd::Zero(MaxIterations + 1, MaxIterations); */
 
-        for( auto j = 0; j < MaxIterations; ++j )
-        {
-            LinOp.Op( V[ j ], mmv );
+  /*   RealD normPsiSq   = norm2(psi); */
+  /*   RealD normSrcSq   = norm2(src); */
+  /*   RealD TargetResSq = Tolerance * Tolerance * normSrcSq; */
 
-            for( auto i = 0; i < j; ++i )
-            {
-                std::cout
-                    << GridLogMessage
-                    << "GeneralisedMinimalResidual: End of inner iteration "
-                    << i << std::endl;
-                H( i, j ) = innerProduct( mmv, v[ i ] );
-                mmv = mmv - H( i, j ) * V[ i ];
-            }
+  /*   LinOp.Op(psi, mmv); */
 
-            H( j + 1, j ) = norm2( mmv );
+  /*   r        = src - mmv; */
+  /*   gamma[0] = norm2(r); */
+  /*   std::cout << gamma[0] << std::endl; */
+  /*   gamma0 = std::real(gamma[0]); */
+  /*   V[0]   = (1. / gamma[0]) * r; */
 
-            std::cout << GridLogMessage << "GeneralisedMinimalResidual: H"
-                      << j + 1 << "," << j << "= " << H( j + 1, j )
-                      << std::endl;
-            if( H( j + 1, j ) == 0. )
-            {
-                IterationsToComplete = j;
-                break;
-            }
+  /*   std::cout << GridLogMessage << std::setprecision(4) */
+  /*             << "GeneralisedMinimalResidual:    psi " << normPsiSq */
+  /*             << std::endl; */
+  /*   std::cout << GridLogMessage << std::setprecision(4) */
+  /*             << "GeneralisedMinimalResidual:    src " << normSrcSq */
+  /*             << std::endl; */
+  /*   std::cout << GridLogMessage << std::setprecision(4) */
+  /*             << "GeneralisedMinimalResidual: target " << TargetResSq */
+  /*             << std::endl; */
+  /*   std::cout << GridLogMessage << std::setprecision(4) */
+  /*             << "GeneralisedMinimalResidual:      r " << gamma0 <<
+   * std::endl; */
 
-            V[ j + 1 ] = ( 1. / H( j + 1, j ) ) * mmv;
-            std::cout << GridLogMessage
-                      << "GeneralisedMinimalResidual: End of outer iteration "
-                      << j << std::endl;
-        }
-        std::cout << GridLogMessage
-                  << "GeneralisedMinimalResidual: End of operator()"
-                  << std::endl;
+  /*   std::cout */
+  /*     << GridLogIterative << std::setprecision(4) */
+  /*     << "GeneralisedMinimalResidual: before starting to iterate residual "
+   */
+  /*     << gamma0 << " target " << TargetResSq << std::endl; */
+
+  /*   for(auto j = 0; j < m; ++j) { */
+  /*     LinOp.Op(V[j], w); */
+
+  /*     for(auto i = 0; i <= j; ++i) { */
+  /*       H(i, j) = innerProduct(V[i], w); */
+  /*       w = w - H(i, j) * V[i]; */
+  /*     } */
+
+  /*     H(j + 1, j) = norm2(w); */
+  /*     V[j + 1] = (1. / H(j + 1, j)) * w; */
+
+  /*     if(std::abs(H(j + 1, j)) > 1e-15) { */
+  /*       qrUpdate(gamma, c, s, H, j); */
+  /*     } */
+
+  /*     /\* std::cout << GridLogMessage << "GeneralisedMinimalResidual: H( "
+   * *\/ */
+  /*     /\*           << j + 1 << "," << j << " ) = " << H( j + 1, j ) *\/ */
+  /*     /\*           << std::endl; *\/ */
+
+  /*     std::cout << GridLogIterative << "GeneralisedMinimalResidual: Iteration
+   * " */
+  /*               << j << " residual " << std::abs(gamma[j + 1]) << " target "
+   */
+  /*               << TargetResSq << std::endl; */
+  /*     if(std::abs(gamma[j + 1]) / gamma0 < Tolerance) { */
+  /*       IterationsToComplete = j; */
+  /*       break; */
+  /*     } */
+  /*   } */
+  /*   computeSolution(y, gamma, H, V, psi, IterationsToComplete); */
+  /*   std::cout << GridLogMessage */
+  /*             << "GeneralisedMinimalResidual: End of operator() after " */
+  /*             << IterationsToComplete << " iterations" << std::endl; */
+
+  /*   RealD normSrc       = sqrt(normSrcSq); */
+  /*   RealD resnorm       = sqrt(norm2(mmv)); */
+  /*   RealD true_residual = resnorm / srcnorm; */
+  /*   Field result        = mmv; */
+  /*   Field Dx(src); */
+  /*   Field tmp(src); */
+
+  /*   // Test the correctness */
+  /*   LinOp.Op(result, Dx); */
+
+  /*   tmp = Dx - src; */
+
+  /*   std::cout << norm2(tmp) << " " << norm2(tmp) / gamma0 << std::endl; */
+  /* } */
+
+  void
+  operator()(LinearOperatorBase<Field> &LinOp, const Field &src, Field &psi) {
+    std::cout << "GMRES: Start of operator()" << std::endl;
+
+    int m = MaxIterations;
+
+    Field r(src);
+    Field w(src);
+    Field Dpsi(src);
+    Field Dv(src);
+
+    std::vector<Field> v(m + 1, src);
+    Eigen::MatrixXcd   H = Eigen::MatrixXcd::Zero(m + 1, m);
+
+    std::vector<std::complex<double>> y(m + 1, 0.);
+    std::vector<std::complex<double>> gamma(m + 1, 0.);
+    std::vector<std::complex<double>> c(m + 1, 0.);
+    std::vector<std::complex<double>> s(m + 1, 0.);
+
+    LinOp.Op(psi, Dpsi);
+    r = src - Dpsi;
+
+    RealD beta = norm2(r);
+    gamma[0]  = beta;
+
+    std::cout << "beta " << beta << std::endl;
+
+    v[0] = (1. / beta) * r;
+
+    // Begin iterating
+    for(auto j = 0; j < m; ++j) {
+      LinOp.Op(v[j], Dv);
+      w = Dv;
+
+      for(auto i = 0; i <= j; ++i) {
+        H(i, j) = innerProduct(v[i], w);
+        w = w - H(i, j) * v[i];
+      }
+
+      H(j + 1, j) = norm2(w);
+      v[j + 1] = (1. / H(j + 1, j)) * w;
+
+      // end of arnoldi process, begin of givens rotations
+      // apply old Givens rotation
+      for(auto i = 0; i < j ; ++i) {
+        auto tmp = -s[i] * H(i, j) + c[i] * H(i + 1, j);
+        H(i, j)     = std::conj(c[i]) * H(i, j) + std::conj(s[i]) * H(i + 1, j);
+        H(i + 1, j) = tmp;
+      }
+
+      // compute new Givens Rotation
+      ComplexD nu = sqrt(std::norm(H(j, j)) + std::norm(H(j + 1, j)));
+      c[j]        = H(j, j) / nu;
+      s[j]        = H(j + 1, j) / nu;
+      std::cout << "nu" << nu << std::endl;
+      std::cout << "H("<<j<<","<<j<<")" << H(j,j) << std::endl;
+      std::cout << "H("<<j+1<<","<<j<<")" << H(j+1,j) << std::endl;
+
+      // apply new Givens rotation
+      H(j, j)     = nu;
+      H(j + 1, j) = 0.;
+
+      /* gamma[j+1] is set first: both updates need the un-rotated gamma[j] */
+      gamma[j + 1] = -s[j] * gamma[j];
+      gamma[j]     = std::conj(c[j]) * gamma[j];
+
+      /* for(auto k = 0; k <= j+1 ; ++k) */
+      /*   std::cout << "k " << k << "nu " << nu << " c["<<k<<"]" << c[k]<< " s["<<k<<"]" << s[k] << " gamma["<<k<<"]" << gamma[k] << std::endl; */
+
+      std::cout << GridLogIterative << "GeneralisedMinimalResidual: Iteration "
+                << j << " residual " << std::abs(gamma[j + 1]) << std::endl; //" target "
+                /* << TargetResSq << std::endl; */
+      if(std::abs(gamma[j + 1]) / sqrt(beta) < Tolerance) {
+        IterationsToComplete = j;
+        break;
+      }
     }
+
+    // backward substitution
+    computeSolution(y, gamma, H, v, psi, IterationsToComplete);
+
+    std::cout << "GMRES: End of operator()" << std::endl;
+  }
+
+  private:
+  /*  void qrUpdate(std::vector<std::complex<double>> &gamma, */
+  /*                std::vector<std::complex<double>> &c, */
+  /*                std::vector<std::complex<double>> &s, */
+  /*                Eigen::MatrixXcd &                 H, */
+  /*                int                                j) { */
+  /*    ComplexD beta{}; */
+  /*    // update QR factorization */
+  /*    // apply previous Givens rotation */
+  /*    for(auto i = 0; i < j; i++) { */
+  /*      beta = -s[i] * H(i, j) + c[i] * H(i + 1, j); */
+  /*      H(i, j)     = std::conj(c[i]) * H(i, j) + std::conj(s[i]) * H(i + 1,
+   * j); */
+  /*      H(i + 1, j) = beta; */
+  /*    } */
+
+  /*    // compute current Givens rotation */
+  /*    beta = sqrt(std::norm(H(j, j)) + std::norm(H(j + 1, j))); */
+  /*    s[j] = H(j + 1, j) / beta; */
+  /*    c[j] = H(j, j) / beta; */
+  /*    /\* std::cout << "beta= " << beta << std::endl; *\/ */
+  /*    /\* std::cout << "s[j]= " << s[ j ] << std::endl; *\/ */
+  /*    /\* std::cout << "c[j]= " << c[ j ] << std::endl; *\/ */
+
+  /*    /\* std::cout << "gamma[j+1]= " << gamma[ j + 1 ] << std::endl; *\/ */
+  /*    /\* std::cout << "gamma[j]= " << gamma[ j ] << std::endl; *\/ */
+  /*    // update right column */
+  /*    gamma[j + 1] = -s[j] * gamma[j]; */
+  /*    gamma[j]     = std::conj(c[j]) * gamma[j]; */
+  /*    /\* std::cout << "gamma[j+1]= " << gamma[ j + 1 ] << std::endl; *\/ */
+  /*    /\* std::cout << "gamma[j]= " << gamma[ j ] << std::endl; *\/ */
+
+  /*    // apply current Givens rotation */
+  /*    H(j, j)     = beta; */
+  /*    H(j + 1, j) = 0.; */
+  /*    /\* std::cout << "H(j,j)= " << H( j, j ) << std::endl; *\/ */
+  /*    /\* std::cout << "H(j+1,j)= " << H( j + 1, j ) << std::endl; *\/ */
+  /*  } */
+
+  void computeSolution(std::vector<std::complex<double>> &      y,
+                       std::vector<std::complex<double>> const &gamma,
+                       Eigen::MatrixXcd const &                 H,
+                       std::vector<Field> const &               v,
+                       Field &                                  x,
+                       int                                      j) {
+    for(auto i = j; i >= 0; i--) {
+      y[i] = gamma[i];
+      for(auto k = i + 1; k <= j; k++)
+        y[i] -= H(i, k) * y[k];
+      y[i] /= H(i, i);
+    }
+
+    /* if(true) // TODO ??? */
+    /* { */
+    /*   for(auto i = 0; i <= j; i++) */
+    /*     x = x + v[i] * y[i]; */
+    /* } else { */
+      x = y[0] * v[0];
+      for(auto i = 1; i <= j; i++)
+        x = x + v[i] * y[i];
+    /* } */
+  }
 };
 }
 #endif
-
-// Note: The DD-αAMG codebase turns around the Hessenberg matrix
-
-void arnoldiStep()
-{
-    w = D * V[ j ];
-
-    for( auto i = 0; i <= j; ++i )
-        H( i, j ) = innerProduct( V[ j + 1 ], w );
-    w = w - H( i, j ) * V[ i ];
-
-    H( j + 1, j ) = norm2( w );
-
-    V[ j + 1 ] = w / H( j + 1, j );
-}
-
-void qr_update_PRECISION()
-{
-    // update QR factorization
-    // apply previous Givens rotation
-    for( auto i = 0; i < j; i++ )
-    {
-        beta = -s[ i ] * H( i, j ) + c[ i ] * H( i + 1, j );
-        H( i, j ) = std::conj( c[ i ] ) * H( i, j )
-                    + std::conj( s[ i ] ) * H( i + 1, j );
-        H( i + 1, j ) = beta;
-    }
-
-    // compute current Givens rotation
-    beta   = sqrt( std::norm( H( j, j ) ) + std::norm( H( j, j + 1 ) ) );
-    s[ j ] = H( j + 1, j ) / beta;
-    c[ j ] = H( j, j ) / beta;
-
-    // update right column
-    gamma[ j + 1 ] = -s[ j ] * gamma[ j ];
-    gamma[ j ]     = std::conj( c[ j ] ) * gamma[ j ];
-
-    // apply current Givens rotation
-    H( j, j )     = beta;
-    H( j + 1, j ) = 0;
-}
-
-// check
-void compute_solution_PRECISION()
-{
-    for( auto i = j; i >= 0; i-- )
-    {
-        y[ i ] = gamma[ i ];
-        for( auto k = i + 1; k <= j; k++ )
-            y[ i ] -= H( i, k ) * y[ k ];
-        y[ i ] /= H( i, i );
-    }
-
-    if( true ) // TODO ???
-    {
-        for( i = 0; i <= j; i++ )
-            x = x + V[ i ] * y[ i ];
-    }
-    else
-    {
-        x = y[ 0 ] * V[ 0 ];
-        for( i = 1; i <= j; i++ )
-            x = x + V[ i ] * y[ i ];
-    }
-}

From 1ab8d5cc13d143be6c8ceb1f6d91cdd848ca77fd Mon Sep 17 00:00:00 2001
From: Daniel Richtmann <daniel.richtmann@gmail.com>
Date: Fri, 21 Jul 2017 13:39:03 +0200
Subject: [PATCH 003/130] Save two more files

---
 lib/algorithms/iterative/MinimalResidual.h | 197 +++++
 tests/solver/Test_wilson_ddalphaamg.cc     | 806 +++++++++++++++++++++
 2 files changed, 1003 insertions(+)
 create mode 100644 lib/algorithms/iterative/MinimalResidual.h
 create mode 100644 tests/solver/Test_wilson_ddalphaamg.cc

diff --git a/lib/algorithms/iterative/MinimalResidual.h b/lib/algorithms/iterative/MinimalResidual.h
new file mode 100644
index 00000000..878deb24
--- /dev/null
+++ b/lib/algorithms/iterative/MinimalResidual.h
@@ -0,0 +1,197 @@
+/*************************************************************************************
+
+Grid physics library, www.github.com/paboyle/Grid
+
+Source file: ./lib/algorithms/iterative/MinimalResidual.h
+
+Copyright (C) 2015
+
+Author: Azusa Yamaguchi <ayamaguc@staffmail.ed.ac.uk>
+Author: Peter Boyle <paboyle@ph.ed.ac.uk>
+Author: paboyle <paboyle@ph.ed.ac.uk>
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along
+with this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+
+See the full license in the file "LICENSE" in the top level distribution
+directory
+*************************************************************************************/
+/*  END LEGAL */
+#ifndef GRID_MINIMAL_RESIDUAL_H
+#define GRID_MINIMAL_RESIDUAL_H
+
+namespace Grid {
+
+/////////////////////////////////////////////////////////////
+// Minimal residual (MR) iterative solver for a linear operator:
+// single input vec (src), single output vec (psi).
+/////////////////////////////////////////////////////////////
+
+template <class Field>
+class MinimalResidual : public OperatorFunction<Field> {
+ public:
+  bool ErrorOnNoConverge;  // throw an assert when the MR fails to converge.
+                           // Defaults true.
+  RealD Tolerance;
+  Integer MaxIterations;
+  Integer IterationsToComplete; //Number of iterations the MR took to finish. Filled in upon completion
+  
+  MinimalResidual(RealD tol, Integer maxit, bool err_on_no_conv = true)
+      : Tolerance(tol),
+        MaxIterations(maxit),
+        ErrorOnNoConverge(err_on_no_conv){};
+
+  void operator()(LinearOperatorBase<Field> &Linop, const Field &src,
+                  Field &psi) {
+    psi.checkerboard = src.checkerboard; // Check
+    conformable(psi, src);
+
+    Field p {src};
+    Field matrixTimesPsi {src};
+    Field r {src};
+
+    RealD alpha {};
+
+    // Initial residual computation & set up
+    RealD guess = norm2(psi);
+    assert(std::isnan(guess) == 0);
+
+    Linop.HermOp(psi, matrixTimesPsi);
+
+    r = src - matrixTimesPsi;
+
+    Linop.HermOp(r, p);
+
+    alpha = innerProduct(p,r) / innerProduct(p,p);
+    psi = psi + alpha * r;
+    r   = r   - alpha * p;
+
+    Linop.HermOp(r, p);
+
+
+// NOTE(review): work-in-progress boundary. The code below looks like a leftover conjugate-gradient body: it
+// redeclares p, matrixTimesPsi and guess, and uses a, b, c, cp, d, qq, b_pred, ssq without declarations.
+
+    // RealD cp, c, a, d, b, ssq, qq, b_pred;
+
+    Field p(src);
+    Field matrixTimesPsi(src);
+    // Field r(src);
+
+    // Initial residual computation & set up
+    RealD guess = norm2(psi);
+    assert(std::isnan(guess) == 0);
+
+    
+    Linop.HermOpAndNorm(psi, matrixTimesPsi, d, b);
+    
+
+    r = src - matrixTimesPsi;
+    p = matrixTimesPsi;
+
+    a = norm2(p);
+    cp = a;
+    ssq = norm2(src);
+
+    std::cout << GridLogIterative << std::setprecision(4)
+              << "MinimalResidual: guess " << guess << std::endl;
+    std::cout << GridLogIterative << std::setprecision(4)
+              << "MinimalResidual:   src " << ssq << std::endl;
+    std::cout << GridLogIterative << std::setprecision(4)
+              << "MinimalResidual:    mp " << d << std::endl;
+    std::cout << GridLogIterative << std::setprecision(4)
+              << "MinimalResidual:   matrixTimesPsi " << b << std::endl;
+    std::cout << GridLogIterative << std::setprecision(4)
+              << "MinimalResidual:  cp,r " << cp << std::endl;
+    std::cout << GridLogIterative << std::setprecision(4)
+              << "MinimalResidual:     p " << a << std::endl;
+
+    RealD rsq = Tolerance * Tolerance * ssq;
+
+    // Check if guess is really REALLY good :)
+    if (cp <= rsq) {
+      return;
+    }
+
+    std::cout << GridLogIterative << std::setprecision(4)
+              << "MinimalResidual: k=0 residual " << cp << " target " << rsq
+              << std::endl;
+
+    GridStopWatch LinalgTimer;
+    GridStopWatch MatrixTimer;
+    GridStopWatch SolverTimer;
+
+    SolverTimer.Start();
+    int k;
+    for (k = 1; k <= MaxIterations; k++) {
+      c = cp;
+
+      MatrixTimer.Start();
+      Linop.HermOpAndNorm(p, matrixTimesPsi, d, qq);
+      MatrixTimer.Stop();
+
+      LinalgTimer.Start();
+      //  RealD    qqck = norm2(matrixTimesPsi);
+      //  ComplexD dck  = innerProduct(p,matrixTimesPsi);
+
+      a = c / d;
+      b_pred = a * (a * qq - d) / c;
+
+      cp = axpy_norm(r, -a, matrixTimesPsi, r);
+      b = cp / c;
+
+      // Fuse these loops ; should be really easy
+      psi = a * p + psi;
+      p = p * b + r;
+
+      LinalgTimer.Stop();
+      std::cout << GridLogIterative << "MinimalResidual: Iteration " << k
+                << " residual " << cp << " target " << rsq << std::endl;
+
+      // Stopping condition
+      if (cp <= rsq) {
+        SolverTimer.Stop();
+        Linop.HermOpAndNorm(psi, matrixTimesPsi, d, qq);
+        p = matrixTimesPsi - src;
+
+        RealD matrixTimesPsiNorm = sqrt(norm2(matrixTimesPsi));
+        RealD psinorm = sqrt(norm2(psi));
+        RealD srcnorm = sqrt(norm2(src));
+        RealD resnorm = sqrt(norm2(p));
+        RealD true_residual = resnorm / srcnorm;
+
+        std::cout << GridLogMessage
+                  << "MinimalResidual: Converged on iteration " << k << std::endl;
+        std::cout << GridLogMessage << "Computed residual " << sqrt(cp / ssq)
+                  << " true residual " << true_residual << " target "
+                  << Tolerance << std::endl;
+        std::cout << GridLogMessage << "Time elapsed: Iterations "
+                  << SolverTimer.Elapsed() << " Matrix  "
+                  << MatrixTimer.Elapsed() << " Linalg "
+                  << LinalgTimer.Elapsed();
+        std::cout << std::endl;
+
+        if (ErrorOnNoConverge) assert(true_residual / Tolerance < 10000.0);
+	IterationsToComplete = k;	
+        return;
+      }
+    }
+    std::cout << GridLogMessage << "MinimalResidual did NOT converge"
+              << std::endl;
+    if (ErrorOnNoConverge) assert(0);
+    IterationsToComplete = k;
+  }
+};
+}
+#endif
diff --git a/tests/solver/Test_wilson_ddalphaamg.cc b/tests/solver/Test_wilson_ddalphaamg.cc
new file mode 100644
index 00000000..7269bf64
--- /dev/null
+++ b/tests/solver/Test_wilson_ddalphaamg.cc
@@ -0,0 +1,806 @@
+    /*************************************************************************************
+
+    Grid physics library, www.github.com/paboyle/Grid 
+
+    Source file: ./tests/solver/Test_wilson_ddalphaamg.cc
+
+    Copyright (C) 2015
+
+Author: Daniel Richtmann <daniel.richtmann@ur.de>
+
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License along
+    with this program; if not, write to the Free Software Foundation, Inc.,
+    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+
+    See the full license in the file "LICENSE" in the top level distribution directory
+    *************************************************************************************/
+    /*  END LEGAL */
+#include <Grid/Grid.h>
+#include <Grid/algorithms/iterative/PrecGeneralisedConjugateResidual.h>
+//#include <algorithms/iterative/PrecConjugateResidual.h>
+
+using namespace std;
+using namespace Grid;
+using namespace Grid::QCD;
+
+class myclass: Serializable {
+public:
+  // Run parameters for this test; the file-scope instance `params` is read throughout.
+  GRID_SERIALIZABLE_CLASS_MEMBERS(myclass,
+			  int, domaindecompose, // non-zero: operator() uses operatorSAP, main() masks gauge links
+			  int, domainsize,      // block extent used by SAP() and by the link masking in main()
+			  int, order,           // third argument given to the Chebyshev smoothers
+			  int, Ls,              // copied into a local Ls by main()
+			  double, mq,           // used as the fermion mass in main()
+			  double, lo,           // first argument given to the Chebyshev smoothers
+			  double, hi,           // second argument given to the Chebyshev smoothers
+			  int, steps);          // number of iterations of the SAP() loop
+  // Empty default constructor; main() assigns each field explicitly.
+  myclass(){};
+
+};
+myclass params;
+
+RealD InverseApproximation(RealD x){ // 1/x, the function handed to Chebyshev<>
+  const RealD reciprocal = 1.0/x; return reciprocal;
+}
+
+template<class Fobj,class CComplex,int nbasis, class Matrix>
+class MultiGridPreconditioner : public LinearFunction< Lattice<Fobj> > {
+public:
+
+  typedef Aggregation<Fobj,CComplex,nbasis> Aggregates;
+  typedef CoarsenedMatrix<Fobj,CComplex,nbasis> CoarseOperator;
+
+  typedef typename Aggregation<Fobj,CComplex,nbasis>::siteVector     siteVector;
+  typedef typename Aggregation<Fobj,CComplex,nbasis>::CoarseScalar CoarseScalar;
+  typedef typename Aggregation<Fobj,CComplex,nbasis>::CoarseVector CoarseVector;
+  typedef typename Aggregation<Fobj,CComplex,nbasis>::CoarseMatrix CoarseMatrix;
+  typedef typename Aggregation<Fobj,CComplex,nbasis>::FineField    FineField;
+  typedef LinearOperatorBase<FineField>                            FineOperator;
+
+  Aggregates     & _Aggregates;
+  CoarseOperator & _CoarseOperator;
+  Matrix         & _FineMatrix;
+  FineOperator   & _FineOperator;
+  Matrix         & _SmootherMatrix;
+  FineOperator   & _SmootherOperator;
+
+  // Constructor
+  // Binds every collaborator by reference (nothing is copied). Initialisers
+  // are listed in member declaration order to avoid -Wreorder warnings.
+  MultiGridPreconditioner(Aggregates &Agg, CoarseOperator &Coarse,
+			  FineOperator &Fine,Matrix &FineMatrix,
+			  FineOperator &Smooth,Matrix &SmootherMatrix)
+    : _Aggregates(Agg), _CoarseOperator(Coarse),
+      _FineMatrix(FineMatrix),
+      _FineOperator(Fine),
+      _SmootherMatrix(SmootherMatrix),
+      _SmootherOperator(Smooth)
+  {}
+
+  // Runs 20 power iterations of the MdagM wrapper around _FineMatrix, starting
+  // from `in`, and logs the growth factor |MdagM p|/|p| on iterations 9 and 19
+  // as an estimate of the largest eigenvalue. Nothing is returned.
+  void PowerMethod(const FineField &in) {
+    FineField p1(in._grid);
+    FineField p2(in._grid);
+
+    MdagMLinearOperator<Matrix,FineField>   fMdagMOp(_FineMatrix);
+
+    p1=in;
+    for(int i=0;i<20;i++){
+      RealD absp1=std::sqrt(norm2(p1));
+      fMdagMOp.HermOp(p1,p2);// p2 = (MdagM wrapper) applied to p1
+      RealD absp2=std::sqrt(norm2(p2));
+      if(i%10==9)
+	std::cout<<GridLogMessage << "Power method on mdagm "<<i<<" " << absp2/absp1<<std::endl;
+      p1=p2*(1.0/absp2); // absp2 is already |p2|, so this yields a unit-norm iterate
+    }
+  }
+
+  // Preconditioner entry point: forwards to operatorSAP when the global
+  // params.domaindecompose flag is non-zero and to operatorCheby when it is
+  // zero.
+  void operator()(const FineField &in, FineField & out) {
+    if ( !params.domaindecompose ) { operatorCheby(in,out); return; }
+    operatorSAP(in,out);
+  }
+
+    ////////////////////////////////////////////////////////////////////////
+    // ADEF2: [PTM+Q] in = [1 - Q A] M in + Q in = Min + Q [ in -A Min]
+    // ADEF1: [MP+Q ] in =M [1 - A Q] in + Q in  
+    ////////////////////////////////////////////////////////////////////////
+#if 1
+  void operatorADEF2(const FineField &in, FineField & out) {
+    // ADEF2 preconditioner: out = Min + Q [ in - A Min ]; diagnostics are written to the log.
+    CoarseVector Csrc(_CoarseOperator.Grid());
+    CoarseVector Ctmp(_CoarseOperator.Grid());
+    CoarseVector Csol(_CoarseOperator.Grid());
+
+    ConjugateGradient<CoarseVector>  CG(1.0e-10,100000);
+    ConjugateGradient<FineField>    fCG(3.0e-2,1000);
+
+    HermitianLinearOperator<CoarseOperator,CoarseVector>  HermOp(_CoarseOperator);
+    MdagMLinearOperator<CoarseOperator,CoarseVector>     MdagMOp(_CoarseOperator);
+    MdagMLinearOperator<Matrix,FineField>               fMdagMOp(_FineMatrix);
+
+    FineField tmp(in._grid);
+    FineField res(in._grid);
+    FineField Min(in._grid);
+
+    // Monitor completeness of low mode space: project `in` and promote it back
+    _Aggregates.ProjectToSubspace  (Csrc,in);
+    _Aggregates.PromoteFromSubspace(Csrc,out);
+    std::cout<<GridLogMessage<<"Coarse Grid Preconditioner\nCompleteness in: "<<std::sqrt(norm2(out)/norm2(in))<<std::endl;
+
+    // [PTM+Q] in = [1 - Q A] M in + Q in = Min + Q [ in -A Min]
+    _FineOperator.Op(in,tmp);// tmp = A in
+    fCG(fMdagMOp,tmp,Min);    // solves  MdagM = g5 M g5M
+    // NOTE(review): Min is never assigned before fCG; if fCG treats it as a starting guess, zero it first.
+    // Monitor completeness of low mode space
+    _Aggregates.ProjectToSubspace  (Csrc,Min);
+    _Aggregates.PromoteFromSubspace(Csrc,out);
+    std::cout<<GridLogMessage<<"Completeness Min: "<<std::sqrt(norm2(out)/norm2(Min))<<std::endl;
+
+    _FineOperator.Op(Min,tmp);
+    tmp = in - tmp;   // in - A Min
+
+    Csol=zero;
+    _Aggregates.ProjectToSubspace  (Csrc,tmp);
+    HermOp.AdjOp(Csrc,Ctmp);// Normal equations
+    CG(MdagMOp,Ctmp,Csol);
+
+    HermOp.Op(Csol,Ctmp);
+    Ctmp=Ctmp-Csrc;
+    std::cout<<GridLogMessage<<"coarse space true residual "<<std::sqrt(norm2(Ctmp)/norm2(Csrc))<<std::endl;
+    _Aggregates.PromoteFromSubspace(Csol,out);
+
+    _FineOperator.Op(out,res);
+    res=res-tmp;
+    std::cout<<GridLogMessage<<"promoted sol residual "<<std::sqrt(norm2(res)/norm2(tmp))<<std::endl;
+    _Aggregates.ProjectToSubspace  (Csrc,res);
+    std::cout<<GridLogMessage<<"coarse space proj of residual "<<norm2(Csrc)<<std::endl;
+
+    
+    out = out+Min; // additive coarse space correction
+    //    out = Min; // no additive coarse space correction
+
+    _FineOperator.Op(out,tmp);
+    tmp=tmp-in;         // tmp is new residual
+
+    std::cout<<GridLogMessage<< " Preconditioner in  " << norm2(in)<<std::endl; 
+    std::cout<<GridLogMessage<< " Preconditioner out " << norm2(out)<<std::endl; 
+    std::cout<<GridLogMessage<<"preconditioner thinks residual is "<<std::sqrt(norm2(tmp)/norm2(in))<<std::endl;
+
+  }
+#endif
+  // ADEF1: [MP+Q ] in =M [1 - A Q] in + Q in  
+#if 1
+  void operatorADEF1(const FineField &in, FineField & out) {
+    // ADEF1 preconditioner: out = M [ in - A Q in ] + Q in, with Q the coarse-space solve.
+    CoarseVector Csrc(_CoarseOperator.Grid());
+    CoarseVector Ctmp(_CoarseOperator.Grid());
+    CoarseVector Csol(_CoarseOperator.Grid()); Csol=zero;
+
+    ConjugateGradient<CoarseVector>  CG(1.0e-10,100000);
+    ConjugateGradient<FineField>    fCG(3.0e-2,1000);
+
+    HermitianLinearOperator<CoarseOperator,CoarseVector>  HermOp(_CoarseOperator);
+    MdagMLinearOperator<CoarseOperator,CoarseVector>     MdagMOp(_CoarseOperator);
+    ShiftedMdagMLinearOperator<Matrix,FineField>        fMdagMOp(_FineMatrix,0.1);
+
+    FineField tmp(in._grid);
+    FineField res(in._grid);
+    FineField Qin(in._grid);
+
+    // Monitor completeness of low mode space
+    //    _Aggregates.ProjectToSubspace  (Csrc,in);
+    //    _Aggregates.PromoteFromSubspace(Csrc,out);
+    //    std::cout<<GridLogMessage<<"Coarse Grid Preconditioner\nCompleteness in: "<<std::sqrt(norm2(out)/norm2(in))<<std::endl;
+    // Qin = Q in: project, solve the coarse normal equations, promote back
+    _Aggregates.ProjectToSubspace  (Csrc,in);
+    HermOp.AdjOp(Csrc,Ctmp);// Normal equations
+    CG(MdagMOp,Ctmp,Csol);
+    _Aggregates.PromoteFromSubspace(Csol,Qin);
+
+    //    Qin=0;
+    _FineOperator.Op(Qin,tmp);// A Q in
+    tmp = in - tmp;            // in - A Q in
+
+    _FineOperator.Op(tmp,res);// res = A [ in - A Q in ]
+    fCG(fMdagMOp,res,out);    // solves  MdagM = g5 M g5M
+
+    out = out + Qin;
+
+    _FineOperator.Op(out,tmp);
+    tmp=tmp-in;         // tmp is new residual
+
+    std::cout<<GridLogMessage<<"preconditioner thinks residual is "<<std::sqrt(norm2(tmp)/norm2(in))<<std::endl;
+
+  }
+#endif
+
+  void SAP (const FineField & src,FineField & psi){
+    // Alternating even/odd block sweeps updating psi towards A psi = src; psi is read as the starting iterate.
+    Lattice<iScalar<vInteger> > coor(src._grid);
+    Lattice<iScalar<vInteger> > subset(src._grid);
+    
+    FineField r(src._grid);
+    FineField zz(src._grid); zz=zero;
+    FineField vec1(src._grid);
+    FineField vec2(src._grid);
+
+    const Integer block=params.domainsize;
+    // subset = parity (0 or 1) of the summed block coordinates, i.e. a checkerboard of blocks
+    subset=zero;
+    for(int mu=0;mu<Nd;mu++){
+      LatticeCoordinate(coor,mu+1); // NOTE(review): dimensions 1..Nd, skipping 0 - confirm for 4d fields
+      coor = div(coor,block);
+      subset = subset+coor;
+    }
+    subset = mod(subset,(Integer)2);
+    
+    ShiftedMdagMLinearOperator<Matrix,FineField> fMdagMOp(_SmootherMatrix,0.0);
+    Chebyshev<FineField> Cheby  (params.lo,params.hi,params.order,InverseApproximation);
+
+    RealD resid;
+    for(int i=0;i<params.steps;i++){
+      
+      // Even domain residual (logged over the whole lattice, before masking)
+      _FineOperator.Op(psi,vec1);// vec1 = A psi
+      r= src - vec1 ;
+      resid = norm2(r) /norm2(src); 
+      std::cout << "SAP "<<i<<" resid "<<resid<<std::endl;
+
+      // Even domain solve
+      r= where(subset==(Integer)0,r,zz);
+      _SmootherOperator.AdjOp(r,vec1);
+      Cheby(fMdagMOp,vec1,vec2);    // solves  MdagM = g5 M g5M
+      psi = psi + vec2;  
+
+      // Odd domain residual (logged after masking to the odd blocks)
+      _FineOperator.Op(psi,vec1);// vec1 = A psi
+      r= src - vec1 ;
+      r= where(subset==(Integer)1,r,zz);
+
+      resid = norm2(r) /norm2(src); 
+      std::cout << "SAP "<<i<<" resid "<<resid<<std::endl;
+      
+      // Odd domain solve
+      _SmootherOperator.AdjOp(r,vec1);
+      Cheby(fMdagMOp,vec1,vec2);    // solves  MdagM = g5 M g5M
+      psi = psi + vec2;  
+
+      _FineOperator.Op(psi,vec1);// full-lattice residual after both half sweeps
+      r= src - vec1 ;
+      resid = norm2(r) /norm2(src); 
+      std::cout << "SAP "<<i<<" resid "<<resid<<std::endl;
+
+    }
+
+  };
+
+  void SmootherTest (const FineField & in){
+    // Logs the relative residual of the Chebyshev smoother for several lower bounds and orders.
+    FineField vec1(in._grid);
+    FineField vec2(in._grid);
+    RealD lo[3] = { 0.5, 1.0, 2.0};
+
+    //    MdagMLinearOperator<Matrix,FineField>        fMdagMOp(_FineMatrix);
+    ShiftedMdagMLinearOperator<Matrix,FineField> fMdagMOp(_SmootherMatrix,0.0);
+
+    RealD Ni,r;
+
+    Ni = norm2(in);
+    // Scan lo in {0.5,1,2} and ord in {5,10,20,40}; the upper bound is fixed at 70
+    for(int ilo=0;ilo<3;ilo++){
+      for(int ord=5;ord<50;ord*=2){
+
+	_SmootherOperator.AdjOp(in,vec1);
+
+	Chebyshev<FineField> Cheby  (lo[ilo],70.0,ord,InverseApproximation);
+	Cheby(fMdagMOp,vec1,vec2);    // solves  MdagM = g5 M g5M
+
+	_FineOperator.Op(vec2,vec1);// vec1 = A vec2
+	vec1  = in - vec1;   // residual in - A vec2
+	r=norm2(vec1);
+	std::cout<<GridLogMessage << "Smoother resid "<<std::sqrt(r/Ni)<<std::endl;
+
+      }
+    }
+  }
+
+  void operatorCheby(const FineField &in, FineField & out) {
+    // Chebyshev-smoothed cycle: smooth, coarse-grid correction, smooth again; residuals are logged.
+    CoarseVector Csrc(_CoarseOperator.Grid());
+    CoarseVector Ctmp(_CoarseOperator.Grid());
+    CoarseVector Csol(_CoarseOperator.Grid()); Csol=zero;
+
+    ConjugateGradient<CoarseVector>  CG(3.0e-3,100000);
+    //    ConjugateGradient<FineField>    fCG(3.0e-2,1000);
+
+    HermitianLinearOperator<CoarseOperator,CoarseVector>  HermOp(_CoarseOperator);
+    MdagMLinearOperator<CoarseOperator,CoarseVector>     MdagMOp(_CoarseOperator);
+    //    MdagMLinearOperator<Matrix,FineField>        fMdagMOp(_FineMatrix);
+    ShiftedMdagMLinearOperator<Matrix,FineField> fMdagMOp(_SmootherMatrix,0.0);
+
+    FineField vec1(in._grid);
+    FineField vec2(in._grid);
+
+    //    Chebyshev<FineField> Cheby    (0.5,70.0,30,InverseApproximation);
+    //    Chebyshev<FineField> ChebyAccu(0.5,70.0,30,InverseApproximation);
+    Chebyshev<FineField> Cheby    (params.lo,params.hi,params.order,InverseApproximation);
+    Chebyshev<FineField> ChebyAccu(params.lo,params.hi,params.order,InverseApproximation);
+    //    Cheby.JacksonSmooth();
+    //    ChebyAccu.JacksonSmooth();
+
+    //    _Aggregates.ProjectToSubspace  (Csrc,in);
+    //    _Aggregates.PromoteFromSubspace(Csrc,out);
+    //    std::cout<<GridLogMessage<<"Completeness: "<<std::sqrt(norm2(out)/norm2(in))<<std::endl;
+    
+    //    ofstream fout("smoother");
+    //    Cheby.csv(fout);
+
+    // V11 multigrid.
+    // Use a fixed chebyshev and hope hermiticity helps.
+
+    // To make a working smoother for indefinite operator
+    // must multiply by "Mdag" (ouch loses all low mode content)
+    // and apply to poly approx of (mdagm)^-1.
+    // so that we end up with an odd polynomial.
+
+    RealD Ni = norm2(in);
+
+    _SmootherOperator.AdjOp(in,vec1);// vec1 = Adag in
+    ChebyAccu(fMdagMOp,vec1,out);    // solves  MdagM = g5 M g5M
+
+    std::cout<<GridLogMessage << "Smoother norm "<<norm2(out)<<std::endl;
+
+    // Update with residual for out
+    _FineOperator.Op(out,vec1);// vec1 = A out
+    vec1  = in - vec1;   // vec1 = in - A out
+
+    RealD r = norm2(vec1);
+
+    std::cout<<GridLogMessage << "Smoother resid "<<std::sqrt(r/Ni)<< " " << r << " " << Ni <<std::endl;
+    
+    _Aggregates.ProjectToSubspace  (Csrc,vec1);
+    HermOp.AdjOp(Csrc,Ctmp);// Normal equations
+    CG(MdagMOp,Ctmp,Csol);
+    _Aggregates.PromoteFromSubspace(Csol,vec1); // Ass^{-1} [in - A Min]_s
+                                             // Q = Q[in - A Min]  
+    out = out+vec1;
+
+    // Three preconditioner smoothing -- hermitian if C3 = C1
+    // Recompute error
+    _FineOperator.Op(out,vec1);// vec1 = A out
+    vec1  = in - vec1;   // vec1 = in - A out
+    r=norm2(vec1);
+
+    std::cout<<GridLogMessage << "Coarse resid "<<std::sqrt(r/Ni)<<std::endl;
+
+    // Reapply smoother to the current residual and add the correction
+    _SmootherOperator.Op(vec1,vec2);
+    ChebyAccu(fMdagMOp,vec2,vec1);    // solves  MdagM = g5 M g5M
+    out =out+vec1;
+    _FineOperator.Op(out,vec1);   // recompute A out so the value logged below is a true residual
+    vec1  = in - vec1;   // vec1 = in - A out
+    r=norm2(vec1);
+    std::cout<<GridLogMessage << "Smoother resid "<<std::sqrt(r/Ni)<<std::endl;
+
+  }
+
+  void operatorSAP(const FineField &in, FineField & out) {
+    // SAP-smoothed cycle: SAP sweep, coarse-grid correction, second SAP sweep; residuals are logged.
+    CoarseVector Csrc(_CoarseOperator.Grid());
+    CoarseVector Ctmp(_CoarseOperator.Grid());
+    CoarseVector Csol(_CoarseOperator.Grid()); Csol=zero;
+
+    ConjugateGradient<CoarseVector>  CG(1.0e-3,100000);
+
+    HermitianLinearOperator<CoarseOperator,CoarseVector>  HermOp(_CoarseOperator);
+    MdagMLinearOperator<CoarseOperator,CoarseVector>     MdagMOp(_CoarseOperator);
+
+    FineField vec1(in._grid);
+    FineField vec2(in._grid);
+
+    _Aggregates.ProjectToSubspace  (Csrc,in);
+    _Aggregates.PromoteFromSubspace(Csrc,out);
+    std::cout<<GridLogMessage<<"Completeness: "<<std::sqrt(norm2(out)/norm2(in))<<std::endl;
+    // NOTE(review): out now holds the projected `in` and is the starting iterate of SAP below - confirm intended.
+
+    // To make a working smoother for indefinite operator
+    // must multiply by "Mdag" (ouch loses all low mode content)
+    // and apply to poly approx of (mdagm)^-1.
+    // so that we end up with an odd polynomial.
+    SAP(in,out);
+
+    // Update with residual for out
+    _FineOperator.Op(out,vec1);// vec1 = A out
+    vec1  = in - vec1;   // vec1 = in - A out
+
+    RealD r = norm2(vec1);
+    RealD Ni = norm2(in);
+    std::cout<<GridLogMessage << "SAP resid "<<std::sqrt(r/Ni)<< " " << r << " " << Ni <<std::endl;
+    
+    _Aggregates.ProjectToSubspace  (Csrc,vec1);
+    HermOp.AdjOp(Csrc,Ctmp);// Normal equations
+    CG(MdagMOp,Ctmp,Csol);
+    _Aggregates.PromoteFromSubspace(Csol,vec1); // Ass^{-1} [in - A Min]_s
+                                             // Q = Q[in - A Min]  
+    out = out+vec1;
+
+    // Three preconditioner smoothing -- hermitian if C3 = C1
+    // Recompute error
+    _FineOperator.Op(out,vec1);// vec1 = A out
+    vec1  = in - vec1;   // vec1 = in - A out
+    r=norm2(vec1);
+
+    std::cout<<GridLogMessage << "Coarse resid "<<std::sqrt(r/Ni)<<std::endl;
+
+    // Reapply smoother. SAP() reads its second argument as the starting iterate,
+    // so vec2 is zeroed first (a freshly constructed field is not assumed to be zero).
+    vec2=zero;
+    SAP(vec1,vec2);
+    out =out+vec2;
+    // Update with residual for out
+    _FineOperator.Op(out,vec1);// vec1 = A out
+    vec1  = in - vec1;   // vec1 = in - A out
+
+    r = norm2(vec1);
+    Ni = norm2(in);
+    std::cout<<GridLogMessage << "SAP resid(post) "<<std::sqrt(r/Ni)<< " " << r << " " << Ni <<std::endl;
+
+  }
+
+};
+
+#if 0
+int main (int argc, char ** argv)
+{
+  Grid_init(&argc,&argv);
+  // NOTE(review): disabled by the enclosing #if 0; FGrid, FrbGrid and Coarse5d are used below but not declared here.
+  const int Ls=params.Ls;
+
+  GridCartesian         * UGrid   = SpaceTimeGrid::makeFourDimGrid(GridDefaultLatt(), GridDefaultSimd(Nd,vComplex::Nsimd()),GridDefaultMpi());
+  GridRedBlackCartesian * UrbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(UGrid);
+
+  ///////////////////////////////////////////////////
+  // Construct a coarsened grid; utility for this?
+  ///////////////////////////////////////////////////
+  std::vector<int> block ({2,2,2,2});
+  const int nbasis= 32;
+
+  std::vector<int> clatt = GridDefaultLatt();
+  for(int d=0;d<clatt.size();d++){
+    clatt[d] = clatt[d]/block[d];
+  }
+  GridCartesian *Coarse4d =  SpaceTimeGrid::makeFourDimGrid(clatt, GridDefaultSimd(Nd,vComplex::Nsimd()),GridDefaultMpi());;
+
+  std::vector<int> seeds4({1,2,3,4});
+  std::vector<int> seeds5({5,6,7,8});
+  std::vector<int> cseeds({5,6,7,8});
+  GridParallelRNG          RNG5(FGrid);   RNG5.SeedFixedIntegers(seeds5);
+  GridParallelRNG          RNG4(UGrid);   RNG4.SeedFixedIntegers(seeds4);
+  GridParallelRNG          CRNG(Coarse5d);CRNG.SeedFixedIntegers(cseeds);
+
+  Gamma g5(Gamma::Algebra::Gamma5);
+
+  LatticeFermion    src(FGrid); gaussian(RNG5,src);// src=src+g5*src;
+  LatticeFermion result(FGrid); result=zero;
+  LatticeFermion    ref(FGrid); ref=zero;
+  LatticeFermion    tmp(FGrid);
+  LatticeFermion    err(FGrid);
+  LatticeGaugeField Umu(UGrid); 
+  LatticeGaugeField UmuDD(UGrid); 
+  LatticeColourMatrix U(UGrid);
+  LatticeColourMatrix zz(UGrid);
+
+  FieldMetaData header;
+  std::string file("./ckpoint_lat.4000");
+  NerscIO::readConfiguration(Umu,header,file);
+
+
+  if ( params.domaindecompose ) { 
+    Lattice<iScalar<vInteger> > coor(UGrid);
+    zz=zero;
+    for(int mu=0;mu<Nd;mu++){
+      LatticeCoordinate(coor,mu);
+      U = PeekIndex<LorentzIndex>(Umu,mu);
+      U = where(mod(coor,params.domainsize)==(Integer)0,zz,U);
+      PokeIndex<LorentzIndex>(UmuDD,U,mu);
+    }
+  } else { 
+    UmuDD = Umu;
+  }
+  //  SU3::ColdConfiguration(RNG4,Umu);
+  //  SU3::TepidConfiguration(RNG4,Umu);
+  //  SU3::HotConfiguration(RNG4,Umu);
+  //  Umu=zero;
+
+  RealD mass=params.mq;
+  RealD M5=1.8;
+
+  std::cout<<GridLogMessage << "**************************************************"<< std::endl;
+  std::cout<<GridLogMessage << "Building g5R5 hermitian DWF operator" <<std::endl;
+  std::cout<<GridLogMessage << "**************************************************"<< std::endl;
+  DomainWallFermionR Ddwf(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5);
+  DomainWallFermionR DdwfDD(UmuDD,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5);
+
+  typedef Aggregation<vSpinColourVector,vTComplex,nbasis>              Subspace;
+  typedef CoarsenedMatrix<vSpinColourVector,vTComplex,nbasis>          CoarseOperator;
+  typedef CoarseOperator::CoarseVector                                 CoarseVector;
+
+  std::cout<<GridLogMessage << "**************************************************"<< std::endl;
+  std::cout<<GridLogMessage << "Calling Aggregation class to build subspace" <<std::endl;
+  std::cout<<GridLogMessage << "**************************************************"<< std::endl;
+  MdagMLinearOperator<DomainWallFermionR,LatticeFermion> HermDefOp(Ddwf);
+  Subspace Aggregates(Coarse5d,FGrid);
+  //  Aggregates.CreateSubspace(RNG5,HermDefOp,nbasis);
+  assert ( (nbasis & 0x1)==0);
+  int nb=nbasis/2;
+  std::cout<<GridLogMessage << " nbasis/2 = "<<nb<<std::endl;
+  //  Aggregates.CreateSubspace(RNG5,HermDefOp,nb);
+  Aggregates.CreateSubspaceLanczos(RNG5,HermDefOp,nb);
+  for(int n=0;n<nb;n++){
+    G5R5(Aggregates.subspace[n+nb],Aggregates.subspace[n]);
+    std::cout<<GridLogMessage<<n<<" subspace "<<norm2(Aggregates.subspace[n+nb])<<" "<<norm2(Aggregates.subspace[n]) <<std::endl;
+  }
+  for(int n=0;n<nbasis;n++){
+    std::cout<<GridLogMessage << "vec["<<n<<"] = "<<norm2(Aggregates.subspace[n])  <<std::endl;
+  }
+
+//  for(int i=0;i<nbasis;i++){
+//    result =     Aggregates.subspace[i];
+//    Aggregates.subspace[i]=result+g5*result;
+//  }
+  result=zero;
+  
+  std::cout<<GridLogMessage << "**************************************************"<< std::endl;
+  std::cout<<GridLogMessage << "Building coarse representation of Indef operator" <<std::endl;
+  std::cout<<GridLogMessage << "**************************************************"<< std::endl;
+  Gamma5R5HermitianLinearOperator<DomainWallFermionR,LatticeFermion> HermIndefOp(Ddwf);
+  Gamma5R5HermitianLinearOperator<DomainWallFermionR,LatticeFermion> HermIndefOpDD(DdwfDD);
+  CoarsenedMatrix<vSpinColourVector,vTComplex,nbasis> LDOp(*Coarse5d);
+  LDOp.CoarsenOperator(FGrid,HermIndefOp,Aggregates);
+
+  std::cout<<GridLogMessage << "**************************************************"<< std::endl;
+  std::cout<<GridLogMessage << "Testing some coarse space solvers  " <<std::endl;
+  std::cout<<GridLogMessage << "**************************************************"<< std::endl;
+  CoarseVector c_src (Coarse5d);
+  CoarseVector c_res (Coarse5d);
+  gaussian(CRNG,c_src);
+  c_res=zero;
+
+  std::cout<<GridLogMessage << "**************************************************"<< std::endl;
+  std::cout<<GridLogMessage << "Solving posdef-CG on coarse space "<< std::endl;
+  std::cout<<GridLogMessage << "**************************************************"<< std::endl;
+  MdagMLinearOperator<CoarseOperator,CoarseVector> PosdefLdop(LDOp);
+  ConjugateGradient<CoarseVector> CG(1.0e-6,100000);
+  //  CG(PosdefLdop,c_src,c_res);
+
+  //  std::cout<<GridLogMessage << "**************************************************"<< std::endl;
+  //  std::cout<<GridLogMessage << "Solving indef-MCR on coarse space "<< std::endl;
+  //  std::cout<<GridLogMessage << "**************************************************"<< std::endl;
+  //  HermitianLinearOperator<CoarseOperator,CoarseVector> HermIndefLdop(LDOp);
+  //  ConjugateResidual<CoarseVector> MCR(1.0e-6,100000);
+  //MCR(HermIndefLdop,c_src,c_res);
+
+  std::cout<<GridLogMessage << "**************************************************"<< std::endl;
+  std::cout<<GridLogMessage << "Building deflation preconditioner "<< std::endl;
+  std::cout<<GridLogMessage << "**************************************************"<< std::endl;
+
+  MultiGridPreconditioner <vSpinColourVector,vTComplex,nbasis,DomainWallFermionR> Precon  (Aggregates, LDOp,
+											   HermIndefOp,Ddwf,
+											   HermIndefOp,Ddwf);
+
+  MultiGridPreconditioner <vSpinColourVector,vTComplex,nbasis,DomainWallFermionR> PreconDD(Aggregates, LDOp,
+											   HermIndefOp,Ddwf,
+											   HermIndefOpDD,DdwfDD);
+  TrivialPrecon<LatticeFermion> simple;
+
+  std::cout<<GridLogMessage << "**************************************************"<< std::endl;
+  std::cout<<GridLogMessage << "Testing smoother efficacy"<< std::endl;
+  std::cout<<GridLogMessage << "**************************************************"<< std::endl;
+  //  Precon.SmootherTest(src);
+
+  std::cout<<GridLogMessage << "**************************************************"<< std::endl;
+  std::cout<<GridLogMessage << "Testing DD smoother efficacy"<< std::endl;
+  std::cout<<GridLogMessage << "**************************************************"<< std::endl;
+  //  PreconDD.SmootherTest(src);
+
+  std::cout<<GridLogMessage << "**************************************************"<< std::endl;
+  std::cout<<GridLogMessage << "Testing SAP smoother efficacy"<< std::endl;
+  std::cout<<GridLogMessage << "**************************************************"<< std::endl;
+  //  PreconDD.SAP(src,result);
+
+  std::cout<<GridLogMessage << "**************************************************"<< std::endl;
+  std::cout<<GridLogMessage << "Unprec CG "<< std::endl;
+  std::cout<<GridLogMessage << "**************************************************"<< std::endl;
+
+  //  TrivialPrecon<LatticeFermion> simple;
+  //  ConjugateGradient<LatticeFermion> fCG(1.0e-8,100000);
+  //  fCG(HermDefOp,src,result);
+  //  exit(0);
+
+  std::cout<<GridLogMessage << "**************************************************"<< std::endl;
+  std::cout<<GridLogMessage << "Testing GCR on indef matrix "<< std::endl;
+  std::cout<<GridLogMessage << "**************************************************"<< std::endl;
+  //  PrecGeneralisedConjugateResidual<LatticeFermion> UPGCR(1.0e-8,100000,simple,8,128);
+  //  UPGCR(HermIndefOp,src,result);
+
+  
+  /// Get the max eval
+  std::cout<<GridLogMessage << "**************************************************"<< std::endl;
+  std::cout<<GridLogMessage <<" Applying power method to find spectral range      "<<std::endl;
+  std::cout<<GridLogMessage << "**************************************************"<< std::endl;
+  Precon.PowerMethod(src);
+
+
+  std::cout<<GridLogMessage << "**************************************************"<< std::endl;
+  std::cout<<GridLogMessage << "Building a two level DDPGCR "<< std::endl;
+  std::cout<<GridLogMessage << "**************************************************"<< std::endl;
+  //  PrecGeneralisedConjugateResidual<LatticeFermion> PGCRDD(1.0e-8,100000,PreconDD,8,128);
+  //  result=zero;
+  //  std::cout<<GridLogMessage<<"checking norm src "<<norm2(src)<<std::endl;
+  //  PGCRDD(HermIndefOp,src,result);
+
+  std::cout<<GridLogMessage << "**************************************************"<< std::endl;
+  std::cout<<GridLogMessage << "Building a two level PGCR "<< std::endl;
+  std::cout<<GridLogMessage << "**************************************************"<< std::endl;
+  PrecGeneralisedConjugateResidual<LatticeFermion> PGCR(1.0e-8,100000,Precon,8,8);
+  std::cout<<GridLogMessage<<"checking norm src "<<norm2(src)<<std::endl;
+  result=zero;
+  PGCR(HermIndefOp,src,result);
+
+  std::cout<<GridLogMessage << "**************************************************"<< std::endl;
+  std::cout<<GridLogMessage << "Red Black Prec CG "<< std::endl;
+  std::cout<<GridLogMessage << "**************************************************"<< std::endl;
+  SchurDiagMooeeOperator<DomainWallFermionR,LatticeFermion> HermOpEO(Ddwf);
+  ConjugateGradient<LatticeFermion> pCG(1.0e-8,10000);
+
+  LatticeFermion    src_o(FrbGrid);
+  LatticeFermion result_o(FrbGrid);
+  pickCheckerboard(Odd,src_o,src);
+  result_o=zero;
+
+  pCG(HermOpEO,src_o,result_o);
+
+
+  std::cout<<GridLogMessage << "**************************************************"<< std::endl;
+  std::cout<<GridLogMessage << "Done "<< std::endl;
+  std::cout<<GridLogMessage << "**************************************************"<< std::endl;
+  Grid_finalize();
+}
+
+#else
+int main (int argc, char ** argv)
+{
+  Grid_init(&argc,&argv);
+  // Work-in-progress driver: sets up grids, a hot gauge field, Wilson operators and a random subspace; no solve is run.
+  params.domaindecompose = 1; // every run parameter is hard-coded to 1 here
+  params.domainsize= 1;
+  params.order = 1;
+  params.Ls = 1;
+  params.mq = 1;
+  params.lo = 1;
+  params.hi = 1;
+  params.steps = 1;
+
+  const int Ls=params.Ls;
+
+  GridCartesian         * UGrid   = SpaceTimeGrid::makeFourDimGrid(GridDefaultLatt(), GridDefaultSimd(Nd,vComplex::Nsimd()),GridDefaultMpi());
+  GridRedBlackCartesian * UrbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(UGrid);
+
+  ///////////////////////////////////////////////////
+  // Construct a coarsened grid; utility for this?
+  ///////////////////////////////////////////////////
+  std::vector<int> block ({4,4,4,4});
+  const int nbasis= 32;
+
+  std::vector<int> clatt = GridDefaultLatt();
+  for(int d=0;d<static_cast<int>(clatt.size());d++){ // cast avoids a signed/unsigned comparison
+    clatt[d] = clatt[d]/block[d];
+  }
+  GridCartesian *Coarse4d =  SpaceTimeGrid::makeFourDimGrid(clatt, GridDefaultSimd(Nd,vComplex::Nsimd()),GridDefaultMpi());
+
+  std::vector<int> seedsFine({1,2,3,4});
+  std::vector<int> seedsCoarse({5,6,7,8});
+
+  GridParallelRNG          pRNGFine(UGrid);      pRNGFine.SeedFixedIntegers(seedsFine);
+  GridParallelRNG          pRNGCoarse(Coarse4d); pRNGCoarse.SeedFixedIntegers(seedsCoarse);
+
+  Gamma g5(Gamma::Algebra::Gamma5);
+
+  LatticeFermion    src(UGrid); gaussian(pRNGFine,src);// src=src+g5*src;
+  LatticeFermion result(UGrid); result=zero;
+  LatticeFermion    ref(UGrid); ref=zero;
+  LatticeFermion    tmp(UGrid);
+  LatticeFermion    err(UGrid);
+  LatticeGaugeField Umu(UGrid); SU3::HotConfiguration(pRNGFine,Umu);
+  LatticeGaugeField UmuDD(UGrid); 
+  LatticeColourMatrix U(UGrid);
+  LatticeColourMatrix zz(UGrid);
+  // NOTE(review): with domainsize = 1, mod(coor,1)==0 holds on every site, so all UmuDD links become zero.
+  if ( params.domaindecompose ) { 
+    Lattice<iScalar<vInteger> > coor(UGrid);
+    zz=zero;
+    for(int mu=0;mu<Nd;mu++){
+      LatticeCoordinate(coor,mu);
+      U = PeekIndex<LorentzIndex>(Umu,mu);
+      U = where(mod(coor,params.domainsize)==(Integer)0,zz,U);
+      PokeIndex<LorentzIndex>(UmuDD,U,mu);
+    }
+  } else { 
+    UmuDD = Umu;
+  }
+
+  RealD mass=params.mq;
+  RealD M5=1.8;
+
+  std::cout<<GridLogMessage << "**************************************************"<< std::endl;
+  std::cout<<GridLogMessage << "Hello "<< std::endl;
+  std::cout<<GridLogMessage << "**************************************************"<< std::endl;
+
+  std::cout << params << std::endl;
+
+  std::cout<<GridLogMessage << "**************************************************"<< std::endl;
+  std::cout<<GridLogMessage << "Building the wilson operator" <<std::endl;
+  std::cout<<GridLogMessage << "**************************************************"<< std::endl;
+  
+  WilsonFermionR Dw(Umu,*UGrid,*UrbGrid,mass);
+  WilsonFermionR DwDD(UmuDD,*UGrid,*UrbGrid,mass);
+
+  typedef Aggregation<vSpinColourVector,vTComplex,nbasis>              Subspace;
+  typedef CoarsenedMatrix<vSpinColourVector,vTComplex,nbasis>          CoarseOperator;
+  typedef CoarseOperator::CoarseVector                                 CoarseVector;
+
+  std::cout<<GridLogMessage << "**************************************************"<< std::endl;
+  std::cout<<GridLogMessage << "Calling Aggregation class to build subspace" <<std::endl;
+  std::cout<<GridLogMessage << "**************************************************"<< std::endl;
+
+  Subspace Aggregates(Coarse4d,UGrid);
+  assert ( (nbasis & 0x1)==0);
+  int nb=nbasis/2;
+  std::cout<<GridLogMessage << " nbasis/2 = "<<nb<<std::endl;
+
+  Aggregates.CreateSubspaceRandom(pRNGFine);
+  // NOTE(review): G5R5 looks like a five-dimensional helper; confirm it is right for these 4d Wilson fields.
+  for(int n=0;n<nb;n++){
+    G5R5(Aggregates.subspace[n+nb],Aggregates.subspace[n]);
+    std::cout<<GridLogMessage<<n<<" subspace "<<norm2(Aggregates.subspace[n+nb])<<" "<<norm2(Aggregates.subspace[n]) <<std::endl;
+  }
+  for(int n=0;n<nbasis;n++){
+    std::cout<<GridLogMessage << "vec["<<n<<"] = "<<norm2(Aggregates.subspace[n])  <<std::endl;
+  }
+
+  std::cout<<GridLogMessage << "**************************************************"<< std::endl;
+  std::cout<<GridLogMessage << "Building coarse representation of Dirac operator" <<std::endl;
+  std::cout<<GridLogMessage << "**************************************************"<< std::endl;
+
+  CoarsenedMatrix<vSpinColourVector,vTComplex,nbasis> LDOp(*Coarse4d);
+  // LDOp.CoarsenOperator(UGrid,Dw,Aggregates); // problem with this line
+
+  std::cout<<GridLogMessage << "**************************************************"<< std::endl;
+  std::cout<<GridLogMessage << "Testing some coarse space solvers  " <<std::endl;
+  std::cout<<GridLogMessage << "**************************************************"<< std::endl;
+
+  CoarseVector c_src (Coarse4d);
+  CoarseVector c_res (Coarse4d);
+  gaussian(pRNGCoarse,c_src);
+  c_res=zero;
+
+  Grid_finalize();
+}
+#endif

From d5f661ba709cf1b43181c383ac9a505c6a8d1b62 Mon Sep 17 00:00:00 2001
From: Daniel Richtmann <daniel.richtmann@physik.uni-regensburg.de>
Date: Wed, 25 Oct 2017 10:38:26 +0200
Subject: [PATCH 004/130] Save intermediate state

---
 lib/algorithms/iterative/MinimalResidual.h | 148 +++++++++++++++++++++
 1 file changed, 148 insertions(+)

diff --git a/lib/algorithms/iterative/MinimalResidual.h b/lib/algorithms/iterative/MinimalResidual.h
index 878deb24..a5104e03 100644
--- a/lib/algorithms/iterative/MinimalResidual.h
+++ b/lib/algorithms/iterative/MinimalResidual.h
@@ -57,6 +57,18 @@ class MinimalResidual : public OperatorFunction<Field> {
     psi.checkerboard = src.checkerboard; // Check
     conformable(psi, src);
 
+    /////
+    RealD cp, c, a, d, b, ssq, qq, b_pred;
+
+    Field p(src);
+    Field mmp(src);
+    Field r(src);
+
+    // Initial residual computation & set up
+    RealD guess = norm2(psi);
+    assert(std::isnan(guess) == 0);
+    /////
+
     Field p {src};
     Field matrixTimesPsi {src};
     Field r {src};
@@ -192,6 +204,142 @@ class MinimalResidual : public OperatorFunction<Field> {
     if (ErrorOnNoConverge) assert(0);
     IterationsToComplete = k;
   }
+
+  //! Minimal-residual (MR) algorithm for a generic Linear Operator
+  /*! \ingroup invert
+   * This subroutine uses the Minimal Residual (MR) algorithm to determine
+   * the solution of the set of linear equations. Here we allow M to be nonhermitian.
+   *
+   *    M . Psi  =  src
+   *
+   * Algorithm:
+   *
+   *  Psi[0]                                      Argument
+   *  r[0]    :=  src  -  M . Psi[0] ;            Initial residual
+   *  IF |r[0]| <= RsdCG |src| THEN RETURN;       Converged?
+   *  FOR k FROM 1 TO MaxCG DO                    MR iterations
+   *      a[k-1]  := <M.r[k-1],r[k-1]> / <M.r[k-1],M.r[k-1]> ;
+   *      ap[k-1] := MRovpar * a[k] ;             Overrelaxtion step
+   *      Psi[k]  += ap[k-1] r[k-1] ;                   New solution vector
+   *      r[k]    -= ap[k-1] A . r[k-1] ;         New residual
+   *      IF |r[k]| <= RsdCG |src| THEN RETURN;   Converged?
+
+   * Arguments:
+
+   *  \param M       Linear Operator             (Read)
+   *  \param src     Source                      (Read)
+   *  \param psi     Solution                    (Modify)
+   *  \param RsdCG   MR residual accuracy        (Read)
+   *  \param MRovpar Overrelaxation parameter    (Read)
+   *  \param MaxIterations   Maximum MR iterations       (Read)
+
+   * Local Variables:
+
+   *  r         Residual vector
+   *  cp        | r[k] |**2
+   *  c         | r[k-1] |**2
+   *  k         MR iteration counter
+   *  a         a[k]
+   *  d         < M.r[k], M.r[k] >
+   *  R_Aux     Temporary for  M.Psi
+   *  Mr        Temporary for  M.r
+
+   * Global Variables:
+
+   *  MaxIterations       Maximum number of MR iterations allowed
+   *  RsdCG       Maximum acceptable MR residual (relative to source)
+   *
+   * Subroutines:
+   *
+   *  M           Apply matrix to vector
+   *
+   * @{
+   */
+
+  // TODO: figure out what isign from chroma is supposed to do
+  void tmpImplFromChroma(LinearOperatorBase<Field> &Linop, const Field &src,
+                  Field &psi) {
+    psi.checkerboard = src.checkerboard;
+    conformable(psi, src);
+
+    Complex a, c;
+    Complex c;
+    RealD d;
+
+    Field Mr(src);
+    Field r(src);
+
+    // Initial residual computation & set up
+    RealD guess = norm2(psi);
+    assert(std::isnan(guess) == 0);
+
+    RealD ssq = norm2(src); // flopcount.addSiteFlops(4*Nc*Ns,s); // stands for "source squared"
+    RealD rsd_sq = Tolerance * Tolerance * ssq; // flopcount.addSiteFlops(4*Nc*Ns,s); // stands for "residual squared"
+
+    /*  r[0]  :=  src - M . Psi[0] */
+    /*  r  :=  M . Psi  */
+    M(Mr, psi, isign); // flopcount.addFlops(M.nFlops());
+
+    r = src - Mr; // flopcount.addSiteFlops(2*Nc*Ns,s);
+
+    RealD cp = norm2(r); /*  Cp = |r[0]|^2 */ /* 2 Nc Ns  flops */ // flopcount.addSiteFlops(4*Nc*Ns, s);
+
+    if (cp <= rsd_sq) { /*  IF |r[0]| <= Tolerance|src| THEN RETURN; */
+      return;
+    }
+
+    std::cout << GridLogIterative << std::setprecision(4)
+              << "MinimalResidual: k=0 residual " << cp << " target " << rsq_sq << std::endl;
+
+    /*  FOR k FROM 1 TO MaxIterations DO */
+    auto k = 0;
+    while( (k < MaxIterations) && (cp > rsd_sq) )
+    {
+      ++k;
+
+      /*  a[k-1] := < M.r[k-1], r[k-1] >/ < M.r[k-1], M.r[k-1] > ; */
+
+      M(Mr, r, isign); /*  Mr = M * r  */  // flopcount.addFlops(M.nFlops());
+
+      c = innerProduct(Mr, r); /*  c = < M.r, r > */ // flopcount.addSiteFlops(4*Nc*Ns,s);
+
+      d = norm2(Mr); /*  d = | M.r | ** 2  */ // flopcount.addSiteFlops(4*Nc*Ns,s);
+
+      a = c / d;  /*  a = c / d */
+
+      a = a * MRovpar; /*  a[k-1] *= MRovpar ; */
+
+
+      psi = psi + r * a;  /*  Psi[k] += a[k-1] r[k-1] ; */ // flopcount.addSiteFlops(4*Nc*Ns,s);
+
+      r = r - Mr * a; /*  r[k] -= a[k-1] M . r[k-1] ; */ // flopcount.addSiteFlops(4*Nc*Ns,s);
+
+      cp = norm2(r); /*  cp  =  | r[k] |**2 */ // flopcount.addSiteFlops(4*Nc*Ns,s);
+
+//    std::cout << "InvMR: k = " << k << "  cp = " << cp << endl;
+    }
+
+    IterationsToComplete = k;
+
+    res.resid   = sqrt(cp);
+    swatch.stop();
+    std::cout << "InvMR: k = " << k << "  cp = " << cp << endl;
+    // flopcount.report("invmr", swatch.getTimeInSeconds());
+
+    // Compute the actual residual
+    {
+      M(Mr, psi, isign);
+      RealD actual_res = norm2(src- Mr);
+      res.resid = sqrt(actual_res);
+    }
+
+    if ( IterationsToComplete == MaxIterations )
+      std::cerr << "Nonconvergence Warning" << endl;
+
+    END_CODE();
+    return res;
+
+  }
 };
 }
 #endif

From 074db32e5484161e70659825da27a86aa270ffd2 Mon Sep 17 00:00:00 2001
From: Daniel Richtmann <daniel.richtmann@gmail.com>
Date: Fri, 27 Oct 2017 14:08:48 +0200
Subject: [PATCH 005/130] Fix build of gmres test

---
 tests/solver/Test_wilson_gmres_unprec.cc | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/solver/Test_wilson_gmres_unprec.cc b/tests/solver/Test_wilson_gmres_unprec.cc
index 7ac20840..f371278c 100644
--- a/tests/solver/Test_wilson_gmres_unprec.cc
+++ b/tests/solver/Test_wilson_gmres_unprec.cc
@@ -52,7 +52,7 @@ int main (int argc, char ** argv)
   std::vector<int> simd_layout = GridDefaultSimd(Nd,vComplex::Nsimd());
   std::vector<int> mpi_layout  = GridDefaultMpi();
   GridCartesian               Grid(latt_size,simd_layout,mpi_layout);
-  GridRedBlackCartesian     RBGrid(latt_size,simd_layout,mpi_layout);
+  GridRedBlackCartesian     RBGrid(&Grid);
 
   std::vector<int> seeds({1,2,3,4});
   GridParallelRNG          pRNG(&Grid);  pRNG.SeedFixedIntegers(seeds);

From f61c0b5d03660a20d67ca75bee0b159db8e5dfa5 Mon Sep 17 00:00:00 2001
From: Daniel Richtmann <daniel.richtmann@gmail.com>
Date: Fri, 27 Oct 2017 14:09:02 +0200
Subject: [PATCH 006/130] Very early version of MR solver

---
 lib/algorithms/Algorithms.h                |   1 +
 lib/algorithms/iterative/MinimalResidual.h | 261 ++++++---------------
 2 files changed, 67 insertions(+), 195 deletions(-)

diff --git a/lib/algorithms/Algorithms.h b/lib/algorithms/Algorithms.h
index af83df67..503092db 100644
--- a/lib/algorithms/Algorithms.h
+++ b/lib/algorithms/Algorithms.h
@@ -47,6 +47,7 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk>
 #include <Grid/algorithms/iterative/ConjugateGradientMixedPrec.h>
 #include <Grid/algorithms/iterative/BlockConjugateGradient.h>
 #include <Grid/algorithms/iterative/ConjugateGradientReliableUpdate.h>
+#include <Grid/algorithms/iterative/MinimalResidual.h>
 #include <Grid/algorithms/iterative/GeneralisedMinimalResidual.h>
 #include <Grid/algorithms/iterative/ImplicitlyRestartedLanczos.h>
 #include <Grid/algorithms/CoarsenedMatrix.h>
diff --git a/lib/algorithms/iterative/MinimalResidual.h b/lib/algorithms/iterative/MinimalResidual.h
index a5104e03..3229b408 100644
--- a/lib/algorithms/iterative/MinimalResidual.h
+++ b/lib/algorithms/iterative/MinimalResidual.h
@@ -38,177 +38,24 @@ namespace Grid {
 // single input vec, single output vec.
 /////////////////////////////////////////////////////////////
 
-template <class Field>
-class MinimalResidual : public OperatorFunction<Field> {
+template<class Field> class MinimalResidual : public OperatorFunction<Field> {
  public:
-  bool ErrorOnNoConverge;  // throw an assert when the MR fails to converge.
-                           // Defaults true.
-  RealD Tolerance;
+  bool ErrorOnNoConverge; // throw an assert when the MR fails to converge.
+                          // Defaults true.
+  RealD   Tolerance;
   Integer MaxIterations;
-  Integer IterationsToComplete; //Number of iterations the MR took to finish. Filled in upon completion
-  
+  Integer IterationsToComplete; // Number of iterations the MR took to finish. Filled in upon completion
+
   MinimalResidual(RealD tol, Integer maxit, bool err_on_no_conv = true)
-      : Tolerance(tol),
-        MaxIterations(maxit),
-        ErrorOnNoConverge(err_on_no_conv){};
-
-  void operator()(LinearOperatorBase<Field> &Linop, const Field &src,
-                  Field &psi) {
-    psi.checkerboard = src.checkerboard; // Check
-    conformable(psi, src);
-
-    /////
-    RealD cp, c, a, d, b, ssq, qq, b_pred;
-
-    Field p(src);
-    Field mmp(src);
-    Field r(src);
-
-    // Initial residual computation & set up
-    RealD guess = norm2(psi);
-    assert(std::isnan(guess) == 0);
-    /////
-
-    Field p {src};
-    Field matrixTimesPsi {src};
-    Field r {src};
-
-    RealD alpha {};
-
-    // Initial residual computation & set up
-    RealD guess = norm2(psi);
-    assert(std::isnan(guess) == 0);
-
-    Linop.HermOp(psi, matrixTimesPsi);
-
-    r = src - matrixTimesPsi;
-
-    Linop.HermOp(r, p);
-
-    alpha = innerProduct(p,r) / innerProduct(p,p);
-    psi = psi + alpha * r;
-    r   = r   - alpha * p;
-
-    Linop.HermOp(r, p);
-
-
-////////////////////////////////////////////////////////////////////////////////
-////////////////////////////////////////////////////////////////////////////////
-
-    // RealD cp, c, a, d, b, ssq, qq, b_pred;
-
-    Field p(src);
-    Field matrixTimesPsi(src);
-    // Field r(src);
-
-    // Initial residual computation & set up
-    RealD guess = norm2(psi);
-    assert(std::isnan(guess) == 0);
-
-    
-    Linop.HermOpAndNorm(psi, matrixTimesPsi, d, b);
-    
-
-    r = src - matrixTimesPsi;
-    p = matrixTimesPsi;
-
-    a = norm2(p);
-    cp = a;
-    ssq = norm2(src);
-
-    std::cout << GridLogIterative << std::setprecision(4)
-              << "MinimalResidual: guess " << guess << std::endl;
-    std::cout << GridLogIterative << std::setprecision(4)
-              << "MinimalResidual:   src " << ssq << std::endl;
-    std::cout << GridLogIterative << std::setprecision(4)
-              << "MinimalResidual:    mp " << d << std::endl;
-    std::cout << GridLogIterative << std::setprecision(4)
-              << "MinimalResidual:   matrixTimesPsi " << b << std::endl;
-    std::cout << GridLogIterative << std::setprecision(4)
-              << "MinimalResidual:  cp,r " << cp << std::endl;
-    std::cout << GridLogIterative << std::setprecision(4)
-              << "MinimalResidual:     p " << a << std::endl;
-
-    RealD rsq = Tolerance * Tolerance * ssq;
-
-    // Check if guess is really REALLY good :)
-    if (cp <= rsq) {
-      return;
-    }
-
-    std::cout << GridLogIterative << std::setprecision(4)
-              << "MinimalResidual: k=0 residual " << cp << " target " << rsq
-              << std::endl;
-
-    GridStopWatch LinalgTimer;
-    GridStopWatch MatrixTimer;
-    GridStopWatch SolverTimer;
-
-    SolverTimer.Start();
-    int k;
-    for (k = 1; k <= MaxIterations; k++) {
-      c = cp;
-
-      MatrixTimer.Start();
-      Linop.HermOpAndNorm(p, matrixTimesPsi, d, qq);
-      MatrixTimer.Stop();
-
-      LinalgTimer.Start();
-      //  RealD    qqck = norm2(matrixTimesPsi);
-      //  ComplexD dck  = innerProduct(p,matrixTimesPsi);
-
-      a = c / d;
-      b_pred = a * (a * qq - d) / c;
-
-      cp = axpy_norm(r, -a, matrixTimesPsi, r);
-      b = cp / c;
-
-      // Fuse these loops ; should be really easy
-      psi = a * p + psi;
-      p = p * b + r;
-
-      LinalgTimer.Stop();
-      std::cout << GridLogIterative << "MinimalResidual: Iteration " << k
-                << " residual " << cp << " target " << rsq << std::endl;
-
-      // Stopping condition
-      if (cp <= rsq) {
-        SolverTimer.Stop();
-        Linop.HermOpAndNorm(psi, matrixTimesPsi, d, qq);
-        p = matrixTimesPsi - src;
-
-        RealD matrixTimesPsiNorm = sqrt(norm2(matrixTimesPsi));
-        RealD psinorm = sqrt(norm2(psi));
-        RealD srcnorm = sqrt(norm2(src));
-        RealD resnorm = sqrt(norm2(p));
-        RealD true_residual = resnorm / srcnorm;
-
-        std::cout << GridLogMessage
-                  << "MinimalResidual: Converged on iteration " << k << std::endl;
-        std::cout << GridLogMessage << "Computed residual " << sqrt(cp / ssq)
-                  << " true residual " << true_residual << " target "
-                  << Tolerance << std::endl;
-        std::cout << GridLogMessage << "Time elapsed: Iterations "
-                  << SolverTimer.Elapsed() << " Matrix  "
-                  << MatrixTimer.Elapsed() << " Linalg "
-                  << LinalgTimer.Elapsed();
-        std::cout << std::endl;
-
-        if (ErrorOnNoConverge) assert(true_residual / Tolerance < 10000.0);
-	IterationsToComplete = k;	
-        return;
-      }
-    }
-    std::cout << GridLogMessage << "MinimalResidual did NOT converge"
-              << std::endl;
-    if (ErrorOnNoConverge) assert(0);
-    IterationsToComplete = k;
-  }
+    : Tolerance(tol),
+      MaxIterations(maxit),
+      ErrorOnNoConverge(err_on_no_conv){};
 
   //! Minimal-residual (MR) algorithm for a generic Linear Operator
   /*! \ingroup invert
    * This subroutine uses the Minimal Residual (MR) algorithm to determine
-   * the solution of the set of linear equations. Here we allow M to be nonhermitian.
+   * the solution of the set of linear equations. Here we allow M to be
+   nonhermitian.
    *
    *    M . Psi  =  src
    *
@@ -256,15 +103,13 @@ class MinimalResidual : public OperatorFunction<Field> {
    * @{
    */
 
-  // TODO: figure out what isign from chroma is supposed to do
-  void tmpImplFromChroma(LinearOperatorBase<Field> &Linop, const Field &src,
-                  Field &psi) {
+  void operator()(LinearOperatorBase<Field> &Linop, const Field &src, Field &psi) {
+
     psi.checkerboard = src.checkerboard;
     conformable(psi, src);
 
     Complex a, c;
-    Complex c;
-    RealD d;
+    RealD   d;
 
     Field Mr(src);
     Field r(src);
@@ -274,72 +119,98 @@ class MinimalResidual : public OperatorFunction<Field> {
     assert(std::isnan(guess) == 0);
 
     RealD ssq = norm2(src); // flopcount.addSiteFlops(4*Nc*Ns,s); // stands for "source squared"
-    RealD rsd_sq = Tolerance * Tolerance * ssq; // flopcount.addSiteFlops(4*Nc*Ns,s); // stands for "residual squared"
+    RealD rsd_sq = Tolerance * Tolerance * ssq; // flopcount.addSiteFlops(4*Nc*Ns,s); //
+                                     // stands for "residual squared"
 
     /*  r[0]  :=  src - M . Psi[0] */
     /*  r  :=  M . Psi  */
-    M(Mr, psi, isign); // flopcount.addFlops(M.nFlops());
+    // M(Mr, psi, isign); // flopcount.addFlops(M.nFlops());
+    Linop.Op(psi, Mr); // flopcount.addFlops(M.nFlops());
 
     r = src - Mr; // flopcount.addSiteFlops(2*Nc*Ns,s);
 
-    RealD cp = norm2(r); /*  Cp = |r[0]|^2 */ /* 2 Nc Ns  flops */ // flopcount.addSiteFlops(4*Nc*Ns, s);
+    RealD cp = norm2(r);   /*  Cp = |r[0]|^2 */
+      /* 2 Nc Ns  flops */ // flopcount.addSiteFlops(4*Nc*Ns, s);
+    // auto cp = norm2(r); /*  Cp = |r[0]|^2 */ /* 2 Nc Ns  flops */ //
+    // flopcount.addSiteFlops(4*Nc*Ns, s);
 
-    if (cp <= rsd_sq) { /*  IF |r[0]| <= Tolerance|src| THEN RETURN; */
+    if(cp <= rsd_sq) { /*  IF |r[0]| <= Tolerance|src| THEN RETURN; */
       return;
     }
 
     std::cout << GridLogIterative << std::setprecision(4)
-              << "MinimalResidual: k=0 residual " << cp << " target " << rsq_sq << std::endl;
+              << "MinimalResidual: k=0 residual " << cp << " target " << rsd_sq << std::endl;
 
-    /*  FOR k FROM 1 TO MaxIterations DO */
+    GridStopWatch LinalgTimer;
+    GridStopWatch MatrixTimer;
+    GridStopWatch SolverTimer;
+
+    SolverTimer.Start();
     auto k = 0;
-    while( (k < MaxIterations) && (cp > rsd_sq) )
-    {
+    while((k < MaxIterations) && (cp > rsd_sq)) {
       ++k;
 
       /*  a[k-1] := < M.r[k-1], r[k-1] >/ < M.r[k-1], M.r[k-1] > ; */
 
-      M(Mr, r, isign); /*  Mr = M * r  */  // flopcount.addFlops(M.nFlops());
+      MatrixTimer.Start();
+      // M(Mr, r, isign); /*  Mr = M * r  */  // flopcount.addFlops(M.nFlops());
+      Linop.Op(r, Mr); /*  Mr = M * r  */ // flopcount.addFlops(M.nFlops());
+      MatrixTimer.Stop();
+
+      LinalgTimer.Start();
 
       c = innerProduct(Mr, r); /*  c = < M.r, r > */ // flopcount.addSiteFlops(4*Nc*Ns,s);
 
       d = norm2(Mr); /*  d = | M.r | ** 2  */ // flopcount.addSiteFlops(4*Nc*Ns,s);
 
-      a = c / d;  /*  a = c / d */
+      a = c / d;
 
-      a = a * MRovpar; /*  a[k-1] *= MRovpar ; */
+      // a = a * MRovpar; /*  a[k-1] *= MRovpar ; */
 
-
-      psi = psi + r * a;  /*  Psi[k] += a[k-1] r[k-1] ; */ // flopcount.addSiteFlops(4*Nc*Ns,s);
+      psi = psi + r * a; /*  Psi[k] += a[k-1] r[k-1] ; */ // flopcount.addSiteFlops(4*Nc*Ns,s);
 
       r = r - Mr * a; /*  r[k] -= a[k-1] M . r[k-1] ; */ // flopcount.addSiteFlops(4*Nc*Ns,s);
 
       cp = norm2(r); /*  cp  =  | r[k] |**2 */ // flopcount.addSiteFlops(4*Nc*Ns,s);
 
-//    std::cout << "InvMR: k = " << k << "  cp = " << cp << endl;
+      LinalgTimer.Stop();
+
+      std::cout << GridLogIterative << "MinimalResidual: Iteration " << k
+                << " residual " << cp << " target " << rsd_sq << std::endl;
     }
+    SolverTimer.Stop();
 
     IterationsToComplete = k;
 
-    res.resid   = sqrt(cp);
-    swatch.stop();
-    std::cout << "InvMR: k = " << k << "  cp = " << cp << endl;
+    // res.resid   = sqrt(cp);
+    std::cout << "InvMR: k = " << k << "  cp = " << cp << std::endl;
     // flopcount.report("invmr", swatch.getTimeInSeconds());
 
+    std::cout << GridLogMessage << "MinimalResidual Converged on iteration " << k << std::endl;
+    std::cout << GridLogMessage << "\tComputed residual " << sqrt(cp / ssq)<<std::endl;
+    // std::cout << GridLogMessage << "\tTrue residual " << true_residual<<std::endl;
+    // std::cout << GridLogMessage << "\tTarget " << Tolerance << std::endl;
+
+    std::cout << GridLogMessage << "Time breakdown "<<std::endl;
+    std::cout << GridLogMessage << "\tElapsed    " << SolverTimer.Elapsed() <<std::endl;
+    std::cout << GridLogMessage << "\tMatrix     " << MatrixTimer.Elapsed() <<std::endl;
+    std::cout << GridLogMessage << "\tLinalg     " << LinalgTimer.Elapsed() <<std::endl;
+
     // Compute the actual residual
     {
-      M(Mr, psi, isign);
-      RealD actual_res = norm2(src- Mr);
-      res.resid = sqrt(actual_res);
+      // M(Mr, psi, isign);
+      Linop.Op(psi, Mr);
+      Field tmp = src - Mr;
+      // RealD actual_res = norm2(src-Mr);
+      RealD actual_res = norm2(tmp);
+      // res.resid = sqrt(actual_res);
     }
 
-    if ( IterationsToComplete == MaxIterations )
-      std::cerr << "Nonconvergence Warning" << endl;
-
-    END_CODE();
-    return res;
+    if(IterationsToComplete == MaxIterations)
+      std::cerr << "Nonconvergence Warning" << std::endl;
 
+    // return res;
   }
 };
-}
+} // namespace Grid
 #endif

From 2185b0d65120d8f7c8c91ad9259c22d87c49be4f Mon Sep 17 00:00:00 2001
From: Daniel Richtmann <daniel.richtmann@gmail.com>
Date: Fri, 27 Oct 2017 14:32:38 +0200
Subject: [PATCH 007/130] Correct author in the file

---
 lib/algorithms/iterative/MinimalResidual.h | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/lib/algorithms/iterative/MinimalResidual.h b/lib/algorithms/iterative/MinimalResidual.h
index 3229b408..686db169 100644
--- a/lib/algorithms/iterative/MinimalResidual.h
+++ b/lib/algorithms/iterative/MinimalResidual.h
@@ -6,9 +6,7 @@ Source file: ./lib/algorithms/iterative/MinimalResidual.h
 
 Copyright (C) 2015
 
-Author: Azusa Yamaguchi <ayamaguc@staffmail.ed.ac.uk>
-Author: Peter Boyle <paboyle@ph.ed.ac.uk>
-Author: paboyle <paboyle@ph.ed.ac.uk>
+Author: Daniel Richtmann <daniel.richtmann@ur.de>
 
 This program is free software; you can redistribute it and/or modify
 it under the terms of the GNU General Public License as published by

From 15dfa9f6631e80787897512ff2facabc05720fc6 Mon Sep 17 00:00:00 2001
From: Daniel Richtmann <daniel.richtmann@gmail.com>
Date: Fri, 27 Oct 2017 14:33:25 +0200
Subject: [PATCH 008/130] Change stopping criterion implementation in MR solver
 + some cleanup

---
 lib/algorithms/iterative/MinimalResidual.h | 160 +++++++--------------
 1 file changed, 51 insertions(+), 109 deletions(-)

diff --git a/lib/algorithms/iterative/MinimalResidual.h b/lib/algorithms/iterative/MinimalResidual.h
index 686db169..87dc5941 100644
--- a/lib/algorithms/iterative/MinimalResidual.h
+++ b/lib/algorithms/iterative/MinimalResidual.h
@@ -42,64 +42,11 @@ template<class Field> class MinimalResidual : public OperatorFunction<Field> {
                           // Defaults true.
   RealD   Tolerance;
   Integer MaxIterations;
-  Integer IterationsToComplete; // Number of iterations the MR took to finish. Filled in upon completion
+  Integer IterationsToComplete; // Number of iterations the MR took to finish.
+                                // Filled in upon completion
 
   MinimalResidual(RealD tol, Integer maxit, bool err_on_no_conv = true)
-    : Tolerance(tol),
-      MaxIterations(maxit),
-      ErrorOnNoConverge(err_on_no_conv){};
-
-  //! Minimal-residual (MR) algorithm for a generic Linear Operator
-  /*! \ingroup invert
-   * This subroutine uses the Minimal Residual (MR) algorithm to determine
-   * the solution of the set of linear equations. Here we allow M to be
-   nonhermitian.
-   *
-   *    M . Psi  =  src
-   *
-   * Algorithm:
-   *
-   *  Psi[0]                                      Argument
-   *  r[0]    :=  src  -  M . Psi[0] ;            Initial residual
-   *  IF |r[0]| <= RsdCG |src| THEN RETURN;       Converged?
-   *  FOR k FROM 1 TO MaxCG DO                    MR iterations
-   *      a[k-1]  := <M.r[k-1],r[k-1]> / <M.r[k-1],M.r[k-1]> ;
-   *      ap[k-1] := MRovpar * a[k] ;             Overrelaxtion step
-   *      Psi[k]  += ap[k-1] r[k-1] ;                   New solution vector
-   *      r[k]    -= ap[k-1] A . r[k-1] ;         New residual
-   *      IF |r[k]| <= RsdCG |src| THEN RETURN;   Converged?
-
-   * Arguments:
-
-   *  \param M       Linear Operator             (Read)
-   *  \param src     Source                      (Read)
-   *  \param psi     Solution                    (Modify)
-   *  \param RsdCG   MR residual accuracy        (Read)
-   *  \param MRovpar Overrelaxation parameter    (Read)
-   *  \param MaxIterations   Maximum MR iterations       (Read)
-
-   * Local Variables:
-
-   *  r         Residual vector
-   *  cp        | r[k] |**2
-   *  c         | r[k-1] |**2
-   *  k         MR iteration counter
-   *  a         a[k]
-   *  d         < M.r[k], M.r[k] >
-   *  R_Aux     Temporary for  M.Psi
-   *  Mr        Temporary for  M.r
-
-   * Global Variables:
-
-   *  MaxIterations       Maximum number of MR iterations allowed
-   *  RsdCG       Maximum acceptable MR residual (relative to source)
-   *
-   * Subroutines:
-   *
-   *  M           Apply matrix to vector
-   *
-   * @{
-   */
+    : Tolerance(tol), MaxIterations(maxit), ErrorOnNoConverge(err_on_no_conv){};
 
   void operator()(LinearOperatorBase<Field> &Linop, const Field &src, Field &psi) {
 
@@ -117,22 +64,15 @@ template<class Field> class MinimalResidual : public OperatorFunction<Field> {
     assert(std::isnan(guess) == 0);
 
     RealD ssq = norm2(src); // flopcount.addSiteFlops(4*Nc*Ns,s); // stands for "source squared"
-    RealD rsd_sq = Tolerance * Tolerance * ssq; // flopcount.addSiteFlops(4*Nc*Ns,s); //
-                                     // stands for "residual squared"
+    RealD rsd_sq = Tolerance * Tolerance * ssq; // flopcount.addSiteFlops(4*Nc*Ns,s); // stands for "residual squared"
 
-    /*  r[0]  :=  src - M . Psi[0] */
-    /*  r  :=  M . Psi  */
-    // M(Mr, psi, isign); // flopcount.addFlops(M.nFlops());
     Linop.Op(psi, Mr); // flopcount.addFlops(M.nFlops());
 
     r = src - Mr; // flopcount.addSiteFlops(2*Nc*Ns,s);
 
-    RealD cp = norm2(r);   /*  Cp = |r[0]|^2 */
-      /* 2 Nc Ns  flops */ // flopcount.addSiteFlops(4*Nc*Ns, s);
-    // auto cp = norm2(r); /*  Cp = |r[0]|^2 */ /* 2 Nc Ns  flops */ //
-    // flopcount.addSiteFlops(4*Nc*Ns, s);
+    RealD cp = norm2(r); //  Cp = |r[0]|^2 // 2 Nc Ns  flops // flopcount.addSiteFlops(4*Nc*Ns, s);
 
-    if(cp <= rsd_sq) { /*  IF |r[0]| <= Tolerance|src| THEN RETURN; */
+    if(cp <= rsd_sq) {
       return;
     }
 
@@ -144,70 +84,72 @@ template<class Field> class MinimalResidual : public OperatorFunction<Field> {
     GridStopWatch SolverTimer;
 
     SolverTimer.Start();
-    auto k = 0;
-    while((k < MaxIterations) && (cp > rsd_sq)) {
-      ++k;
-
-      /*  a[k-1] := < M.r[k-1], r[k-1] >/ < M.r[k-1], M.r[k-1] > ; */
+    int k;
+    for(k = 1; k <= MaxIterations; k++) { //  a[k-1] := < M.r[k-1], r[k-1] >/ < M.r[k-1], M.r[k-1] >
 
       MatrixTimer.Start();
-      // M(Mr, r, isign); /*  Mr = M * r  */  // flopcount.addFlops(M.nFlops());
-      Linop.Op(r, Mr); /*  Mr = M * r  */ // flopcount.addFlops(M.nFlops());
+      Linop.Op(r, Mr); //  Mr = M * r // flopcount.addFlops(M.nFlops());
       MatrixTimer.Stop();
 
       LinalgTimer.Start();
 
-      c = innerProduct(Mr, r); /*  c = < M.r, r > */ // flopcount.addSiteFlops(4*Nc*Ns,s);
+      c = innerProduct(Mr, r); //  c = < M.r, r > // // flopcount.addSiteFlops(4*Nc*Ns,s);
 
-      d = norm2(Mr); /*  d = | M.r | ** 2  */ // flopcount.addSiteFlops(4*Nc*Ns,s);
+      d = norm2(Mr); //  d = | M.r | ** 2  // // flopcount.addSiteFlops(4*Nc*Ns,s);
 
       a = c / d;
 
-      // a = a * MRovpar; /*  a[k-1] *= MRovpar ; */
+      // a = a * MRovpar; //  a[k-1] *= MRovpar // from chroma code. TODO: check what to do with this
 
-      psi = psi + r * a; /*  Psi[k] += a[k-1] r[k-1] ; */ // flopcount.addSiteFlops(4*Nc*Ns,s);
+      psi = psi + r * a; //  Psi[k] += a[k-1] r[k-1] ; // flopcount.addSiteFlops(4*Nc*Ns,s);
 
-      r = r - Mr * a; /*  r[k] -= a[k-1] M . r[k-1] ; */ // flopcount.addSiteFlops(4*Nc*Ns,s);
+      r = r - Mr * a; //  r[k] -= a[k-1] M . r[k-1] ; // flopcount.addSiteFlops(4*Nc*Ns,s);
 
-      cp = norm2(r); /*  cp  =  | r[k] |**2 */ // flopcount.addSiteFlops(4*Nc*Ns,s);
+      cp = norm2(r); //  cp  =  | r[k] |**2 // flopcount.addSiteFlops(4*Nc*Ns,s);
 
       LinalgTimer.Stop();
 
       std::cout << GridLogIterative << "MinimalResidual: Iteration " << k
                 << " residual " << cp << " target " << rsd_sq << std::endl;
+      std::cout << GridLogDebug << "a = " << a << " c = " << c << " d = " << d << std::endl;
+
+      // Stopping condition
+      if(cp <= rsd_sq) {
+        SolverTimer.Stop();
+
+        Linop.Op(psi, Mr);
+        r = src - Mr;
+
+        RealD srcnorm       = sqrt(ssq);
+        RealD resnorm       = sqrt(norm2(r));
+        RealD true_residual = resnorm / srcnorm;
+
+        std::cout << GridLogMessage << "MinimalResidual Converged on iteration " << k << std::endl;
+        std::cout << GridLogMessage << "\tComputed residual " << sqrt(cp / ssq) << std::endl;
+        std::cout << GridLogMessage << "\tTrue residual " << true_residual << std::endl;
+        std::cout << GridLogMessage << "\tTarget " << Tolerance << std::endl;
+
+        std::cout << GridLogMessage << "Time breakdown " << std::endl;
+        std::cout << GridLogMessage << "\tElapsed    " << SolverTimer.Elapsed() << std::endl;
+        std::cout << GridLogMessage << "\tMatrix     " << MatrixTimer.Elapsed() << std::endl;
+        std::cout << GridLogMessage << "\tLinalg     " << LinalgTimer.Elapsed() << std::endl;
+
+        if(ErrorOnNoConverge)
+          assert(true_residual / Tolerance < 10000.0);
+
+        IterationsToComplete = k;
+
+        return;
+      }
     }
-    SolverTimer.Stop();
+
+    std::cout << GridLogMessage << "MinimalResidual did NOT converge"
+              << std::endl;
+
+    if(ErrorOnNoConverge)
+      assert(0);
 
     IterationsToComplete = k;
-
-    // res.resid   = sqrt(cp);
-    std::cout << "InvMR: k = " << k << "  cp = " << cp << std::endl;
-    // flopcount.report("invmr", swatch.getTimeInSeconds());
-
-    std::cout << GridLogMessage << "MinimalResidual Converged on iteration " << k << std::endl;
-    std::cout << GridLogMessage << "\tComputed residual " << sqrt(cp / ssq)<<std::endl;
-    // std::cout << GridLogMessage << "\tTrue residual " << true_residual<<std::endl;
-    // std::cout << GridLogMessage << "\tTarget " << Tolerance << std::endl;
-
-    std::cout << GridLogMessage << "Time breakdown "<<std::endl;
-    std::cout << GridLogMessage << "\tElapsed    " << SolverTimer.Elapsed() <<std::endl;
-    std::cout << GridLogMessage << "\tMatrix     " << MatrixTimer.Elapsed() <<std::endl;
-    std::cout << GridLogMessage << "\tLinalg     " << LinalgTimer.Elapsed() <<std::endl;
-
-    // Compute the actual residual
-    {
-      // M(Mr, psi, isign);
-      Linop.Op(psi, Mr);
-      Field tmp = src - Mr;
-      // RealD actual_res = norm2(src-Mr);
-      RealD actual_res = norm2(tmp);
-      // res.resid = sqrt(actual_res);
-    }
-
-    if(IterationsToComplete == MaxIterations)
-      std::cerr << "Nonconvergence Warning" << std::endl;
-
-    // return res;
   }
 };
 } // namespace Grid

From 1bad64ac6a3710a22abb7432266f48240c57ff40 Mon Sep 17 00:00:00 2001
From: Daniel Richtmann <daniel.richtmann@gmail.com>
Date: Fri, 27 Oct 2017 14:35:04 +0200
Subject: [PATCH 009/130] Some formatting

---
 lib/algorithms/iterative/MinimalResidual.h | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/lib/algorithms/iterative/MinimalResidual.h b/lib/algorithms/iterative/MinimalResidual.h
index 87dc5941..beda875b 100644
--- a/lib/algorithms/iterative/MinimalResidual.h
+++ b/lib/algorithms/iterative/MinimalResidual.h
@@ -72,7 +72,7 @@ template<class Field> class MinimalResidual : public OperatorFunction<Field> {
 
     RealD cp = norm2(r); //  Cp = |r[0]|^2 // 2 Nc Ns  flops // flopcount.addSiteFlops(4*Nc*Ns, s);
 
-    if(cp <= rsd_sq) {
+    if (cp <= rsd_sq) {
       return;
     }
 
@@ -85,7 +85,7 @@ template<class Field> class MinimalResidual : public OperatorFunction<Field> {
 
     SolverTimer.Start();
     int k;
-    for(k = 1; k <= MaxIterations; k++) { //  a[k-1] := < M.r[k-1], r[k-1] >/ < M.r[k-1], M.r[k-1] >
+    for (k = 1; k <= MaxIterations; k++) { //  a[k-1] := < M.r[k-1], r[k-1] >/ < M.r[k-1], M.r[k-1] >
 
       MatrixTimer.Start();
       Linop.Op(r, Mr); //  Mr = M * r // flopcount.addFlops(M.nFlops());
@@ -114,7 +114,7 @@ template<class Field> class MinimalResidual : public OperatorFunction<Field> {
       std::cout << GridLogDebug << "a = " << a << " c = " << c << " d = " << d << std::endl;
 
       // Stopping condition
-      if(cp <= rsd_sq) {
+      if (cp <= rsd_sq) {
         SolverTimer.Stop();
 
         Linop.Op(psi, Mr);
@@ -146,7 +146,7 @@ template<class Field> class MinimalResidual : public OperatorFunction<Field> {
     std::cout << GridLogMessage << "MinimalResidual did NOT converge"
               << std::endl;
 
-    if(ErrorOnNoConverge)
+    if (ErrorOnNoConverge)
       assert(0);
 
     IterationsToComplete = k;

From e7b1933e881293e77e71fa1b26c61f98aafa12ca Mon Sep 17 00:00:00 2001
From: Daniel Richtmann <daniel.richtmann@gmail.com>
Date: Fri, 27 Oct 2017 14:38:57 +0200
Subject: [PATCH 010/130] Add a test for the MR solver

---
 tests/solver/Test_wilson_mr_unprec.cc | 65 +++++++++++++++++++++++++++
 1 file changed, 65 insertions(+)
 create mode 100644 tests/solver/Test_wilson_mr_unprec.cc

diff --git a/tests/solver/Test_wilson_mr_unprec.cc b/tests/solver/Test_wilson_mr_unprec.cc
new file mode 100644
index 00000000..3d584962
--- /dev/null
+++ b/tests/solver/Test_wilson_mr_unprec.cc
@@ -0,0 +1,65 @@
+/*************************************************************************************
+
+Grid physics library, www.github.com/paboyle/Grid
+
+Source file: ./tests/Test_wilson_mr_unprec.cc
+
+Copyright (C) 2015
+
+Author: Daniel Richtmann <daniel.richtmann@ur.de>
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along
+with this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+
+See the full license in the file "LICENSE" in the top level distribution
+directory
+*************************************************************************************/
+/*  END LEGAL */
+#include <Grid/Grid.h>
+
+using namespace Grid;
+using namespace Grid::QCD;
+
+int main (int argc, char ** argv)
+{
+  Grid_init(&argc,&argv);
+
+  std::vector<int> latt_size   = GridDefaultLatt();
+  std::vector<int> simd_layout = GridDefaultSimd(Nd,vComplex::Nsimd());
+  std::vector<int> mpi_layout  = GridDefaultMpi();
+  GridCartesian               Grid(latt_size,simd_layout,mpi_layout);
+  GridRedBlackCartesian     RBGrid(&Grid);
+
+  std::vector<int> seeds({1,2,3,4});
+  GridParallelRNG          pRNG(&Grid);  pRNG.SeedFixedIntegers(seeds);
+
+  LatticeFermion src(&Grid); random(pRNG,src);
+  RealD nrm = norm2(src);
+  LatticeFermion result(&Grid); result=zero;
+  LatticeGaugeField Umu(&Grid); SU3::HotConfiguration(pRNG,Umu);
+
+  double volume=1;
+  for(int mu=0;mu<Nd;mu++){
+    volume=volume*latt_size[mu];
+  }
+
+  RealD mass=0.5;
+  WilsonFermionR Dw(Umu,Grid,RBGrid,mass);
+
+  MdagMLinearOperator<WilsonFermionR,LatticeFermion> HermOp(Dw);
+  MinimalResidual<LatticeFermion> MR(1.0e-8,10000);
+  MR(HermOp,src,result);
+
+  Grid_finalize();
+}

From 54128d579acf1a3f42fbdc82e8412a9982871d5e Mon Sep 17 00:00:00 2001
From: Daniel Richtmann <daniel.richtmann@gmail.com>
Date: Fri, 27 Oct 2017 14:45:29 +0200
Subject: [PATCH 011/130] Make MR a bit more verbose

---
 lib/algorithms/iterative/MinimalResidual.h | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/lib/algorithms/iterative/MinimalResidual.h b/lib/algorithms/iterative/MinimalResidual.h
index beda875b..ac4267db 100644
--- a/lib/algorithms/iterative/MinimalResidual.h
+++ b/lib/algorithms/iterative/MinimalResidual.h
@@ -72,6 +72,11 @@ template<class Field> class MinimalResidual : public OperatorFunction<Field> {
 
     RealD cp = norm2(r); //  Cp = |r[0]|^2 // 2 Nc Ns  flops // flopcount.addSiteFlops(4*Nc*Ns, s);
 
+    std::cout << GridLogIterative << std::setprecision(4) << "MinimalResidual: guess " << guess << std::endl;
+    std::cout << GridLogIterative << std::setprecision(4) << "MinimalResidual:   src " << ssq << std::endl;
+    std::cout << GridLogIterative << std::setprecision(4) << "MinimalResidual:    mp " << d << std::endl;
+    std::cout << GridLogIterative << std::setprecision(4) << "MinimalResidual:  cp,r " << cp << std::endl;
+
     if (cp <= rsd_sq) {
       return;
     }

From 5a477ed29ec1a20158128cffc959cf59f350db5e Mon Sep 17 00:00:00 2001
From: Daniel Richtmann <daniel.richtmann@gmail.com>
Date: Fri, 27 Oct 2017 14:46:18 +0200
Subject: [PATCH 012/130] Perform minor style correction

---
 lib/algorithms/iterative/MinimalResidual.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/lib/algorithms/iterative/MinimalResidual.h b/lib/algorithms/iterative/MinimalResidual.h
index ac4267db..74d4eb53 100644
--- a/lib/algorithms/iterative/MinimalResidual.h
+++ b/lib/algorithms/iterative/MinimalResidual.h
@@ -139,7 +139,7 @@ template<class Field> class MinimalResidual : public OperatorFunction<Field> {
         std::cout << GridLogMessage << "\tMatrix     " << MatrixTimer.Elapsed() << std::endl;
         std::cout << GridLogMessage << "\tLinalg     " << LinalgTimer.Elapsed() << std::endl;
 
-        if(ErrorOnNoConverge)
+        if (ErrorOnNoConverge)
           assert(true_residual / Tolerance < 10000.0);
 
         IterationsToComplete = k;

From 89bacb04708e9cfbc1217a636fba6316bf254901 Mon Sep 17 00:00:00 2001
From: Daniel Richtmann <daniel.richtmann@physik.uni-regensburg.de>
Date: Mon, 30 Oct 2017 16:16:40 +0100
Subject: [PATCH 013/130] Fix source path in MR solver test header comment

---
 tests/solver/Test_wilson_mr_unprec.cc | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/solver/Test_wilson_mr_unprec.cc b/tests/solver/Test_wilson_mr_unprec.cc
index 3d584962..be88d6f8 100644
--- a/tests/solver/Test_wilson_mr_unprec.cc
+++ b/tests/solver/Test_wilson_mr_unprec.cc
@@ -2,7 +2,7 @@
 
 Grid physics library, www.github.com/paboyle/Grid
 
-Source file: ./tests/Test_wilson_mr_unprec.cc
+Source file: ./tests/solver/Test_wilson_mr_unprec.cc
 
 Copyright (C) 2015
 

From a2d83d4f3d605a648758607aa8c5a35438806325 Mon Sep 17 00:00:00 2001
From: Daniel Richtmann <daniel.richtmann@physik.uni-regensburg.de>
Date: Mon, 30 Oct 2017 16:17:25 +0100
Subject: [PATCH 014/130] Add test for the MR solver with DW fermions; does not
 converge at the moment

TODO: Is this a property of DWF or did I do something wrong?
---
 tests/solver/Test_dwf_mr_unprec.cc | 68 ++++++++++++++++++++++++++++++
 1 file changed, 68 insertions(+)
 create mode 100644 tests/solver/Test_dwf_mr_unprec.cc

diff --git a/tests/solver/Test_dwf_mr_unprec.cc b/tests/solver/Test_dwf_mr_unprec.cc
new file mode 100644
index 00000000..a7c7733b
--- /dev/null
+++ b/tests/solver/Test_dwf_mr_unprec.cc
@@ -0,0 +1,68 @@
+/*************************************************************************************
+
+Grid physics library, www.github.com/paboyle/Grid
+
+Source file: ./tests/solver/Test_dwf_mr_unprec.cc
+
+Copyright (C) 2015
+
+Author: Daniel Richtmann <daniel.richtmann@ur.de>
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along
+with this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+
+See the full license in the file "LICENSE" in the top level distribution
+directory
+*************************************************************************************/
+/*  END LEGAL */
+#include <Grid/Grid.h>
+
+using namespace Grid;
+using namespace Grid::QCD;
+
+int main (int argc, char ** argv)
+{
+  Grid_init(&argc,&argv);
+
+  const int Ls=8;
+
+  GridCartesian         * UGrid   = SpaceTimeGrid::makeFourDimGrid(GridDefaultLatt(), GridDefaultSimd(Nd,vComplex::Nsimd()),GridDefaultMpi());
+  GridRedBlackCartesian * UrbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(UGrid);
+  GridCartesian         * FGrid   = SpaceTimeGrid::makeFiveDimGrid(Ls,UGrid);
+  GridRedBlackCartesian * FrbGrid = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls,UGrid);
+
+  std::vector<int> seeds4({1,2,3,4});
+  std::vector<int> seeds5({5,6,7,8});
+  GridParallelRNG          RNG5(FGrid);  RNG5.SeedFixedIntegers(seeds5);
+  GridParallelRNG          RNG4(UGrid);  RNG4.SeedFixedIntegers(seeds4);
+
+  LatticeFermion    src(FGrid); random(RNG5,src);
+  LatticeFermion result(FGrid); result=zero;
+  LatticeGaugeField Umu(UGrid); SU3::HotConfiguration(RNG4,Umu);
+
+  std::vector<LatticeColourMatrix> U(4,UGrid);
+  for(int mu=0;mu<Nd;mu++){
+    U[mu] = PeekIndex<LorentzIndex>(Umu,mu);
+  }
+
+  RealD mass=0.1;
+  RealD M5=1.8;
+  DomainWallFermionR Ddwf(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5);
+
+  MdagMLinearOperator<DomainWallFermionR,LatticeFermion> HermOp(Ddwf);
+  MinimalResidual<LatticeFermion> MR(1.0e-6,10000);
+  MR(HermOp,src,result);
+
+  Grid_finalize();
+}

From 6f81906b0010e16bd678f2a3284e87c55eb1b456 Mon Sep 17 00:00:00 2001
From: Daniel Richtmann <daniel.richtmann@physik.uni-regensburg.de>
Date: Mon, 30 Oct 2017 16:57:55 +0100
Subject: [PATCH 015/130] Add test for the MR solver with staggered fermions;
 does not converge at the moment

TODO: Is this a property of staggered or did I do something wrong?
---
 tests/solver/Test_staggered_mr_unprec.cc | 69 ++++++++++++++++++++++++
 1 file changed, 69 insertions(+)
 create mode 100644 tests/solver/Test_staggered_mr_unprec.cc

diff --git a/tests/solver/Test_staggered_mr_unprec.cc b/tests/solver/Test_staggered_mr_unprec.cc
new file mode 100644
index 00000000..22210329
--- /dev/null
+++ b/tests/solver/Test_staggered_mr_unprec.cc
@@ -0,0 +1,69 @@
+/*************************************************************************************
+
+Grid physics library, www.github.com/paboyle/Grid
+
+Source file: ./tests/solver/Test_staggered_mr_unprec.cc
+
+Copyright (C) 2015
+
+Author: Daniel Richtmann <daniel.richtmann@ur.de>
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along
+with this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+
+See the full license in the file "LICENSE" in the top level distribution
+directory
+*************************************************************************************/
+/*  END LEGAL */
+#include <Grid/Grid.h>
+
+using namespace Grid;
+using namespace Grid::QCD;
+
+int main (int argc, char ** argv)
+{
+  typedef typename ImprovedStaggeredFermionR::FermionField FermionField;
+  typedef typename ImprovedStaggeredFermionR::ComplexField ComplexField;
+  typename ImprovedStaggeredFermionR::ImplParams params;
+
+  Grid_init(&argc,&argv);
+
+  std::vector<int> latt_size   = GridDefaultLatt();
+  std::vector<int> simd_layout = GridDefaultSimd(Nd,vComplex::Nsimd());
+  std::vector<int> mpi_layout  = GridDefaultMpi();
+  GridCartesian               Grid(latt_size,simd_layout,mpi_layout);
+  GridRedBlackCartesian     RBGrid(&Grid);
+
+  std::vector<int> seeds({1,2,3,4});
+  GridParallelRNG          pRNG(&Grid);  pRNG.SeedFixedIntegers(seeds);
+
+  FermionField src(&Grid); random(pRNG,src);
+  RealD nrm = norm2(src);
+  FermionField result(&Grid); result=zero;
+  LatticeGaugeField Umu(&Grid); SU3::HotConfiguration(pRNG,Umu);
+
+  double volume=1;
+  for(int mu=0;mu<Nd;mu++){
+    volume=volume*latt_size[mu];
+  }
+
+  RealD mass=0.1;
+  ImprovedStaggeredFermionR Ds(Umu,Umu,Grid,RBGrid,mass);
+
+  MdagMLinearOperator<ImprovedStaggeredFermionR,FermionField> HermOp(Ds);
+  MinimalResidual<FermionField> MR(1.0e-6,10000);
+  MR(HermOp,src,result);
+
+  Grid_finalize();
+}

From 74af31564faece34af71aae40a8554f6047530b0 Mon Sep 17 00:00:00 2001
From: Daniel Richtmann <daniel.richtmann@physik.uni-regensburg.de>
Date: Mon, 6 Nov 2017 14:06:45 +0100
Subject: [PATCH 016/130] Adapt style of wilson GMRES test to style of wilson
 MR test

---
 tests/solver/Test_wilson_gmres_unprec.cc | 51 +++++++++---------------
 1 file changed, 19 insertions(+), 32 deletions(-)

diff --git a/tests/solver/Test_wilson_gmres_unprec.cc b/tests/solver/Test_wilson_gmres_unprec.cc
index f371278c..c3c27f9e 100644
--- a/tests/solver/Test_wilson_gmres_unprec.cc
+++ b/tests/solver/Test_wilson_gmres_unprec.cc
@@ -1,49 +1,36 @@
 /*************************************************************************************
 
-    Grid physics library, www.github.com/paboyle/Grid 
+Grid physics library, www.github.com/paboyle/Grid
 
-    Source file: ./tests/Test_wilson_cg_unprec.cc
+Source file: ./tests/solver/Test_wilson_gmres_unprec.cc
 
-    Copyright (C) 2015
+Copyright (C) 2015
 
-Author: Azusa Yamaguchi <ayamaguc@staffmail.ed.ac.uk>
-Author: Peter Boyle <paboyle@ph.ed.ac.uk>
+Author: Daniel Richtmann <daniel.richtmann@ur.de>
 
-    This program is free software; you can redistribute it and/or modify
-    it under the terms of the GNU General Public License as published by
-    the Free Software Foundation; either version 2 of the License, or
-    (at your option) any later version.
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
 
-    This program is distributed in the hope that it will be useful,
-    but WITHOUT ANY WARRANTY; without even the implied warranty of
-    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-    GNU General Public License for more details.
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU General Public License for more details.
 
-    You should have received a copy of the GNU General Public License along
-    with this program; if not, write to the Free Software Foundation, Inc.,
-    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+You should have received a copy of the GNU General Public License along
+with this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 
-    See the full license in the file "LICENSE" in the top level distribution directory
+See the full license in the file "LICENSE" in the top level distribution
+directory
 *************************************************************************************/
 /*  END LEGAL */
 #include <Grid/Grid.h>
 
-using namespace std;
 using namespace Grid;
 using namespace Grid::QCD;
 
-template<class d>
-struct scal {
-  d internal;
-};
-
-  Gamma::Algebra Gmu [] = {
-    Gamma::Algebra::GammaX,
-    Gamma::Algebra::GammaY,
-    Gamma::Algebra::GammaZ,
-    Gamma::Algebra::GammaT
-  };
-
 int main (int argc, char ** argv)
 {
   Grid_init(&argc,&argv);
@@ -65,8 +52,8 @@ int main (int argc, char ** argv)
   double volume=1;
   for(int mu=0;mu<Nd;mu++){
     volume=volume*latt_size[mu];
-  }  
-  
+  }
+
   RealD mass=0.5;
   WilsonFermionR Dw(Umu,Grid,RBGrid,mass);
 

From 8363edfcdb9cbab6e9812a321ae6a19e2940adfc Mon Sep 17 00:00:00 2001
From: Daniel Richtmann <daniel.richtmann@physik.uni-regensburg.de>
Date: Mon, 6 Nov 2017 16:17:44 +0100
Subject: [PATCH 017/130] Add restart length, logging and timing to GMRES code

---
 .../iterative/GeneralisedMinimalResidual.h    | 107 ++++++++++++------
 tests/solver/Test_wilson_gmres_unprec.cc      |   2 +-
 2 files changed, 76 insertions(+), 33 deletions(-)

diff --git a/lib/algorithms/iterative/GeneralisedMinimalResidual.h b/lib/algorithms/iterative/GeneralisedMinimalResidual.h
index 453071c7..ee5445f1 100644
--- a/lib/algorithms/iterative/GeneralisedMinimalResidual.h
+++ b/lib/algorithms/iterative/GeneralisedMinimalResidual.h
@@ -55,13 +55,15 @@ class GeneralisedMinimalResidual : public OperatorFunction<Field> {
                           // defaults to True.
   RealD   Tolerance;
   Integer MaxIterations;
+  Integer RestartLength;
   Integer IterationsToComplete; // Number of iterations the GMRES took to
                                 // finish. Filled in upon completion
 
   GeneralisedMinimalResidual(RealD   tol,
                              Integer maxit,
+                             Integer restart_length,
                              bool    err_on_no_conv = true)
-    : Tolerance(tol), MaxIterations(maxit), ErrorOnNoConverge(err_on_no_conv){};
+    : Tolerance(tol), MaxIterations(maxit), RestartLength(restart_length), ErrorOnNoConverge(err_on_no_conv){};
 
   // want to solve Ax = b -> A = LinOp, psi = x, b = src
 
@@ -168,9 +170,12 @@ class GeneralisedMinimalResidual : public OperatorFunction<Field> {
   /*   std::cout << norm2(tmp) << " " << norm2(tmp) / gamma0 << std::endl; */
   /* } */
 
-  void
-  operator()(LinearOperatorBase<Field> &LinOp, const Field &src, Field &psi) {
-    std::cout << "GMRES: Start of operator()" << std::endl;
+  void operator()(LinearOperatorBase<Field> &LinOp, const Field &src, Field &psi) {
+
+    std::cout << GridLogIterative << "GMRES: Start of operator()" << std::endl;
+
+    psi.checkerboard = src.checkerboard;
+    conformable(psi, src);
 
     int m = MaxIterations;
 
@@ -180,25 +185,50 @@ class GeneralisedMinimalResidual : public OperatorFunction<Field> {
     Field Dv(src);
 
     std::vector<Field> v(m + 1, src);
-    Eigen::MatrixXcd   H = Eigen::MatrixXcd::Zero(m + 1, m);
+
+    Eigen::MatrixXcd H = Eigen::MatrixXcd::Zero(m + 1, m);
 
     std::vector<std::complex<double>> y(m + 1, 0.);
     std::vector<std::complex<double>> gamma(m + 1, 0.);
     std::vector<std::complex<double>> c(m + 1, 0.);
     std::vector<std::complex<double>> s(m + 1, 0.);
 
+    // Initial residual computation & set up
+    RealD guess = norm2(psi);
+    assert(std::isnan(guess) == 0);
+
+    RealD ssq = norm2(src); // flopcount.addSiteFlops(4*Nc*Ns,s); // stands for "source squared"
+    RealD rsd_sq = Tolerance * Tolerance * ssq; // flopcount.addSiteFlops(4*Nc*Ns,s); // stands for "residual squared"
+
     LinOp.Op(psi, Dpsi);
     r = src - Dpsi;
 
-    RealD beta = norm2(r);
-    gamma[0]  = beta;
+    RealD cp = norm2(r); // cp = beta in DD-αAMG nomenclature
+    gamma[0]  = cp;
 
-    std::cout << "beta " << beta << std::endl;
+    std::cout << GridLogIterative << "cp " << cp << std::endl;
 
-    v[0] = (1. / beta) * r;
+    v[0] = (1. / cp) * r;
 
-    // Begin iterating
+    std::cout << GridLogIterative << std::setprecision(4) << "GeneralizedMinimalResidual: guess " << guess << std::endl;
+    std::cout << GridLogIterative << std::setprecision(4) << "GeneralizedMinimalResidual:   src " << ssq << std::endl;
+    // std::cout << GridLogIterative << std::setprecision(4) << "GeneralizedMinimalResidual:    mp " << d << std::endl;
+    std::cout << GridLogIterative << std::setprecision(4) << "GeneralizedMinimalResidual:  cp,r " << cp << std::endl;
+
+    if (cp <= rsd_sq) {
+      return;
+    }
+
+    std::cout << GridLogIterative << std::setprecision(4)
+              << "GeneralizedMinimalResidual: k=0 residual " << cp << " target " << rsd_sq << std::endl;
+
+    GridStopWatch SolverTimer;
+
+    SolverTimer.Start();
     for(auto j = 0; j < m; ++j) {
+
+      // std::cout << GridLogIterative << "GeneralizedMinimalResidual: Start of outer loop with index j = " << j << std::endl;
+
       LinOp.Op(v[j], Dv);
       w = Dv;
 
@@ -222,9 +252,9 @@ class GeneralisedMinimalResidual : public OperatorFunction<Field> {
       ComplexD nu = sqrt(std::norm(H(j, j)) + std::norm(H(j + 1, j)));
       c[j]        = H(j, j) / nu;
       s[j]        = H(j + 1, j) / nu;
-      std::cout << "nu" << nu << std::endl;
-      std::cout << "H("<<j<<","<<j<<")" << H(j,j) << std::endl;
-      std::cout << "H("<<j+1<<","<<j<<")" << H(j+1,j) << std::endl;
+      std::cout << GridLogIterative << "GeneralizedMinimalResidual: nu" << nu << std::endl;
+      std::cout << GridLogIterative << "GeneralizedMinimalResidual: H("<<j<<","<<j<<")" << H(j,j) << std::endl;
+      std::cout << GridLogIterative << "GeneralizedMinimalResidual: H("<<j+1<<","<<j<<")" << H(j+1,j) << std::endl;
 
       // apply new Givens rotation
       H(j, j)     = nu;
@@ -235,13 +265,26 @@ class GeneralisedMinimalResidual : public OperatorFunction<Field> {
       gamma[j]     = std::conj(c[j]) * gamma[j];
 
       /* for(auto k = 0; k <= j+1 ; ++k) */
-      /*   std::cout << "k " << k << "nu " << nu << " c["<<k<<"]" << c[k]<< " s["<<k<<"]" << s[k] << " gamma["<<k<<"]" << gamma[k] << std::endl; */
+      /*   std::cout << GridLogIterative << "k " << k << "nu " << nu << " c["<<k<<"]" << c[k]<< " s["<<k<<"]" << s[k] << " gamma["<<k<<"]" << gamma[k] << std::endl; */
 
       std::cout << GridLogIterative << "GeneralisedMinimalResidual: Iteration "
                 << j << " residual " << std::abs(gamma[j + 1]) << std::endl; //" target "
                 /* << TargetResSq << std::endl; */
-      if(std::abs(gamma[j + 1]) / sqrt(beta) < Tolerance) {
+      if(std::abs(gamma[j + 1]) / sqrt(cp) < Tolerance) {
+        SolverTimer.Stop();
+
+        std::cout << GridLogMessage << "GeneralizedMinimalResidual Converged on iteration " << j << std::endl;
+        // std::cout << GridLogMessage << "\tComputed residual " << sqrt(cp / ssq) << std::endl;
+        // std::cout << GridLogMessage << "\tTrue residual " << true_residual << std::endl;
+        std::cout << GridLogMessage << "\tTarget " << Tolerance << std::endl;
+
+        std::cout << GridLogMessage << "Time breakdown " << std::endl;
+        std::cout << GridLogMessage << "\tElapsed    " << SolverTimer.Elapsed() << std::endl;
+        // std::cout << GridLogMessage << "\tMatrix     " << MatrixTimer.Elapsed() << std::endl;
+        // std::cout << GridLogMessage << "\tLinalg     " << LinalgTimer.Elapsed() << std::endl;
+
         IterationsToComplete = j;
+
         break;
       }
     }
@@ -249,7 +292,7 @@ class GeneralisedMinimalResidual : public OperatorFunction<Field> {
     // backward substitution
     computeSolution(y, gamma, H, v, psi, IterationsToComplete);
 
-    std::cout << "GMRES: End of operator()" << std::endl;
+    std::cout << GridLogIterative << "GeneralizedMinimalResidual: End of operator()" << std::endl;
   }
 
   private:
@@ -258,37 +301,37 @@ class GeneralisedMinimalResidual : public OperatorFunction<Field> {
   /*                std::vector<std::complex<double>> &s, */
   /*                Eigen::MatrixXcd &                 H, */
   /*                int                                j) { */
-  /*    ComplexD beta{}; */
+  /*    ComplexD cp{}; */
   /*    // update QR factorization */
   /*    // apply previous Givens rotation */
   /*    for(auto i = 0; i < j; i++) { */
-  /*      beta = -s[i] * H(i, j) + c[i] * H(i + 1, j); */
+  /*      cp = -s[i] * H(i, j) + c[i] * H(i + 1, j); */
   /*      H(i, j)     = std::conj(c[i]) * H(i, j) + std::conj(s[i]) * H(i + 1,
    * j); */
-  /*      H(i + 1, j) = beta; */
+  /*      H(i + 1, j) = cp; */
   /*    } */
 
   /*    // compute current Givens rotation */
-  /*    beta = sqrt(std::norm(H(j, j)) + std::norm(H(j + 1, j))); */
-  /*    s[j] = H(j + 1, j) / beta; */
-  /*    c[j] = H(j, j) / beta; */
-  /*    /\* std::cout << "beta= " << beta << std::endl; *\/ */
-  /*    /\* std::cout << "s[j]= " << s[ j ] << std::endl; *\/ */
-  /*    /\* std::cout << "c[j]= " << c[ j ] << std::endl; *\/ */
+  /*    cp = sqrt(std::norm(H(j, j)) + std::norm(H(j + 1, j))); */
+  /*    s[j] = H(j + 1, j) / cp; */
+  /*    c[j] = H(j, j) / cp; */
+  /*    /\* std::cout << GridLogIterative << "cp= " << cp << std::endl; *\/ */
+  /*    /\* std::cout << GridLogIterative << "s[j]= " << s[ j ] << std::endl; *\/ */
+  /*    /\* std::cout << GridLogIterative << "c[j]= " << c[ j ] << std::endl; *\/ */
 
-  /*    /\* std::cout << "gamma[j+1]= " << gamma[ j + 1 ] << std::endl; *\/ */
-  /*    /\* std::cout << "gamma[j]= " << gamma[ j ] << std::endl; *\/ */
+  /*    /\* std::cout << GridLogIterative << "gamma[j+1]= " << gamma[ j + 1 ] << std::endl; *\/ */
+  /*    /\* std::cout << GridLogIterative << "gamma[j]= " << gamma[ j ] << std::endl; *\/ */
   /*    // update right column */
   /*    gamma[j + 1] = -s[j] * gamma[j]; */
   /*    gamma[j]     = std::conj(c[j]) * gamma[j]; */
-  /*    /\* std::cout << "gamma[j+1]= " << gamma[ j + 1 ] << std::endl; *\/ */
-  /*    /\* std::cout << "gamma[j]= " << gamma[ j ] << std::endl; *\/ */
+  /*    /\* std::cout << GridLogIterative << "gamma[j+1]= " << gamma[ j + 1 ] << std::endl; *\/ */
+  /*    /\* std::cout << GridLogIterative << "gamma[j]= " << gamma[ j ] << std::endl; *\/ */
 
   /*    // apply current Givens rotation */
-  /*    H(j, j)     = beta; */
+  /*    H(j, j)     = cp; */
   /*    H(j + 1, j) = 0.; */
-  /*    /\* std::cout << "H(j,j)= " << H( j, j ) << std::endl; *\/ */
-  /*    /\* std::cout << "H(j+1,j)= " << H( j + 1, j ) << std::endl; *\/ */
+  /*    /\* std::cout << GridLogIterative << "H(j,j)= " << H( j, j ) << std::endl; *\/ */
+  /*    /\* std::cout << GridLogIterative << "H(j+1,j)= " << H( j + 1, j ) << std::endl; *\/ */
   /*  } */
 
   void computeSolution(std::vector<std::complex<double>> &      y,
diff --git a/tests/solver/Test_wilson_gmres_unprec.cc b/tests/solver/Test_wilson_gmres_unprec.cc
index c3c27f9e..f43a7737 100644
--- a/tests/solver/Test_wilson_gmres_unprec.cc
+++ b/tests/solver/Test_wilson_gmres_unprec.cc
@@ -58,7 +58,7 @@ int main (int argc, char ** argv)
   WilsonFermionR Dw(Umu,Grid,RBGrid,mass);
 
   MdagMLinearOperator<WilsonFermionR,LatticeFermion> HermOp(Dw);
-  GeneralisedMinimalResidual<LatticeFermion> GMRES(1.0e-8,10000);
+  GeneralisedMinimalResidual<LatticeFermion> GMRES(1.0e-8,10000, 1);
   GMRES(HermOp,src,result);
 
   Grid_finalize();

From 9e3c187a4d973d117e0a40130b4898485074a272 Mon Sep 17 00:00:00 2001
From: Daniel Richtmann <daniel.richtmann@physik.uni-regensburg.de>
Date: Mon, 6 Nov 2017 17:05:25 +0100
Subject: [PATCH 018/130] Save current state

---
 .../iterative/GeneralisedMinimalResidual.h    | 193 +++++++++++++++++-
 1 file changed, 191 insertions(+), 2 deletions(-)

diff --git a/lib/algorithms/iterative/GeneralisedMinimalResidual.h b/lib/algorithms/iterative/GeneralisedMinimalResidual.h
index ee5445f1..1a55d9ad 100644
--- a/lib/algorithms/iterative/GeneralisedMinimalResidual.h
+++ b/lib/algorithms/iterative/GeneralisedMinimalResidual.h
@@ -203,7 +203,7 @@ class GeneralisedMinimalResidual : public OperatorFunction<Field> {
     LinOp.Op(psi, Dpsi);
     r = src - Dpsi;
 
-    RealD cp = norm2(r); // cp = beta in DD-αAMG nomenclature
+    RealD cp = norm2(r); // cp = beta in WMG nomenclature, in WMG there is no norm2 but a sqrt(norm2) here
     gamma[0]  = cp;
 
     std::cout << GridLogIterative << "cp " << cp << std::endl;
@@ -223,13 +223,17 @@ class GeneralisedMinimalResidual : public OperatorFunction<Field> {
               << "GeneralizedMinimalResidual: k=0 residual " << cp << " target " << rsd_sq << std::endl;
 
     GridStopWatch SolverTimer;
+    GridStopWatch MatrixTimer;
 
     SolverTimer.Start();
     for(auto j = 0; j < m; ++j) {
 
       // std::cout << GridLogIterative << "GeneralizedMinimalResidual: Start of outer loop with index j = " << j << std::endl;
 
+      MatrixTimer.Start();
       LinOp.Op(v[j], Dv);
+      MatrixTimer.Stop();
+
       w = Dv;
 
       for(auto i = 0; i <= j; ++i) {
@@ -280,7 +284,192 @@ class GeneralisedMinimalResidual : public OperatorFunction<Field> {
 
         std::cout << GridLogMessage << "Time breakdown " << std::endl;
         std::cout << GridLogMessage << "\tElapsed    " << SolverTimer.Elapsed() << std::endl;
-        // std::cout << GridLogMessage << "\tMatrix     " << MatrixTimer.Elapsed() << std::endl;
+        std::cout << GridLogMessage << "\tMatrix     " << MatrixTimer.Elapsed() << std::endl;
+        // std::cout << GridLogMessage << "\tLinalg     " << LinalgTimer.Elapsed() << std::endl;
+
+        IterationsToComplete = j;
+
+        break;
+      }
+    }
+
+    // backward substitution
+    computeSolution(y, gamma, H, v, psi, IterationsToComplete);
+
+    std::cout << GridLogIterative << "GeneralizedMinimalResidual: End of operator()" << std::endl;
+  }
+
+  void alternativeOperatorImplementation()(LinearOperatorBase<Field> &LinOp, const Field &src, Field &psi) {
+
+    psi.checkerboard = src.checkerboard;
+    psi(conformable, src);
+
+    RealD guess = norm2(psi);
+    assert(std::isnan(guess) == 0);
+
+    RealD cp;
+    RealD ssq    = norm2(src);
+    RealD rsd_sq = Tolerance * Tolerance * ssq;
+
+    Field r(src._grid);
+    Field Dpsi(src._grid);
+
+    PrecTimer.Reset();
+    MatTimer.Reset();
+    LinalgTimer.Reset();
+
+    GridStopWatch SolverTimer;
+    SolverTimer.Start();
+
+    int iterations = 0;
+    for (int k=0; k<MaxIterations; k++) {
+
+      cp = outerLoopBody();
+
+      // Stopping condition
+      if (cp <= rsd_sq) {
+
+        SolverTimer.Stop();
+
+        Linop.Op(psi, Dpsi); // maybe can improve these two lines
+        r = src - Dpsi;      // by technique used in VPGCR
+
+        RealD srcnorm       = sqrt(ssq);
+        RealD resnorm       = sqrt(norm2(r));
+        RealD true_residual = resnorm / srcnorm;
+
+        std::cout << GridLogMessage << "GeneralizedMinimalResidual: Converged on iteration " << k              << std::endl;
+        std::cout << GridLogMessage << "\tComputed residual "                                << sqrt(cp / ssq) << std::endl;
+        std::cout << GridLogMessage << "\tTrue residual "                                    << true_residual  << std::endl;
+        std::cout << GridLogMessage << "\tTarget "                                           << Tolerance      << std::endl;
+
+        std::cout << GridLogMessage << "GeneralizedMinimalResidual Time breakdown" << std::endl;
+        std::cout << GridLogMessage << "\tElapsed " << SolverTimer.Elapsed()       << std::endl;
+        std::cout << GridLogMessage << "\tPrecon "  << PrecTimer.Elapsed()         << std::endl;
+        std::cout << GridLogMessage << "\tMatrix "  << MatTimer.Elapsed()          << std::endl;
+        std::cout << GridLogMessage << "\tLinalg "  << LinalgTimer.Elapsed()       << std::endl;
+        return;
+      }
+    }
+
+    std::cout << GridLogMessage << "GeneralizedMinimalResidual did NOT converge" << std::endl;
+
+    if (ErrorOnNoConverge)
+      assert(0);
+  }
+
+  RealD outerLoopBody() {
+  }
+
+  void Step() {
+
+    int m = MaxIterations;
+
+    Field r(src);
+    Field w(src);
+    Field Dpsi(src);
+    Field Dv(src);
+    std::vector<Field> v(m + 1, src);
+
+    Eigen::MatrixXcd H = Eigen::MatrixXcd::Zero(m + 1, m);
+
+    std::vector<std::complex<double>> y(m + 1, 0.);
+    std::vector<std::complex<double>> gamma(m + 1, 0.);
+    std::vector<std::complex<double>> c(m + 1, 0.);
+    std::vector<std::complex<double>> s(m + 1, 0.);
+
+    // Initial residual computation & set up
+    RealD guess = norm2(psi);
+    assert(std::isnan(guess) == 0);
+
+    RealD ssq = norm2(src); // flopcount.addSiteFlops(4*Nc*Ns,s); // stands for "source squared"
+    RealD rsd_sq = Tolerance * Tolerance * ssq; // flopcount.addSiteFlops(4*Nc*Ns,s); // stands for "residual squared"
+
+    LinOp.Op(psi, Dpsi);
+    r = src - Dpsi;
+
+    RealD cp = norm2(r); // cp = beta in WMG nomenclature, in WMG there is no norm2 but a sqrt(norm2) here
+    gamma[0]  = cp;
+
+    std::cout << GridLogIterative << "cp " << cp << std::endl;
+
+    v[0] = (1. / cp) * r;
+
+    std::cout << GridLogIterative << std::setprecision(4) << "GeneralizedMinimalResidual: guess " << guess << std::endl;
+    std::cout << GridLogIterative << std::setprecision(4) << "GeneralizedMinimalResidual:   src " << ssq << std::endl;
+    // std::cout << GridLogIterative << std::setprecision(4) << "GeneralizedMinimalResidual:    mp " << d << std::endl;
+    std::cout << GridLogIterative << std::setprecision(4) << "GeneralizedMinimalResidual:  cp,r " << cp << std::endl;
+
+    if (cp <= rsd_sq) {
+      return;
+    }
+
+    std::cout << GridLogIterative << std::setprecision(4)
+              << "GeneralizedMinimalResidual: k=0 residual " << cp << " target " << rsd_sq << std::endl;
+
+    GridStopWatch SolverTimer;
+    GridStopWatch MatrixTimer;
+
+    SolverTimer.Start();
+    for(auto j = 0; j < m; ++j) {
+
+      // std::cout << GridLogIterative << "GeneralizedMinimalResidual: Start of outer loop with index j = " << j << std::endl;
+
+      MatrixTimer.Start();
+      LinOp.Op(v[j], Dv);
+      MatrixTimer.Stop();
+
+      w = Dv;
+
+      for(auto i = 0; i <= j; ++i) {
+        H(i, j) = innerProduct(v[i], w);
+        w = w - H(i, j) * v[i];
+      }
+
+      H(j + 1, j) = norm2(w);
+      v[j + 1] = (1. / H(j + 1, j)) * w;
+
+      // end of arnoldi process, begin of givens rotations
+      // apply old Givens rotation
+      for(auto i = 0; i < j ; ++i) {
+        auto tmp = -s[i] * H(i, j) + c[i] * H(i + 1, j);
+        H(i, j)     = std::conj(c[i]) * H(i, j) + std::conj(s[i]) * H(i + 1, j);
+        H(i + 1, j) = tmp;
+      }
+
+      // compute new Givens Rotation
+      ComplexD nu = sqrt(std::norm(H(j, j)) + std::norm(H(j + 1, j)));
+      c[j]        = H(j, j) / nu;
+      s[j]        = H(j + 1, j) / nu;
+      std::cout << GridLogIterative << "GeneralizedMinimalResidual: nu" << nu << std::endl;
+      std::cout << GridLogIterative << "GeneralizedMinimalResidual: H("<<j<<","<<j<<")" << H(j,j) << std::endl;
+      std::cout << GridLogIterative << "GeneralizedMinimalResidual: H("<<j+1<<","<<j<<")" << H(j+1,j) << std::endl;
+
+      // apply new Givens rotation
+      H(j, j)     = nu;
+      H(j + 1, j) = 0.;
+
+      /* ORDERING??? */
+      gamma[j + 1] = -s[j] * gamma[j];
+      gamma[j]     = std::conj(c[j]) * gamma[j];
+
+      /* for(auto k = 0; k <= j+1 ; ++k) */
+      /*   std::cout << GridLogIterative << "k " << k << "nu " << nu << " c["<<k<<"]" << c[k]<< " s["<<k<<"]" << s[k] << " gamma["<<k<<"]" << gamma[k] << std::endl; */
+
+      std::cout << GridLogIterative << "GeneralisedMinimalResidual: Iteration "
+                << j << " residual " << std::abs(gamma[j + 1]) << std::endl; //" target "
+                /* << TargetResSq << std::endl; */
+      if(std::abs(gamma[j + 1]) / sqrt(cp) < Tolerance) {
+        SolverTimer.Stop();
+
+        std::cout << GridLogMessage << "GeneralizedMinimalResidual Converged on iteration " << j << std::endl;
+        // std::cout << GridLogMessage << "\tComputed residual " << sqrt(cp / ssq) << std::endl;
+        // std::cout << GridLogMessage << "\tTrue residual " << true_residual << std::endl;
+        std::cout << GridLogMessage << "\tTarget " << Tolerance << std::endl;
+
+        std::cout << GridLogMessage << "Time breakdown " << std::endl;
+        std::cout << GridLogMessage << "\tElapsed    " << SolverTimer.Elapsed() << std::endl;
+        std::cout << GridLogMessage << "\tMatrix     " << MatrixTimer.Elapsed() << std::endl;
         // std::cout << GridLogMessage << "\tLinalg     " << LinalgTimer.Elapsed() << std::endl;
 
         IterationsToComplete = j;

From b3be9195b4b5227be777e6bbd0efb284b314c9ad Mon Sep 17 00:00:00 2001
From: Daniel Richtmann <daniel.richtmann@physik.uni-regensburg.de>
Date: Mon, 6 Nov 2017 17:12:23 +0100
Subject: [PATCH 019/130] Eliminate one temporary lattice fermion in GMRES code

---
 lib/algorithms/iterative/GeneralisedMinimalResidual.h | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/lib/algorithms/iterative/GeneralisedMinimalResidual.h b/lib/algorithms/iterative/GeneralisedMinimalResidual.h
index 1a55d9ad..aa5c0d68 100644
--- a/lib/algorithms/iterative/GeneralisedMinimalResidual.h
+++ b/lib/algorithms/iterative/GeneralisedMinimalResidual.h
@@ -312,7 +312,6 @@ class GeneralisedMinimalResidual : public OperatorFunction<Field> {
     RealD rsd_sq = Tolerance * Tolerance * ssq;
 
     Field r(src._grid);
-    Field Dpsi(src._grid);
 
     PrecTimer.Reset();
     MatTimer.Reset();
@@ -331,8 +330,8 @@ class GeneralisedMinimalResidual : public OperatorFunction<Field> {
 
         SolverTimer.Stop();
 
-        Linop.Op(psi, Dpsi); // maybe can improve these two lines
-        r = src - Dpsi;      // by technique used in VPGCR
+        Linop.Op(psi,r);
+        axpy(r,-1.0,src,r);
 
         RealD srcnorm       = sqrt(ssq);
         RealD resnorm       = sqrt(norm2(r));

From fc7d07ade0f34be5583505f40f90f5ae53d9f714 Mon Sep 17 00:00:00 2001
From: Daniel Richtmann <daniel.richtmann@physik.uni-regensburg.de>
Date: Mon, 6 Nov 2017 17:12:38 +0100
Subject: [PATCH 020/130] Correct function signature of body of GMRES outer
 loop

---
 lib/algorithms/iterative/GeneralisedMinimalResidual.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/lib/algorithms/iterative/GeneralisedMinimalResidual.h b/lib/algorithms/iterative/GeneralisedMinimalResidual.h
index aa5c0d68..931af3cb 100644
--- a/lib/algorithms/iterative/GeneralisedMinimalResidual.h
+++ b/lib/algorithms/iterative/GeneralisedMinimalResidual.h
@@ -323,7 +323,7 @@ class GeneralisedMinimalResidual : public OperatorFunction<Field> {
     int iterations = 0;
     for (int k=0; k<MaxIterations; k++) {
 
-      cp = outerLoopBody();
+      cp = outerLoopBody(Linop, src, psi, rsd_sq);
 
       // Stopping condition
       if (cp <= rsd_sq) {
@@ -357,7 +357,7 @@ class GeneralisedMinimalResidual : public OperatorFunction<Field> {
       assert(0);
   }
 
-  RealD outerLoopBody() {
+  RealD outerLoopBody(LinearOperatorBase<Field> &Linop, const Field &src, Field &psi, RealD rsd_sq) {
   }
 
   void Step() {

From 8c579d2d4a0b58426e8754bee4e1dd6c95fb3e1b Mon Sep 17 00:00:00 2001
From: Daniel Richtmann <daniel.richtmann@physik.uni-regensburg.de>
Date: Mon, 6 Nov 2017 18:09:48 +0100
Subject: [PATCH 021/130] Save current state

---
 .../iterative/GeneralisedMinimalResidual.h    | 80 +++++++++++++++++++
 1 file changed, 80 insertions(+)

diff --git a/lib/algorithms/iterative/GeneralisedMinimalResidual.h b/lib/algorithms/iterative/GeneralisedMinimalResidual.h
index 931af3cb..d53d05db 100644
--- a/lib/algorithms/iterative/GeneralisedMinimalResidual.h
+++ b/lib/algorithms/iterative/GeneralisedMinimalResidual.h
@@ -358,6 +358,86 @@ class GeneralisedMinimalResidual : public OperatorFunction<Field> {
   }
 
   RealD outerLoopBody(LinearOperatorBase<Field> &Linop, const Field &src, Field &psi, RealD rsd_sq) {
+
+    Field w(src._grid);
+    Field r(src._grid);
+
+    std::vector<Field> v(whatDoWePutHere, src._grid); // in MG code: m + 1
+
+    std::vector<std::complex<double>> gamma(whatDoWePutHere, 0.); // in MG code: m + 1
+
+    MatrixTimer.Start();
+    Linop.Op(psi, w); // w = D * psi
+    MatrixTimer.Stop();
+
+    LinalgTimer.Start();
+    r = src - w;
+
+    gamma[0] = norm2(r); // do we need an explicit cast? // in MG code: sqrt around/within the norm
+
+    v[0] = (1. / gamma[0]) * r;
+    LinalgTimer.Stop();
+
+    for (int i=0; i<whatDoWePutHere; i++) { // in MG code: p->restart_length
+
+      arnoldiStep(Linop, v, w, whatDoWePutHere); // in MG code: j
+
+      ///////////////////////////////////////////////////////////////////////
+      // Begin of QR Update /////////////////////////////////////////////////
+      ///////////////////////////////////////////////////////////////////////
+
+      qrUpdate(whatDoWePutHere); // in MG code: j
+
+      ///////////////////////////////////////////////////////////////////////
+      // End of QR Update ///////////////////////////////////////////////////
+      ///////////////////////////////////////////////////////////////////////
+
+      if ((whatDoWePutHere) || (cp < rsd_sq)) { // in VPGCR code: (k == nstep-1)
+
+        // compute solution
+
+        return cp;
+      }
+    }
+  }
+
+  void arnoldiStep(LinearOperatorBase<Field> &Linop, std::vector<Field> &v, Field &w, int iter) {
+
+    MatrixTimer.Start();
+    Linop.Op(v[iter], w);
+    MatrixTimer.Stop();
+
+    LinalgTimer.Start();
+    for(int i = 0; i <= iter; ++i) {
+      H(i, j) = innerProduct(v[i], w);
+      w = w - H(i, iter) * v[i];
+    }
+
+    H(iter + 1, iter) = norm2(w); // in MG code: sqrt around/within the norm
+    v[iter + 1] = (1. / H(iter + 1, iter)) * w;
+    LinalgTimer.Stop();
+  }
+
+  void qrUpdate(int iter) {
+
+    for(int i = 0; i < iter ; ++i) {
+      auto tmp = -s[i] * H(i, iter) + c[i] * H(i + 1, iter);
+      H(i, iter)     = std::conj(c[i]) * H(i, iter) + std::conj(s[i]) * H(i + 1, iter);
+      H(i + 1, iter) = tmp;
+    }
+
+    // compute new Givens Rotation
+    ComplexD nu = sqrt(std::norm(H(iter, iter)) + std::norm(H(iter + 1, iter)));
+    c[iter]        = H(iter, iter) / nu;
+    s[iter]        = H(iter + 1, iter) / nu;
+
+    // apply new Givens rotation
+    H(iter, iter)     = nu;
+    H(iter + 1, iter) = 0.;
+
+    /* ORDERING??? */
+    gamma[iter + 1] = -s[iter] * gamma[iter];
+    gamma[iter]     = std::conj(c[iter]) * gamma[iter];
   }
 
   void Step() {

From e1f928398d5c601d1ccfc2affd404c35e79ba41a Mon Sep 17 00:00:00 2001
From: Daniel Richtmann <daniel.richtmann@physik.uni-regensburg.de>
Date: Tue, 7 Nov 2017 10:22:41 +0100
Subject: [PATCH 022/130] Save current state

---
 .../iterative/GeneralisedMinimalResidual.h    | 39 +++++++++++--------
 1 file changed, 23 insertions(+), 16 deletions(-)

diff --git a/lib/algorithms/iterative/GeneralisedMinimalResidual.h b/lib/algorithms/iterative/GeneralisedMinimalResidual.h
index d53d05db..da728619 100644
--- a/lib/algorithms/iterative/GeneralisedMinimalResidual.h
+++ b/lib/algorithms/iterative/GeneralisedMinimalResidual.h
@@ -2,11 +2,11 @@
 
 Grid physics library, www.github.com/paboyle/Grid
 
-Source file: lib/algorithms/iterative/GeneralisedMinimalResidual.h
+Source file: ./lib/algorithms/iterative/GeneralisedMinimalResidual.h
 
 Copyright (C) 2015
-Copyright (C) 2016
 
+Author: Daniel Richtmann <daniel.richtmann@ur.de>
 
 This program is free software; you can redistribute it and/or modify
 it under the terms of the GNU General Public License as published by
@@ -58,6 +58,9 @@ class GeneralisedMinimalResidual : public OperatorFunction<Field> {
   Integer RestartLength;
   Integer IterationsToComplete; // Number of iterations the GMRES took to
                                 // finish. Filled in upon completion
+  GridStopWatch MatrixTimer;
+  GridStopWatch PrecTimer;
+  GridStopWatch LinalgTimer;
 
   GeneralisedMinimalResidual(RealD   tol,
                              Integer maxit,
@@ -299,7 +302,7 @@ class GeneralisedMinimalResidual : public OperatorFunction<Field> {
     std::cout << GridLogIterative << "GeneralizedMinimalResidual: End of operator()" << std::endl;
   }
 
-  void alternativeOperatorImplementation()(LinearOperatorBase<Field> &LinOp, const Field &src, Field &psi) {
+  void alternativeOperatorImplementation(LinearOperatorBase<Field> &LinOp, const Field &src, Field &psi) {
 
     psi.checkerboard = src.checkerboard;
     psi(conformable, src);
@@ -314,7 +317,7 @@ class GeneralisedMinimalResidual : public OperatorFunction<Field> {
     Field r(src._grid);
 
     PrecTimer.Reset();
-    MatTimer.Reset();
+    MatrixTimer.Reset();
     LinalgTimer.Reset();
 
     GridStopWatch SolverTimer;
@@ -323,14 +326,14 @@ class GeneralisedMinimalResidual : public OperatorFunction<Field> {
     int iterations = 0;
     for (int k=0; k<MaxIterations; k++) {
 
-      cp = outerLoopBody(Linop, src, psi, rsd_sq);
+      cp = outerLoopBody(LinOp, src, psi, rsd_sq);
 
       // Stopping condition
       if (cp <= rsd_sq) {
 
         SolverTimer.Stop();
 
-        Linop.Op(psi,r);
+        LinOp.Op(psi,r);
         axpy(r,-1.0,src,r);
 
         RealD srcnorm       = sqrt(ssq);
@@ -345,7 +348,7 @@ class GeneralisedMinimalResidual : public OperatorFunction<Field> {
         std::cout << GridLogMessage << "GeneralizedMinimalResidual Time breakdown" << std::endl;
         std::cout << GridLogMessage << "\tElapsed " << SolverTimer.Elapsed()       << std::endl;
         std::cout << GridLogMessage << "\tPrecon "  << PrecTimer.Elapsed()         << std::endl;
-        std::cout << GridLogMessage << "\tMatrix "  << MatTimer.Elapsed()          << std::endl;
+        std::cout << GridLogMessage << "\tMatrix "  << MatrixTimer.Elapsed()       << std::endl;
         std::cout << GridLogMessage << "\tLinalg "  << LinalgTimer.Elapsed()       << std::endl;
         return;
       }
@@ -357,17 +360,21 @@ class GeneralisedMinimalResidual : public OperatorFunction<Field> {
       assert(0);
   }
 
-  RealD outerLoopBody(LinearOperatorBase<Field> &Linop, const Field &src, Field &psi, RealD rsd_sq) {
+  RealD outerLoopBody(LinearOperatorBase<Field> &LinOp, const Field &src, Field &psi, RealD rsd_sq) {
+
+    RealD cp = 0;
 
     Field w(src._grid);
     Field r(src._grid);
 
+    auto whatDoWePutHere = 1;
+
     std::vector<Field> v(whatDoWePutHere, src._grid); // in MG code: m + 1
 
     std::vector<std::complex<double>> gamma(whatDoWePutHere, 0.); // in MG code: m + 1
 
     MatrixTimer.Start();
-    Linop.Op(psi, w); // w = D * psi
+    LinOp.Op(psi, w); // w = D * psi
     MatrixTimer.Stop();
 
     LinalgTimer.Start();
@@ -380,7 +387,7 @@ class GeneralisedMinimalResidual : public OperatorFunction<Field> {
 
     for (int i=0; i<whatDoWePutHere; i++) { // in MG code: p->restart_length
 
-      arnoldiStep(Linop, v, w, whatDoWePutHere); // in MG code: j
+      arnoldiStep(LinOp, v, w, whatDoWePutHere); // in MG code: j
 
       ///////////////////////////////////////////////////////////////////////
       // Begin of QR Update /////////////////////////////////////////////////
@@ -401,15 +408,15 @@ class GeneralisedMinimalResidual : public OperatorFunction<Field> {
     }
   }
 
-  void arnoldiStep(LinearOperatorBase<Field> &Linop, std::vector<Field> &v, Field &w, int iter) {
+  void arnoldiStep(LinearOperatorBase<Field> &LinOp, std::vector<Field> &v, Field &w, int iter) {
 
     MatrixTimer.Start();
-    Linop.Op(v[iter], w);
+    LinOp.Op(v[iter], w);
     MatrixTimer.Stop();
 
     LinalgTimer.Start();
     for(int i = 0; i <= iter; ++i) {
-      H(i, j) = innerProduct(v[i], w);
+      H(i, iter) = innerProduct(v[i], w);
       w = w - H(i, iter) * v[i];
     }
 
@@ -608,16 +615,16 @@ class GeneralisedMinimalResidual : public OperatorFunction<Field> {
                        std::vector<Field> const &               v,
                        Field &                                  x,
                        int                                      j) {
-    for(auto i = j; i >= 0; i--) {
+    for(auto i = iter; i >= 0; i--) {
       y[i] = gamma[i];
-      for(auto k = i + 1; k <= j; k++)
+      for(auto k = i + 1; k <= iter; k++)
         y[i] -= H(i, k) * y[k];
       y[i] /= H(i, i);
     }
 
     /* if(true) // TODO ??? */
     /* { */
-    /*   for(auto i = 0; i <= j; i++) */
+    /*   for(auto i = 0; i <= iter; i++) */
     /*     x = x + v[i] * y[i]; */
     /* } else { */
       x = y[0] * v[0];

From b3d342ca22157697ec656778b10cdb4fc1c956e9 Mon Sep 17 00:00:00 2001
From: Daniel Richtmann <daniel.richtmann@physik.uni-regensburg.de>
Date: Tue, 7 Nov 2017 10:24:49 +0100
Subject: [PATCH 023/130] Remove old implementation of GMRES operator

---
 .../iterative/GeneralisedMinimalResidual.h    | 129 ------------------
 1 file changed, 129 deletions(-)

diff --git a/lib/algorithms/iterative/GeneralisedMinimalResidual.h b/lib/algorithms/iterative/GeneralisedMinimalResidual.h
index da728619..5571f64a 100644
--- a/lib/algorithms/iterative/GeneralisedMinimalResidual.h
+++ b/lib/algorithms/iterative/GeneralisedMinimalResidual.h
@@ -175,135 +175,6 @@ class GeneralisedMinimalResidual : public OperatorFunction<Field> {
 
   void operator()(LinearOperatorBase<Field> &LinOp, const Field &src, Field &psi) {
 
-    std::cout << GridLogIterative << "GMRES: Start of operator()" << std::endl;
-
-    psi.checkerboard = src.checkerboard;
-    conformable(psi, src);
-
-    int m = MaxIterations;
-
-    Field r(src);
-    Field w(src);
-    Field Dpsi(src);
-    Field Dv(src);
-
-    std::vector<Field> v(m + 1, src);
-
-    Eigen::MatrixXcd H = Eigen::MatrixXcd::Zero(m + 1, m);
-
-    std::vector<std::complex<double>> y(m + 1, 0.);
-    std::vector<std::complex<double>> gamma(m + 1, 0.);
-    std::vector<std::complex<double>> c(m + 1, 0.);
-    std::vector<std::complex<double>> s(m + 1, 0.);
-
-    // Initial residual computation & set up
-    RealD guess = norm2(psi);
-    assert(std::isnan(guess) == 0);
-
-    RealD ssq = norm2(src); // flopcount.addSiteFlops(4*Nc*Ns,s); // stands for "source squared"
-    RealD rsd_sq = Tolerance * Tolerance * ssq; // flopcount.addSiteFlops(4*Nc*Ns,s); // stands for "residual squared"
-
-    LinOp.Op(psi, Dpsi);
-    r = src - Dpsi;
-
-    RealD cp = norm2(r); // cp = beta in WMG nomenclature, in WMG there is no norm2 but a sqrt(norm2) here
-    gamma[0]  = cp;
-
-    std::cout << GridLogIterative << "cp " << cp << std::endl;
-
-    v[0] = (1. / cp) * r;
-
-    std::cout << GridLogIterative << std::setprecision(4) << "GeneralizedMinimalResidual: guess " << guess << std::endl;
-    std::cout << GridLogIterative << std::setprecision(4) << "GeneralizedMinimalResidual:   src " << ssq << std::endl;
-    // std::cout << GridLogIterative << std::setprecision(4) << "GeneralizedMinimalResidual:    mp " << d << std::endl;
-    std::cout << GridLogIterative << std::setprecision(4) << "GeneralizedMinimalResidual:  cp,r " << cp << std::endl;
-
-    if (cp <= rsd_sq) {
-      return;
-    }
-
-    std::cout << GridLogIterative << std::setprecision(4)
-              << "GeneralizedMinimalResidual: k=0 residual " << cp << " target " << rsd_sq << std::endl;
-
-    GridStopWatch SolverTimer;
-    GridStopWatch MatrixTimer;
-
-    SolverTimer.Start();
-    for(auto j = 0; j < m; ++j) {
-
-      // std::cout << GridLogIterative << "GeneralizedMinimalResidual: Start of outer loop with index j = " << j << std::endl;
-
-      MatrixTimer.Start();
-      LinOp.Op(v[j], Dv);
-      MatrixTimer.Stop();
-
-      w = Dv;
-
-      for(auto i = 0; i <= j; ++i) {
-        H(i, j) = innerProduct(v[i], w);
-        w = w - H(i, j) * v[i];
-      }
-
-      H(j + 1, j) = norm2(w);
-      v[j + 1] = (1. / H(j + 1, j)) * w;
-
-      // end of arnoldi process, begin of givens rotations
-      // apply old Givens rotation
-      for(auto i = 0; i < j ; ++i) {
-        auto tmp = -s[i] * H(i, j) + c[i] * H(i + 1, j);
-        H(i, j)     = std::conj(c[i]) * H(i, j) + std::conj(s[i]) * H(i + 1, j);
-        H(i + 1, j) = tmp;
-      }
-
-      // compute new Givens Rotation
-      ComplexD nu = sqrt(std::norm(H(j, j)) + std::norm(H(j + 1, j)));
-      c[j]        = H(j, j) / nu;
-      s[j]        = H(j + 1, j) / nu;
-      std::cout << GridLogIterative << "GeneralizedMinimalResidual: nu" << nu << std::endl;
-      std::cout << GridLogIterative << "GeneralizedMinimalResidual: H("<<j<<","<<j<<")" << H(j,j) << std::endl;
-      std::cout << GridLogIterative << "GeneralizedMinimalResidual: H("<<j+1<<","<<j<<")" << H(j+1,j) << std::endl;
-
-      // apply new Givens rotation
-      H(j, j)     = nu;
-      H(j + 1, j) = 0.;
-
-      /* ORDERING??? */
-      gamma[j + 1] = -s[j] * gamma[j];
-      gamma[j]     = std::conj(c[j]) * gamma[j];
-
-      /* for(auto k = 0; k <= j+1 ; ++k) */
-      /*   std::cout << GridLogIterative << "k " << k << "nu " << nu << " c["<<k<<"]" << c[k]<< " s["<<k<<"]" << s[k] << " gamma["<<k<<"]" << gamma[k] << std::endl; */
-
-      std::cout << GridLogIterative << "GeneralisedMinimalResidual: Iteration "
-                << j << " residual " << std::abs(gamma[j + 1]) << std::endl; //" target "
-                /* << TargetResSq << std::endl; */
-      if(std::abs(gamma[j + 1]) / sqrt(cp) < Tolerance) {
-        SolverTimer.Stop();
-
-        std::cout << GridLogMessage << "GeneralizedMinimalResidual Converged on iteration " << j << std::endl;
-        // std::cout << GridLogMessage << "\tComputed residual " << sqrt(cp / ssq) << std::endl;
-        // std::cout << GridLogMessage << "\tTrue residual " << true_residual << std::endl;
-        std::cout << GridLogMessage << "\tTarget " << Tolerance << std::endl;
-
-        std::cout << GridLogMessage << "Time breakdown " << std::endl;
-        std::cout << GridLogMessage << "\tElapsed    " << SolverTimer.Elapsed() << std::endl;
-        std::cout << GridLogMessage << "\tMatrix     " << MatrixTimer.Elapsed() << std::endl;
-        // std::cout << GridLogMessage << "\tLinalg     " << LinalgTimer.Elapsed() << std::endl;
-
-        IterationsToComplete = j;
-
-        break;
-      }
-    }
-
-    // backward substitution
-    computeSolution(y, gamma, H, v, psi, IterationsToComplete);
-
-    std::cout << GridLogIterative << "GeneralizedMinimalResidual: End of operator()" << std::endl;
-  }
-
-  void alternativeOperatorImplementation(LinearOperatorBase<Field> &LinOp, const Field &src, Field &psi) {
-
     psi.checkerboard = src.checkerboard;
     psi(conformable, src);
 

From 176bf37372d1a27f2db2878ebcdf5f1a08ebaf64 Mon Sep 17 00:00:00 2001
From: Daniel Richtmann <daniel.richtmann@physik.uni-regensburg.de>
Date: Tue, 7 Nov 2017 14:57:36 +0100
Subject: [PATCH 024/130] Remove some commented stuff

---
 .../iterative/GeneralisedMinimalResidual.h    | 105 ------------------
 1 file changed, 105 deletions(-)

diff --git a/lib/algorithms/iterative/GeneralisedMinimalResidual.h b/lib/algorithms/iterative/GeneralisedMinimalResidual.h
index 5571f64a..b2853e54 100644
--- a/lib/algorithms/iterative/GeneralisedMinimalResidual.h
+++ b/lib/algorithms/iterative/GeneralisedMinimalResidual.h
@@ -68,111 +68,6 @@ class GeneralisedMinimalResidual : public OperatorFunction<Field> {
                              bool    err_on_no_conv = true)
     : Tolerance(tol), MaxIterations(maxit), RestartLength(restart_length), ErrorOnNoConverge(err_on_no_conv){};
 
-  // want to solve Ax = b -> A = LinOp, psi = x, b = src
-
-  /* void */
-  /* operator()(LinearOperatorBase<Field> &LinOp, const Field &src, Field &psi)
-   * { */
-  /*   typedef typename Eigen::MatrixXcd MyMatrix; */
-  /*   typedef typename Eigen::VectorXcd MyVector; */
-
-  /*   Field r(src); */
-  /*   Field w(src); */
-  /*   Field mmv(src); */
-
-  /*   std::vector<Field>                V(MaxIterations + 1, src); */
-  /*   std::vector<std::complex<double>> y(MaxIterations + 1, 0.); */
-  /*   std::vector<std::complex<double>> gamma(MaxIterations + 1, 0.); */
-  /*   std::vector<std::complex<double>> c(MaxIterations + 1, 0.); */
-  /*   std::vector<std::complex<double>> s(MaxIterations + 1, 0.); */
-
-  /*   int m = MaxIterations; */
-
-  /*   RealD gamma0{}; */
-
-  /*   MyMatrix H = Eigen::MatrixXcd::Zero(MaxIterations + 1, MaxIterations); */
-
-  /*   RealD normPsiSq   = norm2(psi); */
-  /*   RealD normSrcSq   = norm2(src); */
-  /*   RealD TargetResSq = Tolerance * Tolerance * normSrcSq; */
-
-  /*   LinOp.Op(psi, mmv); */
-
-  /*   r        = src - mmv; */
-  /*   gamma[0] = norm2(r); */
-  /*   std::cout << gamma[0] << std::endl; */
-  /*   gamma0 = std::real(gamma[0]); */
-  /*   V[0]   = (1. / gamma[0]) * r; */
-
-  /*   std::cout << GridLogMessage << std::setprecision(4) */
-  /*             << "GeneralisedMinimalResidual:    psi " << normPsiSq */
-  /*             << std::endl; */
-  /*   std::cout << GridLogMessage << std::setprecision(4) */
-  /*             << "GeneralisedMinimalResidual:    src " << normSrcSq */
-  /*             << std::endl; */
-  /*   std::cout << GridLogMessage << std::setprecision(4) */
-  /*             << "GeneralisedMinimalResidual: target " << TargetResSq */
-  /*             << std::endl; */
-  /*   std::cout << GridLogMessage << std::setprecision(4) */
-  /*             << "GeneralisedMinimalResidual:      r " << gamma0 <<
-   * std::endl; */
-
-  /*   std::cout */
-  /*     << GridLogIterative << std::setprecision(4) */
-  /*     << "GeneralisedMinimalResidual: before starting to iterate residual "
-   */
-  /*     << gamma0 << " target " << TargetResSq << std::endl; */
-
-  /*   for(auto j = 0; j < m; ++j) { */
-  /*     LinOp.Op(V[j], w); */
-
-  /*     for(auto i = 0; i <= j; ++i) { */
-  /*       H(i, j) = innerProduct(V[i], w); */
-  /*       w = w - H(i, j) * V[i]; */
-  /*     } */
-
-  /*     H(j + 1, j) = norm2(w); */
-  /*     V[j + 1] = (1. / H(j + 1, j)) * w; */
-
-  /*     if(std::abs(H(j + 1, j)) > 1e-15) { */
-  /*       qrUpdate(gamma, c, s, H, j); */
-  /*     } */
-
-  /*     /\* std::cout << GridLogMessage << "GeneralisedMinimalResidual: H( "
-   * *\/ */
-  /*     /\*           << j + 1 << "," << j << " ) = " << H( j + 1, j ) *\/ */
-  /*     /\*           << std::endl; *\/ */
-
-  /*     std::cout << GridLogIterative << "GeneralisedMinimalResidual: Iteration
-   * " */
-  /*               << j << " residual " << std::abs(gamma[j + 1]) << " target "
-   */
-  /*               << TargetResSq << std::endl; */
-  /*     if(std::abs(gamma[j + 1]) / gamma0 < Tolerance) { */
-  /*       IterationsToComplete = j; */
-  /*       break; */
-  /*     } */
-  /*   } */
-  /*   computeSolution(y, gamma, H, V, psi, IterationsToComplete); */
-  /*   std::cout << GridLogMessage */
-  /*             << "GeneralisedMinimalResidual: End of operator() after " */
-  /*             << IterationsToComplete << " iterations" << std::endl; */
-
-  /*   RealD normSrc       = sqrt(normSrcSq); */
-  /*   RealD resnorm       = sqrt(norm2(mmv)); */
-  /*   RealD true_residual = resnorm / srcnorm; */
-  /*   Field result        = mmv; */
-  /*   Field Dx(src); */
-  /*   Field tmp(src); */
-
-  /*   // Test the correctness */
-  /*   LinOp.Op(result, Dx); */
-
-  /*   tmp = Dx - src; */
-
-  /*   std::cout << norm2(tmp) << " " << norm2(tmp) / gamma0 << std::endl; */
-  /* } */
-
   void operator()(LinearOperatorBase<Field> &LinOp, const Field &src, Field &psi) {
 
     psi.checkerboard = src.checkerboard;

From b87416dac4d65b3eec04e13c596248e7295f31d5 Mon Sep 17 00:00:00 2001
From: Daniel Richtmann <daniel.richtmann@physik.uni-regensburg.de>
Date: Tue, 7 Nov 2017 15:00:08 +0100
Subject: [PATCH 025/130] Fix error with conformable

---
 lib/algorithms/iterative/GeneralisedMinimalResidual.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/lib/algorithms/iterative/GeneralisedMinimalResidual.h b/lib/algorithms/iterative/GeneralisedMinimalResidual.h
index b2853e54..87ef3366 100644
--- a/lib/algorithms/iterative/GeneralisedMinimalResidual.h
+++ b/lib/algorithms/iterative/GeneralisedMinimalResidual.h
@@ -71,7 +71,7 @@ class GeneralisedMinimalResidual : public OperatorFunction<Field> {
   void operator()(LinearOperatorBase<Field> &LinOp, const Field &src, Field &psi) {
 
     psi.checkerboard = src.checkerboard;
-    psi(conformable, src);
+    conformable(psi, src);
 
     RealD guess = norm2(psi);
     assert(std::isnan(guess) == 0);

From b8ee496ed6bba8b1455acd4010556aea4c12902a Mon Sep 17 00:00:00 2001
From: Daniel Richtmann <daniel.richtmann@physik.uni-regensburg.de>
Date: Wed, 8 Nov 2017 13:23:41 +0100
Subject: [PATCH 026/130] Print some info at start of GMRES

---
 lib/algorithms/iterative/GeneralisedMinimalResidual.h | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/lib/algorithms/iterative/GeneralisedMinimalResidual.h b/lib/algorithms/iterative/GeneralisedMinimalResidual.h
index 87ef3366..34aeb265 100644
--- a/lib/algorithms/iterative/GeneralisedMinimalResidual.h
+++ b/lib/algorithms/iterative/GeneralisedMinimalResidual.h
@@ -82,6 +82,9 @@ class GeneralisedMinimalResidual : public OperatorFunction<Field> {
 
     Field r(src._grid);
 
+    std::cout << GridLogIterative << std::setprecision(4) << std::scientific << "MinimalResidual: guess " << guess << std::endl;
+    std::cout << GridLogIterative << std::setprecision(4) << std::scientific << "MinimalResidual:   src " << ssq   << std::endl;
+
     PrecTimer.Reset();
     MatrixTimer.Reset();
     LinalgTimer.Reset();

From 56d32a4afb5d35d340cbb4fe16863ea3842e4af5 Mon Sep 17 00:00:00 2001
From: Daniel Richtmann <daniel.richtmann@physik.uni-regensburg.de>
Date: Wed, 8 Nov 2017 13:50:34 +0100
Subject: [PATCH 027/130] Rename misunderstood "rsd_sq" to "rsq" in MR code

---
 lib/algorithms/iterative/MinimalResidual.h | 12 ++++++------
 tests/solver/Test_wilson_gmres_unprec.cc   |  2 +-
 2 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/lib/algorithms/iterative/MinimalResidual.h b/lib/algorithms/iterative/MinimalResidual.h
index 74d4eb53..0198c425 100644
--- a/lib/algorithms/iterative/MinimalResidual.h
+++ b/lib/algorithms/iterative/MinimalResidual.h
@@ -63,8 +63,8 @@ template<class Field> class MinimalResidual : public OperatorFunction<Field> {
     RealD guess = norm2(psi);
     assert(std::isnan(guess) == 0);
 
-    RealD ssq = norm2(src); // flopcount.addSiteFlops(4*Nc*Ns,s); // stands for "source squared"
-    RealD rsd_sq = Tolerance * Tolerance * ssq; // flopcount.addSiteFlops(4*Nc*Ns,s); // stands for "residual squared"
+    RealD ssq = norm2(src); // flopcount.addSiteFlops(4*Nc*Ns,s);
+    RealD rsq = Tolerance * Tolerance * ssq; // flopcount.addSiteFlops(4*Nc*Ns,s);
 
     Linop.Op(psi, Mr); // flopcount.addFlops(M.nFlops());
 
@@ -77,12 +77,12 @@ template<class Field> class MinimalResidual : public OperatorFunction<Field> {
     std::cout << GridLogIterative << std::setprecision(4) << "MinimalResidual:    mp " << d << std::endl;
     std::cout << GridLogIterative << std::setprecision(4) << "MinimalResidual:  cp,r " << cp << std::endl;
 
-    if (cp <= rsd_sq) {
+    if (cp <= rsq) {
       return;
     }
 
     std::cout << GridLogIterative << std::setprecision(4)
-              << "MinimalResidual: k=0 residual " << cp << " target " << rsd_sq << std::endl;
+              << "MinimalResidual: k=0 residual " << cp << " target " << rsq << std::endl;
 
     GridStopWatch LinalgTimer;
     GridStopWatch MatrixTimer;
@@ -115,11 +115,11 @@ template<class Field> class MinimalResidual : public OperatorFunction<Field> {
       LinalgTimer.Stop();
 
       std::cout << GridLogIterative << "MinimalResidual: Iteration " << k
-                << " residual " << cp << " target " << rsd_sq << std::endl;
+                << " residual " << cp << " target " << rsq << std::endl;
       std::cout << GridLogDebug << "a = " << a << " c = " << c << " d = " << d << std::endl;
 
       // Stopping condition
-      if (cp <= rsd_sq) {
+      if (cp <= rsq) {
         SolverTimer.Stop();
 
         Linop.Op(psi, Mr);
diff --git a/tests/solver/Test_wilson_gmres_unprec.cc b/tests/solver/Test_wilson_gmres_unprec.cc
index f43a7737..13cc32b6 100644
--- a/tests/solver/Test_wilson_gmres_unprec.cc
+++ b/tests/solver/Test_wilson_gmres_unprec.cc
@@ -58,7 +58,7 @@ int main (int argc, char ** argv)
   WilsonFermionR Dw(Umu,Grid,RBGrid,mass);
 
   MdagMLinearOperator<WilsonFermionR,LatticeFermion> HermOp(Dw);
-  GeneralisedMinimalResidual<LatticeFermion> GMRES(1.0e-8,10000, 1);
+  GeneralisedMinimalResidual<LatticeFermion> GMRES(1.0e-8, 10000, 5);
   GMRES(HermOp,src,result);
 
   Grid_finalize();

From 7f4ed6c2e579a7dbad3f4c9ef61dd9d1b4ba8eeb Mon Sep 17 00:00:00 2001
From: Daniel Richtmann <daniel.richtmann@physik.uni-regensburg.de>
Date: Wed, 8 Nov 2017 13:56:41 +0100
Subject: [PATCH 028/130] First working version of GMRES + a test for Wilson
 fermions

---
 .../iterative/GeneralisedMinimalResidual.h    | 316 +++++-------------
 tests/solver/Test_wilson_gmres_unprec.cc      |   2 +-
 2 files changed, 92 insertions(+), 226 deletions(-)

diff --git a/lib/algorithms/iterative/GeneralisedMinimalResidual.h b/lib/algorithms/iterative/GeneralisedMinimalResidual.h
index 34aeb265..c19802df 100644
--- a/lib/algorithms/iterative/GeneralisedMinimalResidual.h
+++ b/lib/algorithms/iterative/GeneralisedMinimalResidual.h
@@ -53,20 +53,40 @@ class GeneralisedMinimalResidual : public OperatorFunction<Field> {
  public:
   bool ErrorOnNoConverge; // Throw an assert when GMRES fails to converge,
                           // defaults to True.
+
   RealD   Tolerance;
+
   Integer MaxIterations;
   Integer RestartLength;
-  Integer IterationsToComplete; // Number of iterations the GMRES took to
-                                // finish. Filled in upon completion
+  Integer IterationCount; // Number of iterations the GMRES took to finish,
+                          // filled in upon completion
+
   GridStopWatch MatrixTimer;
   GridStopWatch PrecTimer;
   GridStopWatch LinalgTimer;
+  GridStopWatch QrTimer;
+  GridStopWatch CompSolutionTimer;
+
+  Eigen::MatrixXcd H;
+
+  std::vector<std::complex<double>> y;
+  std::vector<std::complex<double>> gamma;
+  std::vector<std::complex<double>> c;
+  std::vector<std::complex<double>> s;
 
   GeneralisedMinimalResidual(RealD   tol,
                              Integer maxit,
                              Integer restart_length,
                              bool    err_on_no_conv = true)
-    : Tolerance(tol), MaxIterations(maxit), RestartLength(restart_length), ErrorOnNoConverge(err_on_no_conv){};
+      : Tolerance(tol)
+      , MaxIterations(maxit)
+      , RestartLength(restart_length)
+      , ErrorOnNoConverge(err_on_no_conv)
+      , H(Eigen::MatrixXcd::Zero(RestartLength, RestartLength + 1)) // sizes taken from DD-αAMG code base
+      , y(RestartLength + 1, 0.)
+      , gamma(RestartLength + 1, 0.)
+      , c(RestartLength + 1, 0.)
+      , s(RestartLength + 1, 0.) {};
 
   void operator()(LinearOperatorBase<Field> &LinOp, const Field &src, Field &psi) {
 
@@ -82,17 +102,21 @@ class GeneralisedMinimalResidual : public OperatorFunction<Field> {
 
     Field r(src._grid);
 
-    std::cout << GridLogIterative << std::setprecision(4) << std::scientific << "MinimalResidual: guess " << guess << std::endl;
-    std::cout << GridLogIterative << std::setprecision(4) << std::scientific << "MinimalResidual:   src " << ssq   << std::endl;
+    std::cout << std::setprecision(4) << std::scientific << std::endl;
+    std::cout << GridLogIterative << "GeneralisedMinimalResidual: guess " << guess << std::endl;
+    std::cout << GridLogIterative << "GeneralisedMinimalResidual:   src " << ssq   << std::endl;
+
 
     PrecTimer.Reset();
     MatrixTimer.Reset();
     LinalgTimer.Reset();
+    QrTimer.Reset();
+    CompSolutionTimer.Reset();
 
     GridStopWatch SolverTimer;
     SolverTimer.Start();
 
-    int iterations = 0;
+    IterationCount = 0;
     for (int k=0; k<MaxIterations; k++) {
 
       cp = outerLoopBody(LinOp, src, psi, rsd_sq);
@@ -109,21 +133,23 @@ class GeneralisedMinimalResidual : public OperatorFunction<Field> {
         RealD resnorm       = sqrt(norm2(r));
         RealD true_residual = resnorm / srcnorm;
 
-        std::cout << GridLogMessage << "GeneralizedMinimalResidual: Converged on iteration " << k              << std::endl;
-        std::cout << GridLogMessage << "\tComputed residual "                                << sqrt(cp / ssq) << std::endl;
-        std::cout << GridLogMessage << "\tTrue residual "                                    << true_residual  << std::endl;
-        std::cout << GridLogMessage << "\tTarget "                                           << Tolerance      << std::endl;
+        std::cout << GridLogMessage << "GeneralisedMinimalResidual: Converged on iteration " << IterationCount << std::endl;
+        std::cout << GridLogMessage << "\tComputed residual "                                << sqrt(cp / ssq)       << std::endl;
+        std::cout << GridLogMessage << "\tTrue residual "                                    << true_residual        << std::endl;
+        std::cout << GridLogMessage << "\tTarget "                                           << Tolerance            << std::endl;
 
-        std::cout << GridLogMessage << "GeneralizedMinimalResidual Time breakdown" << std::endl;
+        std::cout << GridLogMessage << "GeneralisedMinimalResidual Time breakdown" << std::endl;
         std::cout << GridLogMessage << "\tElapsed " << SolverTimer.Elapsed()       << std::endl;
         std::cout << GridLogMessage << "\tPrecon "  << PrecTimer.Elapsed()         << std::endl;
         std::cout << GridLogMessage << "\tMatrix "  << MatrixTimer.Elapsed()       << std::endl;
         std::cout << GridLogMessage << "\tLinalg "  << LinalgTimer.Elapsed()       << std::endl;
+        std::cout << GridLogMessage << "\tQR "      << QrTimer.Elapsed()           << std::endl;
+        std::cout << GridLogMessage << "\tCompSol " << CompSolutionTimer.Elapsed() << std::endl;
         return;
       }
     }
 
-    std::cout << GridLogMessage << "GeneralizedMinimalResidual did NOT converge" << std::endl;
+    std::cout << GridLogMessage << "GeneralisedMinimalResidual did NOT converge" << std::endl;
 
     if (ErrorOnNoConverge)
       assert(0);
@@ -136,45 +162,43 @@ class GeneralisedMinimalResidual : public OperatorFunction<Field> {
     Field w(src._grid);
     Field r(src._grid);
 
-    auto whatDoWePutHere = 1;
-
-    std::vector<Field> v(whatDoWePutHere, src._grid); // in MG code: m + 1
-
-    std::vector<std::complex<double>> gamma(whatDoWePutHere, 0.); // in MG code: m + 1
+    std::vector<Field> v(RestartLength + 1, src._grid);
 
     MatrixTimer.Start();
-    LinOp.Op(psi, w); // w = D * psi
+    LinOp.Op(psi, w);
     MatrixTimer.Stop();
 
     LinalgTimer.Start();
     r = src - w;
 
-    gamma[0] = norm2(r); // do we need an explicit cast? // in MG code: sqrt around/within the norm
+    gamma[0] = sqrt(norm2(r));
 
     v[0] = (1. / gamma[0]) * r;
     LinalgTimer.Stop();
 
-    for (int i=0; i<whatDoWePutHere; i++) { // in MG code: p->restart_length
+    for (int i=0; i<RestartLength; i++) {
 
-      arnoldiStep(LinOp, v, w, whatDoWePutHere); // in MG code: j
+      IterationCount++;
 
-      ///////////////////////////////////////////////////////////////////////
-      // Begin of QR Update /////////////////////////////////////////////////
-      ///////////////////////////////////////////////////////////////////////
+      arnoldiStep(LinOp, v, w, i);
 
-      qrUpdate(whatDoWePutHere); // in MG code: j
+      qrUpdate(i);
 
-      ///////////////////////////////////////////////////////////////////////
-      // End of QR Update ///////////////////////////////////////////////////
-      ///////////////////////////////////////////////////////////////////////
+      cp = std::norm(gamma[i+1]);
 
-      if ((whatDoWePutHere) || (cp < rsd_sq)) { // in VPGCR code: (k == nstep-1)
+      std::cout << GridLogIterative << "GeneralisedMinimalResidual: Iteration " << IterationCount
+                << " residual " << cp << " target " << rsd_sq << std::endl;
 
-        // compute solution
+      if ((i == RestartLength - 1) || (cp <= rsd_sq)) {
+
+        computeSolution(v, psi, i);
 
         return cp;
       }
     }
+
+    assert(0); // Never reached
+    return cp;
   }
 
   void arnoldiStep(LinearOperatorBase<Field> &LinOp, std::vector<Field> &v, Field &w, int iter) {
@@ -184,223 +208,65 @@ class GeneralisedMinimalResidual : public OperatorFunction<Field> {
     MatrixTimer.Stop();
 
     LinalgTimer.Start();
-    for(int i = 0; i <= iter; ++i) {
-      H(i, iter) = innerProduct(v[i], w);
-      w = w - H(i, iter) * v[i];
+    for (int i = 0; i <= iter; ++i) {
+      H(iter, i) = innerProduct(v[i], w);
+      w = w - H(iter, i) * v[i];
     }
 
-    H(iter + 1, iter) = norm2(w); // in MG code: sqrt around/within the norm
-    v[iter + 1] = (1. / H(iter + 1, iter)) * w;
+    H(iter, iter + 1) = sqrt(norm2(w));
+    v[iter + 1] = (1. / H(iter, iter + 1)) * w;
     LinalgTimer.Stop();
   }
 
   void qrUpdate(int iter) {
 
-    for(int i = 0; i < iter ; ++i) {
-      auto tmp = -s[i] * H(i, iter) + c[i] * H(i + 1, iter);
-      H(i, iter)     = std::conj(c[i]) * H(i, iter) + std::conj(s[i]) * H(i + 1, iter);
-      H(i + 1, iter) = tmp;
+    QrTimer.Start();
+    for (int i = 0; i < iter ; ++i) {
+      auto tmp       = -s[i] * H(iter, i) + c[i] * H(iter, i + 1);
+      H(iter, i)     = std::conj(c[i]) * H(iter, i) + std::conj(s[i]) * H(iter, i + 1);
+      H(iter, i + 1) = tmp;
     }
 
-    // compute new Givens Rotation
-    ComplexD nu = sqrt(std::norm(H(iter, iter)) + std::norm(H(iter + 1, iter)));
-    c[iter]        = H(iter, iter) / nu;
-    s[iter]        = H(iter + 1, iter) / nu;
+    // Compute new Givens Rotation
+    ComplexD nu = sqrt(std::norm(H(iter, iter)) + std::norm(H(iter, iter + 1)));
+    c[iter]     = H(iter, iter) / nu;
+    s[iter]     = H(iter, iter + 1) / nu;
 
-    // apply new Givens rotation
+    // Apply new Givens rotation
     H(iter, iter)     = nu;
-    H(iter + 1, iter) = 0.;
+    H(iter, iter + 1) = 0.;
 
-    /* ORDERING??? */
     gamma[iter + 1] = -s[iter] * gamma[iter];
     gamma[iter]     = std::conj(c[iter]) * gamma[iter];
+    QrTimer.Stop();
   }
 
-  void Step() {
+  void computeSolution(std::vector< Field > const &v, Field &psi, int iter) {
 
-    int m = MaxIterations;
-
-    Field r(src);
-    Field w(src);
-    Field Dpsi(src);
-    Field Dv(src);
-    std::vector<Field> v(m + 1, src);
-
-    Eigen::MatrixXcd H = Eigen::MatrixXcd::Zero(m + 1, m);
-
-    std::vector<std::complex<double>> y(m + 1, 0.);
-    std::vector<std::complex<double>> gamma(m + 1, 0.);
-    std::vector<std::complex<double>> c(m + 1, 0.);
-    std::vector<std::complex<double>> s(m + 1, 0.);
-
-    // Initial residual computation & set up
-    RealD guess = norm2(psi);
-    assert(std::isnan(guess) == 0);
-
-    RealD ssq = norm2(src); // flopcount.addSiteFlops(4*Nc*Ns,s); // stands for "source squared"
-    RealD rsd_sq = Tolerance * Tolerance * ssq; // flopcount.addSiteFlops(4*Nc*Ns,s); // stands for "residual squared"
-
-    LinOp.Op(psi, Dpsi);
-    r = src - Dpsi;
-
-    RealD cp = norm2(r); // cp = beta in WMG nomenclature, in WMG there is no norm2 but a sqrt(norm2) here
-    gamma[0]  = cp;
-
-    std::cout << GridLogIterative << "cp " << cp << std::endl;
-
-    v[0] = (1. / cp) * r;
-
-    std::cout << GridLogIterative << std::setprecision(4) << "GeneralizedMinimalResidual: guess " << guess << std::endl;
-    std::cout << GridLogIterative << std::setprecision(4) << "GeneralizedMinimalResidual:   src " << ssq << std::endl;
-    // std::cout << GridLogIterative << std::setprecision(4) << "GeneralizedMinimalResidual:    mp " << d << std::endl;
-    std::cout << GridLogIterative << std::setprecision(4) << "GeneralizedMinimalResidual:  cp,r " << cp << std::endl;
-
-    if (cp <= rsd_sq) {
-      return;
-    }
-
-    std::cout << GridLogIterative << std::setprecision(4)
-              << "GeneralizedMinimalResidual: k=0 residual " << cp << " target " << rsd_sq << std::endl;
-
-    GridStopWatch SolverTimer;
-    GridStopWatch MatrixTimer;
-
-    SolverTimer.Start();
-    for(auto j = 0; j < m; ++j) {
-
-      // std::cout << GridLogIterative << "GeneralizedMinimalResidual: Start of outer loop with index j = " << j << std::endl;
-
-      MatrixTimer.Start();
-      LinOp.Op(v[j], Dv);
-      MatrixTimer.Stop();
-
-      w = Dv;
-
-      for(auto i = 0; i <= j; ++i) {
-        H(i, j) = innerProduct(v[i], w);
-        w = w - H(i, j) * v[i];
-      }
-
-      H(j + 1, j) = norm2(w);
-      v[j + 1] = (1. / H(j + 1, j)) * w;
-
-      // end of arnoldi process, begin of givens rotations
-      // apply old Givens rotation
-      for(auto i = 0; i < j ; ++i) {
-        auto tmp = -s[i] * H(i, j) + c[i] * H(i + 1, j);
-        H(i, j)     = std::conj(c[i]) * H(i, j) + std::conj(s[i]) * H(i + 1, j);
-        H(i + 1, j) = tmp;
-      }
-
-      // compute new Givens Rotation
-      ComplexD nu = sqrt(std::norm(H(j, j)) + std::norm(H(j + 1, j)));
-      c[j]        = H(j, j) / nu;
-      s[j]        = H(j + 1, j) / nu;
-      std::cout << GridLogIterative << "GeneralizedMinimalResidual: nu" << nu << std::endl;
-      std::cout << GridLogIterative << "GeneralizedMinimalResidual: H("<<j<<","<<j<<")" << H(j,j) << std::endl;
-      std::cout << GridLogIterative << "GeneralizedMinimalResidual: H("<<j+1<<","<<j<<")" << H(j+1,j) << std::endl;
-
-      // apply new Givens rotation
-      H(j, j)     = nu;
-      H(j + 1, j) = 0.;
-
-      /* ORDERING??? */
-      gamma[j + 1] = -s[j] * gamma[j];
-      gamma[j]     = std::conj(c[j]) * gamma[j];
-
-      /* for(auto k = 0; k <= j+1 ; ++k) */
-      /*   std::cout << GridLogIterative << "k " << k << "nu " << nu << " c["<<k<<"]" << c[k]<< " s["<<k<<"]" << s[k] << " gamma["<<k<<"]" << gamma[k] << std::endl; */
-
-      std::cout << GridLogIterative << "GeneralisedMinimalResidual: Iteration "
-                << j << " residual " << std::abs(gamma[j + 1]) << std::endl; //" target "
-                /* << TargetResSq << std::endl; */
-      if(std::abs(gamma[j + 1]) / sqrt(cp) < Tolerance) {
-        SolverTimer.Stop();
-
-        std::cout << GridLogMessage << "GeneralizedMinimalResidual Converged on iteration " << j << std::endl;
-        // std::cout << GridLogMessage << "\tComputed residual " << sqrt(cp / ssq) << std::endl;
-        // std::cout << GridLogMessage << "\tTrue residual " << true_residual << std::endl;
-        std::cout << GridLogMessage << "\tTarget " << Tolerance << std::endl;
-
-        std::cout << GridLogMessage << "Time breakdown " << std::endl;
-        std::cout << GridLogMessage << "\tElapsed    " << SolverTimer.Elapsed() << std::endl;
-        std::cout << GridLogMessage << "\tMatrix     " << MatrixTimer.Elapsed() << std::endl;
-        // std::cout << GridLogMessage << "\tLinalg     " << LinalgTimer.Elapsed() << std::endl;
-
-        IterationsToComplete = j;
-
-        break;
-      }
-    }
-
-    // backward substitution
-    computeSolution(y, gamma, H, v, psi, IterationsToComplete);
-
-    std::cout << GridLogIterative << "GeneralizedMinimalResidual: End of operator()" << std::endl;
-  }
-
-  private:
-  /*  void qrUpdate(std::vector<std::complex<double>> &gamma, */
-  /*                std::vector<std::complex<double>> &c, */
-  /*                std::vector<std::complex<double>> &s, */
-  /*                Eigen::MatrixXcd &                 H, */
-  /*                int                                j) { */
-  /*    ComplexD cp{}; */
-  /*    // update QR factorization */
-  /*    // apply previous Givens rotation */
-  /*    for(auto i = 0; i < j; i++) { */
-  /*      cp = -s[i] * H(i, j) + c[i] * H(i + 1, j); */
-  /*      H(i, j)     = std::conj(c[i]) * H(i, j) + std::conj(s[i]) * H(i + 1,
-   * j); */
-  /*      H(i + 1, j) = cp; */
-  /*    } */
-
-  /*    // compute current Givens rotation */
-  /*    cp = sqrt(std::norm(H(j, j)) + std::norm(H(j + 1, j))); */
-  /*    s[j] = H(j + 1, j) / cp; */
-  /*    c[j] = H(j, j) / cp; */
-  /*    /\* std::cout << GridLogIterative << "cp= " << cp << std::endl; *\/ */
-  /*    /\* std::cout << GridLogIterative << "s[j]= " << s[ j ] << std::endl; *\/ */
-  /*    /\* std::cout << GridLogIterative << "c[j]= " << c[ j ] << std::endl; *\/ */
-
-  /*    /\* std::cout << GridLogIterative << "gamma[j+1]= " << gamma[ j + 1 ] << std::endl; *\/ */
-  /*    /\* std::cout << GridLogIterative << "gamma[j]= " << gamma[ j ] << std::endl; *\/ */
-  /*    // update right column */
-  /*    gamma[j + 1] = -s[j] * gamma[j]; */
-  /*    gamma[j]     = std::conj(c[j]) * gamma[j]; */
-  /*    /\* std::cout << GridLogIterative << "gamma[j+1]= " << gamma[ j + 1 ] << std::endl; *\/ */
-  /*    /\* std::cout << GridLogIterative << "gamma[j]= " << gamma[ j ] << std::endl; *\/ */
-
-  /*    // apply current Givens rotation */
-  /*    H(j, j)     = cp; */
-  /*    H(j + 1, j) = 0.; */
-  /*    /\* std::cout << GridLogIterative << "H(j,j)= " << H( j, j ) << std::endl; *\/ */
-  /*    /\* std::cout << GridLogIterative << "H(j+1,j)= " << H( j + 1, j ) << std::endl; *\/ */
-  /*  } */
-
-  void computeSolution(std::vector<std::complex<double>> &      y,
-                       std::vector<std::complex<double>> const &gamma,
-                       Eigen::MatrixXcd const &                 H,
-                       std::vector<Field> const &               v,
-                       Field &                                  x,
-                       int                                      j) {
-    for(auto i = iter; i >= 0; i--) {
+    CompSolutionTimer.Start();
+    for (int i = iter; i >= 0; i--) {
       y[i] = gamma[i];
-      for(auto k = i + 1; k <= iter; k++)
-        y[i] -= H(i, k) * y[k];
+      for (int k = i + 1; k <= iter; k++)
+        y[i] -= H(k, i) * y[k];
       y[i] /= H(i, i);
     }
 
-    /* if(true) // TODO ??? */
-    /* { */
-    /*   for(auto i = 0; i <= iter; i++) */
-    /*     x = x + v[i] * y[i]; */
-    /* } else { */
-      x = y[0] * v[0];
-      for(auto i = 1; i <= j; i++)
-        x = x + v[i] * y[i];
-    /* } */
+    // TODO: Use axpys or similar for these
+    // TODO: Fix the condition
+    if (true) {
+      for (int i = 0; i <= iter; i++)
+        psi = psi + v[i] * y[i];
+    }
+    else {
+      psi = y[0] * v[0];
+      for (int i = 1; i <= iter; i++)
+        psi = psi + v[i] * y[i];
+    }
+    CompSolutionTimer.Stop();
   }
 };
 }
 #endif
+
+// Possible problems/TODOs for this implementation
+// * correct the stopping criterion
diff --git a/tests/solver/Test_wilson_gmres_unprec.cc b/tests/solver/Test_wilson_gmres_unprec.cc
index 13cc32b6..4df5f4e3 100644
--- a/tests/solver/Test_wilson_gmres_unprec.cc
+++ b/tests/solver/Test_wilson_gmres_unprec.cc
@@ -58,7 +58,7 @@ int main (int argc, char ** argv)
   WilsonFermionR Dw(Umu,Grid,RBGrid,mass);
 
   MdagMLinearOperator<WilsonFermionR,LatticeFermion> HermOp(Dw);
-  GeneralisedMinimalResidual<LatticeFermion> GMRES(1.0e-8, 10000, 5);
+  GeneralisedMinimalResidual<LatticeFermion> GMRES(1.0e-8, 50, 25);
   GMRES(HermOp,src,result);
 
   Grid_finalize();

From 0c1c1d990058f620f62644a5d662a8b29c5fcaed Mon Sep 17 00:00:00 2001
From: Daniel Richtmann <daniel.richtmann@physik.uni-regensburg.de>
Date: Wed, 8 Nov 2017 13:57:06 +0100
Subject: [PATCH 029/130] Set precision and formatting only once in MR code

---
 lib/algorithms/iterative/MinimalResidual.h | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/lib/algorithms/iterative/MinimalResidual.h b/lib/algorithms/iterative/MinimalResidual.h
index 0198c425..44e7dd40 100644
--- a/lib/algorithms/iterative/MinimalResidual.h
+++ b/lib/algorithms/iterative/MinimalResidual.h
@@ -72,17 +72,17 @@ template<class Field> class MinimalResidual : public OperatorFunction<Field> {
 
     RealD cp = norm2(r); //  Cp = |r[0]|^2 // 2 Nc Ns  flops // flopcount.addSiteFlops(4*Nc*Ns, s);
 
-    std::cout << GridLogIterative << std::setprecision(4) << "MinimalResidual: guess " << guess << std::endl;
-    std::cout << GridLogIterative << std::setprecision(4) << "MinimalResidual:   src " << ssq << std::endl;
-    std::cout << GridLogIterative << std::setprecision(4) << "MinimalResidual:    mp " << d << std::endl;
-    std::cout << GridLogIterative << std::setprecision(4) << "MinimalResidual:  cp,r " << cp << std::endl;
+    std::cout << std::setprecision(4) << std::scientific << std::endl;
+    std::cout << GridLogIterative << "MinimalResidual: guess " << guess << std::endl;
+    std::cout << GridLogIterative << "MinimalResidual:   src " << ssq << std::endl;
+    std::cout << GridLogIterative << "MinimalResidual:    mp " << d << std::endl;
+    std::cout << GridLogIterative << "MinimalResidual:  cp,r " << cp << std::endl;
 
     if (cp <= rsq) {
       return;
     }
 
-    std::cout << GridLogIterative << std::setprecision(4)
-              << "MinimalResidual: k=0 residual " << cp << " target " << rsq << std::endl;
+    std::cout << GridLogIterative << "MinimalResidual: k=0 residual " << cp << " target " << rsq << std::endl;
 
     GridStopWatch LinalgTimer;
     GridStopWatch MatrixTimer;

From b069090b525ec5401b6c897f02b8e3ac99e3f371 Mon Sep 17 00:00:00 2001
From: Daniel Richtmann <daniel.richtmann@physik.uni-regensburg.de>
Date: Wed, 8 Nov 2017 13:58:02 +0100
Subject: [PATCH 030/130] Remove a superfluous comment

---
 lib/algorithms/iterative/GeneralisedMinimalResidual.h | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/lib/algorithms/iterative/GeneralisedMinimalResidual.h b/lib/algorithms/iterative/GeneralisedMinimalResidual.h
index c19802df..26feb60d 100644
--- a/lib/algorithms/iterative/GeneralisedMinimalResidual.h
+++ b/lib/algorithms/iterative/GeneralisedMinimalResidual.h
@@ -267,6 +267,3 @@ class GeneralisedMinimalResidual : public OperatorFunction<Field> {
 };
 }
 #endif
-
-// Possible problems/TODOs for this implementation
-// * correct the stopping criterion

From 781c611ca03c8582e9e298bb3362bfc3c7effbda Mon Sep 17 00:00:00 2001
From: Daniel Richtmann <daniel.richtmann@physik.uni-regensburg.de>
Date: Wed, 8 Nov 2017 14:22:38 +0100
Subject: [PATCH 031/130] Perform minor code style fix

---
 lib/algorithms/iterative/GeneralisedMinimalResidual.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/lib/algorithms/iterative/GeneralisedMinimalResidual.h b/lib/algorithms/iterative/GeneralisedMinimalResidual.h
index 26feb60d..9851987c 100644
--- a/lib/algorithms/iterative/GeneralisedMinimalResidual.h
+++ b/lib/algorithms/iterative/GeneralisedMinimalResidual.h
@@ -241,7 +241,7 @@ class GeneralisedMinimalResidual : public OperatorFunction<Field> {
     QrTimer.Stop();
   }
 
-  void computeSolution(std::vector< Field > const &v, Field &psi, int iter) {
+  void computeSolution(std::vector<Field> const &v, Field &psi, int iter) {
 
     CompSolutionTimer.Start();
     for (int i = iter; i >= 0; i--) {

From 738278785652a2040d4d29fe10a3131403c6407d Mon Sep 17 00:00:00 2001
From: Daniel Richtmann <daniel.richtmann@physik.uni-regensburg.de>
Date: Wed, 8 Nov 2017 14:23:55 +0100
Subject: [PATCH 032/130] Spell out compound assignments in GMRES back substitution

---
 lib/algorithms/iterative/GeneralisedMinimalResidual.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/lib/algorithms/iterative/GeneralisedMinimalResidual.h b/lib/algorithms/iterative/GeneralisedMinimalResidual.h
index 9851987c..62a1699f 100644
--- a/lib/algorithms/iterative/GeneralisedMinimalResidual.h
+++ b/lib/algorithms/iterative/GeneralisedMinimalResidual.h
@@ -247,8 +247,8 @@ class GeneralisedMinimalResidual : public OperatorFunction<Field> {
     for (int i = iter; i >= 0; i--) {
       y[i] = gamma[i];
       for (int k = i + 1; k <= iter; k++)
-        y[i] -= H(k, i) * y[k];
-      y[i] /= H(i, i);
+        y[i] = y[i] - H(k, i) * y[k];
+      y[i] = y[i] / H(i, i);
     }
 
     // TODO: Use axpys or similar for these

From 37b777d801cfb8a9a4ea17d0dd5ccc5d133b6847 Mon Sep 17 00:00:00 2001
From: Daniel Richtmann <daniel.richtmann@physik.uni-regensburg.de>
Date: Wed, 8 Nov 2017 14:28:48 +0100
Subject: [PATCH 033/130] Add test for GMRES solver with staggered fermions

---
 tests/solver/Test_staggered_gmres_unprec.cc | 69 +++++++++++++++++++++
 1 file changed, 69 insertions(+)
 create mode 100644 tests/solver/Test_staggered_gmres_unprec.cc

diff --git a/tests/solver/Test_staggered_gmres_unprec.cc b/tests/solver/Test_staggered_gmres_unprec.cc
new file mode 100644
index 00000000..f9a8744f
--- /dev/null
+++ b/tests/solver/Test_staggered_gmres_unprec.cc
@@ -0,0 +1,69 @@
+/*************************************************************************************
+
+Grid physics library, www.github.com/paboyle/Grid
+
+Source file: ./tests/solver/Test_staggered_gmres_unprec.cc
+
+Copyright (C) 2015
+
+Author: Daniel Richtmann <daniel.richtmann@ur.de>
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along
+with this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+
+See the full license in the file "LICENSE" in the top level distribution
+directory
+*************************************************************************************/
+/*  END LEGAL */
+#include <Grid/Grid.h>
+
+using namespace Grid;
+using namespace Grid::QCD;
+
+int main (int argc, char ** argv)
+{
+  typedef typename ImprovedStaggeredFermionR::FermionField FermionField;
+  typedef typename ImprovedStaggeredFermionR::ComplexField ComplexField;
+  typename ImprovedStaggeredFermionR::ImplParams params;
+
+  Grid_init(&argc,&argv);
+
+  std::vector<int> latt_size   = GridDefaultLatt();
+  std::vector<int> simd_layout = GridDefaultSimd(Nd,vComplex::Nsimd());
+  std::vector<int> mpi_layout  = GridDefaultMpi();
+  GridCartesian               Grid(latt_size,simd_layout,mpi_layout);
+  GridRedBlackCartesian     RBGrid(&Grid);
+
+  std::vector<int> seeds({1,2,3,4});
+  GridParallelRNG          pRNG(&Grid);  pRNG.SeedFixedIntegers(seeds);
+
+  FermionField src(&Grid); random(pRNG,src);
+  RealD nrm = norm2(src);
+  FermionField result(&Grid); result=zero;
+  LatticeGaugeField Umu(&Grid); SU3::HotConfiguration(pRNG,Umu);
+
+  double volume=1;
+  for(int mu=0;mu<Nd;mu++){
+    volume=volume*latt_size[mu];
+  }
+
+  RealD mass=0.1;
+  ImprovedStaggeredFermionR Ds(Umu,Umu,Grid,RBGrid,mass);
+
+  MdagMLinearOperator<ImprovedStaggeredFermionR,FermionField> HermOp(Ds);
+  GeneralisedMinimalResidual<FermionField> GMRES(1.0e-6, 50, 25);
+  GMRES(HermOp,src,result);
+
+  Grid_finalize();
+}

From 8107b785cc84ba9314faf119583d946551787e1f Mon Sep 17 00:00:00 2001
From: Daniel Richtmann <daniel.richtmann@physik.uni-regensburg.de>
Date: Wed, 8 Nov 2017 14:40:03 +0100
Subject: [PATCH 034/130] Rename misunderstood "rsd_sq" to "rsq" in GMRES code

---
 .../iterative/GeneralisedMinimalResidual.h           | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/lib/algorithms/iterative/GeneralisedMinimalResidual.h b/lib/algorithms/iterative/GeneralisedMinimalResidual.h
index 62a1699f..aa17c382 100644
--- a/lib/algorithms/iterative/GeneralisedMinimalResidual.h
+++ b/lib/algorithms/iterative/GeneralisedMinimalResidual.h
@@ -98,7 +98,7 @@ class GeneralisedMinimalResidual : public OperatorFunction<Field> {
 
     RealD cp;
     RealD ssq    = norm2(src);
-    RealD rsd_sq = Tolerance * Tolerance * ssq;
+    RealD rsq = Tolerance * Tolerance * ssq;
 
     Field r(src._grid);
 
@@ -119,10 +119,10 @@ class GeneralisedMinimalResidual : public OperatorFunction<Field> {
     IterationCount = 0;
     for (int k=0; k<MaxIterations; k++) {
 
-      cp = outerLoopBody(LinOp, src, psi, rsd_sq);
+      cp = outerLoopBody(LinOp, src, psi, rsq);
 
       // Stopping condition
-      if (cp <= rsd_sq) {
+      if (cp <= rsq) {
 
         SolverTimer.Stop();
 
@@ -155,7 +155,7 @@ class GeneralisedMinimalResidual : public OperatorFunction<Field> {
       assert(0);
   }
 
-  RealD outerLoopBody(LinearOperatorBase<Field> &LinOp, const Field &src, Field &psi, RealD rsd_sq) {
+  RealD outerLoopBody(LinearOperatorBase<Field> &LinOp, const Field &src, Field &psi, RealD rsq) {
 
     RealD cp = 0;
 
@@ -187,9 +187,9 @@ class GeneralisedMinimalResidual : public OperatorFunction<Field> {
       cp = std::norm(gamma[i+1]);
 
       std::cout << GridLogIterative << "GeneralisedMinimalResidual: Iteration " << IterationCount
-                << " residual " << cp << " target " << rsd_sq << std::endl;
+                << " residual " << cp << " target " << rsq << std::endl;
 
-      if ((i == RestartLength - 1) || (cp <= rsd_sq)) {
+      if ((i == RestartLength - 1) || (cp <= rsq)) {
 
         computeSolution(v, psi, i);
 

From 0f75ea52b71893ea93896ee7c2cee718b11625af Mon Sep 17 00:00:00 2001
From: Daniel Richtmann <daniel.richtmann@physik.uni-regensburg.de>
Date: Wed, 8 Nov 2017 16:17:18 +0100
Subject: [PATCH 035/130] First version of FGMRES; not working yet

---
 lib/algorithms/Algorithms.h                   |   1 +
 .../FlexibleGeneralisedMinimalResidual.h      | 284 ++++++++++++++++++
 2 files changed, 285 insertions(+)
 create mode 100644 lib/algorithms/iterative/FlexibleGeneralisedMinimalResidual.h

diff --git a/lib/algorithms/Algorithms.h b/lib/algorithms/Algorithms.h
index 503092db..3368dce8 100644
--- a/lib/algorithms/Algorithms.h
+++ b/lib/algorithms/Algorithms.h
@@ -49,6 +49,7 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk>
 #include <Grid/algorithms/iterative/ConjugateGradientReliableUpdate.h>
 #include <Grid/algorithms/iterative/MinimalResidual.h>
 #include <Grid/algorithms/iterative/GeneralisedMinimalResidual.h>
+#include <Grid/algorithms/iterative/FlexibleGeneralisedMinimalResidual.h>
 #include <Grid/algorithms/iterative/ImplicitlyRestartedLanczos.h>
 #include <Grid/algorithms/CoarsenedMatrix.h>
 #include <Grid/algorithms/FFT.h>
diff --git a/lib/algorithms/iterative/FlexibleGeneralisedMinimalResidual.h b/lib/algorithms/iterative/FlexibleGeneralisedMinimalResidual.h
new file mode 100644
index 00000000..d7867833
--- /dev/null
+++ b/lib/algorithms/iterative/FlexibleGeneralisedMinimalResidual.h
@@ -0,0 +1,284 @@
+/*************************************************************************************
+
+Grid physics library, www.github.com/paboyle/Grid
+
+Source file: ./lib/algorithms/iterative/FlexibleGeneralisedMinimalResidual.h
+
+Copyright (C) 2015
+
+Author: Daniel Richtmann <daniel.richtmann@ur.de>
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along
+with this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+
+See the full license in the file "LICENSE" in the top level distribution
+directory
+*************************************************************************************/
+/*  END LEGAL */
+#ifndef GRID_FLEXIBLE_GENERALISED_MINIMAL_RESIDUAL_H
+#define GRID_FLEXIBLE_GENERALISED_MINIMAL_RESIDUAL_H
+
+// from Y. Saad - Iterative Methods for Sparse Linear Systems, PP 172
+// Compute r0 = b − Ax0 , β := ||r0||2 , and v1 := r0 /β
+// For j = 1, 2, ..., m Do:
+//   Compute wj := Avj
+//   For i = 1, ..., j Do:
+//     hij := (wj , vi)
+//     wj := wj − hij vi
+//   EndDo
+//   hj+1,j = ||wj||2 . If hj+1,j = 0 set m := j and go to HERE
+//   vj+1 = wj /hj+1,j
+// EndDo
+// Define the (m + 1) × m Hessenberg matrix H̄m = {hij}1≤i≤m+1,1≤j≤m. [HERE]
+// Compute ym the minimizer of ||βe1 − H̄m y||2 and xm = x0 + Vm ym.
+///////////////////////////////////////////////////////////////////////////////////////////////////////
+
+// want to solve Ax = b -> A = LinOp, psi = x, b = src
+
+namespace Grid {
+
+template<class Field>
+class FlexibleGeneralisedMinimalResidual : public OperatorFunction<Field> {
+ public:
+  bool ErrorOnNoConverge; // Throw an assert when FGMRES fails to converge,
+                          // defaults to True.
+
+  RealD   Tolerance;
+
+  Integer MaxIterations; // upper bound on the number of outerLoopBody calls (restart cycles), not on IterationCount
+  Integer RestartLength; // number of Arnoldi steps per restart cycle
+  Integer IterationCount; // Number of iterations the FGMRES took to finish,
+                          // filled in upon completion
+
+  GridStopWatch MatrixTimer; // time spent in LinOp.Op
+  GridStopWatch PrecTimer; // time spent in Preconditioner
+  GridStopWatch LinalgTimer; // time spent in vector arithmetic, inner products and norms
+  GridStopWatch QrTimer; // time spent in qrUpdate
+  GridStopWatch CompSolutionTimer; // time spent in computeSolution
+
+  Eigen::MatrixXcd H; // Arnoldi coefficients; first index is the Arnoldi step, second the basis-vector index
+
+  std::vector<std::complex<double>> y; // coefficients of the basis vectors in the solution update (see computeSolution)
+  std::vector<std::complex<double>> gamma; // gamma[0] starts as the residual norm; qrUpdate rotates it, |gamma[i+1]|^2 is the residual estimate
+  std::vector<std::complex<double>> c; // Givens rotation coefficients, one per Arnoldi step (set in qrUpdate)
+  std::vector<std::complex<double>> s; // Givens rotation coefficients, one per Arnoldi step (set in qrUpdate)
+
+  LinearFunction<Field> &Preconditioner;
+
+ FlexibleGeneralisedMinimalResidual(RealD   tol,
+                                    Integer maxit,
+                                    LinearFunction<Field> &Prec,
+                                    Integer restart_length,
+                                    bool    err_on_no_conv = true)
+      : Tolerance(tol)
+      , MaxIterations(maxit)
+      , RestartLength(restart_length)
+      , ErrorOnNoConverge(err_on_no_conv) // NOTE(review): declared first in the class, so it is initialised before Tolerance regardless of this position (-Wreorder)
+      , H(Eigen::MatrixXcd::Zero(RestartLength, RestartLength + 1)) // sizes taken from DD-αAMG code base
+      , y(RestartLength + 1, 0.)
+      , gamma(RestartLength + 1, 0.)
+      , c(RestartLength + 1, 0.)
+      , s(RestartLength + 1, 0.)
+      , Preconditioner(Prec) {}; // NOTE(review): only a reference is stored, so Prec must outlive this solver object
+
+  void operator()(LinearOperatorBase<Field> &LinOp, const Field &src, Field &psi) { // solve LinOp * psi = src; psi is read as the initial guess and updated in place
+
+    psi.checkerboard = src.checkerboard;
+    conformable(psi, src);
+
+    RealD guess = norm2(psi);
+    assert(std::isnan(guess) == 0); // reject an initial guess whose norm is NaN
+
+    RealD cp;
+    RealD ssq = norm2(src);
+    RealD rsq = Tolerance * Tolerance * ssq; // target for the squared residual, i.e. Tolerance is relative to the norm of src
+
+    Field r(src._grid);
+
+    std::cout << std::setprecision(4) << std::scientific << std::endl;
+    std::cout << GridLogIterative << "FlexibleGeneralisedMinimalResidual: guess " << guess << std::endl;
+    std::cout << GridLogIterative << "FlexibleGeneralisedMinimalResidual:   src " << ssq   << std::endl;
+
+
+    PrecTimer.Reset();
+    MatrixTimer.Reset();
+    LinalgTimer.Reset();
+    QrTimer.Reset();
+    CompSolutionTimer.Reset();
+
+    GridStopWatch SolverTimer;
+    SolverTimer.Start();
+
+    IterationCount = 0;
+    for (int k=0; k<MaxIterations; k++) { // each pass is one restart cycle of up to RestartLength Arnoldi steps
+
+      cp = outerLoopBody(LinOp, src, psi, rsq);
+
+      // Stopping condition
+      if (cp <= rsq) {
+
+        SolverTimer.Stop();
+
+        LinOp.Op(psi,r);
+        axpy(r,-1.0,src,r); // presumably r = LinOp*psi - src (assumes axpy(ret,a,x,y) computes a*x + y — TODO confirm)
+
+        RealD srcnorm       = sqrt(ssq);
+        RealD resnorm       = sqrt(norm2(r));
+        RealD true_residual = resnorm / srcnorm; // recomputed from psi, independent of the estimate cp
+
+        std::cout << GridLogMessage << "FlexibleGeneralisedMinimalResidual: Converged on iteration " << IterationCount << std::endl;
+        std::cout << GridLogMessage << "\tComputed residual "                                << sqrt(cp / ssq)       << std::endl;
+        std::cout << GridLogMessage << "\tTrue residual "                                    << true_residual        << std::endl;
+        std::cout << GridLogMessage << "\tTarget "                                           << Tolerance            << std::endl;
+
+        std::cout << GridLogMessage << "FlexibleGeneralisedMinimalResidual Time breakdown" << std::endl;
+        std::cout << GridLogMessage << "\tElapsed " << SolverTimer.Elapsed()       << std::endl;
+        std::cout << GridLogMessage << "\tPrecon "  << PrecTimer.Elapsed()         << std::endl;
+        std::cout << GridLogMessage << "\tMatrix "  << MatrixTimer.Elapsed()       << std::endl;
+        std::cout << GridLogMessage << "\tLinalg "  << LinalgTimer.Elapsed()       << std::endl;
+        std::cout << GridLogMessage << "\tQR "      << QrTimer.Elapsed()           << std::endl;
+        std::cout << GridLogMessage << "\tCompSol " << CompSolutionTimer.Elapsed() << std::endl;
+        return;
+      }
+    }
+
+    std::cout << GridLogMessage << "FlexibleGeneralisedMinimalResidual did NOT converge" << std::endl; // reached only when no restart cycle met cp <= rsq
+
+    if (ErrorOnNoConverge)
+      assert(0); // NOTE(review): assert is compiled out when NDEBUG is defined, so this would then return silently
+  }
+
+  RealD outerLoopBody(LinearOperatorBase<Field> &LinOp, const Field &src, Field &psi, RealD rsq) { // one restart cycle; returns the last squared residual estimate
+
+    RealD cp = 0;
+
+    Field w(src._grid);
+    Field r(src._grid);
+
+    std::vector<Field> v(RestartLength + 1, src._grid);
+    std::vector<Field> z(RestartLength + 1, src._grid);
+
+    MatrixTimer.Start();
+    LinOp.Op(psi, z[0]);
+    MatrixTimer.Stop();
+
+    PrecTimer.Start();
+    Preconditioner(z[0], r);
+    PrecTimer.Stop();
+
+    LinalgTimer.Start();
+    r = src - w;
+
+    gamma[0] = sqrt(norm2(r)); // norm of the starting residual of this cycle
+
+    v[0] = (1. / gamma[0]) * r; // first basis vector; NOTE(review): no guard against gamma[0] == 0
+    LinalgTimer.Stop();
+
+    for (int i=0; i<RestartLength; i++) {
+
+      IterationCount++; // counts Arnoldi steps across all restart cycles
+
+      arnoldiStep(LinOp, v, z, w, i);
+
+      qrUpdate(i);
+
+      cp = std::norm(gamma[i+1]); // std::norm is the squared magnitude, so cp is compared against the squared target
+
+      std::cout << GridLogIterative << "FlexibleGeneralisedMinimalResidual: Iteration " << IterationCount
+                << " residual " << cp << " target " << rsq << std::endl;
+
+      if ((i == RestartLength - 1) || (cp <= rsq)) { // end of the cycle, or the estimate reached the target
+
+        computeSolution(v, psi, i); // psi is only updated here, once per cycle
+
+        return cp;
+      }
+    }
+
+    assert(0); // Never reached
+    return cp;
+  }
+
+  void arnoldiStep(LinearOperatorBase<Field> &LinOp, std::vector<Field> &v, std::vector<Field> &z, Field &w, int iter) {
+
+    MatrixTimer.Start();
+    LinOp.Op(v[iter], z[0]);
+    MatrixTimer.Stop();
+
+    PrecTimer.Start();
+    Preconditioner(z[0], w);
+    PrecTimer.Stop();
+
+    LinalgTimer.Start();
+    for (int i = 0; i <= iter; ++i) { // remove the v[0..iter] components from w, one basis vector at a time
+      H(iter, i) = innerProduct(v[i], w); // uses the already partially orthogonalised w
+      w = w - H(iter, i) * v[i];
+    }
+
+    H(iter, iter + 1) = sqrt(norm2(w)); // norm of what is left of w after the loop above
+    v[iter + 1] = (1. / H(iter, iter + 1)) * w; // next basis vector; NOTE(review): no guard against H(iter, iter + 1) == 0
+    LinalgTimer.Stop();
+  }
+
+  void qrUpdate(int iter) { // rotate the H entries written by Arnoldi step iter and update gamma accordingly
+
+    QrTimer.Start();
+    for (int i = 0; i < iter ; ++i) { // apply the rotations stored by earlier steps to the new entries
+      auto tmp       = -s[i] * H(iter, i) + c[i] * H(iter, i + 1); // held in tmp because the next line overwrites H(iter, i)
+      H(iter, i)     = std::conj(c[i]) * H(iter, i) + std::conj(s[i]) * H(iter, i + 1);
+      H(iter, i + 1) = tmp;
+    }
+
+    // Compute new Givens Rotation
+    ComplexD nu = sqrt(std::norm(H(iter, iter)) + std::norm(H(iter, iter + 1))); // sqrt(|H(iter,iter)|^2 + |H(iter,iter+1)|^2)
+    c[iter]     = H(iter, iter) / nu; // NOTE(review): no guard against nu == 0
+    s[iter]     = H(iter, iter + 1) / nu;
+
+    // Apply new Givens rotation
+    H(iter, iter)     = nu;
+    H(iter, iter + 1) = 0.; // the entry beyond the diagonal is eliminated
+
+    gamma[iter + 1] = -s[iter] * gamma[iter]; // computed first: it needs gamma[iter] before the next line overwrites it
+    gamma[iter]     = std::conj(c[iter]) * gamma[iter];
+    QrTimer.Stop();
+  }
+
+  void computeSolution(std::vector<Field> const &v, Field &psi, int iter) { // solve for y[0..iter], then add the combination of v[0..iter] to psi
+
+    CompSolutionTimer.Start();
+    for (int i = iter; i >= 0; i--) { // back substitution, last unknown first
+      y[i] = gamma[i];
+      for (int k = i + 1; k <= iter; k++)
+        y[i] = y[i] - H(k, i) * y[k]; // subtract the unknowns that are already solved
+      y[i] = y[i] / H(i, i);
+    }
+
+    // TODO: Use axpys or similar for these
+    // TODO: Fix the condition
+    if (true) {
+      for (int i = 0; i <= iter; i++)
+        psi = psi + v[i] * y[i];
+    }
+    else { // NOTE(review): unreachable as long as the condition above is the literal true
+      psi = y[0] * v[0]; // this variant would overwrite psi instead of adding to it
+      for (int i = 1; i <= iter; i++)
+        psi = psi + v[i] * y[i];
+    }
+    CompSolutionTimer.Stop();
+  }
+};
+}
+#endif
+
+// I took the version with p->kind == left from the WMG code base here. TODO: recheck if we need left or right

From e843d83d9dbabbed48f01b80ec00dc3efadb6d81 Mon Sep 17 00:00:00 2001
From: Daniel Richtmann <daniel.richtmann@physik.uni-regensburg.de>
Date: Wed, 8 Nov 2017 16:38:16 +0100
Subject: [PATCH 036/130] Make z in FGMRES a single Field

---
 .../iterative/FlexibleGeneralisedMinimalResidual.h   | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/lib/algorithms/iterative/FlexibleGeneralisedMinimalResidual.h b/lib/algorithms/iterative/FlexibleGeneralisedMinimalResidual.h
index d7867833..3e677a77 100644
--- a/lib/algorithms/iterative/FlexibleGeneralisedMinimalResidual.h
+++ b/lib/algorithms/iterative/FlexibleGeneralisedMinimalResidual.h
@@ -165,16 +165,16 @@ class FlexibleGeneralisedMinimalResidual : public OperatorFunction<Field> {
 
     Field w(src._grid);
     Field r(src._grid);
+    Field z(src._grid);
 
     std::vector<Field> v(RestartLength + 1, src._grid);
-    std::vector<Field> z(RestartLength + 1, src._grid);
 
     MatrixTimer.Start();
-    LinOp.Op(psi, z[0]);
+    LinOp.Op(psi, z);
     MatrixTimer.Stop();
 
     PrecTimer.Start();
-    Preconditioner(z[0], r);
+    Preconditioner(z, r);
     PrecTimer.Stop();
 
     LinalgTimer.Start();
@@ -210,14 +210,14 @@ class FlexibleGeneralisedMinimalResidual : public OperatorFunction<Field> {
     return cp;
   }
 
-  void arnoldiStep(LinearOperatorBase<Field> &LinOp, std::vector<Field> &v, std::vector<Field> &z, Field &w, int iter) {
+  void arnoldiStep(LinearOperatorBase<Field> &LinOp, std::vector<Field> &v, Field &z, Field &w, int iter) {
 
     MatrixTimer.Start();
-    LinOp.Op(v[iter], z[0]);
+    LinOp.Op(v[iter], z);
     MatrixTimer.Stop();
 
     PrecTimer.Start();
-    Preconditioner(z[0], w);
+    Preconditioner(z, w);
     PrecTimer.Stop();
 
     LinalgTimer.Start();

From 99bc4cde565e2e8a77a77899b46f0f8929881f3e Mon Sep 17 00:00:00 2001
From: Daniel Richtmann <daniel.richtmann@physik.uni-regensburg.de>
Date: Wed, 8 Nov 2017 16:38:34 +0100
Subject: [PATCH 037/130] Fix an implementation error in FGMRES

---
 lib/algorithms/iterative/FlexibleGeneralisedMinimalResidual.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/lib/algorithms/iterative/FlexibleGeneralisedMinimalResidual.h b/lib/algorithms/iterative/FlexibleGeneralisedMinimalResidual.h
index 3e677a77..94193484 100644
--- a/lib/algorithms/iterative/FlexibleGeneralisedMinimalResidual.h
+++ b/lib/algorithms/iterative/FlexibleGeneralisedMinimalResidual.h
@@ -174,7 +174,7 @@ class FlexibleGeneralisedMinimalResidual : public OperatorFunction<Field> {
     MatrixTimer.Stop();
 
     PrecTimer.Start();
-    Preconditioner(z, r);
+    Preconditioner(z, w);
     PrecTimer.Stop();
 
     LinalgTimer.Start();

From 26b3d441bb2fbcfe7a0720546071c206c7a4f26c Mon Sep 17 00:00:00 2001
From: Daniel Richtmann <daniel.richtmann@physik.uni-regensburg.de>
Date: Wed, 8 Nov 2017 16:39:11 +0100
Subject: [PATCH 038/130] Check in forgotten FGMRES test with wilson Fermions

---
 tests/solver/Test_wilson_fgmres_prec.cc | 68 +++++++++++++++++++++++++
 1 file changed, 68 insertions(+)
 create mode 100644 tests/solver/Test_wilson_fgmres_prec.cc

diff --git a/tests/solver/Test_wilson_fgmres_prec.cc b/tests/solver/Test_wilson_fgmres_prec.cc
new file mode 100644
index 00000000..80c9156d
--- /dev/null
+++ b/tests/solver/Test_wilson_fgmres_prec.cc
@@ -0,0 +1,68 @@
+/*************************************************************************************
+
+Grid physics library, www.github.com/paboyle/Grid
+
+Source file: ./tests/solver/Test_wilson_gmres_unprec.cc
+
+Copyright (C) 2015
+
+Author: Daniel Richtmann <daniel.richtmann@ur.de>
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along
+with this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+
+See the full license in the file "LICENSE" in the top level distribution
+directory
+*************************************************************************************/
+/*  END LEGAL */
+#include <Grid/Grid.h>
+
+using namespace Grid;
+using namespace Grid::QCD;
+
+int main (int argc, char ** argv) // runs FGMRES once on a Wilson operator with a random source
+{
+  Grid_init(&argc,&argv);
+
+  std::vector<int> latt_size   = GridDefaultLatt();
+  std::vector<int> simd_layout = GridDefaultSimd(Nd,vComplex::Nsimd());
+  std::vector<int> mpi_layout  = GridDefaultMpi();
+  GridCartesian               Grid(latt_size,simd_layout,mpi_layout);
+  GridRedBlackCartesian     RBGrid(&Grid);
+
+  std::vector<int> seeds({1,2,3,4}); // hard-coded seeds for the parallel RNG
+  GridParallelRNG          pRNG(&Grid);  pRNG.SeedFixedIntegers(seeds);
+
+  LatticeFermion src(&Grid); random(pRNG,src); // right-hand side of the solve
+  RealD nrm = norm2(src); // NOTE(review): nrm is not used below
+  LatticeFermion result(&Grid); result=zero; // the solver reads result as its initial guess
+  LatticeGaugeField Umu(&Grid); SU3::HotConfiguration(pRNG,Umu); // presumably a random gauge field drawn from pRNG — confirm
+
+  double volume=1;
+  for(int mu=0;mu<Nd;mu++){
+    volume=volume*latt_size[mu];
+  } // NOTE(review): volume is computed but not used below
+
+  RealD mass=0.5;
+  WilsonFermionR Dw(Umu,Grid,RBGrid,mass);
+
+  MdagMLinearOperator<WilsonFermionR,LatticeFermion> HermOp(Dw); // presumably wraps Dw as Mdag*M (going by the class name) — confirm
+
+  TrivialPrecon<LatticeFermion> simple; // presumably an identity preconditioner (going by the class name) — confirm
+
+  FlexibleGeneralisedMinimalResidual<LatticeFermion> FGMRES(1.0e-8, 50, simple, 25); // tolerance 1.0e-8, maxit 50, preconditioner, restart length 25
+  FGMRES(HermOp,src,result); // nothing checks the solution afterwards; with default arguments non-convergence hits assert(0) inside the solver
+
+  Grid_finalize();
+}

From 9031f0ed95ad83056689bec4f625695d41f6795f Mon Sep 17 00:00:00 2001
From: Daniel Richtmann <daniel.richtmann@physik.uni-regensburg.de>
Date: Wed, 8 Nov 2017 16:40:54 +0100
Subject: [PATCH 039/130] Fix a filename in a file header

---
 tests/solver/Test_wilson_fgmres_prec.cc | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/solver/Test_wilson_fgmres_prec.cc b/tests/solver/Test_wilson_fgmres_prec.cc
index 80c9156d..2bdee58f 100644
--- a/tests/solver/Test_wilson_fgmres_prec.cc
+++ b/tests/solver/Test_wilson_fgmres_prec.cc
@@ -2,7 +2,7 @@
 
 Grid physics library, www.github.com/paboyle/Grid
 
-Source file: ./tests/solver/Test_wilson_gmres_unprec.cc
+Source file: ./tests/solver/Test_wilson_fgmres_prec.cc
 
 Copyright (C) 2015
 

From 699d537cd60c84404081a17b8df30f44698e14c0 Mon Sep 17 00:00:00 2001
From: Daniel Richtmann <daniel.richtmann@physik.uni-regensburg.de>
Date: Wed, 8 Nov 2017 16:56:42 +0100
Subject: [PATCH 040/130] Add FGMRES test with staggered fermions

---
 tests/solver/Test_staggered_fgmres_prec.cc | 72 ++++++++++++++++++++++
 1 file changed, 72 insertions(+)
 create mode 100644 tests/solver/Test_staggered_fgmres_prec.cc

diff --git a/tests/solver/Test_staggered_fgmres_prec.cc b/tests/solver/Test_staggered_fgmres_prec.cc
new file mode 100644
index 00000000..0c8dbc67
--- /dev/null
+++ b/tests/solver/Test_staggered_fgmres_prec.cc
@@ -0,0 +1,72 @@
+/*************************************************************************************
+
+Grid physics library, www.github.com/paboyle/Grid
+
+Source file: ./tests/solver/Test_staggered_fgmres_prec.cc
+
+Copyright (C) 2015
+
+Author: Daniel Richtmann <daniel.richtmann@ur.de>
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along
+with this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+
+See the full license in the file "LICENSE" in the top level distribution
+directory
+*************************************************************************************/
+/*  END LEGAL */
+#include <Grid/Grid.h>
+
+using namespace Grid;
+using namespace Grid::QCD;
+
+int main (int argc, char ** argv) // runs FGMRES once on an improved staggered operator with a random source
+{
+  typedef typename ImprovedStaggeredFermionR::FermionField FermionField;
+  typedef typename ImprovedStaggeredFermionR::ComplexField ComplexField; // NOTE(review): ComplexField is not used below
+  typename ImprovedStaggeredFermionR::ImplParams params; // NOTE(review): params is not used below
+
+  Grid_init(&argc,&argv);
+
+  std::vector<int> latt_size   = GridDefaultLatt();
+  std::vector<int> simd_layout = GridDefaultSimd(Nd,vComplex::Nsimd());
+  std::vector<int> mpi_layout  = GridDefaultMpi();
+  GridCartesian               Grid(latt_size,simd_layout,mpi_layout);
+  GridRedBlackCartesian     RBGrid(&Grid);
+
+  std::vector<int> seeds({1,2,3,4}); // hard-coded seeds for the parallel RNG
+  GridParallelRNG          pRNG(&Grid);  pRNG.SeedFixedIntegers(seeds);
+
+  FermionField src(&Grid); random(pRNG,src); // right-hand side of the solve
+  RealD nrm = norm2(src); // NOTE(review): nrm is not used below
+  FermionField result(&Grid); result=zero; // the solver reads result as its initial guess
+  LatticeGaugeField Umu(&Grid); SU3::HotConfiguration(pRNG,Umu); // presumably a random gauge field drawn from pRNG — confirm
+
+  double volume=1;
+  for(int mu=0;mu<Nd;mu++){
+    volume=volume*latt_size[mu];
+  } // NOTE(review): volume is computed but not used below
+
+  RealD mass=0.1;
+  ImprovedStaggeredFermionR Ds(Umu,Umu,Grid,RBGrid,mass); // the same field Umu is passed for both gauge-field arguments
+
+  MdagMLinearOperator<ImprovedStaggeredFermionR,FermionField> HermOp(Ds); // presumably wraps Ds as Mdag*M (going by the class name) — confirm
+
+  TrivialPrecon<FermionField> simple; // presumably an identity preconditioner (going by the class name) — confirm
+
+  FlexibleGeneralisedMinimalResidual<FermionField> FGMRES(1.0e-6, 50, simple, 25); // tolerance 1.0e-6, maxit 50, preconditioner, restart length 25
+  FGMRES(HermOp,src,result); // nothing checks the solution afterwards; with default arguments non-convergence hits assert(0) inside the solver
+
+  Grid_finalize();
+}

From cd63052205edea9f6c8efc3a277a6ae2c7b2eec0 Mon Sep 17 00:00:00 2001
From: Daniel Richtmann <daniel.richtmann@physik.uni-regensburg.de>
Date: Wed, 8 Nov 2017 16:57:40 +0100
Subject: [PATCH 041/130] Remove everything preconditioner-related in GMRES
 code

---
 lib/algorithms/iterative/GeneralisedMinimalResidual.h | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/lib/algorithms/iterative/GeneralisedMinimalResidual.h b/lib/algorithms/iterative/GeneralisedMinimalResidual.h
index aa17c382..dbe82a58 100644
--- a/lib/algorithms/iterative/GeneralisedMinimalResidual.h
+++ b/lib/algorithms/iterative/GeneralisedMinimalResidual.h
@@ -62,7 +62,6 @@ class GeneralisedMinimalResidual : public OperatorFunction<Field> {
                           // filled in upon completion
 
   GridStopWatch MatrixTimer;
-  GridStopWatch PrecTimer;
   GridStopWatch LinalgTimer;
   GridStopWatch QrTimer;
   GridStopWatch CompSolutionTimer;
@@ -107,7 +106,6 @@ class GeneralisedMinimalResidual : public OperatorFunction<Field> {
     std::cout << GridLogIterative << "GeneralisedMinimalResidual:   src " << ssq   << std::endl;
 
 
-    PrecTimer.Reset();
     MatrixTimer.Reset();
     LinalgTimer.Reset();
     QrTimer.Reset();
@@ -140,7 +138,6 @@ class GeneralisedMinimalResidual : public OperatorFunction<Field> {
 
         std::cout << GridLogMessage << "GeneralisedMinimalResidual Time breakdown" << std::endl;
         std::cout << GridLogMessage << "\tElapsed " << SolverTimer.Elapsed()       << std::endl;
-        std::cout << GridLogMessage << "\tPrecon "  << PrecTimer.Elapsed()         << std::endl;
         std::cout << GridLogMessage << "\tMatrix "  << MatrixTimer.Elapsed()       << std::endl;
         std::cout << GridLogMessage << "\tLinalg "  << LinalgTimer.Elapsed()       << std::endl;
         std::cout << GridLogMessage << "\tQR "      << QrTimer.Elapsed()           << std::endl;

From a7ae46b61e798a712a58c074925b16e39f1bbcd5 Mon Sep 17 00:00:00 2001
From: Daniel Richtmann <daniel.richtmann@physik.uni-regensburg.de>
Date: Wed, 8 Nov 2017 16:58:20 +0100
Subject: [PATCH 042/130] Remove some comments

---
 .../iterative/GeneralisedMinimalResidual.h      | 17 -----------------
 1 file changed, 17 deletions(-)

diff --git a/lib/algorithms/iterative/GeneralisedMinimalResidual.h b/lib/algorithms/iterative/GeneralisedMinimalResidual.h
index dbe82a58..27ab6caf 100644
--- a/lib/algorithms/iterative/GeneralisedMinimalResidual.h
+++ b/lib/algorithms/iterative/GeneralisedMinimalResidual.h
@@ -29,23 +29,6 @@ directory
 #ifndef GRID_GENERALISED_MINIMAL_RESIDUAL_H
 #define GRID_GENERALISED_MINIMAL_RESIDUAL_H
 
-// from Y. Saad - Iterative Methods for Sparse Linear Systems, PP 172
-// Compute r0 = b − Ax0 , β := ||r0||2 , and v1 := r0 /β
-// For j = 1, 2, ..., m Do:
-//   Compute wj := Avj
-//   For i = 1, ..., j Do:
-//     hij := (wj , vi)
-//     wj := wj − hij vi
-//   EndDo
-//   hj+1,j = ||wj||2 . If hj+1,j = 0 set m := j and go to HERE
-//   vj+1 = wj /hj+1,j
-// EndDo
-// Define the (m + 1) × m Hessenberg matrix H̄m = {hij}1≤i≤m+1,1≤j≤m. [HERE]
-// Compute ym the minimizer of ||βe1 − H̄m y||2 and xm = x0 + Vm ym.
-///////////////////////////////////////////////////////////////////////////////////////////////////////
-
-// want to solve Ax = b -> A = LinOp, psi = x, b = src
-
 namespace Grid {
 
 template<class Field>

From c63095345e2b8ba161a518fc22e3af1470619d25 Mon Sep 17 00:00:00 2001
From: Daniel Richtmann <daniel.richtmann@physik.uni-regensburg.de>
Date: Thu, 9 Nov 2017 12:47:20 +0100
Subject: [PATCH 043/130] Remove some superfluous comments

---
 .../FlexibleGeneralisedMinimalResidual.h      | 23 +------------------
 .../iterative/GeneralisedMinimalResidual.h    |  4 +---
 2 files changed, 2 insertions(+), 25 deletions(-)

diff --git a/lib/algorithms/iterative/FlexibleGeneralisedMinimalResidual.h b/lib/algorithms/iterative/FlexibleGeneralisedMinimalResidual.h
index 94193484..ce82ed4d 100644
--- a/lib/algorithms/iterative/FlexibleGeneralisedMinimalResidual.h
+++ b/lib/algorithms/iterative/FlexibleGeneralisedMinimalResidual.h
@@ -29,30 +29,13 @@ directory
 #ifndef GRID_FLEXIBLE_GENERALISED_MINIMAL_RESIDUAL_H
 #define GRID_FLEXIBLE_GENERALISED_MINIMAL_RESIDUAL_H
 
-// from Y. Saad - Iterative Methods for Sparse Linear Systems, PP 172
-// Compute r0 = b − Ax0 , β := ||r0||2 , and v1 := r0 /β
-// For j = 1, 2, ..., m Do:
-//   Compute wj := Avj
-//   For i = 1, ..., j Do:
-//     hij := (wj , vi)
-//     wj := wj − hij vi
-//   EndDo
-//   hj+1,j = ||wj||2 . If hj+1,j = 0 set m := j and go to HERE
-//   vj+1 = wj /hj+1,j
-// EndDo
-// Define the (m + 1) × m Hessenberg matrix H̄m = {hij}1≤i≤m+1,1≤j≤m. [HERE]
-// Compute ym the minimizer of ||βe1 − H̄m y||2 and xm = x0 + Vm ym.
-///////////////////////////////////////////////////////////////////////////////////////////////////////
-
-// want to solve Ax = b -> A = LinOp, psi = x, b = src
-
 namespace Grid {
 
 template<class Field>
 class FlexibleGeneralisedMinimalResidual : public OperatorFunction<Field> {
  public:
   bool ErrorOnNoConverge; // Throw an assert when FGMRES fails to converge,
-                          // defaults to True.
+                          // defaults to true
 
   RealD   Tolerance;
 
@@ -264,8 +247,6 @@ class FlexibleGeneralisedMinimalResidual : public OperatorFunction<Field> {
       y[i] = y[i] / H(i, i);
     }
 
-    // TODO: Use axpys or similar for these
-    // TODO: Fix the condition
     if (true) {
       for (int i = 0; i <= iter; i++)
         psi = psi + v[i] * y[i];
@@ -280,5 +261,3 @@ class FlexibleGeneralisedMinimalResidual : public OperatorFunction<Field> {
 };
 }
 #endif
-
-// I took the version with p->kind == left from the WMG code base here. TODO: recheck if we need left or right
diff --git a/lib/algorithms/iterative/GeneralisedMinimalResidual.h b/lib/algorithms/iterative/GeneralisedMinimalResidual.h
index 27ab6caf..0f7bf155 100644
--- a/lib/algorithms/iterative/GeneralisedMinimalResidual.h
+++ b/lib/algorithms/iterative/GeneralisedMinimalResidual.h
@@ -35,7 +35,7 @@ template<class Field>
 class GeneralisedMinimalResidual : public OperatorFunction<Field> {
  public:
   bool ErrorOnNoConverge; // Throw an assert when GMRES fails to converge,
-                          // defaults to True.
+                          // defaults to true
 
   RealD   Tolerance;
 
@@ -231,8 +231,6 @@ class GeneralisedMinimalResidual : public OperatorFunction<Field> {
       y[i] = y[i] / H(i, i);
     }
 
-    // TODO: Use axpys or similar for these
-    // TODO: Fix the condition
     if (true) {
       for (int i = 0; i <= iter; i++)
         psi = psi + v[i] * y[i];

From 8402ab6cf953fa8b00ddf52f319a4459a1521c71 Mon Sep 17 00:00:00 2001
From: Daniel Richtmann <daniel.richtmann@physik.uni-regensburg.de>
Date: Thu, 9 Nov 2017 12:52:04 +0100
Subject: [PATCH 044/130] Some minor formatting improvements

---
 .../iterative/FlexibleGeneralisedMinimalResidual.h     | 10 +++++-----
 lib/algorithms/iterative/GeneralisedMinimalResidual.h  |  2 +-
 2 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/lib/algorithms/iterative/FlexibleGeneralisedMinimalResidual.h b/lib/algorithms/iterative/FlexibleGeneralisedMinimalResidual.h
index ce82ed4d..f190e386 100644
--- a/lib/algorithms/iterative/FlexibleGeneralisedMinimalResidual.h
+++ b/lib/algorithms/iterative/FlexibleGeneralisedMinimalResidual.h
@@ -59,11 +59,11 @@ class FlexibleGeneralisedMinimalResidual : public OperatorFunction<Field> {
 
   LinearFunction<Field> &Preconditioner;
 
- FlexibleGeneralisedMinimalResidual(RealD   tol,
-                                    Integer maxit,
-                                    LinearFunction<Field> &Prec,
-                                    Integer restart_length,
-                                    bool    err_on_no_conv = true)
+  FlexibleGeneralisedMinimalResidual(RealD   tol,
+                                     Integer maxit,
+                                     LinearFunction<Field> &Prec,
+                                     Integer restart_length,
+                                     bool    err_on_no_conv = true)
       : Tolerance(tol)
       , MaxIterations(maxit)
       , RestartLength(restart_length)
diff --git a/lib/algorithms/iterative/GeneralisedMinimalResidual.h b/lib/algorithms/iterative/GeneralisedMinimalResidual.h
index 0f7bf155..76c9a8bf 100644
--- a/lib/algorithms/iterative/GeneralisedMinimalResidual.h
+++ b/lib/algorithms/iterative/GeneralisedMinimalResidual.h
@@ -79,7 +79,7 @@ class GeneralisedMinimalResidual : public OperatorFunction<Field> {
     assert(std::isnan(guess) == 0);
 
     RealD cp;
-    RealD ssq    = norm2(src);
+    RealD ssq = norm2(src);
     RealD rsq = Tolerance * Tolerance * ssq;
 
     Field r(src._grid);

From c6cbe533ea9690aace4805a2cce6e8228ef5feb3 Mon Sep 17 00:00:00 2001
From: Daniel Richtmann <daniel.richtmann@physik.uni-regensburg.de>
Date: Thu, 9 Nov 2017 17:12:54 +0100
Subject: [PATCH 045/130] Set everything up for the implementation of CAGMRES

The current implementation is the exact same code as normal GMRES. This commit
only sets up the "framework" for the implementation of CAGMRES, i.e., a test and
an include in the algorithms header file.
---
 lib/algorithms/Algorithms.h                   |   1 +
 ...cationAvoidingGeneralisedMinimalResidual.h | 247 ++++++++++++++++++
 tests/solver/Test_wilson_cagmres_unprec.cc    |  65 +++++
 3 files changed, 313 insertions(+)
 create mode 100644 lib/algorithms/iterative/CommunicationAvoidingGeneralisedMinimalResidual.h
 create mode 100644 tests/solver/Test_wilson_cagmres_unprec.cc

diff --git a/lib/algorithms/Algorithms.h b/lib/algorithms/Algorithms.h
index 3368dce8..1517c0be 100644
--- a/lib/algorithms/Algorithms.h
+++ b/lib/algorithms/Algorithms.h
@@ -49,6 +49,7 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk>
 #include <Grid/algorithms/iterative/ConjugateGradientReliableUpdate.h>
 #include <Grid/algorithms/iterative/MinimalResidual.h>
 #include <Grid/algorithms/iterative/GeneralisedMinimalResidual.h>
+#include <Grid/algorithms/iterative/CommunicationAvoidingGeneralisedMinimalResidual.h>
 #include <Grid/algorithms/iterative/FlexibleGeneralisedMinimalResidual.h>
 #include <Grid/algorithms/iterative/ImplicitlyRestartedLanczos.h>
 #include <Grid/algorithms/CoarsenedMatrix.h>
diff --git a/lib/algorithms/iterative/CommunicationAvoidingGeneralisedMinimalResidual.h b/lib/algorithms/iterative/CommunicationAvoidingGeneralisedMinimalResidual.h
new file mode 100644
index 00000000..2e574639
--- /dev/null
+++ b/lib/algorithms/iterative/CommunicationAvoidingGeneralisedMinimalResidual.h
@@ -0,0 +1,247 @@
+/*************************************************************************************
+
+Grid physics library, www.github.com/paboyle/Grid
+
+Source file: ./lib/algorithms/iterative/CommunicationAvoidingGeneralisedMinimalResidual.h
+
+Copyright (C) 2015
+
+Author: Daniel Richtmann <daniel.richtmann@ur.de>
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along
+with this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+
+See the full license in the file "LICENSE" in the top level distribution
+directory
+*************************************************************************************/
+/*  END LEGAL */
+#ifndef GRID_COMMUNICATION_AVOIDING_GENERALISED_MINIMAL_RESIDUAL_H
+#define GRID_COMMUNICATION_AVOIDING_GENERALISED_MINIMAL_RESIDUAL_H
+
+namespace Grid {
+
+template<class Field>
+class CommunicationAvoidingGeneralisedMinimalResidual : public OperatorFunction<Field> {
+ public:
+  bool ErrorOnNoConverge; // Throw an assert when CAGMRES fails to converge,
+                          // defaults to true
+
+  RealD   Tolerance;
+
+  Integer MaxIterations;
+  Integer RestartLength;
+  Integer IterationCount; // Number of iterations the CAGMRES took to finish,
+                          // filled in upon completion
+
+  GridStopWatch MatrixTimer;
+  GridStopWatch LinalgTimer;
+  GridStopWatch QrTimer;
+  GridStopWatch CompSolutionTimer;
+
+  Eigen::MatrixXcd H;
+
+  std::vector<std::complex<double>> y;
+  std::vector<std::complex<double>> gamma;
+  std::vector<std::complex<double>> c;
+  std::vector<std::complex<double>> s;
+
+ CommunicationAvoidingGeneralisedMinimalResidual(RealD   tol,
+                                                 Integer maxit,
+                                                 Integer restart_length,
+                                                 bool    err_on_no_conv = true)
+      : Tolerance(tol)
+      , MaxIterations(maxit)
+      , RestartLength(restart_length)
+      , ErrorOnNoConverge(err_on_no_conv)
+      , H(Eigen::MatrixXcd::Zero(RestartLength, RestartLength + 1)) // sizes taken from DD-αAMG code base
+      , y(RestartLength + 1, 0.)
+      , gamma(RestartLength + 1, 0.)
+      , c(RestartLength + 1, 0.)
+      , s(RestartLength + 1, 0.) {};
+
+  void operator()(LinearOperatorBase<Field> &LinOp, const Field &src, Field &psi) {
+
+    psi.checkerboard = src.checkerboard;
+    conformable(psi, src);
+
+    RealD guess = norm2(psi);
+    assert(std::isnan(guess) == 0);
+
+    RealD cp;
+    RealD ssq = norm2(src);
+    RealD rsq = Tolerance * Tolerance * ssq;
+
+    Field r(src._grid);
+
+    std::cout << std::setprecision(4) << std::scientific << std::endl;
+    std::cout << GridLogIterative << "CommunicationAvoidingGeneralisedMinimalResidual: guess " << guess << std::endl;
+    std::cout << GridLogIterative << "CommunicationAvoidingGeneralisedMinimalResidual:   src " << ssq   << std::endl;
+
+
+    MatrixTimer.Reset();
+    LinalgTimer.Reset();
+    QrTimer.Reset();
+    CompSolutionTimer.Reset();
+
+    GridStopWatch SolverTimer;
+    SolverTimer.Start();
+
+    IterationCount = 0;
+    for (int k=0; k<MaxIterations; k++) {
+
+      cp = outerLoopBody(LinOp, src, psi, rsq);
+
+      // Stopping condition
+      if (cp <= rsq) {
+
+        SolverTimer.Stop();
+
+        LinOp.Op(psi,r);
+        axpy(r,-1.0,src,r);
+
+        RealD srcnorm       = sqrt(ssq);
+        RealD resnorm       = sqrt(norm2(r));
+        RealD true_residual = resnorm / srcnorm;
+
+        std::cout << GridLogMessage << "CommunicationAvoidingGeneralisedMinimalResidual: Converged on iteration " << IterationCount << std::endl;
+        std::cout << GridLogMessage << "\tComputed residual "                                << sqrt(cp / ssq)       << std::endl;
+        std::cout << GridLogMessage << "\tTrue residual "                                    << true_residual        << std::endl;
+        std::cout << GridLogMessage << "\tTarget "                                           << Tolerance            << std::endl;
+
+        std::cout << GridLogMessage << "CommunicationAvoidingGeneralisedMinimalResidual Time breakdown" << std::endl;
+        std::cout << GridLogMessage << "\tElapsed " << SolverTimer.Elapsed()       << std::endl;
+        std::cout << GridLogMessage << "\tMatrix "  << MatrixTimer.Elapsed()       << std::endl;
+        std::cout << GridLogMessage << "\tLinalg "  << LinalgTimer.Elapsed()       << std::endl;
+        std::cout << GridLogMessage << "\tQR "      << QrTimer.Elapsed()           << std::endl;
+        std::cout << GridLogMessage << "\tCompSol " << CompSolutionTimer.Elapsed() << std::endl;
+        return;
+      }
+    }
+
+    std::cout << GridLogMessage << "CommunicationAvoidingGeneralisedMinimalResidual did NOT converge" << std::endl;
+
+    if (ErrorOnNoConverge)
+      assert(0);
+  }
+
+  RealD outerLoopBody(LinearOperatorBase<Field> &LinOp, const Field &src, Field &psi, RealD rsq) {
+
+    RealD cp = 0;
+
+    Field w(src._grid);
+    Field r(src._grid);
+
+    std::vector<Field> v(RestartLength + 1, src._grid);
+
+    MatrixTimer.Start();
+    LinOp.Op(psi, w);
+    MatrixTimer.Stop();
+
+    LinalgTimer.Start();
+    r = src - w;
+
+    gamma[0] = sqrt(norm2(r));
+
+    v[0] = (1. / gamma[0]) * r;
+    LinalgTimer.Stop();
+
+    for (int i=0; i<RestartLength; i++) {
+
+      IterationCount++;
+
+      arnoldiStep(LinOp, v, w, i);
+
+      qrUpdate(i);
+
+      cp = std::norm(gamma[i+1]);
+
+      std::cout << GridLogIterative << "CommunicationAvoidingGeneralisedMinimalResidual: Iteration " << IterationCount
+                << " residual " << cp << " target " << rsq << std::endl;
+
+      if ((i == RestartLength - 1) || (cp <= rsq)) {
+
+        computeSolution(v, psi, i);
+
+        return cp;
+      }
+    }
+
+    assert(0); // Never reached
+    return cp;
+  }
+
+  void arnoldiStep(LinearOperatorBase<Field> &LinOp, std::vector<Field> &v, Field &w, int iter) {
+
+    MatrixTimer.Start();
+    LinOp.Op(v[iter], w);
+    MatrixTimer.Stop();
+
+    LinalgTimer.Start();
+    for (int i = 0; i <= iter; ++i) {
+      H(iter, i) = innerProduct(v[i], w);
+      w = w - H(iter, i) * v[i];
+    }
+
+    H(iter, iter + 1) = sqrt(norm2(w));
+    v[iter + 1] = (1. / H(iter, iter + 1)) * w;
+    LinalgTimer.Stop();
+  }
+
+  void qrUpdate(int iter) {
+
+    QrTimer.Start();
+    for (int i = 0; i < iter ; ++i) {
+      auto tmp       = -s[i] * H(iter, i) + c[i] * H(iter, i + 1);
+      H(iter, i)     = std::conj(c[i]) * H(iter, i) + std::conj(s[i]) * H(iter, i + 1);
+      H(iter, i + 1) = tmp;
+    }
+
+    // Compute new Givens Rotation
+    ComplexD nu = sqrt(std::norm(H(iter, iter)) + std::norm(H(iter, iter + 1)));
+    c[iter]     = H(iter, iter) / nu;
+    s[iter]     = H(iter, iter + 1) / nu;
+
+    // Apply new Givens rotation
+    H(iter, iter)     = nu;
+    H(iter, iter + 1) = 0.;
+
+    gamma[iter + 1] = -s[iter] * gamma[iter];
+    gamma[iter]     = std::conj(c[iter]) * gamma[iter];
+    QrTimer.Stop();
+  }
+
+  void computeSolution(std::vector<Field> const &v, Field &psi, int iter) {
+
+    CompSolutionTimer.Start();
+    for (int i = iter; i >= 0; i--) {
+      y[i] = gamma[i];
+      for (int k = i + 1; k <= iter; k++)
+        y[i] = y[i] - H(k, i) * y[k];
+      y[i] = y[i] / H(i, i);
+    }
+
+    if (true) {
+      for (int i = 0; i <= iter; i++)
+        psi = psi + v[i] * y[i];
+    }
+    else {
+      psi = y[0] * v[0];
+      for (int i = 1; i <= iter; i++)
+        psi = psi + v[i] * y[i];
+    }
+    CompSolutionTimer.Stop();
+  }
+};
+}
+#endif
diff --git a/tests/solver/Test_wilson_cagmres_unprec.cc b/tests/solver/Test_wilson_cagmres_unprec.cc
new file mode 100644
index 00000000..067fc0c1
--- /dev/null
+++ b/tests/solver/Test_wilson_cagmres_unprec.cc
@@ -0,0 +1,65 @@
+/*************************************************************************************
+
+Grid physics library, www.github.com/paboyle/Grid
+
+Source file: ./tests/solver/Test_wilson_cagmres_unprec.cc
+
+Copyright (C) 2015
+
+Author: Daniel Richtmann <daniel.richtmann@ur.de>
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along
+with this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+
+See the full license in the file "LICENSE" in the top level distribution
+directory
+*************************************************************************************/
+/*  END LEGAL */
+#include <Grid/Grid.h>
+
+using namespace Grid;
+using namespace Grid::QCD;
+
+int main (int argc, char ** argv)
+{
+  Grid_init(&argc,&argv);
+
+  std::vector<int> latt_size   = GridDefaultLatt();
+  std::vector<int> simd_layout = GridDefaultSimd(Nd,vComplex::Nsimd());
+  std::vector<int> mpi_layout  = GridDefaultMpi();
+  GridCartesian               Grid(latt_size,simd_layout,mpi_layout);
+  GridRedBlackCartesian     RBGrid(&Grid);
+
+  std::vector<int> seeds({1,2,3,4});
+  GridParallelRNG          pRNG(&Grid);  pRNG.SeedFixedIntegers(seeds);
+
+  LatticeFermion src(&Grid); random(pRNG,src);
+  RealD nrm = norm2(src);
+  LatticeFermion result(&Grid); result=zero;
+  LatticeGaugeField Umu(&Grid); SU3::HotConfiguration(pRNG,Umu);
+
+  double volume=1;
+  for(int mu=0;mu<Nd;mu++){
+    volume=volume*latt_size[mu];
+  }
+
+  RealD mass=0.5;
+  WilsonFermionR Dw(Umu,Grid,RBGrid,mass);
+
+  MdagMLinearOperator<WilsonFermionR,LatticeFermion> HermOp(Dw);
+  CommunicationAvoidingGeneralisedMinimalResidual<LatticeFermion> CAGMRES(1.0e-8, 50, 25);
+  CAGMRES(HermOp,src,result);
+
+  Grid_finalize();
+}

From d7743591ea1a6ab17c619f95982f3f9643c607fa Mon Sep 17 00:00:00 2001
From: Daniel Richtmann <daniel.richtmann@physik.uni-regensburg.de>
Date: Thu, 9 Nov 2017 17:22:58 +0100
Subject: [PATCH 046/130] Fix some minor formatting errors

---
 ...cationAvoidingGeneralisedMinimalResidual.h | 24 +++++++++----------
 .../FlexibleGeneralisedMinimalResidual.h      | 18 +++++++-------
 .../iterative/GeneralisedMinimalResidual.h    |  6 ++---
 3 files changed, 24 insertions(+), 24 deletions(-)

diff --git a/lib/algorithms/iterative/CommunicationAvoidingGeneralisedMinimalResidual.h b/lib/algorithms/iterative/CommunicationAvoidingGeneralisedMinimalResidual.h
index 2e574639..dfa0da8e 100644
--- a/lib/algorithms/iterative/CommunicationAvoidingGeneralisedMinimalResidual.h
+++ b/lib/algorithms/iterative/CommunicationAvoidingGeneralisedMinimalResidual.h
@@ -56,10 +56,10 @@ class CommunicationAvoidingGeneralisedMinimalResidual : public OperatorFunction<
   std::vector<std::complex<double>> c;
   std::vector<std::complex<double>> s;
 
- CommunicationAvoidingGeneralisedMinimalResidual(RealD   tol,
-                                                 Integer maxit,
-                                                 Integer restart_length,
-                                                 bool    err_on_no_conv = true)
+  CommunicationAvoidingGeneralisedMinimalResidual(RealD   tol,
+                                                  Integer maxit,
+                                                  Integer restart_length,
+                                                  bool    err_on_no_conv = true)
       : Tolerance(tol)
       , MaxIterations(maxit)
       , RestartLength(restart_length)
@@ -115,16 +115,16 @@ class CommunicationAvoidingGeneralisedMinimalResidual : public OperatorFunction<
         RealD true_residual = resnorm / srcnorm;
 
         std::cout << GridLogMessage << "CommunicationAvoidingGeneralisedMinimalResidual: Converged on iteration " << IterationCount << std::endl;
-        std::cout << GridLogMessage << "\tComputed residual "                                << sqrt(cp / ssq)       << std::endl;
-        std::cout << GridLogMessage << "\tTrue residual "                                    << true_residual        << std::endl;
-        std::cout << GridLogMessage << "\tTarget "                                           << Tolerance            << std::endl;
+        std::cout << GridLogMessage << "\tComputed residual "                                                     << sqrt(cp / ssq) << std::endl;
+        std::cout << GridLogMessage << "\tTrue residual "                                                         << true_residual  << std::endl;
+        std::cout << GridLogMessage << "\tTarget "                                                                << Tolerance      << std::endl;
 
         std::cout << GridLogMessage << "CommunicationAvoidingGeneralisedMinimalResidual Time breakdown" << std::endl;
-        std::cout << GridLogMessage << "\tElapsed " << SolverTimer.Elapsed()       << std::endl;
-        std::cout << GridLogMessage << "\tMatrix "  << MatrixTimer.Elapsed()       << std::endl;
-        std::cout << GridLogMessage << "\tLinalg "  << LinalgTimer.Elapsed()       << std::endl;
-        std::cout << GridLogMessage << "\tQR "      << QrTimer.Elapsed()           << std::endl;
-        std::cout << GridLogMessage << "\tCompSol " << CompSolutionTimer.Elapsed() << std::endl;
+        std::cout << GridLogMessage << "\tElapsed " << SolverTimer.Elapsed()                            << std::endl;
+        std::cout << GridLogMessage << "\tMatrix "  << MatrixTimer.Elapsed()                            << std::endl;
+        std::cout << GridLogMessage << "\tLinalg "  << LinalgTimer.Elapsed()                            << std::endl;
+        std::cout << GridLogMessage << "\tQR "      << QrTimer.Elapsed()                                << std::endl;
+        std::cout << GridLogMessage << "\tCompSol " << CompSolutionTimer.Elapsed()                      << std::endl;
         return;
       }
     }
diff --git a/lib/algorithms/iterative/FlexibleGeneralisedMinimalResidual.h b/lib/algorithms/iterative/FlexibleGeneralisedMinimalResidual.h
index f190e386..f885b902 100644
--- a/lib/algorithms/iterative/FlexibleGeneralisedMinimalResidual.h
+++ b/lib/algorithms/iterative/FlexibleGeneralisedMinimalResidual.h
@@ -121,17 +121,17 @@ class FlexibleGeneralisedMinimalResidual : public OperatorFunction<Field> {
         RealD true_residual = resnorm / srcnorm;
 
         std::cout << GridLogMessage << "FlexibleGeneralisedMinimalResidual: Converged on iteration " << IterationCount << std::endl;
-        std::cout << GridLogMessage << "\tComputed residual "                                << sqrt(cp / ssq)       << std::endl;
-        std::cout << GridLogMessage << "\tTrue residual "                                    << true_residual        << std::endl;
-        std::cout << GridLogMessage << "\tTarget "                                           << Tolerance            << std::endl;
+        std::cout << GridLogMessage << "\tComputed residual "                                        << sqrt(cp / ssq) << std::endl;
+        std::cout << GridLogMessage << "\tTrue residual "                                            << true_residual  << std::endl;
+        std::cout << GridLogMessage << "\tTarget "                                                   << Tolerance      << std::endl;
 
         std::cout << GridLogMessage << "FlexibleGeneralisedMinimalResidual Time breakdown" << std::endl;
-        std::cout << GridLogMessage << "\tElapsed " << SolverTimer.Elapsed()       << std::endl;
-        std::cout << GridLogMessage << "\tPrecon "  << PrecTimer.Elapsed()         << std::endl;
-        std::cout << GridLogMessage << "\tMatrix "  << MatrixTimer.Elapsed()       << std::endl;
-        std::cout << GridLogMessage << "\tLinalg "  << LinalgTimer.Elapsed()       << std::endl;
-        std::cout << GridLogMessage << "\tQR "      << QrTimer.Elapsed()           << std::endl;
-        std::cout << GridLogMessage << "\tCompSol " << CompSolutionTimer.Elapsed() << std::endl;
+        std::cout << GridLogMessage << "\tElapsed " << SolverTimer.Elapsed()               << std::endl;
+        std::cout << GridLogMessage << "\tPrecon "  << PrecTimer.Elapsed()                 << std::endl;
+        std::cout << GridLogMessage << "\tMatrix "  << MatrixTimer.Elapsed()               << std::endl;
+        std::cout << GridLogMessage << "\tLinalg "  << LinalgTimer.Elapsed()               << std::endl;
+        std::cout << GridLogMessage << "\tQR "      << QrTimer.Elapsed()                   << std::endl;
+        std::cout << GridLogMessage << "\tCompSol " << CompSolutionTimer.Elapsed()         << std::endl;
         return;
       }
     }
diff --git a/lib/algorithms/iterative/GeneralisedMinimalResidual.h b/lib/algorithms/iterative/GeneralisedMinimalResidual.h
index 76c9a8bf..e426b6f3 100644
--- a/lib/algorithms/iterative/GeneralisedMinimalResidual.h
+++ b/lib/algorithms/iterative/GeneralisedMinimalResidual.h
@@ -115,9 +115,9 @@ class GeneralisedMinimalResidual : public OperatorFunction<Field> {
         RealD true_residual = resnorm / srcnorm;
 
         std::cout << GridLogMessage << "GeneralisedMinimalResidual: Converged on iteration " << IterationCount << std::endl;
-        std::cout << GridLogMessage << "\tComputed residual "                                << sqrt(cp / ssq)       << std::endl;
-        std::cout << GridLogMessage << "\tTrue residual "                                    << true_residual        << std::endl;
-        std::cout << GridLogMessage << "\tTarget "                                           << Tolerance            << std::endl;
+        std::cout << GridLogMessage << "\tComputed residual "                                << sqrt(cp / ssq) << std::endl;
+        std::cout << GridLogMessage << "\tTrue residual "                                    << true_residual  << std::endl;
+        std::cout << GridLogMessage << "\tTarget "                                           << Tolerance      << std::endl;
 
         std::cout << GridLogMessage << "GeneralisedMinimalResidual Time breakdown" << std::endl;
         std::cout << GridLogMessage << "\tElapsed " << SolverTimer.Elapsed()       << std::endl;

From a367835bf2bbbff1fc47c137499910a3d641f6b4 Mon Sep 17 00:00:00 2001
From: Daniel Richtmann <daniel.richtmann@physik.uni-regensburg.de>
Date: Thu, 9 Nov 2017 17:30:41 +0100
Subject: [PATCH 047/130] Set everything up for the implementation of FCAGMRES

The FCAGMRES (flexible communication-avoiding GMRES) solver class added here is,
for now, the exact same code as normal FGMRES. This commit only sets up the
"framework" for the implementation of FCAGMRES, i.e., the new solver header, a
test, and an include in the algorithms header file.
---
 lib/algorithms/Algorithms.h                   |   1 +
 ...cationAvoidingGeneralisedMinimalResidual.h | 263 ++++++++++++++++++
 tests/solver/Test_wilson_fcagmres_prec.cc     |  68 +++++
 3 files changed, 332 insertions(+)
 create mode 100644 lib/algorithms/iterative/FlexibleCommunicationAvoidingGeneralisedMinimalResidual.h
 create mode 100644 tests/solver/Test_wilson_fcagmres_prec.cc

diff --git a/lib/algorithms/Algorithms.h b/lib/algorithms/Algorithms.h
index 1517c0be..b541a3be 100644
--- a/lib/algorithms/Algorithms.h
+++ b/lib/algorithms/Algorithms.h
@@ -51,6 +51,7 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk>
 #include <Grid/algorithms/iterative/GeneralisedMinimalResidual.h>
 #include <Grid/algorithms/iterative/CommunicationAvoidingGeneralisedMinimalResidual.h>
 #include <Grid/algorithms/iterative/FlexibleGeneralisedMinimalResidual.h>
+#include <Grid/algorithms/iterative/FlexibleCommunicationAvoidingGeneralisedMinimalResidual.h>
 #include <Grid/algorithms/iterative/ImplicitlyRestartedLanczos.h>
 #include <Grid/algorithms/CoarsenedMatrix.h>
 #include <Grid/algorithms/FFT.h>
diff --git a/lib/algorithms/iterative/FlexibleCommunicationAvoidingGeneralisedMinimalResidual.h b/lib/algorithms/iterative/FlexibleCommunicationAvoidingGeneralisedMinimalResidual.h
new file mode 100644
index 00000000..96fc4a20
--- /dev/null
+++ b/lib/algorithms/iterative/FlexibleCommunicationAvoidingGeneralisedMinimalResidual.h
@@ -0,0 +1,263 @@
+/*************************************************************************************
+
+Grid physics library, www.github.com/paboyle/Grid
+
+Source file: ./lib/algorithms/iterative/FlexibleCommunicationAvoidingGeneralisedMinimalResidual.h
+
+Copyright (C) 2015
+
+Author: Daniel Richtmann <daniel.richtmann@ur.de>
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along
+with this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+
+See the full license in the file "LICENSE" in the top level distribution
+directory
+*************************************************************************************/
+/*  END LEGAL */
+#ifndef GRID_FLEXIBLE_COMMUNICATION_AVOIDING_GENERALISED_MINIMAL_RESIDUAL_H
+#define GRID_FLEXIBLE_COMMUNICATION_AVOIDING_GENERALISED_MINIMAL_RESIDUAL_H
+
+namespace Grid {
+
+template<class Field>
+class FlexibleCommunicationAvoidingGeneralisedMinimalResidual : public OperatorFunction<Field> {
+ public:
+  bool ErrorOnNoConverge; // Throw an assert when FCAGMRES fails to converge,
+                          // defaults to true
+
+  RealD   Tolerance;
+
+  Integer MaxIterations;
+  Integer RestartLength;
+  Integer IterationCount; // Number of iterations the FCAGMRES took to finish,
+                          // filled in upon completion
+
+  GridStopWatch MatrixTimer;
+  GridStopWatch PrecTimer;
+  GridStopWatch LinalgTimer;
+  GridStopWatch QrTimer;
+  GridStopWatch CompSolutionTimer;
+
+  Eigen::MatrixXcd H;
+
+  std::vector<std::complex<double>> y;
+  std::vector<std::complex<double>> gamma;
+  std::vector<std::complex<double>> c;
+  std::vector<std::complex<double>> s;
+
+  LinearFunction<Field> &Preconditioner;
+
+  FlexibleCommunicationAvoidingGeneralisedMinimalResidual(RealD   tol,
+                                                          Integer maxit,
+                                                          LinearFunction<Field> &Prec,
+                                                          Integer restart_length,
+                                                          bool    err_on_no_conv = true)
+      : Tolerance(tol)
+      , MaxIterations(maxit)
+      , RestartLength(restart_length)
+      , ErrorOnNoConverge(err_on_no_conv)
+      , H(Eigen::MatrixXcd::Zero(RestartLength, RestartLength + 1)) // sizes taken from DD-αAMG code base
+      , y(RestartLength + 1, 0.)
+      , gamma(RestartLength + 1, 0.)
+      , c(RestartLength + 1, 0.)
+      , s(RestartLength + 1, 0.)
+      , Preconditioner(Prec) {};
+
+  void operator()(LinearOperatorBase<Field> &LinOp, const Field &src, Field &psi) {
+
+    psi.checkerboard = src.checkerboard;
+    conformable(psi, src);
+
+    RealD guess = norm2(psi);
+    assert(std::isnan(guess) == 0);
+
+    RealD cp;
+    RealD ssq = norm2(src);
+    RealD rsq = Tolerance * Tolerance * ssq;
+
+    Field r(src._grid);
+
+    std::cout << std::setprecision(4) << std::scientific << std::endl;
+    std::cout << GridLogIterative << "FlexibleCommunicationAvoidingGeneralisedMinimalResidual: guess " << guess << std::endl;
+    std::cout << GridLogIterative << "FlexibleCommunicationAvoidingGeneralisedMinimalResidual:   src " << ssq   << std::endl;
+
+
+    PrecTimer.Reset();
+    MatrixTimer.Reset();
+    LinalgTimer.Reset();
+    QrTimer.Reset();
+    CompSolutionTimer.Reset();
+
+    GridStopWatch SolverTimer;
+    SolverTimer.Start();
+
+    IterationCount = 0;
+    for (int k=0; k<MaxIterations; k++) {
+
+      cp = outerLoopBody(LinOp, src, psi, rsq);
+
+      // Stopping condition
+      if (cp <= rsq) {
+
+        SolverTimer.Stop();
+
+        LinOp.Op(psi,r);
+        axpy(r,-1.0,src,r);
+
+        RealD srcnorm       = sqrt(ssq);
+        RealD resnorm       = sqrt(norm2(r));
+        RealD true_residual = resnorm / srcnorm;
+
+        std::cout << GridLogMessage << "FlexibleCommunicationAvoidingGeneralisedMinimalResidual: Converged on iteration " << IterationCount << std::endl;
+        std::cout << GridLogMessage << "\tComputed residual "                                                             << sqrt(cp / ssq) << std::endl;
+        std::cout << GridLogMessage << "\tTrue residual "                                                                 << true_residual  << std::endl;
+        std::cout << GridLogMessage << "\tTarget "                                                                        << Tolerance      << std::endl;
+
+        std::cout << GridLogMessage << "FlexibleCommunicationAvoidingGeneralisedMinimalResidual Time breakdown" << std::endl;
+        std::cout << GridLogMessage << "\tElapsed " << SolverTimer.Elapsed()                                    << std::endl;
+        std::cout << GridLogMessage << "\tPrecon "  << PrecTimer.Elapsed()                                      << std::endl;
+        std::cout << GridLogMessage << "\tMatrix "  << MatrixTimer.Elapsed()                                    << std::endl;
+        std::cout << GridLogMessage << "\tLinalg "  << LinalgTimer.Elapsed()                                    << std::endl;
+        std::cout << GridLogMessage << "\tQR "      << QrTimer.Elapsed()                                        << std::endl;
+        std::cout << GridLogMessage << "\tCompSol " << CompSolutionTimer.Elapsed()                              << std::endl;
+        return;
+      }
+    }
+
+    std::cout << GridLogMessage << "FlexibleCommunicationAvoidingGeneralisedMinimalResidual did NOT converge" << std::endl;
+
+    if (ErrorOnNoConverge)
+      assert(0);
+  }
+
+  RealD outerLoopBody(LinearOperatorBase<Field> &LinOp, const Field &src, Field &psi, RealD rsq) {
+
+    RealD cp = 0;
+
+    Field w(src._grid);
+    Field r(src._grid);
+    Field z(src._grid);
+
+    std::vector<Field> v(RestartLength + 1, src._grid);
+
+    MatrixTimer.Start();
+    LinOp.Op(psi, z);
+    MatrixTimer.Stop();
+
+    PrecTimer.Start();
+    Preconditioner(z, w);
+    PrecTimer.Stop();
+
+    LinalgTimer.Start();
+    r = src - w;
+
+    gamma[0] = sqrt(norm2(r));
+
+    v[0] = (1. / gamma[0]) * r;
+    LinalgTimer.Stop();
+
+    for (int i=0; i<RestartLength; i++) {
+
+      IterationCount++;
+
+      arnoldiStep(LinOp, v, z, w, i);
+
+      qrUpdate(i);
+
+      cp = std::norm(gamma[i+1]);
+
+      std::cout << GridLogIterative << "FlexibleCommunicationAvoidingGeneralisedMinimalResidual: Iteration " << IterationCount
+                << " residual " << cp << " target " << rsq << std::endl;
+
+      if ((i == RestartLength - 1) || (cp <= rsq)) {
+
+        computeSolution(v, psi, i);
+
+        return cp;
+      }
+    }
+
+    assert(0); // Never reached
+    return cp;
+  }
+
+  void arnoldiStep(LinearOperatorBase<Field> &LinOp, std::vector<Field> &v, Field &z, Field &w, int iter) {
+
+    MatrixTimer.Start();
+    LinOp.Op(v[iter], z);
+    MatrixTimer.Stop();
+
+    PrecTimer.Start();
+    Preconditioner(z, w);
+    PrecTimer.Stop();
+
+    LinalgTimer.Start();
+    for (int i = 0; i <= iter; ++i) {
+      H(iter, i) = innerProduct(v[i], w);
+      w = w - H(iter, i) * v[i];
+    }
+
+    H(iter, iter + 1) = sqrt(norm2(w));
+    v[iter + 1] = (1. / H(iter, iter + 1)) * w;
+    LinalgTimer.Stop();
+  }
+
+  void qrUpdate(int iter) {
+
+    QrTimer.Start();
+    for (int i = 0; i < iter ; ++i) {
+      auto tmp       = -s[i] * H(iter, i) + c[i] * H(iter, i + 1);
+      H(iter, i)     = std::conj(c[i]) * H(iter, i) + std::conj(s[i]) * H(iter, i + 1);
+      H(iter, i + 1) = tmp;
+    }
+
+    // Compute new Givens Rotation
+    ComplexD nu = sqrt(std::norm(H(iter, iter)) + std::norm(H(iter, iter + 1)));
+    c[iter]     = H(iter, iter) / nu;
+    s[iter]     = H(iter, iter + 1) / nu;
+
+    // Apply new Givens rotation
+    H(iter, iter)     = nu;
+    H(iter, iter + 1) = 0.;
+
+    gamma[iter + 1] = -s[iter] * gamma[iter];
+    gamma[iter]     = std::conj(c[iter]) * gamma[iter];
+    QrTimer.Stop();
+  }
+
+  void computeSolution(std::vector<Field> const &v, Field &psi, int iter) {
+
+    CompSolutionTimer.Start();
+    for (int i = iter; i >= 0; i--) {
+      y[i] = gamma[i];
+      for (int k = i + 1; k <= iter; k++)
+        y[i] = y[i] - H(k, i) * y[k];
+      y[i] = y[i] / H(i, i);
+    }
+
+    if (true) {
+      for (int i = 0; i <= iter; i++)
+        psi = psi + v[i] * y[i];
+    }
+    else {
+      psi = y[0] * v[0];
+      for (int i = 1; i <= iter; i++)
+        psi = psi + v[i] * y[i];
+    }
+    CompSolutionTimer.Stop();
+  }
+};
+}
+#endif
diff --git a/tests/solver/Test_wilson_fcagmres_prec.cc b/tests/solver/Test_wilson_fcagmres_prec.cc
new file mode 100644
index 00000000..59477f95
--- /dev/null
+++ b/tests/solver/Test_wilson_fcagmres_prec.cc
@@ -0,0 +1,68 @@
+/*************************************************************************************
+
+Grid physics library, www.github.com/paboyle/Grid
+
+Source file: ./tests/solver/Test_wilson_fcagmres_prec.cc
+
+Copyright (C) 2015
+
+Author: Daniel Richtmann <daniel.richtmann@ur.de>
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along
+with this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+
+See the full license in the file "LICENSE" in the top level distribution
+directory
+*************************************************************************************/
+/*  END LEGAL */
+#include <Grid/Grid.h>
+
+using namespace Grid;
+using namespace Grid::QCD;
+
+int main (int argc, char ** argv)
+{
+  Grid_init(&argc,&argv);
+
+  std::vector<int> latt_size   = GridDefaultLatt();
+  std::vector<int> simd_layout = GridDefaultSimd(Nd,vComplex::Nsimd());
+  std::vector<int> mpi_layout  = GridDefaultMpi();
+  GridCartesian               Grid(latt_size,simd_layout,mpi_layout);
+  GridRedBlackCartesian     RBGrid(&Grid);
+
+  std::vector<int> seeds({1,2,3,4});
+  GridParallelRNG          pRNG(&Grid);  pRNG.SeedFixedIntegers(seeds);
+
+  LatticeFermion src(&Grid); random(pRNG,src);
+  RealD nrm = norm2(src);
+  LatticeFermion result(&Grid); result=zero;
+  LatticeGaugeField Umu(&Grid); SU3::HotConfiguration(pRNG,Umu);
+
+  double volume=1;
+  for(int mu=0;mu<Nd;mu++){
+    volume=volume*latt_size[mu];
+  }
+
+  RealD mass=0.5;
+  WilsonFermionR Dw(Umu,Grid,RBGrid,mass);
+
+  MdagMLinearOperator<WilsonFermionR,LatticeFermion> HermOp(Dw);
+
+  TrivialPrecon<LatticeFermion> simple;
+
+  FlexibleCommunicationAvoidingGeneralisedMinimalResidual<LatticeFermion> FCAGMRES(1.0e-8, 50, simple, 25);
+  FCAGMRES(HermOp,src,result);
+
+  Grid_finalize();
+}

From fa43206c790cfa0ff87fba2f45ece89b14d7d22b Mon Sep 17 00:00:00 2001
From: Daniel Richtmann <daniel.richtmann@physik.uni-regensburg.de>
Date: Fri, 10 Nov 2017 13:48:38 +0100
Subject: [PATCH 048/130] Remove some empty lines

---
 .../iterative/CommunicationAvoidingGeneralisedMinimalResidual.h  | 1 -
 .../FlexibleCommunicationAvoidingGeneralisedMinimalResidual.h    | 1 -
 lib/algorithms/iterative/FlexibleGeneralisedMinimalResidual.h    | 1 -
 lib/algorithms/iterative/GeneralisedMinimalResidual.h            | 1 -
 4 files changed, 4 deletions(-)

diff --git a/lib/algorithms/iterative/CommunicationAvoidingGeneralisedMinimalResidual.h b/lib/algorithms/iterative/CommunicationAvoidingGeneralisedMinimalResidual.h
index dfa0da8e..84c00a99 100644
--- a/lib/algorithms/iterative/CommunicationAvoidingGeneralisedMinimalResidual.h
+++ b/lib/algorithms/iterative/CommunicationAvoidingGeneralisedMinimalResidual.h
@@ -88,7 +88,6 @@ class CommunicationAvoidingGeneralisedMinimalResidual : public OperatorFunction<
     std::cout << GridLogIterative << "CommunicationAvoidingGeneralisedMinimalResidual: guess " << guess << std::endl;
     std::cout << GridLogIterative << "CommunicationAvoidingGeneralisedMinimalResidual:   src " << ssq   << std::endl;
 
-
     MatrixTimer.Reset();
     LinalgTimer.Reset();
     QrTimer.Reset();
diff --git a/lib/algorithms/iterative/FlexibleCommunicationAvoidingGeneralisedMinimalResidual.h b/lib/algorithms/iterative/FlexibleCommunicationAvoidingGeneralisedMinimalResidual.h
index 96fc4a20..b9a4b475 100644
--- a/lib/algorithms/iterative/FlexibleCommunicationAvoidingGeneralisedMinimalResidual.h
+++ b/lib/algorithms/iterative/FlexibleCommunicationAvoidingGeneralisedMinimalResidual.h
@@ -93,7 +93,6 @@ class FlexibleCommunicationAvoidingGeneralisedMinimalResidual : public OperatorF
     std::cout << GridLogIterative << "FlexibleCommunicationAvoidingGeneralisedMinimalResidual: guess " << guess << std::endl;
     std::cout << GridLogIterative << "FlexibleCommunicationAvoidingGeneralisedMinimalResidual:   src " << ssq   << std::endl;
 
-
     PrecTimer.Reset();
     MatrixTimer.Reset();
     LinalgTimer.Reset();
diff --git a/lib/algorithms/iterative/FlexibleGeneralisedMinimalResidual.h b/lib/algorithms/iterative/FlexibleGeneralisedMinimalResidual.h
index f885b902..c574d86f 100644
--- a/lib/algorithms/iterative/FlexibleGeneralisedMinimalResidual.h
+++ b/lib/algorithms/iterative/FlexibleGeneralisedMinimalResidual.h
@@ -93,7 +93,6 @@ class FlexibleGeneralisedMinimalResidual : public OperatorFunction<Field> {
     std::cout << GridLogIterative << "FlexibleGeneralisedMinimalResidual: guess " << guess << std::endl;
     std::cout << GridLogIterative << "FlexibleGeneralisedMinimalResidual:   src " << ssq   << std::endl;
 
-
     PrecTimer.Reset();
     MatrixTimer.Reset();
     LinalgTimer.Reset();
diff --git a/lib/algorithms/iterative/GeneralisedMinimalResidual.h b/lib/algorithms/iterative/GeneralisedMinimalResidual.h
index e426b6f3..a6e7aadc 100644
--- a/lib/algorithms/iterative/GeneralisedMinimalResidual.h
+++ b/lib/algorithms/iterative/GeneralisedMinimalResidual.h
@@ -88,7 +88,6 @@ class GeneralisedMinimalResidual : public OperatorFunction<Field> {
     std::cout << GridLogIterative << "GeneralisedMinimalResidual: guess " << guess << std::endl;
     std::cout << GridLogIterative << "GeneralisedMinimalResidual:   src " << ssq   << std::endl;
 
-
     MatrixTimer.Reset();
     LinalgTimer.Reset();
     QrTimer.Reset();

From 649b8c9acaac8a58683b63574c0c3a7ffd5b20cc Mon Sep 17 00:00:00 2001
From: Daniel Richtmann <daniel.richtmann@physik.uni-regensburg.de>
Date: Fri, 24 Nov 2017 10:43:34 +0100
Subject: [PATCH 049/130] Save current state

---
 tests/solver/Test_wilson_ddalphaamg.cc | 100 +++++++++++++++++--------
 1 file changed, 68 insertions(+), 32 deletions(-)

diff --git a/tests/solver/Test_wilson_ddalphaamg.cc b/tests/solver/Test_wilson_ddalphaamg.cc
index 7269bf64..85617a05 100644
--- a/tests/solver/Test_wilson_ddalphaamg.cc
+++ b/tests/solver/Test_wilson_ddalphaamg.cc
@@ -699,42 +699,43 @@ int main (int argc, char ** argv)
   params.steps = 1;
 
   const int Ls=params.Ls;
+  const int ds=params.domainsize;
 
-  GridCartesian         * UGrid   = SpaceTimeGrid::makeFourDimGrid(GridDefaultLatt(), GridDefaultSimd(Nd,vComplex::Nsimd()),GridDefaultMpi());
-  GridRedBlackCartesian * UrbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(UGrid);
+  GridCartesian         * FGrid   = SpaceTimeGrid::makeFourDimGrid(GridDefaultLatt(), GridDefaultSimd(Nd,vComplex::Nsimd()),GridDefaultMpi());
+  GridRedBlackCartesian * FrbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(FGrid);
 
   ///////////////////////////////////////////////////
   // Construct a coarsened grid; utility for this?
   ///////////////////////////////////////////////////
-  std::vector<int> block ({4,4,4,4});
-  const int nbasis= 32;
+  std::vector<int> blockSize({2,2,2,2});
+  const int nbasis= 16;
 
-  std::vector<int> clatt = GridDefaultLatt();
-  for(int d=0;d<clatt.size();d++){
-    clatt[d] = clatt[d]/block[d];
+  std::vector<int> cLattSize = GridDefaultLatt();
+  for(int d=0;d<cLattSize.size();d++){
+    cLattSize[d] = cLattSize[d]/blockSize[d];
   }
-  GridCartesian *Coarse4d =  SpaceTimeGrid::makeFourDimGrid(clatt, GridDefaultSimd(Nd,vComplex::Nsimd()),GridDefaultMpi());;
+  GridCartesian *CGrid =  SpaceTimeGrid::makeFourDimGrid(cLattSize, GridDefaultSimd(Nd,vComplex::Nsimd()),GridDefaultMpi());;
 
   std::vector<int> seedsFine({1,2,3,4});
   std::vector<int> seedsCoarse({5,6,7,8});
 
-  GridParallelRNG          pRNGFine(UGrid);      pRNGFine.SeedFixedIntegers(seedsFine);
-  GridParallelRNG          pRNGCoarse(Coarse4d); pRNGCoarse.SeedFixedIntegers(seedsCoarse);
+  GridParallelRNG pRNGFine(FGrid);   pRNGFine.SeedFixedIntegers(seedsFine);
+  GridParallelRNG pRNGCoarse(CGrid); pRNGCoarse.SeedFixedIntegers(seedsCoarse);
 
   Gamma g5(Gamma::Algebra::Gamma5);
 
-  LatticeFermion    src(UGrid); gaussian(pRNGFine,src);// src=src+g5*src;
-  LatticeFermion result(UGrid); result=zero;
-  LatticeFermion    ref(UGrid); ref=zero;
-  LatticeFermion    tmp(UGrid);
-  LatticeFermion    err(UGrid);
-  LatticeGaugeField Umu(UGrid); SU3::HotConfiguration(pRNGFine,Umu);
-  LatticeGaugeField UmuDD(UGrid); 
-  LatticeColourMatrix U(UGrid);
-  LatticeColourMatrix zz(UGrid);
+  LatticeFermion    src(FGrid); gaussian(pRNGFine,src);// src=src+g5*src;
+  LatticeFermion result(FGrid); result=zero;
+  LatticeFermion    ref(FGrid); ref=zero;
+  LatticeFermion    tmp(FGrid);
+  LatticeFermion    err(FGrid);
+  LatticeGaugeField Umu(FGrid); SU3::HotConfiguration(pRNGFine,Umu);
+  LatticeGaugeField UmuDD(FGrid);
+  LatticeColourMatrix U(FGrid);
+  LatticeColourMatrix zz(FGrid);
 
-  if ( params.domaindecompose ) { 
-    Lattice<iScalar<vInteger> > coor(UGrid);
+  if ( params.domaindecompose ) {
+    Lattice<iScalar<vInteger> > coor(FGrid);
     zz=zero;
     for(int mu=0;mu<Nd;mu++){
       LatticeCoordinate(coor,mu);
@@ -747,10 +748,9 @@ int main (int argc, char ** argv)
   }
 
   RealD mass=params.mq;
-  RealD M5=1.8;
 
   std::cout<<GridLogMessage << "**************************************************"<< std::endl;
-  std::cout<<GridLogMessage << "Hello "<< std::endl;
+  std::cout<<GridLogMessage << "Params: "<< std::endl;
   std::cout<<GridLogMessage << "**************************************************"<< std::endl;
 
   std::cout << params << std::endl;
@@ -759,8 +759,8 @@ int main (int argc, char ** argv)
   std::cout<<GridLogMessage << "Building the wilson operator" <<std::endl;
   std::cout<<GridLogMessage << "**************************************************"<< std::endl;
   
-  WilsonFermionR Dw(Umu,*UGrid,*UrbGrid,mass);
-  WilsonFermionR DwDD(UmuDD,*UGrid,*UrbGrid,mass);
+  WilsonFermionR Dw(Umu,*FGrid,*FrbGrid,mass);
+  WilsonFermionR DwDD(UmuDD,*FGrid,*FrbGrid,mass);
 
   typedef Aggregation<vSpinColourVector,vTComplex,nbasis>              Subspace;
   typedef CoarsenedMatrix<vSpinColourVector,vTComplex,nbasis>          CoarseOperator;
@@ -770,37 +770,73 @@ int main (int argc, char ** argv)
   std::cout<<GridLogMessage << "Calling Aggregation class to build subspace" <<std::endl;
   std::cout<<GridLogMessage << "**************************************************"<< std::endl;
 
-  Subspace Aggregates(Coarse4d,UGrid);
+  MdagMLinearOperator<WilsonFermionR,LatticeFermion> HermOp(Dw);
+  Subspace Aggregates(CGrid,FGrid,0);
   assert ( (nbasis & 0x1)==0);
   int nb=nbasis/2;
   std::cout<<GridLogMessage << " nbasis/2 = "<<nb<<std::endl;
 
-  Aggregates.CreateSubspaceRandom(pRNGFine);
+  Aggregates.CreateSubspaceRandom(pRNGFine); // creates subspace randomly and orthogonalizes it
 
   for(int n=0;n<nb;n++){
-    G5R5(Aggregates.subspace[n+nb],Aggregates.subspace[n]);
+    Aggregates.subspace[n+nb] = g5 * Aggregates.subspace[n]; // multiply with g5 normally instead of G5R5
     std::cout<<GridLogMessage<<n<<" subspace "<<norm2(Aggregates.subspace[n+nb])<<" "<<norm2(Aggregates.subspace[n]) <<std::endl;
   }
   for(int n=0;n<nbasis;n++){
     std::cout<<GridLogMessage << "vec["<<n<<"] = "<<norm2(Aggregates.subspace[n])  <<std::endl;
   }
 
+  result=zero;
+
   std::cout<<GridLogMessage << "**************************************************"<< std::endl;
   std::cout<<GridLogMessage << "Building coarse representation of Dirac operator" <<std::endl;
   std::cout<<GridLogMessage << "**************************************************"<< std::endl;
 
-  CoarsenedMatrix<vSpinColourVector,vTComplex,nbasis> LDOp(*Coarse4d);
-  // LDOp.CoarsenOperator(UGrid,Dw,Aggregates); // problem with this line
+  Gamma5HermitianLinearOperator<WilsonFermionR,LatticeFermion> Blah(Dw);
+  Gamma5HermitianLinearOperator<WilsonFermionR,LatticeFermion> BlahDD(DwDD);
+  CoarsenedMatrix<vSpinColourVector,vTComplex,nbasis> LDOp(*CGrid);
+  LDOp.CoarsenOperator(FGrid,Blah,Aggregates); // problem with this line since it enforces hermiticity
 
   std::cout<<GridLogMessage << "**************************************************"<< std::endl;
   std::cout<<GridLogMessage << "Testing some coarse space solvers  " <<std::endl;
   std::cout<<GridLogMessage << "**************************************************"<< std::endl;
 
-  CoarseVector c_src (Coarse4d);
-  CoarseVector c_res (Coarse4d);
+  CoarseVector c_src (CGrid);
+  CoarseVector c_res (CGrid);
   gaussian(pRNGCoarse,c_src);
   c_res=zero;
 
+  std::cout<<GridLogMessage << "**************************************************"<< std::endl;
+  std::cout<<GridLogMessage << "Solving posdef-CG on coarse space "<< std::endl;
+  std::cout<<GridLogMessage << "**************************************************"<< std::endl;
+
+  // MdagMLinearOperator<CoarseOperator,CoarseVector> PosdefLdop(LDOp);
+  // ConjugateGradient<CoarseVector> CG(1.0e-6,100000);
+  // //  CG(PosdefLdop,c_src,c_res);
+
+  // //  std::cout<<GridLogMessage << "**************************************************"<< std::endl;
+  // //  std::cout<<GridLogMessage << "Solving indef-MCR on coarse space "<< std::endl;
+  // //  std::cout<<GridLogMessage << "**************************************************"<< std::endl;
+  // //  HermitianLinearOperator<CoarseOperator,CoarseVector> HermIndefLdop(LDOp);
+  // //  ConjugateResidual<CoarseVector> MCR(1.0e-6,100000);
+  // //MCR(HermIndefLdop,c_src,c_res);
+
+  std::cout<<GridLogMessage << "**************************************************"<< std::endl;
+  std::cout<<GridLogMessage << "Building deflation preconditioner "<< std::endl;
+  std::cout<<GridLogMessage << "**************************************************"<< std::endl;
+
+  MultiGridPreconditioner <vSpinColourVector,vTComplex,nbasis,WilsonFermionR> Precon  (Aggregates, LDOp,
+                                                                                           Blah,Dw,
+                                                                                           BlahDD,DwDD);
+
+  MultiGridPreconditioner <vSpinColourVector,vTComplex,nbasis,WilsonFermionR> PreconDD(Aggregates, LDOp,
+                                                                                           Blah,Dw,
+                                                                                           BlahDD,DwDD);
+  // MultiGridPreconditioner(Aggregates &Agg, CoarseOperator &Coarse,
+  //                         FineOperator &Fine,Matrix &FineMatrix,
+  //                         FineOperator &Smooth,Matrix &SmootherMatrix)
+  TrivialPrecon<LatticeFermion> simple;
+
   Grid_finalize();
 }
 #endif

From f260af546e9b0b65a500879de02d98518a3a7ce7 Mon Sep 17 00:00:00 2001
From: Daniel Richtmann <daniel.richtmann@physik.uni-regensburg.de>
Date: Tue, 28 Nov 2017 15:03:02 +0100
Subject: [PATCH 050/130] Save current state

---
 tests/solver/Test_wilson_ddalphaamg.cc | 57 +++++++++++++++++++++++++-
 1 file changed, 55 insertions(+), 2 deletions(-)

diff --git a/tests/solver/Test_wilson_ddalphaamg.cc b/tests/solver/Test_wilson_ddalphaamg.cc
index 85617a05..265c83d3 100644
--- a/tests/solver/Test_wilson_ddalphaamg.cc
+++ b/tests/solver/Test_wilson_ddalphaamg.cc
@@ -26,13 +26,54 @@ Author: Daniel Richtmann <daniel.richtmann@ur.de>
     *************************************************************************************/
     /*  END LEGAL */
 #include <Grid/Grid.h>
-#include <Grid/algorithms/iterative/PrecGeneralisedConjugateResidual.h>
+// #include <Grid/algorithms/iterative/PrecGeneralisedConjugateResidual.h>
 //#include <algorithms/iterative/PrecConjugateResidual.h>
 
 using namespace std;
 using namespace Grid;
 using namespace Grid::QCD;
 
+template<class Field>
+class TestVectorAnalyzer {
+public:
+  void operator()(LinearOperatorBase<Field> &Linop, std::vector<Field> const & vectors)
+  {
+    std::vector<Field> tmp(4, vectors[0]._grid); // bit hacky?
+    Gamma g5(Gamma::Algebra::Gamma5);
+
+    std::cout << GridLogMessage << "Test vector analysis:" << std::endl;
+
+    for (auto i = 0; i < vectors.size(); ++i) {
+
+      Linop.Op(vectors[i], tmp[3]); // apply_operator_PRECISION( l->vbuf_PRECISION[3], test_vectors[i], &(l->p_PRECISION), l, no_threading ); // output, input
+
+      tmp[0] = g5 * tmp[3]; // is this the same as coarse_gamma5_PRECISION(tmp[0], tmp[3]) in WMG codebase???
+
+      // // use either these two
+      // auto lambda = innerProduct(vectors[i], l->vbuf_PRECISION[0]);
+      // lambda = lambda / innerProduct( vectors[i], vectors[i]);
+
+      // or this
+      auto lambda = innerProduct(vectors[i], tmp[0]) / innerProduct(vectors[i], vectors[i]);
+
+      tmp[1] = tmp[0] - lambda * vectors[i]; // vector_PRECISION_saxpy(tmp[1], tmp[0], vectors[i], -lambda);
+
+      // auto mu = sqrt(norm2(tmp[1]) / norm2(vectors[i])); // mu = global_norm_PRECISION( l->vbuf_PRECISION[1], 0, l->inner_vector_size, l, no_threading )/global_norm_PRECISION( test_vectors[i], 0, l->inner_vector_size, l, no_threading );
+
+      // RealD mu = sqrt(norm2(tmp[1]));
+      // mu = mu / sqrt(norm2(vectors[i])); // mu = global_norm_PRECISION( l->vbuf_PRECISION[1], 0, l->inner_vector_size, l, no_threading )/global_norm_PRECISION( test_vectors[i], 0, l->inner_vector_size, l, no_threading );
+
+      RealD mu = norm2(tmp[1]);
+      mu = mu / norm2(vectors[i]); // mu = global_norm_PRECISION( l->vbuf_PRECISION[1], 0, l->inner_vector_size, l, no_threading )/global_norm_PRECISION( test_vectors[i], 0, l->inner_vector_size, l, no_threading );
+      mu = std::sqrt(mu);
+
+      std::cout << GridLogMessage << std::setprecision(2) << "vector " << i << ": "
+                << "singular value: " << lambda << " singular vector precision: " << mu << std::endl; // printf0("singular value: %+lf%+lfi, singular vector precision: %le\n", (double)creal(lambda), (double)cimag(lambda), (double)mu );
+    }
+
+  }
+};
+
 class myclass: Serializable {
 public:
 
@@ -770,13 +811,22 @@ int main (int argc, char ** argv)
   std::cout<<GridLogMessage << "Calling Aggregation class to build subspace" <<std::endl;
   std::cout<<GridLogMessage << "**************************************************"<< std::endl;
 
+  // • TODO: need some way to run the smoother on the "test vectors" for a few
+  //   times before constructing the subspace from them
+  // • Maybe an application for an mrhs (true mrhs, no block) smoother?
+  // • In WMG, the vectors are normalized but not orthogonalized, but here they
+  //   are constructed randomly and then orthogonalized (rather orthonormalized) against each other
   MdagMLinearOperator<WilsonFermionR,LatticeFermion> HermOp(Dw);
   Subspace Aggregates(CGrid,FGrid,0);
-  assert ( (nbasis & 0x1)==0);
+  assert ((nbasis & 0x1)==0);
   int nb=nbasis/2;
   std::cout<<GridLogMessage << " nbasis/2 = "<<nb<<std::endl;
 
   Aggregates.CreateSubspaceRandom(pRNGFine); // creates subspace randomly and orthogonalizes it
+  auto testVectorAnalyzer = TestVectorAnalyzer<LatticeFermion>{};
+
+  // tva(HermOp, Aggregates.subspace);
+  testVectorAnalyzer(HermOp, Aggregates.subspace);
 
   for(int n=0;n<nb;n++){
     Aggregates.subspace[n+nb] = g5 * Aggregates.subspace[n]; // multiply with g5 normally instead of G5R5
@@ -786,6 +836,9 @@ int main (int argc, char ** argv)
     std::cout<<GridLogMessage << "vec["<<n<<"] = "<<norm2(Aggregates.subspace[n])  <<std::endl;
   }
 
+  // tva(HermOp, Aggregates.subspace);
+  testVectorAnalyzer(HermOp, Aggregates.subspace);
+
   result=zero;
 
   std::cout<<GridLogMessage << "**************************************************"<< std::endl;

From 4e965c168ef233ec675463b41c06bfd1f8dffd38 Mon Sep 17 00:00:00 2001
From: Daniel Richtmann <daniel.richtmann@physik.uni-regensburg.de>
Date: Wed, 29 Nov 2017 15:04:32 +0100
Subject: [PATCH 051/130] Implement analogue of test vector analysis in WMG
 codebase

---
 tests/solver/Test_wilson_ddalphaamg.cc | 27 +++++++++-----------------
 1 file changed, 9 insertions(+), 18 deletions(-)

diff --git a/tests/solver/Test_wilson_ddalphaamg.cc b/tests/solver/Test_wilson_ddalphaamg.cc
index 265c83d3..eaaca2bf 100644
--- a/tests/solver/Test_wilson_ddalphaamg.cc
+++ b/tests/solver/Test_wilson_ddalphaamg.cc
@@ -38,6 +38,9 @@ class TestVectorAnalyzer {
 public:
   void operator()(LinearOperatorBase<Field> &Linop, std::vector<Field> const & vectors)
   {
+    // this function corresponds to testvector_analysis_PRECISION from the
+    // DD-αAMG codebase
+
     std::vector<Field> tmp(4, vectors[0]._grid); // bit hacky?
     Gamma g5(Gamma::Algebra::Gamma5);
 
@@ -45,32 +48,20 @@ public:
 
     for (auto i = 0; i < vectors.size(); ++i) {
 
-      Linop.Op(vectors[i], tmp[3]); // apply_operator_PRECISION( l->vbuf_PRECISION[3], test_vectors[i], &(l->p_PRECISION), l, no_threading ); // output, input
+      Linop.Op(vectors[i], tmp[3]);
 
-      tmp[0] = g5 * tmp[3]; // is this the same as coarse_gamma5_PRECISION(tmp[0], tmp[3]) in WMG codebase???
+      tmp[0] = g5 * tmp[3]; // is this the same as coarse_gamma5_PRECISION?
 
-      // // use either these two
-      // auto lambda = innerProduct(vectors[i], l->vbuf_PRECISION[0]);
-      // lambda = lambda / innerProduct( vectors[i], vectors[i]);
-
-      // or this
       auto lambda = innerProduct(vectors[i], tmp[0]) / innerProduct(vectors[i], vectors[i]);
 
-      tmp[1] = tmp[0] - lambda * vectors[i]; // vector_PRECISION_saxpy(tmp[1], tmp[0], vectors[i], -lambda);
+      tmp[1] = tmp[0] - lambda * vectors[i];
 
-      // auto mu = sqrt(norm2(tmp[1]) / norm2(vectors[i])); // mu = global_norm_PRECISION( l->vbuf_PRECISION[1], 0, l->inner_vector_size, l, no_threading )/global_norm_PRECISION( test_vectors[i], 0, l->inner_vector_size, l, no_threading );
-
-      // RealD mu = sqrt(norm2(tmp[1]));
-      // mu = mu / sqrt(norm2(vectors[i])); // mu = global_norm_PRECISION( l->vbuf_PRECISION[1], 0, l->inner_vector_size, l, no_threading )/global_norm_PRECISION( test_vectors[i], 0, l->inner_vector_size, l, no_threading );
-
-      RealD mu = norm2(tmp[1]);
-      mu = mu / norm2(vectors[i]); // mu = global_norm_PRECISION( l->vbuf_PRECISION[1], 0, l->inner_vector_size, l, no_threading )/global_norm_PRECISION( test_vectors[i], 0, l->inner_vector_size, l, no_threading );
-      mu = std::sqrt(mu);
+      auto mu = ::sqrt(norm2(tmp[1]) / norm2(vectors[i]));
 
       std::cout << GridLogMessage << std::setprecision(2) << "vector " << i << ": "
-                << "singular value: " << lambda << " singular vector precision: " << mu << std::endl; // printf0("singular value: %+lf%+lfi, singular vector precision: %le\n", (double)creal(lambda), (double)cimag(lambda), (double)mu );
+                << "singular value: " << lambda
+                << " singular vector precision: " << mu << std::endl;
     }
-
   }
 };
 

From df152648d67b30e360291ddf22deba7fbdd4f35d Mon Sep 17 00:00:00 2001
From: Daniel Richtmann <daniel.richtmann@physik.uni-regensburg.de>
Date: Wed, 6 Dec 2017 18:00:58 +0100
Subject: [PATCH 052/130] Fix error in MR code when compiling for single
 precision

---
 lib/algorithms/iterative/MinimalResidual.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/lib/algorithms/iterative/MinimalResidual.h b/lib/algorithms/iterative/MinimalResidual.h
index 44e7dd40..b2651285 100644
--- a/lib/algorithms/iterative/MinimalResidual.h
+++ b/lib/algorithms/iterative/MinimalResidual.h
@@ -54,7 +54,7 @@ template<class Field> class MinimalResidual : public OperatorFunction<Field> {
     conformable(psi, src);
 
     Complex a, c;
-    RealD   d;
+    Real    d;
 
     Field Mr(src);
     Field r(src);

From 39558cce52262e52afd048f25c286bf2aa65da99 Mon Sep 17 00:00:00 2001
From: Daniel Richtmann <daniel.richtmann@physik.uni-regensburg.de>
Date: Fri, 22 Dec 2017 13:07:56 +0100
Subject: [PATCH 053/130] Multiply TVs in Wilson MG with G5 instead of G5R5

---
 tests/solver/Test_wilson_ddalphaamg.cc | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/solver/Test_wilson_ddalphaamg.cc b/tests/solver/Test_wilson_ddalphaamg.cc
index eaaca2bf..7ebfb678 100644
--- a/tests/solver/Test_wilson_ddalphaamg.cc
+++ b/tests/solver/Test_wilson_ddalphaamg.cc
@@ -820,7 +820,7 @@ int main (int argc, char ** argv)
   testVectorAnalyzer(HermOp, Aggregates.subspace);
 
   for(int n=0;n<nb;n++){
-    Aggregates.subspace[n+nb] = g5 * Aggregates.subspace[n]; // multiply with g5 normally instead of G5R5
+    Aggregates.subspace[n+nb] = g5 * Aggregates.subspace[n]; // multiply with g5 normally instead of G5R5 since this specific to DWF
     std::cout<<GridLogMessage<<n<<" subspace "<<norm2(Aggregates.subspace[n+nb])<<" "<<norm2(Aggregates.subspace[n]) <<std::endl;
   }
   for(int n=0;n<nbasis;n++){

From 6cf635d61c4ec9f7b0017707a7d73b7f617b5e41 Mon Sep 17 00:00:00 2001
From: Daniel Richtmann <daniel.richtmann@physik.uni-regensburg.de>
Date: Fri, 22 Dec 2017 13:20:09 +0100
Subject: [PATCH 054/130] Remove some old code in Wilson MG

---
 tests/solver/Test_wilson_ddalphaamg.cc | 220 -------------------------
 1 file changed, 220 deletions(-)

diff --git a/tests/solver/Test_wilson_ddalphaamg.cc b/tests/solver/Test_wilson_ddalphaamg.cc
index 7ebfb678..0f643e47 100644
--- a/tests/solver/Test_wilson_ddalphaamg.cc
+++ b/tests/solver/Test_wilson_ddalphaamg.cc
@@ -498,225 +498,6 @@ public:
 
 };
 
-#if 0
-int main (int argc, char ** argv)
-{
-  Grid_init(&argc,&argv);
-
-  const int Ls=params.Ls;
-
-  GridCartesian         * UGrid   = SpaceTimeGrid::makeFourDimGrid(GridDefaultLatt(), GridDefaultSimd(Nd,vComplex::Nsimd()),GridDefaultMpi());
-  GridRedBlackCartesian * UrbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(UGrid);
-
-  ///////////////////////////////////////////////////
-  // Construct a coarsened grid; utility for this?
-  ///////////////////////////////////////////////////
-  std::vector<int> block ({2,2,2,2});
-  const int nbasis= 32;
-
-  std::vector<int> clatt = GridDefaultLatt();
-  for(int d=0;d<clatt.size();d++){
-    clatt[d] = clatt[d]/block[d];
-  }
-  GridCartesian *Coarse4d =  SpaceTimeGrid::makeFourDimGrid(clatt, GridDefaultSimd(Nd,vComplex::Nsimd()),GridDefaultMpi());;
-
-  std::vector<int> seeds4({1,2,3,4});
-  std::vector<int> seeds5({5,6,7,8});
-  std::vector<int> cseeds({5,6,7,8});
-  GridParallelRNG          RNG5(FGrid);   RNG5.SeedFixedIntegers(seeds5);
-  GridParallelRNG          RNG4(UGrid);   RNG4.SeedFixedIntegers(seeds4);
-  GridParallelRNG          CRNG(Coarse5d);CRNG.SeedFixedIntegers(cseeds);
-
-  Gamma g5(Gamma::Algebra::Gamma5);
-
-  LatticeFermion    src(FGrid); gaussian(RNG5,src);// src=src+g5*src;
-  LatticeFermion result(FGrid); result=zero;
-  LatticeFermion    ref(FGrid); ref=zero;
-  LatticeFermion    tmp(FGrid);
-  LatticeFermion    err(FGrid);
-  LatticeGaugeField Umu(UGrid); 
-  LatticeGaugeField UmuDD(UGrid); 
-  LatticeColourMatrix U(UGrid);
-  LatticeColourMatrix zz(UGrid);
-
-  FieldMetaData header;
-  std::string file("./ckpoint_lat.4000");
-  NerscIO::readConfiguration(Umu,header,file);
-
-
-  if ( params.domaindecompose ) { 
-    Lattice<iScalar<vInteger> > coor(UGrid);
-    zz=zero;
-    for(int mu=0;mu<Nd;mu++){
-      LatticeCoordinate(coor,mu);
-      U = PeekIndex<LorentzIndex>(Umu,mu);
-      U = where(mod(coor,params.domainsize)==(Integer)0,zz,U);
-      PokeIndex<LorentzIndex>(UmuDD,U,mu);
-    }
-  } else { 
-    UmuDD = Umu;
-  }
-  //  SU3::ColdConfiguration(RNG4,Umu);
-  //  SU3::TepidConfiguration(RNG4,Umu);
-  //  SU3::HotConfiguration(RNG4,Umu);
-  //  Umu=zero;
-
-  RealD mass=params.mq;
-  RealD M5=1.8;
-
-  std::cout<<GridLogMessage << "**************************************************"<< std::endl;
-  std::cout<<GridLogMessage << "Building g5R5 hermitian DWF operator" <<std::endl;
-  std::cout<<GridLogMessage << "**************************************************"<< std::endl;
-  DomainWallFermionR Ddwf(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5);
-  DomainWallFermionR DdwfDD(UmuDD,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5);
-
-  typedef Aggregation<vSpinColourVector,vTComplex,nbasis>              Subspace;
-  typedef CoarsenedMatrix<vSpinColourVector,vTComplex,nbasis>          CoarseOperator;
-  typedef CoarseOperator::CoarseVector                                 CoarseVector;
-
-  std::cout<<GridLogMessage << "**************************************************"<< std::endl;
-  std::cout<<GridLogMessage << "Calling Aggregation class to build subspace" <<std::endl;
-  std::cout<<GridLogMessage << "**************************************************"<< std::endl;
-  MdagMLinearOperator<DomainWallFermionR,LatticeFermion> HermDefOp(Ddwf);
-  Subspace Aggregates(Coarse5d,FGrid);
-  //  Aggregates.CreateSubspace(RNG5,HermDefOp,nbasis);
-  assert ( (nbasis & 0x1)==0);
-  int nb=nbasis/2;
-  std::cout<<GridLogMessage << " nbasis/2 = "<<nb<<std::endl;
-  //  Aggregates.CreateSubspace(RNG5,HermDefOp,nb);
-  Aggregates.CreateSubspaceLanczos(RNG5,HermDefOp,nb);
-  for(int n=0;n<nb;n++){
-    G5R5(Aggregates.subspace[n+nb],Aggregates.subspace[n]);
-    std::cout<<GridLogMessage<<n<<" subspace "<<norm2(Aggregates.subspace[n+nb])<<" "<<norm2(Aggregates.subspace[n]) <<std::endl;
-  }
-  for(int n=0;n<nbasis;n++){
-    std::cout<<GridLogMessage << "vec["<<n<<"] = "<<norm2(Aggregates.subspace[n])  <<std::endl;
-  }
-
-//  for(int i=0;i<nbasis;i++){
-//    result =     Aggregates.subspace[i];
-//    Aggregates.subspace[i]=result+g5*result;
-//  }
-  result=zero;
-  
-  std::cout<<GridLogMessage << "**************************************************"<< std::endl;
-  std::cout<<GridLogMessage << "Building coarse representation of Indef operator" <<std::endl;
-  std::cout<<GridLogMessage << "**************************************************"<< std::endl;
-  Gamma5R5HermitianLinearOperator<DomainWallFermionR,LatticeFermion> HermIndefOp(Ddwf);
-  Gamma5R5HermitianLinearOperator<DomainWallFermionR,LatticeFermion> HermIndefOpDD(DdwfDD);
-  CoarsenedMatrix<vSpinColourVector,vTComplex,nbasis> LDOp(*Coarse5d);
-  LDOp.CoarsenOperator(FGrid,HermIndefOp,Aggregates);
-
-  std::cout<<GridLogMessage << "**************************************************"<< std::endl;
-  std::cout<<GridLogMessage << "Testing some coarse space solvers  " <<std::endl;
-  std::cout<<GridLogMessage << "**************************************************"<< std::endl;
-  CoarseVector c_src (Coarse5d);
-  CoarseVector c_res (Coarse5d);
-  gaussian(CRNG,c_src);
-  c_res=zero;
-
-  std::cout<<GridLogMessage << "**************************************************"<< std::endl;
-  std::cout<<GridLogMessage << "Solving posdef-CG on coarse space "<< std::endl;
-  std::cout<<GridLogMessage << "**************************************************"<< std::endl;
-  MdagMLinearOperator<CoarseOperator,CoarseVector> PosdefLdop(LDOp);
-  ConjugateGradient<CoarseVector> CG(1.0e-6,100000);
-  //  CG(PosdefLdop,c_src,c_res);
-
-  //  std::cout<<GridLogMessage << "**************************************************"<< std::endl;
-  //  std::cout<<GridLogMessage << "Solving indef-MCR on coarse space "<< std::endl;
-  //  std::cout<<GridLogMessage << "**************************************************"<< std::endl;
-  //  HermitianLinearOperator<CoarseOperator,CoarseVector> HermIndefLdop(LDOp);
-  //  ConjugateResidual<CoarseVector> MCR(1.0e-6,100000);
-  //MCR(HermIndefLdop,c_src,c_res);
-
-  std::cout<<GridLogMessage << "**************************************************"<< std::endl;
-  std::cout<<GridLogMessage << "Building deflation preconditioner "<< std::endl;
-  std::cout<<GridLogMessage << "**************************************************"<< std::endl;
-
-  MultiGridPreconditioner <vSpinColourVector,vTComplex,nbasis,DomainWallFermionR> Precon  (Aggregates, LDOp,
-											   HermIndefOp,Ddwf,
-											   HermIndefOp,Ddwf);
-
-  MultiGridPreconditioner <vSpinColourVector,vTComplex,nbasis,DomainWallFermionR> PreconDD(Aggregates, LDOp,
-											   HermIndefOp,Ddwf,
-											   HermIndefOpDD,DdwfDD);
-  TrivialPrecon<LatticeFermion> simple;
-
-  std::cout<<GridLogMessage << "**************************************************"<< std::endl;
-  std::cout<<GridLogMessage << "Testing smoother efficacy"<< std::endl;
-  std::cout<<GridLogMessage << "**************************************************"<< std::endl;
-  //  Precon.SmootherTest(src);
-
-  std::cout<<GridLogMessage << "**************************************************"<< std::endl;
-  std::cout<<GridLogMessage << "Testing DD smoother efficacy"<< std::endl;
-  std::cout<<GridLogMessage << "**************************************************"<< std::endl;
-  //  PreconDD.SmootherTest(src);
-
-  std::cout<<GridLogMessage << "**************************************************"<< std::endl;
-  std::cout<<GridLogMessage << "Testing SAP smoother efficacy"<< std::endl;
-  std::cout<<GridLogMessage << "**************************************************"<< std::endl;
-  //  PreconDD.SAP(src,result);
-
-  std::cout<<GridLogMessage << "**************************************************"<< std::endl;
-  std::cout<<GridLogMessage << "Unprec CG "<< std::endl;
-  std::cout<<GridLogMessage << "**************************************************"<< std::endl;
-
-  //  TrivialPrecon<LatticeFermion> simple;
-  //  ConjugateGradient<LatticeFermion> fCG(1.0e-8,100000);
-  //  fCG(HermDefOp,src,result);
-  //  exit(0);
-
-  std::cout<<GridLogMessage << "**************************************************"<< std::endl;
-  std::cout<<GridLogMessage << "Testing GCR on indef matrix "<< std::endl;
-  std::cout<<GridLogMessage << "**************************************************"<< std::endl;
-  //  PrecGeneralisedConjugateResidual<LatticeFermion> UPGCR(1.0e-8,100000,simple,8,128);
-  //  UPGCR(HermIndefOp,src,result);
-
-  
-  /// Get themax eval
-  std::cout<<GridLogMessage << "**************************************************"<< std::endl;
-  std::cout<<GridLogMessage <<" Applying power method to find spectral range      "<<std::endl;
-  std::cout<<GridLogMessage << "**************************************************"<< std::endl;
-  Precon.PowerMethod(src);
-
-
-  std::cout<<GridLogMessage << "**************************************************"<< std::endl;
-  std::cout<<GridLogMessage << "Building a two level DDPGCR "<< std::endl;
-  std::cout<<GridLogMessage << "**************************************************"<< std::endl;
-  //  PrecGeneralisedConjugateResidual<LatticeFermion> PGCRDD(1.0e-8,100000,PreconDD,8,128);
-  //  result=zero;
-  //  std::cout<<GridLogMessage<<"checking norm src "<<norm2(src)<<std::endl;
-  //  PGCRDD(HermIndefOp,src,result);
-
-  std::cout<<GridLogMessage << "**************************************************"<< std::endl;
-  std::cout<<GridLogMessage << "Building a two level PGCR "<< std::endl;
-  std::cout<<GridLogMessage << "**************************************************"<< std::endl;
-  PrecGeneralisedConjugateResidual<LatticeFermion> PGCR(1.0e-8,100000,Precon,8,8);
-  std::cout<<GridLogMessage<<"checking norm src "<<norm2(src)<<std::endl;
-  result=zero;
-  PGCR(HermIndefOp,src,result);
-
-  std::cout<<GridLogMessage << "**************************************************"<< std::endl;
-  std::cout<<GridLogMessage << "Red Black Prec CG "<< std::endl;
-  std::cout<<GridLogMessage << "**************************************************"<< std::endl;
-  SchurDiagMooeeOperator<DomainWallFermionR,LatticeFermion> HermOpEO(Ddwf);
-  ConjugateGradient<LatticeFermion> pCG(1.0e-8,10000);
-
-  LatticeFermion    src_o(FrbGrid);
-  LatticeFermion result_o(FrbGrid);
-  pickCheckerboard(Odd,src_o,src);
-  result_o=zero;
-
-  pCG(HermOpEO,src_o,result_o);
-
-
-  std::cout<<GridLogMessage << "**************************************************"<< std::endl;
-  std::cout<<GridLogMessage << "Done "<< std::endl;
-  std::cout<<GridLogMessage << "**************************************************"<< std::endl;
-  Grid_finalize();
-}
-
-#else
 int main (int argc, char ** argv)
 {
   Grid_init(&argc,&argv);
@@ -883,4 +664,3 @@ int main (int argc, char ** argv)
 
   Grid_finalize();
 }
-#endif

From 26f14d7dd760d35de3a327f4102cf54685e50d5c Mon Sep 17 00:00:00 2001
From: Daniel Richtmann <daniel.richtmann@physik.uni-regensburg.de>
Date: Thu, 11 Jan 2018 13:36:30 +0100
Subject: [PATCH 055/130] Adapt output format of non-herm solvers to the one of
 VPGCR

---
 ...cationAvoidingGeneralisedMinimalResidual.h | 19 ++++++++---------
 ...cationAvoidingGeneralisedMinimalResidual.h | 21 +++++++++----------
 .../FlexibleGeneralisedMinimalResidual.h      | 21 +++++++++----------
 .../iterative/GeneralisedMinimalResidual.h    | 19 ++++++++---------
 lib/algorithms/iterative/MinimalResidual.h    | 15 +++++++------
 5 files changed, 45 insertions(+), 50 deletions(-)

diff --git a/lib/algorithms/iterative/CommunicationAvoidingGeneralisedMinimalResidual.h b/lib/algorithms/iterative/CommunicationAvoidingGeneralisedMinimalResidual.h
index 84c00a99..d6a1ca12 100644
--- a/lib/algorithms/iterative/CommunicationAvoidingGeneralisedMinimalResidual.h
+++ b/lib/algorithms/iterative/CommunicationAvoidingGeneralisedMinimalResidual.h
@@ -113,17 +113,16 @@ class CommunicationAvoidingGeneralisedMinimalResidual : public OperatorFunction<
         RealD resnorm       = sqrt(norm2(r));
         RealD true_residual = resnorm / srcnorm;
 
-        std::cout << GridLogMessage << "CommunicationAvoidingGeneralisedMinimalResidual: Converged on iteration " << IterationCount << std::endl;
-        std::cout << GridLogMessage << "\tComputed residual "                                                     << sqrt(cp / ssq) << std::endl;
-        std::cout << GridLogMessage << "\tTrue residual "                                                         << true_residual  << std::endl;
-        std::cout << GridLogMessage << "\tTarget "                                                                << Tolerance      << std::endl;
+        std::cout << GridLogMessage        << "CommunicationAvoidingGeneralisedMinimalResidual: Converged on iteration " << IterationCount
+                  << " computed residual " << sqrt(cp / ssq)
+                  << " true residual "     << true_residual
+                  << " target "            << Tolerance << std::endl;
 
-        std::cout << GridLogMessage << "CommunicationAvoidingGeneralisedMinimalResidual Time breakdown" << std::endl;
-        std::cout << GridLogMessage << "\tElapsed " << SolverTimer.Elapsed()                            << std::endl;
-        std::cout << GridLogMessage << "\tMatrix "  << MatrixTimer.Elapsed()                            << std::endl;
-        std::cout << GridLogMessage << "\tLinalg "  << LinalgTimer.Elapsed()                            << std::endl;
-        std::cout << GridLogMessage << "\tQR "      << QrTimer.Elapsed()                                << std::endl;
-        std::cout << GridLogMessage << "\tCompSol " << CompSolutionTimer.Elapsed()                      << std::endl;
+        std::cout << GridLogMessage << "CAGMRES Time elapsed: Total   " <<       SolverTimer.Elapsed() << std::endl;
+        std::cout << GridLogMessage << "CAGMRES Time elapsed: Matrix  " <<       MatrixTimer.Elapsed() << std::endl;
+        std::cout << GridLogMessage << "CAGMRES Time elapsed: Linalg  " <<       LinalgTimer.Elapsed() << std::endl;
+        std::cout << GridLogMessage << "CAGMRES Time elapsed: QR      " <<           QrTimer.Elapsed() << std::endl;
+        std::cout << GridLogMessage << "CAGMRES Time elapsed: CompSol " << CompSolutionTimer.Elapsed() << std::endl;
         return;
       }
     }
diff --git a/lib/algorithms/iterative/FlexibleCommunicationAvoidingGeneralisedMinimalResidual.h b/lib/algorithms/iterative/FlexibleCommunicationAvoidingGeneralisedMinimalResidual.h
index b9a4b475..8c36597b 100644
--- a/lib/algorithms/iterative/FlexibleCommunicationAvoidingGeneralisedMinimalResidual.h
+++ b/lib/algorithms/iterative/FlexibleCommunicationAvoidingGeneralisedMinimalResidual.h
@@ -119,18 +119,17 @@ class FlexibleCommunicationAvoidingGeneralisedMinimalResidual : public OperatorF
         RealD resnorm       = sqrt(norm2(r));
         RealD true_residual = resnorm / srcnorm;
 
-        std::cout << GridLogMessage << "FlexibleCommunicationAvoidingGeneralisedMinimalResidual: Converged on iteration " << IterationCount << std::endl;
-        std::cout << GridLogMessage << "\tComputed residual "                                                             << sqrt(cp / ssq) << std::endl;
-        std::cout << GridLogMessage << "\tTrue residual "                                                                 << true_residual  << std::endl;
-        std::cout << GridLogMessage << "\tTarget "                                                                        << Tolerance      << std::endl;
+        std::cout << GridLogMessage        << "FlexibleCommunicationAvoidingGeneralisedMinimalResidual: Converged on iteration " << IterationCount
+                  << " computed residual " << sqrt(cp / ssq)
+                  << " true residual "     << true_residual
+                  << " target "            << Tolerance << std::endl;
 
-        std::cout << GridLogMessage << "FlexibleCommunicationAvoidingGeneralisedMinimalResidual Time breakdown" << std::endl;
-        std::cout << GridLogMessage << "\tElapsed " << SolverTimer.Elapsed()                                    << std::endl;
-        std::cout << GridLogMessage << "\tPrecon "  << PrecTimer.Elapsed()                                      << std::endl;
-        std::cout << GridLogMessage << "\tMatrix "  << MatrixTimer.Elapsed()                                    << std::endl;
-        std::cout << GridLogMessage << "\tLinalg "  << LinalgTimer.Elapsed()                                    << std::endl;
-        std::cout << GridLogMessage << "\tQR "      << QrTimer.Elapsed()                                        << std::endl;
-        std::cout << GridLogMessage << "\tCompSol " << CompSolutionTimer.Elapsed()                              << std::endl;
+        std::cout << GridLogMessage << "FCAGMRES Time elapsed: Total   " <<       SolverTimer.Elapsed() << std::endl;
+        std::cout << GridLogMessage << "FCAGMRES Time elapsed: Precon  " <<         PrecTimer.Elapsed() << std::endl;
+        std::cout << GridLogMessage << "FCAGMRES Time elapsed: Matrix  " <<       MatrixTimer.Elapsed() << std::endl;
+        std::cout << GridLogMessage << "FCAGMRES Time elapsed: Linalg  " <<       LinalgTimer.Elapsed() << std::endl;
+        std::cout << GridLogMessage << "FCAGMRES Time elapsed: QR      " <<           QrTimer.Elapsed() << std::endl;
+        std::cout << GridLogMessage << "FCAGMRES Time elapsed: CompSol " << CompSolutionTimer.Elapsed() << std::endl;
         return;
       }
     }
diff --git a/lib/algorithms/iterative/FlexibleGeneralisedMinimalResidual.h b/lib/algorithms/iterative/FlexibleGeneralisedMinimalResidual.h
index c574d86f..84956aed 100644
--- a/lib/algorithms/iterative/FlexibleGeneralisedMinimalResidual.h
+++ b/lib/algorithms/iterative/FlexibleGeneralisedMinimalResidual.h
@@ -119,18 +119,17 @@ class FlexibleGeneralisedMinimalResidual : public OperatorFunction<Field> {
         RealD resnorm       = sqrt(norm2(r));
         RealD true_residual = resnorm / srcnorm;
 
-        std::cout << GridLogMessage << "FlexibleGeneralisedMinimalResidual: Converged on iteration " << IterationCount << std::endl;
-        std::cout << GridLogMessage << "\tComputed residual "                                        << sqrt(cp / ssq) << std::endl;
-        std::cout << GridLogMessage << "\tTrue residual "                                            << true_residual  << std::endl;
-        std::cout << GridLogMessage << "\tTarget "                                                   << Tolerance      << std::endl;
+        std::cout << GridLogMessage        << "FlexibleGeneralisedMinimalResidual: Converged on iteration " << IterationCount
+                  << " computed residual " << sqrt(cp / ssq)
+                  << " true residual "     << true_residual
+                  << " target "            << Tolerance << std::endl;
 
-        std::cout << GridLogMessage << "FlexibleGeneralisedMinimalResidual Time breakdown" << std::endl;
-        std::cout << GridLogMessage << "\tElapsed " << SolverTimer.Elapsed()               << std::endl;
-        std::cout << GridLogMessage << "\tPrecon "  << PrecTimer.Elapsed()                 << std::endl;
-        std::cout << GridLogMessage << "\tMatrix "  << MatrixTimer.Elapsed()               << std::endl;
-        std::cout << GridLogMessage << "\tLinalg "  << LinalgTimer.Elapsed()               << std::endl;
-        std::cout << GridLogMessage << "\tQR "      << QrTimer.Elapsed()                   << std::endl;
-        std::cout << GridLogMessage << "\tCompSol " << CompSolutionTimer.Elapsed()         << std::endl;
+        std::cout << GridLogMessage << "FGMRES Time elapsed: Total   " <<       SolverTimer.Elapsed() << std::endl;
+        std::cout << GridLogMessage << "FGMRES Time elapsed: Precon  " <<         PrecTimer.Elapsed() << std::endl;
+        std::cout << GridLogMessage << "FGMRES Time elapsed: Matrix  " <<       MatrixTimer.Elapsed() << std::endl;
+        std::cout << GridLogMessage << "FGMRES Time elapsed: Linalg  " <<       LinalgTimer.Elapsed() << std::endl;
+        std::cout << GridLogMessage << "FGMRES Time elapsed: QR      " <<           QrTimer.Elapsed() << std::endl;
+        std::cout << GridLogMessage << "FGMRES Time elapsed: CompSol " << CompSolutionTimer.Elapsed() << std::endl;
         return;
       }
     }
diff --git a/lib/algorithms/iterative/GeneralisedMinimalResidual.h b/lib/algorithms/iterative/GeneralisedMinimalResidual.h
index a6e7aadc..0e6d4be4 100644
--- a/lib/algorithms/iterative/GeneralisedMinimalResidual.h
+++ b/lib/algorithms/iterative/GeneralisedMinimalResidual.h
@@ -113,17 +113,16 @@ class GeneralisedMinimalResidual : public OperatorFunction<Field> {
         RealD resnorm       = sqrt(norm2(r));
         RealD true_residual = resnorm / srcnorm;
 
-        std::cout << GridLogMessage << "GeneralisedMinimalResidual: Converged on iteration " << IterationCount << std::endl;
-        std::cout << GridLogMessage << "\tComputed residual "                                << sqrt(cp / ssq) << std::endl;
-        std::cout << GridLogMessage << "\tTrue residual "                                    << true_residual  << std::endl;
-        std::cout << GridLogMessage << "\tTarget "                                           << Tolerance      << std::endl;
+        std::cout << GridLogMessage        << "GeneralisedMinimalResidual: Converged on iteration " << IterationCount
+                  << " computed residual " << sqrt(cp / ssq)
+                  << " true residual "     << true_residual
+                  << " target "            << Tolerance << std::endl;
 
-        std::cout << GridLogMessage << "GeneralisedMinimalResidual Time breakdown" << std::endl;
-        std::cout << GridLogMessage << "\tElapsed " << SolverTimer.Elapsed()       << std::endl;
-        std::cout << GridLogMessage << "\tMatrix "  << MatrixTimer.Elapsed()       << std::endl;
-        std::cout << GridLogMessage << "\tLinalg "  << LinalgTimer.Elapsed()       << std::endl;
-        std::cout << GridLogMessage << "\tQR "      << QrTimer.Elapsed()           << std::endl;
-        std::cout << GridLogMessage << "\tCompSol " << CompSolutionTimer.Elapsed() << std::endl;
+        std::cout << GridLogMessage << "GMRES Time elapsed: Total   " <<       SolverTimer.Elapsed() << std::endl;
+        std::cout << GridLogMessage << "GMRES Time elapsed: Matrix  " <<       MatrixTimer.Elapsed() << std::endl;
+        std::cout << GridLogMessage << "GMRES Time elapsed: Linalg  " <<       LinalgTimer.Elapsed() << std::endl;
+        std::cout << GridLogMessage << "GMRES Time elapsed: QR      " <<           QrTimer.Elapsed() << std::endl;
+        std::cout << GridLogMessage << "GMRES Time elapsed: CompSol " << CompSolutionTimer.Elapsed() << std::endl;
         return;
       }
     }
diff --git a/lib/algorithms/iterative/MinimalResidual.h b/lib/algorithms/iterative/MinimalResidual.h
index b2651285..ee2f208b 100644
--- a/lib/algorithms/iterative/MinimalResidual.h
+++ b/lib/algorithms/iterative/MinimalResidual.h
@@ -129,15 +129,14 @@ template<class Field> class MinimalResidual : public OperatorFunction<Field> {
         RealD resnorm       = sqrt(norm2(r));
         RealD true_residual = resnorm / srcnorm;
 
-        std::cout << GridLogMessage << "MinimalResidual Converged on iteration " << k << std::endl;
-        std::cout << GridLogMessage << "\tComputed residual " << sqrt(cp / ssq) << std::endl;
-        std::cout << GridLogMessage << "\tTrue residual " << true_residual << std::endl;
-        std::cout << GridLogMessage << "\tTarget " << Tolerance << std::endl;
+        std::cout << GridLogMessage        << "MinimalResidual Converged on iteration " << k
+                  << " computed residual " << sqrt(cp / ssq)
+                  << " true residual "     << true_residual
+                  << " target "            << Tolerance << std::endl;
 
-        std::cout << GridLogMessage << "Time breakdown " << std::endl;
-        std::cout << GridLogMessage << "\tElapsed    " << SolverTimer.Elapsed() << std::endl;
-        std::cout << GridLogMessage << "\tMatrix     " << MatrixTimer.Elapsed() << std::endl;
-        std::cout << GridLogMessage << "\tLinalg     " << LinalgTimer.Elapsed() << std::endl;
+        std::cout << GridLogMessage << "MR Time elapsed: Total   " << SolverTimer.Elapsed() << std::endl;
+        std::cout << GridLogMessage << "MR Time elapsed: Matrix  " << MatrixTimer.Elapsed() << std::endl;
+        std::cout << GridLogMessage << "MR Time elapsed: Linalg  " << LinalgTimer.Elapsed() << std::endl;
 
         if (ErrorOnNoConverge)
           assert(true_residual / Tolerance < 10000.0);

From 10f7a17ae48c16276976024928c2facb39341c4a Mon Sep 17 00:00:00 2001
From: Daniel Richtmann <daniel.richtmann@physik.uni-regensburg.de>
Date: Thu, 11 Jan 2018 13:42:18 +0100
Subject: [PATCH 056/130] Make timing in VPGCR more detailed

---
 .../PrecGeneralisedConjugateResidual.h        | 19 +++++++++++++++----
 1 file changed, 15 insertions(+), 4 deletions(-)

diff --git a/lib/algorithms/iterative/PrecGeneralisedConjugateResidual.h b/lib/algorithms/iterative/PrecGeneralisedConjugateResidual.h
index fd11352e..c723c4a9 100644
--- a/lib/algorithms/iterative/PrecGeneralisedConjugateResidual.h
+++ b/lib/algorithms/iterative/PrecGeneralisedConjugateResidual.h
@@ -139,8 +139,11 @@ namespace Grid {
       MatTimer.Start();
       Linop.HermOpAndNorm(psi,Az,zAz,zAAz); 
       MatTimer.Stop();
+
+      LinalgTimer.Start();
       r=src-Az;
-      
+      LinalgTimer.Stop();
+
       /////////////////////
       // p = Prec(r)
       /////////////////////
@@ -152,8 +155,10 @@ namespace Grid {
       Linop.HermOp(z,tmp); 
       MatTimer.Stop();
 
+      LinalgTimer.Start();
       ttmp=tmp;
       tmp=tmp-r;
+      LinalgTimer.Stop();
 
       /*
       std::cout<<GridLogMessage<<r<<std::endl;
@@ -166,12 +171,14 @@ namespace Grid {
       Linop.HermOpAndNorm(z,Az,zAz,zAAz); 
       MatTimer.Stop();
 
+      LinalgTimer.Start();
       //p[0],q[0],qq[0] 
       p[0]= z;
       q[0]= Az;
       qq[0]= zAAz;
 
       cp =norm2(r);
+      LinalgTimer.Stop();
 
       for(int k=0;k<nstep;k++){
 
@@ -181,12 +188,14 @@ namespace Grid {
 	int peri_k = k %mmax;
 	int peri_kp= kp%mmax;
 
+        LinalgTimer.Start();
 	rq= real(innerProduct(r,q[peri_k])); // what if rAr not real?
 	a = rq/qq[peri_k];
 
 	axpy(psi,a,p[peri_k],psi);         
 
-	cp = axpy_norm(r,-a,q[peri_k],r);  
+	cp = axpy_norm(r,-a,q[peri_k],r);
+        LinalgTimer.Stop();
 
 	if((k==nstep-1)||(cp<rsq)){
 	  return cp;
@@ -202,6 +211,8 @@ namespace Grid {
 	Linop.HermOpAndNorm(z,Az,zAz,zAAz);
 	Linop.HermOp(z,tmp);
 	MatTimer.Stop();
+
+        LinalgTimer.Start();
         tmp=tmp-r;
 	std::cout<<GridLogMessage<< " Preconditioner resid " <<sqrt(norm2(tmp)/norm2(r))<<std::endl; 
 
@@ -219,9 +230,9 @@ namespace Grid {
 
 	}
 	qq[peri_kp]=norm2(q[peri_kp]); // could use axpy_norm
-
-
+        LinalgTimer.Stop();
       }
+
       assert(0); // never reached
       return cp;
     }

From fa4eeb28c4ef81a2779cc33ec0eede388fa1d7ad Mon Sep 17 00:00:00 2001
From: Daniel Richtmann <daniel.richtmann@physik.uni-regensburg.de>
Date: Wed, 17 Jan 2018 17:56:34 +0100
Subject: [PATCH 057/130] Save current state in Wilson MG test file

---
 tests/solver/Test_wilson_ddalphaamg.cc | 348 +++++++++++++++++++------
 1 file changed, 272 insertions(+), 76 deletions(-)

diff --git a/tests/solver/Test_wilson_ddalphaamg.cc b/tests/solver/Test_wilson_ddalphaamg.cc
index 0f643e47..49a43c4e 100644
--- a/tests/solver/Test_wilson_ddalphaamg.cc
+++ b/tests/solver/Test_wilson_ddalphaamg.cc
@@ -26,27 +26,29 @@ Author: Daniel Richtmann <daniel.richtmann@ur.de>
     *************************************************************************************/
     /*  END LEGAL */
 #include <Grid/Grid.h>
-// #include <Grid/algorithms/iterative/PrecGeneralisedConjugateResidual.h>
+#include <Grid/algorithms/iterative/PrecGeneralisedConjugateResidual.h>
 //#include <algorithms/iterative/PrecConjugateResidual.h>
 
 using namespace std;
 using namespace Grid;
 using namespace Grid::QCD;
 
-template<class Field>
+template<class Field, int nbasis>
 class TestVectorAnalyzer {
 public:
-  void operator()(LinearOperatorBase<Field> &Linop, std::vector<Field> const & vectors)
+  void operator()(LinearOperatorBase<Field> &Linop, std::vector<Field> const & vectors, int nn=nbasis)
   {
     // this function corresponds to testvector_analysis_PRECISION from the
     // DD-αAMG codebase
 
+    auto positiveOnes = 0;
+
     std::vector<Field> tmp(4, vectors[0]._grid); // bit hacky?
     Gamma g5(Gamma::Algebra::Gamma5);
 
     std::cout << GridLogMessage << "Test vector analysis:" << std::endl;
 
-    for (auto i = 0; i < vectors.size(); ++i) {
+    for (auto i = 0; i < nn; ++i) {
 
       Linop.Op(vectors[i], tmp[3]);
 
@@ -58,10 +60,16 @@ public:
 
       auto mu = ::sqrt(norm2(tmp[1]) / norm2(vectors[i]));
 
-      std::cout << GridLogMessage << std::setprecision(2) << "vector " << i << ": "
+      auto nrm = ::sqrt(norm2(vectors[i]));
+
+      if(real(lambda) > 0)
+        positiveOnes++;
+
+      std::cout << GridLogMessage << std::scientific << std::setprecision(2) << std::setw(2) << std::showpos << "vector " << i << ": "
                 << "singular value: " << lambda
-                << " singular vector precision: " << mu << std::endl;
+                << ", singular vector precision: " << mu << ", norm: " << nrm << std::endl;
     }
+    std::cout << GridLogMessage << std::scientific << std::setprecision(2) << std::setw(2) << std::showpos << positiveOnes << " out of " << nn << " vectors were positive" << std::endl;
   }
 };
 
@@ -71,7 +79,8 @@ public:
   GRID_SERIALIZABLE_CLASS_MEMBERS(myclass,
 			  int, domaindecompose,
 			  int, domainsize,
-			  int, order,
+                          int, coarsegrids,
+                          int, order,
 			  int, Ls,
 			  double, mq,
 			  double, lo,
@@ -87,6 +96,48 @@ RealD InverseApproximation(RealD x){
   return 1.0/x;
 }
 
+template <int nbasis>
+struct CoarseGrids
+{
+public:
+  // typedef Aggregation<vSpinColourVector,vTComplex,nbasis>     Subspace;
+  // typedef CoarsenedMatrix<vSpinColourVector,vTComplex,nbasis>
+  // CoarseOperator; typedef typename CoarseOperator::CoarseVector
+  // CoarseVector;
+
+  std::vector<std::vector<int>> LattSizes;
+  std::vector<std::vector<int>> Seeds;
+  std::vector<GridCartesian *>  Grids;
+  std::vector<GridParallelRNG>  PRNGs;
+
+  CoarseGrids(std::vector<std::vector<int>> const &blockSizes,int coarsegrids = 1)
+  {
+    assert( blockSizes.size() == coarsegrids );
+
+    std::cout << GridLogMessage << "Constructing " << coarsegrids << " CoarseGrids" << std::endl;
+
+    for(int cl=0; cl<coarsegrids; ++cl) { // may be a bit ugly and slow but not perf critical
+      LattSizes.push_back({GridDefaultLatt()});
+      Seeds.push_back(std::vector<int>(LattSizes[cl].size()));
+
+      for(int d=0; d<LattSizes[cl].size(); ++d) {
+        LattSizes[cl][d] = LattSizes[cl][d] / blockSizes[cl][d];
+        Seeds[cl][d] = (cl + 1) * LattSizes[cl].size() + d + 1; // unimportant, just to get. e.g., {5, // 6, 7, 8} for first coarse level and // so on
+      }
+
+      Grids.push_back(SpaceTimeGrid::makeFourDimGrid(LattSizes[cl], GridDefaultSimd(Nd, vComplex::Nsimd()), GridDefaultMpi()));
+      PRNGs.push_back(GridParallelRNG(Grids[cl]));
+
+      PRNGs[cl].SeedFixedIntegers(Seeds[cl]);
+
+      std::cout << GridLogMessage << "cl = " << cl << ": LattSize = " << LattSizes[cl] << std::endl;
+      std::cout << GridLogMessage << "cl = " << cl << ":    Seeds = " << Seeds[cl] << std::endl;
+    }
+  }
+};
+
+// template < class Fobj, class CComplex, int coarseSpins, int nbasis, class Matrix >
+// class MultiGridPreconditioner : public LinearFunction< Lattice< Fobj > > {
 template<class Fobj,class CComplex,int nbasis, class Matrix>
 class MultiGridPreconditioner : public LinearFunction< Lattice<Fobj> > {
 public:
@@ -498,51 +549,62 @@ public:
 
 };
 
+struct MGParams
+{
+    std::vector< std::vector< int > > blockSizes;
+    const int                         nbasis;
+
+    MGParams()
+        : blockSizes( { { 1, 1, 1, 2 } } )
+        // : blockSizes({ {1,1,1,2}, {1,1,1,2} })
+        // : blockSizes({ {1,1,1,2}, {1,1,1,2}, {1,1,1,2} })
+        , nbasis( 20 )
+    {
+    }
+};
+
 int main (int argc, char ** argv)
 {
   Grid_init(&argc,&argv);
 
-  params.domaindecompose = 1;
   params.domainsize= 1;
-  params.order = 1;
+  params.coarsegrids= 1;
+  params.domaindecompose = 0;
+  params.order = 30;
   params.Ls = 1;
-  params.mq = 1;
-  params.lo = 1;
-  params.hi = 1;
+  // params.mq = .13;
+  params.mq = .5;
+  params.lo = 0.5;
+  params.hi = 70.0;
   params.steps = 1;
 
-  const int Ls=params.Ls;
-  const int ds=params.domainsize;
+  auto mgp = MGParams{};
 
-  GridCartesian         * FGrid   = SpaceTimeGrid::makeFourDimGrid(GridDefaultLatt(), GridDefaultSimd(Nd,vComplex::Nsimd()),GridDefaultMpi());
+  std::cout << GridLogMessage << "**************************************************" << std::endl;
+  std::cout << GridLogMessage << "Params: " << std::endl;
+  std::cout << GridLogMessage << "**************************************************" << std::endl;
+
+  std::cout << params << std::endl;
+
+  std::cout << GridLogMessage << "**************************************************" << std::endl;
+  std::cout << GridLogMessage << "Set up some fine level stuff: " << std::endl;
+  std::cout << GridLogMessage << "**************************************************" << std::endl;
+
+  GridCartesian         * FGrid   = SpaceTimeGrid::makeFourDimGrid(GridDefaultLatt(),GridDefaultSimd(Nd, vComplex::Nsimd()),GridDefaultMpi());
   GridRedBlackCartesian * FrbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(FGrid);
 
-  ///////////////////////////////////////////////////
-  // Construct a coarsened grid; utility for this?
-  ///////////////////////////////////////////////////
-  std::vector<int> blockSize({2,2,2,2});
-  const int nbasis= 16;
-
-  std::vector<int> cLattSize = GridDefaultLatt();
-  for(int d=0;d<cLattSize.size();d++){
-    cLattSize[d] = cLattSize[d]/blockSize[d];
-  }
-  GridCartesian *CGrid =  SpaceTimeGrid::makeFourDimGrid(cLattSize, GridDefaultSimd(Nd,vComplex::Nsimd()),GridDefaultMpi());;
-
-  std::vector<int> seedsFine({1,2,3,4});
-  std::vector<int> seedsCoarse({5,6,7,8});
-
-  GridParallelRNG pRNGFine(FGrid);   pRNGFine.SeedFixedIntegers(seedsFine);
-  GridParallelRNG pRNGCoarse(CGrid); pRNGCoarse.SeedFixedIntegers(seedsCoarse);
+  std::vector<int> fSeeds( {1, 2, 3, 4} );
+  GridParallelRNG    fPRNG( FGrid );
+  fPRNG.SeedFixedIntegers( fSeeds );
 
   Gamma g5(Gamma::Algebra::Gamma5);
 
-  LatticeFermion    src(FGrid); gaussian(pRNGFine,src);// src=src+g5*src;
-  LatticeFermion result(FGrid); result=zero;
-  LatticeFermion    ref(FGrid); ref=zero;
+  LatticeFermion    src(FGrid); gaussian(fPRNG, src); // src=src+g5*src;
+  LatticeFermion result(FGrid); result = zero;
+  LatticeFermion    ref(FGrid); ref = zero;
   LatticeFermion    tmp(FGrid);
   LatticeFermion    err(FGrid);
-  LatticeGaugeField Umu(FGrid); SU3::HotConfiguration(pRNGFine,Umu);
+  LatticeGaugeField Umu(FGrid); SU3::HotConfiguration(fPRNG, Umu);
   LatticeGaugeField UmuDD(FGrid);
   LatticeColourMatrix U(FGrid);
   LatticeColourMatrix zz(FGrid);
@@ -562,25 +624,97 @@ int main (int argc, char ** argv)
 
   RealD mass=params.mq;
 
-  std::cout<<GridLogMessage << "**************************************************"<< std::endl;
-  std::cout<<GridLogMessage << "Params: "<< std::endl;
-  std::cout<<GridLogMessage << "**************************************************"<< std::endl;
+  std::cout << GridLogMessage << "**************************************************" << std::endl;
+  std::cout << GridLogMessage << "Set up some coarser levels stuff: " << std::endl;
+  std::cout << GridLogMessage << "**************************************************" << std::endl;
 
-  std::cout << params << std::endl;
+  std::vector< std::vector< int > > blockSizes({ { 1, 1, 1, 2 } } ); // corresponds to two level algorithm
+  // std::vector< std::vector<int> > blockSizes({ {1,1,1,2},       // // corresponds to three level algorithm
+  //                                              {1,1,1,2} });
+
+  const int nbasis = 20; // we fix the number of test vector to the same
+                          // number on every level for now
+
+  // // some stuff we need for every coarser lattice
+  // std::vector<std::vector<int>> cLattSizes({GridDefaultLatt()});;
+  // std::vector<GridCartesian *> cGrids(params.coarsegrids);
+  // std::vector<std::vector<int>> cSeeds({ {5,6,7,8} });
+  // std::vector<GridParallelRNG> cPRNGs;(params.coarsegrids);
+
+  // assert(cLattSizes.size() == params.coarsegrids);
+  // assert(    cGrids.size() == params.coarsegrids);
+  // assert(    cSeeds.size() == params.coarsegrids);
+  // assert(    cPRNGs.size() == params.coarsegrids);
+
+  // for(int cl=0;cl<cLattSizes.size();cl++){
+  //   for(int d=0;d<cLattSizes[cl].size();d++){
+  //     // std::cout << cl << " " << d << " " << cLattSizes[cl][d] << " " <<
+  //     blockSizes[cl][d] << std::endl; cLattSizes[cl][d] =
+  //     cLattSizes[cl][d]/blockSizes[cl][d];
+  //   }
+  //   cGrids[cl] = SpaceTimeGrid::makeFourDimGrid(cLattSizes[cl],
+  //   GridDefaultSimd(Nd,vComplex::Nsimd()),GridDefaultMpi());;
+  //   // std::cout << cLattSizes[cl] << std::endl;
+  // }
+
+    // GridParallelRNG cPRNG(CGrid); cPRNG.SeedFixedIntegers(cSeeds);
+
+    CoarseGrids< nbasis > cGrids( blockSizes );
+
+    // assert(0);
 
   std::cout<<GridLogMessage << "**************************************************"<< std::endl;
-  std::cout<<GridLogMessage << "Building the wilson operator" <<std::endl;
+  std::cout<<GridLogMessage << "Building the wilson operator on the fine grid" <<std::endl;
   std::cout<<GridLogMessage << "**************************************************"<< std::endl;
   
   WilsonFermionR Dw(Umu,*FGrid,*FrbGrid,mass);
   WilsonFermionR DwDD(UmuDD,*FGrid,*FrbGrid,mass);
 
+  std::cout<<GridLogMessage<< "**************************************************"<< std::endl;
+  std::cout<<GridLogMessage<< "Some typedefs" <<std::endl;
+  std::cout<<GridLogMessage<< "**************************************************"<< std::endl;
+
   typedef Aggregation<vSpinColourVector,vTComplex,nbasis>              Subspace;
   typedef CoarsenedMatrix<vSpinColourVector,vTComplex,nbasis>          CoarseOperator;
   typedef CoarseOperator::CoarseVector                                 CoarseVector;
+  typedef TestVectorAnalyzer<LatticeFermion,nbasis> TVA;
+
+  // typedef Aggregation<vSpinColourVector,vTComplex,1,nbasis> Subspace;
+  // typedef CoarsenedMatrix<vSpinColourVector,vTComplex,1,nbasis> CoarseOperator;
+  // typedef CoarseOperator::CoarseVector                 CoarseVector;
+
+  // typedef CoarseOperator::CoarseG5PVector
+  // CoarseG5PVector; // P = preserving typedef
+  // CoarseOperator::CoarseG5PMatrix CoarseG5PMatrix;
+
+#if 1
+  std::cout << std::endl;
+  std::cout << "type_name<decltype(vTComplex{})>()                      = " << type_name<decltype(vTComplex{})>()                      << std::endl;
+  std::cout << "type_name<GridTypeMapper<vTComplex>::scalar_type>()     = " << type_name<GridTypeMapper<vTComplex>::scalar_type>()     << std::endl;
+  std::cout << "type_name<GridTypeMapper<vTComplex>::vector_type>()     = " << type_name<GridTypeMapper<vTComplex>::vector_type>()     << std::endl;
+  std::cout << "type_name<GridTypeMapper<vTComplex>::vector_typeD>()    = " << type_name<GridTypeMapper<vTComplex>::vector_typeD>()    << std::endl;
+  std::cout << "type_name<GridTypeMapper<vTComplex>::tensor_reduced>()  = " << type_name<GridTypeMapper<vTComplex>::tensor_reduced>()  << std::endl;
+  std::cout << "type_name<GridTypeMapper<vTComplex>::scalar_object>()   = " << type_name<GridTypeMapper<vTComplex>::scalar_object>()   << std::endl;
+  std::cout << "type_name<GridTypeMapper<vTComplex>::Complexified>()    = " << type_name<GridTypeMapper<vTComplex>::Complexified>()    << std::endl;
+  std::cout << "type_name<GridTypeMapper<vTComplex>::Realified>()       = " << type_name<GridTypeMapper<vTComplex>::Realified>()       << std::endl;
+  std::cout << "type_name<GridTypeMapper<vTComplex>::DoublePrecision>() = " << type_name<GridTypeMapper<vTComplex>::DoublePrecision>() << std::endl;
+  std::cout << std::endl;
+
+  std::cout << std::endl;
+  std::cout << "type_name<decltype(TComplex{})>()                      = " << type_name<decltype(TComplex{})>()                      << std::endl;
+  std::cout << "type_name<GridTypeMapper<TComplex>::scalar_type>()     = " << type_name<GridTypeMapper<TComplex>::scalar_type>()     << std::endl;
+  std::cout << "type_name<GridTypeMapper<TComplex>::vector_type>()     = " << type_name<GridTypeMapper<TComplex>::vector_type>()     << std::endl;
+  std::cout << "type_name<GridTypeMapper<TComplex>::vector_typeD>()    = " << type_name<GridTypeMapper<TComplex>::vector_typeD>()    << std::endl;
+  std::cout << "type_name<GridTypeMapper<TComplex>::tensor_reduced>()  = " << type_name<GridTypeMapper<TComplex>::tensor_reduced>()  << std::endl;
+  std::cout << "type_name<GridTypeMapper<TComplex>::scalar_object>()   = " << type_name<GridTypeMapper<TComplex>::scalar_object>()   << std::endl;
+  std::cout << "type_name<GridTypeMapper<TComplex>::Complexified>()    = " << type_name<GridTypeMapper<TComplex>::Complexified>()    << std::endl;
+  std::cout << "type_name<GridTypeMapper<TComplex>::Realified>()       = " << type_name<GridTypeMapper<TComplex>::Realified>()       << std::endl;
+  std::cout << "type_name<GridTypeMapper<TComplex>::DoublePrecision>() = " << type_name<GridTypeMapper<TComplex>::DoublePrecision>() << std::endl;
+  std::cout << std::endl;
+#endif
 
   std::cout<<GridLogMessage << "**************************************************"<< std::endl;
-  std::cout<<GridLogMessage << "Calling Aggregation class to build subspace" <<std::endl;
+  std::cout<<GridLogMessage << "Calling Aggregation class to build subspaces" <<std::endl;
   std::cout<<GridLogMessage << "**************************************************"<< std::endl;
 
   // • TODO: need some way to run the smoother on the "test vectors" for a few
@@ -589,16 +723,16 @@ int main (int argc, char ** argv)
   // • In WMG, the vectors are normalized but not orthogonalized, but here they
   //   are constructed randomly and then orthogonalized (rather orthonormalized) against each other
   MdagMLinearOperator<WilsonFermionR,LatticeFermion> HermOp(Dw);
-  Subspace Aggregates(CGrid,FGrid,0);
+  Subspace Aggregates(cGrids.Grids[0],FGrid,0);
   assert ((nbasis & 0x1)==0);
   int nb=nbasis/2;
   std::cout<<GridLogMessage << " nbasis/2 = "<<nb<<std::endl;
 
-  Aggregates.CreateSubspaceRandom(pRNGFine); // creates subspace randomly and orthogonalizes it
-  auto testVectorAnalyzer = TestVectorAnalyzer<LatticeFermion>{};
+  Aggregates.CreateSubspace(fPRNG, HermOp /*, nb */); // Don't specify nb to see the orthogonalization check
 
-  // tva(HermOp, Aggregates.subspace);
-  testVectorAnalyzer(HermOp, Aggregates.subspace);
+  TVA testVectorAnalyzer;
+
+  testVectorAnalyzer(HermOp, Aggregates.subspace, nb);
 
   for(int n=0;n<nb;n++){
     Aggregates.subspace[n+nb] = g5 * Aggregates.subspace[n]; // multiply with g5 normally instead of G5R5 since this specific to DWF
@@ -609,6 +743,7 @@ int main (int argc, char ** argv)
   }
 
   // tva(HermOp, Aggregates.subspace);
+  Aggregates.CheckOrthogonal();
   testVectorAnalyzer(HermOp, Aggregates.subspace);
 
   result=zero;
@@ -617,50 +752,111 @@ int main (int argc, char ** argv)
   std::cout<<GridLogMessage << "Building coarse representation of Dirac operator" <<std::endl;
   std::cout<<GridLogMessage << "**************************************************"<< std::endl;
 
-  Gamma5HermitianLinearOperator<WilsonFermionR,LatticeFermion> Blah(Dw);
-  Gamma5HermitianLinearOperator<WilsonFermionR,LatticeFermion> BlahDD(DwDD);
-  CoarsenedMatrix<vSpinColourVector,vTComplex,nbasis> LDOp(*CGrid);
-  LDOp.CoarsenOperator(FGrid,Blah,Aggregates); // problem with this line since it enforces hermiticity
+  Gamma5HermitianLinearOperator<WilsonFermionR,LatticeFermion> HermIndefOp(Dw); // this corresponds to working with H = g5 * D
+  Gamma5HermitianLinearOperator<WilsonFermionR,LatticeFermion> HermIndefOpDD(DwDD);
+  CoarsenedMatrix<vSpinColourVector,vTComplex,nbasis> CoarseOp(*cGrids.Grids[0]);
+  CoarseOp.CoarsenOperator(FGrid, HermIndefOp, Aggregates); // uses only linop.OpDiag & linop.OpDir
 
-  std::cout<<GridLogMessage << "**************************************************"<< std::endl;
-  std::cout<<GridLogMessage << "Testing some coarse space solvers  " <<std::endl;
-  std::cout<<GridLogMessage << "**************************************************"<< std::endl;
+  std::cout << GridLogMessage << "**************************************************" << std::endl;
+  std::cout << GridLogMessage << "Building coarse vectors" << std::endl;
+  std::cout << GridLogMessage << "**************************************************" << std::endl;
 
-  CoarseVector c_src (CGrid);
-  CoarseVector c_res (CGrid);
-  gaussian(pRNGCoarse,c_src);
+  CoarseVector c_src (cGrids.Grids[0]);
+  CoarseVector c_res (cGrids.Grids[0]);
+  gaussian(cGrids.PRNGs[0],c_src);
   c_res=zero;
 
-  std::cout<<GridLogMessage << "**************************************************"<< std::endl;
-  std::cout<<GridLogMessage << "Solving posdef-CG on coarse space "<< std::endl;
-  std::cout<<GridLogMessage << "**************************************************"<< std::endl;
+  std::cout << "type_name<decltype(c_src)>() = " << type_name< decltype( c_src ) >() << std::endl;
 
-  // MdagMLinearOperator<CoarseOperator,CoarseVector> PosdefLdop(LDOp);
-  // ConjugateGradient<CoarseVector> CG(1.0e-6,100000);
-  // //  CG(PosdefLdop,c_src,c_res);
+  // c_res = g5 * c_src;
 
-  // //  std::cout<<GridLogMessage << "**************************************************"<< std::endl;
-  // //  std::cout<<GridLogMessage << "Solving indef-MCR on coarse space "<< std::endl;
-  // //  std::cout<<GridLogMessage << "**************************************************"<< std::endl;
-  // //  HermitianLinearOperator<CoarseOperator,CoarseVector> HermIndefLdop(LDOp);
-  // //  ConjugateResidual<CoarseVector> MCR(1.0e-6,100000);
-  // //MCR(HermIndefLdop,c_src,c_res);
+  std::cout << GridLogMessage << "**************************************************" << std::endl;
+  std::cout << GridLogMessage << "Solving posdef-MR on coarse space " << std::endl;
+  std::cout << GridLogMessage << "**************************************************" << std::endl;
+
+  MdagMLinearOperator<CoarseOperator,CoarseVector> PosdefLdop(CoarseOp);
+  MinimalResidual<CoarseVector>   MR(5.0e-2, 100, false);
+  ConjugateGradient<CoarseVector> CG(5.0e-2, 100, false);
+
+  MR(PosdefLdop, c_src, c_res);
+
+  gaussian(cGrids.PRNGs[0], c_src);
+  c_res = zero;
+  CG(PosdefLdop, c_src, c_res);
+
+  std::cout << GridLogMessage << "**************************************************" << std::endl;
+  std::cout << GridLogMessage << "Dummy testing for building second coarse level" << std::endl;
+  std::cout << GridLogMessage << "**************************************************" << std::endl;
+
+  // typedef Aggregation< CoarseVector, vTComplex, nbasis > SubspaceAgain;
+
+  // SubspaceAgain AggregatesCoarsenedAgain(cGrids.Grids[1], cGrids.Grids[0], 0);
+  // AggregatesCoarsenedAgain.CreateSubspace(cGrids.PRNGs[0], PosdefLdop);
+
+  // for(int n=0;n<nb;n++){
+  //   AggregatesCoarsenedAgain.subspace[n+nb] = g5 * AggregatesCoarsenedAgain.subspace[n]; // multiply with g5 normally instead of G5R5 since this specific to DWF
+  //   std::cout<<GridLogMessage<<n<<" subspace "<<norm2(AggregatesCoarsenedAgain.subspace[n+nb])<<" "<<norm2(AggregatesCoarsenedAgain.subspace[n]) <<std::endl;
+  // }
+
+  // for(int n=0;n<nbasis;n++){
+  //   std::cout<<GridLogMessage << "vec["<<n<<"] = "<<norm2(AggregatesCoarsenedAgain.subspace[n])  <<std::endl;
+  // }
+
+  // AggregatesCoarsenedAgain.CheckOrthogonal();
+
+  // std::cout<<GridLogMessage << "**************************************************"<< std::endl;
+  // std::cout<<GridLogMessage << "Solving indef-MCR on coarse space "<< std::endl;
+  // std::cout<<GridLogMessage << "**************************************************"<< std::endl;
+  // HermitianLinearOperator<CoarseOperator,CoarseVector> HermIndefLdop(CoarseOp);
+  // ConjugateResidual<CoarseVector> MCR(1.0e-6,100000);
+  // MCR(HermIndefLdop,c_src,c_res);
 
   std::cout<<GridLogMessage << "**************************************************"<< std::endl;
   std::cout<<GridLogMessage << "Building deflation preconditioner "<< std::endl;
   std::cout<<GridLogMessage << "**************************************************"<< std::endl;
 
-  MultiGridPreconditioner <vSpinColourVector,vTComplex,nbasis,WilsonFermionR> Precon  (Aggregates, LDOp,
-                                                                                           Blah,Dw,
-                                                                                           BlahDD,DwDD);
+  MultiGridPreconditioner <vSpinColourVector,vTComplex,nbasis,WilsonFermionR> Precon  (Aggregates, CoarseOp,
+                                                                                       HermIndefOp,Dw,
+                                                                                       HermIndefOp,Dw);
 
-  MultiGridPreconditioner <vSpinColourVector,vTComplex,nbasis,WilsonFermionR> PreconDD(Aggregates, LDOp,
-                                                                                           Blah,Dw,
-                                                                                           BlahDD,DwDD);
+  MultiGridPreconditioner <vSpinColourVector,vTComplex,nbasis,WilsonFermionR> PreconDD(Aggregates, CoarseOp,
+                                                                                       HermIndefOp,Dw,
+                                                                                       HermIndefOpDD,DwDD);
   // MultiGridPreconditioner(Aggregates &Agg, CoarseOperator &Coarse,
   //                         FineOperator &Fine,Matrix &FineMatrix,
   //                         FineOperator &Smooth,Matrix &SmootherMatrix)
-  TrivialPrecon<LatticeFermion> simple;
+  TrivialPrecon<LatticeFermion> Simple;
+
+  std::cout << GridLogMessage << "**************************************************" << std::endl;
+  std::cout << GridLogMessage << "Building two level VPGCR and FGMRES solvers" << std::endl;
+  std::cout << GridLogMessage << "**************************************************" << std::endl;
+
+  PrecGeneralisedConjugateResidual<LatticeFermion>    VPGCRMG(1.0e-12,100,Precon,8,8);
+  FlexibleGeneralisedMinimalResidual<LatticeFermion> FGMRESMG(1.0e-12,100,Precon,8);
+
+  std::cout << GridLogMessage << "checking norm src " << norm2(src) << std::endl;
+
+  std::cout << GridLogMessage << "**************************************************" << std::endl;
+  std::cout << GridLogMessage << "Building unpreconditioned VPGCR and FGMRES solvers" << std::endl;
+  std::cout << GridLogMessage << "**************************************************" << std::endl;
+
+  PrecGeneralisedConjugateResidual<LatticeFermion>    VPGCRT(1.0e-12,4000000,Simple,8,8);
+  FlexibleGeneralisedMinimalResidual<LatticeFermion> FGMREST(1.0e-12,4000000,Simple,8);
+
+  std::cout << GridLogMessage << "**************************************************" << std::endl;
+  std::cout << GridLogMessage << "Testing the four solvers" << std::endl;
+  std::cout << GridLogMessage << "**************************************************" << std::endl;
+
+  std::vector< OperatorFunction<LatticeFermion>*> solvers;
+  solvers.push_back(&VPGCRMG);
+  solvers.push_back(&FGMRESMG);
+  solvers.push_back(&VPGCRT);
+  solvers.push_back(&FGMREST);
+
+  for(auto elem : solvers) {
+    result = zero;
+    (*elem)(HermIndefOp,src,result);
+  }
 
   Grid_finalize();
 }

From 9732519c413c8a9a273346094aab99c4bdaa0654 Mon Sep 17 00:00:00 2001
From: Daniel Richtmann <daniel.richtmann@physik.uni-regensburg.de>
Date: Thu, 18 Jan 2018 12:38:28 +0100
Subject: [PATCH 058/130] Apply clang-format to Wilson MG

I can provide the configuration file I used if people want that.
---
 tests/solver/Test_wilson_ddalphaamg.cc | 728 ++++++++++++-------------
 1 file changed, 360 insertions(+), 368 deletions(-)

diff --git a/tests/solver/Test_wilson_ddalphaamg.cc b/tests/solver/Test_wilson_ddalphaamg.cc
index 49a43c4e..0f828959 100644
--- a/tests/solver/Test_wilson_ddalphaamg.cc
+++ b/tests/solver/Test_wilson_ddalphaamg.cc
@@ -1,4 +1,4 @@
-    /*************************************************************************************
+/*************************************************************************************
 
     Grid physics library, www.github.com/paboyle/Grid 
 
@@ -6,7 +6,7 @@
 
     Copyright (C) 2015
 
-Author: Daniel Richtmann <daniel.richtmann@ur.de>
+    Author: Daniel Richtmann <daniel.richtmann@ur.de>
 
     This program is free software; you can redistribute it and/or modify
     it under the terms of the GNU General Public License as published by
@@ -24,7 +24,8 @@ Author: Daniel Richtmann <daniel.richtmann@ur.de>
 
     See the full license in the file "LICENSE" in the top level distribution directory
     *************************************************************************************/
-    /*  END LEGAL */
+/*  END LEGAL */
+
 #include <Grid/Grid.h>
 #include <Grid/algorithms/iterative/PrecGeneralisedConjugateResidual.h>
 //#include <algorithms/iterative/PrecConjugateResidual.h>
@@ -33,22 +34,20 @@ using namespace std;
 using namespace Grid;
 using namespace Grid::QCD;
 
-template<class Field, int nbasis>
-class TestVectorAnalyzer {
+template<class Field, int nbasis> class TestVectorAnalyzer {
 public:
-  void operator()(LinearOperatorBase<Field> &Linop, std::vector<Field> const & vectors, int nn=nbasis)
-  {
+  void operator()(LinearOperatorBase<Field> &Linop, std::vector<Field> const &vectors, int nn = nbasis) {
     // this function corresponds to testvector_analysis_PRECISION from the
     // DD-αAMG codebase
 
     auto positiveOnes = 0;
 
     std::vector<Field> tmp(4, vectors[0]._grid); // bit hacky?
-    Gamma g5(Gamma::Algebra::Gamma5);
+    Gamma              g5(Gamma::Algebra::Gamma5);
 
     std::cout << GridLogMessage << "Test vector analysis:" << std::endl;
 
-    for (auto i = 0; i < nn; ++i) {
+    for(auto i = 0; i < nn; ++i) {
 
       Linop.Op(vectors[i], tmp[3]);
 
@@ -66,39 +65,36 @@ public:
         positiveOnes++;
 
       std::cout << GridLogMessage << std::scientific << std::setprecision(2) << std::setw(2) << std::showpos << "vector " << i << ": "
-                << "singular value: " << lambda
-                << ", singular vector precision: " << mu << ", norm: " << nrm << std::endl;
+                << "singular value: " << lambda << ", singular vector precision: " << mu << ", norm: " << nrm << std::endl;
     }
-    std::cout << GridLogMessage << std::scientific << std::setprecision(2) << std::setw(2) << std::showpos << positiveOnes << " out of " << nn << " vectors were positive" << std::endl;
+    std::cout << GridLogMessage << std::scientific << std::setprecision(2) << std::setw(2) << std::showpos << positiveOnes << " out of "
+              << nn << " vectors were positive" << std::endl;
   }
 };
 
-class myclass: Serializable {
+class myclass : Serializable {
 public:
-
+  // clang-format off
   GRID_SERIALIZABLE_CLASS_MEMBERS(myclass,
-			  int, domaindecompose,
-			  int, domainsize,
-                          int, coarsegrids,
-                          int, order,
-			  int, Ls,
-			  double, mq,
-			  double, lo,
-			  double, hi,
-			  int, steps);
-
+                                  int, domaindecompose,
+                                  int, domainsize,
+                                  int, coarsegrids,
+                                  int, order,
+                                  int, Ls,
+                                  double, mq,
+                                  double, lo,
+                                  double, hi,
+                                  int, steps);
+  // clang-format on
   myclass(){};
-
 };
 myclass params;
 
-RealD InverseApproximation(RealD x){
-  return 1.0/x;
+RealD InverseApproximation(RealD x) {
+  return 1.0 / x;
 }
 
-template <int nbasis>
-struct CoarseGrids
-{
+template<int nbasis> struct CoarseGrids {
 public:
   // typedef Aggregation<vSpinColourVector,vTComplex,nbasis>     Subspace;
   // typedef CoarsenedMatrix<vSpinColourVector,vTComplex,nbasis>
@@ -110,19 +106,20 @@ public:
   std::vector<GridCartesian *>  Grids;
   std::vector<GridParallelRNG>  PRNGs;
 
-  CoarseGrids(std::vector<std::vector<int>> const &blockSizes,int coarsegrids = 1)
-  {
-    assert( blockSizes.size() == coarsegrids );
+  CoarseGrids(std::vector<std::vector<int>> const &blockSizes, int coarsegrids = 1) {
+
+    assert(blockSizes.size() == coarsegrids);
 
     std::cout << GridLogMessage << "Constructing " << coarsegrids << " CoarseGrids" << std::endl;
 
-    for(int cl=0; cl<coarsegrids; ++cl) { // may be a bit ugly and slow but not perf critical
+    for(int cl = 0; cl < coarsegrids; ++cl) { // may be a bit ugly and slow but not perf critical
       LattSizes.push_back({GridDefaultLatt()});
       Seeds.push_back(std::vector<int>(LattSizes[cl].size()));
 
-      for(int d=0; d<LattSizes[cl].size(); ++d) {
+      for(int d = 0; d < LattSizes[cl].size(); ++d) {
         LattSizes[cl][d] = LattSizes[cl][d] / blockSizes[cl][d];
-        Seeds[cl][d] = (cl + 1) * LattSizes[cl].size() + d + 1; // unimportant, just to get. e.g., {5, // 6, 7, 8} for first coarse level and // so on
+        Seeds[cl][d]     = (cl + 1) * LattSizes[cl].size() + d + 1;
+        // calculation unimportant, just to get. e.g., {5, 6, 7, 8} for first coarse level and so on
       }
 
       Grids.push_back(SpaceTimeGrid::makeFourDimGrid(LattSizes[cl], GridDefaultSimd(Nd, vComplex::Nsimd()), GridDefaultMpi()));
@@ -138,150 +135,148 @@ public:
 
 // template < class Fobj, class CComplex, int coarseSpins, int nbasis, class Matrix >
 // class MultiGridPreconditioner : public LinearFunction< Lattice< Fobj > > {
-template<class Fobj,class CComplex,int nbasis, class Matrix>
-class MultiGridPreconditioner : public LinearFunction< Lattice<Fobj> > {
+template<class Fobj, class CComplex, int nbasis, class Matrix> class MultiGridPreconditioner : public LinearFunction<Lattice<Fobj>> {
 public:
+  typedef Aggregation<Fobj, CComplex, nbasis>     Aggregates;
+  typedef CoarsenedMatrix<Fobj, CComplex, nbasis> CoarseOperator;
 
-  typedef Aggregation<Fobj,CComplex,nbasis> Aggregates;
-  typedef CoarsenedMatrix<Fobj,CComplex,nbasis> CoarseOperator;
+  typedef typename Aggregation<Fobj, CComplex, nbasis>::siteVector   siteVector;
+  typedef typename Aggregation<Fobj, CComplex, nbasis>::CoarseScalar CoarseScalar;
+  typedef typename Aggregation<Fobj, CComplex, nbasis>::CoarseVector CoarseVector;
+  typedef typename Aggregation<Fobj, CComplex, nbasis>::CoarseMatrix CoarseMatrix;
+  typedef typename Aggregation<Fobj, CComplex, nbasis>::FineField    FineField;
+  typedef LinearOperatorBase<FineField>                              FineOperator;
 
-  typedef typename Aggregation<Fobj,CComplex,nbasis>::siteVector     siteVector;
-  typedef typename Aggregation<Fobj,CComplex,nbasis>::CoarseScalar CoarseScalar;
-  typedef typename Aggregation<Fobj,CComplex,nbasis>::CoarseVector CoarseVector;
-  typedef typename Aggregation<Fobj,CComplex,nbasis>::CoarseMatrix CoarseMatrix;
-  typedef typename Aggregation<Fobj,CComplex,nbasis>::FineField    FineField;
-  typedef LinearOperatorBase<FineField>                            FineOperator;
-
-  Aggregates     & _Aggregates;
-  CoarseOperator & _CoarseOperator;
-  Matrix         & _FineMatrix;
-  FineOperator   & _FineOperator;
-  Matrix         & _SmootherMatrix;
-  FineOperator   & _SmootherOperator;
+  Aggregates &    _Aggregates;
+  CoarseOperator &_CoarseOperator;
+  Matrix &        _FineMatrix;
+  FineOperator &  _FineOperator;
+  Matrix &        _SmootherMatrix;
+  FineOperator &  _SmootherOperator;
 
   // Constructor
-  MultiGridPreconditioner(Aggregates &Agg, CoarseOperator &Coarse, 
-			  FineOperator &Fine,Matrix &FineMatrix,
-			  FineOperator &Smooth,Matrix &SmootherMatrix) 
-    : _Aggregates(Agg),
-      _CoarseOperator(Coarse),
-      _FineOperator(Fine),
-      _FineMatrix(FineMatrix),
-      _SmootherOperator(Smooth),
-      _SmootherMatrix(SmootherMatrix)
-  {
-  }
+  MultiGridPreconditioner(Aggregates &    Agg,
+                          CoarseOperator &Coarse,
+                          FineOperator &  Fine,
+                          Matrix &        FineMatrix,
+                          FineOperator &  Smooth,
+                          Matrix &        SmootherMatrix)
+    : _Aggregates(Agg)
+    , _CoarseOperator(Coarse)
+    , _FineOperator(Fine)
+    , _FineMatrix(FineMatrix)
+    , _SmootherOperator(Smooth)
+    , _SmootherMatrix(SmootherMatrix) {}
 
   void PowerMethod(const FineField &in) {
 
     FineField p1(in._grid);
     FineField p2(in._grid);
 
-    MdagMLinearOperator<Matrix,FineField>   fMdagMOp(_FineMatrix);
+    MdagMLinearOperator<Matrix, FineField> fMdagMOp(_FineMatrix);
 
-    p1=in;
+    p1 = in;
     RealD absp2;
-    for(int i=0;i<20;i++){
-      RealD absp1=std::sqrt(norm2(p1));
-      fMdagMOp.HermOp(p1,p2);// this is the G5 herm bit      
-      //      _FineOperator.Op(p1,p2);// this is the G5 herm bit      
-      RealD absp2=std::sqrt(norm2(p2));
-      if(i%10==9)
-	std::cout<<GridLogMessage << "Power method on mdagm "<<i<<" " << absp2/absp1<<std::endl;
-      p1=p2*(1.0/std::sqrt(absp2));
+    for(int i = 0; i < 20; i++) {
+      RealD absp1 = std::sqrt(norm2(p1));
+      fMdagMOp.HermOp(p1, p2); // this is the G5 herm bit
+      // _FineOperator.Op(p1,p2); // this is the G5 herm bit
+      RealD absp2 = std::sqrt(norm2(p2));
+      if(i % 10 == 9)
+        std::cout << GridLogMessage << "Power method on mdagm " << i << " " << absp2 / absp1 << std::endl;
+      p1 = p2 * (1.0 / std::sqrt(absp2));
     }
   }
 
-  void operator()(const FineField &in, FineField & out) {
-    if ( params.domaindecompose ) {
-      operatorSAP(in,out);
-    } else { 
-      operatorCheby(in,out);
+  void operator()(const FineField &in, FineField &out) {
+    if(params.domaindecompose) {
+      operatorSAP(in, out);
+    } else {
+      operatorCheby(in, out);
     }
   }
 
     ////////////////////////////////////////////////////////////////////////
     // ADEF2: [PTM+Q] in = [1 - Q A] M in + Q in = Min + Q [ in -A Min]
-    // ADEF1: [MP+Q ] in =M [1 - A Q] in + Q in  
+    // ADEF1: [MP+Q ] in = M [1 - A Q] in + Q in
     ////////////////////////////////////////////////////////////////////////
 #if 1
-  void operatorADEF2(const FineField &in, FineField & out) {
+  void operatorADEF2(const FineField &in, FineField &out) {
 
     CoarseVector Csrc(_CoarseOperator.Grid());
     CoarseVector Ctmp(_CoarseOperator.Grid());
     CoarseVector Csol(_CoarseOperator.Grid());
 
-    ConjugateGradient<CoarseVector>  CG(1.0e-10,100000);
-    ConjugateGradient<FineField>    fCG(3.0e-2,1000);
+    ConjugateGradient<CoarseVector> CG(1.0e-10, 100000);
+    ConjugateGradient<FineField>    fCG(3.0e-2, 1000);
 
-    HermitianLinearOperator<CoarseOperator,CoarseVector>  HermOp(_CoarseOperator);
-    MdagMLinearOperator<CoarseOperator,CoarseVector>     MdagMOp(_CoarseOperator);
-    MdagMLinearOperator<Matrix,FineField>               fMdagMOp(_FineMatrix);
+    HermitianLinearOperator<CoarseOperator, CoarseVector> HermOp(_CoarseOperator);
+    MdagMLinearOperator<CoarseOperator, CoarseVector>     MdagMOp(_CoarseOperator);
+    MdagMLinearOperator<Matrix, FineField>                fMdagMOp(_FineMatrix);
 
     FineField tmp(in._grid);
     FineField res(in._grid);
     FineField Min(in._grid);
 
     // Monitor completeness of low mode space
-    _Aggregates.ProjectToSubspace  (Csrc,in);
-    _Aggregates.PromoteFromSubspace(Csrc,out);
-    std::cout<<GridLogMessage<<"Coarse Grid Preconditioner\nCompleteness in: "<<std::sqrt(norm2(out)/norm2(in))<<std::endl;
+    _Aggregates.ProjectToSubspace(Csrc, in);
+    _Aggregates.PromoteFromSubspace(Csrc, out);
+    std::cout << GridLogMessage << "Coarse Grid Preconditioner\nCompleteness in: " << std::sqrt(norm2(out) / norm2(in)) << std::endl;
 
     // [PTM+Q] in = [1 - Q A] M in + Q in = Min + Q [ in -A Min]
-    _FineOperator.Op(in,tmp);// this is the G5 herm bit
-    fCG(fMdagMOp,tmp,Min);    // solves  MdagM = g5 M g5M
+    _FineOperator.Op(in, tmp); // this is the G5 herm bit
+    fCG(fMdagMOp, tmp, Min);   // solves MdagM = g5 M g5M
 
     // Monitor completeness of low mode space
-    _Aggregates.ProjectToSubspace  (Csrc,Min);
-    _Aggregates.PromoteFromSubspace(Csrc,out);
-    std::cout<<GridLogMessage<<"Completeness Min: "<<std::sqrt(norm2(out)/norm2(Min))<<std::endl;
+    _Aggregates.ProjectToSubspace(Csrc, Min);
+    _Aggregates.PromoteFromSubspace(Csrc, out);
+    std::cout << GridLogMessage << "Completeness Min: " << std::sqrt(norm2(out) / norm2(Min)) << std::endl;
 
-    _FineOperator.Op(Min,tmp);
-    tmp = in - tmp;   // in - A Min
+    _FineOperator.Op(Min, tmp);
+    tmp = in - tmp; // in - A Min
 
-    Csol=zero;
-    _Aggregates.ProjectToSubspace  (Csrc,tmp);
-    HermOp.AdjOp(Csrc,Ctmp);// Normal equations
-    CG(MdagMOp,Ctmp,Csol);
+    Csol = zero;
+    _Aggregates.ProjectToSubspace(Csrc, tmp);
+    HermOp.AdjOp(Csrc, Ctmp); // Normal equations
+    CG(MdagMOp, Ctmp, Csol);
 
-    HermOp.Op(Csol,Ctmp);
-    Ctmp=Ctmp-Csrc;
-    std::cout<<GridLogMessage<<"coarse space true residual "<<std::sqrt(norm2(Ctmp)/norm2(Csrc))<<std::endl;
-    _Aggregates.PromoteFromSubspace(Csol,out);
+    HermOp.Op(Csol, Ctmp);
+    Ctmp = Ctmp - Csrc;
+    std::cout << GridLogMessage << "coarse space true residual " << std::sqrt(norm2(Ctmp) / norm2(Csrc)) << std::endl;
+    _Aggregates.PromoteFromSubspace(Csol, out);
 
-    _FineOperator.Op(out,res);
-    res=res-tmp;
-    std::cout<<GridLogMessage<<"promoted sol residual "<<std::sqrt(norm2(res)/norm2(tmp))<<std::endl;
-    _Aggregates.ProjectToSubspace  (Csrc,res);
-    std::cout<<GridLogMessage<<"coarse space proj of residual "<<norm2(Csrc)<<std::endl;
+    _FineOperator.Op(out, res);
+    res = res - tmp;
+    std::cout << GridLogMessage << "promoted sol residual " << std::sqrt(norm2(res) / norm2(tmp)) << std::endl;
+    _Aggregates.ProjectToSubspace(Csrc, res);
+    std::cout << GridLogMessage << "coarse space proj of residual " << norm2(Csrc) << std::endl;
 
-    
-    out = out+Min; // additive coarse space correction
+    out = out + Min; // additive coarse space correction
     //    out = Min; // no additive coarse space correction
 
-    _FineOperator.Op(out,tmp);
-    tmp=tmp-in;         // tmp is new residual
-
-    std::cout<<GridLogMessage<< " Preconditioner in  " << norm2(in)<<std::endl; 
-    std::cout<<GridLogMessage<< " Preconditioner out " << norm2(out)<<std::endl; 
-    std::cout<<GridLogMessage<<"preconditioner thinks residual is "<<std::sqrt(norm2(tmp)/norm2(in))<<std::endl;
+    _FineOperator.Op(out, tmp);
+    tmp = tmp - in; // tmp is new residual
 
+    std::cout << GridLogMessage << " Preconditioner in  " << norm2(in) << std::endl;
+    std::cout << GridLogMessage << " Preconditioner out " << norm2(out) << std::endl;
+    std::cout << GridLogMessage << "preconditioner thinks residual is " << std::sqrt(norm2(tmp) / norm2(in)) << std::endl;
   }
 #endif
-  // ADEF1: [MP+Q ] in =M [1 - A Q] in + Q in  
+    // ADEF1: [MP+Q ] in = M [1 - A Q] in + Q in
 #if 1
-  void operatorADEF1(const FineField &in, FineField & out) {
+  void operatorADEF1(const FineField &in, FineField &out) {
 
     CoarseVector Csrc(_CoarseOperator.Grid());
     CoarseVector Ctmp(_CoarseOperator.Grid());
-    CoarseVector Csol(_CoarseOperator.Grid()); Csol=zero;
+    CoarseVector Csol(_CoarseOperator.Grid());
+    Csol = zero;
 
-    ConjugateGradient<CoarseVector>  CG(1.0e-10,100000);
-    ConjugateGradient<FineField>    fCG(3.0e-2,1000);
+    ConjugateGradient<CoarseVector> CG(1.0e-10, 100000);
+    ConjugateGradient<FineField>    fCG(3.0e-2, 1000);
 
-    HermitianLinearOperator<CoarseOperator,CoarseVector>  HermOp(_CoarseOperator);
-    MdagMLinearOperator<CoarseOperator,CoarseVector>     MdagMOp(_CoarseOperator);
-    ShiftedMdagMLinearOperator<Matrix,FineField>        fMdagMOp(_FineMatrix,0.1);
+    HermitianLinearOperator<CoarseOperator, CoarseVector> HermOp(_CoarseOperator);
+    MdagMLinearOperator<CoarseOperator, CoarseVector>     MdagMOp(_CoarseOperator);
+    ShiftedMdagMLinearOperator<Matrix, FineField>         fMdagMOp(_FineMatrix, 0.1);
 
     FineField tmp(in._grid);
     FineField res(in._grid);
@@ -291,147 +286,146 @@ public:
     //    _Aggregates.ProjectToSubspace  (Csrc,in);
     //    _Aggregates.PromoteFromSubspace(Csrc,out);
     //    std::cout<<GridLogMessage<<"Coarse Grid Preconditioner\nCompleteness in: "<<std::sqrt(norm2(out)/norm2(in))<<std::endl;
-    
-    _Aggregates.ProjectToSubspace  (Csrc,in);
-    HermOp.AdjOp(Csrc,Ctmp);// Normal equations
-    CG(MdagMOp,Ctmp,Csol);
-    _Aggregates.PromoteFromSubspace(Csol,Qin);
+
+    _Aggregates.ProjectToSubspace(Csrc, in);
+    HermOp.AdjOp(Csrc, Ctmp); // Normal equations
+    CG(MdagMOp, Ctmp, Csol);
+    _Aggregates.PromoteFromSubspace(Csol, Qin);
 
     //    Qin=0;
-    _FineOperator.Op(Qin,tmp);// A Q in
-    tmp = in - tmp;            // in - A Q in
+    _FineOperator.Op(Qin, tmp); // A Q in
+    tmp = in - tmp;             // in - A Q in
 
-    _FineOperator.Op(tmp,res);// this is the G5 herm bit
-    fCG(fMdagMOp,res,out);    // solves  MdagM = g5 M g5M
+    _FineOperator.Op(tmp, res); // this is the G5 herm bit
+    fCG(fMdagMOp, res, out);    // solves  MdagM = g5 M g5M
 
     out = out + Qin;
 
-    _FineOperator.Op(out,tmp);
-    tmp=tmp-in;         // tmp is new residual
-
-    std::cout<<GridLogMessage<<"preconditioner thinks residual is "<<std::sqrt(norm2(tmp)/norm2(in))<<std::endl;
+    _FineOperator.Op(out, tmp);
+    tmp = tmp - in; // tmp is new residual
 
+    std::cout << GridLogMessage << "preconditioner thinks residual is " << std::sqrt(norm2(tmp) / norm2(in)) << std::endl;
   }
 #endif
 
-  void SAP (const FineField & src,FineField & psi){
+  void SAP(const FineField &src, FineField &psi) {
+
+    Lattice<iScalar<vInteger>> coor(src._grid);
+    Lattice<iScalar<vInteger>> subset(src._grid);
 
-    Lattice<iScalar<vInteger> > coor(src._grid);
-    Lattice<iScalar<vInteger> > subset(src._grid);
-    
     FineField r(src._grid);
-    FineField zz(src._grid); zz=zero;
+    FineField zz(src._grid);
+    zz = zero;
     FineField vec1(src._grid);
     FineField vec2(src._grid);
 
-    const Integer block=params.domainsize;
+    const Integer block = params.domainsize;
 
-    subset=zero;
-    for(int mu=0;mu<Nd;mu++){
-      LatticeCoordinate(coor,mu+1);
-      coor = div(coor,block);
-      subset = subset+coor;
+    subset = zero;
+    for(int mu = 0; mu < Nd; mu++) {
+      LatticeCoordinate(coor, mu + 1);
+      coor   = div(coor, block);
+      subset = subset + coor;
     }
-    subset = mod(subset,(Integer)2);
-    
-    ShiftedMdagMLinearOperator<Matrix,FineField> fMdagMOp(_SmootherMatrix,0.0);
-    Chebyshev<FineField> Cheby  (params.lo,params.hi,params.order,InverseApproximation);
+    subset = mod(subset, (Integer)2);
+
+    ShiftedMdagMLinearOperator<Matrix, FineField> fMdagMOp(_SmootherMatrix, 0.0);
+    Chebyshev<FineField>                          Cheby(params.lo, params.hi, params.order, InverseApproximation);
 
     RealD resid;
-    for(int i=0;i<params.steps;i++){
-      
+    for(int i = 0; i < params.steps; i++) {
+
       // Even domain residual
-      _FineOperator.Op(psi,vec1);// this is the G5 herm bit
-      r= src - vec1 ;
-      resid = norm2(r) /norm2(src); 
-      std::cout << "SAP "<<i<<" resid "<<resid<<std::endl;
+      _FineOperator.Op(psi, vec1); // this is the G5 herm bit
+      r     = src - vec1;
+      resid = norm2(r) / norm2(src);
+      std::cout << "SAP " << i << " resid " << resid << std::endl;
 
       // Even domain solve
-      r= where(subset==(Integer)0,r,zz);
-      _SmootherOperator.AdjOp(r,vec1);
-      Cheby(fMdagMOp,vec1,vec2);    // solves  MdagM = g5 M g5M
-      psi = psi + vec2;  
+      r = where(subset == (Integer)0, r, zz);
+      _SmootherOperator.AdjOp(r, vec1);
+      Cheby(fMdagMOp, vec1, vec2); // solves  MdagM = g5 M g5M
+      psi = psi + vec2;
 
       // Odd domain residual
-      _FineOperator.Op(psi,vec1);// this is the G5 herm bit
-      r= src - vec1 ;
-      r= where(subset==(Integer)1,r,zz);
+      _FineOperator.Op(psi, vec1); // this is the G5 herm bit
+      r = src - vec1;
+      r = where(subset == (Integer)1, r, zz);
+
+      resid = norm2(r) / norm2(src);
+      std::cout << "SAP " << i << " resid " << resid << std::endl;
 
-      resid = norm2(r) /norm2(src); 
-      std::cout << "SAP "<<i<<" resid "<<resid<<std::endl;
-      
       // Odd domain solve
-      _SmootherOperator.AdjOp(r,vec1);
-      Cheby(fMdagMOp,vec1,vec2);    // solves  MdagM = g5 M g5M
-      psi = psi + vec2;  
-
-      _FineOperator.Op(psi,vec1);// this is the G5 herm bit
-      r= src - vec1 ;
-      resid = norm2(r) /norm2(src); 
-      std::cout << "SAP "<<i<<" resid "<<resid<<std::endl;
+      _SmootherOperator.AdjOp(r, vec1);
+      Cheby(fMdagMOp, vec1, vec2); // solves  MdagM = g5 M g5M
+      psi = psi + vec2;
 
+      _FineOperator.Op(psi, vec1); // this is the G5 herm bit
+      r     = src - vec1;
+      resid = norm2(r) / norm2(src);
+      std::cout << "SAP " << i << " resid " << resid << std::endl;
     }
-
   };
 
-  void SmootherTest (const FineField & in){
-    
+  void SmootherTest(const FineField &in) {
+
     FineField vec1(in._grid);
     FineField vec2(in._grid);
-    RealD lo[3] = { 0.5, 1.0, 2.0};
+
+    RealD lo[3] = {0.5, 1.0, 2.0};
 
     //    MdagMLinearOperator<Matrix,FineField>        fMdagMOp(_FineMatrix);
-    ShiftedMdagMLinearOperator<Matrix,FineField> fMdagMOp(_SmootherMatrix,0.0);
+    ShiftedMdagMLinearOperator<Matrix, FineField> fMdagMOp(_SmootherMatrix, 0.0);
 
-    RealD Ni,r;
+    RealD Ni, r;
 
     Ni = norm2(in);
 
-    for(int ilo=0;ilo<3;ilo++){
-      for(int ord=5;ord<50;ord*=2){
+    for(int ilo = 0; ilo < 3; ilo++) {
+      for(int ord = 5; ord < 50; ord *= 2) {
 
-	_SmootherOperator.AdjOp(in,vec1);
+        _SmootherOperator.AdjOp(in, vec1);
 
-	Chebyshev<FineField> Cheby  (lo[ilo],70.0,ord,InverseApproximation);
-	Cheby(fMdagMOp,vec1,vec2);    // solves  MdagM = g5 M g5M
-
-	_FineOperator.Op(vec2,vec1);// this is the G5 herm bit
-	vec1  = in - vec1;   // tmp  = in - A Min
-	r=norm2(vec1);
-	std::cout<<GridLogMessage << "Smoother resid "<<std::sqrt(r/Ni)<<std::endl;
+        Chebyshev<FineField> Cheby(lo[ilo], 70.0, ord, InverseApproximation);
+        Cheby(fMdagMOp, vec1, vec2); // solves  MdagM = g5 M g5M
 
+        _FineOperator.Op(vec2, vec1); // this is the G5 herm bit
+        vec1 = in - vec1;             // tmp  = in - A Min
+        r    = norm2(vec1);
+        std::cout << GridLogMessage << "Smoother resid " << std::sqrt(r / Ni) << std::endl;
       }
     }
   }
 
-  void operatorCheby(const FineField &in, FineField & out) {
+  void operatorCheby(const FineField &in, FineField &out) {
 
     CoarseVector Csrc(_CoarseOperator.Grid());
     CoarseVector Ctmp(_CoarseOperator.Grid());
-    CoarseVector Csol(_CoarseOperator.Grid()); Csol=zero;
+    CoarseVector Csol(_CoarseOperator.Grid());
+    Csol = zero;
 
-    ConjugateGradient<CoarseVector>  CG(3.0e-3,100000);
+    ConjugateGradient<CoarseVector> CG(3.0e-3, 100000);
     //    ConjugateGradient<FineField>    fCG(3.0e-2,1000);
 
-    HermitianLinearOperator<CoarseOperator,CoarseVector>  HermOp(_CoarseOperator);
-    MdagMLinearOperator<CoarseOperator,CoarseVector>     MdagMOp(_CoarseOperator);
+    HermitianLinearOperator<CoarseOperator, CoarseVector> HermOp(_CoarseOperator);
+    MdagMLinearOperator<CoarseOperator, CoarseVector>     MdagMOp(_CoarseOperator);
     //    MdagMLinearOperator<Matrix,FineField>        fMdagMOp(_FineMatrix);
-    ShiftedMdagMLinearOperator<Matrix,FineField> fMdagMOp(_SmootherMatrix,0.0);
+    ShiftedMdagMLinearOperator<Matrix, FineField> fMdagMOp(_SmootherMatrix, 0.0);
 
     FineField vec1(in._grid);
     FineField vec2(in._grid);
 
     //    Chebyshev<FineField> Cheby    (0.5,70.0,30,InverseApproximation);
     //    Chebyshev<FineField> ChebyAccu(0.5,70.0,30,InverseApproximation);
-    Chebyshev<FineField> Cheby    (params.lo,params.hi,params.order,InverseApproximation);
-    Chebyshev<FineField> ChebyAccu(params.lo,params.hi,params.order,InverseApproximation);
+    Chebyshev<FineField> Cheby(params.lo, params.hi, params.order, InverseApproximation);
+    Chebyshev<FineField> ChebyAccu(params.lo, params.hi, params.order, InverseApproximation);
     //    Cheby.JacksonSmooth();
     //    ChebyAccu.JacksonSmooth();
 
     //    _Aggregates.ProjectToSubspace  (Csrc,in);
     //    _Aggregates.PromoteFromSubspace(Csrc,out);
     //    std::cout<<GridLogMessage<<"Completeness: "<<std::sqrt(norm2(out)/norm2(in))<<std::endl;
-    
+
     //    ofstream fout("smoother");
     //    Cheby.csv(fout);
 
@@ -445,137 +439,130 @@ public:
 
     RealD Ni = norm2(in);
 
-    _SmootherOperator.AdjOp(in,vec1);// this is the G5 herm bit
-    ChebyAccu(fMdagMOp,vec1,out);    // solves  MdagM = g5 M g5M
+    _SmootherOperator.AdjOp(in, vec1); // this is the G5 herm bit
+    ChebyAccu(fMdagMOp, vec1, out);    // solves  MdagM = g5 M g5M
 
-    std::cout<<GridLogMessage << "Smoother norm "<<norm2(out)<<std::endl;
+    std::cout << GridLogMessage << "Smoother norm " << norm2(out) << std::endl;
 
     // Update with residual for out
-    _FineOperator.Op(out,vec1);// this is the G5 herm bit
-    vec1  = in - vec1;   // tmp  = in - A Min
+    _FineOperator.Op(out, vec1); // this is the G5 herm bit
+    vec1 = in - vec1;            // tmp  = in - A Min
 
     RealD r = norm2(vec1);
 
-    std::cout<<GridLogMessage << "Smoother resid "<<std::sqrt(r/Ni)<< " " << r << " " << Ni <<std::endl;
-    
-    _Aggregates.ProjectToSubspace  (Csrc,vec1);
-    HermOp.AdjOp(Csrc,Ctmp);// Normal equations
-    CG(MdagMOp,Ctmp,Csol);
-    _Aggregates.PromoteFromSubspace(Csol,vec1); // Ass^{-1} [in - A Min]_s
-                                             // Q = Q[in - A Min]  
-    out = out+vec1;
+    std::cout << GridLogMessage << "Smoother resid " << std::sqrt(r / Ni) << " " << r << " " << Ni << std::endl;
+
+    _Aggregates.ProjectToSubspace(Csrc, vec1);
+    HermOp.AdjOp(Csrc, Ctmp); // Normal equations
+    CG(MdagMOp, Ctmp, Csol);
+    _Aggregates.PromoteFromSubspace(Csol, vec1); // Ass^{-1} [in - A Min]_s
+                                                 // Q = Q[in - A Min]
+    out = out + vec1;
 
     // Three preconditioner smoothing -- hermitian if C3 = C1
     // Recompute error
-    _FineOperator.Op(out,vec1);// this is the G5 herm bit
-    vec1  = in - vec1;   // tmp  = in - A Min
-    r=norm2(vec1);
+    _FineOperator.Op(out, vec1); // this is the G5 herm bit
+    vec1 = in - vec1;            // tmp  = in - A Min
+    r    = norm2(vec1);
 
-    std::cout<<GridLogMessage << "Coarse resid "<<std::sqrt(r/Ni)<<std::endl;
+    std::cout << GridLogMessage << "Coarse resid " << std::sqrt(r / Ni) << std::endl;
 
     // Reapply smoother
-    _SmootherOperator.Op(vec1,vec2);  // this is the G5 herm bit
-    ChebyAccu(fMdagMOp,vec2,vec1);    // solves  MdagM = g5 M g5M
-
-    out =out+vec1;
-    vec1  = in - vec1;   // tmp  = in - A Min
-    r=norm2(vec1);
-    std::cout<<GridLogMessage << "Smoother resid "<<std::sqrt(r/Ni)<<std::endl;
+    _SmootherOperator.Op(vec1, vec2); // this is the G5 herm bit
+    ChebyAccu(fMdagMOp, vec2, vec1);  // solves  MdagM = g5 M g5M
 
+    out  = out + vec1;
+    vec1 = in - vec1; // tmp  = in - A Min
+    r    = norm2(vec1);
+    std::cout << GridLogMessage << "Smoother resid " << std::sqrt(r / Ni) << std::endl;
   }
 
-  void operatorSAP(const FineField &in, FineField & out) {
+  void operatorSAP(const FineField &in, FineField &out) {
 
     CoarseVector Csrc(_CoarseOperator.Grid());
     CoarseVector Ctmp(_CoarseOperator.Grid());
-    CoarseVector Csol(_CoarseOperator.Grid()); Csol=zero;
+    CoarseVector Csol(_CoarseOperator.Grid());
+    Csol = zero;
 
-    ConjugateGradient<CoarseVector>  CG(1.0e-3,100000);
+    ConjugateGradient<CoarseVector> CG(1.0e-3, 100000);
 
-    HermitianLinearOperator<CoarseOperator,CoarseVector>  HermOp(_CoarseOperator);
-    MdagMLinearOperator<CoarseOperator,CoarseVector>     MdagMOp(_CoarseOperator);
+    HermitianLinearOperator<CoarseOperator, CoarseVector> HermOp(_CoarseOperator);
+    MdagMLinearOperator<CoarseOperator, CoarseVector>     MdagMOp(_CoarseOperator);
 
     FineField vec1(in._grid);
     FineField vec2(in._grid);
 
-    _Aggregates.ProjectToSubspace  (Csrc,in);
-    _Aggregates.PromoteFromSubspace(Csrc,out);
-    std::cout<<GridLogMessage<<"Completeness: "<<std::sqrt(norm2(out)/norm2(in))<<std::endl;
-    
+    _Aggregates.ProjectToSubspace(Csrc, in);
+    _Aggregates.PromoteFromSubspace(Csrc, out);
+    std::cout << GridLogMessage << "Completeness: " << std::sqrt(norm2(out) / norm2(in)) << std::endl;
 
     // To make a working smoother for indefinite operator
     // must multiply by "Mdag" (ouch loses all low mode content)
     // and apply to poly approx of (mdagm)^-1.
     // so that we end up with an odd polynomial.
-    SAP(in,out);
+    SAP(in, out);
 
     // Update with residual for out
-    _FineOperator.Op(out,vec1);// this is the G5 herm bit
-    vec1  = in - vec1;   // tmp  = in - A Min
+    _FineOperator.Op(out, vec1); // this is the G5 herm bit
+    vec1 = in - vec1;            // tmp  = in - A Min
 
-    RealD r = norm2(vec1);
+    RealD r  = norm2(vec1);
     RealD Ni = norm2(in);
-    std::cout<<GridLogMessage << "SAP resid "<<std::sqrt(r/Ni)<< " " << r << " " << Ni <<std::endl;
-    
-    _Aggregates.ProjectToSubspace  (Csrc,vec1);
-    HermOp.AdjOp(Csrc,Ctmp);// Normal equations
-    CG(MdagMOp,Ctmp,Csol);
-    _Aggregates.PromoteFromSubspace(Csol,vec1); // Ass^{-1} [in - A Min]_s
-                                             // Q = Q[in - A Min]  
-    out = out+vec1;
+    std::cout << GridLogMessage << "SAP resid " << std::sqrt(r / Ni) << " " << r << " " << Ni << std::endl;
+
+    _Aggregates.ProjectToSubspace(Csrc, vec1);
+    HermOp.AdjOp(Csrc, Ctmp); // Normal equations
+    CG(MdagMOp, Ctmp, Csol);
+    _Aggregates.PromoteFromSubspace(Csol, vec1); // Ass^{-1} [in - A Min]_s
+                                                 // Q = Q[in - A Min]
+    out = out + vec1;
 
     // Three preconditioner smoothing -- hermitian if C3 = C1
     // Recompute error
-    _FineOperator.Op(out,vec1);// this is the G5 herm bit
-    vec1  = in - vec1;   // tmp  = in - A Min
-    r=norm2(vec1);
+    _FineOperator.Op(out, vec1); // this is the G5 herm bit
+    vec1 = in - vec1;            // tmp  = in - A Min
+    r    = norm2(vec1);
 
-    std::cout<<GridLogMessage << "Coarse resid "<<std::sqrt(r/Ni)<<std::endl;
+    std::cout << GridLogMessage << "Coarse resid " << std::sqrt(r / Ni) << std::endl;
 
     // Reapply smoother
-    SAP(vec1,vec2);
-    out =out+vec2;
-
+    SAP(vec1, vec2);
+    out = out + vec2;
 
     // Update with residual for out
-    _FineOperator.Op(out,vec1);// this is the G5 herm bit
-    vec1  = in - vec1;   // tmp  = in - A Min
+    _FineOperator.Op(out, vec1); // this is the G5 herm bit
+    vec1 = in - vec1;            // tmp  = in - A Min
 
-    r = norm2(vec1);
+    r  = norm2(vec1);
     Ni = norm2(in);
-    std::cout<<GridLogMessage << "SAP resid(post) "<<std::sqrt(r/Ni)<< " " << r << " " << Ni <<std::endl;
-
+    std::cout << GridLogMessage << "SAP resid(post) " << std::sqrt(r / Ni) << " " << r << " " << Ni << std::endl;
   }
-
 };
 
-struct MGParams
-{
-    std::vector< std::vector< int > > blockSizes;
-    const int                         nbasis;
+struct MGParams {
+  std::vector<std::vector<int>> blockSizes;
+  const int                     nbasis;
 
-    MGParams()
-        : blockSizes( { { 1, 1, 1, 2 } } )
-        // : blockSizes({ {1,1,1,2}, {1,1,1,2} })
-        // : blockSizes({ {1,1,1,2}, {1,1,1,2}, {1,1,1,2} })
-        , nbasis( 20 )
-    {
-    }
+  MGParams()
+    : blockSizes({{1, 1, 1, 2}})
+    // : blockSizes({{1,1,1,2}, {1,1,1,2}})
+    // : blockSizes({{1,1,1,2}, {1,1,1,2}, {1,1,1,2}})
+    , nbasis(20) {}
 };
 
-int main (int argc, char ** argv)
-{
-  Grid_init(&argc,&argv);
+int main(int argc, char **argv) {
 
-  params.domainsize= 1;
-  params.coarsegrids= 1;
+  Grid_init(&argc, &argv);
+
+  params.domainsize      = 1;
+  params.coarsegrids     = 1;
   params.domaindecompose = 0;
-  params.order = 30;
-  params.Ls = 1;
+  params.order           = 30;
+  params.Ls              = 1;
   // params.mq = .13;
-  params.mq = .5;
-  params.lo = 0.5;
-  params.hi = 70.0;
+  params.mq    = .5;
+  params.lo    = 0.5;
+  params.hi    = 70.0;
   params.steps = 1;
 
   auto mgp = MGParams{};
@@ -590,50 +577,52 @@ int main (int argc, char ** argv)
   std::cout << GridLogMessage << "Set up some fine level stuff: " << std::endl;
   std::cout << GridLogMessage << "**************************************************" << std::endl;
 
-  GridCartesian         * FGrid   = SpaceTimeGrid::makeFourDimGrid(GridDefaultLatt(),GridDefaultSimd(Nd, vComplex::Nsimd()),GridDefaultMpi());
-  GridRedBlackCartesian * FrbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(FGrid);
+  GridCartesian *FGrid = SpaceTimeGrid::makeFourDimGrid(GridDefaultLatt(), GridDefaultSimd(Nd, vComplex::Nsimd()), GridDefaultMpi());
+  GridRedBlackCartesian *FrbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(FGrid);
 
-  std::vector<int> fSeeds( {1, 2, 3, 4} );
-  GridParallelRNG    fPRNG( FGrid );
-  fPRNG.SeedFixedIntegers( fSeeds );
+  std::vector<int> fSeeds({1, 2, 3, 4});
+  GridParallelRNG  fPRNG(FGrid);
+  fPRNG.SeedFixedIntegers(fSeeds);
 
   Gamma g5(Gamma::Algebra::Gamma5);
 
-  LatticeFermion    src(FGrid); gaussian(fPRNG, src); // src=src+g5*src;
-  LatticeFermion result(FGrid); result = zero;
-  LatticeFermion    ref(FGrid); ref = zero;
-  LatticeFermion    tmp(FGrid);
-  LatticeFermion    err(FGrid);
-  LatticeGaugeField Umu(FGrid); SU3::HotConfiguration(fPRNG, Umu);
-  LatticeGaugeField UmuDD(FGrid);
-  LatticeColourMatrix U(FGrid);
-  LatticeColourMatrix zz(FGrid);
+  // clang-format off
+  LatticeFermion        src(FGrid); gaussian(fPRNG, src); // src=src + g5 * src;
+  LatticeFermion     result(FGrid); result = zero;
+  LatticeFermion        ref(FGrid); ref = zero;
+  LatticeFermion        tmp(FGrid);
+  LatticeFermion        err(FGrid);
+  LatticeGaugeField     Umu(FGrid); SU3::HotConfiguration(fPRNG, Umu);
+  LatticeGaugeField   UmuDD(FGrid);
+  LatticeColourMatrix     U(FGrid);
+  LatticeColourMatrix     zz(FGrid);
+  // clang-format on
 
-  if ( params.domaindecompose ) {
-    Lattice<iScalar<vInteger> > coor(FGrid);
-    zz=zero;
-    for(int mu=0;mu<Nd;mu++){
-      LatticeCoordinate(coor,mu);
-      U = PeekIndex<LorentzIndex>(Umu,mu);
-      U = where(mod(coor,params.domainsize)==(Integer)0,zz,U);
-      PokeIndex<LorentzIndex>(UmuDD,U,mu);
+  if(params.domaindecompose) {
+    Lattice<iScalar<vInteger>> coor(FGrid);
+    zz = zero;
+    for(int mu = 0; mu < Nd; mu++) {
+      LatticeCoordinate(coor, mu);
+      U = PeekIndex<LorentzIndex>(Umu, mu);
+      U = where(mod(coor, params.domainsize) == (Integer)0, zz, U);
+      PokeIndex<LorentzIndex>(UmuDD, U, mu);
     }
-  } else { 
+  } else {
     UmuDD = Umu;
   }
 
-  RealD mass=params.mq;
+  RealD mass = params.mq;
 
   std::cout << GridLogMessage << "**************************************************" << std::endl;
   std::cout << GridLogMessage << "Set up some coarser levels stuff: " << std::endl;
   std::cout << GridLogMessage << "**************************************************" << std::endl;
 
-  std::vector< std::vector< int > > blockSizes({ { 1, 1, 1, 2 } } ); // corresponds to two level algorithm
-  // std::vector< std::vector<int> > blockSizes({ {1,1,1,2},       // // corresponds to three level algorithm
-  //                                              {1,1,1,2} });
+  std::vector<std::vector<int>> blockSizes({{1, 1, 1, 2}}); // corresponds to two level algorithm
+  // std::vector<std::vector<int>> blockSizes({{1, 1, 1, 2},   // corresponds to three level algorithm
+  //                                           {1, 1, 1, 2}});
 
   const int nbasis = 20; // we fix the number of test vector to the same
-                          // number on every level for now
+                         // number on every level for now
 
   // // some stuff we need for every coarser lattice
   // std::vector<std::vector<int>> cLattSizes({GridDefaultLatt()});;
@@ -657,27 +646,27 @@ int main (int argc, char ** argv)
   //   // std::cout << cLattSizes[cl] << std::endl;
   // }
 
-    // GridParallelRNG cPRNG(CGrid); cPRNG.SeedFixedIntegers(cSeeds);
+  // GridParallelRNG cPRNG(CGrid); cPRNG.SeedFixedIntegers(cSeeds);
 
-    CoarseGrids< nbasis > cGrids( blockSizes );
+  CoarseGrids<nbasis> cGrids(blockSizes);
 
-    // assert(0);
+  // assert(0);
 
-  std::cout<<GridLogMessage << "**************************************************"<< std::endl;
-  std::cout<<GridLogMessage << "Building the wilson operator on the fine grid" <<std::endl;
-  std::cout<<GridLogMessage << "**************************************************"<< std::endl;
-  
-  WilsonFermionR Dw(Umu,*FGrid,*FrbGrid,mass);
-  WilsonFermionR DwDD(UmuDD,*FGrid,*FrbGrid,mass);
+  std::cout << GridLogMessage << "**************************************************" << std::endl;
+  std::cout << GridLogMessage << "Building the wilson operator on the fine grid" << std::endl;
+  std::cout << GridLogMessage << "**************************************************" << std::endl;
 
-  std::cout<<GridLogMessage<< "**************************************************"<< std::endl;
-  std::cout<<GridLogMessage<< "Some typedefs" <<std::endl;
-  std::cout<<GridLogMessage<< "**************************************************"<< std::endl;
+  WilsonFermionR Dw(Umu, *FGrid, *FrbGrid, mass);
+  WilsonFermionR DwDD(UmuDD, *FGrid, *FrbGrid, mass);
 
-  typedef Aggregation<vSpinColourVector,vTComplex,nbasis>              Subspace;
-  typedef CoarsenedMatrix<vSpinColourVector,vTComplex,nbasis>          CoarseOperator;
-  typedef CoarseOperator::CoarseVector                                 CoarseVector;
-  typedef TestVectorAnalyzer<LatticeFermion,nbasis> TVA;
+  std::cout << GridLogMessage << "**************************************************" << std::endl;
+  std::cout << GridLogMessage << "Some typedefs" << std::endl;
+  std::cout << GridLogMessage << "**************************************************" << std::endl;
+
+  typedef Aggregation<vSpinColourVector, vTComplex, nbasis>     Subspace;
+  typedef CoarsenedMatrix<vSpinColourVector, vTComplex, nbasis> CoarseOperator;
+  typedef CoarseOperator::CoarseVector                          CoarseVector;
+  typedef TestVectorAnalyzer<LatticeFermion, nbasis>            TVA;
 
   // typedef Aggregation<vSpinColourVector,vTComplex,1,nbasis> Subspace;
   // typedef CoarsenedMatrix<vSpinColourVector,vTComplex,1,nbasis> CoarseOperator;
@@ -688,6 +677,7 @@ int main (int argc, char ** argv)
   // CoarseOperator::CoarseG5PMatrix CoarseG5PMatrix;
 
 #if 1
+  // clang-format off
   std::cout << std::endl;
   std::cout << "type_name<decltype(vTComplex{})>()                      = " << type_name<decltype(vTComplex{})>()                      << std::endl;
   std::cout << "type_name<GridTypeMapper<vTComplex>::scalar_type>()     = " << type_name<GridTypeMapper<vTComplex>::scalar_type>()     << std::endl;
@@ -711,22 +701,23 @@ int main (int argc, char ** argv)
   std::cout << "type_name<GridTypeMapper<TComplex>::Realified>()       = " << type_name<GridTypeMapper<TComplex>::Realified>()       << std::endl;
   std::cout << "type_name<GridTypeMapper<TComplex>::DoublePrecision>() = " << type_name<GridTypeMapper<TComplex>::DoublePrecision>() << std::endl;
   std::cout << std::endl;
+  // clang-format on
 #endif
 
-  std::cout<<GridLogMessage << "**************************************************"<< std::endl;
-  std::cout<<GridLogMessage << "Calling Aggregation class to build subspaces" <<std::endl;
-  std::cout<<GridLogMessage << "**************************************************"<< std::endl;
+  std::cout << GridLogMessage << "**************************************************" << std::endl;
+  std::cout << GridLogMessage << "Calling Aggregation class to build subspaces" << std::endl;
+  std::cout << GridLogMessage << "**************************************************" << std::endl;
 
   // • TODO: need some way to run the smoother on the "test vectors" for a few
   //   times before constructing the subspace from them
   // • Maybe an application for an mrhs (true mrhs, no block) smoother?
   // • In WMG, the vectors are normalized but not orthogonalized, but here they
   //   are constructed randomly and then orthogonalized (rather orthonormalized) against each other
-  MdagMLinearOperator<WilsonFermionR,LatticeFermion> HermOp(Dw);
-  Subspace Aggregates(cGrids.Grids[0],FGrid,0);
-  assert ((nbasis & 0x1)==0);
-  int nb=nbasis/2;
-  std::cout<<GridLogMessage << " nbasis/2 = "<<nb<<std::endl;
+  MdagMLinearOperator<WilsonFermionR, LatticeFermion> HermOp(Dw);
+  Subspace                                            Aggregates(cGrids.Grids[0], FGrid, 0);
+  assert((nbasis & 0x1) == 0);
+  int nb = nbasis / 2;
+  std::cout << GridLogMessage << " nbasis/2 = " << nb << std::endl;
 
   Aggregates.CreateSubspace(fPRNG, HermOp /*, nb */); // Don't specify nb to see the orthogonalization check
 
@@ -734,39 +725,42 @@ int main (int argc, char ** argv)
 
   testVectorAnalyzer(HermOp, Aggregates.subspace, nb);
 
-  for(int n=0;n<nb;n++){
-    Aggregates.subspace[n+nb] = g5 * Aggregates.subspace[n]; // multiply with g5 normally instead of G5R5 since this specific to DWF
-    std::cout<<GridLogMessage<<n<<" subspace "<<norm2(Aggregates.subspace[n+nb])<<" "<<norm2(Aggregates.subspace[n]) <<std::endl;
+  for(int n = 0; n < nb; n++) {
+    // multiply with g5 normally instead of G5R5 since this specific to DWF
+    Aggregates.subspace[n + nb] = g5 * Aggregates.subspace[n];
+    std::cout << GridLogMessage << n << " subspace " << norm2(Aggregates.subspace[n + nb]) << " " << norm2(Aggregates.subspace[n])
+              << std::endl;
   }
-  for(int n=0;n<nbasis;n++){
-    std::cout<<GridLogMessage << "vec["<<n<<"] = "<<norm2(Aggregates.subspace[n])  <<std::endl;
+  for(int n = 0; n < nbasis; n++) {
+    std::cout << GridLogMessage << "vec[" << n << "] = " << norm2(Aggregates.subspace[n]) << std::endl;
   }
 
   // tva(HermOp, Aggregates.subspace);
   Aggregates.CheckOrthogonal();
   testVectorAnalyzer(HermOp, Aggregates.subspace);
 
-  result=zero;
+  result = zero;
 
-  std::cout<<GridLogMessage << "**************************************************"<< std::endl;
-  std::cout<<GridLogMessage << "Building coarse representation of Dirac operator" <<std::endl;
-  std::cout<<GridLogMessage << "**************************************************"<< std::endl;
+  std::cout << GridLogMessage << "**************************************************" << std::endl;
+  std::cout << GridLogMessage << "Building coarse representation of Dirac operator" << std::endl;
+  std::cout << GridLogMessage << "**************************************************" << std::endl;
 
-  Gamma5HermitianLinearOperator<WilsonFermionR,LatticeFermion> HermIndefOp(Dw); // this corresponds to working with H = g5 * D
-  Gamma5HermitianLinearOperator<WilsonFermionR,LatticeFermion> HermIndefOpDD(DwDD);
-  CoarsenedMatrix<vSpinColourVector,vTComplex,nbasis> CoarseOp(*cGrids.Grids[0]);
+  // using Gamma5HermitianLinearOperator corresponds to working with H = g5 * D
+  Gamma5HermitianLinearOperator<WilsonFermionR, LatticeFermion> HermIndefOp(Dw);
+  Gamma5HermitianLinearOperator<WilsonFermionR, LatticeFermion> HermIndefOpDD(DwDD);
+  CoarsenedMatrix<vSpinColourVector, vTComplex, nbasis>         CoarseOp(*cGrids.Grids[0]);
   CoarseOp.CoarsenOperator(FGrid, HermIndefOp, Aggregates); // uses only linop.OpDiag & linop.OpDir
 
   std::cout << GridLogMessage << "**************************************************" << std::endl;
   std::cout << GridLogMessage << "Building coarse vectors" << std::endl;
   std::cout << GridLogMessage << "**************************************************" << std::endl;
 
-  CoarseVector c_src (cGrids.Grids[0]);
-  CoarseVector c_res (cGrids.Grids[0]);
-  gaussian(cGrids.PRNGs[0],c_src);
-  c_res=zero;
+  CoarseVector c_src(cGrids.Grids[0]);
+  CoarseVector c_res(cGrids.Grids[0]);
+  gaussian(cGrids.PRNGs[0], c_src);
+  c_res = zero;
 
-  std::cout << "type_name<decltype(c_src)>() = " << type_name< decltype( c_src ) >() << std::endl;
+  std::cout << "type_name<decltype(c_src)>() = " << type_name<decltype(c_src)>() << std::endl;
 
   // c_res = g5 * c_src;
 
@@ -774,9 +768,9 @@ int main (int argc, char ** argv)
   std::cout << GridLogMessage << "Solving posdef-MR on coarse space " << std::endl;
   std::cout << GridLogMessage << "**************************************************" << std::endl;
 
-  MdagMLinearOperator<CoarseOperator,CoarseVector> PosdefLdop(CoarseOp);
-  MinimalResidual<CoarseVector>   MR(5.0e-2, 100, false);
-  ConjugateGradient<CoarseVector> CG(5.0e-2, 100, false);
+  MdagMLinearOperator<CoarseOperator, CoarseVector> PosdefLdop(CoarseOp);
+  MinimalResidual<CoarseVector>                     MR(5.0e-2, 100, false);
+  ConjugateGradient<CoarseVector>                   CG(5.0e-2, 100, false);
 
   MR(PosdefLdop, c_src, c_res);
 
@@ -811,17 +805,15 @@ int main (int argc, char ** argv)
   // ConjugateResidual<CoarseVector> MCR(1.0e-6,100000);
   // MCR(HermIndefLdop,c_src,c_res);
 
-  std::cout<<GridLogMessage << "**************************************************"<< std::endl;
-  std::cout<<GridLogMessage << "Building deflation preconditioner "<< std::endl;
-  std::cout<<GridLogMessage << "**************************************************"<< std::endl;
+  std::cout << GridLogMessage << "**************************************************" << std::endl;
+  std::cout << GridLogMessage << "Building deflation preconditioner " << std::endl;
+  std::cout << GridLogMessage << "**************************************************" << std::endl;
 
-  MultiGridPreconditioner <vSpinColourVector,vTComplex,nbasis,WilsonFermionR> Precon  (Aggregates, CoarseOp,
-                                                                                       HermIndefOp,Dw,
-                                                                                       HermIndefOp,Dw);
+  MultiGridPreconditioner<vSpinColourVector, vTComplex, nbasis, WilsonFermionR> Precon(
+    Aggregates, CoarseOp, HermIndefOp, Dw, HermIndefOp, Dw);
 
-  MultiGridPreconditioner <vSpinColourVector,vTComplex,nbasis,WilsonFermionR> PreconDD(Aggregates, CoarseOp,
-                                                                                       HermIndefOp,Dw,
-                                                                                       HermIndefOpDD,DwDD);
+  MultiGridPreconditioner<vSpinColourVector, vTComplex, nbasis, WilsonFermionR> PreconDD(
+    Aggregates, CoarseOp, HermIndefOp, Dw, HermIndefOpDD, DwDD);
   // MultiGridPreconditioner(Aggregates &Agg, CoarseOperator &Coarse,
   //                         FineOperator &Fine,Matrix &FineMatrix,
   //                         FineOperator &Smooth,Matrix &SmootherMatrix)
@@ -831,8 +823,8 @@ int main (int argc, char ** argv)
   std::cout << GridLogMessage << "Building two level VPGCR and FGMRES solvers" << std::endl;
   std::cout << GridLogMessage << "**************************************************" << std::endl;
 
-  PrecGeneralisedConjugateResidual<LatticeFermion>    VPGCRMG(1.0e-12,100,Precon,8,8);
-  FlexibleGeneralisedMinimalResidual<LatticeFermion> FGMRESMG(1.0e-12,100,Precon,8);
+  PrecGeneralisedConjugateResidual<LatticeFermion>   VPGCRMG(1.0e-12, 100, Precon, 8, 8);
+  FlexibleGeneralisedMinimalResidual<LatticeFermion> FGMRESMG(1.0e-12, 100, Precon, 8);
 
   std::cout << GridLogMessage << "checking norm src " << norm2(src) << std::endl;
 
@@ -840,14 +832,14 @@ int main (int argc, char ** argv)
   std::cout << GridLogMessage << "Building unpreconditioned VPGCR and FGMRES solvers" << std::endl;
   std::cout << GridLogMessage << "**************************************************" << std::endl;
 
-  PrecGeneralisedConjugateResidual<LatticeFermion>    VPGCRT(1.0e-12,4000000,Simple,8,8);
-  FlexibleGeneralisedMinimalResidual<LatticeFermion> FGMREST(1.0e-12,4000000,Simple,8);
+  PrecGeneralisedConjugateResidual<LatticeFermion>   VPGCRT(1.0e-12, 4000000, Simple, 8, 8);
+  FlexibleGeneralisedMinimalResidual<LatticeFermion> FGMREST(1.0e-12, 4000000, Simple, 8);
 
   std::cout << GridLogMessage << "**************************************************" << std::endl;
   std::cout << GridLogMessage << "Testing the four solvers" << std::endl;
   std::cout << GridLogMessage << "**************************************************" << std::endl;
 
-  std::vector< OperatorFunction<LatticeFermion>*> solvers;
+  std::vector<OperatorFunction<LatticeFermion> *> solvers;
   solvers.push_back(&VPGCRMG);
   solvers.push_back(&FGMRESMG);
   solvers.push_back(&VPGCRT);
@@ -855,7 +847,7 @@ int main (int argc, char ** argv)
 
   for(auto elem : solvers) {
     result = zero;
-    (*elem)(HermIndefOp,src,result);
+    (*elem)(HermIndefOp, src, result);
   }
 
   Grid_finalize();

From 38328100c91348ae3da8adb46aa71c44d08eb56f Mon Sep 17 00:00:00 2001
From: Daniel Richtmann <daniel.richtmann@physik.uni-regensburg.de>
Date: Thu, 18 Jan 2018 15:35:54 +0100
Subject: [PATCH 059/130] Implement correctness checks for Wilson MG

---
 tests/solver/Test_wilson_ddalphaamg.cc | 107 +++++++++++++++++++++++++
 1 file changed, 107 insertions(+)

diff --git a/tests/solver/Test_wilson_ddalphaamg.cc b/tests/solver/Test_wilson_ddalphaamg.cc
index 0f828959..456ff98b 100644
--- a/tests/solver/Test_wilson_ddalphaamg.cc
+++ b/tests/solver/Test_wilson_ddalphaamg.cc
@@ -537,6 +537,111 @@ public:
     Ni = norm2(in);
     std::cout << GridLogMessage << "SAP resid(post) " << std::sqrt(r / Ni) << " " << r << " " << Ni << std::endl;
   }
+
+  void runChecks(CoarseGrids<nbasis> &cGrids) {
+
+    /////////////////////////////////////////////
+    // Some stuff we need for the checks below //
+    /////////////////////////////////////////////
+    auto tolerance = 1e-13; // TODO: this obviously depends on the precision we use, current value is for double
+
+    std::vector<CoarseVector> cTmps(4, _CoarseOperator.Grid());
+    std::vector<FineField>    fTmps(2, _Aggregates.subspace[0]._grid); // atm only for one coarser grid
+
+    // need to construct an operator, since _CoarseOperator is not a LinearOperator but only a matrix (the name is a bit misleading)
+    MdagMLinearOperator<CoarseOperator, CoarseVector> MdagMOp(_CoarseOperator);
+
+    std::cout << GridLogMessage << "**************************************************" << std::endl;
+    std::cout << GridLogMessage << "MG correctness check: 0 == (1 - P R) v" << std::endl;
+    std::cout << GridLogMessage << "**************************************************" << std::endl;
+
+    for(auto i = 0; i < _Aggregates.subspace.size(); ++i) {
+      _Aggregates.ProjectToSubspace(cTmps[0], _Aggregates.subspace[i]); //   R v_i
+      _Aggregates.PromoteFromSubspace(cTmps[0], fTmps[0]);              // P R v_i
+
+      fTmps[1]       = _Aggregates.subspace[i] - fTmps[0]; // v_i - P R v_i
+      auto deviation = std::sqrt(norm2(fTmps[1]) / norm2(_Aggregates.subspace[i]));
+
+      std::cout << GridLogMessage << "Vector " << i << ": norm2(v_i) = " << norm2(_Aggregates.subspace[i])
+                << " | norm2(R v_i) = " << norm2(cTmps[0]) << " | norm2(P R v_i) = " << norm2(fTmps[0])
+                << " | relative deviation = " << deviation << std::endl;
+
+      if(deviation > tolerance) {
+        std::cout << GridLogError << "Vector " << i << ": relative deviation check failed " << deviation << " > " << tolerance << std::endl;
+        abort();
+      }
+    }
+    std::cout << GridLogMessage << "Check passed!" << std::endl;
+
+    std::cout << GridLogMessage << "**************************************************" << std::endl;
+    std::cout << GridLogMessage << "MG correctness check: 0 == (1 - R P) v_c" << std::endl;
+    std::cout << GridLogMessage << "**************************************************" << std::endl;
+
+    random(cGrids.PRNGs[0], cTmps[0]);
+
+    _Aggregates.PromoteFromSubspace(cTmps[0], fTmps[0]); //   P v_c
+    _Aggregates.ProjectToSubspace(cTmps[1], fTmps[0]);   // R P v_c
+
+    cTmps[2]       = cTmps[0] - cTmps[1]; // v_c - R P v_c
+    auto deviation = std::sqrt(norm2(cTmps[2]) / norm2(cTmps[0]));
+
+    std::cout << GridLogMessage << "norm2(v_c) = " << norm2(cTmps[0]) << " | norm2(R P v_c) = " << norm2(cTmps[1])
+              << " | norm2(P v_c) = " << norm2(fTmps[0]) << " | relative deviation = " << deviation << std::endl;
+
+    if(deviation > tolerance) {
+      std::cout << GridLogError << "relative deviation check failed " << deviation << " > " << tolerance << std::endl;
+      abort();
+    }
+    std::cout << GridLogMessage << "Check passed!" << std::endl;
+
+    std::cout << GridLogMessage << "**************************************************" << std::endl;
+    std::cout << GridLogMessage << "MG correctness check: 0 == (R D P - D_c) v_c" << std::endl;
+    std::cout << GridLogMessage << "**************************************************" << std::endl;
+
+    random(cGrids.PRNGs[0], cTmps[0]);
+
+    _Aggregates.PromoteFromSubspace(cTmps[0], fTmps[0]); //     P v_c
+    _FineOperator.Op(fTmps[0], fTmps[1]);                //   D P v_c
+    _Aggregates.ProjectToSubspace(cTmps[1], fTmps[1]);   // R D P v_c
+
+    MdagMOp.Op(cTmps[0], cTmps[2]); // D_c v_c
+
+    cTmps[3]  = cTmps[1] - cTmps[2]; // R D P v_c - D_c v_c
+    deviation = std::sqrt(norm2(cTmps[3]) / norm2(cTmps[1]));
+
+    std::cout << GridLogMessage << "norm2(R D P v_c) = " << norm2(cTmps[1]) << " | norm2(D_c v_c) = " << norm2(cTmps[2])
+              << " | relative deviation = " << deviation << std::endl;
+
+    if(deviation > tolerance) {
+      std::cout << GridLogError << "relative deviation check failed " << deviation << " > " << tolerance << std::endl;
+      abort();
+    }
+    std::cout << GridLogMessage << "Check passed!" << std::endl;
+
+    std::cout << GridLogMessage << "**************************************************" << std::endl;
+    std::cout << GridLogMessage << "MG correctness check: 0 == |(Im(v_c^dag D_c^dag D_c v_c)|" << std::endl;
+    std::cout << GridLogMessage << "**************************************************" << std::endl;
+
+    random(cGrids.PRNGs[0], cTmps[0]);
+
+    MdagMOp.Op(cTmps[0], cTmps[1]);    //         D_c v_c
+    MdagMOp.AdjOp(cTmps[1], cTmps[2]); // D_c^dag D_c v_c
+
+    // // alternative impl, which is better?
+    // MdagMOp.HermOp(cTmps[0], cTmps[2]); // D_c^dag D_c v_c
+
+    auto dot  = innerProduct(cTmps[0], cTmps[2]); //v_c^dag D_c^dag D_c v_c
+    deviation = abs(imag(dot)) / abs(real(dot));
+
+    std::cout << GridLogMessage << "Re(v_c^dag D_c^dag D_c v_c) = " << real(dot) << " | Im(v_c^dag D_c^dag D_c v_c) = " << imag(dot)
+              << " | relative deviation = " << deviation << std::endl;
+
+    if(deviation > tolerance) {
+      std::cout << GridLogError << "relative deviation check failed " << deviation << " > " << tolerance << std::endl;
+      abort();
+    }
+    std::cout << GridLogMessage << "Check passed!" << std::endl;
+  }
 };
 
 struct MGParams {
@@ -819,6 +924,8 @@ int main(int argc, char **argv) {
   //                         FineOperator &Smooth,Matrix &SmootherMatrix)
   TrivialPrecon<LatticeFermion> Simple;
 
+  Precon.runChecks(cGrids);
+
   std::cout << GridLogMessage << "**************************************************" << std::endl;
   std::cout << GridLogMessage << "Building two level VPGCR and FGMRES solvers" << std::endl;
   std::cout << GridLogMessage << "**************************************************" << std::endl;

From a70c1feeccabf9949d44dbb7398531b7d694e722 Mon Sep 17 00:00:00 2001
From: Daniel Richtmann <daniel.richtmann@physik.uni-regensburg.de>
Date: Thu, 18 Jan 2018 15:48:28 +0100
Subject: [PATCH 060/130] Remove some unnecessary stuff in Wilson MG

---
 tests/solver/Test_wilson_ddalphaamg.cc | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/tests/solver/Test_wilson_ddalphaamg.cc b/tests/solver/Test_wilson_ddalphaamg.cc
index 456ff98b..439df423 100644
--- a/tests/solver/Test_wilson_ddalphaamg.cc
+++ b/tests/solver/Test_wilson_ddalphaamg.cc
@@ -781,7 +781,7 @@ int main(int argc, char **argv) {
   // CoarseG5PVector; // P = preserving typedef
   // CoarseOperator::CoarseG5PMatrix CoarseG5PMatrix;
 
-#if 1
+#if 0
   // clang-format off
   std::cout << std::endl;
   std::cout << "type_name<decltype(vTComplex{})>()                      = " << type_name<decltype(vTComplex{})>()                      << std::endl;
@@ -844,8 +844,6 @@ int main(int argc, char **argv) {
   Aggregates.CheckOrthogonal();
   testVectorAnalyzer(HermOp, Aggregates.subspace);
 
-  result = zero;
-
   std::cout << GridLogMessage << "**************************************************" << std::endl;
   std::cout << GridLogMessage << "Building coarse representation of Dirac operator" << std::endl;
   std::cout << GridLogMessage << "**************************************************" << std::endl;

From 9dc885d2974aa9c054e8baf54630389005bc78e7 Mon Sep 17 00:00:00 2001
From: Daniel Richtmann <daniel.richtmann@physik.uni-regensburg.de>
Date: Thu, 18 Jan 2018 17:02:04 +0100
Subject: [PATCH 061/130] Fix a bug in Wilson MG

The calculation of the lattice size of a second coarse level was incorrect.
---
 tests/solver/Test_wilson_ddalphaamg.cc | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/tests/solver/Test_wilson_ddalphaamg.cc b/tests/solver/Test_wilson_ddalphaamg.cc
index 439df423..95297deb 100644
--- a/tests/solver/Test_wilson_ddalphaamg.cc
+++ b/tests/solver/Test_wilson_ddalphaamg.cc
@@ -113,7 +113,8 @@ public:
     std::cout << GridLogMessage << "Constructing " << coarsegrids << " CoarseGrids" << std::endl;
 
     for(int cl = 0; cl < coarsegrids; ++cl) { // may be a bit ugly and slow but not perf critical
-      LattSizes.push_back({GridDefaultLatt()});
+      // need to differentiate between first and other coarse levels in size calculation
+      LattSizes.push_back({cl == 0 ? GridDefaultLatt() : LattSizes[cl - 1]});
       Seeds.push_back(std::vector<int>(LattSizes[cl].size()));
 
       for(int d = 0; d < LattSizes[cl].size(); ++d) {

From 3b2d805398c0691dcb61e9aa4cbbadb7881aa4b6 Mon Sep 17 00:00:00 2001
From: Daniel Richtmann <daniel.richtmann@gmail.com>
Date: Sat, 20 Jan 2018 18:48:53 +0100
Subject: [PATCH 062/130] WilsonMG: Some first steps towards coarse spin dofs;
 not compiling yet

A failing conversion from the innermost type (Grid::Simd<...>) to a coarse
scalar (triple iScalar) in blockPromote prohibits this commit from working.
---
 lib/algorithms/CoarsenedMatrix.h | 35 +++++++++++++++++++-------------
 lib/lattice/Lattice_transfer.h   | 26 ++++++++++++++----------
 2 files changed, 36 insertions(+), 25 deletions(-)

diff --git a/lib/algorithms/CoarsenedMatrix.h b/lib/algorithms/CoarsenedMatrix.h
index 8af8d7ac..04ac18ae 100644
--- a/lib/algorithms/CoarsenedMatrix.h
+++ b/lib/algorithms/CoarsenedMatrix.h
@@ -93,12 +93,16 @@ namespace Grid {
   template<class Fobj,class CComplex,int nbasis>
   class Aggregation   {
   public:
-    typedef iVector<CComplex,nbasis >             siteVector;
-    typedef Lattice<siteVector>                 CoarseVector;
-    typedef Lattice<iMatrix<CComplex,nbasis > > CoarseMatrix;
 
-    typedef Lattice< CComplex >   CoarseScalar; // used for inner products on fine field
-    typedef Lattice<Fobj >        FineField;
+    typedef typename GridTypeMapper<CComplex>::vector_type     innerType;
+    typedef iScalar<iScalar<iScalar<innerType         >    > > siteScalar; // used for inner products on fine field
+    typedef iScalar<iVector<iVector<innerType, nbasis >, 1 > > siteVector;
+    typedef iScalar<iMatrix<iMatrix<innerType, nbasis >, 1 > > siteMatrix;
+    typedef Lattice<siteScalar> CoarseScalar; // used for inner products on fine field
+    typedef Lattice<siteVector> CoarseVector;
+    typedef Lattice<siteMatrix> CoarseMatrix;
+
+    typedef Lattice<Fobj>       FineField;
 
     GridBase *CoarseGrid;
     GridBase *FineGrid;
@@ -129,7 +133,7 @@ namespace Grid {
 	blockProject(iProj,subspace[i],subspace);
 	eProj=zero; 
 	parallel_for(int ss=0;ss<CoarseGrid->oSites();ss++){
-	  eProj._odata[ss](i)=CComplex(1.0);
+	  eProj._odata[ss]()(0)(i)=innerType(1.0);
 	}
 	eProj=eProj - iProj;
 	std::cout<<GridLogMessage<<"Orthog check error "<<i<<" " << norm2(eProj)<<std::endl;
@@ -239,15 +243,18 @@ namespace Grid {
   // Fine Object == (per site) type of fine field
   // nbasis      == number of deflation vectors
   template<class Fobj,class CComplex,int nbasis>
-  class CoarsenedMatrix : public SparseMatrixBase<Lattice<iVector<CComplex,nbasis > > >  {
+  class CoarsenedMatrix : public SparseMatrixBase<Lattice<iScalar<iVector<iVector<typename GridTypeMapper<CComplex>::vector_type, nbasis >, 1 > > > >  {
   public:
     
-    typedef iVector<CComplex,nbasis >             siteVector;
-    typedef Lattice<siteVector>                 CoarseVector;
-    typedef Lattice<iMatrix<CComplex,nbasis > > CoarseMatrix;
+    typedef typename GridTypeMapper<CComplex>::vector_type     innerType;
+    typedef iScalar<iScalar<iScalar<innerType         >    > > siteScalar;
+    typedef iScalar<iVector<iVector<innerType, nbasis >, 1 > > siteVector;
+    typedef iScalar<iMatrix<iMatrix<innerType, nbasis >, 1 > > siteMatrix;
+    typedef Lattice<siteScalar> CoarseScalar; // used for inner products on fine field
+    typedef Lattice<siteVector> CoarseVector;
+    typedef Lattice<siteMatrix> CoarseMatrix;
 
-    typedef Lattice< CComplex >   CoarseScalar; // used for inner products on fine field
-    typedef Lattice<Fobj >        FineField;
+    typedef Lattice<Fobj>       FineField;
 
     ////////////////////
     // Data members
@@ -387,9 +394,9 @@ namespace Grid {
 	  parallel_for(int ss=0;ss<Grid()->oSites();ss++){
 	    for(int j=0;j<nbasis;j++){
 	      if( disp!= 0 ) {
-		A[p]._odata[ss](j,i) = oProj._odata[ss](j);
+		A[p]._odata[ss]()(0,0)(j,i) = oProj._odata[ss]()(0)(j);
 	      }
-	      A[self_stencil]._odata[ss](j,i) =	A[self_stencil]._odata[ss](j,i) + iProj._odata[ss](j);
+	      A[self_stencil]._odata[ss]()(0,0)(j,i) =	A[self_stencil]._odata[ss]()(0,0)(j,i) + iProj._odata[ss]()(0)(j);
 	    }
 	  }
 	}
diff --git a/lib/lattice/Lattice_transfer.h b/lib/lattice/Lattice_transfer.h
index 32c15d22..5a04d551 100644
--- a/lib/lattice/Lattice_transfer.h
+++ b/lib/lattice/Lattice_transfer.h
@@ -80,8 +80,8 @@ inline void subdivides(GridBase *coarse,GridBase *fine)
   }
   
 
-template<class vobj,class CComplex,int nbasis>
-inline void blockProject(Lattice<iVector<CComplex,nbasis > > &coarseData,
+template<class vobj,class vobjC>
+inline void blockProject(Lattice<vobjC>                    &coarseData,
 			 const             Lattice<vobj>   &fineData,
 			 const std::vector<Lattice<vobj> > &Basis)
 {
@@ -90,7 +90,8 @@ inline void blockProject(Lattice<iVector<CComplex,nbasis > > &coarseData,
   int  _ndimension = coarse->_ndimension;
 
   // checks
-  assert( nbasis == Basis.size() );
+  assert((Basis.size() != 0) && ((Basis.size() & 0x1) == 0));
+  auto nbasis = Basis.size();
   subdivides(coarse,fine); 
   for(int i=0;i<nbasis;i++){
     conformable(Basis[i],fineData);
@@ -118,8 +119,8 @@ inline void blockProject(Lattice<iVector<CComplex,nbasis > > &coarseData,
 PARALLEL_CRITICAL
     for(int i=0;i<nbasis;i++) {
 
-      coarseData._odata[sc](i)=coarseData._odata[sc](i)
-	+ innerProduct(Basis[i]._odata[sf],fineData._odata[sf]);
+      coarseData._odata[sc]()(0)(i)=coarseData._odata[sc]()(0)(i)
+        + TensorRemove(innerProduct(Basis[i]._odata[sf],fineData._odata[sf]));
 
     }
   }
@@ -285,9 +286,9 @@ inline void blockOrthogonalise(Lattice<CComplex> &ip,std::vector<Lattice<vobj> >
   }
 }
 
-template<class vobj,class CComplex,int nbasis>
-inline void blockPromote(const Lattice<iVector<CComplex,nbasis > > &coarseData,
-			 Lattice<vobj>   &fineData,
+template<class vobj,class vobjC>
+inline void blockPromote(const Lattice<vobjC>              &coarseData,
+			 Lattice<vobj>                     &fineData,
 			 const std::vector<Lattice<vobj> > &Basis)
 {
   GridBase * fine  = fineData._grid;
@@ -295,7 +296,9 @@ inline void blockPromote(const Lattice<iVector<CComplex,nbasis > > &coarseData,
   int  _ndimension = coarse->_ndimension;
 
   // checks
-  assert( nbasis == Basis.size() );
+  assert((Basis.size() != 0) && ((Basis.size() & 0x1) == 0));
+  auto nbasis = Basis.size();
+
   subdivides(coarse,fine); 
   for(int i=0;i<nbasis;i++){
     conformable(Basis[i]._grid,fine);
@@ -319,9 +322,10 @@ inline void blockPromote(const Lattice<iVector<CComplex,nbasis > > &coarseData,
       for(int d=0;d<_ndimension;d++) coor_c[d]=coor_f[d]/block_r[d];
       Lexicographic::IndexFromCoor(coor_c,sc,coarse->_rdimensions);
       
+      // TODO: These lines here prevent this commit from working
       for(int i=0;i<nbasis;i++) {
-	if(i==0) fineData._odata[sf]=coarseData._odata[sc](i) * Basis[i]._odata[sf];
-	else     fineData._odata[sf]=fineData._odata[sf]+coarseData._odata[sc](i)*Basis[i]._odata[sf];
+	if(i==0) fineData._odata[sf]=coarseData._odata[sc]()(0)(i) * Basis[i]._odata[sf];
+	else     fineData._odata[sf]=fineData._odata[sf]+coarseData._odata[sc]()(0)(i)*Basis[i]._odata[sf];
       }
     }
   }

From 04f92ccddff5f941743044182d4d25ed3c306d24 Mon Sep 17 00:00:00 2001
From: Daniel Richtmann <daniel.richtmann@physik.uni-regensburg.de>
Date: Mon, 22 Jan 2018 13:22:29 +0100
Subject: [PATCH 063/130] WilsonMG: Provide a fix for the previous commit;
 compiles and runs successfully now

I don't like the solution with the temporary very much though ...
---
 lib/lattice/Lattice_transfer.h | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/lib/lattice/Lattice_transfer.h b/lib/lattice/Lattice_transfer.h
index 5a04d551..1c74a82d 100644
--- a/lib/lattice/Lattice_transfer.h
+++ b/lib/lattice/Lattice_transfer.h
@@ -322,10 +322,13 @@ inline void blockPromote(const Lattice<vobjC>              &coarseData,
       for(int d=0;d<_ndimension;d++) coor_c[d]=coor_f[d]/block_r[d];
       Lexicographic::IndexFromCoor(coor_c,sc,coarse->_rdimensions);
       
-      // TODO: These lines here prevent this commit from working
+      // The temporary is necessary, since a pure instance of Grid::simd<...> is
+      // not a valid argument to operator+ with an iVector, we need an an iScalar
+      typename vobjC::tensor_reduced tmp; // iScalar<iVector<iVector<...>>> -> iScalar<iScalar<iScalar<...>>>
       for(int i=0;i<nbasis;i++) {
-	if(i==0) fineData._odata[sf]=coarseData._odata[sc]()(0)(i) * Basis[i]._odata[sf];
-	else     fineData._odata[sf]=fineData._odata[sf]+coarseData._odata[sc]()(0)(i)*Basis[i]._odata[sf];
+        tmp = coarseData._odata[sc]()(0)(i);
+        if(i==0) fineData._odata[sf] = tmp * Basis[i]._odata[sf];
+        else     fineData._odata[sf]=fineData._odata[sf]+tmp*Basis[i]._odata[sf];
       }
     }
   }

From 161637e573cb10eb8b704727b945106aed4ec713 Mon Sep 17 00:00:00 2001
From: Daniel Richtmann <daniel.richtmann@physik.uni-regensburg.de>
Date: Mon, 29 Jan 2018 17:09:51 +0100
Subject: [PATCH 064/130] Turn on orthogonality checking temporarily

---
 lib/algorithms/CoarsenedMatrix.h | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/lib/algorithms/CoarsenedMatrix.h b/lib/algorithms/CoarsenedMatrix.h
index 04ac18ae..1c703a7b 100644
--- a/lib/algorithms/CoarsenedMatrix.h
+++ b/lib/algorithms/CoarsenedMatrix.h
@@ -119,12 +119,12 @@ namespace Grid {
   
     void Orthogonalise(void){
       CoarseScalar InnerProd(CoarseGrid); 
-      std::cout << GridLogMessage <<" Gramm-Schmidt pass 1"<<std::endl;
+      std::cout << GridLogMessage <<"Gram-Schmidt pass 1"<<std::endl;
       blockOrthogonalise(InnerProd,subspace);
-      std::cout << GridLogMessage <<" Gramm-Schmidt pass 2"<<std::endl;
+      std::cout << GridLogMessage <<"Gram-Schmidt pass 2"<<std::endl;
       blockOrthogonalise(InnerProd,subspace);
-      //      std::cout << GridLogMessage <<" Gramm-Schmidt checking orthogonality"<<std::endl;
-      //      CheckOrthogonal();
+      std::cout << GridLogMessage <<"Gram-Schmidt checking orthogonality"<<std::endl;
+      CheckOrthogonal();
     } 
     void CheckOrthogonal(void){
       CoarseVector iProj(CoarseGrid); 

From bfc1411c1f902b6d556f8418cd3831ec549eceee Mon Sep 17 00:00:00 2001
From: Daniel Richtmann <daniel.richtmann@physik.uni-regensburg.de>
Date: Mon, 29 Jan 2018 17:11:29 +0100
Subject: [PATCH 065/130] Use more iterations in subspace creation

---
 lib/algorithms/CoarsenedMatrix.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/lib/algorithms/CoarsenedMatrix.h b/lib/algorithms/CoarsenedMatrix.h
index 1c703a7b..c76cdd14 100644
--- a/lib/algorithms/CoarsenedMatrix.h
+++ b/lib/algorithms/CoarsenedMatrix.h
@@ -221,7 +221,7 @@ namespace Grid {
 
 	hermop.Op(noise,Mn); std::cout<<GridLogMessage << "noise   ["<<b<<"] <n|MdagM|n> "<<norm2(Mn)<<std::endl;
 
-	for(int i=0;i<1;i++){
+	for(int i=0;i<3;i++){
 
 	  CG(hermop,noise,subspace[b]);
 

From 194e4b94bb03a91060e0c6049092d6a97c07ef50 Mon Sep 17 00:00:00 2001
From: Daniel Richtmann <daniel.richtmann@physik.uni-regensburg.de>
Date: Mon, 29 Jan 2018 17:18:20 +0100
Subject: [PATCH 066/130] Make MG checking function work level-wise

---
 tests/solver/Test_wilson_ddalphaamg.cc | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/tests/solver/Test_wilson_ddalphaamg.cc b/tests/solver/Test_wilson_ddalphaamg.cc
index 95297deb..3ba933e8 100644
--- a/tests/solver/Test_wilson_ddalphaamg.cc
+++ b/tests/solver/Test_wilson_ddalphaamg.cc
@@ -539,7 +539,7 @@ public:
     std::cout << GridLogMessage << "SAP resid(post) " << std::sqrt(r / Ni) << " " << r << " " << Ni << std::endl;
   }
 
-  void runChecks(CoarseGrids<nbasis> &cGrids) {
+  void runChecks(CoarseGrids<nbasis> &cGrids, int whichCoarseGrid) {
 
     /////////////////////////////////////////////
     // Some stuff we need for the checks below //
@@ -578,7 +578,7 @@ public:
     std::cout << GridLogMessage << "MG correctness check: 0 == (1 - R P) v_c" << std::endl;
     std::cout << GridLogMessage << "**************************************************" << std::endl;
 
-    random(cGrids.PRNGs[0], cTmps[0]);
+    random(cGrids.PRNGs[whichCoarseGrid], cTmps[0]);
 
     _Aggregates.PromoteFromSubspace(cTmps[0], fTmps[0]); //   P v_c
     _Aggregates.ProjectToSubspace(cTmps[1], fTmps[0]);   // R P v_c
@@ -599,7 +599,7 @@ public:
     std::cout << GridLogMessage << "MG correctness check: 0 == (R D P - D_c) v_c" << std::endl;
     std::cout << GridLogMessage << "**************************************************" << std::endl;
 
-    random(cGrids.PRNGs[0], cTmps[0]);
+    random(cGrids.PRNGs[whichCoarseGrid], cTmps[0]);
 
     _Aggregates.PromoteFromSubspace(cTmps[0], fTmps[0]); //     P v_c
     _FineOperator.Op(fTmps[0], fTmps[1]);                //   D P v_c
@@ -623,7 +623,7 @@ public:
     std::cout << GridLogMessage << "MG correctness check: 0 == |(Im(v_c^dag D_c^dag D_c v_c)|" << std::endl;
     std::cout << GridLogMessage << "**************************************************" << std::endl;
 
-    random(cGrids.PRNGs[0], cTmps[0]);
+    random(cGrids.PRNGs[whichCoarseGrid], cTmps[0]);
 
     MdagMOp.Op(cTmps[0], cTmps[1]);    //         D_c v_c
     MdagMOp.AdjOp(cTmps[1], cTmps[2]); // D_c^dag D_c v_c
@@ -923,7 +923,7 @@ int main(int argc, char **argv) {
   //                         FineOperator &Smooth,Matrix &SmootherMatrix)
   TrivialPrecon<LatticeFermion> Simple;
 
-  Precon.runChecks(cGrids);
+  Precon.runChecks(cGrids, 0);
 
   std::cout << GridLogMessage << "**************************************************" << std::endl;
   std::cout << GridLogMessage << "Building two level VPGCR and FGMRES solvers" << std::endl;

From faf42780192f65ff0cd15842f79579e9786ce4b5 Mon Sep 17 00:00:00 2001
From: Daniel Richtmann <daniel.richtmann@physik.uni-regensburg.de>
Date: Mon, 29 Jan 2018 17:21:42 +0100
Subject: [PATCH 067/130] Use 2 passes of GS in coarse operator construction

---
 lib/algorithms/CoarsenedMatrix.h | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/lib/algorithms/CoarsenedMatrix.h b/lib/algorithms/CoarsenedMatrix.h
index c76cdd14..59025276 100644
--- a/lib/algorithms/CoarsenedMatrix.h
+++ b/lib/algorithms/CoarsenedMatrix.h
@@ -334,10 +334,9 @@ namespace Grid {
 
       CoarseVector iProj(Grid()); 
       CoarseVector oProj(Grid()); 
-      CoarseScalar InnerProd(Grid()); 
 
       // Orthogonalise the subblocks over the basis
-      blockOrthogonalise(InnerProd,Subspace.subspace);
+      Subspace.Orthogonalise();
 
       // Compute the matrix elements of linop between this orthonormal
       // set of vectors.

From 1cb745c8dcdf1099e2c9b693e87f851f8cf0d845 Mon Sep 17 00:00:00 2001
From: Daniel Richtmann <daniel.richtmann@physik.uni-regensburg.de>
Date: Mon, 29 Jan 2018 17:34:26 +0100
Subject: [PATCH 068/130] Add function to return full type as std::string

Also works for nested templates. I find it useful for debugging.
Possible usage:

std::cout << "getTypename<AType>() = " << getTypename<AType>() << std::endl;
std::cout << "getTypename<decltype(AnInstance)>() = " << getTypename<decltype(AnInstance)>() << std::endl;
---
 lib/tensors/Tensor_traits.h | 20 ++++++++++++++++++++
 1 file changed, 20 insertions(+)

diff --git a/lib/tensors/Tensor_traits.h b/lib/tensors/Tensor_traits.h
index c1ef397a..be0e550a 100644
--- a/lib/tensors/Tensor_traits.h
+++ b/lib/tensors/Tensor_traits.h
@@ -23,6 +23,7 @@ Author: Christopher Kelly <ckelly@phys.columbia.edu>
 #define GRID_MATH_TRAITS_H
 
 #include <type_traits>
+#include <cxxabi.h>
 
 namespace Grid {
 
@@ -288,6 +289,25 @@ namespace Grid {
 
     enum { value = sizeof(real_scalar_type)/sizeof(float) };
   };
+
+  template<typename T> std::string getTypename() {
+
+    typedef typename std::remove_reference<T>::type TWoRef;
+
+    std::unique_ptr<char, void (*)(void *)> own(abi::__cxa_demangle(typeid(TWoRef).name(), nullptr, nullptr, nullptr), std::free);
+
+    std::string r = own != nullptr ? own.get() : typeid(TWoRef).name();
+
+    if(std::is_const<TWoRef>::value)
+      r += " const";
+    if(std::is_volatile<TWoRef>::value)
+      r += " volatile";
+    if(std::is_lvalue_reference<T>::value)
+      r += "&";
+    else if(std::is_rvalue_reference<T>::value)
+      r += "&&";
+    return r;
+  }
 }
 
 #endif

From d2e68c4355880d257b93610aa9cb2a8dd40f76c5 Mon Sep 17 00:00:00 2001
From: Daniel Richtmann <daniel.richtmann@physik.uni-regensburg.de>
Date: Mon, 29 Jan 2018 17:45:48 +0100
Subject: [PATCH 069/130] WilsonMG: Perform some minor cleanup

---
 tests/solver/Test_wilson_ddalphaamg.cc | 89 ++------------------------
 1 file changed, 4 insertions(+), 85 deletions(-)

diff --git a/tests/solver/Test_wilson_ddalphaamg.cc b/tests/solver/Test_wilson_ddalphaamg.cc
index 3ba933e8..b5bc00fb 100644
--- a/tests/solver/Test_wilson_ddalphaamg.cc
+++ b/tests/solver/Test_wilson_ddalphaamg.cc
@@ -2,9 +2,9 @@
 
     Grid physics library, www.github.com/paboyle/Grid 
 
-    Source file: ./tests/Test_dwf_hdcr.cc
+    Source file: ./tests/solver/Test_wilson_ddalphaamg.cc
 
-    Copyright (C) 2015
+    Copyright (C) 2017
 
     Author: Daniel Richtmann <daniel.richtmann@ur.de>
 
@@ -28,7 +28,6 @@
 
 #include <Grid/Grid.h>
 #include <Grid/algorithms/iterative/PrecGeneralisedConjugateResidual.h>
-//#include <algorithms/iterative/PrecConjugateResidual.h>
 
 using namespace std;
 using namespace Grid;
@@ -37,12 +36,10 @@ using namespace Grid::QCD;
 template<class Field, int nbasis> class TestVectorAnalyzer {
 public:
   void operator()(LinearOperatorBase<Field> &Linop, std::vector<Field> const &vectors, int nn = nbasis) {
-    // this function corresponds to testvector_analysis_PRECISION from the
-    // DD-αAMG codebase
 
     auto positiveOnes = 0;
 
-    std::vector<Field> tmp(4, vectors[0]._grid); // bit hacky?
+    std::vector<Field> tmp(4, vectors[0]._grid);
     Gamma              g5(Gamma::Algebra::Gamma5);
 
     std::cout << GridLogMessage << "Test vector analysis:" << std::endl;
@@ -51,7 +48,7 @@ public:
 
       Linop.Op(vectors[i], tmp[3]);
 
-      tmp[0] = g5 * tmp[3]; // is this the same as coarse_gamma5_PRECISION?
+      tmp[0] = g5 * tmp[3];
 
       auto lambda = innerProduct(vectors[i], tmp[0]) / innerProduct(vectors[i], vectors[i]);
 
@@ -96,11 +93,6 @@ RealD InverseApproximation(RealD x) {
 
 template<int nbasis> struct CoarseGrids {
 public:
-  // typedef Aggregation<vSpinColourVector,vTComplex,nbasis>     Subspace;
-  // typedef CoarsenedMatrix<vSpinColourVector,vTComplex,nbasis>
-  // CoarseOperator; typedef typename CoarseOperator::CoarseVector
-  // CoarseVector;
-
   std::vector<std::vector<int>> LattSizes;
   std::vector<std::vector<int>> Seeds;
   std::vector<GridCartesian *>  Grids;
@@ -645,17 +637,6 @@ public:
   }
 };
 
-struct MGParams {
-  std::vector<std::vector<int>> blockSizes;
-  const int                     nbasis;
-
-  MGParams()
-    : blockSizes({{1, 1, 1, 2}})
-    // : blockSizes({{1,1,1,2}, {1,1,1,2}})
-    // : blockSizes({{1,1,1,2}, {1,1,1,2}, {1,1,1,2}})
-    , nbasis(20) {}
-};
-
 int main(int argc, char **argv) {
 
   Grid_init(&argc, &argv);
@@ -671,8 +652,6 @@ int main(int argc, char **argv) {
   params.hi    = 70.0;
   params.steps = 1;
 
-  auto mgp = MGParams{};
-
   std::cout << GridLogMessage << "**************************************************" << std::endl;
   std::cout << GridLogMessage << "Params: " << std::endl;
   std::cout << GridLogMessage << "**************************************************" << std::endl;
@@ -730,34 +709,9 @@ int main(int argc, char **argv) {
   const int nbasis = 20; // we fix the number of test vector to the same
                          // number on every level for now
 
-  // // some stuff we need for every coarser lattice
-  // std::vector<std::vector<int>> cLattSizes({GridDefaultLatt()});;
-  // std::vector<GridCartesian *> cGrids(params.coarsegrids);
-  // std::vector<std::vector<int>> cSeeds({ {5,6,7,8} });
-  // std::vector<GridParallelRNG> cPRNGs;(params.coarsegrids);
-
-  // assert(cLattSizes.size() == params.coarsegrids);
-  // assert(    cGrids.size() == params.coarsegrids);
-  // assert(    cSeeds.size() == params.coarsegrids);
-  // assert(    cPRNGs.size() == params.coarsegrids);
-
-  // for(int cl=0;cl<cLattSizes.size();cl++){
-  //   for(int d=0;d<cLattSizes[cl].size();d++){
-  //     // std::cout << cl << " " << d << " " << cLattSizes[cl][d] << " " <<
-  //     blockSizes[cl][d] << std::endl; cLattSizes[cl][d] =
-  //     cLattSizes[cl][d]/blockSizes[cl][d];
-  //   }
-  //   cGrids[cl] = SpaceTimeGrid::makeFourDimGrid(cLattSizes[cl],
-  //   GridDefaultSimd(Nd,vComplex::Nsimd()),GridDefaultMpi());;
-  //   // std::cout << cLattSizes[cl] << std::endl;
-  // }
-
-  // GridParallelRNG cPRNG(CGrid); cPRNG.SeedFixedIntegers(cSeeds);
 
   CoarseGrids<nbasis> cGrids(blockSizes);
 
-  // assert(0);
-
   std::cout << GridLogMessage << "**************************************************" << std::endl;
   std::cout << GridLogMessage << "Building the wilson operator on the fine grid" << std::endl;
   std::cout << GridLogMessage << "**************************************************" << std::endl;
@@ -782,43 +736,11 @@ int main(int argc, char **argv) {
   // CoarseG5PVector; // P = preserving typedef
   // CoarseOperator::CoarseG5PMatrix CoarseG5PMatrix;
 
-#if 0
-  // clang-format off
-  std::cout << std::endl;
-  std::cout << "type_name<decltype(vTComplex{})>()                      = " << type_name<decltype(vTComplex{})>()                      << std::endl;
-  std::cout << "type_name<GridTypeMapper<vTComplex>::scalar_type>()     = " << type_name<GridTypeMapper<vTComplex>::scalar_type>()     << std::endl;
-  std::cout << "type_name<GridTypeMapper<vTComplex>::vector_type>()     = " << type_name<GridTypeMapper<vTComplex>::vector_type>()     << std::endl;
-  std::cout << "type_name<GridTypeMapper<vTComplex>::vector_typeD>()    = " << type_name<GridTypeMapper<vTComplex>::vector_typeD>()    << std::endl;
-  std::cout << "type_name<GridTypeMapper<vTComplex>::tensor_reduced>()  = " << type_name<GridTypeMapper<vTComplex>::tensor_reduced>()  << std::endl;
-  std::cout << "type_name<GridTypeMapper<vTComplex>::scalar_object>()   = " << type_name<GridTypeMapper<vTComplex>::scalar_object>()   << std::endl;
-  std::cout << "type_name<GridTypeMapper<vTComplex>::Complexified>()    = " << type_name<GridTypeMapper<vTComplex>::Complexified>()    << std::endl;
-  std::cout << "type_name<GridTypeMapper<vTComplex>::Realified>()       = " << type_name<GridTypeMapper<vTComplex>::Realified>()       << std::endl;
-  std::cout << "type_name<GridTypeMapper<vTComplex>::DoublePrecision>() = " << type_name<GridTypeMapper<vTComplex>::DoublePrecision>() << std::endl;
-  std::cout << std::endl;
-
-  std::cout << std::endl;
-  std::cout << "type_name<decltype(TComplex{})>()                      = " << type_name<decltype(TComplex{})>()                      << std::endl;
-  std::cout << "type_name<GridTypeMapper<TComplex>::scalar_type>()     = " << type_name<GridTypeMapper<TComplex>::scalar_type>()     << std::endl;
-  std::cout << "type_name<GridTypeMapper<TComplex>::vector_type>()     = " << type_name<GridTypeMapper<TComplex>::vector_type>()     << std::endl;
-  std::cout << "type_name<GridTypeMapper<TComplex>::vector_typeD>()    = " << type_name<GridTypeMapper<TComplex>::vector_typeD>()    << std::endl;
-  std::cout << "type_name<GridTypeMapper<TComplex>::tensor_reduced>()  = " << type_name<GridTypeMapper<TComplex>::tensor_reduced>()  << std::endl;
-  std::cout << "type_name<GridTypeMapper<TComplex>::scalar_object>()   = " << type_name<GridTypeMapper<TComplex>::scalar_object>()   << std::endl;
-  std::cout << "type_name<GridTypeMapper<TComplex>::Complexified>()    = " << type_name<GridTypeMapper<TComplex>::Complexified>()    << std::endl;
-  std::cout << "type_name<GridTypeMapper<TComplex>::Realified>()       = " << type_name<GridTypeMapper<TComplex>::Realified>()       << std::endl;
-  std::cout << "type_name<GridTypeMapper<TComplex>::DoublePrecision>() = " << type_name<GridTypeMapper<TComplex>::DoublePrecision>() << std::endl;
-  std::cout << std::endl;
-  // clang-format on
-#endif
 
   std::cout << GridLogMessage << "**************************************************" << std::endl;
   std::cout << GridLogMessage << "Calling Aggregation class to build subspaces" << std::endl;
   std::cout << GridLogMessage << "**************************************************" << std::endl;
 
-  // • TODO: need some way to run the smoother on the "test vectors" for a few
-  //   times before constructing the subspace from them
-  // • Maybe an application for an mrhs (true mrhs, no block) smoother?
-  // • In WMG, the vectors are normalized but not orthogonalized, but here they
-  //   are constructed randomly and then orthogonalized (rather orthonormalized) against each other
   MdagMLinearOperator<WilsonFermionR, LatticeFermion> HermOp(Dw);
   Subspace                                            Aggregates(cGrids.Grids[0], FGrid, 0);
   assert((nbasis & 0x1) == 0);
@@ -864,9 +786,6 @@ int main(int argc, char **argv) {
   gaussian(cGrids.PRNGs[0], c_src);
   c_res = zero;
 
-  std::cout << "type_name<decltype(c_src)>() = " << type_name<decltype(c_src)>() << std::endl;
-
-  // c_res = g5 * c_src;
 
   std::cout << GridLogMessage << "**************************************************" << std::endl;
   std::cout << GridLogMessage << "Solving posdef-MR on coarse space " << std::endl;

From f20728baa9db03e90b2a6fd33a45fa4b9619e1f7 Mon Sep 17 00:00:00 2001
From: Daniel Richtmann <daniel.richtmann@physik.uni-regensburg.de>
Date: Mon, 29 Jan 2018 18:10:02 +0100
Subject: [PATCH 070/130] WilsonMG: Some further steps towards a three level
 method

Currently this is very "manual" as we are still testing stuff. Will refactor
and make it an algorithm once everything works.

What currently does work:

  - All tests in MultiGridPreconditioner::runChecks for the first coarse grid
  - The tests for the intergrid operators going from the first to the second
    coarse grid
    - (1 - P R) v   == 0
    - (1 - R P) v_c == 0
  - A full solve with VPGCR and a two-level MG preconditioner

What hinders the rest of the tests from passing with a three-level method is the
absence of implementations of CoarsenedMatrix::Mdir and CoarsenedMatrix::Mdiag.
---
 tests/solver/Test_wilson_ddalphaamg.cc | 221 +++++++++++++++----------
 1 file changed, 131 insertions(+), 90 deletions(-)

diff --git a/tests/solver/Test_wilson_ddalphaamg.cc b/tests/solver/Test_wilson_ddalphaamg.cc
index b5bc00fb..581bcda2 100644
--- a/tests/solver/Test_wilson_ddalphaamg.cc
+++ b/tests/solver/Test_wilson_ddalphaamg.cc
@@ -98,7 +98,7 @@ public:
   std::vector<GridCartesian *>  Grids;
   std::vector<GridParallelRNG>  PRNGs;
 
-  CoarseGrids(std::vector<std::vector<int>> const &blockSizes, int coarsegrids = 1) {
+  CoarseGrids(std::vector<std::vector<int>> const &blockSizes, int coarsegrids) {
 
     assert(blockSizes.size() == coarsegrids);
 
@@ -702,15 +702,20 @@ int main(int argc, char **argv) {
   std::cout << GridLogMessage << "Set up some coarser levels stuff: " << std::endl;
   std::cout << GridLogMessage << "**************************************************" << std::endl;
 
-  std::vector<std::vector<int>> blockSizes({{1, 1, 1, 2}}); // corresponds to two level algorithm
-  // std::vector<std::vector<int>> blockSizes({{1, 1, 1, 2},   // corresponds to three level algorithm
-  //                                           {1, 1, 1, 2}});
-
   const int nbasis = 20; // we fix the number of test vector to the same
                          // number on every level for now
 
+  //////////////////////////////////////////
+  // toggle to run two/three level method
+  //////////////////////////////////////////
 
-  CoarseGrids<nbasis> cGrids(blockSizes);
+  // // two-level algorithm
+  // std::vector<std::vector<int>> blockSizes({{2, 2, 2, 2}});
+  // CoarseGrids<nbasis>           coarseGrids(blockSizes, 1);
+
+  // three-level algorithm
+  std::vector<std::vector<int>> blockSizes({{2, 2, 2, 2}, {2, 2, 1, 1}});
+  CoarseGrids<nbasis>           coarseGrids(blockSizes, 2);
 
   std::cout << GridLogMessage << "**************************************************" << std::endl;
   std::cout << GridLogMessage << "Building the wilson operator on the fine grid" << std::endl;
@@ -723,133 +728,169 @@ int main(int argc, char **argv) {
   std::cout << GridLogMessage << "Some typedefs" << std::endl;
   std::cout << GridLogMessage << "**************************************************" << std::endl;
 
-  typedef Aggregation<vSpinColourVector, vTComplex, nbasis>     Subspace;
-  typedef CoarsenedMatrix<vSpinColourVector, vTComplex, nbasis> CoarseOperator;
-  typedef CoarseOperator::CoarseVector                          CoarseVector;
-  typedef TestVectorAnalyzer<LatticeFermion, nbasis>            TVA;
+  // typedefs for transition from fine to first coarsened grid
+  typedef vSpinColourVector                                                                 FineSiteVector;
+  typedef vTComplex                                                                         CoarseSiteScalar;
+  typedef Aggregation<FineSiteVector, CoarseSiteScalar, nbasis>                             Subspace;
+  typedef CoarsenedMatrix<FineSiteVector, CoarseSiteScalar, nbasis>                         CoarseOperator;
+  typedef CoarseOperator::CoarseVector                                                      CoarseVector;
+  typedef CoarseOperator::siteVector                                                        CoarseSiteVector;
+  typedef TestVectorAnalyzer<LatticeFermion, nbasis>                                        FineTVA;
+  typedef MultiGridPreconditioner<FineSiteVector, CoarseSiteScalar, nbasis, WilsonFermionR> FineMGPreconditioner;
+  typedef TrivialPrecon<LatticeFermion>                                                     FineTrivialPreconditioner;
 
-  // typedef Aggregation<vSpinColourVector,vTComplex,1,nbasis> Subspace;
-  // typedef CoarsenedMatrix<vSpinColourVector,vTComplex,1,nbasis> CoarseOperator;
-  // typedef CoarseOperator::CoarseVector                 CoarseVector;
-
-  // typedef CoarseOperator::CoarseG5PVector
-  // CoarseG5PVector; // P = preserving typedef
-  // CoarseOperator::CoarseG5PMatrix CoarseG5PMatrix;
+  // typedefs for transition from a coarse to the next coarser grid (some defs remain the same)
+  typedef Aggregation<CoarseSiteVector, CoarseSiteScalar, nbasis>                             SubSubSpace;
+  typedef CoarsenedMatrix<CoarseSiteVector, CoarseSiteScalar, nbasis>                         CoarseCoarseOperator;
+  typedef CoarseCoarseOperator::CoarseVector                                                  CoarseCoarseVector;
+  typedef CoarseCoarseOperator::siteVector                                                    CoarseCoarseSiteVector;
+  typedef TestVectorAnalyzer<CoarseVector, nbasis>                                            CoarseTVA;
+  typedef MultiGridPreconditioner<CoarseSiteVector, CoarseSiteScalar, nbasis, CoarseOperator> CoarseMGPreconditioner;
+  typedef TrivialPrecon<CoarseVector>                                                         CoarseTrivialPreconditioner;
 
+  static_assert(std::is_same<CoarseVector, CoarseCoarseVector>::value, "CoarseVector and CoarseCoarseVector must be of the same type");
 
   std::cout << GridLogMessage << "**************************************************" << std::endl;
   std::cout << GridLogMessage << "Calling Aggregation class to build subspaces" << std::endl;
   std::cout << GridLogMessage << "**************************************************" << std::endl;
 
-  MdagMLinearOperator<WilsonFermionR, LatticeFermion> HermOp(Dw);
-  Subspace                                            Aggregates(cGrids.Grids[0], FGrid, 0);
+  MdagMLinearOperator<WilsonFermionR, LatticeFermion> FineHermOp(Dw);
+  Subspace                                            FineAggregates(coarseGrids.Grids[0], FGrid, 0);
+
   assert((nbasis & 0x1) == 0);
   int nb = nbasis / 2;
   std::cout << GridLogMessage << " nbasis/2 = " << nb << std::endl;
 
-  Aggregates.CreateSubspace(fPRNG, HermOp /*, nb */); // Don't specify nb to see the orthogonalization check
+  FineAggregates.CreateSubspace(fPRNG, FineHermOp /*, nb */); // Don't specify nb to see the orthogonalization check
 
-  TVA testVectorAnalyzer;
-
-  testVectorAnalyzer(HermOp, Aggregates.subspace, nb);
+  std::cout << GridLogMessage << "Test vector analysis after initial creation of MG test vectors" << std::endl;
+  FineTVA fineTVA;
+  fineTVA(FineHermOp, FineAggregates.subspace, nb);
 
   for(int n = 0; n < nb; n++) {
-    // multiply with g5 normally instead of G5R5 since this specific to DWF
-    Aggregates.subspace[n + nb] = g5 * Aggregates.subspace[n];
-    std::cout << GridLogMessage << n << " subspace " << norm2(Aggregates.subspace[n + nb]) << " " << norm2(Aggregates.subspace[n])
-              << std::endl;
-  }
-  for(int n = 0; n < nbasis; n++) {
-    std::cout << GridLogMessage << "vec[" << n << "] = " << norm2(Aggregates.subspace[n]) << std::endl;
+    FineAggregates.subspace[n + nb] = g5 * FineAggregates.subspace[n];
   }
 
-  // tva(HermOp, Aggregates.subspace);
-  Aggregates.CheckOrthogonal();
-  testVectorAnalyzer(HermOp, Aggregates.subspace);
+  auto coarseSites = 1;
+  for(auto const &elem : coarseGrids.LattSizes[0]) coarseSites *= elem;
+
+  std::cout << GridLogMessage << "Norms of MG test vectors after chiral projection (coarse sites = " << coarseSites << ")" << std::endl;
+  for(int n = 0; n < nbasis; n++) {
+    std::cout << GridLogMessage << "vec[" << n << "] = " << norm2(FineAggregates.subspace[n]) << std::endl;
+  }
 
   std::cout << GridLogMessage << "**************************************************" << std::endl;
   std::cout << GridLogMessage << "Building coarse representation of Dirac operator" << std::endl;
   std::cout << GridLogMessage << "**************************************************" << std::endl;
 
   // using Gamma5HermitianLinearOperator corresponds to working with H = g5 * D
-  Gamma5HermitianLinearOperator<WilsonFermionR, LatticeFermion> HermIndefOp(Dw);
-  Gamma5HermitianLinearOperator<WilsonFermionR, LatticeFermion> HermIndefOpDD(DwDD);
-  CoarsenedMatrix<vSpinColourVector, vTComplex, nbasis>         CoarseOp(*cGrids.Grids[0]);
-  CoarseOp.CoarsenOperator(FGrid, HermIndefOp, Aggregates); // uses only linop.OpDiag & linop.OpDir
+  Gamma5HermitianLinearOperator<WilsonFermionR, LatticeFermion> FineHermIndefOp(Dw);
+  Gamma5HermitianLinearOperator<WilsonFermionR, LatticeFermion> FineHermIndefOpDD(DwDD);
+  CoarseOperator                                                Dc(*coarseGrids.Grids[0]);
+  Dc.CoarsenOperator(FGrid, FineHermIndefOp, FineAggregates); // uses only linop.OpDiag & linop.OpDir
+
+  std::cout << GridLogMessage << "Test vector analysis after construction of D_c" << std::endl;
+  fineTVA(FineHermOp, FineAggregates.subspace, nb);
 
   std::cout << GridLogMessage << "**************************************************" << std::endl;
   std::cout << GridLogMessage << "Building coarse vectors" << std::endl;
   std::cout << GridLogMessage << "**************************************************" << std::endl;
 
-  CoarseVector c_src(cGrids.Grids[0]);
-  CoarseVector c_res(cGrids.Grids[0]);
-  gaussian(cGrids.PRNGs[0], c_src);
-  c_res = zero;
-
+  CoarseVector coarseSource(coarseGrids.Grids[0]);
+  CoarseVector coarseResult(coarseGrids.Grids[0]);
+  gaussian(coarseGrids.PRNGs[0], coarseSource);
+  coarseResult = zero;
 
   std::cout << GridLogMessage << "**************************************************" << std::endl;
   std::cout << GridLogMessage << "Solving posdef-MR on coarse space " << std::endl;
   std::cout << GridLogMessage << "**************************************************" << std::endl;
 
-  MdagMLinearOperator<CoarseOperator, CoarseVector> PosdefLdop(CoarseOp);
-  MinimalResidual<CoarseVector>                     MR(5.0e-2, 100, false);
-  ConjugateGradient<CoarseVector>                   CG(5.0e-2, 100, false);
+  MdagMLinearOperator<CoarseOperator, CoarseVector> CoarsePosDefHermOp(Dc);
+  MinimalResidual<CoarseVector>                     CoarseMR(5.0e-2, 100, false);
+  ConjugateGradient<CoarseVector>                   CoarseCG(5.0e-2, 100, false);
 
-  MR(PosdefLdop, c_src, c_res);
+  CoarseMR(CoarsePosDefHermOp, coarseSource, coarseResult);
 
-  gaussian(cGrids.PRNGs[0], c_src);
-  c_res = zero;
-  CG(PosdefLdop, c_src, c_res);
-
-  std::cout << GridLogMessage << "**************************************************" << std::endl;
-  std::cout << GridLogMessage << "Dummy testing for building second coarse level" << std::endl;
-  std::cout << GridLogMessage << "**************************************************" << std::endl;
-
-  // typedef Aggregation< CoarseVector, vTComplex, nbasis > SubspaceAgain;
-
-  // SubspaceAgain AggregatesCoarsenedAgain(cGrids.Grids[1], cGrids.Grids[0], 0);
-  // AggregatesCoarsenedAgain.CreateSubspace(cGrids.PRNGs[0], PosdefLdop);
-
-  // for(int n=0;n<nb;n++){
-  //   AggregatesCoarsenedAgain.subspace[n+nb] = g5 * AggregatesCoarsenedAgain.subspace[n]; // multiply with g5 normally instead of G5R5 since this specific to DWF
-  //   std::cout<<GridLogMessage<<n<<" subspace "<<norm2(AggregatesCoarsenedAgain.subspace[n+nb])<<" "<<norm2(AggregatesCoarsenedAgain.subspace[n]) <<std::endl;
-  // }
-
-  // for(int n=0;n<nbasis;n++){
-  //   std::cout<<GridLogMessage << "vec["<<n<<"] = "<<norm2(AggregatesCoarsenedAgain.subspace[n])  <<std::endl;
-  // }
-
-  // AggregatesCoarsenedAgain.CheckOrthogonal();
-
-  // std::cout<<GridLogMessage << "**************************************************"<< std::endl;
-  // std::cout<<GridLogMessage << "Solving indef-MCR on coarse space "<< std::endl;
-  // std::cout<<GridLogMessage << "**************************************************"<< std::endl;
-  // HermitianLinearOperator<CoarseOperator,CoarseVector> HermIndefLdop(CoarseOp);
-  // ConjugateResidual<CoarseVector> MCR(1.0e-6,100000);
-  // MCR(HermIndefLdop,c_src,c_res);
+  gaussian(coarseGrids.PRNGs[0], coarseSource);
+  coarseResult = zero;
+  CoarseCG(CoarsePosDefHermOp, coarseSource, coarseResult);
 
   std::cout << GridLogMessage << "**************************************************" << std::endl;
   std::cout << GridLogMessage << "Building deflation preconditioner " << std::endl;
   std::cout << GridLogMessage << "**************************************************" << std::endl;
 
-  MultiGridPreconditioner<vSpinColourVector, vTComplex, nbasis, WilsonFermionR> Precon(
-    Aggregates, CoarseOp, HermIndefOp, Dw, HermIndefOp, Dw);
+  FineMGPreconditioner FineMGPrecon(FineAggregates, Dc, FineHermIndefOp, Dw, FineHermIndefOp, Dw);
 
-  MultiGridPreconditioner<vSpinColourVector, vTComplex, nbasis, WilsonFermionR> PreconDD(
-    Aggregates, CoarseOp, HermIndefOp, Dw, HermIndefOpDD, DwDD);
-  // MultiGridPreconditioner(Aggregates &Agg, CoarseOperator &Coarse,
-  //                         FineOperator &Fine,Matrix &FineMatrix,
-  //                         FineOperator &Smooth,Matrix &SmootherMatrix)
-  TrivialPrecon<LatticeFermion> Simple;
+  FineMGPreconditioner FineMGPreconDD(FineAggregates, Dc, FineHermIndefOp, Dw, FineHermIndefOpDD, DwDD);
 
-  Precon.runChecks(cGrids, 0);
+  FineTrivialPreconditioner FineSimplePrecon;
+
+  FineMGPrecon.runChecks(coarseGrids, 0);
+
+  if(coarseGrids.LattSizes.size() == 2) {
+
+    std::cout << GridLogMessage << "**************************************************" << std::endl;
+    std::cout << GridLogMessage << "Dummy testing for building a second coarse level" << std::endl;
+    std::cout << GridLogMessage << "**************************************************" << std::endl;
+
+    SubSubSpace CoarseAggregates(coarseGrids.Grids[1], coarseGrids.Grids[0], 0);
+    CoarseAggregates.CreateSubspace(coarseGrids.PRNGs[0], CoarsePosDefHermOp);
+
+    // // this doesn't work because this function applies g5 to a vector, which
+    // // doesn't work for coarse vectors atm -> FIXME
+    // CoarseTVA coarseTVA;
+    // coarseTVA(CoarsePosDefHermOp, CoarseAggregates.subspace, nb);
+
+    // // cannot apply g5 to coarse vectors atm -> FIXME
+    // for(int n=0;n<nb;n++){
+    //   CoarseAggregates.subspace[n+nb] = g5 * CoarseAggregates.subspace[n]; // multiply with g5 normally instead of G5R5 since this specific to DWF
+    //   std::cout<<GridLogMessage<<n<<" subspace "<<norm2(CoarseAggregates.subspace[n+nb])<<" "<<norm2(CoarseAggregates.subspace[n]) <<std::endl;
+    // }
+
+    auto coarseCoarseSites = 1;
+    for(auto const &elem : coarseGrids.LattSizes[1]) coarseCoarseSites *= elem;
+
+    std::cout << GridLogMessage << "Norms of MG test vectors after chiral projection (coarse coarse sites = " << coarseCoarseSites << ")"
+              << std::endl;
+    for(int n = 0; n < nbasis; n++) {
+      std::cout << GridLogMessage << "vec[" << n << "] = " << norm2(CoarseAggregates.subspace[n]) << std::endl;
+    }
+
+    CoarseCoarseOperator Dcc(*coarseGrids.Grids[1]);
+    Dcc.CoarsenOperator(coarseGrids.Grids[0], CoarsePosDefHermOp, CoarseAggregates); // uses only linop.OpDiag & linop.OpDir
+
+    // // this doesn't work because this function applies g5 to a vector, which
+    // // doesn't work for coarse vectors atm -> FIXME
+    // std::cout << GridLogMessage << "Test vector analysis after construction of D_c_c" << std::endl;
+    // coarseTVA(CoarsePosDefHermOp, CoarseAggregates.subspace, nb);
+
+    CoarseCoarseVector coarseCoarseSource(coarseGrids.Grids[1]);
+    CoarseCoarseVector coarseCoarseResult(coarseGrids.Grids[1]);
+    gaussian(coarseGrids.PRNGs[1], coarseCoarseSource);
+    coarseCoarseResult = zero;
+
+    MdagMLinearOperator<CoarseCoarseOperator, CoarseCoarseVector> CoarseCoarsePosDefHermOp(Dcc);
+    MinimalResidual<CoarseCoarseVector>                           CoarseCoarseMR(5.0e-2, 100, false);
+    ConjugateGradient<CoarseCoarseVector>                         CoarseCoarseCG(5.0e-2, 100, false);
+    CoarseCoarseMR(CoarseCoarsePosDefHermOp, coarseCoarseSource, coarseCoarseResult);
+    gaussian(coarseGrids.PRNGs[1], coarseCoarseSource);
+    coarseCoarseResult = zero;
+    CoarseCoarseCG(CoarseCoarsePosDefHermOp, coarseCoarseSource, coarseCoarseResult);
+
+    CoarseMGPreconditioner CoarseMGPrecon(CoarseAggregates, Dcc, CoarsePosDefHermOp, Dc, CoarsePosDefHermOp, Dc);
+
+    CoarseMGPrecon.runChecks(coarseGrids, 1);
+
+    std::cout << GridLogMessage << "ARTIFICIAL ABORT" << std::endl;
+    abort();
+  }
 
   std::cout << GridLogMessage << "**************************************************" << std::endl;
   std::cout << GridLogMessage << "Building two level VPGCR and FGMRES solvers" << std::endl;
   std::cout << GridLogMessage << "**************************************************" << std::endl;
 
-  PrecGeneralisedConjugateResidual<LatticeFermion>   VPGCRMG(1.0e-12, 100, Precon, 8, 8);
-  FlexibleGeneralisedMinimalResidual<LatticeFermion> FGMRESMG(1.0e-12, 100, Precon, 8);
+  PrecGeneralisedConjugateResidual<LatticeFermion>   VPGCRMG(1.0e-12, 100, FineMGPrecon, 8, 8);
+  FlexibleGeneralisedMinimalResidual<LatticeFermion> FGMRESMG(1.0e-12, 100, FineMGPrecon, 8);
 
   std::cout << GridLogMessage << "checking norm src " << norm2(src) << std::endl;
 
@@ -857,8 +898,8 @@ int main(int argc, char **argv) {
   std::cout << GridLogMessage << "Building unpreconditioned VPGCR and FGMRES solvers" << std::endl;
   std::cout << GridLogMessage << "**************************************************" << std::endl;
 
-  PrecGeneralisedConjugateResidual<LatticeFermion>   VPGCRT(1.0e-12, 4000000, Simple, 8, 8);
-  FlexibleGeneralisedMinimalResidual<LatticeFermion> FGMREST(1.0e-12, 4000000, Simple, 8);
+  PrecGeneralisedConjugateResidual<LatticeFermion>   VPGCRT(1.0e-12, 4000000, FineSimplePrecon, 8, 8);
+  FlexibleGeneralisedMinimalResidual<LatticeFermion> FGMREST(1.0e-12, 4000000, FineSimplePrecon, 8);
 
   std::cout << GridLogMessage << "**************************************************" << std::endl;
   std::cout << GridLogMessage << "Testing the four solvers" << std::endl;
@@ -872,7 +913,7 @@ int main(int argc, char **argv) {
 
   for(auto elem : solvers) {
     result = zero;
-    (*elem)(HermIndefOp, src, result);
+    (*elem)(FineHermIndefOp, src, result);
   }
 
   Grid_finalize();

From 10141f90c96c4a1e093343f38b2aea2547551ab7 Mon Sep 17 00:00:00 2001
From: Daniel Richtmann <daniel.richtmann@physik.uni-regensburg.de>
Date: Tue, 30 Jan 2018 10:13:08 +0100
Subject: [PATCH 071/130] WilsonMG: Rename test file

---
 tests/solver/{Test_wilson_ddalphaamg.cc => Test_wilson_mg.cc} | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)
 rename tests/solver/{Test_wilson_ddalphaamg.cc => Test_wilson_mg.cc} (99%)

diff --git a/tests/solver/Test_wilson_ddalphaamg.cc b/tests/solver/Test_wilson_mg.cc
similarity index 99%
rename from tests/solver/Test_wilson_ddalphaamg.cc
rename to tests/solver/Test_wilson_mg.cc
index 581bcda2..25d18d9a 100644
--- a/tests/solver/Test_wilson_ddalphaamg.cc
+++ b/tests/solver/Test_wilson_mg.cc
@@ -2,7 +2,7 @@
 
     Grid physics library, www.github.com/paboyle/Grid 
 
-    Source file: ./tests/solver/Test_wilson_ddalphaamg.cc
+    Source file: ./tests/solver/Test_wilson_mg.cc
 
     Copyright (C) 2017
 

From 9292be0b69cfb8938d3be1bd8a4f2e4ce49cfed5 Mon Sep 17 00:00:00 2001
From: Daniel Richtmann <daniel.richtmann@physik.uni-regensburg.de>
Date: Wed, 31 Jan 2018 14:03:30 +0100
Subject: [PATCH 072/130] =?UTF-8?q?WilsonMG:=20Add=20check=20for=20Mdiag?=
 =?UTF-8?q?=20+=20=CE=A3=20Mdir=20=3D=3D=20M?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Need to test my implementations of CoarsenedMatrix::Mdiag &
CoarsenedMatrix::Mdir.
---
 tests/solver/Test_wilson_mg.cc | 47 ++++++++++++++++++++++++++++++++++
 1 file changed, 47 insertions(+)

diff --git a/tests/solver/Test_wilson_mg.cc b/tests/solver/Test_wilson_mg.cc
index 25d18d9a..71fe5837 100644
--- a/tests/solver/Test_wilson_mg.cc
+++ b/tests/solver/Test_wilson_mg.cc
@@ -126,6 +126,40 @@ public:
   }
 };
 
+template<class Field> void testOperator(LinearOperatorBase<Field> &LinOp, GridBase *Grid) {
+
+  std::vector<int> seeds({1, 2, 3, 4});
+  GridParallelRNG  RNG(Grid);
+  RNG.SeedFixedIntegers(seeds);
+
+  // clang-format off
+  Field src(Grid);    random(RNG, src);
+  Field result(Grid); result = zero;
+  Field ref(Grid);    ref    = zero;
+  Field tmp(Grid);
+  Field err(Grid);
+  // clang-format on
+
+  LinOp.Op(src, ref);
+
+  LinOp.OpDiag(src, result);
+  std::cout << GridLogMessage << "diag:  norm2(result) = " << norm2(result) << std::endl;
+
+  for(int d = 0; d < 4; d++) {
+    LinOp.OpDir(src, tmp, d, +1);
+    std::cout << GridLogMessage << "dir + " << d << ": norm2(tmp) = " << norm2(tmp) << std::endl;
+    result = result + tmp;
+
+    LinOp.OpDir(src, tmp, d, -1);
+    std::cout << GridLogMessage << "dir - " << d << ": norm2(tmp) = " << norm2(tmp) << std::endl;
+    result = result + tmp;
+  }
+
+  err = result - ref;
+
+  std::cout << GridLogMessage << "Error: absolute = " << norm2(err) << " relative = " << norm2(err) / norm2(ref) << std::endl;
+}
+
 // template < class Fobj, class CComplex, int coarseSpins, int nbasis, class Matrix >
 // class MultiGridPreconditioner : public LinearFunction< Lattice< Fobj > > {
 template<class Fobj, class CComplex, int nbasis, class Matrix> class MultiGridPreconditioner : public LinearFunction<Lattice<Fobj>> {
@@ -815,6 +849,19 @@ int main(int argc, char **argv) {
   coarseResult = zero;
   CoarseCG(CoarsePosDefHermOp, coarseSource, coarseResult);
 
+  std::cout << GridLogMessage << "**************************************************" << std::endl;
+  std::cout << GridLogMessage << "Testing the operators" << std::endl;
+  std::cout << GridLogMessage << "**************************************************" << std::endl;
+
+  std::cout << GridLogMessage << "MdagMLinearOperator<WilsonFermionR, LatticeFermion> FineHermOp(Dw);" << std::endl;
+  testOperator(FineHermOp, FGrid);
+  std::cout << GridLogMessage << "Gamma5HermitianLinearOperator<WilsonFermionR, LatticeFermion> FineHermIndefOp(Dw);" << std::endl;
+  testOperator(FineHermIndefOp, FGrid);
+  std::cout << GridLogMessage << "Gamma5HermitianLinearOperator<WilsonFermionR, LatticeFermion> FineHermIndefOpDD(DwDD);" << std::endl;
+  testOperator(FineHermIndefOpDD, FGrid);
+  std::cout << GridLogMessage << "MdagMLinearOperator<CoarseOperator, CoarseVector> CoarsePosDefHermOp(Dc);" << std::endl;
+  testOperator(CoarsePosDefHermOp, coarseGrids.Grids[0]);
+
   std::cout << GridLogMessage << "**************************************************" << std::endl;
   std::cout << GridLogMessage << "Building deflation preconditioner " << std::endl;
   std::cout << GridLogMessage << "**************************************************" << std::endl;

From 7c86d2085b93c185680a05f2bd4b9bd474b3b55f Mon Sep 17 00:00:00 2001
From: Daniel Richtmann <daniel.richtmann@physik.uni-regensburg.de>
Date: Thu, 1 Feb 2018 12:24:16 +0100
Subject: [PATCH 073/130] WilsonMG: Some minor cleanup

---
 tests/solver/Test_wilson_mg.cc | 66 ++++++++++++++++------------------
 1 file changed, 31 insertions(+), 35 deletions(-)

diff --git a/tests/solver/Test_wilson_mg.cc b/tests/solver/Test_wilson_mg.cc
index 71fe5837..49026395 100644
--- a/tests/solver/Test_wilson_mg.cc
+++ b/tests/solver/Test_wilson_mg.cc
@@ -836,18 +836,20 @@ int main(int argc, char **argv) {
   coarseResult = zero;
 
   std::cout << GridLogMessage << "**************************************************" << std::endl;
-  std::cout << GridLogMessage << "Solving posdef-MR on coarse space " << std::endl;
+  std::cout << GridLogMessage << "Testing some coarse space solvers" << std::endl;
   std::cout << GridLogMessage << "**************************************************" << std::endl;
 
   MdagMLinearOperator<CoarseOperator, CoarseVector> CoarsePosDefHermOp(Dc);
-  MinimalResidual<CoarseVector>                     CoarseMR(5.0e-2, 100, false);
-  ConjugateGradient<CoarseVector>                   CoarseCG(5.0e-2, 100, false);
 
-  CoarseMR(CoarsePosDefHermOp, coarseSource, coarseResult);
+  std::vector<std::unique_ptr<OperatorFunction<CoarseVector>>> coarseSolvers;
+  coarseSolvers.emplace_back(new GeneralisedMinimalResidual<CoarseVector>(5.0e-2, 100, 8, false));
+  coarseSolvers.emplace_back(new MinimalResidual<CoarseVector>(5.0e-2, 100, false));
+  coarseSolvers.emplace_back(new ConjugateGradient<CoarseVector>(5.0e-2, 100, false));
 
-  gaussian(coarseGrids.PRNGs[0], coarseSource);
-  coarseResult = zero;
-  CoarseCG(CoarsePosDefHermOp, coarseSource, coarseResult);
+  for(auto const &solver : coarseSolvers) {
+    coarseResult = zero;
+    (*solver)(CoarsePosDefHermOp, coarseSource, coarseResult);
+  }
 
   std::cout << GridLogMessage << "**************************************************" << std::endl;
   std::cout << GridLogMessage << "Testing the operators" << std::endl;
@@ -917,12 +919,16 @@ int main(int argc, char **argv) {
     coarseCoarseResult = zero;
 
     MdagMLinearOperator<CoarseCoarseOperator, CoarseCoarseVector> CoarseCoarsePosDefHermOp(Dcc);
-    MinimalResidual<CoarseCoarseVector>                           CoarseCoarseMR(5.0e-2, 100, false);
-    ConjugateGradient<CoarseCoarseVector>                         CoarseCoarseCG(5.0e-2, 100, false);
-    CoarseCoarseMR(CoarseCoarsePosDefHermOp, coarseCoarseSource, coarseCoarseResult);
-    gaussian(coarseGrids.PRNGs[1], coarseCoarseSource);
-    coarseCoarseResult = zero;
-    CoarseCoarseCG(CoarseCoarsePosDefHermOp, coarseCoarseSource, coarseCoarseResult);
+
+    std::vector<std::unique_ptr<OperatorFunction<CoarseCoarseVector>>> coarseCoarseSolvers;
+    coarseSolvers.emplace_back(new GeneralisedMinimalResidual<CoarseCoarseVector>(5.0e-2, 100, 8, false));
+    coarseSolvers.emplace_back(new MinimalResidual<CoarseCoarseVector>(5.0e-2, 100, false));
+    coarseSolvers.emplace_back(new ConjugateGradient<CoarseCoarseVector>(5.0e-2, 100, false));
+
+    for(auto const &solver : coarseCoarseSolvers) {
+      coarseCoarseResult = zero;
+      (*solver)(CoarseCoarsePosDefHermOp, coarseCoarseSource, coarseCoarseResult);
+    }
 
     CoarseMGPreconditioner CoarseMGPrecon(CoarseAggregates, Dcc, CoarsePosDefHermOp, Dc, CoarsePosDefHermOp, Dc);
 
@@ -933,34 +939,24 @@ int main(int argc, char **argv) {
   }
 
   std::cout << GridLogMessage << "**************************************************" << std::endl;
-  std::cout << GridLogMessage << "Building two level VPGCR and FGMRES solvers" << std::endl;
+  std::cout << GridLogMessage << "Building VPGCR and FGMRES solvers w/ & w/o MG Preconditioner" << std::endl;
   std::cout << GridLogMessage << "**************************************************" << std::endl;
 
-  PrecGeneralisedConjugateResidual<LatticeFermion>   VPGCRMG(1.0e-12, 100, FineMGPrecon, 8, 8);
-  FlexibleGeneralisedMinimalResidual<LatticeFermion> FGMRESMG(1.0e-12, 100, FineMGPrecon, 8);
+  std::vector<std::unique_ptr<OperatorFunction<LatticeFermion>>> solvers;
+  solvers.emplace_back(new PrecGeneralisedConjugateResidual<LatticeFermion>(1.0e-12, 100, FineMGPrecon, 8, 8));
+  solvers.emplace_back(new FlexibleGeneralisedMinimalResidual<LatticeFermion>(1.0e-12, 100, FineMGPrecon, 8));
+  solvers.emplace_back(new PrecGeneralisedConjugateResidual<LatticeFermion>(1.0e-12, 4000000, FineSimplePrecon, 8, 8));
+  solvers.emplace_back(new FlexibleGeneralisedMinimalResidual<LatticeFermion>(1.0e-12, 4000000, FineSimplePrecon, 8));
+
+  std::cout << GridLogMessage << "**************************************************" << std::endl;
+  std::cout << GridLogMessage << "Testing the solvers" << std::endl;
+  std::cout << GridLogMessage << "**************************************************" << std::endl;
 
   std::cout << GridLogMessage << "checking norm src " << norm2(src) << std::endl;
 
-  std::cout << GridLogMessage << "**************************************************" << std::endl;
-  std::cout << GridLogMessage << "Building unpreconditioned VPGCR and FGMRES solvers" << std::endl;
-  std::cout << GridLogMessage << "**************************************************" << std::endl;
-
-  PrecGeneralisedConjugateResidual<LatticeFermion>   VPGCRT(1.0e-12, 4000000, FineSimplePrecon, 8, 8);
-  FlexibleGeneralisedMinimalResidual<LatticeFermion> FGMREST(1.0e-12, 4000000, FineSimplePrecon, 8);
-
-  std::cout << GridLogMessage << "**************************************************" << std::endl;
-  std::cout << GridLogMessage << "Testing the four solvers" << std::endl;
-  std::cout << GridLogMessage << "**************************************************" << std::endl;
-
-  std::vector<OperatorFunction<LatticeFermion> *> solvers;
-  solvers.push_back(&VPGCRMG);
-  solvers.push_back(&FGMRESMG);
-  solvers.push_back(&VPGCRT);
-  solvers.push_back(&FGMREST);
-
-  for(auto elem : solvers) {
+  for(auto const &solver : solvers) {
     result = zero;
-    (*elem)(FineHermIndefOp, src, result);
+    (*solver)(FineHermIndefOp, src, result);
   }
 
   Grid_finalize();

From 871649238c9cf345f2c58be5adfea451a86008ef Mon Sep 17 00:00:00 2001
From: Daniel Richtmann <daniel.richtmann@physik.uni-regensburg.de>
Date: Thu, 1 Feb 2018 14:43:08 +0100
Subject: [PATCH 074/130] WilsonMG: Stricter naming for linear operators

---
 tests/solver/Test_wilson_mg.cc | 34 +++++++++++++++++-----------------
 1 file changed, 17 insertions(+), 17 deletions(-)

diff --git a/tests/solver/Test_wilson_mg.cc b/tests/solver/Test_wilson_mg.cc
index 49026395..9dd780cf 100644
--- a/tests/solver/Test_wilson_mg.cc
+++ b/tests/solver/Test_wilson_mg.cc
@@ -788,18 +788,18 @@ int main(int argc, char **argv) {
   std::cout << GridLogMessage << "Calling Aggregation class to build subspaces" << std::endl;
   std::cout << GridLogMessage << "**************************************************" << std::endl;
 
-  MdagMLinearOperator<WilsonFermionR, LatticeFermion> FineHermOp(Dw);
+  MdagMLinearOperator<WilsonFermionR, LatticeFermion> FineHermPosdefOp(Dw);
   Subspace                                            FineAggregates(coarseGrids.Grids[0], FGrid, 0);
 
   assert((nbasis & 0x1) == 0);
   int nb = nbasis / 2;
   std::cout << GridLogMessage << " nbasis/2 = " << nb << std::endl;
 
-  FineAggregates.CreateSubspace(fPRNG, FineHermOp /*, nb */); // Don't specify nb to see the orthogonalization check
+  FineAggregates.CreateSubspace(fPRNG, FineHermPosdefOp /*, nb */); // Don't specify nb to see the orthogonalization check
 
   std::cout << GridLogMessage << "Test vector analysis after initial creation of MG test vectors" << std::endl;
   FineTVA fineTVA;
-  fineTVA(FineHermOp, FineAggregates.subspace, nb);
+  fineTVA(FineHermPosdefOp, FineAggregates.subspace, nb);
 
   for(int n = 0; n < nb; n++) {
     FineAggregates.subspace[n + nb] = g5 * FineAggregates.subspace[n];
@@ -824,7 +824,7 @@ int main(int argc, char **argv) {
   Dc.CoarsenOperator(FGrid, FineHermIndefOp, FineAggregates); // uses only linop.OpDiag & linop.OpDir
 
   std::cout << GridLogMessage << "Test vector analysis after construction of D_c" << std::endl;
-  fineTVA(FineHermOp, FineAggregates.subspace, nb);
+  fineTVA(FineHermPosdefOp, FineAggregates.subspace, nb);
 
   std::cout << GridLogMessage << "**************************************************" << std::endl;
   std::cout << GridLogMessage << "Building coarse vectors" << std::endl;
@@ -839,7 +839,7 @@ int main(int argc, char **argv) {
   std::cout << GridLogMessage << "Testing some coarse space solvers" << std::endl;
   std::cout << GridLogMessage << "**************************************************" << std::endl;
 
-  MdagMLinearOperator<CoarseOperator, CoarseVector> CoarsePosDefHermOp(Dc);
+  MdagMLinearOperator<CoarseOperator, CoarseVector> CoarseHermPosdefOp(Dc);
 
   std::vector<std::unique_ptr<OperatorFunction<CoarseVector>>> coarseSolvers;
   coarseSolvers.emplace_back(new GeneralisedMinimalResidual<CoarseVector>(5.0e-2, 100, 8, false));
@@ -848,21 +848,21 @@ int main(int argc, char **argv) {
 
   for(auto const &solver : coarseSolvers) {
     coarseResult = zero;
-    (*solver)(CoarsePosDefHermOp, coarseSource, coarseResult);
+    (*solver)(CoarseHermPosdefOp, coarseSource, coarseResult);
   }
 
   std::cout << GridLogMessage << "**************************************************" << std::endl;
   std::cout << GridLogMessage << "Testing the operators" << std::endl;
   std::cout << GridLogMessage << "**************************************************" << std::endl;
 
-  std::cout << GridLogMessage << "MdagMLinearOperator<WilsonFermionR, LatticeFermion> FineHermOp(Dw);" << std::endl;
-  testOperator(FineHermOp, FGrid);
+  std::cout << GridLogMessage << "MdagMLinearOperator<WilsonFermionR, LatticeFermion> FineHermPosdefOp(Dw);" << std::endl;
+  testOperator(FineHermPosdefOp, FGrid);
   std::cout << GridLogMessage << "Gamma5HermitianLinearOperator<WilsonFermionR, LatticeFermion> FineHermIndefOp(Dw);" << std::endl;
   testOperator(FineHermIndefOp, FGrid);
   std::cout << GridLogMessage << "Gamma5HermitianLinearOperator<WilsonFermionR, LatticeFermion> FineHermIndefOpDD(DwDD);" << std::endl;
   testOperator(FineHermIndefOpDD, FGrid);
-  std::cout << GridLogMessage << "MdagMLinearOperator<CoarseOperator, CoarseVector> CoarsePosDefHermOp(Dc);" << std::endl;
-  testOperator(CoarsePosDefHermOp, coarseGrids.Grids[0]);
+  std::cout << GridLogMessage << "MdagMLinearOperator<CoarseOperator, CoarseVector> CoarseHermPosdefOp(Dc);" << std::endl;
+  testOperator(CoarseHermPosdefOp, coarseGrids.Grids[0]);
 
   std::cout << GridLogMessage << "**************************************************" << std::endl;
   std::cout << GridLogMessage << "Building deflation preconditioner " << std::endl;
@@ -883,12 +883,12 @@ int main(int argc, char **argv) {
     std::cout << GridLogMessage << "**************************************************" << std::endl;
 
     SubSubSpace CoarseAggregates(coarseGrids.Grids[1], coarseGrids.Grids[0], 0);
-    CoarseAggregates.CreateSubspace(coarseGrids.PRNGs[0], CoarsePosDefHermOp);
+    CoarseAggregates.CreateSubspace(coarseGrids.PRNGs[0], CoarseHermPosdefOp);
 
     // // this doesn't work because this function applies g5 to a vector, which
     // // doesn't work for coarse vectors atm -> FIXME
     // CoarseTVA coarseTVA;
-    // coarseTVA(CoarsePosDefHermOp, CoarseAggregates.subspace, nb);
+    // coarseTVA(CoarseHermPosdefOp, CoarseAggregates.subspace, nb);
 
     // // cannot apply g5 to coarse vectors atm -> FIXME
     // for(int n=0;n<nb;n++){
@@ -906,19 +906,19 @@ int main(int argc, char **argv) {
     }
 
     CoarseCoarseOperator Dcc(*coarseGrids.Grids[1]);
-    Dcc.CoarsenOperator(coarseGrids.Grids[0], CoarsePosDefHermOp, CoarseAggregates); // uses only linop.OpDiag & linop.OpDir
+    Dcc.CoarsenOperator(coarseGrids.Grids[0], CoarseHermPosdefOp, CoarseAggregates); // uses only linop.OpDiag & linop.OpDir
 
     // // this doesn't work because this function applies g5 to a vector, which
     // // doesn't work for coarse vectors atm -> FIXME
     // std::cout << GridLogMessage << "Test vector analysis after construction of D_c_c" << std::endl;
-    // coarseTVA(CoarsePosDefHermOp, CoarseAggregates.subspace, nb);
+    // coarseTVA(CoarseHermPosdefOp, CoarseAggregates.subspace, nb);
 
     CoarseCoarseVector coarseCoarseSource(coarseGrids.Grids[1]);
     CoarseCoarseVector coarseCoarseResult(coarseGrids.Grids[1]);
     gaussian(coarseGrids.PRNGs[1], coarseCoarseSource);
     coarseCoarseResult = zero;
 
-    MdagMLinearOperator<CoarseCoarseOperator, CoarseCoarseVector> CoarseCoarsePosDefHermOp(Dcc);
+    MdagMLinearOperator<CoarseCoarseOperator, CoarseCoarseVector> CoarseCoarseHermPosdefOp(Dcc);
 
     std::vector<std::unique_ptr<OperatorFunction<CoarseCoarseVector>>> coarseCoarseSolvers;
     coarseSolvers.emplace_back(new GeneralisedMinimalResidual<CoarseCoarseVector>(5.0e-2, 100, 8, false));
@@ -927,10 +927,10 @@ int main(int argc, char **argv) {
 
     for(auto const &solver : coarseCoarseSolvers) {
       coarseCoarseResult = zero;
-      (*solver)(CoarseCoarsePosDefHermOp, coarseCoarseSource, coarseCoarseResult);
+      (*solver)(CoarseCoarseHermPosdefOp, coarseCoarseSource, coarseCoarseResult);
     }
 
-    CoarseMGPreconditioner CoarseMGPrecon(CoarseAggregates, Dcc, CoarsePosDefHermOp, Dc, CoarsePosDefHermOp, Dc);
+    CoarseMGPreconditioner      CoarseMGPrecon(CoarseAggregates, Dcc, CoarseHermPosdefOp, Dc, CoarseHermPosdefOp, Dc);
 
     CoarseMGPrecon.runChecks(coarseGrids, 1);
 

From 1671adfd492704cb4c15a4a759cfaba1d47ac526 Mon Sep 17 00:00:00 2001
From: Daniel Richtmann <daniel.richtmann@physik.uni-regensburg.de>
Date: Fri, 2 Feb 2018 10:03:15 +0100
Subject: [PATCH 075/130] WilsonMG: Add some tests for linear operators

---
 tests/solver/Test_wilson_mg.cc | 100 ++++++++++++++++++++++++++-------
 1 file changed, 80 insertions(+), 20 deletions(-)

diff --git a/tests/solver/Test_wilson_mg.cc b/tests/solver/Test_wilson_mg.cc
index 9dd780cf..a402cf02 100644
--- a/tests/solver/Test_wilson_mg.cc
+++ b/tests/solver/Test_wilson_mg.cc
@@ -126,38 +126,98 @@ public:
   }
 };
 
-template<class Field> void testOperator(LinearOperatorBase<Field> &LinOp, GridBase *Grid) {
+template<class Field> void testLinearOperator(LinearOperatorBase<Field> &LinOp, GridBase *Grid, std::string const &name = "") {
 
   std::vector<int> seeds({1, 2, 3, 4});
   GridParallelRNG  RNG(Grid);
   RNG.SeedFixedIntegers(seeds);
 
-  // clang-format off
-  Field src(Grid);    random(RNG, src);
-  Field result(Grid); result = zero;
-  Field ref(Grid);    ref    = zero;
-  Field tmp(Grid);
-  Field err(Grid);
-  // clang-format on
+  {
+    std::cout << GridLogMessage << "Testing that Mdiag + Σ_μ Mdir_μ == M for operator " << name << ":" << std::endl;
 
-  LinOp.Op(src, ref);
+    // clang-format off
+    Field src(Grid);    random(RNG, src);
+    Field ref(Grid);    ref    = zero;
+    Field result(Grid); result = zero;
+    Field diag(Grid);   diag   = zero;
+    Field sumDir(Grid); sumDir = zero;
+    Field tmp(Grid);
+    Field err(Grid);
+    // clang-format on
 
-  LinOp.OpDiag(src, result);
-  std::cout << GridLogMessage << "diag:  norm2(result) = " << norm2(result) << std::endl;
+    LinOp.Op(src, ref);
+    std::cout << GridLogMessage << " norm2(M * src)            = " << norm2(ref) << std::endl;
 
-  for(int d = 0; d < 4; d++) {
-    LinOp.OpDir(src, tmp, d, +1);
-    std::cout << GridLogMessage << "dir + " << d << ": norm2(tmp) = " << norm2(tmp) << std::endl;
-    result = result + tmp;
+    LinOp.OpDiag(src, diag);
+    std::cout << GridLogMessage << " norm2(Mdiag * src)        = " << norm2(diag) << std::endl;
 
-    LinOp.OpDir(src, tmp, d, -1);
-    std::cout << GridLogMessage << "dir - " << d << ": norm2(tmp) = " << norm2(tmp) << std::endl;
-    result = result + tmp;
+    for(int dir = 0; dir < 4; dir++) {
+      for(auto disp : {+1, -1}) {
+        LinOp.OpDir(src, tmp, dir, disp);
+        std::cout << GridLogMessage << " norm2(Mdir_{" << dir << "," << disp << "} * src) = " << norm2(tmp) << std::endl;
+        sumDir = sumDir + tmp;
+      }
+    }
+    std::cout << GridLogMessage << " norm2(Σ_μ Mdir_μ * src)   = " << norm2(sumDir) << std::endl;
+
+    result = diag + sumDir;
+    err    = ref - result;
+
+    std::cout << GridLogMessage << " Absolute deviation        = " << norm2(err) << std::endl;
+    std::cout << GridLogMessage << " Relative deviation        = " << norm2(err) / norm2(ref) << std::endl;
   }
 
-  err = result - ref;
+  {
+    std::cout << GridLogMessage << "Testing hermiticity stochastically for operator " << name << ":" << std::endl;
 
-  std::cout << GridLogMessage << "Error: absolute = " << norm2(err) << " relative = " << norm2(err) / norm2(ref) << std::endl;
+    // clang-format off
+    Field phi(Grid); random(RNG, phi);
+    Field chi(Grid); random(RNG, chi);
+    Field MPhi(Grid);
+    Field MdagChi(Grid);
+    // clang-format on
+
+    LinOp.Op(phi, MPhi);
+    LinOp.AdjOp(chi, MdagChi);
+
+    ComplexD chiMPhi    = innerProduct(chi, MPhi);
+    ComplexD phiMdagChi = innerProduct(phi, MdagChi);
+
+    ComplexD phiMPhi    = innerProduct(phi, MPhi);
+    ComplexD chiMdagChi = innerProduct(chi, MdagChi);
+
+    std::cout << GridLogMessage << " chiMPhi = " << chiMPhi << " phiMdagChi = " << phiMdagChi
+              << " difference = " << chiMPhi - conjugate(phiMdagChi) << std::endl;
+
+    std::cout << GridLogMessage << " phiMPhi = " << phiMPhi << " chiMdagChi = " << chiMdagChi << " <- should be real if hermitian"
+              << std::endl;
+  }
+
+  {
+    std::cout << GridLogMessage << "Testing linearity for operator " << name << ":" << std::endl;
+
+    // clang-format off
+    Field phi(Grid); random(RNG, phi);
+    Field chi(Grid); random(RNG, chi);
+    Field phiPlusChi(Grid);
+    Field MPhi(Grid);
+    Field MChi(Grid);
+    Field MPhiPlusChi(Grid);
+    Field linearityError(Grid);
+    // clang-format on
+
+    LinOp.Op(phi, MPhi);
+    LinOp.Op(chi, MChi);
+
+    phiPlusChi = phi + chi;
+
+    LinOp.Op(phiPlusChi, MPhiPlusChi);
+
+    linearityError = MPhiPlusChi - MPhi;
+    linearityError = linearityError - MChi;
+
+    std::cout << GridLogMessage << " norm2(linearityError) = " << norm2(linearityError) << std::endl;
+  }
 }
 
 // template < class Fobj, class CComplex, int coarseSpins, int nbasis, class Matrix >

From 68c66d2e4bbeaf23ec5e105f10f6ffbd70664141 Mon Sep 17 00:00:00 2001
From: Daniel Richtmann <daniel.richtmann@gmail.com>
Date: Wed, 7 Feb 2018 17:23:47 +0100
Subject: [PATCH 076/130] Remove empty line in output of *Residual* solvers

---
 .../iterative/CommunicationAvoidingGeneralisedMinimalResidual.h | 2 +-
 .../FlexibleCommunicationAvoidingGeneralisedMinimalResidual.h   | 2 +-
 lib/algorithms/iterative/FlexibleGeneralisedMinimalResidual.h   | 2 +-
 lib/algorithms/iterative/GeneralisedMinimalResidual.h           | 2 +-
 lib/algorithms/iterative/MinimalResidual.h                      | 2 +-
 5 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/lib/algorithms/iterative/CommunicationAvoidingGeneralisedMinimalResidual.h b/lib/algorithms/iterative/CommunicationAvoidingGeneralisedMinimalResidual.h
index d6a1ca12..3bf19131 100644
--- a/lib/algorithms/iterative/CommunicationAvoidingGeneralisedMinimalResidual.h
+++ b/lib/algorithms/iterative/CommunicationAvoidingGeneralisedMinimalResidual.h
@@ -84,7 +84,7 @@ class CommunicationAvoidingGeneralisedMinimalResidual : public OperatorFunction<
 
     Field r(src._grid);
 
-    std::cout << std::setprecision(4) << std::scientific << std::endl;
+    std::cout << std::setprecision(4) << std::scientific;
     std::cout << GridLogIterative << "CommunicationAvoidingGeneralisedMinimalResidual: guess " << guess << std::endl;
     std::cout << GridLogIterative << "CommunicationAvoidingGeneralisedMinimalResidual:   src " << ssq   << std::endl;
 
diff --git a/lib/algorithms/iterative/FlexibleCommunicationAvoidingGeneralisedMinimalResidual.h b/lib/algorithms/iterative/FlexibleCommunicationAvoidingGeneralisedMinimalResidual.h
index 8c36597b..77936f29 100644
--- a/lib/algorithms/iterative/FlexibleCommunicationAvoidingGeneralisedMinimalResidual.h
+++ b/lib/algorithms/iterative/FlexibleCommunicationAvoidingGeneralisedMinimalResidual.h
@@ -89,7 +89,7 @@ class FlexibleCommunicationAvoidingGeneralisedMinimalResidual : public OperatorF
 
     Field r(src._grid);
 
-    std::cout << std::setprecision(4) << std::scientific << std::endl;
+    std::cout << std::setprecision(4) << std::scientific;
     std::cout << GridLogIterative << "FlexibleCommunicationAvoidingGeneralisedMinimalResidual: guess " << guess << std::endl;
     std::cout << GridLogIterative << "FlexibleCommunicationAvoidingGeneralisedMinimalResidual:   src " << ssq   << std::endl;
 
diff --git a/lib/algorithms/iterative/FlexibleGeneralisedMinimalResidual.h b/lib/algorithms/iterative/FlexibleGeneralisedMinimalResidual.h
index 84956aed..906dd35d 100644
--- a/lib/algorithms/iterative/FlexibleGeneralisedMinimalResidual.h
+++ b/lib/algorithms/iterative/FlexibleGeneralisedMinimalResidual.h
@@ -89,7 +89,7 @@ class FlexibleGeneralisedMinimalResidual : public OperatorFunction<Field> {
 
     Field r(src._grid);
 
-    std::cout << std::setprecision(4) << std::scientific << std::endl;
+    std::cout << std::setprecision(4) << std::scientific;
     std::cout << GridLogIterative << "FlexibleGeneralisedMinimalResidual: guess " << guess << std::endl;
     std::cout << GridLogIterative << "FlexibleGeneralisedMinimalResidual:   src " << ssq   << std::endl;
 
diff --git a/lib/algorithms/iterative/GeneralisedMinimalResidual.h b/lib/algorithms/iterative/GeneralisedMinimalResidual.h
index 0e6d4be4..78dbf5ca 100644
--- a/lib/algorithms/iterative/GeneralisedMinimalResidual.h
+++ b/lib/algorithms/iterative/GeneralisedMinimalResidual.h
@@ -84,7 +84,7 @@ class GeneralisedMinimalResidual : public OperatorFunction<Field> {
 
     Field r(src._grid);
 
-    std::cout << std::setprecision(4) << std::scientific << std::endl;
+    std::cout << std::setprecision(4) << std::scientific;
     std::cout << GridLogIterative << "GeneralisedMinimalResidual: guess " << guess << std::endl;
     std::cout << GridLogIterative << "GeneralisedMinimalResidual:   src " << ssq   << std::endl;
 
diff --git a/lib/algorithms/iterative/MinimalResidual.h b/lib/algorithms/iterative/MinimalResidual.h
index ee2f208b..7e054398 100644
--- a/lib/algorithms/iterative/MinimalResidual.h
+++ b/lib/algorithms/iterative/MinimalResidual.h
@@ -72,7 +72,7 @@ template<class Field> class MinimalResidual : public OperatorFunction<Field> {
 
     RealD cp = norm2(r); //  Cp = |r[0]|^2 // 2 Nc Ns  flops // flopcount.addSiteFlops(4*Nc*Ns, s);
 
-    std::cout << std::setprecision(4) << std::scientific << std::endl;
+    std::cout << std::setprecision(4) << std::scientific;
     std::cout << GridLogIterative << "MinimalResidual: guess " << guess << std::endl;
     std::cout << GridLogIterative << "MinimalResidual:   src " << ssq << std::endl;
     std::cout << GridLogIterative << "MinimalResidual:    mp " << d << std::endl;

From 323ed1a5886b5ac0c1e26e83beca8bba880ba016 Mon Sep 17 00:00:00 2001
From: Daniel Richtmann <daniel.richtmann@gmail.com>
Date: Wed, 7 Feb 2018 17:45:27 +0100
Subject: [PATCH 077/130] Add an overrelaxation parameter to the MR solver

---
 lib/algorithms/iterative/MinimalResidual.h | 7 ++++---
 tests/solver/Test_dwf_mr_unprec.cc         | 2 +-
 tests/solver/Test_staggered_mr_unprec.cc   | 2 +-
 tests/solver/Test_wilson_mr_unprec.cc      | 2 +-
 4 files changed, 7 insertions(+), 6 deletions(-)

diff --git a/lib/algorithms/iterative/MinimalResidual.h b/lib/algorithms/iterative/MinimalResidual.h
index 7e054398..432b5933 100644
--- a/lib/algorithms/iterative/MinimalResidual.h
+++ b/lib/algorithms/iterative/MinimalResidual.h
@@ -42,11 +42,12 @@ template<class Field> class MinimalResidual : public OperatorFunction<Field> {
                           // Defaults true.
   RealD   Tolerance;
   Integer MaxIterations;
+  RealD   overRelaxParam;
   Integer IterationsToComplete; // Number of iterations the MR took to finish.
                                 // Filled in upon completion
 
-  MinimalResidual(RealD tol, Integer maxit, bool err_on_no_conv = true)
-    : Tolerance(tol), MaxIterations(maxit), ErrorOnNoConverge(err_on_no_conv){};
+  MinimalResidual(RealD tol, Integer maxit, Real ovrelparam = 1.0, bool err_on_no_conv = true)
+    : Tolerance(tol), MaxIterations(maxit), overRelaxParam(ovrelparam), ErrorOnNoConverge(err_on_no_conv){};
 
   void operator()(LinearOperatorBase<Field> &Linop, const Field &src, Field &psi) {
 
@@ -104,7 +105,7 @@ template<class Field> class MinimalResidual : public OperatorFunction<Field> {
 
       a = c / d;
 
-      // a = a * MRovpar; //  a[k-1] *= MRovpar // from chroma code. TODO: check what to do with this
+      a = a * overRelaxParam; //  a[k-1] *= MRovpar
 
       psi = psi + r * a; //  Psi[k] += a[k-1] r[k-1] ; // flopcount.addSiteFlops(4*Nc*Ns,s);
 
diff --git a/tests/solver/Test_dwf_mr_unprec.cc b/tests/solver/Test_dwf_mr_unprec.cc
index a7c7733b..3ca35e32 100644
--- a/tests/solver/Test_dwf_mr_unprec.cc
+++ b/tests/solver/Test_dwf_mr_unprec.cc
@@ -61,7 +61,7 @@ int main (int argc, char ** argv)
   DomainWallFermionR Ddwf(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5);
 
   MdagMLinearOperator<DomainWallFermionR,LatticeFermion> HermOp(Ddwf);
-  MinimalResidual<LatticeFermion> MR(1.0e-6,10000);
+  MinimalResidual<LatticeFermion> MR(1.0e-6,10000,0.8);
   MR(HermOp,src,result);
 
   Grid_finalize();
diff --git a/tests/solver/Test_staggered_mr_unprec.cc b/tests/solver/Test_staggered_mr_unprec.cc
index 22210329..63b08837 100644
--- a/tests/solver/Test_staggered_mr_unprec.cc
+++ b/tests/solver/Test_staggered_mr_unprec.cc
@@ -62,7 +62,7 @@ int main (int argc, char ** argv)
   ImprovedStaggeredFermionR Ds(Umu,Umu,Grid,RBGrid,mass);
 
   MdagMLinearOperator<ImprovedStaggeredFermionR,FermionField> HermOp(Ds);
-  MinimalResidual<FermionField> MR(1.0e-6,10000);
+  MinimalResidual<FermionField> MR(1.0e-6,10000,0.8);
   MR(HermOp,src,result);
 
   Grid_finalize();
diff --git a/tests/solver/Test_wilson_mr_unprec.cc b/tests/solver/Test_wilson_mr_unprec.cc
index be88d6f8..7b7ef0f9 100644
--- a/tests/solver/Test_wilson_mr_unprec.cc
+++ b/tests/solver/Test_wilson_mr_unprec.cc
@@ -58,7 +58,7 @@ int main (int argc, char ** argv)
   WilsonFermionR Dw(Umu,Grid,RBGrid,mass);
 
   MdagMLinearOperator<WilsonFermionR,LatticeFermion> HermOp(Dw);
-  MinimalResidual<LatticeFermion> MR(1.0e-8,10000);
+  MinimalResidual<LatticeFermion> MR(1.0e-8,10000,0.8);
   MR(HermOp,src,result);
 
   Grid_finalize();

From cd61e2e6d6e49c15dffeb4c5124f726d7ad8122f Mon Sep 17 00:00:00 2001
From: Daniel Richtmann <daniel.richtmann@gmail.com>
Date: Wed, 7 Feb 2018 17:45:57 +0100
Subject: [PATCH 078/130] Increase max iterations in test of MR solver with
 staggered action

---
 tests/solver/Test_staggered_mr_unprec.cc | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/solver/Test_staggered_mr_unprec.cc b/tests/solver/Test_staggered_mr_unprec.cc
index 63b08837..77f3090d 100644
--- a/tests/solver/Test_staggered_mr_unprec.cc
+++ b/tests/solver/Test_staggered_mr_unprec.cc
@@ -62,7 +62,7 @@ int main (int argc, char ** argv)
   ImprovedStaggeredFermionR Ds(Umu,Umu,Grid,RBGrid,mass);
 
   MdagMLinearOperator<ImprovedStaggeredFermionR,FermionField> HermOp(Ds);
-  MinimalResidual<FermionField> MR(1.0e-6,10000,0.8);
+  MinimalResidual<FermionField> MR(1.0e-6,20000,0.8);
   MR(HermOp,src,result);
 
   Grid_finalize();

From cc2f00f82745a270414a2b580ce78985f7a6d61d Mon Sep 17 00:00:00 2001
From: Daniel Richtmann <daniel.richtmann@gmail.com>
Date: Wed, 7 Feb 2018 17:48:05 +0100
Subject: [PATCH 079/130] Remove test for MR solver with dwf action as it
 doesn't converge

---
 tests/solver/Test_dwf_mr_unprec.cc | 68 ------------------------------
 1 file changed, 68 deletions(-)
 delete mode 100644 tests/solver/Test_dwf_mr_unprec.cc

diff --git a/tests/solver/Test_dwf_mr_unprec.cc b/tests/solver/Test_dwf_mr_unprec.cc
deleted file mode 100644
index 3ca35e32..00000000
--- a/tests/solver/Test_dwf_mr_unprec.cc
+++ /dev/null
@@ -1,68 +0,0 @@
-/*************************************************************************************
-
-Grid physics library, www.github.com/paboyle/Grid
-
-Source file: ./tests/solver/Test_dwf_mr_unprec.cc
-
-Copyright (C) 2015
-
-Author: Daniel Richtmann <daniel.richtmann@ur.de>
-
-This program is free software; you can redistribute it and/or modify
-it under the terms of the GNU General Public License as published by
-the Free Software Foundation; either version 2 of the License, or
-(at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along
-with this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
-
-See the full license in the file "LICENSE" in the top level distribution
-directory
-*************************************************************************************/
-/*  END LEGAL */
-#include <Grid/Grid.h>
-
-using namespace Grid;
-using namespace Grid::QCD;
-
-int main (int argc, char ** argv)
-{
-  Grid_init(&argc,&argv);
-
-  const int Ls=8;
-
-  GridCartesian         * UGrid   = SpaceTimeGrid::makeFourDimGrid(GridDefaultLatt(), GridDefaultSimd(Nd,vComplex::Nsimd()),GridDefaultMpi());
-  GridRedBlackCartesian * UrbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(UGrid);
-  GridCartesian         * FGrid   = SpaceTimeGrid::makeFiveDimGrid(Ls,UGrid);
-  GridRedBlackCartesian * FrbGrid = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls,UGrid);
-
-  std::vector<int> seeds4({1,2,3,4});
-  std::vector<int> seeds5({5,6,7,8});
-  GridParallelRNG          RNG5(FGrid);  RNG5.SeedFixedIntegers(seeds5);
-  GridParallelRNG          RNG4(UGrid);  RNG4.SeedFixedIntegers(seeds4);
-
-  LatticeFermion    src(FGrid); random(RNG5,src);
-  LatticeFermion result(FGrid); result=zero;
-  LatticeGaugeField Umu(UGrid); SU3::HotConfiguration(RNG4,Umu);
-
-  std::vector<LatticeColourMatrix> U(4,UGrid);
-  for(int mu=0;mu<Nd;mu++){
-    U[mu] = PeekIndex<LorentzIndex>(Umu,mu);
-  }
-
-  RealD mass=0.1;
-  RealD M5=1.8;
-  DomainWallFermionR Ddwf(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5);
-
-  MdagMLinearOperator<DomainWallFermionR,LatticeFermion> HermOp(Ddwf);
-  MinimalResidual<LatticeFermion> MR(1.0e-6,10000,0.8);
-  MR(HermOp,src,result);
-
-  Grid_finalize();
-}

From 8bc12e0ce1bf12f35f3f0ec03688137e6193c8c2 Mon Sep 17 00:00:00 2001
From: Daniel Richtmann <daniel.richtmann@gmail.com>
Date: Wed, 7 Feb 2018 17:59:03 +0100
Subject: [PATCH 080/130] Remove superfluous comments in MR solver

---
 lib/algorithms/iterative/MinimalResidual.h | 31 +++++++++-------------
 1 file changed, 13 insertions(+), 18 deletions(-)

diff --git a/lib/algorithms/iterative/MinimalResidual.h b/lib/algorithms/iterative/MinimalResidual.h
index 432b5933..fa1912cf 100644
--- a/lib/algorithms/iterative/MinimalResidual.h
+++ b/lib/algorithms/iterative/MinimalResidual.h
@@ -31,11 +31,6 @@ directory
 
 namespace Grid {
 
-/////////////////////////////////////////////////////////////
-// Base classes for iterative processes based on operators
-// single input vec, single output vec.
-/////////////////////////////////////////////////////////////
-
 template<class Field> class MinimalResidual : public OperatorFunction<Field> {
  public:
   bool ErrorOnNoConverge; // throw an assert when the MR fails to converge.
@@ -64,14 +59,14 @@ template<class Field> class MinimalResidual : public OperatorFunction<Field> {
     RealD guess = norm2(psi);
     assert(std::isnan(guess) == 0);
 
-    RealD ssq = norm2(src); // flopcount.addSiteFlops(4*Nc*Ns,s);
-    RealD rsq = Tolerance * Tolerance * ssq; // flopcount.addSiteFlops(4*Nc*Ns,s);
+    RealD ssq = norm2(src);
+    RealD rsq = Tolerance * Tolerance * ssq;
 
-    Linop.Op(psi, Mr); // flopcount.addFlops(M.nFlops());
+    Linop.Op(psi, Mr);
 
-    r = src - Mr; // flopcount.addSiteFlops(2*Nc*Ns,s);
+    r = src - Mr;
 
-    RealD cp = norm2(r); //  Cp = |r[0]|^2 // 2 Nc Ns  flops // flopcount.addSiteFlops(4*Nc*Ns, s);
+    RealD cp = norm2(r);
 
     std::cout << std::setprecision(4) << std::scientific;
     std::cout << GridLogIterative << "MinimalResidual: guess " << guess << std::endl;
@@ -91,27 +86,27 @@ template<class Field> class MinimalResidual : public OperatorFunction<Field> {
 
     SolverTimer.Start();
     int k;
-    for (k = 1; k <= MaxIterations; k++) { //  a[k-1] := < M.r[k-1], r[k-1] >/ < M.r[k-1], M.r[k-1] >
+    for (k = 1; k <= MaxIterations; k++) {
 
       MatrixTimer.Start();
-      Linop.Op(r, Mr); //  Mr = M * r // flopcount.addFlops(M.nFlops());
+      Linop.Op(r, Mr);
       MatrixTimer.Stop();
 
       LinalgTimer.Start();
 
-      c = innerProduct(Mr, r); //  c = < M.r, r > // // flopcount.addSiteFlops(4*Nc*Ns,s);
+      c = innerProduct(Mr, r);
 
-      d = norm2(Mr); //  d = | M.r | ** 2  // // flopcount.addSiteFlops(4*Nc*Ns,s);
+      d = norm2(Mr);
 
       a = c / d;
 
-      a = a * overRelaxParam; //  a[k-1] *= MRovpar
+      a = a * overRelaxParam;
 
-      psi = psi + r * a; //  Psi[k] += a[k-1] r[k-1] ; // flopcount.addSiteFlops(4*Nc*Ns,s);
+      psi = psi + r * a;
 
-      r = r - Mr * a; //  r[k] -= a[k-1] M . r[k-1] ; // flopcount.addSiteFlops(4*Nc*Ns,s);
+      r = r - Mr * a;
 
-      cp = norm2(r); //  cp  =  | r[k] |**2 // flopcount.addSiteFlops(4*Nc*Ns,s);
+      cp = norm2(r);
 
       LinalgTimer.Stop();
 

From 4ded1ceeb0703bf208afe112327338bb7d68820f Mon Sep 17 00:00:00 2001
From: Daniel Richtmann <daniel.richtmann@gmail.com>
Date: Thu, 8 Feb 2018 15:29:44 +0100
Subject: [PATCH 081/130] Make GMRES solvers perform no more than MaxIterations
 steps

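I noticed that it was possible to overrun this number: the outer (restart)
loop alone ran up to MaxIterations times, and each pass through it performs
further inner steps on top of that.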
---
 .../CommunicationAvoidingGeneralisedMinimalResidual.h       | 6 ++++--
 ...lexibleCommunicationAvoidingGeneralisedMinimalResidual.h | 6 ++++--
 .../iterative/FlexibleGeneralisedMinimalResidual.h          | 6 ++++--
 lib/algorithms/iterative/GeneralisedMinimalResidual.h       | 6 ++++--
 4 files changed, 16 insertions(+), 8 deletions(-)

diff --git a/lib/algorithms/iterative/CommunicationAvoidingGeneralisedMinimalResidual.h b/lib/algorithms/iterative/CommunicationAvoidingGeneralisedMinimalResidual.h
index 3bf19131..b56c8afc 100644
--- a/lib/algorithms/iterative/CommunicationAvoidingGeneralisedMinimalResidual.h
+++ b/lib/algorithms/iterative/CommunicationAvoidingGeneralisedMinimalResidual.h
@@ -97,7 +97,9 @@ class CommunicationAvoidingGeneralisedMinimalResidual : public OperatorFunction<
     SolverTimer.Start();
 
     IterationCount = 0;
-    for (int k=0; k<MaxIterations; k++) {
+    auto outerLoopMax = MaxIterations/RestartLength + ((MaxIterations%RestartLength == 0) ? 0 : 1);
+
+    for (int k=0; k<outerLoopMax; k++) {
 
       cp = outerLoopBody(LinOp, src, psi, rsq);
 
@@ -167,7 +169,7 @@ class CommunicationAvoidingGeneralisedMinimalResidual : public OperatorFunction<
       std::cout << GridLogIterative << "CommunicationAvoidingGeneralisedMinimalResidual: Iteration " << IterationCount
                 << " residual " << cp << " target " << rsq << std::endl;
 
-      if ((i == RestartLength - 1) || (cp <= rsq)) {
+      if ((i == RestartLength - 1) || (IterationCount == MaxIterations) || (cp <= rsq)) {
 
         computeSolution(v, psi, i);
 
diff --git a/lib/algorithms/iterative/FlexibleCommunicationAvoidingGeneralisedMinimalResidual.h b/lib/algorithms/iterative/FlexibleCommunicationAvoidingGeneralisedMinimalResidual.h
index 77936f29..057c1af3 100644
--- a/lib/algorithms/iterative/FlexibleCommunicationAvoidingGeneralisedMinimalResidual.h
+++ b/lib/algorithms/iterative/FlexibleCommunicationAvoidingGeneralisedMinimalResidual.h
@@ -103,7 +103,9 @@ class FlexibleCommunicationAvoidingGeneralisedMinimalResidual : public OperatorF
     SolverTimer.Start();
 
     IterationCount = 0;
-    for (int k=0; k<MaxIterations; k++) {
+    auto outerLoopMax = MaxIterations/RestartLength + ((MaxIterations%RestartLength == 0) ? 0 : 1);
+
+    for (int k=0; k<outerLoopMax; k++) {
 
       cp = outerLoopBody(LinOp, src, psi, rsq);
 
@@ -179,7 +181,7 @@ class FlexibleCommunicationAvoidingGeneralisedMinimalResidual : public OperatorF
       std::cout << GridLogIterative << "FlexibleCommunicationAvoidingGeneralisedMinimalResidual: Iteration " << IterationCount
                 << " residual " << cp << " target " << rsq << std::endl;
 
-      if ((i == RestartLength - 1) || (cp <= rsq)) {
+      if ((i == RestartLength - 1) || (IterationCount == MaxIterations) || (cp <= rsq)) {
 
         computeSolution(v, psi, i);
 
diff --git a/lib/algorithms/iterative/FlexibleGeneralisedMinimalResidual.h b/lib/algorithms/iterative/FlexibleGeneralisedMinimalResidual.h
index 906dd35d..06aded09 100644
--- a/lib/algorithms/iterative/FlexibleGeneralisedMinimalResidual.h
+++ b/lib/algorithms/iterative/FlexibleGeneralisedMinimalResidual.h
@@ -103,7 +103,9 @@ class FlexibleGeneralisedMinimalResidual : public OperatorFunction<Field> {
     SolverTimer.Start();
 
     IterationCount = 0;
-    for (int k=0; k<MaxIterations; k++) {
+    auto outerLoopMax = MaxIterations/RestartLength + ((MaxIterations%RestartLength == 0) ? 0 : 1);
+
+    for (int k=0; k<outerLoopMax; k++) {
 
       cp = outerLoopBody(LinOp, src, psi, rsq);
 
@@ -179,7 +181,7 @@ class FlexibleGeneralisedMinimalResidual : public OperatorFunction<Field> {
       std::cout << GridLogIterative << "FlexibleGeneralisedMinimalResidual: Iteration " << IterationCount
                 << " residual " << cp << " target " << rsq << std::endl;
 
-      if ((i == RestartLength - 1) || (cp <= rsq)) {
+      if ((i == RestartLength - 1) || (IterationCount == MaxIterations) || (cp <= rsq)) {
 
         computeSolution(v, psi, i);
 
diff --git a/lib/algorithms/iterative/GeneralisedMinimalResidual.h b/lib/algorithms/iterative/GeneralisedMinimalResidual.h
index 78dbf5ca..e9d54a53 100644
--- a/lib/algorithms/iterative/GeneralisedMinimalResidual.h
+++ b/lib/algorithms/iterative/GeneralisedMinimalResidual.h
@@ -97,7 +97,9 @@ class GeneralisedMinimalResidual : public OperatorFunction<Field> {
     SolverTimer.Start();
 
     IterationCount = 0;
-    for (int k=0; k<MaxIterations; k++) {
+    auto outerLoopMax = MaxIterations/RestartLength + ((MaxIterations%RestartLength == 0) ? 0 : 1);
+
+    for (int k=0; k<outerLoopMax; k++) {
 
       cp = outerLoopBody(LinOp, src, psi, rsq);
 
@@ -167,7 +169,7 @@ class GeneralisedMinimalResidual : public OperatorFunction<Field> {
       std::cout << GridLogIterative << "GeneralisedMinimalResidual: Iteration " << IterationCount
                 << " residual " << cp << " target " << rsq << std::endl;
 
-      if ((i == RestartLength - 1) || (cp <= rsq)) {
+      if ((i == RestartLength - 1) || (IterationCount == MaxIterations) || (cp <= rsq)) {
 
         computeSolution(v, psi, i);
 

From 9f79a871020c6ed331538dc298d549b8b2308294 Mon Sep 17 00:00:00 2001
From: Daniel Richtmann <daniel.richtmann@gmail.com>
Date: Thu, 8 Feb 2018 16:00:31 +0100
Subject: [PATCH 082/130] Fix bugs in Flexible GMRES solvers

Somehow I got the left and right-preconditioned versions of GMRES mixed up. As
of now this is the right-preconditioned version, which is what we want.
---
 ...cationAvoidingGeneralisedMinimalResidual.h | 30 ++++++++-----------
 .../FlexibleGeneralisedMinimalResidual.h      | 30 ++++++++-----------
 2 files changed, 26 insertions(+), 34 deletions(-)

diff --git a/lib/algorithms/iterative/FlexibleCommunicationAvoidingGeneralisedMinimalResidual.h b/lib/algorithms/iterative/FlexibleCommunicationAvoidingGeneralisedMinimalResidual.h
index 057c1af3..f49eb7dc 100644
--- a/lib/algorithms/iterative/FlexibleCommunicationAvoidingGeneralisedMinimalResidual.h
+++ b/lib/algorithms/iterative/FlexibleCommunicationAvoidingGeneralisedMinimalResidual.h
@@ -148,18 +148,14 @@ class FlexibleCommunicationAvoidingGeneralisedMinimalResidual : public OperatorF
 
     Field w(src._grid);
     Field r(src._grid);
-    Field z(src._grid);
 
     std::vector<Field> v(RestartLength + 1, src._grid);
+    std::vector<Field> z(RestartLength + 1, src._grid);
 
     MatrixTimer.Start();
-    LinOp.Op(psi, z);
+    LinOp.Op(psi, w);
     MatrixTimer.Stop();
 
-    PrecTimer.Start();
-    Preconditioner(z, w);
-    PrecTimer.Stop();
-
     LinalgTimer.Start();
     r = src - w;
 
@@ -183,7 +179,7 @@ class FlexibleCommunicationAvoidingGeneralisedMinimalResidual : public OperatorF
 
       if ((i == RestartLength - 1) || (IterationCount == MaxIterations) || (cp <= rsq)) {
 
-        computeSolution(v, psi, i);
+        computeSolution(z, psi, i);
 
         return cp;
       }
@@ -193,16 +189,16 @@ class FlexibleCommunicationAvoidingGeneralisedMinimalResidual : public OperatorF
     return cp;
   }
 
-  void arnoldiStep(LinearOperatorBase<Field> &LinOp, std::vector<Field> &v, Field &z, Field &w, int iter) {
-
-    MatrixTimer.Start();
-    LinOp.Op(v[iter], z);
-    MatrixTimer.Stop();
+  void arnoldiStep(LinearOperatorBase<Field> &LinOp, std::vector<Field> &v, std::vector<Field> &z, Field &w, int iter) {
 
     PrecTimer.Start();
-    Preconditioner(z, w);
+    Preconditioner(v[iter], z[iter]);
     PrecTimer.Stop();
 
+    MatrixTimer.Start();
+    LinOp.Op(z[iter], w);
+    MatrixTimer.Stop();
+
     LinalgTimer.Start();
     for (int i = 0; i <= iter; ++i) {
       H(iter, i) = innerProduct(v[i], w);
@@ -237,7 +233,7 @@ class FlexibleCommunicationAvoidingGeneralisedMinimalResidual : public OperatorF
     QrTimer.Stop();
   }
 
-  void computeSolution(std::vector<Field> const &v, Field &psi, int iter) {
+  void computeSolution(std::vector<Field> const &z, Field &psi, int iter) {
 
     CompSolutionTimer.Start();
     for (int i = iter; i >= 0; i--) {
@@ -249,12 +245,12 @@ class FlexibleCommunicationAvoidingGeneralisedMinimalResidual : public OperatorF
 
     if (true) {
       for (int i = 0; i <= iter; i++)
-        psi = psi + v[i] * y[i];
+        psi = psi + z[i] * y[i];
     }
     else {
-      psi = y[0] * v[0];
+      psi = y[0] * z[0];
       for (int i = 1; i <= iter; i++)
-        psi = psi + v[i] * y[i];
+        psi = psi + z[i] * y[i];
     }
     CompSolutionTimer.Stop();
   }
diff --git a/lib/algorithms/iterative/FlexibleGeneralisedMinimalResidual.h b/lib/algorithms/iterative/FlexibleGeneralisedMinimalResidual.h
index 06aded09..b05ac069 100644
--- a/lib/algorithms/iterative/FlexibleGeneralisedMinimalResidual.h
+++ b/lib/algorithms/iterative/FlexibleGeneralisedMinimalResidual.h
@@ -148,18 +148,14 @@ class FlexibleGeneralisedMinimalResidual : public OperatorFunction<Field> {
 
     Field w(src._grid);
     Field r(src._grid);
-    Field z(src._grid);
 
     std::vector<Field> v(RestartLength + 1, src._grid);
+    std::vector<Field> z(RestartLength + 1, src._grid);
 
     MatrixTimer.Start();
-    LinOp.Op(psi, z);
+    LinOp.Op(psi, w);
     MatrixTimer.Stop();
 
-    PrecTimer.Start();
-    Preconditioner(z, w);
-    PrecTimer.Stop();
-
     LinalgTimer.Start();
     r = src - w;
 
@@ -183,7 +179,7 @@ class FlexibleGeneralisedMinimalResidual : public OperatorFunction<Field> {
 
       if ((i == RestartLength - 1) || (IterationCount == MaxIterations) || (cp <= rsq)) {
 
-        computeSolution(v, psi, i);
+        computeSolution(z, psi, i);
 
         return cp;
       }
@@ -193,16 +189,16 @@ class FlexibleGeneralisedMinimalResidual : public OperatorFunction<Field> {
     return cp;
   }
 
-  void arnoldiStep(LinearOperatorBase<Field> &LinOp, std::vector<Field> &v, Field &z, Field &w, int iter) {
-
-    MatrixTimer.Start();
-    LinOp.Op(v[iter], z);
-    MatrixTimer.Stop();
+  void arnoldiStep(LinearOperatorBase<Field> &LinOp, std::vector<Field> &v, std::vector<Field> &z, Field &w, int iter) {
 
     PrecTimer.Start();
-    Preconditioner(z, w);
+    Preconditioner(v[iter], z[iter]);
     PrecTimer.Stop();
 
+    MatrixTimer.Start();
+    LinOp.Op(z[iter], w);
+    MatrixTimer.Stop();
+
     LinalgTimer.Start();
     for (int i = 0; i <= iter; ++i) {
       H(iter, i) = innerProduct(v[i], w);
@@ -237,7 +233,7 @@ class FlexibleGeneralisedMinimalResidual : public OperatorFunction<Field> {
     QrTimer.Stop();
   }
 
-  void computeSolution(std::vector<Field> const &v, Field &psi, int iter) {
+  void computeSolution(std::vector<Field> const &z, Field &psi, int iter) {
 
     CompSolutionTimer.Start();
     for (int i = iter; i >= 0; i--) {
@@ -249,12 +245,12 @@ class FlexibleGeneralisedMinimalResidual : public OperatorFunction<Field> {
 
     if (true) {
       for (int i = 0; i <= iter; i++)
-        psi = psi + v[i] * y[i];
+        psi = psi + z[i] * y[i];
     }
     else {
-      psi = y[0] * v[0];
+      psi = y[0] * z[0];
       for (int i = 1; i <= iter; i++)
-        psi = psi + v[i] * y[i];
+        psi = psi + z[i] * y[i];
     }
     CompSolutionTimer.Stop();
   }

From 13ae371ef81b6383bffe681c6d725b56ca37f1ff Mon Sep 17 00:00:00 2001
From: Daniel Richtmann <daniel.richtmann@gmail.com>
Date: Thu, 8 Feb 2018 17:33:10 +0100
Subject: [PATCH 083/130] Make solver parameters match in all MR|GMRES solver
 tests

---
 tests/solver/Test_staggered_fgmres_prec.cc  | 4 ++--
 tests/solver/Test_staggered_gmres_unprec.cc | 4 ++--
 tests/solver/Test_staggered_mr_unprec.cc    | 4 ++--
 tests/solver/Test_wilson_cagmres_unprec.cc  | 2 +-
 tests/solver/Test_wilson_fcagmres_prec.cc   | 2 +-
 tests/solver/Test_wilson_fgmres_prec.cc     | 2 +-
 tests/solver/Test_wilson_gmres_unprec.cc    | 2 +-
 7 files changed, 10 insertions(+), 10 deletions(-)

diff --git a/tests/solver/Test_staggered_fgmres_prec.cc b/tests/solver/Test_staggered_fgmres_prec.cc
index 0c8dbc67..48eec1ce 100644
--- a/tests/solver/Test_staggered_fgmres_prec.cc
+++ b/tests/solver/Test_staggered_fgmres_prec.cc
@@ -58,14 +58,14 @@ int main (int argc, char ** argv)
     volume=volume*latt_size[mu];
   }
 
-  RealD mass=0.1;
+  RealD mass=0.5;
   ImprovedStaggeredFermionR Ds(Umu,Umu,Grid,RBGrid,mass);
 
   MdagMLinearOperator<ImprovedStaggeredFermionR,FermionField> HermOp(Ds);
 
   TrivialPrecon<FermionField> simple;
 
-  FlexibleGeneralisedMinimalResidual<FermionField> FGMRES(1.0e-6, 50, simple, 25);
+  FlexibleGeneralisedMinimalResidual<FermionField> FGMRES(1.0e-8, 10000, simple, 25);
   FGMRES(HermOp,src,result);
 
   Grid_finalize();
diff --git a/tests/solver/Test_staggered_gmres_unprec.cc b/tests/solver/Test_staggered_gmres_unprec.cc
index f9a8744f..8b62eb78 100644
--- a/tests/solver/Test_staggered_gmres_unprec.cc
+++ b/tests/solver/Test_staggered_gmres_unprec.cc
@@ -58,11 +58,11 @@ int main (int argc, char ** argv)
     volume=volume*latt_size[mu];
   }
 
-  RealD mass=0.1;
+  RealD mass=0.5;
   ImprovedStaggeredFermionR Ds(Umu,Umu,Grid,RBGrid,mass);
 
   MdagMLinearOperator<ImprovedStaggeredFermionR,FermionField> HermOp(Ds);
-  GeneralisedMinimalResidual<FermionField> GMRES(1.0e-6, 50, 25);
+  GeneralisedMinimalResidual<FermionField> GMRES(1.0e-8, 10000, 25);
   GMRES(HermOp,src,result);
 
   Grid_finalize();
diff --git a/tests/solver/Test_staggered_mr_unprec.cc b/tests/solver/Test_staggered_mr_unprec.cc
index 77f3090d..d29ec68d 100644
--- a/tests/solver/Test_staggered_mr_unprec.cc
+++ b/tests/solver/Test_staggered_mr_unprec.cc
@@ -58,11 +58,11 @@ int main (int argc, char ** argv)
     volume=volume*latt_size[mu];
   }
 
-  RealD mass=0.1;
+  RealD mass=0.5;
   ImprovedStaggeredFermionR Ds(Umu,Umu,Grid,RBGrid,mass);
 
   MdagMLinearOperator<ImprovedStaggeredFermionR,FermionField> HermOp(Ds);
-  MinimalResidual<FermionField> MR(1.0e-6,20000,0.8);
+  MinimalResidual<FermionField> MR(1.0e-8,10000,0.8);
   MR(HermOp,src,result);
 
   Grid_finalize();
diff --git a/tests/solver/Test_wilson_cagmres_unprec.cc b/tests/solver/Test_wilson_cagmres_unprec.cc
index 067fc0c1..4f81b195 100644
--- a/tests/solver/Test_wilson_cagmres_unprec.cc
+++ b/tests/solver/Test_wilson_cagmres_unprec.cc
@@ -58,7 +58,7 @@ int main (int argc, char ** argv)
   WilsonFermionR Dw(Umu,Grid,RBGrid,mass);
 
   MdagMLinearOperator<WilsonFermionR,LatticeFermion> HermOp(Dw);
-  CommunicationAvoidingGeneralisedMinimalResidual<LatticeFermion> CAGMRES(1.0e-8, 50, 25);
+  CommunicationAvoidingGeneralisedMinimalResidual<LatticeFermion> CAGMRES(1.0e-8, 10000, 25);
   CAGMRES(HermOp,src,result);
 
   Grid_finalize();
diff --git a/tests/solver/Test_wilson_fcagmres_prec.cc b/tests/solver/Test_wilson_fcagmres_prec.cc
index 59477f95..5ac9b87f 100644
--- a/tests/solver/Test_wilson_fcagmres_prec.cc
+++ b/tests/solver/Test_wilson_fcagmres_prec.cc
@@ -61,7 +61,7 @@ int main (int argc, char ** argv)
 
   TrivialPrecon<LatticeFermion> simple;
 
-  FlexibleCommunicationAvoidingGeneralisedMinimalResidual<LatticeFermion> FCAGMRES(1.0e-8, 50, simple, 25);
+  FlexibleCommunicationAvoidingGeneralisedMinimalResidual<LatticeFermion> FCAGMRES(1.0e-8, 10000, simple, 25);
   FCAGMRES(HermOp,src,result);
 
   Grid_finalize();
diff --git a/tests/solver/Test_wilson_fgmres_prec.cc b/tests/solver/Test_wilson_fgmres_prec.cc
index 2bdee58f..f9e1f352 100644
--- a/tests/solver/Test_wilson_fgmres_prec.cc
+++ b/tests/solver/Test_wilson_fgmres_prec.cc
@@ -61,7 +61,7 @@ int main (int argc, char ** argv)
 
   TrivialPrecon<LatticeFermion> simple;
 
-  FlexibleGeneralisedMinimalResidual<LatticeFermion> FGMRES(1.0e-8, 50, simple, 25);
+  FlexibleGeneralisedMinimalResidual<LatticeFermion> FGMRES(1.0e-8, 10000, simple, 25);
   FGMRES(HermOp,src,result);
 
   Grid_finalize();
diff --git a/tests/solver/Test_wilson_gmres_unprec.cc b/tests/solver/Test_wilson_gmres_unprec.cc
index 4df5f4e3..d4ec2ed0 100644
--- a/tests/solver/Test_wilson_gmres_unprec.cc
+++ b/tests/solver/Test_wilson_gmres_unprec.cc
@@ -58,7 +58,7 @@ int main (int argc, char ** argv)
   WilsonFermionR Dw(Umu,Grid,RBGrid,mass);
 
   MdagMLinearOperator<WilsonFermionR,LatticeFermion> HermOp(Dw);
-  GeneralisedMinimalResidual<LatticeFermion> GMRES(1.0e-8, 50, 25);
+  GeneralisedMinimalResidual<LatticeFermion> GMRES(1.0e-8, 10000, 25);
   GMRES(HermOp,src,result);
 
   Grid_finalize();

From eb7cf239d93b2ec5576871737a3f9bf8fb4f980f Mon Sep 17 00:00:00 2001
From: Daniel Richtmann <daniel.richtmann@gmail.com>
Date: Thu, 8 Feb 2018 17:43:47 +0100
Subject: [PATCH 084/130] Print warning messages in CAGMRES solvers

Currently, the implementation of these algorithms doesn't differ from that of
their non-communication-avoiding versions.
---
 .../iterative/CommunicationAvoidingGeneralisedMinimalResidual.h | 2 ++
 .../FlexibleCommunicationAvoidingGeneralisedMinimalResidual.h   | 2 ++
 2 files changed, 4 insertions(+)

diff --git a/lib/algorithms/iterative/CommunicationAvoidingGeneralisedMinimalResidual.h b/lib/algorithms/iterative/CommunicationAvoidingGeneralisedMinimalResidual.h
index b56c8afc..4469c7ed 100644
--- a/lib/algorithms/iterative/CommunicationAvoidingGeneralisedMinimalResidual.h
+++ b/lib/algorithms/iterative/CommunicationAvoidingGeneralisedMinimalResidual.h
@@ -72,6 +72,8 @@ class CommunicationAvoidingGeneralisedMinimalResidual : public OperatorFunction<
 
   void operator()(LinearOperatorBase<Field> &LinOp, const Field &src, Field &psi) {
 
+    std::cout << GridLogWarning << "This algorithm currently doesn't differ from regular GMRES" << std::endl;
+
     psi.checkerboard = src.checkerboard;
     conformable(psi, src);
 
diff --git a/lib/algorithms/iterative/FlexibleCommunicationAvoidingGeneralisedMinimalResidual.h b/lib/algorithms/iterative/FlexibleCommunicationAvoidingGeneralisedMinimalResidual.h
index f49eb7dc..dc4885d6 100644
--- a/lib/algorithms/iterative/FlexibleCommunicationAvoidingGeneralisedMinimalResidual.h
+++ b/lib/algorithms/iterative/FlexibleCommunicationAvoidingGeneralisedMinimalResidual.h
@@ -77,6 +77,8 @@ class FlexibleCommunicationAvoidingGeneralisedMinimalResidual : public OperatorF
 
   void operator()(LinearOperatorBase<Field> &LinOp, const Field &src, Field &psi) {
 
+    std::cout << GridLogWarning << "This algorithm currently doesn't differ from regular FGMRES" << std::endl;
+
     psi.checkerboard = src.checkerboard;
     conformable(psi, src);
 

From a3e009ba5465bbfcb99257c560a1c55d250882c6 Mon Sep 17 00:00:00 2001
From: Daniel Richtmann <daniel.richtmann@gmail.com>
Date: Thu, 8 Feb 2018 17:46:28 +0100
Subject: [PATCH 085/130] Add tests for CAGMRES solvers with staggered action

---
 tests/solver/Test_staggered_cagmres_unprec.cc | 69 ++++++++++++++++++
 tests/solver/Test_staggered_fcagmres_prec.cc  | 72 +++++++++++++++++++
 2 files changed, 141 insertions(+)
 create mode 100644 tests/solver/Test_staggered_cagmres_unprec.cc
 create mode 100644 tests/solver/Test_staggered_fcagmres_prec.cc

diff --git a/tests/solver/Test_staggered_cagmres_unprec.cc b/tests/solver/Test_staggered_cagmres_unprec.cc
new file mode 100644
index 00000000..506b68b1
--- /dev/null
+++ b/tests/solver/Test_staggered_cagmres_unprec.cc
@@ -0,0 +1,69 @@
+/*************************************************************************************
+
+Grid physics library, www.github.com/paboyle/Grid
+
+Source file: ./tests/solver/Test_staggered_cagmres_unprec.cc
+
+Copyright (C) 2015
+
+Author: Daniel Richtmann <daniel.richtmann@ur.de>
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along
+with this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+
+See the full license in the file "LICENSE" in the top level distribution
+directory
+*************************************************************************************/
+/*  END LEGAL */
+#include <Grid/Grid.h>
+
+using namespace Grid;
+using namespace Grid::QCD;
+
+int main (int argc, char ** argv) // Runs the CAGMRES solver once on an improved staggered operator; nothing below checks the outcome.
+{
+  typedef typename ImprovedStaggeredFermionR::FermionField FermionField;
+  typedef typename ImprovedStaggeredFermionR::ComplexField ComplexField; // NOTE(review): not referenced again in this function
+  typename ImprovedStaggeredFermionR::ImplParams params; // NOTE(review): not referenced again in this function
+
+  Grid_init(&argc,&argv); // receives the command-line arguments; paired with Grid_finalize() at the end
+
+  std::vector<int> latt_size   = GridDefaultLatt(); // lattice extents, also used for the volume product below
+  std::vector<int> simd_layout = GridDefaultSimd(Nd,vComplex::Nsimd());
+  std::vector<int> mpi_layout  = GridDefaultMpi();
+  GridCartesian               Grid(latt_size,simd_layout,mpi_layout); // full grid built from the three layouts above
+  GridRedBlackCartesian     RBGrid(&Grid); // red-black grid derived from Grid; only passed on to the Ds constructor
+
+  std::vector<int> seeds({1,2,3,4}); // fixed seeds, so src and Umu are presumably reproducible between runs - confirm
+  GridParallelRNG          pRNG(&Grid);  pRNG.SeedFixedIntegers(seeds);
+
+  FermionField src(&Grid); random(pRNG,src); // right-hand side handed to the solver, filled from pRNG
+  RealD nrm = norm2(src); // NOTE(review): nrm is not referenced again in this function
+  FermionField result(&Grid); result=zero; // solution field handed to the solver, zeroed beforehand
+  LatticeGaugeField Umu(&Grid); SU3::HotConfiguration(pRNG,Umu); // gauge field filled from pRNG (presumably a random "hot" start - confirm)
+
+  double volume=1; // product of the lattice extents; NOTE(review): not referenced after the loop
+  for(int mu=0;mu<Nd;mu++){
+    volume=volume*latt_size[mu];
+  }
+
+  RealD mass=0.5;
+  ImprovedStaggeredFermionR Ds(Umu,Umu,Grid,RBGrid,mass); // the same Umu is passed for both gauge-field arguments
+
+  MdagMLinearOperator<ImprovedStaggeredFermionR,FermionField> HermOp(Ds); // linear-operator wrapper around Ds that the solver is called with
+  CommunicationAvoidingGeneralisedMinimalResidual<FermionField> CAGMRES(1.0e-8, 10000, 25); // presumably (tolerance, max iterations, restart length) - confirm against the constructor
+  CAGMRES(HermOp,src,result); // NOTE(review): no convergence or residual check follows this call
+
+  Grid_finalize();
+}
diff --git a/tests/solver/Test_staggered_fcagmres_prec.cc b/tests/solver/Test_staggered_fcagmres_prec.cc
new file mode 100644
index 00000000..33ce182d
--- /dev/null
+++ b/tests/solver/Test_staggered_fcagmres_prec.cc
@@ -0,0 +1,72 @@
+/*************************************************************************************
+
+Grid physics library, www.github.com/paboyle/Grid
+
+Source file: ./tests/solver/Test_staggered_fcagmres_prec.cc
+
+Copyright (C) 2015
+
+Author: Daniel Richtmann <daniel.richtmann@ur.de>
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along
+with this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+
+See the full license in the file "LICENSE" in the top level distribution
+directory
+*************************************************************************************/
+/*  END LEGAL */
+#include <Grid/Grid.h>
+
+using namespace Grid;
+using namespace Grid::QCD;
+
+int main (int argc, char ** argv) // Runs the FCAGMRES solver once, with TrivialPrecon, on an improved staggered operator; nothing below checks the outcome.
+{
+  typedef typename ImprovedStaggeredFermionR::FermionField FermionField;
+  typedef typename ImprovedStaggeredFermionR::ComplexField ComplexField; // NOTE(review): not referenced again in this function
+  typename ImprovedStaggeredFermionR::ImplParams params; // NOTE(review): not referenced again in this function
+
+  Grid_init(&argc,&argv); // receives the command-line arguments; paired with Grid_finalize() at the end
+
+  std::vector<int> latt_size   = GridDefaultLatt(); // lattice extents, also used for the volume product below
+  std::vector<int> simd_layout = GridDefaultSimd(Nd,vComplex::Nsimd());
+  std::vector<int> mpi_layout  = GridDefaultMpi();
+  GridCartesian               Grid(latt_size,simd_layout,mpi_layout); // full grid built from the three layouts above
+  GridRedBlackCartesian     RBGrid(&Grid); // red-black grid derived from Grid; only passed on to the Ds constructor
+
+  std::vector<int> seeds({1,2,3,4}); // fixed seeds, so src and Umu are presumably reproducible between runs - confirm
+  GridParallelRNG          pRNG(&Grid);  pRNG.SeedFixedIntegers(seeds);
+
+  FermionField src(&Grid); random(pRNG,src); // right-hand side handed to the solver, filled from pRNG
+  RealD nrm = norm2(src); // NOTE(review): nrm is not referenced again in this function
+  FermionField result(&Grid); result=zero; // solution field handed to the solver, zeroed beforehand
+  LatticeGaugeField Umu(&Grid); SU3::HotConfiguration(pRNG,Umu); // gauge field filled from pRNG (presumably a random "hot" start - confirm)
+
+  double volume=1; // product of the lattice extents; NOTE(review): not referenced after the loop
+  for(int mu=0;mu<Nd;mu++){
+    volume=volume*latt_size[mu];
+  }
+
+  RealD mass=0.5;
+  ImprovedStaggeredFermionR Ds(Umu,Umu,Grid,RBGrid,mass); // the same Umu is passed for both gauge-field arguments
+
+  MdagMLinearOperator<ImprovedStaggeredFermionR,FermionField> HermOp(Ds); // linear-operator wrapper around Ds that the solver is called with
+
+  TrivialPrecon<FermionField> simple; // preconditioner object given to the solver (presumably an identity/no-op - confirm)
+
+  FlexibleCommunicationAvoidingGeneralisedMinimalResidual<FermionField> FCAGMRES(1.0e-8, 10000, simple, 25); // presumably (tolerance, max iterations, preconditioner, restart length) - confirm against the constructor
+  FCAGMRES(HermOp,src,result); // NOTE(review): no convergence or residual check follows this call
+
+  Grid_finalize();
+}

From c4ce70a821944d4c787ffe6226918d999f542e72 Mon Sep 17 00:00:00 2001
From: Daniel Richtmann <daniel.richtmann@gmail.com>
Date: Thu, 8 Feb 2018 22:44:55 +0100
Subject: [PATCH 086/130] WilsonMG: Major cleanup

---
 tests/solver/Test_wilson_mg.cc | 732 +++++++++++----------------------
 1 file changed, 232 insertions(+), 500 deletions(-)

diff --git a/tests/solver/Test_wilson_mg.cc b/tests/solver/Test_wilson_mg.cc
index a402cf02..137d02f2 100644
--- a/tests/solver/Test_wilson_mg.cc
+++ b/tests/solver/Test_wilson_mg.cc
@@ -64,8 +64,8 @@ public:
       std::cout << GridLogMessage << std::scientific << std::setprecision(2) << std::setw(2) << std::showpos << "vector " << i << ": "
                 << "singular value: " << lambda << ", singular vector precision: " << mu << ", norm: " << nrm << std::endl;
     }
-    std::cout << GridLogMessage << std::scientific << std::setprecision(2) << std::setw(2) << std::showpos << positiveOnes << " out of "
-              << nn << " vectors were positive" << std::endl;
+    std::cout << GridLogMessage << std::scientific << std::setprecision(2) << std::setw(2) << std::showpos << positiveOnes << " out of " << nn
+              << " vectors were positive" << std::endl;
   }
 };
 
@@ -87,10 +87,6 @@ public:
 };
 myclass params;
 
-RealD InverseApproximation(RealD x) {
-  return 1.0 / x;
-}
-
 template<int nbasis> struct CoarseGrids {
 public:
   std::vector<std::vector<int>> LattSizes;
@@ -186,11 +182,10 @@ template<class Field> void testLinearOperator(LinearOperatorBase<Field> &LinOp,
     ComplexD phiMPhi    = innerProduct(phi, MPhi);
     ComplexD chiMdagChi = innerProduct(chi, MdagChi);
 
-    std::cout << GridLogMessage << " chiMPhi = " << chiMPhi << " phiMdagChi = " << phiMdagChi
-              << " difference = " << chiMPhi - conjugate(phiMdagChi) << std::endl;
-
-    std::cout << GridLogMessage << " phiMPhi = " << phiMPhi << " chiMdagChi = " << chiMdagChi << " <- should be real if hermitian"
+    std::cout << GridLogMessage << " chiMPhi = " << chiMPhi << " phiMdagChi = " << phiMdagChi << " difference = " << chiMPhi - conjugate(phiMdagChi)
               << std::endl;
+
+    std::cout << GridLogMessage << " phiMPhi = " << phiMPhi << " chiMdagChi = " << chiMdagChi << " <- should be real if hermitian" << std::endl;
   }
 
   {
@@ -255,374 +250,53 @@ public:
     , _SmootherOperator(Smooth)
     , _SmootherMatrix(SmootherMatrix) {}
 
-  void PowerMethod(const FineField &in) {
-
-    FineField p1(in._grid);
-    FineField p2(in._grid);
-
-    MdagMLinearOperator<Matrix, FineField> fMdagMOp(_FineMatrix);
-
-    p1 = in;
-    RealD absp2;
-    for(int i = 0; i < 20; i++) {
-      RealD absp1 = std::sqrt(norm2(p1));
-      fMdagMOp.HermOp(p1, p2); // this is the G5 herm bit
-      // _FineOperator.Op(p1,p2); // this is the G5 herm bit
-      RealD absp2 = std::sqrt(norm2(p2));
-      if(i % 10 == 9)
-        std::cout << GridLogMessage << "Power method on mdagm " << i << " " << absp2 / absp1 << std::endl;
-      p1 = p2 * (1.0 / std::sqrt(absp2));
-    }
-  }
-
   void operator()(const FineField &in, FineField &out) {
-    if(params.domaindecompose) {
-      operatorSAP(in, out);
-    } else {
-      operatorCheby(in, out);
-    }
-  }
 
-    ////////////////////////////////////////////////////////////////////////
-    // ADEF2: [PTM+Q] in = [1 - Q A] M in + Q in = Min + Q [ in -A Min]
-    // ADEF1: [MP+Q ] in = M [1 - A Q] in + Q in
-    ////////////////////////////////////////////////////////////////////////
-#if 1
-  void operatorADEF2(const FineField &in, FineField &out) {
+    CoarseVector coarseSrc(_CoarseOperator.Grid());
+    CoarseVector coarseTmp(_CoarseOperator.Grid());
+    CoarseVector coarseSol(_CoarseOperator.Grid());
+    coarseSol = zero;
 
-    CoarseVector Csrc(_CoarseOperator.Grid());
-    CoarseVector Ctmp(_CoarseOperator.Grid());
-    CoarseVector Csol(_CoarseOperator.Grid());
+    GeneralisedMinimalResidual<CoarseVector> coarseGMRES(5.0e-2, 100, 25, false);
+    GeneralisedMinimalResidual<FineField>    fineGMRES(5.0e-2, 100, 25, false);
 
-    ConjugateGradient<CoarseVector> CG(1.0e-10, 100000);
-    ConjugateGradient<FineField>    fCG(3.0e-2, 1000);
+    HermitianLinearOperator<CoarseOperator, CoarseVector> coarseHermOp(_CoarseOperator);
+    MdagMLinearOperator<CoarseOperator, CoarseVector>     coarseMdagMOp(_CoarseOperator);
+    MdagMLinearOperator<Matrix, FineField>                fineMdagMOp(_SmootherMatrix);
 
-    HermitianLinearOperator<CoarseOperator, CoarseVector> HermOp(_CoarseOperator);
-    MdagMLinearOperator<CoarseOperator, CoarseVector>     MdagMOp(_CoarseOperator);
-    MdagMLinearOperator<Matrix, FineField>                fMdagMOp(_FineMatrix);
-
-    FineField tmp(in._grid);
-    FineField res(in._grid);
-    FineField Min(in._grid);
-
-    // Monitor completeness of low mode space
-    _Aggregates.ProjectToSubspace(Csrc, in);
-    _Aggregates.PromoteFromSubspace(Csrc, out);
-    std::cout << GridLogMessage << "Coarse Grid Preconditioner\nCompleteness in: " << std::sqrt(norm2(out) / norm2(in)) << std::endl;
-
-    // [PTM+Q] in = [1 - Q A] M in + Q in = Min + Q [ in -A Min]
-    _FineOperator.Op(in, tmp); // this is the G5 herm bit
-    fCG(fMdagMOp, tmp, Min);   // solves MdagM = g5 M g5M
-
-    // Monitor completeness of low mode space
-    _Aggregates.ProjectToSubspace(Csrc, Min);
-    _Aggregates.PromoteFromSubspace(Csrc, out);
-    std::cout << GridLogMessage << "Completeness Min: " << std::sqrt(norm2(out) / norm2(Min)) << std::endl;
-
-    _FineOperator.Op(Min, tmp);
-    tmp = in - tmp; // in - A Min
-
-    Csol = zero;
-    _Aggregates.ProjectToSubspace(Csrc, tmp);
-    HermOp.AdjOp(Csrc, Ctmp); // Normal equations
-    CG(MdagMOp, Ctmp, Csol);
-
-    HermOp.Op(Csol, Ctmp);
-    Ctmp = Ctmp - Csrc;
-    std::cout << GridLogMessage << "coarse space true residual " << std::sqrt(norm2(Ctmp) / norm2(Csrc)) << std::endl;
-    _Aggregates.PromoteFromSubspace(Csol, out);
-
-    _FineOperator.Op(out, res);
-    res = res - tmp;
-    std::cout << GridLogMessage << "promoted sol residual " << std::sqrt(norm2(res) / norm2(tmp)) << std::endl;
-    _Aggregates.ProjectToSubspace(Csrc, res);
-    std::cout << GridLogMessage << "coarse space proj of residual " << norm2(Csrc) << std::endl;
-
-    out = out + Min; // additive coarse space correction
-    //    out = Min; // no additive coarse space correction
-
-    _FineOperator.Op(out, tmp);
-    tmp = tmp - in; // tmp is new residual
-
-    std::cout << GridLogMessage << " Preconditioner in  " << norm2(in) << std::endl;
-    std::cout << GridLogMessage << " Preconditioner out " << norm2(out) << std::endl;
-    std::cout << GridLogMessage << "preconditioner thinks residual is " << std::sqrt(norm2(tmp) / norm2(in)) << std::endl;
-  }
-#endif
-    // ADEF1: [MP+Q ] in = M [1 - A Q] in + Q in
-#if 1
-  void operatorADEF1(const FineField &in, FineField &out) {
-
-    CoarseVector Csrc(_CoarseOperator.Grid());
-    CoarseVector Ctmp(_CoarseOperator.Grid());
-    CoarseVector Csol(_CoarseOperator.Grid());
-    Csol = zero;
-
-    ConjugateGradient<CoarseVector> CG(1.0e-10, 100000);
-    ConjugateGradient<FineField>    fCG(3.0e-2, 1000);
-
-    HermitianLinearOperator<CoarseOperator, CoarseVector> HermOp(_CoarseOperator);
-    MdagMLinearOperator<CoarseOperator, CoarseVector>     MdagMOp(_CoarseOperator);
-    ShiftedMdagMLinearOperator<Matrix, FineField>         fMdagMOp(_FineMatrix, 0.1);
-
-    FineField tmp(in._grid);
-    FineField res(in._grid);
-    FineField Qin(in._grid);
-
-    // Monitor completeness of low mode space
-    //    _Aggregates.ProjectToSubspace  (Csrc,in);
-    //    _Aggregates.PromoteFromSubspace(Csrc,out);
-    //    std::cout<<GridLogMessage<<"Coarse Grid Preconditioner\nCompleteness in: "<<std::sqrt(norm2(out)/norm2(in))<<std::endl;
-
-    _Aggregates.ProjectToSubspace(Csrc, in);
-    HermOp.AdjOp(Csrc, Ctmp); // Normal equations
-    CG(MdagMOp, Ctmp, Csol);
-    _Aggregates.PromoteFromSubspace(Csol, Qin);
-
-    //    Qin=0;
-    _FineOperator.Op(Qin, tmp); // A Q in
-    tmp = in - tmp;             // in - A Q in
-
-    _FineOperator.Op(tmp, res); // this is the G5 herm bit
-    fCG(fMdagMOp, res, out);    // solves  MdagM = g5 M g5M
-
-    out = out + Qin;
-
-    _FineOperator.Op(out, tmp);
-    tmp = tmp - in; // tmp is new residual
-
-    std::cout << GridLogMessage << "preconditioner thinks residual is " << std::sqrt(norm2(tmp) / norm2(in)) << std::endl;
-  }
-#endif
-
-  void SAP(const FineField &src, FineField &psi) {
-
-    Lattice<iScalar<vInteger>> coor(src._grid);
-    Lattice<iScalar<vInteger>> subset(src._grid);
-
-    FineField r(src._grid);
-    FineField zz(src._grid);
-    zz = zero;
-    FineField vec1(src._grid);
-    FineField vec2(src._grid);
-
-    const Integer block = params.domainsize;
-
-    subset = zero;
-    for(int mu = 0; mu < Nd; mu++) {
-      LatticeCoordinate(coor, mu + 1);
-      coor   = div(coor, block);
-      subset = subset + coor;
-    }
-    subset = mod(subset, (Integer)2);
-
-    ShiftedMdagMLinearOperator<Matrix, FineField> fMdagMOp(_SmootherMatrix, 0.0);
-    Chebyshev<FineField>                          Cheby(params.lo, params.hi, params.order, InverseApproximation);
-
-    RealD resid;
-    for(int i = 0; i < params.steps; i++) {
-
-      // Even domain residual
-      _FineOperator.Op(psi, vec1); // this is the G5 herm bit
-      r     = src - vec1;
-      resid = norm2(r) / norm2(src);
-      std::cout << "SAP " << i << " resid " << resid << std::endl;
-
-      // Even domain solve
-      r = where(subset == (Integer)0, r, zz);
-      _SmootherOperator.AdjOp(r, vec1);
-      Cheby(fMdagMOp, vec1, vec2); // solves  MdagM = g5 M g5M
-      psi = psi + vec2;
-
-      // Odd domain residual
-      _FineOperator.Op(psi, vec1); // this is the G5 herm bit
-      r = src - vec1;
-      r = where(subset == (Integer)1, r, zz);
-
-      resid = norm2(r) / norm2(src);
-      std::cout << "SAP " << i << " resid " << resid << std::endl;
-
-      // Odd domain solve
-      _SmootherOperator.AdjOp(r, vec1);
-      Cheby(fMdagMOp, vec1, vec2); // solves  MdagM = g5 M g5M
-      psi = psi + vec2;
-
-      _FineOperator.Op(psi, vec1); // this is the G5 herm bit
-      r     = src - vec1;
-      resid = norm2(r) / norm2(src);
-      std::cout << "SAP " << i << " resid " << resid << std::endl;
-    }
-  };
-
-  void SmootherTest(const FineField &in) {
-
-    FineField vec1(in._grid);
-    FineField vec2(in._grid);
-
-    RealD lo[3] = {0.5, 1.0, 2.0};
-
-    //    MdagMLinearOperator<Matrix,FineField>        fMdagMOp(_FineMatrix);
-    ShiftedMdagMLinearOperator<Matrix, FineField> fMdagMOp(_SmootherMatrix, 0.0);
-
-    RealD Ni, r;
-
-    Ni = norm2(in);
-
-    for(int ilo = 0; ilo < 3; ilo++) {
-      for(int ord = 5; ord < 50; ord *= 2) {
-
-        _SmootherOperator.AdjOp(in, vec1);
-
-        Chebyshev<FineField> Cheby(lo[ilo], 70.0, ord, InverseApproximation);
-        Cheby(fMdagMOp, vec1, vec2); // solves  MdagM = g5 M g5M
-
-        _FineOperator.Op(vec2, vec1); // this is the G5 herm bit
-        vec1 = in - vec1;             // tmp  = in - A Min
-        r    = norm2(vec1);
-        std::cout << GridLogMessage << "Smoother resid " << std::sqrt(r / Ni) << std::endl;
-      }
-    }
-  }
-
-  void operatorCheby(const FineField &in, FineField &out) {
-
-    CoarseVector Csrc(_CoarseOperator.Grid());
-    CoarseVector Ctmp(_CoarseOperator.Grid());
-    CoarseVector Csol(_CoarseOperator.Grid());
-    Csol = zero;
-
-    ConjugateGradient<CoarseVector> CG(3.0e-3, 100000);
-    //    ConjugateGradient<FineField>    fCG(3.0e-2,1000);
-
-    HermitianLinearOperator<CoarseOperator, CoarseVector> HermOp(_CoarseOperator);
-    MdagMLinearOperator<CoarseOperator, CoarseVector>     MdagMOp(_CoarseOperator);
-    //    MdagMLinearOperator<Matrix,FineField>        fMdagMOp(_FineMatrix);
-    ShiftedMdagMLinearOperator<Matrix, FineField> fMdagMOp(_SmootherMatrix, 0.0);
-
-    FineField vec1(in._grid);
-    FineField vec2(in._grid);
-
-    //    Chebyshev<FineField> Cheby    (0.5,70.0,30,InverseApproximation);
-    //    Chebyshev<FineField> ChebyAccu(0.5,70.0,30,InverseApproximation);
-    Chebyshev<FineField> Cheby(params.lo, params.hi, params.order, InverseApproximation);
-    Chebyshev<FineField> ChebyAccu(params.lo, params.hi, params.order, InverseApproximation);
-    //    Cheby.JacksonSmooth();
-    //    ChebyAccu.JacksonSmooth();
-
-    //    _Aggregates.ProjectToSubspace  (Csrc,in);
-    //    _Aggregates.PromoteFromSubspace(Csrc,out);
-    //    std::cout<<GridLogMessage<<"Completeness: "<<std::sqrt(norm2(out)/norm2(in))<<std::endl;
-
-    //    ofstream fout("smoother");
-    //    Cheby.csv(fout);
-
-    // V11 multigrid.
-    // Use a fixed chebyshev and hope hermiticity helps.
-
-    // To make a working smoother for indefinite operator
-    // must multiply by "Mdag" (ouch loses all low mode content)
-    // and apply to poly approx of (mdagm)^-1.
-    // so that we end up with an odd polynomial.
+    FineField fineTmp1(in._grid);
+    FineField fineTmp2(in._grid);
 
     RealD Ni = norm2(in);
 
-    _SmootherOperator.AdjOp(in, vec1); // this is the G5 herm bit
-    ChebyAccu(fMdagMOp, vec1, out);    // solves  MdagM = g5 M g5M
+    // no pre smoothing for now
+    auto  preSmootherNorm     = 0;
+    auto  preSmootherResidual = 0;
+    RealD r;
 
-    std::cout << GridLogMessage << "Smoother norm " << norm2(out) << std::endl;
+    // Project to coarse grid, solve, project back to fine grid
+    _Aggregates.ProjectToSubspace(coarseSrc, in);
+    coarseGMRES(coarseMdagMOp, coarseSrc, coarseSol);
+    _Aggregates.PromoteFromSubspace(coarseSol, out);
 
-    // Update with residual for out
-    _FineOperator.Op(out, vec1); // this is the G5 herm bit
-    vec1 = in - vec1;            // tmp  = in - A Min
-
-    RealD r = norm2(vec1);
-
-    std::cout << GridLogMessage << "Smoother resid " << std::sqrt(r / Ni) << " " << r << " " << Ni << std::endl;
-
-    _Aggregates.ProjectToSubspace(Csrc, vec1);
-    HermOp.AdjOp(Csrc, Ctmp); // Normal equations
-    CG(MdagMOp, Ctmp, Csol);
-    _Aggregates.PromoteFromSubspace(Csol, vec1); // Ass^{-1} [in - A Min]_s
-                                                 // Q = Q[in - A Min]
-    out = out + vec1;
-
-    // Three preconditioner smoothing -- hermitian if C3 = C1
     // Recompute error
-    _FineOperator.Op(out, vec1); // this is the G5 herm bit
-    vec1 = in - vec1;            // tmp  = in - A Min
-    r    = norm2(vec1);
+    _FineOperator.Op(out, fineTmp1);
+    fineTmp1            = in - fineTmp1;
+    r                   = norm2(fineTmp1);
+    auto coarseResidual = std::sqrt(r / Ni);
 
-    std::cout << GridLogMessage << "Coarse resid " << std::sqrt(r / Ni) << std::endl;
+    // Apply smoother, use GMRES for the moment
+    fineGMRES(fineMdagMOp, in, out);
 
-    // Reapply smoother
-    _SmootherOperator.Op(vec1, vec2); // this is the G5 herm bit
-    ChebyAccu(fMdagMOp, vec2, vec1);  // solves  MdagM = g5 M g5M
-
-    out  = out + vec1;
-    vec1 = in - vec1; // tmp  = in - A Min
-    r    = norm2(vec1);
-    std::cout << GridLogMessage << "Smoother resid " << std::sqrt(r / Ni) << std::endl;
-  }
-
-  void operatorSAP(const FineField &in, FineField &out) {
-
-    CoarseVector Csrc(_CoarseOperator.Grid());
-    CoarseVector Ctmp(_CoarseOperator.Grid());
-    CoarseVector Csol(_CoarseOperator.Grid());
-    Csol = zero;
-
-    ConjugateGradient<CoarseVector> CG(1.0e-3, 100000);
-
-    HermitianLinearOperator<CoarseOperator, CoarseVector> HermOp(_CoarseOperator);
-    MdagMLinearOperator<CoarseOperator, CoarseVector>     MdagMOp(_CoarseOperator);
-
-    FineField vec1(in._grid);
-    FineField vec2(in._grid);
-
-    _Aggregates.ProjectToSubspace(Csrc, in);
-    _Aggregates.PromoteFromSubspace(Csrc, out);
-    std::cout << GridLogMessage << "Completeness: " << std::sqrt(norm2(out) / norm2(in)) << std::endl;
-
-    // To make a working smoother for indefinite operator
-    // must multiply by "Mdag" (ouch loses all low mode content)
-    // and apply to poly approx of (mdagm)^-1.
-    // so that we end up with an odd polynomial.
-    SAP(in, out);
-
-    // Update with residual for out
-    _FineOperator.Op(out, vec1); // this is the G5 herm bit
-    vec1 = in - vec1;            // tmp  = in - A Min
-
-    RealD r  = norm2(vec1);
-    RealD Ni = norm2(in);
-    std::cout << GridLogMessage << "SAP resid " << std::sqrt(r / Ni) << " " << r << " " << Ni << std::endl;
-
-    _Aggregates.ProjectToSubspace(Csrc, vec1);
-    HermOp.AdjOp(Csrc, Ctmp); // Normal equations
-    CG(MdagMOp, Ctmp, Csol);
-    _Aggregates.PromoteFromSubspace(Csol, vec1); // Ass^{-1} [in - A Min]_s
-                                                 // Q = Q[in - A Min]
-    out = out + vec1;
-
-    // Three preconditioner smoothing -- hermitian if C3 = C1
     // Recompute error
-    _FineOperator.Op(out, vec1); // this is the G5 herm bit
-    vec1 = in - vec1;            // tmp  = in - A Min
-    r    = norm2(vec1);
+    _FineOperator.Op(out, fineTmp1);
+    fineTmp1                  = in - fineTmp1;
+    r                         = norm2(fineTmp1);
+    auto postSmootherResidual = std::sqrt(r / Ni);
 
-    std::cout << GridLogMessage << "Coarse resid " << std::sqrt(r / Ni) << std::endl;
-
-    // Reapply smoother
-    SAP(vec1, vec2);
-    out = out + vec2;
-
-    // Update with residual for out
-    _FineOperator.Op(out, vec1); // this is the G5 herm bit
-    vec1 = in - vec1;            // tmp  = in - A Min
-
-    r  = norm2(vec1);
-    Ni = norm2(in);
-    std::cout << GridLogMessage << "SAP resid(post) " << std::sqrt(r / Ni) << " " << r << " " << Ni << std::endl;
+    std::cout << GridLogIterative << "Input norm = " << Ni << " Pre-Smoother norm " << preSmootherNorm
+              << " Pre-Smoother residual = " << preSmootherResidual << " Coarse residual = " << coarseResidual
+              << " Post-Smoother residual = " << postSmootherResidual << std::endl;
   }
 
   void runChecks(CoarseGrids<nbasis> &cGrids, int whichCoarseGrid) {
@@ -649,16 +323,16 @@ public:
       fTmps[1]       = _Aggregates.subspace[i] - fTmps[0]; // v_i - P R v_i
       auto deviation = std::sqrt(norm2(fTmps[1]) / norm2(_Aggregates.subspace[i]));
 
-      std::cout << GridLogMessage << "Vector " << i << ": norm2(v_i) = " << norm2(_Aggregates.subspace[i])
-                << " | norm2(R v_i) = " << norm2(cTmps[0]) << " | norm2(P R v_i) = " << norm2(fTmps[0])
-                << " | relative deviation = " << deviation << std::endl;
+      std::cout << GridLogMessage << "Vector " << i << ": norm2(v_i) = " << norm2(_Aggregates.subspace[i]) << " | norm2(R v_i) = " << norm2(cTmps[0])
+                << " | norm2(P R v_i) = " << norm2(fTmps[0]) << " | relative deviation = " << deviation;
 
       if(deviation > tolerance) {
-        std::cout << GridLogError << "Vector " << i << ": relative deviation check failed " << deviation << " > " << tolerance << std::endl;
-        abort();
+        std::cout << " > " << tolerance << " -> check failed" << std::endl;
+        // abort();
+      } else {
+        std::cout << " < " << tolerance << " -> check passed" << std::endl;
       }
     }
-    std::cout << GridLogMessage << "Check passed!" << std::endl;
 
     std::cout << GridLogMessage << "**************************************************" << std::endl;
     std::cout << GridLogMessage << "MG correctness check: 0 == (1 - R P) v_c" << std::endl;
@@ -673,13 +347,14 @@ public:
     auto deviation = std::sqrt(norm2(cTmps[2]) / norm2(cTmps[0]));
 
     std::cout << GridLogMessage << "norm2(v_c) = " << norm2(cTmps[0]) << " | norm2(R P v_c) = " << norm2(cTmps[1])
-              << " | norm2(P v_c) = " << norm2(fTmps[0]) << " | relative deviation = " << deviation << std::endl;
+              << " | norm2(P v_c) = " << norm2(fTmps[0]) << " | relative deviation = " << deviation;
 
     if(deviation > tolerance) {
-      std::cout << GridLogError << "relative deviation check failed " << deviation << " > " << tolerance << std::endl;
-      abort();
+      std::cout << " > " << tolerance << " -> check failed" << std::endl;
+      // abort();
+    } else {
+      std::cout << " < " << tolerance << " -> check passed" << std::endl;
     }
-    std::cout << GridLogMessage << "Check passed!" << std::endl;
 
     std::cout << GridLogMessage << "**************************************************" << std::endl;
     std::cout << GridLogMessage << "MG correctness check: 0 == (R D P - D_c) v_c" << std::endl;
@@ -697,13 +372,14 @@ public:
     deviation = std::sqrt(norm2(cTmps[3]) / norm2(cTmps[1]));
 
     std::cout << GridLogMessage << "norm2(R D P v_c) = " << norm2(cTmps[1]) << " | norm2(D_c v_c) = " << norm2(cTmps[2])
-              << " | relative deviation = " << deviation << std::endl;
+              << " | relative deviation = " << deviation;
 
     if(deviation > tolerance) {
-      std::cout << GridLogError << "relative deviation check failed " << deviation << " > " << tolerance << std::endl;
-      abort();
+      std::cout << " > " << tolerance << " -> check failed" << std::endl;
+      // abort();
+    } else {
+      std::cout << " < " << tolerance << " -> check passed" << std::endl;
     }
-    std::cout << GridLogMessage << "Check passed!" << std::endl;
 
     std::cout << GridLogMessage << "**************************************************" << std::endl;
     std::cout << GridLogMessage << "MG correctness check: 0 == |(Im(v_c^dag D_c^dag D_c v_c)|" << std::endl;
@@ -714,20 +390,18 @@ public:
     MdagMOp.Op(cTmps[0], cTmps[1]);    //         D_c v_c
     MdagMOp.AdjOp(cTmps[1], cTmps[2]); // D_c^dag D_c v_c
 
-    // // alternative impl, which is better?
-    // MdagMOp.HermOp(cTmps[0], cTmps[2]); // D_c^dag D_c v_c
-
     auto dot  = innerProduct(cTmps[0], cTmps[2]); //v_c^dag D_c^dag D_c v_c
     deviation = abs(imag(dot)) / abs(real(dot));
 
     std::cout << GridLogMessage << "Re(v_c^dag D_c^dag D_c v_c) = " << real(dot) << " | Im(v_c^dag D_c^dag D_c v_c) = " << imag(dot)
-              << " | relative deviation = " << deviation << std::endl;
+              << " | relative deviation = " << deviation;
 
     if(deviation > tolerance) {
-      std::cout << GridLogError << "relative deviation check failed " << deviation << " > " << tolerance << std::endl;
-      abort();
+      std::cout << " > " << tolerance << " -> check failed" << std::endl;
+      // abort();
+    } else {
+      std::cout << " < " << tolerance << " -> check passed" << std::endl;
     }
-    std::cout << GridLogMessage << "Check passed!" << std::endl;
   }
 };
 
@@ -740,11 +414,10 @@ int main(int argc, char **argv) {
   params.domaindecompose = 0;
   params.order           = 30;
   params.Ls              = 1;
-  // params.mq = .13;
-  params.mq    = .5;
-  params.lo    = 0.5;
-  params.hi    = 70.0;
-  params.steps = 1;
+  params.mq              = -0.5;
+  params.lo              = 0.5;
+  params.hi              = 70.0;
+  params.steps           = 1;
 
   std::cout << GridLogMessage << "**************************************************" << std::endl;
   std::cout << GridLogMessage << "Params: " << std::endl;
@@ -756,7 +429,7 @@ int main(int argc, char **argv) {
   std::cout << GridLogMessage << "Set up some fine level stuff: " << std::endl;
   std::cout << GridLogMessage << "**************************************************" << std::endl;
 
-  GridCartesian *FGrid = SpaceTimeGrid::makeFourDimGrid(GridDefaultLatt(), GridDefaultSimd(Nd, vComplex::Nsimd()), GridDefaultMpi());
+  GridCartesian *        FGrid   = SpaceTimeGrid::makeFourDimGrid(GridDefaultLatt(), GridDefaultSimd(Nd, vComplex::Nsimd()), GridDefaultMpi());
   GridRedBlackCartesian *FrbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(FGrid);
 
   std::vector<int> fSeeds({1, 2, 3, 4});
@@ -766,57 +439,31 @@ int main(int argc, char **argv) {
   Gamma g5(Gamma::Algebra::Gamma5);
 
   // clang-format off
-  LatticeFermion        src(FGrid); gaussian(fPRNG, src); // src=src + g5 * src;
+  LatticeFermion        src(FGrid); gaussian(fPRNG, src);
   LatticeFermion     result(FGrid); result = zero;
-  LatticeFermion        ref(FGrid); ref = zero;
-  LatticeFermion        tmp(FGrid);
-  LatticeFermion        err(FGrid);
   LatticeGaugeField     Umu(FGrid); SU3::HotConfiguration(fPRNG, Umu);
-  LatticeGaugeField   UmuDD(FGrid);
-  LatticeColourMatrix     U(FGrid);
-  LatticeColourMatrix     zz(FGrid);
   // clang-format on
 
-  if(params.domaindecompose) {
-    Lattice<iScalar<vInteger>> coor(FGrid);
-    zz = zero;
-    for(int mu = 0; mu < Nd; mu++) {
-      LatticeCoordinate(coor, mu);
-      U = PeekIndex<LorentzIndex>(Umu, mu);
-      U = where(mod(coor, params.domainsize) == (Integer)0, zz, U);
-      PokeIndex<LorentzIndex>(UmuDD, U, mu);
-    }
-  } else {
-    UmuDD = Umu;
-  }
-
   RealD mass = params.mq;
 
   std::cout << GridLogMessage << "**************************************************" << std::endl;
   std::cout << GridLogMessage << "Set up some coarser levels stuff: " << std::endl;
   std::cout << GridLogMessage << "**************************************************" << std::endl;
 
-  const int nbasis = 20; // we fix the number of test vector to the same
+  const int nbasis = 20; // fix the number of test vector to the same
                          // number on every level for now
 
   //////////////////////////////////////////
   // toggle to run two/three level method
   //////////////////////////////////////////
 
-  // // two-level algorithm
-  // std::vector<std::vector<int>> blockSizes({{2, 2, 2, 2}});
-  // CoarseGrids<nbasis>           coarseGrids(blockSizes, 1);
+  // two-level algorithm
+  std::vector<std::vector<int>> blockSizes({{2, 2, 2, 2}});
+  CoarseGrids<nbasis>           coarseGrids(blockSizes, 1);
 
-  // three-level algorithm
-  std::vector<std::vector<int>> blockSizes({{2, 2, 2, 2}, {2, 2, 1, 1}});
-  CoarseGrids<nbasis>           coarseGrids(blockSizes, 2);
-
-  std::cout << GridLogMessage << "**************************************************" << std::endl;
-  std::cout << GridLogMessage << "Building the wilson operator on the fine grid" << std::endl;
-  std::cout << GridLogMessage << "**************************************************" << std::endl;
-
-  WilsonFermionR Dw(Umu, *FGrid, *FrbGrid, mass);
-  WilsonFermionR DwDD(UmuDD, *FGrid, *FrbGrid, mass);
+  // // three-level algorithm
+  // std::vector<std::vector<int>> blockSizes({{2, 2, 2, 2}, {2, 2, 1, 1}});
+  // CoarseGrids<nbasis>           coarseGrids(blockSizes, 2);
 
   std::cout << GridLogMessage << "**************************************************" << std::endl;
   std::cout << GridLogMessage << "Some typedefs" << std::endl;
@@ -844,22 +491,40 @@ int main(int argc, char **argv) {
 
   static_assert(std::is_same<CoarseVector, CoarseCoarseVector>::value, "CoarseVector and CoarseCoarseVector must be of the same type");
 
+  std::cout << GridLogMessage << "**************************************************" << std::endl;
+  std::cout << GridLogMessage << "Building the wilson operator on the fine grid" << std::endl;
+  std::cout << GridLogMessage << "**************************************************" << std::endl;
+
+  WilsonFermionR Dw(Umu, *FGrid, *FrbGrid, mass);
+
+  std::cout << GridLogMessage << "**************************************************" << std::endl;
+  std::cout << GridLogMessage << "Setting up linear operators" << std::endl;
+  std::cout << GridLogMessage << "**************************************************" << std::endl;
+
+  MdagMLinearOperator<WilsonFermionR, LatticeFermion> FineMdagMOp(Dw);
+
   std::cout << GridLogMessage << "**************************************************" << std::endl;
   std::cout << GridLogMessage << "Calling Aggregation class to build subspaces" << std::endl;
   std::cout << GridLogMessage << "**************************************************" << std::endl;
 
-  MdagMLinearOperator<WilsonFermionR, LatticeFermion> FineHermPosdefOp(Dw);
-  Subspace                                            FineAggregates(coarseGrids.Grids[0], FGrid, 0);
+  Subspace FineAggregates(coarseGrids.Grids[0], FGrid, 0);
 
   assert((nbasis & 0x1) == 0);
   int nb = nbasis / 2;
   std::cout << GridLogMessage << " nbasis/2 = " << nb << std::endl;
 
-  FineAggregates.CreateSubspace(fPRNG, FineHermPosdefOp /*, nb */); // Don't specify nb to see the orthogonalization check
+  FineAggregates.CreateSubspace(fPRNG, FineMdagMOp /*, nb */); // Don't specify nb to see the orthogonalization check
+
+  std::cout << GridLogMessage << "**************************************************" << std::endl;
+  std::cout << GridLogMessage << "Test vector analysis after initial creation of subspace" << std::endl;
+  std::cout << GridLogMessage << "**************************************************" << std::endl;
 
-  std::cout << GridLogMessage << "Test vector analysis after initial creation of MG test vectors" << std::endl;
   FineTVA fineTVA;
-  fineTVA(FineHermPosdefOp, FineAggregates.subspace, nb);
+  fineTVA(FineMdagMOp, FineAggregates.subspace);
+
+  std::cout << GridLogMessage << "**************************************************" << std::endl;
+  std::cout << GridLogMessage << "Projecting subspace to definite chirality" << std::endl;
+  std::cout << GridLogMessage << "**************************************************" << std::endl;
 
   for(int n = 0; n < nb; n++) {
     FineAggregates.subspace[n + nb] = g5 * FineAggregates.subspace[n];
@@ -877,14 +542,26 @@ int main(int argc, char **argv) {
   std::cout << GridLogMessage << "Building coarse representation of Dirac operator" << std::endl;
   std::cout << GridLogMessage << "**************************************************" << std::endl;
 
-  // using Gamma5HermitianLinearOperator corresponds to working with H = g5 * D
-  Gamma5HermitianLinearOperator<WilsonFermionR, LatticeFermion> FineHermIndefOp(Dw);
-  Gamma5HermitianLinearOperator<WilsonFermionR, LatticeFermion> FineHermIndefOpDD(DwDD);
-  CoarseOperator                                                Dc(*coarseGrids.Grids[0]);
-  Dc.CoarsenOperator(FGrid, FineHermIndefOp, FineAggregates); // uses only linop.OpDiag & linop.OpDir
+  CoarseOperator Dc(*coarseGrids.Grids[0]);
 
-  std::cout << GridLogMessage << "Test vector analysis after construction of D_c" << std::endl;
-  fineTVA(FineHermPosdefOp, FineAggregates.subspace, nb);
+  Dc.CoarsenOperator(FGrid, FineMdagMOp, FineAggregates);
+
+  MdagMLinearOperator<CoarseOperator, CoarseVector> CoarseMdagMOp(Dc);
+
+  std::cout << GridLogMessage << "**************************************************" << std::endl;
+  std::cout << GridLogMessage << "Test vector analysis after construction of coarse Dirac operator" << std::endl;
+  std::cout << GridLogMessage << "**************************************************" << std::endl;
+
+  fineTVA(FineMdagMOp, FineAggregates.subspace);
+
+  std::cout << GridLogMessage << "**************************************************" << std::endl;
+  std::cout << GridLogMessage << "Testing the linear operators" << std::endl;
+  std::cout << GridLogMessage << "**************************************************" << std::endl;
+
+  // clang-format off
+  testLinearOperator(FineMdagMOp,   FGrid,                "FineMdagMOp");   std::cout << GridLogMessage << std::endl;
+  testLinearOperator(CoarseMdagMOp, coarseGrids.Grids[0], "CoarseMdagMOp"); std::cout << GridLogMessage << std::endl;
+  // clang-format on
 
   std::cout << GridLogMessage << "**************************************************" << std::endl;
   std::cout << GridLogMessage << "Building coarse vectors" << std::endl;
@@ -895,64 +572,85 @@ int main(int argc, char **argv) {
   gaussian(coarseGrids.PRNGs[0], coarseSource);
   coarseResult = zero;
 
+  std::cout << GridLogMessage << "**************************************************" << std::endl;
+  std::cout << GridLogMessage << "Building some coarse space solvers" << std::endl;
+  std::cout << GridLogMessage << "**************************************************" << std::endl;
+
+  std::vector<std::unique_ptr<OperatorFunction<CoarseVector>>> dummyCoarseSolvers;
+  dummyCoarseSolvers.emplace_back(new GeneralisedMinimalResidual<CoarseVector>(5.0e-2, 100, 8, false));
+  dummyCoarseSolvers.emplace_back(new MinimalResidual<CoarseVector>(5.0e-2, 100, 0.8, false));
+  dummyCoarseSolvers.emplace_back(new ConjugateGradient<CoarseVector>(5.0e-2, 100, false));
+
   std::cout << GridLogMessage << "**************************************************" << std::endl;
   std::cout << GridLogMessage << "Testing some coarse space solvers" << std::endl;
   std::cout << GridLogMessage << "**************************************************" << std::endl;
 
-  MdagMLinearOperator<CoarseOperator, CoarseVector> CoarseHermPosdefOp(Dc);
+  std::cout << GridLogMessage << "checking norm of coarse src " << norm2(coarseSource) << std::endl;
 
-  std::vector<std::unique_ptr<OperatorFunction<CoarseVector>>> coarseSolvers;
-  coarseSolvers.emplace_back(new GeneralisedMinimalResidual<CoarseVector>(5.0e-2, 100, 8, false));
-  coarseSolvers.emplace_back(new MinimalResidual<CoarseVector>(5.0e-2, 100, false));
-  coarseSolvers.emplace_back(new ConjugateGradient<CoarseVector>(5.0e-2, 100, false));
-
-  for(auto const &solver : coarseSolvers) {
+  for(auto const &solver : dummyCoarseSolvers) {
     coarseResult = zero;
-    (*solver)(CoarseHermPosdefOp, coarseSource, coarseResult);
+    (*solver)(CoarseMdagMOp, coarseSource, coarseResult);
   }
 
   std::cout << GridLogMessage << "**************************************************" << std::endl;
-  std::cout << GridLogMessage << "Testing the operators" << std::endl;
+  std::cout << GridLogMessage << "Building a multigrid preconditioner" << std::endl;
   std::cout << GridLogMessage << "**************************************************" << std::endl;
 
-  std::cout << GridLogMessage << "MdagMLinearOperator<WilsonFermionR, LatticeFermion> FineHermPosdefOp(Dw);" << std::endl;
-  testOperator(FineHermPosdefOp, FGrid);
-  std::cout << GridLogMessage << "Gamma5HermitianLinearOperator<WilsonFermionR, LatticeFermion> FineHermIndefOp(Dw);" << std::endl;
-  testOperator(FineHermIndefOp, FGrid);
-  std::cout << GridLogMessage << "Gamma5HermitianLinearOperator<WilsonFermionR, LatticeFermion> FineHermIndefOpDD(DwDD);" << std::endl;
-  testOperator(FineHermIndefOpDD, FGrid);
-  std::cout << GridLogMessage << "MdagMLinearOperator<CoarseOperator, CoarseVector> CoarseHermPosdefOp(Dc);" << std::endl;
-  testOperator(CoarseHermPosdefOp, coarseGrids.Grids[0]);
-
-  std::cout << GridLogMessage << "**************************************************" << std::endl;
-  std::cout << GridLogMessage << "Building deflation preconditioner " << std::endl;
-  std::cout << GridLogMessage << "**************************************************" << std::endl;
-
-  FineMGPreconditioner FineMGPrecon(FineAggregates, Dc, FineHermIndefOp, Dw, FineHermIndefOp, Dw);
-
-  FineMGPreconditioner FineMGPreconDD(FineAggregates, Dc, FineHermIndefOp, Dw, FineHermIndefOpDD, DwDD);
-
+  FineMGPreconditioner      FineMGPrecon(FineAggregates, Dc, FineMdagMOp, Dw, FineMdagMOp, Dw);
   FineTrivialPreconditioner FineSimplePrecon;
 
   FineMGPrecon.runChecks(coarseGrids, 0);
 
+  std::cout << GridLogMessage << "**************************************************" << std::endl;
+  std::cout << GridLogMessage << "Building krylov subspace solvers w/ & w/o MG Preconditioner" << std::endl;
+  std::cout << GridLogMessage << "**************************************************" << std::endl;
+
+  std::vector<std::unique_ptr<OperatorFunction<LatticeFermion>>> solvers;
+  solvers.emplace_back(new FlexibleGeneralisedMinimalResidual<LatticeFermion>(1.0e-12, 4000000, FineSimplePrecon, 25, false));
+  solvers.emplace_back(new FlexibleGeneralisedMinimalResidual<LatticeFermion>(1.0e-12, 100, FineMGPrecon, 25, false));
+  solvers.emplace_back(new PrecGeneralisedConjugateResidual<LatticeFermion>(1.0e-12, 4000000, FineSimplePrecon, 25, 25));
+
+  std::cout << GridLogMessage << "**************************************************" << std::endl;
+  std::cout << GridLogMessage << "Testing the (un)?preconditioned solvers" << std::endl;
+  std::cout << GridLogMessage << "**************************************************" << std::endl;
+
+  for(auto const &solver : solvers) {
+    std::cout << GridLogMessage << "checking norm of fine src " << norm2(src) << std::endl;
+    result = zero;
+    (*solver)(FineMdagMOp, src, result);
+    std::cout << std::endl;
+  }
+
+#if 0
   if(coarseGrids.LattSizes.size() == 2) {
 
     std::cout << GridLogMessage << "**************************************************" << std::endl;
-    std::cout << GridLogMessage << "Dummy testing for building a second coarse level" << std::endl;
+    std::cout << GridLogMessage << "Some testing for construction of a second coarse level" << std::endl;
     std::cout << GridLogMessage << "**************************************************" << std::endl;
 
+    // std::cout << GridLogMessage << "**************************************************" << std::endl;
+    // std::cout << GridLogMessage << "Calling Aggregation class to build subspaces" << std::endl;
+    // std::cout << GridLogMessage << "**************************************************" << std::endl;
+
     SubSubSpace CoarseAggregates(coarseGrids.Grids[1], coarseGrids.Grids[0], 0);
-    CoarseAggregates.CreateSubspace(coarseGrids.PRNGs[0], CoarseHermPosdefOp);
+    CoarseAggregates.CreateSubspace(coarseGrids.PRNGs[0], CoarseMdagMOp);
+
+    // std::cout << GridLogMessage << "**************************************************" << std::endl;
+    // std::cout << GridLogMessage << "Test vector analysis after initial creation of subspace" << std::endl;
+    // std::cout << GridLogMessage << "**************************************************" << std::endl;
 
     // // this doesn't work because this function applies g5 to a vector, which
     // // doesn't work for coarse vectors atm -> FIXME
     // CoarseTVA coarseTVA;
-    // coarseTVA(CoarseHermPosdefOp, CoarseAggregates.subspace, nb);
+    // coarseTVA(CoarseMdagMOp, CoarseAggregates.subspace);
+
+    // std::cout << GridLogMessage << "**************************************************" << std::endl;
+    // std::cout << GridLogMessage << "Projecting subspace to definite chirality" << std::endl;
+    // std::cout << GridLogMessage << "**************************************************" << std::endl;
 
     // // cannot apply g5 to coarse vectors atm -> FIXME
     // for(int n=0;n<nb;n++){
-    //   CoarseAggregates.subspace[n+nb] = g5 * CoarseAggregates.subspace[n]; // multiply with g5 normally instead of G5R5 since this specific to DWF
+    //   CoarseAggregates.subspace[n+nb] = g5 * CoarseAggregates.subspace[n];
     //   std::cout<<GridLogMessage<<n<<" subspace "<<norm2(CoarseAggregates.subspace[n+nb])<<" "<<norm2(CoarseAggregates.subspace[n]) <<std::endl;
     // }
 
@@ -965,59 +663,93 @@ int main(int argc, char **argv) {
       std::cout << GridLogMessage << "vec[" << n << "] = " << norm2(CoarseAggregates.subspace[n]) << std::endl;
     }
 
+    // std::cout << GridLogMessage << "**************************************************" << std::endl;
+    // std::cout << GridLogMessage << "Building coarse coarse representation of Dirac operator" << std::endl;
+    // std::cout << GridLogMessage << "**************************************************" << std::endl;
+
     CoarseCoarseOperator Dcc(*coarseGrids.Grids[1]);
-    Dcc.CoarsenOperator(coarseGrids.Grids[0], CoarseHermPosdefOp, CoarseAggregates); // uses only linop.OpDiag & linop.OpDir
+
+    Dcc.CoarsenOperator(coarseGrids.Grids[0], CoarseMdagMOp, CoarseAggregates);
+
+    MdagMLinearOperator<CoarseCoarseOperator, CoarseCoarseVector> CoarseCoarseMdagMOp(Dcc);
+
+    // std::cout << GridLogMessage << "**************************************************" << std::endl;
+    // std::cout << GridLogMessage << "Test vector analysis after construction of coarse Dirac operator" << std::endl;
+    // std::cout << GridLogMessage << "**************************************************" << std::endl;
 
     // // this doesn't work because this function applies g5 to a vector, which
     // // doesn't work for coarse vectors atm -> FIXME
-    // std::cout << GridLogMessage << "Test vector analysis after construction of D_c_c" << std::endl;
-    // coarseTVA(CoarseHermPosdefOp, CoarseAggregates.subspace, nb);
+    // coarseTVA(CoarseMdagMOp, CoarseAggregates.subspace);
+
+    // std::cout << GridLogMessage << "**************************************************" << std::endl;
+    // std::cout << GridLogMessage << "Testing the linear operators" << std::endl;
+    // std::cout << GridLogMessage << "**************************************************" << std::endl;
+
+    // clang-format off
+    testLinearOperator(CoarseMdagMOp,       coarseGrids.Grids[0], "CoarseMdagMOp");
+    testLinearOperator(CoarseCoarseMdagMOp, coarseGrids.Grids[1], "CoarseCoarseMdagMOp");
+    // clang-format on
+
+    // std::cout << GridLogMessage << "**************************************************" << std::endl;
+    // std::cout << GridLogMessage << "Building coarse coarse vectors" << std::endl;
+    // std::cout << GridLogMessage << "**************************************************" << std::endl;
 
     CoarseCoarseVector coarseCoarseSource(coarseGrids.Grids[1]);
     CoarseCoarseVector coarseCoarseResult(coarseGrids.Grids[1]);
     gaussian(coarseGrids.PRNGs[1], coarseCoarseSource);
     coarseCoarseResult = zero;
 
-    MdagMLinearOperator<CoarseCoarseOperator, CoarseCoarseVector> CoarseCoarseHermPosdefOp(Dcc);
+    // std::cout << GridLogMessage << "**************************************************" << std::endl;
+    // std::cout << GridLogMessage << "Building some coarse space solvers" << std::endl;
+    // std::cout << GridLogMessage << "**************************************************" << std::endl;
 
-    std::vector<std::unique_ptr<OperatorFunction<CoarseCoarseVector>>> coarseCoarseSolvers;
-    coarseSolvers.emplace_back(new GeneralisedMinimalResidual<CoarseCoarseVector>(5.0e-2, 100, 8, false));
-    coarseSolvers.emplace_back(new MinimalResidual<CoarseCoarseVector>(5.0e-2, 100, false));
-    coarseSolvers.emplace_back(new ConjugateGradient<CoarseCoarseVector>(5.0e-2, 100, false));
+    std::vector<std::unique_ptr<OperatorFunction<CoarseCoarseVector>>> dummyCoarseCoarseSolvers;
+    dummyCoarseCoarseSolvers.emplace_back(new GeneralisedMinimalResidual<CoarseCoarseVector>(5.0e-2, 100, 8, false));
+    dummyCoarseCoarseSolvers.emplace_back(new MinimalResidual<CoarseCoarseVector>(5.0e-2, 100, false));
+    dummyCoarseCoarseSolvers.emplace_back(new ConjugateGradient<CoarseCoarseVector>(5.0e-2, 100, false));
 
-    for(auto const &solver : coarseCoarseSolvers) {
+    // std::cout << GridLogMessage << "**************************************************" << std::endl;
+    // std::cout << GridLogMessage << "Testing some coarse coarse space solvers" << std::endl;
+    // std::cout << GridLogMessage << "**************************************************" << std::endl;
+
+    std::cout << GridLogMessage << "checking norm of coarse coarse src " << norm2(coarseCoarseSource) << std::endl;
+
+    for(auto const &solver : dummyCoarseCoarseSolvers) {
       coarseCoarseResult = zero;
-      (*solver)(CoarseCoarseHermPosdefOp, coarseCoarseSource, coarseCoarseResult);
+      (*solver)(CoarseCoarseMdagMOp, coarseCoarseSource, coarseCoarseResult);
     }
 
-    CoarseMGPreconditioner      CoarseMGPrecon(CoarseAggregates, Dcc, CoarseHermPosdefOp, Dc, CoarseHermPosdefOp, Dc);
+    // std::cout << GridLogMessage << "**************************************************" << std::endl;
+    // std::cout << GridLogMessage << "Building a multigrid preconditioner" << std::endl;
+    // std::cout << GridLogMessage << "**************************************************" << std::endl;
+
+    CoarseMGPreconditioner      CoarseMGPrecon(CoarseAggregates, Dcc, CoarseMdagMOp, Dc, CoarseMdagMOp, Dc);
+    CoarseTrivialPreconditioner CoarseSimplePrecon;
 
     CoarseMGPrecon.runChecks(coarseGrids, 1);
 
-    std::cout << GridLogMessage << "ARTIFICIAL ABORT" << std::endl;
-    abort();
-  }
-
-  std::cout << GridLogMessage << "**************************************************" << std::endl;
-  std::cout << GridLogMessage << "Building VPGCR and FGMRES solvers w/ & w/o MG Preconditioner" << std::endl;
-  std::cout << GridLogMessage << "**************************************************" << std::endl;
-
-  std::vector<std::unique_ptr<OperatorFunction<LatticeFermion>>> solvers;
-  solvers.emplace_back(new PrecGeneralisedConjugateResidual<LatticeFermion>(1.0e-12, 100, FineMGPrecon, 8, 8));
-  solvers.emplace_back(new FlexibleGeneralisedMinimalResidual<LatticeFermion>(1.0e-12, 100, FineMGPrecon, 8));
-  solvers.emplace_back(new PrecGeneralisedConjugateResidual<LatticeFermion>(1.0e-12, 4000000, FineSimplePrecon, 8, 8));
-  solvers.emplace_back(new FlexibleGeneralisedMinimalResidual<LatticeFermion>(1.0e-12, 4000000, FineSimplePrecon, 8));
-
-  std::cout << GridLogMessage << "**************************************************" << std::endl;
-  std::cout << GridLogMessage << "Testing the solvers" << std::endl;
-  std::cout << GridLogMessage << "**************************************************" << std::endl;
-
-  std::cout << GridLogMessage << "checking norm src " << norm2(src) << std::endl;
-
-  for(auto const &solver : solvers) {
-    result = zero;
-    (*solver)(FineHermIndefOp, src, result);
+    // std::cout << GridLogMessage << "**************************************************" << std::endl;
+    // std::cout << GridLogMessage << "Building krylov subspace solvers w/ & w/o MG Preconditioner" << std::endl;
+    // std::cout << GridLogMessage << "**************************************************" << std::endl;
+
+    std::vector<std::unique_ptr<OperatorFunction<CoarseVector>>> solvers;
+    solvers.emplace_back(new FlexibleGeneralisedMinimalResidual<CoarseVector>(1.0e-12, 4000000, CoarseSimplePrecon, 25, false));
+    solvers.emplace_back(new FlexibleGeneralisedMinimalResidual<CoarseVector>(1.0e-12, 100, CoarseMGPrecon, 25, false));
+    solvers.emplace_back(new PrecGeneralisedConjugateResidual<CoarseVector>(1.0e-12, 4000000, CoarseSimplePrecon, 25, 25));
+
+    // std::cout << GridLogMessage << "**************************************************" << std::endl;
+    // std::cout << GridLogMessage << "Testing the (un)?preconditioned solvers" << std::endl;
+    // std::cout << GridLogMessage << "**************************************************" << std::endl;
+
+    for(auto const &solver : solvers) {
+      std::cout << GridLogMessage << "checking norm of fine src " << norm2(coarseSource) << std::endl;
+      coarseResult = zero;
+      (*solver)(CoarseMdagMOp, coarseSource, coarseResult);
+      std::cout << std::endl;
+    }
+
   }
+#endif
 
   Grid_finalize();
 }

From 48177f2f2dc52b743ae78407d24a886de519b980 Mon Sep 17 00:00:00 2001
From: Daniel Richtmann <daniel.richtmann@gmail.com>
Date: Thu, 8 Feb 2018 23:36:22 +0100
Subject: [PATCH 087/130] Add tests for all MR|GMRES solvers with wilson clover
 action

---
 .../Test_wilsonclover_cagmres_unprec.cc       | 71 ++++++++++++++++++
 .../solver/Test_wilsonclover_fcagmres_prec.cc | 74 +++++++++++++++++++
 tests/solver/Test_wilsonclover_fgmres_prec.cc | 74 +++++++++++++++++++
 .../solver/Test_wilsonclover_gmres_unprec.cc  | 71 ++++++++++++++++++
 tests/solver/Test_wilsonclover_mr_unprec.cc   | 71 ++++++++++++++++++
 5 files changed, 361 insertions(+)
 create mode 100644 tests/solver/Test_wilsonclover_cagmres_unprec.cc
 create mode 100644 tests/solver/Test_wilsonclover_fcagmres_prec.cc
 create mode 100644 tests/solver/Test_wilsonclover_fgmres_prec.cc
 create mode 100644 tests/solver/Test_wilsonclover_gmres_unprec.cc
 create mode 100644 tests/solver/Test_wilsonclover_mr_unprec.cc

diff --git a/tests/solver/Test_wilsonclover_cagmres_unprec.cc b/tests/solver/Test_wilsonclover_cagmres_unprec.cc
new file mode 100644
index 00000000..e248614b
--- /dev/null
+++ b/tests/solver/Test_wilsonclover_cagmres_unprec.cc
@@ -0,0 +1,71 @@
+/*************************************************************************************
+
+Grid physics library, www.github.com/paboyle/Grid
+
+Source file: ./tests/solver/Test_wilsonclover_cagmres_unprec.cc
+
+Copyright (C) 2015
+
+Author: Daniel Richtmann <daniel.richtmann@ur.de>
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along
+with this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+
+See the full license in the file "LICENSE" in the top level distribution
+directory
+*************************************************************************************/
+/*  END LEGAL */
+#include <Grid/Grid.h>
+
+using namespace Grid;
+using namespace Grid::QCD;
+
+int main (int argc, char ** argv) // Driver: builds a Wilson clover operator on a hot gauge field and runs CA-GMRES on a random source.
+{
+  Grid_init(&argc,&argv); // hands argc/argv to Grid; presumably parses Grid's own command-line options -- confirm
+
+  std::vector<int> latt_size   = GridDefaultLatt(); // extents and layouts come from the GridDefault*() helpers, nothing is hard-coded here
+  std::vector<int> simd_layout = GridDefaultSimd(Nd,vComplex::Nsimd());
+  std::vector<int> mpi_layout  = GridDefaultMpi();
+  GridCartesian               Grid(latt_size,simd_layout,mpi_layout); // full grid; note the local name shadows the Grid namespace from here on
+  GridRedBlackCartesian     RBGrid(&Grid); // red-black grid derived from Grid; only used as a constructor argument of Dwc
+
+  std::vector<int> seeds({1,2,3,4}); // fixed seeds; presumably makes src and Umu reproducible between runs -- confirm
+  GridParallelRNG          pRNG(&Grid);  pRNG.SeedFixedIntegers(seeds);
+
+  typedef typename WilsonCloverFermionR::FermionField FermionField;
+  typename WilsonCloverFermionR::ImplParams params; // default-constructed, passed unchanged to Dwc
+  WilsonAnisotropyCoefficients anis; // default-constructed, passed unchanged to Dwc
+
+  FermionField src(&Grid); random(pRNG,src); // right-hand side of the solve, filled from pRNG
+  RealD nrm = norm2(src); // NOTE(review): nrm is never read in this function
+  FermionField result(&Grid); result=zero; // solver starts from a zero initial guess
+  LatticeGaugeField Umu(&Grid); SU3::HotConfiguration(pRNG,Umu); // gauge field drawn from the same pRNG after src
+
+  double volume=1; // NOTE(review): volume (product of the Nd lattice extents) is computed but never read below
+  for(int mu=0;mu<Nd;mu++){
+    volume=volume*latt_size[mu];
+  }
+
+  RealD mass  = 0.5; // mass and both clover coefficients are hard-coded and fed straight into Dwc
+  RealD csw_r = 1.0;
+  RealD csw_t = 1.0;
+  WilsonCloverFermionR Dwc(Umu,Grid,RBGrid,mass,csw_r,csw_t,anis,params);
+
+  MdagMLinearOperator<WilsonCloverFermionR,FermionField> HermOp(Dwc); // wraps Dwc; this is the operator object handed to the solver
+  CommunicationAvoidingGeneralisedMinimalResidual<FermionField> CAGMRES(1.0e-8, 10000, 25); // presumably (tolerance, max iterations, restart length) -- confirm against the solver's constructor
+  CAGMRES(HermOp,src,result); // nothing after this call inspects result or any convergence status
+
+  Grid_finalize();
+}
diff --git a/tests/solver/Test_wilsonclover_fcagmres_prec.cc b/tests/solver/Test_wilsonclover_fcagmres_prec.cc
new file mode 100644
index 00000000..762a7fd7
--- /dev/null
+++ b/tests/solver/Test_wilsonclover_fcagmres_prec.cc
@@ -0,0 +1,74 @@
+/*************************************************************************************
+
+Grid physics library, www.github.com/paboyle/Grid
+
+Source file: ./tests/solver/Test_wilsonclover_fcagmres_prec.cc
+
+Copyright (C) 2015
+
+Author: Daniel Richtmann <daniel.richtmann@ur.de>
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along
+with this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+
+See the full license in the file "LICENSE" in the top level distribution
+directory
+*************************************************************************************/
+/*  END LEGAL */
+#include <Grid/Grid.h>
+
+using namespace Grid;
+using namespace Grid::QCD;
+
+// Solves the Wilson clover system for a random source with flexible CA-GMRES and a TrivialPrecon; the result is not checked.
+int main (int argc, char ** argv)
+{
+  Grid_init(&argc,&argv);
+
+  // Full and red-black grids built from Grid's default lattice, SIMD and MPI layouts.
+  std::vector<int> latt_size   = GridDefaultLatt();
+  std::vector<int> simd_layout = GridDefaultSimd(Nd,vComplex::Nsimd());
+  std::vector<int> mpi_layout  = GridDefaultMpi();
+  GridCartesian               Grid(latt_size,simd_layout,mpi_layout);
+  GridRedBlackCartesian     RBGrid(&Grid);
+
+  // Parallel RNG seeded with fixed integers.
+  std::vector<int> seeds({1,2,3,4});
+  GridParallelRNG          pRNG(&Grid);  pRNG.SeedFixedIntegers(seeds);
+
+  // Fermion field type plus default-constructed implementation parameters and anisotropy coefficients.
+  typedef typename WilsonCloverFermionR::FermionField FermionField;
+  typename WilsonCloverFermionR::ImplParams params;
+  WilsonAnisotropyCoefficients anis;
+
+  // Random source, zero-initialised solution vector and a gauge field from SU3::HotConfiguration.
+  FermionField src(&Grid); random(pRNG,src);
+  FermionField result(&Grid); result=zero;
+  LatticeGaugeField Umu(&Grid); SU3::HotConfiguration(pRNG,Umu);
+
+  RealD mass  = 0.5;
+  RealD csw_r = 1.0;
+  RealD csw_t = 1.0;
+  WilsonCloverFermionR Dwc(Umu,Grid,RBGrid,mass,csw_r,csw_t,anis,params);
+
+  MdagMLinearOperator<WilsonCloverFermionR,FermionField> HermOp(Dwc);
+
+  // NOTE(review): solver arguments are presumably (tolerance, max iterations, preconditioner, restart length) -- confirm.
+  TrivialPrecon<FermionField> simple;
+
+  FlexibleCommunicationAvoidingGeneralisedMinimalResidual<FermionField> FCAGMRES(1.0e-8, 10000, simple, 25);
+  FCAGMRES(HermOp,src,result);
+
+  Grid_finalize();
+}
diff --git a/tests/solver/Test_wilsonclover_fgmres_prec.cc b/tests/solver/Test_wilsonclover_fgmres_prec.cc
new file mode 100644
index 00000000..50c2c605
--- /dev/null
+++ b/tests/solver/Test_wilsonclover_fgmres_prec.cc
@@ -0,0 +1,74 @@
+/*************************************************************************************
+
+Grid physics library, www.github.com/paboyle/Grid
+
+Source file: ./tests/solver/Test_wilsonclover_fgmres_prec.cc
+
+Copyright (C) 2015
+
+Author: Daniel Richtmann <daniel.richtmann@ur.de>
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along
+with this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+
+See the full license in the file "LICENSE" in the top level distribution
+directory
+*************************************************************************************/
+/*  END LEGAL */
+#include <Grid/Grid.h>
+
+using namespace Grid;
+using namespace Grid::QCD;
+
+// Solves the Wilson clover system for a random source with flexible GMRES and a TrivialPrecon; the result is not checked.
+int main (int argc, char ** argv)
+{
+  Grid_init(&argc,&argv);
+
+  // Full and red-black grids built from Grid's default lattice, SIMD and MPI layouts.
+  std::vector<int> latt_size   = GridDefaultLatt();
+  std::vector<int> simd_layout = GridDefaultSimd(Nd,vComplex::Nsimd());
+  std::vector<int> mpi_layout  = GridDefaultMpi();
+  GridCartesian               Grid(latt_size,simd_layout,mpi_layout);
+  GridRedBlackCartesian     RBGrid(&Grid);
+
+  // Parallel RNG seeded with fixed integers.
+  std::vector<int> seeds({1,2,3,4});
+  GridParallelRNG          pRNG(&Grid);  pRNG.SeedFixedIntegers(seeds);
+
+  // Fermion field type plus default-constructed implementation parameters and anisotropy coefficients.
+  typedef typename WilsonCloverFermionR::FermionField FermionField;
+  typename WilsonCloverFermionR::ImplParams params;
+  WilsonAnisotropyCoefficients anis;
+
+  // Random source, zero-initialised solution vector and a gauge field from SU3::HotConfiguration.
+  FermionField src(&Grid); random(pRNG,src);
+  FermionField result(&Grid); result=zero;
+  LatticeGaugeField Umu(&Grid); SU3::HotConfiguration(pRNG,Umu);
+
+  RealD mass  = 0.5;
+  RealD csw_r = 1.0;
+  RealD csw_t = 1.0;
+  WilsonCloverFermionR Dwc(Umu,Grid,RBGrid,mass,csw_r,csw_t,anis,params);
+
+  MdagMLinearOperator<WilsonCloverFermionR,FermionField> HermOp(Dwc);
+
+  // NOTE(review): solver arguments are presumably (tolerance, max iterations, preconditioner, restart length) -- confirm.
+  TrivialPrecon<FermionField> simple;
+
+  FlexibleGeneralisedMinimalResidual<FermionField> FGMRES(1.0e-8, 10000, simple, 25);
+  FGMRES(HermOp,src,result);
+
+  Grid_finalize();
+}
diff --git a/tests/solver/Test_wilsonclover_gmres_unprec.cc b/tests/solver/Test_wilsonclover_gmres_unprec.cc
new file mode 100644
index 00000000..c05af5b6
--- /dev/null
+++ b/tests/solver/Test_wilsonclover_gmres_unprec.cc
@@ -0,0 +1,71 @@
+/*************************************************************************************
+
+Grid physics library, www.github.com/paboyle/Grid
+
+Source file: ./tests/solver/Test_wilsonclover_gmres_unprec.cc
+
+Copyright (C) 2015
+
+Author: Daniel Richtmann <daniel.richtmann@ur.de>
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along
+with this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+
+See the full license in the file "LICENSE" in the top level distribution
+directory
+*************************************************************************************/
+/*  END LEGAL */
+#include <Grid/Grid.h>
+
+using namespace Grid;
+using namespace Grid::QCD;
+
+// Solves the Wilson clover system for a random source with GMRES (no preconditioner is passed); the result is not checked.
+int main (int argc, char ** argv)
+{
+  Grid_init(&argc,&argv);
+
+  // Full and red-black grids built from Grid's default lattice, SIMD and MPI layouts.
+  std::vector<int> latt_size   = GridDefaultLatt();
+  std::vector<int> simd_layout = GridDefaultSimd(Nd,vComplex::Nsimd());
+  std::vector<int> mpi_layout  = GridDefaultMpi();
+  GridCartesian               Grid(latt_size,simd_layout,mpi_layout);
+  GridRedBlackCartesian     RBGrid(&Grid);
+
+  // Parallel RNG seeded with fixed integers.
+  std::vector<int> seeds({1,2,3,4});
+  GridParallelRNG          pRNG(&Grid);  pRNG.SeedFixedIntegers(seeds);
+
+  // Fermion field type plus default-constructed implementation parameters and anisotropy coefficients.
+  typedef typename WilsonCloverFermionR::FermionField FermionField;
+  typename WilsonCloverFermionR::ImplParams params;
+  WilsonAnisotropyCoefficients anis;
+
+  // Random source, zero-initialised solution vector and a gauge field from SU3::HotConfiguration.
+  FermionField src(&Grid); random(pRNG,src);
+  FermionField result(&Grid); result=zero;
+  LatticeGaugeField Umu(&Grid); SU3::HotConfiguration(pRNG,Umu);
+
+  RealD mass  = 0.5;
+  RealD csw_r = 1.0;
+  RealD csw_t = 1.0;
+  WilsonCloverFermionR Dwc(Umu,Grid,RBGrid,mass,csw_r,csw_t,anis,params);
+
+  // NOTE(review): solver arguments are presumably (tolerance, max iterations, restart length) -- confirm.
+  MdagMLinearOperator<WilsonCloverFermionR,FermionField> HermOp(Dwc);
+  GeneralisedMinimalResidual<FermionField> GMRES(1.0e-8, 10000, 25);
+  GMRES(HermOp,src,result);
+
+  Grid_finalize();
+}
diff --git a/tests/solver/Test_wilsonclover_mr_unprec.cc b/tests/solver/Test_wilsonclover_mr_unprec.cc
new file mode 100644
index 00000000..f39bea9a
--- /dev/null
+++ b/tests/solver/Test_wilsonclover_mr_unprec.cc
@@ -0,0 +1,71 @@
+/*************************************************************************************
+
+Grid physics library, www.github.com/paboyle/Grid
+
+Source file: ./tests/solver/Test_wilsonclover_mr_unprec.cc
+
+Copyright (C) 2015
+
+Author: Daniel Richtmann <daniel.richtmann@ur.de>
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along
+with this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+
+See the full license in the file "LICENSE" in the top level distribution
+directory
+*************************************************************************************/
+/*  END LEGAL */
+#include <Grid/Grid.h>
+
+using namespace Grid;
+using namespace Grid::QCD;
+
+// Solves the Wilson clover system for a random source with the MinimalResidual solver; the result is not checked.
+int main (int argc, char ** argv)
+{
+  Grid_init(&argc,&argv);
+
+  // Full and red-black grids built from Grid's default lattice, SIMD and MPI layouts.
+  std::vector<int> latt_size   = GridDefaultLatt();
+  std::vector<int> simd_layout = GridDefaultSimd(Nd,vComplex::Nsimd());
+  std::vector<int> mpi_layout  = GridDefaultMpi();
+  GridCartesian               Grid(latt_size,simd_layout,mpi_layout);
+  GridRedBlackCartesian     RBGrid(&Grid);
+
+  // Parallel RNG seeded with fixed integers.
+  std::vector<int> seeds({1,2,3,4});
+  GridParallelRNG          pRNG(&Grid);  pRNG.SeedFixedIntegers(seeds);
+
+  // Fermion field type plus default-constructed implementation parameters and anisotropy coefficients.
+  typedef typename WilsonCloverFermionR::FermionField FermionField;
+  typename WilsonCloverFermionR::ImplParams params;
+  WilsonAnisotropyCoefficients anis;
+
+  // Random source, zero-initialised solution vector and a gauge field from SU3::HotConfiguration.
+  FermionField src(&Grid); random(pRNG,src);
+  FermionField result(&Grid); result=zero;
+  LatticeGaugeField Umu(&Grid); SU3::HotConfiguration(pRNG,Umu);
+
+  RealD mass  = 0.5;
+  RealD csw_r = 1.0;
+  RealD csw_t = 1.0;
+  WilsonCloverFermionR Dwc(Umu,Grid,RBGrid,mass,csw_r,csw_t,anis,params);
+
+  // NOTE(review): solver arguments are presumably (tolerance, max iterations, overrelaxation factor) -- confirm.
+  MdagMLinearOperator<WilsonCloverFermionR,FermionField> HermOp(Dwc);
+  MinimalResidual<FermionField> MR(1.0e-8,10000,0.8);
+  MR(HermOp,src,result);
+
+  Grid_finalize();
+}

From 2976132bdd68ad622a1b473068505a4d78af3a80 Mon Sep 17 00:00:00 2001
From: Daniel Richtmann <daniel.richtmann@gmail.com>
Date: Thu, 8 Feb 2018 23:39:37 +0100
Subject: [PATCH 088/130] Add first version of multigrid for wilson clover
 analogous to wilson one
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Just like the wilson one, this algorithm

• is currently only a 2-level method since I don't have correct implementations
  for Mdir and Mdiag in CoarsenedMatrix yet (needed for further coarsening)
• needs levelization and refactoring into a proper algorithm
---
 tests/solver/Test_wilsonclover_mg.cc | 773 +++++++++++++++++++++++++++
 1 file changed, 773 insertions(+)
 create mode 100644 tests/solver/Test_wilsonclover_mg.cc

diff --git a/tests/solver/Test_wilsonclover_mg.cc b/tests/solver/Test_wilsonclover_mg.cc
new file mode 100644
index 00000000..ebb685cf
--- /dev/null
+++ b/tests/solver/Test_wilsonclover_mg.cc
@@ -0,0 +1,773 @@
+/*************************************************************************************
+
+    Grid physics library, www.github.com/paboyle/Grid 
+
+    Source file: ./tests/solver/Test_wilsonclover_mg.cc
+
+    Copyright (C) 2017
+
+    Author: Daniel Richtmann <daniel.richtmann@ur.de>
+
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License along
+    with this program; if not, write to the Free Software Foundation, Inc.,
+    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+
+    See the full license in the file "LICENSE" in the top level distribution directory
+    *************************************************************************************/
+/*  END LEGAL */
+
+#include <Grid/Grid.h>
+#include <Grid/algorithms/iterative/PrecGeneralisedConjugateResidual.h>
+
+using namespace std;
+using namespace Grid;
+using namespace Grid::QCD;
+
+// Functor that prints, for the first nn test vectors, a summary of how gamma5 * Op acts on each of them.
+template<class Field, int nbasis> class TestVectorAnalyzer {
+public:
+  void operator()(LinearOperatorBase<Field> &Linop, std::vector<Field> const &vectors, int nn = nbasis) {
+    auto *grid = vectors[0]._grid;
+    Field opV(grid);      // Op * v
+    Field g5OpV(grid);    // gamma5 * Op * v
+    Field residual(grid); // (gamma5 * Op - lambda) * v
+    Gamma g5(Gamma::Algebra::Gamma5);
+
+    int numPositive = 0; // number of vectors whose lambda has a positive real part
+
+    std::cout << GridLogMessage << "Test vector analysis:" << std::endl;
+
+    for(int idx = 0; idx < nn; ++idx) {
+      Field const &v = vectors[idx];
+
+      Linop.Op(v, opV);
+      g5OpV = g5 * opV;
+
+      // lambda = <v, g5 Op v> / <v, v>; mu measures what is left of g5 Op v after removing lambda * v, relative to v
+      auto lambda = innerProduct(v, g5OpV) / innerProduct(v, v);
+      residual    = g5OpV - lambda * v;
+      auto mu     = ::sqrt(norm2(residual) / norm2(v));
+      auto nrm    = ::sqrt(norm2(v));
+
+      numPositive += (real(lambda) > 0) ? 1 : 0;
+
+      std::cout << GridLogMessage << std::scientific << std::setprecision(2) << std::setw(2) << std::showpos << "vector " << idx << ": "
+                << "singular value: " << lambda << ", singular vector precision: " << mu << ", norm: " << nrm << std::endl;
+    }
+    std::cout << GridLogMessage << std::scientific << std::setprecision(2) << std::setw(2) << std::showpos << numPositive << " out of " << nn
+              << " vectors were positive" << std::endl;
+  }
+};
+
+class myclass : Serializable { // record of the run parameters used by this test
+public:
+  // clang-format off
+  GRID_SERIALIZABLE_CLASS_MEMBERS(myclass, // (type, name) pairs; main() accesses each name as a member
+                                  int, domaindecompose,
+                                  int, domainsize,
+                                  int, coarsegrids,
+                                  int, order,
+                                  int, Ls,
+                                  double, mq, // main() uses this value as the fermion mass
+                                  double, lo,
+                                  double, hi,
+                                  int, steps);
+  // clang-format on
+  myclass(){}; // empty body; main() assigns every member explicitly before use
+};
+myclass params; // file-scope instance, filled in and streamed to std::cout at the start of main()
+
+// Lattice sizes, seeds, grids and parallel RNGs of all coarse levels, obtained by repeatedly blocking the default lattice.
+template<int nbasis> struct CoarseGrids {
+public:
+  std::vector<std::vector<int>> LattSizes;
+  std::vector<std::vector<int>> Seeds;
+  std::vector<GridCartesian *>  Grids; // allocated in the constructor and never freed by this struct
+  std::vector<GridParallelRNG>  PRNGs;
+  // blockSizes[cl][d]: factor by which direction d shrinks when going to coarse level cl; one entry per level is required
+  CoarseGrids(std::vector<std::vector<int>> const &blockSizes, int coarsegrids) {
+    assert(static_cast<int>(blockSizes.size()) == coarsegrids);
+    std::cout << GridLogMessage << "Constructing " << coarsegrids << " CoarseGrids" << std::endl;
+
+    for(int cl = 0; cl < coarsegrids; ++cl) { // may be a bit ugly and slow but not perf critical
+      // the first coarse level is derived from the default (fine) lattice, every further one from the previous coarse level
+      LattSizes.push_back({cl == 0 ? GridDefaultLatt() : LattSizes[cl - 1]});
+      Seeds.push_back(std::vector<int>(LattSizes[cl].size()));
+      assert(blockSizes[cl].size() == LattSizes[cl].size());
+
+      const int nDim = static_cast<int>(LattSizes[cl].size());
+      for(int d = 0; d < nDim; ++d) {
+        assert(blockSizes[cl][d] > 0 && LattSizes[cl][d] % blockSizes[cl][d] == 0); // the division below must be exact
+        LattSizes[cl][d] = LattSizes[cl][d] / blockSizes[cl][d];
+        Seeds[cl][d]     = (cl + 1) * nDim + d + 1; // value unimportant; gives e.g. {5, 6, 7, 8} on the first coarse level
+      }
+
+      Grids.push_back(SpaceTimeGrid::makeFourDimGrid(LattSizes[cl], GridDefaultSimd(Nd, vComplex::Nsimd()), GridDefaultMpi()));
+      PRNGs.push_back(GridParallelRNG(Grids[cl]));
+      PRNGs[cl].SeedFixedIntegers(Seeds[cl]);
+
+      std::cout << GridLogMessage << "cl = " << cl << ": LattSize = " << LattSizes[cl] << std::endl;
+      std::cout << GridLogMessage << "cl = " << cl << ":    Seeds = " << Seeds[cl] << std::endl;
+    }
+  }
+};
+
+template<class Field> void testLinearOperator(LinearOperatorBase<Field> &LinOp, GridBase *Grid, std::string const &name = "") {
+  // Prints three diagnostics for LinOp on random fields living on Grid; nothing is asserted or returned.
+  std::vector<int> seeds({1, 2, 3, 4});
+  GridParallelRNG  RNG(Grid);
+  RNG.SeedFixedIntegers(seeds);
+  // Check 1: Op(src) is compared against OpDiag(src) plus the sum of all OpDir(src, dir, disp)
+  {
+    std::cout << GridLogMessage << "Testing that Mdiag + Σ_μ Mdir_μ == M for operator " << name << ":" << std::endl;
+
+    // clang-format off
+    Field src(Grid);    random(RNG, src);
+    Field ref(Grid);    ref    = zero;
+    Field result(Grid); result = zero;
+    Field diag(Grid);   diag   = zero;
+    Field sumDir(Grid); sumDir = zero;
+    Field tmp(Grid);
+    Field err(Grid);
+    // clang-format on
+
+    LinOp.Op(src, ref);
+    std::cout << GridLogMessage << " norm2(M * src)            = " << norm2(ref) << std::endl;
+
+    LinOp.OpDiag(src, diag);
+    std::cout << GridLogMessage << " norm2(Mdiag * src)        = " << norm2(diag) << std::endl;
+    // accumulate the directional terms over four directions and both displacements (+1 and -1)
+    for(int dir = 0; dir < 4; dir++) {
+      for(auto disp : {+1, -1}) {
+        LinOp.OpDir(src, tmp, dir, disp);
+        std::cout << GridLogMessage << " norm2(Mdir_{" << dir << "," << disp << "} * src) = " << norm2(tmp) << std::endl;
+        sumDir = sumDir + tmp;
+      }
+    }
+    std::cout << GridLogMessage << " norm2(Σ_μ Mdir_μ * src)   = " << norm2(sumDir) << std::endl;
+
+    result = diag + sumDir;
+    err    = ref - result;
+    // both deviations are reported as norm2 values (no square root is taken here)
+    std::cout << GridLogMessage << " Absolute deviation        = " << norm2(err) << std::endl;
+    std::cout << GridLogMessage << " Relative deviation        = " << norm2(err) / norm2(ref) << std::endl;
+  }
+  // Check 2: <chi, Op phi> is compared with the conjugate of <phi, AdjOp chi> for two random fields
+  {
+    std::cout << GridLogMessage << "Testing hermiticity stochastically for operator " << name << ":" << std::endl;
+
+    // clang-format off
+    Field phi(Grid); random(RNG, phi);
+    Field chi(Grid); random(RNG, chi);
+    Field MPhi(Grid);
+    Field MdagChi(Grid);
+    // clang-format on
+
+    LinOp.Op(phi, MPhi);
+    LinOp.AdjOp(chi, MdagChi);
+
+    ComplexD chiMPhi    = innerProduct(chi, MPhi);
+    ComplexD phiMdagChi = innerProduct(phi, MdagChi);
+    // diagonal matrix elements, printed for inspection only
+    ComplexD phiMPhi    = innerProduct(phi, MPhi);
+    ComplexD chiMdagChi = innerProduct(chi, MdagChi);
+
+    std::cout << GridLogMessage << " chiMPhi = " << chiMPhi << " phiMdagChi = " << phiMdagChi << " difference = " << chiMPhi - conjugate(phiMdagChi)
+              << std::endl;
+
+    std::cout << GridLogMessage << " phiMPhi = " << phiMPhi << " chiMdagChi = " << chiMdagChi << " <- should be real if hermitian" << std::endl;
+  }
+  // Check 3: Op(phi + chi) - Op(phi) - Op(chi) is formed for two fresh random fields and its norm2 printed
+  {
+    std::cout << GridLogMessage << "Testing linearity for operator " << name << ":" << std::endl;
+
+    // clang-format off
+    Field phi(Grid); random(RNG, phi);
+    Field chi(Grid); random(RNG, chi);
+    Field phiPlusChi(Grid);
+    Field MPhi(Grid);
+    Field MChi(Grid);
+    Field MPhiPlusChi(Grid);
+    Field linearityError(Grid);
+    // clang-format on
+
+    LinOp.Op(phi, MPhi);
+    LinOp.Op(chi, MChi);
+
+    phiPlusChi = phi + chi;
+
+    LinOp.Op(phiPlusChi, MPhiPlusChi);
+
+    linearityError = MPhiPlusChi - MPhi;
+    linearityError = linearityError - MChi;
+
+    std::cout << GridLogMessage << " norm2(linearityError) = " << norm2(linearityError) << std::endl;
+  }
+}
+
+// Two-level multigrid preconditioner: operator() performs a coarse-grid solve followed by a fine-grid GMRES pass,
+// runChecks() prints consistency checks for the intergrid transfer operators and the coarse operator.
+template<class Fobj, class CComplex, int nbasis, class Matrix> class MultiGridPreconditioner : public LinearFunction<Lattice<Fobj>> {
+public:
+  typedef Aggregation<Fobj, CComplex, nbasis>     Aggregates;
+  typedef CoarsenedMatrix<Fobj, CComplex, nbasis> CoarseOperator;
+
+  typedef typename Aggregation<Fobj, CComplex, nbasis>::siteVector   siteVector;
+  typedef typename Aggregation<Fobj, CComplex, nbasis>::CoarseScalar CoarseScalar;
+  typedef typename Aggregation<Fobj, CComplex, nbasis>::CoarseVector CoarseVector;
+  typedef typename Aggregation<Fobj, CComplex, nbasis>::CoarseMatrix CoarseMatrix;
+  typedef typename Aggregation<Fobj, CComplex, nbasis>::FineField    FineField;
+  typedef LinearOperatorBase<FineField>                              FineOperator;
+
+  Aggregates &    _Aggregates;
+  CoarseOperator &_CoarseOperator;
+  Matrix &        _FineMatrix;
+  FineOperator &  _FineOperator;
+  Matrix &        _SmootherMatrix;
+  FineOperator &  _SmootherOperator;
+
+  // Constructor; only binds references, so all arguments must outlive the preconditioner
+  MultiGridPreconditioner(Aggregates &    Agg,
+                          CoarseOperator &Coarse,
+                          FineOperator &  Fine,
+                          Matrix &        FineMatrix,
+                          FineOperator &  Smooth,
+                          Matrix &        SmootherMatrix)
+    : _Aggregates(Agg)
+    , _CoarseOperator(Coarse)
+    , _FineMatrix(FineMatrix)
+    , _FineOperator(Fine)
+    , _SmootherMatrix(SmootherMatrix)
+    , _SmootherOperator(Smooth) {}
+
+  void operator()(const FineField &in, FineField &out) {
+    // Fills out from in: restrict in to the coarse grid, run coarse GMRES, prolongate into out, then run fine GMRES
+    // on (in, out). The relative residual sqrt(norm2(in - Op out) / norm2(in)) is logged after both stages.
+    CoarseVector coarseSrc(_CoarseOperator.Grid());
+    CoarseVector coarseSol(_CoarseOperator.Grid());
+    coarseSol = zero;
+
+    // NOTE(review): arguments are presumably (tolerance, max iterations, restart length, flag) -- confirm in the solver
+    GeneralisedMinimalResidual<CoarseVector> coarseGMRES(5.0e-2, 100, 25, false);
+    GeneralisedMinimalResidual<FineField>    fineGMRES(5.0e-2, 100, 25, false);
+
+    // operator wrappers handed to the solvers; the fine one wraps the smoother matrix, not _FineMatrix
+    MdagMLinearOperator<CoarseOperator, CoarseVector>     coarseMdagMOp(_CoarseOperator);
+    MdagMLinearOperator<Matrix, FineField>                fineMdagMOp(_SmootherMatrix);
+
+    FineField fineTmp1(in._grid);
+
+    RealD Ni = norm2(in);
+
+    // no pre smoothing for now, so the two pre-smoother entries of the log line stay at zero
+    auto  preSmootherNorm     = 0;
+    auto  preSmootherResidual = 0;
+    RealD r;
+
+    // Project to coarse grid, solve, project back to fine grid
+    _Aggregates.ProjectToSubspace(coarseSrc, in);
+    coarseGMRES(coarseMdagMOp, coarseSrc, coarseSol);
+    _Aggregates.PromoteFromSubspace(coarseSol, out);
+
+    // Recompute error
+    _FineOperator.Op(out, fineTmp1);
+    fineTmp1            = in - fineTmp1;
+    r                   = norm2(fineTmp1);
+    auto coarseResidual = std::sqrt(r / Ni);
+
+    // Apply smoother, use GMRES for the moment
+    fineGMRES(fineMdagMOp, in, out);
+
+    // Recompute error
+    _FineOperator.Op(out, fineTmp1);
+    fineTmp1                  = in - fineTmp1;
+    r                         = norm2(fineTmp1);
+    auto postSmootherResidual = std::sqrt(r / Ni);
+
+    std::cout << GridLogIterative << "Input norm = " << Ni << " Pre-Smoother norm " << preSmootherNorm
+              << " Pre-Smoother residual = " << preSmootherResidual << " Coarse residual = " << coarseResidual
+              << " Post-Smoother residual = " << postSmootherResidual << std::endl;
+  }
+
+  void runChecks(CoarseGrids<nbasis> &cGrids, int whichCoarseGrid) {
+    // Prints four checks using random coarse vectors from cGrids.PRNGs[whichCoarseGrid]; failures are reported, not fatal
+    /////////////////////////////////////////////
+    // Some stuff we need for the checks below //
+    /////////////////////////////////////////////
+    auto tolerance = 1e-13; // TODO: this obviously depends on the precision we use, current value is for double
+    // every comparison below is written as !(deviation <= tolerance) so that a NaN deviation is reported as a failure
+    std::vector<CoarseVector> cTmps(4, _CoarseOperator.Grid());
+    std::vector<FineField>    fTmps(2, _Aggregates.subspace[0]._grid); // atm only for one coarser grid
+
+    // need to construct an operator, since _CoarseOperator is not a LinearOperator but only a matrix (the name is a bit misleading)
+    MdagMLinearOperator<CoarseOperator, CoarseVector> MdagMOp(_CoarseOperator);
+
+    std::cout << GridLogMessage << "**************************************************" << std::endl;
+    std::cout << GridLogMessage << "MG correctness check: 0 == (1 - P R) v" << std::endl;
+    std::cout << GridLogMessage << "**************************************************" << std::endl;
+
+    for(auto i = 0; i < _Aggregates.subspace.size(); ++i) {
+      _Aggregates.ProjectToSubspace(cTmps[0], _Aggregates.subspace[i]); //   R v_i
+      _Aggregates.PromoteFromSubspace(cTmps[0], fTmps[0]);              // P R v_i
+
+      fTmps[1]       = _Aggregates.subspace[i] - fTmps[0]; // v_i - P R v_i
+      auto deviation = std::sqrt(norm2(fTmps[1]) / norm2(_Aggregates.subspace[i]));
+
+      std::cout << GridLogMessage << "Vector " << i << ": norm2(v_i) = " << norm2(_Aggregates.subspace[i]) << " | norm2(R v_i) = " << norm2(cTmps[0])
+                << " | norm2(P R v_i) = " << norm2(fTmps[0]) << " | relative deviation = " << deviation;
+
+      if(!(deviation <= tolerance)) {
+        std::cout << " > " << tolerance << " -> check failed" << std::endl;
+        // abort();
+      } else {
+        std::cout << " < " << tolerance << " -> check passed" << std::endl;
+      }
+    }
+
+    std::cout << GridLogMessage << "**************************************************" << std::endl;
+    std::cout << GridLogMessage << "MG correctness check: 0 == (1 - R P) v_c" << std::endl;
+    std::cout << GridLogMessage << "**************************************************" << std::endl;
+
+    random(cGrids.PRNGs[whichCoarseGrid], cTmps[0]);
+
+    _Aggregates.PromoteFromSubspace(cTmps[0], fTmps[0]); //   P v_c
+    _Aggregates.ProjectToSubspace(cTmps[1], fTmps[0]);   // R P v_c
+
+    cTmps[2]       = cTmps[0] - cTmps[1]; // v_c - R P v_c
+    auto deviation = std::sqrt(norm2(cTmps[2]) / norm2(cTmps[0]));
+
+    std::cout << GridLogMessage << "norm2(v_c) = " << norm2(cTmps[0]) << " | norm2(R P v_c) = " << norm2(cTmps[1])
+              << " | norm2(P v_c) = " << norm2(fTmps[0]) << " | relative deviation = " << deviation;
+
+    if(!(deviation <= tolerance)) {
+      std::cout << " > " << tolerance << " -> check failed" << std::endl;
+      // abort();
+    } else {
+      std::cout << " < " << tolerance << " -> check passed" << std::endl;
+    }
+
+    std::cout << GridLogMessage << "**************************************************" << std::endl;
+    std::cout << GridLogMessage << "MG correctness check: 0 == (R D P - D_c) v_c" << std::endl;
+    std::cout << GridLogMessage << "**************************************************" << std::endl;
+
+    random(cGrids.PRNGs[whichCoarseGrid], cTmps[0]);
+
+    _Aggregates.PromoteFromSubspace(cTmps[0], fTmps[0]); //     P v_c
+    _FineOperator.Op(fTmps[0], fTmps[1]);                //   D P v_c
+    _Aggregates.ProjectToSubspace(cTmps[1], fTmps[1]);   // R D P v_c
+
+    MdagMOp.Op(cTmps[0], cTmps[2]); // D_c v_c
+
+    cTmps[3]  = cTmps[1] - cTmps[2]; // R D P v_c - D_c v_c
+    deviation = std::sqrt(norm2(cTmps[3]) / norm2(cTmps[1]));
+
+    std::cout << GridLogMessage << "norm2(R D P v_c) = " << norm2(cTmps[1]) << " | norm2(D_c v_c) = " << norm2(cTmps[2])
+              << " | relative deviation = " << deviation;
+
+    if(!(deviation <= tolerance)) {
+      std::cout << " > " << tolerance << " -> check failed" << std::endl;
+      // abort();
+    } else {
+      std::cout << " < " << tolerance << " -> check passed" << std::endl;
+    }
+
+    std::cout << GridLogMessage << "**************************************************" << std::endl;
+    std::cout << GridLogMessage << "MG correctness check: 0 == |(Im(v_c^dag D_c^dag D_c v_c)|" << std::endl;
+    std::cout << GridLogMessage << "**************************************************" << std::endl;
+
+    random(cGrids.PRNGs[whichCoarseGrid], cTmps[0]);
+
+    MdagMOp.Op(cTmps[0], cTmps[1]);    //         D_c v_c
+    MdagMOp.AdjOp(cTmps[1], cTmps[2]); // D_c^dag D_c v_c
+
+    auto dot  = innerProduct(cTmps[0], cTmps[2]); //v_c^dag D_c^dag D_c v_c
+    deviation = abs(imag(dot)) / abs(real(dot));
+
+    std::cout << GridLogMessage << "Re(v_c^dag D_c^dag D_c v_c) = " << real(dot) << " | Im(v_c^dag D_c^dag D_c v_c) = " << imag(dot)
+              << " | relative deviation = " << deviation;
+
+    if(!(deviation <= tolerance)) {
+      std::cout << " > " << tolerance << " -> check failed" << std::endl;
+      // abort();
+    } else {
+      std::cout << " < " << tolerance << " -> check passed" << std::endl;
+    }
+  }
+};
+
+int main(int argc, char **argv) {
+
+  Grid_init(&argc, &argv);
+
+  params.domainsize      = 1;
+  params.coarsegrids     = 1;
+  params.domaindecompose = 0;
+  params.order           = 30;
+  params.Ls              = 1;
+  params.mq              = -0.5;
+  params.lo              = 0.5;
+  params.hi              = 70.0;
+  params.steps           = 1;
+
+  typedef typename WilsonCloverFermionR::FermionField FermionField;
+  typename WilsonCloverFermionR::ImplParams           wcImplparams;
+  WilsonAnisotropyCoefficients                        wilsonAnisCoeff;
+
+  std::cout << GridLogMessage << "**************************************************" << std::endl;
+  std::cout << GridLogMessage << "Params: " << std::endl;
+  std::cout << GridLogMessage << "**************************************************" << std::endl;
+
+  std::cout << params << std::endl;
+
+  std::cout << GridLogMessage << "**************************************************" << std::endl;
+  std::cout << GridLogMessage << "Set up some fine level stuff: " << std::endl;
+  std::cout << GridLogMessage << "**************************************************" << std::endl;
+
+  GridCartesian *        FGrid   = SpaceTimeGrid::makeFourDimGrid(GridDefaultLatt(), GridDefaultSimd(Nd, vComplex::Nsimd()), GridDefaultMpi());
+  GridRedBlackCartesian *FrbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(FGrid);
+
+  std::vector<int> fSeeds({1, 2, 3, 4});
+  GridParallelRNG  fPRNG(FGrid);
+  fPRNG.SeedFixedIntegers(fSeeds);
+
+  Gamma g5(Gamma::Algebra::Gamma5);
+
+  // clang-format off
+  FermionField        src(FGrid); gaussian(fPRNG, src);
+  FermionField     result(FGrid); result = zero;
+  LatticeGaugeField     Umu(FGrid); SU3::HotConfiguration(fPRNG, Umu);
+  // clang-format on
+
+  RealD mass = params.mq;
+
+  std::cout << GridLogMessage << "**************************************************" << std::endl;
+  std::cout << GridLogMessage << "Set up some coarser levels stuff: " << std::endl;
+  std::cout << GridLogMessage << "**************************************************" << std::endl;
+
+  const int nbasis = 20; // fix the number of test vector to the same
+                         // number on every level for now
+
+  //////////////////////////////////////////
+  // toggle to run two/three level method
+  //////////////////////////////////////////
+
+  // two-level algorithm
+  std::vector<std::vector<int>> blockSizes({{2, 2, 2, 2}});
+  CoarseGrids<nbasis>           coarseGrids(blockSizes, 1);
+
+  // // three-level algorithm
+  // std::vector<std::vector<int>> blockSizes({{2, 2, 2, 2}, {2, 2, 1, 1}});
+  // CoarseGrids<nbasis>           coarseGrids(blockSizes, 2);
+
+  std::cout << GridLogMessage << "**************************************************" << std::endl;
+  std::cout << GridLogMessage << "Some typedefs" << std::endl;
+  std::cout << GridLogMessage << "**************************************************" << std::endl;
+
+  // typedefs for transition from fine to first coarsened grid
+  typedef vSpinColourVector                                                                       FineSiteVector;
+  typedef vTComplex                                                                               CoarseSiteScalar;
+  typedef Aggregation<FineSiteVector, CoarseSiteScalar, nbasis>                                   Subspace;
+  typedef CoarsenedMatrix<FineSiteVector, CoarseSiteScalar, nbasis>                               CoarseOperator;
+  typedef CoarseOperator::CoarseVector                                                            CoarseVector;
+  typedef CoarseOperator::siteVector                                                              CoarseSiteVector;
+  typedef TestVectorAnalyzer<FermionField, nbasis>                                                FineTVA;
+  typedef MultiGridPreconditioner<FineSiteVector, CoarseSiteScalar, nbasis, WilsonCloverFermionR> FineMGPreconditioner;
+  typedef TrivialPrecon<FermionField>                                                             FineTrivialPreconditioner;
+
+  // typedefs for transition from a coarse to the next coarser grid (some defs remain the same)
+  typedef Aggregation<CoarseSiteVector, CoarseSiteScalar, nbasis>                             SubSubSpace;
+  typedef CoarsenedMatrix<CoarseSiteVector, CoarseSiteScalar, nbasis>                         CoarseCoarseOperator;
+  typedef CoarseCoarseOperator::CoarseVector                                                  CoarseCoarseVector;
+  typedef CoarseCoarseOperator::siteVector                                                    CoarseCoarseSiteVector;
+  typedef TestVectorAnalyzer<CoarseVector, nbasis>                                            CoarseTVA;
+  typedef MultiGridPreconditioner<CoarseSiteVector, CoarseSiteScalar, nbasis, CoarseOperator> CoarseMGPreconditioner;
+  typedef TrivialPrecon<CoarseVector>                                                         CoarseTrivialPreconditioner;
+
+  static_assert(std::is_same<CoarseVector, CoarseCoarseVector>::value, "CoarseVector and CoarseCoarseVector must be of the same type");
+
+  std::cout << GridLogMessage << "**************************************************" << std::endl;
+  std::cout << GridLogMessage << "Building the wilson clover operator on the fine grid" << std::endl;
+  std::cout << GridLogMessage << "**************************************************" << std::endl;
+
+  RealD                csw_r = 1.0;
+  RealD                csw_t = 1.0;
+  WilsonCloverFermionR Dwc(Umu, *FGrid, *FrbGrid, mass, csw_r, csw_t, wilsonAnisCoeff, wcImplparams);
+
+  std::cout << GridLogMessage << "**************************************************" << std::endl;
+  std::cout << GridLogMessage << "Setting up linear operators" << std::endl;
+  std::cout << GridLogMessage << "**************************************************" << std::endl;
+
+  MdagMLinearOperator<WilsonCloverFermionR, FermionField> FineMdagMOp(Dwc);
+
+  std::cout << GridLogMessage << "**************************************************" << std::endl;
+  std::cout << GridLogMessage << "Calling Aggregation class to build subspaces" << std::endl;
+  std::cout << GridLogMessage << "**************************************************" << std::endl;
+
+  Subspace FineAggregates(coarseGrids.Grids[0], FGrid, 0);
+
+  assert((nbasis & 0x1) == 0);
+  int nb = nbasis / 2;
+  std::cout << GridLogMessage << " nbasis/2 = " << nb << std::endl;
+
+  FineAggregates.CreateSubspace(fPRNG, FineMdagMOp /*, nb */); // Don't specify nb to see the orthogonalization check
+
+  std::cout << GridLogMessage << "**************************************************" << std::endl;
+  std::cout << GridLogMessage << "Test vector analysis after initial creation of subspace" << std::endl;
+  std::cout << GridLogMessage << "**************************************************" << std::endl;
+
+  FineTVA fineTVA;
+  fineTVA(FineMdagMOp, FineAggregates.subspace);
+
+  std::cout << GridLogMessage << "**************************************************" << std::endl;
+  std::cout << GridLogMessage << "Projecting subspace to definite chirality" << std::endl;
+  std::cout << GridLogMessage << "**************************************************" << std::endl;
+
+  for(int n = 0; n < nb; n++) {
+    FineAggregates.subspace[n + nb] = g5 * FineAggregates.subspace[n];
+  }
+
+  auto coarseSites = 1;
+  for(auto const &elem : coarseGrids.LattSizes[0]) coarseSites *= elem;
+
+  std::cout << GridLogMessage << "Norms of MG test vectors after chiral projection (coarse sites = " << coarseSites << ")" << std::endl;
+  for(int n = 0; n < nbasis; n++) {
+    std::cout << GridLogMessage << "vec[" << n << "] = " << norm2(FineAggregates.subspace[n]) << std::endl;
+  }
+
+  std::cout << GridLogMessage << "**************************************************" << std::endl;
+  std::cout << GridLogMessage << "Building coarse representation of Dirac operator" << std::endl;
+  std::cout << GridLogMessage << "**************************************************" << std::endl;
+
+  CoarseOperator Dc(*coarseGrids.Grids[0]);
+
+  Dc.CoarsenOperator(FGrid, FineMdagMOp, FineAggregates);
+
+  MdagMLinearOperator<CoarseOperator, CoarseVector> CoarseMdagMOp(Dc);
+
+  std::cout << GridLogMessage << "**************************************************" << std::endl;
+  std::cout << GridLogMessage << "Test vector analysis after construction of coarse Dirac operator" << std::endl;
+  std::cout << GridLogMessage << "**************************************************" << std::endl;
+
+  fineTVA(FineMdagMOp, FineAggregates.subspace);
+
+  std::cout << GridLogMessage << "**************************************************" << std::endl;
+  std::cout << GridLogMessage << "Testing the linear operators" << std::endl;
+  std::cout << GridLogMessage << "**************************************************" << std::endl;
+
+  // clang-format off
+  testLinearOperator(FineMdagMOp,   FGrid,                "FineMdagMOp");   std::cout << GridLogMessage << std::endl;
+  testLinearOperator(CoarseMdagMOp, coarseGrids.Grids[0], "CoarseMdagMOp"); std::cout << GridLogMessage << std::endl;
+  // clang-format on
+
+  std::cout << GridLogMessage << "**************************************************" << std::endl;
+  std::cout << GridLogMessage << "Building coarse vectors" << std::endl;
+  std::cout << GridLogMessage << "**************************************************" << std::endl;
+
+  CoarseVector coarseSource(coarseGrids.Grids[0]);
+  CoarseVector coarseResult(coarseGrids.Grids[0]);
+  gaussian(coarseGrids.PRNGs[0], coarseSource);
+  coarseResult = zero;
+
+  std::cout << GridLogMessage << "**************************************************" << std::endl;
+  std::cout << GridLogMessage << "Building some coarse space solvers" << std::endl;
+  std::cout << GridLogMessage << "**************************************************" << std::endl;
+
+  std::vector<std::unique_ptr<OperatorFunction<CoarseVector>>> dummyCoarseSolvers;
+  dummyCoarseSolvers.emplace_back(new GeneralisedMinimalResidual<CoarseVector>(5.0e-2, 100, 8, false));
+  dummyCoarseSolvers.emplace_back(new MinimalResidual<CoarseVector>(5.0e-2, 100, 0.8, false));
+  dummyCoarseSolvers.emplace_back(new ConjugateGradient<CoarseVector>(5.0e-2, 100, false));
+
+  std::cout << GridLogMessage << "**************************************************" << std::endl;
+  std::cout << GridLogMessage << "Testing some coarse space solvers" << std::endl;
+  std::cout << GridLogMessage << "**************************************************" << std::endl;
+
+  std::cout << GridLogMessage << "checking norm of coarse src " << norm2(coarseSource) << std::endl;
+
+  for(auto const &solver : dummyCoarseSolvers) {
+    coarseResult = zero;
+    (*solver)(CoarseMdagMOp, coarseSource, coarseResult);
+  }
+
+  std::cout << GridLogMessage << "**************************************************" << std::endl;
+  std::cout << GridLogMessage << "Building a multigrid preconditioner" << std::endl;
+  std::cout << GridLogMessage << "**************************************************" << std::endl;
+
+  FineMGPreconditioner      FineMGPrecon(FineAggregates, Dc, FineMdagMOp, Dwc, FineMdagMOp, Dwc);
+  FineTrivialPreconditioner FineSimplePrecon;
+
+  FineMGPrecon.runChecks(coarseGrids, 0);
+
+  std::cout << GridLogMessage << "**************************************************" << std::endl;
+  std::cout << GridLogMessage << "Building krylov subspace solvers w/ & w/o MG Preconditioner" << std::endl;
+  std::cout << GridLogMessage << "**************************************************" << std::endl;
+
+  std::vector<std::unique_ptr<OperatorFunction<FermionField>>> solvers;
+  solvers.emplace_back(new FlexibleGeneralisedMinimalResidual<FermionField>(1.0e-12, 4000000, FineSimplePrecon, 25, false));
+  solvers.emplace_back(new FlexibleGeneralisedMinimalResidual<FermionField>(1.0e-12, 100, FineMGPrecon, 25, false));
+  solvers.emplace_back(new PrecGeneralisedConjugateResidual<FermionField>(1.0e-12, 4000000, FineSimplePrecon, 25, 25));
+
+  std::cout << GridLogMessage << "**************************************************" << std::endl;
+  std::cout << GridLogMessage << "Testing the (un)?preconditioned solvers" << std::endl;
+  std::cout << GridLogMessage << "**************************************************" << std::endl;
+
+  for(auto const &solver : solvers) {
+    std::cout << GridLogMessage << "checking norm of fine src " << norm2(src) << std::endl;
+    result = zero;
+    (*solver)(FineMdagMOp, src, result);
+    std::cout << std::endl;
+  }
+
+#if 0
+  if(coarseGrids.LattSizes.size() == 2) {
+
+    std::cout << GridLogMessage << "**************************************************" << std::endl;
+    std::cout << GridLogMessage << "Some testing for construction of a second coarse level" << std::endl;
+    std::cout << GridLogMessage << "**************************************************" << std::endl;
+
+    // std::cout << GridLogMessage << "**************************************************" << std::endl;
+    // std::cout << GridLogMessage << "Calling Aggregation class to build subspaces" << std::endl;
+    // std::cout << GridLogMessage << "**************************************************" << std::endl;
+
+    SubSubSpace CoarseAggregates(coarseGrids.Grids[1], coarseGrids.Grids[0], 0);
+    CoarseAggregates.CreateSubspace(coarseGrids.PRNGs[0], CoarseMdagMOp);
+
+    // std::cout << GridLogMessage << "**************************************************" << std::endl;
+    // std::cout << GridLogMessage << "Test vector analysis after initial creation of subspace" << std::endl;
+    // std::cout << GridLogMessage << "**************************************************" << std::endl;
+
+    // // this doesn't work because this function applies g5 to a vector, which
+    // // doesn't work for coarse vectors atm -> FIXME
+    // CoarseTVA coarseTVA;
+    // coarseTVA(CoarseMdagMOp, CoarseAggregates.subspace);
+
+    // std::cout << GridLogMessage << "**************************************************" << std::endl;
+    // std::cout << GridLogMessage << "Projecting subspace to definite chirality" << std::endl;
+    // std::cout << GridLogMessage << "**************************************************" << std::endl;
+
+    // // cannot apply g5 to coarse vectors atm -> FIXME
+    // for(int n=0;n<nb;n++){
+    //   CoarseAggregates.subspace[n+nb] = g5 * CoarseAggregates.subspace[n];
+    //   std::cout<<GridLogMessage<<n<<" subspace "<<norm2(CoarseAggregates.subspace[n+nb])<<" "<<norm2(CoarseAggregates.subspace[n]) <<std::endl;
+    // }
+
+    auto coarseCoarseSites = 1;
+    for(auto const &elem : coarseGrids.LattSizes[1]) coarseCoarseSites *= elem;
+
+    std::cout << GridLogMessage << "Norms of MG test vectors after chiral projection (coarse coarse sites = " << coarseCoarseSites << ")"
+              << std::endl;
+    for(int n = 0; n < nbasis; n++) {
+      std::cout << GridLogMessage << "vec[" << n << "] = " << norm2(CoarseAggregates.subspace[n]) << std::endl;
+    }
+
+    // std::cout << GridLogMessage << "**************************************************" << std::endl;
+    // std::cout << GridLogMessage << "Building coarse coarse representation of Dirac operator" << std::endl;
+    // std::cout << GridLogMessage << "**************************************************" << std::endl;
+
+    CoarseCoarseOperator Dcc(*coarseGrids.Grids[1]);
+
+    Dcc.CoarsenOperator(coarseGrids.Grids[0], CoarseMdagMOp, CoarseAggregates);
+
+    MdagMLinearOperator<CoarseCoarseOperator, CoarseCoarseVector> CoarseCoarseMdagMOp(Dcc);
+
+    // std::cout << GridLogMessage << "**************************************************" << std::endl;
+    // std::cout << GridLogMessage << "Test vector analysis after construction of coarse Dirac operator" << std::endl;
+    // std::cout << GridLogMessage << "**************************************************" << std::endl;
+
+    // // this doesn't work because this function applies g5 to a vector, which
+    // // doesn't work for coarse vectors atm -> FIXME
+    // coarseTVA(CoarseMdagMOp, CoarseAggregates.subspace);
+
+    // std::cout << GridLogMessage << "**************************************************" << std::endl;
+    // std::cout << GridLogMessage << "Testing the linear operators" << std::endl;
+    // std::cout << GridLogMessage << "**************************************************" << std::endl;
+
+    // clang-format off
+    testLinearOperator(CoarseMdagMOp,       coarseGrids.Grids[0], "CoarseMdagMOp");
+    testLinearOperator(CoarseCoarseMdagMOp, coarseGrids.Grids[1], "CoarseCoarseMdagMOp");
+    // clang-format on
+
+    // std::cout << GridLogMessage << "**************************************************" << std::endl;
+    // std::cout << GridLogMessage << "Building coarse coarse vectors" << std::endl;
+    // std::cout << GridLogMessage << "**************************************************" << std::endl;
+
+    CoarseCoarseVector coarseCoarseSource(coarseGrids.Grids[1]);
+    CoarseCoarseVector coarseCoarseResult(coarseGrids.Grids[1]);
+    gaussian(coarseGrids.PRNGs[1], coarseCoarseSource);
+    coarseCoarseResult = zero;
+
+    // std::cout << GridLogMessage << "**************************************************" << std::endl;
+    // std::cout << GridLogMessage << "Building some coarse space solvers" << std::endl;
+    // std::cout << GridLogMessage << "**************************************************" << std::endl;
+
+    std::vector<std::unique_ptr<OperatorFunction<CoarseCoarseVector>>> dummyCoarseCoarseSolvers;
+    dummyCoarseCoarseSolvers.emplace_back(new GeneralisedMinimalResidual<CoarseCoarseVector>(5.0e-2, 100, 8, false));
+    dummyCoarseCoarseSolvers.emplace_back(new MinimalResidual<CoarseCoarseVector>(5.0e-2, 100, false));
+    dummyCoarseCoarseSolvers.emplace_back(new ConjugateGradient<CoarseCoarseVector>(5.0e-2, 100, false));
+
+    // std::cout << GridLogMessage << "**************************************************" << std::endl;
+    // std::cout << GridLogMessage << "Testing some coarse coarse space solvers" << std::endl;
+    // std::cout << GridLogMessage << "**************************************************" << std::endl;
+
+    std::cout << GridLogMessage << "checking norm of coarse coarse src " << norm2(coarseCoarseSource) << std::endl;
+
+    for(auto const &solver : dummyCoarseCoarseSolvers) {
+      coarseCoarseResult = zero;
+      (*solver)(CoarseCoarseMdagMOp, coarseCoarseSource, coarseCoarseResult);
+    }
+
+    // std::cout << GridLogMessage << "**************************************************" << std::endl;
+    // std::cout << GridLogMessage << "Building a multigrid preconditioner" << std::endl;
+    // std::cout << GridLogMessage << "**************************************************" << std::endl;
+
+    CoarseMGPreconditioner      CoarseMGPrecon(CoarseAggregates, Dcc, CoarseMdagMOp, Dc, CoarseMdagMOp, Dc);
+    CoarseTrivialPreconditioner CoarseSimplePrecon;
+
+    CoarseMGPrecon.runChecks(coarseGrids, 1);
+
+    // std::cout << GridLogMessage << "**************************************************" << std::endl;
+    // std::cout << GridLogMessage << "Building krylov subspace solvers w/ & w/o MG Preconditioner" << std::endl;
+    // std::cout << GridLogMessage << "**************************************************" << std::endl;
+
+    std::vector<std::unique_ptr<OperatorFunction<CoarseVector>>> solvers;
+    solvers.emplace_back(new FlexibleGeneralisedMinimalResidual<CoarseVector>(1.0e-12, 4000000, CoarseSimplePrecon, 25, false));
+    solvers.emplace_back(new FlexibleGeneralisedMinimalResidual<CoarseVector>(1.0e-12, 100, CoarseMGPrecon, 25, false));
+    solvers.emplace_back(new PrecGeneralisedConjugateResidual<CoarseVector>(1.0e-12, 4000000, CoarseSimplePrecon, 25, 25));
+
+    // std::cout << GridLogMessage << "**************************************************" << std::endl;
+    // std::cout << GridLogMessage << "Testing the (un)?preconditioned solvers" << std::endl;
+    // std::cout << GridLogMessage << "**************************************************" << std::endl;
+
+    for(auto const &solver : solvers) {
+      std::cout << GridLogMessage << "checking norm of fine src " << norm2(coarseSource) << std::endl;
+      coarseResult = zero;
+      (*solver)(CoarseMdagMOp, coarseSource, coarseResult);
+      std::cout << std::endl;
+    }
+
+  }
+#endif
+
+  Grid_finalize();
+}
+
+// Ideas compiled during discussions with the others during lunchtime:
+//
+// • set the gauge fields to 0
+//   -> the hopping term is zero -> M is the same as Mdiag
+// • set the mass to minus 4
+//   -> the self coupling term is zero -> M is the same as Σ_u Mdir_μ
+//
+// In both cases it's probably a good idea to set the source fermion to 1
+
+// I just put this here to have it out of the way in main
+// This code is intended to be put after the creation of the first MG Preconditioner object for the fine grid.

From 8c692b7ffd4139163362e135f48e54ccb26f7ea1 Mon Sep 17 00:00:00 2001
From: Daniel Richtmann <daniel.richtmann@gmail.com>
Date: Thu, 8 Feb 2018 23:55:05 +0100
Subject: [PATCH 089/130] WilsonMG: Comment out assertion on hermiticity of
 coarse operator for now

TODO: Think of a way to not break dwf_hdcr by doing that. It's only an assertion
but it still interferes with it.
---
 lib/algorithms/CoarsenedMatrix.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/lib/algorithms/CoarsenedMatrix.h b/lib/algorithms/CoarsenedMatrix.h
index 59025276..36c1eb34 100644
--- a/lib/algorithms/CoarsenedMatrix.h
+++ b/lib/algorithms/CoarsenedMatrix.h
@@ -423,7 +423,7 @@ namespace Grid {
       std::cout<<GridLogMessage<<"Computed Coarse Operator"<<std::endl;
 #endif
       //      ForceHermitian();
-      AssertHermitian();
+      // AssertHermitian();
       // ForceDiagonal();
     }
     void ForceDiagonal(void) {

From 1e63b73a148b2c5766deff603a5697139f0f749b Mon Sep 17 00:00:00 2001
From: Daniel Richtmann <daniel.richtmann@physik.uni-regensburg.de>
Date: Wed, 7 Mar 2018 10:30:21 +0100
Subject: [PATCH 090/130] WilsonMG: Some cleanup/formatting

---
 tests/solver/Test_wilson_mg.cc       | 45 ++++++++++++----------
 tests/solver/Test_wilsonclover_mg.cc | 57 +++++++++++++---------------
 2 files changed, 52 insertions(+), 50 deletions(-)

diff --git a/tests/solver/Test_wilson_mg.cc b/tests/solver/Test_wilson_mg.cc
index 137d02f2..60bf6fec 100644
--- a/tests/solver/Test_wilson_mg.cc
+++ b/tests/solver/Test_wilson_mg.cc
@@ -64,8 +64,8 @@ public:
       std::cout << GridLogMessage << std::scientific << std::setprecision(2) << std::setw(2) << std::showpos << "vector " << i << ": "
                 << "singular value: " << lambda << ", singular vector precision: " << mu << ", norm: " << nrm << std::endl;
     }
-    std::cout << GridLogMessage << std::scientific << std::setprecision(2) << std::setw(2) << std::showpos << positiveOnes << " out of " << nn
-              << " vectors were positive" << std::endl;
+    std::cout << GridLogMessage << std::scientific << std::setprecision(2) << std::setw(2) << std::showpos << positiveOnes << " out of "
+              << nn << " vectors were positive" << std::endl;
   }
 };
 
@@ -141,26 +141,31 @@ template<class Field> void testLinearOperator(LinearOperatorBase<Field> &LinOp,
     Field err(Grid);
     // clang-format on
 
-    LinOp.Op(src, ref);
-    std::cout << GridLogMessage << " norm2(M * src)            = " << norm2(ref) << std::endl;
+    std::cout << setprecision(9);
+
+    std::cout << GridLogMessage << " norm2(src)\t\t\t\t= " << norm2(src) << std::endl;
 
     LinOp.OpDiag(src, diag);
-    std::cout << GridLogMessage << " norm2(Mdiag * src)        = " << norm2(diag) << std::endl;
+    std::cout << GridLogMessage << " norm2(Mdiag * src)\t\t\t= " << norm2(diag) << std::endl;
 
     for(int dir = 0; dir < 4; dir++) {
       for(auto disp : {+1, -1}) {
         LinOp.OpDir(src, tmp, dir, disp);
-        std::cout << GridLogMessage << " norm2(Mdir_{" << dir << "," << disp << "} * src) = " << norm2(tmp) << std::endl;
+        std::cout << GridLogMessage << " norm2(Mdir_{" << dir << "," << disp << "} * src)\t\t= " << norm2(tmp) << std::endl;
         sumDir = sumDir + tmp;
       }
     }
-    std::cout << GridLogMessage << " norm2(Σ_μ Mdir_μ * src)   = " << norm2(sumDir) << std::endl;
+    std::cout << GridLogMessage << " norm2(Σ_μ Mdir_μ * src)\t\t= " << norm2(sumDir) << std::endl;
 
     result = diag + sumDir;
-    err    = ref - result;
+    std::cout << GridLogMessage << " norm2((Mdiag + Σ_μ Mdir_μ) * src)\t= " << norm2(result) << std::endl;
 
-    std::cout << GridLogMessage << " Absolute deviation        = " << norm2(err) << std::endl;
-    std::cout << GridLogMessage << " Relative deviation        = " << norm2(err) / norm2(ref) << std::endl;
+    LinOp.Op(src, ref);
+    std::cout << GridLogMessage << " norm2(M * src)\t\t\t= " << norm2(ref) << std::endl;
+
+    err = ref - result;
+    std::cout << GridLogMessage << " Absolute deviation\t\t\t= " << norm2(err) << std::endl;
+    std::cout << GridLogMessage << " Relative deviation\t\t\t= " << norm2(err) / norm2(ref) << std::endl;
   }
 
   {
@@ -182,10 +187,11 @@ template<class Field> void testLinearOperator(LinearOperatorBase<Field> &LinOp,
     ComplexD phiMPhi    = innerProduct(phi, MPhi);
     ComplexD chiMdagChi = innerProduct(chi, MdagChi);
 
-    std::cout << GridLogMessage << " chiMPhi = " << chiMPhi << " phiMdagChi = " << phiMdagChi << " difference = " << chiMPhi - conjugate(phiMdagChi)
-              << std::endl;
+    std::cout << GridLogMessage << " chiMPhi = " << chiMPhi << " phiMdagChi = " << phiMdagChi
+              << " difference = " << chiMPhi - conjugate(phiMdagChi) << std::endl;
 
-    std::cout << GridLogMessage << " phiMPhi = " << phiMPhi << " chiMdagChi = " << chiMdagChi << " <- should be real if hermitian" << std::endl;
+    std::cout << GridLogMessage << " phiMPhi = " << phiMPhi << " chiMdagChi = " << chiMdagChi << " <- should be real if hermitian"
+              << std::endl;
   }
 
   {
@@ -323,8 +329,9 @@ public:
       fTmps[1]       = _Aggregates.subspace[i] - fTmps[0]; // v_i - P R v_i
       auto deviation = std::sqrt(norm2(fTmps[1]) / norm2(_Aggregates.subspace[i]));
 
-      std::cout << GridLogMessage << "Vector " << i << ": norm2(v_i) = " << norm2(_Aggregates.subspace[i]) << " | norm2(R v_i) = " << norm2(cTmps[0])
-                << " | norm2(P R v_i) = " << norm2(fTmps[0]) << " | relative deviation = " << deviation;
+      std::cout << GridLogMessage << "Vector " << i << ": norm2(v_i) = " << norm2(_Aggregates.subspace[i])
+                << " | norm2(R v_i) = " << norm2(cTmps[0]) << " | norm2(P R v_i) = " << norm2(fTmps[0])
+                << " | relative deviation = " << deviation;
 
       if(deviation > tolerance) {
         std::cout << " > " << tolerance << " -> check failed" << std::endl;
@@ -429,7 +436,7 @@ int main(int argc, char **argv) {
   std::cout << GridLogMessage << "Set up some fine level stuff: " << std::endl;
   std::cout << GridLogMessage << "**************************************************" << std::endl;
 
-  GridCartesian *        FGrid   = SpaceTimeGrid::makeFourDimGrid(GridDefaultLatt(), GridDefaultSimd(Nd, vComplex::Nsimd()), GridDefaultMpi());
+  GridCartesian *FGrid = SpaceTimeGrid::makeFourDimGrid(GridDefaultLatt(), GridDefaultSimd(Nd, vComplex::Nsimd()), GridDefaultMpi());
   GridRedBlackCartesian *FrbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(FGrid);
 
   std::vector<int> fSeeds({1, 2, 3, 4});
@@ -439,9 +446,9 @@ int main(int argc, char **argv) {
   Gamma g5(Gamma::Algebra::Gamma5);
 
   // clang-format off
-  LatticeFermion        src(FGrid); gaussian(fPRNG, src);
-  LatticeFermion     result(FGrid); result = zero;
-  LatticeGaugeField     Umu(FGrid); SU3::HotConfiguration(fPRNG, Umu);
+  LatticeFermion    src(FGrid); gaussian(fPRNG, src);
+  LatticeFermion result(FGrid); result = zero;
+  LatticeGaugeField Umu(FGrid); SU3::HotConfiguration(fPRNG, Umu);
   // clang-format on
 
   RealD mass = params.mq;
diff --git a/tests/solver/Test_wilsonclover_mg.cc b/tests/solver/Test_wilsonclover_mg.cc
index ebb685cf..5b38ea61 100644
--- a/tests/solver/Test_wilsonclover_mg.cc
+++ b/tests/solver/Test_wilsonclover_mg.cc
@@ -64,8 +64,8 @@ public:
       std::cout << GridLogMessage << std::scientific << std::setprecision(2) << std::setw(2) << std::showpos << "vector " << i << ": "
                 << "singular value: " << lambda << ", singular vector precision: " << mu << ", norm: " << nrm << std::endl;
     }
-    std::cout << GridLogMessage << std::scientific << std::setprecision(2) << std::setw(2) << std::showpos << positiveOnes << " out of " << nn
-              << " vectors were positive" << std::endl;
+    std::cout << GridLogMessage << std::scientific << std::setprecision(2) << std::setw(2) << std::showpos << positiveOnes << " out of "
+              << nn << " vectors were positive" << std::endl;
   }
 };
 
@@ -141,26 +141,31 @@ template<class Field> void testLinearOperator(LinearOperatorBase<Field> &LinOp,
     Field err(Grid);
     // clang-format on
 
-    LinOp.Op(src, ref);
-    std::cout << GridLogMessage << " norm2(M * src)            = " << norm2(ref) << std::endl;
+    std::cout << setprecision(9);
+
+    std::cout << GridLogMessage << " norm2(src)\t\t\t\t= " << norm2(src) << std::endl;
 
     LinOp.OpDiag(src, diag);
-    std::cout << GridLogMessage << " norm2(Mdiag * src)        = " << norm2(diag) << std::endl;
+    std::cout << GridLogMessage << " norm2(Mdiag * src)\t\t\t= " << norm2(diag) << std::endl;
 
     for(int dir = 0; dir < 4; dir++) {
       for(auto disp : {+1, -1}) {
         LinOp.OpDir(src, tmp, dir, disp);
-        std::cout << GridLogMessage << " norm2(Mdir_{" << dir << "," << disp << "} * src) = " << norm2(tmp) << std::endl;
+        std::cout << GridLogMessage << " norm2(Mdir_{" << dir << "," << disp << "} * src)\t\t= " << norm2(tmp) << std::endl;
         sumDir = sumDir + tmp;
       }
     }
-    std::cout << GridLogMessage << " norm2(Σ_μ Mdir_μ * src)   = " << norm2(sumDir) << std::endl;
+    std::cout << GridLogMessage << " norm2(Σ_μ Mdir_μ * src)\t\t= " << norm2(sumDir) << std::endl;
 
     result = diag + sumDir;
-    err    = ref - result;
+    std::cout << GridLogMessage << " norm2((Mdiag + Σ_μ Mdir_μ) * src)\t= " << norm2(result) << std::endl;
 
-    std::cout << GridLogMessage << " Absolute deviation        = " << norm2(err) << std::endl;
-    std::cout << GridLogMessage << " Relative deviation        = " << norm2(err) / norm2(ref) << std::endl;
+    LinOp.Op(src, ref);
+    std::cout << GridLogMessage << " norm2(M * src)\t\t\t= " << norm2(ref) << std::endl;
+
+    err = ref - result;
+    std::cout << GridLogMessage << " Absolute deviation\t\t\t= " << norm2(err) << std::endl;
+    std::cout << GridLogMessage << " Relative deviation\t\t\t= " << norm2(err) / norm2(ref) << std::endl;
   }
 
   {
@@ -182,10 +187,11 @@ template<class Field> void testLinearOperator(LinearOperatorBase<Field> &LinOp,
     ComplexD phiMPhi    = innerProduct(phi, MPhi);
     ComplexD chiMdagChi = innerProduct(chi, MdagChi);
 
-    std::cout << GridLogMessage << " chiMPhi = " << chiMPhi << " phiMdagChi = " << phiMdagChi << " difference = " << chiMPhi - conjugate(phiMdagChi)
-              << std::endl;
+    std::cout << GridLogMessage << " chiMPhi = " << chiMPhi << " phiMdagChi = " << phiMdagChi
+              << " difference = " << chiMPhi - conjugate(phiMdagChi) << std::endl;
 
-    std::cout << GridLogMessage << " phiMPhi = " << phiMPhi << " chiMdagChi = " << chiMdagChi << " <- should be real if hermitian" << std::endl;
+    std::cout << GridLogMessage << " phiMPhi = " << phiMPhi << " chiMdagChi = " << chiMdagChi << " <- should be real if hermitian"
+              << std::endl;
   }
 
   {
@@ -323,8 +329,9 @@ public:
       fTmps[1]       = _Aggregates.subspace[i] - fTmps[0]; // v_i - P R v_i
       auto deviation = std::sqrt(norm2(fTmps[1]) / norm2(_Aggregates.subspace[i]));
 
-      std::cout << GridLogMessage << "Vector " << i << ": norm2(v_i) = " << norm2(_Aggregates.subspace[i]) << " | norm2(R v_i) = " << norm2(cTmps[0])
-                << " | norm2(P R v_i) = " << norm2(fTmps[0]) << " | relative deviation = " << deviation;
+      std::cout << GridLogMessage << "Vector " << i << ": norm2(v_i) = " << norm2(_Aggregates.subspace[i])
+                << " | norm2(R v_i) = " << norm2(cTmps[0]) << " | norm2(P R v_i) = " << norm2(fTmps[0])
+                << " | relative deviation = " << deviation;
 
       if(deviation > tolerance) {
         std::cout << " > " << tolerance << " -> check failed" << std::endl;
@@ -433,7 +440,7 @@ int main(int argc, char **argv) {
   std::cout << GridLogMessage << "Set up some fine level stuff: " << std::endl;
   std::cout << GridLogMessage << "**************************************************" << std::endl;
 
-  GridCartesian *        FGrid   = SpaceTimeGrid::makeFourDimGrid(GridDefaultLatt(), GridDefaultSimd(Nd, vComplex::Nsimd()), GridDefaultMpi());
+  GridCartesian *FGrid = SpaceTimeGrid::makeFourDimGrid(GridDefaultLatt(), GridDefaultSimd(Nd, vComplex::Nsimd()), GridDefaultMpi());
   GridRedBlackCartesian *FrbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(FGrid);
 
   std::vector<int> fSeeds({1, 2, 3, 4});
@@ -443,9 +450,9 @@ int main(int argc, char **argv) {
   Gamma g5(Gamma::Algebra::Gamma5);
 
   // clang-format off
-  FermionField        src(FGrid); gaussian(fPRNG, src);
-  FermionField     result(FGrid); result = zero;
-  LatticeGaugeField     Umu(FGrid); SU3::HotConfiguration(fPRNG, Umu);
+  FermionField      src(FGrid); gaussian(fPRNG, src);
+  FermionField   result(FGrid); result = zero;
+  LatticeGaugeField Umu(FGrid); SU3::HotConfiguration(fPRNG, Umu);
   // clang-format on
 
   RealD mass = params.mq;
@@ -759,15 +766,3 @@ int main(int argc, char **argv) {
 
   Grid_finalize();
 }
-
-// Ideas compiled during discussions with the others during lunchtime:
-//
-// • set the gauge fields to 0
-//   -> the hopping term is zero -> M is the same as Mdiag
-// • set the mass to minus 4
-//   -> the self coupling term is zero -> M is the same as Σ_u Mdir_μ
-//
-// In both cases it's probably a good idea to set the source fermion to 1
-
-// I just put this here to have it out of the way in main
-// This code is intended to be put after the creation of the first MG Preconditioner object for the fine grid.

From 0f6cdf3d4b1e83ef943000b880ade09d9eb6b842 Mon Sep 17 00:00:00 2001
From: Daniel Richtmann <daniel.richtmann@physik.uni-regensburg.de>
Date: Fri, 9 Mar 2018 16:56:16 +0100
Subject: [PATCH 091/130] WilsonMG: Implement missing parts of CoarsenedMatrix

---
 lib/algorithms/CoarsenedMatrix.h | 43 +++++++++++++++++++++++++++++---
 1 file changed, 40 insertions(+), 3 deletions(-)

diff --git a/lib/algorithms/CoarsenedMatrix.h b/lib/algorithms/CoarsenedMatrix.h
index 36c1eb34..26ff2ede 100644
--- a/lib/algorithms/CoarsenedMatrix.h
+++ b/lib/algorithms/CoarsenedMatrix.h
@@ -306,9 +306,46 @@ namespace Grid {
       return M(in,out);
     };
 
-    // Defer support for further coarsening for now
-    void Mdiag    (const CoarseVector &in,  CoarseVector &out){};
-    void Mdir     (const CoarseVector &in,  CoarseVector &out,int dir, int disp){};
+    void Mdir(const CoarseVector &in, CoarseVector &out, int dir, int disp) {
+      // Applies one stencil term only: out[ss] = A[point][ss] * nbr(ss), with point chosen from (dir, disp) below.
+      conformable(_grid,in._grid);
+      conformable(in._grid,out._grid);
+      // NOTE(review): HaloExchange presumably fills Stencil.CommBuf(), which the non-local branch below reads -- confirm.
+      SimpleCompressor<siteVector> compressor;
+      Stencil.HaloExchange(in,compressor);
+      // Map (dir, disp) to a stencil point index: (0, 0) -> 8; otherwise 2*dir for disp == +1 and 2*dir + 1 for disp == -1.
+      auto point = [dir, disp](){
+        if(dir == 0 and disp == 0)
+          return 8; // NOTE(review): hard-coded; presumably the self-coupling point of a 9-point stencil -- confirm for other dimensions
+        else
+          return (4 * dir + 1 - disp) / 2;
+      }();
+
+      parallel_for(int ss=0;ss<Grid()->oSites();ss++){
+        siteVector res = zero;
+        siteVector nbr;
+        int ptype;
+        StencilEntry *SE;
+
+        SE=Stencil.GetEntry(ptype,point,ss);
+        // Fetch the neighbour: local data comes from 'in' (permuted when the entry asks for it), non-local data from the comm buffer.
+        if(SE->_is_local&&SE->_permute) {
+          permute(nbr,in._odata[SE->_offset],ptype);
+        } else if(SE->_is_local) {
+          nbr = in._odata[SE->_offset];
+        } else {
+          nbr = Stencil.CommBuf()[SE->_offset];
+        }
+
+        res = res + A[point]._odata[ss]*nbr; // res starts at zero, so this is just the single term A[point] * nbr
+
+        vstream(out._odata[ss],res);
+      }
+    };
+
+    void Mdiag(const CoarseVector &in, CoarseVector &out) { // thin wrapper: forwards to Mdir with (dir, disp) == (0, 0), which Mdir maps to stencil point 8
+      Mdir(in, out, 0, 0); // use the self coupling (= last) point of the stencil
+    };
 
     CoarsenedMatrix(GridCartesian &CoarseGrid) 	: 
 

From a66cecc5090bd81529614da75ea71d62852188ed Mon Sep 17 00:00:00 2001
From: Daniel Richtmann <daniel.richtmann@physik.uni-regensburg.de>
Date: Fri, 9 Mar 2018 17:34:29 +0100
Subject: [PATCH 092/130] WilsonMG: Fix invalid call to MR ctor

---
 tests/solver/Test_wilson_mg.cc       | 2 +-
 tests/solver/Test_wilsonclover_mg.cc | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/tests/solver/Test_wilson_mg.cc b/tests/solver/Test_wilson_mg.cc
index 60bf6fec..1601d9ef 100644
--- a/tests/solver/Test_wilson_mg.cc
+++ b/tests/solver/Test_wilson_mg.cc
@@ -712,7 +712,7 @@ int main(int argc, char **argv) {
 
     std::vector<std::unique_ptr<OperatorFunction<CoarseCoarseVector>>> dummyCoarseCoarseSolvers;
     dummyCoarseCoarseSolvers.emplace_back(new GeneralisedMinimalResidual<CoarseCoarseVector>(5.0e-2, 100, 8, false));
-    dummyCoarseCoarseSolvers.emplace_back(new MinimalResidual<CoarseCoarseVector>(5.0e-2, 100, false));
+    dummyCoarseCoarseSolvers.emplace_back(new MinimalResidual<CoarseCoarseVector>(5.0e-2, 100, 0.8, false));
     dummyCoarseCoarseSolvers.emplace_back(new ConjugateGradient<CoarseCoarseVector>(5.0e-2, 100, false));
 
     // std::cout << GridLogMessage << "**************************************************" << std::endl;
diff --git a/tests/solver/Test_wilsonclover_mg.cc b/tests/solver/Test_wilsonclover_mg.cc
index 5b38ea61..db250464 100644
--- a/tests/solver/Test_wilsonclover_mg.cc
+++ b/tests/solver/Test_wilsonclover_mg.cc
@@ -718,7 +718,7 @@ int main(int argc, char **argv) {
 
     std::vector<std::unique_ptr<OperatorFunction<CoarseCoarseVector>>> dummyCoarseCoarseSolvers;
     dummyCoarseCoarseSolvers.emplace_back(new GeneralisedMinimalResidual<CoarseCoarseVector>(5.0e-2, 100, 8, false));
-    dummyCoarseCoarseSolvers.emplace_back(new MinimalResidual<CoarseCoarseVector>(5.0e-2, 100, false));
+    dummyCoarseCoarseSolvers.emplace_back(new MinimalResidual<CoarseCoarseVector>(5.0e-2, 100, 0.8, false));
     dummyCoarseCoarseSolvers.emplace_back(new ConjugateGradient<CoarseCoarseVector>(5.0e-2, 100, false));
 
     // std::cout << GridLogMessage << "**************************************************" << std::endl;

From ee5cf6c8c58855ee1dc88591edb8a4b8c3ffee97 Mon Sep 17 00:00:00 2001
From: Daniel Richtmann <daniel.richtmann@physik.uni-regensburg.de>
Date: Fri, 16 Mar 2018 13:02:57 +0100
Subject: [PATCH 093/130] WilsonMG: Some minor changes to GMRES implementations

---
 ...unicationAvoidingGeneralisedMinimalResidual.h | 16 +++++-----------
 ...unicationAvoidingGeneralisedMinimalResidual.h | 16 +++++-----------
 .../FlexibleGeneralisedMinimalResidual.h         | 16 +++++-----------
 .../iterative/GeneralisedMinimalResidual.h       | 16 +++++-----------
 4 files changed, 20 insertions(+), 44 deletions(-)

diff --git a/lib/algorithms/iterative/CommunicationAvoidingGeneralisedMinimalResidual.h b/lib/algorithms/iterative/CommunicationAvoidingGeneralisedMinimalResidual.h
index 4469c7ed..1f5d293a 100644
--- a/lib/algorithms/iterative/CommunicationAvoidingGeneralisedMinimalResidual.h
+++ b/lib/algorithms/iterative/CommunicationAvoidingGeneralisedMinimalResidual.h
@@ -41,6 +41,7 @@ class CommunicationAvoidingGeneralisedMinimalResidual : public OperatorFunction<
 
   Integer MaxIterations;
   Integer RestartLength;
+  Integer MaxNumberOfRestarts;
   Integer IterationCount; // Number of iterations the CAGMRES took to finish,
                           // filled in upon completion
 
@@ -63,6 +64,7 @@ class CommunicationAvoidingGeneralisedMinimalResidual : public OperatorFunction<
       : Tolerance(tol)
       , MaxIterations(maxit)
       , RestartLength(restart_length)
+      , MaxNumberOfRestarts(MaxIterations/RestartLength + ((MaxIterations%RestartLength == 0) ? 0 : 1))
       , ErrorOnNoConverge(err_on_no_conv)
       , H(Eigen::MatrixXcd::Zero(RestartLength, RestartLength + 1)) // sizes taken from DD-αAMG code base
       , y(RestartLength + 1, 0.)
@@ -99,9 +101,8 @@ class CommunicationAvoidingGeneralisedMinimalResidual : public OperatorFunction<
     SolverTimer.Start();
 
     IterationCount = 0;
-    auto outerLoopMax = MaxIterations/RestartLength + ((MaxIterations%RestartLength == 0) ? 0 : 1);
 
-    for (int k=0; k<outerLoopMax; k++) {
+    for (int k=0; k<MaxNumberOfRestarts; k++) {
 
       cp = outerLoopBody(LinOp, src, psi, rsq);
 
@@ -233,15 +234,8 @@ class CommunicationAvoidingGeneralisedMinimalResidual : public OperatorFunction<
       y[i] = y[i] / H(i, i);
     }
 
-    if (true) {
-      for (int i = 0; i <= iter; i++)
-        psi = psi + v[i] * y[i];
-    }
-    else {
-      psi = y[0] * v[0];
-      for (int i = 1; i <= iter; i++)
-        psi = psi + v[i] * y[i];
-    }
+    for (int i = 0; i <= iter; i++)
+      psi = psi + v[i] * y[i];
     CompSolutionTimer.Stop();
   }
 };
diff --git a/lib/algorithms/iterative/FlexibleCommunicationAvoidingGeneralisedMinimalResidual.h b/lib/algorithms/iterative/FlexibleCommunicationAvoidingGeneralisedMinimalResidual.h
index dc4885d6..b992f760 100644
--- a/lib/algorithms/iterative/FlexibleCommunicationAvoidingGeneralisedMinimalResidual.h
+++ b/lib/algorithms/iterative/FlexibleCommunicationAvoidingGeneralisedMinimalResidual.h
@@ -41,6 +41,7 @@ class FlexibleCommunicationAvoidingGeneralisedMinimalResidual : public OperatorF
 
   Integer MaxIterations;
   Integer RestartLength;
+  Integer MaxNumberOfRestarts;
   Integer IterationCount; // Number of iterations the FCAGMRES took to finish,
                           // filled in upon completion
 
@@ -67,6 +68,7 @@ class FlexibleCommunicationAvoidingGeneralisedMinimalResidual : public OperatorF
       : Tolerance(tol)
       , MaxIterations(maxit)
       , RestartLength(restart_length)
+      , MaxNumberOfRestarts(MaxIterations/RestartLength + ((MaxIterations%RestartLength == 0) ? 0 : 1))
       , ErrorOnNoConverge(err_on_no_conv)
       , H(Eigen::MatrixXcd::Zero(RestartLength, RestartLength + 1)) // sizes taken from DD-αAMG code base
       , y(RestartLength + 1, 0.)
@@ -105,9 +107,8 @@ class FlexibleCommunicationAvoidingGeneralisedMinimalResidual : public OperatorF
     SolverTimer.Start();
 
     IterationCount = 0;
-    auto outerLoopMax = MaxIterations/RestartLength + ((MaxIterations%RestartLength == 0) ? 0 : 1);
 
-    for (int k=0; k<outerLoopMax; k++) {
+    for (int k=0; k<MaxNumberOfRestarts; k++) {
 
       cp = outerLoopBody(LinOp, src, psi, rsq);
 
@@ -245,15 +246,8 @@ class FlexibleCommunicationAvoidingGeneralisedMinimalResidual : public OperatorF
       y[i] = y[i] / H(i, i);
     }
 
-    if (true) {
-      for (int i = 0; i <= iter; i++)
-        psi = psi + z[i] * y[i];
-    }
-    else {
-      psi = y[0] * z[0];
-      for (int i = 1; i <= iter; i++)
-        psi = psi + z[i] * y[i];
-    }
+    for (int i = 0; i <= iter; i++)
+      psi = psi + z[i] * y[i];
     CompSolutionTimer.Stop();
   }
 };
diff --git a/lib/algorithms/iterative/FlexibleGeneralisedMinimalResidual.h b/lib/algorithms/iterative/FlexibleGeneralisedMinimalResidual.h
index b05ac069..bc5184d4 100644
--- a/lib/algorithms/iterative/FlexibleGeneralisedMinimalResidual.h
+++ b/lib/algorithms/iterative/FlexibleGeneralisedMinimalResidual.h
@@ -41,6 +41,7 @@ class FlexibleGeneralisedMinimalResidual : public OperatorFunction<Field> {
 
   Integer MaxIterations;
   Integer RestartLength;
+  Integer MaxNumberOfRestarts;
   Integer IterationCount; // Number of iterations the FGMRES took to finish,
                           // filled in upon completion
 
@@ -67,6 +68,7 @@ class FlexibleGeneralisedMinimalResidual : public OperatorFunction<Field> {
       : Tolerance(tol)
       , MaxIterations(maxit)
       , RestartLength(restart_length)
+      , MaxNumberOfRestarts(MaxIterations/RestartLength + ((MaxIterations%RestartLength == 0) ? 0 : 1))
       , ErrorOnNoConverge(err_on_no_conv)
       , H(Eigen::MatrixXcd::Zero(RestartLength, RestartLength + 1)) // sizes taken from DD-αAMG code base
       , y(RestartLength + 1, 0.)
@@ -103,9 +105,8 @@ class FlexibleGeneralisedMinimalResidual : public OperatorFunction<Field> {
     SolverTimer.Start();
 
     IterationCount = 0;
-    auto outerLoopMax = MaxIterations/RestartLength + ((MaxIterations%RestartLength == 0) ? 0 : 1);
 
-    for (int k=0; k<outerLoopMax; k++) {
+    for (int k=0; k<MaxNumberOfRestarts; k++) {
 
       cp = outerLoopBody(LinOp, src, psi, rsq);
 
@@ -243,15 +244,8 @@ class FlexibleGeneralisedMinimalResidual : public OperatorFunction<Field> {
       y[i] = y[i] / H(i, i);
     }
 
-    if (true) {
-      for (int i = 0; i <= iter; i++)
-        psi = psi + z[i] * y[i];
-    }
-    else {
-      psi = y[0] * z[0];
-      for (int i = 1; i <= iter; i++)
-        psi = psi + z[i] * y[i];
-    }
+    for (int i = 0; i <= iter; i++)
+      psi = psi + z[i] * y[i];
     CompSolutionTimer.Stop();
   }
 };
diff --git a/lib/algorithms/iterative/GeneralisedMinimalResidual.h b/lib/algorithms/iterative/GeneralisedMinimalResidual.h
index e9d54a53..eaa43563 100644
--- a/lib/algorithms/iterative/GeneralisedMinimalResidual.h
+++ b/lib/algorithms/iterative/GeneralisedMinimalResidual.h
@@ -41,6 +41,7 @@ class GeneralisedMinimalResidual : public OperatorFunction<Field> {
 
   Integer MaxIterations;
   Integer RestartLength;
+  Integer MaxNumberOfRestarts;
   Integer IterationCount; // Number of iterations the GMRES took to finish,
                           // filled in upon completion
 
@@ -63,6 +64,7 @@ class GeneralisedMinimalResidual : public OperatorFunction<Field> {
       : Tolerance(tol)
       , MaxIterations(maxit)
       , RestartLength(restart_length)
+      , MaxNumberOfRestarts(MaxIterations/RestartLength + ((MaxIterations%RestartLength == 0) ? 0 : 1))
       , ErrorOnNoConverge(err_on_no_conv)
       , H(Eigen::MatrixXcd::Zero(RestartLength, RestartLength + 1)) // sizes taken from DD-αAMG code base
       , y(RestartLength + 1, 0.)
@@ -97,9 +99,8 @@ class GeneralisedMinimalResidual : public OperatorFunction<Field> {
     SolverTimer.Start();
 
     IterationCount = 0;
-    auto outerLoopMax = MaxIterations/RestartLength + ((MaxIterations%RestartLength == 0) ? 0 : 1);
 
-    for (int k=0; k<outerLoopMax; k++) {
+    for (int k=0; k<MaxNumberOfRestarts; k++) {
 
       cp = outerLoopBody(LinOp, src, psi, rsq);
 
@@ -231,15 +232,8 @@ class GeneralisedMinimalResidual : public OperatorFunction<Field> {
       y[i] = y[i] / H(i, i);
     }
 
-    if (true) {
-      for (int i = 0; i <= iter; i++)
-        psi = psi + v[i] * y[i];
-    }
-    else {
-      psi = y[0] * v[0];
-      for (int i = 1; i <= iter; i++)
-        psi = psi + v[i] * y[i];
-    }
+    for (int i = 0; i <= iter; i++)
+      psi = psi + v[i] * y[i];
     CompSolutionTimer.Stop();
   }
 };

From edbc0d49d775b330901c719cea46cb17a6c68648 Mon Sep 17 00:00:00 2001
From: Daniel Richtmann <daniel.richtmann@physik.uni-regensburg.de>
Date: Mon, 19 Mar 2018 18:30:24 +0100
Subject: [PATCH 094/130] WilsonMG: Get rid of explicit GridTypeMappers in
 CoarsenedMatrix

---
 lib/algorithms/CoarsenedMatrix.h | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/lib/algorithms/CoarsenedMatrix.h b/lib/algorithms/CoarsenedMatrix.h
index 26ff2ede..fdf0abce 100644
--- a/lib/algorithms/CoarsenedMatrix.h
+++ b/lib/algorithms/CoarsenedMatrix.h
@@ -94,7 +94,7 @@ namespace Grid {
   class Aggregation   {
   public:
 
-    typedef typename GridTypeMapper<CComplex>::vector_type     innerType;
+    typedef typename CComplex::vector_type                     innerType;
     typedef iScalar<iScalar<iScalar<innerType         >    > > siteScalar; // used for inner products on fine field
     typedef iScalar<iVector<iVector<innerType, nbasis >, 1 > > siteVector;
     typedef iScalar<iMatrix<iMatrix<innerType, nbasis >, 1 > > siteMatrix;
@@ -243,10 +243,10 @@ namespace Grid {
   // Fine Object == (per site) type of fine field
   // nbasis      == number of deflation vectors
   template<class Fobj,class CComplex,int nbasis>
-  class CoarsenedMatrix : public SparseMatrixBase<Lattice<iScalar<iVector<iVector<typename GridTypeMapper<CComplex>::vector_type, nbasis >, 1 > > > >  {
+  class CoarsenedMatrix : public SparseMatrixBase<Lattice<iScalar<iVector<iVector<typename CComplex::vector_type, nbasis >, 1 > > > >  {
   public:
-    
-    typedef typename GridTypeMapper<CComplex>::vector_type     innerType;
+
+    typedef typename CComplex::vector_type                     innerType;
     typedef iScalar<iScalar<iScalar<innerType         >    > > siteScalar;
     typedef iScalar<iVector<iVector<innerType, nbasis >, 1 > > siteVector;
     typedef iScalar<iMatrix<iMatrix<innerType, nbasis >, 1 > > siteMatrix;

From 1cfed3de7cde8826d680409d1c95eaca82e158ed Mon Sep 17 00:00:00 2001
From: Daniel Richtmann <daniel.richtmann@physik.uni-regensburg.de>
Date: Fri, 23 Mar 2018 19:41:02 +0100
Subject: [PATCH 095/130] WilsonMG: Add new logger for MG

---
 lib/log/Log.cc | 1 +
 lib/log/Log.h  | 1 +
 2 files changed, 2 insertions(+)

diff --git a/lib/log/Log.cc b/lib/log/Log.cc
index bc46893f..c3045a28 100644
--- a/lib/log/Log.cc
+++ b/lib/log/Log.cc
@@ -59,6 +59,7 @@ void GridLogTimestamp(int on){
 }
 
 Colours GridLogColours(0);
+GridLogger GridLogMG     (1, "MG"    , GridLogColours, "NORMAL");
 GridLogger GridLogIRL    (1, "IRL"   , GridLogColours, "NORMAL");
 GridLogger GridLogSolver (1, "Solver", GridLogColours, "NORMAL");
 GridLogger GridLogError  (1, "Error" , GridLogColours, "RED");
diff --git a/lib/log/Log.h b/lib/log/Log.h
index 011a7250..84e25d26 100644
--- a/lib/log/Log.h
+++ b/lib/log/Log.h
@@ -161,6 +161,7 @@ public:
 
 void GridLogConfigure(std::vector<std::string> &logstreams);
 
+extern GridLogger GridLogMG;
 extern GridLogger GridLogIRL;
 extern GridLogger GridLogSolver;
 extern GridLogger GridLogError;

From 0f6009a29f4b6c7d0eba041da79664321752ed5c Mon Sep 17 00:00:00 2001
From: Daniel Richtmann <daniel.richtmann@physik.uni-regensburg.de>
Date: Mon, 19 Mar 2018 10:59:42 +0100
Subject: [PATCH 096/130] WilsonMG: Huge refactor into something that could be
 considered an algorithm

---
 tests/solver/Test_wilson_mg.cc | 808 ++++++++++++++-------------------
 1 file changed, 330 insertions(+), 478 deletions(-)

diff --git a/tests/solver/Test_wilson_mg.cc b/tests/solver/Test_wilson_mg.cc
index 1601d9ef..bbbfe1e0 100644
--- a/tests/solver/Test_wilson_mg.cc
+++ b/tests/solver/Test_wilson_mg.cc
@@ -69,55 +69,67 @@ public:
   }
 };
 
-class myclass : Serializable {
+// clang-format off
+struct MultigridParams : Serializable { // Parameter set read by LevelInfo to build the multigrid level hierarchy
 public:
-  // clang-format off
-  GRID_SERIALIZABLE_CLASS_MEMBERS(myclass,
-                                  int, domaindecompose,
-                                  int, domainsize,
-                                  int, coarsegrids,
-                                  int, order,
-                                  int, Ls,
-                                  double, mq,
-                                  double, lo,
-                                  double, hi,
-                                  int, steps);
-  // clang-format on
-  myclass(){};
+  GRID_SERIALIZABLE_CLASS_MEMBERS(MultigridParams,
+                                  int, nLevels, // total number of levels, finest included (LevelInfo asserts blockSizes.size() == nLevels - 1)
+                                  std::vector<std::vector<int>>, blockSizes); // blockSizes[l][d]: divisor applied to dimension d when coarsening from level l to l + 1 (finest == 0)
+  MultigridParams(){};
 };
-myclass params;
+MultigridParams mgParams;
+// clang-format on
 
-template<int nbasis> struct CoarseGrids {
+struct LevelInfo { // Grids, seeds and parallel RNGs of all multigrid levels; index 0 is the coarsest level once the constructor has run
 public:
-  std::vector<std::vector<int>> LattSizes;
   std::vector<std::vector<int>> Seeds;
   std::vector<GridCartesian *>  Grids;
   std::vector<GridParallelRNG>  PRNGs;
 
-  CoarseGrids(std::vector<std::vector<int>> const &blockSizes, int coarsegrids) {
+  LevelInfo(GridCartesian *FineGrid, MultigridParams const &Params) { // FineGrid is stored as the finest level; Params supplies nLevels and blockSizes
+    auto nCoarseLevels = Params.blockSizes.size();
 
-    assert(blockSizes.size() == coarsegrids);
+    assert(nCoarseLevels == Params.nLevels - 1); // exactly one block-size vector per coarse level
 
-    std::cout << GridLogMessage << "Constructing " << coarsegrids << " CoarseGrids" << std::endl;
+    // set up values for finest grid
+    Grids.push_back(FineGrid);
+    Seeds.push_back({1, 2, 3, 4});
+    PRNGs.push_back(GridParallelRNG(Grids.back()));
+    PRNGs.back().SeedFixedIntegers(Seeds.back());
 
-    for(int cl = 0; cl < coarsegrids; ++cl) { // may be a bit ugly and slow but not perf critical
-      // need to differentiate between first and other coarse levels in size calculation
-      LattSizes.push_back({cl == 0 ? GridDefaultLatt() : LattSizes[cl - 1]});
-      Seeds.push_back(std::vector<int>(LattSizes[cl].size()));
+    // set up values for coarser grids
+    for(int level = 1; level < Params.nLevels; ++level) {
+      auto Nd  = Grids[level - 1]->_ndimension; // NOTE(review): this local Nd hides the Nd that other code in this file (e.g. main) uses -- presumably intended
+      auto tmp = Grids[level - 1]->_fdimensions; // starts as the previous (finer) level's _fdimensions, divided by the block sizes below
+      assert(tmp.size() == Nd);
 
-      for(int d = 0; d < LattSizes[cl].size(); ++d) {
-        LattSizes[cl][d] = LattSizes[cl][d] / blockSizes[cl][d];
-        Seeds[cl][d]     = (cl + 1) * LattSizes[cl].size() + d + 1;
-        // calculation unimportant, just to get. e.g., {5, 6, 7, 8} for first coarse level and so on
+      Seeds.push_back(std::vector<int>(Nd));
+
+      for(int d = 0; d < Nd; ++d) {
+        tmp[d] /= Params.blockSizes[level - 1][d]; // coarse extent = finer extent / block size
+        Seeds[level][d] = (level)*Nd + d + 1; // distinct seeds per level, e.g. {5, 6, 7, 8} on the first coarse level when Nd == 4
       }
 
-      Grids.push_back(SpaceTimeGrid::makeFourDimGrid(LattSizes[cl], GridDefaultSimd(Nd, vComplex::Nsimd()), GridDefaultMpi()));
-      PRNGs.push_back(GridParallelRNG(Grids[cl]));
+      Grids.push_back(SpaceTimeGrid::makeFourDimGrid(tmp, GridDefaultSimd(Nd, vComplex::Nsimd()), GridDefaultMpi()));
+      PRNGs.push_back(GridParallelRNG(Grids[level]));
 
-      PRNGs[cl].SeedFixedIntegers(Seeds[cl]);
+      PRNGs[level].SeedFixedIntegers(Seeds[level]);
+    }
 
-      std::cout << GridLogMessage << "cl = " << cl << ": LattSize = " << LattSizes[cl] << std::endl;
-      std::cout << GridLogMessage << "cl = " << cl << ":    Seeds = " << Seeds[cl] << std::endl;
+    std::cout << GridLogMessage << "Constructed " << Params.nLevels << " levels" << std::endl;
+
+    // The construction above corresponds to the finest level having level == 0
+    // (simply because it's not as ugly to implement), but we need it the
+    // other way round (i.e., the coarsest level to have level == 0) for the MG
+    // Preconditioner -> reverse the vectors
+
+    std::reverse(Seeds.begin(), Seeds.end());
+    std::reverse(Grids.begin(), Grids.end());
+    std::reverse(PRNGs.begin(), PRNGs.end());
+
+    for(int level = 0; level < Params.nLevels; ++level) { // after the reversal: level 0 is the coarsest grid, nLevels - 1 the finest
+      std::cout << GridLogMessage << "level = " << level << ":" << std::endl;
+      Grids[level]->show_decomposition();
     }
   }
 };
@@ -221,116 +233,177 @@ template<class Field> void testLinearOperator(LinearOperatorBase<Field> &LinOp,
   }
 }
 
-// template < class Fobj, class CComplex, int coarseSpins, int nbasis, class Matrix >
-// class MultiGridPreconditioner : public LinearFunction< Lattice< Fobj > > {
-template<class Fobj, class CComplex, int nbasis, class Matrix> class MultiGridPreconditioner : public LinearFunction<Lattice<Fobj>> {
+template<class Fobj, class CoarseScalar, int nCoarseSpins, int nBasis, int level, class Matrix>
+class MultiGridPreconditioner : public LinearFunction<Lattice<Fobj>> {
 public:
-  typedef Aggregation<Fobj, CComplex, nbasis>     Aggregates;
-  typedef CoarsenedMatrix<Fobj, CComplex, nbasis> CoarseOperator;
+  /////////////////////////////////////////////
+  // Type Definitions
+  /////////////////////////////////////////////
 
-  typedef typename Aggregation<Fobj, CComplex, nbasis>::siteVector   siteVector;
-  typedef typename Aggregation<Fobj, CComplex, nbasis>::CoarseScalar CoarseScalar;
-  typedef typename Aggregation<Fobj, CComplex, nbasis>::CoarseVector CoarseVector;
-  typedef typename Aggregation<Fobj, CComplex, nbasis>::CoarseMatrix CoarseMatrix;
-  typedef typename Aggregation<Fobj, CComplex, nbasis>::FineField    FineField;
-  typedef LinearOperatorBase<FineField>                              FineOperator;
+  typedef Aggregation<Fobj, CoarseScalar, nBasis>                                                                Aggregates;
+  typedef CoarsenedMatrix<Fobj, CoarseScalar, nBasis>                                                            CoarseMatrix;
+  typedef typename Aggregates::CoarseVector                                                                      CoarseVector;
+  typedef typename Aggregates::siteVector                                                                        CoarseSiteVector;
+  typedef Matrix                                                                                                 FineMatrix;
+  typedef typename Aggregates::FineField                                                                         FineVector;
+  typedef MultiGridPreconditioner<CoarseSiteVector, CoarseScalar, nCoarseSpins, nBasis, level - 1, CoarseMatrix> NextPreconditionerLevel;
 
-  Aggregates &    _Aggregates;
-  CoarseOperator &_CoarseOperator;
-  Matrix &        _FineMatrix;
-  FineOperator &  _FineOperator;
-  Matrix &        _SmootherMatrix;
-  FineOperator &  _SmootherOperator;
+  /////////////////////////////////////////////
+  // Member Data
+  /////////////////////////////////////////////
 
-  // Constructor
-  MultiGridPreconditioner(Aggregates &    Agg,
-                          CoarseOperator &Coarse,
-                          FineOperator &  Fine,
-                          Matrix &        FineMatrix,
-                          FineOperator &  Smooth,
-                          Matrix &        SmootherMatrix)
-    : _Aggregates(Agg)
-    , _CoarseOperator(Coarse)
-    , _FineOperator(Fine)
-    , _FineMatrix(FineMatrix)
-    , _SmootherOperator(Smooth)
-    , _SmootherMatrix(SmootherMatrix) {}
+  LevelInfo &                              _LevelInfo;
+  FineMatrix &                             _FineMatrix;
+  FineMatrix &                             _SmootherMatrix;
+  Aggregates                               _Aggregates;
+  CoarseMatrix                             _CoarseMatrix;
+  std::unique_ptr<NextPreconditionerLevel> _NextPreconditionerLevel;
 
-  void operator()(const FineField &in, FineField &out) {
+  /////////////////////////////////////////////
+  // Member Functions
+  /////////////////////////////////////////////
 
-    CoarseVector coarseSrc(_CoarseOperator.Grid());
-    CoarseVector coarseTmp(_CoarseOperator.Grid());
-    CoarseVector coarseSol(_CoarseOperator.Grid());
-    coarseSol = zero;
-
-    GeneralisedMinimalResidual<CoarseVector> coarseGMRES(5.0e-2, 100, 25, false);
-    GeneralisedMinimalResidual<FineField>    fineGMRES(5.0e-2, 100, 25, false);
-
-    HermitianLinearOperator<CoarseOperator, CoarseVector> coarseHermOp(_CoarseOperator);
-    MdagMLinearOperator<CoarseOperator, CoarseVector>     coarseMdagMOp(_CoarseOperator);
-    MdagMLinearOperator<Matrix, FineField>                fineMdagMOp(_SmootherMatrix);
-
-    FineField fineTmp1(in._grid);
-    FineField fineTmp2(in._grid);
-
-    RealD Ni = norm2(in);
-
-    // no pre smoothing for now
-    auto  preSmootherNorm     = 0;
-    auto  preSmootherResidual = 0;
-    RealD r;
-
-    // Project to coarse grid, solve, project back to fine grid
-    _Aggregates.ProjectToSubspace(coarseSrc, in);
-    coarseGMRES(coarseMdagMOp, coarseSrc, coarseSol);
-    _Aggregates.PromoteFromSubspace(coarseSol, out);
-
-    // Recompute error
-    _FineOperator.Op(out, fineTmp1);
-    fineTmp1            = in - fineTmp1;
-    r                   = norm2(fineTmp1);
-    auto coarseResidual = std::sqrt(r / Ni);
-
-    // Apply smoother, use GMRES for the moment
-    fineGMRES(fineMdagMOp, in, out);
-
-    // Recompute error
-    _FineOperator.Op(out, fineTmp1);
-    fineTmp1                  = in - fineTmp1;
-    r                         = norm2(fineTmp1);
-    auto postSmootherResidual = std::sqrt(r / Ni);
-
-    std::cout << GridLogIterative << "Input norm = " << Ni << " Pre-Smoother norm " << preSmootherNorm
-              << " Pre-Smoother residual = " << preSmootherResidual << " Coarse residual = " << coarseResidual
-              << " Post-Smoother residual = " << postSmootherResidual << std::endl;
+  MultiGridPreconditioner(LevelInfo &LvlInfo, FineMatrix &FineMat, FineMatrix &SmootherMat)
+    : _LevelInfo(LvlInfo)
+    , _FineMatrix(FineMat)
+    , _SmootherMatrix(SmootherMat)
+    , _Aggregates(_LevelInfo.Grids[level - 1], _LevelInfo.Grids[level], 0)
+    , _CoarseMatrix(*_LevelInfo.Grids[level - 1]) {
+    _NextPreconditionerLevel = std::unique_ptr<NextPreconditionerLevel>(
+      new NextPreconditionerLevel(_LevelInfo, _CoarseMatrix, _CoarseMatrix));
   }
 
-  void runChecks(CoarseGrids<nbasis> &cGrids, int whichCoarseGrid) {
+  void setup() {
 
-    /////////////////////////////////////////////
-    // Some stuff we need for the checks below //
-    /////////////////////////////////////////////
-    auto tolerance = 1e-13; // TODO: this obviously depends on the precision we use, current value is for double
+    Gamma                                       g5(Gamma::Algebra::Gamma5);
+    MdagMLinearOperator<FineMatrix, FineVector> fineMdagMOp(_FineMatrix);
 
-    std::vector<CoarseVector> cTmps(4, _CoarseOperator.Grid());
-    std::vector<FineField>    fTmps(2, _Aggregates.subspace[0]._grid); // atm only for one coarser grid
+    _Aggregates.CreateSubspace(_LevelInfo.PRNGs[level], fineMdagMOp /*, nb */); // NOTE: Don't specify nb to see the orthogonalization check
 
-    // need to construct an operator, since _CoarseOperator is not a LinearOperator but only a matrix (the name is a bit misleading)
-    MdagMLinearOperator<CoarseOperator, CoarseVector> MdagMOp(_CoarseOperator);
+    // TestVectorAnalyzer<FineVector, nbasis> fineTVA;
+    // fineTVA(fineMdagMOp, _Aggregates.subspace);
 
-    std::cout << GridLogMessage << "**************************************************" << std::endl;
-    std::cout << GridLogMessage << "MG correctness check: 0 == (1 - P R) v" << std::endl;
-    std::cout << GridLogMessage << "**************************************************" << std::endl;
+    static_assert((nBasis & 0x1) == 0, "MG Preconditioner only supports an even number of basis vectors");
+    int nb = nBasis / 2;
+
+    for(
+      int n = 0; n < nb;
+      n++) { // TODO: to get this to work for more than two levels, I would need to either implement coarse spins or have a template specialization of this class also for the finest level
+      _Aggregates.subspace[n + nb] = g5 * _Aggregates.subspace[n];
+    }
+
+    _CoarseMatrix.CoarsenOperator(_LevelInfo.Grids[level], fineMdagMOp, _Aggregates);
+
+    _NextPreconditionerLevel->setup();
+  }
+
+  virtual void operator()(Lattice<Fobj> const &in, Lattice<Fobj> &out) {
+
+    // TODO: implement a W-cycle and a toggle to switch between the cycling strategies
+    vCycle(in, out);
+    // kCycle(in, out);
+  }
+
+  void vCycle(Lattice<Fobj> const &in, Lattice<Fobj> &out) {
+
+    RealD inputNorm = norm2(in);
+
+    CoarseVector coarseSrc(_LevelInfo.Grids[level - 1]);
+    CoarseVector coarseSol(_LevelInfo.Grids[level - 1]);
+    coarseSol = zero;
+
+    FineVector fineTmp(in._grid);
+
+    TrivialPrecon<FineVector>                      fineTrivialPreconditioner;
+    FlexibleGeneralisedMinimalResidual<FineVector> fineFGMRES(1.0e-14, 1, fineTrivialPreconditioner, 1, false);
+
+    MdagMLinearOperator<FineMatrix, FineVector> fineMdagMOp(_FineMatrix);
+    MdagMLinearOperator<FineMatrix, FineVector> fineSmootherMdagMOp(_SmootherMatrix);
+
+    _Aggregates.ProjectToSubspace(coarseSrc, in);
+    (*_NextPreconditionerLevel)(coarseSrc, coarseSol);
+    _Aggregates.PromoteFromSubspace(coarseSol, out);
+
+    fineMdagMOp.Op(out, fineTmp);
+    fineTmp                                = in - fineTmp;
+    auto r                                 = norm2(fineTmp);
+    auto residualAfterCoarseGridCorrection = std::sqrt(r / inputNorm);
+
+    fineFGMRES(fineSmootherMdagMOp, in, out);
+
+    fineMdagMOp.Op(out, fineTmp);
+    fineTmp                        = in - fineTmp;
+    r                              = norm2(fineTmp);
+    auto residualAfterPostSmoother = std::sqrt(r / inputNorm);
+
+    std::cout << GridLogMG << " Level " << level << ": Input norm = " << std::sqrt(inputNorm)
+              << " Coarse residual = " << residualAfterCoarseGridCorrection << " Post-Smoother residual = " << residualAfterPostSmoother
+              << std::endl;
+  }
+
+  void kCycle(Lattice<Fobj> const &in, Lattice<Fobj> &out) {
+
+    RealD inputNorm = norm2(in);
+
+    CoarseVector coarseSrc(_LevelInfo.Grids[level - 1]);
+    CoarseVector coarseSol(_LevelInfo.Grids[level - 1]);
+    coarseSol = zero;
+
+    FineVector fineTmp(in._grid);
+
+    TrivialPrecon<FineVector>                        fineTrivialPreconditioner;
+    FlexibleGeneralisedMinimalResidual<FineVector>   fineFGMRES(1.0e-14, 1, fineTrivialPreconditioner, 1, false);
+    FlexibleGeneralisedMinimalResidual<CoarseVector> coarseFGMRES(1.0e-14, 1, *_NextPreconditionerLevel, 1, false);
+
+    MdagMLinearOperator<FineMatrix, FineVector>     fineMdagMOp(_FineMatrix);
+    MdagMLinearOperator<FineMatrix, FineVector>     fineSmootherMdagMOp(_SmootherMatrix);
+    MdagMLinearOperator<CoarseMatrix, CoarseVector> coarseMdagMOp(_CoarseMatrix);
+
+    _Aggregates.ProjectToSubspace(coarseSrc, in);
+    coarseFGMRES(coarseMdagMOp, coarseSrc, coarseSol);
+    _Aggregates.PromoteFromSubspace(coarseSol, out);
+
+    fineMdagMOp.Op(out, fineTmp);
+    fineTmp                                = in - fineTmp;
+    auto r                                 = norm2(fineTmp);
+    auto residualAfterCoarseGridCorrection = std::sqrt(r / inputNorm);
+
+    fineFGMRES(fineSmootherMdagMOp, in, out);
+
+    fineMdagMOp.Op(out, fineTmp);
+    fineTmp                        = in - fineTmp;
+    r                              = norm2(fineTmp);
+    auto residualAfterPostSmoother = std::sqrt(r / inputNorm);
+
+    std::cout << GridLogMG << " Level " << level << ": Input norm = " << std::sqrt(inputNorm)
+              << " Coarse residual = " << residualAfterCoarseGridCorrection << " Post-Smoother residual = " << residualAfterPostSmoother
+              << std::endl;
+  }
+
+  void runChecks() {
+
+    auto tolerance   = 1e-13; // TODO: this obviously depends on the precision we use, current value is for double
+    auto coarseLevel = level - 1;
+
+    std::vector<FineVector>   fineTmps(2, _LevelInfo.Grids[level]);
+    std::vector<CoarseVector> coarseTmps(4, _LevelInfo.Grids[level - 1]);
+
+    MdagMLinearOperator<FineMatrix, FineVector>     fineMdagMOp(_FineMatrix);
+    MdagMLinearOperator<CoarseMatrix, CoarseVector> coarseMdagMOp(_CoarseMatrix);
+
+    std::cout << GridLogMG << " Level " << level << ": **************************************************" << std::endl;
+    std::cout << GridLogMG << " Level " << level << ": MG correctness check: 0 == (1 - P R) v" << std::endl;
+    std::cout << GridLogMG << " Level " << level << ": **************************************************" << std::endl;
 
     for(auto i = 0; i < _Aggregates.subspace.size(); ++i) {
-      _Aggregates.ProjectToSubspace(cTmps[0], _Aggregates.subspace[i]); //   R v_i
-      _Aggregates.PromoteFromSubspace(cTmps[0], fTmps[0]);              // P R v_i
+      _Aggregates.ProjectToSubspace(coarseTmps[0], _Aggregates.subspace[i]); //   R v_i
+      _Aggregates.PromoteFromSubspace(coarseTmps[0], fineTmps[0]);           // P R v_i
 
-      fTmps[1]       = _Aggregates.subspace[i] - fTmps[0]; // v_i - P R v_i
-      auto deviation = std::sqrt(norm2(fTmps[1]) / norm2(_Aggregates.subspace[i]));
+      fineTmps[1]    = _Aggregates.subspace[i] - fineTmps[0]; // v_i - P R v_i
+      auto deviation = std::sqrt(norm2(fineTmps[1]) / norm2(_Aggregates.subspace[i]));
 
-      std::cout << GridLogMessage << "Vector " << i << ": norm2(v_i) = " << norm2(_Aggregates.subspace[i])
-                << " | norm2(R v_i) = " << norm2(cTmps[0]) << " | norm2(P R v_i) = " << norm2(fTmps[0])
+      std::cout << GridLogMG << " Level " << level << ": Vector " << i << ": norm2(v_i) = " << norm2(_Aggregates.subspace[i])
+                << " | norm2(R v_i) = " << norm2(coarseTmps[0]) << " | norm2(P R v_i) = " << norm2(fineTmps[0])
                 << " | relative deviation = " << deviation;
 
       if(deviation > tolerance) {
@@ -341,44 +414,20 @@ public:
       }
     }
 
-    std::cout << GridLogMessage << "**************************************************" << std::endl;
-    std::cout << GridLogMessage << "MG correctness check: 0 == (1 - R P) v_c" << std::endl;
-    std::cout << GridLogMessage << "**************************************************" << std::endl;
+    std::cout << GridLogMG << " Level " << level << ": **************************************************" << std::endl;
+    std::cout << GridLogMG << " Level " << level << ": MG correctness check: 0 == (1 - R P) v_c" << std::endl;
+    std::cout << GridLogMG << " Level " << level << ": **************************************************" << std::endl;
 
-    random(cGrids.PRNGs[whichCoarseGrid], cTmps[0]);
+    random(_LevelInfo.PRNGs[coarseLevel], coarseTmps[0]);
 
-    _Aggregates.PromoteFromSubspace(cTmps[0], fTmps[0]); //   P v_c
-    _Aggregates.ProjectToSubspace(cTmps[1], fTmps[0]);   // R P v_c
+    _Aggregates.PromoteFromSubspace(coarseTmps[0], fineTmps[0]); //   P v_c
+    _Aggregates.ProjectToSubspace(coarseTmps[1], fineTmps[0]);   // R P v_c
 
-    cTmps[2]       = cTmps[0] - cTmps[1]; // v_c - R P v_c
-    auto deviation = std::sqrt(norm2(cTmps[2]) / norm2(cTmps[0]));
+    coarseTmps[2]  = coarseTmps[0] - coarseTmps[1]; // v_c - R P v_c
+    auto deviation = std::sqrt(norm2(coarseTmps[2]) / norm2(coarseTmps[0]));
 
-    std::cout << GridLogMessage << "norm2(v_c) = " << norm2(cTmps[0]) << " | norm2(R P v_c) = " << norm2(cTmps[1])
-              << " | norm2(P v_c) = " << norm2(fTmps[0]) << " | relative deviation = " << deviation;
-
-    if(deviation > tolerance) {
-      std::cout << " > " << tolerance << " -> check failed" << std::endl;
-      // abort();
-    } else {
-      std::cout << " < " << tolerance << " -> check passed" << std::endl;
-    }
-
-    std::cout << GridLogMessage << "**************************************************" << std::endl;
-    std::cout << GridLogMessage << "MG correctness check: 0 == (R D P - D_c) v_c" << std::endl;
-    std::cout << GridLogMessage << "**************************************************" << std::endl;
-
-    random(cGrids.PRNGs[whichCoarseGrid], cTmps[0]);
-
-    _Aggregates.PromoteFromSubspace(cTmps[0], fTmps[0]); //     P v_c
-    _FineOperator.Op(fTmps[0], fTmps[1]);                //   D P v_c
-    _Aggregates.ProjectToSubspace(cTmps[1], fTmps[1]);   // R D P v_c
-
-    MdagMOp.Op(cTmps[0], cTmps[2]); // D_c v_c
-
-    cTmps[3]  = cTmps[1] - cTmps[2]; // R D P v_c - D_c v_c
-    deviation = std::sqrt(norm2(cTmps[3]) / norm2(cTmps[1]));
-
-    std::cout << GridLogMessage << "norm2(R D P v_c) = " << norm2(cTmps[1]) << " | norm2(D_c v_c) = " << norm2(cTmps[2])
+    std::cout << GridLogMG << " Level " << level << ": norm2(v_c) = " << norm2(coarseTmps[0])
+              << " | norm2(R P v_c) = " << norm2(coarseTmps[1]) << " | norm2(P v_c) = " << norm2(fineTmps[0])
               << " | relative deviation = " << deviation;
 
     if(deviation > tolerance) {
@@ -388,54 +437,115 @@ public:
       std::cout << " < " << tolerance << " -> check passed" << std::endl;
     }
 
-    std::cout << GridLogMessage << "**************************************************" << std::endl;
-    std::cout << GridLogMessage << "MG correctness check: 0 == |(Im(v_c^dag D_c^dag D_c v_c)|" << std::endl;
-    std::cout << GridLogMessage << "**************************************************" << std::endl;
+    std::cout << GridLogMG << " Level " << level << ": **************************************************" << std::endl;
+    std::cout << GridLogMG << " Level " << level << ": MG correctness check: 0 == (R D P - D_c) v_c" << std::endl;
+    std::cout << GridLogMG << " Level " << level << ": **************************************************" << std::endl;
 
-    random(cGrids.PRNGs[whichCoarseGrid], cTmps[0]);
+    random(_LevelInfo.PRNGs[coarseLevel], coarseTmps[0]);
 
-    MdagMOp.Op(cTmps[0], cTmps[1]);    //         D_c v_c
-    MdagMOp.AdjOp(cTmps[1], cTmps[2]); // D_c^dag D_c v_c
+    _Aggregates.PromoteFromSubspace(coarseTmps[0], fineTmps[0]); //     P v_c
+    fineMdagMOp.Op(fineTmps[0], fineTmps[1]);                    //   D P v_c
+    _Aggregates.ProjectToSubspace(coarseTmps[1], fineTmps[1]);   // R D P v_c
 
-    auto dot  = innerProduct(cTmps[0], cTmps[2]); //v_c^dag D_c^dag D_c v_c
+    coarseMdagMOp.Op(coarseTmps[0], coarseTmps[2]); // D_c v_c
+
+    coarseTmps[3] = coarseTmps[1] - coarseTmps[2]; // R D P v_c - D_c v_c
+    deviation     = std::sqrt(norm2(coarseTmps[3]) / norm2(coarseTmps[1]));
+
+    std::cout << GridLogMG << " Level " << level << ": norm2(R D P v_c) = " << norm2(coarseTmps[1])
+              << " | norm2(D_c v_c) = " << norm2(coarseTmps[2]) << " | relative deviation = " << deviation;
+
+    if(deviation > tolerance) {
+      std::cout << " > " << tolerance << " -> check failed" << std::endl;
+      // abort();
+    } else {
+      std::cout << " < " << tolerance << " -> check passed" << std::endl;
+    }
+
+    std::cout << GridLogMG << " Level " << level << ": **************************************************" << std::endl;
+    std::cout << GridLogMG << " Level " << level << ": MG correctness check: 0 == |(Im(v_c^dag D_c^dag D_c v_c)|" << std::endl;
+    std::cout << GridLogMG << " Level " << level << ": **************************************************" << std::endl;
+
+    random(_LevelInfo.PRNGs[coarseLevel], coarseTmps[0]);
+
+    coarseMdagMOp.Op(coarseTmps[0], coarseTmps[1]);    //         D_c v_c
+    coarseMdagMOp.AdjOp(coarseTmps[1], coarseTmps[2]); // D_c^dag D_c v_c
+
+    auto dot  = innerProduct(coarseTmps[0], coarseTmps[2]); //v_c^dag D_c^dag D_c v_c
     deviation = abs(imag(dot)) / abs(real(dot));
 
-    std::cout << GridLogMessage << "Re(v_c^dag D_c^dag D_c v_c) = " << real(dot) << " | Im(v_c^dag D_c^dag D_c v_c) = " << imag(dot)
-              << " | relative deviation = " << deviation;
+    std::cout << GridLogMG << " Level " << level << ": Re(v_c^dag D_c^dag D_c v_c) = " << real(dot)
+              << " | Im(v_c^dag D_c^dag D_c v_c) = " << imag(dot) << " | relative deviation = " << deviation;
 
     if(deviation > tolerance) {
       std::cout << " > " << tolerance << " -> check failed" << std::endl;
       // abort();
     } else {
-      std::cout << " < " << tolerance << " -> check passed" << std::endl;
+      std::cout << " < " << tolerance << " -> check passed"
+                << std::endl; // TODO: this check will work only when I got Mdag in CoarsenedMatrix to work
     }
+
+    _NextPreconditionerLevel->runChecks();
   }
 };
 
+// Specialize the coarsest level, this corresponds to counting downwards with level: coarsest = 0, finest = N
+template<class Fobj, class CoarseScalar, int nCoarseSpins, int nbasis, class Matrix>
+class MultiGridPreconditioner<Fobj, CoarseScalar, nCoarseSpins, nbasis, 0, Matrix> : public LinearFunction<Lattice<Fobj>> {
+public:
+  /////////////////////////////////////////////
+  // Type Definitions
+  /////////////////////////////////////////////
+
+  typedef Matrix        FineMatrix;
+  typedef Lattice<Fobj> FineVector;
+
+  /////////////////////////////////////////////
+  // Member Data
+  /////////////////////////////////////////////
+
+  LevelInfo & _LevelInfo;
+  FineMatrix &_FineMatrix;
+  FineMatrix &_SmootherMatrix;
+
+  /////////////////////////////////////////////
+  // Member Functions
+  /////////////////////////////////////////////
+
+  MultiGridPreconditioner(LevelInfo &LvlInfo, FineMatrix &FineMat, FineMatrix &SmootherMat)
+    : _LevelInfo(LvlInfo), _FineMatrix(FineMat), _SmootherMatrix(SmootherMat) {}
+
+  void setup() {}
+
+  virtual void operator()(Lattice<Fobj> const &in, Lattice<Fobj> &out) {
+
+    TrivialPrecon<FineVector>                      fineTrivialPreconditioner;
+    FlexibleGeneralisedMinimalResidual<FineVector> fineFGMRES(1.0e-14, 1, fineTrivialPreconditioner, 1, false);
+
+    MdagMLinearOperator<FineMatrix, FineVector> fineMdagMOp(_FineMatrix);
+
+    fineFGMRES(fineMdagMOp, in, out);
+  }
+
+  void runChecks() {}
+};
+
+template<class Fobj, class CoarseScalar, int nCoarseSpins, int nbasis, class Matrix>
+using FourLevelMGPreconditioner = MultiGridPreconditioner<Fobj, CoarseScalar, nCoarseSpins, nbasis, 4 - 1, Matrix>;
+
+template<class Fobj, class CoarseScalar, int nCoarseSpins, int nbasis, class Matrix>
+using ThreeLevelMGPreconditioner = MultiGridPreconditioner<Fobj, CoarseScalar, nCoarseSpins, nbasis, 3 - 1, Matrix>;
+
+template<class Fobj, class CoarseScalar, int nCoarseSpins, int nbasis, class Matrix>
+using TwoLevelMGPreconditioner = MultiGridPreconditioner<Fobj, CoarseScalar, nCoarseSpins, nbasis, 2 - 1, Matrix>;
+
+template<class Fobj, class CoarseScalar, int nCoarseSpins, int nbasis, int nlevel, class Matrix>
+using NLevelMGPreconditioner = MultiGridPreconditioner<Fobj, CoarseScalar, nCoarseSpins, nbasis, nlevel - 1, Matrix>;
+
 int main(int argc, char **argv) {
 
   Grid_init(&argc, &argv);
 
-  params.domainsize      = 1;
-  params.coarsegrids     = 1;
-  params.domaindecompose = 0;
-  params.order           = 30;
-  params.Ls              = 1;
-  params.mq              = -0.5;
-  params.lo              = 0.5;
-  params.hi              = 70.0;
-  params.steps           = 1;
-
-  std::cout << GridLogMessage << "**************************************************" << std::endl;
-  std::cout << GridLogMessage << "Params: " << std::endl;
-  std::cout << GridLogMessage << "**************************************************" << std::endl;
-
-  std::cout << params << std::endl;
-
-  std::cout << GridLogMessage << "**************************************************" << std::endl;
-  std::cout << GridLogMessage << "Set up some fine level stuff: " << std::endl;
-  std::cout << GridLogMessage << "**************************************************" << std::endl;
-
   GridCartesian *FGrid = SpaceTimeGrid::makeFourDimGrid(GridDefaultLatt(), GridDefaultSimd(Nd, vComplex::Nsimd()), GridDefaultMpi());
   GridRedBlackCartesian *FrbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(FGrid);
 
@@ -451,312 +561,54 @@ int main(int argc, char **argv) {
   LatticeGaugeField Umu(FGrid); SU3::HotConfiguration(fPRNG, Umu);
   // clang-format on
 
-  RealD mass = params.mq;
-
-  std::cout << GridLogMessage << "**************************************************" << std::endl;
-  std::cout << GridLogMessage << "Set up some coarser levels stuff: " << std::endl;
-  std::cout << GridLogMessage << "**************************************************" << std::endl;
-
-  const int nbasis = 20; // fix the number of test vector to the same
-                         // number on every level for now
-
-  //////////////////////////////////////////
-  // toggle to run two/three level method
-  //////////////////////////////////////////
-
-  // two-level algorithm
-  std::vector<std::vector<int>> blockSizes({{2, 2, 2, 2}});
-  CoarseGrids<nbasis>           coarseGrids(blockSizes, 1);
-
-  // // three-level algorithm
-  // std::vector<std::vector<int>> blockSizes({{2, 2, 2, 2}, {2, 2, 1, 1}});
-  // CoarseGrids<nbasis>           coarseGrids(blockSizes, 2);
-
-  std::cout << GridLogMessage << "**************************************************" << std::endl;
-  std::cout << GridLogMessage << "Some typedefs" << std::endl;
-  std::cout << GridLogMessage << "**************************************************" << std::endl;
-
-  // typedefs for transition from fine to first coarsened grid
-  typedef vSpinColourVector                                                                 FineSiteVector;
-  typedef vTComplex                                                                         CoarseSiteScalar;
-  typedef Aggregation<FineSiteVector, CoarseSiteScalar, nbasis>                             Subspace;
-  typedef CoarsenedMatrix<FineSiteVector, CoarseSiteScalar, nbasis>                         CoarseOperator;
-  typedef CoarseOperator::CoarseVector                                                      CoarseVector;
-  typedef CoarseOperator::siteVector                                                        CoarseSiteVector;
-  typedef TestVectorAnalyzer<LatticeFermion, nbasis>                                        FineTVA;
-  typedef MultiGridPreconditioner<FineSiteVector, CoarseSiteScalar, nbasis, WilsonFermionR> FineMGPreconditioner;
-  typedef TrivialPrecon<LatticeFermion>                                                     FineTrivialPreconditioner;
-
-  // typedefs for transition from a coarse to the next coarser grid (some defs remain the same)
-  typedef Aggregation<CoarseSiteVector, CoarseSiteScalar, nbasis>                             SubSubSpace;
-  typedef CoarsenedMatrix<CoarseSiteVector, CoarseSiteScalar, nbasis>                         CoarseCoarseOperator;
-  typedef CoarseCoarseOperator::CoarseVector                                                  CoarseCoarseVector;
-  typedef CoarseCoarseOperator::siteVector                                                    CoarseCoarseSiteVector;
-  typedef TestVectorAnalyzer<CoarseVector, nbasis>                                            CoarseTVA;
-  typedef MultiGridPreconditioner<CoarseSiteVector, CoarseSiteScalar, nbasis, CoarseOperator> CoarseMGPreconditioner;
-  typedef TrivialPrecon<CoarseVector>                                                         CoarseTrivialPreconditioner;
-
-  static_assert(std::is_same<CoarseVector, CoarseCoarseVector>::value, "CoarseVector and CoarseCoarseVector must be of the same type");
-
-  std::cout << GridLogMessage << "**************************************************" << std::endl;
-  std::cout << GridLogMessage << "Building the wilson operator on the fine grid" << std::endl;
-  std::cout << GridLogMessage << "**************************************************" << std::endl;
+  RealD     mass   = 0.5;
+  const int nbasis = 20;
 
   WilsonFermionR Dw(Umu, *FGrid, *FrbGrid, mass);
 
-  std::cout << GridLogMessage << "**************************************************" << std::endl;
-  std::cout << GridLogMessage << "Setting up linear operators" << std::endl;
-  std::cout << GridLogMessage << "**************************************************" << std::endl;
+  // mgParams.blockSizes = {{2, 2, 2, 2}, {2, 2, 1, 1}, {1, 1, 2, 1}};
+  // mgParams.blockSizes = {{2, 2, 2, 2}, {2, 2, 1, 1}};
+  mgParams.blockSizes = {{2, 2, 2, 2}};
+  mgParams.nLevels    = mgParams.blockSizes.size() + 1;
+
+  std::cout << mgParams << std::endl;
+
+  LevelInfo levelInfo(FGrid, mgParams);
 
   MdagMLinearOperator<WilsonFermionR, LatticeFermion> FineMdagMOp(Dw);
 
-  std::cout << GridLogMessage << "**************************************************" << std::endl;
-  std::cout << GridLogMessage << "Calling Aggregation class to build subspaces" << std::endl;
-  std::cout << GridLogMessage << "**************************************************" << std::endl;
+  TrivialPrecon<LatticeFermion>                                                     TrivialPrecon;
+  TwoLevelMGPreconditioner<vSpinColourVector, vTComplex, 1, nbasis, WilsonFermionR> TwoLevelMGPrecon(levelInfo, Dw, Dw);
+  // ThreeLevelMGPreconditioner<vSpinColourVector, vTComplex, 1, nbasis, WilsonFermionR> ThreeLevelMGPrecon(levelInfo, Dw, Dw);
+  // FourLevelMGPreconditioner<vSpinColourVector, vTComplex, 1, nbasis, WilsonFermionR> FourLevelMGPrecon(levelInfo, Dw, Dw);
+  // NLevelMGPreconditioner<vSpinColourVector, vTComplex, 1, nbasis, 4, WilsonFermionR> FourLevelMGPrecon(levelInfo, Dw, Dw);
 
-  Subspace FineAggregates(coarseGrids.Grids[0], FGrid, 0);
+  TwoLevelMGPrecon.setup();
+  TwoLevelMGPrecon.runChecks();
 
-  assert((nbasis & 0x1) == 0);
-  int nb = nbasis / 2;
-  std::cout << GridLogMessage << " nbasis/2 = " << nb << std::endl;
+  // ThreeLevelMGPrecon.setup();
+  // ThreeLevelMGPrecon.runChecks();
 
-  FineAggregates.CreateSubspace(fPRNG, FineMdagMOp /*, nb */); // Don't specify nb to see the orthogonalization check
+  // FourLevelMGPrecon.setup();
+  // FourLevelMGPrecon.runChecks();
 
-  std::cout << GridLogMessage << "**************************************************" << std::endl;
-  std::cout << GridLogMessage << "Test vector analysis after initial creation of subspace" << std::endl;
-  std::cout << GridLogMessage << "**************************************************" << std::endl;
-
-  FineTVA fineTVA;
-  fineTVA(FineMdagMOp, FineAggregates.subspace);
-
-  std::cout << GridLogMessage << "**************************************************" << std::endl;
-  std::cout << GridLogMessage << "Projecting subspace to definite chirality" << std::endl;
-  std::cout << GridLogMessage << "**************************************************" << std::endl;
-
-  for(int n = 0; n < nb; n++) {
-    FineAggregates.subspace[n + nb] = g5 * FineAggregates.subspace[n];
-  }
-
-  auto coarseSites = 1;
-  for(auto const &elem : coarseGrids.LattSizes[0]) coarseSites *= elem;
-
-  std::cout << GridLogMessage << "Norms of MG test vectors after chiral projection (coarse sites = " << coarseSites << ")" << std::endl;
-  for(int n = 0; n < nbasis; n++) {
-    std::cout << GridLogMessage << "vec[" << n << "] = " << norm2(FineAggregates.subspace[n]) << std::endl;
-  }
-
-  std::cout << GridLogMessage << "**************************************************" << std::endl;
-  std::cout << GridLogMessage << "Building coarse representation of Dirac operator" << std::endl;
-  std::cout << GridLogMessage << "**************************************************" << std::endl;
-
-  CoarseOperator Dc(*coarseGrids.Grids[0]);
-
-  Dc.CoarsenOperator(FGrid, FineMdagMOp, FineAggregates);
-
-  MdagMLinearOperator<CoarseOperator, CoarseVector> CoarseMdagMOp(Dc);
-
-  std::cout << GridLogMessage << "**************************************************" << std::endl;
-  std::cout << GridLogMessage << "Test vector analysis after construction of coarse Dirac operator" << std::endl;
-  std::cout << GridLogMessage << "**************************************************" << std::endl;
-
-  fineTVA(FineMdagMOp, FineAggregates.subspace);
-
-  std::cout << GridLogMessage << "**************************************************" << std::endl;
-  std::cout << GridLogMessage << "Testing the linear operators" << std::endl;
-  std::cout << GridLogMessage << "**************************************************" << std::endl;
-
-  // clang-format off
-  testLinearOperator(FineMdagMOp,   FGrid,                "FineMdagMOp");   std::cout << GridLogMessage << std::endl;
-  testLinearOperator(CoarseMdagMOp, coarseGrids.Grids[0], "CoarseMdagMOp"); std::cout << GridLogMessage << std::endl;
-  // clang-format on
-
-  std::cout << GridLogMessage << "**************************************************" << std::endl;
-  std::cout << GridLogMessage << "Building coarse vectors" << std::endl;
-  std::cout << GridLogMessage << "**************************************************" << std::endl;
-
-  CoarseVector coarseSource(coarseGrids.Grids[0]);
-  CoarseVector coarseResult(coarseGrids.Grids[0]);
-  gaussian(coarseGrids.PRNGs[0], coarseSource);
-  coarseResult = zero;
-
-  std::cout << GridLogMessage << "**************************************************" << std::endl;
-  std::cout << GridLogMessage << "Building some coarse space solvers" << std::endl;
-  std::cout << GridLogMessage << "**************************************************" << std::endl;
-
-  std::vector<std::unique_ptr<OperatorFunction<CoarseVector>>> dummyCoarseSolvers;
-  dummyCoarseSolvers.emplace_back(new GeneralisedMinimalResidual<CoarseVector>(5.0e-2, 100, 8, false));
-  dummyCoarseSolvers.emplace_back(new MinimalResidual<CoarseVector>(5.0e-2, 100, 0.8, false));
-  dummyCoarseSolvers.emplace_back(new ConjugateGradient<CoarseVector>(5.0e-2, 100, false));
-
-  std::cout << GridLogMessage << "**************************************************" << std::endl;
-  std::cout << GridLogMessage << "Testing some coarse space solvers" << std::endl;
-  std::cout << GridLogMessage << "**************************************************" << std::endl;
-
-  std::cout << GridLogMessage << "checking norm of coarse src " << norm2(coarseSource) << std::endl;
-
-  for(auto const &solver : dummyCoarseSolvers) {
-    coarseResult = zero;
-    (*solver)(CoarseMdagMOp, coarseSource, coarseResult);
-  }
-
-  std::cout << GridLogMessage << "**************************************************" << std::endl;
-  std::cout << GridLogMessage << "Building a multigrid preconditioner" << std::endl;
-  std::cout << GridLogMessage << "**************************************************" << std::endl;
-
-  FineMGPreconditioner      FineMGPrecon(FineAggregates, Dc, FineMdagMOp, Dw, FineMdagMOp, Dw);
-  FineTrivialPreconditioner FineSimplePrecon;
-
-  FineMGPrecon.runChecks(coarseGrids, 0);
-
-  std::cout << GridLogMessage << "**************************************************" << std::endl;
-  std::cout << GridLogMessage << "Building krylov subspace solvers w/ & w/o MG Preconditioner" << std::endl;
-  std::cout << GridLogMessage << "**************************************************" << std::endl;
+  // NLevelMGPrecon.setup();
+  // NLevelMGPrecon.runChecks();
 
   std::vector<std::unique_ptr<OperatorFunction<LatticeFermion>>> solvers;
-  solvers.emplace_back(new FlexibleGeneralisedMinimalResidual<LatticeFermion>(1.0e-12, 4000000, FineSimplePrecon, 25, false));
-  solvers.emplace_back(new FlexibleGeneralisedMinimalResidual<LatticeFermion>(1.0e-12, 100, FineMGPrecon, 25, false));
-  solvers.emplace_back(new PrecGeneralisedConjugateResidual<LatticeFermion>(1.0e-12, 4000000, FineSimplePrecon, 25, 25));
 
-  std::cout << GridLogMessage << "**************************************************" << std::endl;
-  std::cout << GridLogMessage << "Testing the (un)?preconditioned solvers" << std::endl;
-  std::cout << GridLogMessage << "**************************************************" << std::endl;
+  solvers.emplace_back(new FlexibleGeneralisedMinimalResidual<LatticeFermion>(1.0e-12, 50000, TrivialPrecon, 1000, false));
+  solvers.emplace_back(new FlexibleGeneralisedMinimalResidual<LatticeFermion>(1.0e-12, 50000, TwoLevelMGPrecon, 1000, false));
+  // solvers.emplace_back(new FlexibleGeneralisedMinimalResidual<LatticeFermion>(1.0e-12, 50000, ThreeLevelMGPrecon, 1000, false));
+  // solvers.emplace_back(new FlexibleGeneralisedMinimalResidual<LatticeFermion>(1.0e-12, 50000, FourLevelMGPrecon, 1000, false));
+  // solvers.emplace_back(new FlexibleGeneralisedMinimalResidual<LatticeFermion>(1.0e-12, 50000, NLevelMGPrecon, 1000, false));
 
   for(auto const &solver : solvers) {
-    std::cout << GridLogMessage << "checking norm of fine src " << norm2(src) << std::endl;
+    std::cout << "Starting with a new solver" << std::endl;
     result = zero;
     (*solver)(FineMdagMOp, src, result);
     std::cout << std::endl;
   }
 
-#if 0
-  if(coarseGrids.LattSizes.size() == 2) {
-
-    std::cout << GridLogMessage << "**************************************************" << std::endl;
-    std::cout << GridLogMessage << "Some testing for construction of a second coarse level" << std::endl;
-    std::cout << GridLogMessage << "**************************************************" << std::endl;
-
-    // std::cout << GridLogMessage << "**************************************************" << std::endl;
-    // std::cout << GridLogMessage << "Calling Aggregation class to build subspaces" << std::endl;
-    // std::cout << GridLogMessage << "**************************************************" << std::endl;
-
-    SubSubSpace CoarseAggregates(coarseGrids.Grids[1], coarseGrids.Grids[0], 0);
-    CoarseAggregates.CreateSubspace(coarseGrids.PRNGs[0], CoarseMdagMOp);
-
-    // std::cout << GridLogMessage << "**************************************************" << std::endl;
-    // std::cout << GridLogMessage << "Test vector analysis after initial creation of subspace" << std::endl;
-    // std::cout << GridLogMessage << "**************************************************" << std::endl;
-
-    // // this doesn't work because this function applies g5 to a vector, which
-    // // doesn't work for coarse vectors atm -> FIXME
-    // CoarseTVA coarseTVA;
-    // coarseTVA(CoarseMdagMOp, CoarseAggregates.subspace);
-
-    // std::cout << GridLogMessage << "**************************************************" << std::endl;
-    // std::cout << GridLogMessage << "Projecting subspace to definite chirality" << std::endl;
-    // std::cout << GridLogMessage << "**************************************************" << std::endl;
-
-    // // cannot apply g5 to coarse vectors atm -> FIXME
-    // for(int n=0;n<nb;n++){
-    //   CoarseAggregates.subspace[n+nb] = g5 * CoarseAggregates.subspace[n];
-    //   std::cout<<GridLogMessage<<n<<" subspace "<<norm2(CoarseAggregates.subspace[n+nb])<<" "<<norm2(CoarseAggregates.subspace[n]) <<std::endl;
-    // }
-
-    auto coarseCoarseSites = 1;
-    for(auto const &elem : coarseGrids.LattSizes[1]) coarseCoarseSites *= elem;
-
-    std::cout << GridLogMessage << "Norms of MG test vectors after chiral projection (coarse coarse sites = " << coarseCoarseSites << ")"
-              << std::endl;
-    for(int n = 0; n < nbasis; n++) {
-      std::cout << GridLogMessage << "vec[" << n << "] = " << norm2(CoarseAggregates.subspace[n]) << std::endl;
-    }
-
-    // std::cout << GridLogMessage << "**************************************************" << std::endl;
-    // std::cout << GridLogMessage << "Building coarse coarse representation of Dirac operator" << std::endl;
-    // std::cout << GridLogMessage << "**************************************************" << std::endl;
-
-    CoarseCoarseOperator Dcc(*coarseGrids.Grids[1]);
-
-    Dcc.CoarsenOperator(coarseGrids.Grids[0], CoarseMdagMOp, CoarseAggregates);
-
-    MdagMLinearOperator<CoarseCoarseOperator, CoarseCoarseVector> CoarseCoarseMdagMOp(Dcc);
-
-    // std::cout << GridLogMessage << "**************************************************" << std::endl;
-    // std::cout << GridLogMessage << "Test vector analysis after construction of coarse Dirac operator" << std::endl;
-    // std::cout << GridLogMessage << "**************************************************" << std::endl;
-
-    // // this doesn't work because this function applies g5 to a vector, which
-    // // doesn't work for coarse vectors atm -> FIXME
-    // coarseTVA(CoarseMdagMOp, CoarseAggregates.subspace);
-
-    // std::cout << GridLogMessage << "**************************************************" << std::endl;
-    // std::cout << GridLogMessage << "Testing the linear operators" << std::endl;
-    // std::cout << GridLogMessage << "**************************************************" << std::endl;
-
-    // clang-format off
-    testLinearOperator(CoarseMdagMOp,       coarseGrids.Grids[0], "CoarseMdagMOp");
-    testLinearOperator(CoarseCoarseMdagMOp, coarseGrids.Grids[1], "CoarseCoarseMdagMOp");
-    // clang-format on
-
-    // std::cout << GridLogMessage << "**************************************************" << std::endl;
-    // std::cout << GridLogMessage << "Building coarse coarse vectors" << std::endl;
-    // std::cout << GridLogMessage << "**************************************************" << std::endl;
-
-    CoarseCoarseVector coarseCoarseSource(coarseGrids.Grids[1]);
-    CoarseCoarseVector coarseCoarseResult(coarseGrids.Grids[1]);
-    gaussian(coarseGrids.PRNGs[1], coarseCoarseSource);
-    coarseCoarseResult = zero;
-
-    // std::cout << GridLogMessage << "**************************************************" << std::endl;
-    // std::cout << GridLogMessage << "Building some coarse space solvers" << std::endl;
-    // std::cout << GridLogMessage << "**************************************************" << std::endl;
-
-    std::vector<std::unique_ptr<OperatorFunction<CoarseCoarseVector>>> dummyCoarseCoarseSolvers;
-    dummyCoarseCoarseSolvers.emplace_back(new GeneralisedMinimalResidual<CoarseCoarseVector>(5.0e-2, 100, 8, false));
-    dummyCoarseCoarseSolvers.emplace_back(new MinimalResidual<CoarseCoarseVector>(5.0e-2, 100, 0.8, false));
-    dummyCoarseCoarseSolvers.emplace_back(new ConjugateGradient<CoarseCoarseVector>(5.0e-2, 100, false));
-
-    // std::cout << GridLogMessage << "**************************************************" << std::endl;
-    // std::cout << GridLogMessage << "Testing some coarse coarse space solvers" << std::endl;
-    // std::cout << GridLogMessage << "**************************************************" << std::endl;
-
-    std::cout << GridLogMessage << "checking norm of coarse coarse src " << norm2(coarseCoarseSource) << std::endl;
-
-    for(auto const &solver : dummyCoarseCoarseSolvers) {
-      coarseCoarseResult = zero;
-      (*solver)(CoarseCoarseMdagMOp, coarseCoarseSource, coarseCoarseResult);
-    }
-
-    // std::cout << GridLogMessage << "**************************************************" << std::endl;
-    // std::cout << GridLogMessage << "Building a multigrid preconditioner" << std::endl;
-    // std::cout << GridLogMessage << "**************************************************" << std::endl;
-
-    CoarseMGPreconditioner      CoarseMGPrecon(CoarseAggregates, Dcc, CoarseMdagMOp, Dc, CoarseMdagMOp, Dc);
-    CoarseTrivialPreconditioner CoarseSimplePrecon;
-
-    CoarseMGPrecon.runChecks(coarseGrids, 1);
-
-    // std::cout << GridLogMessage << "**************************************************" << std::endl;
-    // std::cout << GridLogMessage << "Building krylov subspace solvers w/ & w/o MG Preconditioner" << std::endl;
-    // std::cout << GridLogMessage << "**************************************************" << std::endl;
-
-    std::vector<std::unique_ptr<OperatorFunction<CoarseVector>>> solvers;
-    solvers.emplace_back(new FlexibleGeneralisedMinimalResidual<CoarseVector>(1.0e-12, 4000000, CoarseSimplePrecon, 25, false));
-    solvers.emplace_back(new FlexibleGeneralisedMinimalResidual<CoarseVector>(1.0e-12, 100, CoarseMGPrecon, 25, false));
-    solvers.emplace_back(new PrecGeneralisedConjugateResidual<CoarseVector>(1.0e-12, 4000000, CoarseSimplePrecon, 25, 25));
-
-    // std::cout << GridLogMessage << "**************************************************" << std::endl;
-    // std::cout << GridLogMessage << "Testing the (un)?preconditioned solvers" << std::endl;
-    // std::cout << GridLogMessage << "**************************************************" << std::endl;
-
-    for(auto const &solver : solvers) {
-      std::cout << GridLogMessage << "checking norm of fine src " << norm2(coarseSource) << std::endl;
-      coarseResult = zero;
-      (*solver)(CoarseMdagMOp, coarseSource, coarseResult);
-      std::cout << std::endl;
-    }
-
-  }
-#endif
-
   Grid_finalize();
 }

From bbe1d5b49e45b162a35e2e5d14ba4f1a72001212 Mon Sep 17 00:00:00 2001
From: Daniel Richtmann <daniel.richtmann@physik.uni-regensburg.de>
Date: Fri, 23 Mar 2018 20:00:41 +0100
Subject: [PATCH 097/130] WilsonMG: Temporarily use GMRES in construction of
 basis vectors

This can go back to CG once Mdag in CoarsenedMatrix works.
---
 lib/algorithms/CoarsenedMatrix.h | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/lib/algorithms/CoarsenedMatrix.h b/lib/algorithms/CoarsenedMatrix.h
index fdf0abce..15db315a 100644
--- a/lib/algorithms/CoarsenedMatrix.h
+++ b/lib/algorithms/CoarsenedMatrix.h
@@ -209,7 +209,8 @@ namespace Grid {
 
       RealD scale;
 
-      ConjugateGradient<FineField> CG(1.0e-2,10000);
+      TrivialPrecon<FineField> TrivialPrec;
+      FlexibleGeneralisedMinimalResidual<FineField> FGMRES(1.0e-14,1,TrivialPrec,1,false); // TODO: need to use GMRES as long as Mdag doesn't work on coarser levels (i.e., MdagM isn't hermitian)
       FineField noise(FineGrid);
       FineField Mn(FineGrid);
 
@@ -223,7 +224,7 @@ namespace Grid {
 
 	for(int i=0;i<3;i++){
 
-	  CG(hermop,noise,subspace[b]);
+	  FGMRES(hermop,noise,subspace[b]);
 
 	  noise = subspace[b];
 	  scale = std::pow(norm2(noise),-0.5); 
@@ -302,7 +303,7 @@ namespace Grid {
       return norm2(out);
     };
 
-    RealD Mdag (const CoarseVector &in, CoarseVector &out){ 
+    RealD Mdag (const CoarseVector &in, CoarseVector &out){ // TODO: get this correct
       return M(in,out);
     };
 

From 3c3ec4e267bfa9e28259167f5c6f615b42304a96 Mon Sep 17 00:00:00 2001
From: Daniel Richtmann <daniel.richtmann@physik.uni-regensburg.de>
Date: Fri, 23 Mar 2018 20:46:10 +0100
Subject: [PATCH 098/130] WilsonMG: Move tests for Wilson & WilsonClover into
 the same file

---
 tests/solver/Test_wilson_mg.cc       | 614 -------------------
 tests/solver/Test_wilsonclover_mg.cc | 848 ++++++++++++---------------
 2 files changed, 373 insertions(+), 1089 deletions(-)
 delete mode 100644 tests/solver/Test_wilson_mg.cc

diff --git a/tests/solver/Test_wilson_mg.cc b/tests/solver/Test_wilson_mg.cc
deleted file mode 100644
index bbbfe1e0..00000000
--- a/tests/solver/Test_wilson_mg.cc
+++ /dev/null
@@ -1,614 +0,0 @@
-/*************************************************************************************
-
-    Grid physics library, www.github.com/paboyle/Grid 
-
-    Source file: ./tests/solver/Test_wilson_mg.cc
-
-    Copyright (C) 2017
-
-    Author: Daniel Richtmann <daniel.richtmann@ur.de>
-
-    This program is free software; you can redistribute it and/or modify
-    it under the terms of the GNU General Public License as published by
-    the Free Software Foundation; either version 2 of the License, or
-    (at your option) any later version.
-
-    This program is distributed in the hope that it will be useful,
-    but WITHOUT ANY WARRANTY; without even the implied warranty of
-    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-    GNU General Public License for more details.
-
-    You should have received a copy of the GNU General Public License along
-    with this program; if not, write to the Free Software Foundation, Inc.,
-    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
-
-    See the full license in the file "LICENSE" in the top level distribution directory
-    *************************************************************************************/
-/*  END LEGAL */
-
-#include <Grid/Grid.h>
-#include <Grid/algorithms/iterative/PrecGeneralisedConjugateResidual.h>
-
-using namespace std;
-using namespace Grid;
-using namespace Grid::QCD;
-
-template<class Field, int nbasis> class TestVectorAnalyzer {
-public:
-  void operator()(LinearOperatorBase<Field> &Linop, std::vector<Field> const &vectors, int nn = nbasis) {
-
-    auto positiveOnes = 0;
-
-    std::vector<Field> tmp(4, vectors[0]._grid);
-    Gamma              g5(Gamma::Algebra::Gamma5);
-
-    std::cout << GridLogMessage << "Test vector analysis:" << std::endl;
-
-    for(auto i = 0; i < nn; ++i) {
-
-      Linop.Op(vectors[i], tmp[3]);
-
-      tmp[0] = g5 * tmp[3];
-
-      auto lambda = innerProduct(vectors[i], tmp[0]) / innerProduct(vectors[i], vectors[i]);
-
-      tmp[1] = tmp[0] - lambda * vectors[i];
-
-      auto mu = ::sqrt(norm2(tmp[1]) / norm2(vectors[i]));
-
-      auto nrm = ::sqrt(norm2(vectors[i]));
-
-      if(real(lambda) > 0)
-        positiveOnes++;
-
-      std::cout << GridLogMessage << std::scientific << std::setprecision(2) << std::setw(2) << std::showpos << "vector " << i << ": "
-                << "singular value: " << lambda << ", singular vector precision: " << mu << ", norm: " << nrm << std::endl;
-    }
-    std::cout << GridLogMessage << std::scientific << std::setprecision(2) << std::setw(2) << std::showpos << positiveOnes << " out of "
-              << nn << " vectors were positive" << std::endl;
-  }
-};
-
-// clang-format off
-struct MultigridParams : Serializable {
-public:
-  GRID_SERIALIZABLE_CLASS_MEMBERS(MultigridParams,
-                                  int, nLevels,
-                                  std::vector<std::vector<int>>, blockSizes);
-  MultigridParams(){};
-};
-MultigridParams mgParams;
-// clang-format on
-
-struct LevelInfo {
-public:
-  std::vector<std::vector<int>> Seeds;
-  std::vector<GridCartesian *>  Grids;
-  std::vector<GridParallelRNG>  PRNGs;
-
-  LevelInfo(GridCartesian *FineGrid, MultigridParams const &Params) {
-    auto nCoarseLevels = Params.blockSizes.size();
-
-    assert(nCoarseLevels == Params.nLevels - 1);
-
-    // set up values for finest grid
-    Grids.push_back(FineGrid);
-    Seeds.push_back({1, 2, 3, 4});
-    PRNGs.push_back(GridParallelRNG(Grids.back()));
-    PRNGs.back().SeedFixedIntegers(Seeds.back());
-
-    // set up values for coarser grids
-    for(int level = 1; level < Params.nLevels; ++level) {
-      auto Nd  = Grids[level - 1]->_ndimension;
-      auto tmp = Grids[level - 1]->_fdimensions;
-      assert(tmp.size() == Nd);
-
-      Seeds.push_back(std::vector<int>(Nd));
-
-      for(int d = 0; d < Nd; ++d) {
-        tmp[d] /= Params.blockSizes[level - 1][d];
-        Seeds[level][d] = (level)*Nd + d + 1;
-      }
-
-      Grids.push_back(SpaceTimeGrid::makeFourDimGrid(tmp, GridDefaultSimd(Nd, vComplex::Nsimd()), GridDefaultMpi()));
-      PRNGs.push_back(GridParallelRNG(Grids[level]));
-
-      PRNGs[level].SeedFixedIntegers(Seeds[level]);
-    }
-
-    std::cout << GridLogMessage << "Constructed " << Params.nLevels << " levels" << std::endl;
-
-    // The construction above corresponds to the finest level having level == 0
-    // (simply because it's not as ugly to implement), but we need it the
-    // other way round (i.e., the coarsest level to have level == 0) for the MG
-    // Preconditioner -> reverse the vectors
-
-    std::reverse(Seeds.begin(), Seeds.end());
-    std::reverse(Grids.begin(), Grids.end());
-    std::reverse(PRNGs.begin(), PRNGs.end());
-
-    for(int level = 0; level < Params.nLevels; ++level) {
-      std::cout << GridLogMessage << "level = " << level << ":" << std::endl;
-      Grids[level]->show_decomposition();
-    }
-  }
-};
-
-template<class Field> void testLinearOperator(LinearOperatorBase<Field> &LinOp, GridBase *Grid, std::string const &name = "") {
-
-  std::vector<int> seeds({1, 2, 3, 4});
-  GridParallelRNG  RNG(Grid);
-  RNG.SeedFixedIntegers(seeds);
-
-  {
-    std::cout << GridLogMessage << "Testing that Mdiag + Σ_μ Mdir_μ == M for operator " << name << ":" << std::endl;
-
-    // clang-format off
-    Field src(Grid);    random(RNG, src);
-    Field ref(Grid);    ref    = zero;
-    Field result(Grid); result = zero;
-    Field diag(Grid);   diag   = zero;
-    Field sumDir(Grid); sumDir = zero;
-    Field tmp(Grid);
-    Field err(Grid);
-    // clang-format on
-
-    std::cout << setprecision(9);
-
-    std::cout << GridLogMessage << " norm2(src)\t\t\t\t= " << norm2(src) << std::endl;
-
-    LinOp.OpDiag(src, diag);
-    std::cout << GridLogMessage << " norm2(Mdiag * src)\t\t\t= " << norm2(diag) << std::endl;
-
-    for(int dir = 0; dir < 4; dir++) {
-      for(auto disp : {+1, -1}) {
-        LinOp.OpDir(src, tmp, dir, disp);
-        std::cout << GridLogMessage << " norm2(Mdir_{" << dir << "," << disp << "} * src)\t\t= " << norm2(tmp) << std::endl;
-        sumDir = sumDir + tmp;
-      }
-    }
-    std::cout << GridLogMessage << " norm2(Σ_μ Mdir_μ * src)\t\t= " << norm2(sumDir) << std::endl;
-
-    result = diag + sumDir;
-    std::cout << GridLogMessage << " norm2((Mdiag + Σ_μ Mdir_μ) * src)\t= " << norm2(result) << std::endl;
-
-    LinOp.Op(src, ref);
-    std::cout << GridLogMessage << " norm2(M * src)\t\t\t= " << norm2(ref) << std::endl;
-
-    err = ref - result;
-    std::cout << GridLogMessage << " Absolute deviation\t\t\t= " << norm2(err) << std::endl;
-    std::cout << GridLogMessage << " Relative deviation\t\t\t= " << norm2(err) / norm2(ref) << std::endl;
-  }
-
-  {
-    std::cout << GridLogMessage << "Testing hermiticity stochastically for operator " << name << ":" << std::endl;
-
-    // clang-format off
-    Field phi(Grid); random(RNG, phi);
-    Field chi(Grid); random(RNG, chi);
-    Field MPhi(Grid);
-    Field MdagChi(Grid);
-    // clang-format on
-
-    LinOp.Op(phi, MPhi);
-    LinOp.AdjOp(chi, MdagChi);
-
-    ComplexD chiMPhi    = innerProduct(chi, MPhi);
-    ComplexD phiMdagChi = innerProduct(phi, MdagChi);
-
-    ComplexD phiMPhi    = innerProduct(phi, MPhi);
-    ComplexD chiMdagChi = innerProduct(chi, MdagChi);
-
-    std::cout << GridLogMessage << " chiMPhi = " << chiMPhi << " phiMdagChi = " << phiMdagChi
-              << " difference = " << chiMPhi - conjugate(phiMdagChi) << std::endl;
-
-    std::cout << GridLogMessage << " phiMPhi = " << phiMPhi << " chiMdagChi = " << chiMdagChi << " <- should be real if hermitian"
-              << std::endl;
-  }
-
-  {
-    std::cout << GridLogMessage << "Testing linearity for operator " << name << ":" << std::endl;
-
-    // clang-format off
-    Field phi(Grid); random(RNG, phi);
-    Field chi(Grid); random(RNG, chi);
-    Field phiPlusChi(Grid);
-    Field MPhi(Grid);
-    Field MChi(Grid);
-    Field MPhiPlusChi(Grid);
-    Field linearityError(Grid);
-    // clang-format on
-
-    LinOp.Op(phi, MPhi);
-    LinOp.Op(chi, MChi);
-
-    phiPlusChi = phi + chi;
-
-    LinOp.Op(phiPlusChi, MPhiPlusChi);
-
-    linearityError = MPhiPlusChi - MPhi;
-    linearityError = linearityError - MChi;
-
-    std::cout << GridLogMessage << " norm2(linearityError) = " << norm2(linearityError) << std::endl;
-  }
-}
-
-template<class Fobj, class CoarseScalar, int nCoarseSpins, int nBasis, int level, class Matrix>
-class MultiGridPreconditioner : public LinearFunction<Lattice<Fobj>> {
-public:
-  /////////////////////////////////////////////
-  // Type Definitions
-  /////////////////////////////////////////////
-
-  typedef Aggregation<Fobj, CoarseScalar, nBasis>                                                                Aggregates;
-  typedef CoarsenedMatrix<Fobj, CoarseScalar, nBasis>                                                            CoarseMatrix;
-  typedef typename Aggregates::CoarseVector                                                                      CoarseVector;
-  typedef typename Aggregates::siteVector                                                                        CoarseSiteVector;
-  typedef Matrix                                                                                                 FineMatrix;
-  typedef typename Aggregates::FineField                                                                         FineVector;
-  typedef MultiGridPreconditioner<CoarseSiteVector, CoarseScalar, nCoarseSpins, nBasis, level - 1, CoarseMatrix> NextPreconditionerLevel;
-
-  /////////////////////////////////////////////
-  // Member Data
-  /////////////////////////////////////////////
-
-  LevelInfo &                              _LevelInfo;
-  FineMatrix &                             _FineMatrix;
-  FineMatrix &                             _SmootherMatrix;
-  Aggregates                               _Aggregates;
-  CoarseMatrix                             _CoarseMatrix;
-  std::unique_ptr<NextPreconditionerLevel> _NextPreconditionerLevel;
-
-  /////////////////////////////////////////////
-  // Member Functions
-  /////////////////////////////////////////////
-
-  MultiGridPreconditioner(LevelInfo &LvlInfo, FineMatrix &FineMat, FineMatrix &SmootherMat)
-    : _LevelInfo(LvlInfo)
-    , _FineMatrix(FineMat)
-    , _SmootherMatrix(SmootherMat)
-    , _Aggregates(_LevelInfo.Grids[level - 1], _LevelInfo.Grids[level], 0)
-    , _CoarseMatrix(*_LevelInfo.Grids[level - 1]) {
-    _NextPreconditionerLevel = std::unique_ptr<NextPreconditionerLevel>(
-      new NextPreconditionerLevel(_LevelInfo, _CoarseMatrix, _CoarseMatrix));
-  }
-
-  void setup() {
-
-    Gamma                                       g5(Gamma::Algebra::Gamma5);
-    MdagMLinearOperator<FineMatrix, FineVector> fineMdagMOp(_FineMatrix);
-
-    _Aggregates.CreateSubspace(_LevelInfo.PRNGs[level], fineMdagMOp /*, nb */); // NOTE: Don't specify nb to see the orthogonalization check
-
-    // TestVectorAnalyzer<FineVector, nbasis> fineTVA;
-    // fineTVA(fineMdagMOp, _Aggregates.subspace);
-
-    static_assert((nBasis & 0x1) == 0, "MG Preconditioner only supports an even number of basis vectors");
-    int nb = nBasis / 2;
-
-    for(
-      int n = 0; n < nb;
-      n++) { // TODO: to get this to work for more than two levels, I would need to either implement coarse spins or have a template specialization of this class also for the finest level
-      _Aggregates.subspace[n + nb] = g5 * _Aggregates.subspace[n];
-    }
-
-    _CoarseMatrix.CoarsenOperator(_LevelInfo.Grids[level], fineMdagMOp, _Aggregates);
-
-    _NextPreconditionerLevel->setup();
-  }
-
-  virtual void operator()(Lattice<Fobj> const &in, Lattice<Fobj> &out) {
-
-    // TODO: implement a W-cycle and a toggle to switch between the cycling strategies
-    vCycle(in, out);
-    // kCycle(in, out);
-  }
-
-  void vCycle(Lattice<Fobj> const &in, Lattice<Fobj> &out) {
-
-    RealD inputNorm = norm2(in);
-
-    CoarseVector coarseSrc(_LevelInfo.Grids[level - 1]);
-    CoarseVector coarseSol(_LevelInfo.Grids[level - 1]);
-    coarseSol = zero;
-
-    FineVector fineTmp(in._grid);
-
-    TrivialPrecon<FineVector>                      fineTrivialPreconditioner;
-    FlexibleGeneralisedMinimalResidual<FineVector> fineFGMRES(1.0e-14, 1, fineTrivialPreconditioner, 1, false);
-
-    MdagMLinearOperator<FineMatrix, FineVector> fineMdagMOp(_FineMatrix);
-    MdagMLinearOperator<FineMatrix, FineVector> fineSmootherMdagMOp(_SmootherMatrix);
-
-    _Aggregates.ProjectToSubspace(coarseSrc, in);
-    (*_NextPreconditionerLevel)(coarseSrc, coarseSol);
-    _Aggregates.PromoteFromSubspace(coarseSol, out);
-
-    fineMdagMOp.Op(out, fineTmp);
-    fineTmp                                = in - fineTmp;
-    auto r                                 = norm2(fineTmp);
-    auto residualAfterCoarseGridCorrection = std::sqrt(r / inputNorm);
-
-    fineFGMRES(fineSmootherMdagMOp, in, out);
-
-    fineMdagMOp.Op(out, fineTmp);
-    fineTmp                        = in - fineTmp;
-    r                              = norm2(fineTmp);
-    auto residualAfterPostSmoother = std::sqrt(r / inputNorm);
-
-    std::cout << GridLogMG << " Level " << level << ": Input norm = " << std::sqrt(inputNorm)
-              << " Coarse residual = " << residualAfterCoarseGridCorrection << " Post-Smoother residual = " << residualAfterPostSmoother
-              << std::endl;
-  }
-
-  void kCycle(Lattice<Fobj> const &in, Lattice<Fobj> &out) {
-
-    RealD inputNorm = norm2(in);
-
-    CoarseVector coarseSrc(_LevelInfo.Grids[level - 1]);
-    CoarseVector coarseSol(_LevelInfo.Grids[level - 1]);
-    coarseSol = zero;
-
-    FineVector fineTmp(in._grid);
-
-    TrivialPrecon<FineVector>                        fineTrivialPreconditioner;
-    FlexibleGeneralisedMinimalResidual<FineVector>   fineFGMRES(1.0e-14, 1, fineTrivialPreconditioner, 1, false);
-    FlexibleGeneralisedMinimalResidual<CoarseVector> coarseFGMRES(1.0e-14, 1, *_NextPreconditionerLevel, 1, false);
-
-    MdagMLinearOperator<FineMatrix, FineVector>     fineMdagMOp(_FineMatrix);
-    MdagMLinearOperator<FineMatrix, FineVector>     fineSmootherMdagMOp(_SmootherMatrix);
-    MdagMLinearOperator<CoarseMatrix, CoarseVector> coarseMdagMOp(_CoarseMatrix);
-
-    _Aggregates.ProjectToSubspace(coarseSrc, in);
-    coarseFGMRES(coarseMdagMOp, coarseSrc, coarseSol);
-    _Aggregates.PromoteFromSubspace(coarseSol, out);
-
-    fineMdagMOp.Op(out, fineTmp);
-    fineTmp                                = in - fineTmp;
-    auto r                                 = norm2(fineTmp);
-    auto residualAfterCoarseGridCorrection = std::sqrt(r / inputNorm);
-
-    fineFGMRES(fineSmootherMdagMOp, in, out);
-
-    fineMdagMOp.Op(out, fineTmp);
-    fineTmp                        = in - fineTmp;
-    r                              = norm2(fineTmp);
-    auto residualAfterPostSmoother = std::sqrt(r / inputNorm);
-
-    std::cout << GridLogMG << " Level " << level << ": Input norm = " << std::sqrt(inputNorm)
-              << " Coarse residual = " << residualAfterCoarseGridCorrection << " Post-Smoother residual = " << residualAfterPostSmoother
-              << std::endl;
-  }
-
-  void runChecks() {
-
-    auto tolerance   = 1e-13; // TODO: this obviously depends on the precision we use, current value is for double
-    auto coarseLevel = level - 1;
-
-    std::vector<FineVector>   fineTmps(2, _LevelInfo.Grids[level]);
-    std::vector<CoarseVector> coarseTmps(4, _LevelInfo.Grids[level - 1]);
-
-    MdagMLinearOperator<FineMatrix, FineVector>     fineMdagMOp(_FineMatrix);
-    MdagMLinearOperator<CoarseMatrix, CoarseVector> coarseMdagMOp(_CoarseMatrix);
-
-    std::cout << GridLogMG << " Level " << level << ": **************************************************" << std::endl;
-    std::cout << GridLogMG << " Level " << level << ": MG correctness check: 0 == (1 - P R) v" << std::endl;
-    std::cout << GridLogMG << " Level " << level << ": **************************************************" << std::endl;
-
-    for(auto i = 0; i < _Aggregates.subspace.size(); ++i) {
-      _Aggregates.ProjectToSubspace(coarseTmps[0], _Aggregates.subspace[i]); //   R v_i
-      _Aggregates.PromoteFromSubspace(coarseTmps[0], fineTmps[0]);           // P R v_i
-
-      fineTmps[1]    = _Aggregates.subspace[i] - fineTmps[0]; // v_i - P R v_i
-      auto deviation = std::sqrt(norm2(fineTmps[1]) / norm2(_Aggregates.subspace[i]));
-
-      std::cout << GridLogMG << " Level " << level << ": Vector " << i << ": norm2(v_i) = " << norm2(_Aggregates.subspace[i])
-                << " | norm2(R v_i) = " << norm2(coarseTmps[0]) << " | norm2(P R v_i) = " << norm2(fineTmps[0])
-                << " | relative deviation = " << deviation;
-
-      if(deviation > tolerance) {
-        std::cout << " > " << tolerance << " -> check failed" << std::endl;
-        // abort();
-      } else {
-        std::cout << " < " << tolerance << " -> check passed" << std::endl;
-      }
-    }
-
-    std::cout << GridLogMG << " Level " << level << ": **************************************************" << std::endl;
-    std::cout << GridLogMG << " Level " << level << ": MG correctness check: 0 == (1 - R P) v_c" << std::endl;
-    std::cout << GridLogMG << " Level " << level << ": **************************************************" << std::endl;
-
-    random(_LevelInfo.PRNGs[coarseLevel], coarseTmps[0]);
-
-    _Aggregates.PromoteFromSubspace(coarseTmps[0], fineTmps[0]); //   P v_c
-    _Aggregates.ProjectToSubspace(coarseTmps[1], fineTmps[0]);   // R P v_c
-
-    coarseTmps[2]  = coarseTmps[0] - coarseTmps[1]; // v_c - R P v_c
-    auto deviation = std::sqrt(norm2(coarseTmps[2]) / norm2(coarseTmps[0]));
-
-    std::cout << GridLogMG << " Level " << level << ": norm2(v_c) = " << norm2(coarseTmps[0])
-              << " | norm2(R P v_c) = " << norm2(coarseTmps[1]) << " | norm2(P v_c) = " << norm2(fineTmps[0])
-              << " | relative deviation = " << deviation;
-
-    if(deviation > tolerance) {
-      std::cout << " > " << tolerance << " -> check failed" << std::endl;
-      // abort();
-    } else {
-      std::cout << " < " << tolerance << " -> check passed" << std::endl;
-    }
-
-    std::cout << GridLogMG << " Level " << level << ": **************************************************" << std::endl;
-    std::cout << GridLogMG << " Level " << level << ": MG correctness check: 0 == (R D P - D_c) v_c" << std::endl;
-    std::cout << GridLogMG << " Level " << level << ": **************************************************" << std::endl;
-
-    random(_LevelInfo.PRNGs[coarseLevel], coarseTmps[0]);
-
-    _Aggregates.PromoteFromSubspace(coarseTmps[0], fineTmps[0]); //     P v_c
-    fineMdagMOp.Op(fineTmps[0], fineTmps[1]);                    //   D P v_c
-    _Aggregates.ProjectToSubspace(coarseTmps[1], fineTmps[1]);   // R D P v_c
-
-    coarseMdagMOp.Op(coarseTmps[0], coarseTmps[2]); // D_c v_c
-
-    coarseTmps[3] = coarseTmps[1] - coarseTmps[2]; // R D P v_c - D_c v_c
-    deviation     = std::sqrt(norm2(coarseTmps[3]) / norm2(coarseTmps[1]));
-
-    std::cout << GridLogMG << " Level " << level << ": norm2(R D P v_c) = " << norm2(coarseTmps[1])
-              << " | norm2(D_c v_c) = " << norm2(coarseTmps[2]) << " | relative deviation = " << deviation;
-
-    if(deviation > tolerance) {
-      std::cout << " > " << tolerance << " -> check failed" << std::endl;
-      // abort();
-    } else {
-      std::cout << " < " << tolerance << " -> check passed" << std::endl;
-    }
-
-    std::cout << GridLogMG << " Level " << level << ": **************************************************" << std::endl;
-    std::cout << GridLogMG << " Level " << level << ": MG correctness check: 0 == |(Im(v_c^dag D_c^dag D_c v_c)|" << std::endl;
-    std::cout << GridLogMG << " Level " << level << ": **************************************************" << std::endl;
-
-    random(_LevelInfo.PRNGs[coarseLevel], coarseTmps[0]);
-
-    coarseMdagMOp.Op(coarseTmps[0], coarseTmps[1]);    //         D_c v_c
-    coarseMdagMOp.AdjOp(coarseTmps[1], coarseTmps[2]); // D_c^dag D_c v_c
-
-    auto dot  = innerProduct(coarseTmps[0], coarseTmps[2]); //v_c^dag D_c^dag D_c v_c
-    deviation = abs(imag(dot)) / abs(real(dot));
-
-    std::cout << GridLogMG << " Level " << level << ": Re(v_c^dag D_c^dag D_c v_c) = " << real(dot)
-              << " | Im(v_c^dag D_c^dag D_c v_c) = " << imag(dot) << " | relative deviation = " << deviation;
-
-    if(deviation > tolerance) {
-      std::cout << " > " << tolerance << " -> check failed" << std::endl;
-      // abort();
-    } else {
-      std::cout << " < " << tolerance << " -> check passed"
-                << std::endl; // TODO: this check will work only when I got Mdag in CoarsenedMatrix to work
-    }
-
-    _NextPreconditionerLevel->runChecks();
-  }
-};
-
-// Specialize the coarsest level, this corresponds to counting downwards with level: coarsest = 0, finest = N
-template<class Fobj, class CoarseScalar, int nCoarseSpins, int nbasis, class Matrix>
-class MultiGridPreconditioner<Fobj, CoarseScalar, nCoarseSpins, nbasis, 0, Matrix> : public LinearFunction<Lattice<Fobj>> {
-public:
-  /////////////////////////////////////////////
-  // Type Definitions
-  /////////////////////////////////////////////
-
-  typedef Matrix        FineMatrix;
-  typedef Lattice<Fobj> FineVector;
-
-  /////////////////////////////////////////////
-  // Member Data
-  /////////////////////////////////////////////
-
-  LevelInfo & _LevelInfo;
-  FineMatrix &_FineMatrix;
-  FineMatrix &_SmootherMatrix;
-
-  /////////////////////////////////////////////
-  // Member Functions
-  /////////////////////////////////////////////
-
-  MultiGridPreconditioner(LevelInfo &LvlInfo, FineMatrix &FineMat, FineMatrix &SmootherMat)
-    : _LevelInfo(LvlInfo), _FineMatrix(FineMat), _SmootherMatrix(SmootherMat) {}
-
-  void setup() {}
-
-  virtual void operator()(Lattice<Fobj> const &in, Lattice<Fobj> &out) {
-
-    TrivialPrecon<FineVector>                      fineTrivialPreconditioner;
-    FlexibleGeneralisedMinimalResidual<FineVector> fineFGMRES(1.0e-14, 1, fineTrivialPreconditioner, 1, false);
-
-    MdagMLinearOperator<FineMatrix, FineVector> fineMdagMOp(_FineMatrix);
-
-    fineFGMRES(fineMdagMOp, in, out);
-  }
-
-  void runChecks() {}
-};
-
-template<class Fobj, class CoarseScalar, int nCoarseSpins, int nbasis, class Matrix>
-using FourLevelMGPreconditioner = MultiGridPreconditioner<Fobj, CoarseScalar, nCoarseSpins, nbasis, 4 - 1, Matrix>;
-
-template<class Fobj, class CoarseScalar, int nCoarseSpins, int nbasis, class Matrix>
-using ThreeLevelMGPreconditioner = MultiGridPreconditioner<Fobj, CoarseScalar, nCoarseSpins, nbasis, 3 - 1, Matrix>;
-
-template<class Fobj, class CoarseScalar, int nCoarseSpins, int nbasis, class Matrix>
-using TwoLevelMGPreconditioner = MultiGridPreconditioner<Fobj, CoarseScalar, nCoarseSpins, nbasis, 2 - 1, Matrix>;
-
-template<class Fobj, class CoarseScalar, int nCoarseSpins, int nbasis, int nlevel, class Matrix>
-using NLevelMGPreconditioner = MultiGridPreconditioner<Fobj, CoarseScalar, nCoarseSpins, nbasis, nlevel - 1, Matrix>;
-
-int main(int argc, char **argv) {
-
-  Grid_init(&argc, &argv);
-
-  GridCartesian *FGrid = SpaceTimeGrid::makeFourDimGrid(GridDefaultLatt(), GridDefaultSimd(Nd, vComplex::Nsimd()), GridDefaultMpi());
-  GridRedBlackCartesian *FrbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(FGrid);
-
-  std::vector<int> fSeeds({1, 2, 3, 4});
-  GridParallelRNG  fPRNG(FGrid);
-  fPRNG.SeedFixedIntegers(fSeeds);
-
-  Gamma g5(Gamma::Algebra::Gamma5);
-
-  // clang-format off
-  LatticeFermion    src(FGrid); gaussian(fPRNG, src);
-  LatticeFermion result(FGrid); result = zero;
-  LatticeGaugeField Umu(FGrid); SU3::HotConfiguration(fPRNG, Umu);
-  // clang-format on
-
-  RealD     mass   = 0.5;
-  const int nbasis = 20;
-
-  WilsonFermionR Dw(Umu, *FGrid, *FrbGrid, mass);
-
-  // mgParams.blockSizes = {{2, 2, 2, 2}, {2, 2, 1, 1}, {1, 1, 2, 1}};
-  // mgParams.blockSizes = {{2, 2, 2, 2}, {2, 2, 1, 1}};
-  mgParams.blockSizes = {{2, 2, 2, 2}};
-  mgParams.nLevels    = mgParams.blockSizes.size() + 1;
-
-  std::cout << mgParams << std::endl;
-
-  LevelInfo levelInfo(FGrid, mgParams);
-
-  MdagMLinearOperator<WilsonFermionR, LatticeFermion> FineMdagMOp(Dw);
-
-  TrivialPrecon<LatticeFermion>                                                     TrivialPrecon;
-  TwoLevelMGPreconditioner<vSpinColourVector, vTComplex, 1, nbasis, WilsonFermionR> TwoLevelMGPrecon(levelInfo, Dw, Dw);
-  // ThreeLevelMGPreconditioner<vSpinColourVector, vTComplex, 1, nbasis, WilsonFermionR> ThreeLevelMGPrecon(levelInfo, Dw, Dw);
-  // FourLevelMGPreconditioner<vSpinColourVector, vTComplex, 1, nbasis, WilsonFermionR> FourLevelMGPrecon(levelInfo, Dw, Dw);
-  // NLevelMGPreconditioner<vSpinColourVector, vTComplex, 1, nbasis, 4, WilsonFermionR> FourLevelMGPrecon(levelInfo, Dw, Dw);
-
-  TwoLevelMGPrecon.setup();
-  TwoLevelMGPrecon.runChecks();
-
-  // ThreeLevelMGPrecon.setup();
-  // ThreeLevelMGPrecon.runChecks();
-
-  // FourLevelMGPrecon.setup();
-  // FourLevelMGPrecon.runChecks();
-
-  // NLevelMGPrecon.setup();
-  // NLevelMGPrecon.runChecks();
-
-  std::vector<std::unique_ptr<OperatorFunction<LatticeFermion>>> solvers;
-
-  solvers.emplace_back(new FlexibleGeneralisedMinimalResidual<LatticeFermion>(1.0e-12, 50000, TrivialPrecon, 1000, false));
-  solvers.emplace_back(new FlexibleGeneralisedMinimalResidual<LatticeFermion>(1.0e-12, 50000, TwoLevelMGPrecon, 1000, false));
-  // solvers.emplace_back(new FlexibleGeneralisedMinimalResidual<LatticeFermion>(1.0e-12, 50000, ThreeLevelMGPrecon, 1000, false));
-  // solvers.emplace_back(new FlexibleGeneralisedMinimalResidual<LatticeFermion>(1.0e-12, 50000, FourLevelMGPrecon, 1000, false));
-  // solvers.emplace_back(new FlexibleGeneralisedMinimalResidual<LatticeFermion>(1.0e-12, 50000, NLevelMGPrecon, 1000, false));
-
-  for(auto const &solver : solvers) {
-    std::cout << "Starting with a new solver" << std::endl;
-    result = zero;
-    (*solver)(FineMdagMOp, src, result);
-    std::cout << std::endl;
-  }
-
-  Grid_finalize();
-}
diff --git a/tests/solver/Test_wilsonclover_mg.cc b/tests/solver/Test_wilsonclover_mg.cc
index db250464..70dc31e0 100644
--- a/tests/solver/Test_wilsonclover_mg.cc
+++ b/tests/solver/Test_wilsonclover_mg.cc
@@ -69,55 +69,68 @@ public:
   }
 };
 
-class myclass : Serializable {
+// clang-format off
+struct MultigridParams : Serializable {
 public:
-  // clang-format off
-  GRID_SERIALIZABLE_CLASS_MEMBERS(myclass,
-                                  int, domaindecompose,
-                                  int, domainsize,
-                                  int, coarsegrids,
-                                  int, order,
-                                  int, Ls,
-                                  double, mq,
-                                  double, lo,
-                                  double, hi,
-                                  int, steps);
-  // clang-format on
-  myclass(){};
+  GRID_SERIALIZABLE_CLASS_MEMBERS(MultigridParams,
+                                  int, nLevels,
+                                  std::vector<std::vector<int>>, blockSizes);
+  MultigridParams(){};
 };
-myclass params;
+MultigridParams mgParams;
+// clang-format on
 
-template<int nbasis> struct CoarseGrids {
+struct LevelInfo {
 public:
-  std::vector<std::vector<int>> LattSizes;
   std::vector<std::vector<int>> Seeds;
   std::vector<GridCartesian *>  Grids;
   std::vector<GridParallelRNG>  PRNGs;
 
-  CoarseGrids(std::vector<std::vector<int>> const &blockSizes, int coarsegrids) {
+  LevelInfo(GridCartesian *FineGrid, MultigridParams const &Params) {
 
-    assert(blockSizes.size() == coarsegrids);
+    auto nCoarseLevels = Params.blockSizes.size();
 
-    std::cout << GridLogMessage << "Constructing " << coarsegrids << " CoarseGrids" << std::endl;
+    assert(nCoarseLevels == Params.nLevels - 1);
 
-    for(int cl = 0; cl < coarsegrids; ++cl) { // may be a bit ugly and slow but not perf critical
-      // need to differentiate between first and other coarse levels in size calculation
-      LattSizes.push_back({cl == 0 ? GridDefaultLatt() : LattSizes[cl - 1]});
-      Seeds.push_back(std::vector<int>(LattSizes[cl].size()));
+    // set up values for finest grid
+    Grids.push_back(FineGrid);
+    Seeds.push_back({1, 2, 3, 4});
+    PRNGs.push_back(GridParallelRNG(Grids.back()));
+    PRNGs.back().SeedFixedIntegers(Seeds.back());
 
-      for(int d = 0; d < LattSizes[cl].size(); ++d) {
-        LattSizes[cl][d] = LattSizes[cl][d] / blockSizes[cl][d];
-        Seeds[cl][d]     = (cl + 1) * LattSizes[cl].size() + d + 1;
-        // calculation unimportant, just to get. e.g., {5, 6, 7, 8} for first coarse level and so on
+    // set up values for coarser grids
+    for(int level = 1; level < Params.nLevels; ++level) {
+      auto Nd  = Grids[level - 1]->_ndimension;
+      auto tmp = Grids[level - 1]->_fdimensions;
+      assert(tmp.size() == Nd);
+
+      Seeds.push_back(std::vector<int>(Nd));
+
+      for(int d = 0; d < Nd; ++d) {
+        tmp[d] /= Params.blockSizes[level - 1][d];
+        Seeds[level][d] = (level)*Nd + d + 1;
       }
 
-      Grids.push_back(SpaceTimeGrid::makeFourDimGrid(LattSizes[cl], GridDefaultSimd(Nd, vComplex::Nsimd()), GridDefaultMpi()));
-      PRNGs.push_back(GridParallelRNG(Grids[cl]));
+      Grids.push_back(SpaceTimeGrid::makeFourDimGrid(tmp, GridDefaultSimd(Nd, vComplex::Nsimd()), GridDefaultMpi()));
+      PRNGs.push_back(GridParallelRNG(Grids[level]));
 
-      PRNGs[cl].SeedFixedIntegers(Seeds[cl]);
+      PRNGs[level].SeedFixedIntegers(Seeds[level]);
+    }
 
-      std::cout << GridLogMessage << "cl = " << cl << ": LattSize = " << LattSizes[cl] << std::endl;
-      std::cout << GridLogMessage << "cl = " << cl << ":    Seeds = " << Seeds[cl] << std::endl;
+    std::cout << GridLogMessage << "Constructed " << Params.nLevels << " levels" << std::endl;
+
+    // The construction above corresponds to the finest level having level == 0
+    // (simply because it's not as ugly to implement), but we need it the
+    // other way round (i.e., the coarsest level to have level == 0) for the MG
+    // Preconditioner -> reverse the vectors
+
+    std::reverse(Seeds.begin(), Seeds.end());
+    std::reverse(Grids.begin(), Grids.end());
+    std::reverse(PRNGs.begin(), PRNGs.end());
+
+    for(int level = 0; level < Params.nLevels; ++level) {
+      std::cout << GridLogMessage << "level = " << level << ":" << std::endl;
+      Grids[level]->show_decomposition();
     }
   }
 };
@@ -221,116 +234,177 @@ template<class Field> void testLinearOperator(LinearOperatorBase<Field> &LinOp,
   }
 }
 
-// template < class Fobj, class CComplex, int coarseSpins, int nbasis, class Matrix >
-// class MultiGridPreconditioner : public LinearFunction< Lattice< Fobj > > {
-template<class Fobj, class CComplex, int nbasis, class Matrix> class MultiGridPreconditioner : public LinearFunction<Lattice<Fobj>> {
+template<class Fobj, class CoarseScalar, int nCoarseSpins, int nBasis, int level, class Matrix>
+class MultiGridPreconditioner : public LinearFunction<Lattice<Fobj>> {
 public:
-  typedef Aggregation<Fobj, CComplex, nbasis>     Aggregates;
-  typedef CoarsenedMatrix<Fobj, CComplex, nbasis> CoarseOperator;
+  /////////////////////////////////////////////
+  // Type Definitions
+  /////////////////////////////////////////////
 
-  typedef typename Aggregation<Fobj, CComplex, nbasis>::siteVector   siteVector;
-  typedef typename Aggregation<Fobj, CComplex, nbasis>::CoarseScalar CoarseScalar;
-  typedef typename Aggregation<Fobj, CComplex, nbasis>::CoarseVector CoarseVector;
-  typedef typename Aggregation<Fobj, CComplex, nbasis>::CoarseMatrix CoarseMatrix;
-  typedef typename Aggregation<Fobj, CComplex, nbasis>::FineField    FineField;
-  typedef LinearOperatorBase<FineField>                              FineOperator;
+  typedef Aggregation<Fobj, CoarseScalar, nBasis>                                                                Aggregates;
+  typedef CoarsenedMatrix<Fobj, CoarseScalar, nBasis>                                                            CoarseMatrix;
+  typedef typename Aggregates::CoarseVector                                                                      CoarseVector;
+  typedef typename Aggregates::siteVector                                                                        CoarseSiteVector;
+  typedef Matrix                                                                                                 FineMatrix;
+  typedef typename Aggregates::FineField                                                                         FineVector;
+  typedef MultiGridPreconditioner<CoarseSiteVector, CoarseScalar, nCoarseSpins, nBasis, level - 1, CoarseMatrix> NextPreconditionerLevel;
 
-  Aggregates &    _Aggregates;
-  CoarseOperator &_CoarseOperator;
-  Matrix &        _FineMatrix;
-  FineOperator &  _FineOperator;
-  Matrix &        _SmootherMatrix;
-  FineOperator &  _SmootherOperator;
+  /////////////////////////////////////////////
+  // Member Data
+  /////////////////////////////////////////////
 
-  // Constructor
-  MultiGridPreconditioner(Aggregates &    Agg,
-                          CoarseOperator &Coarse,
-                          FineOperator &  Fine,
-                          Matrix &        FineMatrix,
-                          FineOperator &  Smooth,
-                          Matrix &        SmootherMatrix)
-    : _Aggregates(Agg)
-    , _CoarseOperator(Coarse)
-    , _FineOperator(Fine)
-    , _FineMatrix(FineMatrix)
-    , _SmootherOperator(Smooth)
-    , _SmootherMatrix(SmootherMatrix) {}
+  LevelInfo &                              _LevelInfo;
+  FineMatrix &                             _FineMatrix;
+  FineMatrix &                             _SmootherMatrix;
+  Aggregates                               _Aggregates;
+  CoarseMatrix                             _CoarseMatrix;
+  std::unique_ptr<NextPreconditionerLevel> _NextPreconditionerLevel;
 
-  void operator()(const FineField &in, FineField &out) {
+  /////////////////////////////////////////////
+  // Member Functions
+  /////////////////////////////////////////////
 
-    CoarseVector coarseSrc(_CoarseOperator.Grid());
-    CoarseVector coarseTmp(_CoarseOperator.Grid());
-    CoarseVector coarseSol(_CoarseOperator.Grid());
-    coarseSol = zero;
-
-    GeneralisedMinimalResidual<CoarseVector> coarseGMRES(5.0e-2, 100, 25, false);
-    GeneralisedMinimalResidual<FineField>    fineGMRES(5.0e-2, 100, 25, false);
-
-    HermitianLinearOperator<CoarseOperator, CoarseVector> coarseHermOp(_CoarseOperator);
-    MdagMLinearOperator<CoarseOperator, CoarseVector>     coarseMdagMOp(_CoarseOperator);
-    MdagMLinearOperator<Matrix, FineField>                fineMdagMOp(_SmootherMatrix);
-
-    FineField fineTmp1(in._grid);
-    FineField fineTmp2(in._grid);
-
-    RealD Ni = norm2(in);
-
-    // no pre smoothing for now
-    auto  preSmootherNorm     = 0;
-    auto  preSmootherResidual = 0;
-    RealD r;
-
-    // Project to coarse grid, solve, project back to fine grid
-    _Aggregates.ProjectToSubspace(coarseSrc, in);
-    coarseGMRES(coarseMdagMOp, coarseSrc, coarseSol);
-    _Aggregates.PromoteFromSubspace(coarseSol, out);
-
-    // Recompute error
-    _FineOperator.Op(out, fineTmp1);
-    fineTmp1            = in - fineTmp1;
-    r                   = norm2(fineTmp1);
-    auto coarseResidual = std::sqrt(r / Ni);
-
-    // Apply smoother, use GMRES for the moment
-    fineGMRES(fineMdagMOp, in, out);
-
-    // Recompute error
-    _FineOperator.Op(out, fineTmp1);
-    fineTmp1                  = in - fineTmp1;
-    r                         = norm2(fineTmp1);
-    auto postSmootherResidual = std::sqrt(r / Ni);
-
-    std::cout << GridLogIterative << "Input norm = " << Ni << " Pre-Smoother norm " << preSmootherNorm
-              << " Pre-Smoother residual = " << preSmootherResidual << " Coarse residual = " << coarseResidual
-              << " Post-Smoother residual = " << postSmootherResidual << std::endl;
+  MultiGridPreconditioner(LevelInfo &LvlInfo, FineMatrix &FineMat, FineMatrix &SmootherMat)
+    : _LevelInfo(LvlInfo)
+    , _FineMatrix(FineMat)
+    , _SmootherMatrix(SmootherMat)
+    , _Aggregates(_LevelInfo.Grids[level - 1], _LevelInfo.Grids[level], 0)
+    , _CoarseMatrix(*_LevelInfo.Grids[level - 1]) {
+    _NextPreconditionerLevel
+      = std::unique_ptr<NextPreconditionerLevel>(new NextPreconditionerLevel(_LevelInfo, _CoarseMatrix, _CoarseMatrix));
   }
 
-  void runChecks(CoarseGrids<nbasis> &cGrids, int whichCoarseGrid) {
+  void setup() {
 
-    /////////////////////////////////////////////
-    // Some stuff we need for the checks below //
-    /////////////////////////////////////////////
-    auto tolerance = 1e-13; // TODO: this obviously depends on the precision we use, current value is for double
+    Gamma                                       g5(Gamma::Algebra::Gamma5);
+    MdagMLinearOperator<FineMatrix, FineVector> fineMdagMOp(_FineMatrix);
 
-    std::vector<CoarseVector> cTmps(4, _CoarseOperator.Grid());
-    std::vector<FineField>    fTmps(2, _Aggregates.subspace[0]._grid); // atm only for one coarser grid
+    _Aggregates.CreateSubspace(_LevelInfo.PRNGs[level], fineMdagMOp /*, nb */); // NOTE: Don't specify nb to see the orthogonalization check
 
-    // need to construct an operator, since _CoarseOperator is not a LinearOperator but only a matrix (the name is a bit misleading)
-    MdagMLinearOperator<CoarseOperator, CoarseVector> MdagMOp(_CoarseOperator);
+    // TestVectorAnalyzer<FineVector, nbasis> fineTVA;
+    // fineTVA(fineMdagMOp, _Aggregates.subspace);
 
-    std::cout << GridLogMessage << "**************************************************" << std::endl;
-    std::cout << GridLogMessage << "MG correctness check: 0 == (1 - P R) v" << std::endl;
-    std::cout << GridLogMessage << "**************************************************" << std::endl;
+    static_assert((nBasis & 0x1) == 0, "MG Preconditioner only supports an even number of basis vectors");
+    int nb = nBasis / 2;
+
+    for(
+      int n = 0; n < nb;
+      n++) { // TODO: to get this to work for more than two levels, I would need to either implement coarse spins or have a template specialization of this class also for the finest level
+      _Aggregates.subspace[n + nb] = g5 * _Aggregates.subspace[n];
+    }
+
+    _CoarseMatrix.CoarsenOperator(_LevelInfo.Grids[level], fineMdagMOp, _Aggregates);
+
+    _NextPreconditionerLevel->setup();
+  }
+
+  virtual void operator()(Lattice<Fobj> const &in, Lattice<Fobj> &out) {
+
+    // TODO: implement a W-cycle and a toggle to switch between the cycling strategies
+    vCycle(in, out);
+    // kCycle(in, out);
+  }
+
+  void vCycle(Lattice<Fobj> const &in, Lattice<Fobj> &out) {
+
+    RealD inputNorm = norm2(in);
+
+    CoarseVector coarseSrc(_LevelInfo.Grids[level - 1]);
+    CoarseVector coarseSol(_LevelInfo.Grids[level - 1]);
+    coarseSol = zero;
+
+    FineVector fineTmp(in._grid);
+
+    TrivialPrecon<FineVector>                      fineTrivialPreconditioner;
+    FlexibleGeneralisedMinimalResidual<FineVector> fineFGMRES(1.0e-14, 1, fineTrivialPreconditioner, 1, false);
+
+    MdagMLinearOperator<FineMatrix, FineVector> fineMdagMOp(_FineMatrix);
+    MdagMLinearOperator<FineMatrix, FineVector> fineSmootherMdagMOp(_SmootherMatrix);
+
+    _Aggregates.ProjectToSubspace(coarseSrc, in);
+    (*_NextPreconditionerLevel)(coarseSrc, coarseSol);
+    _Aggregates.PromoteFromSubspace(coarseSol, out);
+
+    fineMdagMOp.Op(out, fineTmp);
+    fineTmp                                = in - fineTmp;
+    auto r                                 = norm2(fineTmp);
+    auto residualAfterCoarseGridCorrection = std::sqrt(r / inputNorm);
+
+    fineFGMRES(fineSmootherMdagMOp, in, out);
+
+    fineMdagMOp.Op(out, fineTmp);
+    fineTmp                        = in - fineTmp;
+    r                              = norm2(fineTmp);
+    auto residualAfterPostSmoother = std::sqrt(r / inputNorm);
+
+    std::cout << GridLogMG << " Level " << level << ": V-cycle: Input norm = " << std::sqrt(inputNorm)
+              << " Coarse residual = " << residualAfterCoarseGridCorrection << " Post-Smoother residual = " << residualAfterPostSmoother
+              << std::endl;
+  }
+
+  void kCycle(Lattice<Fobj> const &in, Lattice<Fobj> &out) {
+
+    RealD inputNorm = norm2(in);
+
+    CoarseVector coarseSrc(_LevelInfo.Grids[level - 1]);
+    CoarseVector coarseSol(_LevelInfo.Grids[level - 1]);
+    coarseSol = zero;
+
+    FineVector fineTmp(in._grid);
+
+    TrivialPrecon<FineVector>                        fineTrivialPreconditioner;
+    FlexibleGeneralisedMinimalResidual<FineVector>   fineFGMRES(1.0e-14, 1, fineTrivialPreconditioner, 1, false);
+    FlexibleGeneralisedMinimalResidual<CoarseVector> coarseFGMRES(1.0e-14, 1, *_NextPreconditionerLevel, 1, false);
+
+    MdagMLinearOperator<FineMatrix, FineVector>     fineMdagMOp(_FineMatrix);
+    MdagMLinearOperator<FineMatrix, FineVector>     fineSmootherMdagMOp(_SmootherMatrix);
+    MdagMLinearOperator<CoarseMatrix, CoarseVector> coarseMdagMOp(_CoarseMatrix);
+
+    _Aggregates.ProjectToSubspace(coarseSrc, in);
+    coarseFGMRES(coarseMdagMOp, coarseSrc, coarseSol);
+    _Aggregates.PromoteFromSubspace(coarseSol, out);
+
+    fineMdagMOp.Op(out, fineTmp);
+    fineTmp                                = in - fineTmp;
+    auto r                                 = norm2(fineTmp);
+    auto residualAfterCoarseGridCorrection = std::sqrt(r / inputNorm);
+
+    fineFGMRES(fineSmootherMdagMOp, in, out);
+
+    fineMdagMOp.Op(out, fineTmp);
+    fineTmp                        = in - fineTmp;
+    r                              = norm2(fineTmp);
+    auto residualAfterPostSmoother = std::sqrt(r / inputNorm);
+
+    std::cout << GridLogMG << " Level " << level << ": K-cycle: Input norm = " << std::sqrt(inputNorm)
+              << " Coarse residual = " << residualAfterCoarseGridCorrection << " Post-Smoother residual = " << residualAfterPostSmoother
+              << std::endl;
+  }
+
+  void runChecks() {
+
+    auto tolerance   = 1e-13; // TODO: this obviously depends on the precision we use, current value is for double
+    auto coarseLevel = level - 1;
+
+    std::vector<FineVector>   fineTmps(2, _LevelInfo.Grids[level]);
+    std::vector<CoarseVector> coarseTmps(4, _LevelInfo.Grids[level - 1]);
+
+    MdagMLinearOperator<FineMatrix, FineVector>     fineMdagMOp(_FineMatrix);
+    MdagMLinearOperator<CoarseMatrix, CoarseVector> coarseMdagMOp(_CoarseMatrix);
+
+    std::cout << GridLogMG << " Level " << level << ": **************************************************" << std::endl;
+    std::cout << GridLogMG << " Level " << level << ": MG correctness check: 0 == (1 - P R) v" << std::endl;
+    std::cout << GridLogMG << " Level " << level << ": **************************************************" << std::endl;
 
     for(auto i = 0; i < _Aggregates.subspace.size(); ++i) {
-      _Aggregates.ProjectToSubspace(cTmps[0], _Aggregates.subspace[i]); //   R v_i
-      _Aggregates.PromoteFromSubspace(cTmps[0], fTmps[0]);              // P R v_i
+      _Aggregates.ProjectToSubspace(coarseTmps[0], _Aggregates.subspace[i]); //   R v_i
+      _Aggregates.PromoteFromSubspace(coarseTmps[0], fineTmps[0]);           // P R v_i
 
-      fTmps[1]       = _Aggregates.subspace[i] - fTmps[0]; // v_i - P R v_i
-      auto deviation = std::sqrt(norm2(fTmps[1]) / norm2(_Aggregates.subspace[i]));
+      fineTmps[1]    = _Aggregates.subspace[i] - fineTmps[0]; // v_i - P R v_i
+      auto deviation = std::sqrt(norm2(fineTmps[1]) / norm2(_Aggregates.subspace[i]));
 
-      std::cout << GridLogMessage << "Vector " << i << ": norm2(v_i) = " << norm2(_Aggregates.subspace[i])
-                << " | norm2(R v_i) = " << norm2(cTmps[0]) << " | norm2(P R v_i) = " << norm2(fTmps[0])
+      std::cout << GridLogMG << " Level " << level << ": Vector " << i << ": norm2(v_i) = " << norm2(_Aggregates.subspace[i])
+                << " | norm2(R v_i) = " << norm2(coarseTmps[0]) << " | norm2(P R v_i) = " << norm2(fineTmps[0])
                 << " | relative deviation = " << deviation;
 
       if(deviation > tolerance) {
@@ -341,44 +415,20 @@ public:
       }
     }
 
-    std::cout << GridLogMessage << "**************************************************" << std::endl;
-    std::cout << GridLogMessage << "MG correctness check: 0 == (1 - R P) v_c" << std::endl;
-    std::cout << GridLogMessage << "**************************************************" << std::endl;
+    std::cout << GridLogMG << " Level " << level << ": **************************************************" << std::endl;
+    std::cout << GridLogMG << " Level " << level << ": MG correctness check: 0 == (1 - R P) v_c" << std::endl;
+    std::cout << GridLogMG << " Level " << level << ": **************************************************" << std::endl;
 
-    random(cGrids.PRNGs[whichCoarseGrid], cTmps[0]);
+    random(_LevelInfo.PRNGs[coarseLevel], coarseTmps[0]);
 
-    _Aggregates.PromoteFromSubspace(cTmps[0], fTmps[0]); //   P v_c
-    _Aggregates.ProjectToSubspace(cTmps[1], fTmps[0]);   // R P v_c
+    _Aggregates.PromoteFromSubspace(coarseTmps[0], fineTmps[0]); //   P v_c
+    _Aggregates.ProjectToSubspace(coarseTmps[1], fineTmps[0]);   // R P v_c
 
-    cTmps[2]       = cTmps[0] - cTmps[1]; // v_c - R P v_c
-    auto deviation = std::sqrt(norm2(cTmps[2]) / norm2(cTmps[0]));
+    coarseTmps[2]  = coarseTmps[0] - coarseTmps[1]; // v_c - R P v_c
+    auto deviation = std::sqrt(norm2(coarseTmps[2]) / norm2(coarseTmps[0]));
 
-    std::cout << GridLogMessage << "norm2(v_c) = " << norm2(cTmps[0]) << " | norm2(R P v_c) = " << norm2(cTmps[1])
-              << " | norm2(P v_c) = " << norm2(fTmps[0]) << " | relative deviation = " << deviation;
-
-    if(deviation > tolerance) {
-      std::cout << " > " << tolerance << " -> check failed" << std::endl;
-      // abort();
-    } else {
-      std::cout << " < " << tolerance << " -> check passed" << std::endl;
-    }
-
-    std::cout << GridLogMessage << "**************************************************" << std::endl;
-    std::cout << GridLogMessage << "MG correctness check: 0 == (R D P - D_c) v_c" << std::endl;
-    std::cout << GridLogMessage << "**************************************************" << std::endl;
-
-    random(cGrids.PRNGs[whichCoarseGrid], cTmps[0]);
-
-    _Aggregates.PromoteFromSubspace(cTmps[0], fTmps[0]); //     P v_c
-    _FineOperator.Op(fTmps[0], fTmps[1]);                //   D P v_c
-    _Aggregates.ProjectToSubspace(cTmps[1], fTmps[1]);   // R D P v_c
-
-    MdagMOp.Op(cTmps[0], cTmps[2]); // D_c v_c
-
-    cTmps[3]  = cTmps[1] - cTmps[2]; // R D P v_c - D_c v_c
-    deviation = std::sqrt(norm2(cTmps[3]) / norm2(cTmps[1]));
-
-    std::cout << GridLogMessage << "norm2(R D P v_c) = " << norm2(cTmps[1]) << " | norm2(D_c v_c) = " << norm2(cTmps[2])
+    std::cout << GridLogMG << " Level " << level << ": norm2(v_c) = " << norm2(coarseTmps[0])
+              << " | norm2(R P v_c) = " << norm2(coarseTmps[1]) << " | norm2(P v_c) = " << norm2(fineTmps[0])
               << " | relative deviation = " << deviation;
 
     if(deviation > tolerance) {
@@ -388,57 +438,117 @@ public:
       std::cout << " < " << tolerance << " -> check passed" << std::endl;
     }
 
-    std::cout << GridLogMessage << "**************************************************" << std::endl;
-    std::cout << GridLogMessage << "MG correctness check: 0 == |(Im(v_c^dag D_c^dag D_c v_c)|" << std::endl;
-    std::cout << GridLogMessage << "**************************************************" << std::endl;
+    std::cout << GridLogMG << " Level " << level << ": **************************************************" << std::endl;
+    std::cout << GridLogMG << " Level " << level << ": MG correctness check: 0 == (R D P - D_c) v_c" << std::endl;
+    std::cout << GridLogMG << " Level " << level << ": **************************************************" << std::endl;
 
-    random(cGrids.PRNGs[whichCoarseGrid], cTmps[0]);
+    random(_LevelInfo.PRNGs[coarseLevel], coarseTmps[0]);
 
-    MdagMOp.Op(cTmps[0], cTmps[1]);    //         D_c v_c
-    MdagMOp.AdjOp(cTmps[1], cTmps[2]); // D_c^dag D_c v_c
+    _Aggregates.PromoteFromSubspace(coarseTmps[0], fineTmps[0]); //     P v_c
+    fineMdagMOp.Op(fineTmps[0], fineTmps[1]);                    //   D P v_c
+    _Aggregates.ProjectToSubspace(coarseTmps[1], fineTmps[1]);   // R D P v_c
 
-    auto dot  = innerProduct(cTmps[0], cTmps[2]); //v_c^dag D_c^dag D_c v_c
+    coarseMdagMOp.Op(coarseTmps[0], coarseTmps[2]); // D_c v_c
+
+    coarseTmps[3] = coarseTmps[1] - coarseTmps[2]; // R D P v_c - D_c v_c
+    deviation     = std::sqrt(norm2(coarseTmps[3]) / norm2(coarseTmps[1]));
+
+    std::cout << GridLogMG << " Level " << level << ": norm2(R D P v_c) = " << norm2(coarseTmps[1])
+              << " | norm2(D_c v_c) = " << norm2(coarseTmps[2]) << " | relative deviation = " << deviation;
+
+    if(deviation > tolerance) {
+      std::cout << " > " << tolerance << " -> check failed" << std::endl;
+      // abort();
+    } else {
+      std::cout << " < " << tolerance << " -> check passed" << std::endl;
+    }
+
+    std::cout << GridLogMG << " Level " << level << ": **************************************************" << std::endl;
+    std::cout << GridLogMG << " Level " << level << ": MG correctness check: 0 == |(Im(v_c^dag D_c^dag D_c v_c)|" << std::endl;
+    std::cout << GridLogMG << " Level " << level << ": **************************************************" << std::endl;
+
+    random(_LevelInfo.PRNGs[coarseLevel], coarseTmps[0]);
+
+    coarseMdagMOp.Op(coarseTmps[0], coarseTmps[1]);    //         D_c v_c
+    coarseMdagMOp.AdjOp(coarseTmps[1], coarseTmps[2]); // D_c^dag D_c v_c
+
+    auto dot  = innerProduct(coarseTmps[0], coarseTmps[2]); //v_c^dag D_c^dag D_c v_c
     deviation = abs(imag(dot)) / abs(real(dot));
 
-    std::cout << GridLogMessage << "Re(v_c^dag D_c^dag D_c v_c) = " << real(dot) << " | Im(v_c^dag D_c^dag D_c v_c) = " << imag(dot)
-              << " | relative deviation = " << deviation;
+    std::cout << GridLogMG << " Level " << level << ": Re(v_c^dag D_c^dag D_c v_c) = " << real(dot)
+              << " | Im(v_c^dag D_c^dag D_c v_c) = " << imag(dot) << " | relative deviation = " << deviation;
 
     if(deviation > tolerance) {
       std::cout << " > " << tolerance << " -> check failed" << std::endl;
       // abort();
     } else {
-      std::cout << " < " << tolerance << " -> check passed" << std::endl;
+      std::cout << " < " << tolerance << " -> check passed"
+                << std::endl; // TODO: this check will work only when I got Mdag in CoarsenedMatrix to work
     }
+
+    _NextPreconditionerLevel->runChecks();
   }
 };
 
+// Specialize the coarsest level, this corresponds to counting downwards with level: coarsest = 0, finest = N
+template<class Fobj, class CoarseScalar, int nCoarseSpins, int nbasis, class Matrix>
+class MultiGridPreconditioner<Fobj, CoarseScalar, nCoarseSpins, nbasis, 0, Matrix> : public LinearFunction<Lattice<Fobj>> {
+public:
+  /////////////////////////////////////////////
+  // Type Definitions
+  /////////////////////////////////////////////
+
+  typedef Matrix        FineMatrix;
+  typedef Lattice<Fobj> FineVector;
+
+  /////////////////////////////////////////////
+  // Member Data
+  /////////////////////////////////////////////
+
+  LevelInfo & _LevelInfo;
+  FineMatrix &_FineMatrix;
+  FineMatrix &_SmootherMatrix;
+
+  /////////////////////////////////////////////
+  // Member Functions
+  /////////////////////////////////////////////
+
+  MultiGridPreconditioner(LevelInfo &LvlInfo, FineMatrix &FineMat, FineMatrix &SmootherMat)
+    : _LevelInfo(LvlInfo), _FineMatrix(FineMat), _SmootherMatrix(SmootherMat) {}
+
+  void setup() {}
+
+  virtual void operator()(Lattice<Fobj> const &in, Lattice<Fobj> &out) {
+
+    TrivialPrecon<FineVector>                      fineTrivialPreconditioner;
+    FlexibleGeneralisedMinimalResidual<FineVector> fineFGMRES(1.0e-14, 1, fineTrivialPreconditioner, 1, false);
+
+    MdagMLinearOperator<FineMatrix, FineVector> fineMdagMOp(_FineMatrix);
+
+    fineFGMRES(fineMdagMOp, in, out);
+  }
+
+  void runChecks() {}
+};
+
+template<class Fobj, class CoarseScalar, int nCoarseSpins, int nbasis, class Matrix>
+using FourLevelMGPreconditioner = MultiGridPreconditioner<Fobj, CoarseScalar, nCoarseSpins, nbasis, 4 - 1, Matrix>;
+
+template<class Fobj, class CoarseScalar, int nCoarseSpins, int nbasis, class Matrix>
+using ThreeLevelMGPreconditioner = MultiGridPreconditioner<Fobj, CoarseScalar, nCoarseSpins, nbasis, 3 - 1, Matrix>;
+
+template<class Fobj, class CoarseScalar, int nCoarseSpins, int nbasis, class Matrix>
+using TwoLevelMGPreconditioner = MultiGridPreconditioner<Fobj, CoarseScalar, nCoarseSpins, nbasis, 2 - 1, Matrix>;
+
+template<class Fobj, class CoarseScalar, int nCoarseSpins, int nbasis, int nlevel, class Matrix>
+using NLevelMGPreconditioner = MultiGridPreconditioner<Fobj, CoarseScalar, nCoarseSpins, nbasis, nlevel - 1, Matrix>;
+
 int main(int argc, char **argv) {
 
   Grid_init(&argc, &argv);
 
-  params.domainsize      = 1;
-  params.coarsegrids     = 1;
-  params.domaindecompose = 0;
-  params.order           = 30;
-  params.Ls              = 1;
-  params.mq              = -0.5;
-  params.lo              = 0.5;
-  params.hi              = 70.0;
-  params.steps           = 1;
-
-  typedef typename WilsonCloverFermionR::FermionField FermionField;
-  typename WilsonCloverFermionR::ImplParams           wcImplparams;
-  WilsonAnisotropyCoefficients                        wilsonAnisCoeff;
-
-  std::cout << GridLogMessage << "**************************************************" << std::endl;
-  std::cout << GridLogMessage << "Params: " << std::endl;
-  std::cout << GridLogMessage << "**************************************************" << std::endl;
-
-  std::cout << params << std::endl;
-
-  std::cout << GridLogMessage << "**************************************************" << std::endl;
-  std::cout << GridLogMessage << "Set up some fine level stuff: " << std::endl;
-  std::cout << GridLogMessage << "**************************************************" << std::endl;
+  typename WilsonCloverFermionR::ImplParams wcImplparams;
+  WilsonAnisotropyCoefficients              wilsonAnisCoeff;
 
   GridCartesian *FGrid = SpaceTimeGrid::makeFourDimGrid(GridDefaultLatt(), GridDefaultSimd(Nd, vComplex::Nsimd()), GridDefaultMpi());
   GridRedBlackCartesian *FrbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(FGrid);
@@ -450,319 +560,107 @@ int main(int argc, char **argv) {
   Gamma g5(Gamma::Algebra::Gamma5);
 
   // clang-format off
-  FermionField      src(FGrid); gaussian(fPRNG, src);
-  FermionField   result(FGrid); result = zero;
+  LatticeFermion    src(FGrid); gaussian(fPRNG, src);
+  LatticeFermion result(FGrid); result = zero;
   LatticeGaugeField Umu(FGrid); SU3::HotConfiguration(fPRNG, Umu);
   // clang-format on
 
-  RealD mass = params.mq;
+  RealD mass  = 0.5;
+  RealD csw_r = 1.0;
+  RealD csw_t = 1.0;
 
-  std::cout << GridLogMessage << "**************************************************" << std::endl;
-  std::cout << GridLogMessage << "Set up some coarser levels stuff: " << std::endl;
-  std::cout << GridLogMessage << "**************************************************" << std::endl;
+  const int nbasis = 20;
 
-  const int nbasis = 20; // fix the number of test vector to the same
-                         // number on every level for now
-
-  //////////////////////////////////////////
-  // toggle to run two/three level method
-  //////////////////////////////////////////
-
-  // two-level algorithm
-  std::vector<std::vector<int>> blockSizes({{2, 2, 2, 2}});
-  CoarseGrids<nbasis>           coarseGrids(blockSizes, 1);
-
-  // // three-level algorithm
-  // std::vector<std::vector<int>> blockSizes({{2, 2, 2, 2}, {2, 2, 1, 1}});
-  // CoarseGrids<nbasis>           coarseGrids(blockSizes, 2);
-
-  std::cout << GridLogMessage << "**************************************************" << std::endl;
-  std::cout << GridLogMessage << "Some typedefs" << std::endl;
-  std::cout << GridLogMessage << "**************************************************" << std::endl;
-
-  // typedefs for transition from fine to first coarsened grid
-  typedef vSpinColourVector                                                                       FineSiteVector;
-  typedef vTComplex                                                                               CoarseSiteScalar;
-  typedef Aggregation<FineSiteVector, CoarseSiteScalar, nbasis>                                   Subspace;
-  typedef CoarsenedMatrix<FineSiteVector, CoarseSiteScalar, nbasis>                               CoarseOperator;
-  typedef CoarseOperator::CoarseVector                                                            CoarseVector;
-  typedef CoarseOperator::siteVector                                                              CoarseSiteVector;
-  typedef TestVectorAnalyzer<FermionField, nbasis>                                                FineTVA;
-  typedef MultiGridPreconditioner<FineSiteVector, CoarseSiteScalar, nbasis, WilsonCloverFermionR> FineMGPreconditioner;
-  typedef TrivialPrecon<FermionField>                                                             FineTrivialPreconditioner;
-
-  // typedefs for transition from a coarse to the next coarser grid (some defs remain the same)
-  typedef Aggregation<CoarseSiteVector, CoarseSiteScalar, nbasis>                             SubSubSpace;
-  typedef CoarsenedMatrix<CoarseSiteVector, CoarseSiteScalar, nbasis>                         CoarseCoarseOperator;
-  typedef CoarseCoarseOperator::CoarseVector                                                  CoarseCoarseVector;
-  typedef CoarseCoarseOperator::siteVector                                                    CoarseCoarseSiteVector;
-  typedef TestVectorAnalyzer<CoarseVector, nbasis>                                            CoarseTVA;
-  typedef MultiGridPreconditioner<CoarseSiteVector, CoarseSiteScalar, nbasis, CoarseOperator> CoarseMGPreconditioner;
-  typedef TrivialPrecon<CoarseVector>                                                         CoarseTrivialPreconditioner;
-
-  static_assert(std::is_same<CoarseVector, CoarseCoarseVector>::value, "CoarseVector and CoarseCoarseVector must be of the same type");
-
-  std::cout << GridLogMessage << "**************************************************" << std::endl;
-  std::cout << GridLogMessage << "Building the wilson clover operator on the fine grid" << std::endl;
-  std::cout << GridLogMessage << "**************************************************" << std::endl;
-
-  RealD                csw_r = 1.0;
-  RealD                csw_t = 1.0;
+  WilsonFermionR       Dw(Umu, *FGrid, *FrbGrid, mass);
   WilsonCloverFermionR Dwc(Umu, *FGrid, *FrbGrid, mass, csw_r, csw_t, wilsonAnisCoeff, wcImplparams);
 
-  std::cout << GridLogMessage << "**************************************************" << std::endl;
-  std::cout << GridLogMessage << "Setting up linear operators" << std::endl;
-  std::cout << GridLogMessage << "**************************************************" << std::endl;
+  // mgParams.blockSizes = {{2, 2, 2, 2}, {2, 2, 1, 1}, {1, 1, 2, 1}};
+  // mgParams.blockSizes = {{2, 2, 2, 2}, {2, 2, 1, 1}};
+  mgParams.blockSizes = {{2, 2, 2, 2}};
+  mgParams.nLevels    = mgParams.blockSizes.size() + 1;
 
-  MdagMLinearOperator<WilsonCloverFermionR, FermionField> FineMdagMOp(Dwc);
+  std::cout << mgParams << std::endl;
+
+  LevelInfo levelInfo(FGrid, mgParams);
+
+  static_assert(std::is_same<LatticeFermion, typename WilsonFermionR::FermionField>::value, "");
+  static_assert(std::is_same<LatticeFermion, typename WilsonCloverFermionR::FermionField>::value, "");
+
+  MdagMLinearOperator<WilsonFermionR, LatticeFermion>       MdagMOpDw(Dw);
+  MdagMLinearOperator<WilsonCloverFermionR, LatticeFermion> MdagMOpDwc(Dwc);
 
   std::cout << GridLogMessage << "**************************************************" << std::endl;
-  std::cout << GridLogMessage << "Calling Aggregation class to build subspaces" << std::endl;
+  std::cout << GridLogMessage << "Testing Multigrid for Wilson" << std::endl;
   std::cout << GridLogMessage << "**************************************************" << std::endl;
 
-  Subspace FineAggregates(coarseGrids.Grids[0], FGrid, 0);
+  TrivialPrecon<LatticeFermion>                                                     TrivialPrecon;
+  TwoLevelMGPreconditioner<vSpinColourVector, vTComplex, 1, nbasis, WilsonFermionR> TwoLevelMGPreconDw(levelInfo, Dw, Dw);
+  // ThreeLevelMGPreconditioner<vSpinColourVector, vTComplex, 1, nbasis, WilsonFermionR> ThreeLevelMGPreconDw(levelInfo, Dw, Dw);
+  // FourLevelMGPreconditioner<vSpinColourVector, vTComplex, 1, nbasis, WilsonFermionR> FourLevelMGPreconDw(levelInfo, Dw, Dw);
+  // NLevelMGPreconditioner<vSpinColourVector, vTComplex, 1, nbasis, 4, WilsonFermionR> NLevelMGPreconDw(levelInfo, Dw, Dw);
 
-  assert((nbasis & 0x1) == 0);
-  int nb = nbasis / 2;
-  std::cout << GridLogMessage << " nbasis/2 = " << nb << std::endl;
+  TwoLevelMGPreconDw.setup();
+  TwoLevelMGPreconDw.runChecks();
 
-  FineAggregates.CreateSubspace(fPRNG, FineMdagMOp /*, nb */); // Don't specify nb to see the orthogonalization check
+  // ThreeLevelMGPreconDw.setup();
+  // ThreeLevelMGPreconDw.runChecks();
 
-  std::cout << GridLogMessage << "**************************************************" << std::endl;
-  std::cout << GridLogMessage << "Test vector analysis after initial creation of subspace" << std::endl;
-  std::cout << GridLogMessage << "**************************************************" << std::endl;
+  // FourLevelMGPreconDw.setup();
+  // FourLevelMGPreconDw.runChecks();
 
-  FineTVA fineTVA;
-  fineTVA(FineMdagMOp, FineAggregates.subspace);
+  // NLevelMGPreconDw.setup();
+  // NLevelMGPreconDw.runChecks();
 
-  std::cout << GridLogMessage << "**************************************************" << std::endl;
-  std::cout << GridLogMessage << "Projecting subspace to definite chirality" << std::endl;
-  std::cout << GridLogMessage << "**************************************************" << std::endl;
+  std::vector<std::unique_ptr<OperatorFunction<LatticeFermion>>> solversDw;
 
-  for(int n = 0; n < nb; n++) {
-    FineAggregates.subspace[n + nb] = g5 * FineAggregates.subspace[n];
-  }
+  solversDw.emplace_back(new FlexibleGeneralisedMinimalResidual<LatticeFermion>(1.0e-12, 50000, TrivialPrecon, 1000, false));
+  solversDw.emplace_back(new FlexibleGeneralisedMinimalResidual<LatticeFermion>(1.0e-12, 50000, TwoLevelMGPreconDw, 1000, false));
+  // solversDw.emplace_back(new FlexibleGeneralisedMinimalResidual<LatticeFermion>(1.0e-12, 50000, ThreeLevelMGPreconDw, 1000, false));
+  // solversDw.emplace_back(new FlexibleGeneralisedMinimalResidual<LatticeFermion>(1.0e-12, 50000, FourLevelMGPreconDw, 1000, false));
+  // solversDw.emplace_back(new FlexibleGeneralisedMinimalResidual<LatticeFermion>(1.0e-12, 50000, NLevelMGPreconDw, 1000, false));
 
-  auto coarseSites = 1;
-  for(auto const &elem : coarseGrids.LattSizes[0]) coarseSites *= elem;
-
-  std::cout << GridLogMessage << "Norms of MG test vectors after chiral projection (coarse sites = " << coarseSites << ")" << std::endl;
-  for(int n = 0; n < nbasis; n++) {
-    std::cout << GridLogMessage << "vec[" << n << "] = " << norm2(FineAggregates.subspace[n]) << std::endl;
-  }
-
-  std::cout << GridLogMessage << "**************************************************" << std::endl;
-  std::cout << GridLogMessage << "Building coarse representation of Dirac operator" << std::endl;
-  std::cout << GridLogMessage << "**************************************************" << std::endl;
-
-  CoarseOperator Dc(*coarseGrids.Grids[0]);
-
-  Dc.CoarsenOperator(FGrid, FineMdagMOp, FineAggregates);
-
-  MdagMLinearOperator<CoarseOperator, CoarseVector> CoarseMdagMOp(Dc);
-
-  std::cout << GridLogMessage << "**************************************************" << std::endl;
-  std::cout << GridLogMessage << "Test vector analysis after construction of coarse Dirac operator" << std::endl;
-  std::cout << GridLogMessage << "**************************************************" << std::endl;
-
-  fineTVA(FineMdagMOp, FineAggregates.subspace);
-
-  std::cout << GridLogMessage << "**************************************************" << std::endl;
-  std::cout << GridLogMessage << "Testing the linear operators" << std::endl;
-  std::cout << GridLogMessage << "**************************************************" << std::endl;
-
-  // clang-format off
-  testLinearOperator(FineMdagMOp,   FGrid,                "FineMdagMOp");   std::cout << GridLogMessage << std::endl;
-  testLinearOperator(CoarseMdagMOp, coarseGrids.Grids[0], "CoarseMdagMOp"); std::cout << GridLogMessage << std::endl;
-  // clang-format on
-
-  std::cout << GridLogMessage << "**************************************************" << std::endl;
-  std::cout << GridLogMessage << "Building coarse vectors" << std::endl;
-  std::cout << GridLogMessage << "**************************************************" << std::endl;
-
-  CoarseVector coarseSource(coarseGrids.Grids[0]);
-  CoarseVector coarseResult(coarseGrids.Grids[0]);
-  gaussian(coarseGrids.PRNGs[0], coarseSource);
-  coarseResult = zero;
-
-  std::cout << GridLogMessage << "**************************************************" << std::endl;
-  std::cout << GridLogMessage << "Building some coarse space solvers" << std::endl;
-  std::cout << GridLogMessage << "**************************************************" << std::endl;
-
-  std::vector<std::unique_ptr<OperatorFunction<CoarseVector>>> dummyCoarseSolvers;
-  dummyCoarseSolvers.emplace_back(new GeneralisedMinimalResidual<CoarseVector>(5.0e-2, 100, 8, false));
-  dummyCoarseSolvers.emplace_back(new MinimalResidual<CoarseVector>(5.0e-2, 100, 0.8, false));
-  dummyCoarseSolvers.emplace_back(new ConjugateGradient<CoarseVector>(5.0e-2, 100, false));
-
-  std::cout << GridLogMessage << "**************************************************" << std::endl;
-  std::cout << GridLogMessage << "Testing some coarse space solvers" << std::endl;
-  std::cout << GridLogMessage << "**************************************************" << std::endl;
-
-  std::cout << GridLogMessage << "checking norm of coarse src " << norm2(coarseSource) << std::endl;
-
-  for(auto const &solver : dummyCoarseSolvers) {
-    coarseResult = zero;
-    (*solver)(CoarseMdagMOp, coarseSource, coarseResult);
-  }
-
-  std::cout << GridLogMessage << "**************************************************" << std::endl;
-  std::cout << GridLogMessage << "Building a multigrid preconditioner" << std::endl;
-  std::cout << GridLogMessage << "**************************************************" << std::endl;
-
-  FineMGPreconditioner      FineMGPrecon(FineAggregates, Dc, FineMdagMOp, Dwc, FineMdagMOp, Dwc);
-  FineTrivialPreconditioner FineSimplePrecon;
-
-  FineMGPrecon.runChecks(coarseGrids, 0);
-
-  std::cout << GridLogMessage << "**************************************************" << std::endl;
-  std::cout << GridLogMessage << "Building krylov subspace solvers w/ & w/o MG Preconditioner" << std::endl;
-  std::cout << GridLogMessage << "**************************************************" << std::endl;
-
-  std::vector<std::unique_ptr<OperatorFunction<FermionField>>> solvers;
-  solvers.emplace_back(new FlexibleGeneralisedMinimalResidual<FermionField>(1.0e-12, 4000000, FineSimplePrecon, 25, false));
-  solvers.emplace_back(new FlexibleGeneralisedMinimalResidual<FermionField>(1.0e-12, 100, FineMGPrecon, 25, false));
-  solvers.emplace_back(new PrecGeneralisedConjugateResidual<FermionField>(1.0e-12, 4000000, FineSimplePrecon, 25, 25));
-
-  std::cout << GridLogMessage << "**************************************************" << std::endl;
-  std::cout << GridLogMessage << "Testing the (un)?preconditioned solvers" << std::endl;
-  std::cout << GridLogMessage << "**************************************************" << std::endl;
-
-  for(auto const &solver : solvers) {
-    std::cout << GridLogMessage << "checking norm of fine src " << norm2(src) << std::endl;
+  for(auto const &solver : solversDw) {
+    std::cout << "Starting with a new solver" << std::endl;
     result = zero;
-    (*solver)(FineMdagMOp, src, result);
+    (*solver)(MdagMOpDw, src, result);
     std::cout << std::endl;
   }
 
-#if 0
-  if(coarseGrids.LattSizes.size() == 2) {
+  std::cout << GridLogMessage << "**************************************************" << std::endl;
+  std::cout << GridLogMessage << "Testing Multigrid for Wilson Clover" << std::endl;
+  std::cout << GridLogMessage << "**************************************************" << std::endl;
 
-    std::cout << GridLogMessage << "**************************************************" << std::endl;
-    std::cout << GridLogMessage << "Some testing for construction of a second coarse level" << std::endl;
-    std::cout << GridLogMessage << "**************************************************" << std::endl;
+  TwoLevelMGPreconditioner<vSpinColourVector, vTComplex, 1, nbasis, WilsonCloverFermionR> TwoLevelMGPreconDwc(levelInfo, Dwc, Dwc);
+  // ThreeLevelMGPreconditioner<vSpinColourVector, vTComplex, 1, nbasis, WilsonCloverFermionR> ThreeLevelMGPreconDwc(levelInfo, Dwc, Dwc);
+  // FourLevelMGPreconditioner<vSpinColourVector, vTComplex, 1, nbasis, WilsonCloverFermionR> FourLevelMGPreconDwc(levelInfo, Dwc, Dwc);
+  // NLevelMGPreconditioner<vSpinColourVector, vTComplex, 1, nbasis, 4, WilsonCloverFermionR> NLevelMGPreconDwc(levelInfo, Dwc, Dwc);
 
-    // std::cout << GridLogMessage << "**************************************************" << std::endl;
-    // std::cout << GridLogMessage << "Calling Aggregation class to build subspaces" << std::endl;
-    // std::cout << GridLogMessage << "**************************************************" << std::endl;
+  TwoLevelMGPreconDwc.setup();
+  TwoLevelMGPreconDwc.runChecks();
 
-    SubSubSpace CoarseAggregates(coarseGrids.Grids[1], coarseGrids.Grids[0], 0);
-    CoarseAggregates.CreateSubspace(coarseGrids.PRNGs[0], CoarseMdagMOp);
+  // ThreeLevelMGPreconDwc.setup();
+  // ThreeLevelMGPreconDwc.runChecks();
 
-    // std::cout << GridLogMessage << "**************************************************" << std::endl;
-    // std::cout << GridLogMessage << "Test vector analysis after initial creation of subspace" << std::endl;
-    // std::cout << GridLogMessage << "**************************************************" << std::endl;
+  // FourLevelMGPreconDwc.setup();
+  // FourLevelMGPreconDwc.runChecks();
 
-    // // this doesn't work because this function applies g5 to a vector, which
-    // // doesn't work for coarse vectors atm -> FIXME
-    // CoarseTVA coarseTVA;
-    // coarseTVA(CoarseMdagMOp, CoarseAggregates.subspace);
+  // NLevelMGPreconDwc.setup();
+  // NLevelMGPreconDwc.runChecks();
 
-    // std::cout << GridLogMessage << "**************************************************" << std::endl;
-    // std::cout << GridLogMessage << "Projecting subspace to definite chirality" << std::endl;
-    // std::cout << GridLogMessage << "**************************************************" << std::endl;
+  std::vector<std::unique_ptr<OperatorFunction<LatticeFermion>>> solversDwc;
 
-    // // cannot apply g5 to coarse vectors atm -> FIXME
-    // for(int n=0;n<nb;n++){
-    //   CoarseAggregates.subspace[n+nb] = g5 * CoarseAggregates.subspace[n];
-    //   std::cout<<GridLogMessage<<n<<" subspace "<<norm2(CoarseAggregates.subspace[n+nb])<<" "<<norm2(CoarseAggregates.subspace[n]) <<std::endl;
-    // }
-
-    auto coarseCoarseSites = 1;
-    for(auto const &elem : coarseGrids.LattSizes[1]) coarseCoarseSites *= elem;
-
-    std::cout << GridLogMessage << "Norms of MG test vectors after chiral projection (coarse coarse sites = " << coarseCoarseSites << ")"
-              << std::endl;
-    for(int n = 0; n < nbasis; n++) {
-      std::cout << GridLogMessage << "vec[" << n << "] = " << norm2(CoarseAggregates.subspace[n]) << std::endl;
-    }
-
-    // std::cout << GridLogMessage << "**************************************************" << std::endl;
-    // std::cout << GridLogMessage << "Building coarse coarse representation of Dirac operator" << std::endl;
-    // std::cout << GridLogMessage << "**************************************************" << std::endl;
-
-    CoarseCoarseOperator Dcc(*coarseGrids.Grids[1]);
-
-    Dcc.CoarsenOperator(coarseGrids.Grids[0], CoarseMdagMOp, CoarseAggregates);
-
-    MdagMLinearOperator<CoarseCoarseOperator, CoarseCoarseVector> CoarseCoarseMdagMOp(Dcc);
-
-    // std::cout << GridLogMessage << "**************************************************" << std::endl;
-    // std::cout << GridLogMessage << "Test vector analysis after construction of coarse Dirac operator" << std::endl;
-    // std::cout << GridLogMessage << "**************************************************" << std::endl;
-
-    // // this doesn't work because this function applies g5 to a vector, which
-    // // doesn't work for coarse vectors atm -> FIXME
-    // coarseTVA(CoarseMdagMOp, CoarseAggregates.subspace);
-
-    // std::cout << GridLogMessage << "**************************************************" << std::endl;
-    // std::cout << GridLogMessage << "Testing the linear operators" << std::endl;
-    // std::cout << GridLogMessage << "**************************************************" << std::endl;
-
-    // clang-format off
-    testLinearOperator(CoarseMdagMOp,       coarseGrids.Grids[0], "CoarseMdagMOp");
-    testLinearOperator(CoarseCoarseMdagMOp, coarseGrids.Grids[1], "CoarseCoarseMdagMOp");
-    // clang-format on
-
-    // std::cout << GridLogMessage << "**************************************************" << std::endl;
-    // std::cout << GridLogMessage << "Building coarse coarse vectors" << std::endl;
-    // std::cout << GridLogMessage << "**************************************************" << std::endl;
-
-    CoarseCoarseVector coarseCoarseSource(coarseGrids.Grids[1]);
-    CoarseCoarseVector coarseCoarseResult(coarseGrids.Grids[1]);
-    gaussian(coarseGrids.PRNGs[1], coarseCoarseSource);
-    coarseCoarseResult = zero;
-
-    // std::cout << GridLogMessage << "**************************************************" << std::endl;
-    // std::cout << GridLogMessage << "Building some coarse space solvers" << std::endl;
-    // std::cout << GridLogMessage << "**************************************************" << std::endl;
-
-    std::vector<std::unique_ptr<OperatorFunction<CoarseCoarseVector>>> dummyCoarseCoarseSolvers;
-    dummyCoarseCoarseSolvers.emplace_back(new GeneralisedMinimalResidual<CoarseCoarseVector>(5.0e-2, 100, 8, false));
-    dummyCoarseCoarseSolvers.emplace_back(new MinimalResidual<CoarseCoarseVector>(5.0e-2, 100, 0.8, false));
-    dummyCoarseCoarseSolvers.emplace_back(new ConjugateGradient<CoarseCoarseVector>(5.0e-2, 100, false));
-
-    // std::cout << GridLogMessage << "**************************************************" << std::endl;
-    // std::cout << GridLogMessage << "Testing some coarse coarse space solvers" << std::endl;
-    // std::cout << GridLogMessage << "**************************************************" << std::endl;
-
-    std::cout << GridLogMessage << "checking norm of coarse coarse src " << norm2(coarseCoarseSource) << std::endl;
-
-    for(auto const &solver : dummyCoarseCoarseSolvers) {
-      coarseCoarseResult = zero;
-      (*solver)(CoarseCoarseMdagMOp, coarseCoarseSource, coarseCoarseResult);
-    }
-
-    // std::cout << GridLogMessage << "**************************************************" << std::endl;
-    // std::cout << GridLogMessage << "Building a multigrid preconditioner" << std::endl;
-    // std::cout << GridLogMessage << "**************************************************" << std::endl;
-
-    CoarseMGPreconditioner      CoarseMGPrecon(CoarseAggregates, Dcc, CoarseMdagMOp, Dc, CoarseMdagMOp, Dc);
-    CoarseTrivialPreconditioner CoarseSimplePrecon;
-
-    CoarseMGPrecon.runChecks(coarseGrids, 1);
-
-    // std::cout << GridLogMessage << "**************************************************" << std::endl;
-    // std::cout << GridLogMessage << "Building krylov subspace solvers w/ & w/o MG Preconditioner" << std::endl;
-    // std::cout << GridLogMessage << "**************************************************" << std::endl;
-
-    std::vector<std::unique_ptr<OperatorFunction<CoarseVector>>> solvers;
-    solvers.emplace_back(new FlexibleGeneralisedMinimalResidual<CoarseVector>(1.0e-12, 4000000, CoarseSimplePrecon, 25, false));
-    solvers.emplace_back(new FlexibleGeneralisedMinimalResidual<CoarseVector>(1.0e-12, 100, CoarseMGPrecon, 25, false));
-    solvers.emplace_back(new PrecGeneralisedConjugateResidual<CoarseVector>(1.0e-12, 4000000, CoarseSimplePrecon, 25, 25));
-
-    // std::cout << GridLogMessage << "**************************************************" << std::endl;
-    // std::cout << GridLogMessage << "Testing the (un)?preconditioned solvers" << std::endl;
-    // std::cout << GridLogMessage << "**************************************************" << std::endl;
-
-    for(auto const &solver : solvers) {
-      std::cout << GridLogMessage << "checking norm of fine src " << norm2(coarseSource) << std::endl;
-      coarseResult = zero;
-      (*solver)(CoarseMdagMOp, coarseSource, coarseResult);
-      std::cout << std::endl;
-    }
+  solversDwc.emplace_back(new FlexibleGeneralisedMinimalResidual<LatticeFermion>(1.0e-12, 50000, TrivialPrecon, 1000, false));
+  solversDwc.emplace_back(new FlexibleGeneralisedMinimalResidual<LatticeFermion>(1.0e-12, 50000, TwoLevelMGPreconDwc, 1000, false));
+  // solversDwc.emplace_back(new FlexibleGeneralisedMinimalResidual<LatticeFermion>(1.0e-12, 50000, ThreeLevelMGPreconDwc, 1000, false));
+  // solversDwc.emplace_back(new FlexibleGeneralisedMinimalResidual<LatticeFermion>(1.0e-12, 50000, FourLevelMGPreconDwc, 1000, false));
+  // solversDwc.emplace_back(new FlexibleGeneralisedMinimalResidual<LatticeFermion>(1.0e-12, 50000, NLevelMGPreconDwc, 1000, false));
 
+  for(auto const &solver : solversDwc) {
+    std::cout << "Starting with a new solver" << std::endl;
+    result = zero;
+    (*solver)(MdagMOpDwc, src, result);
+    std::cout << std::endl;
   }
-#endif
 
   Grid_finalize();
 }

From 683a7d2ddd69337518be26991d1f92fb614402df Mon Sep 17 00:00:00 2001
From: Daniel Richtmann <daniel.richtmann@physik.uni-regensburg.de>
Date: Mon, 26 Mar 2018 14:59:40 +0200
Subject: [PATCH 099/130] WilsonMG: Move comment to make clang-format happy

---
 tests/solver/Test_wilsonclover_mg.cc | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/tests/solver/Test_wilsonclover_mg.cc b/tests/solver/Test_wilsonclover_mg.cc
index 70dc31e0..294029c9 100644
--- a/tests/solver/Test_wilsonclover_mg.cc
+++ b/tests/solver/Test_wilsonclover_mg.cc
@@ -287,9 +287,8 @@ public:
     static_assert((nBasis & 0x1) == 0, "MG Preconditioner only supports an even number of basis vectors");
     int nb = nBasis / 2;
 
-    for(
-      int n = 0; n < nb;
-      n++) { // TODO: to get this to work for more than two levels, I would need to either implement coarse spins or have a template specialization of this class also for the finest level
+    // TODO: to get this to work for more than two levels, I would need to either implement coarse spins or have a template specialization of this class also for the finest level
+    for(int n = 0; n < nb; n++) {
       _Aggregates.subspace[n + nb] = g5 * _Aggregates.subspace[n];
     }
 

From 63ba33371f16d279d1f3446f7e3a2e9670e9af49 Mon Sep 17 00:00:00 2001
From: Daniel Richtmann <daniel.richtmann@physik.uni-regensburg.de>
Date: Mon, 26 Mar 2018 15:34:53 +0200
Subject: [PATCH 100/130] WilsonMG: Rename MultigridParams to MultiGridParams and Params to mgParams

---
 tests/solver/Test_wilsonclover_mg.cc | 22 +++++++++++-----------
 1 file changed, 11 insertions(+), 11 deletions(-)

diff --git a/tests/solver/Test_wilsonclover_mg.cc b/tests/solver/Test_wilsonclover_mg.cc
index 294029c9..f0f0f0ef 100644
--- a/tests/solver/Test_wilsonclover_mg.cc
+++ b/tests/solver/Test_wilsonclover_mg.cc
@@ -70,14 +70,14 @@ public:
 };
 
 // clang-format off
-struct MultigridParams : Serializable {
+struct MultiGridParams : Serializable {
 public:
-  GRID_SERIALIZABLE_CLASS_MEMBERS(MultigridParams,
+  GRID_SERIALIZABLE_CLASS_MEMBERS(MultiGridParams,
                                   int, nLevels,
                                   std::vector<std::vector<int>>, blockSizes);
-  MultigridParams(){};
+  MultiGridParams(){};
 };
-MultigridParams mgParams;
+MultiGridParams mgParams;
 // clang-format on
 
 struct LevelInfo {
@@ -86,11 +86,11 @@ public:
   std::vector<GridCartesian *>  Grids;
   std::vector<GridParallelRNG>  PRNGs;
 
-  LevelInfo(GridCartesian *FineGrid, MultigridParams const &Params) {
+  LevelInfo(GridCartesian *FineGrid, MultiGridParams const &mgParams) {
 
-    auto nCoarseLevels = Params.blockSizes.size();
+    auto nCoarseLevels = mgParams.blockSizes.size();
 
-    assert(nCoarseLevels == Params.nLevels - 1);
+    assert(nCoarseLevels == mgParams.nLevels - 1);
 
     // set up values for finest grid
     Grids.push_back(FineGrid);
@@ -99,7 +99,7 @@ public:
     PRNGs.back().SeedFixedIntegers(Seeds.back());
 
     // set up values for coarser grids
-    for(int level = 1; level < Params.nLevels; ++level) {
+    for(int level = 1; level < mgParams.nLevels; ++level) {
       auto Nd  = Grids[level - 1]->_ndimension;
       auto tmp = Grids[level - 1]->_fdimensions;
       assert(tmp.size() == Nd);
@@ -107,7 +107,7 @@ public:
       Seeds.push_back(std::vector<int>(Nd));
 
       for(int d = 0; d < Nd; ++d) {
-        tmp[d] /= Params.blockSizes[level - 1][d];
+        tmp[d] /= mgParams.blockSizes[level - 1][d];
         Seeds[level][d] = (level)*Nd + d + 1;
       }
 
@@ -117,7 +117,7 @@ public:
       PRNGs[level].SeedFixedIntegers(Seeds[level]);
     }
 
-    std::cout << GridLogMessage << "Constructed " << Params.nLevels << " levels" << std::endl;
+    std::cout << GridLogMessage << "Constructed " << mgParams.nLevels << " levels" << std::endl;
 
     // The construction above corresponds to the finest level having level == 0
     // (simply because it's not as ugly to implement), but we need it the
@@ -128,7 +128,7 @@ public:
     std::reverse(Grids.begin(), Grids.end());
     std::reverse(PRNGs.begin(), PRNGs.end());
 
-    for(int level = 0; level < Params.nLevels; ++level) {
+    for(int level = 0; level < mgParams.nLevels; ++level) {
       std::cout << GridLogMessage << "level = " << level << ":" << std::endl;
       Grids[level]->show_decomposition();
     }

From 08543b6b11a6ad16f6d58b9d649b8686a82ce412 Mon Sep 17 00:00:00 2001
From: Daniel Richtmann <daniel.richtmann@physik.uni-regensburg.de>
Date: Mon, 26 Mar 2018 15:37:17 +0200
Subject: [PATCH 101/130] WilsonMG: Provide a switch between V- and K-cycle

---
 tests/solver/Test_wilsonclover_mg.cc | 36 +++++++++++++++++-----------
 1 file changed, 22 insertions(+), 14 deletions(-)

diff --git a/tests/solver/Test_wilsonclover_mg.cc b/tests/solver/Test_wilsonclover_mg.cc
index f0f0f0ef..d17f2704 100644
--- a/tests/solver/Test_wilsonclover_mg.cc
+++ b/tests/solver/Test_wilsonclover_mg.cc
@@ -74,7 +74,8 @@ struct MultiGridParams : Serializable {
 public:
   GRID_SERIALIZABLE_CLASS_MEMBERS(MultiGridParams,
                                   int, nLevels,
-                                  std::vector<std::vector<int>>, blockSizes);
+                                  std::vector<std::vector<int>>, blockSizes,
+                                  bool, kCycle);
   MultiGridParams(){};
 };
 MultiGridParams mgParams;
@@ -253,6 +254,7 @@ public:
   // Member Data
   /////////////////////////////////////////////
 
+  MultiGridParams &                        _MultiGridParams;
   LevelInfo &                              _LevelInfo;
   FineMatrix &                             _FineMatrix;
   FineMatrix &                             _SmootherMatrix;
@@ -264,14 +266,15 @@ public:
   // Member Functions
   /////////////////////////////////////////////
 
-  MultiGridPreconditioner(LevelInfo &LvlInfo, FineMatrix &FineMat, FineMatrix &SmootherMat)
-    : _LevelInfo(LvlInfo)
+  MultiGridPreconditioner(MultiGridParams &mgParams, LevelInfo &LvlInfo, FineMatrix &FineMat, FineMatrix &SmootherMat)
+    : _MultiGridParams(mgParams)
+    , _LevelInfo(LvlInfo)
     , _FineMatrix(FineMat)
     , _SmootherMatrix(SmootherMat)
     , _Aggregates(_LevelInfo.Grids[level - 1], _LevelInfo.Grids[level], 0)
     , _CoarseMatrix(*_LevelInfo.Grids[level - 1]) {
     _NextPreconditionerLevel
-      = std::unique_ptr<NextPreconditionerLevel>(new NextPreconditionerLevel(_LevelInfo, _CoarseMatrix, _CoarseMatrix));
+      = std::unique_ptr<NextPreconditionerLevel>(new NextPreconditionerLevel(_MultiGridParams, _LevelInfo, _CoarseMatrix, _CoarseMatrix));
   }
 
   void setup() {
@@ -299,9 +302,11 @@ public:
 
   virtual void operator()(Lattice<Fobj> const &in, Lattice<Fobj> &out) {
 
-    // TODO: implement a W-cycle and a toggle to switch between the cycling strategies
-    vCycle(in, out);
-    // kCycle(in, out);
+    // TODO: implement a W-cycle
+    if(_MultiGridParams.kCycle)
+      kCycle(in, out);
+    else
+      vCycle(in, out);
   }
 
   void vCycle(Lattice<Fobj> const &in, Lattice<Fobj> &out) {
@@ -504,16 +509,17 @@ public:
   // Member Data
   /////////////////////////////////////////////
 
-  LevelInfo & _LevelInfo;
-  FineMatrix &_FineMatrix;
-  FineMatrix &_SmootherMatrix;
+  MultiGridParams &_MultiGridParams;
+  LevelInfo &      _LevelInfo;
+  FineMatrix &     _FineMatrix;
+  FineMatrix &     _SmootherMatrix;
 
   /////////////////////////////////////////////
   // Member Functions
   /////////////////////////////////////////////
 
-  MultiGridPreconditioner(LevelInfo &LvlInfo, FineMatrix &FineMat, FineMatrix &SmootherMat)
-    : _LevelInfo(LvlInfo), _FineMatrix(FineMat), _SmootherMatrix(SmootherMat) {}
+  MultiGridPreconditioner(MultiGridParams &mgParams, LevelInfo &LvlInfo, FineMatrix &FineMat, FineMatrix &SmootherMat)
+    : _MultiGridParams(mgParams), _LevelInfo(LvlInfo), _FineMatrix(FineMat), _SmootherMatrix(SmootherMat) {}
 
   void setup() {}
 
@@ -577,6 +583,7 @@ int main(int argc, char **argv) {
   // mgParams.blockSizes = {{2, 2, 2, 2}, {2, 2, 1, 1}};
   mgParams.blockSizes = {{2, 2, 2, 2}};
   mgParams.nLevels    = mgParams.blockSizes.size() + 1;
+  mgParams.kCycle     = true;
 
   std::cout << mgParams << std::endl;
 
@@ -593,7 +600,7 @@ int main(int argc, char **argv) {
   std::cout << GridLogMessage << "**************************************************" << std::endl;
 
   TrivialPrecon<LatticeFermion>                                                     TrivialPrecon;
-  TwoLevelMGPreconditioner<vSpinColourVector, vTComplex, 1, nbasis, WilsonFermionR> TwoLevelMGPreconDw(levelInfo, Dw, Dw);
+  TwoLevelMGPreconditioner<vSpinColourVector, vTComplex, 1, nbasis, WilsonFermionR> TwoLevelMGPreconDw(mgParams, levelInfo, Dw, Dw);
   // ThreeLevelMGPreconditioner<vSpinColourVector, vTComplex, 1, nbasis, WilsonFermionR> ThreeLevelMGPreconDw(levelInfo, Dw, Dw);
   // FourLevelMGPreconditioner<vSpinColourVector, vTComplex, 1, nbasis, WilsonFermionR> FourLevelMGPreconDw(levelInfo, Dw, Dw);
   // NLevelMGPreconditioner<vSpinColourVector, vTComplex, 1, nbasis, 4, WilsonFermionR> NLevelMGPreconDw(levelInfo, Dw, Dw);
@@ -629,7 +636,8 @@ int main(int argc, char **argv) {
   std::cout << GridLogMessage << "Testing Multigrid for Wilson Clover" << std::endl;
   std::cout << GridLogMessage << "**************************************************" << std::endl;
 
-  TwoLevelMGPreconditioner<vSpinColourVector, vTComplex, 1, nbasis, WilsonCloverFermionR> TwoLevelMGPreconDwc(levelInfo, Dwc, Dwc);
+  TwoLevelMGPreconditioner<vSpinColourVector, vTComplex, 1, nbasis, WilsonCloverFermionR> TwoLevelMGPreconDwc(
+    mgParams, levelInfo, Dwc, Dwc);
   // ThreeLevelMGPreconditioner<vSpinColourVector, vTComplex, 1, nbasis, WilsonCloverFermionR> ThreeLevelMGPreconDwc(levelInfo, Dwc, Dwc);
   // FourLevelMGPreconditioner<vSpinColourVector, vTComplex, 1, nbasis, WilsonCloverFermionR> FourLevelMGPreconDwc(levelInfo, Dwc, Dwc);
   // NLevelMGPreconditioner<vSpinColourVector, vTComplex, 1, nbasis, 4, WilsonCloverFermionR> NLevelMGPreconDwc(levelInfo, Dwc, Dwc);

From b78456bdf43649a8f1c99cf327d85190b6d0f76d Mon Sep 17 00:00:00 2001
From: Daniel Richtmann <daniel.richtmann@physik.uni-regensburg.de>
Date: Mon, 26 Mar 2018 15:41:53 +0200
Subject: [PATCH 102/130] WilsonMG: Get rid of explicit include of GCR header

---
 tests/solver/Test_wilsonclover_mg.cc | 1 -
 1 file changed, 1 deletion(-)

diff --git a/tests/solver/Test_wilsonclover_mg.cc b/tests/solver/Test_wilsonclover_mg.cc
index d17f2704..f977342d 100644
--- a/tests/solver/Test_wilsonclover_mg.cc
+++ b/tests/solver/Test_wilsonclover_mg.cc
@@ -27,7 +27,6 @@
 /*  END LEGAL */
 
 #include <Grid/Grid.h>
-#include <Grid/algorithms/iterative/PrecGeneralisedConjugateResidual.h>
 
 using namespace std;
 using namespace Grid;

From 99107038f93a0298974f500db8e6a34c2cd0a6bf Mon Sep 17 00:00:00 2001
From: Daniel Richtmann <daniel.richtmann@physik.uni-regensburg.de>
Date: Tue, 27 Mar 2018 17:06:33 +0200
Subject: [PATCH 103/130] WilsonMG: Rationalize the level counting strategy

---
 tests/solver/Test_wilsonclover_mg.cc | 101 +++++++++++++--------------
 1 file changed, 49 insertions(+), 52 deletions(-)

diff --git a/tests/solver/Test_wilsonclover_mg.cc b/tests/solver/Test_wilsonclover_mg.cc
index f977342d..f60a4bca 100644
--- a/tests/solver/Test_wilsonclover_mg.cc
+++ b/tests/solver/Test_wilsonclover_mg.cc
@@ -119,15 +119,6 @@ public:
 
     std::cout << GridLogMessage << "Constructed " << mgParams.nLevels << " levels" << std::endl;
 
-    // The construction above corresponds to the finest level having level == 0
-    // (simply because it's not as ugly to implement), but we need it the
-    // other way round (i.e., the coarsest level to have level == 0) for the MG
-    // Preconditioner -> reverse the vectors
-
-    std::reverse(Seeds.begin(), Seeds.end());
-    std::reverse(Grids.begin(), Grids.end());
-    std::reverse(PRNGs.begin(), PRNGs.end());
-
     for(int level = 0; level < mgParams.nLevels; ++level) {
       std::cout << GridLogMessage << "level = " << level << ":" << std::endl;
       Grids[level]->show_decomposition();
@@ -234,25 +225,29 @@ template<class Field> void testLinearOperator(LinearOperatorBase<Field> &LinOp,
   }
 }
 
-template<class Fobj, class CoarseScalar, int nCoarseSpins, int nBasis, int level, class Matrix>
+template<class Fobj, class CoarseScalar, int nCoarseSpins, int nBasis, int nCoarserLevels, class Matrix>
 class MultiGridPreconditioner : public LinearFunction<Lattice<Fobj>> {
 public:
   /////////////////////////////////////////////
   // Type Definitions
   /////////////////////////////////////////////
 
-  typedef Aggregation<Fobj, CoarseScalar, nBasis>                                                                Aggregates;
-  typedef CoarsenedMatrix<Fobj, CoarseScalar, nBasis>                                                            CoarseMatrix;
-  typedef typename Aggregates::CoarseVector                                                                      CoarseVector;
-  typedef typename Aggregates::siteVector                                                                        CoarseSiteVector;
-  typedef Matrix                                                                                                 FineMatrix;
-  typedef typename Aggregates::FineField                                                                         FineVector;
-  typedef MultiGridPreconditioner<CoarseSiteVector, CoarseScalar, nCoarseSpins, nBasis, level - 1, CoarseMatrix> NextPreconditionerLevel;
+  // clang-format off
+  typedef Aggregation<Fobj, CoarseScalar, nBasis>                                                                         Aggregates;
+  typedef CoarsenedMatrix<Fobj, CoarseScalar, nBasis>                                                                     CoarseMatrix;
+  typedef typename Aggregates::CoarseVector                                                                               CoarseVector;
+  typedef typename Aggregates::siteVector                                                                                 CoarseSiteVector;
+  typedef Matrix                                                                                                          FineMatrix;
+  typedef typename Aggregates::FineField                                                                                  FineVector;
+  typedef MultiGridPreconditioner<CoarseSiteVector, CoarseScalar, nCoarseSpins, nBasis, nCoarserLevels - 1, CoarseMatrix> NextPreconditionerLevel;
+  // clang-format on
 
   /////////////////////////////////////////////
   // Member Data
   /////////////////////////////////////////////
 
+  int                                      _CurrentLevel;
+  int                                      _NextCoarserLevel;
   MultiGridParams &                        _MultiGridParams;
   LevelInfo &                              _LevelInfo;
   FineMatrix &                             _FineMatrix;
@@ -266,12 +261,14 @@ public:
   /////////////////////////////////////////////
 
   MultiGridPreconditioner(MultiGridParams &mgParams, LevelInfo &LvlInfo, FineMatrix &FineMat, FineMatrix &SmootherMat)
-    : _MultiGridParams(mgParams)
+    : _CurrentLevel(mgParams.nLevels - (nCoarserLevels + 1)) // _Level = 0 corresponds to finest
+    , _NextCoarserLevel(_CurrentLevel + 1)                   // incremented for instances on coarser levels
+    , _MultiGridParams(mgParams)
     , _LevelInfo(LvlInfo)
     , _FineMatrix(FineMat)
     , _SmootherMatrix(SmootherMat)
-    , _Aggregates(_LevelInfo.Grids[level - 1], _LevelInfo.Grids[level], 0)
-    , _CoarseMatrix(*_LevelInfo.Grids[level - 1]) {
+    , _Aggregates(_LevelInfo.Grids[_NextCoarserLevel], _LevelInfo.Grids[_CurrentLevel], 0)
+    , _CoarseMatrix(*_LevelInfo.Grids[_NextCoarserLevel]) {
     _NextPreconditionerLevel
       = std::unique_ptr<NextPreconditionerLevel>(new NextPreconditionerLevel(_MultiGridParams, _LevelInfo, _CoarseMatrix, _CoarseMatrix));
   }
@@ -281,7 +278,8 @@ public:
     Gamma                                       g5(Gamma::Algebra::Gamma5);
     MdagMLinearOperator<FineMatrix, FineVector> fineMdagMOp(_FineMatrix);
 
-    _Aggregates.CreateSubspace(_LevelInfo.PRNGs[level], fineMdagMOp /*, nb */); // NOTE: Don't specify nb to see the orthogonalization check
+    // NOTE: Don't specify nb here to see the orthogonalization check
+    _Aggregates.CreateSubspace(_LevelInfo.PRNGs[_CurrentLevel], fineMdagMOp /*, nb */);
 
     // TestVectorAnalyzer<FineVector, nbasis> fineTVA;
     // fineTVA(fineMdagMOp, _Aggregates.subspace);
@@ -294,7 +292,7 @@ public:
       _Aggregates.subspace[n + nb] = g5 * _Aggregates.subspace[n];
     }
 
-    _CoarseMatrix.CoarsenOperator(_LevelInfo.Grids[level], fineMdagMOp, _Aggregates);
+    _CoarseMatrix.CoarsenOperator(_LevelInfo.Grids[_CurrentLevel], fineMdagMOp, _Aggregates);
 
     _NextPreconditionerLevel->setup();
   }
@@ -312,8 +310,8 @@ public:
 
     RealD inputNorm = norm2(in);
 
-    CoarseVector coarseSrc(_LevelInfo.Grids[level - 1]);
-    CoarseVector coarseSol(_LevelInfo.Grids[level - 1]);
+    CoarseVector coarseSrc(_LevelInfo.Grids[_NextCoarserLevel]);
+    CoarseVector coarseSol(_LevelInfo.Grids[_NextCoarserLevel]);
     coarseSol = zero;
 
     FineVector fineTmp(in._grid);
@@ -340,7 +338,7 @@ public:
     r                              = norm2(fineTmp);
     auto residualAfterPostSmoother = std::sqrt(r / inputNorm);
 
-    std::cout << GridLogMG << " Level " << level << ": V-cycle: Input norm = " << std::sqrt(inputNorm)
+    std::cout << GridLogMG << " Level " << _CurrentLevel << ": V-cycle: Input norm = " << std::sqrt(inputNorm)
               << " Coarse residual = " << residualAfterCoarseGridCorrection << " Post-Smoother residual = " << residualAfterPostSmoother
               << std::endl;
   }
@@ -349,8 +347,8 @@ public:
 
     RealD inputNorm = norm2(in);
 
-    CoarseVector coarseSrc(_LevelInfo.Grids[level - 1]);
-    CoarseVector coarseSol(_LevelInfo.Grids[level - 1]);
+    CoarseVector coarseSrc(_LevelInfo.Grids[_NextCoarserLevel]);
+    CoarseVector coarseSol(_LevelInfo.Grids[_NextCoarserLevel]);
     coarseSol = zero;
 
     FineVector fineTmp(in._grid);
@@ -379,25 +377,24 @@ public:
     r                              = norm2(fineTmp);
     auto residualAfterPostSmoother = std::sqrt(r / inputNorm);
 
-    std::cout << GridLogMG << " Level " << level << ": K-cycle: Input norm = " << std::sqrt(inputNorm)
+    std::cout << GridLogMG << " Level " << _CurrentLevel << ": K-cycle: Input norm = " << std::sqrt(inputNorm)
               << " Coarse residual = " << residualAfterCoarseGridCorrection << " Post-Smoother residual = " << residualAfterPostSmoother
               << std::endl;
   }
 
   void runChecks() {
 
-    auto tolerance   = 1e-13; // TODO: this obviously depends on the precision we use, current value is for double
-    auto coarseLevel = level - 1;
+    auto tolerance = 1e-13; // TODO: this obviously depends on the precision we use, current value is for double
 
-    std::vector<FineVector>   fineTmps(2, _LevelInfo.Grids[level]);
-    std::vector<CoarseVector> coarseTmps(4, _LevelInfo.Grids[level - 1]);
+    std::vector<FineVector>   fineTmps(2, _LevelInfo.Grids[_CurrentLevel]);
+    std::vector<CoarseVector> coarseTmps(4, _LevelInfo.Grids[_NextCoarserLevel]);
 
     MdagMLinearOperator<FineMatrix, FineVector>     fineMdagMOp(_FineMatrix);
     MdagMLinearOperator<CoarseMatrix, CoarseVector> coarseMdagMOp(_CoarseMatrix);
 
-    std::cout << GridLogMG << " Level " << level << ": **************************************************" << std::endl;
-    std::cout << GridLogMG << " Level " << level << ": MG correctness check: 0 == (1 - P R) v" << std::endl;
-    std::cout << GridLogMG << " Level " << level << ": **************************************************" << std::endl;
+    std::cout << GridLogMG << " Level " << _CurrentLevel << ": **************************************************" << std::endl;
+    std::cout << GridLogMG << " Level " << _CurrentLevel << ": MG correctness check: 0 == (1 - P R) v" << std::endl;
+    std::cout << GridLogMG << " Level " << _CurrentLevel << ": **************************************************" << std::endl;
 
     for(auto i = 0; i < _Aggregates.subspace.size(); ++i) {
       _Aggregates.ProjectToSubspace(coarseTmps[0], _Aggregates.subspace[i]); //   R v_i
@@ -406,7 +403,7 @@ public:
       fineTmps[1]    = _Aggregates.subspace[i] - fineTmps[0]; // v_i - P R v_i
       auto deviation = std::sqrt(norm2(fineTmps[1]) / norm2(_Aggregates.subspace[i]));
 
-      std::cout << GridLogMG << " Level " << level << ": Vector " << i << ": norm2(v_i) = " << norm2(_Aggregates.subspace[i])
+      std::cout << GridLogMG << " Level " << _CurrentLevel << ": Vector " << i << ": norm2(v_i) = " << norm2(_Aggregates.subspace[i])
                 << " | norm2(R v_i) = " << norm2(coarseTmps[0]) << " | norm2(P R v_i) = " << norm2(fineTmps[0])
                 << " | relative deviation = " << deviation;
 
@@ -418,11 +415,11 @@ public:
       }
     }
 
-    std::cout << GridLogMG << " Level " << level << ": **************************************************" << std::endl;
-    std::cout << GridLogMG << " Level " << level << ": MG correctness check: 0 == (1 - R P) v_c" << std::endl;
-    std::cout << GridLogMG << " Level " << level << ": **************************************************" << std::endl;
+    std::cout << GridLogMG << " Level " << _CurrentLevel << ": **************************************************" << std::endl;
+    std::cout << GridLogMG << " Level " << _CurrentLevel << ": MG correctness check: 0 == (1 - R P) v_c" << std::endl;
+    std::cout << GridLogMG << " Level " << _CurrentLevel << ": **************************************************" << std::endl;
 
-    random(_LevelInfo.PRNGs[coarseLevel], coarseTmps[0]);
+    random(_LevelInfo.PRNGs[_NextCoarserLevel], coarseTmps[0]);
 
     _Aggregates.PromoteFromSubspace(coarseTmps[0], fineTmps[0]); //   P v_c
     _Aggregates.ProjectToSubspace(coarseTmps[1], fineTmps[0]);   // R P v_c
@@ -430,7 +427,7 @@ public:
     coarseTmps[2]  = coarseTmps[0] - coarseTmps[1]; // v_c - R P v_c
     auto deviation = std::sqrt(norm2(coarseTmps[2]) / norm2(coarseTmps[0]));
 
-    std::cout << GridLogMG << " Level " << level << ": norm2(v_c) = " << norm2(coarseTmps[0])
+    std::cout << GridLogMG << " Level " << _CurrentLevel << ": norm2(v_c) = " << norm2(coarseTmps[0])
               << " | norm2(R P v_c) = " << norm2(coarseTmps[1]) << " | norm2(P v_c) = " << norm2(fineTmps[0])
               << " | relative deviation = " << deviation;
 
@@ -441,11 +438,11 @@ public:
       std::cout << " < " << tolerance << " -> check passed" << std::endl;
     }
 
-    std::cout << GridLogMG << " Level " << level << ": **************************************************" << std::endl;
-    std::cout << GridLogMG << " Level " << level << ": MG correctness check: 0 == (R D P - D_c) v_c" << std::endl;
-    std::cout << GridLogMG << " Level " << level << ": **************************************************" << std::endl;
+    std::cout << GridLogMG << " Level " << _CurrentLevel << ": **************************************************" << std::endl;
+    std::cout << GridLogMG << " Level " << _CurrentLevel << ": MG correctness check: 0 == (R D P - D_c) v_c" << std::endl;
+    std::cout << GridLogMG << " Level " << _CurrentLevel << ": **************************************************" << std::endl;
 
-    random(_LevelInfo.PRNGs[coarseLevel], coarseTmps[0]);
+    random(_LevelInfo.PRNGs[_NextCoarserLevel], coarseTmps[0]);
 
     _Aggregates.PromoteFromSubspace(coarseTmps[0], fineTmps[0]); //     P v_c
     fineMdagMOp.Op(fineTmps[0], fineTmps[1]);                    //   D P v_c
@@ -456,7 +453,7 @@ public:
     coarseTmps[3] = coarseTmps[1] - coarseTmps[2]; // R D P v_c - D_c v_c
     deviation     = std::sqrt(norm2(coarseTmps[3]) / norm2(coarseTmps[1]));
 
-    std::cout << GridLogMG << " Level " << level << ": norm2(R D P v_c) = " << norm2(coarseTmps[1])
+    std::cout << GridLogMG << " Level " << _CurrentLevel << ": norm2(R D P v_c) = " << norm2(coarseTmps[1])
               << " | norm2(D_c v_c) = " << norm2(coarseTmps[2]) << " | relative deviation = " << deviation;
 
     if(deviation > tolerance) {
@@ -466,11 +463,11 @@ public:
       std::cout << " < " << tolerance << " -> check passed" << std::endl;
     }
 
-    std::cout << GridLogMG << " Level " << level << ": **************************************************" << std::endl;
-    std::cout << GridLogMG << " Level " << level << ": MG correctness check: 0 == |(Im(v_c^dag D_c^dag D_c v_c)|" << std::endl;
-    std::cout << GridLogMG << " Level " << level << ": **************************************************" << std::endl;
+    std::cout << GridLogMG << " Level " << _CurrentLevel << ": **************************************************" << std::endl;
+    std::cout << GridLogMG << " Level " << _CurrentLevel << ": MG correctness check: 0 == |(Im(v_c^dag D_c^dag D_c v_c)|" << std::endl;
+    std::cout << GridLogMG << " Level " << _CurrentLevel << ": **************************************************" << std::endl;
 
-    random(_LevelInfo.PRNGs[coarseLevel], coarseTmps[0]);
+    random(_LevelInfo.PRNGs[_NextCoarserLevel], coarseTmps[0]);
 
     coarseMdagMOp.Op(coarseTmps[0], coarseTmps[1]);    //         D_c v_c
     coarseMdagMOp.AdjOp(coarseTmps[1], coarseTmps[2]); // D_c^dag D_c v_c
@@ -478,7 +475,7 @@ public:
     auto dot  = innerProduct(coarseTmps[0], coarseTmps[2]); //v_c^dag D_c^dag D_c v_c
     deviation = abs(imag(dot)) / abs(real(dot));
 
-    std::cout << GridLogMG << " Level " << level << ": Re(v_c^dag D_c^dag D_c v_c) = " << real(dot)
+    std::cout << GridLogMG << " Level " << _CurrentLevel << ": Re(v_c^dag D_c^dag D_c v_c) = " << real(dot)
               << " | Im(v_c^dag D_c^dag D_c v_c) = " << imag(dot) << " | relative deviation = " << deviation;
 
     if(deviation > tolerance) {
@@ -493,7 +490,7 @@ public:
   }
 };
 
-// Specialize the coarsest level, this corresponds to counting downwards with level: coarsest = 0, finest = N
+// Specialization for the coarsest level
 template<class Fobj, class CoarseScalar, int nCoarseSpins, int nbasis, class Matrix>
 class MultiGridPreconditioner<Fobj, CoarseScalar, nCoarseSpins, nbasis, 0, Matrix> : public LinearFunction<Lattice<Fobj>> {
 public:

From 04f9cf088dd59644179697cd2253ed25c4fc1b0b Mon Sep 17 00:00:00 2001
From: Daniel Richtmann <daniel.richtmann@physik.uni-regensburg.de>
Date: Tue, 27 Mar 2018 17:13:11 +0200
Subject: [PATCH 104/130] WilsonMG: Add more parameters to MultiGridParams
 struct

---
 tests/solver/Test_wilsonclover_mg.cc | 156 +++++++++++++++++++--------
 1 file changed, 114 insertions(+), 42 deletions(-)

diff --git a/tests/solver/Test_wilsonclover_mg.cc b/tests/solver/Test_wilsonclover_mg.cc
index f60a4bca..956e68db 100644
--- a/tests/solver/Test_wilsonclover_mg.cc
+++ b/tests/solver/Test_wilsonclover_mg.cc
@@ -68,18 +68,44 @@ public:
   }
 };
 
+// TODO: Can think about having one parameter struct per level and then a
+// vector of these structs. How well would that work together with the
+// serialization strategy of Grid?
+
 // clang-format off
 struct MultiGridParams : Serializable {
 public:
   GRID_SERIALIZABLE_CLASS_MEMBERS(MultiGridParams,
-                                  int, nLevels,
-                                  std::vector<std::vector<int>>, blockSizes,
-                                  bool, kCycle);
+                                  int,                           nLevels,
+                                  std::vector<std::vector<int>>, blockSizes,           // size == nLevels - 1
+                                  std::vector<double>,           smootherTol,          // size == nLevels - 1
+                                  std::vector<int>,              smootherMaxOuterIter, // size == nLevels - 1
+                                  std::vector<int>,              smootherMaxInnerIter, // size == nLevels - 1
+                                  bool,                          kCycle,
+                                  std::vector<double>,           kCycleTol,            // size == nLevels - 1
+                                  std::vector<int>,              kCycleMaxOuterIter,   // size == nLevels - 1
+                                  std::vector<int>,              kCycleMaxInnerIter,   // size == nLevels - 1
+                                  double,                        coarseSolverTol,
+                                  int,                           coarseSolverMaxOuterIter,
+                                  int,                           coarseSolverMaxInnerIter);
   MultiGridParams(){};
 };
 MultiGridParams mgParams;
 // clang-format on
 
+void checkParameterValidity(MultiGridParams const &params) {
+  // Every per-level vector needs one entry per non-coarsest level, i.e. nLevels - 1 of them.
+  // Read nLevels from the argument being validated rather than from the global mgParams.
+  auto correctSize = params.nLevels - 1;
+
+  assert(correctSize == params.blockSizes.size());
+  assert(correctSize == params.smootherTol.size());
+  assert(correctSize == params.smootherMaxOuterIter.size());
+  assert(correctSize == params.smootherMaxInnerIter.size());
+  assert(correctSize == params.kCycleTol.size());
+  assert(correctSize == params.kCycleMaxOuterIter.size());
+  assert(correctSize == params.kCycleMaxInnerIter.size());
+}
+
 struct LevelInfo {
 public:
   std::vector<std::vector<int>> Seeds;
@@ -316,8 +342,14 @@ public:
 
     FineVector fineTmp(in._grid);
 
+    auto maxSmootherIter = _MultiGridParams.smootherMaxOuterIter[_CurrentLevel] * _MultiGridParams.smootherMaxInnerIter[_CurrentLevel];
+
     TrivialPrecon<FineVector>                      fineTrivialPreconditioner;
-    FlexibleGeneralisedMinimalResidual<FineVector> fineFGMRES(1.0e-14, 1, fineTrivialPreconditioner, 1, false);
+    FlexibleGeneralisedMinimalResidual<FineVector> fineFGMRES(_MultiGridParams.smootherTol[_CurrentLevel],
+                                                              maxSmootherIter,
+                                                              fineTrivialPreconditioner,
+                                                              _MultiGridParams.smootherMaxInnerIter[_CurrentLevel],
+                                                              false);
 
     MdagMLinearOperator<FineMatrix, FineVector> fineMdagMOp(_FineMatrix);
     MdagMLinearOperator<FineMatrix, FineVector> fineSmootherMdagMOp(_SmootherMatrix);
@@ -353,9 +385,20 @@ public:
 
     FineVector fineTmp(in._grid);
 
+    auto smootherMaxIter = _MultiGridParams.smootherMaxOuterIter[_CurrentLevel] * _MultiGridParams.smootherMaxInnerIter[_CurrentLevel];
+    auto kCycleMaxIter   = _MultiGridParams.kCycleMaxOuterIter[_CurrentLevel] * _MultiGridParams.kCycleMaxInnerIter[_CurrentLevel];
+
     TrivialPrecon<FineVector>                        fineTrivialPreconditioner;
-    FlexibleGeneralisedMinimalResidual<FineVector>   fineFGMRES(1.0e-14, 1, fineTrivialPreconditioner, 1, false);
-    FlexibleGeneralisedMinimalResidual<CoarseVector> coarseFGMRES(1.0e-14, 1, *_NextPreconditionerLevel, 1, false);
+    FlexibleGeneralisedMinimalResidual<FineVector>   fineFGMRES(_MultiGridParams.smootherTol[_CurrentLevel],
+                                                              smootherMaxIter,
+                                                              fineTrivialPreconditioner,
+                                                              _MultiGridParams.smootherMaxInnerIter[_CurrentLevel],
+                                                              false);
+    FlexibleGeneralisedMinimalResidual<CoarseVector> coarseFGMRES(_MultiGridParams.kCycleTol[_CurrentLevel],
+                                                                  kCycleMaxIter,
+                                                                  *_NextPreconditionerLevel,
+                                                                  _MultiGridParams.kCycleMaxInnerIter[_CurrentLevel],
+                                                                  false);
 
     MdagMLinearOperator<FineMatrix, FineVector>     fineMdagMOp(_FineMatrix);
     MdagMLinearOperator<FineMatrix, FineVector>     fineSmootherMdagMOp(_SmootherMatrix);
@@ -521,8 +564,12 @@ public:
 
   virtual void operator()(Lattice<Fobj> const &in, Lattice<Fobj> &out) {
 
+    auto coarseSolverMaxIter = _MultiGridParams.coarseSolverMaxOuterIter * _MultiGridParams.coarseSolverMaxInnerIter;
+
+    // On the coarsest level we only have what is called the fine level above; there is no coarser one
     TrivialPrecon<FineVector>                      fineTrivialPreconditioner;
-    FlexibleGeneralisedMinimalResidual<FineVector> fineFGMRES(1.0e-14, 1, fineTrivialPreconditioner, 1, false);
+    FlexibleGeneralisedMinimalResidual<FineVector> fineFGMRES(
+      _MultiGridParams.coarseSolverTol, coarseSolverMaxIter, fineTrivialPreconditioner, _MultiGridParams.coarseSolverMaxInnerIter, false);
 
     MdagMLinearOperator<FineMatrix, FineVector> fineMdagMOp(_FineMatrix);
 
@@ -532,17 +579,8 @@ public:
   void runChecks() {}
 };
 
-template<class Fobj, class CoarseScalar, int nCoarseSpins, int nbasis, class Matrix>
-using FourLevelMGPreconditioner = MultiGridPreconditioner<Fobj, CoarseScalar, nCoarseSpins, nbasis, 4 - 1, Matrix>;
-
-template<class Fobj, class CoarseScalar, int nCoarseSpins, int nbasis, class Matrix>
-using ThreeLevelMGPreconditioner = MultiGridPreconditioner<Fobj, CoarseScalar, nCoarseSpins, nbasis, 3 - 1, Matrix>;
-
-template<class Fobj, class CoarseScalar, int nCoarseSpins, int nbasis, class Matrix>
-using TwoLevelMGPreconditioner = MultiGridPreconditioner<Fobj, CoarseScalar, nCoarseSpins, nbasis, 2 - 1, Matrix>;
-
-template<class Fobj, class CoarseScalar, int nCoarseSpins, int nbasis, int nlevel, class Matrix>
-using NLevelMGPreconditioner = MultiGridPreconditioner<Fobj, CoarseScalar, nCoarseSpins, nbasis, nlevel - 1, Matrix>;
+template<class Fobj, class CoarseScalar, int nCoarseSpins, int nbasis, int nLevels, class Matrix>
+using NLevelMGPreconditioner = MultiGridPreconditioner<Fobj, CoarseScalar, nCoarseSpins, nbasis, nLevels - 1, Matrix>;
 
 int main(int argc, char **argv) {
 
@@ -575,11 +613,49 @@ int main(int argc, char **argv) {
   WilsonFermionR       Dw(Umu, *FGrid, *FrbGrid, mass);
   WilsonCloverFermionR Dwc(Umu, *FGrid, *FrbGrid, mass, csw_r, csw_t, wilsonAnisCoeff, wcImplparams);
 
-  // mgParams.blockSizes = {{2, 2, 2, 2}, {2, 2, 1, 1}, {1, 1, 2, 1}};
-  // mgParams.blockSizes = {{2, 2, 2, 2}, {2, 2, 1, 1}};
-  mgParams.blockSizes = {{2, 2, 2, 2}};
-  mgParams.nLevels    = mgParams.blockSizes.size() + 1;
-  mgParams.kCycle     = true;
+  // Params for two-level MG preconditioner
+  mgParams.nLevels                  = 2;
+  mgParams.blockSizes               = {{2, 2, 2, 2}};
+  mgParams.smootherTol              = {1e-14};
+  mgParams.smootherMaxOuterIter     = {1};
+  mgParams.smootherMaxInnerIter     = {1};
+  mgParams.kCycle                   = true;
+  mgParams.kCycleTol                = {1e-14};
+  mgParams.kCycleMaxOuterIter       = {1};
+  mgParams.kCycleMaxInnerIter       = {1};
+  mgParams.coarseSolverTol          = 1e-14;
+  mgParams.coarseSolverMaxOuterIter = 1;
+  mgParams.coarseSolverMaxInnerIter = 1;
+
+  // // Params for three-level MG preconditioner
+  // mgParams.nLevels                  = 3;
+  // mgParams.blockSizes               = {{2, 2, 2, 2}, {2, 2, 1, 1}};
+  // mgParams.smootherTol              = {1e-14, 1e-14};
+  // mgParams.smootherMaxOuterIter     = {1, 1};
+  // mgParams.smootherMaxInnerIter     = {1, 1};
+  // mgParams.kCycle                   = true;
+  // mgParams.kCycleTol                = {1e-14, 1e-14};
+  // mgParams.kCycleMaxOuterIter       = {1, 1};
+  // mgParams.kCycleMaxInnerIter       = {1, 1};
+  // mgParams.coarseSolverTol          = 1e-14;
+  // mgParams.coarseSolverMaxOuterIter = 1;
+  // mgParams.coarseSolverMaxInnerIter = 1;
+
+  // // // Params for four-level MG preconditioner
+  // mgParams.nLevels                  = 4;
+  // mgParams.blockSizes               = {{2, 2, 2, 2}, {2, 2, 1, 1}, {1, 1, 2, 1}};
+  // mgParams.smootherTol              = {1e-14, 1e-14, 1e-14};
+  // mgParams.smootherMaxOuterIter     = {1, 1, 1};
+  // mgParams.smootherMaxInnerIter     = {1, 1, 1};
+  // mgParams.kCycle                   = true;
+  // mgParams.kCycleTol                = {1e-14, 1e-14, 1e-14};
+  // mgParams.kCycleMaxOuterIter       = {1, 1, 1};
+  // mgParams.kCycleMaxInnerIter       = {1, 1, 1};
+  // mgParams.coarseSolverTol          = 1e-14;
+  // mgParams.coarseSolverMaxOuterIter = 1;
+  // mgParams.coarseSolverMaxInnerIter = 1;
+
+  checkParameterValidity(mgParams);
 
   std::cout << mgParams << std::endl;
 
@@ -595,11 +671,10 @@ int main(int argc, char **argv) {
   std::cout << GridLogMessage << "Testing Multigrid for Wilson" << std::endl;
   std::cout << GridLogMessage << "**************************************************" << std::endl;
 
-  TrivialPrecon<LatticeFermion>                                                     TrivialPrecon;
-  TwoLevelMGPreconditioner<vSpinColourVector, vTComplex, 1, nbasis, WilsonFermionR> TwoLevelMGPreconDw(mgParams, levelInfo, Dw, Dw);
-  // ThreeLevelMGPreconditioner<vSpinColourVector, vTComplex, 1, nbasis, WilsonFermionR> ThreeLevelMGPreconDw(levelInfo, Dw, Dw);
-  // FourLevelMGPreconditioner<vSpinColourVector, vTComplex, 1, nbasis, WilsonFermionR> FourLevelMGPreconDw(levelInfo, Dw, Dw);
-  // NLevelMGPreconditioner<vSpinColourVector, vTComplex, 1, nbasis, 4, WilsonFermionR> NLevelMGPreconDw(levelInfo, Dw, Dw);
+  TrivialPrecon<LatticeFermion>                                                      TrivialPrecon;
+  NLevelMGPreconditioner<vSpinColourVector, vTComplex, 1, nbasis, 2, WilsonFermionR> TwoLevelMGPreconDw(mgParams, levelInfo, Dw, Dw);
+  // NLevelMGPreconditioner<vSpinColourVector, vTComplex, 1, nbasis, 3, WilsonFermionR> ThreeLevelMGPreconDw(mgParams, levelInfo, Dw, Dw);
+  // NLevelMGPreconditioner<vSpinColourVector, vTComplex, 1, nbasis, 4, WilsonFermionR> FourLevelMGPreconDw(mgParams, levelInfo, Dw, Dw);
 
   TwoLevelMGPreconDw.setup();
   TwoLevelMGPreconDw.runChecks();
@@ -615,11 +690,10 @@ int main(int argc, char **argv) {
 
   std::vector<std::unique_ptr<OperatorFunction<LatticeFermion>>> solversDw;
 
-  solversDw.emplace_back(new FlexibleGeneralisedMinimalResidual<LatticeFermion>(1.0e-12, 50000, TrivialPrecon, 1000, false));
-  solversDw.emplace_back(new FlexibleGeneralisedMinimalResidual<LatticeFermion>(1.0e-12, 50000, TwoLevelMGPreconDw, 1000, false));
-  // solversDw.emplace_back(new FlexibleGeneralisedMinimalResidual<LatticeFermion>(1.0e-12, 50000, ThreeLevelMGPreconDw, 1000, false));
-  // solversDw.emplace_back(new FlexibleGeneralisedMinimalResidual<LatticeFermion>(1.0e-12, 50000, FourLevelMGPreconDw, 1000, false));
-  // solversDw.emplace_back(new FlexibleGeneralisedMinimalResidual<LatticeFermion>(1.0e-12, 50000, NLevelMGPreconDw, 1000, false));
+  solversDw.emplace_back(new FlexibleGeneralisedMinimalResidual<LatticeFermion>(1.0e-12, 50000, TrivialPrecon, 100, false));
+  solversDw.emplace_back(new FlexibleGeneralisedMinimalResidual<LatticeFermion>(1.0e-12, 50000, TwoLevelMGPreconDw, 100, false));
+  // solversDw.emplace_back(new FlexibleGeneralisedMinimalResidual<LatticeFermion>(1.0e-12, 50000, ThreeLevelMGPreconDw, 100, false));
+  // solversDw.emplace_back(new FlexibleGeneralisedMinimalResidual<LatticeFermion>(1.0e-12, 50000, FourLevelMGPreconDw, 100, false));
 
   for(auto const &solver : solversDw) {
     std::cout << "Starting with a new solver" << std::endl;
@@ -632,11 +706,10 @@ int main(int argc, char **argv) {
   std::cout << GridLogMessage << "Testing Multigrid for Wilson Clover" << std::endl;
   std::cout << GridLogMessage << "**************************************************" << std::endl;
 
-  TwoLevelMGPreconditioner<vSpinColourVector, vTComplex, 1, nbasis, WilsonCloverFermionR> TwoLevelMGPreconDwc(
+  NLevelMGPreconditioner<vSpinColourVector, vTComplex, 1, nbasis, 2, WilsonCloverFermionR> TwoLevelMGPreconDwc(
     mgParams, levelInfo, Dwc, Dwc);
-  // ThreeLevelMGPreconditioner<vSpinColourVector, vTComplex, 1, nbasis, WilsonCloverFermionR> ThreeLevelMGPreconDwc(levelInfo, Dwc, Dwc);
-  // FourLevelMGPreconditioner<vSpinColourVector, vTComplex, 1, nbasis, WilsonCloverFermionR> FourLevelMGPreconDwc(levelInfo, Dwc, Dwc);
-  // NLevelMGPreconditioner<vSpinColourVector, vTComplex, 1, nbasis, 4, WilsonCloverFermionR> NLevelMGPreconDwc(levelInfo, Dwc, Dwc);
+  // NLevelMGPreconditioner<vSpinColourVector, vTComplex, 1, nbasis, 3, WilsonCloverFermionR> ThreeLevelMGPreconDwc(mgParams, levelInfo, Dwc, Dwc);
+  // NLevelMGPreconditioner<vSpinColourVector, vTComplex, 1, nbasis, 4, WilsonCloverFermionR> FourLevelMGPreconDwc(mgParams, levelInfo, Dwc, Dwc);
 
   TwoLevelMGPreconDwc.setup();
   TwoLevelMGPreconDwc.runChecks();
@@ -652,11 +725,10 @@ int main(int argc, char **argv) {
 
   std::vector<std::unique_ptr<OperatorFunction<LatticeFermion>>> solversDwc;
 
-  solversDwc.emplace_back(new FlexibleGeneralisedMinimalResidual<LatticeFermion>(1.0e-12, 50000, TrivialPrecon, 1000, false));
-  solversDwc.emplace_back(new FlexibleGeneralisedMinimalResidual<LatticeFermion>(1.0e-12, 50000, TwoLevelMGPreconDwc, 1000, false));
-  // solversDwc.emplace_back(new FlexibleGeneralisedMinimalResidual<LatticeFermion>(1.0e-12, 50000, ThreeLevelMGPreconDwc, 1000, false));
-  // solversDwc.emplace_back(new FlexibleGeneralisedMinimalResidual<LatticeFermion>(1.0e-12, 50000, FourLevelMGPreconDwc, 1000, false));
-  // solversDwc.emplace_back(new FlexibleGeneralisedMinimalResidual<LatticeFermion>(1.0e-12, 50000, NLevelMGPreconDwc, 1000, false));
+  solversDwc.emplace_back(new FlexibleGeneralisedMinimalResidual<LatticeFermion>(1.0e-12, 50000, TrivialPrecon, 100, false));
+  solversDwc.emplace_back(new FlexibleGeneralisedMinimalResidual<LatticeFermion>(1.0e-12, 50000, TwoLevelMGPreconDwc, 100, false));
+  // solversDwc.emplace_back(new FlexibleGeneralisedMinimalResidual<LatticeFermion>(1.0e-12, 50000, ThreeLevelMGPreconDwc, 100, false));
+  // solversDwc.emplace_back(new FlexibleGeneralisedMinimalResidual<LatticeFermion>(1.0e-12, 50000, FourLevelMGPreconDwc, 100, false));
 
   for(auto const &solver : solversDwc) {
     std::cout << "Starting with a new solver" << std::endl;

From 917a92118a00a0d8716651a1edc45c7f96b2ad91 Mon Sep 17 00:00:00 2001
From: Daniel Richtmann <daniel.richtmann@physik.uni-regensburg.de>
Date: Wed, 28 Mar 2018 11:04:54 +0200
Subject: [PATCH 105/130] WilsonMG: Move operator test to MG testing routine

---
 tests/solver/Test_wilsonclover_mg.cc | 144 ++++++++-------------------
 1 file changed, 40 insertions(+), 104 deletions(-)

diff --git a/tests/solver/Test_wilsonclover_mg.cc b/tests/solver/Test_wilsonclover_mg.cc
index 956e68db..43332909 100644
--- a/tests/solver/Test_wilsonclover_mg.cc
+++ b/tests/solver/Test_wilsonclover_mg.cc
@@ -152,105 +152,6 @@ public:
   }
 };
 
-template<class Field> void testLinearOperator(LinearOperatorBase<Field> &LinOp, GridBase *Grid, std::string const &name = "") {
-
-  std::vector<int> seeds({1, 2, 3, 4});
-  GridParallelRNG  RNG(Grid);
-  RNG.SeedFixedIntegers(seeds);
-
-  {
-    std::cout << GridLogMessage << "Testing that Mdiag + Σ_μ Mdir_μ == M for operator " << name << ":" << std::endl;
-
-    // clang-format off
-    Field src(Grid);    random(RNG, src);
-    Field ref(Grid);    ref    = zero;
-    Field result(Grid); result = zero;
-    Field diag(Grid);   diag   = zero;
-    Field sumDir(Grid); sumDir = zero;
-    Field tmp(Grid);
-    Field err(Grid);
-    // clang-format on
-
-    std::cout << setprecision(9);
-
-    std::cout << GridLogMessage << " norm2(src)\t\t\t\t= " << norm2(src) << std::endl;
-
-    LinOp.OpDiag(src, diag);
-    std::cout << GridLogMessage << " norm2(Mdiag * src)\t\t\t= " << norm2(diag) << std::endl;
-
-    for(int dir = 0; dir < 4; dir++) {
-      for(auto disp : {+1, -1}) {
-        LinOp.OpDir(src, tmp, dir, disp);
-        std::cout << GridLogMessage << " norm2(Mdir_{" << dir << "," << disp << "} * src)\t\t= " << norm2(tmp) << std::endl;
-        sumDir = sumDir + tmp;
-      }
-    }
-    std::cout << GridLogMessage << " norm2(Σ_μ Mdir_μ * src)\t\t= " << norm2(sumDir) << std::endl;
-
-    result = diag + sumDir;
-    std::cout << GridLogMessage << " norm2((Mdiag + Σ_μ Mdir_μ) * src)\t= " << norm2(result) << std::endl;
-
-    LinOp.Op(src, ref);
-    std::cout << GridLogMessage << " norm2(M * src)\t\t\t= " << norm2(ref) << std::endl;
-
-    err = ref - result;
-    std::cout << GridLogMessage << " Absolute deviation\t\t\t= " << norm2(err) << std::endl;
-    std::cout << GridLogMessage << " Relative deviation\t\t\t= " << norm2(err) / norm2(ref) << std::endl;
-  }
-
-  {
-    std::cout << GridLogMessage << "Testing hermiticity stochastically for operator " << name << ":" << std::endl;
-
-    // clang-format off
-    Field phi(Grid); random(RNG, phi);
-    Field chi(Grid); random(RNG, chi);
-    Field MPhi(Grid);
-    Field MdagChi(Grid);
-    // clang-format on
-
-    LinOp.Op(phi, MPhi);
-    LinOp.AdjOp(chi, MdagChi);
-
-    ComplexD chiMPhi    = innerProduct(chi, MPhi);
-    ComplexD phiMdagChi = innerProduct(phi, MdagChi);
-
-    ComplexD phiMPhi    = innerProduct(phi, MPhi);
-    ComplexD chiMdagChi = innerProduct(chi, MdagChi);
-
-    std::cout << GridLogMessage << " chiMPhi = " << chiMPhi << " phiMdagChi = " << phiMdagChi
-              << " difference = " << chiMPhi - conjugate(phiMdagChi) << std::endl;
-
-    std::cout << GridLogMessage << " phiMPhi = " << phiMPhi << " chiMdagChi = " << chiMdagChi << " <- should be real if hermitian"
-              << std::endl;
-  }
-
-  {
-    std::cout << GridLogMessage << "Testing linearity for operator " << name << ":" << std::endl;
-
-    // clang-format off
-    Field phi(Grid); random(RNG, phi);
-    Field chi(Grid); random(RNG, chi);
-    Field phiPlusChi(Grid);
-    Field MPhi(Grid);
-    Field MChi(Grid);
-    Field MPhiPlusChi(Grid);
-    Field linearityError(Grid);
-    // clang-format on
-
-    LinOp.Op(phi, MPhi);
-    LinOp.Op(chi, MChi);
-
-    phiPlusChi = phi + chi;
-
-    LinOp.Op(phiPlusChi, MPhiPlusChi);
-
-    linearityError = MPhiPlusChi - MPhi;
-    linearityError = linearityError - MChi;
-
-    std::cout << GridLogMessage << " norm2(linearityError) = " << norm2(linearityError) << std::endl;
-  }
-}
-
 template<class Fobj, class CoarseScalar, int nCoarseSpins, int nBasis, int nCoarserLevels, class Matrix>
 class MultiGridPreconditioner : public LinearFunction<Lattice<Fobj>> {
 public:
@@ -429,12 +330,47 @@ public:
 
     auto tolerance = 1e-13; // TODO: this obviously depends on the precision we use, current value is for double
 
-    std::vector<FineVector>   fineTmps(2, _LevelInfo.Grids[_CurrentLevel]);
+    std::vector<FineVector>   fineTmps(7, _LevelInfo.Grids[_CurrentLevel]);
     std::vector<CoarseVector> coarseTmps(4, _LevelInfo.Grids[_NextCoarserLevel]);
 
     MdagMLinearOperator<FineMatrix, FineVector>     fineMdagMOp(_FineMatrix);
     MdagMLinearOperator<CoarseMatrix, CoarseVector> coarseMdagMOp(_CoarseMatrix);
 
+    std::cout << GridLogMG << " Level " << _CurrentLevel << ": **************************************************" << std::endl;
+    std::cout << GridLogMG << " Level " << _CurrentLevel << ": MG correctness check: 0 == (M - (Mdiag + Σ_μ Mdir_μ)) * v" << std::endl;
+    std::cout << GridLogMG << " Level " << _CurrentLevel << ": **************************************************" << std::endl;
+
+    random(_LevelInfo.PRNGs[_CurrentLevel], fineTmps[0]);
+
+    fineMdagMOp.Op(fineTmps[0], fineTmps[1]);     //     M * v
+    fineMdagMOp.OpDiag(fineTmps[0], fineTmps[2]); // Mdiag * v
+
+    fineTmps[4] = zero;
+    for(int dir = 0; dir < 4; dir++) { //       Σ_μ Mdir_μ * v
+      for(auto disp : {+1, -1}) {
+        fineMdagMOp.OpDir(fineTmps[0], fineTmps[3], dir, disp);
+        fineTmps[4] = fineTmps[4] + fineTmps[3];
+      }
+    }
+
+    fineTmps[5] = fineTmps[2] + fineTmps[4]; // (Mdiag + Σ_μ Mdir_μ) * v
+
+    fineTmps[6]    = fineTmps[1] - fineTmps[5];
+    auto deviation = std::sqrt(norm2(fineTmps[6]) / norm2(fineTmps[1]));
+
+    std::cout << GridLogMG << " Level " << _CurrentLevel << ": norm2(M * v)                    = " << norm2(fineTmps[1]) << std::endl;
+    std::cout << GridLogMG << " Level " << _CurrentLevel << ": norm2(Mdiag * v)                = " << norm2(fineTmps[2]) << std::endl;
+    std::cout << GridLogMG << " Level " << _CurrentLevel << ": norm2(Σ_μ Mdir_μ * v)           = " << norm2(fineTmps[4]) << std::endl;
+    std::cout << GridLogMG << " Level " << _CurrentLevel << ": norm2((Mdiag + Σ_μ Mdir_μ) * v) = " << norm2(fineTmps[5]) << std::endl;
+    std::cout << GridLogMG << " Level " << _CurrentLevel << ": relative deviation              = " << deviation;
+
+    if(deviation > tolerance) {
+      std::cout << " > " << tolerance << " -> check failed" << std::endl;
+      // abort();
+    } else {
+      std::cout << " < " << tolerance << " -> check passed" << std::endl;
+    }
+
     std::cout << GridLogMG << " Level " << _CurrentLevel << ": **************************************************" << std::endl;
     std::cout << GridLogMG << " Level " << _CurrentLevel << ": MG correctness check: 0 == (1 - P R) v" << std::endl;
     std::cout << GridLogMG << " Level " << _CurrentLevel << ": **************************************************" << std::endl;
@@ -443,8 +379,8 @@ public:
       _Aggregates.ProjectToSubspace(coarseTmps[0], _Aggregates.subspace[i]); //   R v_i
       _Aggregates.PromoteFromSubspace(coarseTmps[0], fineTmps[0]);           // P R v_i
 
-      fineTmps[1]    = _Aggregates.subspace[i] - fineTmps[0]; // v_i - P R v_i
-      auto deviation = std::sqrt(norm2(fineTmps[1]) / norm2(_Aggregates.subspace[i]));
+      fineTmps[1] = _Aggregates.subspace[i] - fineTmps[0]; // v_i - P R v_i
+      deviation   = std::sqrt(norm2(fineTmps[1]) / norm2(_Aggregates.subspace[i]));
 
       std::cout << GridLogMG << " Level " << _CurrentLevel << ": Vector " << i << ": norm2(v_i) = " << norm2(_Aggregates.subspace[i])
                 << " | norm2(R v_i) = " << norm2(coarseTmps[0]) << " | norm2(P R v_i) = " << norm2(fineTmps[0])
@@ -467,8 +403,8 @@ public:
     _Aggregates.PromoteFromSubspace(coarseTmps[0], fineTmps[0]); //   P v_c
     _Aggregates.ProjectToSubspace(coarseTmps[1], fineTmps[0]);   // R P v_c
 
-    coarseTmps[2]  = coarseTmps[0] - coarseTmps[1]; // v_c - R P v_c
-    auto deviation = std::sqrt(norm2(coarseTmps[2]) / norm2(coarseTmps[0]));
+    coarseTmps[2] = coarseTmps[0] - coarseTmps[1]; // v_c - R P v_c
+    deviation     = std::sqrt(norm2(coarseTmps[2]) / norm2(coarseTmps[0]));
 
     std::cout << GridLogMG << " Level " << _CurrentLevel << ": norm2(v_c) = " << norm2(coarseTmps[0])
               << " | norm2(R P v_c) = " << norm2(coarseTmps[1]) << " | norm2(P v_c) = " << norm2(fineTmps[0])

From 58c30c0cb17ea09dc6388b7ef147a6148d13d28d Mon Sep 17 00:00:00 2001
From: Daniel Richtmann <daniel.richtmann@physik.uni-regensburg.de>
Date: Wed, 28 Mar 2018 13:17:36 +0200
Subject: [PATCH 106/130] WilsonMG: Add conformability checks in MG
 preconditioner

---
 tests/solver/Test_wilsonclover_mg.cc | 13 ++++++++++++-
 1 file changed, 12 insertions(+), 1 deletion(-)

diff --git a/tests/solver/Test_wilsonclover_mg.cc b/tests/solver/Test_wilsonclover_mg.cc
index 43332909..e1bd328d 100644
--- a/tests/solver/Test_wilsonclover_mg.cc
+++ b/tests/solver/Test_wilsonclover_mg.cc
@@ -226,6 +226,9 @@ public:
 
   virtual void operator()(Lattice<Fobj> const &in, Lattice<Fobj> &out) {
 
+    conformable(_LevelInfo.Grids[_CurrentLevel], in._grid);
+    conformable(in, out);
+
     // TODO: implement a W-cycle
     if(_MultiGridParams.kCycle)
       kCycle(in, out);
@@ -484,6 +487,7 @@ public:
   // Member Data
   /////////////////////////////////////////////
 
+  int              _CurrentLevel;
   MultiGridParams &_MultiGridParams;
   LevelInfo &      _LevelInfo;
   FineMatrix &     _FineMatrix;
@@ -494,12 +498,19 @@ public:
   /////////////////////////////////////////////
 
   MultiGridPreconditioner(MultiGridParams &mgParams, LevelInfo &LvlInfo, FineMatrix &FineMat, FineMatrix &SmootherMat)
-    : _MultiGridParams(mgParams), _LevelInfo(LvlInfo), _FineMatrix(FineMat), _SmootherMatrix(SmootherMat) {}
+    : _CurrentLevel(mgParams.nLevels - (0 + 1))
+    , _MultiGridParams(mgParams)
+    , _LevelInfo(LvlInfo)
+    , _FineMatrix(FineMat)
+    , _SmootherMatrix(SmootherMat) {}
 
   void setup() {}
 
   virtual void operator()(Lattice<Fobj> const &in, Lattice<Fobj> &out) {
 
+    conformable(_LevelInfo.Grids[_CurrentLevel], in._grid);
+    conformable(in, out);
+
     auto coarseSolverMaxIter = _MultiGridParams.coarseSolverMaxOuterIter * _MultiGridParams.coarseSolverMaxInnerIter;
 
     // On the coarsest level we only have a fine what I above call the fine level, no coarse one

From 74f79c5ac70f1b8fee1c1e1952ac6fca724686d3 Mon Sep 17 00:00:00 2001
From: Daniel Richtmann <daniel.richtmann@physik.uni-regensburg.de>
Date: Thu, 29 Mar 2018 12:03:50 +0200
Subject: [PATCH 107/130] Revert "Add function to return full type as
 std::string"

This reverts commit 1cb745c8dcdf1099e2c9b693e87f851f8cf0d845.
---
 lib/tensors/Tensor_traits.h | 20 --------------------
 1 file changed, 20 deletions(-)

diff --git a/lib/tensors/Tensor_traits.h b/lib/tensors/Tensor_traits.h
index be0e550a..c1ef397a 100644
--- a/lib/tensors/Tensor_traits.h
+++ b/lib/tensors/Tensor_traits.h
@@ -23,7 +23,6 @@ Author: Christopher Kelly <ckelly@phys.columbia.edu>
 #define GRID_MATH_TRAITS_H
 
 #include <type_traits>
-#include <cxxabi.h>
 
 namespace Grid {
 
@@ -289,25 +288,6 @@ namespace Grid {
 
     enum { value = sizeof(real_scalar_type)/sizeof(float) };
   };
-
-  template<typename T> std::string getTypename() {
-
-    typedef typename std::remove_reference<T>::type TWoRef;
-
-    std::unique_ptr<char, void (*)(void *)> own(abi::__cxa_demangle(typeid(TWoRef).name(), nullptr, nullptr, nullptr), std::free);
-
-    std::string r = own != nullptr ? own.get() : typeid(TWoRef).name();
-
-    if(std::is_const<TWoRef>::value)
-      r += " const";
-    if(std::is_volatile<TWoRef>::value)
-      r += " volatile";
-    if(std::is_lvalue_reference<T>::value)
-      r += "&";
-    else if(std::is_rvalue_reference<T>::value)
-      r += "&&";
-    return r;
-  }
 }
 
 #endif

From 2530bfed01de828a19e195a14d5b0d5f8b4a3e2f Mon Sep 17 00:00:00 2001
From: Daniel Richtmann <daniel.richtmann@physik.uni-regensburg.de>
Date: Tue, 3 Apr 2018 14:50:48 +0200
Subject: [PATCH 108/130] WilsonMG: Move params instance from global scope to
 test main function

---
 tests/solver/Test_wilsonclover_mg.cc | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/tests/solver/Test_wilsonclover_mg.cc b/tests/solver/Test_wilsonclover_mg.cc
index e1bd328d..fee8fa4c 100644
--- a/tests/solver/Test_wilsonclover_mg.cc
+++ b/tests/solver/Test_wilsonclover_mg.cc
@@ -90,12 +90,11 @@ public:
                                   int,                           coarseSolverMaxInnerIter);
   MultiGridParams(){};
 };
-MultiGridParams mgParams;
 // clang-format on
 
 void checkParameterValidity(MultiGridParams const &params) {
 
-  auto correctSize = mgParams.nLevels - 1;
+  auto correctSize = params.nLevels - 1;
 
   assert(correctSize == params.blockSizes.size());
   assert(correctSize == params.smootherTol.size());
@@ -533,6 +532,8 @@ int main(int argc, char **argv) {
 
   Grid_init(&argc, &argv);
 
+  MultiGridParams mgParams;
+
   typename WilsonCloverFermionR::ImplParams wcImplparams;
   WilsonAnisotropyCoefficients              wilsonAnisCoeff;
 

From ff6413a7644d99c7bad7b792c0f5bbf777802400 Mon Sep 17 00:00:00 2001
From: Daniel Richtmann <daniel.richtmann@physik.uni-regensburg.de>
Date: Tue, 3 Apr 2018 15:57:33 +0200
Subject: [PATCH 109/130] WilsonMG: Make number of levels choosable at runtime

I don't like this solution though :(
---
 tests/solver/Test_wilsonclover_mg.cc | 90 +++++++++++++++-------------
 1 file changed, 47 insertions(+), 43 deletions(-)

diff --git a/tests/solver/Test_wilsonclover_mg.cc b/tests/solver/Test_wilsonclover_mg.cc
index fee8fa4c..e21aa5ab 100644
--- a/tests/solver/Test_wilsonclover_mg.cc
+++ b/tests/solver/Test_wilsonclover_mg.cc
@@ -151,8 +151,16 @@ public:
   }
 };
 
+template<class Field> class MultiGridPreconditionerBase : public LinearFunction<Field> {
+public:
+  virtual ~MultiGridPreconditionerBase()               = default;
+  virtual void setup()                                 = 0;
+  virtual void operator()(Field const &in, Field &out) = 0;
+  virtual void runChecks()                             = 0;
+};
+
 template<class Fobj, class CoarseScalar, int nCoarseSpins, int nBasis, int nCoarserLevels, class Matrix>
-class MultiGridPreconditioner : public LinearFunction<Lattice<Fobj>> {
+class MultiGridPreconditioner : public MultiGridPreconditionerBase<Lattice<Fobj>> {
 public:
   /////////////////////////////////////////////
   // Type Definitions
@@ -213,10 +221,10 @@ public:
     static_assert((nBasis & 0x1) == 0, "MG Preconditioner only supports an even number of basis vectors");
     int nb = nBasis / 2;
 
-    // TODO: to get this to work for more than two levels, I would need to either implement coarse spins or have a template specialization of this class also for the finest level
-    for(int n = 0; n < nb; n++) {
-      _Aggregates.subspace[n + nb] = g5 * _Aggregates.subspace[n];
-    }
+    // // TODO: to get this to work for more than two levels, I would need to either implement coarse spins or have a template specialization of this class also for the finest level
+    // for(int n = 0; n < nb; n++) {
+    //   _Aggregates.subspace[n + nb] = g5 * _Aggregates.subspace[n];
+    // }
 
     _CoarseMatrix.CoarsenOperator(_LevelInfo.Grids[_CurrentLevel], fineMdagMOp, _Aggregates);
 
@@ -473,7 +481,7 @@ public:
 
 // Specialization for the coarsest level
 template<class Fobj, class CoarseScalar, int nCoarseSpins, int nbasis, class Matrix>
-class MultiGridPreconditioner<Fobj, CoarseScalar, nCoarseSpins, nbasis, 0, Matrix> : public LinearFunction<Lattice<Fobj>> {
+class MultiGridPreconditioner<Fobj, CoarseScalar, nCoarseSpins, nbasis, 0, Matrix> : public MultiGridPreconditionerBase<Lattice<Fobj>> {
 public:
   /////////////////////////////////////////////
   // Type Definitions
@@ -528,6 +536,29 @@ public:
 template<class Fobj, class CoarseScalar, int nCoarseSpins, int nbasis, int nLevels, class Matrix>
 using NLevelMGPreconditioner = MultiGridPreconditioner<Fobj, CoarseScalar, nCoarseSpins, nbasis, nLevels - 1, Matrix>;
 
+template<class Fobj, class CoarseScalar, int nCoarseSpins, int nbasis, class Matrix>
+std::unique_ptr<MultiGridPreconditionerBase<Lattice<Fobj>>>
+createMGInstance(MultiGridParams &mgParams, LevelInfo &levelInfo, Matrix &FineMat, Matrix &SmootherMat) {
+
+  // clang-format off
+  #define CASE_FOR_N_LEVELS(nLevels)                                                                                                       \
+    case nLevels:                                                                                                                          \
+      return std::unique_ptr<NLevelMGPreconditioner<Fobj, CoarseScalar, nCoarseSpins, nbasis, nLevels, Matrix>>(                           \
+        new NLevelMGPreconditioner<Fobj, CoarseScalar, nCoarseSpins, nbasis, nLevels, Matrix>(mgParams, levelInfo, FineMat, SmootherMat)); \
+      break;
+  // clang-format on
+
+  switch(mgParams.nLevels) {
+    CASE_FOR_N_LEVELS(2);
+    CASE_FOR_N_LEVELS(3);
+    CASE_FOR_N_LEVELS(4);
+    default:
+      std::cout << GridLogError << "We currently only support nLevels ∈ {2, 3, 4}" << std::endl;
+      exit(EXIT_FAILURE);
+      break;
+  }
+}
+
 int main(int argc, char **argv) {
 
   Grid_init(&argc, &argv);
@@ -619,29 +650,16 @@ int main(int argc, char **argv) {
   std::cout << GridLogMessage << "Testing Multigrid for Wilson" << std::endl;
   std::cout << GridLogMessage << "**************************************************" << std::endl;
 
-  TrivialPrecon<LatticeFermion>                                                      TrivialPrecon;
-  NLevelMGPreconditioner<vSpinColourVector, vTComplex, 1, nbasis, 2, WilsonFermionR> TwoLevelMGPreconDw(mgParams, levelInfo, Dw, Dw);
-  // NLevelMGPreconditioner<vSpinColourVector, vTComplex, 1, nbasis, 3, WilsonFermionR> ThreeLevelMGPreconDw(mgParams, levelInfo, Dw, Dw);
-  // NLevelMGPreconditioner<vSpinColourVector, vTComplex, 1, nbasis, 4, WilsonFermionR> FourLevelMGPreconDw(mgParams, levelInfo, Dw, Dw);
+  TrivialPrecon<LatticeFermion> TrivialPrecon;
+  auto MGPreconDw = createMGInstance<vSpinColourVector, vTComplex, 1, nbasis, WilsonFermionR>(mgParams, levelInfo, Dw, Dw);
 
-  TwoLevelMGPreconDw.setup();
-  TwoLevelMGPreconDw.runChecks();
-
-  // ThreeLevelMGPreconDw.setup();
-  // ThreeLevelMGPreconDw.runChecks();
-
-  // FourLevelMGPreconDw.setup();
-  // FourLevelMGPreconDw.runChecks();
-
-  // NLevelMGPreconDw.setup();
-  // NLevelMGPreconDw.runChecks();
+  MGPreconDw->setup();
+  MGPreconDw->runChecks();
 
   std::vector<std::unique_ptr<OperatorFunction<LatticeFermion>>> solversDw;
 
   solversDw.emplace_back(new FlexibleGeneralisedMinimalResidual<LatticeFermion>(1.0e-12, 50000, TrivialPrecon, 100, false));
-  solversDw.emplace_back(new FlexibleGeneralisedMinimalResidual<LatticeFermion>(1.0e-12, 50000, TwoLevelMGPreconDw, 100, false));
-  // solversDw.emplace_back(new FlexibleGeneralisedMinimalResidual<LatticeFermion>(1.0e-12, 50000, ThreeLevelMGPreconDw, 100, false));
-  // solversDw.emplace_back(new FlexibleGeneralisedMinimalResidual<LatticeFermion>(1.0e-12, 50000, FourLevelMGPreconDw, 100, false));
+  solversDw.emplace_back(new FlexibleGeneralisedMinimalResidual<LatticeFermion>(1.0e-12, 50000, *MGPreconDw, 100, false));
 
   for(auto const &solver : solversDw) {
     std::cout << "Starting with a new solver" << std::endl;
@@ -654,32 +672,18 @@ int main(int argc, char **argv) {
   std::cout << GridLogMessage << "Testing Multigrid for Wilson Clover" << std::endl;
   std::cout << GridLogMessage << "**************************************************" << std::endl;
 
-  NLevelMGPreconditioner<vSpinColourVector, vTComplex, 1, nbasis, 2, WilsonCloverFermionR> TwoLevelMGPreconDwc(
-    mgParams, levelInfo, Dwc, Dwc);
-  // NLevelMGPreconditioner<vSpinColourVector, vTComplex, 1, nbasis, 3, WilsonCloverFermionR> ThreeLevelMGPreconDwc(mgParams, velInfo, Dwc, Dwc);
-  // NLevelMGPreconditioner<vSpinColourVector, vTComplex, 1, nbasis, 4, WilsonCloverFermionR> FourLevelMGPreconDwc(lelevelInfo, Dwc, Dwc);
+  auto MGPreconDwc = createMGInstance<vSpinColourVector, vTComplex, 1, nbasis, WilsonCloverFermionR>(mgParams, levelInfo, Dwc, Dwc);
 
-  TwoLevelMGPreconDwc.setup();
-  TwoLevelMGPreconDwc.runChecks();
-
-  // ThreeLevelMGPreconDwc.setup();
-  // ThreeLevelMGPreconDwc.runChecks();
-
-  // FourLevelMGPreconDwc.setup();
-  // FourLevelMGPreconDwc.runChecks();
-
-  // NLevelMGPreconDwc.setup();
-  // NLevelMGPreconDwc.runChecks();
+  MGPreconDwc->setup();
+  MGPreconDwc->runChecks();
 
   std::vector<std::unique_ptr<OperatorFunction<LatticeFermion>>> solversDwc;
 
   solversDwc.emplace_back(new FlexibleGeneralisedMinimalResidual<LatticeFermion>(1.0e-12, 50000, TrivialPrecon, 100, false));
-  solversDwc.emplace_back(new FlexibleGeneralisedMinimalResidual<LatticeFermion>(1.0e-12, 50000, TwoLevelMGPreconDwc, 100, false));
-  // solversDwc.emplace_back(new FlexibleGeneralisedMinimalResidual<LatticeFermion>(1.0e-12, 50000, ThreeLevelMGPreconDwc, 100, false));
-  // solversDwc.emplace_back(new FlexibleGeneralisedMinimalResidual<LatticeFermion>(1.0e-12, 50000, FourLevelMGPreconDwc, 100, false));
+  solversDwc.emplace_back(new FlexibleGeneralisedMinimalResidual<LatticeFermion>(1.0e-12, 50000, *MGPreconDwc, 100, false));
 
   for(auto const &solver : solversDwc) {
-    std::cout << "Starting with a new solver" << std::endl;
+    std::cout << std::endl << "Starting with a new solver" << std::endl;
     result = zero;
     (*solver)(MdagMOpDwc, src, result);
     std::cout << std::endl;

From 57a49ed22fdf4ad25d26da3802e3d3433ad68b0c Mon Sep 17 00:00:00 2001
From: Daniel Richtmann <daniel.richtmann@physik.uni-regensburg.de>
Date: Tue, 3 Apr 2018 16:03:11 +0200
Subject: [PATCH 110/130] WilsonMG: Read in MG parameters from xml in test

---
 tests/solver/Test_wilsonclover_mg.cc | 20 ++++++++++++++++----
 1 file changed, 16 insertions(+), 4 deletions(-)

diff --git a/tests/solver/Test_wilsonclover_mg.cc b/tests/solver/Test_wilsonclover_mg.cc
index e21aa5ab..5d03e4d6 100644
--- a/tests/solver/Test_wilsonclover_mg.cc
+++ b/tests/solver/Test_wilsonclover_mg.cc
@@ -520,7 +520,7 @@ public:
 
     auto coarseSolverMaxIter = _MultiGridParams.coarseSolverMaxOuterIter * _MultiGridParams.coarseSolverMaxInnerIter;
 
-    // On the coarsest level we only have a fine what I above call the fine level, no coarse one
+    // On the coarsest level we only have what I above call the fine level, no coarse one
     TrivialPrecon<FineVector>                      fineTrivialPreconditioner;
     FlexibleGeneralisedMinimalResidual<FineVector> fineFGMRES(
       _MultiGridParams.coarseSolverTol, coarseSolverMaxIter, fineTrivialPreconditioner, _MultiGridParams.coarseSolverMaxInnerIter, false);
@@ -592,7 +592,7 @@ int main(int argc, char **argv) {
   WilsonFermionR       Dw(Umu, *FGrid, *FrbGrid, mass);
   WilsonCloverFermionR Dwc(Umu, *FGrid, *FrbGrid, mass, csw_r, csw_t, wilsonAnisCoeff, wcImplparams);
 
-  // Params for two-level MG preconditioner
+  // Default params for two-level MG preconditioner (TODO: use sensible ones)
   mgParams.nLevels                  = 2;
   mgParams.blockSizes               = {{2, 2, 2, 2}};
   mgParams.smootherTol              = {1e-14};
@@ -606,7 +606,7 @@ int main(int argc, char **argv) {
   mgParams.coarseSolverMaxOuterIter = 1;
   mgParams.coarseSolverMaxInnerIter = 1;
 
-  // // Params for three-level MG preconditioner
+  // // Default params for three-level MG preconditioner (TODO: use sensible ones)
   // mgParams.nLevels                  = 3;
   // mgParams.blockSizes               = {{2, 2, 2, 2}, {2, 2, 1, 1}};
   // mgParams.smootherTol              = {1e-14, 1e-14};
@@ -620,7 +620,7 @@ int main(int argc, char **argv) {
   // mgParams.coarseSolverMaxOuterIter = 1;
   // mgParams.coarseSolverMaxInnerIter = 1;
 
-  // // // Params for four-level MG preconditioner
+  // // Default params for four-level MG preconditioner (TODO: use sensible ones)
   // mgParams.nLevels                  = 4;
   // mgParams.blockSizes               = {{2, 2, 2, 2}, {2, 2, 1, 1}, {1, 1, 2, 1}};
   // mgParams.smootherTol              = {1e-14, 1e-14, 1e-14};
@@ -634,6 +634,18 @@ int main(int argc, char **argv) {
   // mgParams.coarseSolverMaxOuterIter = 1;
   // mgParams.coarseSolverMaxInnerIter = 1;
 
+  {
+    XmlWriter writer("mg_params_template.xml");
+    write(writer, "Params", mgParams);
+    std::cout << GridLogMessage << " Written mg_params_template.xml" << std::endl;
+  }
+
+  {
+    std::string paramFileName{"./mg_params.xml"};
+    XmlReader   reader(paramFileName);
+    read(reader, "Params", mgParams);
+  }
+
   checkParameterValidity(mgParams);
 
   std::cout << mgParams << std::endl;

From f69008edf1766d6cb29401caccfb8cd6274ae4d0 Mon Sep 17 00:00:00 2001
From: Daniel Richtmann <daniel.richtmann@physik.uni-regensburg.de>
Date: Tue, 3 Apr 2018 17:26:49 +0200
Subject: [PATCH 111/130] WilsonMG: Add functionality to report timings to MG
 preconditioner

---
 tests/solver/Test_wilsonclover_mg.cc | 133 ++++++++++++++++++++++++---
 1 file changed, 121 insertions(+), 12 deletions(-)

diff --git a/tests/solver/Test_wilsonclover_mg.cc b/tests/solver/Test_wilsonclover_mg.cc
index 5d03e4d6..064b4d6e 100644
--- a/tests/solver/Test_wilsonclover_mg.cc
+++ b/tests/solver/Test_wilsonclover_mg.cc
@@ -157,6 +157,7 @@ public:
   virtual void setup()                                 = 0;
   virtual void operator()(Field const &in, Field &out) = 0;
   virtual void runChecks()                             = 0;
+  virtual void reportTimings()                         = 0;
 };
 
 template<class Fobj, class CoarseScalar, int nCoarseSpins, int nBasis, int nCoarserLevels, class Matrix>
@@ -180,16 +181,27 @@ public:
   // Member Data
   /////////////////////////////////////////////
 
-  int                                      _CurrentLevel;
-  int                                      _NextCoarserLevel;
-  MultiGridParams &                        _MultiGridParams;
-  LevelInfo &                              _LevelInfo;
-  FineMatrix &                             _FineMatrix;
-  FineMatrix &                             _SmootherMatrix;
-  Aggregates                               _Aggregates;
-  CoarseMatrix                             _CoarseMatrix;
+  int _CurrentLevel;
+  int _NextCoarserLevel;
+
+  MultiGridParams &_MultiGridParams;
+  LevelInfo &      _LevelInfo;
+
+  FineMatrix & _FineMatrix;
+  FineMatrix & _SmootherMatrix;
+  Aggregates   _Aggregates;
+  CoarseMatrix _CoarseMatrix;
+
   std::unique_ptr<NextPreconditionerLevel> _NextPreconditionerLevel;
 
+  GridStopWatch _SetupTotalTimer;
+  GridStopWatch _SetupNextLevelTimer;
+  GridStopWatch _SolveTotalTimer;
+  GridStopWatch _SolveRestrictionTimer;
+  GridStopWatch _SolveProlongationTimer;
+  GridStopWatch _SolveSmootherTimer;
+  GridStopWatch _SolveNextLevelTimer;
+
   /////////////////////////////////////////////
   // Member Functions
   /////////////////////////////////////////////
@@ -203,12 +215,17 @@ public:
     , _SmootherMatrix(SmootherMat)
     , _Aggregates(_LevelInfo.Grids[_NextCoarserLevel], _LevelInfo.Grids[_CurrentLevel], 0)
     , _CoarseMatrix(*_LevelInfo.Grids[_NextCoarserLevel]) {
+
     _NextPreconditionerLevel
       = std::unique_ptr<NextPreconditionerLevel>(new NextPreconditionerLevel(_MultiGridParams, _LevelInfo, _CoarseMatrix, _CoarseMatrix));
+
+    resetTimers();
   }
 
   void setup() {
 
+    _SetupTotalTimer.Start();
+
     Gamma                                       g5(Gamma::Algebra::Gamma5);
     MdagMLinearOperator<FineMatrix, FineVector> fineMdagMOp(_FineMatrix);
 
@@ -228,7 +245,11 @@ public:
 
     _CoarseMatrix.CoarsenOperator(_LevelInfo.Grids[_CurrentLevel], fineMdagMOp, _Aggregates);
 
+    _SetupNextLevelTimer.Start();
     _NextPreconditionerLevel->setup();
+    _SetupNextLevelTimer.Stop();
+
+    _SetupTotalTimer.Stop();
   }
 
   virtual void operator()(Lattice<Fobj> const &in, Lattice<Fobj> &out) {
@@ -245,6 +266,8 @@ public:
 
   void vCycle(Lattice<Fobj> const &in, Lattice<Fobj> &out) {
 
+    _SolveTotalTimer.Start();
+
     RealD inputNorm = norm2(in);
 
     CoarseVector coarseSrc(_LevelInfo.Grids[_NextCoarserLevel]);
@@ -265,16 +288,26 @@ public:
     MdagMLinearOperator<FineMatrix, FineVector> fineMdagMOp(_FineMatrix);
     MdagMLinearOperator<FineMatrix, FineVector> fineSmootherMdagMOp(_SmootherMatrix);
 
+    _SolveRestrictionTimer.Start();
     _Aggregates.ProjectToSubspace(coarseSrc, in);
+    _SolveRestrictionTimer.Stop();
+
+    _SolveNextLevelTimer.Start();
     (*_NextPreconditionerLevel)(coarseSrc, coarseSol);
+    _SolveNextLevelTimer.Stop();
+
+    _SolveProlongationTimer.Start();
     _Aggregates.PromoteFromSubspace(coarseSol, out);
+    _SolveProlongationTimer.Stop();
 
     fineMdagMOp.Op(out, fineTmp);
     fineTmp                                = in - fineTmp;
     auto r                                 = norm2(fineTmp);
     auto residualAfterCoarseGridCorrection = std::sqrt(r / inputNorm);
 
+    _SolveSmootherTimer.Start();
     fineFGMRES(fineSmootherMdagMOp, in, out);
+    _SolveSmootherTimer.Stop();
 
     fineMdagMOp.Op(out, fineTmp);
     fineTmp                        = in - fineTmp;
@@ -284,10 +317,14 @@ public:
     std::cout << GridLogMG << " Level " << _CurrentLevel << ": V-cycle: Input norm = " << std::sqrt(inputNorm)
               << " Coarse residual = " << residualAfterCoarseGridCorrection << " Post-Smoother residual = " << residualAfterPostSmoother
               << std::endl;
+
+    _SolveTotalTimer.Stop();
   }
 
   void kCycle(Lattice<Fobj> const &in, Lattice<Fobj> &out) {
 
+    _SolveTotalTimer.Start();
+
     RealD inputNorm = norm2(in);
 
     CoarseVector coarseSrc(_LevelInfo.Grids[_NextCoarserLevel]);
@@ -315,16 +352,26 @@ public:
     MdagMLinearOperator<FineMatrix, FineVector>     fineSmootherMdagMOp(_SmootherMatrix);
     MdagMLinearOperator<CoarseMatrix, CoarseVector> coarseMdagMOp(_CoarseMatrix);
 
+    _SolveRestrictionTimer.Start();
     _Aggregates.ProjectToSubspace(coarseSrc, in);
+    _SolveRestrictionTimer.Stop();
+
+    _SolveNextLevelTimer.Start();
     coarseFGMRES(coarseMdagMOp, coarseSrc, coarseSol);
+    _SolveNextLevelTimer.Stop();
+
+    _SolveProlongationTimer.Start();
     _Aggregates.PromoteFromSubspace(coarseSol, out);
+    _SolveProlongationTimer.Stop();
 
     fineMdagMOp.Op(out, fineTmp);
     fineTmp                                = in - fineTmp;
     auto r                                 = norm2(fineTmp);
     auto residualAfterCoarseGridCorrection = std::sqrt(r / inputNorm);
 
+    _SolveSmootherTimer.Start();
     fineFGMRES(fineSmootherMdagMOp, in, out);
+    _SolveSmootherTimer.Stop();
 
     fineMdagMOp.Op(out, fineTmp);
     fineTmp                        = in - fineTmp;
@@ -334,6 +381,8 @@ public:
     std::cout << GridLogMG << " Level " << _CurrentLevel << ": K-cycle: Input norm = " << std::sqrt(inputNorm)
               << " Coarse residual = " << residualAfterCoarseGridCorrection << " Post-Smoother residual = " << residualAfterPostSmoother
               << std::endl;
+
+    _SolveTotalTimer.Stop();
   }
 
   void runChecks() {
@@ -477,6 +526,34 @@ public:
 
     _NextPreconditionerLevel->runChecks();
   }
+
+  void reportTimings() {
+
+    // clang-format off
+    std::cout << GridLogMG << " Level " << _CurrentLevel << ": Time elapsed: Setup total        " <<        _SetupTotalTimer.Elapsed() << std::endl;
+    std::cout << GridLogMG << " Level " << _CurrentLevel << ": Time elapsed: Setup next level   " <<    _SetupNextLevelTimer.Elapsed() << std::endl;
+    std::cout << GridLogMG << " Level " << _CurrentLevel << ": Time elapsed: Solve total        " <<        _SolveTotalTimer.Elapsed() << std::endl;
+    std::cout << GridLogMG << " Level " << _CurrentLevel << ": Time elapsed: Solve restriction  " <<  _SolveRestrictionTimer.Elapsed() << std::endl;
+    std::cout << GridLogMG << " Level " << _CurrentLevel << ": Time elapsed: Solve prolongation " << _SolveProlongationTimer.Elapsed() << std::endl;
+    std::cout << GridLogMG << " Level " << _CurrentLevel << ": Time elapsed: Solve smoother     " <<     _SolveSmootherTimer.Elapsed() << std::endl;
+    std::cout << GridLogMG << " Level " << _CurrentLevel << ": Time elapsed: Solve next level   " <<    _SolveNextLevelTimer.Elapsed() << std::endl;
+    // clang-format on
+
+    _NextPreconditionerLevel->reportTimings();
+  }
+
+  void resetTimers() {
+
+    _SetupTotalTimer.Reset();
+    _SetupNextLevelTimer.Reset();
+    _SolveTotalTimer.Reset();
+    _SolveRestrictionTimer.Reset();
+    _SolveProlongationTimer.Reset();
+    _SolveSmootherTimer.Reset();
+    _SolveNextLevelTimer.Reset();
+
+    _NextPreconditionerLevel->resetTimers();
+  }
 };
 
 // Specialization for the coarsest level
@@ -494,11 +571,16 @@ public:
   // Member Data
   /////////////////////////////////////////////
 
-  int              _CurrentLevel;
+  int _CurrentLevel;
+
   MultiGridParams &_MultiGridParams;
   LevelInfo &      _LevelInfo;
-  FineMatrix &     _FineMatrix;
-  FineMatrix &     _SmootherMatrix;
+
+  FineMatrix &_FineMatrix;
+  FineMatrix &_SmootherMatrix;
+
+  GridStopWatch _SolveTotalTimer;
+  GridStopWatch _SolveSmootherTimer;
 
   /////////////////////////////////////////////
   // Member Functions
@@ -509,12 +591,17 @@ public:
     , _MultiGridParams(mgParams)
     , _LevelInfo(LvlInfo)
     , _FineMatrix(FineMat)
-    , _SmootherMatrix(SmootherMat) {}
+    , _SmootherMatrix(SmootherMat) {
+
+    resetTimers();
+  }
 
   void setup() {}
 
   virtual void operator()(Lattice<Fobj> const &in, Lattice<Fobj> &out) {
 
+    _SolveTotalTimer.Start();
+
     conformable(_LevelInfo.Grids[_CurrentLevel], in._grid);
     conformable(in, out);
 
@@ -527,10 +614,28 @@ public:
 
     MdagMLinearOperator<FineMatrix, FineVector> fineMdagMOp(_FineMatrix);
 
+    _SolveSmootherTimer.Start();
     fineFGMRES(fineMdagMOp, in, out);
+    _SolveSmootherTimer.Stop();
+
+    _SolveTotalTimer.Stop();
   }
 
   void runChecks() {}
+
+  void reportTimings() {
+
+    // clang-format off
+    std::cout << GridLogMG << " Level " << _CurrentLevel << ": Time elapsed: Solve total        " <<    _SolveTotalTimer.Elapsed() << std::endl;
+    std::cout << GridLogMG << " Level " << _CurrentLevel << ": Time elapsed: Solve smoother     " << _SolveSmootherTimer.Elapsed() << std::endl;
+    // clang-format on
+  }
+
+  void resetTimers() {
+
+    _SolveTotalTimer.Reset();
+    _SolveSmootherTimer.Reset();
+  }
 };
 
 template<class Fobj, class CoarseScalar, int nCoarseSpins, int nbasis, int nLevels, class Matrix>
@@ -680,6 +785,8 @@ int main(int argc, char **argv) {
     std::cout << std::endl;
   }
 
+  MGPreconDw->reportTimings();
+
   std::cout << GridLogMessage << "**************************************************" << std::endl;
   std::cout << GridLogMessage << "Testing Multigrid for Wilson Clover" << std::endl;
   std::cout << GridLogMessage << "**************************************************" << std::endl;
@@ -701,5 +808,7 @@ int main(int argc, char **argv) {
     std::cout << std::endl;
   }
 
+  MGPreconDwc->reportTimings();
+
   Grid_finalize();
 }

From df8c208f5c1aa9283a98e04935f6ca6f765baa80 Mon Sep 17 00:00:00 2001
From: Daniel Richtmann <daniel.richtmann@physik.uni-regensburg.de>
Date: Wed, 16 May 2018 16:02:17 +0200
Subject: [PATCH 112/130] WilsonMG: Revert CoarsenedMatrix.h and
 Lattice_transfer.h back to state of develop branch

---
 lib/algorithms/CoarsenedMatrix.h | 102 +++++++++----------------------
 lib/lattice/Lattice_transfer.h   |  29 ++++-----
 2 files changed, 40 insertions(+), 91 deletions(-)

diff --git a/lib/algorithms/CoarsenedMatrix.h b/lib/algorithms/CoarsenedMatrix.h
index 15db315a..8af8d7ac 100644
--- a/lib/algorithms/CoarsenedMatrix.h
+++ b/lib/algorithms/CoarsenedMatrix.h
@@ -93,16 +93,12 @@ namespace Grid {
   template<class Fobj,class CComplex,int nbasis>
   class Aggregation   {
   public:
+    typedef iVector<CComplex,nbasis >             siteVector;
+    typedef Lattice<siteVector>                 CoarseVector;
+    typedef Lattice<iMatrix<CComplex,nbasis > > CoarseMatrix;
 
-    typedef typename CComplex::vector_type                     innerType;
-    typedef iScalar<iScalar<iScalar<innerType         >    > > siteScalar; // used for inner products on fine field
-    typedef iScalar<iVector<iVector<innerType, nbasis >, 1 > > siteVector;
-    typedef iScalar<iMatrix<iMatrix<innerType, nbasis >, 1 > > siteMatrix;
-    typedef Lattice<siteScalar> CoarseScalar; // used for inner products on fine field
-    typedef Lattice<siteVector> CoarseVector;
-    typedef Lattice<siteMatrix> CoarseMatrix;
-
-    typedef Lattice<Fobj>       FineField;
+    typedef Lattice< CComplex >   CoarseScalar; // used for inner products on fine field
+    typedef Lattice<Fobj >        FineField;
 
     GridBase *CoarseGrid;
     GridBase *FineGrid;
@@ -119,12 +115,12 @@ namespace Grid {
   
     void Orthogonalise(void){
       CoarseScalar InnerProd(CoarseGrid); 
-      std::cout << GridLogMessage <<"Gram-Schmidt pass 1"<<std::endl;
+      std::cout << GridLogMessage <<" Gramm-Schmidt pass 1"<<std::endl;
       blockOrthogonalise(InnerProd,subspace);
-      std::cout << GridLogMessage <<"Gram-Schmidt pass 2"<<std::endl;
+      std::cout << GridLogMessage <<" Gramm-Schmidt pass 2"<<std::endl;
       blockOrthogonalise(InnerProd,subspace);
-      std::cout << GridLogMessage <<"Gram-Schmidt checking orthogonality"<<std::endl;
-      CheckOrthogonal();
+      //      std::cout << GridLogMessage <<" Gramm-Schmidt checking orthogonality"<<std::endl;
+      //      CheckOrthogonal();
     } 
     void CheckOrthogonal(void){
       CoarseVector iProj(CoarseGrid); 
@@ -133,7 +129,7 @@ namespace Grid {
 	blockProject(iProj,subspace[i],subspace);
 	eProj=zero; 
 	parallel_for(int ss=0;ss<CoarseGrid->oSites();ss++){
-	  eProj._odata[ss]()(0)(i)=innerType(1.0);
+	  eProj._odata[ss](i)=CComplex(1.0);
 	}
 	eProj=eProj - iProj;
 	std::cout<<GridLogMessage<<"Orthog check error "<<i<<" " << norm2(eProj)<<std::endl;
@@ -209,8 +205,7 @@ namespace Grid {
 
       RealD scale;
 
-      TrivialPrecon<FineField> TrivialPrec;
-      FlexibleGeneralisedMinimalResidual<FineField> FGMRES(1.0e-14,1,TrivialPrec,1,false); // TODO: need to use GMRES as long as Mdag doesn't work on coarser levels (i.e., MdagM isn't hermitian)
+      ConjugateGradient<FineField> CG(1.0e-2,10000);
       FineField noise(FineGrid);
       FineField Mn(FineGrid);
 
@@ -222,9 +217,9 @@ namespace Grid {
 
 	hermop.Op(noise,Mn); std::cout<<GridLogMessage << "noise   ["<<b<<"] <n|MdagM|n> "<<norm2(Mn)<<std::endl;
 
-	for(int i=0;i<3;i++){
+	for(int i=0;i<1;i++){
 
-	  FGMRES(hermop,noise,subspace[b]);
+	  CG(hermop,noise,subspace[b]);
 
 	  noise = subspace[b];
 	  scale = std::pow(norm2(noise),-0.5); 
@@ -244,18 +239,15 @@ namespace Grid {
   // Fine Object == (per site) type of fine field
   // nbasis      == number of deflation vectors
   template<class Fobj,class CComplex,int nbasis>
-  class CoarsenedMatrix : public SparseMatrixBase<Lattice<iScalar<iVector<iVector<typename CComplex::vector_type, nbasis >, 1 > > > >  {
+  class CoarsenedMatrix : public SparseMatrixBase<Lattice<iVector<CComplex,nbasis > > >  {
   public:
+    
+    typedef iVector<CComplex,nbasis >             siteVector;
+    typedef Lattice<siteVector>                 CoarseVector;
+    typedef Lattice<iMatrix<CComplex,nbasis > > CoarseMatrix;
 
-    typedef typename CComplex::vector_type                     innerType;
-    typedef iScalar<iScalar<iScalar<innerType         >    > > siteScalar;
-    typedef iScalar<iVector<iVector<innerType, nbasis >, 1 > > siteVector;
-    typedef iScalar<iMatrix<iMatrix<innerType, nbasis >, 1 > > siteMatrix;
-    typedef Lattice<siteScalar> CoarseScalar; // used for inner products on fine field
-    typedef Lattice<siteVector> CoarseVector;
-    typedef Lattice<siteMatrix> CoarseMatrix;
-
-    typedef Lattice<Fobj>       FineField;
+    typedef Lattice< CComplex >   CoarseScalar; // used for inner products on fine field
+    typedef Lattice<Fobj >        FineField;
 
     ////////////////////
     // Data members
@@ -303,50 +295,13 @@ namespace Grid {
       return norm2(out);
     };
 
-    RealD Mdag (const CoarseVector &in, CoarseVector &out){ // TODO: get this correct
+    RealD Mdag (const CoarseVector &in, CoarseVector &out){ 
       return M(in,out);
     };
 
-    void Mdir(const CoarseVector &in, CoarseVector &out, int dir, int disp) {
-
-      conformable(_grid,in._grid);
-      conformable(in._grid,out._grid);
-
-      SimpleCompressor<siteVector> compressor;
-      Stencil.HaloExchange(in,compressor);
-
-      auto point = [dir, disp](){
-        if(dir == 0 and disp == 0)
-          return 8;
-        else
-          return (4 * dir + 1 - disp) / 2;
-      }();
-
-      parallel_for(int ss=0;ss<Grid()->oSites();ss++){
-        siteVector res = zero;
-        siteVector nbr;
-        int ptype;
-        StencilEntry *SE;
-
-        SE=Stencil.GetEntry(ptype,point,ss);
-
-        if(SE->_is_local&&SE->_permute) {
-          permute(nbr,in._odata[SE->_offset],ptype);
-        } else if(SE->_is_local) {
-          nbr = in._odata[SE->_offset];
-        } else {
-          nbr = Stencil.CommBuf()[SE->_offset];
-        }
-
-        res = res + A[point]._odata[ss]*nbr;
-
-        vstream(out._odata[ss],res);
-      }
-    };
-
-    void Mdiag(const CoarseVector &in, CoarseVector &out) {
-      Mdir(in, out, 0, 0); // use the self coupling (= last) point of the stencil
-    };
+    // Defer support for further coarsening for now
+    void Mdiag    (const CoarseVector &in,  CoarseVector &out){};
+    void Mdir     (const CoarseVector &in,  CoarseVector &out,int dir, int disp){};
 
     CoarsenedMatrix(GridCartesian &CoarseGrid) 	: 
 
@@ -372,9 +327,10 @@ namespace Grid {
 
       CoarseVector iProj(Grid()); 
       CoarseVector oProj(Grid()); 
+      CoarseScalar InnerProd(Grid()); 
 
       // Orthogonalise the subblocks over the basis
-      Subspace.Orthogonalise();
+      blockOrthogonalise(InnerProd,Subspace.subspace);
 
       // Compute the matrix elements of linop between this orthonormal
       // set of vectors.
@@ -431,9 +387,9 @@ namespace Grid {
 	  parallel_for(int ss=0;ss<Grid()->oSites();ss++){
 	    for(int j=0;j<nbasis;j++){
 	      if( disp!= 0 ) {
-		A[p]._odata[ss]()(0,0)(j,i) = oProj._odata[ss]()(0)(j);
+		A[p]._odata[ss](j,i) = oProj._odata[ss](j);
 	      }
-	      A[self_stencil]._odata[ss]()(0,0)(j,i) =	A[self_stencil]._odata[ss]()(0,0)(j,i) + iProj._odata[ss]()(0)(j);
+	      A[self_stencil]._odata[ss](j,i) =	A[self_stencil]._odata[ss](j,i) + iProj._odata[ss](j);
 	    }
 	  }
 	}
@@ -461,7 +417,7 @@ namespace Grid {
       std::cout<<GridLogMessage<<"Computed Coarse Operator"<<std::endl;
 #endif
       //      ForceHermitian();
-      // AssertHermitian();
+      AssertHermitian();
       // ForceDiagonal();
     }
     void ForceDiagonal(void) {
diff --git a/lib/lattice/Lattice_transfer.h b/lib/lattice/Lattice_transfer.h
index 95c3608a..f988f310 100644
--- a/lib/lattice/Lattice_transfer.h
+++ b/lib/lattice/Lattice_transfer.h
@@ -80,8 +80,8 @@ inline void subdivides(GridBase *coarse,GridBase *fine)
   }
   
 
-template<class vobj,class vobjC>
-inline void blockProject(Lattice<vobjC>                    &coarseData,
+template<class vobj,class CComplex,int nbasis>
+inline void blockProject(Lattice<iVector<CComplex,nbasis > > &coarseData,
 			 const             Lattice<vobj>   &fineData,
 			 const std::vector<Lattice<vobj> > &Basis)
 {
@@ -90,8 +90,7 @@ inline void blockProject(Lattice<vobjC>                    &coarseData,
   int  _ndimension = coarse->_ndimension;
 
   // checks
-  assert((Basis.size() != 0) && ((Basis.size() & 0x1) == 0));
-  auto nbasis = Basis.size();
+  assert( nbasis == Basis.size() );
   subdivides(coarse,fine); 
   for(int i=0;i<nbasis;i++){
     conformable(Basis[i],fineData);
@@ -119,8 +118,8 @@ inline void blockProject(Lattice<vobjC>                    &coarseData,
 PARALLEL_CRITICAL
     for(int i=0;i<nbasis;i++) {
 
-      coarseData._odata[sc]()(0)(i)=coarseData._odata[sc]()(0)(i)
-        + TensorRemove(innerProduct(Basis[i]._odata[sf],fineData._odata[sf]));
+      coarseData._odata[sc](i)=coarseData._odata[sc](i)
+	+ innerProduct(Basis[i]._odata[sf],fineData._odata[sf]);
 
     }
   }
@@ -286,9 +285,9 @@ inline void blockOrthogonalise(Lattice<CComplex> &ip,std::vector<Lattice<vobj> >
   }
 }
 
-template<class vobj,class vobjC>
-inline void blockPromote(const Lattice<vobjC>              &coarseData,
-			 Lattice<vobj>                     &fineData,
+template<class vobj,class CComplex,int nbasis>
+inline void blockPromote(const Lattice<iVector<CComplex,nbasis > > &coarseData,
+			 Lattice<vobj>   &fineData,
 			 const std::vector<Lattice<vobj> > &Basis)
 {
   GridBase * fine  = fineData._grid;
@@ -296,9 +295,7 @@ inline void blockPromote(const Lattice<vobjC>              &coarseData,
   int  _ndimension = coarse->_ndimension;
 
   // checks
-  assert((Basis.size() != 0) && ((Basis.size() & 0x1) == 0));
-  auto nbasis = Basis.size();
-
+  assert( nbasis == Basis.size() );
   subdivides(coarse,fine); 
   for(int i=0;i<nbasis;i++){
     conformable(Basis[i]._grid,fine);
@@ -322,13 +319,9 @@ inline void blockPromote(const Lattice<vobjC>              &coarseData,
       for(int d=0;d<_ndimension;d++) coor_c[d]=coor_f[d]/block_r[d];
       Lexicographic::IndexFromCoor(coor_c,sc,coarse->_rdimensions);
       
-      // The temporary is necessary, since a pure instance of Grid::simd<...> is
-      // not a valid argument to operator+ with an iVector, we need an an iScalar
-      typename vobjC::tensor_reduced tmp; // iScalar<iVector<iVector<...>>> -> iScalar<iScalar<iScalar<...>>>
       for(int i=0;i<nbasis;i++) {
-        tmp = coarseData._odata[sc]()(0)(i);
-        if(i==0) fineData._odata[sf] = tmp * Basis[i]._odata[sf];
-        else     fineData._odata[sf]=fineData._odata[sf]+tmp*Basis[i]._odata[sf];
+	if(i==0) fineData._odata[sf]=coarseData._odata[sc](i) * Basis[i]._odata[sf];
+	else     fineData._odata[sf]=fineData._odata[sf]+coarseData._odata[sc](i)*Basis[i]._odata[sf];
       }
     }
   }

From c48b69ca8143a7c6e33be6cc86bb7abf32da0650 Mon Sep 17 00:00:00 2001
From: Daniel Richtmann <daniel.richtmann@physik.uni-regensburg.de>
Date: Wed, 16 May 2018 16:07:58 +0200
Subject: [PATCH 113/130] WilsonMG: Implement Mdir & Mdiag in CoarsenedMatrix

---
 lib/algorithms/CoarsenedMatrix.h | 42 +++++++++++++++++++++++++++++---
 1 file changed, 39 insertions(+), 3 deletions(-)

diff --git a/lib/algorithms/CoarsenedMatrix.h b/lib/algorithms/CoarsenedMatrix.h
index 8af8d7ac..4b94652e 100644
--- a/lib/algorithms/CoarsenedMatrix.h
+++ b/lib/algorithms/CoarsenedMatrix.h
@@ -297,11 +297,47 @@ namespace Grid {
 
     RealD Mdag (const CoarseVector &in, CoarseVector &out){ 
       return M(in,out);
+
+    void Mdir(const CoarseVector &in, CoarseVector &out, int dir, int disp){
+
+      conformable(_grid,in._grid);
+      conformable(in._grid,out._grid);
+
+      SimpleCompressor<siteVector> compressor;
+      Stencil.HaloExchange(in,compressor);
+
+      auto point = [dir, disp](){
+        if(dir == 0 and disp == 0)
+          return 8;
+        else
+          return (4 * dir + 1 - disp) / 2;
+      }();
+
+      parallel_for(int ss=0;ss<Grid()->oSites();ss++){
+        siteVector res = zero;
+        siteVector nbr;
+        int ptype;
+        StencilEntry *SE;
+
+        SE=Stencil.GetEntry(ptype,point,ss);
+
+        if(SE->_is_local&&SE->_permute) {
+          permute(nbr,in._odata[SE->_offset],ptype);
+        } else if(SE->_is_local) {
+          nbr = in._odata[SE->_offset];
+        } else {
+          nbr = Stencil.CommBuf()[SE->_offset];
+        }
+
+        res = res + A[point]._odata[ss]*nbr;
+
+        vstream(out._odata[ss],res);
+      }
     };
 
-    // Defer support for further coarsening for now
-    void Mdiag    (const CoarseVector &in,  CoarseVector &out){};
-    void Mdir     (const CoarseVector &in,  CoarseVector &out,int dir, int disp){};
+    void Mdiag(const CoarseVector &in, CoarseVector &out){
+      Mdir(in, out, 0, 0); // use the self coupling (= last) point of the stencil
+    };
 
     CoarsenedMatrix(GridCartesian &CoarseGrid) 	: 
 

From 68d686ec38f8c54efda2392802bbfaea1844547e Mon Sep 17 00:00:00 2001
From: Daniel Richtmann <daniel.richtmann@physik.uni-regensburg.de>
Date: Wed, 16 May 2018 16:17:14 +0200
Subject: [PATCH 114/130] WilsonMG: Add functionality for applying G5 on coarse
 grids

---
 lib/qcd/utils/LinalgUtils.h | 35 ++++++++++++++++++++++++++++++++++-
 1 file changed, 34 insertions(+), 1 deletion(-)

diff --git a/lib/qcd/utils/LinalgUtils.h b/lib/qcd/utils/LinalgUtils.h
index 5eaf1c2a..04a224e5 100644
--- a/lib/qcd/utils/LinalgUtils.h
+++ b/lib/qcd/utils/LinalgUtils.h
@@ -173,6 +173,39 @@ void G5R5(Lattice<vobj> &z,const Lattice<vobj> &x)
     }
   }
 }
+}
 
-}}
+// I explicitly need these outside the QCD namespace
+template<typename vobj>
+void G5C(Lattice<vobj> &z, const Lattice<vobj> &x)
+{
+  GridBase *grid = x._grid;
+  z.checkerboard = x.checkerboard;
+  conformable(x, z);
+
+  QCD::Gamma G5(QCD::Gamma::Algebra::Gamma5);
+  z = G5 * x;
+}
+
+template<class CComplex, int nbasis>
+void G5C(Lattice<iVector<CComplex, nbasis>> &z, const Lattice<iVector<CComplex, nbasis>> &x)
+{
+  GridBase *grid = x._grid;
+  z.checkerboard = x.checkerboard;
+  conformable(x, z);
+
+  static_assert(nbasis % 2 == 0, "");
+  int nb = nbasis / 2;
+
+  parallel_for(int ss = 0; ss < grid->oSites(); ss++) {
+    for(int n = 0; n < nb; ++n) {
+      z._odata[ss](n) = x._odata[ss](n);
+    }
+    for(int n = nb; n < nbasis; ++n) {
+      z._odata[ss](n) = -x._odata[ss](n);
+    }
+  }
+}
+
+}
 #endif 

From 4b8710970c78682e6873ebbc61ba40a1e78226bf Mon Sep 17 00:00:00 2001
From: Daniel Richtmann <daniel.richtmann@physik.uni-regensburg.de>
Date: Wed, 16 May 2018 17:23:12 +0200
Subject: [PATCH 115/130] WilsonMG: Switch to Galerkin coarsening in
 CoarsenedMatrix

---
 lib/algorithms/CoarsenedMatrix.h | 15 ++++++++++++---
 1 file changed, 12 insertions(+), 3 deletions(-)

diff --git a/lib/algorithms/CoarsenedMatrix.h b/lib/algorithms/CoarsenedMatrix.h
index 4b94652e..1d42bab5 100644
--- a/lib/algorithms/CoarsenedMatrix.h
+++ b/lib/algorithms/CoarsenedMatrix.h
@@ -295,8 +295,17 @@ namespace Grid {
       return norm2(out);
     };
 
-    RealD Mdag (const CoarseVector &in, CoarseVector &out){ 
-      return M(in,out);
+    RealD Mdag (const CoarseVector &in, CoarseVector &out){
+      // // corresponds to Petrov-Galerkin coarsening
+      // return M(in,out);
+
+      // corresponds to Galerkin coarsening
+      CoarseVector tmp(Grid());
+      G5C(tmp, in);
+      M(tmp, out);
+      G5C(out, out);
+      return norm2(out);
+    };
 
     void Mdir(const CoarseVector &in, CoarseVector &out, int dir, int disp){
 
@@ -453,7 +462,7 @@ namespace Grid {
       std::cout<<GridLogMessage<<"Computed Coarse Operator"<<std::endl;
 #endif
       //      ForceHermitian();
-      AssertHermitian();
+      // AssertHermitian();
       // ForceDiagonal();
     }
     void ForceDiagonal(void) {

From 9c003d2d727cb93d9648d2c4af75432a82a5ee81 Mon Sep 17 00:00:00 2001
From: Daniel Richtmann <daniel.richtmann@physik.uni-regensburg.de>
Date: Wed, 16 May 2018 16:46:25 +0200
Subject: [PATCH 116/130] WilsonMG: Base wilson mg preconditioner entirely on
 existing infrastructure

---
 tests/solver/Test_wilsonclover_mg.cc | 212 ++++++++++++---------------
 1 file changed, 96 insertions(+), 116 deletions(-)

diff --git a/tests/solver/Test_wilsonclover_mg.cc b/tests/solver/Test_wilsonclover_mg.cc
index 064b4d6e..18e9ae5e 100644
--- a/tests/solver/Test_wilsonclover_mg.cc
+++ b/tests/solver/Test_wilsonclover_mg.cc
@@ -32,42 +32,6 @@ using namespace std;
 using namespace Grid;
 using namespace Grid::QCD;
 
-template<class Field, int nbasis> class TestVectorAnalyzer {
-public:
-  void operator()(LinearOperatorBase<Field> &Linop, std::vector<Field> const &vectors, int nn = nbasis) {
-
-    auto positiveOnes = 0;
-
-    std::vector<Field> tmp(4, vectors[0]._grid);
-    Gamma              g5(Gamma::Algebra::Gamma5);
-
-    std::cout << GridLogMessage << "Test vector analysis:" << std::endl;
-
-    for(auto i = 0; i < nn; ++i) {
-
-      Linop.Op(vectors[i], tmp[3]);
-
-      tmp[0] = g5 * tmp[3];
-
-      auto lambda = innerProduct(vectors[i], tmp[0]) / innerProduct(vectors[i], vectors[i]);
-
-      tmp[1] = tmp[0] - lambda * vectors[i];
-
-      auto mu = ::sqrt(norm2(tmp[1]) / norm2(vectors[i]));
-
-      auto nrm = ::sqrt(norm2(vectors[i]));
-
-      if(real(lambda) > 0)
-        positiveOnes++;
-
-      std::cout << GridLogMessage << std::scientific << std::setprecision(2) << std::setw(2) << std::showpos << "vector " << i << ": "
-                << "singular value: " << lambda << ", singular vector precision: " << mu << ", norm: " << nrm << std::endl;
-    }
-    std::cout << GridLogMessage << std::scientific << std::setprecision(2) << std::setw(2) << std::showpos << positiveOnes << " out of "
-              << nn << " vectors were positive" << std::endl;
-  }
-};
-
 // TODO: Can think about having one parameter struct per level and then a
 // vector of these structs. How well would that work together with the
 // serialization strategy of Grid?
@@ -156,11 +120,11 @@ public:
   virtual ~MultiGridPreconditionerBase()               = default;
   virtual void setup()                                 = 0;
   virtual void operator()(Field const &in, Field &out) = 0;
-  virtual void runChecks()                             = 0;
+  virtual void runChecks(RealD tolerance)              = 0;
   virtual void reportTimings()                         = 0;
 };
 
-template<class Fobj, class CoarseScalar, int nCoarseSpins, int nBasis, int nCoarserLevels, class Matrix>
+template<class Fobj, class CComplex, int nBasis, int nCoarserLevels, class Matrix>
 class MultiGridPreconditioner : public MultiGridPreconditionerBase<Lattice<Fobj>> {
 public:
   /////////////////////////////////////////////
@@ -168,13 +132,13 @@ public:
   /////////////////////////////////////////////
 
   // clang-format off
-  typedef Aggregation<Fobj, CoarseScalar, nBasis>                                                                         Aggregates;
-  typedef CoarsenedMatrix<Fobj, CoarseScalar, nBasis>                                                                     CoarseMatrix;
-  typedef typename Aggregates::CoarseVector                                                                               CoarseVector;
-  typedef typename Aggregates::siteVector                                                                                 CoarseSiteVector;
-  typedef Matrix                                                                                                          FineMatrix;
-  typedef typename Aggregates::FineField                                                                                  FineVector;
-  typedef MultiGridPreconditioner<CoarseSiteVector, CoarseScalar, nCoarseSpins, nBasis, nCoarserLevels - 1, CoarseMatrix> NextPreconditionerLevel;
+  typedef Aggregation<Fobj, CComplex, nBasis>                                                                         Aggregates;
+  typedef CoarsenedMatrix<Fobj, CComplex, nBasis>                                                                     CoarseDiracMatrix;
+  typedef typename Aggregates::CoarseVector                                                                           CoarseVector;
+  typedef typename Aggregates::siteVector                                                                             CoarseSiteVector;
+  typedef Matrix                                                                                                      FineDiracMatrix;
+  typedef typename Aggregates::FineField                                                                              FineVector;
+  typedef MultiGridPreconditioner<CoarseSiteVector, iScalar<CComplex>, nBasis, nCoarserLevels - 1, CoarseDiracMatrix> NextPreconditionerLevel;
   // clang-format on
 
   /////////////////////////////////////////////
@@ -187,14 +151,17 @@ public:
   MultiGridParams &_MultiGridParams;
   LevelInfo &      _LevelInfo;
 
-  FineMatrix & _FineMatrix;
-  FineMatrix & _SmootherMatrix;
-  Aggregates   _Aggregates;
-  CoarseMatrix _CoarseMatrix;
+  FineDiracMatrix & _FineMatrix;
+  FineDiracMatrix & _SmootherMatrix;
+  Aggregates        _Aggregates;
+  CoarseDiracMatrix _CoarseMatrix;
 
   std::unique_ptr<NextPreconditionerLevel> _NextPreconditionerLevel;
 
   GridStopWatch _SetupTotalTimer;
+  GridStopWatch _SetupCreateSubspaceTimer;
+  GridStopWatch _SetupProjectToChiralitiesTimer;
+  GridStopWatch _SetupCoarsenOperatorTimer;
   GridStopWatch _SetupNextLevelTimer;
   GridStopWatch _SolveTotalTimer;
   GridStopWatch _SolveRestrictionTimer;
@@ -206,7 +173,7 @@ public:
   // Member Functions
   /////////////////////////////////////////////
 
-  MultiGridPreconditioner(MultiGridParams &mgParams, LevelInfo &LvlInfo, FineMatrix &FineMat, FineMatrix &SmootherMat)
+  MultiGridPreconditioner(MultiGridParams &mgParams, LevelInfo &LvlInfo, FineDiracMatrix &FineMat, FineDiracMatrix &SmootherMat)
     : _CurrentLevel(mgParams.nLevels - (nCoarserLevels + 1)) // _Level = 0 corresponds to finest
     , _NextCoarserLevel(_CurrentLevel + 1)                   // incremented for instances on coarser levels
     , _MultiGridParams(mgParams)
@@ -226,24 +193,32 @@ public:
 
     _SetupTotalTimer.Start();
 
-    Gamma                                       g5(Gamma::Algebra::Gamma5);
-    MdagMLinearOperator<FineMatrix, FineVector> fineMdagMOp(_FineMatrix);
-
-    // NOTE: Don't specify nb here to see the orthogonalization check
-    _Aggregates.CreateSubspace(_LevelInfo.PRNGs[_CurrentLevel], fineMdagMOp /*, nb */);
-
-    // TestVectorAnalyzer<FineVector, nbasis> fineTVA;
-    // fineTVA(fineMdagMOp, _Aggregates.subspace);
-
     static_assert((nBasis & 0x1) == 0, "MG Preconditioner only supports an even number of basis vectors");
     int nb = nBasis / 2;
 
-    // // TODO: to get this to work for more than two levels, I would need to either implement coarse spins or have a template specialization of this class also for the finest level
-    // for(int n = 0; n < nb; n++) {
-    //   _Aggregates.subspace[n + nb] = g5 * _Aggregates.subspace[n];
-    // }
+    MdagMLinearOperator<FineDiracMatrix, FineVector> fineMdagMOp(_FineMatrix);
 
+    _SetupCreateSubspaceTimer.Start();
+    _Aggregates.CreateSubspace(_LevelInfo.PRNGs[_CurrentLevel], fineMdagMOp, nb);
+    _SetupCreateSubspaceTimer.Stop();
+
+    _SetupProjectToChiralitiesTimer.Start();
+    FineVector tmp1(_Aggregates.subspace[0]._grid);
+    FineVector tmp2(_Aggregates.subspace[0]._grid);
+    for(int n = 0; n < nb; n++) {
+      auto tmp1 = _Aggregates.subspace[n];
+      G5C(tmp2, _Aggregates.subspace[n]);
+      axpby(_Aggregates.subspace[n], 0.5, 0.5, tmp1, tmp2);
+      axpby(_Aggregates.subspace[n + nb], 0.5, -0.5, tmp1, tmp2);
+      std::cout << GridLogMG << " Level " << _CurrentLevel << ": Chirally doubled vector " << n << ". "
+                << "norm2(vec[" << n << "]) = " << norm2(_Aggregates.subspace[n]) << ". "
+                << "norm2(vec[" << n + nb << "]) = " << norm2(_Aggregates.subspace[n + nb]) << std::endl;
+    }
+    _SetupProjectToChiralitiesTimer.Stop();
+
+    _SetupCoarsenOperatorTimer.Start();
     _CoarseMatrix.CoarsenOperator(_LevelInfo.Grids[_CurrentLevel], fineMdagMOp, _Aggregates);
+    _SetupCoarsenOperatorTimer.Stop();
 
     _SetupNextLevelTimer.Start();
     _NextPreconditionerLevel->setup();
@@ -252,7 +227,7 @@ public:
     _SetupTotalTimer.Stop();
   }
 
-  virtual void operator()(Lattice<Fobj> const &in, Lattice<Fobj> &out) {
+  virtual void operator()(FineVector const &in, FineVector &out) {
 
     conformable(_LevelInfo.Grids[_CurrentLevel], in._grid);
     conformable(in, out);
@@ -264,7 +239,7 @@ public:
       vCycle(in, out);
   }
 
-  void vCycle(Lattice<Fobj> const &in, Lattice<Fobj> &out) {
+  void vCycle(FineVector const &in, FineVector &out) {
 
     _SolveTotalTimer.Start();
 
@@ -285,8 +260,8 @@ public:
                                                               _MultiGridParams.smootherMaxInnerIter[_CurrentLevel],
                                                               false);
 
-    MdagMLinearOperator<FineMatrix, FineVector> fineMdagMOp(_FineMatrix);
-    MdagMLinearOperator<FineMatrix, FineVector> fineSmootherMdagMOp(_SmootherMatrix);
+    MdagMLinearOperator<FineDiracMatrix, FineVector> fineMdagMOp(_FineMatrix);
+    MdagMLinearOperator<FineDiracMatrix, FineVector> fineSmootherMdagMOp(_SmootherMatrix);
 
     _SolveRestrictionTimer.Start();
     _Aggregates.ProjectToSubspace(coarseSrc, in);
@@ -321,7 +296,7 @@ public:
     _SolveTotalTimer.Stop();
   }
 
-  void kCycle(Lattice<Fobj> const &in, Lattice<Fobj> &out) {
+  void kCycle(FineVector const &in, FineVector &out) {
 
     _SolveTotalTimer.Start();
 
@@ -348,9 +323,9 @@ public:
                                                                   _MultiGridParams.kCycleMaxInnerIter[_CurrentLevel],
                                                                   false);
 
-    MdagMLinearOperator<FineMatrix, FineVector>     fineMdagMOp(_FineMatrix);
-    MdagMLinearOperator<FineMatrix, FineVector>     fineSmootherMdagMOp(_SmootherMatrix);
-    MdagMLinearOperator<CoarseMatrix, CoarseVector> coarseMdagMOp(_CoarseMatrix);
+    MdagMLinearOperator<FineDiracMatrix, FineVector>     fineMdagMOp(_FineMatrix);
+    MdagMLinearOperator<FineDiracMatrix, FineVector>     fineSmootherMdagMOp(_SmootherMatrix);
+    MdagMLinearOperator<CoarseDiracMatrix, CoarseVector> coarseMdagMOp(_CoarseMatrix);
 
     _SolveRestrictionTimer.Start();
     _Aggregates.ProjectToSubspace(coarseSrc, in);
@@ -385,15 +360,13 @@ public:
     _SolveTotalTimer.Stop();
   }
 
-  void runChecks() {
-
-    auto tolerance = 1e-13; // TODO: this obviously depends on the precision we use, current value is for double
+  void runChecks(RealD tolerance) {
 
     std::vector<FineVector>   fineTmps(7, _LevelInfo.Grids[_CurrentLevel]);
     std::vector<CoarseVector> coarseTmps(4, _LevelInfo.Grids[_NextCoarserLevel]);
 
-    MdagMLinearOperator<FineMatrix, FineVector>     fineMdagMOp(_FineMatrix);
-    MdagMLinearOperator<CoarseMatrix, CoarseVector> coarseMdagMOp(_CoarseMatrix);
+    MdagMLinearOperator<FineDiracMatrix, FineVector>     fineMdagMOp(_FineMatrix);
+    MdagMLinearOperator<CoarseDiracMatrix, CoarseVector> coarseMdagMOp(_CoarseMatrix);
 
     std::cout << GridLogMG << " Level " << _CurrentLevel << ": **************************************************" << std::endl;
     std::cout << GridLogMG << " Level " << _CurrentLevel << ": MG correctness check: 0 == (M - (Mdiag + Σ_μ Mdir_μ)) * v" << std::endl;
@@ -520,23 +493,26 @@ public:
       std::cout << " > " << tolerance << " -> check failed" << std::endl;
       // abort();
     } else {
-      std::cout << " < " << tolerance << " -> check passed"
-                << std::endl; // TODO: this check will work only when I got Mdag in CoarsenedMatrix to work
+      std::cout << " < " << tolerance << " -> check passed" << std::endl;
     }
 
-    _NextPreconditionerLevel->runChecks();
+    _NextPreconditionerLevel->runChecks(tolerance);
   }
 
   void reportTimings() {
 
     // clang-format off
-    std::cout << GridLogMG << " Level " << _CurrentLevel << ": Time elapsed: Setup total        " <<        _SetupTotalTimer.Elapsed() << std::endl;
-    std::cout << GridLogMG << " Level " << _CurrentLevel << ": Time elapsed: Setup next level   " <<    _SetupNextLevelTimer.Elapsed() << std::endl;
-    std::cout << GridLogMG << " Level " << _CurrentLevel << ": Time elapsed: Solve total        " <<        _SolveTotalTimer.Elapsed() << std::endl;
-    std::cout << GridLogMG << " Level " << _CurrentLevel << ": Time elapsed: Solve restriction  " <<  _SolveRestrictionTimer.Elapsed() << std::endl;
-    std::cout << GridLogMG << " Level " << _CurrentLevel << ": Time elapsed: Solve prolongation " << _SolveProlongationTimer.Elapsed() << std::endl;
-    std::cout << GridLogMG << " Level " << _CurrentLevel << ": Time elapsed: Solve smoother     " <<     _SolveSmootherTimer.Elapsed() << std::endl;
-    std::cout << GridLogMG << " Level " << _CurrentLevel << ": Time elapsed: Solve next level   " <<    _SolveNextLevelTimer.Elapsed() << std::endl;
+    std::cout << GridLogMG << " Level " << _CurrentLevel << ": Time elapsed: Sum   total            " <<                _SetupTotalTimer.Elapsed() + _SolveTotalTimer.Elapsed() << std::endl;
+    std::cout << GridLogMG << " Level " << _CurrentLevel << ": Time elapsed: Setup total            " <<                _SetupTotalTimer.Elapsed() << std::endl;
+    std::cout << GridLogMG << " Level " << _CurrentLevel << ": Time elapsed: Setup create subspace  " <<       _SetupCreateSubspaceTimer.Elapsed() << std::endl;
+    std::cout << GridLogMG << " Level " << _CurrentLevel << ": Time elapsed: Setup project chiral   " << _SetupProjectToChiralitiesTimer.Elapsed() << std::endl;
+    std::cout << GridLogMG << " Level " << _CurrentLevel << ": Time elapsed: Setup coarsen operator " <<      _SetupCoarsenOperatorTimer.Elapsed() << std::endl;
+    std::cout << GridLogMG << " Level " << _CurrentLevel << ": Time elapsed: Setup next level       " <<            _SetupNextLevelTimer.Elapsed() << std::endl;
+    std::cout << GridLogMG << " Level " << _CurrentLevel << ": Time elapsed: Solve total            " <<                _SolveTotalTimer.Elapsed() << std::endl;
+    std::cout << GridLogMG << " Level " << _CurrentLevel << ": Time elapsed: Solve restriction      " <<          _SolveRestrictionTimer.Elapsed() << std::endl;
+    std::cout << GridLogMG << " Level " << _CurrentLevel << ": Time elapsed: Solve prolongation     " <<         _SolveProlongationTimer.Elapsed() << std::endl;
+    std::cout << GridLogMG << " Level " << _CurrentLevel << ": Time elapsed: Solve smoother         " <<             _SolveSmootherTimer.Elapsed() << std::endl;
+    std::cout << GridLogMG << " Level " << _CurrentLevel << ": Time elapsed: Solve next level       " <<            _SolveNextLevelTimer.Elapsed() << std::endl;
     // clang-format on
 
     _NextPreconditionerLevel->reportTimings();
@@ -545,6 +521,9 @@ public:
   void resetTimers() {
 
     _SetupTotalTimer.Reset();
+    _SetupCreateSubspaceTimer.Reset();
+    _SetupProjectToChiralitiesTimer.Reset();
+    _SetupCoarsenOperatorTimer.Reset();
     _SetupNextLevelTimer.Reset();
     _SolveTotalTimer.Reset();
     _SolveRestrictionTimer.Reset();
@@ -557,14 +536,14 @@ public:
 };
 
 // Specialization for the coarsest level
-template<class Fobj, class CoarseScalar, int nCoarseSpins, int nbasis, class Matrix>
-class MultiGridPreconditioner<Fobj, CoarseScalar, nCoarseSpins, nbasis, 0, Matrix> : public MultiGridPreconditionerBase<Lattice<Fobj>> {
+template<class Fobj, class CComplex, int nBasis, class Matrix>
+class MultiGridPreconditioner<Fobj, CComplex, nBasis, 0, Matrix> : public MultiGridPreconditionerBase<Lattice<Fobj>> {
 public:
   /////////////////////////////////////////////
   // Type Definitions
   /////////////////////////////////////////////
 
-  typedef Matrix        FineMatrix;
+  typedef Matrix        FineDiracMatrix;
   typedef Lattice<Fobj> FineVector;
 
   /////////////////////////////////////////////
@@ -576,8 +555,8 @@ public:
   MultiGridParams &_MultiGridParams;
   LevelInfo &      _LevelInfo;
 
-  FineMatrix &_FineMatrix;
-  FineMatrix &_SmootherMatrix;
+  FineDiracMatrix &_FineMatrix;
+  FineDiracMatrix &_SmootherMatrix;
 
   GridStopWatch _SolveTotalTimer;
   GridStopWatch _SolveSmootherTimer;
@@ -586,7 +565,7 @@ public:
   // Member Functions
   /////////////////////////////////////////////
 
-  MultiGridPreconditioner(MultiGridParams &mgParams, LevelInfo &LvlInfo, FineMatrix &FineMat, FineMatrix &SmootherMat)
+  MultiGridPreconditioner(MultiGridParams &mgParams, LevelInfo &LvlInfo, FineDiracMatrix &FineMat, FineDiracMatrix &SmootherMat)
     : _CurrentLevel(mgParams.nLevels - (0 + 1))
     , _MultiGridParams(mgParams)
     , _LevelInfo(LvlInfo)
@@ -598,7 +577,7 @@ public:
 
   void setup() {}
 
-  virtual void operator()(Lattice<Fobj> const &in, Lattice<Fobj> &out) {
+  virtual void operator()(FineVector const &in, FineVector &out) {
 
     _SolveTotalTimer.Start();
 
@@ -612,7 +591,7 @@ public:
     FlexibleGeneralisedMinimalResidual<FineVector> fineFGMRES(
       _MultiGridParams.coarseSolverTol, coarseSolverMaxIter, fineTrivialPreconditioner, _MultiGridParams.coarseSolverMaxInnerIter, false);
 
-    MdagMLinearOperator<FineMatrix, FineVector> fineMdagMOp(_FineMatrix);
+    MdagMLinearOperator<FineDiracMatrix, FineVector> fineMdagMOp(_FineMatrix);
 
     _SolveSmootherTimer.Start();
     fineFGMRES(fineMdagMOp, in, out);
@@ -621,13 +600,13 @@ public:
     _SolveTotalTimer.Stop();
   }
 
-  void runChecks() {}
+  void runChecks(RealD tolerance) {}
 
   void reportTimings() {
 
     // clang-format off
-    std::cout << GridLogMG << " Level " << _CurrentLevel << ": Time elapsed: Solve total        " <<    _SolveTotalTimer.Elapsed() << std::endl;
-    std::cout << GridLogMG << " Level " << _CurrentLevel << ": Time elapsed: Solve smoother     " << _SolveSmootherTimer.Elapsed() << std::endl;
+    std::cout << GridLogMG << " Level " << _CurrentLevel << ": Time elapsed: Solve total            " <<    _SolveTotalTimer.Elapsed() << std::endl;
+    std::cout << GridLogMG << " Level " << _CurrentLevel << ": Time elapsed: Solve smoother         " << _SolveSmootherTimer.Elapsed() << std::endl;
     // clang-format on
   }
 
@@ -638,20 +617,18 @@ public:
   }
 };
 
-template<class Fobj, class CoarseScalar, int nCoarseSpins, int nbasis, int nLevels, class Matrix>
-using NLevelMGPreconditioner = MultiGridPreconditioner<Fobj, CoarseScalar, nCoarseSpins, nbasis, nLevels - 1, Matrix>;
+template<class Fobj, class CComplex, int nBasis, int nLevels, class Matrix>
+using NLevelMGPreconditioner = MultiGridPreconditioner<Fobj, CComplex, nBasis, nLevels - 1, Matrix>;
 
-template<class Fobj, class CoarseScalar, int nCoarseSpins, int nbasis, class Matrix>
+template<class Fobj, class CComplex, int nBasis, class Matrix>
 std::unique_ptr<MultiGridPreconditionerBase<Lattice<Fobj>>>
 createMGInstance(MultiGridParams &mgParams, LevelInfo &levelInfo, Matrix &FineMat, Matrix &SmootherMat) {
 
-  // clang-format off
-  #define CASE_FOR_N_LEVELS(nLevels)                                                                                                       \
-    case nLevels:                                                                                                                          \
-      return std::unique_ptr<NLevelMGPreconditioner<Fobj, CoarseScalar, nCoarseSpins, nbasis, nLevels, Matrix>>(                           \
-        new NLevelMGPreconditioner<Fobj, CoarseScalar, nCoarseSpins, nbasis, nLevels, Matrix>(mgParams, levelInfo, FineMat, SmootherMat)); \
-      break;
-  // clang-format on
+#define CASE_FOR_N_LEVELS(nLevels)                                                                                     \
+  case nLevels:                                                                                                        \
+    return std::unique_ptr<NLevelMGPreconditioner<Fobj, CComplex, nBasis, nLevels, Matrix>>(                           \
+      new NLevelMGPreconditioner<Fobj, CComplex, nBasis, nLevels, Matrix>(mgParams, levelInfo, FineMat, SmootherMat)); \
+    break;
 
   switch(mgParams.nLevels) {
     CASE_FOR_N_LEVELS(2);
@@ -662,6 +639,7 @@ createMGInstance(MultiGridParams &mgParams, LevelInfo &levelInfo, Matrix &FineMa
       exit(EXIT_FAILURE);
       break;
   }
+#undef CASE_FOR_N_LEVELS
 }
 
 int main(int argc, char **argv) {
@@ -680,20 +658,21 @@ int main(int argc, char **argv) {
   GridParallelRNG  fPRNG(FGrid);
   fPRNG.SeedFixedIntegers(fSeeds);
 
-  Gamma g5(Gamma::Algebra::Gamma5);
-
   // clang-format off
   LatticeFermion    src(FGrid); gaussian(fPRNG, src);
   LatticeFermion result(FGrid); result = zero;
   LatticeGaugeField Umu(FGrid); SU3::HotConfiguration(fPRNG, Umu);
   // clang-format on
 
-  RealD mass  = 0.5;
+  RealD mass  = -0.25;
   RealD csw_r = 1.0;
   RealD csw_t = 1.0;
 
+  // Note: We do chiral doubling, so actually only nbasis/2 full basis vectors are used
   const int nbasis = 20;
 
+  RealD toleranceForMGChecks = 1e-13; // TODO: depends on the precision MG precondtioner is run in
+
   WilsonFermionR       Dw(Umu, *FGrid, *FrbGrid, mass);
   WilsonCloverFermionR Dwc(Umu, *FGrid, *FrbGrid, mass, csw_r, csw_t, wilsonAnisCoeff, wcImplparams);
 
@@ -768,21 +747,21 @@ int main(int argc, char **argv) {
   std::cout << GridLogMessage << "**************************************************" << std::endl;
 
   TrivialPrecon<LatticeFermion> TrivialPrecon;
-  auto MGPreconDw = createMGInstance<vSpinColourVector, vTComplex, 1, nbasis, WilsonFermionR>(mgParams, levelInfo, Dw, Dw);
+  auto MGPreconDw = createMGInstance<vSpinColourVector, vTComplex, nbasis, WilsonFermionR>(mgParams, levelInfo, Dw, Dw);
 
   MGPreconDw->setup();
-  MGPreconDw->runChecks();
+  MGPreconDw->runChecks(toleranceForMGChecks);
 
   std::vector<std::unique_ptr<OperatorFunction<LatticeFermion>>> solversDw;
 
+  solversDw.emplace_back(new ConjugateGradient<LatticeFermion>(1.0e-12, 50000, false));
   solversDw.emplace_back(new FlexibleGeneralisedMinimalResidual<LatticeFermion>(1.0e-12, 50000, TrivialPrecon, 100, false));
   solversDw.emplace_back(new FlexibleGeneralisedMinimalResidual<LatticeFermion>(1.0e-12, 50000, *MGPreconDw, 100, false));
 
   for(auto const &solver : solversDw) {
-    std::cout << "Starting with a new solver" << std::endl;
+    std::cout << std::endl << "Starting with a new solver" << std::endl;
     result = zero;
     (*solver)(MdagMOpDw, src, result);
-    std::cout << std::endl;
   }
 
   MGPreconDw->reportTimings();
@@ -791,13 +770,14 @@ int main(int argc, char **argv) {
   std::cout << GridLogMessage << "Testing Multigrid for Wilson Clover" << std::endl;
   std::cout << GridLogMessage << "**************************************************" << std::endl;
 
-  auto MGPreconDwc = createMGInstance<vSpinColourVector, vTComplex, 1, nbasis, WilsonCloverFermionR>(mgParams, levelInfo, Dwc, Dwc);
+  auto MGPreconDwc = createMGInstance<vSpinColourVector, vTComplex, nbasis, WilsonCloverFermionR>(mgParams, levelInfo, Dwc, Dwc);
 
   MGPreconDwc->setup();
-  MGPreconDwc->runChecks();
+  MGPreconDwc->runChecks(toleranceForMGChecks);
 
   std::vector<std::unique_ptr<OperatorFunction<LatticeFermion>>> solversDwc;
 
+  solversDwc.emplace_back(new ConjugateGradient<LatticeFermion>(1.0e-12, 50000, false));
   solversDwc.emplace_back(new FlexibleGeneralisedMinimalResidual<LatticeFermion>(1.0e-12, 50000, TrivialPrecon, 100, false));
   solversDwc.emplace_back(new FlexibleGeneralisedMinimalResidual<LatticeFermion>(1.0e-12, 50000, *MGPreconDwc, 100, false));
 

From 6c27c725858d35eb837278eb1abb67dbac132ad6 Mon Sep 17 00:00:00 2001
From: Daniel Richtmann <daniel.richtmann@physik.uni-regensburg.de>
Date: Wed, 16 May 2018 17:08:05 +0200
Subject: [PATCH 117/130] WilsonMG: Provide more sensible default values for MG
 parameters

---
 tests/solver/Test_wilsonclover_mg.cc | 72 ++++++++++++++--------------
 1 file changed, 36 insertions(+), 36 deletions(-)

diff --git a/tests/solver/Test_wilsonclover_mg.cc b/tests/solver/Test_wilsonclover_mg.cc
index 18e9ae5e..aa7c8cc1 100644
--- a/tests/solver/Test_wilsonclover_mg.cc
+++ b/tests/solver/Test_wilsonclover_mg.cc
@@ -676,47 +676,47 @@ int main(int argc, char **argv) {
   WilsonFermionR       Dw(Umu, *FGrid, *FrbGrid, mass);
   WilsonCloverFermionR Dwc(Umu, *FGrid, *FrbGrid, mass, csw_r, csw_t, wilsonAnisCoeff, wcImplparams);
 
-  // Default params for two-level MG preconditioner (TODO: use sensible ones)
-  mgParams.nLevels                  = 2;
-  mgParams.blockSizes               = {{2, 2, 2, 2}};
-  mgParams.smootherTol              = {1e-14};
-  mgParams.smootherMaxOuterIter     = {1};
-  mgParams.smootherMaxInnerIter     = {1};
+  // // Default params for two-level MG preconditioner
+  // mgParams.nLevels                  = 2;
+  // mgParams.blockSizes               = {{4, 4, 4, 4}};
+  // mgParams.smootherTol              = {1e-14};
+  // mgParams.smootherMaxOuterIter     = {4};
+  // mgParams.smootherMaxInnerIter     = {4};
+  // mgParams.kCycle                   = true;
+  // mgParams.kCycleTol                = {1e-1};
+  // mgParams.kCycleMaxOuterIter       = {2};
+  // mgParams.kCycleMaxInnerIter       = {5};
+  // mgParams.coarseSolverTol          = 5e-2;
+  // mgParams.coarseSolverMaxOuterIter = 10;
+  // mgParams.coarseSolverMaxInnerIter = 500;
+
+  // Default params for three-level MG preconditioner
+  mgParams.nLevels                  = 3;
+  mgParams.blockSizes               = {{4, 4, 4, 4}, {2, 2, 2, 2}};
+  mgParams.smootherTol              = {1e-14, 1e-14};
+  mgParams.smootherMaxOuterIter     = {4, 4};
+  mgParams.smootherMaxInnerIter     = {4, 4};
   mgParams.kCycle                   = true;
-  mgParams.kCycleTol                = {1e-14};
-  mgParams.kCycleMaxOuterIter       = {1};
-  mgParams.kCycleMaxInnerIter       = {1};
-  mgParams.coarseSolverTol          = 1e-14;
-  mgParams.coarseSolverMaxOuterIter = 1;
-  mgParams.coarseSolverMaxInnerIter = 1;
+  mgParams.kCycleTol                = {1e-1, 1e-1};
+  mgParams.kCycleMaxOuterIter       = {2, 2};
+  mgParams.kCycleMaxInnerIter       = {5, 5};
+  mgParams.coarseSolverTol          = 5e-2;
+  mgParams.coarseSolverMaxOuterIter = 10;
+  mgParams.coarseSolverMaxInnerIter = 500;
 
-  // // Default params for three-level MG preconditioner (TODO: use sensible ones)
-  // mgParams.nLevels                  = 3;
-  // mgParams.blockSizes               = {{2, 2, 2, 2}, {2, 2, 1, 1}};
-  // mgParams.smootherTol              = {1e-14, 1e-14};
-  // mgParams.smootherMaxOuterIter     = {1, 1};
-  // mgParams.smootherMaxInnerIter     = {1, 1};
-  // mgParams.kCycle                   = true;
-  // mgParams.kCycleTol                = {1e-14, 1e-14};
-  // mgParams.kCycleMaxOuterIter       = {1, 1};
-  // mgParams.kCycleMaxInnerIter       = {1, 1};
-  // mgParams.coarseSolverTol          = 1e-14;
-  // mgParams.coarseSolverMaxOuterIter = 1;
-  // mgParams.coarseSolverMaxInnerIter = 1;
-
-  // // Default params for four-level MG preconditioner (TODO: use sensible ones)
+  // // Default params for four-level MG preconditioner
   // mgParams.nLevels                  = 4;
-  // mgParams.blockSizes               = {{2, 2, 2, 2}, {2, 2, 1, 1}, {1, 1, 2, 1}};
+  // mgParams.blockSizes               = {{4, 4, 4, 4}, {2, 2, 2, 2}, {2, 2, 2, 2}};
   // mgParams.smootherTol              = {1e-14, 1e-14, 1e-14};
-  // mgParams.smootherMaxOuterIter     = {1, 1, 1};
-  // mgParams.smootherMaxInnerIter     = {1, 1, 1};
+  // mgParams.smootherMaxOuterIter     = {4, 4, 4};
+  // mgParams.smootherMaxInnerIter     = {4, 4, 4};
   // mgParams.kCycle                   = true;
-  // mgParams.kCycleTol                = {1e-14, 1e-14, 1e-14};
-  // mgParams.kCycleMaxOuterIter       = {1, 1, 1};
-  // mgParams.kCycleMaxInnerIter       = {1, 1, 1};
-  // mgParams.coarseSolverTol          = 1e-14;
-  // mgParams.coarseSolverMaxOuterIter = 1;
-  // mgParams.coarseSolverMaxInnerIter = 1;
+  // mgParams.kCycleTol                = {1e-1, 1e-1, 1e-1};
+  // mgParams.kCycleMaxOuterIter       = {2, 2, 2};
+  // mgParams.kCycleMaxInnerIter       = {5, 5, 5};
+  // mgParams.coarseSolverTol          = 5e-2;
+  // mgParams.coarseSolverMaxOuterIter = 10;
+  // mgParams.coarseSolverMaxInnerIter = 500;
 
   {
     XmlWriter writer("mg_params_template.xml");

From 7564fedf68a9935b1a9fb393d87527fc276926be Mon Sep 17 00:00:00 2001
From: Daniel Richtmann <daniel.richtmann@physik.uni-regensburg.de>
Date: Wed, 23 May 2018 12:24:25 +0200
Subject: [PATCH 118/130] WilsonMG: Set subspace to zero to avoid random
 behavior

---
 lib/algorithms/CoarsenedMatrix.h | 1 +
 1 file changed, 1 insertion(+)

diff --git a/lib/algorithms/CoarsenedMatrix.h b/lib/algorithms/CoarsenedMatrix.h
index 1d42bab5..a6c6c030 100644
--- a/lib/algorithms/CoarsenedMatrix.h
+++ b/lib/algorithms/CoarsenedMatrix.h
@@ -211,6 +211,7 @@ namespace Grid {
 
       for(int b=0;b<nn;b++){
 	
+	subspace[b] = zero;
 	gaussian(RNG,noise);
 	scale = std::pow(norm2(noise),-0.5); 
 	noise=noise*scale;

From e9b9550298f1c9af8e86b6c3ae2e7c9d8f754602 Mon Sep 17 00:00:00 2001
From: Daniel Richtmann <daniel.richtmann@gmail.com>
Date: Sat, 9 Jun 2018 16:21:19 +0200
Subject: [PATCH 119/130] WilsonMG: Fix incompatibility with single prec MG in
 construction of simd layout on coarser grids

---
 tests/solver/Test_wilsonclover_mg.cc | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/solver/Test_wilsonclover_mg.cc b/tests/solver/Test_wilsonclover_mg.cc
index aa7c8cc1..47597e0a 100644
--- a/tests/solver/Test_wilsonclover_mg.cc
+++ b/tests/solver/Test_wilsonclover_mg.cc
@@ -100,7 +100,7 @@ public:
         Seeds[level][d] = (level)*Nd + d + 1;
       }
 
-      Grids.push_back(SpaceTimeGrid::makeFourDimGrid(tmp, GridDefaultSimd(Nd, vComplex::Nsimd()), GridDefaultMpi()));
+      Grids.push_back(SpaceTimeGrid::makeFourDimGrid(tmp, Grids[level - 1]->_simd_layout, GridDefaultMpi()));
       PRNGs.push_back(GridParallelRNG(Grids[level]));
 
       PRNGs[level].SeedFixedIntegers(Seeds[level]);

From 11c4f5e32ccd64ade11dfc40cb956e831a10a7e2 Mon Sep 17 00:00:00 2001
From: Daniel Richtmann <daniel.richtmann@physik.uni-regensburg.de>
Date: Fri, 1 Jun 2018 10:36:09 +0200
Subject: [PATCH 120/130] WilsonMG: Provide command line switch for reading in
 input xml + move default params to constructor of MultiGridParams

---
 tests/solver/Test_wilsonclover_mg.cc | 100 ++++++++++++---------------
 1 file changed, 43 insertions(+), 57 deletions(-)

diff --git a/tests/solver/Test_wilsonclover_mg.cc b/tests/solver/Test_wilsonclover_mg.cc
index 47597e0a..051d1bcd 100644
--- a/tests/solver/Test_wilsonclover_mg.cc
+++ b/tests/solver/Test_wilsonclover_mg.cc
@@ -52,7 +52,33 @@ public:
                                   double,                        coarseSolverTol,
                                   int,                           coarseSolverMaxOuterIter,
                                   int,                           coarseSolverMaxInnerIter);
-  MultiGridParams(){};
+
+  // constructor with default values
+  MultiGridParams(int                           _nLevels                  = 2,
+                  std::vector<std::vector<int>> _blockSizes               = {{4, 4, 4, 4}},
+                  std::vector<double>           _smootherTol              = {1e-14},
+                  std::vector<int>              _smootherMaxOuterIter     = {4},
+                  std::vector<int>              _smootherMaxInnerIter     = {4},
+                  bool                          _kCycle                   = true,
+                  std::vector<double>           _kCycleTol                = {1e-1},
+                  std::vector<int>              _kCycleMaxOuterIter       = {2},
+                  std::vector<int>              _kCycleMaxInnerIter       = {5},
+                  double                        _coarseSolverTol          = 5e-2,
+                  int                           _coarseSolverMaxOuterIter = 10,
+                  int                           _coarseSolverMaxInnerIter = 500)
+  : nLevels(_nLevels)
+  , blockSizes(_blockSizes)
+  , smootherTol(_smootherTol)
+  , smootherMaxOuterIter(_smootherMaxOuterIter)
+  , smootherMaxInnerIter(_smootherMaxInnerIter)
+  , kCycle(_kCycle)
+  , kCycleTol(_kCycleTol)
+  , kCycleMaxOuterIter(_kCycleMaxOuterIter)
+  , kCycleMaxInnerIter(_kCycleMaxInnerIter)
+  , coarseSolverTol(_coarseSolverTol)
+  , coarseSolverMaxOuterIter(_coarseSolverMaxOuterIter)
+  , coarseSolverMaxInnerIter(_coarseSolverMaxInnerIter)
+  {}
 };
 // clang-format on
 
@@ -646,8 +672,6 @@ int main(int argc, char **argv) {
 
   Grid_init(&argc, &argv);
 
-  MultiGridParams mgParams;
-
   typename WilsonCloverFermionR::ImplParams wcImplparams;
   WilsonAnisotropyCoefficients              wilsonAnisCoeff;
 
@@ -668,74 +692,36 @@ int main(int argc, char **argv) {
   RealD csw_r = 1.0;
   RealD csw_t = 1.0;
 
-  // Note: We do chiral doubling, so actually only nbasis/2 full basis vectors are used
-  const int nbasis = 20;
+  MultiGridParams mgParams;
+  std::string     inputXml{"./mg_params.xml"};
 
-  RealD toleranceForMGChecks = 1e-13; // TODO: depends on the precision MG precondtioner is run in
-
-  WilsonFermionR       Dw(Umu, *FGrid, *FrbGrid, mass);
-  WilsonCloverFermionR Dwc(Umu, *FGrid, *FrbGrid, mass, csw_r, csw_t, wilsonAnisCoeff, wcImplparams);
-
-  // // Default params for two-level MG preconditioner
-  // mgParams.nLevels                  = 2;
-  // mgParams.blockSizes               = {{4, 4, 4, 4}};
-  // mgParams.smootherTol              = {1e-14};
-  // mgParams.smootherMaxOuterIter     = {4};
-  // mgParams.smootherMaxInnerIter     = {4};
-  // mgParams.kCycle                   = true;
-  // mgParams.kCycleTol                = {1e-1};
-  // mgParams.kCycleMaxOuterIter       = {2};
-  // mgParams.kCycleMaxInnerIter       = {5};
-  // mgParams.coarseSolverTol          = 5e-2;
-  // mgParams.coarseSolverMaxOuterIter = 10;
-  // mgParams.coarseSolverMaxInnerIter = 500;
-
-  // Default params for three-level MG preconditioner
-  mgParams.nLevels                  = 3;
-  mgParams.blockSizes               = {{4, 4, 4, 4}, {2, 2, 2, 2}};
-  mgParams.smootherTol              = {1e-14, 1e-14};
-  mgParams.smootherMaxOuterIter     = {4, 4};
-  mgParams.smootherMaxInnerIter     = {4, 4};
-  mgParams.kCycle                   = true;
-  mgParams.kCycleTol                = {1e-1, 1e-1};
-  mgParams.kCycleMaxOuterIter       = {2, 2};
-  mgParams.kCycleMaxInnerIter       = {5, 5};
-  mgParams.coarseSolverTol          = 5e-2;
-  mgParams.coarseSolverMaxOuterIter = 10;
-  mgParams.coarseSolverMaxInnerIter = 500;
-
-  // // Default params for four-level MG preconditioner
-  // mgParams.nLevels                  = 4;
-  // mgParams.blockSizes               = {{4, 4, 4, 4}, {2, 2, 2, 2}, {2, 2, 2, 2}};
-  // mgParams.smootherTol              = {1e-14, 1e-14, 1e-14};
-  // mgParams.smootherMaxOuterIter     = {4, 4, 4};
-  // mgParams.smootherMaxInnerIter     = {4, 4, 4};
-  // mgParams.kCycle                   = true;
-  // mgParams.kCycleTol                = {1e-1, 1e-1, 1e-1};
-  // mgParams.kCycleMaxOuterIter       = {2, 2, 2};
-  // mgParams.kCycleMaxInnerIter       = {5, 5, 5};
-  // mgParams.coarseSolverTol          = 5e-2;
-  // mgParams.coarseSolverMaxOuterIter = 10;
-  // mgParams.coarseSolverMaxInnerIter = 500;
+  if(GridCmdOptionExists(argv, argv + argc, "--inputxml")) {
+    inputXml = GridCmdOptionPayload(argv, argv + argc, "--inputxml");
+    assert(inputXml.length() != 0);
+  }
 
   {
     XmlWriter writer("mg_params_template.xml");
     write(writer, "Params", mgParams);
-    std::cout << GridLogMessage << " Written mg_params_template.xml" << std::endl;
-  }
+    std::cout << GridLogMessage << "Written mg_params_template.xml" << std::endl;
 
-  {
-    std::string paramFileName{"./mg_params.xml"};
-    XmlReader   reader(paramFileName);
+    XmlReader reader(inputXml);
     read(reader, "Params", mgParams);
+    std::cout << GridLogMessage << "Read in " << inputXml << std::endl;
   }
 
   checkParameterValidity(mgParams);
-
   std::cout << mgParams << std::endl;
 
   LevelInfo levelInfo(FGrid, mgParams);
 
+  // Note: We do chiral doubling, so actually only nbasis/2 full basis vectors are used
+  const int nbasis               = 40;
+  RealD     toleranceForMGChecks = 1e-13; // TODO: depends on the precision MG precondtioner is run in
+
+  WilsonFermionR       Dw(Umu, *FGrid, *FrbGrid, mass);
+  WilsonCloverFermionR Dwc(Umu, *FGrid, *FrbGrid, mass, csw_r, csw_t, wilsonAnisCoeff, wcImplparams);
+
   static_assert(std::is_same<LatticeFermion, typename WilsonFermionR::FermionField>::value, "");
   static_assert(std::is_same<LatticeFermion, typename WilsonCloverFermionR::FermionField>::value, "");
 

From 4f41cd114db35a9ea4ebea5d892cdf21ce2df40b Mon Sep 17 00:00:00 2001
From: Daniel Richtmann <daniel.richtmann@gmail.com>
Date: Sat, 9 Jun 2018 16:38:36 +0200
Subject: [PATCH 121/130] WilsonMG: Add a mixed precision version of FGMRES

This version does everything in double precision but accepts a preconditioner
working in single precision.
---
 lib/algorithms/Algorithms.h                   |   1 +
 ...cisionFlexibleGeneralisedMinimalResidual.h | 272 ++++++++++++++++++
 2 files changed, 273 insertions(+)
 create mode 100644 lib/algorithms/iterative/MixedPrecisionFlexibleGeneralisedMinimalResidual.h

diff --git a/lib/algorithms/Algorithms.h b/lib/algorithms/Algorithms.h
index 72fe9fe0..b716c48f 100644
--- a/lib/algorithms/Algorithms.h
+++ b/lib/algorithms/Algorithms.h
@@ -53,6 +53,7 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk>
 #include <Grid/algorithms/iterative/CommunicationAvoidingGeneralisedMinimalResidual.h>
 #include <Grid/algorithms/iterative/FlexibleGeneralisedMinimalResidual.h>
 #include <Grid/algorithms/iterative/FlexibleCommunicationAvoidingGeneralisedMinimalResidual.h>
+#include <Grid/algorithms/iterative/MixedPrecisionFlexibleGeneralisedMinimalResidual.h>
 #include <Grid/algorithms/iterative/ImplicitlyRestartedLanczos.h>
 #include <Grid/algorithms/CoarsenedMatrix.h>
 #include <Grid/algorithms/FFT.h>
diff --git a/lib/algorithms/iterative/MixedPrecisionFlexibleGeneralisedMinimalResidual.h b/lib/algorithms/iterative/MixedPrecisionFlexibleGeneralisedMinimalResidual.h
new file mode 100644
index 00000000..d38d41a9
--- /dev/null
+++ b/lib/algorithms/iterative/MixedPrecisionFlexibleGeneralisedMinimalResidual.h
@@ -0,0 +1,272 @@
+/*************************************************************************************
+
+Grid physics library, www.github.com/paboyle/Grid
+
+Source file: ./lib/algorithms/iterative/MixedPrecisionFlexibleGeneralisedMinimalResidual.h
+
+Copyright (C) 2015
+
+Author: Daniel Richtmann <daniel.richtmann@ur.de>
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along
+with this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+
+See the full license in the file "LICENSE" in the top level distribution
+directory
+*************************************************************************************/
+/*  END LEGAL */
+#ifndef GRID_MIXED_PRECISION_FLEXIBLE_GENERALISED_MINIMAL_RESIDUAL_H
+#define GRID_MIXED_PRECISION_FLEXIBLE_GENERALISED_MINIMAL_RESIDUAL_H
+
+namespace Grid {
+
+template<class FieldD, class FieldF, typename std::enable_if<getPrecision<FieldD>::value == 2, int>::type = 0, typename std::enable_if< getPrecision<FieldF>::value == 1, int>::type = 0>
+class MixedPrecisionFlexibleGeneralisedMinimalResidual : public OperatorFunction<FieldD> { // restarted flexible GMRES: Krylov arithmetic on FieldD, preconditioner applied on FieldF
+ public:
+  bool ErrorOnNoConverge; // Throw an assert when MPFGMRES fails to converge,
+                          // defaults to true
+
+  RealD   Tolerance; // relative tolerance; operator() targets Tolerance^2 * norm2(src)
+
+  Integer MaxIterations;       // upper bound on the total number of Arnoldi steps
+  Integer RestartLength;       // number of Arnoldi steps per restart cycle
+  Integer MaxNumberOfRestarts; // MaxIterations / RestartLength rounded up, computed in the constructor
+  Integer IterationCount; // Number of iterations the MPFGMRES took to finish,
+                          // filled in upon completion
+
+  GridStopWatch MatrixTimer;       // time spent in LinOp.Op during the restart cycles
+  GridStopWatch PrecTimer;         // time spent in the preconditioner
+  GridStopWatch LinalgTimer;       // time spent in field arithmetic (residual, orthogonalisation)
+  GridStopWatch QrTimer;           // time spent in qrUpdate
+  GridStopWatch CompSolutionTimer; // time spent in computeSolution
+  GridStopWatch ChangePrecTimer;   // time spent in precisionChange
+
+  Eigen::MatrixXcd H; // Arnoldi coefficients, indexed as H(step, basis index)
+
+  std::vector<std::complex<double>> y;     // coefficients of the solution update, filled by computeSolution
+  std::vector<std::complex<double>> gamma; // rotated right-hand side; |gamma[i+1]|^2 is the residual estimate after step i
+  std::vector<std::complex<double>> c;     // Givens rotation coefficients, one per step
+  std::vector<std::complex<double>> s;     // Givens rotation coefficients, one per step
+
+  GridBase* SinglePrecGrid; // grid on which the FieldF temporaries of arnoldiStep are created; not owned
+
+  LinearFunction<FieldF> &Preconditioner; // held by reference; must outlive this object
+  // Stores the solver parameters and sizes the per-cycle work arrays (H, y, gamma, c, s) from restart_length.
+  MixedPrecisionFlexibleGeneralisedMinimalResidual(RealD   tol,
+                                                   Integer maxit,
+                                                   GridBase * sp_grid,
+                                                   LinearFunction<FieldF> &Prec,
+                                                   Integer restart_length,
+                                                   bool    err_on_no_conv = true)
+      : Tolerance(tol)
+      , MaxIterations(maxit)
+      , RestartLength(restart_length)
+      , MaxNumberOfRestarts(MaxIterations/RestartLength + ((MaxIterations%RestartLength == 0) ? 0 : 1))
+      , ErrorOnNoConverge(err_on_no_conv) // NOTE(review): declared first in the class, so it is initialised first regardless of its position here
+      , H(Eigen::MatrixXcd::Zero(RestartLength, RestartLength + 1)) // sizes taken from DD-αAMG code base
+      , y(RestartLength + 1, 0.)
+      , gamma(RestartLength + 1, 0.)
+      , c(RestartLength + 1, 0.)
+      , s(RestartLength + 1, 0.)
+      , SinglePrecGrid(sp_grid)
+      , Preconditioner(Prec) {};
+  // Solves LinOp * psi = src for psi, starting from the incoming psi; on convergence prints residuals and timings.
+  void operator()(LinearOperatorBase<FieldD> &LinOp, const FieldD &src, FieldD &psi) {
+
+    psi.checkerboard = src.checkerboard;
+    conformable(psi, src);
+
+    RealD guess = norm2(psi);
+    assert(std::isnan(guess) == 0);
+
+    RealD cp;
+    RealD ssq = norm2(src);
+    RealD rsq = Tolerance * Tolerance * ssq; // target for the squared residual norm
+
+    FieldD r(src._grid);
+
+    std::cout << std::setprecision(4) << std::scientific;
+    std::cout << GridLogIterative << "MPFGMRES: guess " << guess << std::endl;
+    std::cout << GridLogIterative << "MPFGMRES:   src " << ssq   << std::endl;
+
+    PrecTimer.Reset();
+    MatrixTimer.Reset();
+    LinalgTimer.Reset();
+    QrTimer.Reset();
+    CompSolutionTimer.Reset();
+    ChangePrecTimer.Reset();
+
+    GridStopWatch SolverTimer;
+    SolverTimer.Start();
+
+    IterationCount = 0;
+
+    for (int k=0; k<MaxNumberOfRestarts; k++) {
+
+      cp = outerLoopBody(LinOp, src, psi, rsq); // one restart cycle; psi is updated in place
+
+      // Stopping condition
+      if (cp <= rsq) {
+
+        SolverTimer.Stop();
+
+        LinOp.Op(psi,r); // recompute the residual from the final psi for the "true residual" report
+        axpy(r,-1.0,src,r);
+
+        RealD srcnorm       = sqrt(ssq);
+        RealD resnorm       = sqrt(norm2(r));
+        RealD true_residual = resnorm / srcnorm;
+
+        std::cout << GridLogMessage        << "MPFGMRES: Converged on iteration " << IterationCount
+                  << " computed residual " << sqrt(cp / ssq)
+                  << " true residual "     << true_residual
+                  << " target "            << Tolerance << std::endl;
+
+        std::cout << GridLogMessage << "MPFGMRES Time elapsed: Total      " <<       SolverTimer.Elapsed() << std::endl;
+        std::cout << GridLogMessage << "MPFGMRES Time elapsed: Precon     " <<         PrecTimer.Elapsed() << std::endl;
+        std::cout << GridLogMessage << "MPFGMRES Time elapsed: Matrix     " <<       MatrixTimer.Elapsed() << std::endl;
+        std::cout << GridLogMessage << "MPFGMRES Time elapsed: Linalg     " <<       LinalgTimer.Elapsed() << std::endl;
+        std::cout << GridLogMessage << "MPFGMRES Time elapsed: QR         " <<           QrTimer.Elapsed() << std::endl;
+        std::cout << GridLogMessage << "MPFGMRES Time elapsed: CompSol    " << CompSolutionTimer.Elapsed() << std::endl;
+        std::cout << GridLogMessage << "MPFGMRES Time elapsed: PrecChange " <<   ChangePrecTimer.Elapsed() << std::endl;
+        return;
+      }
+    }
+    // all restart cycles used without reaching the target
+    std::cout << GridLogMessage << "MPFGMRES did NOT converge" << std::endl;
+
+    if (ErrorOnNoConverge)
+      assert(0);
+  }
+  // Runs one restart cycle (at most RestartLength Arnoldi steps) from the current psi and returns the last squared residual estimate.
+  RealD outerLoopBody(LinearOperatorBase<FieldD> &LinOp, const FieldD &src, FieldD &psi, RealD rsq) {
+
+    RealD cp = 0;
+
+    FieldD w(src._grid);
+    FieldD r(src._grid);
+
+    std::vector<FieldD> v(RestartLength + 1, src._grid); // these should probably be made class members
+    std::vector<FieldD> z(RestartLength + 1, src._grid); // so that they are only allocated once, not in every restart
+
+    MatrixTimer.Start();
+    LinOp.Op(psi, w);
+    MatrixTimer.Stop();
+
+    LinalgTimer.Start();
+    r = src - w; // residual of the current iterate
+
+    gamma[0] = sqrt(norm2(r)); // norm of the starting residual
+
+    v[0] = (1. / gamma[0]) * r; // normalised first basis vector
+    LinalgTimer.Stop();
+
+    for (int i=0; i<RestartLength; i++) {
+
+      IterationCount++;
+
+      arnoldiStep(LinOp, v, z, w, i);
+
+      qrUpdate(i);
+
+      cp = std::norm(gamma[i+1]); // squared magnitude of the residual estimate
+
+      std::cout << GridLogIterative << "MPFGMRES: Iteration " << IterationCount
+                << " residual " << cp << " target " << rsq << std::endl;
+      // update psi at the end of the cycle, when the iteration budget is used up, or on convergence
+      if ((i == RestartLength - 1) || (IterationCount == MaxIterations) || (cp <= rsq)) {
+
+        computeSolution(z, psi, i);
+
+        return cp;
+      }
+    }
+
+    assert(0); // Never reached
+    return cp;
+  }
+  // One flexible Arnoldi step: z[iter] = Preconditioner(v[iter]) computed on FieldF, w = LinOp * z[iter], then w is orthogonalised against v[0..iter] to give v[iter + 1].
+  void arnoldiStep(LinearOperatorBase<FieldD> &LinOp, std::vector<FieldD> &v, std::vector<FieldD> &z, FieldD &w, int iter) {
+
+    FieldF v_f(SinglePrecGrid);
+    FieldF z_f(SinglePrecGrid);
+
+    ChangePrecTimer.Start();
+    precisionChange(v_f, v[iter]);
+    precisionChange(z_f, z[iter]); // converts the current contents of z[iter] into the preconditioner's output field
+    ChangePrecTimer.Stop();
+
+    PrecTimer.Start();
+    Preconditioner(v_f, z_f);
+    PrecTimer.Stop();
+
+    ChangePrecTimer.Start();
+    precisionChange(z[iter], z_f); // bring the preconditioned vector back to FieldD
+    ChangePrecTimer.Stop();
+
+    MatrixTimer.Start();
+    LinOp.Op(z[iter], w);
+    MatrixTimer.Stop();
+
+    LinalgTimer.Start();
+    for (int i = 0; i <= iter; ++i) { // modified Gram-Schmidt: each projection uses the already updated w
+      H(iter, i) = innerProduct(v[i], w);
+      w = w - H(iter, i) * v[i];
+    }
+
+    H(iter, iter + 1) = sqrt(norm2(w));
+    v[iter + 1] = (1. / H(iter, iter + 1)) * w; // NOTE(review): no guard against a vanishing norm here
+    LinalgTimer.Stop();
+  }
+  // Applies the stored Givens rotations to the H entries of step iter, then builds a new rotation that zeroes H(iter, iter + 1) and applies it to gamma.
+  void qrUpdate(int iter) {
+
+    QrTimer.Start();
+    for (int i = 0; i < iter ; ++i) {
+      auto tmp       = -s[i] * H(iter, i) + c[i] * H(iter, i + 1);
+      H(iter, i)     = std::conj(c[i]) * H(iter, i) + std::conj(s[i]) * H(iter, i + 1);
+      H(iter, i + 1) = tmp;
+    }
+
+    // Compute new Givens Rotation
+    ComplexD nu = sqrt(std::norm(H(iter, iter)) + std::norm(H(iter, iter + 1)));
+    c[iter]     = H(iter, iter) / nu;
+    s[iter]     = H(iter, iter + 1) / nu;
+
+    // Apply new Givens rotation
+    H(iter, iter)     = nu;
+    H(iter, iter + 1) = 0.;
+
+    gamma[iter + 1] = -s[iter] * gamma[iter];
+    gamma[iter]     = std::conj(c[iter]) * gamma[iter];
+    QrTimer.Stop();
+  }
+  // Back-substitutes for the coefficients y[0..iter] and adds the sum over i of z[i] * y[i] to psi.
+  void computeSolution(std::vector<FieldD> const &z, FieldD &psi, int iter) {
+
+    CompSolutionTimer.Start();
+    for (int i = iter; i >= 0; i--) { // solve the triangular system from the last row upwards
+      y[i] = gamma[i];
+      for (int k = i + 1; k <= iter; k++)
+        y[i] = y[i] - H(k, i) * y[k];
+      y[i] = y[i] / H(i, i);
+    }
+
+    for (int i = 0; i <= iter; i++)
+      psi = psi + z[i] * y[i];
+    CompSolutionTimer.Stop();
+  }
+};
+}
+#endif

From 2ab9d4bc56a666b0a66b0de440106ec1bcb40fb8 Mon Sep 17 00:00:00 2001
From: Daniel Richtmann <daniel.richtmann@physik.uni-regensburg.de>
Date: Mon, 11 Jun 2018 15:54:32 +0200
Subject: [PATCH 122/130] WilsonMG: Fix random behavior in GMRES

From time to time I saw random behavior since the basis vectors were not
initialized properly.
---
 .../CommunicationAvoidingGeneralisedMinimalResidual.h        | 3 ++-
 ...FlexibleCommunicationAvoidingGeneralisedMinimalResidual.h | 5 +++--
 .../iterative/FlexibleGeneralisedMinimalResidual.h           | 5 +++--
 lib/algorithms/iterative/GeneralisedMinimalResidual.h        | 3 ++-
 .../MixedPrecisionFlexibleGeneralisedMinimalResidual.h       | 5 +++--
 5 files changed, 13 insertions(+), 8 deletions(-)

diff --git a/lib/algorithms/iterative/CommunicationAvoidingGeneralisedMinimalResidual.h b/lib/algorithms/iterative/CommunicationAvoidingGeneralisedMinimalResidual.h
index 1f5d293a..f0289683 100644
--- a/lib/algorithms/iterative/CommunicationAvoidingGeneralisedMinimalResidual.h
+++ b/lib/algorithms/iterative/CommunicationAvoidingGeneralisedMinimalResidual.h
@@ -145,7 +145,8 @@ class CommunicationAvoidingGeneralisedMinimalResidual : public OperatorFunction<
     Field w(src._grid);
     Field r(src._grid);
 
-    std::vector<Field> v(RestartLength + 1, src._grid);
+    // this should probably be made a class member so that it is only allocated once, not in every restart
+    std::vector<Field> v(RestartLength + 1, src._grid); for (auto &elem : v) elem = zero;
 
     MatrixTimer.Start();
     LinOp.Op(psi, w);
diff --git a/lib/algorithms/iterative/FlexibleCommunicationAvoidingGeneralisedMinimalResidual.h b/lib/algorithms/iterative/FlexibleCommunicationAvoidingGeneralisedMinimalResidual.h
index b992f760..db857248 100644
--- a/lib/algorithms/iterative/FlexibleCommunicationAvoidingGeneralisedMinimalResidual.h
+++ b/lib/algorithms/iterative/FlexibleCommunicationAvoidingGeneralisedMinimalResidual.h
@@ -152,8 +152,9 @@ class FlexibleCommunicationAvoidingGeneralisedMinimalResidual : public OperatorF
     Field w(src._grid);
     Field r(src._grid);
 
-    std::vector<Field> v(RestartLength + 1, src._grid);
-    std::vector<Field> z(RestartLength + 1, src._grid);
+    // these should probably be made class members so that they are only allocated once, not in every restart
+    std::vector<Field> v(RestartLength + 1, src._grid); for (auto &elem : v) elem = zero;
+    std::vector<Field> z(RestartLength + 1, src._grid); for (auto &elem : z) elem = zero;
 
     MatrixTimer.Start();
     LinOp.Op(psi, w);
diff --git a/lib/algorithms/iterative/FlexibleGeneralisedMinimalResidual.h b/lib/algorithms/iterative/FlexibleGeneralisedMinimalResidual.h
index bc5184d4..efc8c787 100644
--- a/lib/algorithms/iterative/FlexibleGeneralisedMinimalResidual.h
+++ b/lib/algorithms/iterative/FlexibleGeneralisedMinimalResidual.h
@@ -150,8 +150,9 @@ class FlexibleGeneralisedMinimalResidual : public OperatorFunction<Field> {
     Field w(src._grid);
     Field r(src._grid);
 
-    std::vector<Field> v(RestartLength + 1, src._grid);
-    std::vector<Field> z(RestartLength + 1, src._grid);
+    // these should probably be made class members so that they are only allocated once, not in every restart
+    std::vector<Field> v(RestartLength + 1, src._grid); for (auto &elem : v) elem = zero;
+    std::vector<Field> z(RestartLength + 1, src._grid); for (auto &elem : z) elem = zero;
 
     MatrixTimer.Start();
     LinOp.Op(psi, w);
diff --git a/lib/algorithms/iterative/GeneralisedMinimalResidual.h b/lib/algorithms/iterative/GeneralisedMinimalResidual.h
index eaa43563..10636234 100644
--- a/lib/algorithms/iterative/GeneralisedMinimalResidual.h
+++ b/lib/algorithms/iterative/GeneralisedMinimalResidual.h
@@ -143,7 +143,8 @@ class GeneralisedMinimalResidual : public OperatorFunction<Field> {
     Field w(src._grid);
     Field r(src._grid);
 
-    std::vector<Field> v(RestartLength + 1, src._grid);
+    // this should probably be made a class member so that it is only allocated once, not in every restart
+    std::vector<Field> v(RestartLength + 1, src._grid); for (auto &elem : v) elem = zero;
 
     MatrixTimer.Start();
     LinOp.Op(psi, w);
diff --git a/lib/algorithms/iterative/MixedPrecisionFlexibleGeneralisedMinimalResidual.h b/lib/algorithms/iterative/MixedPrecisionFlexibleGeneralisedMinimalResidual.h
index d38d41a9..04113684 100644
--- a/lib/algorithms/iterative/MixedPrecisionFlexibleGeneralisedMinimalResidual.h
+++ b/lib/algorithms/iterative/MixedPrecisionFlexibleGeneralisedMinimalResidual.h
@@ -157,8 +157,9 @@ class MixedPrecisionFlexibleGeneralisedMinimalResidual : public OperatorFunction
     FieldD w(src._grid);
     FieldD r(src._grid);
 
-    std::vector<FieldD> v(RestartLength + 1, src._grid); // these should probably be made class members
-    std::vector<FieldD> z(RestartLength + 1, src._grid); // so that they are only allocated once, not in every restart
+    // these should probably be made class members so that they are only allocated once, not in every restart
+    std::vector<FieldD> v(RestartLength + 1, src._grid); for (auto &elem : v) elem = zero;
+    std::vector<FieldD> z(RestartLength + 1, src._grid); for (auto &elem : z) elem = zero;
 
     MatrixTimer.Start();
     LinOp.Op(psi, w);

From 66b7a0f87143e6bf5828b9039ba1985fc210eabb Mon Sep 17 00:00:00 2001
From: Daniel Richtmann <daniel.richtmann@physik.uni-regensburg.de>
Date: Tue, 12 Jun 2018 15:22:02 +0200
Subject: [PATCH 123/130] WilsonMG: Move multigrid class to separate file

---
 tests/solver/Test_multigrid_common.h | 670 +++++++++++++++++++++++++++
 tests/solver/Test_wilsonclover_mg.cc | 637 +------------------------
 2 files changed, 671 insertions(+), 636 deletions(-)
 create mode 100644 tests/solver/Test_multigrid_common.h

diff --git a/tests/solver/Test_multigrid_common.h b/tests/solver/Test_multigrid_common.h
new file mode 100644
index 00000000..58614195
--- /dev/null
+++ b/tests/solver/Test_multigrid_common.h
@@ -0,0 +1,670 @@
+/*************************************************************************************
+
+    Grid physics library, www.github.com/paboyle/Grid 
+
+    Source file: ./tests/solver/Test_multigrid_common.h
+
+    Copyright (C) 2015
+
+    Author: Daniel Richtmann <daniel.richtmann@ur.de>
+
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License along
+    with this program; if not, write to the Free Software Foundation, Inc.,
+    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+
+    See the full license in the file "LICENSE" in the top level distribution directory
+    *************************************************************************************/
+/*  END LEGAL */
+#ifndef GRID_TEST_MULTIGRID_COMMON_H
+#define GRID_TEST_MULTIGRID_COMMON_H
+
+namespace Grid {
+
+// TODO: Can think about having one parameter struct per level and then a
+// vector of these structs. How well would that work together with the
+// serialization strategy of Grid?
+
+// clang-format off
+struct MultiGridParams : Serializable { // parameters of the multigrid preconditioner; the per-level vectors are indexed by level, 0 being the finest
+public:
+  GRID_SERIALIZABLE_CLASS_MEMBERS(MultiGridParams,
+                                  int,                           nLevels,
+                                  std::vector<std::vector<int>>, blockSizes,           // size == nLevels - 1
+                                  std::vector<double>,           smootherTol,          // size == nLevels - 1
+                                  std::vector<int>,              smootherMaxOuterIter, // size == nLevels - 1
+                                  std::vector<int>,              smootherMaxInnerIter, // size == nLevels - 1
+                                  bool,                          kCycle,
+                                  std::vector<double>,           kCycleTol,            // size == nLevels - 1
+                                  std::vector<int>,              kCycleMaxOuterIter,   // size == nLevels - 1
+                                  std::vector<int>,              kCycleMaxInnerIter,   // size == nLevels - 1
+                                  double,                        coarseSolverTol,
+                                  int,                           coarseSolverMaxOuterIter,
+                                  int,                           coarseSolverMaxInnerIter);
+  // The defaults below describe a two-level setup: every per-level vector has exactly one entry.
+  // constructor with default values
+  MultiGridParams(int                           _nLevels                  = 2,
+                  std::vector<std::vector<int>> _blockSizes               = {{4, 4, 4, 4}},
+                  std::vector<double>           _smootherTol              = {1e-14},
+                  std::vector<int>              _smootherMaxOuterIter     = {4},
+                  std::vector<int>              _smootherMaxInnerIter     = {4},
+                  bool                          _kCycle                   = true,
+                  std::vector<double>           _kCycleTol                = {1e-1},
+                  std::vector<int>              _kCycleMaxOuterIter       = {2},
+                  std::vector<int>              _kCycleMaxInnerIter       = {5},
+                  double                        _coarseSolverTol          = 5e-2,
+                  int                           _coarseSolverMaxOuterIter = 10,
+                  int                           _coarseSolverMaxInnerIter = 500)
+  : nLevels(_nLevels)
+  , blockSizes(_blockSizes)
+  , smootherTol(_smootherTol)
+  , smootherMaxOuterIter(_smootherMaxOuterIter)
+  , smootherMaxInnerIter(_smootherMaxInnerIter)
+  , kCycle(_kCycle)
+  , kCycleTol(_kCycleTol)
+  , kCycleMaxOuterIter(_kCycleMaxOuterIter)
+  , kCycleMaxInnerIter(_kCycleMaxInnerIter)
+  , coarseSolverTol(_coarseSolverTol)
+  , coarseSolverMaxOuterIter(_coarseSolverMaxOuterIter)
+  , coarseSolverMaxInnerIter(_coarseSolverMaxInnerIter)
+  {}
+};
+// clang-format on
+
+inline void checkParameterValidity(MultiGridParams const &params) { // inline: defined in a header, so it must be safe to include from several translation units
+  // Asserts that every per-level parameter vector has nLevels - 1 entries; converted once so the comparisons are unsigned on both sides.
+  auto correctSize = static_cast<std::size_t>(params.nLevels - 1);
+
+  assert(correctSize == params.blockSizes.size());
+  assert(correctSize == params.smootherTol.size());
+  assert(correctSize == params.smootherMaxOuterIter.size());
+  assert(correctSize == params.smootherMaxInnerIter.size());
+  assert(correctSize == params.kCycleTol.size());
+  assert(correctSize == params.kCycleMaxOuterIter.size());
+  assert(correctSize == params.kCycleMaxInnerIter.size());
+}
+
+struct LevelInfo { // per-level grids, RNG seeds and parallel RNGs; index 0 is the finest level
+public:
+  std::vector<std::vector<int>> Seeds;
+  std::vector<GridCartesian *>  Grids; // NOTE(review): raw pointers; nothing in this struct releases the coarse grids created below
+  std::vector<GridParallelRNG>  PRNGs;
+  // Builds mgParams.nLevels levels: FineGrid is level 0, each further level divides the previous grid by the matching blockSizes entry.
+  LevelInfo(GridCartesian *FineGrid, MultiGridParams const &mgParams) {
+
+    auto nCoarseLevels = mgParams.blockSizes.size();
+
+    assert(nCoarseLevels == mgParams.nLevels - 1);
+
+    // set up values for finest grid
+    Grids.push_back(FineGrid);
+    Seeds.push_back({1, 2, 3, 4});
+    PRNGs.push_back(GridParallelRNG(Grids.back()));
+    PRNGs.back().SeedFixedIntegers(Seeds.back());
+
+    // set up values for coarser grids
+    for(int level = 1; level < mgParams.nLevels; ++level) {
+      auto Nd  = Grids[level - 1]->_ndimension;
+      auto tmp = Grids[level - 1]->_fdimensions; // copy of the next finer grid's dimensions, divided per direction below
+      assert(tmp.size() == Nd);
+
+      Seeds.push_back(std::vector<int>(Nd));
+
+      for(int d = 0; d < Nd; ++d) {
+        tmp[d] /= mgParams.blockSizes[level - 1][d]; // NOTE(review): integer division; divisibility is not checked here
+        Seeds[level][d] = (level)*Nd + d + 1;        // fixed seed derived from level and direction
+      }
+
+      Grids.push_back(QCD::SpaceTimeGrid::makeFourDimGrid(tmp, Grids[level - 1]->_simd_layout, GridDefaultMpi()));
+      PRNGs.push_back(GridParallelRNG(Grids[level]));
+
+      PRNGs[level].SeedFixedIntegers(Seeds[level]);
+    }
+
+    std::cout << GridLogMessage << "Constructed " << mgParams.nLevels << " levels" << std::endl;
+
+    for(int level = 0; level < mgParams.nLevels; ++level) {
+      std::cout << GridLogMessage << "level = " << level << ":" << std::endl;
+      Grids[level]->show_decomposition();
+    }
+  }
+};
+
+template<class Field> class MultiGridPreconditionerBase : public LinearFunction<Field> { // abstract interface shared by the multigrid preconditioner levels
+public:
+  virtual ~MultiGridPreconditionerBase()               = default;
+  virtual void setup()                                 = 0; // prepare the level before it is applied
+  virtual void operator()(Field const &in, Field &out) = 0; // apply the preconditioner to 'in', writing the result to 'out'
+  virtual void runChecks(RealD tolerance)              = 0; // consistency checks; presumably 'tolerance' bounds the accepted deviation — confirm in the implementations
+  virtual void reportTimings()                         = 0; // timing report; implementations are not shown here
+};
+
+template<class Fobj, class CComplex, int nBasis, int nCoarserLevels, class Matrix>
+class MultiGridPreconditioner : public MultiGridPreconditionerBase<Lattice<Fobj>> {
+public:
+  /////////////////////////////////////////////
+  // Type Definitions
+  /////////////////////////////////////////////
+
+  // clang-format off
+  typedef Aggregation<Fobj, CComplex, nBasis>                                                                         Aggregates;
+  typedef CoarsenedMatrix<Fobj, CComplex, nBasis>                                                                     CoarseDiracMatrix;
+  typedef typename Aggregates::CoarseVector                                                                           CoarseVector;
+  typedef typename Aggregates::siteVector                                                                             CoarseSiteVector;
+  typedef Matrix                                                                                                      FineDiracMatrix;
+  typedef typename Aggregates::FineField                                                                              FineVector;
+  typedef MultiGridPreconditioner<CoarseSiteVector, iScalar<CComplex>, nBasis, nCoarserLevels - 1, CoarseDiracMatrix> NextPreconditionerLevel;
+  // clang-format on
+
+  /////////////////////////////////////////////
+  // Member Data
+  /////////////////////////////////////////////
+
+  int _CurrentLevel;
+  int _NextCoarserLevel;
+
+  MultiGridParams &_MultiGridParams;
+  LevelInfo &      _LevelInfo;
+
+  FineDiracMatrix & _FineMatrix;
+  FineDiracMatrix & _SmootherMatrix;
+  Aggregates        _Aggregates;
+  CoarseDiracMatrix _CoarseMatrix;
+
+  std::unique_ptr<NextPreconditionerLevel> _NextPreconditionerLevel;
+
+  GridStopWatch _SetupTotalTimer;
+  GridStopWatch _SetupCreateSubspaceTimer;
+  GridStopWatch _SetupProjectToChiralitiesTimer;
+  GridStopWatch _SetupCoarsenOperatorTimer;
+  GridStopWatch _SetupNextLevelTimer;
+  GridStopWatch _SolveTotalTimer;
+  GridStopWatch _SolveRestrictionTimer;
+  GridStopWatch _SolveProlongationTimer;
+  GridStopWatch _SolveSmootherTimer;
+  GridStopWatch _SolveNextLevelTimer;
+
+  /////////////////////////////////////////////
+  // Member Functions
+  /////////////////////////////////////////////
+
+  MultiGridPreconditioner(MultiGridParams &mgParams, LevelInfo &LvlInfo, FineDiracMatrix &FineMat, FineDiracMatrix &SmootherMat)
+    : _CurrentLevel(mgParams.nLevels - (nCoarserLevels + 1)) // _Level = 0 corresponds to finest
+    , _NextCoarserLevel(_CurrentLevel + 1)                   // incremented for instances on coarser levels
+    , _MultiGridParams(mgParams) // the four arguments are kept by reference, so the caller's objects must outlive this instance
+    , _LevelInfo(LvlInfo)
+    , _FineMatrix(FineMat)
+    , _SmootherMatrix(SmootherMat)
+    , _Aggregates(_LevelInfo.Grids[_NextCoarserLevel], _LevelInfo.Grids[_CurrentLevel], 0)
+    , _CoarseMatrix(*_LevelInfo.Grids[_NextCoarserLevel]) {
+    // Recursively construct the next coarser level; it receives this level's coarse matrix as both its fine and its smoother operator.
+    _NextPreconditionerLevel
+      = std::unique_ptr<NextPreconditionerLevel>(new NextPreconditionerLevel(_MultiGridParams, _LevelInfo, _CoarseMatrix, _CoarseMatrix));
+
+    resetTimers();
+  }
+
+  void setup() {
+    // Builds this level: creates the subspace with nb = nBasis / 2 vectors, doubles it by chirality, coarsens the operator, then sets up the next coarser level.
+    _SetupTotalTimer.Start();
+
+    static_assert((nBasis & 0x1) == 0, "MG Preconditioner only supports an even number of basis vectors");
+    int nb = nBasis / 2;
+
+    MdagMLinearOperator<FineDiracMatrix, FineVector> fineMdagMOp(_FineMatrix);
+
+    _SetupCreateSubspaceTimer.Start();
+    _Aggregates.CreateSubspace(_LevelInfo.PRNGs[_CurrentLevel], fineMdagMOp, nb);
+    _SetupCreateSubspaceTimer.Stop();
+
+    _SetupProjectToChiralitiesTimer.Start();
+    FineVector tmp1(_Aggregates.subspace[0]._grid);
+    FineVector tmp2(_Aggregates.subspace[0]._grid);
+    for(int n = 0; n < nb; n++) {
+      tmp1 = _Aggregates.subspace[n]; // reuse the preallocated temporary instead of shadowing it with a fresh copy each iteration
+      G5C(tmp2, _Aggregates.subspace[n]);
+      axpby(_Aggregates.subspace[n], 0.5, 0.5, tmp1, tmp2);       // presumably 0.5 * tmp1 + 0.5 * tmp2 — confirm axpby argument order
+      axpby(_Aggregates.subspace[n + nb], 0.5, -0.5, tmp1, tmp2); // presumably 0.5 * tmp1 - 0.5 * tmp2, stored in the second half
+      std::cout << GridLogMG << " Level " << _CurrentLevel << ": Chirally doubled vector " << n << ". "
+                << "norm2(vec[" << n << "]) = " << norm2(_Aggregates.subspace[n]) << ". "
+                << "norm2(vec[" << n + nb << "]) = " << norm2(_Aggregates.subspace[n + nb]) << std::endl;
+    }
+    _SetupProjectToChiralitiesTimer.Stop();
+
+    _SetupCoarsenOperatorTimer.Start();
+    _CoarseMatrix.CoarsenOperator(_LevelInfo.Grids[_CurrentLevel], fineMdagMOp, _Aggregates);
+    _SetupCoarsenOperatorTimer.Stop();
+
+    _SetupNextLevelTimer.Start();
+    _NextPreconditionerLevel->setup(); // recurse into the next coarser level
+    _SetupNextLevelTimer.Stop();
+
+    _SetupTotalTimer.Stop();
+  }
+
+  virtual void operator()(FineVector const &in, FineVector &out) {
+    // Applies the preconditioner to 'in', writing to 'out', with the cycle type selected by MultiGridParams::kCycle.
+    conformable(_LevelInfo.Grids[_CurrentLevel], in._grid);
+    conformable(in, out);
+
+    // TODO: implement a W-cycle
+    if(_MultiGridParams.kCycle)
+      kCycle(in, out);
+    else
+      vCycle(in, out);
+  }
+
+  void vCycle(FineVector const &in, FineVector &out) {
+    // One V-cycle: restrict 'in', apply the next coarser level once, prolong the result into 'out', then post-smooth with FGMRES on the smoother operator.
+    _SolveTotalTimer.Start();
+
+    RealD inputNorm = norm2(in);
+
+    CoarseVector coarseSrc(_LevelInfo.Grids[_NextCoarserLevel]);
+    CoarseVector coarseSol(_LevelInfo.Grids[_NextCoarserLevel]);
+    coarseSol = zero;
+
+    FineVector fineTmp(in._grid);
+    // smoother iteration budget = outer * inner iterations configured for this level
+    auto maxSmootherIter = _MultiGridParams.smootherMaxOuterIter[_CurrentLevel] * _MultiGridParams.smootherMaxInnerIter[_CurrentLevel];
+
+    TrivialPrecon<FineVector>                      fineTrivialPreconditioner;
+    FlexibleGeneralisedMinimalResidual<FineVector> fineFGMRES(_MultiGridParams.smootherTol[_CurrentLevel],
+                                                              maxSmootherIter,
+                                                              fineTrivialPreconditioner,
+                                                              _MultiGridParams.smootherMaxInnerIter[_CurrentLevel],
+                                                              false);
+
+    MdagMLinearOperator<FineDiracMatrix, FineVector> fineMdagMOp(_FineMatrix);
+    MdagMLinearOperator<FineDiracMatrix, FineVector> fineSmootherMdagMOp(_SmootherMatrix);
+
+    _SolveRestrictionTimer.Start();
+    _Aggregates.ProjectToSubspace(coarseSrc, in);
+    _SolveRestrictionTimer.Stop();
+
+    _SolveNextLevelTimer.Start();
+    (*_NextPreconditionerLevel)(coarseSrc, coarseSol); // single application of the next coarser level
+    _SolveNextLevelTimer.Stop();
+
+    _SolveProlongationTimer.Start();
+    _Aggregates.PromoteFromSubspace(coarseSol, out);
+    _SolveProlongationTimer.Stop();
+    // relative residual after the coarse-grid correction; only used in the log line at the end
+    fineMdagMOp.Op(out, fineTmp);
+    fineTmp                                = in - fineTmp;
+    auto r                                 = norm2(fineTmp);
+    auto residualAfterCoarseGridCorrection = std::sqrt(r / inputNorm);
+
+    _SolveSmootherTimer.Start();
+    fineFGMRES(fineSmootherMdagMOp, in, out); // post-smoothing; 'out' is presumably taken as the initial guess — confirm in FlexibleGeneralisedMinimalResidual
+    _SolveSmootherTimer.Stop();
+    // relative residual after post-smoothing; only used in the log line below
+    fineMdagMOp.Op(out, fineTmp);
+    fineTmp                        = in - fineTmp;
+    r                              = norm2(fineTmp);
+    auto residualAfterPostSmoother = std::sqrt(r / inputNorm);
+
+    std::cout << GridLogMG << " Level " << _CurrentLevel << ": V-cycle: Input norm = " << std::sqrt(inputNorm)
+              << " Coarse residual = " << residualAfterCoarseGridCorrection << " Post-Smoother residual = " << residualAfterPostSmoother
+              << std::endl;
+
+    _SolveTotalTimer.Stop();
+  }
+
+  void kCycle(FineVector const &in, FineVector &out) {
+    // One K-cycle: like the V-cycle, but the coarse system is solved by FGMRES that uses the next coarser level as its preconditioner.
+    _SolveTotalTimer.Start();
+
+    RealD inputNorm = norm2(in);
+
+    CoarseVector coarseSrc(_LevelInfo.Grids[_NextCoarserLevel]);
+    CoarseVector coarseSol(_LevelInfo.Grids[_NextCoarserLevel]);
+    coarseSol = zero;
+
+    FineVector fineTmp(in._grid);
+    // iteration budgets = outer * inner iterations configured for this level
+    auto smootherMaxIter = _MultiGridParams.smootherMaxOuterIter[_CurrentLevel] * _MultiGridParams.smootherMaxInnerIter[_CurrentLevel];
+    auto kCycleMaxIter   = _MultiGridParams.kCycleMaxOuterIter[_CurrentLevel] * _MultiGridParams.kCycleMaxInnerIter[_CurrentLevel];
+
+    TrivialPrecon<FineVector>                        fineTrivialPreconditioner;
+    FlexibleGeneralisedMinimalResidual<FineVector>   fineFGMRES(_MultiGridParams.smootherTol[_CurrentLevel],
+                                                              smootherMaxIter,
+                                                              fineTrivialPreconditioner,
+                                                              _MultiGridParams.smootherMaxInnerIter[_CurrentLevel],
+                                                              false);
+    FlexibleGeneralisedMinimalResidual<CoarseVector> coarseFGMRES(_MultiGridParams.kCycleTol[_CurrentLevel],
+                                                                  kCycleMaxIter,
+                                                                  *_NextPreconditionerLevel,
+                                                                  _MultiGridParams.kCycleMaxInnerIter[_CurrentLevel],
+                                                                  false);
+
+    MdagMLinearOperator<FineDiracMatrix, FineVector>     fineMdagMOp(_FineMatrix);
+    MdagMLinearOperator<FineDiracMatrix, FineVector>     fineSmootherMdagMOp(_SmootherMatrix);
+    MdagMLinearOperator<CoarseDiracMatrix, CoarseVector> coarseMdagMOp(_CoarseMatrix);
+
+    _SolveRestrictionTimer.Start();
+    _Aggregates.ProjectToSubspace(coarseSrc, in);
+    _SolveRestrictionTimer.Stop();
+
+    _SolveNextLevelTimer.Start();
+    coarseFGMRES(coarseMdagMOp, coarseSrc, coarseSol); // coarse solve, preconditioned by the next coarser level
+    _SolveNextLevelTimer.Stop();
+
+    _SolveProlongationTimer.Start();
+    _Aggregates.PromoteFromSubspace(coarseSol, out);
+    _SolveProlongationTimer.Stop();
+    // relative residual after the coarse-grid correction; only used in the log line at the end
+    fineMdagMOp.Op(out, fineTmp);
+    fineTmp                                = in - fineTmp;
+    auto r                                 = norm2(fineTmp);
+    auto residualAfterCoarseGridCorrection = std::sqrt(r / inputNorm);
+
+    _SolveSmootherTimer.Start();
+    fineFGMRES(fineSmootherMdagMOp, in, out); // post-smoothing; 'out' is presumably taken as the initial guess — confirm in FlexibleGeneralisedMinimalResidual
+    _SolveSmootherTimer.Stop();
+    // relative residual after post-smoothing; only used in the log line below
+    fineMdagMOp.Op(out, fineTmp);
+    fineTmp                        = in - fineTmp;
+    r                              = norm2(fineTmp);
+    auto residualAfterPostSmoother = std::sqrt(r / inputNorm);
+
+    std::cout << GridLogMG << " Level " << _CurrentLevel << ": K-cycle: Input norm = " << std::sqrt(inputNorm)
+              << " Coarse residual = " << residualAfterCoarseGridCorrection << " Post-Smoother residual = " << residualAfterPostSmoother
+              << std::endl;
+
+    _SolveTotalTimer.Stop();
+  }
+
+  // Consistency checks for the two-grid setup of this level; one log line with a verdict per check.
+  //
+  // Each check computes a relative deviation and compares it against `tolerance`:
+  //   1. M v        against (Mdiag + Σ_μ Mdir_μ) v        (random fine vector v)
+  //   2. v_i        against P R v_i                       (every subspace vector v_i)
+  //   3. v_c        against R P v_c                       (random coarse vector v_c)
+  //   4. R D P v_c  against D_c v_c                       (random coarse vector v_c)
+  //   5. Im against Re part of v_c^dag D_c^dag D_c v_c    (random coarse vector v_c)
+  // Here R is _Aggregates.ProjectToSubspace and P is _Aggregates.PromoteFromSubspace.
+  //
+  // A failing check is only reported (the abort() call is commented out). After the checks
+  // of this level, the checks of the next coarser level are run.
+  // tolerance: largest relative deviation that is still reported as "check passed".
+  void runChecks(RealD tolerance) {
+
+    std::vector<FineVector>   fineTmps(7, _LevelInfo.Grids[_CurrentLevel]);
+    std::vector<CoarseVector> coarseTmps(4, _LevelInfo.Grids[_NextCoarserLevel]);
+
+    MdagMLinearOperator<FineDiracMatrix, FineVector>     fineMdagMOp(_FineMatrix);
+    MdagMLinearOperator<CoarseDiracMatrix, CoarseVector> coarseMdagMOp(_CoarseMatrix);
+
+    // Completes the current log line with the verdict. Written as !(dev <= tolerance) so that a NaN
+    // deviation (e.g. 0/0 from a vanishing reference norm) counts as a failure instead of passing.
+    auto reportVerdict = [tolerance](RealD dev) {
+      if(!(dev <= tolerance)) {
+        std::cout << " > " << tolerance << " -> check failed" << std::endl;
+        // abort();
+      } else {
+        std::cout << " < " << tolerance << " -> check passed" << std::endl;
+      }
+    };
+
+    std::cout << GridLogMG << " Level " << _CurrentLevel << ": **************************************************" << std::endl;
+    std::cout << GridLogMG << " Level " << _CurrentLevel << ": MG correctness check: 0 == (M - (Mdiag + Σ_μ Mdir_μ)) * v" << std::endl;
+    std::cout << GridLogMG << " Level " << _CurrentLevel << ": **************************************************" << std::endl;
+
+    random(_LevelInfo.PRNGs[_CurrentLevel], fineTmps[0]);
+
+    fineMdagMOp.Op(fineTmps[0], fineTmps[1]);     //     M * v
+    fineMdagMOp.OpDiag(fineTmps[0], fineTmps[2]); // Mdiag * v
+
+    fineTmps[4] = zero;
+    for(int dir = 0; dir < 4; dir++) { //       Σ_μ Mdir_μ * v
+      for(auto disp : {+1, -1}) {
+        fineMdagMOp.OpDir(fineTmps[0], fineTmps[3], dir, disp);
+        fineTmps[4] = fineTmps[4] + fineTmps[3];
+      }
+    }
+
+    fineTmps[5] = fineTmps[2] + fineTmps[4]; // (Mdiag + Σ_μ Mdir_μ) * v
+
+    fineTmps[6]    = fineTmps[1] - fineTmps[5];
+    auto deviation = std::sqrt(norm2(fineTmps[6]) / norm2(fineTmps[1]));
+
+    std::cout << GridLogMG << " Level " << _CurrentLevel << ": norm2(M * v)                    = " << norm2(fineTmps[1]) << std::endl;
+    std::cout << GridLogMG << " Level " << _CurrentLevel << ": norm2(Mdiag * v)                = " << norm2(fineTmps[2]) << std::endl;
+    std::cout << GridLogMG << " Level " << _CurrentLevel << ": norm2(Σ_μ Mdir_μ * v)           = " << norm2(fineTmps[4]) << std::endl;
+    std::cout << GridLogMG << " Level " << _CurrentLevel << ": norm2((Mdiag + Σ_μ Mdir_μ) * v) = " << norm2(fineTmps[5]) << std::endl;
+    std::cout << GridLogMG << " Level " << _CurrentLevel << ": relative deviation              = " << deviation;
+
+    reportVerdict(deviation);
+
+    std::cout << GridLogMG << " Level " << _CurrentLevel << ": **************************************************" << std::endl;
+    std::cout << GridLogMG << " Level " << _CurrentLevel << ": MG correctness check: 0 == (1 - P R) v" << std::endl;
+    std::cout << GridLogMG << " Level " << _CurrentLevel << ": **************************************************" << std::endl;
+
+    for(decltype(_Aggregates.subspace.size()) i = 0; i < _Aggregates.subspace.size(); ++i) {
+      _Aggregates.ProjectToSubspace(coarseTmps[0], _Aggregates.subspace[i]); //   R v_i
+      _Aggregates.PromoteFromSubspace(coarseTmps[0], fineTmps[0]);           // P R v_i
+
+      fineTmps[1] = _Aggregates.subspace[i] - fineTmps[0]; // v_i - P R v_i
+      deviation   = std::sqrt(norm2(fineTmps[1]) / norm2(_Aggregates.subspace[i]));
+
+      std::cout << GridLogMG << " Level " << _CurrentLevel << ": Vector " << i << ": norm2(v_i) = " << norm2(_Aggregates.subspace[i])
+                << " | norm2(R v_i) = " << norm2(coarseTmps[0]) << " | norm2(P R v_i) = " << norm2(fineTmps[0])
+                << " | relative deviation = " << deviation;
+
+      reportVerdict(deviation);
+    }
+
+    std::cout << GridLogMG << " Level " << _CurrentLevel << ": **************************************************" << std::endl;
+    std::cout << GridLogMG << " Level " << _CurrentLevel << ": MG correctness check: 0 == (1 - R P) v_c" << std::endl;
+    std::cout << GridLogMG << " Level " << _CurrentLevel << ": **************************************************" << std::endl;
+
+    random(_LevelInfo.PRNGs[_NextCoarserLevel], coarseTmps[0]);
+
+    _Aggregates.PromoteFromSubspace(coarseTmps[0], fineTmps[0]); //   P v_c
+    _Aggregates.ProjectToSubspace(coarseTmps[1], fineTmps[0]);   // R P v_c
+
+    coarseTmps[2] = coarseTmps[0] - coarseTmps[1]; // v_c - R P v_c
+    deviation     = std::sqrt(norm2(coarseTmps[2]) / norm2(coarseTmps[0]));
+
+    std::cout << GridLogMG << " Level " << _CurrentLevel << ": norm2(v_c) = " << norm2(coarseTmps[0])
+              << " | norm2(R P v_c) = " << norm2(coarseTmps[1]) << " | norm2(P v_c) = " << norm2(fineTmps[0])
+              << " | relative deviation = " << deviation;
+
+    reportVerdict(deviation);
+
+    std::cout << GridLogMG << " Level " << _CurrentLevel << ": **************************************************" << std::endl;
+    std::cout << GridLogMG << " Level " << _CurrentLevel << ": MG correctness check: 0 == (R D P - D_c) v_c" << std::endl;
+    std::cout << GridLogMG << " Level " << _CurrentLevel << ": **************************************************" << std::endl;
+
+    random(_LevelInfo.PRNGs[_NextCoarserLevel], coarseTmps[0]);
+
+    _Aggregates.PromoteFromSubspace(coarseTmps[0], fineTmps[0]); //     P v_c
+    fineMdagMOp.Op(fineTmps[0], fineTmps[1]);                    //   D P v_c
+    _Aggregates.ProjectToSubspace(coarseTmps[1], fineTmps[1]);   // R D P v_c
+
+    coarseMdagMOp.Op(coarseTmps[0], coarseTmps[2]); // D_c v_c
+
+    coarseTmps[3] = coarseTmps[1] - coarseTmps[2]; // R D P v_c - D_c v_c
+    deviation     = std::sqrt(norm2(coarseTmps[3]) / norm2(coarseTmps[1]));
+
+    std::cout << GridLogMG << " Level " << _CurrentLevel << ": norm2(R D P v_c) = " << norm2(coarseTmps[1])
+              << " | norm2(D_c v_c) = " << norm2(coarseTmps[2]) << " | relative deviation = " << deviation;
+
+    reportVerdict(deviation);
+
+    std::cout << GridLogMG << " Level " << _CurrentLevel << ": **************************************************" << std::endl;
+    std::cout << GridLogMG << " Level " << _CurrentLevel << ": MG correctness check: 0 == |(Im(v_c^dag D_c^dag D_c v_c)|" << std::endl;
+    std::cout << GridLogMG << " Level " << _CurrentLevel << ": **************************************************" << std::endl;
+
+    random(_LevelInfo.PRNGs[_NextCoarserLevel], coarseTmps[0]);
+
+    coarseMdagMOp.Op(coarseTmps[0], coarseTmps[1]);    //         D_c v_c
+    coarseMdagMOp.AdjOp(coarseTmps[1], coarseTmps[2]); // D_c^dag D_c v_c
+
+    auto dot  = innerProduct(coarseTmps[0], coarseTmps[2]); //v_c^dag D_c^dag D_c v_c
+    deviation = std::abs(imag(dot)) / std::abs(real(dot));
+
+    std::cout << GridLogMG << " Level " << _CurrentLevel << ": Re(v_c^dag D_c^dag D_c v_c) = " << real(dot)
+              << " | Im(v_c^dag D_c^dag D_c v_c) = " << imag(dot) << " | relative deviation = " << deviation;
+
+    reportVerdict(deviation);
+
+    // Continue with the checks of the next coarser level
+    _NextPreconditionerLevel->runChecks(tolerance);
+  }
+
+  void reportTimings() {
+    // Log the accumulated setup and solve stopwatches of this level, one line per timer
+    // clang-format off
+    std::cout << GridLogMG << " Level " << _CurrentLevel << ": Time elapsed: Sum   total            " <<                _SetupTotalTimer.Elapsed() + _SolveTotalTimer.Elapsed() << std::endl;
+    std::cout << GridLogMG << " Level " << _CurrentLevel << ": Time elapsed: Setup total            " <<                _SetupTotalTimer.Elapsed() << std::endl;
+    std::cout << GridLogMG << " Level " << _CurrentLevel << ": Time elapsed: Setup create subspace  " <<       _SetupCreateSubspaceTimer.Elapsed() << std::endl;
+    std::cout << GridLogMG << " Level " << _CurrentLevel << ": Time elapsed: Setup project chiral   " << _SetupProjectToChiralitiesTimer.Elapsed() << std::endl;
+    std::cout << GridLogMG << " Level " << _CurrentLevel << ": Time elapsed: Setup coarsen operator " <<      _SetupCoarsenOperatorTimer.Elapsed() << std::endl;
+    std::cout << GridLogMG << " Level " << _CurrentLevel << ": Time elapsed: Setup next level       " <<            _SetupNextLevelTimer.Elapsed() << std::endl;
+    std::cout << GridLogMG << " Level " << _CurrentLevel << ": Time elapsed: Solve total            " <<                _SolveTotalTimer.Elapsed() << std::endl;
+    std::cout << GridLogMG << " Level " << _CurrentLevel << ": Time elapsed: Solve restriction      " <<          _SolveRestrictionTimer.Elapsed() << std::endl;
+    std::cout << GridLogMG << " Level " << _CurrentLevel << ": Time elapsed: Solve prolongation     " <<         _SolveProlongationTimer.Elapsed() << std::endl;
+    std::cout << GridLogMG << " Level " << _CurrentLevel << ": Time elapsed: Solve smoother         " <<             _SolveSmootherTimer.Elapsed() << std::endl;
+    std::cout << GridLogMG << " Level " << _CurrentLevel << ": Time elapsed: Solve next level       " <<            _SolveNextLevelTimer.Elapsed() << std::endl;
+    // clang-format on
+    // Then let the next coarser level report its own timers
+    _NextPreconditionerLevel->reportTimings();
+  }
+
+  void resetTimers() {
+    // Reset every stopwatch of this level (setup timers, then solve timers), then the next coarser level
+    for(GridStopWatch *timer : {&_SetupTotalTimer,
+                                &_SetupCreateSubspaceTimer,
+                                &_SetupProjectToChiralitiesTimer,
+                                &_SetupCoarsenOperatorTimer,
+                                &_SetupNextLevelTimer,
+                                &_SolveTotalTimer,
+                                &_SolveRestrictionTimer,
+                                &_SolveProlongationTimer,
+                                &_SolveSmootherTimer,
+                                &_SolveNextLevelTimer})
+      timer->Reset();
+    _NextPreconditionerLevel->resetTimers();
+  }
+};
+
+// Coarsest-level specialization (fourth template argument 0): it has no next level to delegate to
+template<class Fobj, class CComplex, int nBasis, class Matrix>
+class MultiGridPreconditioner<Fobj, CComplex, nBasis, 0, Matrix> : public MultiGridPreconditionerBase<Lattice<Fobj>> {
+public:
+  /////////////////////////////////////////////
+  // Type Definitions
+  /////////////////////////////////////////////
+
+  using FineDiracMatrix = Matrix;
+  using FineVector      = Lattice<Fobj>;
+
+  /////////////////////////////////////////////
+  // Member Data
+  /////////////////////////////////////////////
+
+  int _CurrentLevel; // set to nLevels - 1 by the constructor
+
+  MultiGridParams &_MultiGridParams;
+  LevelInfo &      _LevelInfo;
+
+  FineDiracMatrix &_FineMatrix;
+  FineDiracMatrix &_SmootherMatrix; // stored, but not read by any method of this specialization
+
+  GridStopWatch _SolveTotalTimer;
+  GridStopWatch _SolveSmootherTimer;
+
+  /////////////////////////////////////////////
+  // Member Functions
+  /////////////////////////////////////////////
+
+  MultiGridPreconditioner(MultiGridParams &mgParams, LevelInfo &LvlInfo, FineDiracMatrix &FineMat, FineDiracMatrix &SmootherMat)
+    : _CurrentLevel(mgParams.nLevels - 1)
+    , _MultiGridParams(mgParams)
+    , _LevelInfo(LvlInfo)
+    , _FineMatrix(FineMat)
+    , _SmootherMatrix(SmootherMat) {
+
+    resetTimers();
+  }
+
+  void setup() {} // nothing is set up on the coarsest level
+
+  virtual void operator()(FineVector const &in, FineVector &out) {
+
+    _SolveTotalTimer.Start();
+
+    conformable(_LevelInfo.Grids[_CurrentLevel], in._grid);
+    conformable(in, out);
+
+    MdagMLinearOperator<FineDiracMatrix, FineVector> levelOp(_FineMatrix);
+
+    // There is no coarser level to hand the problem to, so it is solved on this level by FGMRES with a TrivialPrecon.
+    // The iteration cap passed to the solver is the product of the outer and inner iteration parameters.
+    auto iterationCap = _MultiGridParams.coarseSolverMaxOuterIter * _MultiGridParams.coarseSolverMaxInnerIter;
+    TrivialPrecon<FineVector>                      trivialPrecon;
+    FlexibleGeneralisedMinimalResidual<FineVector> coarsestSolver(
+      _MultiGridParams.coarseSolverTol, iterationCap, trivialPrecon, _MultiGridParams.coarseSolverMaxInnerIter, false);
+
+    _SolveSmootherTimer.Start();
+    coarsestSolver(levelOp, in, out);
+    _SolveSmootherTimer.Stop();
+
+    _SolveTotalTimer.Stop();
+  }
+
+  void runChecks(RealD tolerance) {} // no checks are run on the coarsest level
+
+  void reportTimings() {
+    // Only the two solve stopwatches exist on this level
+    // clang-format off
+    std::cout << GridLogMG << " Level " << _CurrentLevel << ": Time elapsed: Solve total            " <<    _SolveTotalTimer.Elapsed() << std::endl;
+    std::cout << GridLogMG << " Level " << _CurrentLevel << ": Time elapsed: Solve smoother         " << _SolveSmootherTimer.Elapsed() << std::endl;
+    // clang-format on
+  }
+
+  void resetTimers() {
+    for(GridStopWatch *timer : {&_SolveTotalTimer, &_SolveSmootherTimer}) {
+      timer->Reset();
+    }
+  }
+};
+
+template<class Fobj, class CComplex, int nBasis, int nLevels, class Matrix> // nLevels is forwarded below as nLevels - 1
+using NLevelMGPreconditioner = MultiGridPreconditioner<Fobj, CComplex, nBasis, nLevels - 1, Matrix>;
+
+// Creates the preconditioner hierarchy for the level count given at runtime in mgParams.nLevels.
+// The level count is a template argument, so each supported value gets its own instantiation below.
+template<class Fobj, class CComplex, int nBasis, class Matrix>
+std::unique_ptr<MultiGridPreconditionerBase<Lattice<Fobj>>>
+createMGInstance(MultiGridParams &mgParams, LevelInfo &levelInfo, Matrix &FineMat, Matrix &SmootherMat) {
+
+#define RETURN_INSTANCE_IF(N)                                                                                    \
+  if(mgParams.nLevels == (N))                                                                                    \
+    return std::unique_ptr<MultiGridPreconditionerBase<Lattice<Fobj>>>(                                          \
+      new NLevelMGPreconditioner<Fobj, CComplex, nBasis, N, Matrix>(mgParams, levelInfo, FineMat, SmootherMat))
+
+  RETURN_INSTANCE_IF(2);
+  RETURN_INSTANCE_IF(3);
+  RETURN_INSTANCE_IF(4);
+
+#undef RETURN_INSTANCE_IF
+
+  // Any other level count has no instantiation: log the error and terminate the program
+  std::cout << GridLogError << "We currently only support nLevels ∈ {2, 3, 4}" << std::endl;
+  exit(EXIT_FAILURE);
+}
+
+}
+#endif
diff --git a/tests/solver/Test_wilsonclover_mg.cc b/tests/solver/Test_wilsonclover_mg.cc
index 051d1bcd..28e83ed1 100644
--- a/tests/solver/Test_wilsonclover_mg.cc
+++ b/tests/solver/Test_wilsonclover_mg.cc
@@ -27,647 +27,12 @@
 /*  END LEGAL */
 
 #include <Grid/Grid.h>
+#include <Test_multigrid_common.h>
 
 using namespace std;
 using namespace Grid;
 using namespace Grid::QCD;
 
-// TODO: Can think about having one parameter struct per level and then a
-// vector of these structs. How well would that work together with the
-// serialization strategy of Grid?
-
-// clang-format off
-struct MultiGridParams : Serializable {
-public:
-  GRID_SERIALIZABLE_CLASS_MEMBERS(MultiGridParams,
-                                  int,                           nLevels,
-                                  std::vector<std::vector<int>>, blockSizes,           // size == nLevels - 1
-                                  std::vector<double>,           smootherTol,          // size == nLevels - 1
-                                  std::vector<int>,              smootherMaxOuterIter, // size == nLevels - 1
-                                  std::vector<int>,              smootherMaxInnerIter, // size == nLevels - 1
-                                  bool,                          kCycle,
-                                  std::vector<double>,           kCycleTol,            // size == nLevels - 1
-                                  std::vector<int>,              kCycleMaxOuterIter,   // size == nLevels - 1
-                                  std::vector<int>,              kCycleMaxInnerIter,   // size == nLevels - 1
-                                  double,                        coarseSolverTol,
-                                  int,                           coarseSolverMaxOuterIter,
-                                  int,                           coarseSolverMaxInnerIter);
-
-  // constructor with default values
-  MultiGridParams(int                           _nLevels                  = 2,
-                  std::vector<std::vector<int>> _blockSizes               = {{4, 4, 4, 4}},
-                  std::vector<double>           _smootherTol              = {1e-14},
-                  std::vector<int>              _smootherMaxOuterIter     = {4},
-                  std::vector<int>              _smootherMaxInnerIter     = {4},
-                  bool                          _kCycle                   = true,
-                  std::vector<double>           _kCycleTol                = {1e-1},
-                  std::vector<int>              _kCycleMaxOuterIter       = {2},
-                  std::vector<int>              _kCycleMaxInnerIter       = {5},
-                  double                        _coarseSolverTol          = 5e-2,
-                  int                           _coarseSolverMaxOuterIter = 10,
-                  int                           _coarseSolverMaxInnerIter = 500)
-  : nLevels(_nLevels)
-  , blockSizes(_blockSizes)
-  , smootherTol(_smootherTol)
-  , smootherMaxOuterIter(_smootherMaxOuterIter)
-  , smootherMaxInnerIter(_smootherMaxInnerIter)
-  , kCycle(_kCycle)
-  , kCycleTol(_kCycleTol)
-  , kCycleMaxOuterIter(_kCycleMaxOuterIter)
-  , kCycleMaxInnerIter(_kCycleMaxInnerIter)
-  , coarseSolverTol(_coarseSolverTol)
-  , coarseSolverMaxOuterIter(_coarseSolverMaxOuterIter)
-  , coarseSolverMaxInnerIter(_coarseSolverMaxInnerIter)
-  {}
-};
-// clang-format on
-
-void checkParameterValidity(MultiGridParams const &params) {
-
-  auto correctSize = params.nLevels - 1;
-
-  assert(correctSize == params.blockSizes.size());
-  assert(correctSize == params.smootherTol.size());
-  assert(correctSize == params.smootherMaxOuterIter.size());
-  assert(correctSize == params.smootherMaxInnerIter.size());
-  assert(correctSize == params.kCycleTol.size());
-  assert(correctSize == params.kCycleMaxOuterIter.size());
-  assert(correctSize == params.kCycleMaxInnerIter.size());
-}
-
-struct LevelInfo {
-public:
-  std::vector<std::vector<int>> Seeds;
-  std::vector<GridCartesian *>  Grids;
-  std::vector<GridParallelRNG>  PRNGs;
-
-  LevelInfo(GridCartesian *FineGrid, MultiGridParams const &mgParams) {
-
-    auto nCoarseLevels = mgParams.blockSizes.size();
-
-    assert(nCoarseLevels == mgParams.nLevels - 1);
-
-    // set up values for finest grid
-    Grids.push_back(FineGrid);
-    Seeds.push_back({1, 2, 3, 4});
-    PRNGs.push_back(GridParallelRNG(Grids.back()));
-    PRNGs.back().SeedFixedIntegers(Seeds.back());
-
-    // set up values for coarser grids
-    for(int level = 1; level < mgParams.nLevels; ++level) {
-      auto Nd  = Grids[level - 1]->_ndimension;
-      auto tmp = Grids[level - 1]->_fdimensions;
-      assert(tmp.size() == Nd);
-
-      Seeds.push_back(std::vector<int>(Nd));
-
-      for(int d = 0; d < Nd; ++d) {
-        tmp[d] /= mgParams.blockSizes[level - 1][d];
-        Seeds[level][d] = (level)*Nd + d + 1;
-      }
-
-      Grids.push_back(SpaceTimeGrid::makeFourDimGrid(tmp, Grids[level - 1]->_simd_layout, GridDefaultMpi()));
-      PRNGs.push_back(GridParallelRNG(Grids[level]));
-
-      PRNGs[level].SeedFixedIntegers(Seeds[level]);
-    }
-
-    std::cout << GridLogMessage << "Constructed " << mgParams.nLevels << " levels" << std::endl;
-
-    for(int level = 0; level < mgParams.nLevels; ++level) {
-      std::cout << GridLogMessage << "level = " << level << ":" << std::endl;
-      Grids[level]->show_decomposition();
-    }
-  }
-};
-
-template<class Field> class MultiGridPreconditionerBase : public LinearFunction<Field> {
-public:
-  virtual ~MultiGridPreconditionerBase()               = default;
-  virtual void setup()                                 = 0;
-  virtual void operator()(Field const &in, Field &out) = 0;
-  virtual void runChecks(RealD tolerance)              = 0;
-  virtual void reportTimings()                         = 0;
-};
-
-template<class Fobj, class CComplex, int nBasis, int nCoarserLevels, class Matrix>
-class MultiGridPreconditioner : public MultiGridPreconditionerBase<Lattice<Fobj>> {
-public:
-  /////////////////////////////////////////////
-  // Type Definitions
-  /////////////////////////////////////////////
-
-  // clang-format off
-  typedef Aggregation<Fobj, CComplex, nBasis>                                                                         Aggregates;
-  typedef CoarsenedMatrix<Fobj, CComplex, nBasis>                                                                     CoarseDiracMatrix;
-  typedef typename Aggregates::CoarseVector                                                                           CoarseVector;
-  typedef typename Aggregates::siteVector                                                                             CoarseSiteVector;
-  typedef Matrix                                                                                                      FineDiracMatrix;
-  typedef typename Aggregates::FineField                                                                              FineVector;
-  typedef MultiGridPreconditioner<CoarseSiteVector, iScalar<CComplex>, nBasis, nCoarserLevels - 1, CoarseDiracMatrix> NextPreconditionerLevel;
-  // clang-format on
-
-  /////////////////////////////////////////////
-  // Member Data
-  /////////////////////////////////////////////
-
-  int _CurrentLevel;
-  int _NextCoarserLevel;
-
-  MultiGridParams &_MultiGridParams;
-  LevelInfo &      _LevelInfo;
-
-  FineDiracMatrix & _FineMatrix;
-  FineDiracMatrix & _SmootherMatrix;
-  Aggregates        _Aggregates;
-  CoarseDiracMatrix _CoarseMatrix;
-
-  std::unique_ptr<NextPreconditionerLevel> _NextPreconditionerLevel;
-
-  GridStopWatch _SetupTotalTimer;
-  GridStopWatch _SetupCreateSubspaceTimer;
-  GridStopWatch _SetupProjectToChiralitiesTimer;
-  GridStopWatch _SetupCoarsenOperatorTimer;
-  GridStopWatch _SetupNextLevelTimer;
-  GridStopWatch _SolveTotalTimer;
-  GridStopWatch _SolveRestrictionTimer;
-  GridStopWatch _SolveProlongationTimer;
-  GridStopWatch _SolveSmootherTimer;
-  GridStopWatch _SolveNextLevelTimer;
-
-  /////////////////////////////////////////////
-  // Member Functions
-  /////////////////////////////////////////////
-
-  MultiGridPreconditioner(MultiGridParams &mgParams, LevelInfo &LvlInfo, FineDiracMatrix &FineMat, FineDiracMatrix &SmootherMat)
-    : _CurrentLevel(mgParams.nLevels - (nCoarserLevels + 1)) // _Level = 0 corresponds to finest
-    , _NextCoarserLevel(_CurrentLevel + 1)                   // incremented for instances on coarser levels
-    , _MultiGridParams(mgParams)
-    , _LevelInfo(LvlInfo)
-    , _FineMatrix(FineMat)
-    , _SmootherMatrix(SmootherMat)
-    , _Aggregates(_LevelInfo.Grids[_NextCoarserLevel], _LevelInfo.Grids[_CurrentLevel], 0)
-    , _CoarseMatrix(*_LevelInfo.Grids[_NextCoarserLevel]) {
-
-    _NextPreconditionerLevel
-      = std::unique_ptr<NextPreconditionerLevel>(new NextPreconditionerLevel(_MultiGridParams, _LevelInfo, _CoarseMatrix, _CoarseMatrix));
-
-    resetTimers();
-  }
-
-  void setup() {
-
-    _SetupTotalTimer.Start();
-
-    static_assert((nBasis & 0x1) == 0, "MG Preconditioner only supports an even number of basis vectors");
-    int nb = nBasis / 2;
-
-    MdagMLinearOperator<FineDiracMatrix, FineVector> fineMdagMOp(_FineMatrix);
-
-    _SetupCreateSubspaceTimer.Start();
-    _Aggregates.CreateSubspace(_LevelInfo.PRNGs[_CurrentLevel], fineMdagMOp, nb);
-    _SetupCreateSubspaceTimer.Stop();
-
-    _SetupProjectToChiralitiesTimer.Start();
-    FineVector tmp1(_Aggregates.subspace[0]._grid);
-    FineVector tmp2(_Aggregates.subspace[0]._grid);
-    for(int n = 0; n < nb; n++) {
-      auto tmp1 = _Aggregates.subspace[n];
-      G5C(tmp2, _Aggregates.subspace[n]);
-      axpby(_Aggregates.subspace[n], 0.5, 0.5, tmp1, tmp2);
-      axpby(_Aggregates.subspace[n + nb], 0.5, -0.5, tmp1, tmp2);
-      std::cout << GridLogMG << " Level " << _CurrentLevel << ": Chirally doubled vector " << n << ". "
-                << "norm2(vec[" << n << "]) = " << norm2(_Aggregates.subspace[n]) << ". "
-                << "norm2(vec[" << n + nb << "]) = " << norm2(_Aggregates.subspace[n + nb]) << std::endl;
-    }
-    _SetupProjectToChiralitiesTimer.Stop();
-
-    _SetupCoarsenOperatorTimer.Start();
-    _CoarseMatrix.CoarsenOperator(_LevelInfo.Grids[_CurrentLevel], fineMdagMOp, _Aggregates);
-    _SetupCoarsenOperatorTimer.Stop();
-
-    _SetupNextLevelTimer.Start();
-    _NextPreconditionerLevel->setup();
-    _SetupNextLevelTimer.Stop();
-
-    _SetupTotalTimer.Stop();
-  }
-
-  virtual void operator()(FineVector const &in, FineVector &out) {
-
-    conformable(_LevelInfo.Grids[_CurrentLevel], in._grid);
-    conformable(in, out);
-
-    // TODO: implement a W-cycle
-    if(_MultiGridParams.kCycle)
-      kCycle(in, out);
-    else
-      vCycle(in, out);
-  }
-
-  void vCycle(FineVector const &in, FineVector &out) {
-
-    _SolveTotalTimer.Start();
-
-    RealD inputNorm = norm2(in);
-
-    CoarseVector coarseSrc(_LevelInfo.Grids[_NextCoarserLevel]);
-    CoarseVector coarseSol(_LevelInfo.Grids[_NextCoarserLevel]);
-    coarseSol = zero;
-
-    FineVector fineTmp(in._grid);
-
-    auto maxSmootherIter = _MultiGridParams.smootherMaxOuterIter[_CurrentLevel] * _MultiGridParams.smootherMaxInnerIter[_CurrentLevel];
-
-    TrivialPrecon<FineVector>                      fineTrivialPreconditioner;
-    FlexibleGeneralisedMinimalResidual<FineVector> fineFGMRES(_MultiGridParams.smootherTol[_CurrentLevel],
-                                                              maxSmootherIter,
-                                                              fineTrivialPreconditioner,
-                                                              _MultiGridParams.smootherMaxInnerIter[_CurrentLevel],
-                                                              false);
-
-    MdagMLinearOperator<FineDiracMatrix, FineVector> fineMdagMOp(_FineMatrix);
-    MdagMLinearOperator<FineDiracMatrix, FineVector> fineSmootherMdagMOp(_SmootherMatrix);
-
-    _SolveRestrictionTimer.Start();
-    _Aggregates.ProjectToSubspace(coarseSrc, in);
-    _SolveRestrictionTimer.Stop();
-
-    _SolveNextLevelTimer.Start();
-    (*_NextPreconditionerLevel)(coarseSrc, coarseSol);
-    _SolveNextLevelTimer.Stop();
-
-    _SolveProlongationTimer.Start();
-    _Aggregates.PromoteFromSubspace(coarseSol, out);
-    _SolveProlongationTimer.Stop();
-
-    fineMdagMOp.Op(out, fineTmp);
-    fineTmp                                = in - fineTmp;
-    auto r                                 = norm2(fineTmp);
-    auto residualAfterCoarseGridCorrection = std::sqrt(r / inputNorm);
-
-    _SolveSmootherTimer.Start();
-    fineFGMRES(fineSmootherMdagMOp, in, out);
-    _SolveSmootherTimer.Stop();
-
-    fineMdagMOp.Op(out, fineTmp);
-    fineTmp                        = in - fineTmp;
-    r                              = norm2(fineTmp);
-    auto residualAfterPostSmoother = std::sqrt(r / inputNorm);
-
-    std::cout << GridLogMG << " Level " << _CurrentLevel << ": V-cycle: Input norm = " << std::sqrt(inputNorm)
-              << " Coarse residual = " << residualAfterCoarseGridCorrection << " Post-Smoother residual = " << residualAfterPostSmoother
-              << std::endl;
-
-    _SolveTotalTimer.Stop();
-  }
-
-  void kCycle(FineVector const &in, FineVector &out) {
-
-    _SolveTotalTimer.Start();
-
-    RealD inputNorm = norm2(in);
-
-    CoarseVector coarseSrc(_LevelInfo.Grids[_NextCoarserLevel]);
-    CoarseVector coarseSol(_LevelInfo.Grids[_NextCoarserLevel]);
-    coarseSol = zero;
-
-    FineVector fineTmp(in._grid);
-
-    auto smootherMaxIter = _MultiGridParams.smootherMaxOuterIter[_CurrentLevel] * _MultiGridParams.smootherMaxInnerIter[_CurrentLevel];
-    auto kCycleMaxIter   = _MultiGridParams.kCycleMaxOuterIter[_CurrentLevel] * _MultiGridParams.kCycleMaxInnerIter[_CurrentLevel];
-
-    TrivialPrecon<FineVector>                        fineTrivialPreconditioner;
-    FlexibleGeneralisedMinimalResidual<FineVector>   fineFGMRES(_MultiGridParams.smootherTol[_CurrentLevel],
-                                                              smootherMaxIter,
-                                                              fineTrivialPreconditioner,
-                                                              _MultiGridParams.smootherMaxInnerIter[_CurrentLevel],
-                                                              false);
-    FlexibleGeneralisedMinimalResidual<CoarseVector> coarseFGMRES(_MultiGridParams.kCycleTol[_CurrentLevel],
-                                                                  kCycleMaxIter,
-                                                                  *_NextPreconditionerLevel,
-                                                                  _MultiGridParams.kCycleMaxInnerIter[_CurrentLevel],
-                                                                  false);
-
-    MdagMLinearOperator<FineDiracMatrix, FineVector>     fineMdagMOp(_FineMatrix);
-    MdagMLinearOperator<FineDiracMatrix, FineVector>     fineSmootherMdagMOp(_SmootherMatrix);
-    MdagMLinearOperator<CoarseDiracMatrix, CoarseVector> coarseMdagMOp(_CoarseMatrix);
-
-    _SolveRestrictionTimer.Start();
-    _Aggregates.ProjectToSubspace(coarseSrc, in);
-    _SolveRestrictionTimer.Stop();
-
-    _SolveNextLevelTimer.Start();
-    coarseFGMRES(coarseMdagMOp, coarseSrc, coarseSol);
-    _SolveNextLevelTimer.Stop();
-
-    _SolveProlongationTimer.Start();
-    _Aggregates.PromoteFromSubspace(coarseSol, out);
-    _SolveProlongationTimer.Stop();
-
-    fineMdagMOp.Op(out, fineTmp);
-    fineTmp                                = in - fineTmp;
-    auto r                                 = norm2(fineTmp);
-    auto residualAfterCoarseGridCorrection = std::sqrt(r / inputNorm);
-
-    _SolveSmootherTimer.Start();
-    fineFGMRES(fineSmootherMdagMOp, in, out);
-    _SolveSmootherTimer.Stop();
-
-    fineMdagMOp.Op(out, fineTmp);
-    fineTmp                        = in - fineTmp;
-    r                              = norm2(fineTmp);
-    auto residualAfterPostSmoother = std::sqrt(r / inputNorm);
-
-    std::cout << GridLogMG << " Level " << _CurrentLevel << ": K-cycle: Input norm = " << std::sqrt(inputNorm)
-              << " Coarse residual = " << residualAfterCoarseGridCorrection << " Post-Smoother residual = " << residualAfterPostSmoother
-              << std::endl;
-
-    _SolveTotalTimer.Stop();
-  }
-
-  void runChecks(RealD tolerance) {
-
-    std::vector<FineVector>   fineTmps(7, _LevelInfo.Grids[_CurrentLevel]);
-    std::vector<CoarseVector> coarseTmps(4, _LevelInfo.Grids[_NextCoarserLevel]);
-
-    MdagMLinearOperator<FineDiracMatrix, FineVector>     fineMdagMOp(_FineMatrix);
-    MdagMLinearOperator<CoarseDiracMatrix, CoarseVector> coarseMdagMOp(_CoarseMatrix);
-
-    std::cout << GridLogMG << " Level " << _CurrentLevel << ": **************************************************" << std::endl;
-    std::cout << GridLogMG << " Level " << _CurrentLevel << ": MG correctness check: 0 == (M - (Mdiag + Σ_μ Mdir_μ)) * v" << std::endl;
-    std::cout << GridLogMG << " Level " << _CurrentLevel << ": **************************************************" << std::endl;
-
-    random(_LevelInfo.PRNGs[_CurrentLevel], fineTmps[0]);
-
-    fineMdagMOp.Op(fineTmps[0], fineTmps[1]);     //     M * v
-    fineMdagMOp.OpDiag(fineTmps[0], fineTmps[2]); // Mdiag * v
-
-    fineTmps[4] = zero;
-    for(int dir = 0; dir < 4; dir++) { //       Σ_μ Mdir_μ * v
-      for(auto disp : {+1, -1}) {
-        fineMdagMOp.OpDir(fineTmps[0], fineTmps[3], dir, disp);
-        fineTmps[4] = fineTmps[4] + fineTmps[3];
-      }
-    }
-
-    fineTmps[5] = fineTmps[2] + fineTmps[4]; // (Mdiag + Σ_μ Mdir_μ) * v
-
-    fineTmps[6]    = fineTmps[1] - fineTmps[5];
-    auto deviation = std::sqrt(norm2(fineTmps[6]) / norm2(fineTmps[1]));
-
-    std::cout << GridLogMG << " Level " << _CurrentLevel << ": norm2(M * v)                    = " << norm2(fineTmps[1]) << std::endl;
-    std::cout << GridLogMG << " Level " << _CurrentLevel << ": norm2(Mdiag * v)                = " << norm2(fineTmps[2]) << std::endl;
-    std::cout << GridLogMG << " Level " << _CurrentLevel << ": norm2(Σ_μ Mdir_μ * v)           = " << norm2(fineTmps[4]) << std::endl;
-    std::cout << GridLogMG << " Level " << _CurrentLevel << ": norm2((Mdiag + Σ_μ Mdir_μ) * v) = " << norm2(fineTmps[5]) << std::endl;
-    std::cout << GridLogMG << " Level " << _CurrentLevel << ": relative deviation              = " << deviation;
-
-    if(deviation > tolerance) {
-      std::cout << " > " << tolerance << " -> check failed" << std::endl;
-      // abort();
-    } else {
-      std::cout << " < " << tolerance << " -> check passed" << std::endl;
-    }
-
-    std::cout << GridLogMG << " Level " << _CurrentLevel << ": **************************************************" << std::endl;
-    std::cout << GridLogMG << " Level " << _CurrentLevel << ": MG correctness check: 0 == (1 - P R) v" << std::endl;
-    std::cout << GridLogMG << " Level " << _CurrentLevel << ": **************************************************" << std::endl;
-
-    for(auto i = 0; i < _Aggregates.subspace.size(); ++i) {
-      _Aggregates.ProjectToSubspace(coarseTmps[0], _Aggregates.subspace[i]); //   R v_i
-      _Aggregates.PromoteFromSubspace(coarseTmps[0], fineTmps[0]);           // P R v_i
-
-      fineTmps[1] = _Aggregates.subspace[i] - fineTmps[0]; // v_i - P R v_i
-      deviation   = std::sqrt(norm2(fineTmps[1]) / norm2(_Aggregates.subspace[i]));
-
-      std::cout << GridLogMG << " Level " << _CurrentLevel << ": Vector " << i << ": norm2(v_i) = " << norm2(_Aggregates.subspace[i])
-                << " | norm2(R v_i) = " << norm2(coarseTmps[0]) << " | norm2(P R v_i) = " << norm2(fineTmps[0])
-                << " | relative deviation = " << deviation;
-
-      if(deviation > tolerance) {
-        std::cout << " > " << tolerance << " -> check failed" << std::endl;
-        // abort();
-      } else {
-        std::cout << " < " << tolerance << " -> check passed" << std::endl;
-      }
-    }
-
-    std::cout << GridLogMG << " Level " << _CurrentLevel << ": **************************************************" << std::endl;
-    std::cout << GridLogMG << " Level " << _CurrentLevel << ": MG correctness check: 0 == (1 - R P) v_c" << std::endl;
-    std::cout << GridLogMG << " Level " << _CurrentLevel << ": **************************************************" << std::endl;
-
-    random(_LevelInfo.PRNGs[_NextCoarserLevel], coarseTmps[0]);
-
-    _Aggregates.PromoteFromSubspace(coarseTmps[0], fineTmps[0]); //   P v_c
-    _Aggregates.ProjectToSubspace(coarseTmps[1], fineTmps[0]);   // R P v_c
-
-    coarseTmps[2] = coarseTmps[0] - coarseTmps[1]; // v_c - R P v_c
-    deviation     = std::sqrt(norm2(coarseTmps[2]) / norm2(coarseTmps[0]));
-
-    std::cout << GridLogMG << " Level " << _CurrentLevel << ": norm2(v_c) = " << norm2(coarseTmps[0])
-              << " | norm2(R P v_c) = " << norm2(coarseTmps[1]) << " | norm2(P v_c) = " << norm2(fineTmps[0])
-              << " | relative deviation = " << deviation;
-
-    if(deviation > tolerance) {
-      std::cout << " > " << tolerance << " -> check failed" << std::endl;
-      // abort();
-    } else {
-      std::cout << " < " << tolerance << " -> check passed" << std::endl;
-    }
-
-    std::cout << GridLogMG << " Level " << _CurrentLevel << ": **************************************************" << std::endl;
-    std::cout << GridLogMG << " Level " << _CurrentLevel << ": MG correctness check: 0 == (R D P - D_c) v_c" << std::endl;
-    std::cout << GridLogMG << " Level " << _CurrentLevel << ": **************************************************" << std::endl;
-
-    random(_LevelInfo.PRNGs[_NextCoarserLevel], coarseTmps[0]);
-
-    _Aggregates.PromoteFromSubspace(coarseTmps[0], fineTmps[0]); //     P v_c
-    fineMdagMOp.Op(fineTmps[0], fineTmps[1]);                    //   D P v_c
-    _Aggregates.ProjectToSubspace(coarseTmps[1], fineTmps[1]);   // R D P v_c
-
-    coarseMdagMOp.Op(coarseTmps[0], coarseTmps[2]); // D_c v_c
-
-    coarseTmps[3] = coarseTmps[1] - coarseTmps[2]; // R D P v_c - D_c v_c
-    deviation     = std::sqrt(norm2(coarseTmps[3]) / norm2(coarseTmps[1]));
-
-    std::cout << GridLogMG << " Level " << _CurrentLevel << ": norm2(R D P v_c) = " << norm2(coarseTmps[1])
-              << " | norm2(D_c v_c) = " << norm2(coarseTmps[2]) << " | relative deviation = " << deviation;
-
-    if(deviation > tolerance) {
-      std::cout << " > " << tolerance << " -> check failed" << std::endl;
-      // abort();
-    } else {
-      std::cout << " < " << tolerance << " -> check passed" << std::endl;
-    }
-
-    std::cout << GridLogMG << " Level " << _CurrentLevel << ": **************************************************" << std::endl;
-    std::cout << GridLogMG << " Level " << _CurrentLevel << ": MG correctness check: 0 == |(Im(v_c^dag D_c^dag D_c v_c)|" << std::endl;
-    std::cout << GridLogMG << " Level " << _CurrentLevel << ": **************************************************" << std::endl;
-
-    random(_LevelInfo.PRNGs[_NextCoarserLevel], coarseTmps[0]);
-
-    coarseMdagMOp.Op(coarseTmps[0], coarseTmps[1]);    //         D_c v_c
-    coarseMdagMOp.AdjOp(coarseTmps[1], coarseTmps[2]); // D_c^dag D_c v_c
-
-    auto dot  = innerProduct(coarseTmps[0], coarseTmps[2]); //v_c^dag D_c^dag D_c v_c
-    deviation = abs(imag(dot)) / abs(real(dot));
-
-    std::cout << GridLogMG << " Level " << _CurrentLevel << ": Re(v_c^dag D_c^dag D_c v_c) = " << real(dot)
-              << " | Im(v_c^dag D_c^dag D_c v_c) = " << imag(dot) << " | relative deviation = " << deviation;
-
-    if(deviation > tolerance) {
-      std::cout << " > " << tolerance << " -> check failed" << std::endl;
-      // abort();
-    } else {
-      std::cout << " < " << tolerance << " -> check passed" << std::endl;
-    }
-
-    _NextPreconditionerLevel->runChecks(tolerance);
-  }
-
-  void reportTimings() {
-
-    // clang-format off
-    std::cout << GridLogMG << " Level " << _CurrentLevel << ": Time elapsed: Sum   total            " <<                _SetupTotalTimer.Elapsed() + _SolveTotalTimer.Elapsed() << std::endl;
-    std::cout << GridLogMG << " Level " << _CurrentLevel << ": Time elapsed: Setup total            " <<                _SetupTotalTimer.Elapsed() << std::endl;
-    std::cout << GridLogMG << " Level " << _CurrentLevel << ": Time elapsed: Setup create subspace  " <<       _SetupCreateSubspaceTimer.Elapsed() << std::endl;
-    std::cout << GridLogMG << " Level " << _CurrentLevel << ": Time elapsed: Setup project chiral   " << _SetupProjectToChiralitiesTimer.Elapsed() << std::endl;
-    std::cout << GridLogMG << " Level " << _CurrentLevel << ": Time elapsed: Setup coarsen operator " <<      _SetupCoarsenOperatorTimer.Elapsed() << std::endl;
-    std::cout << GridLogMG << " Level " << _CurrentLevel << ": Time elapsed: Setup next level       " <<            _SetupNextLevelTimer.Elapsed() << std::endl;
-    std::cout << GridLogMG << " Level " << _CurrentLevel << ": Time elapsed: Solve total            " <<                _SolveTotalTimer.Elapsed() << std::endl;
-    std::cout << GridLogMG << " Level " << _CurrentLevel << ": Time elapsed: Solve restriction      " <<          _SolveRestrictionTimer.Elapsed() << std::endl;
-    std::cout << GridLogMG << " Level " << _CurrentLevel << ": Time elapsed: Solve prolongation     " <<         _SolveProlongationTimer.Elapsed() << std::endl;
-    std::cout << GridLogMG << " Level " << _CurrentLevel << ": Time elapsed: Solve smoother         " <<             _SolveSmootherTimer.Elapsed() << std::endl;
-    std::cout << GridLogMG << " Level " << _CurrentLevel << ": Time elapsed: Solve next level       " <<            _SolveNextLevelTimer.Elapsed() << std::endl;
-    // clang-format on
-
-    _NextPreconditionerLevel->reportTimings();
-  }
-
-  void resetTimers() {
-
-    _SetupTotalTimer.Reset();
-    _SetupCreateSubspaceTimer.Reset();
-    _SetupProjectToChiralitiesTimer.Reset();
-    _SetupCoarsenOperatorTimer.Reset();
-    _SetupNextLevelTimer.Reset();
-    _SolveTotalTimer.Reset();
-    _SolveRestrictionTimer.Reset();
-    _SolveProlongationTimer.Reset();
-    _SolveSmootherTimer.Reset();
-    _SolveNextLevelTimer.Reset();
-
-    _NextPreconditionerLevel->resetTimers();
-  }
-};
-
-// Specialization for the coarsest level
-template<class Fobj, class CComplex, int nBasis, class Matrix>
-class MultiGridPreconditioner<Fobj, CComplex, nBasis, 0, Matrix> : public MultiGridPreconditionerBase<Lattice<Fobj>> {
-public:
-  /////////////////////////////////////////////
-  // Type Definitions
-  /////////////////////////////////////////////
-
-  typedef Matrix        FineDiracMatrix;
-  typedef Lattice<Fobj> FineVector;
-
-  /////////////////////////////////////////////
-  // Member Data
-  /////////////////////////////////////////////
-
-  int _CurrentLevel;
-
-  MultiGridParams &_MultiGridParams;
-  LevelInfo &      _LevelInfo;
-
-  FineDiracMatrix &_FineMatrix;
-  FineDiracMatrix &_SmootherMatrix;
-
-  GridStopWatch _SolveTotalTimer;
-  GridStopWatch _SolveSmootherTimer;
-
-  /////////////////////////////////////////////
-  // Member Functions
-  /////////////////////////////////////////////
-
-  MultiGridPreconditioner(MultiGridParams &mgParams, LevelInfo &LvlInfo, FineDiracMatrix &FineMat, FineDiracMatrix &SmootherMat)
-    : _CurrentLevel(mgParams.nLevels - (0 + 1))
-    , _MultiGridParams(mgParams)
-    , _LevelInfo(LvlInfo)
-    , _FineMatrix(FineMat)
-    , _SmootherMatrix(SmootherMat) {
-
-    resetTimers();
-  }
-
-  void setup() {}
-
-  virtual void operator()(FineVector const &in, FineVector &out) {
-
-    _SolveTotalTimer.Start();
-
-    conformable(_LevelInfo.Grids[_CurrentLevel], in._grid);
-    conformable(in, out);
-
-    auto coarseSolverMaxIter = _MultiGridParams.coarseSolverMaxOuterIter * _MultiGridParams.coarseSolverMaxInnerIter;
-
-    // On the coarsest level we only have what I above call the fine level, no coarse one
-    TrivialPrecon<FineVector>                      fineTrivialPreconditioner;
-    FlexibleGeneralisedMinimalResidual<FineVector> fineFGMRES(
-      _MultiGridParams.coarseSolverTol, coarseSolverMaxIter, fineTrivialPreconditioner, _MultiGridParams.coarseSolverMaxInnerIter, false);
-
-    MdagMLinearOperator<FineDiracMatrix, FineVector> fineMdagMOp(_FineMatrix);
-
-    _SolveSmootherTimer.Start();
-    fineFGMRES(fineMdagMOp, in, out);
-    _SolveSmootherTimer.Stop();
-
-    _SolveTotalTimer.Stop();
-  }
-
-  void runChecks(RealD tolerance) {}
-
-  void reportTimings() {
-
-    // clang-format off
-    std::cout << GridLogMG << " Level " << _CurrentLevel << ": Time elapsed: Solve total            " <<    _SolveTotalTimer.Elapsed() << std::endl;
-    std::cout << GridLogMG << " Level " << _CurrentLevel << ": Time elapsed: Solve smoother         " << _SolveSmootherTimer.Elapsed() << std::endl;
-    // clang-format on
-  }
-
-  void resetTimers() {
-
-    _SolveTotalTimer.Reset();
-    _SolveSmootherTimer.Reset();
-  }
-};
-
-template<class Fobj, class CComplex, int nBasis, int nLevels, class Matrix>
-using NLevelMGPreconditioner = MultiGridPreconditioner<Fobj, CComplex, nBasis, nLevels - 1, Matrix>;
-
-template<class Fobj, class CComplex, int nBasis, class Matrix>
-std::unique_ptr<MultiGridPreconditionerBase<Lattice<Fobj>>>
-createMGInstance(MultiGridParams &mgParams, LevelInfo &levelInfo, Matrix &FineMat, Matrix &SmootherMat) {
-
-#define CASE_FOR_N_LEVELS(nLevels)                                                                                     \
-  case nLevels:                                                                                                        \
-    return std::unique_ptr<NLevelMGPreconditioner<Fobj, CComplex, nBasis, nLevels, Matrix>>(                           \
-      new NLevelMGPreconditioner<Fobj, CComplex, nBasis, nLevels, Matrix>(mgParams, levelInfo, FineMat, SmootherMat)); \
-    break;
-
-  switch(mgParams.nLevels) {
-    CASE_FOR_N_LEVELS(2);
-    CASE_FOR_N_LEVELS(3);
-    CASE_FOR_N_LEVELS(4);
-    default:
-      std::cout << GridLogError << "We currently only support nLevels ∈ {2, 3, 4}" << std::endl;
-      exit(EXIT_FAILURE);
-      break;
-  }
-#undef CASE_FOR_N_LEVELS
-}
-
 int main(int argc, char **argv) {
 
   Grid_init(&argc, &argv);

From 73a955be207125420156a27232140cd77c3a4087 Mon Sep 17 00:00:00 2001
From: Daniel Richtmann <daniel.richtmann@gmail.com>
Date: Sat, 9 Jun 2018 17:42:19 +0200
Subject: [PATCH 124/130] WilsonMG: Move tests for Wilson & WilsonClover into
 separate files

---
 tests/solver/Test_wilson_mg.cc       | 113 +++++++++++++++++++++++++++
 tests/solver/Test_wilsonclover_mg.cc |  35 +--------
 2 files changed, 116 insertions(+), 32 deletions(-)
 create mode 100644 tests/solver/Test_wilson_mg.cc

diff --git a/tests/solver/Test_wilson_mg.cc b/tests/solver/Test_wilson_mg.cc
new file mode 100644
index 00000000..16d9cdcf
--- /dev/null
+++ b/tests/solver/Test_wilson_mg.cc
@@ -0,0 +1,113 @@
+/*************************************************************************************
+
+    Grid physics library, www.github.com/paboyle/Grid 
+
+    Source file: ./tests/solver/Test_wilson_mg.cc
+
+    Copyright (C) 2017
+
+    Author: Daniel Richtmann <daniel.richtmann@ur.de>
+
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License along
+    with this program; if not, write to the Free Software Foundation, Inc.,
+    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+
+    See the full license in the file "LICENSE" in the top level distribution directory
+    *************************************************************************************/
+/*  END LEGAL */
+
+#include <Grid/Grid.h>
+#include <Test_multigrid_common.h>
+
+using namespace std;
+using namespace Grid;
+using namespace Grid::QCD;
+
+int main(int argc, char **argv) {
+
+  Grid_init(&argc, &argv);
+
+  GridCartesian *        FGrid   = SpaceTimeGrid::makeFourDimGrid(GridDefaultLatt(), GridDefaultSimd(Nd, vComplex::Nsimd()), GridDefaultMpi());
+  GridRedBlackCartesian *FrbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(FGrid);
+
+  std::vector<int> fSeeds({1, 2, 3, 4});
+  GridParallelRNG  fPRNG(FGrid);
+  fPRNG.SeedFixedIntegers(fSeeds);
+
+  // clang-format off
+  LatticeFermion    src(FGrid); gaussian(fPRNG, src);
+  LatticeFermion result(FGrid); result = zero;
+  LatticeGaugeField Umu(FGrid); SU3::HotConfiguration(fPRNG, Umu);
+  // clang-format on
+
+  RealD mass = -0.25;
+
+  MultiGridParams mgParams;
+  std::string     inputXml{"./mg_params.xml"};
+
+  if(GridCmdOptionExists(argv, argv + argc, "--inputxml")) {
+    inputXml = GridCmdOptionPayload(argv, argv + argc, "--inputxml");
+    assert(inputXml.length() != 0);
+  }
+
+  {
+    XmlWriter writer("mg_params_template.xml");
+    write(writer, "Params", mgParams);
+    std::cout << GridLogMessage << "Written mg_params_template.xml" << std::endl;
+
+    XmlReader reader(inputXml);
+    read(reader, "Params", mgParams);
+    std::cout << GridLogMessage << "Read in " << inputXml << std::endl;
+  }
+
+  checkParameterValidity(mgParams);
+  std::cout << mgParams << std::endl;
+
+  LevelInfo levelInfo(FGrid, mgParams);
+
+  // Note: We do chiral doubling, so actually only nbasis/2 full basis vectors are used
+  const int nbasis               = 40;
+  RealD     toleranceForMGChecks = 1e-13; // TODO: depends on the precision MG precondtioner is run in
+
+  WilsonFermionR Dw(Umu, *FGrid, *FrbGrid, mass);
+
+  static_assert(std::is_same<LatticeFermion, typename WilsonFermionR::FermionField>::value, "");
+
+  MdagMLinearOperator<WilsonFermionR, LatticeFermion> MdagMOpDw(Dw);
+
+  std::cout << GridLogMessage << "**************************************************" << std::endl;
+  std::cout << GridLogMessage << "Testing Multigrid for Wilson" << std::endl;
+  std::cout << GridLogMessage << "**************************************************" << std::endl;
+
+  TrivialPrecon<LatticeFermion> TrivialPrecon;
+  auto MGPreconDw = createMGInstance<vSpinColourVector, vTComplex, nbasis, WilsonFermionR>(mgParams, levelInfo, Dw, Dw);
+
+  MGPreconDw->setup();
+  MGPreconDw->runChecks(toleranceForMGChecks);
+
+  std::vector<std::unique_ptr<OperatorFunction<LatticeFermion>>> solversDw;
+
+  solversDw.emplace_back(new ConjugateGradient<LatticeFermion>(1.0e-12, 50000, false));
+  solversDw.emplace_back(new FlexibleGeneralisedMinimalResidual<LatticeFermion>(1.0e-12, 50000, TrivialPrecon, 100, false));
+  solversDw.emplace_back(new FlexibleGeneralisedMinimalResidual<LatticeFermion>(1.0e-12, 50000, *MGPreconDw, 100, false));
+
+  for(auto const &solver : solversDw) {
+    std::cout << std::endl << "Starting with a new solver" << std::endl;
+    result = zero;
+    (*solver)(MdagMOpDw, src, result);
+  }
+
+  MGPreconDw->reportTimings();
+
+  Grid_finalize();
+}
diff --git a/tests/solver/Test_wilsonclover_mg.cc b/tests/solver/Test_wilsonclover_mg.cc
index 28e83ed1..65b5d7ce 100644
--- a/tests/solver/Test_wilsonclover_mg.cc
+++ b/tests/solver/Test_wilsonclover_mg.cc
@@ -37,10 +37,7 @@ int main(int argc, char **argv) {
 
   Grid_init(&argc, &argv);
 
-  typename WilsonCloverFermionR::ImplParams wcImplparams;
-  WilsonAnisotropyCoefficients              wilsonAnisCoeff;
-
-  GridCartesian *FGrid = SpaceTimeGrid::makeFourDimGrid(GridDefaultLatt(), GridDefaultSimd(Nd, vComplex::Nsimd()), GridDefaultMpi());
+  GridCartesian *        FGrid   = SpaceTimeGrid::makeFourDimGrid(GridDefaultLatt(), GridDefaultSimd(Nd, vComplex::Nsimd()), GridDefaultMpi());
   GridRedBlackCartesian *FrbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(FGrid);
 
   std::vector<int> fSeeds({1, 2, 3, 4});
@@ -84,43 +81,17 @@ int main(int argc, char **argv) {
   const int nbasis               = 40;
   RealD     toleranceForMGChecks = 1e-13; // TODO: depends on the precision MG precondtioner is run in
 
-  WilsonFermionR       Dw(Umu, *FGrid, *FrbGrid, mass);
-  WilsonCloverFermionR Dwc(Umu, *FGrid, *FrbGrid, mass, csw_r, csw_t, wilsonAnisCoeff, wcImplparams);
+  WilsonCloverFermionR Dwc(Umu, *FGrid, *FrbGrid, mass, csw_r, csw_t);
 
-  static_assert(std::is_same<LatticeFermion, typename WilsonFermionR::FermionField>::value, "");
   static_assert(std::is_same<LatticeFermion, typename WilsonCloverFermionR::FermionField>::value, "");
 
-  MdagMLinearOperator<WilsonFermionR, LatticeFermion>       MdagMOpDw(Dw);
   MdagMLinearOperator<WilsonCloverFermionR, LatticeFermion> MdagMOpDwc(Dwc);
 
-  std::cout << GridLogMessage << "**************************************************" << std::endl;
-  std::cout << GridLogMessage << "Testing Multigrid for Wilson" << std::endl;
-  std::cout << GridLogMessage << "**************************************************" << std::endl;
-
-  TrivialPrecon<LatticeFermion> TrivialPrecon;
-  auto MGPreconDw = createMGInstance<vSpinColourVector, vTComplex, nbasis, WilsonFermionR>(mgParams, levelInfo, Dw, Dw);
-
-  MGPreconDw->setup();
-  MGPreconDw->runChecks(toleranceForMGChecks);
-
-  std::vector<std::unique_ptr<OperatorFunction<LatticeFermion>>> solversDw;
-
-  solversDw.emplace_back(new ConjugateGradient<LatticeFermion>(1.0e-12, 50000, false));
-  solversDw.emplace_back(new FlexibleGeneralisedMinimalResidual<LatticeFermion>(1.0e-12, 50000, TrivialPrecon, 100, false));
-  solversDw.emplace_back(new FlexibleGeneralisedMinimalResidual<LatticeFermion>(1.0e-12, 50000, *MGPreconDw, 100, false));
-
-  for(auto const &solver : solversDw) {
-    std::cout << std::endl << "Starting with a new solver" << std::endl;
-    result = zero;
-    (*solver)(MdagMOpDw, src, result);
-  }
-
-  MGPreconDw->reportTimings();
-
   std::cout << GridLogMessage << "**************************************************" << std::endl;
   std::cout << GridLogMessage << "Testing Multigrid for Wilson Clover" << std::endl;
   std::cout << GridLogMessage << "**************************************************" << std::endl;
 
+  TrivialPrecon<LatticeFermion> TrivialPrecon;
   auto MGPreconDwc = createMGInstance<vSpinColourVector, vTComplex, nbasis, WilsonCloverFermionR>(mgParams, levelInfo, Dwc, Dwc);
 
   MGPreconDwc->setup();

From f32714a2d14cfda541882c8b9155b2180feaafef Mon Sep 17 00:00:00 2001
From: Daniel Richtmann <daniel.richtmann@physik.uni-regensburg.de>
Date: Mon, 11 Jun 2018 16:27:52 +0200
Subject: [PATCH 125/130] WilsonMG: Make running MG correctness checks optional
 via commandline

---
 tests/solver/Test_wilson_mg.cc       | 9 ++++++---
 tests/solver/Test_wilsonclover_mg.cc | 9 ++++++---
 2 files changed, 12 insertions(+), 6 deletions(-)

diff --git a/tests/solver/Test_wilson_mg.cc b/tests/solver/Test_wilson_mg.cc
index 16d9cdcf..0578b61c 100644
--- a/tests/solver/Test_wilson_mg.cc
+++ b/tests/solver/Test_wilson_mg.cc
@@ -76,8 +76,7 @@ int main(int argc, char **argv) {
   LevelInfo levelInfo(FGrid, mgParams);
 
   // Note: We do chiral doubling, so actually only nbasis/2 full basis vectors are used
-  const int nbasis               = 40;
-  RealD     toleranceForMGChecks = 1e-13; // TODO: depends on the precision MG precondtioner is run in
+  const int nbasis = 40;
 
   WilsonFermionR Dw(Umu, *FGrid, *FrbGrid, mass);
 
@@ -93,7 +92,11 @@ int main(int argc, char **argv) {
   auto MGPreconDw = createMGInstance<vSpinColourVector, vTComplex, nbasis, WilsonFermionR>(mgParams, levelInfo, Dw, Dw);
 
   MGPreconDw->setup();
-  MGPreconDw->runChecks(toleranceForMGChecks);
+
+  if(GridCmdOptionExists(argv, argv + argc, "--runchecks")) {
+    RealD toleranceForMGChecks = (getPrecision<LatticeFermion>::value == 1) ? 1e-6 : 1e-13;
+    MGPreconDw->runChecks(toleranceForMGChecks);
+  }
 
   std::vector<std::unique_ptr<OperatorFunction<LatticeFermion>>> solversDw;
 
diff --git a/tests/solver/Test_wilsonclover_mg.cc b/tests/solver/Test_wilsonclover_mg.cc
index 65b5d7ce..78467d9b 100644
--- a/tests/solver/Test_wilsonclover_mg.cc
+++ b/tests/solver/Test_wilsonclover_mg.cc
@@ -78,8 +78,7 @@ int main(int argc, char **argv) {
   LevelInfo levelInfo(FGrid, mgParams);
 
   // Note: We do chiral doubling, so actually only nbasis/2 full basis vectors are used
-  const int nbasis               = 40;
-  RealD     toleranceForMGChecks = 1e-13; // TODO: depends on the precision MG precondtioner is run in
+  const int nbasis = 40;
 
   WilsonCloverFermionR Dwc(Umu, *FGrid, *FrbGrid, mass, csw_r, csw_t);
 
@@ -95,7 +94,11 @@ int main(int argc, char **argv) {
   auto MGPreconDwc = createMGInstance<vSpinColourVector, vTComplex, nbasis, WilsonCloverFermionR>(mgParams, levelInfo, Dwc, Dwc);
 
   MGPreconDwc->setup();
-  MGPreconDwc->runChecks(toleranceForMGChecks);
+
+  if(GridCmdOptionExists(argv, argv + argc, "--runchecks")) {
+    RealD toleranceForMGChecks = (getPrecision<LatticeFermion>::value == 1) ? 1e-6 : 1e-13;
+    MGPreconDwc->runChecks(toleranceForMGChecks);
+  }
 
   std::vector<std::unique_ptr<OperatorFunction<LatticeFermion>>> solversDwc;
 

From 49fdc324a009902522a636fb644d44a692ec6f69 Mon Sep 17 00:00:00 2001
From: Daniel Richtmann <daniel.richtmann@physik.uni-regensburg.de>
Date: Mon, 11 Jun 2018 16:50:59 +0200
Subject: [PATCH 126/130] WilsonMG: Make MG correctness checks abort on failing
 tests

---
 tests/solver/Test_multigrid_common.h | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/tests/solver/Test_multigrid_common.h b/tests/solver/Test_multigrid_common.h
index 58614195..3250addb 100644
--- a/tests/solver/Test_multigrid_common.h
+++ b/tests/solver/Test_multigrid_common.h
@@ -422,7 +422,7 @@ public:
 
     if(deviation > tolerance) {
       std::cout << " > " << tolerance << " -> check failed" << std::endl;
-      // abort();
+      abort();
     } else {
       std::cout << " < " << tolerance << " -> check passed" << std::endl;
     }
@@ -444,7 +444,7 @@ public:
 
       if(deviation > tolerance) {
         std::cout << " > " << tolerance << " -> check failed" << std::endl;
-        // abort();
+        abort();
       } else {
         std::cout << " < " << tolerance << " -> check passed" << std::endl;
       }
@@ -468,7 +468,7 @@ public:
 
     if(deviation > tolerance) {
       std::cout << " > " << tolerance << " -> check failed" << std::endl;
-      // abort();
+      abort();
     } else {
       std::cout << " < " << tolerance << " -> check passed" << std::endl;
     }
@@ -493,7 +493,7 @@ public:
 
     if(deviation > tolerance) {
       std::cout << " > " << tolerance << " -> check failed" << std::endl;
-      // abort();
+      abort();
     } else {
       std::cout << " < " << tolerance << " -> check passed" << std::endl;
     }
@@ -515,7 +515,7 @@ public:
 
     if(deviation > tolerance) {
       std::cout << " > " << tolerance << " -> check failed" << std::endl;
-      // abort();
+      abort();
     } else {
       std::cout << " < " << tolerance << " -> check passed" << std::endl;
     }

From c73cc7d354be3e688c9d99b1ce28360a1118fc17 Mon Sep 17 00:00:00 2001
From: Daniel Richtmann <daniel.richtmann@gmail.com>
Date: Mon, 11 Jun 2018 22:05:18 +0200
Subject: [PATCH 127/130] WilsonMG: Add tests with MG preconditioner running
 single precision, outer solver running in double

---
 tests/solver/Test_wilson_mg_mp.cc       | 169 +++++++++++++++++++++++
 tests/solver/Test_wilsonclover_mg_mp.cc | 172 ++++++++++++++++++++++++
 2 files changed, 341 insertions(+)
 create mode 100644 tests/solver/Test_wilson_mg_mp.cc
 create mode 100644 tests/solver/Test_wilsonclover_mg_mp.cc

diff --git a/tests/solver/Test_wilson_mg_mp.cc b/tests/solver/Test_wilson_mg_mp.cc
new file mode 100644
index 00000000..3d8a242e
--- /dev/null
+++ b/tests/solver/Test_wilson_mg_mp.cc
@@ -0,0 +1,169 @@
+/*************************************************************************************
+
+    Grid physics library, www.github.com/paboyle/Grid 
+
+    Source file: ./tests/solver/Test_wilson_mg_mp.cc
+
+    Copyright (C) 2017
+
+    Author: Daniel Richtmann <daniel.richtmann@ur.de>
+
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License along
+    with this program; if not, write to the Free Software Foundation, Inc.,
+    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+
+    See the full license in the file "LICENSE" in the top level distribution directory
+    *************************************************************************************/
+/*  END LEGAL */
+
+#include <Grid/Grid.h>
+#include <Test_multigrid_common.h>
+
+using namespace std;
+using namespace Grid;
+using namespace Grid::QCD;
+
+int main(int argc, char **argv) {
+
+  Grid_init(&argc, &argv);
+
+  // clang-format off
+  GridCartesian         *FGrid_d   = SpaceTimeGrid::makeFourDimGrid(GridDefaultLatt(), GridDefaultSimd(Nd, vComplexD::Nsimd()), GridDefaultMpi());
+  GridCartesian         *FGrid_f   = SpaceTimeGrid::makeFourDimGrid(GridDefaultLatt(), GridDefaultSimd(Nd, vComplexF::Nsimd()), GridDefaultMpi());
+  GridRedBlackCartesian *FrbGrid_d = SpaceTimeGrid::makeFourDimRedBlackGrid(FGrid_d);
+  GridRedBlackCartesian *FrbGrid_f = SpaceTimeGrid::makeFourDimRedBlackGrid(FGrid_f);
+  // clang-format on
+
+  std::vector<int> fSeeds({1, 2, 3, 4});
+  GridParallelRNG  fPRNG(FGrid_d);
+  fPRNG.SeedFixedIntegers(fSeeds);
+
+  // clang-format off
+  LatticeFermionD       src_d(FGrid_d); gaussian(fPRNG, src_d);
+  LatticeFermionD resultMGD_d(FGrid_d); resultMGD_d = zero;
+  LatticeFermionD resultMGF_d(FGrid_d); resultMGF_d = zero;
+  LatticeGaugeFieldD    Umu_d(FGrid_d); SU3::HotConfiguration(fPRNG, Umu_d);
+  LatticeGaugeFieldF    Umu_f(FGrid_f); precisionChange(Umu_f, Umu_d);
+  // clang-format on
+
+  RealD mass = -0.25;
+
+  MultiGridParams mgParams;
+  std::string     inputXml{"./mg_params.xml"};
+
+  if(GridCmdOptionExists(argv, argv + argc, "--inputxml")) {
+    inputXml = GridCmdOptionPayload(argv, argv + argc, "--inputxml");
+    assert(inputXml.length() != 0);
+  }
+
+  {
+    XmlWriter writer("mg_params_template.xml");
+    write(writer, "Params", mgParams);
+    std::cout << GridLogMessage << "Written mg_params_template.xml" << std::endl;
+
+    XmlReader reader(inputXml);
+    read(reader, "Params", mgParams);
+    std::cout << GridLogMessage << "Read in " << inputXml << std::endl;
+  }
+
+  checkParameterValidity(mgParams);
+  std::cout << mgParams << std::endl;
+
+  LevelInfo levelInfo_d(FGrid_d, mgParams);
+  LevelInfo levelInfo_f(FGrid_f, mgParams);
+
+  // Note: We do chiral doubling, so actually only nbasis/2 full basis vectors are used
+  const int nbasis = 40;
+
+  WilsonFermionD Dw_d(Umu_d, *FGrid_d, *FrbGrid_d, mass);
+  WilsonFermionF Dw_f(Umu_f, *FGrid_f, *FrbGrid_f, mass);
+
+  static_assert(std::is_same<LatticeFermionD, typename WilsonFermionD::FermionField>::value, "");
+  static_assert(std::is_same<LatticeFermionF, typename WilsonFermionF::FermionField>::value, "");
+
+  MdagMLinearOperator<WilsonFermionD, LatticeFermionD> MdagMOpDw_d(Dw_d);
+  MdagMLinearOperator<WilsonFermionF, LatticeFermionF> MdagMOpDw_f(Dw_f);
+
+  std::cout << GridLogMessage << "**************************************************" << std::endl;
+  std::cout << GridLogMessage << "Testing single-precision Multigrid for Wilson" << std::endl;
+  std::cout << GridLogMessage << "**************************************************" << std::endl;
+
+  auto MGPreconDw_f = createMGInstance<vSpinColourVectorF, vTComplexF, nbasis, WilsonFermionF>(mgParams, levelInfo_f, Dw_f, Dw_f);
+
+  MGPreconDw_f->setup();
+
+  if(GridCmdOptionExists(argv, argv + argc, "--runchecks")) {
+    MGPreconDw_f->runChecks(1e-6);
+  }
+
+  MixedPrecisionFlexibleGeneralisedMinimalResidual<LatticeFermionD, LatticeFermionF> MPFGMRESPREC(1.0e-12, 50000, FGrid_f, *MGPreconDw_f, 100, false);
+
+  std::cout << std::endl << "Starting with a new solver" << std::endl;
+  MPFGMRESPREC(MdagMOpDw_d, src_d, resultMGF_d);
+
+  MGPreconDw_f->reportTimings();
+
+  if(GridCmdOptionExists(argv, argv + argc, "--docomparison")) {
+
+    std::cout << GridLogMessage << "**************************************************" << std::endl;
+    std::cout << GridLogMessage << "Testing double-precision Multigrid for Wilson" << std::endl;
+    std::cout << GridLogMessage << "**************************************************" << std::endl;
+
+    auto MGPreconDw_d = createMGInstance<vSpinColourVectorD, vTComplexD, nbasis, WilsonFermionD>(mgParams, levelInfo_d, Dw_d, Dw_d);
+
+    MGPreconDw_d->setup();
+
+    if(GridCmdOptionExists(argv, argv + argc, "--runchecks")) {
+      MGPreconDw_d->runChecks(1e-13);
+    }
+
+    FlexibleGeneralisedMinimalResidual<LatticeFermionD> FGMRESPREC(1.0e-12, 50000, *MGPreconDw_d, 100, false);
+
+    std::cout << std::endl << "Starting with a new solver" << std::endl;
+    FGMRESPREC(MdagMOpDw_d, src_d, resultMGD_d);
+
+    MGPreconDw_d->reportTimings();
+
+    std::cout << GridLogMessage << "**************************************************" << std::endl;
+    std::cout << GridLogMessage << "Comparing single-precision Multigrid with double-precision one for Wilson" << std::endl;
+    std::cout << GridLogMessage << "**************************************************" << std::endl;
+
+    LatticeFermionD diffFullSolver(FGrid_d);
+
+    RealD deviationFullSolver = axpy_norm(diffFullSolver, -1.0, resultMGF_d, resultMGD_d);
+
+    // clang-format off
+    LatticeFermionF src_f(FGrid_f);    precisionChange(src_f, src_d);
+    LatticeFermionF resMGF_f(FGrid_f); resMGF_f = zero;
+    LatticeFermionD resMGD_d(FGrid_d); resMGD_d = zero;
+    // clang-format on
+
+    (*MGPreconDw_f)(src_f, resMGF_f);
+    (*MGPreconDw_d)(src_d, resMGD_d);
+
+    LatticeFermionD diffOnlyMG(FGrid_d);
+    LatticeFermionD resMGF_d(FGrid_d);
+    precisionChange(resMGF_d, resMGF_f);
+
+    RealD deviationOnlyPrec = axpy_norm(diffOnlyMG, -1.0, resMGF_d, resMGD_d);
+
+    // clang-format off
+    std::cout << GridLogMessage << "Absolute difference between FGMRES preconditioned by double and single precicision MG: " << deviationFullSolver                      << std::endl;
+    std::cout << GridLogMessage << "Relative deviation  between FGMRES preconditioned by double and single precicision MG: " << deviationFullSolver / norm2(resultMGD_d) << std::endl;
+    std::cout << GridLogMessage << "Absolute difference between one iteration of MG Prec in double and single precision:   " << deviationOnlyPrec                        << std::endl;
+    std::cout << GridLogMessage << "Relative deviation  between one iteration of MG Prec in double and single precision:   " << deviationOnlyPrec / norm2(resMGD_d)      << std::endl;
+    // clang-format on
+  }
+
+  Grid_finalize();
+}
diff --git a/tests/solver/Test_wilsonclover_mg_mp.cc b/tests/solver/Test_wilsonclover_mg_mp.cc
new file mode 100644
index 00000000..df457848
--- /dev/null
+++ b/tests/solver/Test_wilsonclover_mg_mp.cc
@@ -0,0 +1,172 @@
+/*************************************************************************************
+
+    Grid physics library, www.github.com/paboyle/Grid 
+
+    Source file: ./tests/solver/Test_wilsonclover_mg_mp.cc
+
+    Copyright (C) 2017
+
+    Author: Daniel Richtmann <daniel.richtmann@ur.de>
+
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License along
+    with this program; if not, write to the Free Software Foundation, Inc.,
+    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+
+    See the full license in the file "LICENSE" in the top level distribution directory
+    *************************************************************************************/
+/*  END LEGAL */
+
+#include <Grid/Grid.h>
+#include <Test_multigrid_common.h>
+
+using namespace std;
+using namespace Grid;
+using namespace Grid::QCD;
+
+int main(int argc, char **argv) {
+  // Test driver: runs mixed-precision FGMRES with a single-precision multigrid preconditioner on a Wilson clover operator; --docomparison adds a double-precision run.
+  Grid_init(&argc, &argv);
+  // Full and red-black four-dimensional grids, one pair per precision (_d uses vComplexD::Nsimd(), _f uses vComplexF::Nsimd()).
+  // clang-format off
+  GridCartesian         *FGrid_d   = SpaceTimeGrid::makeFourDimGrid(GridDefaultLatt(), GridDefaultSimd(Nd, vComplexD::Nsimd()), GridDefaultMpi());
+  GridCartesian         *FGrid_f   = SpaceTimeGrid::makeFourDimGrid(GridDefaultLatt(), GridDefaultSimd(Nd, vComplexF::Nsimd()), GridDefaultMpi());
+  GridRedBlackCartesian *FrbGrid_d = SpaceTimeGrid::makeFourDimRedBlackGrid(FGrid_d);
+  GridRedBlackCartesian *FrbGrid_f = SpaceTimeGrid::makeFourDimRedBlackGrid(FGrid_f);
+  // clang-format on
+  // The parallel RNG is built on the double-precision grid and seeded with the fixed integers below.
+  std::vector<int> fSeeds({1, 2, 3, 4});
+  GridParallelRNG  fPRNG(FGrid_d);
+  fPRNG.SeedFixedIntegers(fSeeds);
+  // Gaussian source, zeroed result fields, a gauge field from SU3::HotConfiguration and its single-precision counterpart via precisionChange.
+  // clang-format off
+  LatticeFermionD       src_d(FGrid_d); gaussian(fPRNG, src_d);
+  LatticeFermionD resultMGD_d(FGrid_d); resultMGD_d = zero;
+  LatticeFermionD resultMGF_d(FGrid_d); resultMGF_d = zero;
+  LatticeGaugeFieldD    Umu_d(FGrid_d); SU3::HotConfiguration(fPRNG, Umu_d);
+  LatticeGaugeFieldF    Umu_f(FGrid_f); precisionChange(Umu_f, Umu_d);
+  // clang-format on
+  // Parameters handed to both WilsonCloverFermion constructors further down.
+  RealD mass  = -0.25;
+  RealD csw_r = 1.0;
+  RealD csw_t = 1.0;
+  // Multigrid parameters are read from an XML file; --inputxml replaces the default path.
+  MultiGridParams mgParams;
+  std::string     inputXml{"./mg_params.xml"};
+
+  if(GridCmdOptionExists(argv, argv + argc, "--inputxml")) {
+    inputXml = GridCmdOptionPayload(argv, argv + argc, "--inputxml");
+    assert(inputXml.length() != 0);
+  }
+  // A template holding the default-constructed mgParams is always written before the real parameters are read from inputXml.
+  {
+    XmlWriter writer("mg_params_template.xml");
+    write(writer, "Params", mgParams);
+    std::cout << GridLogMessage << "Written mg_params_template.xml" << std::endl;
+
+    XmlReader reader(inputXml);
+    read(reader, "Params", mgParams);
+    std::cout << GridLogMessage << "Read in " << inputXml << std::endl;
+  }
+
+  checkParameterValidity(mgParams);
+  std::cout << mgParams << std::endl;
+  // One LevelInfo per precision, both built from the same mgParams.
+  LevelInfo levelInfo_d(FGrid_d, mgParams);
+  LevelInfo levelInfo_f(FGrid_f, mgParams);
+
+  // Note: We do chiral doubling, so actually only nbasis/2 full basis vectors are used
+  const int nbasis = 40;
+
+  WilsonCloverFermionD Dwc_d(Umu_d, *FGrid_d, *FrbGrid_d, mass, csw_r, csw_t);
+  WilsonCloverFermionF Dwc_f(Umu_f, *FGrid_f, *FrbGrid_f, mass, csw_r, csw_t);
+
+  static_assert(std::is_same<LatticeFermionD, typename WilsonCloverFermionD::FermionField>::value, "");
+  static_assert(std::is_same<LatticeFermionF, typename WilsonCloverFermionF::FermionField>::value, "");
+
+  MdagMLinearOperator<WilsonCloverFermionD, LatticeFermionD> MdagMOpDwc_d(Dwc_d);
+  MdagMLinearOperator<WilsonCloverFermionF, LatticeFermionF> MdagMOpDwc_f(Dwc_f);
+
+  std::cout << GridLogMessage << "**************************************************" << std::endl;
+  std::cout << GridLogMessage << "Testing single-precision Multigrid for Wilson Clover" << std::endl;
+  std::cout << GridLogMessage << "**************************************************" << std::endl;
+  // NOTE(review): Dwc_f is passed twice to createMGInstance (presumably declared in Test_multigrid_common.h) - confirm the role of each argument there.
+  auto MGPreconDwc_f = createMGInstance<vSpinColourVectorF, vTComplexF, nbasis, WilsonCloverFermionF>(mgParams, levelInfo_f, Dwc_f, Dwc_f);
+
+  MGPreconDwc_f->setup();
+  // runChecks is only invoked when --runchecks is present on the command line.
+  if(GridCmdOptionExists(argv, argv + argc, "--runchecks")) {
+    MGPreconDwc_f->runChecks(1e-6);
+  }
+  // NOTE(review): constructor arguments are presumably tolerance, max iterations, inner grid, preconditioner, restart length and a flag - confirm against the solver class.
+  MixedPrecisionFlexibleGeneralisedMinimalResidual<LatticeFermionD, LatticeFermionF> MPFGMRESPREC(
+    1.0e-12, 50000, FGrid_f, *MGPreconDwc_f, 100, false);
+
+  std::cout << std::endl << "Starting with a new solver" << std::endl;
+  MPFGMRESPREC(MdagMOpDwc_d, src_d, resultMGF_d);
+
+  MGPreconDwc_f->reportTimings();
+  // Everything inside the following block runs only when --docomparison is given.
+  if(GridCmdOptionExists(argv, argv + argc, "--docomparison")) {
+
+    std::cout << GridLogMessage << "**************************************************" << std::endl;
+    std::cout << GridLogMessage << "Testing double-precision Multigrid for Wilson Clover" << std::endl;
+    std::cout << GridLogMessage << "**************************************************" << std::endl;
+    // Same sequence as the single-precision run above (create, setup, optional checks, solve, timings), using the _d objects.
+    auto MGPreconDwc_d = createMGInstance<vSpinColourVectorD, vTComplexD, nbasis, WilsonCloverFermionD>(mgParams, levelInfo_d, Dwc_d, Dwc_d);
+
+    MGPreconDwc_d->setup();
+
+    if(GridCmdOptionExists(argv, argv + argc, "--runchecks")) {
+      MGPreconDwc_d->runChecks(1e-13);
+    }
+
+    FlexibleGeneralisedMinimalResidual<LatticeFermionD> FGMRESPREC(1.0e-12, 50000, *MGPreconDwc_d, 100, false);
+
+    std::cout << std::endl << "Starting with a new solver" << std::endl;
+    FGMRESPREC(MdagMOpDwc_d, src_d, resultMGD_d);
+
+    MGPreconDwc_d->reportTimings();
+
+    std::cout << GridLogMessage << "**************************************************" << std::endl;
+    std::cout << GridLogMessage << "Comparing single-precision Multigrid with double-precision one for Wilson Clover" << std::endl;
+    std::cout << GridLogMessage << "**************************************************" << std::endl;
+
+    LatticeFermionD diffFullSolver(FGrid_d);
+    // NOTE(review): presumably diffFullSolver = -1.0 * resultMGF_d + resultMGD_d; confirm whether axpy_norm returns a norm or its square, since it is divided by norm2() below.
+    RealD deviationFullSolver = axpy_norm(diffFullSolver, -1.0, resultMGF_d, resultMGD_d);
+    // Apply each multigrid preconditioner once: the single-precision one to src_f (obtained from src_d via precisionChange), the double-precision one to src_d.
+    // clang-format off
+    LatticeFermionF src_f(FGrid_f);    precisionChange(src_f, src_d);
+    LatticeFermionF resMGF_f(FGrid_f); resMGF_f = zero;
+    LatticeFermionD resMGD_d(FGrid_d); resMGD_d = zero;
+    // clang-format on
+
+    (*MGPreconDwc_f)(src_f, resMGF_f);
+    (*MGPreconDwc_d)(src_d, resMGD_d);
+    // The single-precision result is moved into a double-precision field before the difference is taken.
+    LatticeFermionD diffOnlyMG(FGrid_d);
+    LatticeFermionD resMGF_d(FGrid_d);
+    precisionChange(resMGF_d, resMGF_f);
+
+    RealD deviationOnlyPrec = axpy_norm(diffOnlyMG, -1.0, resMGF_d, resMGD_d);
+
+    // clang-format off
+    std::cout << GridLogMessage << "Absolute difference between FGMRES preconditioned by double and single precicision MG: " << deviationFullSolver                      << std::endl;
+    std::cout << GridLogMessage << "Relative deviation  between FGMRES preconditioned by double and single precicision MG: " << deviationFullSolver / norm2(resultMGD_d) << std::endl;
+    std::cout << GridLogMessage << "Absolute difference between one iteration of MG Prec in double and single precision:   " << deviationOnlyPrec                        << std::endl;
+    std::cout << GridLogMessage << "Relative deviation  between one iteration of MG Prec in double and single precision:   " << deviationOnlyPrec / norm2(resMGD_d)      << std::endl;
+    // clang-format on
+  }
+  // NOTE(review): the four grid pointers created at the top are never deleted in this function.
+  Grid_finalize();
+}

From d1c80e1d461c5824c853768f1458039ae888c413 Mon Sep 17 00:00:00 2001
From: Daniel Richtmann <daniel.richtmann@gmail.com>
Date: Wed, 13 Jun 2018 09:42:29 +0200
Subject: [PATCH 128/130] WilsonMG: Correct years in copyright line

---
 tests/solver/Test_multigrid_common.h             | 2 +-
 tests/solver/Test_staggered_cagmres_unprec.cc    | 2 +-
 tests/solver/Test_staggered_fcagmres_prec.cc     | 2 +-
 tests/solver/Test_staggered_fgmres_prec.cc       | 2 +-
 tests/solver/Test_staggered_gmres_unprec.cc      | 2 +-
 tests/solver/Test_staggered_mr_unprec.cc         | 2 +-
 tests/solver/Test_wilson_cagmres_unprec.cc       | 2 +-
 tests/solver/Test_wilson_fcagmres_prec.cc        | 2 +-
 tests/solver/Test_wilson_fgmres_prec.cc          | 2 +-
 tests/solver/Test_wilson_gmres_unprec.cc         | 2 +-
 tests/solver/Test_wilson_mg.cc                   | 2 +-
 tests/solver/Test_wilson_mg_mp.cc                | 2 +-
 tests/solver/Test_wilson_mr_unprec.cc            | 2 +-
 tests/solver/Test_wilsonclover_cagmres_unprec.cc | 2 +-
 tests/solver/Test_wilsonclover_fcagmres_prec.cc  | 2 +-
 tests/solver/Test_wilsonclover_fgmres_prec.cc    | 2 +-
 tests/solver/Test_wilsonclover_gmres_unprec.cc   | 2 +-
 tests/solver/Test_wilsonclover_mg.cc             | 2 +-
 tests/solver/Test_wilsonclover_mg_mp.cc          | 2 +-
 tests/solver/Test_wilsonclover_mr_unprec.cc      | 2 +-
 20 files changed, 20 insertions(+), 20 deletions(-)

diff --git a/tests/solver/Test_multigrid_common.h b/tests/solver/Test_multigrid_common.h
index 3250addb..add833f2 100644
--- a/tests/solver/Test_multigrid_common.h
+++ b/tests/solver/Test_multigrid_common.h
@@ -4,7 +4,7 @@
 
     Source file: ./tests/solver/Test_multigrid_common.h
 
-    Copyright (C) 2015
+    Copyright (C) 2015-2018
 
     Author: Daniel Richtmann <daniel.richtmann@ur.de>
 
diff --git a/tests/solver/Test_staggered_cagmres_unprec.cc b/tests/solver/Test_staggered_cagmres_unprec.cc
index 506b68b1..247c4097 100644
--- a/tests/solver/Test_staggered_cagmres_unprec.cc
+++ b/tests/solver/Test_staggered_cagmres_unprec.cc
@@ -4,7 +4,7 @@ Grid physics library, www.github.com/paboyle/Grid
 
 Source file: ./tests/solver/Test_staggered_cagmres_unprec.cc
 
-Copyright (C) 2015
+Copyright (C) 2015-2018
 
 Author: Daniel Richtmann <daniel.richtmann@ur.de>
 
diff --git a/tests/solver/Test_staggered_fcagmres_prec.cc b/tests/solver/Test_staggered_fcagmres_prec.cc
index 33ce182d..9b5c52d7 100644
--- a/tests/solver/Test_staggered_fcagmres_prec.cc
+++ b/tests/solver/Test_staggered_fcagmres_prec.cc
@@ -4,7 +4,7 @@ Grid physics library, www.github.com/paboyle/Grid
 
 Source file: ./tests/solver/Test_staggered_fcagmres_prec.cc
 
-Copyright (C) 2015
+Copyright (C) 2015-2018
 
 Author: Daniel Richtmann <daniel.richtmann@ur.de>
 
diff --git a/tests/solver/Test_staggered_fgmres_prec.cc b/tests/solver/Test_staggered_fgmres_prec.cc
index 48eec1ce..a125af7f 100644
--- a/tests/solver/Test_staggered_fgmres_prec.cc
+++ b/tests/solver/Test_staggered_fgmres_prec.cc
@@ -4,7 +4,7 @@ Grid physics library, www.github.com/paboyle/Grid
 
 Source file: ./tests/solver/Test_staggered_fgmres_prec.cc
 
-Copyright (C) 2015
+Copyright (C) 2015-2018
 
 Author: Daniel Richtmann <daniel.richtmann@ur.de>
 
diff --git a/tests/solver/Test_staggered_gmres_unprec.cc b/tests/solver/Test_staggered_gmres_unprec.cc
index 8b62eb78..2d8a247d 100644
--- a/tests/solver/Test_staggered_gmres_unprec.cc
+++ b/tests/solver/Test_staggered_gmres_unprec.cc
@@ -4,7 +4,7 @@ Grid physics library, www.github.com/paboyle/Grid
 
 Source file: ./tests/solver/Test_staggered_gmres_unprec.cc
 
-Copyright (C) 2015
+Copyright (C) 2015-2018
 
 Author: Daniel Richtmann <daniel.richtmann@ur.de>
 
diff --git a/tests/solver/Test_staggered_mr_unprec.cc b/tests/solver/Test_staggered_mr_unprec.cc
index d29ec68d..835d1337 100644
--- a/tests/solver/Test_staggered_mr_unprec.cc
+++ b/tests/solver/Test_staggered_mr_unprec.cc
@@ -4,7 +4,7 @@ Grid physics library, www.github.com/paboyle/Grid
 
 Source file: ./tests/solver/Test_staggered_mr_unprec.cc
 
-Copyright (C) 2015
+Copyright (C) 2015-2018
 
 Author: Daniel Richtmann <daniel.richtmann@ur.de>
 
diff --git a/tests/solver/Test_wilson_cagmres_unprec.cc b/tests/solver/Test_wilson_cagmres_unprec.cc
index 4f81b195..46f9e6a6 100644
--- a/tests/solver/Test_wilson_cagmres_unprec.cc
+++ b/tests/solver/Test_wilson_cagmres_unprec.cc
@@ -4,7 +4,7 @@ Grid physics library, www.github.com/paboyle/Grid
 
 Source file: ./tests/solver/Test_wilson_cagmres_unprec.cc
 
-Copyright (C) 2015
+Copyright (C) 2015-2018
 
 Author: Daniel Richtmann <daniel.richtmann@ur.de>
 
diff --git a/tests/solver/Test_wilson_fcagmres_prec.cc b/tests/solver/Test_wilson_fcagmres_prec.cc
index 5ac9b87f..f802984f 100644
--- a/tests/solver/Test_wilson_fcagmres_prec.cc
+++ b/tests/solver/Test_wilson_fcagmres_prec.cc
@@ -4,7 +4,7 @@ Grid physics library, www.github.com/paboyle/Grid
 
 Source file: ./tests/solver/Test_wilson_fcagmres_prec.cc
 
-Copyright (C) 2015
+Copyright (C) 2015-2018
 
 Author: Daniel Richtmann <daniel.richtmann@ur.de>
 
diff --git a/tests/solver/Test_wilson_fgmres_prec.cc b/tests/solver/Test_wilson_fgmres_prec.cc
index f9e1f352..f55516da 100644
--- a/tests/solver/Test_wilson_fgmres_prec.cc
+++ b/tests/solver/Test_wilson_fgmres_prec.cc
@@ -4,7 +4,7 @@ Grid physics library, www.github.com/paboyle/Grid
 
 Source file: ./tests/solver/Test_wilson_fgmres_prec.cc
 
-Copyright (C) 2015
+Copyright (C) 2015-2018
 
 Author: Daniel Richtmann <daniel.richtmann@ur.de>
 
diff --git a/tests/solver/Test_wilson_gmres_unprec.cc b/tests/solver/Test_wilson_gmres_unprec.cc
index d4ec2ed0..443f7ebc 100644
--- a/tests/solver/Test_wilson_gmres_unprec.cc
+++ b/tests/solver/Test_wilson_gmres_unprec.cc
@@ -4,7 +4,7 @@ Grid physics library, www.github.com/paboyle/Grid
 
 Source file: ./tests/solver/Test_wilson_gmres_unprec.cc
 
-Copyright (C) 2015
+Copyright (C) 2015-2018
 
 Author: Daniel Richtmann <daniel.richtmann@ur.de>
 
diff --git a/tests/solver/Test_wilson_mg.cc b/tests/solver/Test_wilson_mg.cc
index 0578b61c..5e4ee480 100644
--- a/tests/solver/Test_wilson_mg.cc
+++ b/tests/solver/Test_wilson_mg.cc
@@ -4,7 +4,7 @@
 
     Source file: ./tests/solver/Test_wilson_mg.cc
 
-    Copyright (C) 2017
+    Copyright (C) 2015-2018
 
     Author: Daniel Richtmann <daniel.richtmann@ur.de>
 
diff --git a/tests/solver/Test_wilson_mg_mp.cc b/tests/solver/Test_wilson_mg_mp.cc
index 3d8a242e..f4298777 100644
--- a/tests/solver/Test_wilson_mg_mp.cc
+++ b/tests/solver/Test_wilson_mg_mp.cc
@@ -4,7 +4,7 @@
 
     Source file: ./tests/solver/Test_wilson_mg_mp.cc
 
-    Copyright (C) 2017
+    Copyright (C) 2015-2018
 
     Author: Daniel Richtmann <daniel.richtmann@ur.de>
 
diff --git a/tests/solver/Test_wilson_mr_unprec.cc b/tests/solver/Test_wilson_mr_unprec.cc
index 7b7ef0f9..976130d3 100644
--- a/tests/solver/Test_wilson_mr_unprec.cc
+++ b/tests/solver/Test_wilson_mr_unprec.cc
@@ -4,7 +4,7 @@ Grid physics library, www.github.com/paboyle/Grid
 
 Source file: ./tests/solver/Test_wilson_mr_unprec.cc
 
-Copyright (C) 2015
+Copyright (C) 2015-2018
 
 Author: Daniel Richtmann <daniel.richtmann@ur.de>
 
diff --git a/tests/solver/Test_wilsonclover_cagmres_unprec.cc b/tests/solver/Test_wilsonclover_cagmres_unprec.cc
index e248614b..3ecdf738 100644
--- a/tests/solver/Test_wilsonclover_cagmres_unprec.cc
+++ b/tests/solver/Test_wilsonclover_cagmres_unprec.cc
@@ -4,7 +4,7 @@ Grid physics library, www.github.com/paboyle/Grid
 
 Source file: ./tests/solver/Test_wilsonclover_cagmres_unprec.cc
 
-Copyright (C) 2015
+Copyright (C) 2015-2018
 
 Author: Daniel Richtmann <daniel.richtmann@ur.de>
 
diff --git a/tests/solver/Test_wilsonclover_fcagmres_prec.cc b/tests/solver/Test_wilsonclover_fcagmres_prec.cc
index 762a7fd7..3cbbfc02 100644
--- a/tests/solver/Test_wilsonclover_fcagmres_prec.cc
+++ b/tests/solver/Test_wilsonclover_fcagmres_prec.cc
@@ -4,7 +4,7 @@ Grid physics library, www.github.com/paboyle/Grid
 
 Source file: ./tests/solver/Test_wilsonclover_fcagmres_prec.cc
 
-Copyright (C) 2015
+Copyright (C) 2015-2018
 
 Author: Daniel Richtmann <daniel.richtmann@ur.de>
 
diff --git a/tests/solver/Test_wilsonclover_fgmres_prec.cc b/tests/solver/Test_wilsonclover_fgmres_prec.cc
index 50c2c605..7ad0fa24 100644
--- a/tests/solver/Test_wilsonclover_fgmres_prec.cc
+++ b/tests/solver/Test_wilsonclover_fgmres_prec.cc
@@ -4,7 +4,7 @@ Grid physics library, www.github.com/paboyle/Grid
 
 Source file: ./tests/solver/Test_wilsonclover_fgmres_prec.cc
 
-Copyright (C) 2015
+Copyright (C) 2015-2018
 
 Author: Daniel Richtmann <daniel.richtmann@ur.de>
 
diff --git a/tests/solver/Test_wilsonclover_gmres_unprec.cc b/tests/solver/Test_wilsonclover_gmres_unprec.cc
index c05af5b6..a9fe7181 100644
--- a/tests/solver/Test_wilsonclover_gmres_unprec.cc
+++ b/tests/solver/Test_wilsonclover_gmres_unprec.cc
@@ -4,7 +4,7 @@ Grid physics library, www.github.com/paboyle/Grid
 
 Source file: ./tests/solver/Test_wilsonclover_gmres_unprec.cc
 
-Copyright (C) 2015
+Copyright (C) 2015-2018
 
 Author: Daniel Richtmann <daniel.richtmann@ur.de>
 
diff --git a/tests/solver/Test_wilsonclover_mg.cc b/tests/solver/Test_wilsonclover_mg.cc
index 78467d9b..d65418ff 100644
--- a/tests/solver/Test_wilsonclover_mg.cc
+++ b/tests/solver/Test_wilsonclover_mg.cc
@@ -4,7 +4,7 @@
 
     Source file: ./tests/solver/Test_wilsonclover_mg.cc
 
-    Copyright (C) 2017
+    Copyright (C) 2015-2018
 
     Author: Daniel Richtmann <daniel.richtmann@ur.de>
 
diff --git a/tests/solver/Test_wilsonclover_mg_mp.cc b/tests/solver/Test_wilsonclover_mg_mp.cc
index df457848..d8204cfb 100644
--- a/tests/solver/Test_wilsonclover_mg_mp.cc
+++ b/tests/solver/Test_wilsonclover_mg_mp.cc
@@ -4,7 +4,7 @@
 
     Source file: ./tests/solver/Test_wilsonclover_mg_mp.cc
 
-    Copyright (C) 2017
+    Copyright (C) 2015-2018
 
     Author: Daniel Richtmann <daniel.richtmann@ur.de>
 
diff --git a/tests/solver/Test_wilsonclover_mr_unprec.cc b/tests/solver/Test_wilsonclover_mr_unprec.cc
index f39bea9a..e3aa8838 100644
--- a/tests/solver/Test_wilsonclover_mr_unprec.cc
+++ b/tests/solver/Test_wilsonclover_mr_unprec.cc
@@ -4,7 +4,7 @@ Grid physics library, www.github.com/paboyle/Grid
 
 Source file: ./tests/solver/Test_wilsonclover_mr_unprec.cc
 
-Copyright (C) 2015
+Copyright (C) 2015-2018
 
 Author: Daniel Richtmann <daniel.richtmann@ur.de>
 

From cc5d025ea440f8ffc9592ac636807f668ed7a155 Mon Sep 17 00:00:00 2001
From: Daniel Richtmann <daniel.richtmann@physik.uni-regensburg.de>
Date: Mon, 18 Jun 2018 16:18:56 +0200
Subject: [PATCH 129/130] WilsonMG: Adapt staggered GMRES/MR tests to "new"
 constructor

---
 tests/solver/Test_staggered_cagmres_unprec.cc | 5 ++++-
 tests/solver/Test_staggered_fcagmres_prec.cc  | 5 ++++-
 tests/solver/Test_staggered_fgmres_prec.cc    | 5 ++++-
 tests/solver/Test_staggered_gmres_unprec.cc   | 5 ++++-
 tests/solver/Test_staggered_mr_unprec.cc      | 5 ++++-
 5 files changed, 20 insertions(+), 5 deletions(-)

diff --git a/tests/solver/Test_staggered_cagmres_unprec.cc b/tests/solver/Test_staggered_cagmres_unprec.cc
index 247c4097..b82ecaeb 100644
--- a/tests/solver/Test_staggered_cagmres_unprec.cc
+++ b/tests/solver/Test_staggered_cagmres_unprec.cc
@@ -59,7 +59,10 @@ int main (int argc, char ** argv)
   }
 
   RealD mass=0.5;
-  ImprovedStaggeredFermionR Ds(Umu,Umu,Grid,RBGrid,mass);
+  RealD c1=9.0/8.0;
+  RealD c2=-1.0/24.0;
+  RealD u0=1.0;
+  ImprovedStaggeredFermionR Ds(Umu,Umu,Grid,RBGrid,mass,c1,c2,u0);
 
   MdagMLinearOperator<ImprovedStaggeredFermionR,FermionField> HermOp(Ds);
   CommunicationAvoidingGeneralisedMinimalResidual<FermionField> CAGMRES(1.0e-8, 10000, 25);
diff --git a/tests/solver/Test_staggered_fcagmres_prec.cc b/tests/solver/Test_staggered_fcagmres_prec.cc
index 9b5c52d7..7685585b 100644
--- a/tests/solver/Test_staggered_fcagmres_prec.cc
+++ b/tests/solver/Test_staggered_fcagmres_prec.cc
@@ -59,7 +59,10 @@ int main (int argc, char ** argv)
   }
 
   RealD mass=0.5;
-  ImprovedStaggeredFermionR Ds(Umu,Umu,Grid,RBGrid,mass);
+  RealD c1=9.0/8.0;
+  RealD c2=-1.0/24.0;
+  RealD u0=1.0;
+  ImprovedStaggeredFermionR Ds(Umu,Umu,Grid,RBGrid,mass,c1,c2,u0);
 
   MdagMLinearOperator<ImprovedStaggeredFermionR,FermionField> HermOp(Ds);
 
diff --git a/tests/solver/Test_staggered_fgmres_prec.cc b/tests/solver/Test_staggered_fgmres_prec.cc
index a125af7f..30905e35 100644
--- a/tests/solver/Test_staggered_fgmres_prec.cc
+++ b/tests/solver/Test_staggered_fgmres_prec.cc
@@ -59,7 +59,10 @@ int main (int argc, char ** argv)
   }
 
   RealD mass=0.5;
-  ImprovedStaggeredFermionR Ds(Umu,Umu,Grid,RBGrid,mass);
+  RealD c1=9.0/8.0;
+  RealD c2=-1.0/24.0;
+  RealD u0=1.0;
+  ImprovedStaggeredFermionR Ds(Umu,Umu,Grid,RBGrid,mass,c1,c2,u0);
 
   MdagMLinearOperator<ImprovedStaggeredFermionR,FermionField> HermOp(Ds);
 
diff --git a/tests/solver/Test_staggered_gmres_unprec.cc b/tests/solver/Test_staggered_gmres_unprec.cc
index 2d8a247d..d65b0b31 100644
--- a/tests/solver/Test_staggered_gmres_unprec.cc
+++ b/tests/solver/Test_staggered_gmres_unprec.cc
@@ -59,7 +59,10 @@ int main (int argc, char ** argv)
   }
 
   RealD mass=0.5;
-  ImprovedStaggeredFermionR Ds(Umu,Umu,Grid,RBGrid,mass);
+  RealD c1=9.0/8.0;
+  RealD c2=-1.0/24.0;
+  RealD u0=1.0;
+  ImprovedStaggeredFermionR Ds(Umu,Umu,Grid,RBGrid,mass,c1,c2,u0);
 
   MdagMLinearOperator<ImprovedStaggeredFermionR,FermionField> HermOp(Ds);
   GeneralisedMinimalResidual<FermionField> GMRES(1.0e-8, 10000, 25);
diff --git a/tests/solver/Test_staggered_mr_unprec.cc b/tests/solver/Test_staggered_mr_unprec.cc
index 835d1337..ca60edb4 100644
--- a/tests/solver/Test_staggered_mr_unprec.cc
+++ b/tests/solver/Test_staggered_mr_unprec.cc
@@ -59,7 +59,10 @@ int main (int argc, char ** argv)
   }
 
   RealD mass=0.5;
-  ImprovedStaggeredFermionR Ds(Umu,Umu,Grid,RBGrid,mass);
+  RealD c1=9.0/8.0;
+  RealD c2=-1.0/24.0;
+  RealD u0=1.0;
+  ImprovedStaggeredFermionR Ds(Umu,Umu,Grid,RBGrid,mass,c1,c2,u0);
 
   MdagMLinearOperator<ImprovedStaggeredFermionR,FermionField> HermOp(Ds);
   MinimalResidual<FermionField> MR(1.0e-8,10000,0.8);

From 2881b3e8e52644d0c41c7f21ded03ab43bb04f22 Mon Sep 17 00:00:00 2001
From: Daniel Richtmann <daniel.richtmann@physik.uni-regensburg.de>
Date: Tue, 26 Jun 2018 14:40:59 +0200
Subject: [PATCH 130/130] WilsonMG: Remove unnecessary static assertions

---
 tests/solver/Test_wilson_mg.cc          | 2 --
 tests/solver/Test_wilson_mg_mp.cc       | 3 ---
 tests/solver/Test_wilsonclover_mg.cc    | 2 --
 tests/solver/Test_wilsonclover_mg_mp.cc | 3 ---
 4 files changed, 10 deletions(-)

diff --git a/tests/solver/Test_wilson_mg.cc b/tests/solver/Test_wilson_mg.cc
index 5e4ee480..1609c1fc 100644
--- a/tests/solver/Test_wilson_mg.cc
+++ b/tests/solver/Test_wilson_mg.cc
@@ -80,8 +80,6 @@ int main(int argc, char **argv) {
 
   WilsonFermionR Dw(Umu, *FGrid, *FrbGrid, mass);
 
-  static_assert(std::is_same<LatticeFermion, typename WilsonFermionR::FermionField>::value, "");
-
   MdagMLinearOperator<WilsonFermionR, LatticeFermion> MdagMOpDw(Dw);
 
   std::cout << GridLogMessage << "**************************************************" << std::endl;
diff --git a/tests/solver/Test_wilson_mg_mp.cc b/tests/solver/Test_wilson_mg_mp.cc
index f4298777..0cd51227 100644
--- a/tests/solver/Test_wilson_mg_mp.cc
+++ b/tests/solver/Test_wilson_mg_mp.cc
@@ -88,9 +88,6 @@ int main(int argc, char **argv) {
   WilsonFermionD Dw_d(Umu_d, *FGrid_d, *FrbGrid_d, mass);
   WilsonFermionF Dw_f(Umu_f, *FGrid_f, *FrbGrid_f, mass);
 
-  static_assert(std::is_same<LatticeFermionD, typename WilsonFermionD::FermionField>::value, "");
-  static_assert(std::is_same<LatticeFermionF, typename WilsonFermionF::FermionField>::value, "");
-
   MdagMLinearOperator<WilsonFermionD, LatticeFermionD> MdagMOpDw_d(Dw_d);
   MdagMLinearOperator<WilsonFermionF, LatticeFermionF> MdagMOpDw_f(Dw_f);
 
diff --git a/tests/solver/Test_wilsonclover_mg.cc b/tests/solver/Test_wilsonclover_mg.cc
index d65418ff..e749aacb 100644
--- a/tests/solver/Test_wilsonclover_mg.cc
+++ b/tests/solver/Test_wilsonclover_mg.cc
@@ -82,8 +82,6 @@ int main(int argc, char **argv) {
 
   WilsonCloverFermionR Dwc(Umu, *FGrid, *FrbGrid, mass, csw_r, csw_t);
 
-  static_assert(std::is_same<LatticeFermion, typename WilsonCloverFermionR::FermionField>::value, "");
-
   MdagMLinearOperator<WilsonCloverFermionR, LatticeFermion> MdagMOpDwc(Dwc);
 
   std::cout << GridLogMessage << "**************************************************" << std::endl;
diff --git a/tests/solver/Test_wilsonclover_mg_mp.cc b/tests/solver/Test_wilsonclover_mg_mp.cc
index d8204cfb..d9ed1d33 100644
--- a/tests/solver/Test_wilsonclover_mg_mp.cc
+++ b/tests/solver/Test_wilsonclover_mg_mp.cc
@@ -90,9 +90,6 @@ int main(int argc, char **argv) {
   WilsonCloverFermionD Dwc_d(Umu_d, *FGrid_d, *FrbGrid_d, mass, csw_r, csw_t);
   WilsonCloverFermionF Dwc_f(Umu_f, *FGrid_f, *FrbGrid_f, mass, csw_r, csw_t);
 
-  static_assert(std::is_same<LatticeFermionD, typename WilsonCloverFermionD::FermionField>::value, "");
-  static_assert(std::is_same<LatticeFermionF, typename WilsonCloverFermionF::FermionField>::value, "");
-
   MdagMLinearOperator<WilsonCloverFermionD, LatticeFermionD> MdagMOpDwc_d(Dwc_d);
   MdagMLinearOperator<WilsonCloverFermionF, LatticeFermionF> MdagMOpDwc_f(Dwc_f);