diff --git a/Grid/algorithms/Algorithms.h b/Grid/algorithms/Algorithms.h
index ef147c53..b716c48f 100644
--- a/Grid/algorithms/Algorithms.h
+++ b/Grid/algorithms/Algorithms.h
@@ -48,6 +48,12 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk>
 #include <Grid/algorithms/iterative/ConjugateGradientMixedPrec.h>
 #include <Grid/algorithms/iterative/BlockConjugateGradient.h>
 #include <Grid/algorithms/iterative/ConjugateGradientReliableUpdate.h>
+#include <Grid/algorithms/iterative/MinimalResidual.h>
+#include <Grid/algorithms/iterative/GeneralisedMinimalResidual.h>
+#include <Grid/algorithms/iterative/CommunicationAvoidingGeneralisedMinimalResidual.h>
+#include <Grid/algorithms/iterative/FlexibleGeneralisedMinimalResidual.h>
+#include <Grid/algorithms/iterative/FlexibleCommunicationAvoidingGeneralisedMinimalResidual.h>
+#include <Grid/algorithms/iterative/MixedPrecisionFlexibleGeneralisedMinimalResidual.h>
 #include <Grid/algorithms/iterative/ImplicitlyRestartedLanczos.h>
 #include <Grid/algorithms/CoarsenedMatrix.h>
 #include <Grid/algorithms/FFT.h>
diff --git a/Grid/algorithms/CoarsenedMatrix.h b/Grid/algorithms/CoarsenedMatrix.h
index 8af8d7ac..a6c6c030 100644
--- a/Grid/algorithms/CoarsenedMatrix.h
+++ b/Grid/algorithms/CoarsenedMatrix.h
@@ -211,6 +211,7 @@ namespace Grid {
 
       for(int b=0;b<nn;b++){
 	
+	subspace[b] = zero;
 	gaussian(RNG,noise);
 	scale = std::pow(norm2(noise),-0.5); 
 	noise=noise*scale;
@@ -295,13 +296,58 @@ namespace Grid {
       return norm2(out);
     };
 
-    RealD Mdag (const CoarseVector &in, CoarseVector &out){ 
-      return M(in,out);
+    RealD Mdag (const CoarseVector &in, CoarseVector &out){
+      // corresponds to Galerkin coarsening: in is passed through G5C, then M, then G5C again
+      // (Petrov-Galerkin coarsening would instead correspond to a plain "return M(in,out);")
+      CoarseVector g5in(Grid());
+
+      G5C(g5in, in);
+      M(g5in, out); // the value returned by M is not used
+      G5C(out, out);
+
+      return norm2(out);
     };
 
-    // Defer support for further coarsening for now
-    void Mdiag    (const CoarseVector &in,  CoarseVector &out){};
-    void Mdir     (const CoarseVector &in,  CoarseVector &out,int dir, int disp){};
+    void Mdir(const CoarseVector &in, CoarseVector &out, int dir, int disp){
+      // Applies the single stencil term selected by (dir, disp) to in and streams the result to out.
+      // A halo exchange of in is performed on every call.
+
+      conformable(_grid,in._grid);
+      conformable(in._grid,out._grid);
+
+      SimpleCompressor<siteVector> compressor;
+      Stencil.HaloExchange(in,compressor);
+
+      // Stencil point index: (dir, disp) == (0, 0) selects point 8, anything else (4*dir + 1 - disp)/2,
+      // e.g. dir=0: disp=+1 -> 0, disp=-1 -> 1; dir=1: disp=+1 -> 2, disp=-1 -> 3.
+      // NOTE(review): the hard-coded 8 presumably assumes a 9-point (4 dimensional) stencil - confirm.
+      const int point = (dir == 0 && disp == 0) ? 8 : (4 * dir + 1 - disp) / 2;
+
+      parallel_for(int ss=0;ss<Grid()->oSites();ss++){
+        int ptype;
+        StencilEntry *SE = Stencil.GetEntry(ptype,point,ss);
+
+        // neighbour comes from the comms buffer, or from in itself (permuted when the entry asks for it)
+        siteVector nbr;
+        if(!SE->_is_local) {
+          nbr = Stencil.CommBuf()[SE->_offset];
+        } else if(SE->_permute) {
+          permute(nbr,in._odata[SE->_offset],ptype);
+        } else {
+          nbr = in._odata[SE->_offset];
+        }
+
+        // multiply by the coupling of this stencil point at site ss
+        siteVector res = zero;
+        res = res + A[point]._odata[ss]*nbr;
+
+        vstream(out._odata[ss],res);
+      }
+    };
+
+    void Mdiag(const CoarseVector &in, CoarseVector &out){
+      Mdir(in, out, 0, 0); // (dir, disp) = (0, 0): use the self coupling (= last) point of the stencil
+    };
 
     CoarsenedMatrix(GridCartesian &CoarseGrid) 	: 
 
@@ -417,7 +463,7 @@ namespace Grid {
       std::cout<<GridLogMessage<<"Computed Coarse Operator"<<std::endl;
 #endif
       //      ForceHermitian();
-      AssertHermitian();
+      // AssertHermitian();
       // ForceDiagonal();
     }
     void ForceDiagonal(void) {
diff --git a/Grid/algorithms/iterative/CommunicationAvoidingGeneralisedMinimalResidual.h b/Grid/algorithms/iterative/CommunicationAvoidingGeneralisedMinimalResidual.h
new file mode 100644
index 00000000..f0289683
--- /dev/null
+++ b/Grid/algorithms/iterative/CommunicationAvoidingGeneralisedMinimalResidual.h
@@ -0,0 +1,244 @@
+/*************************************************************************************
+
+Grid physics library, www.github.com/paboyle/Grid
+
+Source file: ./lib/algorithms/iterative/CommunicationAvoidingGeneralisedMinimalResidual.h
+
+Copyright (C) 2015
+
+Author: Daniel Richtmann <daniel.richtmann@ur.de>
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along
+with this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+
+See the full license in the file "LICENSE" in the top level distribution
+directory
+*************************************************************************************/
+/*  END LEGAL */
+#ifndef GRID_COMMUNICATION_AVOIDING_GENERALISED_MINIMAL_RESIDUAL_H
+#define GRID_COMMUNICATION_AVOIDING_GENERALISED_MINIMAL_RESIDUAL_H
+
+namespace Grid {
+
+// Restarted GMRES solver: every restart cycle builds up to RestartLength Arnoldi vectors and keeps
+// the Hessenberg matrix H triangular via Givens rotations (currently identical to regular GMRES).
+template<class Field>
+class CommunicationAvoidingGeneralisedMinimalResidual : public OperatorFunction<Field> {
+ public:
+  bool ErrorOnNoConverge; // Throw an assert when CAGMRES fails to converge,
+                          // defaults to true
+
+  RealD   Tolerance;           // converged once the squared residual is <= Tolerance^2 * norm2(src)
+  Integer MaxIterations;
+  Integer RestartLength;       // maximum number of Arnoldi steps per restart cycle
+  Integer MaxNumberOfRestarts; // ceil(MaxIterations / RestartLength)
+  Integer IterationCount = 0;  // Number of iterations the CAGMRES took to finish,
+                               // filled in upon completion
+
+  GridStopWatch MatrixTimer;
+  GridStopWatch LinalgTimer;
+  GridStopWatch QrTimer;
+  GridStopWatch CompSolutionTimer;
+
+  Eigen::MatrixXcd H; // H(j, i): coefficient of v[i] from Arnoldi step j, rotated in place by qrUpdate
+  std::vector<std::complex<double>> y;     // coefficients of the basis vectors in the solution update
+  std::vector<std::complex<double>> gamma; // rotated right-hand side; norm(gamma[j+1]) is the residual estimate
+  std::vector<std::complex<double>> c;     // Givens rotation coefficients, one pair (c[j], s[j]) per step
+  std::vector<std::complex<double>> s;
+
+  CommunicationAvoidingGeneralisedMinimalResidual(RealD   tol,
+                                                  Integer maxit,
+                                                  Integer restart_length,
+                                                  bool    err_on_no_conv = true)
+      : ErrorOnNoConverge(err_on_no_conv) // initialisers follow the declaration order (avoids -Wreorder)
+      , Tolerance(tol)
+      , MaxIterations(maxit)
+      , RestartLength(restart_length)
+      , MaxNumberOfRestarts(MaxIterations/RestartLength + ((MaxIterations%RestartLength == 0) ? 0 : 1))
+      , H(Eigen::MatrixXcd::Zero(RestartLength, RestartLength + 1)) // sizes taken from DD-αAMG code base
+      , y(RestartLength + 1, 0.)
+      , gamma(RestartLength + 1, 0.)
+      , c(RestartLength + 1, 0.)
+      , s(RestartLength + 1, 0.) {}
+
+  // Solves LinOp * psi = src, using the incoming psi as initial guess and overwriting it with the result.
+  // Runs up to MaxNumberOfRestarts restart cycles; on convergence the true residual is recomputed and logged.
+  void operator()(LinearOperatorBase<Field> &LinOp, const Field &src, Field &psi) {
+    std::cout << GridLogWarning << "This algorithm currently doesn't differ from regular GMRES" << std::endl;
+    psi.checkerboard = src.checkerboard;
+    conformable(psi, src);
+
+    RealD guess = norm2(psi);
+    assert(std::isnan(guess) == 0);
+
+    RealD cp;
+    RealD ssq = norm2(src);
+    RealD rsq = Tolerance * Tolerance * ssq; // target for the squared residual norm
+
+    Field r(src._grid);
+
+    std::cout << std::setprecision(4) << std::scientific;
+    std::cout << GridLogIterative << "CommunicationAvoidingGeneralisedMinimalResidual: guess " << guess << std::endl;
+    std::cout << GridLogIterative << "CommunicationAvoidingGeneralisedMinimalResidual:   src " << ssq   << std::endl;
+
+    MatrixTimer.Reset();
+    LinalgTimer.Reset();
+    QrTimer.Reset();
+    CompSolutionTimer.Reset();
+
+    GridStopWatch SolverTimer;
+    SolverTimer.Start();
+
+    IterationCount = 0;
+
+    for (int k=0; k<MaxNumberOfRestarts; k++) {
+
+      cp = outerLoopBody(LinOp, src, psi, rsq);
+
+      // Stopping condition
+      if (cp <= rsq) {
+
+        SolverTimer.Stop();
+
+        LinOp.Op(psi,r);
+        axpy(r,-1.0,src,r);
+
+        RealD srcnorm       = sqrt(ssq);
+        RealD resnorm       = sqrt(norm2(r));
+        RealD true_residual = resnorm / srcnorm;
+
+        std::cout << GridLogMessage        << "CommunicationAvoidingGeneralisedMinimalResidual: Converged on iteration " << IterationCount
+                  << " computed residual " << sqrt(cp / ssq)
+                  << " true residual "     << true_residual
+                  << " target "            << Tolerance << std::endl;
+
+        std::cout << GridLogMessage << "CAGMRES Time elapsed: Total   " <<       SolverTimer.Elapsed() << std::endl;
+        std::cout << GridLogMessage << "CAGMRES Time elapsed: Matrix  " <<       MatrixTimer.Elapsed() << std::endl;
+        std::cout << GridLogMessage << "CAGMRES Time elapsed: Linalg  " <<       LinalgTimer.Elapsed() << std::endl;
+        std::cout << GridLogMessage << "CAGMRES Time elapsed: QR      " <<           QrTimer.Elapsed() << std::endl;
+        std::cout << GridLogMessage << "CAGMRES Time elapsed: CompSol " << CompSolutionTimer.Elapsed() << std::endl;
+        return;
+      }
+    }
+
+    std::cout << GridLogMessage << "CommunicationAvoidingGeneralisedMinimalResidual did NOT converge" << std::endl;
+
+    if (ErrorOnNoConverge)
+      assert(0);
+  }
+
+  // One restart cycle: recomputes the residual of psi, runs up to RestartLength Arnoldi/QR steps and adds
+  // the resulting correction to psi. Returns the squared residual norm that the caller compares with rsq.
+  RealD outerLoopBody(LinearOperatorBase<Field> &LinOp, const Field &src, Field &psi, RealD rsq) {
+    RealD cp = 0;
+    Field w(src._grid);
+    Field r(src._grid);
+    // this should probably be made a class member so that it is only allocated once, not in every restart
+    std::vector<Field> v(RestartLength + 1, src._grid); for (auto &elem : v) elem = zero;
+
+    MatrixTimer.Start();
+    LinOp.Op(psi, w);
+    MatrixTimer.Stop();
+
+    LinalgTimer.Start();
+    r = src - w;
+    RealD rr = norm2(r);
+    // Residual already within target (e.g. exact guess or zero source): normalising r would divide by zero
+    if (rr <= rsq) { LinalgTimer.Stop(); return rr; }
+    gamma[0] = sqrt(rr);
+    v[0] = (1. / gamma[0]) * r;
+    LinalgTimer.Stop();
+
+    for (int i=0; i<RestartLength; i++) {
+
+      IterationCount++;
+
+      arnoldiStep(LinOp, v, w, i);
+
+      qrUpdate(i);
+
+      cp = std::norm(gamma[i+1]);
+
+      std::cout << GridLogIterative << "CommunicationAvoidingGeneralisedMinimalResidual: Iteration " << IterationCount
+                << " residual " << cp << " target " << rsq << std::endl;
+
+      if ((i == RestartLength - 1) || (IterationCount == MaxIterations) || (cp <= rsq)) {
+
+        computeSolution(v, psi, i);
+
+        return cp;
+      }
+    }
+
+    assert(0); // Never reached
+    return cp;
+  }
+
+  // One Arnoldi step: w = LinOp * v[iter], orthogonalised against v[0..iter] one vector at a time;
+  // fills H(iter, 0..iter+1) and stores the normalised remainder as v[iter + 1].
+  void arnoldiStep(LinearOperatorBase<Field> &LinOp, std::vector<Field> &v, Field &w, int iter) {
+    MatrixTimer.Start();
+    LinOp.Op(v[iter], w);
+    MatrixTimer.Stop();
+
+    LinalgTimer.Start();
+    for (int i = 0; i <= iter; ++i) {
+      H(iter, i) = innerProduct(v[i], w); // projection is taken against the already updated w
+      w = w - H(iter, i) * v[i];
+    }
+    H(iter, iter + 1) = sqrt(norm2(w));
+    v[iter + 1] = (1. / H(iter, iter + 1)) * w;
+    LinalgTimer.Stop();
+  }
+
+  // Applies the rotations of the previous steps to the entries H(iter, .) of the current step, then
+  // computes a new rotation that zeroes H(iter, iter + 1) and applies it to gamma as well.
+  void qrUpdate(int iter) {
+    QrTimer.Start();
+    for (int i = 0; i < iter ; ++i) {
+      auto tmp       = -s[i] * H(iter, i) + c[i] * H(iter, i + 1);
+      H(iter, i)     = std::conj(c[i]) * H(iter, i) + std::conj(s[i]) * H(iter, i + 1);
+      H(iter, i + 1) = tmp;
+    }
+
+    // Compute new Givens Rotation
+    ComplexD nu = sqrt(std::norm(H(iter, iter)) + std::norm(H(iter, iter + 1)));
+    c[iter]     = H(iter, iter) / nu;
+    s[iter]     = H(iter, iter + 1) / nu;
+
+    // Apply new Givens rotation
+    H(iter, iter)     = nu;
+    H(iter, iter + 1) = 0.;
+    gamma[iter + 1] = -s[iter] * gamma[iter]; // its squared magnitude is the new residual estimate
+    gamma[iter]     = std::conj(c[iter]) * gamma[iter];
+    QrTimer.Stop();
+  }
+
+  // Back substitution on the triangular part of H for y[0..iter] (right-hand side gamma), followed by
+  // the update psi += sum_i v[i] * y[i].
+  void computeSolution(std::vector<Field> const &v, Field &psi, int iter) {
+    CompSolutionTimer.Start();
+    for (int i = iter; i >= 0; i--) {
+      y[i] = gamma[i];
+      for (int k = i + 1; k <= iter; k++)
+        y[i] = y[i] - H(k, i) * y[k];
+      y[i] = y[i] / H(i, i);
+    }
+    for (int i = 0; i <= iter; i++)
+      psi = psi + v[i] * y[i];
+    CompSolutionTimer.Stop();
+  }
+};
+}
+#endif
diff --git a/Grid/algorithms/iterative/FlexibleCommunicationAvoidingGeneralisedMinimalResidual.h b/Grid/algorithms/iterative/FlexibleCommunicationAvoidingGeneralisedMinimalResidual.h
new file mode 100644
index 00000000..db857248
--- /dev/null
+++ b/Grid/algorithms/iterative/FlexibleCommunicationAvoidingGeneralisedMinimalResidual.h
@@ -0,0 +1,256 @@
+/*************************************************************************************
+
+Grid physics library, www.github.com/paboyle/Grid
+
+Source file: ./lib/algorithms/iterative/FlexibleCommunicationAvoidingGeneralisedMinimalResidual.h
+
+Copyright (C) 2015
+
+Author: Daniel Richtmann <daniel.richtmann@ur.de>
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along
+with this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+
+See the full license in the file "LICENSE" in the top level distribution
+directory
+*************************************************************************************/
+/*  END LEGAL */
+#ifndef GRID_FLEXIBLE_COMMUNICATION_AVOIDING_GENERALISED_MINIMAL_RESIDUAL_H
+#define GRID_FLEXIBLE_COMMUNICATION_AVOIDING_GENERALISED_MINIMAL_RESIDUAL_H
+
+namespace Grid {
+
+// Restarted flexible GMRES solver: like GMRES, but every Arnoldi vector v[j] is first passed through the
+// preconditioner and the solution is assembled from the preconditioned vectors z[j] (same as regular FGMRES).
+template<class Field>
+class FlexibleCommunicationAvoidingGeneralisedMinimalResidual : public OperatorFunction<Field> {
+ public:
+  bool ErrorOnNoConverge; // Throw an assert when FCAGMRES fails to converge,
+                          // defaults to true
+
+  RealD   Tolerance;           // converged once the squared residual is <= Tolerance^2 * norm2(src)
+  Integer MaxIterations;
+  Integer RestartLength;       // maximum number of Arnoldi steps per restart cycle
+  Integer MaxNumberOfRestarts; // ceil(MaxIterations / RestartLength)
+  Integer IterationCount = 0;  // Number of iterations the FCAGMRES took to finish,
+                               // filled in upon completion
+
+  GridStopWatch MatrixTimer;
+  GridStopWatch PrecTimer;
+  GridStopWatch LinalgTimer;
+  GridStopWatch QrTimer;
+  GridStopWatch CompSolutionTimer;
+
+  Eigen::MatrixXcd H; // H(j, i): coefficient of v[i] from Arnoldi step j, rotated in place by qrUpdate
+  std::vector<std::complex<double>> y;     // coefficients of the vectors z[i] in the solution update
+  std::vector<std::complex<double>> gamma; // rotated right-hand side; norm(gamma[j+1]) is the residual estimate
+  std::vector<std::complex<double>> c;     // Givens rotation coefficients, one pair (c[j], s[j]) per step
+  std::vector<std::complex<double>> s;
+
+  LinearFunction<Field> &Preconditioner; // held by reference: the caller's object must outlive this solver
+
+  FlexibleCommunicationAvoidingGeneralisedMinimalResidual(RealD   tol,
+                                                          Integer maxit,
+                                                          LinearFunction<Field> &Prec,
+                                                          Integer restart_length,
+                                                          bool    err_on_no_conv = true)
+      : ErrorOnNoConverge(err_on_no_conv) // initialisers follow the declaration order (avoids -Wreorder)
+      , Tolerance(tol)
+      , MaxIterations(maxit)
+      , RestartLength(restart_length)
+      , MaxNumberOfRestarts(MaxIterations/RestartLength + ((MaxIterations%RestartLength == 0) ? 0 : 1))
+      , H(Eigen::MatrixXcd::Zero(RestartLength, RestartLength + 1)) // sizes taken from DD-αAMG code base
+      , y(RestartLength + 1, 0.)
+      , gamma(RestartLength + 1, 0.)
+      , c(RestartLength + 1, 0.)
+      , s(RestartLength + 1, 0.)
+      , Preconditioner(Prec) {}
+
+  // Solves LinOp * psi = src, using the incoming psi as initial guess and overwriting it with the result.
+  // Runs up to MaxNumberOfRestarts restart cycles; on convergence the true residual is recomputed and logged.
+  void operator()(LinearOperatorBase<Field> &LinOp, const Field &src, Field &psi) {
+    std::cout << GridLogWarning << "This algorithm currently doesn't differ from regular FGMRES" << std::endl;
+    psi.checkerboard = src.checkerboard;
+    conformable(psi, src);
+
+    RealD guess = norm2(psi);
+    assert(std::isnan(guess) == 0);
+
+    RealD cp;
+    RealD ssq = norm2(src);
+    RealD rsq = Tolerance * Tolerance * ssq; // target for the squared residual norm
+
+    Field r(src._grid);
+
+    std::cout << std::setprecision(4) << std::scientific;
+    std::cout << GridLogIterative << "FlexibleCommunicationAvoidingGeneralisedMinimalResidual: guess " << guess << std::endl;
+    std::cout << GridLogIterative << "FlexibleCommunicationAvoidingGeneralisedMinimalResidual:   src " << ssq   << std::endl;
+
+    PrecTimer.Reset();
+    MatrixTimer.Reset();
+    LinalgTimer.Reset();
+    QrTimer.Reset();
+    CompSolutionTimer.Reset();
+
+    GridStopWatch SolverTimer;
+    SolverTimer.Start();
+
+    IterationCount = 0;
+
+    for (int k=0; k<MaxNumberOfRestarts; k++) {
+
+      cp = outerLoopBody(LinOp, src, psi, rsq);
+
+      // Stopping condition
+      if (cp <= rsq) {
+
+        SolverTimer.Stop();
+
+        LinOp.Op(psi,r);
+        axpy(r,-1.0,src,r);
+
+        RealD srcnorm       = sqrt(ssq);
+        RealD resnorm       = sqrt(norm2(r));
+        RealD true_residual = resnorm / srcnorm;
+
+        std::cout << GridLogMessage        << "FlexibleCommunicationAvoidingGeneralisedMinimalResidual: Converged on iteration " << IterationCount
+                  << " computed residual " << sqrt(cp / ssq)
+                  << " true residual "     << true_residual
+                  << " target "            << Tolerance << std::endl;
+
+        std::cout << GridLogMessage << "FCAGMRES Time elapsed: Total   " <<       SolverTimer.Elapsed() << std::endl;
+        std::cout << GridLogMessage << "FCAGMRES Time elapsed: Precon  " <<         PrecTimer.Elapsed() << std::endl;
+        std::cout << GridLogMessage << "FCAGMRES Time elapsed: Matrix  " <<       MatrixTimer.Elapsed() << std::endl;
+        std::cout << GridLogMessage << "FCAGMRES Time elapsed: Linalg  " <<       LinalgTimer.Elapsed() << std::endl;
+        std::cout << GridLogMessage << "FCAGMRES Time elapsed: QR      " <<           QrTimer.Elapsed() << std::endl;
+        std::cout << GridLogMessage << "FCAGMRES Time elapsed: CompSol " << CompSolutionTimer.Elapsed() << std::endl;
+        return;
+      }
+    }
+
+    std::cout << GridLogMessage << "FlexibleCommunicationAvoidingGeneralisedMinimalResidual did NOT converge" << std::endl;
+
+    if (ErrorOnNoConverge)
+      assert(0);
+  }
+
+  // One restart cycle: recomputes the residual of psi, runs up to RestartLength Arnoldi/QR steps and adds
+  // the resulting correction to psi. Returns the squared residual norm that the caller compares with rsq.
+  RealD outerLoopBody(LinearOperatorBase<Field> &LinOp, const Field &src, Field &psi, RealD rsq) {
+    RealD cp = 0;
+    Field w(src._grid);
+    Field r(src._grid);
+    // these should probably be made class members so that they are only allocated once, not in every restart
+    std::vector<Field> v(RestartLength + 1, src._grid); for (auto &elem : v) elem = zero;
+    std::vector<Field> z(RestartLength + 1, src._grid); for (auto &elem : z) elem = zero;
+
+    MatrixTimer.Start();
+    LinOp.Op(psi, w);
+    MatrixTimer.Stop();
+
+    LinalgTimer.Start();
+    r = src - w;
+    RealD rr = norm2(r);
+    // Residual already within target (e.g. exact guess or zero source): normalising r would divide by zero
+    if (rr <= rsq) { LinalgTimer.Stop(); return rr; }
+    gamma[0] = sqrt(rr);
+    v[0] = (1. / gamma[0]) * r;
+    LinalgTimer.Stop();
+
+    for (int i=0; i<RestartLength; i++) {
+
+      IterationCount++;
+
+      arnoldiStep(LinOp, v, z, w, i);
+
+      qrUpdate(i);
+
+      cp = std::norm(gamma[i+1]);
+
+      std::cout << GridLogIterative << "FlexibleCommunicationAvoidingGeneralisedMinimalResidual: Iteration " << IterationCount
+                << " residual " << cp << " target " << rsq << std::endl;
+
+      if ((i == RestartLength - 1) || (IterationCount == MaxIterations) || (cp <= rsq)) {
+
+        computeSolution(z, psi, i);
+
+        return cp;
+      }
+    }
+
+    assert(0); // Never reached
+    return cp;
+  }
+
+  // One Arnoldi step: z[iter] = Preconditioner(v[iter]), w = LinOp * z[iter], orthogonalised against
+  // v[0..iter] one vector at a time; fills H(iter, 0..iter+1) and stores the normalised remainder as v[iter + 1].
+  void arnoldiStep(LinearOperatorBase<Field> &LinOp, std::vector<Field> &v, std::vector<Field> &z, Field &w, int iter) {
+    PrecTimer.Start();
+    Preconditioner(v[iter], z[iter]);
+    PrecTimer.Stop();
+
+    MatrixTimer.Start();
+    LinOp.Op(z[iter], w);
+    MatrixTimer.Stop();
+
+    LinalgTimer.Start();
+    for (int i = 0; i <= iter; ++i) {
+      H(iter, i) = innerProduct(v[i], w); // projection is taken against the already updated w
+      w = w - H(iter, i) * v[i];
+    }
+    H(iter, iter + 1) = sqrt(norm2(w));
+    v[iter + 1] = (1. / H(iter, iter + 1)) * w;
+    LinalgTimer.Stop();
+  }
+
+  // Applies the rotations of the previous steps to the entries H(iter, .) of the current step, then
+  // computes a new rotation that zeroes H(iter, iter + 1) and applies it to gamma as well.
+  void qrUpdate(int iter) {
+    QrTimer.Start();
+    for (int i = 0; i < iter ; ++i) {
+      auto tmp       = -s[i] * H(iter, i) + c[i] * H(iter, i + 1);
+      H(iter, i)     = std::conj(c[i]) * H(iter, i) + std::conj(s[i]) * H(iter, i + 1);
+      H(iter, i + 1) = tmp;
+    }
+
+    // Compute new Givens Rotation
+    ComplexD nu = sqrt(std::norm(H(iter, iter)) + std::norm(H(iter, iter + 1)));
+    c[iter]     = H(iter, iter) / nu;
+    s[iter]     = H(iter, iter + 1) / nu;
+
+    // Apply new Givens rotation
+    H(iter, iter)     = nu;
+    H(iter, iter + 1) = 0.;
+    gamma[iter + 1] = -s[iter] * gamma[iter]; // its squared magnitude is the new residual estimate
+    gamma[iter]     = std::conj(c[iter]) * gamma[iter];
+    QrTimer.Stop();
+  }
+
+  // Back substitution on the triangular part of H for y[0..iter] (right-hand side gamma), followed by
+  // the update psi += sum_i z[i] * y[i] built from the preconditioned vectors.
+  void computeSolution(std::vector<Field> const &z, Field &psi, int iter) {
+    CompSolutionTimer.Start();
+    for (int i = iter; i >= 0; i--) {
+      y[i] = gamma[i];
+      for (int k = i + 1; k <= iter; k++)
+        y[i] = y[i] - H(k, i) * y[k];
+      y[i] = y[i] / H(i, i);
+    }
+    for (int i = 0; i <= iter; i++)
+      psi = psi + z[i] * y[i];
+    CompSolutionTimer.Stop();
+  }
+};
+}
+#endif
diff --git a/Grid/algorithms/iterative/FlexibleGeneralisedMinimalResidual.h b/Grid/algorithms/iterative/FlexibleGeneralisedMinimalResidual.h
new file mode 100644
index 00000000..efc8c787
--- /dev/null
+++ b/Grid/algorithms/iterative/FlexibleGeneralisedMinimalResidual.h
@@ -0,0 +1,254 @@
+/*************************************************************************************
+
+Grid physics library, www.github.com/paboyle/Grid
+
+Source file: ./lib/algorithms/iterative/FlexibleGeneralisedMinimalResidual.h
+
+Copyright (C) 2015
+
+Author: Daniel Richtmann <daniel.richtmann@ur.de>
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along
+with this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+
+See the full license in the file "LICENSE" in the top level distribution
+directory
+*************************************************************************************/
+/*  END LEGAL */
+#ifndef GRID_FLEXIBLE_GENERALISED_MINIMAL_RESIDUAL_H
+#define GRID_FLEXIBLE_GENERALISED_MINIMAL_RESIDUAL_H
+
+namespace Grid {
+
+// Restarted flexible GMRES solver: like GMRES, but every Arnoldi vector v[j] is first passed through the
+// preconditioner and the solution is assembled from the preconditioned vectors z[j].
+template<class Field>
+class FlexibleGeneralisedMinimalResidual : public OperatorFunction<Field> {
+ public:
+  bool ErrorOnNoConverge; // Throw an assert when FGMRES fails to converge,
+                          // defaults to true
+
+  RealD   Tolerance;           // converged once the squared residual is <= Tolerance^2 * norm2(src)
+  Integer MaxIterations;
+  Integer RestartLength;       // maximum number of Arnoldi steps per restart cycle
+  Integer MaxNumberOfRestarts; // ceil(MaxIterations / RestartLength)
+  Integer IterationCount = 0;  // Number of iterations the FGMRES took to finish,
+                               // filled in upon completion
+
+  GridStopWatch MatrixTimer;
+  GridStopWatch PrecTimer;
+  GridStopWatch LinalgTimer;
+  GridStopWatch QrTimer;
+  GridStopWatch CompSolutionTimer;
+
+  Eigen::MatrixXcd H; // H(j, i): coefficient of v[i] from Arnoldi step j, rotated in place by qrUpdate
+  std::vector<std::complex<double>> y;     // coefficients of the vectors z[i] in the solution update
+  std::vector<std::complex<double>> gamma; // rotated right-hand side; norm(gamma[j+1]) is the residual estimate
+  std::vector<std::complex<double>> c;     // Givens rotation coefficients, one pair (c[j], s[j]) per step
+  std::vector<std::complex<double>> s;
+
+  LinearFunction<Field> &Preconditioner; // held by reference: the caller's object must outlive this solver
+
+  FlexibleGeneralisedMinimalResidual(RealD   tol,
+                                     Integer maxit,
+                                     LinearFunction<Field> &Prec,
+                                     Integer restart_length,
+                                     bool    err_on_no_conv = true)
+      : ErrorOnNoConverge(err_on_no_conv) // initialisers follow the declaration order (avoids -Wreorder)
+      , Tolerance(tol)
+      , MaxIterations(maxit)
+      , RestartLength(restart_length)
+      , MaxNumberOfRestarts(MaxIterations/RestartLength + ((MaxIterations%RestartLength == 0) ? 0 : 1))
+      , H(Eigen::MatrixXcd::Zero(RestartLength, RestartLength + 1)) // sizes taken from DD-αAMG code base
+      , y(RestartLength + 1, 0.)
+      , gamma(RestartLength + 1, 0.)
+      , c(RestartLength + 1, 0.)
+      , s(RestartLength + 1, 0.)
+      , Preconditioner(Prec) {}
+
+  // Solves LinOp * psi = src, using the incoming psi as initial guess and overwriting it with the result.
+  // Runs up to MaxNumberOfRestarts restart cycles; on convergence the true residual is recomputed and logged.
+  void operator()(LinearOperatorBase<Field> &LinOp, const Field &src, Field &psi) {
+    psi.checkerboard = src.checkerboard;
+    conformable(psi, src);
+
+    RealD guess = norm2(psi);
+    assert(std::isnan(guess) == 0);
+
+    RealD cp;
+    RealD ssq = norm2(src);
+    RealD rsq = Tolerance * Tolerance * ssq; // target for the squared residual norm
+    Field r(src._grid);
+
+    std::cout << std::setprecision(4) << std::scientific;
+    std::cout << GridLogIterative << "FlexibleGeneralisedMinimalResidual: guess " << guess << std::endl;
+    std::cout << GridLogIterative << "FlexibleGeneralisedMinimalResidual:   src " << ssq   << std::endl;
+
+    PrecTimer.Reset();
+    MatrixTimer.Reset();
+    LinalgTimer.Reset();
+    QrTimer.Reset();
+    CompSolutionTimer.Reset();
+
+    GridStopWatch SolverTimer;
+    SolverTimer.Start();
+
+    IterationCount = 0;
+
+    for (int k=0; k<MaxNumberOfRestarts; k++) {
+
+      cp = outerLoopBody(LinOp, src, psi, rsq);
+
+      // Stopping condition
+      if (cp <= rsq) {
+
+        SolverTimer.Stop();
+
+        LinOp.Op(psi,r);
+        axpy(r,-1.0,src,r);
+
+        RealD srcnorm       = sqrt(ssq);
+        RealD resnorm       = sqrt(norm2(r));
+        RealD true_residual = resnorm / srcnorm;
+
+        std::cout << GridLogMessage        << "FlexibleGeneralisedMinimalResidual: Converged on iteration " << IterationCount
+                  << " computed residual " << sqrt(cp / ssq)
+                  << " true residual "     << true_residual
+                  << " target "            << Tolerance << std::endl;
+
+        std::cout << GridLogMessage << "FGMRES Time elapsed: Total   " <<       SolverTimer.Elapsed() << std::endl;
+        std::cout << GridLogMessage << "FGMRES Time elapsed: Precon  " <<         PrecTimer.Elapsed() << std::endl;
+        std::cout << GridLogMessage << "FGMRES Time elapsed: Matrix  " <<       MatrixTimer.Elapsed() << std::endl;
+        std::cout << GridLogMessage << "FGMRES Time elapsed: Linalg  " <<       LinalgTimer.Elapsed() << std::endl;
+        std::cout << GridLogMessage << "FGMRES Time elapsed: QR      " <<           QrTimer.Elapsed() << std::endl;
+        std::cout << GridLogMessage << "FGMRES Time elapsed: CompSol " << CompSolutionTimer.Elapsed() << std::endl;
+        return;
+      }
+    }
+
+    std::cout << GridLogMessage << "FlexibleGeneralisedMinimalResidual did NOT converge" << std::endl;
+
+    if (ErrorOnNoConverge)
+      assert(0);
+  }
+
+  // One restart cycle: recomputes the residual of psi, runs up to RestartLength Arnoldi/QR steps and adds
+  // the resulting correction to psi. Returns the squared residual norm that the caller compares with rsq.
+  RealD outerLoopBody(LinearOperatorBase<Field> &LinOp, const Field &src, Field &psi, RealD rsq) {
+    RealD cp = 0;
+    Field w(src._grid);
+    Field r(src._grid);
+    // these should probably be made class members so that they are only allocated once, not in every restart
+    std::vector<Field> v(RestartLength + 1, src._grid); for (auto &elem : v) elem = zero;
+    std::vector<Field> z(RestartLength + 1, src._grid); for (auto &elem : z) elem = zero;
+
+    MatrixTimer.Start();
+    LinOp.Op(psi, w);
+    MatrixTimer.Stop();
+
+    LinalgTimer.Start();
+    r = src - w;
+    RealD rr = norm2(r);
+    // Residual already within target (e.g. exact guess or zero source): normalising r would divide by zero
+    if (rr <= rsq) { LinalgTimer.Stop(); return rr; }
+    gamma[0] = sqrt(rr);
+    v[0] = (1. / gamma[0]) * r;
+    LinalgTimer.Stop();
+
+    for (int i=0; i<RestartLength; i++) {
+
+      IterationCount++;
+
+      arnoldiStep(LinOp, v, z, w, i);
+
+      qrUpdate(i);
+
+      cp = std::norm(gamma[i+1]);
+
+      std::cout << GridLogIterative << "FlexibleGeneralisedMinimalResidual: Iteration " << IterationCount
+                << " residual " << cp << " target " << rsq << std::endl;
+
+      if ((i == RestartLength - 1) || (IterationCount == MaxIterations) || (cp <= rsq)) {
+
+        computeSolution(z, psi, i);
+
+        return cp;
+      }
+    }
+
+    assert(0); // Never reached
+    return cp;
+  }
+
+  // One Arnoldi step: z[iter] = Preconditioner(v[iter]), w = LinOp * z[iter], orthogonalised against
+  // v[0..iter] one vector at a time; fills H(iter, 0..iter+1) and stores the normalised remainder as v[iter + 1].
+  void arnoldiStep(LinearOperatorBase<Field> &LinOp, std::vector<Field> &v, std::vector<Field> &z, Field &w, int iter) {
+    PrecTimer.Start();
+    Preconditioner(v[iter], z[iter]);
+    PrecTimer.Stop();
+
+    MatrixTimer.Start();
+    LinOp.Op(z[iter], w);
+    MatrixTimer.Stop();
+
+    LinalgTimer.Start();
+    for (int i = 0; i <= iter; ++i) {
+      H(iter, i) = innerProduct(v[i], w); // projection is taken against the already updated w
+      w = w - H(iter, i) * v[i];
+    }
+    H(iter, iter + 1) = sqrt(norm2(w));
+    v[iter + 1] = (1. / H(iter, iter + 1)) * w;
+    LinalgTimer.Stop();
+  }
+
+  // Applies the rotations of the previous steps to the entries H(iter, .) of the current step, then
+  // computes a new rotation that zeroes H(iter, iter + 1) and applies it to gamma as well.
+  void qrUpdate(int iter) {
+    QrTimer.Start();
+    for (int i = 0; i < iter ; ++i) {
+      auto tmp       = -s[i] * H(iter, i) + c[i] * H(iter, i + 1);
+      H(iter, i)     = std::conj(c[i]) * H(iter, i) + std::conj(s[i]) * H(iter, i + 1);
+      H(iter, i + 1) = tmp;
+    }
+
+    // Compute new Givens Rotation
+    ComplexD nu = sqrt(std::norm(H(iter, iter)) + std::norm(H(iter, iter + 1)));
+    c[iter]     = H(iter, iter) / nu;
+    s[iter]     = H(iter, iter + 1) / nu;
+
+    // Apply new Givens rotation
+    H(iter, iter)     = nu;
+    H(iter, iter + 1) = 0.;
+    gamma[iter + 1] = -s[iter] * gamma[iter]; // its squared magnitude is the new residual estimate
+    gamma[iter]     = std::conj(c[iter]) * gamma[iter];
+    QrTimer.Stop();
+  }
+
+  // Back substitution on the triangular part of H for y[0..iter] (right-hand side gamma), followed by
+  // the update psi += sum_i z[i] * y[i] built from the preconditioned vectors.
+  void computeSolution(std::vector<Field> const &z, Field &psi, int iter) {
+    CompSolutionTimer.Start();
+    for (int i = iter; i >= 0; i--) {
+      y[i] = gamma[i];
+      for (int k = i + 1; k <= iter; k++)
+        y[i] = y[i] - H(k, i) * y[k];
+      y[i] = y[i] / H(i, i);
+    }
+    for (int i = 0; i <= iter; i++)
+      psi = psi + z[i] * y[i];
+    CompSolutionTimer.Stop();
+  }
+};
+}
+#endif
diff --git a/Grid/algorithms/iterative/GeneralisedMinimalResidual.h b/Grid/algorithms/iterative/GeneralisedMinimalResidual.h
new file mode 100644
index 00000000..10636234
--- /dev/null
+++ b/Grid/algorithms/iterative/GeneralisedMinimalResidual.h
@@ -0,0 +1,242 @@
+/*************************************************************************************
+
+Grid physics library, www.github.com/paboyle/Grid
+
+Source file: ./Grid/algorithms/iterative/GeneralisedMinimalResidual.h
+
+Copyright (C) 2015
+
+Author: Daniel Richtmann <daniel.richtmann@ur.de>
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along
+with this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+
+See the full license in the file "LICENSE" in the top level distribution
+directory
+*************************************************************************************/
+/*  END LEGAL */
+#ifndef GRID_GENERALISED_MINIMAL_RESIDUAL_H
+#define GRID_GENERALISED_MINIMAL_RESIDUAL_H
+
+namespace Grid {
+// Restarted GMRES: each cycle builds up to RestartLength Arnoldi vectors and tracks the residual with Givens rotations.
+template<class Field>
+class GeneralisedMinimalResidual : public OperatorFunction<Field> {
+ public:
+  bool ErrorOnNoConverge; // Throw an assert when GMRES fails to converge,
+                          // defaults to true
+
+  RealD   Tolerance;           // converged once residual^2 <= Tolerance^2 * |src|^2
+
+  Integer MaxIterations;       // cap on the total number of Arnoldi steps over all restarts
+  Integer RestartLength;       // number of Arnoldi steps per restart cycle
+  Integer MaxNumberOfRestarts; // MaxIterations / RestartLength, rounded up
+  Integer IterationCount; // Number of iterations the GMRES took to finish,
+                          // filled in upon completion
+
+  GridStopWatch MatrixTimer;
+  GridStopWatch LinalgTimer;
+  GridStopWatch QrTimer;
+  GridStopWatch CompSolutionTimer;
+
+  Eigen::MatrixXcd H; // H(k, i): coefficient of basis vector i produced in Arnoldi step k
+
+  std::vector<std::complex<double>> y;     // coefficients of the solution update
+  std::vector<std::complex<double>> gamma; // rotated right-hand side; |gamma[k+1]|^2 is the residual estimate after step k
+  std::vector<std::complex<double>> c;     // Givens rotation coefficients (c and s)
+  std::vector<std::complex<double>> s;
+
+  GeneralisedMinimalResidual(RealD   tol,
+                             Integer maxit,
+                             Integer restart_length,
+                             bool    err_on_no_conv = true)
+      : ErrorOnNoConverge(err_on_no_conv) // initialisers are listed in member declaration order
+      , Tolerance(tol)
+      , MaxIterations(maxit)
+      , RestartLength(restart_length)
+      , MaxNumberOfRestarts(MaxIterations/RestartLength + ((MaxIterations%RestartLength == 0) ? 0 : 1))
+      , H(Eigen::MatrixXcd::Zero(RestartLength, RestartLength + 1)) // sizes taken from DD-αAMG code base
+      , y(RestartLength + 1, 0.)
+      , gamma(RestartLength + 1, 0.)
+      , c(RestartLength + 1, 0.)
+      , s(RestartLength + 1, 0.) {};
+
+  void operator()(LinearOperatorBase<Field> &LinOp, const Field &src, Field &psi) {
+    // Solve LinOp psi = src starting from the incoming psi; asserts on non-convergence if ErrorOnNoConverge is set.
+    psi.checkerboard = src.checkerboard;
+    conformable(psi, src);
+
+    RealD guess = norm2(psi);
+    assert(std::isnan(guess) == 0);
+
+    RealD cp;
+    RealD ssq = norm2(src);
+    RealD rsq = Tolerance * Tolerance * ssq;
+
+    Field r(src._grid);
+
+    std::cout << std::setprecision(4) << std::scientific;
+    std::cout << GridLogIterative << "GeneralisedMinimalResidual: guess " << guess << std::endl;
+    std::cout << GridLogIterative << "GeneralisedMinimalResidual:   src " << ssq   << std::endl;
+
+    MatrixTimer.Reset();
+    LinalgTimer.Reset();
+    QrTimer.Reset();
+    CompSolutionTimer.Reset();
+
+    GridStopWatch SolverTimer;
+    SolverTimer.Start();
+
+    IterationCount = 0;
+
+    for (int k=0; k<MaxNumberOfRestarts; k++) {
+
+      cp = outerLoopBody(LinOp, src, psi, rsq);
+
+      // Stopping condition
+      if (cp <= rsq) {
+
+        SolverTimer.Stop();
+
+        LinOp.Op(psi,r);
+        axpy(r,-1.0,src,r);
+
+        RealD srcnorm       = sqrt(ssq);
+        RealD resnorm       = sqrt(norm2(r));
+        RealD true_residual = resnorm / srcnorm;
+
+        std::cout << GridLogMessage        << "GeneralisedMinimalResidual: Converged on iteration " << IterationCount
+                  << " computed residual " << sqrt(cp / ssq)
+                  << " true residual "     << true_residual
+                  << " target "            << Tolerance << std::endl;
+
+        std::cout << GridLogMessage << "GMRES Time elapsed: Total   " <<       SolverTimer.Elapsed() << std::endl;
+        std::cout << GridLogMessage << "GMRES Time elapsed: Matrix  " <<       MatrixTimer.Elapsed() << std::endl;
+        std::cout << GridLogMessage << "GMRES Time elapsed: Linalg  " <<       LinalgTimer.Elapsed() << std::endl;
+        std::cout << GridLogMessage << "GMRES Time elapsed: QR      " <<           QrTimer.Elapsed() << std::endl;
+        std::cout << GridLogMessage << "GMRES Time elapsed: CompSol " << CompSolutionTimer.Elapsed() << std::endl;
+        return;
+      }
+    }
+
+    std::cout << GridLogMessage << "GeneralisedMinimalResidual did NOT converge" << std::endl;
+
+    if (ErrorOnNoConverge)
+      assert(0);
+  }
+
+  RealD outerLoopBody(LinearOperatorBase<Field> &LinOp, const Field &src, Field &psi, RealD rsq) {
+    // One restart cycle: updates psi in place and returns the squared residual (estimate) it reached.
+    RealD cp = 0;
+
+    Field w(src._grid);
+    Field r(src._grid);
+
+    // this should probably be made a class member so that it is only allocated once, not in every restart
+    std::vector<Field> v(RestartLength + 1, src._grid); for (auto &elem : v) elem = zero;
+
+    MatrixTimer.Start();
+    LinOp.Op(psi, w);
+    MatrixTimer.Stop();
+
+    LinalgTimer.Start();
+    r  = src - w;
+    cp = norm2(r);
+    if (cp <= rsq) { LinalgTimer.Stop(); return cp; } // psi already meets the target; also avoids dividing by |r| = 0 below
+    gamma[0] = sqrt(cp);
+    v[0] = (1. / gamma[0]) * r;
+    LinalgTimer.Stop();
+
+    for (int i=0; i<RestartLength; i++) {
+
+      IterationCount++;
+
+      arnoldiStep(LinOp, v, w, i);
+
+      qrUpdate(i);
+
+      cp = std::norm(gamma[i+1]);
+
+      std::cout << GridLogIterative << "GeneralisedMinimalResidual: Iteration " << IterationCount
+                << " residual " << cp << " target " << rsq << std::endl;
+
+      if ((i == RestartLength - 1) || (IterationCount == MaxIterations) || (cp <= rsq)) {
+
+        computeSolution(v, psi, i);
+
+        return cp;
+      }
+    }
+
+    assert(0); // Never reached
+    return cp;
+  }
+
+  void arnoldiStep(LinearOperatorBase<Field> &LinOp, std::vector<Field> &v, Field &w, int iter) {
+    // One Arnoldi step: w = LinOp v[iter], orthogonalised against v[0..iter]; fills row iter of H and v[iter + 1].
+    MatrixTimer.Start();
+    LinOp.Op(v[iter], w);
+    MatrixTimer.Stop();
+
+    LinalgTimer.Start();
+    for (int i = 0; i <= iter; ++i) {
+      H(iter, i) = innerProduct(v[i], w);
+      w = w - H(iter, i) * v[i];
+    }
+    // The norm of what is left of w goes to H(iter, iter + 1); the normalised remainder becomes v[iter + 1]
+    H(iter, iter + 1) = sqrt(norm2(w));
+    v[iter + 1] = (1. / H(iter, iter + 1)) * w;
+    LinalgTimer.Stop();
+  }
+
+  void qrUpdate(int iter) {
+    // Apply the stored rotations to row iter of H, then build and apply the rotation that zeroes H(iter, iter + 1).
+    QrTimer.Start();
+    for (int i = 0; i < iter ; ++i) {
+      auto tmp       = -s[i] * H(iter, i) + c[i] * H(iter, i + 1);
+      H(iter, i)     = std::conj(c[i]) * H(iter, i) + std::conj(s[i]) * H(iter, i + 1);
+      H(iter, i + 1) = tmp;
+    }
+
+    // Compute new Givens Rotation
+    ComplexD nu = sqrt(std::norm(H(iter, iter)) + std::norm(H(iter, iter + 1)));
+    c[iter]     = H(iter, iter) / nu;
+    s[iter]     = H(iter, iter + 1) / nu;
+
+    // Apply new Givens rotation
+    H(iter, iter)     = nu;
+    H(iter, iter + 1) = 0.;
+
+    gamma[iter + 1] = -s[iter] * gamma[iter];
+    gamma[iter]     = std::conj(c[iter]) * gamma[iter];
+    QrTimer.Stop();
+  }
+
+  void computeSolution(std::vector<Field> const &v, Field &psi, int iter) {
+    // Back-substitute for y using the rotated H and gamma, then add sum_i y[i] * v[i] to psi.
+    CompSolutionTimer.Start();
+    for (int i = iter; i >= 0; i--) {
+      y[i] = gamma[i];
+      for (int k = i + 1; k <= iter; k++)
+        y[i] = y[i] - H(k, i) * y[k];
+      y[i] = y[i] / H(i, i);
+    }
+
+    for (int i = 0; i <= iter; i++)
+      psi = psi + v[i] * y[i];
+    CompSolutionTimer.Stop();
+  }
+};
+}
+#endif
diff --git a/Grid/algorithms/iterative/MinimalResidual.h b/Grid/algorithms/iterative/MinimalResidual.h
new file mode 100644
index 00000000..fa1912cf
--- /dev/null
+++ b/Grid/algorithms/iterative/MinimalResidual.h
@@ -0,0 +1,156 @@
+/*************************************************************************************
+
+Grid physics library, www.github.com/paboyle/Grid
+
+Source file: ./Grid/algorithms/iterative/MinimalResidual.h
+
+Copyright (C) 2015
+
+Author: Daniel Richtmann <daniel.richtmann@ur.de>
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along
+with this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+
+See the full license in the file "LICENSE" in the top level distribution
+directory
+*************************************************************************************/
+/*  END LEGAL */
+#ifndef GRID_MINIMAL_RESIDUAL_H
+#define GRID_MINIMAL_RESIDUAL_H
+
+namespace Grid {
+// Minimal residual iteration with over-relaxation: each step does psi += a r, r -= a M r, a = overRelaxParam * <Mr,r> / |Mr|^2.
+template<class Field> class MinimalResidual : public OperatorFunction<Field> {
+ public:
+  bool ErrorOnNoConverge; // throw an assert when the MR fails to converge.
+                          // Defaults true.
+  RealD   Tolerance;
+  Integer MaxIterations;
+  RealD   overRelaxParam;
+  Integer IterationsToComplete; // Number of iterations the MR took to finish.
+                                // Filled in upon completion
+
+  MinimalResidual(RealD tol, Integer maxit, Real ovrelparam = 1.0, bool err_on_no_conv = true)
+    : ErrorOnNoConverge(err_on_no_conv), Tolerance(tol), MaxIterations(maxit), overRelaxParam(ovrelparam){};
+
+  void operator()(LinearOperatorBase<Field> &Linop, const Field &src, Field &psi) {
+    // Solve Linop psi = src starting from the incoming psi; asserts on non-convergence if ErrorOnNoConverge is set.
+    psi.checkerboard = src.checkerboard;
+    conformable(psi, src);
+
+    Complex a, c;
+    Real    d = 0; // |M r|^2; logged below before the first iteration computes it, so it must start defined
+
+    Field Mr(src);
+    Field r(src);
+
+    // Initial residual computation & set up
+    RealD guess = norm2(psi);
+    assert(std::isnan(guess) == 0);
+
+    RealD ssq = norm2(src);
+    RealD rsq = Tolerance * Tolerance * ssq;
+
+    Linop.Op(psi, Mr);
+
+    r = src - Mr;
+
+    RealD cp = norm2(r);
+
+    std::cout << std::setprecision(4) << std::scientific;
+    std::cout << GridLogIterative << "MinimalResidual: guess " << guess << std::endl;
+    std::cout << GridLogIterative << "MinimalResidual:   src " << ssq << std::endl;
+    std::cout << GridLogIterative << "MinimalResidual:    mp " << d << std::endl;
+    std::cout << GridLogIterative << "MinimalResidual:  cp,r " << cp << std::endl;
+    // The guess already meets the target: record that no iterations were needed and stop
+    if (cp <= rsq) {
+      IterationsToComplete = 0;
+      return;
+    }
+    std::cout << GridLogIterative << "MinimalResidual: k=0 residual " << cp << " target " << rsq << std::endl;
+
+    GridStopWatch LinalgTimer;
+    GridStopWatch MatrixTimer;
+    GridStopWatch SolverTimer;
+
+    SolverTimer.Start();
+    int k;
+    for (k = 1; k <= MaxIterations; k++) {
+
+      MatrixTimer.Start();
+      Linop.Op(r, Mr);
+      MatrixTimer.Stop();
+
+      LinalgTimer.Start();
+
+      c = innerProduct(Mr, r);
+
+      d = norm2(Mr);
+
+      a = c / d;
+
+      a = a * overRelaxParam;
+
+      psi = psi + r * a;
+
+      r = r - Mr * a;
+
+      cp = norm2(r);
+
+      LinalgTimer.Stop();
+
+      std::cout << GridLogIterative << "MinimalResidual: Iteration " << k
+                << " residual " << cp << " target " << rsq << std::endl;
+      std::cout << GridLogDebug << "a = " << a << " c = " << c << " d = " << d << std::endl;
+
+      // Stopping condition
+      if (cp <= rsq) {
+        SolverTimer.Stop();
+
+        Linop.Op(psi, Mr);
+        r = src - Mr;
+
+        RealD srcnorm       = sqrt(ssq);
+        RealD resnorm       = sqrt(norm2(r));
+        RealD true_residual = resnorm / srcnorm;
+
+        std::cout << GridLogMessage        << "MinimalResidual Converged on iteration " << k
+                  << " computed residual " << sqrt(cp / ssq)
+                  << " true residual "     << true_residual
+                  << " target "            << Tolerance << std::endl;
+
+        std::cout << GridLogMessage << "MR Time elapsed: Total   " << SolverTimer.Elapsed() << std::endl;
+        std::cout << GridLogMessage << "MR Time elapsed: Matrix  " << MatrixTimer.Elapsed() << std::endl;
+        std::cout << GridLogMessage << "MR Time elapsed: Linalg  " << LinalgTimer.Elapsed() << std::endl;
+
+        if (ErrorOnNoConverge)
+          assert(true_residual / Tolerance < 10000.0);
+
+        IterationsToComplete = k;
+
+        return;
+      }
+    }
+
+    std::cout << GridLogMessage << "MinimalResidual did NOT converge"
+              << std::endl;
+
+    if (ErrorOnNoConverge)
+      assert(0);
+
+    IterationsToComplete = k;
+  }
+};
+} // namespace Grid
+#endif
diff --git a/Grid/algorithms/iterative/MixedPrecisionFlexibleGeneralisedMinimalResidual.h b/Grid/algorithms/iterative/MixedPrecisionFlexibleGeneralisedMinimalResidual.h
new file mode 100644
index 00000000..04113684
--- /dev/null
+++ b/Grid/algorithms/iterative/MixedPrecisionFlexibleGeneralisedMinimalResidual.h
@@ -0,0 +1,273 @@
+/*************************************************************************************
+
+Grid physics library, www.github.com/paboyle/Grid
+
+Source file: ./Grid/algorithms/iterative/MixedPrecisionFlexibleGeneralisedMinimalResidual.h
+
+Copyright (C) 2015
+
+Author: Daniel Richtmann <daniel.richtmann@ur.de>
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along
+with this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+
+See the full license in the file "LICENSE" in the top level distribution
+directory
+*************************************************************************************/
+/*  END LEGAL */
+#ifndef GRID_MIXED_PRECISION_FLEXIBLE_GENERALISED_MINIMAL_RESIDUAL_H
+#define GRID_MIXED_PRECISION_FLEXIBLE_GENERALISED_MINIMAL_RESIDUAL_H
+
+namespace Grid {
+// Restarted flexible GMRES on FieldD vectors whose preconditioner is applied to FieldF copies living on SinglePrecGrid.
+template<class FieldD, class FieldF, typename std::enable_if<getPrecision<FieldD>::value == 2, int>::type = 0, typename std::enable_if< getPrecision<FieldF>::value == 1, int>::type = 0>
+class MixedPrecisionFlexibleGeneralisedMinimalResidual : public OperatorFunction<FieldD> {
+ public:
+  bool ErrorOnNoConverge; // Throw an assert when MPFGMRES fails to converge,
+                          // defaults to true
+
+  RealD   Tolerance;           // converged once residual^2 <= Tolerance^2 * |src|^2
+
+  Integer MaxIterations;       // cap on the total number of Arnoldi steps over all restarts
+  Integer RestartLength;       // number of Arnoldi steps per restart cycle
+  Integer MaxNumberOfRestarts; // MaxIterations / RestartLength, rounded up
+  Integer IterationCount; // Number of iterations the MPFGMRES took to finish,
+                          // filled in upon completion
+
+  GridStopWatch MatrixTimer;
+  GridStopWatch PrecTimer;
+  GridStopWatch LinalgTimer;
+  GridStopWatch QrTimer;
+  GridStopWatch CompSolutionTimer;
+  GridStopWatch ChangePrecTimer;
+
+  Eigen::MatrixXcd H; // H(k, i): coefficient of basis vector i produced in Arnoldi step k
+
+  std::vector<std::complex<double>> y;     // coefficients of the solution update
+  std::vector<std::complex<double>> gamma; // rotated right-hand side; |gamma[k+1]|^2 is the residual estimate after step k
+  std::vector<std::complex<double>> c;     // Givens rotation coefficients (c and s)
+  std::vector<std::complex<double>> s;
+
+  GridBase* SinglePrecGrid; // grid on which the FieldF temporaries handed to the preconditioner are created
+
+  LinearFunction<FieldF> &Preconditioner; // held by reference: the caller must keep it alive
+
+  MixedPrecisionFlexibleGeneralisedMinimalResidual(RealD   tol,
+                                                   Integer maxit,
+                                                   GridBase * sp_grid,
+                                                   LinearFunction<FieldF> &Prec,
+                                                   Integer restart_length,
+                                                   bool    err_on_no_conv = true)
+      : ErrorOnNoConverge(err_on_no_conv) // initialisers are listed in member declaration order
+      , Tolerance(tol)
+      , MaxIterations(maxit)
+      , RestartLength(restart_length)
+      , MaxNumberOfRestarts(MaxIterations/RestartLength + ((MaxIterations%RestartLength == 0) ? 0 : 1))
+      , H(Eigen::MatrixXcd::Zero(RestartLength, RestartLength + 1)) // sizes taken from DD-αAMG code base
+      , y(RestartLength + 1, 0.)
+      , gamma(RestartLength + 1, 0.)
+      , c(RestartLength + 1, 0.)
+      , s(RestartLength + 1, 0.)
+      , SinglePrecGrid(sp_grid)
+      , Preconditioner(Prec) {};
+
+  void operator()(LinearOperatorBase<FieldD> &LinOp, const FieldD &src, FieldD &psi) {
+    // Solve LinOp psi = src starting from the incoming psi; asserts on non-convergence if ErrorOnNoConverge is set.
+    psi.checkerboard = src.checkerboard;
+    conformable(psi, src);
+
+    RealD guess = norm2(psi);
+    assert(std::isnan(guess) == 0);
+
+    RealD cp;
+    RealD ssq = norm2(src);
+    RealD rsq = Tolerance * Tolerance * ssq;
+
+    FieldD r(src._grid);
+
+    std::cout << std::setprecision(4) << std::scientific;
+    std::cout << GridLogIterative << "MPFGMRES: guess " << guess << std::endl;
+    std::cout << GridLogIterative << "MPFGMRES:   src " << ssq   << std::endl;
+
+    PrecTimer.Reset();
+    MatrixTimer.Reset();
+    LinalgTimer.Reset();
+    QrTimer.Reset();
+    CompSolutionTimer.Reset();
+    ChangePrecTimer.Reset();
+
+    GridStopWatch SolverTimer;
+    SolverTimer.Start();
+
+    IterationCount = 0;
+
+    for (int k=0; k<MaxNumberOfRestarts; k++) {
+
+      cp = outerLoopBody(LinOp, src, psi, rsq);
+
+      // Stopping condition
+      if (cp <= rsq) {
+
+        SolverTimer.Stop();
+
+        LinOp.Op(psi,r);
+        axpy(r,-1.0,src,r);
+
+        RealD srcnorm       = sqrt(ssq);
+        RealD resnorm       = sqrt(norm2(r));
+        RealD true_residual = resnorm / srcnorm;
+
+        std::cout << GridLogMessage        << "MPFGMRES: Converged on iteration " << IterationCount
+                  << " computed residual " << sqrt(cp / ssq)
+                  << " true residual "     << true_residual
+                  << " target "            << Tolerance << std::endl;
+
+        std::cout << GridLogMessage << "MPFGMRES Time elapsed: Total      " <<       SolverTimer.Elapsed() << std::endl;
+        std::cout << GridLogMessage << "MPFGMRES Time elapsed: Precon     " <<         PrecTimer.Elapsed() << std::endl;
+        std::cout << GridLogMessage << "MPFGMRES Time elapsed: Matrix     " <<       MatrixTimer.Elapsed() << std::endl;
+        std::cout << GridLogMessage << "MPFGMRES Time elapsed: Linalg     " <<       LinalgTimer.Elapsed() << std::endl;
+        std::cout << GridLogMessage << "MPFGMRES Time elapsed: QR         " <<           QrTimer.Elapsed() << std::endl;
+        std::cout << GridLogMessage << "MPFGMRES Time elapsed: CompSol    " << CompSolutionTimer.Elapsed() << std::endl;
+        std::cout << GridLogMessage << "MPFGMRES Time elapsed: PrecChange " <<   ChangePrecTimer.Elapsed() << std::endl;
+        return;
+      }
+    }
+
+    std::cout << GridLogMessage << "MPFGMRES did NOT converge" << std::endl;
+
+    if (ErrorOnNoConverge)
+      assert(0);
+  }
+
+  RealD outerLoopBody(LinearOperatorBase<FieldD> &LinOp, const FieldD &src, FieldD &psi, RealD rsq) {
+    // One restart cycle: updates psi in place and returns the squared residual (estimate) it reached.
+    RealD cp = 0;
+
+    FieldD w(src._grid);
+    FieldD r(src._grid);
+
+    // these should probably be made class members so that they are only allocated once, not in every restart
+    std::vector<FieldD> v(RestartLength + 1, src._grid); for (auto &elem : v) elem = zero;
+    std::vector<FieldD> z(RestartLength + 1, src._grid); for (auto &elem : z) elem = zero;
+
+    MatrixTimer.Start();
+    LinOp.Op(psi, w);
+    MatrixTimer.Stop();
+
+    LinalgTimer.Start();
+    r  = src - w;
+    cp = norm2(r);
+    if (cp <= rsq) { LinalgTimer.Stop(); return cp; } // psi already meets the target; also avoids dividing by |r| = 0 below
+    gamma[0] = sqrt(cp);
+    v[0] = (1. / gamma[0]) * r;
+    LinalgTimer.Stop();
+
+    for (int i=0; i<RestartLength; i++) {
+
+      IterationCount++;
+
+      arnoldiStep(LinOp, v, z, w, i);
+
+      qrUpdate(i);
+
+      cp = std::norm(gamma[i+1]);
+
+      std::cout << GridLogIterative << "MPFGMRES: Iteration " << IterationCount
+                << " residual " << cp << " target " << rsq << std::endl;
+
+      if ((i == RestartLength - 1) || (IterationCount == MaxIterations) || (cp <= rsq)) {
+
+        computeSolution(z, psi, i);
+
+        return cp;
+      }
+    }
+
+    assert(0); // Never reached
+    return cp;
+  }
+
+  void arnoldiStep(LinearOperatorBase<FieldD> &LinOp, std::vector<FieldD> &v, std::vector<FieldD> &z, FieldD &w, int iter) {
+    // One Arnoldi step: precondition v[iter] via FieldF copies into z[iter], w = LinOp z[iter], orthogonalise w against v[0..iter].
+    FieldF v_f(SinglePrecGrid);
+    FieldF z_f(SinglePrecGrid);
+
+    ChangePrecTimer.Start();
+    precisionChange(v_f, v[iter]);
+    precisionChange(z_f, z[iter]); // z[iter] was zeroed in outerLoopBody and not written since, so z_f starts from that
+    ChangePrecTimer.Stop();
+
+    PrecTimer.Start();
+    Preconditioner(v_f, z_f);
+    PrecTimer.Stop();
+
+    ChangePrecTimer.Start();
+    precisionChange(z[iter], z_f);
+    ChangePrecTimer.Stop();
+
+    MatrixTimer.Start();
+    LinOp.Op(z[iter], w);
+    MatrixTimer.Stop();
+
+    LinalgTimer.Start();
+    for (int i = 0; i <= iter; ++i) {
+      H(iter, i) = innerProduct(v[i], w);
+      w = w - H(iter, i) * v[i];
+    }
+    // The norm of what is left of w goes to H(iter, iter + 1); the normalised remainder becomes v[iter + 1]
+    H(iter, iter + 1) = sqrt(norm2(w));
+    v[iter + 1] = (1. / H(iter, iter + 1)) * w;
+    LinalgTimer.Stop();
+  }
+
+  void qrUpdate(int iter) {
+    // Apply the stored rotations to row iter of H, then build and apply the rotation that zeroes H(iter, iter + 1).
+    QrTimer.Start();
+    for (int i = 0; i < iter ; ++i) {
+      auto tmp       = -s[i] * H(iter, i) + c[i] * H(iter, i + 1);
+      H(iter, i)     = std::conj(c[i]) * H(iter, i) + std::conj(s[i]) * H(iter, i + 1);
+      H(iter, i + 1) = tmp;
+    }
+
+    // Compute new Givens Rotation
+    ComplexD nu = sqrt(std::norm(H(iter, iter)) + std::norm(H(iter, iter + 1)));
+    c[iter]     = H(iter, iter) / nu;
+    s[iter]     = H(iter, iter + 1) / nu;
+
+    // Apply new Givens rotation
+    H(iter, iter)     = nu;
+    H(iter, iter + 1) = 0.;
+
+    gamma[iter + 1] = -s[iter] * gamma[iter];
+    gamma[iter]     = std::conj(c[iter]) * gamma[iter];
+    QrTimer.Stop();
+  }
+
+  void computeSolution(std::vector<FieldD> const &z, FieldD &psi, int iter) {
+    // Back-substitute for y using the rotated H and gamma, then add sum_i y[i] * z[i] to psi.
+    CompSolutionTimer.Start();
+    for (int i = iter; i >= 0; i--) {
+      y[i] = gamma[i];
+      for (int k = i + 1; k <= iter; k++)
+        y[i] = y[i] - H(k, i) * y[k];
+      y[i] = y[i] / H(i, i);
+    }
+
+    for (int i = 0; i <= iter; i++)
+      psi = psi + z[i] * y[i];
+    CompSolutionTimer.Stop();
+  }
+};
+}
+#endif
diff --git a/Grid/algorithms/iterative/PrecGeneralisedConjugateResidual.h b/Grid/algorithms/iterative/PrecGeneralisedConjugateResidual.h
index fd11352e..c723c4a9 100644
--- a/Grid/algorithms/iterative/PrecGeneralisedConjugateResidual.h
+++ b/Grid/algorithms/iterative/PrecGeneralisedConjugateResidual.h
@@ -139,8 +139,11 @@ namespace Grid {
       MatTimer.Start();
       Linop.HermOpAndNorm(psi,Az,zAz,zAAz); 
       MatTimer.Stop();
+
+      LinalgTimer.Start();
       r=src-Az;
-      
+      LinalgTimer.Stop();
+
       /////////////////////
       // p = Prec(r)
       /////////////////////
@@ -152,8 +155,10 @@ namespace Grid {
       Linop.HermOp(z,tmp); 
       MatTimer.Stop();
 
+      LinalgTimer.Start();
       ttmp=tmp;
       tmp=tmp-r;
+      LinalgTimer.Stop();
 
       /*
       std::cout<<GridLogMessage<<r<<std::endl;
@@ -166,12 +171,14 @@ namespace Grid {
       Linop.HermOpAndNorm(z,Az,zAz,zAAz); 
       MatTimer.Stop();
 
+      LinalgTimer.Start();
       //p[0],q[0],qq[0] 
       p[0]= z;
       q[0]= Az;
       qq[0]= zAAz;
 
       cp =norm2(r);
+      LinalgTimer.Stop();
 
       for(int k=0;k<nstep;k++){
 
@@ -181,12 +188,14 @@ namespace Grid {
 	int peri_k = k %mmax;
 	int peri_kp= kp%mmax;
 
+        LinalgTimer.Start();
 	rq= real(innerProduct(r,q[peri_k])); // what if rAr not real?
 	a = rq/qq[peri_k];
 
 	axpy(psi,a,p[peri_k],psi);         
 
-	cp = axpy_norm(r,-a,q[peri_k],r);  
+	cp = axpy_norm(r,-a,q[peri_k],r);
+        LinalgTimer.Stop();
 
 	if((k==nstep-1)||(cp<rsq)){
 	  return cp;
@@ -202,6 +211,8 @@ namespace Grid {
 	Linop.HermOpAndNorm(z,Az,zAz,zAAz);
 	Linop.HermOp(z,tmp);
 	MatTimer.Stop();
+
+        LinalgTimer.Start();
         tmp=tmp-r;
 	std::cout<<GridLogMessage<< " Preconditioner resid " <<sqrt(norm2(tmp)/norm2(r))<<std::endl; 
 
@@ -219,9 +230,9 @@ namespace Grid {
 
 	}
 	qq[peri_kp]=norm2(q[peri_kp]); // could use axpy_norm
-
-
+        LinalgTimer.Stop();
       }
+
       assert(0); // never reached
       return cp;
     }
diff --git a/Grid/log/Log.cc b/Grid/log/Log.cc
index bc46893f..c3045a28 100644
--- a/Grid/log/Log.cc
+++ b/Grid/log/Log.cc
@@ -59,6 +59,7 @@ void GridLogTimestamp(int on){
 }
 
 Colours GridLogColours(0);
+GridLogger GridLogMG     (1, "MG"    , GridLogColours, "NORMAL");
 GridLogger GridLogIRL    (1, "IRL"   , GridLogColours, "NORMAL");
 GridLogger GridLogSolver (1, "Solver", GridLogColours, "NORMAL");
 GridLogger GridLogError  (1, "Error" , GridLogColours, "RED");
diff --git a/Grid/log/Log.h b/Grid/log/Log.h
index 5d97ee5a..322ab32e 100644
--- a/Grid/log/Log.h
+++ b/Grid/log/Log.h
@@ -169,6 +169,7 @@ public:
 
 void GridLogConfigure(std::vector<std::string> &logstreams);
 
+extern GridLogger GridLogMG;
 extern GridLogger GridLogIRL;
 extern GridLogger GridLogSolver;
 extern GridLogger GridLogError;
diff --git a/Grid/parallelIO/BinaryIO.cc b/Grid/parallelIO/BinaryIO.cc
new file mode 100644
index 00000000..221a7fe8
--- /dev/null
+++ b/Grid/parallelIO/BinaryIO.cc
@@ -0,0 +1,3 @@
+#include <Grid/GridCore.h>
+
+int Grid::BinaryIO::latticeWriteMaxRetry = -1; // negative: writeLatticeObject does no read-back check; N >= 0: checksum-verify each write, re-writing up to N times
diff --git a/Grid/parallelIO/BinaryIO.h b/Grid/parallelIO/BinaryIO.h
index a60fe962..1895dc3e 100644
--- a/Grid/parallelIO/BinaryIO.h
+++ b/Grid/parallelIO/BinaryIO.h
@@ -81,6 +81,7 @@ inline void removeWhitespace(std::string &key)
 ///////////////////////////////////////////////////////////////////////////////////////////////////
 class BinaryIO {
  public:
+  static int latticeWriteMaxRetry;
 
   /////////////////////////////////////////////////////////////////////////////
   // more byte manipulation helpers
@@ -209,10 +210,10 @@ PARALLEL_CRITICAL
   static inline void le32toh_v(void *file_object,uint64_t bytes)
   {
     uint32_t *fp = (uint32_t *)file_object;
-    uint32_t f;
 
     uint64_t count = bytes/sizeof(uint32_t);
     parallel_for(uint64_t i=0;i<count;i++){  
+      uint32_t f;
       f = fp[i];
       // got network order and the network to host
       f = ((f&0xFF)<<24) | ((f&0xFF00)<<8) | ((f&0xFF0000)>>8) | ((f&0xFF000000UL)>>24) ; 
@@ -234,10 +235,9 @@ PARALLEL_CRITICAL
   static inline void le64toh_v(void *file_object,uint64_t bytes)
   {
     uint64_t *fp = (uint64_t *)file_object;
-    uint64_t f,g;
-    
     uint64_t count = bytes/sizeof(uint64_t);
     parallel_for(uint64_t i=0;i<count;i++){  
+      uint64_t f,g;
       f = fp[i];
       // got network order and the network to host
       g = ((f&0xFF)<<24) | ((f&0xFF00)<<8) | ((f&0xFF0000)>>8) | ((f&0xFF000000UL)>>24) ; 
@@ -348,7 +348,8 @@ PARALLEL_CRITICAL
     int ieee32    = (format == std::string("IEEE32"));
     int ieee64big = (format == std::string("IEEE64BIG"));
     int ieee64    = (format == std::string("IEEE64"));
-
+    assert(ieee64||ieee32|ieee64big||ieee32big);
+    assert((ieee64+ieee32+ieee64big+ieee32big)==1);
     //////////////////////////////////////////////////////////////////////////////
     // Do the I/O
     //////////////////////////////////////////////////////////////////////////////
@@ -370,7 +371,7 @@ PARALLEL_CRITICAL
 #endif
       } else {
 	std::cout << GridLogMessage <<"IOobject: C++ read I/O " << file << " : "
-                  << iodata.size() * sizeof(fobj) << " bytes" << std::endl;
+                  << iodata.size() * sizeof(fobj) << " bytes and offset " << offset << std::endl;
         std::ifstream fin;
 	fin.open(file, std::ios::binary | std::ios::in);
         if (control & BINARYIO_MASTER_APPEND)
@@ -582,7 +583,9 @@ PARALLEL_CRITICAL
     typedef typename vobj::scalar_object sobj;
     typedef typename vobj::Realified::scalar_type word;    word w=0;
     GridBase *grid = Umu._grid;
-    uint64_t lsites = grid->lSites();
+    uint64_t lsites = grid->lSites(), offsetCopy = offset;
+    int attemptsLeft = std::max(0, BinaryIO::latticeWriteMaxRetry);
+    bool checkWrite = (BinaryIO::latticeWriteMaxRetry >= 0);
 
     std::vector<sobj> scalardata(lsites); 
     std::vector<fobj>     iodata(lsites); // Munge, checksum, byte order in here
@@ -597,9 +600,35 @@ PARALLEL_CRITICAL
 
     grid->Barrier();
     timer.Stop();
+    while (attemptsLeft >= 0)
+    {
+      grid->Barrier();
+      IOobject(w,grid,iodata,file,offset,format,BINARYIO_WRITE|BINARYIO_LEXICOGRAPHIC,
+	             nersc_csum,scidac_csuma,scidac_csumb);
+      if (checkWrite)
+      {
+        std::vector<fobj> ckiodata(lsites);
+        uint32_t          cknersc_csum, ckscidac_csuma, ckscidac_csumb;
+        uint64_t          ckoffset = offsetCopy;
 
-    IOobject(w,grid,iodata,file,offset,format,BINARYIO_WRITE|BINARYIO_LEXICOGRAPHIC,
-	     nersc_csum,scidac_csuma,scidac_csumb);
+        std::cout << GridLogMessage << "writeLatticeObject: read back object" << std::endl;
+        grid->Barrier();
+        IOobject(w,grid,ckiodata,file,ckoffset,format,BINARYIO_READ|BINARYIO_LEXICOGRAPHIC,
+	               cknersc_csum,ckscidac_csuma,ckscidac_csumb);
+        if ((cknersc_csum != nersc_csum) or (ckscidac_csuma != scidac_csuma) or (ckscidac_csumb != scidac_csumb))
+        {
+          std::cout << GridLogMessage << "writeLatticeObject: read test checksum failure, re-writing (" << attemptsLeft << " attempt(s) remaining)" << std::endl;
+          offset = offsetCopy;
+        }
+        else
+        {
+          std::cout << GridLogMessage << "writeLatticeObject: read test checksum correct" << std::endl;
+          break;
+        }
+      }
+      attemptsLeft--;
+    }
+    
 
     std::cout<<GridLogMessage<<"writeLatticeObject: unvectorize overhead "<<timer.Elapsed()  <<std::endl;
   }
@@ -725,5 +754,6 @@ PARALLEL_CRITICAL
     std::cout << GridLogMessage << "RNG state overhead " << timer.Elapsed() << std::endl;
   }
 };
+
 }
 #endif
diff --git a/Grid/parallelIO/IldgIO.h b/Grid/parallelIO/IldgIO.h
index 518a84a9..85800af0 100644
--- a/Grid/parallelIO/IldgIO.h
+++ b/Grid/parallelIO/IldgIO.h
@@ -46,6 +46,9 @@ extern "C" {
 namespace Grid {
 namespace QCD {
 
+// Name string for the field-norm metadata, and a check that the stored norm2 and a recomputed norm agree to within 1e-5 (absolute).
+#define GRID_FIELD_NORM "FieldNormMetaData"
+#define GRID_FIELD_NORM_CHECK(FieldNormMetaData_,n2ck)  assert(fabs((FieldNormMetaData_).norm2 - (n2ck)) < 1.0e-5);
   /////////////////////////////////
   // Encode word types as strings
   /////////////////////////////////
@@ -205,6 +208,7 @@ class GridLimeReader : public BinaryIO {
   {
     typedef typename vobj::scalar_object sobj;
     scidacChecksum scidacChecksum_;
+    FieldNormMetaData  FieldNormMetaData_;
     uint32_t nersc_csum,scidac_csuma,scidac_csumb;
 
     std::string format = getFormatString<vobj>();
@@ -233,21 +237,52 @@ class GridLimeReader : public BinaryIO {
 	//	std::cout << " ReadLatticeObject from offset "<<offset << std::endl;
 	BinarySimpleMunger<sobj,sobj> munge;
 	BinaryIO::readLatticeObject< vobj, sobj >(field, filename, munge, offset, format,nersc_csum,scidac_csuma,scidac_csumb);
-  std::cout << GridLogMessage << "SciDAC checksum A " << std::hex << scidac_csuma << std::dec << std::endl;
-  std::cout << GridLogMessage << "SciDAC checksum B " << std::hex << scidac_csumb << std::dec << std::endl;
+	std::cout << GridLogMessage << "SciDAC checksum A " << std::hex << scidac_csuma << std::dec << std::endl;
+	std::cout << GridLogMessage << "SciDAC checksum B " << std::hex << scidac_csumb << std::dec << std::endl;
 	/////////////////////////////////////////////
 	// Insist checksum is next record
 	/////////////////////////////////////////////
-	readLimeObject(scidacChecksum_,std::string("scidacChecksum"),std::string(SCIDAC_CHECKSUM));
-
+	readScidacChecksum(scidacChecksum_,FieldNormMetaData_);
 	/////////////////////////////////////////////
 	// Verify checksums
 	/////////////////////////////////////////////
+	if(FieldNormMetaData_.norm2 != 0.0){ 
+	  RealD n2ck = norm2(field);
+	  //	  std::cout << GridLogMessage << "checking field norm: metadata "<<FieldNormMetaData_.norm2<< " vs " << n2ck<<std::endl;
+	  GRID_FIELD_NORM_CHECK(FieldNormMetaData_,n2ck);
+	  std::cout << GridLogMessage <<  "FieldNormMetaData OK! "<<std::endl;
+	}
 	assert(scidacChecksumVerify(scidacChecksum_,scidac_csuma,scidac_csumb)==1);
+
+	// find out if next field is a GridFieldNorm
 	return;
       }
     }
   }
+  void readScidacChecksum(scidacChecksum     &scidacChecksum_,
+			  FieldNormMetaData  &FieldNormMetaData_)
+  { // Scans LIME records until a SciDAC checksum record is read; a field-norm record met on the way is read too.
+    FieldNormMetaData_.norm2 =0.0; // stays 0.0 when no field-norm record is encountered
+    std::string scidac_str(SCIDAC_CHECKSUM);
+    std::string field_norm_str(GRID_FIELD_NORM);
+    while ( limeReaderNextRecord(LimeR) == LIME_SUCCESS ) { 
+      uint64_t nbytes = limeReaderBytes(LimeR);//size of this record (configuration)
+      std::vector<char> xmlc(nbytes+1,'\0'); // one spare byte keeps the payload NUL-terminated
+      limeReaderReadData((void *)&xmlc[0], &nbytes, LimeR);    
+      std::string xmlstring = std::string(&xmlc[0]);
+      XmlReader RD(xmlstring, true, ""); // NOTE(review): constructed for every record whatever its type - confirm all records reached here hold XML
+      if ( !strncmp(limeReaderType(LimeR), field_norm_str.c_str(),strlen(field_norm_str.c_str()) )  ) {
+	//	std::cout << "FieldNormMetaData "<<xmlstring<<std::endl;
+	read(RD,field_norm_str,FieldNormMetaData_);
+      }
+      if ( !strncmp(limeReaderType(LimeR), scidac_str.c_str(),strlen(scidac_str.c_str()) )  ) {
+	//	std::cout << SCIDAC_CHECKSUM << " " <<xmlstring<<std::endl;
+	read(RD,std::string("scidacChecksum"),scidacChecksum_);
+	return; // the checksum record ends the scan
+      }      
+    }
+    assert(0); // ran out of records without reading a checksum record
+  }
   ////////////////////////////////////////////
   // Read a generic serialisable object
   ////////////////////////////////////////////
@@ -266,7 +301,7 @@ class GridLimeReader : public BinaryIO {
 	limeReaderReadData((void *)&xmlc[0], &nbytes, LimeR);    
 	//	std::cout << GridLogMessage<< " readLimeObject matches XML " << &xmlc[0] <<std::endl;
 
-   xmlstring = std::string(&xmlc[0]);
+	xmlstring = std::string(&xmlc[0]);
 	return;
       }
 
@@ -280,8 +315,8 @@ class GridLimeReader : public BinaryIO {
     std::string xmlstring;
 
     readLimeObject(xmlstring, record_name);
-	  XmlReader RD(xmlstring, true, "");
-	  read(RD,object_name,object);
+    XmlReader RD(xmlstring, true, "");
+    read(RD,object_name,object);
   }
 };
 
@@ -390,6 +425,8 @@ class GridLimeWriter : public BinaryIO
     GridBase *grid = field._grid;
     assert(boss_node == field._grid->IsBoss() );
 
+    FieldNormMetaData FNMD; FNMD.norm2 = norm2(field);
+
     ////////////////////////////////////////////
     // Create record header
     ////////////////////////////////////////////
@@ -448,6 +485,7 @@ class GridLimeWriter : public BinaryIO
     checksum.suma= streama.str();
     checksum.sumb= streamb.str();
     if ( boss_node ) { 
+      writeLimeObject(0,0,FNMD,std::string(GRID_FIELD_NORM),std::string(GRID_FIELD_NORM));
       writeLimeObject(0,1,checksum,std::string("scidacChecksum"),std::string(SCIDAC_CHECKSUM));
     }
   }
@@ -625,6 +663,12 @@ class IldgWriter : public ScidacWriter {
     assert(header.nd==4);
     assert(header.nd==header.dimension.size());
 
+    //////////////////////////////////////////////////////////////////////////////
+    // Field norm tests
+    //////////////////////////////////////////////////////////////////////////////
+    FieldNormMetaData FieldNormMetaData_;
+    FieldNormMetaData_.norm2 = norm2(Umu);
+
     //////////////////////////////////////////////////////////////////////////////
     // Fill the USQCD info field
     //////////////////////////////////////////////////////////////////////////////
@@ -633,11 +677,12 @@ class IldgWriter : public ScidacWriter {
     info.plaq   = header.plaquette;
     info.linktr = header.link_trace;
 
-    std::cout << GridLogMessage << " Writing config; IldgIO "<<std::endl;
+    //    std::cout << GridLogMessage << " Writing config; IldgIO n2 "<< FieldNormMetaData_.norm2<<std::endl;
     //////////////////////////////////////////////
     // Fill the Lime file record by record
     //////////////////////////////////////////////
     writeLimeObject(1,0,header ,std::string("FieldMetaData"),std::string(GRID_FORMAT)); // Open message 
+    writeLimeObject(0,0,FieldNormMetaData_,FieldNormMetaData_.SerialisableClassName(),std::string(GRID_FIELD_NORM));
     writeLimeObject(0,0,_scidacFile,_scidacFile.SerialisableClassName(),std::string(SCIDAC_PRIVATE_FILE_XML));
     writeLimeObject(0,1,info,info.SerialisableClassName(),std::string(SCIDAC_FILE_XML));
     writeLimeObject(1,0,_scidacRecord,_scidacRecord.SerialisableClassName(),std::string(SCIDAC_PRIVATE_RECORD_XML));
@@ -680,6 +725,7 @@ class IldgReader : public GridLimeReader {
     std::string    ildgLFN_       ;
     scidacChecksum scidacChecksum_; 
     usqcdInfo      usqcdInfo_     ;
+    FieldNormMetaData FieldNormMetaData_;
 
     // track what we read from file
     int found_ildgFormat    =0;
@@ -688,7 +734,7 @@ class IldgReader : public GridLimeReader {
     int found_usqcdInfo     =0;
     int found_ildgBinary =0;
     int found_FieldMetaData =0;
-
+    int found_FieldNormMetaData =0;
     uint32_t nersc_csum;
     uint32_t scidac_csuma;
     uint32_t scidac_csumb;
@@ -722,7 +768,7 @@ class IldgReader : public GridLimeReader {
 	//////////////////////////////////
 	// ILDG format record
 
-  std::string xmlstring(&xmlc[0]);
+	std::string xmlstring(&xmlc[0]);
 	if ( !strncmp(limeReaderType(LimeR), ILDG_FORMAT,strlen(ILDG_FORMAT)) ) { 
 
 	  XmlReader RD(xmlstring, true, "");
@@ -775,11 +821,17 @@ class IldgReader : public GridLimeReader {
 	  found_scidacChecksum = 1;
 	}
 
+	if ( !strncmp(limeReaderType(LimeR), GRID_FIELD_NORM,strlen(GRID_FIELD_NORM)) ) { 
+	  XmlReader RD(xmlstring, true, "");
+	  read(RD,GRID_FIELD_NORM,FieldNormMetaData_);
+	  found_FieldNormMetaData = 1;
+	}
+
       } else {  
 	/////////////////////////////////
 	// Binary data
 	/////////////////////////////////
-	std::cout << GridLogMessage << "ILDG Binary record found : "  ILDG_BINARY_DATA << std::endl;
+	//	std::cout << GridLogMessage << "ILDG Binary record found : "  ILDG_BINARY_DATA << std::endl;
 	uint64_t offset= ftello(File);
 	if ( format == std::string("IEEE64BIG") ) {
 	  GaugeSimpleMunger<dobj, sobj> munge;
@@ -846,6 +898,13 @@ class IldgReader : public GridLimeReader {
     ////////////////////////////////////////////////////////////
     // Really really want to mandate a scidac checksum
     ////////////////////////////////////////////////////////////
+    if ( found_FieldNormMetaData ) { 
+      RealD nn = norm2(Umu);
+      GRID_FIELD_NORM_CHECK(FieldNormMetaData_,nn);
+      std::cout << GridLogMessage<<"FieldNormMetaData matches " << std::endl;
+    }  else { 
+      std::cout << GridLogWarning<<"FieldNormMetaData not found. " << std::endl;
+    }
     if ( found_scidacChecksum ) {
       FieldMetaData_.scidac_checksuma = stoull(scidacChecksum_.suma,0,16);
       FieldMetaData_.scidac_checksumb = stoull(scidacChecksum_.sumb,0,16);
diff --git a/Grid/parallelIO/MetaData.h b/Grid/parallelIO/MetaData.h
index 55254786..4cc93d03 100644
--- a/Grid/parallelIO/MetaData.h
+++ b/Grid/parallelIO/MetaData.h
@@ -56,6 +56,10 @@ namespace Grid {
   ////////////////////////////////////////////////////////////////////////////////
   // header specification/interpretation
   ////////////////////////////////////////////////////////////////////////////////
+    class FieldNormMetaData : Serializable { // serialisable record with a single double member, norm2
+    public:
+      GRID_SERIALIZABLE_CLASS_MEMBERS(FieldNormMetaData, double, norm2);
+    };
     class FieldMetaData : Serializable {
     public:
 
diff --git a/Grid/perfmon/Timer.h b/Grid/perfmon/Timer.h
index 07c5febd..ce1b5d76 100644
--- a/Grid/perfmon/Timer.h
+++ b/Grid/perfmon/Timer.h
@@ -64,16 +64,20 @@ inline std::ostream& operator<< (std::ostream & stream, const GridMillisecs & no
 {
   GridSecs second(1);
   auto     secs       = now/second ; 
-  auto     subseconds = now%second ; 
+  auto     subseconds = now%second ;
+  auto     fill       = stream.fill();
   stream << secs<<"."<<std::setw(3)<<std::setfill('0')<<subseconds.count()<<" s";
+  stream.fill(fill);
   return stream;
 }
 inline std::ostream& operator<< (std::ostream & stream, const GridUsecs & now)
 {
   GridSecs second(1);
   auto     seconds    = now/second ; 
-  auto     subseconds = now%second ; 
+  auto     subseconds = now%second ;
+  auto     fill       = stream.fill(); // remember the caller's fill character
   stream << seconds<<"."<<std::setw(6)<<std::setfill('0')<<subseconds.count()<<" s";
+  stream.fill(fill); // setfill('0') persists on the stream, so put the saved fill back
   return stream;
 }
 
diff --git a/Grid/qcd/action/gauge/Photon.h b/Grid/qcd/action/gauge/Photon.h
index 6ff697c5..f059fcf3 100644
--- a/Grid/qcd/action/gauge/Photon.h
+++ b/Grid/qcd/action/gauge/Photon.h
@@ -4,9 +4,11 @@
  
  Source file: ./lib/qcd/action/gauge/Photon.h
  
- Copyright (C) 2015
+Copyright (C) 2015-2018
  
  Author: Peter Boyle <paboyle@ph.ed.ac.uk>
+ Author: Antonin Portelli <antonin.portelli@me.com>
+ Author: James Harrison <J.Harrison@soton.ac.uk>
  
  This program is free software; you can redistribute it and/or modify
  it under the terms of the GNU General Public License as published by
@@ -30,8 +32,9 @@
 
 namespace Grid{
 namespace QCD{
+
   template <class S>
-  class QedGimpl
+  class QedGImpl
   {
   public:
     typedef S Simd;
@@ -43,27 +46,27 @@ namespace QCD{
     
     typedef iImplGaugeLink<Simd>  SiteLink;
     typedef iImplGaugeField<Simd> SiteField;
-    typedef SiteField             SiteComplex;
+    typedef SiteLink              SiteComplex;
     
     typedef Lattice<SiteLink>  LinkField;
     typedef Lattice<SiteField> Field;
     typedef Field              ComplexField;
   };
   
-  typedef QedGimpl<vComplex> QedGimplR;
+  typedef QedGImpl<vComplex> QedGImplR;
   
-  template<class Gimpl>
+  template <class GImpl>
   class Photon
   {
   public:
-    INHERIT_GIMPL_TYPES(Gimpl);
+    INHERIT_GIMPL_TYPES(GImpl);
+    typedef typename SiteGaugeLink::scalar_object ScalarSite;
+    typedef typename ScalarSite::scalar_type      ScalarComplex;
     GRID_SERIALIZABLE_ENUM(Gauge, undef, feynman, 1, coulomb, 2, landau, 3);
-    GRID_SERIALIZABLE_ENUM(ZmScheme, undef, qedL, 1, qedTL, 2, qedInf, 3);
+    GRID_SERIALIZABLE_ENUM(ZmScheme, undef, qedL, 1, qedTL, 2);
   public:
-    Photon(Gauge gauge, ZmScheme zmScheme);
-    Photon(Gauge gauge, ZmScheme zmScheme, std::vector<Real> improvements);
-    Photon(Gauge gauge, ZmScheme zmScheme, Real G0);
-    Photon(Gauge gauge, ZmScheme zmScheme, std::vector<Real> improvements, Real G0);
+    Photon(GridBase *grid, Gauge gauge, ZmScheme zmScheme, std::vector<Real> improvement);
+    Photon(GridBase *grid, Gauge gauge, ZmScheme zmScheme);
     virtual ~Photon(void) = default;
     void FreePropagator(const GaugeField &in, GaugeField &out);
     void MomentumSpacePropagator(const GaugeField &in, GaugeField &out);
@@ -73,345 +76,255 @@ namespace QCD{
                          const GaugeLinkField &weight);
     void UnitField(GaugeField &out);
   private:
-    void infVolPropagator(GaugeLinkField &out);
-    void invKHatSquared(GaugeLinkField &out);
+    void makeSpatialNorm(LatticeInteger &spNrm);
+    void makeKHat(std::vector<GaugeLinkField> &khat);
+    void makeInvKHatSquared(GaugeLinkField &out);
     void zmSub(GaugeLinkField &out);
+    void transverseProjectSpatial(GaugeField &out);
+    void gaugeTransform(GaugeField &out);
   private:
-    Gauge    gauge_;
-    ZmScheme zmScheme_;
-    std::vector<Real>  improvement_;
-    Real     G0_;
+    GridBase          *grid_;
+    Gauge             gauge_;
+    ZmScheme          zmScheme_;
+    std::vector<Real> improvement_;
   };
 
-  typedef Photon<QedGimplR>  PhotonR;
+  typedef Photon<QedGImplR>  PhotonR;
   
-  template<class Gimpl>
-  Photon<Gimpl>::Photon(Gauge gauge, ZmScheme zmScheme)
-  : gauge_(gauge), zmScheme_(zmScheme), improvement_(std::vector<Real>()),
-    G0_(0.15493339023106021408483720810737508876916113364521)
-  {}
-
-  template<class Gimpl>
-  Photon<Gimpl>::Photon(Gauge gauge, ZmScheme zmScheme,
+  template<class GImpl>
+  Photon<GImpl>::Photon(GridBase *grid, Gauge gauge, ZmScheme zmScheme,
                         std::vector<Real> improvements)
-  : gauge_(gauge), zmScheme_(zmScheme), improvement_(improvements),
-    G0_(0.15493339023106021408483720810737508876916113364521)
+  : grid_(grid), gauge_(gauge), zmScheme_(zmScheme), improvement_(improvements)
   {}
 
-  template<class Gimpl>
-  Photon<Gimpl>::Photon(Gauge gauge, ZmScheme zmScheme, Real G0)
-  : gauge_(gauge), zmScheme_(zmScheme), improvement_(std::vector<Real>()), G0_(G0)
+  template<class GImpl>
+  Photon<GImpl>::Photon(GridBase *grid, Gauge gauge, ZmScheme zmScheme)
+  : Photon(grid, gauge, zmScheme, std::vector<Real>())
   {}
 
-  template<class Gimpl>
-  Photon<Gimpl>::Photon(Gauge gauge, ZmScheme zmScheme,
-                        std::vector<Real> improvements, Real G0)
-  : gauge_(gauge), zmScheme_(zmScheme), improvement_(improvements), G0_(G0)
-  {}
-
-  template<class Gimpl>
-  void Photon<Gimpl>::FreePropagator (const GaugeField &in,GaugeField &out)
+  template<class GImpl>
+  void Photon<GImpl>::FreePropagator(const GaugeField &in, GaugeField &out)
   {
-    FFT theFFT(in._grid);
+    FFT        theFFT(dynamic_cast<GridCartesian *>(grid_));
+    GaugeField in_k(grid_);
+    GaugeField prop_k(grid_);
     
-    GaugeField in_k(in._grid);
-    GaugeField prop_k(in._grid);
-    
-    theFFT.FFT_all_dim(in_k,in,FFT::forward);
-    MomentumSpacePropagator(prop_k,in_k);
-    theFFT.FFT_all_dim(out,prop_k,FFT::backward);
+    theFFT.FFT_all_dim(in_k, in, FFT::forward);
+    MomentumSpacePropagator(in_k, prop_k); // signature is (in, out): read the transformed source in_k, fill prop_k
+    theFFT.FFT_all_dim(out, prop_k, FFT::backward);
   }
 
-  template<class Gimpl>
-  void Photon<Gimpl>::infVolPropagator(GaugeLinkField &out)
+  template<class GImpl>
+  void Photon<GImpl>::makeSpatialNorm(LatticeInteger &spNrm)
   {
-    auto               *grid = dynamic_cast<GridCartesian *>(out._grid);
-    LatticeReal        xmu(grid);
-    GaugeLinkField     one(grid);
-    const unsigned int nd    = grid->_ndimension;
-    std::vector<int>   &l    = grid->_fdimensions;
-    std::vector<int>   x0(nd,0);
-    TComplex           Tone  = Complex(1.0,0.0);
-    TComplex           Tzero = Complex(G0_,0.0);
-    FFT                fft(grid);
+    LatticeInteger   coor(grid_);
+    std::vector<int> l = grid_->FullDimensions();
+
+    spNrm = zero;
+    for(int mu = 0; mu < grid_->Nd() - 1; mu++)
+    {
+      LatticeCoordinate(coor, mu);
+      coor  = where(coor < Integer(l[mu]/2), coor, coor - Integer(l[mu]));
+      spNrm = spNrm + coor*coor;
+    }
+  }
+
+  template<class GImpl>
+  void Photon<GImpl>::makeKHat(std::vector<GaugeLinkField> &khat)
+  { // Fills khat[mu] with exp(i*pi*n/L)*2*sin(pi*n/L); n = lattice coordinate along mu, L = l[mu].
+    const unsigned int nd = grid_->Nd();
+    std::vector<int>   l  = grid_->FullDimensions();
+    Complex            ci(0., 1.);
+
+    khat.resize(nd, grid_); // nd link fields, each constructed from grid_
+    for (unsigned int mu = 0; mu < nd; ++mu)
+    {
+      Real piL = M_PI/l[mu];
+
+      LatticeCoordinate(khat[mu], mu); // khat[mu] first holds the coordinate, then is overwritten below
+      khat[mu] = exp(piL*ci*khat[mu])*2.*sin(piL*khat[mu]);
+    }
+  }
+
+  template<class GImpl>
+  void Photon<GImpl>::makeInvKHatSquared(GaugeLinkField &out)
+  {
+    std::vector<GaugeLinkField> khat;
+    GaugeLinkField              lone(grid_);
+    const unsigned int          nd = grid_->Nd();
+    std::vector<int>            zm(nd, 0);
+    ScalarSite                  one = ScalarComplex(1., 0.), z = ScalarComplex(0., 0.);
     
-    one = Complex(1.0,0.0);
     out = zero;
+    makeKHat(khat);
     for(int mu = 0; mu < nd; mu++)
     {
-      LatticeCoordinate(xmu,mu);
-      Real lo2 = l[mu]/2.0;
-      xmu = where(xmu < lo2, xmu, xmu-double(l[mu]));
-      out = out + toComplex(4*M_PI*M_PI*xmu*xmu);
+      out = out + khat[mu]*conjugate(khat[mu]);
     }
-    pokeSite(Tone, out, x0);
-    out = one/out;
-    pokeSite(Tzero, out, x0);
-    fft.FFT_all_dim(out, out, FFT::forward);
+    lone = ScalarComplex(1., 0.);
+    pokeSite(one, out, zm);
+    out = lone/out;
+    pokeSite(z, out, zm);
   }
   
-  template<class Gimpl>
-  void Photon<Gimpl>::invKHatSquared(GaugeLinkField &out)
+  template<class GImpl>
+  void Photon<GImpl>::zmSub(GaugeLinkField &out)
   {
-    GridBase           *grid = out._grid;
-    GaugeLinkField     kmu(grid), one(grid);
-    const unsigned int nd    = grid->_ndimension;
-    std::vector<int>   &l    = grid->_fdimensions;
-    std::vector<int>   zm(nd,0);
-    TComplex           Tone = Complex(1.0,0.0);
-    TComplex           Tzero= Complex(0.0,0.0);
-    
-    one = Complex(1.0,0.0);
-    out = zero;
-    for(int mu = 0; mu < nd; mu++)
-    {
-      Real twoPiL = M_PI*2./l[mu];
-      
-      LatticeCoordinate(kmu,mu);
-      kmu = 2.*sin(.5*twoPiL*kmu);
-      out = out + kmu*kmu;
-    }
-    pokeSite(Tone, out, zm);
-    out = one/out;
-    pokeSite(Tzero, out, zm);
-  }
-  
-  template<class Gimpl>
-  void Photon<Gimpl>::zmSub(GaugeLinkField &out)
-  {
-    GridBase           *grid = out._grid;
-    const unsigned int nd    = grid->_ndimension;
-    std::vector<int>   &l    = grid->_fdimensions;
-    
     switch (zmScheme_)
     {
       case ZmScheme::qedTL:
       {
-        std::vector<int> zm(nd,0);
-        TComplex         Tzero = Complex(0.0,0.0);
-        
-        pokeSite(Tzero, out, zm);
+        std::vector<int> zm(grid_->Nd(), 0);
+        ScalarSite       z = ScalarComplex(0., 0.);
         
+        pokeSite(z, out, zm);
         break;
       }
       case ZmScheme::qedL:
       {
-        LatticeInteger spNrm(grid), coor(grid);
-        GaugeLinkField z(grid);
-        
-        spNrm = zero;
-        for(int d = 0; d < grid->_ndimension - 1; d++)
-        {
-          LatticeCoordinate(coor,d);
-          coor = where(coor < Integer(l[d]/2), coor, coor-Integer(l[d]));
-          spNrm = spNrm + coor*coor;
-        }
-        out = where(spNrm == Integer(0), 0.*out, out);
+        LatticeInteger spNrm(grid_);
 
-        // IR improvement
+        makeSpatialNorm(spNrm);
+        out = where(spNrm == Integer(0), 0.*out, out);
         for(int i = 0; i < improvement_.size(); i++)
         {
-          Real f = sqrt(improvement_[i]+1);
-          out = where(spNrm == Integer(i+1), f*out, out);
+          Real f = sqrt(improvement_[i] + 1);
+          out = where(spNrm == Integer(i + 1), f*out, out);
         }
+        break;
       }
       default:
+        assert(0);
         break;
     }
   }
 
-  template<class Gimpl>
-  void Photon<Gimpl>::MomentumSpacePropagator(const GaugeField &in,
-                                               GaugeField &out)
+  template<class GImpl>
+  void Photon<GImpl>::transverseProjectSpatial(GaugeField &out)
   {
-  GridBase           *grid = out._grid;
-    LatticeComplex     momProp(grid);
-    
-    switch (zmScheme_)
+    const unsigned int          nd = grid_->Nd();
+    GaugeLinkField              invKHat(grid_), cst(grid_), spdiv(grid_);
+    LatticeInteger              spNrm(grid_);
+    std::vector<GaugeLinkField> khat, a(nd, grid_), aProj(nd, grid_);
+
+    invKHat = zero;
+    makeSpatialNorm(spNrm);
+    makeKHat(khat);
+    for (unsigned int mu = 0; mu < nd; ++mu)
     {
-      case ZmScheme::qedTL:
-      case ZmScheme::qedL:
+      a[mu] = peekLorentz(out, mu);
+      if (mu < nd - 1)
       {
-        invKHatSquared(momProp);
-        zmSub(momProp);
-        break;
+        invKHat += khat[mu]*conjugate(khat[mu]);
       }
-      case ZmScheme::qedInf:
-      {
-        infVolPropagator(momProp);
+    }
+    cst     = ScalarComplex(1., 0.);
+    invKHat = where(spNrm == Integer(0), cst, invKHat);
+    invKHat = cst/invKHat;
+    cst     = zero;
+    invKHat = where(spNrm == Integer(0), cst, invKHat);
+    spdiv   = zero;
+    for (unsigned int nu = 0; nu < nd - 1; ++nu)
+    {
+      spdiv += conjugate(khat[nu])*a[nu];
+    }
+    spdiv *= invKHat;
+    for (unsigned int mu = 0; mu < nd; ++mu)
+    {
+      aProj[mu] = a[mu] - khat[mu]*spdiv;
+      pokeLorentz(out, aProj[mu], mu);
+    }
+  }
+
+  template<class GImpl>
+  void Photon<GImpl>::gaugeTransform(GaugeField &out)
+  { // Dispatches on gauge_; only the coulomb case modifies out.
+    switch (gauge_)
+    {
+      case Gauge::feynman:
+        break; // out is left untouched
+      case Gauge::coulomb:
+        transverseProjectSpatial(out);
+        break;
+      case Gauge::landau:
+        assert(0); // no landau implementation in this function
+        break;
-      }
       default:
+        assert(0); // any other gauge value (e.g. undef) is rejected
         break;
     }
+  }
+
+  template<class GImpl>
+  void Photon<GImpl>::MomentumSpacePropagator(const GaugeField &in,
+                                              GaugeField &out)
+  {
+    LatticeComplex momProp(grid_);
+    
+    makeInvKHatSquared(momProp);
+    zmSub(momProp);
     
     out = in*momProp;
   }
   
-  template<class Gimpl>
-  void Photon<Gimpl>::StochasticWeight(GaugeLinkField &weight)
+  template<class GImpl>
+  void Photon<GImpl>::StochasticWeight(GaugeLinkField &weight)
   {
-    auto               *grid     = dynamic_cast<GridCartesian *>(weight._grid);
-    const unsigned int nd        = grid->_ndimension;
-    std::vector<int>   latt_size = grid->_fdimensions;
-    
-    switch (zmScheme_)
+    const unsigned int nd  = grid_->Nd();
+    std::vector<int>   l   = grid_->FullDimensions();
+    Integer            vol = 1;
+
+    for(unsigned int mu = 0; mu < nd; mu++)
     {
-      case ZmScheme::qedTL:
-      case ZmScheme::qedL:
-      {
-        Integer vol = 1;
-        for(int d = 0; d < nd; d++)
-        {
-          vol = vol * latt_size[d];
-        }
-        invKHatSquared(weight);
-        weight = sqrt(vol)*sqrt(weight);
-        zmSub(weight);
-        break;
-      }
-      case ZmScheme::qedInf:
-      {
-        infVolPropagator(weight);
-        weight = sqrt(real(weight));
-        break;
-      }
-      default:
-        break;
+      vol = vol*l[mu];
     }
+    makeInvKHatSquared(weight);
+    weight = sqrt(vol)*sqrt(weight);
+    zmSub(weight);
   }
   
-  template<class Gimpl>
-  void Photon<Gimpl>::StochasticField(GaugeField &out, GridParallelRNG &rng)
+  template<class GImpl>
+  void Photon<GImpl>::StochasticField(GaugeField &out, GridParallelRNG &rng)
   {
-    auto           *grid = dynamic_cast<GridCartesian *>(out._grid);
-    GaugeLinkField weight(grid);
+    GaugeLinkField weight(grid_);
     
     StochasticWeight(weight);
     StochasticField(out, rng, weight);
   }
   
-  template<class Gimpl>
-  void Photon<Gimpl>::StochasticField(GaugeField &out, GridParallelRNG &rng,
+  template<class GImpl>
+  void Photon<GImpl>::StochasticField(GaugeField &out, GridParallelRNG &rng,
                                       const GaugeLinkField &weight)
   {
-    auto               *grid = dynamic_cast<GridCartesian *>(out._grid);
-    const unsigned int nd = grid->_ndimension;
-    GaugeLinkField     r(grid);
-    GaugeField         aTilde(grid);
-    FFT                fft(grid);
+    const unsigned int nd = grid_->Nd();
+    GaugeLinkField     r(grid_);
+    GaugeField         aTilde(grid_);
+    FFT                fft(dynamic_cast<GridCartesian *>(grid_));
     
-    switch (zmScheme_)
+    for(unsigned int mu = 0; mu < nd; mu++)
     {
-      case ZmScheme::qedTL:
-      case ZmScheme::qedL:
-      {
-        for(int mu = 0; mu < nd; mu++)
-        {
-          gaussian(rng, r);
-          r = weight*r;
-          pokeLorentz(aTilde, r, mu);
-        }
-        break;
-      }
-      case ZmScheme::qedInf:
-      {
-        Complex                    shift(1., 1.); // This needs to be a GaugeLink element?
-        for(int mu = 0; mu < nd; mu++)
-        {
-          bernoulli(rng, r);
-          r = weight*(2.*r - shift);
-          pokeLorentz(aTilde, r, mu);
-        }
-        break;
-      }
-      default:
-        break;
+      gaussian(rng, r);
+      r = weight*r;
+      pokeLorentz(aTilde, r, mu);
     }
-
+    gaugeTransform(aTilde);
     fft.FFT_all_dim(out, aTilde, FFT::backward);
-    
     out = real(out);
   }
 
-  template<class Gimpl>
-  void Photon<Gimpl>::UnitField(GaugeField &out)
+  template<class GImpl>
+  void Photon<GImpl>::UnitField(GaugeField &out)
   {
-    auto               *grid = dynamic_cast<GridCartesian *>(out._grid);
-    const unsigned int nd = grid->_ndimension;
-    GaugeLinkField     r(grid);
+    const unsigned int nd = grid_->Nd();
+    GaugeLinkField     r(grid_);
     
-    r = Complex(1.0,0.0);
-
-    for(int mu = 0; mu < nd; mu++)
+    r = ScalarComplex(1., 0.);
+    for(unsigned int mu = 0; mu < nd; mu++)
     {
       pokeLorentz(out, r, mu);
     }
-    
     out = real(out);
   }
-//  template<class Gimpl>
-//  void Photon<Gimpl>::FeynmanGaugeMomentumSpacePropagator_L(GaugeField &out,
-//                                                            const GaugeField &in)
-//  {
-//    
-//    FeynmanGaugeMomentumSpacePropagator_TL(out,in);
-//    
-//    GridBase *grid = out._grid;
-//    LatticeInteger     coor(grid);
-//    GaugeField zz(grid); zz=zero;
-//    
-//    // xyzt
-//    for(int d = 0; d < grid->_ndimension-1;d++){
-//      LatticeCoordinate(coor,d);
-//      out = where(coor==Integer(0),zz,out);
-//    }
-//  }
-//  
-//  template<class Gimpl>
-//  void Photon<Gimpl>::FeynmanGaugeMomentumSpacePropagator_TL(GaugeField &out,
-//                                                             const GaugeField &in)
-//  {
-//    
-//    // what type LatticeComplex
-//    GridBase *grid = out._grid;
-//    int nd = grid->_ndimension;
-//    
-//    typedef typename GaugeField::vector_type vector_type;
-//    typedef typename GaugeField::scalar_type ScalComplex;
-//    typedef Lattice<iSinglet<vector_type> > LatComplex;
-//    
-//    std::vector<int> latt_size   = grid->_fdimensions;
-//    
-//    LatComplex denom(grid); denom= zero;
-//    LatComplex   one(grid); one = ScalComplex(1.0,0.0);
-//    LatComplex   kmu(grid);
-//    
-//    ScalComplex ci(0.0,1.0);
-//    // momphase = n * 2pi / L
-//    for(int mu=0;mu<Nd;mu++) {
-//      
-//      LatticeCoordinate(kmu,mu);
-//      
-//      RealD TwoPiL =  M_PI * 2.0/ latt_size[mu];
-//      
-//      kmu = TwoPiL * kmu ;
-//      
-//      denom = denom + 4.0*sin(kmu*0.5)*sin(kmu*0.5); // Wilson term
-//    }
-//    std::vector<int> zero_mode(nd,0);
-//    TComplexD Tone = ComplexD(1.0,0.0);
-//    TComplexD Tzero= ComplexD(0.0,0.0);
-//    
-//    pokeSite(Tone,denom,zero_mode);
-//    
-//    denom= one/denom;
-//    
-//    pokeSite(Tzero,denom,zero_mode);
-//    
-//    out = zero;
-//    out = in*denom;
-//  };
   
 }}
 #endif
diff --git a/Grid/qcd/utils/LinalgUtils.h b/Grid/qcd/utils/LinalgUtils.h
index 5eaf1c2a..04a224e5 100644
--- a/Grid/qcd/utils/LinalgUtils.h
+++ b/Grid/qcd/utils/LinalgUtils.h
@@ -173,6 +173,39 @@ void G5R5(Lattice<vobj> &z,const Lattice<vobj> &x)
     }
   }
 }
+}
 
-}}
+// I explicitly need these outside the QCD namespace
+template<typename vobj>
+void G5C(Lattice<vobj> &z, const Lattice<vobj> &x)
+{
+  // z = G5 * x with G5 = QCD::Gamma(Gamma5); z takes x's checkerboard first.
+  z.checkerboard = x.checkerboard;
+  conformable(x, z);
+
+  QCD::Gamma G5(QCD::Gamma::Algebra::Gamma5);
+  z = G5 * x;
+}
+
+template<class CComplex, int nbasis>
+void G5C(Lattice<iVector<CComplex, nbasis>> &z, const Lattice<iVector<CComplex, nbasis>> &x)
+{
+  // iVector overload of G5C: components [0, nbasis/2) are copied from x,
+  // components [nbasis/2, nbasis) are copied with their sign flipped.
+  static_assert(nbasis % 2 == 0, "");
+  const int half = nbasis / 2;
+
+  GridBase *layout = x._grid;
+  z.checkerboard = x.checkerboard;
+  conformable(x, z);
+
+  parallel_for(int site = 0; site < layout->oSites(); site++) {
+    for(int b = 0; b < nbasis; ++b) {
+      if(b < half) z._odata[site](b) =  x._odata[site](b);
+      else         z._odata[site](b) = -x._odata[site](b);
+    }
+  }
+}
+
+}
 #endif 
diff --git a/Grid/qcd/utils/WilsonLoops.h b/Grid/qcd/utils/WilsonLoops.h
index 6cf34e0c..d4790df2 100644
--- a/Grid/qcd/utils/WilsonLoops.h
+++ b/Grid/qcd/utils/WilsonLoops.h
@@ -6,10 +6,12 @@
 
     Copyright (C) 2015
 
-Author: Azusa Yamaguchi <ayamaguc@staffmail.ed.ac.uk>
-Author: Peter Boyle <paboyle@ph.ed.ac.uk>
-Author: neo <cossu@post.kek.jp>
-Author: paboyle <paboyle@ph.ed.ac.uk>
+    Author: Azusa Yamaguchi <ayamaguc@staffmail.ed.ac.uk>
+    Author: Peter Boyle <paboyle@ph.ed.ac.uk>
+    Author: neo <cossu@post.kek.jp>
+    Author: paboyle <paboyle@ph.ed.ac.uk>
+    Author: James Harrison <J.Harrison@soton.ac.uk>
+    Author: Antonin Portelli <antonin.portelli@me.com>
 
     This program is free software; you can redistribute it and/or modify
     it under the terms of the GNU General Public License as published by
@@ -645,6 +647,184 @@ static void StapleMult(GaugeMat &staple, const GaugeLorentz &Umu, int mu) {
       }
     }
   }
+
+  //////////////////////////////////////////////////
+  // Wilson loop of size (Rmu, Rnu), oriented in mu,nu plane
+  //////////////////////////////////////////////////
+  static void wilsonLoop(GaugeMat &wl, const std::vector<GaugeMat> &U,
+                           const int Rmu, const int Rnu,
+                           const int mu, const int nu) {
+    wl = U[nu]; // seed the product with a single nu-direction link
+    // Rnu-1 further forward covariant shifts along nu (the seed supplies the first link).
+    for(int i = 0; i < Rnu-1; i++){
+      wl = Gimpl::CovShiftForward(U[nu], nu, wl);
+    }
+    // Rmu forward covariant shifts along mu.
+    for(int i = 0; i < Rmu; i++){
+      wl = Gimpl::CovShiftForward(U[mu], mu, wl);
+    }
+    // Rnu backward covariant shifts along nu.
+    for(int i = 0; i < Rnu; i++){
+      wl = Gimpl::CovShiftBackward(U[nu], nu, wl);
+    }
+    // Rmu backward covariant shifts along mu; the result is left in wl.
+    for(int i = 0; i < Rmu; i++){
+      wl = Gimpl::CovShiftBackward(U[mu], mu, wl);
+    }
+  }
+  //////////////////////////////////////////////////
+  // trace of Wilson Loop oriented in mu,nu plane
+  //////////////////////////////////////////////////
+  static void traceWilsonLoop(LatticeComplex &wl,
+                              const std::vector<GaugeMat> &U,
+                              const int Rmu, const int Rnu,
+                              const int mu, const int nu) {
+    GaugeMat loopMat(U[0]._grid); // scratch field on the same grid as the links
+    wilsonLoop(loopMat, U, Rmu, Rnu, mu, nu);
+    wl = trace(loopMat); // output is the trace of the loop field
+  }
+  //////////////////////////////////////////////////
+  // sum over all planes of Wilson loop
+  //////////////////////////////////////////////////
+  static void siteWilsonLoop(LatticeComplex &Wl,
+                             const std::vector<GaugeMat> &U,
+                             const int R1, const int R2) {
+    LatticeComplex planeWl(U[0]._grid); // traced loop for the current plane
+    Wl = zero;
+    for (int mu = 1; mu < U[0]._grid->_ndimension; ++mu) {
+      for (int nu = 0; nu < mu; ++nu) { // each pair of distinct directions once
+        traceWilsonLoop(planeWl, U, R1, R2, mu, nu); // extent R1 along mu, R2 along nu
+        Wl = Wl + planeWl;
+        traceWilsonLoop(planeWl, U, R2, R1, mu, nu); // extents swapped
+        Wl = Wl + planeWl;
+      }
+    }
+  }
+  //////////////////////////////////////////////////
+  // sum over planes of Wilson loop with length R1
+  // in the time direction
+  //////////////////////////////////////////////////
+  static void siteTimelikeWilsonLoop(LatticeComplex &Wl,
+                                     const std::vector<GaugeMat> &U,
+                                     const int R1, const int R2) {
+    // The last lattice direction is the one treated as time here.
+    const int tDir = U[0]._grid->_ndimension - 1;
+    LatticeComplex planeWl(U[0]._grid);
+
+    Wl = zero;
+    for (int nu = 0; nu < tDir; ++nu) {
+      traceWilsonLoop(planeWl, U, R1, R2, tDir, nu); // extent R1 along time, R2 along nu
+      Wl = Wl + planeWl;
+    }
+  }
+  //////////////////////////////////////////////////
+  // sum Wilson loop over all planes orthogonal to the time direction
+  //////////////////////////////////////////////////
+  static void siteSpatialWilsonLoop(LatticeComplex &Wl,
+                                    const std::vector<GaugeMat> &U,
+                                    const int R1, const int R2) {
+    LatticeComplex planeWl(U[0]._grid);
+    const int nSpatial = U[0]._grid->_ndimension - 1; // every direction except the last
+    Wl = zero;
+    for (int mu = 1; mu < nSpatial; ++mu) {
+      for (int nu = 0; nu < mu; ++nu) {
+        traceWilsonLoop(planeWl, U, R1, R2, mu, nu); // extent R1 along mu, R2 along nu
+        Wl = Wl + planeWl;
+        traceWilsonLoop(planeWl, U, R2, R1, mu, nu); // extents swapped
+        Wl = Wl + planeWl;
+      }
+    }
+  }
+  //////////////////////////////////////////////////
+  // sum over all x,y,z,t and over all planes of Wilson loop
+  //////////////////////////////////////////////////
+  static Real sumWilsonLoop(const GaugeLorentz &Umu,
+                            const int R1, const int R2) {
+    // Size the link vector from the grid so it always matches the loop bound below.
+    const int ndim = Umu._grid->_ndimension;
+    std::vector<GaugeMat> U(ndim, Umu._grid);
+    for (int mu = 0; mu < ndim; mu++) {
+      U[mu] = PeekIndex<LorentzIndex>(Umu, mu); // link field for direction mu
+    }
+    LatticeComplex Wl(Umu._grid);
+    // Per-site sum of traced R1 x R2 loops over all planes.
+    siteWilsonLoop(Wl, U, R1, R2);
+
+    TComplex Tp = sum(Wl);
+    Complex p = TensorRemove(Tp);
+    return p.real(); // only the real part of the lattice sum is returned
+  }
+  //////////////////////////////////////////////////
+  // sum over all x,y,z,t and over all planes of timelike Wilson loop
+  //////////////////////////////////////////////////
+  static Real sumTimelikeWilsonLoop(const GaugeLorentz &Umu,
+                            const int R1, const int R2) {
+    // Size the link vector from the grid so it always matches the loop bound below.
+    const int ndim = Umu._grid->_ndimension;
+    std::vector<GaugeMat> U(ndim, Umu._grid);
+    for (int mu = 0; mu < ndim; mu++) {
+      U[mu] = PeekIndex<LorentzIndex>(Umu, mu); // link field for direction mu
+    }
+    LatticeComplex Wl(Umu._grid);
+    // Per-site sum of traced loops over the planes containing the last direction.
+    siteTimelikeWilsonLoop(Wl, U, R1, R2);
+
+    TComplex Tp = sum(Wl);
+    Complex p = TensorRemove(Tp);
+    return p.real(); // only the real part of the lattice sum is returned
+  }
+  //////////////////////////////////////////////////
+  // sum over all x,y,z,t and over all planes of spatial Wilson loop
+  //////////////////////////////////////////////////
+  static Real sumSpatialWilsonLoop(const GaugeLorentz &Umu,
+                            const int R1, const int R2) {
+    // Size the link vector from the grid so it always matches the loop bound below.
+    const int ndim = Umu._grid->_ndimension;
+    std::vector<GaugeMat> U(ndim, Umu._grid);
+    for (int mu = 0; mu < ndim; mu++) {
+      U[mu] = PeekIndex<LorentzIndex>(Umu, mu); // link field for direction mu
+    }
+    LatticeComplex Wl(Umu._grid);
+    // Per-site sum of traced loops over the planes that exclude the last direction.
+    siteSpatialWilsonLoop(Wl, U, R1, R2);
+
+    TComplex Tp = sum(Wl);
+    Complex p = TensorRemove(Tp);
+    return p.real(); // only the real part of the lattice sum is returned
+  }
+  //////////////////////////////////////////////////
+  // average over all x,y,z,t and over all planes of Wilson loop
+  //////////////////////////////////////////////////
+  static Real avgWilsonLoop(const GaugeLorentz &Umu,
+                            const int R1, const int R2) {
+    const int  nDir   = Umu._grid->_ndimension;
+    const Real total  = sumWilsonLoop(Umu, R1, R2);
+    const Real nSites = Umu._grid->gSites();
+    const Real nLoops = 1.0 * nDir * (nDir - 1); // normalisation: loops counted per site
+    return total / nSites / nLoops / Nc; // Nc dependent... FIXME
+  }
+  //////////////////////////////////////////////////
+  // average over all x,y,z,t and over all planes of timelike Wilson loop
+  //////////////////////////////////////////////////
+  static Real avgTimelikeWilsonLoop(const GaugeLorentz &Umu,
+                            const int R1, const int R2) {
+    const int  nDir   = Umu._grid->_ndimension;
+    const Real total  = sumTimelikeWilsonLoop(Umu, R1, R2);
+    const Real nSites = Umu._grid->gSites();
+    const Real nLoops = 1.0 * (nDir - 1); // normalisation: loops counted per site
+    return total / nSites / nLoops / Nc; // Nc dependent... FIXME
+  }
+  //////////////////////////////////////////////////
+  // average over all x,y,z,t and over all planes of spatial Wilson loop
+  //////////////////////////////////////////////////
+  static Real avgSpatialWilsonLoop(const GaugeLorentz &Umu,
+                            const int R1, const int R2) {
+    const int  nDir   = Umu._grid->_ndimension;
+    const Real total  = sumSpatialWilsonLoop(Umu, R1, R2);
+    const Real nSites = Umu._grid->gSites();
+    const Real nLoops = 1.0 * (nDir - 1) * (nDir - 2); // normalisation: loops counted per site
+    return total / nSites / nLoops / Nc; // Nc dependent... FIXME
+  }
 };
 
 typedef WilsonLoops<PeriodicGimplR> ColourWilsonLoops;
diff --git a/Grid/serialisation/Hdf5IO.h b/Grid/serialisation/Hdf5IO.h
index ec26612a..59804240 100644
--- a/Grid/serialisation/Hdf5IO.h
+++ b/Grid/serialisation/Hdf5IO.h
@@ -123,9 +123,12 @@ namespace Grid
     
     if (flatx.size() > dataSetThres_)
     {
-      H5NS::DataSet dataSet;
+      H5NS::DataSet           dataSet;
+      H5NS::DSetCreatPropList plist;
       
-      dataSet = group_.createDataSet(s, Hdf5Type<Element>::type(), dataSpace);
+      plist.setChunk(dim.size(), dim.data());
+      plist.setFletcher32();
+      dataSet = group_.createDataSet(s, Hdf5Type<Element>::type(), dataSpace, plist);
       dataSet.write(flatx.data(), Hdf5Type<Element>::type());
     }
     else
diff --git a/Hadrons/A2AMatrix.hpp b/Hadrons/A2AMatrix.hpp
index e224a95e..ed2f5d36 100644
--- a/Hadrons/A2AMatrix.hpp
+++ b/Hadrons/A2AMatrix.hpp
@@ -442,6 +442,7 @@ void A2AMatrixIo<T>::initFile(const MetadataType &d, const unsigned int chunkSiz
     push(reader, dataname_);
     auto &group = reader.getGroup();
     plist.setChunk(chunk.size(), chunk.data());
+    plist.setFletcher32();
     dataset = group.createDataSet(HADRONS_A2AM_NAME, Hdf5Type<T>::type(), dataspace, plist);
 #else
     HADRONS_ERROR(Implementation, "all-to-all matrix I/O needs HDF5 library");
@@ -502,14 +503,12 @@ void A2AMatrixIo<T>::load(Vec<VecT> &v, double *tRead)
     H5NS::DataSet        dataset;
     H5NS::DataSpace      dataspace;
     H5NS::CompType       datatype;
-    H5NS::DSetCreatPropList plist;
     
     push(reader, dataname_);
     auto &group = reader.getGroup();
     dataset     = group.openDataSet(HADRONS_A2AM_NAME);
     datatype    = dataset.getCompType();
     dataspace   = dataset.getSpace();
-    plist       = dataset.getCreatePlist();
     hdim.resize(dataspace.getSimpleExtentNdims());
     dataspace.getSimpleExtentDims(hdim.data());
     if ((nt_*ni_*nj_ != 0) and
diff --git a/Hadrons/Application.cc b/Hadrons/Application.cc
index 44579646..d04c9a50 100644
--- a/Hadrons/Application.cc
+++ b/Hadrons/Application.cc
@@ -108,6 +108,9 @@ void Application::run(void)
         HADRONS_ERROR(Definition, "run id is empty");
     }
     LOG(Message) << "RUN ID '" << getPar().runId << "'" << std::endl;
+    BinaryIO::latticeWriteMaxRetry = getPar().parallelWriteMaxRetry;
+    LOG(Message) << "Attempt(s) for resilient parallel I/O: " 
+                 << BinaryIO::latticeWriteMaxRetry << std::endl;
     vm().setRunId(getPar().runId);
     vm().printContent();
     env().printContent();
diff --git a/Hadrons/Application.hpp b/Hadrons/Application.hpp
index 432fe757..3578c919 100644
--- a/Hadrons/Application.hpp
+++ b/Hadrons/Application.hpp
@@ -56,7 +56,9 @@ public:
                                         TrajRange,                  trajCounter,
                                         VirtualMachine::GeneticPar, genetic,
                                         std::string,                runId,
-                                        std::string,                graphFile);
+                                        std::string,                graphFile,
+                                        int,                        parallelWriteMaxRetry);
+        GlobalPar(void): parallelWriteMaxRetry{-1} {}
     };
 public:
     // constructors
diff --git a/Hadrons/Modules/MGauge/StochEm.cc b/Hadrons/Modules/MGauge/StochEm.cc
index 6f8bf55e..574387e4 100644
--- a/Hadrons/Modules/MGauge/StochEm.cc
+++ b/Hadrons/Modules/MGauge/StochEm.cc
@@ -70,7 +70,7 @@ void TStochEm::execute(void)
     LOG(Message) << "Generating stochastic EM potential..." << std::endl;
 
     std::vector<Real> improvements = strToVec<Real>(par().improvement);
-    PhotonR photon(par().gauge, par().zmScheme, improvements, par().G0_qedInf);
+    PhotonR photon(envGetGrid(EmField), par().gauge, par().zmScheme, improvements);
     auto    &a = envGet(EmField, getName());
     auto    &w = envGet(EmComp, "_" + getName() + "_weight");
     
diff --git a/Hadrons/Modules/MGauge/StochEm.hpp b/Hadrons/Modules/MGauge/StochEm.hpp
index a3f8cc96..b549387b 100644
--- a/Hadrons/Modules/MGauge/StochEm.hpp
+++ b/Hadrons/Modules/MGauge/StochEm.hpp
@@ -47,8 +47,7 @@ public:
     GRID_SERIALIZABLE_CLASS_MEMBERS(StochEmPar,
                                     PhotonR::Gauge,    gauge,
                                     PhotonR::ZmScheme, zmScheme,
-                                    std::string,       improvement,
-                                    Real,              G0_qedInf);
+                                    std::string,       improvement);
 };
 
 class TStochEm: public Module<StochEmPar>
diff --git a/Hadrons/Modules/MGauge/UnitEm.cc b/Hadrons/Modules/MGauge/UnitEm.cc
index d2ecad5e..97da8224 100644
--- a/Hadrons/Modules/MGauge/UnitEm.cc
+++ b/Hadrons/Modules/MGauge/UnitEm.cc
@@ -62,7 +62,7 @@ void TUnitEm::setup(void)
 // execution ///////////////////////////////////////////////////////////////////
 void TUnitEm::execute(void)
 {
-    PhotonR photon(0, 0); // Just chose arbitrary input values here
+    PhotonR photon(envGetGrid(EmField), 0, 0); // Just chose arbitrary input values here
     auto    &a = envGet(EmField, getName());
     LOG(Message) << "Generating unit EM potential..." << std::endl;
     photon.UnitField(a);
diff --git a/tests/IO/Test_ildg_io.cc b/tests/IO/Test_ildg_io.cc
index 55dd93b8..cb5efed2 100644
--- a/tests/IO/Test_ildg_io.cc
+++ b/tests/IO/Test_ildg_io.cc
@@ -53,7 +53,6 @@ int main (int argc, char ** argv)
   GridCartesian     Fine(latt_size,simd_layout,mpi_layout);
   GridCartesian     Coarse(clatt_size,simd_layout,mpi_layout);
 
-
   GridParallelRNG   pRNGa(&Fine);
   GridParallelRNG   pRNGb(&Fine);
   GridSerialRNG     sRNGa;
@@ -94,6 +93,27 @@ int main (int argc, char ** argv)
   _IldgReader.close();
   Umu_diff = Umu - Umu_saved;
 
+  std::cout <<GridLogMessage<<"**************************************"<<std::endl;
+  std::cout <<GridLogMessage<<"** Writing out  ILDG conf    *********"<<std::endl;
+  std::cout <<GridLogMessage<<"**************************************"<<std::endl;
+  file = std::string("./ckpoint_scidac.4000");
+  emptyUserRecord record;
+  ScidacWriter _ScidacWriter(Fine.IsBoss());
+  _ScidacWriter.open(file);
+  _ScidacWriter.writeScidacFieldRecord(Umu,record);
+  _ScidacWriter.close();
+
+  Umu_saved = Umu;
+  std::cout <<GridLogMessage<<"**************************************"<<std::endl;
+  std::cout <<GridLogMessage<<"** Reading back ILDG conf    *********"<<std::endl;
+  std::cout <<GridLogMessage<<"**************************************"<<std::endl;
+  ScidacReader _ScidacReader;
+  _ScidacReader.open(file);
+  _ScidacReader.readScidacFieldRecord(Umu,record);
+  _ScidacReader.close();
+  Umu_diff = Umu - Umu_saved;
+
+
   std::cout <<GridLogMessage<< "norm2 Gauge Diff = "<<norm2(Umu_diff)<<std::endl;
 
   Grid_finalize();
diff --git a/tests/core/Test_qed.cc b/tests/core/Test_qed.cc
new file mode 100644
index 00000000..f3b33454
--- /dev/null
+++ b/tests/core/Test_qed.cc
@@ -0,0 +1,138 @@
+/*************************************************************************************
+
+Grid physics library, www.github.com/paboyle/Grid 
+
+Source file: tests/core/Test_qed.cc
+
+Copyright (C) 2015-2018
+
+Author: Antonin Portelli <antonin.portelli@me.com>
+Author: James Harrison <J.Harrison@soton.ac.uk>
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along
+with this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+
+See the full license in the file "LICENSE" in the top level distribution directory
+*************************************************************************************/
+
+#include <Grid/Grid.h>
+
+using namespace Grid;
+using namespace QCD;
+
+typedef PeriodicGaugeImpl<QedGImplR>  QedPeriodicGImplR;
+typedef PhotonR::GaugeField           EmField;
+typedef PhotonR::GaugeLinkField       EmComp;
+
+const int NCONFIGS = 20;
+const int NWILSON  = 10;
+
+int main(int argc, char *argv[])
+{
+  // initialization
+  Grid_init(&argc, &argv);
+  std::cout << GridLogMessage << "Grid initialized" << std::endl;
+  
+  // QED stuff
+  std::vector<int> latt_size   = GridDefaultLatt();
+  std::vector<int> simd_layout = GridDefaultSimd(4, vComplex::Nsimd());
+  std::vector<int> mpi_layout  = GridDefaultMpi();
+  GridCartesian    grid(latt_size,simd_layout,mpi_layout);
+  GridParallelRNG  pRNG(&grid);
+  PhotonR          photon(&grid, PhotonR::Gauge::coulomb, PhotonR::ZmScheme::qedL);
+  EmField          a(&grid);
+  EmField          expA(&grid);
+
+  Complex imag_unit(0, 1);
+
+  Real wlA;
+  std::vector<Real> logWlAvg(NWILSON, 0.0), logWlTime(NWILSON, 0.0), logWlSpace(NWILSON, 0.0);
+
+  pRNG.SeedFixedIntegers({1, 2, 3, 4});
+
+  std::cout << GridLogMessage << "Wilson loop calculation beginning" << std::endl;
+  for(int ic = 0; ic < NCONFIGS; ic++){
+      std::cout << GridLogMessage << "Configuration " << ic <<std::endl;
+      photon.StochasticField(a, pRNG);
+
+      // Exponentiate photon field
+      expA = exp(imag_unit*a);
+
+      // Calculate zero-modes
+      std::vector<EmField::vector_object::scalar_object> zm;
+
+      std::cout << GridLogMessage << "Total zero-mode norm 2 " 
+                << std::sqrt(norm2(sum(a))) << std::endl;
+
+      std::cout << GridLogMessage << "Spatial zero-mode norm 2" << std::endl;
+      sliceSum(a, zm, grid.Nd() - 1);
+      for (unsigned int t = 0; t < latt_size.back(); ++t)
+      {
+        std::cout << GridLogMessage << "t = " << t << " " << std::sqrt(norm2(zm[t])) << std::endl;
+      }
+
+      // Calculate divergence
+      EmComp diva(&grid), amu(&grid);
+
+      diva = zero;
+      for (unsigned int mu = 0; mu < grid.Nd(); ++mu)
+      {
+        amu   = peekLorentz(a, mu);
+        diva += amu - Cshift(amu, mu, -1);
+        if (mu == grid.Nd() - 2) // all directions except the last have been accumulated
+        {
+          std::cout << GridLogMessage << "Spatial divergence norm 2 " << std::sqrt(norm2(diva)) << std::endl;
+        }
+      }
+      std::cout << GridLogMessage << "Total divergence norm 2 " << std::sqrt(norm2(diva)) << std::endl;
+
+      // Calculate Wilson loops; accumulate -2*log(W) per loop size over the configurations.
+      for(int iw=1; iw<=NWILSON; iw++){
+          wlA = WilsonLoops<QedPeriodicGImplR>::avgWilsonLoop(expA, iw, iw) * 3; // NOTE(review): presumably undoes the 1/Nc in avg*WilsonLoop; confirm Nc == 3
+          logWlAvg[iw-1] -= 2*log(wlA);
+          wlA = WilsonLoops<QedPeriodicGImplR>::avgTimelikeWilsonLoop(expA, iw, iw) * 3;
+          logWlTime[iw-1] -= 2*log(wlA);
+          wlA = WilsonLoops<QedPeriodicGImplR>::avgSpatialWilsonLoop(expA, iw, iw) * 3;
+          logWlSpace[iw-1] -= 2*log(wlA);
+      }
+  }
+  std::cout << GridLogMessage << "Wilson loop calculation completed" << std::endl;
+  
+  // Reference values for comparison with the printed averages
+  // From A. Portelli's PhD thesis:
+  // size  -2*log(W)
+  // 1     0.500000000(1)
+  // 2     1.369311535(1) 
+  // 3     2.305193057(1) 
+  // 4     3.261483854(1) 
+  // 5     4.228829967(1) 
+  // 6     5.203604529(1) 
+  // 7     6.183728249(1) 
+  // 8     7.167859805(1) 
+  // 9     8.155091868(1) 
+  // 10    9.144788116(1)
+
+  for(int iw=1; iw<=NWILSON; iw++){ // bound by NWILSON: the accumulators hold exactly NWILSON entries
+      std::cout << GridLogMessage << iw << 'x' << iw << " Wilson loop" << std::endl;
+      std::cout << GridLogMessage << "-2*log(W) average: " << logWlAvg[iw-1]/NCONFIGS << std::endl;
+      std::cout << GridLogMessage << "-2*log(W) timelike: " << logWlTime[iw-1]/NCONFIGS << std::endl;
+      std::cout << GridLogMessage << "-2*log(W) spatial: " << logWlSpace[iw-1]/NCONFIGS << std::endl;
+  }
+
+  // epilogue
+  std::cout << GridLogMessage << "Grid is finalizing now" << std::endl;
+  Grid_finalize();
+  
+  return EXIT_SUCCESS;
+}
diff --git a/tests/solver/Test_multigrid_common.h b/tests/solver/Test_multigrid_common.h
new file mode 100644
index 00000000..add833f2
--- /dev/null
+++ b/tests/solver/Test_multigrid_common.h
@@ -0,0 +1,670 @@
+/*************************************************************************************
+
+    Grid physics library, www.github.com/paboyle/Grid 
+
+    Source file: ./tests/solver/Test_multigrid_common.h
+
+    Copyright (C) 2015-2018
+
+    Author: Daniel Richtmann <daniel.richtmann@ur.de>
+
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License along
+    with this program; if not, write to the Free Software Foundation, Inc.,
+    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+
+    See the full license in the file "LICENSE" in the top level distribution directory
+    *************************************************************************************/
+/*  END LEGAL */
+#ifndef GRID_TEST_MULTIGRID_COMMON_H
+#define GRID_TEST_MULTIGRID_COMMON_H
+
+namespace Grid {
+
+// TODO: Can think about having one parameter struct per level and then a
+// vector of these structs. How well would that work together with the
+// serialization strategy of Grid?
+
+// clang-format off
+struct MultiGridParams : Serializable { // serialisable parameter set for the multigrid preconditioner tests
+public:
+  GRID_SERIALIZABLE_CLASS_MEMBERS(MultiGridParams,
+                                  int,                           nLevels,
+                                  std::vector<std::vector<int>>, blockSizes,           // size == nLevels - 1
+                                  std::vector<double>,           smootherTol,          // size == nLevels - 1
+                                  std::vector<int>,              smootherMaxOuterIter, // size == nLevels - 1
+                                  std::vector<int>,              smootherMaxInnerIter, // size == nLevels - 1
+                                  bool,                          kCycle,
+                                  std::vector<double>,           kCycleTol,            // size == nLevels - 1
+                                  std::vector<int>,              kCycleMaxOuterIter,   // size == nLevels - 1
+                                  std::vector<int>,              kCycleMaxInnerIter,   // size == nLevels - 1
+                                  double,                        coarseSolverTol,
+                                  int,                           coarseSolverMaxOuterIter,
+                                  int,                           coarseSolverMaxInnerIter);
+
+  // constructor with default values (a two-level setup with a single 4x4x4x4 blocking)
+  MultiGridParams(int                           _nLevels                  = 2,
+                  std::vector<std::vector<int>> _blockSizes               = {{4, 4, 4, 4}},
+                  std::vector<double>           _smootherTol              = {1e-14},
+                  std::vector<int>              _smootherMaxOuterIter     = {4},
+                  std::vector<int>              _smootherMaxInnerIter     = {4},
+                  bool                          _kCycle                   = true,
+                  std::vector<double>           _kCycleTol                = {1e-1},
+                  std::vector<int>              _kCycleMaxOuterIter       = {2},
+                  std::vector<int>              _kCycleMaxInnerIter       = {5},
+                  double                        _coarseSolverTol          = 5e-2,
+                  int                           _coarseSolverMaxOuterIter = 10,
+                  int                           _coarseSolverMaxInnerIter = 500)
+  : nLevels(_nLevels)
+  , blockSizes(_blockSizes)
+  , smootherTol(_smootherTol)
+  , smootherMaxOuterIter(_smootherMaxOuterIter)
+  , smootherMaxInnerIter(_smootherMaxInnerIter)
+  , kCycle(_kCycle)
+  , kCycleTol(_kCycleTol)
+  , kCycleMaxOuterIter(_kCycleMaxOuterIter)
+  , kCycleMaxInnerIter(_kCycleMaxInnerIter)
+  , coarseSolverTol(_coarseSolverTol)
+  , coarseSolverMaxOuterIter(_coarseSolverMaxOuterIter)
+  , coarseSolverMaxInnerIter(_coarseSolverMaxInnerIter)
+  {} // no validation here; the per-level vector sizes are checked by checkParameterValidity()
+};
+// clang-format on
+
+inline void checkParameterValidity(MultiGridParams const &params) {
+  // Asserts that every per-level parameter vector has nLevels - 1 entries.
+  auto correctSize = params.nLevels - 1;
+
+  assert(correctSize == params.blockSizes.size());
+  assert(correctSize == params.smootherTol.size());
+  assert(correctSize == params.smootherMaxOuterIter.size());
+  assert(correctSize == params.smootherMaxInnerIter.size());
+  assert(correctSize == params.kCycleTol.size());
+  assert(correctSize == params.kCycleMaxOuterIter.size());
+  assert(correctSize == params.kCycleMaxInnerIter.size());
+}
+
+struct LevelInfo { // per-level grids, RNG seeds and parallel RNGs; index 0 is the finest level
+public:
+  std::vector<std::vector<int>> Seeds;
+  std::vector<GridCartesian *>  Grids;
+  std::vector<GridParallelRNG>  PRNGs;
+
+  LevelInfo(GridCartesian *FineGrid, MultiGridParams const &mgParams) {
+
+    auto nCoarseLevels = mgParams.blockSizes.size();
+
+    assert(nCoarseLevels == mgParams.nLevels - 1);
+
+    // set up values for finest grid
+    Grids.push_back(FineGrid);
+    Seeds.push_back({1, 2, 3, 4});
+    PRNGs.push_back(GridParallelRNG(Grids.back()));
+    PRNGs.back().SeedFixedIntegers(Seeds.back());
+
+    // set up values for coarser grids
+    for(int level = 1; level < mgParams.nLevels; ++level) {
+      auto Nd  = Grids[level - 1]->_ndimension;
+      auto tmp = Grids[level - 1]->_fdimensions; // copy of the finer level's dimensions, divided down below
+      assert(tmp.size() == Nd);
+
+      Seeds.push_back(std::vector<int>(Nd));
+
+      for(int d = 0; d < Nd; ++d) {
+        tmp[d] /= mgParams.blockSizes[level - 1][d]; // integer division; no divisibility check is made here
+        Seeds[level][d] = (level)*Nd + d + 1;
+      }
+      // NOTE(review): no destructor is declared in this struct, so the grid created here is presumably never freed - confirm.
+      Grids.push_back(QCD::SpaceTimeGrid::makeFourDimGrid(tmp, Grids[level - 1]->_simd_layout, GridDefaultMpi()));
+      PRNGs.push_back(GridParallelRNG(Grids[level]));
+
+      PRNGs[level].SeedFixedIntegers(Seeds[level]);
+    }
+
+    std::cout << GridLogMessage << "Constructed " << mgParams.nLevels << " levels" << std::endl;
+    // Print the decomposition of every level's grid.
+    for(int level = 0; level < mgParams.nLevels; ++level) {
+      std::cout << GridLogMessage << "level = " << level << ":" << std::endl;
+      Grids[level]->show_decomposition();
+    }
+  }
+};
+
+template<class Field> class MultiGridPreconditionerBase : public LinearFunction<Field> { // abstract interface; all operations below are pure virtual
+public:
+  virtual ~MultiGridPreconditionerBase()               = default;
+  virtual void setup()                                 = 0; // one-off setup step
+  virtual void operator()(Field const &in, Field &out) = 0; // apply the preconditioner to in, writing out
+  virtual void runChecks(RealD tolerance)              = 0; // checks parameterised by a tolerance
+  virtual void reportTimings()                         = 0; // timing report
+};
+
+template<class Fobj, class CComplex, int nBasis, int nCoarserLevels, class Matrix>
+class MultiGridPreconditioner : public MultiGridPreconditionerBase<Lattice<Fobj>> {
+public:
+  /////////////////////////////////////////////
+  // Type Definitions
+  /////////////////////////////////////////////
+
+  // clang-format off
+  typedef Aggregation<Fobj, CComplex, nBasis>                                                                         Aggregates;
+  typedef CoarsenedMatrix<Fobj, CComplex, nBasis>                                                                     CoarseDiracMatrix;
+  typedef typename Aggregates::CoarseVector                                                                           CoarseVector;
+  typedef typename Aggregates::siteVector                                                                             CoarseSiteVector;
+  typedef Matrix                                                                                                      FineDiracMatrix;
+  typedef typename Aggregates::FineField                                                                              FineVector;
+  typedef MultiGridPreconditioner<CoarseSiteVector, iScalar<CComplex>, nBasis, nCoarserLevels - 1, CoarseDiracMatrix> NextPreconditionerLevel;
+  // clang-format on
+
+  /////////////////////////////////////////////
+  // Member Data
+  /////////////////////////////////////////////
+
+  int _CurrentLevel;
+  int _NextCoarserLevel;
+
+  MultiGridParams &_MultiGridParams;
+  LevelInfo &      _LevelInfo;
+
+  FineDiracMatrix & _FineMatrix;
+  FineDiracMatrix & _SmootherMatrix;
+  Aggregates        _Aggregates;
+  CoarseDiracMatrix _CoarseMatrix;
+
+  std::unique_ptr<NextPreconditionerLevel> _NextPreconditionerLevel;
+
+  GridStopWatch _SetupTotalTimer;
+  GridStopWatch _SetupCreateSubspaceTimer;
+  GridStopWatch _SetupProjectToChiralitiesTimer;
+  GridStopWatch _SetupCoarsenOperatorTimer;
+  GridStopWatch _SetupNextLevelTimer;
+  GridStopWatch _SolveTotalTimer;
+  GridStopWatch _SolveRestrictionTimer;
+  GridStopWatch _SolveProlongationTimer;
+  GridStopWatch _SolveSmootherTimer;
+  GridStopWatch _SolveNextLevelTimer;
+
+  /////////////////////////////////////////////
+  // Member Functions
+  /////////////////////////////////////////////
+
+  MultiGridPreconditioner(MultiGridParams &mgParams, LevelInfo &LvlInfo, FineDiracMatrix &FineMat, FineDiracMatrix &SmootherMat)
+    : _CurrentLevel(mgParams.nLevels - (nCoarserLevels + 1)) // level 0 is the finest
+    , _NextCoarserLevel(_CurrentLevel + 1)                   // one level coarser than this instance
+    , _MultiGridParams(mgParams)
+    , _LevelInfo(LvlInfo)
+    , _FineMatrix(FineMat)
+    , _SmootherMatrix(SmootherMat)
+    , _Aggregates(_LevelInfo.Grids[_NextCoarserLevel], _LevelInfo.Grids[_CurrentLevel], 0)
+    , _CoarseMatrix(*_LevelInfo.Grids[_NextCoarserLevel]) {
+    // The next level uses this level's coarse operator as both its matrix and its smoother matrix.
+    _NextPreconditionerLevel.reset(
+      new NextPreconditionerLevel(_MultiGridParams, _LevelInfo, _CoarseMatrix, _CoarseMatrix));
+
+    resetTimers();
+  }
+
+  void setup() {
+    // Builds the subspace, doubles it chirally, coarsens the operator, then sets up the next level.
+    _SetupTotalTimer.Start();
+
+    static_assert((nBasis & 0x1) == 0, "MG Preconditioner only supports an even number of basis vectors");
+    int nb = nBasis / 2;
+
+    MdagMLinearOperator<FineDiracMatrix, FineVector> fineMdagMOp(_FineMatrix);
+
+    _SetupCreateSubspaceTimer.Start();
+    _Aggregates.CreateSubspace(_LevelInfo.PRNGs[_CurrentLevel], fineMdagMOp, nb);
+    _SetupCreateSubspaceTimer.Stop();
+
+    _SetupProjectToChiralitiesTimer.Start();
+    FineVector tmp1(_Aggregates.subspace[0]._grid);
+    FineVector tmp2(_Aggregates.subspace[0]._grid);
+    for(int n = 0; n < nb; n++) {
+      tmp1 = _Aggregates.subspace[n]; // reuse the scratch field above instead of shadowing it with a fresh copy
+      G5C(tmp2, _Aggregates.subspace[n]);
+      axpby(_Aggregates.subspace[n], 0.5, 0.5, tmp1, tmp2);       // presumably 0.5*tmp1 + 0.5*tmp2
+      axpby(_Aggregates.subspace[n + nb], 0.5, -0.5, tmp1, tmp2); // presumably 0.5*tmp1 - 0.5*tmp2
+      std::cout << GridLogMG << " Level " << _CurrentLevel << ": Chirally doubled vector " << n << ". "
+                << "norm2(vec[" << n << "]) = " << norm2(_Aggregates.subspace[n]) << ". "
+                << "norm2(vec[" << n + nb << "]) = " << norm2(_Aggregates.subspace[n + nb]) << std::endl;
+    }
+    _SetupProjectToChiralitiesTimer.Stop();
+
+    _SetupCoarsenOperatorTimer.Start();
+    _CoarseMatrix.CoarsenOperator(_LevelInfo.Grids[_CurrentLevel], fineMdagMOp, _Aggregates);
+    _SetupCoarsenOperatorTimer.Stop();
+
+    _SetupNextLevelTimer.Start();
+    _NextPreconditionerLevel->setup();
+    _SetupNextLevelTimer.Stop();
+
+    _SetupTotalTimer.Stop();
+  }
+
+  virtual void operator()(FineVector const &in, FineVector &out) {
+    // The input must live on this level's grid and be conformable with the output.
+    conformable(_LevelInfo.Grids[_CurrentLevel], in._grid);
+    conformable(in, out);
+    // Dispatch on the configured cycle type. TODO: implement a W-cycle
+    if(!_MultiGridParams.kCycle) {
+      vCycle(in, out);
+    } else {
+      kCycle(in, out);
+    }
+  }
+
+  void vCycle(FineVector const &in, FineVector &out) {
+    // One V-cycle: restrict in, apply the next level, prolong into out, then smooth on this level.
+    _SolveTotalTimer.Start();
+
+    RealD inputNorm = norm2(in); // logged below as sqrt(inputNorm)
+
+    CoarseVector coarseSrc(_LevelInfo.Grids[_NextCoarserLevel]);
+    CoarseVector coarseSol(_LevelInfo.Grids[_NextCoarserLevel]);
+    coarseSol = zero;
+
+    FineVector fineTmp(in._grid); // scratch field for the residual checks
+
+    auto maxSmootherIter = _MultiGridParams.smootherMaxOuterIter[_CurrentLevel] * _MultiGridParams.smootherMaxInnerIter[_CurrentLevel];
+
+    TrivialPrecon<FineVector>                      fineTrivialPreconditioner;
+    FlexibleGeneralisedMinimalResidual<FineVector> fineFGMRES(_MultiGridParams.smootherTol[_CurrentLevel],
+                                                              maxSmootherIter,
+                                                              fineTrivialPreconditioner,
+                                                              _MultiGridParams.smootherMaxInnerIter[_CurrentLevel],
+                                                              false);
+
+    MdagMLinearOperator<FineDiracMatrix, FineVector> fineMdagMOp(_FineMatrix);
+    MdagMLinearOperator<FineDiracMatrix, FineVector> fineSmootherMdagMOp(_SmootherMatrix);
+    // Restriction: project the fine input onto the coarse subspace.
+    _SolveRestrictionTimer.Start();
+    _Aggregates.ProjectToSubspace(coarseSrc, in);
+    _SolveRestrictionTimer.Stop();
+    // Apply the next-coarser preconditioner level to the restricted source.
+    _SolveNextLevelTimer.Start();
+    (*_NextPreconditionerLevel)(coarseSrc, coarseSol);
+    _SolveNextLevelTimer.Stop();
+    // Prolongation: promote the coarse result back into out.
+    _SolveProlongationTimer.Start();
+    _Aggregates.PromoteFromSubspace(coarseSol, out);
+    _SolveProlongationTimer.Stop();
+    // Residual after the coarse-grid correction (logging only), measured with the fine operator.
+    fineMdagMOp.Op(out, fineTmp);
+    fineTmp                                = in - fineTmp;
+    auto r                                 = norm2(fineTmp);
+    auto residualAfterCoarseGridCorrection = std::sqrt(r / inputNorm);
+    // Post-smoothing with the smoother operator; out is passed in holding the prolonged correction.
+    _SolveSmootherTimer.Start();
+    fineFGMRES(fineSmootherMdagMOp, in, out);
+    _SolveSmootherTimer.Stop();
+    // Residual after the post-smoother (logging only).
+    fineMdagMOp.Op(out, fineTmp);
+    fineTmp                        = in - fineTmp;
+    r                              = norm2(fineTmp);
+    auto residualAfterPostSmoother = std::sqrt(r / inputNorm);
+
+    std::cout << GridLogMG << " Level " << _CurrentLevel << ": V-cycle: Input norm = " << std::sqrt(inputNorm)
+              << " Coarse residual = " << residualAfterCoarseGridCorrection << " Post-Smoother residual = " << residualAfterPostSmoother
+              << std::endl;
+
+    _SolveTotalTimer.Stop();
+  }
+
+  void kCycle(FineVector const &in, FineVector &out) {
+    // One K-cycle: restrict `in`, solve the coarse system with FGMRES preconditioned by the next level, prolong, post-smooth.
+    _SolveTotalTimer.Start();
+
+    RealD srcNorm2 = norm2(in);
+
+    CoarseVector restrictedSrc(_LevelInfo.Grids[_NextCoarserLevel]);
+    CoarseVector coarseCorrection(_LevelInfo.Grids[_NextCoarserLevel]);
+    coarseCorrection = zero;
+
+    FineVector residual(in._grid);
+
+    auto smootherIterCap = _MultiGridParams.smootherMaxOuterIter[_CurrentLevel] * _MultiGridParams.smootherMaxInnerIter[_CurrentLevel];
+    auto kCycleIterCap   = _MultiGridParams.kCycleMaxOuterIter[_CurrentLevel] * _MultiGridParams.kCycleMaxInnerIter[_CurrentLevel];
+
+    TrivialPrecon<FineVector>                        noPrecon;
+    FlexibleGeneralisedMinimalResidual<FineVector>   postSmoother(_MultiGridParams.smootherTol[_CurrentLevel],
+                                                                  smootherIterCap,
+                                                                  noPrecon,
+                                                                  _MultiGridParams.smootherMaxInnerIter[_CurrentLevel],
+                                                                  false);
+    FlexibleGeneralisedMinimalResidual<CoarseVector> coarseSolver(_MultiGridParams.kCycleTol[_CurrentLevel],
+                                                                  kCycleIterCap,
+                                                                  *_NextPreconditionerLevel,
+                                                                  _MultiGridParams.kCycleMaxInnerIter[_CurrentLevel],
+                                                                  false);
+
+    MdagMLinearOperator<FineDiracMatrix, FineVector>     systemOp(_FineMatrix);
+    MdagMLinearOperator<FineDiracMatrix, FineVector>     smootherOp(_SmootherMatrix);
+    MdagMLinearOperator<CoarseDiracMatrix, CoarseVector> coarseOp(_CoarseMatrix);
+
+    _SolveRestrictionTimer.Start();
+    _Aggregates.ProjectToSubspace(restrictedSrc, in);
+    _SolveRestrictionTimer.Stop();
+
+    _SolveNextLevelTimer.Start();
+    coarseSolver(coarseOp, restrictedSrc, coarseCorrection);
+    _SolveNextLevelTimer.Stop();
+
+    _SolveProlongationTimer.Start();
+    _Aggregates.PromoteFromSubspace(coarseCorrection, out);
+    _SolveProlongationTimer.Stop();
+    // Logging only: relative residual of `out` under the system operator after the coarse-grid correction.
+    systemOp.Op(out, residual);
+    residual          = in - residual;
+    auto resNorm2     = norm2(residual);
+    auto relResCoarse = std::sqrt(resNorm2 / srcNorm2);
+
+    _SolveSmootherTimer.Start();
+    postSmoother(smootherOp, in, out);
+    _SolveSmootherTimer.Stop();
+    // Logging only: the same relative residual, recomputed after the smoother has updated `out`.
+    systemOp.Op(out, residual);
+    residual            = in - residual;
+    resNorm2            = norm2(residual);
+    auto relResSmoothed = std::sqrt(resNorm2 / srcNorm2);
+
+    std::cout << GridLogMG << " Level " << _CurrentLevel << ": K-cycle: Input norm = " << std::sqrt(srcNorm2)
+              << " Coarse residual = " << relResCoarse << " Post-Smoother residual = " << relResSmoothed
+              << std::endl;
+
+    _SolveTotalTimer.Stop();
+  }
+
+  void runChecks(RealD tolerance) {
+    // Self-test of this level: five checks, each printing a relative deviation and abort()ing unless it is <= tolerance.
+    std::vector<FineVector>   fineTmps(7, _LevelInfo.Grids[_CurrentLevel]);
+    std::vector<CoarseVector> coarseTmps(4, _LevelInfo.Grids[_NextCoarserLevel]);
+    // tolerance: largest relative deviation any check may show; the same value is handed on to the next level at the end.
+    MdagMLinearOperator<FineDiracMatrix, FineVector>     fineMdagMOp(_FineMatrix);
+    MdagMLinearOperator<CoarseDiracMatrix, CoarseVector> coarseMdagMOp(_CoarseMatrix);
+
+    std::cout << GridLogMG << " Level " << _CurrentLevel << ": **************************************************" << std::endl;
+    std::cout << GridLogMG << " Level " << _CurrentLevel << ": MG correctness check: 0 == (M - (Mdiag + Σ_μ Mdir_μ)) * v" << std::endl;
+    std::cout << GridLogMG << " Level " << _CurrentLevel << ": **************************************************" << std::endl;
+
+    random(_LevelInfo.PRNGs[_CurrentLevel], fineTmps[0]);
+
+    fineMdagMOp.Op(fineTmps[0], fineTmps[1]);     //     M * v
+    fineMdagMOp.OpDiag(fineTmps[0], fineTmps[2]); // Mdiag * v
+
+    fineTmps[4] = zero;
+    for(int dir = 0; dir < 4; dir++) { //       Σ_μ Mdir_μ * v
+      for(auto disp : {+1, -1}) {
+        fineMdagMOp.OpDir(fineTmps[0], fineTmps[3], dir, disp);
+        fineTmps[4] = fineTmps[4] + fineTmps[3];
+      }
+    }
+
+    fineTmps[5] = fineTmps[2] + fineTmps[4]; // (Mdiag + Σ_μ Mdir_μ) * v
+
+    fineTmps[6]    = fineTmps[1] - fineTmps[5];
+    auto deviation = std::sqrt(norm2(fineTmps[6]) / norm2(fineTmps[1]));
+
+    std::cout << GridLogMG << " Level " << _CurrentLevel << ": norm2(M * v)                    = " << norm2(fineTmps[1]) << std::endl;
+    std::cout << GridLogMG << " Level " << _CurrentLevel << ": norm2(Mdiag * v)                = " << norm2(fineTmps[2]) << std::endl;
+    std::cout << GridLogMG << " Level " << _CurrentLevel << ": norm2(Σ_μ Mdir_μ * v)           = " << norm2(fineTmps[4]) << std::endl;
+    std::cout << GridLogMG << " Level " << _CurrentLevel << ": norm2((Mdiag + Σ_μ Mdir_μ) * v) = " << norm2(fineTmps[5]) << std::endl;
+    std::cout << GridLogMG << " Level " << _CurrentLevel << ": relative deviation              = " << deviation;
+    // Every check below tests !(deviation <= tolerance) so that a NaN deviation (0/0 from a zero norm) fails instead of passing.
+    if(!(deviation <= tolerance)) {
+      std::cout << " > " << tolerance << " -> check failed" << std::endl;
+      abort();
+    } else {
+      std::cout << " < " << tolerance << " -> check passed" << std::endl;
+    }
+
+    std::cout << GridLogMG << " Level " << _CurrentLevel << ": **************************************************" << std::endl;
+    std::cout << GridLogMG << " Level " << _CurrentLevel << ": MG correctness check: 0 == (1 - P R) v" << std::endl;
+    std::cout << GridLogMG << " Level " << _CurrentLevel << ": **************************************************" << std::endl;
+    // The index uses the container's own size type, avoiding a signed/unsigned comparison against size().
+    for(decltype(_Aggregates.subspace.size()) i = 0; i < _Aggregates.subspace.size(); ++i) {
+      _Aggregates.ProjectToSubspace(coarseTmps[0], _Aggregates.subspace[i]); //   R v_i
+      _Aggregates.PromoteFromSubspace(coarseTmps[0], fineTmps[0]);           // P R v_i
+
+      fineTmps[1] = _Aggregates.subspace[i] - fineTmps[0]; // v_i - P R v_i
+      deviation   = std::sqrt(norm2(fineTmps[1]) / norm2(_Aggregates.subspace[i]));
+
+      std::cout << GridLogMG << " Level " << _CurrentLevel << ": Vector " << i << ": norm2(v_i) = " << norm2(_Aggregates.subspace[i])
+                << " | norm2(R v_i) = " << norm2(coarseTmps[0]) << " | norm2(P R v_i) = " << norm2(fineTmps[0])
+                << " | relative deviation = " << deviation;
+
+      if(!(deviation <= tolerance)) {
+        std::cout << " > " << tolerance << " -> check failed" << std::endl;
+        abort();
+      } else {
+        std::cout << " < " << tolerance << " -> check passed" << std::endl;
+      }
+    }
+
+    std::cout << GridLogMG << " Level " << _CurrentLevel << ": **************************************************" << std::endl;
+    std::cout << GridLogMG << " Level " << _CurrentLevel << ": MG correctness check: 0 == (1 - R P) v_c" << std::endl;
+    std::cout << GridLogMG << " Level " << _CurrentLevel << ": **************************************************" << std::endl;
+
+    random(_LevelInfo.PRNGs[_NextCoarserLevel], coarseTmps[0]);
+
+    _Aggregates.PromoteFromSubspace(coarseTmps[0], fineTmps[0]); //   P v_c
+    _Aggregates.ProjectToSubspace(coarseTmps[1], fineTmps[0]);   // R P v_c
+
+    coarseTmps[2] = coarseTmps[0] - coarseTmps[1]; // v_c - R P v_c
+    deviation     = std::sqrt(norm2(coarseTmps[2]) / norm2(coarseTmps[0]));
+
+    std::cout << GridLogMG << " Level " << _CurrentLevel << ": norm2(v_c) = " << norm2(coarseTmps[0])
+              << " | norm2(R P v_c) = " << norm2(coarseTmps[1]) << " | norm2(P v_c) = " << norm2(fineTmps[0])
+              << " | relative deviation = " << deviation;
+
+    if(!(deviation <= tolerance)) {
+      std::cout << " > " << tolerance << " -> check failed" << std::endl;
+      abort();
+    } else {
+      std::cout << " < " << tolerance << " -> check passed" << std::endl;
+    }
+
+    std::cout << GridLogMG << " Level " << _CurrentLevel << ": **************************************************" << std::endl;
+    std::cout << GridLogMG << " Level " << _CurrentLevel << ": MG correctness check: 0 == (R D P - D_c) v_c" << std::endl;
+    std::cout << GridLogMG << " Level " << _CurrentLevel << ": **************************************************" << std::endl;
+
+    random(_LevelInfo.PRNGs[_NextCoarserLevel], coarseTmps[0]);
+
+    _Aggregates.PromoteFromSubspace(coarseTmps[0], fineTmps[0]); //     P v_c
+    fineMdagMOp.Op(fineTmps[0], fineTmps[1]);                    //   D P v_c
+    _Aggregates.ProjectToSubspace(coarseTmps[1], fineTmps[1]);   // R D P v_c
+
+    coarseMdagMOp.Op(coarseTmps[0], coarseTmps[2]); // D_c v_c
+
+    coarseTmps[3] = coarseTmps[1] - coarseTmps[2]; // R D P v_c - D_c v_c
+    deviation     = std::sqrt(norm2(coarseTmps[3]) / norm2(coarseTmps[1]));
+
+    std::cout << GridLogMG << " Level " << _CurrentLevel << ": norm2(R D P v_c) = " << norm2(coarseTmps[1])
+              << " | norm2(D_c v_c) = " << norm2(coarseTmps[2]) << " | relative deviation = " << deviation;
+
+    if(!(deviation <= tolerance)) {
+      std::cout << " > " << tolerance << " -> check failed" << std::endl;
+      abort();
+    } else {
+      std::cout << " < " << tolerance << " -> check passed" << std::endl;
+    }
+
+    std::cout << GridLogMG << " Level " << _CurrentLevel << ": **************************************************" << std::endl;
+    std::cout << GridLogMG << " Level " << _CurrentLevel << ": MG correctness check: 0 == |(Im(v_c^dag D_c^dag D_c v_c)|" << std::endl;
+    std::cout << GridLogMG << " Level " << _CurrentLevel << ": **************************************************" << std::endl;
+
+    random(_LevelInfo.PRNGs[_NextCoarserLevel], coarseTmps[0]);
+
+    coarseMdagMOp.Op(coarseTmps[0], coarseTmps[1]);    //         D_c v_c
+    coarseMdagMOp.AdjOp(coarseTmps[1], coarseTmps[2]); // D_c^dag D_c v_c
+
+    auto dot  = innerProduct(coarseTmps[0], coarseTmps[2]); //v_c^dag D_c^dag D_c v_c
+    deviation = std::abs(imag(dot)) / std::abs(real(dot));
+
+    std::cout << GridLogMG << " Level " << _CurrentLevel << ": Re(v_c^dag D_c^dag D_c v_c) = " << real(dot)
+              << " | Im(v_c^dag D_c^dag D_c v_c) = " << imag(dot) << " | relative deviation = " << deviation;
+
+    if(!(deviation <= tolerance)) {
+      std::cout << " > " << tolerance << " -> check failed" << std::endl;
+      abort();
+    } else {
+      std::cout << " < " << tolerance << " -> check passed" << std::endl;
+    }
+    // All checks on this level passed; repeat them one level down.
+    _NextPreconditionerLevel->runChecks(tolerance);
+  }
+
+  void reportTimings() {
+    // Print the Elapsed() reading of each setup/solve stopwatch of this level; the first line adds setup and solve totals.
+    // clang-format off
+    std::cout << GridLogMG << " Level " << _CurrentLevel << ": Time elapsed: Sum   total            " <<                _SetupTotalTimer.Elapsed() + _SolveTotalTimer.Elapsed() << std::endl;
+    std::cout << GridLogMG << " Level " << _CurrentLevel << ": Time elapsed: Setup total            " <<                _SetupTotalTimer.Elapsed() << std::endl;
+    std::cout << GridLogMG << " Level " << _CurrentLevel << ": Time elapsed: Setup create subspace  " <<       _SetupCreateSubspaceTimer.Elapsed() << std::endl;
+    std::cout << GridLogMG << " Level " << _CurrentLevel << ": Time elapsed: Setup project chiral   " << _SetupProjectToChiralitiesTimer.Elapsed() << std::endl;
+    std::cout << GridLogMG << " Level " << _CurrentLevel << ": Time elapsed: Setup coarsen operator " <<      _SetupCoarsenOperatorTimer.Elapsed() << std::endl;
+    std::cout << GridLogMG << " Level " << _CurrentLevel << ": Time elapsed: Setup next level       " <<            _SetupNextLevelTimer.Elapsed() << std::endl;
+    std::cout << GridLogMG << " Level " << _CurrentLevel << ": Time elapsed: Solve total            " <<                _SolveTotalTimer.Elapsed() << std::endl;
+    std::cout << GridLogMG << " Level " << _CurrentLevel << ": Time elapsed: Solve restriction      " <<          _SolveRestrictionTimer.Elapsed() << std::endl;
+    std::cout << GridLogMG << " Level " << _CurrentLevel << ": Time elapsed: Solve prolongation     " <<         _SolveProlongationTimer.Elapsed() << std::endl;
+    std::cout << GridLogMG << " Level " << _CurrentLevel << ": Time elapsed: Solve smoother         " <<             _SolveSmootherTimer.Elapsed() << std::endl;
+    std::cout << GridLogMG << " Level " << _CurrentLevel << ": Time elapsed: Solve next level       " <<            _SolveNextLevelTimer.Elapsed() << std::endl;
+    // clang-format on
+    // The next level prints its own report after this one.
+    _NextPreconditionerLevel->reportTimings();
+  }
+
+  void resetTimers() {
+    // Reset() the five setup stopwatches and the five solve stopwatches used by this level.
+    _SetupTotalTimer.Reset();
+    _SetupCreateSubspaceTimer.Reset();
+    _SetupProjectToChiralitiesTimer.Reset();
+    _SetupCoarsenOperatorTimer.Reset();
+    _SetupNextLevelTimer.Reset();
+    _SolveTotalTimer.Reset();
+    _SolveRestrictionTimer.Reset();
+    _SolveProlongationTimer.Reset();
+    _SolveSmootherTimer.Reset();
+    _SolveNextLevelTimer.Reset();
+    // Then pass the reset on to the next level.
+    _NextPreconditionerLevel->resetTimers();
+  }
+};
+
+// Specialization for the coarsest level
+template<class Fobj, class CComplex, int nBasis, class Matrix>
+class MultiGridPreconditioner<Fobj, CComplex, nBasis, 0, Matrix> : public MultiGridPreconditionerBase<Lattice<Fobj>> {
+public:
+  /////////////////////////////////////////////
+  // Type Definitions
+  /////////////////////////////////////////////
+  // Only one operator/vector type exists here: this specialization holds no coarser level.
+  using FineDiracMatrix = Matrix;
+  using FineVector      = Lattice<Fobj>;
+
+  /////////////////////////////////////////////
+  // Member Data
+  /////////////////////////////////////////////
+
+  int _CurrentLevel;
+
+  MultiGridParams &_MultiGridParams;
+  LevelInfo &      _LevelInfo;
+
+  FineDiracMatrix &_FineMatrix;
+  FineDiracMatrix &_SmootherMatrix; // stored, but not referenced by any method of this specialization
+
+  GridStopWatch _SolveTotalTimer;
+  GridStopWatch _SolveSmootherTimer;
+
+  /////////////////////////////////////////////
+  // Member Functions
+  /////////////////////////////////////////////
+  // Keeps references to the shared parameter/level objects and both matrices, and starts with reset stopwatches.
+  MultiGridPreconditioner(MultiGridParams &mgParams, LevelInfo &LvlInfo, FineDiracMatrix &FineMat, FineDiracMatrix &SmootherMat)
+    : _CurrentLevel(mgParams.nLevels - 1),
+      _MultiGridParams(mgParams),
+      _LevelInfo(LvlInfo),
+      _FineMatrix(FineMat),
+      _SmootherMatrix(SmootherMat) {
+
+    resetTimers();
+  }
+  // No setup work is done on this level.
+  void setup() {}
+  // Applies the level by running FGMRES on the operator built from _FineMatrix, from `in` into `out`.
+  virtual void operator()(FineVector const &in, FineVector &out) {
+
+    _SolveTotalTimer.Start();
+
+    conformable(_LevelInfo.Grids[_CurrentLevel], in._grid);
+    conformable(in, out);
+
+    auto iterCap = _MultiGridParams.coarseSolverMaxOuterIter * _MultiGridParams.coarseSolverMaxInnerIter;
+
+    // There is no level below this one, so the solver only gets the trivial preconditioner
+    TrivialPrecon<FineVector>                      noPrecon;
+    FlexibleGeneralisedMinimalResidual<FineVector> coarsestSolver(
+      _MultiGridParams.coarseSolverTol, iterCap, noPrecon, _MultiGridParams.coarseSolverMaxInnerIter, false);
+
+    MdagMLinearOperator<FineDiracMatrix, FineVector> levelOp(_FineMatrix);
+
+    _SolveSmootherTimer.Start();
+    coarsestSolver(levelOp, in, out);
+    _SolveSmootherTimer.Stop();
+
+    _SolveTotalTimer.Stop();
+  }
+  // No checks are performed on this level.
+  void runChecks(RealD tolerance) {}
+
+  void reportTimings() {
+    // Only the two solve stopwatches exist on this level.
+    // clang-format off
+    std::cout << GridLogMG << " Level " << _CurrentLevel << ": Time elapsed: Solve total            " << _SolveTotalTimer.Elapsed() << std::endl;
+    std::cout << GridLogMG << " Level " << _CurrentLevel << ": Time elapsed: Solve smoother         " << _SolveSmootherTimer.Elapsed() << std::endl;
+    // clang-format on
+  }
+
+  void resetTimers() {
+
+    _SolveTotalTimer.Reset();
+    _SolveSmootherTimer.Reset();
+  }
+};
+
+template<class Fobj, class CComplex, int nBasis, int nLevels, class Matrix>
+using NLevelMGPreconditioner = MultiGridPreconditioner<Fobj, CComplex, nBasis, nLevels - 1, Matrix>; // 4th argument is nLevels - 1; the value 0 selects the specialization above
+
+template<class Fobj, class CComplex, int nBasis, class Matrix>
+std::unique_ptr<MultiGridPreconditionerBase<Lattice<Fobj>>>
+createMGInstance(MultiGridParams &mgParams, LevelInfo &levelInfo, Matrix &FineMat, Matrix &SmootherMat) {
+  // Factory: maps the runtime value mgParams.nLevels (2, 3 or 4) to the matching NLevelMGPreconditioner instantiation.
+#define CASE_FOR_N_LEVELS(nLevels)                                                                                     \
+  case nLevels:                                                                                                        \
+    return std::unique_ptr<NLevelMGPreconditioner<Fobj, CComplex, nBasis, nLevels, Matrix>>(                           \
+      new NLevelMGPreconditioner<Fobj, CComplex, nBasis, nLevels, Matrix>(mgParams, levelInfo, FineMat, SmootherMat)); \
+    break;
+  // NOTE(review): the `break` after `return` in the macro above can never be reached.
+  switch(mgParams.nLevels) {
+    CASE_FOR_N_LEVELS(2);
+    CASE_FOR_N_LEVELS(3);
+    CASE_FOR_N_LEVELS(4);
+    default: // any other level count is reported on GridLogError and ends the process
+      std::cout << GridLogError << "We currently only support nLevels ∈ {2, 3, 4}" << std::endl;
+      exit(EXIT_FAILURE);
+      break;
+  }
+#undef CASE_FOR_N_LEVELS
+}
+
+}
+#endif
diff --git a/tests/solver/Test_staggered_cagmres_unprec.cc b/tests/solver/Test_staggered_cagmres_unprec.cc
new file mode 100644
index 00000000..b82ecaeb
--- /dev/null
+++ b/tests/solver/Test_staggered_cagmres_unprec.cc
@@ -0,0 +1,72 @@
+/*************************************************************************************
+
+Grid physics library, www.github.com/paboyle/Grid
+
+Source file: ./tests/solver/Test_staggered_cagmres_unprec.cc
+
+Copyright (C) 2015-2018
+
+Author: Daniel Richtmann <daniel.richtmann@ur.de>
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along
+with this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+
+See the full license in the file "LICENSE" in the top level distribution
+directory
+*************************************************************************************/
+/*  END LEGAL */
+#include <Grid/Grid.h>
+
+using namespace Grid;
+using namespace Grid::QCD;
+
+int main (int argc, char ** argv)
+{
+  typedef typename ImprovedStaggeredFermionR::FermionField FermionField;
+  typedef typename ImprovedStaggeredFermionR::ComplexField ComplexField;
+  typename ImprovedStaggeredFermionR::ImplParams params;
+  // Driver: runs CommunicationAvoidingGeneralisedMinimalResidual on an improved staggered operator with a random source.
+  Grid_init(&argc,&argv);
+  // Full grid and its red-black companion, built from the default lattice, SIMD and MPI layouts.
+  std::vector<int> latt_size   = GridDefaultLatt();
+  std::vector<int> simd_layout = GridDefaultSimd(Nd,vComplex::Nsimd());
+  std::vector<int> mpi_layout  = GridDefaultMpi();
+  GridCartesian               Grid(latt_size,simd_layout,mpi_layout);
+  GridRedBlackCartesian     RBGrid(&Grid);
+  // The parallel RNG is seeded with the fixed integers 1,2,3,4.
+  std::vector<int> seeds({1,2,3,4});
+  GridParallelRNG          pRNG(&Grid);  pRNG.SeedFixedIntegers(seeds);
+  // Random source, zeroed result vector, and a gauge field filled by SU3::HotConfiguration from pRNG.
+  FermionField src(&Grid); random(pRNG,src);
+  RealD nrm = norm2(src);
+  FermionField result(&Grid); result=zero;
+  LatticeGaugeField Umu(&Grid); SU3::HotConfiguration(pRNG,Umu);
+  // NOTE(review): ComplexField, params and nrm are never used below, and volume is computed but never consumed.
+  double volume=1;
+  for(int mu=0;mu<Nd;mu++){
+    volume=volume*latt_size[mu];
+  }
+  // Operator parameters; the same gauge field Umu is passed for both of the first two constructor arguments.
+  RealD mass=0.5;
+  RealD c1=9.0/8.0;
+  RealD c2=-1.0/24.0;
+  RealD u0=1.0;
+  ImprovedStaggeredFermionR Ds(Umu,Umu,Grid,RBGrid,mass,c1,c2,u0);
+  // Solver arguments are presumably (tolerance, max iterations, restart length) - TODO confirm against the solver header.
+  MdagMLinearOperator<ImprovedStaggeredFermionR,FermionField> HermOp(Ds);
+  CommunicationAvoidingGeneralisedMinimalResidual<FermionField> CAGMRES(1.0e-8, 10000, 25);
+  CAGMRES(HermOp,src,result);
+  // No check of `result` is made here; the program only finalizes.
+  Grid_finalize();
+}
diff --git a/tests/solver/Test_staggered_fcagmres_prec.cc b/tests/solver/Test_staggered_fcagmres_prec.cc
new file mode 100644
index 00000000..7685585b
--- /dev/null
+++ b/tests/solver/Test_staggered_fcagmres_prec.cc
@@ -0,0 +1,75 @@
+/*************************************************************************************
+
+Grid physics library, www.github.com/paboyle/Grid
+
+Source file: ./tests/solver/Test_staggered_fcagmres_prec.cc
+
+Copyright (C) 2015-2018
+
+Author: Daniel Richtmann <daniel.richtmann@ur.de>
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along
+with this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+
+See the full license in the file "LICENSE" in the top level distribution
+directory
+*************************************************************************************/
+/*  END LEGAL */
+#include <Grid/Grid.h>
+
+using namespace Grid;
+using namespace Grid::QCD;
+
+int main (int argc, char ** argv)
+{
+  typedef typename ImprovedStaggeredFermionR::FermionField FermionField;
+  typedef typename ImprovedStaggeredFermionR::ComplexField ComplexField;
+  typename ImprovedStaggeredFermionR::ImplParams params;
+  // Driver: runs FlexibleCommunicationAvoidingGeneralisedMinimalResidual with a TrivialPrecon on an improved staggered operator.
+  Grid_init(&argc,&argv);
+  // Full grid and its red-black companion, built from the default lattice, SIMD and MPI layouts.
+  std::vector<int> latt_size   = GridDefaultLatt();
+  std::vector<int> simd_layout = GridDefaultSimd(Nd,vComplex::Nsimd());
+  std::vector<int> mpi_layout  = GridDefaultMpi();
+  GridCartesian               Grid(latt_size,simd_layout,mpi_layout);
+  GridRedBlackCartesian     RBGrid(&Grid);
+  // The parallel RNG is seeded with the fixed integers 1,2,3,4.
+  std::vector<int> seeds({1,2,3,4});
+  GridParallelRNG          pRNG(&Grid);  pRNG.SeedFixedIntegers(seeds);
+  // Random source, zeroed result vector, and a gauge field filled by SU3::HotConfiguration from pRNG.
+  FermionField src(&Grid); random(pRNG,src);
+  RealD nrm = norm2(src);
+  FermionField result(&Grid); result=zero;
+  LatticeGaugeField Umu(&Grid); SU3::HotConfiguration(pRNG,Umu);
+  // NOTE(review): ComplexField, params and nrm are never used below, and volume is computed but never consumed.
+  double volume=1;
+  for(int mu=0;mu<Nd;mu++){
+    volume=volume*latt_size[mu];
+  }
+  // Operator parameters; the same gauge field Umu is passed for both of the first two constructor arguments.
+  RealD mass=0.5;
+  RealD c1=9.0/8.0;
+  RealD c2=-1.0/24.0;
+  RealD u0=1.0;
+  ImprovedStaggeredFermionR Ds(Umu,Umu,Grid,RBGrid,mass,c1,c2,u0);
+
+  MdagMLinearOperator<ImprovedStaggeredFermionR,FermionField> HermOp(Ds);
+
+  TrivialPrecon<FermionField> simple;
+  // Solver arguments are presumably (tolerance, max iterations, preconditioner, restart length) - TODO confirm.
+  FlexibleCommunicationAvoidingGeneralisedMinimalResidual<FermionField> FCAGMRES(1.0e-8, 10000, simple, 25);
+  FCAGMRES(HermOp,src,result);
+  // No check of `result` is made here; the program only finalizes.
+  Grid_finalize();
+}
diff --git a/tests/solver/Test_staggered_fgmres_prec.cc b/tests/solver/Test_staggered_fgmres_prec.cc
new file mode 100644
index 00000000..30905e35
--- /dev/null
+++ b/tests/solver/Test_staggered_fgmres_prec.cc
@@ -0,0 +1,75 @@
+/*************************************************************************************
+
+Grid physics library, www.github.com/paboyle/Grid
+
+Source file: ./tests/solver/Test_staggered_fgmres_prec.cc
+
+Copyright (C) 2015-2018
+
+Author: Daniel Richtmann <daniel.richtmann@ur.de>
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along
+with this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+
+See the full license in the file "LICENSE" in the top level distribution
+directory
+*************************************************************************************/
+/*  END LEGAL */
+#include <Grid/Grid.h>
+
+using namespace Grid;
+using namespace Grid::QCD;
+
+int main (int argc, char ** argv)
+{
+  typedef typename ImprovedStaggeredFermionR::FermionField FermionField;
+  typedef typename ImprovedStaggeredFermionR::ComplexField ComplexField;
+  typename ImprovedStaggeredFermionR::ImplParams params;
+  // Driver: runs FlexibleGeneralisedMinimalResidual with a TrivialPrecon on an improved staggered operator.
+  Grid_init(&argc,&argv);
+  // Full grid and its red-black companion, built from the default lattice, SIMD and MPI layouts.
+  std::vector<int> latt_size   = GridDefaultLatt();
+  std::vector<int> simd_layout = GridDefaultSimd(Nd,vComplex::Nsimd());
+  std::vector<int> mpi_layout  = GridDefaultMpi();
+  GridCartesian               Grid(latt_size,simd_layout,mpi_layout);
+  GridRedBlackCartesian     RBGrid(&Grid);
+  // The parallel RNG is seeded with the fixed integers 1,2,3,4.
+  std::vector<int> seeds({1,2,3,4});
+  GridParallelRNG          pRNG(&Grid);  pRNG.SeedFixedIntegers(seeds);
+  // Random source, zeroed result vector, and a gauge field filled by SU3::HotConfiguration from pRNG.
+  FermionField src(&Grid); random(pRNG,src);
+  RealD nrm = norm2(src);
+  FermionField result(&Grid); result=zero;
+  LatticeGaugeField Umu(&Grid); SU3::HotConfiguration(pRNG,Umu);
+  // NOTE(review): ComplexField, params and nrm are never used below, and volume is computed but never consumed.
+  double volume=1;
+  for(int mu=0;mu<Nd;mu++){
+    volume=volume*latt_size[mu];
+  }
+  // Operator parameters; the same gauge field Umu is passed for both of the first two constructor arguments.
+  RealD mass=0.5;
+  RealD c1=9.0/8.0;
+  RealD c2=-1.0/24.0;
+  RealD u0=1.0;
+  ImprovedStaggeredFermionR Ds(Umu,Umu,Grid,RBGrid,mass,c1,c2,u0);
+
+  MdagMLinearOperator<ImprovedStaggeredFermionR,FermionField> HermOp(Ds);
+
+  TrivialPrecon<FermionField> simple;
+  // Solver arguments are presumably (tolerance, max iterations, preconditioner, restart length) - TODO confirm.
+  FlexibleGeneralisedMinimalResidual<FermionField> FGMRES(1.0e-8, 10000, simple, 25);
+  FGMRES(HermOp,src,result);
+  // No check of `result` is made here; the program only finalizes.
+  Grid_finalize();
+}
diff --git a/tests/solver/Test_staggered_gmres_unprec.cc b/tests/solver/Test_staggered_gmres_unprec.cc
new file mode 100644
index 00000000..d65b0b31
--- /dev/null
+++ b/tests/solver/Test_staggered_gmres_unprec.cc
@@ -0,0 +1,72 @@
+/*************************************************************************************
+
+Grid physics library, www.github.com/paboyle/Grid
+
+Source file: ./tests/solver/Test_staggered_gmres_unprec.cc
+
+Copyright (C) 2015-2018
+
+Author: Daniel Richtmann <daniel.richtmann@ur.de>
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along
+with this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+
+See the full license in the file "LICENSE" in the top level distribution
+directory
+*************************************************************************************/
+/*  END LEGAL */
+#include <Grid/Grid.h>
+
+using namespace Grid;
+using namespace Grid::QCD;
+
+int main (int argc, char ** argv)
+{
+  typedef typename ImprovedStaggeredFermionR::FermionField FermionField;
+  typedef typename ImprovedStaggeredFermionR::ComplexField ComplexField;
+  typename ImprovedStaggeredFermionR::ImplParams params;
+  // Driver: runs GeneralisedMinimalResidual on an improved staggered operator with a random source.
+  Grid_init(&argc,&argv);
+  // Full grid and its red-black companion, built from the default lattice, SIMD and MPI layouts.
+  std::vector<int> latt_size   = GridDefaultLatt();
+  std::vector<int> simd_layout = GridDefaultSimd(Nd,vComplex::Nsimd());
+  std::vector<int> mpi_layout  = GridDefaultMpi();
+  GridCartesian               Grid(latt_size,simd_layout,mpi_layout);
+  GridRedBlackCartesian     RBGrid(&Grid);
+  // The parallel RNG is seeded with the fixed integers 1,2,3,4.
+  std::vector<int> seeds({1,2,3,4});
+  GridParallelRNG          pRNG(&Grid);  pRNG.SeedFixedIntegers(seeds);
+  // Random source, zeroed result vector, and a gauge field filled by SU3::HotConfiguration from pRNG.
+  FermionField src(&Grid); random(pRNG,src);
+  RealD nrm = norm2(src);
+  FermionField result(&Grid); result=zero;
+  LatticeGaugeField Umu(&Grid); SU3::HotConfiguration(pRNG,Umu);
+  // NOTE(review): ComplexField, params and nrm are never used below, and volume is computed but never consumed.
+  double volume=1;
+  for(int mu=0;mu<Nd;mu++){
+    volume=volume*latt_size[mu];
+  }
+  // Operator parameters; the same gauge field Umu is passed for both of the first two constructor arguments.
+  RealD mass=0.5;
+  RealD c1=9.0/8.0;
+  RealD c2=-1.0/24.0;
+  RealD u0=1.0;
+  ImprovedStaggeredFermionR Ds(Umu,Umu,Grid,RBGrid,mass,c1,c2,u0);
+  // Solver arguments are presumably (tolerance, max iterations, restart length) - TODO confirm against the solver header.
+  MdagMLinearOperator<ImprovedStaggeredFermionR,FermionField> HermOp(Ds);
+  GeneralisedMinimalResidual<FermionField> GMRES(1.0e-8, 10000, 25);
+  GMRES(HermOp,src,result);
+  // No check of `result` is made here; the program only finalizes.
+  Grid_finalize();
+}
diff --git a/tests/solver/Test_staggered_mr_unprec.cc b/tests/solver/Test_staggered_mr_unprec.cc
new file mode 100644
index 00000000..ca60edb4
--- /dev/null
+++ b/tests/solver/Test_staggered_mr_unprec.cc
@@ -0,0 +1,72 @@
+/*************************************************************************************
+
+Grid physics library, www.github.com/paboyle/Grid
+
+Source file: ./tests/solver/Test_staggered_mr_unprec.cc
+
+Copyright (C) 2015-2018
+
+Author: Daniel Richtmann <daniel.richtmann@ur.de>
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along
+with this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+
+See the full license in the file "LICENSE" in the top level distribution
+directory
+*************************************************************************************/
+/*  END LEGAL */
+#include <Grid/Grid.h>
+
+using namespace Grid;
+using namespace Grid::QCD;
+
+int main (int argc, char ** argv)
+{
+  typedef typename ImprovedStaggeredFermionR::FermionField FermionField;
+  typedef typename ImprovedStaggeredFermionR::ComplexField ComplexField;
+  typename ImprovedStaggeredFermionR::ImplParams params;
+  // Driver: runs MinimalResidual on an improved staggered operator with a random source.
+  Grid_init(&argc,&argv);
+  // Full grid and its red-black companion, built from the default lattice, SIMD and MPI layouts.
+  std::vector<int> latt_size   = GridDefaultLatt();
+  std::vector<int> simd_layout = GridDefaultSimd(Nd,vComplex::Nsimd());
+  std::vector<int> mpi_layout  = GridDefaultMpi();
+  GridCartesian               Grid(latt_size,simd_layout,mpi_layout);
+  GridRedBlackCartesian     RBGrid(&Grid);
+  // The parallel RNG is seeded with the fixed integers 1,2,3,4.
+  std::vector<int> seeds({1,2,3,4});
+  GridParallelRNG          pRNG(&Grid);  pRNG.SeedFixedIntegers(seeds);
+  // Random source, zeroed result vector, and a gauge field filled by SU3::HotConfiguration from pRNG.
+  FermionField src(&Grid); random(pRNG,src);
+  RealD nrm = norm2(src);
+  FermionField result(&Grid); result=zero;
+  LatticeGaugeField Umu(&Grid); SU3::HotConfiguration(pRNG,Umu);
+  // NOTE(review): ComplexField, params and nrm are never used below, and volume is computed but never consumed.
+  double volume=1;
+  for(int mu=0;mu<Nd;mu++){
+    volume=volume*latt_size[mu];
+  }
+  // Operator parameters; the same gauge field Umu is passed for both of the first two constructor arguments.
+  RealD mass=0.5;
+  RealD c1=9.0/8.0;
+  RealD c2=-1.0/24.0;
+  RealD u0=1.0;
+  ImprovedStaggeredFermionR Ds(Umu,Umu,Grid,RBGrid,mass,c1,c2,u0);
+  // First two solver arguments are presumably tolerance and max iterations; the meaning of 0.8 is not visible here - TODO confirm.
+  MdagMLinearOperator<ImprovedStaggeredFermionR,FermionField> HermOp(Ds);
+  MinimalResidual<FermionField> MR(1.0e-8,10000,0.8);
+  MR(HermOp,src,result);
+  // No check of `result` is made here; the program only finalizes.
+  Grid_finalize();
+}
diff --git a/tests/solver/Test_wilson_cagmres_unprec.cc b/tests/solver/Test_wilson_cagmres_unprec.cc
new file mode 100644
index 00000000..46f9e6a6
--- /dev/null
+++ b/tests/solver/Test_wilson_cagmres_unprec.cc
@@ -0,0 +1,65 @@
+/*************************************************************************************
+
+Grid physics library, www.github.com/paboyle/Grid
+
+Source file: ./tests/solver/Test_wilson_cagmres_unprec.cc
+
+Copyright (C) 2015-2018
+
+Author: Daniel Richtmann <daniel.richtmann@ur.de>
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along
+with this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+
+See the full license in the file "LICENSE" in the top level distribution
+directory
+*************************************************************************************/
+/*  END LEGAL */
+#include <Grid/Grid.h>
+
+using namespace Grid;
+using namespace Grid::QCD;
+
+// Exercises CommunicationAvoidingGeneralisedMinimalResidual on a Wilson operator wrapped in
+// MdagMLinearOperator, with a random source and a hot SU(3) gauge field; no result check.
+int main (int argc, char ** argv)
+{
+  Grid_init(&argc,&argv);
+
+  // Full and red-black grids from Grid's default lattice, SIMD and MPI layouts.
+  std::vector<int> latt_size   = GridDefaultLatt();
+  std::vector<int> simd_layout = GridDefaultSimd(Nd,vComplex::Nsimd());
+  std::vector<int> mpi_layout  = GridDefaultMpi();
+  GridCartesian               Grid(latt_size,simd_layout,mpi_layout);
+  GridRedBlackCartesian     RBGrid(&Grid);
+
+  // Seed the parallel RNG with fixed integers.
+  std::vector<int> seeds({1,2,3,4});
+  GridParallelRNG          pRNG(&Grid);  pRNG.SeedFixedIntegers(seeds);
+
+  // The source is drawn from pRNG before the gauge field.
+  LatticeFermion src(&Grid); random(pRNG,src);
+  LatticeFermion result(&Grid); result=zero;
+  LatticeGaugeField Umu(&Grid); SU3::HotConfiguration(pRNG,Umu);
+
+  RealD mass=0.5;
+  WilsonFermionR Dw(Umu,Grid,RBGrid,mass);
+
+  MdagMLinearOperator<WilsonFermionR,LatticeFermion> HermOp(Dw);
+  // NOTE(review): arguments are presumably (tolerance, max iterations, restart length) - confirm.
+  CommunicationAvoidingGeneralisedMinimalResidual<LatticeFermion> CAGMRES(1.0e-8, 10000, 25);
+  CAGMRES(HermOp,src,result);
+
+  Grid_finalize();
+}
diff --git a/tests/solver/Test_wilson_fcagmres_prec.cc b/tests/solver/Test_wilson_fcagmres_prec.cc
new file mode 100644
index 00000000..f802984f
--- /dev/null
+++ b/tests/solver/Test_wilson_fcagmres_prec.cc
@@ -0,0 +1,68 @@
+/*************************************************************************************
+
+Grid physics library, www.github.com/paboyle/Grid
+
+Source file: ./tests/solver/Test_wilson_fcagmres_prec.cc
+
+Copyright (C) 2015-2018
+
+Author: Daniel Richtmann <daniel.richtmann@ur.de>
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along
+with this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+
+See the full license in the file "LICENSE" in the top level distribution
+directory
+*************************************************************************************/
+/*  END LEGAL */
+#include <Grid/Grid.h>
+
+using namespace Grid;
+using namespace Grid::QCD;
+
+// Exercises FlexibleCommunicationAvoidingGeneralisedMinimalResidual with a TrivialPrecon on a Wilson
+// operator wrapped in MdagMLinearOperator; random source, hot SU(3) gauge field, no result check.
+int main (int argc, char ** argv)
+{
+  Grid_init(&argc,&argv);
+
+  // Full and red-black grids from Grid's default lattice, SIMD and MPI layouts.
+  std::vector<int> latt_size   = GridDefaultLatt();
+  std::vector<int> simd_layout = GridDefaultSimd(Nd,vComplex::Nsimd());
+  std::vector<int> mpi_layout  = GridDefaultMpi();
+  GridCartesian               Grid(latt_size,simd_layout,mpi_layout);
+  GridRedBlackCartesian     RBGrid(&Grid);
+
+  // Seed the parallel RNG with fixed integers.
+  std::vector<int> seeds({1,2,3,4});
+  GridParallelRNG          pRNG(&Grid);  pRNG.SeedFixedIntegers(seeds);
+
+  // The source is drawn from pRNG before the gauge field.
+  LatticeFermion src(&Grid); random(pRNG,src);
+  LatticeFermion result(&Grid); result=zero;
+  LatticeGaugeField Umu(&Grid); SU3::HotConfiguration(pRNG,Umu);
+
+  RealD mass=0.5;
+  WilsonFermionR Dw(Umu,Grid,RBGrid,mass);
+
+  MdagMLinearOperator<WilsonFermionR,LatticeFermion> HermOp(Dw);
+
+  TrivialPrecon<LatticeFermion> simple;
+
+  // NOTE(review): arguments are presumably (tolerance, max iterations, preconditioner, restart length) - confirm.
+  FlexibleCommunicationAvoidingGeneralisedMinimalResidual<LatticeFermion> FCAGMRES(1.0e-8, 10000, simple, 25);
+  FCAGMRES(HermOp,src,result);
+
+  Grid_finalize();
+}
diff --git a/tests/solver/Test_wilson_fgmres_prec.cc b/tests/solver/Test_wilson_fgmres_prec.cc
new file mode 100644
index 00000000..f55516da
--- /dev/null
+++ b/tests/solver/Test_wilson_fgmres_prec.cc
@@ -0,0 +1,68 @@
+/*************************************************************************************
+
+Grid physics library, www.github.com/paboyle/Grid
+
+Source file: ./tests/solver/Test_wilson_fgmres_prec.cc
+
+Copyright (C) 2015-2018
+
+Author: Daniel Richtmann <daniel.richtmann@ur.de>
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along
+with this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+
+See the full license in the file "LICENSE" in the top level distribution
+directory
+*************************************************************************************/
+/*  END LEGAL */
+#include <Grid/Grid.h>
+
+using namespace Grid;
+using namespace Grid::QCD;
+
+// Exercises FlexibleGeneralisedMinimalResidual with a TrivialPrecon on a Wilson operator
+// wrapped in MdagMLinearOperator; random source, hot SU(3) gauge field, no result check.
+int main (int argc, char ** argv)
+{
+  Grid_init(&argc,&argv);
+
+  // Full and red-black grids from Grid's default lattice, SIMD and MPI layouts.
+  std::vector<int> latt_size   = GridDefaultLatt();
+  std::vector<int> simd_layout = GridDefaultSimd(Nd,vComplex::Nsimd());
+  std::vector<int> mpi_layout  = GridDefaultMpi();
+  GridCartesian               Grid(latt_size,simd_layout,mpi_layout);
+  GridRedBlackCartesian     RBGrid(&Grid);
+
+  // Seed the parallel RNG with fixed integers.
+  std::vector<int> seeds({1,2,3,4});
+  GridParallelRNG          pRNG(&Grid);  pRNG.SeedFixedIntegers(seeds);
+
+  // The source is drawn from pRNG before the gauge field.
+  LatticeFermion src(&Grid); random(pRNG,src);
+  LatticeFermion result(&Grid); result=zero;
+  LatticeGaugeField Umu(&Grid); SU3::HotConfiguration(pRNG,Umu);
+
+  RealD mass=0.5;
+  WilsonFermionR Dw(Umu,Grid,RBGrid,mass);
+
+  MdagMLinearOperator<WilsonFermionR,LatticeFermion> HermOp(Dw);
+
+  TrivialPrecon<LatticeFermion> simple;
+
+  // NOTE(review): arguments are presumably (tolerance, max iterations, preconditioner, restart length) - confirm.
+  FlexibleGeneralisedMinimalResidual<LatticeFermion> FGMRES(1.0e-8, 10000, simple, 25);
+  FGMRES(HermOp,src,result);
+
+  Grid_finalize();
+}
diff --git a/tests/solver/Test_wilson_gmres_unprec.cc b/tests/solver/Test_wilson_gmres_unprec.cc
new file mode 100644
index 00000000..443f7ebc
--- /dev/null
+++ b/tests/solver/Test_wilson_gmres_unprec.cc
@@ -0,0 +1,65 @@
+/*************************************************************************************
+
+Grid physics library, www.github.com/paboyle/Grid
+
+Source file: ./tests/solver/Test_wilson_gmres_unprec.cc
+
+Copyright (C) 2015-2018
+
+Author: Daniel Richtmann <daniel.richtmann@ur.de>
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along
+with this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+
+See the full license in the file "LICENSE" in the top level distribution
+directory
+*************************************************************************************/
+/*  END LEGAL */
+#include <Grid/Grid.h>
+
+using namespace Grid;
+using namespace Grid::QCD;
+
+// Exercises GeneralisedMinimalResidual on a Wilson operator wrapped in MdagMLinearOperator,
+// with a random source and a hot SU(3) gauge field; no result check.
+int main (int argc, char ** argv)
+{
+  Grid_init(&argc,&argv);
+
+  // Full and red-black grids from Grid's default lattice, SIMD and MPI layouts.
+  std::vector<int> latt_size   = GridDefaultLatt();
+  std::vector<int> simd_layout = GridDefaultSimd(Nd,vComplex::Nsimd());
+  std::vector<int> mpi_layout  = GridDefaultMpi();
+  GridCartesian               Grid(latt_size,simd_layout,mpi_layout);
+  GridRedBlackCartesian     RBGrid(&Grid);
+
+  // Seed the parallel RNG with fixed integers.
+  std::vector<int> seeds({1,2,3,4});
+  GridParallelRNG          pRNG(&Grid);  pRNG.SeedFixedIntegers(seeds);
+
+  // The source is drawn from pRNG before the gauge field.
+  LatticeFermion src(&Grid); random(pRNG,src);
+  LatticeFermion result(&Grid); result=zero;
+  LatticeGaugeField Umu(&Grid); SU3::HotConfiguration(pRNG,Umu);
+
+  RealD mass=0.5;
+  WilsonFermionR Dw(Umu,Grid,RBGrid,mass);
+
+  MdagMLinearOperator<WilsonFermionR,LatticeFermion> HermOp(Dw);
+  // NOTE(review): arguments are presumably (tolerance, max iterations, restart length) - confirm.
+  GeneralisedMinimalResidual<LatticeFermion> GMRES(1.0e-8, 10000, 25);
+  GMRES(HermOp,src,result);
+
+  Grid_finalize();
+}
diff --git a/tests/solver/Test_wilson_mg.cc b/tests/solver/Test_wilson_mg.cc
new file mode 100644
index 00000000..1609c1fc
--- /dev/null
+++ b/tests/solver/Test_wilson_mg.cc
@@ -0,0 +1,114 @@
+/*************************************************************************************
+
+    Grid physics library, www.github.com/paboyle/Grid 
+
+    Source file: ./tests/solver/Test_wilson_mg.cc
+
+    Copyright (C) 2015-2018
+
+    Author: Daniel Richtmann <daniel.richtmann@ur.de>
+
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License along
+    with this program; if not, write to the Free Software Foundation, Inc.,
+    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+
+    See the full license in the file "LICENSE" in the top level distribution directory
+    *************************************************************************************/
+/*  END LEGAL */
+
+#include <Grid/Grid.h>
+#include <Test_multigrid_common.h>
+
+using namespace std;
+using namespace Grid;
+using namespace Grid::QCD;
+
+// Applies CG, FGMRES with a TrivialPrecon and FGMRES with a multigrid preconditioner, in turn,
+// to the Wilson MdagM operator; the multigrid parameters are read from an XML file.
+int main(int argc, char **argv) {
+  Grid_init(&argc, &argv);
+
+  GridCartesian *        FGrid   = SpaceTimeGrid::makeFourDimGrid(GridDefaultLatt(), GridDefaultSimd(Nd, vComplex::Nsimd()), GridDefaultMpi());
+  GridRedBlackCartesian *FrbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(FGrid);
+
+  std::vector<int> fSeeds({1, 2, 3, 4});
+  GridParallelRNG  fPRNG(FGrid);
+  fPRNG.SeedFixedIntegers(fSeeds);
+
+  // clang-format off
+  LatticeFermion    src(FGrid); gaussian(fPRNG, src);
+  LatticeFermion result(FGrid); result = zero;
+  LatticeGaugeField Umu(FGrid); SU3::HotConfiguration(fPRNG, Umu);
+  // clang-format on
+
+  RealD mass = -0.25;
+
+  MultiGridParams mgParams;
+  std::string     inputXml{"./mg_params.xml"};
+
+  if(GridCmdOptionExists(argv, argv + argc, "--inputxml")) {
+    inputXml = GridCmdOptionPayload(argv, argv + argc, "--inputxml");
+    assert(inputXml.length() != 0);
+  }
+
+  // Dump the default-constructed parameters as a template, then load the requested file.
+  {
+    XmlWriter writer("mg_params_template.xml");
+    write(writer, "Params", mgParams);
+    std::cout << GridLogMessage << "Written mg_params_template.xml" << std::endl;
+
+    XmlReader reader(inputXml);
+    read(reader, "Params", mgParams);
+    std::cout << GridLogMessage << "Read in " << inputXml << std::endl;
+  }
+
+  checkParameterValidity(mgParams);
+  std::cout << mgParams << std::endl;
+
+  LevelInfo levelInfo(FGrid, mgParams);
+
+  // Note: We do chiral doubling, so actually only nbasis/2 full basis vectors are used
+  const int nbasis = 40;
+
+  WilsonFermionR Dw(Umu, *FGrid, *FrbGrid, mass);
+  MdagMLinearOperator<WilsonFermionR, LatticeFermion> MdagMOpDw(Dw);
+
+  std::cout << GridLogMessage << "**************************************************" << std::endl;
+  std::cout << GridLogMessage << "Testing Multigrid for Wilson" << std::endl;
+  std::cout << GridLogMessage << "**************************************************" << std::endl;
+
+  TrivialPrecon<LatticeFermion> trivialPrecon;
+  auto MGPreconDw = createMGInstance<vSpinColourVector, vTComplex, nbasis, WilsonFermionR>(mgParams, levelInfo, Dw, Dw);
+  MGPreconDw->setup();
+
+  if(GridCmdOptionExists(argv, argv + argc, "--runchecks")) {
+    RealD toleranceForMGChecks = (getPrecision<LatticeFermion>::value == 1) ? 1e-6 : 1e-13;
+    MGPreconDw->runChecks(toleranceForMGChecks);
+  }
+
+  std::vector<std::unique_ptr<OperatorFunction<LatticeFermion>>> solversDw;
+  solversDw.emplace_back(new ConjugateGradient<LatticeFermion>(1.0e-12, 50000, false));
+  solversDw.emplace_back(new FlexibleGeneralisedMinimalResidual<LatticeFermion>(1.0e-12, 50000, trivialPrecon, 100, false));
+  solversDw.emplace_back(new FlexibleGeneralisedMinimalResidual<LatticeFermion>(1.0e-12, 50000, *MGPreconDw, 100, false));
+
+  // result is reset to zero before every solver is applied.
+  for(auto const &solver : solversDw) {
+    std::cout << std::endl << "Starting with a new solver" << std::endl;
+    result = zero;
+    (*solver)(MdagMOpDw, src, result);
+  }
+
+  MGPreconDw->reportTimings();
+
+  Grid_finalize();
+}
diff --git a/tests/solver/Test_wilson_mg_mp.cc b/tests/solver/Test_wilson_mg_mp.cc
new file mode 100644
index 00000000..0cd51227
--- /dev/null
+++ b/tests/solver/Test_wilson_mg_mp.cc
@@ -0,0 +1,166 @@
+/*************************************************************************************
+
+    Grid physics library, www.github.com/paboyle/Grid 
+
+    Source file: ./tests/solver/Test_wilson_mg_mp.cc
+
+    Copyright (C) 2015-2018
+
+    Author: Daniel Richtmann <daniel.richtmann@ur.de>
+
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License along
+    with this program; if not, write to the Free Software Foundation, Inc.,
+    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+
+    See the full license in the file "LICENSE" in the top level distribution directory
+    *************************************************************************************/
+/*  END LEGAL */
+
+#include <Grid/Grid.h>
+#include <Test_multigrid_common.h>
+
+using namespace std;
+using namespace Grid;
+using namespace Grid::QCD;
+
+// Applies mixed-precision FGMRES, preconditioned by single-precision multigrid, to the double-precision
+// Wilson MdagM operator; with --docomparison the result is compared against a double-precision multigrid run.
+int main(int argc, char **argv) {
+  Grid_init(&argc, &argv);
+
+  // clang-format off
+  GridCartesian         *FGrid_d   = SpaceTimeGrid::makeFourDimGrid(GridDefaultLatt(), GridDefaultSimd(Nd, vComplexD::Nsimd()), GridDefaultMpi());
+  GridCartesian         *FGrid_f   = SpaceTimeGrid::makeFourDimGrid(GridDefaultLatt(), GridDefaultSimd(Nd, vComplexF::Nsimd()), GridDefaultMpi());
+  GridRedBlackCartesian *FrbGrid_d = SpaceTimeGrid::makeFourDimRedBlackGrid(FGrid_d);
+  GridRedBlackCartesian *FrbGrid_f = SpaceTimeGrid::makeFourDimRedBlackGrid(FGrid_f);
+  // clang-format on
+
+  std::vector<int> fSeeds({1, 2, 3, 4});
+  GridParallelRNG  fPRNG(FGrid_d);
+  fPRNG.SeedFixedIntegers(fSeeds);
+
+  // clang-format off
+  LatticeFermionD       src_d(FGrid_d); gaussian(fPRNG, src_d);
+  LatticeFermionD resultMGD_d(FGrid_d); resultMGD_d = zero;
+  LatticeFermionD resultMGF_d(FGrid_d); resultMGF_d = zero;
+  LatticeGaugeFieldD    Umu_d(FGrid_d); SU3::HotConfiguration(fPRNG, Umu_d);
+  LatticeGaugeFieldF    Umu_f(FGrid_f); precisionChange(Umu_f, Umu_d);
+  // clang-format on
+
+  RealD mass = -0.25;
+
+  MultiGridParams mgParams;
+  std::string     inputXml{"./mg_params.xml"};
+
+  if(GridCmdOptionExists(argv, argv + argc, "--inputxml")) {
+    inputXml = GridCmdOptionPayload(argv, argv + argc, "--inputxml");
+    assert(inputXml.length() != 0);
+  }
+
+  {
+    XmlWriter writer("mg_params_template.xml");
+    write(writer, "Params", mgParams);
+    std::cout << GridLogMessage << "Written mg_params_template.xml" << std::endl;
+
+    XmlReader reader(inputXml);
+    read(reader, "Params", mgParams);
+    std::cout << GridLogMessage << "Read in " << inputXml << std::endl;
+  }
+
+  checkParameterValidity(mgParams);
+  std::cout << mgParams << std::endl;
+
+  LevelInfo levelInfo_d(FGrid_d, mgParams);
+  LevelInfo levelInfo_f(FGrid_f, mgParams);
+
+  // Note: We do chiral doubling, so actually only nbasis/2 full basis vectors are used
+  const int nbasis = 40;
+
+  WilsonFermionD Dw_d(Umu_d, *FGrid_d, *FrbGrid_d, mass);
+  WilsonFermionF Dw_f(Umu_f, *FGrid_f, *FrbGrid_f, mass);
+
+  MdagMLinearOperator<WilsonFermionD, LatticeFermionD> MdagMOpDw_d(Dw_d);
+  MdagMLinearOperator<WilsonFermionF, LatticeFermionF> MdagMOpDw_f(Dw_f);
+
+  std::cout << GridLogMessage << "**************************************************" << std::endl;
+  std::cout << GridLogMessage << "Testing single-precision Multigrid for Wilson" << std::endl;
+  std::cout << GridLogMessage << "**************************************************" << std::endl;
+
+  auto MGPreconDw_f = createMGInstance<vSpinColourVectorF, vTComplexF, nbasis, WilsonFermionF>(mgParams, levelInfo_f, Dw_f, Dw_f);
+  MGPreconDw_f->setup();
+
+  if(GridCmdOptionExists(argv, argv + argc, "--runchecks")) {
+    MGPreconDw_f->runChecks(1e-6);
+  }
+
+  MixedPrecisionFlexibleGeneralisedMinimalResidual<LatticeFermionD, LatticeFermionF> MPFGMRESPREC(1.0e-12, 50000, FGrid_f, *MGPreconDw_f, 100, false);
+
+  std::cout << std::endl << "Starting with a new solver" << std::endl;
+  MPFGMRESPREC(MdagMOpDw_d, src_d, resultMGF_d);
+
+  MGPreconDw_f->reportTimings();
+
+  // Optional: repeat the solve with double-precision multigrid and report the deviations.
+  if(GridCmdOptionExists(argv, argv + argc, "--docomparison")) {
+    std::cout << GridLogMessage << "**************************************************" << std::endl;
+    std::cout << GridLogMessage << "Testing double-precision Multigrid for Wilson" << std::endl;
+    std::cout << GridLogMessage << "**************************************************" << std::endl;
+
+    auto MGPreconDw_d = createMGInstance<vSpinColourVectorD, vTComplexD, nbasis, WilsonFermionD>(mgParams, levelInfo_d, Dw_d, Dw_d);
+    MGPreconDw_d->setup();
+
+    if(GridCmdOptionExists(argv, argv + argc, "--runchecks")) {
+      MGPreconDw_d->runChecks(1e-13);
+    }
+
+    FlexibleGeneralisedMinimalResidual<LatticeFermionD> FGMRESPREC(1.0e-12, 50000, *MGPreconDw_d, 100, false);
+
+    std::cout << std::endl << "Starting with a new solver" << std::endl;
+    FGMRESPREC(MdagMOpDw_d, src_d, resultMGD_d);
+
+    MGPreconDw_d->reportTimings();
+
+    std::cout << GridLogMessage << "**************************************************" << std::endl;
+    std::cout << GridLogMessage << "Comparing single-precision Multigrid with double-precision one for Wilson" << std::endl;
+    std::cout << GridLogMessage << "**************************************************" << std::endl;
+
+    LatticeFermionD diffFullSolver(FGrid_d);
+    // NOTE(review): axpy_norm presumably stores resultMGD_d - resultMGF_d and returns its norm2 - confirm.
+    RealD deviationFullSolver = axpy_norm(diffFullSolver, -1.0, resultMGF_d, resultMGD_d);
+
+    // clang-format off
+    LatticeFermionF src_f(FGrid_f);    precisionChange(src_f, src_d);
+    LatticeFermionF resMGF_f(FGrid_f); resMGF_f = zero;
+    LatticeFermionD resMGD_d(FGrid_d); resMGD_d = zero;
+    // clang-format on
+
+    // Apply each multigrid preconditioner once to the same source (in its own precision).
+    (*MGPreconDw_f)(src_f, resMGF_f);
+    (*MGPreconDw_d)(src_d, resMGD_d);
+
+    LatticeFermionD diffOnlyMG(FGrid_d);
+    LatticeFermionD resMGF_d(FGrid_d);
+    precisionChange(resMGF_d, resMGF_f);
+
+    RealD deviationOnlyPrec = axpy_norm(diffOnlyMG, -1.0, resMGF_d, resMGD_d);
+
+    // clang-format off
+    std::cout << GridLogMessage << "Absolute difference between FGMRES preconditioned by double and single precision MG:   " << deviationFullSolver                      << std::endl;
+    std::cout << GridLogMessage << "Relative deviation  between FGMRES preconditioned by double and single precision MG:   " << deviationFullSolver / norm2(resultMGD_d) << std::endl;
+    std::cout << GridLogMessage << "Absolute difference between one iteration of MG Prec in double and single precision:   " << deviationOnlyPrec                        << std::endl;
+    std::cout << GridLogMessage << "Relative deviation  between one iteration of MG Prec in double and single precision:   " << deviationOnlyPrec / norm2(resMGD_d)      << std::endl;
+    // clang-format on
+  }
+
+  Grid_finalize();
+}
diff --git a/tests/solver/Test_wilson_mr_unprec.cc b/tests/solver/Test_wilson_mr_unprec.cc
new file mode 100644
index 00000000..976130d3
--- /dev/null
+++ b/tests/solver/Test_wilson_mr_unprec.cc
@@ -0,0 +1,65 @@
+/*************************************************************************************
+
+Grid physics library, www.github.com/paboyle/Grid
+
+Source file: ./tests/solver/Test_wilson_mr_unprec.cc
+
+Copyright (C) 2015-2018
+
+Author: Daniel Richtmann <daniel.richtmann@ur.de>
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along
+with this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+
+See the full license in the file "LICENSE" in the top level distribution
+directory
+*************************************************************************************/
+/*  END LEGAL */
+#include <Grid/Grid.h>
+
+using namespace Grid;
+using namespace Grid::QCD;
+
+// Exercises MinimalResidual on a Wilson operator wrapped in MdagMLinearOperator,
+// with a random source and a hot SU(3) gauge field; no result check.
+int main (int argc, char ** argv)
+{
+  Grid_init(&argc,&argv);
+
+  // Full and red-black grids from Grid's default lattice, SIMD and MPI layouts.
+  std::vector<int> latt_size   = GridDefaultLatt();
+  std::vector<int> simd_layout = GridDefaultSimd(Nd,vComplex::Nsimd());
+  std::vector<int> mpi_layout  = GridDefaultMpi();
+  GridCartesian               Grid(latt_size,simd_layout,mpi_layout);
+  GridRedBlackCartesian     RBGrid(&Grid);
+
+  // Seed the parallel RNG with fixed integers.
+  std::vector<int> seeds({1,2,3,4});
+  GridParallelRNG          pRNG(&Grid);  pRNG.SeedFixedIntegers(seeds);
+
+  // The source is drawn from pRNG before the gauge field.
+  LatticeFermion src(&Grid); random(pRNG,src);
+  LatticeFermion result(&Grid); result=zero;
+  LatticeGaugeField Umu(&Grid); SU3::HotConfiguration(pRNG,Umu);
+
+  RealD mass=0.5;
+  WilsonFermionR Dw(Umu,Grid,RBGrid,mass);
+
+  MdagMLinearOperator<WilsonFermionR,LatticeFermion> HermOp(Dw);
+  // NOTE(review): arguments are presumably (tolerance, max iterations, over-relaxation) - confirm.
+  MinimalResidual<LatticeFermion> MR(1.0e-8,10000,0.8);
+  MR(HermOp,src,result);
+
+  Grid_finalize();
+}
diff --git a/tests/solver/Test_wilsonclover_cagmres_unprec.cc b/tests/solver/Test_wilsonclover_cagmres_unprec.cc
new file mode 100644
index 00000000..3ecdf738
--- /dev/null
+++ b/tests/solver/Test_wilsonclover_cagmres_unprec.cc
@@ -0,0 +1,71 @@
+/*************************************************************************************
+
+Grid physics library, www.github.com/paboyle/Grid
+
+Source file: ./tests/solver/Test_wilsonclover_cagmres_unprec.cc
+
+Copyright (C) 2015-2018
+
+Author: Daniel Richtmann <daniel.richtmann@ur.de>
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along
+with this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+
+See the full license in the file "LICENSE" in the top level distribution
+directory
+*************************************************************************************/
+/*  END LEGAL */
+#include <Grid/Grid.h>
+
+using namespace Grid;
+using namespace Grid::QCD;
+
+// Exercises CommunicationAvoidingGeneralisedMinimalResidual on a Wilson clover operator wrapped in
+// MdagMLinearOperator, with a random source and a hot SU(3) gauge field; no result check.
+int main (int argc, char ** argv)
+{
+  Grid_init(&argc,&argv);
+
+  // Full and red-black grids from Grid's default lattice, SIMD and MPI layouts.
+  std::vector<int> latt_size   = GridDefaultLatt();
+  std::vector<int> simd_layout = GridDefaultSimd(Nd,vComplex::Nsimd());
+  std::vector<int> mpi_layout  = GridDefaultMpi();
+  GridCartesian               Grid(latt_size,simd_layout,mpi_layout);
+  GridRedBlackCartesian     RBGrid(&Grid);
+
+  std::vector<int> seeds({1,2,3,4});
+  GridParallelRNG          pRNG(&Grid);  pRNG.SeedFixedIntegers(seeds);
+
+  using FermionField = WilsonCloverFermionR::FermionField;
+  // Default-constructed implementation parameters and anisotropy coefficients.
+  WilsonCloverFermionR::ImplParams params;
+  WilsonAnisotropyCoefficients anis;
+
+  // The source is drawn from pRNG before the gauge field.
+  FermionField src(&Grid); random(pRNG,src);
+  FermionField result(&Grid); result=zero;
+  LatticeGaugeField Umu(&Grid); SU3::HotConfiguration(pRNG,Umu);
+
+  RealD mass  = 0.5;
+  RealD csw_r = 1.0;
+  RealD csw_t = 1.0;
+  WilsonCloverFermionR Dwc(Umu,Grid,RBGrid,mass,csw_r,csw_t,anis,params);
+
+  MdagMLinearOperator<WilsonCloverFermionR,FermionField> HermOp(Dwc);
+  // NOTE(review): arguments are presumably (tolerance, max iterations, restart length) - confirm.
+  CommunicationAvoidingGeneralisedMinimalResidual<FermionField> CAGMRES(1.0e-8, 10000, 25);
+  CAGMRES(HermOp,src,result);
+
+  Grid_finalize();
+}
diff --git a/tests/solver/Test_wilsonclover_fcagmres_prec.cc b/tests/solver/Test_wilsonclover_fcagmres_prec.cc
new file mode 100644
index 00000000..3cbbfc02
--- /dev/null
+++ b/tests/solver/Test_wilsonclover_fcagmres_prec.cc
@@ -0,0 +1,74 @@
+/*************************************************************************************
+
+Grid physics library, www.github.com/paboyle/Grid
+
+Source file: ./tests/solver/Test_wilsonclover_fcagmres_prec.cc
+
+Copyright (C) 2015-2018
+
+Author: Daniel Richtmann <daniel.richtmann@ur.de>
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along
+with this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+
+See the full license in the file "LICENSE" in the top level distribution
+directory
+*************************************************************************************/
+/*  END LEGAL */
+#include <Grid/Grid.h>
+
+using namespace Grid;
+using namespace Grid::QCD;
+
+// Exercises FlexibleCommunicationAvoidingGeneralisedMinimalResidual with a TrivialPrecon on a Wilson clover
+// operator wrapped in MdagMLinearOperator; random source, hot SU(3) gauge field, no result check.
+int main (int argc, char ** argv)
+{
+  Grid_init(&argc,&argv);
+
+  // Full and red-black grids from Grid's default lattice, SIMD and MPI layouts.
+  std::vector<int> latt_size   = GridDefaultLatt();
+  std::vector<int> simd_layout = GridDefaultSimd(Nd,vComplex::Nsimd());
+  std::vector<int> mpi_layout  = GridDefaultMpi();
+  GridCartesian               Grid(latt_size,simd_layout,mpi_layout);
+  GridRedBlackCartesian     RBGrid(&Grid);
+
+  std::vector<int> seeds({1,2,3,4});
+  GridParallelRNG          pRNG(&Grid);  pRNG.SeedFixedIntegers(seeds);
+
+  using FermionField = WilsonCloverFermionR::FermionField;
+  // Default-constructed implementation parameters and anisotropy coefficients.
+  WilsonCloverFermionR::ImplParams params;
+  WilsonAnisotropyCoefficients anis;
+
+  // The source is drawn from pRNG before the gauge field.
+  FermionField src(&Grid); random(pRNG,src);
+  FermionField result(&Grid); result=zero;
+  LatticeGaugeField Umu(&Grid); SU3::HotConfiguration(pRNG,Umu);
+
+  RealD mass  = 0.5;
+  RealD csw_r = 1.0;
+  RealD csw_t = 1.0;
+  WilsonCloverFermionR Dwc(Umu,Grid,RBGrid,mass,csw_r,csw_t,anis,params);
+
+  MdagMLinearOperator<WilsonCloverFermionR,FermionField> HermOp(Dwc);
+
+  TrivialPrecon<FermionField> simple;
+
+  // NOTE(review): arguments are presumably (tolerance, max iterations, preconditioner, restart length) - confirm.
+  FlexibleCommunicationAvoidingGeneralisedMinimalResidual<FermionField> FCAGMRES(1.0e-8, 10000, simple, 25);
+  FCAGMRES(HermOp,src,result);
+
+  Grid_finalize();
+}
diff --git a/tests/solver/Test_wilsonclover_fgmres_prec.cc b/tests/solver/Test_wilsonclover_fgmres_prec.cc
new file mode 100644
index 00000000..7ad0fa24
--- /dev/null
+++ b/tests/solver/Test_wilsonclover_fgmres_prec.cc
@@ -0,0 +1,74 @@
+/*************************************************************************************
+
+Grid physics library, www.github.com/paboyle/Grid
+
+Source file: ./tests/solver/Test_wilsonclover_fgmres_prec.cc
+
+Copyright (C) 2015-2018
+
+Author: Daniel Richtmann <daniel.richtmann@ur.de>
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along
+with this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+
+See the full license in the file "LICENSE" in the top level distribution
+directory
+*************************************************************************************/
+/*  END LEGAL */
+#include <Grid/Grid.h>
+
+using namespace Grid;
+using namespace Grid::QCD;
+
+// Exercises FlexibleGeneralisedMinimalResidual with a TrivialPrecon on a Wilson clover operator
+// wrapped in MdagMLinearOperator; random source, hot SU(3) gauge field, no result check.
+int main (int argc, char ** argv)
+{
+  Grid_init(&argc,&argv);
+
+  // Full and red-black grids from Grid's default lattice, SIMD and MPI layouts.
+  std::vector<int> latt_size   = GridDefaultLatt();
+  std::vector<int> simd_layout = GridDefaultSimd(Nd,vComplex::Nsimd());
+  std::vector<int> mpi_layout  = GridDefaultMpi();
+  GridCartesian               Grid(latt_size,simd_layout,mpi_layout);
+  GridRedBlackCartesian     RBGrid(&Grid);
+
+  std::vector<int> seeds({1,2,3,4});
+  GridParallelRNG          pRNG(&Grid);  pRNG.SeedFixedIntegers(seeds);
+
+  using FermionField = WilsonCloverFermionR::FermionField;
+  // Default-constructed implementation parameters and anisotropy coefficients.
+  WilsonCloverFermionR::ImplParams params;
+  WilsonAnisotropyCoefficients anis;
+
+  // The source is drawn from pRNG before the gauge field.
+  FermionField src(&Grid); random(pRNG,src);
+  FermionField result(&Grid); result=zero;
+  LatticeGaugeField Umu(&Grid); SU3::HotConfiguration(pRNG,Umu);
+
+  RealD mass  = 0.5;
+  RealD csw_r = 1.0;
+  RealD csw_t = 1.0;
+  WilsonCloverFermionR Dwc(Umu,Grid,RBGrid,mass,csw_r,csw_t,anis,params);
+
+  MdagMLinearOperator<WilsonCloverFermionR,FermionField> HermOp(Dwc);
+
+  TrivialPrecon<FermionField> simple;
+
+  // NOTE(review): arguments are presumably (tolerance, max iterations, preconditioner, restart length) - confirm.
+  FlexibleGeneralisedMinimalResidual<FermionField> FGMRES(1.0e-8, 10000, simple, 25);
+  FGMRES(HermOp,src,result);
+
+  Grid_finalize();
+}
diff --git a/tests/solver/Test_wilsonclover_gmres_unprec.cc b/tests/solver/Test_wilsonclover_gmres_unprec.cc
new file mode 100644
index 00000000..a9fe7181
--- /dev/null
+++ b/tests/solver/Test_wilsonclover_gmres_unprec.cc
@@ -0,0 +1,71 @@
+/*************************************************************************************
+
+Grid physics library, www.github.com/paboyle/Grid
+
+Source file: ./tests/solver/Test_wilsonclover_gmres_unprec.cc
+
+Copyright (C) 2015-2018
+
+Author: Daniel Richtmann <daniel.richtmann@ur.de>
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along
+with this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+
+See the full license in the file "LICENSE" in the top level distribution
+directory
+*************************************************************************************/
+/*  END LEGAL */
+#include <Grid/Grid.h>
+
+using namespace Grid;
+using namespace Grid::QCD;
+
+// Solver test driver: runs GeneralisedMinimalResidual on a
+// WilsonCloverFermionR operator whose gauge field comes from SU3::HotConfiguration.
+int main (int argc, char ** argv)
+{
+  Grid_init(&argc,&argv);
+
+  std::vector<int> latt_size   = GridDefaultLatt();
+  std::vector<int> simd_layout = GridDefaultSimd(Nd,vComplex::Nsimd());
+  std::vector<int> mpi_layout  = GridDefaultMpi();
+  GridCartesian               Grid(latt_size,simd_layout,mpi_layout);
+  GridRedBlackCartesian     RBGrid(&Grid);
+
+  // Fixed integer seeds for the parallel RNG that fills src and Umu below.
+  std::vector<int> seeds({1,2,3,4});
+  GridParallelRNG          pRNG(&Grid);  pRNG.SeedFixedIntegers(seeds);
+
+  using FermionField = WilsonCloverFermionR::FermionField;
+  WilsonCloverFermionR::ImplParams params;
+  WilsonAnisotropyCoefficients anis;
+
+  // src is drawn before Umu from the same generator; keep this order.
+  FermionField src(&Grid); random(pRNG,src);
+  FermionField result(&Grid); result=zero;
+  LatticeGaugeField Umu(&Grid); SU3::HotConfiguration(pRNG,Umu);
+
+  RealD mass  = 0.5;
+  RealD csw_r = 1.0;
+  RealD csw_t = 1.0;
+  WilsonCloverFermionR Dwc(Umu,Grid,RBGrid,mass,csw_r,csw_t,anis,params);
+
+  MdagMLinearOperator<WilsonCloverFermionR,FermionField> HermOp(Dwc);
+  // NOTE(review): arguments look like (tolerance, max iterations,
+  // restart length) -- confirm against the solver header.
+  GeneralisedMinimalResidual<FermionField> GMRES(1.0e-8, 10000, 25);
+  GMRES(HermOp,src,result);
+
+  Grid_finalize();
+}
diff --git a/tests/solver/Test_wilsonclover_mg.cc b/tests/solver/Test_wilsonclover_mg.cc
new file mode 100644
index 00000000..e749aacb
--- /dev/null
+++ b/tests/solver/Test_wilsonclover_mg.cc
@@ -0,0 +1,117 @@
+/*************************************************************************************
+
+    Grid physics library, www.github.com/paboyle/Grid 
+
+    Source file: ./tests/solver/Test_wilsonclover_mg.cc
+
+    Copyright (C) 2015-2018
+
+    Author: Daniel Richtmann <daniel.richtmann@ur.de>
+
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License along
+    with this program; if not, write to the Free Software Foundation, Inc.,
+    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+
+    See the full license in the file "LICENSE" in the top level distribution directory
+    *************************************************************************************/
+/*  END LEGAL */
+
+#include <Grid/Grid.h>
+#include <Test_multigrid_common.h>
+
+using namespace std;
+using namespace Grid;
+using namespace Grid::QCD;
+
+// Multigrid solver test for WilsonCloverFermionR, configured from an XML parameter file.
+int main(int argc, char **argv) {
+  Grid_init(&argc, &argv);
+
+  GridCartesian *        FGrid   = SpaceTimeGrid::makeFourDimGrid(GridDefaultLatt(), GridDefaultSimd(Nd, vComplex::Nsimd()), GridDefaultMpi());
+  GridRedBlackCartesian *FrbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(FGrid);
+
+  std::vector<int> rngSeeds({1, 2, 3, 4});
+  GridParallelRNG  rng(FGrid);
+  rng.SeedFixedIntegers(rngSeeds);
+
+  // src is drawn before the gauge field from the same generator; keep this order.
+  LatticeFermion src(FGrid);
+  gaussian(rng, src);
+  LatticeFermion result(FGrid);
+  result = zero;
+  LatticeGaugeField Umu(FGrid);
+  SU3::HotConfiguration(rng, Umu);
+
+  RealD mass  = -0.25;
+  RealD csw_r = 1.0;
+  RealD csw_t = 1.0;
+
+  // The parameter file defaults to ./mg_params.xml; --inputxml overrides it.
+  MultiGridParams mgParams;
+  std::string     xmlPath{"./mg_params.xml"};
+  if(GridCmdOptionExists(argv, argv + argc, "--inputxml")) {
+    xmlPath = GridCmdOptionPayload(argv, argv + argc, "--inputxml");
+    assert(xmlPath.length() != 0);
+  }
+
+  {
+    // Dump the default-constructed parameters as a template, then load the input file.
+    XmlWriter templateWriter("mg_params_template.xml");
+    write(templateWriter, "Params", mgParams);
+    std::cout << GridLogMessage << "Written mg_params_template.xml" << std::endl;
+
+    XmlReader paramReader(xmlPath);
+    read(paramReader, "Params", mgParams);
+    std::cout << GridLogMessage << "Read in " << xmlPath << std::endl;
+  }
+
+  checkParameterValidity(mgParams);
+  std::cout << mgParams << std::endl;
+  LevelInfo levelInfo(FGrid, mgParams);
+
+  // Note: because of chiral doubling only nbasis/2 full basis vectors are actually used
+  const int nbasis = 40;
+
+  WilsonCloverFermionR Dwc(Umu, *FGrid, *FrbGrid, mass, csw_r, csw_t);
+  MdagMLinearOperator<WilsonCloverFermionR, LatticeFermion> MdagMOpDwc(Dwc);
+
+  const char *banner = "**************************************************";
+  std::cout << GridLogMessage << banner << std::endl;
+  std::cout << GridLogMessage << "Testing Multigrid for Wilson Clover" << std::endl;
+  std::cout << GridLogMessage << banner << std::endl;
+
+  TrivialPrecon<LatticeFermion> identityPrecon;
+  auto mgPrecon = createMGInstance<vSpinColourVector, vTComplex, nbasis, WilsonCloverFermionR>(mgParams, levelInfo, Dwc, Dwc);
+  mgPrecon->setup();
+
+  if(GridCmdOptionExists(argv, argv + argc, "--runchecks")) {
+    RealD toleranceForMGChecks = 1e-13;
+    if(getPrecision<LatticeFermion>::value == 1) toleranceForMGChecks = 1e-6;
+    mgPrecon->runChecks(toleranceForMGChecks);
+  }
+
+  std::vector<std::unique_ptr<OperatorFunction<LatticeFermion>>> solvers;
+  solvers.emplace_back(new ConjugateGradient<LatticeFermion>(1.0e-12, 50000, false));
+  solvers.emplace_back(new FlexibleGeneralisedMinimalResidual<LatticeFermion>(1.0e-12, 50000, identityPrecon, 100, false));
+  solvers.emplace_back(new FlexibleGeneralisedMinimalResidual<LatticeFermion>(1.0e-12, 50000, *mgPrecon, 100, false));
+
+  for(std::size_t idx = 0; idx < solvers.size(); ++idx) {
+    std::cout << std::endl << "Starting with a new solver" << std::endl;
+    result = zero;
+    (*solvers[idx])(MdagMOpDwc, src, result);
+    std::cout << std::endl;
+  }
+
+  mgPrecon->reportTimings();
+  Grid_finalize();
+}
diff --git a/tests/solver/Test_wilsonclover_mg_mp.cc b/tests/solver/Test_wilsonclover_mg_mp.cc
new file mode 100644
index 00000000..d9ed1d33
--- /dev/null
+++ b/tests/solver/Test_wilsonclover_mg_mp.cc
@@ -0,0 +1,169 @@
+/*************************************************************************************
+
+    Grid physics library, www.github.com/paboyle/Grid 
+
+    Source file: ./tests/solver/Test_wilsonclover_mg_mp.cc
+
+    Copyright (C) 2015-2018
+
+    Author: Daniel Richtmann <daniel.richtmann@ur.de>
+
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License along
+    with this program; if not, write to the Free Software Foundation, Inc.,
+    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+
+    See the full license in the file "LICENSE" in the top level distribution directory
+    *************************************************************************************/
+/*  END LEGAL */
+
+#include <Grid/Grid.h>
+#include <Test_multigrid_common.h>
+
+using namespace std;
+using namespace Grid;
+using namespace Grid::QCD;
+
+// Mixed-precision multigrid test for Wilson clover: a double-precision outer solve
+// uses a single-precision MG preconditioner; --docomparison repeats it in double.
+int main(int argc, char **argv) {
+  Grid_init(&argc, &argv);
+
+  // clang-format off
+  GridCartesian         *FGrid_d   = SpaceTimeGrid::makeFourDimGrid(GridDefaultLatt(), GridDefaultSimd(Nd, vComplexD::Nsimd()), GridDefaultMpi());
+  GridCartesian         *FGrid_f   = SpaceTimeGrid::makeFourDimGrid(GridDefaultLatt(), GridDefaultSimd(Nd, vComplexF::Nsimd()), GridDefaultMpi());
+  GridRedBlackCartesian *FrbGrid_d = SpaceTimeGrid::makeFourDimRedBlackGrid(FGrid_d);
+  GridRedBlackCartesian *FrbGrid_f = SpaceTimeGrid::makeFourDimRedBlackGrid(FGrid_f);
+  // clang-format on
+
+  std::vector<int> fSeeds({1, 2, 3, 4});
+  GridParallelRNG  fPRNG(FGrid_d);
+  fPRNG.SeedFixedIntegers(fSeeds);
+
+  // clang-format off
+  LatticeFermionD       src_d(FGrid_d); gaussian(fPRNG, src_d);
+  LatticeFermionD resultMGD_d(FGrid_d); resultMGD_d = zero;
+  LatticeFermionD resultMGF_d(FGrid_d); resultMGF_d = zero;
+  LatticeGaugeFieldD    Umu_d(FGrid_d); SU3::HotConfiguration(fPRNG, Umu_d);
+  LatticeGaugeFieldF    Umu_f(FGrid_f); precisionChange(Umu_f, Umu_d);
+  // clang-format on
+
+  RealD mass  = -0.25;
+  RealD csw_r = 1.0;
+  RealD csw_t = 1.0;
+
+  MultiGridParams mgParams;
+  std::string     inputXml{"./mg_params.xml"};
+
+  if(GridCmdOptionExists(argv, argv + argc, "--inputxml")) {
+    inputXml = GridCmdOptionPayload(argv, argv + argc, "--inputxml");
+    assert(inputXml.length() != 0);
+  }
+
+  {
+    XmlWriter writer("mg_params_template.xml");
+    write(writer, "Params", mgParams);
+    std::cout << GridLogMessage << "Written mg_params_template.xml" << std::endl;
+
+    XmlReader reader(inputXml);
+    read(reader, "Params", mgParams);
+    std::cout << GridLogMessage << "Read in " << inputXml << std::endl;
+  }
+
+  checkParameterValidity(mgParams);
+  std::cout << mgParams << std::endl;
+
+  LevelInfo levelInfo_d(FGrid_d, mgParams);
+  LevelInfo levelInfo_f(FGrid_f, mgParams);
+
+  // Note: We do chiral doubling, so actually only nbasis/2 full basis vectors are used
+  const int nbasis = 40;
+
+  WilsonCloverFermionD Dwc_d(Umu_d, *FGrid_d, *FrbGrid_d, mass, csw_r, csw_t);
+  WilsonCloverFermionF Dwc_f(Umu_f, *FGrid_f, *FrbGrid_f, mass, csw_r, csw_t);
+
+  // Operator passed to both outer solvers; the MG instances are built from Dwc_f / Dwc_d directly.
+  MdagMLinearOperator<WilsonCloverFermionD, LatticeFermionD> MdagMOpDwc_d(Dwc_d);
+
+  std::cout << GridLogMessage << "**************************************************" << std::endl;
+  std::cout << GridLogMessage << "Testing single-precision Multigrid for Wilson Clover" << std::endl;
+  std::cout << GridLogMessage << "**************************************************" << std::endl;
+
+  auto MGPreconDwc_f = createMGInstance<vSpinColourVectorF, vTComplexF, nbasis, WilsonCloverFermionF>(mgParams, levelInfo_f, Dwc_f, Dwc_f);
+
+  MGPreconDwc_f->setup();
+
+  if(GridCmdOptionExists(argv, argv + argc, "--runchecks")) {
+    MGPreconDwc_f->runChecks(1e-6);
+  }
+
+  MixedPrecisionFlexibleGeneralisedMinimalResidual<LatticeFermionD, LatticeFermionF> MPFGMRESPREC(
+    1.0e-12, 50000, FGrid_f, *MGPreconDwc_f, 100, false);
+
+  std::cout << std::endl << "Starting with a new solver" << std::endl;
+  MPFGMRESPREC(MdagMOpDwc_d, src_d, resultMGF_d);
+
+  MGPreconDwc_f->reportTimings();
+
+  if(GridCmdOptionExists(argv, argv + argc, "--docomparison")) {
+    std::cout << GridLogMessage << "**************************************************" << std::endl;
+    std::cout << GridLogMessage << "Testing double-precision Multigrid for Wilson Clover" << std::endl;
+    std::cout << GridLogMessage << "**************************************************" << std::endl;
+
+    auto MGPreconDwc_d = createMGInstance<vSpinColourVectorD, vTComplexD, nbasis, WilsonCloverFermionD>(mgParams, levelInfo_d, Dwc_d, Dwc_d);
+
+    MGPreconDwc_d->setup();
+
+    if(GridCmdOptionExists(argv, argv + argc, "--runchecks")) {
+      MGPreconDwc_d->runChecks(1e-13);
+    }
+
+    FlexibleGeneralisedMinimalResidual<LatticeFermionD> FGMRESPREC(1.0e-12, 50000, *MGPreconDwc_d, 100, false);
+
+    std::cout << std::endl << "Starting with a new solver" << std::endl;
+    FGMRESPREC(MdagMOpDwc_d, src_d, resultMGD_d);
+
+    MGPreconDwc_d->reportTimings();
+
+    std::cout << GridLogMessage << "**************************************************" << std::endl;
+    std::cout << GridLogMessage << "Comparing single-precision Multigrid with double-precision one for Wilson Clover" << std::endl;
+    std::cout << GridLogMessage << "**************************************************" << std::endl;
+
+    LatticeFermionD diffFullSolver(FGrid_d);
+    // NOTE(review): axpy_norm presumably forms resultMGD_d - resultMGF_d and returns its norm2 -- confirm.
+    RealD deviationFullSolver = axpy_norm(diffFullSolver, -1.0, resultMGF_d, resultMGD_d);
+
+    // clang-format off
+    LatticeFermionF src_f(FGrid_f);    precisionChange(src_f, src_d);
+    LatticeFermionF resMGF_f(FGrid_f); resMGF_f = zero;
+    LatticeFermionD resMGD_d(FGrid_d); resMGD_d = zero;
+    // clang-format on
+    // Apply each preconditioner once to the same source (src_f is src_d converted in precision).
+    (*MGPreconDwc_f)(src_f, resMGF_f);
+    (*MGPreconDwc_d)(src_d, resMGD_d);
+
+    LatticeFermionD diffOnlyMG(FGrid_d);
+    LatticeFermionD resMGF_d(FGrid_d);
+    precisionChange(resMGF_d, resMGF_f);
+
+    RealD deviationOnlyPrec = axpy_norm(diffOnlyMG, -1.0, resMGF_d, resMGD_d);
+    // Trailing spaces inside the labels keep the printed value columns aligned.
+    // clang-format off
+    std::cout << GridLogMessage << "Absolute difference between FGMRES preconditioned by double and single precision MG:   " << deviationFullSolver                      << std::endl;
+    std::cout << GridLogMessage << "Relative deviation  between FGMRES preconditioned by double and single precision MG:   " << deviationFullSolver / norm2(resultMGD_d) << std::endl;
+    std::cout << GridLogMessage << "Absolute difference between one iteration of MG Prec in double and single precision:   " << deviationOnlyPrec                        << std::endl;
+    std::cout << GridLogMessage << "Relative deviation  between one iteration of MG Prec in double and single precision:   " << deviationOnlyPrec / norm2(resMGD_d)      << std::endl;
+    // clang-format on
+  }
+
+  Grid_finalize();
+}
diff --git a/tests/solver/Test_wilsonclover_mr_unprec.cc b/tests/solver/Test_wilsonclover_mr_unprec.cc
new file mode 100644
index 00000000..e3aa8838
--- /dev/null
+++ b/tests/solver/Test_wilsonclover_mr_unprec.cc
@@ -0,0 +1,71 @@
+/*************************************************************************************
+
+Grid physics library, www.github.com/paboyle/Grid
+
+Source file: ./tests/solver/Test_wilsonclover_mr_unprec.cc
+
+Copyright (C) 2015-2018
+
+Author: Daniel Richtmann <daniel.richtmann@ur.de>
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along
+with this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+
+See the full license in the file "LICENSE" in the top level distribution
+directory
+*************************************************************************************/
+/*  END LEGAL */
+#include <Grid/Grid.h>
+
+using namespace Grid;
+using namespace Grid::QCD;
+
+// Solver test driver: runs MinimalResidual on a
+// WilsonCloverFermionR operator whose gauge field comes from SU3::HotConfiguration.
+int main (int argc, char ** argv)
+{
+  Grid_init(&argc,&argv);
+
+  std::vector<int> latt_size   = GridDefaultLatt();
+  std::vector<int> simd_layout = GridDefaultSimd(Nd,vComplex::Nsimd());
+  std::vector<int> mpi_layout  = GridDefaultMpi();
+  GridCartesian               Grid(latt_size,simd_layout,mpi_layout);
+  GridRedBlackCartesian     RBGrid(&Grid);
+
+  // Fixed integer seeds for the parallel RNG that fills src and Umu below.
+  std::vector<int> seeds({1,2,3,4});
+  GridParallelRNG          pRNG(&Grid);  pRNG.SeedFixedIntegers(seeds);
+
+  using FermionField = WilsonCloverFermionR::FermionField;
+  WilsonCloverFermionR::ImplParams params;
+  WilsonAnisotropyCoefficients anis;
+
+  // src is drawn before Umu from the same generator; keep this order.
+  FermionField src(&Grid); random(pRNG,src);
+  FermionField result(&Grid); result=zero;
+  LatticeGaugeField Umu(&Grid); SU3::HotConfiguration(pRNG,Umu);
+
+  RealD mass  = 0.5;
+  RealD csw_r = 1.0;
+  RealD csw_t = 1.0;
+  WilsonCloverFermionR Dwc(Umu,Grid,RBGrid,mass,csw_r,csw_t,anis,params);
+
+  MdagMLinearOperator<WilsonCloverFermionR,FermionField> HermOp(Dwc);
+  // NOTE(review): arguments look like (tolerance, max iterations,
+  // overrelaxation parameter) -- confirm against the solver header.
+  MinimalResidual<FermionField> MR(1.0e-8,10000,0.8);
+  MR(HermOp,src,result);
+
+  Grid_finalize();
+}