mirror of https://github.com/paboyle/Grid.git synced 2025-06-16 23:07:05 +01:00

Compare commits


151 Commits

Author SHA1 Message Date
de8b2dcca3 Hadrons: faster A2A matrix load 2019-01-11 16:12:49 +00:00
efe000341d Hadrons: contractor fixes 2019-01-11 16:12:16 +00:00
11086c5c25 Hadrons: first stab at MPI contractor 2019-01-10 16:29:57 +00:00
91a7fe247b Merge branch 'DanielRichtmann-feature/wilsonmg' into develop 2019-01-02 14:40:31 +00:00
8a1be021d3 Merge branch 'feature/wilsonmg' of https://github.com/DanielRichtmann/Grid into DanielRichtmann-feature/wilsonmg 2019-01-02 14:39:59 +00:00
fd66325321 pure QED test and copyright update 2018-12-14 17:39:11 +00:00
c637c0c48c James H.'s code for general size Wilson loops 2018-12-14 17:37:09 +00:00
c4b472176c Photon code fix 2018-12-14 17:36:38 +00:00
856476a890 big cleanup of the Photon class + QED Coulomb gauge 2018-12-13 21:52:38 +00:00
c509bd3fe2 Merge branch 'feature/resilient-io' into develop 2018-12-01 12:57:43 +00:00
2881b3e8e5 WilsonMG: Remove unnecessary static assertions 2018-06-26 14:42:30 +02:00
cc5d025ea4 WilsonMG: Adapt staggered GMRES/MR tests to "new" constructor 2018-06-18 16:20:20 +02:00
ddcb53bce2 Merge remote-tracking branch 'upstream/develop' into feature/wilsonmg 2018-06-13 09:50:37 +02:00
d1c80e1d46 WilsonMG: Correct years in copyright line 2018-06-13 09:44:09 +02:00
c73cc7d354 WilsonMG: Add tests with MG preconditioner running single precision, outer solver running in double 2018-06-12 16:10:48 +02:00
49fdc324a0 WilsonMG: Make MG correctness checks abort on failing tests 2018-06-12 16:10:48 +02:00
f32714a2d1 WilsonMG: Make running MG correctness checks optional via commandline 2018-06-12 16:10:48 +02:00
73a955be20 WilsonMG: Move tests for Wilson & WilsonClover into separate files 2018-06-12 16:10:48 +02:00
66b7a0f871 WilsonMG: Move multigrid class to separate file 2018-06-12 16:10:48 +02:00
2ab9d4bc56 WilsonMG: Fix random behavior in GMRES
From time to time I saw random behavior since the basis vectors were not
initialized properly.
2018-06-12 15:01:31 +02:00
4f41cd114d WilsonMG: Add a mixed precision version of FGMRES
This version does everything in double prec but accepts a preconditioner working
in single precision.
2018-06-12 15:01:31 +02:00
11c4f5e32c WilsonMG: Provide command line switch for reading in input xml + move default params to constructor of MultiGridParams 2018-06-12 15:01:31 +02:00
e9b9550298 WilsonMG: Fix incompatibility with single prec MG in construction of simd layout on coarser grids 2018-06-12 15:01:31 +02:00
7564fedf68 WilsonMG: Set subspace to zero to avoid random behavior 2018-06-12 15:01:31 +02:00
6c27c72585 WilsonMG: Provide more sensible default values for MG parameters 2018-05-16 17:26:09 +02:00
9c003d2d72 WilsonMG: Base wilson mg preconditioner entirely on existing infrastructure 2018-05-16 17:26:09 +02:00
4b8710970c WilsonMG: Switch to Galerkin coarsening in CoarsenedMatrix 2018-05-16 17:26:09 +02:00
68d686ec38 WilsonMG: Add functionality for applying G5 on coarse grids 2018-05-16 16:17:14 +02:00
c48b69ca81 WilsonMG: Implement Mdir & Mdiag in CoarsenedMatrix 2018-05-16 16:08:05 +02:00
df8c208f5c WilsonMG: Revert CoarsenedMatrix.h and Lattice_transfer.h back to state of develop branch 2018-05-16 16:02:54 +02:00
61812ab7f1 Merge remote-tracking branch 'upstream/develop' into feature/wilsonmg 2018-05-15 14:57:18 +02:00
73ced656eb Merge remote-tracking branch 'upstream/develop' into feature/wilsonmg 2018-04-03 17:51:11 +02:00
f69008edf1 WilsonMG: Add functionality to report timings to MG preconditioner 2018-04-03 17:26:49 +02:00
57a49ed22f WilsonMG: Read in MG parameters from xml in test 2018-04-03 16:03:11 +02:00
ff6413a764 WilsonMG: Make number of levels chooseable at runtime
I don't like this solution though :(
2018-04-03 15:57:33 +02:00
2530bfed01 WilsonMG: Move params instance from global scope to test main function 2018-04-03 14:50:48 +02:00
74f79c5ac7 Revert "Add function to return full type as std::string"
This reverts commit 1cb745c8dc.
2018-03-29 12:03:50 +02:00
58c30c0cb1 WilsonMG: Add conformability checks in MG preconditioner 2018-03-28 13:24:39 +02:00
917a92118a WilsonMG: Move operator test to MG testing routine 2018-03-28 12:19:25 +02:00
04f9cf088d WilsonMG: Add more parameters to MultiGridParams struct 2018-03-27 17:13:11 +02:00
99107038f9 WilsonMG: Rationalize the level counting strategy 2018-03-27 17:06:33 +02:00
b78456bdf4 WilsonMG: Get rid of explicit include of GCR header 2018-03-26 15:41:53 +02:00
08543b6b11 WilsonMG: Provide a switch between V- and K-cycle 2018-03-26 15:37:17 +02:00
63ba33371f WilsonMG: Some minor refactoring 2018-03-26 15:34:53 +02:00
683a7d2ddd WilsonMG: Move comment to make clang-format happy 2018-03-26 14:59:40 +02:00
afdcbf79d1 Merge remote-tracking branch 'upstream/develop' into feature/wilsonmg 2018-03-23 21:13:50 +01:00
3c3ec4e267 WilsonMG: Move tests for Wilson & WilsonClover into the same file 2018-03-23 21:12:27 +01:00
bbe1d5b49e WilsonMG: Temporarily use GMRES in construction of basis vectors
This can go back to CG once Mdag in CoarsenedMatrix works.
2018-03-23 20:02:27 +01:00
0f6009a29f WilsonMG: Huge refactor into something that could be considered an algorithm 2018-03-23 19:55:43 +01:00
1cfed3de7c WilsonMG: Add new logger for MG 2018-03-23 19:55:16 +01:00
edbc0d49d7 WilsonMG: Get rid of explicit GridTypeMappers in CoarsenedMatrix 2018-03-22 16:38:24 +01:00
ee5cf6c8c5 WilsonMG: Some minor changes to GMRES implementations 2018-03-16 13:10:45 +01:00
a66cecc509 WilsonMG: Fix invalid call to MR ctor 2018-03-09 17:34:29 +01:00
0f6cdf3d4b WilsonMG: Implement missing parts of CoarsenedMatrix 2018-03-09 16:56:16 +01:00
1e63b73a14 WilsonMG: Some cleanup/formatting 2018-03-09 16:50:19 +01:00
6ab60c5b70 Merge remote-tracking branch 'upstream/develop' into feature/wilsonmg 2018-02-08 23:59:07 +01:00
8c692b7ffd WilsonMG: Comment assertion on hermiticity of coarse operator for now
TODO: Think of a way to not break dwf_hdcr by doing that. It's only an assertion
but it still interferes with it.
2018-02-08 23:55:05 +01:00
2976132bdd Add first version of multigrid for wilson clover analogous to wilson one
Just like the wilson one, this algorithm

• is currently only a 2-level method since I don't have correct implementations
  for Mdir and Mdiag in CoarsenedMatrix yet (needed for further coarsening)
• needs levelization and refactoring into a proper algorithm
2018-02-08 23:52:10 +01:00
48177f2f2d Add tests for all MR|GMRES solvers with wilson clover action 2018-02-08 23:52:09 +01:00
c4ce70a821 WilsonMG: Major cleanup 2018-02-08 23:52:08 +01:00
a3e009ba54 Add tests for CAGMRES solvers with staggered action 2018-02-08 17:46:28 +01:00
eb7cf239d9 Print warning messages in CAGMRES solvers
Currently, the implementation of these algorithms doesn't differ from their
non-communication-avoiding versions.
2018-02-08 17:43:47 +01:00
13ae371ef8 Make solver parameters match in all MR|GMRES solver tests 2018-02-08 17:33:10 +01:00
9f79a87102 Fix bugs in Flexible GMRES solvers
Somehow I got the left- and right-preconditioned versions of GMRES mixed up. As
of now this is the right-preconditioned version, which is what we want.
2018-02-08 16:00:31 +01:00
4ded1ceeb0 Make GMRES solvers perform no more than MaxIterations steps
I noticed that it was possible to overrun this number.
2018-02-08 15:29:44 +01:00
8bc12e0ce1 Remove superfluous comments in MR solver 2018-02-07 18:09:09 +01:00
cc2f00f827 Remove test for MR solver with dwf action as it doesn't converge 2018-02-07 18:09:08 +01:00
cd61e2e6d6 Increase max iterations in test of MR solver with staggered action 2018-02-07 18:09:07 +01:00
323ed1a588 Add an overrelaxation parameter to the MR solver 2018-02-07 18:09:06 +01:00
68c66d2e4b Remove empty line in output of *Residual* solvers 2018-02-07 18:08:56 +01:00
1671adfd49 WilsonMG: Add some tests for linear operators 2018-02-07 17:15:22 +01:00
871649238c WilsonMG: Stricter naming for linear operators 2018-02-01 14:43:08 +01:00
7c86d2085b WilsonMG: Some minor cleanup 2018-02-01 12:24:16 +01:00
9292be0b69 WilsonMG: Add check for Mdiag + Σ Mdir == M
Need to test my implementations of CoarsenedMatrix::Mdiag &
CoarsenedMatrix::Mdir.
2018-01-31 14:03:30 +01:00
10141f90c9 WilsonMG: Rename test file 2018-01-30 10:25:09 +01:00
a414430817 Merge remote-tracking branch 'upstream/develop' into feature/ddalphaamg 2018-01-29 18:32:31 +01:00
f20728baa9 WilsonMG: Some further steps towards a three level method
Currently this is very "manual" as we are still testing stuff. Will refactor
and make it an algorithm once everything works.

What currently does work:

  - All tests in MultiGridPreconditioner::runChecks for the first coarse grid
  - The tests for the intergrid operators going from the first to the second
    coarse grid
    - (1 - P R) v   == 0
    - (1 - R P) v_c == 0
  - A full solve with VPGCR and a two-level MG preconditioner

What hinders the rest of the tests from passing with a three-level method is the
absence of implementations of CoarsenedMatrix::Mdir and CoarsenedMatrix::Mdiag.
2018-01-29 18:29:49 +01:00
d2e68c4355 WilsonMG: Perform some minor cleanup 2018-01-29 18:07:10 +01:00
1cb745c8dc Add function to return full type as std::string
Also works for nested templates. I find it useful for debugging.
Possible usage:

std::cout << "getTypename<AType>() = " << getTypename<Atype>() << std::endl;
std::cout << "getTypename<decltype(AnInstance)>() = " << getTypename<decltype(AnInstance)>() << std::endl;
2018-01-29 17:39:19 +01:00
faf4278019 Use 2 passes of GS in coarse operator construction 2018-01-29 17:21:42 +01:00
194e4b94bb Make MG checking function work level-wise 2018-01-29 17:18:20 +01:00
bfc1411c1f Use more iterations in subspace creation 2018-01-29 17:11:29 +01:00
161637e573 Turn on orthogonality checking temporarily 2018-01-29 17:10:05 +01:00
04f92ccddf WilsonMG: Provide a fix for the previous commit; compiles and runs successfully now
I don't like the solution with the temporary very much though ...
2018-01-22 14:56:48 +01:00
3b2d805398 WilsonMG: Some first steps towards coarse spin dofs; not compiling yet
A failing conversion from the innermost type (Grid::Simd<...>) to a coarse
scalar (triple iScalar) in blockPromote prevents this commit from working.
2018-01-22 12:45:51 +01:00
9dc885d297 Fix a bug in Wilson MG
The calculation of the lattice size of a second coarse level was incorrect.
2018-01-18 17:02:04 +01:00
a70c1feecc Remove some unnecessary stuff in Wilson MG 2018-01-18 15:48:28 +01:00
38328100c9 Implement correctness checks for Wilson MG 2018-01-18 15:43:15 +01:00
9732519c41 Apply clang-format to Wilson MG
I can provide the configuration file I used if people want that.
2018-01-18 15:14:37 +01:00
fa4eeb28c4 Save current state in Wilson MG test file 2018-01-17 17:56:34 +01:00
10f7a17ae4 Make timing in VPGCR more detailed 2018-01-11 13:42:18 +01:00
26f14d7dd7 Adapt output format of non-herm solvers to the one of VPGCR 2018-01-11 13:36:30 +01:00
73434db636 Merge remote-tracking branch 'upstream/develop' into feature/ddalphaamg 2018-01-09 10:43:33 +01:00
c6411f8514 Merge remote-tracking branch 'upstream/develop' into feature/ddalphaamg 2018-01-08 10:37:10 +01:00
6cf635d61c Remove some old code in Wilson MG 2017-12-22 13:20:09 +01:00
39558cce52 Multiply TVs in Wilson MG with G5 instead of G5R5 2017-12-22 13:07:56 +01:00
df152648d6 Fix error in MR code when compiling for single precision 2017-12-06 18:00:58 +01:00
4e965c168e Implement analogue of test vector analysis in WMG codebase 2017-11-29 15:05:27 +01:00
f260af546e Save current state 2017-11-28 15:03:02 +01:00
649b8c9aca Save current state 2017-11-24 10:46:20 +01:00
0afa22747d Merge remote-tracking branch 'upstream/develop' into feature/new-solver-algorithms 2017-11-24 10:11:42 +01:00
fa43206c79 Remove some empty lines 2017-11-10 13:48:38 +01:00
a367835bf2 Set everything up for the implementation of FCAGMRES
The current implementation is the exact same code as normal FGMRES. This commit
only sets up the "framework" for the implementation of FCAGMRES, i.e., a test
and an include in the algorithms header file.
2017-11-09 17:30:41 +01:00
d7743591ea Fix some minor formatting errors 2017-11-09 17:28:19 +01:00
c6cbe533ea Set everything up for the implementation of CAGMRES
The current implementation is the exact same code as normal GMRES. This commit
only sets up the "framework" for the implementation of CAGMRES, i.e., a test and
an include in the algorithms header file.
2017-11-09 17:14:44 +01:00
8402ab6cf9 Some minor formatting improvements 2017-11-09 12:52:04 +01:00
c63095345e Remove some superfluous comments 2017-11-09 12:47:20 +01:00
a7ae46b61e Remove some comments 2017-11-08 16:58:20 +01:00
cd63052205 Remove everything preconditioner-related in GMRES code 2017-11-08 16:57:40 +01:00
699d537cd6 Add FGMRES test with staggered fermions 2017-11-08 16:56:42 +01:00
9031f0ed95 Fix a filename in a file header 2017-11-08 16:42:26 +01:00
26b3d441bb Check in forgotten FGMRES test with wilson Fermions 2017-11-08 16:39:11 +01:00
99bc4cde56 Fix an implementation error in FGMRES 2017-11-08 16:38:34 +01:00
e843d83d9d Make z in FGMRES a single Field 2017-11-08 16:38:16 +01:00
0f75ea52b7 First version of FGMRES; not working yet 2017-11-08 16:17:18 +01:00
8107b785cc Rename misunderstood "rsd_sq" to "rsq" in GMRES code 2017-11-08 14:40:03 +01:00
37b777d801 Add test for GMRES solver with staggered fermions 2017-11-08 14:28:48 +01:00
7382787856 Some minor changes 2017-11-08 14:23:55 +01:00
781c611ca0 Perform minor code style fix 2017-11-08 14:22:38 +01:00
b069090b52 Remove a superfluous comment 2017-11-08 13:58:02 +01:00
0c1c1d9900 Set precision and formatting only once in MR code 2017-11-08 13:57:06 +01:00
7f4ed6c2e5 First working version of GMRES + a test for Wilson fermions 2017-11-08 13:56:41 +01:00
56d32a4afb Rename misunderstood "rsd_sq" to "rsq" in MR code 2017-11-08 13:51:08 +01:00
b8ee496ed6 Print some info at start of GMRES 2017-11-08 13:23:41 +01:00
b87416dac4 Fix error with conformable 2017-11-07 15:00:08 +01:00
176bf37372 Remove some commented stuff 2017-11-07 14:57:36 +01:00
b3d342ca22 Remove old implementation of GMRES operator 2017-11-07 10:24:49 +01:00
e1f928398d Save current state 2017-11-07 10:22:41 +01:00
8c579d2d4a Save current state 2017-11-06 18:09:48 +01:00
fc7d07ade0 Correct function signature of body of GMRES outer loop 2017-11-06 17:12:38 +01:00
b3be9195b4 Save one lattice fermion in GMRES code 2017-11-06 17:12:23 +01:00
9e3c187a4d Save current state 2017-11-06 17:05:25 +01:00
8363edfcdb Perform some minor changes to GMRES code 2017-11-06 16:17:44 +01:00
74af31564f Adapt style of wilson GMRES test to style of wilson MR test 2017-11-06 14:06:45 +01:00
e0819d395f Merge remote-tracking branch 'upstream/develop' into feature/new-solver-algorithms 2017-11-06 13:09:36 +01:00
6f81906b00 Add test for the MR solver with staggered fermions; does not converge atm
TODO: Is this a property of staggered or did I do something wrong?
2017-10-30 16:57:55 +01:00
a2d83d4f3d Add test for the MR solver with DW fermions; does not converge atm
TODO: Is this a property of DWF or did I do something wrong?
2017-10-30 16:39:30 +01:00
89bacb0470 Fix path in MR solver header commentary 2017-10-30 16:16:55 +01:00
19010ff66a Merge remote-tracking branch 'upstream/develop' into feature/new-solver-algorithms 2017-10-30 13:16:46 +01:00
5a477ed29e Perform minor style correction 2017-10-27 14:46:18 +02:00
54128d579a Make MR a bit more verbose 2017-10-27 14:45:29 +02:00
e7b1933e88 Add a test for the MR solver 2017-10-27 14:38:57 +02:00
1bad64ac6a Some formatting 2017-10-27 14:35:04 +02:00
15dfa9f663 Change stopping criterion implementation in MR solver + some cleanup 2017-10-27 14:33:25 +02:00
2185b0d651 Correct author in the file 2017-10-27 14:32:38 +02:00
f61c0b5d03 Very early version of MR solver 2017-10-27 14:09:02 +02:00
074db32e54 Fix build of gmres test 2017-10-27 14:08:48 +02:00
d5f661ba70 Save intermediate state 2017-10-25 10:38:26 +02:00
1ab8d5cc13 Save two more files 2017-10-24 16:58:05 +02:00
789e892865 Save current state 2017-10-24 16:58:04 +02:00
53cfa44d7a Save current state 2017-10-24 16:58:03 +02:00
45 changed files with 4566 additions and 448 deletions

View File

@@ -48,6 +48,12 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk>
#include <Grid/algorithms/iterative/ConjugateGradientMixedPrec.h>
#include <Grid/algorithms/iterative/BlockConjugateGradient.h>
#include <Grid/algorithms/iterative/ConjugateGradientReliableUpdate.h>
#include <Grid/algorithms/iterative/MinimalResidual.h>
#include <Grid/algorithms/iterative/GeneralisedMinimalResidual.h>
#include <Grid/algorithms/iterative/CommunicationAvoidingGeneralisedMinimalResidual.h>
#include <Grid/algorithms/iterative/FlexibleGeneralisedMinimalResidual.h>
#include <Grid/algorithms/iterative/FlexibleCommunicationAvoidingGeneralisedMinimalResidual.h>
#include <Grid/algorithms/iterative/MixedPrecisionFlexibleGeneralisedMinimalResidual.h>
#include <Grid/algorithms/iterative/ImplicitlyRestartedLanczos.h>
#include <Grid/algorithms/CoarsenedMatrix.h>
#include <Grid/algorithms/FFT.h>
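The six headers added above expose the new MR/GMRES solver family through the main
algorithms header. A minimal usage sketch (not part of this changeset; Dw, src and
sol are placeholder objects assumed to be set up elsewhere, and the parameters are
only illustrative):

// Sketch: wrap a Wilson operator and hand it to the new GMRES solver.
MdagMLinearOperator<WilsonFermionR, LatticeFermion> HermOp(Dw);
// tolerance, max iterations, restart length, assert on non-convergence
GeneralisedMinimalResidual<LatticeFermion> GMRES(1.0e-8, 10000, 25, true);
GMRES(HermOp, src, sol);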

View File

@@ -211,6 +211,7 @@ namespace Grid {
for(int b=0;b<nn;b++){
subspace[b] = zero;
gaussian(RNG,noise);
scale = std::pow(norm2(noise),-0.5);
noise=noise*scale;
@@ -295,13 +296,58 @@
return norm2(out);
};
RealD Mdag (const CoarseVector &in, CoarseVector &out){
// // corresponds to Petrov-Galerkin coarsening
// return M(in,out);
// corresponds to Galerkin coarsening
CoarseVector tmp(Grid());
G5C(tmp, in);
M(tmp, out);
G5C(out, out);
return norm2(out);
};
void Mdir(const CoarseVector &in, CoarseVector &out, int dir, int disp){
conformable(_grid,in._grid);
conformable(in._grid,out._grid);
SimpleCompressor<siteVector> compressor;
Stencil.HaloExchange(in,compressor);
auto point = [dir, disp](){
if(dir == 0 and disp == 0)
return 8;
else
return (4 * dir + 1 - disp) / 2;
}();
parallel_for(int ss=0;ss<Grid()->oSites();ss++){
siteVector res = zero;
siteVector nbr;
int ptype;
StencilEntry *SE;
SE=Stencil.GetEntry(ptype,point,ss);
if(SE->_is_local&&SE->_permute) {
permute(nbr,in._odata[SE->_offset],ptype);
} else if(SE->_is_local) {
nbr = in._odata[SE->_offset];
} else {
nbr = Stencil.CommBuf()[SE->_offset];
}
res = res + A[point]._odata[ss]*nbr;
vstream(out._odata[ss],res);
}
};
void Mdiag(const CoarseVector &in, CoarseVector &out){
Mdir(in, out, 0, 0); // use the self coupling (= last) point of the stencil
};
CoarsenedMatrix(GridCartesian &CoarseGrid) :
@@ -417,7 +463,7 @@
std::cout<<GridLogMessage<<"Computed Coarse Operator"<<std::endl;
#endif
// ForceHermitian();
// AssertHermitian();
// ForceDiagonal();
}
void ForceDiagonal(void) {
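Commit 9292be0b69 above refers to a check of the form Mdiag + Σ Mdir == M against
exactly this implementation. A sketch of such a check (placeholder names: CGrid is
a pointer to the coarse grid, CRNG a parallel RNG, CoarseOp a constructed
CoarsenedMatrix):

// Sketch: the self-coupling plus the eight hopping terms should reproduce
// the full coarse operator on a random vector.
CoarseVector v(CGrid), Mv(CGrid), acc(CGrid), tmp(CGrid);
random(CRNG, v);
CoarseOp.M(v, Mv);        // full coarse operator
CoarseOp.Mdiag(v, acc);   // self-coupling (last stencil point)
for (int dir = 0; dir < 4; dir++) {
  for (int disp = -1; disp <= 1; disp += 2) {
    CoarseOp.Mdir(v, tmp, dir, disp);   // single hopping term
    acc = acc + tmp;
  }
}
tmp = Mv - acc;
std::cout << "|M v - (Mdiag + sum Mdir) v|^2 = " << norm2(tmp) << std::endl; // should be ~0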

View File

@@ -0,0 +1,244 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./lib/algorithms/iterative/CommunicationAvoidingGeneralisedMinimalResidual.h
Copyright (C) 2015
Author: Daniel Richtmann <daniel.richtmann@ur.de>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution
directory
*************************************************************************************/
/* END LEGAL */
#ifndef GRID_COMMUNICATION_AVOIDING_GENERALISED_MINIMAL_RESIDUAL_H
#define GRID_COMMUNICATION_AVOIDING_GENERALISED_MINIMAL_RESIDUAL_H
namespace Grid {
template<class Field>
class CommunicationAvoidingGeneralisedMinimalResidual : public OperatorFunction<Field> {
public:
bool ErrorOnNoConverge; // Throw an assert when CAGMRES fails to converge,
// defaults to true
RealD Tolerance;
Integer MaxIterations;
Integer RestartLength;
Integer MaxNumberOfRestarts;
Integer IterationCount; // Number of iterations the CAGMRES took to finish,
// filled in upon completion
GridStopWatch MatrixTimer;
GridStopWatch LinalgTimer;
GridStopWatch QrTimer;
GridStopWatch CompSolutionTimer;
Eigen::MatrixXcd H;
std::vector<std::complex<double>> y;
std::vector<std::complex<double>> gamma;
std::vector<std::complex<double>> c;
std::vector<std::complex<double>> s;
CommunicationAvoidingGeneralisedMinimalResidual(RealD tol,
Integer maxit,
Integer restart_length,
bool err_on_no_conv = true)
: Tolerance(tol)
, MaxIterations(maxit)
, RestartLength(restart_length)
, MaxNumberOfRestarts(MaxIterations/RestartLength + ((MaxIterations%RestartLength == 0) ? 0 : 1))
, ErrorOnNoConverge(err_on_no_conv)
, H(Eigen::MatrixXcd::Zero(RestartLength, RestartLength + 1)) // sizes taken from DD-αAMG code base
, y(RestartLength + 1, 0.)
, gamma(RestartLength + 1, 0.)
, c(RestartLength + 1, 0.)
, s(RestartLength + 1, 0.) {};
void operator()(LinearOperatorBase<Field> &LinOp, const Field &src, Field &psi) {
std::cout << GridLogWarning << "This algorithm currently doesn't differ from regular GMRES" << std::endl;
psi.checkerboard = src.checkerboard;
conformable(psi, src);
RealD guess = norm2(psi);
assert(std::isnan(guess) == 0);
RealD cp;
RealD ssq = norm2(src);
RealD rsq = Tolerance * Tolerance * ssq;
Field r(src._grid);
std::cout << std::setprecision(4) << std::scientific;
std::cout << GridLogIterative << "CommunicationAvoidingGeneralisedMinimalResidual: guess " << guess << std::endl;
std::cout << GridLogIterative << "CommunicationAvoidingGeneralisedMinimalResidual: src " << ssq << std::endl;
MatrixTimer.Reset();
LinalgTimer.Reset();
QrTimer.Reset();
CompSolutionTimer.Reset();
GridStopWatch SolverTimer;
SolverTimer.Start();
IterationCount = 0;
for (int k=0; k<MaxNumberOfRestarts; k++) {
cp = outerLoopBody(LinOp, src, psi, rsq);
// Stopping condition
if (cp <= rsq) {
SolverTimer.Stop();
LinOp.Op(psi,r);
axpy(r,-1.0,src,r);
RealD srcnorm = sqrt(ssq);
RealD resnorm = sqrt(norm2(r));
RealD true_residual = resnorm / srcnorm;
std::cout << GridLogMessage << "CommunicationAvoidingGeneralisedMinimalResidual: Converged on iteration " << IterationCount
<< " computed residual " << sqrt(cp / ssq)
<< " true residual " << true_residual
<< " target " << Tolerance << std::endl;
std::cout << GridLogMessage << "CAGMRES Time elapsed: Total " << SolverTimer.Elapsed() << std::endl;
std::cout << GridLogMessage << "CAGMRES Time elapsed: Matrix " << MatrixTimer.Elapsed() << std::endl;
std::cout << GridLogMessage << "CAGMRES Time elapsed: Linalg " << LinalgTimer.Elapsed() << std::endl;
std::cout << GridLogMessage << "CAGMRES Time elapsed: QR " << QrTimer.Elapsed() << std::endl;
std::cout << GridLogMessage << "CAGMRES Time elapsed: CompSol " << CompSolutionTimer.Elapsed() << std::endl;
return;
}
}
std::cout << GridLogMessage << "CommunicationAvoidingGeneralisedMinimalResidual did NOT converge" << std::endl;
if (ErrorOnNoConverge)
assert(0);
}
RealD outerLoopBody(LinearOperatorBase<Field> &LinOp, const Field &src, Field &psi, RealD rsq) {
RealD cp = 0;
Field w(src._grid);
Field r(src._grid);
// this should probably be made a class member so that it is only allocated once, not in every restart
std::vector<Field> v(RestartLength + 1, src._grid); for (auto &elem : v) elem = zero;
MatrixTimer.Start();
LinOp.Op(psi, w);
MatrixTimer.Stop();
LinalgTimer.Start();
r = src - w;
gamma[0] = sqrt(norm2(r));
v[0] = (1. / gamma[0]) * r;
LinalgTimer.Stop();
for (int i=0; i<RestartLength; i++) {
IterationCount++;
arnoldiStep(LinOp, v, w, i);
qrUpdate(i);
cp = std::norm(gamma[i+1]);
std::cout << GridLogIterative << "CommunicationAvoidingGeneralisedMinimalResidual: Iteration " << IterationCount
<< " residual " << cp << " target " << rsq << std::endl;
if ((i == RestartLength - 1) || (IterationCount == MaxIterations) || (cp <= rsq)) {
computeSolution(v, psi, i);
return cp;
}
}
assert(0); // Never reached
return cp;
}
void arnoldiStep(LinearOperatorBase<Field> &LinOp, std::vector<Field> &v, Field &w, int iter) {
MatrixTimer.Start();
LinOp.Op(v[iter], w);
MatrixTimer.Stop();
LinalgTimer.Start();
for (int i = 0; i <= iter; ++i) {
H(iter, i) = innerProduct(v[i], w);
w = w - H(iter, i) * v[i];
}
H(iter, iter + 1) = sqrt(norm2(w));
v[iter + 1] = (1. / H(iter, iter + 1)) * w;
LinalgTimer.Stop();
}
void qrUpdate(int iter) {
QrTimer.Start();
for (int i = 0; i < iter ; ++i) {
auto tmp = -s[i] * H(iter, i) + c[i] * H(iter, i + 1);
H(iter, i) = std::conj(c[i]) * H(iter, i) + std::conj(s[i]) * H(iter, i + 1);
H(iter, i + 1) = tmp;
}
// Compute new Givens Rotation
ComplexD nu = sqrt(std::norm(H(iter, iter)) + std::norm(H(iter, iter + 1)));
c[iter] = H(iter, iter) / nu;
s[iter] = H(iter, iter + 1) / nu;
// Apply new Givens rotation
H(iter, iter) = nu;
H(iter, iter + 1) = 0.;
gamma[iter + 1] = -s[iter] * gamma[iter];
gamma[iter] = std::conj(c[iter]) * gamma[iter];
QrTimer.Stop();
}
void computeSolution(std::vector<Field> const &v, Field &psi, int iter) {
CompSolutionTimer.Start();
for (int i = iter; i >= 0; i--) {
y[i] = gamma[i];
for (int k = i + 1; k <= iter; k++)
y[i] = y[i] - H(k, i) * y[k];
y[i] = y[i] / H(i, i);
}
for (int i = 0; i <= iter; i++)
psi = psi + v[i] * y[i];
CompSolutionTimer.Stop();
}
};
}
#endif
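For orientation (standard GMRES algebra, not text from the changeset; note that the
code stores the Hessenberg matrix transposed, indexing H(column, row)): qrUpdate
builds the new Givens rotation from

\nu = \sqrt{|H_{i,i}|^2 + |H_{i+1,i}|^2}, \quad c_i = H_{i,i}/\nu, \quad s_i = H_{i+1,i}/\nu,

and then updates \gamma_{i+1} = -s_i \gamma_i and \gamma_i \leftarrow \bar{c}_i \gamma_i.
The quantity |\gamma_{i+1}|^2 is the squared residual norm of the current restart
cycle, which is what cp = std::norm(gamma[i+1]) compares against
rsq = Tolerance^2 * norm2(src). computeSolution then back-substitutes the small
triangular system H y = \gamma and adds \sum_i y_i v_i to psi. The same
qrUpdate/computeSolution pair recurs in all GMRES variants of this changeset.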

View File

@@ -0,0 +1,256 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./lib/algorithms/iterative/FlexibleCommunicationAvoidingGeneralisedMinimalResidual.h
Copyright (C) 2015
Author: Daniel Richtmann <daniel.richtmann@ur.de>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution
directory
*************************************************************************************/
/* END LEGAL */
#ifndef GRID_FLEXIBLE_COMMUNICATION_AVOIDING_GENERALISED_MINIMAL_RESIDUAL_H
#define GRID_FLEXIBLE_COMMUNICATION_AVOIDING_GENERALISED_MINIMAL_RESIDUAL_H
namespace Grid {
template<class Field>
class FlexibleCommunicationAvoidingGeneralisedMinimalResidual : public OperatorFunction<Field> {
public:
bool ErrorOnNoConverge; // Throw an assert when FCAGMRES fails to converge,
// defaults to true
RealD Tolerance;
Integer MaxIterations;
Integer RestartLength;
Integer MaxNumberOfRestarts;
Integer IterationCount; // Number of iterations the FCAGMRES took to finish,
// filled in upon completion
GridStopWatch MatrixTimer;
GridStopWatch PrecTimer;
GridStopWatch LinalgTimer;
GridStopWatch QrTimer;
GridStopWatch CompSolutionTimer;
Eigen::MatrixXcd H;
std::vector<std::complex<double>> y;
std::vector<std::complex<double>> gamma;
std::vector<std::complex<double>> c;
std::vector<std::complex<double>> s;
LinearFunction<Field> &Preconditioner;
FlexibleCommunicationAvoidingGeneralisedMinimalResidual(RealD tol,
Integer maxit,
LinearFunction<Field> &Prec,
Integer restart_length,
bool err_on_no_conv = true)
: Tolerance(tol)
, MaxIterations(maxit)
, RestartLength(restart_length)
, MaxNumberOfRestarts(MaxIterations/RestartLength + ((MaxIterations%RestartLength == 0) ? 0 : 1))
, ErrorOnNoConverge(err_on_no_conv)
, H(Eigen::MatrixXcd::Zero(RestartLength, RestartLength + 1)) // sizes taken from DD-αAMG code base
, y(RestartLength + 1, 0.)
, gamma(RestartLength + 1, 0.)
, c(RestartLength + 1, 0.)
, s(RestartLength + 1, 0.)
, Preconditioner(Prec) {};
void operator()(LinearOperatorBase<Field> &LinOp, const Field &src, Field &psi) {
std::cout << GridLogWarning << "This algorithm currently doesn't differ from regular FGMRES" << std::endl;
psi.checkerboard = src.checkerboard;
conformable(psi, src);
RealD guess = norm2(psi);
assert(std::isnan(guess) == 0);
RealD cp;
RealD ssq = norm2(src);
RealD rsq = Tolerance * Tolerance * ssq;
Field r(src._grid);
std::cout << std::setprecision(4) << std::scientific;
std::cout << GridLogIterative << "FlexibleCommunicationAvoidingGeneralisedMinimalResidual: guess " << guess << std::endl;
std::cout << GridLogIterative << "FlexibleCommunicationAvoidingGeneralisedMinimalResidual: src " << ssq << std::endl;
PrecTimer.Reset();
MatrixTimer.Reset();
LinalgTimer.Reset();
QrTimer.Reset();
CompSolutionTimer.Reset();
GridStopWatch SolverTimer;
SolverTimer.Start();
IterationCount = 0;
for (int k=0; k<MaxNumberOfRestarts; k++) {
cp = outerLoopBody(LinOp, src, psi, rsq);
// Stopping condition
if (cp <= rsq) {
SolverTimer.Stop();
LinOp.Op(psi,r);
axpy(r,-1.0,src,r);
RealD srcnorm = sqrt(ssq);
RealD resnorm = sqrt(norm2(r));
RealD true_residual = resnorm / srcnorm;
std::cout << GridLogMessage << "FlexibleCommunicationAvoidingGeneralisedMinimalResidual: Converged on iteration " << IterationCount
<< " computed residual " << sqrt(cp / ssq)
<< " true residual " << true_residual
<< " target " << Tolerance << std::endl;
std::cout << GridLogMessage << "FCAGMRES Time elapsed: Total " << SolverTimer.Elapsed() << std::endl;
std::cout << GridLogMessage << "FCAGMRES Time elapsed: Precon " << PrecTimer.Elapsed() << std::endl;
std::cout << GridLogMessage << "FCAGMRES Time elapsed: Matrix " << MatrixTimer.Elapsed() << std::endl;
std::cout << GridLogMessage << "FCAGMRES Time elapsed: Linalg " << LinalgTimer.Elapsed() << std::endl;
std::cout << GridLogMessage << "FCAGMRES Time elapsed: QR " << QrTimer.Elapsed() << std::endl;
std::cout << GridLogMessage << "FCAGMRES Time elapsed: CompSol " << CompSolutionTimer.Elapsed() << std::endl;
return;
}
}
std::cout << GridLogMessage << "FlexibleCommunicationAvoidingGeneralisedMinimalResidual did NOT converge" << std::endl;
if (ErrorOnNoConverge)
assert(0);
}
RealD outerLoopBody(LinearOperatorBase<Field> &LinOp, const Field &src, Field &psi, RealD rsq) {
RealD cp = 0;
Field w(src._grid);
Field r(src._grid);
// these should probably be made class members so that they are only allocated once, not in every restart
std::vector<Field> v(RestartLength + 1, src._grid); for (auto &elem : v) elem = zero;
std::vector<Field> z(RestartLength + 1, src._grid); for (auto &elem : z) elem = zero;
MatrixTimer.Start();
LinOp.Op(psi, w);
MatrixTimer.Stop();
LinalgTimer.Start();
r = src - w;
gamma[0] = sqrt(norm2(r));
v[0] = (1. / gamma[0]) * r;
LinalgTimer.Stop();
for (int i=0; i<RestartLength; i++) {
IterationCount++;
arnoldiStep(LinOp, v, z, w, i);
qrUpdate(i);
cp = std::norm(gamma[i+1]);
std::cout << GridLogIterative << "FlexibleCommunicationAvoidingGeneralisedMinimalResidual: Iteration " << IterationCount
<< " residual " << cp << " target " << rsq << std::endl;
if ((i == RestartLength - 1) || (IterationCount == MaxIterations) || (cp <= rsq)) {
computeSolution(z, psi, i);
return cp;
}
}
assert(0); // Never reached
return cp;
}
void arnoldiStep(LinearOperatorBase<Field> &LinOp, std::vector<Field> &v, std::vector<Field> &z, Field &w, int iter) {
PrecTimer.Start();
Preconditioner(v[iter], z[iter]);
PrecTimer.Stop();
MatrixTimer.Start();
LinOp.Op(z[iter], w);
MatrixTimer.Stop();
LinalgTimer.Start();
for (int i = 0; i <= iter; ++i) {
H(iter, i) = innerProduct(v[i], w);
w = w - H(iter, i) * v[i];
}
H(iter, iter + 1) = sqrt(norm2(w));
v[iter + 1] = (1. / H(iter, iter + 1)) * w;
LinalgTimer.Stop();
}
void qrUpdate(int iter) {
QrTimer.Start();
for (int i = 0; i < iter ; ++i) {
auto tmp = -s[i] * H(iter, i) + c[i] * H(iter, i + 1);
H(iter, i) = std::conj(c[i]) * H(iter, i) + std::conj(s[i]) * H(iter, i + 1);
H(iter, i + 1) = tmp;
}
// Compute new Givens Rotation
ComplexD nu = sqrt(std::norm(H(iter, iter)) + std::norm(H(iter, iter + 1)));
c[iter] = H(iter, iter) / nu;
s[iter] = H(iter, iter + 1) / nu;
// Apply new Givens rotation
H(iter, iter) = nu;
H(iter, iter + 1) = 0.;
gamma[iter + 1] = -s[iter] * gamma[iter];
gamma[iter] = std::conj(c[iter]) * gamma[iter];
QrTimer.Stop();
}
void computeSolution(std::vector<Field> const &z, Field &psi, int iter) {
CompSolutionTimer.Start();
for (int i = iter; i >= 0; i--) {
y[i] = gamma[i];
for (int k = i + 1; k <= iter; k++)
y[i] = y[i] - H(k, i) * y[k];
y[i] = y[i] / H(i, i);
}
for (int i = 0; i <= iter; i++)
psi = psi + z[i] * y[i];
CompSolutionTimer.Stop();
}
};
}
#endif

View File

@@ -0,0 +1,254 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./lib/algorithms/iterative/FlexibleGeneralisedMinimalResidual.h
Copyright (C) 2015
Author: Daniel Richtmann <daniel.richtmann@ur.de>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution
directory
*************************************************************************************/
/* END LEGAL */
#ifndef GRID_FLEXIBLE_GENERALISED_MINIMAL_RESIDUAL_H
#define GRID_FLEXIBLE_GENERALISED_MINIMAL_RESIDUAL_H
namespace Grid {
template<class Field>
class FlexibleGeneralisedMinimalResidual : public OperatorFunction<Field> {
public:
bool ErrorOnNoConverge; // Throw an assert when FGMRES fails to converge,
// defaults to true
RealD Tolerance;
Integer MaxIterations;
Integer RestartLength;
Integer MaxNumberOfRestarts;
Integer IterationCount; // Number of iterations the FGMRES took to finish,
// filled in upon completion
GridStopWatch MatrixTimer;
GridStopWatch PrecTimer;
GridStopWatch LinalgTimer;
GridStopWatch QrTimer;
GridStopWatch CompSolutionTimer;
Eigen::MatrixXcd H;
std::vector<std::complex<double>> y;
std::vector<std::complex<double>> gamma;
std::vector<std::complex<double>> c;
std::vector<std::complex<double>> s;
LinearFunction<Field> &Preconditioner;
FlexibleGeneralisedMinimalResidual(RealD tol,
Integer maxit,
LinearFunction<Field> &Prec,
Integer restart_length,
bool err_on_no_conv = true)
: Tolerance(tol)
, MaxIterations(maxit)
, RestartLength(restart_length)
, MaxNumberOfRestarts(MaxIterations/RestartLength + ((MaxIterations%RestartLength == 0) ? 0 : 1))
, ErrorOnNoConverge(err_on_no_conv)
, H(Eigen::MatrixXcd::Zero(RestartLength, RestartLength + 1)) // sizes taken from DD-αAMG code base
, y(RestartLength + 1, 0.)
, gamma(RestartLength + 1, 0.)
, c(RestartLength + 1, 0.)
, s(RestartLength + 1, 0.)
, Preconditioner(Prec) {};
void operator()(LinearOperatorBase<Field> &LinOp, const Field &src, Field &psi) {
psi.checkerboard = src.checkerboard;
conformable(psi, src);
RealD guess = norm2(psi);
assert(std::isnan(guess) == 0);
RealD cp;
RealD ssq = norm2(src);
RealD rsq = Tolerance * Tolerance * ssq;
Field r(src._grid);
std::cout << std::setprecision(4) << std::scientific;
std::cout << GridLogIterative << "FlexibleGeneralisedMinimalResidual: guess " << guess << std::endl;
std::cout << GridLogIterative << "FlexibleGeneralisedMinimalResidual: src " << ssq << std::endl;
PrecTimer.Reset();
MatrixTimer.Reset();
LinalgTimer.Reset();
QrTimer.Reset();
CompSolutionTimer.Reset();
GridStopWatch SolverTimer;
SolverTimer.Start();
IterationCount = 0;
for (int k=0; k<MaxNumberOfRestarts; k++) {
cp = outerLoopBody(LinOp, src, psi, rsq);
// Stopping condition
if (cp <= rsq) {
SolverTimer.Stop();
LinOp.Op(psi,r);
axpy(r,-1.0,src,r);
RealD srcnorm = sqrt(ssq);
RealD resnorm = sqrt(norm2(r));
RealD true_residual = resnorm / srcnorm;
std::cout << GridLogMessage << "FlexibleGeneralisedMinimalResidual: Converged on iteration " << IterationCount
<< " computed residual " << sqrt(cp / ssq)
<< " true residual " << true_residual
<< " target " << Tolerance << std::endl;
std::cout << GridLogMessage << "FGMRES Time elapsed: Total " << SolverTimer.Elapsed() << std::endl;
std::cout << GridLogMessage << "FGMRES Time elapsed: Precon " << PrecTimer.Elapsed() << std::endl;
std::cout << GridLogMessage << "FGMRES Time elapsed: Matrix " << MatrixTimer.Elapsed() << std::endl;
std::cout << GridLogMessage << "FGMRES Time elapsed: Linalg " << LinalgTimer.Elapsed() << std::endl;
std::cout << GridLogMessage << "FGMRES Time elapsed: QR " << QrTimer.Elapsed() << std::endl;
std::cout << GridLogMessage << "FGMRES Time elapsed: CompSol " << CompSolutionTimer.Elapsed() << std::endl;
return;
}
}
std::cout << GridLogMessage << "FlexibleGeneralisedMinimalResidual did NOT converge" << std::endl;
if (ErrorOnNoConverge)
assert(0);
}
RealD outerLoopBody(LinearOperatorBase<Field> &LinOp, const Field &src, Field &psi, RealD rsq) {
RealD cp = 0;
Field w(src._grid);
Field r(src._grid);
// these should probably be made class members so that they are only allocated once, not in every restart
std::vector<Field> v(RestartLength + 1, src._grid); for (auto &elem : v) elem = zero;
std::vector<Field> z(RestartLength + 1, src._grid); for (auto &elem : z) elem = zero;
MatrixTimer.Start();
LinOp.Op(psi, w);
MatrixTimer.Stop();
LinalgTimer.Start();
r = src - w;
gamma[0] = sqrt(norm2(r));
v[0] = (1. / gamma[0]) * r;
LinalgTimer.Stop();
for (int i=0; i<RestartLength; i++) {
IterationCount++;
arnoldiStep(LinOp, v, z, w, i);
qrUpdate(i);
cp = std::norm(gamma[i+1]);
std::cout << GridLogIterative << "FlexibleGeneralisedMinimalResidual: Iteration " << IterationCount
<< " residual " << cp << " target " << rsq << std::endl;
if ((i == RestartLength - 1) || (IterationCount == MaxIterations) || (cp <= rsq)) {
computeSolution(z, psi, i);
return cp;
}
}
assert(0); // Never reached
return cp;
}
void arnoldiStep(LinearOperatorBase<Field> &LinOp, std::vector<Field> &v, std::vector<Field> &z, Field &w, int iter) {
PrecTimer.Start();
Preconditioner(v[iter], z[iter]);
PrecTimer.Stop();
MatrixTimer.Start();
LinOp.Op(z[iter], w);
MatrixTimer.Stop();
LinalgTimer.Start();
for (int i = 0; i <= iter; ++i) {
H(iter, i) = innerProduct(v[i], w);
w = w - H(iter, i) * v[i];
}
H(iter, iter + 1) = sqrt(norm2(w));
v[iter + 1] = (1. / H(iter, iter + 1)) * w;
LinalgTimer.Stop();
}
void qrUpdate(int iter) {
QrTimer.Start();
for (int i = 0; i < iter ; ++i) {
auto tmp = -s[i] * H(iter, i) + c[i] * H(iter, i + 1);
H(iter, i) = std::conj(c[i]) * H(iter, i) + std::conj(s[i]) * H(iter, i + 1);
H(iter, i + 1) = tmp;
}
// Compute new Givens Rotation
ComplexD nu = sqrt(std::norm(H(iter, iter)) + std::norm(H(iter, iter + 1)));
c[iter] = H(iter, iter) / nu;
s[iter] = H(iter, iter + 1) / nu;
// Apply new Givens rotation
H(iter, iter) = nu;
H(iter, iter + 1) = 0.;
gamma[iter + 1] = -s[iter] * gamma[iter];
gamma[iter] = std::conj(c[iter]) * gamma[iter];
QrTimer.Stop();
}
void computeSolution(std::vector<Field> const &z, Field &psi, int iter) {
CompSolutionTimer.Start();
for (int i = iter; i >= 0; i--) {
y[i] = gamma[i];
for (int k = i + 1; k <= iter; k++)
y[i] = y[i] - H(k, i) * y[k];
y[i] = y[i] / H(i, i);
}
for (int i = 0; i <= iter; i++)
psi = psi + z[i] * y[i];
CompSolutionTimer.Stop();
}
};
}
#endif
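Relative to the plain GMRES further down, the flexible variants keep the
preconditioned directions as a second basis: the Arnoldi recursion still runs on
the v_i, but the solution update is assembled from the z_i. In standard FGMRES
notation (added for orientation, not text from the changeset),

\psi_m = \psi_0 + \sum_{i=0}^{m} y_i \, z_i, \qquad z_i = M_i^{-1} v_i,

which is why computeSolution here takes z rather than v, and why the preconditioner
M_i may change from iteration to iteration (e.g. an inexact multigrid V- or K-cycle,
as used by the MG preconditioner in this branch).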

View File

@@ -0,0 +1,242 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./lib/algorithms/iterative/GeneralisedMinimalResidual.h
Copyright (C) 2015
Author: Daniel Richtmann <daniel.richtmann@ur.de>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution
directory
*************************************************************************************/
/* END LEGAL */
#ifndef GRID_GENERALISED_MINIMAL_RESIDUAL_H
#define GRID_GENERALISED_MINIMAL_RESIDUAL_H
namespace Grid {
template<class Field>
class GeneralisedMinimalResidual : public OperatorFunction<Field> {
public:
bool ErrorOnNoConverge; // Throw an assert when GMRES fails to converge,
// defaults to true
RealD Tolerance;
Integer MaxIterations;
Integer RestartLength;
Integer MaxNumberOfRestarts;
Integer IterationCount; // Number of iterations the GMRES took to finish,
// filled in upon completion
GridStopWatch MatrixTimer;
GridStopWatch LinalgTimer;
GridStopWatch QrTimer;
GridStopWatch CompSolutionTimer;
Eigen::MatrixXcd H;
std::vector<std::complex<double>> y;
std::vector<std::complex<double>> gamma;
std::vector<std::complex<double>> c;
std::vector<std::complex<double>> s;
GeneralisedMinimalResidual(RealD tol,
Integer maxit,
Integer restart_length,
bool err_on_no_conv = true)
: Tolerance(tol)
, MaxIterations(maxit)
, RestartLength(restart_length)
, MaxNumberOfRestarts(MaxIterations/RestartLength + ((MaxIterations%RestartLength == 0) ? 0 : 1))
, ErrorOnNoConverge(err_on_no_conv)
, H(Eigen::MatrixXcd::Zero(RestartLength, RestartLength + 1)) // sizes taken from DD-αAMG code base
, y(RestartLength + 1, 0.)
, gamma(RestartLength + 1, 0.)
, c(RestartLength + 1, 0.)
, s(RestartLength + 1, 0.) {};
void operator()(LinearOperatorBase<Field> &LinOp, const Field &src, Field &psi) {
psi.checkerboard = src.checkerboard;
conformable(psi, src);
RealD guess = norm2(psi);
assert(std::isnan(guess) == 0);
RealD cp;
RealD ssq = norm2(src);
RealD rsq = Tolerance * Tolerance * ssq;
Field r(src._grid);
std::cout << std::setprecision(4) << std::scientific;
std::cout << GridLogIterative << "GeneralisedMinimalResidual: guess " << guess << std::endl;
std::cout << GridLogIterative << "GeneralisedMinimalResidual: src " << ssq << std::endl;
MatrixTimer.Reset();
LinalgTimer.Reset();
QrTimer.Reset();
CompSolutionTimer.Reset();
GridStopWatch SolverTimer;
SolverTimer.Start();
IterationCount = 0;
for (int k=0; k<MaxNumberOfRestarts; k++) {
cp = outerLoopBody(LinOp, src, psi, rsq);
// Stopping condition
if (cp <= rsq) {
SolverTimer.Stop();
LinOp.Op(psi,r);
axpy(r,-1.0,src,r);
RealD srcnorm = sqrt(ssq);
RealD resnorm = sqrt(norm2(r));
RealD true_residual = resnorm / srcnorm;
std::cout << GridLogMessage << "GeneralisedMinimalResidual: Converged on iteration " << IterationCount
<< " computed residual " << sqrt(cp / ssq)
<< " true residual " << true_residual
<< " target " << Tolerance << std::endl;
std::cout << GridLogMessage << "GMRES Time elapsed: Total " << SolverTimer.Elapsed() << std::endl;
std::cout << GridLogMessage << "GMRES Time elapsed: Matrix " << MatrixTimer.Elapsed() << std::endl;
std::cout << GridLogMessage << "GMRES Time elapsed: Linalg " << LinalgTimer.Elapsed() << std::endl;
std::cout << GridLogMessage << "GMRES Time elapsed: QR " << QrTimer.Elapsed() << std::endl;
std::cout << GridLogMessage << "GMRES Time elapsed: CompSol " << CompSolutionTimer.Elapsed() << std::endl;
return;
}
}
std::cout << GridLogMessage << "GeneralisedMinimalResidual did NOT converge" << std::endl;
if (ErrorOnNoConverge)
assert(0);
}
RealD outerLoopBody(LinearOperatorBase<Field> &LinOp, const Field &src, Field &psi, RealD rsq) {
RealD cp = 0;
Field w(src._grid);
Field r(src._grid);
// this should probably be made a class member so that it is only allocated once, not in every restart
std::vector<Field> v(RestartLength + 1, src._grid); for (auto &elem : v) elem = zero;
MatrixTimer.Start();
LinOp.Op(psi, w);
MatrixTimer.Stop();
LinalgTimer.Start();
r = src - w;
gamma[0] = sqrt(norm2(r));
v[0] = (1. / gamma[0]) * r;
LinalgTimer.Stop();
for (int i=0; i<RestartLength; i++) {
IterationCount++;
arnoldiStep(LinOp, v, w, i);
qrUpdate(i);
cp = std::norm(gamma[i+1]);
std::cout << GridLogIterative << "GeneralisedMinimalResidual: Iteration " << IterationCount
<< " residual " << cp << " target " << rsq << std::endl;
if ((i == RestartLength - 1) || (IterationCount == MaxIterations) || (cp <= rsq)) {
computeSolution(v, psi, i);
return cp;
}
}
assert(0); // Never reached
return cp;
}
void arnoldiStep(LinearOperatorBase<Field> &LinOp, std::vector<Field> &v, Field &w, int iter) {
MatrixTimer.Start();
LinOp.Op(v[iter], w);
MatrixTimer.Stop();
LinalgTimer.Start();
for (int i = 0; i <= iter; ++i) {
H(iter, i) = innerProduct(v[i], w);
w = w - H(iter, i) * v[i];
}
H(iter, iter + 1) = sqrt(norm2(w));
v[iter + 1] = (1. / H(iter, iter + 1)) * w;
LinalgTimer.Stop();
}
void qrUpdate(int iter) {
QrTimer.Start();
for (int i = 0; i < iter ; ++i) {
auto tmp = -s[i] * H(iter, i) + c[i] * H(iter, i + 1);
H(iter, i) = std::conj(c[i]) * H(iter, i) + std::conj(s[i]) * H(iter, i + 1);
H(iter, i + 1) = tmp;
}
// Compute new Givens Rotation
ComplexD nu = sqrt(std::norm(H(iter, iter)) + std::norm(H(iter, iter + 1)));
c[iter] = H(iter, iter) / nu;
s[iter] = H(iter, iter + 1) / nu;
// Apply new Givens rotation
H(iter, iter) = nu;
H(iter, iter + 1) = 0.;
gamma[iter + 1] = -s[iter] * gamma[iter];
gamma[iter] = std::conj(c[iter]) * gamma[iter];
QrTimer.Stop();
}
void computeSolution(std::vector<Field> const &v, Field &psi, int iter) {
CompSolutionTimer.Start();
for (int i = iter; i >= 0; i--) {
y[i] = gamma[i];
for (int k = i + 1; k <= iter; k++)
y[i] = y[i] - H(k, i) * y[k];
y[i] = y[i] / H(i, i);
}
for (int i = 0; i <= iter; i++)
psi = psi + v[i] * y[i];
CompSolutionTimer.Stop();
}
};
}
#endif

View File

@@ -0,0 +1,156 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./lib/algorithms/iterative/MinimalResidual.h
Copyright (C) 2015
Author: Daniel Richtmann <daniel.richtmann@ur.de>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution
directory
*************************************************************************************/
/* END LEGAL */
#ifndef GRID_MINIMAL_RESIDUAL_H
#define GRID_MINIMAL_RESIDUAL_H
namespace Grid {
template<class Field> class MinimalResidual : public OperatorFunction<Field> {
public:
bool ErrorOnNoConverge; // throw an assert when the MR fails to converge.
// Defaults true.
RealD Tolerance;
Integer MaxIterations;
RealD overRelaxParam;
Integer IterationsToComplete; // Number of iterations the MR took to finish.
// Filled in upon completion
MinimalResidual(RealD tol, Integer maxit, Real ovrelparam = 1.0, bool err_on_no_conv = true)
: Tolerance(tol), MaxIterations(maxit), overRelaxParam(ovrelparam), ErrorOnNoConverge(err_on_no_conv){};
void operator()(LinearOperatorBase<Field> &Linop, const Field &src, Field &psi) {
psi.checkerboard = src.checkerboard;
conformable(psi, src);
Complex a, c;
Real d;
Field Mr(src);
Field r(src);
// Initial residual computation & set up
RealD guess = norm2(psi);
assert(std::isnan(guess) == 0);
RealD ssq = norm2(src);
RealD rsq = Tolerance * Tolerance * ssq;
Linop.Op(psi, Mr);
r = src - Mr;
RealD cp = norm2(r);
std::cout << std::setprecision(4) << std::scientific;
std::cout << GridLogIterative << "MinimalResidual: guess " << guess << std::endl;
std::cout << GridLogIterative << "MinimalResidual: src " << ssq << std::endl;
std::cout << GridLogIterative << "MinimalResidual: mp " << d << std::endl;
std::cout << GridLogIterative << "MinimalResidual: cp,r " << cp << std::endl;
if (cp <= rsq) {
return;
}
std::cout << GridLogIterative << "MinimalResidual: k=0 residual " << cp << " target " << rsq << std::endl;
GridStopWatch LinalgTimer;
GridStopWatch MatrixTimer;
GridStopWatch SolverTimer;
SolverTimer.Start();
int k;
for (k = 1; k <= MaxIterations; k++) {
MatrixTimer.Start();
Linop.Op(r, Mr);
MatrixTimer.Stop();
LinalgTimer.Start();
c = innerProduct(Mr, r);
d = norm2(Mr);
a = c / d;
a = a * overRelaxParam;
psi = psi + r * a;
r = r - Mr * a;
cp = norm2(r);
LinalgTimer.Stop();
std::cout << GridLogIterative << "MinimalResidual: Iteration " << k
<< " residual " << cp << " target " << rsq << std::endl;
std::cout << GridLogDebug << "a = " << a << " c = " << c << " d = " << d << std::endl;
// Stopping condition
if (cp <= rsq) {
SolverTimer.Stop();
Linop.Op(psi, Mr);
r = src - Mr;
RealD srcnorm = sqrt(ssq);
RealD resnorm = sqrt(norm2(r));
RealD true_residual = resnorm / srcnorm;
std::cout << GridLogMessage << "MinimalResidual Converged on iteration " << k
<< " computed residual " << sqrt(cp / ssq)
<< " true residual " << true_residual
<< " target " << Tolerance << std::endl;
std::cout << GridLogMessage << "MR Time elapsed: Total " << SolverTimer.Elapsed() << std::endl;
std::cout << GridLogMessage << "MR Time elapsed: Matrix " << MatrixTimer.Elapsed() << std::endl;
std::cout << GridLogMessage << "MR Time elapsed: Linalg " << LinalgTimer.Elapsed() << std::endl;
if (ErrorOnNoConverge)
assert(true_residual / Tolerance < 10000.0);
IterationsToComplete = k;
return;
}
}
std::cout << GridLogMessage << "MinimalResidual did NOT converge"
<< std::endl;
if (ErrorOnNoConverge)
assert(0);
IterationsToComplete = k;
}
};
} // namespace Grid
#endif
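For orientation (standard minimal-residual algebra, not text from the changeset):
one pass of the loop above computes

\alpha_k = \omega \, \frac{\langle M r_k, r_k \rangle}{\lVert M r_k \rVert^2}, \qquad \psi_{k+1} = \psi_k + \alpha_k r_k, \qquad r_{k+1} = r_k - \alpha_k M r_k,

with \omega the overRelaxParam; this is what the lines c = innerProduct(Mr, r),
d = norm2(Mr), a = (c / d) * overRelaxParam implement.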

View File

@@ -0,0 +1,273 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./lib/algorithms/iterative/MixedPrecisionFlexibleGeneralisedMinimalResidual.h
Copyright (C) 2015
Author: Daniel Richtmann <daniel.richtmann@ur.de>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution
directory
*************************************************************************************/
/* END LEGAL */
#ifndef GRID_MIXED_PRECISION_FLEXIBLE_GENERALISED_MINIMAL_RESIDUAL_H
#define GRID_MIXED_PRECISION_FLEXIBLE_GENERALISED_MINIMAL_RESIDUAL_H
namespace Grid {
template<class FieldD, class FieldF, typename std::enable_if<getPrecision<FieldD>::value == 2, int>::type = 0, typename std::enable_if< getPrecision<FieldF>::value == 1, int>::type = 0>
class MixedPrecisionFlexibleGeneralisedMinimalResidual : public OperatorFunction<FieldD> {
public:
bool ErrorOnNoConverge; // Throw an assert when MPFGMRES fails to converge,
// defaults to true
RealD Tolerance;
Integer MaxIterations;
Integer RestartLength;
Integer MaxNumberOfRestarts;
Integer IterationCount; // Number of iterations the MPFGMRES took to finish,
// filled in upon completion
GridStopWatch MatrixTimer;
GridStopWatch PrecTimer;
GridStopWatch LinalgTimer;
GridStopWatch QrTimer;
GridStopWatch CompSolutionTimer;
GridStopWatch ChangePrecTimer;
Eigen::MatrixXcd H;
std::vector<std::complex<double>> y;
std::vector<std::complex<double>> gamma;
std::vector<std::complex<double>> c;
std::vector<std::complex<double>> s;
GridBase* SinglePrecGrid;
LinearFunction<FieldF> &Preconditioner;
MixedPrecisionFlexibleGeneralisedMinimalResidual(RealD tol,
Integer maxit,
GridBase * sp_grid,
LinearFunction<FieldF> &Prec,
Integer restart_length,
bool err_on_no_conv = true)
: Tolerance(tol)
, MaxIterations(maxit)
, RestartLength(restart_length)
, MaxNumberOfRestarts(MaxIterations/RestartLength + ((MaxIterations%RestartLength == 0) ? 0 : 1))
, ErrorOnNoConverge(err_on_no_conv)
, H(Eigen::MatrixXcd::Zero(RestartLength, RestartLength + 1)) // sizes taken from DD-αAMG code base
, y(RestartLength + 1, 0.)
, gamma(RestartLength + 1, 0.)
, c(RestartLength + 1, 0.)
, s(RestartLength + 1, 0.)
, SinglePrecGrid(sp_grid)
, Preconditioner(Prec) {};
void operator()(LinearOperatorBase<FieldD> &LinOp, const FieldD &src, FieldD &psi) {
psi.checkerboard = src.checkerboard;
conformable(psi, src);
RealD guess = norm2(psi);
assert(std::isnan(guess) == 0);
RealD cp;
RealD ssq = norm2(src);
RealD rsq = Tolerance * Tolerance * ssq;
FieldD r(src._grid);
std::cout << std::setprecision(4) << std::scientific;
std::cout << GridLogIterative << "MPFGMRES: guess " << guess << std::endl;
std::cout << GridLogIterative << "MPFGMRES: src " << ssq << std::endl;
PrecTimer.Reset();
MatrixTimer.Reset();
LinalgTimer.Reset();
QrTimer.Reset();
CompSolutionTimer.Reset();
ChangePrecTimer.Reset();
GridStopWatch SolverTimer;
SolverTimer.Start();
IterationCount = 0;
for (int k=0; k<MaxNumberOfRestarts; k++) {
cp = outerLoopBody(LinOp, src, psi, rsq);
// Stopping condition
if (cp <= rsq) {
SolverTimer.Stop();
LinOp.Op(psi,r);
axpy(r,-1.0,src,r);
RealD srcnorm = sqrt(ssq);
RealD resnorm = sqrt(norm2(r));
RealD true_residual = resnorm / srcnorm;
std::cout << GridLogMessage << "MPFGMRES: Converged on iteration " << IterationCount
<< " computed residual " << sqrt(cp / ssq)
<< " true residual " << true_residual
<< " target " << Tolerance << std::endl;
std::cout << GridLogMessage << "MPFGMRES Time elapsed: Total " << SolverTimer.Elapsed() << std::endl;
std::cout << GridLogMessage << "MPFGMRES Time elapsed: Precon " << PrecTimer.Elapsed() << std::endl;
std::cout << GridLogMessage << "MPFGMRES Time elapsed: Matrix " << MatrixTimer.Elapsed() << std::endl;
std::cout << GridLogMessage << "MPFGMRES Time elapsed: Linalg " << LinalgTimer.Elapsed() << std::endl;
std::cout << GridLogMessage << "MPFGMRES Time elapsed: QR " << QrTimer.Elapsed() << std::endl;
std::cout << GridLogMessage << "MPFGMRES Time elapsed: CompSol " << CompSolutionTimer.Elapsed() << std::endl;
std::cout << GridLogMessage << "MPFGMRES Time elapsed: PrecChange " << ChangePrecTimer.Elapsed() << std::endl;
return;
}
}
std::cout << GridLogMessage << "MPFGMRES did NOT converge" << std::endl;
if (ErrorOnNoConverge)
assert(0);
}
RealD outerLoopBody(LinearOperatorBase<FieldD> &LinOp, const FieldD &src, FieldD &psi, RealD rsq) {
RealD cp = 0;
FieldD w(src._grid);
FieldD r(src._grid);
// these should probably be made class members so that they are only allocated once, not in every restart
std::vector<FieldD> v(RestartLength + 1, src._grid); for (auto &elem : v) elem = zero;
std::vector<FieldD> z(RestartLength + 1, src._grid); for (auto &elem : z) elem = zero;
MatrixTimer.Start();
LinOp.Op(psi, w);
MatrixTimer.Stop();
LinalgTimer.Start();
r = src - w;
gamma[0] = sqrt(norm2(r));
v[0] = (1. / gamma[0]) * r;
LinalgTimer.Stop();
for (int i=0; i<RestartLength; i++) {
IterationCount++;
arnoldiStep(LinOp, v, z, w, i);
qrUpdate(i);
cp = std::norm(gamma[i+1]);
std::cout << GridLogIterative << "MPFGMRES: Iteration " << IterationCount
<< " residual " << cp << " target " << rsq << std::endl;
if ((i == RestartLength - 1) || (IterationCount == MaxIterations) || (cp <= rsq)) {
computeSolution(z, psi, i);
return cp;
}
}
assert(0); // Never reached
return cp;
}
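  // The flexible Arnoldi step below applies the preconditioner in single
  // precision (v -> z), the fine operator in double precision (w = A z), and
  // then orthogonalises w against the existing Krylov basis with modified
  // Gram-Schmidt. Storing every z[iter] is what makes the method "flexible":
  // the preconditioner may differ from iteration to iteration (e.g. a
  // non-stationary multigrid cycle).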
void arnoldiStep(LinearOperatorBase<FieldD> &LinOp, std::vector<FieldD> &v, std::vector<FieldD> &z, FieldD &w, int iter) {
FieldF v_f(SinglePrecGrid);
FieldF z_f(SinglePrecGrid);
ChangePrecTimer.Start();
precisionChange(v_f, v[iter]);
precisionChange(z_f, z[iter]);
ChangePrecTimer.Stop();
PrecTimer.Start();
Preconditioner(v_f, z_f);
PrecTimer.Stop();
ChangePrecTimer.Start();
precisionChange(z[iter], z_f);
ChangePrecTimer.Stop();
MatrixTimer.Start();
LinOp.Op(z[iter], w);
MatrixTimer.Stop();
LinalgTimer.Start();
for (int i = 0; i <= iter; ++i) {
H(iter, i) = innerProduct(v[i], w);
w = w - H(iter, i) * v[i];
}
H(iter, iter + 1) = sqrt(norm2(w));
v[iter + 1] = (1. / H(iter, iter + 1)) * w;
LinalgTimer.Stop();
}
void qrUpdate(int iter) {
QrTimer.Start();
for (int i = 0; i < iter ; ++i) {
auto tmp = -s[i] * H(iter, i) + c[i] * H(iter, i + 1);
H(iter, i) = std::conj(c[i]) * H(iter, i) + std::conj(s[i]) * H(iter, i + 1);
H(iter, i + 1) = tmp;
}
// Compute new Givens Rotation
ComplexD nu = sqrt(std::norm(H(iter, iter)) + std::norm(H(iter, iter + 1)));
c[iter] = H(iter, iter) / nu;
s[iter] = H(iter, iter + 1) / nu;
// Apply new Givens rotation
H(iter, iter) = nu;
H(iter, iter + 1) = 0.;
gamma[iter + 1] = -s[iter] * gamma[iter];
gamma[iter] = std::conj(c[iter]) * gamma[iter];
QrTimer.Stop();
}
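  // qrUpdate above first applies the accumulated Givens rotations to the new
  // Hessenberg column, then constructs one further rotation (c[iter], s[iter])
  // that zeroes H(iter, iter + 1) and leaves nu on the diagonal. The same
  // rotation acts on gamma, so std::norm(gamma[iter + 1]) is the squared
  // residual norm used as the stopping criterion in outerLoopBody without
  // ever forming the residual vector explicitly.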
void computeSolution(std::vector<FieldD> const &z, FieldD &psi, int iter) {
CompSolutionTimer.Start();
for (int i = iter; i >= 0; i--) {
y[i] = gamma[i];
for (int k = i + 1; k <= iter; k++)
y[i] = y[i] - H(k, i) * y[k];
y[i] = y[i] / H(i, i);
}
for (int i = 0; i <= iter; i++)
psi = psi + z[i] * y[i];
CompSolutionTimer.Stop();
}
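  // computeSolution back-substitutes the (iter + 1) x (iter + 1) triangular
  // system left behind by the Givens rotations, R y = gamma, and accumulates
  // psi += sum_i y[i] * z[i], i.e. the solution update is built from the
  // preconditioned vectors z, as required for flexible GMRES.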
};
}
#endif
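For orientation, a minimal usage sketch of the solver above (illustrative only; the operator, grids, preconditioner and fields such as dwD, sGrid_f, mgPrecon, src_d and sol_d are assumed to be set up by the calling test):

    // Outer FGMRES in double precision, preconditioner applied in single.
    MdagMLinearOperator<WilsonFermionD, LatticeFermionD> linOp(dwD);
    MixedPrecisionFlexibleGeneralisedMinimalResidual<LatticeFermionD, LatticeFermionF>
        solver(1.0e-12,  // Tolerance
               50000,    // MaxIterations
               sGrid_f,  // single-precision grid handed to the preconditioner fields
               mgPrecon, // LinearFunction<LatticeFermionF>, e.g. a multigrid V-cycle
               25);      // RestartLength
    solver(linOp, src_d, sol_d);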


@@ -139,8 +139,11 @@ namespace Grid {
      MatTimer.Start();
      Linop.HermOpAndNorm(psi,Az,zAz,zAAz);
      MatTimer.Stop();
+     LinalgTimer.Start();
      r=src-Az;
+     LinalgTimer.Stop();
      /////////////////////
      // p = Prec(r)
      /////////////////////
@@ -152,8 +155,10 @@
      Linop.HermOp(z,tmp);
      MatTimer.Stop();
+     LinalgTimer.Start();
      ttmp=tmp;
      tmp=tmp-r;
+     LinalgTimer.Stop();
      /*
      std::cout<<GridLogMessage<<r<<std::endl;
@@ -166,12 +171,14 @@
      Linop.HermOpAndNorm(z,Az,zAz,zAAz);
      MatTimer.Stop();
+     LinalgTimer.Start();
      //p[0],q[0],qq[0]
      p[0]= z;
      q[0]= Az;
      qq[0]= zAAz;
      cp =norm2(r);
+     LinalgTimer.Stop();
      for(int k=0;k<nstep;k++){
@@ -181,12 +188,14 @@
      int peri_k = k %mmax;
      int peri_kp= kp%mmax;
+     LinalgTimer.Start();
      rq= real(innerProduct(r,q[peri_k])); // what if rAr not real?
      a = rq/qq[peri_k];
      axpy(psi,a,p[peri_k],psi);
      cp = axpy_norm(r,-a,q[peri_k],r);
+     LinalgTimer.Stop();
      if((k==nstep-1)||(cp<rsq)){
        return cp;
@@ -202,6 +211,8 @@
      Linop.HermOpAndNorm(z,Az,zAz,zAAz);
      Linop.HermOp(z,tmp);
      MatTimer.Stop();
+     LinalgTimer.Start();
      tmp=tmp-r;
      std::cout<<GridLogMessage<< " Preconditioner resid " <<sqrt(norm2(tmp)/norm2(r))<<std::endl;
@@ -219,9 +230,9 @@
      }
      qq[peri_kp]=norm2(q[peri_kp]); // could use axpy_norm
+     LinalgTimer.Stop();
    }
    assert(0); // never reached
    return cp;
  }


@@ -59,6 +59,7 @@ void GridLogTimestamp(int on){
 }
 Colours GridLogColours(0);
+GridLogger GridLogMG    (1, "MG"    , GridLogColours, "NORMAL");
 GridLogger GridLogIRL   (1, "IRL"   , GridLogColours, "NORMAL");
 GridLogger GridLogSolver(1, "Solver", GridLogColours, "NORMAL");
 GridLogger GridLogError (1, "Error" , GridLogColours, "RED");


@@ -169,6 +169,7 @@ public:
 void GridLogConfigure(std::vector<std::string> &logstreams);
+extern GridLogger GridLogMG;
 extern GridLogger GridLogIRL;
 extern GridLogger GridLogSolver;
 extern GridLogger GridLogError;


@@ -4,9 +4,11 @@
 Source file: ./lib/qcd/action/gauge/Photon.h
-Copyright (C) 2015
+Copyright (C) 2015-2018
 Author: Peter Boyle <paboyle@ph.ed.ac.uk>
+Author: Antonin Portelli <antonin.portelli@me.com>
+Author: James Harrison <J.Harrison@soton.ac.uk>
 This program is free software; you can redistribute it and/or modify
 it under the terms of the GNU General Public License as published by
@@ -30,8 +32,9 @@
 namespace Grid{
 namespace QCD{
   template <class S>
-  class QedGimpl
+  class QedGImpl
   {
   public:
     typedef S Simd;
@@ -43,27 +46,27 @@ namespace QCD{
     typedef iImplGaugeLink<Simd>  SiteLink;
     typedef iImplGaugeField<Simd> SiteField;
-    typedef SiteField SiteComplex;
+    typedef SiteLink  SiteComplex;
     typedef Lattice<SiteLink>  LinkField;
     typedef Lattice<SiteField> Field;
     typedef Field              ComplexField;
   };
-  typedef QedGimpl<vComplex> QedGimplR;
+  typedef QedGImpl<vComplex> QedGImplR;
-  template<class Gimpl>
+  template <class GImpl>
   class Photon
   {
   public:
-    INHERIT_GIMPL_TYPES(Gimpl);
+    INHERIT_GIMPL_TYPES(GImpl);
+    typedef typename SiteGaugeLink::scalar_object ScalarSite;
+    typedef typename ScalarSite::scalar_type      ScalarComplex;
     GRID_SERIALIZABLE_ENUM(Gauge, undef, feynman, 1, coulomb, 2, landau, 3);
-    GRID_SERIALIZABLE_ENUM(ZmScheme, undef, qedL, 1, qedTL, 2, qedInf, 3);
+    GRID_SERIALIZABLE_ENUM(ZmScheme, undef, qedL, 1, qedTL, 2);
   public:
-    Photon(Gauge gauge, ZmScheme zmScheme);
-    Photon(Gauge gauge, ZmScheme zmScheme, std::vector<Real> improvements);
-    Photon(Gauge gauge, ZmScheme zmScheme, Real G0);
-    Photon(Gauge gauge, ZmScheme zmScheme, std::vector<Real> improvements, Real G0);
+    Photon(GridBase *grid, Gauge gauge, ZmScheme zmScheme, std::vector<Real> improvement);
+    Photon(GridBase *grid, Gauge gauge, ZmScheme zmScheme);
     virtual ~Photon(void) = default;
     void FreePropagator(const GaugeField &in, GaugeField &out);
     void MomentumSpacePropagator(const GaugeField &in, GaugeField &out);
@ -73,345 +76,255 @@ namespace QCD{
const GaugeLinkField &weight); const GaugeLinkField &weight);
void UnitField(GaugeField &out); void UnitField(GaugeField &out);
private: private:
void infVolPropagator(GaugeLinkField &out); void makeSpatialNorm(LatticeInteger &spNrm);
void invKHatSquared(GaugeLinkField &out); void makeKHat(std::vector<GaugeLinkField> &khat);
void makeInvKHatSquared(GaugeLinkField &out);
void zmSub(GaugeLinkField &out); void zmSub(GaugeLinkField &out);
void transverseProjectSpatial(GaugeField &out);
void gaugeTransform(GaugeField &out);
private: private:
Gauge gauge_; GridBase *grid_;
ZmScheme zmScheme_; Gauge gauge_;
std::vector<Real> improvement_; ZmScheme zmScheme_;
Real G0_; std::vector<Real> improvement_;
}; };
typedef Photon<QedGimplR> PhotonR; typedef Photon<QedGImplR> PhotonR;
template<class Gimpl> template<class GImpl>
Photon<Gimpl>::Photon(Gauge gauge, ZmScheme zmScheme) Photon<GImpl>::Photon(GridBase *grid, Gauge gauge, ZmScheme zmScheme,
: gauge_(gauge), zmScheme_(zmScheme), improvement_(std::vector<Real>()),
G0_(0.15493339023106021408483720810737508876916113364521)
{}
template<class Gimpl>
Photon<Gimpl>::Photon(Gauge gauge, ZmScheme zmScheme,
std::vector<Real> improvements) std::vector<Real> improvements)
: gauge_(gauge), zmScheme_(zmScheme), improvement_(improvements), : grid_(grid), gauge_(gauge), zmScheme_(zmScheme), improvement_(improvements)
G0_(0.15493339023106021408483720810737508876916113364521)
{} {}
template<class Gimpl> template<class GImpl>
Photon<Gimpl>::Photon(Gauge gauge, ZmScheme zmScheme, Real G0) Photon<GImpl>::Photon(GridBase *grid, Gauge gauge, ZmScheme zmScheme)
: gauge_(gauge), zmScheme_(zmScheme), improvement_(std::vector<Real>()), G0_(G0) : Photon(grid, gauge, zmScheme, std::vector<Real>())
{} {}
template<class Gimpl> template<class GImpl>
Photon<Gimpl>::Photon(Gauge gauge, ZmScheme zmScheme, void Photon<GImpl>::FreePropagator(const GaugeField &in, GaugeField &out)
std::vector<Real> improvements, Real G0)
: gauge_(gauge), zmScheme_(zmScheme), improvement_(improvements), G0_(G0)
{}
template<class Gimpl>
void Photon<Gimpl>::FreePropagator (const GaugeField &in,GaugeField &out)
{ {
FFT theFFT(in._grid); FFT theFFT(dynamic_cast<GridCartesian *>(grid_));
GaugeField in_k(grid_);
GaugeField prop_k(grid_);
GaugeField in_k(in._grid); theFFT.FFT_all_dim(in_k, in, FFT::forward);
GaugeField prop_k(in._grid); MomentumSpacePropagator(prop_k, in_k);
theFFT.FFT_all_dim(out, prop_k, FFT::backward);
theFFT.FFT_all_dim(in_k,in,FFT::forward);
MomentumSpacePropagator(prop_k,in_k);
theFFT.FFT_all_dim(out,prop_k,FFT::backward);
} }
template<class Gimpl> template<class GImpl>
void Photon<Gimpl>::infVolPropagator(GaugeLinkField &out) void Photon<GImpl>::makeSpatialNorm(LatticeInteger &spNrm)
{ {
auto *grid = dynamic_cast<GridCartesian *>(out._grid); LatticeInteger coor(grid_);
LatticeReal xmu(grid); std::vector<int> l = grid_->FullDimensions();
GaugeLinkField one(grid);
const unsigned int nd = grid->_ndimension; spNrm = zero;
std::vector<int> &l = grid->_fdimensions; for(int mu = 0; mu < grid_->Nd() - 1; mu++)
std::vector<int> x0(nd,0); {
TComplex Tone = Complex(1.0,0.0); LatticeCoordinate(coor, mu);
TComplex Tzero = Complex(G0_,0.0); coor = where(coor < Integer(l[mu]/2), coor, coor - Integer(l[mu]));
FFT fft(grid); spNrm = spNrm + coor*coor;
}
}
template<class GImpl>
void Photon<GImpl>::makeKHat(std::vector<GaugeLinkField> &khat)
{
const unsigned int nd = grid_->Nd();
std::vector<int> l = grid_->FullDimensions();
Complex ci(0., 1.);
khat.resize(nd, grid_);
for (unsigned int mu = 0; mu < nd; ++mu)
{
Real piL = M_PI/l[mu];
LatticeCoordinate(khat[mu], mu);
khat[mu] = exp(piL*ci*khat[mu])*2.*sin(piL*khat[mu]);
}
}
template<class GImpl>
void Photon<GImpl>::makeInvKHatSquared(GaugeLinkField &out)
{
std::vector<GaugeLinkField> khat;
GaugeLinkField lone(grid_);
const unsigned int nd = grid_->Nd();
std::vector<int> zm(nd, 0);
ScalarSite one = ScalarComplex(1., 0.), z = ScalarComplex(0., 0.);
one = Complex(1.0,0.0);
out = zero; out = zero;
makeKHat(khat);
for(int mu = 0; mu < nd; mu++) for(int mu = 0; mu < nd; mu++)
{ {
LatticeCoordinate(xmu,mu); out = out + khat[mu]*conjugate(khat[mu]);
Real lo2 = l[mu]/2.0;
xmu = where(xmu < lo2, xmu, xmu-double(l[mu]));
out = out + toComplex(4*M_PI*M_PI*xmu*xmu);
} }
pokeSite(Tone, out, x0); lone = ScalarComplex(1., 0.);
out = one/out; pokeSite(one, out, zm);
pokeSite(Tzero, out, x0); out = lone/out;
fft.FFT_all_dim(out, out, FFT::forward); pokeSite(z, out, zm);
} }
template<class Gimpl> template<class GImpl>
void Photon<Gimpl>::invKHatSquared(GaugeLinkField &out) void Photon<GImpl>::zmSub(GaugeLinkField &out)
{ {
GridBase *grid = out._grid;
GaugeLinkField kmu(grid), one(grid);
const unsigned int nd = grid->_ndimension;
std::vector<int> &l = grid->_fdimensions;
std::vector<int> zm(nd,0);
TComplex Tone = Complex(1.0,0.0);
TComplex Tzero= Complex(0.0,0.0);
one = Complex(1.0,0.0);
out = zero;
for(int mu = 0; mu < nd; mu++)
{
Real twoPiL = M_PI*2./l[mu];
LatticeCoordinate(kmu,mu);
kmu = 2.*sin(.5*twoPiL*kmu);
out = out + kmu*kmu;
}
pokeSite(Tone, out, zm);
out = one/out;
pokeSite(Tzero, out, zm);
}
template<class Gimpl>
void Photon<Gimpl>::zmSub(GaugeLinkField &out)
{
GridBase *grid = out._grid;
const unsigned int nd = grid->_ndimension;
std::vector<int> &l = grid->_fdimensions;
switch (zmScheme_) switch (zmScheme_)
{ {
case ZmScheme::qedTL: case ZmScheme::qedTL:
{ {
std::vector<int> zm(nd,0); std::vector<int> zm(grid_->Nd(), 0);
TComplex Tzero = Complex(0.0,0.0); ScalarSite z = ScalarComplex(0., 0.);
pokeSite(Tzero, out, zm);
pokeSite(z, out, zm);
break; break;
} }
case ZmScheme::qedL: case ZmScheme::qedL:
{ {
LatticeInteger spNrm(grid), coor(grid); LatticeInteger spNrm(grid_);
GaugeLinkField z(grid);
spNrm = zero;
for(int d = 0; d < grid->_ndimension - 1; d++)
{
LatticeCoordinate(coor,d);
coor = where(coor < Integer(l[d]/2), coor, coor-Integer(l[d]));
spNrm = spNrm + coor*coor;
}
out = where(spNrm == Integer(0), 0.*out, out);
// IR improvement makeSpatialNorm(spNrm);
out = where(spNrm == Integer(0), 0.*out, out);
for(int i = 0; i < improvement_.size(); i++) for(int i = 0; i < improvement_.size(); i++)
{ {
Real f = sqrt(improvement_[i]+1); Real f = sqrt(improvement_[i] + 1);
out = where(spNrm == Integer(i+1), f*out, out); out = where(spNrm == Integer(i + 1), f*out, out);
} }
break;
} }
default: default:
assert(0);
break; break;
} }
} }
template<class Gimpl> template<class GImpl>
void Photon<Gimpl>::MomentumSpacePropagator(const GaugeField &in, void Photon<GImpl>::transverseProjectSpatial(GaugeField &out)
GaugeField &out)
{ {
GridBase *grid = out._grid; const unsigned int nd = grid_->Nd();
LatticeComplex momProp(grid); GaugeLinkField invKHat(grid_), cst(grid_), spdiv(grid_);
LatticeInteger spNrm(grid_);
switch (zmScheme_) std::vector<GaugeLinkField> khat, a(nd, grid_), aProj(nd, grid_);
invKHat = zero;
makeSpatialNorm(spNrm);
makeKHat(khat);
for (unsigned int mu = 0; mu < nd; ++mu)
{ {
case ZmScheme::qedTL: a[mu] = peekLorentz(out, mu);
case ZmScheme::qedL: if (mu < nd - 1)
{ {
invKHatSquared(momProp); invKHat += khat[mu]*conjugate(khat[mu]);
zmSub(momProp);
break;
} }
case ZmScheme::qedInf: }
{ cst = ScalarComplex(1., 0.);
infVolPropagator(momProp); invKHat = where(spNrm == Integer(0), cst, invKHat);
invKHat = cst/invKHat;
cst = zero;
invKHat = where(spNrm == Integer(0), cst, invKHat);
spdiv = zero;
for (unsigned int nu = 0; nu < nd - 1; ++nu)
{
spdiv += conjugate(khat[nu])*a[nu];
}
spdiv *= invKHat;
for (unsigned int mu = 0; mu < nd; ++mu)
{
aProj[mu] = a[mu] - khat[mu]*spdiv;
pokeLorentz(out, aProj[mu], mu);
}
}
template<class GImpl>
void Photon<GImpl>::gaugeTransform(GaugeField &out)
{
switch (gauge_)
{
case Gauge::feynman:
break;
case Gauge::coulomb:
transverseProjectSpatial(out);
break;
case Gauge::landau:
assert(0);
break; break;
}
default: default:
assert(0);
break; break;
} }
}
template<class GImpl>
void Photon<GImpl>::MomentumSpacePropagator(const GaugeField &in,
GaugeField &out)
{
LatticeComplex momProp(grid_);
makeInvKHatSquared(momProp);
zmSub(momProp);
out = in*momProp; out = in*momProp;
} }
template<class Gimpl> template<class GImpl>
void Photon<Gimpl>::StochasticWeight(GaugeLinkField &weight) void Photon<GImpl>::StochasticWeight(GaugeLinkField &weight)
{ {
auto *grid = dynamic_cast<GridCartesian *>(weight._grid); const unsigned int nd = grid_->Nd();
const unsigned int nd = grid->_ndimension; std::vector<int> l = grid_->FullDimensions();
std::vector<int> latt_size = grid->_fdimensions; Integer vol = 1;
switch (zmScheme_) for(unsigned int mu = 0; mu < nd; mu++)
{ {
case ZmScheme::qedTL: vol = vol*l[mu];
case ZmScheme::qedL:
{
Integer vol = 1;
for(int d = 0; d < nd; d++)
{
vol = vol * latt_size[d];
}
invKHatSquared(weight);
weight = sqrt(vol)*sqrt(weight);
zmSub(weight);
break;
}
case ZmScheme::qedInf:
{
infVolPropagator(weight);
weight = sqrt(real(weight));
break;
}
default:
break;
} }
makeInvKHatSquared(weight);
weight = sqrt(vol)*sqrt(weight);
zmSub(weight);
} }
template<class Gimpl> template<class GImpl>
void Photon<Gimpl>::StochasticField(GaugeField &out, GridParallelRNG &rng) void Photon<GImpl>::StochasticField(GaugeField &out, GridParallelRNG &rng)
{ {
auto *grid = dynamic_cast<GridCartesian *>(out._grid); GaugeLinkField weight(grid_);
GaugeLinkField weight(grid);
StochasticWeight(weight); StochasticWeight(weight);
StochasticField(out, rng, weight); StochasticField(out, rng, weight);
} }
template<class Gimpl> template<class GImpl>
void Photon<Gimpl>::StochasticField(GaugeField &out, GridParallelRNG &rng, void Photon<GImpl>::StochasticField(GaugeField &out, GridParallelRNG &rng,
const GaugeLinkField &weight) const GaugeLinkField &weight)
{ {
auto *grid = dynamic_cast<GridCartesian *>(out._grid); const unsigned int nd = grid_->Nd();
const unsigned int nd = grid->_ndimension; GaugeLinkField r(grid_);
GaugeLinkField r(grid); GaugeField aTilde(grid_);
GaugeField aTilde(grid); FFT fft(dynamic_cast<GridCartesian *>(grid_));
FFT fft(grid);
switch (zmScheme_) for(unsigned int mu = 0; mu < nd; mu++)
{ {
case ZmScheme::qedTL: gaussian(rng, r);
case ZmScheme::qedL: r = weight*r;
{ pokeLorentz(aTilde, r, mu);
for(int mu = 0; mu < nd; mu++)
{
gaussian(rng, r);
r = weight*r;
pokeLorentz(aTilde, r, mu);
}
break;
}
case ZmScheme::qedInf:
{
Complex shift(1., 1.); // This needs to be a GaugeLink element?
for(int mu = 0; mu < nd; mu++)
{
bernoulli(rng, r);
r = weight*(2.*r - shift);
pokeLorentz(aTilde, r, mu);
}
break;
}
default:
break;
} }
gaugeTransform(aTilde);
fft.FFT_all_dim(out, aTilde, FFT::backward); fft.FFT_all_dim(out, aTilde, FFT::backward);
out = real(out); out = real(out);
} }
template<class Gimpl> template<class GImpl>
void Photon<Gimpl>::UnitField(GaugeField &out) void Photon<GImpl>::UnitField(GaugeField &out)
{ {
auto *grid = dynamic_cast<GridCartesian *>(out._grid); const unsigned int nd = grid_->Nd();
const unsigned int nd = grid->_ndimension; GaugeLinkField r(grid_);
GaugeLinkField r(grid);
r = Complex(1.0,0.0); r = ScalarComplex(1., 0.);
for(unsigned int mu = 0; mu < nd; mu++)
for(int mu = 0; mu < nd; mu++)
{ {
pokeLorentz(out, r, mu); pokeLorentz(out, r, mu);
} }
out = real(out); out = real(out);
} }
// template<class Gimpl>
// void Photon<Gimpl>::FeynmanGaugeMomentumSpacePropagator_L(GaugeField &out,
// const GaugeField &in)
// {
//
// FeynmanGaugeMomentumSpacePropagator_TL(out,in);
//
// GridBase *grid = out._grid;
// LatticeInteger coor(grid);
// GaugeField zz(grid); zz=zero;
//
// // xyzt
// for(int d = 0; d < grid->_ndimension-1;d++){
// LatticeCoordinate(coor,d);
// out = where(coor==Integer(0),zz,out);
// }
// }
//
// template<class Gimpl>
// void Photon<Gimpl>::FeynmanGaugeMomentumSpacePropagator_TL(GaugeField &out,
// const GaugeField &in)
// {
//
// // what type LatticeComplex
// GridBase *grid = out._grid;
// int nd = grid->_ndimension;
//
// typedef typename GaugeField::vector_type vector_type;
// typedef typename GaugeField::scalar_type ScalComplex;
// typedef Lattice<iSinglet<vector_type> > LatComplex;
//
// std::vector<int> latt_size = grid->_fdimensions;
//
// LatComplex denom(grid); denom= zero;
// LatComplex one(grid); one = ScalComplex(1.0,0.0);
// LatComplex kmu(grid);
//
// ScalComplex ci(0.0,1.0);
// // momphase = n * 2pi / L
// for(int mu=0;mu<Nd;mu++) {
//
// LatticeCoordinate(kmu,mu);
//
// RealD TwoPiL = M_PI * 2.0/ latt_size[mu];
//
// kmu = TwoPiL * kmu ;
//
// denom = denom + 4.0*sin(kmu*0.5)*sin(kmu*0.5); // Wilson term
// }
// std::vector<int> zero_mode(nd,0);
// TComplexD Tone = ComplexD(1.0,0.0);
// TComplexD Tzero= ComplexD(0.0,0.0);
//
// pokeSite(Tone,denom,zero_mode);
//
// denom= one/denom;
//
// pokeSite(Tzero,denom,zero_mode);
//
// out = zero;
// out = in*denom;
// };
}}
#endif
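For readers of the Photon.h changes above: as a sketch of the conventions that the new makeKHat, makeInvKHatSquared and zmSub appear to implement (reconstructed from the code, not an authoritative statement), the lattice momenta and Feynman-gauge propagator are

    \hat{k}_\mu = 2 \sin(\pi k_\mu / L_\mu) \, e^{i \pi k_\mu / L_\mu},
    \qquad
    D(k) = \frac{1}{\sum_\mu \hat{k}_\mu \hat{k}_\mu^{*}} \quad (k \neq 0),

with the zero mode removed either at the single point k = 0 (qedTL) or on the whole \vec{k} = 0 time line (qedL), and optional infrared-improvement factors \sqrt{1 + c_i} applied on the first few spatial momentum shells.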


@@ -173,6 +173,39 @@ void G5R5(Lattice<vobj> &z,const Lattice<vobj> &x)
    }
  }
}
}
}} // I explicitly need these outside the QCD namespace
template<typename vobj>
void G5C(Lattice<vobj> &z, const Lattice<vobj> &x)
{
  GridBase *grid = x._grid;
  z.checkerboard = x.checkerboard;
  conformable(x, z);
  QCD::Gamma G5(QCD::Gamma::Algebra::Gamma5);
  z = G5 * x;
}
template<class CComplex, int nbasis>
void G5C(Lattice<iVector<CComplex, nbasis>> &z, const Lattice<iVector<CComplex, nbasis>> &x)
{
  GridBase *grid = x._grid;
  z.checkerboard = x.checkerboard;
  conformable(x, z);
  static_assert(nbasis % 2 == 0, "");
  int nb = nbasis / 2;
  parallel_for(int ss = 0; ss < grid->oSites(); ss++) {
    for(int n = 0; n < nb; ++n) {
      z._odata[ss](n) = x._odata[ss](n);
    }
    for(int n = nb; n < nbasis; ++n) {
      z._odata[ss](n) = -x._odata[ss](n);
    }
  }
}
}
#endif
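A note on the coarse-grid G5C added above: for a chirally doubled basis (the first nb vectors in the +1 eigenspace of \gamma_5, the last nb in the -1 eigenspace) the coarse representation of \gamma_5 reduces to a sign flip on the second half of the components,

    \Gamma_5^{\rm coarse} = \mathrm{diag}(+\mathbf{1}_{n_b}, -\mathbf{1}_{n_b}),

which is exactly what the parallel_for loop implements.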


@@ -6,10 +6,12 @@
 Copyright (C) 2015
 Author: Azusa Yamaguchi <ayamaguc@staffmail.ed.ac.uk>
 Author: Peter Boyle <paboyle@ph.ed.ac.uk>
 Author: neo <cossu@post.kek.jp>
 Author: paboyle <paboyle@ph.ed.ac.uk>
+Author: James Harrison <J.Harrison@soton.ac.uk>
+Author: Antonin Portelli <antonin.portelli@me.com>
 This program is free software; you can redistribute it and/or modify
 it under the terms of the GNU General Public License as published by
@@ -645,6 +647,184 @@ static void StapleMult(GaugeMat &staple, const GaugeLorentz &Umu, int mu) {
    }
  }
}
//////////////////////////////////////////////////
// Wilson loop of size (R1, R2), oriented in mu,nu plane
//////////////////////////////////////////////////
static void wilsonLoop(GaugeMat &wl, const std::vector<GaugeMat> &U,
const int Rmu, const int Rnu,
const int mu, const int nu) {
wl = U[nu];
for(int i = 0; i < Rnu-1; i++){
wl = Gimpl::CovShiftForward(U[nu], nu, wl);
}
for(int i = 0; i < Rmu; i++){
wl = Gimpl::CovShiftForward(U[mu], mu, wl);
}
for(int i = 0; i < Rnu; i++){
wl = Gimpl::CovShiftBackward(U[nu], nu, wl);
}
for(int i = 0; i < Rmu; i++){
wl = Gimpl::CovShiftBackward(U[mu], mu, wl);
}
}
//////////////////////////////////////////////////
// trace of Wilson Loop oriented in mu,nu plane
//////////////////////////////////////////////////
static void traceWilsonLoop(LatticeComplex &wl,
const std::vector<GaugeMat> &U,
const int Rmu, const int Rnu,
const int mu, const int nu) {
GaugeMat sp(U[0]._grid);
wilsonLoop(sp, U, Rmu, Rnu, mu, nu);
wl = trace(sp);
}
//////////////////////////////////////////////////
// sum over all planes of Wilson loop
//////////////////////////////////////////////////
static void siteWilsonLoop(LatticeComplex &Wl,
const std::vector<GaugeMat> &U,
const int R1, const int R2) {
LatticeComplex siteWl(U[0]._grid);
Wl = zero;
for (int mu = 1; mu < U[0]._grid->_ndimension; mu++) {
for (int nu = 0; nu < mu; nu++) {
traceWilsonLoop(siteWl, U, R1, R2, mu, nu);
Wl = Wl + siteWl;
traceWilsonLoop(siteWl, U, R2, R1, mu, nu);
Wl = Wl + siteWl;
}
}
}
//////////////////////////////////////////////////
// sum over planes of Wilson loop with length R1
// in the time direction
//////////////////////////////////////////////////
static void siteTimelikeWilsonLoop(LatticeComplex &Wl,
const std::vector<GaugeMat> &U,
const int R1, const int R2) {
LatticeComplex siteWl(U[0]._grid);
int ndim = U[0]._grid->_ndimension;
Wl = zero;
for (int nu = 0; nu < ndim - 1; nu++) {
traceWilsonLoop(siteWl, U, R1, R2, ndim-1, nu);
Wl = Wl + siteWl;
}
}
//////////////////////////////////////////////////
// sum Wilson loop over all planes orthogonal to the time direction
//////////////////////////////////////////////////
static void siteSpatialWilsonLoop(LatticeComplex &Wl,
const std::vector<GaugeMat> &U,
const int R1, const int R2) {
LatticeComplex siteWl(U[0]._grid);
Wl = zero;
for (int mu = 1; mu < U[0]._grid->_ndimension - 1; mu++) {
for (int nu = 0; nu < mu; nu++) {
traceWilsonLoop(siteWl, U, R1, R2, mu, nu);
Wl = Wl + siteWl;
traceWilsonLoop(siteWl, U, R2, R1, mu, nu);
Wl = Wl + siteWl;
}
}
}
//////////////////////////////////////////////////
// sum over all x,y,z,t and over all planes of Wilson loop
//////////////////////////////////////////////////
static Real sumWilsonLoop(const GaugeLorentz &Umu,
const int R1, const int R2) {
std::vector<GaugeMat> U(4, Umu._grid);
for (int mu = 0; mu < Umu._grid->_ndimension; mu++) {
U[mu] = PeekIndex<LorentzIndex>(Umu, mu);
}
LatticeComplex Wl(Umu._grid);
siteWilsonLoop(Wl, U, R1, R2);
TComplex Tp = sum(Wl);
Complex p = TensorRemove(Tp);
return p.real();
}
//////////////////////////////////////////////////
// sum over all x,y,z,t and over all planes of timelike Wilson loop
//////////////////////////////////////////////////
static Real sumTimelikeWilsonLoop(const GaugeLorentz &Umu,
const int R1, const int R2) {
std::vector<GaugeMat> U(4, Umu._grid);
for (int mu = 0; mu < Umu._grid->_ndimension; mu++) {
U[mu] = PeekIndex<LorentzIndex>(Umu, mu);
}
LatticeComplex Wl(Umu._grid);
siteTimelikeWilsonLoop(Wl, U, R1, R2);
TComplex Tp = sum(Wl);
Complex p = TensorRemove(Tp);
return p.real();
}
//////////////////////////////////////////////////
// sum over all x,y,z,t and over all planes of spatial Wilson loop
//////////////////////////////////////////////////
static Real sumSpatialWilsonLoop(const GaugeLorentz &Umu,
const int R1, const int R2) {
std::vector<GaugeMat> U(4, Umu._grid);
for (int mu = 0; mu < Umu._grid->_ndimension; mu++) {
U[mu] = PeekIndex<LorentzIndex>(Umu, mu);
}
LatticeComplex Wl(Umu._grid);
siteSpatialWilsonLoop(Wl, U, R1, R2);
TComplex Tp = sum(Wl);
Complex p = TensorRemove(Tp);
return p.real();
}
//////////////////////////////////////////////////
// average over all x,y,z,t and over all planes of Wilson loop
//////////////////////////////////////////////////
static Real avgWilsonLoop(const GaugeLorentz &Umu,
const int R1, const int R2) {
int ndim = Umu._grid->_ndimension;
Real sumWl = sumWilsonLoop(Umu, R1, R2);
Real vol = Umu._grid->gSites();
Real faces = 1.0 * ndim * (ndim - 1);
return sumWl / vol / faces / Nc; // Nc dependent... FIXME
}
//////////////////////////////////////////////////
// average over all x,y,z,t and over all planes of timelike Wilson loop
//////////////////////////////////////////////////
static Real avgTimelikeWilsonLoop(const GaugeLorentz &Umu,
const int R1, const int R2) {
int ndim = Umu._grid->_ndimension;
Real sumWl = sumTimelikeWilsonLoop(Umu, R1, R2);
Real vol = Umu._grid->gSites();
Real faces = 1.0 * (ndim - 1);
return sumWl / vol / faces / Nc; // Nc dependent... FIXME
}
//////////////////////////////////////////////////
// average over all x,y,z,t and over all planes of spatial Wilson loop
//////////////////////////////////////////////////
static Real avgSpatialWilsonLoop(const GaugeLorentz &Umu,
const int R1, const int R2) {
int ndim = Umu._grid->_ndimension;
Real sumWl = sumSpatialWilsonLoop(Umu, R1, R2);
Real vol = Umu._grid->gSites();
Real faces = 1.0 * (ndim - 1) * (ndim - 2);
return sumWl / vol / faces / Nc; // Nc dependent... FIXME
}
};
typedef WilsonLoops<PeriodicGimplR> ColourWilsonLoops;
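For reference, the normalisation used by avgWilsonLoop above works out to

    \langle W(R_1, R_2) \rangle
      = \frac{1}{V \, N_c \, n_d (n_d - 1)}
        \sum_x \sum_{\mu > \nu}
        \left[ \mathrm{Tr}\, W_{\mu\nu}(R_1, R_2; x) + \mathrm{Tr}\, W_{\mu\nu}(R_2, R_1; x) \right],

since siteWilsonLoop sums both orientations of each of the n_d (n_d - 1)/2 planes; the timelike and spatial averages divide instead by the (n_d - 1) and (n_d - 1)(n_d - 2) plane-orientation combinations they sum over.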


@@ -62,6 +62,9 @@ using A2AMatrixSet = Eigen::TensorMap<Eigen::Tensor<T, 5, Eigen::RowMajor>>;
 template <typename T>
 using A2AMatrix = Eigen::Matrix<T, -1, -1, Eigen::RowMajor>;
+template <typename T>
+using A2AMatrixMap = Eigen::Map<A2AMatrix<T>>;
 template <typename T>
 using A2AMatrixTr = Eigen::Matrix<T, -1, -1, Eigen::ColMajor>;
@@ -108,7 +111,7 @@
     void saveBlock(const A2AMatrixSet<T> &m, const unsigned int ext, const unsigned int str,
                    const unsigned int i, const unsigned int j);
     template <template <class> class Vec, typename VecT>
-    void load(Vec<VecT> &v, double *tRead = nullptr);
+    void load(Vec<VecT> &v, double *tRead = nullptr, const bool useCache = true);
 private:
     std::string filename_{""}, dataname_{""};
     unsigned int nt_{0}, ni_{0}, nj_{0};
@@ -495,7 +498,7 @@ void A2AMatrixIo<T>::saveBlock(const A2AMatrixSet<T> &m,
 template <typename T>
 template <template <class> class Vec, typename VecT>
-void A2AMatrixIo<T>::load(Vec<VecT> &v, double *tRead)
+void A2AMatrixIo<T>::load(Vec<VecT> &v, double *tRead, const bool useCache)
 {
 #ifdef HAVE_HDF5
     Hdf5Reader reader(filename_);
@ -532,36 +535,55 @@ void A2AMatrixIo<T>::load(Vec<VecT> &v, double *tRead)
nj_ = hdim[2]; nj_ = hdim[2];
} }
A2AMatrix<T> buf(ni_, nj_); if (useCache)
std::vector<hsize_t> count = {1, static_cast<hsize_t>(ni_),
static_cast<hsize_t>(nj_)},
stride = {1, 1, 1},
block = {1, 1, 1},
memCount = {static_cast<hsize_t>(ni_),
static_cast<hsize_t>(nj_)};
H5NS::DataSpace memspace(memCount.size(), memCount.data());
std::cout << "Loading timeslice";
std::cout.flush();
*tRead = 0.;
for (unsigned int tp1 = nt_; tp1 > 0; --tp1)
{ {
unsigned int t = tp1 - 1; std::vector<T> buf(nt_*ni_*nj_);
std::vector<hsize_t> offset = {static_cast<hsize_t>(t), 0, 0}; T *pt;
if (t % 10 == 0) dataset.read(buf.data(), datatype);
pt = buf.data();
for (unsigned int t = 0; t < nt_; ++t)
{ {
std::cout << " " << t; A2AMatrixMap<T> bufMap(pt, ni_, nj_);
std::cout.flush();
v[t] = bufMap.template cast<VecT>();
pt += ni_*nj_;
} }
dataspace.selectHyperslab(H5S_SELECT_SET, count.data(), offset.data(),
stride.data(), block.data());
if (tRead) *tRead -= usecond();
dataset.read(buf.data(), datatype, memspace, dataspace);
if (tRead) *tRead += usecond();
v[t] = buf.template cast<VecT>();
} }
std::cout << std::endl; // if useCache = false, do I/O timeslice per timeslice (much slower)
else
{
A2AMatrix<T> buf(ni_, nj_);
std::vector<hsize_t> count = {1, static_cast<hsize_t>(ni_),
static_cast<hsize_t>(nj_)},
stride = {1, 1, 1},
block = {1, 1, 1},
memCount = {static_cast<hsize_t>(ni_),
static_cast<hsize_t>(nj_)};
H5NS::DataSpace memspace(memCount.size(), memCount.data());
std::cout << "Loading timeslice";
std::cout.flush();
*tRead = 0.;
for (unsigned int tp1 = nt_; tp1 > 0; --tp1)
{
unsigned int t = tp1 - 1;
std::vector<hsize_t> offset = {static_cast<hsize_t>(t), 0, 0};
if (t % 10 == 0)
{
std::cout << " " << t;
std::cout.flush();
}
dataspace.selectHyperslab(H5S_SELECT_SET, count.data(), offset.data(),
stride.data(), block.data());
if (tRead) *tRead -= usecond();
dataset.read(buf.data(), datatype, memspace, dataspace);
if (tRead) *tRead += usecond();
v[t] = buf.template cast<VecT>();
}
std::cout << std::endl;
}
#else
    HADRONS_ERROR(Implementation, "all-to-all matrix I/O needs HDF5 library");
#endif
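The cached branch above replaces per-timeslice hyperslab reads with a single HDF5 read of the full (nt, ni, nj) dataset, which is then re-viewed one timeslice at a time through A2AMatrixMap. A stripped-down sketch of that buffer-splitting pattern (Eigen only; the helper name and types are illustrative, not Hadrons code):

    #include <complex>
    #include <vector>
    #include <Eigen/Dense>

    using Mat    = Eigen::Matrix<std::complex<float>, -1, -1, Eigen::RowMajor>;
    using MatMap = Eigen::Map<const Mat>;

    // buf holds nt*ni*nj contiguous row-major entries obtained from one read
    void splitTimeslices(const std::vector<std::complex<float>> &buf,
                         std::vector<Mat> &v,
                         const unsigned int nt, const unsigned int ni,
                         const unsigned int nj)
    {
        const std::complex<float> *pt = buf.data();

        v.resize(nt);
        for (unsigned int t = 0; t < nt; ++t)
        {
            MatMap view(pt, ni, nj); // view into the big buffer, no copy
            v[t] = view;             // one copy per timeslice, no per-slice I/O
            pt  += static_cast<std::size_t>(ni)*nj;
        }
    }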


@@ -41,14 +41,6 @@ BEGIN_HADRONS_NAMESPACE
 class Application
 {
 public:
-    class TrajRange: Serializable
-    {
-    public:
-        GRID_SERIALIZABLE_CLASS_MEMBERS(TrajRange,
-                                        unsigned int, start,
-                                        unsigned int, end,
-                                        unsigned int, step);
-    };
     class GlobalPar: Serializable
     {
     public:


@@ -263,6 +263,33 @@ void tokenReplace(std::string &str, const std::string token,
    }
}
// trajectory range
class TrajRange: Serializable
{
public:
GRID_SERIALIZABLE_CLASS_MEMBERS(TrajRange,
unsigned int, start,
unsigned int, end,
unsigned int, step,
std::string, exclude);
inline std::vector<unsigned int> getTrajectoryList(void)
{
std::vector<unsigned int> excVec = strToVec<unsigned int>(exclude);
std::vector<unsigned int> list;
for (unsigned int t = start; t < end; t += step)
{
if (std::find(excVec.begin(), excVec.end(), t) == excVec.end())
{
list.push_back(t);
}
}
return list;
}
};
END_HADRONS_NAMESPACE
#include <Hadrons/Exceptions.hpp>
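As an illustration of the new exclude field (values hypothetical, and assuming strToVec splits the string on whitespace as elsewhere in Hadrons):

    TrajRange tr;
    tr.start   = 1000;
    tr.end     = 1100;
    tr.step    = 20;
    tr.exclude = "1040 1080";
    // candidates 1000, 1020, 1040, 1060, 1080 minus the excluded ones:
    // getTrajectoryList() returns {1000, 1020, 1060}
    std::vector<unsigned int> trajs = tr.getTrajectoryList();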


@@ -34,4 +34,5 @@ nobase_libHadrons_a_HEADERS = \
 	Solver.hpp \
 	TimerArray.hpp \
 	VirtualMachine.hpp \
+	Utilities/Contractor.hpp \
 	$(modules_hpp)


@@ -70,7 +70,7 @@ void TStochEm::execute(void)
     LOG(Message) << "Generating stochastic EM potential..." << std::endl;
     std::vector<Real> improvements = strToVec<Real>(par().improvement);
-    PhotonR photon(par().gauge, par().zmScheme, improvements, par().G0_qedInf);
+    PhotonR photon(envGetGrid(EmField), par().gauge, par().zmScheme, improvements);
     auto &a = envGet(EmField, getName());
     auto &w = envGet(EmComp, "_" + getName() + "_weight");


@@ -47,8 +47,7 @@ public:
     GRID_SERIALIZABLE_CLASS_MEMBERS(StochEmPar,
                                     PhotonR::Gauge,    gauge,
                                     PhotonR::ZmScheme, zmScheme,
-                                    std::string,       improvement,
-                                    Real,              G0_qedInf);
+                                    std::string,       improvement);
};
class TStochEm: public Module<StochEmPar> class TStochEm: public Module<StochEmPar>


@@ -62,7 +62,7 @@ void TUnitEm::setup(void)
 // execution ///////////////////////////////////////////////////////////////////
 void TUnitEm::execute(void)
 {
-    PhotonR photon(0, 0); // Just chose arbitrary input values here
+    PhotonR photon(envGetGrid(EmField), 0, 0); // Just chose arbitrary input values here
     auto &a = envGet(EmField, getName());
     LOG(Message) << "Generating unit EM potential..." << std::endl;
     photon.UnitField(a);


@@ -28,6 +28,25 @@ See the full license in the file "LICENSE" in the top level distribution directo
#include <Hadrons/A2AMatrix.hpp>
#include <Hadrons/DiskVector.hpp>
#include <Hadrons/TimerArray.hpp>
#include <Hadrons/Utilities/Contractor.hpp>
#ifdef GRID_COMMS_MPI3
#define GET_RANK(rank, nMpi) \
MPI_Comm_size(MPI_COMM_WORLD, &(nMpi));\
MPI_Comm_rank(MPI_COMM_WORLD, &(rank))
#define BARRIER() MPI_Barrier(MPI_COMM_WORLD)
#define GLOBAL_DSUM(x) MPI_Allreduce(MPI_IN_PLACE, &x, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD)
#define GLOBAL_DMAX(x) MPI_Allreduce(MPI_IN_PLACE, &x, 1, MPI_DOUBLE, MPI_MAX, MPI_COMM_WORLD)
#define INIT() MPI_Init(NULL, NULL)
#define FINALIZE() MPI_Finalize()
#else
#define GET_RANK(rank, nMpi) (nMpi) = 1; (rank) = 0
#define BARRIER()
#define GLOBAL_DSUM(x)
#define GLOBAL_DMAX(x)
#define INIT()
#define FINALIZE()
#endif
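// With GRID_COMMS_MPI3 defined, the macros above wrap the corresponding MPI
// calls; without it they reduce to single-process no-ops (rank = 0, nMpi = 1),
// so the contractor still builds and runs serially.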
using namespace Grid;
using namespace QCD;
@@ -35,58 +54,6 @@ using namespace Hadrons;
#define TIME_MOD(t) (((t) + par.global.nt) % par.global.nt)
namespace Contractor
{
class TrajRange: Serializable
{
public:
GRID_SERIALIZABLE_CLASS_MEMBERS(TrajRange,
unsigned int, start,
unsigned int, end,
unsigned int, step);
};
class GlobalPar: Serializable
{
public:
GRID_SERIALIZABLE_CLASS_MEMBERS(GlobalPar,
TrajRange, trajCounter,
unsigned int, nt,
std::string, diskVectorDir,
std::string, output);
};
class A2AMatrixPar: Serializable
{
public:
GRID_SERIALIZABLE_CLASS_MEMBERS(A2AMatrixPar,
std::string, file,
std::string, dataset,
unsigned int, cacheSize,
std::string, name);
};
class ProductPar: Serializable
{
public:
GRID_SERIALIZABLE_CLASS_MEMBERS(ProductPar,
std::string, terms,
std::vector<std::string>, times,
std::string, translations,
bool, translationAverage);
};
class CorrelatorResult: Serializable
{
public:
GRID_SERIALIZABLE_CLASS_MEMBERS(CorrelatorResult,
std::vector<Contractor::A2AMatrixPar>, a2aMatrix,
ProductPar, contraction,
std::vector<unsigned int>, times,
std::vector<ComplexD>, correlator);
};
}
struct ContractorPar
{
    Contractor::GlobalPar global;
@@ -143,6 +110,27 @@ void saveCorrelator(const Contractor::CorrelatorResult &result, const std::strin
    write(writer, fileStem, result);
}
void printPerf(const double bytes, const double usec)
{
double maxt;
maxt = usec;
GLOBAL_DMAX(maxt);
std::cout << maxt/1.0e6 << " sec " << bytes/maxt*1.0e6/1024/1024/1024 << " GB/s";
}
void printPerf(const double bytes, const double busec,
const double flops, const double fusec)
{
double maxt;
printPerf(bytes, busec);
std::cout << " ";
maxt = fusec;
GLOBAL_DMAX(maxt);
std::cout << flops/fusec/1.0e3 << " GFlop/s";
}
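// Worked example (hypothetical numbers): printPerf(10.*1024*1024*1024, 2.0e6)
// prints "2 sec 5 GB/s"; the four-argument overload additionally prints
// flops/fusec/1.0e3, e.g. 3.0e12 flops in 1.5e6 usec gives "2000 GFlop/s".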
std::set<unsigned int> parseTimeRange(const std::string str, const unsigned int nt)
{
    std::regex rex("([0-9]+)|(([0-9]+)\\.\\.([0-9]+))");
@@ -184,59 +172,18 @@ std::set<unsigned int> parseTimeRange(const std::string str, const unsigned int
    return tSet;
}
struct Sec
{
Sec(const double usec)
{
seconds = usec/1.0e6;
}
double seconds;
};
inline std::ostream & operator<< (std::ostream& s, const Sec &&sec)
{
s << std::setw(10) << sec.seconds << " sec";
return s;
}
struct Flops
{
Flops(const double flops, const double fusec)
{
gFlopsPerSec = flops/fusec/1.0e3;
}
double gFlopsPerSec;
};
inline std::ostream & operator<< (std::ostream& s, const Flops &&f)
{
s << std::setw(10) << f.gFlopsPerSec << " GFlop/s";
return s;
}
struct Bytes
{
Bytes(const double bytes, const double busec)
{
gBytesPerSec = bytes/busec*1.0e6/1024/1024/1024;
}
double gBytesPerSec;
};
inline std::ostream & operator<< (std::ostream& s, const Bytes &&b)
{
s << std::setw(10) << b.gBytesPerSec << " GB/s";
return s;
}
int main(int argc, char* argv[]) int main(int argc, char* argv[])
{ {
// MPI init
int nMpi, rank;
INIT();
GET_RANK(rank, nMpi);
if (rank != 0)
{
std::cout.setstate(std::ios::badbit);
}
// parse command line // parse command line
std::string parFilename; std::string parFilename;
@ -266,31 +213,68 @@ int main(int argc, char* argv[])
for (auto &p: par.a2aMatrix) for (auto &p: par.a2aMatrix)
{ {
std::string dirName = par.global.diskVectorDir + "/" + p.name; std::string dirName = par.global.diskVectorDir + "/" + p.name + "." + std::to_string(rank);
a2aMat.emplace(p.name, EigenDiskVector<ComplexD>(dirName, par.global.nt, p.cacheSize)); a2aMat.emplace(p.name, EigenDiskVector<ComplexD>(dirName, par.global.nt, p.cacheSize));
} }
// trajectory loop // trajectory loop
for (unsigned int traj = par.global.trajCounter.start; std::vector<unsigned int> tList = par.global.trajCounter.getTrajectoryList();
traj < par.global.trajCounter.end; traj += par.global.trajCounter.step) unsigned int indi, inde, indPerRank;
{
std::cout << ":::::::: Trajectory " << traj << std::endl;
indPerRank = tList.size()/nMpi;
indi = rank*indPerRank;
BARRIER();
for (unsigned int tInd = indi; tInd < indi + indPerRank; tInd++)
{
unsigned int traj;
if (tInd < tList.size())
{
traj = tList[tInd];
}
else
{
traj = tList.back();
}
if (nMpi > 1)
{
if (rank == 0)
{
std::cout << ":::::::: Trajectories ";
for (unsigned int r = 0; r < nMpi - 1; ++r)
{
std::cout << tList[tInd + r*indPerRank] << " ";
}
if (tInd + (nMpi - 1)*indPerRank < tList.size())
{
std::cout << tList[tInd + (nMpi - 1)*indPerRank];
}
std::cout << std::endl;
}
}
else
{
std::cout << ":::::::: Trajectory " << traj << std::endl;
}
// load data // load data
for (auto &p: par.a2aMatrix) for (auto &p: par.a2aMatrix)
{ {
std::string filename = p.file; std::string filename = p.file;
double t, size; double t;
tokenReplace(filename, "traj", traj); tokenReplace(filename, "traj", traj);
std::cout << "======== Loading '" << filename << "'" << std::endl; std::cout << "======== Loading '" << p.file << "'" << std::endl;
BARRIER();
A2AMatrixIo<HADRONS_A2AM_IO_TYPE> a2aIo(filename, p.dataset, par.global.nt); A2AMatrixIo<HADRONS_A2AM_IO_TYPE> a2aIo(filename, p.dataset, par.global.nt);
a2aIo.load(a2aMat.at(p.name), &t); a2aIo.load(a2aMat.at(p.name), &t);
std::cout << "Read " << a2aIo.getSize() << " bytes in " << t/1.0e6 GLOBAL_DMAX(t);
<< " sec, " << a2aIo.getSize()/t*1.0e6/1024/1024 << " MB/s" << std::endl; std::cout << "Read " << nMpi*a2aIo.getSize() << " bytes in " << t/1.0e6
<< " sec, " << nMpi*a2aIo.getSize()/t*1.0e6/1024/1024
<< " MB/s" << std::endl;
} }
// contract // contract
@ -308,6 +292,7 @@ int main(int argc, char* argv[])
double fusec, busec, flops, bytes, tusec; double fusec, busec, flops, bytes, tusec;
Contractor::CorrelatorResult result; Contractor::CorrelatorResult result;
BARRIER();
tAr.startTimer("Total"); tAr.startTimer("Total");
std::cout << "======== Contraction tr("; std::cout << "======== Contraction tr(";
for (unsigned int g = 0; g < term.size(); ++g) for (unsigned int g = 0; g < term.size(); ++g)
@ -358,9 +343,10 @@ int main(int argc, char* argv[])
} }
tAr.stopTimer("Transpose caching"); tAr.stopTimer("Transpose caching");
} }
bytes = par.global.nt*lastTerm[0].rows()*lastTerm[0].cols()*sizeof(ComplexD); bytes = par.global.nt*lastTerm[0].rows()*lastTerm[0].cols();
std::cout << Sec(tAr.getDTimer("Transpose caching")) << " " bytes *= sizeof(ComplexD)*nMpi;
<< Bytes(bytes, tAr.getDTimer("Transpose caching")) << std::endl; printPerf(bytes, tAr.getDTimer("Transpose caching"));
std::cout << std::endl;
for (unsigned int i = 0; i < timeSeq.size(); ++i) for (unsigned int i = 0; i < timeSeq.size(); ++i)
{ {
unsigned int dti = 0; unsigned int dti = 0;
@ -378,7 +364,7 @@ int main(int argc, char* argv[])
<< " -- positions= " << t << ", dt= " << dt << std::endl; << " -- positions= " << t << ", dt= " << dt << std::endl;
if (term.size() > 2) if (term.size() > 2)
{ {
std::cout << std::setw(8) << "products"; std::cout << std::setw(10) << "products ";
} }
flops = 0.; flops = 0.;
bytes = 0.; bytes = 0.;
@ -405,11 +391,11 @@ int main(int argc, char* argv[])
} }
if (term.size() > 2) if (term.size() > 2)
{ {
std::cout << Sec(tAr.getDTimer("A*B total") - busec) << " " printPerf(bytes*nMpi, tAr.getDTimer("A*B total") - busec,
<< Flops(flops, tAr.getDTimer("A*B algebra") - fusec) << " " flops*nMpi, tAr.getDTimer("A*B algebra") - fusec);
<< Bytes(bytes, tAr.getDTimer("A*B total") - busec) << std::endl; std::cout << std::endl;
} }
std::cout << std::setw(8) << "traces"; std::cout << std::setw(10) << "traces ";
flops = 0.; flops = 0.;
bytes = 0.; bytes = 0.;
fusec = tAr.getDTimer("tr(A*B)"); fusec = tAr.getDTimer("tr(A*B)");
@ -423,9 +409,9 @@ int main(int argc, char* argv[])
bytes += 2.*prod.rows()*prod.cols()*sizeof(ComplexD); bytes += 2.*prod.rows()*prod.cols()*sizeof(ComplexD);
} }
tAr.stopTimer("Linear algebra"); tAr.stopTimer("Linear algebra");
std::cout << Sec(tAr.getDTimer("tr(A*B)") - busec) << " " printPerf(bytes*nMpi, tAr.getDTimer("tr(A*B)") - busec,
<< Flops(flops, tAr.getDTimer("tr(A*B)") - fusec) << " " flops*nMpi, tAr.getDTimer("tr(A*B)") - fusec);
<< Bytes(bytes, tAr.getDTimer("tr(A*B)") - busec) << std::endl; std::cout << std::endl;
if (!p.translationAverage) if (!p.translationAverage)
{ {
saveCorrelator(result, par.global.output, dt, traj); saveCorrelator(result, par.global.output, dt, traj);
@ -450,5 +436,7 @@ int main(int argc, char* argv[])
} }
} }
FINALIZE();
return EXIT_SUCCESS; return EXIT_SUCCESS;
} }


@@ -5,7 +5,48 @@
BEGIN_HADRONS_NAMESPACE
namespace Contractor
{
class GlobalPar: Serializable
{
public:
GRID_SERIALIZABLE_CLASS_MEMBERS(GlobalPar,
TrajRange, trajCounter,
unsigned int, nt,
std::string, diskVectorDir,
std::string, output);
};
class A2AMatrixPar: Serializable
{
public:
GRID_SERIALIZABLE_CLASS_MEMBERS(A2AMatrixPar,
std::string, file,
std::string, dataset,
unsigned int, cacheSize,
std::string, name);
};
class ProductPar: Serializable
{
public:
GRID_SERIALIZABLE_CLASS_MEMBERS(ProductPar,
std::string, terms,
std::vector<std::string>, times,
std::string, translations,
bool, translationAverage);
};
class CorrelatorResult: Serializable
{
public:
GRID_SERIALIZABLE_CLASS_MEMBERS(CorrelatorResult,
std::vector<Contractor::A2AMatrixPar>, a2aMatrix,
ProductPar, contraction,
std::vector<unsigned int>, times,
std::vector<ComplexD>, correlator);
};
}
END_HADRONS_NAMESPACE


@@ -7,7 +7,7 @@ HadronsFermionEP64To32_SOURCES = EigenPackCast.cc
 HadronsFermionEP64To32_CXXFLAGS = $(AM_CXXFLAGS) -DFIN=WilsonImplD::FermionField -DFOUT=WilsonImplF::FermionField
 HadronsFermionEP64To32_LDADD = ../libHadrons.a ../../Grid/libGrid.a
-HadronsContractor_SOURCES = Contractor.cc
+HadronsContractor_SOURCES = Contractor.cc Contractor.hpp
 HadronsContractor_LDADD = ../libHadrons.a ../../Grid/libGrid.a
 HadronsContractorBenchmark_SOURCES = ContractorBenchmark.cc

tests/core/Test_qed.cc (new file, 138 lines)

@ -0,0 +1,138 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: tests/core/Test_qed.cc
Copyright (C) 2015-2018
Author: Antonin Portelli <antonin.portelli@me.com>
Author: James Harrison <J.Harrison@soton.ac.uk>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
#include <Grid/Grid.h>
using namespace Grid;
using namespace QCD;
typedef PeriodicGaugeImpl<QedGImplR> QedPeriodicGImplR;
typedef PhotonR::GaugeField EmField;
typedef PhotonR::GaugeLinkField EmComp;
const int NCONFIGS = 20;
const int NWILSON = 10;
int main(int argc, char *argv[])
{
// initialization
Grid_init(&argc, &argv);
std::cout << GridLogMessage << "Grid initialized" << std::endl;
// QED stuff
std::vector<int> latt_size = GridDefaultLatt();
std::vector<int> simd_layout = GridDefaultSimd(4, vComplex::Nsimd());
std::vector<int> mpi_layout = GridDefaultMpi();
GridCartesian grid(latt_size,simd_layout,mpi_layout);
GridParallelRNG pRNG(&grid);
PhotonR photon(&grid, PhotonR::Gauge::coulomb, PhotonR::ZmScheme::qedL);
EmField a(&grid);
EmField expA(&grid);
Complex imag_unit(0, 1);
Real wlA;
std::vector<Real> logWlAvg(NWILSON, 0.0), logWlTime(NWILSON, 0.0), logWlSpace(NWILSON, 0.0);
pRNG.SeedFixedIntegers({1, 2, 3, 4});
std::cout << GridLogMessage << "Wilson loop calculation beginning" << std::endl;
for(int ic = 0; ic < NCONFIGS; ic++){
std::cout << GridLogMessage << "Configuration " << ic <<std::endl;
photon.StochasticField(a, pRNG);
// Exponentiate photon field
expA = exp(imag_unit*a);
// Calculate zero-modes
std::vector<EmField::vector_object::scalar_object> zm;
std::cout << GridLogMessage << "Total zero-mode norm 2 "
<< std::sqrt(norm2(sum(a))) << std::endl;
std::cout << GridLogMessage << "Spatial zero-mode norm 2" << std::endl;
sliceSum(a, zm, grid.Nd() - 1);
for (unsigned int t = 0; t < latt_size.back(); ++t)
{
std::cout << GridLogMessage << "t = " << t << " " << std::sqrt(norm2(zm[t])) << std::endl;
}
// Calculate divergence
EmComp diva(&grid), amu(&grid);
diva = zero;
for (unsigned int mu = 0; mu < grid.Nd(); ++mu)
{
amu = peekLorentz(a, mu);
diva += amu - Cshift(amu, mu, -1);
if (mu == grid.Nd() - 2)
{
std::cout << GridLogMessage << "Spatial divergence norm 2 " << std::sqrt(norm2(diva)) << std::endl;
}
}
std::cout << GridLogMessage << "Total divergence norm 2 " << std::sqrt(norm2(diva)) << std::endl;
// Calculate Wilson loops
for(int iw=1; iw<=NWILSON; iw++){
wlA = WilsonLoops<QedPeriodicGImplR>::avgWilsonLoop(expA, iw, iw) * 3;
logWlAvg[iw-1] -= 2*log(wlA);
wlA = WilsonLoops<QedPeriodicGImplR>::avgTimelikeWilsonLoop(expA, iw, iw) * 3;
logWlTime[iw-1] -= 2*log(wlA);
wlA = WilsonLoops<QedPeriodicGImplR>::avgSpatialWilsonLoop(expA, iw, iw) * 3;
logWlSpace[iw-1] -= 2*log(wlA);
}
}
std::cout << GridLogMessage << "Wilson loop calculation completed" << std::endl;
// Calculate Wilson loops
// From A. Portelli's PhD thesis:
// size -2*log(W)
// 1 0.500000000(1)
// 2 1.369311535(1)
// 3 2.305193057(1)
// 4 3.261483854(1)
// 5 4.228829967(1)
// 6 5.203604529(1)
// 7 6.183728249(1)
// 8 7.167859805(1)
// 9 8.155091868(1)
// 10 9.144788116(1)
for(int iw=1; iw<=10; iw++){
std::cout << GridLogMessage << iw << 'x' << iw << " Wilson loop" << std::endl;
std::cout << GridLogMessage << "-2*log(W) average: " << logWlAvg[iw-1]/NCONFIGS << std::endl;
std::cout << GridLogMessage << "-2*log(W) timelike: " << logWlTime[iw-1]/NCONFIGS << std::endl;
std::cout << GridLogMessage << "-2*log(W) spatial: " << logWlSpace[iw-1]/NCONFIGS << std::endl;
}
// epilogue
std::cout << GridLogMessage << "Grid is finalizing now" << std::endl;
Grid_finalize();
return EXIT_SUCCESS;
}


@ -0,0 +1,670 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./tests/solver/Test_multigrid_common.h
Copyright (C) 2015-2018
Author: Daniel Richtmann <daniel.richtmann@ur.de>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#ifndef GRID_TEST_MULTIGRID_COMMON_H
#define GRID_TEST_MULTIGRID_COMMON_H
namespace Grid {
// TODO: Can think about having one parameter struct per level and then a
// vector of these structs. How well would that work together with the
// serialization strategy of Grid?
// clang-format off
struct MultiGridParams : Serializable {
public:
GRID_SERIALIZABLE_CLASS_MEMBERS(MultiGridParams,
int, nLevels,
std::vector<std::vector<int>>, blockSizes, // size == nLevels - 1
std::vector<double>, smootherTol, // size == nLevels - 1
std::vector<int>, smootherMaxOuterIter, // size == nLevels - 1
std::vector<int>, smootherMaxInnerIter, // size == nLevels - 1
bool, kCycle,
std::vector<double>, kCycleTol, // size == nLevels - 1
std::vector<int>, kCycleMaxOuterIter, // size == nLevels - 1
std::vector<int>, kCycleMaxInnerIter, // size == nLevels - 1
double, coarseSolverTol,
int, coarseSolverMaxOuterIter,
int, coarseSolverMaxInnerIter);
// constructor with default values
MultiGridParams(int _nLevels = 2,
std::vector<std::vector<int>> _blockSizes = {{4, 4, 4, 4}},
std::vector<double> _smootherTol = {1e-14},
std::vector<int> _smootherMaxOuterIter = {4},
std::vector<int> _smootherMaxInnerIter = {4},
bool _kCycle = true,
std::vector<double> _kCycleTol = {1e-1},
std::vector<int> _kCycleMaxOuterIter = {2},
std::vector<int> _kCycleMaxInnerIter = {5},
double _coarseSolverTol = 5e-2,
int _coarseSolverMaxOuterIter = 10,
int _coarseSolverMaxInnerIter = 500)
: nLevels(_nLevels)
, blockSizes(_blockSizes)
, smootherTol(_smootherTol)
, smootherMaxOuterIter(_smootherMaxOuterIter)
, smootherMaxInnerIter(_smootherMaxInnerIter)
, kCycle(_kCycle)
, kCycleTol(_kCycleTol)
, kCycleMaxOuterIter(_kCycleMaxOuterIter)
, kCycleMaxInnerIter(_kCycleMaxInnerIter)
, coarseSolverTol(_coarseSolverTol)
, coarseSolverMaxOuterIter(_coarseSolverMaxOuterIter)
, coarseSolverMaxInnerIter(_coarseSolverMaxInnerIter)
{}
};
// clang-format on
void checkParameterValidity(MultiGridParams const &params) {
auto correctSize = params.nLevels - 1;
assert(correctSize == params.blockSizes.size());
assert(correctSize == params.smootherTol.size());
assert(correctSize == params.smootherMaxOuterIter.size());
assert(correctSize == params.smootherMaxInnerIter.size());
assert(correctSize == params.kCycleTol.size());
assert(correctSize == params.kCycleMaxOuterIter.size());
assert(correctSize == params.kCycleMaxInnerIter.size());
}
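// Illustration (values purely illustrative, not tuned): a three-level
// parameter set passed through the validity check above.
//
//   MultiGridParams mgParams(3,                            // nLevels
//                            {{4, 4, 4, 4}, {2, 2, 2, 2}}, // blockSizes
//                            {1e-14, 1e-14},               // smootherTol
//                            {4, 4}, {4, 4},               // smoother outer/inner iterations
//                            true,                         // kCycle
//                            {1e-1, 1e-1}, {2, 2}, {5, 5}, // k-cycle tol / outer / inner
//                            5e-2, 10, 500);               // coarse solver tol / outer / inner
//   checkParameterValidity(mgParams); // every per-level vector must have size nLevels - 1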
struct LevelInfo {
public:
std::vector<std::vector<int>> Seeds;
std::vector<GridCartesian *> Grids;
std::vector<GridParallelRNG> PRNGs;
LevelInfo(GridCartesian *FineGrid, MultiGridParams const &mgParams) {
auto nCoarseLevels = mgParams.blockSizes.size();
assert(nCoarseLevels == mgParams.nLevels - 1);
// set up values for finest grid
Grids.push_back(FineGrid);
Seeds.push_back({1, 2, 3, 4});
PRNGs.push_back(GridParallelRNG(Grids.back()));
PRNGs.back().SeedFixedIntegers(Seeds.back());
// set up values for coarser grids
for(int level = 1; level < mgParams.nLevels; ++level) {
auto Nd = Grids[level - 1]->_ndimension;
auto tmp = Grids[level - 1]->_fdimensions;
assert(tmp.size() == Nd);
Seeds.push_back(std::vector<int>(Nd));
for(int d = 0; d < Nd; ++d) {
tmp[d] /= mgParams.blockSizes[level - 1][d];
Seeds[level][d] = (level)*Nd + d + 1;
}
Grids.push_back(QCD::SpaceTimeGrid::makeFourDimGrid(tmp, Grids[level - 1]->_simd_layout, GridDefaultMpi()));
PRNGs.push_back(GridParallelRNG(Grids[level]));
PRNGs[level].SeedFixedIntegers(Seeds[level]);
}
std::cout << GridLogMessage << "Constructed " << mgParams.nLevels << " levels" << std::endl;
for(int level = 0; level < mgParams.nLevels; ++level) {
std::cout << GridLogMessage << "level = " << level << ":" << std::endl;
Grids[level]->show_decomposition();
}
}
};
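// Illustration: with a 16^4 fine grid and blockSizes = {{4, 4, 4, 4}} the
// constructor above creates a single coarse grid of extent 4^4; the seeds
// level*Nd + d + 1 (e.g. {5, 6, 7, 8} on level 1 for Nd = 4) keep the
// parallel RNGs of different levels distinct.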
template<class Field> class MultiGridPreconditionerBase : public LinearFunction<Field> {
public:
virtual ~MultiGridPreconditionerBase() = default;
virtual void setup() = 0;
virtual void operator()(Field const &in, Field &out) = 0;
virtual void runChecks(RealD tolerance) = 0;
virtual void reportTimings() = 0;
};
template<class Fobj, class CComplex, int nBasis, int nCoarserLevels, class Matrix>
class MultiGridPreconditioner : public MultiGridPreconditionerBase<Lattice<Fobj>> {
public:
/////////////////////////////////////////////
// Type Definitions
/////////////////////////////////////////////
// clang-format off
typedef Aggregation<Fobj, CComplex, nBasis> Aggregates;
typedef CoarsenedMatrix<Fobj, CComplex, nBasis> CoarseDiracMatrix;
typedef typename Aggregates::CoarseVector CoarseVector;
typedef typename Aggregates::siteVector CoarseSiteVector;
typedef Matrix FineDiracMatrix;
typedef typename Aggregates::FineField FineVector;
typedef MultiGridPreconditioner<CoarseSiteVector, iScalar<CComplex>, nBasis, nCoarserLevels - 1, CoarseDiracMatrix> NextPreconditionerLevel;
// clang-format on
/////////////////////////////////////////////
// Member Data
/////////////////////////////////////////////
int _CurrentLevel;
int _NextCoarserLevel;
MultiGridParams &_MultiGridParams;
LevelInfo & _LevelInfo;
FineDiracMatrix & _FineMatrix;
FineDiracMatrix & _SmootherMatrix;
Aggregates _Aggregates;
CoarseDiracMatrix _CoarseMatrix;
std::unique_ptr<NextPreconditionerLevel> _NextPreconditionerLevel;
GridStopWatch _SetupTotalTimer;
GridStopWatch _SetupCreateSubspaceTimer;
GridStopWatch _SetupProjectToChiralitiesTimer;
GridStopWatch _SetupCoarsenOperatorTimer;
GridStopWatch _SetupNextLevelTimer;
GridStopWatch _SolveTotalTimer;
GridStopWatch _SolveRestrictionTimer;
GridStopWatch _SolveProlongationTimer;
GridStopWatch _SolveSmootherTimer;
GridStopWatch _SolveNextLevelTimer;
/////////////////////////////////////////////
// Member Functions
/////////////////////////////////////////////
MultiGridPreconditioner(MultiGridParams &mgParams, LevelInfo &LvlInfo, FineDiracMatrix &FineMat, FineDiracMatrix &SmootherMat)
: _CurrentLevel(mgParams.nLevels - (nCoarserLevels + 1)) // _CurrentLevel == 0 corresponds to the finest level
, _NextCoarserLevel(_CurrentLevel + 1) // incremented for instances on coarser levels
, _MultiGridParams(mgParams)
, _LevelInfo(LvlInfo)
, _FineMatrix(FineMat)
, _SmootherMatrix(SmootherMat)
, _Aggregates(_LevelInfo.Grids[_NextCoarserLevel], _LevelInfo.Grids[_CurrentLevel], 0)
, _CoarseMatrix(*_LevelInfo.Grids[_NextCoarserLevel]) {
_NextPreconditionerLevel
= std::unique_ptr<NextPreconditionerLevel>(new NextPreconditionerLevel(_MultiGridParams, _LevelInfo, _CoarseMatrix, _CoarseMatrix));
resetTimers();
}
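// The recursion set up here bottoms out in the nCoarserLevels == 0 specialization
// further down; on every other level the coarse operator built by this level is
// handed to the next level as both its fine and its smoother matrix.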
void setup() {
_SetupTotalTimer.Start();
static_assert((nBasis & 0x1) == 0, "MG Preconditioner only supports an even number of basis vectors");
int nb = nBasis / 2;
MdagMLinearOperator<FineDiracMatrix, FineVector> fineMdagMOp(_FineMatrix);
_SetupCreateSubspaceTimer.Start();
_Aggregates.CreateSubspace(_LevelInfo.PRNGs[_CurrentLevel], fineMdagMOp, nb);
_SetupCreateSubspaceTimer.Stop();
_SetupProjectToChiralitiesTimer.Start();
FineVector tmp1(_Aggregates.subspace[0]._grid);
FineVector tmp2(_Aggregates.subspace[0]._grid);
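// Chiral doubling: from each of the nb generated vectors v_n build the two
// chiral projections P_± v_n = (v_n ± Γ5 v_n) / 2 and store them in slots n
// and n + nb, so that the subspace ends up holding nBasis = 2 * nb vectors.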
for(int n = 0; n < nb; n++) {
tmp1 = _Aggregates.subspace[n];
G5C(tmp2, _Aggregates.subspace[n]);
axpby(_Aggregates.subspace[n], 0.5, 0.5, tmp1, tmp2);
axpby(_Aggregates.subspace[n + nb], 0.5, -0.5, tmp1, tmp2);
std::cout << GridLogMG << " Level " << _CurrentLevel << ": Chirally doubled vector " << n << ". "
<< "norm2(vec[" << n << "]) = " << norm2(_Aggregates.subspace[n]) << ". "
<< "norm2(vec[" << n + nb << "]) = " << norm2(_Aggregates.subspace[n + nb]) << std::endl;
}
_SetupProjectToChiralitiesTimer.Stop();
_SetupCoarsenOperatorTimer.Start();
_CoarseMatrix.CoarsenOperator(_LevelInfo.Grids[_CurrentLevel], fineMdagMOp, _Aggregates);
_SetupCoarsenOperatorTimer.Stop();
_SetupNextLevelTimer.Start();
_NextPreconditionerLevel->setup();
_SetupNextLevelTimer.Stop();
_SetupTotalTimer.Stop();
}
virtual void operator()(FineVector const &in, FineVector &out) {
conformable(_LevelInfo.Grids[_CurrentLevel], in._grid);
conformable(in, out);
// TODO: implement a W-cycle
if(_MultiGridParams.kCycle)
kCycle(in, out);
else
vCycle(in, out);
}
void vCycle(FineVector const &in, FineVector &out) {
_SolveTotalTimer.Start();
RealD inputNorm = norm2(in);
CoarseVector coarseSrc(_LevelInfo.Grids[_NextCoarserLevel]);
CoarseVector coarseSol(_LevelInfo.Grids[_NextCoarserLevel]);
coarseSol = zero;
FineVector fineTmp(in._grid);
auto maxSmootherIter = _MultiGridParams.smootherMaxOuterIter[_CurrentLevel] * _MultiGridParams.smootherMaxInnerIter[_CurrentLevel];
TrivialPrecon<FineVector> fineTrivialPreconditioner;
FlexibleGeneralisedMinimalResidual<FineVector> fineFGMRES(_MultiGridParams.smootherTol[_CurrentLevel],
maxSmootherIter,
fineTrivialPreconditioner,
_MultiGridParams.smootherMaxInnerIter[_CurrentLevel],
false);
MdagMLinearOperator<FineDiracMatrix, FineVector> fineMdagMOp(_FineMatrix);
MdagMLinearOperator<FineDiracMatrix, FineVector> fineSmootherMdagMOp(_SmootherMatrix);
_SolveRestrictionTimer.Start();
_Aggregates.ProjectToSubspace(coarseSrc, in);
_SolveRestrictionTimer.Stop();
_SolveNextLevelTimer.Start();
(*_NextPreconditionerLevel)(coarseSrc, coarseSol);
_SolveNextLevelTimer.Stop();
_SolveProlongationTimer.Start();
_Aggregates.PromoteFromSubspace(coarseSol, out);
_SolveProlongationTimer.Stop();
fineMdagMOp.Op(out, fineTmp);
fineTmp = in - fineTmp;
auto r = norm2(fineTmp);
auto residualAfterCoarseGridCorrection = std::sqrt(r / inputNorm);
_SolveSmootherTimer.Start();
fineFGMRES(fineSmootherMdagMOp, in, out);
_SolveSmootherTimer.Stop();
fineMdagMOp.Op(out, fineTmp);
fineTmp = in - fineTmp;
r = norm2(fineTmp);
auto residualAfterPostSmoother = std::sqrt(r / inputNorm);
std::cout << GridLogMG << " Level " << _CurrentLevel << ": V-cycle: Input norm = " << std::sqrt(inputNorm)
<< " Coarse residual = " << residualAfterCoarseGridCorrection << " Post-Smoother residual = " << residualAfterPostSmoother
<< std::endl;
_SolveTotalTimer.Stop();
}
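// The K-cycle differs from the V-cycle above only in the coarse-grid correction:
// instead of a single application of the next-level preconditioner, the coarse
// system is solved with FGMRES (to kCycleTol, at most
// kCycleMaxOuterIter * kCycleMaxInnerIter iterations) preconditioned by the
// next level.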
void kCycle(FineVector const &in, FineVector &out) {
_SolveTotalTimer.Start();
RealD inputNorm = norm2(in);
CoarseVector coarseSrc(_LevelInfo.Grids[_NextCoarserLevel]);
CoarseVector coarseSol(_LevelInfo.Grids[_NextCoarserLevel]);
coarseSol = zero;
FineVector fineTmp(in._grid);
auto smootherMaxIter = _MultiGridParams.smootherMaxOuterIter[_CurrentLevel] * _MultiGridParams.smootherMaxInnerIter[_CurrentLevel];
auto kCycleMaxIter = _MultiGridParams.kCycleMaxOuterIter[_CurrentLevel] * _MultiGridParams.kCycleMaxInnerIter[_CurrentLevel];
TrivialPrecon<FineVector> fineTrivialPreconditioner;
FlexibleGeneralisedMinimalResidual<FineVector> fineFGMRES(_MultiGridParams.smootherTol[_CurrentLevel],
smootherMaxIter,
fineTrivialPreconditioner,
_MultiGridParams.smootherMaxInnerIter[_CurrentLevel],
false);
FlexibleGeneralisedMinimalResidual<CoarseVector> coarseFGMRES(_MultiGridParams.kCycleTol[_CurrentLevel],
kCycleMaxIter,
*_NextPreconditionerLevel,
_MultiGridParams.kCycleMaxInnerIter[_CurrentLevel],
false);
MdagMLinearOperator<FineDiracMatrix, FineVector> fineMdagMOp(_FineMatrix);
MdagMLinearOperator<FineDiracMatrix, FineVector> fineSmootherMdagMOp(_SmootherMatrix);
MdagMLinearOperator<CoarseDiracMatrix, CoarseVector> coarseMdagMOp(_CoarseMatrix);
_SolveRestrictionTimer.Start();
_Aggregates.ProjectToSubspace(coarseSrc, in);
_SolveRestrictionTimer.Stop();
_SolveNextLevelTimer.Start();
coarseFGMRES(coarseMdagMOp, coarseSrc, coarseSol);
_SolveNextLevelTimer.Stop();
_SolveProlongationTimer.Start();
_Aggregates.PromoteFromSubspace(coarseSol, out);
_SolveProlongationTimer.Stop();
fineMdagMOp.Op(out, fineTmp);
fineTmp = in - fineTmp;
auto r = norm2(fineTmp);
auto residualAfterCoarseGridCorrection = std::sqrt(r / inputNorm);
_SolveSmootherTimer.Start();
fineFGMRES(fineSmootherMdagMOp, in, out);
_SolveSmootherTimer.Stop();
fineMdagMOp.Op(out, fineTmp);
fineTmp = in - fineTmp;
r = norm2(fineTmp);
auto residualAfterPostSmoother = std::sqrt(r / inputNorm);
std::cout << GridLogMG << " Level " << _CurrentLevel << ": K-cycle: Input norm = " << std::sqrt(inputNorm)
<< " Coarse residual = " << residualAfterCoarseGridCorrection << " Post-Smoother residual = " << residualAfterPostSmoother
<< std::endl;
_SolveTotalTimer.Stop();
}
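// runChecks verifies the construction on this level and then recurses:
//   (1) M v == (Mdiag + Σ_μ Mdir_μ) v       directional decomposition of the fine operator
//   (2) (1 - P R) v_i == 0                  R followed by P reproduces every subspace vector
//   (3) (1 - R P) v_c == 0                  P followed by R is the identity on the coarse space
//   (4) (R D P - D_c) v_c == 0              the coarse operator is the Galerkin product R D P
//   (5) Im(v_c^dag D_c^dag D_c v_c) == 0    the coarse normal operator is Hermitian
// Any relative deviation above the given tolerance aborts the run.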
void runChecks(RealD tolerance) {
std::vector<FineVector> fineTmps(7, _LevelInfo.Grids[_CurrentLevel]);
std::vector<CoarseVector> coarseTmps(4, _LevelInfo.Grids[_NextCoarserLevel]);
MdagMLinearOperator<FineDiracMatrix, FineVector> fineMdagMOp(_FineMatrix);
MdagMLinearOperator<CoarseDiracMatrix, CoarseVector> coarseMdagMOp(_CoarseMatrix);
std::cout << GridLogMG << " Level " << _CurrentLevel << ": **************************************************" << std::endl;
std::cout << GridLogMG << " Level " << _CurrentLevel << ": MG correctness check: 0 == (M - (Mdiag + Σ_μ Mdir_μ)) * v" << std::endl;
std::cout << GridLogMG << " Level " << _CurrentLevel << ": **************************************************" << std::endl;
random(_LevelInfo.PRNGs[_CurrentLevel], fineTmps[0]);
fineMdagMOp.Op(fineTmps[0], fineTmps[1]); // M * v
fineMdagMOp.OpDiag(fineTmps[0], fineTmps[2]); // Mdiag * v
fineTmps[4] = zero;
for(int dir = 0; dir < 4; dir++) { // Σ_μ Mdir_μ * v
for(auto disp : {+1, -1}) {
fineMdagMOp.OpDir(fineTmps[0], fineTmps[3], dir, disp);
fineTmps[4] = fineTmps[4] + fineTmps[3];
}
}
fineTmps[5] = fineTmps[2] + fineTmps[4]; // (Mdiag + Σ_μ Mdir_μ) * v
fineTmps[6] = fineTmps[1] - fineTmps[5];
auto deviation = std::sqrt(norm2(fineTmps[6]) / norm2(fineTmps[1]));
std::cout << GridLogMG << " Level " << _CurrentLevel << ": norm2(M * v) = " << norm2(fineTmps[1]) << std::endl;
std::cout << GridLogMG << " Level " << _CurrentLevel << ": norm2(Mdiag * v) = " << norm2(fineTmps[2]) << std::endl;
std::cout << GridLogMG << " Level " << _CurrentLevel << ": norm2(Σ_μ Mdir_μ * v) = " << norm2(fineTmps[4]) << std::endl;
std::cout << GridLogMG << " Level " << _CurrentLevel << ": norm2((Mdiag + Σ_μ Mdir_μ) * v) = " << norm2(fineTmps[5]) << std::endl;
std::cout << GridLogMG << " Level " << _CurrentLevel << ": relative deviation = " << deviation;
if(deviation > tolerance) {
std::cout << " > " << tolerance << " -> check failed" << std::endl;
abort();
} else {
std::cout << " < " << tolerance << " -> check passed" << std::endl;
}
std::cout << GridLogMG << " Level " << _CurrentLevel << ": **************************************************" << std::endl;
std::cout << GridLogMG << " Level " << _CurrentLevel << ": MG correctness check: 0 == (1 - P R) v" << std::endl;
std::cout << GridLogMG << " Level " << _CurrentLevel << ": **************************************************" << std::endl;
for(auto i = 0; i < _Aggregates.subspace.size(); ++i) {
_Aggregates.ProjectToSubspace(coarseTmps[0], _Aggregates.subspace[i]); // R v_i
_Aggregates.PromoteFromSubspace(coarseTmps[0], fineTmps[0]); // P R v_i
fineTmps[1] = _Aggregates.subspace[i] - fineTmps[0]; // v_i - P R v_i
deviation = std::sqrt(norm2(fineTmps[1]) / norm2(_Aggregates.subspace[i]));
std::cout << GridLogMG << " Level " << _CurrentLevel << ": Vector " << i << ": norm2(v_i) = " << norm2(_Aggregates.subspace[i])
<< " | norm2(R v_i) = " << norm2(coarseTmps[0]) << " | norm2(P R v_i) = " << norm2(fineTmps[0])
<< " | relative deviation = " << deviation;
if(deviation > tolerance) {
std::cout << " > " << tolerance << " -> check failed" << std::endl;
abort();
} else {
std::cout << " < " << tolerance << " -> check passed" << std::endl;
}
}
std::cout << GridLogMG << " Level " << _CurrentLevel << ": **************************************************" << std::endl;
std::cout << GridLogMG << " Level " << _CurrentLevel << ": MG correctness check: 0 == (1 - R P) v_c" << std::endl;
std::cout << GridLogMG << " Level " << _CurrentLevel << ": **************************************************" << std::endl;
random(_LevelInfo.PRNGs[_NextCoarserLevel], coarseTmps[0]);
_Aggregates.PromoteFromSubspace(coarseTmps[0], fineTmps[0]); // P v_c
_Aggregates.ProjectToSubspace(coarseTmps[1], fineTmps[0]); // R P v_c
coarseTmps[2] = coarseTmps[0] - coarseTmps[1]; // v_c - R P v_c
deviation = std::sqrt(norm2(coarseTmps[2]) / norm2(coarseTmps[0]));
std::cout << GridLogMG << " Level " << _CurrentLevel << ": norm2(v_c) = " << norm2(coarseTmps[0])
<< " | norm2(R P v_c) = " << norm2(coarseTmps[1]) << " | norm2(P v_c) = " << norm2(fineTmps[0])
<< " | relative deviation = " << deviation;
if(deviation > tolerance) {
std::cout << " > " << tolerance << " -> check failed" << std::endl;
abort();
} else {
std::cout << " < " << tolerance << " -> check passed" << std::endl;
}
std::cout << GridLogMG << " Level " << _CurrentLevel << ": **************************************************" << std::endl;
std::cout << GridLogMG << " Level " << _CurrentLevel << ": MG correctness check: 0 == (R D P - D_c) v_c" << std::endl;
std::cout << GridLogMG << " Level " << _CurrentLevel << ": **************************************************" << std::endl;
random(_LevelInfo.PRNGs[_NextCoarserLevel], coarseTmps[0]);
_Aggregates.PromoteFromSubspace(coarseTmps[0], fineTmps[0]); // P v_c
fineMdagMOp.Op(fineTmps[0], fineTmps[1]); // D P v_c
_Aggregates.ProjectToSubspace(coarseTmps[1], fineTmps[1]); // R D P v_c
coarseMdagMOp.Op(coarseTmps[0], coarseTmps[2]); // D_c v_c
coarseTmps[3] = coarseTmps[1] - coarseTmps[2]; // R D P v_c - D_c v_c
deviation = std::sqrt(norm2(coarseTmps[3]) / norm2(coarseTmps[1]));
std::cout << GridLogMG << " Level " << _CurrentLevel << ": norm2(R D P v_c) = " << norm2(coarseTmps[1])
<< " | norm2(D_c v_c) = " << norm2(coarseTmps[2]) << " | relative deviation = " << deviation;
if(deviation > tolerance) {
std::cout << " > " << tolerance << " -> check failed" << std::endl;
abort();
} else {
std::cout << " < " << tolerance << " -> check passed" << std::endl;
}
std::cout << GridLogMG << " Level " << _CurrentLevel << ": **************************************************" << std::endl;
std::cout << GridLogMG << " Level " << _CurrentLevel << ": MG correctness check: 0 == |Im(v_c^dag D_c^dag D_c v_c)|" << std::endl;
std::cout << GridLogMG << " Level " << _CurrentLevel << ": **************************************************" << std::endl;
random(_LevelInfo.PRNGs[_NextCoarserLevel], coarseTmps[0]);
coarseMdagMOp.Op(coarseTmps[0], coarseTmps[1]); // D_c v_c
coarseMdagMOp.AdjOp(coarseTmps[1], coarseTmps[2]); // D_c^dag D_c v_c
auto dot = innerProduct(coarseTmps[0], coarseTmps[2]); //v_c^dag D_c^dag D_c v_c
deviation = std::abs(imag(dot)) / std::abs(real(dot));
std::cout << GridLogMG << " Level " << _CurrentLevel << ": Re(v_c^dag D_c^dag D_c v_c) = " << real(dot)
<< " | Im(v_c^dag D_c^dag D_c v_c) = " << imag(dot) << " | relative deviation = " << deviation;
if(deviation > tolerance) {
std::cout << " > " << tolerance << " -> check failed" << std::endl;
abort();
} else {
std::cout << " < " << tolerance << " -> check passed" << std::endl;
}
_NextPreconditionerLevel->runChecks(tolerance);
}
void reportTimings() {
// clang-format off
std::cout << GridLogMG << " Level " << _CurrentLevel << ": Time elapsed: Sum total " << _SetupTotalTimer.Elapsed() + _SolveTotalTimer.Elapsed() << std::endl;
std::cout << GridLogMG << " Level " << _CurrentLevel << ": Time elapsed: Setup total " << _SetupTotalTimer.Elapsed() << std::endl;
std::cout << GridLogMG << " Level " << _CurrentLevel << ": Time elapsed: Setup create subspace " << _SetupCreateSubspaceTimer.Elapsed() << std::endl;
std::cout << GridLogMG << " Level " << _CurrentLevel << ": Time elapsed: Setup project chiral " << _SetupProjectToChiralitiesTimer.Elapsed() << std::endl;
std::cout << GridLogMG << " Level " << _CurrentLevel << ": Time elapsed: Setup coarsen operator " << _SetupCoarsenOperatorTimer.Elapsed() << std::endl;
std::cout << GridLogMG << " Level " << _CurrentLevel << ": Time elapsed: Setup next level " << _SetupNextLevelTimer.Elapsed() << std::endl;
std::cout << GridLogMG << " Level " << _CurrentLevel << ": Time elapsed: Solve total " << _SolveTotalTimer.Elapsed() << std::endl;
std::cout << GridLogMG << " Level " << _CurrentLevel << ": Time elapsed: Solve restriction " << _SolveRestrictionTimer.Elapsed() << std::endl;
std::cout << GridLogMG << " Level " << _CurrentLevel << ": Time elapsed: Solve prolongation " << _SolveProlongationTimer.Elapsed() << std::endl;
std::cout << GridLogMG << " Level " << _CurrentLevel << ": Time elapsed: Solve smoother " << _SolveSmootherTimer.Elapsed() << std::endl;
std::cout << GridLogMG << " Level " << _CurrentLevel << ": Time elapsed: Solve next level " << _SolveNextLevelTimer.Elapsed() << std::endl;
// clang-format on
_NextPreconditionerLevel->reportTimings();
}
void resetTimers() {
_SetupTotalTimer.Reset();
_SetupCreateSubspaceTimer.Reset();
_SetupProjectToChiralitiesTimer.Reset();
_SetupCoarsenOperatorTimer.Reset();
_SetupNextLevelTimer.Reset();
_SolveTotalTimer.Reset();
_SolveRestrictionTimer.Reset();
_SolveProlongationTimer.Reset();
_SolveSmootherTimer.Reset();
_SolveNextLevelTimer.Reset();
_NextPreconditionerLevel->resetTimers();
}
};
// Specialization for the coarsest level
template<class Fobj, class CComplex, int nBasis, class Matrix>
class MultiGridPreconditioner<Fobj, CComplex, nBasis, 0, Matrix> : public MultiGridPreconditionerBase<Lattice<Fobj>> {
public:
/////////////////////////////////////////////
// Type Definitions
/////////////////////////////////////////////
typedef Matrix FineDiracMatrix;
typedef Lattice<Fobj> FineVector;
/////////////////////////////////////////////
// Member Data
/////////////////////////////////////////////
int _CurrentLevel;
MultiGridParams &_MultiGridParams;
LevelInfo & _LevelInfo;
FineDiracMatrix &_FineMatrix;
FineDiracMatrix &_SmootherMatrix;
GridStopWatch _SolveTotalTimer;
GridStopWatch _SolveSmootherTimer;
/////////////////////////////////////////////
// Member Functions
/////////////////////////////////////////////
MultiGridPreconditioner(MultiGridParams &mgParams, LevelInfo &LvlInfo, FineDiracMatrix &FineMat, FineDiracMatrix &SmootherMat)
: _CurrentLevel(mgParams.nLevels - (0 + 1))
, _MultiGridParams(mgParams)
, _LevelInfo(LvlInfo)
, _FineMatrix(FineMat)
, _SmootherMatrix(SmootherMat) {
resetTimers();
}
void setup() {}
virtual void operator()(FineVector const &in, FineVector &out) {
_SolveTotalTimer.Start();
conformable(_LevelInfo.Grids[_CurrentLevel], in._grid);
conformable(in, out);
auto coarseSolverMaxIter = _MultiGridParams.coarseSolverMaxOuterIter * _MultiGridParams.coarseSolverMaxInnerIter;
// On the coarsest level there is no further coarse grid; the "fine" operator here is the coarsest-level operator, solved directly with FGMRES
TrivialPrecon<FineVector> fineTrivialPreconditioner;
FlexibleGeneralisedMinimalResidual<FineVector> fineFGMRES(
_MultiGridParams.coarseSolverTol, coarseSolverMaxIter, fineTrivialPreconditioner, _MultiGridParams.coarseSolverMaxInnerIter, false);
MdagMLinearOperator<FineDiracMatrix, FineVector> fineMdagMOp(_FineMatrix);
_SolveSmootherTimer.Start();
fineFGMRES(fineMdagMOp, in, out);
_SolveSmootherTimer.Stop();
_SolveTotalTimer.Stop();
}
void runChecks(RealD tolerance) {}
void reportTimings() {
// clang-format off
std::cout << GridLogMG << " Level " << _CurrentLevel << ": Time elapsed: Solve total " << _SolveTotalTimer.Elapsed() << std::endl;
std::cout << GridLogMG << " Level " << _CurrentLevel << ": Time elapsed: Solve smoother " << _SolveSmootherTimer.Elapsed() << std::endl;
// clang-format on
}
void resetTimers() {
_SolveTotalTimer.Reset();
_SolveSmootherTimer.Reset();
}
};
template<class Fobj, class CComplex, int nBasis, int nLevels, class Matrix>
using NLevelMGPreconditioner = MultiGridPreconditioner<Fobj, CComplex, nBasis, nLevels - 1, Matrix>;
template<class Fobj, class CComplex, int nBasis, class Matrix>
std::unique_ptr<MultiGridPreconditionerBase<Lattice<Fobj>>>
createMGInstance(MultiGridParams &mgParams, LevelInfo &levelInfo, Matrix &FineMat, Matrix &SmootherMat) {
#define CASE_FOR_N_LEVELS(nLevels) \
case nLevels: \
return std::unique_ptr<NLevelMGPreconditioner<Fobj, CComplex, nBasis, nLevels, Matrix>>( \
new NLevelMGPreconditioner<Fobj, CComplex, nBasis, nLevels, Matrix>(mgParams, levelInfo, FineMat, SmootherMat)); \
break;
switch(mgParams.nLevels) {
CASE_FOR_N_LEVELS(2);
CASE_FOR_N_LEVELS(3);
CASE_FOR_N_LEVELS(4);
default:
std::cout << GridLogError << "We currently only support nLevels ∈ {2, 3, 4}" << std::endl;
exit(EXIT_FAILURE);
break;
}
#undef CASE_FOR_N_LEVELS
}
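// Typical usage, sketched after Test_wilson_mg.cc further down (the names FGrid, Dw,
// MdagMOpDw, src and result are those of that test, not part of this header):
//
//   MultiGridParams mgParams;                   // e.g. read from XML
//   checkParameterValidity(mgParams);
//   LevelInfo levelInfo(FGrid, mgParams);
//   auto mgPrecon = createMGInstance<vSpinColourVector, vTComplex, nbasis, WilsonFermionR>(
//       mgParams, levelInfo, Dw, Dw);           // same operator for coarsening and smoothing
//   mgPrecon->setup();
//   FlexibleGeneralisedMinimalResidual<LatticeFermion> FGMRES(1.0e-12, 50000, *mgPrecon, 100, false);
//   FGMRES(MdagMOpDw, src, result);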
}
#endif


@ -0,0 +1,72 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./tests/solver/Test_staggered_cagmres_unprec.cc
Copyright (C) 2015-2018
Author: Daniel Richtmann <daniel.richtmann@ur.de>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution
directory
*************************************************************************************/
/* END LEGAL */
#include <Grid/Grid.h>
using namespace Grid;
using namespace Grid::QCD;
int main (int argc, char ** argv)
{
typedef typename ImprovedStaggeredFermionR::FermionField FermionField;
typedef typename ImprovedStaggeredFermionR::ComplexField ComplexField;
typename ImprovedStaggeredFermionR::ImplParams params;
Grid_init(&argc,&argv);
std::vector<int> latt_size = GridDefaultLatt();
std::vector<int> simd_layout = GridDefaultSimd(Nd,vComplex::Nsimd());
std::vector<int> mpi_layout = GridDefaultMpi();
GridCartesian Grid(latt_size,simd_layout,mpi_layout);
GridRedBlackCartesian RBGrid(&Grid);
std::vector<int> seeds({1,2,3,4});
GridParallelRNG pRNG(&Grid); pRNG.SeedFixedIntegers(seeds);
FermionField src(&Grid); random(pRNG,src);
RealD nrm = norm2(src);
FermionField result(&Grid); result=zero;
LatticeGaugeField Umu(&Grid); SU3::HotConfiguration(pRNG,Umu);
double volume=1;
for(int mu=0;mu<Nd;mu++){
volume=volume*latt_size[mu];
}
RealD mass=0.5;
RealD c1=9.0/8.0;
RealD c2=-1.0/24.0;
RealD u0=1.0;
ImprovedStaggeredFermionR Ds(Umu,Umu,Grid,RBGrid,mass,c1,c2,u0);
MdagMLinearOperator<ImprovedStaggeredFermionR,FermionField> HermOp(Ds);
CommunicationAvoidingGeneralisedMinimalResidual<FermionField> CAGMRES(1.0e-8, 10000, 25);
CAGMRES(HermOp,src,result);
Grid_finalize();
}


@ -0,0 +1,75 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./tests/solver/Test_staggered_fcagmres_prec.cc
Copyright (C) 2015-2018
Author: Daniel Richtmann <daniel.richtmann@ur.de>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution
directory
*************************************************************************************/
/* END LEGAL */
#include <Grid/Grid.h>
using namespace Grid;
using namespace Grid::QCD;
int main (int argc, char ** argv)
{
typedef typename ImprovedStaggeredFermionR::FermionField FermionField;
typedef typename ImprovedStaggeredFermionR::ComplexField ComplexField;
typename ImprovedStaggeredFermionR::ImplParams params;
Grid_init(&argc,&argv);
std::vector<int> latt_size = GridDefaultLatt();
std::vector<int> simd_layout = GridDefaultSimd(Nd,vComplex::Nsimd());
std::vector<int> mpi_layout = GridDefaultMpi();
GridCartesian Grid(latt_size,simd_layout,mpi_layout);
GridRedBlackCartesian RBGrid(&Grid);
std::vector<int> seeds({1,2,3,4});
GridParallelRNG pRNG(&Grid); pRNG.SeedFixedIntegers(seeds);
FermionField src(&Grid); random(pRNG,src);
RealD nrm = norm2(src);
FermionField result(&Grid); result=zero;
LatticeGaugeField Umu(&Grid); SU3::HotConfiguration(pRNG,Umu);
double volume=1;
for(int mu=0;mu<Nd;mu++){
volume=volume*latt_size[mu];
}
RealD mass=0.5;
RealD c1=9.0/8.0;
RealD c2=-1.0/24.0;
RealD u0=1.0;
ImprovedStaggeredFermionR Ds(Umu,Umu,Grid,RBGrid,mass,c1,c2,u0);
MdagMLinearOperator<ImprovedStaggeredFermionR,FermionField> HermOp(Ds);
TrivialPrecon<FermionField> simple;
FlexibleCommunicationAvoidingGeneralisedMinimalResidual<FermionField> FCAGMRES(1.0e-8, 10000, simple, 25);
FCAGMRES(HermOp,src,result);
Grid_finalize();
}


@ -0,0 +1,75 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./tests/solver/Test_staggered_fgmres_prec.cc
Copyright (C) 2015-2018
Author: Daniel Richtmann <daniel.richtmann@ur.de>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution
directory
*************************************************************************************/
/* END LEGAL */
#include <Grid/Grid.h>
using namespace Grid;
using namespace Grid::QCD;
int main (int argc, char ** argv)
{
typedef typename ImprovedStaggeredFermionR::FermionField FermionField;
typedef typename ImprovedStaggeredFermionR::ComplexField ComplexField;
typename ImprovedStaggeredFermionR::ImplParams params;
Grid_init(&argc,&argv);
std::vector<int> latt_size = GridDefaultLatt();
std::vector<int> simd_layout = GridDefaultSimd(Nd,vComplex::Nsimd());
std::vector<int> mpi_layout = GridDefaultMpi();
GridCartesian Grid(latt_size,simd_layout,mpi_layout);
GridRedBlackCartesian RBGrid(&Grid);
std::vector<int> seeds({1,2,3,4});
GridParallelRNG pRNG(&Grid); pRNG.SeedFixedIntegers(seeds);
FermionField src(&Grid); random(pRNG,src);
RealD nrm = norm2(src);
FermionField result(&Grid); result=zero;
LatticeGaugeField Umu(&Grid); SU3::HotConfiguration(pRNG,Umu);
double volume=1;
for(int mu=0;mu<Nd;mu++){
volume=volume*latt_size[mu];
}
RealD mass=0.5;
RealD c1=9.0/8.0;
RealD c2=-1.0/24.0;
RealD u0=1.0;
ImprovedStaggeredFermionR Ds(Umu,Umu,Grid,RBGrid,mass,c1,c2,u0);
MdagMLinearOperator<ImprovedStaggeredFermionR,FermionField> HermOp(Ds);
TrivialPrecon<FermionField> simple;
FlexibleGeneralisedMinimalResidual<FermionField> FGMRES(1.0e-8, 10000, simple, 25);
FGMRES(HermOp,src,result);
Grid_finalize();
}


@ -0,0 +1,72 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./tests/solver/Test_staggered_gmres_unprec.cc
Copyright (C) 2015-2018
Author: Daniel Richtmann <daniel.richtmann@ur.de>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution
directory
*************************************************************************************/
/* END LEGAL */
#include <Grid/Grid.h>
using namespace Grid;
using namespace Grid::QCD;
int main (int argc, char ** argv)
{
typedef typename ImprovedStaggeredFermionR::FermionField FermionField;
typedef typename ImprovedStaggeredFermionR::ComplexField ComplexField;
typename ImprovedStaggeredFermionR::ImplParams params;
Grid_init(&argc,&argv);
std::vector<int> latt_size = GridDefaultLatt();
std::vector<int> simd_layout = GridDefaultSimd(Nd,vComplex::Nsimd());
std::vector<int> mpi_layout = GridDefaultMpi();
GridCartesian Grid(latt_size,simd_layout,mpi_layout);
GridRedBlackCartesian RBGrid(&Grid);
std::vector<int> seeds({1,2,3,4});
GridParallelRNG pRNG(&Grid); pRNG.SeedFixedIntegers(seeds);
FermionField src(&Grid); random(pRNG,src);
RealD nrm = norm2(src);
FermionField result(&Grid); result=zero;
LatticeGaugeField Umu(&Grid); SU3::HotConfiguration(pRNG,Umu);
double volume=1;
for(int mu=0;mu<Nd;mu++){
volume=volume*latt_size[mu];
}
RealD mass=0.5;
RealD c1=9.0/8.0;
RealD c2=-1.0/24.0;
RealD u0=1.0;
ImprovedStaggeredFermionR Ds(Umu,Umu,Grid,RBGrid,mass,c1,c2,u0);
MdagMLinearOperator<ImprovedStaggeredFermionR,FermionField> HermOp(Ds);
GeneralisedMinimalResidual<FermionField> GMRES(1.0e-8, 10000, 25);
GMRES(HermOp,src,result);
Grid_finalize();
}


@ -0,0 +1,72 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./tests/solver/Test_staggered_mr_unprec.cc
Copyright (C) 2015-2018
Author: Daniel Richtmann <daniel.richtmann@ur.de>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution
directory
*************************************************************************************/
/* END LEGAL */
#include <Grid/Grid.h>
using namespace Grid;
using namespace Grid::QCD;
int main (int argc, char ** argv)
{
typedef typename ImprovedStaggeredFermionR::FermionField FermionField;
typedef typename ImprovedStaggeredFermionR::ComplexField ComplexField;
typename ImprovedStaggeredFermionR::ImplParams params;
Grid_init(&argc,&argv);
std::vector<int> latt_size = GridDefaultLatt();
std::vector<int> simd_layout = GridDefaultSimd(Nd,vComplex::Nsimd());
std::vector<int> mpi_layout = GridDefaultMpi();
GridCartesian Grid(latt_size,simd_layout,mpi_layout);
GridRedBlackCartesian RBGrid(&Grid);
std::vector<int> seeds({1,2,3,4});
GridParallelRNG pRNG(&Grid); pRNG.SeedFixedIntegers(seeds);
FermionField src(&Grid); random(pRNG,src);
RealD nrm = norm2(src);
FermionField result(&Grid); result=zero;
LatticeGaugeField Umu(&Grid); SU3::HotConfiguration(pRNG,Umu);
double volume=1;
for(int mu=0;mu<Nd;mu++){
volume=volume*latt_size[mu];
}
RealD mass=0.5;
RealD c1=9.0/8.0;
RealD c2=-1.0/24.0;
RealD u0=1.0;
ImprovedStaggeredFermionR Ds(Umu,Umu,Grid,RBGrid,mass,c1,c2,u0);
MdagMLinearOperator<ImprovedStaggeredFermionR,FermionField> HermOp(Ds);
MinimalResidual<FermionField> MR(1.0e-8,10000,0.8);
MR(HermOp,src,result);
Grid_finalize();
}


@ -0,0 +1,65 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./tests/solver/Test_wilson_cagmres_unprec.cc
Copyright (C) 2015-2018
Author: Daniel Richtmann <daniel.richtmann@ur.de>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution
directory
*************************************************************************************/
/* END LEGAL */
#include <Grid/Grid.h>
using namespace Grid;
using namespace Grid::QCD;
int main (int argc, char ** argv)
{
Grid_init(&argc,&argv);
std::vector<int> latt_size = GridDefaultLatt();
std::vector<int> simd_layout = GridDefaultSimd(Nd,vComplex::Nsimd());
std::vector<int> mpi_layout = GridDefaultMpi();
GridCartesian Grid(latt_size,simd_layout,mpi_layout);
GridRedBlackCartesian RBGrid(&Grid);
std::vector<int> seeds({1,2,3,4});
GridParallelRNG pRNG(&Grid); pRNG.SeedFixedIntegers(seeds);
LatticeFermion src(&Grid); random(pRNG,src);
RealD nrm = norm2(src);
LatticeFermion result(&Grid); result=zero;
LatticeGaugeField Umu(&Grid); SU3::HotConfiguration(pRNG,Umu);
double volume=1;
for(int mu=0;mu<Nd;mu++){
volume=volume*latt_size[mu];
}
RealD mass=0.5;
WilsonFermionR Dw(Umu,Grid,RBGrid,mass);
MdagMLinearOperator<WilsonFermionR,LatticeFermion> HermOp(Dw);
CommunicationAvoidingGeneralisedMinimalResidual<LatticeFermion> CAGMRES(1.0e-8, 10000, 25);
CAGMRES(HermOp,src,result);
Grid_finalize();
}


@ -0,0 +1,68 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./tests/solver/Test_wilson_fcagmres_prec.cc
Copyright (C) 2015-2018
Author: Daniel Richtmann <daniel.richtmann@ur.de>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution
directory
*************************************************************************************/
/* END LEGAL */
#include <Grid/Grid.h>
using namespace Grid;
using namespace Grid::QCD;
int main (int argc, char ** argv)
{
Grid_init(&argc,&argv);
std::vector<int> latt_size = GridDefaultLatt();
std::vector<int> simd_layout = GridDefaultSimd(Nd,vComplex::Nsimd());
std::vector<int> mpi_layout = GridDefaultMpi();
GridCartesian Grid(latt_size,simd_layout,mpi_layout);
GridRedBlackCartesian RBGrid(&Grid);
std::vector<int> seeds({1,2,3,4});
GridParallelRNG pRNG(&Grid); pRNG.SeedFixedIntegers(seeds);
LatticeFermion src(&Grid); random(pRNG,src);
RealD nrm = norm2(src);
LatticeFermion result(&Grid); result=zero;
LatticeGaugeField Umu(&Grid); SU3::HotConfiguration(pRNG,Umu);
double volume=1;
for(int mu=0;mu<Nd;mu++){
volume=volume*latt_size[mu];
}
RealD mass=0.5;
WilsonFermionR Dw(Umu,Grid,RBGrid,mass);
MdagMLinearOperator<WilsonFermionR,LatticeFermion> HermOp(Dw);
TrivialPrecon<LatticeFermion> simple;
FlexibleCommunicationAvoidingGeneralisedMinimalResidual<LatticeFermion> FCAGMRES(1.0e-8, 10000, simple, 25);
FCAGMRES(HermOp,src,result);
Grid_finalize();
}


@ -0,0 +1,68 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./tests/solver/Test_wilson_fgmres_prec.cc
Copyright (C) 2015-2018
Author: Daniel Richtmann <daniel.richtmann@ur.de>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution
directory
*************************************************************************************/
/* END LEGAL */
#include <Grid/Grid.h>
using namespace Grid;
using namespace Grid::QCD;
int main (int argc, char ** argv)
{
Grid_init(&argc,&argv);
std::vector<int> latt_size = GridDefaultLatt();
std::vector<int> simd_layout = GridDefaultSimd(Nd,vComplex::Nsimd());
std::vector<int> mpi_layout = GridDefaultMpi();
GridCartesian Grid(latt_size,simd_layout,mpi_layout);
GridRedBlackCartesian RBGrid(&Grid);
std::vector<int> seeds({1,2,3,4});
GridParallelRNG pRNG(&Grid); pRNG.SeedFixedIntegers(seeds);
LatticeFermion src(&Grid); random(pRNG,src);
RealD nrm = norm2(src);
LatticeFermion result(&Grid); result=zero;
LatticeGaugeField Umu(&Grid); SU3::HotConfiguration(pRNG,Umu);
double volume=1;
for(int mu=0;mu<Nd;mu++){
volume=volume*latt_size[mu];
}
RealD mass=0.5;
WilsonFermionR Dw(Umu,Grid,RBGrid,mass);
MdagMLinearOperator<WilsonFermionR,LatticeFermion> HermOp(Dw);
TrivialPrecon<LatticeFermion> simple;
FlexibleGeneralisedMinimalResidual<LatticeFermion> FGMRES(1.0e-8, 10000, simple, 25);
FGMRES(HermOp,src,result);
Grid_finalize();
}


@ -0,0 +1,65 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./tests/solver/Test_wilson_gmres_unprec.cc
Copyright (C) 2015-2018
Author: Daniel Richtmann <daniel.richtmann@ur.de>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution
directory
*************************************************************************************/
/* END LEGAL */
#include <Grid/Grid.h>
using namespace Grid;
using namespace Grid::QCD;
int main (int argc, char ** argv)
{
Grid_init(&argc,&argv);
std::vector<int> latt_size = GridDefaultLatt();
std::vector<int> simd_layout = GridDefaultSimd(Nd,vComplex::Nsimd());
std::vector<int> mpi_layout = GridDefaultMpi();
GridCartesian Grid(latt_size,simd_layout,mpi_layout);
GridRedBlackCartesian RBGrid(&Grid);
std::vector<int> seeds({1,2,3,4});
GridParallelRNG pRNG(&Grid); pRNG.SeedFixedIntegers(seeds);
LatticeFermion src(&Grid); random(pRNG,src);
RealD nrm = norm2(src);
LatticeFermion result(&Grid); result=zero;
LatticeGaugeField Umu(&Grid); SU3::HotConfiguration(pRNG,Umu);
double volume=1;
for(int mu=0;mu<Nd;mu++){
volume=volume*latt_size[mu];
}
RealD mass=0.5;
WilsonFermionR Dw(Umu,Grid,RBGrid,mass);
MdagMLinearOperator<WilsonFermionR,LatticeFermion> HermOp(Dw);
GeneralisedMinimalResidual<LatticeFermion> GMRES(1.0e-8, 10000, 25);
GMRES(HermOp,src,result);
Grid_finalize();
}


@ -0,0 +1,114 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./tests/solver/Test_wilson_mg.cc
Copyright (C) 2015-2018
Author: Daniel Richtmann <daniel.richtmann@ur.de>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#include <Grid/Grid.h>
#include <Test_multigrid_common.h>
using namespace std;
using namespace Grid;
using namespace Grid::QCD;
int main(int argc, char **argv) {
Grid_init(&argc, &argv);
GridCartesian * FGrid = SpaceTimeGrid::makeFourDimGrid(GridDefaultLatt(), GridDefaultSimd(Nd, vComplex::Nsimd()), GridDefaultMpi());
GridRedBlackCartesian *FrbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(FGrid);
std::vector<int> fSeeds({1, 2, 3, 4});
GridParallelRNG fPRNG(FGrid);
fPRNG.SeedFixedIntegers(fSeeds);
// clang-format off
LatticeFermion src(FGrid); gaussian(fPRNG, src);
LatticeFermion result(FGrid); result = zero;
LatticeGaugeField Umu(FGrid); SU3::HotConfiguration(fPRNG, Umu);
// clang-format on
RealD mass = -0.25;
MultiGridParams mgParams;
std::string inputXml{"./mg_params.xml"};
if(GridCmdOptionExists(argv, argv + argc, "--inputxml")) {
inputXml = GridCmdOptionPayload(argv, argv + argc, "--inputxml");
assert(inputXml.length() != 0);
}
{
XmlWriter writer("mg_params_template.xml");
write(writer, "Params", mgParams);
std::cout << GridLogMessage << "Written mg_params_template.xml" << std::endl;
XmlReader reader(inputXml);
read(reader, "Params", mgParams);
std::cout << GridLogMessage << "Read in " << inputXml << std::endl;
}
checkParameterValidity(mgParams);
std::cout << mgParams << std::endl;
LevelInfo levelInfo(FGrid, mgParams);
// Note: because of chiral doubling the setup only generates nbasis/2 vectors; their chiral projections then fill all nbasis slots
const int nbasis = 40;
WilsonFermionR Dw(Umu, *FGrid, *FrbGrid, mass);
MdagMLinearOperator<WilsonFermionR, LatticeFermion> MdagMOpDw(Dw);
std::cout << GridLogMessage << "**************************************************" << std::endl;
std::cout << GridLogMessage << "Testing Multigrid for Wilson" << std::endl;
std::cout << GridLogMessage << "**************************************************" << std::endl;
TrivialPrecon<LatticeFermion> TrivialPrecon;
auto MGPreconDw = createMGInstance<vSpinColourVector, vTComplex, nbasis, WilsonFermionR>(mgParams, levelInfo, Dw, Dw);
MGPreconDw->setup();
if(GridCmdOptionExists(argv, argv + argc, "--runchecks")) {
RealD toleranceForMGChecks = (getPrecision<LatticeFermion>::value == 1) ? 1e-6 : 1e-13;
MGPreconDw->runChecks(toleranceForMGChecks);
}
std::vector<std::unique_ptr<OperatorFunction<LatticeFermion>>> solversDw;
solversDw.emplace_back(new ConjugateGradient<LatticeFermion>(1.0e-12, 50000, false));
solversDw.emplace_back(new FlexibleGeneralisedMinimalResidual<LatticeFermion>(1.0e-12, 50000, TrivialPrecon, 100, false));
solversDw.emplace_back(new FlexibleGeneralisedMinimalResidual<LatticeFermion>(1.0e-12, 50000, *MGPreconDw, 100, false));
for(auto const &solver : solversDw) {
std::cout << std::endl << "Starting with a new solver" << std::endl;
result = zero;
(*solver)(MdagMOpDw, src, result);
}
MGPreconDw->reportTimings();
Grid_finalize();
}


@ -0,0 +1,166 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./tests/solver/Test_wilson_mg_mp.cc
Copyright (C) 2015-2018
Author: Daniel Richtmann <daniel.richtmann@ur.de>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#include <Grid/Grid.h>
#include <Test_multigrid_common.h>
using namespace std;
using namespace Grid;
using namespace Grid::QCD;
int main(int argc, char **argv) {
Grid_init(&argc, &argv);
// clang-format off
GridCartesian *FGrid_d = SpaceTimeGrid::makeFourDimGrid(GridDefaultLatt(), GridDefaultSimd(Nd, vComplexD::Nsimd()), GridDefaultMpi());
GridCartesian *FGrid_f = SpaceTimeGrid::makeFourDimGrid(GridDefaultLatt(), GridDefaultSimd(Nd, vComplexF::Nsimd()), GridDefaultMpi());
GridRedBlackCartesian *FrbGrid_d = SpaceTimeGrid::makeFourDimRedBlackGrid(FGrid_d);
GridRedBlackCartesian *FrbGrid_f = SpaceTimeGrid::makeFourDimRedBlackGrid(FGrid_f);
// clang-format on
std::vector<int> fSeeds({1, 2, 3, 4});
GridParallelRNG fPRNG(FGrid_d);
fPRNG.SeedFixedIntegers(fSeeds);
// clang-format off
LatticeFermionD src_d(FGrid_d); gaussian(fPRNG, src_d);
LatticeFermionD resultMGD_d(FGrid_d); resultMGD_d = zero;
LatticeFermionD resultMGF_d(FGrid_d); resultMGF_d = zero;
LatticeGaugeFieldD Umu_d(FGrid_d); SU3::HotConfiguration(fPRNG, Umu_d);
LatticeGaugeFieldF Umu_f(FGrid_f); precisionChange(Umu_f, Umu_d);
// clang-format on
RealD mass = -0.25;
MultiGridParams mgParams;
std::string inputXml{"./mg_params.xml"};
if(GridCmdOptionExists(argv, argv + argc, "--inputxml")) {
inputXml = GridCmdOptionPayload(argv, argv + argc, "--inputxml");
assert(inputXml.length() != 0);
}
{
XmlWriter writer("mg_params_template.xml");
write(writer, "Params", mgParams);
std::cout << GridLogMessage << "Written mg_params_template.xml" << std::endl;
XmlReader reader(inputXml);
read(reader, "Params", mgParams);
std::cout << GridLogMessage << "Read in " << inputXml << std::endl;
}
checkParameterValidity(mgParams);
std::cout << mgParams << std::endl;
LevelInfo levelInfo_d(FGrid_d, mgParams);
LevelInfo levelInfo_f(FGrid_f, mgParams);
// Note: because of chiral doubling the setup only generates nbasis/2 vectors; their chiral projections then fill all nbasis slots
const int nbasis = 40;
WilsonFermionD Dw_d(Umu_d, *FGrid_d, *FrbGrid_d, mass);
WilsonFermionF Dw_f(Umu_f, *FGrid_f, *FrbGrid_f, mass);
MdagMLinearOperator<WilsonFermionD, LatticeFermionD> MdagMOpDw_d(Dw_d);
MdagMLinearOperator<WilsonFermionF, LatticeFermionF> MdagMOpDw_f(Dw_f);
std::cout << GridLogMessage << "**************************************************" << std::endl;
std::cout << GridLogMessage << "Testing single-precision Multigrid for Wilson" << std::endl;
std::cout << GridLogMessage << "**************************************************" << std::endl;
auto MGPreconDw_f = createMGInstance<vSpinColourVectorF, vTComplexF, nbasis, WilsonFermionF>(mgParams, levelInfo_f, Dw_f, Dw_f);
MGPreconDw_f->setup();
if(GridCmdOptionExists(argv, argv + argc, "--runchecks")) {
MGPreconDw_f->runChecks(1e-6);
}
MixedPrecisionFlexibleGeneralisedMinimalResidual<LatticeFermionD, LatticeFermionF> MPFGMRESPREC(1.0e-12, 50000, FGrid_f, *MGPreconDw_f, 100, false);
std::cout << std::endl << "Starting with a new solver" << std::endl;
MPFGMRESPREC(MdagMOpDw_d, src_d, resultMGF_d);
MGPreconDw_f->reportTimings();
if(GridCmdOptionExists(argv, argv + argc, "--docomparison")) {
std::cout << GridLogMessage << "**************************************************" << std::endl;
std::cout << GridLogMessage << "Testing double-precision Multigrid for Wilson" << std::endl;
std::cout << GridLogMessage << "**************************************************" << std::endl;
auto MGPreconDw_d = createMGInstance<vSpinColourVectorD, vTComplexD, nbasis, WilsonFermionD>(mgParams, levelInfo_d, Dw_d, Dw_d);
MGPreconDw_d->setup();
if(GridCmdOptionExists(argv, argv + argc, "--runchecks")) {
MGPreconDw_d->runChecks(1e-13);
}
FlexibleGeneralisedMinimalResidual<LatticeFermionD> FGMRESPREC(1.0e-12, 50000, *MGPreconDw_d, 100, false);
std::cout << std::endl << "Starting with a new solver" << std::endl;
FGMRESPREC(MdagMOpDw_d, src_d, resultMGD_d);
MGPreconDw_d->reportTimings();
std::cout << GridLogMessage << "**************************************************" << std::endl;
std::cout << GridLogMessage << "Comparing single-precision Multigrid with double-precision one for Wilson" << std::endl;
std::cout << GridLogMessage << "**************************************************" << std::endl;
LatticeFermionD diffFullSolver(FGrid_d);
RealD deviationFullSolver = axpy_norm(diffFullSolver, -1.0, resultMGF_d, resultMGD_d);
// clang-format off
LatticeFermionF src_f(FGrid_f); precisionChange(src_f, src_d);
LatticeFermionF resMGF_f(FGrid_f); resMGF_f = zero;
LatticeFermionD resMGD_d(FGrid_d); resMGD_d = zero;
// clang-format on
(*MGPreconDw_f)(src_f, resMGF_f);
(*MGPreconDw_d)(src_d, resMGD_d);
LatticeFermionD diffOnlyMG(FGrid_d);
LatticeFermionD resMGF_d(FGrid_d);
precisionChange(resMGF_d, resMGF_f);
RealD deviationOnlyPrec = axpy_norm(diffOnlyMG, -1.0, resMGF_d, resMGD_d);
// clang-format off
std::cout << GridLogMessage << "Absolute difference between FGMRES preconditioned by double and single precicision MG: " << deviationFullSolver << std::endl;
std::cout << GridLogMessage << "Relative deviation between FGMRES preconditioned by double and single precicision MG: " << deviationFullSolver / norm2(resultMGD_d) << std::endl;
std::cout << GridLogMessage << "Absolute difference between one iteration of MG Prec in double and single precision: " << deviationOnlyPrec << std::endl;
std::cout << GridLogMessage << "Relative deviation between one iteration of MG Prec in double and single precision: " << deviationOnlyPrec / norm2(resMGD_d) << std::endl;
// clang-format on
}
Grid_finalize();
}


@ -0,0 +1,65 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./tests/solver/Test_wilson_mr_unprec.cc
Copyright (C) 2015-2018
Author: Daniel Richtmann <daniel.richtmann@ur.de>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution
directory
*************************************************************************************/
/* END LEGAL */
#include <Grid/Grid.h>
using namespace Grid;
using namespace Grid::QCD;
int main (int argc, char ** argv)
{
Grid_init(&argc,&argv);
std::vector<int> latt_size = GridDefaultLatt();
std::vector<int> simd_layout = GridDefaultSimd(Nd,vComplex::Nsimd());
std::vector<int> mpi_layout = GridDefaultMpi();
GridCartesian Grid(latt_size,simd_layout,mpi_layout);
GridRedBlackCartesian RBGrid(&Grid);
std::vector<int> seeds({1,2,3,4});
GridParallelRNG pRNG(&Grid); pRNG.SeedFixedIntegers(seeds);
LatticeFermion src(&Grid); random(pRNG,src);
RealD nrm = norm2(src);
LatticeFermion result(&Grid); result=zero;
LatticeGaugeField Umu(&Grid); SU3::HotConfiguration(pRNG,Umu);
double volume=1;
for(int mu=0;mu<Nd;mu++){
volume=volume*latt_size[mu];
}
RealD mass=0.5;
WilsonFermionR Dw(Umu,Grid,RBGrid,mass);
MdagMLinearOperator<WilsonFermionR,LatticeFermion> HermOp(Dw);
MinimalResidual<LatticeFermion> MR(1.0e-8,10000,0.8);
MR(HermOp,src,result);
Grid_finalize();
}


@ -0,0 +1,71 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./tests/solver/Test_wilsonclover_cagmres_unprec.cc
Copyright (C) 2015-2018
Author: Daniel Richtmann <daniel.richtmann@ur.de>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution
directory
*************************************************************************************/
/* END LEGAL */
#include <Grid/Grid.h>
using namespace Grid;
using namespace Grid::QCD;
int main (int argc, char ** argv)
{
Grid_init(&argc,&argv);
std::vector<int> latt_size = GridDefaultLatt();
std::vector<int> simd_layout = GridDefaultSimd(Nd,vComplex::Nsimd());
std::vector<int> mpi_layout = GridDefaultMpi();
GridCartesian Grid(latt_size,simd_layout,mpi_layout);
GridRedBlackCartesian RBGrid(&Grid);
std::vector<int> seeds({1,2,3,4});
GridParallelRNG pRNG(&Grid); pRNG.SeedFixedIntegers(seeds);
typedef typename WilsonCloverFermionR::FermionField FermionField;
typename WilsonCloverFermionR::ImplParams params;
WilsonAnisotropyCoefficients anis;
FermionField src(&Grid); random(pRNG,src);
RealD nrm = norm2(src);
FermionField result(&Grid); result=zero;
LatticeGaugeField Umu(&Grid); SU3::HotConfiguration(pRNG,Umu);
double volume=1;
for(int mu=0;mu<Nd;mu++){
volume=volume*latt_size[mu];
}
RealD mass = 0.5;
RealD csw_r = 1.0;
RealD csw_t = 1.0;
WilsonCloverFermionR Dwc(Umu,Grid,RBGrid,mass,csw_r,csw_t,anis,params);
MdagMLinearOperator<WilsonCloverFermionR,FermionField> HermOp(Dwc);
CommunicationAvoidingGeneralisedMinimalResidual<FermionField> CAGMRES(1.0e-8, 10000, 25);
CAGMRES(HermOp,src,result);
Grid_finalize();
}


@ -0,0 +1,74 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./tests/solver/Test_wilsonclover_fcagmres_prec.cc
Copyright (C) 2015-2018
Author: Daniel Richtmann <daniel.richtmann@ur.de>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution
directory
*************************************************************************************/
/* END LEGAL */
#include <Grid/Grid.h>
using namespace Grid;
using namespace Grid::QCD;
int main (int argc, char ** argv)
{
Grid_init(&argc,&argv);
std::vector<int> latt_size = GridDefaultLatt();
std::vector<int> simd_layout = GridDefaultSimd(Nd,vComplex::Nsimd());
std::vector<int> mpi_layout = GridDefaultMpi();
GridCartesian Grid(latt_size,simd_layout,mpi_layout);
GridRedBlackCartesian RBGrid(&Grid);
std::vector<int> seeds({1,2,3,4});
GridParallelRNG pRNG(&Grid); pRNG.SeedFixedIntegers(seeds);
typedef typename WilsonCloverFermionR::FermionField FermionField;
typename WilsonCloverFermionR::ImplParams params;
WilsonAnisotropyCoefficients anis;
FermionField src(&Grid); random(pRNG,src);
RealD nrm = norm2(src);
FermionField result(&Grid); result=zero;
LatticeGaugeField Umu(&Grid); SU3::HotConfiguration(pRNG,Umu);
double volume=1;
for(int mu=0;mu<Nd;mu++){
volume=volume*latt_size[mu];
}
RealD mass = 0.5;
RealD csw_r = 1.0;
RealD csw_t = 1.0;
WilsonCloverFermionR Dwc(Umu,Grid,RBGrid,mass,csw_r,csw_t,anis,params);
MdagMLinearOperator<WilsonCloverFermionR,FermionField> HermOp(Dwc);
TrivialPrecon<FermionField> simple;
FlexibleCommunicationAvoidingGeneralisedMinimalResidual<FermionField> FCAGMRES(1.0e-8, 10000, simple, 25);
FCAGMRES(HermOp,src,result);
Grid_finalize();
}


@@ -0,0 +1,74 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./tests/solver/Test_wilsonclover_fgmres_prec.cc
Copyright (C) 2015-2018
Author: Daniel Richtmann <daniel.richtmann@ur.de>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution
directory
*************************************************************************************/
/* END LEGAL */
#include <Grid/Grid.h>
using namespace Grid;
using namespace Grid::QCD;
int main (int argc, char ** argv)
{
Grid_init(&argc,&argv);
std::vector<int> latt_size = GridDefaultLatt();
std::vector<int> simd_layout = GridDefaultSimd(Nd,vComplex::Nsimd());
std::vector<int> mpi_layout = GridDefaultMpi();
GridCartesian Grid(latt_size,simd_layout,mpi_layout);
GridRedBlackCartesian RBGrid(&Grid);
std::vector<int> seeds({1,2,3,4});
GridParallelRNG pRNG(&Grid); pRNG.SeedFixedIntegers(seeds);
typedef typename WilsonCloverFermionR::FermionField FermionField;
typename WilsonCloverFermionR::ImplParams params;
WilsonAnisotropyCoefficients anis;
FermionField src(&Grid); random(pRNG,src);
RealD nrm = norm2(src);
FermionField result(&Grid); result=zero;
LatticeGaugeField Umu(&Grid); SU3::HotConfiguration(pRNG,Umu);
double volume=1;
for(int mu=0;mu<Nd;mu++){
volume=volume*latt_size[mu];
}
RealD mass = 0.5;
RealD csw_r = 1.0;
RealD csw_t = 1.0;
WilsonCloverFermionR Dwc(Umu,Grid,RBGrid,mass,csw_r,csw_t,anis,params);
MdagMLinearOperator<WilsonCloverFermionR,FermionField> HermOp(Dwc);
TrivialPrecon<FermionField> simple;
FlexibleGeneralisedMinimalResidual<FermionField> FGMRES(1.0e-8, 10000, simple, 25);
FGMRES(HermOp,src,result);
Grid_finalize();
}


@@ -0,0 +1,71 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./tests/solver/Test_wilsonclover_gmres_unprec.cc
Copyright (C) 2015-2018
Author: Daniel Richtmann <daniel.richtmann@ur.de>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution
directory
*************************************************************************************/
/* END LEGAL */
#include <Grid/Grid.h>
using namespace Grid;
using namespace Grid::QCD;
int main (int argc, char ** argv)
{
Grid_init(&argc,&argv);
std::vector<int> latt_size = GridDefaultLatt();
std::vector<int> simd_layout = GridDefaultSimd(Nd,vComplex::Nsimd());
std::vector<int> mpi_layout = GridDefaultMpi();
GridCartesian Grid(latt_size,simd_layout,mpi_layout);
GridRedBlackCartesian RBGrid(&Grid);
std::vector<int> seeds({1,2,3,4});
GridParallelRNG pRNG(&Grid); pRNG.SeedFixedIntegers(seeds);
typedef typename WilsonCloverFermionR::FermionField FermionField;
typename WilsonCloverFermionR::ImplParams params;
WilsonAnisotropyCoefficients anis;
FermionField src(&Grid); random(pRNG,src);
RealD nrm = norm2(src);
FermionField result(&Grid); result=zero;
LatticeGaugeField Umu(&Grid); SU3::HotConfiguration(pRNG,Umu);
double volume=1;
for(int mu=0;mu<Nd;mu++){
volume=volume*latt_size[mu];
}
RealD mass = 0.5;
RealD csw_r = 1.0;
RealD csw_t = 1.0;
WilsonCloverFermionR Dwc(Umu,Grid,RBGrid,mass,csw_r,csw_t,anis,params);
MdagMLinearOperator<WilsonCloverFermionR,FermionField> HermOp(Dwc);
GeneralisedMinimalResidual<FermionField> GMRES(1.0e-8, 10000, 25);
GMRES(HermOp,src,result);
Grid_finalize();
}
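Taken together, the GMRES-family tests above differ only in the solver object they construct. Below is a minimal sketch of those construction patterns, assuming the same preamble as the tests; the argument order (tolerance, maximum iterations, optional preconditioner, restart length) is read off these call sites rather than the solver headers.

// Sketch only: solver construction as used in the tests above, under the same
// preamble (#include <Grid/Grid.h>; using namespace Grid; using namespace Grid::QCD;).
typedef WilsonCloverFermionR::FermionField FermionField;
GeneralisedMinimalResidual<FermionField>                              GMRES   (1.0e-8, 10000, 25);
CommunicationAvoidingGeneralisedMinimalResidual<FermionField>         CAGMRES (1.0e-8, 10000, 25);
TrivialPrecon<FermionField> simple;                                   // trivial (identity) preconditioner
FlexibleGeneralisedMinimalResidual<FermionField>                      FGMRES  (1.0e-8, 10000, simple, 25);
FlexibleCommunicationAvoidingGeneralisedMinimalResidual<FermionField> FCAGMRES(1.0e-8, 10000, simple, 25);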


@@ -0,0 +1,117 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./tests/solver/Test_wilsonclover_mg.cc
Copyright (C) 2015-2018
Author: Daniel Richtmann <daniel.richtmann@ur.de>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#include <Grid/Grid.h>
#include <Test_multigrid_common.h>
using namespace std;
using namespace Grid;
using namespace Grid::QCD;
int main(int argc, char **argv) {
Grid_init(&argc, &argv);
GridCartesian * FGrid = SpaceTimeGrid::makeFourDimGrid(GridDefaultLatt(), GridDefaultSimd(Nd, vComplex::Nsimd()), GridDefaultMpi());
GridRedBlackCartesian *FrbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(FGrid);
std::vector<int> fSeeds({1, 2, 3, 4});
GridParallelRNG fPRNG(FGrid);
fPRNG.SeedFixedIntegers(fSeeds);
// clang-format off
LatticeFermion src(FGrid); gaussian(fPRNG, src);
LatticeFermion result(FGrid); result = zero;
LatticeGaugeField Umu(FGrid); SU3::HotConfiguration(fPRNG, Umu);
// clang-format on
RealD mass = -0.25;
RealD csw_r = 1.0;
RealD csw_t = 1.0;
MultiGridParams mgParams;
std::string inputXml{"./mg_params.xml"};
if(GridCmdOptionExists(argv, argv + argc, "--inputxml")) {
inputXml = GridCmdOptionPayload(argv, argv + argc, "--inputxml");
assert(inputXml.length() != 0);
}
{
XmlWriter writer("mg_params_template.xml");
write(writer, "Params", mgParams);
std::cout << GridLogMessage << "Written mg_params_template.xml" << std::endl;
XmlReader reader(inputXml);
read(reader, "Params", mgParams);
std::cout << GridLogMessage << "Read in " << inputXml << std::endl;
}
checkParameterValidity(mgParams);
std::cout << mgParams << std::endl;
LevelInfo levelInfo(FGrid, mgParams);
// Note: We do chiral doubling, so actually only nbasis/2 full basis vectors are used
const int nbasis = 40;
WilsonCloverFermionR Dwc(Umu, *FGrid, *FrbGrid, mass, csw_r, csw_t);
MdagMLinearOperator<WilsonCloverFermionR, LatticeFermion> MdagMOpDwc(Dwc);
std::cout << GridLogMessage << "**************************************************" << std::endl;
std::cout << GridLogMessage << "Testing Multigrid for Wilson Clover" << std::endl;
std::cout << GridLogMessage << "**************************************************" << std::endl;
TrivialPrecon<LatticeFermion> TrivialPrecon;
auto MGPreconDwc = createMGInstance<vSpinColourVector, vTComplex, nbasis, WilsonCloverFermionR>(mgParams, levelInfo, Dwc, Dwc);
MGPreconDwc->setup();
if(GridCmdOptionExists(argv, argv + argc, "--runchecks")) {
RealD toleranceForMGChecks = (getPrecision<LatticeFermion>::value == 1) ? 1e-6 : 1e-13;
MGPreconDwc->runChecks(toleranceForMGChecks);
}
std::vector<std::unique_ptr<OperatorFunction<LatticeFermion>>> solversDwc;
solversDwc.emplace_back(new ConjugateGradient<LatticeFermion>(1.0e-12, 50000, false));
solversDwc.emplace_back(new FlexibleGeneralisedMinimalResidual<LatticeFermion>(1.0e-12, 50000, TrivialPrecon, 100, false));
solversDwc.emplace_back(new FlexibleGeneralisedMinimalResidual<LatticeFermion>(1.0e-12, 50000, *MGPreconDwc, 100, false));
for(auto const &solver : solversDwc) {
std::cout << std::endl << "Starting with a new solver" << std::endl;
result = zero;
(*solver)(MdagMOpDwc, src, result);
std::cout << std::endl;
}
MGPreconDwc->reportTimings();
Grid_finalize();
}
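Stripped of the XML I/O and the optional correctness checks, the solver wiring exercised by Test_wilsonclover_mg.cc reduces to the following sketch. Identifiers and arguments are copied from the test above with the same preamble assumed; the comments describe the presumed role of each step.

// Sketch: multigrid-preconditioned FGMRES for the Wilson clover operator,
// condensed from Test_wilsonclover_mg.cc above.
WilsonCloverFermionR Dwc(Umu, *FGrid, *FrbGrid, mass, csw_r, csw_t);
MdagMLinearOperator<WilsonCloverFermionR, LatticeFermion> MdagMOp(Dwc);
LevelInfo levelInfo(FGrid, mgParams);            // per-level grids derived from MultiGridParams
auto MGPrecon = createMGInstance<vSpinColourVector, vTComplex, nbasis, WilsonCloverFermionR>(mgParams, levelInfo, Dwc, Dwc);
MGPrecon->setup();                               // builds the near-null subspace and coarse operators
FlexibleGeneralisedMinimalResidual<LatticeFermion> FGMRES(1.0e-12, 50000, *MGPrecon, 100, false);
FGMRES(MdagMOp, src, result);                    // multigrid enters only as the FGMRES preconditioner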


@@ -0,0 +1,169 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./tests/solver/Test_wilsonclover_mg_mp.cc
Copyright (C) 2015-2018
Author: Daniel Richtmann <daniel.richtmann@ur.de>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#include <Grid/Grid.h>
#include <Test_multigrid_common.h>
using namespace std;
using namespace Grid;
using namespace Grid::QCD;
int main(int argc, char **argv) {
Grid_init(&argc, &argv);
// clang-format off
GridCartesian *FGrid_d = SpaceTimeGrid::makeFourDimGrid(GridDefaultLatt(), GridDefaultSimd(Nd, vComplexD::Nsimd()), GridDefaultMpi());
GridCartesian *FGrid_f = SpaceTimeGrid::makeFourDimGrid(GridDefaultLatt(), GridDefaultSimd(Nd, vComplexF::Nsimd()), GridDefaultMpi());
GridRedBlackCartesian *FrbGrid_d = SpaceTimeGrid::makeFourDimRedBlackGrid(FGrid_d);
GridRedBlackCartesian *FrbGrid_f = SpaceTimeGrid::makeFourDimRedBlackGrid(FGrid_f);
// clang-format on
std::vector<int> fSeeds({1, 2, 3, 4});
GridParallelRNG fPRNG(FGrid_d);
fPRNG.SeedFixedIntegers(fSeeds);
// clang-format off
LatticeFermionD src_d(FGrid_d); gaussian(fPRNG, src_d);
LatticeFermionD resultMGD_d(FGrid_d); resultMGD_d = zero;
LatticeFermionD resultMGF_d(FGrid_d); resultMGF_d = zero;
LatticeGaugeFieldD Umu_d(FGrid_d); SU3::HotConfiguration(fPRNG, Umu_d);
LatticeGaugeFieldF Umu_f(FGrid_f); precisionChange(Umu_f, Umu_d);
// clang-format on
RealD mass = -0.25;
RealD csw_r = 1.0;
RealD csw_t = 1.0;
MultiGridParams mgParams;
std::string inputXml{"./mg_params.xml"};
if(GridCmdOptionExists(argv, argv + argc, "--inputxml")) {
inputXml = GridCmdOptionPayload(argv, argv + argc, "--inputxml");
assert(inputXml.length() != 0);
}
{
XmlWriter writer("mg_params_template.xml");
write(writer, "Params", mgParams);
std::cout << GridLogMessage << "Written mg_params_template.xml" << std::endl;
XmlReader reader(inputXml);
read(reader, "Params", mgParams);
std::cout << GridLogMessage << "Read in " << inputXml << std::endl;
}
checkParameterValidity(mgParams);
std::cout << mgParams << std::endl;
LevelInfo levelInfo_d(FGrid_d, mgParams);
LevelInfo levelInfo_f(FGrid_f, mgParams);
// Note: We do chiral doubling, so actually only nbasis/2 full basis vectors are used
const int nbasis = 40;
WilsonCloverFermionD Dwc_d(Umu_d, *FGrid_d, *FrbGrid_d, mass, csw_r, csw_t);
WilsonCloverFermionF Dwc_f(Umu_f, *FGrid_f, *FrbGrid_f, mass, csw_r, csw_t);
MdagMLinearOperator<WilsonCloverFermionD, LatticeFermionD> MdagMOpDwc_d(Dwc_d);
MdagMLinearOperator<WilsonCloverFermionF, LatticeFermionF> MdagMOpDwc_f(Dwc_f);
std::cout << GridLogMessage << "**************************************************" << std::endl;
std::cout << GridLogMessage << "Testing single-precision Multigrid for Wilson Clover" << std::endl;
std::cout << GridLogMessage << "**************************************************" << std::endl;
auto MGPreconDwc_f = createMGInstance<vSpinColourVectorF, vTComplexF, nbasis, WilsonCloverFermionF>(mgParams, levelInfo_f, Dwc_f, Dwc_f);
MGPreconDwc_f->setup();
if(GridCmdOptionExists(argv, argv + argc, "--runchecks")) {
MGPreconDwc_f->runChecks(1e-6);
}
MixedPrecisionFlexibleGeneralisedMinimalResidual<LatticeFermionD, LatticeFermionF> MPFGMRESPREC(
1.0e-12, 50000, FGrid_f, *MGPreconDwc_f, 100, false);
std::cout << std::endl << "Starting with a new solver" << std::endl;
MPFGMRESPREC(MdagMOpDwc_d, src_d, resultMGF_d);
MGPreconDwc_f->reportTimings();
if(GridCmdOptionExists(argv, argv + argc, "--docomparison")) {
std::cout << GridLogMessage << "**************************************************" << std::endl;
std::cout << GridLogMessage << "Testing double-precision Multigrid for Wilson Clover" << std::endl;
std::cout << GridLogMessage << "**************************************************" << std::endl;
auto MGPreconDwc_d = createMGInstance<vSpinColourVectorD, vTComplexD, nbasis, WilsonCloverFermionD>(mgParams, levelInfo_d, Dwc_d, Dwc_d);
MGPreconDwc_d->setup();
if(GridCmdOptionExists(argv, argv + argc, "--runchecks")) {
MGPreconDwc_d->runChecks(1e-13);
}
FlexibleGeneralisedMinimalResidual<LatticeFermionD> FGMRESPREC(1.0e-12, 50000, *MGPreconDwc_d, 100, false);
std::cout << std::endl << "Starting with a new solver" << std::endl;
FGMRESPREC(MdagMOpDwc_d, src_d, resultMGD_d);
MGPreconDwc_d->reportTimings();
std::cout << GridLogMessage << "**************************************************" << std::endl;
std::cout << GridLogMessage << "Comparing single-precision Multigrid with double-precision one for Wilson Clover" << std::endl;
std::cout << GridLogMessage << "**************************************************" << std::endl;
LatticeFermionD diffFullSolver(FGrid_d);
RealD deviationFullSolver = axpy_norm(diffFullSolver, -1.0, resultMGF_d, resultMGD_d);
// clang-format off
LatticeFermionF src_f(FGrid_f); precisionChange(src_f, src_d);
LatticeFermionF resMGF_f(FGrid_f); resMGF_f = zero;
LatticeFermionD resMGD_d(FGrid_d); resMGD_d = zero;
// clang-format on
(*MGPreconDwc_f)(src_f, resMGF_f);
(*MGPreconDwc_d)(src_d, resMGD_d);
LatticeFermionD diffOnlyMG(FGrid_d);
LatticeFermionD resMGF_d(FGrid_d);
precisionChange(resMGF_d, resMGF_f);
RealD deviationOnlyPrec = axpy_norm(diffOnlyMG, -1.0, resMGF_d, resMGD_d);
// clang-format off
std::cout << GridLogMessage << "Absolute difference between FGMRES preconditioned by double and single precicision MG: " << deviationFullSolver << std::endl;
std::cout << GridLogMessage << "Relative deviation between FGMRES preconditioned by double and single precicision MG: " << deviationFullSolver / norm2(resultMGD_d) << std::endl;
std::cout << GridLogMessage << "Absolute difference between one iteration of MG Prec in double and single precision: " << deviationOnlyPrec << std::endl;
std::cout << GridLogMessage << "Relative deviation between one iteration of MG Prec in double and single precision: " << deviationOnlyPrec / norm2(resMGD_d) << std::endl;
// clang-format on
}
Grid_finalize();
}
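The mixed-precision test above follows the same pattern, except that the multigrid preconditioner is built on the single-precision grids while the outer Krylov solve stays in double precision. A condensed sketch, with identifiers and arguments as at the call sites above:

// Sketch: single-precision MG preconditioner inside a double-precision outer solver,
// condensed from Test_wilsonclover_mg_mp.cc above.
auto MGPrecon_f = createMGInstance<vSpinColourVectorF, vTComplexF, nbasis, WilsonCloverFermionF>(mgParams, levelInfo_f, Dwc_f, Dwc_f);
MGPrecon_f->setup();                             // single-precision MG hierarchy
MixedPrecisionFlexibleGeneralisedMinimalResidual<LatticeFermionD, LatticeFermionF>
    MPFGMRESPREC(1.0e-12, 50000, FGrid_f, *MGPrecon_f, 100, false);
MPFGMRESPREC(MdagMOpDwc_d, src_d, resultMGF_d);  // source and result remain double precision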


@@ -0,0 +1,71 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./tests/solver/Test_wilsonclover_mr_unprec.cc
Copyright (C) 2015-2018
Author: Daniel Richtmann <daniel.richtmann@ur.de>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution
directory
*************************************************************************************/
/* END LEGAL */
#include <Grid/Grid.h>
using namespace Grid;
using namespace Grid::QCD;
int main (int argc, char ** argv)
{
Grid_init(&argc,&argv);
std::vector<int> latt_size = GridDefaultLatt();
std::vector<int> simd_layout = GridDefaultSimd(Nd,vComplex::Nsimd());
std::vector<int> mpi_layout = GridDefaultMpi();
GridCartesian Grid(latt_size,simd_layout,mpi_layout);
GridRedBlackCartesian RBGrid(&Grid);
std::vector<int> seeds({1,2,3,4});
GridParallelRNG pRNG(&Grid); pRNG.SeedFixedIntegers(seeds);
typedef typename WilsonCloverFermionR::FermionField FermionField;
typename WilsonCloverFermionR::ImplParams params;
WilsonAnisotropyCoefficients anis;
FermionField src(&Grid); random(pRNG,src);
RealD nrm = norm2(src);
FermionField result(&Grid); result=zero;
LatticeGaugeField Umu(&Grid); SU3::HotConfiguration(pRNG,Umu);
double volume=1;
for(int mu=0;mu<Nd;mu++){
volume=volume*latt_size[mu];
}
RealD mass = 0.5;
RealD csw_r = 1.0;
RealD csw_t = 1.0;
WilsonCloverFermionR Dwc(Umu,Grid,RBGrid,mass,csw_r,csw_t,anis,params);
MdagMLinearOperator<WilsonCloverFermionR,FermionField> HermOp(Dwc);
MinimalResidual<FermionField> MR(1.0e-8,10000,0.8);
MR(HermOp,src,result);
Grid_finalize();
}