Minor changes

Fixing a compilation error
Added laplacian operator for smearing sources
2025-07-26 09:17:08 +01:00 · 2017-10-09 09:44:03 +01:00 · 2017-10-04 14:29:01 +01:00 · 2017-10-04 13:54:54 +01:00
734 changed files with 17439 additions and 68608 deletions
--- a/.gitignore
+++ b/.gitignore
@@ -83,7 +83,6 @@ ltmain.sh
 .Trashes
 ehthumbs.db
 Thumbs.db
-.dirstamp

 # build directory #
 ###################
@@ -94,12 +93,14 @@ build*/*
 *.xcodeproj/*
 build.sh
 .vscode
-*.code-workspace

 # Eigen source #
 ################
-Grid/Eigen
-Eigen/*
+lib/Eigen/*
+
+# FFTW source #
+################
+lib/fftw/*

 # libtool macros #
 ##################
@@ -110,8 +111,15 @@ m4/libtool.m4
 ################
 gh-pages/

+# Buck files #
+##############
+.buck*
+buck-out
+BUCK
+make-bin-BUCK.sh
+
 # generated sources #
 #####################
-Grid/qcd/spin/gamma-gen/*.h
-Grid/qcd/spin/gamma-gen/*.cc
-Grid/util/Version.h
+lib/qcd/spin/gamma-gen/*.h
+lib/qcd/spin/gamma-gen/*.cc
+
--- a/.travis.yml
+++ b/.travis.yml
@@ -9,11 +9,6 @@ matrix:
    - os:        osx
      osx_image: xcode8.3
      compiler: clang
-      env: PREC=single
-    - os:        osx
-      osx_image: xcode8.3
-      compiler: clang
-      env: PREC=double
      
 before_install:
    - export GRIDDIR=`pwd`
@@ -21,11 +16,9 @@ before_install:
    - if [[ "$TRAVIS_OS_NAME" == "linux" ]] && [[ "$CC" == "clang" ]]; then export PATH="${GRIDDIR}/clang/bin:${PATH}"; fi
    - if [[ "$TRAVIS_OS_NAME" == "linux" ]] && [[ "$CC" == "clang" ]]; then export LD_LIBRARY_PATH="${GRIDDIR}/clang/lib:${LD_LIBRARY_PATH}"; fi
    - if [[ "$TRAVIS_OS_NAME" == "osx" ]]; then brew update; fi
-    - if [[ "$TRAVIS_OS_NAME" == "osx" ]]; then brew install libmpc openssl; fi
+    - if [[ "$TRAVIS_OS_NAME" == "osx" ]]; then brew install libmpc; fi
    
 install:
-    - export CWD=`pwd`
-    - echo $CWD
    - export CC=$CC$VERSION
    - export CXX=$CXX$VERSION
    - echo $PATH
@@ -38,24 +31,16 @@ install:
    - which $CXX
    - $CXX --version
    - if [[ "$TRAVIS_OS_NAME" == "osx" ]]; then export LDFLAGS='-L/usr/local/lib'; fi
-    - if [[ "$TRAVIS_OS_NAME" == "osx" ]]; then export EXTRACONF='--with-openssl=/usr/local/opt/openssl'; fi
    
 script:
    - ./bootstrap.sh
    - mkdir build
    - cd build
-    - mkdir lime
-    - cd lime
-    - mkdir build
-    - cd build
-    - wget http://usqcd-software.github.io/downloads/c-lime/lime-1.3.2.tar.gz
-    - tar xf lime-1.3.2.tar.gz
-    - cd lime-1.3.2
-    - ./configure --prefix=$CWD/build/lime/install
-    - make -j4
-    - make install
-    - cd $CWD/build
-    - ../configure --enable-precision=$PREC --enable-simd=SSE4 --enable-comms=none --with-lime=$CWD/build/lime/install ${EXTRACONF}
+    - ../configure --enable-precision=single --enable-simd=SSE4 --enable-comms=none
    - make -j4 
    - ./benchmarks/Benchmark_dwf --threads 1 --debug-signals
+    - echo make clean
+    - ../configure --enable-precision=double --enable-simd=SSE4 --enable-comms=none
+    - make -j4
+    - ./benchmarks/Benchmark_dwf --threads 1 --debug-signals
    - make check
--- a/5
+++ b/5
@@ -1,5 +0,0 @@
-Version : 0.8.0
-
- Clang 3.5 and above, ICPC v16 and above, GCC 6.3 and above recommended
- MPI and MPI3 comms optimisations for KNL and OPA finished
- Half precision comms
--- a/Grid/Makefile.am
+++ b/Grid/Makefile.am
@@ -1,63 +0,0 @@
-extra_sources=
-extra_headers=
-
-if BUILD_COMMS_MPI3
-  extra_sources+=communicator/Communicator_mpi3.cc
-  extra_sources+=communicator/Communicator_base.cc
-  extra_sources+=communicator/SharedMemoryMPI.cc
-  extra_sources+=communicator/SharedMemory.cc
-endif
-
-if BUILD_COMMS_NONE
-  extra_sources+=communicator/Communicator_none.cc
-  extra_sources+=communicator/Communicator_base.cc
-  extra_sources+=communicator/SharedMemoryNone.cc
-  extra_sources+=communicator/SharedMemory.cc
-endif
-
-if BUILD_HDF5
-  extra_sources+=serialisation/Hdf5IO.cc 
-  extra_headers+=serialisation/Hdf5IO.h
-  extra_headers+=serialisation/Hdf5Type.h
-endif
-
-all: version-cache
-
-version-cache:
-	@if [ `git status --porcelain | grep -v '??' | wc -l` -gt 0 ]; then\
-		a="uncommited changes";\
-	else\
-		a="clean";\
-	fi;\
-	echo "`git log -n 1 --format=format:"#define GITHASH \\"%H:%d $$a\\"%n" HEAD`" > vertmp;\
-	if [ -e version-cache ]; then\
-		d=`diff vertmp version-cache`;\
-		if [ "$${d}" != "" ]; then\
-			mv vertmp version-cache;\
-			rm -f Version.h;\
-		fi;\
-	else\
-		mv vertmp version-cache;\
-		rm -f Version.h;\
-	fi;\
-	rm -f vertmp
-
-Version.h:
-	cp version-cache Version.h
-
-.PHONY: version-cache
-
-#
-# Libraries
-#
-include Make.inc
-include Eigen.inc
-
-lib_LIBRARIES = libGrid.a
-
-CCFILES += $(extra_sources)
-HFILES  += $(extra_headers) Config.h Version.h
-
-libGrid_a_SOURCES              = $(CCFILES)
-libGrid_adir                   = $(includedir)/Grid
-nobase_dist_pkginclude_HEADERS = $(HFILES) $(eigen_files) $(eigen_unsupp_files)
--- a/Grid/algorithms/iterative/CommunicationAvoidingGeneralisedMinimalResidual.h
+++ b/Grid/algorithms/iterative/CommunicationAvoidingGeneralisedMinimalResidual.h
@@ -1,244 +0,0 @@
-/*************************************************************************************
-
-Grid physics library, www.github.com/paboyle/Grid
-
-Source file: ./lib/algorithms/iterative/CommunicationAvoidingGeneralisedMinimalResidual.h
-
-Copyright (C) 2015
-
-Author: Daniel Richtmann <daniel.richtmann@ur.de>
-
-This program is free software; you can redistribute it and/or modify
-it under the terms of the GNU General Public License as published by
-the Free Software Foundation; either version 2 of the License, or
-(at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along
-with this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
-
-See the full license in the file "LICENSE" in the top level distribution
-directory
-*************************************************************************************/
-/*  END LEGAL */
-#ifndef GRID_COMMUNICATION_AVOIDING_GENERALISED_MINIMAL_RESIDUAL_H
-#define GRID_COMMUNICATION_AVOIDING_GENERALISED_MINIMAL_RESIDUAL_H
-
-namespace Grid {
-
-template<class Field>
-class CommunicationAvoidingGeneralisedMinimalResidual : public OperatorFunction<Field> {
- public:
-  bool ErrorOnNoConverge; // Throw an assert when CAGMRES fails to converge,
-                          // defaults to true
-
-  RealD   Tolerance;
-
-  Integer MaxIterations;
-  Integer RestartLength;
-  Integer MaxNumberOfRestarts;
-  Integer IterationCount; // Number of iterations the CAGMRES took to finish,
-                          // filled in upon completion
-
-  GridStopWatch MatrixTimer;
-  GridStopWatch LinalgTimer;
-  GridStopWatch QrTimer;
-  GridStopWatch CompSolutionTimer;
-
-  Eigen::MatrixXcd H;
-
-  std::vector<std::complex<double>> y;
-  std::vector<std::complex<double>> gamma;
-  std::vector<std::complex<double>> c;
-  std::vector<std::complex<double>> s;
-
-  CommunicationAvoidingGeneralisedMinimalResidual(RealD   tol,
-                                                  Integer maxit,
-                                                  Integer restart_length,
-                                                  bool    err_on_no_conv = true)
-      : Tolerance(tol)
-      , MaxIterations(maxit)
-      , RestartLength(restart_length)
-      , MaxNumberOfRestarts(MaxIterations/RestartLength + ((MaxIterations%RestartLength == 0) ? 0 : 1))
-      , ErrorOnNoConverge(err_on_no_conv)
-      , H(Eigen::MatrixXcd::Zero(RestartLength, RestartLength + 1)) // sizes taken from DD-αAMG code base
-      , y(RestartLength + 1, 0.)
-      , gamma(RestartLength + 1, 0.)
-      , c(RestartLength + 1, 0.)
-      , s(RestartLength + 1, 0.) {};
-
-  void operator()(LinearOperatorBase<Field> &LinOp, const Field &src, Field &psi) {
-
-    std::cout << GridLogWarning << "This algorithm currently doesn't differ from regular GMRES" << std::endl;
-
-    psi.checkerboard = src.checkerboard;
-    conformable(psi, src);
-
-    RealD guess = norm2(psi);
-    assert(std::isnan(guess) == 0);
-
-    RealD cp;
-    RealD ssq = norm2(src);
-    RealD rsq = Tolerance * Tolerance * ssq;
-
-    Field r(src._grid);
-
-    std::cout << std::setprecision(4) << std::scientific;
-    std::cout << GridLogIterative << "CommunicationAvoidingGeneralisedMinimalResidual: guess " << guess << std::endl;
-    std::cout << GridLogIterative << "CommunicationAvoidingGeneralisedMinimalResidual:   src " << ssq   << std::endl;
-
-    MatrixTimer.Reset();
-    LinalgTimer.Reset();
-    QrTimer.Reset();
-    CompSolutionTimer.Reset();
-
-    GridStopWatch SolverTimer;
-    SolverTimer.Start();
-
-    IterationCount = 0;
-
-    for (int k=0; k<MaxNumberOfRestarts; k++) {
-
-      cp = outerLoopBody(LinOp, src, psi, rsq);
-
-      // Stopping condition
-      if (cp <= rsq) {
-
-        SolverTimer.Stop();
-
-        LinOp.Op(psi,r);
-        axpy(r,-1.0,src,r);
-
-        RealD srcnorm       = sqrt(ssq);
-        RealD resnorm       = sqrt(norm2(r));
-        RealD true_residual = resnorm / srcnorm;
-
-        std::cout << GridLogMessage        << "CommunicationAvoidingGeneralisedMinimalResidual: Converged on iteration " << IterationCount
-                  << " computed residual " << sqrt(cp / ssq)
-                  << " true residual "     << true_residual
-                  << " target "            << Tolerance << std::endl;
-
-        std::cout << GridLogMessage << "CAGMRES Time elapsed: Total   " <<       SolverTimer.Elapsed() << std::endl;
-        std::cout << GridLogMessage << "CAGMRES Time elapsed: Matrix  " <<       MatrixTimer.Elapsed() << std::endl;
-        std::cout << GridLogMessage << "CAGMRES Time elapsed: Linalg  " <<       LinalgTimer.Elapsed() << std::endl;
-        std::cout << GridLogMessage << "CAGMRES Time elapsed: QR      " <<           QrTimer.Elapsed() << std::endl;
-        std::cout << GridLogMessage << "CAGMRES Time elapsed: CompSol " << CompSolutionTimer.Elapsed() << std::endl;
-        return;
-      }
-    }
-
-    std::cout << GridLogMessage << "CommunicationAvoidingGeneralisedMinimalResidual did NOT converge" << std::endl;
-
-    if (ErrorOnNoConverge)
-      assert(0);
-  }
-
-  RealD outerLoopBody(LinearOperatorBase<Field> &LinOp, const Field &src, Field &psi, RealD rsq) {
-
-    RealD cp = 0;
-
-    Field w(src._grid);
-    Field r(src._grid);
-
-    // this should probably be made a class member so that it is only allocated once, not in every restart
-    std::vector<Field> v(RestartLength + 1, src._grid); for (auto &elem : v) elem = zero;
-
-    MatrixTimer.Start();
-    LinOp.Op(psi, w);
-    MatrixTimer.Stop();
-
-    LinalgTimer.Start();
-    r = src - w;
-
-    gamma[0] = sqrt(norm2(r));
-
-    v[0] = (1. / gamma[0]) * r;
-    LinalgTimer.Stop();
-
-    for (int i=0; i<RestartLength; i++) {
-
-      IterationCount++;
-
-      arnoldiStep(LinOp, v, w, i);
-
-      qrUpdate(i);
-
-      cp = std::norm(gamma[i+1]);
-
-      std::cout << GridLogIterative << "CommunicationAvoidingGeneralisedMinimalResidual: Iteration " << IterationCount
-                << " residual " << cp << " target " << rsq << std::endl;
-
-      if ((i == RestartLength - 1) || (IterationCount == MaxIterations) || (cp <= rsq)) {
-
-        computeSolution(v, psi, i);
-
-        return cp;
-      }
-    }
-
-    assert(0); // Never reached
-    return cp;
-  }
-
-  void arnoldiStep(LinearOperatorBase<Field> &LinOp, std::vector<Field> &v, Field &w, int iter) {
-
-    MatrixTimer.Start();
-    LinOp.Op(v[iter], w);
-    MatrixTimer.Stop();
-
-    LinalgTimer.Start();
-    for (int i = 0; i <= iter; ++i) {
-      H(iter, i) = innerProduct(v[i], w);
-      w = w - H(iter, i) * v[i];
-    }
-
-    H(iter, iter + 1) = sqrt(norm2(w));
-    v[iter + 1] = (1. / H(iter, iter + 1)) * w;
-    LinalgTimer.Stop();
-  }
-
-  void qrUpdate(int iter) {
-
-    QrTimer.Start();
-    for (int i = 0; i < iter ; ++i) {
-      auto tmp       = -s[i] * H(iter, i) + c[i] * H(iter, i + 1);
-      H(iter, i)     = std::conj(c[i]) * H(iter, i) + std::conj(s[i]) * H(iter, i + 1);
-      H(iter, i + 1) = tmp;
-    }
-
-    // Compute new Givens Rotation
-    ComplexD nu = sqrt(std::norm(H(iter, iter)) + std::norm(H(iter, iter + 1)));
-    c[iter]     = H(iter, iter) / nu;
-    s[iter]     = H(iter, iter + 1) / nu;
-
-    // Apply new Givens rotation
-    H(iter, iter)     = nu;
-    H(iter, iter + 1) = 0.;
-
-    gamma[iter + 1] = -s[iter] * gamma[iter];
-    gamma[iter]     = std::conj(c[iter]) * gamma[iter];
-    QrTimer.Stop();
-  }
-
-  void computeSolution(std::vector<Field> const &v, Field &psi, int iter) {
-
-    CompSolutionTimer.Start();
-    for (int i = iter; i >= 0; i--) {
-      y[i] = gamma[i];
-      for (int k = i + 1; k <= iter; k++)
-        y[i] = y[i] - H(k, i) * y[k];
-      y[i] = y[i] / H(i, i);
-    }
-
-    for (int i = 0; i <= iter; i++)
-      psi = psi + v[i] * y[i];
-    CompSolutionTimer.Stop();
-  }
-};
-}
-#endif
--- a/Grid/algorithms/iterative/Deflation.h
+++ b/Grid/algorithms/iterative/Deflation.h
@@ -1,108 +0,0 @@
-    /*************************************************************************************
-
-    Grid physics library, www.github.com/paboyle/Grid 
-
-    Source file: ./lib/algorithms/iterative/ImplicitlyRestartedLanczos.h
-
-    Copyright (C) 2015
-
-Author: Peter Boyle <paboyle@ph.ed.ac.uk>
-
-    This program is free software; you can redistribute it and/or modify
-    it under the terms of the GNU General Public License as published by
-    the Free Software Foundation; either version 2 of the License, or
-    (at your option) any later version.
-
-    This program is distributed in the hope that it will be useful,
-    but WITHOUT ANY WARRANTY; without even the implied warranty of
-    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-    GNU General Public License for more details.
-
-    You should have received a copy of the GNU General Public License along
-    with this program; if not, write to the Free Software Foundation, Inc.,
-    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
-
-    See the full license in the file "LICENSE" in the top level distribution directory
-    *************************************************************************************/
-    /*  END LEGAL */
-#ifndef GRID_DEFLATION_H
-#define GRID_DEFLATION_H
-
-namespace Grid { 
-
-template<class Field>
-class ZeroGuesser: public LinearFunction<Field> {
-public:
-  virtual void operator()(const Field &src, Field &guess) { guess = zero; };
-};
-template<class Field>
-class DoNothingGuesser: public LinearFunction<Field> {
-public:
-  virtual void operator()(const Field &src, Field &guess) {  };
-};
-template<class Field>
-class SourceGuesser: public LinearFunction<Field> {
-public:
-  virtual void operator()(const Field &src, Field &guess) { guess = src; };
-};
-
-////////////////////////////////
-// Fine grid deflation
-////////////////////////////////
-template<class Field>
-class DeflatedGuesser: public LinearFunction<Field> {
-private:
-  const std::vector<Field> &evec;
-  const std::vector<RealD> &eval;
-
-public:
-
-  DeflatedGuesser(const std::vector<Field> & _evec,const std::vector<RealD> & _eval) : evec(_evec), eval(_eval) {};
-
-  virtual void operator()(const Field &src,Field &guess) {
-    guess = zero;
-    assert(evec.size()==eval.size());
-    auto N = evec.size();
-    for (int i=0;i<N;i++) {
-      const Field& tmp = evec[i];
-      axpy(guess,TensorRemove(innerProduct(tmp,src)) / eval[i],tmp,guess);
-    }
-    guess.checkerboard = src.checkerboard;
-  }
-};
-
-template<class FineField, class CoarseField>
-class LocalCoherenceDeflatedGuesser: public LinearFunction<FineField> {
-private:
-  const std::vector<FineField>   &subspace;
-  const std::vector<CoarseField> &evec_coarse;
-  const std::vector<RealD>       &eval_coarse;
-public:
-  
-  LocalCoherenceDeflatedGuesser(const std::vector<FineField>   &_subspace,
-				const std::vector<CoarseField> &_evec_coarse,
-				const std::vector<RealD>       &_eval_coarse)
-    : subspace(_subspace), 
-      evec_coarse(_evec_coarse), 
-      eval_coarse(_eval_coarse)  
-  {
-  }
-  
-  void operator()(const FineField &src,FineField &guess) { 
-    int N = (int)evec_coarse.size();
-    CoarseField src_coarse(evec_coarse[0]._grid);
-    CoarseField guess_coarse(evec_coarse[0]._grid);    guess_coarse = zero;
-    blockProject(src_coarse,src,subspace);    
-    for (int i=0;i<N;i++) {
-      const CoarseField & tmp = evec_coarse[i];
-      axpy(guess_coarse,TensorRemove(innerProduct(tmp,src_coarse)) / eval_coarse[i],tmp,guess_coarse);
-    }
-    blockPromote(guess_coarse,guess,subspace);
-    guess.checkerboard = src.checkerboard;
-  };
-};
-
-
-
-}
-#endif
--- a/Grid/algorithms/iterative/FlexibleCommunicationAvoidingGeneralisedMinimalResidual.h
+++ b/Grid/algorithms/iterative/FlexibleCommunicationAvoidingGeneralisedMinimalResidual.h
@@ -1,256 +0,0 @@
-/*************************************************************************************
-
-Grid physics library, www.github.com/paboyle/Grid
-
-Source file: ./lib/algorithms/iterative/FlexibleCommunicationAvoidingGeneralisedMinimalResidual.h
-
-Copyright (C) 2015
-
-Author: Daniel Richtmann <daniel.richtmann@ur.de>
-
-This program is free software; you can redistribute it and/or modify
-it under the terms of the GNU General Public License as published by
-the Free Software Foundation; either version 2 of the License, or
-(at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along
-with this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
-
-See the full license in the file "LICENSE" in the top level distribution
-directory
-*************************************************************************************/
-/*  END LEGAL */
-#ifndef GRID_FLEXIBLE_COMMUNICATION_AVOIDING_GENERALISED_MINIMAL_RESIDUAL_H
-#define GRID_FLEXIBLE_COMMUNICATION_AVOIDING_GENERALISED_MINIMAL_RESIDUAL_H
-
-namespace Grid {
-
-template<class Field>
-class FlexibleCommunicationAvoidingGeneralisedMinimalResidual : public OperatorFunction<Field> {
- public:
-  bool ErrorOnNoConverge; // Throw an assert when FCAGMRES fails to converge,
-                          // defaults to true
-
-  RealD   Tolerance;
-
-  Integer MaxIterations;
-  Integer RestartLength;
-  Integer MaxNumberOfRestarts;
-  Integer IterationCount; // Number of iterations the FCAGMRES took to finish,
-                          // filled in upon completion
-
-  GridStopWatch MatrixTimer;
-  GridStopWatch PrecTimer;
-  GridStopWatch LinalgTimer;
-  GridStopWatch QrTimer;
-  GridStopWatch CompSolutionTimer;
-
-  Eigen::MatrixXcd H;
-
-  std::vector<std::complex<double>> y;
-  std::vector<std::complex<double>> gamma;
-  std::vector<std::complex<double>> c;
-  std::vector<std::complex<double>> s;
-
-  LinearFunction<Field> &Preconditioner;
-
-  FlexibleCommunicationAvoidingGeneralisedMinimalResidual(RealD   tol,
-                                                          Integer maxit,
-                                                          LinearFunction<Field> &Prec,
-                                                          Integer restart_length,
-                                                          bool    err_on_no_conv = true)
-      : Tolerance(tol)
-      , MaxIterations(maxit)
-      , RestartLength(restart_length)
-      , MaxNumberOfRestarts(MaxIterations/RestartLength + ((MaxIterations%RestartLength == 0) ? 0 : 1))
-      , ErrorOnNoConverge(err_on_no_conv)
-      , H(Eigen::MatrixXcd::Zero(RestartLength, RestartLength + 1)) // sizes taken from DD-αAMG code base
-      , y(RestartLength + 1, 0.)
-      , gamma(RestartLength + 1, 0.)
-      , c(RestartLength + 1, 0.)
-      , s(RestartLength + 1, 0.)
-      , Preconditioner(Prec) {};
-
-  void operator()(LinearOperatorBase<Field> &LinOp, const Field &src, Field &psi) {
-
-    std::cout << GridLogWarning << "This algorithm currently doesn't differ from regular FGMRES" << std::endl;
-
-    psi.checkerboard = src.checkerboard;
-    conformable(psi, src);
-
-    RealD guess = norm2(psi);
-    assert(std::isnan(guess) == 0);
-
-    RealD cp;
-    RealD ssq = norm2(src);
-    RealD rsq = Tolerance * Tolerance * ssq;
-
-    Field r(src._grid);
-
-    std::cout << std::setprecision(4) << std::scientific;
-    std::cout << GridLogIterative << "FlexibleCommunicationAvoidingGeneralisedMinimalResidual: guess " << guess << std::endl;
-    std::cout << GridLogIterative << "FlexibleCommunicationAvoidingGeneralisedMinimalResidual:   src " << ssq   << std::endl;
-
-    PrecTimer.Reset();
-    MatrixTimer.Reset();
-    LinalgTimer.Reset();
-    QrTimer.Reset();
-    CompSolutionTimer.Reset();
-
-    GridStopWatch SolverTimer;
-    SolverTimer.Start();
-
-    IterationCount = 0;
-
-    for (int k=0; k<MaxNumberOfRestarts; k++) {
-
-      cp = outerLoopBody(LinOp, src, psi, rsq);
-
-      // Stopping condition
-      if (cp <= rsq) {
-
-        SolverTimer.Stop();
-
-        LinOp.Op(psi,r);
-        axpy(r,-1.0,src,r);
-
-        RealD srcnorm       = sqrt(ssq);
-        RealD resnorm       = sqrt(norm2(r));
-        RealD true_residual = resnorm / srcnorm;
-
-        std::cout << GridLogMessage        << "FlexibleCommunicationAvoidingGeneralisedMinimalResidual: Converged on iteration " << IterationCount
-                  << " computed residual " << sqrt(cp / ssq)
-                  << " true residual "     << true_residual
-                  << " target "            << Tolerance << std::endl;
-
-        std::cout << GridLogMessage << "FCAGMRES Time elapsed: Total   " <<       SolverTimer.Elapsed() << std::endl;
-        std::cout << GridLogMessage << "FCAGMRES Time elapsed: Precon  " <<         PrecTimer.Elapsed() << std::endl;
-        std::cout << GridLogMessage << "FCAGMRES Time elapsed: Matrix  " <<       MatrixTimer.Elapsed() << std::endl;
-        std::cout << GridLogMessage << "FCAGMRES Time elapsed: Linalg  " <<       LinalgTimer.Elapsed() << std::endl;
-        std::cout << GridLogMessage << "FCAGMRES Time elapsed: QR      " <<           QrTimer.Elapsed() << std::endl;
-        std::cout << GridLogMessage << "FCAGMRES Time elapsed: CompSol " << CompSolutionTimer.Elapsed() << std::endl;
-        return;
-      }
-    }
-
-    std::cout << GridLogMessage << "FlexibleCommunicationAvoidingGeneralisedMinimalResidual did NOT converge" << std::endl;
-
-    if (ErrorOnNoConverge)
-      assert(0);
-  }
-
-  RealD outerLoopBody(LinearOperatorBase<Field> &LinOp, const Field &src, Field &psi, RealD rsq) {
-
-    RealD cp = 0;
-
-    Field w(src._grid);
-    Field r(src._grid);
-
-    // these should probably be made class members so that they are only allocated once, not in every restart
-    std::vector<Field> v(RestartLength + 1, src._grid); for (auto &elem : v) elem = zero;
-    std::vector<Field> z(RestartLength + 1, src._grid); for (auto &elem : z) elem = zero;
-
-    MatrixTimer.Start();
-    LinOp.Op(psi, w);
-    MatrixTimer.Stop();
-
-    LinalgTimer.Start();
-    r = src - w;
-
-    gamma[0] = sqrt(norm2(r));
-
-    v[0] = (1. / gamma[0]) * r;
-    LinalgTimer.Stop();
-
-    for (int i=0; i<RestartLength; i++) {
-
-      IterationCount++;
-
-      arnoldiStep(LinOp, v, z, w, i);
-
-      qrUpdate(i);
-
-      cp = std::norm(gamma[i+1]);
-
-      std::cout << GridLogIterative << "FlexibleCommunicationAvoidingGeneralisedMinimalResidual: Iteration " << IterationCount
-                << " residual " << cp << " target " << rsq << std::endl;
-
-      if ((i == RestartLength - 1) || (IterationCount == MaxIterations) || (cp <= rsq)) {
-
-        computeSolution(z, psi, i);
-
-        return cp;
-      }
-    }
-
-    assert(0); // Never reached
-    return cp;
-  }
-
-  void arnoldiStep(LinearOperatorBase<Field> &LinOp, std::vector<Field> &v, std::vector<Field> &z, Field &w, int iter) {
-
-    PrecTimer.Start();
-    Preconditioner(v[iter], z[iter]);
-    PrecTimer.Stop();
-
-    MatrixTimer.Start();
-    LinOp.Op(z[iter], w);
-    MatrixTimer.Stop();
-
-    LinalgTimer.Start();
-    for (int i = 0; i <= iter; ++i) {
-      H(iter, i) = innerProduct(v[i], w);
-      w = w - H(iter, i) * v[i];
-    }
-
-    H(iter, iter + 1) = sqrt(norm2(w));
-    v[iter + 1] = (1. / H(iter, iter + 1)) * w;
-    LinalgTimer.Stop();
-  }
-
-  void qrUpdate(int iter) {
-
-    QrTimer.Start();
-    for (int i = 0; i < iter ; ++i) {
-      auto tmp       = -s[i] * H(iter, i) + c[i] * H(iter, i + 1);
-      H(iter, i)     = std::conj(c[i]) * H(iter, i) + std::conj(s[i]) * H(iter, i + 1);
-      H(iter, i + 1) = tmp;
-    }
-
-    // Compute new Givens Rotation
-    ComplexD nu = sqrt(std::norm(H(iter, iter)) + std::norm(H(iter, iter + 1)));
-    c[iter]     = H(iter, iter) / nu;
-    s[iter]     = H(iter, iter + 1) / nu;
-
-    // Apply new Givens rotation
-    H(iter, iter)     = nu;
-    H(iter, iter + 1) = 0.;
-
-    gamma[iter + 1] = -s[iter] * gamma[iter];
-    gamma[iter]     = std::conj(c[iter]) * gamma[iter];
-    QrTimer.Stop();
-  }
-
-  void computeSolution(std::vector<Field> const &z, Field &psi, int iter) {
-
-    CompSolutionTimer.Start();
-    for (int i = iter; i >= 0; i--) {
-      y[i] = gamma[i];
-      for (int k = i + 1; k <= iter; k++)
-        y[i] = y[i] - H(k, i) * y[k];
-      y[i] = y[i] / H(i, i);
-    }
-
-    for (int i = 0; i <= iter; i++)
-      psi = psi + z[i] * y[i];
-    CompSolutionTimer.Stop();
-  }
-};
-}
-#endif
--- a/Grid/algorithms/iterative/FlexibleGeneralisedMinimalResidual.h
+++ b/Grid/algorithms/iterative/FlexibleGeneralisedMinimalResidual.h
@@ -1,254 +0,0 @@
-/*************************************************************************************
-
-Grid physics library, www.github.com/paboyle/Grid
-
-Source file: ./lib/algorithms/iterative/FlexibleGeneralisedMinimalResidual.h
-
-Copyright (C) 2015
-
-Author: Daniel Richtmann <daniel.richtmann@ur.de>
-
-This program is free software; you can redistribute it and/or modify
-it under the terms of the GNU General Public License as published by
-the Free Software Foundation; either version 2 of the License, or
-(at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along
-with this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
-
-See the full license in the file "LICENSE" in the top level distribution
-directory
-*************************************************************************************/
-/*  END LEGAL */
-#ifndef GRID_FLEXIBLE_GENERALISED_MINIMAL_RESIDUAL_H
-#define GRID_FLEXIBLE_GENERALISED_MINIMAL_RESIDUAL_H
-
-namespace Grid {
-
-template<class Field>
-class FlexibleGeneralisedMinimalResidual : public OperatorFunction<Field> {
- public:
-  bool ErrorOnNoConverge; // Throw an assert when FGMRES fails to converge,
-                          // defaults to true
-
-  RealD   Tolerance;
-
-  Integer MaxIterations;
-  Integer RestartLength;
-  Integer MaxNumberOfRestarts;
-  Integer IterationCount; // Number of iterations the FGMRES took to finish,
-                          // filled in upon completion
-
-  GridStopWatch MatrixTimer;
-  GridStopWatch PrecTimer;
-  GridStopWatch LinalgTimer;
-  GridStopWatch QrTimer;
-  GridStopWatch CompSolutionTimer;
-
-  Eigen::MatrixXcd H;
-
-  std::vector<std::complex<double>> y;
-  std::vector<std::complex<double>> gamma;
-  std::vector<std::complex<double>> c;
-  std::vector<std::complex<double>> s;
-
-  LinearFunction<Field> &Preconditioner;
-
-  FlexibleGeneralisedMinimalResidual(RealD   tol,
-                                     Integer maxit,
-                                     LinearFunction<Field> &Prec,
-                                     Integer restart_length,
-                                     bool    err_on_no_conv = true)
-      : Tolerance(tol)
-      , MaxIterations(maxit)
-      , RestartLength(restart_length)
-      , MaxNumberOfRestarts(MaxIterations/RestartLength + ((MaxIterations%RestartLength == 0) ? 0 : 1))
-      , ErrorOnNoConverge(err_on_no_conv)
-      , H(Eigen::MatrixXcd::Zero(RestartLength, RestartLength + 1)) // sizes taken from DD-αAMG code base
-      , y(RestartLength + 1, 0.)
-      , gamma(RestartLength + 1, 0.)
-      , c(RestartLength + 1, 0.)
-      , s(RestartLength + 1, 0.)
-      , Preconditioner(Prec) {};
-
-  void operator()(LinearOperatorBase<Field> &LinOp, const Field &src, Field &psi) {
-
-    psi.checkerboard = src.checkerboard;
-    conformable(psi, src);
-
-    RealD guess = norm2(psi);
-    assert(std::isnan(guess) == 0);
-
-    RealD cp;
-    RealD ssq = norm2(src);
-    RealD rsq = Tolerance * Tolerance * ssq;
-
-    Field r(src._grid);
-
-    std::cout << std::setprecision(4) << std::scientific;
-    std::cout << GridLogIterative << "FlexibleGeneralisedMinimalResidual: guess " << guess << std::endl;
-    std::cout << GridLogIterative << "FlexibleGeneralisedMinimalResidual:   src " << ssq   << std::endl;
-
-    PrecTimer.Reset();
-    MatrixTimer.Reset();
-    LinalgTimer.Reset();
-    QrTimer.Reset();
-    CompSolutionTimer.Reset();
-
-    GridStopWatch SolverTimer;
-    SolverTimer.Start();
-
-    IterationCount = 0;
-
-    for (int k=0; k<MaxNumberOfRestarts; k++) {
-
-      cp = outerLoopBody(LinOp, src, psi, rsq);
-
-      // Stopping condition
-      if (cp <= rsq) {
-
-        SolverTimer.Stop();
-
-        LinOp.Op(psi,r);
-        axpy(r,-1.0,src,r);
-
-        RealD srcnorm       = sqrt(ssq);
-        RealD resnorm       = sqrt(norm2(r));
-        RealD true_residual = resnorm / srcnorm;
-
-        std::cout << GridLogMessage        << "FlexibleGeneralisedMinimalResidual: Converged on iteration " << IterationCount
-                  << " computed residual " << sqrt(cp / ssq)
-                  << " true residual "     << true_residual
-                  << " target "            << Tolerance << std::endl;
-
-        std::cout << GridLogMessage << "FGMRES Time elapsed: Total   " <<       SolverTimer.Elapsed() << std::endl;
-        std::cout << GridLogMessage << "FGMRES Time elapsed: Precon  " <<         PrecTimer.Elapsed() << std::endl;
-        std::cout << GridLogMessage << "FGMRES Time elapsed: Matrix  " <<       MatrixTimer.Elapsed() << std::endl;
-        std::cout << GridLogMessage << "FGMRES Time elapsed: Linalg  " <<       LinalgTimer.Elapsed() << std::endl;
-        std::cout << GridLogMessage << "FGMRES Time elapsed: QR      " <<           QrTimer.Elapsed() << std::endl;
-        std::cout << GridLogMessage << "FGMRES Time elapsed: CompSol " << CompSolutionTimer.Elapsed() << std::endl;
-        return;
-      }
-    }
-
-    std::cout << GridLogMessage << "FlexibleGeneralisedMinimalResidual did NOT converge" << std::endl;
-
-    if (ErrorOnNoConverge)
-      assert(0);
-  }
-
-  RealD outerLoopBody(LinearOperatorBase<Field> &LinOp, const Field &src, Field &psi, RealD rsq) {
-
-    RealD cp = 0;
-
-    Field w(src._grid);
-    Field r(src._grid);
-
-    // these should probably be made class members so that they are only allocated once, not in every restart
-    std::vector<Field> v(RestartLength + 1, src._grid); for (auto &elem : v) elem = zero;
-    std::vector<Field> z(RestartLength + 1, src._grid); for (auto &elem : z) elem = zero;
-
-    MatrixTimer.Start();
-    LinOp.Op(psi, w);
-    MatrixTimer.Stop();
-
-    LinalgTimer.Start();
-    r = src - w;
-
-    gamma[0] = sqrt(norm2(r));
-
-    v[0] = (1. / gamma[0]) * r;
-    LinalgTimer.Stop();
-
-    for (int i=0; i<RestartLength; i++) {
-
-      IterationCount++;
-
-      arnoldiStep(LinOp, v, z, w, i);
-
-      qrUpdate(i);
-
-      cp = std::norm(gamma[i+1]);
-
-      std::cout << GridLogIterative << "FlexibleGeneralisedMinimalResidual: Iteration " << IterationCount
-                << " residual " << cp << " target " << rsq << std::endl;
-
-      if ((i == RestartLength - 1) || (IterationCount == MaxIterations) || (cp <= rsq)) {
-
-        computeSolution(z, psi, i);
-
-        return cp;
-      }
-    }
-
-    assert(0); // Never reached
-    return cp;
-  }
-
-  void arnoldiStep(LinearOperatorBase<Field> &LinOp, std::vector<Field> &v, std::vector<Field> &z, Field &w, int iter) {
-
-    PrecTimer.Start();
-    Preconditioner(v[iter], z[iter]);
-    PrecTimer.Stop();
-
-    MatrixTimer.Start();
-    LinOp.Op(z[iter], w);
-    MatrixTimer.Stop();
-
-    LinalgTimer.Start();
-    for (int i = 0; i <= iter; ++i) {
-      H(iter, i) = innerProduct(v[i], w);
-      w = w - H(iter, i) * v[i];
-    }
-
-    H(iter, iter + 1) = sqrt(norm2(w));
-    v[iter + 1] = (1. / H(iter, iter + 1)) * w;
-    LinalgTimer.Stop();
-  }
-
-  void qrUpdate(int iter) {
-
-    QrTimer.Start();
-    for (int i = 0; i < iter ; ++i) {
-      auto tmp       = -s[i] * H(iter, i) + c[i] * H(iter, i + 1);
-      H(iter, i)     = std::conj(c[i]) * H(iter, i) + std::conj(s[i]) * H(iter, i + 1);
-      H(iter, i + 1) = tmp;
-    }
-
-    // Compute new Givens Rotation
-    ComplexD nu = sqrt(std::norm(H(iter, iter)) + std::norm(H(iter, iter + 1)));
-    c[iter]     = H(iter, iter) / nu;
-    s[iter]     = H(iter, iter + 1) / nu;
-
-    // Apply new Givens rotation
-    H(iter, iter)     = nu;
-    H(iter, iter + 1) = 0.;
-
-    gamma[iter + 1] = -s[iter] * gamma[iter];
-    gamma[iter]     = std::conj(c[iter]) * gamma[iter];
-    QrTimer.Stop();
-  }
-
-  void computeSolution(std::vector<Field> const &z, Field &psi, int iter) {
-
-    CompSolutionTimer.Start();
-    for (int i = iter; i >= 0; i--) {
-      y[i] = gamma[i];
-      for (int k = i + 1; k <= iter; k++)
-        y[i] = y[i] - H(k, i) * y[k];
-      y[i] = y[i] / H(i, i);
-    }
-
-    for (int i = 0; i <= iter; i++)
-      psi = psi + z[i] * y[i];
-    CompSolutionTimer.Stop();
-  }
-};
-}
-#endif
--- a/Grid/algorithms/iterative/GeneralisedMinimalResidual.h
+++ b/Grid/algorithms/iterative/GeneralisedMinimalResidual.h
@@ -1,242 +0,0 @@
-/*************************************************************************************
-
-Grid physics library, www.github.com/paboyle/Grid
-
-Source file: ./lib/algorithms/iterative/GeneralisedMinimalResidual.h
-
-Copyright (C) 2015
-
-Author: Daniel Richtmann <daniel.richtmann@ur.de>
-
-This program is free software; you can redistribute it and/or modify
-it under the terms of the GNU General Public License as published by
-the Free Software Foundation; either version 2 of the License, or
-(at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along
-with this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
-
-See the full license in the file "LICENSE" in the top level distribution
-directory
-*************************************************************************************/
-/*  END LEGAL */
-#ifndef GRID_GENERALISED_MINIMAL_RESIDUAL_H
-#define GRID_GENERALISED_MINIMAL_RESIDUAL_H
-
-namespace Grid {
-
-template<class Field>
-class GeneralisedMinimalResidual : public OperatorFunction<Field> {
- public:
-  bool ErrorOnNoConverge; // Throw an assert when GMRES fails to converge,
-                          // defaults to true
-
-  RealD   Tolerance;
-
-  Integer MaxIterations;
-  Integer RestartLength;
-  Integer MaxNumberOfRestarts;
-  Integer IterationCount; // Number of iterations the GMRES took to finish,
-                          // filled in upon completion
-
-  GridStopWatch MatrixTimer;
-  GridStopWatch LinalgTimer;
-  GridStopWatch QrTimer;
-  GridStopWatch CompSolutionTimer;
-
-  Eigen::MatrixXcd H;
-
-  std::vector<std::complex<double>> y;
-  std::vector<std::complex<double>> gamma;
-  std::vector<std::complex<double>> c;
-  std::vector<std::complex<double>> s;
-
-  GeneralisedMinimalResidual(RealD   tol,
-                             Integer maxit,
-                             Integer restart_length,
-                             bool    err_on_no_conv = true)
-      : Tolerance(tol)
-      , MaxIterations(maxit)
-      , RestartLength(restart_length)
-      , MaxNumberOfRestarts(MaxIterations/RestartLength + ((MaxIterations%RestartLength == 0) ? 0 : 1))
-      , ErrorOnNoConverge(err_on_no_conv)
-      , H(Eigen::MatrixXcd::Zero(RestartLength, RestartLength + 1)) // sizes taken from DD-αAMG code base
-      , y(RestartLength + 1, 0.)
-      , gamma(RestartLength + 1, 0.)
-      , c(RestartLength + 1, 0.)
-      , s(RestartLength + 1, 0.) {};
-
-  void operator()(LinearOperatorBase<Field> &LinOp, const Field &src, Field &psi) {
-
-    psi.checkerboard = src.checkerboard;
-    conformable(psi, src);
-
-    RealD guess = norm2(psi);
-    assert(std::isnan(guess) == 0);
-
-    RealD cp;
-    RealD ssq = norm2(src);
-    RealD rsq = Tolerance * Tolerance * ssq;
-
-    Field r(src._grid);
-
-    std::cout << std::setprecision(4) << std::scientific;
-    std::cout << GridLogIterative << "GeneralisedMinimalResidual: guess " << guess << std::endl;
-    std::cout << GridLogIterative << "GeneralisedMinimalResidual:   src " << ssq   << std::endl;
-
-    MatrixTimer.Reset();
-    LinalgTimer.Reset();
-    QrTimer.Reset();
-    CompSolutionTimer.Reset();
-
-    GridStopWatch SolverTimer;
-    SolverTimer.Start();
-
-    IterationCount = 0;
-
-    for (int k=0; k<MaxNumberOfRestarts; k++) {
-
-      cp = outerLoopBody(LinOp, src, psi, rsq);
-
-      // Stopping condition
-      if (cp <= rsq) {
-
-        SolverTimer.Stop();
-
-        LinOp.Op(psi,r);
-        axpy(r,-1.0,src,r);
-
-        RealD srcnorm       = sqrt(ssq);
-        RealD resnorm       = sqrt(norm2(r));
-        RealD true_residual = resnorm / srcnorm;
-
-        std::cout << GridLogMessage        << "GeneralisedMinimalResidual: Converged on iteration " << IterationCount
-                  << " computed residual " << sqrt(cp / ssq)
-                  << " true residual "     << true_residual
-                  << " target "            << Tolerance << std::endl;
-
-        std::cout << GridLogMessage << "GMRES Time elapsed: Total   " <<       SolverTimer.Elapsed() << std::endl;
-        std::cout << GridLogMessage << "GMRES Time elapsed: Matrix  " <<       MatrixTimer.Elapsed() << std::endl;
-        std::cout << GridLogMessage << "GMRES Time elapsed: Linalg  " <<       LinalgTimer.Elapsed() << std::endl;
-        std::cout << GridLogMessage << "GMRES Time elapsed: QR      " <<           QrTimer.Elapsed() << std::endl;
-        std::cout << GridLogMessage << "GMRES Time elapsed: CompSol " << CompSolutionTimer.Elapsed() << std::endl;
-        return;
-      }
-    }
-
-    std::cout << GridLogMessage << "GeneralisedMinimalResidual did NOT converge" << std::endl;
-
-    if (ErrorOnNoConverge)
-      assert(0);
-  }
-
-  RealD outerLoopBody(LinearOperatorBase<Field> &LinOp, const Field &src, Field &psi, RealD rsq) {
-
-    RealD cp = 0;
-
-    Field w(src._grid);
-    Field r(src._grid);
-
-    // this should probably be made a class member so that it is only allocated once, not in every restart
-    std::vector<Field> v(RestartLength + 1, src._grid); for (auto &elem : v) elem = zero;
-
-    MatrixTimer.Start();
-    LinOp.Op(psi, w);
-    MatrixTimer.Stop();
-
-    LinalgTimer.Start();
-    r = src - w;
-
-    gamma[0] = sqrt(norm2(r));
-
-    v[0] = (1. / gamma[0]) * r;
-    LinalgTimer.Stop();
-
-    for (int i=0; i<RestartLength; i++) {
-
-      IterationCount++;
-
-      arnoldiStep(LinOp, v, w, i);
-
-      qrUpdate(i);
-
-      cp = std::norm(gamma[i+1]);
-
-      std::cout << GridLogIterative << "GeneralisedMinimalResidual: Iteration " << IterationCount
-                << " residual " << cp << " target " << rsq << std::endl;
-
-      if ((i == RestartLength - 1) || (IterationCount == MaxIterations) || (cp <= rsq)) {
-
-        computeSolution(v, psi, i);
-
-        return cp;
-      }
-    }
-
-    assert(0); // Never reached
-    return cp;
-  }
-
-  void arnoldiStep(LinearOperatorBase<Field> &LinOp, std::vector<Field> &v, Field &w, int iter) {
-
-    MatrixTimer.Start();
-    LinOp.Op(v[iter], w);
-    MatrixTimer.Stop();
-
-    LinalgTimer.Start();
-    for (int i = 0; i <= iter; ++i) {
-      H(iter, i) = innerProduct(v[i], w);
-      w = w - H(iter, i) * v[i];
-    }
-
-    H(iter, iter + 1) = sqrt(norm2(w));
-    v[iter + 1] = (1. / H(iter, iter + 1)) * w;
-    LinalgTimer.Stop();
-  }
-
-  void qrUpdate(int iter) {
-
-    QrTimer.Start();
-    for (int i = 0; i < iter ; ++i) {
-      auto tmp       = -s[i] * H(iter, i) + c[i] * H(iter, i + 1);
-      H(iter, i)     = std::conj(c[i]) * H(iter, i) + std::conj(s[i]) * H(iter, i + 1);
-      H(iter, i + 1) = tmp;
-    }
-
-    // Compute new Givens Rotation
-    ComplexD nu = sqrt(std::norm(H(iter, iter)) + std::norm(H(iter, iter + 1)));
-    c[iter]     = H(iter, iter) / nu;
-    s[iter]     = H(iter, iter + 1) / nu;
-
-    // Apply new Givens rotation
-    H(iter, iter)     = nu;
-    H(iter, iter + 1) = 0.;
-
-    gamma[iter + 1] = -s[iter] * gamma[iter];
-    gamma[iter]     = std::conj(c[iter]) * gamma[iter];
-    QrTimer.Stop();
-  }
-
-  void computeSolution(std::vector<Field> const &v, Field &psi, int iter) {
-
-    CompSolutionTimer.Start();
-    for (int i = iter; i >= 0; i--) {
-      y[i] = gamma[i];
-      for (int k = i + 1; k <= iter; k++)
-        y[i] = y[i] - H(k, i) * y[k];
-      y[i] = y[i] / H(i, i);
-    }
-
-    for (int i = 0; i <= iter; i++)
-      psi = psi + v[i] * y[i];
-    CompSolutionTimer.Stop();
-  }
-};
-}
-#endif
--- a/Grid/algorithms/iterative/ImplicitlyRestartedLanczos.h
+++ b/Grid/algorithms/iterative/ImplicitlyRestartedLanczos.h
@@ -1,842 +0,0 @@
-    /*************************************************************************************
-
-    Grid physics library, www.github.com/paboyle/Grid 
-
-    Source file: ./lib/algorithms/iterative/ImplicitlyRestartedLanczos.h
-
-    Copyright (C) 2015
-
-Author: Peter Boyle <paboyle@ph.ed.ac.uk>
-Author: paboyle <paboyle@ph.ed.ac.uk>
-Author: Chulwoo Jung <chulwoo@bnl.gov>
-Author: Christoph Lehner <clehner@bnl.gov>
-
-    This program is free software; you can redistribute it and/or modify
-    it under the terms of the GNU General Public License as published by
-    the Free Software Foundation; either version 2 of the License, or
-    (at your option) any later version.
-
-    This program is distributed in the hope that it will be useful,
-    but WITHOUT ANY WARRANTY; without even the implied warranty of
-    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-    GNU General Public License for more details.
-
-    You should have received a copy of the GNU General Public License along
-    with this program; if not, write to the Free Software Foundation, Inc.,
-    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
-
-    See the full license in the file "LICENSE" in the top level distribution directory
-    *************************************************************************************/
-    /*  END LEGAL */
-#ifndef GRID_BIRL_H
-#define GRID_BIRL_H
-
-#include <string.h> //memset
-//#include <zlib.h>
-#include <sys/stat.h>
-
-namespace Grid { 
-
-  ////////////////////////////////////////////////////////
-  // Move following 100 LOC to lattice/Lattice_basis.h
-  ////////////////////////////////////////////////////////
-template<class Field>
-void basisOrthogonalize(std::vector<Field> &basis,Field &w,int k) 
-{
-  for(int j=0; j<k; ++j){
-    auto ip = innerProduct(basis[j],w);
-    w = w - ip*basis[j];
-  }
-}
-
-template<class Field>
-void basisRotate(std::vector<Field> &basis,Eigen::MatrixXd& Qt,int j0, int j1, int k0,int k1,int Nm) 
-{
-  typedef typename Field::vector_object vobj;
-  GridBase* grid = basis[0]._grid;
-      
-  parallel_region
-  {
-
-    std::vector < vobj , commAllocator<vobj> > B(Nm); // Thread private
-       
-    parallel_for_internal(int ss=0;ss < grid->oSites();ss++){
-      for(int j=j0; j<j1; ++j) B[j]=0.;
-      
-      for(int j=j0; j<j1; ++j){
-	for(int k=k0; k<k1; ++k){
-	  B[j] +=Qt(j,k) * basis[k]._odata[ss];
-	}
-      }
-      for(int j=j0; j<j1; ++j){
-	  basis[j]._odata[ss] = B[j];
-      }
-    }
-  }
-}
-
-// Extract a single rotated vector
-template<class Field>
-void basisRotateJ(Field &result,std::vector<Field> &basis,Eigen::MatrixXd& Qt,int j, int k0,int k1,int Nm) 
-{
-  typedef typename Field::vector_object vobj;
-  GridBase* grid = basis[0]._grid;
-
-  result.checkerboard = basis[0].checkerboard;
-  parallel_for(int ss=0;ss < grid->oSites();ss++){
-    vobj B = zero;
-    for(int k=k0; k<k1; ++k){
-      B +=Qt(j,k) * basis[k]._odata[ss];
-    }
-    result._odata[ss] = B;
-  }
-}
-
-template<class Field>
-void basisReorderInPlace(std::vector<Field> &_v,std::vector<RealD>& sort_vals, std::vector<int>& idx) 
-{
-  int vlen = idx.size();
-
-  assert(vlen>=1);
-  assert(vlen<=sort_vals.size());
-  assert(vlen<=_v.size());
-
-  for (size_t i=0;i<vlen;i++) {
-
-    if (idx[i] != i) {
-
-      //////////////////////////////////////
-      // idx[i] is a table of desired sources giving a permutation.
-      // Swap v[i] with v[idx[i]].
-      // Find  j>i for which _vnew[j] = _vold[i],
-      // track the move idx[j] => idx[i]
-      // track the move idx[i] => i
-      //////////////////////////////////////
-      size_t j;
-      for (j=i;j<idx.size();j++)
-	if (idx[j]==i)
-	  break;
-
-      assert(idx[i] > i);     assert(j!=idx.size());      assert(idx[j]==i);
-
-      std::swap(_v[i]._odata,_v[idx[i]]._odata); // should use vector move constructor, no data copy
-      std::swap(sort_vals[i],sort_vals[idx[i]]);
-
-      idx[j] = idx[i];
-      idx[i] = i;
-    }
-  }
-}
-
-inline std::vector<int> basisSortGetIndex(std::vector<RealD>& sort_vals) 
-{
-  std::vector<int> idx(sort_vals.size());
-  std::iota(idx.begin(), idx.end(), 0);
-
-  // sort indexes based on comparing values in v
-  std::sort(idx.begin(), idx.end(), [&sort_vals](int i1, int i2) {
-    return ::fabs(sort_vals[i1]) < ::fabs(sort_vals[i2]);
-  });
-  return idx;
-}
-
-template<class Field>
-void basisSortInPlace(std::vector<Field> & _v,std::vector<RealD>& sort_vals, bool reverse) 
-{
-  std::vector<int> idx = basisSortGetIndex(sort_vals);
-  if (reverse)
-    std::reverse(idx.begin(), idx.end());
-  
-  basisReorderInPlace(_v,sort_vals,idx);
-}
-
-/////////////////////////////////////////////////////////////
-// Implicitly restarted lanczos
-/////////////////////////////////////////////////////////////
-template<class Field> class ImplicitlyRestartedLanczosTester 
-{
- public:
-  virtual int TestConvergence(int j,RealD resid,Field &evec, RealD &eval,RealD evalMaxApprox)=0;
-  virtual int ReconstructEval(int j,RealD resid,Field &evec, RealD &eval,RealD evalMaxApprox)=0;
-};
-
-enum IRLdiagonalisation { 
-  IRLdiagonaliseWithDSTEGR,
-  IRLdiagonaliseWithQR,
-  IRLdiagonaliseWithEigen
-};
-
-template<class Field> class ImplicitlyRestartedLanczosHermOpTester  : public ImplicitlyRestartedLanczosTester<Field>
-{
- public:
-
-  LinearFunction<Field>       &_HermOp;
-  ImplicitlyRestartedLanczosHermOpTester(LinearFunction<Field> &HermOp) : _HermOp(HermOp)  {  };
-  int ReconstructEval(int j,RealD resid,Field &B, RealD &eval,RealD evalMaxApprox)
-  {
-    return TestConvergence(j,resid,B,eval,evalMaxApprox);
-  }
-  int TestConvergence(int j,RealD eresid,Field &B, RealD &eval,RealD evalMaxApprox)
-  {
-    Field v(B);
-    RealD eval_poly = eval;
-    // Apply operator
-    _HermOp(B,v);
-
-    RealD vnum = real(innerProduct(B,v)); // HermOp.
-    RealD vden = norm2(B);
-    RealD vv0  = norm2(v);
-    eval   = vnum/vden;
-    v -= eval*B;
-
-    RealD vv = norm2(v) / ::pow(evalMaxApprox,2.0);
-
-    std::cout.precision(13);
-    std::cout<<GridLogIRL  << "[" << std::setw(3)<<j<<"] "
-	     <<"eval = "<<std::setw(25)<< eval << " (" << eval_poly << ")"
-	     <<" |H B[i] - eval[i]B[i]|^2 / evalMaxApprox^2 " << std::setw(25) << vv
-	     <<std::endl;
-
-    int conv=0;
-    if( (vv<eresid*eresid) ) conv = 1;
-
-    return conv;
-  }
-};
-
-template<class Field> 
-class ImplicitlyRestartedLanczos {
- private:
-  const RealD small = 1.0e-8;
-  int MaxIter;
-  int MinRestart; // Minimum number of restarts; only check for convergence after
-  int Nstop;   // Number of evecs checked for convergence
-  int Nk;      // Number of converged sought
-  //  int Np;      // Np -- Number of spare vecs in krylov space //  == Nm - Nk
-  int Nm;      // Nm -- total number of vectors
-  IRLdiagonalisation diagonalisation;
-  int orth_period;
-    
-  RealD OrthoTime;
-  RealD eresid, betastp;
-  ////////////////////////////////
-  // Embedded objects
-  ////////////////////////////////
-  LinearFunction<Field>       &_PolyOp;
-  LinearFunction<Field>       &_HermOp;
-  ImplicitlyRestartedLanczosTester<Field> &_Tester;
-  // Default tester provided (we need a ref to something in default case)
-  ImplicitlyRestartedLanczosHermOpTester<Field> SimpleTester;
-  /////////////////////////
-  // Constructor
-  /////////////////////////
-  
-public:       
-
-  //////////////////////////////////////////////////////////////////
-  // PAB:
-  //////////////////////////////////////////////////////////////////
-  // Too many options  & knobs. 
-  // Eliminate:
-  //   orth_period
-  //   betastp
-  //   MinRestart
-  //
-  // Do we really need orth_period
-  // What is the theoretical basis & guarantees of betastp ?
-  // Nstop=Nk viable?
-  // MinRestart avoidable with new convergence test?
-  // Could cut to PolyOp, HermOp, Tester, Nk, Nm, resid, maxiter (+diagonalisation)
-  // HermOp could be eliminated if we dropped the Power method for max eval.
-  // -- also: The eval, eval2, eval2_copy stuff is still unnecessarily unclear
-  //////////////////////////////////////////////////////////////////
- ImplicitlyRestartedLanczos(LinearFunction<Field> & PolyOp,
-			    LinearFunction<Field> & HermOp,
-			    ImplicitlyRestartedLanczosTester<Field> & Tester,
-			    int _Nstop, // sought vecs
-			    int _Nk, // sought vecs
-			    int _Nm, // spare vecs
-			    RealD _eresid, // resid in lmdue deficit 
-			    int _MaxIter, // Max iterations
-			    RealD _betastp=0.0, // if beta(k) < betastp: converged
-			    int _MinRestart=1, int _orth_period = 1,
-			    IRLdiagonalisation _diagonalisation= IRLdiagonaliseWithEigen) :
-    SimpleTester(HermOp), _PolyOp(PolyOp),      _HermOp(HermOp), _Tester(Tester),
-    Nstop(_Nstop)  ,      Nk(_Nk),      Nm(_Nm),
-    eresid(_eresid),      betastp(_betastp),
-    MaxIter(_MaxIter)  ,      MinRestart(_MinRestart),
-    orth_period(_orth_period), diagonalisation(_diagonalisation)  { };
-
-    ImplicitlyRestartedLanczos(LinearFunction<Field> & PolyOp,
-			       LinearFunction<Field> & HermOp,
-			       int _Nstop, // sought vecs
-			       int _Nk, // sought vecs
-			       int _Nm, // spare vecs
-			       RealD _eresid, // resid in lmdue deficit 
-			       int _MaxIter, // Max iterations
-			       RealD _betastp=0.0, // if beta(k) < betastp: converged
-			       int _MinRestart=1, int _orth_period = 1,
-			       IRLdiagonalisation _diagonalisation= IRLdiagonaliseWithEigen) :
-    SimpleTester(HermOp),  _PolyOp(PolyOp),      _HermOp(HermOp), _Tester(SimpleTester),
-    Nstop(_Nstop)  ,      Nk(_Nk),      Nm(_Nm),
-    eresid(_eresid),      betastp(_betastp),
-    MaxIter(_MaxIter)  ,      MinRestart(_MinRestart),
-    orth_period(_orth_period), diagonalisation(_diagonalisation)  { };
-
-  ////////////////////////////////
-  // Helpers
-  ////////////////////////////////
-  template<typename T>  static RealD normalise(T& v) 
-  {
-    RealD nn = norm2(v);
-    nn = sqrt(nn);
-    v = v * (1.0/nn);
-    return nn;
-  }
-
-  void orthogonalize(Field& w, std::vector<Field>& evec,int k)
-  {
-    OrthoTime-=usecond()/1e6;
-    basisOrthogonalize(evec,w,k);
-    normalise(w);
-    OrthoTime+=usecond()/1e6;
-  }
-
-/* Rudy Arthur's thesis pp.137
------------------------
-Require: M > K P = M − K †
-Compute the factorization AVM = VM HM + fM eM 
-repeat
-  Q=I
-  for i = 1,...,P do
-    QiRi =HM −θiI Q = QQi
-    H M = Q †i H M Q i
-  end for
-  βK =HM(K+1,K) σK =Q(M,K)
-  r=vK+1βK +rσK
-  VK =VM(1:M)Q(1:M,1:K)
-  HK =HM(1:K,1:K)
-  →AVK =VKHK +fKe†K † Extend to an M = K + P step factorization AVM = VMHM + fMeM
-until convergence
-*/
-  void calc(std::vector<RealD>& eval, std::vector<Field>& evec,  const Field& src, int& Nconv, bool reverse=false)
-  {
-    GridBase *grid = src._grid;
-    assert(grid == evec[0]._grid);
-    
-    GridLogIRL.TimingMode(1);
-    std::cout << GridLogIRL <<"**************************************************************************"<< std::endl;
-    std::cout << GridLogIRL <<" ImplicitlyRestartedLanczos::calc() starting iteration 0 /  "<< MaxIter<< std::endl;
-    std::cout << GridLogIRL <<"**************************************************************************"<< std::endl;
-    std::cout << GridLogIRL <<" -- seek   Nk    = " << Nk    <<" vectors"<< std::endl;
-    std::cout << GridLogIRL <<" -- accept Nstop = " << Nstop <<" vectors"<< std::endl;
-    std::cout << GridLogIRL <<" -- total  Nm    = " << Nm    <<" vectors"<< std::endl;
-    std::cout << GridLogIRL <<" -- size of eval = " << eval.size() << std::endl;
-    std::cout << GridLogIRL <<" -- size of evec = " << evec.size() << std::endl;
-    if ( diagonalisation == IRLdiagonaliseWithDSTEGR ) {
-      std::cout << GridLogIRL << "Diagonalisation is DSTEGR "<<std::endl;
-    } else if ( diagonalisation == IRLdiagonaliseWithQR ) { 
-      std::cout << GridLogIRL << "Diagonalisation is QR "<<std::endl;
-    }  else if ( diagonalisation == IRLdiagonaliseWithEigen ) { 
-      std::cout << GridLogIRL << "Diagonalisation is Eigen "<<std::endl;
-    }
-    std::cout << GridLogIRL <<"**************************************************************************"<< std::endl;
-	
-    assert(Nm <= evec.size() && Nm <= eval.size());
-    
-    // quickly get an idea of the largest eigenvalue to more properly normalize the residuum
-    RealD evalMaxApprox = 0.0;
-    {
-      auto src_n = src;
-      auto tmp = src;
-      const int _MAX_ITER_IRL_MEVAPP_ = 50;
-      for (int i=0;i<_MAX_ITER_IRL_MEVAPP_;i++) {
-	normalise(src_n);
-	_HermOp(src_n,tmp);
-	RealD vnum = real(innerProduct(src_n,tmp)); // HermOp.
-	RealD vden = norm2(src_n);
-	RealD na = vnum/vden;
-	if (fabs(evalMaxApprox/na - 1.0) < 0.05)
-	  i=_MAX_ITER_IRL_MEVAPP_;
-	evalMaxApprox = na;
-	std::cout << GridLogIRL << " Approximation of largest eigenvalue: " << evalMaxApprox << std::endl;
-	src_n = tmp;
-      }
-    }
-	
-    std::vector<RealD> lme(Nm);  
-    std::vector<RealD> lme2(Nm);
-    std::vector<RealD> eval2(Nm);
-    std::vector<RealD> eval2_copy(Nm);
-    Eigen::MatrixXd Qt = Eigen::MatrixXd::Zero(Nm,Nm);
-
-    Field f(grid);
-    Field v(grid);
-    int k1 = 1;
-    int k2 = Nk;
-    RealD beta_k;
-
-    Nconv = 0;
-  
-    // Set initial vector
-    evec[0] = src;
-    normalise(evec[0]);
-	
-    // Initial Nk steps
-    OrthoTime=0.;
-    for(int k=0; k<Nk; ++k) step(eval,lme,evec,f,Nm,k);
-    std::cout<<GridLogIRL <<"Initial "<< Nk <<"steps done "<<std::endl;
-    std::cout<<GridLogIRL <<"Initial steps:OrthoTime "<<OrthoTime<< "seconds"<<std::endl;
-
-    //////////////////////////////////
-    // Restarting loop begins
-    //////////////////////////////////
-    int iter;
-    for(iter = 0; iter<MaxIter; ++iter){
-      
-      OrthoTime=0.;
-
-      std::cout<< GridLogMessage <<" **********************"<< std::endl;
-      std::cout<< GridLogMessage <<" Restart iteration = "<< iter << std::endl;
-      std::cout<< GridLogMessage <<" **********************"<< std::endl;
-
-      std::cout<<GridLogIRL <<" running "<<Nm-Nk <<" steps: "<<std::endl;
-      for(int k=Nk; k<Nm; ++k) step(eval,lme,evec,f,Nm,k);
-      f *= lme[Nm-1];
-
-      std::cout<<GridLogIRL <<" "<<Nm-Nk <<" steps done "<<std::endl;
-      std::cout<<GridLogIRL <<"Initial steps:OrthoTime "<<OrthoTime<< "seconds"<<std::endl;
-	  
-      //////////////////////////////////
-      // getting eigenvalues
-      //////////////////////////////////
-      for(int k=0; k<Nm; ++k){
-	eval2[k] = eval[k+k1-1];
-	lme2[k] = lme[k+k1-1];
-      }
-      Qt = Eigen::MatrixXd::Identity(Nm,Nm);
-      diagonalize(eval2,lme2,Nm,Nm,Qt,grid);
-      std::cout<<GridLogIRL <<" diagonalized "<<std::endl;
-
-      //////////////////////////////////
-      // sorting
-      //////////////////////////////////
-      eval2_copy = eval2;
-      std::partial_sort(eval2.begin(),eval2.begin()+Nm,eval2.end(),std::greater<RealD>());
-      std::cout<<GridLogIRL <<" evals sorted "<<std::endl;
-      const int chunk=8;
-      for(int io=0; io<k2;io+=chunk){
-	std::cout<<GridLogIRL << "eval "<< std::setw(3) << io ;
-	for(int ii=0;ii<chunk;ii++){
-	  if ( (io+ii)<k2 )
-	    std::cout<< " "<< std::setw(12)<< eval2[io+ii];
-	}
-	std::cout << std::endl;
-      }
-
-      //////////////////////////////////
-      // Implicitly shifted QR transformations
-      //////////////////////////////////
-      Qt = Eigen::MatrixXd::Identity(Nm,Nm);
-      for(int ip=k2; ip<Nm; ++ip){ 
-	QR_decomp(eval,lme,Nm,Nm,Qt,eval2[ip],k1,Nm);
-      }
-      std::cout<<GridLogIRL <<"QR decomposed "<<std::endl;
-
-      assert(k2<Nm);      assert(k2<Nm);      assert(k1>0);
-
-      basisRotate(evec,Qt,k1-1,k2+1,0,Nm,Nm); /// big constraint on the basis
-      std::cout<<GridLogIRL <<"basisRotated  by Qt"<<std::endl;
-      
-      ////////////////////////////////////////////////////
-      // Compressed vector f and beta(k2)
-      ////////////////////////////////////////////////////
-      f *= Qt(k2-1,Nm-1);
-      f += lme[k2-1] * evec[k2];
-      beta_k = norm2(f);
-      beta_k = sqrt(beta_k);
-      std::cout<<GridLogIRL<<" beta(k) = "<<beta_k<<std::endl;
-	  
-      RealD betar = 1.0/beta_k;
-      evec[k2] = betar * f;
-      lme[k2-1] = beta_k;
-	  
-      ////////////////////////////////////////////////////
-      // Convergence test
-      ////////////////////////////////////////////////////
-      for(int k=0; k<Nm; ++k){    
-	eval2[k] = eval[k];
-	lme2[k] = lme[k];
-      }
-      Qt = Eigen::MatrixXd::Identity(Nm,Nm);
-      diagonalize(eval2,lme2,Nk,Nm,Qt,grid);
-      std::cout<<GridLogIRL <<" Diagonalized "<<std::endl;
-	  
-      Nconv = 0;
-      if (iter >= MinRestart) {
-
-	std::cout << GridLogIRL << "Test convergence: rotate subset of vectors to test convergence " << std::endl;
-
-	Field B(grid); B.checkerboard = evec[0].checkerboard;
-
-	//  power of two search pattern;  not every evalue in eval2 is assessed.
-	int allconv =1;
-	for(int jj = 1; jj<=Nstop; jj*=2){
-	  int j = Nstop-jj;
-	  RealD e = eval2_copy[j]; // Discard the evalue
-	  basisRotateJ(B,evec,Qt,j,0,Nk,Nm);	    
-	  if( !_Tester.TestConvergence(j,eresid,B,e,evalMaxApprox) ) {
-	    allconv=0;
-	  }
-	}
-	// Do evec[0] for good measure
-	{ 
-	  int j=0;
-	  RealD e = eval2_copy[0]; 
-	  basisRotateJ(B,evec,Qt,j,0,Nk,Nm);	    
-	  if( !_Tester.TestConvergence(j,eresid,B,e,evalMaxApprox) ) allconv=0;
-	}
-	if ( allconv ) Nconv = Nstop;
-
-	// test if we converged, if so, terminate
-	std::cout<<GridLogIRL<<" #modes converged: >= "<<Nconv<<"/"<<Nstop<<std::endl;
-	//	if( Nconv>=Nstop || beta_k < betastp){
-	if( Nconv>=Nstop){
-	  goto converged;
-	}
-	  
-      } else {
-	std::cout << GridLogIRL << "iter < MinRestart: do not yet test for convergence\n";
-      } // end of iter loop
-    }
-
-    std::cout<<GridLogError<<"\n NOT converged.\n";
-    abort();
-	
-  converged:
-    {
-      Field B(grid); B.checkerboard = evec[0].checkerboard;
-      basisRotate(evec,Qt,0,Nk,0,Nk,Nm);	    
-      std::cout << GridLogIRL << " Rotated basis"<<std::endl;
-      Nconv=0;
-      //////////////////////////////////////////////////////////////////////
-      // Full final convergence test; unconditionally applied
-      //////////////////////////////////////////////////////////////////////
-      for(int j = 0; j<=Nk; j++){
-	B=evec[j];
-	if( _Tester.ReconstructEval(j,eresid,B,eval2[j],evalMaxApprox) ) {
-	  Nconv++;
-	}
-      }
-
-      if ( Nconv < Nstop )
-	std::cout << GridLogIRL << "Nconv ("<<Nconv<<") < Nstop ("<<Nstop<<")"<<std::endl;
-
-      eval=eval2;
-      
-      //Keep only converged
-      eval.resize(Nconv);// Nstop?
-      evec.resize(Nconv,grid);// Nstop?
-      basisSortInPlace(evec,eval,reverse);
-      
-    }
-       
-    std::cout << GridLogIRL <<"**************************************************************************"<< std::endl;
-    std::cout << GridLogIRL << "ImplicitlyRestartedLanczos CONVERGED ; Summary :\n";
-    std::cout << GridLogIRL <<"**************************************************************************"<< std::endl;
-    std::cout << GridLogIRL << " -- Iterations  = "<< iter   << "\n";
-    std::cout << GridLogIRL << " -- beta(k)     = "<< beta_k << "\n";
-    std::cout << GridLogIRL << " -- Nconv       = "<< Nconv  << "\n";
-    std::cout << GridLogIRL <<"**************************************************************************"<< std::endl;
-  }
-
- private:
-/* Saad PP. 195
-1. Choose an initial vector v1 of 2-norm unity. Set β1 ≡ 0, v0 ≡ 0
-2. For k = 1,2,...,m Do:
-3. wk:=Avk−βkv_{k−1}      
-4. αk:=(wk,vk)       // 
-5. wk:=wk−αkvk       // wk orthog vk 
-6. βk+1 := ∥wk∥2. If βk+1 = 0 then Stop
-7. vk+1 := wk/βk+1
-8. EndDo
- */
-  void step(std::vector<RealD>& lmd,
-	    std::vector<RealD>& lme, 
-	    std::vector<Field>& evec,
-	    Field& w,int Nm,int k)
-  {
-    const RealD tiny = 1.0e-20;
-    assert( k< Nm );
-
-    GridStopWatch gsw_op,gsw_o;
-
-    Field& evec_k = evec[k];
-
-    _PolyOp(evec_k,w);    std::cout<<GridLogIRL << "PolyOp" <<std::endl;
-
-    if(k>0) w -= lme[k-1] * evec[k-1];
-
-    ComplexD zalph = innerProduct(evec_k,w); // 4. αk:=(wk,vk)
-    RealD     alph = real(zalph);
-
-    w = w - alph * evec_k;// 5. wk:=wk−αkvk
-
-    RealD beta = normalise(w); // 6. βk+1 := ∥wk∥2. If βk+1 = 0 then Stop
-    // 7. vk+1 := wk/βk+1
-
-    lmd[k] = alph;
-    lme[k] = beta;
-
-    if (k>0 && k % orth_period == 0) {
-      orthogonalize(w,evec,k); // orthonormalise
-      std::cout<<GridLogIRL << "Orthogonalised " <<std::endl;
-    }
-
-    if(k < Nm-1) evec[k+1] = w;
-
-    std::cout<<GridLogIRL << "alpha[" << k << "] = " << zalph << " beta[" << k << "] = "<<beta<<std::endl;
-    if ( beta < tiny ) 
-      std::cout<<GridLogIRL << " beta is tiny "<<beta<<std::endl;
-  }
-
-  void diagonalize_Eigen(std::vector<RealD>& lmd, std::vector<RealD>& lme, 
-			 int Nk, int Nm,  
-			 Eigen::MatrixXd & Qt, // Nm x Nm
-			 GridBase *grid)
-  {
-    Eigen::MatrixXd TriDiag = Eigen::MatrixXd::Zero(Nk,Nk);
-
-    for(int i=0;i<Nk;i++)   TriDiag(i,i)   = lmd[i];
-    for(int i=0;i<Nk-1;i++) TriDiag(i,i+1) = lme[i];
-    for(int i=0;i<Nk-1;i++) TriDiag(i+1,i) = lme[i];
-    
-    Eigen::SelfAdjointEigenSolver<Eigen::MatrixXd> eigensolver(TriDiag);
-
-    for (int i = 0; i < Nk; i++) {
-      lmd[Nk-1-i] = eigensolver.eigenvalues()(i);
-    }
-    for (int i = 0; i < Nk; i++) {
-      for (int j = 0; j < Nk; j++) {
-	Qt(Nk-1-i,j) = eigensolver.eigenvectors()(j,i);
-      }
-    }
-  }
-
-  ///////////////////////////////////////////////////////////////////////////
-  // File could end here if settle on Eigen ??? !!!
-  ///////////////////////////////////////////////////////////////////////////
-  void QR_decomp(std::vector<RealD>& lmd,   // Nm 
-		 std::vector<RealD>& lme,   // Nm 
-		 int Nk, int Nm,            // Nk, Nm
-		 Eigen::MatrixXd& Qt,       // Nm x Nm matrix
-		 RealD Dsh, int kmin, int kmax)
-  {
-    int k = kmin-1;
-    RealD x;
-    
-    RealD Fden = 1.0/hypot(lmd[k]-Dsh,lme[k]);
-    RealD c = ( lmd[k] -Dsh) *Fden;
-    RealD s = -lme[k] *Fden;
-      
-    RealD tmpa1 = lmd[k];
-    RealD tmpa2 = lmd[k+1];
-    RealD tmpb  = lme[k];
-
-    lmd[k]   = c*c*tmpa1 +s*s*tmpa2 -2.0*c*s*tmpb;
-    lmd[k+1] = s*s*tmpa1 +c*c*tmpa2 +2.0*c*s*tmpb;
-    lme[k]   = c*s*(tmpa1-tmpa2) +(c*c-s*s)*tmpb;
-    x        =-s*lme[k+1];
-    lme[k+1] = c*lme[k+1];
-      
-    for(int i=0; i<Nk; ++i){
-      RealD Qtmp1 = Qt(k,i);
-      RealD Qtmp2 = Qt(k+1,i);
-      Qt(k,i)  = c*Qtmp1 - s*Qtmp2;
-      Qt(k+1,i)= s*Qtmp1 + c*Qtmp2; 
-    }
-
-    // Givens transformations
-    for(int k = kmin; k < kmax-1; ++k){
-      
-      RealD Fden = 1.0/hypot(x,lme[k-1]);
-      RealD c = lme[k-1]*Fden;
-      RealD s = - x*Fden;
-	
-      RealD tmpa1 = lmd[k];
-      RealD tmpa2 = lmd[k+1];
-      RealD tmpb  = lme[k];
-
-      lmd[k]   = c*c*tmpa1 +s*s*tmpa2 -2.0*c*s*tmpb;
-      lmd[k+1] = s*s*tmpa1 +c*c*tmpa2 +2.0*c*s*tmpb;
-      lme[k]   = c*s*(tmpa1-tmpa2) +(c*c-s*s)*tmpb;
-      lme[k-1] = c*lme[k-1] -s*x;
-
-      if(k != kmax-2){
-	x = -s*lme[k+1];
-	lme[k+1] = c*lme[k+1];
-      }
-
-      for(int i=0; i<Nk; ++i){
-	RealD Qtmp1 = Qt(k,i);
-	RealD Qtmp2 = Qt(k+1,i);
-	Qt(k,i)     = c*Qtmp1 -s*Qtmp2;
-	Qt(k+1,i)   = s*Qtmp1 +c*Qtmp2;
-      }
-    }
-  }
-
-  void diagonalize(std::vector<RealD>& lmd, std::vector<RealD>& lme, 
-		   int Nk, int Nm,   
-		   Eigen::MatrixXd & Qt,
-		   GridBase *grid)
-  {
-    Qt = Eigen::MatrixXd::Identity(Nm,Nm);
-    if ( diagonalisation == IRLdiagonaliseWithDSTEGR ) {
-      diagonalize_lapack(lmd,lme,Nk,Nm,Qt,grid);
-    } else if ( diagonalisation == IRLdiagonaliseWithQR ) { 
-      diagonalize_QR(lmd,lme,Nk,Nm,Qt,grid);
-    }  else if ( diagonalisation == IRLdiagonaliseWithEigen ) { 
-      diagonalize_Eigen(lmd,lme,Nk,Nm,Qt,grid);
-    } else { 
-      assert(0);
-    }
-  }
-
-#ifdef USE_LAPACK
-void LAPACK_dstegr(char *jobz, char *range, int *n, double *d, double *e,
-                   double *vl, double *vu, int *il, int *iu, double *abstol,
-                   int *m, double *w, double *z, int *ldz, int *isuppz,
-                   double *work, int *lwork, int *iwork, int *liwork,
-                   int *info);
-#endif
-
-void diagonalize_lapack(std::vector<RealD>& lmd,
-			std::vector<RealD>& lme, 
-			int Nk, int Nm,  
-			Eigen::MatrixXd& Qt,
-			GridBase *grid)
-{
-#ifdef USE_LAPACK
-  const int size = Nm;
-  int NN = Nk;
-  double evals_tmp[NN];
-  double evec_tmp[NN][NN];
-  memset(evec_tmp[0],0,sizeof(double)*NN*NN);
-  double DD[NN];
-  double EE[NN];
-  for (int i = 0; i< NN; i++) {
-    for (int j = i - 1; j <= i + 1; j++) {
-      if ( j < NN && j >= 0 ) {
-	if (i==j) DD[i] = lmd[i];
-	if (i==j) evals_tmp[i] = lmd[i];
-	if (j==(i-1)) EE[j] = lme[j];
-      }
-    }
-  }
-  int evals_found;
-  int lwork = ( (18*NN) > (1+4*NN+NN*NN)? (18*NN):(1+4*NN+NN*NN)) ;
-  int liwork =  3+NN*10 ;
-  int iwork[liwork];
-  double work[lwork];
-  int isuppz[2*NN];
-  char jobz = 'V'; // calculate evals & evecs
-  char range = 'I'; // calculate all evals
-  //    char range = 'A'; // calculate all evals
-  char uplo = 'U'; // refer to upper half of original matrix
-  char compz = 'I'; // Compute eigenvectors of tridiagonal matrix
-  int ifail[NN];
-  int info;
-  int total = grid->_Nprocessors;
-  int node  = grid->_processor;
-  int interval = (NN/total)+1;
-  double vl = 0.0, vu = 0.0;
-  int il = interval*node+1 , iu = interval*(node+1);
-  if (iu > NN)  iu=NN;
-  double tol = 0.0;
-  if (1) {
-    memset(evals_tmp,0,sizeof(double)*NN);
-    if ( il <= NN){
-      LAPACK_dstegr(&jobz, &range, &NN,
-		    (double*)DD, (double*)EE,
-		    &vl, &vu, &il, &iu, // these four are ignored if second parameteris 'A'
-		    &tol, // tolerance
-		    &evals_found, evals_tmp, (double*)evec_tmp, &NN,
-		    isuppz,
-		    work, &lwork, iwork, &liwork,
-		    &info);
-      for (int i = iu-1; i>= il-1; i--){
-	evals_tmp[i] = evals_tmp[i - (il-1)];
-	if (il>1) evals_tmp[i-(il-1)]=0.;
-	for (int j = 0; j< NN; j++){
-	  evec_tmp[i][j] = evec_tmp[i - (il-1)][j];
-	  if (il>1) evec_tmp[i-(il-1)][j]=0.;
-	}
-      }
-    }
-    {
-      grid->GlobalSumVector(evals_tmp,NN);
-      grid->GlobalSumVector((double*)evec_tmp,NN*NN);
-    }
-  } 
-  // Safer to sort instead of just reversing it, 
-  // but the document of the routine says evals are sorted in increasing order. 
-  // qr gives evals in decreasing order.
-  for(int i=0;i<NN;i++){
-    lmd [NN-1-i]=evals_tmp[i];
-    for(int j=0;j<NN;j++){
-      Qt((NN-1-i),j)=evec_tmp[i][j];
-    }
-  }
-#else 
-  assert(0);
-#endif
-}
-
-void diagonalize_QR(std::vector<RealD>& lmd, std::vector<RealD>& lme, 
-		    int Nk, int Nm,   
-		    Eigen::MatrixXd & Qt,
-		    GridBase *grid)
-{
-  int QRiter = 100*Nm;
-  int kmin = 1;
-  int kmax = Nk;
-  
-  // (this should be more sophisticated)
-  for(int iter=0; iter<QRiter; ++iter){
-    
-    // determination of 2x2 leading submatrix
-    RealD dsub = lmd[kmax-1]-lmd[kmax-2];
-    RealD dd = sqrt(dsub*dsub + 4.0*lme[kmax-2]*lme[kmax-2]);
-    RealD Dsh = 0.5*(lmd[kmax-2]+lmd[kmax-1] +dd*(dsub/fabs(dsub)));
-    // (Dsh: shift)
-    
-    // transformation
-    QR_decomp(lmd,lme,Nk,Nm,Qt,Dsh,kmin,kmax); // Nk, Nm
-    
-    // Convergence criterion (redef of kmin and kamx)
-    for(int j=kmax-1; j>= kmin; --j){
-      RealD dds = fabs(lmd[j-1])+fabs(lmd[j]);
-      if(fabs(lme[j-1])+dds > dds){
-	kmax = j+1;
-	goto continued;
-      }
-    }
-    QRiter = iter;
-    return;
-    
-  continued:
-    for(int j=0; j<kmax-1; ++j){
-      RealD dds = fabs(lmd[j])+fabs(lmd[j+1]);
-      if(fabs(lme[j])+dds > dds){
-	kmin = j+1;
-	break;
-      }
-    }
-  }
-  std::cout << GridLogError << "[QL method] Error - Too many iteration: "<<QRiter<<"\n";
-  abort();
-}
-};
-}
-#endif
--- a/Grid/algorithms/iterative/LocalCoherenceLanczos.h
+++ b/Grid/algorithms/iterative/LocalCoherenceLanczos.h
@@ -1,406 +0,0 @@
-    /*************************************************************************************
-
-    Grid physics library, www.github.com/paboyle/Grid 
-
-    Source file: ./lib/algorithms/iterative/LocalCoherenceLanczos.h
-
-    Copyright (C) 2015
-
-Author: Christoph Lehner <clehner@bnl.gov>
-Author: paboyle <paboyle@ph.ed.ac.uk>
-
-    This program is free software; you can redistribute it and/or modify
-    it under the terms of the GNU General Public License as published by
-    the Free Software Foundation; either version 2 of the License, or
-    (at your option) any later version.
-
-    This program is distributed in the hope that it will be useful,
-    but WITHOUT ANY WARRANTY; without even the implied warranty of
-    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-    GNU General Public License for more details.
-
-    You should have received a copy of the GNU General Public License along
-    with this program; if not, write to the Free Software Foundation, Inc.,
-    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
-
-    See the full license in the file "LICENSE" in the top level distribution directory
-    *************************************************************************************/
-    /*  END LEGAL */
-#ifndef GRID_LOCAL_COHERENCE_IRL_H
-#define GRID_LOCAL_COHERENCE_IRL_H
-
-namespace Grid { 
-
-
-struct LanczosParams : Serializable {
- public:
-  GRID_SERIALIZABLE_CLASS_MEMBERS(LanczosParams,
-				  ChebyParams, Cheby,/*Chebyshev*/
-				  int, Nstop,    /*Vecs in Lanczos must converge Nstop < Nk < Nm*/
-				  int, Nk,       /*Vecs in Lanczos seek converge*/
-				  int, Nm,       /*Total vecs in Lanczos include restart*/
-				  RealD, resid,  /*residual*/
- 				  int, MaxIt, 
-				  RealD, betastp,  /* ? */
-				  int, MinRes);    // Must restart
-};
-
-struct LocalCoherenceLanczosParams : Serializable {
- public:
-  GRID_SERIALIZABLE_CLASS_MEMBERS(LocalCoherenceLanczosParams,
-				  bool, saveEvecs,
-				  bool, doFine,
-				  bool, doFineRead,
-				  bool, doCoarse,
-	       			  bool, doCoarseRead,
-				  LanczosParams, FineParams,
-				  LanczosParams, CoarseParams,
-				  ChebyParams,   Smoother,
-				  RealD        , coarse_relax_tol,
-				  std::vector<int>, blockSize,
-				  std::string, config,
-				  std::vector < std::complex<double>  >, omega,
-				  RealD, mass,
-				  RealD, M5);
-};
-
-// Duplicate functionality; ProjectedFunctionHermOp could be used with the trivial function
-template<class Fobj,class CComplex,int nbasis>
-class ProjectedHermOp : public LinearFunction<Lattice<iVector<CComplex,nbasis > > > {
-public:
-  typedef iVector<CComplex,nbasis >           CoarseSiteVector;
-  typedef Lattice<CoarseSiteVector>           CoarseField;
-  typedef Lattice<CComplex>   CoarseScalar; // used for inner products on fine field
-  typedef Lattice<Fobj>          FineField;
-
-  LinearOperatorBase<FineField> &_Linop;
-  std::vector<FineField>        &subspace;
-
-  ProjectedHermOp(LinearOperatorBase<FineField>& linop, std::vector<FineField> & _subspace) : 
-    _Linop(linop), subspace(_subspace)
-  {  
-    assert(subspace.size() >0);
-  };
-
-  void operator()(const CoarseField& in, CoarseField& out) {
-    GridBase *FineGrid = subspace[0]._grid;    
-    int   checkerboard = subspace[0].checkerboard;
-      
-    FineField fin (FineGrid);     fin.checkerboard= checkerboard;
-    FineField fout(FineGrid);   fout.checkerboard = checkerboard;
-
-    blockPromote(in,fin,subspace);       std::cout<<GridLogIRL<<"ProjectedHermop : Promote to fine"<<std::endl;
-    _Linop.HermOp(fin,fout);             std::cout<<GridLogIRL<<"ProjectedHermop : HermOp (fine) "<<std::endl;
-    blockProject(out,fout,subspace);     std::cout<<GridLogIRL<<"ProjectedHermop : Project to coarse "<<std::endl;
-  }
-};
-
-template<class Fobj,class CComplex,int nbasis>
-class ProjectedFunctionHermOp : public LinearFunction<Lattice<iVector<CComplex,nbasis > > > {
-public:
-  typedef iVector<CComplex,nbasis >           CoarseSiteVector;
-  typedef Lattice<CoarseSiteVector>           CoarseField;
-  typedef Lattice<CComplex>   CoarseScalar; // used for inner products on fine field
-  typedef Lattice<Fobj>          FineField;
-
-
-  OperatorFunction<FineField>   & _poly;
-  LinearOperatorBase<FineField> &_Linop;
-  std::vector<FineField>        &subspace;
-
-  ProjectedFunctionHermOp(OperatorFunction<FineField> & poly,
-			  LinearOperatorBase<FineField>& linop, 
-			  std::vector<FineField> & _subspace) :
-    _poly(poly),
-    _Linop(linop),
-    subspace(_subspace)
-  {  };
-
-  void operator()(const CoarseField& in, CoarseField& out) {
-    
-    GridBase *FineGrid = subspace[0]._grid;    
-    int   checkerboard = subspace[0].checkerboard;
-
-    FineField fin (FineGrid); fin.checkerboard =checkerboard;
-    FineField fout(FineGrid);fout.checkerboard =checkerboard;
-    
-    blockPromote(in,fin,subspace);             std::cout<<GridLogIRL<<"ProjectedFunctionHermop : Promote to fine"<<std::endl;
-    _poly(_Linop,fin,fout);                    std::cout<<GridLogIRL<<"ProjectedFunctionHermop : Poly "<<std::endl;
-    blockProject(out,fout,subspace);           std::cout<<GridLogIRL<<"ProjectedFunctionHermop : Project to coarse "<<std::endl;
-  }
-};
-
-template<class Fobj,class CComplex,int nbasis>
-class ImplicitlyRestartedLanczosSmoothedTester  : public ImplicitlyRestartedLanczosTester<Lattice<iVector<CComplex,nbasis > > >
-{
- public:
-  typedef iVector<CComplex,nbasis >           CoarseSiteVector;
-  typedef Lattice<CoarseSiteVector>           CoarseField;
-  typedef Lattice<CComplex>   CoarseScalar; // used for inner products on fine field
-  typedef Lattice<Fobj>          FineField;
-
-  LinearFunction<CoarseField> & _Poly;
-  OperatorFunction<FineField>   & _smoother;
-  LinearOperatorBase<FineField> &_Linop;
-  RealD                          _coarse_relax_tol;
-  std::vector<FineField>        &_subspace;
-  
-  ImplicitlyRestartedLanczosSmoothedTester(LinearFunction<CoarseField>   &Poly,
-					   OperatorFunction<FineField>   &smoother,
-					   LinearOperatorBase<FineField> &Linop,
-					   std::vector<FineField>        &subspace,
-					   RealD coarse_relax_tol=5.0e3) 
-    : _smoother(smoother), _Linop(Linop), _Poly(Poly), _subspace(subspace),
-      _coarse_relax_tol(coarse_relax_tol)  
-  {    };
-
-  int TestConvergence(int j,RealD eresid,CoarseField &B, RealD &eval,RealD evalMaxApprox)
-  {
-    CoarseField v(B);
-    RealD eval_poly = eval;
-
-    // Apply operator
-    _Poly(B,v);
-
-    RealD vnum = real(innerProduct(B,v)); // HermOp.
-    RealD vden = norm2(B);
-    RealD vv0  = norm2(v);
-    eval   = vnum/vden;
-    v -= eval*B;
-
-    RealD vv = norm2(v) / ::pow(evalMaxApprox,2.0);
-
-    std::cout.precision(13);
-    std::cout<<GridLogIRL  << "[" << std::setw(3)<<j<<"] "
-	     <<"eval = "<<std::setw(25)<< eval << " (" << eval_poly << ")"
-	     <<" |H B[i] - eval[i]B[i]|^2 / evalMaxApprox^2 " << std::setw(25) << vv
-	     <<std::endl;
-
-    int conv=0;
-    if( (vv<eresid*eresid) ) conv = 1;
-    return conv;
-  }
-  int ReconstructEval(int j,RealD eresid,CoarseField &B, RealD &eval,RealD evalMaxApprox)
-  {
-    GridBase *FineGrid = _subspace[0]._grid;    
-    int checkerboard   = _subspace[0].checkerboard;
-    FineField fB(FineGrid);fB.checkerboard =checkerboard;
-    FineField fv(FineGrid);fv.checkerboard =checkerboard;
-
-    blockPromote(B,fv,_subspace);  
-    
-    _smoother(_Linop,fv,fB); 
-
-    RealD eval_poly = eval;
-    _Linop.HermOp(fB,fv);
-
-    RealD vnum = real(innerProduct(fB,fv)); // HermOp.
-    RealD vden = norm2(fB);
-    RealD vv0  = norm2(fv);
-    eval   = vnum/vden;
-    fv -= eval*fB;
-    RealD vv = norm2(fv) / ::pow(evalMaxApprox,2.0);
-
-    std::cout.precision(13);
-    std::cout<<GridLogIRL  << "[" << std::setw(3)<<j<<"] "
-	     <<"eval = "<<std::setw(25)<< eval << " (" << eval_poly << ")"
-	     <<" |H B[i] - eval[i]B[i]|^2 / evalMaxApprox^2 " << std::setw(25) << vv
-	     <<std::endl;
-    if ( j > nbasis ) eresid = eresid*_coarse_relax_tol;
-    if( (vv<eresid*eresid) ) return 1;
-    return 0;
-  }
-};
-
-////////////////////////////////////////////
-// Make serializable Lanczos params
-////////////////////////////////////////////
-template<class Fobj,class CComplex,int nbasis>
-class LocalCoherenceLanczos 
-{
-public:
-  typedef iVector<CComplex,nbasis >           CoarseSiteVector;
-  typedef Lattice<CComplex>                   CoarseScalar; // used for inner products on fine field
-  typedef Lattice<CoarseSiteVector>           CoarseField;
-  typedef Lattice<Fobj>                       FineField;
-
-protected:
-  GridBase *_CoarseGrid;
-  GridBase *_FineGrid;
-  int _checkerboard;
-  LinearOperatorBase<FineField>                 & _FineOp;
-  
-  std::vector<RealD>                              &evals_fine;
-  std::vector<RealD>                              &evals_coarse; 
-  std::vector<FineField>                          &subspace;
-  std::vector<CoarseField>                        &evec_coarse;
-
-private:
-  std::vector<RealD>                              _evals_fine;
-  std::vector<RealD>                              _evals_coarse; 
-  std::vector<FineField>                          _subspace;
-  std::vector<CoarseField>                        _evec_coarse;
-
-public:
-
-  LocalCoherenceLanczos(GridBase *FineGrid,
-			GridBase *CoarseGrid,
-			LinearOperatorBase<FineField> &FineOp,
-			int checkerboard) :
-    _CoarseGrid(CoarseGrid),
-    _FineGrid(FineGrid),
-    _FineOp(FineOp),
-    _checkerboard(checkerboard),
-    evals_fine  (_evals_fine),
-    evals_coarse(_evals_coarse),
-    subspace    (_subspace),
-    evec_coarse(_evec_coarse)
-  {
-    evals_fine.resize(0);
-    evals_coarse.resize(0);
-  };
-  //////////////////////////////////////////////////////////////////////////
-  // Alternate constructore, external storage for use by Hadrons module
-  //////////////////////////////////////////////////////////////////////////
-  LocalCoherenceLanczos(GridBase *FineGrid,
-			GridBase *CoarseGrid,
-			LinearOperatorBase<FineField> &FineOp,
-			int checkerboard,
-			std::vector<FineField>   &ext_subspace,
-			std::vector<CoarseField> &ext_coarse,
-			std::vector<RealD>       &ext_eval_fine,
-			std::vector<RealD>       &ext_eval_coarse
-			) :
-    _CoarseGrid(CoarseGrid),
-    _FineGrid(FineGrid),
-    _FineOp(FineOp),
-    _checkerboard(checkerboard),
-    evals_fine  (ext_eval_fine), 
-    evals_coarse(ext_eval_coarse),
-    subspace    (ext_subspace),
-    evec_coarse (ext_coarse)
-  {
-    evals_fine.resize(0);
-    evals_coarse.resize(0);
-  };
-
-  void Orthogonalise(void ) {
-    CoarseScalar InnerProd(_CoarseGrid);
-    std::cout << GridLogMessage <<" Gramm-Schmidt pass 1"<<std::endl;
-    blockOrthogonalise(InnerProd,subspace);
-    std::cout << GridLogMessage <<" Gramm-Schmidt pass 2"<<std::endl;
-    blockOrthogonalise(InnerProd,subspace);
-  };
-
-  template<typename T>  static RealD normalise(T& v) 
-  {
-    RealD nn = norm2(v);
-    nn = ::sqrt(nn);
-    v = v * (1.0/nn);
-    return nn;
-  }
-  /*
-  void fakeFine(void)
-  {
-    int Nk = nbasis;
-    subspace.resize(Nk,_FineGrid);
-    subspace[0]=1.0;
-    subspace[0].checkerboard=_checkerboard;
-    normalise(subspace[0]);
-    PlainHermOp<FineField>    Op(_FineOp);
-    for(int k=1;k<Nk;k++){
-      subspace[k].checkerboard=_checkerboard;
-      Op(subspace[k-1],subspace[k]);
-      normalise(subspace[k]);
-    }
-  }
-  */
-
-  void testFine(RealD resid) 
-  {
-    assert(evals_fine.size() == nbasis);
-    assert(subspace.size() == nbasis);
-    PlainHermOp<FineField>    Op(_FineOp);
-    ImplicitlyRestartedLanczosHermOpTester<FineField> SimpleTester(Op);
-    for(int k=0;k<nbasis;k++){
-      assert(SimpleTester.ReconstructEval(k,resid,subspace[k],evals_fine[k],1.0)==1);
-    }
-  }
-
-  void testCoarse(RealD resid,ChebyParams cheby_smooth,RealD relax) 
-  {
-    assert(evals_fine.size() == nbasis);
-    assert(subspace.size() == nbasis);
-    //////////////////////////////////////////////////////////////////////////////////////////////////
-    // create a smoother and see if we can get a cheap convergence test and smooth inside the IRL
-    //////////////////////////////////////////////////////////////////////////////////////////////////
-    Chebyshev<FineField>                          ChebySmooth(cheby_smooth);
-    ProjectedFunctionHermOp<Fobj,CComplex,nbasis> ChebyOp (ChebySmooth,_FineOp,subspace);
-    ImplicitlyRestartedLanczosSmoothedTester<Fobj,CComplex,nbasis> ChebySmoothTester(ChebyOp,ChebySmooth,_FineOp,subspace,relax);
-
-    for(int k=0;k<evec_coarse.size();k++){
-      if ( k < nbasis ) { 
-	assert(ChebySmoothTester.ReconstructEval(k,resid,evec_coarse[k],evals_coarse[k],1.0)==1);
-      } else { 
-	assert(ChebySmoothTester.ReconstructEval(k,resid*relax,evec_coarse[k],evals_coarse[k],1.0)==1);
-      }
-    }
-  }
-
-  void calcFine(ChebyParams cheby_parms,int Nstop,int Nk,int Nm,RealD resid, 
-		RealD MaxIt, RealD betastp, int MinRes)
-  {
-    assert(nbasis<=Nm);
-    Chebyshev<FineField>      Cheby(cheby_parms);
-    FunctionHermOp<FineField> ChebyOp(Cheby,_FineOp);
-    PlainHermOp<FineField>    Op(_FineOp);
-
-    evals_fine.resize(Nm);
-    subspace.resize(Nm,_FineGrid);
-
-    ImplicitlyRestartedLanczos<FineField> IRL(ChebyOp,Op,Nstop,Nk,Nm,resid,MaxIt,betastp,MinRes);
-
-    FineField src(_FineGrid); src=1.0; src.checkerboard = _checkerboard;
-
-    int Nconv;
-    IRL.calc(evals_fine,subspace,src,Nconv,false);
-    
-    // Shrink down to number saved
-    assert(Nstop>=nbasis);
-    assert(Nconv>=nbasis);
-    evals_fine.resize(nbasis);
-    subspace.resize(nbasis,_FineGrid);
-  }
-  void calcCoarse(ChebyParams cheby_op,ChebyParams cheby_smooth,RealD relax,
-		  int Nstop, int Nk, int Nm,RealD resid, 
-		  RealD MaxIt, RealD betastp, int MinRes)
-  {
-    Chebyshev<FineField>                          Cheby(cheby_op);
-    ProjectedHermOp<Fobj,CComplex,nbasis>         Op(_FineOp,subspace);
-    ProjectedFunctionHermOp<Fobj,CComplex,nbasis> ChebyOp (Cheby,_FineOp,subspace);
-    //////////////////////////////////////////////////////////////////////////////////////////////////
-    // create a smoother and see if we can get a cheap convergence test and smooth inside the IRL
-    //////////////////////////////////////////////////////////////////////////////////////////////////
-
-    Chebyshev<FineField>                                           ChebySmooth(cheby_smooth);
-    ImplicitlyRestartedLanczosSmoothedTester<Fobj,CComplex,nbasis> ChebySmoothTester(ChebyOp,ChebySmooth,_FineOp,subspace,relax);
-
-    evals_coarse.resize(Nm);
-    evec_coarse.resize(Nm,_CoarseGrid);
-
-    CoarseField src(_CoarseGrid);     src=1.0; 
-
-    ImplicitlyRestartedLanczos<CoarseField> IRL(ChebyOp,ChebyOp,ChebySmoothTester,Nstop,Nk,Nm,resid,MaxIt,betastp,MinRes);
-    int Nconv=0;
-    IRL.calc(evals_coarse,evec_coarse,src,Nconv,false);
-    assert(Nconv>=Nstop);
-    evals_coarse.resize(Nstop);
-    evec_coarse.resize (Nstop,_CoarseGrid);
-    for (int i=0;i<Nstop;i++){
-      std::cout << i << " Coarse eval = " << evals_coarse[i]  << std::endl;
-    }
-  }
-};
-
-}
-#endif
--- a/Grid/algorithms/iterative/MinimalResidual.h
+++ b/Grid/algorithms/iterative/MinimalResidual.h
@@ -1,156 +0,0 @@
-/*************************************************************************************
-
-Grid physics library, www.github.com/paboyle/Grid
-
-Source file: ./lib/algorithms/iterative/MinimalResidual.h
-
-Copyright (C) 2015
-
-Author: Daniel Richtmann <daniel.richtmann@ur.de>
-
-This program is free software; you can redistribute it and/or modify
-it under the terms of the GNU General Public License as published by
-the Free Software Foundation; either version 2 of the License, or
-(at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along
-with this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
-
-See the full license in the file "LICENSE" in the top level distribution
-directory
-*************************************************************************************/
-/*  END LEGAL */
-#ifndef GRID_MINIMAL_RESIDUAL_H
-#define GRID_MINIMAL_RESIDUAL_H
-
-namespace Grid {
-
-template<class Field> class MinimalResidual : public OperatorFunction<Field> {
- public:
-  bool ErrorOnNoConverge; // throw an assert when the MR fails to converge.
-                          // Defaults true.
-  RealD   Tolerance;
-  Integer MaxIterations;
-  RealD   overRelaxParam;
-  Integer IterationsToComplete; // Number of iterations the MR took to finish.
-                                // Filled in upon completion
-
-  MinimalResidual(RealD tol, Integer maxit, Real ovrelparam = 1.0, bool err_on_no_conv = true)
-    : Tolerance(tol), MaxIterations(maxit), overRelaxParam(ovrelparam), ErrorOnNoConverge(err_on_no_conv){};
-
-  void operator()(LinearOperatorBase<Field> &Linop, const Field &src, Field &psi) {
-
-    psi.checkerboard = src.checkerboard;
-    conformable(psi, src);
-
-    Complex a, c;
-    Real    d;
-
-    Field Mr(src);
-    Field r(src);
-
-    // Initial residual computation & set up
-    RealD guess = norm2(psi);
-    assert(std::isnan(guess) == 0);
-
-    RealD ssq = norm2(src);
-    RealD rsq = Tolerance * Tolerance * ssq;
-
-    Linop.Op(psi, Mr);
-
-    r = src - Mr;
-
-    RealD cp = norm2(r);
-
-    std::cout << std::setprecision(4) << std::scientific;
-    std::cout << GridLogIterative << "MinimalResidual: guess " << guess << std::endl;
-    std::cout << GridLogIterative << "MinimalResidual:   src " << ssq << std::endl;
-    std::cout << GridLogIterative << "MinimalResidual:    mp " << d << std::endl;
-    std::cout << GridLogIterative << "MinimalResidual:  cp,r " << cp << std::endl;
-
-    if (cp <= rsq) {
-      return;
-    }
-
-    std::cout << GridLogIterative << "MinimalResidual: k=0 residual " << cp << " target " << rsq << std::endl;
-
-    GridStopWatch LinalgTimer;
-    GridStopWatch MatrixTimer;
-    GridStopWatch SolverTimer;
-
-    SolverTimer.Start();
-    int k;
-    for (k = 1; k <= MaxIterations; k++) {
-
-      MatrixTimer.Start();
-      Linop.Op(r, Mr);
-      MatrixTimer.Stop();
-
-      LinalgTimer.Start();
-
-      c = innerProduct(Mr, r);
-
-      d = norm2(Mr);
-
-      a = c / d;
-
-      a = a * overRelaxParam;
-
-      psi = psi + r * a;
-
-      r = r - Mr * a;
-
-      cp = norm2(r);
-
-      LinalgTimer.Stop();
-
-      std::cout << GridLogIterative << "MinimalResidual: Iteration " << k
-                << " residual " << cp << " target " << rsq << std::endl;
-      std::cout << GridLogDebug << "a = " << a << " c = " << c << " d = " << d << std::endl;
-
-      // Stopping condition
-      if (cp <= rsq) {
-        SolverTimer.Stop();
-
-        Linop.Op(psi, Mr);
-        r = src - Mr;
-
-        RealD srcnorm       = sqrt(ssq);
-        RealD resnorm       = sqrt(norm2(r));
-        RealD true_residual = resnorm / srcnorm;
-
-        std::cout << GridLogMessage        << "MinimalResidual Converged on iteration " << k
-                  << " computed residual " << sqrt(cp / ssq)
-                  << " true residual "     << true_residual
-                  << " target "            << Tolerance << std::endl;
-
-        std::cout << GridLogMessage << "MR Time elapsed: Total   " << SolverTimer.Elapsed() << std::endl;
-        std::cout << GridLogMessage << "MR Time elapsed: Matrix  " << MatrixTimer.Elapsed() << std::endl;
-        std::cout << GridLogMessage << "MR Time elapsed: Linalg  " << LinalgTimer.Elapsed() << std::endl;
-
-        if (ErrorOnNoConverge)
-          assert(true_residual / Tolerance < 10000.0);
-
-        IterationsToComplete = k;
-
-        return;
-      }
-    }
-
-    std::cout << GridLogMessage << "MinimalResidual did NOT converge"
-              << std::endl;
-
-    if (ErrorOnNoConverge)
-      assert(0);
-
-    IterationsToComplete = k;
-  }
-};
-} // namespace Grid
-#endif
--- a/Grid/algorithms/iterative/MixedPrecisionFlexibleGeneralisedMinimalResidual.h
+++ b/Grid/algorithms/iterative/MixedPrecisionFlexibleGeneralisedMinimalResidual.h
@@ -1,273 +0,0 @@
-/*************************************************************************************
-
-Grid physics library, www.github.com/paboyle/Grid
-
-Source file: ./lib/algorithms/iterative/MixedPrecisionFlexibleGeneralisedMinimalResidual.h
-
-Copyright (C) 2015
-
-Author: Daniel Richtmann <daniel.richtmann@ur.de>
-
-This program is free software; you can redistribute it and/or modify
-it under the terms of the GNU General Public License as published by
-the Free Software Foundation; either version 2 of the License, or
-(at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along
-with this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
-
-See the full license in the file "LICENSE" in the top level distribution
-directory
-*************************************************************************************/
-/*  END LEGAL */
-#ifndef GRID_MIXED_PRECISION_FLEXIBLE_GENERALISED_MINIMAL_RESIDUAL_H
-#define GRID_MIXED_PRECISION_FLEXIBLE_GENERALISED_MINIMAL_RESIDUAL_H
-
-namespace Grid {
-
-template<class FieldD, class FieldF, typename std::enable_if<getPrecision<FieldD>::value == 2, int>::type = 0, typename std::enable_if< getPrecision<FieldF>::value == 1, int>::type = 0>
-class MixedPrecisionFlexibleGeneralisedMinimalResidual : public OperatorFunction<FieldD> {
- public:
-  bool ErrorOnNoConverge; // Throw an assert when MPFGMRES fails to converge,
-                          // defaults to true
-
-  RealD   Tolerance;
-
-  Integer MaxIterations;
-  Integer RestartLength;
-  Integer MaxNumberOfRestarts;
-  Integer IterationCount; // Number of iterations the MPFGMRES took to finish,
-                          // filled in upon completion
-
-  GridStopWatch MatrixTimer;
-  GridStopWatch PrecTimer;
-  GridStopWatch LinalgTimer;
-  GridStopWatch QrTimer;
-  GridStopWatch CompSolutionTimer;
-  GridStopWatch ChangePrecTimer;
-
-  Eigen::MatrixXcd H;
-
-  std::vector<std::complex<double>> y;
-  std::vector<std::complex<double>> gamma;
-  std::vector<std::complex<double>> c;
-  std::vector<std::complex<double>> s;
-
-  GridBase* SinglePrecGrid;
-
-  LinearFunction<FieldF> &Preconditioner;
-
-  MixedPrecisionFlexibleGeneralisedMinimalResidual(RealD   tol,
-                                                   Integer maxit,
-                                                   GridBase * sp_grid,
-                                                   LinearFunction<FieldF> &Prec,
-                                                   Integer restart_length,
-                                                   bool    err_on_no_conv = true)
-      : Tolerance(tol)
-      , MaxIterations(maxit)
-      , RestartLength(restart_length)
-      , MaxNumberOfRestarts(MaxIterations/RestartLength + ((MaxIterations%RestartLength == 0) ? 0 : 1))
-      , ErrorOnNoConverge(err_on_no_conv)
-      , H(Eigen::MatrixXcd::Zero(RestartLength, RestartLength + 1)) // sizes taken from DD-αAMG code base
-      , y(RestartLength + 1, 0.)
-      , gamma(RestartLength + 1, 0.)
-      , c(RestartLength + 1, 0.)
-      , s(RestartLength + 1, 0.)
-      , SinglePrecGrid(sp_grid)
-      , Preconditioner(Prec) {};
-
-  void operator()(LinearOperatorBase<FieldD> &LinOp, const FieldD &src, FieldD &psi) {
-
-    psi.checkerboard = src.checkerboard;
-    conformable(psi, src);
-
-    RealD guess = norm2(psi);
-    assert(std::isnan(guess) == 0);
-
-    RealD cp;
-    RealD ssq = norm2(src);
-    RealD rsq = Tolerance * Tolerance * ssq;
-
-    FieldD r(src._grid);
-
-    std::cout << std::setprecision(4) << std::scientific;
-    std::cout << GridLogIterative << "MPFGMRES: guess " << guess << std::endl;
-    std::cout << GridLogIterative << "MPFGMRES:   src " << ssq   << std::endl;
-
-    PrecTimer.Reset();
-    MatrixTimer.Reset();
-    LinalgTimer.Reset();
-    QrTimer.Reset();
-    CompSolutionTimer.Reset();
-    ChangePrecTimer.Reset();
-
-    GridStopWatch SolverTimer;
-    SolverTimer.Start();
-
-    IterationCount = 0;
-
-    for (int k=0; k<MaxNumberOfRestarts; k++) {
-
-      cp = outerLoopBody(LinOp, src, psi, rsq);
-
-      // Stopping condition
-      if (cp <= rsq) {
-
-        SolverTimer.Stop();
-
-        LinOp.Op(psi,r);
-        axpy(r,-1.0,src,r);
-
-        RealD srcnorm       = sqrt(ssq);
-        RealD resnorm       = sqrt(norm2(r));
-        RealD true_residual = resnorm / srcnorm;
-
-        std::cout << GridLogMessage        << "MPFGMRES: Converged on iteration " << IterationCount
-                  << " computed residual " << sqrt(cp / ssq)
-                  << " true residual "     << true_residual
-                  << " target "            << Tolerance << std::endl;
-
-        std::cout << GridLogMessage << "MPFGMRES Time elapsed: Total      " <<       SolverTimer.Elapsed() << std::endl;
-        std::cout << GridLogMessage << "MPFGMRES Time elapsed: Precon     " <<         PrecTimer.Elapsed() << std::endl;
-        std::cout << GridLogMessage << "MPFGMRES Time elapsed: Matrix     " <<       MatrixTimer.Elapsed() << std::endl;
-        std::cout << GridLogMessage << "MPFGMRES Time elapsed: Linalg     " <<       LinalgTimer.Elapsed() << std::endl;
-        std::cout << GridLogMessage << "MPFGMRES Time elapsed: QR         " <<           QrTimer.Elapsed() << std::endl;
-        std::cout << GridLogMessage << "MPFGMRES Time elapsed: CompSol    " << CompSolutionTimer.Elapsed() << std::endl;
-        std::cout << GridLogMessage << "MPFGMRES Time elapsed: PrecChange " <<   ChangePrecTimer.Elapsed() << std::endl;
-        return;
-      }
-    }
-
-    std::cout << GridLogMessage << "MPFGMRES did NOT converge" << std::endl;
-
-    if (ErrorOnNoConverge)
-      assert(0);
-  }
-
-  RealD outerLoopBody(LinearOperatorBase<FieldD> &LinOp, const FieldD &src, FieldD &psi, RealD rsq) {
-
-    RealD cp = 0;
-
-    FieldD w(src._grid);
-    FieldD r(src._grid);
-
-    // these should probably be made class members so that they are only allocated once, not in every restart
-    std::vector<FieldD> v(RestartLength + 1, src._grid); for (auto &elem : v) elem = zero;
-    std::vector<FieldD> z(RestartLength + 1, src._grid); for (auto &elem : z) elem = zero;
-
-    MatrixTimer.Start();
-    LinOp.Op(psi, w);
-    MatrixTimer.Stop();
-
-    LinalgTimer.Start();
-    r = src - w;
-
-    gamma[0] = sqrt(norm2(r));
-
-    v[0] = (1. / gamma[0]) * r;
-    LinalgTimer.Stop();
-
-    for (int i=0; i<RestartLength; i++) {
-
-      IterationCount++;
-
-      arnoldiStep(LinOp, v, z, w, i);
-
-      qrUpdate(i);
-
-      cp = std::norm(gamma[i+1]);
-
-      std::cout << GridLogIterative << "MPFGMRES: Iteration " << IterationCount
-                << " residual " << cp << " target " << rsq << std::endl;
-
-      if ((i == RestartLength - 1) || (IterationCount == MaxIterations) || (cp <= rsq)) {
-
-        computeSolution(z, psi, i);
-
-        return cp;
-      }
-    }
-
-    assert(0); // Never reached
-    return cp;
-  }
-
-  void arnoldiStep(LinearOperatorBase<FieldD> &LinOp, std::vector<FieldD> &v, std::vector<FieldD> &z, FieldD &w, int iter) {
-
-    FieldF v_f(SinglePrecGrid);
-    FieldF z_f(SinglePrecGrid);
-
-    ChangePrecTimer.Start();
-    precisionChange(v_f, v[iter]);
-    precisionChange(z_f, z[iter]);
-    ChangePrecTimer.Stop();
-
-    PrecTimer.Start();
-    Preconditioner(v_f, z_f);
-    PrecTimer.Stop();
-
-    ChangePrecTimer.Start();
-    precisionChange(z[iter], z_f);
-    ChangePrecTimer.Stop();
-
-    MatrixTimer.Start();
-    LinOp.Op(z[iter], w);
-    MatrixTimer.Stop();
-
-    LinalgTimer.Start();
-    for (int i = 0; i <= iter; ++i) {
-      H(iter, i) = innerProduct(v[i], w);
-      w = w - H(iter, i) * v[i];
-    }
-
-    H(iter, iter + 1) = sqrt(norm2(w));
-    v[iter + 1] = (1. / H(iter, iter + 1)) * w;
-    LinalgTimer.Stop();
-  }
-
-  void qrUpdate(int iter) {
-
-    QrTimer.Start();
-    for (int i = 0; i < iter ; ++i) {
-      auto tmp       = -s[i] * H(iter, i) + c[i] * H(iter, i + 1);
-      H(iter, i)     = std::conj(c[i]) * H(iter, i) + std::conj(s[i]) * H(iter, i + 1);
-      H(iter, i + 1) = tmp;
-    }
-
-    // Compute new Givens Rotation
-    ComplexD nu = sqrt(std::norm(H(iter, iter)) + std::norm(H(iter, iter + 1)));
-    c[iter]     = H(iter, iter) / nu;
-    s[iter]     = H(iter, iter + 1) / nu;
-
-    // Apply new Givens rotation
-    H(iter, iter)     = nu;
-    H(iter, iter + 1) = 0.;
-
-    gamma[iter + 1] = -s[iter] * gamma[iter];
-    gamma[iter]     = std::conj(c[iter]) * gamma[iter];
-    QrTimer.Stop();
-  }
-
-  void computeSolution(std::vector<FieldD> const &z, FieldD &psi, int iter) {
-
-    CompSolutionTimer.Start();
-    for (int i = iter; i >= 0; i--) {
-      y[i] = gamma[i];
-      for (int k = i + 1; k <= iter; k++)
-        y[i] = y[i] - H(k, i) * y[k];
-      y[i] = y[i] / H(i, i);
-    }
-
-    for (int i = 0; i <= iter; i++)
-      psi = psi + z[i] * y[i];
-    CompSolutionTimer.Stop();
-  }
-};
-}
-#endif
--- a/Grid/algorithms/iterative/PowerMethod.h
+++ b/Grid/algorithms/iterative/PowerMethod.h
@@ -1,45 +0,0 @@
-#pragma once
-namespace Grid {
-template<class Field> class PowerMethod  
-{ 
- public: 
-
-  template<typename T>  static RealD normalise(T& v) 
-  {
-    RealD nn = norm2(v);
-    nn = sqrt(nn);
-    v = v * (1.0/nn);
-    return nn;
-  }
-
-  RealD operator()(LinearOperatorBase<Field> &HermOp, const Field &src) 
-  { 
-    GridBase *grid = src._grid; 
-    
-    // quickly get an idea of the largest eigenvalue to more properly normalize the residuum 
-    RealD evalMaxApprox = 0.0; 
-    auto src_n = src; 
-    auto tmp = src; 
-    const int _MAX_ITER_EST_ = 50; 
-
-    for (int i=0;i<_MAX_ITER_EST_;i++) { 
-      
-      normalise(src_n); 
-      HermOp.HermOp(src_n,tmp); 
-      RealD vnum = real(innerProduct(src_n,tmp)); // HermOp. 
-      RealD vden = norm2(src_n); 
-      RealD na = vnum/vden; 
-      
-      if ( (fabs(evalMaxApprox/na - 1.0) < 0.01) || (i==_MAX_ITER_EST_-1) ) { 
- 	evalMaxApprox = na; 
- 	return evalMaxApprox; 
-      } 
-      evalMaxApprox = na; 
-      std::cout << GridLogMessage << " Approximation of largest eigenvalue: " << evalMaxApprox << std::endl;
-      src_n = tmp;
-    }
-    assert(0);
-    return 0;
-  }
-};
-}
--- a/Grid/algorithms/iterative/SchurRedBlack.h
+++ b/Grid/algorithms/iterative/SchurRedBlack.h
@@ -1,486 +0,0 @@
-    /*************************************************************************************
-
-    Grid physics library, www.github.com/paboyle/Grid 
-
-    Source file: ./lib/algorithms/iterative/SchurRedBlack.h
-
-    Copyright (C) 2015
-
-Author: Peter Boyle <paboyle@ph.ed.ac.uk>
-
-    This program is free software; you can redistribute it and/or modify
-    it under the terms of the GNU General Public License as published by
-    the Free Software Foundation; either version 2 of the License, or
-    (at your option) any later version.
-
-    This program is distributed in the hope that it will be useful,
-    but WITHOUT ANY WARRANTY; without even the implied warranty of
-    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-    GNU General Public License for more details.
-
-    You should have received a copy of the GNU General Public License along
-    with this program; if not, write to the Free Software Foundation, Inc.,
-    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
-
-    See the full license in the file "LICENSE" in the top level distribution directory
-    *************************************************************************************/
-    /*  END LEGAL */
-#ifndef GRID_SCHUR_RED_BLACK_H
-#define GRID_SCHUR_RED_BLACK_H
-
-
-  /*
-   * Red black Schur decomposition
-   *
-   *  M = (Mee Meo) =  (1             0 )   (Mee   0               )  (1 Mee^{-1} Meo)
-   *      (Moe Moo)    (Moe Mee^-1    1 )   (0   Moo-Moe Mee^-1 Meo)  (0   1         )
-   *                =         L                     D                     U
-   *
-   * L^-1 = (1              0 )
-   *        (-MoeMee^{-1}   1 )   
-   * L^{dag} = ( 1       Mee^{-dag} Moe^{dag} )
-   *           ( 0       1                    )
-   * L^{-d}  = ( 1      -Mee^{-dag} Moe^{dag} )
-   *           ( 0       1                    )
-   *
-   * U^-1 = (1   -Mee^{-1} Meo)
-   *        (0    1           )
-   * U^{dag} = ( 1                 0)
-   *           (Meo^dag Mee^{-dag} 1)
-   * U^{-dag} = (  1                 0)
-   *            (-Meo^dag Mee^{-dag} 1)
-   ***********************
-   *     M psi = eta
-   ***********************
-   *Odd
-   * i)                 D_oo psi_o =  L^{-1}  eta_o
-   *                        eta_o' = (D_oo)^dag (eta_o - Moe Mee^{-1} eta_e)
-   *
-   * Wilson:
-   *      (D_oo)^{\dag} D_oo psi_o = (D_oo)^dag L^{-1}  eta_o
-   * Stag:
-   *      D_oo psi_o = L^{-1}  eta =    (eta_o - Moe Mee^{-1} eta_e)
-   *
-   * L^-1 eta_o= (1              0 ) (e
-   *             (-MoeMee^{-1}   1 )   
-   *
-   *Even
-   * ii)  Mee psi_e + Meo psi_o = src_e
-   *
-   *   => sol_e = M_ee^-1 * ( src_e - Meo sol_o )...
-   *
-   * 
-   * TODO: Other options:
-   * 
-   * a) change checkerboards for Schur e<->o
-   *
-   * Left precon by Moo^-1
-   * b) Doo^{dag} M_oo^-dag Moo^-1 Doo psi_0 =  (D_oo)^dag M_oo^-dag Moo^-1 L^{-1}  eta_o
-   *                              eta_o'     = (D_oo)^dag  M_oo^-dag Moo^-1 (eta_o - Moe Mee^{-1} eta_e)
-   *
-   * Right precon by Moo^-1
-   * c) M_oo^-dag Doo^{dag} Doo Moo^-1 phi_0 = M_oo^-dag (D_oo)^dag L^{-1}  eta_o
-   *                              eta_o'     = M_oo^-dag (D_oo)^dag (eta_o - Moe Mee^{-1} eta_e)
-   *                              psi_o = M_oo^-1 phi_o
-   * TODO: Deflation 
-   */
-namespace Grid {
-
-  ///////////////////////////////////////////////////////////////////////////////////////////////////////
-  // Use base class to share code
-  ///////////////////////////////////////////////////////////////////////////////////////////////////////
-  ///////////////////////////////////////////////////////////////////////////////////////////////////////
-  // Take a matrix and form a Red Black solver calling a Herm solver
-  // Use of RB info prevents making SchurRedBlackSolve conform to standard interface
-  ///////////////////////////////////////////////////////////////////////////////////////////////////////
-  template<class Field> class SchurRedBlackBase {
-  protected:
-    typedef CheckerBoardedSparseMatrixBase<Field> Matrix;
-    OperatorFunction<Field> & _HermitianRBSolver;
-    int CBfactorise;
-    bool subGuess;
-    bool useSolnAsInitGuess; // if true user-supplied solution vector is used as initial guess for solver
-  public:
-
-    SchurRedBlackBase(OperatorFunction<Field> &HermitianRBSolver, const bool initSubGuess = false,
-        const bool _solnAsInitGuess = false)  :
-    _HermitianRBSolver(HermitianRBSolver),
-    useSolnAsInitGuess(_solnAsInitGuess)
-    { 
-      CBfactorise = 0;
-      subtractGuess(initSubGuess);
-    };
-    void subtractGuess(const bool initSubGuess)
-    {
-      subGuess = initSubGuess;
-    }
-    bool isSubtractGuess(void)
-    {
-      return subGuess;
-    }
-
-    /////////////////////////////////////////////////////////////
-    // Shared code
-    /////////////////////////////////////////////////////////////
-    void operator() (Matrix & _Matrix,const Field &in, Field &out){
-      ZeroGuesser<Field> guess;
-      (*this)(_Matrix,in,out,guess);
-    }
-    void operator()(Matrix &_Matrix, const std::vector<Field> &in, std::vector<Field> &out) 
-    {
-      ZeroGuesser<Field> guess;
-      (*this)(_Matrix,in,out,guess);
-    }
-
-    template<class Guesser>
-    void operator()(Matrix &_Matrix, const std::vector<Field> &in, std::vector<Field> &out,Guesser &guess) 
-    {
-      GridBase *grid = _Matrix.RedBlackGrid();
-      GridBase *fgrid= _Matrix.Grid();
-      int nblock = in.size();
-
-      std::vector<Field> src_o(nblock,grid);
-      std::vector<Field> sol_o(nblock,grid);
-      
-      std::vector<Field> guess_save;
-
-      Field resid(fgrid);
-      Field tmp(grid);
-
-      ////////////////////////////////////////////////
-      // Prepare RedBlack source
-      ////////////////////////////////////////////////
-      for(int b=0;b<nblock;b++){
-	RedBlackSource(_Matrix,in[b],tmp,src_o[b]);
-      }
-      ////////////////////////////////////////////////
-      // Make the guesses
-      ////////////////////////////////////////////////
-      if ( subGuess ) guess_save.resize(nblock,grid);
-
-      for(int b=0;b<nblock;b++){
-        if(useSolnAsInitGuess) {
-          pickCheckerboard(Odd, sol_o[b], out[b]);
-        } else {
-          guess(src_o[b],sol_o[b]); 
-        }
-
-	if ( subGuess ) { 
-	  guess_save[b] = sol_o[b];
-	}
-      }
-      //////////////////////////////////////////////////////////////
-      // Call the block solver
-      //////////////////////////////////////////////////////////////
-      std::cout<<GridLogMessage << "SchurRedBlackBase calling the solver for "<<nblock<<" RHS" <<std::endl;
-      RedBlackSolve(_Matrix,src_o,sol_o);
-
-      ////////////////////////////////////////////////
-      // A2A boolean behavioural control & reconstruct other checkerboard
-      ////////////////////////////////////////////////
-      for(int b=0;b<nblock;b++) {
-
-	if (subGuess)   sol_o[b] = sol_o[b] - guess_save[b];
-
-	///////// Needs even source //////////////
-	pickCheckerboard(Even,tmp,in[b]);
-	RedBlackSolution(_Matrix,sol_o[b],tmp,out[b]);
-
-	/////////////////////////////////////////////////
-	// Check unprec residual if possible
-	/////////////////////////////////////////////////
-	if ( ! subGuess ) {
-	  _Matrix.M(out[b],resid); 
-	  resid = resid-in[b];
-	  RealD ns = norm2(in[b]);
-	  RealD nr = norm2(resid);
-	
-	  std::cout<<GridLogMessage<< "SchurRedBlackBase solver true unprec resid["<<b<<"] "<<std::sqrt(nr/ns) << std::endl;
-	} else {
-	  std::cout<<GridLogMessage<< "SchurRedBlackBase Guess subtracted after solve["<<b<<"] " << std::endl;
-	}
-
-      }
-    }
-    template<class Guesser>
-    void operator() (Matrix & _Matrix,const Field &in, Field &out,Guesser &guess){
-
-      // FIXME CGdiagonalMee not implemented virtual function
-      // FIXME use CBfactorise to control schur decomp
-      GridBase *grid = _Matrix.RedBlackGrid();
-      GridBase *fgrid= _Matrix.Grid();
-
-      Field resid(fgrid);
-      Field src_o(grid);
-      Field src_e(grid);
-      Field sol_o(grid);
-
-      ////////////////////////////////////////////////
-      // RedBlack source
-      ////////////////////////////////////////////////
-      RedBlackSource(_Matrix,in,src_e,src_o);
-
-      ////////////////////////////////
-      // Construct the guess
-      ////////////////////////////////
-      if(useSolnAsInitGuess) {
-        pickCheckerboard(Odd, sol_o, out);
-      } else {
-        guess(src_o,sol_o);
-      }
-
-      Field  guess_save(grid);
-      guess_save = sol_o;
-
-      //////////////////////////////////////////////////////////////
-      // Call the red-black solver
-      //////////////////////////////////////////////////////////////
-      RedBlackSolve(_Matrix,src_o,sol_o);
-
-      ////////////////////////////////////////////////
-      // Fionn A2A boolean behavioural control
-      ////////////////////////////////////////////////
-      if (subGuess)      sol_o= sol_o-guess_save;
-
-      ///////////////////////////////////////////////////
-      // RedBlack solution needs the even source
-      ///////////////////////////////////////////////////
-      RedBlackSolution(_Matrix,sol_o,src_e,out);
-
-      // Verify the unprec residual
-      if ( ! subGuess ) {
-        _Matrix.M(out,resid); 
-        resid = resid-in;
-        RealD ns = norm2(in);
-        RealD nr = norm2(resid);
-
-        std::cout<<GridLogMessage << "SchurRedBlackBase solver true unprec resid "<< std::sqrt(nr/ns) << std::endl;
-      } else {
-        std::cout << GridLogMessage << "SchurRedBlackBase Guess subtracted after solve." << std::endl;
-      }
-    }     
-    
-    /////////////////////////////////////////////////////////////
-    // Override in derived. 
-    /////////////////////////////////////////////////////////////
-    virtual void RedBlackSource  (Matrix & _Matrix,const Field &src, Field &src_e,Field &src_o)                =0;
-    virtual void RedBlackSolution(Matrix & _Matrix,const Field &sol_o, const Field &src_e,Field &sol)          =0;
-    virtual void RedBlackSolve   (Matrix & _Matrix,const Field &src_o, Field &sol_o)                           =0;
-    virtual void RedBlackSolve   (Matrix & _Matrix,const std::vector<Field> &src_o,  std::vector<Field> &sol_o)=0;
-
-  };
-
-  template<class Field> class SchurRedBlackStaggeredSolve : public SchurRedBlackBase<Field> {
-  public:
-    typedef CheckerBoardedSparseMatrixBase<Field> Matrix;
-
-    SchurRedBlackStaggeredSolve(OperatorFunction<Field> &HermitianRBSolver, const bool initSubGuess = false,
-        const bool _solnAsInitGuess = false) 
-      :    SchurRedBlackBase<Field> (HermitianRBSolver,initSubGuess,_solnAsInitGuess) 
-    {
-    }
-
-    //////////////////////////////////////////////////////
-    // Override RedBlack specialisation
-    //////////////////////////////////////////////////////
-    virtual void RedBlackSource(Matrix & _Matrix,const Field &src, Field &src_e,Field &src_o)
-    {
-      GridBase *grid = _Matrix.RedBlackGrid();
-      GridBase *fgrid= _Matrix.Grid();
-
-      Field   tmp(grid);
-      Field  Mtmp(grid);
-
-      pickCheckerboard(Even,src_e,src);
-      pickCheckerboard(Odd ,src_o,src);
-
-      /////////////////////////////////////////////////////
-      // src_o = (source_o - Moe MeeInv source_e)
-      /////////////////////////////////////////////////////
-      _Matrix.MooeeInv(src_e,tmp);     assert(  tmp.checkerboard ==Even);
-      _Matrix.Meooe   (tmp,Mtmp);      assert( Mtmp.checkerboard ==Odd);     
-      tmp=src_o-Mtmp;                  assert(  tmp.checkerboard ==Odd);     
-
-      _Matrix.Mooee(tmp,src_o); // Extra factor of "m" in source from dumb choice of matrix norm.
-    }
-    virtual void RedBlackSolution(Matrix & _Matrix,const Field &sol_o, const Field &src_e_c,Field &sol)
-    {
-      GridBase *grid = _Matrix.RedBlackGrid();
-      GridBase *fgrid= _Matrix.Grid();
-
-      Field   tmp(grid);
-      Field   sol_e(grid);
-      Field   src_e(grid);
-
-      src_e = src_e_c; // Const correctness
-
-      ///////////////////////////////////////////////////
-      // sol_e = M_ee^-1 * ( src_e - Meo sol_o )...
-      ///////////////////////////////////////////////////
-      _Matrix.Meooe(sol_o,tmp);        assert(  tmp.checkerboard   ==Even);
-      src_e = src_e-tmp;               assert(  src_e.checkerboard ==Even);
-      _Matrix.MooeeInv(src_e,sol_e);   assert(  sol_e.checkerboard ==Even);
-     
-      setCheckerboard(sol,sol_e); assert(  sol_e.checkerboard ==Even);
-      setCheckerboard(sol,sol_o); assert(  sol_o.checkerboard ==Odd );
-    }
-    virtual void RedBlackSolve   (Matrix & _Matrix,const Field &src_o, Field &sol_o)
-    {
-      SchurStaggeredOperator<Matrix,Field> _HermOpEO(_Matrix);
-      this->_HermitianRBSolver(_HermOpEO,src_o,sol_o);  assert(sol_o.checkerboard==Odd);
-    };
-    virtual void RedBlackSolve   (Matrix & _Matrix,const std::vector<Field> &src_o,  std::vector<Field> &sol_o)
-    {
-      SchurStaggeredOperator<Matrix,Field> _HermOpEO(_Matrix);
-      this->_HermitianRBSolver(_HermOpEO,src_o,sol_o); 
-    }
-  };
-  template<class Field> using SchurRedBlackStagSolve = SchurRedBlackStaggeredSolve<Field>;
-
-  ///////////////////////////////////////////////////////////////////////////////////////////////////////
-  // Site diagonal has Mooee on it.
-  ///////////////////////////////////////////////////////////////////////////////////////////////////////
-  template<class Field> class SchurRedBlackDiagMooeeSolve : public SchurRedBlackBase<Field> {
-  public:
-    typedef CheckerBoardedSparseMatrixBase<Field> Matrix;
-
-    SchurRedBlackDiagMooeeSolve(OperatorFunction<Field> &HermitianRBSolver, const bool initSubGuess = false,
-        const bool _solnAsInitGuess = false)  
-      : SchurRedBlackBase<Field> (HermitianRBSolver,initSubGuess,_solnAsInitGuess) {};
-
-
-    //////////////////////////////////////////////////////
-    // Override RedBlack specialisation
-    //////////////////////////////////////////////////////
-    virtual void RedBlackSource(Matrix & _Matrix,const Field &src, Field &src_e,Field &src_o)
-    {
-      GridBase *grid = _Matrix.RedBlackGrid();
-      GridBase *fgrid= _Matrix.Grid();
-
-      Field   tmp(grid);
-      Field  Mtmp(grid);
-
-      pickCheckerboard(Even,src_e,src);
-      pickCheckerboard(Odd ,src_o,src);
-
-      /////////////////////////////////////////////////////
-      // src_o = Mdag * (source_o - Moe MeeInv source_e)
-      /////////////////////////////////////////////////////
-      _Matrix.MooeeInv(src_e,tmp);     assert(  tmp.checkerboard ==Even);
-      _Matrix.Meooe   (tmp,Mtmp);      assert( Mtmp.checkerboard ==Odd);     
-      tmp=src_o-Mtmp;                  assert(  tmp.checkerboard ==Odd);     
-
-      // get the right MpcDag
-      SchurDiagMooeeOperator<Matrix,Field> _HermOpEO(_Matrix);
-      _HermOpEO.MpcDag(tmp,src_o);     assert(src_o.checkerboard ==Odd);       
-
-    }
-    virtual void RedBlackSolution(Matrix & _Matrix,const Field &sol_o, const Field &src_e,Field &sol)
-    {
-      GridBase *grid = _Matrix.RedBlackGrid();
-      GridBase *fgrid= _Matrix.Grid();
-
-      Field   tmp(grid);
-      Field  sol_e(grid);
-      Field  src_e_i(grid);
-      ///////////////////////////////////////////////////
-      // sol_e = M_ee^-1 * ( src_e - Meo sol_o )...
-      ///////////////////////////////////////////////////
-      _Matrix.Meooe(sol_o,tmp);          assert(  tmp.checkerboard   ==Even);
-      src_e_i = src_e-tmp;               assert(  src_e_i.checkerboard ==Even);
-      _Matrix.MooeeInv(src_e_i,sol_e);   assert(  sol_e.checkerboard ==Even);
-     
-      setCheckerboard(sol,sol_e); assert(  sol_e.checkerboard ==Even);
-      setCheckerboard(sol,sol_o); assert(  sol_o.checkerboard ==Odd );
-    }
-    virtual void RedBlackSolve   (Matrix & _Matrix,const Field &src_o, Field &sol_o)
-    {
-      SchurDiagMooeeOperator<Matrix,Field> _HermOpEO(_Matrix);
-      this->_HermitianRBSolver(_HermOpEO,src_o,sol_o);  assert(sol_o.checkerboard==Odd);
-    };
-    virtual void RedBlackSolve   (Matrix & _Matrix,const std::vector<Field> &src_o,  std::vector<Field> &sol_o)
-    {
-      SchurDiagMooeeOperator<Matrix,Field> _HermOpEO(_Matrix);
-      this->_HermitianRBSolver(_HermOpEO,src_o,sol_o); 
-    }
-  };
-
-  ///////////////////////////////////////////////////////////////////////////////////////////////////////
-  // Site diagonal is identity, right preconditioned by Mee^inv
-  // ( 1 - Meo Moo^inv Moe Mee^inv  ) phi =( 1 - Meo Moo^inv Moe Mee^inv  ) Mee psi =  = eta  = eta
-  //=> psi = MeeInv phi
-  ///////////////////////////////////////////////////////////////////////////////////////////////////////
-  template<class Field> class SchurRedBlackDiagTwoSolve : public SchurRedBlackBase<Field> {
-  public:
-    typedef CheckerBoardedSparseMatrixBase<Field> Matrix;
-
-    /////////////////////////////////////////////////////
-    // Wrap the usual normal equations Schur trick
-    /////////////////////////////////////////////////////
-  SchurRedBlackDiagTwoSolve(OperatorFunction<Field> &HermitianRBSolver, const bool initSubGuess = false,
-      const bool _solnAsInitGuess = false)  
-    : SchurRedBlackBase<Field>(HermitianRBSolver,initSubGuess,_solnAsInitGuess) {};
-
-    virtual void RedBlackSource(Matrix & _Matrix,const Field &src, Field &src_e,Field &src_o)
-    {
-      GridBase *grid = _Matrix.RedBlackGrid();
-      GridBase *fgrid= _Matrix.Grid();
-
-      SchurDiagTwoOperator<Matrix,Field> _HermOpEO(_Matrix);
-      
-      Field   tmp(grid);
-      Field  Mtmp(grid);
-
-      pickCheckerboard(Even,src_e,src);
-      pickCheckerboard(Odd ,src_o,src);
-    
-      /////////////////////////////////////////////////////
-      // src_o = Mdag * (source_o - Moe MeeInv source_e)
-      /////////////////////////////////////////////////////
-      _Matrix.MooeeInv(src_e,tmp);     assert(  tmp.checkerboard ==Even);
-      _Matrix.Meooe   (tmp,Mtmp);      assert( Mtmp.checkerboard ==Odd);     
-      tmp=src_o-Mtmp;                  assert(  tmp.checkerboard ==Odd);     
-
-      // get the right MpcDag
-      _HermOpEO.MpcDag(tmp,src_o);     assert(src_o.checkerboard ==Odd);       
-    }
-
-    virtual void RedBlackSolution(Matrix & _Matrix,const Field &sol_o, const Field &src_e,Field &sol)
-    {
-      GridBase *grid = _Matrix.RedBlackGrid();
-      GridBase *fgrid= _Matrix.Grid();
-
-      Field   sol_o_i(grid);
-      Field   tmp(grid);
-      Field   sol_e(grid);
-
-      ////////////////////////////////////////////////
-      // MooeeInv due to pecond
-      ////////////////////////////////////////////////
-      _Matrix.MooeeInv(sol_o,tmp);
-      sol_o_i = tmp;
-
-      ///////////////////////////////////////////////////
-      // sol_e = M_ee^-1 * ( src_e - Meo sol_o )...
-      ///////////////////////////////////////////////////
-      _Matrix.Meooe(sol_o_i,tmp);    assert(  tmp.checkerboard   ==Even);
-      tmp = src_e-tmp;               assert(  src_e.checkerboard ==Even);
-      _Matrix.MooeeInv(tmp,sol_e);   assert(  sol_e.checkerboard ==Even);
-     
-      setCheckerboard(sol,sol_e);    assert(  sol_e.checkerboard ==Even);
-      setCheckerboard(sol,sol_o_i);  assert(  sol_o_i.checkerboard ==Odd );
-    };
-
-    virtual void RedBlackSolve   (Matrix & _Matrix,const Field &src_o, Field &sol_o)
-    {
-      SchurDiagTwoOperator<Matrix,Field> _HermOpEO(_Matrix);
-      this->_HermitianRBSolver(_HermOpEO,src_o,sol_o);
-    };
-    virtual void RedBlackSolve   (Matrix & _Matrix,const std::vector<Field> &src_o,  std::vector<Field> &sol_o)
-    {
-      SchurDiagTwoOperator<Matrix,Field> _HermOpEO(_Matrix);
-      this->_HermitianRBSolver(_HermOpEO,src_o,sol_o); 
-    }
-  };
-}
-#endif
--- a/Grid/communicator/Communicator_base.cc
+++ b/Grid/communicator/Communicator_base.cc
@@ -1,76 +0,0 @@
-    /*************************************************************************************
-
-    Grid physics library, www.github.com/paboyle/Grid 
-
-    Source file: ./lib/communicator/Communicator_none.cc
-
-    Copyright (C) 2015
-
-Author: Peter Boyle <paboyle@ph.ed.ac.uk>
-
-    This program is free software; you can redistribute it and/or modify
-    it under the terms of the GNU General Public License as published by
-    the Free Software Foundation; either version 2 of the License, or
-    (at your option) any later version.
-
-    This program is distributed in the hope that it will be useful,
-    but WITHOUT ANY WARRANTY; without even the implied warranty of
-    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-    GNU General Public License for more details.
-
-    You should have received a copy of the GNU General Public License along
-    with this program; if not, write to the Free Software Foundation, Inc.,
-    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
-
-    See the full license in the file "LICENSE" in the top level distribution directory
-    *************************************************************************************/
-    /*  END LEGAL */
-#include <Grid/GridCore.h>
-#include <fcntl.h>
-#include <unistd.h>
-#include <limits.h>
-#include <sys/mman.h>
-
-namespace Grid {
-
-///////////////////////////////////////////////////////////////
-// Info that is setup once and indept of cartesian layout
-///////////////////////////////////////////////////////////////
-CartesianCommunicator::CommunicatorPolicy_t  
-CartesianCommunicator::CommunicatorPolicy= CartesianCommunicator::CommunicatorPolicyConcurrent;
-int CartesianCommunicator::nCommThreads = -1;
-
-/////////////////////////////////
-// Grid information queries
-/////////////////////////////////
-int                      CartesianCommunicator::Dimensions(void)        { return _ndimension; };
-int                      CartesianCommunicator::IsBoss(void)            { return _processor==0; };
-int                      CartesianCommunicator::BossRank(void)          { return 0; };
-int                      CartesianCommunicator::ThisRank(void)          { return _processor; };
-const std::vector<int> & CartesianCommunicator::ThisProcessorCoor(void) { return _processor_coor; };
-const std::vector<int> & CartesianCommunicator::ProcessorGrid(void)     { return _processors; };
-int                      CartesianCommunicator::ProcessorCount(void)    { return _Nprocessors; };
-
-////////////////////////////////////////////////////////////////////////////////
-// very VERY rarely (Log, serial RNG) we need world without a grid
-////////////////////////////////////////////////////////////////////////////////
-
-void CartesianCommunicator::GlobalSum(ComplexF &c)
-{
-  GlobalSumVector((float *)&c,2);
-}
-void CartesianCommunicator::GlobalSumVector(ComplexF *c,int N)
-{
-  GlobalSumVector((float *)c,2*N);
-}
-void CartesianCommunicator::GlobalSum(ComplexD &c)
-{
-  GlobalSumVector((double *)&c,2);
-}
-void CartesianCommunicator::GlobalSumVector(ComplexD *c,int N)
-{
-  GlobalSumVector((double *)c,2*N);
-}
-  
-}
-
--- a/Grid/communicator/Communicator_mpi3.cc
+++ b/Grid/communicator/Communicator_mpi3.cc
@@ -1,509 +0,0 @@
-/*************************************************************************************
-
-    Grid physics library, www.github.com/paboyle/Grid 
-
-    Source file: ./lib/communicator/Communicator_mpi.cc
-
-    Copyright (C) 2015
-
-Author: Peter Boyle <paboyle@ph.ed.ac.uk>
-
-    This program is free software; you can redistribute it and/or modify
-    it under the terms of the GNU General Public License as published by
-    the Free Software Foundation; either version 2 of the License, or
-    (at your option) any later version.
-
-    This program is distributed in the hope that it will be useful,
-    but WITHOUT ANY WARRANTY; without even the implied warranty of
-    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-    GNU General Public License for more details.
-
-    You should have received a copy of the GNU General Public License along
-    with this program; if not, write to the Free Software Foundation, Inc.,
-    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
-
-    See the full license in the file "LICENSE" in the top level distribution directory
-    *************************************************************************************/
-    /*  END LEGAL */
-#include <Grid/GridCore.h>
-#include <Grid/communicator/SharedMemory.h>
-
-namespace Grid {
-
-Grid_MPI_Comm       CartesianCommunicator::communicator_world;
-
-////////////////////////////////////////////
-// First initialise of comms system
-////////////////////////////////////////////
-void CartesianCommunicator::Init(int *argc, char ***argv) 
-{
-
-  int flag;
-  int provided;
-
-  MPI_Initialized(&flag); // needed to coexist with other libs apparently
-  if ( !flag ) {
-    MPI_Init_thread(argc,argv,MPI_THREAD_MULTIPLE,&provided);
-    //If only 1 comms thread we require any threading mode other than SINGLE, but for multiple comms threads we need MULTIPLE
-    if( (nCommThreads == 1 && provided == MPI_THREAD_SINGLE) ||
-        (nCommThreads > 1 && provided != MPI_THREAD_MULTIPLE) )
-      assert(0);
-  }
-
-  // Never clean up as done once.
-  MPI_Comm_dup (MPI_COMM_WORLD,&communicator_world);
-
-  Grid_quiesce_nodes();
-  GlobalSharedMemory::Init(communicator_world);
-  GlobalSharedMemory::SharedMemoryAllocate(
-		   GlobalSharedMemory::MAX_MPI_SHM_BYTES,
-		   GlobalSharedMemory::Hugepages);
-  Grid_unquiesce_nodes();
-}
-
-///////////////////////////////////////////////////////////////////////////
-// Use cartesian communicators now even in MPI3
-///////////////////////////////////////////////////////////////////////////
-void CartesianCommunicator::ShiftedRanks(int dim,int shift,int &source,int &dest)
-{
-  int ierr=MPI_Cart_shift(communicator,dim,shift,&source,&dest);
-  assert(ierr==0);
-}
-int CartesianCommunicator::RankFromProcessorCoor(std::vector<int> &coor)
-{
-  int rank;
-  int ierr=MPI_Cart_rank  (communicator, &coor[0], &rank);
-  assert(ierr==0);
-  return rank;
-}
-void  CartesianCommunicator::ProcessorCoorFromRank(int rank, std::vector<int> &coor)
-{
-  coor.resize(_ndimension);
-  int ierr=MPI_Cart_coords  (communicator, rank, _ndimension,&coor[0]);
-  assert(ierr==0);
-}
-
-////////////////////////////////////////////////////////////////////////////////////////////////////////
-// Initialises from communicator_world
-////////////////////////////////////////////////////////////////////////////////////////////////////////
-CartesianCommunicator::CartesianCommunicator(const std::vector<int> &processors) 
-{
-  MPI_Comm optimal_comm;
-  ////////////////////////////////////////////////////
-  // Remap using the shared memory optimising routine
-  // The remap creates a comm which must be freed
-  ////////////////////////////////////////////////////
-  GlobalSharedMemory::OptimalCommunicator    (processors,optimal_comm);
-  InitFromMPICommunicator(processors,optimal_comm);
-  SetCommunicator(optimal_comm);
-  ///////////////////////////////////////////////////
-  // Free the temp communicator
-  ///////////////////////////////////////////////////
-  MPI_Comm_free(&optimal_comm);
-}
-
-//////////////////////////////////
-// Try to subdivide communicator
-//////////////////////////////////
-CartesianCommunicator::CartesianCommunicator(const std::vector<int> &processors,const CartesianCommunicator &parent,int &srank)    
-{
-  _ndimension = processors.size();  assert(_ndimension>=1);
-  int parent_ndimension = parent._ndimension; assert(_ndimension >= parent._ndimension);
-  std::vector<int> parent_processor_coor(_ndimension,0);
-  std::vector<int> parent_processors    (_ndimension,1);
-
-  // Can make 5d grid from 4d etc...
-  int pad = _ndimension-parent_ndimension;
-  for(int d=0;d<parent_ndimension;d++){
-    parent_processor_coor[pad+d]=parent._processor_coor[d];
-    parent_processors    [pad+d]=parent._processors[d];
-  }
-
-  //////////////////////////////////////////////////////////////////////////////////////////////////////
-  // split the communicator
-  //////////////////////////////////////////////////////////////////////////////////////////////////////
-  //  int Nparent = parent._processors ; 
-  int Nparent;
-  MPI_Comm_size(parent.communicator,&Nparent);
-
-  int childsize=1;
-  for(int d=0;d<processors.size();d++) {
-    childsize *= processors[d];
-  }
-  int Nchild = Nparent/childsize;
-  assert (childsize * Nchild == Nparent);
-
-  std::vector<int> ccoor(_ndimension); // coor within subcommunicator
-  std::vector<int> scoor(_ndimension); // coor of split within parent
-  std::vector<int> ssize(_ndimension); // coor of split within parent
-
-  for(int d=0;d<_ndimension;d++){
-    ccoor[d] = parent_processor_coor[d] % processors[d];
-    scoor[d] = parent_processor_coor[d] / processors[d];
-    ssize[d] = parent_processors[d]     / processors[d];
-  }
-
-  // rank within subcomm ; srank is rank of subcomm within blocks of subcomms
-  int crank;  
-  // Mpi uses the reverse Lexico convention to us; so reversed routines called
-  Lexicographic::IndexFromCoorReversed(ccoor,crank,processors); // processors is the split grid dimensions
-  Lexicographic::IndexFromCoorReversed(scoor,srank,ssize);      // ssize is the number of split grids
-
-  MPI_Comm comm_split;
-  if ( Nchild > 1 ) { 
-
-    if(0){
-      std::cout << GridLogMessage<<"Child communicator of "<< std::hex << parent.communicator << std::dec<<std::endl;
-      std::cout << GridLogMessage<<" parent grid["<< parent._ndimension<<"]    ";
-      for(int d=0;d<parent._ndimension;d++)  std::cout << parent._processors[d] << " ";
-      std::cout<<std::endl;
-      
-      std::cout << GridLogMessage<<" child grid["<< _ndimension <<"]    ";
-      for(int d=0;d<processors.size();d++)  std::cout << processors[d] << " ";
-      std::cout<<std::endl;
-      
-      std::cout << GridLogMessage<<" old rank "<< parent._processor<<" coor ["<< parent._ndimension <<"]    ";
-      for(int d=0;d<parent._ndimension;d++)  std::cout << parent._processor_coor[d] << " ";
-      std::cout<<std::endl;
-      
-      std::cout << GridLogMessage<<" new split "<< srank<<" scoor ["<< _ndimension <<"]    ";
-      for(int d=0;d<processors.size();d++)  std::cout << scoor[d] << " ";
-      std::cout<<std::endl;
-      
-      std::cout << GridLogMessage<<" new rank "<< crank<<" coor ["<< _ndimension <<"]    ";
-      for(int d=0;d<processors.size();d++)  std::cout << ccoor[d] << " ";
-      std::cout<<std::endl;
-
-      //////////////////////////////////////////////////////////////////////////////////////////////////////
-      // Declare victory
-      //////////////////////////////////////////////////////////////////////////////////////////////////////
-      std::cout << GridLogMessage<<"Divided communicator "<< parent._Nprocessors<<" into "
-		<< Nchild <<" communicators with " << childsize << " ranks"<<std::endl;
-      std::cout << " Split communicator " <<comm_split <<std::endl;
-    }
-
-    ////////////////////////////////////////////////////////////////
-    // Split the communicator
-    ////////////////////////////////////////////////////////////////
-    int ierr= MPI_Comm_split(parent.communicator,srank,crank,&comm_split);
-    assert(ierr==0);
-
-  } else {
-    srank = 0;
-    int ierr = MPI_Comm_dup (parent.communicator,&comm_split);
-    assert(ierr==0);
-  }
-
-  //////////////////////////////////////////////////////////////////////////////////////////////////////
-  // Set up from the new split communicator
-  //////////////////////////////////////////////////////////////////////////////////////////////////////
-  InitFromMPICommunicator(processors,comm_split);
-
-  //////////////////////////////////////////////////////////////////////////////////////////////////////
-  // Take the right SHM buffers
-  //////////////////////////////////////////////////////////////////////////////////////////////////////
-  SetCommunicator(comm_split);
-  
-  ///////////////////////////////////////////////
-  // Free the temp communicator 
-  ///////////////////////////////////////////////
-  MPI_Comm_free(&comm_split);
-
-  if(0){ 
-    std::cout << " ndim " <<_ndimension<<" " << parent._ndimension << std::endl;
-    for(int d=0;d<processors.size();d++){
-      std::cout << d<< " " << _processor_coor[d] <<" " <<  ccoor[d]<<std::endl;
-    }
-  }
-  for(int d=0;d<processors.size();d++){
-    assert(_processor_coor[d] == ccoor[d] );
-  }
-}
-
-void CartesianCommunicator::InitFromMPICommunicator(const std::vector<int> &processors, MPI_Comm communicator_base)
-{
-  ////////////////////////////////////////////////////
-  // Creates communicator, and the communicator_halo
-  ////////////////////////////////////////////////////
-  _ndimension = processors.size();
-  _processor_coor.resize(_ndimension);
-
-  /////////////////////////////////
-  // Count the requested nodes
-  /////////////////////////////////
-  _Nprocessors=1;
-  _processors = processors;
-  for(int i=0;i<_ndimension;i++){
-    _Nprocessors*=_processors[i];
-  }
-
-  std::vector<int> periodic(_ndimension,1);
-  MPI_Cart_create(communicator_base, _ndimension,&_processors[0],&periodic[0],0,&communicator);
-  MPI_Comm_rank(communicator,&_processor);
-  MPI_Cart_coords(communicator,_processor,_ndimension,&_processor_coor[0]);
-
-  if ( 0 && (communicator_base != communicator_world) ) {
-    std::cout << "InitFromMPICommunicator Cartesian communicator created with a non-world communicator"<<std::endl;
-    std::cout << " new communicator rank "<<_processor<< " coor ["<<_ndimension<<"] ";
-    for(int d=0;d<_processors.size();d++){
-      std::cout << _processor_coor[d]<<" ";
-    }
-    std::cout << std::endl;
-  }
-
-  int Size;
-  MPI_Comm_size(communicator,&Size);
-
-  communicator_halo.resize (2*_ndimension);
-  for(int i=0;i<_ndimension*2;i++){
-    MPI_Comm_dup(communicator,&communicator_halo[i]);
-  }
-  assert(Size==_Nprocessors);
-}
-
-CartesianCommunicator::~CartesianCommunicator()
-{
-  int MPI_is_finalised;
-  MPI_Finalized(&MPI_is_finalised);
-  if (communicator && !MPI_is_finalised) {
-    MPI_Comm_free(&communicator);
-    for(int i=0;i<communicator_halo.size();i++){
-      MPI_Comm_free(&communicator_halo[i]);
-    }
-  }  
-}
-void CartesianCommunicator::GlobalSum(uint32_t &u){
-  int ierr=MPI_Allreduce(MPI_IN_PLACE,&u,1,MPI_UINT32_T,MPI_SUM,communicator);
-  assert(ierr==0);
-}
-void CartesianCommunicator::GlobalSum(uint64_t &u){
-  int ierr=MPI_Allreduce(MPI_IN_PLACE,&u,1,MPI_UINT64_T,MPI_SUM,communicator);
-  assert(ierr==0);
-}
-void CartesianCommunicator::GlobalXOR(uint32_t &u){
-  int ierr=MPI_Allreduce(MPI_IN_PLACE,&u,1,MPI_UINT32_T,MPI_BXOR,communicator);
-  assert(ierr==0);
-}
-void CartesianCommunicator::GlobalXOR(uint64_t &u){
-  int ierr=MPI_Allreduce(MPI_IN_PLACE,&u,1,MPI_UINT64_T,MPI_BXOR,communicator);
-  assert(ierr==0);
-}
-void CartesianCommunicator::GlobalSum(float &f){
-  int ierr=MPI_Allreduce(MPI_IN_PLACE,&f,1,MPI_FLOAT,MPI_SUM,communicator);
-  assert(ierr==0);
-}
-void CartesianCommunicator::GlobalSumVector(float *f,int N)
-{
-  int ierr=MPI_Allreduce(MPI_IN_PLACE,f,N,MPI_FLOAT,MPI_SUM,communicator);
-  assert(ierr==0);
-}
-void CartesianCommunicator::GlobalSum(double &d)
-{
-  int ierr = MPI_Allreduce(MPI_IN_PLACE,&d,1,MPI_DOUBLE,MPI_SUM,communicator);
-  assert(ierr==0);
-}
-void CartesianCommunicator::GlobalSumVector(double *d,int N)
-{
-  int ierr = MPI_Allreduce(MPI_IN_PLACE,d,N,MPI_DOUBLE,MPI_SUM,communicator);
-  assert(ierr==0);
-}
-// Basic Halo comms primitive
-void CartesianCommunicator::SendToRecvFrom(void *xmit,
-					   int dest,
-					   void *recv,
-					   int from,
-					   int bytes)
-{
-  std::vector<CommsRequest_t> reqs(0);
-  //    unsigned long  xcrc = crc32(0L, Z_NULL, 0);
-  //    unsigned long  rcrc = crc32(0L, Z_NULL, 0);
-  //    xcrc = crc32(xcrc,(unsigned char *)xmit,bytes);
-  SendToRecvFromBegin(reqs,xmit,dest,recv,from,bytes);
-  SendToRecvFromComplete(reqs);
-  //    rcrc = crc32(rcrc,(unsigned char *)recv,bytes);
-  //    printf("proc %d SendToRecvFrom %d bytes %lx %lx\n",_processor,bytes,xcrc,rcrc);
-}
-void CartesianCommunicator::SendRecvPacket(void *xmit,
-					   void *recv,
-					   int sender,
-					   int receiver,
-					   int bytes)
-{
-  MPI_Status stat;
-  assert(sender != receiver);
-  int tag = sender;
-  if ( _processor == sender ) {
-    MPI_Send(xmit, bytes, MPI_CHAR,receiver,tag,communicator);
-  }
-  if ( _processor == receiver ) { 
-    MPI_Recv(recv, bytes, MPI_CHAR,sender,tag,communicator,&stat);
-  }
-}
-// Basic Halo comms primitive
-void CartesianCommunicator::SendToRecvFromBegin(std::vector<CommsRequest_t> &list,
-						void *xmit,
-						int dest,
-						void *recv,
-						int from,
-						int bytes)
-{
-  int myrank = _processor;
-  int ierr;
-
-  if ( CommunicatorPolicy == CommunicatorPolicyConcurrent ) { 
-    MPI_Request xrq;
-    MPI_Request rrq;
-
-    ierr =MPI_Irecv(recv, bytes, MPI_CHAR,from,from,communicator,&rrq);
-    ierr|=MPI_Isend(xmit, bytes, MPI_CHAR,dest,_processor,communicator,&xrq);
-    
-    assert(ierr==0);
-    list.push_back(xrq);
-    list.push_back(rrq);
-  } else { 
-    // Give the CPU to MPI immediately; can use threads to overlap optionally
-    ierr=MPI_Sendrecv(xmit,bytes,MPI_CHAR,dest,myrank,
-		      recv,bytes,MPI_CHAR,from, from,
-		      communicator,MPI_STATUS_IGNORE);
-    assert(ierr==0);
-  }
-}
-
-double CartesianCommunicator::StencilSendToRecvFrom( void *xmit,
-						     int dest,
-						     void *recv,
-						     int from,
-						     int bytes,int dir)
-{
-  std::vector<CommsRequest_t> list;
-  double offbytes = StencilSendToRecvFromBegin(list,xmit,dest,recv,from,bytes,dir);
-  StencilSendToRecvFromComplete(list,dir);
-  return offbytes;
-}
-
-double CartesianCommunicator::StencilSendToRecvFromBegin(std::vector<CommsRequest_t> &list,
-							 void *xmit,
-							 int dest,
-							 void *recv,
-							 int from,
-							 int bytes,int dir)
-{
-  int ncomm  =communicator_halo.size(); 
-  int commdir=dir%ncomm;
-
-  MPI_Request xrq;
-  MPI_Request rrq;
-
-  int ierr;
-  int gdest = ShmRanks[dest];
-  int gfrom = ShmRanks[from];
-  int gme   = ShmRanks[_processor];
-
-  assert(dest != _processor);
-  assert(from != _processor);
-  assert(gme  == ShmRank);
-  double off_node_bytes=0.0;
-
-  if ( gfrom ==MPI_UNDEFINED) {
-    ierr=MPI_Irecv(recv, bytes, MPI_CHAR,from,from,communicator_halo[commdir],&rrq);
-    assert(ierr==0);
-    list.push_back(rrq);
-    off_node_bytes+=bytes;
-  }
-
-  if ( gdest == MPI_UNDEFINED ) {
-    ierr =MPI_Isend(xmit, bytes, MPI_CHAR,dest,_processor,communicator_halo[commdir],&xrq);
-    assert(ierr==0);
-    list.push_back(xrq);
-    off_node_bytes+=bytes;
-  }
-
-  if ( CommunicatorPolicy == CommunicatorPolicySequential ) { 
-    this->StencilSendToRecvFromComplete(list,dir);
-  }
-
-  return off_node_bytes;
-}
-void CartesianCommunicator::StencilSendToRecvFromComplete(std::vector<CommsRequest_t> &waitall,int dir)
-{
-  SendToRecvFromComplete(waitall);
-}
-void CartesianCommunicator::StencilBarrier(void)
-{
-  MPI_Barrier  (ShmComm);
-}
-void CartesianCommunicator::SendToRecvFromComplete(std::vector<CommsRequest_t> &list)
-{
-  int nreq=list.size();
-
-  if (nreq==0) return;
-
-  std::vector<MPI_Status> status(nreq);
-  int ierr = MPI_Waitall(nreq,&list[0],&status[0]);
-  assert(ierr==0);
-  list.resize(0);
-}
-void CartesianCommunicator::Barrier(void)
-{
-  int ierr = MPI_Barrier(communicator);
-  assert(ierr==0);
-}
-void CartesianCommunicator::Broadcast(int root,void* data, int bytes)
-{
-  int ierr=MPI_Bcast(data,
-		     bytes,
-		     MPI_BYTE,
-		     root,
-		     communicator);
-  assert(ierr==0);
-}
-int CartesianCommunicator::RankWorld(void){ 
-  int r; 
-  MPI_Comm_rank(communicator_world,&r);
-  return r;
-}
-void CartesianCommunicator::BroadcastWorld(int root,void* data, int bytes)
-{
-  int ierr= MPI_Bcast(data,
-		      bytes,
-		      MPI_BYTE,
-		      root,
-		      communicator_world);
-  assert(ierr==0);
-}
-
-void CartesianCommunicator::AllToAll(int dim,void  *in,void *out,uint64_t words,uint64_t bytes)
-{
-  std::vector<int> row(_ndimension,1);
-  assert(dim>=0 && dim<_ndimension);
-
-  //  Split the communicator
-  row[dim] = _processors[dim];
-
-  int me;
-  CartesianCommunicator Comm(row,*this,me);
-  Comm.AllToAll(in,out,words,bytes);
-}
-void CartesianCommunicator::AllToAll(void  *in,void *out,uint64_t words,uint64_t bytes)
-{
-  // MPI is a pain and uses "int" arguments
-  // 64*64*64*128*16 == 500Million elements of data.
-  // When 24*4 bytes multiples get 50x 10^9 >>> 2x10^9 Y2K bug.
-  // (Turns up on 32^3 x 64 Gparity too)
-  MPI_Datatype object;
-  int iwords; 
-  int ibytes;
-  iwords = words;
-  ibytes = bytes;
-  assert(words == iwords); // safe to cast to int ?
-  assert(bytes == ibytes); // safe to cast to int ?
-  MPI_Type_contiguous(ibytes,MPI_BYTE,&object);
-  MPI_Type_commit(&object);
-  MPI_Alltoall(in,iwords,object,out,iwords,object,communicator);
-  MPI_Type_free(&object);
-}
-
-
-
-}
-
--- a/Grid/communicator/SharedMemory.cc
+++ b/Grid/communicator/SharedMemory.cc
@@ -1,92 +0,0 @@
-/*************************************************************************************
-
-    Grid physics library, www.github.com/paboyle/Grid 
-
-    Source file: ./lib/communicator/SharedMemory.cc
-
-    Copyright (C) 2015
-
-Author: Peter Boyle <paboyle@ph.ed.ac.uk>
-
-    This program is free software; you can redistribute it and/or modify
-    it under the terms of the GNU General Public License as published by
-    the Free Software Foundation; either version 2 of the License, or
-    (at your option) any later version.
-
-    This program is distributed in the hope that it will be useful,
-    but WITHOUT ANY WARRANTY; without even the implied warranty of
-    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-    GNU General Public License for more details.
-
-    You should have received a copy of the GNU General Public License along
-    with this program; if not, write to the Free Software Foundation, Inc.,
-    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
-
-    See the full license in the file "LICENSE" in the top level distribution directory
-*************************************************************************************/
-/*  END LEGAL */
-
-#include <Grid/GridCore.h>
-
-namespace Grid { 
-
-// static data
-
-uint64_t            GlobalSharedMemory::MAX_MPI_SHM_BYTES   = 1024LL*1024LL*1024LL; 
-int                 GlobalSharedMemory::Hugepages = 0;
-int                 GlobalSharedMemory::_ShmSetup;
-int                 GlobalSharedMemory::_ShmAlloc;
-uint64_t            GlobalSharedMemory::_ShmAllocBytes;
-
-std::vector<void *> GlobalSharedMemory::WorldShmCommBufs;
-
-Grid_MPI_Comm       GlobalSharedMemory::WorldShmComm;
-int                 GlobalSharedMemory::WorldShmRank;
-int                 GlobalSharedMemory::WorldShmSize;
-std::vector<int>    GlobalSharedMemory::WorldShmRanks;
-
-Grid_MPI_Comm       GlobalSharedMemory::WorldComm;
-int                 GlobalSharedMemory::WorldSize;
-int                 GlobalSharedMemory::WorldRank;
-
-int                 GlobalSharedMemory::WorldNodes;
-int                 GlobalSharedMemory::WorldNode;
-
-void GlobalSharedMemory::SharedMemoryFree(void)
-{
-  assert(_ShmAlloc);
-  assert(_ShmAllocBytes>0);
-  for(int r=0;r<WorldShmSize;r++){
-    munmap(WorldShmCommBufs[r],_ShmAllocBytes);
-  }
-  _ShmAlloc = 0;
-  _ShmAllocBytes = 0;
-}
-/////////////////////////////////
-// Alloc, free shmem region
-/////////////////////////////////
-void *SharedMemory::ShmBufferMalloc(size_t bytes){
-  //  bytes = (bytes+sizeof(vRealD))&(~(sizeof(vRealD)-1));// align up bytes
-  void *ptr = (void *)heap_top;
-  heap_top  += bytes;
-  heap_bytes+= bytes;
-  if (heap_bytes >= heap_size) {
-    std::cout<< " ShmBufferMalloc exceeded shared heap size -- try increasing with --shm <MB> flag" <<std::endl;
-    std::cout<< " Parameter specified in units of MB (megabytes) " <<std::endl;
-    std::cout<< " Current value is " << (heap_size/(1024*1024)) <<std::endl;
-    assert(heap_bytes<heap_size);
-  }
-  return ptr;
-}
-void SharedMemory::ShmBufferFreeAll(void) { 
-  heap_top  =(size_t)ShmBufferSelf();
-  heap_bytes=0;
-}
-void *SharedMemory::ShmBufferSelf(void)
-{
-  return ShmCommBufs[ShmRank];
-}
-
-
-
-}
--- a/Grid/communicator/SharedMemory.h
+++ b/Grid/communicator/SharedMemory.h
@@ -1,167 +0,0 @@
-/*************************************************************************************
-
-    Grid physics library, www.github.com/paboyle/Grid 
-
-    Source file: ./lib/communicator/SharedMemory.cc
-
-    Copyright (C) 2015
-
-Author: Peter Boyle <paboyle@ph.ed.ac.uk>
-
-    This program is free software; you can redistribute it and/or modify
-    it under the terms of the GNU General Public License as published by
-    the Free Software Foundation; either version 2 of the License, or
-    (at your option) any later version.
-
-    This program is distributed in the hope that it will be useful,
-    but WITHOUT ANY WARRANTY; without even the implied warranty of
-    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-    GNU General Public License for more details.
-
-    You should have received a copy of the GNU General Public License along
-    with this program; if not, write to the Free Software Foundation, Inc.,
-    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
-
-    See the full license in the file "LICENSE" in the top level distribution directory
-*************************************************************************************/
-/*  END LEGAL */
-
-
-// TODO
-// 1) move includes into SharedMemory.cc
-//
-// 2) split shared memory into a) optimal communicator creation from comm world
-// 
-//                             b) shared memory buffers container
-//                                -- static globally shared; init once
-//                                -- per instance set of buffers.
-//                                   
-
-#pragma once 
-
-#include <Grid/GridCore.h>
-
-#if defined (GRID_COMMS_MPI3) 
-#include <mpi.h>
-#endif 
-#include <semaphore.h>
-#include <fcntl.h>
-#include <unistd.h>
-#include <limits.h>
-#include <sys/types.h>
-#include <sys/ipc.h>
-#include <sys/shm.h>
-#include <sys/mman.h>
-#include <zlib.h>
-#ifdef HAVE_NUMAIF_H
-#include <numaif.h>
-#endif
-
-namespace Grid {
-
-#if defined (GRID_COMMS_MPI3) 
-  typedef MPI_Comm    Grid_MPI_Comm;
-  typedef MPI_Request CommsRequest_t;
-#else 
-  typedef int CommsRequest_t;
-  typedef int Grid_MPI_Comm;
-#endif
-
-class GlobalSharedMemory {
- private:
-  static const int     MAXLOG2RANKSPERNODE = 16;            
-
-  // Init once lock on the buffer allocation
-  static int      _ShmSetup;
-  static int      _ShmAlloc;
-  static uint64_t _ShmAllocBytes;
-
- public:
-  static int      ShmSetup(void)      { return _ShmSetup; }
-  static int      ShmAlloc(void)      { return _ShmAlloc; }
-  static uint64_t ShmAllocBytes(void) { return _ShmAllocBytes; }
-  static uint64_t      MAX_MPI_SHM_BYTES;
-  static int           Hugepages;
-
-  static std::vector<void *> WorldShmCommBufs;
-
-  static Grid_MPI_Comm WorldComm;
-  static int           WorldRank;
-  static int           WorldSize;
-
-  static Grid_MPI_Comm WorldShmComm;
-  static int           WorldShmRank;
-  static int           WorldShmSize;
-
-  static int           WorldNodes;
-  static int           WorldNode;
-
-  static std::vector<int>  WorldShmRanks;
-
-  //////////////////////////////////////////////////////////////////////////////////////
-  // Create an optimal reordered communicator that makes MPI_Cart_create get it right
-  //////////////////////////////////////////////////////////////////////////////////////
-  static void Init(Grid_MPI_Comm comm); // Typically MPI_COMM_WORLD
-  static void OptimalCommunicator(const std::vector<int> &processors,Grid_MPI_Comm & optimal_comm);  // Turns MPI_COMM_WORLD into right layout for Cartesian
-  static void OptimalCommunicatorHypercube(const std::vector<int> &processors,Grid_MPI_Comm & optimal_comm);  // Turns MPI_COMM_WORLD into right layout for Cartesian
-  static void OptimalCommunicatorSharedMemory(const std::vector<int> &processors,Grid_MPI_Comm & optimal_comm);  // Turns MPI_COMM_WORLD into right layout for Cartesian
-  ///////////////////////////////////////////////////
-  // Provide shared memory facilities off comm world
-  ///////////////////////////////////////////////////
-  static void SharedMemoryAllocate(uint64_t bytes, int flags);
-  static void SharedMemoryFree(void);
-
-};
-
-//////////////////////////////
-// one per communicator
-//////////////////////////////
-class SharedMemory 
-{
- private:
-  static const int     MAXLOG2RANKSPERNODE = 16;            
-
-  size_t heap_top;
-  size_t heap_bytes;
-  size_t heap_size;
-
- protected:
-
-  Grid_MPI_Comm    ShmComm; // for barriers
-  int    ShmRank; 
-  int    ShmSize;
-  std::vector<void *> ShmCommBufs;
-  std::vector<int>    ShmRanks;// Mapping comm ranks to Shm ranks
-
- public:
-  SharedMemory() {};
-  ~SharedMemory();
-  ///////////////////////////////////////////////////////////////////////////////////////
-  // set the buffers & sizes
-  ///////////////////////////////////////////////////////////////////////////////////////
-  void SetCommunicator(Grid_MPI_Comm comm);
-
-  ////////////////////////////////////////////////////////////////////////
-  // For this instance ; disjoint buffer sets between splits if split grid
-  ////////////////////////////////////////////////////////////////////////
-  void ShmBarrier(void); 
-
-  ///////////////////////////////////////////////////
-  // Call on any instance
-  ///////////////////////////////////////////////////
-  void SharedMemoryTest(void);
-  void *ShmBufferSelf(void);
-  void *ShmBuffer    (int rank);
-  void *ShmBufferTranslate(int rank,void * local_p);
-  void *ShmBufferMalloc(size_t bytes);
-  void  ShmBufferFreeAll(void) ;
-  
-  //////////////////////////////////////////////////////////////////////////
-  // Make info on Nodes & ranks and Shared memory available
-  //////////////////////////////////////////////////////////////////////////
-  int NodeCount(void) { return GlobalSharedMemory::WorldNodes;};
-  int RankCount(void) { return GlobalSharedMemory::WorldSize;};
-
-};
-
-}
--- a/Grid/communicator/SharedMemoryMPI.cc
+++ b/Grid/communicator/SharedMemoryMPI.cc
@@ -1,667 +0,0 @@
-/*************************************************************************************
-
-    Grid physics library, www.github.com/paboyle/Grid 
-
-    Source file: ./lib/communicator/SharedMemory.cc
-
-    Copyright (C) 2015
-
-Author: Peter Boyle <paboyle@ph.ed.ac.uk>
-
-    This program is free software; you can redistribute it and/or modify
-    it under the terms of the GNU General Public License as published by
-    the Free Software Foundation; either version 2 of the License, or
-    (at your option) any later version.
-
-    This program is distributed in the hope that it will be useful,
-    but WITHOUT ANY WARRANTY; without even the implied warranty of
-    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-    GNU General Public License for more details.
-
-    You should have received a copy of the GNU General Public License along
-    with this program; if not, write to the Free Software Foundation, Inc.,
-    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
-
-    See the full license in the file "LICENSE" in the top level distribution directory
-*************************************************************************************/
-/*  END LEGAL */
-
-#include <Grid/GridCore.h>
-#include <pwd.h>
-
-namespace Grid { 
-
-/*Construct from an MPI communicator*/
-void GlobalSharedMemory::Init(Grid_MPI_Comm comm)
-{
-  assert(_ShmSetup==0);
-  WorldComm = comm;
-  MPI_Comm_rank(WorldComm,&WorldRank);
-  MPI_Comm_size(WorldComm,&WorldSize);
-  // WorldComm, WorldSize, WorldRank
-
-  /////////////////////////////////////////////////////////////////////
-  // Split into groups that can share memory
-  /////////////////////////////////////////////////////////////////////
-  MPI_Comm_split_type(comm, MPI_COMM_TYPE_SHARED, 0, MPI_INFO_NULL,&WorldShmComm);
-  MPI_Comm_rank(WorldShmComm     ,&WorldShmRank);
-  MPI_Comm_size(WorldShmComm     ,&WorldShmSize);
-  // WorldShmComm, WorldShmSize, WorldShmRank
-
-  // WorldNodes
-  WorldNodes = WorldSize/WorldShmSize;
-  assert( (WorldNodes * WorldShmSize) == WorldSize );
-
-  // FIXME: Check all WorldShmSize are the same ?
-
-  /////////////////////////////////////////////////////////////////////
-  // find world ranks in our SHM group (i.e. which ranks are on our node)
-  /////////////////////////////////////////////////////////////////////
-  MPI_Group WorldGroup, ShmGroup;
-  MPI_Comm_group (WorldComm, &WorldGroup); 
-  MPI_Comm_group (WorldShmComm, &ShmGroup);
-
-  std::vector<int> world_ranks(WorldSize);   for(int r=0;r<WorldSize;r++) world_ranks[r]=r;
-
-  WorldShmRanks.resize(WorldSize); 
-  MPI_Group_translate_ranks (WorldGroup,WorldSize,&world_ranks[0],ShmGroup, &WorldShmRanks[0]); 
-
-  ///////////////////////////////////////////////////////////////////
-  // Identify who is in my group and nominate the leader
-  ///////////////////////////////////////////////////////////////////
-  int g=0;
-  std::vector<int> MyGroup;
-  MyGroup.resize(WorldShmSize);
-  for(int rank=0;rank<WorldSize;rank++){
-    if(WorldShmRanks[rank]!=MPI_UNDEFINED){
-      assert(g<WorldShmSize);
-      MyGroup[g++] = rank;
-    }
-  }
-  
-  std::sort(MyGroup.begin(),MyGroup.end(),std::less<int>());
-  int myleader = MyGroup[0];
-  
-  std::vector<int> leaders_1hot(WorldSize,0);
-  std::vector<int> leaders_group(WorldNodes,0);
-  leaders_1hot [ myleader ] = 1;
-    
-  ///////////////////////////////////////////////////////////////////
-  // global sum leaders over comm world
-  ///////////////////////////////////////////////////////////////////
-  int ierr=MPI_Allreduce(MPI_IN_PLACE,&leaders_1hot[0],WorldSize,MPI_INT,MPI_SUM,WorldComm);
-  assert(ierr==0);
-
-  ///////////////////////////////////////////////////////////////////
-  // find the group leaders world rank
-  ///////////////////////////////////////////////////////////////////
-  int group=0;
-  for(int l=0;l<WorldSize;l++){
-    if(leaders_1hot[l]){
-      leaders_group[group++] = l;
-    }
-  }
-
-  ///////////////////////////////////////////////////////////////////
-  // Identify the node of the group in which I (and my leader) live
-  ///////////////////////////////////////////////////////////////////
-  WorldNode=-1;
-  for(int g=0;g<WorldNodes;g++){
-    if (myleader == leaders_group[g]){
-      WorldNode=g;
-    }
-  }
-  assert(WorldNode!=-1);
-  _ShmSetup=1;
-}
-// Gray encode support 
-int BinaryToGray (int  binary) {
-  int gray = (binary>>1)^binary;
-  return gray;
-}
-int Log2Size(int TwoToPower,int MAXLOG2)
-{
-  int log2size = -1;
-  for(int i=0;i<=MAXLOG2;i++){
-    if ( (0x1<<i) == TwoToPower ) {
-      log2size = i;
-      break;
-    }
-  }
-  return log2size;
-}
-void GlobalSharedMemory::OptimalCommunicator(const std::vector<int> &processors,Grid_MPI_Comm & optimal_comm)
-{
-  //////////////////////////////////////////////////////////////////////////////
-  // Look and see if it looks like an HPE 8600 based on hostname conventions
-  //////////////////////////////////////////////////////////////////////////////
-  const int namelen = _POSIX_HOST_NAME_MAX;
-  char name[namelen];
-  int R;
-  int I;
-  int N;
-  gethostname(name,namelen);
-  int nscan = sscanf(name,"r%di%dn%d",&R,&I,&N) ;
-
-  if(nscan==3) OptimalCommunicatorHypercube(processors,optimal_comm);
-  else         OptimalCommunicatorSharedMemory(processors,optimal_comm);
-}
-void GlobalSharedMemory::OptimalCommunicatorHypercube(const std::vector<int> &processors,Grid_MPI_Comm & optimal_comm)
-{
-  ////////////////////////////////////////////////////////////////
-  // Assert power of two shm_size.
-  ////////////////////////////////////////////////////////////////
-  int log2size = Log2Size(WorldShmSize,MAXLOG2RANKSPERNODE);
-  assert(log2size != -1);
-
-  ////////////////////////////////////////////////////////////////
-  // Identify the hypercube coordinate of this node using hostname
-  ////////////////////////////////////////////////////////////////
-  // n runs 0...7 9...16 18...25 27...34     (8*4)  5 bits
-  // i runs 0..7                                    3 bits
-  // r runs 0..3                                    2 bits
-  // 2^10 = 1024 nodes
-  const int maxhdim = 10; 
-  std::vector<int> HyperCubeCoords(maxhdim,0);
-  std::vector<int> RootHyperCubeCoords(maxhdim,0);
-  int R;
-  int I;
-  int N;
-  const int namelen = _POSIX_HOST_NAME_MAX;
-  char name[namelen];
-
-  // Parse ICE-XA hostname to get hypercube location
-  gethostname(name,namelen);
-  int nscan = sscanf(name,"r%di%dn%d",&R,&I,&N) ;
-  assert(nscan==3);
-
-  int nlo = N%9;
-  int nhi = N/9;
-  uint32_t hypercoor = (R<<8)|(I<<5)|(nhi<<3)|nlo ;
-  uint32_t rootcoor  = hypercoor;
-
-  //////////////////////////////////////////////////////////////////
-  // Print debug info
-  //////////////////////////////////////////////////////////////////
-  for(int d=0;d<maxhdim;d++){
-    HyperCubeCoords[d] = (hypercoor>>d)&0x1;
-  }
-
-  std::string hname(name);
-  std::cout << "hostname "<<hname<<std::endl;
-  std::cout << "R " << R << " I " << I << " N "<< N
-            << " hypercoor 0x"<<std::hex<<hypercoor<<std::dec<<std::endl;
-
-  //////////////////////////////////////////////////////////////////
-  // broadcast node 0's base coordinate for this partition.
-  //////////////////////////////////////////////////////////////////
-  MPI_Bcast(&rootcoor, sizeof(rootcoor), MPI_BYTE, 0, WorldComm); 
-  hypercoor=hypercoor-rootcoor;
-  assert(hypercoor<WorldSize);
-  assert(hypercoor>=0);
-
-  //////////////////////////////////////
-  // Printing
-  //////////////////////////////////////
-  for(int d=0;d<maxhdim;d++){
-    HyperCubeCoords[d] = (hypercoor>>d)&0x1;
-  }
-
-  ////////////////////////////////////////////////////////////////
-  // Identify subblock of ranks on node spreading across dims
-  // in a maximally symmetrical way
-  ////////////////////////////////////////////////////////////////
-  int ndimension              = processors.size();
-  std::vector<int> processor_coor(ndimension);
-  std::vector<int> WorldDims = processors;   std::vector<int> ShmDims  (ndimension,1);  std::vector<int> NodeDims (ndimension);
-  std::vector<int> ShmCoor  (ndimension);    std::vector<int> NodeCoor (ndimension);    std::vector<int> WorldCoor(ndimension);
-  std::vector<int> HyperCoor(ndimension);
-  int dim = 0;
-  for(int l2=0;l2<log2size;l2++){
-    while ( (WorldDims[dim] / ShmDims[dim]) <= 1 ) dim=(dim+1)%ndimension;
-    ShmDims[dim]*=2;
-    dim=(dim+1)%ndimension;
-  }
-
-  ////////////////////////////////////////////////////////////////
-  // Establish torus of processes and nodes with sub-blockings
-  ////////////////////////////////////////////////////////////////
-  for(int d=0;d<ndimension;d++){
-    NodeDims[d] = WorldDims[d]/ShmDims[d];
-  }
-  ////////////////////////////////////////////////////////////////
-  // Map Hcube according to physical lattice 
-  // must partition. Loop over dims and find out who would join.
-  ////////////////////////////////////////////////////////////////
-  int hcoor = hypercoor;
-  for(int d=0;d<ndimension;d++){
-     int bits = Log2Size(NodeDims[d],MAXLOG2RANKSPERNODE);
-     int msk  = (0x1<<bits)-1;
-     HyperCoor[d]=hcoor & msk;  
-     HyperCoor[d]=BinaryToGray(HyperCoor[d]); // Space filling curve magic
-     hcoor = hcoor >> bits;
-  } 
-  ////////////////////////////////////////////////////////////////
-  // Check processor counts match
-  ////////////////////////////////////////////////////////////////
-  int Nprocessors=1;
-  for(int i=0;i<ndimension;i++){
-    Nprocessors*=processors[i];
-  }
-  assert(WorldSize==Nprocessors);
-
-  ////////////////////////////////////////////////////////////////
-  // Establish mapping between lexico physics coord and WorldRank
-  ////////////////////////////////////////////////////////////////
-  int rank;
-
-  Lexicographic::CoorFromIndexReversed(NodeCoor,WorldNode   ,NodeDims);
-
-  for(int d=0;d<ndimension;d++) NodeCoor[d]=HyperCoor[d];
-
-  Lexicographic::CoorFromIndexReversed(ShmCoor ,WorldShmRank,ShmDims);
-  for(int d=0;d<ndimension;d++) WorldCoor[d] = NodeCoor[d]*ShmDims[d]+ShmCoor[d];
-  Lexicographic::IndexFromCoorReversed(WorldCoor,rank,WorldDims);
-
-  /////////////////////////////////////////////////////////////////
-  // Build the new communicator
-  /////////////////////////////////////////////////////////////////
-  int ierr= MPI_Comm_split(WorldComm,0,rank,&optimal_comm);
-  assert(ierr==0);
-}
-void GlobalSharedMemory::OptimalCommunicatorSharedMemory(const std::vector<int> &processors,Grid_MPI_Comm & optimal_comm)
-{
-  ////////////////////////////////////////////////////////////////
-  // Assert power of two shm_size.
-  ////////////////////////////////////////////////////////////////
-  int log2size = Log2Size(WorldShmSize,MAXLOG2RANKSPERNODE);
-  assert(log2size != -1);
-
-  ////////////////////////////////////////////////////////////////
-  // Identify subblock of ranks on node spreading across dims
-  // in a maximally symmetrical way
-  ////////////////////////////////////////////////////////////////
-  int ndimension              = processors.size();
-  std::vector<int> processor_coor(ndimension);
-  std::vector<int> WorldDims = processors;   std::vector<int> ShmDims  (ndimension,1);  std::vector<int> NodeDims (ndimension);
-  std::vector<int> ShmCoor  (ndimension);    std::vector<int> NodeCoor (ndimension);    std::vector<int> WorldCoor(ndimension);
-  int dim = 0;
-  for(int l2=0;l2<log2size;l2++){
-    while ( (WorldDims[dim] / ShmDims[dim]) <= 1 ) dim=(dim+1)%ndimension;
-    ShmDims[dim]*=2;
-    dim=(dim+1)%ndimension;
-  }
-
-  ////////////////////////////////////////////////////////////////
-  // Establish torus of processes and nodes with sub-blockings
-  ////////////////////////////////////////////////////////////////
-  for(int d=0;d<ndimension;d++){
-    NodeDims[d] = WorldDims[d]/ShmDims[d];
-  }
-
-  ////////////////////////////////////////////////////////////////
-  // Check processor counts match
-  ////////////////////////////////////////////////////////////////
-  int Nprocessors=1;
-  for(int i=0;i<ndimension;i++){
-    Nprocessors*=processors[i];
-  }
-  assert(WorldSize==Nprocessors);
-
-  ////////////////////////////////////////////////////////////////
-  // Establish mapping between lexico physics coord and WorldRank
-  ////////////////////////////////////////////////////////////////
-  int rank;
-
-  Lexicographic::CoorFromIndexReversed(NodeCoor,WorldNode   ,NodeDims);
-  Lexicographic::CoorFromIndexReversed(ShmCoor ,WorldShmRank,ShmDims);
-  for(int d=0;d<ndimension;d++) WorldCoor[d] = NodeCoor[d]*ShmDims[d]+ShmCoor[d];
-  Lexicographic::IndexFromCoorReversed(WorldCoor,rank,WorldDims);
-
-  /////////////////////////////////////////////////////////////////
-  // Build the new communicator
-  /////////////////////////////////////////////////////////////////
-  int ierr= MPI_Comm_split(WorldComm,0,rank,&optimal_comm);
-  assert(ierr==0);
-}
-////////////////////////////////////////////////////////////////////////////////////////////
-// SHMGET
-////////////////////////////////////////////////////////////////////////////////////////////
-#ifdef GRID_MPI3_SHMGET
-void GlobalSharedMemory::SharedMemoryAllocate(uint64_t bytes, int flags)
-{
-  std::cout << "SharedMemoryAllocate "<< bytes<< " shmget implementation "<<std::endl;
-  assert(_ShmSetup==1);
-  assert(_ShmAlloc==0);
-
-  //////////////////////////////////////////////////////////////////////////////////////////////////////////
-  // allocate the shared windows for our group
-  //////////////////////////////////////////////////////////////////////////////////////////////////////////
-  MPI_Barrier(WorldShmComm);
-  WorldShmCommBufs.resize(WorldShmSize);
-  std::vector<int> shmids(WorldShmSize);
-
-  if ( WorldShmRank == 0 ) {
-    for(int r=0;r<WorldShmSize;r++){
-      size_t size = bytes;
-      key_t key   = IPC_PRIVATE;
-      int flags = IPC_CREAT | SHM_R | SHM_W;
-#ifdef SHM_HUGETLB
-      if (Hugepages) flags|=SHM_HUGETLB;
-#endif
-      if ((shmids[r]= shmget(key,size, flags)) ==-1) {
-        int errsv = errno;
-        printf("Errno %d\n",errsv);
-        printf("key   %d\n",key);
-        printf("size  %ld\n",size);
-        printf("flags %d\n",flags);
-        perror("shmget");
-        exit(1);
-      }
-    }
-  }
-  MPI_Barrier(WorldShmComm);
-  MPI_Bcast(&shmids[0],WorldShmSize*sizeof(int),MPI_BYTE,0,WorldShmComm);
-  MPI_Barrier(WorldShmComm);
-
-  for(int r=0;r<WorldShmSize;r++){
-    WorldShmCommBufs[r] = (uint64_t *)shmat(shmids[r], NULL,0);
-    if (WorldShmCommBufs[r] == (uint64_t *)-1) {
-      perror("Shared memory attach failure");
-      shmctl(shmids[r], IPC_RMID, NULL);
-      exit(2);
-    }
-  }
-  MPI_Barrier(WorldShmComm);
-  ///////////////////////////////////
-  // Mark for clean up
-  ///////////////////////////////////
-  for(int r=0;r<WorldShmSize;r++){
-    shmctl(shmids[r], IPC_RMID,(struct shmid_ds *)NULL);
-  }
-  MPI_Barrier(WorldShmComm);
-
-  _ShmAlloc=1;
-  _ShmAllocBytes  = bytes;
-}
-#endif
- 
-////////////////////////////////////////////////////////////////////////////////////////////
-// Hugetlbfs mapping intended
-////////////////////////////////////////////////////////////////////////////////////////////
-#ifdef GRID_MPI3_SHMMMAP
-void GlobalSharedMemory::SharedMemoryAllocate(uint64_t bytes, int flags)
-{
-  std::cout << "SharedMemoryAllocate "<< bytes<< " MMAP implementation "<< GRID_SHM_PATH <<std::endl;
-  assert(_ShmSetup==1);
-  assert(_ShmAlloc==0);
-  //////////////////////////////////////////////////////////////////////////////////////////////////////////
-  // allocate the shared windows for our group
-  //////////////////////////////////////////////////////////////////////////////////////////////////////////
-  MPI_Barrier(WorldShmComm);
-  WorldShmCommBufs.resize(WorldShmSize);
-  
-  ////////////////////////////////////////////////////////////////////////////////////////////
-  // Hugetlbfs and others map filesystems as mappable huge pages
-  ////////////////////////////////////////////////////////////////////////////////////////////
-  char shm_name [NAME_MAX];
-  for(int r=0;r<WorldShmSize;r++){
-    
-    sprintf(shm_name,GRID_SHM_PATH "/Grid_mpi3_shm_%d_%d",WorldNode,r);
-    int fd=open(shm_name,O_RDWR|O_CREAT,0666);
-    if ( fd == -1) { 
-      printf("open %s failed\n",shm_name);
-      perror("open hugetlbfs");
-      exit(0);
-    }
-    int mmap_flag = MAP_SHARED ;
-#ifdef MAP_POPULATE    
-    mmap_flag|=MAP_POPULATE;
-#endif
-#ifdef MAP_HUGETLB
-    if ( flags ) mmap_flag |= MAP_HUGETLB;
-#endif
-    void *ptr = (void *) mmap(NULL, bytes, PROT_READ | PROT_WRITE, mmap_flag,fd, 0); 
-    if ( ptr == (void *)MAP_FAILED ) {    
-      printf("mmap %s failed\n",shm_name);
-      perror("failed mmap");      assert(0);    
-    }
-    assert(((uint64_t)ptr&0x3F)==0);
-    close(fd);
-    WorldShmCommBufs[r] =ptr;
-    //    std::cout << "Set WorldShmCommBufs["<<r<<"]="<<ptr<< "("<< bytes<< "bytes)"<<std::endl;
-  }
-  _ShmAlloc=1;
-  _ShmAllocBytes  = bytes;
-};
-#endif // MMAP
-
-#ifdef GRID_MPI3_SHM_NONE
-void GlobalSharedMemory::SharedMemoryAllocate(uint64_t bytes, int flags)
-{
-  std::cout << "SharedMemoryAllocate "<< bytes<< " MMAP anonymous implementation "<<std::endl;
-  assert(_ShmSetup==1);
-  assert(_ShmAlloc==0);
-  //////////////////////////////////////////////////////////////////////////////////////////////////////////
-  // allocate the shared windows for our group
-  //////////////////////////////////////////////////////////////////////////////////////////////////////////
-  MPI_Barrier(WorldShmComm);
-  WorldShmCommBufs.resize(WorldShmSize);
-  
-  ////////////////////////////////////////////////////////////////////////////////////////////
-  // Hugetlbf and others map filesystems as mappable huge pages
-  ////////////////////////////////////////////////////////////////////////////////////////////
-  char shm_name [NAME_MAX];
-  assert(WorldShmSize == 1);
-  for(int r=0;r<WorldShmSize;r++){
-    
-    int fd=-1;
-    int mmap_flag = MAP_SHARED |MAP_ANONYMOUS ;
-#ifdef MAP_POPULATE    
-    mmap_flag|=MAP_POPULATE;
-#endif
-#ifdef MAP_HUGETLB
-    if ( flags ) mmap_flag |= MAP_HUGETLB;
-#endif
-    void *ptr = (void *) mmap(NULL, bytes, PROT_READ | PROT_WRITE, mmap_flag,fd, 0); 
-    if ( ptr == (void *)MAP_FAILED ) {    
-      printf("mmap %s failed\n",shm_name);
-      perror("failed mmap");      assert(0);    
-    }
-    assert(((uint64_t)ptr&0x3F)==0);
-    close(fd);
-    WorldShmCommBufs[r] =ptr;
-    //    std::cout << "Set WorldShmCommBufs["<<r<<"]="<<ptr<< "("<< bytes<< "bytes)"<<std::endl;
-  }
-  _ShmAlloc=1;
-  _ShmAllocBytes  = bytes;
-};
-#endif // MMAP
-
-#ifdef GRID_MPI3_SHMOPEN
-////////////////////////////////////////////////////////////////////////////////////////////
-// POSIX SHMOPEN ; as far as I know Linux does not allow EXPLICIT HugePages with this case
-// tmpfs (Larry Meadows says) does not support explicit huge page, and this is used for 
-// the posix shm virtual file system
-////////////////////////////////////////////////////////////////////////////////////////////
-void GlobalSharedMemory::SharedMemoryAllocate(uint64_t bytes, int flags)
-{ 
-  std::cout << "SharedMemoryAllocate "<< bytes<< " SHMOPEN implementation "<<std::endl;
-  assert(_ShmSetup==1);
-  assert(_ShmAlloc==0); 
-  MPI_Barrier(WorldShmComm);
-  WorldShmCommBufs.resize(WorldShmSize);
-
-  char shm_name [NAME_MAX];
-  if ( WorldShmRank == 0 ) {
-    for(int r=0;r<WorldShmSize;r++){
-	
-      size_t size = bytes;
-      
-      struct passwd *pw = getpwuid (getuid());
-      sprintf(shm_name,"/Grid_%s_mpi3_shm_%d_%d",pw->pw_name,WorldNode,r);
-      
-      shm_unlink(shm_name);
-      int fd=shm_open(shm_name,O_RDWR|O_CREAT,0666);
-      if ( fd < 0 ) {	perror("failed shm_open");	assert(0);      }
-      ftruncate(fd, size);
-	
-      int mmap_flag = MAP_SHARED;
-#ifdef MAP_POPULATE 
-      mmap_flag |= MAP_POPULATE;
-#endif
-#ifdef MAP_HUGETLB
-      if (flags) mmap_flag |= MAP_HUGETLB;
-#endif
-      void * ptr =  mmap(NULL,size, PROT_READ | PROT_WRITE, mmap_flag, fd, 0);
-      
-      //      std::cout << "Set WorldShmCommBufs["<<r<<"]="<<ptr<< "("<< size<< "bytes)"<<std::endl;
-      if ( ptr == (void * )MAP_FAILED ) {       
-	perror("failed mmap");     
-	assert(0);    
-      }
-      assert(((uint64_t)ptr&0x3F)==0);
-      
-      WorldShmCommBufs[r] =ptr;
-      close(fd);
-    }
-  }
-
-  MPI_Barrier(WorldShmComm);
-  
-  if ( WorldShmRank != 0 ) { 
-    for(int r=0;r<WorldShmSize;r++){
-
-      size_t size = bytes ;
-      
-      struct passwd *pw = getpwuid (getuid());
-      sprintf(shm_name,"/Grid_%s_mpi3_shm_%d_%d",pw->pw_name,WorldNode,r);
-      
-      int fd=shm_open(shm_name,O_RDWR,0666);
-      if ( fd<0 ) {	perror("failed shm_open");	assert(0);      }
-      
-      void * ptr =  mmap(NULL,size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
-      if ( ptr == MAP_FAILED ) {       perror("failed mmap");      assert(0);    }
-      assert(((uint64_t)ptr&0x3F)==0);
-      WorldShmCommBufs[r] =ptr;
-
-      close(fd);
-    }
-  }
-  _ShmAlloc=1;
-  _ShmAllocBytes = bytes;
-}
-#endif
-
-
-
-
-  ////////////////////////////////////////////////////////
-  // Global shared functionality finished
-  // Now move to per communicator functionality
-  ////////////////////////////////////////////////////////
-void SharedMemory::SetCommunicator(Grid_MPI_Comm comm)
-{
-  int rank, size;
-  MPI_Comm_rank(comm,&rank);
-  MPI_Comm_size(comm,&size);
-  ShmRanks.resize(size);
-
-  /////////////////////////////////////////////////////////////////////
-  // Split into groups that can share memory
-  /////////////////////////////////////////////////////////////////////
-  MPI_Comm_split_type(comm, MPI_COMM_TYPE_SHARED, 0, MPI_INFO_NULL,&ShmComm);
-  MPI_Comm_rank(ShmComm     ,&ShmRank);
-  MPI_Comm_size(ShmComm     ,&ShmSize);
-  ShmCommBufs.resize(ShmSize);
-
-  //////////////////////////////////////////////////////////////////////
-  // Map ShmRank to WorldShmRank and use the right buffer
-  //////////////////////////////////////////////////////////////////////
-  assert (GlobalSharedMemory::ShmAlloc()==1);
-  heap_size = GlobalSharedMemory::ShmAllocBytes();
-  for(int r=0;r<ShmSize;r++){
-
-    uint32_t wsr = (r==ShmRank) ? GlobalSharedMemory::WorldShmRank : 0 ;
-
-    MPI_Allreduce(MPI_IN_PLACE,&wsr,1,MPI_UINT32_T,MPI_SUM,ShmComm);
-
-    ShmCommBufs[r] = GlobalSharedMemory::WorldShmCommBufs[wsr];
-    //    std::cout << "SetCommunicator ShmCommBufs ["<< r<< "] = "<< ShmCommBufs[r]<< "  wsr = "<<wsr<<std::endl;
-  }
-  ShmBufferFreeAll();
-
-  /////////////////////////////////////////////////////////////////////
-  // find comm ranks in our SHM group (i.e. which ranks are on our node)
-  /////////////////////////////////////////////////////////////////////
-  MPI_Group FullGroup, ShmGroup;
-  MPI_Comm_group (comm   , &FullGroup); 
-  MPI_Comm_group (ShmComm, &ShmGroup);
-
-  std::vector<int> ranks(size);   for(int r=0;r<size;r++) ranks[r]=r;
-  MPI_Group_translate_ranks (FullGroup,size,&ranks[0],ShmGroup, &ShmRanks[0]); 
-}
-//////////////////////////////////////////////////////////////////
-// On node barrier
-//////////////////////////////////////////////////////////////////
-void SharedMemory::ShmBarrier(void)
-{
-  MPI_Barrier  (ShmComm);
-}
-//////////////////////////////////////////////////////////////////////////////////////////////////////////
-// Test the shared memory is working
-//////////////////////////////////////////////////////////////////////////////////////////////////////////
-void SharedMemory::SharedMemoryTest(void)
-{
-  ShmBarrier();
-  if ( ShmRank == 0 ) {
-    for(int r=0;r<ShmSize;r++){
-      uint64_t * check = (uint64_t *) ShmCommBufs[r];
-      check[0] = GlobalSharedMemory::WorldNode;
-      check[1] = r;
-      check[2] = 0x5A5A5A;
-    }
-  }
-  ShmBarrier();
-  for(int r=0;r<ShmSize;r++){
-    uint64_t * check = (uint64_t *) ShmCommBufs[r];
-    
-    assert(check[0]==GlobalSharedMemory::WorldNode);
-    assert(check[1]==r);
-    assert(check[2]==0x5A5A5A);
-    
-  }
-  ShmBarrier();
-}
-
-void *SharedMemory::ShmBuffer(int rank)
-{
-  int gpeer = ShmRanks[rank];
-  if (gpeer == MPI_UNDEFINED){
-    return NULL;
-  } else { 
-    return ShmCommBufs[gpeer];
-  }
-}
-void *SharedMemory::ShmBufferTranslate(int rank,void * local_p)
-{
-  static int count =0;
-  int gpeer = ShmRanks[rank];
-  assert(gpeer!=ShmRank); // never send to self
-  if (gpeer == MPI_UNDEFINED){
-    return NULL;
-  } else { 
-    uint64_t offset = (uint64_t)local_p - (uint64_t)ShmCommBufs[ShmRank];
-    uint64_t remote = (uint64_t)ShmCommBufs[gpeer]+offset;
-    return (void *) remote;
-  }
-}
-SharedMemory::~SharedMemory()
-{
-  int MPI_is_finalised;  MPI_Finalized(&MPI_is_finalised);
-  if ( !MPI_is_finalised ) { 
-    MPI_Comm_free(&ShmComm);
-  }
-};
-
-}
--- a/Grid/communicator/SharedMemoryNone.cc
+++ b/Grid/communicator/SharedMemoryNone.cc
@@ -1,128 +0,0 @@
-/*************************************************************************************
-
-    Grid physics library, www.github.com/paboyle/Grid 
-
-    Source file: ./lib/communicator/SharedMemory.cc
-
-    Copyright (C) 2015
-
-Author: Peter Boyle <paboyle@ph.ed.ac.uk>
-
-    This program is free software; you can redistribute it and/or modify
-    it under the terms of the GNU General Public License as published by
-    the Free Software Foundation; either version 2 of the License, or
-    (at your option) any later version.
-
-    This program is distributed in the hope that it will be useful,
-    but WITHOUT ANY WARRANTY; without even the implied warranty of
-    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-    GNU General Public License for more details.
-
-    You should have received a copy of the GNU General Public License along
-    with this program; if not, write to the Free Software Foundation, Inc.,
-    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
-
-    See the full license in the file "LICENSE" in the top level distribution directory
-*************************************************************************************/
-/*  END LEGAL */
-
-#include <Grid/GridCore.h>
-
-namespace Grid { 
-
-/*Construct from an MPI communicator*/
-void GlobalSharedMemory::Init(Grid_MPI_Comm comm)
-{
-  assert(_ShmSetup==0);
-  WorldComm = 0;
-  WorldRank = 0;
-  WorldSize = 1;
-  WorldShmComm = 0 ;
-  WorldShmRank = 0 ;
-  WorldShmSize = 1 ;
-  WorldNodes   = 1 ;
-  WorldNode    = 0 ;
-  WorldShmRanks.resize(WorldSize); WorldShmRanks[0] = 0;
-  WorldShmCommBufs.resize(1);
-  _ShmSetup=1;
-}
-
-void GlobalSharedMemory::OptimalCommunicator(const std::vector<int> &processors,Grid_MPI_Comm & optimal_comm)
-{
-  optimal_comm = WorldComm;
-}
-
-////////////////////////////////////////////////////////////////////////////////////////////
-// Hugetlbfs mapping intended, use anonymous mmap
-////////////////////////////////////////////////////////////////////////////////////////////
-void GlobalSharedMemory::SharedMemoryAllocate(uint64_t bytes, int flags)
-{
-  void * ShmCommBuf ; 
-  assert(_ShmSetup==1);
-  assert(_ShmAlloc==0);
-  int mmap_flag =0;
-#ifdef MAP_ANONYMOUS
-  mmap_flag = mmap_flag| MAP_SHARED | MAP_ANONYMOUS;
-#endif
-#ifdef MAP_ANON
-  mmap_flag = mmap_flag| MAP_SHARED | MAP_ANON;
-#endif
-#ifdef MAP_HUGETLB
-  if ( flags ) mmap_flag |= MAP_HUGETLB;
-#endif
-  ShmCommBuf =(void *) mmap(NULL, bytes, PROT_READ | PROT_WRITE, mmap_flag, -1, 0); 
-  if (ShmCommBuf == (void *)MAP_FAILED) {
-    perror("mmap failed ");
-    exit(EXIT_FAILURE);  
-  }
-#ifdef MADV_HUGEPAGE
-  if (!Hugepages ) madvise(ShmCommBuf,bytes,MADV_HUGEPAGE);
-#endif
-  bzero(ShmCommBuf,bytes);
-  WorldShmCommBufs[0] = ShmCommBuf;
-  _ShmAllocBytes=bytes;
-  _ShmAlloc=1;
-};
-
-  ////////////////////////////////////////////////////////
-  // Global shared functionality finished
-  // Now move to per communicator functionality
-  ////////////////////////////////////////////////////////
-void SharedMemory::SetCommunicator(Grid_MPI_Comm comm)
-{
-  assert(GlobalSharedMemory::ShmAlloc()==1);
-  ShmRanks.resize(1);
-  ShmCommBufs.resize(1);
-  ShmRanks[0] = 0;
-  ShmRank     = 0;
-  ShmSize     = 1;
-  //////////////////////////////////////////////////////////////////////
-  // Map ShmRank to WorldShmRank and use the right buffer
-  //////////////////////////////////////////////////////////////////////
-  ShmCommBufs[0] = GlobalSharedMemory::WorldShmCommBufs[0];
-  heap_size      = GlobalSharedMemory::ShmAllocBytes();
-  ShmBufferFreeAll();
-  return;
-}
-//////////////////////////////////////////////////////////////////
-// On node barrier
-//////////////////////////////////////////////////////////////////
-void SharedMemory::ShmBarrier(void){ return ; }
-
-//////////////////////////////////////////////////////////////////////////////////////////////////////////
-// Test the shared memory is working
-//////////////////////////////////////////////////////////////////////////////////////////////////////////
-void SharedMemory::SharedMemoryTest(void) { return; }
-
-void *SharedMemory::ShmBuffer(int rank)
-{
-  return NULL;
-}
-void *SharedMemory::ShmBufferTranslate(int rank,void * local_p)
-{
-  return NULL;
-}
-SharedMemory::~SharedMemory()
-{};
-
-}
--- a/Grid/parallelIO/BinaryIO.cc
+++ b/Grid/parallelIO/BinaryIO.cc
@@ -1,3 +0,0 @@
-#include <Grid/GridCore.h>
-
-int Grid::BinaryIO::latticeWriteMaxRetry = -1;
--- a/Grid/qcd/action/fermion/DomainWallFermion.h
+++ b/Grid/qcd/action/fermion/DomainWallFermion.h
@@ -1,142 +0,0 @@
-    /*************************************************************************************
-
-    Grid physics library, www.github.com/paboyle/Grid 
-
-    Source file: ./lib/qcd/action/fermion/DomainWallFermion.h
-
-    Copyright (C) 2015
-
-Author: Peter Boyle <pabobyle@ph.ed.ac.uk>
-Author: Peter Boyle <paboyle@ph.ed.ac.uk>
-Author: Vera Guelpers <V.M.Guelpers@soton.ac.uk>
-
-    This program is free software; you can redistribute it and/or modify
-    it under the terms of the GNU General Public License as published by
-    the Free Software Foundation; either version 2 of the License, or
-    (at your option) any later version.
-
-    This program is distributed in the hope that it will be useful,
-    but WITHOUT ANY WARRANTY; without even the implied warranty of
-    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-    GNU General Public License for more details.
-
-    You should have received a copy of the GNU General Public License along
-    with this program; if not, write to the Free Software Foundation, Inc.,
-    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
-
-    See the full license in the file "LICENSE" in the top level distribution directory
-    *************************************************************************************/
-    /*  END LEGAL */
-#ifndef  GRID_QCD_DOMAIN_WALL_FERMION_H
-#define  GRID_QCD_DOMAIN_WALL_FERMION_H
-
-#include <Grid/qcd/action/fermion/FermionCore.h>
-
-namespace Grid {
-
-  namespace QCD {
-
-    template<class Impl>
-    class DomainWallFermion : public CayleyFermion5D<Impl>
-    {
-    public:
-     INHERIT_IMPL_TYPES(Impl);
-    public:
-
-      void FreePropagator(const FermionField &in,FermionField &out,RealD mass,std::vector<Complex> boundary, std::vector<double> twist, bool fiveD) {
-	FermionField in_k(in._grid);
-	FermionField prop_k(in._grid);
-
-	FFT theFFT((GridCartesian *) in._grid);
-
-	//phase for boundary condition
-	ComplexField coor(in._grid);
-	ComplexField ph(in._grid);  ph = zero;
-	FermionField in_buf(in._grid); in_buf = zero;
-	Scalar ci(0.0,1.0);
-	assert(twist.size() == Nd);//check that twist is Nd
-	assert(boundary.size() == Nd);//check that boundary conditions is Nd
-	int shift = 0;
-	if(fiveD) shift = 1;
-	for(unsigned int nu = 0; nu < Nd; nu++)
-	{
-	  // Shift coordinate lattice index by 1 to account for 5th dimension.
-          LatticeCoordinate(coor, nu + shift);
-	  double boundary_phase = ::acos(real(boundary[nu]));
-	  ph = ph + boundary_phase*coor*((1./(in._grid->_fdimensions[nu+shift])));
-	  //momenta for propagator shifted by twist+boundary
-	  twist[nu] = twist[nu] + boundary_phase/((2.0*M_PI));
-	}
-	in_buf = exp(ci*ph*(-1.0))*in;
-
-
-	if(fiveD){//FFT only on temporal and spatial dimensions
-          std::vector<int> mask(Nd+1,1); mask[0] = 0;
-	  theFFT.FFT_dim_mask(in_k,in_buf,mask,FFT::forward);
-          this->MomentumSpacePropagatorHt_5d(prop_k,in_k,mass,twist);
-          theFFT.FFT_dim_mask(out,prop_k,mask,FFT::backward);
-        }
-	else{
-	  theFFT.FFT_all_dim(in_k,in,FFT::forward);
-          this->MomentumSpacePropagatorHt(prop_k,in_k,mass,twist);
-	  theFFT.FFT_all_dim(out,prop_k,FFT::backward);
-        }
-	//phase for boundary condition
-	out = out * exp(ci*ph);
-      };
-
-      virtual void FreePropagator(const FermionField &in,FermionField &out,RealD mass,std::vector<Complex> boundary,std::vector<double> twist) {
-        bool fiveD = true; //5d propagator by default
-	FreePropagator(in,out,mass,boundary,twist,fiveD);
-      };
-
-      virtual void FreePropagator(const FermionField &in,FermionField &out,RealD mass, bool fiveD) {
-	std::vector<double> twist(Nd,0.0); //default: periodic boundarys in all directions
-	std::vector<Complex> boundary;
-	for(int i=0;i<Nd;i++) boundary.push_back(1);//default: periodic boundary conditions
-	FreePropagator(in,out,mass,boundary,twist,fiveD);
-      };
-
-      virtual void FreePropagator(const FermionField &in,FermionField &out,RealD mass) {
-        bool fiveD = true; //5d propagator by default
-	std::vector<double> twist(Nd,0.0); //default: twist angle 0
-	std::vector<Complex> boundary;
-	for(int i=0;i<Nd;i++) boundary.push_back(1); //default: periodic boundary conditions
-	FreePropagator(in,out,mass,boundary,twist,fiveD);
-      };
-
-      virtual void   Instantiatable(void) {};
-      // Constructors
-      DomainWallFermion(GaugeField &_Umu,
-			GridCartesian         &FiveDimGrid,
-			GridRedBlackCartesian &FiveDimRedBlackGrid,
-			GridCartesian         &FourDimGrid,
-			GridRedBlackCartesian &FourDimRedBlackGrid,
-			RealD _mass,RealD _M5,const ImplParams &p= ImplParams()) : 
-
-
-      CayleyFermion5D<Impl>(_Umu,
-			    FiveDimGrid,
-			    FiveDimRedBlackGrid,
-			    FourDimGrid,
-			    FourDimRedBlackGrid,_mass,_M5,p)
-
-      {
-	RealD eps = 1.0;
-
-	Approx::zolotarev_data *zdata = Approx::higham(eps,this->Ls);// eps is ignored for higham
-	assert(zdata->n==this->Ls);
-	
-	std::cout<<GridLogMessage << "DomainWallFermion with Ls="<<this->Ls<<std::endl;
-	// Call base setter
-	this->SetCoefficientsTanh(zdata,1.0,0.0);
-
-	Approx::zolotarev_free(zdata);
-      }
-
-    };
-
-  }
-}
-
-#endif
--- a/Grid/qcd/action/fermion/FourierAcceleratedPV.h
+++ b/Grid/qcd/action/fermion/FourierAcceleratedPV.h
@@ -1,237 +0,0 @@
-
-    /*************************************************************************************
-
-    Grid physics library, www.github.com/paboyle/Grid 
-
-    Source file: ./lib/qcd/action/fermion/FourierAcceleratedPV.h
-
-    Copyright (C) 2015
-
-Author: Christoph Lehner (lifted with permission by Peter Boyle, brought back to Grid)
-Author: Peter Boyle <pabobyle@ph.ed.ac.uk>
-
-    This program is free software; you can redistribute it and/or modify
-    it under the terms of the GNU General Public License as published by
-    the Free Software Foundation; either version 2 of the License, or
-    (at your option) any later version.
-
-    This program is distributed in the hope that it will be useful,
-    but WITHOUT ANY WARRANTY; without even the implied warranty of
-    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-    GNU General Public License for more details.
-
-    You should have received a copy of the GNU General Public License along
-    with this program; if not, write to the Free Software Foundation, Inc.,
-    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
-
-    See the full license in the file "LICENSE" in the top level distribution directory
-    *************************************************************************************/
-    /*  END LEGAL */
-#pragma once
-namespace Grid {
-namespace QCD {
-
-  template<typename M>
-    void get_real_const_bc(M& m, RealD& _b, RealD& _c) {
-    ComplexD b,c;
-    b=m.bs[0];
-    c=m.cs[0];
-    std::cout << GridLogMessage << "b=" << b << ", c=" << c << std::endl;
-    for (size_t i=1;i<m.bs.size();i++) {
-      assert(m.bs[i] == b);
-      assert(m.cs[i] == c);
-    }
-    assert(b.imag() == 0.0);
-    assert(c.imag() == 0.0);
-    _b = b.real();
-    _c = c.real();
-  }
-
-
-template<typename Vi, typename M, typename G>
-class FourierAcceleratedPV {
- public:
-
-  ConjugateGradient<Vi> &cg;
-  M& dwfPV;
-  G& Umu;
-  GridCartesian* grid5D;
-  GridRedBlackCartesian* gridRB5D;
-  int group_in_s;
-
-  FourierAcceleratedPV(M& _dwfPV, G& _Umu, ConjugateGradient<Vi> &_cg, int _group_in_s = 2) 
-   : dwfPV(_dwfPV), Umu(_Umu), cg(_cg), group_in_s(_group_in_s) 
-  {
-    assert( dwfPV.FermionGrid()->_fdimensions[0] % (2*group_in_s) == 0);
-    grid5D = QCD::SpaceTimeGrid::makeFiveDimGrid(2*group_in_s, (GridCartesian*)Umu._grid);
-    gridRB5D = QCD::SpaceTimeGrid::makeFiveDimRedBlackGrid(2*group_in_s, (GridCartesian*)Umu._grid);
-  }
-
-  void rotatePV(const Vi& _src, Vi& dst, bool forward) const {
-
-    GridStopWatch gsw1, gsw2;
-
-    typedef typename Vi::scalar_type Coeff_t;
-    int Ls = dst._grid->_fdimensions[0];
-
-    Vi _tmp(dst._grid);
-    double phase = M_PI / (double)Ls;
-    Coeff_t bzero(0.0,0.0);
-
-    FFT theFFT((GridCartesian*)dst._grid);
-
-    if (!forward) {
-      gsw1.Start();
-      for (int s=0;s<Ls;s++) {
-	Coeff_t a(::cos(phase*s),-::sin(phase*s));
-	axpby_ssp(_tmp,a,_src,bzero,_src,s,s);
-      }
-      gsw1.Stop();
-
-      gsw2.Start();
-      theFFT.FFT_dim(dst,_tmp,0,FFT::forward);
-      gsw2.Stop();
-
-    } else {
-
-      gsw2.Start();
-      theFFT.FFT_dim(_tmp,_src,0,FFT::backward);
-      gsw2.Stop();
-
-      gsw1.Start();
-      for (int s=0;s<Ls;s++) {
-	Coeff_t a(::cos(phase*s),::sin(phase*s));
-	axpby_ssp(dst,a,_tmp,bzero,_tmp,s,s);
-      }
-      gsw1.Stop();
-    }
-
-    std::cout << GridLogMessage << "Timing rotatePV: " << gsw1.Elapsed() << ", " << gsw2.Elapsed() << std::endl;
-
-  }
-
-  void pvInv(const Vi& _src, Vi& _dst) const {
-
-    std::cout << GridLogMessage << "Fourier-Accelerated Outer Pauli Villars"<<std::endl;
-
-    typedef typename Vi::scalar_type Coeff_t;
-    int Ls = _dst._grid->_fdimensions[0];
-
-    GridStopWatch gswT;
-    gswT.Start();
-
-    RealD b,c;
-    get_real_const_bc(dwfPV,b,c);
-    RealD M5 = dwfPV.M5;
-    
-    // U(true) Rightinv TMinv U(false) = Minv
-
-    Vi _src_diag(_dst._grid);
-    Vi _src_diag_slice(dwfPV.GaugeGrid());
-    Vi _dst_diag_slice(dwfPV.GaugeGrid());
-    Vi _src_diag_slices(grid5D);
-    Vi _dst_diag_slices(grid5D);
-    Vi _dst_diag(_dst._grid);
-
-    rotatePV(_src,_src_diag,false);
-
-    // now do TM solves
-    Gamma G5(Gamma::Algebra::Gamma5);
-
-    GridStopWatch gswA, gswB;
-
-    gswA.Start();
-
-    typedef typename M::Impl_t Impl;
-    //WilsonTMFermion<Impl> tm(x.Umu,*x.UGridF,*x.UrbGridF,0.0,0.0,solver_outer.parent.par.wparams_f);
-    std::vector<RealD> vmass(grid5D->_fdimensions[0],0.0);
-    std::vector<RealD> vmu(grid5D->_fdimensions[0],0.0);
-
-    WilsonTMFermion5D<Impl> tm(Umu,*grid5D,*gridRB5D,
-			   *(GridCartesian*)dwfPV.GaugeGrid(),
-			   *(GridRedBlackCartesian*)dwfPV.GaugeRedBlackGrid(),
-			   vmass,vmu);
-    
-    //SchurRedBlackDiagTwoSolve<Vi> sol(cg);
-    SchurRedBlackDiagMooeeSolve<Vi> sol(cg); // same performance as DiagTwo
-    gswA.Stop();
-
-    gswB.Start();
-
-    for (int sgroup=0;sgroup<Ls/2/group_in_s;sgroup++) {
-
-      for (int sidx=0;sidx<group_in_s;sidx++) {
-
-	int s = sgroup*group_in_s + sidx;
-	int sprime = Ls-s-1;
-
-	RealD phase = M_PI / (RealD)Ls * (2.0 * s + 1.0);
-	RealD cosp = ::cos(phase);
-	RealD sinp = ::sin(phase);
-	RealD denom = b*b + c*c + 2.0*b*c*cosp;
-	RealD mass = -(b*b*M5 + c*(1.0 - cosp + c*M5) + b*(-1.0 + cosp + 2.0*c*cosp*M5))/denom;
-	RealD mu = (b+c)*sinp/denom;
-
-	vmass[2*sidx + 0] = mass;
-	vmass[2*sidx + 1] = mass;
-	vmu[2*sidx + 0] = mu;
-	vmu[2*sidx + 1] = -mu;
-
-      }
-
-      tm.update(vmass,vmu);
-
-      for (int sidx=0;sidx<group_in_s;sidx++) {
-
-	int s = sgroup*group_in_s + sidx;
-	int sprime = Ls-s-1;
-
-	ExtractSlice(_src_diag_slice,_src_diag,s,0);
-	InsertSlice(_src_diag_slice,_src_diag_slices,2*sidx + 0,0);
-
-	ExtractSlice(_src_diag_slice,_src_diag,sprime,0);
-	InsertSlice(_src_diag_slice,_src_diag_slices,2*sidx + 1,0);
-
-      }
-
-      GridStopWatch gsw;
-      gsw.Start();
-      _dst_diag_slices = zero; // zero guess
-      sol(tm,_src_diag_slices,_dst_diag_slices);
-      gsw.Stop();
-      std::cout << GridLogMessage << "Solve[sgroup=" << sgroup << "] completed in " << gsw.Elapsed() << ", " << gswA.Elapsed() << std::endl;
-
-      for (int sidx=0;sidx<group_in_s;sidx++) {
-
-	int s = sgroup*group_in_s + sidx;
-	int sprime = Ls-s-1;
-
-	RealD phase = M_PI / (RealD)Ls * (2.0 * s + 1.0);
-	RealD cosp = ::cos(phase);
-	RealD sinp = ::sin(phase);
-
-	// now rotate with inverse of
-	Coeff_t pA = b + c*cosp;
-	Coeff_t pB = - Coeff_t(0.0,1.0)*c*sinp;
-	Coeff_t pABden = pA*pA - pB*pB;
-	// (pA + pB * G5) * (pA - pB*G5) = (pA^2 - pB^2)
-      
-	ExtractSlice(_dst_diag_slice,_dst_diag_slices,2*sidx + 0,0);
-	_dst_diag_slice = (pA/pABden) * _dst_diag_slice - (pB/pABden) * (G5 * _dst_diag_slice);
-	InsertSlice(_dst_diag_slice,_dst_diag,s,0);
-	
-	ExtractSlice(_dst_diag_slice,_dst_diag_slices,2*sidx + 1,0);
-	_dst_diag_slice = (pA/pABden) * _dst_diag_slice + (pB/pABden) * (G5 * _dst_diag_slice);
-	InsertSlice(_dst_diag_slice,_dst_diag,sprime,0);
-      }
-    }
-    gswB.Stop();
-
-    rotatePV(_dst_diag,_dst,true);
-
-    gswT.Stop();
-    std::cout << GridLogMessage << "PV completed in " << gswT.Elapsed() << " (Setup: " << gswA.Elapsed() << ", s-loop: " << gswB.Elapsed() << ")" << std::endl;
-  }
-
-};
-}}
--- a/Grid/qcd/action/fermion/MADWF.h
+++ b/Grid/qcd/action/fermion/MADWF.h
@@ -1,193 +0,0 @@
-    /*************************************************************************************
-
-    Grid physics library, www.github.com/paboyle/Grid 
-
-    Source file: ./lib/algorithms/iterative/MADWF.h
-
-    Copyright (C) 2015
-
-Author: Peter Boyle <paboyle@ph.ed.ac.uk>
-
-    This program is free software; you can redistribute it and/or modify
-    it under the terms of the GNU General Public License as published by
-    the Free Software Foundation; either version 2 of the License, or
-    (at your option) any later version.
-
-    This program is distributed in the hope that it will be useful,
-    but WITHOUT ANY WARRANTY; without even the implied warranty of
-    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-    GNU General Public License for more details.
-
-    You should have received a copy of the GNU General Public License along
-    with this program; if not, write to the Free Software Foundation, Inc.,
-    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
-
-    See the full license in the file "LICENSE" in the top level distribution directory
-    *************************************************************************************/
-    /*  END LEGAL */
-#pragma once
-
-namespace Grid {
-namespace QCD {
-
-// Field conversion overload constrained by IfNotSame<Fieldi,Fieldo>:
-// the transfer from `from` into `to` is delegated to precisionChange(to,from).
-template <class Fieldi,
-          class Fieldo,
-          IfNotSame<Fieldi,Fieldo> X = 0>
-inline void convert(const Fieldi &from, Fieldo &to) { precisionChange(to, from); }
-// Field conversion overload constrained by IfSame<Fieldi,Fieldo>:
-// `to` is produced by plain assignment from `from`.
-template <class Fieldi,
-          class Fieldo,
-          IfSame<Fieldi,Fieldo> X = 0>
-inline void convert(const Fieldi &from, Fieldo &to) { to = from; }
-
-// Iterative defect-correction solver (it logs itself as a "MADWF-like
-// algorithm").  Each iteration:
-//   1. applies the outer Pauli-Villars solver to the current defect,
-//   2. solves an inner system (Matrixi, possibly a different field type) whose
-//      source is built from the s=0 slice of the result,
-//   3. reconstructs an outer 5d correction from the inner solution,
-//   4. adds the correction to sol5 and recomputes defect = b - Mato * sol5.
-// The loop stops when sqrt(|defect|^2/|b|^2) drops below the target residual;
-// running out of iterations is reported and then trips assert(0).
-//
-// Template parameters:
-//   Matrixo / Matrixi : outer / inner action types (each provides FermionField)
-//   PVinverter        : callable as (Matrixo&, src, sol)
-//   SchurSolver       : callable as (Matrixi&, src, sol, Guesser&)
-//   Guesser           : passed through to the inner SchurSolver
-template<class Matrixo,class Matrixi,class PVinverter,class SchurSolver, class Guesser> 
-class MADWF 
-{
- private:
-  typedef typename Matrixo::FermionField FermionFieldo;
-  typedef typename Matrixi::FermionField FermionFieldi;
-
-  PVinverter  & PauliVillarsSolvero;// For the outer field
-  SchurSolver & SchurSolveri;       // For the inner approx field
-  Guesser     & Guesseri;           // To deflate the inner approx solves
-
-  Matrixo & Mato;                   // Action object for outer
-  Matrixi & Mati;                   // Action object for inner
-
-  RealD target_resid;               // Stop once the relative residual is below this
-  int   maxiter;                    // Maximum number of defect-correction iterations
- public:
-
-  // All collaborators are held by reference, so they must outlive this object.
-  // resid    : target for sqrt(norm2(defect)/norm2(b))
-  // _maxiter : iteration cap; exceeding it trips assert(0) in operator()
-  MADWF(Matrixo &_Mato,
-        Matrixi &_Mati,
-        PVinverter &_PauliVillarsSolvero,
-        SchurSolver &_SchurSolveri,
-        Guesser & _Guesseri,
-        RealD resid,
-        int _maxiter) :
-    // Initialisers are written in member declaration order, which is the
-    // order in which the compiler runs them regardless of how they are listed.
-    PauliVillarsSolvero(_PauliVillarsSolvero),
-    SchurSolveri(_SchurSolveri),
-    Guesseri(_Guesseri),
-    Mato(_Mato),
-    Mati(_Mati),
-    target_resid(resid),
-    maxiter(_maxiter)
-  {
-  }
-
-  // src4 : 4d physical source, imported into a 5d source through
-  //        Mato.ImportPhysicalFermionSource
-  // sol5 : 5d solution; zeroed on entry and updated in place every iteration
-  void operator() (const FermionFieldo &src4,FermionFieldo &sol5)
-  {
-    std::cout << GridLogMessage<< " ************************************************" << std::endl;
-    std::cout << GridLogMessage<< "  MADWF-like algorithm                           " << std::endl;
-    std::cout << GridLogMessage<< " ************************************************" << std::endl;
-
-    FermionFieldi    c0i(Mati.GaugeGrid()); // 4d 
-    FermionFieldi    y0i(Mati.GaugeGrid()); // 4d
-    FermionFieldo    c0 (Mato.GaugeGrid()); // 4d 
-    FermionFieldo    y0 (Mato.GaugeGrid()); // 4d
-
-    FermionFieldo    A(Mato.FermionGrid()); // Temporary outer
-    FermionFieldo    B(Mato.FermionGrid()); // Temporary outer
-    FermionFieldo    b(Mato.FermionGrid()); // 5d source
-
-    FermionFieldo    c(Mato.FermionGrid()); // PVinv source; reused so store
-    FermionFieldo    defect(Mato.FermionGrid()); // 5d defect: b - Mato * sol5
-
-    FermionFieldi   ci(Mati.FermionGrid()); 
-    FermionFieldi   yi(Mati.FermionGrid()); 
-    FermionFieldi   xi(Mati.FermionGrid()); 
-    FermionFieldi srci(Mati.FermionGrid()); 
-    FermionFieldi   Ai(Mati.FermionGrid()); 
-
-    // Inner mass, saved so it can be restored after the mass=1 operator
-    // application below.
-    RealD m=Mati.Mass();
-
-    ///////////////////////////////////////
-    //Import source, include Dminus factors
-    ///////////////////////////////////////
-    Mato.ImportPhysicalFermionSource(src4,b); 
-    std::cout << GridLogMessage << " src4 " <<norm2(src4)<<std::endl;
-    std::cout << GridLogMessage << " b    " <<norm2(b)<<std::endl;
-
-    // With sol5 = 0 the initial defect is the source itself.
-    defect = b;
-    sol5=zero;
-    for (int i=0;i<maxiter;i++) {
-
-      ///////////////////////////////////////
-      // Set up c0 from current defect
-      ///////////////////////////////////////
-      PauliVillarsSolvero(Mato,defect,A);
-      Mato.Pdag(A,c);
-      ExtractSlice(c0, c, 0 , 0);
-
-      ////////////////////////////////////////////////
-      // Solve the inner system with surface term c0
-      ////////////////////////////////////////////////
-      ci = zero;  
-      convert(c0,c0i); // Possible precision change
-      InsertSlice(c0i,ci,0, 0);
-
-      // Dwm P y = Dwm x = D(1) P (c0,0,0,0)^T
-      Mati.P(ci,Ai);
-      Mati.SetMass(1.0);      Mati.M(Ai,srci);      Mati.SetMass(m);
-      SchurSolveri(Mati,srci,xi,Guesseri); 
-      Mati.Pdag(xi,yi);
-      ExtractSlice(y0i, yi, 0 , 0);
-      convert(y0i,y0); // Possible precision change
-
-      //////////////////////////////////////
-      // Propagate solution back to outer system
-      // Build Pdag PV^-1 Dm P [-sol4,c2,c3... cL]
-      //////////////////////////////////////
-      c0 = - y0;
-      InsertSlice(c0, c, 0   , 0);
-
-      /////////////////////////////
-      // Reconstruct the bulk solution Pdag PV^-1 Dm P 
-      /////////////////////////////
-      Mato.P(c,B);
-      Mato.M(B,A);
-      PauliVillarsSolvero(Mato,A,B);
-      Mato.Pdag(B,A);
-
-      //////////////////////////////
-      // Reinsert surface prop
-      //////////////////////////////
-      InsertSlice(y0,A,0,0);
-
-      //////////////////////////////
-      // Convert from y back to x 
-      //////////////////////////////
-      Mato.P(A,B);
-
-      //         sol5' = sol5 + M^-1 defect
-      //               = sol5 + M^-1 src - M^-1 M sol5  ...
-      sol5 = sol5 + B;
-      std::cout << GridLogMessage << "***************************************" <<std::endl;
-      std::cout << GridLogMessage << " Sol5 update "<<std::endl;
-      std::cout << GridLogMessage << "***************************************" <<std::endl;
-      std::cout << GridLogMessage << " Sol5 now "<<norm2(sol5)<<std::endl;
-      std::cout << GridLogMessage << " delta    "<<norm2(B)<<std::endl;
-
-      // New defect  = b - M sol5
-      Mato.M(sol5,A);
-      defect = b - A;
-
-      std::cout << GridLogMessage << " defect   "<<norm2(defect)<<std::endl;
-
-      // Relative residual of the outer system after this update.
-      double resid = ::sqrt(norm2(defect) / norm2(b));
-      std::cout << GridLogMessage << "Residual " << i << ": " << resid  << std::endl;
-      std::cout << GridLogMessage << "***************************************" <<std::endl;
-
-      if (resid < target_resid) {
-        return;
-      }
-    }
-
-    // Falling out of the loop means the target residual was never reached.
-    std::cout << GridLogMessage << "MADWF : Exceeded maxiter "<<std::endl;
-    assert(0);
-
-  }
-
-};
-
-}}
--- a/Grid/qcd/action/fermion/PauliVillarsInverters.h
+++ b/Grid/qcd/action/fermion/PauliVillarsInverters.h
@@ -1,95 +0,0 @@
-    /*************************************************************************************
-
-    Grid physics library, www.github.com/paboyle/Grid 
-
-    Source file: ./lib/algorithms/iterative/SchurRedBlack.h
-
-    Copyright (C) 2015
-
-Author: Peter Boyle <paboyle@ph.ed.ac.uk>
-
-    This program is free software; you can redistribute it and/or modify
-    it under the terms of the GNU General Public License as published by
-    the Free Software Foundation; either version 2 of the License, or
-    (at your option) any later version.
-
-    This program is distributed in the hope that it will be useful,
-    but WITHOUT ANY WARRANTY; without even the implied warranty of
-    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-    GNU General Public License for more details.
-
-    You should have received a copy of the GNU General Public License along
-    with this program; if not, write to the Free Software Foundation, Inc.,
-    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
-
-    See the full license in the file "LICENSE" in the top level distribution directory
-    *************************************************************************************/
-    /*  END LEGAL */
-#pragma once
-
-namespace Grid {
-namespace QCD {
-
-// Pauli-Villars solve without preconditioning: runs the supplied
-// ConjugateGradient on the MdagMLinearOperator built from the matrix, with
-// the matrix mass temporarily set to 1.0.
-template<class Field>
-class PauliVillarsSolverUnprec
-{
- public:
-  ConjugateGradient<Field> & CG;   // solver supplied by the caller, held by reference
-
-  PauliVillarsSolverUnprec(ConjugateGradient<Field> &_CG) : CG(_CG) {}
-
-  // Fills sol by running CG on the normal operator with right-hand side
-  // Mdag*src, both evaluated at mass 1.0.  The mass read on entry is written
-  // back before returning.
-  template<class Matrix>
-  void operator() (Matrix &_Matrix,const Field &src,Field &sol)
-  {
-    const RealD savedMass = _Matrix.Mass();
-
-    Field normalSrc(_Matrix.FermionGrid());
-    MdagMLinearOperator<Matrix,Field> normalOp(_Matrix);
-
-    _Matrix.SetMass(1.0);
-    _Matrix.Mdag(src,normalSrc);
-    CG(normalOp,normalSrc,sol);
-    _Matrix.SetMass(savedMass);
-  }
-};
-
-// Pauli-Villars solve delegated to a caller-supplied Schur solver, run with
-// the matrix mass temporarily set to 1.0.
-template<class Field,class SchurSolverType>
-class PauliVillarsSolverRBprec
-{
- public:
-  SchurSolverType & SchurSolver;   // held by reference; must outlive this object
-  PauliVillarsSolverRBprec( SchurSolverType &_SchurSolver) : SchurSolver(_SchurSolver){};
-
-  // Calls SchurSolver(_Matrix,src,sol) at mass 1.0 and then restores the mass
-  // that _Matrix had on entry.  No temporary field is needed here: the solver
-  // reads src and writes sol directly.
-  template<class Matrix>
-  void operator() (Matrix &_Matrix,const Field &src,Field &sol)
-  {
-    RealD m = _Matrix.Mass();
-
-    _Matrix.SetMass(1.0);
-    SchurSolver(_Matrix,src,sol);
-    _Matrix.SetMass(m);
-  };
-};
-
-// Pauli-Villars solve that forwards to FourierAcceleratedPV::pvInv, using a
-// caller-supplied gauge field and ConjugateGradient (both held by reference).
-template<class Field,class GaugeField>
-class PauliVillarsSolverFourierAccel
-{
- public:
-  GaugeField               & Umu;
-  ConjugateGradient<Field> & CG;
-
-  PauliVillarsSolverFourierAccel(GaugeField &_Umu,ConjugateGradient<Field> &_CG)
-    : Umu(_Umu),
-      CG(_CG)
-  {
-  }
-
-  // Builds a FourierAcceleratedPV for this call from (_Matrix, Umu, CG) and
-  // asks it for the PV inverse of src, written into sol.
-  template<class Matrix>
-  void operator() (Matrix &_Matrix,const Field &src,Field &sol)
-  {
-    typedef FourierAcceleratedPV<Field, Matrix, typename Matrix::GaugeField > Accelerated;
-
-    Accelerated pvSolver(_Matrix,Umu,CG);
-    pvSolver.pvInv(src,sol);
-  }
-};
-
-
-}
-}
--- a/Grid/qcd/action/fermion/Reconstruct5Dprop.h
+++ b/Grid/qcd/action/fermion/Reconstruct5Dprop.h
@@ -1,135 +0,0 @@
-    /*************************************************************************************
-
-    Grid physics library, www.github.com/paboyle/Grid 
-
-    Source file: ./lib/algorithms/iterative/SchurRedBlack.h
-
-    Copyright (C) 2015
-
-Author: Peter Boyle <paboyle@ph.ed.ac.uk>
-
-    This program is free software; you can redistribute it and/or modify
-    it under the terms of the GNU General Public License as published by
-    the Free Software Foundation; either version 2 of the License, or
-    (at your option) any later version.
-
-    This program is distributed in the hope that it will be useful,
-    but WITHOUT ANY WARRANTY; without even the implied warranty of
-    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-    GNU General Public License for more details.
-
-    You should have received a copy of the GNU General Public License along
-    with this program; if not, write to the Free Software Foundation, Inc.,
-    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
-
-    See the full license in the file "LICENSE" in the top level distribution directory
-    *************************************************************************************/
-    /*  END LEGAL */
-#pragma once
-
-namespace Grid {
-namespace QCD {
-
-// Rebuilds a 5d solution (sol5) from an existing 4d solution (sol4) and its 4d
-// source (src4), using a caller-supplied Pauli-Villars inverter.  The inverter
-// is held by reference and is invoked as PauliVillarsSolver(_Matrix, src, sol).
-template<class Field,class PVinverter> class Reconstruct5DfromPhysical {
- private:
-  PVinverter & PauliVillarsSolver;
- public:
-
- /////////////////////////////////////////////////////
- // First cut works, 10 Oct 2018.
- //
- // Must form a plan to get this into production for Zmobius acceleration
- // of the Mobius exact AMA corrections.
- //
- // TODO : understand absence of contact term in eqns in Hantao's thesis
- //        sol4 is contact term subtracted, but thesis & Brower's paper suggests not.
- //
- // Step 1: Localise PV inverse in a routine. [DONE]
- // Step 2: Schur based PV inverse            [DONE]
- // Step 3: Fourier accelerated PV inverse    [DONE]
- //
- /////////////////////////////////////////////////////
- 
-  Reconstruct5DfromPhysical(PVinverter &_PauliVillarsSolver) 
-    : PauliVillarsSolver(_PauliVillarsSolver) 
-  { 
-  };
-
-
-   // sol = M(src) evaluated with the mass set to 1.0; the mass read on entry
-   // is restored afterwards.
-   template<class Matrix>
-   void PV(Matrix &_Matrix,const Field &src,Field &sol)
-   {
-     RealD m = _Matrix.Mass();
-     _Matrix.SetMass(1.0);
-     _Matrix.M(src,sol);
-     _Matrix.SetMass(m);
-   }
-   // sol = Mdag(src) evaluated with the mass set to 1.0; the mass read on
-   // entry is restored afterwards.
-   template<class Matrix>
-   void PVdag(Matrix &_Matrix,const Field &src,Field &sol)
-   {
-     RealD m = _Matrix.Mass();
-     _Matrix.SetMass(1.0);
-     _Matrix.Mdag(src,sol);
-     _Matrix.SetMass(m);
-   }
-
-  // _Matrix : action object; supplies Ls, the grids, P/Pdag, M and
-  //           ImportPhysicalFermionSource
-  // sol4    : 4d solution, inserted (negated) as slice 0 of the intermediate
-  //           field and re-inserted (unnegated) as slice 0 before the final P
-  // src4    : 4d source
-  // sol5    : output, written by the final _Matrix.P call
-  template<class Matrix>
-  void operator() (Matrix & _Matrix,const Field &sol4,const Field &src4, Field &sol5){
-
-    int Ls =  _Matrix.Ls;
-
-    Field psi4(_Matrix.GaugeGrid());
-    Field psi(_Matrix.FermionGrid());
-    Field A  (_Matrix.FermionGrid());
-    Field B  (_Matrix.FermionGrid());
-    Field c  (_Matrix.FermionGrid());
-
-    std::cout << GridLogMessage<< " ************************************************" << std::endl;
-    std::cout << GridLogMessage<< " Reconstruct5Dprop: c.f. MADWF algorithm         " << std::endl;
-    std::cout << GridLogMessage<< " ************************************************" << std::endl;
-
-    ///////////////////////////////////////
-    //Import source, include Dminus factors
-    ///////////////////////////////////////
-    _Matrix.ImportPhysicalFermionSource(src4,B); 
-
-    ///////////////////////////////////////
-    // Set up c from src4
-    ///////////////////////////////////////
-    PauliVillarsSolver(_Matrix,B,A);
-    _Matrix.Pdag(A,c);
-
-    //////////////////////////////////////
-    // Build Pdag PV^-1 Dm P [-sol4,c2,c3... cL]
-    //////////////////////////////////////
-    // Slice 0 takes -sol4; slices 1..Ls-1 are copied across from c.
-    psi4 = - sol4;
-    InsertSlice(psi4, psi, 0   , 0);
-    for (int s=1;s<Ls;s++) {
-      ExtractSlice(psi4,c,s,0);
-       InsertSlice(psi4,psi,s,0);
-    }
-
-    /////////////////////////////
-    // Pdag PV^-1 Dm P 
-    /////////////////////////////
-    _Matrix.P(psi,B);
-    _Matrix.M(B,A);
-    PauliVillarsSolver(_Matrix,A,B);
-    _Matrix.Pdag(B,A);
-
-    //////////////////////////////
-    // Reinsert surface prop
-    //////////////////////////////
-    InsertSlice(sol4,A,0,0);
-
-    //////////////////////////////
-    // Convert from y back to x 
-    //////////////////////////////
-    _Matrix.P(A,sol5);
-    
-  }
-};
-
-}
-}
--- a/Grid/qcd/action/fermion/StaggeredKernels.cc
+++ b/Grid/qcd/action/fermion/StaggeredKernels.cc
@@ -1,294 +0,0 @@
-/*************************************************************************************
-
-Grid physics library, www.github.com/paboyle/Grid
-
-Source file: ./lib/qcd/action/fermion/StaggeredKernels.cc
-
-Copyright (C) 2015
-
-Author: Azusa Yamaguchi, Peter Boyle
-
-This program is free software; you can redistribute it and/or modify
-it under the terms of the GNU General Public License as published by
-the Free Software Foundation; either version 2 of the License, or
-(at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along
-with this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
-
-See the full license in the file "LICENSE" in the top level distribution
-directory
-*************************************************************************************/
-/*  END LEGAL */
-#include <Grid/qcd/action/fermion/FermionCore.h>
-
-namespace Grid {
-namespace QCD {
-
-// Definitions of the StaggeredKernelsStatic selectors with their start-up
-// values: kernel flavour OptGeneric and comms mode CommsAndCompute.
-int StaggeredKernelsStatic::Opt= StaggeredKernelsStatic::OptGeneric;
-int StaggeredKernelsStatic::Comms = StaggeredKernelsStatic::CommsAndCompute;
-
-// One stencil leg, interior and exterior handled together.
-//   U        : doubled gauge field whose element U._odata[sU] is passed to multLink
-//   Dir      : direction index; the stencil entry looked up is Dir+skew
-//   skew     : offset added to Dir for the stencil lookup only
-//   multLink : callable invoked as multLink(Uchi, link, spinor, Dir)
-// The neighbour spinor is read from in._odata (through permute() into `chi`
-// when the entry is flagged _permute) if the entry is local, otherwise from
-// buf.  The macro binds by name to locals of the expanding function:
-// SE, chi, chi_p, Uchi, ptype, st, in, buf, sU and sF.
-#define GENERIC_STENCIL_LEG(U,Dir,skew,multLink)		\
-  SE = st.GetEntry(ptype, Dir+skew, sF);			\
-  if (SE->_is_local ) {						\
-    if (SE->_permute) {						\
-      chi_p = &chi;						\
-      permute(chi,  in._odata[SE->_offset], ptype);		\
-    } else {							\
-      chi_p = &in._odata[SE->_offset];				\
-    }								\
-  } else {							\
-    chi_p = &buf[SE->_offset];					\
-  }								\
-  multLink(Uchi, U._odata[sU], *chi_p, Dir);			
-
-// One stencil leg, interior contributions only.
-// The neighbour spinor is selected from in._odata (permuted into `chi` when
-// flagged) for a local entry, or from buf when st.same_node[Dir] is set.
-// multLink is invoked only when the entry is local or st.same_node[Dir] is
-// set; in every other case the leg contributes nothing and chi_p is left
-// untouched.  Binds by name to the expanding function's locals:
-// SE, chi, chi_p, Uchi, ptype, st, in, buf, sU and sF.
-#define GENERIC_STENCIL_LEG_INT(U,Dir,skew,multLink)		\
-  SE = st.GetEntry(ptype, Dir+skew, sF);			\
-  if (SE->_is_local ) {						\
-    if (SE->_permute) {						\
-      chi_p = &chi;						\
-      permute(chi,  in._odata[SE->_offset], ptype);		\
-    } else {							\
-      chi_p = &in._odata[SE->_offset];				\
-    }								\
-  } else if ( st.same_node[Dir] ) {				\
-    chi_p = &buf[SE->_offset];					\
-  }								\
-  if (SE->_is_local || st.same_node[Dir] ) {			\
-    multLink(Uchi, U._odata[sU], *chi_p, Dir);			\
-  }
-
-// One stencil leg, exterior contributions only.
-// Acts only when the entry is neither local nor flagged st.same_node[Dir]:
-// the spinor is then taken from buf, multLink is invoked, and the caller's
-// counter `nmu` is incremented so the caller can tell that something was
-// accumulated.  Binds by name to the expanding function's locals:
-// SE, chi_p, Uchi, ptype, nmu, st, buf, sU and sF.
-#define GENERIC_STENCIL_LEG_EXT(U,Dir,skew,multLink)		\
-  SE = st.GetEntry(ptype, Dir+skew, sF);			\
-  if ((!SE->_is_local) && (!st.same_node[Dir]) ) {		\
-    nmu++;							\
-    chi_p = &buf[SE->_offset];					\
-    multLink(Uchi, U._odata[sU], *chi_p, Dir);			\
-  }
-
-// Constructor: hands the implementation parameters straight to the Base
-// subobject; there is no other state to set up.
-template <class Impl>
-StaggeredKernels<Impl>::StaggeredKernels(const ImplParams &p)
-  : Base(p)
-{
-}
-
-////////////////////////////////////////////////////////////////////////////////////
-// Generic implementation; move to different file?
-// Int, Ext, Int+Ext cases for comms overlap
-////////////////////////////////////////////////////////////////////////////////////
-// Generic per-site kernel covering interior and exterior legs in one pass.
-// For every s in [0,LLs) the fermion-site index is sF = LLs*sU + s.  Eight
-// legs are taken with U (skew 0) and eight with UUU (skew 8); the result is
-// negated when `dag` is non-zero and streamed to out._odata[sF].
-//   st  : stencil queried through GetEntry / same_node by the leg macro
-//   lo  : not referenced in this routine
-//   buf : buffer the leg macro reads non-local neighbours from
-template <class Impl>
-void StaggeredKernels<Impl>::DhopSiteGeneric(StencilImpl &st, LebesgueOrder &lo, 
-					     DoubledGaugeField &U, DoubledGaugeField &UUU,
-					     SiteSpinor *buf, int LLs, int sU, 
-					     const FermionField &in, FermionField &out, int dag) {
-  // These locals are referenced by name inside GENERIC_STENCIL_LEG.
-  const SiteSpinor *chi_p;
-  SiteSpinor chi;
-  SiteSpinor Uchi;
-  StencilEntry *SE;
-  int ptype;
-  int skew;
-
-  for(int s=0;s<LLs;s++){
-    int sF=LLs*sU+s;
-    skew = 0;
-    // The first leg uses multLink while every later leg uses multLinkAdd, and
-    // Uchi is not cleared beforehand -- presumably multLink overwrites Uchi
-    // and multLinkAdd accumulates into it (confirm against Impl).
-    GENERIC_STENCIL_LEG(U,Xp,skew,Impl::multLink);
-    GENERIC_STENCIL_LEG(U,Yp,skew,Impl::multLinkAdd);
-    GENERIC_STENCIL_LEG(U,Zp,skew,Impl::multLinkAdd);
-    GENERIC_STENCIL_LEG(U,Tp,skew,Impl::multLinkAdd);
-    GENERIC_STENCIL_LEG(U,Xm,skew,Impl::multLinkAdd);
-    GENERIC_STENCIL_LEG(U,Ym,skew,Impl::multLinkAdd);
-    GENERIC_STENCIL_LEG(U,Zm,skew,Impl::multLinkAdd);
-    GENERIC_STENCIL_LEG(U,Tm,skew,Impl::multLinkAdd);
-    // Second set of legs: same directions, UUU links, stencil entries offset by 8.
-    skew=8;
-    GENERIC_STENCIL_LEG(UUU,Xp,skew,Impl::multLinkAdd);
-    GENERIC_STENCIL_LEG(UUU,Yp,skew,Impl::multLinkAdd);
-    GENERIC_STENCIL_LEG(UUU,Zp,skew,Impl::multLinkAdd);
-    GENERIC_STENCIL_LEG(UUU,Tp,skew,Impl::multLinkAdd);
-    GENERIC_STENCIL_LEG(UUU,Xm,skew,Impl::multLinkAdd);
-    GENERIC_STENCIL_LEG(UUU,Ym,skew,Impl::multLinkAdd);
-    GENERIC_STENCIL_LEG(UUU,Zm,skew,Impl::multLinkAdd);
-    GENERIC_STENCIL_LEG(UUU,Tm,skew,Impl::multLinkAdd);
-    if ( dag ) { 
-      Uchi = - Uchi;
-    } 
-    vstream(out._odata[sF], Uchi);
-  }
-};
-
-  ///////////////////////////////////////////////////
-  // Only contributions from interior of our node
-  ///////////////////////////////////////////////////
-// Generic per-site kernel restricted to interior legs.
-// For every s in [0,LLs), with sF = LLs*sU + s, Uchi starts from zero and
-// accumulates the legs admitted by GENERIC_STENCIL_LEG_INT (eight with U at
-// skew 0, eight with UUU at skew 8).  The sum is negated when `dag` is
-// non-zero and streamed to out._odata[sF], replacing its previous content.
-// `lo` is not referenced in this routine.
-template <class Impl>
-void StaggeredKernels<Impl>::DhopSiteGenericInt(StencilImpl &st, LebesgueOrder &lo, 
-						DoubledGaugeField &U, DoubledGaugeField &UUU,
-						SiteSpinor *buf, int LLs, int sU, 
-						const FermionField &in, FermionField &out,int dag) {
-  // These locals are referenced by name inside GENERIC_STENCIL_LEG_INT.
-  const SiteSpinor *chi_p;
-  SiteSpinor chi;
-  SiteSpinor Uchi;
-  StencilEntry *SE;
-  int ptype;
-  int skew ;
-
-  for(int s=0;s<LLs;s++){
-    int sF=LLs*sU+s;
-    skew = 0;
-    // Every leg is conditional here, so the accumulator must be cleared first.
-    Uchi=zero;
-    GENERIC_STENCIL_LEG_INT(U,Xp,skew,Impl::multLinkAdd);
-    GENERIC_STENCIL_LEG_INT(U,Yp,skew,Impl::multLinkAdd);
-    GENERIC_STENCIL_LEG_INT(U,Zp,skew,Impl::multLinkAdd);
-    GENERIC_STENCIL_LEG_INT(U,Tp,skew,Impl::multLinkAdd);
-    GENERIC_STENCIL_LEG_INT(U,Xm,skew,Impl::multLinkAdd);
-    GENERIC_STENCIL_LEG_INT(U,Ym,skew,Impl::multLinkAdd);
-    GENERIC_STENCIL_LEG_INT(U,Zm,skew,Impl::multLinkAdd);
-    GENERIC_STENCIL_LEG_INT(U,Tm,skew,Impl::multLinkAdd);
-    skew=8;
-    GENERIC_STENCIL_LEG_INT(UUU,Xp,skew,Impl::multLinkAdd);
-    GENERIC_STENCIL_LEG_INT(UUU,Yp,skew,Impl::multLinkAdd);
-    GENERIC_STENCIL_LEG_INT(UUU,Zp,skew,Impl::multLinkAdd);
-    GENERIC_STENCIL_LEG_INT(UUU,Tp,skew,Impl::multLinkAdd);
-    GENERIC_STENCIL_LEG_INT(UUU,Xm,skew,Impl::multLinkAdd);
-    GENERIC_STENCIL_LEG_INT(UUU,Ym,skew,Impl::multLinkAdd);
-    GENERIC_STENCIL_LEG_INT(UUU,Zm,skew,Impl::multLinkAdd);
-    GENERIC_STENCIL_LEG_INT(UUU,Tm,skew,Impl::multLinkAdd);
-    if ( dag ) {
-      Uchi = - Uchi;
-    }
-    vstream(out._odata[sF], Uchi);
-  }
-};
-
-
-  ///////////////////////////////////////////////////
-  // Only contributions from exterior of our node
-  ///////////////////////////////////////////////////
-// Generic per-site kernel restricted to exterior legs.
-// For every s in [0,LLs), with sF = LLs*sU + s, Uchi starts from zero and
-// accumulates the legs admitted by GENERIC_STENCIL_LEG_EXT.  Unlike the other
-// generic kernels the result is combined with the existing out._odata[sF]
-// (subtracted when `dag` is non-zero, added otherwise) rather than replacing
-// it.  `in`, `chi` and `lo` are not referenced by the code in this routine.
-template <class Impl>
-void StaggeredKernels<Impl>::DhopSiteGenericExt(StencilImpl &st, LebesgueOrder &lo, 
-						DoubledGaugeField &U, DoubledGaugeField &UUU,
-						SiteSpinor *buf, int LLs, int sU,
-						const FermionField &in, FermionField &out,int dag) {
-  // These locals are referenced by name inside GENERIC_STENCIL_LEG_EXT.
-  const SiteSpinor *chi_p;
-  SiteSpinor chi;
-  SiteSpinor Uchi;
-  StencilEntry *SE;
-  int ptype;
-  // Count of exterior legs taken.  NOTE(review): initialised once, outside the
-  // s loop, and never reset -- after the first s with an exterior leg, every
-  // later s also updates `out`, adding a zero Uchi when it has no legs itself.
-  int nmu=0;
-  int skew ;
-
-  for(int s=0;s<LLs;s++){
-    int sF=LLs*sU+s;
-    skew = 0;
-    Uchi=zero;
-    GENERIC_STENCIL_LEG_EXT(U,Xp,skew,Impl::multLinkAdd);
-    GENERIC_STENCIL_LEG_EXT(U,Yp,skew,Impl::multLinkAdd);
-    GENERIC_STENCIL_LEG_EXT(U,Zp,skew,Impl::multLinkAdd);
-    GENERIC_STENCIL_LEG_EXT(U,Tp,skew,Impl::multLinkAdd);
-    GENERIC_STENCIL_LEG_EXT(U,Xm,skew,Impl::multLinkAdd);
-    GENERIC_STENCIL_LEG_EXT(U,Ym,skew,Impl::multLinkAdd);
-    GENERIC_STENCIL_LEG_EXT(U,Zm,skew,Impl::multLinkAdd);
-    GENERIC_STENCIL_LEG_EXT(U,Tm,skew,Impl::multLinkAdd);
-    skew=8;
-    GENERIC_STENCIL_LEG_EXT(UUU,Xp,skew,Impl::multLinkAdd);
-    GENERIC_STENCIL_LEG_EXT(UUU,Yp,skew,Impl::multLinkAdd);
-    GENERIC_STENCIL_LEG_EXT(UUU,Zp,skew,Impl::multLinkAdd);
-    GENERIC_STENCIL_LEG_EXT(UUU,Tp,skew,Impl::multLinkAdd);
-    GENERIC_STENCIL_LEG_EXT(UUU,Xm,skew,Impl::multLinkAdd);
-    GENERIC_STENCIL_LEG_EXT(UUU,Ym,skew,Impl::multLinkAdd);
-    GENERIC_STENCIL_LEG_EXT(UUU,Zm,skew,Impl::multLinkAdd);
-    GENERIC_STENCIL_LEG_EXT(UUU,Tm,skew,Impl::multLinkAdd);
-
-    // Touch `out` only once at least one exterior leg has been seen.
-    if ( nmu ) { 
-      if ( dag ) { 
-	out._odata[sF] = out._odata[sF] - Uchi;
-      } else { 
-	out._odata[sF] = out._odata[sF] + Uchi;
-      }
-    }
-  }
-};
-
-////////////////////////////////////////////////////////////////////////////////////
-// Driving / wrapping routine to select right kernel
-////////////////////////////////////////////////////////////////////////////////////
-
-// Daggered entry point: forwards to the dag-taking DhopSite overload with
-// the dag flag set to 1.
-template <class Impl>
-void StaggeredKernels<Impl>::DhopSiteDag(StencilImpl &st, LebesgueOrder &lo,
-                                         DoubledGaugeField &U, DoubledGaugeField &UUU,
-                                         SiteSpinor *buf, int LLs, int sU,
-                                         const FermionField &in, FermionField &out,
-                                         int interior, int exterior)
-{
-  DhopSite(st, lo, U, UUU, buf, LLs, sU, in, out, /*dag=*/1, interior, exterior);
-}
-
-// Non-daggered entry point: forwards to the dag-taking DhopSite overload with
-// the dag flag set to 0.
-template <class Impl>
-void StaggeredKernels<Impl>::DhopSite(StencilImpl &st, LebesgueOrder &lo,
-                                      DoubledGaugeField &U, DoubledGaugeField &UUU,
-                                      SiteSpinor *buf, int LLs, int sU,
-                                      const FermionField &in, FermionField &out,
-                                      int interior, int exterior)
-{
-  DhopSite(st, lo, U, UUU, buf, LLs, sU, in, out, /*dag=*/0, interior, exterior);
-}
-
-// Dispatcher: picks the kernel flavour from the static `Opt` selector and the
-// interior / exterior variant from the two flags.
-//   interior && exterior : combined kernel
-//   interior only        : *Int kernel
-//   exterior only        : *Ext kernel
-//   neither              : no kernel is called for the hand-unrolled and generic flavours
-// The assembly flavour (compiled only under AVX512) supports the combined
-// case only; any other request is logged as an error and trips assert(0).
-// An unrecognised Opt value is printed and also trips assert(0).
-template <class Impl>
-void StaggeredKernels<Impl>::DhopSite(StencilImpl &st, LebesgueOrder &lo,
-                                      DoubledGaugeField &U, DoubledGaugeField &UUU,
-                                      SiteSpinor *buf, int LLs,
-                                      int sU, const FermionField &in, FermionField &out,
-                                      int dag, int interior, int exterior)
-{
-  const bool wholeSite    = interior && exterior;
-  const bool interiorOnly = interior && !exterior;
-  const bool exteriorOnly = exterior && !interior;
-
-  switch(Opt) {
-#ifdef AVX512
-  case OptInlineAsm:
-    if ( !wholeSite ) {
-      std::cout << GridLogError << "Cannot overlap comms and compute with Staggered assembly"<<std::endl;
-      assert(0);
-    } else {
-      DhopSiteAsm(st,lo,U,UUU,buf,LLs,sU,in,out,dag);
-    }
-    break;
-#endif
-  case OptHandUnroll:
-    if      ( wholeSite    ) { DhopSiteHand   (st,lo,U,UUU,buf,LLs,sU,in,out,dag); }
-    else if ( interiorOnly ) { DhopSiteHandInt(st,lo,U,UUU,buf,LLs,sU,in,out,dag); }
-    else if ( exteriorOnly ) { DhopSiteHandExt(st,lo,U,UUU,buf,LLs,sU,in,out,dag); }
-    break;
-  case OptGeneric:
-    if      ( wholeSite    ) { DhopSiteGeneric   (st,lo,U,UUU,buf,LLs,sU,in,out,dag); }
-    else if ( interiorOnly ) { DhopSiteGenericInt(st,lo,U,UUU,buf,LLs,sU,in,out,dag); }
-    else if ( exteriorOnly ) { DhopSiteGenericExt(st,lo,U,UUU,buf,LLs,sU,in,out,dag); }
-    break;
-  default:
-    std::cout<<"Oops Opt = "<<Opt<<std::endl;
-    assert(0);
-    break;
-  }
-}
-
-// Single-direction hop.  The body consists solely of assert(0): no work is
-// done, and any call aborts in a build with assertions enabled.
-template <class Impl>
-void StaggeredKernels<Impl>::DhopDir(StencilImpl &st,
-                                     DoubledGaugeField &U, DoubledGaugeField &UUU,
-                                     SiteSpinor *buf, int sF, int sU,
-                                     const FermionField &in, FermionField &out,
-                                     int dir, int disp)
-{
-  // Open design notes kept from the original author:
-  //  - Disp should be either +1,-1,+3,-3
-  //  - What about "dag" ?
-  //  - Because we work out pU . dS/dU
-  //    U
-  assert(0);
-}
-
-// Instantiation macros applied to StaggeredKernels.  Their definitions are
-// not in this file; presumably they emit explicit instantiations for the
-// supported staggered Impl types (confirm where the macros are defined).
-FermOpStaggeredTemplateInstantiate(StaggeredKernels);
-FermOpStaggeredVec5dTemplateInstantiate(StaggeredKernels);
-
-}}
-
--- a/Grid/qcd/action/fermion/StaggeredKernels.h
+++ b/Grid/qcd/action/fermion/StaggeredKernels.h
@@ -1,122 +0,0 @@
-/*************************************************************************************
-
-Grid physics library, www.github.com/paboyle/Grid
-
-Source file: ./lib/qcd/action/fermion/StaggeredKernels.h
-
-Copyright (C) 2015
-
-Author: Azusa Yamaguchi, Peter Boyle
-
-This program is free software; you can redistribute it and/or modify
-it under the terms of the GNU General Public License as published by
-the Free Software Foundation; either version 2 of the License, or
-(at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along
-with this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
-
-See the full license in the file "LICENSE" in the top level distribution
-directory
-*************************************************************************************/
-/*  END LEGAL */
-#ifndef GRID_QCD_STAGGERED_KERNELS_H
-#define GRID_QCD_STAGGERED_KERNELS_H
-
-namespace Grid {
-namespace QCD {
-
-  ////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-  // Helper routines that implement Staggered stencil for a single site.
-  ////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-// Non-template holder for the kernel selectors, so that a single Opt / Comms
-// pair exists independently of the StaggeredKernels<Impl> instantiation.
-class StaggeredKernelsStatic {
- public:
-  // Kernel flavours that `Opt` can select.
-  enum {
-    OptGeneric,
-    OptHandUnroll,
-    OptInlineAsm
-  };
-  // Communication modes that `Comms` can select.
-  enum {
-    CommsAndCompute,
-    CommsThenCompute
-  };
-
-  static int Opt;
-  static int Comms;
-};
- 
-// Per-site staggered stencil kernels layered on FermionOperator<Impl>; the
-// Opt / Comms selectors are inherited from StaggeredKernelsStatic.
-template<class Impl>
-class StaggeredKernels : public FermionOperator<Impl>,
-                         public StaggeredKernelsStatic
-{
-public:
-
-  INHERIT_IMPL_TYPES(Impl);
-  typedef FermionOperator<Impl> Base;
-
-  StaggeredKernels(const ImplParams &p = ImplParams());
-
-  void DhopDir(StencilImpl &st,
-               DoubledGaugeField &U, DoubledGaugeField &UUU,
-               SiteSpinor * buf, int sF, int sU,
-               const FermionField &in, FermionField &out,
-               int dir, int disp);
-
-  ///////////////////////////////////////////////////////////////////////////////////////
-  // Generic Nc kernels: combined, interior-only and exterior-only variants
-  ///////////////////////////////////////////////////////////////////////////////////////
-  void DhopSiteGeneric(StencilImpl &st, LebesgueOrder &lo,
-                       DoubledGaugeField &U, DoubledGaugeField &UUU,
-                       SiteSpinor * buf, int LLs, int sU,
-                       const FermionField &in, FermionField &out, int dag);
-
-  void DhopSiteGenericInt(StencilImpl &st, LebesgueOrder &lo,
-                          DoubledGaugeField &U, DoubledGaugeField &UUU,
-                          SiteSpinor * buf, int LLs, int sU,
-                          const FermionField &in, FermionField &out, int dag);
-
-  void DhopSiteGenericExt(StencilImpl &st, LebesgueOrder &lo,
-                          DoubledGaugeField &U, DoubledGaugeField &UUU,
-                          SiteSpinor * buf, int LLs, int sU,
-                          const FermionField &in, FermionField &out, int dag);
-
-  ///////////////////////////////////////////////////////////////////////////////////////
-  // Nc=3 specific kernels: combined, interior-only and exterior-only variants
-  ///////////////////////////////////////////////////////////////////////////////////////
-  void DhopSiteHand(StencilImpl &st, LebesgueOrder &lo,
-                    DoubledGaugeField &U, DoubledGaugeField &UUU,
-                    SiteSpinor * buf, int LLs, int sU,
-                    const FermionField &in, FermionField &out, int dag);
-
-  void DhopSiteHandInt(StencilImpl &st, LebesgueOrder &lo,
-                       DoubledGaugeField &U, DoubledGaugeField &UUU,
-                       SiteSpinor * buf, int LLs, int sU,
-                       const FermionField &in, FermionField &out, int dag);
-
-  void DhopSiteHandExt(StencilImpl &st, LebesgueOrder &lo,
-                       DoubledGaugeField &U, DoubledGaugeField &UUU,
-                       SiteSpinor * buf, int LLs, int sU,
-                       const FermionField &in, FermionField &out, int dag);
-
-  ///////////////////////////////////////////////////////////////////////////////////////
-  // Asm Nc=3 specific kernels
-  ///////////////////////////////////////////////////////////////////////////////////////
-  void DhopSiteAsm(StencilImpl &st, LebesgueOrder &lo,
-                   DoubledGaugeField &U, DoubledGaugeField &UUU,
-                   SiteSpinor * buf, int LLs, int sU,
-                   const FermionField &in, FermionField &out, int dag);
-
-  ///////////////////////////////////////////////////////////////////////////////////////////////////
-  // Generic interface; fan out to right routine.
-  // The first two overloads default both interior and exterior to 1; the
-  // third takes the dag flag explicitly and has no defaults.
-  ///////////////////////////////////////////////////////////////////////////////////////////////////
-  void DhopSite(StencilImpl &st, LebesgueOrder &lo,
-                DoubledGaugeField &U, DoubledGaugeField &UUU,
-                SiteSpinor * buf, int LLs, int sU,
-                const FermionField &in, FermionField &out,
-                int interior=1, int exterior=1);
-
-  void DhopSiteDag(StencilImpl &st, LebesgueOrder &lo,
-                   DoubledGaugeField &U, DoubledGaugeField &UUU,
-                   SiteSpinor * buf, int LLs, int sU,
-                   const FermionField &in, FermionField &out,
-                   int interior=1, int exterior=1);
-
-  void DhopSite(StencilImpl &st, LebesgueOrder &lo,
-                DoubledGaugeField &U, DoubledGaugeField &UUU,
-                SiteSpinor * buf, int LLs, int sU,
-                const FermionField &in, FermionField &out,
-                int dag, int interior, int exterior);
-
-};
-    
-}}
-
-#endif
--- a/Grid/qcd/action/fermion/StaggeredKernelsHand.cc
+++ b/Grid/qcd/action/fermion/StaggeredKernelsHand.cc
@@ -1,399 +0,0 @@
-    /*************************************************************************************
-
-    Grid physics library, www.github.com/paboyle/Grid 
-
-    Source file: ./lib/qcd/action/fermion/StaggeredKernelsHand.cc
-
-    Copyright (C) 2015
-
-Author: Peter Boyle <paboyle@ph.ed.ac.uk>
-Author: paboyle <paboyle@ph.ed.ac.uk>
-
-    This program is free software; you can redistribute it and/or modify
-    it under the terms of the GNU General Public License as published by
-    the Free Software Foundation; either version 2 of the License, or
-    (at your option) any later version.
-
-    This program is distributed in the hope that it will be useful,
-    but WITHOUT ANY WARRANTY; without even the implied warranty of
-    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-    GNU General Public License for more details.
-
-    You should have received a copy of the GNU General Public License along
-    with this program; if not, write to the Free Software Foundation, Inc.,
-    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
-
-    See the full license in the file "LICENSE" in the top level distribution directory
-    *************************************************************************************/
-    /*  END LEGAL */
-#include <Grid/Grid.h>
-
-
-// LOAD_CHI(b): bind `ref` to b[offset] and copy its three components
-// (indices 0..2) into the enclosing-scope locals Chi_0..Chi_2.
-// `ref` is declared without a scope of its own, so every expansion has to sit
-// inside its own brace block to avoid redeclaration.
-#define LOAD_CHI(b)		\
-  const SiteSpinor & ref (b[offset]);	\
-    Chi_0=ref()()(0);\
-    Chi_1=ref()()(1);\
-    Chi_2=ref()()(2);
-
-
-// To splat or not to splat depends on the implementation
-// MULT(A,UChi): load the nine elements of U._odata[sU](A) into U_00..U_22 via
-// Impl::loadLinkElement, then ASSIGN UChi_i = sum_j U_ij * Chi_j (i,j = 0..2).
-// The gauge field is always the identifier `U` from the expansion site.
-#define MULT(A,UChi)				\
-  auto & ref(U._odata[sU](A));			\
-   Impl::loadLinkElement(U_00,ref()(0,0));      \
-   Impl::loadLinkElement(U_10,ref()(1,0));      \
-   Impl::loadLinkElement(U_20,ref()(2,0));      \
-   Impl::loadLinkElement(U_01,ref()(0,1));      \
-   Impl::loadLinkElement(U_11,ref()(1,1));      \
-   Impl::loadLinkElement(U_21,ref()(2,1));      \
-   Impl::loadLinkElement(U_02,ref()(0,2));     \
-   Impl::loadLinkElement(U_12,ref()(1,2));     \
-   Impl::loadLinkElement(U_22,ref()(2,2));     \
-    UChi ## _0  = U_00*Chi_0;	       \
-    UChi ## _1  = U_10*Chi_0;\
-    UChi ## _2  = U_20*Chi_0;\
-    UChi ## _0 += U_01*Chi_1;\
-    UChi ## _1 += U_11*Chi_1;\
-    UChi ## _2 += U_21*Chi_1;\
-    UChi ## _0 += U_02*Chi_2;\
-    UChi ## _1 += U_12*Chi_2;\
-    UChi ## _2 += U_22*Chi_2;
-
-// MULT_ADD(U,A,UChi): same matrix-vector product as MULT, but the gauge field
-// is passed explicitly as the first argument and the result is ACCUMULATED
-// (+=) into UChi_0..UChi_2 instead of overwriting them.
-#define MULT_ADD(U,A,UChi)			\
-  auto & ref(U._odata[sU](A));			\
-   Impl::loadLinkElement(U_00,ref()(0,0));      \
-   Impl::loadLinkElement(U_10,ref()(1,0));      \
-   Impl::loadLinkElement(U_20,ref()(2,0));      \
-   Impl::loadLinkElement(U_01,ref()(0,1));      \
-   Impl::loadLinkElement(U_11,ref()(1,1));      \
-   Impl::loadLinkElement(U_21,ref()(2,1));      \
-   Impl::loadLinkElement(U_02,ref()(0,2));     \
-   Impl::loadLinkElement(U_12,ref()(1,2));     \
-   Impl::loadLinkElement(U_22,ref()(2,2));     \
-    UChi ## _0 += U_00*Chi_0;	       \
-    UChi ## _1 += U_10*Chi_0;\
-    UChi ## _2 += U_20*Chi_0;\
-    UChi ## _0 += U_01*Chi_1;\
-    UChi ## _1 += U_11*Chi_1;\
-    UChi ## _2 += U_21*Chi_1;\
-    UChi ## _0 += U_02*Chi_2;\
-    UChi ## _1 += U_12*Chi_2;\
-    UChi ## _2 += U_22*Chi_2;
-
-
-// PERMUTE_DIR(dir): apply permute<dir> in place to Chi_0, Chi_1 and Chi_2.
-#define PERMUTE_DIR(dir)			\
-  permute##dir(Chi_0,Chi_0);			\
-  permute##dir(Chi_1,Chi_1);			\
-  permute##dir(Chi_2,Chi_2);
-
-
-// HAND_STENCIL_LEG_BASE(Dir,Perm,skew): look up the stencil entry for leg
-// Dir+skew at site sF and load Chi for that leg.  A local entry is read from
-// in._odata (and permuted with Perm when the entry asks for it); a non-local
-// entry is read from buf.
-#define HAND_STENCIL_LEG_BASE(Dir,Perm,skew)	\
-  SE=st.GetEntry(ptype,Dir+skew,sF);	\
-  offset = SE->_offset;			\
-  local  = SE->_is_local;		\
-  perm   = SE->_permute;		\
-  if ( local ) {						\
-    LOAD_CHI(in._odata);					\
-    if ( perm) {						\
-      PERMUTE_DIR(Perm);					\
-    }								\
-  } else {							\
-    LOAD_CHI(buf);						\
-  }								
-
-// HAND_STENCIL_LEG_BEGIN: load the leg, then MULT, i.e. initialise the
-// accumulator named by `even` (even_* or odd_*) from this leg.
-#define HAND_STENCIL_LEG_BEGIN(Dir,Perm,skew,even)		\
-  HAND_STENCIL_LEG_BASE(Dir,Perm,skew)				\
-  {								\
-    MULT(Dir,even);						\
-  }
-
-// HAND_STENCIL_LEG: load the leg, then MULT_ADD with gauge field U, i.e. add
-// this leg's contribution to the accumulator named by `even`.
-#define HAND_STENCIL_LEG(U,Dir,Perm,skew,even)			\
-  HAND_STENCIL_LEG_BASE(Dir,Perm,skew)				\
-  {								\
-    MULT_ADD(U,Dir,even);					\
-  }
-
-
-
-// HAND_STENCIL_LEG_INT: interior variant.  The leg is loaded and accumulated
-// only when the stencil entry is local or st.same_node[Dir] is set; any other
-// leg is skipped entirely.
-#define HAND_STENCIL_LEG_INT(U,Dir,Perm,skew,even)	\
-  SE=st.GetEntry(ptype,Dir+skew,sF);			\
-  offset = SE->_offset;					\
-  local  = SE->_is_local;				\
-  perm   = SE->_permute;				\
-  if ( local ) {					\
-    LOAD_CHI(in._odata);				\
-    if ( perm) {					\
-      PERMUTE_DIR(Perm);				\
-    }							\
-  } else if ( st.same_node[Dir] ) {			\
-    LOAD_CHI(buf);					\
-  }							\
-  if (SE->_is_local || st.same_node[Dir] ) {		\
-    MULT_ADD(U,Dir,even);				\
-  }
-
-// HAND_STENCIL_LEG_EXT: exterior variant, the complement of the INT condition.
-// The leg is loaded from buf and accumulated only when the entry is neither
-// local nor on the same node; each such leg also increments the counter `nmu`,
-// which must be declared at the expansion site.
-#define HAND_STENCIL_LEG_EXT(U,Dir,Perm,skew,even)	\
-  SE=st.GetEntry(ptype,Dir+skew,sF);			\
-  offset = SE->_offset;					\
-  local  = SE->_is_local;				\
-  perm   = SE->_permute;				\
-  if ((!SE->_is_local) && (!st.same_node[Dir]) ) {		\
-    nmu++;							\
-    { LOAD_CHI(buf);	  }					\
-    { MULT_ADD(U,Dir,even); }					\
-  }								
-
-namespace Grid {
-namespace QCD {
-
-
-// Hand-unrolled hopping kernel for one outer site sU.
-// For every s in [0,LLs) it sums sixteen stencil legs at sF = s + LLs*sU:
-// eight using U (skew 0) and eight using UUU (skew 8).  The sum, negated when
-// `dag` is non-zero, is streamed to out._odata[sF], overwriting it.
-// `lo` and the typedefs S and V are not used in this body.
-template <class Impl>
-void StaggeredKernels<Impl>::DhopSiteHand(StencilImpl &st, LebesgueOrder &lo, 
-					  DoubledGaugeField &U,DoubledGaugeField &UUU,
-					  SiteSpinor *buf, int LLs, int sU, 
-					  const FermionField &in, FermionField &out,int dag) 
-{
-  typedef typename Simd::scalar_type S;
-  typedef typename Simd::vector_type V;
-
-  Simd even_0; // accumulator for the legs tagged `even` (3 components)
-  Simd even_1;
-  Simd even_2;
-  Simd odd_0; // accumulator for the legs tagged `odd` (3 components)
-  Simd odd_1;
-  Simd odd_2;
-
-  Simd Chi_0;    // neighbour field for the current leg, filled by LOAD_CHI
-  Simd Chi_1;
-  Simd Chi_2;
-  
-  Simd U_00;  // the nine link-matrix elements for the current leg
-  Simd U_10;
-  Simd U_20;  
-  Simd U_01;
-  Simd U_11;
-  Simd U_21;
-  Simd U_02;
-  Simd U_12;
-  Simd U_22; 
-
-  SiteSpinor result;
-  int offset,local,perm, ptype;
-
-  StencilEntry *SE;
-  int skew;
-
-  for(int s=0;s<LLs;s++){
-    int sF=s+LLs*sU;
-
-    // The accumulators are not zeroed: the first `even` leg and the first
-    // `odd` leg use the _BEGIN form, which assigns rather than accumulates.
-    skew = 0;
-    HAND_STENCIL_LEG_BEGIN(Xp,3,skew,even);  
-    HAND_STENCIL_LEG_BEGIN(Yp,2,skew,odd);   
-    HAND_STENCIL_LEG      (U,Zp,1,skew,even);  
-    HAND_STENCIL_LEG      (U,Tp,0,skew,odd);  
-    HAND_STENCIL_LEG      (U,Xm,3,skew,even);  
-    HAND_STENCIL_LEG      (U,Ym,2,skew,odd);   
-    HAND_STENCIL_LEG      (U,Zm,1,skew,even);  
-    HAND_STENCIL_LEG      (U,Tm,0,skew,odd);  
-    // Second set of eight legs: stencil entries offset by 8, links from UUU.
-    skew = 8;
-    HAND_STENCIL_LEG(UUU,Xp,3,skew,even);  
-    HAND_STENCIL_LEG(UUU,Yp,2,skew,odd);   
-    HAND_STENCIL_LEG(UUU,Zp,1,skew,even);  
-    HAND_STENCIL_LEG(UUU,Tp,0,skew,odd);  
-    HAND_STENCIL_LEG(UUU,Xm,3,skew,even);  
-    HAND_STENCIL_LEG(UUU,Ym,2,skew,odd);   
-    HAND_STENCIL_LEG(UUU,Zm,1,skew,even);  
-    HAND_STENCIL_LEG(UUU,Tm,0,skew,odd);  
-    
-    // Combine the two accumulators; the daggered operator is the negated sum.
-    if ( dag ) {
-      result()()(0) = - even_0 - odd_0;
-      result()()(1) = - even_1 - odd_1;
-      result()()(2) = - even_2 - odd_2;
-    } else { 
-      result()()(0) = even_0 + odd_0;
-      result()()(1) = even_1 + odd_1;
-      result()()(2) = even_2 + odd_2;
-    }
-    vstream(out._odata[sF],result);
-  }
-}
-
-
-// Interior part of the hand-unrolled hopping kernel for one outer site sU.
-// Same sixteen legs as DhopSiteHand, but a leg contributes only when its
-// stencil entry is local or st.same_node[Dir] is set (HAND_STENCIL_LEG_INT).
-// The result, negated when `dag` is non-zero, is streamed to out._odata[sF],
-// overwriting it.  `lo` and the typedefs S and V are not used in this body.
-template <class Impl>
-void StaggeredKernels<Impl>::DhopSiteHandInt(StencilImpl &st, LebesgueOrder &lo, 
-					     DoubledGaugeField &U, DoubledGaugeField &UUU,
-					     SiteSpinor *buf, int LLs, int sU, 
-					     const FermionField &in, FermionField &out,int dag) 
-{
-  typedef typename Simd::scalar_type S;
-  typedef typename Simd::vector_type V;
-
-  Simd even_0; // accumulator for the legs tagged `even` (3 components)
-  Simd even_1;
-  Simd even_2;
-  Simd odd_0; // accumulator for the legs tagged `odd` (3 components)
-  Simd odd_1;
-  Simd odd_2;
-
-  Simd Chi_0;    // neighbour field for the current leg, filled by LOAD_CHI
-  Simd Chi_1;
-  Simd Chi_2;
-  
-  Simd U_00;  // the nine link-matrix elements for the current leg
-  Simd U_10;
-  Simd U_20;  
-  Simd U_01;
-  Simd U_11;
-  Simd U_21;
-  Simd U_02;
-  Simd U_12;
-  Simd U_22; 
-
-  SiteSpinor result;
-  int offset,local,perm, ptype;
-
-  StencilEntry *SE;
-  int skew;
-
-  for(int s=0;s<LLs;s++){
-    int sF=s+LLs*sU;
-
-    // Every leg here accumulates (any leg may be skipped), so start from zero.
-    even_0 = zero;    even_1 = zero;    even_2 = zero;
-     odd_0 = zero;     odd_1 = zero;     odd_2 = zero;
-
-    skew = 0;
-    HAND_STENCIL_LEG_INT(U,Xp,3,skew,even);  
-    HAND_STENCIL_LEG_INT(U,Yp,2,skew,odd);   
-    HAND_STENCIL_LEG_INT(U,Zp,1,skew,even);  
-    HAND_STENCIL_LEG_INT(U,Tp,0,skew,odd);  
-    HAND_STENCIL_LEG_INT(U,Xm,3,skew,even);  
-    HAND_STENCIL_LEG_INT(U,Ym,2,skew,odd);   
-    HAND_STENCIL_LEG_INT(U,Zm,1,skew,even);  
-    HAND_STENCIL_LEG_INT(U,Tm,0,skew,odd);  
-    // Second set of eight legs: stencil entries offset by 8, links from UUU.
-    skew = 8;
-    HAND_STENCIL_LEG_INT(UUU,Xp,3,skew,even);  
-    HAND_STENCIL_LEG_INT(UUU,Yp,2,skew,odd);   
-    HAND_STENCIL_LEG_INT(UUU,Zp,1,skew,even);  
-    HAND_STENCIL_LEG_INT(UUU,Tp,0,skew,odd);  
-    HAND_STENCIL_LEG_INT(UUU,Xm,3,skew,even);  
-    HAND_STENCIL_LEG_INT(UUU,Ym,2,skew,odd);   
-    HAND_STENCIL_LEG_INT(UUU,Zm,1,skew,even);  
-    HAND_STENCIL_LEG_INT(UUU,Tm,0,skew,odd);  
-
-    // Assume every site must be connected to at least one interior point. No 1^4 subvols.
-    // The write is unconditional: a site with no interior leg is set to zero.
-    if ( dag ) {
-      result()()(0) = - even_0 - odd_0;
-      result()()(1) = - even_1 - odd_1;
-      result()()(2) = - even_2 - odd_2;
-    } else { 
-      result()()(0) = even_0 + odd_0;
-      result()()(1) = even_1 + odd_1;
-      result()()(2) = even_2 + odd_2;
-    }
-    vstream(out._odata[sF],result);
-  }
-}
-
-
-// Exterior part of the hand-unrolled hopping kernel for one outer site sU.
-// A leg contributes only when its stencil entry is neither local nor on the
-// same node (HAND_STENCIL_LEG_EXT); `nmu` counts those legs.  When at least
-// one leg contributed, the sum (negated when `dag` is non-zero) is ADDED to
-// the value already held in out._odata[sF]; otherwise the site is untouched.
-// `lo` and the typedefs S and V are not used in this body.
-template <class Impl>
-void StaggeredKernels<Impl>::DhopSiteHandExt(StencilImpl &st, LebesgueOrder &lo, 
-					     DoubledGaugeField &U, DoubledGaugeField &UUU,
-					     SiteSpinor *buf, int LLs, int sU, 
-					     const FermionField &in, FermionField &out,int dag) 
-{
-  typedef typename Simd::scalar_type S;
-  typedef typename Simd::vector_type V;
-
-  Simd even_0; // accumulator for the legs tagged `even` (3 components)
-  Simd even_1;
-  Simd even_2;
-  Simd odd_0; // accumulator for the legs tagged `odd` (3 components)
-  Simd odd_1;
-  Simd odd_2;
-
-  Simd Chi_0;    // neighbour field for the current leg, filled by LOAD_CHI
-  Simd Chi_1;
-  Simd Chi_2;
-  
-  Simd U_00;  // the nine link-matrix elements for the current leg
-  Simd U_10;
-  Simd U_20;  
-  Simd U_01;
-  Simd U_11;
-  Simd U_21;
-  Simd U_02;
-  Simd U_12;
-  Simd U_22; 
-
-  SiteSpinor result;
-  int offset,local,perm, ptype;
-
-  StencilEntry *SE;
-  int skew;
-
-  for(int s=0;s<LLs;s++){
-    int sF=s+LLs*sU;
-
-    // Every leg here accumulates (any leg may be skipped), so start from zero.
-    even_0 = zero;    even_1 = zero;    even_2 = zero;
-     odd_0 = zero;     odd_1 = zero;     odd_2 = zero;
-    int nmu=0; // number of exterior legs found for this site
-    skew = 0;
-    HAND_STENCIL_LEG_EXT(U,Xp,3,skew,even);  
-    HAND_STENCIL_LEG_EXT(U,Yp,2,skew,odd);   
-    HAND_STENCIL_LEG_EXT(U,Zp,1,skew,even);  
-    HAND_STENCIL_LEG_EXT(U,Tp,0,skew,odd);  
-    HAND_STENCIL_LEG_EXT(U,Xm,3,skew,even);  
-    HAND_STENCIL_LEG_EXT(U,Ym,2,skew,odd);   
-    HAND_STENCIL_LEG_EXT(U,Zm,1,skew,even);  
-    HAND_STENCIL_LEG_EXT(U,Tm,0,skew,odd);  
-    // Second set of eight legs: stencil entries offset by 8, links from UUU.
-    skew = 8;
-    HAND_STENCIL_LEG_EXT(UUU,Xp,3,skew,even);  
-    HAND_STENCIL_LEG_EXT(UUU,Yp,2,skew,odd);   
-    HAND_STENCIL_LEG_EXT(UUU,Zp,1,skew,even);  
-    HAND_STENCIL_LEG_EXT(UUU,Tp,0,skew,odd);  
-    HAND_STENCIL_LEG_EXT(UUU,Xm,3,skew,even);  
-    HAND_STENCIL_LEG_EXT(UUU,Ym,2,skew,odd);   
-    HAND_STENCIL_LEG_EXT(UUU,Zm,1,skew,even);  
-    HAND_STENCIL_LEG_EXT(UUU,Tm,0,skew,odd);  
-
-    // Add sum of all exterior connected stencil legs
-    if ( nmu ) { 
-      if ( dag ) {
-	result()()(0) = - even_0 - odd_0;
-	result()()(1) = - even_1 - odd_1;
-	result()()(2) = - even_2 - odd_2;
-      } else { 
-	result()()(0) = even_0 + odd_0;
-	result()()(1) = even_1 + odd_1;
-	result()()(2) = even_2 + odd_2;
-      }
-      // Read-modify-write: adds to whatever out already holds at this site.
-      out._odata[sF] = out._odata[sF] + result;
-    }
-  }
-}
-
-
-// Explicitly instantiate the three hand-unrolled kernels (DhopSiteHand,
-// DhopSiteHandInt, DhopSiteHandExt) of StaggeredKernels<IMPL>.
-// NOTE: the last line of the macro ends in a backslash, so the definition
-// continues onto the blank line that follows it; that line must stay blank.
-#define DHOP_SITE_HAND_INSTANTIATE(IMPL)				\
-  template void StaggeredKernels<IMPL>::DhopSiteHand(StencilImpl &st, LebesgueOrder &lo, \
-						     DoubledGaugeField &U,DoubledGaugeField &UUU, \
-						     SiteSpinor *buf, int LLs, int sU, \
-						     const FermionField &in, FermionField &out, int dag); \
-									\
-  template void StaggeredKernels<IMPL>::DhopSiteHandInt(StencilImpl &st, LebesgueOrder &lo, \
-						     DoubledGaugeField &U,DoubledGaugeField &UUU, \
-						     SiteSpinor *buf, int LLs, int sU, \
-						     const FermionField &in, FermionField &out, int dag); \
-									\
-  template void StaggeredKernels<IMPL>::DhopSiteHandExt(StencilImpl &st, LebesgueOrder &lo, \
-						     DoubledGaugeField &U,DoubledGaugeField &UUU, \
-						     SiteSpinor *buf, int LLs, int sU, \
-						     const FermionField &in, FermionField &out, int dag); \
-
-// Instantiated for the four staggered implementations named below.
-DHOP_SITE_HAND_INSTANTIATE(StaggeredImplD);
-DHOP_SITE_HAND_INSTANTIATE(StaggeredImplF);
-DHOP_SITE_HAND_INSTANTIATE(StaggeredVec5dImplD);
-DHOP_SITE_HAND_INSTANTIATE(StaggeredVec5dImplF);
-
-
-}
-}
-
--- a/Grid/qcd/action/fermion/WilsonCloverFermion.cc
+++ b/Grid/qcd/action/fermion/WilsonCloverFermion.cc
@@ -1,243 +0,0 @@
-/*************************************************************************************
-
-    Grid physics library, www.github.com/paboyle/Grid
-
-    Source file: ./lib/qcd/action/fermion/WilsonCloverFermion.cc
-
-    Copyright (C) 2017
-
-    Author: paboyle <paboyle@ph.ed.ac.uk>
-    Author: Guido Cossu <guido.cossu@ed.ac.uk>
-
-    This program is free software; you can redistribute it and/or modify
-    it under the terms of the GNU General Public License as published by
-    the Free Software Foundation; either version 2 of the License, or
-    (at your option) any later version.
-
-    This program is distributed in the hope that it will be useful,
-    but WITHOUT ANY WARRANTY; without even the implied warranty of
-    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-    GNU General Public License for more details.
-
-    You should have received a copy of the GNU General Public License along
-    with this program; if not, write to the Free Software Foundation, Inc.,
-    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
-
-    See the full license in the file "LICENSE" in the top level distribution directory
-    *************************************************************************************/
-/*  END LEGAL */
-#include <Grid/Grid.h>
-//#include <Grid/Eigen/Dense>
-#include <Grid/qcd/spin/Dirac.h>
-
-namespace Grid
-{
-namespace QCD
-{
-
-// *NOT* EO
-// Apply the operator: out = Dhop(in) + Mooee(in).
-// Returns norm2 of the resulting field.
-template <class Impl>
-RealD WilsonCloverFermion<Impl>::M(const FermionField &in, FermionField &out) {
-  // Hopping (Wilson) contribution goes straight into out.
-  out.checkerboard = in.checkerboard;
-  this->Dhop(in, out, DaggerNo);
-
-  // Site-diagonal (clover) contribution is built in a scratch field on out's
-  // grid and then added on top.
-  FermionField clover_part(out._grid);
-  this->Mooee(in, clover_part);
-  out += clover_part;
-
-  return norm2(out);
-}
-
-// Apply the daggered operator: out = Dhop(in, DaggerYes) + MooeeDag(in).
-// Returns norm2 of the resulting field.
-template <class Impl>
-RealD WilsonCloverFermion<Impl>::Mdag(const FermionField &in, FermionField &out) {
-  // Hopping (Wilson) contribution goes straight into out.
-  out.checkerboard = in.checkerboard;
-  this->Dhop(in, out, DaggerYes);
-
-  // Site-diagonal (clover) contribution is built in a scratch field on out's
-  // grid and then added on top.
-  FermionField clover_part(out._grid);
-  this->MooeeDag(in, clover_part);
-  out += clover_part;
-
-  return norm2(out);
-}
-
-// Import a gauge configuration and rebuild every cached clover field.
-// Steps, in order:
-//   1. forward the gauge field to WilsonFermion<Impl>::ImportGauge;
-//   2. compute six field-strength components and assemble CloverTerm from
-//      them (weighted by csw_r / csw_t) plus diag_mass;
-//   3. invert CloverTerm site by site with Eigen into CloverTermInv;
-//   4. split CloverTerm, CloverTermInv and their adjoints into Even/Odd
-//      checkerboard copies.
-template <class Impl>
-void WilsonCloverFermion<Impl>::ImportGauge(const GaugeField &_Umu)
-{
-  WilsonFermion<Impl>::ImportGauge(_Umu);
-  GridBase *grid = _Umu._grid;
-  typename Impl::GaugeLinkField Bx(grid), By(grid), Bz(grid), Ex(grid), Ey(grid), Ez(grid);
-
-  // Compute the field strength terms mu>nu
-  WilsonLoops<Impl>::FieldStrength(Bx, _Umu, Zdir, Ydir);
-  WilsonLoops<Impl>::FieldStrength(By, _Umu, Zdir, Xdir);
-  WilsonLoops<Impl>::FieldStrength(Bz, _Umu, Ydir, Xdir);
-  WilsonLoops<Impl>::FieldStrength(Ex, _Umu, Tdir, Xdir);
-  WilsonLoops<Impl>::FieldStrength(Ey, _Umu, Tdir, Ydir);
-  WilsonLoops<Impl>::FieldStrength(Ez, _Umu, Tdir, Zdir);
-
-  // Compute the Clover Operator acting on Colour and Spin
-  // multiply here by the clover coefficients for the anisotropy
-  // (the three B components use csw_r, the three E components use csw_t)
-  CloverTerm  = fillCloverYZ(Bx) * csw_r;
-  CloverTerm += fillCloverXZ(By) * csw_r;
-  CloverTerm += fillCloverXY(Bz) * csw_r;
-  CloverTerm += fillCloverXT(Ex) * csw_t;
-  CloverTerm += fillCloverYT(Ey) * csw_t;
-  CloverTerm += fillCloverZT(Ez) * csw_t;
-  CloverTerm += diag_mass;
-
-  int lvol = _Umu._grid->lSites();
-  int DimRep = Impl::Dimension;
-
-  // Scratch (Ns*DimRep) x (Ns*DimRep) dense matrices for the per-site inverse.
-  Eigen::MatrixXcd EigenCloverOp = Eigen::MatrixXcd::Zero(Ns * DimRep, Ns * DimRep);
-  Eigen::MatrixXcd EigenInvCloverOp = Eigen::MatrixXcd::Zero(Ns * DimRep, Ns * DimRep);
-
-  std::vector<int> lcoor;
-  typename SiteCloverType::scalar_object Qx = zero, Qxinv = zero;
-
-  // Serial loop over all local sites: peek the site matrix, invert, poke back.
-  for (int site = 0; site < lvol; site++)
-  {
-    grid->LocalIndexToLocalCoor(site, lcoor);
-    EigenCloverOp = Eigen::MatrixXcd::Zero(Ns * DimRep, Ns * DimRep);
-    peekLocalSite(Qx, CloverTerm, lcoor);
-    Qxinv = zero;
-    //if (csw!=0){
-    // Flatten the (spin j,k) x (rep a,b) tensor into one dense matrix with
-    // row index a + j*DimRep and column index b + k*DimRep.
-    for (int j = 0; j < Ns; j++)
-      for (int k = 0; k < Ns; k++)
-        for (int a = 0; a < DimRep; a++)
-          for (int b = 0; b < DimRep; b++)
-            EigenCloverOp(a + j * DimRep, b + k * DimRep) = Qx()(j, k)(a, b);
-    //   if (site==0) std::cout << "site =" << site << "\n" << EigenCloverOp << std::endl;
-
-    EigenInvCloverOp = EigenCloverOp.inverse();
-    //std::cout << EigenInvCloverOp << std::endl;
-    // Unflatten the inverse back into the tensor layout (same index mapping).
-    for (int j = 0; j < Ns; j++)
-      for (int k = 0; k < Ns; k++)
-        for (int a = 0; a < DimRep; a++)
-          for (int b = 0; b < DimRep; b++)
-            Qxinv()(j, k)(a, b) = EigenInvCloverOp(a + j * DimRep, b + k * DimRep);
-    //    if (site==0) std::cout << "site =" << site << "\n" << EigenInvCloverOp << std::endl;
-    //  }
-    pokeLocalSite(Qxinv, CloverTermInv, lcoor);
-  }
-
-  // Separate the even and odd parts
-  pickCheckerboard(Even, CloverTermEven, CloverTerm);
-  pickCheckerboard(Odd, CloverTermOdd, CloverTerm);
-
-  // Adjoint of the clover term, per checkerboard
-  pickCheckerboard(Even, CloverTermDagEven, adj(CloverTerm));
-  pickCheckerboard(Odd, CloverTermDagOdd, adj(CloverTerm));
-
-  // Inverse of the clover term, per checkerboard
-  pickCheckerboard(Even, CloverTermInvEven, CloverTermInv);
-  pickCheckerboard(Odd, CloverTermInvOdd, CloverTermInv);
-
-  // Adjoint of the inverse, per checkerboard
-  pickCheckerboard(Even, CloverTermInvDagEven, adj(CloverTermInv));
-  pickCheckerboard(Odd, CloverTermInvDagOdd, adj(CloverTermInv));
-}
-
-// Site-diagonal term: no dagger, no inverse.
-template <class Impl>
-void WilsonCloverFermion<Impl>::Mooee(const FermionField &in, FermionField &out) {
-  MooeeInternal(in, out, DaggerNo, InverseNo);
-}
-
-// Site-diagonal term: daggered, no inverse.
-template <class Impl>
-void WilsonCloverFermion<Impl>::MooeeDag(const FermionField &in, FermionField &out) {
-  MooeeInternal(in, out, DaggerYes, InverseNo);
-}
-
-// Site-diagonal term: inverse, no dagger.
-template <class Impl>
-void WilsonCloverFermion<Impl>::MooeeInv(const FermionField &in, FermionField &out) {
-  MooeeInternal(in, out, DaggerNo, InverseYes);
-}
-
-// Site-diagonal term: daggered inverse.
-template <class Impl>
-void WilsonCloverFermion<Impl>::MooeeInvDag(const FermionField &in, FermionField &out) {
-  MooeeInternal(in, out, DaggerYes, InverseYes);
-}
-
-// Multiply `in` by one of the cached clover fields and store it in `out`.
-//   dag : non-zero selects the adjoint
-//   inv : non-zero selects the inverse
-// On a checkerboarded grid the pre-built Even/Odd field matching
-// in.checkerboard is used (the Dag variants already hold the adjoint).
-// On a full grid CloverTerm / CloverTermInv is used, with adj() applied on the
-// fly when dag is set.
-template <class Impl>
-void WilsonCloverFermion<Impl>::MooeeInternal(const FermionField &in, FermionField &out, int dag, int inv)
-{
-  out.checkerboard = in.checkerboard;
-  assert(in.checkerboard == Odd || in.checkerboard == Even);
-
-  // Full-grid case: handle it and leave early.
-  if (!in._grid->_isCheckerBoarded)
-  {
-    CloverFieldType &full_term = (inv) ? CloverTermInv : CloverTerm;
-    if (dag)
-    {
-      out = adj(full_term) * in;
-    }
-    else
-    {
-      out = full_term * in;
-    }
-    return;
-  }
-
-  // Checkerboarded case: choose among the eight cached half-grid fields.
-  const bool on_odd = (in.checkerboard == Odd);
-  CloverFieldType *half_term;
-  if (dag)
-  {
-    half_term = on_odd ? ((inv) ? &CloverTermInvDagOdd : &CloverTermDagOdd)
-                       : ((inv) ? &CloverTermInvDagEven : &CloverTermDagEven);
-  }
-  else
-  {
-    half_term = on_odd ? ((inv) ? &CloverTermInvOdd : &CloverTermOdd)
-                       : ((inv) ? &CloverTermInvEven : &CloverTermEven);
-  }
-  out = *half_term * in;
-
-} // MooeeInternal
-
-
-// Derivative parts
-// MooDeriv is not implemented: the body is a bare assert(0) and none of the
-// arguments are touched.  NOTE(review): with NDEBUG defined the assert
-// compiles away and the call would return silently with `mat` unchanged.
-template <class Impl>
-void WilsonCloverFermion<Impl>::MooDeriv(GaugeField &mat, const FermionField &X, const FermionField &Y, int dag)
-{
-  assert(0);
-}
-
-// Derivative parts
-// MeeDeriv is not implemented: the body is a bare assert(0) and none of the
-// arguments are touched.  NOTE(review): with NDEBUG defined the assert
-// compiles away and the call would return silently with `mat` unchanged.
-template <class Impl>
-void WilsonCloverFermion<Impl>::MeeDeriv(GaugeField &mat, const FermionField &U, const FermionField &V, int dag)
-{
-  assert(0); // not implemented yet
-}
-
-FermOpTemplateInstantiate(WilsonCloverFermion);
-AdjointFermOpTemplateInstantiate(WilsonCloverFermion);
-TwoIndexFermOpTemplateInstantiate(WilsonCloverFermion);
-//GparityFermOpTemplateInstantiate(WilsonCloverFermion);
-}
-}
--- a/Grid/qcd/action/fermion/WilsonCloverFermion.h
+++ b/Grid/qcd/action/fermion/WilsonCloverFermion.h
@@ -1,367 +0,0 @@
-/*************************************************************************************
-
-    Grid physics library, www.github.com/paboyle/Grid
-
-    Source file: ./lib/qcd/action/fermion/WilsonCloverFermion.h
-
-    Copyright (C) 2017
-
-    Author: Guido Cossu <guido.cossu@ed.ac.uk>
-    Author: David Preti <>
-
-    This program is free software; you can redistribute it and/or modify
-    it under the terms of the GNU General Public License as published by
-    the Free Software Foundation; either version 2 of the License, or
-    (at your option) any later version.
-
-    This program is distributed in the hope that it will be useful,
-    but WITHOUT ANY WARRANTY; without even the implied warranty of
-    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-    GNU General Public License for more details.
-
-    You should have received a copy of the GNU General Public License along
-    with this program; if not, write to the Free Software Foundation, Inc.,
-    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
-
-    See the full license in the file "LICENSE" in the top level distribution directory
-    *************************************************************************************/
-/*  END LEGAL */
-
-#ifndef GRID_QCD_WILSON_CLOVER_FERMION_H
-#define GRID_QCD_WILSON_CLOVER_FERMION_H
-
-#include <Grid/Grid.h>
-
-namespace Grid
-{
-namespace QCD
-{
-
-///////////////////////////////////////////////////////////////////
-// Wilson Clover
-//
-// Operator ( with anisotropy coefficients):
-//
-// Q =   1 + (Nd-1)/xi_0 + m
-//     + W_t + (nu/xi_0) * W_s
-//     - 1/2*[ csw_t * sum_s (sigma_ts F_ts) + (csw_s/xi_0) * sum_ss (sigma_ss F_ss)  ]
-//
-// s spatial, t temporal directions.
-// where W_t and W_s are the temporal and spatial components of the
-// Wilson Dirac operator
-//
-// csw_r = csw_t to recover the isotropic version
-//////////////////////////////////////////////////////////////////
-
-template <class Impl>
-class WilsonCloverFermion : public WilsonFermion<Impl>
-{
-public:
-  // Types definitions
-  INHERIT_IMPL_TYPES(Impl);
-  template <typename vtype>
-  using iImplClover = iScalar<iMatrix<iMatrix<vtype, Impl::Dimension>, Ns>>;
-  typedef iImplClover<Simd> SiteCloverType;
-  typedef Lattice<SiteCloverType> CloverFieldType;
-
-public:
-  typedef WilsonFermion<Impl> WilsonBase;
-
-  virtual int    ConstEE(void)     { return 0; };
-  virtual void Instantiatable(void){};
-  // Constructors
-  // Builds the Wilson base operator, allocates the clover fields (full-grid
-  // ones on Fgrid, checkerboarded ones on Hgrid), derives the stored
-  // coefficients and finally imports the gauge field.
-  // Stored values: csw_t = _csw_t/2; csw_r = _csw_r/2, additionally divided
-  // by xi_0 when the anisotropy is enabled; diag_mass is 4 + _mass in the
-  // isotropic case and _mass + 1 + (Nd-1)*nu/xi_0 otherwise.
-  WilsonCloverFermion(GaugeField &_Umu, GridCartesian &Fgrid,
-                      GridRedBlackCartesian &Hgrid,
-                      const RealD _mass,
-                      const RealD _csw_r = 0.0,
-                      const RealD _csw_t = 0.0,
-                      const WilsonAnisotropyCoefficients &clover_anisotropy = WilsonAnisotropyCoefficients(),
-                      const ImplParams &impl_p = ImplParams())
-      : WilsonFermion<Impl>(_Umu, Fgrid, Hgrid, _mass, impl_p, clover_anisotropy),
-        CloverTerm(&Fgrid),
-        CloverTermInv(&Fgrid),
-        CloverTermEven(&Hgrid),
-        CloverTermOdd(&Hgrid),
-        CloverTermInvEven(&Hgrid),
-        CloverTermInvOdd(&Hgrid),
-        CloverTermDagEven(&Hgrid),
-        CloverTermDagOdd(&Hgrid),
-        CloverTermInvDagEven(&Hgrid),
-        CloverTermInvDagOdd(&Hgrid)
-  {
-    assert(Nd == 4); // require 4 dimensions
-
-    const bool anisotropic = clover_anisotropy.isAnisotropic;
-
-    csw_t = _csw_t * 0.5;
-    csw_r = anisotropic ? _csw_r * 0.5 / clover_anisotropy.xi_0
-                        : _csw_r * 0.5;
-    diag_mass = anisotropic
-                    ? _mass + 1.0 + (Nd - 1) * (clover_anisotropy.nu / clover_anisotropy.xi_0)
-                    : 4.0 + _mass;
-
-    // A vanishing coefficient is allowed but worth flagging to the user.
-    if (csw_r == 0)
-    {
-      std::cout << GridLogWarning << "Initializing WilsonCloverFermion with csw_r = 0" << std::endl;
-    }
-    if (csw_t == 0)
-    {
-      std::cout << GridLogWarning << "Initializing WilsonCloverFermion with csw_t = 0" << std::endl;
-    }
-
-    ImportGauge(_Umu);
-  }
-
-  virtual RealD M(const FermionField &in, FermionField &out);
-  virtual RealD Mdag(const FermionField &in, FermionField &out);
-
-  virtual void Mooee(const FermionField &in, FermionField &out);
-  virtual void MooeeDag(const FermionField &in, FermionField &out);
-  virtual void MooeeInv(const FermionField &in, FermionField &out);
-  virtual void MooeeInvDag(const FermionField &in, FermionField &out);
-  virtual void MooeeInternal(const FermionField &in, FermionField &out, int dag, int inv);
-
-  //virtual void MDeriv(GaugeField &mat, const FermionField &U, const FermionField &V, int dag);
-  virtual void MooDeriv(GaugeField &mat, const FermionField &U, const FermionField &V, int dag);
-  virtual void MeeDeriv(GaugeField &mat, const FermionField &U, const FermionField &V, int dag);
-
-  void ImportGauge(const GaugeField &_Umu);
-
-  // Derivative parts unpreconditioned pseudofermions
-  //
-  // Writes into `force` the sum of
-  //   * the hopping-term derivative from DhopDeriv(force, X, Y, dag), and
-  //   * the clover-term derivative, built for every ordered pair mu != nu from
-  //     the spin trace of sigma_{mu nu} * (outer product of X and Y) fed
-  //     through Cmunu, weighted by 2*csw_t when the plane contains the
-  //     temporal direction and by 2*csw_r otherwise.
-  // X, Y and force must live on conformable grids.
-  void MDeriv(GaugeField &force, const FermionField &X, const FermionField &Y, int dag)
-  {
-    conformable(X._grid, Y._grid);
-    conformable(X._grid, force._grid);
-    GaugeLinkField force_mu(force._grid), lambda(force._grid);
-    GaugeField clover_force(force._grid);
-    PropagatorField Lambda(force._grid);
-
-    // Guido: Here we are hitting some performance issues:
-    // need to extract the components of the DoubledGaugeField
-    // for each call
-    // Possible solution
-    // Create a vector object to store them? (cons: wasting space)
-    std::vector<GaugeLinkField> U(Nd, this->Umu._grid);
-
-    Impl::extractLinkField(U, this->Umu);
-
-    force = zero;
-    // Derivative of the Wilson hopping term
-    this->DhopDeriv(force, X, Y, dag);
-
-    ///////////////////////////////////////////////////////////
-    // Clover term derivative
-    ///////////////////////////////////////////////////////////
-    Impl::outerProductImpl(Lambda, X, Y);
-    //std::cout << "Lambda:" << Lambda << std::endl;
-
-    // Row-major list of the off-diagonal sigma_{mu nu}, see the table below.
-    Gamma::Algebra sigma[] = {
-        Gamma::Algebra::SigmaXY,
-        Gamma::Algebra::SigmaXZ,
-        Gamma::Algebra::SigmaXT,
-        Gamma::Algebra::MinusSigmaXY,
-        Gamma::Algebra::SigmaYZ,
-        Gamma::Algebra::SigmaYT,
-        Gamma::Algebra::MinusSigmaXZ,
-        Gamma::Algebra::MinusSigmaYZ,
-        Gamma::Algebra::SigmaZT,
-        Gamma::Algebra::MinusSigmaXT,
-        Gamma::Algebra::MinusSigmaYT,
-        Gamma::Algebra::MinusSigmaZT};
-
-    /*
-      sigma_{\mu \nu}=
-      | 0         sigma[0]  sigma[1]  sigma[2] |
-      | sigma[3]    0       sigma[4]  sigma[5] |
-      | sigma[6]  sigma[7]     0      sigma[8] |
-      | sigma[9]  sigma[10] sigma[11]   0      |
-    */
-
-    int count = 0; // walks `sigma` in step with the (mu, nu) double loop
-    clover_force = zero;
-    for (int mu = 0; mu < 4; mu++)
-    {
-      force_mu = zero;
-      for (int nu = 0; nu < 4; nu++)
-      {
-        if (mu == nu)
-          continue;
-
-        // mu and nu only take the values 0..3, so the temporal direction is
-        // Tdir (the last row/column of the sigma table above).  The previous
-        // test against 4 could never be true, which meant csw_t was silently
-        // ignored and csw_r was applied to every plane.
-        const bool temporal_plane = (mu == Tdir || nu == Tdir);
-        const RealD factor = temporal_plane ? 2.0 * csw_t : 2.0 * csw_r;
-
-        PropagatorField Slambda = Gamma(sigma[count]) * Lambda; // sigma checked
-        Impl::TraceSpinImpl(lambda, Slambda);                   // traceSpin ok
-        force_mu -= factor*Cmunu(U, lambda, mu, nu);                   // checked
-        count++;
-      }
-
-      pokeLorentz(clover_force, U[mu] * force_mu, mu);
-    }
-    //clover_force *= csw;
-    force += clover_force;
-  }
-
-  // Computing C_{\mu \nu}(x) as in Eq.(B.39) in Zbigniew Sroczynski's PhD thesis
-  //
-  // Returns a new link field: the sum of four "upper staple" terms (C1+..C4+)
-  // minus four "lower staple" terms (C1-..C4-).  Each term is a chain of
-  // covariant shifts of U[mu] and U[nu] with `lambda` (or its adjoint)
-  // inserted at a different position.  U and lambda are only read; lambda
-  // must be conformable with U[0].
-  GaugeLinkField Cmunu(std::vector<GaugeLinkField> &U, GaugeLinkField &lambda, int mu, int nu)
-  {
-    conformable(lambda._grid, U[0]._grid);
-    GaugeLinkField out(lambda._grid), tmp(lambda._grid);
-    // insertion in upper staple
-    // please check redundancy of shift operations
-
-    // C1+  (first term: assigns `out`, every later term accumulates)
-    tmp = lambda * U[nu];
-    out = Impl::ShiftStaple(Impl::CovShiftForward(tmp, nu, Impl::CovShiftBackward(U[mu], mu, Impl::CovShiftIdentityBackward(U[nu], nu))), mu);
-
-    // C2+
-    tmp = U[mu] * Impl::ShiftStaple(adj(lambda), mu);
-    out += Impl::ShiftStaple(Impl::CovShiftForward(U[nu], nu, Impl::CovShiftBackward(tmp, mu, Impl::CovShiftIdentityBackward(U[nu], nu))), mu);
-
-    // C3+
-    tmp = U[nu] * Impl::ShiftStaple(adj(lambda), nu);
-    out += Impl::ShiftStaple(Impl::CovShiftForward(U[nu], nu, Impl::CovShiftBackward(U[mu], mu, Impl::CovShiftIdentityBackward(tmp, nu))), mu);
-
-    // C4+
-    out += Impl::ShiftStaple(Impl::CovShiftForward(U[nu], nu, Impl::CovShiftBackward(U[mu], mu, Impl::CovShiftIdentityBackward(U[nu], nu))), mu) * lambda;
-
-    // insertion in lower staple
-    // C1-
-    out -= Impl::ShiftStaple(lambda, mu) * Impl::ShiftStaple(Impl::CovShiftBackward(U[nu], nu, Impl::CovShiftBackward(U[mu], mu, U[nu])), mu);
-
-    // C2-
-    tmp = adj(lambda) * U[nu];
-    out -= Impl::ShiftStaple(Impl::CovShiftBackward(tmp, nu, Impl::CovShiftBackward(U[mu], mu, U[nu])), mu);
-
-    // C3-
-    tmp = lambda * U[nu];
-    out -= Impl::ShiftStaple(Impl::CovShiftBackward(U[nu], nu, Impl::CovShiftBackward(U[mu], mu, tmp)), mu);
-
-    // C4-
-    out -= Impl::ShiftStaple(Impl::CovShiftBackward(U[nu], nu, Impl::CovShiftBackward(U[mu], mu, U[nu])), mu) * lambda;
-
-    return out;
-  }
-
-private:
-  // here fixing the 4 dimensions, make it more general?
-
-  RealD csw_r;                                               // Clover coefficient - spatial
-  RealD csw_t;                                               // Clover coefficient - temporal
-  RealD diag_mass;                                           // Mass term
-  CloverFieldType CloverTerm, CloverTermInv;                 // Clover term
-  CloverFieldType CloverTermEven, CloverTermOdd;             // Clover term EO
-  CloverFieldType CloverTermInvEven, CloverTermInvOdd;       // Clover term Inv EO
-  CloverFieldType CloverTermDagEven, CloverTermDagOdd;       // Clover term Dag EO
-  CloverFieldType CloverTermInvDagEven, CloverTermInvDagOdd; // Clover term Inv Dag EO
-
-  // eventually these can be compressed into 6x6 blocks instead of the 12x12
-  // using the DeGrand-Rossi basis for the gamma matrices
-  // Embed F into a clover field whose spin blocks (0,1), (1,0), (2,3) and
-  // (3,2) all hold -i*F; every other spin block is zero.
-  // NOTE(review): the loop bound comes from CloverTerm's grid while T lives on
-  // F's grid - presumably the same grid; confirm against callers.
-  CloverFieldType fillCloverYZ(const GaugeLinkField &F)
-  {
-    CloverFieldType T(F._grid);
-    T = zero;
-    PARALLEL_FOR_LOOP
-    for (int ss = 0; ss < CloverTerm._grid->oSites(); ss++)
-    {
-      const auto minus_i_F = timesMinusI(F._odata[ss]()());
-      auto &spin = T._odata[ss]();
-      spin(0, 1) = minus_i_F;
-      spin(1, 0) = minus_i_F;
-      spin(2, 3) = minus_i_F;
-      spin(3, 2) = minus_i_F;
-    }
-
-    return T;
-  }
-
-  // Embed F into a clover field with spin blocks (0,1) = (2,3) = -F and
-  // (1,0) = (3,2) = +F; every other spin block is zero.
-  // NOTE(review): the loop bound comes from CloverTerm's grid while T lives on
-  // F's grid - presumably the same grid; confirm against callers.
-  CloverFieldType fillCloverXZ(const GaugeLinkField &F)
-  {
-    CloverFieldType T(F._grid);
-    T = zero;
-    PARALLEL_FOR_LOOP
-    for (int ss = 0; ss < CloverTerm._grid->oSites(); ss++)
-    {
-      const auto &plus_F = F._odata[ss]()();
-      const auto minus_F = -F._odata[ss]()();
-      auto &spin = T._odata[ss]();
-      spin(0, 1) = minus_F;
-      spin(1, 0) = plus_F;
-      spin(2, 3) = minus_F;
-      spin(3, 2) = plus_F;
-    }
-
-    return T;
-  }
-
-  // Embed F into a clover field that is diagonal in spin:
-  // blocks (0,0) = (2,2) = -i*F and (1,1) = (3,3) = +i*F.
-  // NOTE(review): the loop bound comes from CloverTerm's grid while T lives on
-  // F's grid - presumably the same grid; confirm against callers.
-  CloverFieldType fillCloverXY(const GaugeLinkField &F)
-  {
-    CloverFieldType T(F._grid);
-    T = zero;
-    PARALLEL_FOR_LOOP
-    for (int ss = 0; ss < CloverTerm._grid->oSites(); ss++)
-    {
-      const auto minus_i_F = timesMinusI(F._odata[ss]()());
-      const auto plus_i_F = timesI(F._odata[ss]()());
-      auto &spin = T._odata[ss]();
-      spin(0, 0) = minus_i_F;
-      spin(1, 1) = plus_i_F;
-      spin(2, 2) = minus_i_F;
-      spin(3, 3) = plus_i_F;
-    }
-
-    return T;
-  }
-
-  // Embed F into a clover field with spin blocks (0,1) = (1,0) = +i*F and
-  // (2,3) = (3,2) = -i*F; every other spin block is zero.
-  // NOTE(review): the loop bound comes from CloverTerm's grid while T lives on
-  // F's grid - presumably the same grid; confirm against callers.
-  CloverFieldType fillCloverXT(const GaugeLinkField &F)
-  {
-    CloverFieldType T(F._grid);
-    T = zero;
-    PARALLEL_FOR_LOOP
-    for (int ss = 0; ss < CloverTerm._grid->oSites(); ss++)
-    {
-      const auto plus_i_F = timesI(F._odata[ss]()());
-      const auto minus_i_F = timesMinusI(F._odata[ss]()());
-      auto &spin = T._odata[ss]();
-      spin(0, 1) = plus_i_F;
-      spin(1, 0) = plus_i_F;
-      spin(2, 3) = minus_i_F;
-      spin(3, 2) = minus_i_F;
-    }
-
-    return T;
-  }
-
-  // Embed F into a clover field with spin blocks (0,1) = (3,2) = -F and
-  // (1,0) = (2,3) = +F; every other spin block is zero.
-  // NOTE(review): the loop bound comes from CloverTerm's grid while T lives on
-  // F's grid - presumably the same grid; confirm against callers.
-  CloverFieldType fillCloverYT(const GaugeLinkField &F)
-  {
-    CloverFieldType T(F._grid);
-    T = zero;
-    PARALLEL_FOR_LOOP
-    for (int ss = 0; ss < CloverTerm._grid->oSites(); ss++)
-    {
-      const auto &plus_F = F._odata[ss]()();
-      const auto minus_F = -(F._odata[ss]()());
-      auto &spin = T._odata[ss]();
-      spin(0, 1) = minus_F;
-      spin(1, 0) = plus_F;
-      spin(2, 3) = plus_F;
-      spin(3, 2) = minus_F;
-    }
-
-    return T;
-  }
-
-  // Embed F into a clover field that is diagonal in spin:
-  // blocks (0,0) = (3,3) = +i*F and (1,1) = (2,2) = -i*F.
-  // NOTE(review): the loop bound comes from CloverTerm's grid while T lives on
-  // F's grid - presumably the same grid; confirm against callers.
-  CloverFieldType fillCloverZT(const GaugeLinkField &F)
-  {
-    CloverFieldType T(F._grid);
-    T = zero;
-    PARALLEL_FOR_LOOP
-    for (int ss = 0; ss < CloverTerm._grid->oSites(); ss++)
-    {
-      const auto plus_i_F = timesI(F._odata[ss]()());
-      const auto minus_i_F = timesMinusI(F._odata[ss]()());
-      auto &spin = T._odata[ss]();
-      spin(0, 0) = plus_i_F;
-      spin(1, 1) = minus_i_F;
-      spin(2, 2) = minus_i_F;
-      spin(3, 3) = plus_i_F;
-    }
-
-    return T;
-  }
-};
-}
-}
-
-#endif // GRID_QCD_WILSON_CLOVER_FERMION_H
--- a/Grid/qcd/action/fermion/WilsonKernelsHand.cc
+++ b/Grid/qcd/action/fermion/WilsonKernelsHand.cc
@@ -1,631 +0,0 @@
-    /*************************************************************************************
-
-    Grid physics library, www.github.com/paboyle/Grid 
-
-    Source file: ./lib/qcd/action/fermion/WilsonKernelsHand.cc
-
-    Copyright (C) 2015
-
-Author: Peter Boyle <paboyle@ph.ed.ac.uk>
-Author: paboyle <paboyle@ph.ed.ac.uk>
-
-    This program is free software; you can redistribute it and/or modify
-    it under the terms of the GNU General Public License as published by
-    the Free Software Foundation; either version 2 of the License, or
-    (at your option) any later version.
-
-    This program is distributed in the hope that it will be useful,
-    but WITHOUT ANY WARRANTY; without even the implied warranty of
-    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-    GNU General Public License for more details.
-
-    You should have received a copy of the GNU General Public License along
-    with this program; if not, write to the Free Software Foundation, Inc.,
-    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
-
-    See the full license in the file "LICENSE" in the top level distribution directory
-    *************************************************************************************/
-    /*  END LEGAL */
-#include <Grid/qcd/action/fermion/FermionCore.h>
-
-#define REGISTER
-
-#define LOAD_CHIMU \
-  {const SiteSpinor & ref (in._odata[offset]);	\
-    Chimu_00=ref()(0)(0);\
-    Chimu_01=ref()(0)(1);\
-    Chimu_02=ref()(0)(2);\
-    Chimu_10=ref()(1)(0);\
-    Chimu_11=ref()(1)(1);\
-    Chimu_12=ref()(1)(2);\
-    Chimu_20=ref()(2)(0);\
-    Chimu_21=ref()(2)(1);\
-    Chimu_22=ref()(2)(2);\
-    Chimu_30=ref()(3)(0);\
-    Chimu_31=ref()(3)(1);\
-    Chimu_32=ref()(3)(2);}
-
-#define LOAD_CHI\
-  {const SiteHalfSpinor &ref(buf[offset]);	\
-    Chi_00 = ref()(0)(0);\
-    Chi_01 = ref()(0)(1);\
-    Chi_02 = ref()(0)(2);\
-    Chi_10 = ref()(1)(0);\
-    Chi_11 = ref()(1)(1);\
-    Chi_12 = ref()(1)(2);}
-
-// To splat or not to splat depends on the implementation
-#define MULT_2SPIN(A)\
-  {auto & ref(U._odata[sU](A));			\
-   Impl::loadLinkElement(U_00,ref()(0,0));	\
-   Impl::loadLinkElement(U_10,ref()(1,0));	\
-   Impl::loadLinkElement(U_20,ref()(2,0));	\
-   Impl::loadLinkElement(U_01,ref()(0,1));	\
-   Impl::loadLinkElement(U_11,ref()(1,1));	\
-   Impl::loadLinkElement(U_21,ref()(2,1));	\
-    UChi_00 = U_00*Chi_00;\
-    UChi_10 = U_00*Chi_10;\
-    UChi_01 = U_10*Chi_00;\
-    UChi_11 = U_10*Chi_10;\
-    UChi_02 = U_20*Chi_00;\
-    UChi_12 = U_20*Chi_10;\
-    UChi_00+= U_01*Chi_01;\
-    UChi_10+= U_01*Chi_11;\
-    UChi_01+= U_11*Chi_01;\
-    UChi_11+= U_11*Chi_11;\
-    UChi_02+= U_21*Chi_01;\
-    UChi_12+= U_21*Chi_11;\
-    Impl::loadLinkElement(U_00,ref()(0,2));	\
-    Impl::loadLinkElement(U_10,ref()(1,2));	\
-    Impl::loadLinkElement(U_20,ref()(2,2));	\
-    UChi_00+= U_00*Chi_02;\
-    UChi_10+= U_00*Chi_12;\
-    UChi_01+= U_10*Chi_02;\
-    UChi_11+= U_10*Chi_12;\
-    UChi_02+= U_20*Chi_02;\
-    UChi_12+= U_20*Chi_12;}
-
-
-#define PERMUTE_DIR(dir)			\
-      permute##dir(Chi_00,Chi_00);\
-      permute##dir(Chi_01,Chi_01);\
-      permute##dir(Chi_02,Chi_02);\
-      permute##dir(Chi_10,Chi_10);\
-      permute##dir(Chi_11,Chi_11);\
-      permute##dir(Chi_12,Chi_12);
-
-//      hspin(0)=fspin(0)+timesI(fspin(3));
-//      hspin(1)=fspin(1)+timesI(fspin(2));
-#define XP_PROJ \
-    Chi_00 = Chimu_00+timesI(Chimu_30);\
-    Chi_01 = Chimu_01+timesI(Chimu_31);\
-    Chi_02 = Chimu_02+timesI(Chimu_32);\
-    Chi_10 = Chimu_10+timesI(Chimu_20);\
-    Chi_11 = Chimu_11+timesI(Chimu_21);\
-    Chi_12 = Chimu_12+timesI(Chimu_22);
-
-#define YP_PROJ \
-    Chi_00 = Chimu_00-Chimu_30;\
-    Chi_01 = Chimu_01-Chimu_31;\
-    Chi_02 = Chimu_02-Chimu_32;\
-    Chi_10 = Chimu_10+Chimu_20;\
-    Chi_11 = Chimu_11+Chimu_21;\
-    Chi_12 = Chimu_12+Chimu_22;
-
-#define ZP_PROJ \
-  Chi_00 = Chimu_00+timesI(Chimu_20);		\
-  Chi_01 = Chimu_01+timesI(Chimu_21);		\
-  Chi_02 = Chimu_02+timesI(Chimu_22);		\
-  Chi_10 = Chimu_10-timesI(Chimu_30);		\
-  Chi_11 = Chimu_11-timesI(Chimu_31);		\
-  Chi_12 = Chimu_12-timesI(Chimu_32);
-
-#define TP_PROJ \
-  Chi_00 = Chimu_00+Chimu_20;		\
-  Chi_01 = Chimu_01+Chimu_21;		\
-  Chi_02 = Chimu_02+Chimu_22;		\
-  Chi_10 = Chimu_10+Chimu_30;		\
-  Chi_11 = Chimu_11+Chimu_31;		\
-  Chi_12 = Chimu_12+Chimu_32;
-
-
-//      hspin(0)=fspin(0)-timesI(fspin(3));
-//      hspin(1)=fspin(1)-timesI(fspin(2));
-#define XM_PROJ \
-    Chi_00 = Chimu_00-timesI(Chimu_30);\
-    Chi_01 = Chimu_01-timesI(Chimu_31);\
-    Chi_02 = Chimu_02-timesI(Chimu_32);\
-    Chi_10 = Chimu_10-timesI(Chimu_20);\
-    Chi_11 = Chimu_11-timesI(Chimu_21);\
-    Chi_12 = Chimu_12-timesI(Chimu_22);
-
-#define YM_PROJ \
-    Chi_00 = Chimu_00+Chimu_30;\
-    Chi_01 = Chimu_01+Chimu_31;\
-    Chi_02 = Chimu_02+Chimu_32;\
-    Chi_10 = Chimu_10-Chimu_20;\
-    Chi_11 = Chimu_11-Chimu_21;\
-    Chi_12 = Chimu_12-Chimu_22;
-
-#define ZM_PROJ \
-  Chi_00 = Chimu_00-timesI(Chimu_20);		\
-  Chi_01 = Chimu_01-timesI(Chimu_21);		\
-  Chi_02 = Chimu_02-timesI(Chimu_22);		\
-  Chi_10 = Chimu_10+timesI(Chimu_30);		\
-  Chi_11 = Chimu_11+timesI(Chimu_31);		\
-  Chi_12 = Chimu_12+timesI(Chimu_32);
-
-#define TM_PROJ \
-  Chi_00 = Chimu_00-Chimu_20;		\
-  Chi_01 = Chimu_01-Chimu_21;		\
-  Chi_02 = Chimu_02-Chimu_22;		\
-  Chi_10 = Chimu_10-Chimu_30;		\
-  Chi_11 = Chimu_11-Chimu_31;		\
-  Chi_12 = Chimu_12-Chimu_32;
-
-//      fspin(0)=hspin(0);
-//      fspin(1)=hspin(1);
-//      fspin(2)=timesMinusI(hspin(1));
-//      fspin(3)=timesMinusI(hspin(0));
-#define XP_RECON\
-  result_00 = UChi_00;\
-  result_01 = UChi_01;\
-  result_02 = UChi_02;\
-  result_10 = UChi_10;\
-  result_11 = UChi_11;\
-  result_12 = UChi_12;\
-  result_20 = timesMinusI(UChi_10);\
-  result_21 = timesMinusI(UChi_11);\
-  result_22 = timesMinusI(UChi_12);\
-  result_30 = timesMinusI(UChi_00);\
-  result_31 = timesMinusI(UChi_01);\
-  result_32 = timesMinusI(UChi_02);
-
-#define XP_RECON_ACCUM\
-  result_00+=UChi_00;\
-  result_01+=UChi_01;\
-  result_02+=UChi_02;\
-  result_10+=UChi_10;\
-  result_11+=UChi_11;\
-  result_12+=UChi_12;\
-  result_20-=timesI(UChi_10);\
-  result_21-=timesI(UChi_11);\
-  result_22-=timesI(UChi_12);\
-  result_30-=timesI(UChi_00);\
-  result_31-=timesI(UChi_01);\
-  result_32-=timesI(UChi_02);
-
-#define XM_RECON\
-  result_00 = UChi_00;\
-  result_01 = UChi_01;\
-  result_02 = UChi_02;\
-  result_10 = UChi_10;\
-  result_11 = UChi_11;\
-  result_12 = UChi_12;\
-  result_20 = timesI(UChi_10);\
-  result_21 = timesI(UChi_11);\
-  result_22 = timesI(UChi_12);\
-  result_30 = timesI(UChi_00);\
-  result_31 = timesI(UChi_01);\
-  result_32 = timesI(UChi_02);
-
-#define XM_RECON_ACCUM\
-  result_00+= UChi_00;\
-  result_01+= UChi_01;\
-  result_02+= UChi_02;\
-  result_10+= UChi_10;\
-  result_11+= UChi_11;\
-  result_12+= UChi_12;\
-  result_20+= timesI(UChi_10);\
-  result_21+= timesI(UChi_11);\
-  result_22+= timesI(UChi_12);\
-  result_30+= timesI(UChi_00);\
-  result_31+= timesI(UChi_01);\
-  result_32+= timesI(UChi_02);
-
-#define YP_RECON_ACCUM\
-  result_00+= UChi_00;\
-  result_01+= UChi_01;\
-  result_02+= UChi_02;\
-  result_10+= UChi_10;\
-  result_11+= UChi_11;\
-  result_12+= UChi_12;\
-  result_20+= UChi_10;\
-  result_21+= UChi_11;\
-  result_22+= UChi_12;\
-  result_30-= UChi_00;\
-  result_31-= UChi_01;\
-  result_32-= UChi_02;
-
-#define YM_RECON_ACCUM\
-  result_00+= UChi_00;\
-  result_01+= UChi_01;\
-  result_02+= UChi_02;\
-  result_10+= UChi_10;\
-  result_11+= UChi_11;\
-  result_12+= UChi_12;\
-  result_20-= UChi_10;\
-  result_21-= UChi_11;\
-  result_22-= UChi_12;\
-  result_30+= UChi_00;\
-  result_31+= UChi_01;\
-  result_32+= UChi_02;
-
-#define ZP_RECON_ACCUM\
-  result_00+= UChi_00;\
-  result_01+= UChi_01;\
-  result_02+= UChi_02;\
-  result_10+= UChi_10;\
-  result_11+= UChi_11;\
-  result_12+= UChi_12;\
-  result_20-= timesI(UChi_00);			\
-  result_21-= timesI(UChi_01);			\
-  result_22-= timesI(UChi_02);			\
-  result_30+= timesI(UChi_10);			\
-  result_31+= timesI(UChi_11);			\
-  result_32+= timesI(UChi_12);
-
-#define ZM_RECON_ACCUM\
-  result_00+= UChi_00;\
-  result_01+= UChi_01;\
-  result_02+= UChi_02;\
-  result_10+= UChi_10;\
-  result_11+= UChi_11;\
-  result_12+= UChi_12;\
-  result_20+= timesI(UChi_00);			\
-  result_21+= timesI(UChi_01);			\
-  result_22+= timesI(UChi_02);			\
-  result_30-= timesI(UChi_10);			\
-  result_31-= timesI(UChi_11);			\
-  result_32-= timesI(UChi_12);
-
-#define TP_RECON_ACCUM\
-  result_00+= UChi_00;\
-  result_01+= UChi_01;\
-  result_02+= UChi_02;\
-  result_10+= UChi_10;\
-  result_11+= UChi_11;\
-  result_12+= UChi_12;\
-  result_20+= UChi_00;			\
-  result_21+= UChi_01;			\
-  result_22+= UChi_02;			\
-  result_30+= UChi_10;			\
-  result_31+= UChi_11;			\
-  result_32+= UChi_12;
-
-#define TM_RECON_ACCUM\
-  result_00+= UChi_00;\
-  result_01+= UChi_01;\
-  result_02+= UChi_02;\
-  result_10+= UChi_10;\
-  result_11+= UChi_11;\
-  result_12+= UChi_12;\
-  result_20-= UChi_00;	\
-  result_21-= UChi_01;	\
-  result_22-= UChi_02;	\
-  result_30-= UChi_10;	\
-  result_31-= UChi_11;	\
-  result_32-= UChi_12;
-
-#define HAND_STENCIL_LEG(PROJ,PERM,DIR,RECON)	\
-  SE=st.GetEntry(ptype,DIR,ss);			\
-  offset = SE->_offset;				\
-  local  = SE->_is_local;			\
-  perm   = SE->_permute;			\
-  if ( local ) {				\
-    LOAD_CHIMU;					\
-    PROJ;					\
-    if ( perm) {				\
-      PERMUTE_DIR(PERM);			\
-    }						\
-  } else {					\
-    LOAD_CHI;					\
-  }						\
-  MULT_2SPIN(DIR);				\
-  RECON;					
-
-#define HAND_STENCIL_LEG_INT(PROJ,PERM,DIR,RECON)	\
-  SE=st.GetEntry(ptype,DIR,ss);			\
-  offset = SE->_offset;				\
-  local  = SE->_is_local;			\
-  perm   = SE->_permute;			\
-  if ( local ) {				\
-    LOAD_CHIMU;					\
-    PROJ;					\
-    if ( perm) {				\
-      PERMUTE_DIR(PERM);			\
-    }						\
-  } else if ( st.same_node[DIR] ) {		\
-    LOAD_CHI;					\
-  }						\
-  if (local || st.same_node[DIR] ) {		\
-    MULT_2SPIN(DIR);				\
-    RECON;					\
-  }
-
-#define HAND_STENCIL_LEG_EXT(PROJ,PERM,DIR,RECON)	\
-  SE=st.GetEntry(ptype,DIR,ss);			\
-  offset = SE->_offset;				\
-  if((!SE->_is_local)&&(!st.same_node[DIR]) ) {	\
-    LOAD_CHI;					\
-    MULT_2SPIN(DIR);				\
-    RECON;					\
-    nmu++;					\
-  }
-
-#define HAND_RESULT(ss)				\
-  {						\
-    SiteSpinor & ref (out._odata[ss]);		\
-    vstream(ref()(0)(0),result_00);		\
-    vstream(ref()(0)(1),result_01);		\
-    vstream(ref()(0)(2),result_02);		\
-    vstream(ref()(1)(0),result_10);		\
-    vstream(ref()(1)(1),result_11);		\
-    vstream(ref()(1)(2),result_12);		\
-    vstream(ref()(2)(0),result_20);		\
-    vstream(ref()(2)(1),result_21);		\
-    vstream(ref()(2)(2),result_22);		\
-    vstream(ref()(3)(0),result_30);		\
-    vstream(ref()(3)(1),result_31);		\
-    vstream(ref()(3)(2),result_32);		\
-  }
-
-#define HAND_RESULT_EXT(ss)			\
-  if (nmu){					\
-    SiteSpinor & ref (out._odata[ss]);		\
-    ref()(0)(0)+=result_00;		\
-    ref()(0)(1)+=result_01;		\
-    ref()(0)(2)+=result_02;		\
-    ref()(1)(0)+=result_10;		\
-    ref()(1)(1)+=result_11;		\
-    ref()(1)(2)+=result_12;		\
-    ref()(2)(0)+=result_20;		\
-    ref()(2)(1)+=result_21;		\
-    ref()(2)(2)+=result_22;		\
-    ref()(3)(0)+=result_30;		\
-    ref()(3)(1)+=result_31;		\
-    ref()(3)(2)+=result_32;		\
-  }
-
-
-#define HAND_DECLARATIONS(a)			\
-  Simd result_00;				\
-  Simd result_01;				\
-  Simd result_02;				\
-  Simd result_10;				\
-  Simd result_11;				\
-  Simd result_12;				\
-  Simd result_20;				\
-  Simd result_21;				\
-  Simd result_22;				\
-  Simd result_30;				\
-  Simd result_31;				\
-  Simd result_32;				\
-  Simd Chi_00;					\
-  Simd Chi_01;					\
-  Simd Chi_02;					\
-  Simd Chi_10;					\
-  Simd Chi_11;					\
-  Simd Chi_12;					\
-  Simd UChi_00;					\
-  Simd UChi_01;					\
-  Simd UChi_02;					\
-  Simd UChi_10;					\
-  Simd UChi_11;					\
-  Simd UChi_12;					\
-  Simd U_00;					\
-  Simd U_10;					\
-  Simd U_20;					\
-  Simd U_01;					\
-  Simd U_11;					\
-  Simd U_21;
-
-#define ZERO_RESULT				\
-  result_00=zero;				\
-  result_01=zero;				\
-  result_02=zero;				\
-  result_10=zero;				\
-  result_11=zero;				\
-  result_12=zero;				\
-  result_20=zero;				\
-  result_21=zero;				\
-  result_22=zero;				\
-  result_30=zero;				\
-  result_31=zero;				\
-  result_32=zero;			
-
-#define Chimu_00 Chi_00
-#define Chimu_01 Chi_01
-#define Chimu_02 Chi_02
-#define Chimu_10 Chi_10
-#define Chimu_11 Chi_11
-#define Chimu_12 Chi_12
-#define Chimu_20 UChi_00
-#define Chimu_21 UChi_01
-#define Chimu_22 UChi_02
-#define Chimu_30 UChi_10
-#define Chimu_31 UChi_11
-#define Chimu_32 UChi_12
-
-namespace Grid {
-namespace QCD {
-
-template<class Impl> void 
-WilsonKernels<Impl>::HandDhopSite(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,SiteHalfSpinor  *buf,
-					  int ss,int sU,const FermionField &in, FermionField &out)
-{
-// T==0, Z==1, Y==2, Z==3 expect 1,2,2,2 simd layout etc...
-  typedef typename Simd::scalar_type S;
-  typedef typename Simd::vector_type V;
-
-  HAND_DECLARATIONS(ignore);
-
-  int offset,local,perm, ptype;
-  StencilEntry *SE;
-
-  HAND_STENCIL_LEG(XM_PROJ,3,Xp,XM_RECON);
-  HAND_STENCIL_LEG(YM_PROJ,2,Yp,YM_RECON_ACCUM);
-  HAND_STENCIL_LEG(ZM_PROJ,1,Zp,ZM_RECON_ACCUM);
-  HAND_STENCIL_LEG(TM_PROJ,0,Tp,TM_RECON_ACCUM);
-  HAND_STENCIL_LEG(XP_PROJ,3,Xm,XP_RECON_ACCUM);
-  HAND_STENCIL_LEG(YP_PROJ,2,Ym,YP_RECON_ACCUM);
-  HAND_STENCIL_LEG(ZP_PROJ,1,Zm,ZP_RECON_ACCUM);
-  HAND_STENCIL_LEG(TP_PROJ,0,Tm,TP_RECON_ACCUM);
-  HAND_RESULT(ss);
-}
-
-template<class Impl>
-void WilsonKernels<Impl>::HandDhopSiteDag(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,SiteHalfSpinor *buf,
-						  int ss,int sU,const FermionField &in, FermionField &out)
-{
-  typedef typename Simd::scalar_type S;
-  typedef typename Simd::vector_type V;
-
-  HAND_DECLARATIONS(ignore);
-
-  StencilEntry *SE;
-  int offset,local,perm, ptype;
-  
-  HAND_STENCIL_LEG(XP_PROJ,3,Xp,XP_RECON);
-  HAND_STENCIL_LEG(YP_PROJ,2,Yp,YP_RECON_ACCUM);
-  HAND_STENCIL_LEG(ZP_PROJ,1,Zp,ZP_RECON_ACCUM);
-  HAND_STENCIL_LEG(TP_PROJ,0,Tp,TP_RECON_ACCUM);
-  HAND_STENCIL_LEG(XM_PROJ,3,Xm,XM_RECON_ACCUM);
-  HAND_STENCIL_LEG(YM_PROJ,2,Ym,YM_RECON_ACCUM);
-  HAND_STENCIL_LEG(ZM_PROJ,1,Zm,ZM_RECON_ACCUM);
-  HAND_STENCIL_LEG(TM_PROJ,0,Tm,TM_RECON_ACCUM);
-  HAND_RESULT(ss);
-}
-
-template<class Impl> void 
-WilsonKernels<Impl>::HandDhopSiteInt(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,SiteHalfSpinor  *buf,
-					  int ss,int sU,const FermionField &in, FermionField &out)
-{
-// T==0, Z==1, Y==2, Z==3 expect 1,2,2,2 simd layout etc...
-  typedef typename Simd::scalar_type S;
-  typedef typename Simd::vector_type V;
-
-  HAND_DECLARATIONS(ignore);
-
-  int offset,local,perm, ptype;
-  StencilEntry *SE;
-  ZERO_RESULT;
-  HAND_STENCIL_LEG_INT(XM_PROJ,3,Xp,XM_RECON_ACCUM);
-  HAND_STENCIL_LEG_INT(YM_PROJ,2,Yp,YM_RECON_ACCUM);
-  HAND_STENCIL_LEG_INT(ZM_PROJ,1,Zp,ZM_RECON_ACCUM);
-  HAND_STENCIL_LEG_INT(TM_PROJ,0,Tp,TM_RECON_ACCUM);
-  HAND_STENCIL_LEG_INT(XP_PROJ,3,Xm,XP_RECON_ACCUM);
-  HAND_STENCIL_LEG_INT(YP_PROJ,2,Ym,YP_RECON_ACCUM);
-  HAND_STENCIL_LEG_INT(ZP_PROJ,1,Zm,ZP_RECON_ACCUM);
-  HAND_STENCIL_LEG_INT(TP_PROJ,0,Tm,TP_RECON_ACCUM);
-  HAND_RESULT(ss);
-}
-
-template<class Impl>
-void WilsonKernels<Impl>::HandDhopSiteDagInt(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,SiteHalfSpinor *buf,
-						  int ss,int sU,const FermionField &in, FermionField &out)
-{
-  typedef typename Simd::scalar_type S;
-  typedef typename Simd::vector_type V;
-
-  HAND_DECLARATIONS(ignore);
-
-  StencilEntry *SE;
-  int offset,local,perm, ptype;
-  ZERO_RESULT;
-  HAND_STENCIL_LEG_INT(XP_PROJ,3,Xp,XP_RECON_ACCUM);
-  HAND_STENCIL_LEG_INT(YP_PROJ,2,Yp,YP_RECON_ACCUM);
-  HAND_STENCIL_LEG_INT(ZP_PROJ,1,Zp,ZP_RECON_ACCUM);
-  HAND_STENCIL_LEG_INT(TP_PROJ,0,Tp,TP_RECON_ACCUM);
-  HAND_STENCIL_LEG_INT(XM_PROJ,3,Xm,XM_RECON_ACCUM);
-  HAND_STENCIL_LEG_INT(YM_PROJ,2,Ym,YM_RECON_ACCUM);
-  HAND_STENCIL_LEG_INT(ZM_PROJ,1,Zm,ZM_RECON_ACCUM);
-  HAND_STENCIL_LEG_INT(TM_PROJ,0,Tm,TM_RECON_ACCUM);
-  HAND_RESULT(ss);
-}
-
-template<class Impl> void 
-WilsonKernels<Impl>::HandDhopSiteExt(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,SiteHalfSpinor  *buf,
-					  int ss,int sU,const FermionField &in, FermionField &out)
-{
-// T==0, Z==1, Y==2, Z==3 expect 1,2,2,2 simd layout etc...
-  typedef typename Simd::scalar_type S;
-  typedef typename Simd::vector_type V;
-
-  HAND_DECLARATIONS(ignore);
-
-  int offset,local,perm, ptype;
-  StencilEntry *SE;
-  int nmu=0;
-  ZERO_RESULT;
-  HAND_STENCIL_LEG_EXT(XM_PROJ,3,Xp,XM_RECON_ACCUM);
-  HAND_STENCIL_LEG_EXT(YM_PROJ,2,Yp,YM_RECON_ACCUM);
-  HAND_STENCIL_LEG_EXT(ZM_PROJ,1,Zp,ZM_RECON_ACCUM);
-  HAND_STENCIL_LEG_EXT(TM_PROJ,0,Tp,TM_RECON_ACCUM);
-  HAND_STENCIL_LEG_EXT(XP_PROJ,3,Xm,XP_RECON_ACCUM);
-  HAND_STENCIL_LEG_EXT(YP_PROJ,2,Ym,YP_RECON_ACCUM);
-  HAND_STENCIL_LEG_EXT(ZP_PROJ,1,Zm,ZP_RECON_ACCUM);
-  HAND_STENCIL_LEG_EXT(TP_PROJ,0,Tm,TP_RECON_ACCUM);
-  HAND_RESULT_EXT(ss);
-}
-
-template<class Impl>
-void WilsonKernels<Impl>::HandDhopSiteDagExt(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,SiteHalfSpinor *buf,
-						  int ss,int sU,const FermionField &in, FermionField &out)
-{
-  typedef typename Simd::scalar_type S;
-  typedef typename Simd::vector_type V;
-
-  HAND_DECLARATIONS(ignore);
-
-  StencilEntry *SE;
-  int offset,local,perm, ptype;
-  int nmu=0;
-  ZERO_RESULT;
-  HAND_STENCIL_LEG_EXT(XP_PROJ,3,Xp,XP_RECON_ACCUM);
-  HAND_STENCIL_LEG_EXT(YP_PROJ,2,Yp,YP_RECON_ACCUM);
-  HAND_STENCIL_LEG_EXT(ZP_PROJ,1,Zp,ZP_RECON_ACCUM);
-  HAND_STENCIL_LEG_EXT(TP_PROJ,0,Tp,TP_RECON_ACCUM);
-  HAND_STENCIL_LEG_EXT(XM_PROJ,3,Xm,XM_RECON_ACCUM);
-  HAND_STENCIL_LEG_EXT(YM_PROJ,2,Ym,YM_RECON_ACCUM);
-  HAND_STENCIL_LEG_EXT(ZM_PROJ,1,Zm,ZM_RECON_ACCUM);
-  HAND_STENCIL_LEG_EXT(TM_PROJ,0,Tm,TM_RECON_ACCUM);
-  HAND_RESULT_EXT(ss);
-}
-
-////////////// Wilson ; uses this implementation /////////////////////
-
-#define INSTANTIATE_THEM(A) \
-template void WilsonKernels<A>::HandDhopSite(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,SiteHalfSpinor *buf,\
-					     int ss,int sU,const FermionField &in, FermionField &out); \
-template void WilsonKernels<A>::HandDhopSiteDag(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,SiteHalfSpinor *buf, \
-						int ss,int sU,const FermionField &in, FermionField &out);\
-template void WilsonKernels<A>::HandDhopSiteInt(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,SiteHalfSpinor *buf,\
-						int ss,int sU,const FermionField &in, FermionField &out); \
-template void WilsonKernels<A>::HandDhopSiteDagInt(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,SiteHalfSpinor *buf, \
-						   int ss,int sU,const FermionField &in, FermionField &out); \
-template void WilsonKernels<A>::HandDhopSiteExt(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,SiteHalfSpinor *buf,\
-						int ss,int sU,const FermionField &in, FermionField &out); \
-template void WilsonKernels<A>::HandDhopSiteDagExt(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,SiteHalfSpinor *buf, \
-						   int ss,int sU,const FermionField &in, FermionField &out); 
-
-INSTANTIATE_THEM(WilsonImplF);
-INSTANTIATE_THEM(WilsonImplD);
-INSTANTIATE_THEM(ZWilsonImplF);
-INSTANTIATE_THEM(ZWilsonImplD);
-INSTANTIATE_THEM(DomainWallVec5dImplF);
-INSTANTIATE_THEM(DomainWallVec5dImplD);
-INSTANTIATE_THEM(ZDomainWallVec5dImplF);
-INSTANTIATE_THEM(ZDomainWallVec5dImplD);
-INSTANTIATE_THEM(WilsonImplFH);
-INSTANTIATE_THEM(WilsonImplDF);
-INSTANTIATE_THEM(ZWilsonImplFH);
-INSTANTIATE_THEM(ZWilsonImplDF);
-INSTANTIATE_THEM(DomainWallVec5dImplFH);
-INSTANTIATE_THEM(DomainWallVec5dImplDF);
-INSTANTIATE_THEM(ZDomainWallVec5dImplFH);
-INSTANTIATE_THEM(ZDomainWallVec5dImplDF);
-INSTANTIATE_THEM(WilsonTwoIndexAntiSymmetricImplF);
-INSTANTIATE_THEM(WilsonTwoIndexAntiSymmetricImplD);
-
-}}
--- a/Grid/qcd/action/fermion/WilsonTMFermion5D.h
+++ b/Grid/qcd/action/fermion/WilsonTMFermion5D.h
@@ -1,155 +0,0 @@
-    /*************************************************************************************
-
-    Grid physics library, www.github.com/paboyle/Grid 
-
-    Source file: ./lib/qcd/action/fermion/WilsonTMFermion5D.h
-
-    Copyright (C) 2015
-
-Author: paboyle <paboyle@ph.ed.ac.uk> ; NB Christoph did similar in GPT
-
-    This program is free software; you can redistribute it and/or modify
-    it under the terms of the GNU General Public License as published by
-    the Free Software Foundation; either version 2 of the License, or
-    (at your option) any later version.
-
-    This program is distributed in the hope that it will be useful,
-    but WITHOUT ANY WARRANTY; without even the implied warranty of
-    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-    GNU General Public License for more details.
-
-    You should have received a copy of the GNU General Public License along
-    with this program; if not, write to the Free Software Foundation, Inc.,
-    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
-
-    See the full license in the file "LICENSE" in the top level distribution directory
-    *************************************************************************************/
-    /*  END LEGAL */
-#pragma once 
-
-#include <Grid/qcd/action/fermion/FermionCore.h>
-#include <Grid/qcd/action/fermion/WilsonFermion.h>
-
-
-namespace Grid {
-
-  namespace QCD {
-    
-    template<class Impl>
-      class WilsonTMFermion5D : public WilsonFermion5D<Impl>
-      {
-      public:
-	INHERIT_IMPL_TYPES(Impl);
-      public:
-
-	virtual void   Instantiatable(void) {};
-
-	// Constructors
-        WilsonTMFermion5D(GaugeField &_Umu,
-			  GridCartesian         &Fgrid,
-			  GridRedBlackCartesian &Frbgrid, 
-			  GridCartesian         &Ugrid,
-			  GridRedBlackCartesian &Urbgrid, 
-			  const std::vector<RealD> _mass,
-			  const std::vector<RealD> _mu,
-			  const ImplParams &p= ImplParams()
-			  ) :
-	WilsonFermion5D<Impl>(_Umu,
-			      Fgrid,
-			      Frbgrid,
-			      Ugrid,
-			      Urbgrid,
-			      4.0,p)
-	
-	  {
-	    update(_mass,_mu);
-	  }
-
-	virtual void Meooe(const FermionField &in, FermionField &out) {
-	  if (in.checkerboard == Odd) {
-	    this->DhopEO(in, out, DaggerNo);
-	  } else {
-	    this->DhopOE(in, out, DaggerNo);
-	  }
-	}
-
-	virtual void MeooeDag(const FermionField &in, FermionField &out) {
-	  if (in.checkerboard == Odd) {
-	    this->DhopEO(in, out, DaggerYes);
-	  } else {
-	    this->DhopOE(in, out, DaggerYes);
-	  }
-	}	
-	
-	// allow override for twisted mass and clover
-	virtual void Mooee(const FermionField &in, FermionField &out) {
-	  out.checkerboard = in.checkerboard;
-	  //axpibg5x(out,in,a,b); // out = a*in + b*i*G5*in
-	  for (int s=0;s<(int)this->mass.size();s++) {
-	    ComplexD a = 4.0+this->mass[s];
-	    ComplexD b(0.0,this->mu[s]);
-	    axpbg5y_ssp(out,a,in,b,in,s,s);
-	  }
-	}
-
-	virtual void MooeeDag(const FermionField &in, FermionField &out) {
-	  out.checkerboard = in.checkerboard;
-	  for (int s=0;s<(int)this->mass.size();s++) {
-	    ComplexD a = 4.0+this->mass[s];
-	    ComplexD b(0.0,-this->mu[s]);
-	    axpbg5y_ssp(out,a,in,b,in,s,s);
-	  }
-	}
-	virtual void MooeeInv(const FermionField &in, FermionField &out) {
-	  for (int s=0;s<(int)this->mass.size();s++) {
-	    RealD m    = this->mass[s];
-	    RealD tm   = this->mu[s];
-	    RealD mtil = 4.0+this->mass[s];
-	    RealD sq   = mtil*mtil+tm*tm;
-	    ComplexD a    = mtil/sq;
-	    ComplexD b(0.0, -tm /sq);
-	    axpbg5y_ssp(out,a,in,b,in,s,s);
-	  }
-	}
-	virtual void MooeeInvDag(const FermionField &in, FermionField &out) {
-	  for (int s=0;s<(int)this->mass.size();s++) {
-	    RealD m    = this->mass[s];
-	    RealD tm   = this->mu[s];
-	    RealD mtil = 4.0+this->mass[s];
-	    RealD sq   = mtil*mtil+tm*tm;
-	    ComplexD a    = mtil/sq;
-	    ComplexD b(0.0,tm /sq);
-	    axpbg5y_ssp(out,a,in,b,in,s,s);
-	  }
-	}
-
-	virtual RealD M(const FermionField &in, FermionField &out) {
-	  out.checkerboard = in.checkerboard;
-	  this->Dhop(in, out, DaggerNo);
-	  FermionField tmp(out._grid);
-	  for (int s=0;s<(int)this->mass.size();s++) {
-	    ComplexD a = 4.0+this->mass[s];
-	    ComplexD b(0.0,this->mu[s]);
-	    axpbg5y_ssp(tmp,a,in,b,in,s,s);
-	  }
-	  return axpy_norm(out, 1.0, tmp, out);
-	}
-	
-	// needed for fast PV
-	void update(const std::vector<RealD>& _mass, const std::vector<RealD>& _mu) {
-	  assert(_mass.size() == _mu.size());
-	  assert(_mass.size() == this->FermionGrid()->_fdimensions[0]);
-	  this->mass = _mass;
-	  this->mu = _mu;
-	}
-	
-      private:
-	std::vector<RealD> mu;
-	std::vector<RealD> mass;
-	
-      };
-   
-    typedef WilsonTMFermion5D<WilsonImplF> WilsonTMFermion5DF; 
-    typedef WilsonTMFermion5D<WilsonImplD> WilsonTMFermion5DD; 
-
-}}
--- a/Grid/qcd/action/gauge/Photon.h
+++ b/Grid/qcd/action/gauge/Photon.h
@@ -1,331 +0,0 @@
-/*************************************************************************************
- 
- Grid physics library, www.github.com/paboyle/Grid
- 
- Source file: ./lib/qcd/action/gauge/Photon.h
- 
-Copyright (C) 2015-2018
- 
- Author: Peter Boyle <paboyle@ph.ed.ac.uk>
- Author: Antonin Portelli <antonin.portelli@me.com>
- Author: James Harrison <J.Harrison@soton.ac.uk>
- 
- This program is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published by
- the Free Software Foundation; either version 2 of the License, or
- (at your option) any later version.
- 
- This program is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- GNU General Public License for more details.
- 
- You should have received a copy of the GNU General Public License along
- with this program; if not, write to the Free Software Foundation, Inc.,
- 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
- 
- See the full license in the file "LICENSE" in the top level distribution directory
- *************************************************************************************/
-/*  END LEGAL */
-#ifndef QCD_PHOTON_ACTION_H
-#define QCD_PHOTON_ACTION_H
-
-namespace Grid{
-namespace QCD{
-
-  template <class S>
-  class QedGImpl
-  {
-  public:
-    typedef S Simd;
-    typedef typename Simd::scalar_type Scalar;
-    
-    template <typename vtype>
-    using iImplGaugeLink  = iScalar<iScalar<iScalar<vtype>>>;
-    template <typename vtype>
-    using iImplGaugeField = iVector<iScalar<iScalar<vtype>>, Nd>;
-    
-    typedef iImplGaugeLink<Simd>  SiteLink;
-    typedef iImplGaugeField<Simd> SiteField;
-    typedef SiteLink              SiteComplex;
-    
-    typedef Lattice<SiteLink>  LinkField;
-    typedef Lattice<SiteField> Field;
-    typedef Field              ComplexField;
-  };
-  
-  typedef QedGImpl<vComplex> QedGImplR;
-  
-  template <class GImpl>
-  class Photon
-  {
-  public:
-    INHERIT_GIMPL_TYPES(GImpl);
-    typedef typename SiteGaugeLink::scalar_object ScalarSite;
-    typedef typename ScalarSite::scalar_type      ScalarComplex;
-    GRID_SERIALIZABLE_ENUM(Gauge, undef, feynman, 1, coulomb, 2, landau, 3);
-    GRID_SERIALIZABLE_ENUM(ZmScheme, undef, qedL, 1, qedTL, 2);
-  public:
-    Photon(GridBase *grid, Gauge gauge, ZmScheme zmScheme, std::vector<Real> improvement);
-    Photon(GridBase *grid, Gauge gauge, ZmScheme zmScheme);
-    virtual ~Photon(void) = default;
-    void FreePropagator(const GaugeField &in, GaugeField &out);
-    void MomentumSpacePropagator(const GaugeField &in, GaugeField &out);
-    void StochasticWeight(GaugeLinkField &weight);
-    void StochasticField(GaugeField &out, GridParallelRNG &rng);
-    void StochasticField(GaugeField &out, GridParallelRNG &rng,
-                         const GaugeLinkField &weight);
-    void UnitField(GaugeField &out);
-  private:
-    void makeSpatialNorm(LatticeInteger &spNrm);
-    void makeKHat(std::vector<GaugeLinkField> &khat);
-    void makeInvKHatSquared(GaugeLinkField &out);
-    void zmSub(GaugeLinkField &out);
-    void transverseProjectSpatial(GaugeField &out);
-    void gaugeTransform(GaugeField &out);
-  private:
-    GridBase          *grid_;
-    Gauge             gauge_;
-    ZmScheme          zmScheme_;
-    std::vector<Real> improvement_;
-  };
-
-  typedef Photon<QedGImplR>  PhotonR;
-  
-  template<class GImpl>
-  Photon<GImpl>::Photon(GridBase *grid, Gauge gauge, ZmScheme zmScheme,
-                        std::vector<Real> improvements)
-  : grid_(grid), gauge_(gauge), zmScheme_(zmScheme), improvement_(improvements)
-  {}
-
-  template<class GImpl>
-  Photon<GImpl>::Photon(GridBase *grid, Gauge gauge, ZmScheme zmScheme)
-  : Photon(grid, gauge, zmScheme, std::vector<Real>())
-  {}
-
-  template<class GImpl>
-  void Photon<GImpl>::FreePropagator(const GaugeField &in, GaugeField &out)
-  {
-    FFT        theFFT(dynamic_cast<GridCartesian *>(grid_));
-    GaugeField in_k(grid_);
-    GaugeField prop_k(grid_);
-    
-    theFFT.FFT_all_dim(in_k, in, FFT::forward);
-    MomentumSpacePropagator(prop_k, in_k);
-    theFFT.FFT_all_dim(out, prop_k, FFT::backward);
-  }
-
-  template<class GImpl>
-  void Photon<GImpl>::makeSpatialNorm(LatticeInteger &spNrm)
-  {
-    LatticeInteger   coor(grid_);
-    std::vector<int> l = grid_->FullDimensions();
-
-    spNrm = zero;
-    for(int mu = 0; mu < grid_->Nd() - 1; mu++)
-    {
-      LatticeCoordinate(coor, mu);
-      coor  = where(coor < Integer(l[mu]/2), coor, coor - Integer(l[mu]));
-      spNrm = spNrm + coor*coor;
-    }
-  }
-
-  template<class GImpl>
-  void Photon<GImpl>::makeKHat(std::vector<GaugeLinkField> &khat)
-  {
-    const unsigned int nd = grid_->Nd();
-    std::vector<int>   l  = grid_->FullDimensions();
-    Complex            ci(0., 1.);
-
-    khat.resize(nd, grid_);
-    for (unsigned int mu = 0; mu < nd; ++mu)
-    {
-      Real piL = M_PI/l[mu];
-
-      LatticeCoordinate(khat[mu], mu);
-      khat[mu] = exp(piL*ci*khat[mu])*2.*sin(piL*khat[mu]);
-    }
-  }
-
-  template<class GImpl>
-  void Photon<GImpl>::makeInvKHatSquared(GaugeLinkField &out)
-  {
-    std::vector<GaugeLinkField> khat;
-    GaugeLinkField              lone(grid_);
-    const unsigned int          nd = grid_->Nd();
-    std::vector<int>            zm(nd, 0);
-    ScalarSite                  one = ScalarComplex(1., 0.), z = ScalarComplex(0., 0.);
-    
-    out = zero;
-    makeKHat(khat);
-    for(int mu = 0; mu < nd; mu++)
-    {
-      out = out + khat[mu]*conjugate(khat[mu]);
-    }
-    lone = ScalarComplex(1., 0.);
-    pokeSite(one, out, zm);
-    out = lone/out;
-    pokeSite(z, out, zm);
-  }
-  
-  template<class GImpl>
-  void Photon<GImpl>::zmSub(GaugeLinkField &out)
-  {
-    switch (zmScheme_)
-    {
-      case ZmScheme::qedTL:
-      {
-        std::vector<int> zm(grid_->Nd(), 0);
-        ScalarSite       z = ScalarComplex(0., 0.);
-        
-        pokeSite(z, out, zm);
-        break;
-      }
-      case ZmScheme::qedL:
-      {
-        LatticeInteger spNrm(grid_);
-
-        makeSpatialNorm(spNrm);
-        out = where(spNrm == Integer(0), 0.*out, out);
-        for(int i = 0; i < improvement_.size(); i++)
-        {
-          Real f = sqrt(improvement_[i] + 1);
-          out = where(spNrm == Integer(i + 1), f*out, out);
-        }
-        break;
-      }
-      default:
-        assert(0);
-        break;
-    }
-  }
-
-  template<class GImpl>
-  void Photon<GImpl>::transverseProjectSpatial(GaugeField &out)
-  {
-    const unsigned int          nd = grid_->Nd();
-    GaugeLinkField              invKHat(grid_), cst(grid_), spdiv(grid_);
-    LatticeInteger              spNrm(grid_);
-    std::vector<GaugeLinkField> khat, a(nd, grid_), aProj(nd, grid_);
-
-    invKHat = zero;
-    makeSpatialNorm(spNrm);
-    makeKHat(khat);
-    for (unsigned int mu = 0; mu < nd; ++mu)
-    {
-      a[mu] = peekLorentz(out, mu);
-      if (mu < nd - 1)
-      {
-        invKHat += khat[mu]*conjugate(khat[mu]);
-      }
-    }
-    cst     = ScalarComplex(1., 0.);
-    invKHat = where(spNrm == Integer(0), cst, invKHat);
-    invKHat = cst/invKHat;
-    cst     = zero;
-    invKHat = where(spNrm == Integer(0), cst, invKHat);
-    spdiv   = zero;
-    for (unsigned int nu = 0; nu < nd - 1; ++nu)
-    {
-      spdiv += conjugate(khat[nu])*a[nu];
-    }
-    spdiv *= invKHat;
-    for (unsigned int mu = 0; mu < nd; ++mu)
-    {
-      aProj[mu] = a[mu] - khat[mu]*spdiv;
-      pokeLorentz(out, aProj[mu], mu);
-    }
-  }
-
-  template<class GImpl>
-  void Photon<GImpl>::gaugeTransform(GaugeField &out)
-  {
-    switch (gauge_)
-    {
-      case Gauge::feynman:
-        break;
-      case Gauge::coulomb:
-        transverseProjectSpatial(out);
-        break;
-      case Gauge::landau:
-        assert(0);
-        break;
-      default:
-        assert(0);
-        break;
-    }
-  }
-
-  template<class GImpl>
-  void Photon<GImpl>::MomentumSpacePropagator(const GaugeField &in,
-                                              GaugeField &out)
-  {
-    LatticeComplex momProp(grid_);
-    
-    makeInvKHatSquared(momProp);
-    zmSub(momProp);
-    
-    out = in*momProp;
-  }
-  
-  template<class GImpl>
-  void Photon<GImpl>::StochasticWeight(GaugeLinkField &weight)
-  {
-    const unsigned int nd  = grid_->Nd();
-    std::vector<int>   l   = grid_->FullDimensions();
-    Integer            vol = 1;
-
-    for(unsigned int mu = 0; mu < nd; mu++)
-    {
-      vol = vol*l[mu];
-    }
-    makeInvKHatSquared(weight);
-    weight = sqrt(vol)*sqrt(weight);
-    zmSub(weight);
-  }
-  
-  template<class GImpl>
-  void Photon<GImpl>::StochasticField(GaugeField &out, GridParallelRNG &rng)
-  {
-    GaugeLinkField weight(grid_);
-    
-    StochasticWeight(weight);
-    StochasticField(out, rng, weight);
-  }
-  
-  template<class GImpl>
-  void Photon<GImpl>::StochasticField(GaugeField &out, GridParallelRNG &rng,
-                                      const GaugeLinkField &weight)
-  {
-    const unsigned int nd = grid_->Nd();
-    GaugeLinkField     r(grid_);
-    GaugeField         aTilde(grid_);
-    FFT                fft(dynamic_cast<GridCartesian *>(grid_));
-    
-    for(unsigned int mu = 0; mu < nd; mu++)
-    {
-      gaussian(rng, r);
-      r = weight*r;
-      pokeLorentz(aTilde, r, mu);
-    }
-    gaugeTransform(aTilde);
-    fft.FFT_all_dim(out, aTilde, FFT::backward);
-    out = real(out);
-  }
-
-  template<class GImpl>
-  void Photon<GImpl>::UnitField(GaugeField &out)
-  {
-    const unsigned int nd = grid_->Nd();
-    GaugeLinkField     r(grid_);
-    
-    r = ScalarComplex(1., 0.);
-    for(unsigned int mu = 0; mu < nd; mu++)
-    {
-      pokeLorentz(out, r, mu);
-    }
-    out = real(out);
-  }
-  
-}}
-#endif
--- a/Grid/qcd/action/pseudofermion/Bounds.h
+++ b/Grid/qcd/action/pseudofermion/Bounds.h
@@ -1,53 +0,0 @@
-#pragma once
-
-namespace Grid{
-  namespace QCD{
-
-    template<class Field>
-    void HighBoundCheck(LinearOperatorBase<Field> &HermOp, 
-			Field &Phi,
-			RealD hi)
-    {
-      // Eigenvalue bound check at high end
-      PowerMethod<Field> power_method;
-      auto lambda_max = power_method(HermOp,Phi);
-      std::cout << GridLogMessage << "Pseudofermion action lamda_max "<<lambda_max<<"( bound "<<hi<<")"<<std::endl;
-      assert( (lambda_max < hi) && " High Bounds Check on operator failed" );
-    }
-      
-    template<class Field> void InverseSqrtBoundsCheck(int MaxIter,double tol,
-						       LinearOperatorBase<Field> &HermOp,
-						       Field &GaussNoise,
-						       MultiShiftFunction &PowerNegHalf) 
-    {
-      GridBase *FermionGrid = GaussNoise._grid;
-
-      Field X(FermionGrid);
-      Field Y(FermionGrid);
-      Field Z(FermionGrid);
-
-      X=GaussNoise;
-      RealD Nx = norm2(X);
-
-      ConjugateGradientMultiShift<Field> msCG(MaxIter,PowerNegHalf);
-      msCG(HermOp,X,Y);
-      msCG(HermOp,Y,Z);
-
-      RealD Nz = norm2(Z);
-
-      HermOp.HermOp(Z,Y);
-      RealD Ny = norm2(Y);
-
-      X=X-Y;
-      RealD Nd = norm2(X);
-      std::cout << "************************* "<<std::endl;
-      std::cout << " noise                         = "<<Nx<<std::endl;
-      std::cout << " (MdagM^-1/2)^2  noise         = "<<Nz<<std::endl;
-      std::cout << " MdagM (MdagM^-1/2)^2  noise   = "<<Ny<<std::endl;
-      std::cout << " noise - MdagM (MdagM^-1/2)^2  noise   = "<<Nd<<std::endl;
-      std::cout << "************************* "<<std::endl;
-      assert( (std::sqrt(Nd/Nx)<tol) && " InverseSqrtBoundsCheck ");
-    }
-
-  }
-}
--- a/Grid/qcd/action/scalar/ScalarInteractionAction.h
+++ b/Grid/qcd/action/scalar/ScalarInteractionAction.h
@@ -1,208 +0,0 @@
-/*************************************************************************************
-
-  Grid physics library, www.github.com/paboyle/Grid
-
-  Source file: ./lib/qcd/action/gauge/WilsonGaugeAction.h
-
-  Copyright (C) 2015
-
-  Author: Guido Cossu <guido,cossu@ed.ac.uk>
-
-  This program is free software; you can redistribute it and/or modify
-  it under the terms of the GNU General Public License as published by
-  the Free Software Foundation; either version 2 of the License, or
-  (at your option) any later version.
-
-  This program is distributed in the hope that it will be useful,
-  but WITHOUT ANY WARRANTY; without even the implied warranty of
-  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-  GNU General Public License for more details.
-
-  You should have received a copy of the GNU General Public License along
-  with this program; if not, write to the Free Software Foundation, Inc.,
-  51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
-
-  See the full license in the file "LICENSE" in the top level distribution
-directory
-  *************************************************************************************/
-/*  END LEGAL */
-
-#ifndef SCALAR_INT_ACTION_H
-#define SCALAR_INT_ACTION_H
-
-// Note: this action can completely absorb the ScalarAction for real float fields
-// use the scalarObjs to generalise the structure
-
-namespace Grid
-{
-// FIXME drop the QCD namespace everywhere here
-
-template <class Impl, int Ndim>
-class ScalarInteractionAction : public QCD::Action<typename Impl::Field>
-{
-public:
-  INHERIT_FIELD_TYPES(Impl);
-
-private:
-  RealD mass_square;
-  RealD lambda;
-  RealD g;
-  const unsigned int N = Impl::Group::Dimension;
-
-  typedef typename Field::vector_object vobj;
-  typedef CartesianStencil<vobj, vobj> Stencil;
-
-  SimpleCompressor<vobj> compressor;
-  int npoint = 2 * Ndim;
-  std::vector<int> directions;    //
-  std::vector<int> displacements; //
-
-public:
-  ScalarInteractionAction(RealD ms, RealD l, RealD gval) : mass_square(ms), lambda(l), g(gval), displacements(2 * Ndim, 0), directions(2 * Ndim, 0)
-  {
-    for (int mu = 0; mu < Ndim; mu++)
-    {
-      directions[mu] = mu;
-      directions[mu + Ndim] = mu;
-      displacements[mu] = 1;
-      displacements[mu + Ndim] = -1;
-    }
-  }
-
-  virtual std::string LogParameters()
-  {
-    std::stringstream sstream;
-    sstream << GridLogMessage << "[ScalarAction] lambda      : " << lambda << std::endl;
-    sstream << GridLogMessage << "[ScalarAction] mass_square : " << mass_square << std::endl;
-    sstream << GridLogMessage << "[ScalarAction] g           : " << g << std::endl;
-    return sstream.str();
-  }
-
-  virtual std::string action_name() { return "ScalarAction"; }
-
-  virtual void refresh(const Field &U, GridParallelRNG &pRNG) {}
-
-  virtual RealD S(const Field &p)
-  {
-    assert(p._grid->Nd() == Ndim);
-    static Stencil phiStencil(p._grid, npoint, 0, directions, displacements);
-    phiStencil.HaloExchange(p, compressor);
-    Field action(p._grid), pshift(p._grid), phisquared(p._grid);
-    phisquared = p * p;
-    action = (2.0 * Ndim + mass_square) * phisquared - lambda * phisquared * phisquared;
-    for (int mu = 0; mu < Ndim; mu++)
-    {
-      //  pshift = Cshift(p, mu, +1);  // not efficient, implement with stencils
-      parallel_for(int i = 0; i < p._grid->oSites(); i++)
-      {
-        int permute_type;
-        StencilEntry *SE;
-        vobj temp2;
-        const vobj *temp, *t_p;
-
-        SE = phiStencil.GetEntry(permute_type, mu, i);
-        t_p = &p._odata[i];
-        if (SE->_is_local)
-        {
-          temp = &p._odata[SE->_offset];
-          if (SE->_permute)
-          {
-            permute(temp2, *temp, permute_type);
-            action._odata[i] -= temp2 * (*t_p) + (*t_p) * temp2;
-          }
-          else
-          {
-            action._odata[i] -= (*temp) * (*t_p) + (*t_p) * (*temp);
-          }
-        }
-        else
-        {
-          action._odata[i] -= phiStencil.CommBuf()[SE->_offset] * (*t_p) + (*t_p) * phiStencil.CommBuf()[SE->_offset];
-        }
-      }
-      //  action -= pshift*p + p*pshift;
-    }
-    // NB the trace in the algebra is normalised to 1/2
-    // minus sign coming from the antihermitian fields
-    return -(TensorRemove(sum(trace(action)))).real() * N / g;
-  };
-
-  virtual void deriv(const Field &p, Field &force)
-  {
-    double t0 = usecond();
-    assert(p._grid->Nd() == Ndim);
-    force = (2. * Ndim + mass_square) * p - 2. * lambda * p * p * p;
-    double interm_t = usecond();
-
-    // move this outside
-    static Stencil phiStencil(p._grid, npoint, 0, directions, displacements);
-
-    phiStencil.HaloExchange(p, compressor);
-    double halo_t = usecond();
-    int chunk = 128;
-    //for (int mu = 0; mu < QCD::Nd; mu++) force -= Cshift(p, mu, -1) + Cshift(p, mu, 1);
-
-    // inverting the order of the loops slows down the code(! g++ 7)
-    // cannot try to reduce the number of  force writes by factor npoint...
-    // use cache blocking
-    for (int point = 0; point < npoint; point++)
-    {
-
-#pragma omp parallel 
-{
-        int permute_type;
-        StencilEntry *SE;
-        const vobj *temp;
-
-#pragma omp for schedule(static, chunk)
-      for (int i = 0; i < p._grid->oSites(); i++)
-      {
-        SE = phiStencil.GetEntry(permute_type, point, i);
-        // prefetch next p?
-
-        if (SE->_is_local)
-        {
-          temp = &p._odata[SE->_offset];
-      
-          if (SE->_permute)
-          {
-            vobj temp2;
-            permute(temp2, *temp, permute_type);
-            force._odata[i] -= temp2;
-          }
-          else
-          {
-            force._odata[i] -= *temp; // slow part. Dominated by this read/write (BW)
-          }
-        }
-        else
-        {
-          force._odata[i] -= phiStencil.CommBuf()[SE->_offset];
-        }
-      }
-
-    }
-  }
-  force *= N / g;
-
-  double t1 = usecond();
-  double total_time = (t1 - t0) / 1e6;
-  double interm_time = (interm_t - t0) / 1e6;
-  double halo_time = (halo_t - interm_t) / 1e6;
-  double stencil_time = (t1 - halo_t) / 1e6;
-  std::cout << GridLogIntegrator << "Total time for force computation (s)       : " << total_time << std::endl;
-  std::cout << GridLogIntegrator << "Intermediate time for force computation (s): " << interm_time << std::endl;
-  std::cout << GridLogIntegrator << "Halo time in force computation (s)         : " << halo_time << std::endl;
-  std::cout << GridLogIntegrator << "Stencil time in force computation (s)      : " << stencil_time << std::endl;
-  double flops = p._grid->gSites() * (14 * N * N * N + 18 * N * N + 2);
-  double flops_no_stencil = p._grid->gSites() * (14 * N * N * N + 6 * N * N + 2);
-  double Gflops = flops / (total_time * 1e9);
-  double Gflops_no_stencil = flops_no_stencil / (interm_time * 1e9);
-  std::cout << GridLogIntegrator << "Flops: " << flops << "  - Gflop/s : " << Gflops << std::endl;
-  std::cout << GridLogIntegrator << "Flops NS: " << flops_no_stencil << "  - Gflop/s NS: " << Gflops_no_stencil << std::endl;
-}
-};
-
-} // namespace Grid
-
-#endif // SCALAR_INT_ACTION_H
--- a/Grid/qcd/hmc/checkpointers/ScidacCheckpointer.h
+++ b/Grid/qcd/hmc/checkpointers/ScidacCheckpointer.h
@@ -1,122 +0,0 @@
-/*************************************************************************************
-
-Grid physics library, www.github.com/paboyle/Grid
-
-Source file: ./lib/qcd/hmc/ScidacCheckpointer.h
-
-Copyright (C) 2018
-
-Author: Guido Cossu <guido.cossu@ed.ac.uk>
-
-This program is free software; you can redistribute it and/or modify
-it under the terms of the GNU General Public License as published by
-the Free Software Foundation; either version 2 of the License, or
-(at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along
-with this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
-
-See the full license in the file "LICENSE" in the top level distribution
-directory
-*************************************************************************************/
-/*  END LEGAL */
-#ifndef SCIDAC_CHECKPOINTER
-#define SCIDAC_CHECKPOINTER
-
-#ifdef HAVE_LIME
-
-#include <iostream>
-#include <sstream>
-#include <string>
-
-namespace Grid {
-namespace QCD {
-
-// For generic fields
-template <class Implementation, class Metadata>
-class ScidacHmcCheckpointer : public BaseHmcCheckpointer<Implementation> {
- private:
-  CheckpointerParameters Params;
-  Metadata MData;
-
-  typedef typename Implementation::Field Field;
-
- public:
-  //INHERIT_GIMPL_TYPES(Implementation);
-
-  ScidacHmcCheckpointer(const CheckpointerParameters &Params_) { initialize(Params_); }
-  ScidacHmcCheckpointer(const CheckpointerParameters &Params_, const Metadata& M_):MData(M_) { initialize(Params_); }
-
-  void initialize(const CheckpointerParameters &Params_) {
-    Params = Params_;
-
-    // check here that the format is valid
-    int ieee32big = (Params.format == std::string("IEEE32BIG"));
-    int ieee32    = (Params.format == std::string("IEEE32"));
-    int ieee64big = (Params.format == std::string("IEEE64BIG"));
-    int ieee64    = (Params.format == std::string("IEEE64"));
-
-    if (!(ieee64big || ieee32 || ieee32big || ieee64)) {
-      std::cout << GridLogError << "Unrecognized file format " << Params.format
-                << std::endl;
-      std::cout << GridLogError
-                << "Allowed: IEEE32BIG | IEEE32 | IEEE64BIG | IEEE64"
-                << std::endl;
-
-      exit(1);
-    }
-  }
-
-  void TrajectoryComplete(int traj, Field &U, GridSerialRNG &sRNG,
-                          GridParallelRNG &pRNG) {
-    if ((traj % Params.saveInterval) == 0) {
-      std::string config, rng;
-      this->build_filenames(traj, Params, config, rng);
-      GridBase *grid = U._grid;
-      uint32_t nersc_csum,scidac_csuma,scidac_csumb;
-      BinaryIO::writeRNG(sRNG, pRNG, rng, 0,nersc_csum,scidac_csuma,scidac_csumb);
-      ScidacWriter _ScidacWriter(grid->IsBoss());
-      _ScidacWriter.open(config);
-      _ScidacWriter.writeScidacFieldRecord(U, MData);
-      _ScidacWriter.close();
-
-      std::cout << GridLogMessage << "Written Scidac Configuration on " << config << std::endl;
-    }
-  };
-
-  void CheckpointRestore(int traj, Field &U, GridSerialRNG &sRNG,
-                         GridParallelRNG &pRNG) {
-    std::string config, rng;
-    this->build_filenames(traj, Params, config, rng);
-    this->check_filename(rng);
-    this->check_filename(config);
-
-
-    uint32_t nersc_csum,scidac_csuma,scidac_csumb;
-    BinaryIO::readRNG(sRNG, pRNG, rng, 0,nersc_csum,scidac_csuma,scidac_csumb);
-
-    Metadata md_content;
-    ScidacReader _ScidacReader;
-    _ScidacReader.open(config);
-    _ScidacReader.readScidacFieldRecord(U,md_content);  // format from the header
-    _ScidacReader.close();
-
-    std::cout << GridLogMessage << "Read Scidac Configuration from " << config
-              << " checksum " << std::hex 
-	      << nersc_csum<<"/"
-	      << scidac_csuma<<"/"
-	      << scidac_csumb
-	      << std::dec << std::endl;
-  };
-};
-}
-}
-
-#endif  // HAVE_LIME
-#endif  // ILDG_CHECKPOINTER
--- a/Grid/qcd/observables/polyakov_loop.h
+++ b/Grid/qcd/observables/polyakov_loop.h
@@ -1,68 +0,0 @@
-/*************************************************************************************
-
-Grid physics library, www.github.com/paboyle/Grid
-
-Source file: ./lib/qcd/modules/polyakov_line.h
-
-Copyright (C) 2017
-
-Author: David Preti <david.preti@csic.es>
-
-This program is free software; you can redistribute it and/or modify
-it under the terms of the GNU General Public License as published by
-the Free Software Foundation; either version 2 of the License, or
-(at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along
-with this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
-
-See the full license in the file "LICENSE" in the top level distribution
-directory
-*************************************************************************************/
-/*  END LEGAL */
-
-#ifndef HMC_POLYAKOV_H
-#define HMC_POLYAKOV_H
-
-namespace Grid {
-namespace QCD {
-
-// this is only defined for a gauge theory
-template <class Impl>
-class PolyakovLogger : public HmcObservable<typename Impl::Field> {
- public:
-  // here forces the Impl to be of gauge fields
-  // if not the compiler will complain
-  INHERIT_GIMPL_TYPES(Impl);
-
-  // necessary for HmcObservable compatibility
-  typedef typename Impl::Field Field;
-
-  void TrajectoryComplete(int traj,
-                          Field &U,
-                          GridSerialRNG &sRNG,
-                          GridParallelRNG &pRNG) {
-
-    ComplexD polyakov = WilsonLoops<Impl>::avgPolyakovLoop(U);
-
-    int def_prec = std::cout.precision();
-
-    std::cout << GridLogMessage
-        << std::setprecision(std::numeric_limits<Real>::digits10 + 1)
-        << "Polyakov Loop: [ " << traj << " ] "<< polyakov << std::endl;
-
-    std::cout.precision(def_prec);
-
-  }
-};
-
-}  // namespace QCD
-}  // namespace Grid
-
-#endif  // HMC_POLYAKOV_H
--- a/Grid/qcd/utils/A2Autils.h
+++ b/Grid/qcd/utils/A2Autils.h
--- a/Grid/qcd/utils/CovariantSmearing.h
+++ b/Grid/qcd/utils/CovariantSmearing.h
@@ -1,87 +0,0 @@
-/*************************************************************************************
-
-Grid physics library, www.github.com/paboyle/Grid
-
-Source file: ./lib/qcd/action/scalar/CovariantLaplacian.h
-
-Copyright (C) 2016
-
-Author: Azusa Yamaguchi
-
-This program is free software; you can redistribute it and/or modify
-it under the terms of the GNU General Public License as published by
-the Free Software Foundation; either version 2 of the License, or
-(at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along
-with this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
-
-See the full license in the file "LICENSE" in the top level distribution
-directory
-*************************************************************************************/
-#pragma once
-
-namespace Grid {
-namespace QCD {
-
-template <class Gimpl> class CovariantSmearing : public Gimpl 
-{
-public:
-  INHERIT_GIMPL_TYPES(Gimpl);
-
-  typedef typename Gimpl::GaugeLinkField GaugeMat;
-  typedef typename Gimpl::GaugeField GaugeLorentz;
-
-  template<typename T>
-  static void GaussianSmear(const std::vector<LatticeColourMatrix>& U, 
-			    T& chi, 
-			    const Real& width, int Iterations, int orthog)
-  {
-    GridBase *grid = chi._grid;
-    T psi(grid);
-
-    ////////////////////////////////////////////////////////////////////////////////////
-    // Follow Chroma conventions for width to keep compatibility with previous data
-    // Free field iterates 
-    //   chi = (1 - w^2/4N p^2)^N chi
-    //
-    //       ~ (e^(-w^2/4N p^2)^N chi
-    //       ~ (e^(-w^2/4 p^2) chi
-    //       ~ (e^(-w'^2/2 p^2) chi          [ w' = w/sqrt(2) ]
-    //
-    // Which in coordinate space is proportional to
-    //
-    //   e^(-x^2/w^2) = e^(-x^2/2w'^2) 
-    //
-    // The 4 is a bit unconventional from Gaussian width perspective, but... it's Chroma convention.
-    // 2nd derivative approx d^2/dx^2  =  x+mu + x-mu - 2x
-    //
-    // d^2/dx^2 = - p^2
-    //
-    // chi = ( 1 + w^2/4N d^2/dx^2 )^N chi
-    //
-    ////////////////////////////////////////////////////////////////////////////////////
-    Real coeff = (width*width) / Real(4*Iterations);
-  
-    int dims = Nd;
-    if( orthog < Nd ) dims=Nd-1;
-
-    for(int n = 0; n < Iterations; ++n) {
-      psi = (-2.0*dims)*chi;
-      for(int mu=0;mu<Nd;mu++) {
-	if ( mu != orthog ) { 
-	  psi = psi + Gimpl::CovShiftForward(U[mu],mu,chi);    
-	  psi = psi + Gimpl::CovShiftBackward(U[mu],mu,chi);    
-	}
-      }
-      chi = chi + coeff*psi;
-    }
-  }
-};
-}}
--- a/Grid/serialisation/BaseIO.h
+++ b/Grid/serialisation/BaseIO.h
@@ -1,660 +0,0 @@
-    /*************************************************************************************
-
-    Grid physics library, www.github.com/paboyle/Grid 
-
-    Source file: ./lib/serialisation/BaseIO.h
-
-    Copyright (C) 2015
-
-Author: Antonin Portelli <antonin.portelli@me.com>
-Author: Peter Boyle <paboyle@ph.ed.ac.uk>
-Author: Guido Cossu <guido.cossu@ed.ac.uk>
-
-    This program is free software; you can redistribute it and/or modify
-    it under the terms of the GNU General Public License as published by
-    the Free Software Foundation; either version 2 of the License, or
-    (at your option) any later version.
-
-    This program is distributed in the hope that it will be useful,
-    but WITHOUT ANY WARRANTY; without even the implied warranty of
-    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-    GNU General Public License for more details.
-
-    You should have received a copy of the GNU General Public License along
-    with this program; if not, write to the Free Software Foundation, Inc.,
-    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
-
-    See the full license in the file "LICENSE" in the top level distribution directory
-    *************************************************************************************/
-    /*  END LEGAL */
-#ifndef GRID_SERIALISATION_ABSTRACT_READER_H
-#define GRID_SERIALISATION_ABSTRACT_READER_H
-
-#include <type_traits>
-#include <Grid/tensors/Tensors.h>
-#include <Grid/serialisation/VectorUtils.h>
-#include <Grid/Eigen/unsupported/CXX11/Tensor>
-
-namespace Grid {
-  namespace EigenIO {
-    // EigenIO works for scalars that are not just Grid supported scalars
-    template<typename T, typename V = void> struct is_complex : public std::false_type {};
-    // Support all complex types (not just Grid complex types) - even if the definitions overlap (!)
-    template<typename T> struct is_complex<             T , typename
-        std::enable_if< ::Grid::is_complex<             T >::value>::type> : public std::true_type {};
-    template<typename T> struct is_complex<std::complex<T>, typename
-        std::enable_if<!::Grid::is_complex<std::complex<T>>::value>::type> : public std::true_type {};
-
-    // Helpers to support I/O for Eigen tensors of arithmetic scalars, complex types, or Grid tensors
-    template<typename T, typename V = void> struct is_scalar : public std::false_type {};
-    template<typename T> struct is_scalar<T, typename std::enable_if<std::is_arithmetic<T>::value || is_complex<T>::value>::type> : public std::true_type {};
-
-    // Is this an Eigen tensor
-    template<typename T> struct is_tensor : std::integral_constant<bool,
-      std::is_base_of<Eigen::TensorBase<T, Eigen::ReadOnlyAccessors>, T>::value> {};
-
-    // Is this an Eigen tensor of a supported scalar
-    template<typename T, typename V = void> struct is_tensor_of_scalar : public std::false_type {};
-    template<typename T> struct is_tensor_of_scalar<T, typename std::enable_if<is_tensor<T>::value && is_scalar<typename T::Scalar>::value>::type> : public std::true_type {};
-
-    // Is this an Eigen tensor of a supported container
-    template<typename T, typename V = void> struct is_tensor_of_container : public std::false_type {};
-    template<typename T> struct is_tensor_of_container<T, typename std::enable_if<is_tensor<T>::value && isGridTensor<typename T::Scalar>::value>::type> : public std::true_type {};
-
-    // These traits describe the scalars inside Eigen tensors
-    // I wish I could define these in reference to the scalar type (so there would be fewer traits defined)
-    // but I'm unable to find a syntax to make this work
-    template<typename T, typename V = void> struct Traits {};
-    // Traits are the default for scalars, or come from GridTypeMapper for GridTensors
-    template<typename T> struct Traits<T, typename std::enable_if<is_tensor_of_scalar<T>::value>::type>
-      : public GridTypeMapper_Base {
-      using scalar_type   = typename T::Scalar; // ultimate base scalar
-      static constexpr bool is_complex = ::Grid::EigenIO::is_complex<scalar_type>::value;
-    };
-    // Traits are the default for scalars, or come from GridTypeMapper for GridTensors
-    template<typename T> struct Traits<T, typename std::enable_if<is_tensor_of_container<T>::value>::type> {
-      using BaseTraits  = GridTypeMapper<typename T::Scalar>;
-      using scalar_type = typename BaseTraits::scalar_type; // ultimate base scalar
-      static constexpr bool   is_complex = ::Grid::EigenIO::is_complex<scalar_type>::value;
-      static constexpr int   TensorLevel = BaseTraits::TensorLevel;
-      static constexpr int          Rank = BaseTraits::Rank;
-      static constexpr std::size_t count = BaseTraits::count;
-      static constexpr int Dimension(int dim) { return BaseTraits::Dimension(dim); }
-    };
-
-    // Is this a fixed-size Eigen tensor
-    template<typename T> struct is_tensor_fixed : public std::false_type {};
-    template<typename Scalar_, typename Dimensions_, int Options_, typename IndexType>
-    struct is_tensor_fixed<Eigen::TensorFixedSize<Scalar_, Dimensions_, Options_, IndexType>>
-        : public std::true_type {};
-    template<typename Scalar_, typename Dimensions_, int Options_, typename IndexType,
-              int MapOptions_, template <class> class MapPointer_>
-    struct is_tensor_fixed<Eigen::TensorMap<Eigen::TensorFixedSize<Scalar_, Dimensions_,
-                                            Options_, IndexType>, MapOptions_, MapPointer_>>
-        : public std::true_type {};
-
-    // Is this a variable-size Eigen tensor
-    template<typename T, typename V = void> struct is_tensor_variable : public std::false_type {};
-    template<typename T> struct is_tensor_variable<T, typename std::enable_if<is_tensor<T>::value
-        && !is_tensor_fixed<T>::value>::type> : public std::true_type {};
-  }
-
-  // Abstract writer/reader classes ////////////////////////////////////////////
-  // static polymorphism implemented using CRTP idiom
-  class Serializable;
-
-  // Static abstract writer
-  template <typename T>
-  class Writer
-  {
-  public:
-    Writer(void);
-    virtual ~Writer(void) = default;
-    void push(const std::string &s);
-    void pop(void);
-    template <typename U>
-    typename std::enable_if<std::is_base_of<Serializable, U>::value>::type
-    write(const std::string& s, const U &output);
-    template <typename U>
-    typename std::enable_if<!std::is_base_of<Serializable, U>::value && !EigenIO::is_tensor<U>::value>::type
-    write(const std::string& s, const U &output);
-    template <typename U>
-    void write(const std::string &s, const iScalar<U> &output);
-    template <typename U, int N>
-    void write(const std::string &s, const iVector<U, N> &output);
-    template <typename U, int N>
-    void write(const std::string &s, const iMatrix<U, N> &output);
-    template <typename ETensor>
-    typename std::enable_if<EigenIO::is_tensor<ETensor>::value>::type
-    write(const std::string &s, const ETensor &output);
-
-    // Helper functions for Scalar vs Container specialisations
-    template <typename ETensor>
-    inline typename std::enable_if<EigenIO::is_tensor_of_scalar<ETensor>::value,
-    const typename ETensor::Scalar *>::type
-    getFirstScalar(const ETensor &output)
-    {
-      return output.data();
-    }
-    
-    template <typename ETensor>
-    inline typename std::enable_if<EigenIO::is_tensor_of_container<ETensor>::value,
-    const typename EigenIO::Traits<ETensor>::scalar_type *>::type
-    getFirstScalar(const ETensor &output)
-    {
-      return output.data()->begin();
-    }
-    
-    template <typename S>
-    inline typename std::enable_if<EigenIO::is_scalar<S>::value, void>::type
-    copyScalars(S * &pCopy, const S &Source)
-    {
-      * pCopy ++ = Source;
-    }
-    
-    template <typename S>
-    inline typename std::enable_if<isGridTensor<S>::value, void>::type
-    copyScalars(typename GridTypeMapper<S>::scalar_type * &pCopy, const S &Source)
-    {
-      for( const typename GridTypeMapper<S>::scalar_type &item : Source )
-        * pCopy ++ = item;
-    }
-
-    void         scientificFormat(const bool set);
-    bool         isScientific(void);
-    void         setPrecision(const unsigned int prec);
-    unsigned int getPrecision(void);
-  private:
-    T            *upcast;
-    bool         scientific_{false};
-    unsigned int prec_{0};
-  };
-  
-  // Static abstract reader
-  template <typename T>
-  class Reader
-  {
-  public:
-    Reader(void);
-    virtual ~Reader(void) = default;
-    bool push(const std::string &s);
-    void pop(void);
-    template <typename U>
-    typename std::enable_if<std::is_base_of<Serializable, U>::value, void>::type
-    read(const std::string& s, U &output);
-    template <typename U>
-    typename std::enable_if<!std::is_base_of<Serializable, U>::value
-                         && !EigenIO::is_tensor<U>::value, void>::type
-    read(const std::string& s, U &output);
-    template <typename U>
-    void read(const std::string &s, iScalar<U> &output);
-    template <typename U, int N>
-    void read(const std::string &s, iVector<U, N> &output);
-    template <typename U, int N>
-    void read(const std::string &s, iMatrix<U, N> &output);
-    template <typename ETensor>
-    typename std::enable_if<EigenIO::is_tensor<ETensor>::value, void>::type
-    read(const std::string &s, ETensor &output);
-    template <typename ETensor>
-    typename std::enable_if<EigenIO::is_tensor_fixed<ETensor>::value, void>::type
-    Reshape(ETensor &t, const std::array<typename ETensor::Index, ETensor::NumDimensions> &dims );
-    template <typename ETensor>
-    typename std::enable_if<EigenIO::is_tensor_variable<ETensor>::value, void>::type
-    Reshape(ETensor &t, const std::array<typename ETensor::Index, ETensor::NumDimensions> &dims );
-  
-    // Helper functions for Scalar vs Container specialisations
-    template <typename S>
-    inline typename std::enable_if<EigenIO::is_scalar<S>::value, void>::type
-    copyScalars(S &Dest, const S * &pSource)
-    {
-      Dest = * pSource ++;
-    }
-    
-    template <typename S>
-    inline typename std::enable_if<isGridTensor<S>::value, void>::type
-    copyScalars(S &Dest, const typename GridTypeMapper<S>::scalar_type * &pSource)
-    {
-      for( typename GridTypeMapper<S>::scalar_type &item : Dest )
-        item = * pSource ++;
-    }
-    
-  protected:
-    template <typename U>
-    void fromString(U &output, const std::string &s);
-  private:
-    T *upcast;
-  };
-
-   // What is the vtype
-  template<typename T> struct isReader {
-    static const bool value = false;
-  };
-  template<typename T> struct isWriter {
-    static const bool value = false;
-  };
-
-  // Writer template implementation
-  template <typename T>
-  Writer<T>::Writer(void)
-  {
-    upcast = static_cast<T *>(this);
-  }
-  
-  template <typename T>
-  void Writer<T>::push(const std::string &s)
-  {
-    upcast->push(s);
-  }
-  
-  template <typename T>
-  void Writer<T>::pop(void)
-  {
-    upcast->pop();
-  }
-  
-  template <typename T>
-  template <typename U>
-  typename std::enable_if<std::is_base_of<Serializable, U>::value, void>::type
-  Writer<T>::write(const std::string &s, const U &output)
-  {
-    U::write(*this, s, output);
-  }
-  
-  template <typename T>
-  template <typename U>
-  typename std::enable_if<!std::is_base_of<Serializable, U>::value
-                       && !EigenIO::is_tensor<U>::value, void>::type
-  Writer<T>::write(const std::string &s, const U &output)
-  {
-    upcast->writeDefault(s, output);
-  }
-
-
-  template <typename T>
-  template <typename U>
-  void Writer<T>::write(const std::string &s, const iScalar<U> &output)
-  {
-    upcast->writeDefault(s, tensorToVec(output));
-  }
-
-  template <typename T>
-  template <typename U, int N>
-  void Writer<T>::write(const std::string &s, const iVector<U, N> &output)
-  {
-    upcast->writeDefault(s, tensorToVec(output));
-  }
-
-  template <typename T>
-  template <typename U, int N>
-  void Writer<T>::write(const std::string &s, const iMatrix<U, N> &output)
-  {
-    upcast->writeDefault(s, tensorToVec(output));
-  }
-  
-  // Eigen::Tensors of Grid tensors (iScalar, iVector, iMatrix)
-  template <typename T>
-  template <typename ETensor>
-  typename std::enable_if<EigenIO::is_tensor<ETensor>::value, void>::type
-  Writer<T>::write(const std::string &s, const ETensor &output)
-  {
-    using Index = typename ETensor::Index;
-    using Container = typename ETensor::Scalar; // NB: could be same as scalar
-    using Traits = EigenIO::Traits<ETensor>;
-    using Scalar = typename Traits::scalar_type; // type of the underlying scalar
-    constexpr unsigned int TensorRank{ETensor::NumIndices};
-    constexpr unsigned int ContainerRank{Traits::Rank}; // Only non-zero for containers
-    constexpr unsigned int TotalRank{TensorRank + ContainerRank};
-    const Index NumElements{output.size()};
-    assert( NumElements > 0 );
-
-    // Get the dimensionality of the tensor
-    std::vector<std::size_t>  TotalDims(TotalRank);
-    for(auto i = 0; i < TensorRank; i++ ) {
-      auto dim = output.dimension(i);
-      TotalDims[i] = static_cast<size_t>(dim);
-      assert( TotalDims[i] == dim ); // check we didn't lose anything in the conversion
-    }
-    for(auto i = 0; i < ContainerRank; i++ )
-      TotalDims[TensorRank + i] = Traits::Dimension(i);
-
-    // If the Tensor isn't in Row-Major order, then we'll need to copy it's data
-    const bool CopyData{NumElements > 1 && ETensor::Layout != Eigen::StorageOptions::RowMajor};
-    const Scalar * pWriteBuffer;
-    std::vector<Scalar> CopyBuffer;
-    const Index TotalNumElements = NumElements * Traits::count;
-    if( !CopyData ) {
-      pWriteBuffer = getFirstScalar( output );
-    } else {
-      // Regardless of the Eigen::Tensor storage order, the copy will be Row Major
-      CopyBuffer.resize( TotalNumElements );
-      Scalar * pCopy = &CopyBuffer[0];
-      pWriteBuffer = pCopy;
-      std::array<Index, TensorRank> MyIndex;
-      for( auto &idx : MyIndex ) idx = 0;
-      for( auto n = 0; n < NumElements; n++ ) {
-        const Container & c = output( MyIndex );
-        copyScalars( pCopy, c );
-        // Now increment the index
-        for( int i = output.NumDimensions - 1; i >= 0 && ++MyIndex[i] == output.dimension(i); i-- )
-          MyIndex[i] = 0;
-      }
-    }
-    upcast->template writeMultiDim<Scalar>(s, TotalDims, pWriteBuffer, TotalNumElements);
-  }
-
-  template <typename T>
-  void Writer<T>::scientificFormat(const bool set)
-  {
-    scientific_ = set;
-  }
-
-  template <typename T>
-  bool Writer<T>::isScientific(void)
-  {
-    return scientific_;
-  }
-
-  template <typename T>
-  void Writer<T>::setPrecision(const unsigned int prec)
-  {
-    prec_ = prec;
-  }
-
-  template <typename T>
-  unsigned int Writer<T>::getPrecision(void)
-  {
-    return prec_;
-  }
-  
-  // Reader template implementation
-  template <typename T>
-  Reader<T>::Reader(void)
-  {
-    upcast = static_cast<T *>(this);
-  }
-  
-  template <typename T>
-  bool Reader<T>::push(const std::string &s)
-  {
-    return upcast->push(s);
-  }
-  
-  template <typename T>
-  void Reader<T>::pop(void)
-  {
-    upcast->pop();
-  }
-  
-  template <typename T>
-  template <typename U>
-  typename std::enable_if<std::is_base_of<Serializable, U>::value, void>::type
-  Reader<T>::read(const std::string &s, U &output)
-  {
-    U::read(*this, s, output);
-  }
-  
-  template <typename T>
-  template <typename U>
-  typename std::enable_if<!std::is_base_of<Serializable, U>::value
-                       && !EigenIO::is_tensor<U>::value, void>::type
-  Reader<T>::read(const std::string &s, U &output)
-  {
-    upcast->readDefault(s, output);
-  }
-
-  template <typename T>
-  template <typename U>
-  void Reader<T>::read(const std::string &s, iScalar<U> &output)
-  {
-    typename TensorToVec<iScalar<U>>::type v;
-
-    upcast->readDefault(s, v);
-    vecToTensor(output, v);
-  }
-
-  template <typename T>
-  template <typename U, int N>
-  void Reader<T>::read(const std::string &s, iVector<U, N> &output)
-  {
-    typename TensorToVec<iVector<U, N>>::type v;
-    
-    upcast->readDefault(s, v);
-    vecToTensor(output, v);
-  }
-  
-  template <typename T>
-  template <typename U, int N>
-  void Reader<T>::read(const std::string &s, iMatrix<U, N> &output)
-  {
-    typename TensorToVec<iMatrix<U, N>>::type v;
-    
-    upcast->readDefault(s, v);
-    vecToTensor(output, v);
-  }
-
-  template <typename T>
-  template <typename ETensor>
-  typename std::enable_if<EigenIO::is_tensor<ETensor>::value, void>::type
-  Reader<T>::read(const std::string &s, ETensor &output)
-  {
-    using Index = typename ETensor::Index;
-    using Container = typename ETensor::Scalar; // NB: could be same as scalar
-    using Traits = EigenIO::Traits<ETensor>;
-    using Scalar = typename Traits::scalar_type; // type of the underlying scalar
-    constexpr unsigned int TensorRank{ETensor::NumIndices};
-    constexpr unsigned int ContainerRank{Traits::Rank}; // Only non-zero for containers
-    constexpr unsigned int TotalRank{TensorRank + ContainerRank};
-    using ETDims = std::array<Index, TensorRank>; // Dimensions of the tensor
-
-    // read the (flat) data and dimensionality
-    std::vector<std::size_t> dimData;
-    std::vector<Scalar> buf;
-    upcast->readMultiDim( s, buf, dimData );
-    assert(dimData.size() == TotalRank && "EigenIO: Tensor rank mismatch" );
-    // Make sure that the number of elements read matches dimensions read
-    std::size_t NumContainers = 1;
-    for( auto i = 0 ; i < TensorRank ; i++ )
-      NumContainers *= dimData[i];
-    // If our scalar object is a Container, make sure it's dimensions match what we read back
-    std::size_t ElementsPerContainer = 1;
-    for( auto i = 0 ; i < ContainerRank ; i++ ) {
-      assert( dimData[TensorRank+i] == Traits::Dimension(i) && "Tensor Container dimensions don't match data" );
-      ElementsPerContainer *= dimData[TensorRank+i];
-    }
-    assert( NumContainers * ElementsPerContainer == buf.size() && "EigenIO: Number of elements != product of dimensions" );
-    // Now see whether the tensor is the right shape, or can be made to be
-    const auto & dims = output.dimensions();
-    bool bShapeOK = (output.data() != nullptr);
-    for( auto i = 0; bShapeOK && i < TensorRank ; i++ )
-      if( dims[i] != dimData[i] )
-        bShapeOK = false;
-    // Make the tensor the same size as the data read
-    ETDims MyIndex;
-    if( !bShapeOK ) {
-      for( auto i = 0 ; i < TensorRank ; i++ )
-        MyIndex[i] = dimData[i];
-      Reshape(output, MyIndex);
-    }
-    // Copy the data into the tensor
-    for( auto &d : MyIndex ) d = 0;
-    const Scalar * pSource = &buf[0];
-    for( std::size_t n = 0 ; n < NumContainers ; n++ ) {
-      Container & c = output( MyIndex );
-      copyScalars( c, pSource );
-      // Now increment the index
-      for( int i = TensorRank - 1; i != -1 && ++MyIndex[i] == dims[i]; i-- )
-        MyIndex[i] = 0;
-    }
-    assert( pSource == &buf[NumContainers * ElementsPerContainer] );
-  }
-
-  template <typename T>
-  template <typename ETensor>
-  typename std::enable_if<EigenIO::is_tensor_fixed<ETensor>::value, void>::type
-  Reader<T>::Reshape(ETensor &t, const std::array<typename ETensor::Index, ETensor::NumDimensions> &dims )
-  {
-    assert( 0 && "EigenIO: Fixed tensor dimensions can't be changed" );
-  }
-
-  template <typename T>
-  template <typename ETensor>
-  typename std::enable_if<EigenIO::is_tensor_variable<ETensor>::value, void>::type
-  Reader<T>::Reshape(ETensor &t, const std::array<typename ETensor::Index, ETensor::NumDimensions> &dims )
-  {
-    //t.reshape( dims );
-    t.resize( dims );
-  }
-
-  template <typename T>
-  template <typename U>
-  void Reader<T>::fromString(U &output, const std::string &s)
-  {
-    std::istringstream is(s);
-    
-    is.exceptions(std::ios::failbit);
-    try
-    {
-      is >> std::boolalpha >> output;
-    }
-    catch(std::ios_base::failure &e)
-    {
-      std::cerr << "numerical conversion failure on '" << s << "' ";
-      std::cerr << "(typeid: " << typeid(U).name() << ")" << std::endl;
-      abort();
-    }
-  }
-
-  // serializable base class ///////////////////////////////////////////////////
-  class Serializable
-  {
-  public:
-    template <typename T>
-    static inline void write(Writer<T> &WR,const std::string &s,
-                             const Serializable &obj)
-    {}
-    
-    template <typename T>
-    static inline void read(Reader<T> &RD,const std::string &s,
-                            Serializable &obj)
-    {}
-    
-    friend inline std::ostream & operator<<(std::ostream &os,
-                                            const Serializable &obj)
-    {
-      return os;
-    }
-
-    template <typename T1, typename T2>
-    static inline typename std::enable_if<!EigenIO::is_tensor<T1>::value || !EigenIO::is_tensor<T2>::value, bool>::type
-    CompareMember(const T1 &lhs, const T2 &rhs) {
-      return lhs == rhs;
-    }
-
-    template <typename T1, typename T2>
-    static inline typename std::enable_if<EigenIO::is_tensor<T1>::value && EigenIO::is_tensor<T2>::value, bool>::type
-    CompareMember(const T1 &lhs, const T2 &rhs) {
-      // First check whether dimensions match (Eigen tensor library will assert if they don't match)
-      bool bReturnValue = (T1::NumIndices == T2::NumIndices);
-      for( auto i = 0 ; bReturnValue && i < T1::NumIndices ; i++ )
-          bReturnValue = ( lhs.dimension(i) == rhs.dimension(i) );
-      if( bReturnValue ) {
-        Eigen::Tensor<bool, 0, T1::Options> bResult = (lhs == rhs).all();
-        bReturnValue = bResult(0);
-      }
-      return bReturnValue;
-    }
-
-    template <typename T>
-    static inline typename std::enable_if<EigenIO::is_tensor<T>::value, bool>::type
-    CompareMember(const std::vector<T> &lhs, const std::vector<T> &rhs) {
-      const auto NumElements = lhs.size();
-      bool bResult = ( NumElements == rhs.size() );
-      for( auto i = 0 ; i < NumElements && bResult ; i++ )
-        bResult = CompareMember(lhs[i], rhs[i]);
-      return bResult;
-    }
-
-    template <typename T>
-    static inline typename std::enable_if<!EigenIO::is_tensor<T>::value, void>::type
-    WriteMember(std::ostream &os, const T &object) {
-      os << object;
-    }
-    
-    template <typename T>
-    static inline typename std::enable_if<EigenIO::is_tensor<T>::value, void>::type
-    WriteMember(std::ostream &os, const T &object) {
-      using Index = typename T::Index;
-      const Index NumElements{object.size()};
-      assert( NumElements > 0 );
-      Index count = 1;
-      os << "T<";
-      for( int i = 0; i < T::NumIndices; i++ ) {
-        Index dim = object.dimension(i);
-        count *= dim;
-        if( i )
-          os << ",";
-        os << dim;
-      }
-      assert( count == NumElements && "Number of elements doesn't match tensor dimensions" );
-      os << ">{";
-      const typename T::Scalar * p = object.data();
-      for( Index i = 0; i < count; i++ ) {
-        if( i )
-          os << ",";
-        os << *p++;
-      }
-      os << "}";
-    }
-  };
-
-  // Generic writer interface //////////////////////////////////////////////////
-  template <typename T>
-  inline void push(Writer<T> &w, const std::string &s) {
-    w.push(s);
-  }
-  
-  template <typename T>
-  inline void push(Writer<T> &w, const char *s)
-  {
-    w.push(std::string(s));
-  }
-  
-  template <typename T>
-  inline void pop(Writer<T> &w)
-  {
-    w.pop();
-  }
-  
-  template <typename T, typename U>
-  inline void write(Writer<T> &w, const std::string& s, const U &output)
-  {
-    w.write(s, output);
-  }
-  
-  // Generic reader interface //////////////////////////////////////////////////
-  template <typename T>
-  inline bool push(Reader<T> &r, const std::string &s)
-  {
-    return r.push(s);
-  }
-  
-  template <typename T>
-  inline bool push(Reader<T> &r, const char *s)
-  {
-    return r.push(std::string(s));
-  }
-  
-  template <typename T>
-  inline void pop(Reader<T> &r)
-  {
-    r.pop();
-  }
-  
-  template <typename T, typename U>
-  inline void read(Reader<T> &r, const std::string &s, U &output)
-  {
-    r.read(s, output);
-  }
-}
-
-#endif
--- a/Grid/serialisation/VectorUtils.h
+++ b/Grid/serialisation/VectorUtils.h
@@ -1,479 +0,0 @@
-/*************************************************************************************
- 
- Grid physics library, www.github.com/paboyle/Grid
- 
- Source file: ./Grid/serialisation/VectorUtils.h
- 
- Copyright (C) 2015
- 
- Author: Antonin Portelli <antonin.portelli@me.com>
- Author: Peter Boyle <paboyle@ph.ed.ac.uk>
- Author: paboyle <paboyle@ph.ed.ac.uk>
- 
- This program is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published by
- the Free Software Foundation; either version 2 of the License, or
- (at your option) any later version.
- 
- This program is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- GNU General Public License for more details.
- 
- You should have received a copy of the GNU General Public License along
- with this program; if not, write to the Free Software Foundation, Inc.,
- 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
- 
- See the full license in the file "LICENSE" in the top level distribution directory
- *************************************************************************************/
-/*  END LEGAL */
-#ifndef GRID_SERIALISATION_VECTORUTILS_H
-#define GRID_SERIALISATION_VECTORUTILS_H
-
-#include <type_traits>
-#include <Grid/tensors/Tensors.h>
-
-namespace Grid {
-  // Pair IO utilities /////////////////////////////////////////////////////////
-  // helper function to parse input in the format "<obj1 obj2>"
-  template <typename T1, typename T2>
-  inline std::istream & operator>>(std::istream &is, std::pair<T1, T2> &buf)
-  {
-    T1 buf1;
-    T2 buf2;
-    char c;
-
-    // Search for "pair" delimiters.
-    do
-    {
-      is.get(c);
-    } while (c != '(' && !is.eof());
-    if (c == '(')
-    {
-      int start = is.tellg();
-      do
-      {
-        is.get(c);
-      } while (c != ')' && !is.eof());
-      if (c == ')')
-      {
-        int end = is.tellg();
-        int psize = end - start - 1;
-
-        // Only read data between pair limiters.
-        is.seekg(start);
-        std::string tmpstr(psize, ' ');
-        is.read(&tmpstr[0], psize);
-        std::istringstream temp(tmpstr);
-        temp >> buf1 >> buf2;
-        buf = std::make_pair(buf1, buf2);
-        is.seekg(end);
-      }
-    }
-    is.peek();
-    return is;
-  }
-  
-  // output to streams for pairs
-  template <class T1, class T2>
-  inline std::ostream & operator<<(std::ostream &os, const std::pair<T1, T2> &p)
-  {
-    os << "(" << p.first << " " << p.second << ")";
-    return os;
-  }
-  
-  // std::vector<std:vector<...>> nested to specified Rank //////////////////////////////////
-  template<typename T, unsigned int Rank>
-  struct NestedStdVector {
-    typedef typename std::vector<typename NestedStdVector<T, Rank - 1>::type> type;
-  };
-  
-  template<typename T>
-  struct NestedStdVector<T,0> {
-    typedef T type;
-  };
-  
-  // Grid scalar tensors to nested std::vectors //////////////////////////////////
-  template <typename T>
-  struct TensorToVec
-  {
-    typedef T type;
-  };
-
-  template <typename T>
-  struct TensorToVec<iScalar<T>>
-  {
-    typedef typename TensorToVec<T>::type type;
-  };
-
-  template <typename T, int N>
-  struct TensorToVec<iVector<T, N>>
-  {
-    typedef typename std::vector<typename TensorToVec<T>::type> type;
-  };
-
-  template <typename T, int N>
-  struct TensorToVec<iMatrix<T, N>>
-  {
-    typedef typename std::vector<std::vector<typename TensorToVec<T>::type>> type;
-  };
-
-  template <typename T>
-  void tensorDim(std::vector<size_t> &dim, const T &t, const bool wipe = true)
-  {
-    if (wipe)
-    {
-      dim.clear();
-    }
-  }
-
-  template <typename T>
-  void tensorDim(std::vector<size_t> &dim, const iScalar<T> &t, const bool wipe = true)
-  {
-    if (wipe)
-    {
-      dim.clear();
-    }
-    tensorDim(dim, t._internal, false);
-  }
-
-  template <typename T, int N>
-  void tensorDim(std::vector<size_t> &dim, const iVector<T, N> &t, const bool wipe = true)
-  {
-    if (wipe)
-    {
-      dim.clear();
-    }
-    dim.push_back(N);
-    tensorDim(dim, t._internal[0], false);
-  }
-
-  template <typename T, int N>
-  void tensorDim(std::vector<size_t> &dim, const iMatrix<T, N> &t, const bool wipe = true)
-  {
-    if (wipe)
-    {
-      dim.clear();
-    }
-    dim.push_back(N);
-    dim.push_back(N);
-    tensorDim(dim, t._internal[0][0], false);
-  }
-
-  template <typename T>
-  typename TensorToVec<T>::type tensorToVec(const T &t)
-  {
-    return t;
-  }
-
-  template <typename T>
-  typename TensorToVec<iScalar<T>>::type tensorToVec(const iScalar<T>& t)
-  {
-    return tensorToVec(t._internal);
-  }
-
-  template <typename T, int N>
-  typename TensorToVec<iVector<T, N>>::type tensorToVec(const iVector<T, N>& t)
-  {
-    typename TensorToVec<iVector<T, N>>::type v;
-
-    v.resize(N);
-    for (unsigned int i = 0; i < N; i++) 
-    {
-      v[i] = tensorToVec(t._internal[i]);
-    }
-
-    return v;
-  }
-
-  template <typename T, int N>
-  typename TensorToVec<iMatrix<T, N>>::type tensorToVec(const iMatrix<T, N>& t)
-  {
-    typename TensorToVec<iMatrix<T, N>>::type v;
-
-    v.resize(N);
-    for (unsigned int i = 0; i < N; i++)
-    {
-      v[i].resize(N);
-      for (unsigned int j = 0; j < N; j++) 
-      {
-        v[i][j] = tensorToVec(t._internal[i][j]);
-      }
-    }
-
-    return v;
-  }
-
-  template <typename T>
-  void vecToTensor(T &t, const typename TensorToVec<T>::type &v)
-  {
-    t = v;
-  }
-
-
-  template <typename T>
-  void vecToTensor(iScalar<T> &t, const typename TensorToVec<iScalar<T>>::type &v)
-  {
-    vecToTensor(t._internal, v);
-  }
-
-  template <typename T, int N>
-  void vecToTensor(iVector<T, N> &t, const typename TensorToVec<iVector<T, N>>::type &v)
-  {
-    for (unsigned int i = 0; i < N; i++) 
-    {
-      vecToTensor(t._internal[i], v[i]);
-    }
-  }
-
-  template <typename T, int N>
-  void vecToTensor(iMatrix<T, N> &t, const typename TensorToVec<iMatrix<T, N>>::type &v)
-  {
-    for (unsigned int i = 0; i < N; i++)
-    for (unsigned int j = 0; j < N; j++)
-    {
-      vecToTensor(t._internal[i][j], v[i][j]);
-    }
-  }
-
-  // Vector element trait //////////////////////////////////////////////////////  
-  template <typename T>
-  struct element
-  {
-    typedef T type;
-    static constexpr bool is_number = false;
-  };
-  
-  template <typename T>
-  struct element<std::vector<T>>
-  {
-    typedef typename element<T>::type type;
-    static constexpr bool is_number = std::is_arithmetic<T>::value
-                                      or is_complex<T>::value
-                                      or element<T>::is_number;
-  };
-  
-  // Vector flattening utility class ////////////////////////////////////////////
-  // Class to flatten a multidimensional std::vector
-  template <typename V>
-  class Flatten
-  {
-  public:
-    typedef typename element<V>::type Element;
-  public:
-    explicit                     Flatten(const V &vector);
-    const V &                    getVector(void);
-    const std::vector<Element> & getFlatVector(void);
-    const std::vector<size_t>  & getDim(void);
-  private:
-    void accumulate(const Element &e);
-    template <typename W>
-    void accumulate(const W &v);
-    void accumulateDim(const Element &e);
-    template <typename W>
-    void accumulateDim(const W &v);
-  private:
-    const V              &vector_;
-    std::vector<Element> flatVector_;
-    std::vector<size_t>  dim_;
-  };
-  
-  // Class to reconstruct a multidimensional std::vector
-  template <typename V>
-  class Reconstruct
-  {
-  public:
-    typedef typename element<V>::type Element;
-  public:
-    Reconstruct(const std::vector<Element> &flatVector,
-                const std::vector<size_t> &dim);
-    const V &                    getVector(void);
-    const std::vector<Element> & getFlatVector(void);
-    const std::vector<size_t>  & getDim(void);
-  private:
-    void fill(std::vector<Element> &v);
-    template <typename W>
-    void fill(W &v);
-    void resize(std::vector<Element> &v, const unsigned int dim);
-    template <typename W>
-    void resize(W &v, const unsigned int dim);
-  private:
-    V                          vector_;
-    const std::vector<Element> &flatVector_;
-    std::vector<size_t>        dim_;
-    size_t                     ind_{0};
-    unsigned int               dimInd_{0};
-  };
-
-  // Flatten class template implementation
-  template <typename V>
-  void Flatten<V>::accumulate(const Element &e)
-  {
-    flatVector_.push_back(e);
-  }
-  
-  template <typename V>
-  template <typename W>
-  void Flatten<V>::accumulate(const W &v)
-  {
-    for (auto &e: v)
-    {
-      accumulate(e);
-    }
-  }
-  
-  template <typename V>
-  void Flatten<V>::accumulateDim(const Element &e) {};
-  
-  template <typename V>
-  template <typename W>
-  void Flatten<V>::accumulateDim(const W &v)
-  {
-    dim_.push_back(v.size());
-    accumulateDim(v[0]);
-  }
-  
-  template <typename V>
-  Flatten<V>::Flatten(const V &vector)
-  : vector_(vector)
-  {
-    accumulate(vector_);
-    accumulateDim(vector_);
-  }
-  
-  template <typename V>
-  const V & Flatten<V>::getVector(void)
-  {
-    return vector_;
-  }
-  
-  template <typename V>
-  const std::vector<typename Flatten<V>::Element> &
-  Flatten<V>::getFlatVector(void)
-  {
-    return flatVector_;
-  }
-  
-  template <typename V>
-  const std::vector<size_t> & Flatten<V>::getDim(void)
-  {
-    return dim_;
-  }
-  
-  // Reconstruct class template implementation
-  template <typename V>
-  void Reconstruct<V>::fill(std::vector<Element> &v)
-  {
-    for (auto &e: v)
-    {
-      e = flatVector_[ind_++];
-    }
-  }
-  
-  template <typename V>
-  template <typename W>
-  void Reconstruct<V>::fill(W &v)
-  {
-    for (auto &e: v)
-    {
-      fill(e);
-    }
-  }
-  
-  template <typename V>
-  void Reconstruct<V>::resize(std::vector<Element> &v, const unsigned int dim)
-  {
-    v.resize(dim_[dim]);
-  }
-  
-  template <typename V>
-  template <typename W>
-  void Reconstruct<V>::resize(W &v, const unsigned int dim)
-  {
-    v.resize(dim_[dim]);
-    for (auto &e: v)
-    {
-      resize(e, dim + 1);
-    }
-  }
-  
-  template <typename V>
-  Reconstruct<V>::Reconstruct(const std::vector<Element> &flatVector,
-                              const std::vector<size_t> &dim)
-  : flatVector_(flatVector)
-  , dim_(dim)
-  {
-    resize(vector_, 0);
-    fill(vector_);
-  }
-  
-  template <typename V>
-  const V & Reconstruct<V>::getVector(void)
-  {
-    return vector_;
-  }
-  
-  template <typename V>
-  const std::vector<typename Reconstruct<V>::Element> &
-  Reconstruct<V>::getFlatVector(void)
-  {
-    return flatVector_;
-  }
-  
-  template <typename V>
-  const std::vector<size_t> & Reconstruct<V>::getDim(void)
-  {
-    return dim_;
-  }
-
-  // Vector IO utilities ///////////////////////////////////////////////////////
-  // helper function to read space-separated values
-  template <typename T>
-  std::vector<T> strToVec(const std::string s)
-  {
-    std::istringstream sstr(s);
-    T                  buf;
-    std::vector<T>     v;
-    
-    while(!sstr.eof())
-    {
-      sstr >> buf;
-      v.push_back(buf);
-    }
-    
-    return v;
-  }
-  
-  // output to streams for vectors
-  template < class T >
-  inline std::ostream & operator<<(std::ostream &os, const std::vector<T> &v)
-  {
-    os << "[";
-    for (unsigned int i = 0; i < v.size(); ++i)
-    {
-      os << v[i];
-      if (i < v.size() - 1)
-      {
-        os << " ";
-      }
-    }
-    os << "]";
-    
-    return os;
-  }
-}
-
-// helper function to read space-separated values
-template <typename T>
-std::string vecToStr(const std::vector<T> &v)
-{
-  using Grid::operator<<;
-  
-  std::ostringstream sstr;
-
-  sstr << v;
-
-  return sstr.str();
-}
-
-#endif
--- a/Grid/serialisation/XmlIO.cc
+++ b/Grid/serialisation/XmlIO.cc
@@ -1,188 +0,0 @@
-    /*************************************************************************************
-
-    Grid physics library, www.github.com/paboyle/Grid 
-
-    Source file: ./lib/serialisation/XmlIO.cc
-
-    Copyright (C) 2015
-
-Author: Antonin Portelli <antonin.portelli@me.com>
-Author: paboyle <paboyle@ph.ed.ac.uk>
-
-    This program is free software; you can redistribute it and/or modify
-    it under the terms of the GNU General Public License as published by
-    the Free Software Foundation; either version 2 of the License, or
-    (at your option) any later version.
-
-    This program is distributed in the hope that it will be useful,
-    but WITHOUT ANY WARRANTY; without even the implied warranty of
-    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-    GNU General Public License for more details.
-
-    You should have received a copy of the GNU General Public License along
-    with this program; if not, write to the Free Software Foundation, Inc.,
-    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
-
-    See the full license in the file "LICENSE" in the top level distribution directory
-    *************************************************************************************/
-    /*  END LEGAL */
-#include <Grid/GridCore.h>
-
-using namespace Grid;
-
-void Grid::xmlCheckParse(const pugi::xml_parse_result &result, const std::string name)
-{
-  if (!result) 
-  {
-    std::cerr << "XML parsing error for " << name << std::endl;
-    std::cerr << "XML error description: " << result.description() << std::endl;
-    std::cerr << "XML error offset     : " << result.offset << std::endl;
-    abort();
-  }
-}
-
-// Writer implementation ///////////////////////////////////////////////////////
-XmlWriter::XmlWriter(const std::string &fileName, std::string toplev) : fileName_(fileName)
-{
-  if ( toplev == std::string("") ) {
-    node_=doc_;
-  } else { 
-    node_=doc_.append_child();
-    node_.set_name(toplev.c_str());
-  }
-}
-
-XmlWriter::~XmlWriter(void)
-{
-  if ( fileName_ != std::string("") ) { 
-    doc_.save_file(fileName_.c_str(), indent_.c_str());
-  }
-}
-
-void XmlWriter::push(const std::string &s)
-{
-  node_ = node_.append_child(s.c_str());
-}
-
-void XmlWriter::pushXmlString(const std::string &s)
-{
-  pugi::xml_document doc;
-  auto               result = doc.load_buffer(s.c_str(), s.size());
-
-  xmlCheckParse(result, "fragment\n'" + s +"'");
-  for (pugi::xml_node child = doc.first_child(); child; child = child.next_sibling())
-  {
-      node_ = node_.append_copy(child);
-  }
-  pop();
-}
-
-void XmlWriter::pop(void)
-{
-  node_ = node_.parent();
-}
-
-std::string XmlWriter::docString(void)
-{
-  std::ostringstream oss; 
-  doc_.save(oss, indent_.c_str());
-  return oss.str();
-}
-
-std::string XmlWriter::string(void)
-{
-  std::ostringstream oss; 
-  doc_.save(oss, indent_.c_str(), pugi::format_default | pugi::format_no_declaration);
-  return oss.str();
-}
-
-// Reader implementation ///////////////////////////////////////////////////////
-XmlReader::XmlReader(const std::string &s,  const bool isBuffer, 
-                     std::string toplev) 
-{
-  pugi::xml_parse_result result;
-  
-  if (isBuffer)
-  {
-    fileName_ = "<string>";
-    result    = doc_.load_string(s.c_str());
-    xmlCheckParse(result, "string\n'" + s + "'");
-  }
-  else
-  {
-    fileName_ = s;
-    result    = doc_.load_file(s.c_str());
-    xmlCheckParse(result, "file '" + fileName_ + "'");
-  }
-  if ( toplev == std::string("") ) {
-  node_ = doc_;
-  } else { 
-    node_ = doc_.child(toplev.c_str());
-  }
-}
-
-#define XML_SAFE_NODE(expr)\
-if (expr)\
-{\
-  node_ = expr;\
-  return true;\
-}\
-else\
-{\
-  return false;\
-}
-
-bool XmlReader::push(const std::string &s)
-{
-  if (s.empty())
-  {
-    XML_SAFE_NODE(node_.first_child());
-  }
-  else
-  {
-    XML_SAFE_NODE(node_.child(s.c_str()));
-  }
-}
-
-void XmlReader::pop(void)
-{
-  node_ = node_.parent();
-}
-
-bool XmlReader::nextElement(const std::string &s)
-{
-  if (s.empty())
-  {
-    XML_SAFE_NODE(node_.next_sibling());
-  }
-  else
-  {
-    XML_SAFE_NODE(node_.next_sibling(s.c_str()));
-  }
-}
-
-void XmlReader::readCurrentSubtree(std::string &s)
-{
-  std::ostringstream oss; 
-  pugi::xml_document doc;
-
-  doc.append_copy(node_);
-  doc.save(oss, indent_.c_str(), pugi::format_default | pugi::format_no_declaration);
-  s = oss.str();
-}
-
-template <>
-void XmlReader::readDefault(const std::string &s, std::string &output)
-{
-  if (node_.child(s.c_str()))
-  {
-    output = node_.child(s.c_str()).first_child().value();
-  }
-  else
-  {
-    std::cout << GridLogWarning << "XML: cannot open node '" << s << "'";
-    std::cout << std::endl;
-
-    output = ""; 
-  }
-}
--- a/Grid/serialisation/XmlIO.h
+++ b/Grid/serialisation/XmlIO.h
@@ -1,246 +0,0 @@
-    /*************************************************************************************
-
-    Grid physics library, www.github.com/paboyle/Grid 
-
-    Source file: ./lib/serialisation/XmlIO.h
-
-    Copyright (C) 2015
-
-Author: Antonin Portelli <antonin.portelli@me.com>
-Author: Peter Boyle <paboyle@ph.ed.ac.uk>
-Author: paboyle <paboyle@ph.ed.ac.uk>
-
-    This program is free software; you can redistribute it and/or modify
-    it under the terms of the GNU General Public License as published by
-    the Free Software Foundation; either version 2 of the License, or
-    (at your option) any later version.
-
-    This program is distributed in the hope that it will be useful,
-    but WITHOUT ANY WARRANTY; without even the implied warranty of
-    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-    GNU General Public License for more details.
-
-    You should have received a copy of the GNU General Public License along
-    with this program; if not, write to the Free Software Foundation, Inc.,
-    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
-
-    See the full license in the file "LICENSE" in the top level distribution directory
-    *************************************************************************************/
-    /*  END LEGAL */
-#ifndef GRID_SERIALISATION_XML_READER_H
-#define GRID_SERIALISATION_XML_READER_H
-
-#include <iostream>
-#include <iomanip>
-#include <fstream>
-#include <sstream>
-#include <math.h>
-#include <vector>
-#include <cassert>
-
-#include <Grid/pugixml/pugixml.h>
-#include <Grid/GridCore.h>
-
-namespace Grid
-{
-  void xmlCheckParse(const pugi::xml_parse_result &result, const std::string name);
-  
-  class XmlWriter: public Writer<XmlWriter>
-  {    
-  public:
-    XmlWriter(const std::string &fileName, std::string toplev = std::string("grid") );
-    virtual ~XmlWriter(void);
-    void push(const std::string &s);
-    void pushXmlString(const std::string &s);
-    void pop(void);
-    template <typename U>
-    void writeDefault(const std::string &s, const U &x);
-    template <typename U>
-    void writeDefault(const std::string &s, const std::vector<U> &x);
-    template <typename U>
-    void writeMultiDim(const std::string &s, const std::vector<size_t> & Dimensions, const U * pDataRowMajor, size_t NumElements);
-    std::string docString(void);
-    std::string string(void);
-  private:
-    const std::string  indent_{"  "};
-    pugi::xml_document doc_;
-    pugi::xml_node     node_;
-    std::string        fileName_;
-  };
-  
-  class XmlReader: public Reader<XmlReader>
-  {
-  public:
-    XmlReader(const std::string &fileName, const bool isBuffer = false, 
-              std::string toplev = std::string("grid") );
-    virtual ~XmlReader(void) = default;
-    bool push(const std::string &s = "");
-    void pop(void);
-    bool nextElement(const std::string &s = "");
-    template <typename U>
-    void readDefault(const std::string &s, U &output);
-    template <typename U>
-    void readDefault(const std::string &s, std::vector<U> &output);
-    template <typename U>
-    void readMultiDim(const std::string &s, std::vector<U> &buf, std::vector<size_t> &dim);
-    void readCurrentSubtree(std::string &s);
-  private:
-    void checkParse(const pugi::xml_parse_result &result, const std::string name);
-  private:
-    const std::string  indent_{"  "};
-    pugi::xml_document doc_;
-    pugi::xml_node     node_;
-    std::string        fileName_;
-  };
-
-  template <>
-  struct isReader< XmlReader > {
-    static const bool value = true;
-  };
-
-  template <>
-  struct isWriter<XmlWriter > {
-    static const bool value = true;
-  };
-  
-  // Writer template implementation ////////////////////////////////////////////
-  template <typename U>
-  void XmlWriter::writeDefault(const std::string &s, const U &x)
-  {
-    std::ostringstream os;
-    
-    if (getPrecision())
-    {
-      os.precision(getPrecision());
-    }
-    if (isScientific())
-    {
-      os << std::scientific;
-    }
-    os << std::boolalpha << x;
-    pugi::xml_node leaf = node_.append_child(s.c_str());
-    leaf.append_child(pugi::node_pcdata).set_value(os.str().c_str());
-  }
-  
-  template <typename U>
-  void XmlWriter::writeDefault(const std::string &s, const std::vector<U> &x)
-  {
-    push(s);
-    for( auto &u : x )
-    {
-      write("elem", u);
-    }
-    pop();
-  }
-
-  template <typename U>
-  void XmlWriter::writeMultiDim(const std::string &s, const std::vector<size_t> & Dimensions, const U * pDataRowMajor, size_t NumElements)
-  {
-    push(s);
-    size_t count = 1;
-    const int Rank = static_cast<int>( Dimensions.size() );
-    write("rank", Rank );
-    std::vector<size_t> MyIndex( Rank );
-    for( auto d : Dimensions ) {
-      write("dim", d);
-      count *= d;
-    }
-    assert( count == NumElements && "XmlIO : element count doesn't match dimensions" );
-    static const char sName[] = "tensor";
-    for( int i = 0 ; i < Rank ; i++ ) {
-      MyIndex[i] = 0;
-      push(sName);
-    }
-    while (NumElements--) {
-      write("elem", *pDataRowMajor++);
-      int i;
-      for( i = Rank - 1 ; i != -1 && ++MyIndex[i] == Dimensions[i] ; i-- )
-        MyIndex[i] = 0;
-      int Rollover = Rank - 1 - i;
-      for( i = 0 ; i < Rollover ; i++ )
-        pop();
-      for( i = 0 ; NumElements && i < Rollover ; i++ )
-        push(sName);
-    }
-    pop();
-  }
-
-  // Reader template implementation ////////////////////////////////////////////
-  template <typename U>
-  void XmlReader::readDefault(const std::string &s, U &output)
-  {
-    std::string buf;
-    
-    readDefault(s, buf);
-    fromString(output, buf);
-  }
-  
-  template <>
-  void XmlReader::readDefault(const std::string &s, std::string &output);
-  
-  template <typename U>
-  void XmlReader::readDefault(const std::string &s, std::vector<U> &output)
-  {
-    if (!push(s))
-    {
-      std::cout << GridLogWarning << "XML: cannot open node '" << s << "'";
-      std::cout << std::endl;
-    } else {
-      for(unsigned int i = 0; node_.child("elem"); )
-      {
-        output.resize(i + 1);
-        read("elem", output[i++]);
-        node_.child("elem").set_name("elem-done");
-      }
-      pop();
-    }
-  }
-
-  template <typename U>
-  void XmlReader::readMultiDim(const std::string &s, std::vector<U> &buf, std::vector<size_t> &dim)
-  {
-    if (!push(s))
-    {
-      std::cout << GridLogWarning << "XML: cannot open node '" << s << "'";
-      std::cout << std::endl;
-    } else {
-      static const char sName[] = "tensor";
-      static const char sNameDone[] = "tensor-done";
-      int Rank;
-      read("rank", Rank);
-      dim.resize( Rank );
-      size_t NumElements = 1;
-      for( auto &d : dim )
-      {
-        read("dim", d);
-        node_.child("dim").set_name("dim-done");
-        NumElements *= d;
-      }
-      buf.resize( NumElements );
-      std::vector<size_t> MyIndex( Rank );
-      for( int i = 0 ; i < Rank ; i++ ) {
-        MyIndex[i] = 0;
-        push(sName);
-      }
-
-      for( auto &x : buf )
-      {
-        NumElements--;
-        read("elem", x);
-        node_.child("elem").set_name("elem-done");
-        int i;
-        for( i = Rank - 1 ; i != -1 && ++MyIndex[i] == dim[i] ; i-- )
-          MyIndex[i] = 0;
-        int Rollover = Rank - 1 - i;
-        for( i = 0 ; i < Rollover ; i++ ) {
-          node_.set_name(sNameDone);
-          pop();
-        }
-        for( i = 0 ; NumElements && i < Rollover ; i++ )
-          push(sName);
-      }
-      pop();
-    }
-  }
-}
-#endif
--- a/Grid/util/Profiling.h
+++ b/Grid/util/Profiling.h
@@ -1,72 +0,0 @@
-   /*************************************************************************************
-
-    Grid physics library, www.github.com/paboyle/Grid 
-
-    Source file: ./lib/util/Profiling.h
-
-    Copyright (C) 2018
-
-    Author: Guido Cossu <guido.cossu@ed.ac.uk>
-
-    This program is free software; you can redistribute it and/or modify
-    it under the terms of the GNU General Public License as published by
-    the Free Software Foundation; either version 2 of the License, or
-    (at your option) any later version.
-
-    This program is distributed in the hope that it will be useful,
-    but WITHOUT ANY WARRANTY; without even the implied warranty of
-    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-    GNU General Public License for more details.
-
-    You should have received a copy of the GNU General Public License along
-    with this program; if not, write to the Free Software Foundation, Inc.,
-    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
-
-    See the full license in the file "LICENSE" in the top level distribution directory
-    *************************************************************************************/
-    /*  END LEGAL */
-
-#ifndef GRID_PERF_PROFILING_H
-#define GRID_PERF_PROFILING_H
-
-#include <sstream>
-#include <iostream>
-#include <functional>
-#include <fcntl.h>
-#include <sys/stat.h>
-#include <sys/wait.h>
-#include <sys/types.h>
-#include <unistd.h>
-#include <signal.h>
-
-struct System
-{
-    static void profile(const std::string& name,std::function<void()> body) {
-        std::string filename = name.find(".data") == std::string::npos ? (name + ".data") : name;
-
-        // Launch profiler
-        pid_t pid;
-        std::stringstream s;
-        s << getpid();
-        pid = fork();
-        if (pid == 0) {
-            auto fd=open("/dev/null",O_RDWR);
-            dup2(fd,1);
-            dup2(fd,2);
-            exit(execl("/usr/bin/perf","perf","record","-o",filename.c_str(),"-p",s.str().c_str(),nullptr));
-        }
-
-        // Run body
-        body();
-
-        // Kill profiler  
-        kill(pid,SIGINT);
-        waitpid(pid,nullptr,0);
-    }
-
-    static void profile(std::function<void()> body) {
-        profile("perf.data",body);
-    }
-};
-
-#endif // GRID_PERF_PROFILING_H
--- a/Grid/util/Sha.h
+++ b/Grid/util/Sha.h
@@ -1,99 +0,0 @@
-   /*************************************************************************************
-
-    Grid physics library, www.github.com/paboyle/Grid 
-
-    Source file: ./lib/util/Sha.h
-
-    Copyright (C) 2018
-
-    Author: Peter Boyle
-
-    This program is free software; you can redistribute it and/or modify
-    it under the terms of the GNU General Public License as published by
-    the Free Software Foundation; either version 2 of the License, or
-    (at your option) any later version.
-
-    This program is distributed in the hope that it will be useful,
-    but WITHOUT ANY WARRANTY; without even the implied warranty of
-    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-    GNU General Public License for more details.
-
-    You should have received a copy of the GNU General Public License along
-    with this program; if not, write to the Free Software Foundation, Inc.,
-    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
-
-    See the full license in the file "LICENSE" in the top level distribution directory
-    *************************************************************************************/
-    /*  END LEGAL */
-extern "C" {
-#include <openssl/sha.h>
-}
-#ifdef USE_IPP
-#include "ipp.h"
-#endif
-
-#pragma once
-
-class GridChecksum
-{
-public:
-  static inline uint32_t crc32(const void *data, size_t bytes)
-  {
-    return ::crc32(0L,(unsigned char *)data,bytes);
-  }
-
-#ifdef USE_IPP
-  static inline uint32_t crc32c(const void* data, size_t bytes)
-  {
-      uint32_t crc32c = ~(uint32_t)0;
-      ippsCRC32C_8u(reinterpret_cast<const unsigned char *>(data), bytes, &crc32c);
-      ippsSwapBytes_32u_I(&crc32c, 1);
-  
-      return ~crc32c;
-  }
-#endif
-
-  template <typename T>
-  static inline std::string sha256_string(const std::vector<T> &hash)
-  {
-    std::stringstream sha;
-    std::string       s;
-
-    for(unsigned int i = 0; i < hash.size(); i++) 
-    { 
-        sha << std::hex << static_cast<unsigned int>(hash[i]);
-    }
-    s = sha.str();
-
-    return s;
-  }
-  static inline std::vector<unsigned char> sha256(const void *data,size_t bytes)
-  {
-    std::vector<unsigned char> hash(SHA256_DIGEST_LENGTH);
-    SHA256_CTX sha256;
-    SHA256_Init  (&sha256);
-    SHA256_Update(&sha256, data,bytes);
-    SHA256_Final (&hash[0], &sha256);
-    return hash;
-  }
-  static inline std::vector<int> sha256_seeds(const std::string &s)
-  {
-    std::vector<int> seeds;
-    std::vector<unsigned char> uchars = sha256((void *)s.c_str(),s.size());
-    for(int i=0;i<uchars.size();i++) seeds.push_back(uchars[i]);
-    return seeds;
-  }
-};
-
-/*
-int main(int argc,char **argv)
-{
-  std::string s("The quick brown fox jumps over the lazy dog");
-  auto csum = GridChecksum::sha256_seeds(s);
-  std::cout << "SHA256 sum is 0x";
-  for(int i=0;i<csum.size;i++) { 
-    std::cout << std::hex << csum[i];
-  }
-  std::cout << std::endl;
-}
-*/
--- a/Grid/util/version.cc
+++ b/Grid/util/version.cc
@@ -1,12 +0,0 @@
-#include <iostream>
-#include "Version.h"
-namespace Grid {
-  void printHash(){
-#ifdef GITHASH
-    std::cout << "Current Grid git commit hash=" << GITHASH << std::endl;
-#else
-    std::cout << "Current Grid git commit hash is undefined. Check makefile." << std::endl;
-#endif
-#undef GITHASH
-}
-}
--- a/HMC/Makefile.am
+++ b/HMC/Makefile.am
@@ -1,6 +0,0 @@
-SUBDIRS = . 
-
-include Make.inc
-
-
-
--- a/HMC/Mobius2p1f.cc
+++ b/HMC/Mobius2p1f.cc
@@ -1,198 +0,0 @@
-/*************************************************************************************
-
-Grid physics library, www.github.com/paboyle/Grid
-
-Source file: ./tests/Test_hmc_EODWFRatio.cc
-
-Copyright (C) 2015-2016
-
-Author: Peter Boyle <pabobyle@ph.ed.ac.uk>
-Author: Guido Cossu <guido.cossu@ed.ac.uk>
-
-This program is free software; you can redistribute it and/or modify
-it under the terms of the GNU General Public License as published by
-the Free Software Foundation; either version 2 of the License, or
-(at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along
-with this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
-
-See the full license in the file "LICENSE" in the top level distribution
-directory
-*************************************************************************************/
-/*  END LEGAL */
-#include <Grid/Grid.h>
-
-int main(int argc, char **argv) {
-  using namespace Grid;
-  using namespace Grid::QCD;
-
-  Grid_init(&argc, &argv);
-  int threads = GridThread::GetThreads();
-  // here make a routine to print all the relevant information on the run
-  std::cout << GridLogMessage << "Grid is setup to use " << threads << " threads" << std::endl;
-
-   // Typedefs to simplify notation
-  typedef WilsonImplR FermionImplPolicy;
-  typedef MobiusFermionR FermionAction;
-  typedef typename FermionAction::FermionField FermionField;
-
-  typedef Grid::XmlReader       Serialiser;
-  
-  //::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
-  IntegratorParameters MD;
-  //  typedef GenericHMCRunner<LeapFrog> HMCWrapper; 
-  //  MD.name    = std::string("Leap Frog");
-  //  typedef GenericHMCRunner<ForceGradient> HMCWrapper; 
-  //  MD.name    = std::string("Force Gradient");
-  typedef GenericHMCRunner<MinimumNorm2> HMCWrapper; 
-  MD.name    = std::string("MinimumNorm2");
-  MD.MDsteps = 20;
-  MD.trajL   = 1.0;
-  
-  HMCparameters HMCparams;
-  HMCparams.StartTrajectory  = 0;
-  HMCparams.Trajectories     = 200;
-  HMCparams.NoMetropolisUntil=  20;
-  // "[HotStart, ColdStart, TepidStart, CheckpointStart]\n";
-  HMCparams.StartingType     =std::string("ColdStart");
-  HMCparams.MD = MD;
-  HMCWrapper TheHMC(HMCparams);
-
-  // Grid from the command line arguments --grid and --mpi
-  TheHMC.Resources.AddFourDimGrid("gauge"); // use default simd lanes decomposition
-  
-  CheckpointerParameters CPparams;
-  CPparams.config_prefix = "ckpoint_EODWF_lat";
-  CPparams.rng_prefix    = "ckpoint_EODWF_rng";
-  CPparams.saveInterval  = 10;
-  CPparams.format        = "IEEE64BIG";
-  TheHMC.Resources.LoadNerscCheckpointer(CPparams);
-
-  RNGModuleParameters RNGpar;
-  RNGpar.serial_seeds = "1 2 3 4 5";
-  RNGpar.parallel_seeds = "6 7 8 9 10";
-  TheHMC.Resources.SetRNGSeeds(RNGpar);
-
-  // Construct observables
-  // here there is too much indirection 
-  typedef PlaquetteMod<HMCWrapper::ImplPolicy> PlaqObs;
-  TheHMC.Resources.AddObservable<PlaqObs>();
-  //////////////////////////////////////////////
-
-  const int Ls      = 16;
-  Real beta         = 2.13;
-  Real light_mass   = 0.01;
-  Real strange_mass = 0.04;
-  Real pv_mass      = 1.0;
-  RealD M5  = 1.8;
-  RealD b   = 1.0; // Scale factor two
-  RealD c   = 0.0;
-
-  OneFlavourRationalParams OFRp;
-  OFRp.lo       = 1.0e-2;
-  OFRp.hi       = 64;
-  OFRp.MaxIter  = 10000;
-  OFRp.tolerance= 1.0e-10;
-  OFRp.degree   = 14;
-  OFRp.precision= 40;
-
-  std::vector<Real> hasenbusch({ 0.1 });
-
-  auto GridPtr   = TheHMC.Resources.GetCartesian();
-  auto GridRBPtr = TheHMC.Resources.GetRBCartesian();
-  auto FGrid     = SpaceTimeGrid::makeFiveDimGrid(Ls,GridPtr);
-  auto FrbGrid   = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls,GridPtr);
-
-  IwasakiGaugeActionR GaugeAction(beta);
-
-  // temporarily need a gauge field
-  LatticeGaugeField U(GridPtr);
-
-  // These lines are unecessary if BC are all periodic
-  std::vector<Complex> boundary = {1,1,1,-1};
-  FermionAction::ImplParams Params(boundary);
-  
-  double StoppingCondition = 1e-10;
-  double MaxCGIterations = 30000;
-  ConjugateGradient<FermionField>  CG(StoppingCondition,MaxCGIterations);
-
-  ////////////////////////////////////
-  // Collect actions
-  ////////////////////////////////////
-  ActionLevel<HMCWrapper::Field> Level1(1);
-  ActionLevel<HMCWrapper::Field> Level2(4);
-
-  ////////////////////////////////////
-  // Strange action
-  ////////////////////////////////////
-
-  //  FermionAction StrangeOp(U,*FGrid,*FrbGrid,*GridPtr,*GridRBPtr,light_mass,M5,b,c, Params);
-  //  DomainWallEOFAFermionR Strange_Op_L(Umu, *FGrid, *FrbGrid, *UGrid, *UrbGrid, mf, mf, mb, shift_L, pm, M5);
-  //  DomainWallEOFAFermionR Strange_Op_R(Umu, *FGrid, *FrbGrid, *UGrid, *UrbGrid, mb, mf, mb, shift_R, pm, M5);
-  //  ExactOneFlavourRatioPseudoFermionAction EOFA(Strange_Op_L,Strange_Op_R,CG,ofp, false);
-
-  FermionAction StrangeOp (U,*FGrid,*FrbGrid,*GridPtr,*GridRBPtr,strange_mass,M5,b,c, Params);
-  FermionAction StrangePauliVillarsOp(U,*FGrid,*FrbGrid,*GridPtr,*GridRBPtr,pv_mass,  M5,b,c, Params);
-
-  //  OneFlavourEvenOddRatioRationalPseudoFermionAction<FermionImplPolicy> StrangePseudoFermion(StrangePauliVillarsOp,StrangeOp,OFRp);
-  OneFlavourRatioRationalPseudoFermionAction<FermionImplPolicy> StrangePseudoFermion(StrangePauliVillarsOp,StrangeOp,OFRp);
-  //  TwoFlavourRationalTesterPseudoFermionAction<FermionImplPolicy> StrangePseudoFermion1F(StrangeOp,OFRp);
-  //  TwoFlavourPseudoFermionAction<FermionImplPolicy> StrangePseudoFermion2F(StrangeOp,CG,CG);
-  //  Level1.push_back(&StrangePseudoFermion2F);
-  //  Level1.push_back(&StrangePseudoFermion);
-
-  ////////////////////////////////////
-  // up down action
-  ////////////////////////////////////
-  std::vector<Real> light_den;
-  std::vector<Real> light_num;
-
-  int n_hasenbusch = hasenbusch.size();
-  light_den.push_back(light_mass);
-  for(int h=0;h<n_hasenbusch;h++){
-    light_den.push_back(hasenbusch[h]);
-    light_num.push_back(hasenbusch[h]);
-  }
-  light_num.push_back(pv_mass);
-
-  std::vector<FermionAction *> Numerators;
-  std::vector<FermionAction *> Denominators;
-  std::vector<TwoFlavourEvenOddRatioPseudoFermionAction<FermionImplPolicy> *> Quotients;
-
-  for(int h=0;h<n_hasenbusch+1;h++){
-    std::cout << GridLogMessage << " 2f quotient Action  "<< light_num[h] << " / " << light_den[h]<< std::endl;
-    Numerators.push_back  (new FermionAction(U,*FGrid,*FrbGrid,*GridPtr,*GridRBPtr,light_num[h],M5,b,c, Params));
-    Denominators.push_back(new FermionAction(U,*FGrid,*FrbGrid,*GridPtr,*GridRBPtr,light_den[h],M5,b,c, Params));
-    Quotients.push_back   (new TwoFlavourEvenOddRatioPseudoFermionAction<FermionImplPolicy>(*Numerators[h],*Denominators[h],CG,CG));
-  }
-
-  for(int h=0;h<n_hasenbusch+1;h++){
-    Level1.push_back(Quotients[h]);
-  }
-
-  /////////////////////////////////////////////////////////////
-  // Gauge action
-  /////////////////////////////////////////////////////////////
-  Level2.push_back(&GaugeAction);
-  TheHMC.TheAction.push_back(Level1);
-  TheHMC.TheAction.push_back(Level2);
-  std::cout << GridLogMessage << " Action complete "<< std::endl;
-
-  /////////////////////////////////////////////////////////////
-  // HMC parameters are serialisable
-
-  std::cout << GridLogMessage << " Running the HMC "<< std::endl;
-  TheHMC.Run();  // no smearing
-
-  Grid_finalize();
-} // main
-
-
-
--- a/HMC/Mobius2p1fEOFA.cc
+++ b/HMC/Mobius2p1fEOFA.cc
@@ -1,452 +0,0 @@
-/*************************************************************************************
-
-Grid physics library, www.github.com/paboyle/Grid
-
-Source file: 
-
-Copyright (C) 2015-2016
-
-Author: Peter Boyle <pabobyle@ph.ed.ac.uk>
-Author: Guido Cossu
-Author: David Murphy
-
-This program is free software; you can redistribute it and/or modify
-it under the terms of the GNU General Public License as published by
-the Free Software Foundation; either version 2 of the License, or
-(at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along
-with this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
-
-See the full license in the file "LICENSE" in the top level distribution
-directory
-*************************************************************************************/
-/*  END LEGAL */
-#include <Grid/Grid.h>
-
-#ifdef GRID_DEFAULT_PRECISION_DOUBLE
-#define MIXED_PRECISION
-#endif
-
-namespace Grid{ 
-  namespace QCD{
-
-  /*
-   * Need a plan for gauge field update for mixed precision in HMC                      (2x speed up)
-   *    -- Store the single prec action operator.
-   *    -- Clone the gauge field from the operator function argument.
-   *    -- Build the mixed precision operator dynamically from the passed operator and single prec clone.
-   */
-
-  template<class FermionOperatorD, class FermionOperatorF, class SchurOperatorD, class  SchurOperatorF> 
-  class MixedPrecisionConjugateGradientOperatorFunction : public OperatorFunction<typename FermionOperatorD::FermionField> {
-  public:
-    typedef typename FermionOperatorD::FermionField FieldD;
-    typedef typename FermionOperatorF::FermionField FieldF;
-
-    RealD   Tolerance;
-    RealD   InnerTolerance; //Initial tolerance for inner CG. Defaults to Tolerance but can be changed
-    Integer MaxInnerIterations;
-    Integer MaxOuterIterations;
-    GridBase* SinglePrecGrid4; //Grid for single-precision fields
-    GridBase* SinglePrecGrid5; //Grid for single-precision fields
-    RealD OuterLoopNormMult; //Stop the outer loop and move to a final double prec solve when the residual is OuterLoopNormMult * Tolerance
-
-    FermionOperatorF &FermOpF;
-    FermionOperatorD &FermOpD;;
-    SchurOperatorF &LinOpF;
-    SchurOperatorD &LinOpD;
-
-    Integer TotalInnerIterations; //Number of inner CG iterations
-    Integer TotalOuterIterations; //Number of restarts
-    Integer TotalFinalStepIterations; //Number of CG iterations in final patch-up step
-
-    MixedPrecisionConjugateGradientOperatorFunction(RealD tol, 
-						    Integer maxinnerit, 
-						    Integer maxouterit, 
-						    GridBase* _sp_grid4, 
-						    GridBase* _sp_grid5, 
-						    FermionOperatorF &_FermOpF,
-						    FermionOperatorD &_FermOpD,
-						    SchurOperatorF   &_LinOpF,
-						    SchurOperatorD   &_LinOpD): 
-      LinOpF(_LinOpF),
-      LinOpD(_LinOpD),
-      FermOpF(_FermOpF),
-      FermOpD(_FermOpD),
-      Tolerance(tol), 
-      InnerTolerance(tol), 
-      MaxInnerIterations(maxinnerit), 
-      MaxOuterIterations(maxouterit), 
-      SinglePrecGrid4(_sp_grid4),
-      SinglePrecGrid5(_sp_grid5),
-      OuterLoopNormMult(100.) 
-    { 
-      /* Debugging instances of objects; references are stored
-      std::cout << GridLogMessage << " Mixed precision CG wrapper LinOpF " <<std::hex<< &LinOpF<<std::dec <<std::endl;
-      std::cout << GridLogMessage << " Mixed precision CG wrapper LinOpD " <<std::hex<< &LinOpD<<std::dec <<std::endl;
-      std::cout << GridLogMessage << " Mixed precision CG wrapper FermOpF " <<std::hex<< &FermOpF<<std::dec <<std::endl;
-      std::cout << GridLogMessage << " Mixed precision CG wrapper FermOpD " <<std::hex<< &FermOpD<<std::dec <<std::endl;
-      */
-    };
-
-    void operator()(LinearOperatorBase<FieldD> &LinOpU, const FieldD &src, FieldD &psi) {
-
-      std::cout << GridLogMessage << " Mixed precision CG wrapper operator() "<<std::endl;
-
-      SchurOperatorD * SchurOpU = static_cast<SchurOperatorD *>(&LinOpU);
-      
-      //      std::cout << GridLogMessage << " Mixed precision CG wrapper operator() FermOpU " <<std::hex<< &(SchurOpU->_Mat)<<std::dec <<std::endl;
-      //      std::cout << GridLogMessage << " Mixed precision CG wrapper operator() FermOpD " <<std::hex<< &(LinOpD._Mat) <<std::dec <<std::endl;
-      // Assumption made in code to extract gauge field
-      // We could avoid storing LinopD reference alltogether ?
-      assert(&(SchurOpU->_Mat)==&(LinOpD._Mat));
-
-      ////////////////////////////////////////////////////////////////////////////////////
-      // Must snarf a single precision copy of the gauge field in Linop_d argument
-      ////////////////////////////////////////////////////////////////////////////////////
-      typedef typename FermionOperatorF::GaugeField GaugeFieldF;
-      typedef typename FermionOperatorF::GaugeLinkField GaugeLinkFieldF;
-      typedef typename FermionOperatorD::GaugeField GaugeFieldD;
-      typedef typename FermionOperatorD::GaugeLinkField GaugeLinkFieldD;
-
-      GridBase * GridPtrF = SinglePrecGrid4;
-      GridBase * GridPtrD = FermOpD.Umu._grid;
-      GaugeFieldF     U_f  (GridPtrF);
-      GaugeLinkFieldF Umu_f(GridPtrF);
-      //      std::cout << " Dim gauge field "<<GridPtrF->Nd()<<std::endl; // 4d
-      //      std::cout << " Dim gauge field "<<GridPtrD->Nd()<<std::endl; // 4d
-
-      ////////////////////////////////////////////////////////////////////////////////////
-      // Moving this to a Clone method of fermion operator would allow to duplicate the 
-      // physics parameters and decrease gauge field copies
-      ////////////////////////////////////////////////////////////////////////////////////
-      GaugeLinkFieldD Umu_d(GridPtrD);
-      for(int mu=0;mu<Nd*2;mu++){ 
-	Umu_d = PeekIndex<LorentzIndex>(FermOpD.Umu, mu);
-	precisionChange(Umu_f,Umu_d);
-	PokeIndex<LorentzIndex>(FermOpF.Umu, Umu_f, mu);
-      }
-      pickCheckerboard(Even,FermOpF.UmuEven,FermOpF.Umu);
-      pickCheckerboard(Odd ,FermOpF.UmuOdd ,FermOpF.Umu);
-
-      ////////////////////////////////////////////////////////////////////////////////////
-      // Could test to make sure that LinOpF and LinOpD agree to single prec?
-      ////////////////////////////////////////////////////////////////////////////////////
-      /*
-      GridBase *Fgrid = psi._grid;
-      FieldD tmp2(Fgrid);
-      FieldD tmp1(Fgrid);
-      LinOpU.Op(src,tmp1);
-      LinOpD.Op(src,tmp2);
-      std::cout << " Double gauge field "<< norm2(FermOpD.Umu)<<std::endl;
-      std::cout << " Single gauge field "<< norm2(FermOpF.Umu)<<std::endl;
-      std::cout << " Test of operators "<<norm2(tmp1)<<std::endl;
-      std::cout << " Test of operators "<<norm2(tmp2)<<std::endl;
-      tmp1=tmp1-tmp2;
-      std::cout << " Test of operators diff "<<norm2(tmp1)<<std::endl;
-      */
-
-      ////////////////////////////////////////////////////////////////////////////////////
-      // Make a mixed precision conjugate gradient
-      ////////////////////////////////////////////////////////////////////////////////////
-      MixedPrecisionConjugateGradient<FieldD,FieldF> MPCG(Tolerance,MaxInnerIterations,MaxOuterIterations,SinglePrecGrid5,LinOpF,LinOpD);
-      std::cout << GridLogMessage << "Calling mixed precision Conjugate Gradient" <<std::endl;
-      MPCG(src,psi);
-    }
-  };
-}};
-
-int main(int argc, char **argv) {
-  using namespace Grid;
-  using namespace Grid::QCD;
-
-  Grid_init(&argc, &argv);
-  int threads = GridThread::GetThreads();
-  // here make a routine to print all the relevant information on the run
-  std::cout << GridLogMessage << "Grid is setup to use " << threads << " threads" << std::endl;
-
-   // Typedefs to simplify notation
-  typedef WilsonImplR FermionImplPolicy;
-  typedef MobiusFermionR FermionAction;
-  typedef MobiusFermionF FermionActionF;
-  typedef MobiusEOFAFermionR FermionEOFAAction;
-  typedef MobiusEOFAFermionF FermionEOFAActionF;
-  typedef typename FermionAction::FermionField FermionField;
-  typedef typename FermionActionF::FermionField FermionFieldF;
-
-  typedef Grid::XmlReader       Serialiser;
-  
-  //::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
-  IntegratorParameters MD;
-  //  typedef GenericHMCRunner<LeapFrog> HMCWrapper; 
-  //  MD.name    = std::string("Leap Frog");
-  typedef GenericHMCRunner<ForceGradient> HMCWrapper; 
-  MD.name    = std::string("Force Gradient");
-  //  typedef GenericHMCRunner<MinimumNorm2> HMCWrapper; 
-  //  MD.name    = std::string("MinimumNorm2");
-  MD.MDsteps = 6;
-  MD.trajL   = 1.0;
-  
-  HMCparameters HMCparams;
-  HMCparams.StartTrajectory  = 590;
-  HMCparams.Trajectories     = 1000;
-  HMCparams.NoMetropolisUntil=  0;
-  //  "[HotStart, ColdStart, TepidStart, CheckpointStart]\n";
-  //  HMCparams.StartingType     =std::string("ColdStart");
-  HMCparams.StartingType     =std::string("CheckpointStart");
-  HMCparams.MD = MD;
-  HMCWrapper TheHMC(HMCparams);
-
-  // Grid from the command line arguments --grid and --mpi
-  TheHMC.Resources.AddFourDimGrid("gauge"); // use default simd lanes decomposition
-  
-  CheckpointerParameters CPparams;
-  CPparams.config_prefix = "ckpoint_EODWF_lat";
-  CPparams.rng_prefix    = "ckpoint_EODWF_rng";
-  CPparams.saveInterval  = 10;
-  CPparams.format        = "IEEE64BIG";
-  TheHMC.Resources.LoadNerscCheckpointer(CPparams);
-
-  RNGModuleParameters RNGpar;
-  RNGpar.serial_seeds = "1 2 3 4 5";
-  RNGpar.parallel_seeds = "6 7 8 9 10";
-  TheHMC.Resources.SetRNGSeeds(RNGpar);
-
-  // Construct observables
-  // here there is too much indirection 
-  typedef PlaquetteMod<HMCWrapper::ImplPolicy> PlaqObs;
-  TheHMC.Resources.AddObservable<PlaqObs>();
-  //////////////////////////////////////////////
-
-  const int Ls      = 16;
-  Real beta         = 2.13;
-  Real light_mass   = 0.01;
-  Real strange_mass = 0.04;
-  Real pv_mass      = 1.0;
-  RealD M5  = 1.8;
-  RealD b   = 1.0; 
-  RealD c   = 0.0;
-
-  std::vector<Real> hasenbusch({ 0.1, 0.3, 0.6 });
-
-  auto GridPtr   = TheHMC.Resources.GetCartesian();
-  auto GridRBPtr = TheHMC.Resources.GetRBCartesian();
-  auto FGrid     = SpaceTimeGrid::makeFiveDimGrid(Ls,GridPtr);
-  auto FrbGrid   = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls,GridPtr);
-
-  std::vector<int> latt  = GridDefaultLatt();
-  std::vector<int> mpi   = GridDefaultMpi();
-  std::vector<int> simdF = GridDefaultSimd(Nd,vComplexF::Nsimd());
-  std::vector<int> simdD = GridDefaultSimd(Nd,vComplexD::Nsimd());
-  auto GridPtrF   = SpaceTimeGrid::makeFourDimGrid(latt,simdF,mpi);
-  auto GridRBPtrF = SpaceTimeGrid::makeFourDimRedBlackGrid(GridPtrF);
-  auto FGridF     = SpaceTimeGrid::makeFiveDimGrid(Ls,GridPtrF);
-  auto FrbGridF   = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls,GridPtrF);
-
-  IwasakiGaugeActionR GaugeAction(beta);
-
-  // temporarily need a gauge field
-  LatticeGaugeField U(GridPtr);
-  LatticeGaugeFieldF UF(GridPtrF);
-
-  // These lines are unecessary if BC are all periodic
-  std::vector<Complex> boundary = {1,1,1,-1};
-  FermionAction::ImplParams Params(boundary);
-  FermionActionF::ImplParams ParamsF(boundary);
-  
-  double ActionStoppingCondition     = 1e-10;
-  double DerivativeStoppingCondition = 1e-6;
-  double MaxCGIterations = 30000;
-
-  ////////////////////////////////////
-  // Collect actions
-  ////////////////////////////////////
-  ActionLevel<HMCWrapper::Field> Level1(1);
-  ActionLevel<HMCWrapper::Field> Level2(8);
-
-  ////////////////////////////////////
-  // Strange action
-  ////////////////////////////////////
-  typedef SchurDiagMooeeOperator<FermionActionF,FermionFieldF> LinearOperatorF;
-  typedef SchurDiagMooeeOperator<FermionAction ,FermionField > LinearOperatorD;
-  typedef SchurDiagMooeeOperator<FermionEOFAActionF,FermionFieldF> LinearOperatorEOFAF;
-  typedef SchurDiagMooeeOperator<FermionEOFAAction ,FermionField > LinearOperatorEOFAD;
-
-  typedef MixedPrecisionConjugateGradientOperatorFunction<MobiusFermionD,MobiusFermionF,LinearOperatorD,LinearOperatorF> MxPCG;
-  typedef MixedPrecisionConjugateGradientOperatorFunction<MobiusEOFAFermionD,MobiusEOFAFermionF,LinearOperatorEOFAD,LinearOperatorEOFAF> MxPCG_EOFA;
-
-  // DJM: setup for EOFA ratio (Mobius)
-  OneFlavourRationalParams OFRp;
-  OFRp.lo       = 0.1;
-  OFRp.hi       = 25.0;
-  OFRp.MaxIter  = 10000;
-  OFRp.tolerance= 1.0e-9;
-  OFRp.degree   = 14;
-  OFRp.precision= 50;
-
-  
-  MobiusEOFAFermionR Strange_Op_L (U , *FGrid , *FrbGrid , *GridPtr , *GridRBPtr , strange_mass, strange_mass, pv_mass, 0.0, -1, M5, b, c);
-  MobiusEOFAFermionF Strange_Op_LF(UF, *FGridF, *FrbGridF, *GridPtrF, *GridRBPtrF, strange_mass, strange_mass, pv_mass, 0.0, -1, M5, b, c);
-  MobiusEOFAFermionR Strange_Op_R (U , *FGrid , *FrbGrid , *GridPtr , *GridRBPtr , pv_mass, strange_mass,      pv_mass, -1.0, 1, M5, b, c);
-  MobiusEOFAFermionF Strange_Op_RF(UF, *FGridF, *FrbGridF, *GridPtrF, *GridRBPtrF, pv_mass, strange_mass,      pv_mass, -1.0, 1, M5, b, c);
-
-  ConjugateGradient<FermionField>      ActionCG(ActionStoppingCondition,MaxCGIterations);
-  ConjugateGradient<FermionField>  DerivativeCG(DerivativeStoppingCondition,MaxCGIterations);
-#ifdef MIXED_PRECISION
-  const int MX_inner = 1000;
-  // Mixed precision EOFA
-  LinearOperatorEOFAD Strange_LinOp_L (Strange_Op_L);
-  LinearOperatorEOFAD Strange_LinOp_R (Strange_Op_R);
-  LinearOperatorEOFAF Strange_LinOp_LF(Strange_Op_LF);
-  LinearOperatorEOFAF Strange_LinOp_RF(Strange_Op_RF);
-
-  MxPCG_EOFA ActionCGL(ActionStoppingCondition,
-		       MX_inner,
-		       MaxCGIterations,
-		       GridPtrF,
-		       FrbGridF,
-		       Strange_Op_LF,Strange_Op_L,
-		       Strange_LinOp_LF,Strange_LinOp_L);
-
-  MxPCG_EOFA DerivativeCGL(DerivativeStoppingCondition,
-			   MX_inner,
-			   MaxCGIterations,
-			   GridPtrF,
-			   FrbGridF,
-			   Strange_Op_LF,Strange_Op_L,
-			   Strange_LinOp_LF,Strange_LinOp_L);
-  
-  MxPCG_EOFA ActionCGR(ActionStoppingCondition,
-		       MX_inner,
-		       MaxCGIterations,
-		       GridPtrF,
-		       FrbGridF,
-		       Strange_Op_RF,Strange_Op_R,
-		       Strange_LinOp_RF,Strange_LinOp_R);
-  
-  MxPCG_EOFA DerivativeCGR(DerivativeStoppingCondition,
-			   MX_inner,
-			   MaxCGIterations,
-			   GridPtrF,
-			   FrbGridF,
-			   Strange_Op_RF,Strange_Op_R,
-			   Strange_LinOp_RF,Strange_LinOp_R);
-
-  ExactOneFlavourRatioPseudoFermionAction<FermionImplPolicy> 
-    EOFA(Strange_Op_L, Strange_Op_R, 
-	 ActionCG, 
-	 ActionCGL, ActionCGR,
-	 DerivativeCGL, DerivativeCGR,
-	 OFRp, true);
-#else
-  ExactOneFlavourRatioPseudoFermionAction<FermionImplPolicy> 
-    EOFA(Strange_Op_L, Strange_Op_R, 
-	 ActionCG,
-	 ActionCG, ActionCG,
-	 DerivativeCG, DerivativeCG,
-	 OFRp, true);
-#endif
-  Level1.push_back(&EOFA);
-
-  ////////////////////////////////////
-  // up down action
-  ////////////////////////////////////
-  std::vector<Real> light_den;
-  std::vector<Real> light_num;
-
-  int n_hasenbusch = hasenbusch.size();
-  light_den.push_back(light_mass);
-  for(int h=0;h<n_hasenbusch;h++){
-    light_den.push_back(hasenbusch[h]);
-    light_num.push_back(hasenbusch[h]);
-  }
-  light_num.push_back(pv_mass);
-
-  //////////////////////////////////////////////////////////////
-  // Forced to replicate the MxPCG and DenominatorsF etc.. because
-  // there is no convenient way to "Clone" physics params from double op
-  // into single op for any operator pair.
-  // Same issue prevents using MxPCG in the Heatbath step
-  //////////////////////////////////////////////////////////////
-  std::vector<FermionAction *> Numerators;
-  std::vector<FermionAction *> Denominators;
-  std::vector<TwoFlavourEvenOddRatioPseudoFermionAction<FermionImplPolicy> *> Quotients;
-  std::vector<MxPCG *> ActionMPCG;
-  std::vector<MxPCG *> MPCG;
-  std::vector<FermionActionF *> DenominatorsF;
-  std::vector<LinearOperatorD *> LinOpD;
-  std::vector<LinearOperatorF *> LinOpF; 
-
-  for(int h=0;h<n_hasenbusch+1;h++){
-
-    std::cout << GridLogMessage << " 2f quotient Action  "<< light_num[h] << " / " << light_den[h]<< std::endl;
-
-    Numerators.push_back  (new FermionAction(U,*FGrid,*FrbGrid,*GridPtr,*GridRBPtr,light_num[h],M5,b,c, Params));
-    Denominators.push_back(new FermionAction(U,*FGrid,*FrbGrid,*GridPtr,*GridRBPtr,light_den[h],M5,b,c, Params));
-
-#ifdef MIXED_PRECISION
-    ////////////////////////////////////////////////////////////////////////////
-    // Mixed precision CG for 2f force
-    ////////////////////////////////////////////////////////////////////////////
-
-    DenominatorsF.push_back(new FermionActionF(UF,*FGridF,*FrbGridF,*GridPtrF,*GridRBPtrF,light_den[h],M5,b,c, ParamsF));
-    LinOpD.push_back(new LinearOperatorD(*Denominators[h]));
-    LinOpF.push_back(new LinearOperatorF(*DenominatorsF[h]));
-
-    MPCG.push_back(new MxPCG(DerivativeStoppingCondition,
-			     MX_inner,
-			     MaxCGIterations,
-			     GridPtrF,
-			     FrbGridF,
-			     *DenominatorsF[h],*Denominators[h],
-			     *LinOpF[h], *LinOpD[h]) );
-
-    ActionMPCG.push_back(new MxPCG(ActionStoppingCondition,
-				   MX_inner,
-				   MaxCGIterations,
-				   GridPtrF,
-				   FrbGridF,
-				   *DenominatorsF[h],*Denominators[h],
-				   *LinOpF[h], *LinOpD[h]) );
-
-    // Heatbath not mixed yet. As inverts numerators not so important as raised mass.
-    Quotients.push_back (new TwoFlavourEvenOddRatioPseudoFermionAction<FermionImplPolicy>(*Numerators[h],*Denominators[h],*MPCG[h],*ActionMPCG[h],ActionCG));
-#else
-    ////////////////////////////////////////////////////////////////////////////
-    // Standard CG for 2f force
-    ////////////////////////////////////////////////////////////////////////////
-    Quotients.push_back   (new TwoFlavourEvenOddRatioPseudoFermionAction<FermionImplPolicy>(*Numerators[h],*Denominators[h],DerivativeCG,ActionCG));
-#endif
-
-  }
-
-  for(int h=0;h<n_hasenbusch+1;h++){
-    Level1.push_back(Quotients[h]);
-  }
-
-  /////////////////////////////////////////////////////////////
-  // Gauge action
-  /////////////////////////////////////////////////////////////
-  Level2.push_back(&GaugeAction);
-  TheHMC.TheAction.push_back(Level1);
-  TheHMC.TheAction.push_back(Level2);
-  std::cout << GridLogMessage << " Action complete "<< std::endl;
-
-  /////////////////////////////////////////////////////////////
-  // HMC parameters are serialisable
-
-  std::cout << GridLogMessage << " Running the HMC "<< std::endl;
-  TheHMC.Run();  // no smearing
-
-  Grid_finalize();
-} // main
-
-
-
--- a/HMC/Mobius2p1fRHMC.cc
+++ b/HMC/Mobius2p1fRHMC.cc
@@ -1,198 +0,0 @@
-/*************************************************************************************
-
-Grid physics library, www.github.com/paboyle/Grid
-
-Source file: ./HMC/Mobius2p1fRHMC.cc
-
-Copyright (C) 2015-2016
-
-Author: Peter Boyle <paboyle@ph.ed.ac.uk>
-Author: Guido Cossu <guido.cossu@ed.ac.uk>
-
-This program is free software; you can redistribute it and/or modify
-it under the terms of the GNU General Public License as published by
-the Free Software Foundation; either version 2 of the License, or
-(at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along
-with this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
-
-See the full license in the file "LICENSE" in the top level distribution
-directory
-*************************************************************************************/
-/*  END LEGAL */
-#include <Grid/Grid.h>
-
-int main(int argc, char **argv) { // 2+1 flavour Mobius HMC driver: rational strange-quark action, Hasenbusch-split light-quark quotients, Iwasaki gauge action
-  using namespace Grid;
-  using namespace Grid::QCD;
-
-  Grid_init(&argc, &argv);
-  int threads = GridThread::GetThreads();
-  // TODO: add a routine that prints all the relevant information on the run
-  std::cout << GridLogMessage << "Grid is setup to use " << threads << " threads" << std::endl;
-
-   // Typedefs to simplify notation
-  typedef WilsonImplR FermionImplPolicy;
-  typedef MobiusFermionR FermionAction;
-  typedef typename FermionAction::FermionField FermionField;
-
-  typedef Grid::XmlReader       Serialiser; // NOTE(review): Serialiser is not referenced again in this function
-  
-  //::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
-  IntegratorParameters MD;
-  //  Disabled alternative integrators (kept for reference):
-  //    typedef GenericHMCRunner<LeapFrog> HMCWrapper;      MD.name = std::string("Leap Frog");
-  //    typedef GenericHMCRunner<ForceGradient> HMCWrapper; MD.name = std::string("Force Gradient");
-  //  Integrator actually used by this run:
-  typedef GenericHMCRunner<MinimumNorm2> HMCWrapper; // MD.name on the next line is set to the matching string
-  MD.name    = std::string("MinimumNorm2");
-  MD.MDsteps = 20;
-  MD.trajL   = 1.0;
-  
-  HMCparameters HMCparams;
-  HMCparams.StartTrajectory  = 30; // presumably the trajectory number of the checkpoint to resume from — confirm
-  HMCparams.Trajectories     = 200;
-  HMCparams.NoMetropolisUntil=  0;
-  // StartingType options listed by the original author: HotStart, ColdStart, TepidStart, CheckpointStart
-  //  HMCparams.StartingType     =std::string("ColdStart");
-  HMCparams.StartingType     =std::string("CheckpointStart");
-  HMCparams.MD = MD;
-  HMCWrapper TheHMC(HMCparams);
-
-  // Grid from the command line arguments --grid and --mpi
-  TheHMC.Resources.AddFourDimGrid("gauge"); // use default simd lanes decomposition
-  
-  CheckpointerParameters CPparams;
-  CPparams.config_prefix = "ckpoint_EODWF_lat";
-  CPparams.rng_prefix    = "ckpoint_EODWF_rng";
-  CPparams.saveInterval  = 10;
-  CPparams.format        = "IEEE64BIG";
-  TheHMC.Resources.LoadNerscCheckpointer(CPparams);
-
-  RNGModuleParameters RNGpar;
-  RNGpar.serial_seeds = "1 2 3 4 5";
-  RNGpar.parallel_seeds = "6 7 8 9 10";
-  TheHMC.Resources.SetRNGSeeds(RNGpar);
-
-  // Construct observables: register a plaquette observable with the HMC resources
-  // (original author's note: there is too much indirection here)
-  typedef PlaquetteMod<HMCWrapper::ImplPolicy> PlaqObs;
-  TheHMC.Resources.AddObservable<PlaqObs>();
-  //////////////////////////////////////////////
-
-  const int Ls      = 16;   // extent passed to makeFiveDimGrid / makeFiveDimRedBlackGrid below
-  Real beta         = 2.13; // passed to the IwasakiGaugeActionR constructor
-  Real light_mass   = 0.01; // first entry of light_den
-  Real strange_mass = 0.04; // mass of StrangeOp
-  Real pv_mass      = 1.0;  // mass of StrangePauliVillarsOp and last entry of light_num
-  RealD M5  = 1.8;          // M5, b and c are forwarded unchanged to every FermionAction constructed below
-  RealD b   = 1.0; 
-  RealD c   = 0.0;
-  
-  // FIXME (original author's notes on the rational parameters below):
-  //  - the same parameters are used in MC and MD
-  //  - mixed precision still needs to be added
-  OneFlavourRationalParams OFRp;
-  OFRp.lo       = 4.0e-3;
-  OFRp.hi       = 30.0;
-  OFRp.MaxIter  = 10000;
-  OFRp.tolerance= 1.0e-10;
-  OFRp.degree   = 16;
-  OFRp.precision= 50;
-
-  std::vector<Real> hasenbusch({ 0.1 }); // intermediate masses; each one adds a light-quark quotient below
-
-  auto GridPtr   = TheHMC.Resources.GetCartesian();
-  auto GridRBPtr = TheHMC.Resources.GetRBCartesian();
-  auto FGrid     = SpaceTimeGrid::makeFiveDimGrid(Ls,GridPtr);
-  auto FrbGrid   = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls,GridPtr);
-
-  IwasakiGaugeActionR GaugeAction(beta);
-
-  // temporarily need a gauge field (it is only handed to the fermion operator constructors here)
-  LatticeGaugeField U(GridPtr);
-
-  // These lines are unnecessary if the boundary conditions are all periodic
-  std::vector<Complex> boundary = {1,1,1,-1}; // presumably antiperiodic in the last direction — confirm
-  FermionAction::ImplParams Params(boundary);
-  
-  double StoppingCondition = 1e-10;
-  double MaxCGIterations = 30000; // NOTE(review): an iteration count stored as a double
-  ConjugateGradient<FermionField>  CG(StoppingCondition,MaxCGIterations);
-
-  ////////////////////////////////////
-  // Collect actions
-  ////////////////////////////////////
-  ActionLevel<HMCWrapper::Field> Level1(1); // receives the fermion actions; ctor argument presumably a per-level step multiplier — confirm
-  ActionLevel<HMCWrapper::Field> Level2(4); // receives the gauge action
-
-  ////////////////////////////////////
-  // Strange action
-  ////////////////////////////////////
-
-  //  Disabled experiments kept by the original author (EOFA variant of the strange action):
-  //  DomainWallEOFAFermionR Strange_Op_L(Umu, *FGrid, *FrbGrid, *UGrid, *UrbGrid, mf, mf, mb, shift_L, pm, M5);
-  //  DomainWallEOFAFermionR Strange_Op_R(Umu, *FGrid, *FrbGrid, *UGrid, *UrbGrid, mb, mf, mb, shift_R, pm, M5);
-  //  ExactOneFlavourRatioPseudoFermionAction EOFA(Strange_Op_L,Strange_Op_R,CG,ofp, false);
-
-  FermionAction StrangeOp (U,*FGrid,*FrbGrid,*GridPtr,*GridRBPtr,strange_mass,M5,b,c, Params);
-  FermionAction StrangePauliVillarsOp(U,*FGrid,*FrbGrid,*GridPtr,*GridRBPtr,pv_mass,  M5,b,c, Params);
-
-  OneFlavourEvenOddRatioRationalPseudoFermionAction<FermionImplPolicy> StrangePseudoFermion(StrangePauliVillarsOp,StrangeOp,OFRp);
-  Level1.push_back(&StrangePseudoFermion);
-
-  ////////////////////////////////////
-  // up down action
-  ////////////////////////////////////
-  std::vector<Real> light_den;
-  std::vector<Real> light_num;
-
-  int n_hasenbusch = hasenbusch.size();
-  light_den.push_back(light_mass); // light_den = { light_mass, hasenbusch[0], ... }
-  for(int h=0;h<n_hasenbusch;h++){
-    light_den.push_back(hasenbusch[h]);
-    light_num.push_back(hasenbusch[h]);
-  }
-  light_num.push_back(pv_mass);    // light_num = { hasenbusch[0], ..., pv_mass }
-
-  std::vector<FermionAction *> Numerators;
-  std::vector<FermionAction *> Denominators;
-  std::vector<TwoFlavourEvenOddRatioPseudoFermionAction<FermionImplPolicy> *> Quotients;
-
-  for(int h=0;h<n_hasenbusch+1;h++){ // one quotient per (light_num[h], light_den[h]) pair; objects are heap-allocated and never deleted in this function
-    std::cout << GridLogMessage << " 2f quotient Action  "<< light_num[h] << " / " << light_den[h]<< std::endl;
-    Numerators.push_back  (new FermionAction(U,*FGrid,*FrbGrid,*GridPtr,*GridRBPtr,light_num[h],M5,b,c, Params));
-    Denominators.push_back(new FermionAction(U,*FGrid,*FrbGrid,*GridPtr,*GridRBPtr,light_den[h],M5,b,c, Params));
-    Quotients.push_back   (new TwoFlavourEvenOddRatioPseudoFermionAction<FermionImplPolicy>(*Numerators[h],*Denominators[h],CG,CG)); // the same CG object is passed for both solver arguments
-  }
-
-  for(int h=0;h<n_hasenbusch+1;h++){
-    Level1.push_back(Quotients[h]);
-  }
-
-  /////////////////////////////////////////////////////////////
-  // Gauge action
-  /////////////////////////////////////////////////////////////
-  Level2.push_back(&GaugeAction);
-  TheHMC.TheAction.push_back(Level1);
-  TheHMC.TheAction.push_back(Level2);
-  std::cout << GridLogMessage << " Action complete "<< std::endl;
-
-  /////////////////////////////////////////////////////////////
-  // HMC parameters are serialisable (no serialisation is performed in this function)
-
-  std::cout << GridLogMessage << " Running the HMC "<< std::endl;
-  TheHMC.Run();  // no smearing
-
-  Grid_finalize();
-} // main
-
-
-
--- a/HMC/README
+++ b/HMC/README
@@ -1,109 +0,0 @@
-********************************************************************
-TODO: 
-********************************************************************
-
-i) Mixed precision is now in place for the 2f and EOFA force and action solves,
-   but is still needed in the heatbath solve. Best for the Fermop to have a "clone" method, to
-   reduce the number of solver and action objects. Ideally needed for the EOFA heatbath.
-   Perhaps 15%.
-   Combine with 2x trajectory length?
-
-ii) Rational on EOFA HB  -- relax order
-                         -- Test the approx as per David email
-
-Resume / roll.sh 
-
----------------------------------------------------------------
-
- 16^3 Currently 10 traj per hour
-
- EOFA: use a different derivative solver from the action solver
- EOFA: fix David's hack to the SchurRedBlack guessing
-
-*** Reduce precision/tolerance  in EOFA with second CG param.                          (10% speed up)
-*** Force gradient - reduced precision solve for the gradient                          (4/3x speedup)
-
-
-*** Need a plan for gauge field update for mixed precision in HMC                      (2x speed up)
-    -- Store the single prec action operator.
-    -- Clone the gauge field from the operator function argument.
-    -- Build the mixed precision operator dynamically from the passed operator and single prec clone.
-
-*** Mixed precision CG into EOFA portion         
-*** Further reduce precision in forces to 10^-6 ?
-
-*** Overall: a 3x or so is still possible => 500s -> 160s and 20 traj per hour on 16^3.
-
- Use mixed precision CG in HMC                           
- SchurRedBlack.h: stop use of operator function; use LinearOperator or similar instead.
- Or make an OperatorFunction for mixed precision as a wrapper
-
-********************************************************************
-* Signed off 2+1f HMC with Hasenbusch and strange RHMC 16^3 x 32 DWF Ls=16 Plaquette 0.5883 ish
-* Signed off 2+1f HMC with Hasenbusch and strange EOFA 16^3 x 32 DWF Ls=16 Plaquette 0.5883 ish
-* Wilson plaquette cross checked against CPS and literature GwilsonFnone
-********************************************************************
-
-********************************************************************
-* RHMC: Timesteps & eigenranges matched from previous CPS 16^3 x 32 runs:
-********************************************************************
-
-****
-Strange (m=0.04)  has eigenspan 
-**** 
-16^3 done as 1+1+1 with separate PV's. 
-/dirac1/archive/QCDOC/host/QCDDWF/DWF/2+1f/16nt32/IWASAKI/b2.13/ls16/M1_8/ms0.04/mu0.01/rhmc_multitimescale/evol5/work
-****
-2+1f 16^3  - [ 4e-4, 2.42 ]    for strange
-
-****
-24^3 done as 1+1+1 at strange, and single quotient; see https://arxiv.org/pdf/0804.0473.pdf Eq. 83.
-****
-double lambda_low =   4.0000000000000002e-04 <- strange
-double lambda_low =   1.0000000000000000e-02 <- pauli villars
-And high = 2.5
-
-Array bsn_mass[3] = { 
-double bsn_mass[0] =   1.0000000000000000e+00
-double bsn_mass[1] =   1.0000000000000000e+00
-double bsn_mass[2] =   1.0000000000000000e+00
-}
-Array frm_mass[3] = { 
-double frm_mass[0] =   4.0000000000000001e-02
-double frm_mass[1] =   4.0000000000000001e-02
-double frm_mass[2] =   4.0000000000000001e-02
-}
-
-***
-32^3 
-/dirac1/archive/QCDOC/host/QCDDWF/DWF/2+1f/32nt64/IWASAKI/b2.25/ls16/M1_8/ms0.03/mu0.004/evol6/work
-***
-Similar det scheme
-double lambda_low =   4.0000000000000002e-04
-double lambda_low =   1.0000000000000000e-02
-
-Array bsn_mass[3] = { 
-double bsn_mass[0] =   1.0000000000000000e+00
-double bsn_mass[1] =   1.0000000000000000e+00
-double bsn_mass[2] =   1.0000000000000000e+00
-}
-Array frm_mass[3] = { 
-double frm_mass[0] =   3.0000000000000002e-02
-double frm_mass[1] =   3.0000000000000002e-02
-double frm_mass[2] =   3.0000000000000002e-02
-}
-
-********************************************************************
-* Grid: Power method bounds check
-********************************************************************
- The power method finds a largest eigenvalue of approx 25, not 2.5
- Conventions:
-
-Grid MpcDagMpc based on:
-
-   (Moo-Moe Mee^-1 Meo)^dag(Moo-Moe Mee^-1 Meo)
-
- with  Moo = 5-M5 = 3.2
- CPS use(d) Moo = 1
- The eigenrange in Grid is therefore rescaled by 3.2^2 (about 10), which accounts for the factor of 10
-
--- a/Hadrons/A2AMatrix.hpp
+++ b/Hadrons/A2AMatrix.hpp
@@ -1,746 +0,0 @@
-/*************************************************************************************
-
-Grid physics library, www.github.com/paboyle/Grid 
-
-Source file: Hadrons/A2AMatrix.hpp
-
-Copyright (C) 2015-2019
-
-Author: Antonin Portelli <antonin.portelli@me.com>
-Author: Peter Boyle <paboyle@ph.ed.ac.uk>
-
-This program is free software; you can redistribute it and/or modify
-it under the terms of the GNU General Public License as published by
-the Free Software Foundation; either version 2 of the License, or
-(at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along
-with this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
-
-See the full license in the file "LICENSE" in the top level distribution directory
-*************************************************************************************/
-/*  END LEGAL */
-#ifndef A2A_Matrix_hpp_
-#define A2A_Matrix_hpp_
-
-#include <Hadrons/Global.hpp>
-#include <Hadrons/TimerArray.hpp>
-#include <Grid/Eigen/unsupported/CXX11/Tensor>
-#ifdef USE_MKL
-#include "mkl.h"
-#include "mkl_cblas.h"
-#endif
-
-#ifndef HADRONS_A2AM_NAME 
-#define HADRONS_A2AM_NAME "a2aMatrix"
-#endif
-
-#ifndef HADRONS_A2AM_IO_TYPE
-#define HADRONS_A2AM_IO_TYPE ComplexF
-#endif
-
-#define HADRONS_A2AM_PARALLEL_IO
-
-BEGIN_HADRONS_NAMESPACE
-
-// General A2A matrix set: a rank-5 Eigen tensor map over Grid-allocated memory.
-// Dimensions (in index order):
-//   0 - ext - external field (momentum, EM field, ...)
-//   1 - str - spin-color structure
-//   2 - t   - timeslice
-//   3 - i   - left  A2A mode index
-//   4 - j   - right A2A mode index
-template <typename T>
-using A2AMatrixSet = Eigen::TensorMap<Eigen::Tensor<T, 5, Eigen::RowMajor>>; // row-major map; does not own the storage it points at
-
-template <typename T>
-using A2AMatrix = Eigen::Matrix<T, -1, -1, Eigen::RowMajor>; // dynamically sized in both dimensions (-1 is Eigen::Dynamic), row-major
-
-template <typename T>
-using A2AMatrixTr = Eigen::Matrix<T, -1, -1, Eigen::ColMajor>; // same as A2AMatrix but with column-major storage
-
-/******************************************************************************
- *                      Abstract class for A2A kernels                        *
- ******************************************************************************/
-template <typename T, typename Field>
-class A2AKernel // abstract interface: every member function below is pure virtual
-{
-public:
-    A2AKernel(void) = default;
-    virtual ~A2AKernel(void) = default; // virtual so implementations can be destroyed through a base pointer
-    virtual void operator()(A2AMatrixSet<T> &m, const Field *left, const Field *right,
-                          const unsigned int orthogDim, double &time) = 0; // m and time are non-const references (presumably outputs — confirm against implementations)
-    virtual double flops(const unsigned int blockSizei, const unsigned int blockSizej) = 0; // takes the i and j block sizes
-    virtual double bytes(const unsigned int blockSizei, const unsigned int blockSizej) = 0; // takes the i and j block sizes
-};
-
-/******************************************************************************
- *                  Class to handle A2A matrix block HDF5 I/O                 *
- ******************************************************************************/
-template <typename T>
-class A2AMatrixIo // only declarations here; member definitions are outside this class body
-{
-public:
-    // constructors: the default one leaves both names empty and nt/ni/nj at 0 (see member initialisers below)
-    A2AMatrixIo(void) = default;
-    A2AMatrixIo(std::string filename, std::string dataname, 
-                const unsigned int nt, const unsigned int ni = 0,
-                const unsigned int nj = 0);
-    // destructor (compiler-generated)
-    ~A2AMatrixIo(void) = default;
-    // accessors for the stored sizes
-    unsigned int getNi(void) const;
-    unsigned int getNj(void) const;
-    unsigned int getNt(void) const;
-    size_t       getSize(void) const;
-    // file allocation: takes a metadata object and a chunk size
-    template <typename MetadataType>
-    void initFile(const MetadataType &d, const unsigned int chunkSize);
-    // block I/O: save from a raw pointer or from an A2AMatrixSet slice (ext, str), starting at mode indices (i, j)
-    void saveBlock(const T *data, const unsigned int i, const unsigned int j,
-                   const unsigned int blockSizei, const unsigned int blockSizej);
-    void saveBlock(const A2AMatrixSet<T> &m, const unsigned int ext, const unsigned int str,
-                   const unsigned int i, const unsigned int j);
-    template <template <class> class Vec, typename VecT>
-    void load(Vec<VecT> &v, double *tRead = nullptr); // tRead is optional (defaults to nullptr); presumably receives the read time — confirm
-private:
-    std::string  filename_{""}, dataname_{""};
-    unsigned int nt_{0}, ni_{0}, nj_{0};
-};
-
-/******************************************************************************
- *                  Wrapper for A2A matrix block computation                  *
- ******************************************************************************/
-template <typename T, typename Field, typename MetadataType, typename TIo = T>
-class A2AMatrixBlockComputation // T is the compute type, TIo the type used for I/O buffers (defaults to T)
-{
-private:
-    struct IoHelper // bundles one I/O object with its metadata and four unsigned indices
-    {
-        A2AMatrixIo<TIo> io;
-        MetadataType     md;
-        unsigned int     e, s, i, j;
-    };
-    typedef std::function<std::string(const unsigned int, const unsigned int)>  FilenameFn; // (unsigned, unsigned) -> string
-    typedef std::function<MetadataType(const unsigned int, const unsigned int)> MetadataFn; // (unsigned, unsigned) -> metadata
-public:
-    // constructor; tArray is optional and defaults to nullptr
-    A2AMatrixBlockComputation(GridBase *grid,
-                              const unsigned int orthogDim,
-                              const unsigned int next,
-                              const unsigned int nstr,
-                              const unsigned int blockSize,
-                              const unsigned int cacheBlockSize,
-                              TimerArray *tArray = nullptr);
-    // execution: takes the left/right field vectors, a kernel, and callbacks producing names and metadata
-    void execute(const std::vector<Field> &left, 
-                 const std::vector<Field> &right,
-                 A2AKernel<T, Field> &kernel,
-                 const FilenameFn &ionameFn,
-                 const FilenameFn &filenameFn,
-                 const MetadataFn &metadataFn);
-private:
-    // I/O handler for one matrix set, described by an IoHelper
-    void saveBlock(const A2AMatrixSet<TIo> &m, IoHelper &h);
-private:
-    TimerArray            *tArray_;
-    GridBase              *grid_;
-    unsigned int          orthogDim_, nt_, next_, nstr_, blockSize_, cacheBlockSize_;
-    Vector<T>             mCache_; // buffer in the compute type T
-    Vector<TIo>           mBuf_;   // buffer in the I/O type TIo
-    std::vector<IoHelper> nodeIo_;
-};
-
-/******************************************************************************
- *                       A2A matrix contraction kernels                       *
- ******************************************************************************/
-class A2AContraction // static helpers only: trace-of-product accumulation, matrix product, and flop estimates
-{
-public:
-    // accTrMul(acc, a, b): acc += tr(a*b), accumulated as one unconjugated dot product per row (or column) of a
-    template <typename C, typename MatLeft, typename MatRight>
-    static inline void accTrMul(C &acc, const MatLeft &a, const MatRight &b)
-    {
-        if ((MatLeft::Options == Eigen::RowMajor) and
-            (MatRight::Options == Eigen::ColMajor)) // row-major a with column-major b: rows of a against columns of b
-        {
-            parallel_for (unsigned int r = 0; r < a.rows(); ++r)
-            {
-                C tmp;
-#ifdef USE_MKL
-                dotuRow(tmp, r, a, b);
-#else
-                tmp = a.row(r).conjugate().dot(b.col(r)); // conjugate() offsets the conjugation Eigen's dot() applies to its first operand
-#endif
-                parallel_critical // the shared accumulator is updated inside the critical section
-                {
-                    acc += tmp;
-                }
-            }
-        }
-        else // every other storage combination: columns of a against rows of b
-        {
-            parallel_for (unsigned int c = 0; c < a.cols(); ++c)
-            {
-                C tmp;
-#ifdef USE_MKL 
-                dotuCol(tmp, c, a, b);
-#else
-                tmp = a.col(c).conjugate().dot(b.row(c)); // same conjugate() trick as in the row branch
-#endif
-                parallel_critical
-                {
-                    acc += tmp;
-                }
-            }
-        }
-    }
-
-    template <typename MatLeft, typename MatRight>
-    static inline double accTrMulFlops(const MatLeft &a, const MatRight &b) // flop estimate for accTrMul; b is not used
-    {
-        double n = a.rows()*a.cols();
-
-        return 8.*n; // 8 flops per element of a (presumably 6 for a complex multiply + 2 for a complex add)
-    }
-
-    // mul(res, a, b): res = a*b (BLAS gemm when USE_MKL is defined, Eigen's operator* otherwise)
-#ifdef USE_MKL
-    template <template <class, int...> class Mat, int... Opts>
-    static inline void mul(Mat<ComplexD, Opts...> &res, 
-                           const Mat<ComplexD, Opts...> &a, 
-                           const Mat<ComplexD, Opts...> &b) // double-precision complex overload (zgemm)
-    {
-        static const ComplexD one(1., 0.), zero(0., 0.); // alpha = 1, beta = 0: res is overwritten with a*b
-
-        if ((res.rows() != a.rows()) or (res.cols() != b.cols())) // resize res only when its shape does not match
-        {
-            res.resize(a.rows(), b.cols());
-        }
-        if (Mat<ComplexD, Opts...>::Options == Eigen::RowMajor) // row-major: leading dimensions are the column counts
-        {
-            cblas_zgemm(CblasRowMajor, CblasNoTrans, CblasNoTrans, a.rows(), b.cols(),
-                        a.cols(), &one, a.data(), a.cols(), b.data(), b.cols(), &zero,
-                        res.data(), res.cols());
-        }
-        else if (Mat<ComplexD, Opts...>::Options == Eigen::ColMajor) // column-major: leading dimensions are the row counts
-        {
-            cblas_zgemm(CblasColMajor, CblasNoTrans, CblasNoTrans, a.rows(), b.cols(),
-                        a.cols(), &one, a.data(), a.rows(), b.data(), b.rows(), &zero,
-                        res.data(), res.rows());
-        }
-    }
-
-    template <template <class, int...> class Mat, int... Opts>
-    static inline void mul(Mat<ComplexF, Opts...> &res, 
-                           const Mat<ComplexF, Opts...> &a, 
-                           const Mat<ComplexF, Opts...> &b) // single-precision complex overload (cgemm); same structure as above
-    {
-        static const ComplexF one(1., 0.), zero(0., 0.);
-
-        if ((res.rows() != a.rows()) or (res.cols() != b.cols()))
-        {
-            res.resize(a.rows(), b.cols());
-        }
-        if (Mat<ComplexF, Opts...>::Options == Eigen::RowMajor)
-        {
-            cblas_cgemm(CblasRowMajor, CblasNoTrans, CblasNoTrans, a.rows(), b.cols(),
-                        a.cols(), &one, a.data(), a.cols(), b.data(), b.cols(), &zero,
-                        res.data(), res.cols());
-        }
-        else if (Mat<ComplexF, Opts...>::Options == Eigen::ColMajor)
-        {
-            cblas_cgemm(CblasColMajor, CblasNoTrans, CblasNoTrans, a.rows(), b.cols(),
-                        a.cols(), &one, a.data(), a.rows(), b.data(), b.rows(), &zero,
-                        res.data(), res.rows());
-        }
-    }
-#else
-    template <typename Mat>
-    static inline void mul(Mat &res, const Mat &a, const Mat &b) // fallback without MKL
-    {
-        res = a*b;
-    }
-#endif
-    template <typename Mat>
-    static inline double mulFlops(const Mat &a, const Mat &b) // flop estimate for mul; only a's shape is used, b is not
-    {
-        double nr = a.rows(), nc = a.cols();
-
-        return nr*nr*(6.*nc + 2.*(nc - 1.)); // nr*nr outputs, each with nc multiplies (6 flops) and nc-1 adds (2 flops)
-    }
-private:
-    template <typename C, typename MatLeft, typename MatRight> // NOTE(review): defined outside the USE_MKL guard, but its only callers in this class are MKL-only
-    static inline void makeDotRowPt(C * &aPt, unsigned int &aInc, C * &bPt, 
-                                    unsigned int &bInc, const unsigned int aRow, 
-                                    const MatLeft &a, const MatRight &b) // sets pointer/stride pairs for row aRow of a and column aRow of b
-    {
-        if (MatLeft::Options == Eigen::RowMajor) // row of a is contiguous
-        {
-            aPt  = a.data() + aRow*a.cols();
-            aInc = 1;
-        }
-        else if (MatLeft::Options == Eigen::ColMajor) // row of a is strided by the row count
-        {
-            aPt  = a.data() + aRow;
-            aInc = a.rows();
-        }
-        if (MatRight::Options == Eigen::RowMajor) // column of b is strided by the column count
-        {
-            bPt  = b.data() + aRow;
-            bInc = b.cols();
-        }
-        else if (MatRight::Options == Eigen::ColMajor) // column of b is contiguous
-        {
-            bPt  = b.data() + aRow*b.rows();
-            bInc = 1;
-        }
-    }
-
-#ifdef USE_MKL
-    template <typename C, typename MatLeft, typename MatRight>
-    static inline void makeDotColPt(C * &aPt, unsigned int &aInc, C * &bPt, 
-                                    unsigned int &bInc, const unsigned int aCol, 
-                                    const MatLeft &a, const MatRight &b) // sets pointer/stride pairs for column aCol of a and row aCol of b
-    {
-        if (MatLeft::Options == Eigen::RowMajor) // column of a is strided by the column count
-        {
-            aPt  = a.data() + aCol;
-            aInc = a.cols();
-        }
-        else if (MatLeft::Options == Eigen::ColMajor) // column of a is contiguous
-        {
-            aPt  = a.data() + aCol*a.rows();
-            aInc = 1;
-        }
-        if (MatRight::Options == Eigen::RowMajor) // row of b is contiguous
-        {
-            bPt  = b.data() + aCol*b.cols();
-            bInc = 1;
-        }
-        else if (MatRight::Options == Eigen::ColMajor) // row of b is strided by the row count
-        {
-            bPt  = b.data() + aCol;
-            bInc = b.rows();
-        }
-    }
-
-    template <typename MatLeft, typename MatRight>
-    static inline void dotuRow(ComplexF &res, const unsigned int aRow,
-                               const MatLeft &a, const MatRight &b) // res = unconjugated dot of row aRow of a with column aRow of b (single precision)
-    {
-        const ComplexF *aPt, *bPt;
-        unsigned int   aInc, bInc;
-
-        makeDotRowPt(aPt, aInc, bPt, bInc, aRow, a, b);
-        cblas_cdotu_sub(a.cols(), aPt, aInc, bPt, bInc, &res); // length is the number of columns of a
-    }
-
-    template <typename MatLeft, typename MatRight>
-    static inline void dotuCol(ComplexF &res, const unsigned int aCol,
-                               const MatLeft &a, const MatRight &b) // res = unconjugated dot of column aCol of a with row aCol of b (single precision)
-    {
-        const ComplexF *aPt, *bPt;
-        unsigned int   aInc, bInc;
-
-        makeDotColPt(aPt, aInc, bPt, bInc, aCol, a, b);
-        cblas_cdotu_sub(a.rows(), aPt, aInc, bPt, bInc, &res); // length is the number of rows of a
-    }
-
-    template <typename MatLeft, typename MatRight>
-    static inline void dotuRow(ComplexD &res, const unsigned int aRow,
-                               const MatLeft &a, const MatRight &b) // double-precision counterpart of dotuRow
-    {
-        const ComplexD *aPt, *bPt;
-        unsigned int   aInc, bInc;
-
-        makeDotRowPt(aPt, aInc, bPt, bInc, aRow, a, b);
-        cblas_zdotu_sub(a.cols(), aPt, aInc, bPt, bInc, &res);
-    }
-
-    template <typename MatLeft, typename MatRight>
-    static inline void dotuCol(ComplexD &res, const unsigned int aCol,
-                               const MatLeft &a, const MatRight &b) // double-precision counterpart of dotuCol
-    {
-        const ComplexD *aPt, *bPt;
-        unsigned int   aInc, bInc;
-
-        makeDotColPt(aPt, aInc, bPt, bInc, aCol, a, b);
-        cblas_zdotu_sub(a.rows(), aPt, aInc, bPt, bInc, &res);
-    }
-#endif
-};
-
-/******************************************************************************
- *                     A2AMatrixIo template implementation                    *
- ******************************************************************************/
-// constructor /////////////////////////////////////////////////////////////////
-template <typename T>
-A2AMatrixIo<T>::A2AMatrixIo(std::string filename, std::string dataname, 
-                            const unsigned int nt, const unsigned int ni,
-                            const unsigned int nj)
-: filename_(filename), dataname_(dataname)
-, nt_(nt), ni_(ni), nj_(nj)
-{}
-
-// access //////////////////////////////////////////////////////////////////////
-template <typename T>
-unsigned int A2AMatrixIo<T>::getNt(void) const
-{
-    return nt_;
-}
-
-template <typename T>
-unsigned int A2AMatrixIo<T>::getNi(void) const
-{
-    return ni_;
-}
-
-template <typename T>
-unsigned int A2AMatrixIo<T>::getNj(void) const
-{
-    return nj_;
-}
-
-template <typename T>
-size_t A2AMatrixIo<T>::getSize(void) const
-{
-    return nt_*ni_*nj_*sizeof(T);
-}
-
-// file allocation /////////////////////////////////////////////////////////////
-template <typename T>
-template <typename MetadataType>
-void A2AMatrixIo<T>::initFile(const MetadataType &d, const unsigned int chunkSize)
-{
-#ifdef HAVE_HDF5
-    std::vector<hsize_t>    dim = {static_cast<hsize_t>(nt_), 
-                                   static_cast<hsize_t>(ni_), 
-                                   static_cast<hsize_t>(nj_)},
-                            chunk = {static_cast<hsize_t>(nt_), 
-                                     static_cast<hsize_t>(chunkSize), 
-                                     static_cast<hsize_t>(chunkSize)};
-    H5NS::DataSpace         dataspace(dim.size(), dim.data());
-    H5NS::DataSet           dataset;
-    H5NS::DSetCreatPropList plist;
-    
-    // create empty file just with metadata
-    {
-        Hdf5Writer writer(filename_);
-        write(writer, dataname_, d);
-    }
-
-    // create the dataset
-    Hdf5Reader reader(filename_, false);
-
-    push(reader, dataname_);
-    auto &group = reader.getGroup();
-    plist.setChunk(chunk.size(), chunk.data());
-    plist.setFletcher32();
-    dataset = group.createDataSet(HADRONS_A2AM_NAME, Hdf5Type<T>::type(), dataspace, plist);
-#else
-    HADRONS_ERROR(Implementation, "all-to-all matrix I/O needs HDF5 library");
-#endif
-}
-
-// block I/O ///////////////////////////////////////////////////////////////////
-template <typename T>
-void A2AMatrixIo<T>::saveBlock(const T *data, 
-                               const unsigned int i, 
-                               const unsigned int j,
-                               const unsigned int blockSizei,
-                               const unsigned int blockSizej)
-{
-#ifdef HAVE_HDF5
-    Hdf5Reader           reader(filename_, false);
-    std::vector<hsize_t> count = {nt_, blockSizei, blockSizej},
-                         offset = {0, static_cast<hsize_t>(i),
-                                   static_cast<hsize_t>(j)},
-                         stride = {1, 1, 1},
-                         block  = {1, 1, 1}; 
-    H5NS::DataSpace      memspace(count.size(), count.data()), dataspace;
-    H5NS::DataSet        dataset;
-    size_t               shift;
-
-    push(reader, dataname_);
-    auto &group = reader.getGroup();
-    dataset     = group.openDataSet(HADRONS_A2AM_NAME);
-    dataspace   = dataset.getSpace();
-    dataspace.selectHyperslab(H5S_SELECT_SET, count.data(), offset.data(),
-                              stride.data(), block.data());
-    dataset.write(data, Hdf5Type<T>::type(), memspace, dataspace);
-#else
-    HADRONS_ERROR(Implementation, "all-to-all matrix I/O needs HDF5 library");
-#endif
-}
-
-template <typename T>
-void A2AMatrixIo<T>::saveBlock(const A2AMatrixSet<T> &m,
-                               const unsigned int ext, const unsigned int str,
-                               const unsigned int i, const unsigned int j)
-{
-    unsigned int blockSizei = m.dimension(3);
-    unsigned int blockSizej = m.dimension(4);
-    unsigned int nstr       = m.dimension(1);
-    size_t       offset     = (ext*nstr + str)*nt_*blockSizei*blockSizej;
-
-    saveBlock(m.data() + offset, i, j, blockSizei, blockSizej);
-}
-
-template <typename T>
-template <template <class> class Vec, typename VecT>
-void A2AMatrixIo<T>::load(Vec<VecT> &v, double *tRead)
-{
-#ifdef HAVE_HDF5
-    Hdf5Reader           reader(filename_);
-    std::vector<hsize_t> hdim;
-    H5NS::DataSet        dataset;
-    H5NS::DataSpace      dataspace;
-    H5NS::CompType       datatype;
-    
-    push(reader, dataname_);
-    auto &group = reader.getGroup();
-    dataset     = group.openDataSet(HADRONS_A2AM_NAME);
-    datatype    = dataset.getCompType();
-    dataspace   = dataset.getSpace();
-    hdim.resize(dataspace.getSimpleExtentNdims());
-    dataspace.getSimpleExtentDims(hdim.data());
-    if ((nt_*ni_*nj_ != 0) and
-        ((hdim[0] != nt_) or (hdim[1] != ni_) or (hdim[2] != nj_)))
-    {
-        HADRONS_ERROR(Size, "all-to-all matrix size mismatch (got "
-            + std::to_string(hdim[0]) + "x" + std::to_string(hdim[1]) + "x"
-            + std::to_string(hdim[2]) + ", expected "
-            + std::to_string(nt_) + "x" + std::to_string(ni_) + "x"
-            + std::to_string(nj_));
-    }
-    else if (ni_*nj_ == 0)
-    {
-        if (hdim[0] != nt_)
-        {
-            HADRONS_ERROR(Size, "all-to-all time size mismatch (got "
-                + std::to_string(hdim[0]) + ", expected "
-                + std::to_string(nt_) + ")");
-        }
-        ni_ = hdim[1];
-        nj_ = hdim[2];
-    }
-
-    A2AMatrix<T>         buf(ni_, nj_);
-    std::vector<hsize_t> count    = {1, static_cast<hsize_t>(ni_),
-                                     static_cast<hsize_t>(nj_)},
-                         stride   = {1, 1, 1},
-                         block    = {1, 1, 1},
-                         memCount = {static_cast<hsize_t>(ni_),
-                                     static_cast<hsize_t>(nj_)};
-    H5NS::DataSpace      memspace(memCount.size(), memCount.data());
-
-    std::cout << "Loading timeslice";
-    std::cout.flush();
-    *tRead = 0.;
-    for (unsigned int tp1 = nt_; tp1 > 0; --tp1)
-    {
-        unsigned int         t      = tp1 - 1;
-        std::vector<hsize_t> offset = {static_cast<hsize_t>(t), 0, 0};
-        
-        if (t % 10 == 0)
-        {
-            std::cout << " " << t;
-            std::cout.flush();
-        }
-        dataspace.selectHyperslab(H5S_SELECT_SET, count.data(), offset.data(),
-                                  stride.data(), block.data());
-        if (tRead) *tRead -= usecond();    
-        dataset.read(buf.data(), datatype, memspace, dataspace);
-        if (tRead) *tRead += usecond();
-        v[t] = buf.template cast<VecT>();
-    }
-    std::cout << std::endl;
-#else
-    HADRONS_ERROR(Implementation, "all-to-all matrix I/O needs HDF5 library");
-#endif
-}
-
-/******************************************************************************
- *               A2AMatrixBlockComputation template implementation            *
- ******************************************************************************/
-// constructor /////////////////////////////////////////////////////////////////
-template <typename T, typename Field, typename MetadataType, typename TIo>
-A2AMatrixBlockComputation<T, Field, MetadataType, TIo>
-::A2AMatrixBlockComputation(GridBase *grid,
-                            const unsigned int orthogDim,
-                            const unsigned int next, 
-                            const unsigned int nstr,
-                            const unsigned int blockSize, 
-                            const unsigned int cacheBlockSize,
-                            TimerArray *tArray)
-: grid_(grid), nt_(grid->GlobalDimensions()[orthogDim]), orthogDim_(orthogDim)
-, next_(next), nstr_(nstr), blockSize_(blockSize), cacheBlockSize_(cacheBlockSize)
-, tArray_(tArray)
-{
-    mCache_.resize(nt_*next_*nstr_*cacheBlockSize_*cacheBlockSize_);
-    mBuf_.resize(nt_*next_*nstr_*blockSize_*blockSize_);
-}
-
-#define START_TIMER(name) if (tArray_) tArray_->startTimer(name)
-#define STOP_TIMER(name)  if (tArray_) tArray_->stopTimer(name)
-#define GET_TIMER(name)   ((tArray_ != nullptr) ? tArray_->getDTimer(name) : 0.)
-
-// execution ///////////////////////////////////////////////////////////////////
-template <typename T, typename Field, typename MetadataType, typename TIo>
-void A2AMatrixBlockComputation<T, Field, MetadataType, TIo>
-::execute(const std::vector<Field> &left, const std::vector<Field> &right,
-          A2AKernel<T, Field> &kernel, const FilenameFn &ionameFn,
-          const FilenameFn &filenameFn, const MetadataFn &metadataFn)
-{
-    //////////////////////////////////////////////////////////////////////////
-    // i,j   is first  loop over blockSize_ factors
-    // ii,jj is second loop over cacheBlockSize_ factors for high perf contractions
-    // iii,jjj are loops within cacheBlock
-    // Total index is sum of these  i+ii+iii etc...
-    //////////////////////////////////////////////////////////////////////////
-    int    N_i = left.size();
-    int    N_j = right.size();
-    double flops, bytes, t_kernel;
-    double nodes = grid_->NodeCount();
-    
-    int NBlock_i = N_i/blockSize_ + (((N_i % blockSize_) != 0) ? 1 : 0);
-    int NBlock_j = N_j/blockSize_ + (((N_j % blockSize_) != 0) ? 1 : 0);
-
-    for(int i=0;i<N_i;i+=blockSize_)
-    for(int j=0;j<N_j;j+=blockSize_)
-    {
-        // Get the W and V vectors for this block^2 set of terms
-        int N_ii = MIN(N_i-i,blockSize_);
-        int N_jj = MIN(N_j-j,blockSize_);
-        A2AMatrixSet<TIo> mBlock(mBuf_.data(), next_, nstr_, nt_, N_ii, N_jj);
-
-        LOG(Message) << "All-to-all matrix block " 
-                     << j/blockSize_ + NBlock_j*i/blockSize_ + 1 
-                     << "/" << NBlock_i*NBlock_j << " [" << i <<" .. " 
-                     << i+N_ii-1 << ", " << j <<" .. " << j+N_jj-1 << "]" 
-                     << std::endl;
-        // Series of cache blocked chunks of the contractions within this block
-        flops    = 0.0;
-        bytes    = 0.0;
-        t_kernel = 0.0;
-        for(int ii=0;ii<N_ii;ii+=cacheBlockSize_)
-        for(int jj=0;jj<N_jj;jj+=cacheBlockSize_)
-        {
-            double t;
-            int N_iii = MIN(N_ii-ii,cacheBlockSize_);
-            int N_jjj = MIN(N_jj-jj,cacheBlockSize_);
-            A2AMatrixSet<T> mCacheBlock(mCache_.data(), next_, nstr_, nt_, N_iii, N_jjj);
-
-            START_TIMER("kernel");
-            kernel(mCacheBlock, &left[i+ii], &right[j+jj], orthogDim_, t);
-            STOP_TIMER("kernel");
-            t_kernel += t;
-            flops    += kernel.flops(N_iii, N_jjj);
-            bytes    += kernel.bytes(N_iii, N_jjj);
-
-            START_TIMER("cache copy");
-            parallel_for_nest5(int e =0;e<next_;e++)
-            for(int s =0;s< nstr_;s++)
-            for(int t =0;t< nt_;t++)
-            for(int iii=0;iii< N_iii;iii++)
-            for(int jjj=0;jjj< N_jjj;jjj++)
-            {
-                mBlock(e,s,t,ii+iii,jj+jjj) = mCacheBlock(e,s,t,iii,jjj);
-            }
-            STOP_TIMER("cache copy");
-        }
-
-        // perf
-        LOG(Message) << "Kernel perf " << flops/t_kernel/1.0e3/nodes 
-                     << " Gflop/s/node " << std::endl;
-        LOG(Message) << "Kernel perf " << bytes/t_kernel*1.0e6/1024/1024/1024/nodes 
-                     << " GB/s/node "  << std::endl;
-
-        // IO
-        double       blockSize, ioTime;
-        unsigned int myRank = grid_->ThisRank(), nRank  = grid_->RankCount();
-    
-        LOG(Message) << "Writing block to disk" << std::endl;
-        ioTime = -GET_TIMER("IO: write block");
-        START_TIMER("IO: total");
-        makeFileDir(filenameFn(0, 0), grid_);
-#ifdef HADRONS_A2AM_PARALLEL_IO
-        grid_->Barrier();
-        // make task list for current node
-        nodeIo_.clear();
-        for(int f = myRank; f < next_*nstr_; f += nRank)
-        {
-            IoHelper h;
-
-            h.i  = i;
-            h.j  = j;
-            h.e  = f/nstr_;
-            h.s  = f % nstr_;
-            h.io = A2AMatrixIo<TIo>(filenameFn(h.e, h.s), 
-                                    ionameFn(h.e, h.s), nt_, N_i, N_j);
-            h.md = metadataFn(h.e, h.s);
-            nodeIo_.push_back(h);
-        }
-        // parallel IO
-        for (auto &h: nodeIo_)
-        {
-            saveBlock(mBlock, h);
-        }
-        grid_->Barrier();
-#else
-        // serial IO, for testing purposes only
-        for(int e = 0; e < next_; e++)
-        for(int s = 0; s < nstr_; s++)
-        {
-            IoHelper h;
-
-            h.i  = i;
-            h.j  = j;
-            h.e  = e;
-            h.s  = s;
-            h.io = A2AMatrixIo<TIo>(filenameFn(h.e, h.s), 
-                                    ionameFn(h.e, h.s), nt_, N_i, N_j);
-            h.md = metadataFn(h.e, h.s);
-            saveBlock(mfBlock, h);
-        }
-#endif
-        STOP_TIMER("IO: total");
-        blockSize  = static_cast<double>(next_*nstr_*nt_*N_ii*N_jj*sizeof(TIo));
-        ioTime    += GET_TIMER("IO: write block");
-        LOG(Message) << "HDF5 IO done " << sizeString(blockSize) << " in "
-                     << ioTime  << " us (" 
-                     << blockSize/ioTime*1.0e6/1024/1024
-                     << " MB/s)" << std::endl;
-    }
-}
-
-// I/O handler /////////////////////////////////////////////////////////////////
-template <typename T, typename Field, typename MetadataType, typename TIo>
-void A2AMatrixBlockComputation<T, Field, MetadataType, TIo>
-::saveBlock(const A2AMatrixSet<TIo> &m, IoHelper &h)
-{
-    if ((h.i == 0) and (h.j == 0))
-    {
-        START_TIMER("IO: file creation");
-        h.io.initFile(h.md, blockSize_);
-        STOP_TIMER("IO: file creation");
-    }
-    START_TIMER("IO: write block");
-    h.io.saveBlock(m, h.e, h.s, h.i, h.j);
-    STOP_TIMER("IO: write block");
-}
-
-#undef START_TIMER
-#undef STOP_TIMER
-#undef GET_TIMER
-
-END_HADRONS_NAMESPACE
-
-#endif // A2A_Matrix_hpp_
--- a/Hadrons/A2AVectors.hpp
+++ b/Hadrons/A2AVectors.hpp
@@ -1,342 +0,0 @@
-/*************************************************************************************
-
-Grid physics library, www.github.com/paboyle/Grid 
-
-Source file: Hadrons/A2AVectors.hpp
-
-Copyright (C) 2015-2019
-
-Author: Antonin Portelli <antonin.portelli@me.com>
-Author: fionnoh <fionnoh@gmail.com>
-
-This program is free software; you can redistribute it and/or modify
-it under the terms of the GNU General Public License as published by
-the Free Software Foundation; either version 2 of the License, or
-(at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along
-with this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
-
-See the full license in the file "LICENSE" in the top level distribution directory
-*************************************************************************************/
-/*  END LEGAL */
-#ifndef A2A_Vectors_hpp_
-#define A2A_Vectors_hpp_
-
-#include <Hadrons/Global.hpp>
-#include <Hadrons/Environment.hpp>
-#include <Hadrons/Solver.hpp>
-
-BEGIN_HADRONS_NAMESPACE
-
-/******************************************************************************
- *                 Class to generate V & W all-to-all vectors                 *
- ******************************************************************************/
-template <typename FImpl>
-class A2AVectorsSchurDiagTwo
-{
-public:
-    FERM_TYPE_ALIASES(FImpl,);
-    SOLVER_TYPE_ALIASES(FImpl,);
-public:
-    A2AVectorsSchurDiagTwo(FMat &action, Solver &solver);
-    virtual ~A2AVectorsSchurDiagTwo(void) = default;
-    void makeLowModeV(FermionField &vout, 
-                      const FermionField &evec, const Real &eval);
-    void makeLowModeV5D(FermionField &vout_4d, FermionField &vout_5d, 
-                        const FermionField &evec, const Real &eval);
-    void makeLowModeW(FermionField &wout, 
-                      const FermionField &evec, const Real &eval);
-    void makeLowModeW5D(FermionField &wout_4d, FermionField &wout_5d, 
-                        const FermionField &evec, const Real &eval);
-    void makeHighModeV(FermionField &vout, const FermionField &noise);
-    void makeHighModeV5D(FermionField &vout_4d, FermionField &vout_5d, 
-                         const FermionField &noise_5d);
-    void makeHighModeW(FermionField &wout, const FermionField &noise);
-    void makeHighModeW5D(FermionField &vout_5d, FermionField &wout_5d, 
-                         const FermionField &noise_5d);
-private:
-    FMat                                     &action_;
-    Solver                                   &solver_;
-    GridBase                                 *fGrid_, *frbGrid_, *gGrid_;
-    bool                                     is5d_;
-    FermionField                             src_o_, sol_e_, sol_o_, tmp_, tmp5_;
-    SchurDiagTwoOperator<FMat, FermionField> op_;
-};
-
-/******************************************************************************
- *                  Methods for V & W all-to-all vectors I/O                  *
- ******************************************************************************/
-class A2AVectorsIo
-{
-public:
-    struct Record: Serializable
-    {
-        GRID_SERIALIZABLE_CLASS_MEMBERS(Record,
-                                        unsigned int, index);
-        Record(void): index(0) {}
-    };
-public:
-    template <typename Field>
-    static void write(const std::string fileStem, std::vector<Field> &vec, 
-                      const bool multiFile, const int trajectory = -1);
-    template <typename Field>
-    static void read(std::vector<Field> &vec, const std::string fileStem,
-                     const bool multiFile, const int trajectory = -1);
-private:
-    static inline std::string vecFilename(const std::string stem, const int traj, 
-                                          const bool multiFile)
-    {
-        std::string t = (traj < 0) ? "" : ("." + std::to_string(traj));
-
-        if (multiFile)
-        {
-            return stem + t;
-        }
-        else
-        {
-            return stem + t + ".bin";
-        }
-    }
-};
-
-/******************************************************************************
- *               A2AVectorsSchurDiagTwo template implementation               *
- ******************************************************************************/
-template <typename FImpl>
-A2AVectorsSchurDiagTwo<FImpl>::A2AVectorsSchurDiagTwo(FMat &action, Solver &solver)
-: action_(action)
-, solver_(solver)
-, fGrid_(action_.FermionGrid())
-, frbGrid_(action_.FermionRedBlackGrid())
-, gGrid_(action_.GaugeGrid())
-, src_o_(frbGrid_)
-, sol_e_(frbGrid_)
-, sol_o_(frbGrid_)
-, tmp_(frbGrid_)
-, tmp5_(fGrid_)
-, op_(action_)
-{}
-
-template <typename FImpl>
-void A2AVectorsSchurDiagTwo<FImpl>::makeLowModeV(FermionField &vout, const FermionField &evec, const Real &eval)
-{
-    src_o_ = evec;
-    src_o_.checkerboard = Odd;
-    pickCheckerboard(Even, sol_e_, vout);
-    pickCheckerboard(Odd, sol_o_, vout);
-
-    /////////////////////////////////////////////////////
-    // v_ie = -(1/eval_i) * MeeInv Meo MooInv evec_i
-    /////////////////////////////////////////////////////
-    action_.MooeeInv(src_o_, tmp_);
-    assert(tmp_.checkerboard == Odd);
-    action_.Meooe(tmp_, sol_e_);
-    assert(sol_e_.checkerboard == Even);
-    action_.MooeeInv(sol_e_, tmp_);
-    assert(tmp_.checkerboard == Even);
-    sol_e_ = (-1.0 / eval) * tmp_;
-    assert(sol_e_.checkerboard == Even);
-
-    /////////////////////////////////////////////////////
-    // v_io = (1/eval_i) * MooInv evec_i
-    /////////////////////////////////////////////////////
-    action_.MooeeInv(src_o_, tmp_);
-    assert(tmp_.checkerboard == Odd);
-    sol_o_ = (1.0 / eval) * tmp_;
-    assert(sol_o_.checkerboard == Odd);
-    setCheckerboard(vout, sol_e_);
-    assert(sol_e_.checkerboard == Even);
-    setCheckerboard(vout, sol_o_);
-    assert(sol_o_.checkerboard == Odd);
-}
-
-template <typename FImpl>
-void A2AVectorsSchurDiagTwo<FImpl>::makeLowModeV5D(FermionField &vout_4d, FermionField &vout_5d, const FermionField &evec, const Real &eval)
-{
-    makeLowModeV(vout_5d, evec, eval);
-    action_.ExportPhysicalFermionSolution(vout_5d, vout_4d);
-}
-
-template <typename FImpl>
-void A2AVectorsSchurDiagTwo<FImpl>::makeLowModeW(FermionField &wout, const FermionField &evec, const Real &eval)
-{
-    src_o_ = evec;
-    src_o_.checkerboard = Odd;
-    pickCheckerboard(Even, sol_e_, wout);
-    pickCheckerboard(Odd, sol_o_, wout);
-
-    /////////////////////////////////////////////////////
-    // w_ie = - MeeInvDag MoeDag Doo evec_i
-    /////////////////////////////////////////////////////
-    op_.Mpc(src_o_, tmp_);
-    assert(tmp_.checkerboard == Odd);
-    action_.MeooeDag(tmp_, sol_e_);
-    assert(sol_e_.checkerboard == Even);
-    action_.MooeeInvDag(sol_e_, tmp_);
-    assert(tmp_.checkerboard == Even);
-    sol_e_ = (-1.0) * tmp_;
-
-    /////////////////////////////////////////////////////
-    // w_io = Doo evec_i
-    /////////////////////////////////////////////////////
-    op_.Mpc(src_o_, sol_o_);
-    assert(sol_o_.checkerboard == Odd);
-    setCheckerboard(wout, sol_e_);
-    assert(sol_e_.checkerboard == Even);
-    setCheckerboard(wout, sol_o_);
-    assert(sol_o_.checkerboard == Odd);
-}
-
-template <typename FImpl>
-void A2AVectorsSchurDiagTwo<FImpl>::makeLowModeW5D(FermionField &wout_4d, 
-                                                   FermionField &wout_5d, 
-                                                   const FermionField &evec, 
-                                                   const Real &eval)
-{
-    makeLowModeW(tmp5_, evec, eval);
-    action_.DminusDag(tmp5_, wout_5d);
-    action_.ExportPhysicalFermionSource(wout_5d, wout_4d);
-}
-
-template <typename FImpl>
-void A2AVectorsSchurDiagTwo<FImpl>::makeHighModeV(FermionField &vout, 
-                                                  const FermionField &noise)
-{
-    solver_(vout, noise);
-}
-
-template <typename FImpl>
-void A2AVectorsSchurDiagTwo<FImpl>::makeHighModeV5D(FermionField &vout_4d, 
-                                                    FermionField &vout_5d, 
-                                                    const FermionField &noise)
-{
-    if (noise._grid->Dimensions() == fGrid_->Dimensions() - 1)
-    {
-        action_.ImportPhysicalFermionSource(noise, tmp5_);
-    }
-    else
-    {
-        tmp5_ = noise;
-    }
-    makeHighModeV(vout_5d, tmp5_);
-    action_.ExportPhysicalFermionSolution(vout_5d, vout_4d);
-}
-
-template <typename FImpl>
-void A2AVectorsSchurDiagTwo<FImpl>::makeHighModeW(FermionField &wout, 
-                                                  const FermionField &noise)
-{
-    wout = noise;
-}
-
-template <typename FImpl>
-void A2AVectorsSchurDiagTwo<FImpl>::makeHighModeW5D(FermionField &wout_4d, 
-                                                    FermionField &wout_5d, 
-                                                    const FermionField &noise)
-{
-    if (noise._grid->Dimensions() == fGrid_->Dimensions() - 1)
-    {
-        action_.ImportUnphysicalFermion(noise, wout_5d);
-        wout_4d = noise;
-    }
-    else
-    {
-        wout_5d = noise;
-        action_.ExportPhysicalFermionSource(wout_5d, wout_4d);
-    }
-}
-
-/******************************************************************************
- *               all-to-all vectors I/O template implementation               *
- ******************************************************************************/
-template <typename Field>
-void A2AVectorsIo::write(const std::string fileStem, std::vector<Field> &vec, 
-                         const bool multiFile, const int trajectory)
-{
-    Record       record;
-    GridBase     *grid = vec[0]._grid;
-    ScidacWriter binWriter(grid->IsBoss());
-    std::string  filename = vecFilename(fileStem, trajectory, multiFile);
-
-    if (multiFile)
-    {
-        std::string fullFilename;
-
-        for (unsigned int i = 0; i < vec.size(); ++i)
-        {
-            fullFilename = filename + "/elem" + std::to_string(i) + ".bin";
-
-            LOG(Message) << "Writing vector " << i << std::endl;
-            makeFileDir(fullFilename, grid);
-            binWriter.open(fullFilename);
-            record.index = i;
-            binWriter.writeScidacFieldRecord(vec[i], record);
-            binWriter.close();
-        }
-    }
-    else
-    {
-        makeFileDir(filename, grid);
-        binWriter.open(filename);
-        for (unsigned int i = 0; i < vec.size(); ++i)
-        {
-            LOG(Message) << "Writing vector " << i << std::endl;
-            record.index = i;
-            binWriter.writeScidacFieldRecord(vec[i], record);
-        }
-        binWriter.close();
-    }
-}
-
-template <typename Field>
-void A2AVectorsIo::read(std::vector<Field> &vec, const std::string fileStem, 
-                        const bool multiFile, const int trajectory)
-{
-    Record       record;
-    ScidacReader binReader;
-    std::string  filename = vecFilename(fileStem, trajectory, multiFile);
-
-    if (multiFile)
-    {
-        std::string fullFilename;
-
-        for (unsigned int i = 0; i < vec.size(); ++i)
-        {
-            fullFilename = filename + "/elem" + std::to_string(i) + ".bin";
-
-            LOG(Message) << "Reading vector " << i << std::endl;
-            binReader.open(fullFilename);
-            binReader.readScidacFieldRecord(vec[i], record);
-            binReader.close();
-            if (record.index != i)
-            {
-                HADRONS_ERROR(Io, "vector index mismatch");
-            }
-        }
-    }
-    else
-    {
-        binReader.open(filename);
-        for (unsigned int i = 0; i < vec.size(); ++i)
-        {
-            LOG(Message) << "Reading vector " << i << std::endl;
-            binReader.readScidacFieldRecord(vec[i], record);
-            if (record.index != i)
-            {
-                HADRONS_ERROR(Io, "vector index mismatch");
-            }
-        }
-        binReader.close();
-    }
-}
-
-END_HADRONS_NAMESPACE
-
-#endif // A2A_Vectors_hpp_
--- a/Hadrons/Application.cc
+++ b/Hadrons/Application.cc
@@ -1,288 +0,0 @@
-/*************************************************************************************
-
-Grid physics library, www.github.com/paboyle/Grid 
-
-Source file: Hadrons/Application.cc
-
-Copyright (C) 2015-2019
-
-Author: Antonin Portelli <antonin.portelli@me.com>
-
-This program is free software; you can redistribute it and/or modify
-it under the terms of the GNU General Public License as published by
-the Free Software Foundation; either version 2 of the License, or
-(at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along
-with this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
-
-See the full license in the file "LICENSE" in the top level distribution directory
-*************************************************************************************/
-/*  END LEGAL */
-
-#include <Hadrons/Application.hpp>
-#include <Hadrons/GeneticScheduler.hpp>
-#include <Hadrons/Modules.hpp>
-
-using namespace Grid;
-using namespace QCD;
-using namespace Hadrons;
-
-#define BIG_SEP "================"
-#define SEP     "----------------"
-
-/******************************************************************************
- *                       Application implementation                           *
- ******************************************************************************/
-// constructors ////////////////////////////////////////////////////////////////
-#define MACOUT(macro)    macro              << " (" << #macro << ")"
-#define MACOUTS(macro) HADRONS_STR(macro) << " (" << #macro << ")"
-
-Application::Application(void)
-{
-    initLogger();
-    auto dim = GridDefaultLatt(), mpi = GridDefaultMpi(), loc(dim);
-
-    if (dim.size())
-    {
-        locVol_ = 1;
-        for (unsigned int d = 0; d < dim.size(); ++d)
-        {
-            loc[d]  /= mpi[d];
-            locVol_ *= loc[d];
-        }
-        LOG(Message) << "====== HADRONS APPLICATION INITIALISATION ======" << std::endl;
-        LOG(Message) << "** Dimensions" << std::endl;
-        LOG(Message) << "Global lattice: " << dim << std::endl;
-        LOG(Message) << "MPI partition : " << mpi << std::endl;
-        LOG(Message) << "Local lattice : " << loc << std::endl;
-        LOG(Message) << std::endl;
-        LOG(Message) << "** Default parameters (and associated C macros)" << std::endl;
-        LOG(Message) << "ASCII output precision  : " << MACOUT(DEFAULT_ASCII_PREC) << std::endl;
-        LOG(Message) << "Fermion implementation  : " << MACOUTS(FIMPLBASE) << std::endl;
-        LOG(Message) << "z-Fermion implementation: " << MACOUTS(ZFIMPLBASE) << std::endl;
-        LOG(Message) << "Scalar implementation   : " << MACOUTS(SIMPLBASE) << std::endl;
-        LOG(Message) << "Gauge implementation    : " << MACOUTS(GIMPLBASE) << std::endl;
-        LOG(Message) << "Eigenvector base size   : " 
-                    << MACOUT(HADRONS_DEFAULT_LANCZOS_NBASIS) << std::endl;
-        LOG(Message) << "Schur decomposition     : " << MACOUTS(HADRONS_DEFAULT_SCHUR) << std::endl;
-        LOG(Message) << std::endl;
-    }
-}
-
-Application::Application(const Application::GlobalPar &par)
-: Application()
-{
-    setPar(par);
-}
-
-Application::Application(const std::string parameterFileName)
-: Application()
-{
-    parameterFileName_ = parameterFileName;
-}
-
-// access //////////////////////////////////////////////////////////////////////
-void Application::setPar(const Application::GlobalPar &par)
-{
-    par_ = par;
-}
-
-const Application::GlobalPar & Application::getPar(void)
-{
-    return par_;
-}
-
-// execute /////////////////////////////////////////////////////////////////////
-void Application::run(void)
-{
-    LOG(Message) << "====== HADRONS APPLICATION START ======" << std::endl;
-    if (!parameterFileName_.empty() and (vm().getNModule() == 0))
-    {
-        parseParameterFile(parameterFileName_);
-    }
-    if (getPar().runId.empty())
-    {
-        HADRONS_ERROR(Definition, "run id is empty");
-    }
-    LOG(Message) << "RUN ID '" << getPar().runId << "'" << std::endl;
-    BinaryIO::latticeWriteMaxRetry = getPar().parallelWriteMaxRetry;
-    LOG(Message) << "Attempt(s) for resilient parallel I/O: " 
-                 << BinaryIO::latticeWriteMaxRetry << std::endl;
-    vm().setRunId(getPar().runId);
-    vm().printContent();
-    env().printContent();
-    if (getPar().saveSchedule or getPar().scheduleFile.empty())
-    {
-        schedule();
-        if (getPar().saveSchedule)
-        {
-            std::string filename;
-
-            filename = (getPar().scheduleFile.empty()) ? 
-                         "hadrons.sched" : getPar().scheduleFile;
-            saveSchedule(filename);
-        }
-    }
-    else
-    {
-        loadSchedule(getPar().scheduleFile);
-    }
-    printSchedule();
-    if (!getPar().graphFile.empty())
-    {
-        makeFileDir(getPar().graphFile, env().getGrid());
-        vm().dumpModuleGraph(getPar().graphFile);
-    }
-    configLoop();
-}
-
-// parse parameter file ////////////////////////////////////////////////////////
-class ObjectId: Serializable
-{
-public:
-    GRID_SERIALIZABLE_CLASS_MEMBERS(ObjectId,
-                                    std::string, name,
-                                    std::string, type);
-};
-
-void Application::parseParameterFile(const std::string parameterFileName)
-{
-    XmlReader reader(parameterFileName);
-    GlobalPar par;
-    ObjectId  id;
-    
-    LOG(Message) << "Building application from '" << parameterFileName << "'..." << std::endl;
-    read(reader, "parameters", par);
-    setPar(par);
-    if (!push(reader, "modules"))
-    {
-        HADRONS_ERROR(Parsing, "Cannot open node 'modules' in parameter file '" 
-                              + parameterFileName + "'");
-    }
-    if (!push(reader, "module"))
-    {
-        HADRONS_ERROR(Parsing, "Cannot open node 'modules/module' in parameter file '" 
-                              + parameterFileName + "'");
-    }
-    do
-    {
-        read(reader, "id", id);
-        vm().createModule(id.name, id.type, reader);
-    } while (reader.nextElement("module"));
-    pop(reader);
-    pop(reader);
-}
-
-void Application::saveParameterFile(const std::string parameterFileName, unsigned int prec)
-{
-    LOG(Message) << "Saving application to '" << parameterFileName << "'..." << std::endl;
-    if (env().getGrid()->IsBoss())
-    {
-        XmlWriter          writer(parameterFileName);
-        writer.setPrecision(prec);
-        ObjectId           id;
-        const unsigned int nMod = vm().getNModule();
-
-        write(writer, "parameters", getPar());
-        push(writer, "modules");
-        for (unsigned int i = 0; i < nMod; ++i)
-        {
-            push(writer, "module");
-            id.name = vm().getModuleName(i);
-            id.type = vm().getModule(i)->getRegisteredName();
-            write(writer, "id", id);
-            vm().getModule(i)->saveParameters(writer, "options");
-            pop(writer);
-        }
-        pop(writer);
-        pop(writer);
-    }
-}
-
-// schedule computation ////////////////////////////////////////////////////////
-void Application::schedule(void)
-{
-    if (!scheduled_ and !loadedSchedule_)
-    {
-        program_   = vm().schedule(par_.genetic);
-        scheduled_ = true;
-    }
-}
-
-void Application::saveSchedule(const std::string filename)
-{
-    LOG(Message) << "Saving current schedule to '" << filename << "'..."
-                 << std::endl;
-    if (env().getGrid()->IsBoss())
-    {
-        TextWriter               writer(filename);
-        std::vector<std::string> program;
-        
-        if (!scheduled_)
-        {
-            HADRONS_ERROR(Definition, "Computation not scheduled");
-        }
-
-        for (auto address: program_)
-        {
-            program.push_back(vm().getModuleName(address));
-        }
-        write(writer, "schedule", program);
-    }
-}
-
-void Application::loadSchedule(const std::string filename)
-{
-    TextReader               reader(filename);
-    std::vector<std::string> program;
-    
-    LOG(Message) << "Loading schedule from '" << filename << "'..."
-                 << std::endl;
-    read(reader, "schedule", program);
-    program_.clear();
-    for (auto &name: program)
-    {
-        program_.push_back(vm().getModuleAddress(name));
-    }
-    loadedSchedule_ = true;
-    scheduled_      = true;
-}
-
-void Application::printSchedule(void)
-{
-    if (!scheduled_ and !loadedSchedule_)
-    {
-        HADRONS_ERROR(Definition, "Computation not scheduled");
-    }
-    auto peak = vm().memoryNeeded(program_);
-    LOG(Message) << "Schedule (memory needed: " << sizeString(peak) << "):"
-                 << std::endl;
-    for (unsigned int i = 0; i < program_.size(); ++i)
-    {
-        LOG(Message) << std::setw(4) << i + 1 << ": "
-                     << vm().getModuleName(program_[i]) << std::endl;
-    }
-}
-
-// loop on configurations //////////////////////////////////////////////////////
-void Application::configLoop(void)
-{
-    auto range = par_.trajCounter;
-    
-    for (unsigned int t = range.start; t < range.end; t += range.step)
-    {
-        LOG(Message) << BIG_SEP << " Starting measurement for trajectory " << t
-                     << " " << BIG_SEP << std::endl;
-        vm().setTrajectory(t);
-        vm().executeProgram(program_);
-    }
-    LOG(Message) << BIG_SEP << " End of measurement " << BIG_SEP << std::endl;
-    env().freeAll();
-}
--- a/Hadrons/Archive/Modules/ScalarVP.cc
+++ b/Hadrons/Archive/Modules/ScalarVP.cc
@@ -1,564 +0,0 @@
-/*************************************************************************************
-
-Grid physics library, www.github.com/paboyle/Grid 
-
-Source file: Hadrons/Archive/Modules/ScalarVP.cc
-
-Copyright (C) 2015-2019
-
-Author: Antonin Portelli <antonin.portelli@me.com>
-Author: James Harrison <jch1g10@soton.ac.uk>
-
-This program is free software; you can redistribute it and/or modify
-it under the terms of the GNU General Public License as published by
-the Free Software Foundation; either version 2 of the License, or
-(at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along
-with this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
-
-See the full license in the file "LICENSE" in the top level distribution directory
-*************************************************************************************/
-/*  END LEGAL */
-#include <Hadrons/Modules/MScalar/ChargedProp.hpp>
-#include <Hadrons/Modules/MScalar/ScalarVP.hpp>
-#include <Hadrons/Modules/MScalar/Scalar.hpp>
-
-using namespace Grid;
-using namespace Hadrons;
-using namespace MScalar;
-
-/*
- * Scalar QED vacuum polarisation up to O(alpha)
- *
- * Conserved vector 2-point function diagram notation:
- *        _______
- *       /       \
- * U_nu *         * U_mu
- *       \_______/
- *
- *                (   adj(S(a\hat{nu}|x)) U_mu(x) S(0|x+a\hat{mu}) U_nu(0)    )
- *          = 2 Re(                             -                             )
- *                ( adj(S(a\hat{nu}|x+a\hat{mu})) adj(U_mu(x)) S(0|x) U_nu(0) )
- *  
- *
- *            _______
- *           /       \
- * free = 1 *         * 1
- *           \_______/
- *
- *
- *
- *             _______
- *            /       \
- * S = iA_nu *         * iA_mu
- *            \_______/
- *
- *
- *         Delta_1
- *         ___*___
- *        /       \
- * X = 1 *         * 1
- *        \___*___/
- *         Delta_1
- *
- *          Delta_1                     Delta_1
- *          ___*___                     ___*___
- *         /       \                   /       \
- *      1 *         * iA_mu  +  iA_nu *         * 1
- *         \_______/                   \_______/
- * 4C =        _______                     _______
- *            /       \                   /       \
- *      +  1 *         * iA_mu  +  iA_nu *         * 1
- *            \___*___/                   \___*___/
- *             Delta_1                     Delta_1
- *
- *     Delta_1   Delta_1
- *          _*___*_             _______
- *         /       \           /       \
- * 2E = 1 *         * 1  +  1 *         * 1
- *         \_______/           \_*___*_/
- *                         Delta_1   Delta_1
- *
- *          Delta_2
- *          ___*___             _______
- *         /       \           /       \
- * 2T = 1 *         * 1  +  1 *         * 1
- *         \_______/           \___*___/
- *                              Delta_2
- *
- *
- *                    _______
- *                   /       \
- * srcT = -A_nu^2/2 *         * 1
- *                   \_______/
- *
- *
- *
- *            _______
- *           /       \
- * snkT = 1 *         * -A_mu^2/2
- *           \_______/
- *
- * Full VP to O(alpha) = free + q^2*(S+X+4C+2E+2T+srcT+snkT)
- */
-
-/******************************************************************************
-*                  TScalarVP implementation                             *
-******************************************************************************/
-// constructor /////////////////////////////////////////////////////////////////
-TScalarVP::TScalarVP(const std::string name)
-: Module<ScalarVPPar>(name)
-{}
-
-// dependencies/products ///////////////////////////////////////////////////////
-std::vector<std::string> TScalarVP::getInput(void)
-{
-    prop0Name_ = par().scalarProp + "_0";
-    propQName_ = par().scalarProp + "_Q";
-    propSunName_ = par().scalarProp + "_Sun";
-    propTadName_ = par().scalarProp + "_Tad";
-
-	std::vector<std::string> in = {par().emField, prop0Name_, propQName_,
-                                   propSunName_, propTadName_};
-    
-    return in;
-}
-
-std::vector<std::string> TScalarVP::getOutput(void)
-{
-    std::vector<std::string> out;
-    
-    for (unsigned int mu = 0; mu < env().getNd(); ++mu)
-    {
-        // out.push_back(getName() + "_propQ_" + std::to_string(mu));
-
-        for (unsigned int nu = 0; nu < env().getNd(); ++nu)
-        {
-            out.push_back(getName() + "_" + std::to_string(mu)
-                          + "_" + std::to_string(nu));
-        }
-    }
-
-    return out;
-}
-
-// setup ///////////////////////////////////////////////////////////////////////
-void TScalarVP::setup(void)
-{
-	freeMomPropName_ = FREEMOMPROP(static_cast<TChargedProp *>(vm().getModule(par().scalarProp))->par().mass);
-	GFSrcName_ = par().scalarProp + "_DinvSrc";
-    fftName_   = par().scalarProp + "_fft";
-	phaseName_.clear();
-	muPropQName_.clear();
-    vpTensorName_.clear();
-    momPhaseName_.clear();
-	for (unsigned int mu = 0; mu < env().getNd(); ++mu)
-    {
-        phaseName_.push_back("_shiftphase_" + std::to_string(mu));
-        muPropQName_.push_back(getName() + "_propQ_" + std::to_string(mu));
-
-        std::vector<std::string> vpTensorName_mu;
-        for (unsigned int nu = 0; nu < env().getNd(); ++nu)
-        {
-            vpTensorName_mu.push_back(getName() + "_" + std::to_string(mu)
-                                      + "_" + std::to_string(nu));
-        }
-        vpTensorName_.push_back(vpTensorName_mu);
-    }
-    if (!par().output.empty())
-    {
-        for (unsigned int i_p = 0; i_p < par().outputMom.size(); ++i_p)
-        {
-            momPhaseName_.push_back("_momentumphase_" + std::to_string(i_p));
-        }
-    }
-
-    for (unsigned int mu = 0; mu < env().getNd(); ++mu)
-	{
-	    envCreateLat(ScalarField, muPropQName_[mu]);
-
-        for (unsigned int nu = 0; nu < env().getNd(); ++nu)
-        {
-            envCreateLat(ScalarField, vpTensorName_[mu][nu]);
-        }
-	}
-    if (!par().output.empty())
-    {
-        momPhasesDone_ = env().hasCreatedObject(momPhaseName_[0]);
-        for (unsigned int i_p = 0; i_p < par().outputMom.size(); ++i_p)
-        {
-            envCacheLat(ScalarField, momPhaseName_[i_p]);
-        }
-    }
-    envTmpLat(ScalarField, "buf");
-    envTmpLat(ScalarField, "result");
-    envTmpLat(ScalarField, "Amu");
-    envTmpLat(ScalarField, "Usnk");
-    envTmpLat(ScalarField, "tmpProp");
-}
-
-// execution ///////////////////////////////////////////////////////////////////
-void TScalarVP::execute(void)
-{
-    // CACHING ANALYTIC EXPRESSIONS
-    makeCaches();
-
-    Complex ci(0.0,1.0);
-    Real    q        = static_cast<TChargedProp *>(vm().getModule(par().scalarProp))->par().charge;
-    auto    &prop0   = envGet(ScalarField, prop0Name_);
-    auto    &propQ   = envGet(ScalarField, propQName_);
-    auto    &propSun = envGet(ScalarField, propSunName_);
-    auto    &propTad = envGet(ScalarField, propTadName_);
-    auto    &GFSrc   = envGet(ScalarField, GFSrcName_);
-    auto    &G       = envGet(ScalarField, freeMomPropName_);
-    auto    &fft     = envGet(FFT, fftName_);
-    phase_.clear();
-    for (unsigned int mu = 0; mu < env().getNd(); ++mu)
-    {
-        auto &phmu = envGet(ScalarField, phaseName_[mu]);
-        phase_.push_back(&phmu);
-    }
-    
-    // PROPAGATORS FROM SHIFTED SOURCES
-    LOG(Message) << "Computing O(q) charged scalar propagators..."
-                 << std::endl;
-    std::vector<ScalarField *> muPropQ;
-    for (unsigned int mu = 0; mu < env().getNd(); ++mu)
-    {
-        auto &propmu = envGet(ScalarField, muPropQName_[mu]);
-
-        // -G*momD1*G*F*tau_mu*Src (momD1 = F*D1*Finv)
-        propmu = adj(*phase_[mu])*GFSrc;
-        momD1(propmu, fft);
-        propmu = -G*propmu;
-        fft.FFT_all_dim(propmu, propmu, FFT::backward);
-
-        muPropQ.push_back(&propmu);
-    }
-
-    // CONTRACTIONS
-    auto        &A = envGet(EmField, par().emField);
-    envGetTmp(ScalarField, buf);
-    envGetTmp(ScalarField, result);
-    envGetTmp(ScalarField, Amu);
-    envGetTmp(ScalarField, Usnk);
-    envGetTmp(ScalarField, tmpProp);
-    TComplex    Anu0, Usrc;
-    std::vector<int> coor0 = {0, 0, 0, 0};
-    std::vector<std::vector<ScalarField *> > vpTensor;
-    for (unsigned int mu = 0; mu < env().getNd(); ++mu)
-    {
-        std::vector<ScalarField *> vpTensor_mu;
-        for (unsigned int nu = 0; nu < env().getNd(); ++nu)
-        {
-            auto &vpmunu = envGet(ScalarField, vpTensorName_[mu][nu]);
-            vpTensor_mu.push_back(&vpmunu);
-        }
-        vpTensor.push_back(vpTensor_mu);
-    }
-
-    // Prepare output data structure if necessary
-    Result outputData;
-    if (!par().output.empty())
-    {
-        outputData.projection.resize(par().outputMom.size());
-        outputData.lattice_size = env().getGrid()->_fdimensions;
-        outputData.mass = static_cast<TChargedProp *>(vm().getModule(par().scalarProp))->par().mass;
-        outputData.charge = q;
-        for (unsigned int i_p = 0; i_p < par().outputMom.size(); ++i_p)
-        {
-            outputData.projection[i_p].momentum = strToVec<int>(par().outputMom[i_p]);
-            outputData.projection[i_p].pi.resize(env().getNd());
-            outputData.projection[i_p].pi_free.resize(env().getNd());
-            outputData.projection[i_p].pi_2E.resize(env().getNd());
-            outputData.projection[i_p].pi_2T.resize(env().getNd());
-            outputData.projection[i_p].pi_S.resize(env().getNd());
-            outputData.projection[i_p].pi_4C.resize(env().getNd());
-            outputData.projection[i_p].pi_X.resize(env().getNd());
-            outputData.projection[i_p].pi_srcT.resize(env().getNd());
-            outputData.projection[i_p].pi_snkT.resize(env().getNd());
-            for (unsigned int nu = 0; nu < env().getNd(); ++nu)
-            {
-                outputData.projection[i_p].pi[nu].resize(env().getNd());
-                outputData.projection[i_p].pi_free[nu].resize(env().getNd());
-                outputData.projection[i_p].pi_2E[nu].resize(env().getNd());
-                outputData.projection[i_p].pi_2T[nu].resize(env().getNd());
-                outputData.projection[i_p].pi_S[nu].resize(env().getNd());
-                outputData.projection[i_p].pi_4C[nu].resize(env().getNd());
-                outputData.projection[i_p].pi_X[nu].resize(env().getNd());
-                outputData.projection[i_p].pi_srcT[nu].resize(env().getNd());
-                outputData.projection[i_p].pi_snkT[nu].resize(env().getNd());
-            }
-        }
-    }
-
-    // Do contractions
-    for (unsigned int nu = 0; nu < env().getNd(); ++nu)
-    {
-        peekSite(Anu0, peekLorentz(A, nu), coor0);
-
-        for (unsigned int mu = 0; mu < env().getNd(); ++mu)
-        {
-            LOG(Message) << "Computing Pi[" << mu << "][" << nu << "]..."
-                         << std::endl;
-            Amu = peekLorentz(A, mu);
-
-            // free
-            tmpProp = Cshift(prop0, nu, -1);     // S_0(0|x-a\hat{\nu})
-                                                 // = S_0(a\hat{\nu}|x)
-            Usrc    = Complex(1.0,0.0);
-            vpContraction(result, prop0, tmpProp, Usrc, mu);
-            *vpTensor[mu][nu] = result;
-            // Do momentum projections if necessary
-            if (!par().output.empty())
-            {
-                for (unsigned int i_p = 0; i_p < par().outputMom.size(); ++i_p)
-                {
-                    project(outputData.projection[i_p].pi_free[mu][nu], result,
-                            i_p);
-                }
-            }
-            tmpProp = result; // Just using tmpProp as a temporary ScalarField
-                              // here (buf is modified by calls to writeVP())
-
-            // srcT
-            result = tmpProp * (-0.5)*Anu0*Anu0;
-            *vpTensor[mu][nu] += q*q*result;
-            // Do momentum projections if necessary
-            if (!par().output.empty())
-            {
-                for (unsigned int i_p = 0; i_p < par().outputMom.size(); ++i_p)
-                {
-                    project(outputData.projection[i_p].pi_srcT[mu][nu], result,
-                            i_p);
-                }
-            }
-
-            // snkT
-            result = tmpProp * (-0.5)*Amu*Amu;
-            *vpTensor[mu][nu] += q*q*result;
-            // Do momentum projections if necessary
-            if (!par().output.empty())
-            {
-                for (unsigned int i_p = 0; i_p < par().outputMom.size(); ++i_p)
-                {
-                    project(outputData.projection[i_p].pi_snkT[mu][nu], result,
-                            i_p);
-                }
-            }
-
-            // S
-            tmpProp = Cshift(prop0, nu, -1);     // S_0(a\hat{\nu}|x)
-            Usrc    = ci*Anu0;
-            Usnk    = ci*Amu;
-            vpContraction(result, prop0, tmpProp, Usrc, Usnk, mu);
-            *vpTensor[mu][nu] += q*q*result;
-            // Do momentum projections if necessary
-            if (!par().output.empty())
-            {
-                for (unsigned int i_p = 0; i_p < par().outputMom.size(); ++i_p)
-                {
-                    project(outputData.projection[i_p].pi_S[mu][nu], result,
-                            i_p);
-                }
-            }
-
-            // 4C
-            tmpProp = Cshift(prop0, nu, -1);     // S_0(a\hat{\nu}|x)
-            Usrc    = Complex(1.0,0.0);
-            Usnk    = ci*Amu;
-            vpContraction(result, propQ, tmpProp, Usrc, Usnk, mu);
-            Usrc    = ci*Anu0;
-            vpContraction(buf, propQ, tmpProp, Usrc, mu);
-            result += buf;
-            vpContraction(buf, prop0, *muPropQ[nu], Usrc, mu);
-            result += buf;
-            Usrc = Complex(1.0,0.0);
-            Usnk = ci*Amu;
-            vpContraction(buf, prop0, *muPropQ[nu], Usrc, Usnk, mu);
-            result += buf;
-            *vpTensor[mu][nu] += q*q*result;
-            // Do momentum projections if necessary
-            if (!par().output.empty())
-            {
-                for (unsigned int i_p = 0; i_p < par().outputMom.size(); ++i_p)
-                {
-                    project(outputData.projection[i_p].pi_4C[mu][nu], result,
-                            i_p);
-                }
-            }
-
-            // X
-            Usrc = Complex(1.0,0.0);
-            vpContraction(result, propQ, *muPropQ[nu], Usrc, mu);
-            *vpTensor[mu][nu] += q*q*result;
-            // Do momentum projections if necessary
-            if (!par().output.empty())
-            {
-                for (unsigned int i_p = 0; i_p < par().outputMom.size(); ++i_p)
-                {
-                    project(outputData.projection[i_p].pi_X[mu][nu], result,
-                            i_p);
-                }
-            }
-
-            // 2E
-            tmpProp = Cshift(prop0, nu, -1);     // S_0(a\hat{\nu}|x)
-            Usrc    = Complex(1.0,0.0);
-            vpContraction(result, propSun, tmpProp, Usrc, mu);
-            tmpProp = Cshift(propSun, nu, -1);     // S_\Sigma(0|x-a\hat{\nu})
-                               //(Note: <S(0|x-a\hat{\nu})> = <S(a\hat{\nu}|x)>)
-            vpContraction(buf, prop0, tmpProp, Usrc, mu);
-            result += buf;
-            *vpTensor[mu][nu] += q*q*result;
-            // Do momentum projections if necessary
-            if (!par().output.empty())
-            {
-                for (unsigned int i_p = 0; i_p < par().outputMom.size(); ++i_p)
-                {
-                    project(outputData.projection[i_p].pi_2E[mu][nu], result,
-                            i_p);
-                }
-            }
-
-            // 2T
-            tmpProp = Cshift(prop0, nu, -1);     // S_0(a\hat{\nu}|x)
-            Usrc    = Complex(1.0,0.0);
-            vpContraction(result, propTad, tmpProp, Usrc, mu);
-            tmpProp = Cshift(propTad, nu, -1);     // S_T(0|x-a\hat{\nu})
-            vpContraction(buf, prop0, tmpProp, Usrc, mu);
-            result += buf;
-            *vpTensor[mu][nu] += q*q*result;
-            // Do momentum projections if necessary
-            if (!par().output.empty())
-            {
-                for (unsigned int i_p = 0; i_p < par().outputMom.size(); ++i_p)
-                {
-                    project(outputData.projection[i_p].pi_2T[mu][nu], result,
-                            i_p);
-                }
-            }
-
-            // Do momentum projections of full VP if necessary
-            if (!par().output.empty())
-            {
-                for (unsigned int i_p = 0; i_p < par().outputMom.size(); ++i_p)
-                {
-                    project(outputData.projection[i_p].pi[mu][nu],
-                            *vpTensor[mu][nu], i_p);
-                }
-            }
-        }
-    }
-
-    // OUTPUT IF NECESSARY
-    if (!par().output.empty())
-    {
-        LOG(Message) << "Saving momentum-projected HVP to '"
-                     << RESULT_FILE_NAME(par().output, vm().getTrajectory()) << "'..."
-                     << std::endl;
-        saveResult(par().output, "HVP", outputData);
-    }
-}
-
-void TScalarVP::makeCaches(void)
-{
-    envGetTmp(ScalarField, buf);
-
-    if ( (!par().output.empty()) && (!momPhasesDone_) )
-    {
-        LOG(Message) << "Caching phases for momentum projections..."
-                     << std::endl;
-        std::vector<int> &l = env().getGrid()->_fdimensions;
-        Complex          ci(0.0,1.0);
-
-        // Calculate phase factors
-        for (unsigned int i_p = 0; i_p < par().outputMom.size(); ++i_p)
-        {
-            std::vector<int> mom = strToVec<int>(par().outputMom[i_p]);
-            auto &momph_ip = envGet(ScalarField, momPhaseName_[i_p]);
-            momph_ip = zero;
-            for (unsigned int j = 0; j < env().getNd()-1; ++j)
-            {
-                Real twoPiL = M_PI*2./l[j];
-                LatticeCoordinate(buf, j);
-                buf = mom[j]*twoPiL*buf;
-                momph_ip = momph_ip + buf;
-            }
-            momph_ip = exp(-ci*momph_ip);
-            momPhase_.push_back(&momph_ip);
-        }
-    }
-}
-
-void TScalarVP::vpContraction(ScalarField &vp,
-                   ScalarField &prop_0_x, ScalarField &prop_nu_x,
-                   TComplex u_src, ScalarField &u_snk, int mu)
-{
-    // Note: this function assumes a point source is used.
-    vp = adj(prop_nu_x) * u_snk * Cshift(prop_0_x, mu, 1) * u_src;
-    vp -= Cshift(adj(prop_nu_x), mu, 1) * adj(u_snk) * prop_0_x * u_src;
-    vp = 2.0*real(vp);
-}
-
-void TScalarVP::vpContraction(ScalarField &vp,
-                   ScalarField &prop_0_x, ScalarField &prop_nu_x,
-                   TComplex u_src, int mu)
-{
-    // Note: this function assumes a point source is used.
-    vp = adj(prop_nu_x) * Cshift(prop_0_x, mu, 1) * u_src;
-    vp -= Cshift(adj(prop_nu_x), mu, 1) * prop_0_x * u_src;
-    vp = 2.0*real(vp);
-}
-
-void TScalarVP::project(std::vector<Complex> &projection, const ScalarField &vp, int i_p)
-{
-    std::vector<TComplex>   vecBuf;
-    envGetTmp(ScalarField, buf);
-
-    buf = vp*(*momPhase_[i_p]);
-    sliceSum(buf, vecBuf, Tp);
-    projection.resize(vecBuf.size());
-    for (unsigned int t = 0; t < vecBuf.size(); ++t)
-    {
-        projection[t] = TensorRemove(vecBuf[t]);
-    }
-}
-
-void TScalarVP::momD1(ScalarField &s, FFT &fft)
-{
-    auto        &A = envGet(EmField, par().emField);
-    Complex     ci(0.0,1.0);
-
-    envGetTmp(ScalarField, buf);
-    envGetTmp(ScalarField, result);
-    envGetTmp(ScalarField, Amu);
-
-    result = zero;
-    for (unsigned int mu = 0; mu < env().getNd(); ++mu)
-    {
-        Amu = peekLorentz(A, mu);
-        buf = (*phase_[mu])*s;
-        fft.FFT_all_dim(buf, buf, FFT::backward);
-        buf = Amu*buf;
-        fft.FFT_all_dim(buf, buf, FFT::forward);
-        result = result - ci*buf;
-    }
-    fft.FFT_all_dim(s, s, FFT::backward);
-    for (unsigned int mu = 0; mu < env().getNd(); ++mu)
-    {
-        Amu = peekLorentz(A, mu);
-        buf = Amu*s;
-        fft.FFT_all_dim(buf, buf, FFT::forward);
-        result = result + ci*adj(*phase_[mu])*buf;
-    }
-
-    s = result;
-}
--- a/Hadrons/Archive/Modules/ScalarVP.hpp
+++ b/Hadrons/Archive/Modules/ScalarVP.hpp
@@ -1,129 +0,0 @@
-/*************************************************************************************
-
-Grid physics library, www.github.com/paboyle/Grid 
-
-Source file: Hadrons/Archive/Modules/ScalarVP.hpp
-
-Copyright (C) 2015-2019
-
-Author: Antonin Portelli <antonin.portelli@me.com>
-Author: James Harrison <jch1g10@soton.ac.uk>
-
-This program is free software; you can redistribute it and/or modify
-it under the terms of the GNU General Public License as published by
-the Free Software Foundation; either version 2 of the License, or
-(at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along
-with this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
-
-See the full license in the file "LICENSE" in the top level distribution directory
-*************************************************************************************/
-/*  END LEGAL */
-#ifndef Hadrons_MScalar_ScalarVP_hpp_
-#define Hadrons_MScalar_ScalarVP_hpp_
-
-#include <Hadrons/Global.hpp>
-#include <Hadrons/Module.hpp>
-#include <Hadrons/ModuleFactory.hpp>
-
-BEGIN_HADRONS_NAMESPACE
-
-/******************************************************************************
- *                         Scalar vacuum polarisation                         *
- ******************************************************************************/
-BEGIN_MODULE_NAMESPACE(MScalar)
-
-class ScalarVPPar: Serializable
-{
-public:
-    GRID_SERIALIZABLE_CLASS_MEMBERS(ScalarVPPar,
-                                    std::string, emField,
-                                    std::string, scalarProp,
-                                    std::string, output,
-                                    std::vector<std::string>, outputMom);
-};
-
-class TScalarVP: public Module<ScalarVPPar>
-{
-public:
-    BASIC_TYPE_ALIASES(SIMPL,);
-    typedef PhotonR::GaugeField     EmField;
-    typedef PhotonR::GaugeLinkField EmComp;
-    class Result: Serializable
-    {
-    public:
-        class Projection: Serializable
-        {
-        public:
-            GRID_SERIALIZABLE_CLASS_MEMBERS(Projection,
-                                            std::vector<int>,     momentum,
-                                            std::vector<std::vector<std::vector<Complex>>>, pi,
-                                            std::vector<std::vector<std::vector<Complex>>>, pi_free,
-                                            std::vector<std::vector<std::vector<Complex>>>, pi_2E,
-                                            std::vector<std::vector<std::vector<Complex>>>, pi_2T,
-                                            std::vector<std::vector<std::vector<Complex>>>, pi_S,
-                                            std::vector<std::vector<std::vector<Complex>>>, pi_4C,
-                                            std::vector<std::vector<std::vector<Complex>>>, pi_X,
-                                            std::vector<std::vector<std::vector<Complex>>>, pi_srcT,
-                                            std::vector<std::vector<std::vector<Complex>>>, pi_snkT);
-        };
-        GRID_SERIALIZABLE_CLASS_MEMBERS(Result,
-                                        std::vector<int>,        lattice_size,
-                                        double,                  mass,
-                                        double,                  charge,
-                                        std::vector<Projection>, projection);
-    };
-public:
-    // constructor
-    TScalarVP(const std::string name);
-    // destructor
-    virtual ~TScalarVP(void) {};
-    // dependency relation
-    virtual std::vector<std::string> getInput(void);
-    virtual std::vector<std::string> getOutput(void);
-protected:
-    // setup
-    virtual void setup(void);
-    // execution
-    virtual void execute(void);
-private:
-    void makeCaches(void);
-    // conserved vector two-point contraction
-    void vpContraction(ScalarField &vp,
-                       ScalarField &prop_0_x, ScalarField &prop_nu_x,
-                       TComplex u_src, ScalarField &u_snk, int mu);
-    // conserved vector two-point contraction with unit gauge link at sink
-    void vpContraction(ScalarField &vp,
-                       ScalarField &prop_0_x, ScalarField &prop_nu_x,
-                       TComplex u_src, int mu);
-    // write momentum-projected vacuum polarisation to file(s)
-    void project(std::vector<Complex> &projection, const ScalarField &vp,
-                 int i_p);
-    // momentum-space Delta_1 insertion
-    void momD1(ScalarField &s, FFT &fft);
-private:
-    bool                                        momPhasesDone_;
-    std::string                                 freeMomPropName_, GFSrcName_,
-                                                prop0Name_, propQName_,
-                                                propSunName_, propTadName_,
-                                                fftName_;
-    std::vector<std::string>                    phaseName_, muPropQName_,
-                                                momPhaseName_;
-    std::vector<std::vector<std::string> >      vpTensorName_;
-    std::vector<ScalarField *>                  phase_, momPhase_;
-};
-
-MODULE_REGISTER(ScalarVP, TScalarVP, MScalar);
-
-END_MODULE_NAMESPACE
-
-END_HADRONS_NAMESPACE
-
-#endif // Hadrons_MScalar_ScalarVP_hpp_
--- a/Hadrons/Archive/Modules/TestSeqConserved.cc
+++ b/Hadrons/Archive/Modules/TestSeqConserved.cc
@@ -1,35 +0,0 @@
-/*************************************************************************************
-
-Grid physics library, www.github.com/paboyle/Grid 
-
-Source file: Hadrons/Archive/Modules/TestSeqConserved.cc
-
-Copyright (C) 2015-2019
-
-Author: Antonin Portelli <antonin.portelli@me.com>
-
-This program is free software; you can redistribute it and/or modify
-it under the terms of the GNU General Public License as published by
-the Free Software Foundation; either version 2 of the License, or
-(at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along
-with this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
-
-See the full license in the file "LICENSE" in the top level distribution directory
-*************************************************************************************/
-/*  END LEGAL */
-#include <Hadrons/Modules/MUtilities/TestSeqConserved.hpp>
-
-using namespace Grid;
-using namespace Hadrons;
-using namespace MUtilities;
-
-template class Grid::Hadrons::MUtilities::TTestSeqConserved<FIMPL>;
-
--- a/Hadrons/Archive/Modules/TestSeqConserved.hpp
+++ b/Hadrons/Archive/Modules/TestSeqConserved.hpp
@@ -1,186 +0,0 @@
-/*************************************************************************************
-
-Grid physics library, www.github.com/paboyle/Grid 
-
-Source file: Hadrons/Archive/Modules/TestSeqConserved.hpp
-
-Copyright (C) 2015-2019
-
-Author: Antonin Portelli <antonin.portelli@me.com>
-Author: Lanny91 <andrew.lawson@gmail.com>
-
-This program is free software; you can redistribute it and/or modify
-it under the terms of the GNU General Public License as published by
-the Free Software Foundation; either version 2 of the License, or
-(at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along
-with this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
-
-See the full license in the file "LICENSE" in the top level distribution directory
-*************************************************************************************/
-/*  END LEGAL */
-
-#ifndef Hadrons_MUtilities_TestSeqConserved_hpp_
-#define Hadrons_MUtilities_TestSeqConserved_hpp_
-
-#include <Hadrons/Global.hpp>
-#include <Hadrons/Module.hpp>
-#include <Hadrons/ModuleFactory.hpp>
-
-BEGIN_HADRONS_NAMESPACE
-
-/*
-  Ward Identity contractions using sequential propagators.
- -----------------------------
- 
- * options:
- - q:      point source propagator, 5D if available (string)
- - qSeq:   result of sequential insertion of conserved current using q (string)
- - action: action used for computation of q (string)
- - origin: string giving point source origin of q (string)
- - t_J:    time at which sequential current is inserted (int)
- - mu:     Lorentz index of current inserted (int)
- - curr:   current type, e.g. vector/axial (Current)
-*/
-
-/******************************************************************************
- *                            TestSeqConserved                                *
- ******************************************************************************/
-BEGIN_MODULE_NAMESPACE(MUtilities)
-
-class TestSeqConservedPar: Serializable
-{
-public:
-    GRID_SERIALIZABLE_CLASS_MEMBERS(TestSeqConservedPar,
-                                    std::string,  q,
-                                    std::string,  qSeq,
-                                    std::string,  action,
-                                    std::string,  origin,
-                                    unsigned int, t_J,
-                                    unsigned int, mu,
-                                    Current,      curr);
-};
-
-template <typename FImpl>
-class TTestSeqConserved: public Module<TestSeqConservedPar>
-{
-public:
-    FERM_TYPE_ALIASES(FImpl,);
-public:
-    // constructor
-    TTestSeqConserved(const std::string name);
-    // destructor
-    virtual ~TTestSeqConserved(void) {};
-    // dependency relation
-    virtual std::vector<std::string> getInput(void);
-    virtual std::vector<std::string> getOutput(void);
-protected:
-    // setup
-    virtual void setup(void);
-    // execution
-    virtual void execute(void);
-};
-
-MODULE_REGISTER_TMP(TestSeqConserved, TTestSeqConserved<FIMPL>, MUtilities);
-
-/******************************************************************************
- *                     TTestSeqConserved implementation                       *
- ******************************************************************************/
-// constructor /////////////////////////////////////////////////////////////////
-template <typename FImpl>
-TTestSeqConserved<FImpl>::TTestSeqConserved(const std::string name)
-: Module<TestSeqConservedPar>(name)
-{}
-
-// dependencies/products ///////////////////////////////////////////////////////
-template <typename FImpl>
-std::vector<std::string> TTestSeqConserved<FImpl>::getInput(void)
-{
-    std::vector<std::string> in = {par().q, par().qSeq, par().action};
-    
-    return in;
-}
-
-template <typename FImpl>
-std::vector<std::string> TTestSeqConserved<FImpl>::getOutput(void)
-{
-    std::vector<std::string> out = {getName()};
-    
-    return out;
-}
-
-// setup ///////////////////////////////////////////////////////////////////////
-template <typename FImpl>
-void TTestSeqConserved<FImpl>::setup(void)
-{
-    auto Ls = env().getObjectLs(par().q);
-    if (Ls != env().getObjectLs(par().action))
-    {
-        HADRONS_ERROR(Size, "Ls mismatch between quark action and propagator");
-    }
-    envTmpLat(PropagatorField, "tmp");
-    envTmpLat(LatticeComplex, "c");
-}
-
-// execution ///////////////////////////////////////////////////////////////////
-template <typename FImpl>
-void TTestSeqConserved<FImpl>::execute(void)
-{
-    // Check sequential insertion of current gives same result as conserved 
-    // current sink upon contraction. Assume q uses a point source.
-
-    auto                  &q    = envGet(PropagatorField, par().q);
-    auto                  &qSeq = envGet(PropagatorField, par().qSeq);
-    auto                  &act  = envGet(FMat, par().action);
-    Gamma                 g5(Gamma::Algebra::Gamma5);
-    Gamma::Algebra        gA = (par().curr == Current::Axial) ?
-                                  Gamma::Algebra::Gamma5 :
-                                  Gamma::Algebra::Identity;
-    Gamma                 g(gA);
-    SitePropagator        qSite;
-    Complex               test_S, test_V, check_S, check_V;
-    std::vector<TComplex> check_buf;
-    std::vector<int>      siteCoord;
-
-    envGetTmp(PropagatorField, tmp);
-    envGetTmp(LatticeComplex, c);
-    siteCoord = strToVec<int>(par().origin);
-    peekSite(qSite, qSeq, siteCoord);
-    test_S = trace(qSite*g);
-    test_V = trace(qSite*g*Gamma::gmu[par().mu]);
-    act.ContractConservedCurrent(q, q, tmp, par().curr, par().mu);
-    c = trace(tmp*g);
-    sliceSum(c, check_buf, Tp);
-    check_S = TensorRemove(check_buf[par().t_J]);
-
-    c = trace(tmp*g*Gamma::gmu[par().mu]);
-    sliceSum(c, check_buf, Tp);
-    check_V = TensorRemove(check_buf[par().t_J]);
-
-    LOG(Message) << "Test S  = " << abs(test_S)   << std::endl;
-    LOG(Message) << "Test V  = " << abs(test_V) << std::endl;
-    LOG(Message) << "Check S = " << abs(check_S) << std::endl;
-    LOG(Message) << "Check V = " << abs(check_V) << std::endl;
-
-    // Check difference = 0
-    check_S -= test_S;
-    check_V -= test_V;
-
-    LOG(Message) << "Consistency check for sequential conserved " 
-                 << par().curr << " current insertion: " << std::endl; 
-    LOG(Message) << "Diff S  = " << abs(check_S) << std::endl;
-    LOG(Message) << "Diff V  = " << abs(check_V) << std::endl;
-}
-
-END_MODULE_NAMESPACE
-
-END_HADRONS_NAMESPACE
-
-#endif // Hadrons_TestSeqConserved_hpp_
--- a/Hadrons/Archive/Modules/TestSeqGamma.cc
+++ b/Hadrons/Archive/Modules/TestSeqGamma.cc
@@ -1,35 +0,0 @@
-/*************************************************************************************
-
-Grid physics library, www.github.com/paboyle/Grid 
-
-Source file: Hadrons/Archive/Modules/TestSeqGamma.cc
-
-Copyright (C) 2015-2019
-
-Author: Antonin Portelli <antonin.portelli@me.com>
-
-This program is free software; you can redistribute it and/or modify
-it under the terms of the GNU General Public License as published by
-the Free Software Foundation; either version 2 of the License, or
-(at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along
-with this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
-
-See the full license in the file "LICENSE" in the top level distribution directory
-*************************************************************************************/
-/*  END LEGAL */
-#include <Hadrons/Modules/MUtilities/TestSeqGamma.hpp>
-
-using namespace Grid;
-using namespace Hadrons;
-using namespace MUtilities;
-
-template class Grid::Hadrons::MUtilities::TTestSeqGamma<FIMPL>;
-
--- a/Hadrons/Archive/Modules/TestSeqGamma.hpp
+++ b/Hadrons/Archive/Modules/TestSeqGamma.hpp
@@ -1,150 +0,0 @@
-/*************************************************************************************
-
-Grid physics library, www.github.com/paboyle/Grid 
-
-Source file: Hadrons/Archive/Modules/TestSeqGamma.hpp
-
-Copyright (C) 2015-2019
-
-Author: Antonin Portelli <antonin.portelli@me.com>
-Author: Lanny91 <andrew.lawson@gmail.com>
-
-This program is free software; you can redistribute it and/or modify
-it under the terms of the GNU General Public License as published by
-the Free Software Foundation; either version 2 of the License, or
-(at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along
-with this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
-
-See the full license in the file "LICENSE" in the top level distribution directory
-*************************************************************************************/
-/*  END LEGAL */
-
-#ifndef Hadrons_MUtilities_TestSeqGamma_hpp_
-#define Hadrons_MUtilities_TestSeqGamma_hpp_
-
-#include <Hadrons/Global.hpp>
-#include <Hadrons/Module.hpp>
-#include <Hadrons/ModuleFactory.hpp>
-
-BEGIN_HADRONS_NAMESPACE
-
-/******************************************************************************
- *                              TestSeqGamma                                  *
- ******************************************************************************/
-BEGIN_MODULE_NAMESPACE(MUtilities)
-
-class TestSeqGammaPar: Serializable
-{
-public:
-    GRID_SERIALIZABLE_CLASS_MEMBERS(TestSeqGammaPar,
-                                    std::string,    q,
-                                    std::string,    qSeq,
-                                    std::string,    origin,
-                                    Gamma::Algebra, gamma,
-                                    unsigned int,   t_g);
-};
-
-template <typename FImpl>
-class TTestSeqGamma: public Module<TestSeqGammaPar>
-{
-public:
-    FERM_TYPE_ALIASES(FImpl,);
-public:
-    // constructor
-    TTestSeqGamma(const std::string name);
-    // destructor
-    virtual ~TTestSeqGamma(void) {};
-    // dependency relation
-    virtual std::vector<std::string> getInput(void);
-    virtual std::vector<std::string> getOutput(void);
-protected:
-    // setup
-    virtual void setup(void);
-    // execution
-    virtual void execute(void);
-};
-
-MODULE_REGISTER_TMP(TestSeqGamma, TTestSeqGamma<FIMPL>, MUtilities);
-
-/******************************************************************************
- *                      TTestSeqGamma implementation                          *
- ******************************************************************************/
-// constructor /////////////////////////////////////////////////////////////////
-template <typename FImpl>
-TTestSeqGamma<FImpl>::TTestSeqGamma(const std::string name)
-: Module<TestSeqGammaPar>(name)
-{}
-
-// dependencies/products ///////////////////////////////////////////////////////
-template <typename FImpl>
-std::vector<std::string> TTestSeqGamma<FImpl>::getInput(void)
-{
-    std::vector<std::string> in = {par().q, par().qSeq};
-    
-    return in;
-}
-
-template <typename FImpl>
-std::vector<std::string> TTestSeqGamma<FImpl>::getOutput(void)
-{
-    std::vector<std::string> out = {getName()};
-    
-    return out;
-}
-
-// setup ///////////////////////////////////////////////////////////////////////
-template <typename FImpl>
-void TTestSeqGamma<FImpl>::setup(void)
-{
-    envTmpLat(LatticeComplex, "c");
-}
-
-// execution ///////////////////////////////////////////////////////////////////
-template <typename FImpl>
-void TTestSeqGamma<FImpl>::execute(void)
-{
-    auto                  &q    = envGet(PropagatorField, par().q);
-    auto                  &qSeq = envGet(PropagatorField, par().qSeq);
-    Gamma                 g5(Gamma::Algebra::Gamma5);
-    Gamma                 g(par().gamma);
-    SitePropagator        qSite;
-    Complex               test, check;
-    std::vector<TComplex> check_buf;
-    std::vector<int>      siteCoord;
-
-    // Check sequential insertion of gamma matrix gives same result as 
-    // insertion of gamma at sink upon contraction. Assume q uses a point 
-    // source.
-    
-    envGetTmp(LatticeComplex, c);
-    siteCoord = strToVec<int>(par().origin);
-    peekSite(qSite, qSeq, siteCoord);
-    test = trace(g*qSite);
-
-    c = trace(adj(g)*g5*adj(q)*g5*g*q);
-    sliceSum(c, check_buf, Tp);
-    check = TensorRemove(check_buf[par().t_g]);
-
-    LOG(Message) << "Seq Result = " << abs(test)  << std::endl;
-    LOG(Message) << "Reference  = " << abs(check) << std::endl;
-
-    // Check difference = 0
-    check -= test;
-
-    LOG(Message) << "Consistency check for sequential " << par().gamma  
-                 << " insertion = " << abs(check) << std::endl;
-}
-
-END_MODULE_NAMESPACE
-
-END_HADRONS_NAMESPACE
-
-#endif // Hadrons_TestSeqGamma_hpp_
--- a/Hadrons/Archive/Modules/VPCounterTerms.cc
+++ b/Hadrons/Archive/Modules/VPCounterTerms.cc
@@ -1,260 +0,0 @@
-/*************************************************************************************
-
-Grid physics library, www.github.com/paboyle/Grid 
-
-Source file: Hadrons/Archive/Modules/VPCounterTerms.cc
-
-Copyright (C) 2015-2019
-
-Author: Antonin Portelli <antonin.portelli@me.com>
-Author: James Harrison <jch1g10@soton.ac.uk>
-
-This program is free software; you can redistribute it and/or modify
-it under the terms of the GNU General Public License as published by
-the Free Software Foundation; either version 2 of the License, or
-(at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along
-with this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
-
-See the full license in the file "LICENSE" in the top level distribution directory
-*************************************************************************************/
-/*  END LEGAL */
-#include <Hadrons/Modules/MScalar/VPCounterTerms.hpp>
-#include <Hadrons/Modules/MScalar/Scalar.hpp>
-
-using namespace Grid;
-using namespace Hadrons;
-using namespace MScalar;
-
-/******************************************************************************
-*                  TVPCounterTerms implementation                             *
-******************************************************************************/
-// constructor /////////////////////////////////////////////////////////////////
-TVPCounterTerms::TVPCounterTerms(const std::string name)
-: Module<VPCounterTermsPar>(name)
-{}
-
-// dependencies/products ///////////////////////////////////////////////////////
-std::vector<std::string> TVPCounterTerms::getInput(void)
-{
-    std::vector<std::string> in = {par().source};
-    
-    return in;
-}
-
-std::vector<std::string> TVPCounterTerms::getOutput(void)
-{
-    std::vector<std::string> out;
-    
-    return out;
-}
-
-// setup ///////////////////////////////////////////////////////////////////////
-void TVPCounterTerms::setup(void)
-{
-	freeMomPropName_ = FREEMOMPROP(par().mass);
-    phaseName_.clear();
-    for (unsigned int mu = 0; mu < env().getNd(); ++mu)
-    {
-        phaseName_.push_back("_shiftphase_" + std::to_string(mu));
-    }
-    GFSrcName_ = getName() + "_DinvSrc";
-    phatsqName_ = getName() + "_pHatSquared";
-    prop0Name_ = getName() + "_freeProp";
-    twoscalarName_ = getName() + "_2scalarProp";
-    psquaredName_ = getName() + "_psquaredProp";
-    if (!par().output.empty())
-    {
-        for (unsigned int i_p = 0; i_p < par().outputMom.size(); ++i_p)
-        {
-            momPhaseName_.push_back("_momentumphase_" + std::to_string(i_p));
-        }
-    }
-
-    envCreateLat(ScalarField, freeMomPropName_);
-    for (unsigned int mu = 0; mu < env().getNd(); ++mu)
-    {
-        envCreateLat(ScalarField, phaseName_[mu]);
-    }
-    envCreateLat(ScalarField, phatsqName_);
-    envCreateLat(ScalarField, GFSrcName_);
-    envCreateLat(ScalarField, prop0Name_);
-    envCreateLat(ScalarField, twoscalarName_);
-    envCreateLat(ScalarField, psquaredName_);
-    if (!par().output.empty())
-    {
-        for (unsigned int i_p = 0; i_p < par().outputMom.size(); ++i_p)
-        {
-            envCacheLat(ScalarField, momPhaseName_[i_p]);
-        }
-    }
-    envTmpLat(ScalarField, "buf");
-    envTmpLat(ScalarField, "tmp_vp");
-    envTmpLat(ScalarField, "vpPhase");
-}
-
-// execution ///////////////////////////////////////////////////////////////////
-void TVPCounterTerms::execute(void)
-{
-	auto &source = envGet(ScalarField, par().source);
-    Complex     ci(0.0,1.0);
-    FFT         fft(env().getGrid());
-    envGetTmp(ScalarField, buf);
-    envGetTmp(ScalarField, tmp_vp);
-    
-    // Momentum-space free scalar propagator
-    auto &G = envGet(ScalarField, freeMomPropName_);
-    SIMPL::MomentumSpacePropagator(G, par().mass);
-
-    // Phases and hat{p}^2
-    auto &phatsq = envGet(ScalarField, phatsqName_);
-    std::vector<int> &l = env().getGrid()->_fdimensions;
-    
-    LOG(Message) << "Calculating shift phases..." << std::endl;
-    phatsq = zero;
-    for (unsigned int mu = 0; mu < env().getNd(); ++mu)
-    {
-        Real    twoPiL = M_PI*2./l[mu];
-        auto &phmu  = envGet(ScalarField, phaseName_[mu]);
-
-        LatticeCoordinate(buf, mu);
-        phmu = exp(ci*twoPiL*buf);
-        phase_.push_back(&phmu);
-        buf = 2.*sin(.5*twoPiL*buf);
-		phatsq = phatsq + buf*buf;
-    }
-
-    // G*F*src
-    auto &GFSrc       = envGet(ScalarField, GFSrcName_);
-    fft.FFT_all_dim(GFSrc, source, FFT::forward);
-    GFSrc = G*GFSrc;
-
-    // Position-space free scalar propagator
-    auto &prop0       = envGet(ScalarField, prop0Name_);
-    prop0 = GFSrc;
-    fft.FFT_all_dim(prop0, prop0, FFT::backward);
-
-    // Propagators for counter-terms
-    auto &twoscalarProp        = envGet(ScalarField, twoscalarName_);
-    auto &psquaredProp         = envGet(ScalarField, psquaredName_);
-
-    twoscalarProp = G*GFSrc;
-    fft.FFT_all_dim(twoscalarProp, twoscalarProp, FFT::backward);
-
-    psquaredProp = G*phatsq*GFSrc;
-    fft.FFT_all_dim(psquaredProp, psquaredProp, FFT::backward);
-
-    // Prepare output data structure if necessary
-    Result outputData;
-    if (!par().output.empty())
-    {
-        outputData.projection.resize(par().outputMom.size());
-        outputData.lattice_size = env().getGrid()->_fdimensions;
-        outputData.mass = par().mass;
-        for (unsigned int i_p = 0; i_p < par().outputMom.size(); ++i_p)
-        {
-            outputData.projection[i_p].momentum = strToVec<int>(par().outputMom[i_p]);
-            outputData.projection[i_p].twoScalar.resize(env().getNd());
-            outputData.projection[i_p].threeScalar.resize(env().getNd());
-            outputData.projection[i_p].pSquaredInsertion.resize(env().getNd());
-            for (unsigned int nu = 0; nu < env().getNd(); ++nu)
-            {
-                outputData.projection[i_p].twoScalar[nu].resize(env().getNd());
-                outputData.projection[i_p].threeScalar[nu].resize(env().getNd());
-                outputData.projection[i_p].pSquaredInsertion[nu].resize(env().getNd());
-            }
-            // Calculate phase factors
-            auto &momph_ip = envGet(ScalarField, momPhaseName_[i_p]);
-            momph_ip = zero;
-            for (unsigned int j = 0; j < env().getNd()-1; ++j)
-            {
-                Real twoPiL = M_PI*2./l[j];
-                LatticeCoordinate(buf, j);
-                buf = outputData.projection[i_p].momentum[j]*twoPiL*buf;
-                momph_ip = momph_ip + buf;
-            }
-            momph_ip = exp(-ci*momph_ip);
-            momPhase_.push_back(&momph_ip);
-        }
-    }
-
-    // Contractions
-    for (unsigned int nu = 0; nu < env().getNd(); ++nu)
-    {
-    	buf = adj(Cshift(prop0, nu, -1));
-        for (unsigned int mu = 0; mu < env().getNd(); ++mu)
-        {
-            // Two-scalar loop
-            tmp_vp = buf * Cshift(prop0, mu, 1);
-            tmp_vp -= Cshift(buf, mu, 1) * prop0;
-            tmp_vp = 2.0*real(tmp_vp);
-            // Output if necessary
-            if (!par().output.empty())
-            {
-                for (unsigned int i_p = 0; i_p < par().outputMom.size(); ++i_p)
-                {
-                    project(outputData.projection[i_p].twoScalar[mu][nu],
-                            tmp_vp, i_p);
-                }
-            }
-
-        	// Three-scalar loop (no vertex)
-    		tmp_vp = buf * Cshift(twoscalarProp, mu, 1);
-            tmp_vp -= Cshift(buf, mu, 1) * twoscalarProp;
-            tmp_vp = 2.0*real(tmp_vp);
-            // Output if necessary
-            if (!par().output.empty())
-            {
-                for (unsigned int i_p = 0; i_p < par().outputMom.size(); ++i_p)
-                {
-                    project(outputData.projection[i_p].threeScalar[mu][nu],
-                            tmp_vp, i_p);
-                }
-            }
-
-            // Three-scalar loop (hat{p}^2 insertion)
-    		tmp_vp = buf * Cshift(psquaredProp, mu, 1);
-            tmp_vp -= Cshift(buf, mu, 1) * psquaredProp;
-            tmp_vp = 2.0*real(tmp_vp);
-            // Output if necessary
-            if (!par().output.empty())
-            {
-                for (unsigned int i_p = 0; i_p < par().outputMom.size(); ++i_p)
-                {
-                    project(outputData.projection[i_p].pSquaredInsertion[mu][nu],
-                            tmp_vp, i_p);
-                }
-            }
-        }
-    }
-
-    // OUTPUT IF NECESSARY
-    if (!par().output.empty())
-    {
-        LOG(Message) << "Saving momentum-projected correlators to '"
-                     << RESULT_FILE_NAME(par().output, vm().getTrajectory()) << "'..."
-                     << std::endl;
-        saveResult(par().output, "scalar_loops", outputData);
-    }
-}
-
-void TVPCounterTerms::project(std::vector<Complex> &projection, const ScalarField &vp, int i_p)
-{
-    std::vector<TComplex>   vecBuf;
-    envGetTmp(ScalarField, vpPhase);
-
-    vpPhase = vp*(*momPhase_[i_p]);
-    sliceSum(vpPhase, vecBuf, Tp);
-    projection.resize(vecBuf.size());
-    for (unsigned int t = 0; t < vecBuf.size(); ++t)
-    {
-        projection[t] = TensorRemove(vecBuf[t]);
-    }
-}
--- a/Hadrons/Archive/Modules/VPCounterTerms.hpp
+++ b/Hadrons/Archive/Modules/VPCounterTerms.hpp
@@ -1,103 +0,0 @@
-/*************************************************************************************
-
-Grid physics library, www.github.com/paboyle/Grid 
-
-Source file: Hadrons/Archive/Modules/VPCounterTerms.hpp
-
-Copyright (C) 2015-2019
-
-Author: Antonin Portelli <antonin.portelli@me.com>
-Author: James Harrison <jch1g10@soton.ac.uk>
-
-This program is free software; you can redistribute it and/or modify
-it under the terms of the GNU General Public License as published by
-the Free Software Foundation; either version 2 of the License, or
-(at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along
-with this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
-
-See the full license in the file "LICENSE" in the top level distribution directory
-*************************************************************************************/
-/*  END LEGAL */
-#ifndef Hadrons_MScalar_VPCounterTerms_hpp_
-#define Hadrons_MScalar_VPCounterTerms_hpp_
-
-#include <Hadrons/Global.hpp>
-#include <Hadrons/Module.hpp>
-#include <Hadrons/ModuleFactory.hpp>
-
-BEGIN_HADRONS_NAMESPACE
-
-/******************************************************************************
- *                         VPCounterTerms                                 *
- ******************************************************************************/
-BEGIN_MODULE_NAMESPACE(MScalar)
-
-class VPCounterTermsPar: Serializable
-{
-public:
-    GRID_SERIALIZABLE_CLASS_MEMBERS(VPCounterTermsPar,
-                                    std::string, source,
-                                    double,      mass,
-                                    std::string, output,
-                                    std::vector<std::string>, outputMom);
-};
-
-class TVPCounterTerms: public Module<VPCounterTermsPar>
-{
-public:
-    BASIC_TYPE_ALIASES(SIMPL,);
-    class Result: Serializable
-    {
-    public:
-        class Projection: Serializable
-        {
-        public:
-            GRID_SERIALIZABLE_CLASS_MEMBERS(Projection,
-                                            std::vector<int>,     momentum,
-                                            std::vector<std::vector<std::vector<Complex>>>, twoScalar,
-                                            std::vector<std::vector<std::vector<Complex>>>, threeScalar,
-                                            std::vector<std::vector<std::vector<Complex>>>, pSquaredInsertion);
-        };
-        GRID_SERIALIZABLE_CLASS_MEMBERS(Result,
-                                        std::vector<int>,        lattice_size,
-                                        double,                  mass,
-                                        std::vector<Projection>, projection);
-    };
-public:
-    // constructor
-    TVPCounterTerms(const std::string name);
-    // destructor
-    virtual ~TVPCounterTerms(void) {};
-    // dependency relation
-    virtual std::vector<std::string> getInput(void);
-    virtual std::vector<std::string> getOutput(void);
-protected:
-    // setup
-    virtual void setup(void);
-    // execution
-    virtual void execute(void);
-private:
-    void project(std::vector<Complex> &projection, const ScalarField &vp, int i_p);
-private:
-    std::string                freeMomPropName_, GFSrcName_, phatsqName_, prop0Name_,
-                               twoscalarName_, twoscalarVertexName_,
-                               psquaredName_, psquaredVertexName_;
-    std::vector<std::string>   phaseName_, momPhaseName_;
-    std::vector<ScalarField *> phase_, momPhase_;
-};
-
-MODULE_REGISTER(VPCounterTerms, TVPCounterTerms, MScalar);
-
-END_MODULE_NAMESPACE
-
-END_HADRONS_NAMESPACE
-
-#endif // Hadrons_MScalar_VPCounterTerms_hpp_
--- a/Hadrons/Archive/Modules/WardIdentity.cc
+++ b/Hadrons/Archive/Modules/WardIdentity.cc
@@ -1,35 +0,0 @@
-/*************************************************************************************
-
-Grid physics library, www.github.com/paboyle/Grid 
-
-Source file: Hadrons/Archive/Modules/WardIdentity.cc
-
-Copyright (C) 2015-2019
-
-Author: Antonin Portelli <antonin.portelli@me.com>
-
-This program is free software; you can redistribute it and/or modify
-it under the terms of the GNU General Public License as published by
-the Free Software Foundation; either version 2 of the License, or
-(at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along
-with this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
-
-See the full license in the file "LICENSE" in the top level distribution directory
-*************************************************************************************/
-/*  END LEGAL */
-#include <Hadrons/Modules/MContraction/WardIdentity.hpp>
-
-using namespace Grid;
-using namespace Hadrons;
-using namespace MContraction;
-
-template class Grid::Hadrons::MContraction::TWardIdentity<FIMPL>;
-
--- a/Hadrons/Archive/Modules/WardIdentity.hpp
+++ b/Hadrons/Archive/Modules/WardIdentity.hpp
@@ -1,224 +0,0 @@
-/*************************************************************************************
-
-Grid physics library, www.github.com/paboyle/Grid 
-
-Source file: Hadrons/Archive/Modules/WardIdentity.hpp
-
-Copyright (C) 2015-2019
-
-Author: Antonin Portelli <antonin.portelli@me.com>
-Author: Lanny91 <andrew.lawson@gmail.com>
-
-This program is free software; you can redistribute it and/or modify
-it under the terms of the GNU General Public License as published by
-the Free Software Foundation; either version 2 of the License, or
-(at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along
-with this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
-
-See the full license in the file "LICENSE" in the top level distribution directory
-*************************************************************************************/
-/*  END LEGAL */
-
-#ifndef Hadrons_MContraction_WardIdentity_hpp_
-#define Hadrons_MContraction_WardIdentity_hpp_
-
-#include <Hadrons/Global.hpp>
-#include <Hadrons/Module.hpp>
-#include <Hadrons/ModuleFactory.hpp>
-
-BEGIN_HADRONS_NAMESPACE
-
-/*
-  Ward Identity contractions
- -----------------------------
- 
- * options:
- - q:          propagator, 5D if available (string)
- - action:     action module used for propagator solution (string)
- - mass:       mass of quark (double)
- - test_axial: whether or not to test PCAC relation.
-*/
-
-/******************************************************************************
- *                              WardIdentity                                  *
- ******************************************************************************/
-BEGIN_MODULE_NAMESPACE(MContraction)
-
-class WardIdentityPar: Serializable
-{
-public:
-    GRID_SERIALIZABLE_CLASS_MEMBERS(WardIdentityPar,
-                                    std::string, q,
-                                    std::string, action,
-                                    double,      mass,
-                                    bool,        test_axial);
-};
-
-template <typename FImpl>
-class TWardIdentity: public Module<WardIdentityPar>
-{
-public:
-    FERM_TYPE_ALIASES(FImpl,);
-public:
-    // constructor
-    TWardIdentity(const std::string name);
-    // destructor
-    virtual ~TWardIdentity(void) {};
-    // dependency relation
-    virtual std::vector<std::string> getInput(void);
-    virtual std::vector<std::string> getOutput(void);
-protected:
-    // setup
-    virtual void setup(void);
-    // execution
-    virtual void execute(void);
-private:
-    unsigned int Ls_;
-};
-
-MODULE_REGISTER_TMP(WardIdentity, TWardIdentity<FIMPL>, MContraction);
-
-/******************************************************************************
- *                     TWardIdentity implementation                           *
- ******************************************************************************/
-// constructor /////////////////////////////////////////////////////////////////
-template <typename FImpl>
-TWardIdentity<FImpl>::TWardIdentity(const std::string name)
-: Module<WardIdentityPar>(name)
-{}
-
-// dependencies/products ///////////////////////////////////////////////////////
-template <typename FImpl>
-std::vector<std::string> TWardIdentity<FImpl>::getInput(void)
-{
-    std::vector<std::string> in = {par().q, par().action};
-    
-    return in;
-}
-
-template <typename FImpl>
-std::vector<std::string> TWardIdentity<FImpl>::getOutput(void)
-{
-    std::vector<std::string> out = {};
-    
-    return out;
-}
-
-// setup ///////////////////////////////////////////////////////////////////////
-template <typename FImpl>
-void TWardIdentity<FImpl>::setup(void)
-{
-    Ls_ = env().getObjectLs(par().q);
-    if (Ls_ != env().getObjectLs(par().action))
-    {
-        HADRONS_ERROR(Size, "Ls mismatch between quark action and propagator");
-    }
-    envTmpLat(PropagatorField, "tmp");
-    envTmpLat(PropagatorField, "vector_WI");
-    if (par().test_axial)
-    {
-        envTmpLat(PropagatorField, "psi");
-        envTmpLat(LatticeComplex,  "PP");
-        envTmpLat(LatticeComplex,  "axial_defect");
-        envTmpLat(LatticeComplex,  "PJ5q");
-    }
-}
-
-// execution ///////////////////////////////////////////////////////////////////
-template <typename FImpl>
-void TWardIdentity<FImpl>::execute(void)
-{
-    LOG(Message) << "Performing Ward Identity checks for quark '" << par().q
-                 << "'." << std::endl;
-
-    auto  &q   = envGet(PropagatorField, par().q);
-    auto  &act = envGet(FMat, par().action);
-    Gamma g5(Gamma::Algebra::Gamma5);
-
-    // Compute D_mu V_mu, D here is backward derivative.
-    envGetTmp(PropagatorField, tmp);
-    envGetTmp(PropagatorField, vector_WI);
-    vector_WI    = zero;
-    for (unsigned int mu = 0; mu < Nd; ++mu)
-    {
-        act.ContractConservedCurrent(q, q, tmp, Current::Vector, mu);
-        tmp -= Cshift(tmp, mu, -1);
-        vector_WI += tmp;
-    }
-
-    // Test ward identity D_mu V_mu = 0;
-    LOG(Message) << "Vector Ward Identity check Delta_mu V_mu = " 
-                 << norm2(vector_WI) << std::endl;
-
-    if (par().test_axial)
-    {
-        envGetTmp(PropagatorField, psi);
-        envGetTmp(LatticeComplex, PP);
-        envGetTmp(LatticeComplex, axial_defect);
-        envGetTmp(LatticeComplex, PJ5q);
-        std::vector<TComplex> axial_buf;
-
-        // Compute <P|D_mu A_mu>, D is backwards derivative.
-        axial_defect = zero;
-        for (unsigned int mu = 0; mu < Nd; ++mu)
-        {
-            act.ContractConservedCurrent(q, q, tmp, Current::Axial, mu);
-            tmp -= Cshift(tmp, mu, -1);
-            axial_defect += trace(g5*tmp);
-        }
-
-        // Get <P|J5q> for 5D (zero for 4D) and <P|P>.
-        PJ5q = zero;
-        if (Ls_ > 1)
-        {
-            // <P|P>
-            ExtractSlice(tmp, q, 0, 0);
-            psi  = 0.5 * (tmp - g5*tmp);
-            ExtractSlice(tmp, q, Ls_ - 1, 0);
-            psi += 0.5 * (tmp + g5*tmp);
-            PP = trace(adj(psi)*psi);
-
-            // <P|5Jq>
-            ExtractSlice(tmp, q, Ls_/2 - 1, 0);
-            psi  = 0.5 * (tmp + g5*tmp);
-            ExtractSlice(tmp, q, Ls_/2, 0);
-            psi += 0.5 * (tmp - g5*tmp);
-            PJ5q = trace(adj(psi)*psi);
-        }
-        else
-        {
-            PP = trace(adj(q)*q);
-        }
-
-        // Test ward identity <P|D_mu A_mu> = 2m<P|P> + 2<P|J5q>
-        LOG(Message) << "|D_mu A_mu|^2 = " << norm2(axial_defect) << std::endl;
-        LOG(Message) << "|PP|^2        = " << norm2(PP) << std::endl;
-        LOG(Message) << "|PJ5q|^2      = " << norm2(PJ5q) << std::endl;
-        LOG(Message) << "Axial Ward Identity defect Delta_mu A_mu = "
-                     << norm2(axial_defect) << std::endl;
-    
-        // Axial defect by timeslice.
-        axial_defect -= 2.*(par().mass*PP + PJ5q);
-        LOG(Message) << "Check Axial defect by timeslice" << std::endl;
-        sliceSum(axial_defect, axial_buf, Tp);
-        for (int t = 0; t < axial_buf.size(); ++t)
-        {
-            LOG(Message) << "t = " << t << ": " 
-                         << TensorRemove(axial_buf[t]) << std::endl;
-        }
-    }
-}
-
-END_MODULE_NAMESPACE
-
-END_HADRONS_NAMESPACE
-
-#endif // Hadrons_WardIdentity_hpp_
--- a/Hadrons/DilutedNoise.hpp
+++ b/Hadrons/DilutedNoise.hpp
@@ -1,250 +0,0 @@
-/*************************************************************************************
-
-Grid physics library, www.github.com/paboyle/Grid 
-
-Source file: Hadrons/DilutedNoise.hpp
-
-Copyright (C) 2015-2019
-
-Author: Antonin Portelli <antonin.portelli@me.com>
-Author: Vera Guelpers <Vera.Guelpers@ed.ac.uk>
-Author: Vera Guelpers <vmg1n14@soton.ac.uk>
-
-This program is free software; you can redistribute it and/or modify
-it under the terms of the GNU General Public License as published by
-the Free Software Foundation; either version 2 of the License, or
-(at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along
-with this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
-
-See the full license in the file "LICENSE" in the top level distribution directory
-*************************************************************************************/
-/*  END LEGAL */
-#ifndef Hadrons_DilutedNoise_hpp_
-#define Hadrons_DilutedNoise_hpp_
-
-#include <Hadrons/Global.hpp>
-
-BEGIN_HADRONS_NAMESPACE
-
-/******************************************************************************
- *                   Abstract container for diluted noise                     *
- ******************************************************************************/
-template <typename FImpl>
-class DilutedNoise
-{
-public:
-    typedef typename FImpl::FermionField FermionField;
-public:
-    // constructor/destructor
-    DilutedNoise(GridCartesian *g);
-    DilutedNoise(GridCartesian *g, const unsigned int nNoise);
-    virtual ~DilutedNoise(void) = default;
-    // access
-    std::vector<FermionField> &       getNoise(void);
-    const std::vector<FermionField> & getNoise(void) const;
-    const FermionField &              operator[](const unsigned int i) const;
-    FermionField &                    operator[](const unsigned int i);
-    void                              resize(const unsigned int nNoise);
-    unsigned int                      size(void) const;
-    GridCartesian                     *getGrid(void) const;
-    // generate noise (pure virtual)
-    virtual void generateNoise(GridParallelRNG &rng) = 0;
-private:
-    std::vector<FermionField> noise_;
-    GridCartesian             *grid_;
-    unsigned int              nNoise_;
-};
-
-template <typename FImpl>
-class TimeDilutedSpinColorDiagonalNoise: public DilutedNoise<FImpl>
-{
-public:
-    typedef typename FImpl::FermionField FermionField;
-public:
-    // constructor/destructor
-    TimeDilutedSpinColorDiagonalNoise(GridCartesian *g);
-    virtual ~TimeDilutedSpinColorDiagonalNoise(void) = default;
-    // generate noise
-    virtual void generateNoise(GridParallelRNG &rng);
-private:
-    unsigned int nt_;
-};
-
-template <typename FImpl>
-class FullVolumeSpinColorDiagonalNoise: public DilutedNoise<FImpl>
-{
-public:
-    typedef typename FImpl::FermionField FermionField;
-public:
-    // constructor/destructor
-    FullVolumeSpinColorDiagonalNoise(GridCartesian *g, unsigned int n_src);
-    virtual ~FullVolumeSpinColorDiagonalNoise(void) = default;
-    // generate noise
-    virtual void generateNoise(GridParallelRNG &rng);
-private:
-    unsigned int nSrc_;
-};
-
-
-/******************************************************************************
- *                    DilutedNoise template implementation                    *
- ******************************************************************************/
-template <typename FImpl>
-DilutedNoise<FImpl>::DilutedNoise(GridCartesian *g)
-: grid_(g)
-{}
-
-template <typename FImpl>
-DilutedNoise<FImpl>::DilutedNoise(GridCartesian *g,
-                                  const unsigned int nNoise)
-: DilutedNoise(g)
-{
-    resize(nNoise);
-}
-
-template <typename FImpl>
-std::vector<typename DilutedNoise<FImpl>::FermionField> & DilutedNoise<FImpl>::
-getNoise(void)
-{
-    return noise_;
-}
-
-template <typename FImpl>
-const std::vector<typename DilutedNoise<FImpl>::FermionField> & DilutedNoise<FImpl>::
-getNoise(void) const
-{
-    return noise_;
-}
-
-template <typename FImpl>
-const typename DilutedNoise<FImpl>::FermionField & 
-DilutedNoise<FImpl>::operator[](const unsigned int i) const
-{
-    return noise_[i];
-}
-
-template <typename FImpl>
-typename DilutedNoise<FImpl>::FermionField & 
-DilutedNoise<FImpl>::operator[](const unsigned int i)
-{
-    return noise_[i];
-}
-
-template <typename FImpl>
-void DilutedNoise<FImpl>::resize(const unsigned int nNoise)
-{
-    nNoise_ = nNoise;
-    noise_.resize(nNoise, grid_);
-}
-
-template <typename FImpl>
-unsigned int DilutedNoise<FImpl>::size(void) const
-{  
-    return noise_.size();
-}
-
-template <typename FImpl>
-GridCartesian * DilutedNoise<FImpl>::getGrid(void) const
-{
-    return grid_;
-}
-
-/******************************************************************************
- *        TimeDilutedSpinColorDiagonalNoise template implementation           *
- ******************************************************************************/
-template <typename FImpl>
-TimeDilutedSpinColorDiagonalNoise<FImpl>::
-TimeDilutedSpinColorDiagonalNoise(GridCartesian *g)
-: DilutedNoise<FImpl>(g)
-{
-    nt_ = this->getGrid()->GlobalDimensions().back();
-    this->resize(nt_*Ns*FImpl::Dimension);
-}
-
-template <typename FImpl>
-void TimeDilutedSpinColorDiagonalNoise<FImpl>::generateNoise(GridParallelRNG &rng)
-{
-    typedef decltype(peekColour((*this)[0], 0)) SpinField;
-
-    auto                       &noise = *this;
-    auto                       g      = this->getGrid();
-    auto                       nd     = g->GlobalDimensions().size();
-    auto                       nc     = FImpl::Dimension;
-    Complex                    shift(1., 1.);
-    Lattice<iScalar<vInteger>> tLat(g);
-    LatticeComplex             eta(g), etaCut(g);
-    SpinField                  etas(g);
-    unsigned int               i = 0;
-
-    LatticeCoordinate(tLat, nd - 1);
-    bernoulli(rng, eta);
-    eta = (2.*eta - shift)*(1./::sqrt(2.));
-    for (unsigned int t = 0; t < nt_; ++t)
-    {
-        etaCut = where((tLat == t), eta, 0.*eta);
-        for (unsigned int s = 0; s < Ns; ++s)
-        {
-            etas = zero;
-            pokeSpin(etas, etaCut, s);
-            for (unsigned int c = 0; c < nc; ++c)
-            {
-                noise[i] = zero;
-                pokeColour(noise[i], etas, c);
-                i++;
-            }
-        }
-    }
-}
-
-/******************************************************************************
- *        FullVolumeSpinColorDiagonalNoise template implementation           *
- ******************************************************************************/
-template <typename FImpl>
-FullVolumeSpinColorDiagonalNoise<FImpl>::
-FullVolumeSpinColorDiagonalNoise(GridCartesian *g, unsigned int nSrc)
-: DilutedNoise<FImpl>(g, nSrc*Ns*FImpl::Dimension), nSrc_(nSrc)
-{}
-
-template <typename FImpl>
-void FullVolumeSpinColorDiagonalNoise<FImpl>::generateNoise(GridParallelRNG &rng)
-{
-    typedef decltype(peekColour((*this)[0], 0)) SpinField;
-
-    auto                       &noise = *this;
-    auto                       g      = this->getGrid();
-    auto                       nd     = g->GlobalDimensions().size();
-    auto                       nc     = FImpl::Dimension;
-    Complex                    shift(1., 1.);
-    LatticeComplex             eta(g);
-    SpinField                  etas(g);
-    unsigned int               i = 0;
-
-    bernoulli(rng, eta);
-    eta = (2.*eta - shift)*(1./::sqrt(2.));
-    for (unsigned int n = 0; n < nSrc_; ++n)
-    {
-        for (unsigned int s = 0; s < Ns; ++s)
-        {
-            etas = zero;
-            pokeSpin(etas, eta, s);
-            for (unsigned int c = 0; c < nc; ++c)
-            {
-                noise[i] = zero;
-                pokeColour(noise[i], etas, c);
-                i++;
-            }
-        }
-    }
-}
-
-END_HADRONS_NAMESPACE
-
-#endif // Hadrons_DilutedNoise_hpp_
--- a/Hadrons/DiskVector.hpp
+++ b/Hadrons/DiskVector.hpp
@@ -1,456 +0,0 @@
-/*************************************************************************************
-
-Grid physics library, www.github.com/paboyle/Grid 
-
-Source file: Hadrons/DiskVector.hpp
-
-Copyright (C) 2015-2019
-
-Author: Antonin Portelli <antonin.portelli@me.com>
-
-This program is free software; you can redistribute it and/or modify
-it under the terms of the GNU General Public License as published by
-the Free Software Foundation; either version 2 of the License, or
-(at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along
-with this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
-
-See the full license in the file "LICENSE" in the top level distribution directory
-*************************************************************************************/
-/*  END LEGAL */
-#ifndef Hadrons_DiskVector_hpp_
-#define Hadrons_DiskVector_hpp_
-
-#include <Hadrons/Global.hpp>
-#include <Hadrons/A2AMatrix.hpp>
-#include <deque>
-#include <sys/stat.h>
-#include <ftw.h>
-#include <unistd.h>
-
-#ifdef DV_DEBUG
-#define DV_DEBUG_MSG(dv, stream) LOG(Debug) << "diskvector " << (dv) << ": " << stream << std::endl
-#else
-#define DV_DEBUG_MSG(dv, stream)
-#endif
-
-BEGIN_HADRONS_NAMESPACE
-
-/******************************************************************************
- *                           Abstract base class                              *
- ******************************************************************************/
-template <typename T>
-class DiskVectorBase
-{
-public:
-    typedef T ObjectType;
-
-    // helper for read/write vector access
-    class RwAccessHelper
-    {
-    public:
-        RwAccessHelper(DiskVectorBase<T> &master, const unsigned int i)
-        : master_(master), cmaster_(master), i_(i) {}
-
-        // operator=: somebody is trying to store a vector element
-        // write to cache and tag as modified
-        T &operator=(const T &obj) const
-        {
-            auto &cache    = *master_.cachePtr_;
-            auto &modified = *master_.modifiedPtr_;
-            auto &index    = *master_.indexPtr_;
-
-            DV_DEBUG_MSG(&master_, "writing to " << i_);
-            master_.cacheInsert(i_, obj);
-            modified[index.at(i_)] = true;
-            
-            return cache[index.at(i_)];
-        }
-
-        // implicit cast to const object reference and redirection
-        // to the const operator[] for read-only operations
-        operator const T&() const
-        {
-            return cmaster_[i_];
-        }
-    private:
-        DiskVectorBase<T>       &master_;
-        const DiskVectorBase<T> &cmaster_;
-        const unsigned int      i_;
-    };
-public:
-    DiskVectorBase(const std::string dirname, const unsigned int size = 0,
-                   const unsigned int cacheSize = 1, const bool clean = true);
-    DiskVectorBase(DiskVectorBase<T> &&v) = default;
-    virtual ~DiskVectorBase(void);
-    const T & operator[](const unsigned int i) const;
-    RwAccessHelper operator[](const unsigned int i);
-    double hitRatio(void) const;
-    void resetStat(void);
-private:
-    virtual void load(T &obj, const std::string filename) const = 0;
-    virtual void save(const std::string filename, const T &obj) const = 0;
-    virtual std::string filename(const unsigned int i) const;
-    void evict(void) const;
-    void fetch(const unsigned int i) const;
-    void cacheInsert(const unsigned int i, const T &obj) const;
-    void clean(void);
-private:
-    std::string                                           dirname_;
-    unsigned int                                          size_, cacheSize_;
-    double                                                access_{0.}, hit_{0.};
-    bool                                                  clean_;
-    // using pointers to allow modifications when class is const
-    // semantic: const means data unmodified, but cache modification allowed
-    std::unique_ptr<std::vector<T>>                       cachePtr_;
-    std::unique_ptr<std::vector<bool>>                    modifiedPtr_;
-    std::unique_ptr<std::map<unsigned int, unsigned int>> indexPtr_;
-    std::unique_ptr<std::stack<unsigned int>>             freePtr_;
-    std::unique_ptr<std::deque<unsigned int>>             loadsPtr_;                
-};
-
-/******************************************************************************
- *                   Specialisation for serialisable classes                  *
- ******************************************************************************/
-template <typename T, typename Reader, typename Writer>
-class SerializableDiskVector: public DiskVectorBase<T>
-{
-public:
-    using DiskVectorBase<T>::DiskVectorBase;
-private:
-    virtual void load(T &obj, const std::string filename) const
-    {
-        Reader reader(filename);
-
-        read(reader, basename(filename), obj);
-    }
-
-    virtual void save(const std::string filename, const T &obj) const
-    {
-        Writer writer(filename);
-
-        write(writer, basename(filename), obj);
-    }
-};
-
-/******************************************************************************
- *                      Specialisation for Eigen matrices                     *
- ******************************************************************************/
-template <typename T>
-using EigenDiskVectorMat = A2AMatrix<T>;
-
-template <typename T>
-class EigenDiskVector: public DiskVectorBase<EigenDiskVectorMat<T>>
-{
-public:
-    using DiskVectorBase<EigenDiskVectorMat<T>>::DiskVectorBase;
-    typedef EigenDiskVectorMat<T> Matrix;
-public:
-    T operator()(const unsigned int i, const Eigen::Index j,
-                 const Eigen::Index k) const
-    {
-        return (*this)[i](j, k);
-    }
-private:
-    virtual void load(EigenDiskVectorMat<T> &obj, const std::string filename) const
-    {
-        std::ifstream f(filename, std::ios::binary);
-        uint32_t      crc, check;
-        Eigen::Index  nRow, nCol;
-        size_t        matSize;
-        double        tRead, tHash;
-
-        f.read(reinterpret_cast<char *>(&crc), sizeof(crc));
-        f.read(reinterpret_cast<char *>(&nRow), sizeof(nRow));
-        f.read(reinterpret_cast<char *>(&nCol), sizeof(nCol));
-        obj.resize(nRow, nCol);
-        matSize = nRow*nCol*sizeof(T);
-        tRead  = -usecond();
-        f.read(reinterpret_cast<char *>(obj.data()), matSize);
-        tRead += usecond();
-        tHash  = -usecond();
-#ifdef USE_IPP
-        check  = GridChecksum::crc32c(obj.data(), matSize);
-#else
-        check  = GridChecksum::crc32(obj.data(), matSize);
-#endif
-        tHash += usecond();
-        DV_DEBUG_MSG(this, "Eigen read " << tRead/1.0e6 << " sec " << matSize/tRead*1.0e6/1024/1024 << " MB/s");
-        DV_DEBUG_MSG(this, "Eigen crc32 " << std::hex << check << std::dec 
-                     << " " << tHash/1.0e6 << " sec " << matSize/tHash*1.0e6/1024/1024 << " MB/s");
-        if (crc != check)
-        {
-            HADRONS_ERROR(Io, "checksum failed")
-        }
-    }
-
-    virtual void save(const std::string filename, const EigenDiskVectorMat<T> &obj) const
-    {
-        std::ofstream f(filename, std::ios::binary);
-        uint32_t      crc;
-        Eigen::Index  nRow, nCol;
-        size_t        matSize;
-        double        tWrite, tHash;
-        
-        nRow    = obj.rows();
-        nCol    = obj.cols();
-        matSize = nRow*nCol*sizeof(T);
-        tHash   = -usecond();
-#ifdef USE_IPP
-        crc     = GridChecksum::crc32c(obj.data(), matSize);
-#else
-        crc     = GridChecksum::crc32(obj.data(), matSize);
-#endif
-        tHash  += usecond();
-        f.write(reinterpret_cast<char *>(&crc), sizeof(crc));
-        f.write(reinterpret_cast<char *>(&nRow), sizeof(nRow));
-        f.write(reinterpret_cast<char *>(&nCol), sizeof(nCol));
-        tWrite = -usecond();
-        f.write(reinterpret_cast<const char *>(obj.data()), matSize);
-        tWrite += usecond();
-        DV_DEBUG_MSG(this, "Eigen write " << tWrite/1.0e6 << " sec " << matSize/tWrite*1.0e6/1024/1024 << " MB/s");
-        DV_DEBUG_MSG(this, "Eigen crc32 " << std::hex << crc << std::dec
-                     << " " << tHash/1.0e6 << " sec " << matSize/tHash*1.0e6/1024/1024 << " MB/s");
-    }
-};
-
-/******************************************************************************
- *                       DiskVectorBase implementation                         *
- ******************************************************************************/
-template <typename T>
-DiskVectorBase<T>::DiskVectorBase(const std::string dirname, 
-                                  const unsigned int size,
-                                  const unsigned int cacheSize,
-                                  const bool clean)
-: dirname_(dirname), size_(size), cacheSize_(cacheSize), clean_(clean)
-, cachePtr_(new std::vector<T>(size))
-, modifiedPtr_(new std::vector<bool>(size, false))
-, indexPtr_(new std::map<unsigned int, unsigned int>())
-, freePtr_(new std::stack<unsigned int>)
-, loadsPtr_(new std::deque<unsigned int>())
-{
-    struct stat s;
-
-    if(stat(dirname.c_str(), &s) == 0)
-    {
-        HADRONS_ERROR(Io, "directory '" + dirname + "' already exists")
-    }
-    mkdir(dirname);
-    for (unsigned int i = 0; i < cacheSize_; ++i)
-    {
-        freePtr_->push(i);
-    }
-}
-
-template <typename T>
-DiskVectorBase<T>::~DiskVectorBase(void)
-{
-    if (clean_)
-    {
-        clean();
-    }
-}
-
-template <typename T>
-const T & DiskVectorBase<T>::operator[](const unsigned int i) const
-{
-    auto &cache   = *cachePtr_;
-    auto &index   = *indexPtr_;
-    auto &freeInd = *freePtr_;
-    auto &loads   = *loadsPtr_;
-
-    DV_DEBUG_MSG(this, "accessing " << i << " (RO)");
-
-    if (i >= size_)
-    {
-        HADRONS_ERROR(Size, "index out of range");
-    }
-    const_cast<double &>(access_)++;
-    if (index.find(i) == index.end())
-    {
-        // cache miss
-        DV_DEBUG_MSG(this, "cache miss");
-        fetch(i);
-    }
-    else
-    {
-        DV_DEBUG_MSG(this, "cache hit");
-
-        auto pos = std::find(loads.begin(), loads.end(), i);
-
-        const_cast<double &>(hit_)++;
-        loads.erase(pos);
-        loads.push_back(i);
-    }
-
-#ifdef DV_DEBUG
-    std::string msg;
-
-    for (auto &p: loads)
-    {
-        msg += std::to_string(p) + " ";
-    }
-    DV_DEBUG_MSG(this, "in cache: " << msg);
-#endif
-
-    return cache[index.at(i)];
-}
-
-template <typename T>
-typename DiskVectorBase<T>::RwAccessHelper DiskVectorBase<T>::operator[](const unsigned int i)
-{
-    DV_DEBUG_MSG(this, "accessing " << i << " (RW)");
-
-    if (i >= size_)
-    {
-        HADRONS_ERROR(Size, "index out of range");
-    }
-
-    return RwAccessHelper(*this, i);
-}
-
-template <typename T>
-double DiskVectorBase<T>::hitRatio(void) const
-{
-    return hit_/access_;
-}
-
-template <typename T>
-void DiskVectorBase<T>::resetStat(void)
-{
-    access_ = 0.;
-    hit_    = 0.;
-}
-
-template <typename T>
-std::string DiskVectorBase<T>::filename(const unsigned int i) const
-{
-    return dirname_ + "/elem_" + std::to_string(i);
-}
-
-template <typename T>
-void DiskVectorBase<T>::evict(void) const
-{
-    auto &cache    = *cachePtr_;
-    auto &modified = *modifiedPtr_;
-    auto &index    = *indexPtr_;
-    auto &freeInd  = *freePtr_;
-    auto &loads    = *loadsPtr_;
-
-    if (index.size() >= cacheSize_)
-    {
-        unsigned int i = loads.front();
-        
-        DV_DEBUG_MSG(this, "evicting " << i);
-        if (modified[index.at(i)])
-        {
-            DV_DEBUG_MSG(this, "element " << i << " modified, saving to disk");
-            save(filename(i), cache[index.at(i)]);
-        }
-        freeInd.push(index.at(i));
-        index.erase(i);
-        loads.pop_front();
-    }
-}
-
-template <typename T>
-void DiskVectorBase<T>::fetch(const unsigned int i) const
-{
-    auto &cache    = *cachePtr_;
-    auto &modified = *modifiedPtr_;
-    auto &index    = *indexPtr_;
-    auto &freeInd  = *freePtr_;
-    auto &loads    = *loadsPtr_;
-
-    struct stat s;
-
-    DV_DEBUG_MSG(this, "loading " << i << " from disk");
-
-    evict();
-    
-    if(stat(filename(i).c_str(), &s) != 0)
-    {
-        HADRONS_ERROR(Io, "disk vector element " + std::to_string(i) + " uninitialised");
-    }
-    index[i] = freeInd.top();
-    freeInd.pop();
-    load(cache[index.at(i)], filename(i));
-    loads.push_back(i);
-    modified[index.at(i)] = false;
-}
-
-template <typename T>
-void DiskVectorBase<T>::cacheInsert(const unsigned int i, const T &obj) const
-{
-    auto &cache    = *cachePtr_;
-    auto &modified = *modifiedPtr_;
-    auto &index    = *indexPtr_;
-    auto &freeInd  = *freePtr_;
-    auto &loads    = *loadsPtr_;
-
-    // cache miss, evict and store
-    if (index.find(i) == index.end())
-    {
-        evict();
-        index[i] = freeInd.top();
-        freeInd.pop();
-        cache[index.at(i)] = obj;
-        loads.push_back(i);
-        modified[index.at(i)] = false;
-    }
-    // cache hit, modify current value
-    else
-    {
-        auto pos = std::find(loads.begin(), loads.end(), i);
-        
-        cache[index.at(i)]    = obj;
-        modified[index.at(i)] = true;
-        loads.erase(pos);
-        loads.push_back(i);
-    }
-
-#ifdef DV_DEBUG
-    std::string msg;
-
-    for (auto &p: loads)
-    {
-        msg += std::to_string(p) + " ";
-    }
-    DV_DEBUG_MSG(this, "in cache: " << msg);
-#endif
-}
-
-#ifdef DV_DEBUG
-#undef DV_DEBUG_MSG
-#endif
-
-template <typename T>
-void DiskVectorBase<T>::clean(void)
-{
-    auto unlink = [](const char *fpath, const struct stat *sb, 
-                     int typeflag, struct FTW *ftwbuf)
-    {
-        int rv = remove(fpath);
-
-        if (rv)
-        {
-            HADRONS_ERROR(Io, "cannot remove '" + std::string(fpath) + "': "
-                          + std::string(std::strerror(errno)));
-        }
-
-        return rv;
-    };
-
-    nftw(dirname_.c_str(), unlink, 64, FTW_DEPTH | FTW_PHYS);
-}
-
-END_HADRONS_NAMESPACE
-
-#endif // Hadrons_DiskVector_hpp_
--- a/Hadrons/EigenPack.hpp
+++ b/Hadrons/EigenPack.hpp
@@ -1,416 +0,0 @@
-/*************************************************************************************
-
-Grid physics library, www.github.com/paboyle/Grid 
-
-Source file: Hadrons/EigenPack.hpp
-
-Copyright (C) 2015-2019
-
-Author: Antonin Portelli <antonin.portelli@me.com>
-
-This program is free software; you can redistribute it and/or modify
-it under the terms of the GNU General Public License as published by
-the Free Software Foundation; either version 2 of the License, or
-(at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along
-with this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
-
-See the full license in the file "LICENSE" in the top level distribution directory
-*************************************************************************************/
-/*  END LEGAL */
-#ifndef Hadrons_EigenPack_hpp_
-#define Hadrons_EigenPack_hpp_
-
-#include <Hadrons/Global.hpp>
-#include <Grid/algorithms/iterative/Deflation.h>
-#include <Grid/algorithms/iterative/LocalCoherenceLanczos.h>
-
-BEGIN_HADRONS_NAMESPACE
-
-// Lanczos type
-#ifndef HADRONS_DEFAULT_LANCZOS_NBASIS
-#define HADRONS_DEFAULT_LANCZOS_NBASIS 60
-#endif
-
-#define HADRONS_DUMP_EP_METADATA(record) \
-LOG(Message) << "Eigenpack metadata:" << std::endl;\
-LOG(Message) << "* operator" << std::endl;\
-LOG(Message) << (record).operatorXml << std::endl;\
-LOG(Message) << "* solver" << std::endl;\
-LOG(Message) << (record).solverXml << std::endl;
-
-struct PackRecord
-{
-    std::string operatorXml, solverXml;
-};
-
-struct VecRecord: Serializable
-{
-    GRID_SERIALIZABLE_CLASS_MEMBERS(VecRecord,
-                                    unsigned int, index,
-                                    double,       eval);
-    VecRecord(void): index(0), eval(0.) {}
-};
-
-namespace EigenPackIo
-{
-    inline void readHeader(PackRecord &record, ScidacReader &binReader)
-    {
-        std::string recordXml;
-
-        binReader.readLimeObject(recordXml, SCIDAC_FILE_XML);
-        XmlReader xmlReader(recordXml, true, "eigenPackPar");
-        xmlReader.push();
-        xmlReader.readCurrentSubtree(record.operatorXml);
-        xmlReader.nextElement();
-        xmlReader.readCurrentSubtree(record.solverXml);
-    }
-
-    template <typename T, typename TIo = T>
-    void readElement(T &evec, RealD &eval, const unsigned int index,
-                     ScidacReader &binReader, TIo *ioBuf = nullptr)
-    {
-        VecRecord vecRecord;
-
-        LOG(Message) << "Reading eigenvector " << index << std::endl;
-        if (ioBuf == nullptr)
-        {
-            binReader.readScidacFieldRecord(evec, vecRecord);
-        }
-        else
-        {
-            binReader.readScidacFieldRecord(*ioBuf, vecRecord);
-            precisionChange(evec, *ioBuf);
-        }
-        if (vecRecord.index != index)
-        {
-            HADRONS_ERROR(Io, "Eigenvector " + std::to_string(index) + " has a"
-                            + " wrong index (expected " + std::to_string(vecRecord.index) 
-                            + ")");
-        }
-        eval = vecRecord.eval;
-    }
-
-    template <typename T, typename TIo = T>
-    static void readPack(std::vector<T> &evec, std::vector<RealD> &eval,
-                         PackRecord &record, const std::string filename, 
-                         const unsigned int size, bool multiFile, 
-                         GridBase *gridIo = nullptr)
-    {
-        std::unique_ptr<TIo> ioBuf{nullptr};
-        ScidacReader         binReader;
-
-        if (typeHash<T>() != typeHash<TIo>())
-        {
-            if (gridIo == nullptr)
-            {
-                HADRONS_ERROR(Definition, 
-                              "I/O type different from vector type but null I/O grid passed");
-            }
-            ioBuf.reset(new TIo(gridIo));
-        }
-        if (multiFile)
-        {
-            std::string fullFilename;
-
-            for(int k = 0; k < size; ++k) 
-            {
-                fullFilename = filename + "/v" + std::to_string(k) + ".bin";
-                binReader.open(fullFilename);
-                readHeader(record, binReader);
-                readElement(evec[k], eval[k], k, binReader, ioBuf.get());
-                binReader.close();
-            }
-        }
-        else
-        {
-            binReader.open(filename);
-            readHeader(record, binReader);
-            for(int k = 0; k < size; ++k) 
-            {
-                readElement(evec[k], eval[k], k, binReader, ioBuf.get());
-            }
-            binReader.close();
-        }
-    }
-
-    inline void writeHeader(ScidacWriter &binWriter, PackRecord &record)
-    {
-        XmlWriter xmlWriter("", "eigenPackPar");
-
-        xmlWriter.pushXmlString(record.operatorXml);
-        xmlWriter.pushXmlString(record.solverXml);
-        binWriter.writeLimeObject(1, 1, xmlWriter, "parameters", SCIDAC_FILE_XML);
-    }
-
-    template <typename T, typename TIo = T>
-    void writeElement(ScidacWriter &binWriter, T &evec, RealD &eval, 
-                      const unsigned int index, TIo *ioBuf, 
-                      T *testBuf = nullptr)
-    {
-        VecRecord vecRecord;
-
-        LOG(Message) << "Writing eigenvector " << index << std::endl;
-        vecRecord.eval  = eval;
-        vecRecord.index = index;
-        if ((ioBuf == nullptr) || (testBuf == nullptr))
-        {
-            binWriter.writeScidacFieldRecord(evec, vecRecord, DEFAULT_ASCII_PREC);
-        }
-        else
-        {
-            precisionChange(*ioBuf, evec);
-            precisionChange(*testBuf, *ioBuf);
-            *testBuf -= evec;
-            LOG(Message) << "Precision diff norm^2 " << norm2(*testBuf) << std::endl;
-            binWriter.writeScidacFieldRecord(*ioBuf, vecRecord, DEFAULT_ASCII_PREC);
-        }   
-    }
-    
-    template <typename T, typename TIo = T>
-    static void writePack(const std::string filename, std::vector<T> &evec, 
-                          std::vector<RealD> &eval, PackRecord &record, 
-                          const unsigned int size, bool multiFile, 
-                          GridBase *gridIo = nullptr)
-    {
-        GridBase             *grid = evec[0]._grid;
-        std::unique_ptr<TIo> ioBuf{nullptr}; 
-        std::unique_ptr<T>   testBuf{nullptr};
-        ScidacWriter         binWriter(grid->IsBoss());
-
-        if (typeHash<T>() != typeHash<TIo>())
-        {
-            if (gridIo == nullptr)
-            {
-                HADRONS_ERROR(Definition, 
-                              "I/O type different from vector type but null I/O grid passed");
-            }
-            ioBuf.reset(new TIo(gridIo));
-            testBuf.reset(new T(grid));
-        }
-        if (multiFile)
-        {
-            std::string fullFilename;
-
-            for(int k = 0; k < size; ++k) 
-            {
-                fullFilename = filename + "/v" + std::to_string(k) + ".bin";
-
-                makeFileDir(fullFilename, grid);
-                binWriter.open(fullFilename);
-                writeHeader(binWriter, record);
-                writeElement(binWriter, evec[k], eval[k], k, ioBuf.get(), testBuf.get());
-                binWriter.close();
-            }
-        }
-        else
-        {
-            makeFileDir(filename, grid);
-            binWriter.open(filename);
-            writeHeader(binWriter, record);
-            for(int k = 0; k < size; ++k) 
-            {
-                writeElement(binWriter, evec[k], eval[k], k, ioBuf.get(), testBuf.get());
-            }
-            binWriter.close();
-        }
-    }
-}
-
-template <typename F>
-class BaseEigenPack
-{
-public:
-    typedef F Field;
-public:
-    std::vector<RealD> eval;
-    std::vector<F>     evec;
-    PackRecord         record;
-public:
-    BaseEigenPack(void)          = default;
-    BaseEigenPack(const size_t size, GridBase *grid)
-    {
-        resize(size, grid);
-    }
-    virtual ~BaseEigenPack(void) = default;
-    void resize(const size_t size, GridBase *grid)
-    {
-        eval.resize(size);
-        evec.resize(size, grid);
-    }
-};
-
-template <typename F, typename FIo = F>
-class EigenPack: public BaseEigenPack<F>
-{
-public:
-    typedef F   Field;
-    typedef FIo FieldIo;
-public:
-    EigenPack(void)          = default;
-    virtual ~EigenPack(void) = default;
-
-    EigenPack(const size_t size, GridBase *grid, GridBase *gridIo = nullptr)
-    : BaseEigenPack<F>(size, grid)
-    {
-        if (typeHash<F>() != typeHash<FIo>())
-        {
-            if (gridIo == nullptr)
-            {
-                HADRONS_ERROR(Definition, 
-                              "I/O type different from vector type but null I/O grid passed");
-            }
-        }
-        gridIo_ = gridIo;
-    }
-
-    virtual void read(const std::string fileStem, const bool multiFile, const int traj = -1)
-    {
-        EigenPackIo::readPack<F, FIo>(this->evec, this->eval, this->record, 
-                                      evecFilename(fileStem, traj, multiFile), 
-                                      this->evec.size(), multiFile, gridIo_);
-        HADRONS_DUMP_EP_METADATA(this->record);
-    }
-
-    virtual void write(const std::string fileStem, const bool multiFile, const int traj = -1)
-    {
-        EigenPackIo::writePack<F, FIo>(evecFilename(fileStem, traj, multiFile), 
-                                       this->evec, this->eval, this->record, 
-                                       this->evec.size(), multiFile, gridIo_);
-    }
-protected:
-    std::string evecFilename(const std::string stem, const int traj, const bool multiFile)
-    {
-        std::string t = (traj < 0) ? "" : ("." + std::to_string(traj));
-
-        if (multiFile)
-        {
-            return stem + t;
-        }
-        else
-        {
-            return stem + t + ".bin";
-        }
-    }
-protected:
-    GridBase *gridIo_;
-};
-
-template <typename FineF, typename CoarseF, 
-          typename FineFIo = FineF, typename CoarseFIo = CoarseF>
-class CoarseEigenPack: public EigenPack<FineF, FineFIo>
-{
-public:
-    typedef CoarseF   CoarseField;
-    typedef CoarseFIo CoarseFieldIo;
-public:      
-    std::vector<CoarseF> evecCoarse;
-    std::vector<RealD>   evalCoarse;
-public:
-    CoarseEigenPack(void)          = default;
-    virtual ~CoarseEigenPack(void) = default;
-
-    CoarseEigenPack(const size_t sizeFine, const size_t sizeCoarse, 
-                    GridBase *gridFine, GridBase *gridCoarse,
-                    GridBase *gridFineIo = nullptr, 
-                    GridBase *gridCoarseIo = nullptr)
-    {
-        if (typeHash<FineF>() != typeHash<FineFIo>())
-        {
-            if (gridFineIo == nullptr)
-            {
-                HADRONS_ERROR(Definition, 
-                              "Fine I/O type different from vector type but null fine I/O grid passed");
-            }
-        }
-        if (typeHash<CoarseF>() != typeHash<CoarseFIo>())
-        {
-            if (gridCoarseIo == nullptr)
-            {
-                HADRONS_ERROR(Definition, 
-                              "Coarse I/O type different from vector type but null coarse I/O grid passed");
-            }
-        }
-        this->gridIo_ = gridFineIo;
-        gridCoarseIo_ = gridCoarseIo;
-        resize(sizeFine, sizeCoarse, gridFine, gridCoarse);
-    }
-
-    void resize(const size_t sizeFine, const size_t sizeCoarse, 
-                GridBase *gridFine, GridBase *gridCoarse)
-    {
-        EigenPack<FineF, FineFIo>::resize(sizeFine, gridFine);
-        evalCoarse.resize(sizeCoarse);
-        evecCoarse.resize(sizeCoarse, gridCoarse);
-    }
-
-    void readFine(const std::string fileStem, const bool multiFile, const int traj = -1)
-    {
-        EigenPack<FineF, FineFIo>::read(fileStem + "_fine", multiFile, traj);
-    }
-
-    void readCoarse(const std::string fileStem, const bool multiFile, const int traj = -1)
-    {
-        PackRecord dummy;
-
-        EigenPackIo::readPack<CoarseF, CoarseFIo>(evecCoarse, evalCoarse, dummy, 
-                              this->evecFilename(fileStem + "_coarse", traj, multiFile), 
-                              evecCoarse.size(), multiFile, gridCoarseIo_);
-    }
-
-    virtual void read(const std::string fileStem, const bool multiFile, const int traj = -1)
-    {
-        readFine(fileStem, multiFile, traj);
-        readCoarse(fileStem, multiFile, traj);
-    }
-
-    void writeFine(const std::string fileStem, const bool multiFile, const int traj = -1)
-    {
-        EigenPack<FineF, FineFIo>::write(fileStem + "_fine", multiFile, traj);
-    }
-
-    void writeCoarse(const std::string fileStem, const bool multiFile, const int traj = -1)
-    {
-        EigenPackIo::writePack<CoarseF, CoarseFIo>(this->evecFilename(fileStem + "_coarse", traj, multiFile), 
-                                                   evecCoarse, evalCoarse, this->record, 
-                                                   evecCoarse.size(), multiFile, gridCoarseIo_);
-    }
-    
-    virtual void write(const std::string fileStem, const bool multiFile, const int traj = -1)
-    {
-        writeFine(fileStem, multiFile, traj);
-        writeCoarse(fileStem, multiFile, traj);
-    }
-private:
-    GridBase *gridCoarseIo_;
-};
-
-template <typename FImpl>
-using BaseFermionEigenPack = BaseEigenPack<typename FImpl::FermionField>;
-
-template <typename FImpl, typename FImplIo = FImpl>
-using FermionEigenPack = EigenPack<typename FImpl::FermionField, typename FImplIo::FermionField>;
-
-template <typename FImpl, int nBasis, typename FImplIo = FImpl>
-using CoarseFermionEigenPack = CoarseEigenPack<
-    typename FImpl::FermionField,
-    typename LocalCoherenceLanczos<typename FImpl::SiteSpinor, 
-                                   typename FImpl::SiteComplex, 
-                                   nBasis>::CoarseField,
-    typename FImplIo::FermionField,
-    typename LocalCoherenceLanczos<typename FImplIo::SiteSpinor, 
-                                   typename FImplIo::SiteComplex, 
-                                   nBasis>::CoarseField>;
-
-#undef HADRONS_DUMP_EP_METADATA
-
-END_HADRONS_NAMESPACE
-
-#endif // Hadrons_EigenPack_hpp_
--- a/Hadrons/Environment.cc
+++ b/Hadrons/Environment.cc
@@ -1,337 +0,0 @@
-/*************************************************************************************
-
-Grid physics library, www.github.com/paboyle/Grid 
-
-Source file: Hadrons/Environment.cc
-
-Copyright (C) 2015-2019
-
-Author: Antonin Portelli <antonin.portelli@me.com>
-
-This program is free software; you can redistribute it and/or modify
-it under the terms of the GNU General Public License as published by
-the Free Software Foundation; either version 2 of the License, or
-(at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along
-with this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
-
-See the full license in the file "LICENSE" in the top level distribution directory
-*************************************************************************************/
-/*  END LEGAL */
-
-#include <Hadrons/Environment.hpp>
-#include <Hadrons/Module.hpp>
-#include <Hadrons/ModuleFactory.hpp>
-
-using namespace Grid;
-using namespace QCD;
-using namespace Hadrons;
-
-#define ERROR_NO_ADDRESS(address)\
-HADRONS_ERROR_REF(ObjectDefinition, "no object with address " + std::to_string(address), address);
-
-/******************************************************************************
- *                       Environment implementation                           *
- ******************************************************************************/
-// constructor /////////////////////////////////////////////////////////////////
-Environment::Environment(void)
-{
-    dim_ = GridDefaultLatt();
-    nd_  = dim_.size();
-    vol_ = 1.;
-    for (auto d: dim_)
-    {
-        vol_ *= d;
-    }
-}
-
-// grids ///////////////////////////////////////////////////////////////////////
-unsigned int Environment::getNd(void) const
-{
-    return nd_;
-}
-
-std::vector<int> Environment::getDim(void) const
-{
-    return dim_;
-}
-
-int Environment::getDim(const unsigned int mu) const
-{
-    return dim_[mu];
-}
-
-double Environment::getVolume(void) const
-{
-    return vol_;
-}
-
-// random number generator /////////////////////////////////////////////////////
-GridParallelRNG * Environment::get4dRng(void)
-{
-    if (rng4d_ == nullptr)
-    {
-        rng4d_.reset(new GridParallelRNG(getGrid()));
-    }
-
-    return rng4d_.get();
-}
-
-// general memory management ///////////////////////////////////////////////////
-void Environment::addObject(const std::string name, const int moduleAddress)
-{
-    if (!hasObject(name))
-    {
-        ObjInfo info;
-        
-        info.name   = name;
-        info.module = moduleAddress;
-        info.data   = nullptr;
-        object_.push_back(std::move(info));
-        objectAddress_[name] = static_cast<unsigned int>(object_.size() - 1);
-    }
-    else
-    {
-        HADRONS_ERROR_REF(ObjectDefinition, "object '" + name + "' already exists",
-                          getObjectAddress(name));
-    }
-}
-
-void Environment::setObjectModule(const unsigned int objAddress,
-                                  const int modAddress)
-{
-    object_[objAddress].module = modAddress;
-}
-
-unsigned int Environment::getMaxAddress(void) const
-{
-    return object_.size();
-}
-
-unsigned int Environment::getObjectAddress(const std::string name) const
-{
-    if (hasObject(name))
-    {
-        return objectAddress_.at(name);
-    }
-    else
-    {
-        HADRONS_ERROR(Definition, "no object with name '" + name + "'");
-    }
-}
-
-std::string Environment::getObjectName(const unsigned int address) const
-{
-    if (hasObject(address))
-    {
-        return object_[address].name;
-    }
-    else
-    {
-        ERROR_NO_ADDRESS(address);
-    }
-}
-
-std::string Environment::getObjectType(const unsigned int address) const
-{
-    if (hasObject(address))
-    {
-        if (object_[address].type)
-        {
-            return typeName(object_[address].type);
-        }
-        else
-        {
-            return "<no type>";
-        }
-    }
-    else
-    {
-        ERROR_NO_ADDRESS(address);
-    }
-}
-
-std::string Environment::getObjectType(const std::string name) const
-{
-    return getObjectType(getObjectAddress(name));
-}
-
-Environment::Size Environment::getObjectSize(const unsigned int address) const
-{
-    if (hasObject(address))
-    {
-        return object_[address].size;
-    }
-    else
-    {
-        ERROR_NO_ADDRESS(address);
-    }
-}
-
-Environment::Size Environment::getObjectSize(const std::string name) const
-{
-    return getObjectSize(getObjectAddress(name));
-}
-
-Environment::Storage Environment::getObjectStorage(const unsigned int address) const
-{
-    if (hasObject(address))
-    {
-        return object_[address].storage;
-    }
-    else
-    {
-        ERROR_NO_ADDRESS(address);
-    }
-}
-
-Environment::Storage Environment::getObjectStorage(const std::string name) const
-{
-    return getObjectStorage(getObjectAddress(name));
-}
-
-int Environment::getObjectModule(const unsigned int address) const
-{
-    if (hasObject(address))
-    {
-        return object_[address].module;
-    }
-    else
-    {
-        ERROR_NO_ADDRESS(address);
-    }
-}
-
-int Environment::getObjectModule(const std::string name) const
-{
-    return getObjectModule(getObjectAddress(name));
-}
-
-unsigned int Environment::getObjectLs(const unsigned int address) const
-{
-    if (hasCreatedObject(address))
-    {
-        return object_[address].Ls;
-    }
-    else
-    {
-        ERROR_NO_ADDRESS(address);
-    }
-}
-
-unsigned int Environment::getObjectLs(const std::string name) const
-{
-    return getObjectLs(getObjectAddress(name));
-}
-
-bool Environment::hasObject(const unsigned int address) const
-{
-    return (address < object_.size());
-}
-
-bool Environment::hasObject(const std::string name) const
-{
-    auto it = objectAddress_.find(name);
-    
-    return ((it != objectAddress_.end()) and hasObject(it->second));
-}
-
-bool Environment::hasCreatedObject(const unsigned int address) const
-{
-    if (hasObject(address))
-    {
-        return (object_[address].data != nullptr);
-    }
-    else
-    {
-        return false;
-    }
-}
-
-bool Environment::hasCreatedObject(const std::string name) const
-{
-    if (hasObject(name))
-    {
-        return hasCreatedObject(getObjectAddress(name));
-    }
-    else
-    {
-        return false;
-    }
-}
-
-bool Environment::isObject5d(const unsigned int address) const
-{
-    return (getObjectLs(address) > 1);
-}
-
-bool Environment::isObject5d(const std::string name) const
-{
-    return (getObjectLs(name) > 1);
-}
-
-Environment::Size Environment::getTotalSize(void) const
-{
-    Environment::Size size = 0;
-    
-    for (auto &o: object_)
-    {
-        size += o.size;
-    }
-    
-    return size;
-}
-
-void Environment::freeObject(const unsigned int address)
-{
-    if (hasCreatedObject(address))
-    {
-        LOG(Message) << "Destroying object '" << object_[address].name
-                     << "'" << std::endl;
-    }
-    object_[address].size = 0;
-    object_[address].type = nullptr;
-    object_[address].data.reset(nullptr);
-}
-
-void Environment::freeObject(const std::string name)
-{
-    freeObject(getObjectAddress(name));
-}
-
-void Environment::freeAll(void)
-{
-    for (unsigned int i = 0; i < object_.size(); ++i)
-    {
-        freeObject(i);
-    }
-}
-
-void Environment::protectObjects(const bool protect)
-{
-    protect_ = protect;
-}
-
-bool Environment::objectsProtected(void) const
-{
-    return protect_;
-}
-
-// print environment content ///////////////////////////////////////////////////
-void Environment::printContent(void) const
-{
-    LOG(Debug) << "Objects: " << std::endl;
-    for (unsigned int i = 0; i < object_.size(); ++i)
-    {
-        LOG(Debug) << std::setw(4) << i << ": "
-                   << getObjectName(i) << " ("
-                   << sizeString(getObjectSize(i)) << ")" << std::endl;
-    }
-}
--- a/Hadrons/Environment.hpp
+++ b/Hadrons/Environment.hpp
@@ -1,585 +0,0 @@
-/*************************************************************************************
-
-Grid physics library, www.github.com/paboyle/Grid 
-
-Source file: Hadrons/Environment.hpp
-
-Copyright (C) 2015-2019
-
-Author: Antonin Portelli <antonin.portelli@me.com>
-
-This program is free software; you can redistribute it and/or modify
-it under the terms of the GNU General Public License as published by
-the Free Software Foundation; either version 2 of the License, or
-(at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along
-with this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
-
-See the full license in the file "LICENSE" in the top level distribution directory
-*************************************************************************************/
-/*  END LEGAL */
-
-#ifndef Hadrons_Environment_hpp_
-#define Hadrons_Environment_hpp_
-
-#include <Hadrons/Global.hpp>
-
-BEGIN_HADRONS_NAMESPACE
-
-/******************************************************************************
- *                         Global environment                                 *
- ******************************************************************************/
-class Object
-{
-public:
-    Object(void) = default;
-    virtual ~Object(void) = default;
-};
-
-template <typename T>
-class Holder: public Object
-{
-public:
-    Holder(void) = default;
-    Holder(T *pt);
-    virtual ~Holder(void) = default;
-    T &       get(void) const;
-    T *       getPt(void) const;
-    void      reset(T *pt);
-private:
-    std::unique_ptr<T> objPt_{nullptr};
-};
-
-#define DEFINE_ENV_ALIAS \
-inline Environment & env(void) const\
-{\
-    return Environment::getInstance();\
-}
-
-#define DEFINE_ENV_LAMBDA \
-auto env = [](void)->Environment &{return Environment::getInstance();}
-
-class Environment
-{
-    SINGLETON(Environment);
-public:
-    typedef SITE_SIZE_TYPE                         Size;
-    typedef std::unique_ptr<GridCartesian>         GridPt;
-    typedef std::unique_ptr<GridRedBlackCartesian> GridRbPt;
-    typedef std::unique_ptr<GridParallelRNG>       RngPt;
-    enum class Storage {object, cache, temporary};
-private:
-    struct ObjInfo
-    {
-        Size                    size{0};
-        Storage                 storage{Storage::object};
-        unsigned int            Ls{0};
-        const std::type_info    *type{nullptr}, *derivedType{nullptr};
-        std::string             name;
-        int                     module{-1};
-        std::unique_ptr<Object> data{nullptr};
-    };
-    typedef std::pair<size_t, unsigned int>     FineGridKey;
-    typedef std::pair<size_t, std::vector<int>> CoarseGridKey;
-public:
-    // grids
-    template <typename VType = vComplex>
-    void                    createGrid(const unsigned int Ls);
-    template <typename VType = vComplex>
-    void                    createCoarseGrid(const std::vector<int> &blockSize,
-                                             const unsigned int Ls);
-    template <typename VType = vComplex>
-    GridCartesian *         getGrid(void);
-    template <typename VType = vComplex>
-    GridRedBlackCartesian * getRbGrid(void);
-    template <typename VType = vComplex>
-    GridCartesian *         getCoarseGrid(const std::vector<int> &blockSize);
-    template <typename VType = vComplex>
-    GridCartesian *         getGrid(const unsigned int Ls);
-    template <typename VType = vComplex>
-    GridRedBlackCartesian * getRbGrid(const unsigned int Ls);
-    template <typename VType = vComplex>
-    GridCartesian *         getCoarseGrid(const std::vector<int> &blockSize,
-                                          const unsigned int Ls);
-    std::vector<int>        getDim(void) const;
-    int                     getDim(const unsigned int mu) const;
-    unsigned int            getNd(void) const;
-    double                  getVolume(void) const;
-    // random number generator
-    GridParallelRNG *       get4dRng(void);
-    // general memory management
-    void                    addObject(const std::string name,
-                                      const int moduleAddress = -1);
-    template <typename B, typename T, typename ... Ts>
-    void                    createDerivedObject(const std::string name,
-                                                const Environment::Storage storage,
-                                                const unsigned int Ls,
-                                                Ts && ... args);
-    template <typename T, typename ... Ts>
-    void                    createObject(const std::string name,
-                                         const Environment::Storage storage,
-                                         const unsigned int Ls,
-                                         Ts && ... args);
-    void                    setObjectModule(const unsigned int objAddress,
-                                            const int modAddress);
-    template <typename B, typename T>
-    T *                     getDerivedObject(const unsigned int address) const;
-    template <typename B, typename T>
-    T *                     getDerivedObject(const std::string name) const;
-    template <typename T>
-    T *                     getObject(const unsigned int address) const;
-    template <typename T>
-    T *                     getObject(const std::string name) const;
-    unsigned int            getMaxAddress(void) const;
-    unsigned int            getObjectAddress(const std::string name) const;
-    std::string             getObjectName(const unsigned int address) const;
-    std::string             getObjectType(const unsigned int address) const;
-    std::string             getObjectType(const std::string name) const;
-    Size                    getObjectSize(const unsigned int address) const;
-    Size                    getObjectSize(const std::string name) const;
-    Storage                 getObjectStorage(const unsigned int address) const;
-    Storage                 getObjectStorage(const std::string name) const;
-    int                     getObjectModule(const unsigned int address) const;
-    int                     getObjectModule(const std::string name) const;
-    unsigned int            getObjectLs(const unsigned int address) const;
-    unsigned int            getObjectLs(const std::string name) const;
-    bool                    hasObject(const unsigned int address) const;
-    bool                    hasObject(const std::string name) const;
-    bool                    hasCreatedObject(const unsigned int address) const;
-    bool                    hasCreatedObject(const std::string name) const;
-    bool                    isObject5d(const unsigned int address) const;
-    bool                    isObject5d(const std::string name) const;
-    template <typename T>
-    bool                    isObjectOfType(const unsigned int address) const;
-    template <typename T>
-    bool                    isObjectOfType(const std::string name) const;
-    Environment::Size       getTotalSize(void) const;
-    void                    freeObject(const unsigned int address);
-    void                    freeObject(const std::string name);
-    void                    freeAll(void);
-    void                    protectObjects(const bool protect);
-    bool                    objectsProtected(void) const;
-    // print environment content
-    void                    printContent(void) const;
-private:
-    // general
-    double                              vol_;
-    bool                                protect_{true};
-    // grids
-    std::vector<int>                    dim_;
-    std::map<FineGridKey, GridPt>       grid4d_;
-    std::map<FineGridKey, GridPt>       grid5d_;
-    std::map<FineGridKey, GridRbPt>     gridRb4d_;
-    std::map<FineGridKey, GridRbPt>     gridRb5d_;
-    std::map<CoarseGridKey, GridPt>     gridCoarse4d_;
-    std::map<CoarseGridKey, GridPt>     gridCoarse5d_;
-    unsigned int                        nd_;
-    // random number generator
-    RngPt                               rng4d_{nullptr};
-    // object store
-    std::vector<ObjInfo>                object_;
-    std::map<std::string, unsigned int> objectAddress_;
-};
-
-/******************************************************************************
- *                       Holder template implementation                       *
- ******************************************************************************/
-// constructor /////////////////////////////////////////////////////////////////
-template <typename T>
-Holder<T>::Holder(T *pt)
-: objPt_(pt)
-{}
-
-// access //////////////////////////////////////////////////////////////////////
-template <typename T>
-T & Holder<T>::get(void) const
-{
-    return *objPt_.get();
-}
-
-template <typename T>
-T * Holder<T>::getPt(void) const
-{
-    return objPt_.get();
-}
-
-template <typename T>
-void Holder<T>::reset(T *pt)
-{
-    objPt_.reset(pt);
-}
-
-/******************************************************************************
- *                     Environment template implementation                    *
- ******************************************************************************/
-// grids ///////////////////////////////////////////////////////////////////////
-#define HADRONS_DUMP_GRID(...)\
-LOG(Debug) << "New grid " << (__VA_ARGS__) << std::endl;\
-LOG(Debug) << " - cb  : " << (__VA_ARGS__)->_isCheckerBoarded << std::endl;\
-LOG(Debug) << " - fdim: " << (__VA_ARGS__)->_fdimensions << std::endl;\
-LOG(Debug) << " - gdim: " << (__VA_ARGS__)->_gdimensions << std::endl;\
-LOG(Debug) << " - ldim: " << (__VA_ARGS__)->_ldimensions << std::endl;\
-LOG(Debug) << " - rdim: " << (__VA_ARGS__)->_rdimensions << std::endl;
-
-template <typename VType>
-void Environment::createGrid(const unsigned int Ls)
-{
-    size_t hash = typeHash<VType>();
-
-    if (grid4d_.find({hash, 1}) == grid4d_.end())
-    {
-        grid4d_[{hash, 1}].reset(
-            SpaceTimeGrid::makeFourDimGrid(getDim(), 
-                                        GridDefaultSimd(getNd(), VType::Nsimd()),
-                                        GridDefaultMpi()));
-        HADRONS_DUMP_GRID(grid4d_[{hash, 1}].get());
-        gridRb4d_[{hash, 1}].reset(
-            SpaceTimeGrid::makeFourDimRedBlackGrid(grid4d_[{hash, 1}].get()));
-        HADRONS_DUMP_GRID(gridRb4d_[{hash, 1}].get());
-    }
-    if (grid5d_.find({hash, Ls}) == grid5d_.end())
-    {
-        auto g = grid4d_[{hash, 1}].get();
-        
-        grid5d_[{hash, Ls}].reset(SpaceTimeGrid::makeFiveDimGrid(Ls, g));
-        HADRONS_DUMP_GRID(grid5d_[{hash, Ls}].get());
-        gridRb5d_[{hash, Ls}].reset(SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls, g));
-        HADRONS_DUMP_GRID(gridRb5d_[{hash, Ls}].get());
-    }
-}
-
-template <typename VType>
-void Environment::createCoarseGrid(const std::vector<int> &blockSize,
-                                   const unsigned int Ls)
-{
-    int              nd      = getNd();
-    std::vector<int> fineDim = getDim(), coarseDim(nd);
-    unsigned int     cLs;
-    auto             key4d = blockSize, key5d = blockSize;
-    size_t           hash  = typeHash<VType>();
-
-    createGrid(Ls);
-    for (int d = 0; d < coarseDim.size(); d++)
-    {
-        coarseDim[d] = fineDim[d]/blockSize[d];
-        if (coarseDim[d]*blockSize[d] != fineDim[d])
-        {
-            HADRONS_ERROR(Size, "Fine dimension " + std::to_string(d) 
-                         + " (" + std::to_string(fineDim[d]) 
-                         + ") not divisible by coarse dimension ("
-                         + std::to_string(coarseDim[d]) + ")"); 
-        }
-    }
-    if (blockSize.size() > nd)
-    {
-        cLs = Ls/blockSize[nd];
-        if (cLs*blockSize[nd] != Ls)
-        {
-            HADRONS_ERROR(Size, "Fine Ls (" + std::to_string(Ls) 
-                         + ") not divisible by coarse Ls ("
-                         + std::to_string(cLs) + ")");
-        }
-    }
-    else
-    {
-        cLs = Ls;
-    }
-    key4d.resize(nd);
-    key5d.push_back(Ls);
-
-    CoarseGridKey hkey4d = {hash, key4d}, hkey5d = {hash, key5d};
-
-    if (gridCoarse4d_.find(hkey4d) == gridCoarse4d_.end())
-    {
-        gridCoarse4d_[hkey4d].reset(
-            SpaceTimeGrid::makeFourDimGrid(coarseDim, 
-                GridDefaultSimd(nd, VType::Nsimd()), GridDefaultMpi()));
-        HADRONS_DUMP_GRID(gridCoarse4d_[hkey4d].get());
-    }
-    if (gridCoarse5d_.find(hkey5d) == gridCoarse5d_.end())
-    {
-        gridCoarse5d_[hkey5d].reset(
-            SpaceTimeGrid::makeFiveDimGrid(cLs, gridCoarse4d_[hkey4d].get()));
-        HADRONS_DUMP_GRID(gridCoarse5d_[hkey5d].get());
-    }
-}
-
-#undef HADRONS_DUMP_GRID
-
-template <typename VType>
-GridCartesian * Environment::getGrid(void)
-{
-    FineGridKey key = {typeHash<VType>(), 1};
-
-    auto it = grid4d_.find(key);
-
-    if (it != grid4d_.end())
-    {
-        return it->second.get();
-    }
-    else
-    {
-        createGrid<VType>(1);
-
-        return grid4d_.at(key).get();
-    }
-}
-
-template <typename VType>
-GridRedBlackCartesian * Environment::getRbGrid(void)
-{
-    FineGridKey key = {typeHash<VType>(), 1};
-    auto        it  = gridRb4d_.find(key);
-
-    if (it != gridRb4d_.end())
-    {
-        return it->second.get();
-    }
-    else
-    {
-        createGrid<VType>(1);
-
-        return gridRb4d_.at(key).get();
-    }
-}
-
-template <typename VType>
-GridCartesian * Environment::getCoarseGrid(const std::vector<int> &blockSize)
-{
-    std::vector<int> s = blockSize;
-
-    s.resize(getNd());
-
-    CoarseGridKey key = {typeHash<VType>(), s};
-    auto          it  = gridCoarse4d_.find(key);
-
-    if (it != gridCoarse4d_.end())
-    {
-        return it->second.get();
-    }
-    else
-    {
-        createCoarseGrid<VType>(blockSize, 1);
-        
-        return gridCoarse4d_.at(key).get();
-    }
-}
-
-template <typename VType>
-GridCartesian * Environment::getGrid(const unsigned int Ls)
-{
-    FineGridKey key = {typeHash<VType>(), Ls};
-    auto        it  = grid5d_.find(key);
-
-    if (it != grid5d_.end())
-    {
-        return it->second.get();
-    }
-    else
-    {
-        createGrid<VType>(Ls);
-
-        return grid5d_.at(key).get();
-    }
-}
-
-template <typename VType>
-GridRedBlackCartesian * Environment::getRbGrid(const unsigned int Ls)
-{
-    FineGridKey key = {typeHash<VType>(), Ls};
-    auto        it  = gridRb5d_.find(key);
-
-    if (it != gridRb5d_.end())
-    {
-        return it->second.get();
-    }
-    else
-    {
-        createGrid<VType>(Ls);
-
-        return gridRb5d_.at(key).get();
-    }
-}
-
-template <typename VType>
-GridCartesian * Environment::getCoarseGrid(const std::vector<int> &blockSize,
-                                           const unsigned int Ls)
-{
-    std::vector<int> s = blockSize;
-
-    s.push_back(Ls);
-
-    CoarseGridKey key = {typeHash<VType>(), s};
-
-    auto it = gridCoarse5d_.find(key);
-    if (it != gridCoarse5d_.end())
-    {
-        return it->second.get();
-    }
-    else
-    {
-        createCoarseGrid<VType>(blockSize, Ls);
-
-        return gridCoarse5d_.at(key).get();
-    }
-}
-
-
-// general memory management ///////////////////////////////////////////////////
-template <typename B, typename T, typename ... Ts>
-void Environment::createDerivedObject(const std::string name,
-                                      const Environment::Storage storage,
-                                      const unsigned int Ls,
-                                      Ts && ... args)
-{
-    if (!hasObject(name))
-    {
-        addObject(name);
-    }
-    
-    unsigned int address = getObjectAddress(name);
-    
-    if (!object_[address].data or !objectsProtected())
-    {
-        MemoryStats memStats;
-    
-        if (!MemoryProfiler::stats)
-        {
-            MemoryProfiler::stats = &memStats;
-        }
-        size_t initMem               = MemoryProfiler::stats->currentlyAllocated;
-        object_[address].storage     = storage;
-        object_[address].Ls          = Ls;
-        object_[address].data.reset(new Holder<B>(new T(std::forward<Ts>(args)...)));
-        object_[address].size        = MemoryProfiler::stats->maxAllocated - initMem;
-        object_[address].type        = typeIdPt<B>();
-        object_[address].derivedType = typeIdPt<T>();
-        if (MemoryProfiler::stats == &memStats)
-        {
-            MemoryProfiler::stats = nullptr;
-        }
-    }
-    // object already exists, no error if it is a cache, error otherwise
-    else if ((object_[address].storage               != Storage::cache) or 
-             (object_[address].storage               != storage)        or
-             (object_[address].name                  != name)           or
-             (typeHash(object_[address].type)        != typeHash<B>())  or
-             (typeHash(object_[address].derivedType) != typeHash<T>()))
-    {
-        HADRONS_ERROR_REF(ObjectDefinition, "object '" + name + "' already allocated", address);
-    }
-}
-
-template <typename T, typename ... Ts>
-void Environment::createObject(const std::string name, 
-                               const Environment::Storage storage,
-                               const unsigned int Ls,
-                               Ts && ... args)
-{
-    createDerivedObject<T, T>(name, storage, Ls, std::forward<Ts>(args)...);
-}
-
-template <typename B, typename T>
-T * Environment::getDerivedObject(const unsigned int address) const
-{
-    if (hasObject(address))
-    {
-        if (hasCreatedObject(address))
-        {
-            if (auto h = dynamic_cast<Holder<B> *>(object_[address].data.get()))
-            {
-                if (&typeid(T) == &typeid(B))
-                {
-                    return dynamic_cast<T *>(h->getPt());
-                }
-                else
-                {
-                    if (auto hder = dynamic_cast<T *>(h->getPt()))
-                    {
-                        return hder;
-                    }
-                    else
-                    {
-                        HADRONS_ERROR_REF(ObjectType, "object with address " +
-                            std::to_string(address) +
-                            " cannot be casted to '" + typeName(&typeid(T)) +
-                            "' (has type '" + typeName(&typeid(h->get())) + "')", address);
-                    }
-                }
-            }
-            else
-            {
-                HADRONS_ERROR_REF(ObjectType, "object with address " + 
-                            std::to_string(address) +
-                            " does not have type '" + typeName(&typeid(B)) +
-                            "' (has type '" + getObjectType(address) + "')", address);
-            }
-        }
-        else
-        {
-            HADRONS_ERROR_REF(ObjectDefinition, "object with address " + 
-                              std::to_string(address) + " is empty", address);
-        }
-    }
-    else
-    {
-        HADRONS_ERROR_REF(ObjectDefinition, "no object with address " + 
-                          std::to_string(address), address);
-    }
-}
-
-template <typename B, typename T>
-T * Environment::getDerivedObject(const std::string name) const
-{
-    return getDerivedObject<B, T>(getObjectAddress(name));
-}
-
-template <typename T>
-T * Environment::getObject(const unsigned int address) const
-{
-    return getDerivedObject<T, T>(address);
-}
-
-template <typename T>
-T * Environment::getObject(const std::string name) const
-{
-    return getObject<T>(getObjectAddress(name));
-}
-
-template <typename T>
-bool Environment::isObjectOfType(const unsigned int address) const
-{
-    if (hasObject(address))
-    {
-        if (auto h = dynamic_cast<Holder<T> *>(object_[address].data.get()))
-        {
-            return true;
-        }
-        else
-        {
-            return false;
-        }
-    }
-    else
-    {
-        HADRONS_ERROR_REF(ObjectDefinition, "no object with address " 
-                          + std::to_string(address), address);
-    }
-}
-
-template <typename T>
-bool Environment::isObjectOfType(const std::string name) const
-{
-    return isObjectOfType<T>(getObjectAddress(name));
-}
-
-END_HADRONS_NAMESPACE
-
-#endif // Hadrons_Environment_hpp_
--- a/Hadrons/Exceptions.cc
+++ b/Hadrons/Exceptions.cc
@@ -1,102 +0,0 @@
-/*************************************************************************************
-
-Grid physics library, www.github.com/paboyle/Grid 
-
-Source file: Hadrons/Exceptions.cc
-
-Copyright (C) 2015-2019
-
-Author: Antonin Portelli <antonin.portelli@me.com>
-
-This program is free software; you can redistribute it and/or modify
-it under the terms of the GNU General Public License as published by
-the Free Software Foundation; either version 2 of the License, or
-(at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along
-with this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
-
-See the full license in the file "LICENSE" in the top level distribution directory
-*************************************************************************************/
-/*  END LEGAL */
-
-#include <Hadrons/Exceptions.hpp>
-#include <Hadrons/VirtualMachine.hpp>
-#include <Hadrons/Module.hpp>
-
-#ifndef ERR_SUFF
-#define ERR_SUFF " (" + loc + ")"
-#endif
-
-#define CTOR_EXC(name, init) \
-name::name(std::string msg, std::string loc)\
-:init\
-{}
-
-#define CTOR_EXC_REF(name, init) \
-name::name(std::string msg, std::string loc, const unsigned int address)\
-:init\
-{}
-
-using namespace Grid;
-using namespace Hadrons;
-using namespace Exceptions;
-
-// backtrace cache
-std::vector<std::string> Grid::Hadrons::Exceptions::backtraceStr;
-
-// logic errors
-CTOR_EXC(Logic, logic_error(msg + ERR_SUFF))
-CTOR_EXC(Definition, Logic("definition error: " + msg, loc))
-CTOR_EXC(Implementation, Logic("implementation error: " + msg, loc))
-CTOR_EXC(Range, Logic("range error: " + msg, loc))
-CTOR_EXC(Size, Logic("size error: " + msg, loc))
-
-// runtime errors
-CTOR_EXC(Runtime, runtime_error(msg + ERR_SUFF))
-CTOR_EXC(Argument, Runtime("argument error: " + msg, loc))
-CTOR_EXC(Io, Runtime("IO error: " + msg, loc))
-CTOR_EXC(Memory, Runtime("memory error: " + msg, loc))
-CTOR_EXC(Parsing, Runtime("parsing error: " + msg, loc))
-CTOR_EXC(Program, Runtime("program error: " + msg, loc))
-CTOR_EXC(System, Runtime("system error: " + msg, loc))
-
-// virtual machine errors
-CTOR_EXC_REF(ObjectDefinition, RuntimeRef("object definition error: " + msg, loc, address));
-CTOR_EXC_REF(ObjectType, RuntimeRef("object type error: " + msg, loc, address));
-
-// abort functions
-void Grid::Hadrons::Exceptions::abort(const std::exception& e)
-{
-    auto &vm = VirtualMachine::getInstance();
-    int  mod = vm.getCurrentModule();
-
-    LOG(Error) << "FATAL ERROR -- Exception " << typeName(&typeid(e)) 
-               << std::endl;
-    if (mod >= 0)
-    {
-        LOG(Error) << "During execution of module '"
-                    << vm.getModuleName(mod) << "' (address " << mod << ")"
-                    << std::endl;
-    }
-    LOG(Error) << e.what() << std::endl;
-    if (!backtraceStr.empty())
-    {
-        LOG(Error) << "-- BACKTRACE --------------" << std::endl;
-        for (auto &s: backtraceStr)
-        {
-            LOG(Error) << s << std::endl;
-        }
-        LOG(Error) << "---------------------------" << std::endl;
-    }
-    LOG(Error) << "Aborting program" << std::endl;
-    Grid_finalize();
-
-    exit(EXIT_FAILURE);
-}
--- a/Hadrons/Exceptions.hpp
+++ b/Hadrons/Exceptions.hpp
@@ -1,129 +0,0 @@
-/*************************************************************************************
-
-Grid physics library, www.github.com/paboyle/Grid 
-
-Source file: Hadrons/Exceptions.hpp
-
-Copyright (C) 2015-2019
-
-Author: Antonin Portelli <antonin.portelli@me.com>
-
-This program is free software; you can redistribute it and/or modify
-it under the terms of the GNU General Public License as published by
-the Free Software Foundation; either version 2 of the License, or
-(at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along
-with this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
-
-See the full license in the file "LICENSE" in the top level distribution directory
-*************************************************************************************/
-/*  END LEGAL */
-
-#ifndef Hadrons_Exceptions_hpp_
-#define Hadrons_Exceptions_hpp_
-
-#include <stdexcept>
-#include <execinfo.h>
-#ifndef Hadrons_Global_hpp_
-#include <Hadrons/Global.hpp>
-#endif
-
-#define HADRONS_SRC_LOC std::string(__FUNCTION__) + " at " \
-                        + std::string(__FILE__) + ":" + std::to_string(__LINE__)
-#define HADRONS_BACKTRACE_MAX 128
-#ifdef HAVE_EXECINFO_H
-#define HADRONS_CACHE_BACKTRACE \
-{\
-    void* _callstack[HADRONS_BACKTRACE_MAX];\
-    int _i, _frames = backtrace(_callstack, HADRONS_BACKTRACE_MAX);\
-    char** _strs = backtrace_symbols(_callstack, _frames);\
-    Grid::Hadrons::Exceptions::backtraceStr.clear();\
-    for (_i = 0; _i < _frames; ++_i)\
-    {\
-        Hadrons::Exceptions::backtraceStr.push_back(std::string(_strs[_i]));\
-    }\
-    free(_strs);\
-}
-#else
-#define HADRONS_CACHE_BACKTRACE \
-Grid::Hadrons::Exceptions::backtraceStr.clear();\
-Grid::Hadrons::Exceptions::backtraceStr.push_back("<backtrace not supported>");
-#endif
-
-#define HADRONS_ERROR(exc, msg)\
-HADRONS_CACHE_BACKTRACE \
-throw(Exceptions::exc(msg, HADRONS_SRC_LOC));
-
-#define HADRONS_ERROR_REF(exc, msg, address)\
-HADRONS_CACHE_BACKTRACE \
-throw(Exceptions::exc(msg, HADRONS_SRC_LOC, address));
-
-#define DECL_EXC(name, base) \
-class name: public base\
-{\
-public:\
-    name(std::string msg, std::string loc);\
-}
-
-#define DECL_EXC_REF(name, base) \
-class name: public base\
-{\
-public:\
-    name(std::string msg, std::string loc, const unsigned int address);\
-}
-
-BEGIN_HADRONS_NAMESPACE
-
-namespace Exceptions
-{
-    // backtrace cache
-    extern std::vector<std::string> backtraceStr;
-
-    // logic errors
-    DECL_EXC(Logic, std::logic_error);
-    DECL_EXC(Definition, Logic);
-    DECL_EXC(Implementation, Logic);
-    DECL_EXC(Range, Logic);
-    DECL_EXC(Size, Logic);
-
-    // runtime errors
-    DECL_EXC(Runtime, std::runtime_error);
-    DECL_EXC(Argument, Runtime);
-    DECL_EXC(Io, Runtime);
-    DECL_EXC(Memory, Runtime);
-    DECL_EXC(Parsing, Runtime);
-    DECL_EXC(Program, Runtime);
-    DECL_EXC(System, Runtime);
-
-    // virtual machine errors
-    class RuntimeRef: public Runtime
-    {
-    public:
-        RuntimeRef(std::string msg, std::string loc, const unsigned int address)
-        : Runtime(msg, loc), address_(address)
-        {}
-        unsigned int getAddress(void) const
-        {
-            return address_;
-        }
-    private:
-        unsigned int address_;
-    };
-
-    DECL_EXC_REF(ObjectDefinition, RuntimeRef);
-    DECL_EXC_REF(ObjectType, RuntimeRef);
-
-    // abort functions
-    void abort(const std::exception& e);
-}
-
-END_HADRONS_NAMESPACE
-
-#endif // Hadrons_Exceptions_hpp_
--- a/Hadrons/Global.cc
+++ b/Hadrons/Global.cc
@@ -1,214 +0,0 @@
-/*************************************************************************************
-
-Grid physics library, www.github.com/paboyle/Grid 
-
-Source file: Hadrons/Global.cc
-
-Copyright (C) 2015-2019
-
-Author: Antonin Portelli <antonin.portelli@me.com>
-
-This program is free software; you can redistribute it and/or modify
-it under the terms of the GNU General Public License as published by
-the Free Software Foundation; either version 2 of the License, or
-(at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along
-with this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
-
-See the full license in the file "LICENSE" in the top level distribution directory
-*************************************************************************************/
-/*  END LEGAL */
-
-#include <Hadrons/Global.hpp>
-
-using namespace Grid;
-using namespace QCD;
-using namespace Hadrons;
-
-HadronsLogger Hadrons::HadronsLogError(1,"Error");
-HadronsLogger Hadrons::HadronsLogWarning(1,"Warning");
-HadronsLogger Hadrons::HadronsLogMessage(1,"Message");
-HadronsLogger Hadrons::HadronsLogIterative(1,"Iterative");
-HadronsLogger Hadrons::HadronsLogDebug(1,"Debug");
-HadronsLogger Hadrons::HadronsLogIRL(1,"IRL");
-
-void Hadrons::initLogger(void)
-{
-    auto w  = std::string("Hadrons").length();
-    int  cw = 8;
-
-
-    GridLogError.setTopWidth(w);
-    GridLogWarning.setTopWidth(w);
-    GridLogMessage.setTopWidth(w);
-    GridLogIterative.setTopWidth(w);
-    GridLogDebug.setTopWidth(w);
-    GridLogIRL.setTopWidth(w);
-    GridLogError.setChanWidth(cw);
-    GridLogWarning.setChanWidth(cw);
-    GridLogMessage.setChanWidth(cw);
-    GridLogIterative.setChanWidth(cw);
-    GridLogDebug.setChanWidth(cw);
-    GridLogIRL.setChanWidth(cw);
-    HadronsLogError.Active(true);
-    HadronsLogWarning.Active(true);
-    HadronsLogMessage.Active(GridLogMessage.isActive());
-    HadronsLogIterative.Active(GridLogIterative.isActive());
-    HadronsLogDebug.Active(GridLogDebug.isActive());
-    HadronsLogIRL.Active(GridLogIRL.isActive());
-    HadronsLogError.setChanWidth(cw);
-    HadronsLogWarning.setChanWidth(cw);
-    HadronsLogMessage.setChanWidth(cw);
-    HadronsLogIterative.setChanWidth(cw);
-    HadronsLogDebug.setChanWidth(cw);
-    HadronsLogIRL.setChanWidth(cw);
-}
-
-// type utilities //////////////////////////////////////////////////////////////
-size_t Hadrons::typeHash(const std::type_info *info)
-{
-    return info->hash_code();
-}
-
-constexpr unsigned int maxNameSize = 1024u;
-
-std::string Hadrons::typeName(const std::type_info *info)
-{
-    char        *buf;
-    std::string name;
-    
-    buf  = abi::__cxa_demangle(info->name(), nullptr, nullptr, nullptr);
-    name = buf;
-    free(buf);
-    
-    return name;
-}
-
-// default writers/readers /////////////////////////////////////////////////////
-#ifdef HAVE_HDF5
-const std::string Hadrons::resultFileExt = "h5";
-#else
-const std::string Hadrons::resultFileExt = "xml";
-#endif
-
-// recursive mkdir /////////////////////////////////////////////////////////////
-int Hadrons::mkdir(const std::string dirName)
-{
-    if (!dirName.empty() and access(dirName.c_str(), R_OK|W_OK|X_OK))
-    {
-        mode_t mode755;
-        char   tmp[MAX_PATH_LENGTH];
-        char   *p = NULL;
-        size_t len;
-
-        mode755 = S_IRWXU|S_IRGRP|S_IXGRP|S_IROTH|S_IXOTH;
-
-        snprintf(tmp, sizeof(tmp), "%s", dirName.c_str());
-        len = strlen(tmp);
-        if(tmp[len - 1] == '/')
-        {
-            tmp[len - 1] = 0;
-        }
-        for(p = tmp + 1; *p; p++)
-        {
-            if(*p == '/')
-            {
-                *p = 0;
-                ::mkdir(tmp, mode755);
-                *p = '/';
-            }
-        }
-
-        return ::mkdir(tmp, mode755);
-    }
-    else
-    {
-        return 0;
-    }
-}
-
-std::string Hadrons::basename(const std::string &s)
-{
-    constexpr char sep = '/';
-    size_t         i   = s.rfind(sep, s.length());
-    
-    if (i != std::string::npos)
-    {
-        return s.substr(i+1, s.length() - i);
-    }
-    else
-    {
-        return s;
-    }
-}
-
-std::string Hadrons::dirname(const std::string &s)
-{
-    constexpr char sep = '/';
-    size_t         i   = s.rfind(sep, s.length());
-    
-    if (i != std::string::npos)
-    {
-        return s.substr(0, i);
-    }
-    else
-    {
-        return "";
-    }
-}
-
-void Hadrons::makeFileDir(const std::string filename, GridBase *g)
-{
-    bool doIt = true;
-
-    if (g)
-    {
-        doIt = g->IsBoss();
-    }
-    if (doIt)
-    {
-        std::string dir    = dirname(filename);
-        int         status = mkdir(dir);
-
-        if (status)
-        {
-            HADRONS_ERROR(Io, "cannot create directory '" + dir
-                          + "' ( " + std::strerror(errno) + ")");
-        }
-    }
-}
-
-void Hadrons::printTimeProfile(const std::map<std::string, GridTime> &timing, 
-                               GridTime total)
-{
-    typedef decltype(total.count()) Count;
-
-    std::map<Count, std::string, std::greater<Count>> rtiming;
-    const double dtotal = static_cast<double>(total.count());
-    auto cf = std::cout.flags();
-    auto p  = std::cout.precision();
-    unsigned int width = 0;
-
-    for (auto &t: timing)
-    {
-        width = std::max(width, static_cast<unsigned int>(t.first.length()));
-        rtiming[t.second.count()] = t.first;
-    }
-    for (auto &rt: rtiming)
-    {
-        LOG(Message) << std::setw(width) << rt.second << ": " 
-                     << rt.first << " us (" << std::fixed 
-                     << std::setprecision(1) 
-                     << static_cast<double>(rt.first)/dtotal*100 << "%)"
-                     << std::endl;
-    }
-    std::cout.flags(cf);
-    std::cout.precision(p);
-}
--- a/Hadrons/Global.hpp
+++ b/Hadrons/Global.hpp
@@ -1,283 +0,0 @@
-/*************************************************************************************
-
-Grid physics library, www.github.com/paboyle/Grid 
-
-Source file: Hadrons/Global.hpp
-
-Copyright (C) 2015-2019
-
-Author: Antonin Portelli <antonin.portelli@me.com>
-Author: Lanny91 <andrew.lawson@gmail.com>
-
-This program is free software; you can redistribute it and/or modify
-it under the terms of the GNU General Public License as published by
-the Free Software Foundation; either version 2 of the License, or
-(at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along
-with this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
-
-See the full license in the file "LICENSE" in the top level distribution directory
-*************************************************************************************/
-/*  END LEGAL */
-
-#ifndef Hadrons_Global_hpp_
-#define Hadrons_Global_hpp_
-
-#include <set>
-#include <stack>
-#include <regex>
-#include <Grid/Grid.h>
-#include <cxxabi.h>
-
-#ifndef SITE_SIZE_TYPE
-#define SITE_SIZE_TYPE size_t
-#endif
-
-#ifndef DEFAULT_ASCII_PREC
-#define DEFAULT_ASCII_PREC 16
-#endif
-
-#define ARG(...) __VA_ARGS__
-
-/* the 'using Grid::operator<<;' statement prevents a very nasty compilation
- * error with GCC 5 (clang & GCC 6 compile fine without it).
- */
-
-#define BEGIN_HADRONS_NAMESPACE \
-namespace Grid {\
-using namespace QCD;\
-namespace Hadrons {\
-using Grid::operator<<;\
-using Grid::operator>>;
-#define END_HADRONS_NAMESPACE }}
-
-#define BEGIN_MODULE_NAMESPACE(name)\
-namespace name {\
-using Grid::operator<<;\
-using Grid::operator>>;
-
-#define END_MODULE_NAMESPACE }
-
-#define _HADRONS_IMPL(impl, sub) impl##sub
-#define HADRONS_IMPL(impl, sub)   _HADRONS_IMPL(impl, sub)
-
-#ifndef FIMPLBASE
-#define FIMPLBASE WilsonImpl
-#endif
-#define FIMPL  HADRONS_IMPL(FIMPLBASE, R)
-#define FIMPLF HADRONS_IMPL(FIMPLBASE, F)
-#define FIMPLD HADRONS_IMPL(FIMPLBASE, D)
-
-#ifndef ZFIMPLBASE
-#define ZFIMPLBASE ZWilsonImpl
-#endif
-#define ZFIMPL  HADRONS_IMPL(ZFIMPLBASE, R)
-#define ZFIMPLF HADRONS_IMPL(ZFIMPLBASE, F)
-#define ZFIMPLD HADRONS_IMPL(ZFIMPLBASE, D)
-
-#ifndef SIMPLBASE
-#define SIMPLBASE ScalarImplC
-#endif
-#define SIMPL  HADRONS_IMPL(SIMPLBASE, R)
-#define SIMPLF HADRONS_IMPL(SIMPLBASE, F)
-#define SIMPLD HADRONS_IMPL(SIMPLBASE, D)
-
-#ifndef GIMPLBASE
-#define GIMPLBASE PeriodicGimpl
-#endif
-#define GIMPL  HADRONS_IMPL(GIMPLBASE, R)
-#define GIMPLF HADRONS_IMPL(GIMPLBASE, F)
-#define GIMPLD HADRONS_IMPL(GIMPLBASE, D)
-
-BEGIN_HADRONS_NAMESPACE
-
-// type aliases
-#define BASIC_TYPE_ALIASES(Impl, suffix)\
-typedef typename Impl::Field                         ScalarField##suffix;\
-typedef typename Impl::PropagatorField               PropagatorField##suffix;\
-typedef typename Impl::SitePropagator::scalar_object SitePropagator##suffix;\
-typedef typename Impl::ComplexField                  ComplexField##suffix;\
-typedef std::vector<SitePropagator##suffix>          SlicedPropagator##suffix;\
-typedef std::vector<typename ComplexField##suffix::vector_object::scalar_object> SlicedComplex##suffix;
-
-#define FERM_TYPE_ALIASES(FImpl, suffix)\
-BASIC_TYPE_ALIASES(FImpl, suffix);\
-typedef FermionOperator<FImpl>                     FMat##suffix;\
-typedef typename FImpl::FermionField               FermionField##suffix;\
-typedef typename FImpl::GaugeField                 GaugeField##suffix;\
-typedef typename FImpl::DoubledGaugeField          DoubledGaugeField##suffix;\
-typedef Lattice<iSpinMatrix<typename FImpl::Simd>> SpinMatrixField##suffix;
-
-#define GAUGE_TYPE_ALIASES(GImpl, suffix)\
-typedef typename GImpl::GaugeField GaugeField##suffix;
-
-#define SOLVER_TYPE_ALIASES(FImpl, suffix)\
-typedef Solver<FImpl> Solver##suffix;
-
-#define SINK_TYPE_ALIASES(suffix)\
-typedef std::function<SlicedPropagator##suffix\
-                      (const PropagatorField##suffix &)> SinkFn##suffix;
-
-// logger
-class HadronsLogger: public Logger
-{
-public:
-    HadronsLogger(int on, std::string nm): Logger("Hadrons", on, nm,
-                                                  GridLogColours, "BLACK"){};
-};
-
-#define LOG(channel) std::cout << HadronsLog##channel
-#define HADRONS_DEBUG_VAR(var) LOG(Debug) << #var << "= " << (var) << std::endl;
-
-extern HadronsLogger HadronsLogError;
-extern HadronsLogger HadronsLogWarning;
-extern HadronsLogger HadronsLogMessage;
-extern HadronsLogger HadronsLogIterative;
-extern HadronsLogger HadronsLogDebug;
-extern HadronsLogger HadronsLogIRL;
-
-void initLogger(void);
-
-// singleton pattern
-#define SINGLETON(name)\
-public:\
-    name(const name &e) = delete;\
-    void operator=(const name &e) = delete;\
-    static name & getInstance(void)\
-    {\
-        static name e;\
-        return e;\
-    }\
-private:\
-    name(void);
-
-#define SINGLETON_DEFCTOR(name)\
-public:\
-    name(const name &e) = delete;\
-    void operator=(const name &e) = delete;\
-    static name & getInstance(void)\
-    {\
-        static name e;\
-        return e;\
-    }\
-private:\
-    name(void) = default;
-
-// type utilities
-template <typename T>
-const std::type_info * typeIdPt(const T &x)
-{
-    return &typeid(x);
-}
-
-template <typename T>
-const std::type_info * typeIdPt(void)
-{
-    return &typeid(T);
-}
-
-size_t typeHash(const std::type_info *info);
-
-template <typename T>
-size_t typeHash(const T &x)
-{
-    return typeHash(typeIdPt(x));
-}
-
-template <typename T>
-size_t typeHash(void)
-{
-    return typeHash(typeIdPt<T>());
-}
-
-std::string typeName(const std::type_info *info);
-
-template <typename T>
-std::string typeName(const T &x)
-{
-    return typeName(typeIdPt(x));
-}
-
-template <typename T>
-std::string typeName(void)
-{
-    return typeName(typeIdPt<T>());
-}
-
-// default writers/readers
-extern const std::string resultFileExt;
-
-#ifdef HAVE_HDF5
-typedef Hdf5Reader ResultReader;
-typedef Hdf5Writer ResultWriter;
-#else
-typedef XmlReader ResultReader;
-typedef XmlWriter ResultWriter;
-#endif
-
-#define RESULT_FILE_NAME(name, traj) \
-name + "." + std::to_string(traj) + "." + resultFileExt
-
-// recursive mkdir
-#define MAX_PATH_LENGTH 512u
-int         mkdir(const std::string dirName);
-std::string basename(const std::string &s);
-std::string dirname(const std::string &s);
-void        makeFileDir(const std::string filename, GridBase *g = nullptr);
-
-// default Schur convention
-#ifndef HADRONS_DEFAULT_SCHUR 
-#define HADRONS_DEFAULT_SCHUR DiagTwo
-#endif
-#define _HADRONS_SCHUR_OP_(conv) Schur##conv##Operator
-#define HADRONS_SCHUR_OP(conv) _HADRONS_SCHUR_OP_(conv)
-#define HADRONS_DEFAULT_SCHUR_OP HADRONS_SCHUR_OP(HADRONS_DEFAULT_SCHUR)
-#define _HADRONS_SCHUR_SOLVE_(conv) SchurRedBlack##conv##Solve
-#define HADRONS_SCHUR_SOLVE(conv) _HADRONS_SCHUR_SOLVE_(conv)
-#define HADRONS_DEFAULT_SCHUR_SOLVE HADRONS_SCHUR_SOLVE(HADRONS_DEFAULT_SCHUR)
-#define _HADRONS_SCHUR_A2A_(conv) A2AVectorsSchur##conv
-#define HADRONS_SCHUR_A2A(conv) _HADRONS_SCHUR_A2A_(conv)
-#define HADRONS_DEFAULT_SCHUR_A2A HADRONS_SCHUR_A2A(HADRONS_DEFAULT_SCHUR)
-
-// stringify macro
-#define _HADRONS_STR(x) #x
-#define HADRONS_STR(x) _HADRONS_STR(x)
-
-// pretty print time profile
-void printTimeProfile(const std::map<std::string, GridTime> &timing, GridTime total);
-
-// token replacement utility
-template <typename T>
-void tokenReplace(std::string &str, const std::string token,
-                  const T &x, const std::string mark = "@")
-{
-    std::string fullToken = mark + token + mark;
-    
-    auto pos = str.find(fullToken);
-    if (pos != std::string::npos)
-    {
-        str.replace(pos, fullToken.size(), std::to_string(x));
-    }
-}
-
-// generic correlator class
-template <typename Metadata, typename Scalar = Complex>
-struct Correlator: Serializable
-{
-    GRID_SERIALIZABLE_CLASS_MEMBERS(ARG(Correlator<Metadata, Scalar>),
-                                    Metadata,             info,
-                                    std::vector<Complex>, corr);
-};
-
-END_HADRONS_NAMESPACE
-
-#include <Hadrons/Exceptions.hpp>
-
-#endif // Hadrons_Global_hpp_
--- a/Hadrons/Makefile.am
+++ b/Hadrons/Makefile.am
@@ -1,37 +0,0 @@
-SUBDIRS = . Utilities
-
-lib_LIBRARIES = libHadrons.a
-
-include modules.inc
-
-libHadrons_a_SOURCES = \
-    Application.cc     \
-    Environment.cc     \
-	Exceptions.cc      \
-    Global.cc          \
-    Module.cc		   \
-	TimerArray.cc      \
-	VirtualMachine.cc  \
-	$(modules_cc)
-	
-libHadrons_adir = $(includedir)/Hadrons
-nobase_libHadrons_a_HEADERS = \
-	A2AVectors.hpp            \
-	A2AMatrix.hpp             \
-	Application.hpp           \
-	DilutedNoise.hpp          \
-	DiskVector.hpp            \
-	EigenPack.hpp             \
-	Environment.hpp           \
-	Exceptions.hpp            \
-	Factory.hpp               \
-	GeneticScheduler.hpp      \
-	Global.hpp                \
-	Graph.hpp                 \
-	Module.hpp                \
-	Modules.hpp               \
-	ModuleFactory.hpp         \
-	Solver.hpp                \
-	TimerArray.hpp            \
-	VirtualMachine.hpp        \
-	$(modules_hpp)
--- a/Hadrons/Modules.hpp
+++ b/Hadrons/Modules.hpp
@@ -1,71 +0,0 @@
-#include <Hadrons/Modules/MSource/Gauss.hpp>
-#include <Hadrons/Modules/MSource/Momentum.hpp>
-#include <Hadrons/Modules/MSource/SeqAslash.hpp>
-#include <Hadrons/Modules/MSource/Z2.hpp>
-#include <Hadrons/Modules/MSource/Point.hpp>
-#include <Hadrons/Modules/MSource/SeqGamma.hpp>
-#include <Hadrons/Modules/MSource/Convolution.hpp>
-#include <Hadrons/Modules/MSource/Wall.hpp>
-#include <Hadrons/Modules/MSource/SeqConserved.hpp>
-#include <Hadrons/Modules/MScalarSUN/Div.hpp>
-#include <Hadrons/Modules/MScalarSUN/TrKinetic.hpp>
-#include <Hadrons/Modules/MScalarSUN/TrPhi.hpp>
-#include <Hadrons/Modules/MScalarSUN/TwoPoint.hpp>
-#include <Hadrons/Modules/MScalarSUN/Grad.hpp>
-#include <Hadrons/Modules/MScalarSUN/Utils.hpp>
-#include <Hadrons/Modules/MScalarSUN/StochFreeField.hpp>
-#include <Hadrons/Modules/MScalarSUN/EMT.hpp>
-#include <Hadrons/Modules/MScalarSUN/TrMag.hpp>
-#include <Hadrons/Modules/MScalarSUN/TwoPointNPR.hpp>
-#include <Hadrons/Modules/MScalarSUN/TransProj.hpp>
-#include <Hadrons/Modules/MNoise/TimeDilutedSpinColorDiagonal.hpp>
-#include <Hadrons/Modules/MNoise/FullVolumeSpinColorDiagonal.hpp>
-#include <Hadrons/Modules/MScalar/FreeProp.hpp>
-#include <Hadrons/Modules/MScalar/Scalar.hpp>
-#include <Hadrons/Modules/MScalar/ChargedProp.hpp>
-#include <Hadrons/Modules/MAction/Wilson.hpp>
-#include <Hadrons/Modules/MAction/ScaledDWF.hpp>
-#include <Hadrons/Modules/MAction/MobiusDWF.hpp>
-#include <Hadrons/Modules/MAction/WilsonClover.hpp>
-#include <Hadrons/Modules/MAction/ZMobiusDWF.hpp>
-#include <Hadrons/Modules/MAction/DWF.hpp>
-#include <Hadrons/Modules/MGauge/UnitEm.hpp>
-#include <Hadrons/Modules/MGauge/Electrify.hpp>
-#include <Hadrons/Modules/MGauge/StoutSmearing.hpp>
-#include <Hadrons/Modules/MGauge/Random.hpp>
-#include <Hadrons/Modules/MGauge/FundtoHirep.hpp>
-#include <Hadrons/Modules/MGauge/GaugeFix.hpp>
-#include <Hadrons/Modules/MGauge/Unit.hpp>
-#include <Hadrons/Modules/MGauge/StochEm.hpp>
-#include <Hadrons/Modules/MUtilities/RandomVectors.hpp>
-#include <Hadrons/Modules/MUtilities/PrecisionCast.hpp>
-#include <Hadrons/Modules/MIO/LoadCosmHol.hpp>
-#include <Hadrons/Modules/MIO/LoadA2AVectors.hpp>
-#include <Hadrons/Modules/MIO/LoadEigenPack.hpp>
-#include <Hadrons/Modules/MIO/LoadNersc.hpp>
-#include <Hadrons/Modules/MIO/LoadBinary.hpp>
-#include <Hadrons/Modules/MIO/LoadCoarseEigenPack.hpp>
-#include <Hadrons/Modules/MContraction/WeakEye3pt.hpp>
-#include <Hadrons/Modules/MContraction/WeakMesonDecayKl2.hpp>
-#include <Hadrons/Modules/MContraction/Gamma3pt.hpp>
-#include <Hadrons/Modules/MContraction/A2AMesonField.hpp>
-#include <Hadrons/Modules/MContraction/A2ALoop.hpp>
-#include <Hadrons/Modules/MContraction/WeakNonEye3pt.hpp>
-#include <Hadrons/Modules/MContraction/DiscLoop.hpp>
-#include <Hadrons/Modules/MContraction/A2AAslashField.hpp>
-#include <Hadrons/Modules/MContraction/Baryon.hpp>
-#include <Hadrons/Modules/MContraction/Meson.hpp>
-#include <Hadrons/Modules/MNPR/FourQuark.hpp>
-#include <Hadrons/Modules/MNPR/Bilinear.hpp>
-#include <Hadrons/Modules/MNPR/Amputate.hpp>
-#include <Hadrons/Modules/MSolver/A2AAslashVectors.hpp>
-#include <Hadrons/Modules/MSolver/RBPrecCG.hpp>
-#include <Hadrons/Modules/MSolver/Guesser.hpp>
-#include <Hadrons/Modules/MSolver/LocalCoherenceLanczos.hpp>
-#include <Hadrons/Modules/MSolver/A2AVectors.hpp>
-#include <Hadrons/Modules/MSolver/MixedPrecisionRBPrecCG.hpp>
-#include <Hadrons/Modules/MFermion/FreeProp.hpp>
-#include <Hadrons/Modules/MFermion/GaugeProp.hpp>
-#include <Hadrons/Modules/MFermion/EMLepton.hpp>
-#include <Hadrons/Modules/MSink/Smear.hpp>
-#include <Hadrons/Modules/MSink/Point.hpp>
--- a/Hadrons/Modules/MAction/DWF.cc
+++ b/Hadrons/Modules/MAction/DWF.cc
@@ -1,37 +0,0 @@
-/*************************************************************************************
-
-Grid physics library, www.github.com/paboyle/Grid 
-
-Source file: Hadrons/Modules/MAction/DWF.cc
-
-Copyright (C) 2015-2019
-
-Author: Antonin Portelli <antonin.portelli@me.com>
-
-This program is free software; you can redistribute it and/or modify
-it under the terms of the GNU General Public License as published by
-the Free Software Foundation; either version 2 of the License, or
-(at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along
-with this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
-
-See the full license in the file "LICENSE" in the top level distribution directory
-*************************************************************************************/
-/*  END LEGAL */
-#include <Hadrons/Modules/MAction/DWF.hpp>
-
-using namespace Grid;
-using namespace Hadrons;
-using namespace MAction;
-
-template class Grid::Hadrons::MAction::TDWF<FIMPL>;
-#ifdef GRID_DEFAULT_PRECISION_DOUBLE
-template class Grid::Hadrons::MAction::TDWF<FIMPLF>;
-#endif
--- a/Hadrons/Modules/MAction/MobiusDWF.cc
+++ b/Hadrons/Modules/MAction/MobiusDWF.cc
@@ -1,37 +0,0 @@
-/*************************************************************************************
-
-Grid physics library, www.github.com/paboyle/Grid 
-
-Source file: Hadrons/Modules/MAction/MobiusDWF.cc
-
-Copyright (C) 2015-2019
-
-Author: Antonin Portelli <antonin.portelli@me.com>
-
-This program is free software; you can redistribute it and/or modify
-it under the terms of the GNU General Public License as published by
-the Free Software Foundation; either version 2 of the License, or
-(at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along
-with this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
-
-See the full license in the file "LICENSE" in the top level distribution directory
-*************************************************************************************/
-/*  END LEGAL */
-#include <Hadrons/Modules/MAction/MobiusDWF.hpp>
-
-using namespace Grid;
-using namespace Hadrons;
-using namespace MAction;
-
-template class Grid::Hadrons::MAction::TMobiusDWF<FIMPL>;
-#ifdef GRID_DEFAULT_PRECISION_DOUBLE
-template class Grid::Hadrons::MAction::TMobiusDWF<FIMPLF>;
-#endif
--- a/Hadrons/Modules/MAction/MobiusDWF.hpp
+++ b/Hadrons/Modules/MAction/MobiusDWF.hpp
@@ -1,156 +0,0 @@
-/*************************************************************************************
-
-Grid physics library, www.github.com/paboyle/Grid 
-
-Source file: Hadrons/Modules/MAction/MobiusDWF.hpp
-
-Copyright (C) 2015-2019
-
-Author: Antonin Portelli <antonin.portelli@me.com>
-
-This program is free software; you can redistribute it and/or modify
-it under the terms of the GNU General Public License as published by
-the Free Software Foundation; either version 2 of the License, or
-(at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along
-with this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
-
-See the full license in the file "LICENSE" in the top level distribution directory
-*************************************************************************************/
-/*  END LEGAL */
-#ifndef Hadrons_MAction_MobiusDWF_hpp_
-#define Hadrons_MAction_MobiusDWF_hpp_
-
-#include <Hadrons/Global.hpp>
-#include <Hadrons/Module.hpp>
-#include <Hadrons/ModuleFactory.hpp>
-
-BEGIN_HADRONS_NAMESPACE
-
-/******************************************************************************
- *                      Mobius domain-wall fermion action                     *
- ******************************************************************************/
-BEGIN_MODULE_NAMESPACE(MAction)
-
-class MobiusDWFPar: Serializable
-{
-public:
-    GRID_SERIALIZABLE_CLASS_MEMBERS(MobiusDWFPar,
-                                    std::string , gauge,
-                                    unsigned int, Ls,
-                                    double      , mass,
-                                    double      , M5,
-                                    double      , b,
-                                    double      , c,
-                                    std::string , boundary,
-                                    std::string , twist);
-};
-
-template <typename FImpl>
-class TMobiusDWF: public Module<MobiusDWFPar>
-{
-public:
-    FERM_TYPE_ALIASES(FImpl,);
-public:
-    // constructor
-    TMobiusDWF(const std::string name);
-    // destructor
-    virtual ~TMobiusDWF(void) {};
-    // dependency relation
-    virtual std::vector<std::string> getInput(void);
-    virtual std::vector<std::string> getOutput(void);
-    // setup
-    virtual void setup(void);
-    // execution
-    virtual void execute(void);
-};
-
-MODULE_REGISTER_TMP(MobiusDWF, TMobiusDWF<FIMPL>, MAction);
-#ifdef GRID_DEFAULT_PRECISION_DOUBLE
-MODULE_REGISTER_TMP(MobiusDWFF, TMobiusDWF<FIMPLF>, MAction);
-#endif
-
-/******************************************************************************
- *                      TMobiusDWF implementation                             *
- ******************************************************************************/
-// constructor /////////////////////////////////////////////////////////////////
-template <typename FImpl>
-TMobiusDWF<FImpl>::TMobiusDWF(const std::string name)
-: Module<MobiusDWFPar>(name)
-{}
-
-// dependencies/products ///////////////////////////////////////////////////////
-template <typename FImpl>
-std::vector<std::string> TMobiusDWF<FImpl>::getInput(void)
-{
-    std::vector<std::string> in = {par().gauge};
-    
-    return in;
-}
-
-template <typename FImpl>
-std::vector<std::string> TMobiusDWF<FImpl>::getOutput(void)
-{
-    std::vector<std::string> out = {getName()};
-    
-    return out;
-}
-
-// setup ///////////////////////////////////////////////////////////////////////
-template <typename FImpl>
-void TMobiusDWF<FImpl>::setup(void)
-{
-    LOG(Message) << "Setting up Mobius domain wall fermion matrix with m= "
-                 << par().mass << ", M5= " << par().M5 << ", Ls= " << par().Ls 
-                 << ", b= " << par().b << ", c= " << par().c
-                 << " using gauge field '" << par().gauge << "'"
-                 << std::endl;
-                 
-    auto &U    = envGet(GaugeField, par().gauge);
-    auto &g4   = *envGetGrid(FermionField);
-    auto &grb4 = *envGetRbGrid(FermionField);
-    auto &g5   = *envGetGrid(FermionField, par().Ls);
-    auto &grb5 = *envGetRbGrid(FermionField, par().Ls);
-    typename MobiusFermion<FImpl>::ImplParams implParams;
-    if (!par().boundary.empty())
-    {
-        implParams.boundary_phases = strToVec<Complex>(par().boundary);
-    }
-    if (!par().twist.empty())
-    {
-        implParams.twist_n_2pi_L   = strToVec<Real>(par().twist);
-    }
-    LOG(Message) << "Fermion boundary conditions: " << implParams.boundary_phases
-                 << std::endl;
-    LOG(Message) << "Twists: " << implParams.twist_n_2pi_L
-                 << std::endl;
-    if (implParams.boundary_phases.size() != env().getNd())
-    {
-        HADRONS_ERROR(Size, "Wrong number of boundary phase");
-    }
-    if (implParams.twist_n_2pi_L.size() != env().getNd())
-    {
-        HADRONS_ERROR(Size, "Wrong number of twist");
-    }
-    envCreateDerived(FMat, MobiusFermion<FImpl>, getName(), par().Ls, U, g5,
-                     grb5, g4, grb4, par().mass, par().M5, par().b, par().c,
-                     implParams);
-}
-
-// execution ///////////////////////////////////////////////////////////////////
-template <typename FImpl>
-void TMobiusDWF<FImpl>::execute(void)
-{}
-
-END_MODULE_NAMESPACE
-
-END_HADRONS_NAMESPACE
-
-#endif // Hadrons_MAction_MobiusDWF_hpp_
--- a/Hadrons/Modules/MAction/ScaledDWF.cc
+++ b/Hadrons/Modules/MAction/ScaledDWF.cc
@@ -1,37 +0,0 @@
-/*************************************************************************************
-
-Grid physics library, www.github.com/paboyle/Grid 
-
-Source file: Hadrons/Modules/MAction/ScaledDWF.cc
-
-Copyright (C) 2015-2019
-
-Author: Antonin Portelli <antonin.portelli@me.com>
-
-This program is free software; you can redistribute it and/or modify
-it under the terms of the GNU General Public License as published by
-the Free Software Foundation; either version 2 of the License, or
-(at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along
-with this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
-
-See the full license in the file "LICENSE" in the top level distribution directory
-*************************************************************************************/
-/*  END LEGAL */
-#include <Hadrons/Modules/MAction/ScaledDWF.hpp>
-
-using namespace Grid;
-using namespace Hadrons;
-using namespace MAction;
-
-template class Grid::Hadrons::MAction::TScaledDWF<FIMPL>;
-#ifdef GRID_DEFAULT_PRECISION_DOUBLE
-template class Grid::Hadrons::MAction::TScaledDWF<FIMPLF>;
-#endif
--- a/Hadrons/Modules/MAction/ScaledDWF.hpp
+++ b/Hadrons/Modules/MAction/ScaledDWF.hpp
@@ -1,155 +0,0 @@
-/*************************************************************************************
-
-Grid physics library, www.github.com/paboyle/Grid 
-
-Source file: Hadrons/Modules/MAction/ScaledDWF.hpp
-
-Copyright (C) 2015-2019
-
-Author: Antonin Portelli <antonin.portelli@me.com>
-
-This program is free software; you can redistribute it and/or modify
-it under the terms of the GNU General Public License as published by
-the Free Software Foundation; either version 2 of the License, or
-(at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along
-with this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
-
-See the full license in the file "LICENSE" in the top level distribution directory
-*************************************************************************************/
-/*  END LEGAL */
-#ifndef Hadrons_MAction_ScaledDWF_hpp_
-#define Hadrons_MAction_ScaledDWF_hpp_
-
-#include <Hadrons/Global.hpp>
-#include <Hadrons/Module.hpp>
-#include <Hadrons/ModuleFactory.hpp>
-
-BEGIN_HADRONS_NAMESPACE
-
-/******************************************************************************
- *                      Scaled domain wall fermion                            *
- ******************************************************************************/
-BEGIN_MODULE_NAMESPACE(MAction)
-
-class ScaledDWFPar: Serializable
-{
-public:
-    GRID_SERIALIZABLE_CLASS_MEMBERS(ScaledDWFPar,
-                                    std::string , gauge,
-                                    unsigned int, Ls,
-                                    double      , mass,
-                                    double      , M5,
-                                    double      , scale,
-                                    std::string , boundary,
-                                    std::string , twist);
-};
-
-template <typename FImpl>
-class TScaledDWF: public Module<ScaledDWFPar>
-{
-public:
-    FERM_TYPE_ALIASES(FImpl,);
-public:
-    // constructor
-    TScaledDWF(const std::string name);
-    // destructor
-    virtual ~TScaledDWF(void) {};
-    // dependency relation
-    virtual std::vector<std::string> getInput(void);
-    virtual std::vector<std::string> getOutput(void);
-    // setup
-    virtual void setup(void);
-    // execution
-    virtual void execute(void);
-};
-
-MODULE_REGISTER_TMP(ScaledDWF, TScaledDWF<FIMPL>, MAction);
-#ifdef GRID_DEFAULT_PRECISION_DOUBLE
-MODULE_REGISTER_TMP(ScaledDWFF, TScaledDWF<FIMPLF>, MAction);
-#endif
-
-/******************************************************************************
- *                      TScaledDWF implementation                             *
- ******************************************************************************/
-// constructor /////////////////////////////////////////////////////////////////
-template <typename FImpl>
-TScaledDWF<FImpl>::TScaledDWF(const std::string name)
-: Module<ScaledDWFPar>(name)
-{}
-
-// dependencies/products ///////////////////////////////////////////////////////
-template <typename FImpl>
-std::vector<std::string> TScaledDWF<FImpl>::getInput(void)
-{
-    std::vector<std::string> in = {par().gauge};
-    
-    return in;
-}
-
-template <typename FImpl>
-std::vector<std::string> TScaledDWF<FImpl>::getOutput(void)
-{
-    std::vector<std::string> out = {getName()};
-    
-    return out;
-}
-
-// setup ///////////////////////////////////////////////////////////////////////
-template <typename FImpl>
-void TScaledDWF<FImpl>::setup(void)
-{
-    LOG(Message) << "Setting up scaled domain wall fermion matrix with m= "
-                 << par().mass << ", M5= " << par().M5 << ", Ls= " << par().Ls 
-                 << ", scale= " << par().scale
-                 << " using gauge field '" << par().gauge << "'"
-                 << std::endl;
-
-    auto &U    = envGet(GaugeField, par().gauge);
-    auto &g4   = *envGetGrid(FermionField);
-    auto &grb4 = *envGetRbGrid(FermionField);
-    auto &g5   = *envGetGrid(FermionField, par().Ls);
-    auto &grb5 = *envGetRbGrid(FermionField, par().Ls);
-    typename ScaledShamirFermion<FImpl>::ImplParams implParams;
-    if (!par().boundary.empty())
-    {
-        implParams.boundary_phases = strToVec<Complex>(par().boundary);
-    }
-    if (!par().twist.empty())
-    {
-        implParams.twist_n_2pi_L   = strToVec<Real>(par().twist);
-    }
-    LOG(Message) << "Fermion boundary conditions: " << implParams.boundary_phases
-                 << std::endl;
-    LOG(Message) << "Twists: " << implParams.twist_n_2pi_L
-                 << std::endl;
-    if (implParams.boundary_phases.size() != env().getNd())
-    {
-        HADRONS_ERROR(Size, "Wrong number of boundary phase");
-    }
-    if (implParams.twist_n_2pi_L.size() != env().getNd())
-    {
-        HADRONS_ERROR(Size, "Wrong number of twist");
-    }
-    envCreateDerived(FMat, ScaledShamirFermion<FImpl>, getName(), par().Ls, U, g5,
-                     grb5, g4, grb4, par().mass, par().M5, par().scale,
-                     implParams);
-}
-
-// execution ///////////////////////////////////////////////////////////////////
-template <typename FImpl>
-void TScaledDWF<FImpl>::execute(void)
-{}
-
-END_MODULE_NAMESPACE
-
-END_HADRONS_NAMESPACE
-
-#endif // Hadrons_MAction_ScaledDWF_hpp_
--- a/Hadrons/Modules/MAction/Wilson.cc
+++ b/Hadrons/Modules/MAction/Wilson.cc
@@ -1,37 +0,0 @@
-/*************************************************************************************
-
-Grid physics library, www.github.com/paboyle/Grid 
-
-Source file: Hadrons/Modules/MAction/Wilson.cc
-
-Copyright (C) 2015-2019
-
-Author: Antonin Portelli <antonin.portelli@me.com>
-
-This program is free software; you can redistribute it and/or modify
-it under the terms of the GNU General Public License as published by
-the Free Software Foundation; either version 2 of the License, or
-(at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along
-with this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
-
-See the full license in the file "LICENSE" in the top level distribution directory
-*************************************************************************************/
-/*  END LEGAL */
-#include <Hadrons/Modules/MAction/Wilson.hpp>
-
-using namespace Grid;
-using namespace Hadrons;
-using namespace MAction;
-
-template class Grid::Hadrons::MAction::TWilson<FIMPL>;
-#ifdef GRID_DEFAULT_PRECISION_DOUBLE
-template class Grid::Hadrons::MAction::TWilson<FIMPLF>;
-#endif
--- a/Hadrons/Modules/MAction/WilsonClover.cc
+++ b/Hadrons/Modules/MAction/WilsonClover.cc
@@ -1,37 +0,0 @@
-/*************************************************************************************
-
-Grid physics library, www.github.com/paboyle/Grid 
-
-Source file: Hadrons/Modules/MAction/WilsonClover.cc
-
-Copyright (C) 2015-2019
-
-Author: Antonin Portelli <antonin.portelli@me.com>
-
-This program is free software; you can redistribute it and/or modify
-it under the terms of the GNU General Public License as published by
-the Free Software Foundation; either version 2 of the License, or
-(at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along
-with this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
-
-See the full license in the file "LICENSE" in the top level distribution directory
-*************************************************************************************/
-/*  END LEGAL */
-#include <Hadrons/Modules/MAction/WilsonClover.hpp>
-
-using namespace Grid;
-using namespace Hadrons;
-using namespace MAction;
-
-template class Grid::Hadrons::MAction::TWilsonClover<FIMPL>;
-#ifdef GRID_DEFAULT_PRECISION_DOUBLE
-template class Grid::Hadrons::MAction::TWilsonClover<FIMPLF>;
-#endif
--- a/Hadrons/Modules/MAction/WilsonClover.hpp
+++ b/Hadrons/Modules/MAction/WilsonClover.hpp
@@ -1,157 +0,0 @@
-/*************************************************************************************
-
-Grid physics library, www.github.com/paboyle/Grid 
-
-Source file: Hadrons/Modules/MAction/WilsonClover.hpp
-
-Copyright (C) 2015-2019
-
-Author: Antonin Portelli <antonin.portelli@me.com>
-Author: Guido Cossu <guido.cossu@ed.ac.uk>
-Author: pretidav <david.preti@csic.es>
-
-This program is free software; you can redistribute it and/or modify
-it under the terms of the GNU General Public License as published by
-the Free Software Foundation; either version 2 of the License, or
-(at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along
-with this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
-
-See the full license in the file "LICENSE" in the top level distribution directory
-*************************************************************************************/
-/*  END LEGAL */
-
-#ifndef Hadrons_MAction_WilsonClover_hpp_
-#define Hadrons_MAction_WilsonClover_hpp_
-
-#include <Hadrons/Global.hpp>
-#include <Hadrons/Module.hpp>
-#include <Hadrons/ModuleFactory.hpp>
-
-BEGIN_HADRONS_NAMESPACE
-
-/******************************************************************************
- *                         Wilson clover quark action                         *
- ******************************************************************************/
-BEGIN_MODULE_NAMESPACE(MAction)
-
-class WilsonCloverPar: Serializable
-{
-public:
-    GRID_SERIALIZABLE_CLASS_MEMBERS(WilsonCloverPar,
-                                    std::string, gauge,
-                                    double     , mass,
-				                    double     , csw_r,
-				                    double     , csw_t,
-				                    WilsonAnisotropyCoefficients ,clover_anisotropy,
-                                    std::string, boundary,
-                                    std::string, twist
-				    );
-};
-
-template <typename FImpl>
-class TWilsonClover: public Module<WilsonCloverPar>
-{
-public:
-    FERM_TYPE_ALIASES(FImpl,);
-public:
-    // constructor
-    TWilsonClover(const std::string name);
-    // destructor
-    virtual ~TWilsonClover(void) {};
-    // dependencies/products
-    virtual std::vector<std::string> getInput(void);
-    virtual std::vector<std::string> getOutput(void);
-    // setup
-    virtual void setup(void);
-    // execution
-    virtual void execute(void);
-};
-
-MODULE_REGISTER_TMP(WilsonClover, TWilsonClover<FIMPL>, MAction);
-#ifdef GRID_DEFAULT_PRECISION_DOUBLE
-MODULE_REGISTER_TMP(WilsonCloverF, TWilsonClover<FIMPLF>, MAction);
-#endif
-
-/******************************************************************************
- *                    TWilsonClover template implementation                   *
- ******************************************************************************/
-// constructor /////////////////////////////////////////////////////////////////
-template <typename FImpl>
-TWilsonClover<FImpl>::TWilsonClover(const std::string name)
-: Module<WilsonCloverPar>(name)
-{}
-
-// dependencies/products ///////////////////////////////////////////////////////
-template <typename FImpl>
-std::vector<std::string> TWilsonClover<FImpl>::getInput(void)
-{
-    std::vector<std::string> in = {par().gauge};
-
-    return in;
-}
-
-template <typename FImpl>
-std::vector<std::string> TWilsonClover<FImpl>::getOutput(void)
-{
-    std::vector<std::string> out = {getName()};
-
-    return out;
-}
-
-// setup ///////////////////////////////////////////////////////////////////////
-template <typename FImpl>
-void TWilsonClover<FImpl>::setup(void)
-{
-    LOG(Message) << "Setting up Wilson clover fermion matrix with m= " << par().mass
-                 << " using gauge field '" << par().gauge << "'" << std::endl;
-    LOG(Message) << "Clover term csw_r: " << par().csw_r
-                 << " csw_t: " << par().csw_t
-                 << std::endl;
-                 
-    auto &U      = envGet(GaugeField, par().gauge);
-    auto &grid   = *envGetGrid(FermionField);
-    auto &gridRb = *envGetRbGrid(FermionField);
-    typename WilsonCloverFermion<FImpl>::ImplParams implParams;
-    if (!par().boundary.empty())
-    {
-        implParams.boundary_phases = strToVec<Complex>(par().boundary);
-    }
-    if (!par().twist.empty())
-    {
-        implParams.twist_n_2pi_L   = strToVec<Real>(par().twist);
-    }
-    LOG(Message) << "Fermion boundary conditions: " << implParams.boundary_phases
-                 << std::endl;
-    LOG(Message) << "Twists: " << implParams.twist_n_2pi_L
-                 << std::endl;
-    if (implParams.boundary_phases.size() != env().getNd())
-    {
-        HADRONS_ERROR(Size, "Wrong number of boundary phase");
-    }
-    if (implParams.twist_n_2pi_L.size() != env().getNd())
-    {
-        HADRONS_ERROR(Size, "Wrong number of twist");
-    }
-    envCreateDerived(FMat, WilsonCloverFermion<FImpl>, getName(), 1, U, grid,
-                     gridRb, par().mass, par().csw_r, par().csw_t, 
-                     par().clover_anisotropy, implParams); 
-}
-
-// execution ///////////////////////////////////////////////////////////////////
-template <typename FImpl>
-void TWilsonClover<FImpl>::execute()
-{}
-
-END_MODULE_NAMESPACE
-
-END_HADRONS_NAMESPACE
-
-#endif // Hadrons_WilsonClover_hpp_
--- a/Hadrons/Modules/MAction/ZMobiusDWF.cc
+++ b/Hadrons/Modules/MAction/ZMobiusDWF.cc
@@ -1,37 +0,0 @@
-/*************************************************************************************
-
-Grid physics library, www.github.com/paboyle/Grid 
-
-Source file: Hadrons/Modules/MAction/ZMobiusDWF.cc
-
-Copyright (C) 2015-2019
-
-Author: Antonin Portelli <antonin.portelli@me.com>
-
-This program is free software; you can redistribute it and/or modify
-it under the terms of the GNU General Public License as published by
-the Free Software Foundation; either version 2 of the License, or
-(at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along
-with this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
-
-See the full license in the file "LICENSE" in the top level distribution directory
-*************************************************************************************/
-/*  END LEGAL */
-#include <Hadrons/Modules/MAction/ZMobiusDWF.hpp>
-
-using namespace Grid;
-using namespace Hadrons;
-using namespace MAction;
-
-template class Grid::Hadrons::MAction::TZMobiusDWF<ZFIMPL>;
-#ifdef GRID_DEFAULT_PRECISION_DOUBLE
-template class Grid::Hadrons::MAction::TZMobiusDWF<ZFIMPLF>;
-#endif
--- a/Hadrons/Modules/MAction/ZMobiusDWF.hpp
+++ b/Hadrons/Modules/MAction/ZMobiusDWF.hpp
@@ -1,163 +0,0 @@
-/*************************************************************************************
-
-Grid physics library, www.github.com/paboyle/Grid 
-
-Source file: Hadrons/Modules/MAction/ZMobiusDWF.hpp
-
-Copyright (C) 2015-2019
-
-Author: Antonin Portelli <antonin.portelli@me.com>
-
-This program is free software; you can redistribute it and/or modify
-it under the terms of the GNU General Public License as published by
-the Free Software Foundation; either version 2 of the License, or
-(at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along
-with this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
-
-See the full license in the file "LICENSE" in the top level distribution directory
-*************************************************************************************/
-/*  END LEGAL */
-#ifndef Hadrons_MAction_ZMobiusDWF_hpp_
-#define Hadrons_MAction_ZMobiusDWF_hpp_
-
-#include <Hadrons/Global.hpp>
-#include <Hadrons/Module.hpp>
-#include <Hadrons/ModuleFactory.hpp>
-
-BEGIN_HADRONS_NAMESPACE
-
-/******************************************************************************
- *                      z-Mobius domain-wall fermion action                   *
- ******************************************************************************/
-BEGIN_MODULE_NAMESPACE(MAction)
-
-class ZMobiusDWFPar: Serializable
-{
-public:
-    GRID_SERIALIZABLE_CLASS_MEMBERS(ZMobiusDWFPar,
-                                    std::string                      , gauge,
-                                    unsigned int                     , Ls,
-                                    double                           , mass,
-                                    double                           , M5,
-                                    double                           , b,
-                                    double                           , c,
-                                    std::vector<std::complex<double>>, omega,
-                                    std::string                      , boundary,
-                                    std::string                      , twist);
-};
-
-template <typename FImpl>
-class TZMobiusDWF: public Module<ZMobiusDWFPar>
-{
-public:
-    FERM_TYPE_ALIASES(FImpl,);
-public:
-    // constructor
-    TZMobiusDWF(const std::string name);
-    // destructor
-    virtual ~TZMobiusDWF(void) {};
-    // dependency relation
-    virtual std::vector<std::string> getInput(void);
-    virtual std::vector<std::string> getOutput(void);
-    // setup
-    virtual void setup(void);
-    // execution
-    virtual void execute(void);
-};
-
-MODULE_REGISTER_TMP(ZMobiusDWF, TZMobiusDWF<ZFIMPL>, MAction);
-#ifdef GRID_DEFAULT_PRECISION_DOUBLE
-MODULE_REGISTER_TMP(ZMobiusDWFF, TZMobiusDWF<ZFIMPLF>, MAction);
-#endif
-
-/******************************************************************************
- *                     TZMobiusDWF implementation                             *
- ******************************************************************************/
-// constructor /////////////////////////////////////////////////////////////////
-template <typename FImpl>
-TZMobiusDWF<FImpl>::TZMobiusDWF(const std::string name)
-: Module<ZMobiusDWFPar>(name)
-{}
-
-// dependencies/products ///////////////////////////////////////////////////////
-template <typename FImpl>
-std::vector<std::string> TZMobiusDWF<FImpl>::getInput(void)
-{
-    std::vector<std::string> in = {par().gauge};
-    
-    return in;
-}
-
-template <typename FImpl>
-std::vector<std::string> TZMobiusDWF<FImpl>::getOutput(void)
-{
-    std::vector<std::string> out = {getName()};
-    
-    return out;
-}
-
-// setup ///////////////////////////////////////////////////////////////////////
-template <typename FImpl>
-void TZMobiusDWF<FImpl>::setup(void)
-{
-    LOG(Message) << "Setting up z-Mobius domain wall fermion matrix with m= "
-                 << par().mass << ", M5= " << par().M5 << ", Ls= " << par().Ls 
-                 << ", b= " << par().b << ", c= " << par().c
-                 << " using gauge field '" << par().gauge << "'"
-                 << std::endl;
-    LOG(Message) << "Omegas: " << std::endl;
-    for (unsigned int i = 0; i < par().omega.size(); ++i)
-    {
-        LOG(Message) << "  omega[" << i << "]= " << par().omega[i] << std::endl;
-    }
-
-    auto &U    = envGet(GaugeField, par().gauge);
-    auto &g4   = *envGetGrid(FermionField);
-    auto &grb4 = *envGetRbGrid(FermionField);
-    auto &g5   = *envGetGrid(FermionField, par().Ls);
-    auto &grb5 = *envGetRbGrid(FermionField, par().Ls);
-    auto omega = par().omega;
-    typename ZMobiusFermion<FImpl>::ImplParams implParams;
-    if (!par().boundary.empty())
-    {
-        implParams.boundary_phases = strToVec<Complex>(par().boundary);
-    }
-    if (!par().twist.empty())
-    {
-        implParams.twist_n_2pi_L   = strToVec<Real>(par().twist);
-    }
-    LOG(Message) << "Fermion boundary conditions: " << implParams.boundary_phases
-                 << std::endl;
-    LOG(Message) << "Twists: " << implParams.twist_n_2pi_L
-                 << std::endl;
-    if (implParams.boundary_phases.size() != env().getNd())
-    {
-        HADRONS_ERROR(Size, "Wrong number of boundary phase");
-    }
-    if (implParams.twist_n_2pi_L.size() != env().getNd())
-    {
-        HADRONS_ERROR(Size, "Wrong number of twist");
-    }
-    envCreateDerived(FMat, ZMobiusFermion<FImpl>, getName(), par().Ls, U, g5,
-                     grb5, g4, grb4, par().mass, par().M5, omega,
-                     par().b, par().c, implParams);
-}
-
-// execution ///////////////////////////////////////////////////////////////////
-template <typename FImpl>
-void TZMobiusDWF<FImpl>::execute(void)
-{}
-
-END_MODULE_NAMESPACE
-
-END_HADRONS_NAMESPACE
-
-#endif // Hadrons_MAction_ZMobiusDWF_hpp_
--- a/Hadrons/Modules/MContraction/A2AAslashField.cc
+++ b/Hadrons/Modules/MContraction/A2AAslashField.cc
@@ -1,34 +0,0 @@
-/*************************************************************************************
-
-Grid physics library, www.github.com/paboyle/Grid 
-
-Source file: Hadrons/Modules/MContraction/A2AAslashField.cc
-
-Copyright (C) 2015-2019
-
-Author: Antonin Portelli <antonin.portelli@me.com>
-
-This program is free software; you can redistribute it and/or modify
-it under the terms of the GNU General Public License as published by
-the Free Software Foundation; either version 2 of the License, or
-(at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along
-with this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
-
-See the full license in the file "LICENSE" in the top level distribution directory
-*************************************************************************************/
-/*  END LEGAL */
-#include <Hadrons/Modules/MContraction/A2AAslashField.hpp>
-
-using namespace Grid;
-using namespace Hadrons;
-using namespace MContraction;
-
-template class Grid::Hadrons::MContraction::TA2AAslashField<FIMPL, PhotonR>;
--- a/Hadrons/Modules/MContraction/A2AAslashField.hpp
+++ b/Hadrons/Modules/MContraction/A2AAslashField.hpp
@@ -1,246 +0,0 @@
-/*************************************************************************************
-
-Grid physics library, www.github.com/paboyle/Grid 
-
-Source file: Hadrons/Modules/MContraction/A2AAslashField.hpp
-
-Copyright (C) 2015-2019
-
-Author: Antonin Portelli <antonin.portelli@me.com>
-
-This program is free software; you can redistribute it and/or modify
-it under the terms of the GNU General Public License as published by
-the Free Software Foundation; either version 2 of the License, or
-(at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along
-with this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
-
-See the full license in the file "LICENSE" in the top level distribution directory
-*************************************************************************************/
-/*  END LEGAL */
-#ifndef Hadrons_MContraction_A2AAslashField_hpp_
-#define Hadrons_MContraction_A2AAslashField_hpp_
-
-#include <Hadrons/Global.hpp>
-#include <Hadrons/Module.hpp>
-#include <Hadrons/ModuleFactory.hpp>
-#include <Hadrons/A2AMatrix.hpp>
-
-BEGIN_HADRONS_NAMESPACE
-
-/******************************************************************************
- *                         A2AAslashField                                 *
- ******************************************************************************/
-BEGIN_MODULE_NAMESPACE(MContraction)
-
-class A2AAslashFieldPar: Serializable
-{
-public:
-    GRID_SERIALIZABLE_CLASS_MEMBERS(A2AAslashFieldPar,
-                                    int, cacheBlock,
-                                    int, block,
-                                    std::string, left,
-                                    std::string, right,
-                                    std::string, output,
-                                    std::vector<std::string>, emField);
-};
-
-class A2AAslashFieldMetadata: Serializable
-{
-public:
-    GRID_SERIALIZABLE_CLASS_MEMBERS(A2AAslashFieldMetadata,
-                                    std::string, emFieldName);
-};
-
-template <typename T, typename FImpl>
-class AslashFieldKernel: public A2AKernel<T, typename FImpl::FermionField>
-{
-public:
-    typedef typename FImpl::FermionField FermionField;
-public:
-    AslashFieldKernel(const std::vector<LatticeComplex> &emB0,
-                      const std::vector<LatticeComplex> &emB1,
-                      GridBase *grid)
-    : emB0_(emB0), emB1_(emB1), grid_(grid)
-    {
-        vol_ = 1.;
-        for (auto &d: grid_->GlobalDimensions())
-        {
-            vol_ *= d;
-        }
-    }
-
-    virtual ~AslashFieldKernel(void) = default;
-    virtual void operator()(A2AMatrixSet<T> &m, const FermionField *left, 
-                            const FermionField *right,
-                            const unsigned int orthogDim, double &t)
-    {
-        A2Autils<FImpl>::AslashField(m, left, right, emB0_, emB1_, orthogDim, &t);
-    }
-
-    virtual double flops(const unsigned int blockSizei, const unsigned int blockSizej)
-    {
-        return 0.;
-    }
-
-    virtual double bytes(const unsigned int blockSizei, const unsigned int blockSizej)
-    {
-        return 0.;
-    }
-private:
-    const std::vector<LatticeComplex> &emB0_, &emB1_;
-    GridBase                          *grid_;
-    double                            vol_;
-};
-
-template <typename FImpl, typename PhotonImpl>
-class TA2AAslashField: public Module<A2AAslashFieldPar>
-{
-public:
-    FERM_TYPE_ALIASES(FImpl,);
-    typedef typename PhotonImpl::GaugeField EmField;
-    typedef A2AMatrixBlockComputation<Complex, 
-                                      FermionField, 
-                                      A2AAslashFieldMetadata, 
-                                      HADRONS_A2AM_IO_TYPE> Computation;
-    typedef AslashFieldKernel<Complex, FImpl> Kernel;
-public:
-    // constructor
-    TA2AAslashField(const std::string name);
-    // destructor
-    virtual ~TA2AAslashField(void) {};
-    // dependency relation
-    virtual std::vector<std::string> getInput(void);
-    virtual std::vector<std::string> getOutput(void);
-    // setup
-    virtual void setup(void);
-    // execution
-    virtual void execute(void);
-};
-
-MODULE_REGISTER_TMP(A2AAslashField, ARG(TA2AAslashField<FIMPL, PhotonR>), MContraction);
-
-/******************************************************************************
- *                 TA2AAslashField implementation                             *
- ******************************************************************************/
-// constructor /////////////////////////////////////////////////////////////////
-template <typename FImpl, typename PhotonImpl>
-TA2AAslashField<FImpl, PhotonImpl>::TA2AAslashField(const std::string name)
-: Module<A2AAslashFieldPar>(name)
-{}
-
-// dependencies/products ///////////////////////////////////////////////////////
-template <typename FImpl, typename PhotonImpl>
-std::vector<std::string> TA2AAslashField<FImpl, PhotonImpl>::getInput(void)
-{
-    std::vector<std::string> in = par().emField;
-    
-    in.push_back(par().left);
-    in.push_back(par().right);
-
-    return in;
-}
-
-template <typename FImpl, typename PhotonImpl>
-std::vector<std::string> TA2AAslashField<FImpl, PhotonImpl>::getOutput(void)
-{
-    std::vector<std::string> out = {};
-    
-    return out;
-}
-
-// setup ///////////////////////////////////////////////////////////////////////
-template <typename FImpl, typename PhotonImpl>
-void TA2AAslashField<FImpl, PhotonImpl>::setup(void)
-{
-    envTmp(Computation, "computation", 1, envGetGrid(FermionField), 
-           env().getNd() - 1, par().emField.size(), 1, par().block, 
-           par().cacheBlock, this);
-    envTmp(std::vector<ComplexField>, "B0", 1, 
-           par().emField.size(), envGetGrid(ComplexField));
-    envTmp(std::vector<ComplexField>, "B1", 1, 
-           par().emField.size(), envGetGrid(ComplexField));
-    envTmpLat(ComplexField, "Amu");
-}
-
-// execution ///////////////////////////////////////////////////////////////////
-template <typename FImpl, typename PhotonImpl>
-void TA2AAslashField<FImpl, PhotonImpl>::execute(void)
-{
-    auto &left  = envGet(std::vector<FermionField>, par().left);
-    auto &right = envGet(std::vector<FermionField>, par().right);
-
-    int nt         = env().getDim().back();
-    int N_i        = left.size();
-    int N_j        = right.size();
-    int nem        = par().emField.size();
-    int block      = par().block;
-    int cacheBlock = par().cacheBlock;
-
-    LOG(Message) << "Computing all-to-all A-slash fields" << std::endl;
-    LOG(Message) << "Left: '" << par().left << "' Right: '" << par().right << "'" << std::endl;
-    LOG(Message) << "EM fields:" << std::endl;
-    for (auto &name: par().emField)
-    {
-        LOG(Message) << "  " << name << std::endl;
-    }
-    LOG(Message) << "A-slash field size: " << nt << "*" << N_i << "*" << N_j 
-                 << " (filesize " << sizeString(nt*N_i*N_j*sizeof(HADRONS_A2AM_IO_TYPE)) 
-                 << "/EM field)" << std::endl;
-    
-    // preparing "B" complexified fields
-    startTimer("Complexify EM fields");
-    envGetTmp(std::vector<ComplexField>, B0);
-    envGetTmp(std::vector<ComplexField>, B1);
-    for (unsigned int i = 0; i < par().emField.size(); ++i)
-    {
-        auto &A = envGet(EmField, par().emField[i]);
-        envGetTmp(ComplexField, Amu);
-
-        B0[i]  = peekLorentz(A, 0);
-        B0[i] += timesI(peekLorentz(A, 1));
-        B1[i]  = peekLorentz(A, 2);
-        B1[i] += timesI(peekLorentz(A, 3));
-    }
-    stopTimer("Complexify EM fields");
-
-    // I/O name & metadata lambdas
-    auto ionameFn = [this](const unsigned int em, const unsigned int dummy)
-    {
-        return par().emField[em];
-    };
-
-    auto filenameFn = [this, &ionameFn](const unsigned int em, const unsigned int dummy)
-    {
-        return par().output + "." + std::to_string(vm().getTrajectory()) 
-               + "/" + ionameFn(em, dummy) + ".h5";
-    };
-
-    auto metadataFn = [this](const unsigned int em, const unsigned int dummy)
-    {
-        A2AAslashFieldMetadata md;
-
-        md.emFieldName = par().emField[em];
-        
-        return md;
-    };
-
-    // executing computation
-    Kernel kernel(B0, B1, envGetGrid(FermionField));
-
-    envGetTmp(Computation, computation);
-    computation.execute(left, right, kernel, ionameFn, filenameFn, metadataFn);
-}
-
-END_MODULE_NAMESPACE
-
-END_HADRONS_NAMESPACE
-
-#endif // Hadrons_MContraction_A2AAslashField_hpp_
--- a/Hadrons/Modules/MContraction/A2ALoop.cc
+++ b/Hadrons/Modules/MContraction/A2ALoop.cc
@@ -1,34 +0,0 @@
-/*************************************************************************************
-
-Grid physics library, www.github.com/paboyle/Grid 
-
-Source file: Hadrons/Modules/MContraction/A2ALoop.cc
-
-Copyright (C) 2015-2019
-
-Author: Antonin Portelli <antonin.portelli@me.com>
-
-This program is free software; you can redistribute it and/or modify
-it under the terms of the GNU General Public License as published by
-the Free Software Foundation; either version 2 of the License, or
-(at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along
-with this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
-
-See the full license in the file "LICENSE" in the top level distribution directory
-*************************************************************************************/
-/*  END LEGAL */
-#include <Hadrons/Modules/MContraction/A2ALoop.hpp>
-
-using namespace Grid;
-using namespace Hadrons;
-using namespace MContraction;
-
-template class Grid::Hadrons::MContraction::TA2ALoop<FIMPL>;
--- a/Hadrons/Modules/MContraction/A2AMesonField.cc
+++ b/Hadrons/Modules/MContraction/A2AMesonField.cc
@@ -1,35 +0,0 @@
-/*************************************************************************************
-
-Grid physics library, www.github.com/paboyle/Grid 
-
-Source file: Hadrons/Modules/MContraction/A2AMesonField.cc
-
-Copyright (C) 2015-2019
-
-Author: Antonin Portelli <antonin.portelli@me.com>
-Author: paboyle <paboyle@ph.ed.ac.uk>
-
-This program is free software; you can redistribute it and/or modify
-it under the terms of the GNU General Public License as published by
-the Free Software Foundation; either version 2 of the License, or
-(at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along
-with this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
-
-See the full license in the file "LICENSE" in the top level distribution directory
-*************************************************************************************/
-/*  END LEGAL */
-#include <Hadrons/Modules/MContraction/A2AMesonField.hpp>
-
-using namespace Grid;
-using namespace Hadrons;
-using namespace MContraction;
-
-template class Grid::Hadrons::MContraction::TA2AMesonField<FIMPL>;
--- a/Hadrons/Modules/MContraction/A2AMesonField.hpp
+++ b/Hadrons/Modules/MContraction/A2AMesonField.hpp
@@ -1,315 +0,0 @@
-/*************************************************************************************
-
-Grid physics library, www.github.com/paboyle/Grid 
-
-Source file: Hadrons/Modules/MContraction/A2AMesonField.hpp
-
-Copyright (C) 2015-2019
-
-Author: Antonin Portelli <antonin.portelli@me.com>
-Author: Peter Boyle <paboyle@ph.ed.ac.uk>
-Author: paboyle <paboyle@ph.ed.ac.uk>
-
-This program is free software; you can redistribute it and/or modify
-it under the terms of the GNU General Public License as published by
-the Free Software Foundation; either version 2 of the License, or
-(at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along
-with this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
-
-See the full license in the file "LICENSE" in the top level distribution directory
-*************************************************************************************/
-/*  END LEGAL */
-#ifndef Hadrons_MContraction_A2AMesonField_hpp_
-#define Hadrons_MContraction_A2AMesonField_hpp_
-
-#include <Hadrons/Global.hpp>
-#include <Hadrons/Module.hpp>
-#include <Hadrons/ModuleFactory.hpp>
-#include <Hadrons/A2AMatrix.hpp>
-
-BEGIN_HADRONS_NAMESPACE
-
-/******************************************************************************
- *                     All-to-all meson field creation                        *
- ******************************************************************************/
-BEGIN_MODULE_NAMESPACE(MContraction)
-
-class A2AMesonFieldPar: Serializable
-{
-public:
-    GRID_SERIALIZABLE_CLASS_MEMBERS(A2AMesonFieldPar,
-                                    int, cacheBlock,
-                                    int, block,
-                                    std::string, left,
-                                    std::string, right,
-                                    std::string, output,
-                                    std::string, gammas,
-                                    std::vector<std::string>, mom);
-};
-
-class A2AMesonFieldMetadata: Serializable
-{
-public:
-    GRID_SERIALIZABLE_CLASS_MEMBERS(A2AMesonFieldMetadata,
-                                    std::vector<RealF>, momentum,
-                                    Gamma::Algebra, gamma);
-};
-
-template <typename T, typename FImpl>
-class MesonFieldKernel: public A2AKernel<T, typename FImpl::FermionField>
-{
-public:
-    typedef typename FImpl::FermionField FermionField;
-public:
-    MesonFieldKernel(const std::vector<Gamma::Algebra> &gamma,
-                     const std::vector<LatticeComplex> &mom,
-                     GridBase *grid)
-    : gamma_(gamma), mom_(mom), grid_(grid)
-    {
-        vol_ = 1.;
-        for (auto &d: grid_->GlobalDimensions())
-        {
-            vol_ *= d;
-        }
-    }
-
-    virtual ~MesonFieldKernel(void) = default;
-    virtual void operator()(A2AMatrixSet<T> &m, const FermionField *left, 
-                            const FermionField *right,
-                            const unsigned int orthogDim, double &t)
-    {
-        A2Autils<FImpl>::MesonField(m, left, right, gamma_, mom_, orthogDim, &t);
-    }
-
-    virtual double flops(const unsigned int blockSizei, const unsigned int blockSizej)
-    {
-        return vol_*(2*8.0+6.0+8.0*mom_.size())*blockSizei*blockSizej*gamma_.size();
-    }
-
-    virtual double bytes(const unsigned int blockSizei, const unsigned int blockSizej)
-    {
-        return vol_*(12.0*sizeof(T))*blockSizei*blockSizej
-               +  vol_*(2.0*sizeof(T)*mom_.size())*blockSizei*blockSizej*gamma_.size();
-    }
-private:
-    const std::vector<Gamma::Algebra> &gamma_;
-    const std::vector<LatticeComplex> &mom_;
-    GridBase                          *grid_;
-    double                            vol_;
-};
-
-template <typename FImpl>
-class TA2AMesonField : public Module<A2AMesonFieldPar>
-{
-public:
-    FERM_TYPE_ALIASES(FImpl,);
-    typedef A2AMatrixBlockComputation<Complex, 
-                                      FermionField, 
-                                      A2AMesonFieldMetadata, 
-                                      HADRONS_A2AM_IO_TYPE> Computation;
-    typedef MesonFieldKernel<Complex, FImpl> Kernel;
-public:
-    // constructor
-    TA2AMesonField(const std::string name);
-    // destructor
-    virtual ~TA2AMesonField(void){};
-    // dependency relation
-    virtual std::vector<std::string> getInput(void);
-    virtual std::vector<std::string> getOutput(void);
-    // setup
-    virtual void setup(void);
-    // execution
-    virtual void execute(void);
-private:
-    bool                               hasPhase_{false};
-    std::string                        momphName_;
-    std::vector<Gamma::Algebra>        gamma_;
-    std::vector<std::vector<Real>>     mom_;
-};
-
-MODULE_REGISTER(A2AMesonField, ARG(TA2AMesonField<FIMPL>), MContraction);
-
-/******************************************************************************
-*                  TA2AMesonField implementation                             *
-******************************************************************************/
-// constructor /////////////////////////////////////////////////////////////////
-template <typename FImpl>
-TA2AMesonField<FImpl>::TA2AMesonField(const std::string name)
-: Module<A2AMesonFieldPar>(name)
-, momphName_(name + "_momph")
-{
-}
-
-// dependencies/products ///////////////////////////////////////////////////////
-template <typename FImpl>
-std::vector<std::string> TA2AMesonField<FImpl>::getInput(void)
-{
-    std::vector<std::string> in = {par().left, par().right};
-
-    return in;
-}
-
-template <typename FImpl>
-std::vector<std::string> TA2AMesonField<FImpl>::getOutput(void)
-{
-    std::vector<std::string> out = {};
-
-    return out;
-}
-
-// setup ///////////////////////////////////////////////////////////////////////
-template <typename FImpl>
-void TA2AMesonField<FImpl>::setup(void)
-{
-    gamma_.clear();
-    mom_.clear();
-    if (par().gammas == "all")
-    {
-        gamma_ = {
-            Gamma::Algebra::Gamma5,
-            Gamma::Algebra::Identity,    
-            Gamma::Algebra::GammaX,
-            Gamma::Algebra::GammaY,
-            Gamma::Algebra::GammaZ,
-            Gamma::Algebra::GammaT,
-            Gamma::Algebra::GammaXGamma5,
-            Gamma::Algebra::GammaYGamma5,
-            Gamma::Algebra::GammaZGamma5,
-            Gamma::Algebra::GammaTGamma5,
-            Gamma::Algebra::SigmaXY,
-            Gamma::Algebra::SigmaXZ,
-            Gamma::Algebra::SigmaXT,
-            Gamma::Algebra::SigmaYZ,
-            Gamma::Algebra::SigmaYT,
-            Gamma::Algebra::SigmaZT
-        };
-    }
-    else
-    {
-        gamma_ = strToVec<Gamma::Algebra>(par().gammas);
-    }
-    for (auto &pstr: par().mom)
-    {
-        auto p = strToVec<Real>(pstr);
-
-        if (p.size() != env().getNd() - 1)
-        {
-            HADRONS_ERROR(Size, "Momentum has " + std::to_string(p.size())
-                                + " components instead of " 
-                                + std::to_string(env().getNd() - 1));
-        }
-        mom_.push_back(p);
-    }
-    envCache(std::vector<ComplexField>, momphName_, 1, 
-             par().mom.size(), envGetGrid(ComplexField));
-    envTmpLat(ComplexField, "coor");
-    envTmp(Computation, "computation", 1, envGetGrid(FermionField), 
-           env().getNd() - 1, mom_.size(), gamma_.size(), par().block, 
-           par().cacheBlock, this);
-}
-
-// execution ///////////////////////////////////////////////////////////////////
-template <typename FImpl>
-void TA2AMesonField<FImpl>::execute(void)
-{
-    auto &left  = envGet(std::vector<FermionField>, par().left);
-    auto &right = envGet(std::vector<FermionField>, par().right);
-
-    int nt         = env().getDim().back();
-    int N_i        = left.size();
-    int N_j        = right.size();
-    int ngamma     = gamma_.size();
-    int nmom       = mom_.size();
-    int block      = par().block;
-    int cacheBlock = par().cacheBlock;
-
-    LOG(Message) << "Computing all-to-all meson fields" << std::endl;
-    LOG(Message) << "Left: '" << par().left << "' Right: '" << par().right << "'" << std::endl;
-    LOG(Message) << "Momenta:" << std::endl;
-    for (auto &p: mom_)
-    {
-        LOG(Message) << "  " << p << std::endl;
-    }
-    LOG(Message) << "Spin bilinears:" << std::endl;
-    for (auto &g: gamma_)
-    {
-        LOG(Message) << "  " << g << std::endl;
-    }
-    LOG(Message) << "Meson field size: " << nt << "*" << N_i << "*" << N_j 
-                 << " (filesize " << sizeString(nt*N_i*N_j*sizeof(HADRONS_A2AM_IO_TYPE)) 
-                 << "/momentum/bilinear)" << std::endl;
-
-    auto &ph = envGet(std::vector<ComplexField>, momphName_);
-
-    if (!hasPhase_)
-    {
-        startTimer("Momentum phases");
-        for (unsigned int j = 0; j < nmom; ++j)
-        {
-            Complex           i(0.0,1.0);
-            std::vector<Real> p;
-
-            envGetTmp(ComplexField, coor);
-            ph[j] = zero;
-            for(unsigned int mu = 0; mu < mom_[j].size(); mu++)
-            {
-                LatticeCoordinate(coor, mu);
-                ph[j] = ph[j] + (mom_[j][mu]/env().getDim(mu))*coor;
-            }
-            ph[j] = exp((Real)(2*M_PI)*i*ph[j]);
-        }
-        hasPhase_ = true;
-        stopTimer("Momentum phases");
-    }
-
-    auto ionameFn = [this](const unsigned int m, const unsigned int g)
-    {
-        std::stringstream ss;
-
-        ss << gamma_[g] << "_";
-        for (unsigned int mu = 0; mu < mom_[m].size(); ++mu)
-        {
-            ss << mom_[m][mu] << ((mu == mom_[m].size() - 1) ? "" : "_");
-        }
-
-        return ss.str();
-    };
-
-    auto filenameFn = [this, &ionameFn](const unsigned int m, const unsigned int g)
-    {
-        return par().output + "." + std::to_string(vm().getTrajectory()) 
-               + "/" + ionameFn(m, g) + ".h5";
-    };
-
-    auto metadataFn = [this](const unsigned int m, const unsigned int g)
-    {
-        A2AMesonFieldMetadata md;
-
-        for (auto pmu: mom_[m])
-        {
-            md.momentum.push_back(pmu);
-        }
-        md.gamma = gamma_[g];
-        
-        return md;
-    };
-
-    Kernel      kernel(gamma_, ph, envGetGrid(FermionField));
-
-    envGetTmp(Computation, computation);
-    computation.execute(left, right, kernel, ionameFn, filenameFn, metadataFn);
-}
-
-END_MODULE_NAMESPACE
-
-END_HADRONS_NAMESPACE
-
-#endif // Hadrons_MContraction_A2AMesonField_hpp_
--- a/Hadrons/Modules/MContraction/Baryon.cc
+++ b/Hadrons/Modules/MContraction/Baryon.cc
@@ -1,35 +0,0 @@
-/*************************************************************************************
-
-Grid physics library, www.github.com/paboyle/Grid 
-
-Source file: Hadrons/Modules/MContraction/Baryon.cc
-
-Copyright (C) 2015-2019
-
-Author: Antonin Portelli <antonin.portelli@me.com>
-
-This program is free software; you can redistribute it and/or modify
-it under the terms of the GNU General Public License as published by
-the Free Software Foundation; either version 2 of the License, or
-(at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along
-with this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
-
-See the full license in the file "LICENSE" in the top level distribution directory
-*************************************************************************************/
-/*  END LEGAL */
-#include <Hadrons/Modules/MContraction/Baryon.hpp>
-
-using namespace Grid;
-using namespace Hadrons;
-using namespace MContraction;
-
-template class Grid::Hadrons::MContraction::TBaryon<FIMPL,FIMPL,FIMPL>;
-
--- a/Hadrons/Modules/MContraction/DiscLoop.cc
+++ b/Hadrons/Modules/MContraction/DiscLoop.cc
@@ -1,35 +0,0 @@
-/*************************************************************************************
-
-Grid physics library, www.github.com/paboyle/Grid 
-
-Source file: Hadrons/Modules/MContraction/DiscLoop.cc
-
-Copyright (C) 2015-2019
-
-Author: Antonin Portelli <antonin.portelli@me.com>
-
-This program is free software; you can redistribute it and/or modify
-it under the terms of the GNU General Public License as published by
-the Free Software Foundation; either version 2 of the License, or
-(at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along
-with this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
-
-See the full license in the file "LICENSE" in the top level distribution directory
-*************************************************************************************/
-/*  END LEGAL */
-#include <Hadrons/Modules/MContraction/DiscLoop.hpp>
-
-using namespace Grid;
-using namespace Hadrons;
-using namespace MContraction;
-
-template class Grid::Hadrons::MContraction::TDiscLoop<FIMPL>;
-
--- a/Hadrons/Modules/MContraction/Gamma3pt.cc
+++ b/Hadrons/Modules/MContraction/Gamma3pt.cc
@@ -1,35 +0,0 @@
-/*************************************************************************************
-
-Grid physics library, www.github.com/paboyle/Grid 
-
-Source file: Hadrons/Modules/MContraction/Gamma3pt.cc
-
-Copyright (C) 2015-2019
-
-Author: Antonin Portelli <antonin.portelli@me.com>
-
-This program is free software; you can redistribute it and/or modify
-it under the terms of the GNU General Public License as published by
-the Free Software Foundation; either version 2 of the License, or
-(at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along
-with this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
-
-See the full license in the file "LICENSE" in the top level distribution directory
-*************************************************************************************/
-/*  END LEGAL */
-#include <Hadrons/Modules/MContraction/Gamma3pt.hpp>
-
-using namespace Grid;
-using namespace Hadrons;
-using namespace MContraction;
-
-template class Grid::Hadrons::MContraction::TGamma3pt<FIMPL,FIMPL,FIMPL>;
-
--- a/Hadrons/Modules/MContraction/Meson.cc
+++ b/Hadrons/Modules/MContraction/Meson.cc
@@ -1,35 +0,0 @@
-/*************************************************************************************
-
-Grid physics library, www.github.com/paboyle/Grid 
-
-Source file: Hadrons/Modules/MContraction/Meson.cc
-
-Copyright (C) 2015-2019
-
-Author: Antonin Portelli <antonin.portelli@me.com>
-
-This program is free software; you can redistribute it and/or modify
-it under the terms of the GNU General Public License as published by
-the Free Software Foundation; either version 2 of the License, or
-(at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along
-with this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
-
-See the full license in the file "LICENSE" in the top level distribution directory
-*************************************************************************************/
-/*  END LEGAL */
-#include <Hadrons/Modules/MContraction/Meson.hpp>
-
-using namespace Grid;
-using namespace Hadrons;
-using namespace MContraction;
-
-template class Grid::Hadrons::MContraction::TMeson<FIMPL,FIMPL>;
-
--- a/Hadrons/Modules/MContraction/WeakEye3pt.cc
+++ b/Hadrons/Modules/MContraction/WeakEye3pt.cc
@@ -1,34 +0,0 @@
-/*************************************************************************************
-
-Grid physics library, www.github.com/paboyle/Grid 
-
-Source file: Hadrons/Modules/MContraction/WeakEye3pt.cc
-
-Copyright (C) 2015-2019
-
-Author: Antonin Portelli <antonin.portelli@me.com>
-
-This program is free software; you can redistribute it and/or modify
-it under the terms of the GNU General Public License as published by
-the Free Software Foundation; either version 2 of the License, or
-(at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along
-with this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
-
-See the full license in the file "LICENSE" in the top level distribution directory
-*************************************************************************************/
-/*  END LEGAL */
-#include <Hadrons/Modules/MContraction/WeakEye3pt.hpp>
-
-using namespace Grid;
-using namespace Hadrons;
-using namespace MContraction;
-
-template class Grid::Hadrons::MContraction::TWeakEye3pt<FIMPL>;
--- a/Hadrons/Modules/MContraction/WeakEye3pt.hpp
+++ b/Hadrons/Modules/MContraction/WeakEye3pt.hpp
@@ -1,200 +0,0 @@
-/*************************************************************************************
-
-Grid physics library, www.github.com/paboyle/Grid 
-
-Source file: Hadrons/Modules/MContraction/WeakEye3pt.hpp
-
-Copyright (C) 2015-2019
-
-Author: Antonin Portelli <antonin.portelli@me.com>
-Author: Lanny91 <andrew.lawson@gmail.com>
-
-This program is free software; you can redistribute it and/or modify
-it under the terms of the GNU General Public License as published by
-the Free Software Foundation; either version 2 of the License, or
-(at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along
-with this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
-
-See the full license in the file "LICENSE" in the top level distribution directory
-*************************************************************************************/
-/*  END LEGAL */
-#ifndef Hadrons_MContraction_WeakEye3pt_hpp_
-#define Hadrons_MContraction_WeakEye3pt_hpp_
-
-#include <Hadrons/Global.hpp>
-#include <Hadrons/Module.hpp>
-#include <Hadrons/ModuleFactory.hpp>
-
-BEGIN_HADRONS_NAMESPACE
-
-/*
- * Weak Hamiltonian meson 3-pt diagrams, eye topologies.
- * 
- * Schematics:       loop                 |                  
- *                  /-<-¬                 |                             
- *                 /     \                |            qbl     G     qbr
- *                 \     /                |        /----<------*------<----¬         
- *            qbl   \   /    qbr          |       /          /-*-¬          \
- *       /-----<-----* *-----<----¬       |      /          /  G  \          \
- *  gIn *            G G           * gOut | gIn *           \     /  loop    * gOut
- *       \                        /       |      \           \->-/          /   
- *        \                      /        |       \                        /       
- *         \---------->---------/         |        \----------->----------/        
- *                   qs                   |                   qs                  
- *                                        |
- *                one trace               |                two traces
- * 
- * one trace : tr(qbr*gOut*qs*adj(gIn)*g5*adj(qbl)*g5*G*loop*G)
- * two traces: tr(qbr*gOut*qs*adj(gIn)*g5*adj(qbl)*g5*G)*tr(loop*G)
- * 
- */
-
-BEGIN_MODULE_NAMESPACE(MContraction)
-
-class WeakEye3ptPar: Serializable
-{
-public:
-    GRID_SERIALIZABLE_CLASS_MEMBERS(WeakEye3ptPar,
-                                    std::string,    qBarLeft,
-                                    std::string,    qBarRight,
-                                    std::string,    qSpectator,
-                                    std::string,    loop,
-                                    unsigned int,   tOut,
-                                    Gamma::Algebra, gammaIn,
-                                    Gamma::Algebra, gammaOut,
-                                    std::string,    output);
-};
-
-template <typename FImpl>
-class TWeakEye3pt: public Module<WeakEye3ptPar>
-{
-public:
-    FERM_TYPE_ALIASES(FImpl,);
-    class Metadata: Serializable
-    {
-    public:
-        GRID_SERIALIZABLE_CLASS_MEMBERS(Metadata,
-                                        Gamma::Algebra, in,
-                                        Gamma::Algebra, out,
-                                        Gamma::Algebra, op,
-                                        unsigned int,   trace);
-    };
-    typedef Correlator<Metadata> Result;
-public:
-    // constructor
-    TWeakEye3pt(const std::string name);
-    // destructor
-    virtual ~TWeakEye3pt(void) {};
-    // dependency relation
-    virtual std::vector<std::string> getInput(void);
-    virtual std::vector<std::string> getOutput(void);
-    // setup
-    virtual void setup(void);
-    // execution
-    virtual void execute(void);
-};
-
-MODULE_REGISTER_TMP(WeakEye3pt, TWeakEye3pt<FIMPL>, MContraction);
-
-/******************************************************************************
- *                        TWeakEye3pt implementation                          *
- ******************************************************************************/
-// constructor /////////////////////////////////////////////////////////////////
-template <typename FImpl>
-TWeakEye3pt<FImpl>::TWeakEye3pt(const std::string name)
-: Module<WeakEye3ptPar>(name)
-{}
-
-// dependencies/products ///////////////////////////////////////////////////////
-template <typename FImpl>
-std::vector<std::string> TWeakEye3pt<FImpl>::getInput(void)
-{
-    std::vector<std::string> in = {par().qBarLeft, par().qBarRight, 
-                                   par().qSpectator, par().loop};
-    
-    return in;
-}
-
-template <typename FImpl>
-std::vector<std::string> TWeakEye3pt<FImpl>::getOutput(void)
-{
-    std::vector<std::string> out = {};
-    
-    return out;
-}
-
-// setup ///////////////////////////////////////////////////////////////////////
-template <typename FImpl>
-void TWeakEye3pt<FImpl>::setup(void)
-{
-    envTmpLat(ComplexField, "corr");
-}
-
-// execution ///////////////////////////////////////////////////////////////////
-template <typename FImpl>
-void TWeakEye3pt<FImpl>::execute(void)
-{
-    LOG(Message) << "Computing mesonic weak 3pt contractions, eye topologies" << std::endl;
-    LOG(Message) << "gIn : " << par().gammaIn << std::endl;
-    LOG(Message) << "gOut: " << par().gammaIn << std::endl;
-    LOG(Message) << "tOut: " << par().tOut << std::endl;
-    LOG(Message) << "qbl : " << par().qBarLeft << std::endl;
-    LOG(Message) << "qbr : " << par().qBarRight << std::endl;
-    LOG(Message) << "qs  : " << par().qSpectator << std::endl;
-    LOG(Message) << "loop: " << par().loop << std::endl;
-
-    std::vector<Result> result;
-    Result              r;
-    auto                &qbl  = envGet(PropagatorField, par().qBarLeft);
-    auto                &qbr  = envGet(PropagatorField, par().qBarRight);
-    auto                &loop = envGet(PropagatorField, par().loop);
-    auto                &qs   = envGet(SlicedPropagator, par().qSpectator);
-    auto                qst   = qs[par().tOut];
-    Gamma               gIn(par().gammaIn), gOut(par().gammaOut);
-    Gamma               g5(Gamma::Algebra::Gamma5);
-
-    envGetTmp(ComplexField, corr);
-    r.info.in  = par().gammaIn;
-    r.info.out = par().gammaOut;
-    for (auto &G: Gamma::gall)
-    {
-        SlicedComplex buf;
-
-        r.info.op = G.g;
-        // one trace
-        corr = trace(qbr*gOut*qst*adj(gIn)*g5*adj(qbl)*g5*G*loop*G);
-        sliceSum(corr, buf, Tp);
-        r.corr.clear();
-        for (unsigned int t = 0; t < buf.size(); ++t)
-        {
-            r.corr.push_back(TensorRemove(buf[t]));
-        }
-        r.info.trace = 1;
-        result.push_back(r);
-        // two traces
-        corr = trace(qbr*gOut*qst*adj(gIn)*g5*adj(qbl)*g5*G)*trace(loop*G);
-        sliceSum(corr, buf, Tp);
-        r.corr.clear();
-        for (unsigned int t = 0; t < buf.size(); ++t)
-        {
-            r.corr.push_back(TensorRemove(buf[t]));
-        }
-        r.info.trace = 2;
-        result.push_back(r);
-    }
-    saveResult(par().output, "weakEye3pt", result);
-}
-
-END_MODULE_NAMESPACE
-
-END_HADRONS_NAMESPACE
-
-#endif // Hadrons_MContraction_WeakEye3pt_hpp_
--- a/Hadrons/Modules/MContraction/WeakMesonDecayKl2.cc
+++ b/Hadrons/Modules/MContraction/WeakMesonDecayKl2.cc
@@ -1,36 +0,0 @@
-/*************************************************************************************
-
-Grid physics library, www.github.com/paboyle/Grid 
-
-Source file: Hadrons/Modules/MContraction/WeakMesonDecayKl2.cc
-
-Copyright (C) 2015-2018
-
-Author: Antonin Portelli <antonin.portelli@me.com>
-Author: Vera Guelpers <Vera.Guelpers@ed.ac.uk>
-
-This program is free software; you can redistribute it and/or modify
-it under the terms of the GNU General Public License as published by
-the Free Software Foundation; either version 2 of the License, or
-(at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along
-with this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
-
-See the full license in the file "LICENSE" in the top level distribution directory
-*************************************************************************************/
-/*  END LEGAL */
-#include <Hadrons/Modules/MContraction/WeakMesonDecayKl2.hpp>
-
-using namespace Grid;
-using namespace Hadrons;
-using namespace MContraction;
-
-template class Grid::Hadrons::MContraction::TWeakMesonDecayKl2<FIMPL>;
-
--- a/Hadrons/Modules/MContraction/WeakMesonDecayKl2.hpp
+++ b/Hadrons/Modules/MContraction/WeakMesonDecayKl2.hpp
@@ -1,185 +0,0 @@
-/*************************************************************************************
-
-Grid physics library, www.github.com/paboyle/Grid 
-
-Source file: Hadrons/Modules/MContraction/WeakMesonDecayKl2.hpp
-
-Copyright (C) 2015-2018
-
-Author: Antonin Portelli <antonin.portelli@me.com>
-Author: Vera Guelpers <Vera.Guelpers@ed.ac.uk>
-
-
-This program is free software; you can redistribute it and/or modify
-it under the terms of the GNU General Public License as published by
-the Free Software Foundation; either version 2 of the License, or
-(at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along
-with this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
-
-See the full license in the file "LICENSE" in the top level distribution directory
-*************************************************************************************/
-/*  END LEGAL */
-
-#ifndef Hadrons_MContraction_WeakMesonDecayKl2_hpp_
-#define Hadrons_MContraction_WeakMesonDecayKl2_hpp_
-
-#include <Hadrons/Global.hpp>
-#include <Hadrons/Module.hpp>
-#include <Hadrons/ModuleFactory.hpp>
-
-BEGIN_HADRONS_NAMESPACE
-
-/*
-* Kl2 contraction
-* -----------------------------
-*
-* contraction for Kl2 decay, including the lepton
-*
-* 	trace(q1*adj(q2)*g5*gL[mu]) * (gL[mu] * lepton)_{a,b}
-*
-* with open spinor indices (a,b) for the lepton part
-*
-*             q1                  lepton
-*        /------------\       /------------
-*       /              \     /
-*      /                \H_W/
-* g_5 *                  * * 
-*      \                /
-*       \              / 
-*        \____________/
-*             q2
-*
-* * options:
-* - q1: input propagator 1 (string)
-* - q2: input propagator 2 (string)
-* - lepton: input lepton (string)
-*/
-
-/******************************************************************************
- *                               TWeakMesonDecayKl2                           *
- ******************************************************************************/
-BEGIN_MODULE_NAMESPACE(MContraction)
-
-class WeakMesonDecayKl2Par: Serializable
-{
-public:
-    GRID_SERIALIZABLE_CLASS_MEMBERS(WeakMesonDecayKl2Par,
-                                    std::string, q1,
-                                    std::string, q2,
-                                    std::string, lepton,
-				                    std::string, output);
-};
-
-template <typename FImpl>
-class TWeakMesonDecayKl2: public Module<WeakMesonDecayKl2Par>
-{
-public:
-    FERM_TYPE_ALIASES(FImpl,);
-    typedef typename SpinMatrixField::vector_object::scalar_object SpinMatrix;
-    class Result: Serializable
-    {
-    public:
-        GRID_SERIALIZABLE_CLASS_MEMBERS(Result,
-                                        std::vector<SpinMatrix>, corr);
-    };
-public:
-    // constructor
-    TWeakMesonDecayKl2(const std::string name);
-    // destructor
-    virtual ~TWeakMesonDecayKl2(void) {};
-    // dependencies/products
-    virtual std::vector<std::string> getInput(void);
-    virtual std::vector<std::string> getOutput(void);
-protected:
-    // execution
-    virtual void setup(void);
-    // execution
-    virtual void execute(void);
-};
-
-MODULE_REGISTER_TMP(WeakMesonDecayKl2, TWeakMesonDecayKl2<FIMPL>, MContraction);
-
-/******************************************************************************
- *                           TWeakMesonDecayKl2 implementation                   *
- ******************************************************************************/
-// constructor /////////////////////////////////////////////////////////////////
-template <typename FImpl>
-TWeakMesonDecayKl2<FImpl>::TWeakMesonDecayKl2(const std::string name)
-: Module<WeakMesonDecayKl2Par>(name)
-{}
-
-// dependencies/products ///////////////////////////////////////////////////////
-template <typename FImpl>
-std::vector<std::string> TWeakMesonDecayKl2<FImpl>::getInput(void)
-{
-    std::vector<std::string> input = {par().q1, par().q2, par().lepton};
-    
-    return input;
-}
-
-template <typename FImpl>
-std::vector<std::string> TWeakMesonDecayKl2<FImpl>::getOutput(void)
-{
-    std::vector<std::string> output = {};
-    
-    return output;
-}
-
-// setup ////////////////////////////////////////////////////////////////////////
-template <typename FImpl>
-void TWeakMesonDecayKl2<FImpl>::setup(void)
-{
-    envTmpLat(ComplexField, "c");
-    envTmpLat(PropagatorField, "prop_buf");
-    envCreateLat(PropagatorField, getName());
-    envTmpLat(SpinMatrixField, "buf");
-}
-
-// execution ///////////////////////////////////////////////////////////////////
-template <typename FImpl>
-void TWeakMesonDecayKl2<FImpl>::execute(void)
-{
-    LOG(Message) << "Computing QED Kl2 contractions '" << getName() << "' using"
-                 << " quarks '" << par().q1 << "' and '" << par().q2 << "' and"
-		         << "lepton '"  << par().lepton << "'" << std::endl;
-
-    Gamma                   g5(Gamma::Algebra::Gamma5);
-    int                     nt = env().getDim(Tp);
-    std::vector<SpinMatrix> res_summed;
-    Result                  r;
-
-    auto &res    = envGet(PropagatorField, getName()); res = zero;
-    auto &q1     = envGet(PropagatorField, par().q1);
-    auto &q2     = envGet(PropagatorField, par().q2);
-    auto &lepton = envGet(PropagatorField, par().lepton);
-    envGetTmp(SpinMatrixField, buf);
-    envGetTmp(ComplexField, c);
-    envGetTmp(PropagatorField, prop_buf);  
-
-    for (unsigned int mu = 0; mu < 4; ++mu)
-    {
-        c = zero;
-        //hadronic part: trace(q1*adj(q2)*g5*gL[mu]) 
-        c = trace(q1*adj(q2)*g5*GammaL(Gamma::gmu[mu]));
-        prop_buf = 1.;
-        //multiply lepton part
-        res += c * prop_buf * GammaL(Gamma::gmu[mu]) * lepton;
-    }
-    buf = peekColour(res, 0, 0);
-    sliceSum(buf, r.corr, Tp);
-    saveResult(par().output, "weakdecay", r);
-}
-
-END_MODULE_NAMESPACE
-
-END_HADRONS_NAMESPACE
-
-#endif // Hadrons_MContraction_WeakMesonDecayKl2_hpp_
--- a/Show More
+++ b/Show More
Author	SHA1	Message	Date
Guido Cossu	f4e6824f22	Minor changes	2017-10-09 09:44:03 +01:00
Guido Cossu	ac5cfd33a6	Fixing a compilation error	2017-10-04 14:29:01 +01:00
Guido Cossu	f605230bbb	Added laplacian operator for smearing sources	2017-10-04 13:54:54 +01:00