mirror of https://github.com/paboyle/Grid.git synced 2025-06-13 20:57:06 +01:00

Compare commits


3 Commits

Author SHA1 Message Date
9bfd641b22 Reorganise a little to let the PV inverter be defined outside
the Reconstruct class.

This lets the multiple choices of PV inversion be composed without
changing the routine and without if/else case enumeration.

Implemented SchurDiagMooee PV inversion (red-black) and Unprec PV inversion.
Red-black cuts the iteration count from 190 to 90 at a tolerance of 10^-12 on an 8^4 test system.

Will revisit the multiple Schur options and add a Fourier-based multishift PV inverse, similar
to the one Rudy Arthur implemented in BFM.
2018-10-10 13:22:01 +01:00
be40aaf751 4d 5d reconstruction code & test 2018-10-09 18:37:20 +01:00
e069fd5ed8 SetMass should be implemented universally 2018-10-09 17:41:56 +01:00
60 changed files with 1315 additions and 3782 deletions
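
For orientation, here is a minimal sketch of the composition pattern the first commit message describes: the Pauli-Villars inversion strategy is a standalone policy object handed to the reconstruction routine, so variants are swapped at the call site rather than enumerated with if/else. The tolerance, iteration count and surrounding setup below are illustrative assumptions, not part of the diff.

// Hedged sketch: compose a PV inversion policy with the reconstruction routine.
typedef LatticeFermion Field;
ConjugateGradient<Field> CG(1.0e-12, 10000);        // tolerance/maxit assumed

PauliVillarsSolverRBprec<Field> PVinv(CG);          // red-black (Schur) variant
// PauliVillarsSolverUnprec<Field> PVinv(CG);       // or the unpreconditioned one

Reconstruct5DfromPhysical<Field, PauliVillarsSolverRBprec<Field> > reconstruct(PVinv);
// reconstruct calls PVinv(Matrix,src,sol) internally whenever it needs a PV inverse.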

View File

@ -52,6 +52,7 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk>
#include <Grid/algorithms/CoarsenedMatrix.h>
#include <Grid/algorithms/FFT.h>
// EigCg
// Pcg
// Hdcg

View File

@ -150,13 +150,13 @@ class ConjugateGradient : public OperatorFunction<Field> {
std::cout << GridLogMessage << "\tTrue residual " << true_residual<<std::endl;
std::cout << GridLogMessage << "\tTarget " << Tolerance << std::endl;
std::cout << GridLogPerformance << "Time breakdown "<<std::endl;
std::cout << GridLogPerformance << "\tElapsed " << SolverTimer.Elapsed() <<std::endl;
std::cout << GridLogPerformance << "\tMatrix " << MatrixTimer.Elapsed() <<std::endl;
std::cout << GridLogPerformance << "\tLinalg " << LinalgTimer.Elapsed() <<std::endl;
std::cout << GridLogPerformance << "\tInner " << InnerTimer.Elapsed() <<std::endl;
std::cout << GridLogPerformance << "\tAxpyNorm " << AxpyNormTimer.Elapsed() <<std::endl;
std::cout << GridLogPerformance << "\tLinearComb " << LinearCombTimer.Elapsed() <<std::endl;
std::cout << GridLogMessage << "Time breakdown "<<std::endl;
std::cout << GridLogMessage << "\tElapsed " << SolverTimer.Elapsed() <<std::endl;
std::cout << GridLogMessage << "\tMatrix " << MatrixTimer.Elapsed() <<std::endl;
std::cout << GridLogMessage << "\tLinalg " << LinalgTimer.Elapsed() <<std::endl;
std::cout << GridLogMessage << "\tInner " << InnerTimer.Elapsed() <<std::endl;
std::cout << GridLogMessage << "\tAxpyNorm " << AxpyNormTimer.Elapsed() <<std::endl;
std::cout << GridLogMessage << "\tLinearComb " << LinearCombTimer.Elapsed() <<std::endl;
if (ErrorOnNoConverge) assert(true_residual / Tolerance < 10000.0);

View File

@ -30,6 +30,49 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk>
namespace Grid {
namespace QCD {
template<class Field>
class PauliVillarsSolverUnprec
{
public:
ConjugateGradient<Field> & CG;
PauliVillarsSolverUnprec( ConjugateGradient<Field> &_CG) : CG(_CG){};
template<class Matrix>
void operator() (Matrix &_Matrix,const Field &src,Field &sol)
{
RealD m = _Matrix.Mass();
Field A (_Matrix.FermionGrid());
MdagMLinearOperator<Matrix,Field> HermOp(_Matrix);
_Matrix.SetMass(1.0);
_Matrix.Mdag(src,A);
CG(HermOp,A,sol);
_Matrix.SetMass(m);
};
};
template<class Field>
class PauliVillarsSolverRBprec
{
public:
ConjugateGradient<Field> & CG;
PauliVillarsSolverRBprec( ConjugateGradient<Field> &_CG) : CG(_CG){};
template<class Matrix>
void operator() (Matrix &_Matrix,const Field &src,Field &sol)
{
RealD m = _Matrix.Mass();
Field A (_Matrix.FermionGrid());
_Matrix.SetMass(1.0);
SchurRedBlackDiagMooeeSolve<Field> SchurSolver(CG);
SchurSolver(_Matrix,src,sol);
_Matrix.SetMass(m);
};
};
template<class Field,class PVinverter> class Reconstruct5DfromPhysical {
private:
PVinverter & PauliVillarsSolver;
@ -42,12 +85,20 @@ template<class Field,class PVinverter> class Reconstruct5DfromPhysical {
// of the Mobius exact AMA corrections.
//
// TODO : understand absence of contact term in eqns in Hantao's thesis
// sol4 is contact term subtracted, but thesis & Brower's paper suggests not.
// sol4 is contact term subtracted.
//
// Step 1: Localise PV inverse in a routine. [DONE]
// Options
// a) Defect correction approach:
// 1) Compute defect from current soln (initially a guess).
// This is ...... outerToInner check !!!!
// 2) Deflated Zmobius solve to get 4d soln
// Ensure deflation is working
// 3) Refine 5d Outer using the inner 4d delta soln
//
// Step 1: localise PV inverse in a routine. [DONE]
// Step 2: Schur based PV inverse [DONE]
// Step 3: Fourier accelerated PV inverse [DONE]
//
// Step 3: Fourier accelerated PV inverse
// Step 4:
/////////////////////////////////////////////////////
Reconstruct5DfromPhysical(PVinverter &_PauliVillarsSolver)
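
Restated as an equation, the defect-correction option (a) sketched in this comment block is standard iterative refinement (a transcription of the comment, not new material):

x_{k+1} = x_k + \tilde{M}^{-1}\,(b - M x_k),

where M is the outer 5d operator, \tilde{M}^{-1} the cheap inner solve (e.g. deflated Zmobius), and the iteration stops once \|b - M x_k\| / \|b\| reaches the target residual.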

View File

@ -392,10 +392,14 @@ namespace Grid {
void SeedUniqueString(const std::string &s){
std::vector<int> seeds;
std::stringstream sha;
seeds = GridChecksum::sha256_seeds(s);
for(int i=0;i<seeds.size();i++) {
sha << std::hex << seeds[i];
}
std::cout << GridLogMessage << "Intialising parallel RNG with unique string '"
<< s << "'" << std::endl;
std::cout << GridLogMessage << "Seed SHA256: " << GridChecksum::sha256_string(seeds) << std::endl;
std::cout << GridLogMessage << "Seed SHA256: " << sha.str() << std::endl;
SeedFixedIntegers(seeds);
}
void SeedFixedIntegers(const std::vector<int> &seeds){
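
As a self-contained illustration of the seeding scheme in this hunk (unique string → SHA-256 → integer seeds), here is a hedged sketch assuming OpenSSL's one-shot SHA256(); in Grid itself, GridChecksum::sha256_seeds plays this role.

#include <openssl/sha.h>
#include <string>
#include <vector>

// Hedged sketch: derive RNG seed integers from an arbitrary string.
std::vector<int> seedsFromString(const std::string &s) {
  unsigned char hash[SHA256_DIGEST_LENGTH];
  SHA256(reinterpret_cast<const unsigned char *>(s.data()), s.size(), hash);
  std::vector<int> seeds(hash, hash + SHA256_DIGEST_LENGTH); // one int per byte
  return seeds; // pass to SeedFixedIntegers(seeds)
}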

View File

@ -485,7 +485,7 @@ void InsertSliceLocal(const Lattice<vobj> &lowDim, Lattice<vobj> & higherDim,int
template<class vobj>
void ExtractSliceLocal(Lattice<vobj> &lowDim, const Lattice<vobj> & higherDim,int slice_lo,int slice_hi, int orthog)
void ExtractSliceLocal(Lattice<vobj> &lowDim, Lattice<vobj> & higherDim,int slice_lo,int slice_hi, int orthog)
{
typedef typename vobj::scalar_object sobj;
@ -520,7 +520,7 @@ void ExtractSliceLocal(Lattice<vobj> &lowDim, const Lattice<vobj> & higherDim,in
template<class vobj>
void Replicate(const Lattice<vobj> &coarse,Lattice<vobj> & fine)
void Replicate(Lattice<vobj> &coarse,Lattice<vobj> & fine)
{
typedef typename vobj::scalar_object sobj;

View File

@ -90,20 +90,17 @@ namespace QCD {
// That probably makes for GridRedBlack4dCartesian grid.
// s,sp,c,spc,lc
template<typename vtype> using iSinglet = iScalar<iScalar<iScalar<vtype> > >;
template<typename vtype> using iSpinMatrix = iScalar<iMatrix<iScalar<vtype>, Ns> >;
template<typename vtype> using iColourMatrix = iScalar<iScalar<iMatrix<vtype, Nc> > > ;
template<typename vtype> using iSpinColourMatrix = iScalar<iMatrix<iMatrix<vtype, Nc>, Ns> >;
template<typename vtype> using iLorentzColourMatrix = iVector<iScalar<iMatrix<vtype, Nc> >, Nd > ;
template<typename vtype> using iDoubleStoredColourMatrix = iVector<iScalar<iMatrix<vtype, Nc> >, Nds > ;
template<typename vtype> using iSpinVector = iScalar<iVector<iScalar<vtype>, Ns> >;
template<typename vtype> using iColourVector = iScalar<iScalar<iVector<vtype, Nc> > >;
template<typename vtype> using iSpinColourVector = iScalar<iVector<iVector<vtype, Nc>, Ns> >;
template<typename vtype> using iHalfSpinVector = iScalar<iVector<iScalar<vtype>, Nhs> >;
template<typename vtype> using iHalfSpinColourVector = iScalar<iVector<iVector<vtype, Nc>, Nhs> >;
template<typename vtype> using iSpinColourSpinColourMatrix = iScalar<iMatrix<iMatrix<iMatrix<iMatrix<vtype, Nc>, Ns>, Nc>, Ns> >;
template<typename vtype> using iSinglet = iScalar<iScalar<iScalar<vtype> > >;
template<typename vtype> using iSpinMatrix = iScalar<iMatrix<iScalar<vtype>, Ns> >;
template<typename vtype> using iColourMatrix = iScalar<iScalar<iMatrix<vtype, Nc> > > ;
template<typename vtype> using iSpinColourMatrix = iScalar<iMatrix<iMatrix<vtype, Nc>, Ns> >;
template<typename vtype> using iLorentzColourMatrix = iVector<iScalar<iMatrix<vtype, Nc> >, Nd > ;
template<typename vtype> using iDoubleStoredColourMatrix = iVector<iScalar<iMatrix<vtype, Nc> >, Nds > ;
template<typename vtype> using iSpinVector = iScalar<iVector<iScalar<vtype>, Ns> >;
template<typename vtype> using iColourVector = iScalar<iScalar<iVector<vtype, Nc> > >;
template<typename vtype> using iSpinColourVector = iScalar<iVector<iVector<vtype, Nc>, Ns> >;
template<typename vtype> using iHalfSpinVector = iScalar<iVector<iScalar<vtype>, Nhs> >;
template<typename vtype> using iHalfSpinColourVector = iScalar<iVector<iVector<vtype, Nc>, Nhs> >;
template<typename vtype> using iGparitySpinColourVector = iVector<iVector<iVector<vtype, Nc>, Ns>, Ngp >;
template<typename vtype> using iGparityHalfSpinColourVector = iVector<iVector<iVector<vtype, Nc>, Nhs>, Ngp >;
@ -130,28 +127,10 @@ namespace QCD {
typedef iSpinColourMatrix<Complex > SpinColourMatrix;
typedef iSpinColourMatrix<ComplexF > SpinColourMatrixF;
typedef iSpinColourMatrix<ComplexD > SpinColourMatrixD;
typedef iSpinColourMatrix<vComplex > vSpinColourMatrix;
typedef iSpinColourMatrix<vComplexF> vSpinColourMatrixF;
typedef iSpinColourMatrix<vComplexD> vSpinColourMatrixD;
// SpinColourSpinColour matrix
typedef iSpinColourSpinColourMatrix<Complex > SpinColourSpinColourMatrix;
typedef iSpinColourSpinColourMatrix<ComplexF > SpinColourSpinColourMatrixF;
typedef iSpinColourSpinColourMatrix<ComplexD > SpinColourSpinColourMatrixD;
typedef iSpinColourSpinColourMatrix<vComplex > vSpinColourSpinColourMatrix;
typedef iSpinColourSpinColourMatrix<vComplexF> vSpinColourSpinColourMatrixF;
typedef iSpinColourSpinColourMatrix<vComplexD> vSpinColourSpinColourMatrixD;
// SpinColourSpinColour matrix
typedef iSpinColourSpinColourMatrix<Complex > SpinColourSpinColourMatrix;
typedef iSpinColourSpinColourMatrix<ComplexF > SpinColourSpinColourMatrixF;
typedef iSpinColourSpinColourMatrix<ComplexD > SpinColourSpinColourMatrixD;
typedef iSpinColourSpinColourMatrix<vComplex > vSpinColourSpinColourMatrix;
typedef iSpinColourSpinColourMatrix<vComplexF> vSpinColourSpinColourMatrixF;
typedef iSpinColourSpinColourMatrix<vComplexD> vSpinColourSpinColourMatrixD;
// LorentzColour
typedef iLorentzColourMatrix<Complex > LorentzColourMatrix;
@ -250,9 +229,6 @@ namespace QCD {
typedef Lattice<vSpinColourMatrixF> LatticeSpinColourMatrixF;
typedef Lattice<vSpinColourMatrixD> LatticeSpinColourMatrixD;
typedef Lattice<vSpinColourSpinColourMatrix> LatticeSpinColourSpinColourMatrix;
typedef Lattice<vSpinColourSpinColourMatrixF> LatticeSpinColourSpinColourMatrixF;
typedef Lattice<vSpinColourSpinColourMatrixD> LatticeSpinColourSpinColourMatrixD;
typedef Lattice<vLorentzColourMatrix> LatticeLorentzColourMatrix;
typedef Lattice<vLorentzColourMatrixF> LatticeLorentzColourMatrixF;

View File

@ -485,13 +485,9 @@ void CayleyFermion5D<Impl>::SetCoefficientsInternal(RealD zolo_hi,std::vector<Co
double bpc = b+c;
double bmc = b-c;
_b = b;
_c = c;
_gamma = gamma; // Save the parameters so we can change mass later.
_zolo_hi= zolo_hi;
for(int i=0; i < Ls; i++){
as[i] = 1.0;
omega[i] = _gamma[i]*_zolo_hi; //NB reciprocal relative to Chroma NEF code
omega[i] = gamma[i]*zolo_hi; //NB reciprocal relative to Chroma NEF code
assert(omega[i]!=Coeff_t(0.0));
bs[i] = 0.5*(bpc/omega[i] + bmc);
cs[i] = 0.5*(bpc/omega[i] - bmc);
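
In equation form, this hunk computes (a direct transcription of the code; as the comment notes, omega is reciprocal relative to the Chroma NEF code):

\omega_i = \gamma_i \, z_{\mathrm{hi}}, \qquad
b_i = \frac{1}{2}\left(\frac{b+c}{\omega_i} + (b-c)\right), \qquad
c_i = \frac{1}{2}\left(\frac{b+c}{\omega_i} - (b-c)\right).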

View File

@ -97,10 +97,7 @@ namespace Grid {
// Support for MADWF tricks
///////////////////////////////////////////////////////////////
RealD Mass(void) { return mass; };
void SetMass(RealD _mass) {
mass=_mass;
SetCoefficientsInternal(_zolo_hi,_gamma,_b,_c); // Reset coeffs
} ;
void SetMass(RealD _mass) { mass=_mass; } ;
void P(const FermionField &psi, FermionField &chi);
void Pdag(const FermionField &psi, FermionField &chi);
@ -150,12 +147,6 @@ namespace Grid {
// protected:
RealD mass;
// Save arguments to SetCoefficientsInternal
std::vector<Coeff_t> _gamma;
RealD _zolo_hi;
RealD _b;
RealD _c;
// Cayley form Moebius (tanh and zolotarev)
std::vector<Coeff_t> omega;
std::vector<Coeff_t> bs; // S dependent coeffs

View File

@ -80,24 +80,12 @@ Author: Peter Boyle <pabobyle@ph.ed.ac.uk>
///////////////////////////////////////////////////////////////////////////////
#include <Grid/qcd/action/fermion/g5HermitianLinop.h>
///////////////////////////////////////////////////////////////////////////////
// Fourier accelerated Pauli Villars inverse support
///////////////////////////////////////////////////////////////////////////////
#include <Grid/qcd/action/fermion/WilsonTMFermion5D.h>
////////////////////////////////////////////////////////////////////////////////
// Move this group to a DWF specific tools/algorithms subdir?
////////////////////////////////////////////////////////////////////////////////
#include <Grid/qcd/action/fermion/FourierAcceleratedPV.h>
#include <Grid/qcd/action/fermion/PauliVillarsInverters.h>
#include <Grid/qcd/action/fermion/Reconstruct5Dprop.h>
#include <Grid/qcd/action/fermion/MADWF.h>
////////////////////////////////////////////////////////////////////////////////////////////////////
// More maintainable to keep the following typedef list centrally, as more "impl" targets
// are added, (e.g. extension for gparity, half precision project in comms etc..)
////////////////////////////////////////////////////////////////////////////////////////////////////
// Cayley 5d
namespace Grid {
namespace QCD {

View File

@ -68,6 +68,7 @@ namespace Grid {
virtual int ConstEE(void) { return 1; }; // clover returns zero as EE depends on gauge field
virtual int isTrivialEE(void) { return 0; };
virtual RealD Mass(void) {return 0.0;};
virtual void SetMass(RealD _mass) { return; };
// half checkerboard operations
virtual void Meooe (const FermionField &in, FermionField &out)=0;

View File

@ -141,7 +141,6 @@ namespace QCD {
////////////////////////////////////////////////////////////////////////
#define INHERIT_FIMPL_TYPES(Impl)\
typedef Impl Impl_t; \
typedef typename Impl::FermionField FermionField; \
typedef typename Impl::PropagatorField PropagatorField; \
typedef typename Impl::DoubledGaugeField DoubledGaugeField; \

View File

@ -1,237 +0,0 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./lib/qcd/action/fermion/FourierAcceleratedPV.h
Copyright (C) 2015
Author: Christoph Lehner (lifted with permission by Peter Boyle, brought back to Grid)
Author: Peter Boyle <pabobyle@ph.ed.ac.uk>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#pragma once
namespace Grid {
namespace QCD {
template<typename M>
void get_real_const_bc(M& m, RealD& _b, RealD& _c) {
ComplexD b,c;
b=m.bs[0];
c=m.cs[0];
std::cout << GridLogMessage << "b=" << b << ", c=" << c << std::endl;
for (size_t i=1;i<m.bs.size();i++) {
assert(m.bs[i] == b);
assert(m.cs[i] == c);
}
assert(b.imag() == 0.0);
assert(c.imag() == 0.0);
_b = b.real();
_c = c.real();
}
template<typename Vi, typename M, typename G>
class FourierAcceleratedPV {
public:
ConjugateGradient<Vi> &cg;
M& dwfPV;
G& Umu;
GridCartesian* grid5D;
GridRedBlackCartesian* gridRB5D;
int group_in_s;
FourierAcceleratedPV(M& _dwfPV, G& _Umu, ConjugateGradient<Vi> &_cg, int _group_in_s = 2)
: dwfPV(_dwfPV), Umu(_Umu), cg(_cg), group_in_s(_group_in_s)
{
assert( dwfPV.FermionGrid()->_fdimensions[0] % (2*group_in_s) == 0);
grid5D = QCD::SpaceTimeGrid::makeFiveDimGrid(2*group_in_s, (GridCartesian*)Umu._grid);
gridRB5D = QCD::SpaceTimeGrid::makeFiveDimRedBlackGrid(2*group_in_s, (GridCartesian*)Umu._grid);
}
void rotatePV(const Vi& _src, Vi& dst, bool forward) const {
GridStopWatch gsw1, gsw2;
typedef typename Vi::scalar_type Coeff_t;
int Ls = dst._grid->_fdimensions[0];
Vi _tmp(dst._grid);
double phase = M_PI / (double)Ls;
Coeff_t bzero(0.0,0.0);
FFT theFFT((GridCartesian*)dst._grid);
if (!forward) {
gsw1.Start();
for (int s=0;s<Ls;s++) {
Coeff_t a(::cos(phase*s),-::sin(phase*s));
axpby_ssp(_tmp,a,_src,bzero,_src,s,s);
}
gsw1.Stop();
gsw2.Start();
theFFT.FFT_dim(dst,_tmp,0,FFT::forward);
gsw2.Stop();
} else {
gsw2.Start();
theFFT.FFT_dim(_tmp,_src,0,FFT::backward);
gsw2.Stop();
gsw1.Start();
for (int s=0;s<Ls;s++) {
Coeff_t a(::cos(phase*s),::sin(phase*s));
axpby_ssp(dst,a,_tmp,bzero,_tmp,s,s);
}
gsw1.Stop();
}
std::cout << GridLogMessage << "Timing rotatePV: " << gsw1.Elapsed() << ", " << gsw2.Elapsed() << std::endl;
}
void pvInv(const Vi& _src, Vi& _dst) const {
std::cout << GridLogMessage << "Fourier-Accelerated Outer Pauli Villars"<<std::endl;
typedef typename Vi::scalar_type Coeff_t;
int Ls = _dst._grid->_fdimensions[0];
GridStopWatch gswT;
gswT.Start();
RealD b,c;
get_real_const_bc(dwfPV,b,c);
RealD M5 = dwfPV.M5;
// U(true) Rightinv TMinv U(false) = Minv
Vi _src_diag(_dst._grid);
Vi _src_diag_slice(dwfPV.GaugeGrid());
Vi _dst_diag_slice(dwfPV.GaugeGrid());
Vi _src_diag_slices(grid5D);
Vi _dst_diag_slices(grid5D);
Vi _dst_diag(_dst._grid);
rotatePV(_src,_src_diag,false);
// now do TM solves
Gamma G5(Gamma::Algebra::Gamma5);
GridStopWatch gswA, gswB;
gswA.Start();
typedef typename M::Impl_t Impl;
//WilsonTMFermion<Impl> tm(x.Umu,*x.UGridF,*x.UrbGridF,0.0,0.0,solver_outer.parent.par.wparams_f);
std::vector<RealD> vmass(grid5D->_fdimensions[0],0.0);
std::vector<RealD> vmu(grid5D->_fdimensions[0],0.0);
WilsonTMFermion5D<Impl> tm(Umu,*grid5D,*gridRB5D,
*(GridCartesian*)dwfPV.GaugeGrid(),
*(GridRedBlackCartesian*)dwfPV.GaugeRedBlackGrid(),
vmass,vmu);
//SchurRedBlackDiagTwoSolve<Vi> sol(cg);
SchurRedBlackDiagMooeeSolve<Vi> sol(cg); // same performance as DiagTwo
gswA.Stop();
gswB.Start();
for (int sgroup=0;sgroup<Ls/2/group_in_s;sgroup++) {
for (int sidx=0;sidx<group_in_s;sidx++) {
int s = sgroup*group_in_s + sidx;
int sprime = Ls-s-1;
RealD phase = M_PI / (RealD)Ls * (2.0 * s + 1.0);
RealD cosp = ::cos(phase);
RealD sinp = ::sin(phase);
RealD denom = b*b + c*c + 2.0*b*c*cosp;
RealD mass = -(b*b*M5 + c*(1.0 - cosp + c*M5) + b*(-1.0 + cosp + 2.0*c*cosp*M5))/denom;
RealD mu = (b+c)*sinp/denom;
vmass[2*sidx + 0] = mass;
vmass[2*sidx + 1] = mass;
vmu[2*sidx + 0] = mu;
vmu[2*sidx + 1] = -mu;
}
tm.update(vmass,vmu);
for (int sidx=0;sidx<group_in_s;sidx++) {
int s = sgroup*group_in_s + sidx;
int sprime = Ls-s-1;
ExtractSlice(_src_diag_slice,_src_diag,s,0);
InsertSlice(_src_diag_slice,_src_diag_slices,2*sidx + 0,0);
ExtractSlice(_src_diag_slice,_src_diag,sprime,0);
InsertSlice(_src_diag_slice,_src_diag_slices,2*sidx + 1,0);
}
GridStopWatch gsw;
gsw.Start();
_dst_diag_slices = zero; // zero guess
sol(tm,_src_diag_slices,_dst_diag_slices);
gsw.Stop();
std::cout << GridLogMessage << "Solve[sgroup=" << sgroup << "] completed in " << gsw.Elapsed() << ", " << gswA.Elapsed() << std::endl;
for (int sidx=0;sidx<group_in_s;sidx++) {
int s = sgroup*group_in_s + sidx;
int sprime = Ls-s-1;
RealD phase = M_PI / (RealD)Ls * (2.0 * s + 1.0);
RealD cosp = ::cos(phase);
RealD sinp = ::sin(phase);
// now rotate with inverse of
Coeff_t pA = b + c*cosp;
Coeff_t pB = - Coeff_t(0.0,1.0)*c*sinp;
Coeff_t pABden = pA*pA - pB*pB;
// (pA + pB * G5) * (pA - pB*G5) = (pA^2 - pB^2)
ExtractSlice(_dst_diag_slice,_dst_diag_slices,2*sidx + 0,0);
_dst_diag_slice = (pA/pABden) * _dst_diag_slice - (pB/pABden) * (G5 * _dst_diag_slice);
InsertSlice(_dst_diag_slice,_dst_diag,s,0);
ExtractSlice(_dst_diag_slice,_dst_diag_slices,2*sidx + 1,0);
_dst_diag_slice = (pA/pABden) * _dst_diag_slice + (pB/pABden) * (G5 * _dst_diag_slice);
InsertSlice(_dst_diag_slice,_dst_diag,sprime,0);
}
}
gswB.Stop();
rotatePV(_dst_diag,_dst,true);
gswT.Stop();
std::cout << GridLogMessage << "PV completed in " << gswT.Elapsed() << " (Setup: " << gswA.Elapsed() << ", s-loop: " << gswB.Elapsed() << ")" << std::endl;
}
};
}}
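
Restating the per-slice twisted-mass parameters computed in pvInv in equation form (a transcription of the code above, not an independent derivation): with \varphi_s = \pi(2s+1)/L_s and d_s = b^2 + c^2 + 2bc\cos\varphi_s,

m_s = -\frac{b^2 M_5 + c\,(1 - \cos\varphi_s + c M_5) + b\,(-1 + \cos\varphi_s + 2 c M_5 \cos\varphi_s)}{d_s},
\qquad
\mu_s = \frac{(b+c)\sin\varphi_s}{d_s}.

Each Fourier mode s is solved as a 4d twisted-mass Wilson system, paired with mode L_s - s - 1 (which carries -\mu_s), then rotated back with the (p_A \pm p_B \gamma_5)^{-1} = (p_A \mp p_B \gamma_5)/(p_A^2 - p_B^2) factors visible in the final loop.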

View File

@ -1,193 +0,0 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./lib/algorithms/iterative/MADWF.h
Copyright (C) 2015
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#pragma once
namespace Grid {
namespace QCD {
template <class Fieldi, class Fieldo,IfNotSame<Fieldi,Fieldo> X=0>
inline void convert(const Fieldi &from,Fieldo &to)
{
precisionChange(to,from);
}
template <class Fieldi, class Fieldo,IfSame<Fieldi,Fieldo> X=0>
inline void convert(const Fieldi &from,Fieldo &to)
{
to=from;
}
template<class Matrixo,class Matrixi,class PVinverter,class SchurSolver, class Guesser>
class MADWF
{
private:
typedef typename Matrixo::FermionField FermionFieldo;
typedef typename Matrixi::FermionField FermionFieldi;
PVinverter & PauliVillarsSolvero;// For the outer field
SchurSolver & SchurSolveri; // For the inner approx field
Guesser & Guesseri; // To deflate the inner approx solves
Matrixo & Mato; // Action object for outer
Matrixi & Mati; // Action object for inner
RealD target_resid;
int maxiter;
public:
MADWF(Matrixo &_Mato,
Matrixi &_Mati,
PVinverter &_PauliVillarsSolvero,
SchurSolver &_SchurSolveri,
Guesser & _Guesseri,
RealD resid,
int _maxiter) :
Mato(_Mato),Mati(_Mati),
SchurSolveri(_SchurSolveri),
PauliVillarsSolvero(_PauliVillarsSolvero),Guesseri(_Guesseri)
{
target_resid=resid;
maxiter =_maxiter;
};
void operator() (const FermionFieldo &src4,FermionFieldo &sol5)
{
std::cout << GridLogMessage<< " ************************************************" << std::endl;
std::cout << GridLogMessage<< " MADWF-like algorithm " << std::endl;
std::cout << GridLogMessage<< " ************************************************" << std::endl;
FermionFieldi c0i(Mati.GaugeGrid()); // 4d
FermionFieldi y0i(Mati.GaugeGrid()); // 4d
FermionFieldo c0 (Mato.GaugeGrid()); // 4d
FermionFieldo y0 (Mato.GaugeGrid()); // 4d
FermionFieldo A(Mato.FermionGrid()); // Temporary outer
FermionFieldo B(Mato.FermionGrid()); // Temporary outer
FermionFieldo b(Mato.FermionGrid()); // 5d source
FermionFieldo c(Mato.FermionGrid()); // PVinv source; reused so store
FermionFieldo defect(Mato.FermionGrid()); // 5d source
FermionFieldi ci(Mati.FermionGrid());
FermionFieldi yi(Mati.FermionGrid());
FermionFieldi xi(Mati.FermionGrid());
FermionFieldi srci(Mati.FermionGrid());
FermionFieldi Ai(Mati.FermionGrid());
RealD m=Mati.Mass();
///////////////////////////////////////
//Import source, include Dminus factors
///////////////////////////////////////
Mato.ImportPhysicalFermionSource(src4,b);
std::cout << GridLogMessage << " src4 " <<norm2(src4)<<std::endl;
std::cout << GridLogMessage << " b " <<norm2(b)<<std::endl;
defect = b;
sol5=zero;
for (int i=0;i<maxiter;i++) {
///////////////////////////////////////
// Set up c0 from current defect
///////////////////////////////////////
PauliVillarsSolvero(Mato,defect,A);
Mato.Pdag(A,c);
ExtractSlice(c0, c, 0 , 0);
////////////////////////////////////////////////
// Solve the inner system with surface term c0
////////////////////////////////////////////////
ci = zero;
convert(c0,c0i); // Possible precision change
InsertSlice(c0i,ci,0, 0);
// Dwm P y = Dwm x = D(1) P (c0,0,0,0)^T
Mati.P(ci,Ai);
Mati.SetMass(1.0); Mati.M(Ai,srci); Mati.SetMass(m);
SchurSolveri(Mati,srci,xi,Guesseri);
Mati.Pdag(xi,yi);
ExtractSlice(y0i, yi, 0 , 0);
convert(y0i,y0); // Possible precision change
//////////////////////////////////////
// Propagate solution back to outer system
// Build Pdag PV^-1 Dm P [-sol4,c2,c3... cL]
//////////////////////////////////////
c0 = - y0;
InsertSlice(c0, c, 0 , 0);
/////////////////////////////
// Reconstruct the bulk solution Pdag PV^-1 Dm P
/////////////////////////////
Mato.P(c,B);
Mato.M(B,A);
PauliVillarsSolvero(Mato,A,B);
Mato.Pdag(B,A);
//////////////////////////////
// Reinsert surface prop
//////////////////////////////
InsertSlice(y0,A,0,0);
//////////////////////////////
// Convert from y back to x
//////////////////////////////
Mato.P(A,B);
// sol5' = sol5 + M^-1 defect
// = sol5 + M^-1 src - M^-1 M sol5 ...
sol5 = sol5 + B;
std::cout << GridLogMessage << "***************************************" <<std::endl;
std::cout << GridLogMessage << " Sol5 update "<<std::endl;
std::cout << GridLogMessage << "***************************************" <<std::endl;
std::cout << GridLogMessage << " Sol5 now "<<norm2(sol5)<<std::endl;
std::cout << GridLogMessage << " delta "<<norm2(B)<<std::endl;
// New defect = b - M sol5
Mato.M(sol5,A);
defect = b - A;
std::cout << GridLogMessage << " defect "<<norm2(defect)<<std::endl;
double resid = ::sqrt(norm2(defect) / norm2(b));
std::cout << GridLogMessage << "Residual " << i << ": " << resid << std::endl;
std::cout << GridLogMessage << "***************************************" <<std::endl;
if (resid < target_resid) {
return;
}
}
std::cout << GridLogMessage << "MADWF : Exceeded maxiter "<<std::endl;
assert(0);
}
};
}}
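
A hedged usage sketch for the MADWF template above; every concrete object here (the outer action Dout, the cheaper inner action Din, tolerances, iteration counts) is an assumption standing in for user setup, not part of the diff.

// Hedged sketch: wire MADWF together from the pieces defined in this compare.
ConjugateGradient<LatticeFermion> CGinner(1.0e-8, 10000);
SchurRedBlackDiagMooeeSolve<LatticeFermion> SchurSolver(CGinner);
ZeroGuesser<LatticeFermion> Guess;                   // or a deflated guesser
ConjugateGradient<LatticeFermion> CGouter(1.0e-12, 10000);
PauliVillarsSolverUnprec<LatticeFermion> PVinv(CGouter);

MADWF<MobiusFermionD, ZMobiusFermionD,
      decltype(PVinv), decltype(SchurSolver), decltype(Guess)>
  madwf(Dout, Din, PVinv, SchurSolver, Guess, 1.0e-10, 20);

madwf(src4, sol5); // 4d physical source in, 5d outer solution out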

View File

@ -1,95 +0,0 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./lib/algorithms/iterative/SchurRedBlack.h
Copyright (C) 2015
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#pragma once
namespace Grid {
namespace QCD {
template<class Field>
class PauliVillarsSolverUnprec
{
public:
ConjugateGradient<Field> & CG;
PauliVillarsSolverUnprec( ConjugateGradient<Field> &_CG) : CG(_CG){};
template<class Matrix>
void operator() (Matrix &_Matrix,const Field &src,Field &sol)
{
RealD m = _Matrix.Mass();
Field A (_Matrix.FermionGrid());
MdagMLinearOperator<Matrix,Field> HermOp(_Matrix);
_Matrix.SetMass(1.0);
_Matrix.Mdag(src,A);
CG(HermOp,A,sol);
_Matrix.SetMass(m);
};
};
template<class Field,class SchurSolverType>
class PauliVillarsSolverRBprec
{
public:
SchurSolverType & SchurSolver;
PauliVillarsSolverRBprec( SchurSolverType &_SchurSolver) : SchurSolver(_SchurSolver){};
template<class Matrix>
void operator() (Matrix &_Matrix,const Field &src,Field &sol)
{
RealD m = _Matrix.Mass();
Field A (_Matrix.FermionGrid());
_Matrix.SetMass(1.0);
SchurSolver(_Matrix,src,sol);
_Matrix.SetMass(m);
};
};
template<class Field,class GaugeField>
class PauliVillarsSolverFourierAccel
{
public:
GaugeField & Umu;
ConjugateGradient<Field> & CG;
PauliVillarsSolverFourierAccel(GaugeField &_Umu,ConjugateGradient<Field> &_CG) : Umu(_Umu), CG(_CG)
{
};
template<class Matrix>
void operator() (Matrix &_Matrix,const Field &src,Field &sol)
{
FourierAcceleratedPV<Field, Matrix, typename Matrix::GaugeField > faPV(_Matrix,Umu,CG) ;
faPV.pvInv(src,sol);
};
};
}
}
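
A corresponding hedged sketch for the Fourier-accelerated variant, which additionally needs the gauge field (all object names are assumed):

LatticeGaugeField Umu(UGrid);                        // gauge field, loaded elsewhere
ConjugateGradient<LatticeFermion> CG(1.0e-12, 10000);
PauliVillarsSolverFourierAccel<LatticeFermion, LatticeGaugeField> PVfa(Umu, CG);
PVfa(DPV, src5, sol5);                               // DPV: 5d action for the PV inverse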

View File

@ -1,155 +0,0 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./lib/qcd/action/fermion/WilsonTMFermion5D.h
Copyright (C) 2015
Author: paboyle <paboyle@ph.ed.ac.uk> ; NB Christoph did similar in GPT
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#pragma once
#include <Grid/qcd/action/fermion/FermionCore.h>
#include <Grid/qcd/action/fermion/WilsonFermion.h>
namespace Grid {
namespace QCD {
template<class Impl>
class WilsonTMFermion5D : public WilsonFermion5D<Impl>
{
public:
INHERIT_IMPL_TYPES(Impl);
public:
virtual void Instantiatable(void) {};
// Constructors
WilsonTMFermion5D(GaugeField &_Umu,
GridCartesian &Fgrid,
GridRedBlackCartesian &Frbgrid,
GridCartesian &Ugrid,
GridRedBlackCartesian &Urbgrid,
const std::vector<RealD> _mass,
const std::vector<RealD> _mu,
const ImplParams &p= ImplParams()
) :
WilsonFermion5D<Impl>(_Umu,
Fgrid,
Frbgrid,
Ugrid,
Urbgrid,
4.0,p)
{
update(_mass,_mu);
}
virtual void Meooe(const FermionField &in, FermionField &out) {
if (in.checkerboard == Odd) {
this->DhopEO(in, out, DaggerNo);
} else {
this->DhopOE(in, out, DaggerNo);
}
}
virtual void MeooeDag(const FermionField &in, FermionField &out) {
if (in.checkerboard == Odd) {
this->DhopEO(in, out, DaggerYes);
} else {
this->DhopOE(in, out, DaggerYes);
}
}
// allow override for twisted mass and clover
virtual void Mooee(const FermionField &in, FermionField &out) {
out.checkerboard = in.checkerboard;
//axpibg5x(out,in,a,b); // out = a*in + b*i*G5*in
for (int s=0;s<(int)this->mass.size();s++) {
ComplexD a = 4.0+this->mass[s];
ComplexD b(0.0,this->mu[s]);
axpbg5y_ssp(out,a,in,b,in,s,s);
}
}
virtual void MooeeDag(const FermionField &in, FermionField &out) {
out.checkerboard = in.checkerboard;
for (int s=0;s<(int)this->mass.size();s++) {
ComplexD a = 4.0+this->mass[s];
ComplexD b(0.0,-this->mu[s]);
axpbg5y_ssp(out,a,in,b,in,s,s);
}
}
virtual void MooeeInv(const FermionField &in, FermionField &out) {
for (int s=0;s<(int)this->mass.size();s++) {
RealD m = this->mass[s];
RealD tm = this->mu[s];
RealD mtil = 4.0+this->mass[s];
RealD sq = mtil*mtil+tm*tm;
ComplexD a = mtil/sq;
ComplexD b(0.0, -tm /sq);
axpbg5y_ssp(out,a,in,b,in,s,s);
}
}
virtual void MooeeInvDag(const FermionField &in, FermionField &out) {
for (int s=0;s<(int)this->mass.size();s++) {
RealD m = this->mass[s];
RealD tm = this->mu[s];
RealD mtil = 4.0+this->mass[s];
RealD sq = mtil*mtil+tm*tm;
ComplexD a = mtil/sq;
ComplexD b(0.0,tm /sq);
axpbg5y_ssp(out,a,in,b,in,s,s);
}
}
virtual RealD M(const FermionField &in, FermionField &out) {
out.checkerboard = in.checkerboard;
this->Dhop(in, out, DaggerNo);
FermionField tmp(out._grid);
for (int s=0;s<(int)this->mass.size();s++) {
ComplexD a = 4.0+this->mass[s];
ComplexD b(0.0,this->mu[s]);
axpbg5y_ssp(tmp,a,in,b,in,s,s);
}
return axpy_norm(out, 1.0, tmp, out);
}
// needed for fast PV
void update(const std::vector<RealD>& _mass, const std::vector<RealD>& _mu) {
assert(_mass.size() == _mu.size());
assert(_mass.size() == this->FermionGrid()->_fdimensions[0]);
this->mass = _mass;
this->mu = _mu;
}
private:
std::vector<RealD> mu;
std::vector<RealD> mass;
};
typedef WilsonTMFermion5D<WilsonImplF> WilsonTMFermion5DF;
typedef WilsonTMFermion5D<WilsonImplD> WilsonTMFermion5DD;
}}
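
In equations, the diagonal-in-s pieces above implement, per slice s (transcribing Mooee and MooeeInv):

M_{ee}(s) = (4 + m_s) + i \mu_s \gamma_5,
\qquad
M_{ee}^{-1}(s) = \frac{(4 + m_s) - i \mu_s \gamma_5}{(4 + m_s)^2 + \mu_s^2},

which follows from (A + iB\gamma_5)(A - iB\gamma_5) = A^2 + B^2 for real A, B, since \gamma_5^2 = 1.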

View File

@ -27,13 +27,12 @@ public:
typedef iSpinColourMatrix<vector_type> SpinColourMatrix_v;
template <typename TensorType> // output: rank 5 tensor, e.g. Eigen::Tensor<ComplexD, 5>
static void MesonField(TensorType &mat,
static void MesonField(Eigen::Tensor<ComplexD,5> &mat,
const FermionField *lhs_wi,
const FermionField *rhs_vj,
std::vector<Gamma::Algebra> gammas,
const std::vector<ComplexField > &mom,
int orthogdim, double *t_kernel = nullptr, double *t_gsum = nullptr);
int orthogdim);
static void PionFieldWVmom(Eigen::Tensor<ComplexD,4> &mat,
const FermionField *wi,
@ -60,14 +59,6 @@ public:
const FermionField *vj,
int orthogdim);
template <typename TensorType> // output: rank 5 tensor, e.g. Eigen::Tensor<ComplexD, 5>
static void AslashField(TensorType &mat,
const FermionField *lhs_wi,
const FermionField *rhs_vj,
const std::vector<ComplexField> &emB0,
const std::vector<ComplexField> &emB1,
int orthogdim, double *t_kernel = nullptr, double *t_gsum = nullptr);
static void ContractWWVV(std::vector<PropagatorField> &WWVV,
const Eigen::Tensor<ComplexD,3> &WW_sd,
const FermionField *vs,
@ -101,14 +92,13 @@ public:
#endif
};
template <class FImpl>
template <typename TensorType>
void A2Autils<FImpl>::MesonField(TensorType &mat,
template<class FImpl>
void A2Autils<FImpl>::MesonField(Eigen::Tensor<ComplexD,5> &mat,
const FermionField *lhs_wi,
const FermionField *rhs_vj,
std::vector<Gamma::Algebra> gammas,
const std::vector<ComplexField > &mom,
int orthogdim, double *t_kernel, double *t_gsum)
int orthogdim)
{
typedef typename FImpl::SiteSpinor vobj;
@ -156,7 +146,6 @@ void A2Autils<FImpl>::MesonField(TensorType &mat,
int stride=grid->_slice_stride[orthogdim];
// potentially wasting cores here if local time extent too small
if (t_kernel) *t_kernel = -usecond();
parallel_for(int r=0;r<rd;r++){
int so=r*grid->_ostride[orthogdim]; // base offset for start of plane
@ -223,7 +212,7 @@ void A2Autils<FImpl>::MesonField(TensorType &mat,
}
}}}
}
if (t_kernel) *t_kernel += usecond();
assert(mat.dimension(0) == Nmom);
assert(mat.dimension(1) == Ngamma);
assert(mat.dimension(2) == Nt);
@ -267,9 +256,9 @@ void A2Autils<FImpl>::MesonField(TensorType &mat,
// Vector size is 7 x 16 x 32 x 16 x 16 x sizeof(complex) = 2MB - 60MB depending on volume
// Healthy size that should suffice
////////////////////////////////////////////////////////////////////
if (t_gsum) *t_gsum = -usecond();
grid->GlobalSumVector(&mat(0,0,0,0,0),Nmom*Ngamma*Nt*Lblock*Rblock);
if (t_gsum) *t_gsum += usecond();
}
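
Schematically, and up to phase conventions, the kernel in this hunk computes the standard A2A meson field, with the index order fixed by the assertions above (momentum, gamma, time, left mode, right mode):

M(p, \Gamma, t; i, j) = \sum_{\vec{x}} w_i^\dagger(\vec{x}, t)\, \Gamma\, v_j(\vec{x}, t)\, e^{i \vec{p} \cdot \vec{x}}.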
@ -625,189 +614,6 @@ void A2Autils<FImpl>::PionFieldVV(Eigen::Tensor<ComplexD,3> &mat,
PionFieldXX(mat,vi,vj,orthogdim,nog5);
}
// "A-slash" field w_i(x)^dag * i * A_mu * gamma_mu * v_j(x)
//
// With:
//
// B_0 = A_0 + i A_1
// B_1 = A_2 + i A_3
//
// then in spin space
//
// ( 0 0 -conj(B_1) -B_0 )
// i * A_mu g_mu = ( 0 0 -conj(B_0) B_1 )
// ( B_1 B_0 0 0 )
// ( conj(B_0) -conj(B_1) 0 0 )
template <class FImpl>
template <typename TensorType>
void A2Autils<FImpl>::AslashField(TensorType &mat,
const FermionField *lhs_wi,
const FermionField *rhs_vj,
const std::vector<ComplexField> &emB0,
const std::vector<ComplexField> &emB1,
int orthogdim, double *t_kernel, double *t_gsum)
{
typedef typename FermionField::vector_object vobj;
typedef typename vobj::scalar_object sobj;
typedef typename vobj::scalar_type scalar_type;
typedef typename vobj::vector_type vector_type;
typedef iSpinMatrix<vector_type> SpinMatrix_v;
typedef iSpinMatrix<scalar_type> SpinMatrix_s;
typedef iSinglet<vector_type> Singlet_v;
typedef iSinglet<scalar_type> Singlet_s;
int Lblock = mat.dimension(3);
int Rblock = mat.dimension(4);
GridBase *grid = lhs_wi[0]._grid;
const int Nd = grid->_ndimension;
const int Nsimd = grid->Nsimd();
int Nt = grid->GlobalDimensions()[orthogdim];
int Nem = emB0.size();
assert(emB1.size() == Nem);
int fd=grid->_fdimensions[orthogdim];
int ld=grid->_ldimensions[orthogdim];
int rd=grid->_rdimensions[orthogdim];
// will locally sum vectors first
// sum across these down to scalars
// splitting the SIMD
int MFrvol = rd*Lblock*Rblock*Nem;
int MFlvol = ld*Lblock*Rblock*Nem;
Vector<vector_type> lvSum(MFrvol);
parallel_for (int r = 0; r < MFrvol; r++)
{
lvSum[r] = zero;
}
Vector<scalar_type> lsSum(MFlvol);
parallel_for (int r = 0; r < MFlvol; r++)
{
lsSum[r] = scalar_type(0.0);
}
int e1= grid->_slice_nblock[orthogdim];
int e2= grid->_slice_block [orthogdim];
int stride=grid->_slice_stride[orthogdim];
// Nested parallelism would be ok
// Wasting cores here. Test case r
if (t_kernel) *t_kernel = -usecond();
parallel_for(int r=0;r<rd;r++)
{
int so=r*grid->_ostride[orthogdim]; // base offset for start of plane
for(int n=0;n<e1;n++)
for(int b=0;b<e2;b++)
{
int ss= so+n*stride+b;
for(int i=0;i<Lblock;i++)
{
auto left = conjugate(lhs_wi[i]._odata[ss]);
for(int j=0;j<Rblock;j++)
{
SpinMatrix_v vv;
auto right = rhs_vj[j]._odata[ss];
for(int s1=0;s1<Ns;s1++)
for(int s2=0;s2<Ns;s2++)
{
vv()(s1,s2)() = left()(s2)(0) * right()(s1)(0)
+ left()(s2)(1) * right()(s1)(1)
+ left()(s2)(2) * right()(s1)(2);
}
// After getting the sitewise product do the mom phase loop
int base = Nem*i+Nem*Lblock*j+Nem*Lblock*Rblock*r;
for ( int m=0;m<Nem;m++)
{
int idx = m+base;
auto b0 = emB0[m]._odata[ss];
auto b1 = emB1[m]._odata[ss];
auto cb0 = conjugate(b0);
auto cb1 = conjugate(b1);
lvSum[idx] += - vv()(3,0)()*b0()()() - vv()(2,0)()*cb1()()()
+ vv()(3,1)()*b1()()() - vv()(2,1)()*cb0()()()
+ vv()(0,2)()*b1()()() + vv()(1,2)()*b0()()()
+ vv()(0,3)()*cb0()()() - vv()(1,3)()*cb1()()();
}
}
}
}
}
// Sum across simd lanes in the plane, breaking out orthog dir.
parallel_for(int rt=0;rt<rd;rt++)
{
std::vector<int> icoor(Nd);
std::vector<scalar_type> extracted(Nsimd);
for(int i=0;i<Lblock;i++)
for(int j=0;j<Rblock;j++)
for(int m=0;m<Nem;m++)
{
int ij_rdx = m+Nem*i+Nem*Lblock*j+Nem*Lblock*Rblock*rt;
extract<vector_type,scalar_type>(lvSum[ij_rdx],extracted);
for(int idx=0;idx<Nsimd;idx++)
{
grid->iCoorFromIindex(icoor,idx);
int ldx = rt+icoor[orthogdim]*rd;
int ij_ldx = m+Nem*i+Nem*Lblock*j+Nem*Lblock*Rblock*ldx;
lsSum[ij_ldx]=lsSum[ij_ldx]+extracted[idx];
}
}
}
if (t_kernel) *t_kernel += usecond();
// ld loop and local only??
int pd = grid->_processors[orthogdim];
int pc = grid->_processor_coor[orthogdim];
parallel_for_nest2(int lt=0;lt<ld;lt++)
{
for(int pt=0;pt<pd;pt++)
{
int t = lt + pt*ld;
if (pt == pc)
{
for(int i=0;i<Lblock;i++)
for(int j=0;j<Rblock;j++)
for(int m=0;m<Nem;m++)
{
int ij_dx = m+Nem*i + Nem*Lblock * j + Nem*Lblock * Rblock * lt;
mat(m,0,t,i,j) = lsSum[ij_dx];
}
}
else
{
const scalar_type zz(0.0);
for(int i=0;i<Lblock;i++)
for(int j=0;j<Rblock;j++)
for(int m=0;m<Nem;m++)
{
mat(m,0,t,i,j) = zz;
}
}
}
}
if (t_gsum) *t_gsum = -usecond();
grid->GlobalSumVector(&mat(0,0,0,0,0),Nem*Nt*Lblock*Rblock);
if (t_gsum) *t_gsum += usecond();
}
////////////////////////////////////////////
// Schematic thoughts about more generalised four quark insertion

View File

@ -38,21 +38,7 @@ public:
{
return ::crc32(0L,(unsigned char *)data,bytes);
}
template <typename T>
static inline std::string sha256_string(const std::vector<T> &hash)
{
std::stringstream sha;
std::string s;
for(unsigned int i = 0; i < hash.size(); i++)
{
sha << std::hex << static_cast<unsigned int>(hash[i]);
}
s = sha.str();
return s;
}
static inline std::vector<unsigned char> sha256(const void *data,size_t bytes)
static inline std::vector<unsigned char> sha256(void *data,size_t bytes)
{
std::vector<unsigned char> hash(SHA256_DIGEST_LENGTH);
SHA256_CTX sha256;

View File

@ -7,7 +7,6 @@ Source file: Hadrons/A2AMatrix.hpp
Copyright (C) 2015-2018
Author: Antonin Portelli <antonin.portelli@me.com>
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@ -30,128 +29,38 @@ See the full license in the file "LICENSE" in the top level distribution directo
#define A2A_Matrix_hpp_
#include <Hadrons/Global.hpp>
#include <Hadrons/TimerArray.hpp>
#include <Grid/Eigen/unsupported/CXX11/Tensor>
#ifndef HADRONS_A2AM_NAME
#define HADRONS_A2AM_NAME "a2aMatrix"
#endif
#define HADRONS_A2AM_PARALLEL_IO
BEGIN_HADRONS_NAMESPACE
// general A2A matrix set based on Eigen tensors and Grid-allocated memory
// Dimensions:
// 0 - ext - external field (momentum, EM field, ...)
// 1 - str - spin-color structure
// 2 - t - timeslice
// 3 - i - left A2A mode index
// 4 - j - right A2A mode index
template <typename T>
using A2AMatrixSet = Eigen::TensorMap<Eigen::Tensor<T, 5, Eigen::RowMajor>>;
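// For reference: A2AMatrixSet is a non-owning Eigen TensorMap over a
// caller-supplied buffer. A minimal, hedged sketch of its use (sizes assumed),
// mirroring mBlock in the computation class below:
//
//   Vector<ComplexD> buf(next * nstr * nt * ni * nj);  // Grid-allocated memory
//   A2AMatrixSet<ComplexD> m(buf.data(), next, nstr, nt, ni, nj);
//   m(e, s, t, i, j) = ...; // ext, structure, timeslice, left mode, right mode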
/******************************************************************************
* Abstract class for A2A kernels *
******************************************************************************/
template <typename T, typename Field>
class A2AKernel
{
public:
A2AKernel(void) = default;
virtual ~A2AKernel(void) = default;
virtual void operator()(A2AMatrixSet<T> &m, const Field *left, const Field *right,
const unsigned int orthogDim, double &time) = 0;
virtual double flops(const unsigned int blockSizei, const unsigned int blockSizej) = 0;
virtual double bytes(const unsigned int blockSizei, const unsigned int blockSizej) = 0;
};
/******************************************************************************
* Class to handle A2A matrix block HDF5 I/O *
******************************************************************************/
template <typename T>
template <typename T, typename MetadataType>
class A2AMatrixIo
{
public:
// constructors
A2AMatrixIo(void) = default;
A2AMatrixIo(std::string filename, std::string dataname,
const unsigned int nt, const unsigned int ni,
const unsigned int nj);
// destructor
~A2AMatrixIo(void) = default;
// file allocation
template <typename MetadataType>
void initFile(const MetadataType &d, const unsigned int chunkSize);
// block I/O
void saveBlock(const T *data, const unsigned int i, const unsigned int j,
const unsigned int blockSizei, const unsigned int blockSizej);
void saveBlock(const A2AMatrixSet<T> &m, const unsigned int ext, const unsigned int str,
const unsigned int i, const unsigned int j);
private:
std::string filename_, dataname_;
unsigned int nt_, ni_, nj_;
};
/******************************************************************************
* Wrapper for A2A matrix block computation *
******************************************************************************/
template <typename T, typename Field, typename MetadataType, typename TIo = T>
class A2AMatrixBlockComputation
{
private:
struct IoHelper
{
A2AMatrixIo<TIo> io;
MetadataType md;
unsigned int e, s, i, j;
};
typedef std::function<std::string(const unsigned int, const unsigned int)> FilenameFn;
typedef std::function<MetadataType(const unsigned int, const unsigned int)> MetadataFn;
public:
// constructor
A2AMatrixBlockComputation(GridBase *grid,
const unsigned int orthogDim,
const unsigned int next,
const unsigned int nstr,
const unsigned int blockSize,
const unsigned int cacheBlockSize,
TimerArray *tArray = nullptr);
// execution
void execute(const std::vector<Field> &left,
const std::vector<Field> &right,
A2AKernel<T, Field> &kernel,
const FilenameFn &ionameFn,
const FilenameFn &filenameFn,
const MetadataFn &metadataFn);
private:
// I/O handler
void saveBlock(const A2AMatrixSet<TIo> &m, IoHelper &h);
private:
TimerArray *tArray_;
GridBase *grid_;
unsigned int orthogDim_, nt_, next_, nstr_, blockSize_, cacheBlockSize_;
Vector<T> mCache_;
Vector<TIo> mBuf_;
std::vector<IoHelper> nodeIo_;
};
/******************************************************************************
* A2AMatrixIo template implementation *
******************************************************************************/
// constructor /////////////////////////////////////////////////////////////////
template <typename T>
A2AMatrixIo<T>::A2AMatrixIo(std::string filename, std::string dataname,
const unsigned int nt, const unsigned int ni,
const unsigned int nj)
template <typename T, typename MetadataType>
A2AMatrixIo<T, MetadataType>::A2AMatrixIo(std::string filename,
std::string dataname,
const unsigned int nt,
const unsigned int ni,
const unsigned int nj)
: filename_(filename), dataname_(dataname)
, nt_(nt), ni_(ni), nj_(nj)
{}
// file allocation /////////////////////////////////////////////////////////////
template <typename T>
template <typename MetadataType>
void A2AMatrixIo<T>::initFile(const MetadataType &d, const unsigned int chunkSize)
template <typename T, typename MetadataType>
void A2AMatrixIo<T, MetadataType>::initFile(const MetadataType &d, const unsigned int chunkSize)
{
#ifdef HAVE_HDF5
std::vector<hsize_t> dim = {static_cast<hsize_t>(nt_),
@ -176,19 +85,18 @@ void A2AMatrixIo<T>::initFile(const MetadataType &d, const unsigned int chunkSiz
push(reader, dataname_);
auto &group = reader.getGroup();
plist.setChunk(chunk.size(), chunk.data());
dataset = group.createDataSet(HADRONS_A2AM_NAME, Hdf5Type<T>::type(), dataspace, plist);
dataset = group.createDataSet("data", Hdf5Type<T>::type(), dataspace, plist);
#else
HADRONS_ERROR(Implementation, "all-to-all matrix I/O needs HDF5 library");
#endif
}
// block I/O ///////////////////////////////////////////////////////////////////
template <typename T>
void A2AMatrixIo<T>::saveBlock(const T *data,
const unsigned int i,
const unsigned int j,
const unsigned int blockSizei,
const unsigned int blockSizej)
template <typename T, typename MetadataType>
void A2AMatrixIo<T, MetadataType>::saveBlock(const T *data,
const unsigned int i,
const unsigned int j,
const unsigned int blockSizei,
const unsigned int blockSizej)
{
#ifdef HAVE_HDF5
Hdf5Reader reader(filename_);
@ -203,7 +111,7 @@ void A2AMatrixIo<T>::saveBlock(const T *data,
push(reader, dataname_);
auto &group = reader.getGroup();
dataset = group.openDataSet(HADRONS_A2AM_NAME);
dataset = group.openDataSet("data");
dataspace = dataset.getSpace();
dataspace.selectHyperslab(H5S_SELECT_SET, count.data(), offset.data(),
stride.data(), block.data());
@ -213,193 +121,6 @@ void A2AMatrixIo<T>::saveBlock(const T *data,
#endif
}
template <typename T>
void A2AMatrixIo<T>::saveBlock(const A2AMatrixSet<T> &m,
const unsigned int ext, const unsigned int str,
const unsigned int i, const unsigned int j)
{
unsigned int blockSizei = m.dimension(3);
unsigned int blockSizej = m.dimension(4);
unsigned int nstr = m.dimension(1);
size_t offset = (ext*nstr + str)*nt_*blockSizei*blockSizej;
saveBlock(m.data() + offset, i, j, blockSizei, blockSizej);
}
/******************************************************************************
* A2AMatrixBlockComputation template implementation *
******************************************************************************/
// constructor /////////////////////////////////////////////////////////////////
template <typename T, typename Field, typename MetadataType, typename TIo>
A2AMatrixBlockComputation<T, Field, MetadataType, TIo>
::A2AMatrixBlockComputation(GridBase *grid,
const unsigned int orthogDim,
const unsigned int next,
const unsigned int nstr,
const unsigned int blockSize,
const unsigned int cacheBlockSize,
TimerArray *tArray)
: grid_(grid), nt_(grid->GlobalDimensions()[orthogDim]), orthogDim_(orthogDim)
, next_(next), nstr_(nstr), blockSize_(blockSize), cacheBlockSize_(cacheBlockSize)
, tArray_(tArray)
{
mCache_.resize(nt_*next_*nstr_*cacheBlockSize_*cacheBlockSize_);
mBuf_.resize(nt_*next_*nstr_*blockSize_*blockSize_);
}
#define START_TIMER(name) if (tArray_) tArray_->startTimer(name)
#define STOP_TIMER(name) if (tArray_) tArray_->stopTimer(name)
#define GET_TIMER(name) ((tArray_ != nullptr) ? tArray_->getDTimer(name) : 0.)
// execution ///////////////////////////////////////////////////////////////////
template <typename T, typename Field, typename MetadataType, typename TIo>
void A2AMatrixBlockComputation<T, Field, MetadataType, TIo>
::execute(const std::vector<Field> &left, const std::vector<Field> &right,
A2AKernel<T, Field> &kernel, const FilenameFn &ionameFn,
const FilenameFn &filenameFn, const MetadataFn &metadataFn)
{
//////////////////////////////////////////////////////////////////////////
// i,j is first loop over blockSize_ factors
// ii,jj is second loop over cacheBlockSize_ factors for high perf contractions
// iii,jjj are loops within cacheBlock
// Total index is sum of these i+ii+iii etc...
//////////////////////////////////////////////////////////////////////////
int N_i = left.size();
int N_j = right.size();
double flops, bytes, t_kernel;
double nodes = grid_->NodeCount();
int NBlock_i = N_i/blockSize_ + (((N_i % blockSize_) != 0) ? 1 : 0);
int NBlock_j = N_j/blockSize_ + (((N_j % blockSize_) != 0) ? 1 : 0);
for(int i=0;i<N_i;i+=blockSize_)
for(int j=0;j<N_j;j+=blockSize_)
{
// Get the W and V vectors for this block^2 set of terms
int N_ii = MIN(N_i-i,blockSize_);
int N_jj = MIN(N_j-j,blockSize_);
A2AMatrixSet<TIo> mBlock(mBuf_.data(), next_, nstr_, nt_, N_ii, N_jj);
LOG(Message) << "All-to-all matrix block "
<< j/blockSize_ + NBlock_j*i/blockSize_ + 1
<< "/" << NBlock_i*NBlock_j << " [" << i <<" .. "
<< i+N_ii-1 << ", " << j <<" .. " << j+N_jj-1 << "]"
<< std::endl;
// Series of cache blocked chunks of the contractions within this block
flops = 0.0;
bytes = 0.0;
t_kernel = 0.0;
for(int ii=0;ii<N_ii;ii+=cacheBlockSize_)
for(int jj=0;jj<N_jj;jj+=cacheBlockSize_)
{
double t;
int N_iii = MIN(N_ii-ii,cacheBlockSize_);
int N_jjj = MIN(N_jj-jj,cacheBlockSize_);
A2AMatrixSet<T> mCacheBlock(mCache_.data(), next_, nstr_, nt_, N_iii, N_jjj);
START_TIMER("kernel");
kernel(mCacheBlock, &left[i+ii], &right[j+jj], orthogDim_, t);
STOP_TIMER("kernel");
t_kernel += t;
flops += kernel.flops(N_iii, N_jjj);
bytes += kernel.bytes(N_iii, N_jjj);
START_TIMER("cache copy");
parallel_for_nest5(int e =0;e<next_;e++)
for(int s =0;s< nstr_;s++)
for(int t =0;t< nt_;t++)
for(int iii=0;iii< N_iii;iii++)
for(int jjj=0;jjj< N_jjj;jjj++)
{
mBlock(e,s,t,ii+iii,jj+jjj) = mCacheBlock(e,s,t,iii,jjj);
}
STOP_TIMER("cache copy");
}
// perf
LOG(Message) << "Kernel perf " << flops/t_kernel/1.0e3/nodes
<< " Gflop/s/node " << std::endl;
LOG(Message) << "Kernel perf " << bytes/t_kernel*1.0e6/1024/1024/1024/nodes
<< " GB/s/node " << std::endl;
// IO
double blockSize, ioTime;
unsigned int myRank = grid_->ThisRank(), nRank = grid_->RankCount();
LOG(Message) << "Writing block to disk" << std::endl;
ioTime = -GET_TIMER("IO: write block");
START_TIMER("IO: total");
makeFileDir(filenameFn(0, 0), grid_);
#ifdef HADRONS_A2AM_PARALLEL_IO
grid_->Barrier();
// make task list for current node
nodeIo_.clear();
for(int f = myRank; f < next_*nstr_; f += nRank)
{
IoHelper h;
h.i = i;
h.j = j;
h.e = f/nstr_;
h.s = f % nstr_;
h.io = A2AMatrixIo<TIo>(filenameFn(h.e, h.s),
ionameFn(h.e, h.s), nt_, N_i, N_j);
h.md = metadataFn(h.e, h.s);
nodeIo_.push_back(h);
}
// parallel IO
for (auto &h: nodeIo_)
{
saveBlock(mBlock, h);
}
grid_->Barrier();
#else
// serial IO, for testing purposes only
for(int e = 0; e < next_; e++)
for(int s = 0; s < nstr_; s++)
{
IoHelper h;
h.i = i;
h.j = j;
h.e = e;
h.s = s;
h.io = A2AMatrixIo<TIo>(filenameFn(h.e, h.s),
ionameFn(h.e, h.s), nt_, N_i, N_j);
h.md = metadataFn(h.e, h.s);
saveBlock(mBlock, h);
}
#endif
STOP_TIMER("IO: total");
blockSize = static_cast<double>(next_*nstr_*nt_*N_ii*N_jj*sizeof(TIo));
ioTime += GET_TIMER("IO: write block");
LOG(Message) << "HDF5 IO done " << sizeString(blockSize) << " in "
<< ioTime << " us ("
<< blockSize/ioTime*1.0e6/1024/1024
<< " MB/s)" << std::endl;
}
}
// I/O handler /////////////////////////////////////////////////////////////////
template <typename T, typename Field, typename MetadataType, typename TIo>
void A2AMatrixBlockComputation<T, Field, MetadataType, TIo>
::saveBlock(const A2AMatrixSet<TIo> &m, IoHelper &h)
{
if ((h.i == 0) and (h.j == 0))
{
START_TIMER("IO: file creation");
h.io.initFile(h.md, blockSize_);
STOP_TIMER("IO: file creation");
}
START_TIMER("IO: write block");
h.io.saveBlock(m, h.e, h.s, h.i, h.j);
STOP_TIMER("IO: write block");
}
#undef START_TIMER
#undef STOP_TIMER
#undef GET_TIMER
END_HADRONS_NAMESPACE
#endif // A2A_Matrix_hpp_

View File

@ -34,12 +34,6 @@ See the full license in the file "LICENSE" in the top level distribution directo
#include <ftw.h>
#include <unistd.h>
#ifdef DV_DEBUG
#define DV_DEBUG_MSG(dv, stream) LOG(Debug) << "diskvector " << (dv) << ": " << stream << std::endl
#else
#define DV_DEBUG_MSG(dv, stream)
#endif
BEGIN_HADRONS_NAMESPACE
/******************************************************************************
@ -62,7 +56,9 @@ public:
// write to disk and cache
T &operator=(const T &obj) const
{
DV_DEBUG_MSG(&master_, "writing to " << i_);
#ifdef DV_DEBUG
LOG(Debug) << "diskvector " << &master_ << ": writing to " << i_ << std::endl;
#endif
master_.cacheInsert(i_, obj);
master_.save(master_.filename(i_), obj);
@ -86,8 +82,6 @@ public:
virtual ~DiskVectorBase(void);
const T & operator[](const unsigned int i) const;
RwAccessHelper operator[](const unsigned int i);
double hitRatio(void) const;
void resetStat(void);
private:
virtual void load(T &obj, const std::string filename) const = 0;
virtual void save(const std::string filename, const T &obj) const = 0;
@ -99,7 +93,6 @@ private:
private:
std::string dirname_;
unsigned int size_, cacheSize_;
double access_{0.}, hit_{0.};
bool clean_;
// using pointers to allow modifications when class is const
// semantic: const means data unmodified, but cache modification allowed
@ -122,7 +115,6 @@ private:
read(reader, basename(filename), obj);
}
virtual void save(const std::string filename, const T &obj) const
{
Writer writer(filename);
@ -131,76 +123,13 @@ private:
}
};
/******************************************************************************
* Specialisation for Eigen matrices *
******************************************************************************/
template <typename T>
using EigenDiskVectorMat = Eigen::Matrix<T, Eigen::Dynamic, Eigen::Dynamic>;
template <typename T>
class EigenDiskVector: public DiskVectorBase<EigenDiskVectorMat<T>>
{
public:
using DiskVectorBase<EigenDiskVectorMat<T>>::DiskVectorBase;
typedef EigenDiskVectorMat<T> Matrix;
public:
T operator()(const unsigned int i, const Eigen::Index j,
const Eigen::Index k) const
{
return (*this)[i](j, k);
}
private:
virtual void load(EigenDiskVectorMat<T> &obj, const std::string filename) const
{
std::ifstream f(filename, std::ios::binary);
std::vector<unsigned char> hash(SHA256_DIGEST_LENGTH);
Eigen::Index nRow, nCol;
size_t matSize;
double t;
f.read(reinterpret_cast<char *>(hash.data()), hash.size()*sizeof(unsigned char));
f.read(reinterpret_cast<char *>(&nRow), sizeof(Eigen::Index));
f.read(reinterpret_cast<char *>(&nCol), sizeof(Eigen::Index));
obj.resize(nRow, nCol);
matSize = nRow*nCol*sizeof(T);
t = -usecond();
f.read(reinterpret_cast<char *>(obj.data()), matSize);
t += usecond();
DV_DEBUG_MSG(this, "Eigen read " << matSize/t*1.0e6/1024/1024 << " MB/s");
auto check = GridChecksum::sha256(obj.data(), matSize);
DV_DEBUG_MSG(this, "Eigen sha256 " << GridChecksum::sha256_string(check));
if (hash != check)
{
HADRONS_ERROR(Io, "checksum failed")
}
}
virtual void save(const std::string filename, const EigenDiskVectorMat<T> &obj) const
{
std::ofstream f(filename, std::ios::binary);
std::vector<unsigned char> hash(SHA256_DIGEST_LENGTH);
Eigen::Index nRow, nCol;
size_t matSize;
double t;
nRow = obj.rows();
nCol = obj.cols();
matSize = nRow*nCol*sizeof(T);
hash = GridChecksum::sha256(obj.data(), matSize);
DV_DEBUG_MSG(this, "Eigen sha256 " << GridChecksum::sha256_string(hash));
f.write(reinterpret_cast<char *>(hash.data()), hash.size()*sizeof(unsigned char));
f.write(reinterpret_cast<char *>(&nRow), sizeof(Eigen::Index));
f.write(reinterpret_cast<char *>(&nCol), sizeof(Eigen::Index));
t = -usecond();
f.write(reinterpret_cast<const char *>(obj.data()), matSize);
t += usecond();
DV_DEBUG_MSG(this, "Eigen write " << matSize/t*1.0e6/1024/1024 << " MB/s");
}
};
/******************************************************************************
* DiskVectorBase implementation *
******************************************************************************/
#ifdef DV_DEBUG
#define DV_DEBUG_MSG(stream) LOG(Debug) << "diskvector " << this << ": " << stream << std::endl
#endif
template <typename T>
DiskVectorBase<T>::DiskVectorBase(const std::string dirname,
const unsigned int size,
@ -231,29 +160,28 @@ DiskVectorBase<T>::~DiskVectorBase(void)
template <typename T>
const T & DiskVectorBase<T>::operator[](const unsigned int i) const
{
auto &cache = *cachePtr_;
auto &loads = *loadsPtr_;
auto &cache = *cachePtr_;
auto &loads = *loadsPtr_;
DV_DEBUG_MSG(this, "accessing " << i << " (RO)");
DV_DEBUG_MSG("accessing " << i << " (RO)");
if (i >= size_)
{
HADRONS_ERROR(Size, "index out of range");
}
const_cast<double &>(access_)++;
if (cache.find(i) == cache.end())
{
// cache miss
DV_DEBUG_MSG(this, "cache miss");
DV_DEBUG_MSG("cache miss");
fetch(i);
}
else
{
DV_DEBUG_MSG(this, "cache hit");
DV_DEBUG_MSG("cache hit");
auto pos = std::find(loads.begin(), loads.end(), i);
const_cast<double &>(hit_)++;
loads.erase(pos);
loads.push_back(i);
}
@ -265,7 +193,7 @@ const T & DiskVectorBase<T>::operator[](const unsigned int i) const
{
msg += std::to_string(p) + " ";
}
DV_DEBUG_MSG(this, "in cache: " << msg);
DV_DEBUG_MSG("in cache: " << msg);
#endif
return cache.at(i);
@ -274,7 +202,7 @@ const T & DiskVectorBase<T>::operator[](const unsigned int i) const
template <typename T>
typename DiskVectorBase<T>::RwAccessHelper DiskVectorBase<T>::operator[](const unsigned int i)
{
DV_DEBUG_MSG(this, "accessing " << i << " (RW)");
DV_DEBUG_MSG("accessing " << i << " (RW)");
if (i >= size_)
{
@ -284,19 +212,6 @@ typename DiskVectorBase<T>::RwAccessHelper DiskVectorBase<T>::operator[](const u
return RwAccessHelper(*this, i);
}
template <typename T>
double DiskVectorBase<T>::hitRatio(void) const
{
return hit_/access_;
}
template <typename T>
void DiskVectorBase<T>::resetStat(void)
{
access_ = 0.;
hit_ = 0.;
}
template <typename T>
std::string DiskVectorBase<T>::filename(const unsigned int i) const
{
@ -311,7 +226,7 @@ void DiskVectorBase<T>::evict(void) const
if (cache.size() >= cacheSize_)
{
DV_DEBUG_MSG(this, "evicting " << loads.front());
DV_DEBUG_MSG("evicting " << loads.front());
cache.erase(loads.front());
loads.pop_front();
}
@ -324,7 +239,7 @@ void DiskVectorBase<T>::fetch(const unsigned int i) const
auto &loads = *loadsPtr_;
struct stat s;
DV_DEBUG_MSG(this, "loading " << i << " from disk");
DV_DEBUG_MSG("loading " << i << " from disk");
evict();
if(stat(filename(i).c_str(), &s) != 0)
@ -352,7 +267,7 @@ void DiskVectorBase<T>::cacheInsert(const unsigned int i, const T &obj) const
{
msg += std::to_string(p) + " ";
}
DV_DEBUG_MSG(this, "in cache: " << msg);
DV_DEBUG_MSG("in cache: " << msg);
#endif
}
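The policy implemented by evict() and the loads bookkeeping above is plain least-recently-used: every access pushes the index to the back of the deque, eviction pops the front. The same logic as a self-contained toy (names hypothetical, illustration only):

#include <algorithm>
#include <cstddef>
#include <deque>
#include <map>

template <typename T>
class ToyLru
{
public:
    explicit ToyLru(const std::size_t capacity): capacity_(capacity) {}

    void put(const unsigned int i, const T &obj)
    {
        if (cache_.size() >= capacity_) // evict least recently used
        {
            cache_.erase(loads_.front());
            loads_.pop_front();
        }
        cache_[i] = obj;
        loads_.push_back(i);
    }

    const T *get(const unsigned int i)
    {
        auto it = cache_.find(i);

        if (it == cache_.end()) return nullptr; // cache miss
        // cache hit: move i to the back, marking it most recently used
        loads_.erase(std::find(loads_.begin(), loads_.end(), i));
        loads_.push_back(i);

        return &it->second;
    }
private:
    std::size_t               capacity_;
    std::map<unsigned int, T> cache_;
    std::deque<unsigned int>  loads_;
};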
View File
@ -39,12 +39,12 @@ BEGIN_HADRONS_NAMESPACE
#define HADRONS_DEFAULT_LANCZOS_NBASIS 60
#endif
#define HADRONS_DUMP_EP_METADATA(record) \
#define HADRONS_DUMP_EP_METADATA \
LOG(Message) << "Eigenpack metadata:" << std::endl;\
LOG(Message) << "* operator" << std::endl;\
LOG(Message) << (record).operatorXml << std::endl;\
LOG(Message) << record.operatorXml << std::endl;\
LOG(Message) << "* solver" << std::endl;\
LOG(Message) << (record).solverXml << std::endl;
LOG(Message) << record.solverXml << std::endl;
struct PackRecord
{
@ -59,9 +59,66 @@ struct VecRecord: Serializable
VecRecord(void): index(0), eval(0.) {}
};
namespace EigenPackIo
template <typename F>
class EigenPack
{
inline void readHeader(PackRecord &record, ScidacReader &binReader)
public:
typedef F Field;
public:
std::vector<RealD> eval;
std::vector<F> evec;
PackRecord record;
public:
EigenPack(void) = default;
virtual ~EigenPack(void) = default;
EigenPack(const size_t size, GridBase *grid)
{
resize(size, grid);
}
void resize(const size_t size, GridBase *grid)
{
eval.resize(size);
evec.resize(size, grid);
}
virtual void read(const std::string fileStem, const bool multiFile, const int traj = -1)
{
if (multiFile)
{
for(int k = 0; k < evec.size(); ++k)
{
basicReadSingle(evec[k], eval[k], evecFilename(fileStem, k, traj), k);
if (k == 0)
{
HADRONS_DUMP_EP_METADATA;
}
}
}
else
{
basicRead(evec, eval, evecFilename(fileStem, -1, traj), evec.size());
HADRONS_DUMP_EP_METADATA;
}
}
virtual void write(const std::string fileStem, const bool multiFile, const int traj = -1)
{
if (multiFile)
{
for(int k = 0; k < evec.size(); ++k)
{
basicWriteSingle(evecFilename(fileStem, k, traj), evec[k], eval[k], k);
}
}
else
{
basicWrite(evecFilename(fileStem, -1, traj), evec, eval, evec.size());
}
}
static void readHeader(PackRecord &record, ScidacReader &binReader)
{
std::string recordXml;
@ -73,75 +130,13 @@ namespace EigenPackIo
xmlReader.readCurrentSubtree(record.solverXml);
}
template <typename T, typename TIo = T>
void readElement(T &evec, RealD &eval, const unsigned int index,
ScidacReader &binReader, TIo *ioBuf = nullptr)
template <typename T>
static void readElement(T &evec, VecRecord &vecRecord, ScidacReader &binReader)
{
VecRecord vecRecord;
LOG(Message) << "Reading eigenvector " << index << std::endl;
if (ioBuf == nullptr)
{
binReader.readScidacFieldRecord(evec, vecRecord);
}
else
{
binReader.readScidacFieldRecord(*ioBuf, vecRecord);
precisionChange(evec, *ioBuf);
}
if (vecRecord.index != index)
{
HADRONS_ERROR(Io, "Eigenvector " + std::to_string(index) + " has a"
+ " wrong index (expected " + std::to_string(vecRecord.index)
+ ")");
}
eval = vecRecord.eval;
binReader.readScidacFieldRecord(evec, vecRecord);
}
template <typename T, typename TIo = T>
static void readPack(std::vector<T> &evec, std::vector<RealD> &eval,
PackRecord &record, const std::string filename,
const unsigned int size, bool multiFile,
GridBase *gridIo = nullptr)
{
std::unique_ptr<TIo> ioBuf{nullptr};
ScidacReader binReader;
if (typeHash<T>() != typeHash<TIo>())
{
if (gridIo == nullptr)
{
HADRONS_ERROR(Definition,
"I/O type different from vector type but null I/O grid passed");
}
ioBuf.reset(new TIo(gridIo));
}
if (multiFile)
{
std::string fullFilename;
for(int k = 0; k < size; ++k)
{
fullFilename = filename + "/v" + std::to_string(k) + ".bin";
binReader.open(fullFilename);
readHeader(record, binReader);
readElement(evec[k], eval[k], k, binReader, ioBuf.get());
binReader.close();
}
}
else
{
binReader.open(filename);
readHeader(record, binReader);
for(int k = 0; k < size; ++k)
{
readElement(evec[k], eval[k], k, binReader, ioBuf.get());
}
binReader.close();
}
}
inline void writeHeader(ScidacWriter &binWriter, PackRecord &record)
static void writeHeader(ScidacWriter &binWriter, PackRecord &record)
{
XmlWriter xmlWriter("", "eigenPackPar");
@ -150,217 +145,165 @@ namespace EigenPackIo
binWriter.writeLimeObject(1, 1, xmlWriter, "parameters", SCIDAC_FILE_XML);
}
template <typename T, typename TIo = T>
void writeElement(ScidacWriter &binWriter, T &evec, RealD &eval,
const unsigned int index, TIo *ioBuf,
T *testBuf = nullptr)
template <typename T>
static void writeElement(ScidacWriter &binWriter, T &evec, VecRecord &vecRecord)
{
VecRecord vecRecord;
LOG(Message) << "Writing eigenvector " << index << std::endl;
vecRecord.eval = eval;
vecRecord.index = index;
if ((ioBuf == nullptr) || (testBuf == nullptr))
{
binWriter.writeScidacFieldRecord(evec, vecRecord, DEFAULT_ASCII_PREC);
}
else
{
precisionChange(*ioBuf, evec);
precisionChange(*testBuf, *ioBuf);
*testBuf -= evec;
LOG(Message) << "Precision diff norm^2 " << norm2(*testBuf) << std::endl;
binWriter.writeScidacFieldRecord(*ioBuf, vecRecord, DEFAULT_ASCII_PREC);
}
}
template <typename T, typename TIo = T>
static void writePack(const std::string filename, std::vector<T> &evec,
std::vector<RealD> &eval, PackRecord &record,
const unsigned int size, bool multiFile,
GridBase *gridIo = nullptr)
{
GridBase *grid = evec[0]._grid;
std::unique_ptr<TIo> ioBuf{nullptr};
std::unique_ptr<T> testBuf{nullptr};
ScidacWriter binWriter(grid->IsBoss());
if (typeHash<T>() != typeHash<TIo>())
{
if (gridIo == nullptr)
{
HADRONS_ERROR(Definition,
"I/O type different from vector type but null I/O grid passed");
}
ioBuf.reset(new TIo(gridIo));
testBuf.reset(new T(grid));
}
if (multiFile)
{
std::string fullFilename;
for(int k = 0; k < size; ++k)
{
fullFilename = filename + "/v" + std::to_string(k) + ".bin";
makeFileDir(fullFilename, grid);
binWriter.open(fullFilename);
writeHeader(binWriter, record);
writeElement(binWriter, evec[k], eval[k], k, ioBuf.get(), testBuf.get());
binWriter.close();
}
}
else
{
makeFileDir(filename, grid);
binWriter.open(filename);
writeHeader(binWriter, record);
for(int k = 0; k < size; ++k)
{
writeElement(binWriter, evec[k], eval[k], k, ioBuf.get(), testBuf.get());
}
binWriter.close();
}
}
}
template <typename F>
class BaseEigenPack
{
public:
typedef F Field;
public:
std::vector<RealD> eval;
std::vector<F> evec;
PackRecord record;
public:
BaseEigenPack(void) = default;
BaseEigenPack(const size_t size, GridBase *grid)
{
resize(size, grid);
}
virtual ~BaseEigenPack(void) = default;
void resize(const size_t size, GridBase *grid)
{
eval.resize(size);
evec.resize(size, grid);
}
};
template <typename F, typename FIo = F>
class EigenPack: public BaseEigenPack<F>
{
public:
typedef F Field;
typedef FIo FieldIo;
public:
EigenPack(void) = default;
virtual ~EigenPack(void) = default;
EigenPack(const size_t size, GridBase *grid, GridBase *gridIo = nullptr)
: BaseEigenPack<F>(size, grid)
{
if (typeHash<F>() != typeHash<FIo>())
{
if (gridIo == nullptr)
{
HADRONS_ERROR(Definition,
"I/O type different from vector type but null I/O grid passed");
}
}
gridIo_ = gridIo;
}
virtual void read(const std::string fileStem, const bool multiFile, const int traj = -1)
{
EigenPackIo::readPack<F, FIo>(this->evec, this->eval, this->record,
evecFilename(fileStem, traj, multiFile),
this->evec.size(), multiFile, gridIo_);
HADRONS_DUMP_EP_METADATA(this->record);
}
virtual void write(const std::string fileStem, const bool multiFile, const int traj = -1)
{
EigenPackIo::writePack<F, FIo>(evecFilename(fileStem, traj, multiFile),
this->evec, this->eval, this->record,
this->evec.size(), multiFile, gridIo_);
binWriter.writeScidacFieldRecord(evec, vecRecord, DEFAULT_ASCII_PREC);
}
protected:
std::string evecFilename(const std::string stem, const int traj, const bool multiFile)
std::string evecFilename(const std::string stem, const int vec, const int traj)
{
std::string t = (traj < 0) ? "" : ("." + std::to_string(traj));
if (multiFile)
{
return stem + t;
}
else
if (vec == -1)
{
return stem + t + ".bin";
}
else
{
return stem + t + "/v" + std::to_string(vec) + ".bin";
}
}
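// Illustrative outputs of evecFilename() above (hypothetical arguments):
//   evecFilename("eig", -1, 300) -> "eig.300.bin"     (single-file pack)
//   evecFilename("eig", 12, 300) -> "eig.300/v12.bin" (multi-file, vector 12)
//   evecFilename("eig", -1, -1)  -> "eig.bin"         (traj < 0 drops the suffix)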
template <typename T>
void basicRead(std::vector<T> &evec, std::vector<RealD> &eval,
const std::string filename, const unsigned int size)
{
ScidacReader binReader;
binReader.open(filename);
readHeader(record, binReader);
for(int k = 0; k < size; ++k)
{
VecRecord vecRecord;
LOG(Message) << "Reading eigenvector " << k << std::endl;
readElement(evec[k], vecRecord, binReader);
if (vecRecord.index != k)
{
HADRONS_ERROR(Io, "Eigenvector " + std::to_string(k) + " has a"
+ " wrong index (expected " + std::to_string(vecRecord.index)
+ ") in file '" + filename + "'");
}
eval[k] = vecRecord.eval;
}
binReader.close();
}
template <typename T>
void basicReadSingle(T &evec, RealD &eval, const std::string filename,
const unsigned int index)
{
ScidacReader binReader;
VecRecord vecRecord;
binReader.open(filename);
readHeader(record, binReader);
LOG(Message) << "Reading eigenvector " << index << std::endl;
readElement(evec, vecRecord, binReader);
if (vecRecord.index != index)
{
HADRONS_ERROR(Io, "Eigenvector " + std::to_string(index) + " has a"
+ " wrong index (expected " + std::to_string(vecRecord.index)
+ ") in file '" + filename + "'");
}
eval = vecRecord.eval;
binReader.close();
}
template <typename T>
void basicWrite(const std::string filename, std::vector<T> &evec,
const std::vector<RealD> &eval, const unsigned int size)
{
ScidacWriter binWriter(evec[0]._grid->IsBoss());
makeFileDir(filename, evec[0]._grid);
binWriter.open(filename);
writeHeader(binWriter, record);
for(int k = 0; k < size; ++k)
{
VecRecord vecRecord;
vecRecord.index = k;
vecRecord.eval = eval[k];
LOG(Message) << "Writing eigenvector " << k << std::endl;
writeElement(binWriter, evec[k], vecRecord);
}
binWriter.close();
}
template <typename T>
void basicWriteSingle(const std::string filename, T &evec,
const RealD eval, const unsigned int index)
{
ScidacWriter binWriter(evec._grid->IsBoss());
VecRecord vecRecord;
makeFileDir(filename, evec._grid);
binWriter.open(filename);
writeHeader(binWriter, record);
vecRecord.index = index;
vecRecord.eval = eval;
LOG(Message) << "Writing eigenvector " << index << std::endl;
writeElement(binWriter, evec, vecRecord);
binWriter.close();
}
protected:
GridBase *gridIo_;
};
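A hedged usage sketch of the pack I/O above (fragment only: the red-black grid rbGrid5 and the eigenpair count are hypothetical, and grid construction is elided):

// Read 120 eigenpairs from the single file eig.300.bin, then write them
// back out one file per vector under eig_copy.300/.
EigenPack<LatticeFermion> pack(120, rbGrid5);
pack.read ("eig",      false, 300);
pack.write("eig_copy", true,  300);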
template <typename FineF, typename CoarseF,
typename FineFIo = FineF, typename CoarseFIo = CoarseF>
class CoarseEigenPack: public EigenPack<FineF, FineFIo>
template <typename FineF, typename CoarseF>
class CoarseEigenPack: public EigenPack<FineF>
{
public:
typedef CoarseF CoarseField;
std::vector<CoarseF> evecCoarse;
typedef CoarseF CoarseField;
public:
std::vector<RealD> evalCoarse;
std::vector<CoarseF> evecCoarse;
public:
CoarseEigenPack(void) = default;
virtual ~CoarseEigenPack(void) = default;
CoarseEigenPack(const size_t sizeFine, const size_t sizeCoarse,
GridBase *gridFine, GridBase *gridCoarse,
GridBase *gridFineIo = nullptr,
GridBase *gridCoarseIo = nullptr)
GridBase *gridFine, GridBase *gridCoarse)
{
if (typeHash<FineF>() != typeHash<FineFIo>())
{
if (gridFineIo == nullptr)
{
HADRONS_ERROR(Definition,
"Fine I/O type different from vector type but null fine I/O grid passed");
}
}
if (typeHash<CoarseF>() != typeHash<CoarseFIo>())
{
if (gridCoarseIo == nullptr)
{
HADRONS_ERROR(Definition,
"Coarse I/O type different from vector type but null coarse I/O grid passed");
}
}
this->gridIo_ = gridFineIo;
gridCoarseIo_ = gridCoarseIo;
resize(sizeFine, sizeCoarse, gridFine, gridCoarse);
}
void resize(const size_t sizeFine, const size_t sizeCoarse,
GridBase *gridFine, GridBase *gridCoarse)
{
EigenPack<FineF, FineFIo>::resize(sizeFine, gridFine);
EigenPack<FineF>::resize(sizeFine, gridFine);
evalCoarse.resize(sizeCoarse);
evecCoarse.resize(sizeCoarse, gridCoarse);
}
void readFine(const std::string fileStem, const bool multiFile, const int traj = -1)
{
EigenPack<FineF, FineFIo>::read(fileStem + "_fine", multiFile, traj);
if (multiFile)
{
for(int k = 0; k < this->evec.size(); ++k)
{
this->basicReadSingle(this->evec[k], this->eval[k], this->evecFilename(fileStem + "_fine", k, traj), k);
}
}
else
{
this->basicRead(this->evec, this->eval, this->evecFilename(fileStem + "_fine", -1, traj), this->evec.size());
}
}
void readCoarse(const std::string fileStem, const bool multiFile, const int traj = -1)
{
PackRecord dummy;
EigenPackIo::readPack<CoarseF, CoarseFIo>(evecCoarse, evalCoarse, dummy,
this->evecFilename(fileStem + "_coarse", traj, multiFile),
evecCoarse.size(), multiFile, gridCoarseIo_);
if (multiFile)
{
for(int k = 0; k < evecCoarse.size(); ++k)
{
this->basicReadSingle(evecCoarse[k], evalCoarse[k], this->evecFilename(fileStem + "_coarse", k, traj), k);
}
}
else
{
this->basicRead(evecCoarse, evalCoarse, this->evecFilename(fileStem + "_coarse", -1, traj), evecCoarse.size());
}
}
virtual void read(const std::string fileStem, const bool multiFile, const int traj = -1)
@ -371,14 +314,32 @@ public:
void writeFine(const std::string fileStem, const bool multiFile, const int traj = -1)
{
EigenPack<FineF, FineFIo>::write(fileStem + "_fine", multiFile, traj);
if (multiFile)
{
for(int k = 0; k < this->evec.size(); ++k)
{
this->basicWriteSingle(this->evecFilename(fileStem + "_fine", k, traj), this->evec[k], this->eval[k], k);
}
}
else
{
this->basicWrite(this->evecFilename(fileStem + "_fine", -1, traj), this->evec, this->eval, this->evec.size());
}
}
void writeCoarse(const std::string fileStem, const bool multiFile, const int traj = -1)
{
EigenPackIo::writePack<CoarseF, CoarseFIo>(this->evecFilename(fileStem + "_coarse", traj, multiFile),
evecCoarse, evalCoarse, this->record,
evecCoarse.size(), multiFile, gridCoarseIo_);
if (multiFile)
{
for(int k = 0; k < evecCoarse.size(); ++k)
{
this->basicWriteSingle(this->evecFilename(fileStem + "_coarse", k, traj), evecCoarse[k], evalCoarse[k], k);
}
}
else
{
this->basicWrite(this->evecFilename(fileStem + "_coarse", -1, traj), evecCoarse, evalCoarse, evecCoarse.size());
}
}
virtual void write(const std::string fileStem, const bool multiFile, const int traj = -1)
@ -386,25 +347,16 @@ public:
writeFine(fileStem, multiFile, traj);
writeCoarse(fileStem, multiFile, traj);
}
private:
GridBase *gridCoarseIo_;
};
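A corresponding sketch for the two-level pack (sizes, grids, and the CoarseFermion typedef hypothetical): fine and coarse data live under the separate stems <stem>_fine and <stem>_coarse, so a single read() call restores both levels.

// Local-coherence deflation data, fine plus coarse (fragment only).
CoarseEigenPack<LatticeFermion, CoarseFermion> cpack(nFine, nCoarse,
                                                     rbGridFine, rbGridCoarse);
cpack.read("lceig", true, traj); // reads lceig_fine.<traj>/ and lceig_coarse.<traj>/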
template <typename FImpl>
using BaseFermionEigenPack = BaseEigenPack<typename FImpl::FermionField>;
using FermionEigenPack = EigenPack<typename FImpl::FermionField>;
template <typename FImpl, typename FImplIo = FImpl>
using FermionEigenPack = EigenPack<typename FImpl::FermionField, typename FImplIo::FermionField>;
template <typename FImpl, int nBasis, typename FImplIo = FImpl>
template <typename FImpl, int nBasis>
using CoarseFermionEigenPack = CoarseEigenPack<
typename FImpl::FermionField,
typename LocalCoherenceLanczos<typename FImpl::SiteSpinor,
typename FImpl::SiteComplex,
nBasis>::CoarseField,
typename FImplIo::FermionField,
typename LocalCoherenceLanczos<typename FImplIo::SiteSpinor,
typename FImplIo::SiteComplex,
nBasis>::CoarseField>;
#undef HADRONS_DUMP_EP_METADATA
View File
@ -11,7 +11,6 @@ libHadrons_a_SOURCES = \
Exceptions.cc \
Global.cc \
Module.cc \
TimerArray.cc \
VirtualMachine.cc
libHadrons_adir = $(includedir)/Hadrons
nobase_libHadrons_a_HEADERS = \
@ -32,5 +31,4 @@ nobase_libHadrons_a_HEADERS = \
Modules.hpp \
ModuleFactory.hpp \
Solver.hpp \
TimerArray.hpp \
VirtualMachine.hpp
View File
@ -66,6 +66,101 @@ void ModuleBase::operator()(void)
stopAllTimers();
}
// timers //////////////////////////////////////////////////////////////////////
void ModuleBase::startTimer(const std::string &name)
{
if (!name.empty())
{
timer_[name].Start();
}
}
GridTime ModuleBase::getTimer(const std::string &name)
{
GridTime t;
if (!name.empty())
{
try
{
bool running = timer_.at(name).isRunning();
if (running) stopTimer(name);
t = timer_.at(name).Elapsed();
if (running) startTimer(name);
}
catch (std::out_of_range &)
{
t = GridTime::zero();
}
}
else
{
t = GridTime::zero();
}
return t;
}
double ModuleBase::getDTimer(const std::string &name)
{
return static_cast<double>(getTimer(name).count());
}
void ModuleBase::startCurrentTimer(const std::string &name)
{
if (!name.empty())
{
stopCurrentTimer();
startTimer(name);
currentTimer_ = name;
}
}
void ModuleBase::stopTimer(const std::string &name)
{
if (timer_.at(name).isRunning())
{
timer_.at(name).Stop();
}
}
void ModuleBase::stopCurrentTimer(void)
{
if (!currentTimer_.empty())
{
stopTimer(currentTimer_);
currentTimer_ = "";
}
}
void ModuleBase::stopAllTimers(void)
{
for (auto &t: timer_)
{
stopTimer(t.first);
}
currentTimer_ = "";
}
void ModuleBase::resetTimers(void)
{
timer_.clear();
currentTimer_ = "";
}
std::map<std::string, GridTime> ModuleBase::getTimings(void)
{
std::map<std::string, GridTime> timing;
for (auto &t: timer_)
{
timing[t.first] = t.second.Elapsed();
}
return timing;
}
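A hedged sketch of how a module body typically drives these timers (fragment of a hypothetical execute()):

// Named timers are created on first use; getTimer() on a running timer
// pauses it, reads the elapsed time, and restarts it (see above).
startTimer("solver");
// ... expensive call ...
stopTimer("solver");
LOG(Message) << "solver time: " << getDTimer("solver") << " (GridTime ticks)" << std::endl;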
std::string ModuleBase::makeSeedString(void)
{
std::string seed;
View File
@ -30,7 +30,6 @@ See the full license in the file "LICENSE" in the top level distribution directo
#define Hadrons_Module_hpp_
#include <Hadrons/Global.hpp>
#include <Hadrons/TimerArray.hpp>
#include <Hadrons/VirtualMachine.hpp>
BEGIN_HADRONS_NAMESPACE
@ -153,7 +152,7 @@ if (env().getGrid()->IsBoss() and !ioStem.empty())\
* Module class *
******************************************************************************/
// base class
class ModuleBase: public TimerArray
class ModuleBase
{
public:
// constructor
@ -181,6 +180,16 @@ public:
virtual void execute(void) = 0;
// execution
void operator()(void);
// timers
void startTimer(const std::string &name);
GridTime getTimer(const std::string &name);
double getDTimer(const std::string &name);
void startCurrentTimer(const std::string &name);
void stopTimer(const std::string &name);
void stopCurrentTimer(void);
void stopAllTimers(void);
void resetTimers(void);
std::map<std::string, GridTime> getTimings(void);
protected:
// environment shortcut
DEFINE_ENV_ALIAS;
View File
@ -1,6 +1,6 @@
#include <Hadrons/Modules/MContraction/Baryon.hpp>
#include <Hadrons/Modules/MContraction/A2AAslashField.hpp>
#include <Hadrons/Modules/MContraction/A2AMesonField.hpp>
#include <Hadrons/Modules/MContraction/A2AMesonFieldKernels.hpp>
#include <Hadrons/Modules/MContraction/Meson.hpp>
#include <Hadrons/Modules/MContraction/WeakHamiltonian.hpp>
#include <Hadrons/Modules/MContraction/WeakHamiltonianNonEye.hpp>
@ -16,7 +16,6 @@
#include <Hadrons/Modules/MSource/Wall.hpp>
#include <Hadrons/Modules/MSource/Z2.hpp>
#include <Hadrons/Modules/MSource/SeqConserved.hpp>
#include <Hadrons/Modules/MSource/Momentum.hpp>
#include <Hadrons/Modules/MSink/Smear.hpp>
#include <Hadrons/Modules/MSink/Point.hpp>
#include <Hadrons/Modules/MSolver/MixedPrecisionRBPrecCG.hpp>
@ -28,7 +27,6 @@
#include <Hadrons/Modules/MGauge/StoutSmearing.hpp>
#include <Hadrons/Modules/MGauge/Unit.hpp>
#include <Hadrons/Modules/MGauge/Random.hpp>
#include <Hadrons/Modules/MGauge/GaugeFix.hpp>
#include <Hadrons/Modules/MGauge/FundtoHirep.hpp>
#include <Hadrons/Modules/MGauge/StochEm.hpp>
#include <Hadrons/Modules/MNoise/TimeDilutedSpinColorDiagonal.hpp>
@ -42,9 +40,6 @@
#include <Hadrons/Modules/MScalar/ScalarVP.hpp>
#include <Hadrons/Modules/MScalar/Scalar.hpp>
#include <Hadrons/Modules/MScalar/ChargedProp.hpp>
#include <Hadrons/Modules/MNPR/Bilinear.hpp>
#include <Hadrons/Modules/MNPR/Amputate.hpp>
#include <Hadrons/Modules/MNPR/FourQuark.hpp>
#include <Hadrons/Modules/MAction/DWF.hpp>
#include <Hadrons/Modules/MAction/MobiusDWF.hpp>
#include <Hadrons/Modules/MAction/Wilson.hpp>
@ -55,6 +50,7 @@
#include <Hadrons/Modules/MScalarSUN/TwoPointNPR.hpp>
#include <Hadrons/Modules/MScalarSUN/ShiftProbe.hpp>
#include <Hadrons/Modules/MScalarSUN/Div.hpp>
#include <Hadrons/Modules/MScalarSUN/TimeMomProbe.hpp>
#include <Hadrons/Modules/MScalarSUN/TrMag.hpp>
#include <Hadrons/Modules/MScalarSUN/EMT.hpp>
#include <Hadrons/Modules/MScalarSUN/TwoPoint.hpp>
View File
@ -1,250 +0,0 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: Hadrons/Modules/MContraction/A2AAslashField.hpp
Copyright (C) 2015-2018
Author: Antonin Portelli <antonin.portelli@me.com>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#ifndef Hadrons_MContraction_A2AAslashField_hpp_
#define Hadrons_MContraction_A2AAslashField_hpp_
#include <Hadrons/Global.hpp>
#include <Hadrons/Module.hpp>
#include <Hadrons/ModuleFactory.hpp>
#include <Hadrons/A2AMatrix.hpp>
#ifndef ASF_IO_TYPE
#define ASF_IO_TYPE ComplexF
#endif
BEGIN_HADRONS_NAMESPACE
/******************************************************************************
* A2AAslashField *
******************************************************************************/
BEGIN_MODULE_NAMESPACE(MContraction)
class A2AAslashFieldPar: Serializable
{
public:
GRID_SERIALIZABLE_CLASS_MEMBERS(A2AAslashFieldPar,
int, cacheBlock,
int, block,
std::string, left,
std::string, right,
std::string, output,
std::vector<std::string>, emField);
};
class A2AAslashFieldMetadata: Serializable
{
public:
GRID_SERIALIZABLE_CLASS_MEMBERS(A2AAslashFieldMetadata,
std::string, emFieldName);
};
template <typename T, typename FImpl>
class AslashFieldKernel: public A2AKernel<T, typename FImpl::FermionField>
{
public:
typedef typename FImpl::FermionField FermionField;
public:
AslashFieldKernel(const std::vector<LatticeComplex> &emB0,
const std::vector<LatticeComplex> &emB1,
GridBase *grid)
: emB0_(emB0), emB1_(emB1), grid_(grid)
{
vol_ = 1.;
for (auto &d: grid_->GlobalDimensions())
{
vol_ *= d;
}
}
virtual ~AslashFieldKernel(void) = default;
virtual void operator()(A2AMatrixSet<T> &m, const FermionField *left,
const FermionField *right,
const unsigned int orthogDim, double &t)
{
A2Autils<FImpl>::AslashField(m, left, right, emB0_, emB1_, orthogDim, &t);
}
virtual double flops(const unsigned int blockSizei, const unsigned int blockSizej)
{
return 0.;
}
virtual double bytes(const unsigned int blockSizei, const unsigned int blockSizej)
{
return 0.;
}
private:
const std::vector<LatticeComplex> &emB0_, &emB1_;
GridBase *grid_;
double vol_;
};
template <typename FImpl, typename PhotonImpl>
class TA2AAslashField: public Module<A2AAslashFieldPar>
{
public:
FERM_TYPE_ALIASES(FImpl,);
typedef typename PhotonImpl::GaugeField EmField;
typedef A2AMatrixBlockComputation<Complex,
FermionField,
A2AAslashFieldMetadata,
ASF_IO_TYPE> Computation;
typedef AslashFieldKernel<Complex, FImpl> Kernel;
public:
// constructor
TA2AAslashField(const std::string name);
// destructor
virtual ~TA2AAslashField(void) {};
// dependency relation
virtual std::vector<std::string> getInput(void);
virtual std::vector<std::string> getOutput(void);
// setup
virtual void setup(void);
// execution
virtual void execute(void);
};
MODULE_REGISTER_TMP(A2AAslashField, ARG(TA2AAslashField<FIMPL, PhotonR>), MContraction);
/******************************************************************************
* TA2AAslashField implementation *
******************************************************************************/
// constructor /////////////////////////////////////////////////////////////////
template <typename FImpl, typename PhotonImpl>
TA2AAslashField<FImpl, PhotonImpl>::TA2AAslashField(const std::string name)
: Module<A2AAslashFieldPar>(name)
{}
// dependencies/products ///////////////////////////////////////////////////////
template <typename FImpl, typename PhotonImpl>
std::vector<std::string> TA2AAslashField<FImpl, PhotonImpl>::getInput(void)
{
std::vector<std::string> in = par().emField;
in.push_back(par().left);
in.push_back(par().right);
return in;
}
template <typename FImpl, typename PhotonImpl>
std::vector<std::string> TA2AAslashField<FImpl, PhotonImpl>::getOutput(void)
{
std::vector<std::string> out = {};
return out;
}
// setup ///////////////////////////////////////////////////////////////////////
template <typename FImpl, typename PhotonImpl>
void TA2AAslashField<FImpl, PhotonImpl>::setup(void)
{
envTmp(Computation, "computation", 1, envGetGrid(FermionField),
env().getNd() - 1, par().emField.size(), 1, par().block,
par().cacheBlock, this);
envTmp(std::vector<ComplexField>, "B0", 1,
par().emField.size(), envGetGrid(ComplexField));
envTmp(std::vector<ComplexField>, "B1", 1,
par().emField.size(), envGetGrid(ComplexField));
envTmpLat(ComplexField, "Amu");
}
// execution ///////////////////////////////////////////////////////////////////
template <typename FImpl, typename PhotonImpl>
void TA2AAslashField<FImpl, PhotonImpl>::execute(void)
{
auto &left = envGet(std::vector<FermionField>, par().left);
auto &right = envGet(std::vector<FermionField>, par().right);
int nt = env().getDim().back();
int N_i = left.size();
int N_j = right.size();
int nem = par().emField.size();
int block = par().block;
int cacheBlock = par().cacheBlock;
LOG(Message) << "Computing all-to-all A-slash fields" << std::endl;
LOG(Message) << "Left: '" << par().left << "' Right: '" << par().right << "'" << std::endl;
LOG(Message) << "EM fields:" << std::endl;
for (auto &name: par().emField)
{
LOG(Message) << " " << name << std::endl;
}
LOG(Message) << "A-slash field size: " << nt << "*" << N_i << "*" << N_j
<< " (filesize " << sizeString(nt*N_i*N_j*sizeof(ASF_IO_TYPE))
<< "/EM field)" << std::endl;
// preparing "B" complexified fields
startTimer("Complexify EM fields");
envGetTmp(std::vector<ComplexField>, B0);
envGetTmp(std::vector<ComplexField>, B1);
for (unsigned int i = 0; i < par().emField.size(); ++i)
{
auto &A = envGet(EmField, par().emField[i]);
envGetTmp(ComplexField, Amu);
B0[i] = peekLorentz(A, 0);
B0[i] += timesI(peekLorentz(A, 1));
B1[i] = peekLorentz(A, 2);
B1[i] += timesI(peekLorentz(A, 3));
}
stopTimer("Complexify EM fields");
// I/O name & metadata lambdas
auto ionameFn = [this](const unsigned int em, const unsigned int dummy)
{
return par().emField[em];
};
auto filenameFn = [this, &ionameFn](const unsigned int em, const unsigned int dummy)
{
return par().output + "." + std::to_string(vm().getTrajectory())
+ "/" + ionameFn(em, dummy) + ".h5";
};
auto metadataFn = [this](const unsigned int em, const unsigned int dummy)
{
A2AAslashFieldMetadata md;
md.emFieldName = par().emField[em];
return md;
};
// executing computation
Kernel kernel(B0, B1, envGetGrid(FermionField));
envGetTmp(Computation, computation);
computation.execute(left, right, kernel, ionameFn, filenameFn, metadataFn);
}
END_MODULE_NAMESPACE
END_HADRONS_NAMESPACE
#endif // Hadrons_MContraction_A2AAslashField_hpp_
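For orientation, a hedged reading of the complexification in execute() above: the four photon components are packed pairwise as

    B_0 = A_0 + i A_1,    B_1 = A_2 + i A_3,

and the kernel's call to A2Autils<FImpl>::AslashField reconstructs from these two complex fields the insertion of \slashed{A} = \sum_\mu \gamma_\mu A_\mu between the left and right all-to-all vectors.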
View File
@ -33,3 +33,4 @@ using namespace Hadrons;
using namespace MContraction;
template class Grid::Hadrons::MContraction::TA2AMesonField<FIMPL>;
template class Grid::Hadrons::MContraction::TA2AMesonField<ZFIMPL>;
View File
@ -33,8 +33,12 @@ See the full license in the file "LICENSE" in the top level distribution directo
#include <Hadrons/Global.hpp>
#include <Hadrons/Module.hpp>
#include <Hadrons/ModuleFactory.hpp>
#include <Hadrons/A2AVectors.hpp>
#include <Hadrons/A2AMatrix.hpp>
#include <Hadrons/Modules/MSolver/A2AVectors.hpp>
#include <Hadrons/Modules/MContraction/A2AMesonFieldKernels.hpp>
#define MF_PARALLEL_IO
#ifndef MF_IO_TYPE
#define MF_IO_TYPE ComplexF
#endif
@ -52,8 +56,8 @@ public:
GRID_SERIALIZABLE_CLASS_MEMBERS(A2AMesonFieldPar,
int, cacheBlock,
int, block,
std::string, left,
std::string, right,
std::string, v,
std::string, w,
std::string, output,
std::string, gammas,
std::vector<std::string>, mom);
@ -67,59 +71,22 @@ public:
Gamma::Algebra, gamma);
};
template <typename T, typename FImpl>
class MesonFieldKernel: public A2AKernel<T, typename FImpl::FermionField>
{
public:
typedef typename FImpl::FermionField FermionField;
public:
MesonFieldKernel(const std::vector<Gamma::Algebra> &gamma,
const std::vector<LatticeComplex> &mom,
GridBase *grid)
: gamma_(gamma), mom_(mom), grid_(grid)
{
vol_ = 1.;
for (auto &d: grid_->GlobalDimensions())
{
vol_ *= d;
}
}
virtual ~MesonFieldKernel(void) = default;
virtual void operator()(A2AMatrixSet<T> &m, const FermionField *left,
const FermionField *right,
const unsigned int orthogDim, double &t)
{
A2Autils<FImpl>::MesonField(m, left, right, gamma_, mom_, orthogDim, &t);
}
virtual double flops(const unsigned int blockSizei, const unsigned int blockSizej)
{
return vol_*(2*8.0+6.0+8.0*mom_.size())*blockSizei*blockSizej*gamma_.size();
}
virtual double bytes(const unsigned int blockSizei, const unsigned int blockSizej)
{
return vol_*(12.0*sizeof(T))*blockSizei*blockSizej
+ vol_*(2.0*sizeof(T)*mom_.size())*blockSizei*blockSizej*gamma_.size();
}
private:
const std::vector<Gamma::Algebra> &gamma_;
const std::vector<LatticeComplex> &mom_;
GridBase *grid_;
double vol_;
};
template <typename FImpl>
class TA2AMesonField : public Module<A2AMesonFieldPar>
{
public:
FERM_TYPE_ALIASES(FImpl,);
typedef A2AMatrixBlockComputation<Complex,
FermionField,
A2AMesonFieldMetadata,
MF_IO_TYPE> Computation;
typedef MesonFieldKernel<Complex, FImpl> Kernel;
SOLVER_TYPE_ALIASES(FImpl,);
typedef Eigen::TensorMap<Eigen::Tensor<Complex, 5, Eigen::RowMajor>> MesonField;
typedef Eigen::TensorMap<Eigen::Tensor<MF_IO_TYPE, 5, Eigen::RowMajor>> MesonFieldIo;
typedef A2AMatrixIo<MF_IO_TYPE, A2AMesonFieldMetadata> MatrixIo;
struct IoHelper
{
MatrixIo io;
A2AMesonFieldMetadata metadata;
size_t offset;
unsigned int i, j, blockSizei, blockSizej;
};
public:
// constructor
TA2AMesonField(const std::string name);
@ -133,13 +100,20 @@ public:
// execution
virtual void execute(void);
private:
bool hasPhase_{false};
std::string momphName_;
std::vector<Gamma::Algebra> gamma_;
std::vector<std::vector<Real>> mom_;
// IO
std::string ioname(const unsigned int m, const unsigned int g) const;
std::string filename(const unsigned int m, const unsigned int g) const;
void saveBlock(const MF_IO_TYPE *data, IoHelper &h);
private:
bool hasPhase_{false};
std::string momphName_;
std::vector<Gamma::Algebra> gamma_;
std::vector<std::vector<Real>> mom_;
std::vector<IoHelper> nodeIo_;
};
MODULE_REGISTER(A2AMesonField, ARG(TA2AMesonField<FIMPL>), MContraction);
MODULE_REGISTER(ZA2AMesonField, ARG(TA2AMesonField<ZFIMPL>), MContraction);
/******************************************************************************
* TA2AMesonField implementation *
@ -156,7 +130,7 @@ TA2AMesonField<FImpl>::TA2AMesonField(const std::string name)
template <typename FImpl>
std::vector<std::string> TA2AMesonField<FImpl>::getInput(void)
{
std::vector<std::string> in = {par().left, par().right};
std::vector<std::string> in = {par().v, par().w};
return in;
}
@ -212,31 +186,34 @@ void TA2AMesonField<FImpl>::setup(void)
}
mom_.push_back(p);
}
envCache(std::vector<ComplexField>, momphName_, 1,
par().mom.size(), envGetGrid(ComplexField));
envTmpLat(ComplexField, "coor");
envTmp(Computation, "computation", 1, envGetGrid(FermionField),
env().getNd() - 1, mom_.size(), gamma_.size(), par().block,
par().cacheBlock, this);
// preallocate memory for meson field block
auto tgp = env().getDim().back()*gamma_.size()*mom_.size();
envTmp(Vector<MF_IO_TYPE>, "mfBuf", 1, tgp*par().block*par().block);
envTmp(Vector<Complex>, "mfCache", 1, tgp*par().cacheBlock*par().cacheBlock);
}
// execution ///////////////////////////////////////////////////////////////////
template <typename FImpl>
void TA2AMesonField<FImpl>::execute(void)
{
auto &left = envGet(std::vector<FermionField>, par().left);
auto &right = envGet(std::vector<FermionField>, par().right);
auto &v = envGet(std::vector<FermionField>, par().v);
auto &w = envGet(std::vector<FermionField>, par().w);
int nt = env().getDim().back();
int N_i = left.size();
int N_j = right.size();
int N_i = w.size();
int N_j = v.size();
int ngamma = gamma_.size();
int nmom = mom_.size();
int block = par().block;
int cacheBlock = par().cacheBlock;
LOG(Message) << "Computing all-to-all meson fields" << std::endl;
LOG(Message) << "Left: '" << par().left << "' Right: '" << par().right << "'" << std::endl;
LOG(Message) << "W: '" << par().w << "' V: '" << par().v << "'" << std::endl;
LOG(Message) << "Momenta:" << std::endl;
for (auto &p: mom_)
{
@ -251,6 +228,9 @@ void TA2AMesonField<FImpl>::execute(void)
<< " (filesize " << sizeString(nt*N_i*N_j*sizeof(MF_IO_TYPE))
<< "/momentum/bilinear)" << std::endl;
///////////////////////////////////////////////
// Momentum setup
///////////////////////////////////////////////
auto &ph = envGet(std::vector<ComplexField>, momphName_);
if (!hasPhase_)
@ -273,43 +253,189 @@ void TA2AMesonField<FImpl>::execute(void)
hasPhase_ = true;
stopTimer("Momentum phases");
}
//////////////////////////////////////////////////////////////////////////
// i,j is first loop over SchurBlock factors reusing 5D matrices
// ii,jj is second loop over cacheBlock factors for high-performance contraction
// iii,jjj are loops within cacheBlock
// Total index is sum of these i+ii+iii etc...
//////////////////////////////////////////////////////////////////////////
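// Worked example of the nesting above: N_i = N_j = 1000 with block = 200
// and cacheBlock = 50 gives NBlock_i = NBlock_j = 5, i.e. 25 block^2 tiles,
// each contracted as 4 x 4 = 16 cache-resident chunks of at most 50 x 50 modes.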
double flops;
double bytes;
double vol = env().getVolume();
double t_kernel = 0.0;
double nodes = env().getGrid()->NodeCount();
double tot_kernel;
auto ionameFn = [this](const unsigned int m, const unsigned int g)
envGetTmp(Vector<MF_IO_TYPE>, mfBuf);
envGetTmp(Vector<Complex>, mfCache);
double t0 = usecond();
int NBlock_i = N_i/block + (((N_i % block) != 0) ? 1 : 0);
int NBlock_j = N_j/block + (((N_j % block) != 0) ? 1 : 0);
for(int i=0;i<N_i;i+=block)
for(int j=0;j<N_j;j+=block)
{
std::stringstream ss;
// Get the W and V vectors for this block^2 set of terms
int N_ii = MIN(N_i-i,block);
int N_jj = MIN(N_j-j,block);
ss << gamma_[g] << "_";
for (unsigned int mu = 0; mu < mom_[m].size(); ++mu)
LOG(Message) << "Meson field block "
<< j/block + NBlock_j*i/block + 1
<< "/" << NBlock_i*NBlock_j << " [" << i <<" .. "
<< i+N_ii-1 << ", " << j <<" .. " << j+N_jj-1 << "]"
<< std::endl;
MesonFieldIo mfBlock(mfBuf.data(),nmom,ngamma,nt,N_ii,N_jj);
// Series of cache blocked chunks of the contractions within this block
flops = 0.0;
bytes = 0.0;
for(int ii=0;ii<N_ii;ii+=cacheBlock)
for(int jj=0;jj<N_jj;jj+=cacheBlock)
{
ss << mom_[m][mu] << ((mu == mom_[m].size() - 1) ? "" : "_");
int N_iii = MIN(N_ii-ii,cacheBlock);
int N_jjj = MIN(N_jj-jj,cacheBlock);
MesonField mfCacheBlock(mfCache.data(),nmom,ngamma,nt,N_iii,N_jjj);
startTimer("contraction: total");
makeMesonFieldBlock(mfCacheBlock, &w[i+ii], &v[j+jj], gamma_, ph,
env().getNd() - 1, this);
stopTimer("contraction: total");
// flops for general N_c & N_s
flops += vol * ( 2 * 8.0 + 6.0 + 8.0*nmom) * N_iii*N_jjj*ngamma;
bytes += vol * (12.0 * sizeof(Complex) ) * N_iii*N_jjj
+ vol * ( 2.0 * sizeof(Complex) *nmom ) * N_iii*N_jjj* ngamma;
startTimer("cache copy");
parallel_for_nest5(int m =0;m< nmom;m++)
for(int g =0;g< ngamma;g++)
for(int t =0;t< nt;t++)
for(int iii=0;iii< N_iii;iii++)
for(int jjj=0;jjj< N_jjj;jjj++)
{
mfBlock(m,g,t,ii+iii,jj+jjj) = mfCacheBlock(m,g,t,iii,jjj);
}
stopTimer("cache copy");
}
return ss.str();
};
// perf
tot_kernel = getDTimer("contraction: colour trace & mom.")
+ getDTimer("contraction: local space sum");
t_kernel = tot_kernel - t_kernel;
LOG(Message) << "Kernel perf " << flops/t_kernel/1.0e3/nodes
<< " Gflop/s/node " << std::endl;
LOG(Message) << "Kernel perf " << bytes/t_kernel*1.0e6/1024/1024/1024/nodes
<< " GB/s/node " << std::endl;
t_kernel = tot_kernel;
auto filenameFn = [this, &ionameFn](const unsigned int m, const unsigned int g)
{
return par().output + "." + std::to_string(vm().getTrajectory())
+ "/" + ionameFn(m, g) + ".h5";
};
auto metadataFn = [this](const unsigned int m, const unsigned int g)
{
A2AMesonFieldMetadata md;
for (auto pmu: mom_[m])
// IO
if (!par().output.empty())
{
md.momentum.push_back(pmu);
}
md.gamma = gamma_[g];
double blockSize, ioTime;
unsigned int myRank = env().getGrid()->ThisRank(),
nRank = env().getGrid()->RankCount();
return md;
};
LOG(Message) << "Writing block to disk" << std::endl;
ioTime = -getDTimer("IO: write block");
startTimer("IO: total");
makeFileDir(filename(0, 0), env().getGrid());
#ifdef MF_PARALLEL_IO
env().getGrid()->Barrier();
nodeIo_.clear();
for(int f = myRank; f < nmom*ngamma; f += nRank)
{
const unsigned int m = f/ngamma, g = f % ngamma;
IoHelper h;
Kernel kernel(gamma_, ph, envGetGrid(FermionField));
h.io = MatrixIo(filename(m, g), ioname(m, g), nt, N_i, N_j);
for (auto pmu: mom_[m])
{
h.metadata.momentum.push_back(pmu);
}
h.metadata.gamma = gamma_[g];
h.i = i;
h.j = j;
h.blockSizei = mfBlock.dimension(3);
h.blockSizej = mfBlock.dimension(4);
h.offset = (m*ngamma + g)*nt*h.blockSizei*h.blockSizej;
nodeIo_.push_back(h);
}
// parallel IO
for (auto &h: nodeIo_)
{
saveBlock(mfBlock.data(), h);
}
env().getGrid()->Barrier();
#else
// serial IO
for(int m = 0; m < nmom; m++)
for(int g = 0; g < ngamma; g++)
{
IoHelper h;
envGetTmp(Computation, computation);
computation.execute(left, right, kernel, ionameFn, filenameFn, metadataFn);
h.io = MatrixIo(filename(m, g), ioname(m, g), nt, N_i, N_j);
for (auto pmu: mom_[m])
{
h.metadata.momentum.push_back(pmu);
}
h.metadata.gamma = gamma_[g];
h.i = i;
h.j = j;
h.blockSizei = mfBlock.dimension(3);
h.blockSizej = mfBlock.dimension(4);
h.offset = (m*ngamma + g)*nt*h.blockSizei*h.blockSizej;
saveBlock(mfBlock.data(), h);
}
#endif
stopTimer("IO: total");
blockSize = static_cast<double>(nmom*ngamma*nt*N_ii*N_jj*sizeof(MF_IO_TYPE));
ioTime += getDTimer("IO: write block");
LOG(Message) << "HDF5 IO done " << sizeString(blockSize) << " in "
<< ioTime << " us ("
<< blockSize/ioTime*1.0e6/1024/1024
<< " MB/s)" << std::endl;
}
}
}
// IO
template <typename FImpl>
std::string TA2AMesonField<FImpl>::ioname(unsigned int m, unsigned int g) const
{
std::stringstream ss;
ss << gamma_[g] << "_";
for (unsigned int mu = 0; mu < mom_[m].size(); ++mu)
{
ss << mom_[m][mu] << ((mu == mom_[m].size() - 1) ? "" : "_");
}
return ss.str();
}
template <typename FImpl>
std::string TA2AMesonField<FImpl>::filename(unsigned int m, unsigned int g) const
{
return par().output + "." + std::to_string(vm().getTrajectory())
+ "/" + ioname(m, g) + ".h5";
}
template <typename FImpl>
void TA2AMesonField<FImpl>::saveBlock(const MF_IO_TYPE *data, IoHelper &h)
{
if ((h.i == 0) and (h.j == 0))
{
startTimer("IO: file creation");
h.io.initFile(h.metadata, par().block);
stopTimer("IO: file creation");
}
startTimer("IO: write block");
h.io.saveBlock(data + h.offset, h.i, h.j, h.blockSizei, h.blockSizej);
stopTimer("IO: write block");
}
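The MF_PARALLEL_IO branch above shards files across ranks round-robin, while saveBlock() creates each file only once:

// f = myRank; f < nmom*ngamma; f += nRank — e.g. nRank = 4 and 10 files:
// rank 0 writes {0,4,8}, rank 1 {1,5,9}, rank 2 {2,6}, rank 3 {3,7};
// only the (i,j) = (0,0) block triggers initFile() for a given file.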
END_MODULE_NAMESPACE
View File
@ -0,0 +1,224 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: Hadrons/Modules/MContraction/A2AMesonFieldKernels.hpp
Copyright (C) 2015-2018
Author: Antonin Portelli <antonin.portelli@me.com>
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#ifndef Hadrons_MContraction_A2AMesonFieldKernels_hpp_
#define Hadrons_MContraction_A2AMesonFieldKernels_hpp_
#include <Hadrons/Global.hpp>
#include <Hadrons/Module.hpp>
#include <Grid/Eigen/unsupported/CXX11/Tensor>
BEGIN_HADRONS_NAMESPACE
BEGIN_MODULE_NAMESPACE(MContraction)
////////////////////////////////////////////////////////////////////////////////
// Cache blocked arithmetic routine
// Could move to Grid ???
////////////////////////////////////////////////////////////////////////////////
template <typename Field, typename MesonField>
void makeMesonFieldBlock(MesonField &mat,
const Field *lhs_wi,
const Field *rhs_vj,
std::vector<Gamma::Algebra> gamma,
const std::vector<LatticeComplex> &mom,
int orthogdim,
ModuleBase *caller = nullptr)
{
typedef typename Field::vector_object vobj;
typedef typename vobj::scalar_object sobj;
typedef typename vobj::scalar_type scalar_type;
typedef typename vobj::vector_type vector_type;
typedef iSpinMatrix<vector_type> SpinMatrix_v;
typedef iSpinMatrix<scalar_type> SpinMatrix_s;
int Lblock = mat.dimension(3);
int Rblock = mat.dimension(4);
GridBase *grid = lhs_wi[0]._grid;
const int Nd = grid->_ndimension;
const int Nsimd = grid->Nsimd();
int Nt = grid->GlobalDimensions()[orthogdim];
int Ngamma = gamma.size();
int Nmom = mom.size();
int fd=grid->_fdimensions[orthogdim];
int ld=grid->_ldimensions[orthogdim];
int rd=grid->_rdimensions[orthogdim];
// will locally sum vectors first
// sum across these down to scalars
// splitting the SIMD
int MFrvol = rd*Lblock*Rblock*Nmom;
int MFlvol = ld*Lblock*Rblock*Nmom;
Vector<SpinMatrix_v > lvSum(MFrvol);
parallel_for (int r = 0; r < MFrvol; r++)
{
lvSum[r] = zero;
}
Vector<SpinMatrix_s > lsSum(MFlvol);
parallel_for (int r = 0; r < MFlvol; r++)
{
lsSum[r]=scalar_type(0.0);
}
int e1= grid->_slice_nblock[orthogdim];
int e2= grid->_slice_block [orthogdim];
int stride=grid->_slice_stride[orthogdim];
if (caller) caller->startTimer("contraction: colour trace & mom.");
// Nested parallelism would be ok
// Wasting cores here. Test case r
parallel_for(int r=0;r<rd;r++)
{
int so=r*grid->_ostride[orthogdim]; // base offset for start of plane
for(int n=0;n<e1;n++)
for(int b=0;b<e2;b++)
{
int ss= so+n*stride+b;
for(int i=0;i<Lblock;i++)
{
auto left = conjugate(lhs_wi[i]._odata[ss]);
for(int j=0;j<Rblock;j++)
{
SpinMatrix_v vv;
auto right = rhs_vj[j]._odata[ss];
for(int s1=0;s1<Ns;s1++)
for(int s2=0;s2<Ns;s2++)
{
vv()(s1,s2)() = left()(s2)(0) * right()(s1)(0)
+ left()(s2)(1) * right()(s1)(1)
+ left()(s2)(2) * right()(s1)(2);
}
// After getting the sitewise product do the mom phase loop
int base = Nmom*i+Nmom*Lblock*j+Nmom*Lblock*Rblock*r;
for ( int m=0;m<Nmom;m++)
{
int idx = m+base;
auto phase = mom[m]._odata[ss];
mac(&lvSum[idx],&vv,&phase);
}
}
}
}
}
if (caller) caller->stopTimer("contraction: colour trace & mom.");
// Sum across simd lanes in the plane, breaking out orthog dir.
if (caller) caller->startTimer("contraction: local space sum");
parallel_for(int rt=0;rt<rd;rt++)
{
std::vector<int> icoor(Nd);
std::vector<SpinMatrix_s> extracted(Nsimd);
for(int i=0;i<Lblock;i++)
for(int j=0;j<Rblock;j++)
for(int m=0;m<Nmom;m++)
{
int ij_rdx = m+Nmom*i+Nmom*Lblock*j+Nmom*Lblock*Rblock*rt;
extract(lvSum[ij_rdx],extracted);
for(int idx=0;idx<Nsimd;idx++)
{
grid->iCoorFromIindex(icoor,idx);
int ldx = rt+icoor[orthogdim]*rd;
int ij_ldx = m+Nmom*i+Nmom*Lblock*j+Nmom*Lblock*Rblock*ldx;
lsSum[ij_ldx]=lsSum[ij_ldx]+extracted[idx];
}
}
}
if (caller) caller->stopTimer("contraction: local space sum");
// ld loop and local only??
if (caller) caller->startTimer("contraction: spin trace");
int pd = grid->_processors[orthogdim];
int pc = grid->_processor_coor[orthogdim];
parallel_for_nest2(int lt=0;lt<ld;lt++)
{
for(int pt=0;pt<pd;pt++)
{
int t = lt + pt*ld;
if (pt == pc)
{
for(int i=0;i<Lblock;i++)
for(int j=0;j<Rblock;j++)
for(int m=0;m<Nmom;m++)
{
int ij_dx = m+Nmom*i + Nmom*Lblock * j + Nmom*Lblock * Rblock * lt;
for(int mu=0;mu<Ngamma;mu++)
{
// this is a bit slow
mat(m,mu,t,i,j) = trace(lsSum[ij_dx]*Gamma(gamma[mu]));
}
}
}
else
{
const scalar_type zz(0.0);
for(int i=0;i<Lblock;i++)
for(int j=0;j<Rblock;j++)
for(int mu=0;mu<Ngamma;mu++)
for(int m=0;m<Nmom;m++)
{
mat(m,mu,t,i,j) =zz;
}
}
}
}
if (caller) caller->stopTimer("contraction: spin trace");
////////////////////////////////////////////////////////////////////
// This global sum is taking as much as 50% of time on 16 nodes
// Vector size is 7 x 16 x 32 x 16 x 16 x sizeof(complex) = 2MB - 60MB depending on volume
// Healthy size that should suffice
////////////////////////////////////////////////////////////////////
if (caller) caller->startTimer("contraction: global sum");
grid->GlobalSumVector(&mat(0,0,0,0,0),Nmom*Ngamma*Nt*Lblock*Rblock);
if (caller) caller->stopTimer("contraction: global sum");
}
END_MODULE_NAMESPACE
END_HADRONS_NAMESPACE
#endif // Hadrons_MContraction_A2AMesonFieldKernels_hpp_
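For orientation, the object this kernel assembles is, in hedged notation (spin and colour sums implicit),

    M^{(\Gamma,\vec p)}_{ij}(t) = \sum_{\vec x} w_i^\dagger(\vec x,t)\, \Gamma\, v_j(\vec x,t)\, e^{i\vec p\cdot\vec x},

with the colour trace and momentum projection done in the first parallel loop, the SIMD-lane reduction in the second, and the spin trace against \Gamma in the third, matching the three timed phases above.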
View File
@ -1,36 +0,0 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: Hadrons/Modules/MGauge/GaugeFix.cc
Copyright (C) 2015-2018
Author: Antonin Portelli <antonin.portelli@me.com>
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#include <Hadrons/Modules/MGauge/GaugeFix.hpp>
using namespace Grid;
using namespace Hadrons;
using namespace MGauge;
template class Grid::Hadrons::MGauge::TGaugeFix<GIMPL>;
View File
@ -1,135 +0,0 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: Hadrons/Modules/MGauge/GaugeFix.hpp
Copyright (C) 2015-2018
Author: Antonin Portelli <antonin.portelli@me.com>
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#ifndef Hadrons_MGaugeFix_hpp_
#define Hadrons_MGaugeFix_hpp_
#include <Hadrons/Global.hpp>
#include <Hadrons/Module.hpp>
#include <Hadrons/ModuleFactory.hpp>
#include <Grid/qcd/utils/GaugeFix.h>
BEGIN_HADRONS_NAMESPACE
/******************************************************************************
* Fix gauge *
******************************************************************************/
BEGIN_MODULE_NAMESPACE(MGauge)
class GaugeFixPar: Serializable
{
public:
GRID_SERIALIZABLE_CLASS_MEMBERS(GaugeFixPar,
std::string, gauge,
Real, alpha,
int, maxiter,
Real, Omega_tol,
Real, Phi_tol,
bool, Fourier);
};
template <typename GImpl>
class TGaugeFix: public Module<GaugeFixPar>
{
public:
GAUGE_TYPE_ALIASES(GImpl,);
public:
// constructor
TGaugeFix(const std::string name);
// destructor
virtual ~TGaugeFix(void) {};
// dependencies/products
virtual std::vector<std::string> getInput(void);
virtual std::vector<std::string> getOutput(void);
// setup
virtual void setup(void);
// execution
virtual void execute(void);
};
MODULE_REGISTER_TMP(GaugeFix, TGaugeFix<GIMPL>, MGauge);
/******************************************************************************
* TGaugeFix implementation *
******************************************************************************/
// constructor /////////////////////////////////////////////////////////////////
template <typename GImpl>
TGaugeFix<GImpl>::TGaugeFix(const std::string name)
: Module<GaugeFixPar>(name)
{}
// dependencies/products ///////////////////////////////////////////////////////
template <typename GImpl>
std::vector<std::string> TGaugeFix<GImpl>::getInput(void)
{
std::vector<std::string> in = {par().gauge};
return in;
}
template <typename GImpl>
std::vector<std::string> TGaugeFix<GImpl>::getOutput(void)
{
std::vector<std::string> out = {getName()};
return out;
}
// setup ///////////////////////////////////////////////////////////////////////
template <typename GImpl>
void TGaugeFix<GImpl>::setup(void)
{
envCreateLat(GaugeField, getName());
}
// execution ///////////////////////////////////////////////////////////////////
template <typename GImpl>
void TGaugeFix<GImpl>::execute(void)
//Loads the gauge and fixes it
{
std::cout << "executing" << std::endl;
LOG(Message) << "Fixing the Gauge" << std::endl;
LOG(Message) << par().gauge << std::endl;
auto &U = envGet(GaugeField, par().gauge);
auto &Umu = envGet(GaugeField, getName());
LOG(Message) << "Gauge Field fetched" << std::endl;
//do we allow maxiter etc to be user set?
Real alpha = par().alpha;
int maxiter = par().maxiter;
Real Omega_tol = par().Omega_tol;
Real Phi_tol = par().Phi_tol;
bool Fourier = par().Fourier;
FourierAcceleratedGaugeFixer<PeriodicGimplR>::SteepestDescentGaugeFix(U,alpha,maxiter,Omega_tol,Phi_tol,Fourier);
Umu = U;
LOG(Message) << "Gauge Fixed" << std::endl;
}
END_MODULE_NAMESPACE
END_HADRONS_NAMESPACE
#endif // Hadrons_MGaugeFix_hpp_
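The underlying fixer invoked by execute() can also be driven directly; a hedged fragment with placeholder parameter values (not recommendations):

// Fourier-accelerated steepest-descent gauge fixing, as called above.
LatticeGaugeField U(grid);
// ... read or generate the configuration into U ...
FourierAcceleratedGaugeFixer<PeriodicGimplR>::SteepestDescentGaugeFix(
    U, 0.1 /*alpha*/, 10000 /*maxiter*/, 1.0e-12 /*Omega_tol*/,
    1.0e-12 /*Phi_tol*/, true /*Fourier*/);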
View File
@ -32,4 +32,4 @@ using namespace Hadrons;
using namespace MIO;
template class Grid::Hadrons::MIO::TLoadEigenPack<FermionEigenPack<FIMPL>>;
template class Grid::Hadrons::MIO::TLoadEigenPack<FermionEigenPack<FIMPL, FIMPLF>>;
View File
@ -54,9 +54,7 @@ template <typename Pack>
class TLoadEigenPack: public Module<LoadEigenPackPar>
{
public:
typedef typename Pack::Field Field;
typedef typename Pack::FieldIo FieldIo;
typedef BaseEigenPack<Field> BasePack;
typedef EigenPack<typename Pack::Field> BasePack;
public:
// constructor
TLoadEigenPack(const std::string name);
@ -72,7 +70,6 @@ public:
};
MODULE_REGISTER_TMP(LoadFermionEigenPack, TLoadEigenPack<FermionEigenPack<FIMPL>>, MIO);
MODULE_REGISTER_TMP(LoadFermionEigenPackIo32, ARG(TLoadEigenPack<FermionEigenPack<FIMPL, FIMPLF>>), MIO);
/******************************************************************************
* TLoadEigenPack implementation *
@ -104,14 +101,9 @@ std::vector<std::string> TLoadEigenPack<Pack>::getOutput(void)
template <typename Pack>
void TLoadEigenPack<Pack>::setup(void)
{
GridBase *gridIo = nullptr;
if (typeHash<Field>() != typeHash<FieldIo>())
{
gridIo = envGetRbGrid(FieldIo, par().Ls);
}
env().createGrid(par().Ls);
envCreateDerived(BasePack, Pack, getName(), par().Ls, par().size,
envGetRbGrid(Field, par().Ls), gridIo);
env().getRbGrid(par().Ls));
}
// execution ///////////////////////////////////////////////////////////////////
View File
@ -1,35 +0,0 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: Hadrons/Modules/MNPR/Amputate.cc
Copyright (C) 2015-2018
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#include <Hadrons/Modules/MNPR/Amputate.hpp>
using namespace Grid;
using namespace Hadrons;
using namespace MNPR;
template class Grid::Hadrons::MNPR::TAmputate<FIMPL,FIMPL>;
View File
@ -1,200 +0,0 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: Hadrons/Modules/MNPR/Amputate.hpp
Copyright (C) 2015-2018
Author: Antonin Portelli <antonin.portelli@me.com>
Author: Julia Kettle J.R.Kettle-2@sms.ed.ac.uk
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#ifndef Hadrons_Amputate_hpp_
#define Hadrons_Amputate_hpp_
#include <Hadrons/Global.hpp>
#include <Hadrons/Module.hpp>
#include <Hadrons/ModuleFactory.hpp>
#include <Grid/Eigen/LU>
//#include <Grid/qcd/utils/PropagatorUtils.h>
//#include <Grid/serialisation/Serialisation.h>
BEGIN_HADRONS_NAMESPACE
/******************************************************************************
* TAmputate *
Performs bilinear contractions of the type tr[g5*adj(Sout)*g5*G*Sin]
Suitable for non-exceptional momenta
******************************************************************************/
BEGIN_MODULE_NAMESPACE(MNPR)
class AmputatePar: Serializable
{
public:
GRID_SERIALIZABLE_CLASS_MEMBERS(AmputatePar,
std::string, Sin, //need to make this a propagator type?
std::string, Sout, //same
std::string, vertex,
std::string, pin,
std::string, pout,
std::string, output,
std::string, input);
};
template <typename FImpl1, typename FImpl2>
class TAmputate: public Module<AmputatePar>
{
public:
FERM_TYPE_ALIASES(FImpl1, 1);
FERM_TYPE_ALIASES(FImpl2, 2);
class Result: Serializable
{
public:
GRID_SERIALIZABLE_CLASS_MEMBERS(Result,
std::vector<Complex>, Vamp,
);
};
public:
// constructor
TAmputate(const std::string name);
// destructor
virtual ~TAmputate(void) {};
// dependencies/products
virtual std::vector<std::string> getInput(void);
virtual std::vector<std::string> getOutput(void);
virtual SpinColourMatrix invertspincolmat(SpinColourMatrix &scmat);
// execution
virtual void execute(void);
};
MODULE_REGISTER_TMP(Amputate, ARG(TAmputate<FIMPL, FIMPL>), MNPR);
/******************************************************************************
* TAmputate implementation *
******************************************************************************/
// constructor /////////////////////////////////////////////////////////////////
template <typename FImpl1, typename FImpl2>
TAmputate<FImpl1, FImpl2>::TAmputate(const std::string name)
: Module<AmputatePar>(name)
{}
// dependencies/products ///////////////////////////////////////////////////////
template <typename FImpl1, typename FImpl2>
std::vector<std::string> TAmputate<FImpl1, FImpl2>::getInput(void)
{
std::vector<std::string> input = {par().Sin, par().Sout, par().vertex};
return input;
}
template <typename FImpl1, typename FImpl2>
std::vector<std::string> TAmputate<FImpl1, FImpl2>::getOutput(void)
{
std::vector<std::string> output = {getName()};
return output;
}
// Invert spin colour matrix using Eigen
template <typename Fimpl1, typename Fimpl2>
SpinColourMatrix TAmputate<Fimpl1, Fimpl2>::invertspincolmat(SpinColourMatrix &scmat)
{
Eigen::MatrixXcf scmat_2d(Ns*Nc,Ns*Nc);
for(int ic=0; ic<Nc; ic++){
for(int jc=0; jc<Nc; jc++){
for(int is=0; is<Ns; is++){
for(int js=0; js<Ns; js++){
scmat_2d(Ns*ic+is,Ns*jc+js) = scmat()(is,js)(ic,jc);
}}
}}
Eigen::MatrixXcf scmat_2d_inv = scmat_2d.inverse();
SpinColourMatrix scmat_inv;
for(int ic=0; ic<Nc; ic++){
for(int jc=0; jc<Nc; jc++){
for(int is=0; is<Ns; is++){
for(int js=0; js<Ns; js++){
scmat_inv()(is,js)(ic,jc) = scmat_2d_inv(Ns*ic+is,Ns*jc+js);
}}
}}
return scmat_inv;
}
// execution ///////////////////////////////////////////////////////////////////
template <typename FImpl1, typename FImpl2>
void TAmputate<FImpl1, FImpl2>::execute(void)
{
LOG(Message) << "Computing bilinear amputations '" << getName() << "' using"
<< " momentum '" << par().Sin << "' and '" << par().Sout << "'"
<< std::endl;
BinaryWriter writer(par().output);
PropagatorField1 &Sin = *env().template getObject<PropagatorField1>(par().Sin); //Do these have the phases taken into account?? Don't think so. FIX
PropagatorField2 &Sout = *env().template getObject<PropagatorField2>(par().Sout);
std::vector<int> pin = strToVec<int>(par().pin), pout = strToVec<int>(par().pout);
std::vector<Real> latt_size(GridDefaultLatt().begin(), GridDefaultLatt().end());
LatticeComplex pdotxin(env().getGrid()), pdotxout(env().getGrid()), coor(env().getGrid());
LOG(Message) << "Propagators set up " << std::endl;
std::vector<SpinColourMatrix> vertex; // Let's read from file here
Gamma g5(Gamma::Algebra::Gamma5);
Result result;
LOG(Message) << "reading file - " << par().input << std::endl;
BinaryReader reader(par().input);
Complex Ci(0.0,1.0);
std::string svertex;
read(reader,"vertex", vertex);
LOG(Message) << "vertex read" << std::endl;
pdotxin=zero;
pdotxout=zero;
for (unsigned int mu = 0; mu < 4; ++mu)
{
Real TwoPiL = M_PI * 2.0/ latt_size[mu];
LatticeCoordinate(coor,mu);
pdotxin = pdotxin +(TwoPiL * pin[mu]) * coor;
pdotxout= pdotxout +(TwoPiL * pout[mu]) * coor;
}
Sin = Sin*exp(-Ci*pdotxin); //phase corrections
Sout = Sout*exp(-Ci*pdotxout);
SpinColourMatrix Sin_mom = sum(Sin);
SpinColourMatrix Sout_mom = sum(Sout);
LOG(Message) << "summed over lattice" << std::endl;
LOG(Message) << "Lattice -> spincolourmatrix conversion" << std::endl;
SpinColourMatrix Sin_inv = invertspincolmat(Sin_mom);
SpinColourMatrix Sout_inv = invertspincolmat(Sout_mom);
LOG(Message) << "Inversions done" << std::endl;
result.Vamp.resize(Gamma::nGamma/2);
for( int mu=0; mu < Gamma::nGamma/2; mu++){
Gamma::Algebra gam = Gamma::Algebra(mu*2+1);
result.Vamp[mu] = 1/12.0*trace(adj(Gamma(gam))*g5*Sout_inv*g5*vertex[mu]*Sin_inv);
LOG(Message) << "Vamp[" << mu << "] - " << result.Vamp[mu] << std::endl;
}
}
END_MODULE_NAMESPACE
END_HADRONS_NAMESPACE
#endif // Hadrons_Amputate_hpp_
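For reference, the index flattening used by invertspincolmat above, in isolation: the (spin,colour) pair maps to a single row/column index n = Ns*ic + is before the 12x12 matrix is handed to Eigen. A minimal standalone sketch, assuming the bundled Eigen headers; the array layout and helper name are illustrative, not part of the module.

#include <Grid/Eigen/LU> // bundled Eigen, as included by Amputate.hpp above
#include <complex>

static const int Ns = 4, Nc = 3;
typedef std::complex<double> Cplx;

// Flatten (is,ic),(js,jc) -> (Ns*ic+is, Ns*jc+js), invert, unflatten.
void invertSpinColour(const Cplx in[Ns][Ns][Nc][Nc], Cplx out[Ns][Ns][Nc][Nc])
{
    Eigen::MatrixXcd m(Ns*Nc, Ns*Nc);
    for (int ic=0; ic<Nc; ic++)
    for (int jc=0; jc<Nc; jc++)
    for (int is=0; is<Ns; is++)
    for (int js=0; js<Ns; js++)
        m(Ns*ic+is, Ns*jc+js) = in[is][js][ic][jc];
    Eigen::MatrixXcd minv = m.inverse(); // dense LU-based inverse
    for (int ic=0; ic<Nc; ic++)
    for (int jc=0; jc<Nc; jc++)
    for (int is=0; is<Ns; is++)
    for (int js=0; js<Ns; js++)
        out[is][js][ic][jc] = minv(Ns*ic+is, Ns*jc+js);
}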

View File

@ -1,35 +0,0 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: Hadrons/Modules/MNPR/Bilinear.cc
Copyright (C) 2015-2018
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#include <Hadrons/Modules/MNPR/Bilinear.hpp>
using namespace Grid;
using namespace Hadrons;
using namespace MNPR;
template class Grid::Hadrons::MNPR::TBilinear<FIMPL,FIMPL>;

View File

@ -1,225 +0,0 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: Hadrons/Modules/MNPR/Bilinear.hpp
Copyright (C) 2015-2018
Author: Antonin Portelli <antonin.portelli@me.com>
Author: Julia Kettle J.R.Kettle-2@sms.ed.ac.uk
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#ifndef Hadrons_Bilinear_hpp_
#define Hadrons_Bilinear_hpp_
#include <Hadrons/Global.hpp>
#include <Hadrons/Module.hpp>
#include <Hadrons/ModuleFactory.hpp>
#include <Hadrons/ModuleFactory.hpp>
//#include <Grid/qcd/utils/PropagatorUtils.h>
BEGIN_HADRONS_NAMESPACE
/******************************************************************************
* TBilinear *
Performs bilinear contractions of the type tr[g5*adj(Sout)*g5*G*Sin]
Suitable for non-exceptional momenta in Rome-Southampton NPR
******************************************************************************/
BEGIN_MODULE_NAMESPACE(MNPR)
class BilinearPar: Serializable
{
public:
GRID_SERIALIZABLE_CLASS_MEMBERS(BilinearPar,
std::string, Sin,
std::string, Sout,
std::string, pin,
std::string, pout,
std::string, output);
};
template <typename FImpl1, typename FImpl2>
class TBilinear: public Module<BilinearPar>
{
public:
FERM_TYPE_ALIASES(FImpl1, 1);
FERM_TYPE_ALIASES(FImpl2, 2);
class Result: Serializable
{
public:
GRID_SERIALIZABLE_CLASS_MEMBERS(Result,
std::vector<SpinColourMatrix>, bilinear);
};
public:
// constructor
TBilinear(const std::string name);
// destructor
virtual ~TBilinear(void) {};
// dependencies/products
virtual std::vector<std::string> getInput(void);
virtual std::vector<std::string> getOutput(void);
//LatticeSpinColourMatrix PhaseProps(LatticeSpinColourMatrix S, std::vector<Real> p);
// setup
virtual void setup(void);
// execution
virtual void execute(void);
};
MODULE_REGISTER_TMP(Bilinear, ARG(TBilinear<FIMPL, FIMPL>), MNPR);
/******************************************************************************
* TBilinear implementation *
******************************************************************************/
// constructor /////////////////////////////////////////////////////////////////
template <typename FImpl1, typename FImpl2>
TBilinear<FImpl1, FImpl2>::TBilinear(const std::string name)
: Module<BilinearPar>(name)
{}
// setup ///////////////////////////////////////////////////////////////////////
template <typename FImpl1, typename FImpl2>
void TBilinear<FImpl1, FImpl2>::setup(void)
{
//env().template registerLattice<LatticeSpinColourMatrix>(getName());
//env().template registerObject<SpinColourMatrix>(getName());
}
// dependencies/products ///////////////////////////////////////////////////////
template <typename FImpl1, typename FImpl2>
std::vector<std::string> TBilinear<FImpl1, FImpl2>::getInput(void)
{
std::vector<std::string> input = {par().Sin, par().Sout};
return input;
}
template <typename FImpl1, typename FImpl2>
std::vector<std::string> TBilinear<FImpl1, FImpl2>::getOutput(void)
{
std::vector<std::string> out = {getName()};
return out;
}
/*
/////Phase propagators//////////////////////////
template <typename FImpl1, typename FImpl2>
LatticeSpinColourMatrix TBilinear<FImpl1, FImpl2>::PhaseProps(LatticeSpinColourMatrix S, std::vector<Real> p)
{
GridBase *grid = S._grid;
LatticeComplex pdotx(grid), coor(grid);
std::vector<int> latt_size = grid->_fdimensions;
Complex Ci(0.0,1.0);
pdotx=zero;
for (unsigned int mu = 0; mu < 4; ++mu)
{
Real TwoPiL = M_PI * 2.0/ latt_size[mu];
LatticeCoordinate(coor,mu);
pdotx = pdotx +(TwoPiL * p[mu]) * coor;
}
S = S*exp(-Ci*pdotx);
return S;
}
*/
// execution ///////////////////////////////////////////////////////////////////
template <typename FImpl1, typename FImpl2>
void TBilinear<FImpl1, FImpl2>::execute(void)
{
/**************************************************************************
Compute the bilinear vertex needed for the NPR.
V(G) = sum_x [ g5 * adj(S'(x,p2)) * g5 * G * S'(x,p1) ]_{si,sj,ci,cj}
G is one of the 16 gamma vertices [I,gmu,g5,g5gmu,sig(mu,nu)]
* G
/ \
p1/ \p2
/ \
/ \
Returns a spin-colour matrix, with indices si,sj, ci,cj
Conventions:
p1 - incoming momenta
p2 - outgoing momenta
q = (p1-p2)
**************************************************************************/
LOG(Message) << "Computing bilinear contractions '" << getName() << "' using"
<< " momentum '" << par().Sin << "' and '" << par().Sout << "'"
<< std::endl;
BinaryWriter writer(par().output);
// Propagators
LatticeSpinColourMatrix &Sin = *env().template getObject<LatticeSpinColourMatrix>(par().Sin);
LatticeSpinColourMatrix &Sout = *env().template getObject<LatticeSpinColourMatrix>(par().Sout);
LatticeComplex pdotxin(env().getGrid()), pdotxout(env().getGrid()), coor(env().getGrid());
// momentum on legs
std::vector<Real> pin = strToVec<Real>(par().pin), pout = strToVec<Real>(par().pout);
std::vector<Real> latt_size(GridDefaultLatt().begin(), GridDefaultLatt().end());
//bilinears
LatticeSpinColourMatrix bilinear_x(env().getGrid());
SpinColourMatrix bilinear;
Gamma g5(Gamma::Algebra::Gamma5);
Result result;
Complex Ci(0.0,1.0);
//
pdotxin=zero;
pdotxout=zero;
for (unsigned int mu = 0; mu < 4; ++mu)
{
Real TwoPiL = M_PI * 2.0/ latt_size[mu];
LatticeCoordinate(coor,mu);
pdotxin = pdotxin +(TwoPiL * pin[mu]) * coor;
pdotxout= pdotxout +(TwoPiL * pout[mu]) * coor;
}
Sin = Sin*exp(-Ci*pdotxin); //phase corrections
Sout = Sout*exp(-Ci*pdotxout);
////Set up gamma vector//////////////////////////
std::vector<Gamma> gammavector;
for( int i=0; i<Gamma::nGamma; i++){
Gamma::Algebra gam = i;
gammavector.push_back(Gamma(gam));
}
result.bilinear.resize(Gamma::nGamma);
/////////////////////////////////////////////////
//LatticeSpinMatrix temp = g5*Sout;
////////Form Vertex//////////////////////////////
for (int i=0; i < Gamma::nGamma; i++){
bilinear_x = g5*adj(Sout)*g5*gammavector[i]*Sin;
result.bilinear[i] = sum(bilinear_x); //sum over lattice sites
}
//////////////////////////////////////////////////
write(writer, par().output, result.bilinear);
LOG(Message) << "Complete. Writing results to " << par().output << std:: endl;
}
END_MODULE_NAMESPACE
END_HADRONS_NAMESPACE
#endif // Hadrons_Bilinear_hpp_
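The phase correction applied to Sin and Sout above is the same in all three NPR modules: p.x = sum_mu (2*pi/L_mu) * p_mu * x_mu, then S(x) -> S(x) * exp(-i p.x) to remove the leg momentum. A scalar sketch of the convention, standalone with std::complex only; the function name and arguments are illustrative.

#include <cmath>
#include <complex>
#include <vector>

std::complex<double> planeWavePhase(const std::vector<int> &x,
                                    const std::vector<double> &p,
                                    const std::vector<int> &latt)
{
    double pdotx = 0.;
    for (unsigned int mu = 0; mu < x.size(); ++mu)
    {
        pdotx += (2.*M_PI/latt[mu]) * p[mu] * x[mu];
    }
    // the modules above multiply each propagator by exp(-i*pdotx)
    return std::exp(std::complex<double>(0., -pdotx));
}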

View File

@ -1,35 +0,0 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: Hadrons/Modules/MNPR/FourQuark.cc
Copyright (C) 2015-2018
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#include <Hadrons/Modules/MNPR/FourQuark.hpp>
using namespace Grid;
using namespace Hadrons;
using namespace MNPR;
template class Grid::Hadrons::MNPR::TFourQuark<FIMPL,FIMPL>;

View File

@ -1,274 +0,0 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: Hadrons/Modules/MNPR/FourQuark.hpp
Copyright (C) 2015-2018
Author: Antonin Portelli <antonin.portelli@me.com>
Author: Julia Kettle J.R.Kettle-2@sms.ed.ac.uk
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#ifndef Hadrons_FourQuark_hpp_
#define Hadrons_FourQuark_hpp_
#include <typeinfo>
#include <Hadrons/Global.hpp>
#include <Hadrons/Module.hpp>
#include <Hadrons/ModuleFactory.hpp>
#include <Grid/serialisation/Serialisation.h>
BEGIN_HADRONS_NAMESPACE
/******************************************************************************
* TFourQuark *
Performs fourquark contractions of the type tr[g5*adj(Sout)*g5*G*Sin]
Suitable for non-exceptional momenta
******************************************************************************/
BEGIN_MODULE_NAMESPACE(MNPR)
class FourQuarkPar: Serializable
{
public:
GRID_SERIALIZABLE_CLASS_MEMBERS(FourQuarkPar,
std::string, Sin, //need to make this a propagator type?
std::string, Sout, //same
std::string, pin,
std::string, pout,
bool, fullbasis,
std::string, output);
};
template <typename FImpl1, typename FImpl2>
class TFourQuark: public Module<FourQuarkPar>
{
public:
FERM_TYPE_ALIASES(FImpl1, 1);
FERM_TYPE_ALIASES(FImpl2, 2);
class Result: Serializable
{
public:
GRID_SERIALIZABLE_CLASS_MEMBERS(Result,
std::vector<SpinColourSpinColourMatrix>, fourquark);
};
public:
// constructor
TFourQuark(const std::string name);
// destructor
virtual ~TFourQuark(void) {};
// dependencies/products
virtual std::vector<std::string> getInput(void);
virtual std::vector<std::string> getOutput(void);
// setup
virtual void tensorprod(LatticeSpinColourSpinColourMatrix &lret, LatticeSpinColourMatrix a, LatticeSpinColourMatrix b);
virtual void setup(void);
// execution
virtual void execute(void);
};
MODULE_REGISTER_TMP(FourQuark, ARG(TFourQuark<FIMPL, FIMPL>), MNPR);
/******************************************************************************
* TFourQuark implementation *
******************************************************************************/
// constructor /////////////////////////////////////////////////////////////////
template <typename FImpl1, typename FImpl2>
TFourQuark<FImpl1, FImpl2>::TFourQuark(const std::string name)
: Module<FourQuarkPar>(name)
{}
// dependencies/products ///////////////////////////////////////////////////////
template <typename FImpl1, typename FImpl2>
std::vector<std::string> TFourQuark<FImpl1, FImpl2>::getInput(void)
{
std::vector<std::string> input = {par().Sin, par().Sout};
return input;
}
template <typename FImpl1, typename FImpl2>
std::vector<std::string> TFourQuark<FImpl1, FImpl2>::getOutput(void)
{
std::vector<std::string> output = {getName()};
return output;
}
template <typename FImpl1, typename FImpl2>
void TFourQuark<FImpl1, FImpl2>::tensorprod(LatticeSpinColourSpinColourMatrix &lret, LatticeSpinColourMatrix a, LatticeSpinColourMatrix b)
{
#if 0
parallel_for(auto site=lret.begin();site<lret.end();site++) {
for (int si=0; si < 4; ++si){
for(int sj=0; sj <4; ++sj){
for (int ci=0; ci < 3; ++ci){
for (int cj=0; cj < 3; ++cj){
for (int sk=0; sk < 4; ++sk){
for(int sl=0; sl <4; ++sl){
for (int ck=0; ck < 3; ++ck){
for (int cl=0; cl < 3; ++cl){
lret[site]()(si,sj)(ci,cj)(sk,sl)(ck,cl)=a[site]()(si,sj)(ci,cj)*b[site]()(sk,sl)(ck,cl);
}}
}}
}}
}}
}
#else
// FIXME ; is there a general need for this construct ? In which case we should encapsulate the
// below loops in a helper function.
//LOG(Message) << "sp co mat a is - " << a << std::endl;
//LOG(Message) << "sp co mat b is - " << b << std::endl;
parallel_for(auto site=lret.begin();site<lret.end();site++) {
vTComplex left;
for(int si=0; si < Ns; ++si){
for(int sj=0; sj < Ns; ++sj){
for (int ci=0; ci < Nc; ++ci){
for (int cj=0; cj < Nc; ++cj){
//LOG(Message) << "si, sj, ci, cj - " << si << ", " << sj << ", "<< ci << ", "<< cj << std::endl;
left()()() = a[site]()(si,sj)(ci,cj);
//LOG(Message) << left << std::endl;
lret[site]()(si,sj)(ci,cj)=left()*b[site]();
}}
}}
}
#endif
}
// setup ///////////////////////////////////////////////////////////////////////
template <typename FImpl1, typename FImpl2>
void TFourQuark<FImpl1, FImpl2>::setup(void)
{
envCreateLat(LatticeSpinColourMatrix, getName());
}
// execution ///////////////////////////////////////////////////////////////////
template <typename FImpl1, typename FImpl2>
void TFourQuark<FImpl1, FImpl2>::execute(void)
{
/*********************************************************************************
TFourQuark : Creates the four quark vertex required for the NPR of four-quark ops
V_{Gamma_1,Gamma_2} = sum_x [ ( g5 * adj(S'(x,p2)) * g5 * G1 * S'(x,p1) )_ci,cj;si,sj x ( g5 * adj(S'(x,p2)) * g5 * G2 * S'(x,p1) )_ck,cl;sk,sl ]
Create a bilinear vertex for G1 and G2; the spin and colour indices are kept free. There are 16 potential Gs.
We then find the outer product of V1 and V2, keeping the spin and colour indices uncontracted.
Then this is summed over the lattice coordinate.
The result is a SpinColourSpinColourMatrix, with 4 colour and 4 spin indices.
We have up to 256 of these, including the off-diagonal combinations (G1 != G2).
\ /
\p1 p1/
\ /
\ /
G1 * * G2
/ \
/ \
/p2 p2\
/ \
*********************************************************************************/
LOG(Message) << "Computing fourquark contractions '" << getName() << "' using"
<< " momentum '" << par().Sin << "' and '" << par().Sout << "'"
<< std::endl;
BinaryWriter writer(par().output);
PropagatorField1 &Sin = *env().template getObject<PropagatorField1>(par().Sin);
PropagatorField2 &Sout = *env().template getObject<PropagatorField2>(par().Sout);
std::vector<Real> pin = strToVec<Real>(par().pin), pout = strToVec<Real>(par().pout);
bool fullbasis = par().fullbasis;
Gamma g5(Gamma::Algebra::Gamma5);
Result result;
std::vector<Real> latt_size(GridDefaultLatt().begin(), GridDefaultLatt().end());
LatticeComplex pdotxin(env().getGrid()), pdotxout(env().getGrid()), coor(env().getGrid());
LatticeSpinColourMatrix bilinear_mu(env().getGrid()), bilinear_nu(env().getGrid());
LatticeSpinColourSpinColourMatrix lret(env().getGrid());
Complex Ci(0.0,1.0);
//Phase propagators
//Sin = Grid::QCD::PropUtils::PhaseProps(Sin,pin);
//Sout = Grid::QCD::PropUtils::PhaseProps(Sout,pout);
//find p.x for in and out so phase can be accounted for in propagators
pdotxin=zero;
pdotxout=zero;
for (unsigned int mu = 0; mu < 4; ++mu)
{
Real TwoPiL = M_PI * 2.0/ latt_size[mu];
LatticeCoordinate(coor,mu);
pdotxin = pdotxin +(TwoPiL * pin[mu]) * coor;
pdotxout= pdotxout +(TwoPiL * pout[mu]) * coor;
}
Sin = Sin*exp(-Ci*pdotxin); //phase corrections
Sout = Sout*exp(-Ci*pdotxout);
//Set up Gammas
std::vector<Gamma> gammavector;
for( int i=1; i<Gamma::nGamma; i+=2){
Gamma::Algebra gam = i;
gammavector.push_back(Gamma(gam));
}
lret = zero;
if (fullbasis == true){ // all combinations of mu and nu
result.fourquark.resize(Gamma::nGamma/2*Gamma::nGamma/2);
for( int mu=0; mu<Gamma::nGamma/2; mu++){
bilinear_mu = g5*adj(Sout)*g5*gammavector[mu]*Sin;
for ( int nu=0; nu<Gamma::nGamma/2; nu++){
bilinear_nu = g5*adj(Sout)*g5*gammavector[nu]*Sin;
LOG(Message) << "bilinear_nu for nu = " << nu << " is - " << bilinear_mu << std::endl;
result.fourquark[mu*Gamma::nGamma/2 + nu] = zero;
tensorprod(lret,bilinear_mu,bilinear_nu);
result.fourquark[mu*Gamma::nGamma/2 + nu] = sum(lret);
}
}
} else {
result.fourquark.resize(Gamma::nGamma/2);
for ( int mu=0; mu<1; mu++){
//for( int mu=0; mu<Gamma::nGamma/2; mu++ ){
bilinear_mu = g5*adj(Sout)*g5*gammavector[mu]*Sin;
//LOG(Message) << "bilinear_mu for mu = " << mu << " is - " << bilinear_mu << std::endl;
result.fourquark[mu] = zero;
tensorprod(lret,bilinear_mu,bilinear_mu); //tensor outer product
result.fourquark[mu] = sum(lret);
}
}
write(writer, "fourquark", result.fourquark);
}
END_MODULE_NAMESPACE
END_HADRONS_NAMESPACE
#endif // Hadrons_FourQuark_hpp_
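The tensorprod helper above fills lret(si,sj)(ci,cj)(sk,sl)(ck,cl) = a(si,sj)(ci,cj) * b(sk,sl)(ck,cl), i.e. every element of a multiplies the whole of b. The same outer product on flattened arrays, as a minimal standalone sketch; the name and the flat layout are illustrative.

#include <complex>
#include <vector>

typedef std::complex<double> Cplx;

// a, b: flattened spin-colour matrices (144 entries each);
// returns the a.size()*b.size() outer product, a-index slowest.
std::vector<Cplx> tensorProd(const std::vector<Cplx> &a, const std::vector<Cplx> &b)
{
    std::vector<Cplx> ret(a.size()*b.size());
    for (unsigned int i = 0; i < a.size(); ++i)
    for (unsigned int j = 0; j < b.size(); ++j)
        ret[i*b.size() + j] = a[i]*b[j];
    return ret;
}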

View File

@ -2,7 +2,7 @@
Grid physics library, www.github.com/paboyle/Grid
Source file: Hadrons/Modules/MContraction/A2AAslashField.cc
Source file: Hadrons/Modules/MScalarSUN/TimeMomProbe.cc
Copyright (C) 2015-2018
@ -25,10 +25,14 @@ with this program; if not, write to the Free Software Foundation, Inc.,
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#include <Hadrons/Modules/MContraction/A2AAslashField.hpp>
#include <Hadrons/Modules/MScalarSUN/TimeMomProbe.hpp>
using namespace Grid;
using namespace Hadrons;
using namespace MContraction;
using namespace MScalarSUN;
template class Grid::Hadrons::MContraction::TA2AAslashField<FIMPL, PhotonR>;
template class Grid::Hadrons::MScalarSUN::TTimeMomProbe<ScalarNxNAdjImplR<2>>;
template class Grid::Hadrons::MScalarSUN::TTimeMomProbe<ScalarNxNAdjImplR<3>>;
template class Grid::Hadrons::MScalarSUN::TTimeMomProbe<ScalarNxNAdjImplR<4>>;
template class Grid::Hadrons::MScalarSUN::TTimeMomProbe<ScalarNxNAdjImplR<5>>;
template class Grid::Hadrons::MScalarSUN::TTimeMomProbe<ScalarNxNAdjImplR<6>>;

View File

@ -0,0 +1,268 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: Hadrons/Modules/MScalarSUN/TimeMomProbe.hpp
Copyright (C) 2015-2018
Author: Antonin Portelli <antonin.portelli@me.com>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#ifndef Hadrons_MScalarSUN_TimeMomProbe_hpp_
#define Hadrons_MScalarSUN_TimeMomProbe_hpp_
#include <Hadrons/Global.hpp>
#include <Hadrons/Module.hpp>
#include <Hadrons/ModuleFactory.hpp>
#include <Hadrons/Modules/MScalarSUN/Utils.hpp>
BEGIN_HADRONS_NAMESPACE
/******************************************************************************
* n-point functions O(t,p)*tr(phi(t_1,p_1)*...*phi(t_n,p_n)) *
******************************************************************************/
BEGIN_MODULE_NAMESPACE(MScalarSUN)
class TimeMomProbePar: Serializable
{
public:
GRID_SERIALIZABLE_CLASS_MEMBERS(TimeMomProbePar,
std::string, field,
std::vector<std::string>, op,
std::vector<std::vector<std::string>>, timeMom,
std::string, output);
};
class TimeMomProbeResult: Serializable
{
public:
GRID_SERIALIZABLE_CLASS_MEMBERS(TimeMomProbeResult,
std::string, op,
std::vector<std::vector<int>>, timeMom,
std::vector<Complex>, data);
};
template <typename SImpl>
class TTimeMomProbe: public Module<TimeMomProbePar>
{
public:
typedef typename SImpl::Field Field;
typedef typename SImpl::SiteField::scalar_object Site;
typedef typename SImpl::ComplexField ComplexField;
typedef std::vector<Complex> SlicedOp;
public:
// constructor
TTimeMomProbe(const std::string name);
// destructor
virtual ~TTimeMomProbe(void) {};
// dependency relation
virtual std::vector<std::string> getInput(void);
virtual std::vector<std::string> getOutput(void);
// setup
virtual void setup(void);
// execution
virtual void execute(void);
private:
void vectorModulo(std::vector<int> &v);
};
MODULE_REGISTER_TMP(TimeMomProbeSU2, TTimeMomProbe<ScalarNxNAdjImplR<2>>, MScalarSUN);
MODULE_REGISTER_TMP(TimeMomProbeSU3, TTimeMomProbe<ScalarNxNAdjImplR<3>>, MScalarSUN);
MODULE_REGISTER_TMP(TimeMomProbeSU4, TTimeMomProbe<ScalarNxNAdjImplR<4>>, MScalarSUN);
MODULE_REGISTER_TMP(TimeMomProbeSU5, TTimeMomProbe<ScalarNxNAdjImplR<5>>, MScalarSUN);
MODULE_REGISTER_TMP(TimeMomProbeSU6, TTimeMomProbe<ScalarNxNAdjImplR<6>>, MScalarSUN);
/******************************************************************************
* TTimeMomProbe implementation *
******************************************************************************/
// constructor /////////////////////////////////////////////////////////////////
template <typename SImpl>
TTimeMomProbe<SImpl>::TTimeMomProbe(const std::string name)
: Module<TimeMomProbePar>(name)
{}
// dependencies/products ///////////////////////////////////////////////////////
template <typename SImpl>
std::vector<std::string> TTimeMomProbe<SImpl>::getInput(void)
{
std::vector<std::string> in = par().op;
in.push_back(par().field);
return in;
}
template <typename SImpl>
std::vector<std::string> TTimeMomProbe<SImpl>::getOutput(void)
{
std::vector<std::string> out;
return out;
}
// setup ///////////////////////////////////////////////////////////////////////
template <typename SImpl>
void TTimeMomProbe<SImpl>::setup(void)
{
envTmpLat(ComplexField, "ftBuf");
envTmpLat(Field, "ftMatBuf");
}
// execution ///////////////////////////////////////////////////////////////////
// NB: time is direction 0
template <typename SImpl>
void TTimeMomProbe<SImpl>::vectorModulo(std::vector<int> &v)
{
for (unsigned int mu = 0; mu < env().getNd(); ++mu)
{
auto d = env().getDim(mu);
v[mu] = ((v[mu] % d) + d) % d;
}
}
template <typename SImpl>
void TTimeMomProbe<SImpl>::execute(void)
{
const unsigned int nd = env().getNd();
const unsigned int nt = env().getDim(0);
double partVol = 1.;
std::set<std::vector<int>> timeMomSet;
std::vector<std::vector<std::vector<int>>> timeMom;
std::vector<std::vector<int>> transferMom;
FFT fft(envGetGrid(Field));
std::vector<int> dMask(nd, 1);
std::vector<TimeMomProbeResult> result;
std::map<std::string, std::vector<SlicedOp>> slicedOp;
std::vector<SlicedOp> slicedProbe;
auto &phi = envGet(Field, par().field);
envGetTmp(ComplexField, ftBuf);
envGetTmp(Field, ftMatBuf);
dMask[0] = 0;
for (unsigned int mu = 1; mu < nd; ++mu)
{
partVol *= env().getDim(mu);
}
timeMom.resize(par().timeMom.size());
for (unsigned int p = 0; p < timeMom.size(); ++p)
{
for (auto &tms: par().timeMom[p])
{
std::vector<int> tm = strToVec<int>(tms);
timeMom[p].push_back(tm);
timeMomSet.insert(tm);
}
transferMom.push_back(std::vector<int>(nd - 1, 0));
for (auto &tm: timeMom[p])
{
for (unsigned int j = 1; j < nd; ++j)
{
transferMom[p][j - 1] -= tm[j];
}
}
LOG(Message) << "Probe " << p << " (" << timeMom[p].size() << " points) : " << std::endl;
LOG(Message) << " phi(t_i, p_i) for (t_i, p_i) in " << timeMom[p] << std::endl;
LOG(Message) << " operator with momentum " << transferMom[p] << std::endl;
}
LOG(Message) << "FFT: field '" << par().field << "'" << std::endl;
fft.FFT_dim_mask(ftMatBuf, phi, dMask, FFT::forward);
slicedProbe.resize(timeMom.size());
for (unsigned int p = 0; p < timeMom.size(); ++p)
{
std::vector<int> qt;
LOG(Message) << "Making probe " << p << std::endl;
slicedProbe[p].resize(nt);
for (unsigned int t = 0; t < nt; ++t)
{
Site acc;
for (unsigned int i = 0; i < timeMom[p].size(); ++i)
{
Site buf;
qt = timeMom[p][i];
qt[0] += t;
vectorModulo(qt);
peekSite(buf, ftMatBuf, qt);
if (i == 0)
{
acc = buf;
}
else
{
acc *= buf;
}
}
slicedProbe[p][t] = TensorRemove(trace(acc));
}
//std::cout << slicedProbe[p]<< std::endl;
}
for (auto &o: par().op)
{
auto &op = envGet(ComplexField, o);
slicedOp[o].resize(transferMom.size());
LOG(Message) << "FFT: operator '" << o << "'" << std::endl;
fft.FFT_dim_mask(ftBuf, op, dMask, FFT::forward);
//std::cout << ftBuf << std::endl;
for (unsigned int p = 0; p < transferMom.size(); ++p)
{
std::vector<int> qt(nd, 0);
for (unsigned int j = 1; j < nd; ++j)
{
qt[j] = transferMom[p][j - 1];
}
slicedOp[o][p].resize(nt);
for (unsigned int t = 0; t < nt; ++t)
{
TComplex buf;
qt[0] = t;
vectorModulo(qt);
peekSite(buf, ftBuf, qt);
slicedOp[o][p][t] = TensorRemove(buf);
}
//std::cout << ftBuf << std::endl;
//std::cout << slicedOp[o][p] << std::endl;
}
}
LOG(Message) << "Making correlators" << std::endl;
for (auto &o: par().op)
for (unsigned int p = 0; p < timeMom.size(); ++p)
{
TimeMomProbeResult r;
LOG(Message) << " <" << o << " probe_" << p << ">" << std::endl;
r.op = o;
r.timeMom = timeMom[p];
r.data = makeTwoPoint(slicedOp[o][p], slicedProbe[p], 1./partVol);
result.push_back(r);
}
saveResult(par().output, "timemomprobe", result);
}
END_MODULE_NAMESPACE
END_HADRONS_NAMESPACE
#endif // Hadrons_MScalarSUN_TimeMomProbe_hpp_
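vectorModulo above relies on the usual two-step wrap for periodic coordinates, since C++ % keeps the sign of its left operand. In isolation:

// ((v % d) + d) % d maps any integer, negatives included, into [0, d).
int wrapPeriodic(int v, int d)
{
    return ((v % d) + d) % d;
}
// e.g. wrapPeriodic(-1, 8) == 7 and wrapPeriodic(9, 8) == 1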

View File

@ -124,8 +124,7 @@ void TTrMag<SImpl>::execute(void)
std::vector<TrMagResult> result;
auto &phi = envGet(Field, par().field);
auto m2 = sum(phi);
auto mn = m2;
auto m2 = sum(phi), mn = m2;
m2 = -m2*m2;
mn = 1.;

View File

@ -103,7 +103,7 @@ std::vector<Complex> makeTwoPoint(const std::vector<SinkSite> &sink,
{
for (unsigned int t = 0; t < nt; ++t)
{
res[dt] += trace(sink[(t+dt)%nt]*adj(source[t]));
res[dt] += trace(sink[(t+dt)%nt]*source[t]);
}
res[dt] *= factor/static_cast<double>(nt);
}
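The loop above computes C(dt) = (factor/nt) * sum_t sink[(t+dt) % nt] * source[t]. The same assembly on plain complex slices, as a standalone sketch; the function name is illustrative.

#include <complex>
#include <vector>

std::vector<std::complex<double>> twoPoint(const std::vector<std::complex<double>> &sink,
                                           const std::vector<std::complex<double>> &source,
                                           const double factor)
{
    const unsigned int nt = sink.size();
    std::vector<std::complex<double>> res(nt, 0.);
    for (unsigned int dt = 0; dt < nt; ++dt)
    {
        for (unsigned int t = 0; t < nt; ++t)
        {
            res[dt] += sink[(t+dt)%nt]*source[t]; // periodic time separation dt
        }
        res[dt] *= factor/static_cast<double>(nt);
    }
    return res;
}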

View File

@ -32,5 +32,5 @@ using namespace Grid;
using namespace Hadrons;
using namespace MSolver;
template class Grid::Hadrons::MSolver::TA2AVectors<FIMPL, BaseFermionEigenPack<FIMPL>>;
template class Grid::Hadrons::MSolver::TA2AVectors<ZFIMPL, BaseFermionEigenPack<ZFIMPL>>;
template class Grid::Hadrons::MSolver::TA2AVectors<FIMPL, FermionEigenPack<FIMPL>>;
template class Grid::Hadrons::MSolver::TA2AVectors<ZFIMPL, FermionEigenPack<ZFIMPL>>;

View File

@ -79,9 +79,9 @@ private:
};
MODULE_REGISTER_TMP(A2AVectors,
ARG(TA2AVectors<FIMPL, BaseFermionEigenPack<FIMPL>>), MSolver);
ARG(TA2AVectors<FIMPL, FermionEigenPack<FIMPL>>), MSolver);
MODULE_REGISTER_TMP(ZA2AVectors,
ARG(TA2AVectors<ZFIMPL, BaseFermionEigenPack<ZFIMPL>>), MSolver);
ARG(TA2AVectors<ZFIMPL, FermionEigenPack<ZFIMPL>>), MSolver);
/******************************************************************************
* TA2AVectors implementation *

View File

@ -39,7 +39,7 @@ std::shared_ptr<LinearFunction<typename FImpl::FermionField>>
makeGuesser(const std::string epackName)
{
typedef typename FImpl::FermionField FermionField;
typedef BaseFermionEigenPack<FImpl> EPack;
typedef FermionEigenPack<FImpl> EPack;
typedef CoarseFermionEigenPack<FImpl, nBasis> CoarseEPack;
typedef DeflatedGuesser<FermionField> FineGuesser;
typedef LocalCoherenceDeflatedGuesser<

View File

@ -63,7 +63,7 @@ public:
typedef LocalCoherenceLanczos<typename FImpl::SiteSpinor,
typename FImpl::SiteComplex,
nBasis> LCL;
typedef BaseFermionEigenPack<FImpl> BasePack;
typedef FermionEigenPack<FImpl> BasePack;
typedef CoarseFermionEigenPack<FImpl, nBasis> CoarsePack;
typedef HADRONS_DEFAULT_SCHUR_OP<FMat, FermionField> SchurFMat;
public:

View File

@ -1,36 +0,0 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: Hadrons/Modules/MSource/Momentum.cc
Copyright (C) 2015-2018
Author: Antonin Portelli <antonin.portelli@me.com>
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#include <Hadrons/Modules/MSource/Momentum.hpp>
using namespace Grid;
using namespace Hadrons;
using namespace MSource;
template class Grid::Hadrons::MSource::TMomentum<FIMPL>;

View File

@ -1,149 +0,0 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: Hadrons/Modules/MSource/Momentum.hpp
Copyright (C) 2015-2018
Author: Antonin Portelli <antonin.portelli@me.com>
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#ifndef Hadrons_Momentum_hpp_
#define Hadrons_Momentum_hpp_
#include <Hadrons/Global.hpp>
#include <Hadrons/Module.hpp>
#include <Hadrons/ModuleFactory.hpp>
BEGIN_HADRONS_NAMESPACE
/*
Plane Wave source
-----------------
src_x = exp(i * (2*pi/L) * p . x)
*/
/******************************************************************************
* Plane Wave source *
******************************************************************************/
BEGIN_MODULE_NAMESPACE(MSource)
class MomentumPar: Serializable
{
public:
//What is meant by serializable in this context
GRID_SERIALIZABLE_CLASS_MEMBERS(MomentumPar,
std::string, mom);
};
template <typename FImpl>
class TMomentum: public Module<MomentumPar>
{
public:
FERM_TYPE_ALIASES(FImpl,);
public:
// constructor
TMomentum(const std::string name);
// destructor
virtual ~TMomentum(void) {};
// dependency relation
virtual std::vector<std::string> getInput(void);
virtual std::vector<std::string> getOutput(void);
// setup
virtual void setup(void);
// execution
virtual void execute(void);
};
MODULE_REGISTER_TMP(Momentum, TMomentum<FIMPL>, MSource);
//MODULE_REGISTER_NS(Momentum, TMomentum, MSource);
/******************************************************************************
* TMomentum template implementation *
******************************************************************************/
// constructor /////////////////////////////////////////////////////////////////
template <typename FImpl>
TMomentum<FImpl>::TMomentum(const std::string name)
: Module<MomentumPar>(name)
{}
// dependencies/products ///////////////////////////////////////////////////////
template <typename FImpl>
std::vector<std::string> TMomentum<FImpl>::getInput(void)
{
std::vector<std::string> in;
return in;
}
template <typename FImpl>
std::vector<std::string> TMomentum<FImpl>::getOutput(void)
{
std::vector<std::string> out = {getName()};
return out;
}
// setup ///////////////////////////////////////////////////////////////////////
template <typename FImpl>
void TMomentum<FImpl>::setup(void)
{
envCreateLat(PropagatorField, getName());
}
//execution//////////////////////////////////////////////////////////////////
template <typename FImpl>
void TMomentum<FImpl>::execute(void)
{
LOG(Message) << "Generating planewave momentum source with momentum " << par().mom << std::endl;
//what does this env do?
PropagatorField &src = envGet(PropagatorField, getName());
Lattice<iScalar<vInteger>> t(env().getGrid());
LatticeComplex C(env().getGrid()), coor(env().getGrid());
std::vector<Real> p;
std::vector<Real> latt_size(GridDefaultLatt().begin(), GridDefaultLatt().end());
Complex i(0.0,1.0);
LOG(Message) << " " << std::endl;
//get the momentum from parameters
p = strToVec<Real>(par().mom);
C = zero;
LOG(Message) << "momentum converted from string - " << std::to_string(p[0]) <<std::to_string(p[1]) <<std::to_string(p[2]) << std::to_string(p[3]) << std::endl;
for(int mu=0;mu<4;mu++){
Real TwoPiL = M_PI * 2.0/ latt_size[mu];
LatticeCoordinate(coor,mu);
C = C +(TwoPiL * p[mu]) * coor;
}
C = exp(C*i);
LOG(Message) << "exponential of pdotx taken " << std::endl;
src = src + C;
LOG(Message) << "source created" << std::endl;
}
END_MODULE_NAMESPACE
END_HADRONS_NAMESPACE
#endif // Hadrons_Momentum_hpp_

View File

@ -126,11 +126,6 @@ void TPoint<FImpl>::execute(void)
auto &src = envGet(PropagatorField, getName());
SitePropagator id;
if (position.size() != env().getNd())
{
HADRONS_ERROR(Size, "position has " + std::to_string(position.size())
+ " components (must have " + std::to_string(env().getNd()) + ")");
}
id = 1.;
src = zero;
pokeSite(id, src, position);

View File

@ -1,126 +0,0 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: Hadrons/TimerArray.cc
Copyright (C) 2015-2018
Author: Antonin Portelli <antonin.portelli@me.com>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#include <Hadrons/TimerArray.hpp>
using namespace Grid;
using namespace QCD;
using namespace Hadrons;
void TimerArray::startTimer(const std::string &name)
{
if (!name.empty())
{
timer_[name].Start();
}
}
GridTime TimerArray::getTimer(const std::string &name)
{
GridTime t;
if (!name.empty())
{
try
{
bool running = timer_.at(name).isRunning();
if (running) stopTimer(name);
t = timer_.at(name).Elapsed();
if (running) startTimer(name);
}
catch (std::out_of_range &)
{
t = GridTime::zero();
}
}
else
{
t = GridTime::zero();
}
return t;
}
double TimerArray::getDTimer(const std::string &name)
{
return static_cast<double>(getTimer(name).count());
}
void TimerArray::startCurrentTimer(const std::string &name)
{
if (!name.empty())
{
stopCurrentTimer();
startTimer(name);
currentTimer_ = name;
}
}
void TimerArray::stopTimer(const std::string &name)
{
if (timer_.at(name).isRunning())
{
timer_.at(name).Stop();
}
}
void TimerArray::stopCurrentTimer(void)
{
if (!currentTimer_.empty())
{
stopTimer(currentTimer_);
currentTimer_ = "";
}
}
void TimerArray::stopAllTimers(void)
{
for (auto &t: timer_)
{
stopTimer(t.first);
}
currentTimer_ = "";
}
void TimerArray::resetTimers(void)
{
timer_.clear();
currentTimer_ = "";
}
std::map<std::string, GridTime> TimerArray::getTimings(void)
{
std::map<std::string, GridTime> timing;
for (auto &t: timer_)
{
timing[t.first] = t.second.Elapsed();
}
return timing;
}

View File

@ -1,56 +0,0 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: Hadrons/TimerArray.hpp
Copyright (C) 2015-2018
Author: Antonin Portelli <antonin.portelli@me.com>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#ifndef Hadrons_TimerArray_hpp_
#define Hadrons_TimerArray_hpp_
#include <Hadrons/Global.hpp>
BEGIN_HADRONS_NAMESPACE
class TimerArray
{
public:
TimerArray(void) = default;
virtual ~TimerArray(void) = default;
void startTimer(const std::string &name);
GridTime getTimer(const std::string &name);
double getDTimer(const std::string &name);
void startCurrentTimer(const std::string &name);
void stopTimer(const std::string &name);
void stopCurrentTimer(void);
void stopAllTimers(void);
void resetTimers(void);
std::map<std::string, GridTime> getTimings(void);
private:
std::string currentTimer_;
std::map<std::string, GridStopWatch> timer_;
};
END_HADRONS_NAMESPACE
#endif // Hadrons_TimerArray_hpp_
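A hypothetical usage of the interface declared above, using only the methods listed there (assuming the header is included); the function name is illustrative.

void timedSection(TimerArray &tArray)
{
    tArray.startTimer("total");          // runs until explicitly stopped
    tArray.startCurrentTimer("setup");   // stops any current timer first
    // ... setup work ...
    tArray.startCurrentTimer("solve");   // implicitly stops "setup"
    // ... solver work ...
    tArray.stopAllTimers();
    double d = tArray.getDTimer("solve"); // elapsed time as a raw count
    (void) d;
}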

View File

@ -1,30 +1,3 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: Hadrons/Utilities/EigenPackCast.cc
Copyright (C) 2015-2018
Author: Antonin Portelli <antonin.portelli@me.com>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#include <Hadrons/EigenPack.hpp>
#include <Hadrons/Environment.hpp>
@ -86,12 +59,8 @@ void convert(const std::string outFilename, const std::string inFilename,
}
}
FOut bufOut(gOut);
FIn bufIn(gIn), testIn(gIn);
ScidacWriter binWriter(gOut->IsBoss());
ScidacReader binReader;
PackRecord record;
RealD eval;
FOut bufOut(gOut);
FIn bufIn(gIn), testIn(gIn);
LOG(Message) << "==== EIGENPACK CONVERSION" << std::endl;
LOG(Message) << "Lattice : " << gIn->GlobalDimensions() << std::endl;
@ -106,6 +75,10 @@ void convert(const std::string outFilename, const std::string inFilename,
{
for(unsigned int k = 0; k < size; ++k)
{
ScidacWriter binWriter(gOut->IsBoss());
ScidacReader binReader;
PackRecord record;
VecRecord vecRecord;
std::string outV = outFilename + "/v" + std::to_string(k) + ".bin";
std::string inV = inFilename + "/v" + std::to_string(k) + ".bin";
@ -115,25 +88,40 @@ void convert(const std::string outFilename, const std::string inFilename,
makeFileDir(outV, gOut);
binWriter.open(outV);
binReader.open(inV);
EigenPackIo::readHeader(record, binReader);
EigenPackIo::writeHeader(binWriter, record);
EigenPackIo::readElement<FIn>(bufIn, eval, k, binReader);
EigenPackIo::writeElement<FIn, FOut>(binWriter, bufIn, eval, k, &bufOut, &testIn);
EPIn::readHeader(record, binReader);
EPOut::writeHeader(binWriter, record);
EPIn::readElement(bufIn, vecRecord, binReader);
precisionChange(bufOut, bufIn);
precisionChange(testIn, bufOut);
testIn -= bufIn;
LOG(Message) << "Diff norm^2: " << norm2(testIn) << std::endl;
EPOut::writeElement(binWriter, bufOut, vecRecord);
binWriter.close();
binReader.close();
}
}
else
{
ScidacWriter binWriter(gOut->IsBoss());
ScidacReader binReader;
PackRecord record;
makeFileDir(outFilename, gOut);
binWriter.open(outFilename);
binReader.open(inFilename);
EigenPackIo::readHeader(record, binReader);
EigenPackIo::writeHeader(binWriter, record);
EPIn::readHeader(record, binReader);
EPOut::writeHeader(binWriter, record);
for(unsigned int k = 0; k < size; ++k)
{
EigenPackIo::readElement<FIn>(bufIn, eval, k, binReader);
EigenPackIo::writeElement<FIn, FOut>(binWriter, bufIn, eval, k, &bufOut, &testIn);
VecRecord vecRecord;
LOG(Message) << "==== Converting vector " << k << std::endl;
EPIn::readElement(bufIn, vecRecord, binReader);
precisionChange(bufOut, bufIn);
precisionChange(testIn, bufOut);
testIn -= bufIn;
LOG(Message) << "Diff norm^2: " << norm2(testIn) << std::endl;
EPOut::writeElement(binWriter, bufOut, vecRecord);
}
binWriter.close();
binReader.close();
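The conversion loop above checks itself by converting back to the input precision and printing norm2 of the difference (testIn -= bufIn). The same round-trip pattern in scalar form, as a standalone sketch; names are illustrative.

#include <vector>

// Convert double -> float -> double and accumulate the squared difference,
// analogous to norm2(testIn) after the precisionChange round trip above.
double roundTripDiffNorm2(const std::vector<double> &in)
{
    double n2 = 0.;
    for (double x: in)
    {
        double back = static_cast<double>(static_cast<float>(x));
        n2 += (back - x)*(back - x);
    }
    return n2;
}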

View File

@ -2,7 +2,7 @@
Grid physics library, www.github.com/paboyle/Grid
Source file: Hadrons/Utilities/HadronsXmlRun.cc
Source file: Hadrons/HadronsXmlRun.cc
Copyright (C) 2015-2018

View File

@ -1,8 +1,10 @@
AM_LDFLAGS += -L../../Hadrons
bin_PROGRAMS = HadronsXmlRun HadronsFermionEP64To32
HadronsXmlRun_SOURCES = HadronsXmlRun.cc
HadronsXmlRun_LDADD = ../libHadrons.a ../../Grid/libGrid.a
HadronsXmlRun_LDADD = -lHadrons -lGrid
HadronsFermionEP64To32_SOURCES = EigenPackCast.cc
HadronsFermionEP64To32_CXXFLAGS = $(AM_CXXFLAGS) -DFIN=WilsonImplD::FermionField -DFOUT=WilsonImplF::FermionField
HadronsFermionEP64To32_LDADD = ../libHadrons.a ../../Grid/libGrid.a
HadronsFermionEP64To32_LDADD = -lHadrons -lGrid

View File

@ -4,14 +4,12 @@ modules_cc =\
Modules/MContraction/Meson.cc \
Modules/MContraction/WeakNeutral4ptDisc.cc \
Modules/MContraction/WeakHamiltonianNonEye.cc \
Modules/MContraction/A2AAslashField.cc \
Modules/MContraction/WardIdentity.cc \
Modules/MContraction/A2AMesonField.cc \
Modules/MContraction/DiscLoop.cc \
Modules/MContraction/Gamma3pt.cc \
Modules/MFermion/FreeProp.cc \
Modules/MFermion/GaugeProp.cc \
Modules/MSource/Momentum.cc \
Modules/MSource/Point.cc \
Modules/MSource/Wall.cc \
Modules/MSource/SeqConserved.cc \
@ -29,7 +27,6 @@ modules_cc =\
Modules/MGauge/StochEm.cc \
Modules/MGauge/Random.cc \
Modules/MGauge/FundtoHirep.cc \
Modules/MGauge/GaugeFix.cc \
Modules/MNoise/TimeDilutedSpinColorDiagonal.cc \
Modules/MUtilities/RandomVectors.cc \
Modules/MUtilities/TestSeqGamma.cc \
@ -40,9 +37,6 @@ modules_cc =\
Modules/MScalar/VPCounterTerms.cc \
Modules/MScalar/ChargedProp.cc \
Modules/MScalar/ScalarVP.cc \
Modules/MNPR/Amputate.cc \
Modules/MNPR/Bilinear.cc \
Modules/MNPR/FourQuark.cc \
Modules/MAction/Wilson.cc \
Modules/MAction/MobiusDWF.cc \
Modules/MAction/ZMobiusDWF.cc \
@ -51,6 +45,7 @@ modules_cc =\
Modules/MAction/ScaledDWF.cc \
Modules/MScalarSUN/TrPhi.cc \
Modules/MScalarSUN/Grad.cc \
Modules/MScalarSUN/TimeMomProbe.cc \
Modules/MScalarSUN/TrMag.cc \
Modules/MScalarSUN/TrKinetic.cc \
Modules/MScalarSUN/EMT.cc \
@ -68,8 +63,8 @@ modules_cc =\
modules_hpp =\
Modules/MContraction/Baryon.hpp \
Modules/MContraction/A2AAslashField.hpp \
Modules/MContraction/A2AMesonField.hpp \
Modules/MContraction/A2AMesonFieldKernels.hpp \
Modules/MContraction/Meson.hpp \
Modules/MContraction/WeakHamiltonian.hpp \
Modules/MContraction/WeakHamiltonianNonEye.hpp \
@ -85,7 +80,6 @@ modules_hpp =\
Modules/MSource/Wall.hpp \
Modules/MSource/Z2.hpp \
Modules/MSource/SeqConserved.hpp \
Modules/MSource/Momentum.hpp \
Modules/MSink/Smear.hpp \
Modules/MSink/Point.hpp \
Modules/MSolver/MixedPrecisionRBPrecCG.hpp \
@ -97,7 +91,6 @@ modules_hpp =\
Modules/MGauge/StoutSmearing.hpp \
Modules/MGauge/Unit.hpp \
Modules/MGauge/Random.hpp \
Modules/MGauge/GaugeFix.hpp \
Modules/MGauge/FundtoHirep.hpp \
Modules/MGauge/StochEm.hpp \
Modules/MNoise/TimeDilutedSpinColorDiagonal.hpp \
@ -111,9 +104,6 @@ modules_hpp =\
Modules/MScalar/ScalarVP.hpp \
Modules/MScalar/Scalar.hpp \
Modules/MScalar/ChargedProp.hpp \
Modules/MNPR/Bilinear.hpp \
Modules/MNPR/Amputate.hpp \
Modules/MNPR/FourQuark.hpp \
Modules/MAction/DWF.hpp \
Modules/MAction/MobiusDWF.hpp \
Modules/MAction/Wilson.hpp \
@ -124,6 +114,7 @@ modules_hpp =\
Modules/MScalarSUN/TwoPointNPR.hpp \
Modules/MScalarSUN/ShiftProbe.hpp \
Modules/MScalarSUN/Div.hpp \
Modules/MScalarSUN/TimeMomProbe.hpp \
Modules/MScalarSUN/TrMag.hpp \
Modules/MScalarSUN/EMT.hpp \
Modules/MScalarSUN/TwoPoint.hpp \

View File

@ -1,48 +1,108 @@
#include <Grid/Grid.h>
#ifdef HAVE_LIME
#include "Benchmark_IO.hpp"
using namespace std;
using namespace Grid;
using namespace Grid::QCD;
#define MSG cout << GridLogMessage
#define SEP \
"============================================================================="
#ifndef BENCH_IO_LMAX
#define BENCH_IO_LMAX 40
#endif
using namespace Grid;
using namespace QCD;
typedef function<void(const string, LatticeFermion &)> WriterFn;
typedef function<void(LatticeFermion &, const string)> ReaderFn;
std::string filestem(const int l)
string filestem(const int l)
{
return "iobench_l" + std::to_string(l);
return "iobench_l" + to_string(l);
}
void limeWrite(const string filestem, LatticeFermion &vec)
{
emptyUserRecord record;
ScidacWriter binWriter(vec._grid->IsBoss());
binWriter.open(filestem + ".bin");
binWriter.writeScidacFieldRecord(vec, record);
binWriter.close();
}
void limeRead(LatticeFermion &vec, const string filestem)
{
emptyUserRecord record;
ScidacReader binReader;
binReader.open(filestem + ".bin");
binReader.readScidacFieldRecord(vec, record);
binReader.close();
}
void writeBenchmark(const int l, const WriterFn &write)
{
auto mpi = GridDefaultMpi();
auto simd = GridDefaultSimd(Nd, vComplex::Nsimd());
vector<int> latt = {l*mpi[0], l*mpi[1], l*mpi[2], l*mpi[3]};
unique_ptr<GridCartesian> gPt(SpaceTimeGrid::makeFourDimGrid(latt, simd, mpi));
GridCartesian *g = gPt.get();
GridParallelRNG rng(g);
LatticeFermion vec(g);
emptyUserRecord record;
ScidacWriter binWriter(g->IsBoss());
cout << "-- Local volume " << l << "^4" << endl;
random(rng, vec);
write(filestem(l), vec);
}
void readBenchmark(const int l, const ReaderFn &read)
{
auto mpi = GridDefaultMpi();
auto simd = GridDefaultSimd(Nd, vComplex::Nsimd());
vector<int> latt = {l*mpi[0], l*mpi[1], l*mpi[2], l*mpi[3]};
unique_ptr<GridCartesian> gPt(SpaceTimeGrid::makeFourDimGrid(latt, simd, mpi));
GridCartesian *g = gPt.get();
LatticeFermion vec(g);
emptyUserRecord record;
ScidacReader binReader;
cout << "-- Local volume " << l << "^4" << endl;
read(vec, filestem(l));
}
int main (int argc, char ** argv)
{
Grid_init(&argc,&argv);
auto simd = GridDefaultSimd(Nd,vComplex::Nsimd());
auto mpi = GridDefaultMpi();
int64_t threads = GridThread::GetThreads();
MSG << "Grid is setup to use " << threads << " threads" << std::endl;
MSG << SEP << std::endl;
MSG << "Benchmark Lime write" << std::endl;
MSG << SEP << std::endl;
MSG << "Grid is setup to use " << threads << " threads" << endl;
MSG << SEP << endl;
MSG << "Benchmark Lime write" << endl;
MSG << SEP << endl;
for (int l = 4; l <= BENCH_IO_LMAX; l += 2)
{
auto mpi = GridDefaultMpi();
std::vector<int> latt = {l*mpi[0], l*mpi[1], l*mpi[2], l*mpi[3]};
std::cout << "-- Local volume " << l << "^4" << std::endl;
writeBenchmark<LatticeFermion>(latt, filestem(l), limeWrite<LatticeFermion>);
writeBenchmark(l, limeWrite);
}
MSG << "Benchmark Lime read" << std::endl;
MSG << SEP << std::endl;
MSG << "Benchmark Lime read" << endl;
MSG << SEP << endl;
for (int l = 4; l <= BENCH_IO_LMAX; l += 2)
{
auto mpi = GridDefaultMpi();
std::vector<int> latt = {l*mpi[0], l*mpi[1], l*mpi[2], l*mpi[3]};
std::cout << "-- Local volume " << l << "^4" << std::endl;
readBenchmark<LatticeFermion>(latt, filestem(l), limeRead<LatticeFermion>);
readBenchmark(l, limeRead);
}
Grid_finalize();
return EXIT_SUCCESS;
}
#else
int main (int argc, char ** argv)
{
return EXIT_SUCCESS;
}
#endif

View File

@ -1,107 +0,0 @@
#ifndef Benchmark_IO_hpp_
#define Benchmark_IO_hpp_
#include <Grid/Grid.h>
#define MSG std::cout << GridLogMessage
#define SEP \
"============================================================================="
namespace Grid {
template <typename Field>
using WriterFn = std::function<void(const std::string, Field &)> ;
template <typename Field>
using ReaderFn = std::function<void(Field &, const std::string)>;
template <typename Field>
void limeWrite(const std::string filestem, Field &vec)
{
emptyUserRecord record;
QCD::ScidacWriter binWriter(vec._grid->IsBoss());
binWriter.open(filestem + ".bin");
binWriter.writeScidacFieldRecord(vec, record);
binWriter.close();
}
template <typename Field>
void limeRead(Field &vec, const std::string filestem)
{
emptyUserRecord record;
QCD::ScidacReader binReader;
binReader.open(filestem + ".bin");
binReader.readScidacFieldRecord(vec, record);
binReader.close();
}
inline void makeGrid(std::shared_ptr<GridBase> &gPt,
const std::shared_ptr<GridCartesian> &gBasePt,
const unsigned int Ls = 1, const bool rb = false)
{
if (rb)
{
if (Ls > 1)
{
gPt.reset(QCD::SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls, gBasePt.get()));
}
else
{
gPt.reset(QCD::SpaceTimeGrid::makeFourDimRedBlackGrid(gBasePt.get()));
}
}
else
{
if (Ls > 1)
{
gPt.reset(QCD::SpaceTimeGrid::makeFiveDimGrid(Ls, gBasePt.get()));
}
else
{
gPt = gBasePt;
}
}
}
template <typename Field>
void writeBenchmark(const std::vector<int> &latt, const std::string filename,
const WriterFn<Field> &write,
const unsigned int Ls = 1, const bool rb = false)
{
auto mpi = GridDefaultMpi();
auto simd = GridDefaultSimd(latt.size(), Field::vector_type::Nsimd());
std::shared_ptr<GridCartesian> gBasePt(QCD::SpaceTimeGrid::makeFourDimGrid(latt, simd, mpi));
std::shared_ptr<GridBase> gPt;
makeGrid(gPt, gBasePt, Ls, rb);
GridBase *g = gPt.get();
GridParallelRNG rng(g);
Field vec(g);
random(rng, vec);
write(filename, vec);
}
template <typename Field>
void readBenchmark(const std::vector<int> &latt, const std::string filename,
const ReaderFn<Field> &read,
const unsigned int Ls = 1, const bool rb = false)
{
auto mpi = GridDefaultMpi();
auto simd = GridDefaultSimd(latt.size(), Field::vector_type::Nsimd());
std::shared_ptr<GridCartesian> gBasePt(QCD::SpaceTimeGrid::makeFourDimGrid(latt, simd, mpi));
std::shared_ptr<GridBase> gPt;
makeGrid(gPt, gBasePt, Ls, rb);
GridBase *g = gPt.get();
Field vec(g);
read(vec, filename);
}
}
#endif // Benchmark_IO_hpp_
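Taken together, the header above is a small policy-based I/O benchmarking kit: limeWrite/limeRead form one possible WriterFn/ReaderFn pair, makeGrid selects the 4D/5D, full/red-black grid, and writeBenchmark/readBenchmark drive a single write or read. A minimal driver sketch built on it (the output stem "./ioBench" and the default-lattice choice are illustrative assumptions, not part of the header):

#include "Benchmark_IO.hpp"
using namespace Grid;
using namespace Grid::QCD;
int main(int argc, char **argv)
{
  Grid_init(&argc, &argv);
  // Default 4D grid: Ls = 1, no red-black checkerboarding.
  std::vector<int> latt = GridDefaultLatt();
  writeBenchmark<LatticeFermion>(latt, "./ioBench", limeWrite<LatticeFermion>);
  readBenchmark<LatticeFermion>(latt, "./ioBench", limeRead<LatticeFermion>);
  Grid_finalize();
  return EXIT_SUCCESS;
}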

View File

@@ -1,63 +0,0 @@
#include "Benchmark_IO.hpp"
#define MSG std::cout << GridLogMessage
#define SEP \
"============================================================================="
#ifndef BENCH_IO_LMAX
#define BENCH_IO_LMAX 40
#endif
using namespace Grid;
using namespace QCD;
int main (int argc, char ** argv)
{
std::vector<std::string> dir;
unsigned int Ls;
bool rb;
if (argc < 4)
{
std::cerr << "usage: " << argv[0] << " <Ls> <RB {0|1}> <dir1> [<dir2> ... <dirn>] [Grid options]";
std::cerr << std::endl;
return EXIT_FAILURE; // abort instead of falling through with too few arguments
}
Ls = std::stoi(argv[1]);
rb = (std::string(argv[2]) == "1");
for (unsigned int i = 3; i < argc; ++i)
{
std::string a = argv[i];
if (a[0] != '-')
{
dir.push_back(std::string(argv[i]));
}
else
{
break;
}
}
Grid_init(&argc,&argv);
int64_t threads = GridThread::GetThreads();
MSG << "Grid is setup to use " << threads << " threads" << std::endl;
MSG << SEP << std::endl;
MSG << "Benchmark Lime write" << std::endl;
MSG << SEP << std::endl;
for (auto &d: dir)
{
MSG << "-- Directory " << d << std::endl;
writeBenchmark<LatticeFermion>(GridDefaultLatt(), d + "/ioBench", limeWrite<LatticeFermion>, Ls, rb);
}
MSG << "Benchmark Lime read" << std::endl;
MSG << SEP << std::endl;
for (auto &d: dir)
{
MSG << "-- Directory " << d << std::endl;
readBenchmark<LatticeFermion>(GridDefaultLatt(), d + "/ioBench", limeRead<LatticeFermion>, Ls, rb);
}
Grid_finalize();
return EXIT_SUCCESS;
}
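As a usage illustration (binary name and directories hypothetical): running

  Benchmark_IO_dirs 12 1 /scratch/a /scratch/b --grid 16.16.16.16

benchmarks Lime write and read of an Ls = 12, red-black LatticeFermion under /scratch/a and /scratch/b; arguments from the first '-'-prefixed one onwards are left in place for Grid_init to consume.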

View File

@@ -1,4 +1,5 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./tests/Test_cayley_cg.cc
@@ -26,7 +27,7 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
*************************************************************************************/
/* END LEGAL */
#include <Grid/Grid.h>
#include <Grid/qcd/action/fermion/Reconstruct5Dprop.h>
#include <Grid/algorithms/iterative/Reconstruct5Dprop.h>
using namespace std;
using namespace Grid;
@@ -46,7 +47,6 @@ struct scal {
template<class What>
void TestCGinversions(What & Ddwf,
LatticeGaugeField &Umu,
GridCartesian * FGrid, GridRedBlackCartesian * FrbGrid,
GridCartesian * UGrid, GridRedBlackCartesian * UrbGrid,
RealD mass, RealD M5,
@@ -78,23 +78,12 @@ void TestCGprec(What & Ddwf,
template<class What>
void TestReconstruct5D(What & Ddwf,
LatticeGaugeField &Umu,
GridCartesian * FGrid, GridRedBlackCartesian * FrbGrid,
GridCartesian * UGrid, GridRedBlackCartesian * UrbGrid,
RealD mass, RealD M5,
GridParallelRNG *RNG4,
GridParallelRNG *RNG5);
template<class What,class WhatF>
void TestReconstruct5DFA(What & Ddwf,
WhatF & DdwfF,
LatticeGaugeField &Umu,
GridCartesian * FGrid, GridRedBlackCartesian * FrbGrid,
GridCartesian * UGrid, GridRedBlackCartesian * UrbGrid,
RealD mass, RealD M5,
GridParallelRNG *RNG4,
GridParallelRNG *RNG5);
int main (int argc, char ** argv)
{
Grid_init(&argc,&argv);
@@ -103,104 +92,63 @@ int main (int argc, char ** argv)
std::cout<<GridLogMessage << "Grid is setup to use "<<threads<<" threads"<<std::endl;
const int Ls=8;
GridCartesian * UGrid = SpaceTimeGrid::makeFourDimGrid(GridDefaultLatt(),
GridDefaultSimd(Nd,vComplex::Nsimd()),
GridDefaultMpi());
GridCartesian * UGrid = SpaceTimeGrid::makeFourDimGrid(GridDefaultLatt(), GridDefaultSimd(Nd,vComplex::Nsimd()),GridDefaultMpi());
GridRedBlackCartesian * UrbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(UGrid);
GridCartesian * FGrid = SpaceTimeGrid::makeFiveDimGrid(Ls,UGrid);
GridRedBlackCartesian * FrbGrid = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls,UGrid);
GridCartesian * UGridF = SpaceTimeGrid::makeFourDimGrid(GridDefaultLatt(),
GridDefaultSimd(Nd,vComplexF::Nsimd()),
GridDefaultMpi());
GridRedBlackCartesian * UrbGridF = SpaceTimeGrid::makeFourDimRedBlackGrid(UGridF);
GridCartesian * FGridF = SpaceTimeGrid::makeFiveDimGrid(Ls,UGridF);
GridRedBlackCartesian * FrbGridF = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls,UGridF);
std::vector<int> seeds4({1,2,3,4});
std::vector<int> seeds5({5,6,7,8});
GridParallelRNG RNG5(FGrid); RNG5.SeedFixedIntegers(seeds5);
GridParallelRNG RNG4(UGrid); RNG4.SeedFixedIntegers(seeds4);
LatticeGaugeField Umu(UGrid);
LatticeGaugeFieldF UmuF(UGridF);
SU3::HotConfiguration(RNG4,Umu);
precisionChange(UmuF,Umu);
std::vector<LatticeColourMatrix> U(4,UGrid);
RealD mass=0.1;
RealD M5 =1.8;
std::cout<<GridLogMessage <<"======================"<<std::endl;
std::cout<<GridLogMessage <<"DomainWallFermion test"<<std::endl;
std::cout<<GridLogMessage <<"======================"<<std::endl;
DomainWallFermionR Ddwf(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5);
DomainWallFermionF DdwfF(UmuF,*FGridF,*FrbGridF,*UGridF,*UrbGridF,mass,M5);
TestCGinversions<DomainWallFermionR>(Ddwf,Umu,FGrid,FrbGrid,UGrid,UrbGrid,mass,M5,&RNG4,&RNG5);
TestReconstruct5DFA<DomainWallFermionR,DomainWallFermionF>(Ddwf,DdwfF,Umu,FGrid,FrbGrid,UGrid,UrbGrid,mass,M5,&RNG4,&RNG5);
TestCGinversions<DomainWallFermionR>(Ddwf,FGrid,FrbGrid,UGrid,UrbGrid,mass,M5,&RNG4,&RNG5);
RealD b=1.5;// Scale factor b+c=2, b-c=1
RealD c=0.5;
std::vector<ComplexD> gamma(Ls,ComplexD(1.0,0.0));
std::cout<<GridLogMessage <<"======================"<<std::endl;
std::cout<<GridLogMessage <<"MobiusFermion test"<<std::endl;
std::cout<<GridLogMessage <<"======================"<<std::endl;
MobiusFermionR Dmob(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5,b,c);
MobiusFermionF DmobF(UmuF,*FGridF,*FrbGridF,*UGridF,*UrbGridF,mass,M5,b,c);
TestCGinversions<MobiusFermionR>(Dmob,Umu,FGrid,FrbGrid,UGrid,UrbGrid,mass,M5,&RNG4,&RNG5);
TestReconstruct5DFA<MobiusFermionR,MobiusFermionF>(Dmob,DmobF,Umu,FGrid,FrbGrid,UGrid,UrbGrid,mass,M5,&RNG4,&RNG5);
TestCGinversions<MobiusFermionR>(Dmob,FGrid,FrbGrid,UGrid,UrbGrid,mass,M5,&RNG4,&RNG5);
std::cout<<GridLogMessage <<"======================"<<std::endl;
std::cout<<GridLogMessage <<"ZMobiusFermion test"<<std::endl;
std::cout<<GridLogMessage <<"======================"<<std::endl;
ZMobiusFermionR ZDmob(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5,gamma,b,c);
TestCGinversions<ZMobiusFermionR>(ZDmob,Umu,FGrid,FrbGrid,UGrid,UrbGrid,mass,M5,&RNG4,&RNG5);
TestReconstruct5D<ZMobiusFermionR>(ZDmob,Umu,FGrid,FrbGrid,UGrid,UrbGrid,mass,M5,&RNG4,&RNG5);
TestCGinversions<ZMobiusFermionR>(ZDmob,FGrid,FrbGrid,UGrid,UrbGrid,mass,M5,&RNG4,&RNG5);
std::cout<<GridLogMessage <<"======================"<<std::endl;
std::cout<<GridLogMessage <<"MobiusZolotarevFermion test"<<std::endl;
std::cout<<GridLogMessage <<"======================"<<std::endl;
MobiusZolotarevFermionR Dzolo(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5,b,c,0.1,2.0);
TestCGinversions<MobiusZolotarevFermionR>(Dzolo,Umu,FGrid,FrbGrid,UGrid,UrbGrid,mass,M5,&RNG4,&RNG5);
TestReconstruct5D<MobiusZolotarevFermionR>(Dzolo,Umu,FGrid,FrbGrid,UGrid,UrbGrid,mass,M5,&RNG4,&RNG5);
TestCGinversions<MobiusZolotarevFermionR>(Dzolo,FGrid,FrbGrid,UGrid,UrbGrid,mass,M5,&RNG4,&RNG5);
std::cout<<GridLogMessage <<"======================"<<std::endl;
std::cout<<GridLogMessage <<"ScaledShamirFermion test"<<std::endl;
std::cout<<GridLogMessage <<"======================"<<std::endl;
ScaledShamirFermionR Dsham(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5,2.0);
ScaledShamirFermionF DshamF(UmuF,*FGridF,*FrbGridF,*UGridF,*UrbGridF,mass,M5,2.0);
TestCGinversions<ScaledShamirFermionR>(Dsham,Umu,FGrid,FrbGrid,UGrid,UrbGrid,mass,M5,&RNG4,&RNG5);
TestReconstruct5DFA<ScaledShamirFermionR,ScaledShamirFermionF>(Dsham,DshamF,Umu,FGrid,FrbGrid,UGrid,UrbGrid,mass,M5,&RNG4,&RNG5);
TestCGinversions<ScaledShamirFermionR>(Dsham,FGrid,FrbGrid,UGrid,UrbGrid,mass,M5,&RNG4,&RNG5);
std::cout<<GridLogMessage <<"======================"<<std::endl;
std::cout<<GridLogMessage <<"ShamirZolotarevFermion test"<<std::endl;
std::cout<<GridLogMessage <<"======================"<<std::endl;
ShamirZolotarevFermionR Dshamz(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5,0.1,2.0);
TestCGinversions<ShamirZolotarevFermionR>(Dshamz,Umu,FGrid,FrbGrid,UGrid,UrbGrid,mass,M5,&RNG4,&RNG5);
TestReconstruct5D<ShamirZolotarevFermionR>(Dshamz,Umu,FGrid,FrbGrid,UGrid,UrbGrid,mass,M5,&RNG4,&RNG5);
TestCGinversions<ShamirZolotarevFermionR>(Dshamz,FGrid,FrbGrid,UGrid,UrbGrid,mass,M5,&RNG4,&RNG5);
std::cout<<GridLogMessage <<"======================"<<std::endl;
std::cout<<GridLogMessage <<"OverlapWilsonCayleyTanhFermion test"<<std::endl;
std::cout<<GridLogMessage <<"======================"<<std::endl;
OverlapWilsonCayleyTanhFermionR Dov (Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5,1.0);
OverlapWilsonCayleyTanhFermionF DovF(UmuF,*FGridF,*FrbGridF,*UGridF,*UrbGridF,mass,M5,1.0);
TestCGinversions<OverlapWilsonCayleyTanhFermionR>(Dov,Umu,FGrid,FrbGrid,UGrid,UrbGrid,mass,M5,&RNG4,&RNG5);
TestReconstruct5DFA<OverlapWilsonCayleyTanhFermionR,OverlapWilsonCayleyTanhFermionF>(Dov,DovF,Umu,FGrid,FrbGrid,UGrid,UrbGrid,mass,M5,&RNG4,&RNG5);
OverlapWilsonCayleyTanhFermionR Dov(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5,1.0);
TestCGinversions<OverlapWilsonCayleyTanhFermionR>(Dov,FGrid,FrbGrid,UGrid,UrbGrid,mass,M5,&RNG4,&RNG5);
std::cout<<GridLogMessage <<"======================"<<std::endl;
std::cout<<GridLogMessage <<"OverlapWilsonCayleyZolotarevFermion test"<<std::endl;
std::cout<<GridLogMessage <<"======================"<<std::endl;
OverlapWilsonCayleyZolotarevFermionR Dovz(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5,0.1,2.0);
TestCGinversions<OverlapWilsonCayleyZolotarevFermionR>(Dovz,Umu,FGrid,FrbGrid,UGrid,UrbGrid,mass,M5,&RNG4,&RNG5);
TestReconstruct5D<OverlapWilsonCayleyZolotarevFermionR>(Dovz,Umu,FGrid,FrbGrid,UGrid,UrbGrid,mass,M5,&RNG4,&RNG5);
TestCGinversions<OverlapWilsonCayleyZolotarevFermionR>(Dovz,FGrid,FrbGrid,UGrid,UrbGrid,mass,M5,&RNG4,&RNG5);
Grid_finalize();
}
template<class What>
void TestCGinversions(What & Ddwf,
LatticeGaugeField &Umu,
GridCartesian * FGrid, GridRedBlackCartesian * FrbGrid,
GridCartesian * UGrid, GridRedBlackCartesian * UrbGrid,
RealD mass, RealD M5,
@@ -213,8 +161,10 @@ void TestCGinversions(What & Ddwf,
TestCGprec<What>(Ddwf,FGrid,FrbGrid,UGrid,UrbGrid,mass,M5,RNG4,RNG5);
std::cout<<GridLogMessage << "Testing red black Schur inverter"<<std::endl;
TestCGschur<What>(Ddwf,FGrid,FrbGrid,UGrid,UrbGrid,mass,M5,RNG4,RNG5);
}
std::cout<<GridLogMessage << "Testing 5D PV reconstruction"<<std::endl;
TestReconstruct5D<What>(Ddwf,FGrid,FrbGrid,UGrid,UrbGrid,mass,M5,RNG4,RNG5);
}
template<class What>
void TestCGunprec(What & Ddwf,
@@ -253,7 +203,6 @@ void TestCGprec(What & Ddwf,
template<class What>
void TestReconstruct5D(What & Ddwf,
LatticeGaugeField & Umu,
GridCartesian * FGrid, GridRedBlackCartesian * FrbGrid,
GridCartesian * UGrid, GridRedBlackCartesian * UrbGrid,
RealD mass, RealD M5,
@@ -267,13 +216,9 @@ void TestReconstruct5D(What & Ddwf,
LatticeFermion src_NE(FGrid);
LatticeFermion result(FGrid);
LatticeFermion result_rec(FGrid);
LatticeFermion result_madwf(FGrid);
MdagMLinearOperator<What,LatticeFermion> HermOp(Ddwf);
double Resid = 1.0e-12;
double Residi = 1.0e-6;
ConjugateGradient<LatticeFermion> CG(Resid,10000);
ConjugateGradient<LatticeFermion> CGi(Residi,10000);
ConjugateGradient<LatticeFermion> CG(1.0e-12,10000);
Ddwf.ImportPhysicalFermionSource(src4,src);
Ddwf.Mdag(src,src_NE);
@@ -287,16 +232,9 @@ void TestReconstruct5D(What & Ddwf,
std::cout <<GridLogMessage<< " Reconstructing " <<std::endl;
////////////////////////////
// RBprec PV inverse
////////////////////////////
typedef LatticeFermion Field;
typedef SchurRedBlackDiagTwoSolve<Field> SchurSolverType;
typedef SchurRedBlackDiagTwoSolve<Field> SchurSolverTypei;
typedef PauliVillarsSolverRBprec<Field,SchurSolverType> PVinverter;
SchurSolverType SchurSolver(CG);
PVinverter PVinverse(SchurSolver);
// typedef PauliVillarsSolverUnprec<LatticeFermion> PVinverter;
typedef PauliVillarsSolverRBprec<LatticeFermion> PVinverter;
PVinverter PVinverse(CG);
Reconstruct5DfromPhysical<LatticeFermion,PVinverter> reconstructor(PVinverse);
reconstructor(Ddwf,res4,src4,result_rec);
@@ -307,88 +245,6 @@ void TestReconstruct5D(What & Ddwf,
result_rec = result_rec - result;
std::cout <<GridLogMessage << "Difference "<<norm2(result_rec)<<std::endl;
//////////////////////////////
// Now try MADWF
//////////////////////////////
SchurSolverTypei SchurSolveri(CGi);
ZeroGuesser<LatticeFermion> Guess;
MADWF<What,What,PVinverter,SchurSolverTypei,ZeroGuesser<LatticeFermion> >
madwf(Ddwf,Ddwf,PVinverse,SchurSolveri,Guess,Resid,10);
madwf(src4,result_madwf);
result_madwf = result_madwf - result;
std::cout <<GridLogMessage << "Difference "<<norm2(result_madwf)<<std::endl;
}
template<class What,class WhatF>
void TestReconstruct5DFA(What & Ddwf,
WhatF & DdwfF,
LatticeGaugeField & Umu,
GridCartesian * FGrid, GridRedBlackCartesian * FrbGrid,
GridCartesian * UGrid, GridRedBlackCartesian * UrbGrid,
RealD mass, RealD M5,
GridParallelRNG *RNG4,
GridParallelRNG *RNG5)
{
LatticeFermion src4 (UGrid); random(*RNG4,src4);
LatticeFermion res4 (UGrid); res4 = zero;
LatticeFermion src (FGrid);
LatticeFermion src_NE(FGrid);
LatticeFermion result(FGrid);
LatticeFermion result_rec(FGrid);
LatticeFermion result_madwf(FGrid);
MdagMLinearOperator<What,LatticeFermion> HermOp(Ddwf);
double Resid = 1.0e-12;
double Residi = 1.0e-5;
ConjugateGradient<LatticeFermion> CG(Resid,10000);
ConjugateGradient<LatticeFermionF> CGi(Residi,10000);
Ddwf.ImportPhysicalFermionSource(src4,src);
Ddwf.Mdag(src,src_NE);
CG(HermOp,src_NE,result);
Ddwf.ExportPhysicalFermionSolution(result, res4);
Ddwf.M(result,src_NE);
src_NE = src_NE - src;
std::cout <<GridLogMessage<< " True residual is " << norm2(src_NE)<<std::endl;
std::cout <<GridLogMessage<< " Reconstructing " <<std::endl;
////////////////////////////
// Fourier accel PV inverse
////////////////////////////
typedef LatticeFermion Field;
typedef LatticeFermionF FieldF;
typedef SchurRedBlackDiagTwoSolve<FieldF> SchurSolverTypei;
typedef PauliVillarsSolverFourierAccel<LatticeFermion,LatticeGaugeField> PVinverter;
PVinverter PVinverse(Umu,CG);
Reconstruct5DfromPhysical<LatticeFermion,PVinverter> reconstructor(PVinverse);
reconstructor(Ddwf,res4,src4,result_rec);
std::cout <<GridLogMessage << "Result "<<norm2(result)<<std::endl;
std::cout <<GridLogMessage << "Result_rec "<<norm2(result_rec)<<std::endl;
result_rec = result_rec - result;
std::cout <<GridLogMessage << "Difference "<<norm2(result_rec)<<std::endl;
//////////////////////////////
// Now try MADWF
//////////////////////////////
SchurSolverTypei SchurSolver(CGi);
ZeroGuesser<LatticeFermionF> Guess;
MADWF<What,WhatF,PVinverter,SchurSolverTypei,ZeroGuesser<LatticeFermionF> >
madwf(Ddwf,DdwfF,PVinverse,SchurSolver,Guess,Resid,10);
madwf(src4,result_madwf);
result_madwf = result_madwf - result;
std::cout <<GridLogMessage << "Difference "<<norm2(result_madwf)<<std::endl;
}
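The net effect of the reorganisation in this diff is that Pauli-Villars inversion is a policy type composed into Reconstruct5DfromPhysical, so choosing a different PV inverter means changing one typedef, not the reconstruction routine. A minimal sketch of swapping the red-black solver for the unpreconditioned one, assuming Ddwf, CG, src4, res4 and result_rec are set up exactly as in TestReconstruct5D above:

// Sketch only: same reconstruction call site, different PV inversion policy.
typedef PauliVillarsSolverUnprec<LatticeFermion> PVunprec;
PVunprec PVinverseU(CG);
Reconstruct5DfromPhysical<LatticeFermion, PVunprec> reconstructorU(PVinverseU);
reconstructorU(Ddwf, res4, src4, result_rec);

The later variant of the test composes in the same way at a larger scale: MADWF takes the PV inverter, an inner Schur solver and a guesser as policies, and the Fourier-accelerated PauliVillarsSolverFourierAccel slots into the identical Reconstruct5DfromPhysical interface.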

View File

@@ -91,22 +91,6 @@ int main(int argc, char *argv[])
v13r = v[13];
LOG(Message) << "v[13] correct? "
<< ((v13r == v13w) ? "yes" : "no" ) << std::endl;
LOG(Message) << "hit ratio " << v.hitRatio() << std::endl;
EigenDiskVector<ComplexD> w("eigendiskvector_test", 1000, 4);
EigenDiskVector<ComplexD>::Matrix m,n;
w[2] = EigenDiskVectorMat<ComplexD>::Random(2000, 2000);
m = w[2];
w[3] = EigenDiskVectorMat<ComplexD>::Random(2000, 2000);
w[4] = EigenDiskVectorMat<ComplexD>::Random(2000, 2000);
w[5] = EigenDiskVectorMat<ComplexD>::Random(2000, 2000);
w[6] = EigenDiskVectorMat<ComplexD>::Random(2000, 2000);
w[7] = EigenDiskVectorMat<ComplexD>::Random(2000, 2000);
n = w[2];
LOG(Message) << "w[2] correct? "
<< ((m == n) ? "yes" : "no" ) << std::endl;
LOG(Message) << "hit ratio " << w.hitRatio() << std::endl;
Grid_finalize();