Getting closer to having a wilson solver... introducing a first and untested

cut at Conjugate gradient. Also copied in Remez, Zolotarev, Chebyshev from Mike Clark, Tony Kennedy and my BFM package respectively since we know we will need these. I wanted the structure of algorithms/approx algorithms/iterative etc.. to start taking shape.
2025-08-02 12:47:07 +01:00 · 2015-05-18 07:47:05 +01:00
parent 8e99e4671f
commit d0e4673a3f
22 changed files with 1798 additions and 157 deletions
--- a/13
+++ b/13
@@ -4244,7 +4244,18 @@ fi
 done
-#AC_CHECK_HEADERS(machine/endian.h)
+for ac_header in gmp.h
 do :
  ac_fn_cxx_check_header_mongrel "$LINENO" "gmp.h" "ac_cv_header_gmp_h" "$ac_includes_default"
 if test "x$ac_cv_header_gmp_h" = xyes; then :
  cat >>confdefs.h <<_ACEOF
 #define HAVE_GMP_H 1
 _ACEOF
 fi
 done
 ac_fn_cxx_check_decl "$LINENO" "ntohll" "ac_cv_have_decl_ntohll" "#include <arpa/inet.h>
 "
 if test "x$ac_cv_have_decl_ntohll" = xyes; then :
--- a/configure.ac
+++ b/configure.ac
@@ -20,7 +20,7 @@ AC_CHECK_HEADERS(mm_malloc.h)
 AC_CHECK_HEADERS(malloc/malloc.h)
 AC_CHECK_HEADERS(malloc.h)
 AC_CHECK_HEADERS(endian.h)
-#AC_CHECK_HEADERS(machine/endian.h)
+AC_CHECK_HEADERS(gmp.h)
 AC_CHECK_DECLS([ntohll],[], [], [[#include <arpa/inet.h>]])
 AC_CHECK_DECLS([be64toh],[], [], [[#include <arpa/inet.h>]])
--- a/lib/Grid.h
+++ b/lib/Grid.h
@@ -10,15 +10,17 @@
 #ifndef GRID_H
 #define GRID_H
-#include <stdio.h>
+#include <cassert>
 #include <complex>
 #include <vector>
-#include <valarray>
+
 #include <iostream>
-#include <cassert>
+#include <iomanip>
 #include <random>
 #include <functional>
 #include <stdio.h>
 #include <stdlib.h>
 #include <sys/time.h>
 #include <stdio.h>
@@ -45,16 +47,19 @@
 #endif
 #include <Grid_aligned_allocator.h>
 #include <Grid_simd.h>
 #include <Grid_threads.h>
-#include <Grid_cartesian.h>
+#include <Grid_cartesian.h> // subdir aggregate
-
+#include <Grid_math.h>      // subdir aggregate
-#include <Grid_math.h>
+#include <Grid_lattice.h>   // subdir aggregate
 #include <Grid_lattice.h>
 #include <Grid_comparison.h>
-#include <Grid_cshift.h>
+#include <Grid_cshift.h>    // subdir aggregate
-#include <Grid_stencil.h>
+#include <Grid_stencil.h>   // subdir aggregate
 #include <Grid_algorithms.h>// subdir aggregate
 #include <qcd/Grid_qcd.h>
 #include <parallelIO/GridNerscIO.h>
--- a/lib/Grid_algorithms.h
+++ b/lib/Grid_algorithms.h
@@ -0,0 +1,34 @@
 #ifndef GRID_ALGORITHMS_H
 #define GRID_ALGORITHMS_H
 #include <algorithms/SparseMatrix.h>
 #include <algorithms/LinearOperator.h>
 #include <algorithms/iterative/ConjugateGradient.h>
 #include <algorithms/iterative/NormalEquations.h>
 #include <algorithms/iterative/SchurRedBlack.h>
 #include <algorithms/approx/Zolotarev.h>
 #include <algorithms/approx/Chebyshev.h>
 #include <algorithms/approx/Remez.h>
 // Eigen/lanczos
 // EigCg
 // MCR
 // Pcg
 // Multishift CG
 // Hdcg
 // GCR
 // etc..
 // integrator/Leapfrog
 // integrator/Omelyan
 // integrator/ForceGradient
 // montecarlo/hmc
 // montecarlo/rhmc
 // montecarlo/metropolis
 // etc...
 #endif
--- a/lib/Grid_config.h.in
+++ b/lib/Grid_config.h.in
@@ -29,6 +29,9 @@
 /* Define to 1 if you have the `gettimeofday' function. */
 #undef HAVE_GETTIMEOFDAY
 /* Define to 1 if you have the <gmp.h> header file. */
 #undef HAVE_GMP_H
 /* Define to 1 if you have the <inttypes.h> header file. */
 #undef HAVE_INTTYPES_H
--- a/lib/Makefile.am
+++ b/lib/Makefile.am
@@ -19,35 +19,50 @@ libGrid_a_SOURCES =\
 	stencil/Grid_stencil_common.cc\
 	qcd/Grid_qcd_dirac.cc\
 	qcd/Grid_qcd_wilson_dop.cc\
 	algorithms/approx/Zolotarev.cc\
 	algorithms/approx/Remez.cc\
 	$(extra_sources)
 #
 # Include files
 #
-include_HEADERS =\
+nobase_include_HEADERS = algorithms/approx/bigfloat.h\
 	algorithms/approx/Chebyshev.h\
 	algorithms/approx/Remez.h\
 	algorithms/approx/Zolotarev.h\
 	algorithms/iterative/ConjugateGradient.h\
 	algorithms/iterative/NormalEquations.h\
 	algorithms/iterative/SchurRedBlack.h\
 	algorithms/LinearOperator.h\
 	algorithms/SparseMatrix.h\
 	cartesian/Grid_cartesian_base.h\
 	cartesian/Grid_cartesian_full.h\
 	cartesian/Grid_cartesian_red_black.h\
 	communicator/Grid_communicator_base.h\
 	cshift/Grid_cshift_common.h\
 	cshift/Grid_cshift_mpi.h\
 	cshift/Grid_cshift_none.h\
 	Grid.h\
 	Grid_algorithms.h\
 	Grid_aligned_allocator.h\
 	Grid_cartesian.h\
 	Grid_communicator.h\
 	Grid_comparison.h\
 	Grid_config.h\
 	Grid_cshift.h\
 	Grid_extract.h\
 	Grid_lattice.h\
 	Grid_math.h\
 	Grid_simd.h\
-	Grid_stencil.h
+	Grid_stencil.h\
-
+	Grid_threads.h\
 nobase_include_HEADERS=\
 	cartesian/Grid_cartesian_base.h\
 	cartesian/Grid_cartesian_full.h\
 	cartesian/Grid_cartesian_red_black.h\
 	cshift/Grid_cshift_common.h\
 	cshift/Grid_cshift_mpi.h\
 	cshift/Grid_cshift_none.h\
 	lattice/Grid_lattice_arith.h\
 	lattice/Grid_lattice_base.h\
 	lattice/Grid_lattice_comparison.h\
 	lattice/Grid_lattice_conformable.h\
 	lattice/Grid_lattice_coordinate.h\
 	lattice/Grid_lattice_ET.h\
 	lattice/Grid_lattice_local.h\
 	lattice/Grid_lattice_overload.h\
 	lattice/Grid_lattice_peekpoke.h\
 	lattice/Grid_lattice_reality.h\
 	lattice/Grid_lattice_reduction.h\
@@ -71,8 +86,11 @@ nobase_include_HEADERS=\
 	math/Grid_math_trace.h\
 	math/Grid_math_traits.h\
 	math/Grid_math_transpose.h\
 	parallelIO/GridNerscIO.h\
 	qcd/Grid_qcd.h\
 	qcd/Grid_qcd_2spinor.h\
 	qcd/Grid_qcd_dirac.h\
 	qcd/Grid_qcd_wilson_dop.h\
 	simd/Grid_vComplexD.h\
 	simd/Grid_vComplexF.h\
 	simd/Grid_vInteger.h\
--- a/lib/algorithms/LinearOperator.h
+++ b/lib/algorithms/LinearOperator.h
@@ -1,26 +1,8 @@
 #ifndef  GRID_ALGORITHM_LINEAR_OP_H
 #define  GRID_ALGORITHM_LINEAR_OP_H
 #include <Grid.h>
 namespace Grid {
  /////////////////////////////////////////////////////////////////////////////////////////////
  // Interface defining what I expect of a general sparse matrix, such as a Fermion action
  /////////////////////////////////////////////////////////////////////////////////////////////
    template<class Field> class SparseMatrixBase {
    public:
      // Full checkerboar operations
      virtual void M    (const Field &in, Field &out);
      virtual void Mdag (const Field &in, Field &out);
      virtual RealD MdagM(const Field &in, Field &out);
      // half checkerboard operaions
      virtual void Mpc      (const Field &in, Field &out);
      virtual void MpcDag   (const Field &in, Field &out);
      virtual RealD MpcDagMpc(const Field &in, Field &out);
    };
  /////////////////////////////////////////////////////////////////////////////////////////////
  // LinearOperators Take a something and return a something.
  /////////////////////////////////////////////////////////////////////////////////////////////
@@ -61,11 +43,11 @@ namespace Grid {
  // the wrappers implement the specialisation of "Op" and "AdjOp" to the cases minimising
  // replication of code.
  /////////////////////////////////////////////////////////////////////////////////////////////
-    template<class SparseMatrix,class Field>
+    template<class Matrix,class Field>
    class NonHermitianOperator : public LinearOperatorBase<Field> {
-      SparseMatrix &_Mat;
+      Matrix &_Mat;
    public:
-      NonHermitianOperator(SparseMatrix &Mat): _Mat(Mat){};
+      NonHermitianOperator(Matrix &Mat): _Mat(Mat){};
      void Op     (const Field &in, Field &out){
 	_Mat.M(in,out);
      }
@@ -77,11 +59,11 @@ namespace Grid {
    ////////////////////////////////////////////////////////////////////////////////////
    // Redblack Non hermitian wrapper
    ////////////////////////////////////////////////////////////////////////////////////
-    template<class SparseMatrix,class Field>
+    template<class Matrix,class Field>
-    class NonHermitianRedBlackOperator : public LinearOperatorBase<Field> {
+    class NonHermitianCheckerBoardedOperator : public LinearOperatorBase<Field> {
-      SparseMatrix &_Mat;
+      Matrix &_Mat;
    public:
-      NonHermitianRedBlackOperator(SparseMatrix &Mat): _Mat(Mat){};
+      NonHermitianCheckerBoardedOperator(Matrix &Mat): _Mat(Mat){};
      void Op     (const Field &in, Field &out){
 	_Mat.Mpc(in,out);
      }
@@ -93,75 +75,35 @@ namespace Grid {
    ////////////////////////////////////////////////////////////////////////////////////
    // Hermitian wrapper
    ////////////////////////////////////////////////////////////////////////////////////
-    template<class SparseMatrix,class Field>
+    template<class Matrix,class Field>
    class HermitianOperator : public HermitianOperatorBase<Field> {
-      SparseMatrix &_Mat;
+      Matrix &_Mat;
    public:
-      HermitianOperator(SparseMatrix &Mat): _Mat(Mat) {};
+      HermitianOperator(Matrix &Mat): _Mat(Mat) {};
      RealD OpAndNorm(const Field &in, Field &out){
 	return _Mat.MdagM(in,out);
      }
    };
    ////////////////////////////////////////////////////////////////////////////////////
-    // Hermitian RedBlack wrapper
+    // Hermitian CheckerBoarded wrapper
    ////////////////////////////////////////////////////////////////////////////////////
-    template<class SparseMatrix,class Field>
+    template<class Matrix,class Field>
-    class HermitianRedBlackOperator : public HermitianOperatorBase<Field> {
+    class HermitianCheckerBoardedOperator : public HermitianOperatorBase<Field> {
-      SparseMatrix &_Mat;
+      Matrix &_Mat;
    public:
-      HermitianRedBlackOperator(SparseMatrix &Mat): _Mat(Mat) {};
+      HermitianCheckerBoardedOperator(Matrix &Mat): _Mat(Mat) {};
-      RealD OpAndNorm(const Field &in, Field &out){
+      void OpAndNorm(const Field &in, Field &out,RealD &n1,RealD &n2){
-	return _Mat.MpcDagMpc(in,out);
+	_Mat.MpcDagMpc(in,out,n1,n2);
      }
    };
    /////////////////////////////////////////////////////////////
    // Base classes for functions of operators
    /////////////////////////////////////////////////////////////
    template<class Field> class OperatorFunction {
    public:
-      virtual void operator() (LinearOperatorBase<Field> *Linop, const Field &in, Field &out) = 0;
+      virtual void operator() (LinearOperatorBase<Field> &Linop, const Field &in, Field &out) = 0;
    };
    /////////////////////////////////////////////////////////////
    // Base classes for polynomial functions of operators ? needed?
    /////////////////////////////////////////////////////////////
    template<class Field> class OperatorPolynomial : public OperatorFunction<Field> {
    public:
      virtual void operator() (LinearOperatorBase<Field> *Linop,const Field &in, Field &out) = 0;
    };
    /////////////////////////////////////////////////////////////
    // Base classes for iterative processes based on operators
    // single input vec, single output vec.
    /////////////////////////////////////////////////////////////
    template<class Field> class IterativeProcess : public OperatorFunction<Field> {
    public:
      RealD   Tolerance;
      Integer MaxIterations;
      IterativeProcess(RealD tol,Integer maxit) : Tolerance(tol),MaxIterations(maxit) {};
      virtual void operator() (LinearOperatorBase<Field> *Linop,const Field &in, Field &out) = 0;
    };
    /////////////////////////////////////////////////////////////
    // Grand daddy iterative method
    /////////////////////////////////////////////////////////////
    template<class Field> class ConjugateGradient : public IterativeProcess<Field> {
    public:
      virtual void operator() (HermitianOperatorBase<Field> *Linop,const Field &in, Field &out) = 0;
    };
    /////////////////////////////////////////////////////////////
    // A little more modern
    /////////////////////////////////////////////////////////////
    template<class Field> class PreconditionedConjugateGradient : public  IterativeProcess<Field>  {
    public:
      void operator() (HermitianOperatorBase<Field> *Linop,
 		       OperatorFunction<Field> *Preconditioner,
 		       const Field &in, 
 		       Field &out) = 0;
    };
    // FIXME : To think about
--- a/lib/algorithms/SparseMatrix.h
+++ b/lib/algorithms/SparseMatrix.h
@@ -0,0 +1,72 @@
 #ifndef  GRID_ALGORITHM_SPARSE_MATRIX_H
 #define  GRID_ALGORITHM_SPARSE_MATRIX_H
 #include <Grid.h>
 namespace Grid {
  /////////////////////////////////////////////////////////////////////////////////////////////
  // Interface defining what I expect of a general sparse matrix, such as a Fermion action
  /////////////////////////////////////////////////////////////////////////////////////////////
    template<class Field> class SparseMatrixBase {
    public:
      // Full checkerboar operations
      virtual RealD M    (const Field &in, Field &out)=0;
      virtual RealD Mdag (const Field &in, Field &out)=0;
      virtual void  MdagM(const Field &in, Field &out,RealD &ni,RealD &no) {
 	Field tmp (in._grid);
 	ni=M(in,tmp);
 	no=Mdag(tmp,out);
      }
    };
  /////////////////////////////////////////////////////////////////////////////////////////////
  // Interface augmented by a red black sparse matrix, such as a Fermion action
  /////////////////////////////////////////////////////////////////////////////////////////////
    template<class Field> class CheckerBoardedSparseMatrixBase : public SparseMatrixBase<Field> {
    public:
      // half checkerboard operaions
      virtual  void Meooe    (const Field &in, Field &out)=0;
      virtual  void Mooee    (const Field &in, Field &out)=0;
      virtual  void MooeeInv (const Field &in, Field &out)=0;
      virtual  void MeooeDag    (const Field &in, Field &out)=0;
      virtual  void MooeeDag    (const Field &in, Field &out)=0;
      virtual  void MooeeInvDag (const Field &in, Field &out)=0;
      // Schur decomp operators
      virtual  RealD Mpc      (const Field &in, Field &out) {
 	Field tmp(in._grid);
 	Meooe(in,tmp);
 	MooeeInv(tmp,out);
 	Meooe(out,tmp);
 	Mooee(in,out);
 	out=out-tmp; // axpy_norm
 	RealD n=norm2(out);
 	return n;
      }
      virtual  RealD MpcDag   (const Field &in, Field &out){
 	Field tmp(in._grid);
 	MeooeDag(in,tmp);
 	MooeeInvDag(tmp,out);
 	MeooeDag(out,tmp);
 	MooeeDag(in,out);
 	out=out-tmp; // axpy_norm
 	RealD n=norm2(out);
 	return n;
      }
      virtual void MpcDagMpc(const Field &in, Field &out,RealD ni,RealD no) {
 	Field tmp(in._grid);
 	ni=Mpc(in,tmp);
 	no=Mpc(tmp,out);
      }
    };
 }
 #endif
--- a/lib/algorithms/approx/PolynomialApprox.h
+++ b/lib/algorithms/approx/PolynomialApprox.h
@@ -1,5 +1,5 @@
-#ifndef GRID_POLYNOMIAL_APPROX_H
+#ifndef GRID_CHEBYSHEV_H
-#define GRID_POLYNOMIAL_APPROX_H
+#define GRID_CHEBYSHEV_H
 #include<Grid.h>
 #include<algorithms/LinearOperator.h>
@@ -12,7 +12,7 @@ namespace Grid {
  template<class Field>
  class Polynomial : public OperatorFunction<Field> {
  private:
-    std::vector<double> _oeffs;
+    std::vector<double> Coeffs;
  public:
    Polynomial(std::vector<double> &_Coeffs) : Coeffs(_Coeffs) {};
@@ -111,17 +111,13 @@ namespace Grid {
      double xscale = 2.0/(hi-lo);
      double mscale = -(hi+lo)/(hi-lo);
      Field *T0=Tnm;
      Field *T1=Tn;
      // Tn=T1 = (xscale M + mscale)in
      Linop.Op(T0,y);
      T1=y*xscale+in*mscale;
      // sum = .5 c[0] T0 + c[1] T1
-      out = (0.5*coeffs[0])*T0 + coeffs[1]*T1;
+      out = (0.5*Coeffs[0])*T0 + Coeffs[1]*T1;
      for(int n=2;n<order;n++){
@@ -131,7 +127,7 @@ namespace Grid {
 	*Tnp=2.0*y-(*Tnm);
-	out=out+coeffs[n]* (*Tnp);
+	out=out+Coeffs[n]* (*Tnp);
 	// Cycle pointers to avoid copies
 	Field *swizzle = Tnm;
--- a/lib/algorithms/approx/LICENSE
+++ b/lib/algorithms/approx/LICENSE
@@ -0,0 +1,21 @@
 Copyright (c) 2011 Michael Clark
 Permission is hereby granted, free of charge, to any person obtaining a copy
 of this software and associated documentation files (the "Software"), to deal
 in the Software without restriction, including without limitation the rights
 to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 copies of the Software, and to permit persons to whom the Software is
 furnished to do so, subject to the following conditions:
 The above copyright notice and this permission notice shall be included in
 all copies or substantial portions of the Software.
 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 THE SOFTWARE.
--- a/lib/algorithms/approx/README
+++ b/lib/algorithms/approx/README
@@ -0,0 +1,80 @@
 -----------------------------------------------------------------------------------
 PAB. Took Mike Clark's AlgRemez from GitHub and (modified a little) include.
 This is open source and license and readme and comments are preserved consistent
 with the license. Mike, thankyou!
 -----------------------------------------------------------------------------------
 -----------------------------------------------------------------------------------
 AlgRemez
 The archive downloadable here contains an implementation of the Remez
 algorithm which calculates optimal rational (and polynomial)
 approximations to the nth root over a given spectral range.  The Remez
 algorithm, although in principle is extremely straightforward to
 program, is quite difficult to get completely correct, e.g., the Maple
 implementation of the algorithm does not always converge to the
 correct answer.
 To use this algorithm you need to install GMP, the GNU Multiple
 Precision Library, and when configuring the install, you must include
 the --enable-mpfr option (see the GMP manual for more details).  You
 also have to edit the Makefile for AlgRemez appropriately for your
 system, namely to point corrrectly to the location of the GMP library.
 The simple main program included with this archive invokes the
 AlgRemez class to calculate an approximation given by command line
 arguments.  It is invoked by the following
 ./test y z n d lambda_low lambda_high precision,
 where the function to be approximated is f(x) = x^(y/z), with degree
 (n,d) over the spectral range [lambda_low, lambda_high], using
 precision digits of precision in the arithmetic.  So an example would
 be
 ./test 1 2 5 5 0.0004 64 40
 which corresponds to constructing a rational approximation to the
 square root function, with degree (5,5) over the range [0.0004,64]
 with 40 digits of precision used for the arithmetic.  The parameters y
 and z must be positive, the approximation to f(x) = x^(-y/z) is simply
 the inverse of the approximation to f(x) = x^(y/z).  After the
 approximation has been constructed, the roots and poles of the
 rational function are found, and then the partial fraction expansion
 of both the rational function and it's inverse are found, the results
 of which are output to a file called "approx.dat".  In addition, the
 error function of the approximation is output to "error.dat", where it
 can be checked that the resultant approximation satisfies Chebychev's
 criterion, namely all error maxima are equal in magnitude, and
 adjacent maxima are oppostie in sign.  There are some caveats here
 however, the optimal polynomial approximation has complex roots, and
 the root finding implemented here cannot (yet) handle complex roots.
 In addition, the partial fraction expansion of rational approximations
 is only found for the case n = d, i.e., the degree of numerator
 polynomial equals that of the denominator polynomial.  The convention
 for the partial fraction expansion is that polar shifts are always
 written added to x, not subtracted.
 To do list
 1.  Include an exponential dampening factor in the function to be
 approximated.  This may sound trivial to implement, but for some
 parameters, the algorithm seems to breakdown.  Also, the roots in the
 rational approximation sometimes become complex, which currently
 breaks the stupidly simple root finding code.
 2. Make the algorithm faster - it's too slow when running on qcdoc.
 3. Add complex root finding.
 4. Add more options for error minimisation - currently the code
 minimises the relative error, should add options for absolute error,
 and other norms.
 There will be a forthcoming publication concerning the results
 generated by this software, but in the meantime, if you use this
 software, please cite it as
 "M.A. Clark and A.D. Kennedy, https://github.com/mikeaclark/AlgRemez, 2005".
 If you have any problems using the software, then please email scientist.mike@gmail.com.
--- a/lib/algorithms/approx/Remez.cc
+++ b/lib/algorithms/approx/Remez.cc
@@ -0,0 +1,755 @@
 /*
  Mike Clark - 25th May 2005
  alg_remez.C
  AlgRemez is an implementation of the Remez algorithm, which in this
  case is used for generating the optimal nth root rational
  approximation.
  Note this class requires the gnu multiprecision (GNU MP) library.
 */
 #include<math.h>
 #include<stdio.h>
 #include<stdlib.h>
 #include<string>
 #include<iostream>
 #include<iomanip>
 #include<cassert>
 #include<algorithms/approx/Remez.h>
 // Constructor
 AlgRemez::AlgRemez(double lower, double upper, long precision) 
 {
  prec = precision;
  bigfloat::setDefaultPrecision(prec);
  apstrt = lower;
  apend = upper;
  apwidt = apend - apstrt;
  std::cout<<"Approximation bounds are ["<<apstrt<<","<<apend<<"]\n";
  std::cout<<"Precision of arithmetic is "<<precision<<std::endl;
  alloc = 0;
  n = 0;
  d = 0;
  foundRoots = 0;
  // Only require the approximation spread to be less than 1 ulp
  tolerance = 1e-15;
 }
 // Destructor
 AlgRemez::~AlgRemez()
 {
  if (alloc) {
    delete [] param;
    delete [] roots;
    delete [] poles;
    delete [] xx;
    delete [] mm;
    delete [] a_power;
    delete [] a;
  }
 }
 // Free memory and reallocate as necessary
 void AlgRemez::allocate(int num_degree, int den_degree)
 {
  // Arrays have previously been allocated, deallocate first, then allocate
  if (alloc) {
    delete [] param;
    delete [] roots;
    delete [] poles;
    delete [] xx;
    delete [] mm;
  }
  // Note use of new and delete in memory allocation - cannot run on qcdsp
  param = new bigfloat[num_degree+den_degree+1];
  roots = new bigfloat[num_degree];
  poles = new bigfloat[den_degree];
  xx = new bigfloat[num_degree+den_degree+3];
  mm = new bigfloat[num_degree+den_degree+2];
  if (!alloc) {
    // The coefficients of the sum in the exponential
    a = new bigfloat[SUM_MAX];
    a_power = new int[SUM_MAX];
  }
  alloc = 1;
 }
 // Reset the bounds of the approximation
 void AlgRemez::setBounds(double lower, double upper)
 {
  apstrt = lower;
  apend = upper;
  apwidt = apend - apstrt;
 }
 // Generate the rational approximation x^(pnum/pden)
 double AlgRemez::generateApprox(int degree, unsigned long pnum, 
 				unsigned long pden)
 {
  return generateApprox(degree, degree, pnum, pden);
 }
 double AlgRemez::generateApprox(int num_degree, int den_degree, 
 				unsigned long pnum, unsigned long pden)
 {
  double *a_param = 0;
  int *a_pow = 0;
  return generateApprox(num_degree, den_degree, pnum, pden, 0, a_param, a_pow);
 }
 // Generate the rational approximation x^(pnum/pden)
 double AlgRemez::generateApprox(int num_degree, int den_degree, 
 				unsigned long pnum, unsigned long pden,
 				int a_len, double *a_param, int *a_pow)
 {
  std::cout<<"Degree of the approximation is ("<<num_degree<<","<<den_degree<<")\n";
  std::cout<<"Approximating the function x^("<<pnum<<"/"<<pden<<"\n";
  // Reallocate arrays, since degree has changed
  if (num_degree != n || den_degree != d) allocate(num_degree,den_degree);
  assert(a_len<=SUM_MAX);
  step = new bigfloat[num_degree+den_degree+2];
  a_length = a_len;
  for (int j=0; j<a_len; j++) {
    a[j]= a_param[j];
    a_power[j] = a_pow[j];
  }
  power_num = pnum;
  power_den = pden;
  spread = 1.0e37;
  iter = 0;
  n = num_degree;
  d = den_degree;
  neq = n + d + 1;
  initialGuess();
  stpini(step);
  while (spread > tolerance) { //iterate until convergance
    if (iter++%100==0) 
      std::cout<<"Iteration " <<iter-1<<" spread "<<(double)spread<<" delta "<<(double)delta<<std::endl; 
    equations();
    assert( delta>= tolerance);
    search(step);
  }
  int sign;
  double error = (double)getErr(mm[0],&sign);
  std::cout<<"Converged at "<<iter<<" iterations; error = "<<error<<std::endl;
  // Once the approximation has been generated, calculate the roots
  if(!root()) {
    std::cout<<"Root finding failed\n";
  } else {
    foundRoots = 1;
  }
  delete [] step;
  // Return the maximum error in the approximation
  return error;
 }
 // Return the partial fraction expansion of the approximation x^(pnum/pden)
 int AlgRemez::getPFE(double *Res, double *Pole, double *Norm) {
  if (n!=d) {
    std::cout<<"Cannot handle case: Numerator degree neq Denominator degree\n";
    return 0;
  }
  if (!alloc) {
    std::cout<<"Approximation not yet generated\n";
    return 0;
  }
  if (!foundRoots) {
    std::cout<<"Roots not found, so PFE cannot be taken\n";
    return 0;
  }
  bigfloat *r = new bigfloat[n];
  bigfloat *p = new bigfloat[d];
  for (int i=0; i<n; i++) r[i] = roots[i];
  for (int i=0; i<d; i++) p[i] = poles[i];
  // Perform a partial fraction expansion
  pfe(r, p, norm);
  // Convert to double and return
  *Norm = (double)norm;
  for (int i=0; i<n; i++) Res[i] = (double)r[i];
  for (int i=0; i<d; i++) Pole[i] = (double)p[i];
  delete [] r;
  delete [] p;
  // Where the smallest shift is located
  return 0;
 }
 // Return the partial fraction expansion of the approximation x^(-pnum/pden)
 int AlgRemez::getIPFE(double *Res, double *Pole, double *Norm) {
  if (n!=d) {
    std::cout<<"Cannot handle case: Numerator degree neq Denominator degree\n";
    return 0;
  }
  if (!alloc) {
    std::cout<<"Approximation not yet generated\n";
    return 0;
  }
  if (!foundRoots) {
    std::cout<<"Roots not found, so PFE cannot be taken\n";
    return 0;
  }
  bigfloat *r = new bigfloat[d];
  bigfloat *p = new bigfloat[n];
  // Want the inverse function
  for (int i=0; i<n; i++) {
    r[i] = poles[i];
    p[i] = roots[i];
  }
  // Perform a partial fraction expansion
  pfe(r, p, (bigfloat)1l/norm);
  // Convert to double and return
  *Norm = (double)((bigfloat)1l/(norm));
  for (int i=0; i<n; i++) {
    Res[i] = (double)r[i];
    Pole[i] = (double)p[i];
  }
  delete [] r;
  delete [] p;
  // Where the smallest shift is located
  return 0;
 }
 // Initial values of maximal and minimal errors
 void AlgRemez::initialGuess() {
  // Supply initial guesses for solution points
  long ncheb = neq;			// Degree of Chebyshev error estimate
  bigfloat a, r;
  // Find ncheb+1 extrema of Chebyshev polynomial
  a = ncheb;
  mm[0] = apstrt;
  for (long i = 1; i < ncheb; i++) {
    r = 0.5 * (1 - cos((M_PI * i)/(double) a));
    //r *= sqrt_bf(r);
    r = (exp((double)r)-1.0)/(exp(1.0)-1.0);
    mm[i] = apstrt + r * apwidt;
  }
  mm[ncheb] = apend;
  a = 2.0 * ncheb;
  for (long i = 0; i <= ncheb; i++) {
    r = 0.5 * (1 - cos(M_PI * (2*i+1)/(double) a));
    //r *= sqrt_bf(r); // Squeeze to low end of interval
    r = (exp((double)r)-1.0)/(exp(1.0)-1.0);
    xx[i] = apstrt + r * apwidt;
  }
 }
 // Initialise step sizes
 void AlgRemez::stpini(bigfloat *step) {
  xx[neq+1] = apend;
  delta = 0.25;
  step[0] = xx[0] - apstrt;
  for (int i = 1; i < neq; i++) step[i] = xx[i] - xx[i-1];
  step[neq] = step[neq-1];
 }
 // Search for error maxima and minima
 void AlgRemez::search(bigfloat *step) {
  bigfloat a, q, xm, ym, xn, yn, xx0, xx1;
  int i, j, meq, emsign, ensign, steps;
  meq = neq + 1;
  bigfloat *yy = new bigfloat[meq];
  bigfloat eclose = 1.0e30;
  bigfloat farther = 0l;
  j = 1;
  xx0 = apstrt;
  for (i = 0; i < meq; i++) {
    steps = 0;
    xx1 = xx[i]; // Next zero
    if (i == meq-1) xx1 = apend;
    xm = mm[i];
    ym = getErr(xm,&emsign);
    q = step[i];
    xn = xm + q;
    if (xn < xx0 || xn >= xx1) {	// Cannot skip over adjacent boundaries
      q = -q;
      xn = xm;
      yn = ym;
      ensign = emsign;
    } else {
      yn = getErr(xn,&ensign);
      if (yn < ym) {
 	q = -q;
 	xn = xm;
 	yn = ym;
 	ensign = emsign;
      }
    }
    while(yn >= ym) {		// March until error becomes smaller.
      if (++steps > 10) break;
      ym = yn;
      xm = xn;
      emsign = ensign;
      a = xm + q;
      if (a == xm || a <= xx0 || a >= xx1) break;// Must not skip over the zeros either side.
      xn = a;
      yn = getErr(xn,&ensign);
    }
    mm[i] = xm;			// Position of maximum
    yy[i] = ym;			// Value of maximum
    if (eclose > ym) eclose = ym;
    if (farther < ym) farther = ym;
    xx0 = xx1; // Walk to next zero.
  } // end of search loop
  q = (farther - eclose);	// Decrease step size if error spread increased
  if (eclose != 0.0) q /= eclose; // Relative error spread
  if (q >= spread) delta *= 0.5; // Spread is increasing; decrease step size
  spread = q;
  for (i = 0; i < neq; i++) {
    q = yy[i+1];
    if (q != 0.0) q = yy[i] / q  - (bigfloat)1l;
    else q = 0.0625;
    if (q > (bigfloat)0.25) q = 0.25;
    q *= mm[i+1] - mm[i];
    step[i] = q * delta;
  }
  step[neq] = step[neq-1];
  for (i = 0; i < neq; i++) {	// Insert new locations for the zeros.
    xm = xx[i] - step[i];
    if (xm <= apstrt) continue;
    if (xm >= apend) continue;
    if (xm <= mm[i]) xm = (bigfloat)0.5 * (mm[i] + xx[i]);
    if (xm >= mm[i+1]) xm = (bigfloat)0.5 * (mm[i+1] + xx[i]);
    xx[i] = xm;
  }
  delete [] yy;
 }
 // Solve the equations
 void AlgRemez::equations(void) {
  bigfloat x, y, z;
  int i, j, ip;
  bigfloat *aa;
  bigfloat *AA = new bigfloat[(neq)*(neq)];
  bigfloat *BB = new bigfloat[neq];
  for (i = 0; i < neq; i++) {	// set up the equations for solution by simq()
    ip = neq * i;		// offset to 1st element of this row of matrix
    x = xx[i];			// the guess for this row
    y = func(x);		// right-hand-side vector
    z = (bigfloat)1l;
    aa = AA+ip;
    for (j = 0; j <= n; j++) {
      *aa++ = z;
      z *= x;
    }
    z = (bigfloat)1l;
    for (j = 0; j < d; j++) {
      *aa++ = -y * z;
      z *= x;
    }
    BB[i] = y * z;		// Right hand side vector
  }
  // Solve the simultaneous linear equations.
  if (simq(AA, BB, param, neq)) {
    std::cout<<"simq failed\n";
    exit(0);
  }
  delete [] AA;
  delete [] BB;
 }
 // Evaluate the rational form P(x)/Q(x) using coefficients
 // from the solution vector param
 bigfloat AlgRemez::approx(const bigfloat x) {
  bigfloat yn, yd;
  int i;
  // Work backwards toward the constant term.
  yn = param[n];		// Highest order numerator coefficient
  for (i = n-1; i >= 0; i--) yn = x * yn  +  param[i]; 
  yd = x + param[n+d];	// Highest degree coefficient = 1.0
  for (i = n+d-1; i > n; i--) yd = x * yd  +  param[i];
  return(yn/yd);
 }
 // Compute size and sign of the approximation error at x
 bigfloat AlgRemez::getErr(bigfloat x, int *sign) {
  bigfloat e, f;
  f = func(x);
  e = approx(x) - f;
  if (f != 0) e /= f;
  if (e < (bigfloat)0.0) {
    *sign = -1;
    e = -e;
  }
  else *sign = 1;
  return(e);
 }
 // Calculate function required for the approximation.
 bigfloat AlgRemez::func(const bigfloat x) {
  bigfloat z = (bigfloat)power_num / (bigfloat)power_den;
  bigfloat y;
  if (x == (bigfloat)1.0) y = (bigfloat)1.0;
  else y = pow_bf(x,z);
  if (a_length > 0) {
    bigfloat sum = 0l;
    for (int j=0; j<a_length; j++) sum += a[j]*pow_bf(x,a_power[j]);
    return y * exp_bf(sum);
  } else {
    return y;
  }
 }
 // Solve the system AX=B
 int AlgRemez::simq(bigfloat A[], bigfloat B[], bigfloat X[], int n) {
  int i, j, ij, ip, ipj, ipk, ipn;
  int idxpiv, iback;
  int k, kp, kp1, kpk, kpn;
  int nip, nkp, nm1;
  bigfloat em, q, rownrm, big, size, pivot, sum;
  bigfloat *aa;
  // simq() work vector
  int *IPS = new int[(neq) * sizeof(int)];
  nm1 = n - 1;
  // Initialize IPS and X
  ij = 0;
  for (i = 0; i < n; i++) {
    IPS[i] = i;
    rownrm = 0.0;
    for(j = 0; j < n; j++) {
      q = abs_bf(A[ij]);
      if(rownrm < q) rownrm = q;
      ++ij;
    }
    if (rownrm == (bigfloat)0l) {
      std::cout<<"simq rownrm=0\n";
      delete [] IPS;
      return(1);
    }
    X[i] = (bigfloat)1.0 / rownrm;
  }
  for (k = 0; k < nm1; k++) {
    big = 0.0;
    idxpiv = 0;
    for (i = k; i < n; i++) {
      ip = IPS[i];
      ipk = n*ip + k;
      size = abs_bf(A[ipk]) * X[ip];
      if (size > big) {
 	big = size;
 	idxpiv = i;
      }
    }
    if (big == (bigfloat)0l) {
      std::cout<<"simq big=0\n";
      delete [] IPS;
      return(2);
    }
    if (idxpiv != k) {
      j = IPS[k];
      IPS[k] = IPS[idxpiv];
      IPS[idxpiv] = j;
    }
    kp = IPS[k];
    kpk = n*kp + k;
    pivot = A[kpk];
    kp1 = k+1;
    for (i = kp1; i < n; i++) {
      ip = IPS[i];
      ipk = n*ip + k;
      em = -A[ipk] / pivot;
      A[ipk] = -em;
      nip = n*ip;
      nkp = n*kp;
      aa = A+nkp+kp1;
      for (j = kp1; j < n; j++) {
 	ipj = nip + j;
 	A[ipj] = A[ipj] + em * *aa++;
      }
    }
  }
  kpn = n * IPS[n-1] + n - 1;	// last element of IPS[n] th row
  if (A[kpn] == (bigfloat)0l) {
    std::cout<<"simq A[kpn]=0\n";
    delete [] IPS;
    return(3);
  }
  ip = IPS[0];
  X[0] = B[ip];
  for (i = 1; i < n; i++) {
    ip = IPS[i];
    ipj = n * ip;
    sum = 0.0;
    for (j = 0; j < i; j++) {
      sum += A[ipj] * X[j];
      ++ipj;
    }
    X[i] = B[ip] - sum;
  }
  ipn = n * IPS[n-1] + n - 1;
  X[n-1] = X[n-1] / A[ipn];
  for (iback = 1; iback < n; iback++) {
    //i goes (n-1),...,1
    i = nm1 - iback;
    ip = IPS[i];
    nip = n*ip;
    sum = 0.0;
    aa = A+nip+i+1;
    for (j= i + 1; j < n; j++) 
      sum += *aa++ * X[j];
    X[i] = (X[i] - sum) / A[nip+i];
  }
  delete [] IPS;
  return(0);
 }
 // Calculate the roots of the approximation
 int AlgRemez::root() {
  long i,j;
  bigfloat x,dx=0.05;
  bigfloat upper=1, lower=-100000;
  bigfloat tol = 1e-20;
  bigfloat *poly = new bigfloat[neq+1];
  // First find the numerator roots
  for (i=0; i<=n; i++) poly[i] = param[i];
  for (i=n-1; i>=0; i--) {
    roots[i] = rtnewt(poly,i+1,lower,upper,tol);
    if (roots[i] == 0.0) {
      std::cout<<"Failure to converge on root "<<i+1<<"/"<<n<<"\n";
      return 0;
    }
    poly[0] = -poly[0]/roots[i];
    for (j=1; j<=i; j++) poly[j] = (poly[j-1] - poly[j])/roots[i];
  }
 // Now find the denominator roots
  poly[d] = 1l;
  for (i=0; i<d; i++) poly[i] = param[n+1+i];
  for (i=d-1; i>=0; i--) {
    poles[i]=rtnewt(poly,i+1,lower,upper,tol);
    if (poles[i] == 0.0) {
      std::cout<<"Failure to converge on pole "<<i+1<<"/"<<d<<"\n";
      return 0;
    }
    poly[0] = -poly[0]/poles[i];
    for (j=1; j<=i; j++) poly[j] = (poly[j-1] - poly[j])/poles[i];
  }
  norm = param[n];
  delete [] poly;
  return 1;
 }
 // Evaluate the polynomial
 bigfloat AlgRemez::polyEval(bigfloat x, bigfloat *poly, long size) {
  bigfloat f = poly[size];
  for (int i=size-1; i>=0; i--) f = f*x + poly[i];
  return f;
 }
 // Evaluate the differential of the polynomial
 bigfloat AlgRemez::polyDiff(bigfloat x, bigfloat *poly, long size) {
  bigfloat df = (bigfloat)size*poly[size];
  for (int i=size-1; i>0; i--) df = df*x + (bigfloat)i*poly[i];
  return df;
 }
 // Newton's method to calculate roots
 bigfloat AlgRemez::rtnewt(bigfloat *poly, long i, bigfloat x1, 
 			  bigfloat x2, bigfloat xacc) {
  int j;
  bigfloat df, dx, f, rtn;
  rtn=(bigfloat)0.5*(x1+x2);
  for (j=1; j<=JMAX;j++) {
    f = polyEval(rtn, poly, i);
    df = polyDiff(rtn, poly, i);
    dx = f/df;
    rtn -= dx;
    if (abs_bf(dx) < xacc) return rtn;
  }
  std::cout<<"Maximum number of iterations exceeded in rtnewt\n";
  return 0.0;
 }
 // Evaluate the partial fraction expansion of the rational function
 // with res roots and poles poles.  Result is overwritten on input
 // arrays.
 void AlgRemez::pfe(bigfloat *res, bigfloat *poles, bigfloat norm) {
  int i,j,small;
  bigfloat temp;
  bigfloat *numerator = new bigfloat[n];
  bigfloat *denominator = new bigfloat[d];
  // Construct the polynomials explicitly 
  for (i=1; i<n; i++) {
    numerator[i] = 0l;
    denominator[i] = 0l;
  }
  numerator[0]=1l;
  denominator[0]=1l;
  for (j=0; j<n; j++) {
    for (i=n-1; i>=0; i--) {
      numerator[i] *= -res[j];
      denominator[i] *= -poles[j];
      if (i>0) {
 	numerator[i] += numerator[i-1];
 	denominator[i] += denominator[i-1];
      }
    }
  }
  // Convert to proper fraction form.
  // Fraction is now in the form 1 + n/d, where O(n)+1=O(d)
  for (i=0; i<n; i++) numerator[i] -= denominator[i];
  // Find the residues of the partial fraction expansion and absorb the
  // coefficients.
  for (i=0; i<n; i++) {
    res[i] = 0l;
    for (j=n-1; j>=0; j--) {
      res[i] = poles[i]*res[i]+numerator[j];
    }
    for (j=n-1; j>=0; j--) {
      if (i!=j) res[i] /= poles[i]-poles[j];
    }
    res[i] *= norm;
  }  
  // res now holds the residues
  j = 0;
  for (i=0; i<n; i++) poles[i] = -poles[i];
  // Move the ordering of the poles from smallest to largest
  for (j=0; j<n; j++) {
    small = j;
    for (i=j+1; i<n; i++) {
      if (poles[i] < poles[small]) small = i;
    }
    if (small != j) {
      temp = poles[small];
      poles[small] = poles[j];
      poles[j] = temp;
      temp = res[small];
      res[small] = res[j];
      res[j] = temp;
    }
  }
  delete [] numerator;
  delete [] denominator;
 }
 double AlgRemez::evaluateApprox(double x) {
  return (double)approx((bigfloat)x);
 }
 double AlgRemez::evaluateInverseApprox(double x) {
  return 1.0/(double)approx((bigfloat)x);
 }
 double AlgRemez::evaluateFunc(double x) {
  return (double)func((bigfloat)x);
 }
 double AlgRemez::evaluateInverseFunc(double x) {
  return 1.0/(double)func((bigfloat)x);
 }
 void AlgRemez::csv(std::ostream & os)
 {
  double lambda_low = apstrt;
  double lambda_high= apend;
  for (double x=lambda_low; x<lambda_high; x*=1.05) {
    double f = evaluateFunc(x);
    double r = evaluateApprox(x);
    os<< x<<","<<r<<","<<f<<","<<r-f<<std::endl;
  }
  return;
 }
--- a/lib/algorithms/approx/Remez.h
+++ b/lib/algorithms/approx/Remez.h
@@ -0,0 +1,167 @@
 /*
  Mike Clark - 25th May 2005
  alg_remez.h
  AlgRemez is an implementation of the Remez algorithm, which in this
  case is used for generating the optimal nth root rational
  approximation.
  Note this class requires the gnu multiprecision (GNU MP) library.
 */
 #ifndef INCLUDED_ALG_REMEZ_H
 #define INCLUDED_ALG_REMEZ_H
 #include <algorithms/approx/bigfloat.h>
 #define JMAX 10000 //Maximum number of iterations of Newton's approximation
 #define SUM_MAX 10 // Maximum number of terms in exponential
 /*
 *Usage examples
  AlgRemez remez(lambda_low,lambda_high,precision);
  error = remez.generateApprox(n,d,y,z);
  remez.getPFE(res,pole,&norm);
  remez.getIPFE(res,pole,&norm);
  remez.csv(ostream &os);
 */
 class AlgRemez
 {
 private:
  char *cname;
  // The approximation parameters
  bigfloat *param, *roots, *poles;
  bigfloat norm;
  // The numerator and denominator degree (n=d)
  int n, d;
  // The bounds of the approximation
  bigfloat apstrt, apwidt, apend;
  // the numerator and denominator of the power we are approximating
  unsigned long power_num; 
  unsigned long power_den;
  // Flag to determine whether the arrays have been allocated
  int alloc;
  // Flag to determine whether the roots have been found
  int foundRoots;
  // Variables used to calculate the approximation
  int nd1, iter;
  bigfloat *xx, *mm, *step;
  bigfloat delta, spread, tolerance;
  // The exponential summation coefficients
  bigfloat *a;
  int *a_power;
  int a_length;
  // The number of equations we must solve at each iteration (n+d+1)
  int neq;
  // The precision of the GNU MP library
  long prec;
  // Initial values of maximal and minmal errors
  void initialGuess();
  // Solve the equations
  void equations();
  // Search for error maxima and minima
  void search(bigfloat *step); 
  // Initialise step sizes
  void stpini(bigfloat *step);
  // Calculate the roots of the approximation
  int root();
  // Evaluate the polynomial
  bigfloat polyEval(bigfloat x, bigfloat *poly, long size);
  //complex_bf polyEval(complex_bf x, complex_bf *poly, long size);
  // Evaluate the differential of the polynomial
  bigfloat polyDiff(bigfloat x, bigfloat *poly, long size);
  //complex_bf polyDiff(complex_bf x, complex_bf *poly, long size);
  // Newton's method to calculate roots
  bigfloat rtnewt(bigfloat *poly, long i, bigfloat x1, bigfloat x2, bigfloat xacc);
  //complex_bf rtnewt(complex_bf *poly, long i, bigfloat x1, bigfloat x2, bigfloat xacc);
  // Evaluate the partial fraction expansion of the rational function
  // with res roots and poles poles.  Result is overwritten on input
  // arrays.
  void pfe(bigfloat *res, bigfloat* poles, bigfloat norm);
  // Calculate function required for the approximation
  bigfloat func(bigfloat x);
  // Compute size and sign of the approximation error at x
  bigfloat getErr(bigfloat x, int *sign);
  // Solve the system AX=B
  int simq(bigfloat *A, bigfloat *B, bigfloat *X, int n);
  // Free memory and reallocate as necessary
  void allocate(int num_degree, int den_degree);
  // Evaluate the rational form P(x)/Q(x) using coefficients from the
  // solution vector param
  bigfloat approx(bigfloat x);
 public:
  // Constructor
  AlgRemez(double lower, double upper, long prec);
  // Destructor
  virtual ~AlgRemez();
  // Reset the bounds of the approximation
  void setBounds(double lower, double upper);
  // Generate the rational approximation x^(pnum/pden)
  double generateApprox(int num_degree, int den_degree, 
 			unsigned long power_num, unsigned long power_den, 
 			int a_len, double* a_param, int* a_pow);
  double generateApprox(int num_degree, int den_degree, 
 			unsigned long power_num, unsigned long power_den);
  double generateApprox(int degree, unsigned long power_num, 
 			unsigned long power_den);
  // Return the partial fraction expansion of the approximation x^(pnum/pden)
  int getPFE(double *res, double *pole, double *norm);
  // Return the partial fraction expansion of the approximation x^(-pnum/pden)
  int getIPFE(double *res, double *pole, double *norm);
  // Evaluate the rational form P(x)/Q(x) using coefficients from the
  // solution vector param
  double evaluateApprox(double x);
  // Evaluate the rational form Q(x)/P(x) using coefficients from the
  // solution vector param
  double evaluateInverseApprox(double x);
  // Calculate function required for the approximation
  double evaluateFunc(double x);
  // Calculate inverse function required for the approximation
  double evaluateInverseFunc(double x);
  // Dump csv of function, approx and error
  void csv(std::ostream &os);
 };
 #endif  // Include guard
--- a/lib/algorithms/approx/Zolotarev.cc
+++ b/lib/algorithms/approx/Zolotarev.cc
--- a/lib/algorithms/approx/bigfloat.h
+++ b/lib/algorithms/approx/bigfloat.h
@@ -0,0 +1,368 @@
 /*
  Mike Clark - 25th May 2005
  bigfloat.h
  Simple C++ wrapper for multiprecision datatype used by AlgRemez
  algorithm
 */
 #ifndef INCLUDED_BIGFLOAT_H
 #define INCLUDED_BIGFLOAT_H
 #ifdef HAVE_GMP
 #include <gmp.h>
 #include <mpf2mpfr.h>
 #include <mpfr.h>
 class bigfloat {
 private:
  mpf_t x;
 public:
  bigfloat() { mpf_init(x); }
  bigfloat(const bigfloat& y) { mpf_init_set(x, y.x); }
  bigfloat(const unsigned long u) { mpf_init_set_ui(x, u); }
  bigfloat(const long i) { mpf_init_set_si(x, i); }
  bigfloat(const int i) {mpf_init_set_si(x,(long)i);}
  bigfloat(const float d) { mpf_init_set_d(x, (double)d); }
  bigfloat(const double d) { mpf_init_set_d(x, d); }  
  bigfloat(const char *str) { mpf_init_set_str(x, (char*)str, 10); }
  ~bigfloat(void) { mpf_clear(x); }
  operator const double (void) const { return (double)mpf_get_d(x); }
  static void setDefaultPrecision(unsigned long dprec) {
    unsigned long bprec =  (unsigned long)(3.321928094 * (double)dprec);
    mpf_set_default_prec(bprec);
  }
  void setPrecision(unsigned long dprec) {
    unsigned long bprec =  (unsigned long)(3.321928094 * (double)dprec);
    mpf_set_prec(x,bprec);
  }
  unsigned long getPrecision(void) const { return mpf_get_prec(x); }
  unsigned long getDefaultPrecision(void) const { return mpf_get_default_prec(); }
  bigfloat& operator=(const bigfloat& y) {
    mpf_set(x, y.x); 
    return *this;
  }
  bigfloat& operator=(const unsigned long y) { 
    mpf_set_ui(x, y);
    return *this; 
  }
  bigfloat& operator=(const signed long y) {
    mpf_set_si(x, y); 
    return *this;
  }
  bigfloat& operator=(const float y) {
    mpf_set_d(x, (double)y); 
    return *this;
  }
  bigfloat& operator=(const double y) {
    mpf_set_d(x, y); 
    return *this;
  }
  size_t write(void);
  size_t read(void);
  /* Arithmetic Functions */
  bigfloat& operator+=(const bigfloat& y) { return *this = *this + y; }
  bigfloat& operator-=(const bigfloat& y) { return *this = *this - y; }
  bigfloat& operator*=(const bigfloat& y) { return *this = *this * y; }
  bigfloat& operator/=(const bigfloat& y) { return *this = *this / y; }
  friend bigfloat operator+(const bigfloat& x, const bigfloat& y) {
    bigfloat a;
    mpf_add(a.x,x.x,y.x);
    return a;
  }
  friend bigfloat operator+(const bigfloat& x, const unsigned long y) {
    bigfloat a;
    mpf_add_ui(a.x,x.x,y);
    return a;
  }
  friend bigfloat operator-(const bigfloat& x, const bigfloat& y) {
    bigfloat a;
    mpf_sub(a.x,x.x,y.x);
    return a;
  }
  friend bigfloat operator-(const unsigned long x, const bigfloat& y) {
    bigfloat a;
    mpf_ui_sub(a.x,x,y.x);
    return a;
  }
  friend bigfloat operator-(const bigfloat& x, const unsigned long y) {
    bigfloat a;
    mpf_sub_ui(a.x,x.x,y);
    return a;
  }
  friend bigfloat operator-(const bigfloat& x) {
    bigfloat a;
    mpf_neg(a.x,x.x);
    return a;
  }
  friend bigfloat operator*(const bigfloat& x, const bigfloat& y) {
    bigfloat a;
    mpf_mul(a.x,x.x,y.x);
    return a;
  }
  friend bigfloat operator*(const bigfloat& x, const unsigned long y) {
    bigfloat a;
    mpf_mul_ui(a.x,x.x,y);
    return a;
  }
  friend bigfloat operator/(const bigfloat& x, const bigfloat& y){
    bigfloat a;
    mpf_div(a.x,x.x,y.x);
    return a;
  }
  friend bigfloat operator/(const unsigned long x, const bigfloat& y){
    bigfloat a;
    mpf_ui_div(a.x,x,y.x);
    return a;
  }
  friend bigfloat operator/(const bigfloat& x, const unsigned long y){
    bigfloat a;
    mpf_div_ui(a.x,x.x,y);
    return a;
  }
  friend bigfloat sqrt_bf(const bigfloat& x){
    bigfloat a;
    mpf_sqrt(a.x,x.x);
    return a;
  }
  friend bigfloat sqrt_bf(const unsigned long x){
    bigfloat a;
    mpf_sqrt_ui(a.x,x);
    return a;
  }
  friend bigfloat abs_bf(const bigfloat& x){
    bigfloat a;
    mpf_abs(a.x,x.x);
    return a;
  }
  friend bigfloat pow_bf(const bigfloat& a, long power) {
    bigfloat b;
    mpf_pow_ui(b.x,a.x,power);
    return b;
  }
  friend bigfloat pow_bf(const bigfloat& a, bigfloat &power) {
    bigfloat b;
    mpfr_pow(b.x,a.x,power.x,GMP_RNDN);
    return b;
  }
  friend bigfloat exp_bf(const bigfloat& a) {
    bigfloat b;
    mpfr_exp(b.x,a.x,GMP_RNDN);
    return b;
  }
  /* Comparison Functions */
  friend int operator>(const bigfloat& x, const bigfloat& y) {
    int test;
    test = mpf_cmp(x.x,y.x);
    if (test > 0) return 1;
    else return 0;
  }
  friend int operator<(const bigfloat& x, const bigfloat& y) {
    int test;
    test = mpf_cmp(x.x,y.x);
    if (test < 0) return 1;
    else return 0;
  }
  friend int sgn(const bigfloat&);
 };
 #else
 typedef double mfloat; 
 class bigfloat {
 private:
  mfloat x;
 public:
  bigfloat() { }
  bigfloat(const bigfloat& y) { x=y.x; }
  bigfloat(const unsigned long u) { x=u; }
  bigfloat(const long i) { x=i; }
  bigfloat(const int i) { x=i;}
  bigfloat(const float d) { x=d;}
  bigfloat(const double d) {  x=d;}
  bigfloat(const char *str) { x=std::stod(std::string(str));}
  ~bigfloat(void) { }
  operator double (void) const { return (double)x; }
  static void setDefaultPrecision(unsigned long dprec) {
  }
  void setPrecision(unsigned long dprec) {
  }
  unsigned long getPrecision(void) const { return 64; }
  unsigned long getDefaultPrecision(void) const { return 64; }
  bigfloat& operator=(const bigfloat& y)     { x=y.x;    return *this;  }
  bigfloat& operator=(const unsigned long y) { x=y; return *this; }
  bigfloat& operator=(const signed long y)   { x=y; return *this; }
  bigfloat& operator=(const float y)    { x=y; return *this; }
  bigfloat& operator=(const double y)   { x=y; return *this; }
  size_t write(void);
  size_t read(void);
  /* Arithmetic Functions */
  bigfloat& operator+=(const bigfloat& y) { return *this = *this + y; }
  bigfloat& operator-=(const bigfloat& y) { return *this = *this - y; }
  bigfloat& operator*=(const bigfloat& y) { return *this = *this * y; }
  bigfloat& operator/=(const bigfloat& y) { return *this = *this / y; }
  friend bigfloat operator+(const bigfloat& x, const bigfloat& y) { 
    bigfloat a;
    a.x=x.x+y.x;
    return a;
  }
  friend bigfloat operator+(const bigfloat& x, const unsigned long y) {
    bigfloat a;
    a.x=x.x+y;
    return a;
  }
  friend bigfloat operator-(const bigfloat& x, const bigfloat& y) {
    bigfloat a;
    a.x=x.x-y.x;
    return a;
  }
  friend bigfloat operator-(const unsigned long x, const bigfloat& y) {
    bigfloat bx(x);
    return bx-y;
  }
  friend bigfloat operator-(const bigfloat& x, const unsigned long y) {
    bigfloat by(y);
    return x-by;
  }
  friend bigfloat operator-(const bigfloat& x) {
    bigfloat a;
    a.x=-x.x;
    return a;
  }
  friend bigfloat operator*(const bigfloat& x, const bigfloat& y) {
    bigfloat a;
    a.x=x.x*y.x;
    return a;
  }
  friend bigfloat operator*(const bigfloat& x, const unsigned long y) {
    bigfloat a;
    a.x=x.x*y;
    return a;
  }
  friend bigfloat operator/(const bigfloat& x, const bigfloat& y){
    bigfloat a;
    a.x=x.x/y.x;
    return a;
  }
  friend bigfloat operator/(const unsigned long x, const bigfloat& y){
    bigfloat bx(x);
    return bx/y;
  }
  friend bigfloat operator/(const bigfloat& x, const unsigned long y){
    bigfloat by(y);
    return x/by;
  }
  friend bigfloat sqrt_bf(const bigfloat& x){
    bigfloat a;
    a.x= sqrt(x.x);
    return a;
  }
  friend bigfloat sqrt_bf(const unsigned long x){
    bigfloat a(x);
    return sqrt_bf(a);
  }
  friend bigfloat abs_bf(const bigfloat& x){
    bigfloat a;
    a.x=abs(x.x);
    return a;
  }
  friend bigfloat pow_bf(const bigfloat& a, long power) {
    bigfloat b;
    b.x=pow(a.x,power);
    return b;
  }
  friend bigfloat pow_bf(const bigfloat& a, bigfloat &power) {
    bigfloat b;
    b.x=pow(a.x,power.x);
    return b;
  }
  friend bigfloat exp_bf(const bigfloat& a) {
    bigfloat b;
    b.x=exp(a.x);
    return b;
  }
  /* Comparison Functions */
  friend int operator>(const bigfloat& x, const bigfloat& y) {
    return x.x>y.x;
  }
  friend int operator<(const bigfloat& x, const bigfloat& y) {
    return x.x<y.x;
  }
  friend int sgn(const bigfloat& x) {
    if ( x.x>=0 )  return 1;   
    else return 0;
  }
  /* Miscellaneous Functions */
  //  friend bigfloat& random(void);
 };
 #endif
 #endif
--- a/lib/algorithms/iterative/ConjugateGradient.h
+++ b/lib/algorithms/iterative/ConjugateGradient.h
@@ -1,19 +1,25 @@
 #ifndef GRID_CONJUGATE_GRADIENT_H
 #define GRID_CONJUGATE_GRADIENT_H
 #include<Grid.h>
 #include<algorithms/LinearOperator.h>
 namespace Grid {
-  template<class Field> class ConjugateGradient : public IterativeProcess<Field> {
+    /////////////////////////////////////////////////////////////
-  public:
+    // Base classes for iterative processes based on operators
    // single input vec, single output vec.
    /////////////////////////////////////////////////////////////
-    ConjugateGradient(RealD tol,Integer maxit): IterativeProces(tol,maxit) {};
+  template<class Field> 
    class ConjugateGradient :  public OperatorFunction<Field> {
 public:                                                
    RealD   Tolerance;
    Integer MaxIterations;
-    void operator() (HermitianOperatorBase<Field> *Linop,const Field &src, Field &psi){
+    ConjugateGradient(RealD tol,Integer maxit) : Tolerance(tol), MaxIterations(maxit) { 
      std::cout << Tolerance<<std::endl;
    };
    void operator() (HermitianOperatorBase<Field> &Linop,const Field &src, Field &psi){
      RealD residual = Tolerance;
      RealD cp,c,a,d,b,ssq,qq,b_pred;
      Field   p(src);
@@ -32,24 +38,24 @@ namespace Grid {
      cp =a;
      ssq=norm2(src);
-      std::cout <<setprecision(4)<< "ConjugateGradient: guess "<<guess<<std::endl;
+      std::cout <<std::setprecision(4)<< "ConjugateGradient: guess "<<guess<<std::endl;
-      std::cout <<setprecision(4)<< "ConjugateGradient:   src "<<ssq  <<std::endl;
+      std::cout <<std::setprecision(4)<< "ConjugateGradient:   src "<<ssq  <<std::endl;
-      std::cout <<setprecision(4)<< "ConjugateGradient:    mp "<<d    <<std::endl;
+      std::cout <<std::setprecision(4)<< "ConjugateGradient:    mp "<<d    <<std::endl;
-      std::cout <<setprecision(4)<< "ConjugateGradient:   mmp "<<b    <<std::endl;
+      std::cout <<std::setprecision(4)<< "ConjugateGradient:   mmp "<<b    <<std::endl;
-      std::cout <<setprecision(4)<< "ConjugateGradient:     r "<<cp   <<std::endl;
+      std::cout <<std::setprecision(4)<< "ConjugateGradient:     r "<<cp   <<std::endl;
-      std::cout <<setprecision(4)<< "ConjugateGradient:     p "<<a    <<std::endl;
+      std::cout <<std::setprecision(4)<< "ConjugateGradient:     p "<<a    <<std::endl;
-      RealD rsq =  residual* residual*ssq;
+      RealD rsq =  Tolerance* Tolerance*ssq;
      //Check if guess is really REALLY good :)
      if ( cp <= rsq ) {
-	return 0;
+	return;
      }
-      std::cout << setprecision(4)<< "ConjugateGradient: k=0 residual "<<cp<<" rsq"<<rsq<<std::endl;
+      std::cout << std::setprecision(4)<< "ConjugateGradient: k=0 residual "<<cp<<" rsq"<<rsq<<std::endl;
      int k;
-      for (k=1;k<=max_iter;k++){
+      for (k=1;k<=MaxIterations;k++){
 	c=cp;
@@ -62,10 +68,10 @@ namespace Grid {
 	b = cp/c;
 	// Fuse these loops ; should be really easy
 	// Update psi; New (conjugate/M-orthogonal) search direction
 	psi= a*p+psi;
 	p  = p*b+r;
 	std::cout << "Iteration " <<k<<" residual "<<cp<< " target"<< rsq<<std::endl;
 	// Stopping condition
 	if ( cp <= rsq ) { 
@@ -78,10 +84,11 @@ namespace Grid {
 	  RealD resnorm = sqrt(norm2(p));
 	  RealD true_residual = resnorm/srcnorm;
 	  std::cout<<"ConjugateGradient: true   residual  is "<<true_residual<<" sol "<<psinorm<<" src "<<srcnorm<<std::endl;
-	  std::cout<<"ConjugateGradient: target residual was "<<residual<<std::endl;
+	  std::cout<<"ConjugateGradient: target residual was "<<Tolerance<<std::endl;
 	  return k;
 	}
      }
      std::cout<<"ConjugateGradient did NOT converge"<<std::endl;
      assert(0);
    }
  };
 }
--- a/lib/algorithms/iterative/NormalEquations.h
+++ b/lib/algorithms/iterative/NormalEquations.h
@@ -0,0 +1,34 @@
 #ifndef GRID_NORMAL_EQUATIONS_H
 #define GRID_NORMAL_EQUATIONS_H
 namespace Grid {
  ///////////////////////////////////////////////////////////////////////////////////////////////////////
  // Take a matrix and form a Red Black solver calling a Herm solver
  // Use of RB info prevents making SchurRedBlackSolve conform to standard interface
  ///////////////////////////////////////////////////////////////////////////////////////////////////////
  template<class Field> class NormalEquations : public OperatorFunction<Field>{
  private:
    SparseMatrixBase<Field> & _Matrix;
    OperatorFunction<Field> & _HermitianSolver;
  public:
    /////////////////////////////////////////////////////
    // Wrap the usual normal equations Schur trick
    /////////////////////////////////////////////////////
  NormalEquations(SparseMatrixBase<Field> &Matrix, OperatorFunction<Field> &HermitianSolver) 
    :  _Matrix(Matrix), _HermitianSolver(HermitianSolver) {}; 
    void operator() (const Field &in, Field &out){
      Field src(in._grid);
      _Matrix.Mdag(in,src);
      _HermitianSolver(src,out);  // Mdag M out = Mdag in
    }     
  };
 }
 #endif
--- a/lib/algorithms/iterative/SchurRedBlack.h
+++ b/lib/algorithms/iterative/SchurRedBlack.h
@@ -0,0 +1,106 @@
 #ifndef GRID_SCHUR_RED_BLACK_H
 #define GRID_SCHUR_RED_BLACK_H
  /*
   * Red black Schur decomposition
   *
   *  M = (Mee Meo) =  (1             0 )   (Mee   0               )  (1 Mee^{-1} Meo)
   *      (Moe Moo)    (Moe Mee^-1    1 )   (0   Moo-Moe Mee^-1 Meo)  (0   1         )
   *                =         L                     D                     U
   *
   * L^-1 = (1              0 )
   *        (-MoeMee^{-1}   1 )   
   * L^{dag} = ( 1       Mee^{-dag} Moe^{dag} )
   *           ( 0       1                    )
   * L^{-d}  = ( 1      -Mee^{-dag} Moe^{dag} )
   *           ( 0       1                    )
   *
   * U^-1 = (1   -Mee^{-1} Meo)
   *        (0    1           )
   * U^{dag} = ( 1                 0)
   *           (Meo^dag Mee^{-dag} 1)
   * U^{-dag} = (  1                 0)
   *            (-Meo^dag Mee^{-dag} 1)
   ***********************
   *     M psi = eta
   ***********************
   *Odd
   * i)   (D_oo)^{\dag} D_oo psi_o = (D_oo)^\dag L^{-1}  eta_o
   *                        eta_o' = D_oo (eta_o - Moe Mee^{-1} eta_e)
   *Even
   * ii)  Mee psi_e + Meo psi_o = src_e
   *
   *   => sol_e = M_ee^-1 * ( src_e - Meo sol_o )...
   *
   */
 namespace Grid {
  ///////////////////////////////////////////////////////////////////////////////////////////////////////
  // Take a matrix and form a Red Black solver calling a Herm solver
  // Use of RB info prevents making SchurRedBlackSolve conform to standard interface
  ///////////////////////////////////////////////////////////////////////////////////////////////////////
  template<class Field> class SchurRedBlackSolve : public OperatorFunction<Field>{
  private:
    SparseMatrixBase<Field> & _Matrix;
    OperatorFunction<Field> & _HermitianRBSolver;
    int CBfactorise;
  public:
    /////////////////////////////////////////////////////
    // Wrap the usual normal equations Schur trick
    /////////////////////////////////////////////////////
  SchurRedBlackSolve(SparseMatrixBase<Field> &Matrix, OperatorFunction<Field> &HermitianRBSolver) 
    :  _Matrix(Matrix), _HermitianRBSolver(HermitianRBSolver) { 
      CBfactorise=0;
    };
    void operator() (const Field &in, Field &out){
      // FIXME CGdiagonalMee not implemented virtual function
      // FIXME need to make eo grid from full grid.
      // FIXME use CBfactorise to control schur decomp
      const int Even=0;
      const int Odd =1;
      // Make a cartesianRedBlack from full Grid
      GridRedBlackCartesian grid(in._grid);
      Field src_e(&grid);
      Field src_o(&grid);
      Field sol_e(&grid);
      Field sol_o(&grid);
      Field   tmp(&grid);
      Field  Mtmp(&grid);
      pickCheckerboard(Even,src_e,in);
      pickCheckerboard(Odd ,src_o,in);
      /////////////////////////////////////////////////////
      // src_o = Mdag * (source_o - Moe MeeInv source_e)
      /////////////////////////////////////////////////////
      _Matrix.MooeeInv(src_e,tmp);     //    MooeeInv(source[Even],tmp,DaggerNo,Even);
      _Matrix.Meooe   (tmp,Mtmp);      //    Meo     (tmp,src,Odd,DaggerNo);
      tmp=src_o-Mtmp;                  //    axpy    (tmp,src,source[Odd],-1.0);
      _Matrix.MpcDag(tmp,src_o);       //    Mprec(tmp,src,Mtmp,DaggerYes);  
      //////////////////////////////////////////////////////////////
      // Call the red-black solver
      //////////////////////////////////////////////////////////////
      _HermitianRBSolver(src_o,sol_o); //  CGNE_prec_MdagM(solution[Odd],src);
      ///////////////////////////////////////////////////
      // sol_e = M_ee^-1 * ( src_e - Meo sol_o )...
      ///////////////////////////////////////////////////
      _Matrix.Meooe(sol_o,tmp);        // Meo(solution[Odd],tmp,Even,DaggerNo);
      src_e = src_e-tmp;               // axpy(src,tmp,source[Even],-1.0);
      _Matrix.MooeeInv(src_e,sol_e);   // MooeeInv(src,solution[Even],DaggerNo,Even);
      setCheckerboard(out,sol_e);
      setCheckerboard(out,sol_o);
    }     
  };
 }
 #endif
--- a/lib/algorithms/iterative/TODO
+++ b/lib/algorithms/iterative/TODO
@@ -0,0 +1,15 @@
 - ConjugateGradientMultiShift
 - MCR
 - Potentially Useful Boost libraries
 - MultiArray
 - Aligned allocator; memory pool
 - Remez -- Mike or Boost?
 - Multiprecision
 - quaternians
 - Tokenize
 - Serialization
 - Regex
 - Proto (ET)
 - uBlas
--- a/lib/qcd/Grid_qcd_wilson_dop.cc
+++ b/lib/qcd/Grid_qcd_wilson_dop.cc
@@ -85,20 +85,40 @@ void WilsonMatrix::DoubleStore(LatticeDoubledGaugeField &Uds,const LatticeGaugeF
  }
 }
-void WilsonMatrix::M(const LatticeFermion &in, LatticeFermion &out)
+RealD WilsonMatrix::M(const LatticeFermion &in, LatticeFermion &out)
 {
  Dhop(in,out);
-  return;
+  return 0.0;
 }
-void WilsonMatrix::Mdag(const LatticeFermion &in, LatticeFermion &out)
+RealD WilsonMatrix::Mdag(const LatticeFermion &in, LatticeFermion &out)
 {
  Dhop(in,out);
-  return;
+  return 0.0;
 }
-void WilsonMatrix::MdagM(const LatticeFermion &in, LatticeFermion &out)
+
 void WilsonMatrix::Meooe(const LatticeFermion &in, LatticeFermion &out)
 {
  Dhop(in,out);
-  return;
+}
 void WilsonMatrix::MeooeDag(const LatticeFermion &in, LatticeFermion &out)
 {
  Dhop(in,out);
 }
 void WilsonMatrix::Mooee(const LatticeFermion &in, LatticeFermion &out)
 {
  return ;
 }
 void WilsonMatrix::MooeeInv(const LatticeFermion &in, LatticeFermion &out)
 {
  return ;
 }
 void WilsonMatrix::MooeeDag(const LatticeFermion &in, LatticeFermion &out)
 {
  return ;
 }
 void WilsonMatrix::MooeeInvDag(const LatticeFermion &in, LatticeFermion &out)
 {
  return ;
 }
 void WilsonMatrix::Dhop(const LatticeFermion &in, LatticeFermion &out)
@@ -278,18 +298,6 @@ void WilsonMatrix::Dw(const LatticeFermion &in, LatticeFermion &out)
 {
  return;
 }
 void WilsonMatrix::MpcDag   (const LatticeFermion &in, LatticeFermion &out)
 {
  return;
 }
 void WilsonMatrix::Mpc      (const LatticeFermion &in, LatticeFermion &out)
 {
  return;
 }
 void WilsonMatrix::MpcDagMpc(const LatticeFermion &in, LatticeFermion &out)
 {
  return;
 }
--- a/lib/qcd/Grid_qcd_wilson_dop.h
+++ b/lib/qcd/Grid_qcd_wilson_dop.h
@@ -1,15 +1,12 @@
 #ifndef  GRID_QCD_WILSON_DOP_H
 #define  GRID_QCD_WILSON_DOP_H
 #include <Grid.h>
 #include <algorithms/LinearOperator.h>
 namespace Grid {
  namespace QCD {
-    class WilsonMatrix : public SparseMatrixBase<LatticeFermion>
+    class WilsonMatrix : public CheckerBoardedSparseMatrixBase<LatticeFermion>
    {
      //NB r=1;
    public:
@@ -36,14 +33,16 @@ namespace Grid {
      void DoubleStore(LatticeDoubledGaugeField &Uds,const LatticeGaugeField &Umu);
      // override multiply
-      virtual void M    (const LatticeFermion &in, LatticeFermion &out);
+      virtual RealD  M    (const LatticeFermion &in, LatticeFermion &out);
-      virtual void Mdag (const LatticeFermion &in, LatticeFermion &out);
+      virtual RealD  Mdag (const LatticeFermion &in, LatticeFermion &out);
      virtual void MdagM(const LatticeFermion &in, LatticeFermion &out);
      // half checkerboard operaions
-      void Mpc      (const LatticeFermion &in, LatticeFermion &out);
+      virtual void   Meooe       (const LatticeFermion &in, LatticeFermion &out);
-      void MpcDag   (const LatticeFermion &in, LatticeFermion &out);
+      virtual void   MeooeDag    (const LatticeFermion &in, LatticeFermion &out);
-      void MpcDagMpc(const LatticeFermion &in, LatticeFermion &out);
+      virtual void   Mooee       (const LatticeFermion &in, LatticeFermion &out);
      virtual void   MooeeDag    (const LatticeFermion &in, LatticeFermion &out);
      virtual void   MooeeInv    (const LatticeFermion &in, LatticeFermion &out);
      virtual void   MooeeInvDag (const LatticeFermion &in, LatticeFermion &out);
      // non-hermitian hopping term; half cb or both
      void Dhop(const LatticeFermion &in, LatticeFermion &out);