From 11cb3e9a01dcd306cdda4c4e8cc7f6a8e042308f Mon Sep 17 00:00:00 2001
From: Peter Boyle <paboyle@ph.ed.ac.uk>
Date: Mon, 18 May 2015 07:47:05 +0100
Subject: [PATCH] Getting closer to having a wilson solver... introducing a
 first and untested cut at Conjugate gradient. Also copied in Remez,
 Zolotarev, Chebyshev from Mike Clark, Tony Kennedy and my BFM package
 respectively since we know we will need these. I wanted the structure of

algorithms/approx
algorithms/iterative

etc.. to start taking shape.
---
 configure                                     |  13 +-
 configure.ac                                  |   2 +-
 lib/Grid.h                                    |  23 +-
 lib/Grid_algorithms.h                         |  34 +
 lib/Grid_config.h.in                          |   3 +
 lib/Makefile.am                               |  40 +-
 lib/algorithms/LinearOperator.h               |  94 +--
 lib/algorithms/SparseMatrix.h                 |  72 ++
 .../{PolynomialApprox.h => Chebyshev.h}       |  14 +-
 lib/algorithms/approx/LICENSE                 |  21 +
 lib/algorithms/approx/README                  |  80 ++
 lib/algorithms/approx/Remez.cc                | 755 ++++++++++++++++++
 lib/algorithms/approx/Remez.h                 | 167 ++++
 .../approx/{zolotarev.c => Zolotarev.cc}      |   0
 lib/algorithms/approx/bigfloat.h              | 368 +++++++++
 lib/algorithms/iterative/ConjugateGradient.h  |  49 +-
 lib/algorithms/iterative/NormalEquations.h    |  34 +
 lib/algorithms/iterative/SchurRedBlack.h      | 106 +++
 lib/algorithms/iterative/TODO                 |  15 +
 lib/qcd/Grid_qcd_wilson_dop.cc                |  44 +-
 lib/qcd/Grid_qcd_wilson_dop.h                 |  19 +-
 tests/Grid_main.cc                            |   2 +-
 22 files changed, 1798 insertions(+), 157 deletions(-)
 create mode 100644 lib/Grid_algorithms.h
 create mode 100644 lib/algorithms/SparseMatrix.h
 rename lib/algorithms/approx/{PolynomialApprox.h => Chebyshev.h} (93%)
 create mode 100644 lib/algorithms/approx/LICENSE
 create mode 100644 lib/algorithms/approx/README
 create mode 100755 lib/algorithms/approx/Remez.cc
 create mode 100755 lib/algorithms/approx/Remez.h
 rename lib/algorithms/approx/{zolotarev.c => Zolotarev.cc} (100%)
 create mode 100755 lib/algorithms/approx/bigfloat.h
 create mode 100644 lib/algorithms/iterative/NormalEquations.h
 create mode 100644 lib/algorithms/iterative/SchurRedBlack.h
 create mode 100644 lib/algorithms/iterative/TODO
diff --git a/configure b/configure
index deec23c3..ee90f6bc 100755
--- a/configure
+++ b/configure
@@ -4244,7 +4244,18 @@ fi
 
 done
 
-#AC_CHECK_HEADERS(machine/endian.h)
+for ac_header in gmp.h
+do :
+  ac_fn_cxx_check_header_mongrel "$LINENO" "gmp.h" "ac_cv_header_gmp_h" "$ac_includes_default"
+if test "x$ac_cv_header_gmp_h" = xyes; then :
+  cat >>confdefs.h <<_ACEOF
+#define HAVE_GMP_H 1
+_ACEOF
+
+fi
+
+done
+
 ac_fn_cxx_check_decl "$LINENO" "ntohll" "ac_cv_have_decl_ntohll" "#include <arpa/inet.h>
 "
 if test "x$ac_cv_have_decl_ntohll" = xyes; then :
diff --git a/configure.ac b/configure.ac
index 97206a01..14fd45bb 100644
--- a/configure.ac
+++ b/configure.ac
@@ -20,7 +20,7 @@ AC_CHECK_HEADERS(mm_malloc.h)
 AC_CHECK_HEADERS(malloc/malloc.h)
 AC_CHECK_HEADERS(malloc.h)
 AC_CHECK_HEADERS(endian.h)
-#AC_CHECK_HEADERS(machine/endian.h)
+AC_CHECK_HEADERS(gmp.h)
 AC_CHECK_DECLS([ntohll],[], [], [[#include <arpa/inet.h>]])
 AC_CHECK_DECLS([be64toh],[], [], [[#include <arpa/inet.h>]])
 
diff --git a/lib/Grid.h b/lib/Grid.h
index 2936dd27..edd94769 100644
--- a/lib/Grid.h
+++ b/lib/Grid.h
@@ -10,15 +10,17 @@
 #ifndef GRID_H
 #define GRID_H
 
-#include <stdio.h>
+#include <cassert>
 
 #include <complex>
 #include <vector>
-#include <valarray>
+
 #include <iostream>
-#include <cassert>
+#include <iomanip>
 #include <random>
 #include <functional>
+
+#include <stdio.h>
 #include <stdlib.h>
 #include <sys/time.h>
 #include <stdio.h>
@@ -45,16 +47,19 @@
 #endif
 
 #include <Grid_aligned_allocator.h>
+
 #include <Grid_simd.h>
 #include <Grid_threads.h>
 
-#include <Grid_cartesian.h>
-
-#include <Grid_math.h>
-#include <Grid_lattice.h>
+#include <Grid_cartesian.h> // subdir aggregate
+#include <Grid_math.h>      // subdir aggregate
+#include <Grid_lattice.h>   // subdir aggregate
 #include <Grid_comparison.h>
-#include <Grid_cshift.h>
-#include <Grid_stencil.h>
+#include <Grid_cshift.h>    // subdir aggregate
+#include <Grid_stencil.h>   // subdir aggregate
+
+#include <Grid_algorithms.h>// subdir aggregate
+
 #include <qcd/Grid_qcd.h>
 #include <parallelIO/GridNerscIO.h>
 
diff --git a/lib/Grid_algorithms.h b/lib/Grid_algorithms.h
new file mode 100644
index 00000000..b5a2814a
--- /dev/null
+++ b/lib/Grid_algorithms.h
@@ -0,0 +1,34 @@
+#ifndef GRID_ALGORITHMS_H
+#define GRID_ALGORITHMS_H
+
+#include <algorithms/SparseMatrix.h>
+#include <algorithms/LinearOperator.h>
+
+#include <algorithms/iterative/ConjugateGradient.h>
+#include <algorithms/iterative/NormalEquations.h>
+#include <algorithms/iterative/SchurRedBlack.h>
+
+#include <algorithms/approx/Zolotarev.h>
+#include <algorithms/approx/Chebyshev.h>
+#include <algorithms/approx/Remez.h>
+
+// Eigen/lanczos
+// EigCg
+// MCR
+// Pcg
+// Multishift CG
+// Hdcg
+// GCR
+// etc..
+
+// integrator/Leapfrog
+// integrator/Omelyan
+// integrator/ForceGradient
+
+// montecarlo/hmc
+// montecarlo/rhmc
+// montecarlo/metropolis
+// etc...
+
+
+#endif
diff --git a/lib/Grid_config.h.in b/lib/Grid_config.h.in
index 47643530..0ce09cf5 100644
--- a/lib/Grid_config.h.in
+++ b/lib/Grid_config.h.in
@@ -29,6 +29,9 @@
 /* Define to 1 if you have the `gettimeofday' function. */
 #undef HAVE_GETTIMEOFDAY
 
+/* Define to 1 if you have the <gmp.h> header file. */
+#undef HAVE_GMP_H
+
 /* Define to 1 if you have the <inttypes.h> header file. */
 #undef HAVE_INTTYPES_H
 
diff --git a/lib/Makefile.am b/lib/Makefile.am
index f76b0b64..938f7ca1 100644
--- a/lib/Makefile.am
+++ b/lib/Makefile.am
@@ -19,35 +19,50 @@ libGrid_a_SOURCES =\
 	stencil/Grid_stencil_common.cc\
 	qcd/Grid_qcd_dirac.cc\
 	qcd/Grid_qcd_wilson_dop.cc\
+	algorithms/approx/Zolotarev.cc\
+	algorithms/approx/Remez.cc\
 	$(extra_sources)
 
 #
 # Include files
 #
-include_HEADERS =\
+nobase_include_HEADERS = algorithms/approx/bigfloat.h\
+	algorithms/approx/Chebyshev.h\
+	algorithms/approx/Remez.h\
+	algorithms/approx/Zolotarev.h\
+	algorithms/iterative/ConjugateGradient.h\
+	algorithms/iterative/NormalEquations.h\
+	algorithms/iterative/SchurRedBlack.h\
+	algorithms/LinearOperator.h\
+	algorithms/SparseMatrix.h\
+	cartesian/Grid_cartesian_base.h\
+	cartesian/Grid_cartesian_full.h\
+	cartesian/Grid_cartesian_red_black.h\
+	communicator/Grid_communicator_base.h\
+	cshift/Grid_cshift_common.h\
+	cshift/Grid_cshift_mpi.h\
+	cshift/Grid_cshift_none.h\
 	Grid.h\
+	Grid_algorithms.h\
 	Grid_aligned_allocator.h\
 	Grid_cartesian.h\
 	Grid_communicator.h\
 	Grid_comparison.h\
-	Grid_config.h\
 	Grid_cshift.h\
+	Grid_extract.h\
 	Grid_lattice.h\
 	Grid_math.h\
 	Grid_simd.h\
-	Grid_stencil.h
-
-nobase_include_HEADERS=\
-	cartesian/Grid_cartesian_base.h\
-	cartesian/Grid_cartesian_full.h\
-	cartesian/Grid_cartesian_red_black.h\
-	cshift/Grid_cshift_common.h\
-	cshift/Grid_cshift_mpi.h\
-	cshift/Grid_cshift_none.h\
+	Grid_stencil.h\
+	Grid_threads.h\
 	lattice/Grid_lattice_arith.h\
+	lattice/Grid_lattice_base.h\
+	lattice/Grid_lattice_comparison.h\
 	lattice/Grid_lattice_conformable.h\
 	lattice/Grid_lattice_coordinate.h\
+	lattice/Grid_lattice_ET.h\
 	lattice/Grid_lattice_local.h\
+	lattice/Grid_lattice_overload.h\
 	lattice/Grid_lattice_peekpoke.h\
 	lattice/Grid_lattice_reality.h\
 	lattice/Grid_lattice_reduction.h\
@@ -71,8 +86,11 @@ nobase_include_HEADERS=\
 	math/Grid_math_trace.h\
 	math/Grid_math_traits.h\
 	math/Grid_math_transpose.h\
+	parallelIO/GridNerscIO.h\
 	qcd/Grid_qcd.h\
+	qcd/Grid_qcd_2spinor.h\
 	qcd/Grid_qcd_dirac.h\
+	qcd/Grid_qcd_wilson_dop.h\
 	simd/Grid_vComplexD.h\
 	simd/Grid_vComplexF.h\
 	simd/Grid_vInteger.h\
diff --git a/lib/algorithms/LinearOperator.h b/lib/algorithms/LinearOperator.h
index 12259236..bb948b95 100644
--- a/lib/algorithms/LinearOperator.h
+++ b/lib/algorithms/LinearOperator.h
@@ -1,26 +1,8 @@
 #ifndef  GRID_ALGORITHM_LINEAR_OP_H
 #define  GRID_ALGORITHM_LINEAR_OP_H
 
-#include <Grid.h>
-
 namespace Grid {
 
-  /////////////////////////////////////////////////////////////////////////////////////////////
-  // Interface defining what I expect of a general sparse matrix, such as a Fermion action
-  /////////////////////////////////////////////////////////////////////////////////////////////
-    template<class Field> class SparseMatrixBase {
-    public:
-      // Full checkerboar operations
-      virtual void M    (const Field &in, Field &out);
-      virtual void Mdag (const Field &in, Field &out);
-      virtual RealD MdagM(const Field &in, Field &out);
-      
-      // half checkerboard operaions
-      virtual void Mpc      (const Field &in, Field &out);
-      virtual void MpcDag   (const Field &in, Field &out);
-      virtual RealD MpcDagMpc(const Field &in, Field &out);
-    };
-
   /////////////////////////////////////////////////////////////////////////////////////////////
   // LinearOperators Take a something and return a something.
   /////////////////////////////////////////////////////////////////////////////////////////////
@@ -61,11 +43,11 @@ namespace Grid {
   // the wrappers implement the specialisation of "Op" and "AdjOp" to the cases minimising
   // replication of code.
   /////////////////////////////////////////////////////////////////////////////////////////////
-    template<class SparseMatrix,class Field>
+    template<class Matrix,class Field>
     class NonHermitianOperator : public LinearOperatorBase<Field> {
-      SparseMatrix &_Mat;
+      Matrix &_Mat;
     public:
-      NonHermitianOperator(SparseMatrix &Mat): _Mat(Mat){};
+      NonHermitianOperator(Matrix &Mat): _Mat(Mat){};
       void Op     (const Field &in, Field &out){
 	_Mat.M(in,out);
       }
@@ -77,11 +59,11 @@ namespace Grid {
     ////////////////////////////////////////////////////////////////////////////////////
     // Redblack Non hermitian wrapper
     ////////////////////////////////////////////////////////////////////////////////////
-    template<class SparseMatrix,class Field>
-    class NonHermitianRedBlackOperator : public LinearOperatorBase<Field> {
-      SparseMatrix &_Mat;
+    template<class Matrix,class Field>
+    class NonHermitianCheckerBoardedOperator : public LinearOperatorBase<Field> {
+      Matrix &_Mat;
     public:
-      NonHermitianRedBlackOperator(SparseMatrix &Mat): _Mat(Mat){};
+      NonHermitianCheckerBoardedOperator(Matrix &Mat): _Mat(Mat){};
       void Op     (const Field &in, Field &out){
 	_Mat.Mpc(in,out);
       }
@@ -93,75 +75,35 @@ namespace Grid {
     ////////////////////////////////////////////////////////////////////////////////////
     // Hermitian wrapper
     ////////////////////////////////////////////////////////////////////////////////////
-    template<class SparseMatrix,class Field>
+    template<class Matrix,class Field>
     class HermitianOperator : public HermitianOperatorBase<Field> {
-      SparseMatrix &_Mat;
+      Matrix &_Mat;
     public:
-      HermitianOperator(SparseMatrix &Mat): _Mat(Mat) {};
+      HermitianOperator(Matrix &Mat): _Mat(Mat) {};
       RealD OpAndNorm(const Field &in, Field &out){
 	return _Mat.MdagM(in,out);
       }
     };
 
     ////////////////////////////////////////////////////////////////////////////////////
-    // Hermitian RedBlack wrapper
+    // Hermitian CheckerBoarded wrapper
     ////////////////////////////////////////////////////////////////////////////////////
-    template<class SparseMatrix,class Field>
-    class HermitianRedBlackOperator : public HermitianOperatorBase<Field> {
-      SparseMatrix &_Mat;
+    template<class Matrix,class Field>
+    class HermitianCheckerBoardedOperator : public HermitianOperatorBase<Field> {
+      Matrix &_Mat;
     public:
-      HermitianRedBlackOperator(SparseMatrix &Mat): _Mat(Mat) {};
-      RealD OpAndNorm(const Field &in, Field &out){
-	return _Mat.MpcDagMpc(in,out);
+      HermitianCheckerBoardedOperator(Matrix &Mat): _Mat(Mat) {};
+      void OpAndNorm(const Field &in, Field &out,RealD &n1,RealD &n2){
+	_Mat.MpcDagMpc(in,out,n1,n2);
       }
     };
 
-
     /////////////////////////////////////////////////////////////
     // Base classes for functions of operators
     /////////////////////////////////////////////////////////////
     template<class Field> class OperatorFunction {
     public:
-      virtual void operator() (LinearOperatorBase<Field> *Linop, const Field &in, Field &out) = 0;
-    };
-
-    /////////////////////////////////////////////////////////////
-    // Base classes for polynomial functions of operators ? needed?
-    /////////////////////////////////////////////////////////////
-    template<class Field> class OperatorPolynomial : public OperatorFunction<Field> {
-    public:
-      virtual void operator() (LinearOperatorBase<Field> *Linop,const Field &in, Field &out) = 0;
-    };
-
-    /////////////////////////////////////////////////////////////
-    // Base classes for iterative processes based on operators
-    // single input vec, single output vec.
-    /////////////////////////////////////////////////////////////
-    template<class Field> class IterativeProcess : public OperatorFunction<Field> {
-    public:
-      RealD   Tolerance;
-      Integer MaxIterations;
-      IterativeProcess(RealD tol,Integer maxit) : Tolerance(tol),MaxIterations(maxit) {};
-      virtual void operator() (LinearOperatorBase<Field> *Linop,const Field &in, Field &out) = 0;
-    };
-
-    /////////////////////////////////////////////////////////////
-    // Grand daddy iterative method
-    /////////////////////////////////////////////////////////////
-    template<class Field> class ConjugateGradient : public IterativeProcess<Field> {
-    public:
-      virtual void operator() (HermitianOperatorBase<Field> *Linop,const Field &in, Field &out) = 0;
-    };
-
-    /////////////////////////////////////////////////////////////
-    // A little more modern
-    /////////////////////////////////////////////////////////////
-    template<class Field> class PreconditionedConjugateGradient : public  IterativeProcess<Field>  {
-    public:
-      void operator() (HermitianOperatorBase<Field> *Linop,
-		       OperatorFunction<Field> *Preconditioner,
-		       const Field &in, 
-		       Field &out) = 0;
+      virtual void operator() (LinearOperatorBase<Field> &Linop, const Field &in, Field &out) = 0;
     };
 
     // FIXME : To think about
diff --git a/lib/algorithms/SparseMatrix.h b/lib/algorithms/SparseMatrix.h
new file mode 100644
index 00000000..6c54fc92
--- /dev/null
+++ b/lib/algorithms/SparseMatrix.h
@@ -0,0 +1,72 @@
+#ifndef  GRID_ALGORITHM_SPARSE_MATRIX_H
+#define  GRID_ALGORITHM_SPARSE_MATRIX_H
+
+#include <Grid.h>
+
+namespace Grid {
+
+  /////////////////////////////////////////////////////////////////////////////////////////////
+  // Interface defining what I expect of a general sparse matrix, such as a Fermion action
+  /////////////////////////////////////////////////////////////////////////////////////////////
+    template<class Field> class SparseMatrixBase {
+    public:
+      // Full checkerboar operations
+      virtual RealD M    (const Field &in, Field &out)=0;
+      virtual RealD Mdag (const Field &in, Field &out)=0;
+      virtual void  MdagM(const Field &in, Field &out,RealD &ni,RealD &no) {
+	Field tmp (in._grid);
+	ni=M(in,tmp);
+	no=Mdag(tmp,out);
+      }
+    };
+
+  /////////////////////////////////////////////////////////////////////////////////////////////
+  // Interface augmented by a red black sparse matrix, such as a Fermion action
+  /////////////////////////////////////////////////////////////////////////////////////////////
+    template<class Field> class CheckerBoardedSparseMatrixBase : public SparseMatrixBase<Field> {
+    public:
+      
+      // half checkerboard operaions
+      virtual  void Meooe    (const Field &in, Field &out)=0;
+      virtual  void Mooee    (const Field &in, Field &out)=0;
+      virtual  void MooeeInv (const Field &in, Field &out)=0;
+
+      virtual  void MeooeDag    (const Field &in, Field &out)=0;
+      virtual  void MooeeDag    (const Field &in, Field &out)=0;
+      virtual  void MooeeInvDag (const Field &in, Field &out)=0;
+
+      // Schur decomp operators
+      virtual  RealD Mpc      (const Field &in, Field &out) {
+	Field tmp(in._grid);
+
+	Meooe(in,tmp);
+	MooeeInv(tmp,out);
+	Meooe(out,tmp);
+
+	Mooee(in,out);
+	out=out-tmp; // axpy_norm
+	RealD n=norm2(out);
+	return n;
+      }
+      virtual  RealD MpcDag   (const Field &in, Field &out){
+	Field tmp(in._grid);
+
+	MeooeDag(in,tmp);
+	MooeeInvDag(tmp,out);
+	MeooeDag(out,tmp);
+
+	MooeeDag(in,out);
+	out=out-tmp; // axpy_norm
+	RealD n=norm2(out);
+	return n;
+      }
+      virtual void MpcDagMpc(const Field &in, Field &out,RealD ni,RealD no) {
+	Field tmp(in._grid);
+	ni=Mpc(in,tmp);
+	no=Mpc(tmp,out);
+      }
+    };
+
+}
+
+#endif
diff --git a/lib/algorithms/approx/PolynomialApprox.h b/lib/algorithms/approx/Chebyshev.h
similarity index 93%
rename from lib/algorithms/approx/PolynomialApprox.h
rename to lib/algorithms/approx/Chebyshev.h
index 5515b325..5c7741e4 100644
--- a/lib/algorithms/approx/PolynomialApprox.h
+++ b/lib/algorithms/approx/Chebyshev.h
@@ -1,5 +1,5 @@
-#ifndef GRID_POLYNOMIAL_APPROX_H
-#define GRID_POLYNOMIAL_APPROX_H
+#ifndef GRID_CHEBYSHEV_H
+#define GRID_CHEBYSHEV_H
 
 #include<Grid.h>
 #include<algorithms/LinearOperator.h>
@@ -12,7 +12,7 @@ namespace Grid {
   template<class Field>
   class Polynomial : public OperatorFunction<Field> {
   private:
-    std::vector<double> _oeffs;
+    std::vector<double> Coeffs;
   public:
     Polynomial(std::vector<double> &_Coeffs) : Coeffs(_Coeffs) {};
 
@@ -111,17 +111,13 @@ namespace Grid {
       double xscale = 2.0/(hi-lo);
       double mscale = -(hi+lo)/(hi-lo);
 
-      Field *T0=Tnm;
-      Field *T1=Tn;
-      
-
       // Tn=T1 = (xscale M + mscale)in
       Linop.Op(T0,y);
 
       T1=y*xscale+in*mscale;
 
       // sum = .5 c[0] T0 + c[1] T1
-      out = (0.5*coeffs[0])*T0 + coeffs[1]*T1;
+      out = (0.5*Coeffs[0])*T0 + Coeffs[1]*T1;
 
       for(int n=2;n<order;n++){
 	
@@ -131,7 +127,7 @@ namespace Grid {
 
 	*Tnp=2.0*y-(*Tnm);
 	
-	out=out+coeffs[n]* (*Tnp);
+	out=out+Coeffs[n]* (*Tnp);
 
 	// Cycle pointers to avoid copies
 	Field *swizzle = Tnm;
diff --git a/lib/algorithms/approx/LICENSE b/lib/algorithms/approx/LICENSE
new file mode 100644
index 00000000..f971c5ed
--- /dev/null
+++ b/lib/algorithms/approx/LICENSE
@@ -0,0 +1,21 @@
+
+Copyright (c) 2011 Michael Clark
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
+
diff --git a/lib/algorithms/approx/README b/lib/algorithms/approx/README
new file mode 100644
index 00000000..90d8b010
--- /dev/null
+++ b/lib/algorithms/approx/README
@@ -0,0 +1,80 @@
+-----------------------------------------------------------------------------------
+
+PAB. Took Mike Clark's AlgRemez from GitHub and (modified a little) include.
+This is open source and license and readme and comments are preserved consistent
+with the license. Mike, thankyou!
+-----------------------------------------------------------------------------------
+-----------------------------------------------------------------------------------
+AlgRemez
+
+The archive downloadable here contains an implementation of the Remez
+algorithm which calculates optimal rational (and polynomial)
+approximations to the nth root over a given spectral range.  The Remez
+algorithm, although in principle is extremely straightforward to
+program, is quite difficult to get completely correct, e.g., the Maple
+implementation of the algorithm does not always converge to the
+correct answer.
+
+To use this algorithm you need to install GMP, the GNU Multiple
+Precision Library, and when configuring the install, you must include
+the --enable-mpfr option (see the GMP manual for more details).  You
+also have to edit the Makefile for AlgRemez appropriately for your
+system, namely to point corrrectly to the location of the GMP library.
+
+The simple main program included with this archive invokes the
+AlgRemez class to calculate an approximation given by command line
+arguments.  It is invoked by the following
+
+./test y z n d lambda_low lambda_high precision,
+
+where the function to be approximated is f(x) = x^(y/z), with degree
+(n,d) over the spectral range [lambda_low, lambda_high], using
+precision digits of precision in the arithmetic.  So an example would
+be
+
+./test 1 2 5 5 0.0004 64 40
+
+which corresponds to constructing a rational approximation to the
+square root function, with degree (5,5) over the range [0.0004,64]
+with 40 digits of precision used for the arithmetic.  The parameters y
+and z must be positive, the approximation to f(x) = x^(-y/z) is simply
+the inverse of the approximation to f(x) = x^(y/z).  After the
+approximation has been constructed, the roots and poles of the
+rational function are found, and then the partial fraction expansion
+of both the rational function and it's inverse are found, the results
+of which are output to a file called "approx.dat".  In addition, the
+error function of the approximation is output to "error.dat", where it
+can be checked that the resultant approximation satisfies Chebychev's
+criterion, namely all error maxima are equal in magnitude, and
+adjacent maxima are oppostie in sign.  There are some caveats here
+however, the optimal polynomial approximation has complex roots, and
+the root finding implemented here cannot (yet) handle complex roots.
+In addition, the partial fraction expansion of rational approximations
+is only found for the case n = d, i.e., the degree of numerator
+polynomial equals that of the denominator polynomial.  The convention
+for the partial fraction expansion is that polar shifts are always
+written added to x, not subtracted.
+
+To do list
+
+1.  Include an exponential dampening factor in the function to be
+approximated.  This may sound trivial to implement, but for some
+parameters, the algorithm seems to breakdown.  Also, the roots in the
+rational approximation sometimes become complex, which currently
+breaks the stupidly simple root finding code.
+
+2. Make the algorithm faster - it's too slow when running on qcdoc.
+
+3. Add complex root finding.
+
+4. Add more options for error minimisation - currently the code
+minimises the relative error, should add options for absolute error,
+and other norms.
+
+There will be a forthcoming publication concerning the results
+generated by this software, but in the meantime, if you use this
+software, please cite it as
+"M.A. Clark and A.D. Kennedy, https://github.com/mikeaclark/AlgRemez, 2005".
+
+If you have any problems using the software, then please email scientist.mike@gmail.com.
+
diff --git a/lib/algorithms/approx/Remez.cc b/lib/algorithms/approx/Remez.cc
new file mode 100755
index 00000000..a787e766
--- /dev/null
+++ b/lib/algorithms/approx/Remez.cc
@@ -0,0 +1,755 @@
+/*
+
+  Mike Clark - 25th May 2005
+
+  alg_remez.C
+
+  AlgRemez is an implementation of the Remez algorithm, which in this
+  case is used for generating the optimal nth root rational
+  approximation.
+
+  Note this class requires the gnu multiprecision (GNU MP) library.
+
+*/
+
+#include<math.h>
+#include<stdio.h>
+#include<stdlib.h>
+#include<string>
+#include<iostream>
+#include<iomanip>
+#include<cassert>
+
+#include<algorithms/approx/Remez.h>
+
+// Constructor
+AlgRemez::AlgRemez(double lower, double upper, long precision) 
+{
+  prec = precision;
+  bigfloat::setDefaultPrecision(prec);
+
+  apstrt = lower;
+  apend = upper;
+  apwidt = apend - apstrt;
+
+  std::cout<<"Approximation bounds are ["<<apstrt<<","<<apend<<"]\n";
+  std::cout<<"Precision of arithmetic is "<<precision<<std::endl;
+
+  alloc = 0;
+  n = 0;
+  d = 0;
+
+  foundRoots = 0;
+
+  // Only require the approximation spread to be less than 1 ulp
+  tolerance = 1e-15;
+}
+
+// Destructor
+AlgRemez::~AlgRemez()
+{
+  if (alloc) {
+    delete [] param;
+    delete [] roots;
+    delete [] poles;
+    delete [] xx;
+    delete [] mm;
+    delete [] a_power;
+    delete [] a;
+  }
+}
+
+// Free memory and reallocate as necessary
+void AlgRemez::allocate(int num_degree, int den_degree)
+{
+  // Arrays have previously been allocated, deallocate first, then allocate
+  if (alloc) {
+    delete [] param;
+    delete [] roots;
+    delete [] poles;
+    delete [] xx;
+    delete [] mm;
+  }
+
+  // Note use of new and delete in memory allocation - cannot run on qcdsp
+  param = new bigfloat[num_degree+den_degree+1];
+  roots = new bigfloat[num_degree];
+  poles = new bigfloat[den_degree];
+  xx = new bigfloat[num_degree+den_degree+3];
+  mm = new bigfloat[num_degree+den_degree+2];
+
+  if (!alloc) {
+    // The coefficients of the sum in the exponential
+    a = new bigfloat[SUM_MAX];
+    a_power = new int[SUM_MAX];
+  }
+
+  alloc = 1;
+}
+
+// Reset the bounds of the approximation
+void AlgRemez::setBounds(double lower, double upper)
+{
+  apstrt = lower;
+  apend = upper;
+  apwidt = apend - apstrt;
+}
+
+// Generate the rational approximation x^(pnum/pden)
+double AlgRemez::generateApprox(int degree, unsigned long pnum, 
+				unsigned long pden)
+{
+  return generateApprox(degree, degree, pnum, pden);
+}
+
+double AlgRemez::generateApprox(int num_degree, int den_degree, 
+				unsigned long pnum, unsigned long pden)
+{
+  double *a_param = 0;
+  int *a_pow = 0;
+  return generateApprox(num_degree, den_degree, pnum, pden, 0, a_param, a_pow);
+}
+
+// Generate the rational approximation x^(pnum/pden)
+double AlgRemez::generateApprox(int num_degree, int den_degree, 
+				unsigned long pnum, unsigned long pden,
+				int a_len, double *a_param, int *a_pow)
+{
+  std::cout<<"Degree of the approximation is ("<<num_degree<<","<<den_degree<<")\n";
+  std::cout<<"Approximating the function x^("<<pnum<<"/"<<pden<<"\n";
+
+  // Reallocate arrays, since degree has changed
+  if (num_degree != n || den_degree != d) allocate(num_degree,den_degree);
+
+  assert(a_len<=SUM_MAX);
+
+  step = new bigfloat[num_degree+den_degree+2];
+
+  a_length = a_len;
+  for (int j=0; j<a_len; j++) {
+    a[j]= a_param[j];
+    a_power[j] = a_pow[j];
+  }
+
+  power_num = pnum;
+  power_den = pden;
+  spread = 1.0e37;
+  iter = 0;
+
+  n = num_degree;
+  d = den_degree;
+  neq = n + d + 1;
+
+  initialGuess();
+  stpini(step);
+
+  while (spread > tolerance) { //iterate until convergance
+
+    if (iter++%100==0) 
+      std::cout<<"Iteration " <<iter-1<<" spread "<<(double)spread<<" delta "<<(double)delta<<std::endl; 
+
+    equations();
+    assert( delta>= tolerance);
+
+    search(step);
+  }
+
+  int sign;
+  double error = (double)getErr(mm[0],&sign);
+  std::cout<<"Converged at "<<iter<<" iterations; error = "<<error<<std::endl;
+
+  // Once the approximation has been generated, calculate the roots
+  if(!root()) {
+    std::cout<<"Root finding failed\n";
+  } else {
+    foundRoots = 1;
+  }
+  
+  delete [] step;
+
+  // Return the maximum error in the approximation
+  return error;
+}
+
+// Return the partial fraction expansion of the approximation x^(pnum/pden)
+int AlgRemez::getPFE(double *Res, double *Pole, double *Norm) {
+
+  if (n!=d) {
+    std::cout<<"Cannot handle case: Numerator degree neq Denominator degree\n";
+    return 0;
+  }
+
+  if (!alloc) {
+    std::cout<<"Approximation not yet generated\n";
+    return 0;
+  }
+
+  if (!foundRoots) {
+    std::cout<<"Roots not found, so PFE cannot be taken\n";
+    return 0;
+  }
+
+  bigfloat *r = new bigfloat[n];
+  bigfloat *p = new bigfloat[d];
+  
+  for (int i=0; i<n; i++) r[i] = roots[i];
+  for (int i=0; i<d; i++) p[i] = poles[i];
+  
+  // Perform a partial fraction expansion
+  pfe(r, p, norm);
+
+  // Convert to double and return
+  *Norm = (double)norm;
+  for (int i=0; i<n; i++) Res[i] = (double)r[i];
+  for (int i=0; i<d; i++) Pole[i] = (double)p[i];
+
+  delete [] r;
+  delete [] p;
+
+  // Where the smallest shift is located
+  return 0;
+}
+
+// Return the partial fraction expansion of the approximation x^(-pnum/pden)
+int AlgRemez::getIPFE(double *Res, double *Pole, double *Norm) {
+
+  if (n!=d) {
+    std::cout<<"Cannot handle case: Numerator degree neq Denominator degree\n";
+    return 0;
+  }
+
+  if (!alloc) {
+    std::cout<<"Approximation not yet generated\n";
+    return 0;
+  }
+
+  if (!foundRoots) {
+    std::cout<<"Roots not found, so PFE cannot be taken\n";
+    return 0;
+  }
+
+  bigfloat *r = new bigfloat[d];
+  bigfloat *p = new bigfloat[n];
+  
+  // Want the inverse function
+  for (int i=0; i<n; i++) {
+    r[i] = poles[i];
+    p[i] = roots[i];
+  }
+
+  // Perform a partial fraction expansion
+  pfe(r, p, (bigfloat)1l/norm);
+
+  // Convert to double and return
+  *Norm = (double)((bigfloat)1l/(norm));
+  for (int i=0; i<n; i++) {
+    Res[i] = (double)r[i];
+    Pole[i] = (double)p[i];
+  }
+
+  delete [] r;
+  delete [] p;
+
+  // Where the smallest shift is located
+  return 0;
+}
+
+// Initial values of maximal and minimal errors
+void AlgRemez::initialGuess() {
+
+  // Supply initial guesses for solution points
+  long ncheb = neq;			// Degree of Chebyshev error estimate
+  bigfloat a, r;
+
+  // Find ncheb+1 extrema of Chebyshev polynomial
+
+  a = ncheb;
+  mm[0] = apstrt;
+  for (long i = 1; i < ncheb; i++) {
+    r = 0.5 * (1 - cos((M_PI * i)/(double) a));
+    //r *= sqrt_bf(r);
+    r = (exp((double)r)-1.0)/(exp(1.0)-1.0);
+    mm[i] = apstrt + r * apwidt;
+  }
+  mm[ncheb] = apend;
+
+  a = 2.0 * ncheb;
+  for (long i = 0; i <= ncheb; i++) {
+    r = 0.5 * (1 - cos(M_PI * (2*i+1)/(double) a));
+    //r *= sqrt_bf(r); // Squeeze to low end of interval
+    r = (exp((double)r)-1.0)/(exp(1.0)-1.0);
+    xx[i] = apstrt + r * apwidt;
+  }
+}
+
+// Initialise step sizes
+void AlgRemez::stpini(bigfloat *step) {
+  xx[neq+1] = apend;
+  delta = 0.25;
+  step[0] = xx[0] - apstrt;
+  for (int i = 1; i < neq; i++) step[i] = xx[i] - xx[i-1];
+  step[neq] = step[neq-1];
+}
+
+// Search for error maxima and minima
+void AlgRemez::search(bigfloat *step) {
+  bigfloat a, q, xm, ym, xn, yn, xx0, xx1;
+  int i, j, meq, emsign, ensign, steps;
+
+  meq = neq + 1;
+  bigfloat *yy = new bigfloat[meq];
+
+  bigfloat eclose = 1.0e30;
+  bigfloat farther = 0l;
+
+  j = 1;
+  xx0 = apstrt;
+
+  for (i = 0; i < meq; i++) {
+    steps = 0;
+    xx1 = xx[i]; // Next zero
+    if (i == meq-1) xx1 = apend;
+    xm = mm[i];
+    ym = getErr(xm,&emsign);
+    q = step[i];
+    xn = xm + q;
+    if (xn < xx0 || xn >= xx1) {	// Cannot skip over adjacent boundaries
+      q = -q;
+      xn = xm;
+      yn = ym;
+      ensign = emsign;
+    } else {
+      yn = getErr(xn,&ensign);
+      if (yn < ym) {
+	q = -q;
+	xn = xm;
+	yn = ym;
+	ensign = emsign;
+      }
+    }
+  
+    while(yn >= ym) {		// March until error becomes smaller.
+      if (++steps > 10) break;
+      ym = yn;
+      xm = xn;
+      emsign = ensign;
+      a = xm + q;
+      if (a == xm || a <= xx0 || a >= xx1) break;// Must not skip over the zeros either side.
+      xn = a;
+      yn = getErr(xn,&ensign);
+    }
+
+    mm[i] = xm;			// Position of maximum
+    yy[i] = ym;			// Value of maximum
+
+    if (eclose > ym) eclose = ym;
+    if (farther < ym) farther = ym;
+
+    xx0 = xx1; // Walk to next zero.
+  } // end of search loop
+
+  q = (farther - eclose);	// Decrease step size if error spread increased
+  if (eclose != 0.0) q /= eclose; // Relative error spread
+  if (q >= spread) delta *= 0.5; // Spread is increasing; decrease step size
+  spread = q;
+
+  for (i = 0; i < neq; i++) {
+    q = yy[i+1];
+    if (q != 0.0) q = yy[i] / q  - (bigfloat)1l;
+    else q = 0.0625;
+    if (q > (bigfloat)0.25) q = 0.25;
+    q *= mm[i+1] - mm[i];
+    step[i] = q * delta;
+  }
+  step[neq] = step[neq-1];
+  
+  for (i = 0; i < neq; i++) {	// Insert new locations for the zeros.
+    xm = xx[i] - step[i];
+    if (xm <= apstrt) continue;
+    if (xm >= apend) continue;
+    if (xm <= mm[i]) xm = (bigfloat)0.5 * (mm[i] + xx[i]);
+    if (xm >= mm[i+1]) xm = (bigfloat)0.5 * (mm[i+1] + xx[i]);
+    xx[i] = xm;
+  }
+
+  delete [] yy;
+}
+
+// Solve the equations
+void AlgRemez::equations(void) {
+  bigfloat x, y, z;
+  int i, j, ip;
+  bigfloat *aa;
+
+  bigfloat *AA = new bigfloat[(neq)*(neq)];
+  bigfloat *BB = new bigfloat[neq];
+  
+  for (i = 0; i < neq; i++) {	// set up the equations for solution by simq()
+    ip = neq * i;		// offset to 1st element of this row of matrix
+    x = xx[i];			// the guess for this row
+    y = func(x);		// right-hand-side vector
+
+    z = (bigfloat)1l;
+    aa = AA+ip;
+    for (j = 0; j <= n; j++) {
+      *aa++ = z;
+      z *= x;
+    }
+
+    z = (bigfloat)1l;
+    for (j = 0; j < d; j++) {
+      *aa++ = -y * z;
+      z *= x;
+    }
+    BB[i] = y * z;		// Right hand side vector
+  }
+
+  // Solve the simultaneous linear equations.
+  if (simq(AA, BB, param, neq)) {
+    std::cout<<"simq failed\n";
+    exit(0);
+  }
+
+  delete [] AA;
+  delete [] BB;
+
+}
+
+// Evaluate the rational form P(x)/Q(x) using coefficients
+// from the solution vector param
+bigfloat AlgRemez::approx(const bigfloat x) {
+  bigfloat yn, yd;
+  int i;
+
+  // Work backwards toward the constant term.
+  yn = param[n];		// Highest order numerator coefficient
+  for (i = n-1; i >= 0; i--) yn = x * yn  +  param[i]; 
+  yd = x + param[n+d];	// Highest degree coefficient = 1.0
+  for (i = n+d-1; i > n; i--) yd = x * yd  +  param[i];
+
+  return(yn/yd);
+}
+
+// Compute size and sign of the approximation error at x
+bigfloat AlgRemez::getErr(bigfloat x, int *sign) {
+  bigfloat e, f;
+
+  f = func(x);
+  e = approx(x) - f;
+  if (f != 0) e /= f;
+  if (e < (bigfloat)0.0) {
+    *sign = -1;
+    e = -e;
+  }
+  else *sign = 1;
+  
+  return(e);
+}
+
+// Calculate function required for the approximation.
+bigfloat AlgRemez::func(const bigfloat x) {
+
+  bigfloat z = (bigfloat)power_num / (bigfloat)power_den;
+  bigfloat y;
+
+  if (x == (bigfloat)1.0) y = (bigfloat)1.0;
+  else y = pow_bf(x,z);
+
+  if (a_length > 0) {
+    bigfloat sum = 0l;
+    for (int j=0; j<a_length; j++) sum += a[j]*pow_bf(x,a_power[j]);
+    return y * exp_bf(sum);
+  } else {
+    return y;
+  }
+
+}
+
+// Solve the system AX=B
+int AlgRemez::simq(bigfloat A[], bigfloat B[], bigfloat X[], int n) {
+
+  int i, j, ij, ip, ipj, ipk, ipn;
+  int idxpiv, iback;
+  int k, kp, kp1, kpk, kpn;
+  int nip, nkp, nm1;
+  bigfloat em, q, rownrm, big, size, pivot, sum;
+  bigfloat *aa;
+
+  // simq() work vector
+  int *IPS = new int[(neq) * sizeof(int)];
+
+  nm1 = n - 1;
+  // Initialize IPS and X
+  
+  ij = 0;
+  for (i = 0; i < n; i++) {
+    IPS[i] = i;
+    rownrm = 0.0;
+    for(j = 0; j < n; j++) {
+      q = abs_bf(A[ij]);
+      if(rownrm < q) rownrm = q;
+      ++ij;
+    }
+    if (rownrm == (bigfloat)0l) {
+      std::cout<<"simq rownrm=0\n";
+      delete [] IPS;
+      return(1);
+    }
+    X[i] = (bigfloat)1.0 / rownrm;
+  }
+  
+  for (k = 0; k < nm1; k++) {
+    big = 0.0;
+    idxpiv = 0;
+    
+    for (i = k; i < n; i++) {
+      ip = IPS[i];
+      ipk = n*ip + k;
+      size = abs_bf(A[ipk]) * X[ip];
+      if (size > big) {
+	big = size;
+	idxpiv = i;
+      }
+    }
+    
+    if (big == (bigfloat)0l) {
+      std::cout<<"simq big=0\n";
+      delete [] IPS;
+      return(2);
+    }
+    if (idxpiv != k) {
+      j = IPS[k];
+      IPS[k] = IPS[idxpiv];
+      IPS[idxpiv] = j;
+    }
+    kp = IPS[k];
+    kpk = n*kp + k;
+    pivot = A[kpk];
+    kp1 = k+1;
+    for (i = kp1; i < n; i++) {
+      ip = IPS[i];
+      ipk = n*ip + k;
+      em = -A[ipk] / pivot;
+      A[ipk] = -em;
+      nip = n*ip;
+      nkp = n*kp;
+      aa = A+nkp+kp1;
+      for (j = kp1; j < n; j++) {
+	ipj = nip + j;
+	A[ipj] = A[ipj] + em * *aa++;
+      }
+    }
+  }
+  kpn = n * IPS[n-1] + n - 1;	// last element of IPS[n] th row
+  if (A[kpn] == (bigfloat)0l) {
+    std::cout<<"simq A[kpn]=0\n";
+    delete [] IPS;
+    return(3);
+  }
+
+  
+  ip = IPS[0];
+  X[0] = B[ip];
+  for (i = 1; i < n; i++) {
+    ip = IPS[i];
+    ipj = n * ip;
+    sum = 0.0;
+    for (j = 0; j < i; j++) {
+      sum += A[ipj] * X[j];
+      ++ipj;
+    }
+    X[i] = B[ip] - sum;
+  }
+  
+  ipn = n * IPS[n-1] + n - 1;
+  X[n-1] = X[n-1] / A[ipn];
+  
+  for (iback = 1; iback < n; iback++) {
+    //i goes (n-1),...,1
+    i = nm1 - iback;
+    ip = IPS[i];
+    nip = n*ip;
+    sum = 0.0;
+    aa = A+nip+i+1;
+    for (j= i + 1; j < n; j++) 
+      sum += *aa++ * X[j];
+    X[i] = (X[i] - sum) / A[nip+i];
+  }
+  
+  delete [] IPS;
+  return(0);
+}
+
+// Calculate the roots of the approximation
+int AlgRemez::root() {
+
+  long i,j;
+  bigfloat x,dx=0.05;
+  bigfloat upper=1, lower=-100000;
+  bigfloat tol = 1e-20;
+
+  bigfloat *poly = new bigfloat[neq+1];
+
+  // First find the numerator roots
+  for (i=0; i<=n; i++) poly[i] = param[i];
+
+  for (i=n-1; i>=0; i--) {
+    roots[i] = rtnewt(poly,i+1,lower,upper,tol);
+    if (roots[i] == 0.0) {
+      std::cout<<"Failure to converge on root "<<i+1<<"/"<<n<<"\n";
+      return 0;
+    }
+    poly[0] = -poly[0]/roots[i];
+    for (j=1; j<=i; j++) poly[j] = (poly[j-1] - poly[j])/roots[i];
+  }
+  
+ // Now find the denominator roots
+  poly[d] = 1l;
+  for (i=0; i<d; i++) poly[i] = param[n+1+i];
+
+  for (i=d-1; i>=0; i--) {
+    poles[i]=rtnewt(poly,i+1,lower,upper,tol);
+    if (poles[i] == 0.0) {
+      std::cout<<"Failure to converge on pole "<<i+1<<"/"<<d<<"\n";
+      return 0;
+    }
+    poly[0] = -poly[0]/poles[i];
+    for (j=1; j<=i; j++) poly[j] = (poly[j-1] - poly[j])/poles[i];
+  }
+
+  norm = param[n];
+
+  delete [] poly;
+
+  return 1;
+}
+
+// Evaluate the polynomial
+bigfloat AlgRemez::polyEval(bigfloat x, bigfloat *poly, long size) {
+  bigfloat f = poly[size];
+  for (int i=size-1; i>=0; i--) f = f*x + poly[i];
+  return f;
+}
+
+// Evaluate the differential of the polynomial
+bigfloat AlgRemez::polyDiff(bigfloat x, bigfloat *poly, long size) {
+  bigfloat df = (bigfloat)size*poly[size];
+  for (int i=size-1; i>0; i--) df = df*x + (bigfloat)i*poly[i];
+  return df;
+}
+
+
+// Newton's method to calculate roots
+bigfloat AlgRemez::rtnewt(bigfloat *poly, long i, bigfloat x1, 
+			  bigfloat x2, bigfloat xacc) {
+  int j;
+  bigfloat df, dx, f, rtn;
+
+  rtn=(bigfloat)0.5*(x1+x2);
+  for (j=1; j<=JMAX;j++) {
+    f = polyEval(rtn, poly, i);
+    df = polyDiff(rtn, poly, i);
+    dx = f/df;
+    rtn -= dx;
+    if (abs_bf(dx) < xacc) return rtn;
+  }
+  std::cout<<"Maximum number of iterations exceeded in rtnewt\n";
+  return 0.0;
+}
+
+// Evaluate the partial fraction expansion of the rational function
+// with res roots and poles poles.  Result is overwritten on input
+// arrays.
+void AlgRemez::pfe(bigfloat *res, bigfloat *poles, bigfloat norm) {
+  int i,j,small;
+  bigfloat temp;
+  bigfloat *numerator = new bigfloat[n];
+  bigfloat *denominator = new bigfloat[d];
+
+  // Construct the polynomials explicitly 
+  for (i=1; i<n; i++) {
+    numerator[i] = 0l;
+    denominator[i] = 0l;
+  }
+  numerator[0]=1l;
+  denominator[0]=1l;
+
+  for (j=0; j<n; j++) {
+    for (i=n-1; i>=0; i--) {
+      numerator[i] *= -res[j];
+      denominator[i] *= -poles[j];
+      if (i>0) {
+	numerator[i] += numerator[i-1];
+	denominator[i] += denominator[i-1];
+      }
+    }
+  }
+
+  // Convert to proper fraction form.
+  // Fraction is now in the form 1 + n/d, where O(n)+1=O(d)
+  for (i=0; i<n; i++) numerator[i] -= denominator[i];
+
+  // Find the residues of the partial fraction expansion and absorb the
+  // coefficients.
+  for (i=0; i<n; i++) {
+    res[i] = 0l;
+    for (j=n-1; j>=0; j--) {
+      res[i] = poles[i]*res[i]+numerator[j];
+    }
+    for (j=n-1; j>=0; j--) {
+      if (i!=j) res[i] /= poles[i]-poles[j];
+    }
+    res[i] *= norm;
+  }  
+
+  // res now holds the residues
+  j = 0;
+  for (i=0; i<n; i++) poles[i] = -poles[i];
+
+  // Move the ordering of the poles from smallest to largest
+  for (j=0; j<n; j++) {
+    small = j;
+    for (i=j+1; i<n; i++) {
+      if (poles[i] < poles[small]) small = i;
+    }
+    if (small != j) {
+      temp = poles[small];
+      poles[small] = poles[j];
+      poles[j] = temp;
+      temp = res[small];
+      res[small] = res[j];
+      res[j] = temp;
+    }
+  }
+
+  delete [] numerator;
+  delete [] denominator;
+}
+
+double AlgRemez::evaluateApprox(double x) {
+  return (double)approx((bigfloat)x);
+}
+
+double AlgRemez::evaluateInverseApprox(double x) {
+  return 1.0/(double)approx((bigfloat)x);
+}
+
+double AlgRemez::evaluateFunc(double x) {
+  return (double)func((bigfloat)x);
+}
+
+double AlgRemez::evaluateInverseFunc(double x) {
+  return 1.0/(double)func((bigfloat)x);
+}
+
+void AlgRemez::csv(std::ostream & os)
+{
+  double lambda_low = apstrt;
+  double lambda_high= apend;
+  for (double x=lambda_low; x<lambda_high; x*=1.05) {
+    double f = evaluateFunc(x);
+    double r = evaluateApprox(x);
+    os<< x<<","<<r<<","<<f<<","<<r-f<<std::endl;
+  }
+  return;
+}
diff --git a/lib/algorithms/approx/Remez.h b/lib/algorithms/approx/Remez.h
new file mode 100755
index 00000000..906811be
--- /dev/null
+++ b/lib/algorithms/approx/Remez.h
@@ -0,0 +1,167 @@
+/*
+
+  Mike Clark - 25th May 2005
+
+  alg_remez.h
+
+  AlgRemez is an implementation of the Remez algorithm, which in this
+  case is used for generating the optimal nth root rational
+  approximation.
+
+  Note this class requires the gnu multiprecision (GNU MP) library.
+
+*/
+
+#ifndef INCLUDED_ALG_REMEZ_H
+#define INCLUDED_ALG_REMEZ_H
+
+#include <algorithms/approx/bigfloat.h>
+
+#define JMAX 10000 //Maximum number of iterations of Newton's approximation
+#define SUM_MAX 10 // Maximum number of terms in exponential
+
+/*
+ *Usage examples
+  AlgRemez remez(lambda_low,lambda_high,precision);
+  error = remez.generateApprox(n,d,y,z);
+  remez.getPFE(res,pole,&norm);
+  remez.getIPFE(res,pole,&norm);
+  remez.csv(ostream &os);
+ */
+class AlgRemez
+{
+ private:
+  char *cname;
+
+  // The approximation parameters
+  bigfloat *param, *roots, *poles;
+  bigfloat norm;
+
+  // The numerator and denominator degree (n=d)
+  int n, d;
+  
+  // The bounds of the approximation
+  bigfloat apstrt, apwidt, apend;
+
+  // the numerator and denominator of the power we are approximating
+  unsigned long power_num; 
+  unsigned long power_den;
+
+  // Flag to determine whether the arrays have been allocated
+  int alloc;
+
+  // Flag to determine whether the roots have been found
+  int foundRoots;
+
+  // Variables used to calculate the approximation
+  int nd1, iter;
+  bigfloat *xx, *mm, *step;
+  bigfloat delta, spread, tolerance;
+
+  // The exponential summation coefficients
+  bigfloat *a;
+  int *a_power;
+  int a_length;
+
+  // The number of equations we must solve at each iteration (n+d+1)
+  int neq;
+
+  // The precision of the GNU MP library
+  long prec;
+
+  // Initial values of maximal and minmal errors
+  void initialGuess();
+
+  // Solve the equations
+  void equations();
+
+  // Search for error maxima and minima
+  void search(bigfloat *step); 
+
+  // Initialise step sizes
+  void stpini(bigfloat *step);
+
+  // Calculate the roots of the approximation
+  int root();
+
+  // Evaluate the polynomial
+  bigfloat polyEval(bigfloat x, bigfloat *poly, long size);
+  //complex_bf polyEval(complex_bf x, complex_bf *poly, long size);
+
+  // Evaluate the differential of the polynomial
+  bigfloat polyDiff(bigfloat x, bigfloat *poly, long size);
+  //complex_bf polyDiff(complex_bf x, complex_bf *poly, long size);
+
+  // Newton's method to calculate roots
+  bigfloat rtnewt(bigfloat *poly, long i, bigfloat x1, bigfloat x2, bigfloat xacc);
+  //complex_bf rtnewt(complex_bf *poly, long i, bigfloat x1, bigfloat x2, bigfloat xacc);
+
+  // Evaluate the partial fraction expansion of the rational function
+  // with res roots and poles poles.  Result is overwritten on input
+  // arrays.
+  void pfe(bigfloat *res, bigfloat* poles, bigfloat norm);
+
+  // Calculate function required for the approximation
+  bigfloat func(bigfloat x);
+
+  // Compute size and sign of the approximation error at x
+  bigfloat getErr(bigfloat x, int *sign);
+
+  // Solve the system AX=B
+  int simq(bigfloat *A, bigfloat *B, bigfloat *X, int n);
+
+  // Free memory and reallocate as necessary
+  void allocate(int num_degree, int den_degree);
+
+  // Evaluate the rational form P(x)/Q(x) using coefficients from the
+  // solution vector param
+  bigfloat approx(bigfloat x);
+
+ public:
+  
+  // Constructor
+  AlgRemez(double lower, double upper, long prec);
+
+  // Destructor
+  virtual ~AlgRemez();
+
+  // Reset the bounds of the approximation
+  void setBounds(double lower, double upper);
+
+  // Generate the rational approximation x^(pnum/pden)
+  double generateApprox(int num_degree, int den_degree, 
+			unsigned long power_num, unsigned long power_den, 
+			int a_len, double* a_param, int* a_pow);
+  double generateApprox(int num_degree, int den_degree, 
+			unsigned long power_num, unsigned long power_den);
+  double generateApprox(int degree, unsigned long power_num, 
+			unsigned long power_den);
+
+  // Return the partial fraction expansion of the approximation x^(pnum/pden)
+  int getPFE(double *res, double *pole, double *norm);
+
+  // Return the partial fraction expansion of the approximation x^(-pnum/pden)
+  int getIPFE(double *res, double *pole, double *norm);
+
+  // Evaluate the rational form P(x)/Q(x) using coefficients from the
+  // solution vector param
+  double evaluateApprox(double x);
+
+  // Evaluate the rational form Q(x)/P(x) using coefficients from the
+  // solution vector param
+  double evaluateInverseApprox(double x);
+
+  // Calculate function required for the approximation
+  double evaluateFunc(double x);
+
+  // Calculate inverse function required for the approximation
+  double evaluateInverseFunc(double x);
+  
+  // Dump csv of function, approx and error
+  void csv(std::ostream &os);
+};
+
+#endif  // Include guard
+
+
+
diff --git a/lib/algorithms/approx/zolotarev.c b/lib/algorithms/approx/Zolotarev.cc
similarity index 100%
rename from lib/algorithms/approx/zolotarev.c
rename to lib/algorithms/approx/Zolotarev.cc
diff --git a/lib/algorithms/approx/bigfloat.h b/lib/algorithms/approx/bigfloat.h
new file mode 100755
index 00000000..7f59ea33
--- /dev/null
+++ b/lib/algorithms/approx/bigfloat.h
@@ -0,0 +1,368 @@
+/*
+  Mike Clark - 25th May 2005
+
+  bigfloat.h
+
+  Simple C++ wrapper for multiprecision datatype used by AlgRemez
+  algorithm
+*/
+
+#ifndef INCLUDED_BIGFLOAT_H
+#define INCLUDED_BIGFLOAT_H
+
+#ifdef HAVE_GMP
+#include <gmp.h>
+#include <mpf2mpfr.h>
+#include <mpfr.h>
+class bigfloat {
+
+private:
+
+  mpf_t x;
+
+public:
+
+  bigfloat() { mpf_init(x); }
+  bigfloat(const bigfloat& y) { mpf_init_set(x, y.x); }
+  bigfloat(const unsigned long u) { mpf_init_set_ui(x, u); }
+  bigfloat(const long i) { mpf_init_set_si(x, i); }
+  bigfloat(const int i) {mpf_init_set_si(x,(long)i);}
+  bigfloat(const float d) { mpf_init_set_d(x, (double)d); }
+  bigfloat(const double d) { mpf_init_set_d(x, d); }  
+  bigfloat(const char *str) { mpf_init_set_str(x, (char*)str, 10); }
+  ~bigfloat(void) { mpf_clear(x); }
+  operator const double (void) const { return (double)mpf_get_d(x); }
+  static void setDefaultPrecision(unsigned long dprec) {
+    unsigned long bprec =  (unsigned long)(3.321928094 * (double)dprec);
+    mpf_set_default_prec(bprec);
+  }
+
+  void setPrecision(unsigned long dprec) {
+    unsigned long bprec =  (unsigned long)(3.321928094 * (double)dprec);
+    mpf_set_prec(x,bprec);
+  }
+  
+  unsigned long getPrecision(void) const { return mpf_get_prec(x); }
+
+  unsigned long getDefaultPrecision(void) const { return mpf_get_default_prec(); }
+
+  bigfloat& operator=(const bigfloat& y) {
+    mpf_set(x, y.x); 
+    return *this;
+  }
+
+  bigfloat& operator=(const unsigned long y) { 
+    mpf_set_ui(x, y);
+    return *this; 
+  }
+  
+  bigfloat& operator=(const signed long y) {
+    mpf_set_si(x, y); 
+    return *this;
+  }
+  
+  bigfloat& operator=(const float y) {
+    mpf_set_d(x, (double)y); 
+    return *this;
+  }
+
+  bigfloat& operator=(const double y) {
+    mpf_set_d(x, y); 
+    return *this;
+  }
+
+  size_t write(void);
+  size_t read(void);
+
+  /* Arithmetic Functions */
+
+  bigfloat& operator+=(const bigfloat& y) { return *this = *this + y; }
+  bigfloat& operator-=(const bigfloat& y) { return *this = *this - y; }
+  bigfloat& operator*=(const bigfloat& y) { return *this = *this * y; }
+  bigfloat& operator/=(const bigfloat& y) { return *this = *this / y; }
+
+  friend bigfloat operator+(const bigfloat& x, const bigfloat& y) {
+    bigfloat a;
+    mpf_add(a.x,x.x,y.x);
+    return a;
+  }
+
+  friend bigfloat operator+(const bigfloat& x, const unsigned long y) {
+    bigfloat a;
+    mpf_add_ui(a.x,x.x,y);
+    return a;
+  }
+
+  friend bigfloat operator-(const bigfloat& x, const bigfloat& y) {
+    bigfloat a;
+    mpf_sub(a.x,x.x,y.x);
+    return a;
+  }
+  
+  friend bigfloat operator-(const unsigned long x, const bigfloat& y) {
+    bigfloat a;
+    mpf_ui_sub(a.x,x,y.x);
+    return a;
+  }
+  
+  friend bigfloat operator-(const bigfloat& x, const unsigned long y) {
+    bigfloat a;
+    mpf_sub_ui(a.x,x.x,y);
+    return a;
+  }
+
+  friend bigfloat operator-(const bigfloat& x) {
+    bigfloat a;
+    mpf_neg(a.x,x.x);
+    return a;
+  }
+
+  friend bigfloat operator*(const bigfloat& x, const bigfloat& y) {
+    bigfloat a;
+    mpf_mul(a.x,x.x,y.x);
+    return a;
+  }
+
+  friend bigfloat operator*(const bigfloat& x, const unsigned long y) {
+    bigfloat a;
+    mpf_mul_ui(a.x,x.x,y);
+    return a;
+  }
+
+  friend bigfloat operator/(const bigfloat& x, const bigfloat& y){
+    bigfloat a;
+    mpf_div(a.x,x.x,y.x);
+    return a;
+  }
+
+  friend bigfloat operator/(const unsigned long x, const bigfloat& y){
+    bigfloat a;
+    mpf_ui_div(a.x,x,y.x);
+    return a;
+  }
+
+  friend bigfloat operator/(const bigfloat& x, const unsigned long y){
+    bigfloat a;
+    mpf_div_ui(a.x,x.x,y);
+    return a;
+  }
+
+  friend bigfloat sqrt_bf(const bigfloat& x){
+    bigfloat a;
+    mpf_sqrt(a.x,x.x);
+    return a;
+  }
+
+  friend bigfloat sqrt_bf(const unsigned long x){
+    bigfloat a;
+    mpf_sqrt_ui(a.x,x);
+    return a;
+  }
+
+  friend bigfloat abs_bf(const bigfloat& x){
+    bigfloat a;
+    mpf_abs(a.x,x.x);
+    return a;
+  }
+
+  friend bigfloat pow_bf(const bigfloat& a, long power) {
+    bigfloat b;
+    mpf_pow_ui(b.x,a.x,power);
+    return b;
+  }
+
+  friend bigfloat pow_bf(const bigfloat& a, bigfloat &power) {
+    bigfloat b;
+    mpfr_pow(b.x,a.x,power.x,GMP_RNDN);
+    return b;
+  }
+
+  friend bigfloat exp_bf(const bigfloat& a) {
+    bigfloat b;
+    mpfr_exp(b.x,a.x,GMP_RNDN);
+    return b;
+  }
+
+  /* Comparison Functions */
+
+  friend int operator>(const bigfloat& x, const bigfloat& y) {
+    int test;
+    test = mpf_cmp(x.x,y.x);
+    if (test > 0) return 1;
+    else return 0;
+  }
+
+  friend int operator<(const bigfloat& x, const bigfloat& y) {
+    int test;
+    test = mpf_cmp(x.x,y.x);
+    if (test < 0) return 1;
+    else return 0;
+  }
+
+  friend int sgn(const bigfloat&);
+
+};
+#else
+
+typedef double mfloat; 
+class bigfloat {
+private:
+
+  mfloat x;
+
+public:
+
+  bigfloat() { }
+  bigfloat(const bigfloat& y) { x=y.x; }
+  bigfloat(const unsigned long u) { x=u; }
+  bigfloat(const long i) { x=i; }
+  bigfloat(const int i) { x=i;}
+  bigfloat(const float d) { x=d;}
+  bigfloat(const double d) {  x=d;}
+  bigfloat(const char *str) { x=std::stod(std::string(str));}
+  ~bigfloat(void) { }
+  operator double (void) const { return (double)x; }
+  static void setDefaultPrecision(unsigned long dprec) {
+  }
+
+  void setPrecision(unsigned long dprec) {
+  }
+  
+  unsigned long getPrecision(void) const { return 64; }
+  unsigned long getDefaultPrecision(void) const { return 64; }
+
+  bigfloat& operator=(const bigfloat& y)     { x=y.x;    return *this;  }
+  bigfloat& operator=(const unsigned long y) { x=y; return *this; }
+  bigfloat& operator=(const signed long y)   { x=y; return *this; }
+  bigfloat& operator=(const float y)    { x=y; return *this; }
+  bigfloat& operator=(const double y)   { x=y; return *this; }
+
+  size_t write(void);
+  size_t read(void);
+
+  /* Arithmetic Functions */
+
+  bigfloat& operator+=(const bigfloat& y) { return *this = *this + y; }
+  bigfloat& operator-=(const bigfloat& y) { return *this = *this - y; }
+  bigfloat& operator*=(const bigfloat& y) { return *this = *this * y; }
+  bigfloat& operator/=(const bigfloat& y) { return *this = *this / y; }
+
+  friend bigfloat operator+(const bigfloat& x, const bigfloat& y) { 
+    bigfloat a;
+    a.x=x.x+y.x;
+    return a;
+  }
+
+  friend bigfloat operator+(const bigfloat& x, const unsigned long y) {
+    bigfloat a;
+    a.x=x.x+y;
+    return a;
+  }
+
+  friend bigfloat operator-(const bigfloat& x, const bigfloat& y) {
+    bigfloat a;
+    a.x=x.x-y.x;
+    return a;
+  }
+  
+  friend bigfloat operator-(const unsigned long x, const bigfloat& y) {
+    bigfloat bx(x);
+    return bx-y;
+  }
+  
+  friend bigfloat operator-(const bigfloat& x, const unsigned long y) {
+    bigfloat by(y);
+    return x-by;
+  }
+
+  friend bigfloat operator-(const bigfloat& x) {
+    bigfloat a;
+    a.x=-x.x;
+    return a;
+  }
+
+  friend bigfloat operator*(const bigfloat& x, const bigfloat& y) {
+    bigfloat a;
+    a.x=x.x*y.x;
+    return a;
+  }
+
+  friend bigfloat operator*(const bigfloat& x, const unsigned long y) {
+    bigfloat a;
+    a.x=x.x*y;
+    return a;
+  }
+
+  friend bigfloat operator/(const bigfloat& x, const bigfloat& y){
+    bigfloat a;
+    a.x=x.x/y.x;
+    return a;
+  }
+
+  friend bigfloat operator/(const unsigned long x, const bigfloat& y){
+    bigfloat bx(x);
+    return bx/y;
+  }
+
+  friend bigfloat operator/(const bigfloat& x, const unsigned long y){
+    bigfloat by(y);
+    return x/by;
+  }
+
+  friend bigfloat sqrt_bf(const bigfloat& x){
+    bigfloat a;
+    a.x= sqrt(x.x);
+    return a;
+  }
+
+  friend bigfloat sqrt_bf(const unsigned long x){
+    bigfloat a(x);
+    return sqrt_bf(a);
+  }
+
+  friend bigfloat abs_bf(const bigfloat& x){
+    bigfloat a;
+    a.x=abs(x.x);
+    return a;
+  }
+
+  friend bigfloat pow_bf(const bigfloat& a, long power) {
+    bigfloat b;
+    b.x=pow(a.x,power);
+    return b;
+  }
+
+  friend bigfloat pow_bf(const bigfloat& a, bigfloat &power) {
+    bigfloat b;
+    b.x=pow(a.x,power.x);
+    return b;
+  }
+
+  friend bigfloat exp_bf(const bigfloat& a) {
+    bigfloat b;
+    b.x=exp(a.x);
+    return b;
+  }
+
+  /* Comparison Functions */
+  friend int operator>(const bigfloat& x, const bigfloat& y) {
+    return x.x>y.x;
+  }
+
+  friend int operator<(const bigfloat& x, const bigfloat& y) {
+    return x.x<y.x;
+  }
+
+  friend int sgn(const bigfloat& x) {
+    if ( x.x>=0 )  return 1;   
+    else return 0;
+  }
+
+  /* Miscellaneous Functions */
+
+  //  friend bigfloat& random(void);
+};
+
+#endif
+
+#endif
diff --git a/lib/algorithms/iterative/ConjugateGradient.h b/lib/algorithms/iterative/ConjugateGradient.h
index ad305d2f..bcb6ab60 100644
--- a/lib/algorithms/iterative/ConjugateGradient.h
+++ b/lib/algorithms/iterative/ConjugateGradient.h
@@ -1,19 +1,25 @@
 #ifndef GRID_CONJUGATE_GRADIENT_H
 #define GRID_CONJUGATE_GRADIENT_H
 
-#include<Grid.h>
-#include<algorithms/LinearOperator.h>
-
 namespace Grid {
 
-  template<class Field> class ConjugateGradient : public IterativeProcess<Field> {
-  public:
+    /////////////////////////////////////////////////////////////
+    // Base classes for iterative processes based on operators
+    // single input vec, single output vec.
+    /////////////////////////////////////////////////////////////
 
-    ConjugateGradient(RealD tol,Integer maxit): IterativeProces(tol,maxit) {};
+  template<class Field> 
+    class ConjugateGradient :  public OperatorFunction<Field> {
+public:                                                
+    RealD   Tolerance;
+    Integer MaxIterations;
 
-    void operator() (HermitianOperatorBase<Field> *Linop,const Field &src, Field &psi){
+    ConjugateGradient(RealD tol,Integer maxit) : Tolerance(tol), MaxIterations(maxit) { 
+      std::cout << Tolerance<<std::endl;
+    };
+
+    void operator() (HermitianOperatorBase<Field> &Linop,const Field &src, Field &psi){
 
-      RealD residual = Tolerance;
       RealD cp,c,a,d,b,ssq,qq,b_pred;
       
       Field   p(src);
@@ -32,24 +38,24 @@ namespace Grid {
       cp =a;
       ssq=norm2(src);
       
-      std::cout <<setprecision(4)<< "ConjugateGradient: guess "<<guess<<std::endl;
-      std::cout <<setprecision(4)<< "ConjugateGradient:   src "<<ssq  <<std::endl;
-      std::cout <<setprecision(4)<< "ConjugateGradient:    mp "<<d    <<std::endl;
-      std::cout <<setprecision(4)<< "ConjugateGradient:   mmp "<<b    <<std::endl;
-      std::cout <<setprecision(4)<< "ConjugateGradient:     r "<<cp   <<std::endl;
-      std::cout <<setprecision(4)<< "ConjugateGradient:     p "<<a    <<std::endl;
+      std::cout <<std::setprecision(4)<< "ConjugateGradient: guess "<<guess<<std::endl;
+      std::cout <<std::setprecision(4)<< "ConjugateGradient:   src "<<ssq  <<std::endl;
+      std::cout <<std::setprecision(4)<< "ConjugateGradient:    mp "<<d    <<std::endl;
+      std::cout <<std::setprecision(4)<< "ConjugateGradient:   mmp "<<b    <<std::endl;
+      std::cout <<std::setprecision(4)<< "ConjugateGradient:     r "<<cp   <<std::endl;
+      std::cout <<std::setprecision(4)<< "ConjugateGradient:     p "<<a    <<std::endl;
       
-      RealD rsq =  residual* residual*ssq;
+      RealD rsq =  Tolerance* Tolerance*ssq;
       
       //Check if guess is really REALLY good :)
       if ( cp <= rsq ) {
-	return 0;
+	return;
       }
       
-      std::cout << setprecision(4)<< "ConjugateGradient: k=0 residual "<<cp<<" rsq"<<rsq<<std::endl;
+      std::cout << std::setprecision(4)<< "ConjugateGradient: k=0 residual "<<cp<<" rsq"<<rsq<<std::endl;
       
       int k;
-      for (k=1;k<=max_iter;k++){
+      for (k=1;k<=MaxIterations;k++){
 	
 	c=cp;
 	
@@ -62,10 +68,10 @@ namespace Grid {
 	b = cp/c;
 	
 	// Fuse these loops ; should be really easy
-	// Update psi; New (conjugate/M-orthogonal) search direction
 	psi= a*p+psi;
 	p  = p*b+r;
 	  
+	std::cout << "Iteration " <<k<<" residual "<<cp<< " target"<< rsq<<std::endl;
 	// Stopping condition
 	if ( cp <= rsq ) { 
 	  
@@ -78,10 +84,11 @@ namespace Grid {
 	  RealD resnorm = sqrt(norm2(p));
 	  RealD true_residual = resnorm/srcnorm;
 	  std::cout<<"ConjugateGradient: true   residual  is "<<true_residual<<" sol "<<psinorm<<" src "<<srcnorm<<std::endl;
-	  std::cout<<"ConjugateGradient: target residual was "<<residual<<std::endl;
-	  return k;
+	  std::cout<<"ConjugateGradient: target residual was "<<Tolerance<<std::endl;
 	}
       }
+      std::cout<<"ConjugateGradient did NOT converge"<<std::endl;
+      assert(0);
     }
   };
 }
diff --git a/lib/algorithms/iterative/NormalEquations.h b/lib/algorithms/iterative/NormalEquations.h
new file mode 100644
index 00000000..3f40f8a5
--- /dev/null
+++ b/lib/algorithms/iterative/NormalEquations.h
@@ -0,0 +1,34 @@
+#ifndef GRID_NORMAL_EQUATIONS_H
+#define GRID_NORMAL_EQUATIONS_H
+
+namespace Grid {
+
+  ///////////////////////////////////////////////////////////////////////////////////////////////////////
+  // Take a matrix and form a Red Black solver calling a Herm solver
+  // Use of RB info prevents making SchurRedBlackSolve conform to standard interface
+  ///////////////////////////////////////////////////////////////////////////////////////////////////////
+  template<class Field> class NormalEquations : public OperatorFunction<Field>{
+  private:
+    SparseMatrixBase<Field> & _Matrix;
+    OperatorFunction<Field> & _HermitianSolver;
+
+  public:
+
+    /////////////////////////////////////////////////////
+    // Wrap the usual normal equations Schur trick
+    /////////////////////////////////////////////////////
+  NormalEquations(SparseMatrixBase<Field> &Matrix, OperatorFunction<Field> &HermitianSolver) 
+    :  _Matrix(Matrix), _HermitianSolver(HermitianSolver) {}; 
+
+    void operator() (const Field &in, Field &out){
+ 
+      Field src(in._grid);
+
+      _Matrix.Mdag(in,src);
+      _HermitianSolver(src,out);  // Mdag M out = Mdag in
+ 
+    }     
+  };
+
+}
+#endif
diff --git a/lib/algorithms/iterative/SchurRedBlack.h b/lib/algorithms/iterative/SchurRedBlack.h
new file mode 100644
index 00000000..e6a7b59e
--- /dev/null
+++ b/lib/algorithms/iterative/SchurRedBlack.h
@@ -0,0 +1,106 @@
+#ifndef GRID_SCHUR_RED_BLACK_H
+#define GRID_SCHUR_RED_BLACK_H
+
+  /*
+   * Red black Schur decomposition
+   *
+   *  M = (Mee Meo) =  (1             0 )   (Mee   0               )  (1 Mee^{-1} Meo)
+   *      (Moe Moo)    (Moe Mee^-1    1 )   (0   Moo-Moe Mee^-1 Meo)  (0   1         )
+   *                =         L                     D                     U
+   *
+   * L^-1 = (1              0 )
+   *        (-MoeMee^{-1}   1 )   
+   * L^{dag} = ( 1       Mee^{-dag} Moe^{dag} )
+   *           ( 0       1                    )
+   * L^{-d}  = ( 1      -Mee^{-dag} Moe^{dag} )
+   *           ( 0       1                    )
+   *
+   * U^-1 = (1   -Mee^{-1} Meo)
+   *        (0    1           )
+   * U^{dag} = ( 1                 0)
+   *           (Meo^dag Mee^{-dag} 1)
+   * U^{-dag} = (  1                 0)
+   *            (-Meo^dag Mee^{-dag} 1)
+   ***********************
+   *     M psi = eta
+   ***********************
+   *Odd
+   * i)   (D_oo)^{\dag} D_oo psi_o = (D_oo)^\dag L^{-1}  eta_o
+   *                        eta_o' = D_oo (eta_o - Moe Mee^{-1} eta_e)
+   *Even
+   * ii)  Mee psi_e + Meo psi_o = src_e
+   *
+   *   => sol_e = M_ee^-1 * ( src_e - Meo sol_o )...
+   *
+   */
+
+namespace Grid {
+
+  ///////////////////////////////////////////////////////////////////////////////////////////////////////
+  // Take a matrix and form a Red Black solver calling a Herm solver
+  // Use of RB info prevents making SchurRedBlackSolve conform to standard interface
+  ///////////////////////////////////////////////////////////////////////////////////////////////////////
+  template<class Field> class SchurRedBlackSolve : public OperatorFunction<Field>{
+  private:
+    SparseMatrixBase<Field> & _Matrix;
+    OperatorFunction<Field> & _HermitianRBSolver;
+    int CBfactorise;
+  public:
+
+    /////////////////////////////////////////////////////
+    // Wrap the usual normal equations Schur trick
+    /////////////////////////////////////////////////////
+  SchurRedBlackSolve(SparseMatrixBase<Field> &Matrix, OperatorFunction<Field> &HermitianRBSolver) 
+    :  _Matrix(Matrix), _HermitianRBSolver(HermitianRBSolver) { 
+      CBfactorise=0;
+    };
+
+    void operator() (const Field &in, Field &out){
+
+      // FIXME CGdiagonalMee not implemented virtual function
+      // FIXME need to make eo grid from full grid.
+      // FIXME use CBfactorise to control schur decomp
+      const int Even=0;
+      const int Odd =1;
+
+      // Make a cartesianRedBlack from full Grid
+      GridRedBlackCartesian grid(in._grid);
+ 
+      Field src_e(&grid);
+      Field src_o(&grid);
+      Field sol_e(&grid);
+      Field sol_o(&grid);
+      Field   tmp(&grid);
+      Field  Mtmp(&grid);
+      
+      pickCheckerboard(Even,src_e,in);
+      pickCheckerboard(Odd ,src_o,in);
+
+      /////////////////////////////////////////////////////
+      // src_o = Mdag * (source_o - Moe MeeInv source_e)
+      /////////////////////////////////////////////////////
+      _Matrix.MooeeInv(src_e,tmp);     //    MooeeInv(source[Even],tmp,DaggerNo,Even);
+      _Matrix.Meooe   (tmp,Mtmp);      //    Meo     (tmp,src,Odd,DaggerNo);
+      tmp=src_o-Mtmp;                  //    axpy    (tmp,src,source[Odd],-1.0);
+      _Matrix.MpcDag(tmp,src_o);       //    Mprec(tmp,src,Mtmp,DaggerYes);  
+
+      //////////////////////////////////////////////////////////////
+      // Call the red-black solver
+      //////////////////////////////////////////////////////////////
+      _HermitianRBSolver(src_o,sol_o); //  CGNE_prec_MdagM(solution[Odd],src);
+
+      ///////////////////////////////////////////////////
+      // sol_e = M_ee^-1 * ( src_e - Meo sol_o )...
+      ///////////////////////////////////////////////////
+      _Matrix.Meooe(sol_o,tmp);        // Meo(solution[Odd],tmp,Even,DaggerNo);
+      src_e = src_e-tmp;               // axpy(src,tmp,source[Even],-1.0);
+      _Matrix.MooeeInv(src_e,sol_e);   // MooeeInv(src,solution[Even],DaggerNo,Even);
+     
+      setCheckerboard(out,sol_e);
+      setCheckerboard(out,sol_o);
+ 
+    }     
+  };
+
+}
+#endif
diff --git a/lib/algorithms/iterative/TODO b/lib/algorithms/iterative/TODO
new file mode 100644
index 00000000..ca3bca3b
--- /dev/null
+++ b/lib/algorithms/iterative/TODO
@@ -0,0 +1,15 @@
+- ConjugateGradientMultiShift
+- MCR
+
+- Potentially Useful Boost libraries
+
+- MultiArray
+- Aligned allocator; memory pool
+- Remez -- Mike or Boost?
+- Multiprecision
+- quaternians
+- Tokenize
+- Serialization
+- Regex
+- Proto (ET)
+- uBlas
diff --git a/lib/qcd/Grid_qcd_wilson_dop.cc b/lib/qcd/Grid_qcd_wilson_dop.cc
index c4d37a87..6a67efe3 100644
--- a/lib/qcd/Grid_qcd_wilson_dop.cc
+++ b/lib/qcd/Grid_qcd_wilson_dop.cc
@@ -85,20 +85,40 @@ void WilsonMatrix::DoubleStore(LatticeDoubledGaugeField &Uds,const LatticeGaugeF
   }
 }
 
-void WilsonMatrix::M(const LatticeFermion &in, LatticeFermion &out)
+RealD WilsonMatrix::M(const LatticeFermion &in, LatticeFermion &out)
 {
   Dhop(in,out);
-  return;
+  return 0.0;
 }
-void WilsonMatrix::Mdag(const LatticeFermion &in, LatticeFermion &out)
+RealD WilsonMatrix::Mdag(const LatticeFermion &in, LatticeFermion &out)
 {
   Dhop(in,out);
-  return;
+  return 0.0;
 }
-void WilsonMatrix::MdagM(const LatticeFermion &in, LatticeFermion &out)
+
+void WilsonMatrix::Meooe(const LatticeFermion &in, LatticeFermion &out)
 {
   Dhop(in,out);
-  return;
+}
+void WilsonMatrix::MeooeDag(const LatticeFermion &in, LatticeFermion &out)
+{
+  Dhop(in,out);
+}
+void WilsonMatrix::Mooee(const LatticeFermion &in, LatticeFermion &out)
+{
+  return ;
+}
+void WilsonMatrix::MooeeInv(const LatticeFermion &in, LatticeFermion &out)
+{
+  return ;
+}
+void WilsonMatrix::MooeeDag(const LatticeFermion &in, LatticeFermion &out)
+{
+  return ;
+}
+void WilsonMatrix::MooeeInvDag(const LatticeFermion &in, LatticeFermion &out)
+{
+  return ;
 }
 
 void WilsonMatrix::Dhop(const LatticeFermion &in, LatticeFermion &out)
@@ -278,18 +298,6 @@ void WilsonMatrix::Dw(const LatticeFermion &in, LatticeFermion &out)
 {
   return;
 }
-void WilsonMatrix::MpcDag   (const LatticeFermion &in, LatticeFermion &out)
-{
-  return;
-}
-void WilsonMatrix::Mpc      (const LatticeFermion &in, LatticeFermion &out)
-{
-  return;
-}
-void WilsonMatrix::MpcDagMpc(const LatticeFermion &in, LatticeFermion &out)
-{
-  return;
-}
 
 
 
diff --git a/lib/qcd/Grid_qcd_wilson_dop.h b/lib/qcd/Grid_qcd_wilson_dop.h
index 2f5105c8..1098d330 100644
--- a/lib/qcd/Grid_qcd_wilson_dop.h
+++ b/lib/qcd/Grid_qcd_wilson_dop.h
@@ -1,15 +1,12 @@
 #ifndef  GRID_QCD_WILSON_DOP_H
 #define  GRID_QCD_WILSON_DOP_H
 
-#include <Grid.h>
-
-#include <algorithms/LinearOperator.h>
 
 namespace Grid {
 
   namespace QCD {
 
-    class WilsonMatrix : public SparseMatrixBase<LatticeFermion>
+    class WilsonMatrix : public CheckerBoardedSparseMatrixBase<LatticeFermion>
     {
       //NB r=1;
     public:
@@ -36,14 +33,16 @@ namespace Grid {
       void DoubleStore(LatticeDoubledGaugeField &Uds,const LatticeGaugeField &Umu);
 
       // override multiply
-      virtual void M    (const LatticeFermion &in, LatticeFermion &out);
-      virtual void Mdag (const LatticeFermion &in, LatticeFermion &out);
-      virtual void MdagM(const LatticeFermion &in, LatticeFermion &out);
+      virtual RealD  M    (const LatticeFermion &in, LatticeFermion &out);
+      virtual RealD  Mdag (const LatticeFermion &in, LatticeFermion &out);
 
       // half checkerboard operaions
-      void Mpc      (const LatticeFermion &in, LatticeFermion &out);
-      void MpcDag   (const LatticeFermion &in, LatticeFermion &out);
-      void MpcDagMpc(const LatticeFermion &in, LatticeFermion &out);
+      virtual void   Meooe       (const LatticeFermion &in, LatticeFermion &out);
+      virtual void   MeooeDag    (const LatticeFermion &in, LatticeFermion &out);
+      virtual void   Mooee       (const LatticeFermion &in, LatticeFermion &out);
+      virtual void   MooeeDag    (const LatticeFermion &in, LatticeFermion &out);
+      virtual void   MooeeInv    (const LatticeFermion &in, LatticeFermion &out);
+      virtual void   MooeeInvDag (const LatticeFermion &in, LatticeFermion &out);
 
       // non-hermitian hopping term; half cb or both
       void Dhop(const LatticeFermion &in, LatticeFermion &out);
diff --git a/tests/Grid_main.cc b/tests/Grid_main.cc
index 7515ebd1..45778922 100644
--- a/tests/Grid_main.cc
+++ b/tests/Grid_main.cc
@@ -48,7 +48,7 @@ int main (int argc, char ** argv)
     latt_size[3] = lat;
     double volume = latt_size[0]*latt_size[1]*latt_size[2]*latt_size[3];
     
-    GridCartesian Fine(latt_size,simd_layout,mpi_layout);
+    GridCartesian           Fine(latt_size,simd_layout,mpi_layout);
     GridRedBlackCartesian rbFine(latt_size,simd_layout,mpi_layout);
     GridParallelRNG       FineRNG(&Fine);
     FineRNG.SeedRandomDevice();