Merge branch 'develop' into feature/gpu-port

2025-10-23 01:04:48 +01:00 · 2018-12-13 05:11:34 +00:00
parent adbdc4e65b c509bd3fe2
commit b57a4d32aa
647 changed files with 49155 additions and 11160 deletions
--- a/Grid/algorithms/approx/Chebyshev.h
+++ b/Grid/algorithms/approx/Chebyshev.h
@@ -0,0 +1,377 @@
+/*************************************************************************************
+
+    Grid physics library, www.github.com/paboyle/Grid 
+
+    Source file: ./lib/algorithms/approx/Chebyshev.h
+
+    Copyright (C) 2015
+
+Author: Peter Boyle <paboyle@ph.ed.ac.uk>
+Author: paboyle <paboyle@ph.ed.ac.uk>
+Author: Christoph Lehner <clehner@bnl.gov>
+
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License along
+    with this program; if not, write to the Free Software Foundation, Inc.,
+    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+
+    See the full license in the file "LICENSE" in the top level distribution directory
+*************************************************************************************/
+/*  END LEGAL */
+#ifndef GRID_CHEBYSHEV_H
+#define GRID_CHEBYSHEV_H
+
+#include <Grid/algorithms/LinearOperator.h>
+
+NAMESPACE_BEGIN(Grid);
+
+struct ChebyParams : Serializable {
+  GRID_SERIALIZABLE_CLASS_MEMBERS(ChebyParams,
+				  RealD, alpha,  
+				  RealD, beta,   
+				  int, Npoly);
+};
+
+////////////////////////////////////////////////////////////////////////////////////////////
+// Generic Chebyshev approximations
+////////////////////////////////////////////////////////////////////////////////////////////
+template<class Field>
+class Chebyshev : public OperatorFunction<Field> {
+private:
+  std::vector<RealD> Coeffs;
+  int order;
+  RealD hi;
+  RealD lo;
+
+public:
+  void csv(std::ostream &out){
+    RealD diff = hi-lo;
+    RealD delta = diff*1.0e-9;
+    for (RealD x=lo; x<hi; x+=delta) {
+      delta*=1.1;
+      RealD f = approx(x);
+      out<< x<<" "<<f<<std::endl;
+    }
+    return;
+  }
+
+  // Convenience for plotting the approximation
+  void   PlotApprox(std::ostream &out) {
+    out<<"Polynomial approx ["<<lo<<","<<hi<<"]"<<std::endl;
+    for(RealD x=lo;x<hi;x+=(hi-lo)/50.0){
+      out <<x<<"\t"<<approx(x)<<std::endl;
+    }
+  };
+
+  Chebyshev(){};
+  Chebyshev(ChebyParams p){ Init(p.alpha,p.beta,p.Npoly);};
+  Chebyshev(RealD _lo,RealD _hi,int _order, RealD (* func)(RealD) ) {Init(_lo,_hi,_order,func);};
+  Chebyshev(RealD _lo,RealD _hi,int _order) {Init(_lo,_hi,_order);};
+
+  ////////////////////////////////////////////////////////////////////////////////////////////////////
+  // c.f. numerical recipes "chebft"/"chebev". This is sec 5.8 "Chebyshev approximation".
+  ////////////////////////////////////////////////////////////////////////////////////////////////////
+  // CJ: the one we need for Lanczos
+  void Init(RealD _lo,RealD _hi,int _order)
+  {
+    lo=_lo;
+    hi=_hi;
+    order=_order;
+      
+    if(order < 2) exit(-1);
+    Coeffs.resize(order);
+    Coeffs.assign(0.,order);
+    Coeffs[order-1] = 1.;
+  };
+
+  void Init(RealD _lo,RealD _hi,int _order, RealD (* func)(RealD))
+  {
+    lo=_lo;
+    hi=_hi;
+    order=_order;
+      
+    if(order < 2) exit(-1);
+    Coeffs.resize(order);
+    for(int j=0;j<order;j++){
+      RealD s=0;
+      for(int k=0;k<order;k++){
+	RealD y=std::cos(M_PI*(k+0.5)/order);
+	RealD x=0.5*(y*(hi-lo)+(hi+lo));
+	RealD f=func(x);
+	s=s+f*std::cos( j*M_PI*(k+0.5)/order );
+      }
+      Coeffs[j] = s * 2.0/order;
+    }
+  };
+
+    
+  void JacksonSmooth(void){
+    RealD M=order;
+    RealD alpha = M_PI/(M+2);
+    RealD lmax = std::cos(alpha);
+    RealD sumUsq =0;
+    std::vector<RealD> U(M);
+    std::vector<RealD> a(M);
+    std::vector<RealD> g(M);
+    for(int n=0;n<=M;n++){
+      U[n] = std::sin((n+1)*std::acos(lmax))/std::sin(std::acos(lmax));
+      sumUsq += U[n]*U[n];
+    }      
+    sumUsq = std::sqrt(sumUsq);
+
+    for(int i=1;i<=M;i++){
+      a[i] = U[i]/sumUsq;
+    }
+    g[0] = 1.0;
+    for(int m=1;m<=M;m++){
+      g[m] = 0;
+      for(int i=0;i<=M-m;i++){
+	g[m]+= a[i]*a[m+i];
+      }
+    }
+    for(int m=1;m<=M;m++){
+      Coeffs[m]*=g[m];
+    }
+  }
+  RealD approx(RealD x) // Convenience for plotting the approximation
+  {
+    RealD Tn;
+    RealD Tnm;
+    RealD Tnp;
+      
+    RealD y=( x-0.5*(hi+lo))/(0.5*(hi-lo));
+      
+    RealD T0=1;
+    RealD T1=y;
+      
+    RealD sum;
+    sum = 0.5*Coeffs[0]*T0;
+    sum+= Coeffs[1]*T1;
+      
+    Tn =T1;
+    Tnm=T0;
+    for(int i=2;i<order;i++){
+      Tnp=2*y*Tn-Tnm;
+      Tnm=Tn;
+      Tn =Tnp;
+      sum+= Tn*Coeffs[i];
+    }
+    return sum;
+  };
+
+  RealD approxD(RealD x)
+  {
+    RealD Un;
+    RealD Unm;
+    RealD Unp;
+      
+    RealD y=( x-0.5*(hi+lo))/(0.5*(hi-lo));
+      
+    RealD U0=1;
+    RealD U1=2*y;
+      
+    RealD sum;
+    sum = Coeffs[1]*U0;
+    sum+= Coeffs[2]*U1*2.0;
+      
+    Un =U1;
+    Unm=U0;
+    for(int i=2;i<order-1;i++){
+      Unp=2*y*Un-Unm;
+      Unm=Un;
+      Un =Unp;
+      sum+= Un*Coeffs[i+1]*(i+1.0);
+    }
+    return sum/(0.5*(hi-lo));
+  };
+    
+  RealD approxInv(RealD z, RealD x0, int maxiter, RealD resid) {
+    RealD x = x0;
+    RealD eps;
+      
+    int i;
+    for (i=0;i<maxiter;i++) {
+      eps = approx(x) - z;
+      if (fabs(eps / z) < resid)
+	return x;
+      x = x - eps / approxD(x);
+    }
+      
+    return std::numeric_limits<double>::quiet_NaN();
+  }
+    
+  // Implement the required interface
+  void operator() (LinearOperatorBase<Field> &Linop, const Field &in, Field &out) {
+
+    GridBase *grid=in.Grid();
+
+    // std::cout << "Chevyshef(): in.Grid()="<<in.Grid()<<std::endl;
+    //std::cout <<" Linop.Grid()="<<Linop.Grid()<<"Linop.RedBlackGrid()="<<Linop.RedBlackGrid()<<std::endl;
+
+    int vol=grid->gSites();
+
+    Field T0(grid); T0 = in;  
+    Field T1(grid); 
+    Field T2(grid);
+    Field y(grid);
+      
+    Field *Tnm = &T0;
+    Field *Tn  = &T1;
+    Field *Tnp = &T2;
+
+    // Tn=T1 = (xscale M + mscale)in
+    RealD xscale = 2.0/(hi-lo);
+    RealD mscale = -(hi+lo)/(hi-lo);
+    Linop.HermOp(T0,y);
+    T1=y*xscale+in*mscale;
+
+    // sum = .5 c[0] T0 + c[1] T1
+    out = (0.5*Coeffs[0])*T0 + Coeffs[1]*T1;
+    for(int n=2;n<order;n++){
+	
+      Linop.HermOp(*Tn,y);
+
+      y=xscale*y+mscale*(*Tn);
+
+      *Tnp=2.0*y-(*Tnm);
+
+      out=out+Coeffs[n]* (*Tnp);
+
+      // Cycle pointers to avoid copies
+      Field *swizzle = Tnm;
+      Tnm    =Tn;
+      Tn     =Tnp;
+      Tnp    =swizzle;
+	  
+    }
+  }
+};
+
+
+template<class Field>
+class ChebyshevLanczos : public Chebyshev<Field> {
+private:
+  std::vector<RealD> Coeffs;
+  int order;
+  RealD alpha;
+  RealD beta;
+  RealD mu;
+
+public:
+  ChebyshevLanczos(RealD _alpha,RealD _beta,RealD _mu,int _order) :
+    alpha(_alpha),
+    beta(_beta),
+    mu(_mu)
+  {
+    order=_order;
+    Coeffs.resize(order);
+    for(int i=0;i<_order;i++){
+      Coeffs[i] = 0.0;
+    }
+    Coeffs[order-1]=1.0;
+  };
+
+  void csv(std::ostream &out){
+    for (RealD x=-1.2*alpha; x<1.2*alpha; x+=(2.0*alpha)/10000) {
+      RealD f = approx(x);
+      out<< x<<" "<<f<<std::endl;
+    }
+    return;
+  }
+
+  RealD approx(RealD xx) // Convenience for plotting the approximation
+  {
+    RealD Tn;
+    RealD Tnm;
+    RealD Tnp;
+    Real aa = alpha * alpha;
+    Real bb = beta  *  beta;
+      
+    RealD x = ( 2.0 * (xx-mu)*(xx-mu) - (aa+bb) ) / (aa-bb);
+
+    RealD y= x;
+      
+    RealD T0=1;
+    RealD T1=y;
+      
+    RealD sum;
+    sum = 0.5*Coeffs[0]*T0;
+    sum+= Coeffs[1]*T1;
+      
+    Tn =T1;
+    Tnm=T0;
+    for(int i=2;i<order;i++){
+      Tnp=2*y*Tn-Tnm;
+      Tnm=Tn;
+      Tn =Tnp;
+      sum+= Tn*Coeffs[i];
+    }
+    return sum;
+  };
+
+  // shift_Multiply in Rudy's code
+  void AminusMuSq(LinearOperatorBase<Field> &Linop, const Field &in, Field &out) 
+  {
+    GridBase *grid=in.Grid();
+    Field tmp(grid);
+
+    RealD aa= alpha*alpha;
+    RealD bb= beta * beta;
+
+    Linop.HermOp(in,out);
+    out = out - mu*in;
+
+    Linop.HermOp(out,tmp);
+    tmp = tmp - mu * out;
+
+    out = (2.0/ (aa-bb) ) * tmp -  ((aa+bb)/(aa-bb))*in;
+  };
+  // Implement the required interface
+  void operator() (LinearOperatorBase<Field> &Linop, const Field &in, Field &out) {
+
+    GridBase *grid=in.Grid();
+
+    int vol=grid->gSites();
+
+    Field T0(grid); T0 = in;  
+    Field T1(grid); 
+    Field T2(grid);
+    Field  y(grid);
+      
+    Field *Tnm = &T0;
+    Field *Tn  = &T1;
+    Field *Tnp = &T2;
+
+    // Tn=T1 = (xscale M )*in
+    AminusMuSq(Linop,T0,T1);
+
+    // sum = .5 c[0] T0 + c[1] T1
+    out = (0.5*Coeffs[0])*T0 + Coeffs[1]*T1;
+    for(int n=2;n<order;n++){
+	
+      AminusMuSq(Linop,*Tn,y);
+
+      *Tnp=2.0*y-(*Tnm);
+
+      out=out+Coeffs[n]* (*Tnp);
+
+      // Cycle pointers to avoid copies
+      Field *swizzle = Tnm;
+      Tnm    =Tn;
+      Tn     =Tnp;
+      Tnp    =swizzle;
+	  
+    }
+  }
+};
+NAMESPACE_END(Grid);
+#endif
--- a/Grid/algorithms/approx/Forecast.h
+++ b/Grid/algorithms/approx/Forecast.h
@@ -0,0 +1,152 @@
+/*************************************************************************************
+
+Grid physics library, www.github.com/paboyle/Grid
+
+Source file: ./lib/algorithms/approx/Forecast.h
+
+Copyright (C) 2015
+
+Author: Peter Boyle <paboyle@ph.ed.ac.uk>
+Author: paboyle <paboyle@ph.ed.ac.uk>
+Author: David Murphy <dmurphy@phys.columbia.edu>
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along
+with this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+
+See the full license in the file "LICENSE" in the top level distribution directory
+*************************************************************************************/
+			   /*  END LEGAL */
+
+#ifndef INCLUDED_FORECAST_H
+#define INCLUDED_FORECAST_H
+
+NAMESPACE_BEGIN(Grid);
+
+// Abstract base class.
+// Takes a matrix (Mat), a source (phi), and a vector of Fields (chi)
+// and returns a forecasted solution to the system D*psi = phi (psi).
+template<class Matrix, class Field>
+class Forecast
+{
+public:
+  virtual Field operator()(Matrix &Mat, const Field& phi, const std::vector<Field>& chi) = 0;
+};
+
+// Implementation of Brower et al.'s chronological inverter (arXiv:hep-lat/9509012),
+// used to forecast solutions across poles of the EOFA heatbath.
+//
+// Modified from CPS (cps_pp/src/util/dirac_op/d_op_base/comsrc/minresext.C)
+template<class Matrix, class Field>
+class ChronoForecast : public Forecast<Matrix,Field>
+{
+public:
+  Field operator()(Matrix &Mat, const Field& phi, const std::vector<Field>& prev_solns)
+  {
+    int degree = prev_solns.size();
+    Field chi(phi); // forecasted solution
+
+    // Trivial cases
+    if(degree == 0){ chi = Zero(); return chi; }
+    else if(degree == 1){ return prev_solns[0]; }
+
+    //    RealD dot;
+    ComplexD xp;
+    Field r(phi); // residual
+    Field Mv(phi);
+    std::vector<Field> v(prev_solns); // orthonormalized previous solutions
+    std::vector<Field> MdagMv(degree,phi);
+
+    // Array to hold the matrix elements
+    std::vector<std::vector<ComplexD>> G(degree, std::vector<ComplexD>(degree));
+
+    // Solution and source vectors
+    std::vector<ComplexD> a(degree);
+    std::vector<ComplexD> b(degree);
+
+    // Orthonormalize the vector basis
+    for(int i=0; i<degree; i++){
+      v[i] *= 1.0/std::sqrt(norm2(v[i]));
+      for(int j=i+1; j<degree; j++){ v[j] -= innerProduct(v[i],v[j]) * v[i]; }
+    }
+
+    // Perform sparse matrix multiplication and construct rhs
+    for(int i=0; i<degree; i++){
+      b[i] = innerProduct(v[i],phi);
+      Mat.M(v[i],Mv);
+      Mat.Mdag(Mv,MdagMv[i]);
+      G[i][i] = innerProduct(v[i],MdagMv[i]);
+    }
+
+    // Construct the matrix
+    for(int j=0; j<degree; j++){
+      for(int k=j+1; k<degree; k++){
+	G[j][k] = innerProduct(v[j],MdagMv[k]);
+	G[k][j] = conjugate(G[j][k]);
+      }}
+
+    // Gauss-Jordan elimination with partial pivoting
+    for(int i=0; i<degree; i++){
+
+      // Perform partial pivoting
+      int k = i;
+      for(int j=i+1; j<degree; j++){ if(abs(G[j][j]) > abs(G[k][k])){ k = j; } }
+      if(k != i){
+	xp = b[k];
+	b[k] = b[i];
+	b[i] = xp;
+	for(int j=0; j<degree; j++){
+	  xp = G[k][j];
+	  G[k][j] = G[i][j];
+	  G[i][j] = xp;
+	}
+      }
+
+      // Convert matrix to upper triangular form
+      for(int j=i+1; j<degree; j++){
+	xp = G[j][i]/G[i][i];
+	b[j] -= xp * b[i];
+	for(int k=0; k<degree; k++){ G[j][k] -= xp*G[i][k]; }
+      }
+    }
+
+    // Use Gaussian elimination to solve equations and calculate initial guess
+    chi = Zero();
+    r = phi;
+    for(int i=degree-1; i>=0; i--){
+      a[i] = 0.0;
+      for(int j=i+1; j<degree; j++){ a[i] += G[i][j] * a[j]; }
+      a[i] = (b[i]-a[i])/G[i][i];
+      chi += a[i]*v[i];
+      r -= a[i]*MdagMv[i];
+    }
+
+    RealD true_r(0.0);
+    ComplexD tmp;
+    for(int i=0; i<degree; i++){
+      tmp = -b[i];
+      for(int j=0; j<degree; j++){ tmp += G[i][j]*a[j]; }
+      tmp = conjugate(tmp)*tmp;
+      true_r += std::sqrt(tmp.real());
+    }
+
+    RealD error = std::sqrt(norm2(r)/norm2(phi));
+    std::cout << GridLogMessage << "ChronoForecast: |res|/|src| = " << error << std::endl;
+
+    return chi;
+  };
+};
+
+NAMESPACE_END(Grid);
+
+#endif
--- a/Grid/algorithms/approx/LICENSE
+++ b/Grid/algorithms/approx/LICENSE
@@ -0,0 +1,21 @@
+
+Copyright (c) 2011 Michael Clark
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
+
--- a/Grid/algorithms/approx/MultiShiftFunction.cc
+++ b/Grid/algorithms/approx/MultiShiftFunction.cc
@@ -0,0 +1,57 @@
+    /*************************************************************************************
+
+    Grid physics library, www.github.com/paboyle/Grid 
+
+    Source file: ./lib/algorithms/approx/MultiShiftFunction.cc
+
+    Copyright (C) 2015
+
+Author: Azusa Yamaguchi <ayamaguc@staffmail.ed.ac.uk>
+
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License along
+    with this program; if not, write to the Free Software Foundation, Inc.,
+    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+
+    See the full license in the file "LICENSE" in the top level distribution directory
+    *************************************************************************************/
+    /*  END LEGAL */
+#include <Grid/GridCore.h>
+
+NAMESPACE_BEGIN(Grid);
+
+double MultiShiftFunction::approx(double x)
+{
+  double a = norm;
+  for(int n=0;n<poles.size();n++){
+    a = a + residues[n]/(x+poles[n]);
+  }
+  return a;
+}
+void MultiShiftFunction::gnuplot(std::ostream &out)
+{
+  out<<"f(x) = "<<norm<<"";
+  for(int n=0;n<poles.size();n++){
+    out<<"+("<<residues[n]<<"/(x+"<<poles[n]<<"))";
+  }
+  out<<";"<<std::endl;
+}
+void MultiShiftFunction::csv(std::ostream &out)
+{
+  for (double x=lo; x<hi; x*=1.05) {
+    double f = approx(x);
+    double r = sqrt(x);
+    out<< x<<","<<r<<","<<f<<","<<r-f<<std::endl;
+  }
+  return;
+}
+NAMESPACE_END(Grid);
--- a/Grid/algorithms/approx/MultiShiftFunction.h
+++ b/Grid/algorithms/approx/MultiShiftFunction.h
@@ -0,0 +1,67 @@
+    /*************************************************************************************
+
+    Grid physics library, www.github.com/paboyle/Grid 
+
+    Source file: ./lib/algorithms/approx/MultiShiftFunction.h
+
+    Copyright (C) 2015
+
+Author: Azusa Yamaguchi <ayamaguc@staffmail.ed.ac.uk>
+Author: Peter Boyle <paboyle@ph.ed.ac.uk>
+
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License along
+    with this program; if not, write to the Free Software Foundation, Inc.,
+    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+
+    See the full license in the file "LICENSE" in the top level distribution directory
+    *************************************************************************************/
+    /*  END LEGAL */
+#ifndef MULTI_SHIFT_FUNCTION
+#define MULTI_SHIFT_FUNCTION
+
+NAMESPACE_BEGIN(Grid);
+
+class MultiShiftFunction {
+public:
+  int order;
+  std::vector<RealD> poles;
+  std::vector<RealD> residues;
+  std::vector<RealD> tolerances;
+  RealD norm;
+  RealD lo,hi;
+
+  MultiShiftFunction(int n,RealD _lo,RealD _hi): poles(n), residues(n), lo(_lo), hi(_hi) {;};
+  RealD approx(RealD x);
+  void csv(std::ostream &out);
+  void gnuplot(std::ostream &out);
+
+  void Init(AlgRemez & remez,double tol,bool inverse) 
+  {
+    order=remez.getDegree();
+    tolerances.resize(remez.getDegree(),tol);
+    poles.resize(remez.getDegree());
+    residues.resize(remez.getDegree());
+    remez.getBounds(lo,hi);
+    if ( inverse ) remez.getIPFE (&residues[0],&poles[0],&norm);
+    else           remez.getPFE (&residues[0],&poles[0],&norm);
+  }
+  // Allow deferred initialisation
+  MultiShiftFunction(void){};
+  MultiShiftFunction(AlgRemez & remez,double tol,bool inverse)
+  {
+    Init(remez,tol,inverse);
+  }
+
+};
+NAMESPACE_END(Grid);
+#endif
--- a/Grid/algorithms/approx/README
+++ b/Grid/algorithms/approx/README
@@ -0,0 +1,80 @@
+-----------------------------------------------------------------------------------
+
+PAB. Took Mike Clark's AlgRemez from GitHub and (modified a little) include.
+This is open source and license and readme and comments are preserved consistent
+with the license. Mike, thankyou!
+-----------------------------------------------------------------------------------
+-----------------------------------------------------------------------------------
+AlgRemez
+
+The archive downloadable here contains an implementation of the Remez
+algorithm which calculates optimal rational (and polynomial)
+approximations to the nth root over a given spectral range.  The Remez
+algorithm, although in principle is extremely straightforward to
+program, is quite difficult to get completely correct, e.g., the Maple
+implementation of the algorithm does not always converge to the
+correct answer.
+
+To use this algorithm you need to install GMP, the GNU Multiple
+Precision Library, and when configuring the install, you must include
+the --enable-mpfr option (see the GMP manual for more details).  You
+also have to edit the Makefile for AlgRemez appropriately for your
+system, namely to point corrrectly to the location of the GMP library.
+
+The simple main program included with this archive invokes the
+AlgRemez class to calculate an approximation given by command line
+arguments.  It is invoked by the following
+
+./test y z n d lambda_low lambda_high precision,
+
+where the function to be approximated is f(x) = x^(y/z), with degree
+(n,d) over the spectral range [lambda_low, lambda_high], using
+precision digits of precision in the arithmetic.  So an example would
+be
+
+./test 1 2 5 5 0.0004 64 40
+
+which corresponds to constructing a rational approximation to the
+square root function, with degree (5,5) over the range [0.0004,64]
+with 40 digits of precision used for the arithmetic.  The parameters y
+and z must be positive, the approximation to f(x) = x^(-y/z) is simply
+the inverse of the approximation to f(x) = x^(y/z).  After the
+approximation has been constructed, the roots and poles of the
+rational function are found, and then the partial fraction expansion
+of both the rational function and it's inverse are found, the results
+of which are output to a file called "approx.dat".  In addition, the
+error function of the approximation is output to "error.dat", where it
+can be checked that the resultant approximation satisfies Chebychev's
+criterion, namely all error maxima are equal in magnitude, and
+adjacent maxima are oppostie in sign.  There are some caveats here
+however, the optimal polynomial approximation has complex roots, and
+the root finding implemented here cannot (yet) handle complex roots.
+In addition, the partial fraction expansion of rational approximations
+is only found for the case n = d, i.e., the degree of numerator
+polynomial equals that of the denominator polynomial.  The convention
+for the partial fraction expansion is that polar shifts are always
+written added to x, not subtracted.
+
+To do list
+
+1.  Include an exponential dampening factor in the function to be
+approximated.  This may sound trivial to implement, but for some
+parameters, the algorithm seems to breakdown.  Also, the roots in the
+rational approximation sometimes become complex, which currently
+breaks the stupidly simple root finding code.
+
+2. Make the algorithm faster - it's too slow when running on qcdoc.
+
+3. Add complex root finding.
+
+4. Add more options for error minimisation - currently the code
+minimises the relative error, should add options for absolute error,
+and other norms.
+
+There will be a forthcoming publication concerning the results
+generated by this software, but in the meantime, if you use this
+software, please cite it as
+"M.A. Clark and A.D. Kennedy, https://github.com/mikeaclark/AlgRemez, 2005".
+
+If you have any problems using the software, then please email scientist.mike@gmail.com.
+
--- a/Grid/algorithms/approx/Remez.cc
+++ b/Grid/algorithms/approx/Remez.cc
@@ -0,0 +1,759 @@
+/*
+
+  Mike Clark - 25th May 2005
+
+  alg_remez.C
+
+  AlgRemez is an implementation of the Remez algorithm, which in this
+  case is used for generating the optimal nth root rational
+  approximation.
+
+  Note this class requires the gnu multiprecision (GNU MP) library.
+
+*/
+
+#include<math.h>
+#include<stdio.h>
+#include<stdlib.h>
+#include<string>
+#include<iostream>
+#include<iomanip>
+#include<cassert>
+
+#include<Grid/algorithms/approx/Remez.h>
+
+// Constructor
+AlgRemez::AlgRemez(double lower, double upper, long precision) 
+{
+  prec = precision;
+  bigfloat::setDefaultPrecision(prec);
+
+  apstrt = lower;
+  apend = upper;
+  apwidt = apend - apstrt;
+
+  std::cout<<"Approximation bounds are ["<<apstrt<<","<<apend<<"]\n";
+  std::cout<<"Precision of arithmetic is "<<precision<<std::endl;
+
+  alloc = 0;
+  n = 0;
+  d = 0;
+
+  foundRoots = 0;
+
+  // Only require the approximation spread to be less than 1 ulp
+  tolerance = 1e-15;
+}
+
+// Destructor
+AlgRemez::~AlgRemez()
+{
+  if (alloc) {
+    delete [] param;
+    delete [] roots;
+    delete [] poles;
+    delete [] xx;
+    delete [] mm;
+    delete [] a_power;
+    delete [] a;
+  }
+}
+
+// Free memory and reallocate as necessary
+void AlgRemez::allocate(int num_degree, int den_degree)
+{
+  // Arrays have previously been allocated, deallocate first, then allocate
+  if (alloc) {
+    delete [] param;
+    delete [] roots;
+    delete [] poles;
+    delete [] xx;
+    delete [] mm;
+  }
+
+  // Note use of new and delete in memory allocation - cannot run on qcdsp
+  param = new bigfloat[num_degree+den_degree+1];
+  roots = new bigfloat[num_degree];
+  poles = new bigfloat[den_degree];
+  xx = new bigfloat[num_degree+den_degree+3];
+  mm = new bigfloat[num_degree+den_degree+2];
+
+  if (!alloc) {
+    // The coefficients of the sum in the exponential
+    a = new bigfloat[SUM_MAX];
+    a_power = new int[SUM_MAX];
+  }
+
+  alloc = 1;
+}
+
+// Reset the bounds of the approximation
+void AlgRemez::setBounds(double lower, double upper)
+{
+  apstrt = lower;
+  apend = upper;
+  apwidt = apend - apstrt;
+}
+
+// Generate the rational approximation x^(pnum/pden)
+double AlgRemez::generateApprox(int degree, unsigned long pnum, 
+				unsigned long pden)
+{
+  return generateApprox(degree, degree, pnum, pden);
+}
+
+double AlgRemez::generateApprox(int num_degree, int den_degree, 
+				unsigned long pnum, unsigned long pden)
+{
+  double *a_param = 0;
+  int *a_pow = 0;
+  return generateApprox(num_degree, den_degree, pnum, pden, 0, a_param, a_pow);
+}
+
+// Generate the rational approximation x^(pnum/pden)
+double AlgRemez::generateApprox(int num_degree, int den_degree, 
+				unsigned long pnum, unsigned long pden,
+				int a_len, double *a_param, int *a_pow)
+{
+  std::cout<<"Degree of the approximation is ("<<num_degree<<","<<den_degree<<")\n";
+  std::cout<<"Approximating the function x^("<<pnum<<"/"<<pden<<")\n";
+
+  // Reallocate arrays, since degree has changed
+  if (num_degree != n || den_degree != d) allocate(num_degree,den_degree);
+
+  assert(a_len<=SUM_MAX);
+
+  step = new bigfloat[num_degree+den_degree+2];
+
+  a_length = a_len;
+  for (int j=0; j<a_len; j++) {
+    a[j]= a_param[j];
+    a_power[j] = a_pow[j];
+  }
+
+  power_num = pnum;
+  power_den = pden;
+  spread = 1.0e37;
+  iter = 0;
+
+  n = num_degree;
+  d = den_degree;
+  neq = n + d + 1;
+
+  initialGuess();
+  stpini(step);
+
+  while (spread > tolerance) { //iterate until convergance
+
+    if (iter++%100==0) 
+      std::cout<<"Iteration " <<iter-1<<" spread "<<(double)spread<<" delta "<<(double)delta<<std::endl; 
+
+    equations();
+    if (delta < tolerance) {
+      std::cout<<"Delta too small, try increasing precision\n";
+      assert(0);
+    };    
+    assert( delta>= tolerance);
+
+    search(step);
+  }
+
+  int sign;
+  double error = (double)getErr(mm[0],&sign);
+  std::cout<<"Converged at "<<iter<<" iterations; error = "<<error<<std::endl;
+
+  // Once the approximation has been generated, calculate the roots
+  if(!root()) {
+    std::cout<<"Root finding failed\n";
+  } else {
+    foundRoots = 1;
+  }
+  
+  delete [] step;
+
+  // Return the maximum error in the approximation
+  return error;
+}
+
+// Return the partial fraction expansion of the approximation x^(pnum/pden)
+int AlgRemez::getPFE(double *Res, double *Pole, double *Norm) {
+
+  if (n!=d) {
+    std::cout<<"Cannot handle case: Numerator degree neq Denominator degree\n";
+    return 0;
+  }
+
+  if (!alloc) {
+    std::cout<<"Approximation not yet generated\n";
+    return 0;
+  }
+
+  if (!foundRoots) {
+    std::cout<<"Roots not found, so PFE cannot be taken\n";
+    return 0;
+  }
+
+  bigfloat *r = new bigfloat[n];
+  bigfloat *p = new bigfloat[d];
+  
+  for (int i=0; i<n; i++) r[i] = roots[i];
+  for (int i=0; i<d; i++) p[i] = poles[i];
+  
+  // Perform a partial fraction expansion
+  pfe(r, p, norm);
+
+  // Convert to double and return
+  *Norm = (double)norm;
+  for (int i=0; i<n; i++) Res[i] = (double)r[i];
+  for (int i=0; i<d; i++) Pole[i] = (double)p[i];
+
+  delete [] r;
+  delete [] p;
+
+  // Where the smallest shift is located
+  return 0;
+}
+
+// Return the partial fraction expansion of the approximation x^(-pnum/pden)
+int AlgRemez::getIPFE(double *Res, double *Pole, double *Norm) {
+
+  if (n!=d) {
+    std::cout<<"Cannot handle case: Numerator degree neq Denominator degree\n";
+    return 0;
+  }
+
+  if (!alloc) {
+    std::cout<<"Approximation not yet generated\n";
+    return 0;
+  }
+
+  if (!foundRoots) {
+    std::cout<<"Roots not found, so PFE cannot be taken\n";
+    return 0;
+  }
+
+  bigfloat *r = new bigfloat[d];
+  bigfloat *p = new bigfloat[n];
+  
+  // Want the inverse function
+  for (int i=0; i<n; i++) {
+    r[i] = poles[i];
+    p[i] = roots[i];
+  }
+
+  // Perform a partial fraction expansion
+  pfe(r, p, (bigfloat)1l/norm);
+
+  // Convert to double and return
+  *Norm = (double)((bigfloat)1l/(norm));
+  for (int i=0; i<n; i++) {
+    Res[i] = (double)r[i];
+    Pole[i] = (double)p[i];
+  }
+
+  delete [] r;
+  delete [] p;
+
+  // Where the smallest shift is located
+  return 0;
+}
+
+// Initial values of maximal and minimal errors
+void AlgRemez::initialGuess() {
+
+  // Supply initial guesses for solution points
+  long ncheb = neq;			// Degree of Chebyshev error estimate
+  bigfloat a, r;
+
+  // Find ncheb+1 extrema of Chebyshev polynomial
+
+  a = ncheb;
+  mm[0] = apstrt;
+  for (long i = 1; i < ncheb; i++) {
+    r = 0.5 * (1 - cos((M_PI * i)/(double) a));
+    //r *= sqrt_bf(r);
+    r = (exp((double)r)-1.0)/(exp(1.0)-1.0);
+    mm[i] = apstrt + r * apwidt;
+  }
+  mm[ncheb] = apend;
+
+  a = 2.0 * ncheb;
+  for (long i = 0; i <= ncheb; i++) {
+    r = 0.5 * (1 - cos(M_PI * (2*i+1)/(double) a));
+    //r *= sqrt_bf(r); // Squeeze to low end of interval
+    r = (exp((double)r)-1.0)/(exp(1.0)-1.0);
+    xx[i] = apstrt + r * apwidt;
+  }
+}
+
+// Initialise step sizes
+void AlgRemez::stpini(bigfloat *step) {
+  xx[neq+1] = apend;
+  delta = 0.25;
+  step[0] = xx[0] - apstrt;
+  for (int i = 1; i < neq; i++) step[i] = xx[i] - xx[i-1];
+  step[neq] = step[neq-1];
+}
+
+// Search for error maxima and minima
+void AlgRemez::search(bigfloat *step) {
+  bigfloat a, q, xm, ym, xn, yn, xx0, xx1;
+  int i, meq, emsign, ensign, steps;
+
+  meq = neq + 1;
+  bigfloat *yy = new bigfloat[meq];
+
+  bigfloat eclose = 1.0e30;
+  bigfloat farther = 0l;
+
+  xx0 = apstrt;
+
+  for (i = 0; i < meq; i++) {
+    steps = 0;
+    xx1 = xx[i]; // Next zero
+    if (i == meq-1) xx1 = apend;
+    xm = mm[i];
+    ym = getErr(xm,&emsign);
+    q = step[i];
+    xn = xm + q;
+    if (xn < xx0 || xn >= xx1) {	// Cannot skip over adjacent boundaries
+      q = -q;
+      xn = xm;
+      yn = ym;
+      ensign = emsign;
+    } else {
+      yn = getErr(xn,&ensign);
+      if (yn < ym) {
+	q = -q;
+	xn = xm;
+	yn = ym;
+	ensign = emsign;
+      }
+    }
+  
+    while(yn >= ym) {		// March until error becomes smaller.
+      if (++steps > 10) break;
+      ym = yn;
+      xm = xn;
+      emsign = ensign;
+      a = xm + q;
+      if (a == xm || a <= xx0 || a >= xx1) break;// Must not skip over the zeros either side.
+      xn = a;
+      yn = getErr(xn,&ensign);
+    }
+
+    mm[i] = xm;			// Position of maximum
+    yy[i] = ym;			// Value of maximum
+
+    if (eclose > ym) eclose = ym;
+    if (farther < ym) farther = ym;
+
+    xx0 = xx1; // Walk to next zero.
+  } // end of search loop
+
+  q = (farther - eclose);	// Decrease step size if error spread increased
+  if (eclose != 0.0) q /= eclose; // Relative error spread
+  if (q >= spread) delta *= 0.5; // Spread is increasing; decrease step size
+  spread = q;
+
+  for (i = 0; i < neq; i++) {
+    q = yy[i+1];
+    if (q != 0.0) q = yy[i] / q  - (bigfloat)1l;
+    else q = 0.0625;
+    if (q > (bigfloat)0.25) q = 0.25;
+    q *= mm[i+1] - mm[i];
+    step[i] = q * delta;
+  }
+  step[neq] = step[neq-1];
+  
+  for (i = 0; i < neq; i++) {	// Insert new locations for the zeros.
+    xm = xx[i] - step[i];
+    if (xm <= apstrt) continue;
+    if (xm >= apend) continue;
+    if (xm <= mm[i]) xm = (bigfloat)0.5 * (mm[i] + xx[i]);
+    if (xm >= mm[i+1]) xm = (bigfloat)0.5 * (mm[i+1] + xx[i]);
+    xx[i] = xm;
+  }
+
+  delete [] yy;
+}
+
+// Solve the equations
+void AlgRemez::equations(void) {
+  bigfloat x, y, z;
+  int i, j, ip;
+  bigfloat *aa;
+
+  bigfloat *AA = new bigfloat[(neq)*(neq)];
+  bigfloat *BB = new bigfloat[neq];
+  
+  for (i = 0; i < neq; i++) {	// set up the equations for solution by simq()
+    ip = neq * i;		// offset to 1st element of this row of matrix
+    x = xx[i];			// the guess for this row
+    y = func(x);		// right-hand-side vector
+
+    z = (bigfloat)1l;
+    aa = AA+ip;
+    for (j = 0; j <= n; j++) {
+      *aa++ = z;
+      z *= x;
+    }
+
+    z = (bigfloat)1l;
+    for (j = 0; j < d; j++) {
+      *aa++ = -y * z;
+      z *= x;
+    }
+    BB[i] = y * z;		// Right hand side vector
+  }
+
+  // Solve the simultaneous linear equations.
+  if (simq(AA, BB, param, neq)) {
+    std::cout<<"simq failed\n";
+    exit(0);
+  }
+
+  delete [] AA;
+  delete [] BB;
+
+}
+
+// Evaluate the rational form P(x)/Q(x) using coefficients
+// from the solution vector param
+bigfloat AlgRemez::approx(const bigfloat x) {
+  bigfloat yn, yd;
+  int i;
+
+  // Work backwards toward the constant term.
+  yn = param[n];		// Highest order numerator coefficient
+  for (i = n-1; i >= 0; i--) yn = x * yn  +  param[i]; 
+  yd = x + param[n+d];	// Highest degree coefficient = 1.0
+  for (i = n+d-1; i > n; i--) yd = x * yd  +  param[i];
+
+  return(yn/yd);
+}
+
+// Compute size and sign of the approximation error at x
+bigfloat AlgRemez::getErr(bigfloat x, int *sign) {
+  bigfloat e, f;
+
+  f = func(x);
+  e = approx(x) - f;
+  if (f != 0) e /= f;
+  if (e < (bigfloat)0.0) {
+    *sign = -1;
+    e = -e;
+  }
+  else *sign = 1;
+  
+  return(e);
+}
+
+// Calculate function required for the approximation.
+bigfloat AlgRemez::func(const bigfloat x) {
+
+  bigfloat z = (bigfloat)power_num / (bigfloat)power_den;
+  bigfloat y;
+
+  if (x == (bigfloat)1.0) y = (bigfloat)1.0;
+  else y = pow_bf(x,z);
+
+  if (a_length > 0) {
+    bigfloat sum = 0l;
+    for (int j=0; j<a_length; j++) sum += a[j]*pow_bf(x,a_power[j]);
+    return y * exp_bf(sum);
+  } else {
+    return y;
+  }
+
+}
+
+// Solve the system AX=B
+int AlgRemez::simq(bigfloat A[], bigfloat B[], bigfloat X[], int n) {
+
+  int i, j, ij, ip, ipj, ipk, ipn;
+  int idxpiv, iback;
+  int k, kp, kp1, kpk, kpn;
+  int nip, nkp, nm1;
+  bigfloat em, q, rownrm, big, size, pivot, sum;
+  bigfloat *aa;
+
+  // simq() work vector
+  int *IPS = new int[(neq) * sizeof(int)];
+
+  nm1 = n - 1;
+  // Initialize IPS and X
+  
+  ij = 0;
+  for (i = 0; i < n; i++) {
+    IPS[i] = i;
+    rownrm = 0.0;
+    for(j = 0; j < n; j++) {
+      q = abs_bf(A[ij]);
+      if(rownrm < q) rownrm = q;
+      ++ij;
+    }
+    if (rownrm == (bigfloat)0l) {
+      std::cout<<"simq rownrm=0\n";
+      delete [] IPS;
+      return(1);
+    }
+    X[i] = (bigfloat)1.0 / rownrm;
+  }
+  
+  for (k = 0; k < nm1; k++) {
+    big = 0.0;
+    idxpiv = 0;
+    
+    for (i = k; i < n; i++) {
+      ip = IPS[i];
+      ipk = n*ip + k;
+      size = abs_bf(A[ipk]) * X[ip];
+      if (size > big) {
+	big = size;
+	idxpiv = i;
+      }
+    }
+    
+    if (big == (bigfloat)0l) {
+      std::cout<<"simq big=0\n";
+      delete [] IPS;
+      return(2);
+    }
+    if (idxpiv != k) {
+      j = IPS[k];
+      IPS[k] = IPS[idxpiv];
+      IPS[idxpiv] = j;
+    }
+    kp = IPS[k];
+    kpk = n*kp + k;
+    pivot = A[kpk];
+    kp1 = k+1;
+    for (i = kp1; i < n; i++) {
+      ip = IPS[i];
+      ipk = n*ip + k;
+      em = -A[ipk] / pivot;
+      A[ipk] = -em;
+      nip = n*ip;
+      nkp = n*kp;
+      aa = A+nkp+kp1;
+      for (j = kp1; j < n; j++) {
+	ipj = nip + j;
+	A[ipj] = A[ipj] + em * *aa++;
+      }
+    }
+  }
+  kpn = n * IPS[n-1] + n - 1;	// last element of IPS[n] th row
+  if (A[kpn] == (bigfloat)0l) {
+    std::cout<<"simq A[kpn]=0\n";
+    delete [] IPS;
+    return(3);
+  }
+
+  
+  ip = IPS[0];
+  X[0] = B[ip];
+  for (i = 1; i < n; i++) {
+    ip = IPS[i];
+    ipj = n * ip;
+    sum = 0.0;
+    for (j = 0; j < i; j++) {
+      sum += A[ipj] * X[j];
+      ++ipj;
+    }
+    X[i] = B[ip] - sum;
+  }
+  
+  ipn = n * IPS[n-1] + n - 1;
+  X[n-1] = X[n-1] / A[ipn];
+  
+  for (iback = 1; iback < n; iback++) {
+    //i goes (n-1),...,1
+    i = nm1 - iback;
+    ip = IPS[i];
+    nip = n*ip;
+    sum = 0.0;
+    aa = A+nip+i+1;
+    for (j= i + 1; j < n; j++) 
+      sum += *aa++ * X[j];
+    X[i] = (X[i] - sum) / A[nip+i];
+  }
+  
+  delete [] IPS;
+  return(0);
+}
+
+// Calculate the roots of the approximation
+int AlgRemez::root() {
+
+  long i,j;
+  bigfloat x,dx=0.05;
+  bigfloat upper=1, lower=-100000;
+  bigfloat tol = 1e-20;
+
+  bigfloat *poly = new bigfloat[neq+1];
+
+  // First find the numerator roots
+  for (i=0; i<=n; i++) poly[i] = param[i];
+
+  for (i=n-1; i>=0; i--) {
+    roots[i] = rtnewt(poly,i+1,lower,upper,tol);
+    if (roots[i] == 0.0) {
+      std::cout<<"Failure to converge on root "<<i+1<<"/"<<n<<"\n";
+      return 0;
+    }
+    poly[0] = -poly[0]/roots[i];
+    for (j=1; j<=i; j++) poly[j] = (poly[j-1] - poly[j])/roots[i];
+  }
+  
+ // Now find the denominator roots
+  poly[d] = 1l;
+  for (i=0; i<d; i++) poly[i] = param[n+1+i];
+
+  for (i=d-1; i>=0; i--) {
+    poles[i]=rtnewt(poly,i+1,lower,upper,tol);
+    if (poles[i] == 0.0) {
+      std::cout<<"Failure to converge on pole "<<i+1<<"/"<<d<<"\n";
+      return 0;
+    }
+    poly[0] = -poly[0]/poles[i];
+    for (j=1; j<=i; j++) poly[j] = (poly[j-1] - poly[j])/poles[i];
+  }
+
+  norm = param[n];
+
+  delete [] poly;
+
+  return 1;
+}
+
+// Evaluate the polynomial
+bigfloat AlgRemez::polyEval(bigfloat x, bigfloat *poly, long size) {
+  bigfloat f = poly[size];
+  for (int i=size-1; i>=0; i--) f = f*x + poly[i];
+  return f;
+}
+
+// Evaluate the differential of the polynomial
+bigfloat AlgRemez::polyDiff(bigfloat x, bigfloat *poly, long size) {
+  bigfloat df = (bigfloat)size*poly[size];
+  for (int i=size-1; i>0; i--) df = df*x + (bigfloat)i*poly[i];
+  return df;
+}
+
+
+// Newton's method to calculate roots
+bigfloat AlgRemez::rtnewt(bigfloat *poly, long i, bigfloat x1, 
+			  bigfloat x2, bigfloat xacc) {
+  int j;
+  bigfloat df, dx, f, rtn;
+
+  rtn=(bigfloat)0.5*(x1+x2);
+  for (j=1; j<=JMAX;j++) {
+    f = polyEval(rtn, poly, i);
+    df = polyDiff(rtn, poly, i);
+    dx = f/df;
+    rtn -= dx;
+    if (abs_bf(dx) < xacc) return rtn;
+  }
+  std::cout<<"Maximum number of iterations exceeded in rtnewt\n";
+  return 0.0;
+}
+
+// Evaluate the partial fraction expansion of the rational function
+// with res roots and poles poles.  Result is overwritten on input
+// arrays.
+void AlgRemez::pfe(bigfloat *res, bigfloat *poles, bigfloat norm) {
+  int i,j,small;
+  bigfloat temp;
+  bigfloat *numerator = new bigfloat[n];
+  bigfloat *denominator = new bigfloat[d];
+
+  // Construct the polynomials explicitly 
+  for (i=1; i<n; i++) {
+    numerator[i] = 0l;
+    denominator[i] = 0l;
+  }
+  numerator[0]=1l;
+  denominator[0]=1l;
+
+  for (j=0; j<n; j++) {
+    for (i=n-1; i>=0; i--) {
+      numerator[i] *= -res[j];
+      denominator[i] *= -poles[j];
+      if (i>0) {
+	numerator[i] += numerator[i-1];
+	denominator[i] += denominator[i-1];
+      }
+    }
+  }
+
+  // Convert to proper fraction form.
+  // Fraction is now in the form 1 + n/d, where O(n)+1=O(d)
+  for (i=0; i<n; i++) numerator[i] -= denominator[i];
+
+  // Find the residues of the partial fraction expansion and absorb the
+  // coefficients.
+  for (i=0; i<n; i++) {
+    res[i] = 0l;
+    for (j=n-1; j>=0; j--) {
+      res[i] = poles[i]*res[i]+numerator[j];
+    }
+    for (j=n-1; j>=0; j--) {
+      if (i!=j) res[i] /= poles[i]-poles[j];
+    }
+    res[i] *= norm;
+  }  
+
+  // res now holds the residues
+  j = 0;
+  for (i=0; i<n; i++) poles[i] = -poles[i];
+
+  // Move the ordering of the poles from smallest to largest
+  for (j=0; j<n; j++) {
+    small = j;
+    for (i=j+1; i<n; i++) {
+      if (poles[i] < poles[small]) small = i;
+    }
+    if (small != j) {
+      temp = poles[small];
+      poles[small] = poles[j];
+      poles[j] = temp;
+      temp = res[small];
+      res[small] = res[j];
+      res[j] = temp;
+    }
+  }
+
+  delete [] numerator;
+  delete [] denominator;
+}
+
+double AlgRemez::evaluateApprox(double x) {
+  return (double)approx((bigfloat)x);
+}
+
+double AlgRemez::evaluateInverseApprox(double x) {
+  return 1.0/(double)approx((bigfloat)x);
+}
+
+double AlgRemez::evaluateFunc(double x) {
+  return (double)func((bigfloat)x);
+}
+
+double AlgRemez::evaluateInverseFunc(double x) {
+  return 1.0/(double)func((bigfloat)x);
+}
+
+void AlgRemez::csv(std::ostream & os)
+{
+  double lambda_low = apstrt;
+  double lambda_high= apend;
+  for (double x=lambda_low; x<lambda_high; x*=1.05) {
+    double f = evaluateFunc(x);
+    double r = evaluateApprox(x);
+    os<< x<<","<<r<<","<<f<<","<<r-f<<std::endl;
+  }
+  return;
+}
+
--- a/Grid/algorithms/approx/Remez.h
+++ b/Grid/algorithms/approx/Remez.h
@@ -0,0 +1,184 @@
+/*
+
+  Mike Clark - 25th May 2005
+
+  alg_remez.h
+
+  AlgRemez is an implementation of the Remez algorithm, which in this
+  case is used for generating the optimal nth root rational
+  approximation.
+
+  Note this class requires the gnu multiprecision (GNU MP) library.
+
+*/
+
+#ifndef INCLUDED_ALG_REMEZ_H
+#define INCLUDED_ALG_REMEZ_H
+
+#include <stddef.h>
+#include <Grid/GridStd.h>
+
+#ifdef HAVE_LIBGMP
+#include "bigfloat.h"
+#else
+#include "bigfloat_double.h"
+#endif
+
+#define JMAX 10000 //Maximum number of iterations of Newton's approximation
+#define SUM_MAX 10 // Maximum number of terms in exponential
+
+/*
+ *Usage examples
+  AlgRemez remez(lambda_low,lambda_high,precision);
+  error = remez.generateApprox(n,d,y,z);
+  remez.getPFE(res,pole,&norm);
+  remez.getIPFE(res,pole,&norm);
+  remez.csv(ostream &os);
+ */
+
+class AlgRemez
+{
+ private:
+  char *cname;
+
+  // The approximation parameters
+  bigfloat *param, *roots, *poles;
+  bigfloat norm;
+
+  // The numerator and denominator degree (n=d)
+  int n, d;
+  
+  // The bounds of the approximation
+  bigfloat apstrt, apwidt, apend;
+
+  // the numerator and denominator of the power we are approximating
+  unsigned long power_num; 
+  unsigned long power_den;
+
+  // Flag to determine whether the arrays have been allocated
+  int alloc;
+
+  // Flag to determine whether the roots have been found
+  int foundRoots;
+
+  // Variables used to calculate the approximation
+  int nd1, iter;
+  bigfloat *xx, *mm, *step;
+  bigfloat delta, spread, tolerance;
+
+  // The exponential summation coefficients
+  bigfloat *a;
+  int *a_power;
+  int a_length;
+
+  // The number of equations we must solve at each iteration (n+d+1)
+  int neq;
+
+  // The precision of the GNU MP library
+  long prec;
+
+  // Initial values of maximal and minmal errors
+  void initialGuess();
+
+  // Solve the equations
+  void equations();
+
+  // Search for error maxima and minima
+  void search(bigfloat *step); 
+
+  // Initialise step sizes
+  void stpini(bigfloat *step);
+
+  // Calculate the roots of the approximation
+  int root();
+
+  // Evaluate the polynomial
+  bigfloat polyEval(bigfloat x, bigfloat *poly, long size);
+  //complex_bf polyEval(complex_bf x, complex_bf *poly, long size);
+
+  // Evaluate the differential of the polynomial
+  bigfloat polyDiff(bigfloat x, bigfloat *poly, long size);
+  //complex_bf polyDiff(complex_bf x, complex_bf *poly, long size);
+
+  // Newton's method to calculate roots
+  bigfloat rtnewt(bigfloat *poly, long i, bigfloat x1, bigfloat x2, bigfloat xacc);
+  //complex_bf rtnewt(complex_bf *poly, long i, bigfloat x1, bigfloat x2, bigfloat xacc);
+
+  // Evaluate the partial fraction expansion of the rational function
+  // with res roots and poles poles.  Result is overwritten on input
+  // arrays.
+  void pfe(bigfloat *res, bigfloat* poles, bigfloat norm);
+
+  // Calculate function required for the approximation
+  bigfloat func(bigfloat x);
+
+  // Compute size and sign of the approximation error at x
+  bigfloat getErr(bigfloat x, int *sign);
+
+  // Solve the system AX=B
+  int simq(bigfloat *A, bigfloat *B, bigfloat *X, int n);
+
+  // Free memory and reallocate as necessary
+  void allocate(int num_degree, int den_degree);
+
+  // Evaluate the rational form P(x)/Q(x) using coefficients from the
+  // solution vector param
+  bigfloat approx(bigfloat x);
+
+ public:
+  
+  // Constructor
+  AlgRemez(double lower, double upper, long prec);
+
+  // Destructor
+  virtual ~AlgRemez();
+
+  int getDegree(void){ 
+    assert(n==d);
+    return n;
+  }
+  // Reset the bounds of the approximation
+  void setBounds(double lower, double upper);
+  // Reset the bounds of the approximation
+  void getBounds(double &lower, double &upper) { 
+    lower=(double)apstrt;
+    upper=(double)apend;
+  }
+
+  // Generate the rational approximation x^(pnum/pden)
+  double generateApprox(int num_degree, int den_degree, 
+			unsigned long power_num, unsigned long power_den, 
+			int a_len, double* a_param, int* a_pow);
+  double generateApprox(int num_degree, int den_degree, 
+			unsigned long power_num, unsigned long power_den);
+  double generateApprox(int degree, unsigned long power_num, 
+			unsigned long power_den);
+
+  // Return the partial fraction expansion of the approximation x^(pnum/pden)
+  int getPFE(double *res, double *pole, double *norm);
+
+  // Return the partial fraction expansion of the approximation x^(-pnum/pden)
+  int getIPFE(double *res, double *pole, double *norm);
+
+  // Evaluate the rational form P(x)/Q(x) using coefficients from the
+  // solution vector param
+  double evaluateApprox(double x);
+
+  // Evaluate the rational form Q(x)/P(x) using coefficients from the
+  // solution vector param
+  double evaluateInverseApprox(double x);
+
+  // Calculate function required for the approximation
+  double evaluateFunc(double x);
+
+  // Calculate inverse function required for the approximation
+  double evaluateInverseFunc(double x);
+  
+  // Dump csv of function, approx and error
+  void csv(std::ostream &os);
+};
+
+#endif  // Include guard
+
+
+
--- a/Grid/algorithms/approx/Zolotarev.cc
+++ b/Grid/algorithms/approx/Zolotarev.cc
@@ -0,0 +1,730 @@
+/* -*- Mode: C; comment-column: 22; fill-column: 79; compile-command: "gcc -o zolotarev zolotarev.c -ansi -pedantic -lm -DTEST"; -*- */
+#define VERSION Source Time-stamp: <2015-05-18 16:32:08 neo>
+
+/* These C routines evalute the optimal rational approximation to the signum
+ * function for epsilon < |x| < 1 using Zolotarev's theorem.
+ *
+ * To obtain reliable results for high degree approximations (large n) it is
+ * necessary to compute using sufficiently high precision arithmetic. To this
+ * end the code has been parameterised to work with the preprocessor names
+ * INTERNAL_PRECISION and PRECISION set to float, double, or long double as
+ * appropriate. INTERNAL_PRECISION is used in computing the Zolotarev
+ * coefficients, which are converted to PRECISION before being returned to the
+ * caller. Presumably even higher precision could be obtained using GMP or
+ * similar package, but bear in mind that rounding errors might also be
+ * significant in evaluating the resulting polynomial. The convergence criteria
+ * have been written in a precision-independent form. */
+
+#include <math.h>
+#include <stdlib.h>
+#include <stdio.h>
+
+#define MAX(a,b) ((a) > (b) ? (a) : (b))
+#define MIN(a,b) ((a) < (b) ? (a) : (b))
+
+#ifndef INTERNAL_PRECISION
+#define INTERNAL_PRECISION double
+#endif
+
+#include "Zolotarev.h"
+#define ZOLOTAREV_INTERNAL
+#undef ZOLOTAREV_DATA
+#define ZOLOTAREV_DATA izd
+#undef ZPRECISION
+#define ZPRECISION INTERNAL_PRECISION
+#include "Zolotarev.h"
+#undef ZOLOTAREV_INTERNAL
+
+/* The ANSI standard appears not to know what pi is */
+
+#ifndef M_PI
+#define M_PI ((INTERNAL_PRECISION) 3.141592653589793238462643383279502884197\
+169399375105820974944592307816406286208998628034825342117068)
+#endif
+
+#define ZERO ((INTERNAL_PRECISION) 0)
+#define ONE ((INTERNAL_PRECISION) 1)
+#define TWO ((INTERNAL_PRECISION) 2)
+#define THREE ((INTERNAL_PRECISION) 3)
+#define FOUR ((INTERNAL_PRECISION) 4)
+#define HALF (ONE/TWO)
+
+/* The following obscenity seems to be the simplest (?) way to coerce the C
+ * preprocessor to convert the value of a preprocessor token into a string. */
+
+#define PP2(x) #x
+#define PP1(a,b,c) a ## b(c)
+#define STRINGIFY(name) PP1(PP,2,name)
+
+/* Compute the partial fraction expansion coefficients (alpha) from the
+ * factored form */
+NAMESPACE_BEGIN(Grid);
+NAMESPACE_BEGIN(Approx);
+
+static void construct_partfrac(izd *z) {
+  int dn = z -> dn, dd = z -> dd, type = z -> type;
+  int j, k, da = dd + 1 + type;
+  INTERNAL_PRECISION A = z -> A, *a = z -> a, *ap = z -> ap, *alpha;
+  alpha = (INTERNAL_PRECISION*) malloc(da * sizeof(INTERNAL_PRECISION));
+  for (j = 0; j < dd; j++)
+    for (k = 0, alpha[j] = A; k < dd; k++)
+      alpha[j] *=
+	(k < dn ? ap[j] - a[k] : ONE) / (k == j ? ONE : ap[j] - ap[k]);
+  if(type == 1)	      /* implicit pole at zero? */
+    for (k = 0, alpha[dd] = A * (dn > dd ? - a[dd] : ONE); k < dd; k++) {
+      alpha[dd] *= a[k] / ap[k];
+      alpha[k] *= (dn > dd ? ap[k] - a[dd] : ONE) / ap[k];
+    }
+  alpha[da-1] = dn == da - 1 ? A : ZERO;
+  z -> alpha = alpha;
+  z -> da = da;
+  return;
+}
+
+/* Convert factored polynomial into dense polynomial. The input is the overall
+ * factor A and the roots a[i], such that p = A product(x - a[i], i = 1..d) */
+
+static INTERNAL_PRECISION *poly_factored_to_dense(INTERNAL_PRECISION A, 
+						  INTERNAL_PRECISION *a,
+						  int d) {
+  INTERNAL_PRECISION *p;
+  int i, j;
+  p = (INTERNAL_PRECISION *) malloc((d + 2) * sizeof(INTERNAL_PRECISION));
+  p[0] = A;
+  for (i = 0; i < d; i++) {
+    p[i+1] = p[i];
+    for (j = i; j > 0; j--) p[j] = p[j-1] - a[i]*p[j];
+    p[0] *= - a[i];
+  }
+  return p;
+}
+
+/* Convert a rational function of the form R0(x) = x p(x^2)/q(x^2) (type 0) or
+ * R1(x) = p(x^2)/[x q(x^2)] (type 1) into its continued fraction
+ * representation. We assume that 0 <= deg(q) - deg(p) <= 1 for type 0 and 0 <=
+ * deg(p) - deg(q) <= 1 for type 1. On input p and q are in factored form, and
+ * deg(q) = dq, deg(p) = dp.  The output is the continued fraction coefficients
+ * beta, where R(x) = beta[0] x + 1/(beta[1] x + 1/(...)).
+ *
+ * The method used is as follows. There are four cases to consider:
+ *
+ * 0.i.  Type 0, deg p = deg q
+ *
+ * 0.ii. Type 0, deg p = deg q - 1
+ *
+ * 1.i.  Type 1, deg p = deg q
+ *
+ * 1.ii. Type 1, deg p = deg q + 1
+ *
+ * and these are connected by two transformations:
+ *
+ * A. To obtain a continued fraction expansion of type 1 we use a single-step
+ * polynomial division we find beta and r(x) such that p(x) = beta x q(x) +
+ * r(x), with deg(r) = deg(q). This implies that p(x^2) = beta x^2 q(x^2) +
+ * r(x^2), and thus R1(x) = x beta + r(x^2)/(x q(x^2)) = x beta + 1/R0(x)
+ * with R0(x) = x q(x^2)/r(x^2).
+ *
+ * B. A continued fraction expansion of type 0 is obtained in a similar, but
+ * not identical, manner. We use the polynomial division algorithm to compute
+ * the quotient beta and the remainder r that satisfy p(x) = beta q(x) + r(x)
+ * with deg(r) = deg(q) - 1. We thus have x p(x^2) = x beta q(x^2) + x r(x^2),
+ * so R0(x) = x beta + x r(x^2)/q(x^2) = x beta + 1/R1(x) with R1(x) = q(x^2) /
+ * (x r(x^2)).
+ *
+ * Note that the deg(r) must be exactly deg(q) for (A) and deg(q) - 1 for (B)
+ * because p and q have disjoint roots all of multiplicity 1. This means that
+ * the division algorithm requires only a single polynomial subtraction step.
+ *
+ * The transformations between the cases form the following finite state
+ * automaton:
+ *
+ * +------+            +------+            +------+            +------+
+ * |      |            |      | ---(A)---> |      |            |      |
+ * | 0.ii | ---(B)---> | 1.ii |            | 0.i  | <---(A)--- | 1.i  |
+ * |      |            |      | <---(B)--- |      |            |      |
+ * +------+            +------+            +------+            +------+
+ */
+
+static INTERNAL_PRECISION *contfrac_A(INTERNAL_PRECISION *,
+				      INTERNAL_PRECISION *,
+				      INTERNAL_PRECISION *,
+				      INTERNAL_PRECISION *, int, int);
+
+static INTERNAL_PRECISION *contfrac_B(INTERNAL_PRECISION *,
+				      INTERNAL_PRECISION *,
+				      INTERNAL_PRECISION *,
+				      INTERNAL_PRECISION *, int, int);
+
+static void construct_contfrac(izd *z){
+  INTERNAL_PRECISION *r, A = z -> A, *p = z -> a, *q = z -> ap;
+  int dp = z -> dn, dq = z -> dd, type = z -> type;
+
+  z -> db = 2 * dq + 1 + type;
+  z -> beta = (INTERNAL_PRECISION *)
+    malloc(z -> db * sizeof(INTERNAL_PRECISION));
+  p = poly_factored_to_dense(A, p, dp);
+  q = poly_factored_to_dense(ONE, q, dq);
+  r = (INTERNAL_PRECISION *) malloc((MAX(dp,dq) + 1) *
+				    sizeof(INTERNAL_PRECISION));
+  if (type == 0) (void) contfrac_B(z -> beta, p, q, r, dp, dq);
+  else (void) contfrac_A(z -> beta, p, q, r, dp, dq);
+  free(p); free(q); free(r);
+  return;
+}
+
+static INTERNAL_PRECISION *contfrac_A(INTERNAL_PRECISION *beta,
+				      INTERNAL_PRECISION *p,
+				      INTERNAL_PRECISION *q,
+				      INTERNAL_PRECISION *r, int dp, int dq) {
+  INTERNAL_PRECISION quot, *rb;
+  int j;
+
+  /* p(x) = x beta q(x) + r(x); dp = dq or dp = dq + 1 */
+
+  quot = dp == dq ? ZERO : p[dp] / q[dq];
+  r[0] = p[0];
+  for (j = 1; j <= dp; j++) r[j] = p[j] - quot * q[j-1];
+#ifdef DEBUG
+  printf("%s: Continued Fraction form: deg p = %2d, deg q = %2d, beta = %g\n",
+	 __FUNCTION__, dp, dq, (float) quot);
+  for (j = 0; j <= dq + 1; j++)
+    printf("\tp[%2d] = %14.6g\tq[%2d] = %14.6g\tr[%2d] = %14.6g\n",
+	   j, (float) (j > dp ? ZERO : p[j]),
+	   j, (float) (j == 0 ? ZERO : q[j-1]),
+	   j, (float) (j == dp ? ZERO : r[j]));
+#endif /* DEBUG */
+  *(rb = contfrac_B(beta, q, r, p, dq, dq)) = quot;
+  return rb + 1;
+}
+
+static INTERNAL_PRECISION *contfrac_B(INTERNAL_PRECISION *beta,
+				      INTERNAL_PRECISION *p,
+				      INTERNAL_PRECISION *q,
+				      INTERNAL_PRECISION *r, int dp, int dq) {
+  INTERNAL_PRECISION quot, *rb;
+  int j;
+
+  /* p(x) = beta q(x) + r(x); dp = dq or dp = dq - 1 */
+
+  quot = dp == dq ? p[dp] / q[dq] : ZERO;
+  for (j = 0; j < dq; j++) r[j] = p[j] - quot * q[j];
+#ifdef DEBUG
+  printf("%s: Continued Fraction form: deg p = %2d, deg q = %2d, beta = %g\n",
+	 __FUNCTION__, dp, dq, (float) quot);
+  for (j = 0; j <= dq; j++)
+    printf("\tp[%2d] = %14.6g\tq[%2d] = %14.6g\tr[%2d] = %14.6g\n",
+	   j, (float) (j > dp ? ZERO : p[j]),
+	   j, (float) q[j],
+	   j, (float) (j == dq ? ZERO : r[j]));
+#endif /* DEBUG */
+  *(rb = dq > 0 ? contfrac_A(beta, q, r, p, dq, dq-1) : beta) = quot;
+  return rb + 1;
+}
+
+/* The global variable U is used to hold the argument u throughout the AGM
+ * recursion. The global variables F and K are set in the innermost
+ * instantiation of the recursive function AGM to the values of the elliptic
+ * integrals F(u,k) and K(k) respectively. They must be made thread local to
+ * make this code thread-safe in a multithreaded environment. */
+
+static INTERNAL_PRECISION U, F, K;	/* THREAD LOCAL */
+
+/* Recursive implementation of Gauss' arithmetico-geometric mean, which is the
+ * kernel of the method used to compute the Jacobian elliptic functions
+ * sn(u,k), cn(u,k), and dn(u,k) with parameter k (where 0 < k < 1), as well
+ * as the elliptic integral F(s,k) satisfying F(sn(u,k)) = u and the complete
+ * elliptic integral K(k).
+ *
+ * The algorithm used is a recursive implementation of the Gauss (Landen)
+ * transformation.
+ *
+ * The function returns the value of sn(u,k'), where k' is the dual parameter,
+ * and also sets the values of the global variables F and K.  The latter is
+ * used to determine the sign of cn(u,k').
+ *
+ * The algorithm is deemed to have converged when b ceases to increase. This
+ * works whatever INTERNAL_PRECISION is specified. */
+
+static INTERNAL_PRECISION AGM(INTERNAL_PRECISION a,
+			      INTERNAL_PRECISION b,
+			      INTERNAL_PRECISION s) {
+  static INTERNAL_PRECISION pb = -ONE;
+  INTERNAL_PRECISION c, d, xi;
+
+  if (b <= pb) {
+    pb = -ONE;
+    F = asin(s) / a;		/* Here, a is the AGM */
+    K = M_PI / (TWO * a);
+    return sin(U * a);
+  }
+  pb = b;
+  c = a - b;
+  d = a + b;
+  xi = AGM(HALF*d, sqrt(a*b), ONE + c*c == ONE ?
+	   HALF*s*d/a : (a - sqrt(a*a - s*s*c*d))/(c*s));
+  return 2*a*xi / (d + c*xi*xi);
+}
+
+/* Computes sn(u,k), cn(u,k), dn(u,k), F(u,k), and K(k). It is essentially a
+ * wrapper for the routine AGM. The sign of cn(u,k) is defined to be -1 if
+ * K(k) < u < 3*K(k) and +1 otherwise, and thus sign is computed by some quite
+ * unnecessarily obfuscated bit manipulations. */
+
+static void sncndnFK(INTERNAL_PRECISION u, INTERNAL_PRECISION k,
+		     INTERNAL_PRECISION* sn, INTERNAL_PRECISION* cn,
+		     INTERNAL_PRECISION* dn, INTERNAL_PRECISION* elF,
+		     INTERNAL_PRECISION* elK) {
+  int sgn;
+  U = u;
+  *sn = AGM(ONE, sqrt(ONE - k*k), u);
+  sgn = ((int) (fabs(u) / K)) % 4; /* sgn = 0, 1, 2, 3 */
+  sgn ^= sgn >> 1;    /* (sgn & 1) = 0, 1, 1, 0 */
+  sgn = 1 - ((sgn & 1) << 1);	/* sgn = 1, -1, -1, 1 */
+  *cn = ((INTERNAL_PRECISION) sgn) * sqrt(ONE - *sn * *sn);
+  *dn = sqrt(ONE - k*k* *sn * *sn);
+  *elF = F;
+  *elK = K;
+}
+
+/* Compute the coefficients for the optimal rational approximation R(x) to
+ * sgn(x) of degree n over the interval epsilon < |x| < 1 using Zolotarev's
+ * formula. 
+ *
+ * Set type = 0 for the Zolotarev approximation, which is zero at x = 0, and
+ * type = 1 for the approximation which is infinite at x = 0. */
+
+zolotarev_data* zolotarev(PRECISION epsilon, int n, int type) {
+  INTERNAL_PRECISION A, c, cp, kp, ksq, sn, cn, dn, Kp, Kj, z, z0, t, M, F,
+    l, invlambda, xi, xisq, *tv, s, opl;
+  int m, czero, ts;
+  zolotarev_data *zd;
+  izd *d = (izd*) malloc(sizeof(izd));
+
+  d -> type = type;
+  d -> epsilon = (INTERNAL_PRECISION) epsilon;
+  d -> n = n;
+  d -> dd = n / 2;
+  d -> dn = d -> dd - 1 + n % 2; /* n even: dn = dd - 1, n odd: dn = dd */
+  d -> deg_denom = 2 * d -> dd;
+  d -> deg_num = 2 * d -> dn + 1;
+
+  d -> a = (INTERNAL_PRECISION*) malloc((1 + d -> dn) *
+					sizeof(INTERNAL_PRECISION));
+  d -> ap = (INTERNAL_PRECISION*) malloc(d -> dd *
+					 sizeof(INTERNAL_PRECISION));
+  ksq = d -> epsilon * d -> epsilon;
+  kp = sqrt(ONE - ksq);
+  sncndnFK(ZERO, kp, &sn, &cn, &dn, &F, &Kp); /* compute Kp = K(kp) */
+  z0 = TWO * Kp / (INTERNAL_PRECISION) n;
+  M = ONE;
+  A = ONE / d -> epsilon;
+
+  sncndnFK(HALF * z0, kp, &sn, &cn, &dn, &F, &Kj); /* compute xi */
+  xi = ONE / dn;
+  xisq = xi * xi;
+  invlambda = xi;
+
+  for (m = 0; m < d -> dd; m++) {
+    czero = 2 * (m + 1) == n; /* n even and m = dd -1 */
+
+    z = z0 * ((INTERNAL_PRECISION) m + ONE);
+    sncndnFK(z, kp, &sn, &cn, &dn, &F, &Kj);
+    t = cn / sn;
+    c = - t*t;
+    if (!czero) (d -> a)[d -> dn - 1 - m] = ksq / c;
+
+    z = z0 * ((INTERNAL_PRECISION) m + HALF);
+    sncndnFK(z, kp, &sn, &cn, &dn, &F, &Kj);
+    t = cn / sn;
+    cp = - t*t;
+    (d -> ap)[d -> dd - 1 - m] = ksq / cp;
+
+    M *= (ONE - c) / (ONE - cp);
+    A *= (czero ? -ksq : c) * (ONE - cp) / (cp * (ONE - c));
+    invlambda *= (ONE - c*xisq) / (ONE - cp*xisq);
+  }
+  invlambda /= M;
+  d -> A = TWO / (ONE + invlambda) * A;
+  d -> Delta = (invlambda - ONE) / (invlambda + ONE);
+
+  d -> gamma = (INTERNAL_PRECISION*) malloc((1 + d -> n) *
+					    sizeof(INTERNAL_PRECISION));
+  l = ONE / invlambda;
+  opl = ONE + l;
+  sncndnFK(sqrt( d -> type == 1
+		   ? (THREE + l) / (FOUR * opl)
+		   : (ONE + THREE*l) / (opl*opl*opl)
+	       ), sqrt(ONE - l*l), &sn, &cn, &dn, &F, &Kj);
+  s = M * F;
+  for (m = 0; m < d -> n; m++) {
+    sncndnFK(s + TWO*Kp*m/n, kp, &sn, &cn, &dn, &F, &Kj);
+    d -> gamma[m] = d -> epsilon / dn;
+  }
+
+  /* If R(x) is a Zolotarev rational approximation of degree (n,m) with maximum
+   * error Delta, then (1 - Delta^2) / R(x) is also an optimal Chebyshev
+   * approximation of degree (m,n) */
+
+  if (d -> type == 1) {
+    d -> A = (ONE - d -> Delta * d -> Delta) / d -> A;
+    tv = d -> a; d -> a = d -> ap; d -> ap = tv;
+    ts = d -> dn; d -> dn = d -> dd; d -> dd = ts;
+    ts = d -> deg_num; d -> deg_num = d -> deg_denom; d -> deg_denom = ts;
+  }
+
+  construct_partfrac(d);
+  construct_contfrac(d);
+
+  /* Converting everything to PRECISION for external use only */
+
+  zd = (zolotarev_data*) malloc(sizeof(zolotarev_data));
+  zd -> A = (PRECISION) d -> A;
+  zd -> Delta = (PRECISION) d -> Delta;
+  zd -> epsilon = (PRECISION) d -> epsilon;
+  zd -> n = d -> n;
+  zd -> type = d -> type;
+  zd -> dn = d -> dn;
+  zd -> dd = d -> dd;
+  zd -> da = d -> da;
+  zd -> db = d -> db;
+  zd -> deg_num = d -> deg_num;
+  zd -> deg_denom = d -> deg_denom;
+
+  zd -> a = (PRECISION*) malloc(zd -> dn * sizeof(PRECISION));
+  for (m = 0; m < zd -> dn; m++) zd -> a[m] = (PRECISION) d -> a[m];
+  free(d -> a);
+
+  zd -> ap = (PRECISION*) malloc(zd -> dd * sizeof(PRECISION));
+  for (m = 0; m < zd -> dd; m++) zd -> ap[m] = (PRECISION) d -> ap[m];
+  free(d -> ap);
+
+  zd -> alpha = (PRECISION*) malloc(zd -> da * sizeof(PRECISION));
+  for (m = 0; m < zd -> da; m++) zd -> alpha[m] = (PRECISION) d -> alpha[m];
+  free(d -> alpha);
+
+  zd -> beta = (PRECISION*) malloc(zd -> db * sizeof(PRECISION));
+  for (m = 0; m < zd -> db; m++) zd -> beta[m] = (PRECISION) d -> beta[m];
+  free(d -> beta);
+
+  zd -> gamma = (PRECISION*) malloc(zd -> n * sizeof(PRECISION));
+  for (m = 0; m < zd -> n; m++) zd -> gamma[m] = (PRECISION) d -> gamma[m];
+  free(d -> gamma);
+
+  free(d);
+  return zd;
+}
+
+
+void zolotarev_free(zolotarev_data *zdata)
+{
+    free(zdata -> a);
+    free(zdata -> ap);
+    free(zdata -> alpha);
+    free(zdata -> beta);
+    free(zdata -> gamma);
+    free(zdata);
+}
+
+
+zolotarev_data* higham(PRECISION epsilon, int n) {
+  INTERNAL_PRECISION A, M, c, cp, z, z0, t, epssq;
+  int m, czero;
+  zolotarev_data *zd;
+  izd *d = (izd*) malloc(sizeof(izd));
+
+  d -> type = 0;
+  d -> epsilon = (INTERNAL_PRECISION) epsilon;
+  d -> n = n;
+  d -> dd = n / 2;
+  d -> dn = d -> dd - 1 + n % 2; /* n even: dn = dd - 1, n odd: dn = dd */
+  d -> deg_denom = 2 * d -> dd;
+  d -> deg_num = 2 * d -> dn + 1;
+
+  d -> a = (INTERNAL_PRECISION*) malloc((1 + d -> dn) *
+					sizeof(INTERNAL_PRECISION));
+  d -> ap = (INTERNAL_PRECISION*) malloc(d -> dd *
+					 sizeof(INTERNAL_PRECISION));
+  A = (INTERNAL_PRECISION) n;
+  z0 = M_PI / A;
+  A = n % 2 == 0 ? A : ONE / A;
+  M = d -> epsilon * A;
+  epssq = d -> epsilon * d -> epsilon;
+
+  for (m = 0; m < d -> dd; m++) {
+    czero = 2 * (m + 1) == n; /* n even and m = dd - 1*/
+
+    if (!czero) {
+      z = z0 * ((INTERNAL_PRECISION) m + ONE);
+      t = tan(z);
+      c = - t*t;
+      (d -> a)[d -> dn - 1 - m] = c;
+      M *= epssq - c;
+    }
+
+    z = z0 * ((INTERNAL_PRECISION) m + HALF);
+    t = tan(z);
+    cp = - t*t;
+    (d -> ap)[d -> dd - 1 - m] = cp;
+    M /= epssq - cp;
+  }
+
+  d -> gamma = (INTERNAL_PRECISION*) malloc((1 + d -> n) *
+					    sizeof(INTERNAL_PRECISION));
+  for (m = 0; m < d -> n; m++) d -> gamma[m] = ONE;
+
+  d -> A = A;
+  d -> Delta = ONE - M;
+
+  construct_partfrac(d);
+  construct_contfrac(d);
+
+  /* Converting everything to PRECISION for external use only */
+
+  zd = (zolotarev_data*) malloc(sizeof(zolotarev_data));
+  zd -> A = (PRECISION) d -> A;
+  zd -> Delta = (PRECISION) d -> Delta;
+  zd -> epsilon = (PRECISION) d -> epsilon;
+  zd -> n = d -> n;
+  zd -> type = d -> type;
+  zd -> dn = d -> dn;
+  zd -> dd = d -> dd;
+  zd -> da = d -> da;
+  zd -> db = d -> db;
+  zd -> deg_num = d -> deg_num;
+  zd -> deg_denom = d -> deg_denom;
+
+  zd -> a = (PRECISION*) malloc(zd -> dn * sizeof(PRECISION));
+  for (m = 0; m < zd -> dn; m++) zd -> a[m] = (PRECISION) d -> a[m];
+  free(d -> a);
+
+  zd -> ap = (PRECISION*) malloc(zd -> dd * sizeof(PRECISION));
+  for (m = 0; m < zd -> dd; m++) zd -> ap[m] = (PRECISION) d -> ap[m];
+  free(d -> ap);
+
+  zd -> alpha = (PRECISION*) malloc(zd -> da * sizeof(PRECISION));
+  for (m = 0; m < zd -> da; m++) zd -> alpha[m] = (PRECISION) d -> alpha[m];
+  free(d -> alpha);
+
+  zd -> beta = (PRECISION*) malloc(zd -> db * sizeof(PRECISION));
+  for (m = 0; m < zd -> db; m++) zd -> beta[m] = (PRECISION) d -> beta[m];
+  free(d -> beta);
+
+  zd -> gamma = (PRECISION*) malloc(zd -> n * sizeof(PRECISION));
+  for (m = 0; m < zd -> n; m++) zd -> gamma[m] = (PRECISION) d -> gamma[m];
+  free(d -> gamma);
+
+  free(d);
+  return zd;
+}
+
+NAMESPACE_END(Approx);
+NAMESPACE_END(Grid);
+
+#ifdef TEST
+
+#undef ZERO
+#define ZERO ((PRECISION) 0)
+#undef ONE
+#define ONE ((PRECISION) 1)
+#undef TWO
+#define TWO ((PRECISION) 2)
+
+/* Evaluate the rational approximation R(x) using the factored form */
+
+static PRECISION zolotarev_eval(PRECISION x, zolotarev_data* rdata) {
+  int m;
+  PRECISION R;
+
+  if (rdata -> type == 0) {
+    R = rdata -> A * x;
+    for (m = 0; m < rdata -> deg_denom/2; m++)
+      R *= (2*(m+1) > rdata -> deg_num ? ONE : x*x - rdata -> a[m]) /
+	(x*x - rdata -> ap[m]);
+  } else {
+    R = rdata -> A / x;
+    for (m = 0; m < rdata -> deg_num/2; m++)
+      R *= (x*x - rdata -> a[m]) /
+	(2*(m+1) > rdata -> deg_denom ? ONE : x*x - rdata -> ap[m]);
+  }
+  return R;
+}
+
+/* Evaluate the rational approximation R(x) using the partial fraction form */
+
+static PRECISION zolotarev_partfrac_eval(PRECISION x, zolotarev_data* rdata) {
+  int m;
+  PRECISION R = rdata -> alpha[rdata -> da - 1];
+  for (m = 0; m < rdata -> dd; m++)
+    R += rdata -> alpha[m] / (x * x - rdata -> ap[m]);
+  if (rdata -> type == 1) R += rdata -> alpha[rdata -> dd] / (x * x);
+  return R * x;
+}    
+
+/* Evaluate the rational approximation R(x) using continued fraction form. 
+ *
+ * If x = 0 and type = 1 then the result should be INF, whereas if x = 0 and
+ * type = 0 then the result should be 0, but division by zero will occur at
+ * intermediate stages of the evaluation. For IEEE implementations with
+ * non-signalling overflow this will work correctly since 1/(1/0) = 1/INF = 0,
+ * but with signalling overflow you will get an error message. */
+
+static PRECISION zolotarev_contfrac_eval(PRECISION x, zolotarev_data* rdata) {
+  int m;
+  PRECISION R = rdata -> beta[0] * x;
+  for (m = 1; m < rdata -> db; m++) R = rdata -> beta[m] * x + ONE / R;
+  return R;
+}    
+
+/* Evaluate the rational approximation R(x) using Cayley form */
+
+static PRECISION zolotarev_cayley_eval(PRECISION x, zolotarev_data* rdata) {
+  int m;
+  PRECISION T;
+
+  T = rdata -> type == 0 ? ONE : -ONE;
+  for (m = 0; m < rdata -> n; m++)
+    T *= (rdata -> gamma[m] - x) / (rdata -> gamma[m] + x);
+  return (ONE - T) / (ONE + T);
+}
+
+
+/* Test program. Apart from printing out the parameters for R(x) it produces
+ * the following data files for plotting (unless NPLOT is defined):
+ *
+ * zolotarev-fn is a plot of R(x) for |x| < 1.2. This should look like sgn(x).
+ *
+ * zolotarev-err is a plot of the error |R(x) - sgn(x)| scaled by 1/Delta. This
+ * should oscillate deg_num + deg_denom + 2 times between +1 and -1 over the
+ * domain epsilon <= |x| <= 1.
+ *
+ * If ALLPLOTS is defined then zolotarev-partfrac (zolotarev-contfrac) is a
+ * plot of the difference between the values of R(x) computed using the
+ * factored form and the partial fraction (continued fraction) form, scaled by
+ * 1/Delta. It should be zero everywhere. */
+
+int main(int argc, char** argv) {
+  
+  int m, n, plotpts = 5000, type = 0;
+  float eps, x, ypferr, ycferr, ycaylerr, maxypferr, maxycferr, maxycaylerr;
+  zolotarev_data *rdata;
+  PRECISION y;
+  FILE *plot_function, *plot_error, 
+    *plot_partfrac, *plot_contfrac, *plot_cayley;
+
+  if (argc < 3 || argc > 4) {
+    fprintf(stderr, "Usage: %s epsilon n [type]\n", *argv);
+    exit(EXIT_FAILURE);
+  }
+  sscanf(argv[1], "%g", &eps);	/* First argument is epsilon */
+  sscanf(argv[2], "%d", &n);	/* Second argument is n */
+  if (argc == 4) sscanf(argv[3], "%d", &type); /* Third argument is type */
+
+  if (type < 0 || type > 2) {
+    fprintf(stderr, "%s: type must be 0 (Zolotarev R(0) = 0),\n"
+	    "\t\t1 (Zolotarev R(0) = Inf, or 2 (Higham)\n", *argv);
+    exit(EXIT_FAILURE);
+  }
+
+  rdata = type == 2 
+    ? higham((PRECISION) eps, n) 
+    : zolotarev((PRECISION) eps, n, type);
+
+  printf("Zolotarev Test: R(epsilon = %g, n = %d, type = %d)\n\t" 
+	 STRINGIFY(VERSION) "\n\t" STRINGIFY(HVERSION)
+	 "\n\tINTERNAL_PRECISION = " STRINGIFY(INTERNAL_PRECISION)
+	 "\tPRECISION = " STRINGIFY(PRECISION)
+	 "\n\n\tRational approximation of degree (%d,%d), %s at x = 0\n"
+	 "\tDelta = %g (maximum error)\n\n"
+	 "\tA = %g (overall factor)\n",
+	 (float) rdata -> epsilon, rdata -> n, type,
+	 rdata -> deg_num, rdata -> deg_denom,
+	 rdata -> type == 1 ? "infinite" : "zero",
+	 (float) rdata -> Delta, (float) rdata -> A);
+  for (m = 0; m < MIN(rdata -> dd, rdata -> dn); m++)
+    printf("\ta[%2d] = %14.8g\t\ta'[%2d] = %14.8g\n",
+	   m + 1, (float) rdata -> a[m], m + 1, (float) rdata -> ap[m]);
+  if (rdata -> dd > rdata -> dn)
+    printf("\t\t\t\t\ta'[%2d] = %14.8g\n",
+	   rdata -> dn + 1, (float) rdata -> ap[rdata -> dn]);
+  if (rdata -> dd < rdata -> dn)
+    printf("\ta[%2d] = %14.8g\n",
+	   rdata -> dd + 1, (float) rdata -> a[rdata -> dd]);
+
+  printf("\n\tPartial fraction coefficients\n");
+  printf("\talpha[ 0] = %14.8g\n",
+	 (float) rdata -> alpha[rdata -> da - 1]);
+  for (m = 0; m < rdata -> dd; m++)
+    printf("\talpha[%2d] = %14.8g\ta'[%2d] = %14.8g\n",
+	   m + 1, (float) rdata -> alpha[m], m + 1, (float) rdata -> ap[m]);
+  if (rdata -> type == 1)
+    printf("\talpha[%2d] = %14.8g\ta'[%2d] = %14.8g\n",
+	   rdata -> dd + 1, (float) rdata -> alpha[rdata -> dd],
+	   rdata -> dd + 1, (float) ZERO);
+
+  printf("\n\tContinued fraction coefficients\n");
+  for (m = 0; m < rdata -> db; m++)
+    printf("\tbeta[%2d] = %14.8g\n", m, (float) rdata -> beta[m]);
+
+  printf("\n\tCayley transform coefficients\n");
+  for (m = 0; m < rdata -> n; m++)
+    printf("\tgamma[%2d] = %14.8g\n", m, (float) rdata -> gamma[m]);
+
+#ifndef NPLOT
+  plot_function = fopen("zolotarev-fn.dat", "w");
+  plot_error = fopen("zolotarev-err.dat", "w");
+#ifdef ALLPLOTS
+  plot_partfrac = fopen("zolotarev-partfrac.dat", "w");
+  plot_contfrac = fopen("zolotarev-contfrac.dat", "w");
+  plot_cayley = fopen("zolotarev-cayley.dat", "w");
+#endif /* ALLPLOTS */
+  for (m = 0, maxypferr = maxycferr = maxycaylerr = 0.0; m <= plotpts; m++) {
+    x = 2.4 * (float) m / plotpts - 1.2;
+    if (rdata -> type == 0 || fabs(x) * (float) plotpts > 1.0) {
+      /* skip x = 0 for type 1, as R(0) is singular */
+      y = zolotarev_eval((PRECISION) x, rdata);
+      fprintf(plot_function, "%g %g\n", x, (float) y);
+      fprintf(plot_error, "%g %g\n",
+	      x, (float)((y - ((x > 0.0 ? ONE : -ONE))) / rdata -> Delta));
+      ypferr = (float)((zolotarev_partfrac_eval((PRECISION) x, rdata) - y)
+		       / rdata -> Delta);
+      ycferr = (float)((zolotarev_contfrac_eval((PRECISION) x, rdata) - y)
+		       / rdata -> Delta);
+      ycaylerr = (float)((zolotarev_cayley_eval((PRECISION) x, rdata) - y)
+		       / rdata -> Delta);
+      if (fabs(x) < 1.0 && fabs(x) > rdata -> epsilon) {
+	maxypferr = MAX(maxypferr, fabs(ypferr));
+	maxycferr = MAX(maxycferr, fabs(ycferr));
+	maxycaylerr = MAX(maxycaylerr, fabs(ycaylerr));
+      }
+#ifdef ALLPLOTS
+      fprintf(plot_partfrac, "%g %g\n", x, ypferr);
+      fprintf(plot_contfrac, "%g %g\n", x, ycferr);
+      fprintf(plot_cayley, "%g %g\n", x, ycaylerr);
+#endif /* ALLPLOTS */
+    }
+  }
+#ifdef ALLPLOTS
+  fclose(plot_cayley);
+  fclose(plot_contfrac);
+  fclose(plot_partfrac);
+#endif /* ALLPLOTS */
+  fclose(plot_error);
+  fclose(plot_function);
+
+  printf("\n\tMaximum PF error = %g (relative to Delta)\n", maxypferr);
+  printf("\tMaximum CF error = %g (relative to Delta)\n", maxycferr);
+  printf("\tMaximum Cayley error = %g (relative to Delta)\n", maxycaylerr);
+#endif /* NPLOT */
+
+  free(rdata -> a);
+  free(rdata -> ap);
+  free(rdata -> alpha);
+  free(rdata -> beta);
+  free(rdata -> gamma);
+  free(rdata);
+
+  return EXIT_SUCCESS;
+}
+
+#endif /* TEST */
+
--- a/Grid/algorithms/approx/Zolotarev.h
+++ b/Grid/algorithms/approx/Zolotarev.h
@@ -0,0 +1,88 @@
+/* -*- Mode: C; comment-column: 22; fill-column: 79; -*- */
+
+#ifdef __cplusplus
+#include <Grid/Namespace.h>
+NAMESPACE_BEGIN(Grid);
+NAMESPACE_BEGIN(Approx);
+#endif
+
+#define HVERSION Header Time-stamp: <14-OCT-2004 09:26:51.00 adk@MISSCONTRARY>
+
+#ifndef ZOLOTAREV_INTERNAL
+#ifndef PRECISION
+#define PRECISION double
+#endif
+#define ZPRECISION PRECISION
+#define ZOLOTAREV_DATA zolotarev_data
+#endif
+
+/* This struct contains the coefficients which parameterise an optimal rational
+ * approximation to the signum function.
+ *
+ * The parameterisations used are:
+ *
+ * Factored form for type 0 (R0(0) = 0)
+ *
+ * R0(x) = A * x * prod(x^2 - a[j], j = 0 .. dn-1) / prod(x^2 - ap[j], j = 0
+ * .. dd-1),
+ *
+ * where deg_num = 2*dn + 1 and deg_denom = 2*dd.
+ *
+ * Factored form for type 1 (R1(0) = infinity)
+ *
+ * R1(x) = (A / x) * prod(x^2 - a[j], j = 0 .. dn-1) / prod(x^2 - ap[j], j = 0
+ * .. dd-1),
+ *
+ * where deg_num = 2*dn and deg_denom = 2*dd + 1. 
+ *
+ * Partial fraction form
+ *
+ * R(x) = alpha[da] * x + sum(alpha[j] * x / (x^2 - ap[j]), j = 0 .. da-1)
+ *
+ * where da = dd for type 0 and da = dd + 1 with ap[dd] = 0 for type 1.
+ *
+ * Continued fraction form 
+ *
+ * R(x) = beta[db-1] * x + 1 / (beta[db-2] * x + 1 / (beta[db-3] * x + ...))
+ *
+ * with the final coefficient being beta[0], with d' = 2 * dd + 1 for type 0
+ * and db = 2 * dd + 2 for type 1.
+ *
+ * Cayley form (Chiu's domain wall formulation)
+ *
+ * R(x) = (1 - T(x)) / (1 + T(x))
+ *
+ * where T(x) = prod((x - gamma[j]) / (x + gamma[j]), j = 0 .. n-1)
+ */
+
+typedef struct {
+  ZPRECISION *a,      /* zeros of numerator, a[0 .. dn-1] */
+    *ap,	      /* poles (zeros of denominator), ap[0 .. dd-1] */
+    A,		      /* overall factor */
+    *alpha,	      /* coefficients of partial fraction, alpha[0 .. da-1] */
+    *beta,	      /* coefficients of continued fraction, beta[0 .. db-1] */
+    *gamma,	      /* zeros of numerator of T in Cayley form */
+    Delta,	      /* maximum error, |R(x) - sgn(x)| <= Delta */
+    epsilon;	      /* minimum x value, epsilon < |x| < 1 */
+  int n,	      /* approximation degree */
+    type,	      /* 0: R(0) = 0, 1: R(0) = infinity */
+    dn, dd, da, db,   /* number of elements of a, ap, alpha, and beta */
+    deg_num,	      /* degree of numerator = deg_denom +/- 1 */
+    deg_denom;	      /* degree of denominator */
+} ZOLOTAREV_DATA;
+
+#ifndef ZOLOTAREV_INTERNAL
+
+/* zolotarev(epsilon, n, type) returns a pointer to an initialised
+ * zolotarev_data structure. The arguments must satisfy the constraints that
+ * epsilon > 0, n > 0, and type = 0 or 1. */
+
+ZOLOTAREV_DATA* higham(PRECISION epsilon, int n) ;
+ZOLOTAREV_DATA* zolotarev(PRECISION epsilon, int n, int type);
+void zolotarev_free(zolotarev_data *zdata);
+#endif
+
+#ifdef __cplusplus
+NAMESPACE_END(Approx);
+NAMESPACE_END(Grid);
+#endif
--- a/Grid/algorithms/approx/bigfloat.h
+++ b/Grid/algorithms/approx/bigfloat.h
@@ -0,0 +1,206 @@
+/*
+  Mike Clark - 25th May 2005
+
+  bigfloat.h
+
+  Simple C++ wrapper for multiprecision datatype used by AlgRemez
+  algorithm
+*/
+
+#ifndef INCLUDED_BIGFLOAT_H
+#define INCLUDED_BIGFLOAT_H
+
+
+#include <gmp.h>
+#include <mpf2mpfr.h>
+#include <mpfr.h>
+class bigfloat {
+
+private:
+
+  mpf_t x;
+
+public:
+
+  bigfloat() { mpf_init(x); }
+  bigfloat(const bigfloat& y) { mpf_init_set(x, y.x); }
+  bigfloat(const unsigned long u) { mpf_init_set_ui(x, u); }
+  bigfloat(const long i) { mpf_init_set_si(x, i); }
+  bigfloat(const int i) {mpf_init_set_si(x,(long)i);}
+  bigfloat(const float d) { mpf_init_set_d(x, (double)d); }
+  bigfloat(const double d) { mpf_init_set_d(x, d); }  
+  bigfloat(const char *str) { mpf_init_set_str(x, (char*)str, 10); }
+  ~bigfloat(void) { mpf_clear(x); }
+  operator double (void) const { return (double)mpf_get_d(x); }
+  static void setDefaultPrecision(unsigned long dprec) {
+    unsigned long bprec =  (unsigned long)(3.321928094 * (double)dprec);
+    mpf_set_default_prec(bprec);
+  }
+
+  void setPrecision(unsigned long dprec) {
+    unsigned long bprec =  (unsigned long)(3.321928094 * (double)dprec);
+    mpf_set_prec(x,bprec);
+  }
+  
+  unsigned long getPrecision(void) const { return mpf_get_prec(x); }
+
+  unsigned long getDefaultPrecision(void) const { return mpf_get_default_prec(); }
+
+  bigfloat& operator=(const bigfloat& y) {
+    mpf_set(x, y.x); 
+    return *this;
+  }
+
+  bigfloat& operator=(const unsigned long y) { 
+    mpf_set_ui(x, y);
+    return *this; 
+  }
+  
+  bigfloat& operator=(const signed long y) {
+    mpf_set_si(x, y); 
+    return *this;
+  }
+  
+  bigfloat& operator=(const float y) {
+    mpf_set_d(x, (double)y); 
+    return *this;
+  }
+
+  bigfloat& operator=(const double y) {
+    mpf_set_d(x, y); 
+    return *this;
+  }
+
+  size_t write(void);
+  size_t read(void);
+
+  /* Arithmetic Functions */
+
+  bigfloat& operator+=(const bigfloat& y) { return *this = *this + y; }
+  bigfloat& operator-=(const bigfloat& y) { return *this = *this - y; }
+  bigfloat& operator*=(const bigfloat& y) { return *this = *this * y; }
+  bigfloat& operator/=(const bigfloat& y) { return *this = *this / y; }
+
+  friend bigfloat operator+(const bigfloat& x, const bigfloat& y) {
+    bigfloat a;
+    mpf_add(a.x,x.x,y.x);
+    return a;
+  }
+
+  friend bigfloat operator+(const bigfloat& x, const unsigned long y) {
+    bigfloat a;
+    mpf_add_ui(a.x,x.x,y);
+    return a;
+  }
+
+  friend bigfloat operator-(const bigfloat& x, const bigfloat& y) {
+    bigfloat a;
+    mpf_sub(a.x,x.x,y.x);
+    return a;
+  }
+  
+  friend bigfloat operator-(const unsigned long x, const bigfloat& y) {
+    bigfloat a;
+    mpf_ui_sub(a.x,x,y.x);
+    return a;
+  }
+  
+  friend bigfloat operator-(const bigfloat& x, const unsigned long y) {
+    bigfloat a;
+    mpf_sub_ui(a.x,x.x,y);
+    return a;
+  }
+
+  friend bigfloat operator-(const bigfloat& x) {
+    bigfloat a;
+    mpf_neg(a.x,x.x);
+    return a;
+  }
+
+  friend bigfloat operator*(const bigfloat& x, const bigfloat& y) {
+    bigfloat a;
+    mpf_mul(a.x,x.x,y.x);
+    return a;
+  }
+
+  friend bigfloat operator*(const bigfloat& x, const unsigned long y) {
+    bigfloat a;
+    mpf_mul_ui(a.x,x.x,y);
+    return a;
+  }
+
+  friend bigfloat operator/(const bigfloat& x, const bigfloat& y){
+    bigfloat a;
+    mpf_div(a.x,x.x,y.x);
+    return a;
+  }
+
+  friend bigfloat operator/(const unsigned long x, const bigfloat& y){
+    bigfloat a;
+    mpf_ui_div(a.x,x,y.x);
+    return a;
+  }
+
+  friend bigfloat operator/(const bigfloat& x, const unsigned long y){
+    bigfloat a;
+    mpf_div_ui(a.x,x.x,y);
+    return a;
+  }
+
+  friend bigfloat sqrt_bf(const bigfloat& x){
+    bigfloat a;
+    mpf_sqrt(a.x,x.x);
+    return a;
+  }
+
+  friend bigfloat sqrt_bf(const unsigned long x){
+    bigfloat a;
+    mpf_sqrt_ui(a.x,x);
+    return a;
+  }
+
+  friend bigfloat abs_bf(const bigfloat& x){
+    bigfloat a;
+    mpf_abs(a.x,x.x);
+    return a;
+  }
+
+  friend bigfloat pow_bf(const bigfloat& a, long power) {
+    bigfloat b;
+    mpf_pow_ui(b.x,a.x,power);
+    return b;
+  }
+
+  friend bigfloat pow_bf(const bigfloat& a, bigfloat &power) {
+    bigfloat b;
+    mpfr_pow(b.x,a.x,power.x,GMP_RNDN);
+    return b;
+  }
+
+  friend bigfloat exp_bf(const bigfloat& a) {
+    bigfloat b;
+    mpfr_exp(b.x,a.x,GMP_RNDN);
+    return b;
+  }
+
+  /* Comparison Functions */
+
+  friend int operator>(const bigfloat& x, const bigfloat& y) {
+    int test;
+    test = mpf_cmp(x.x,y.x);
+    if (test > 0) return 1;
+    else return 0;
+  }
+
+  friend int operator<(const bigfloat& x, const bigfloat& y) {
+    int test;
+    test = mpf_cmp(x.x,y.x);
+    if (test < 0) return 1;
+    else return 0;
+  }
+
+  friend int sgn(const bigfloat&);
+
+};
+
+#endif
--- a/Grid/algorithms/approx/bigfloat_double.h
+++ b/Grid/algorithms/approx/bigfloat_double.h
@@ -0,0 +1,189 @@
+    /*************************************************************************************
+
+    Grid physics library, www.github.com/paboyle/Grid 
+
+    Source file: ./lib/algorithms/approx/bigfloat_double.h
+
+    Copyright (C) 2015
+
+Author: Peter Boyle <paboyle@ph.ed.ac.uk>
+
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License along
+    with this program; if not, write to the Free Software Foundation, Inc.,
+    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+
+    See the full license in the file "LICENSE" in the top level distribution directory
+    *************************************************************************************/
+    /*  END LEGAL */
+#include <math.h>
+
+typedef double mfloat; 
+class bigfloat {
+private:
+
+  mfloat x;
+
+public:
+
+  bigfloat() { }
+  bigfloat(const bigfloat& y) { x=y.x; }
+  bigfloat(const unsigned long u) { x=u; }
+  bigfloat(const long i) { x=i; }
+  bigfloat(const int i) { x=i;}
+  bigfloat(const float d) { x=d;}
+  bigfloat(const double d) {  x=d;}
+  bigfloat(const char *str) { x=std::stod(std::string(str));}
+  ~bigfloat(void) { }
+  operator double (void) const { return (double)x; }
+  static void setDefaultPrecision(unsigned long dprec) {
+  }
+
+  void setPrecision(unsigned long dprec) {
+  }
+  
+  unsigned long getPrecision(void) const { return 64; }
+  unsigned long getDefaultPrecision(void) const { return 64; }
+
+  bigfloat& operator=(const bigfloat& y)     { x=y.x;    return *this;  }
+  bigfloat& operator=(const unsigned long y) { x=y; return *this; }
+  bigfloat& operator=(const signed long y)   { x=y; return *this; }
+  bigfloat& operator=(const float y)    { x=y; return *this; }
+  bigfloat& operator=(const double y)   { x=y; return *this; }
+
+  size_t write(void);
+  size_t read(void);
+
+  /* Arithmetic Functions */
+
+  bigfloat& operator+=(const bigfloat& y) { return *this = *this + y; }
+  bigfloat& operator-=(const bigfloat& y) { return *this = *this - y; }
+  bigfloat& operator*=(const bigfloat& y) { return *this = *this * y; }
+  bigfloat& operator/=(const bigfloat& y) { return *this = *this / y; }
+
+  friend bigfloat operator+(const bigfloat& x, const bigfloat& y) { 
+    bigfloat a;
+    a.x=x.x+y.x;
+    return a;
+  }
+
+  friend bigfloat operator+(const bigfloat& x, const unsigned long y) {
+    bigfloat a;
+    a.x=x.x+y;
+    return a;
+  }
+
+  friend bigfloat operator-(const bigfloat& x, const bigfloat& y) {
+    bigfloat a;
+    a.x=x.x-y.x;
+    return a;
+  }
+  
+  friend bigfloat operator-(const unsigned long x, const bigfloat& y) {
+    bigfloat bx(x);
+    return bx-y;
+  }
+  
+  friend bigfloat operator-(const bigfloat& x, const unsigned long y) {
+    bigfloat by(y);
+    return x-by;
+  }
+
+  friend bigfloat operator-(const bigfloat& x) {
+    bigfloat a;
+    a.x=-x.x;
+    return a;
+  }
+
+  friend bigfloat operator*(const bigfloat& x, const bigfloat& y) {
+    bigfloat a;
+    a.x=x.x*y.x;
+    return a;
+  }
+
+  friend bigfloat operator*(const bigfloat& x, const unsigned long y) {
+    bigfloat a;
+    a.x=x.x*y;
+    return a;
+  }
+
+  friend bigfloat operator/(const bigfloat& x, const bigfloat& y){
+    bigfloat a;
+    a.x=x.x/y.x;
+    return a;
+  }
+
+  friend bigfloat operator/(const unsigned long x, const bigfloat& y){
+    bigfloat bx(x);
+    return bx/y;
+  }
+
+  friend bigfloat operator/(const bigfloat& x, const unsigned long y){
+    bigfloat by(y);
+    return x/by;
+  }
+
+  friend bigfloat sqrt_bf(const bigfloat& x){
+    bigfloat a;
+    a.x= sqrt(x.x);
+    return a;
+  }
+
+  friend bigfloat sqrt_bf(const unsigned long x){
+    bigfloat a(x);
+    return sqrt_bf(a);
+  }
+
+  friend bigfloat abs_bf(const bigfloat& x){
+    bigfloat a;
+    a.x=fabs(x.x);
+    return a;
+  }
+
+  friend bigfloat pow_bf(const bigfloat& a, long power) {
+    bigfloat b;
+    b.x=pow(a.x,power);
+    return b;
+  }
+
+  friend bigfloat pow_bf(const bigfloat& a, bigfloat &power) {
+    bigfloat b;
+    b.x=pow(a.x,power.x);
+    return b;
+  }
+
+  friend bigfloat exp_bf(const bigfloat& a) {
+    bigfloat b;
+    b.x=exp(a.x);
+    return b;
+  }
+
+  /* Comparison Functions */
+  friend int operator>(const bigfloat& x, const bigfloat& y) {
+    return x.x>y.x;
+  }
+
+  friend int operator<(const bigfloat& x, const bigfloat& y) {
+    return x.x<y.x;
+  }
+
+  friend int sgn(const bigfloat& x) {
+    if ( x.x>=0 )  return 1;   
+    else return 0;
+  }
+
+  /* Miscellaneous Functions */
+
+  //  friend bigfloat& random(void);
+};
+
+