Merge branch 'master' of https://github.com/paboyle/Grid

2026-02-26 00:26:12 +00:00 · 2015-12-03 12:11:10 -05:00
parent ee9ecb6115 26161addd0
commit fb81acca3c
233 changed files with 33004 additions and 12555 deletions
--- a/lib/algorithms/CoarsenedMatrix.h
+++ b/lib/algorithms/CoarsenedMatrix.h
@@ -12,9 +12,6 @@ namespace Grid {
    std::vector<int> directions   ;
    std::vector<int> displacements;

-    // FIXME -- don't like xposing the operator directions
-    // as different to the geometrical dirs
-    // Also don't like special casing five dim.. should pass an object in template
  Geometry(int _d)  {
  
      int base = (_d==5) ? 1:0;
@@ -35,12 +32,12 @@ namespace Grid {
      displacements[2*_d]=0;
      
      //// report back
-      std::cout<<"directions    :";
+      std::cout<<GridLogMessage<<"directions    :";
      for(int d=0;d<npoint;d++) std::cout<< directions[d]<< " ";
      std::cout <<std::endl;
-      std::cout<<"displacements :";
+      std::cout<<GridLogMessage<<"displacements :";
      for(int d=0;d<npoint;d++) std::cout<< displacements[d]<< " ";
-      std::cout <<std::endl;
+      std::cout<<std::endl;
    }
  
    /*
@@ -64,6 +61,97 @@ namespace Grid {

  };
  
+  ////////////////////////////////////////////////////////////////////
+  // Holds nbasis fine-grid vectors ("subspace") and wraps the block
+  // project / promote calls that move data between a FineField and a
+  // CoarseVector using those vectors.
+  //   Fobj     == (per site) type of fine field
+  //   CComplex == (per site) scalar type used on the coarse grid
+  //   nbasis   == number of subspace vectors
+  ////////////////////////////////////////////////////////////////////
+  template<class Fobj,class CComplex,int nbasis>
+  class Aggregation   {
+  public:
+    typedef iVector<CComplex,nbasis >             siteVector;
+    typedef Lattice<siteVector>                 CoarseVector;
+    typedef Lattice<iMatrix<CComplex,nbasis > > CoarseMatrix;
+
+    typedef Lattice< CComplex >   CoarseScalar; // used for inner products on fine field
+    typedef Lattice<Fobj >        FineField;
+
+    GridBase *CoarseGrid;
+    GridBase *FineGrid;
+    std::vector<Lattice<Fobj> > subspace;
+
+    // Stores both grids (not owned here) and allocates nbasis fine fields.
+    Aggregation(GridBase *_CoarseGrid,GridBase *_FineGrid) : 
+      CoarseGrid(_CoarseGrid),
+      FineGrid(_FineGrid),
+      subspace(nbasis,_FineGrid)
+	{
+	};
+
+    // CreateSubspace is virtual, so make destruction through a base pointer safe.
+    virtual ~Aggregation(){};
+  
+    // Block-orthogonalise the stored subspace vectors in place.
+    void Orthogonalise(void){
+      CoarseScalar InnerProd(CoarseGrid); 
+      blockOrthogonalise(InnerProd,subspace);
+    } 
+
+    // Diagnostic: project each subspace[i] onto the subspace, subtract the
+    // result from the unit coarse vector e_i and print norm2 of the difference.
+    void CheckOrthogonal(void){
+      CoarseVector iProj(CoarseGrid); 
+      CoarseVector eProj(CoarseGrid); 
+      
+      for(int i=0;i<nbasis;i++){
+	blockProject(iProj,subspace[i],subspace);
+
+	// eProj = e_i : component i set to one on every coarse outer site
+	eProj=zero; 
+	for(int ss=0;ss<CoarseGrid->oSites();ss++){
+	  eProj._odata[ss](i)=CComplex(1.0);
+	}
+	eProj=eProj - iProj;
+	std::cout<<GridLogMessage<<"Orthog check error "<<i<<" " << norm2(eProj)<<std::endl;
+      }
+      std::cout<<GridLogMessage <<"CheckOrthog done"<<std::endl;
+    }
+
+    // Fine -> coarse using the stored subspace.
+    void ProjectToSubspace(CoarseVector &CoarseVec,const FineField &FineVec){
+      blockProject(CoarseVec,FineVec,subspace);
+    }
+    // Coarse -> fine using the stored subspace.
+    void PromoteFromSubspace(const CoarseVector &CoarseVec,FineField &FineVec){
+      blockPromote(CoarseVec,FineVec,subspace);
+    }
+
+    // Fill every subspace vector from RNG via random(), print its norm2,
+    // then orthogonalise.
+    void CreateSubspaceRandom(GridParallelRNG &RNG){
+      for(int i=0;i<nbasis;i++){
+	random(RNG,subspace[i]);
+	std::cout<<GridLogMessage<<" norm subspace["<<i<<"] "<<norm2(subspace[i])<<std::endl;
+      }
+      Orthogonalise();
+    }
+
+    // Build the first nn subspace vectors from gaussian noise passed once
+    // through ConjugateGradient on hermop, then orthogonalise the full set.
+    //   RNG    : source of the gaussian noise
+    //   hermop : operator handed to CG and used for the printed diagnostics
+    //   nn     : number of vectors to (re)generate; must not exceed nbasis
+    virtual void CreateSubspace(GridParallelRNG  &RNG,LinearOperatorBase<FineField> &hermop,int nn=nbasis) {
+
+      // subspace holds exactly nbasis fields; a larger nn would index past the end
+      assert(nn<=nbasis);
+
+      RealD scale;
+
+      // presumably (tolerance, max iterations) -- confirm against ConjugateGradient
+      ConjugateGradient<FineField> CG(1.0e-2,10000);
+      FineField noise(FineGrid);
+      FineField Mn(FineGrid);
+
+      for(int b=0;b<nn;b++){
+	
+	// unit-norm gaussian starting vector
+	gaussian(RNG,noise);
+	scale = std::pow(norm2(noise),-0.5); 
+	noise=noise*scale;
+
+	hermop.Op(noise,Mn); std::cout<<GridLogMessage << "noise   ["<<b<<"] <n|MdagM|n> "<<norm2(Mn)<<std::endl;
+
+	// single filtering pass; the loop bound is where more passes would be added
+	for(int i=0;i<1;i++){
+
+	  CG(hermop,noise,subspace[b]);
+
+	  noise = subspace[b];
+	  scale = std::pow(norm2(noise),-0.5); 
+	  noise=noise*scale;
+
+	}
+
+	hermop.Op(noise,Mn); std::cout<<GridLogMessage << "filtered["<<b<<"] <f|MdagM|f> "<<norm2(Mn)<<std::endl;
+	subspace[b]   = noise;
+
+      }
+
+      Orthogonalise();
+
+    }
+  };
  // Fine Object == (per site) type of fine field
  // nbasis      == number of deflation vectors
  template<class Fobj,class CComplex,int nbasis>
@@ -82,7 +170,7 @@ namespace Grid {
    ////////////////////
    Geometry         geom;
    GridBase *       _grid; 
-    CartesianStencil Stencil; 
+    CartesianStencil<siteVector,siteVector,SimpleCompressor<siteVector> > Stencil; 

    std::vector<CoarseMatrix> A;

@@ -101,24 +189,22 @@ namespace Grid {
      SimpleCompressor<siteVector> compressor;
      Stencil.HaloExchange(in,comm_buf,compressor);

-      //PARALLEL_FOR_LOOP
+PARALLEL_FOR_LOOP
      for(int ss=0;ss<Grid()->oSites();ss++){
        siteVector res = zero;
 	siteVector nbr;
-	int offset,local,perm,ptype;
-
+	int ptype;
+	StencilEntry *SE;
 	for(int point=0;point<geom.npoint;point++){
-	  offset = Stencil._offsets [point][ss];
-	  local  = Stencil._is_local[point][ss];
-	  perm   = Stencil._permute [point][ss];
-	  ptype  = Stencil._permute_type[point];
+
+	  SE=Stencil.GetEntry(ptype,point,ss);
 	  
-	  if(local&&perm) { 
-	    permute(nbr,in._odata[offset],ptype);
-	  } else if(local) { 
-	    nbr = in._odata[offset];
+	  if(SE->_is_local&&SE->_permute) { 
+	    permute(nbr,in._odata[SE->_offset],ptype);
+	  } else if(SE->_is_local) { 
+	    nbr = in._odata[SE->_offset];
 	  } else {
-	    nbr = comm_buf[offset];
+	    nbr = comm_buf[SE->_offset];
 	  }
 	  res = res + A[point]._odata[ss]*nbr;
 	}
@@ -145,7 +231,8 @@ namespace Grid {
      comm_buf.resize(Stencil._unified_buffer_size);
    };

-    void CoarsenOperator(GridBase *FineGrid,LinearOperatorBase<Lattice<Fobj> > &linop,std::vector<Lattice<Fobj> > & subspace){
+    void CoarsenOperator(GridBase *FineGrid,LinearOperatorBase<Lattice<Fobj> > &linop,
+			 Aggregation<Fobj,CComplex,nbasis> & Subspace){

      FineField iblock(FineGrid); // contributions from within this block
      FineField oblock(FineGrid); // contributions from outwith this block
@@ -162,8 +249,7 @@ namespace Grid {
      CoarseScalar InnerProd(Grid()); 

      // Orthogonalise the subblocks over the basis
-      blockOrthogonalise(InnerProd,subspace);
-      blockProject(iProj,subspace[0],subspace);
+      blockOrthogonalise(InnerProd,Subspace.subspace);

      // Compute the matrix elements of linop between this orthonormal
      // set of vectors.
@@ -177,7 +263,10 @@ namespace Grid {
      assert(self_stencil!=-1);

      for(int i=0;i<nbasis;i++){
-	phi=subspace[i];
+	phi=Subspace.subspace[i];
+	
+	std::cout<<GridLogMessage<<"("<<i<<").."<<std::endl;
+
 	for(int p=0;p<geom.npoint;p++){ 

 	  int dir   = geom.directions[p];
@@ -210,8 +299,11 @@ namespace Grid {
 	    assert(0);
 	  }

-	  blockProject(iProj,iblock,subspace);
-	  blockProject(oProj,oblock,subspace);
+	  Subspace.ProjectToSubspace(iProj,iblock);
+	  Subspace.ProjectToSubspace(oProj,oblock);
+	  //	  blockProject(iProj,iblock,Subspace.subspace);
+	  //	  blockProject(oProj,oblock,Subspace.subspace);
+PARALLEL_FOR_LOOP
 	  for(int ss=0;ss<Grid()->oSites();ss++){
 	    for(int j=0;j<nbasis;j++){
 	      if( disp!= 0 ) {
@@ -227,33 +319,33 @@ namespace Grid {
      ///////////////////////////
      // test code worth preserving in if block
      ///////////////////////////
-      std::cout<< " Computed matrix elements "<< self_stencil <<std::endl;
+      std::cout<<GridLogMessage<< " Computed matrix elements "<< self_stencil <<std::endl;
      for(int p=0;p<geom.npoint;p++){
-	std::cout<< "A["<<p<<"]" << std::endl;
-	std::cout<< A[p] << std::endl;
+	std::cout<<GridLogMessage<< "A["<<p<<"]" << std::endl;
+	std::cout<<GridLogMessage<< A[p] << std::endl;
      }
-      std::cout<< " picking by block0 "<< self_stencil <<std::endl;
+      std::cout<<GridLogMessage<< " picking by block0 "<< self_stencil <<std::endl;

-      phi=subspace[0];
+      phi=Subspace.subspace[0];
      std::vector<int> bc(FineGrid->_ndimension,0);

      blockPick(Grid(),phi,tmp,bc);      // Pick out a block
      linop.Op(tmp,Mphi);                // Apply big dop
-      blockProject(iProj,Mphi,subspace); // project it and print it
-      std::cout<< " Computed matrix elements from block zero only "<<std::endl;
-      std::cout<< iProj <<std::endl;
-      std::cout<<"Computed Coarse Operator"<<std::endl;
+      blockProject(iProj,Mphi,Subspace.subspace); // project it and print it
+      std::cout<<GridLogMessage<< " Computed matrix elements from block zero only "<<std::endl;
+      std::cout<<GridLogMessage<< iProj <<std::endl;
+      std::cout<<GridLogMessage<<"Computed Coarse Operator"<<std::endl;
 #endif
-      //      AssertHermitian();
      //      ForceHermitian();
-      //      ForceDiagonal();
+      AssertHermitian();
+      // ForceDiagonal();
    }
    void ForceDiagonal(void) {


-      std::cout<<"**************************************************"<<std::endl;
-      std::cout<<"****   Forcing coarse operator to be diagonal ****"<<std::endl;
-      std::cout<<"**************************************************"<<std::endl;
+      std::cout<<GridLogMessage<<"**************************************************"<<std::endl;
+      std::cout<<GridLogMessage<<"****   Forcing coarse operator to be diagonal ****"<<std::endl;
+      std::cout<<GridLogMessage<<"**************************************************"<<std::endl;
      for(int p=0;p<8;p++){
 	A[p]=zero;
      }
@@ -263,7 +355,7 @@ namespace Grid {

      Complex one(1.0);

-      iMatrix<Complex,nbasis> ident;  ident=one;
+      iMatrix<CComplex,nbasis> ident;  ident=one;

      val = val*adj(val);
      val = val + 1.0;
@@ -279,7 +371,7 @@ namespace Grid {
 	int dd=d+1;
 	A[2*d] = adj(Cshift(A[2*d+1],dd,1));
      }
-      A[8] = 0.5*(A[8] + adj(A[8]));
+      //      A[8] = 0.5*(A[8] + adj(A[8]));
    }
    void AssertHermitian(void) {
      CoarseMatrix AA    (Grid());
@@ -293,13 +385,13 @@ namespace Grid {
 	
 	Diff = AA - adj(AAc);

-	std::cout<<"Norm diff dim "<<d<<" "<< norm2(Diff)<<std::endl;
-	std::cout<<"Norm dim "<<d<<" "<< norm2(AA)<<std::endl;
+	std::cout<<GridLogMessage<<"Norm diff dim "<<d<<" "<< norm2(Diff)<<std::endl;
+	std::cout<<GridLogMessage<<"Norm dim "<<d<<" "<< norm2(AA)<<std::endl;
 	  
      }
      Diff = A[8] - adj(A[8]);
-      std::cout<<"Norm diff local "<< norm2(Diff)<<std::endl;
-      std::cout<<"Norm local "<< norm2(A[8])<<std::endl;
+      std::cout<<GridLogMessage<<"Norm diff local "<< norm2(Diff)<<std::endl;
+      std::cout<<GridLogMessage<<"Norm local "<< norm2(A[8])<<std::endl;
    }
    
  };
--- a/lib/algorithms/LinearOperator.h
+++ b/lib/algorithms/LinearOperator.h
@@ -71,6 +71,47 @@ namespace Grid {
      }
    };

+    ////////////////////////////////////////////////////////////////////
+    // Hermitian operator MdagM + shift, intended for the multigrid smoother.
+    // Only HermOp / HermOpAndNorm are usable; every other entry point
+    // forwards to the matrix and then trips assert(0).
+    ////////////////////////////////////////////////////////////////////
+    template<class Matrix,class Field>
+    class ShiftedMdagMLinearOperator : public LinearOperatorBase<Field> {
+    private:
+      Matrix &_Mat;   // wrapped matrix, held by reference
+      RealD _shift;   // multiple of the input added to MdagM*in
+    public:
+      ShiftedMdagMLinearOperator(Matrix &Mat,RealD shift)
+        : _Mat(Mat), _shift(shift)
+      {};
+
+      // Support for coarsening to a multigrid (unsupported here: asserts)
+      void OpDiag (const Field &in, Field &out) {
+        _Mat.Mdiag(in,out);
+        assert(0);
+      }
+      void OpDir  (const Field &in, Field &out,int dir,int disp) {
+        _Mat.Mdir(in,out,dir,disp);
+        assert(0);
+      }
+
+      // Plain and adjoint application (unsupported here: asserts)
+      void Op     (const Field &in, Field &out){
+        _Mat.M(in,out);
+        assert(0);
+      }
+      void AdjOp     (const Field &in, Field &out){
+        _Mat.Mdag(in,out);
+        assert(0);
+      }
+
+      // out = MdagM*in + shift*in ; n1 = Re<in|out> ; n2 = norm2(out).
+      // The n1/n2 written by MdagM are overwritten with values for the
+      // shifted result.
+      void HermOpAndNorm(const Field &in, Field &out,RealD &n1,RealD &n2){
+        _Mat.MdagM(in,out,n1,n2);
+        out = out + _shift*in;
+
+        const ComplexD overlap = innerProduct(in,out);
+        n1 = real(overlap);
+        n2 = norm2(out);
+      }
+
+      // As HermOpAndNorm, discarding both norms.
+      void HermOp(const Field &in, Field &out){
+        RealD discardedDot, discardedNorm;
+        HermOpAndNorm(in,out,discardedDot,discardedNorm);
+      }
+    };
+
    ////////////////////////////////////////////////////////////////////
    // Wrap an already herm matrix
    ////////////////////////////////////////////////////////////////////
@@ -147,6 +188,7 @@ namespace Grid {
    };
    template<class Matrix,class Field>
      class SchurDiagMooeeOperator :  public SchurOperatorBase<Field> {
+    protected:
      Matrix &_Mat;
    public:
      SchurDiagMooeeOperator (Matrix &Mat): _Mat(Mat){};
@@ -173,6 +215,7 @@ namespace Grid {
    };
    template<class Matrix,class Field>
      class SchurDiagOneOperator :  public SchurOperatorBase<Field> {
+    protected:
      Matrix &_Mat;
    public:
      SchurDiagOneOperator (Matrix &Mat): _Mat(Mat){};
@@ -199,6 +242,7 @@ namespace Grid {
      }
    };

+
    /////////////////////////////////////////////////////////////
    // Base classes for functions of operators
    /////////////////////////////////////////////////////////////
@@ -207,6 +251,11 @@ namespace Grid {
      virtual void operator() (LinearOperatorBase<Field> &Linop, const Field &in, Field &out) = 0;
    };

+    /////////////////////////////////////////////////////////////
+    // Abstract interface: a callable taking an input Field and
+    // writing its result into an output Field.
+    /////////////////////////////////////////////////////////////
+    template<class Field> class LinearFunction {
+    public:
+      virtual void operator() (const Field &in, Field &out) = 0;
+      // Polymorphic base: allow derived objects to be destroyed via a base pointer.
+      virtual ~LinearFunction(){};
+    };
+
    /////////////////////////////////////////////////////////////
    // Base classes for Multishift solvers for operators
    /////////////////////////////////////////////////////////////
--- a/lib/algorithms/Preconditioner.h
+++ b/lib/algorithms/Preconditioner.h
@@ -0,0 +1,19 @@
+#ifndef GRID_PRECONDITIONER_H
+#define GRID_PRECONDITIONER_H
+
+namespace Grid {
+
+  // Abstract preconditioner interface: psi is the output for a given src.
+  // operator() is declared public so it can be invoked through a
+  // Preconditioner reference or pointer, not only through LinearFunction.
+  template<class Field> class Preconditioner :  public LinearFunction<Field> { 
+  public:
+    virtual void operator()(const Field &src, Field & psi)=0;
+  };
+
+  // Identity preconditioner: copies src into psi unchanged.
+  template<class Field> class TrivialPrecon :  public Preconditioner<Field> { 
+  public:
+    void operator()(const Field &src, Field & psi){
+      psi = src;
+    }
+    TrivialPrecon(void){};
+  };
+
+}
+#endif
--- a/lib/algorithms/approx/Chebyshev.h
+++ b/lib/algorithms/approx/Chebyshev.h
@@ -9,23 +9,34 @@ namespace Grid {
  ////////////////////////////////////////////////////////////////////////////////////////////
  // Simple general polynomial with user supplied coefficients
  ////////////////////////////////////////////////////////////////////////////////////////////
+  // OperatorFunction that simply applies the operator's HermOp: out = HermOp(in).
+  template<class Field>
+  class HermOpOperatorFunction : public OperatorFunction<Field> {
+  public:
+    // Public so the functor can be called on the concrete type as well as
+    // through an OperatorFunction reference.
+    void operator() (LinearOperatorBase<Field> &Linop, const Field &in, Field &out) {
+      Linop.HermOp(in,out);
+    };
+  };
+
  template<class Field>
  class Polynomial : public OperatorFunction<Field> {
  private:
-    std::vector<double> Coeffs;
+    std::vector<RealD> Coeffs;
  public:
-    Polynomial(std::vector<double> &_Coeffs) : Coeffs(_Coeffs) {};
+    Polynomial(std::vector<RealD> &_Coeffs) : Coeffs(_Coeffs) { };

    // Implement the required interface
    void operator() (LinearOperatorBase<Field> &Linop, const Field &in, Field &out) {

-      Field AtoN = in;
+      Field AtoN(in._grid);
+      Field Mtmp(in._grid);
+      AtoN = in;
      out = AtoN*Coeffs[0];
-
+      //      std::cout <<"Poly in " <<norm2(in)<<std::endl;
+      //      std::cout <<"0 " <<norm2(out)<<std::endl;
      for(int n=1;n<Coeffs.size();n++){
-	Field Mtmp=AtoN;
-	Linop.Op(Mtmp,AtoN);
+	Mtmp = AtoN;
+	Linop.HermOp(Mtmp,AtoN);
 	out=out+AtoN*Coeffs[n];
+	//	std::cout << n<<" " <<norm2(out)<<std::endl;
      }
    };
  };
@@ -36,21 +47,36 @@ namespace Grid {
  template<class Field>
  class Chebyshev : public OperatorFunction<Field> {
  private:
-    std::vector<double> Coeffs;
+    std::vector<RealD> Coeffs;
    int order;
-    double hi;
-    double lo;
+    RealD hi;
+    RealD lo;

  public:
    void csv(std::ostream &out){
-      for (double x=lo; x<hi; x+=(hi-lo)/1000) {
-	double f = approx(x);
+      for (RealD x=lo; x<hi; x+=(hi-lo)/1000) {
+	RealD f = approx(x);
 	out<< x<<" "<<f<<std::endl;
      }
      return;
    }

-    Chebyshev(double _lo,double _hi,int _order, double (* func)(double) ){
+    // Convenience for plotting the approximation
+    void   PlotApprox(std::ostream &out) {
+      out<<"Polynomial approx ["<<lo<<","<<hi<<"]"<<std::endl;
+      for(RealD x=lo;x<hi;x+=(hi-lo)/50.0){
+	out <<x<<"\t"<<approx(x)<<std::endl;
+      }
+    };
+
+    Chebyshev(){};
+    Chebyshev(RealD _lo,RealD _hi,int _order, RealD (* func)(RealD) ) {Init(_lo,_hi,_order,func);};
+    
+    ////////////////////////////////////////////////////////////////////////////////////////////////////
+    // c.f. numerical recipes "chebft"/"chebev". This is sec 5.8 "Chebyshev approximation".
+    ////////////////////////////////////////////////////////////////////////////////////////////////////
+    void Init(RealD _lo,RealD _hi,int _order, RealD (* func)(RealD))
+    {
      lo=_lo;
      hi=_hi;
      order=_order;
@@ -58,29 +84,58 @@ namespace Grid {
      if(order < 2) exit(-1);
      Coeffs.resize(order);
      for(int j=0;j<order;j++){
-	double s=0;
+	RealD s=0;
 	for(int k=0;k<order;k++){
-	  double y=std::cos(M_PI*(k+0.5)/order);
-	  double x=0.5*(y*(hi-lo)+(hi+lo));
-	  double f=func(x);
+	  RealD y=std::cos(M_PI*(k+0.5)/order);
+	  RealD x=0.5*(y*(hi-lo)+(hi+lo));
+	  RealD f=func(x);
 	  s=s+f*std::cos( j*M_PI*(k+0.5)/order );
 	}
 	Coeffs[j] = s * 2.0/order;
      }
    };

-    double approx(double x) // Convenience for plotting the approximation
+    
+    // Multiply each stored coefficient Coeffs[m] (m>=1) by a damping factor
+    // g[m] built from normalised values U[n] = sin((n+1)theta)/sin(theta),
+    // theta = pi/(order+2).  Coeffs must already hold 'order' entries
+    // (as set up by Init).  Coeffs[0] is left untouched (g[0] == 1).
+    void JacksonSmooth(void){
+      const int M = order;
+      RealD alpha = M_PI/(M+2);
+      RealD lmax = std::cos(alpha);
+      RealD theta = std::acos(lmax);
+      RealD sumUsq =0;
+      // Indices 0..M inclusive are used below, so M+1 entries are needed
+      std::vector<RealD> U(M+1);
+      std::vector<RealD> a(M+1);
+      std::vector<RealD> g(M+1);
+      for(int n=0;n<=M;n++){
+	U[n] = std::sin((n+1)*theta)/std::sin(theta);
+	sumUsq += U[n]*U[n];
+      }      
+      sumUsq = std::sqrt(sumUsq);
+
+      // Normalise so that sum_i a[i]^2 == 1; a[0] must be included because
+      // g[0] = 1 and the i=0 terms of g[m] depend on it.
+      for(int i=0;i<=M;i++){
+	a[i] = U[i]/sumUsq;
+      }
+      g[0] = 1.0;
+      for(int m=1;m<=M;m++){
+	g[m] = 0;
+	for(int i=0;i<=M-m;i++){
+	  g[m]+= a[i]*a[m+i];
+	}
+      }
+      // Coeffs has 'order' entries (indices 0..order-1); do not touch Coeffs[order]
+      for(int m=1;m<M;m++){
+	Coeffs[m]*=g[m];
+      }
+    }
+    RealD approx(RealD x) // Convenience for plotting the approximation
    {
-      double Tn;
-      double Tnm;
-      double Tnp;
+      RealD Tn;
+      RealD Tnm;
+      RealD Tnp;
      
-      double y=( x-0.5*(hi+lo))/(0.5*(hi-lo));
+      RealD y=( x-0.5*(hi+lo))/(0.5*(hi-lo));
      
-      double T0=1;
-      double T1=y;
+      RealD T0=1;
+      RealD T1=y;
      
-      double sum;
+      RealD sum;
      sum = 0.5*Coeffs[0]*T0;
      sum+= Coeffs[1]*T1;
      
@@ -95,46 +150,38 @@ namespace Grid {
      return sum;
    };

-    // Convenience for plotting the approximation
-    void   PlotApprox(std::ostream &out) {
-      out<<"Polynomial approx ["<<lo<<","<<hi<<"]"<<std::endl;
-      for(double x=lo;x<hi;x+=(hi-lo)/50.0){
-	out <<x<<"\t"<<approx(x)<<std::endl;
-      }
-    };
-
-    // Implement the required interface; could require Lattice base class
+    // Implement the required interface
    void operator() (LinearOperatorBase<Field> &Linop, const Field &in, Field &out) {

-      Field T0 = in;
-      Field T1 = T0; // Field T1(T0._grid); more efficient but hardwires Lattice class
-      Field T2 = T1;
+      GridBase *grid=in._grid;
+
+      int vol=grid->gSites();
+
+      Field T0(grid); T0 = in;  
+      Field T1(grid); 
+      Field T2(grid);
+      Field y(grid);
      
-      // use a pointer trick to eliminate copies
      Field *Tnm = &T0;
      Field *Tn  = &T1;
      Field *Tnp = &T2;
-      Field y   = in;
-  
-      double xscale = 2.0/(hi-lo);
-      double mscale = -(hi+lo)/(hi-lo);

      // Tn=T1 = (xscale M + mscale)in
-      Linop.Op(T0,y);
-
+      RealD xscale = 2.0/(hi-lo);
+      RealD mscale = -(hi+lo)/(hi-lo);
+      Linop.HermOp(T0,y);
      T1=y*xscale+in*mscale;

      // sum = .5 c[0] T0 + c[1] T1
      out = (0.5*Coeffs[0])*T0 + Coeffs[1]*T1;
-
      for(int n=2;n<order;n++){
 	
-	Linop.Op(*Tn,y);
+	Linop.HermOp(*Tn,y);

 	y=xscale*y+mscale*(*Tn);

 	*Tnp=2.0*y-(*Tnm);
-	
+
 	out=out+Coeffs[n]* (*Tnp);

 	// Cycle pointers to avoid copies
@@ -148,5 +195,121 @@ namespace Grid {
  };


+  ////////////////////////////////////////////////////////////////////////////////////////////
+  // Chebyshev polynomial T_{order-1} evaluated in the mapped variable
+  //    x = ( 2 (A-mu)^2 - (alpha^2+beta^2) ) / (alpha^2-beta^2)
+  // Only the last coefficient is non-zero, so the sum in approx() and
+  // operator() picks out that single term.
+  // Keeps its own Coeffs/order; those of the Chebyshev base are private.
+  ////////////////////////////////////////////////////////////////////////////////////////////
+  template<class Field>
+  class ChebyshevLanczos : public Chebyshev<Field> {
+  private:
+    std::vector<RealD> Coeffs;
+    int order;
+    RealD alpha;
+    RealD beta;
+    RealD mu;
+
+  public:
+    // _alpha,_beta,_mu : parameters of the variable map above
+    // _order           : number of coefficients; must be >= 2
+    ChebyshevLanczos(RealD _alpha,RealD _beta,RealD _mu,int _order) :
+    alpha(_alpha),
+      beta(_beta),
+          mu(_mu)
+    {
+      order=_order;
+      // Same guard as Chebyshev::Init: Coeffs[1] and Coeffs[order-1] are used below
+      if(order < 2) exit(-1);
+      Coeffs.resize(order);
+      for(int i=0;i<_order;i++){
+	Coeffs[i] = 0.0;
+      }
+      Coeffs[order-1]=1.0;
+    };
+
+    // Write "x f(x)" pairs for x in [-1.2 alpha, 1.2 alpha) to out.
+    void csv(std::ostream &out){
+      for (RealD x=-1.2*alpha; x<1.2*alpha; x+=(2.0*alpha)/10000) {
+	RealD f = approx(x);
+	out<< x<<" "<<f<<std::endl;
+      }
+      return;
+    }
+
+    // Scalar evaluation of the polynomial at xx. Convenience for plotting the approximation
+    RealD approx(RealD xx)
+    {
+      RealD Tn;
+      RealD Tnm;
+      RealD Tnp;
+      // RealD (not Real) so the scalar map matches AminusMuSq
+      RealD aa = alpha * alpha;
+      RealD bb = beta  *  beta;
+      
+      RealD x = ( 2.0 * (xx-mu)*(xx-mu) - (aa+bb) ) / (aa-bb);
+
+      RealD y= x;
+      
+      RealD T0=1;
+      RealD T1=y;
+      
+      RealD sum;
+      sum = 0.5*Coeffs[0]*T0;
+      sum+= Coeffs[1]*T1;
+      
+      // three-term recurrence T_{n+1} = 2 y T_n - T_{n-1}
+      Tn =T1;
+      Tnm=T0;
+      for(int i=2;i<order;i++){
+	Tnp=2*y*Tn-Tnm;
+	Tnm=Tn;
+	Tn =Tnp;
+	sum+= Tn*Coeffs[i];
+      }
+      return sum;
+    };
+
+    // shift_Multiply in Rudy's code
+    // out = 2/(aa-bb) * (A-mu)^2 in  -  (aa+bb)/(aa-bb) * in , with A = Linop.HermOp
+    void AminusMuSq(LinearOperatorBase<Field> &Linop, const Field &in, Field &out) 
+    {
+      GridBase *grid=in._grid;
+      Field tmp(grid);
+
+      RealD aa= alpha*alpha;
+      RealD bb= beta * beta;
+
+      // out = (A-mu) in
+      Linop.HermOp(in,out);
+      out = out - mu*in;
+
+      // tmp = (A-mu) out
+      Linop.HermOp(out,tmp);
+      tmp = tmp - mu * out;
+
+      out = (2.0/ (aa-bb) ) * tmp -  ((aa+bb)/(aa-bb))*in;
+    };
+    // Implement the required interface: out = polynomial(Linop) in
+    void operator() (LinearOperatorBase<Field> &Linop, const Field &in, Field &out) {
+
+      GridBase *grid=in._grid;
+
+      Field T0(grid); T0 = in;  
+      Field T1(grid); 
+      Field T2(grid);
+      Field  y(grid);
+      
+      Field *Tnm = &T0;
+      Field *Tn  = &T1;
+      Field *Tnp = &T2;
+
+      // Tn=T1 = (xscale M )*in
+      AminusMuSq(Linop,T0,T1);
+
+      // sum = .5 c[0] T0 + c[1] T1
+      out = (0.5*Coeffs[0])*T0 + Coeffs[1]*T1;
+      for(int n=2;n<order;n++){
+	
+	AminusMuSq(Linop,*Tn,y);
+
+	*Tnp=2.0*y-(*Tnm);
+
+	out=out+Coeffs[n]* (*Tnp);
+
+	// Cycle pointers to avoid copies
+	Field *swizzle = Tnm;
+	Tnm    =Tn;
+	Tn     =Tnp;
+	Tnp    =swizzle;
+	  
+      }
+    }
+  };
 }
 #endif
--- a/lib/algorithms/approx/MultiShiftFunction.h
+++ b/lib/algorithms/approx/MultiShiftFunction.h
@@ -1,6 +1,8 @@
 #ifndef MULTI_SHIFT_FUNCTION
 #define MULTI_SHIFT_FUNCTION
+
 namespace Grid {
+
 class MultiShiftFunction {
 public:
  int order;
@@ -9,20 +11,29 @@ public:
  std::vector<RealD> tolerances;
  RealD norm;
  RealD lo,hi;
+
  MultiShiftFunction(int n,RealD _lo,RealD _hi): poles(n), residues(n), lo(_lo), hi(_hi) {;};
  RealD approx(RealD x);
  void csv(std::ostream &out);
  void gnuplot(std::ostream &out);
-  MultiShiftFunction(AlgRemez & remez,double tol,bool inverse) :
-      order(remez.getDegree()),
-      tolerances(remez.getDegree(),tol),
-      poles(remez.getDegree()),
-      residues(remez.getDegree())
+
+  void Init(AlgRemez & remez,double tol,bool inverse) 
  {
+    order=remez.getDegree();
+    tolerances.resize(remez.getDegree(),tol);
+    poles.resize(remez.getDegree());
+    residues.resize(remez.getDegree());
    remez.getBounds(lo,hi);
    if ( inverse ) remez.getIPFE (&residues[0],&poles[0],&norm);
-    else remez.getPFE (&residues[0],&poles[0],&norm);
+    else           remez.getPFE (&residues[0],&poles[0],&norm);
  }
+  // Allow deferred initialisation
+  MultiShiftFunction(void){};
+  MultiShiftFunction(AlgRemez & remez,double tol,bool inverse)
+  {
+    Init(remez,tol,inverse);
+  }
+
 };
 }
 #endif
--- a/lib/algorithms/approx/Remez.cc
+++ b/lib/algorithms/approx/Remez.cc
@@ -758,3 +758,4 @@ void AlgRemez::csv(std::ostream & os)
  }
  return;
 }
+
--- a/lib/algorithms/approx/Remez.h
+++ b/lib/algorithms/approx/Remez.h
@@ -15,7 +15,10 @@
 #ifndef INCLUDED_ALG_REMEZ_H
 #define INCLUDED_ALG_REMEZ_H

-#include <algorithms/approx/bigfloat.h>
+#include <stddef.h>
+
+//#include <algorithms/approx/bigfloat.h>
+#include <algorithms/approx/bigfloat_double.h>

 #define JMAX 10000 //Maximum number of iterations of Newton's approximation
 #define SUM_MAX 10 // Maximum number of terms in exponential
@@ -28,6 +31,7 @@
  remez.getIPFE(res,pole,&norm);
  remez.csv(ostream &os);
 */
+
 class AlgRemez
 {
 private:
--- a/lib/algorithms/iterative/AdefGeneric.h
+++ b/lib/algorithms/iterative/AdefGeneric.h
@@ -0,0 +1,370 @@
+#ifndef GRID_ALGORITHMS_ITERATIVE_GENERIC_PCG
+#define GRID_ALGORITHMS_ITERATIVE_GENERIC_PCG
+
+  /*
+   * Compared to Tang-2009:  P=Pleft. P^T = PRight Q=MssInv. 
+   * Script A = SolverMatrix 
+   * Script P = Preconditioner
+   *
+   * Deflation methods considered
+   *      -- Solve P A x = P b        [ like Luscher ]
+   * DEF-1        M P A x = M P b     [i.e. left precon]
+   * DEF-2        P^T M A x = P^T M b
+   * ADEF-1       Preconditioner = M P + Q      [ Q + M + M A Q]
+   * ADEF-2       Preconditioner = P^T M + Q
+   * BNN          Preconditioner = P^T M P + Q
+   * BNN2         Preconditioner = M P + P^TM +Q - M P A M 
+   * 
+   * Implement ADEF-2
+   *
+   * Vstart = P^Tx + Qb
+   * M1 = P^TM + Q
+   * M2=M3=1
+   * Vout = x
+   */
+
+// abstract base
+template<class Field, class CoarseField>
+class TwoLevelFlexiblePcg : public LinearFunction<Field>
+{
+ public:
+  int verbose;
+  RealD   Tolerance;
+  Integer MaxIterations;
+  const int mmax = 5;
+  GridBase *grid;
+  GridBase *coarsegrid;
+
+  LinearOperatorBase<Field>   *_Linop
+  OperatorFunction<Field>     *_Smoother,
+  LinearFunction<CoarseField> *_CoarseSolver;
+
+  // Need something that knows how to get from coarse to fine and back again
+  
+  // more most opertor functions
+  TwoLevelFlexiblePcg(RealD tol,
+		     Integer maxit,
+		     LinearOperatorBase<Field> *Linop,
+		     LinearOperatorBase<Field> *SmootherLinop,
+		     OperatorFunction<Field>   *Smoother,
+		     OperatorFunction<CoarseField>  CoarseLinop
+		     ) : 
+      Tolerance(tol), 
+      MaxIterations(maxit),
+      _Linop(Linop),
+      _PreconditionerLinop(PrecLinop),
+      _Preconditioner(Preconditioner)
+  { 
+    verbose=0;
+  };
+
+  // The Pcg routine is common to all; only the matrices M1, M2, M3 and the
+  // Vstart/Vout maps differ from one derived implementation to another
+  // Flexible preconditioned CG driver.
+  //   src : right-hand side
+  //   psi : on entry only its norm is read and its checkerboard is set from src
+  // Keeps the last mmax search directions p[] with their A p products mmp[]
+  // and pAp values, and orthogonalises each new direction against them.
+  // Asserts if MaxIterations is exhausted without meeting the tolerance.
+  // NOTE(review): the iterate x is never written back to psi (and Vout is
+  // never called) before returning -- confirm how the result is meant to
+  // reach the caller.
+  void operator() (const Field &src, Field &psi){
+
+    psi.checkerboard = src.checkerboard;
+    grid             = src._grid;
+
+    RealD rtzp,rtz,a,d,b;
+    RealD guess = norm2(psi);
+    RealD ssq   = norm2(src);
+    RealD rsq   = ssq*Tolerance*Tolerance;
+    
+    /////////////////////////////
+    // Set up history vectors
+    /////////////////////////////
+    std::vector<Field> p  (mmax,grid);
+    std::vector<Field> mmp(mmax,grid);
+    std::vector<RealD> pAp(mmax);
+
+    Field x  (grid); x = psi;
+    Field z  (grid);
+    Field tmp(grid);
+    Field r  (grid);
+    Field mu (grid);
+  
+    //////////////////////////
+    // x0 = Vstart -- possibly modify guess
+    //////////////////////////
+    x=src;
+    Vstart(x,src);
+
+    // r0 = b -A x0
+    // mmp[0] is used as scratch for A x0, matching the axpy on the next line
+    HermOp(x,mmp[0]); // Shouldn't this be something else?
+    axpy (r, -1.0,mmp[0], src);    // Recomputes r=src-Ax0
+
+    //////////////////////////////////
+    // Compute z = M1 x
+    //////////////////////////////////
+    M1(r,z);   // two-argument form, as M1 is declared in this class
+    rtzp =real(innerProduct(r,z));
+
+    ///////////////////////////////////////
+    // Solve for Mss mu = P A z and set p = z-mu
+    // Def2: p = 1 - Q Az = Pright z 
+    // Other algos M2 is trivial
+    ///////////////////////////////////////
+    M2(z,p[0]);
+
+    for (int k=0;k<=MaxIterations;k++){
+    
+      // positions of step k and k+1 in the length-mmax circular history
+      int peri_k  = k % mmax;
+      int peri_kp = (k+1) % mmax;
+
+      rtz=rtzp;
+      d= M3(p[peri_k],mmp[peri_k]);   // two-argument form, as M3 is declared in this class
+      a = rtz/d;
+    
+      // Memorise this
+      pAp[peri_k] = d;
+
+      axpy(x,a,p[peri_k],x);
+      RealD rn = axpy_norm(r,-a,mmp[peri_k],r);
+
+      // Compute z = M x
+      M1(r,z);
+
+      rtzp =real(innerProduct(r,z));
+
+      M2(z,mu); // ADEF-2 this is identity. Axpy possible to eliminate
+
+      p[peri_kp]=p[peri_k];
+
+      // Standard search direction  p -> z + b p    ; b = 
+      b = (rtzp)/rtz;
+
+      int northog;
+      //    northog     = (peri_kp==0)?1:peri_kp; // This is the fCG(mmax) algorithm
+      northog     = (k>mmax-1)?(mmax-1):k;        // This is the fCG-Tr(mmax-1) algorithm
+    
+      for(int back=0; back < northog; back++){
+	int peri_back = (k-back)%mmax;
+	RealD pbApk= real(innerProduct(mmp[peri_back],p[peri_kp]));
+	RealD beta = -pbApk/pAp[peri_back];
+	axpy(p[peri_kp],beta,p[peri_back],p[peri_kp]);
+      }
+
+      RealD rrn=sqrt(rn/ssq);
+      std::cout<<GridLogMessage<<"TwoLevelfPcg: k= "<<k<<" residual = "<<rrn<<std::endl;
+
+      // Stopping condition
+      if ( rn <= rsq ) { 
+
+	HermOp(x,mmp[0]); // Shouldn't this be something else?
+	axpy(tmp,-1.0,src,mmp[0]);
+	
+	RealD psinorm = sqrt(norm2(x));
+	RealD srcnorm = sqrt(norm2(src));
+	RealD tmpnorm = sqrt(norm2(tmp));
+	RealD true_residual = tmpnorm/srcnorm;
+	std::cout<<GridLogMessage<<"TwoLevelfPcg:   true residual is "<<true_residual<<std::endl;
+	std::cout<<GridLogMessage<<"TwoLevelfPcg: target residual was"<<Tolerance<<std::endl;
+	// the function returns void, so the iteration count cannot be returned
+	return;
+      }
+    }
+    // Non-convergence
+    assert(0);
+  }
+
+ public:
+
+  virtual void M(Field & in,Field & out,Field & tmp) {
+
+  }
+
+  virtual void M1(Field & in, Field & out) {// the smoother
+
+    // [PTM+Q] in = [1 - Q A] M in + Q in = Min + Q [ in -A Min]
+    Field tmp(grid);
+    Field Min(grid);
+
+    PcgM(in,Min); // Smoother call
+
+    HermOp(Min,out);
+    axpy(tmp,-1.0,out,in);          // tmp  = in - A Min
+
+    ProjectToSubspace(tmp,PleftProj);     
+    ApplyInverse(PleftProj,PleftMss_proj); // Ass^{-1} [in - A Min]_s
+    PromoteFromSubspace(PleftMss_proj,tmp);// tmp = Q[in - A Min]  
+    axpy(out,1.0,Min,tmp); // Min+tmp
+  }
+
+  virtual void M2(const Field & in, Field & out) {
+    out=in;
+    // Must override for Def2 only
+    //  case PcgDef2:
+    //    Pright(in,out);
+    //    break;
+  }
+
+  // Apply the operator: mmp = A p, and return the value the Pcg loop
+  // divides rtz by and stores in pAp[].
+  // Returns the first norm from HermOpAndNorm; presumably Re<p|A p>, as in
+  // ShiftedMdagMLinearOperator::HermOpAndNorm -- TODO confirm for the
+  // HermOpAndNorm this class ends up using.  The second norm is not the
+  // step-length denominator.
+  virtual RealD M3(const Field & p, Field & mmp){
+    RealD d,dd;
+    HermOpAndNorm(p,mmp,d,dd);
+    return d;
+    // Must override for Def1 only
+    //  case PcgDef1:
+    //    d=linop_d->Mprec(p,mmp,tmp,0,1);// Dag no
+    //      linop_d->Mprec(mmp,mp,tmp,1);// Dag yes
+    //    Pleft(mp,mmp);
+    //    d=real(linop_d->inner(p,mmp));
+  }
+
+  // Starting-vector correction for the Def2 / ADef2 variants:
+  // x is updated in place to x + promote( Ass^{-1} project( src - A x ) ).
+  //   x   : in/out, the initial guess
+  //   src : right-hand side
+  // NOTE(review): PleftProj, PleftMss_proj and ApplyInverseCG are not
+  // declared in the visible part of this class -- confirm where they come from.
+  virtual void VstartDef2(Field & x,const Field & src){
+    //case PcgDef2:
+    //case PcgAdef2: 
+    //case PcgAdef2f:
+    //case PcgV11f:
+    ///////////////////////////////////
+    // Choose x_0 such that 
+    // x_0 = guess +  (A_ss^inv) r_s = guess + Ass_inv [src -Aguess]
+    //                               = [1 - Ass_inv A] Guess + Assinv src
+    //                               = P^T guess + Assinv src 
+    //                               = Vstart  [Tang notation]
+    // This gives:
+    // W^T (src - A x_0) = src_s - A guess_s - r_s
+    //                   = src_s - (A guess)_s - src_s  + (A guess)_s 
+    //                   = 0 
+    ///////////////////////////////////
+    Field r(grid);
+    Field mmp(grid);
+    
+    HermOp(x,mmp);
+    axpy (r, -1.0, mmp, src);        // r_{-1} = src - A x
+    ProjectToSubspace(r,PleftProj);     
+    ApplyInverseCG(PleftProj,PleftMss_proj); // Ass^{-1} r_s
+    PromoteFromSubspace(PleftMss_proj,mmp);  
+    x=x+mmp;
+
+  }
+
+  virtual void Vstart(Field & x,const Field & src){
+    return;
+  }
+
+  /////////////////////////////////////////////////////////////////////
+  // Only Def1 has non-trivial Vout. Override in Def1
+  /////////////////////////////////////////////////////////////////////
+  virtual void   Vout  (Field & in, Field & out,Field & src){
+    out = in; // default Vout: copy in to out; src is not used here
+    // Legacy Def1 branch, kept for reference (case PcgDef1):
+    //    //Qb + PT x
+    //    ProjectToSubspace(src,PleftProj);     
+    //    ApplyInverse(PleftProj,PleftMss_proj); // Ass^{-1} r_s
+    //    PromoteFromSubspace(PleftMss_proj,tmp);  
+    //    
+    //    Pright(in,out);
+    //    
+    //    linop_d->axpy(out,tmp,out,1.0);
+    //    break;
+  }
+
+  ////////////////////////////////////////////////////////////////////////////////////////////////
+  // Pright and Pleft are common to all implementations
+  ////////////////////////////////////////////////////////////////////////////////////////////////
+  virtual void Pright(Field & in,Field & out){
+    // P_R  = [ 1              0 ] 
+    //        [ -Mss^-1 Msb    0 ] 
+    Field in_sbar(grid);
+    // Step 1: split off the subspace part of in (out is used as scratch for in_s)
+    ProjectToSubspace(in,PleftProj);     
+    PromoteFromSubspace(PleftProj,out);  
+    axpy(in_sbar,-1.0,out,in);       // in_sbar = in - in_s 
+    // Step 2: apply the operator to the complement and project the result
+    HermOp(in_sbar,out);
+    ProjectToSubspace(out,PleftProj);           // Mssbar in_sbar  (project)
+    // Step 3: subspace inverse, then promote back into out
+    ApplyInverse     (PleftProj,PleftMss_proj); // Mss^{-1} Mssbar 
+    PromoteFromSubspace(PleftMss_proj,out);     // out = promoted Mss^{-1} Mssbar in_sbar
+    // Step 4: combine; out appears as both input and output of this axpy
+    axpy(out,-1.0,out,in_sbar);     // in_sbar - Mss^{-1} Mssbar in_sbar
+  }
+  virtual void Pleft (Field & in,Field & out){
+    // P_L  = [ 1  -Mbs Mss^-1] 
+    //        [ 0   0         ] 
+    Field in_sbar(grid);
+    Field    tmp2(grid);
+    Field    Mtmp(grid);
+    // Step 1: split off the subspace part of in (out is used as scratch for in_s)
+    ProjectToSubspace(in,PleftProj);     
+    PromoteFromSubspace(PleftProj,out);  
+    axpy(in_sbar,-1.0,out,in);      // in_sbar = in - in_s
+    // Step 2: subspace inverse applied to the projected input, promoted into out
+    ApplyInverse(PleftProj,PleftMss_proj); // Mss^{-1} in_s
+    PromoteFromSubspace(PleftMss_proj,out);
+    // Step 3: apply the operator to that promoted vector
+    HermOp(out,Mtmp);
+    // Step 4: tmp2 = subspace part of Mtmp
+    ProjectToSubspace(Mtmp,PleftProj);      // Msbar s Mss^{-1}
+    PromoteFromSubspace(PleftProj,tmp2);
+    // Step 5: out = Mtmp - tmp2, then out = in_sbar - out (axpy(r,a,x,y) is used as r = a*x + y above)
+    axpy(out,-1.0,tmp2,Mtmp);
+    axpy(out,-1.0,out,in_sbar);     // in_sbar - Msbars Mss^{-1} in_s
+  }
+}
+
+template<class Field>
+class TwoLevelFlexiblePcgADef2 : public TwoLevelFlexiblePcg<Field> {
+ public:
+  virtual void M(Field & in,Field & out,Field & tmp){
+    // TODO: ADef2 preconditioner not implemented yet (placeholder, does nothing)
+  } 
+  virtual void M1(Field & in, Field & out,Field & tmp,Field & mp){
+    // TODO: not implemented yet (placeholder, does nothing)
+  }
+  virtual void M2(Field & in, Field & out){
+    // TODO: not implemented yet (placeholder, does nothing)
+  }
+  virtual RealD M3(Field & p, Field & mp,Field & mmp, Field & tmp){
+    return 0.0; // TODO: not implemented yet; a non-void function must not fall off its end
+  }
+  virtual void Vstart(Field & in, Field & src, Field & r, Field & mp, Field & mmp, Field & tmp){
+    // TODO: not implemented yet (placeholder, does nothing)
+  }
+};
+/*
+template<class Field>
+class TwoLevelFlexiblePcgAD : public TwoLevelFlexiblePcg<Field> {
+ public:
+  virtual void M(Field & in,Field & out,Field & tmp); 
+  virtual void M1(Field & in, Field & out,Field & tmp,Field & mp);
+  virtual void M2(Field & in, Field & out);
+  virtual RealD M3(Field & p, Field & mp,Field & mmp, Field & tmp);
+  virtual void Vstart(Field & in, Field & src, Field & r, Field & mp, Field & mmp, Field & tmp);
+}
+
+template<class Field>
+class TwoLevelFlexiblePcgDef1 : public TwoLevelFlexiblePcg<Field> {
+ public:
+  virtual void M(Field & in,Field & out,Field & tmp); 
+  virtual void M1(Field & in, Field & out,Field & tmp,Field & mp);
+  virtual void M2(Field & in, Field & out);
+  virtual RealD M3(Field & p, Field & mp,Field & mmp, Field & tmp);
+  virtual void Vstart(Field & in, Field & src, Field & r, Field & mp, Field & mmp, Field & tmp);
+  virtual void   Vout  (Field & in, Field & out,Field & src,Field & tmp);
+}
+
+template<class Field>
+class TwoLevelFlexiblePcgDef2 : public TwoLevelFlexiblePcg<Field> {
+ public:
+  virtual void M(Field & in,Field & out,Field & tmp); 
+  virtual void M1(Field & in, Field & out,Field & tmp,Field & mp);
+  virtual void M2(Field & in, Field & out);
+  virtual RealD M3(Field & p, Field & mp,Field & mmp, Field & tmp);
+  virtual void Vstart(Field & in, Field & src, Field & r, Field & mp, Field & mmp, Field & tmp);
+}
+
+template<class Field>
+class TwoLevelFlexiblePcgV11: public TwoLevelFlexiblePcg<Field> {
+ public:
+  virtual void M(Field & in,Field & out,Field & tmp); 
+  virtual void M1(Field & in, Field & out,Field & tmp,Field & mp);
+  virtual void M2(Field & in, Field & out);
+  virtual RealD M3(Field & p, Field & mp,Field & mmp, Field & tmp);
+  virtual void Vstart(Field & in, Field & src, Field & r, Field & mp, Field & mmp, Field & tmp);
+}
+*/
+#endif
--- a/lib/algorithms/iterative/ConjugateGradient.h
+++ b/lib/algorithms/iterative/ConjugateGradient.h
@@ -13,9 +13,7 @@ namespace Grid {
 public:                                                
    RealD   Tolerance;
    Integer MaxIterations;
-    int verbose;
    ConjugateGradient(RealD tol,Integer maxit) : Tolerance(tol), MaxIterations(maxit) { 
-      verbose=1;
    };


@@ -42,14 +40,12 @@ public:
      cp =a;
      ssq=norm2(src);

-      if ( verbose ) {
-	std::cout <<std::setprecision(4)<< "ConjugateGradient: guess "<<guess<<std::endl;
-	std::cout <<std::setprecision(4)<< "ConjugateGradient:   src "<<ssq  <<std::endl;
-	std::cout <<std::setprecision(4)<< "ConjugateGradient:    mp "<<d    <<std::endl;
-	std::cout <<std::setprecision(4)<< "ConjugateGradient:   mmp "<<b    <<std::endl;
-	std::cout <<std::setprecision(4)<< "ConjugateGradient:  cp,r "<<cp   <<std::endl;
-	std::cout <<std::setprecision(4)<< "ConjugateGradient:     p "<<a    <<std::endl;
-      }
+      std::cout<<GridLogIterative <<std::setprecision(4)<< "ConjugateGradient: guess "<<guess<<std::endl;
+      std::cout<<GridLogIterative <<std::setprecision(4)<< "ConjugateGradient:   src "<<ssq  <<std::endl;
+      std::cout<<GridLogIterative <<std::setprecision(4)<< "ConjugateGradient:    mp "<<d    <<std::endl;
+      std::cout<<GridLogIterative <<std::setprecision(4)<< "ConjugateGradient:   mmp "<<b    <<std::endl;
+      std::cout<<GridLogIterative <<std::setprecision(4)<< "ConjugateGradient:  cp,r "<<cp   <<std::endl;
+      std::cout<<GridLogIterative <<std::setprecision(4)<< "ConjugateGradient:     p "<<a    <<std::endl;

      RealD rsq =  Tolerance* Tolerance*ssq;
      
@@ -58,7 +54,7 @@ public:
 	return;
      }
      
-      std::cout << std::setprecision(4)<< "ConjugateGradient: k=0 residual "<<cp<<" rsq"<<rsq<<std::endl;
+      std::cout<<GridLogIterative << std::setprecision(4)<< "ConjugateGradient: k=0 residual "<<cp<<" rsq"<<rsq<<std::endl;
      
      int k;
      for (k=1;k<=MaxIterations;k++){
@@ -69,23 +65,19 @@ public:

 	RealD    qqck = norm2(mmp);
 	ComplexD dck  = innerProduct(p,mmp);
-	//	if (verbose) std::cout <<std::setprecision(4)<< "ConjugateGradient:  d,qq "<<d<< " "<<qq <<" qqcheck "<< qqck<< " dck "<< dck<<std::endl;
      
 	a      = c/d;
 	b_pred = a*(a*qq-d)/c;

-
-	//	if (verbose) std::cout <<std::setprecision(4)<< "ConjugateGradient:  a,bp "<<a<< " "<<b_pred <<std::endl;
 	cp = axpy_norm(r,-a,mmp,r);
 	b = cp/c;
-	//	std::cout <<std::setprecision(4)<< "ConjugateGradient:  cp,b "<<cp<< " "<<b <<std::endl;
 	
 	// Fuse these loops ; should be really easy
 	psi= a*p+psi;
 	p  = p*b+r;
 	  
-	if (verbose) std::cout<<"ConjugateGradient: Iteration " <<k<<" residual "<<cp<< " target"<< rsq<<std::endl;
-
+	std::cout<<GridLogIterative<<"ConjugateGradient: Iteration " <<k<<" residual "<<cp<< " target"<< rsq<<std::endl;
+	
 	// Stopping condition
 	if ( cp <= rsq ) { 
 	  
@@ -98,13 +90,14 @@ public:
 	  RealD resnorm = sqrt(norm2(p));
 	  RealD true_residual = resnorm/srcnorm;

-	  std::cout<<"ConjugateGradient: Converged on iteration " <<k<<" residual "<<cp<< " target"<< rsq<<std::endl;
-	  std::cout<<"ConjugateGradient: true   residual  is "<<true_residual<<" sol "<<psinorm<<" src "<<srcnorm<<std::endl;
-	  std::cout<<"ConjugateGradient: target residual was "<<Tolerance<<std::endl;
+	  std::cout<<GridLogMessage<<"ConjugateGradient: Converged on iteration " <<k
+		   <<" computed residual "<<sqrt(cp/ssq)
+		   <<" true residual     "<<true_residual
+		   <<" target "<<Tolerance<<std::endl;
 	  return;
 	}
      }
-      std::cout<<"ConjugateGradient did NOT converge"<<std::endl;
+      std::cout<<GridLogMessage<<"ConjugateGradient did NOT converge"<<std::endl;
      assert(0);
    }
  };
--- a/lib/algorithms/iterative/ConjugateGradientMultiShift.h
+++ b/lib/algorithms/iterative/ConjugateGradientMultiShift.h
@@ -27,10 +27,14 @@ public:

 void operator() (LinearOperatorBase<Field> &Linop, const Field &src, Field &psi)
 {
-
  GridBase *grid = src._grid;
  int nshift = shifts.order;
  std::vector<Field> results(nshift,grid);
+  (*this)(Linop,src,results,psi);
+}
+void operator() (LinearOperatorBase<Field> &Linop, const Field &src, std::vector<Field> &results, Field &psi)
+{
+  int nshift = shifts.order;

  (*this)(Linop,src,results);
  
@@ -91,7 +95,7 @@ void operator() (LinearOperatorBase<Field> &Linop, const Field &src, std::vector
  cp = norm2(src);
  for(int s=0;s<nshift;s++){
    rsq[s] = cp * mresidual[s] * mresidual[s];
-    std::cout<<"ConjugateGradientMultiShift: shift "<<s
+    std::cout<<GridLogMessage<<"ConjugateGradientMultiShift: shift "<<s
 	     <<" target resid "<<rsq[s]<<std::endl;
    ps[s] = src;
  }
@@ -109,7 +113,7 @@ void operator() (LinearOperatorBase<Field> &Linop, const Field &src, std::vector
  // p and mmp is equal to d after this since
  // the d computation is tricky
  //  qq = real(innerProduct(p,mmp));
-  //  std::cout << "debug equal ?  qq "<<qq<<" d "<< d<<std::endl;
+  //  std::cout<<GridLogMessage << "debug equal ?  qq "<<qq<<" d "<< d<<std::endl;
  
  b = -cp /d;
  
@@ -214,7 +218,7 @@ void operator() (LinearOperatorBase<Field> &Linop, const Field &src, std::vector
 	
 	if(css<rsq[s]){
 	  if ( ! converged[s] )
-	    std::cout<<"ConjugateGradientMultiShift k="<<k<<" Shift "<<s<<" has converged"<<std::endl;
+	    std::cout<<GridLogMessage<<"ConjugateGradientMultiShift k="<<k<<" Shift "<<s<<" has converged"<<std::endl;
 	      converged[s]=1;
 	} else {
 	  all_converged=0;
@@ -225,8 +229,8 @@ void operator() (LinearOperatorBase<Field> &Linop, const Field &src, std::vector
    
    if ( all_converged ){

-      std::cout<< "CGMultiShift: All shifts have converged iteration "<<k<<std::endl;
-      std::cout<< "CGMultiShift: Checking solutions"<<std::endl;
+      std::cout<<GridLogMessage<< "CGMultiShift: All shifts have converged iteration "<<k<<std::endl;
+      std::cout<<GridLogMessage<< "CGMultiShift: Checking solutions"<<std::endl;
      
      // Check answers 
      for(int s=0; s < nshift; s++) { 
@@ -235,13 +239,13 @@ void operator() (LinearOperatorBase<Field> &Linop, const Field &src, std::vector
 	axpy(r,-alpha[s],src,tmp);
 	RealD rn = norm2(r);
 	RealD cn = norm2(src);
-	std::cout<<"CGMultiShift: shift["<<s<<"] true residual "<<std::sqrt(rn/cn)<<std::endl;
+	std::cout<<GridLogMessage<<"CGMultiShift: shift["<<s<<"] true residual "<<std::sqrt(rn/cn)<<std::endl;
      }
      return;
    }
  }
  // ugly hack
-  std::cout<<"CG multi shift did not converge"<<std::endl;
+  std::cout<<GridLogMessage<<"CG multi shift did not converge"<<std::endl;
  assert(0);
 }

--- a/lib/algorithms/iterative/ConjugateResidual.h
+++ b/lib/algorithms/iterative/ConjugateResidual.h
@@ -16,7 +16,7 @@ namespace Grid {
    int verbose;

    ConjugateResidual(RealD tol,Integer maxit) : Tolerance(tol), MaxIterations(maxit) { 
-      verbose=1;
+      verbose=0;
    };

    void operator() (LinearOperatorBase<Field> &Linop,const Field &src, Field &psi){
@@ -37,14 +37,11 @@ namespace Grid {
      Linop.HermOpAndNorm(p,Ap,pAp,pAAp);
      Linop.HermOpAndNorm(r,Ar,rAr,rAAr);

-      std::cout << "pAp, pAAp"<< pAp<<" "<<pAAp<<std::endl;
-      std::cout << "rAr, rAAr"<< rAr<<" "<<rAAr<<std::endl;
-
      cp =norm2(r);
      ssq=norm2(src);
      rsq=Tolerance*Tolerance*ssq;

-      std::cout<<"ConjugateResidual: iteration " <<0<<" residual "<<cp<< " target"<< rsq<<std::endl;
+      if (verbose) std::cout<<GridLogMessage<<"ConjugateResidual: iteration " <<0<<" residual "<<cp<< " target"<< rsq<<std::endl;

      for(int k=1;k<MaxIterations;k++){

@@ -62,22 +59,23 @@ namespace Grid {
 
 	axpy(p,b,p,r);
 	pAAp=axpy_norm(Ap,b,Ap,Ar);
-
-	std::cout<<"ConjugateResidual: iteration " <<k<<" residual "<<cp<< " target"<< rsq<<std::endl;
+	
+	if(verbose) std::cout<<GridLogMessage<<"ConjugateResidual: iteration " <<k<<" residual "<<cp<< " target"<< rsq<<std::endl;

 	if(cp<rsq) {
 	  Linop.HermOp(psi,Ap);
 	  axpy(r,-1.0,src,Ap);
-	  RealD true_resid = norm2(r);
-	  std::cout<<"ConjugateResidual: Converged on iteration " <<k<<" residual "<<cp<< " target"<< rsq<<std::endl;
-	  std::cout<<"ConjugateResidual: true   residual  is "<<true_resid<<std::endl;
-	  std::cout<<"ConjugateResidual: target residual was "<<Tolerance <<std::endl;
+	  RealD true_resid = norm2(r)/ssq;
+	  std::cout<<GridLogMessage<<"ConjugateResidual: Converged on iteration " <<k
+		   << " computed residual "<<sqrt(cp/ssq)
+	           << " true residual "<<sqrt(true_resid)
+	           << " target "       <<Tolerance <<std::endl;
 	  return;
 	}

      }

-      std::cout<<"ConjugateResidual did NOT converge"<<std::endl;
+      std::cout<<GridLogMessage<<"ConjugateResidual did NOT converge"<<std::endl;
      assert(0);
    }
  };
--- a/lib/algorithms/iterative/DenseMatrix.h
+++ b/lib/algorithms/iterative/DenseMatrix.h
@@ -0,0 +1,109 @@
+#ifndef GRID_DENSE_MATRIX_H
+#define GRID_DENSE_MATRIX_H
+
+namespace Grid {
+    /////////////////////////////////////////////////////////////
+    // Matrix untils
+    /////////////////////////////////////////////////////////////
+
+template<class T> using DenseVector = std::vector<T>;
+template<class T> using DenseMatrix = DenseVector<DenseVector<T> >;
+
+template<class T> void Size(DenseVector<T> & vec, int &N) 
+{ // report the number of elements of vec through N
+  N = (int)vec.size();
+}
+template<class T> void Size(DenseMatrix<T> & mat, int &N,int &M) 
+{ // N = number of rows; M = length of row 0, or 0 when there are no rows (mat[0] would be out of range)
+  N= mat.size();
+  M= mat.empty() ? 0 : mat[0].size();
+}
+
+template<class T> void SizeSquare(DenseMatrix<T> & mat, int &N) 
+{ // N = dimension of mat; asserts that the row and column counts agree
+  int cols; Size(mat,N,cols);
+  assert(cols==N);
+}
+
+template<class T> void Resize(DenseVector<T > & mat, int N) { 
+  mat.resize(N); // vector overload: despite its name, mat is a DenseVector here
+}
+template<class T> void Resize(DenseMatrix<T > & mat, int N, int M) { 
+  mat.resize(N);              // N rows ...
+  for(auto & row : mat){      // ... each of which gets M columns
+    row.resize(M);
+  }
+}
+template<class T> void Fill(DenseMatrix<T> & mat, const T&val) { 
+  int N,M;          // val is only read, so it is taken by const reference
+  Size(mat,N,M);    // N rows, M columns (M taken from row 0)
+  for(int i=0;i<N;i++){
+  for(int j=0;j<M;j++){
+    mat[i][j] = val;
+  }}
+}
+
+/** Return the transpose of mat: result[c][r] = mat[r][c] (no conjugation) **/
+template<class T> DenseMatrix<T> Transpose(DenseMatrix<T> & mat){
+  int rows,cols;
+  Size(mat,rows,cols);
+  DenseMatrix<T> result; Resize(result,cols,rows);
+  for(int r=0;r<rows;r++){
+  for(int c=0;c<cols;c++){
+    result[c][r] = mat[r][c];
+  }}
+  return result;
+}
+/** Overwrite the square matrix A with the identity **/
+template<class T> void Unity(DenseMatrix<T> &A){
+  int dim;  SizeSquare(A,dim);
+  for(int row=0;row<dim;row++){
+    // clear the whole row first ...
+    for(int col=0;col<dim;col++) A[row][col] = 0;
+    // ... then place the one on the diagonal
+    A[row][row] = 1;
+  }
+}
+
+/** Shift the diagonal of a square matrix in place: A <- A + c * I **/
+template<class T>
+void PlusUnit(DenseMatrix<T> & A,T c){
+  int n;  SizeSquare(A,n);
+  for(int d=0;d<n;d++){ T & diag = A[d][d]; diag = diag + c; }
+}
+
+/** Return the conjugate transpose of a square matrix: result[c][r] = conj(mat[r][c]) **/
+template<class T>
+DenseMatrix<T> HermitianConj(DenseMatrix<T> &mat){
+  // SizeSquare asserts that mat is square
+  int n; SizeSquare(mat,n);
+
+  DenseMatrix<T> result; Resize(result,n,n);
+  // walk the source row by row, writing each entry into its transposed slot
+  for(int src_row=0;src_row<n;src_row++){
+    for(int src_col=0;src_col<n;src_col++){
+      result[src_col][src_row] = conj(mat[src_row][src_col]);
+    }
+  }
+  return result;
+}
+/** Copy the block rows [row_st,row_end) x cols [col_st,col_end) of A into a new matrix (end indices exclusive) **/
+template <class T>
+DenseMatrix<T> GetSubMtx(DenseMatrix<T> &A,int row_st, int row_end, int col_st, int col_end)
+{
+  const int nrows = row_end - row_st; const int ncols = col_end - col_st;
+  DenseMatrix<T> sub; Resize(sub,nrows,ncols);
+  for(int r = 0; r<nrows; r++){
+  for(int c = 0; c<ncols; c++){
+    sub[r][c]=A[row_st+r][col_st+c];
+  }}
+  return sub;
+}
+
+}
+
+#include <algorithms/iterative/Householder.h>
+#include <algorithms/iterative/Francis.h>
+
+#endif
+
--- a/lib/algorithms/iterative/EigenSort.h
+++ b/lib/algorithms/iterative/EigenSort.h
@@ -0,0 +1,52 @@
+#ifndef GRID_EIGENSORT_H
+#define GRID_EIGENSORT_H
+
+
+namespace Grid {
+    /////////////////////////////////////////////////////////////
+    // Eigen sorter to begin with
+    /////////////////////////////////////////////////////////////
+
+template<class Field>
+class SortEigen {
+ private:
+  // Both comparators order by increasing magnitude |lambda|.
+  static bool less_lmd(RealD left,RealD right){
+    return fabs(left) < fabs(right);
+  }  
+  static bool less_pair(const std::pair<RealD,Field>& left,
+			const std::pair<RealD,Field>& right){
+    return fabs(left.first) < fabs(right.first);
+  }  
+  
+ public:
+  // Reorder so that lmd[0..N) / evec[0..N) hold the N smallest-|lambda| pairs, ascending.
+  void push(DenseVector<RealD>& lmd,
+	    DenseVector<Field>& evec,int N) {
+    // Pair each eigenvalue with its vector so that the two move together.
+    DenseVector<std::pair<RealD, Field> > emod;
+    emod.reserve(lmd.size());  // one allocation: no re-copying of Fields while filling
+    
+    for(int i=0;i<(int)lmd.size();++i){
+      emod.push_back(std::pair<RealD,Field>(lmd[i],evec[i]));
+    }
+
+    std::partial_sort(emod.begin(),emod.begin()+N,emod.end(),less_pair);
+    // Copy the N leading pairs back; entries from index N onwards are left as they were.
+    typename DenseVector<std::pair<RealD, Field> >::iterator it=emod.begin();
+    for(int i=0;i<N;++i){
+      lmd[i]=it->first;
+      evec[i]=it->second;
+      ++it;
+    }
+  }
+  void push(DenseVector<RealD>& lmd,int N) {  // eigenvalues only: N smallest |lambda| first
+    std::partial_sort(lmd.begin(),lmd.begin()+N,lmd.end(),less_lmd);
+  }
+  bool saturated(RealD lmd, RealD thrs) {     // true when |lmd| exceeds |thrs|
+    return fabs(lmd) > fabs(thrs);
+  }
+};
+
+}
+#endif
--- a/lib/algorithms/iterative/Francis.h
+++ b/lib/algorithms/iterative/Francis.h
@@ -0,0 +1,498 @@
+#ifndef FRANCIS_H
+#define FRANCIS_H
+
+#include <cstdlib>
+#include <string>
+#include <cmath>
+#include <iostream>
+#include <sstream>
+#include <stdexcept>
+#include <fstream>
+#include <complex>
+#include <algorithm>
+
+//#include <timer.h>
+//#include <lapacke.h>
+//#include <Eigen/Dense>
+
+namespace Grid {
+
+template <class T> int SymmEigensystem(DenseMatrix<T > &Ain, DenseVector<T> &evals, DenseMatrix<T> &evecs, RealD small);
+template <class T> int     Eigensystem(DenseMatrix<T > &Ain, DenseVector<T> &evals, DenseMatrix<T> &evecs, RealD small);
+
+/**
+  Find the eigenvalues of an upper hessenberg matrix using the Francis QR algorithm.
+H =
+      x  x  x  x  x  x  x  x  x
+      x  x  x  x  x  x  x  x  x
+      0  x  x  x  x  x  x  x  x
+      0  0  x  x  x  x  x  x  x
+      0  0  0  x  x  x  x  x  x
+      0  0  0  0  x  x  x  x  x
+      0  0  0  0  0  x  x  x  x
+      0  0  0  0  0  0  x  x  x
+      0  0  0  0  0  0  0  x  x
+Factorization is P T P^H where T is upper triangular (mod cc blocks) and P is orthogonal/unitary.
+**/
+template <class T>
+int QReigensystem(DenseMatrix<T> &Hin, DenseVector<T> &evals, DenseMatrix<T> &evecs, RealD small)
+{
+  DenseMatrix<T> H = Hin; // work on a copy; Hin is not modified
+
+  int N ; SizeSquare(H,N); // N shrinks as eigenvalues are split off
+  int M = N;
+
+  Fill(evals,0);
+  Fill(evecs,0);
+
+  T s,t,x=0,y=0,z=0;
+  T u,d;
+  T apd,amd,bc;
+  DenseVector<T> p(N,0);
+  T nrm = Norm(H);    ///DenseMatrix Norm
+  int n, m;
+  int e = 0;       // number of eigenvalues stored in evals so far
+  int it = 0;      // iterations since the last eigenvalue was split off
+  int tot_it = 0;  // total iterations; this is the return value
+  int l = 0;       // index returned by Chop_subdiag
+  int r = 0;       // non-zero when the last convergence pass split something off
+  DenseMatrix<T> P; Resize(P,N,N); Unity(P); // accumulates the Householder transforms
+  DenseVector<int> trows(N,0);               // set to 1 at l+1 for each 2x2 block; passed to UTeigenvectors
+
+  /// Check if the matrix is really hessenberg, if not abort
+  RealD sth = 0;
+  for(int j=0;j<N;j++){
+    for(int i=j+2;i<N;i++){
+      sth = abs(H[i][j]);
+      if(sth > small){
+	std::cout << "Non hessenberg H = " << sth << " > " << small << std::endl;
+	exit(1);
+      }
+    }
+  }
+
+  do{
+    std::cout << "Francis QR Step N = " << N << std::endl;
+    /** Check for convergence
+      x  x  x  x  x
+      0  x  x  x  x
+      0  0  x  x  x
+      0  0  x  x  x
+      0  0  0  0  x
+      for this matrix l = 4
+     **/
+    do{
+      l = Chop_subdiag(H,nrm,e,small);
+      r = 0;    ///May have converged on more than one eval
+      ///Single eval
+      if(l == N-1){
+        evals[e] = H[l][l];
+        N--; e++; r++; it = 0;
+      }
+      ///Trailing 2x2 block: both evals from the quadratic formula
+      if(l == N-2){
+        trows[l+1] = 1;    ///Needed for UTSolve
+        apd = H[l][l] + H[l+1][l+1];
+        amd = H[l][l] - H[l+1][l+1];
+        bc =  (T)4.0*H[l+1][l]*H[l][l+1];
+        evals[e]   = (T)0.5*( apd + sqrt(amd*amd + bc) );
+        evals[e+1] = (T)0.5*( apd - sqrt(amd*amd + bc) );
+        N-=2; e+=2; r++; it = 0;
+      }
+    } while(r>0);
+
+    if(N ==0) break;
+
+    DenseVector<T > ck; Resize(ck,3);
+    DenseVector<T> v;   Resize(v,3);
+
+    for(int m = N-3; m >= l; m--){
+      ///Starting vector essentially random shift.
+      if(it%10 == 0 && N >= 3 && it > 0){
+        s = (T)1.618033989*( abs( H[N-1][N-2] ) + abs( H[N-2][N-3] ) );
+        t = (T)0.618033989*( abs( H[N-1][N-2] ) + abs( H[N-2][N-3] ) );
+        x = H[m][m]*H[m][m] + H[m][m+1]*H[m+1][m] - s*H[m][m] + t;
+        y = H[m+1][m]*(H[m][m] + H[m+1][m+1] - s);
+        z = H[m+1][m]*H[m+2][m+1];
+      }
+      ///Starting vector implicit Q theorem
+      else{
+        s = (H[N-2][N-2] + H[N-1][N-1]);
+        t = (H[N-2][N-2]*H[N-1][N-1] - H[N-2][N-1]*H[N-1][N-2]);
+        x = H[m][m]*H[m][m] + H[m][m+1]*H[m+1][m] - s*H[m][m] + t;
+        y = H[m+1][m]*(H[m][m] + H[m+1][m+1] - s);
+        z = H[m+1][m]*H[m+2][m+1];
+      }
+      ck[0] = x; ck[1] = y; ck[2] = z;
+
+      if(m == l) break;
+
+      /** Early-exit test, reportedly taken from Numerical Recipes **/
+      // TODO (PAB): quote page, purpose, and evidence that it works.
+      // The scan stops at row m when adding u to d no longer changes d in floating point.
+      u=abs(H[m][m-1])*(abs(y)+abs(z));
+      d=abs(x)*(abs(H[m-1][m-1])+abs(H[m][m])+abs(H[m+1][m+1]));
+      if ((T)abs(u+d) == (T)abs(d) ){
+	l = m; break;
+      }
+
+      //if (u < small){l = m; break;}
+    }
+    if(it > 100000){
+     std::cout << "QReigensystem: bugger it got stuck after 100000 iterations" << std::endl;
+     std::cout << "got " << e << " evals " << l << " " << N << std::endl;
+      exit(1);
+    }
+    normalize(ck);    ///Normalization cancels in PHP anyway
+    T beta;
+    Householder_vector<T >(ck, 0, 2, v, beta);
+    Householder_mult<T >(H,v,beta,0,l,l+2,0);
+    Householder_mult<T >(H,v,beta,0,l,l+2,1);
+    ///Accumulate eigenvector
+    Householder_mult<T >(P,v,beta,0,l,l+2,1);
+    int sw = 0;      ///Are we on the last row?
+    for(int k=l;k<N-2;k++){
+      x = H[k+1][k];
+      y = H[k+2][k];
+      z = (T)0.0;
+      if(k+3 <= N-1){
+	z = H[k+3][k];
+      } else{
+	sw = 1; 
+	v[2] = (T)0.0;
+      }
+      ck[0] = x; ck[1] = y; ck[2] = z;
+      normalize(ck);
+      Householder_vector<T >(ck, 0, 2-sw, v, beta);
+      Householder_mult<T >(H,v, beta,0,k+1,k+3-sw,0);
+      Householder_mult<T >(H,v, beta,0,k+1,k+3-sw,1);
+      ///Accumulate eigenvector
+      Householder_mult<T >(P,v, beta,0,k+1,k+3-sw,1);
+    }
+    it++;
+    tot_it++;
+  }while(N > 1);
+  N = evals.size();
+  ///Annoying - UT solves in reverse order;
+  DenseVector<T> tmp; Resize(tmp,N);
+  for(int i=0;i<N;i++){
+    tmp[i] = evals[N-i-1];
+  } 
+  evals = tmp;
+  UTeigenvectors(H, trows, evals, evecs);
+  for(int i=0;i<evals.size();i++){evecs[i] = P*evecs[i]; normalize(evecs[i]);}
+  return tot_it;
+}
+
+template <class T>
+int my_Wilkinson(DenseMatrix<T> &Hin, DenseVector<T> &evals, DenseMatrix<T> &evecs, RealD small)
+{
+  /**
+  Find the eigenvalues of a tridiagonal matrix (as pictured below) using the Wilkinson QR algorithm.
+  H =
+  x  x  0  0  0  0
+  x  x  x  0  0  0
+  0  x  x  x  0  0
+  0  0  x  x  x  0
+  0  0  0  x  x  x
+  0  0  0  0  x  x
+  Factorization is P T P^H where T is upper triangular (mod cc blocks) and P is orthogonal/unitary.  **/
+  return my_Wilkinson(Hin, evals, evecs, small, small); // convenience overload: tol is set equal to small
+}
+
+template <class T>
+int my_Wilkinson(DenseMatrix<T> &Hin, DenseVector<T> &evals, DenseMatrix<T> &evecs, RealD small, RealD tol)
+{
+  int N; SizeSquare(Hin,N);
+  int M = N;
+  // Shifted QR iteration on a copy of Hin scaled by 1/|Norm(Hin)|; Hin itself is not modified.
+  // small is the threshold handed to Chop_symm_subdiag, tol the one used by the tridiagonality check.
+  // Returns the total iteration count; evals are multiplied back by Hnorm on exit.
+  //RealD Hnorm = abs( Hin.LargestDiag() ); H =  H*(1.0/Hnorm);
+  DenseMatrix<T> H;  H = Hin;
+  
+  RealD Hnorm = abs(Norm(Hin));
+  H = H * (1.0 / Hnorm);
+
+  // TODO use openmp and memset
+  Fill(evals,0);
+  Fill(evecs,0);
+
+  T s, t, x = 0, y = 0, z = 0;
+  T u, d;
+  T apd, amd, bc;
+  DenseVector<T> p; Resize(p,N); Fill(p,0);
+
+  T nrm = Norm(H);    ///DenseMatrix Norm
+  int n, m;
+  int e = 0;       // number of eigenvalues stored so far
+  int it = 0;      // iterations since the last eigenvalue was split off
+  int tot_it = 0;  // total iterations (return value)
+  int l = 0;
+  int r = 0;
+  DenseMatrix<T> P; Resize(P,N,N);
+  Unity(P);
+  DenseVector<int> trows(N, 0);
+  /// Check if the matrix is really symm tridiag
+  RealD sth = 0;
+  for(int j = 0; j < N; ++j)
+  {
+    for(int i = j + 2; i < N; ++i)
+    {
+      if(abs(H[i][j]) > tol || abs(H[j][i]) > tol)
+      {
+	std::cout << "Non Tridiagonal H(" << i << ","<< j << ") = |" << Real( real( H[j][i] ) ) << "| > " << tol << std::endl;
+	std::cout << "Warning tridiagonalize and call again" << std::endl;
+        // exit(1); // see what is going on
+        //return;
+      }
+    }
+  }
+
+  do{
+    do{
+      //Jasper
+      //Check if the subdiagonal term is small enough (<small)
+      //if true then it is converged.
+      //check start from H.dim - e - 1
+      //How to deal with more than 2 are converged?
+      //What if Chop_symm_subdiag return something int the middle?
+      //--------------
+      l = Chop_symm_subdiag(H,nrm, e, small);
+      r = 0;    ///May have converged on more than one eval
+      //Jasper
+      //In this case
+      // x  x  0  0  0  0
+      // x  x  x  0  0  0
+      // 0  x  x  x  0  0
+      // 0  0  x  x  x  0
+      // 0  0  0  x  x  0
+      // 0  0  0  0  0  x  <- l
+      //--------------
+      ///Single eval
+      if(l == N - 1)
+      {
+        evals[e] = H[l][l];
+        N--;
+        e++;
+        r++;
+        it = 0;
+      }
+      //Jasper
+      // x  x  0  0  0  0
+      // x  x  x  0  0  0
+      // 0  x  x  x  0  0
+      // 0  0  x  x  0  0
+      // 0  0  0  0  x  x  <- l
+      // 0  0  0  0  x  x
+      //--------------
+      ///Trailing 2x2 block: both evals from the quadratic formula
+      if(l == N - 2)
+      {
+        trows[l + 1] = 1;    ///Needed for UTSolve
+        apd = H[l][l] + H[l + 1][ l + 1];
+        amd = H[l][l] - H[l + 1][l + 1];
+        bc =  (T) 4.0 * H[l + 1][l] * H[l][l + 1];
+        evals[e] = (T) 0.5 * (apd + sqrt(amd * amd + bc));
+        evals[e + 1] = (T) 0.5 * (apd - sqrt(amd * amd + bc));
+        N -= 2;
+        e += 2;
+        r++;
+        it = 0;
+      }
+    }while(r > 0);
+    //Jasper
+    //Already converged
+    //--------------
+    if(N == 0) break;
+
+    DenseVector<T> ck,v; Resize(ck,2); Resize(v,2);
+
+    for(int m = N - 3; m >= l; m--)
+    {
+      ///Starting vector essentially random shift.
+      if(it%10 == 0 && N >= 3 && it > 0)
+      {
+        t = abs(H[N - 1][N - 2]) + abs(H[N - 2][N - 3]);
+        x = H[m][m] - t;
+        z = H[m + 1][m];
+      } else {
+      ///Starting vector implicit Q theorem
+        d = (H[N - 2][N - 2] - H[N - 1][N - 1]) * (T) 0.5;
+        t =  H[N - 1][N - 1] - H[N - 1][N - 2] * H[N - 1][N - 2] 
+	  / (d + sign(d) * sqrt(d * d + H[N - 1][N - 2] * H[N - 1][N - 2]));
+        x = H[m][m] - t;
+        z = H[m + 1][m];
+      }
+      //Jasper
+      //why it is here????
+      //-----------------------
+      if(m == l)
+        break;
+
+      u = abs(H[m][m - 1]) * (abs(y) + abs(z));
+      d = abs(x) * (abs(H[m - 1][m - 1]) + abs(H[m][m]) + abs(H[m + 1][m + 1]));
+      if ((T)abs(u + d) == (T)abs(d))
+      {
+        l = m;
+        break;
+      }
+    }
+    //Jasper
+    if(it > 1000000)
+    {
+      std::cout << "Wilkinson: bugger it got stuck after 1000000 iterations" << std::endl;
+      std::cout << "got " << e << " evals " << l << " " << N << std::endl;
+      exit(1);
+    }
+    //
+    T s, c;
+    Givens_calc<T>(x, z, c, s);
+    Givens_mult<T>(H, l, l + 1, c, -s, 0);
+    Givens_mult<T>(H, l, l + 1, c,  s, 1);
+    Givens_mult<T>(P, l, l + 1, c,  s, 1);
+    // chase the bulge down; DenseMatrix is a vector of vectors, so entries are read as H[row][col]
+    for(int k = l; k < N - 2; ++k)
+    {
+      x = H[k + 1][k];
+      z = H[k + 2][k];
+      Givens_calc<T>(x, z, c, s);
+      Givens_mult<T>(H, k + 1, k + 2, c, -s, 0);
+      Givens_mult<T>(H, k + 1, k + 2, c,  s, 1);
+      Givens_mult<T>(P, k + 1, k + 2, c,  s, 1);
+    }
+    it++;
+    tot_it++;
+  }while(N > 1);
+
+  N = evals.size();
+  ///Annoying - UT solves in reverse order;
+  DenseVector<T> tmp(N);
+  for(int i = 0; i < N; ++i)
+    tmp[i] = evals[N-i-1];
+  evals = tmp;
+  //
+  UTeigenvectors(H, trows, evals, evecs);
+  //UTSymmEigenvectors(H, trows, evals, evecs);
+  for(int i = 0; i < evals.size(); ++i)
+  {
+    evecs[i] = P * evecs[i];
+    normalize(evecs[i]);
+    evals[i] = evals[i] * Hnorm;
+  }
+  // // FIXME this is to test
+  // Hin.write("evecs3", evecs);
+  // Hin.write("evals3", evals);
+  // // check rsd
+  // for(int i = 0; i < M; i++) {
+  //   vector<T> Aevec = Hin * evecs[i];
+  //   RealD norm2(0.);
+  //   for(int j = 0; j < M; j++) {
+  //     norm2 += (Aevec[j] - evals[i] * evecs[i][j]) * (Aevec[j] - evals[i] * evecs[i][j]);
+  //   }
+  // }
+  return tot_it;
+}
+
+template <class T>
+void Hess(DenseMatrix<T > &A, DenseMatrix<T> &Q, int start){
+  // Reduces A in place, column by column from index start, and accumulates the transforms into Q.
+  /**
+  turn a matrix A =
+  x  x  x  x  x
+  x  x  x  x  x
+  x  x  x  x  x
+  x  x  x  x  x
+  x  x  x  x  x
+  into
+  x  x  x  x  x
+  x  x  x  x  x
+  0  x  x  x  x
+  0  0  x  x  x
+  0  0  0  x  x
+  with householder rotations
+  Slow.
+  */
+  int N ; SizeSquare(A,N);
+  DenseVector<T > p; Resize(p,N); Fill(p,0);
+
+  for(int k=start;k<N-2;k++){
+    //cerr << "hess" << k << std::endl;
+    DenseVector<T > ck,v; Resize(ck,N-k-1); Resize(v,N-k-1);
+    for(int i=k+1;i<N;i++){ck[i-k-1] = A[i][k];}  ///kth column, rows below the diagonal
+    normalize(ck);    ///Normalization cancels in PHP anyway
+    T beta;
+    Householder_vector<T >(ck, 0, ck.size()-1, v, beta);  ///Householder vector
+    Householder_mult<T>(A,v,beta,start,k+1,N-1,0);  ///A -> PA
+    Householder_mult<T >(A,v,beta,start,k+1,N-1,1);  ///PA -> PAP^H
+    ///Accumulate eigenvector
+    Householder_mult<T >(Q,v,beta,start,k+1,N-1,1);  ///Q -> QP^H
+  }
+  /*for(int l=0;l<N-2;l++){
+    for(int k=l+2;k<N;k++){
+    A(0,k,l);
+    }
+    }*/
+}
+
+template <class T>
+void Tri(DenseMatrix<T > &A, DenseMatrix<T> &Q, int start){
+///Tridiagonalize a matrix by delegating to Hess (NOTE(review): presumably relies on A being Hermitian -- confirm)
+  int N; SizeSquare(A,N); // N is not used afterwards; SizeSquare asserts that A is square
+  Hess(A,Q,start);
+  /*for(int l=0;l<N-2;l++){
+    for(int k=l+2;k<N;k++){
+    A(0,l,k);
+    }
+    }*/
+}
+
+template <class T>
+void ForceTridiagonal(DenseMatrix<T> &A){
+///Zero every entry lying more than one place off the diagonal (no similarity transform is applied)
+  int dim ; SizeSquare(A,dim);
+  for(int hi=2;hi<dim;hi++){        // hi: the larger of the two indices
+    for(int lo=0;lo+2<=hi;lo++){    // lo: the smaller one, at least two below hi
+      A[lo][hi]=0;
+      A[hi][lo]=0;
+    }
+  }
+}
+
+template <class T>
+int my_SymmEigensystem(DenseMatrix<T > &Ain, DenseVector<T> &evals, DenseVector<DenseVector<T> > &evecs, RealD small){
+  ///Solve a symmetric eigensystem, not necessarily in tridiagonal form; returns my_Wilkinson's iteration count
+  int N; SizeSquare(Ain,N);
+  DenseMatrix<T > A; A = Ain;                  // work on a copy of the input
+  DenseMatrix<T > Q; Resize(Q,N,N); Unity(Q);  // starts as identity; Tri/Hess accumulate into it
+  Tri(A,Q,0);
+  int it = my_Wilkinson<T>(A, evals, evecs, small);
+  for(int k=0;k<N;k++){evecs[k] = Q*evecs[k];} // multiply each returned vector by Q
+  return it;
+}
+
+
+template <class T>
+int Wilkinson(DenseMatrix<T> &Ain, DenseVector<T> &evals, DenseVector<DenseVector<T> > &evecs, RealD small){
+  return my_Wilkinson(Ain, evals, evecs, small); // thin alias: forwards to the four-argument my_Wilkinson
+}
+
+template <class T>
+int SymmEigensystem(DenseMatrix<T> &Ain, DenseVector<T> &evals, DenseVector<DenseVector<T> > &evecs, RealD small){
+  return my_SymmEigensystem(Ain, evals, evecs, small); // thin alias: forwards unchanged to my_SymmEigensystem
+}
+
+template <class T>
+int Eigensystem(DenseMatrix<T > &Ain, DenseVector<T> &evals, DenseVector<DenseVector<T> > &evecs, RealD small){
+///Solve a general eigensystem, not necessarily in Hessenberg form; returns QReigensystem's iteration count
+  int N; SizeSquare(Ain,N);                    // DenseMatrix is a vector of vectors: it has no .dim member
+  DenseMatrix<T > A; A = Ain;                  // work on a copy of the input
+  DenseMatrix<T > Q; Resize(Q,N,N); Unity(Q);  // starts as identity; Hess accumulates into it
+  Hess(A,Q,0);
+  int it = QReigensystem<T>(A, evals, evecs, small);
+  for(int k=0;k<N;k++){evecs[k] = Q*evecs[k];} // multiply each returned vector by Q
+  return it;
+}
+
+}
+#endif
--- a/lib/algorithms/iterative/Householder.h
+++ b/lib/algorithms/iterative/Householder.h
@@ -0,0 +1,215 @@
+#ifndef HOUSEHOLDER_H
+#define HOUSEHOLDER_H
+
+#define TIMER(A) std::cout << GridLogMessage << __FUNC__ << " file "<< __FILE__ <<" line " << __LINE__ << std::endl; // logs function/file/line; the argument A is not used in the expansion
+#define ENTER()  std::cout << GridLogMessage << "ENTRY "<<__FUNC__ << " file "<< __FILE__ <<" line " << __LINE__ << std::endl; // NOTE(review): __FUNC__ is not the standard __func__ - confirm the build defines it
+#define LEAVE()  std::cout << GridLogMessage << "EXIT  "<<__FUNC__ << " file "<< __FILE__ <<" line " << __LINE__ << std::endl; // same format as ENTER() with an "EXIT  " tag
+
+#include <cstdlib>
+#include <string>
+#include <cmath>
+#include <iostream>
+#include <sstream>
+#include <stdexcept>
+#include <fstream>
+#include <complex>
+#include <algorithm>
+
+namespace Grid {
+/** Magnitude ordering for max_element: true exactly when abs(i) is smaller than abs(j) **/
+template <class T> bool cf(T i, T j) {
+  return abs(j) > abs(i);
+}
+
+/** 
+	Calculate a real Givens angle: outputs c and s with c*c + s*s = 1, built
+	from the ratio of y and z (smaller magnitude over larger); z == 0 gives c=1, s=0.
+ **/
+template <class T> inline void Givens_calc(T y, T z, T &c, T &s){
+  RealD mz = (RealD)abs(z);
+  if(mz==0.0){
+    // Identity rotation.  Return here: falling through with y == 0 as well would evaluate -y/z = 0/0.
+    c = 1; s = 0; return;
+  }
+  if(mz >= (RealD)abs(y)){
+    T t = -y/z;                          // |t| <= 1 because |z| >= |y|
+    s = (T)1.0 / sqrt ((T)1.0 + t * t);
+    c = s * t;
+  } else {
+    T t = -z/y;                          // |t| < 1 because |y| > |z|
+    c = (T)1.0 / sqrt ((T)1.0 + t * t);
+    s = c * t;
+  }
+}
+
+template <class T> inline void Givens_mult(DenseMatrix<T> &A,  int i, int k, T c, T s, int dir)
+{
+  // Apply the plane rotation (c,s) to the square matrix A in place.
+  //   dir == 0 : mixes rows i and k
+  //   dir == 1 : mixes columns i and k (opposite sign convention for s)
+  // Any other dir leaves A untouched.
+  int dim ; SizeSquare(A,dim);
+  if(dir == 0){
+    for(int col=0;col<dim;col++){
+      const T upper = A[i][col];
+      const T lower = A[k][col];
+      A[i][col] = (c*upper + s*lower);
+      A[k][col] = (-s*upper + c*lower);
+    }
+  } else if(dir == 1){
+    for(int row=0;row<dim;row++){
+      const T left  = A[row][i];
+      const T right = A[row][k];
+      A[row][i] = (c*left - s*right);
+      A[row][k] = (s*left + c*right);
+    }
+  }
+}
+
+/**
+	from input = x;
+	Compute the complex Householder vector, v, such that
+	P = (I - b v transpose(v) )
+	b = 2/v.v
+
+	P | x |    | x | k = 0
+	| x |    | 0 | 
+	| x | =  | 0 |
+	| x |    | 0 | j = 3
+	| x |	   | x |
+
+	These are the "Unreduced" Householder vectors: only v[k..j] is written, at the same
+	indices as input[k..j]; beta is left unchanged when the largest-magnitude entry of input[k..j] is zero.
+ **/
+template <class T> inline void Householder_vector(DenseVector<T> input, int k, int j, DenseVector<T> &v, T &beta)
+{
+  int N ; Size(input,N); // N is not used below
+  T m = *max_element(input.begin() + k, input.begin() + j + 1, cf<T> ); // entry of input[k..j] with the largest abs()
+
+  if(abs(m) > 0.0){
+    T alpha = 0;
+
+    for(int i=k; i<j+1; i++){
+      v[i] = input[i]/m;               // scale the slice by m before squaring
+      alpha = alpha + v[i]*conj(v[i]); // running sum of |v[i]|^2
+    }
+    alpha = sqrt(alpha);               // norm of the scaled slice
+    beta = (T)1.0/(alpha*(alpha + abs(v[k]) ));
+
+    if(abs(v[k]) > 0.0)  v[k] = v[k] + (v[k]/abs(v[k]))*alpha; // shift the pivot by alpha along its own phase
+    else                 v[k] = -alpha;
+  } else{
+    for(int i=k; i<j+1; i++){
+      v[i] = 0.0; // nothing to reflect: zero the slice (beta is not assigned on this path)
+    } 
+  }
+}
+
+/**
+	from input = x;
+	Compute the complex Householder vector, v, such that
+	P = (I - b v transpose(v) )
+	b = 2/v.v
+
+	Px = alpha*e_dir
+
+	These are the "Unreduced" Householder vectors.
+
+	Only v[k..j] is written; beta is left unchanged when the largest-magnitude entry of input[k..j] is zero.
+ **/
+template <class T> inline void Householder_vector(DenseVector<T> input, int k, int j, int dir, DenseVector<T> &v, T &beta)
+{
+  // cf<T> is named explicitly: a bare function-template name cannot be deduced as the comparator type
+  T m = *max_element(input.begin() + k, input.begin() + j + 1, cf<T> );
+  
+  if(abs(m) > 0.0){
+    T alpha = 0;
+
+    // scale the slice by its largest-magnitude entry while accumulating the sum of |v[i]|^2
+    for(int i=k; i<j+1; i++){
+      v[i] = input[i]/m;
+      alpha = alpha + v[i]*conj(v[i]);
+    }
+    
+    alpha = sqrt(alpha);
+    beta = (T)1.0/(alpha*(alpha + abs(v[dir]) )); // (T)1.0 as in the k-pivot overload, so the division is done in T
+    if(abs(v[dir]) > 0.0) v[dir] = v[dir] + (v[dir]/abs(v[dir]))*alpha; // shift the pivot along its own phase
+    else                  v[dir] = -alpha;
+  }else{
+    for(int i=k; i<j+1; i++){
+      v[i] = 0.0; // nothing to reflect; beta is not assigned on this path
+    } 
+  }
+}
+
+/**
+	Compute the product PA if trans = 0
+	AP if trans = 1
+	P = (I - b v transpose(v) )
+	b = 2/v.v
+	start at element l of matrix A (columns l..N-1 for trans = 0, rows l..N-1 for trans = 1)
+	v is of length j - k + 1 and is indexed here from 0, i.e. v[i-k] pairs with row/column i
+ **/
+// NOTE(review): the subtraction uses conj(v) in both branches, unlike Householder_mult_tri - confirm for complex T.
+template <class T> inline void Householder_mult(DenseMatrix<T> &A , DenseVector<T> v, T beta, int l, int k, int j, int trans)
+{
+  int N ; SizeSquare(A,N);
+
+  if(abs(beta) > 0.0){ // beta == 0 leaves A unchanged
+    for(int p=l; p<N; p++){
+      T s = 0;
+      if(trans==0){
+	for(int i=k;i<j+1;i++) s += conj(v[i-k])*A[i][p]; // s = sum_i conj(v_i) A[i][p] for column p
+	s *= beta;
+	for(int i=k;i<j+1;i++){ A[i][p] = A[i][p]-s*conj(v[i-k]);}
+      } else {
+	for(int i=k;i<j+1;i++){ s += conj(v[i-k])*A[p][i];} // s = sum_i conj(v_i) A[p][i] for row p
+	s *= beta;
+	for(int i=k;i<j+1;i++){ A[p][i]=A[p][i]-s*conj(v[i-k]);}
+      }
+    }
+  }
+}
+
+/**
+	Compute the product PA if trans = 0
+	AP if trans = 1
+	P = (I - b v transpose(v) )
+	b = 2/v.v
+	start at element l of matrix A and stop before element M
+	v is of length j - k + 1 and is indexed from 0 (v[i-k] pairs with row/column i)
+	A is tridiagonal
+ **/
+template <class T> inline void Householder_mult_tri(DenseMatrix<T> &A , DenseVector<T> v, T beta, int l, int M, int k, int j, int trans)
+{
+  if(abs(beta) > 0.0){
+    // beta == 0 leaves A unchanged
+    int N ; SizeSquare(A,N);
+    // tmp collects the correction -beta*v*(v^H A) (trans==0) or -beta*(A v)*v^H (trans==1)
+    DenseMatrix<T> tmp; Resize(tmp,N,N); Fill(tmp,0); 
+    // first pass: build the correction for columns/rows l..M-1 without touching A
+    T s;
+    for(int p=l; p<M; p++){
+      s = 0;
+      if(trans==0){
+	for(int i=k;i<j+1;i++) s = s + conj(v[i-k])*A[i][p];
+      }else{
+	for(int i=k;i<j+1;i++) s = s + v[i-k]*A[p][i];
+      }
+      s = beta*s;
+      if(trans==0){
+	for(int i=k;i<j+1;i++) tmp[i][p] = tmp[i][p] - s*v[i-k]; // was tmp(i,p); the container is indexed with [][]
+      }else{
+	for(int i=k;i<j+1;i++) tmp[p][i] = tmp[p][i] - s*conj(v[i-k]);
+      }
+    }
+    for(int p=l; p<M; p++){ // second pass: add the finished correction into A
+      if(trans==0){
+	for(int i=k;i<j+1;i++) A[i][p] = A[i][p] + tmp[i][p];
+      }else{
+	for(int i=k;i<j+1;i++) A[p][i] = A[p][i] + tmp[p][i];
+      }
+    }
+  }
+}
+}
+#endif
--- a/lib/algorithms/iterative/ImplicitlyRestartedLanczos.h
+++ b/lib/algorithms/iterative/ImplicitlyRestartedLanczos.h
--- a/lib/algorithms/iterative/Matrix.h
+++ b/lib/algorithms/iterative/Matrix.h
@@ -0,0 +1,426 @@
+#ifndef MATRIX_H
+#define MATRIX_H
+
+#include <cstdlib>
+#include <string>
+#include <cmath>
+#include <vector>
+#include <iostream>
+#include <iomanip>
+#include <complex>
+#include <typeinfo>
+#include <Grid.h>
+
+
+/** Sign function: p divided by its own magnitude abs(p); p == 0 is not guarded **/
+template <class T> T sign(T p){ auto magnitude = abs(p); return ( p/magnitude ); }
+
+/////////////////////////////////////////////////////////////////////////////////////////////////////////
+///////////////////// Hijack STL containers for our wicked means /////////////////////////////////////////
+/////////////////////////////////////////////////////////////////////////////////////////////////////////
+template<class T> using Vector = Vector<T>; // NOTE(review): the right-hand Vector must already be visible here (from Grid.h?) - confirm; the banner suggests an STL container was intended
+template<class T> using Matrix = Vector<Vector<T> >; // a Matrix is a Vector of Vectors, indexed below as mat[i][j]
+
+template<class T> void Resize(Vector<T > & vec, int N) { vec.resize(N); } // set the length of vec to N via the container's resize()
+
+template<class T> void Resize(Matrix<T > & mat, int N, int M) { 
+  // Shape mat as N rows, each resized to hold M entries
+  mat.resize(N);
+  int row = 0;
+  while(row<N){ mat[row].resize(M); row++; }
+}
+template<class T> void Size(Vector<T> & vec, int &N) // writes the length of vec into N
+{ 
+  N= vec.size();
+}
+template<class T> void Size(Matrix<T> & mat, int &N,int &M) 
+{ // Report the shape of mat: N = number of rows, M = length of row 0
+  N= mat.size();
+  M= (N>0) ? (int)mat[0].size() : 0; // an empty matrix has no row 0 to inspect: report zero columns
+}
+template<class T> void SizeSquare(Matrix<T> & mat, int &N) // writes the row count into N and asserts it equals the column count
+{ 
+  int M; Size(mat,N,M);
+  assert(N==M);
+}
+template<class T> void SizeSame(Matrix<T> & mat1,Matrix<T> &mat2, int &N1,int &M1) 
+{ // Return the shape of mat1 in (N1,M1) and assert that mat2 has the same shape
+  Size(mat1,N1,M1);
+  int rows2,cols2;
+  Size(mat2,rows2,cols2);
+  assert(N1==rows2);
+  assert(M1==cols2);
+}
+
+//*****************************************
+//*	(Complex) Vector operations	*
+//*****************************************
+
+/** Element-wise conj of a Vector; the argument is taken by value and the result returned **/
+template <class T> Vector<T> conj(Vector<T> p){
+	// p is already a private copy, so conjugate it in place and hand it back
+	for(int n=0;n<p.size();n++){p[n] = conj(p[n]);}
+	return p;
+}
+
+/** Norm of a Vector: abs(sqrt(sum of p[i]*conj(p[i]))), returned as T **/
+template <class T> T norm(Vector<T> p){
+	T acc = 0;
+	for(const T &x : p){acc = acc + x*conj(x);}
+	return abs(sqrt(acc));
+}
+
+/** Norm squared of a Vector: abs(sum of p[i]*conj(p[i])), returned as T **/
+template <class T> T norm2(Vector<T> p){
+	T acc = 0;
+	for(const T &x : p){acc = acc + x*conj(x);}
+	return abs((acc));
+}
+
+/** Sum of all elements of a Vector (0 for an empty Vector) **/
+template <class T> T trace(Vector<T> p){
+	T total = 0;
+	for(const T &x : p){total = total + x;}
+	return total;
+}
+
+/** Overwrite every element of a Vector with the constant c **/
+template <class T> void Fill(Vector<T> &p, T c){
+	for(T &slot : p){slot = c;}
+}
+/** Normalize a Vector in place: divide every element by norm(p) **/
+template <class T> void normalize(Vector<T> &p){
+	const T scale = norm(p);
+	if( !(abs(scale) > 0.0) ) return; // norm not above zero: leave p untouched
+	for(T &x : p){x /= scale;}
+}
+/** Vector by scalar: returns a copy of p with every element multiplied by s **/
+template <class T, class U> Vector<T> times(Vector<T> p, U s){
+	for(T &x : p){x *= s;}
+	return p;
+}
+template <class T, class U> Vector<T> times(U s, Vector<T> p){ // scalar-first spelling: returns p with every element multiplied by s
+	for(T &x : p){x *= s;}
+	return p;
+}
+/** Inner product conj(a) . b, summed over the length of a **/
+template <class T> T inner(Vector<T> a, Vector<T> b){
+	T acc = 0.;
+	for(int n=0;n<a.size();n++){acc += conj(a[n])*b[n];}
+	return acc;
+}
+/** Element-wise sum a + b, over the length of a **/
+template <class T> Vector<T> add(Vector<T> a, Vector<T> b){
+	// a is a by-value copy, so it doubles as the result buffer
+	for(int n=0;n<a.size();n++){a[n] = a[n] + b[n];}
+	return a;
+}
+/** Element-wise difference a - b, over the length of a **/
+template <class T> Vector<T> sub(Vector<T> a, Vector<T> b){
+	// a is a by-value copy, so it doubles as the result buffer
+	for(int n=0;n<a.size();n++){a[n] = a[n] - b[n];}
+	return a;
+}
+
+/** 
+ *********************************
+ *	Matrices	         *
+ *********************************
+ **/
+
+template<class T> void Fill(Matrix<T> & mat, const T&val) { 
+  // Set every entry of mat to val; const reference so constants and temporaries are accepted
+  int N,M; Size(mat,N,M);
+  for(int i=0;i<N;i++){
+  for(int j=0;j<M;j++){
+    mat[i][j] = val;
+  }}
+}
+
+/** Transpose of a matrix: for an N x M input returns an M x N copy C with C[i][j] = mat[j][i] **/
+template<class T> Matrix<T> Transpose(Matrix<T> & mat){
+  int N,M;
+  Size(mat,N,M);
+  Matrix<T> C; Resize(C,M,N);
+  for(int i=0;i<M;i++){
+  for(int j=0;j<N;j++){
+    C[i][j] = mat[j][i];
+  }} 
+  return C;
+}
+/** Set a square Matrix to the unit matrix in place (1 on the diagonal, 0 elsewhere) **/
+template<class T> void Unity(Matrix<T> &mat){
+  int N;  SizeSquare(mat,N);
+  for(int i=0;i<N;i++){
+    for(int j=0;j<N;j++){
+      if ( i==j ) mat[i][j] = 1;   // the parameter is named mat; no A is in scope here
+      else        mat[i][j] = 0;
+    } 
+  } 
+}
+/** Add c to every diagonal entry of the square matrix A, i.e. A -> A + c*I **/
+template<class T>
+void PlusUnit(Matrix<T> & A,T c){
+  int n;  SizeSquare(A,n);
+  for(int d=0;d<n;d++){A[d][d] += c;} 
+}
+
+/** return the Hermitian conjugate of a square matrix: C[i][j] = conj(mat[j][i]) **/
+template<class T> Matrix<T> HermitianConj(Matrix<T> &mat){
+
+  int dim; SizeSquare(mat,dim);
+
+  Matrix<T> C; Resize(C,dim,dim);
+
+  for(int i=0;i<dim;i++){
+    for(int j=0;j<dim;j++){
+      C[i][j] = conj(mat[j][i]);
+    } 
+  } 
+  return C;
+}
+
+/** return the diagonal entries of a square matrix as a Vector: d[i] = A[i][i] **/
+template<class T> Vector<T> diag(Matrix<T> &A)
+{
+  int dim; SizeSquare(A,dim);
+  Vector<T> d; Resize(d,dim);
+
+  for(int i=0;i<dim;i++){
+    d[i] = A[i][i];
+  }
+  return d;
+}
+
+/** Left multiply by a Vector: C[j] = sum_i B[i]*A[i][j]; asserts len(B) equals the row count of A **/
+template<class T> Vector<T> operator *(Vector<T> &B,Matrix<T> &A)
+{
+  int K,M,N; 
+  Size(B,K);
+  Size(A,M,N);
+  assert(K==M);
+  
+  Vector<T> C; Resize(C,N);
+
+  for(int j=0;j<N;j++){
+    T sum = 0.0;
+    for(int i=0;i<M;i++){
+      sum += B[i] * A[i][j];
+    }
+    C[j] =  sum;
+  }
+  return C; 
+}
+
+/** return 1/diagonal entries of a square matrix as a Vector: d[i] = 1.0/A[i][i] (zero entries are not guarded) **/
+template<class T> Vector<T> inv_diag(Matrix<T> & A){
+  int dim; SizeSquare(A,dim);
+  Vector<T> d; Resize(d,dim);
+  for(int i=0;i<dim;i++){
+    d[i] = 1.0/A[i][i];
+  }
+  return d;
+}
+/** Matrix Addition: element-wise A + B; SizeSame asserts the shapes agree **/
+template<class T> inline Matrix<T> operator + (Matrix<T> &A,Matrix<T> &B)
+{
+  int N,M  ; SizeSame(A,B,N,M);
+  Matrix<T> C; Resize(C,N,M);
+  for(int i=0;i<N;i++){
+    for(int j=0;j<M;j++){
+      C[i][j] = A[i][j] +  B[i][j];
+    } 
+  } 
+  return C;
+} 
+/** Matrix Subtraction: element-wise A - B; SizeSame asserts the shapes agree **/
+template<class T> inline Matrix<T> operator- (Matrix<T> & A,Matrix<T> &B){
+  int N,M  ; SizeSame(A,B,N,M);
+  Matrix<T> C; Resize(C,N,M);
+  for(int i=0;i<N;i++){
+  for(int j=0;j<M;j++){
+    C[i][j] = A[i][j] -  B[i][j];
+  }}
+  return C;
+} 
+
+/** Matrix scalar multiplication: returns a copy of A with every entry multiplied by c **/
+template<class T> inline Matrix<T> operator* (Matrix<T> & A,T c){
+  int N,M; Size(A,N,M);
+  Matrix<T> C; Resize(C,N,M);
+  for(int i=0;i<N;i++){
+  for(int j=0;j<M;j++){
+    C[i][j] = A[i][j]*c;
+  }} 
+  return C;
+} 
+/** Matrix Matrix multiplication: (K x L) * (N x M) -> K x M, asserting L == N **/
+template<class T> inline Matrix<T> operator* (Matrix<T> &A,Matrix<T> &B){
+  int K,L,N,M;
+  Size(A,K,L);
+  Size(B,N,M); assert(L==N);
+  Matrix<T> C; Resize(C,K,M);
+
+  for(int i=0;i<K;i++){
+    for(int j=0;j<M;j++){
+      T sum = 0.0;
+      for(int k=0;k<N;k++) sum += A[i][k]*B[k][j];
+      C[i][j] =sum;
+    }
+  }
+  return C; 
+} 
+/** Matrix Vector multiplication: C[i] = sum_j A[i][j]*B[j]; asserts len(B) equals the column count of A **/
+template<class T> inline Vector<T> operator* (Matrix<T> &A,Vector<T> &B){
+  int M,N,K;
+  Size(A,N,M);
+  Size(B,K); assert(K==M);
+  Vector<T> C; Resize(C,N);
+  for(int i=0;i<N;i++){
+    T sum = 0.0;
+    for(int j=0;j<M;j++) sum += A[i][j]*B[j];
+    C[i] =  sum;
+  }
+  return C; 
+} 
+
+/** Some version of Matrix norm **/
+/*
+inline T Norm(){ // this is not a usual L2 norm
+    T norm = 0;
+    for(int i=0;i<dim;i++){
+      for(int j=0;j<dim;j++){
+	norm += abs(A[i][j]);
+    }}
+    return norm;
+  }
+*/
+
+/** Some version of Matrix norm: the largest abs(A[i][i]) over the diagonal of a square matrix **/
+template<class T> T LargestDiag(Matrix<T> &A)
+{
+  int dim ; SizeSquare(A,dim); 
+  // seed with the first diagonal entry, then keep whichever magnitude is larger
+  T best = abs(A[0][0]);
+  for(int d=1;d<dim;d++){
+    T cand = abs(A[d][d]);
+    if(abs(cand) > abs(best) ){best = cand;}
+  }
+  return best;
+}
+
+/** Scan the subdiagonal from row dim-1-offset down to row 1; zero the first entry whose magnitude is below 'small' **/
+template <class T,class U> int Chop_subdiag(Matrix<T> &A,T norm, int offset, U small)
+{
+  int dim; SizeSquare(A,dim);
+  // 'norm' is accepted but not used in the test below
+  int row = dim - 1 - offset;
+  while(row >= 1 && !((U)abs(A[row][row - 1]) < (U)small)) row--;
+  if(row < 1) return 0;        // nothing small enough was found
+  A[row][row-1]=(U)0.0;
+  // report the row whose subdiagonal entry was chopped
+  return row;
+}
+
+/** As Chop_subdiag, but the mirror entry on the superdiagonal is zeroed as well **/
+template <class T,class U> int Chop_symm_subdiag(Matrix<T> & A,T norm, int offset, U small) 
+{
+  int dim; SizeSquare(A,dim);
+  // 'norm' is accepted but not used in the test below
+  int row = dim - 1 - offset;
+  while(row >= 1 && !((U)abs(A[row][row - 1]) < (U)small)) row--;
+  if(row < 1) return 0;        // nothing small enough was found
+  A[row][row - 1] = (U)0.0;
+  A[row - 1][row] = (U)0.0;
+  // report the row whose off-diagonal pair was chopped
+  return row;
+}
+/** Copy S into the window A[row_st..row_end-1][col_st..col_end-1]; S is read from its [0][0] corner **/
+template<class T>
+void AssignSubMtx(Matrix<T> & A,int row_st, int row_end, int col_st, int col_end, Matrix<T> &S)
+{
+  for(int r = 0; r < row_end - row_st; r++){
+    for(int c = 0; c < col_end - col_st; c++){
+      A[row_st + r][col_st + c] = S[r][c];
+    }
+  }
+}
+
+/** Get a submatrix: copy rows row_st..row_end-1 and columns col_st..col_end-1 of A (end indices exclusive) **/
+template <class T>
+Matrix<T> GetSubMtx(Matrix<T> &A,int row_st, int row_end, int col_st, int col_end)
+{
+  // Resize needs the matrix being shaped as its first argument
+  Matrix<T> H; Resize(H,row_end - row_st,col_end-col_st);
+  for(int i = row_st; i<row_end; i++){
+  for(int j = col_st; j<col_end; j++){
+    H[i-row_st][j-col_st]=A[i][j];
+  }}
+  return H;
+}
+  
+ /** Assign a submatrix to a larger one. NB remember Vector Vectors are transposes of the matrices they represent. NOTE(review): same signature and body as the AssignSubMtx defined earlier in this header - a duplicate definition; confirm which copy to keep **/
+template<class T>
+void AssignSubMtx(Matrix<T> & A,int row_st, int row_end, int col_st, int col_end, Matrix<T> &S)
+{
+  for(int i = row_st; i<row_end; i++){
+  for(int j = col_st; j<col_end; j++){
+    A[i][j] = S[i - row_st][j - col_st]; // S is read relative to its own [0][0] corner
+  }}
+}
+  
+/** compute b_i A_ij b_j for a square A and a Vector B of matching length **/ // no conj is applied to B
+template<class T> T proj(Matrix<T> A, Vector<T> B){
+  int dim; SizeSquare(A,dim);
+  int dimB; Size(B,dimB);
+  assert(dimB==dim);
+  T total = 0;
+  for(int row=0;row<dim;row++){
+    T rowdot = 0.0;               // (A B)[row]
+    for(int col=0;col<dim;col++){
+      rowdot += A[row][col]*B[col];
+    }
+    total +=  B[row]*rowdot;      // No conj?
+  }
+  return total; 
+}
+
+
+/*
+ *************************************************************
+ *
+ * Matrix Vector products
+ *
+ *************************************************************
+ */
+// Instead make a linop and call my CG;
+
+/// q -> q Q : checks that Q is square with side equal to the length of q, then calls the three-argument times
+template <class T,class Fermion> void times(Vector<Fermion> &q, Matrix<T> &Q)
+{
+  int M; SizeSquare(Q,M);
+  int N; Size(q,N); 
+  assert(M==N);
+  // NOTE(review): the three-argument times below takes multi1d<LatticeFermion> and asserts N<M, which cannot hold after M==N - confirm intent
+  times(q,Q,N);
+}
+
+/// q -> q Q on the leading N vectors: q[j] <- sum_{k<N} q[k]*Q[k][j] for j<N; entries q[N..] are left as they were
+template <class T> void times(multi1d<LatticeFermion> &q, Matrix<T> &Q, int N)
+{
+  GridBase *grid = q[0]._grid;
+  int M; SizeSquare(Q,M);
+  int K = q.size();
+  assert(N<=M);   // N == M is the case the two-argument wrapper passes in
+  assert(N<=K);
+  Vector<LatticeFermion> S(N,grid ); // the element type of q; no name 'Fermion' is declared in this template
+  for(int j=0;j<N;j++){
+    S[j] = zero;
+    for(int k=0;k<N;k++){
+      S[j] = S[j] +  q[k]* Q[k][j]; 
+    }
+  }
+  for(int j=0;j<N;j++){   // S holds exactly N results; copying q.size() of them read past its end
+    q[j] = S[j];
+  }
+}
+#endif
--- a/lib/algorithms/iterative/MatrixUtils.h
+++ b/lib/algorithms/iterative/MatrixUtils.h
@@ -0,0 +1,48 @@
+#ifndef GRID_MATRIX_UTILS_H
+#define GRID_MATRIX_UTILS_H
+
+namespace Grid {
+
+  namespace MatrixUtils { 
+    // Shape helpers for Matrix<T>, which is indexed as A[i][j].
+    // Size: N = number of rows (asserted > 0), M = length of row 0; asserts every row has length M.
+    template<class T> inline void Size(Matrix<T>& A,int &N,int &M){
+      N=A.size(); assert(N>0);
+      M=A[0].size();
+      for(int i=0;i<N;i++){
+	assert(A[i].size()==M);
+      }
+    }
+    // SizeSquare: as Size, and additionally asserts that the matrix is square.
+    template<class T> inline void SizeSquare(Matrix<T>& A,int &N)
+    {
+      int M;
+      Size(A,N,M);
+      assert(N==M);
+    }
+    // Fill: set every entry of A to val.
+    template<class T> inline void Fill(Matrix<T>& A,T & val)
+    { 
+      int N,M;
+      Size(A,N,M);
+      for(int i=0;i<N;i++){
+      for(int j=0;j<M;j++){
+	A[i][j]=val;
+      }}
+    }
+    template<class T> inline void Diagonal(Matrix<T>& A,T & val)
+    { // Diagonal: set the diagonal of a square A to val; other entries are untouched.
+      int N;
+      SizeSquare(A,N);
+      for(int i=0;i<N;i++){
+	A[i][i]=val;
+      }
+    }
+    template<class T> inline void Identity(Matrix<T>& A)
+    { // Identity: zero A, then put ones on the diagonal.
+      T nought(0.0); T unit(1.0);        // Fill/Diagonal take T&, so literals cannot be passed directly
+      Fill(A,nought); Diagonal(A,unit);
+    }
+  };
+}
+#endif
--- a/lib/algorithms/iterative/PrecConjugateResidual.h
+++ b/lib/algorithms/iterative/PrecConjugateResidual.h
@@ -0,0 +1,92 @@
+#ifndef GRID_PREC_CONJUGATE_RESIDUAL_H
+#define GRID_PREC_CONJUGATE_RESIDUAL_H
+
+namespace Grid {
+
+    /////////////////////////////////////////////////////////////
+    // Preconditioned conjugate-residual style solver: an OperatorFunction
+    // taking a single input vec (src) and filling a single output vec (psi).
+    /////////////////////////////////////////////////////////////
+
+  template<class Field> 
+    class PrecConjugateResidual : public OperatorFunction<Field> {
+  public:                                                
+    RealD   Tolerance;      // relative tolerance; the stopping target is Tolerance^2 * norm2(src)
+    Integer MaxIterations;  // iteration cap; exceeding it ends in assert(0)
+    int verbose;            // non-zero prints the residual every iteration
+    LinearFunction<Field> &Preconditioner; // held by reference: the caller must keep it alive
+
+    PrecConjugateResidual(RealD tol,Integer maxit,LinearFunction<Field> &Prec) : Tolerance(tol), MaxIterations(maxit),      Preconditioner(Prec)
+    { 
+      verbose=1;
+    };
+
+    // Solve for psi given src.  psi is reset to zero first, so no initial guess is used.
+    void operator() (LinearOperatorBase<Field> &Linop,const Field &src, Field &psi){
+
+      RealD a, b, c, d; // c and d are not used below
+      RealD cp, ssq,rsq;
+      
+      RealD rAr, rAAr, rArp;
+      RealD pAp, pAAp;
+
+      GridBase *grid = src._grid;
+      Field r(grid),  p(grid), Ap(grid), Ar(grid), z(grid);
+      
+      psi=zero;
+      r  = src;            // with psi = 0 the starting residual is src itself
+      Preconditioner(r,p); // first search direction: the preconditioner applied to r
+
+      
+
+      Linop.HermOpAndNorm(p,Ap,pAp,pAAp); // presumably Ap = A p together with two norms of it - confirm in LinearOperatorBase
+      Ar=Ap;
+      rAr=pAp;
+      rAAr=pAAp;
+
+      cp =norm2(r);
+      ssq=norm2(src);
+      rsq=Tolerance*Tolerance*ssq; // absolute target for the squared residual
+
+      if (verbose) std::cout<<GridLogMessage<<"PrecConjugateResidual: iteration " <<0<<" residual "<<cp<< " target"<< rsq<<std::endl;
+
+      for(int k=0;k<MaxIterations;k++){
+
+
+	Preconditioner(Ap,z);
+	RealD rq= real(innerProduct(Ap,z)); 
+
+	a = rAr/rq; // step length
+
+   	axpy(psi,a,p,psi); // presumably psi = a*p + psi - confirm the axpy argument convention
+   cp = axpy_norm(r,-a,z,r); // presumably r = r - a*z, returning the new squared norm of r
+
+	rArp=rAr; // keep the previous value for the ratio b
+
+	Linop.HermOpAndNorm(r,Ar,rAr,rAAr);
+
+	b   =rAr/rArp;
+ 
+	axpy(p,b,p,r);
+	pAAp=axpy_norm(Ap,b,Ap,Ar);
+	
+	if(verbose) std::cout<<GridLogMessage<<"PrecConjugateResidual: iteration " <<k<<" residual "<<cp<< " target"<< rsq<<std::endl;
+
+	if(cp<rsq) {
+	  Linop.HermOp(psi,Ap); // recompute the residual from psi directly, to report it next to the recursive one
+	  axpy(r,-1.0,src,Ap);
+	  RealD true_resid = norm2(r)/ssq;
+	  std::cout<<GridLogMessage<<"PrecConjugateResidual: Converged on iteration " <<k
+		   << " computed residual "<<sqrt(cp/ssq)
+	           << " true residual "<<sqrt(true_resid)
+	           << " target "       <<Tolerance <<std::endl;
+	  return;
+	}
+
+      }
+
+      std::cout<<GridLogMessage<<"PrecConjugateResidual did NOT converge"<<std::endl;
+      assert(0); // non-convergence within MaxIterations is treated as fatal
+    }
+  };
+}
+#endif
--- a/lib/algorithms/iterative/PrecGeneralisedConjugateResidual.h
+++ b/lib/algorithms/iterative/PrecGeneralisedConjugateResidual.h
@@ -0,0 +1,175 @@
+#ifndef GRID_PREC_GCR_H
+#define GRID_PREC_GCR_H
+
+///////////////////////////////////////////////////////////////////////////////////////////////////////
+//VPGCR Abe and Zhang, 2005.
+//INTERNATIONAL JOURNAL OF NUMERICAL ANALYSIS AND MODELING
+//Computing and Information Volume 2, Number 2, Pages 147-161
+//NB. Likely not original reference since they are focussing on a preconditioner variant.
+//    but VPGCR was nicely written up in their paper
+///////////////////////////////////////////////////////////////////////////////////////////////////////
+namespace Grid {
+
+  template<class Field>
+    class PrecGeneralisedConjugateResidual : public OperatorFunction<Field> {
+  public:                                                
+    RealD   Tolerance;      // relative tolerance; the stopping target is Tolerance^2 * norm2(src)
+    Integer MaxIterations;  // cap on the number of GCRnStep restarts
+    int verbose;            // non-zero prints one line per restart
+    int mmax;               // length of the stored p/q history
+    int nstep;              // inner steps performed by each GCRnStep call
+    int steps;              // running count of inner steps, reset by operator()
+    LinearFunction<Field> &Preconditioner; // held by reference: the caller must keep it alive
+
+   PrecGeneralisedConjugateResidual(RealD tol,Integer maxit,LinearFunction<Field> &Prec,int _mmax,int _nstep) : 
+      Tolerance(tol), 
+      MaxIterations(maxit),
+      Preconditioner(Prec),
+      mmax(_mmax),
+      nstep(_nstep)
+    { 
+      verbose=1;
+    };
+
+    // Solve for psi given src: psi starts at zero and GCRnStep is restarted until cp < rsq.
+    void operator() (LinearOperatorBase<Field> &Linop,const Field &src, Field &psi){
+
+      psi=zero;
+      RealD cp, ssq,rsq;
+      ssq=norm2(src);
+      rsq=Tolerance*Tolerance*ssq; // absolute target for the squared residual
+      
+      Field r(src._grid);
+
+      steps=0;
+      for(int k=0;k<MaxIterations;k++){
+
+	cp=GCRnStep(Linop,src,psi,rsq); // continues from the current psi
+
+	if ( verbose ) std::cout<<GridLogMessage<<"VPGCR("<<mmax<<","<<nstep<<") "<< steps <<" steps cp = "<<cp<<std::endl;
+
+	if(cp<rsq) {
+	  Linop.HermOp(psi,r); // recompute the residual from psi directly, to report it next to the recursive one
+	  axpy(r,-1.0,src,r);
+	  RealD tr = norm2(r);
+	  std::cout<<GridLogMessage<<"PrecGeneralisedConjugateResidual: Converged on iteration " <<steps
+		   << " computed residual "<<sqrt(cp/ssq)
+	           << " true residual "    <<sqrt(tr/ssq)
+	           << " target "           <<Tolerance <<std::endl;
+	  return;
+	}
+
+      }
+      std::cout<<GridLogMessage<<"Variable Preconditioned GCR did not converge"<<std::endl;
+      assert(0); // non-convergence within MaxIterations is treated as fatal
+    }
+    // One restart cycle of at most nstep steps starting from psi; returns the last squared residual norm cp.
+    RealD GCRnStep(LinearOperatorBase<Field> &Linop,const Field &src, Field &psi,RealD rsq){
+
+      RealD cp;
+      RealD a, b, c, d; // c and d are not used below
+      RealD zAz, zAAz;
+      RealD rAq, rq;    // rAq is not used below
+
+      GridBase *grid = src._grid;
+
+      Field r(grid);
+      Field z(grid);
+      Field tmp(grid);
+      Field ttmp(grid);
+      Field Az(grid);
+
+      ////////////////////////////////
+      // history for flexible orthog
+      ////////////////////////////////
+      std::vector<Field> q(mmax,grid);
+      std::vector<Field> p(mmax,grid);
+      std::vector<RealD> qq(mmax);
+      
+      //////////////////////////////////
+      // psi is used as the initial guess x0 (not assumed to be zero):
+      // r0 = src - A x0
+      //////////////////////////////////
+      Linop.HermOpAndNorm(psi,Az,zAz,zAAz); 
+      r=src-Az;
+      
+      /////////////////////
+      // z = Prec(r)
+      /////////////////////
+      Preconditioner(r,z);
+
+      std::cout<<GridLogMessage<< " Preconditioner in " << norm2(r)<<std::endl; 
+      std::cout<<GridLogMessage<< " Preconditioner out " << norm2(z)<<std::endl; 
+      
+      Linop.HermOp(z,tmp); 
+
+      std::cout<<GridLogMessage<< " Preconditioner Aout " << norm2(tmp)<<std::endl; 
+      ttmp=tmp;
+      tmp=tmp-r; // A z - r: how far the preconditioner output is from solving A z = r
+
+      std::cout<<GridLogMessage<< " Preconditioner resid " << std::sqrt(norm2(tmp)/norm2(r))<<std::endl; 
+      /*
+      std::cout<<GridLogMessage<<r<<std::endl;
+      std::cout<<GridLogMessage<<z<<std::endl;
+      std::cout<<GridLogMessage<<ttmp<<std::endl;
+      std::cout<<GridLogMessage<<tmp<<std::endl;
+      */
+
+      Linop.HermOpAndNorm(z,Az,zAz,zAAz); 
+
+      //p[0],q[0],qq[0] 
+      p[0]= z;
+      q[0]= Az;
+      qq[0]= zAAz;
+
+      cp =norm2(r);
+
+      for(int k=0;k<nstep;k++){
+
+	steps++;
+
+	int kp     = k+1;
+	int peri_k = k %mmax; // history slots are reused cyclically, modulo mmax
+	int peri_kp= kp%mmax;
+
+	rq= real(innerProduct(r,q[peri_k])); // what if rAr not real?
+	a = rq/qq[peri_k];
+
+	axpy(psi,a,p[peri_k],psi);         
+
+	cp = axpy_norm(r,-a,q[peri_k],r);  
+
+	std::cout<<GridLogMessage<< " VPGCR_step resid" <<sqrt(cp/rsq)<<std::endl; 
+	if((k==nstep-1)||(cp<rsq)){ // last step of this cycle, or converged: hand cp back to the caller
+	  return cp;
+	}
+
+	Preconditioner(r,z);// solve Az = r
+	Linop.HermOpAndNorm(z,Az,zAz,zAAz);
+
+
+	Linop.HermOp(z,tmp);
+        tmp=tmp-r;
+	std::cout<<GridLogMessage<< " Preconditioner resid" <<sqrt(norm2(tmp)/norm2(r))<<std::endl; 
+
+	q[peri_kp]=Az;
+	p[peri_kp]=z;
+
+	int northog = ((kp)>(mmax-1))?(mmax-1):(kp);  // if more than mmax done, we orthog all mmax history.
+	for(int back=0;back<northog;back++){
+
+	  int peri_back=(k-back)%mmax;   	  assert((k-back)>=0);
+
+	  b=-real(innerProduct(q[peri_back],Az))/qq[peri_back];
+	  p[peri_kp]=p[peri_kp]+b*p[peri_back];
+	  q[peri_kp]=q[peri_kp]+b*q[peri_back];
+
+	}
+	qq[peri_kp]=norm2(q[peri_kp]); // could use axpy_norm
+
+
+      }
+      assert(0); // never reached when nstep >= 1: the loop returns on its last step
+      return cp;
+    }
+  };
+}
+#endif
--- a/lib/algorithms/iterative/SchurRedBlack.h
+++ b/lib/algorithms/iterative/SchurRedBlack.h
@@ -89,7 +89,7 @@ namespace Grid {
      //////////////////////////////////////////////////////////////
      // Call the red-black solver
      //////////////////////////////////////////////////////////////
-      std::cout << "SchurRedBlack solver calling the MpcDagMp solver" <<std::endl;
+      std::cout<<GridLogMessage << "SchurRedBlack solver calling the MpcDagMp solver" <<std::endl;
      _HermitianRBSolver(_HermOpEO,src_o,sol_o);  assert(sol_o.checkerboard==Odd);

      ///////////////////////////////////////////////////
@@ -108,7 +108,7 @@ namespace Grid {
      RealD ns = norm2(in);
      RealD nr = norm2(resid);

-      std::cout << "SchurRedBlackDiagMooee solver true unprec resid "<< std::sqrt(nr/ns) <<" nr "<< nr <<" ns "<<ns << std::endl;
+      std::cout<<GridLogMessage << "SchurRedBlackDiagMooee solver true unprec resid "<< std::sqrt(nr/ns) <<" nr "<< nr <<" ns "<<ns << std::endl;
    }     
  };

--- a/lib/algorithms/iterative/bisec.c
+++ b/lib/algorithms/iterative/bisec.c
@@ -0,0 +1,122 @@
+#include <math.h>
+#include <stdlib.h>
+#include <vector>
+
+struct Bisection {
+// Bisection eigenvalue search for a symmetric tridiagonal matrix given by diagonal c[1..n] and off-diagonal b[2..n] (1-based).
+static void get_eig2(int row_num,std::vector<RealD> &ALPHA,std::vector<RealD> &BETA, std::vector<RealD> & eig)
+{
+  int i,j;
+  std::vector<RealD> evec1(row_num+3);
+  std::vector<RealD> evec2(row_num+3);
+  RealD eps2;
+  ALPHA[1]=0.;
+  BETA[1]=0.;   // the parameter is spelled BETA; the body used an undeclared BETHA
+  for(i=0;i<row_num-1;i++) {
+    ALPHA[i+1] = A[i*(row_num+1)].real();    // NOTE(review): A is not declared in this function - confirm the intended source matrix
+    BETA[i+2] = A[i*(row_num+1)+1].real();
+  }
+  ALPHA[row_num] = A[(row_num-1)*(row_num+1)].real();
+  bisec(ALPHA,BETA,row_num,1,row_num,1e-10,1e-10,evec1,eps2);
+  bisec(ALPHA,BETA,row_num,1,row_num,1e-16,1e-16,evec2,eps2);
+
+  // Do we really need to sort here?  Two-directional bubble sort of evec2[1..row_num] by mag(); NOTE(review): mag is not declared here
+  int begin=1;
+  int end = row_num;
+  int swapped=1;
+  while(swapped) {
+    swapped=0;
+    for(i=begin;i<end;i++){
+      if(mag(evec2[i])>mag(evec2[i+1]))	{
+	RealD t=evec2[i]; evec2[i]=evec2[i+1]; evec2[i+1]=t;   // swap elements; evec2+i is not valid on a std::vector
+	swapped=1;
+      }
+    }
+    end--;
+    for(i=end-1;i>=begin;i--){
+      if(mag(evec2[i])>mag(evec2[i+1]))	{
+	RealD t=evec2[i]; evec2[i]=evec2[i+1]; evec2[i+1]=t;
+	swapped=1;
+      }
+    }
+    begin++;
+  }
+
+  for(i=0;i<row_num;i++){
+    for(j=0;j<row_num;j++) {
+      if(i==j) H[i*row_num+j]=evec2[i+1];   // NOTE(review): H is not declared and eig is never written - confirm the intended output
+      else H[i*row_num+j]=0.;
+    }
+  }
+}
+// bisec: fills x[m1..m2] by interval bisection using diagonal c[1..n] and off-diagonal b[2..n]; b[1] is overwritten with 0 and eps2 is written.
+static void bisec(std::vector<RealD> &c,   
+		  std::vector<RealD> &b,
+		  int n,
+		  int m1,
+		  int m2,
+		  RealD eps1,
+		  RealD relfeh,
+		  std::vector<RealD> &x,
+		  RealD &eps2)
+{
+  std::vector<RealD> wu(n+2);
+
+  RealD h,q,x1,xu,x0,xmin,xmax; 
+  int i,a,k;
+
+  b[1]=0.0;
+  xmin=c[n]-fabs(b[n]);   // start the enclosing interval from the last row ...
+  xmax=c[n]+fabs(b[n]);
+  for(i=1;i<n;i++){       // ... and widen it with c[i] -/+ (|b[i]|+|b[i+1]|) for every other row
+    h=fabs(b[i])+fabs(b[i+1]);
+    if(c[i]+h>xmax) xmax= c[i]+h;
+    if(c[i]-h<xmin) xmin= c[i]-h;
+  }
+  xmax *=2.;
+
+  eps2=relfeh*((xmin+xmax)>0.0 ? xmax : -xmin);
+  if(eps1<=0.0) eps1=eps2;
+  eps2=0.5*eps1+7.0*(eps2);
+  x0=xmax;
+  for(i=m1;i<=m2;i++){
+    x[i]=xmax;            // per-index upper bounds
+    wu[i]=xmin;           // per-index lower bounds
+  }
+
+  for(k=m2;k>=m1;k--){
+    xu=xmin;
+    i=k;
+    do{                   // take the first wu[i], scanning i = k down to m1, that exceeds xu as the lower bound
+      if(xu<wu[i]){
+	xu=wu[i];
+	i=m1-1;
+      }
+      i--;
+    }while(i>=m1);
+    if(x0>x[k]) x0=x[k];
+    while((x0-xu)>2*relfeh*(fabs(xu)+fabs(x0))+eps1){
+      x1=(xu+x0)/2;
+
+      a=0;
+      q=1.0;
+      for(i=1;i<=n;i++){  // a counts the negative terms of q_i = c[i]-x1-b[i]^2/q_(i-1)
+	q=c[i]-x1-((q!=0.0)? b[i]*b[i]/q:fabs(b[i])/relfeh);
+	if(q<0) a++;
+      }
+      //			printf("x1=%e a=%d\n",x1,a);
+      if(a<k){
+	if(a<m1){
+	  xu=x1;
+	  wu[m1]=x1;
+	}else {
+	  xu=x1;
+	  wu[a+1]=x1;
+	  if(x[a]>x1) x[a]=x1;
+	}
+      }else x0=x1;
+    }
+    x[k]=(x0+xu)/2;       // midpoint of the final interval
+  }
+}
+};
--- a/lib/algorithms/iterative/get_eig.c
+++ b/lib/algorithms/iterative/get_eig.c
@@ -0,0 +1 @@
+